Commit f08b9a58 authored by Phil Wang
Browse files

bring in openai text tokenizer from another repo, as well as residual vq...

bring in openai text tokenizer from another repo, as well as residual vq needed for the audio-text joint embedding
parent 0c5f01ec
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
from musiclm_pytorch.musiclm_pytorch import MuLaN
from musiclm_pytorch.musiclm_pytorch import MuLaN, MusicLM
+15 −2
Original line number Diff line number Diff line
@@ -2,20 +2,31 @@ import torch
import torch.nn.functional as F
from torch import nn, einsum

from einops import rearrange, repeat, reduce
from x_clip.tokenizer import tokenizer
from vector_quantize_pytorch import ResidualVQ

from einops import rearrange, repeat, reduce, pack, unpack

# functions

def exists(val):
    """Return True when `val` is anything other than None."""
    return not (val is None)

# tensor functions

def log(t, eps = 1e-20):
    """Natural log of `t`, with values below `eps` raised to `eps` first.

    The clamp keeps zero (or negative) entries from producing -inf / nan.
    """
    floored = t.clamp(min = eps)
    return floored.log()

def l2norm(t):
    """Scale `t` to unit L2 norm along its last dimension."""
    return F.normalize(t, dim = -1, p = 2)

# biasless layernorm

class LayerNorm(nn.Module):
    """Layer normalization over the last dimension without a learned bias.

    `gamma` is a learnable per-feature scale initialised to ones. `beta` is
    a buffer of zeros: it is passed to `F.layer_norm` as the bias, but since
    it is a buffer rather than a parameter it is not returned by
    `parameters()`.

    Args:
        dim: size of the last (normalized) dimension.
    """

    def __init__(self, dim):
        super().__init__()
        self.gamma = nn.Parameter(torch.ones(dim))
        # registered exactly once; a buffer so it follows the module across
        # devices / state_dict without being a parameter
        self.register_buffer('beta', torch.zeros(dim))

    def forward(self, x):
        """Normalize `x` over its last dimension and scale by `gamma`."""
        return F.layer_norm(x, x.shape[-1:], self.gamma, self.beta)
@@ -153,6 +164,8 @@ class MuLaN(nn.Module):
    def forward(self, x):
        """Return `x` unchanged."""
        return x

# music lm

class MusicLM(nn.Module):
    def __init__(self):
        super().__init__()
+1 −0
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@ setup(
    'audiolm-pytorch',
    'einops>=0.4',
    'vector-quantize-pytorch>=0.10.15',
    'x-clip',
    'torch>=1.6',
    'torchaudio'
  ],