Loading README.md +9 −0 Original line number Diff line number Diff line Loading @@ -200,6 +200,15 @@ music = musiclm(['the crystalline sounds of the piano in a ballroom']) # torch.T } ``` ```bibtex @misc{gilmer2023intriguing, title = {Intriguing Properties of Transformer Training Instabilities}, author = {Justin Gilmer and Andrea Schioppa and Jeremy Cohen}, year = {2023}, status = {to be published - one attention stabilization technique is circulating within Google Brain, being used by multiple teams} } ``` *The only truth is music.* - Jack Kerouac *Music is the universal language of mankind.* - Henry Wadsworth Longfellow musiclm_pytorch/musiclm_pytorch.py +12 −4 Original line number Diff line number Diff line Loading @@ -113,11 +113,12 @@ class Attention(nn.Module): causal = False, dim_head = 64, heads = 8, dropout = 0. dropout = 0., scale = 8 ): super().__init__() self.heads = heads self.scale = dim_head ** -0.5 self.scale = scale self.causal = causal inner_dim = dim_head * heads Loading @@ -128,6 +129,9 @@ class Attention(nn.Module): self.to_q = nn.Linear(dim, inner_dim, bias = False) self.to_kv = nn.Linear(dim, inner_dim * 2, bias = False) self.q_scale = nn.Parameter(torch.ones(dim_head)) self.k_scale = nn.Parameter(torch.ones(dim_head)) self.to_out = nn.Sequential( nn.Linear(inner_dim, dim, bias = False), nn.Dropout(dropout) Loading @@ -153,11 +157,15 @@ class Attention(nn.Module): q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h = self.heads), (q, k, v)) q = q * self.scale # qk rmsnorm, technique circulating within brain used to stabilize a 22B parameter vision model training q, k = map(l2norm, (q, k)) q = q * self.q_scale k = k * self.k_scale # similarities sim = einsum('b h i d, b h j d -> b h i j', q, k) sim = einsum('b h i d, b h j d -> b h i j', q, k) * self.scale if exists(rel_pos_bias): sim = sim + rel_pos_bias Loading setup.py +1 −1 Original line number Diff line number Diff line Loading @@ -3,7 +3,7 @@ from setuptools import setup, 
find_packages setup( name = 'musiclm-pytorch', packages = find_packages(exclude=[]), version = '0.0.22', version = '0.0.23', license='MIT', description = 'MusicLM - AudioLM + Audio CLIP to text to music synthesis', author = 'Phil Wang', Loading Loading
README.md +9 −0 Original line number Diff line number Diff line Loading @@ -200,6 +200,15 @@ music = musiclm(['the crystalline sounds of the piano in a ballroom']) # torch.T } ``` ```bibtex @misc{gilmer2023intriguing, title = {Intriguing Properties of Transformer Training Instabilities}, author = {Justin Gilmer and Andrea Schioppa and Jeremy Cohen}, year = {2023}, status = {to be published - one attention stabilization technique is circulating within Google Brain, being used by multiple teams} } ``` *The only truth is music.* - Jack Kerouac *Music is the universal language of mankind.* - Henry Wadsworth Longfellow
musiclm_pytorch/musiclm_pytorch.py +12 −4 Original line number Diff line number Diff line Loading @@ -113,11 +113,12 @@ class Attention(nn.Module): causal = False, dim_head = 64, heads = 8, dropout = 0. dropout = 0., scale = 8 ): super().__init__() self.heads = heads self.scale = dim_head ** -0.5 self.scale = scale self.causal = causal inner_dim = dim_head * heads Loading @@ -128,6 +129,9 @@ class Attention(nn.Module): self.to_q = nn.Linear(dim, inner_dim, bias = False) self.to_kv = nn.Linear(dim, inner_dim * 2, bias = False) self.q_scale = nn.Parameter(torch.ones(dim_head)) self.k_scale = nn.Parameter(torch.ones(dim_head)) self.to_out = nn.Sequential( nn.Linear(inner_dim, dim, bias = False), nn.Dropout(dropout) Loading @@ -153,11 +157,15 @@ class Attention(nn.Module): q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h = self.heads), (q, k, v)) q = q * self.scale # qk rmsnorm, technique circulating within brain used to stabilize a 22B parameter vision model training q, k = map(l2norm, (q, k)) q = q * self.q_scale k = k * self.k_scale # similarities sim = einsum('b h i d, b h j d -> b h i j', q, k) sim = einsum('b h i d, b h j d -> b h i j', q, k) * self.scale if exists(rel_pos_bias): sim = sim + rel_pos_bias Loading
setup.py +1 −1 Original line number Diff line number Diff line Loading @@ -3,7 +3,7 @@ from setuptools import setup, find_packages setup( name = 'musiclm-pytorch', packages = find_packages(exclude=[]), version = '0.0.22', version = '0.0.23', license='MIT', description = 'MusicLM - AudioLM + Audio CLIP to text to music synthesis', author = 'Phil Wang', Loading