# README.md  (+1 −0)
# @@ -57,6 +57,7 @@ loss.backward()
#
# - [ ] add a version of mulan to <a href="https://github.com/mlfoundations/open_clip">open clip</a>
# - [ ] set all the proper spectrogram hyperparameters
# - [ ] mulan seems to be using decoupled contrastive learning, offer that as an option
# - [ ] email some contrastive learning experts and figure out why some papers are sharing the projection from embeddings to latent space
#
# ## Appreciation

# musiclm_pytorch/musiclm_pytorch.py  (+16 −0)
# @@ -388,6 +388,22 @@ class MuLaN(nn.Module):
class MuLaN(nn.Module):
    # Excerpt of a diff hunk: only the two methods added by the hunk are
    # reproduced as code. The hunk's leading context lines (from a method
    # whose header is not part of this excerpt) were:
    #     self.text_to_latents = nn.Linear(self.text.dim, dim_latent)
    #     self.audio_to_latents = nn.Linear(self.audio.dim, dim_latent)

    def get_audio_latents(self, wavs):
        """Return the latents computed from `wavs`.

        `wavs` is fed to `self.audio`, that output to
        `self.audio_to_latents`, and the projection is passed through
        `l2norm` before being returned.

        NOTE(review): `l2norm` is defined outside this excerpt; presumably
        it L2-normalizes its input -- confirm.
        """
        return l2norm(self.audio_to_latents(self.audio(wavs)))

    def get_text_latents(self, texts):
        """Return the latents computed from `texts`.

        `texts` is fed to `self.text`, that output to
        `self.text_to_latents`, and the projection is passed through
        `l2norm` before being returned.

        NOTE(review): `l2norm` is defined outside this excerpt; presumably
        it L2-normalizes its input -- confirm.
        """
        return l2norm(self.text_to_latents(self.text(texts)))

    # The hunk's trailing context is the start of the next method and is cut
    # off in this excerpt:
    #     def forward(self, wavs, ...
README.md  (+1 −0)

@@ -57,6 +57,7 @@ loss.backward()

- [ ] add a version of mulan to <a href="https://github.com/mlfoundations/open_clip">open clip</a>
- [ ] set all the proper spectrogram hyperparameters
- [ ] mulan seems to be using decoupled contrastive learning, offer that as an option
- [ ] email some contrastive learning experts and figure out why some papers are sharing the projection from embeddings to latent space

## Appreciation
# musiclm_pytorch/musiclm_pytorch.py  (+16 −0)
# @@ -388,6 +388,22 @@ class MuLaN(nn.Module):
class MuLaN(nn.Module):
    # Excerpt of a diff hunk: only the two methods added by the hunk are
    # reproduced as code. The hunk's leading context lines (from a method
    # whose header is not part of this excerpt) were:
    #     self.text_to_latents = nn.Linear(self.text.dim, dim_latent)
    #     self.audio_to_latents = nn.Linear(self.audio.dim, dim_latent)

    def get_audio_latents(self, wavs):
        """Return the latents computed from `wavs`.

        `wavs` is fed to `self.audio`, that output to
        `self.audio_to_latents`, and the projection is passed through
        `l2norm` before being returned.

        NOTE(review): `l2norm` is defined outside this excerpt; presumably
        it L2-normalizes its input -- confirm.
        """
        return l2norm(self.audio_to_latents(self.audio(wavs)))

    def get_text_latents(self, texts):
        """Return the latents computed from `texts`.

        `texts` is fed to `self.text`, that output to
        `self.text_to_latents`, and the projection is passed through
        `l2norm` before being returned.

        NOTE(review): `l2norm` is defined outside this excerpt; presumably
        it L2-normalizes its input -- confirm.
        """
        return l2norm(self.text_to_latents(self.text(texts)))

    # The hunk's trailing context is the start of the next method and is cut
    # off in this excerpt:
    #     def forward(self, wavs, ...