Commit 8afeb585 authored by Phil Wang
Browse files

average the loss across time and batch for multi spectral recon loss, seems a bit too high otherwise

average the loss across time and batch for multi spectral recon loss, seems a bit too high otherwise
parent 627d6fee
Loading
Loading
Loading
Loading
+5 −1
Original line number Diff line number Diff line
@@ -703,7 +703,11 @@ class SoundStream(nn.Module):
            for mel_transform, alpha in zip(self.mel_spec_transforms, self.mel_spec_recon_alphas):
                orig_mel, recon_mel = map(mel_transform, (orig_x, recon_x))
                log_orig_mel, log_recon_mel = map(log, (orig_mel, recon_mel))
-               multi_spectral_recon_loss = multi_spectral_recon_loss + (orig_mel - recon_mel).abs().sum() + alpha * l2norm(log_orig_mel - log_recon_mel, dim = -2).sum()
+
+               l1_mel_loss = (orig_mel - recon_mel).abs().sum(dim = -2).mean()
+               l2_log_mel_loss = alpha * l2norm(log_orig_mel - log_recon_mel, dim = -2).mean()
+
+               multi_spectral_recon_loss = multi_spectral_recon_loss + l1_mel_loss + l2_log_mel_loss

        # adversarial loss

+1 −1
Original line number Diff line number Diff line
@@ -3,7 +3,7 @@ from setuptools import setup, find_packages
setup(
  name = 'audiolm-pytorch',
  packages = find_packages(exclude=[]),
- version = '0.11.6',
+ version = '0.11.7',
  license='MIT',
  description = 'AudioLM - Language Modeling Approach to Audio Generation from Google Research - Pytorch',
  author = 'Phil Wang',