Commit a1cf75ca authored by Phil Wang
Browse files

Expose the EMA decay of the residual VQ in SoundStream as the `rq_ema_decay` constructor argument (default 0.95)

parent 03b21607
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -249,6 +249,7 @@ class SoundStream(nn.Module):
        codebook_size = 1024,
        rq_num_quantizers = 8,
        rq_commitment_weight = 1.,
        rq_ema_decay = 0.95,
        input_channels = 1,
        discr_multi_scales = (1, 0.5, 0.25),
        recon_loss_weight = 1.,
@@ -296,6 +297,7 @@ class SoundStream(nn.Module):
            dim = codebook_dim,
            num_quantizers = rq_num_quantizers,
            codebook_size = codebook_size,
            decay = rq_ema_decay,
            commitment_weight = rq_commitment_weight,
            kmeans_init = True,
            threshold_ema_dead_code = 2,
+1 −1
Original line number Diff line number Diff line
@@ -3,7 +3,7 @@ from setuptools import setup, find_packages
setup(
  name = 'audiolm-pytorch',
  packages = find_packages(exclude=[]),
  version = '0.1.11',
  version = '0.1.12',
  license='MIT',
  description = 'AudioLM - Language Modeling Approach to Audio Generation from Google Research - Pytorch',
  author = 'Phil Wang',