Commit 369e8e2e authored by Phil Wang
Browse files

allow for the structured quantized dropout from encodec paper

parent f54e19d5
Loading
Loading
Loading
Loading
+10 −0
Original line number Diff line number Diff line
@@ -454,3 +454,13 @@ $ accelerate launch train.py
    status = {to be published - one attention stabilization technique is circulating within Google Brain, being used by multiple teams}
}
```

```bibtex
@article{Defossez2022HighFN,
    title   = {High Fidelity Neural Audio Compression},
    author  = {Alexandre D{\'e}fossez and Jade Copet and Gabriel Synnaeve and Yossi Adi},
    journal = {ArXiv},
    year    = {2022},
    volume  = {abs/2210.13438}
}
```
+2 −0
Original line number Diff line number Diff line
@@ -362,6 +362,7 @@ class SoundStream(nn.Module):
        rq_num_quantizers = 8,
        rq_commitment_weight = 1.,
        rq_ema_decay = 0.95,
        rq_quantize_dropout_multiple_of = 1,
        input_channels = 1,
        discr_multi_scales = (1, 0.5, 0.25),
        stft_normalized = False,
@@ -437,6 +438,7 @@ class SoundStream(nn.Module):
            codebook_size = codebook_size,
            decay = rq_ema_decay,
            commitment_weight = rq_commitment_weight,
            quantize_dropout_multiple_of = rq_quantize_dropout_multiple_of,
            kmeans_init = True,
            threshold_ema_dead_code = 2,
            quantize_dropout = True,
+1 −1
Original line number Diff line number Diff line
__version__ = '0.21.0'
__version__ = '0.21.2'
+1 −1
Original line number Diff line number Diff line
@@ -33,7 +33,7 @@ setup(
    'torchaudio',
    'transformers',
    'tqdm',
    'vector-quantize-pytorch>=0.10.15'
    'vector-quantize-pytorch>=1.0.6'
  ],
  classifiers=[
    'Development Status :: 4 - Beta',