Commit dd3dcdbf authored by Phil Wang
Browse files

quick validation for vq-wav2vec

parent 8dacdeff
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -826,7 +826,7 @@ class SemanticTransformerWrapper(nn.Module):
        super().__init__()
        self.wav2vec = wav2vec
        self.transformer = transformer
        assert self.wav2vec.codebook_size == transformer.num_semantic_tokens, f'num_semantic_tokens on SemanticTransformer must be set to {self.wav2vec.codebook_size}'
        assert not exists(self.wav2vec) or self.wav2vec.codebook_size == transformer.num_semantic_tokens, f'num_semantic_tokens on SemanticTransformer must be set to {self.wav2vec.codebook_size}'

        self.unique_consecutive = unique_consecutive
        self.pad_id = pad_id
+2 −0
Original line number Diff line number Diff line
@@ -38,6 +38,8 @@ class FairseqVQWav2Vec(nn.Module):
        self.model = model[0]
        self.model.eval()

        assert hasattr(self.model, 'vector_quantizer') and hasattr(self.model.vector_quantizer, 'embedding'), 'the vq wav2vec model does not seem to be valid'

    @property
    def groups(self):
        """Return the ``groups`` attribute of the wrapped model's vector quantizer."""
        quantizer = self.model.vector_quantizer
        return quantizer.groups
+1 −1
Original line number Diff line number Diff line
@@ -3,7 +3,7 @@ from setuptools import setup, find_packages
setup(
  name = 'audiolm-pytorch',
  packages = find_packages(exclude=[]),
  version = '0.1.4',
  version = '0.1.5',
  license='MIT',
  description = 'AudioLM - Language Modeling Approach to Audio Generation from Google Research - Pytorch',
  author = 'Phil Wang',