Commit 0b078214 authored by Phil Wang
Browse files

use a hack to fix an issue, refactor semantic later

parent 9c4c56d3
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -215,6 +215,7 @@ generated_wav_with_text_condition = audiolm(text = ['chirping of birds and the d
- [ ] add option to use flash attention
- [ ] simplify training even more within AudioLM class
- [ ] cli tool, something like `audiolm generate <wav.file | text>` and save generated wav file to local directory
- [ ] refactor so semantic transformer has a wrapper that handles unique consecutives as well as wav to hubert or vq-wav2vec

## Citations

+5 −2
Original line number Diff line number Diff line
@@ -410,6 +410,7 @@ class SemanticTransformer(nn.Module):
            logits = self.forward_with_cond_scale(
                ids = sample_semantic_ids,
                text_embeds = text_embeds,
                unique_consecutive = False,
                **kwargs
            )

@@ -467,9 +468,11 @@ class SemanticTransformer(nn.Module):
        return_loss = False,
        text: Optional[List[str]] = None,
        text_embeds = None,
        cond_drop_prob = None
        cond_drop_prob = None,
        unique_consecutive = None
    ):
        device = self.device
        unique_consecutive = default(unique_consecutive, self.unique_consecutive)

        assert exists(raw_wave) ^ exists(ids)

@@ -482,7 +485,7 @@ class SemanticTransformer(nn.Module):
        if self.training:
            ids = append_eos_id(ids, self.eos_id)

        if self.unique_consecutive:
        if unique_consecutive:
            ids = batch_unique_consecutive(ids, pad_value = self.pad_id)

        has_text = exists(text) or exists(text_embeds)
+1 −1
Original line number Diff line number Diff line
@@ -3,7 +3,7 @@ from setuptools import setup, find_packages
setup(
  name = 'audiolm-pytorch',
  packages = find_packages(exclude=[]),
  version = '0.0.42',
  version = '0.0.43',
  license='MIT',
  description = 'AudioLM - Language Modeling Approach to Audio Generation from Google Research - Pytorch',
  author = 'Phil Wang',