Loading README.md +1 −0 Original line number Diff line number Diff line Loading @@ -215,6 +215,7 @@ generated_wav_with_text_condition = audiolm(text = ['chirping of birds and the d - [ ] add option to use flash attention - [ ] simplify training even more within AudioLM class - [ ] cli tool, something like `audiolm generate <wav.file | text>` and save generated wav file to local directory - [ ] refactor so semantic transformer has a wrapper that handles unique consecutives as well as wav to hubert or vq-wav2vec ## Citations Loading audiolm_pytorch/audiolm_pytorch.py +5 −2 Original line number Diff line number Diff line Loading @@ -410,6 +410,7 @@ class SemanticTransformer(nn.Module): logits = self.forward_with_cond_scale( ids = sample_semantic_ids, text_embeds = text_embeds, unique_consecutive = False, **kwargs ) Loading Loading @@ -467,9 +468,11 @@ class SemanticTransformer(nn.Module): return_loss = False, text: Optional[List[str]] = None, text_embeds = None, cond_drop_prob = None cond_drop_prob = None, unique_consecutive = None ): device = self.device unique_consecutive = default(unique_consecutive, self.unique_consecutive) assert exists(raw_wave) ^ exists(ids) Loading @@ -482,7 +485,7 @@ class SemanticTransformer(nn.Module): if self.training: ids = append_eos_id(ids, self.eos_id) if self.unique_consecutive: if unique_consecutive: ids = batch_unique_consecutive(ids, pad_value = self.pad_id) has_text = exists(text) or exists(text_embeds) Loading setup.py +1 −1 Original line number Diff line number Diff line Loading @@ -3,7 +3,7 @@ from setuptools import setup, find_packages setup( name = 'audiolm-pytorch', packages = find_packages(exclude=[]), version = '0.0.42', version = '0.0.43', license='MIT', description = 'AudioLM - Language Modeling Approach to Audio Generation from Google Research - Pytorch', author = 'Phil Wang', Loading Loading
README.md +1 −0 Original line number Diff line number Diff line Loading @@ -215,6 +215,7 @@ generated_wav_with_text_condition = audiolm(text = ['chirping of birds and the d - [ ] add option to use flash attention - [ ] simplify training even more within AudioLM class - [ ] cli tool, something like `audiolm generate <wav.file | text>` and save generated wav file to local directory - [ ] refactor so semantic transformer has a wrapper that handles unique consecutives as well as wav to hubert or vq-wav2vec ## Citations Loading
audiolm_pytorch/audiolm_pytorch.py +5 −2 Original line number Diff line number Diff line Loading @@ -410,6 +410,7 @@ class SemanticTransformer(nn.Module): logits = self.forward_with_cond_scale( ids = sample_semantic_ids, text_embeds = text_embeds, unique_consecutive = False, **kwargs ) Loading Loading @@ -467,9 +468,11 @@ class SemanticTransformer(nn.Module): return_loss = False, text: Optional[List[str]] = None, text_embeds = None, cond_drop_prob = None cond_drop_prob = None, unique_consecutive = None ): device = self.device unique_consecutive = default(unique_consecutive, self.unique_consecutive) assert exists(raw_wave) ^ exists(ids) Loading @@ -482,7 +485,7 @@ class SemanticTransformer(nn.Module): if self.training: ids = append_eos_id(ids, self.eos_id) if self.unique_consecutive: if unique_consecutive: ids = batch_unique_consecutive(ids, pad_value = self.pad_id) has_text = exists(text) or exists(text_embeds) Loading
setup.py +1 −1 Original line number Diff line number Diff line Loading @@ -3,7 +3,7 @@ from setuptools import setup, find_packages setup( name = 'audiolm-pytorch', packages = find_packages(exclude=[]), version = '0.0.42', version = '0.0.43', license='MIT', description = 'AudioLM - Language Modeling Approach to Audio Generation from Google Research - Pytorch', author = 'Phil Wang', Loading