use a hack to fix an issue, refactor semantic later (0b078214) · Commits · school / Capstone Design / 01 / AudioLM

README.md

+1 −0

Original line number	Diff line number	Diff line
		@@ -215,6 +215,7 @@ generated_wav_with_text_condition = audiolm(text = ['chirping of birds and the d
		- [ ] add option to use flash attention
		- [ ] simplify training even more within AudioLM class
		- [ ] cli tool, something like `audiolm generate <wav.file \| text>` and save generated wav file to local directory
		- [ ] refactor so semantic transformer has a wrapper that handles unique consecutives as well as wav to hubert or vq-wav2vec

		## Citations

+5 −2

Original line number	Diff line number	Diff line
		@@ -410,6 +410,7 @@ class SemanticTransformer(nn.Module):
		logits = self.forward_with_cond_scale(
		ids = sample_semantic_ids,
		text_embeds = text_embeds,
		unique_consecutive = False,
		**kwargs
		)

		@@ -467,9 +468,11 @@ class SemanticTransformer(nn.Module):
		return_loss = False,
		text: Optional[List[str]] = None,
		text_embeds = None,
		cond_drop_prob = None
		cond_drop_prob = None,
		unique_consecutive = None
		):
		device = self.device
		unique_consecutive = default(unique_consecutive, self.unique_consecutive)

		assert exists(raw_wave) ^ exists(ids)

		@@ -482,7 +485,7 @@ class SemanticTransformer(nn.Module):
		if self.training:
		ids = append_eos_id(ids, self.eos_id)

		if self.unique_consecutive:
		if unique_consecutive:
		ids = batch_unique_consecutive(ids, pad_value = self.pad_id)

		has_text = exists(text) or exists(text_embeds)

+1 −1

Original line number	Diff line number	Diff line
		@@ -3,7 +3,7 @@ from setuptools import setup, find_packages
		setup(
		name = 'audiolm-pytorch',
		packages = find_packages(exclude=[]),
		version = '0.0.42',
		version = '0.0.43',
		license='MIT',
		description = 'AudioLM - Language Modeling Approach to Audio Generation from Google Research - Pytorch',
		author = 'Phil Wang',