bring soundstream back (4f104f69) · Commits · school / Capstone Design / 01 / audiolm-pytorch-training

audiolm_pytorch_demo_laion.py

+21 −21

Original line number	Diff line number	Diff line
		@@ -68,27 +68,27 @@ make_placeholder_dataset()

		#######

		# soundstream = AudioLMSoundStream(
		# codebook_size = 1024,
		# rq_num_quantizers = 8,
		# attn_window_size = 128, # local attention receptive field at bottleneck
		# attn_depth = 2 # 2 local attention transformer blocks - the soundstream folks were not experts with attention, so i took the liberty to add some. encodec went with lstms, but attention should be better
		# )

		# soundstream_trainer = SoundStreamTrainer(
		# soundstream,
		# folder = dataset_folder,
		# lr=3e-4,
		# batch_size = 4,
		# grad_accum_every = 8, # effective batch size of batch_size * grad_accum_every = 32
		# data_max_length_seconds = 2, # train on 2 second audio
		# results_folder = f"{prefix}/soundstream_results",
		# save_results_every = 4,
		# save_model_every = 4,
		# num_train_steps = 9
		# ).cuda()

		# soundstream_trainer.train() # skip soundstream for now
		# SoundStream codec model used by the rest of this script.
		soundstream = AudioLMSoundStream(
		    codebook_size=1024,
		    rq_num_quantizers=8,
		    # local attention receptive field at the bottleneck
		    attn_window_size=128,
		    # 2 local attention transformer blocks; per the original author's note,
		    # these were added on top of the SoundStream design (encodec went with
		    # LSTMs, but attention should be better)
		    attn_depth=2,
		)

		# Trainer for the codec above. The step and save intervals are as configured
		# in the original script.
		soundstream_trainer = SoundStreamTrainer(
		    soundstream,
		    folder=dataset_folder,
		    lr=3e-4,
		    # effective batch size = batch_size * grad_accum_every = 32
		    batch_size=4,
		    grad_accum_every=8,
		    # train on 2 second audio
		    data_max_length_seconds=2,
		    results_folder=f"{prefix}/soundstream_results",
		    save_results_every=4,
		    save_model_every=4,
		    num_train_steps=9,
		).cuda()

		# SoundStream training is active here (it is no longer skipped).
		soundstream_trainer.train()

		#############