Update demo NB to match audioLM v0.23.2 (572f3757) · Commits · school / Capstone Design / 01 / audiolm-pytorch-training

audiolm_pytorch_demo_backup.ipynb

+8 −4

Original line number	Diff line number	Diff line
		@@ -193,9 +193,9 @@
		"source": [
		"# !pip install audiolm-pytorch boto3 tensorboardX\n",
		"!pip install boto3 tensorboardX\n",
		-"!pip install audiolm-pytorch==0.11.1\n",
		+"!pip install audiolm-pytorch\n",
		"# !pip uninstall -y audiolm-pytorch\n",
		-"raise AssertionError(\"don't forget to put in your patched version of audiolm and aws credentials!\")\n",
		+"# raise AssertionError(\"don't forget to put in your patched version of audiolm and aws credentials!\")\n",
		"# tensorboardX required for lambda labs"
		]
		},
		@@ -11408,9 +11408,13 @@
		}
		],
		"source": [
		-"soundstream = SoundStream(\n",
		+"from audiolm_pytorch import AudioLMSoundStream\n",
		+"\n",
		+"soundstream = AudioLMSoundStream(\n",
		" codebook_size = 1024,\n",
		" rq_num_quantizers = 8,\n",
		+" attn_window_size = 128, # local attention receptive field at bottleneck\n",
		+" attn_depth = 2 # 2 local attention transformer blocks - the soundstream folks were not experts with attention, so i took the liberty to add some. encodec went with lstms, but attention should be better\n",
		")\n",
		"\n",
		"actual_num_train_steps = 20001\n",
		@@ -11422,7 +11426,7 @@
		" lr=3e-4,\n",
		" batch_size = 4,\n",
		" grad_accum_every = 8, # effective batch size of batch_size * grad_accum_every = 32\n",
		-" data_max_length = 16000,\n",
		+" data_max_length_seconds = 2, # train on 2 second audio\n",
		" results_folder = \"soundstream_results\",\n",
		" save_results_every = save_every,\n",
		" save_model_every = save_every,\n",