Commit 572f3757 authored by Leon Wu
Browse files

Update demo NB to match audioLM v0.23.2

parent 08ef21e7
Loading
Loading
Loading
Loading
+8 −4
Original line number Diff line number Diff line
@@ -193,9 +193,9 @@
    "source": [
     "# !pip install audiolm-pytorch boto3 tensorboardX\n",
     "!pip install boto3 tensorboardX\n",
-    "!pip install audiolm-pytorch==0.11.1\n",
+    "!pip install audiolm-pytorch\n",
     "# !pip uninstall -y audiolm-pytorch\n",
-    "raise AssertionError(\"don't forget to put in your patched version of audiolm and aws credentials!\")\n",
+    "# raise AssertionError(\"don't forget to put in your patched version of audiolm and aws credentials!\")\n",
     "# tensorboardX required for lambda labs"
    ]
   },
@@ -11408,9 +11408,13 @@
     }
    ],
    "source": [
-    "soundstream = SoundStream(\n",
+    "from audiolm_pytorch import AudioLMSoundStream\n",
+    "\n",
+    "soundstream = AudioLMSoundStream(\n",
     "    codebook_size = 1024,\n",
     "    rq_num_quantizers = 8,\n",
+    "    attn_window_size = 128,       # local attention receptive field at bottleneck\n",
+    "    attn_depth = 2                # 2 local attention transformer blocks - the soundstream folks were not experts with attention, so i took the liberty to add some. encodec went with lstms, but attention should be better\n",
     ")\n",
     "\n",
     "actual_num_train_steps = 20001\n",
@@ -11422,7 +11426,7 @@
     "    lr=3e-4,\n",
     "    batch_size = 4,\n",
     "    grad_accum_every = 8, # effective batch size of batch_size * grad_accum_every = 32\n",
-    "    data_max_length = 16000,\n",
+    "    data_max_length_seconds = 2,  # train on 2 second audio\n",
     "    results_folder = \"soundstream_results\",\n",
     "    save_results_every = save_every,\n",
     "    save_model_every = save_every,\n",