Commit dc003955 authored by Leon Wu's avatar Leon Wu
Browse files

separate hubert download script out

parent 5951585b
Loading
Loading
Loading
Loading
+0 −11
Original line number Diff line number Diff line
@@ -88,17 +88,6 @@ soundstream_trainer.train()

#############

# HuBERT checkpoints can be downloaded at
# https://github.com/facebookresearch/fairseq/tree/main/examples/hubert
#
# Fetch the checkpoint and its k-means quantizer into {prefix}/hubert/ unless
# they are already on disk.
# NOTE(review): this relies on `urllib.request` being imported at the top of
# the file; a bare `import urllib` does not reliably expose the submodule.
for hubert_file in (hubert_ckpt, hubert_quantizer):
  hubert_file_path = f"{prefix}/{hubert_file}"
  if os.path.isfile(hubert_file_path):
    continue
  # Create the directory the file is actually written to; a "hubert" folder
  # relative to the current working directory is not where the download goes.
  os.makedirs(os.path.dirname(hubert_file_path), exist_ok=True)
  # Download under a temporary name and rename afterwards, so an interrupted
  # transfer is never mistaken for a complete file by the isfile() check.
  partial_path = f"{hubert_file_path}.part"
  urllib.request.urlretrieve(f"https://dl.fbaipublicfiles.com/{hubert_file}", partial_path)
  os.replace(partial_path, hubert_file_path)

wav2vec = HubertWithKmeans(
    checkpoint_path = f"{prefix}/{hubert_ckpt}",
    kmeans_path = f"{prefix}/{hubert_quantizer}"
+18 −0
Original line number Diff line number Diff line
import os
import urllib
import urllib.request

# Root directory that holds the dataset and the downloaded HuBERT files.
prefix = "/fsx/itsleonwu/audiolm-pytorch"
dataset_folder = prefix + "/placeholder_dataset"

# Paths relative to `prefix`; the same relative paths are appended to the
# download host to build each URL.
hubert_ckpt = "hubert/hubert_base_ls960.pt"
# Listed in row "HuBERT Base (~95M params)", column Quantizer.
hubert_quantizer = "hubert/hubert_base_ls960_L9_km500.bin"

# HuBERT checkpoints can be downloaded at
# https://github.com/facebookresearch/fairseq/tree/main/examples/hubert
#
# Fetch the checkpoint and its k-means quantizer into {prefix}/hubert/,
# skipping any file that is already on disk.
for artifact in (hubert_ckpt, hubert_quantizer):
  destination = f"{prefix}/{artifact}"
  if os.path.isfile(destination):
    continue
  # Create the directory the file is actually written to; a "hubert" folder
  # relative to the current working directory is not where the download goes.
  os.makedirs(os.path.dirname(destination), exist_ok=True)
  # Download under a temporary name and rename afterwards, so an interrupted
  # transfer is never mistaken for a complete file by the isfile() check.
  partial_download = f"{destination}.part"
  urllib.request.urlretrieve(f"https://dl.fbaipublicfiles.com/{artifact}", partial_download)
  os.replace(partial_download, destination)