Commit cce09fb2 authored by Leon Wu
Browse files

skip soundstream for now

parent 7646555d
Loading
Loading
Loading
Loading
+21 −21
Original line number Diff line number Diff line
@@ -66,27 +66,27 @@ make_placeholder_dataset()

#######

# Build the SoundStream model, wrap it in its trainer, and run a short
# training pass over the audio found in dataset_folder.
soundstream = AudioLMSoundStream(
    codebook_size=1024,
    rq_num_quantizers=8,
    # Local attention receptive field at the bottleneck.
    attn_window_size=128,
    # 2 local attention transformer blocks. The original SoundStream design
    # did not lean on attention, so some were added here; EnCodec went with
    # LSTMs, but attention should be better.
    attn_depth=2,
)

soundstream_trainer = SoundStreamTrainer(
    soundstream,
    folder=dataset_folder,
    lr=3e-4,
    # Effective batch size is batch_size * grad_accum_every = 4 * 8 = 32.
    batch_size=4,
    grad_accum_every=8,
    # Train on 2 second audio.
    data_max_length_seconds=2,
    results_folder=f"{prefix}/soundstream_results",
    save_results_every=4,
    save_model_every=4,
    num_train_steps=9,
).cuda()

soundstream_trainer.train()
# soundstream = AudioLMSoundStream(
#     codebook_size = 1024,
#     rq_num_quantizers = 8,
#     attn_window_size = 128,       # local attention receptive field at bottleneck
#     attn_depth = 2                # 2 local attention transformer blocks - the SoundStream folks were not experts with attention, so I took the liberty of adding some. EnCodec went with LSTMs, but attention should be better
# )

# soundstream_trainer = SoundStreamTrainer(
#     soundstream,
#     folder = dataset_folder,
#     lr=3e-4,
#     batch_size = 4,
#     grad_accum_every = 8, # effective batch size of batch_size * grad_accum_every = 32
#     data_max_length_seconds = 2,  # train on 2 second audio
#     results_folder = f"{prefix}/soundstream_results",
#     save_results_every = 4,
#     save_model_every = 4,
#     num_train_steps = 9
# ).cuda()

# soundstream_trainer.train() # skip soundstream for now

#############