assertexists(raw_wave_for_soundstream)orexists(coarse_token_ids),'either raw waveform (raw_wav) is given, or coarse and fine token ids (coarse_token_ids, fine_token_ids)'
assertexists(raw_wave_for_codec)orexists(coarse_token_ids),'either raw waveform (raw_wav) is given, or coarse and fine token ids (coarse_token_ids, fine_token_ids)'
assert (self.num_fine_quantizers+self.num_coarse_quantizers)==soundstream.num_quantizers,'number of fine and coarse quantizers on fine transformer must add up to total number of quantizers on soundstream'
ifexists(codec):
assert (self.num_fine_quantizers+self.num_coarse_quantizers)==codec.num_quantizers,'number of fine and coarse quantizers on fine transformer must add up to total number of quantizers on codec'
self.eos_id=transformer.eos_id
@@ -1596,13 +1596,13 @@ class FineTransformerWrapper(nn.Module):
ifnotreconstruct_wave:
returnsampled_fine_token_ids
# reconstruct the wave using soundstream, first concatenating the fine and coarse token ids along the quantization dimension
# reconstruct the wave using codec, first concatenating the fine and coarse token ids along the quantization dimension