Commit b6e5af78 authored by Phil Wang
Browse files

optionally allow for resampling directly within SoundDataset, if target_sample_khz specified

parent f7756f56
Loading
Loading
Loading
Loading
+7 −1
Original line number Diff line number Diff line
@@ -21,6 +21,7 @@ class SoundDataset(Dataset):
        folder,
        exts = ['flac', 'wav'],
        max_length = None,
        target_sample_khz = None,
        seq_len_multiple_of = None
    ):
        super().__init__()
@@ -32,6 +33,8 @@ class SoundDataset(Dataset):

        self.files = files
        self.max_length = max_length

        self.target_sample_khz = target_sample_khz
        self.seq_len_multiple_of = seq_len_multiple_of

    def __len__(self):
@@ -39,10 +42,13 @@ class SoundDataset(Dataset):

    def __getitem__(self, idx):
        file = self.files[idx]
        data, samplerate = torchaudio.load(file)
        data, sample_khz = torchaudio.load(file)

        data = rearrange(data, '1 ... -> ...')

        if exists(self.target_sample_khz):
            data = torchaudio.functional.resample(data, sample_khz, self.target_sample_khz)

        if exists(self.max_length):
            data = data[:self.max_length]

+1 −1
Original line number Diff line number Diff line
@@ -3,7 +3,7 @@ from setuptools import setup, find_packages
setup(
  name = 'audiolm-pytorch',
  packages = find_packages(exclude=[]),
  version = '0.0.23',
  version = '0.0.24',
  license='MIT',
  description = 'AudioLM - Language Modeling Approach to Audio Generation from Google Research - Pytorch',
  author = 'Phil Wang',