Optionally allow resampling directly within SoundDataset if target_sample_khz is specified (b6e5af78) · Commits · school / Capstone Design / 01 / AudioLM

audiolm_pytorch/data.py

+7 −1

Original line number	Diff line number	Diff line
		@@ -21,6 +21,7 @@ class SoundDataset(Dataset):
		folder,
		exts = ['flac', 'wav'],
		max_length = None,
		target_sample_khz = None,
		seq_len_multiple_of = None
		):
		super().__init__()
		@@ -32,6 +33,8 @@ class SoundDataset(Dataset):

		self.files = files
		self.max_length = max_length

		self.target_sample_khz = target_sample_khz
		self.seq_len_multiple_of = seq_len_multiple_of

		def __len__(self):
		@@ -39,10 +42,13 @@ class SoundDataset(Dataset):

		def __getitem__(self, idx):
		file = self.files[idx]
		data, samplerate = torchaudio.load(file)
		data, sample_khz = torchaudio.load(file)

		data = rearrange(data, '1 ... -> ...')

		if exists(self.target_sample_khz):
		data = torchaudio.functional.resample(data, sample_khz, self.target_sample_khz)

		if exists(self.max_length):
		data = data[:self.max_length]

+1 −1

Original line number	Diff line number	Diff line
		@@ -3,7 +3,7 @@ from setuptools import setup, find_packages
		setup(
		name = 'audiolm-pytorch',
		packages = find_packages(exclude=[]),
		version = '0.0.23',
		version = '0.0.24',
		license='MIT',
		description = 'AudioLM - Language Modeling Approach to Audio Generation from Google Research - Pytorch',
		author = 'Phil Wang',