Commit 241bae31 authored by Phil Wang
Browse files

address issue in soundstream with causal conv padding not taking into account...

address issue in soundstream with causal conv padding not taking into account strides https://github.com/lucidrains/audiolm-pytorch/issues/166
parent e50305de
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -36,6 +36,8 @@ In the future, <a href="https://www.youtube.com/watch?v=olNvmUCmY8o">this movie

- <a href="https://github.com/LWprogramming">LWprogramming</a> for adding Encodec compatibility!

- <a href="https://github.com/YoungloLee">@YoungloLee</a> for identifying a big bug in SoundStream's 1D causal convolution, where the padding did not account for strides!

## Install

```bash
+3 −2
Original line number Diff line number Diff line
@@ -305,12 +305,13 @@ class CausalConv1d(nn.Module):
        super().__init__()
        kernel_size = kernel_size
        dilation = kwargs.get('dilation', 1)
        self.causal_padding = dilation * (kernel_size - 1)
        stride = kwargs.get('stride', 1)
        self.causal_padding = dilation * (kernel_size - 1) + (1 - stride)

        self.conv = nn.Conv1d(chan_in, chan_out, kernel_size, **kwargs)

    def forward(self, x):
        x = F.pad(x, (self.causal_padding, 0))
        x = F.pad(x, (self.causal_padding, 0), mode = 'reflect')
        return self.conv(x)

class CausalConvTranspose1d(nn.Module):
+1 −1
Original line number Diff line number Diff line
@@ -122,7 +122,7 @@ class SoundStreamTrainer(nn.Module):
        num_train_steps: int,
        batch_size: int,
        data_max_length: int = None,
        data_max_length_seconds: float = None,
        data_max_length_seconds: Union[int, float] = None,
        folder: str = None,
        train_dataloader: DataLoader = None,
        val_dataloader: DataLoader = None,
+1 −1
Original line number Diff line number Diff line
__version__ = '0.26.2'
__version__ = '0.26.5'