Commit 25c5ce6c authored by Phil Wang
Browse files

only log errors

parent ab58e6ee
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -201,11 +201,11 @@ generated_wav_with_text_condition = audiolm(text = ['chirping of birds and the d
- [x] refactor so semantic transformer has a wrapper that handles unique consecutives as well as wav to hubert or vq-wav2vec
- [x] simply not self attend to eos token on the prompting side (semantic for coarse transformer, coarse for fine transformer)
- [x] add structured dropout from forgetful causal masking, far better than traditional dropouts
- [x] figure out how to suppress logging in fairseq

- [ ] figure out how to do the normalization across each dimension mentioned in the paper, but ignore it for v1 of the framework
- [ ] offer option to weight tie coarse, fine, and semantic embeddings across the 3 hierarchical transformers
- [ ] DRY a little at the end
- [ ] figure out how to suppress logging in fairseq
- [ ] test with speech synthesis for starters
- [ ] abstract out conditioning + classifier free guidance into external module or potentially a package
- [ ] add option to use flash attention
+4 −0
Original line number Diff line number Diff line
@@ -5,12 +5,16 @@ from torch import nn
from einops import rearrange, pack, unpack

import joblib

import fairseq

from torchaudio.functional import resample

from audiolm_pytorch.utils import curtail_to_multiple

import logging
# Raise the root logger's threshold to ERROR so lower-severity records
# (DEBUG / INFO / WARNING) are no longer emitted through it.
# NOTE(review): this runs at import time and mutates the root logger, so it
# presumably silences every logger that inherits its level from root, not only
# the noisy third-party one — confirm that process-wide effect is intended.
logging.root.setLevel(logging.ERROR)

def exists(val):
    """Return True when `val` is anything other than None, False otherwise.

    Falsy values such as 0, "" or [] still count as existing; only the
    None singleton is treated as missing.
    """
    if val is None:
        return False
    return True

+3 −0
Original line number Diff line number Diff line
@@ -10,6 +10,9 @@ from torchaudio.functional import resample

from audiolm_pytorch.utils import curtail_to_multiple

import logging
# Raise the root logger's threshold to ERROR so lower-severity records
# (DEBUG / INFO / WARNING) are no longer emitted through it.
# NOTE(review): this runs at import time and mutates the root logger, so it
# presumably silences every logger that inherits its level from root, not only
# the noisy third-party one — confirm that process-wide effect is intended.
logging.root.setLevel(logging.ERROR)

def exists(val):
    """Tell whether `val` carries a value, i.e. is not the None singleton.

    Only None is considered absent; falsy values like 0 or an empty
    container are reported as present.
    """
    is_missing = val is None
    return not is_missing

+1 −1
Original line number Diff line number Diff line
@@ -3,7 +3,7 @@ from setuptools import setup, find_packages
setup(
  name = 'audiolm-pytorch',
  packages = find_packages(exclude=[]),
  version = '0.1.5',
  version = '0.1.6',
  license='MIT',
  description = 'AudioLM - Language Modeling Approach to Audio Generation from Google Research - Pytorch',
  author = 'Phil Wang',