" attn_window_size = 128, # local attention receptive field at bottleneck\n",
" attn_depth = 2 # 2 local attention transformer blocks - the SoundStream folks were not experts with attention, so I took the liberty of adding some. EnCodec went with LSTMs, but attention should be better\n",