Loading requirements-zoo.txt +1 −1 Original line number Diff line number Diff line Loading @@ -6,10 +6,10 @@ tqdm onnx onnxoptimizer onnxsim onnxruntime click di-toolkit tensorboard einops thop accelerate timm No newline at end of file zoo/monochrome/metaformer.py +8 −4 Original line number Diff line number Diff line import torch from torch import nn from timm.models import create_model import zoo.monochrome.metaformer_timm # register models from .metaformer_timm import __file__ as _bullshit _ = _bullshit class CAFormerBuilder: __model_name__ = 'caformer' Loading @@ -28,6 +31,7 @@ class CAFormerBuilder: model = create_model(**self.create_model_args) return model if __name__ == '__main__': from thop import profile Loading zoo/monochrome/metaformer_timm.py +125 −116 Original line number Diff line number Diff line Loading @@ -18,12 +18,13 @@ ConvFormer and CAFormer. Some implementations are modified from timm (https://github.com/rwightman/pytorch-image-models). """ from functools import partial import torch import torch.nn as nn from timm.models.layers import trunc_normal_, DropPath from timm.models.registry import register_model from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD from timm.models.layers import trunc_normal_, DropPath from timm.models.layers.helpers import to_2tuple from timm.models.registry import register_model def _cfg(url='', **kwargs): Loading @@ -48,7 +49,6 @@ default_cfgs = { 'identityformer_m48': _cfg( url='https://huggingface.co/sail/dl/resolve/main/identityformer/identityformer_m48.pth'), 'randformer_s12': _cfg( url='https://huggingface.co/sail/dl/resolve/main/randformer/randformer_s12.pth'), 'randformer_s24': _cfg( Loading @@ -71,8 +71,6 @@ default_cfgs = { 'poolformerv2_m48': _cfg( url='https://huggingface.co/sail/dl/resolve/main/poolformerv2/poolformerv2_m48.pth'), 'convformer_s18': _cfg( url='https://huggingface.co/sail/dl/resolve/main/convformer/convformer_s18.pth'), 'convformer_s18_384': _cfg( Loading Loading @@ -129,7 +127,6 @@ default_cfgs = { url='https://huggingface.co/sail/dl/resolve/main/convformer/convformer_b36_in21k.pth', num_classes=21841), 'caformer_s18': _cfg( url='https://huggingface.co/sail/dl/resolve/main/caformer/caformer_s18.pth'), 'caformer_s18_384': _cfg( Loading Loading @@ -196,6 +193,7 @@ class Downsampling(nn.Module): """ Downsampling implemented by a layer of convolution. """ def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, pre_norm=None, post_norm=None, pre_permute=False): Loading @@ -221,6 +219,7 @@ class Scale(nn.Module): """ Scale vector by element multiplications. """ def __init__(self, dim, init_value=1.0, trainable=True): super().__init__() self.scale = nn.Parameter(init_value * torch.ones(dim), requires_grad=trainable) Loading @@ -233,9 +232,11 @@ class SquaredReLU(nn.Module): """ Squared ReLU: https://arxiv.org/abs/2109.08668 """ def __init__(self, inplace=False): super().__init__() self.relu = nn.ReLU(inplace=inplace) def forward(self, x): return torch.square(self.relu(x)) Loading @@ -244,6 +245,7 @@ class StarReLU(nn.Module): """ StarReLU: s * relu(x) ** 2 + b """ def __init__(self, scale_value=1.0, bias_value=0.0, scale_learnable=True, bias_learnable=True, mode=None, inplace=False): Loading @@ -254,6 +256,7 @@ class StarReLU(nn.Module): requires_grad=scale_learnable) self.bias = nn.Parameter(bias_value * torch.ones(1), requires_grad=bias_learnable) def forward(self, x): return self.scale * self.relu(x) ** 2 + self.bias Loading @@ -263,6 +266,7 @@ class Attention(nn.Module): Vanilla self-attention from Transformer: https://arxiv.org/abs/1706.03762. Modified from timm. """ def __init__(self, dim, head_dim=32, num_heads=None, qkv_bias=False, attn_drop=0., proj_drop=0., proj_bias=False, **kwargs): super().__init__() Loading @@ -281,7 +285,6 @@ class Attention(nn.Module): self.proj = nn.Linear(self.attention_dim, dim, bias=proj_bias) self.proj_drop = nn.Dropout(proj_drop) def forward(self, x): B, H, W, C = x.shape N = H * W Loading @@ -304,6 +307,7 @@ class RandomMixing(nn.Module): self.random_matrix = nn.parameter.Parameter( data=torch.softmax(torch.rand(num_tokens, num_tokens), dim=-1), requires_grad=False) def forward(self, x): B, H, W, C = x.shape x = x.reshape(B, H * W, C) Loading Loading @@ -345,6 +349,7 @@ class LayerNormGeneral(nn.Module): IdentityFormer, RandFormer and PoolFormerV2 utilize Modified LayerNorm without bias (bias=False); ConvFormer and CAFormer utilizes LayerNorm without bias (bias=False). """ def __init__(self, affine_shape=None, normalized_dim=(-1,), scale=True, bias=True, eps=1e-5): super().__init__() Loading @@ -370,6 +375,7 @@ class SepConv(nn.Module): r""" Inverted separable convolution from MobileNetV2: https://arxiv.org/abs/1801.04381. """ def __init__(self, dim, expansion_ratio=2, act1_layer=StarReLU, act2_layer=nn.Identity, bias=False, kernel_size=7, padding=3, Loading Loading @@ -400,6 +406,7 @@ class Pooling(nn.Module): Implementation of pooling for PoolFormer: https://arxiv.org/abs/2111.11418 Modfiled for [B, H, W, C] input """ def __init__(self, pool_size=3, **kwargs): super().__init__() self.pool = nn.AvgPool2d( Loading @@ -416,6 +423,7 @@ class Mlp(nn.Module): """ MLP as used in MetaFormer models, eg Transformer, MLP-Mixer, PoolFormer, MetaFormer baslines and related networks. Mostly copied from timm. """ def __init__(self, dim, mlp_ratio=4, out_features=None, act_layer=StarReLU, drop=0., bias=False, **kwargs): super().__init__() in_features = dim Loading @@ -441,6 +449,7 @@ class Mlp(nn.Module): class MlpHead(nn.Module): """ MLP classification head """ def __init__(self, dim, num_classes=1000, mlp_ratio=4, act_layer=SquaredReLU, norm_layer=nn.LayerNorm, head_dropout=0., bias=True): super().__init__() Loading @@ -451,7 +460,6 @@ class MlpHead(nn.Module): self.fc2 = nn.Linear(hidden_features, num_classes, bias=bias) self.head_dropout = nn.Dropout(head_dropout) def forward(self, x): x = self.fc1(x) x = self.act(x) Loading @@ -465,13 +473,13 @@ class MetaFormerBlock(nn.Module): """ Implementation of one MetaFormer block. """ def __init__(self, dim, token_mixer=nn.Identity, mlp=Mlp, norm_layer=nn.LayerNorm, drop=0., drop_path=0., layer_scale_init_value=None, res_scale_init_value=None ): super().__init__() self.norm1 = norm_layer(dim) Loading Loading @@ -545,6 +553,7 @@ class MetaFormer(nn.Module): output_norm: norm before classifier head. Default: partial(nn.LayerNorm, eps=1e-6). head_fn: classification head. Default: nn.Linear. """ def __init__(self, in_chans=3, num_classes=1000, depths=[2, 2, 6, 2], dims=[64, 128, 320, 512], Loading Loading @@ -641,7 +650,6 @@ class MetaFormer(nn.Module): return x @register_model def identityformer_s12(pretrained=False, **kwargs): model = MetaFormer( Loading Loading @@ -822,7 +830,6 @@ def randformer_m48(pretrained=False, **kwargs): return model @register_model def poolformerv2_s12(pretrained=False, **kwargs): model = MetaFormer( Loading Loading @@ -1542,6 +1549,7 @@ def caformer_m364_in21k(pretrained=False, **kwargs): model.load_state_dict(state_dict, strict=False) return model @register_model def caformer_d30(pretrained=False, **kwargs): model = MetaFormer( Loading @@ -1553,6 +1561,7 @@ def caformer_d30(pretrained=False, **kwargs): model.default_cfg = default_cfgs['caformer_d30'] return model @register_model def caformer_b36(pretrained=False, **kwargs): model = MetaFormer( Loading zoo/monochrome/train_.py +1 −1 Original line number Diff line number Diff line Loading @@ -18,10 +18,10 @@ from .dataset import MonochromeDataset, Monochrome2DDataset, random_split_datase from .levit1d import LeSigTransformer from .levit2d import LeViT from .loss import FocalLoss from .metaformer import CAFormerBuilder from .resnet1d import ResNet18, ResNet34, ResNet50, ResNet101, ResNet152 from .resnet2d import ResNet182D, ResNet342D, ResNet502D, ResNet1012D, ResNet1522D from .transformer import SigTransformer from .metaformer import CAFormerBuilder from ..base import _TRAIN_DIR as _GLOBAL_TRAIN_DIR _TRAIN_DIR = os.path.join(_GLOBAL_TRAIN_DIR, 'monochrome') Loading Loading
requirements-zoo.txt +1 −1 Original line number Diff line number Diff line Loading @@ -6,10 +6,10 @@ tqdm onnx onnxoptimizer onnxsim onnxruntime click di-toolkit tensorboard einops thop accelerate timm No newline at end of file
zoo/monochrome/metaformer.py +8 −4 Original line number Diff line number Diff line import torch from torch import nn from timm.models import create_model import zoo.monochrome.metaformer_timm # register models from .metaformer_timm import __file__ as _bullshit _ = _bullshit class CAFormerBuilder: __model_name__ = 'caformer' Loading @@ -28,6 +31,7 @@ class CAFormerBuilder: model = create_model(**self.create_model_args) return model if __name__ == '__main__': from thop import profile Loading
zoo/monochrome/metaformer_timm.py +125 −116 Original line number Diff line number Diff line Loading @@ -18,12 +18,13 @@ ConvFormer and CAFormer. Some implementations are modified from timm (https://github.com/rwightman/pytorch-image-models). """ from functools import partial import torch import torch.nn as nn from timm.models.layers import trunc_normal_, DropPath from timm.models.registry import register_model from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD from timm.models.layers import trunc_normal_, DropPath from timm.models.layers.helpers import to_2tuple from timm.models.registry import register_model def _cfg(url='', **kwargs): Loading @@ -48,7 +49,6 @@ default_cfgs = { 'identityformer_m48': _cfg( url='https://huggingface.co/sail/dl/resolve/main/identityformer/identityformer_m48.pth'), 'randformer_s12': _cfg( url='https://huggingface.co/sail/dl/resolve/main/randformer/randformer_s12.pth'), 'randformer_s24': _cfg( Loading @@ -71,8 +71,6 @@ default_cfgs = { 'poolformerv2_m48': _cfg( url='https://huggingface.co/sail/dl/resolve/main/poolformerv2/poolformerv2_m48.pth'), 'convformer_s18': _cfg( url='https://huggingface.co/sail/dl/resolve/main/convformer/convformer_s18.pth'), 'convformer_s18_384': _cfg( Loading Loading @@ -129,7 +127,6 @@ default_cfgs = { url='https://huggingface.co/sail/dl/resolve/main/convformer/convformer_b36_in21k.pth', num_classes=21841), 'caformer_s18': _cfg( url='https://huggingface.co/sail/dl/resolve/main/caformer/caformer_s18.pth'), 'caformer_s18_384': _cfg( Loading Loading @@ -196,6 +193,7 @@ class Downsampling(nn.Module): """ Downsampling implemented by a layer of convolution. """ def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, pre_norm=None, post_norm=None, pre_permute=False): Loading @@ -221,6 +219,7 @@ class Scale(nn.Module): """ Scale vector by element multiplications. """ def __init__(self, dim, init_value=1.0, trainable=True): super().__init__() self.scale = nn.Parameter(init_value * torch.ones(dim), requires_grad=trainable) Loading @@ -233,9 +232,11 @@ class SquaredReLU(nn.Module): """ Squared ReLU: https://arxiv.org/abs/2109.08668 """ def __init__(self, inplace=False): super().__init__() self.relu = nn.ReLU(inplace=inplace) def forward(self, x): return torch.square(self.relu(x)) Loading @@ -244,6 +245,7 @@ class StarReLU(nn.Module): """ StarReLU: s * relu(x) ** 2 + b """ def __init__(self, scale_value=1.0, bias_value=0.0, scale_learnable=True, bias_learnable=True, mode=None, inplace=False): Loading @@ -254,6 +256,7 @@ class StarReLU(nn.Module): requires_grad=scale_learnable) self.bias = nn.Parameter(bias_value * torch.ones(1), requires_grad=bias_learnable) def forward(self, x): return self.scale * self.relu(x) ** 2 + self.bias Loading @@ -263,6 +266,7 @@ class Attention(nn.Module): Vanilla self-attention from Transformer: https://arxiv.org/abs/1706.03762. Modified from timm. """ def __init__(self, dim, head_dim=32, num_heads=None, qkv_bias=False, attn_drop=0., proj_drop=0., proj_bias=False, **kwargs): super().__init__() Loading @@ -281,7 +285,6 @@ class Attention(nn.Module): self.proj = nn.Linear(self.attention_dim, dim, bias=proj_bias) self.proj_drop = nn.Dropout(proj_drop) def forward(self, x): B, H, W, C = x.shape N = H * W Loading @@ -304,6 +307,7 @@ class RandomMixing(nn.Module): self.random_matrix = nn.parameter.Parameter( data=torch.softmax(torch.rand(num_tokens, num_tokens), dim=-1), requires_grad=False) def forward(self, x): B, H, W, C = x.shape x = x.reshape(B, H * W, C) Loading Loading @@ -345,6 +349,7 @@ class LayerNormGeneral(nn.Module): IdentityFormer, RandFormer and PoolFormerV2 utilize Modified LayerNorm without bias (bias=False); ConvFormer and CAFormer utilizes LayerNorm without bias (bias=False). """ def __init__(self, affine_shape=None, normalized_dim=(-1,), scale=True, bias=True, eps=1e-5): super().__init__() Loading @@ -370,6 +375,7 @@ class SepConv(nn.Module): r""" Inverted separable convolution from MobileNetV2: https://arxiv.org/abs/1801.04381. """ def __init__(self, dim, expansion_ratio=2, act1_layer=StarReLU, act2_layer=nn.Identity, bias=False, kernel_size=7, padding=3, Loading Loading @@ -400,6 +406,7 @@ class Pooling(nn.Module): Implementation of pooling for PoolFormer: https://arxiv.org/abs/2111.11418 Modfiled for [B, H, W, C] input """ def __init__(self, pool_size=3, **kwargs): super().__init__() self.pool = nn.AvgPool2d( Loading @@ -416,6 +423,7 @@ class Mlp(nn.Module): """ MLP as used in MetaFormer models, eg Transformer, MLP-Mixer, PoolFormer, MetaFormer baslines and related networks. Mostly copied from timm. """ def __init__(self, dim, mlp_ratio=4, out_features=None, act_layer=StarReLU, drop=0., bias=False, **kwargs): super().__init__() in_features = dim Loading @@ -441,6 +449,7 @@ class Mlp(nn.Module): class MlpHead(nn.Module): """ MLP classification head """ def __init__(self, dim, num_classes=1000, mlp_ratio=4, act_layer=SquaredReLU, norm_layer=nn.LayerNorm, head_dropout=0., bias=True): super().__init__() Loading @@ -451,7 +460,6 @@ class MlpHead(nn.Module): self.fc2 = nn.Linear(hidden_features, num_classes, bias=bias) self.head_dropout = nn.Dropout(head_dropout) def forward(self, x): x = self.fc1(x) x = self.act(x) Loading @@ -465,13 +473,13 @@ class MetaFormerBlock(nn.Module): """ Implementation of one MetaFormer block. """ def __init__(self, dim, token_mixer=nn.Identity, mlp=Mlp, norm_layer=nn.LayerNorm, drop=0., drop_path=0., layer_scale_init_value=None, res_scale_init_value=None ): super().__init__() self.norm1 = norm_layer(dim) Loading Loading @@ -545,6 +553,7 @@ class MetaFormer(nn.Module): output_norm: norm before classifier head. Default: partial(nn.LayerNorm, eps=1e-6). head_fn: classification head. Default: nn.Linear. """ def __init__(self, in_chans=3, num_classes=1000, depths=[2, 2, 6, 2], dims=[64, 128, 320, 512], Loading Loading @@ -641,7 +650,6 @@ class MetaFormer(nn.Module): return x @register_model def identityformer_s12(pretrained=False, **kwargs): model = MetaFormer( Loading Loading @@ -822,7 +830,6 @@ def randformer_m48(pretrained=False, **kwargs): return model @register_model def poolformerv2_s12(pretrained=False, **kwargs): model = MetaFormer( Loading Loading @@ -1542,6 +1549,7 @@ def caformer_m364_in21k(pretrained=False, **kwargs): model.load_state_dict(state_dict, strict=False) return model @register_model def caformer_d30(pretrained=False, **kwargs): model = MetaFormer( Loading @@ -1553,6 +1561,7 @@ def caformer_d30(pretrained=False, **kwargs): model.default_cfg = default_cfgs['caformer_d30'] return model @register_model def caformer_b36(pretrained=False, **kwargs): model = MetaFormer( Loading
zoo/monochrome/train_.py +1 −1 Original line number Diff line number Diff line Loading @@ -18,10 +18,10 @@ from .dataset import MonochromeDataset, Monochrome2DDataset, random_split_datase from .levit1d import LeSigTransformer from .levit2d import LeViT from .loss import FocalLoss from .metaformer import CAFormerBuilder from .resnet1d import ResNet18, ResNet34, ResNet50, ResNet101, ResNet152 from .resnet2d import ResNet182D, ResNet342D, ResNet502D, ResNet1012D, ResNet1522D from .transformer import SigTransformer from .metaformer import CAFormerBuilder from ..base import _TRAIN_DIR as _GLOBAL_TRAIN_DIR _TRAIN_DIR = os.path.join(_GLOBAL_TRAIN_DIR, 'monochrome') Loading