Commit 0271d826 authored by narugo1992's avatar narugo1992
Browse files

dev(narugo): update from main

parent 05d1ddb5
Loading
Loading
Loading
Loading
+15 −9
Original line number Diff line number Diff line
import logging
import os
import random
from copy import deepcopy
@@ -6,9 +7,7 @@ from typing import Optional
from PIL import Image
from torch.utils.data import Dataset
from torchvision.transforms import transforms
from copy import deepcopy
from tqdm.auto import tqdm
import random

from .encode import image_encode

@@ -22,7 +21,7 @@ TRANSFORM = transforms.Compose([
    transforms.Resize(450),
])

TRANSFORM_val = transforms.Compose([
TRANSFORM_VAL = transforms.Compose([
    transforms.Resize(450),
])

@@ -38,13 +37,16 @@ TRANSFORM2 = transforms.Compose([
    transforms.Normalize([0.5], [0.5])
])

TRANSFORM2_val = transforms.Compose([
TRANSFORM2_VAL = transforms.Compose([
    transforms.Resize((384, 384)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])


class MonochromeDataset(Dataset):
    __dims__ = 1

    def __init__(self, root_dir: str, bins: int = 180, fc: Optional[int] = 75, transform=TRANSFORM):
        self.root_dir = root_dir
        self.bins = bins
@@ -87,7 +89,10 @@ class MonochromeDataset(Dataset):
        else:
            return self.pre_process(sample), label


class Monochrome2DDataset(MonochromeDataset):
    __dims__ = 2

    def __init__(self, root_dir: str, bins: int = 200, fc: Optional[int] = 50, transform=TRANSFORM2):
        super(Monochrome2DDataset, self).__init__(root_dir, bins, fc, transform)

@@ -97,7 +102,8 @@ class Monochrome2DDataset(MonochromeDataset):
            image = self.transform(image)
        return image

def random_split_dataset(dataset:MonochromeDataset, train_size, test_size, trans_val=TRANSFORM_val):

def random_split_dataset(dataset: MonochromeDataset, train_size, test_size, trans_val=TRANSFORM_VAL):
    train_data = deepcopy(dataset)
    random.shuffle(train_data.samples)
    all_samples = train_data.samples
@@ -105,7 +111,7 @@ def random_split_dataset(dataset:MonochromeDataset, train_size, test_size, trans

    test_data = dataset
    test_data.transform = trans_val
    print('pre-build testset')
    logging.info('Pre-build test dataset ...')
    test_data.samples = all_samples[train_size:train_size + test_size]
    test_data.cache_data()

+52 −40
Original line number Diff line number Diff line
from math import ceil

import torch
from einops import rearrange
from einops.layers.torch import Rearrange
from torch import nn, einsum
import torch.nn.functional as F

from einops import rearrange, repeat
from einops.layers.torch import Rearrange

# helpers

def exists(val):
    """Return ``True`` when ``val`` is anything other than ``None``."""
    is_missing = val is None
    return not is_missing


def default(val, d):
    """Return ``val`` if it is not ``None``; otherwise return the fallback ``d``."""
    if exists(val):
        return val
    return d


def cast_tuple(val, l=3):
    """Coerce ``val`` to a tuple and right-pad it to at least ``l`` entries.

    A non-tuple ``val`` is wrapped into a one-element tuple. If the tuple has
    fewer than ``l`` entries, its last entry is repeated until it has ``l``.
    Tuples that already have ``l`` or more entries are returned unchanged in
    content (never truncated).

    :param val: a tuple, or any other value to be wrapped into a tuple.
    :param l: minimum length of the returned tuple.
    :return: a plain tuple with at least ``l`` entries.
    """
    items = val if isinstance(val, tuple) else (val,)
    # Looked up unconditionally, so an empty tuple raises IndexError.
    last = items[-1]
    pad_count = max(l - len(items), 0)
    return tuple(items) + (last,) * pad_count


def always(val):
    """Build a callable that ignores all of its arguments and returns ``val``."""

    def _constant(*args, **kwargs):
        return val

    return _constant


# classes

class FeedForward(nn.Module):
@@ -34,9 +37,11 @@ class FeedForward(nn.Module):
            nn.Conv1d(dim * mult, dim, 1),
            nn.Dropout(dropout)
        )

    def forward(self, x):
        return self.net(x)


class Attention(nn.Module):
    def __init__(self, dim, fmap_size, heads=8, dim_key=32, dim_value=64, dropout=0., dim_out=None, downsample=False):
        super().__init__()
@@ -47,7 +52,8 @@ class Attention(nn.Module):
        self.heads = heads
        self.scale = dim_key ** -0.5

        self.to_q = nn.Sequential(nn.Conv1d(dim, inner_dim_key, 1, stride = (2 if downsample else 1), bias = False), nn.BatchNorm1d(inner_dim_key))
        self.to_q = nn.Sequential(nn.Conv1d(dim, inner_dim_key, 1, stride=(2 if downsample else 1), bias=False),
                                  nn.BatchNorm1d(inner_dim_key))
        self.to_k = nn.Sequential(nn.Conv1d(dim, inner_dim_key, 1, bias=False), nn.BatchNorm1d(inner_dim_key))
        self.to_v = nn.Sequential(nn.Conv1d(dim, inner_dim_value, 1, bias=False), nn.BatchNorm1d(inner_dim_value))

@@ -100,8 +106,10 @@ class Attention(nn.Module):
        out = rearrange(out, 'b h l d -> b (h d) l', h=h, l=l)
        return self.to_out(out)


class Transformer(nn.Module):
    def __init__(self, dim, fmap_size, depth, heads, dim_key, dim_value, mlp_mult = 2, dropout = 0., dim_out = None, downsample = False):
    def __init__(self, dim, fmap_size, depth, heads, dim_key, dim_value, mlp_mult=2, dropout=0., dim_out=None,
                 downsample=False):
        super().__init__()
        dim_out = default(dim_out, dim)
        self.layers = nn.ModuleList([])
@@ -109,9 +117,11 @@ class Transformer(nn.Module):

        for _ in range(depth):
            self.layers.append(nn.ModuleList([
                Attention(dim, fmap_size = fmap_size, heads = heads, dim_key = dim_key, dim_value = dim_value, dropout = dropout, downsample = downsample, dim_out = dim_out),
                Attention(dim, fmap_size=fmap_size, heads=heads, dim_key=dim_key, dim_value=dim_value, dropout=dropout,
                          downsample=downsample, dim_out=dim_out),
                FeedForward(dim_out, mlp_mult, dropout=dropout)
            ]))

    def forward(self, x):
        for attn, ff in self.layers:
            attn_res = (x if self.attn_residual else 0)
@@ -119,6 +129,7 @@ class Transformer(nn.Module):
            x = ff(x) + x
        return x


class LeSigTransformer(nn.Module):
    __model_name__ = 'le_transformer'

@@ -142,7 +153,8 @@ class LeSigTransformer(nn.Module):
        depths = cast_tuple(depth, stages)
        layer_heads = cast_tuple(heads, stages)

        assert all(map(lambda t: len(t) == stages, (dims, depths, layer_heads))), 'dimensions, depths, and heads must be a tuple that is less than the designated number of stages'
        assert all(map(lambda t: len(t) == stages, (dims, depths,
                                                    layer_heads))), 'dimensions, depths, and heads must be a tuple that is less than the designated number of stages'

        self.conv_embedding = nn.Sequential(
            nn.Conv1d(3, 32, 3, stride=1, padding=1),
@@ -160,7 +172,8 @@ class LeSigTransformer(nn.Module):

            if not is_last:
                next_dim = dims[ind + 1]
                layers.append(Transformer(dim, fmap_size, 1, heads * 2, dim_key, dim_value, dim_out = next_dim, downsample = True))
                layers.append(
                    Transformer(dim, fmap_size, 1, heads * 2, dim_key, dim_value, dim_out=next_dim, downsample=True))
                fmap_size = ceil(fmap_size / 2)

        self.backbone = nn.Sequential(*layers)
@@ -175,9 +188,7 @@ class LeSigTransformer(nn.Module):

    def forward(self, img):
        x = self.conv_embedding(img)

        x = self.backbone(x)

        x = self.pool(x)

        out = self.mlp_head(x)
@@ -188,12 +199,13 @@ class LeSigTransformer(nn.Module):

        return out


if __name__ == '__main__':
    from thop import profile

    transformer = LeSigTransformer()
    x = torch.randn(1, 3, 180)
    input_ = torch.randn(1, 3, 180)

    flops, params = profile(transformer, (x,))
    flops, params = profile(transformer, (input_,))
    print('FLOPs = ' + str(flops / 1000 ** 3) + 'G')
    print('Params = ' + str(params / 1000 ** 2) + 'M')
+57 −44
Original line number Diff line number Diff line
from math import ceil

import torch
from einops import rearrange
from einops.layers.torch import Rearrange
from torch import nn, einsum
import torch.nn.functional as F

from einops import rearrange, repeat
from einops.layers.torch import Rearrange

# helpers

def exists(val):
    """Return ``True`` when ``val`` is anything other than ``None``."""
    is_missing = val is None
    return not is_missing


def default(val, d):
    """Return ``val`` if it is not ``None``; otherwise return the fallback ``d``."""
    if exists(val):
        return val
    return d


def cast_tuple(val, l=3):
    """Coerce ``val`` to a tuple and right-pad it to at least ``l`` entries.

    A non-tuple ``val`` is wrapped into a one-element tuple. If the tuple has
    fewer than ``l`` entries, its last entry is repeated until it has ``l``.
    Tuples that already have ``l`` or more entries are returned unchanged in
    content (never truncated).

    :param val: a tuple, or any other value to be wrapped into a tuple.
    :param l: minimum length of the returned tuple.
    :return: a plain tuple with at least ``l`` entries.
    """
    items = val if isinstance(val, tuple) else (val,)
    # Looked up unconditionally, so an empty tuple raises IndexError.
    last = items[-1]
    pad_count = max(l - len(items), 0)
    return tuple(items) + (last,) * pad_count


def always(val):
    """Build a callable that ignores all of its arguments and returns ``val``."""

    def _constant(*args, **kwargs):
        return val

    return _constant


# classes

class FeedForward(nn.Module):
@@ -34,9 +37,11 @@ class FeedForward(nn.Module):
            nn.Conv2d(dim * mult, dim, 1),
            nn.Dropout(dropout)
        )

    def forward(self, x):
        return self.net(x)


class Attention(nn.Module):
    def __init__(self, dim, fmap_size, heads=8, dim_key=32, dim_value=64, dropout=0., dim_out=None, downsample=False):
        super().__init__()
@@ -47,7 +52,8 @@ class Attention(nn.Module):
        self.heads = heads
        self.scale = dim_key ** -0.5

        self.to_q = nn.Sequential(nn.Conv2d(dim, inner_dim_key, 1, stride = (2 if downsample else 1), bias = False), nn.BatchNorm2d(inner_dim_key))
        self.to_q = nn.Sequential(nn.Conv2d(dim, inner_dim_key, 1, stride=(2 if downsample else 1), bias=False),
                                  nn.BatchNorm2d(inner_dim_key))
        self.to_k = nn.Sequential(nn.Conv2d(dim, inner_dim_key, 1, bias=False), nn.BatchNorm2d(inner_dim_key))
        self.to_v = nn.Sequential(nn.Conv2d(dim, inner_dim_value, 1, bias=False), nn.BatchNorm2d(inner_dim_value))

@@ -107,8 +113,10 @@ class Attention(nn.Module):
        out = rearrange(out, 'b h (x y) d -> b (h d) x y', h=h, y=y)
        return self.to_out(out)


class Transformer(nn.Module):
    def __init__(self, dim, fmap_size, depth, heads, dim_key, dim_value, mlp_mult = 2, dropout = 0., dim_out = None, downsample = False):
    def __init__(self, dim, fmap_size, depth, heads, dim_key, dim_value, mlp_mult=2, dropout=0., dim_out=None,
                 downsample=False):
        super().__init__()
        dim_out = default(dim_out, dim)
        self.layers = nn.ModuleList([])
@@ -116,9 +124,11 @@ class Transformer(nn.Module):

        for _ in range(depth):
            self.layers.append(nn.ModuleList([
                Attention(dim, fmap_size = fmap_size, heads = heads, dim_key = dim_key, dim_value = dim_value, dropout = dropout, downsample = downsample, dim_out = dim_out),
                Attention(dim, fmap_size=fmap_size, heads=heads, dim_key=dim_key, dim_value=dim_value, dropout=dropout,
                          downsample=downsample, dim_out=dim_out),
                FeedForward(dim_out, mlp_mult, dropout=dropout)
            ]))

    def forward(self, x):
        for attn, ff in self.layers:
            attn_res = (x if self.attn_residual else 0)
@@ -126,8 +136,10 @@ class Transformer(nn.Module):
            x = ff(x) + x
        return x


class LeViT(nn.Module):
    __model_name__ = 'levit'
    __dims__ = 2

    def __init__(
            self,
@@ -149,7 +161,8 @@ class LeViT(nn.Module):
        depths = cast_tuple(depth, stages)
        layer_heads = cast_tuple(heads, stages)

        assert all(map(lambda t: len(t) == stages, (dims, depths, layer_heads))), 'dimensions, depths, and heads must be a tuple that is less than the designated number of stages'
        assert all(map(lambda t: len(t) == stages, (dims, depths, layer_heads))), \
            'dimensions, depths, and heads must be a tuple that is less than the designated number of stages'

        self.conv_embedding = nn.Sequential(
            nn.Conv2d(3, 32, 3, stride=2, padding=1),
@@ -167,7 +180,8 @@ class LeViT(nn.Module):

            if not is_last:
                next_dim = dims[ind + 1]
                layers.append(Transformer(dim, fmap_size, 1, heads * 2, dim_key, dim_value, dim_out = next_dim, downsample = True))
                layers.append(
                    Transformer(dim, fmap_size, 1, heads * 2, dim_key, dim_value, dim_out=next_dim, downsample=True))
                fmap_size = ceil(fmap_size / 2)

        self.backbone = nn.Sequential(*layers)
@@ -182,9 +196,7 @@ class LeViT(nn.Module):

    def forward(self, img):
        x = self.conv_embedding(img)

        x = self.backbone(x)

        x = self.pool(x)

        out = self.mlp_head(x)
@@ -195,12 +207,13 @@ class LeViT(nn.Module):

        return out


if __name__ == '__main__':
    from thop import profile

    transformer = LeViT()
    x = torch.randn(1, 3, 384, 384)
    input_ = torch.randn(1, 3, 384, 384)

    flops, params = profile(transformer, (x,))
    flops, params = profile(transformer, (input_,))
    print('FLOPs = ' + str(flops / 1000 ** 3) + 'G')
    print('Params = ' + str(params / 1000 ** 2) + 'M')
+6 −2
Original line number Diff line number Diff line
@@ -6,6 +6,7 @@ import torch
from PIL import Image
from torch import nn

from .dataset import TRANSFORM2_VAL
from .encode import image_encode
from ..utils import get_testfile, onnx_optimize

@@ -22,9 +23,12 @@ class ModelWithSoftMax(nn.Module):


def export_model_to_onnx(model, onnx_filename, opset_version: int = 14, verbose: bool = True,
                         no_optimize: bool = False, feature_bins: int = 256):
                         no_optimize: bool = False, feature_bins: int = 180):
    image = Image.open(get_testfile('6125785.jpg')).convert('RGB')
    if getattr(model, '__dims__', 1) == 1:
        example_input = image_encode(image, bins=feature_bins, normalize=True).float().unsqueeze(0)
    else:
        example_input = TRANSFORM2_VAL(image).float().unsqueeze(0)
    model = ModelWithSoftMax(model).float()

    if torch.cuda.is_available():
+22 −8
Original line number Diff line number Diff line
@@ -8,17 +8,17 @@ from accelerate import Accelerator
from ditk import logging
from torch import nn
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader
from torch.utils.data import DataLoader, Dataset
from torch.utils.tensorboard import SummaryWriter
from tqdm.auto import tqdm

from .alexnet import MonochromeAlexNet
from .dataset import MonochromeDataset, Monochrome2DDataset, random_split_dataset, TRANSFORM_val, TRANSFORM2_val
from .dataset import MonochromeDataset, Monochrome2DDataset, random_split_dataset, TRANSFORM_VAL, TRANSFORM2_VAL
from .levit1d import LeSigTransformer
from .levit2d import LeViT
from .loss import FocalLoss
from .resnet import ResNet18, ResNet34, ResNet50, ResNet101, ResNet152
from .transformer import SigTransformer
from .levit1d import LeSigTransformer
from .levit2d import LeViT
from ..base import _TRAIN_DIR as _GLOBAL_TRAIN_DIR

_TRAIN_DIR = os.path.join(_GLOBAL_TRAIN_DIR, 'monochrome')
@@ -28,6 +28,7 @@ _CKPT_DIR = os.path.join(_TRAIN_DIR, 'ckpts')
_CKPT_PATTERN = re.compile(r'^monochrome-(?P<name>[a-zA-Z\d_\-]+)-(?P<epoch>\d+)\.ckpt$')

_KNOWN_MODELS = {}
_KNOWN_DATASETS = {}


def _register_model(cls: Type[nn.Module], *args, name=None, **kwargs):
@@ -46,6 +47,14 @@ _register_model(LeSigTransformer)
_register_model(LeViT)


def _register_dataset(cls: Type[Dataset]):
    """Record ``cls`` in ``_KNOWN_DATASETS`` under the key ``cls.__dims__``.

    Registering another class with the same ``__dims__`` value replaces the
    previously stored entry.

    :param cls: dataset class exposing a ``__dims__`` class attribute.
    """
    dims = cls.__dims__
    _KNOWN_DATASETS[dims] = cls


_register_dataset(MonochromeDataset)
_register_dataset(Monochrome2DDataset)


def _find_latest_ckpt(name: str) -> Optional[str]:
    if os.path.exists(_CKPT_DIR):
        ckpts = []
@@ -78,13 +87,14 @@ def train(dataset_dir: str, session_name: Optional[str] = None, from_ckpt: Optio
          train_ratio: float = 0.8, batch_size: int = 4, feature_bins: int = 180, fc: Optional[int] = 75,
          max_epochs: int = 500, learning_rate: float = 0.001, weight_decay: float = 1e-3, preference: float = 0.0,
          num_workers: Optional[int] = 8, save_per_epoch: int = 10, eval_epoch: int = 5,
          model_name: str = 'alexnet', data_2d=False):
          model_name: str = 'alexnet'):
    accelerator = Accelerator(
        # mixed_precision=self.cfgs.mixed_precision,
        step_scheduler_with_optimizer=False,
    )

    session_name = session_name or model_name
    model_dims = getattr(_KNOWN_MODELS[model_name], '__dims__', 1)
    _log_dir = os.path.join(_LOG_DIR, session_name)

    if accelerator.is_local_main_process:
@@ -100,14 +110,18 @@ def train(dataset_dir: str, session_name: Optional[str] = None, from_ckpt: Optio
        writer = None

    # Initialize dataset
    full_dataset = (Monochrome2DDataset if data_2d else MonochromeDataset)(dataset_dir, bins=feature_bins, fc=fc)
    full_dataset = _KNOWN_DATASETS[model_dims](dataset_dir, bins=feature_bins, fc=fc)
    dataset_size = len(full_dataset)
    train_size = int(train_ratio * dataset_size)
    test_size = dataset_size - train_size

    # Split the dataset into train and test subsets using random_split_dataset
    train_dataset, test_dataset = random_split_dataset(full_dataset, train_size, test_size, trans_val=TRANSFORM2_val if data_2d else TRANSFORM_val)
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers, drop_last=True)
    train_dataset, test_dataset = random_split_dataset(
        full_dataset, train_size, test_size,
        trans_val=TRANSFORM2_VAL if full_dataset.__dims__ == 2 else TRANSFORM_VAL
    )
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers,
                                  drop_last=True)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size, num_workers=num_workers)

    # Load previous epoch