Unverified Commit 2e8a7961 authored by narugo1992's avatar narugo1992 Committed by GitHub
Browse files

Merge pull request #136 from deepghs/dev/transformers

dev(narugo): prepare for the clip preprocessor
parents 1fa81a1d 92633f4f
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -47,11 +47,13 @@ jobs:
          python -m pip install -r requirements-model.txt
          python -m pip install -r requirements-doc.txt
          python -m pip install -r requirements-torchvision.txt
          python -m pip install -r requirements-transformers.txt
      - name: Prepare dataset
        uses: nick-fields/retry@v2
        if: ${{ github.event_name == 'push' }}
        env:
          CI: 'true'
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        with:
          shell: bash
          timeout_minutes: 20
@@ -64,6 +66,7 @@ jobs:
        env:
          ENV_PROD: 'true'
          PLANTUML_HOST: http://localhost:18080
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          plantumlcli -c
          make docs
@@ -117,11 +120,15 @@ jobs:
          sudo apt-get install -y make wget curl cloc graphviz pandoc
          dot -V
          python -m pip install -r requirements.txt
          python -m pip install -r requirements-model.txt
          python -m pip install -r requirements-doc.txt
          python -m pip install -r requirements-torchvision.txt
          python -m pip install -r requirements-transformers.txt
      - name: Prepare dataset
        uses: nick-fields/retry@v2
        env:
          CI: 'true'
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        with:
          shell: bash
          timeout_minutes: 20
@@ -137,6 +144,7 @@ jobs:
        env:
          ENV_PROD: 'true'
          PLANTUML_HOST: http://localhost:18080
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          git fetch --all --tags
          git branch -av
+1 −0
Original line number Diff line number Diff line
@@ -101,6 +101,7 @@ jobs:
        run: |
          pip install -r requirements-model.txt
          pip install -r requirements-torchvision.txt
          pip install -r requirements-transformers.txt
      - name: Test the basic environment
        shell: bash
        run: |
+1 −0
Original line number Diff line number Diff line
@@ -12,4 +12,5 @@ imgutils.preprocess
    base
    pillow
    torchvision
    transformers
+88 −0
Original line number Diff line number Diff line
imgutils.preprocess.transformers
===========================================

.. currentmodule:: imgutils.preprocess.transformers

.. automodule:: imgutils.preprocess.transformers


register_creators_for_transformers
--------------------------------------------------------------------

.. autofunction:: register_creators_for_transformers



NotProcessorTypeError
--------------------------------------------------------------------

.. autoclass:: NotProcessorTypeError



create_transforms_from_transformers
--------------------------------------------------------------------

.. autofunction:: create_transforms_from_transformers




create_clip_transforms
--------------------------------------------------------------------

.. autofunction:: create_clip_transforms



create_transforms_from_clip_processor
--------------------------------------------------------------------

.. autofunction:: create_transforms_from_clip_processor




create_convnext_transforms
--------------------------------------------------------------------

.. autofunction:: create_convnext_transforms



create_transforms_from_convnext_processor
--------------------------------------------------------------------

.. autofunction:: create_transforms_from_convnext_processor



create_vit_transforms
--------------------------------------------------------------------

.. autofunction:: create_vit_transforms



create_transforms_from_vit_processor
--------------------------------------------------------------------

.. autofunction:: create_transforms_from_vit_processor



create_siglip_transforms
--------------------------------------------------------------------

.. autofunction:: create_siglip_transforms



create_transforms_from_siglip_processor
--------------------------------------------------------------------

.. autofunction:: create_transforms_from_siglip_processor



+56 −0
Original line number Diff line number Diff line
import re
import warnings

import pandas as pd
import transformers
from hfutils.operate import get_hf_client

from imgutils.preprocess.transformers.base import _FN_CREATORS

# Build and print an RST table listing the ``*ImageProcessor`` classes exposed by
# ``transformers``, how many Hub repositories use each one, and which of them
# have a registered ``create_transforms_from_*_processor`` creator function.

# Download the per-repository preprocessor index from the Hugging Face Hub.
hf_client = get_hf_client()
df = pd.read_parquet(hf_client.hf_hub_download(
    repo_id='deepghs/hf_models_preprocessors',
    repo_type='dataset',
    filename='repos.parquet'
))
# Keep only the repositories that declare an image processor type.
df = df[~df['image_processor_type'].isnull()]
df = df.sort_values(by=['likes'], ascending=[False])

# Map image processor class name -> number of repositories using it.
# ``value_counts().to_dict()`` does not depend on the column names generated by
# ``reset_index()``, which differ between pandas 1.x and pandas 2.x.
d_repo_count = df['image_processor_type'].value_counts().to_dict()

# Map normalized processor name (lowercase, underscores removed) -> name of the
# registered creator function. The pattern is a raw string so that ``\s`` and
# ``\S`` are not treated as (invalid) string escape sequences.
creator_name_pattern = re.compile(r'^create_transforms_from_(?P<name>[\s\S]+)_processor$')
d_create_functions = {}
for xfn in _FN_CREATORS:
    xname = xfn.__name__
    matching = creator_name_pattern.fullmatch(xname)
    if not matching:
        warnings.warn(f'Cannot determine transformer type of {xfn!r}.')
        continue
    raw_name = matching.group('name').replace('_', '').lower()
    d_create_functions[raw_name] = xname

suffix = 'ImageProcessor'

rows = []
for name in dir(transformers):
    if not name.endswith(suffix):
        continue
    # Resolve the attribute only for candidate names, and only once.
    cls = getattr(transformers, name)
    if not isinstance(cls, type) \
            or not issubclass(cls, transformers.BaseImageProcessor) \
            or cls is transformers.BaseImageProcessor:
        continue

    # e.g. ``CLIPImageProcessor`` -> ``clip``; matches the keys of d_create_functions.
    pname = name[:-len(suffix)].lower()
    creator = d_create_functions.get(pname)

    rows.append({
        'Name': name,
        # NOTE(review): both branches were the same empty string, which made this
        # column meaningless; the check/cross markers are assumed to be the
        # intended values - confirm against the rendered documentation.
        'Supported': '✅' if creator is not None else '❌',
        'Repos': d_repo_count.get(name, 0),
        'Function': f':func:`{creator}`' if creator is not None else 'N/A'
    })

df = pd.DataFrame(rows)
# The percentage base is the repository total across ALL processor types,
# computed before the rarely-used ones are filtered out.
total = df['Repos'].sum()
df = df[df['Repos'] >= 5]
df = df.sort_values(by=['Repos', 'Supported', 'Name'], ascending=[False, True, True])
df['Repos'] = df['Repos'].map(lambda x: f'{x} ({x / total * 100.0:.2f}%)')
print(df.to_markdown(headers='keys', tablefmt='rst', index=False))
Loading