Unverified Commit 705a93a4 authored by hyrulelinks's avatar hyrulelinks Committed by GitHub
Browse files

Update shell script to build macos app

parent 1f2ad36b
Loading
Loading
Loading
Loading
+160 −0
Original line number Diff line number Diff line
@@ -60,8 +60,8 @@ else
    exit 1
fi

# Download extra data files
# NOTE(review): two STEP 4 banners appear back to back here; this looks like
# the removed and the added wording of the same line shown together - confirm
# which one the script is meant to print.
echo "STEP 4: Download data files."
# Check data file hash
echo "STEP 4: Check data file hash."

# Function to calculate file hash
calculate_hash() {
@@ -69,165 +69,65 @@ calculate_hash() {
    shasum -a 256 "$file_path" | cut -d ' ' -f 1
}

# NOTE(review): this span contains two versions of the same step interleaved,
# with nothing marking which line belongs to which:
#   - download_and_process_files: reads 7-field records, fetches files with
#     curl/unzip and re-verifies their hash after download;
#   - check_file_hash: reads 3-field records and only verifies files that are
#     already on disk, exiting with status 1 on a mismatch or a missing file.
# As written the if/else/fi nesting does not balance (10 `if`, 8 `fi`), so
# read it as a diff view rather than as runnable code.
# Function to download and process files
download_and_process_files() {
# Function to check file hash
check_file_hash() {
    local files=(
        # 7-field records, split on '|' and read below as file_data[0..6]:
        # source_file|source_file_base_url|is_zip|unzip_file|target_file|target_dir|expected hash
        'postag.zip|https://github.com/lancopku/pkuseg-python/releases/download/v0.0.16|zip|features.pkl|features.pkl|data/models/pkuseg/postag|17d734c186a0f6e76d15f4990e766a00eed5f72bea099575df23677435ee749d'
        'postag.zip|https://github.com/lancopku/pkuseg-python/releases/download/v0.0.16|zip|weights.npz|weights.npz|data/models/pkuseg/postag|2bbd53b366be82a1becedb4d29f76296b36ad7560b6a8c85d54054900336d59a'
        'spacy_ontonotes.zip|https://github.com/explosion/spacy-pkuseg/releases/download/v0.0.26|zip|features.msgpack|features.msgpack|data/models/pkuseg/spacy_ontonotes|fd4322482a7018b9bce9216173ae9d2848efe6d310b468bbb4383fb55c874a18'
        'spacy_ontonotes.zip|https://github.com/explosion/spacy-pkuseg/releases/download/v0.0.26|zip|weights.npz|weights.npz|data/models/pkuseg/spacy_ontonotes|5ada075eb25a854f71d6e6fa4e7d55e7be0ae049255b1f8f19d05c13b1b68c9e'
        # 3-field records, split on '|' and read below as file_data[0..2]:
        # target_file|target_dir|precalculated hash
        'alphabet-all-v5.txt|data|c1295ae1962e69e35b5b225a0405d1f3432e368c9941d23bfd3acda12654da33'
        'alphabet-all-v7.txt|data|f5722368146aa0fbcc9f4726866e4efc3203318ebb66c811d8cbbe915576538a'
        'macos_libopencv_world.4.8.0.dylib|data/libs|843704ab096d3afd8709abe2a2c525ce3a836bb0a629ed1ee9b8f5cee9938310'
        'macos_libpatchmatch_inpaint.dylib|data/libs|849ca84759385d410c9587d69690e668822a3fc376ce2219e583e7e0be5b5e9a'
        'aot_inpainter.ckpt|data/models|878d541c68648969bc1b042a6e997f3a58e49b6c07c5636ad55130736977149f'
        'comictextdetector.pt|data/models|1f90fa60aeeb1eb82e2ac1167a66bf139a8a61b8780acd351ead55268540cccb'
        'comictextdetector.pt.onnx|data/models|1a86ace74961413cbd650002e7bb4dcec4980ffa21b2f19b86933372071d718f'
        'lama_large_512px.ckpt|data/models|11d30fbb3000fb2eceae318b75d9ced9229d99ae990a7f8b3ac35c8d31f2c935'
        'lama_mpe.ckpt|data/models|d625aa1b3e0d0408acfd6928aa84f005867aa8dbb9162480346a4e20660786cc'
        'config.json|data/models/manga-ocr-base|8c0e395de8fa699daaac21aee33a4ba9bd1309cfbff03147813d2a025f39f349'
        'preprocessor_config.json|data/models/manga-ocr-base|af4eb4d79cf61b47010fc0bc9352ee967579c417423b4917188d809b7e048948'
        'pytorch_model.bin|data/models/manga-ocr-base|c63e0bb5b3ff798c5991de18a8e0956c7ee6d1563aca6729029815eda6f5c2eb'
        'README.md|data/models/manga-ocr-base|32f413afcc4295151e77d25202c5c5d81ef621b46f947da1c3bde13256dc0d5f'
        'special_tokens_map.json|data/models/manga-ocr-base|303df45a03609e4ead04bc3dc1536d0ab19b5358db685b6f3da123d05ec200e3'
        'tokenizer_config.json|data/models/manga-ocr-base|d775ad1deac162dc56b84e9b8638f95ed8a1f263d0f56f4f40834e26e205e266'
        'vocab.txt|data/models/manga-ocr-base|344fbb6b8bf18c57839e924e2c9365434697e0227fac00b88bb4899b78aa594d'
        'mit32px_ocr.ckpt|data/models|d9f619a9dccce8ce88357d1b17d25f07806f225c033ea42c64e86c45446cfe71'
        'mit48pxctc_ocr.ckpt|data/models|8b0837a24da5fde96c23ca47bb7abd590cd5b185c307e348c6e0b7238178ed89'
        'ocr_ar_48px.ckpt|data/models|29daa46d080818bb4ab239a518a88338cbccff8f901bef8c9db191a7cb97671d'
        'features.pkl|data/models/pkuseg/postag|17d734c186a0f6e76d15f4990e766a00eed5f72bea099575df23677435ee749d'
        'weights.npz|data/models/pkuseg/postag|2bbd53b366be82a1becedb4d29f76296b36ad7560b6a8c85d54054900336d59a'
        'features.msgpack|data/models/pkuseg/spacy_ontonotes|fd4322482a7018b9bce9216173ae9d2848efe6d310b468bbb4383fb55c874a18'
        'weights.npz|data/models/pkuseg/spacy_ontonotes|5ada075eb25a854f71d6e6fa4e7d55e7be0ae049255b1f8f19d05c13b1b68c9e'
        'pkusegscores.json|data|ca6b8c6b8ba70d4370b0f2de6bd128ebb0f5f64ff06f01ba6358e49a776b0c3f'
    )
        
    # Iterate through file information
    for file_info in "${files[@]}"; do
        # Split the '|'-separated record into the file_data array.
        IFS='|' read -r -a file_data <<< "$file_info"
        source_file="${file_data[0]}"
        source_file_base_url="${file_data[1]}"
        is_zip="${file_data[2]}"
        unzip_file="${file_data[3]}"
        target_file="${file_data[4]}"
        target_dir="${file_data[5]}"
        target_file_expected_hash="${file_data[6]}"
        
        # Combine source file and base URL to get download URL
        local download_url="$source_file_base_url/$source_file"
        
        # Check if target_file exists and verify hash if it does
        if [ -e "$target_dir/$target_file" ]; then
            echo "INFO: $target_file already exists, verifying hash..."
            computed_hash=$(calculate_hash "$target_dir/$target_file")
            if [ "$computed_hash" == "$target_file_expected_hash" ]; then
                echo "INFO: ✅ Existing $target_file hash verification passed."
                continue
        target_file="${file_data[0]}"
        target_dir="${file_data[1]}"
        target_precalculated_hash="${file_data[2]}"
        target_file_path="$target_dir/$target_file"

        # Check if $target_file exists
        if [ -e "$target_file_path" ]; then
            target_computed_hash=$(calculate_hash "$target_file_path")
            
            # Compare hashes
            if [ "$target_computed_hash" == "$target_precalculated_hash" ]; then
                echo "INFO: ✅ $target_file found and hash matches."
            else
                echo "WARNING: ❌ Existing $target_file hash verification failed."
                rm -rf "$target_dir/$target_file"
            fi
        fi
            
        # Download and process accordingly based on is_zip and unzip_file
        echo "INFO: Downloading $target_file..."
        if [[ "$is_zip" == "zip" ]]; then
            curl -L "$download_url" -o "$source_file"
            unzip -j "$source_file" "$unzip_file" -d "$target_dir"
            if [ "$unzip_file" != "$target_file" ]; then
                # Rename the file
                mv "$target_dir/$unzip_file" "$target_dir/$target_file"
            fi
            rm -rf "$source_file"
        else
            curl -L "$download_url" -o "$target_dir/$target_file"
        fi
        
        # Calculate hash after download and processing
        downloaded_file_hash=$(calculate_hash "$target_dir/$target_file")
    
        # Check if hash matches expected hash
        if [ "$downloaded_file_hash" == "$target_file_expected_hash" ]; then
            echo "INFO: ✅ Downloaded $target_file hash verification passed."
            continue
        else
            echo "WARNING: ❌ Downloaded $target_file hash verification failed."
            # Remove the existing file
            rm -f "$target_dir/$target_file"

            # Redownload the file
            if [[ "$is_zip" == "zip" ]]; then
                curl -L "$download_url" -o "$source_file"
                unzip -j "$source_file" "$unzip_file" -d "$target_dir"
                if [ "$unzip_file" != "$target_file" ]; then
                    mv "$target_dir/$unzip_file" "$target_dir/$target_file"
                fi
                rm -f "$source_file"
            else
                curl -L "$download_url" -o "$target_dir/$target_file"
                echo "WARNING: ❌ $target_file found but hash mismatches."
                echo "INFO: Expected hash: $target_precalculated_hash"
                echo "INFO: Computed hash: $target_computed_hash"
                exit 1
            fi
            
            # Calculate hash after re-download
            redownloaded_file_hash=$(calculate_hash "$target_dir/$target_file")
            
            # Check if hash matches expected hash after re-download
            if [ "$redownloaded_file_hash" == "$target_file_expected_hash" ]; then
                echo "INFO: ✅ Re-downloaded $target_file hash verification passed."
                continue
        else
                echo "WARNING: ❌ Re-downloaded $target_file hash verification failed."
                echo "ERROR: ❌ Unable to download $target_file. Exiting."
            echo "WARNING: ❌ $target_file not found at $target_file_path."
            exit 1
        fi
        fi
    done
}

# Function to thin libraries based on system architecture
#
# Replaces the two multi-architecture ("fat") dylibs in $LIBS_DIR with
# single-architecture copies for the host CPU, as reported by `uname -m`.
# Any architecture other than arm64 is treated as x86_64.
#
# Globals:   LIBS_DIR (read) - directory containing the dylibs
# Outputs:   progress and error messages on stdout
# Returns:   0 on success; 1 if the architecture cannot be determined, if
#            either library cannot be thinned, or if a thinned copy cannot be
#            moved into place. When thinning fails the fat libraries are left
#            untouched.
thin_liarary_files() {
    # Declared separately so a failing `uname` is not masked by `local`.
    local arch
    arch=$(uname -m) || return 1

    local opencv_lib="$LIBS_DIR/libopencv_world.4.4.0.dylib"
    local opencv_thin="$LIBS_DIR/libopencv_world2.4.4.0.dylib"
    local patchmatch_lib="$LIBS_DIR/libpatchmatch_inpaint.dylib"
    local patchmatch_thin="$LIBS_DIR/libpatchmatch_inpaint2.dylib"

    local target_arch="x86_64"
    if [ "$arch" = "arm64" ]; then
        target_arch="arm64"
    fi

    # Thin multi-architecture library files into compatible single arch libraries
    echo "INFO: System architecture is $arch."
    echo "INFO: Extracting architecture specific libraries..."
    if ! ditto --arch "$target_arch" "$opencv_lib" "$opencv_thin" ||
        ! ditto --arch "$target_arch" "$patchmatch_lib" "$patchmatch_thin"; then
        # Do not touch the fat libraries unless both thin copies were produced;
        # only discard whatever partial output was written.
        rm -f "$opencv_thin" "$patchmatch_thin"
        echo "ERROR: ❌ Failed to extract $target_arch libraries."
        return 1
    fi

    # Replace the fat libraries; mv overwrites the destination, so no separate
    # rm step is needed and the original is never missing in between.
    mv -f "$opencv_thin" "$opencv_lib" || return 1
    mv -f "$patchmatch_thin" "$patchmatch_lib" || return 1

    echo "INFO: ✅ Single architecture library files generated."
}

# Call the download functions: fetch and hash-verify the data files first,
# then reduce the fat dylibs in $LIBS_DIR to the host architecture.
download_and_process_files
thin_liarary_files

# Checklist of extra data files
# One path per line. Entries may be files or directories; each is tested
# with `-e` below.
check_list="
data/alphabet-all-v5.txt
$LIBS_DIR/libopencv_world.4.4.0.dylib
$LIBS_DIR/libpatchmatch_inpaint.dylib
$MODELS_DIR/aot_inpainter.ckpt
$MODELS_DIR/comictextdetector.pt
$MODELS_DIR/comictextdetector.pt.onnx
$MODELS_DIR/lama_mpe.ckpt
$MANGA_OCR_BASE_DIR/README.md
$MANGA_OCR_BASE_DIR/config.json
$MANGA_OCR_BASE_DIR/preprocessor_config.json
$MANGA_OCR_BASE_DIR/pytorch_model.bin
$MANGA_OCR_BASE_DIR/special_tokens_map.json
$MANGA_OCR_BASE_DIR/tokenizer_config.json
$MANGA_OCR_BASE_DIR/vocab.txt
$MODELS_DIR/mit32px_ocr.ckpt
$MODELS_DIR/mit48pxctc_ocr.ckpt
$POSTAG_DIR/features.pkl
$POSTAG_DIR/weights.npz
$SPACY_ONTONOTES_DIR
$SPACY_ONTONOTES_DIR/features.msgpack
$SPACY_ONTONOTES_DIR/weights.npz
data/pkusegscores.json
"

# Validate extra data files exist
echo "STEP 5: Validate data files exist."
fail=false
# Read the checklist one line at a time instead of relying on unquoted word
# splitting, so a path containing spaces or glob characters is tested as a
# single literal path. The here-string keeps the loop in the current shell,
# so the update to `fail` is visible afterwards.
while IFS= read -r item; do
    # Skip blank lines (the checklist starts and ends with a newline).
    [ -n "$item" ] || continue
    if [ ! -e "$item" ]; then
        echo "ERROR: ❌ $item not found"
        fail=true
    fi
done <<< "$check_list"

if [ "$fail" = true ]; then
    echo "ERROR: ❌ Data files check failed. Exiting."
    exit 1
else
    echo "INFO: ✅ Data files all exist."
fi
# Call functions
# check_file_hash exits the script with status 1 when a listed file is
# missing or its computed hash differs from the recorded one.
check_file_hash

# Install Python dependencies
echo "STEP 6: Install Python dependencies."