#!/bin/bash

# =============================================================================
# Duplicate Finder Accuracy Test Dataset Generator
# =============================================================================
# Creates a test dataset with:
# - 600 true duplicates (200 groups × 3 files)
# - 154 trap files (should NOT be detected as duplicates)
# - 97 edge case files (90 duplicates + 7 trap bundles)
#
# Expected result: 230 duplicate groups, 0 false positives
# =============================================================================

# Abort on the first unhandled command failure.
set -o errexit

# Configuration: dataset root is the first CLI argument when given (and
# non-empty), otherwise a folder under $HOME; the manifest lives inside it.
BASE_DIR="$HOME/DuplicateTest/Dataset_01_AccuracyTest"
if [[ -n "${1:-}" ]]; then
    BASE_DIR="$1"
fi
MANIFEST_FILE="${BASE_DIR}/manifest.json"

# ANSI colour escape sequences (kept as literal backslash text; the log
# helpers expand them with `echo -e`).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Running totals and the JSON fragments collected for the manifest.
TOTAL_FILES=0
TOTAL_GROUPS=0
MANIFEST_GROUPS=()
MANIFEST_TRAPS=()

# 0 = case-insensitive FS (default on many macOS volumes), 1 = case-sensitive FS
FS_CASE_SENSITIVE=0

# =============================================================================
# Helper Functions
# =============================================================================

# Print an informational message ($1) to stdout with a blue [INFO] tag.
# Backslash escapes in the message are expanded by `echo -e`.
log_info() {
    local message="$1"
    echo -e "${BLUE}[INFO]${NC} ${message}"
}

# Print a success message ($1) to stdout with a green [OK] tag.
# Backslash escapes in the message are expanded by `echo -e`.
log_success() {
    local message="$1"
    echo -e "${GREEN}[OK]${NC} ${message}"
}

# Print a warning message ($1) to stdout with a yellow [WARN] tag.
# Backslash escapes in the message are expanded by `echo -e`.
log_warning() {
    local message="$1"
    echo -e "${YELLOW}[WARN]${NC} ${message}"
}

# Print an error message ($1) to stdout with a red [ERROR] tag.
# Backslash escapes in the message are expanded by `echo -e`.
log_error() {
    local message="$1"
    echo -e "${RED}[ERROR]${NC} ${message}"
}

# Detect whether the filesystem at a given path is case-sensitive.
# Arguments:
#   $1 - directory to probe (a scratch sub-directory ".fs_case_test" is
#        created inside it and removed again)
# Outputs:
#   Prints "1" if case-sensitive, "0" if case-insensitive (or if the probe
#   file could not be written).
detect_case_sensitivity() {
    local path="$1"
    local probe_dir="$path/.fs_case_test"
    local lower="$probe_dir/.case_test"
    local upper="$probe_dir/.CASE_TEST"
    local result="0"

    mkdir -p "$probe_dir"
    # Best-effort removal of leftovers from an earlier, interrupted run.
    rm -f "$lower" "$upper" 2>/dev/null || true

    # Create only the lower-case name, then ask the filesystem whether the
    # upper-case spelling resolves to it. Testing the path directly avoids
    # listing the directory (the probe names are dotfiles, which a plain
    # `ls` does not show, so counting `ls` output never saw them).
    if { echo "a" > "$lower"; } 2>/dev/null && [[ ! -e "$upper" ]]; then
        result="1"
    fi

    # Best-effort cleanup of the scratch file and directory.
    rm -f "$lower" "$upper" 2>/dev/null || true
    rmdir "$probe_dir" 2>/dev/null || true

    echo "$result"
}

# Write random bytes to a file.
# Arguments:
#   $1 - number of bytes to write (0 produces an empty file)
#   $2 - output file path (created or truncated)
generate_random_content() {
    local byte_count="$1"
    local target="$2"

    if [[ "$byte_count" -eq 0 ]]; then
        # Zero bytes requested: just create/truncate the file.
        : > "$target"
    else
        # Fast on macOS: reads N bytes in large chunks internally
        head -c "$byte_count" /dev/urandom > "$target"
    fi
}

# Adapter so generate_random_content can be used where a generator with the
# (output_file, content_id, size) signature is expected.
# Arguments:
#   $1 - output file path
#   $2 - content id (accepted for signature compatibility, ignored)
#   $3 - size in bytes
generate_random_content_wrapper() {
    local target="$1"
    local byte_count="$3"
    generate_random_content "$byte_count" "$target"
}

# Generate a simple text-based "image" file (for testing purposes).
# Arguments:
#   $1 - output file path (created or truncated)
#   $2 - content id, embedded in the first header line
#   $3 - nominal size in bytes (default 10240); the random payload is
#        size - 100 bytes, the remainder being an allowance for the header
generate_test_image() {
    local output_file=$1
    local content_id=$2
    local size=${3:-10240}  # Default 10KB
    local payload=$((size - 100))

    # Create file with identifiable header + random content
    {
        echo "TEST_IMAGE_FILE_${content_id}"
        echo "Created: $(date)"
        # head -c reads in large chunks (dd bs=1 issues one read per byte).
        # Sizes at or below the header allowance get a header-only file
        # instead of failing on a negative byte count.
        if (( payload > 0 )); then
            head -c "$payload" /dev/urandom
        fi
    } > "$output_file"
}

# Generate a simple test document.
# Arguments:
#   $1 - output file path (created or truncated)
#   $2 - content id, embedded in the first header line
#   $3 - nominal size in bytes (default 5120); the random payload is
#        size - 100 bytes, the remainder being an allowance for the header
generate_test_document() {
    local output_file=$1
    local content_id=$2
    local size=${3:-5120}  # Default 5KB
    local payload=$((size - 100))

    {
        echo "%PDF-TEST-${content_id}"
        echo "Document content for testing"
        # head -c reads in large chunks (dd bs=1 issues one read per byte).
        # Sizes at or below the header allowance get a header-only file
        # instead of failing on a negative byte count.
        if (( payload > 0 )); then
            head -c "$payload" /dev/urandom
        fi
    } > "$output_file"
}

# Generate test video file.
# Arguments:
#   $1 - output file path (created or truncated)
#   $2 - content id, embedded in the header line
#   $3 - nominal size in bytes (default 102400); the random payload is
#        size - 50 bytes, the remainder being an allowance for the header
generate_test_video() {
    local output_file=$1
    local content_id=$2
    local size=${3:-102400}  # Default 100KB
    local payload=$((size - 50))

    {
        echo "RIFF_VIDEO_TEST_${content_id}"
        # head -c reads in large chunks (dd bs=1 issues one read per byte).
        # Sizes at or below the header allowance get a header-only file
        # instead of failing on a negative byte count.
        if (( payload > 0 )); then
            head -c "$payload" /dev/urandom
        fi
    } > "$output_file"
}

# Generate test audio file.
# Arguments:
#   $1 - output file path (created or truncated)
#   $2 - content id, embedded in the header line
#   $3 - nominal size in bytes (default 51200); the random payload is
#        size - 50 bytes, the remainder being an allowance for the header
generate_test_audio() {
    local output_file=$1
    local content_id=$2
    local size=${3:-51200}  # Default 50KB
    local payload=$((size - 50))

    {
        echo "ID3_AUDIO_TEST_${content_id}"
        # head -c reads in large chunks (dd bs=1 issues one read per byte).
        # Sizes at or below the header allowance get a header-only file
        # instead of failing on a negative byte count.
        if (( payload > 0 )); then
            head -c "$payload" /dev/urandom
        fi
    } > "$output_file"
}

# Generate test archive.
# Arguments:
#   $1 - output file path (created or truncated)
#   $2 - content id, embedded in the header line
#   $3 - nominal size in bytes (default 20480); the random payload is
#        size - 50 bytes, the remainder being an allowance for the header
generate_test_archive() {
    local output_file=$1
    local content_id=$2
    local size=${3:-20480}  # Default 20KB
    local payload=$((size - 50))

    {
        printf 'PK\x03\x04'  # ZIP magic bytes
        echo "TEST_ARCHIVE_${content_id}"
        # head -c reads in large chunks (dd bs=1 issues one read per byte).
        # Sizes at or below the header allowance get a header-only file
        # instead of failing on a negative byte count.
        if (( payload > 0 )); then
            head -c "$payload" /dev/urandom
        fi
    } > "$output_file"
}

# Generate test text file.
# Arguments:
#   $1 - output file path (created or truncated)
#   $2 - content id, embedded in the first header line
#   $3 - nominal size in bytes (default 1024); size - 100 random bytes are
#        base64-encoded after the three header lines, so the file on disk is
#        larger than the nominal size
generate_test_text() {
    local output_file=$1
    local content_id=$2
    local size=${3:-1024}  # Default 1KB
    local payload=$((size - 100))

    {
        echo "Text file content ID: ${content_id}"
        echo "Generated for duplicate finder testing"
        echo "Random content follows:"
        # head -c reads in large chunks (dd bs=1 issues one read per byte).
        # With no payload, base64 still runs on empty input, as before.
        if (( payload > 0 )); then
            head -c "$payload" /dev/urandom
        fi | base64
    } > "$output_file"
}

# Generate test code file.
# Arguments:
#   $1 - output file path (created or truncated)
#   $2 - content id, embedded in the generated source
#   $3 - language/extension: py, js or swift
# Returns:
#   0 on success; 1 (with a message on stderr, no file written) for any
#   other extension.
generate_test_code() {
    local output_file=$1
    local content_id=$2
    local ext=$3

    # 100 random bytes, base64-encoded. Line breaks are stripped so the blob
    # always stays on the single comment line it is embedded in, even when
    # the local base64 wraps its output.
    local random_blob
    random_blob=$(head -c 100 /dev/urandom | base64 | tr -d '\n')

    case "$ext" in
        py)
            cat > "$output_file" << EOF
#!/usr/bin/env python3
# Test file ${content_id}
# Generated for duplicate finder testing

def main():
    content_id = "${content_id}"
    print(f"Content ID: {content_id}")
    # Random data: ${random_blob}

if __name__ == "__main__":
    main()
EOF
            ;;
        js)
            cat > "$output_file" << EOF
// Test file ${content_id}
// Generated for duplicate finder testing

const contentId = "${content_id}";
console.log(\`Content ID: \${contentId}\`);
// Random: ${random_blob}
EOF
            ;;
        swift)
            cat > "$output_file" << EOF
// Test file ${content_id}
// Generated for duplicate finder testing

import Foundation

let contentId = "${content_id}"
print("Content ID: \\(contentId)")
// Random: ${random_blob}
EOF
            ;;
        *)
            # Fail loudly instead of silently producing no file.
            echo "generate_test_code: unsupported extension '${ext}'" >&2
            return 1
            ;;
    esac
}

# Create duplicate group (3 files): an original in folder_A, a "_copy" next
# to it, and a same-named copy in folder_B, then record the group.
# Globals:
#   BASE_DIR (read); MANIFEST_GROUPS, TOTAL_FILES, TOTAL_GROUPS (written)
# Arguments:
#   $1 - group id (used verbatim in file names and as "G<id>" in the manifest)
#   $2 - manifest category
#   $3 - manifest type
#   $4 - directory that will hold folder_A/ and folder_B/
#   $5 - file extension (without the dot)
#   $6 - generator, called as: generator <output_file> <group_id> <size>
#   $7 - size passed to the generator (default 10240)
create_duplicate_group() {
    local group_id=$1
    local category=$2
    local type=$3
    local base_dir=$4
    local file_ext=$5
    local generator_func=$6
    local size=${7:-10240}

    local folder_a="$base_dir/folder_A"
    local folder_b="$base_dir/folder_B"
    mkdir -p "$folder_a" "$folder_b"

    local base_name="file_${group_id}"
    local original="$folder_a/${base_name}.${file_ext}"
    local copy1="$folder_a/${base_name}_copy.${file_ext}"
    local copy2="$folder_b/${base_name}.${file_ext}"

    # Generate original
    $generator_func "$original" "$group_id" "$size"

    # Create exact copies
    cp "$original" "$copy1"
    cp "$original" "$copy2"

    # Calculate hash (declared separately so `local` does not mask the status)
    local hash
    hash=$(shasum -a 256 "$original" | cut -d' ' -f1)

    # Manifest paths are relative to BASE_DIR. BASE_DIR is quoted inside the
    # expansion so it is removed as a literal prefix even when it contains
    # glob characters such as [ ] * ?.
    local rel_original="${original#"$BASE_DIR"/}"
    local rel_copy1="${copy1#"$BASE_DIR"/}"
    local rel_copy2="${copy2#"$BASE_DIR"/}"

    # Add to manifest
    MANIFEST_GROUPS+=("{\"id\":\"G${group_id}\",\"category\":\"${category}\",\"type\":\"${type}\",\"files\":[\"${rel_original}\",\"${rel_copy1}\",\"${rel_copy2}\"],\"hash_sha256\":\"${hash}\"}")

    TOTAL_FILES=$((TOTAL_FILES + 3))
    TOTAL_GROUPS=$((TOTAL_GROUPS + 1))
}

# Create trap set (2 + 5 files that are NOT duplicates): every file is
# produced by its own generator call with a distinct content id.
# Globals:
#   BASE_DIR (read); MANIFEST_TRAPS, TOTAL_FILES (written)
# Arguments:
#   $1 - trap id (manifest ids are TRAP_<id>_A and TRAP_<id>_B)
#   $2 - manifest category
#   $3 - manifest type
#   $4 - directory that will hold set_A/ and set_B/
#   $5 - file extension (without the dot)
#   $6 - generator, called as: generator <output_file> <content_id> <size>
#   $7 - size passed to the generator (default 10240)
#   $8 - file name stem (default "file"); files are <stem>_<n>.<ext>
create_trap_set() {
    local trap_id=$1
    local category=$2
    local type=$3
    local base_dir=$4
    local file_ext=$5
    local generator_func=$6
    local size=${7:-10240}
    local name_pattern=${8:-"file"}

    mkdir -p "$base_dir/set_A" "$base_dir/set_B"

    local files_a=()
    local files_b=()
    local i fname   # kept local so the loops do not clobber a caller's $i

    # Set A: 2 files
    for i in 1 2; do
        fname="$base_dir/set_A/${name_pattern}_${i}.${file_ext}"
        $generator_func "$fname" "${trap_id}_A_${i}" "$size"
        # BASE_DIR is quoted inside the expansion so it is stripped as a
        # literal prefix even if it contains glob characters.
        files_a+=("${fname#"$BASE_DIR"/}")
        TOTAL_FILES=$((TOTAL_FILES + 1))
    done

    # Set B: 5 files
    for i in 1 2 3 4 5; do
        fname="$base_dir/set_B/${name_pattern}_${i}.${file_ext}"
        $generator_func "$fname" "${trap_id}_B_${i}" "$size"
        files_b+=("${fname#"$BASE_DIR"/}")
        TOTAL_FILES=$((TOTAL_FILES + 1))
    done

    # Add to manifest traps: join the names as ,"a","b" and drop the leading
    # comma to form a JSON array.
    local files_json_a files_json_b
    files_json_a=$(printf ',"%s"' "${files_a[@]}")
    files_json_a="[${files_json_a:1}]"
    files_json_b=$(printf ',"%s"' "${files_b[@]}")
    files_json_b="[${files_json_b:1}]"

    MANIFEST_TRAPS+=("{\"id\":\"TRAP_${trap_id}_A\",\"category\":\"${category}\",\"type\":\"${type}\",\"reason\":\"Different content\",\"files\":${files_json_a}}")
    MANIFEST_TRAPS+=("{\"id\":\"TRAP_${trap_id}_B\",\"category\":\"${category}\",\"type\":\"${type}\",\"reason\":\"Different content\",\"files\":${files_json_b}}")
}

# =============================================================================
# Part 1: True Duplicates
# =============================================================================

# Create Part 1.1: 100 plain duplicate groups (ids 1-100) under
# $BASE_DIR/01_true_duplicates/1.1_simple.
# Groups 1-85 are delegated to create_duplicate_group; groups 86-100 are
# source-code triplets (5 each of py, js, swift) built here with
# generate_test_code.
# Globals:
#   BASE_DIR (read); MANIFEST_GROUPS, TOTAL_FILES, TOTAL_GROUPS (written)
create_part1_simple_duplicates() {
    log_info "Creating Part 1.1: Simple duplicates..."

    local base="$BASE_DIR/01_true_duplicates/1.1_simple"
    local group_id=1
    local i spec label type ext generator size count padded_id

    # One row per media type: label|type (also the directory name)|extension|
    # generator|size in bytes|number of groups
    local -a media_specs=(
        "Photos JPEG|photos_jpeg|jpg|generate_test_image|15360|20"
        "Photos PNG|photos_png|png|generate_test_image|20480|10"
        "Documents PDF|documents_pdf|pdf|generate_test_document|10240|15"
        "Videos MP4|videos_mp4|mp4|generate_test_video|102400|10"
        "Audio MP3|audio_mp3|mp3|generate_test_audio|51200|10"
        "Archives ZIP|archives_zip|zip|generate_test_archive|30720|10"
        "Text TXT|text_txt|txt|generate_test_text|2048|10"
    )

    for spec in "${media_specs[@]}"; do
        IFS='|' read -r label type ext generator size count <<< "$spec"
        log_info "  - ${label} (${count} groups)..."
        for ((i = 1; i <= count; i++)); do
            printf -v padded_id '%04d' "$group_id"
            create_duplicate_group "$padded_id" "1.1_simple" "$type" \
                "$base/$type/group_$i" "$ext" "$generator" "$size"
            group_id=$((group_id + 1))
        done
    done

    # Code files (15 groups: 5 py, 5 js, 5 swift)
    log_info "  - Code files (15 groups)..."
    local dir rel src hash
    for ext in py js swift; do
        for ((i = 1; i <= 5; i++)); do
            dir="$base/code/group_${ext}_$i"
            # Manifest paths are relative to BASE_DIR; quoting BASE_DIR inside
            # the expansion strips it literally even if it has glob characters.
            rel="${dir#"$BASE_DIR"/}"
            src="$dir/folder_A/script_${group_id}.${ext}"

            mkdir -p "$dir/folder_A" "$dir/folder_B"
            generate_test_code "$src" "$group_id" "$ext"
            cp "$src" "$dir/folder_A/script_${group_id}_copy.${ext}"
            cp "$src" "$dir/folder_B/script_${group_id}.${ext}"

            hash=$(shasum -a 256 "$src" | cut -d' ' -f1)
            printf -v padded_id '%04d' "$group_id"
            MANIFEST_GROUPS+=("{\"id\":\"G${padded_id}\",\"category\":\"1.1_simple\",\"type\":\"code_${ext}\",\"files\":[\"${rel}/folder_A/script_${group_id}.${ext}\",\"${rel}/folder_A/script_${group_id}_copy.${ext}\",\"${rel}/folder_B/script_${group_id}.${ext}\"],\"hash_sha256\":\"${hash}\"}")
            TOTAL_FILES=$((TOTAL_FILES + 3))
            TOTAL_GROUPS=$((TOTAL_GROUPS + 1))
            group_id=$((group_id + 1))
        done
    done

    log_success "Part 1.1 complete: 100 groups, 300 files"
}

# Record one 1.2_metadata duplicate group (three files in one group directory).
# Globals:
#   BASE_DIR (read); MANIFEST_GROUPS, TOTAL_FILES, TOTAL_GROUPS (written)
# Arguments:
#   $1 - numeric group id (zero-padded to 4 digits for the manifest id)
#   $2 - manifest type
#   $3 - absolute path of the group directory
#   $4 - original file, relative to $3 (this is the file that gets hashed)
#   $5, $6 - the two copies, relative to $3
_metadata_record_group() {
    local padded_id rel hash
    printf -v padded_id '%04d' "$1"
    # BASE_DIR is quoted inside the expansion so it is stripped as a literal
    # prefix even if it contains glob characters such as [ ] * ?.
    rel="${3#"$BASE_DIR"/}"
    hash=$(shasum -a 256 "$3/$4" | cut -d' ' -f1)
    MANIFEST_GROUPS+=("{\"id\":\"G${padded_id}\",\"category\":\"1.2_metadata\",\"type\":\"$2\",\"files\":[\"${rel}/$4\",\"${rel}/$5\",\"${rel}/$6\"],\"hash_sha256\":\"${hash}\"}")
    TOTAL_FILES=$((TOTAL_FILES + 3))
    TOTAL_GROUPS=$((TOTAL_GROUPS + 1))
}

# Create Part 1.2: 100 duplicate groups (ids 101-200) whose copies have
# identical content but differ in name, extension case, timestamps,
# permissions, extended attributes, or visibility.
# Globals:
#   BASE_DIR, FS_CASE_SENSITIVE (read);
#   MANIFEST_GROUPS, TOTAL_FILES, TOTAL_GROUPS (written via the helper)
create_part1_metadata_duplicates() {
    log_info "Creating Part 1.2: Duplicates with changed metadata..."

    local base="$BASE_DIR/01_true_duplicates/1.2_metadata_changed"
    local group_id=101
    local i dir img_name dsc_name upper_name mixed_name

    # Renamed files (20 groups)
    log_info "  - Renamed files (20 groups)..."
    for ((i = 1; i <= 20; i++)); do
        dir="$base/renamed/group_$i"
        printf -v img_name 'IMG_%04d.jpg' "$i"
        printf -v dsc_name 'DSC_%05d.jpg' "$i"
        mkdir -p "$dir"
        generate_test_image "$dir/original_$i.jpg" "meta_rename_$i" 10240
        cp "$dir/original_$i.jpg" "$dir/$img_name"
        cp "$dir/original_$i.jpg" "$dir/$dsc_name"
        _metadata_record_group "$group_id" "renamed" "$dir" \
            "original_$i.jpg" "$img_name" "$dsc_name"
        group_id=$((group_id + 1))
    done

    # Extension case (10 groups)
    log_info "  - Extension case variations (10 groups)..."
    for ((i = 1; i <= 10; i++)); do
        dir="$base/extension_case/group_$i"
        mkdir -p "$dir"
        generate_test_image "$dir/photo_$i.jpg" "meta_ext_$i" 10240

        # macOS default volumes are often case-insensitive, so "photo_1.jpg" and
        # "photo_1.JPG" would collide and cp would fail (and, with set -e, stop the script).
        # Fallback: keep different extension casing, but make the base name distinct.
        if [[ "$FS_CASE_SENSITIVE" == "1" ]]; then
            upper_name="photo_$i.JPG"
            mixed_name="photo_$i.Jpg"
        else
            upper_name="photo_${i}_upper.JPG"
            mixed_name="photo_${i}_mixed.Jpg"
        fi
        cp "$dir/photo_$i.jpg" "$dir/$upper_name"
        cp "$dir/photo_$i.jpg" "$dir/$mixed_name"

        _metadata_record_group "$group_id" "extension_case" "$dir" \
            "photo_$i.jpg" "$upper_name" "$mixed_name"
        group_id=$((group_id + 1))
    done

    # Date modified (20 groups)
    log_info "  - Different modification dates (20 groups)..."
    for ((i = 1; i <= 20; i++)); do
        dir="$base/date_modified/group_$i"
        mkdir -p "$dir"
        generate_test_document "$dir/doc_$i.pdf" "meta_date_$i" 8192
        cp "$dir/doc_$i.pdf" "$dir/doc_${i}_old.pdf"
        cp "$dir/doc_$i.pdf" "$dir/doc_${i}_new.pdf"
        touch -t 202301010000 "$dir/doc_${i}_old.pdf"
        touch -t 202501010000 "$dir/doc_${i}_new.pdf"
        _metadata_record_group "$group_id" "date_modified" "$dir" \
            "doc_$i.pdf" "doc_${i}_old.pdf" "doc_${i}_new.pdf"
        group_id=$((group_id + 1))
    done

    # Date created (10 groups)
    log_info "  - Different creation dates (10 groups)..."
    for ((i = 1; i <= 10; i++)); do
        dir="$base/date_created/group_$i"
        mkdir -p "$dir"
        generate_test_image "$dir/image_$i.png" "meta_birth_$i" 15360
        cp "$dir/image_$i.png" "$dir/image_${i}_v1.png"
        cp "$dir/image_$i.png" "$dir/image_${i}_v2.png"
        # SetFile not available on all systems, using touch as fallback
        # (best effort: failures are deliberately ignored).
        touch -t 202201010000 "$dir/image_${i}_v1.png" 2>/dev/null || true
        touch -t 202401010000 "$dir/image_${i}_v2.png" 2>/dev/null || true
        _metadata_record_group "$group_id" "date_created" "$dir" \
            "image_$i.png" "image_${i}_v1.png" "image_${i}_v2.png"
        group_id=$((group_id + 1))
    done

    # Permissions (10 groups)
    log_info "  - Different permissions (10 groups)..."
    for ((i = 1; i <= 10; i++)); do
        dir="$base/permissions/group_$i"
        mkdir -p "$dir"
        generate_test_text "$dir/config_$i.txt" "meta_perm_$i" 1024
        cp "$dir/config_$i.txt" "$dir/config_${i}_644.txt"
        cp "$dir/config_$i.txt" "$dir/config_${i}_755.txt"
        chmod 644 "$dir/config_${i}_644.txt"
        chmod 755 "$dir/config_${i}_755.txt"
        _metadata_record_group "$group_id" "permissions" "$dir" \
            "config_$i.txt" "config_${i}_644.txt" "config_${i}_755.txt"
        group_id=$((group_id + 1))
    done

    # Extended attributes (10 groups)
    log_info "  - Different xattr (10 groups)..."
    for ((i = 1; i <= 10; i++)); do
        dir="$base/xattr/group_$i"
        mkdir -p "$dir"
        generate_test_document "$dir/file_$i.pdf" "meta_xattr_$i" 5120
        cp "$dir/file_$i.pdf" "$dir/file_${i}_tagged.pdf"
        cp "$dir/file_$i.pdf" "$dir/file_${i}_colored.pdf"
        # Best effort: a missing xattr tool or unsupported attribute is ignored.
        xattr -w com.apple.metadata:kMDItemFinderComment "Tagged file" "$dir/file_${i}_tagged.pdf" 2>/dev/null || true
        xattr -w com.apple.FinderInfo "0000000000000000000C00000000000000000000000000000000000000000000" "$dir/file_${i}_colored.pdf" 2>/dev/null || true
        _metadata_record_group "$group_id" "xattr" "$dir" \
            "file_$i.pdf" "file_${i}_tagged.pdf" "file_${i}_colored.pdf"
        group_id=$((group_id + 1))
    done

    # Hidden folders (10 groups)
    log_info "  - Files in hidden folders (10 groups)..."
    for ((i = 1; i <= 10; i++)); do
        dir="$base/hidden_folder/group_$i"
        mkdir -p "$dir/visible" "$dir/.hidden" "$dir/.secret"
        generate_test_image "$dir/visible/photo_$i.jpg" "meta_hidden_$i" 12288
        cp "$dir/visible/photo_$i.jpg" "$dir/.hidden/photo_$i.jpg"
        cp "$dir/visible/photo_$i.jpg" "$dir/.secret/photo_$i.jpg"
        _metadata_record_group "$group_id" "hidden_folder" "$dir" \
            "visible/photo_$i.jpg" ".hidden/photo_$i.jpg" ".secret/photo_$i.jpg"
        group_id=$((group_id + 1))
    done

    # Dotfiles (10 groups)
    log_info "  - Dotfiles / hidden files (10 groups)..."
    for ((i = 1; i <= 10; i++)); do
        dir="$base/dotfiles/group_$i"
        mkdir -p "$dir"
        generate_test_text "$dir/config_$i.txt" "meta_dot_$i" 2048
        cp "$dir/config_$i.txt" "$dir/.config_$i.txt"
        cp "$dir/config_$i.txt" "$dir/..config_$i.txt"
        _metadata_record_group "$group_id" "dotfiles" "$dir" \
            "config_$i.txt" ".config_$i.txt" "..config_$i.txt"
        group_id=$((group_id + 1))
    done

    log_success "Part 1.2 complete: 100 groups, 300 files"
}

# =============================================================================
# Part 2: Traps (NOT duplicates)
# =============================================================================

# Create Part 2: 154 trap files under $BASE_DIR/02_traps_not_duplicates.
# Each sub-section creates 7 files per variant (set_A: 2, set_B: 5) and
# records them in MANIFEST_TRAPS. Trap ids run sequentially from 1.
# Globals:
#   BASE_DIR (read); MANIFEST_TRAPS, TOTAL_FILES (written)
create_part2_traps() {
    log_info "Creating Part 2: Trap files (NOT duplicates)..."

    local base="$BASE_DIR/02_traps_not_duplicates"
    local trap_id=1
    local dir rel f

    # 2.1 Same name, different content (28 files = 4 types × 7 files)
    log_info "  - 2.1 Same name, different content..."
    create_trap_set "$trap_id" "2.1_same_name" "documents" "$base/2.1_same_name_diff_content/documents" "pdf" generate_test_document 8192 "report"
    trap_id=$((trap_id + 1))
    create_trap_set "$trap_id" "2.1_same_name" "photos" "$base/2.1_same_name_diff_content/photos" "jpg" generate_test_image 15360 "sunset"
    trap_id=$((trap_id + 1))
    create_trap_set "$trap_id" "2.1_same_name" "configs" "$base/2.1_same_name_diff_content/configs" "gitignore" generate_test_text 512 ".gitignore"
    trap_id=$((trap_id + 1))
    create_trap_set "$trap_id" "2.1_same_name" "readme" "$base/2.1_same_name_diff_content/readme" "md" generate_test_text 1024 "README"
    trap_id=$((trap_id + 1))

    # 2.2 Same size, different content (42 files = 6 sizes × 7 files)
    log_info "  - 2.2 Same size, different content..."
    # 0 bytes
    dir="$base/2.2_same_size_diff_content/size_0"
    # Manifest paths are relative to BASE_DIR; quoting BASE_DIR inside the
    # expansion strips it literally even if it contains glob characters.
    rel="${dir#"$BASE_DIR"/}"
    mkdir -p "$dir/set_A" "$dir/set_B"
    for f in 1 2; do
        touch "$dir/set_A/empty_$f.bin"
        TOTAL_FILES=$((TOTAL_FILES + 1))
    done
    for f in 1 2 3 4 5; do
        touch "$dir/set_B/empty_$f.bin"
        TOTAL_FILES=$((TOTAL_FILES + 1))
    done
    MANIFEST_TRAPS+=("{\"id\":\"TRAP_${trap_id}_A\",\"category\":\"2.2_same_size\",\"type\":\"0_bytes\",\"reason\":\"Zero-byte files\",\"files\":[\"${rel}/set_A/empty_1.bin\",\"${rel}/set_A/empty_2.bin\"]}")
    MANIFEST_TRAPS+=("{\"id\":\"TRAP_${trap_id}_B\",\"category\":\"2.2_same_size\",\"type\":\"0_bytes\",\"reason\":\"Zero-byte files\",\"files\":[\"${rel}/set_B/empty_1.bin\",\"${rel}/set_B/empty_2.bin\",\"${rel}/set_B/empty_3.bin\",\"${rel}/set_B/empty_4.bin\",\"${rel}/set_B/empty_5.bin\"]}")
    trap_id=$((trap_id + 1))

    # Other sizes ("label:bytes"); the wrapper adapts generate_random_content
    # to the (output_file, content_id, size) generator signature.
    local size_name name bytes
    for size_name in "1KB:1024" "10KB:10240" "100KB:102400" "1MB:1048576" "10MB:10485760"; do
        name="${size_name%%:*}"
        bytes="${size_name##*:}"
        create_trap_set "$trap_id" "2.2_same_size" "$name" "$base/2.2_same_size_diff_content/size_$name" "bin" generate_random_content_wrapper "$bytes" "file"
        trap_id=$((trap_id + 1))
    done

    # 2.3 Similar names, different content (28 files = 4 patterns × 7 files)
    log_info "  - 2.3 Similar names, different content..."
    local pattern pname suffix
    for pattern in "copy:_copy" "numbered:(1)" "backup:-backup" "dated:_2024"; do
        pname="${pattern%%:*}"
        suffix="${pattern##*:}"
        dir="$base/2.3_similar_names/$pname"
        rel="${dir#"$BASE_DIR"/}"
        mkdir -p "$dir/set_A" "$dir/set_B"

        # Set A: 2 files
        generate_test_image "$dir/set_A/photo.jpg" "trap_sim_${trap_id}_A1" 10240
        generate_test_image "$dir/set_A/photo${suffix}.jpg" "trap_sim_${trap_id}_A2" 10240
        TOTAL_FILES=$((TOTAL_FILES + 2))

        # Set B: 5 files
        generate_test_image "$dir/set_B/image.jpg" "trap_sim_${trap_id}_B1" 10240
        generate_test_image "$dir/set_B/image${suffix}.jpg" "trap_sim_${trap_id}_B2" 10240
        generate_test_image "$dir/set_B/image${suffix}${suffix}.jpg" "trap_sim_${trap_id}_B3" 10240
        generate_test_image "$dir/set_B/picture.jpg" "trap_sim_${trap_id}_B4" 10240
        generate_test_image "$dir/set_B/picture${suffix}.jpg" "trap_sim_${trap_id}_B5" 10240
        TOTAL_FILES=$((TOTAL_FILES + 5))

        # NOTE(review): this entry lists 4 of the 7 files created above
        # (the last three set_B files are omitted) — confirm that is intended.
        MANIFEST_TRAPS+=("{\"id\":\"TRAP_${trap_id}\",\"category\":\"2.3_similar_names\",\"type\":\"$pname\",\"reason\":\"Similar names but different content\",\"files\":[\"${rel}/set_A/photo.jpg\",\"${rel}/set_A/photo${suffix}.jpg\",\"${rel}/set_B/image.jpg\",\"${rel}/set_B/image${suffix}.jpg\"]}")
        trap_id=$((trap_id + 1))
    done

    # 2.4 Truncated files (28 files = 4 types × 7 files)
    log_info "  - 2.4 Truncated files (same beginning, different length)..."
    local ftype tname ext size pct
    for ftype in "video:mp4:102400" "audio:mp3:51200" "archive:zip:30720" "document:pdf:20480"; do
        # Each entry is "name:extension:size in bytes".
        IFS=':' read -r tname ext size <<< "$ftype"
        dir="$base/2.4_truncated/$tname"
        rel="${dir#"$BASE_DIR"/}"
        mkdir -p "$dir/set_A" "$dir/set_B"

        # Set A: full file plus a copy cut to half its length
        generate_random_content "$size" "$dir/set_A/full.$ext"
        head -c "$((size / 2))" "$dir/set_A/full.$ext" > "$dir/set_A/truncated.$ext"
        TOTAL_FILES=$((TOTAL_FILES + 2))

        # Set B: complete file plus four prefixes of it (5 files)
        generate_random_content "$size" "$dir/set_B/complete.$ext"
        for pct in 25 50 75 90; do
            head -c "$((size * pct / 100))" "$dir/set_B/complete.$ext" > "$dir/set_B/partial_${pct}pct.$ext"
        done
        TOTAL_FILES=$((TOTAL_FILES + 5))

        # NOTE(review): only the set_A files are listed here — confirm intended.
        MANIFEST_TRAPS+=("{\"id\":\"TRAP_${trap_id}\",\"category\":\"2.4_truncated\",\"type\":\"$tname\",\"reason\":\"Same beginning, different file length\",\"files\":[\"${rel}/set_A/full.$ext\",\"${rel}/set_A/truncated.$ext\"]}")
        trap_id=$((trap_id + 1))
    done

    # 2.5 Visually similar (28 files = 4 types × 7 files)
    log_info "  - 2.5 Visually similar but technically different..."
    local vtype
    for vtype in "quality" "pixel_diff" "whitespace" "exif_diff"; do
        dir="$base/2.5_visually_similar/$vtype"
        rel="${dir#"$BASE_DIR"/}"
        mkdir -p "$dir/set_A" "$dir/set_B"

        # Generate unique files that might look similar
        for f in 1 2; do
            generate_test_image "$dir/set_A/image_$f.jpg" "trap_vis_${trap_id}_A_$f" 15360
            TOTAL_FILES=$((TOTAL_FILES + 1))
        done
        for f in 1 2 3 4 5; do
            generate_test_image "$dir/set_B/image_$f.jpg" "trap_vis_${trap_id}_B_$f" 15360
            TOTAL_FILES=$((TOTAL_FILES + 1))
        done

        # NOTE(review): only the set_A files are listed here — confirm intended.
        MANIFEST_TRAPS+=("{\"id\":\"TRAP_${trap_id}\",\"category\":\"2.5_visually_similar\",\"type\":\"$vtype\",\"reason\":\"Visually similar but binary different\",\"files\":[\"${rel}/set_A/image_1.jpg\",\"${rel}/set_A/image_2.jpg\"]}")
        trap_id=$((trap_id + 1))
    done

    log_success "Part 2 complete: 154 trap files"
}

# =============================================================================
# Part 3: Edge Cases
# =============================================================================

create_part3_edge_cases() {
    log_info "Creating Part 3: Edge cases..."

    local base="$BASE_DIR/03_edge_cases"
    local group_id=201

    # 3.1 Special names (60 files → 20 groups)
    log_info "  - 3.1 Special file names..."

    # Unicode emoji (3 groups)
    for i in 1 2 3; do
        local dir="$base/3.1_special_names/unicode_emoji/group_$i"
        mkdir -p "$dir/folder_A" "$dir/folder_B"
        local emoji_name="📷photo_$i"
        generate_test_image "$dir/folder_A/${emoji_name}.jpg" "edge_emoji_$i" 10240
        cp "$dir/folder_A/${emoji_name}.jpg" "$dir/folder_A/${emoji_name}_copy.jpg"
        cp "$dir/folder_A/${emoji_name}.jpg" "$dir/folder_B/${emoji_name}.jpg"
        local hash=$(shasum -a 256 "$dir/folder_A/${emoji_name}.jpg" | cut -d' ' -f1)
        MANIFEST_GROUPS+=("{\"id\":\"G$(printf '%04d' $group_id)\",\"category\":\"3.1_special_names\",\"type\":\"unicode_emoji\",\"files\":[\"${dir#$BASE_DIR/}/folder_A/${emoji_name}.jpg\",\"${dir#$BASE_DIR/}/folder_A/${emoji_name}_copy.jpg\",\"${dir#$BASE_DIR/}/folder_B/${emoji_name}.jpg\"],\"hash_sha256\":\"${hash}\"}")
        TOTAL_FILES=$((TOTAL_FILES + 3))
        TOTAL_GROUPS=$((TOTAL_GROUPS + 1))
        group_id=$((group_id + 1))
    done

    # Cyrillic (3 groups)
    for i in 1 2 3; do
        local dir="$base/3.1_special_names/cyrillic/group_$i"
        mkdir -p "$dir/folder_A" "$dir/folder_B"
        local cyrillic_name="фото_$i"
        generate_test_image "$dir/folder_A/${cyrillic_name}.jpg" "edge_cyr_$i" 10240
        cp "$dir/folder_A/${cyrillic_name}.jpg" "$dir/folder_A/${cyrillic_name}_копия.jpg"
        cp "$dir/folder_A/${cyrillic_name}.jpg" "$dir/folder_B/${cyrillic_name}.jpg"
        local hash=$(shasum -a 256 "$dir/folder_A/${cyrillic_name}.jpg" | cut -d' ' -f1)
        MANIFEST_GROUPS+=("{\"id\":\"G$(printf '%04d' $group_id)\",\"category\":\"3.1_special_names\",\"type\":\"cyrillic\",\"files\":[\"${dir#$BASE_DIR/}/folder_A/${cyrillic_name}.jpg\",\"${dir#$BASE_DIR/}/folder_A/${cyrillic_name}_копия.jpg\",\"${dir#$BASE_DIR/}/folder_B/${cyrillic_name}.jpg\"],\"hash_sha256\":\"${hash}\"}")
        TOTAL_FILES=$((TOTAL_FILES + 3))
        TOTAL_GROUPS=$((TOTAL_GROUPS + 1))
        group_id=$((group_id + 1))
    done

    # Hieroglyphs (3 groups)
    for i in 1 2 3; do
        local dir="$base/3.1_special_names/hieroglyphs/group_$i"
        mkdir -p "$dir/folder_A" "$dir/folder_B"
        local jp_name="写真_$i"
        generate_test_image "$dir/folder_A/${jp_name}.jpg" "edge_jp_$i" 10240
        cp "$dir/folder_A/${jp_name}.jpg" "$dir/folder_A/${jp_name}_コピー.jpg"
        cp "$dir/folder_A/${jp_name}.jpg" "$dir/folder_B/${jp_name}.jpg"
        local hash=$(shasum -a 256 "$dir/folder_A/${jp_name}.jpg" | cut -d' ' -f1)
        MANIFEST_GROUPS+=("{\"id\":\"G$(printf '%04d' $group_id)\",\"category\":\"3.1_special_names\",\"type\":\"hieroglyphs\",\"files\":[\"${dir#$BASE_DIR/}/folder_A/${jp_name}.jpg\",\"${dir#$BASE_DIR/}/folder_A/${jp_name}_コピー.jpg\",\"${dir#$BASE_DIR/}/folder_B/${jp_name}.jpg\"],\"hash_sha256\":\"${hash}\"}")
        TOTAL_FILES=$((TOTAL_FILES + 3))
        TOTAL_GROUPS=$((TOTAL_GROUPS + 1))
        group_id=$((group_id + 1))
    done

    # Spaces in name (3 groups)
    for i in 1 2 3; do
        local dir="$base/3.1_special_names/spaces/group_$i"
        mkdir -p "$dir/folder_A" "$dir/folder_B"
        local space_name="my vacation photo $i"
        generate_test_image "$dir/folder_A/${space_name}.jpg" "edge_space_$i" 10240
        cp "$dir/folder_A/${space_name}.jpg" "$dir/folder_A/${space_name} copy.jpg"
        cp "$dir/folder_A/${space_name}.jpg" "$dir/folder_B/${space_name}.jpg"
        local hash=$(shasum -a 256 "$dir/folder_A/${space_name}.jpg" | cut -d' ' -f1)
        MANIFEST_GROUPS+=("{\"id\":\"G$(printf '%04d' $group_id)\",\"category\":\"3.1_special_names\",\"type\":\"spaces\",\"files\":[\"${dir#$BASE_DIR/}/folder_A/${space_name}.jpg\",\"${dir#$BASE_DIR/}/folder_A/${space_name} copy.jpg\",\"${dir#$BASE_DIR/}/folder_B/${space_name}.jpg\"],\"hash_sha256\":\"${hash}\"}")
        TOTAL_FILES=$((TOTAL_FILES + 3))
        TOTAL_GROUPS=$((TOTAL_GROUPS + 1))
        group_id=$((group_id + 1))
    done

    # Special characters (3 groups)
    for i in 1 2 3; do
        local dir="$base/3.1_special_names/special_chars/group_$i"
        mkdir -p "$dir/folder_A" "$dir/folder_B"
        local special_name="file@#\$%&_$i"
        generate_test_image "$dir/folder_A/${special_name}.jpg" "edge_special_$i" 10240
        cp "$dir/folder_A/${special_name}.jpg" "$dir/folder_A/${special_name}_copy.jpg"
        cp "$dir/folder_A/${special_name}.jpg" "$dir/folder_B/${special_name}.jpg"
        local hash=$(shasum -a 256 "$dir/folder_A/${special_name}.jpg" | cut -d' ' -f1)
        MANIFEST_GROUPS+=("{\"id\":\"G$(printf '%04d' $group_id)\",\"category\":\"3.1_special_names\",\"type\":\"special_chars\",\"files\":[\"${dir#$BASE_DIR/}/folder_A/${special_name}.jpg\",\"${dir#$BASE_DIR/}/folder_A/${special_name}_copy.jpg\",\"${dir#$BASE_DIR/}/folder_B/${special_name}.jpg\"],\"hash_sha256\":\"${hash}\"}")
        TOTAL_FILES=$((TOTAL_FILES + 3))
        TOTAL_GROUPS=$((TOTAL_GROUPS + 1))
        group_id=$((group_id + 1))
    done

    # Long names (3 groups)
    for i in 1 2 3; do
        local dir="$base/3.1_special_names/long_names/group_$i"
        mkdir -p "$dir/folder_A" "$dir/folder_B"
        local long_name="this_is_a_very_long_filename_that_exceeds_normal_length_limits_and_tests_how_well_the_application_handles_such_edge_cases_in_file_naming_conventions_number_$i"
        generate_test_image "$dir/folder_A/${long_name}.jpg" "edge_long_$i" 10240
        cp "$dir/folder_A/${long_name}.jpg" "$dir/folder_A/${long_name}_c.jpg"
        cp "$dir/folder_A/${long_name}.jpg" "$dir/folder_B/${long_name}.jpg"
        local hash=$(shasum -a 256 "$dir/folder_A/${long_name}.jpg" | cut -d' ' -f1)
        MANIFEST_GROUPS+=("{\"id\":\"G$(printf '%04d' $group_id)\",\"category\":\"3.1_special_names\",\"type\":\"long_names\",\"files\":[\"${dir#$BASE_DIR/}/folder_A/${long_name}.jpg\",\"${dir#$BASE_DIR/}/folder_A/${long_name}_c.jpg\",\"${dir#$BASE_DIR/}/folder_B/${long_name}.jpg\"],\"hash_sha256\":\"${hash}\"}")
        TOTAL_FILES=$((TOTAL_FILES + 3))
        TOTAL_GROUPS=$((TOTAL_GROUPS + 1))
        group_id=$((group_id + 1))
    done

    # Dotfiles (2 groups)
    for i in 1 2; do
        local dir="$base/3.1_special_names/dotfiles/group_$i"
        mkdir -p "$dir/folder_A" "$dir/folder_B"
        generate_test_text "$dir/folder_A/.hidden_config_$i" "edge_dot_$i" 1024
        cp "$dir/folder_A/.hidden_config_$i" "$dir/folder_A/.hidden_config_${i}_backup"
        cp "$dir/folder_A/.hidden_config_$i" "$dir/folder_B/.hidden_config_$i"
        local hash=$(shasum -a 256 "$dir/folder_A/.hidden_config_$i" | cut -d' ' -f1)
        MANIFEST_GROUPS+=("{\"id\":\"G$(printf '%04d' $group_id)\",\"category\":\"3.1_special_names\",\"type\":\"dotfiles\",\"files\":[\"${dir#$BASE_DIR/}/folder_A/.hidden_config_$i\",\"${dir#$BASE_DIR/}/folder_A/.hidden_config_${i}_backup\",\"${dir#$BASE_DIR/}/folder_B/.hidden_config_$i\"],\"hash_sha256\":\"${hash}\"}")
        TOTAL_FILES=$((TOTAL_FILES + 3))
        TOTAL_GROUPS=$((TOTAL_GROUPS + 1))
        group_id=$((group_id + 1))
    done

    # 3.2 Deep nesting (30 files → 10 groups)
    log_info "  - 3.2 Deep folder nesting..."
    for i in $(seq 1 10); do
        local dir="$base/3.2_deep_nesting/group_$i"

        # Create deep path (15 levels)
        local deep_path="$dir/l1/l2/l3/l4/l5/l6/l7/l8/l9/l10/l11/l12/l13/l14/l15"
        local shallow_path="$dir/shallow"
        mkdir -p "$deep_path" "$shallow_path"

        generate_test_document "$dir/root_file_$i.pdf" "edge_deep_$i" 8192
        cp "$dir/root_file_$i.pdf" "$deep_path/deep_file_$i.pdf"
        cp "$dir/root_file_$i.pdf" "$shallow_path/shallow_file_$i.pdf"

        local hash=$(shasum -a 256 "$dir/root_file_$i.pdf" | cut -d' ' -f1)
        MANIFEST_GROUPS+=("{\"id\":\"G$(printf '%04d' $group_id)\",\"category\":\"3.2_deep_nesting\",\"type\":\"deep_folders\",\"files\":[\"${dir#$BASE_DIR/}/root_file_$i.pdf\",\"${deep_path#$BASE_DIR/}/deep_file_$i.pdf\",\"${shallow_path#$BASE_DIR/}/shallow_file_$i.pdf\"],\"hash_sha256\":\"${hash}\"}")
        TOTAL_FILES=$((TOTAL_FILES + 3))
        TOTAL_GROUPS=$((TOTAL_GROUPS + 1))
        group_id=$((group_id + 1))
    done

    # 3.3 Bundles (7 files → 0 groups, these are traps)
    log_info "  - 3.3 App bundles (traps)..."
    local bundle_dir="$base/3.3_bundles"
    mkdir -p "$bundle_dir/set_A" "$bundle_dir/set_B"

    # Create fake app bundles (set A: 2 bundles)
    for i in 1 2; do
        local app="$bundle_dir/set_A/TestApp_$i.app"
        mkdir -p "$app/Contents/MacOS" "$app/Contents/Resources"
        echo "#!/bin/bash" > "$app/Contents/MacOS/TestApp_$i"
        echo "App $i resources" > "$app/Contents/Resources/data.txt"
        echo "<?xml version=\"1.0\"?><plist><dict><key>CFBundleName</key><string>TestApp_$i</string></dict></plist>" > "$app/Contents/Info.plist"
        TOTAL_FILES=$((TOTAL_FILES + 1))
    done

    # Set B: 5 bundles
    for i in 1 2 3 4 5; do
        local app="$bundle_dir/set_B/DemoApp_$i.app"
        mkdir -p "$app/Contents/MacOS" "$app/Contents/Resources"
        echo "#!/bin/bash" > "$app/Contents/MacOS/DemoApp_$i"
        echo "Demo $i resources" > "$app/Contents/Resources/data.txt"
        echo "<?xml version=\"1.0\"?><plist><dict><key>CFBundleName</key><string>DemoApp_$i</string></dict></plist>" > "$app/Contents/Info.plist"
        TOTAL_FILES=$((TOTAL_FILES + 1))
    done

    MANIFEST_TRAPS+=("{\"id\":\"TRAP_BUNDLE_A\",\"category\":\"3.3_bundles\",\"type\":\"app_bundle\",\"reason\":\"App bundles are directories, not file duplicates\",\"files\":[\"${bundle_dir#$BASE_DIR/}/set_A/TestApp_1.app\",\"${bundle_dir#$BASE_DIR/}/set_A/TestApp_2.app\"]}")
    MANIFEST_TRAPS+=("{\"id\":\"TRAP_BUNDLE_B\",\"category\":\"3.3_bundles\",\"type\":\"app_bundle\",\"reason\":\"App bundles are directories, not file duplicates\",\"files\":[\"${bundle_dir#$BASE_DIR/}/set_B/DemoApp_1.app\",\"${bundle_dir#$BASE_DIR/}/set_B/DemoApp_2.app\",\"${bundle_dir#$BASE_DIR/}/set_B/DemoApp_3.app\",\"${bundle_dir#$BASE_DIR/}/set_B/DemoApp_4.app\",\"${bundle_dir#$BASE_DIR/}/set_B/DemoApp_5.app\"]}")

    log_success "Part 3 complete: 90 duplicate files (30 groups) + 7 trap bundles"
}

# =============================================================================
# Generate Manifest
# =============================================================================

# Write the dataset manifest (JSON) to $MANIFEST_FILE.
# Globals:   MANIFEST_GROUPS, MANIFEST_TRAPS (arrays of pre-rendered JSON
#            objects, read), TOTAL_FILES, TOTAL_GROUPS, MANIFEST_FILE (read)
# Outputs:   progress messages via log_info / log_success; the manifest file
# Note:      the per-part counts in "structure" are fixed literals, they are
#            not derived from the counters.
generate_manifest() {
    log_info "Generating manifest.json..."

    # Turn each array into a single comma-separated string: print every entry
    # followed by a comma, then drop the one trailing comma. An empty array
    # collapses to an empty string.
    local groups_csv traps_csv
    printf -v groups_csv '%s,' "${MANIFEST_GROUPS[@]}"
    printf -v traps_csv '%s,' "${MANIFEST_TRAPS[@]}"
    groups_csv=${groups_csv%,}
    traps_csv=${traps_csv%,}

    # UTC creation timestamp, ISO-8601 with a trailing Z.
    local created_at
    created_at=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

    {
        printf '%s\n' \
            '{' \
            '  "version": "1.0",'
        printf '  "created": "%s",\n' "$created_at"
        printf '%s\n' \
            '  "description": "Duplicate Finder Accuracy Test Dataset",' \
            '  "summary": {'
        printf '    "total_files": %s,\n' "$TOTAL_FILES"
        printf '    "expected_duplicate_groups": %s,\n' "$TOTAL_GROUPS"
        printf '%s\n' \
            '    "expected_files_per_group": 3,' \
            '    "expected_false_positives": 0' \
            '  },' \
            '  "structure": {' \
            '    "part1_true_duplicates": {' \
            '      "files": 600,' \
            '      "groups": 200' \
            '    },' \
            '    "part2_traps": {' \
            '      "files": 154,' \
            '      "groups": 0' \
            '    },' \
            '    "part3_edge_cases": {' \
            '      "duplicate_files": 90,' \
            '      "duplicate_groups": 30,' \
            '      "trap_files": 7,' \
            '      "trap_groups": 0' \
            '    }' \
            '  },' \
            '  "duplicate_groups": ['
        printf '    %s\n' "$groups_csv"
        printf '%s\n' \
            '  ],' \
            '  "traps": ['
        printf '    %s\n' "$traps_csv"
        printf '%s\n' \
            '  ]' \
            '}'
    } > "$MANIFEST_FILE"

    log_success "Manifest generated: $MANIFEST_FILE"
}

# =============================================================================
# Main
# =============================================================================

# Entry point of the generator.
#
# Prints the banner, prepares BASE_DIR, detects whether the target filesystem
# is case-sensitive, runs every dataset part in order, writes the manifest and
# prints a summary.
#
# An already existing BASE_DIR is removed only when all of these hold:
#   - its physical (symlink-resolved) path is non-empty, not "/" and not $HOME;
#   - that path is $HOME/DuplicateTest itself or lies below it;
#   - the user answers the prompt with a single "y" or "Y".
#
# Globals:   BASE_DIR, HOME, MANIFEST_FILE, TOTAL_FILES, TOTAL_GROUPS (read);
#            FS_CASE_SENSITIVE (written)
# Arguments: none are consulted here; BASE_DIR is taken from the script's $1
#            where it is assigned at the top of the file.
# Exits:     1 when a path cannot be resolved, a safety check fails, or the
#            deletion is not confirmed.
main() {
    echo "=============================================="
    echo " Duplicate Finder Accuracy Test Dataset"
    echo " Generator v1.0"
    echo "=============================================="
    echo ""

    # Check if directory exists (SAFE deletion + simple y/N confirm)
    if [[ -d "$BASE_DIR" ]]; then
        log_warning "Directory already exists: $BASE_DIR"

        # Locals: none of this state is needed once the check is over.
        local safe_root="$HOME/DuplicateTest"
        local base_dir_canon safe_root_canon
        local confirm=""

        # Resolve both paths to their physical location (pwd -P) in a subshell,
        # so the comparisons below cannot be fooled by symlinks or "..".
        base_dir_canon="$(cd -- "$BASE_DIR" 2>/dev/null && pwd -P)" \
            || { log_error "Cannot resolve BASE_DIR: $BASE_DIR"; exit 1; }
        safe_root_canon="$(cd -- "$safe_root" 2>/dev/null && pwd -P)" \
            || { log_error "Cannot resolve SAFE_ROOT: $safe_root"; exit 1; }

        # Hard stop-list
        [[ -n "${base_dir_canon// }" ]] || { log_error "BASE_DIR is empty"; exit 1; }
        [[ "$base_dir_canon" != "/" ]] || { log_error "Refusing to delete /"; exit 1; }
        [[ "$base_dir_canon" != "$HOME" ]] || { log_error "Refusing to delete HOME"; exit 1; }

        # Allow deletion only inside sandbox
        case "$base_dir_canon" in
            "$safe_root_canon" | "$safe_root_canon"/*) ;;
            *)
                log_error "Refusing to delete outside sandbox. BASE_DIR must be inside: $safe_root_canon"
                exit 1
                ;;
        esac

        echo "Will delete and recreate: $base_dir_canon"
        # read returns non-zero at end of input (e.g. stdin is /dev/null). With
        # `set -e` active that would terminate the script right here without
        # any message, so the status is ignored and the answer check below
        # decides: an empty answer takes the "Aborted." path.
        read -r -p "Delete and recreate? (y/N): " confirm || true
        if [[ "$confirm" =~ ^[Yy]$ ]]; then
            rm -rf -- "$base_dir_canon"
        else
            log_error "Aborted."
            exit 1
        fi
    fi

    log_info "Creating dataset in: $BASE_DIR"
    mkdir -p "$BASE_DIR"

    # Detect FS behavior once (important for macOS default case-insensitive volumes)
    FS_CASE_SENSITIVE=$(detect_case_sensitivity "$BASE_DIR")
    if [[ "$FS_CASE_SENSITIVE" == "1" ]]; then
        log_info "Filesystem: case-sensitive"
    else
        log_warning "Filesystem: case-insensitive (will use safe filenames for extension-case tests)"
    fi

    echo ""

    # Create all parts
    create_part1_simple_duplicates
    echo ""
    create_part1_metadata_duplicates
    echo ""
    create_part2_traps
    echo ""
    create_part3_edge_cases
    echo ""
    generate_manifest

    echo ""
    echo "=============================================="
    echo " Dataset Generation Complete!"
    echo "=============================================="
    echo ""
    echo " Location: $BASE_DIR"
    echo " Total files: $TOTAL_FILES"
    echo " Expected duplicate groups: $TOTAL_GROUPS"
    echo " Files per group: 3"
    echo " Expected false positives: 0"
    echo ""
    echo " Manifest: $MANIFEST_FILE"
    echo ""
    echo " To test a duplicate finder:"
    echo "   1. Scan $BASE_DIR"
    echo "   2. Verify: $TOTAL_GROUPS groups found"
    echo "   3. Verify: Each group has exactly 3 files"
    echo "   4. Verify: No files from 02_traps/ in results"
    echo "Successfully completed!!!"
}

# Script entry point: run the generator, forwarding the command-line arguments.
main "$@"
