#!/bin/bash
#
# Script to create test dataset 6: Edge Cases
# Duplicate-finder testing methodology v3.4
#
# Creates files with various macOS-specific edge cases:
#   - Hidden files (by name and by flag)
#   - Symlinks and Hardlinks
#   - .app bundles
#   - Files with extended attributes (xattr)
#   - Zero-byte files
#
# Total: 19 items (files, links and .app bundles), 4 duplicate groups
#

# Strict mode: abort on a failing command (-e), on use of an unset variable
# (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# ============================================================================
# SETTINGS
# ============================================================================

# Root folder of the generated dataset. ${HOME:?} aborts with a message when
# HOME is unset or empty, so the dataset is never created relative to "/".
BASE_DIR="${HOME:?HOME must be set}/DuplicateTest/Dataset_06_EdgeCases"
# Logs live inside the dataset folder; the file name carries the start time
# of this run.
LOG_DIR="$BASE_DIR/logs"
LOG_FILE="$LOG_DIR/dataset_creation_$(date +%Y%m%d_%H%M%S).log"

# ============================================================================
# FUNCTIONS
# ============================================================================

# Print a timestamped message on stdout and append the same line to the log.
# Arguments: $1 - message text (may be empty)
# Globals:   LOG_FILE (read) - file the line is appended to
log() {
    printf '[%s] %s\n' "$(date "+%Y-%m-%d %H:%M:%S")" "$1" | tee -a "$LOG_FILE"
}

# ============================================================================
# MAIN SCRIPT
# ============================================================================

# Startup banner, printed to stdout only (log() is first used further down,
# after the log folder has been created).
cat << 'BANNER'
============================================================
  Creating test dataset 6: Edge Cases
  Methodology v3.4
============================================================

BANNER

# Create the dataset folders and the log folder in a single call.
mkdir -p \
    "$BASE_DIR/plain" \
    "$BASE_DIR/.hidden_dir" \
    "$BASE_DIR/bundles" \
    "$BASE_DIR/links" \
    "$LOG_DIR"

log "Starting creation of test dataset 6 (Edge Cases)"

# Running count of created items; every section below adds to it.
total_files=0

# ============================================================================
# 1. REGULAR FILES (basic duplicates)
# ============================================================================

printf '\n'
log "1. Creating regular files..."

# Original file: fixed text followed by two $RANDOM values.
plain_dir="$BASE_DIR/plain"
printf '%s\n' "This is the base content for duplicate testing. Random: $RANDOM$RANDOM" > "$plain_dir/data_base.txt"
# Plain copy of the original; together they form a duplicate pair.
cp "$plain_dir/data_base.txt" "$plain_dir/data_dup.txt"
total_files=$((total_files + 2))

log "   Created: data_base.txt, data_dup.txt"

# ============================================================================
# 2. ZERO-BYTE FILES
# ============================================================================

printf '\n'
log "2. Creating zero-byte files..."

# touch creates each missing file with no content: empty_01.bin .. empty_05.bin
for n in 1 2 3 4 5; do
    touch "$BASE_DIR/plain/empty_0$n.bin"
    total_files=$((total_files + 1))
done

log "   Created: 5 empty files (empty_01.bin - empty_05.bin)"

# ============================================================================
# 3. HIDDEN FILES (by name)
# ============================================================================

printf '\n'
log "3. Creating hidden files (dot-prefixed names)..."

hidden_dir="$BASE_DIR/.hidden_dir"
# Dot-prefixed file in the dataset root ...
printf '%s\n' "Hidden content by dot name" > "$BASE_DIR/.dot_hidden.txt"
# ... its copy inside the dot-prefixed folder ...
cp "$BASE_DIR/.dot_hidden.txt" "$hidden_dir/hidden_data.txt"
# ... and an empty file in that same folder.
touch "$hidden_dir/hidden_empty.bin"
total_files=$((total_files + 3))

log "   Created: .dot_hidden.txt, .hidden_dir/hidden_data.txt, hidden_empty.bin"

# ============================================================================
# 4. HIDDEN FILES (by flag)
# ============================================================================

printf '\n'
log "4. Creating a hidden file (via chflags hidden)..."

# Ordinary file name; the file is marked hidden through the file flag instead.
flagged_file="$BASE_DIR/hidden_flagged.txt"
printf '%s\n' "Hidden content by flag" > "$flagged_file"
chflags hidden "$flagged_file"
total_files=$((total_files + 1))

log "   Created: hidden_flagged.txt (with hidden flag)"

# ============================================================================
# 5. SYMLINKS
# ============================================================================

echo ""
log "5. Creating symbolic links..."

# Symlink to a file.
# -f replaces a link left over from a previous run; a plain "ln -s" fails with
# "File exists" in that case and the script aborts because of set -e.
ln -sf "$BASE_DIR/plain/data_base.txt" "$BASE_DIR/links/symlink_to_data_base"
total_files=$((total_files + 1))

# Symlink to a directory.
# -n keeps ln from following an already existing symlink_to_plain_dir; without
# it the new link would be created inside the directory the old link points to.
ln -sfn "$BASE_DIR/plain" "$BASE_DIR/links/symlink_to_plain_dir"
total_files=$((total_files + 1))

log "   Created: symlink_to_data_base, symlink_to_plain_dir"

# ============================================================================
# 6. HARDLINKS
# ============================================================================

echo ""
log "6. Creating hard links..."

# Hard links point to the same inode (identical content).
# -f replaces links left over from a previous run; a plain "ln" fails with
# "File exists" in that case and the script aborts because of set -e.
ln -f "$BASE_DIR/plain/data_base.txt" "$BASE_DIR/links/hardlink_data_base_1"
ln -f "$BASE_DIR/plain/data_base.txt" "$BASE_DIR/links/hardlink_data_base_2"
total_files=$((total_files + 2))

log "   Created: hardlink_data_base_1, hardlink_data_base_2"

# ============================================================================
# 7. .APP BUNDLES
# ============================================================================

# Blank separator line on stdout, then the logged section announcement.
printf '\n'
log "7. Creating .app bundles..."

# Build a minimal .app bundle directory.
# Arguments:
#   $1 - path of the bundle directory to create (e.g. .../TestApp.app)
#   $2 - application name; used as the executable file name and in the plist
# Creates:
#   <bundle>/Contents/Info.plist
#   <bundle>/Contents/MacOS/<name>          (executable shell script)
#   <bundle>/Contents/Resources/data.txt
create_app_bundle() {
    local bundle_dir="$1"
    local name="$2"
    local contents_dir="$bundle_dir/Contents"
    local launcher="$contents_dir/MacOS/$name"

    mkdir -p "$contents_dir/MacOS" "$contents_dir/Resources"

    # Resource file
    printf '%s\n' "Resource data for $name" > "$contents_dir/Resources/data.txt"

    # Executable: a two-line shell script. $name is expanded now, so the
    # generated script contains the literal application name.
    printf '%s\n' '#!/bin/bash' "echo \"Test application: $name\"" > "$launcher"
    chmod +x "$launcher"

    # Info.plist (unquoted heredoc delimiter so that $name is substituted)
    cat > "$contents_dir/Info.plist" << INFO_PLIST
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
    <key>CFBundleExecutable</key>
    <string>$name</string>
    <key>CFBundleIdentifier</key>
    <string>com.test.$name</string>
    <key>CFBundleName</key>
    <string>$name</string>
    <key>CFBundleVersion</key>
    <string>1.0</string>
</dict>
</plist>
INFO_PLIST
}

create_app_bundle "$BASE_DIR/bundles/TestApp.app" "TestApp"
total_files=$((total_files + 1))

# Create a duplicate .app bundle.
# A copy left over from a previous run is removed first: when the destination
# directory already exists, cp -R puts the source inside it
# (TestApp_Duplicate.app/TestApp.app) instead of replacing it, and the two
# bundles would no longer be identical. ${BASE_DIR:?} refuses to run rm with
# an empty base path.
rm -rf -- "${BASE_DIR:?}/bundles/TestApp_Duplicate.app"
cp -R "$BASE_DIR/bundles/TestApp.app" "$BASE_DIR/bundles/TestApp_Duplicate.app"
total_files=$((total_files + 1))

log "   Created: TestApp.app, TestApp_Duplicate.app"

# ============================================================================
# 8. FILES WITH EXTENDED ATTRIBUTES
# ============================================================================

printf '\n'
log "8. Creating files with extended attributes..."

# Two copies of data_base.txt, each tagged with the same pair of attributes.
for xattr_name in data_xattr.txt data_xattr2.txt; do
    xattr_target="$BASE_DIR/plain/$xattr_name"
    cp "$BASE_DIR/plain/data_base.txt" "$xattr_target"
    xattr -w com.test.attribute "Test attribute value" "$xattr_target"
    xattr -w com.test.another "Another attribute" "$xattr_target"
    total_files=$((total_files + 1))
done

log "   Created: data_xattr.txt, data_xattr2.txt (with extended attributes)"

# ============================================================================
# FINAL REPORT
# ============================================================================

# Report heading, stdout only.
cat << 'HEADING'

============================================================
  FINAL REPORT
============================================================
HEADING

# Pass the report to log() one line at a time, so each line is timestamped
# and written to both stdout and the log file. The heredoc delimiter is
# unquoted: $total_files and $BASE_DIR are expanded. IFS= and -r keep the
# leading indentation of the tree lines intact.
while IFS= read -r report_line; do
    log "$report_line"
done << REPORT

============================================================
CREATION COMPLETE
============================================================

Total files/objects created: $total_files

Structure:
  $BASE_DIR/
  ├── .dot_hidden.txt              ← hidden by name
  ├── .hidden_dir/
  │   ├── hidden_data.txt
  │   └── hidden_empty.bin
  ├── bundles/
  │   ├── TestApp.app
  │   └── TestApp_Duplicate.app
  ├── hidden_flagged.txt           ← hidden by flag
  ├── links/
  │   ├── hardlink_data_base_1
  │   ├── hardlink_data_base_2
  │   ├── symlink_to_data_base
  │   └── symlink_to_plain_dir
  └── plain/
      ├── data_base.txt
      ├── data_dup.txt
      ├── data_xattr.txt           ← with extended attributes
      ├── data_xattr2.txt
      └── empty_01.bin ... empty_05.bin  ← zero-byte

EXPECTED DUPLICATE GROUPS:
  1. Zero-byte: 6 files (5 empty_*.bin + hidden_empty.bin)
  2. .app bundles: 2 items
  3. data_base group: 6 files (base, dup, 2 hardlinks, 2 xattr)
  4. .dot_hidden group: 2 files (.dot_hidden.txt, hidden_data.txt)

REPORT

# Short folder overview on stdout (not logged); $BASE_DIR is expanded.
cat << STRUCTURE

Dataset structure:
  $BASE_DIR/
  ├── plain/         (regular files + xattr)
  ├── .hidden_dir/   (hidden folder)
  ├── bundles/       (.app bundles)
  ├── links/         (symlinks + hardlinks)
  └── logs/

STRUCTURE

# Create the metadata file describing the dataset.
# The heredoc delimiter is unquoted, so $(date ...), $total_files and
# $BASE_DIR are expanded when the file is written. The total comes from the
# running counter rather than a hard-coded number, so it always matches the
# "Total files/objects created" line of the log report.
cat > "$BASE_DIR/dataset_info.txt" << EOF
Test dataset 6: Edge Cases
=============================
Creation date: $(date "+%Y-%m-%d %H:%M:%S")
Methodology: v3.4

Contents:
1. Regular files (data_base.txt, data_dup.txt) — 2 items
2. Zero-byte files (empty_01-05.bin + hidden_empty.bin) — 6 items
3. Hidden by name (.dot_hidden.txt, .hidden_dir/hidden_data.txt) — 2 items
4. Hidden by flag (hidden_flagged.txt — chflags hidden) — 1 item
5. Symbolic links (symlink_to_data_base, symlink_to_plain_dir) — 2 items
6. Hard links (hardlink_data_base_1, hardlink_data_base_2) — 2 items
7. .app bundles (TestApp.app, TestApp_Duplicate.app) — 2 items
8. Extended attributes (data_xattr.txt, data_xattr2.txt) — 2 items

Total files: $total_files

Expected duplicate groups (4 groups):
- Group 1: 6 zero-byte files (5 empty_*.bin + hidden_empty.bin)
- Group 2: 2 .app bundles
- Group 3: 6 data_base files (base, dup, 2 hardlinks, 2 xattr)
- Group 4: 2 .dot_hidden files (.dot_hidden.txt, hidden_data.txt)

What is being tested:
- Detection of hidden files (by name and by flag)
- Correct handling of symlinks (do not follow vs follow)
- Detecting hard links as duplicates
- Handling of .app bundles (treat folder as a unit)
- Including/excluding extended attributes

For scanning: $BASE_DIR
After the test is finished, delete the folder: "$BASE_DIR"
EOF

log "Metadata file created: $BASE_DIR/dataset_info.txt"

# Closing hint on stdout: which folder to point the scanner at.
cat << FOOTER

For scanning, select the folder:
  $BASE_DIR

Done!
FOOTER
