<?php

require __DIR__ . '/vendor/autoload.php';

use thiagoalessio\TesseractOCR\TesseractOCR;

// Image to run OCR on. Defaults to the original sample scan; pass another path
// as the first command-line argument to try a different image without editing
// this file.
$imagePath = isset($argv[1])
    ? $argv[1]
    : '/Volumes/Workspace/Client Projects/Constituency/16 - ORLEAMPETH/11/booth_info 1.png';

// Stop early with a clear message when the image is missing or unreadable,
// before handing the path to Tesseract.
if (!is_file($imagePath) || !is_readable($imagePath)) {
    file_put_contents('php://stderr', "Image not found or not readable: {$imagePath}" . PHP_EOL);
    exit(1);
}

// English language data, page segmentation mode 4 (Tesseract: single column of
// text of variable sizes). $text holds the raw OCR output used by the
// extraction steps below.
$text = (new TesseractOCR($imagePath))->lang('eng')->psm(4)->run();

echo "=== BOOTH NUMBER EXTRACTION ===", PHP_EOL;

// Label formats under which the booth (part) number may appear in the OCR
// text. They are tried in order and the first match wins.
$patterns = [
    '/Part\s+No\.\s*:\s*(\d+)/i',
    '/Part\s+Number\s*:\s*(\d+)/i',
    '/No\.\s+and\s+Name\s+of\s+Polling\s+Station\s*:\s*(\d+)/i',
];

$found = false;
foreach ($patterns as $regex) {
    if (preg_match($regex, $text, $hit) !== 1) {
        continue;
    }

    echo "Pattern matched: {$regex}", PHP_EOL;
    echo "Booth number: {$hit[1]}", PHP_EOL;
    $found = true;
    break;
}

// Nothing matched: report which label fragments are present in the text so
// the patterns above can be adjusted to what the OCR actually produced.
if ($found === false) {
    echo "No booth number pattern matched!", PHP_EOL;

    if (preg_match('/Part\s*No/i', $text) === 1) {
        echo 'Found "Part No" in text', PHP_EOL;
    }

    // "pareno" is presumably an OCR misreading of "Part No" — confirm against
    // real scans.
    if (preg_match('/pareno/i', $text) === 1) {
        echo 'Found "pareno" in text', PHP_EOL;

        // Attempt the same "label : digits" extraction on the misread label.
        if (preg_match('/pareno\.\s*:\s*(\d+)/i', $text, $salvaged) === 1) {
            echo "Booth number from pareno: {$salvaged[1]}", PHP_EOL;
            $found = true;
        }
    }
}

// Street extraction check: walk the OCR text line by line, start collecting
// after the "name of sections in the part" header, and stop at the heading of
// a following section.
echo PHP_EOL, "=== STREET EXTRACTION ===", PHP_EOL;

// Drop carriage returns so the text can be split on "\n" alone.
$normalized = preg_replace('/\r/', '', $text);

$streets = [];
$inSectionList = false;

foreach (preg_split('/\n+/', $normalized) as $rawLine) {
    $row = trim($rawLine);
    if ($row === '') {
        continue;
    }

    // The header line switches collection on; the header itself is not an entry.
    if (preg_match('/No\.\s*\.?.*name\s+of\s+sections\s+in\s+the\s+part/i', $row) === 1) {
        echo "Found section header: {$row}", PHP_EOL;
        $inSectionList = true;
        continue;
    }

    // Ignore everything that appears before the header.
    if (!$inSectionList) {
        continue;
    }

    // Entries look like "<number> - <name>, <rest>"; only the text before the
    // first comma is kept as the street name.
    if (preg_match('/^\d+\s*-\s*(.+)$/', $row, $entry) === 1) {
        $parts = preg_split('/\s*,\s*/', trim($entry[1]));
        $name = trim($parts[0]);
        $streets[] = $name;
        echo "  Extracted street: {$name}", PHP_EOL;
    }

    // A "3. Polling station details" or "4. NUMBER OF ELECTORS" heading ends
    // the section list.
    if (preg_match('/^3\.\s*Polling\s+station\s+details|^4\.\s*NUMBER\s+OF\s+ELECTORS/i', $row) === 1) {
        echo "Found stopping marker: {$row}", PHP_EOL;
        break;
    }
}

echo PHP_EOL, "Total streets found: ", count($streets), PHP_EOL;
echo "Streets: ", json_encode($streets, JSON_PRETTY_PRINT), PHP_EOL;
