<?php

// Stop immediately when ABSPATH is not defined, i.e. when this file is
// requested directly instead of being loaded by the host application.
defined('ABSPATH') || exit;

class Coco_Ops_Cohort_Matcher {

    /**
     * Find similar past events (cohorts) for forecasting.
     *
     * Candidate events must share the current event's venue, weekday and
     * holiday flag, fall in the same or a neighbouring start-time bucket,
     * have started before NOW(), and have at least one row in the actuals or
     * snapshots table. The 50 most recent candidates are then scored by tag
     * overlap (Jaccard index) plus fixed bonuses and sorted best-first.
     *
     * @param int    $event_id    Current event ID.
     * @param int    $min_matches At most max($min_matches, 20) rows are returned;
     *                            fewer when fewer candidates exist.
     * @param string $sensitivity 'strict', 'medium' or 'loose'. Only 'loose' has an
     *                            effect: it adds a 0.3 weekday bonus. Because the
     *                            weekday is always required by the query, that
     *                            bonus applies to every row and does not change
     *                            the ranking.
     * @return array List of feature-row objects, each extended with
     *               final_attendance, final_revenue and similarity_score,
     *               ordered by similarity_score descending. Empty when the
     *               event is unknown or no candidate matches.
     */
    public function find_cohorts($event_id, $min_matches = 5, $sensitivity = 'medium') {
        global $wpdb;

        $table_features  = $wpdb->prefix . 'coco_event_features';
        $table_actuals   = $wpdb->prefix . 'coco_event_actuals';
        $table_snapshots = $wpdb->prefix . 'coco_event_snapshots';

        // Load the features of the event we are forecasting for.
        $current_event = $wpdb->get_row($wpdb->prepare(
            "SELECT * FROM $table_features WHERE event_id = %d",
            $event_id
        ));

        if (!$current_event) {
            return [];
        }

        // Neighbouring time buckets are treated as comparable.
        $time_buckets        = $this->get_compatible_buckets($current_event->start_bucket);
        $bucket_placeholders = implode(',', array_fill(0, count($time_buckets), '%s'));

        // Conditions and parameters are listed in the same order so that the
        // positional placeholders line up.
        $where_conditions = [
            'ef.event_id != %d',
            'ef.venue_id = %s',
            'ef.weekday = %s',
            "ef.start_bucket IN ($bucket_placeholders)",
            'ef.holiday_flag = %d',
            // Only events with actuals or at least one snapshot.
            '(ea.event_id IS NOT NULL OR es.event_id IS NOT NULL)',
            // Only events in the past.
            'ef.start_ts < NOW()',
        ];
        $query_params = array_merge(
            [$event_id, $current_event->venue_id, $current_event->weekday],
            $time_buckets,
            [$current_event->holiday_flag]
        );

        $where_clause = implode(' AND ', $where_conditions);

        // DISTINCT collapses the duplicate rows produced by joining an event
        // to several snapshots.
        $query = "
            SELECT DISTINCT
                ef.*,
                COALESCE(ea.final_attendance, 0) as final_attendance,
                COALESCE(ea.final_revenue, 0) as final_revenue,
                0 as similarity_score
            FROM $table_features ef
            LEFT JOIN $table_actuals ea ON ef.event_id = ea.event_id
            LEFT JOIN $table_snapshots es ON ef.event_id = es.event_id
            WHERE $where_clause
            ORDER BY ef.start_ts DESC
            LIMIT 50
        ";

        $cohorts = $wpdb->get_results($wpdb->prepare($query, $query_params));

        if (empty($cohorts)) {
            return [];
        }

        $current_tags = $this->decode_tags($current_event->tags_json ?? null);

        // Rows are objects, so they can be updated in place without iterating
        // by reference (which would leave a dangling reference behind).
        foreach ($cohorts as $cohort) {
            $similarity = $this->calculate_tag_similarity(
                $current_tags,
                $this->decode_tags($cohort->tags_json ?? null)
            );

            // Bonus for an exact (not merely neighbouring) time bucket.
            if ($cohort->start_bucket === $current_event->start_bucket) {
                $similarity += 0.2;
            }

            // Weekday bonus in 'loose' mode; see the note on $sensitivity.
            if ($sensitivity === 'loose' && $cohort->weekday === $current_event->weekday) {
                $similarity += 0.3;
            }

            $cohort->similarity_score = $similarity;
        }

        // Best match first.
        usort($cohorts, static function ($a, $b) {
            return $b->similarity_score <=> $a->similarity_score;
        });

        return array_slice($cohorts, 0, max((int) $min_matches, 20));
    }

    /**
     * Map a start bucket to the list of buckets considered comparable to it.
     *
     * @param mixed $start_bucket Bucket name of the current event.
     * @return array The bucket itself plus its neighbour for known buckets;
     *               a one-element list holding the input otherwise.
     */
    private function get_compatible_buckets($start_bucket) {
        $neighbours = [
            'late_night' => ['late_night', 'night'],
            'night'      => ['night', 'late_night'],
            'evening'    => ['evening', 'night'],
            'afternoon'  => ['afternoon', 'evening'],
            'morning'    => ['morning', 'afternoon'],
        ];

        if (is_string($start_bucket) && isset($neighbours[$start_bucket])) {
            return $neighbours[$start_bucket];
        }

        return [$start_bucket];
    }

    /**
     * Decode a tags_json column into a flat list of scalar tags.
     *
     * Anything that does not decode to an array (NULL, invalid JSON, a bare
     * JSON string or number) is treated as "no tags", so the similarity
     * calculation never receives a non-array.
     *
     * @param mixed $tags_json Raw column value.
     * @return array List of scalar tag values.
     */
    private function decode_tags($tags_json) {
        if (!is_string($tags_json) || $tags_json === '') {
            return [];
        }

        $decoded = json_decode($tags_json, true);
        if (!is_array($decoded)) {
            return [];
        }

        // Drop nested structures (they cannot be compared as tags) and
        // discard keys so that a JSON object behaves like a list.
        return array_values(array_filter($decoded, 'is_scalar'));
    }

    /**
     * Calculate tag similarity using the Jaccard index.
     *
     * Both inputs are reduced to sets first: duplicates would otherwise be
     * counted in the intersection but not in the union (giving results
     * above 1), and string keys would make array_merge() overwrite entries.
     *
     * @param array $tags1 Tags of the first event.
     * @param array $tags2 Tags of the second event.
     * @return int|float 0.5 when neither side has tags, 0 when exactly one
     *                   side has tags, otherwise |A ∩ B| / |A ∪ B|.
     */
    private function calculate_tag_similarity($tags1, $tags2) {
        $set1 = array_values(array_unique((array) $tags1));
        $set2 = array_values(array_unique((array) $tags2));

        if (empty($set1) && empty($set2)) {
            return 0.5; // Neutral score if both have no tags
        }

        if (empty($set1) || empty($set2)) {
            return 0; // No similarity if one has tags and other doesn't
        }

        $intersection = count(array_intersect($set1, $set2));
        $union = count(array_unique(array_merge($set1, $set2)));

        return $union > 0 ? $intersection / $union : 0;
    }

    /**
     * Reduce caller-supplied cohort identifiers to a plain list of integers.
     *
     * Accepts numeric IDs as well as row objects carrying an event_id
     * property (such as the rows returned by find_cohorts()). Non-numeric
     * entries are dropped and array keys are discarded.
     *
     * @param mixed $cohort_ids IDs or row objects.
     * @return array List of integer event IDs.
     */
    private function normalize_ids($cohort_ids) {
        $ids = [];

        foreach ((array) $cohort_ids as $candidate) {
            if (is_object($candidate) && isset($candidate->event_id)) {
                $candidate = $candidate->event_id;
            }
            if (is_numeric($candidate)) {
                $ids[] = (int) $candidate;
            }
        }

        return $ids;
    }

    /**
     * Get cohort statistics for display.
     *
     * @param array $cohort_ids Event IDs (or row objects with an event_id property).
     * @return object|null Result of $wpdb->get_row() with the columns count,
     *                     avg_attendance, min_attendance, max_attendance and
     *                     avg_revenue, aggregated over the actuals table;
     *                     null when no usable ID is given.
     */
    public function get_cohort_stats($cohort_ids) {
        $ids = $this->normalize_ids($cohort_ids);
        if (empty($ids)) {
            return null;
        }

        global $wpdb;
        $table_actuals = $wpdb->prefix . 'coco_event_actuals';

        $placeholders = implode(',', array_fill(0, count($ids), '%d'));

        $query = "
            SELECT 
                COUNT(*) as count,
                AVG(final_attendance) as avg_attendance,
                MIN(final_attendance) as min_attendance,
                MAX(final_attendance) as max_attendance,
                AVG(final_revenue) as avg_revenue
            FROM $table_actuals
            WHERE event_id IN ($placeholders)
        ";

        return $wpdb->get_row($wpdb->prepare($query, $ids));
    }

    /**
     * Get snapshots at specific days-to-event for cohorts.
     *
     * Selects snapshot rows whose days_to_event lies within
     * [$days_to_event - $tolerance, $days_to_event + $tolerance], ordered by
     * event and then by distance from $days_to_event, so the closest snapshot
     * of each event comes first within that event's rows.
     *
     * @param array     $cohort_ids    Event IDs (or row objects with an event_id property).
     * @param int|float $days_to_event Target days-to-event value.
     * @param float     $tolerance     Half-width of the accepted window.
     * @return array Result of $wpdb->get_results(); empty array when no usable ID is given.
     */
    public function get_cohort_snapshots_at_day($cohort_ids, $days_to_event, $tolerance = 0.5) {
        $ids = $this->normalize_ids($cohort_ids);
        if (empty($ids)) {
            return [];
        }

        global $wpdb;
        $table_snapshots = $wpdb->prefix . 'coco_event_snapshots';

        $placeholders = implode(',', array_fill(0, count($ids), '%d'));

        $query = "
            SELECT 
                event_id,
                tickets_sold,
                tables_sold,
                revenue_to_date,
                days_to_event
            FROM $table_snapshots
            WHERE event_id IN ($placeholders)
            AND days_to_event BETWEEN %f AND %f
            ORDER BY event_id, ABS(days_to_event - %f)
        ";

        $target = (float) $days_to_event;
        $window = (float) $tolerance;

        $params = array_merge(
            $ids,
            [$target - $window, $target + $window, $target]
        );

        return $wpdb->get_results($wpdb->prepare($query, $params));
    }
}

