#!/usr/bin/env python3
"""
Slow Day Detector — parses USchedule daily calendar emails to detect slow days.

USchedule sends a daily calendar summary to ops@theswingshift.golf at ~11:18 PM.
This script parses those emails (plain text or HTML) and determines if the next day
qualifies as a "slow day" based on booking density.

Usage:
    # Parse an email file (plain text or .eml):
    python3 slow_day_detector.py --email path/to/email.txt

    # Parse raw email body from stdin:
    echo "$EMAIL_BODY" | python3 slow_day_detector.py --stdin

    # Override the threshold:
    python3 slow_day_detector.py --email email.txt --threshold 40

Output: JSON to stdout with detection results.
"""

import argparse
import json
import re
import sys
from datetime import datetime, timedelta
from email import policy
from email.parser import BytesParser, Parser
from html.parser import HTMLParser
from pathlib import Path


# Default slow-day cutoff, in percent: a day whose utilization (counted
# bookings as a percentage of TOTAL_BAY_SLOTS) is strictly below this value
# is reported as slow. Can be overridden per run with --threshold.
SLOW_DAY_THRESHOLD_PCT = 50

# Swing Shift has 4 simulator bays, open ~12 hours/day = ~48 bay-hours.
# Utilization treats every counted booking as occupying one bay-hour slot.
TOTAL_BAYS = 4
HOURS_OPEN = 12  # approximate: 10 AM - 10 PM
TOTAL_BAY_SLOTS = TOTAL_BAYS * HOURS_OPEN


class HTMLTextExtractor(HTMLParser):
    """Collect the readable text of an HTML document.

    Every text node is recorded in document order, except text that sits
    inside ``<script>`` or ``<style>`` elements: that is code/CSS, not
    calendar content, and would otherwise pollute the text handed to the
    booking and date regexes.

    Usage: ``feed()`` the markup, ``close()`` the parser, then call
    ``get_text()``.
    """

    # Elements whose content is never human-readable text.
    _HIDDEN_TAGS = frozenset({"script", "style"})

    def __init__(self):
        super().__init__()
        self._text = []
        # > 0 while the parser is inside a <script>/<style> element.
        self._hidden_depth = 0

    def handle_starttag(self, tag, attrs):
        """Start suppressing text when a hidden element opens."""
        if tag in self._HIDDEN_TAGS:
            self._hidden_depth += 1

    def handle_endtag(self, tag):
        """Stop suppressing text when a hidden element closes."""
        # Guard against a stray closing tag driving the depth negative.
        if tag in self._HIDDEN_TAGS and self._hidden_depth:
            self._hidden_depth -= 1

    def handle_data(self, data):
        """Record a text node unless it belongs to a hidden element."""
        if not self._hidden_depth:
            self._text.append(data)

    def get_text(self):
        """Return all recorded text nodes joined by single spaces."""
        return " ".join(self._text)


def strip_html(html_str: str) -> str:
    """Convert an HTML string to plain text using HTMLTextExtractor.

    Args:
        html_str: HTML markup (for example the text/html body of an email).

    Returns:
        The text collected by the extractor, joined by single spaces.
    """
    extractor = HTMLTextExtractor()
    extractor.feed(html_str)
    # feed() alone may hold back trailing input it cannot yet classify (for
    # example text ending in an unterminated "&" reference); close() tells
    # the parser the document is complete so that text is flushed too.
    extractor.close()
    return extractor.get_text()


def parse_email_file(filepath: str) -> str:
    """Return the body text of the email stored at *filepath*.

    The file is first treated as a MIME message: a text/plain body is
    preferred, and a text/html body is converted to text via strip_html().
    When no usable body part is selected, or anything in that attempt raises,
    the whole file is decoded as UTF-8 (undecodable bytes are replaced) and
    returned as-is.

    Errors from reading the file itself are not caught here.
    """
    raw_bytes = Path(filepath).read_bytes()

    try:
        message = BytesParser(policy=policy.default).parsebytes(raw_bytes)
        part = message.get_body(preferencelist=("plain", "html"))
        # NOTE(review): this is a truthiness test, not `is not None`.
        # A message object without headers appears to be falsy, which is what
        # sends header-less plain-text files to the raw fallback below —
        # confirm before tightening this check.
        if part:
            payload = part.get_content()
            is_html = part.get_content_type() == "text/html"
            return strip_html(payload) if is_html else payload
    except Exception:
        # Deliberate best effort: any parsing problem falls back to raw text.
        pass

    return raw_bytes.decode("utf-8", errors="replace")


# A clock time such as "10:00 AM" or "2:30pm".
_CLOCK_TIME = r"\d{1,2}:\d{2}\s*(?:AM|PM)"
_TIME_RE = re.compile(_CLOCK_TIME, re.IGNORECASE)
# A "start - end" range; hyphen, en dash and em dash are all accepted.
_TIME_RANGE_RE = re.compile(
    r"(" + _CLOCK_TIME + r")\s*[-–—]\s*(" + _CLOCK_TIME + r")",
    re.IGNORECASE,
)
# Explicit count header: "3 Bookings", "1 reservation", ...
_COUNT_HEADER_RE = re.compile(
    r"(\d+)\s+(?:booking|reservation|appointment|session)s?\b",
    re.IGNORECASE,
)
# Per-bay summary line: "Bay 1: ..." / "Simulator 2: ..." / "Sim 3: ...".
_BAY_LINE_RE = re.compile(r"(?:bay|simulator|sim)\s*(\d+)\s*:\s*(.+)", re.IGNORECASE)
# Phrases that mean the day is empty; any of them forces the count to zero.
_EMPTY_DAY_RES = (
    # \b keeps words that merely end in "no" ("Casino sessions") from matching.
    re.compile(r"\bno\s+(?:booking|reservation|appointment|session)s?", re.IGNORECASE),
    re.compile(r"nothing\s+scheduled", re.IGNORECASE),
    re.compile(r"calendar\s+is\s+(?:clear|empty)", re.IGNORECASE),
)


def _count_bay_entries(slots_text: str) -> int:
    """Count bookings on one bay line, treating a 'start - end' range as one."""
    range_count = len(_TIME_RANGE_RE.findall(slots_text))
    # Remove the ranges so their start/end times are not counted again below.
    leftover = _TIME_RANGE_RE.sub(" ", slots_text)
    return range_count + len(_TIME_RE.findall(leftover))


def count_bookings(text: str) -> dict:
    """
    Extract booking count from USchedule calendar email text.

    USchedule calendar emails typically contain lines like:
      - "3 Bookings" or "3 Reservations" or "3 appointments"
      - Time-slot lines: "10:00 AM - 11:00 AM  John D. (Bay 2)"
      - Bay assignment lines: "Bay 1: 10:00 AM, 2:00 PM, 6:00 PM"

    Several independent heuristics are applied and the largest count wins,
    so the result stays usable if the email format changes:
      1. an explicit "<N> bookings" style header (first occurrence),
      2. the number of "start - end" time ranges in the text,
      3. the number of entries on "Bay N: ..." lines, where a time range
         counts as a single booking and a lone time counts as one booking.
    An explicit "no bookings" / "nothing scheduled" / "calendar is clear"
    phrase overrides all of the above and forces the count to 0.

    Args:
        text: Plain-text email body.

    Returns:
        A dict with:
          - "booking_count": the final booking estimate,
          - "bookings": one {"start", "end"} dict per time range found,
          - "raw_time_slots": number of time ranges found,
          - "bay_lines_found": number of bay summary lines found.
    """
    total_count = 0

    # Heuristic 1: explicit count header.
    header = _COUNT_HEADER_RE.search(text)
    if header:
        total_count = int(header.group(1))

    # Heuristic 2: "10:00 AM - 11:00 AM" style ranges anywhere in the text.
    time_slots = _TIME_RANGE_RE.findall(text)
    bookings = [
        {"start": start.strip(), "end": end.strip()} for start, end in time_slots
    ]
    total_count = max(total_count, len(time_slots))

    # Heuristic 3: per-bay summary lines.
    bay_lines = _BAY_LINE_RE.findall(text)
    if bay_lines:
        bay_booking_count = sum(
            _count_bay_entries(slots_text) for _, slots_text in bay_lines
        )
        total_count = max(total_count, bay_booking_count)

    # Override: an explicit "empty day" phrase beats every counted value.
    if any(pattern.search(text) for pattern in _EMPTY_DAY_RES):
        total_count = 0

    return {
        "booking_count": total_count,
        "bookings": bookings,
        "raw_time_slots": len(time_slots),
        "bay_lines_found": len(bay_lines),
    }


def detect_target_date(text: str) -> str:
    """Return the date the calendar email refers to, as "YYYY-MM-DD".

    Date styles are tried in this order, and within a style every occurrence
    in the text is tried until one parses as a real date:
      1. month name, full or abbreviated: "March 20, 2026", "Mar 20 2026"
      2. US numeric: "03/20/2026"
      3. ISO: "2026-03-20"

    Args:
        text: Plain-text email body.

    Returns:
        The first date that parses, formatted "%Y-%m-%d". If nothing parses,
        tomorrow's date according to the local clock.
    """
    date_styles = (
        (r"(\w+ \d{1,2},?\s*\d{4})", ("%B %d %Y", "%b %d %Y")),
        (r"(\d{1,2}/\d{1,2}/\d{4})", ("%m/%d/%Y",)),
        (r"(\d{4}-\d{2}-\d{2})", ("%Y-%m-%d",)),
    )
    for pattern, formats in date_styles:
        # finditer, not search: the loose month-name pattern also matches
        # non-dates such as "Bay 4 2026", which must not hide a real date
        # appearing later in the text.
        for match in re.finditer(pattern, text):
            # Turn the optional comma into whitespace and collapse runs of
            # whitespace so one comma-free format covers every variant.
            candidate = " ".join(match.group(1).replace(",", " ").split())
            for fmt in formats:
                try:
                    parsed = datetime.strptime(candidate, fmt)
                except ValueError:
                    continue
                return parsed.strftime("%Y-%m-%d")

    # Default: assume the email is about tomorrow.
    tomorrow = datetime.now() + timedelta(days=1)
    return tomorrow.strftime("%Y-%m-%d")


def detect_day_name(date_str: str) -> str:
    """Return the weekday name (e.g. "Friday") for a "YYYY-MM-DD" string.

    If the string cannot be parsed in that format, the literal "Tomorrow"
    is returned instead.
    """
    iso_format = "%Y-%m-%d"
    try:
        return datetime.strptime(date_str, iso_format).strftime("%A")
    except ValueError:
        return "Tomorrow"


def is_slow_day(booking_count: int, threshold_pct: int = SLOW_DAY_THRESHOLD_PCT) -> bool:
    """Return True when utilization is strictly below *threshold_pct*.

    Utilization is *booking_count* expressed as a percentage of
    TOTAL_BAY_SLOTS. With zero total slots every day is reported as slow.
    """
    # No capacity configured: nothing can be booked, so the day is slow.
    if TOTAL_BAY_SLOTS == 0:
        return True

    booked_fraction = booking_count / TOTAL_BAY_SLOTS
    utilization_pct = booked_fraction * 100
    return utilization_pct < threshold_pct


def analyze(text: str, threshold_pct: int = SLOW_DAY_THRESHOLD_PCT) -> dict:
    """Run the whole detection pipeline on an email body.

    Counts bookings, works out the target date and its weekday name,
    computes utilization, and applies the slow-day threshold.

    Returns:
        A JSON-serializable dict with the keys "target_date", "day_name",
        "booking_count", "total_bay_slots", "utilization_pct" (rounded to
        one decimal place), "threshold_pct", "is_slow_day" and "bookings".
    """
    parsed = count_bookings(text)
    n_booked = parsed["booking_count"]

    date_iso = detect_target_date(text)
    weekday = detect_day_name(date_iso)

    # Reported utilization is rounded for display; the slow-day decision in
    # is_slow_day() works on the unrounded value.
    if TOTAL_BAY_SLOTS > 0:
        utilization_pct = round((n_booked / TOTAL_BAY_SLOTS) * 100, 1)
    else:
        utilization_pct = 0

    report = {
        "target_date": date_iso,
        "day_name": weekday,
        "booking_count": n_booked,
        "total_bay_slots": TOTAL_BAY_SLOTS,
        "utilization_pct": utilization_pct,
        "threshold_pct": threshold_pct,
        "is_slow_day": is_slow_day(n_booked, threshold_pct),
        "bookings": parsed["bookings"],
    }
    return report


def main():
    """Command-line entry point.

    Reads the email body from stdin (--stdin) or from a file (--email),
    prints the analysis as indented JSON on stdout, and reports the verdict
    through the exit status:

      0 - slow day detected
      1 - no input source given (help text is printed)
      2 - not a slow day

    When both --stdin and --email are given, stdin takes precedence.

    NOTE(review): argparse itself exits with status 2 on invalid arguments,
    so callers cannot tell that apart from "not a slow day" by exit status
    alone.
    """
    cli = argparse.ArgumentParser(description="Detect slow days from USchedule calendar emails")
    cli.add_argument("--email", help="Path to email file (.eml or .txt)")
    cli.add_argument("--stdin", action="store_true", help="Read email body from stdin")
    cli.add_argument(
        "--threshold",
        type=int,
        default=SLOW_DAY_THRESHOLD_PCT,
        help=f"Slow day threshold %% (default: {SLOW_DAY_THRESHOLD_PCT})",
    )
    opts = cli.parse_args()

    # Guard: without an input source there is nothing to analyze.
    if not (opts.stdin or opts.email):
        cli.print_help()
        sys.exit(1)

    body_text = sys.stdin.read() if opts.stdin else parse_email_file(opts.email)

    report = analyze(body_text, opts.threshold)
    print(json.dumps(report, indent=2))

    # 0 = slow day detected; 2 = not a slow day (non-error exit for scripting).
    sys.exit(0 if report["is_slow_day"] else 2)


# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
