#!/usr/bin/env python3
"""
YouTube Intel Scanner — Swing Shift Operations
Monitors OpenClaw YouTube creators for new content, pulls transcripts,
and generates actionable briefings for Swing Shift.

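Usage:
    # Default: scan only the priority 1-2 channels
    python3 13_Scripts/youtube_intel.py
    # Scan a single channel (channel key or @handle)
    python3 13_Scripts/youtube_intel.py --channel @AlexFinnOfficial
    # Scan every configured channel
    python3 13_Scripts/youtube_intel.py --all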
"""

import json
import os
import sys
import re
from datetime import datetime, timedelta
from pathlib import Path

# --- Configuration ---

# Registry of monitored creators, keyed by a short channel key. The key is
# what gets stored in the video index and embedded in transcript filenames.
#
# Per-channel fields:
#   handle   - YouTube handle including the leading "@"; interpolated into
#              https://www.youtube.com/{handle}/videos when listing uploads.
#   name     - display name used in console output, transcripts and briefings.
#   priority - integer rank; a default run of main() scans only channels with
#              priority <= 2, while --all scans every entry.
#   focus    - free-text note on the topics the channel covers (not read by
#              any function visible in this script).
#   why      - free-text rationale for tracking the channel (not read by any
#              function visible in this script).
CHANNELS = {
    "AlexFinnOfficial": {
        "handle": "@AlexFinnOfficial",
        "name": "Alex Finn",
        "priority": 1,
        "focus": "OpenClaw Mission Control, autonomous agents, operator workflows, reverse prompting",
        "why": "Most hands-on operator content. Mission Control, task boards, proactive cron jobs."
    },
    "MatthewBerman": {
        "handle": "@MatthewBerman",
        "name": "Matthew Berman",
        "priority": 2,
        "focus": "OpenClaw use cases, CRM, memory systems, advanced workflows",
        "why": "2.54B tokens on OpenClaw. 21 real use cases. Deep technical content."
    },
    # Key differs from the handle here; --channel accepts either form.
    "VelvetShark": {
        "handle": "@intothemachines",
        "name": "Velvet Shark (Mikhail Shcheglov)",
        "priority": 3,
        "focus": "Workflow prompts, real automations, calendar management, home automation",
        "why": "Publishes actual prompts you can copy. Most generous with implementation details."
    },
    "ColeMedin": {
        "handle": "@ColeMedin",
        "name": "Cole Medin",
        "priority": 4,
        "focus": "Security, Claude Code alternatives, safe agent design",
        "why": "Security-focused. Good counterbalance for understanding risks."
    }
}

# Root directory for all output: the parent of the directory that contains
# this script (the script's resolved path, two levels up).
BOT_DIR = Path(__file__).resolve().parent.parent
# Everything the scanner writes lives under ops/intel/openclaw-youtube/.
INTEL_DIR = BOT_DIR / "ops" / "intel" / "openclaw-youtube"
# One Markdown file per pulled transcript.
TRANSCRIPT_DIR = INTEL_DIR / "transcripts"
# One briefing file per calendar date.
BRIEFING_DIR = INTEL_DIR / "briefings"
# JSON record of the video IDs that have already been processed.
INDEX_FILE = INTEL_DIR / "video-index.json"

# Ensure directories exist
# NOTE: these calls run at import time, so merely importing this module
# creates the output directories (parents=True also creates INTEL_DIR).
TRANSCRIPT_DIR.mkdir(parents=True, exist_ok=True)
BRIEFING_DIR.mkdir(parents=True, exist_ok=True)


def load_index():
    """Load the video index (tracks what we've already processed).

    The data read from ``INDEX_FILE`` is layered over a set of defaults, so
    the keys ``videos``, ``last_scan`` and ``scan_count`` are always present
    even if the file on disk is missing some of them (for example after a
    manual edit). Without this, a file lacking ``videos`` would make the
    scan fail later with a ``KeyError``.

    Returns:
        dict: The index. When the file does not exist yet, a fresh index
        with an empty ``videos`` mapping, ``last_scan`` of ``None`` and a
        ``scan_count`` of 0.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON. This
            is deliberately not swallowed: silently starting from an empty
            index would re-process every video and then overwrite the file.
    """
    index = {"videos": {}, "last_scan": None, "scan_count": 0}
    if INDEX_FILE.exists():
        # JSON is UTF-8 by specification; do not depend on the locale.
        with open(INDEX_FILE, "r", encoding="utf-8") as f:
            index.update(json.load(f))
    return index


def save_index(index):
    """Save the video index.

    The index is serialized in memory first and written to a sibling
    temporary file, which is then moved over ``INDEX_FILE``. If serialization
    fails or the process dies mid-write, the previous index file is left
    intact instead of being truncated.

    Args:
        index: The index dict to persist; must be JSON-serializable.

    Raises:
        TypeError: If ``index`` contains values ``json`` cannot serialize.
        OSError: If the temporary file cannot be written or moved.
    """
    # Serialize before touching the filesystem so a bad value cannot leave a
    # half-written file behind.
    payload = json.dumps(index, indent=2)

    tmp_path = INDEX_FILE.with_name(INDEX_FILE.name + ".tmp")
    with open(tmp_path, "w", encoding="utf-8") as f:
        f.write(payload)

    # Path.replace overwrites the destination in a single rename step.
    tmp_path.replace(INDEX_FILE)


def get_channel_video_ids(handle, max_results=5):
    """
    Get recent video IDs from a YouTube channel.

    Tries the ``yt-dlp`` command-line tool first. If it is not installed,
    cannot be launched, times out, exits non-zero or prints nothing, falls
    back to scraping ``"videoId"`` values out of the channel's /videos page.

    Args:
        handle: Channel handle including the leading "@"; it is interpolated
            into ``https://www.youtube.com/{handle}/videos``.
        max_results: Maximum number of IDs to return. Values <= 0 yield an
            empty list without any network or subprocess activity.

    Returns:
        list[str]: Video IDs in the order the source reported them (no
        duplicates in the scrape fallback). Empty if neither method
        produced anything; a warning is printed if the page fetch fails.
    """
    import subprocess
    import urllib.request

    if max_results <= 0:
        return []

    channel_url = f"https://www.youtube.com/{handle}/videos"

    # Try yt-dlp first (most reliable)
    try:
        result = subprocess.run(
            ["yt-dlp", "--flat-playlist", "--print", "id",
             "--playlist-end", str(max_results),
             channel_url],
            capture_output=True, text=True, timeout=30
        )
    except (OSError, subprocess.TimeoutExpired):
        # OSError covers a missing binary (FileNotFoundError) as well as one
        # that exists but cannot be executed (e.g. PermissionError); in every
        # such case the scrape below is still worth attempting.
        result = None

    if result is not None and result.returncode == 0:
        video_ids = [line.strip() for line in result.stdout.strip().split("\n") if line.strip()]
        if video_ids:
            return video_ids

    # Fallback: try scraping the channel page
    try:
        req = urllib.request.Request(channel_url, headers={"User-Agent": "Mozilla/5.0"})
        with urllib.request.urlopen(req, timeout=15) as resp:
            page = resp.read().decode("utf-8", errors="ignore")
    except Exception as e:
        # Best effort: a fetch failure means "nothing found", not a crash.
        print(f"  Warning: Could not fetch channel page for {handle}: {e}")
        return []

    # Extract video IDs from the page
    found = re.findall(r'"videoId":"([a-zA-Z0-9_-]{11})"', page)
    # dict.fromkeys drops duplicates while preserving first-seen order.
    return list(dict.fromkeys(found))[:max_results]


def get_transcript(video_id):
    """Fetch a video's transcript through youtube_transcript_api.

    Returns the text of every transcript entry joined with newlines. Any
    failure (including the library not being importable) is reported as a
    string of the form "[Transcript unavailable: <error>]" rather than raised.
    """
    try:
        from youtube_transcript_api import YouTubeTranscriptApi

        api = YouTubeTranscriptApi()
        pieces = [snippet.text for snippet in api.fetch(video_id)]
        return "\n".join(pieces)
    except Exception as err:
        return f"[Transcript unavailable: {err}]"


def get_video_title(video_id):
    """Get video title from YouTube.

    Downloads the watch page and reads its ``<title>`` element. HTML
    entities are decoded (``&amp;`` -> ``&``), runs of whitespace are
    collapsed, and only a trailing " - YouTube" is removed, so a title that
    mentions the phrase elsewhere is left alone.

    Args:
        video_id: The YouTube video ID.

    Returns:
        str: The cleaned title, or ``"Video {video_id}"`` when the page
        cannot be fetched, has no ``<title>``, or the title is empty.
    """
    import html
    import urllib.request

    fallback = f"Video {video_id}"
    suffix = " - YouTube"

    try:
        url = f"https://www.youtube.com/watch?v={video_id}"
        req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
        with urllib.request.urlopen(req, timeout=10) as resp:
            page = resp.read().decode("utf-8", errors="ignore")
    except Exception:
        # Best effort: the title is cosmetic, so any fetch problem just
        # yields the placeholder instead of aborting the scan.
        return fallback

    # DOTALL so a title that wraps onto several lines is still matched.
    match = re.search(r"<title>(.*?)</title>", page, re.DOTALL)
    if not match:
        return fallback

    title = html.unescape(match.group(1)).rstrip()
    # Strip the site suffix before normalizing whitespace so a bare
    # " - YouTube" title collapses to empty and triggers the fallback.
    if title.endswith(suffix):
        title = title[:-len(suffix)]
    title = " ".join(title.split())
    return title or fallback


def save_transcript(video_id, channel_key, title, transcript_text):
    """Save transcript to file.

    Writes a Markdown document (title, channel, video ID, URL, pull time and
    the transcript) into ``TRANSCRIPT_DIR``. The file is always written as
    UTF-8 so titles and transcripts containing emoji or non-Latin text do not
    fail on systems whose default encoding is narrower.

    Args:
        video_id: The YouTube video ID.
        channel_key: Key into ``CHANNELS``; an unknown key is used verbatim
            as the channel name in the document.
        title: Video title; used for the heading and the filename slug.
        transcript_text: Transcript body to store.

    Returns:
        pathlib.Path: Path of the file written, named
        ``{date}_{channel_key}_{slug}.md``.
    """
    # Take the timestamp once so the filename date and the "Pulled" line
    # cannot disagree when the call straddles midnight.
    now = datetime.now()
    date_str = now.strftime("%Y-%m-%d")

    # Keep word characters, whitespace and dashes, cap the length, then turn
    # every whitespace run (including tabs/newlines) into a single dash.
    safe_title = re.sub(r'[^\w\s-]', '', title)[:60].strip()
    safe_title = re.sub(r'\s+', '-', safe_title)
    if not safe_title:
        # A title made only of symbols would otherwise leave a blank slug.
        safe_title = video_id

    filename = f"{date_str}_{channel_key}_{safe_title}.md"
    filepath = TRANSCRIPT_DIR / filename

    content = f"""# {title}

**Channel:** {CHANNELS.get(channel_key, {}).get('name', channel_key)}
**Video ID:** {video_id}
**URL:** https://www.youtube.com/watch?v={video_id}
**Pulled:** {now.strftime('%Y-%m-%d %H:%M')}

---

## Transcript

{transcript_text}
"""
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(content)

    return filepath


def generate_briefing(new_videos):
    """Generate a briefing document from new videos found.

    The briefing is stored as ``{date}-youtube-intel.md`` in
    ``BRIEFING_DIR``, so every scan on the same day targets the same file.
    To avoid losing content on repeated scans:

    * a scan with no new videos leaves an existing briefing that already
      lists videos untouched;
    * a scan with new videos is appended after such a briefing;
    * otherwise (no file yet, or only a "no new videos" briefing) the file
      is written from scratch.

    Args:
        new_videos: List of dicts with the keys ``title``, ``channel_name``,
            ``video_id`` and ``transcript_path``, plus optional
            ``transcript_text`` used for the preview.

    Returns:
        pathlib.Path: Path of the briefing file for today.
    """
    no_new_msg = "No new videos found from monitored channels since last scan."
    preview_chars = 500

    now = datetime.now()
    date_str = now.strftime("%Y-%m-%d")
    filepath = BRIEFING_DIR / f"{date_str}-youtube-intel.md"

    # Inspect any briefing written earlier today before deciding how to write.
    earlier = ""
    if filepath.exists():
        with open(filepath, "r", encoding="utf-8", errors="replace") as f:
            earlier = f.read()
    earlier_has_videos = bool(earlier.strip()) and no_new_msg not in earlier

    if earlier_has_videos and not new_videos:
        # An empty re-scan must not wipe out today's real briefing.
        return filepath

    lines = [
        f"# YouTube Intel Briefing — {date_str}",
        "",
        f"**Scan time:** {now.strftime('%Y-%m-%d %H:%M')} AZ",
        f"**New videos found:** {len(new_videos)}",
        "",
        "---",
        "",
    ]

    if not new_videos:
        lines.append(no_new_msg)
        lines.append("")
        lines.append("Channels monitored:")
        for ch in CHANNELS.values():
            lines.append(f"- {ch['name']} ({ch['handle']})")
    else:
        for vid in new_videos:
            lines.append(f"## {vid['title']}")
            lines.append("")
            lines.append(f"**Channel:** {vid['channel_name']}")
            lines.append(f"**URL:** https://www.youtube.com/watch?v={vid['video_id']}")
            lines.append(f"**Transcript saved:** `{vid['transcript_path']}`")
            lines.append("")

            # Preview the start of the transcript. Whitespace is flattened so
            # a blank line inside the excerpt cannot end the blockquote, and
            # the ellipsis is shown only when text was actually cut off.
            text = vid.get('transcript_text', '')
            preview = " ".join(text[:preview_chars].split())
            if preview:
                ellipsis = "..." if len(text) > preview_chars else ""
                lines.append("### Preview")
                lines.append("")
                lines.append(f"> {preview}{ellipsis}")
                lines.append("")

            lines.append("### Relevance to Swing Shift")
            lines.append("")
            lines.append("*Analysis pending — dispatch to Claude Code for deep analysis:*")
            lines.append('```')
            lines.append(f'/bash 13_Scripts/claude-code-bridge.sh "Read {vid["transcript_path"]} and analyze: What ideas from this video can we apply to Swing Shift? Focus on automation, booking, customer retention, and unmanned operations. Write findings to ops/intel/openclaw-youtube/analysis/{date_str}-{vid["video_id"]}.md"')
            lines.append('```')
            lines.append("")
            lines.append("---")
            lines.append("")

    # Summary section
    lines.append("## Action Items")
    lines.append("")
    if new_videos:
        lines.append(f"- [ ] Review {len(new_videos)} new transcript(s)")
        lines.append("- [ ] Run Claude Code analysis on each for Swing Shift applicability")
        lines.append("- [ ] Update Mission Control task board with any new ideas")
    else:
        lines.append("- No action needed this cycle")
    lines.append("")

    body = "\n".join(lines)
    if earlier_has_videos:
        # Keep what was found earlier today and add this scan below it.
        body = earlier.rstrip("\n") + "\n\n" + body

    # Explicit UTF-8: the text contains an em dash and titles may carry emoji.
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(body)

    return filepath


def scan_channel(channel_key, index, max_videos=3):
    """Check one configured channel and process every video not yet indexed.

    For each unseen video ID the title and transcript are fetched, the
    transcript is written to disk, and an entry is added to
    ``index["videos"]`` (the index is mutated in place, not saved here).

    Returns a list of dicts describing the newly processed videos; the list
    is empty when the channel yields no IDs or nothing new.
    """
    unavailable_prefix = "[Transcript unavailable"

    channel = CHANNELS[channel_key]
    handle = channel["handle"]
    display_name = channel["name"]
    print(f"\n📡 Scanning {display_name} ({handle})...")

    recent_ids = get_channel_video_ids(handle, max_results=max_videos)
    if not recent_ids:
        print(f"  No videos found for {handle}")
        return []

    print(f"  Found {len(recent_ids)} recent videos")

    known = index["videos"]
    fresh = []
    for video_id in recent_ids:
        if video_id in known:
            print(f"  ⏭  {video_id} — already processed")
            continue

        print(f"  📥 New video: {video_id}")
        title = get_video_title(video_id)
        print(f"     Title: {title}")

        print(f"     Pulling transcript...")
        text = get_transcript(video_id)
        has_transcript = not text.startswith(unavailable_prefix)

        if has_transcript:
            print(f"     ✅ Got transcript ({len(text.split())} words)")
        else:
            print(f"     ⚠️  {text}")

        saved_to = save_transcript(video_id, channel_key, title, text)
        print(f"     💾 Saved to {saved_to.name}")

        relative_path = str(saved_to.relative_to(BOT_DIR))

        # Record the video so later scans skip it.
        known[video_id] = {
            "channel": channel_key,
            "title": title,
            "scanned_at": datetime.now().isoformat(),
            "transcript_file": relative_path,
            "has_transcript": has_transcript,
        }

        fresh.append({
            "video_id": video_id,
            "title": title,
            "channel_name": display_name,
            "channel_key": channel_key,
            "transcript_path": relative_path,
            "transcript_text": text,
        })

    return fresh


def main():
    """Main entry point.

    Command-line flags (read from ``sys.argv``):
        --channel NAME  Scan a single channel; NAME may be a channel key or
                        a handle, with or without the leading "@".
        --all           Scan every configured channel.
        (neither)       Scan only channels whose priority is <= 2.

    The index is saved after each channel so that a failure in a later
    channel does not discard the record of transcripts already written, and
    once more at the end with the scan metadata. A briefing is then
    generated and a summary printed.

    Returns:
        int: Number of new videos processed in this run.

    Exits with status 1 when ``--channel`` has no value or names a channel
    that is not configured.
    """
    print("=" * 60)
    print("🎯 Swing Shift YouTube Intel Scanner")
    print(f"   {datetime.now().strftime('%Y-%m-%d %H:%M')} AZ")
    print("=" * 60)

    # Parse args
    args = sys.argv[1:]
    scan_all = "--all" in args
    specific_channel = None
    for i, arg in enumerate(args):
        if arg != "--channel":
            continue
        if i + 1 >= len(args):
            # Previously a dangling --channel was ignored and the default
            # scan ran instead, which hid the typo from the operator.
            print("--channel requires a channel key or @handle")
            print(f"Known channels: {', '.join(CHANNELS.keys())}")
            sys.exit(1)
        specific_channel = args[i + 1].lstrip("@")

    # Work out which channels to scan before touching the index.
    # `is not None` so an empty value (e.g. "@") is reported as unknown
    # rather than silently falling through to the default scan.
    if specific_channel is not None:
        wanted = specific_channel.lower()
        matching = [k for k in CHANNELS if k.lower() == wanted
                    or CHANNELS[k]["handle"].lower() == f"@{wanted}"]
        if not matching:
            print(f"Unknown channel: {specific_channel}")
            print(f"Known channels: {', '.join(CHANNELS.keys())}")
            sys.exit(1)
        channels_to_scan = [matching[0]]
    elif scan_all:
        channels_to_scan = list(CHANNELS)
    else:
        channels_to_scan = [k for k, v in CHANNELS.items() if v["priority"] <= 2]

    # Load index
    index = load_index()
    all_new_videos = []

    for channel_key in channels_to_scan:
        all_new_videos.extend(scan_channel(channel_key, index))
        # Checkpoint: keep the index in step with the transcripts on disk.
        save_index(index)

    # Update index metadata
    index["last_scan"] = datetime.now().isoformat()
    index["scan_count"] = index.get("scan_count", 0) + 1
    save_index(index)

    # Generate briefing
    briefing_path = generate_briefing(all_new_videos)

    # Summary
    print(f"\n{'=' * 60}")
    print(f"📊 Scan Complete")
    print(f"   New videos: {len(all_new_videos)}")
    print(f"   Total tracked: {len(index['videos'])}")
    print(f"   Briefing: {briefing_path.name}")
    print(f"   Scans to date: {index['scan_count']}")
    print(f"{'=' * 60}")

    # Output for Telegram summary
    if all_new_videos:
        print(f"\n📨 TELEGRAM SUMMARY:")
        print(f"YouTube Intel: {len(all_new_videos)} new video(s) found")
        for v in all_new_videos:
            print(f"• {v['channel_name']}: {v['title']}")
        print(f"Briefing saved. Run analysis for Swing Shift applicability.")

    return len(all_new_videos)


# Run the scanner only when this file is executed as a script. The value
# main() returns (the number of new videos) is not used as the exit status.
if __name__ == "__main__":
    main()