#!/usr/bin/env python3
# SPDX-License-Identifier: AGPL-3.0-or-later
# Copyright (C) 2026 TimeHexOn / unturf
#
# This file is part of fs-api, the backend for the 3D filesystem visualizer
# at https://timehexon.com/3d-tree-filesystem.html.
#
# fs-api is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public
# License for more details: https://www.gnu.org/licenses/agpl-3.0.html
#
# Source: https://timehexon.com/run-local/
"""
fs-api: filesystem & stats API for a 3D visualizer.

Defaults to loopback (127.0.0.1) for safe local demo. JIT scans live
filesystem on each request (cached). Optionally serves bundled static
files from its own directory so a single `python3 fs-api.py` boots a
self-contained demo with no internet required.

Routes:
  GET  /healthz        liveness probe (used by demo page detection)
  GET  /api/fs         filesystem tree JSON
  GET  /api/stats      memory, load, top processes
  GET  /api/file-info  single-path metadata + preview/hexdump
  GET  /api/all        fs + stats combined
  GET  /api/shadows    shadow oracle vitals (unturf-only, degrades gracefully)
  GET  /api/pokedex    Hermes LLM pokedex entry (unturf-only)
  POST /api/chat       Hermes chat (unturf-only)
  GET  /<path>         static file from --static-dir (if set)

Node format: {n: name, p: path, t: type, m: mode, s: size, c: children}
Types: d=directory, f=file, l=symlink
Mode: octal string (e.g. "755", "4755" for SUID).

Run locally:
  curl -fsSL https://timehexon.com/run-local/fs-api.py -o fs-api.py
  python3 fs-api.py
  xdg-open http://127.0.0.1:8001/
"""
import os, json, time, stat, subprocess, sys, base64
from http.server import HTTPServer, BaseHTTPRequestHandler
from socketserver import ThreadingMixIn

SOCKS_PROXY = 'socks5h://egress.unsandbox.com:1080'

# Drop every proxy-related environment variable (any letter case) before
# anything talks to the network: direct HTTPS works on the semitrusted
# network, while the SOCKS proxy causes timeouts for hermes & shadow probes.
for _pk in [k for k in os.environ
            if k.lower() in ('http_proxy', 'https_proxy', 'all_proxy', 'no_proxy')]:
    os.environ.pop(_pk, None)

# Simple time-stamped caches: 'data' holds the last result, 'ts' the
# time.time() value at which it was produced.
_fs_cache = dict(data=None, ts=0)
_stats_cache = dict(data=None, ts=0)
_shadow_cache = dict(data=None, ts=0)

# Entry names (compared lower-cased) that scan() never descends into or lists.
SKIP = frozenset({
    'proc', 'sys', 'dev', 'lost+found', 'node_modules', '__pycache__',
})

# Directories whose sub-directories are scanned with a depth limit of 5
# instead of 3 (see scan()).
DEEP_PATHS = frozenset({
    '/', '/root', '/opt', '/etc', '/var', '/usr',
    '/usr/local', '/usr/local/bin', '/root/git', '/root/www',
    '/root/git/timehexon.com', '/root/git/timehexon.com/root',
    '/home', '/home/fox', '/home/fox/git',
})

MAX_FILES_PER_DIR = 200  # cap file leaf nodes per directory


def file_meta(path):
    """Classify a path without following symlinks.

    Returns a 4-tuple ``(type, mode, size, stat_result)`` where ``type`` is
    'l' (symlink), 'd' (directory) or 'f' (anything else), ``mode`` is the
    permission bits as an octal string zero-padded to at least 3 digits, and
    ``size`` is ``st_size``. If the path cannot be lstat-ed the fallback
    ``('f', '000', 0, None)`` is returned instead of raising.
    """
    try:
        info = os.lstat(path)
    except OSError:  # PermissionError is a subclass of OSError
        return 'f', '000', 0, None

    perms = f'{stat.S_IMODE(info.st_mode):o}'.zfill(3)
    if stat.S_ISLNK(info.st_mode):
        kind = 'l'
    elif stat.S_ISDIR(info.st_mode):
        kind = 'd'
    else:
        kind = 'f'
    return kind, perms, info.st_size, info


def scan(path, max_depth, depth=0):
    """Build the JSON-ready tree node for directory *path*.

    The node is ``{n, p, t: 'd', m, s, c}``. Children are only collected
    while ``depth < max_depth``; an unreadable directory yields a node with
    an empty child list. Sub-directories come first (name order), followed
    by at most MAX_FILES_PER_DIR file/symlink leaves, largest first.
    """
    _, mode, size, _ = file_meta(path)
    node = {
        'n': os.path.basename(path) or '/',
        'p': path,
        't': 'd',
        'm': mode,
        's': size,
        'c': [],
    }

    if depth >= max_depth:
        return node

    try:
        names = sorted(os.listdir(path))
    except (PermissionError, OSError):
        return node

    # Dot-entries are hidden unless explicitly listed here.
    shown_dot_entries = (
        '.claude', '.ssh', '.secrets', '.config',
        '.bashrc', '.profile', '.bash_history', '.local', '.cache',
        '.npm', '.cargo', '.rustup', '.gnupg', '.vim', '.vimrc', '.tmux.conf',
    )
    # Depth limit handed to every sub-directory; depends only on the parent.
    child_limit = 5 if path in DEEP_PATHS else 3

    leaves = []
    for name in names:
        hidden = name.startswith('.') and name not in shown_dot_entries
        if hidden or name.lower() in SKIP:
            continue
        child = os.path.join(path, name)
        kind, child_mode, child_size, _ = file_meta(child)

        if kind == 'd' and not os.path.islink(child):
            node['c'].append(scan(child, child_limit, depth + 1))
        elif kind in ('f', 'l'):
            leaves.append({'n': name, 'p': child, 't': kind,
                           'm': child_mode, 's': child_size})

    # Largest files first (most visible tubers), then truncate to the cap.
    leaves.sort(key=lambda leaf: leaf['s'], reverse=True)
    node['c'] += leaves[:MAX_FILES_PER_DIR]

    return node


def get_fs():
    """Return the filesystem tree rooted at '/', rescanning at most every 30 s.

    A cached tree younger than 30 seconds is returned as-is; otherwise
    ``scan('/', 5)`` is run and its result stored in ``_fs_cache``.
    """
    now = time.time()
    cached = _fs_cache['data']
    if cached and now - _fs_cache['ts'] < 30:
        return cached

    fresh = scan('/', 5)
    _fs_cache['data'] = fresh
    _fs_cache['ts'] = now
    return fresh


# Service name of this instance; it is excluded from its own shadow list.
SELF_NAME = 'ralph-claude'
_discovered_cache = dict(data=None, ts=0)


def discover_shadows():
    """List running shadow services by parsing ``un service --list``.

    Returns a list of ``{'name', 'url'}`` dicts, one per output line whose
    first column starts with ``unsb-service-``, whose second column (name)
    is not SELF_NAME and whose third column (status) is ``running``.
    Results are cached for 120 seconds. If the CLI call raises, the error is
    logged to stderr and the previous list (or ``[]``) is returned.

    NOTE(review): a non-zero exit status from ``un`` is not treated as an
    error; whatever stdout contains is parsed and cached.
    """
    now = time.time()
    if _discovered_cache['data'] is not None and now - _discovered_cache['ts'] < 120:
        return _discovered_cache['data']

    try:
        listing = subprocess.run(['un', 'service', '--list'],
                                 capture_output=True, text=True, timeout=15)
        found = []
        for row in listing.stdout.splitlines():
            columns = row.split()
            if len(columns) >= 3 and columns[0].startswith('unsb-service-'):
                name, status = columns[1], columns[2]
                if name != SELF_NAME and status == 'running':
                    found.append({
                        'name': name,
                        'url': f'https://{name}.on.unsandbox.com/api/stats',
                    })
        _discovered_cache['data'] = found
        _discovered_cache['ts'] = now
    except Exception as e:
        print(f"shadow discovery error: {e}", file=sys.stderr)
        stale = _discovered_cache['data']
        return stale if stale is not None else []
    return found


def get_shadow_stats():
    """Fetch vitals from every discovered shadow, cached for 60 seconds.

    Each shadow's URL is fetched with curl. A successful, non-empty reply
    is parsed as JSON and tagged with the shadow's name and status 'alive'.
    On failure the previous cache entry is reused with status 'stale' when
    allowed, otherwise ``{'name', 'status': 'unreachable'}`` is reported:
    a failed curl run only reuses an entry that was 'alive', whereas an
    exception reuses entries that were 'alive' or already 'stale'.
    """
    now = time.time()
    if _shadow_cache['data'] is not None and now - _shadow_cache['ts'] < 60:
        return _shadow_cache['data']

    def fallback(name, reusable_states):
        # Previous entry (a copy) marked stale, or an 'unreachable' stub.
        prev = _get_prev_shadow(name)
        if prev and prev.get('status') in reusable_states:
            prev['status'] = 'stale'
            return prev
        return {'name': name, 'status': 'unreachable'}

    vitals = []
    for target in discover_shadows():
        try:
            # Direct HTTPS — proxy env stripped at startup
            probe = subprocess.run(
                ['curl', '-s', '--max-time', '8', target['url']],
                capture_output=True, text=True, timeout=12
            )
            if probe.returncode == 0 and probe.stdout.strip():
                entry = json.loads(probe.stdout)
                entry['name'] = target['name']
                entry['status'] = 'alive'
            else:
                # Log failure details for debugging
                print(f"shadow probe failed: {target['name']} rc={probe.returncode} stderr={probe.stderr[:200]}", file=sys.stderr)
                entry = fallback(target['name'], ('alive',))
            vitals.append(entry)
        except Exception as e:
            print(f"shadow probe exception: {target['name']} {e}", file=sys.stderr)
            vitals.append(fallback(target['name'], ('alive', 'stale')))

    _shadow_cache['data'] = vitals
    _shadow_cache['ts'] = now
    return vitals


def _get_prev_shadow(name):
    """Return a shallow copy of the cached shadow entry called *name*.

    Returns None when the shadow cache is empty or holds no entry whose
    'name' equals *name*.
    """
    previous = _shadow_cache['data']
    if not previous:
        return None
    match = next((entry for entry in previous if entry.get('name') == name), None)
    # Hand out a copy so callers can relabel it without touching the cache.
    return dict(match) if match is not None else None


def get_stats():
    """Collect memory, swap, load and top-process statistics from /proc.

    Returns a dict with ``mem_total``/``mem_free``/``mem_used`` and
    ``swap_total``/``swap_free``/``swap_used`` (numbers as reported by
    /proc/meminfo), ``load`` (three floats from /proc/loadavg), ``top``
    (up to 7 processes with more than 1 MiB resident, largest first, each
    ``{'pid', 'name', 'rss'}`` with rss in bytes), ``shadows`` (result of
    get_shadow_stats()) and ``ts``. Successful results are cached for
    3 seconds. Any failure yields ``{'error': <message>}``, which is not
    cached.
    """
    now = time.time()
    if _stats_cache['data'] and now - _stats_cache['ts'] < 3:
        return _stats_cache['data']
    try:
        mem = {}
        with open('/proc/meminfo') as f:
            for line in f:
                parts = line.split()
                if len(parts) >= 2:
                    mem[parts[0].rstrip(':')] = int(parts[1])
        total = mem.get('MemTotal', 0)
        free = mem.get('MemAvailable', mem.get('MemFree', 0))
        swap_total = mem.get('SwapTotal', 0)
        swap_free = mem.get('SwapFree', 0)

        page_size = os.sysconf('SC_PAGE_SIZE')  # loop-invariant
        procs = []
        for pid in os.listdir('/proc'):
            if not pid.isdigit():
                continue
            try:
                with open(f'/proc/{pid}/stat', errors='replace') as f:
                    raw = f.read()
                # The command name is wrapped in parentheses and may itself
                # contain spaces or ')', so a plain split() would shift the
                # numeric fields. Everything after the LAST ')' is reliably
                # whitespace-separated, starting with field 3 (state).
                head, sep, tail = raw.rpartition(')')
                if not sep:
                    continue
                comm = head.partition('(')[2]
                # rss is field 24 of the stat line -> index 21 of the tail.
                rss = int(tail.split()[21]) * page_size
                if rss > 1024 * 1024:
                    procs.append({'pid': int(pid), 'name': comm, 'rss': rss})
            except (FileNotFoundError, IndexError, PermissionError,
                    ProcessLookupError, ValueError):
                # Process vanished or the line is malformed: skip just this one.
                continue
        procs.sort(key=lambda p: p['rss'], reverse=True)

        with open('/proc/loadavg') as f:
            load = f.read().split()[:3]

        stats = {
            'mem_total': total,
            'mem_free': free,
            'mem_used': total - free,
            'swap_total': swap_total,
            'swap_free': swap_free,
            'swap_used': swap_total - swap_free,
            'load': [float(x) for x in load],
            'top': procs[:7],
            'shadows': get_shadow_stats(),
            'ts': int(now)
        }
        _stats_cache['data'] = stats
        _stats_cache['ts'] = now
        return stats
    except Exception as e:
        return {'error': str(e)}


def get_file_info(path):
    """Describe a single path: file(1) type, stat metadata and a content dump.

    The path is resolved with ``os.path.realpath`` first, so the report
    describes the final target of any symlink.

    Returns a dict that always contains ``path`` and, when available:
      * ``file_type`` - output of ``file -b`` (or the error text if it failed)
      * ``mode``, ``size``, ``uid``, ``gid``, ``mtime``, ``is_dir``,
        ``is_link``, ``world_readable``
      * ``preview`` / ``preview_lines`` - full text of a world-readable
        regular file that file(1) classifies as text
      * ``hexdump`` / ``hexdump_bytes`` / ``binary_b64`` - full dump of any
        other world-readable regular file
      * ``link_target`` - for symlinks
      * ``listing`` / ``total_entries`` - first 200 entries of a directory
        that is readable and searchable by "other"
      * ``stat_error`` - message of an OSError raised while inspecting
    A missing path yields ``{'error': 'not found', 'path': ...}``.
    """
    # Sanitize — must be absolute, no traversal
    path = os.path.realpath(path)
    if not os.path.exists(path):
        return {'error': 'not found', 'path': path}

    result = {'path': path}

    # file(1) output
    try:
        out = subprocess.run(['file', '-b', path], capture_output=True, text=True, timeout=5)
        result['file_type'] = out.stdout.strip()
    except Exception as e:
        result['file_type'] = str(e)

    # stat info
    try:
        st = os.lstat(path)
        mode = stat.S_IMODE(st.st_mode)
        result['mode'] = format(mode, 'o').zfill(3)
        result['size'] = st.st_size
        result['uid'] = st.st_uid
        result['gid'] = st.st_gid
        result['mtime'] = int(st.st_mtime)
        result['is_dir'] = stat.S_ISDIR(st.st_mode)
        result['is_link'] = stat.S_ISLNK(st.st_mode)

        # Check world-readable (other read bit)
        world_readable = bool(mode & 0o004)
        result['world_readable'] = world_readable

        # Only regular files are ever opened. FIFOs would block in open()
        # until a writer appears, and device nodes such as /dev/zero never
        # reach EOF, so reading them would hang or exhaust memory.
        readable_file = world_readable and stat.S_ISREG(st.st_mode)

        # Text vs. binary is decided from the file(1) description.
        ft = result.get('file_type', '').lower()
        is_text = any(k in ft for k in ('text', 'json', 'xml', 'html', 'script', 'source', 'empty'))
        if readable_file and is_text:
            # Text file — full contents
            try:
                with open(path, 'r', errors='replace') as f:
                    content = f.read()
                    result['preview'] = content
                    result['preview_lines'] = content.count('\n') + (1 if content and not content.endswith('\n') else 0)
            except (PermissionError, IsADirectoryError, UnicodeDecodeError):
                pass
        elif readable_file:
            # Binary file — full hexdump
            try:
                with open(path, 'rb') as f:
                    raw = f.read()
                lines = []
                for off in range(0, len(raw), 16):
                    chunk = raw[off:off+16]
                    hx = ' '.join(f'{b:02x}' for b in chunk)
                    asc = ''.join(chr(b) if 32 <= b < 127 else '.' for b in chunk)
                    lines.append(f'{off:08x}  {hx:<48s}  |{asc}|')
                result['hexdump'] = '\n'.join(lines)
                result['hexdump_bytes'] = len(raw)
                result['binary_b64'] = base64.b64encode(raw).decode('ascii')
            except (PermissionError, IsADirectoryError):
                pass
        # NOTE: path was resolved by realpath above, so lstat normally does
        # not report a symlink here; the branch is kept as a safety net.
        if stat.S_ISLNK(st.st_mode):
            try:
                result['link_target'] = os.readlink(path)
            except OSError:
                pass

        # Directory listing (only if world-readable + executable)
        if stat.S_ISDIR(st.st_mode) and (mode & 0o005) == 0o005:
            try:
                entries = sorted(os.listdir(path))
                listing = []
                for e in entries[:200]:
                    full = os.path.join(path, e)
                    ft, emode, esize, _ = file_meta(full)
                    listing.append({'n': e, 't': ft, 'm': emode, 's': esize})
                result['listing'] = listing
                result['total_entries'] = len(entries)
            except (PermissionError, OSError):
                pass
    except OSError as e:
        result['stat_error'] = str(e)

    return result


# Hermes chat-completions endpoint settings used by the helpers below.
HERMES_BASE = 'https://hermes.ai.unturf.com/v1'
HERMES_MODEL = 'adamo1139/Hermes-3-Llama-3.1-8B-FP8-Dynamic'
HERMES_KEY = 'permacomputer'
HERMES_TIMEOUT = 30
_pokedex_cache = {}  # path -> {text, ts}


def _hermes_post(messages, max_tokens=500, temperature=0.7, stream=False):
    """Send *messages* to ``{HERMES_BASE}/chat/completions`` and return the
    open urllib response.

    Uses only the standard library (no openai dependency). With
    ``stream=True`` the request asks for ``text/event-stream``, otherwise
    for ``application/json``. The caller must close the returned response.
    """
    import urllib.request

    payload = {
        'model': HERMES_MODEL,
        'messages': messages,
        'temperature': temperature,
        'max_tokens': max_tokens,
        'stream': stream,
    }
    accept = 'text/event-stream' if stream else 'application/json'
    request = urllib.request.Request(
        f'{HERMES_BASE}/chat/completions',
        data=json.dumps(payload).encode('utf-8'),
        method='POST',
        headers={
            'Content-Type': 'application/json',
            'Authorization': f'Bearer {HERMES_KEY}',
            'Accept': accept,
        },
    )
    return urllib.request.urlopen(request, timeout=HERMES_TIMEOUT)


def _hermes_chat(messages, max_tokens=500, temperature=0.7):
    """Run one non-streaming completion and return the stripped reply text.

    Exceptions from the request, from JSON decoding, or from an unexpected
    response shape propagate to the caller.
    """
    with _hermes_post(messages, max_tokens, temperature, stream=False) as resp:
        body = resp.read()
    reply = json.loads(body.decode('utf-8'))
    first_choice = reply['choices'][0]
    return first_choice['message']['content'].strip()


def _hermes_chat_stream(messages, max_tokens=500, temperature=0.7):
    """Generator over the non-empty content deltas of a streamed completion.

    Only lines starting with ``data:`` are considered. A ``[DONE]`` payload
    ends the stream; payloads that are not valid JSON or lack the expected
    ``choices[0].delta`` structure are skipped.
    """
    with _hermes_post(messages, max_tokens, temperature, stream=True) as resp:
        for chunk in resp:
            text = chunk.decode('utf-8', errors='replace').strip()
            if text.startswith('data:'):
                body = text[5:].strip()
                if body == '[DONE]':
                    return
                try:
                    event = json.loads(body)
                    piece = event['choices'][0]['delta'].get('content', '')
                except (json.JSONDecodeError, KeyError, IndexError):
                    continue
                if piece:
                    yield piece


# Character budget for the context text assembled for Hermes prompts
# (get_pokedex / chat_with_context and their streaming variants subtract
# what is already used and fill the remainder with file contents).
# ~10K tokens — leaves room for system prompt + response in 16K model.
MAX_CTX_CHARS = 40000


def _build_reverse_context(path):
    """Build ancestry context: one line per directory from / down to *path*.

    Starts at *path* itself when it is a directory, otherwise at its parent,
    and walks up to the root. Each line has the form
    ``<dir>/: name1, name2, ...`` with at most the first 30 sorted entries;
    the root is rendered as ``/:``. Directories that cannot be listed are
    left out. Returns the lines joined by newlines, root first (an empty
    string if nothing could be listed).
    """
    parts = []
    current = path if os.path.isdir(path) else os.path.dirname(path)
    seen = set()
    while current and current not in seen:
        seen.add(current)
        try:
            entries = sorted(os.listdir(current))[:30]
        except OSError:  # includes PermissionError
            entries = None
        if entries is not None:
            # Strip the trailing slash so the root prints as "/:" and not
            # "//:" once the separator below is appended.
            label = current.rstrip('/')
            parts.append(f"{label}/: {', '.join(entries)}")
        parent = os.path.dirname(current)
        if parent == current:
            break
        current = parent
    parts.reverse()  # root first, target last
    return '\n'.join(parts)


def _read_file_content(path, budget):
    """Return up to *budget* characters of a text file, or None.

    The result is handed to the remote Hermes model by the callers, so the
    same exposure rule as get_file_info() is applied: only regular files
    with the "other" read bit set are ever read. None is also returned for
    empty files, files larger than 10 MiB, files that file(1) does not
    classify as text, and on any read error.
    """
    try:
        st = os.lstat(path)
        # Regular files only: no directories, symlinks, FIFOs or devices.
        if not stat.S_ISREG(st.st_mode):
            return None
        # Never forward the contents of non-world-readable files (keys,
        # shell history, ...) to the external LLM service.
        if not stat.S_IMODE(st.st_mode) & 0o004:
            return None
        if st.st_size == 0:
            return None
        # Skip huge binaries
        if st.st_size > 10 * 1024 * 1024:
            return None
        # Check if text via file(1)
        try:
            out = subprocess.run(['file', '-b', path], capture_output=True, text=True, timeout=3)
            ft_str = out.stdout.strip().lower()
            is_text = any(k in ft_str for k in ('text', 'json', 'xml', 'html', 'script', 'source', 'empty'))
            if not is_text:
                return None
        except Exception:
            # file(1) missing or timed out: treat the type as unknown.
            return None
        with open(path, 'r', errors='replace') as f:
            content = f.read(budget)
        return content
    except (PermissionError, OSError, UnicodeDecodeError):
        return None


def get_pokedex(path, file_type=None, preview_head=None):
    """Return a two-paragraph Hermes explanation ("Pokedex entry") for *path*.

    *file_type* is an optional file(1) description and *preview_head* an
    optional text excerpt supplied by the caller. Entries are cached per
    path for one hour. Returns the text, or None if the Hermes call fails
    (the error is logged to stderr).
    """
    now = time.time()
    hit = _pokedex_cache.get(path)
    if hit and now - hit['ts'] < 3600:
        return hit['text']

    # Basic facts about the path
    kind, mode, size, _ = file_meta(path)
    mode_str = '000' if mode == '000' else format(int(mode, 8), 'o')
    label = os.path.basename(path) or '/'
    parent = os.path.dirname(path) or '/'
    sections = [
        f"Path: {path}\nName: {label}\nParent: {parent}\nType: {kind}\nMode: {mode_str}\nSize: {size}"
    ]
    if file_type:
        sections.append(f"\nfile(1): {file_type}")

    # Ancestry breadcrumbs
    ancestry = _build_reverse_context(path)
    if ancestry:
        sections.append(f"\n\n--- Directory ancestry (root to target) ---\n{ancestry}")

    # Directory contents
    if kind == 'd':
        try:
            names = sorted(os.listdir(path))[:50]
        except (PermissionError, OSError):
            names = None
        if names is not None:
            sections.append(f"\nContents ({len(names)} shown): {', '.join(names)}")

    ctx = ''.join(sections)

    # Spend whatever budget is left on file contents (200 chars kept for labels)
    budget = MAX_CTX_CHARS - len(ctx) - 200
    if budget > 500:
        # Start from the caller-supplied excerpt if it is substantial ...
        body = preview_head[:budget] if preview_head and len(preview_head) > 100 else None
        # ... but read from disk when that would yield noticeably more.
        if not body or len(body) < budget // 2:
            from_disk = _read_file_content(path, budget)
            if from_disk and len(from_disk) > len(body or ''):
                body = from_disk
        if body:
            ctx += f"\n\n--- File contents ({len(body)} chars) ---\n{body}"

    prompt = (
        "You are a Pokedex for a unix filesystem. Given details about a file or directory, "
        "write a concise entry (2 paragraphs) explaining what this is, what it does, "
        "and why it exists on a linux system. Be specific and technical but accessible. "
        "First paragraph: what it is and its purpose. "
        "Second paragraph: how it works, what depends on it, or interesting facts. "
        "If it's a directory, describe what lives inside and how the contents relate. "
        "No markdown formatting. Keep it tight — two paragraphs only."
    )
    conversation = [
        {'role': 'system', 'content': prompt},
        {'role': 'user', 'content': ctx},
    ]

    try:
        text = _hermes_chat(conversation)
        _pokedex_cache[path] = {'text': text, 'ts': now}
        return text
    except Exception as e:
        print(f"pokedex error: {e}", file=sys.stderr)
        return None


def chat_with_context(path, messages):
    """Answer a chat about *path* via Hermes, keeping conversation history.

    *messages* is a list of ``{'role', 'content'}`` dicts; only the last 10
    are forwarded and any role other than 'user'/'assistant' is sent as
    'user'. The system prompt embeds metadata, ancestry, directory listing,
    file contents and any cached Pokedex entry for the path. Returns the
    reply text, or None if the Hermes call fails.
    """
    kind, mode, size, _ = file_meta(path)
    label = os.path.basename(path) or '/'
    sections = [f"Path: {path}\nName: {label}\nType: {kind}\nMode: {mode}\nSize: {size}"]

    # Ancestry breadcrumbs
    ancestry = _build_reverse_context(path)
    if ancestry:
        sections.append(f"\n\n--- Directory ancestry ---\n{ancestry}")

    # Directory contents
    if kind == 'd':
        try:
            names = sorted(os.listdir(path))[:50]
        except (PermissionError, OSError):
            names = None
        if names is not None:
            sections.append(f"\nContents ({len(names)} shown): {', '.join(names)}")

    ctx = ''.join(sections)

    # File contents, within the remaining budget
    budget = MAX_CTX_CHARS - len(ctx) - 500
    if budget > 500:
        body = _read_file_content(path, budget)
        if body:
            ctx += f"\n\n--- File contents ({len(body)} chars) ---\n{body}"

    # A previously generated Pokedex entry enriches the context
    entry = _pokedex_cache.get(path)
    if entry:
        ctx += f"\n\n--- Pokedex entry ---\n{entry['text']}"

    system = (
        "You are a knowledgeable unix filesystem guide. You are chatting with a user "
        "who is exploring a filesystem visualization. They have selected a specific "
        "file or directory and want to learn more about it. Answer their questions "
        "concisely and technically. The context below describes what they are looking at.\n\n"
        f"{ctx}"
    )

    # System prompt followed by the most recent 10 turns
    api_messages = [{'role': 'system', 'content': system}]
    for turn in messages[-10:]:
        speaker = turn.get('role', 'user')
        api_messages.append({
            'role': speaker if speaker in ('user', 'assistant') else 'user',
            'content': turn.get('content', ''),
        })

    try:
        return _hermes_chat(api_messages)
    except Exception as e:
        print(f"chat error: {e}", file=sys.stderr)
        return None


def _stream_pokedex(path, file_type=None, preview_head=None):
    """Generator variant of the Pokedex lookup; yields ``(chunk, is_done)``.

    A cached entry younger than one hour is yielded whole with
    ``is_done=True``. Otherwise each Hermes delta is yielded with
    ``is_done=False``, followed by ``('', True)``; a non-empty result is
    stored in the cache. On failure ``(None, True)`` is yielded.
    """
    started = time.time()
    hit = _pokedex_cache.get(path)
    if hit and started - hit['ts'] < 3600:
        yield hit['text'], True
        return

    kind, mode, size, _ = file_meta(path)
    mode_str = '000' if mode == '000' else format(int(mode, 8), 'o')
    label = os.path.basename(path) or '/'
    parent = os.path.dirname(path) or '/'
    sections = [
        f"Path: {path}\nName: {label}\nParent: {parent}\nType: {kind}\nMode: {mode_str}\nSize: {size}"
    ]
    if file_type:
        sections.append(f"\nfile(1): {file_type}")

    ancestry = _build_reverse_context(path)
    if ancestry:
        sections.append(f"\n\n--- Directory ancestry (root to target) ---\n{ancestry}")

    if kind == 'd':
        try:
            names = sorted(os.listdir(path))[:50]
        except (PermissionError, OSError):
            names = None
        if names is not None:
            sections.append(f"\nContents ({len(names)} shown): {', '.join(names)}")

    ctx = ''.join(sections)

    # Fill the remaining budget with file contents: caller excerpt first,
    # disk read when that gives more.
    budget = MAX_CTX_CHARS - len(ctx) - 200
    if budget > 500:
        body = preview_head[:budget] if preview_head and len(preview_head) > 100 else None
        if not body or len(body) < budget // 2:
            from_disk = _read_file_content(path, budget)
            if from_disk and len(from_disk) > len(body or ''):
                body = from_disk
        if body:
            ctx += f"\n\n--- File contents ({len(body)} chars) ---\n{body}"

    prompt = (
        "You are a Pokedex for a unix filesystem. Given details about a file or directory, "
        "write a concise entry (2 paragraphs) explaining what this is, what it does, "
        "and why it exists on a linux system. Be specific and technical but accessible. "
        "First paragraph: what it is and its purpose. "
        "Second paragraph: how it works, what depends on it, or interesting facts. "
        "If it's a directory, describe what lives inside and how the contents relate. "
        "No markdown formatting. Keep it tight — two paragraphs only."
    )
    conversation = [
        {'role': 'system', 'content': prompt},
        {'role': 'user', 'content': ctx},
    ]

    try:
        collected = []
        for piece in _hermes_chat_stream(conversation):
            collected.append(piece)
            yield piece, False
        full_text = ''.join(collected)
        if full_text:
            _pokedex_cache[path] = {'text': full_text, 'ts': started}
        yield '', True
    except Exception as e:
        print(f"pokedex stream error: {e}", file=sys.stderr)
        yield None, True


def _stream_chat(path, messages):
    """Generator variant of the contextual chat; yields ``(chunk, is_done)``.

    Builds the same system prompt as chat_with_context (metadata, ancestry,
    directory listing, file contents, cached Pokedex entry), forwards the
    last 10 messages with roles restricted to 'user'/'assistant', and
    yields each Hermes delta with ``is_done=False`` followed by
    ``('', True)``. On failure ``(None, True)`` is yielded.
    """
    kind, mode, size, _ = file_meta(path)
    label = os.path.basename(path) or '/'
    sections = [f"Path: {path}\nName: {label}\nType: {kind}\nMode: {mode}\nSize: {size}"]

    ancestry = _build_reverse_context(path)
    if ancestry:
        sections.append(f"\n\n--- Directory ancestry ---\n{ancestry}")

    if kind == 'd':
        try:
            names = sorted(os.listdir(path))[:50]
        except (PermissionError, OSError):
            names = None
        if names is not None:
            sections.append(f"\nContents ({len(names)} shown): {', '.join(names)}")

    ctx = ''.join(sections)

    budget = MAX_CTX_CHARS - len(ctx) - 500
    if budget > 500:
        body = _read_file_content(path, budget)
        if body:
            ctx += f"\n\n--- File contents ({len(body)} chars) ---\n{body}"

    entry = _pokedex_cache.get(path)
    if entry:
        ctx += f"\n\n--- Pokedex entry ---\n{entry['text']}"

    system = (
        "You are a knowledgeable unix filesystem guide. You are chatting with a user "
        "who is exploring a filesystem visualization. They have selected a specific "
        "file or directory and want to learn more about it. Answer their questions "
        "concisely and technically. The context below describes what they are looking at.\n\n"
        f"{ctx}"
    )

    def normalized(turn):
        # Unknown roles are forwarded as 'user'.
        speaker = turn.get('role', 'user')
        if speaker not in ('user', 'assistant'):
            speaker = 'user'
        return {'role': speaker, 'content': turn.get('content', '')}

    api_messages = [{'role': 'system', 'content': system}]
    api_messages.extend(normalized(turn) for turn in messages[-10:])

    try:
        for piece in _hermes_chat_stream(api_messages):
            yield piece, False
        yield '', True
    except Exception as e:
        print(f"chat stream error: {e}", file=sys.stderr)
        yield None, True


def _send_sse(handler, generator):
    """Write a ``(text, done)`` generator to *handler* as Server-Sent Events.

    Sends a 200 response with event-stream headers, then for each item:
    ``text is None`` emits an ``error`` event and stops; non-empty text is
    emitted as a JSON-encoded ``data:`` event; ``done`` emits a ``done``
    event and stops. Every event is flushed immediately. A client that
    disconnects mid-stream is ignored silently.
    """
    handler.send_response(200)
    for header, value in (
        ('Content-Type', 'text/event-stream'),
        ('Cache-Control', 'no-cache, no-transform'),
        ('Connection', 'keep-alive'),
        ('X-Accel-Buffering', 'no'),
        ('X-Content-Type-Options', 'nosniff'),
    ):
        handler.send_header(header, value)
    handler._send_cors()
    handler.end_headers()

    def emit(frame):
        # One complete SSE frame, pushed to the client right away.
        handler.wfile.write(frame)
        handler.wfile.flush()

    try:
        for text, done in generator:
            if text is None:
                # Upstream failure reported by the generator
                emit(b'event: error\ndata: hermes unavailable\n\n')
                return
            if text:
                emit(f'data: {json.dumps(text)}\n\n'.encode())
            if done:
                emit(b'event: done\ndata: end\n\n')
                return
    except (BrokenPipeError, ConnectionResetError):
        pass


VERSION = '0.2.0'

# Browser origins that are allowed by default for CORS (exact match).
DEFAULT_ALLOWED_ORIGINS = tuple(
    f'https://{host}' for host in ('www.timehexon.com', 'timehexon.com')
)

# File extension (lower-case, with leading dot) -> Content-Type header value.
STATIC_MIME = {
    # pages, styles, scripts
    '.html': 'text/html; charset=utf-8',
    '.htm': 'text/html; charset=utf-8',
    '.css': 'text/css; charset=utf-8',
    '.js': 'application/javascript; charset=utf-8',
    '.mjs': 'application/javascript; charset=utf-8',
    '.json': 'application/json',
    # images
    '.svg': 'image/svg+xml',
    '.png': 'image/png',
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.gif': 'image/gif',
    '.ico': 'image/x-icon',
    '.webp': 'image/webp',
    # plain text
    '.txt': 'text/plain; charset=utf-8',
    '.md': 'text/plain; charset=utf-8',
    # binaries and source maps
    '.wasm': 'application/wasm',
    '.map': 'application/json',
    # fonts
    '.woff': 'font/woff',
    '.woff2': 'font/woff2',
    '.ttf': 'font/ttf',
    # misc
    '.xml': 'application/xml',
}


class Handler(BaseHTTPRequestHandler):
    """Request handler for the fs-api JSON, SSE and static-file routes.

    The class attributes below are configuration shared by every request;
    main() assigns them before the server starts.
    """

    # Set by main() before server starts
    STATIC_DIR = None      # directory static files are served from, or None
    CORS_ENABLED = False   # when False, no allowlist CORS headers are sent
    ALLOWED_ORIGINS = ()   # tuple of exact-match origins

    # ---- CORS -----------------------------------------------------------

    def _cors_origin(self):
        """Return the Origin header if it matches an allowed origin, else None."""
        if not self.CORS_ENABLED:
            return None
        origin = self.headers.get('Origin')
        if not origin:
            return None
        if origin in self.ALLOWED_ORIGINS:
            return origin
        return None

    def _send_cors(self, public=False):
        """Emit CORS headers. `public=True` always sends `*` (used for /healthz)."""
        if public:
            self.send_header('Access-Control-Allow-Origin', '*')
            self.send_header('Vary', 'Origin')
            return
        origin = self._cors_origin()
        if origin:
            self.send_header('Access-Control-Allow-Origin', origin)
            self.send_header('Vary', 'Origin')

    # ---- small response/request helpers ---------------------------------

    def _send_empty(self, code):
        """Send a response with status `code` and no body."""
        self.send_response(code)
        self.send_header('Content-Length', '0')
        self.end_headers()

    def _send_json(self, data, public=False):
        """Send `data` as a 200 JSON response; `public` selects wildcard CORS."""
        body = json.dumps(data).encode()
        self.send_response(200)
        self.send_header('Content-Type', 'application/json')
        self.send_header('Cache-Control', 'no-cache')
        self.send_header('Content-Length', str(len(body)))
        self._send_cors(public=public)
        self.end_headers()
        self.wfile.write(body)

    @staticmethod
    def _resolve_path(raw):
        """Return os.path.realpath(raw), or None if `raw` is not a usable path.

        Rejects empty values, non-strings (possible in a JSON body) and
        strings realpath refuses outright (e.g. an embedded NUL byte).
        """
        if not raw or not isinstance(raw, str):
            return None
        try:
            return os.path.realpath(raw)
        except ValueError:
            return None

    def _read_chat_request(self):
        """Read and validate the JSON body shared by the chat endpoints.

        Returns:
            (resolved_path, messages) on success, or None after a 400 has
            already been sent to the client.
        """
        try:
            length = int(self.headers.get('Content-Length', 0))
        except (TypeError, ValueError):
            length = -1
        # A negative length would make rfile.read() block until EOF.
        if length < 0:
            self._send_empty(400)
            return None
        try:
            req = json.loads(self.rfile.read(length))
        except (json.JSONDecodeError, ValueError):
            self._send_empty(400)
            return None
        # Valid JSON that is not an object (list, string, number) has no fields.
        if not isinstance(req, dict):
            self._send_empty(400)
            return None
        fpath = self._resolve_path(req.get('path', ''))
        messages = req.get('messages', [])
        if fpath is None or not messages:
            self._send_empty(400)
            return None
        return fpath, messages

    # ---- HTTP verbs -----------------------------------------------------

    def do_OPTIONS(self):
        """Answer a CORS preflight with 204 and the allowed methods/headers."""
        # CORS preflight. Permissive on /healthz, allowlist elsewhere.
        from urllib.parse import urlparse
        parsed = urlparse(self.path)
        self.send_response(204)
        if parsed.path == '/healthz':
            self._send_cors(public=True)
        else:
            self._send_cors()
        self.send_header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS')
        self.send_header('Access-Control-Allow-Headers', 'Content-Type')
        self.send_header('Access-Control-Max-Age', '600')
        self.send_header('Content-Length', '0')
        self.end_headers()

    def _serve_static(self, urlpath):
        """Serve a file from STATIC_DIR. Returns True if handled, False if not found.

        `urlpath` is the URL path with the query string already removed. It
        is percent-decoded before use so names containing spaces or
        non-ASCII characters resolve. Path traversal is blocked twice: `..`
        segments are rejected after decoding, and the resolved real path
        must stay inside STATIC_DIR.
        """
        if not self.STATIC_DIR:
            return False
        from urllib.parse import unquote
        decoded = unquote(urlpath)
        rel = decoded.lstrip('/')
        if rel == '' or decoded.endswith('/'):
            rel = os.path.join(rel, 'index.html') if rel else 'index.html'
        # Reject any traversal-like segment outright before joining
        if '\x00' in rel or any(seg == '..' for seg in rel.split('/')):
            return False
        candidate = os.path.realpath(os.path.join(self.STATIC_DIR, rel))
        root = os.path.realpath(self.STATIC_DIR)
        # Containment check — candidate must live under root
        if not (candidate == root or candidate.startswith(root + os.sep)):
            return False
        if not os.path.isfile(candidate):
            return False
        ext = os.path.splitext(candidate)[1].lower()
        mime = STATIC_MIME.get(ext, 'application/octet-stream')
        try:
            with open(candidate, 'rb') as f:
                body = f.read()
        except OSError:
            return False
        self.send_response(200)
        self.send_header('Content-Type', mime)
        self.send_header('Content-Length', str(len(body)))
        self.send_header('Cache-Control', 'no-cache')
        self._send_cors()
        self.end_headers()
        self.wfile.write(body)
        return True

    def do_POST(self):
        """Handle POST /api/chat (JSON reply) and /api/chat-stream (SSE)."""
        from urllib.parse import urlparse
        route = urlparse(self.path).path

        if route not in ('/api/chat', '/api/chat-stream'):
            self._send_empty(404)
            return

        chat_request = self._read_chat_request()
        if chat_request is None:
            return  # 400 already sent
        fpath, messages = chat_request

        if route == '/api/chat-stream':
            _send_sse(self, _stream_chat(fpath, messages))
            return

        reply = chat_with_context(fpath, messages)
        data = {'reply': reply} if reply else {'error': 'hermes unavailable'}
        self._send_json(data)

    def do_GET(self):
        """Dispatch GET routes: JSON endpoints, SSE pokedex, then static files."""
        from urllib.parse import urlparse, parse_qs
        parsed = urlparse(self.path)
        route = parsed.path

        if route == '/healthz':
            data = {'ok': True, 'service': 'fs-api', 'version': VERSION}
        elif route == '/api/fs':
            data = get_fs()
        elif route == '/api/stats':
            data = get_stats()
        elif route == '/api/shadows':
            data = get_shadow_stats()
        elif route == '/api/all':
            data = {'fs': get_fs(), 'stats': get_stats()}
        elif route == '/api/file-info':
            fpath = parse_qs(parsed.query).get('path', [''])[0]
            if not fpath:
                self._send_empty(400)
                return
            data = get_file_info(fpath)
        elif route in ('/api/pokedex', '/api/pokedex-stream'):
            qs = parse_qs(parsed.query)
            fpath = self._resolve_path(qs.get('path', [''])[0])
            if fpath is None:
                self._send_empty(400)
                return
            file_type = qs.get('file_type', [None])[0]
            preview = qs.get('preview', [None])[0]
            if route == '/api/pokedex-stream':
                _send_sse(self, _stream_pokedex(fpath, file_type, preview))
                return
            text = get_pokedex(fpath, file_type, preview)
            data = {'path': fpath, 'entry': text} if text else {'path': fpath, 'error': 'hermes unavailable'}
        else:
            # Static file fallback (only if --static-dir was configured)
            if self.STATIC_DIR and self._serve_static(route):
                return
            self._send_empty(404)
            return

        # /healthz is publicly probeable for local-server detection.
        # Other endpoints get CORS only when --cors is on AND Origin is allowlisted.
        # In prod (Caddy-fronted), CORS is added by the edge — fs-api stays silent there.
        self._send_json(data, public=(route == '/healthz'))

    def log_message(self, format, *args):
        """Suppress the base class's per-request logging."""
        pass


class ThreadedHTTPServer(ThreadingMixIn, HTTPServer):
    """HTTPServer variant that handles each request in its own thread."""

    # Worker threads are daemons so in-flight requests (e.g. long-lived
    # streams) do not keep the process alive at shutdown.
    daemon_threads = True


def _address_family_for(host):
    """Return the socket address family needed to bind `host`.

    IPv6 literals always contain ':'; IPv4 literals and hostnames never do.
    """
    import socket
    return socket.AF_INET6 if ':' in host else socket.AF_INET


def main(argv=None):
    """Parse command-line options, configure Handler, and serve until Ctrl-C.

    Args:
        argv: argument list to parse; None means use sys.argv[1:].

    Exits with status 2 when asked to bind a non-loopback address without
    --public.
    """
    import argparse
    ap = argparse.ArgumentParser(
        prog='fs-api',
        description='3D filesystem visualizer backend (loopback by default)',
    )
    ap.add_argument('--bind', default='127.0.0.1',
                    help='host/IP to bind (default: 127.0.0.1)')
    ap.add_argument('--port', type=int, default=8001,
                    help='port to bind (default: 8001)')
    ap.add_argument('--public', action='store_true',
                    help='allow non-loopback bind (DANGEROUS: exposes filesystem read API to the network)')
    ap.add_argument('--static-dir', default=None, metavar='PATH',
                    help='serve static files from this directory (default: directory of this script)')
    ap.add_argument('--no-static', action='store_true',
                    help='disable static file serving entirely')
    ap.add_argument('--cors', action='store_true',
                    help='emit CORS headers for allowlisted origins (needed for hybrid-mode '
                         'auto-detection from https://www.timehexon.com)')
    ap.add_argument('--allow-origin', action='append', metavar='ORIGIN', default=[],
                    help='additional Origin to allowlist (repeatable). '
                         f'defaults: {", ".join(DEFAULT_ALLOWED_ORIGINS)}')
    ap.add_argument('--version', action='version', version=f'fs-api {VERSION}')
    args = ap.parse_args(argv)

    is_loopback = args.bind in ('127.0.0.1', '::1', 'localhost')
    if not is_loopback and not args.public:
        sys.stderr.write(
            f"refusing to bind {args.bind!r} without --public\n"
            f"  by default fs-api listens on 127.0.0.1 (loopback) only.\n"
            f"  re-run with --public to expose your filesystem read API to the network.\n"
        )
        sys.exit(2)
    if args.public:
        sys.stderr.write(
            "WARNING: --public exposes your filesystem read API to the network.\n"
            "  Anyone who can reach this port can read directory listings, file metadata,\n"
            "  and the contents of any file readable by this process.\n"
        )

    if args.no_static:
        static_dir = None
    else:
        static_dir = args.static_dir or os.path.dirname(os.path.abspath(sys.argv[0]))
        if not os.path.isdir(static_dir):
            # Only complain when the user asked for this directory explicitly;
            # a missing default directory just means "no bundled demo".
            if args.static_dir:
                sys.stderr.write(
                    f'warning: --static-dir {static_dir!r} is not a directory; '
                    'static file serving disabled\n'
                )
            static_dir = None
    Handler.STATIC_DIR = static_dir
    Handler.CORS_ENABLED = args.cors
    # dict.fromkeys de-duplicates while keeping a stable, readable order
    # (defaults first, then --allow-origin values as given).
    Handler.ALLOWED_ORIGINS = tuple(
        dict.fromkeys((*DEFAULT_ALLOWED_ORIGINS, *args.allow_origin))
    )

    # The stock server class binds IPv4 sockets only, so an IPv6 literal such
    # as ::1 needs a variant with the matching address family.
    family = _address_family_for(args.bind)
    server_cls = ThreadedHTTPServer
    if family != ThreadedHTTPServer.address_family:
        server_cls = type(
            'ThreadedHTTPServer6', (ThreadedHTTPServer,), {'address_family': family}
        )

    server = server_cls((args.bind, args.port), Handler)
    # IPv6 literals must be bracketed inside a URL.
    shown_host = f'[{args.bind}]' if ':' in args.bind else args.bind
    sys.stderr.write(f'fs-api {VERSION} listening on http://{shown_host}:{args.port}/\n')
    if static_dir:
        sys.stderr.write(f'  static files: {static_dir}\n')
    if args.cors:
        sys.stderr.write(f'  cors: on  (allow-origin: {", ".join(Handler.ALLOWED_ORIGINS)})\n')
    sys.stderr.write('  /healthz: always public (Access-Control-Allow-Origin: *)\n')
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        sys.stderr.write('\nfs-api: shutting down\n')
    finally:
        # Release the listening socket however serve_forever() ended.
        server.server_close()


# Script entry point: start the server only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()
