"""Modal cloud execution environment using the native Modal SDK directly.

Uses ``Sandbox.create()`` + ``Sandbox.exec()`` instead of the older runtime
wrapper, while preserving Hermes' persistent snapshot behavior across sessions.
"""

import asyncio
import json
import logging
import re
import shlex
import threading
import time as _time
from pathlib import Path
from typing import Any, Dict, Optional

from hermes_constants import get_hermes_home
from tools.environments.base import BaseEnvironment

logger = logging.getLogger(__name__)

# Minimum number of seconds between two credential/skill file syncs into the
# sandbox (checked in ModalEnvironment._before_execute).
_SYNC_INTERVAL_SECONDS = 5.0
# JSON file, inside the directory returned by get_hermes_home(), that maps
# snapshot keys to Modal snapshot ids.
_SNAPSHOT_STORE = get_hermes_home() / "modal_snapshots.json"
# Key prefix for snapshot entries written by this module; entries stored under
# the bare task id are treated as legacy entries.
_DIRECT_SNAPSHOT_NAMESPACE = "direct"

# Matches official Python Docker Hub tags: python:3.12, python:3.12-slim,
# python:3.12.3-bookworm, etc.  Rejects alpine (not Debian-based) and
# ambiguous tags like python:3 or python:latest.
# Group 1 captures the MAJOR.MINOR version only (the patch level is dropped).
_PYTHON_IMAGE_RE = re.compile(
    r"^python:(\d+\.\d+)(?:\.\d+)?(?:-(slim|bookworm|bullseye|slim-bookworm|slim-bullseye))?$"
)


def _load_snapshots() -> Dict[str, str]:
    """Load the snapshot ID mapping from disk.

    Loading is best-effort: every failure mode yields an empty mapping so a
    damaged store never prevents a sandbox from starting.

    Returns:
        The JSON object stored in ``_SNAPSHOT_STORE``, or ``{}`` when the file
        is missing, unreadable, not valid JSON, or its top-level value is not
        a JSON object.
    """
    try:
        raw = _SNAPSHOT_STORE.read_text()
    except FileNotFoundError:
        # No store yet -- the normal first-run case, not worth logging.
        return {}
    except (OSError, UnicodeError) as exc:
        logger.warning("Modal: could not read snapshot store %s: %s", _SNAPSHOT_STORE, exc)
        return {}

    try:
        data = json.loads(raw)
    except (ValueError, RecursionError) as exc:
        logger.warning("Modal: snapshot store %s is not valid JSON: %s", _SNAPSHOT_STORE, exc)
        return {}

    # Callers use dict methods (.get/.pop/item assignment) on the result, so a
    # top-level list/string/null must not be handed back.
    if not isinstance(data, dict):
        logger.warning(
            "Modal: snapshot store %s does not contain a JSON object; ignoring it",
            _SNAPSHOT_STORE,
        )
        return {}
    return data


def _save_snapshots(data: Dict[str, str]) -> None:
    """Persist the snapshot ID mapping to disk atomically.

    The JSON is written to a temporary file in the same directory and then
    moved over ``_SNAPSHOT_STORE`` with ``os.replace``, so a crash or a
    concurrent reader never observes a half-written store.

    Args:
        data: Mapping of snapshot keys to snapshot ids; must be JSON
            serializable.

    Raises:
        OSError: If the directory or file cannot be written.
        TypeError: If ``data`` is not JSON serializable.
    """
    import os
    import tempfile

    store_dir = _SNAPSHOT_STORE.parent
    store_dir.mkdir(parents=True, exist_ok=True)
    # Serialize before touching the filesystem so a bad payload leaves the
    # existing store untouched.
    payload = json.dumps(data, indent=2)

    # mkstemp creates the file readable/writable by the current user only.
    fd, tmp_name = tempfile.mkstemp(
        prefix=f".{_SNAPSHOT_STORE.name}.",
        suffix=".tmp",
        dir=str(store_dir),
    )
    try:
        with os.fdopen(fd, "w") as handle:
            handle.write(payload)
        os.replace(tmp_name, _SNAPSHOT_STORE)
    except BaseException:
        # Do not leave stray temp files behind when the write fails.
        try:
            os.unlink(tmp_name)
        except OSError:
            pass
        raise


def _direct_snapshot_key(task_id: str) -> str:
    """Return the snapshot-store key for *task_id* inside the direct namespace."""
    return "{}:{}".format(_DIRECT_SNAPSHOT_NAMESPACE, task_id)


def _get_snapshot_restore_candidate(task_id: str) -> tuple[str | None, bool]:
    """Look up the snapshot to restore for *task_id*.

    The namespaced key is consulted first; the bare ``task_id`` key (legacy
    format) is only used as a fallback.

    Returns:
        ``(snapshot_id, from_legacy_key)``. ``snapshot_id`` is ``None`` (and
        the flag ``False``) when neither key holds a non-empty string.
    """
    stored = _load_snapshots()

    # (key to probe, whether that key is the legacy format), in priority order.
    probes = (
        (_direct_snapshot_key(task_id), False),
        (task_id, True),
    )
    for key, is_legacy in probes:
        candidate = stored.get(key)
        if candidate and isinstance(candidate, str):
            return candidate, is_legacy

    return None, False


def _store_direct_snapshot(task_id: str, snapshot_id: str) -> None:
    """Record *snapshot_id* for *task_id* under the direct namespace.

    Any entry stored under the bare (legacy) ``task_id`` key is dropped in the
    same write.
    """
    mapping = _load_snapshots()
    mapping[_direct_snapshot_key(task_id)] = snapshot_id
    if task_id in mapping:
        del mapping[task_id]
    _save_snapshots(mapping)


def _delete_direct_snapshot(task_id: str, snapshot_id: str | None = None) -> None:
    """Forget stored snapshots for *task_id* (namespaced and legacy keys).

    Args:
        task_id: Task whose entries should be removed.
        snapshot_id: When given, only entries whose value equals this id are
            removed; when ``None``, every non-null entry for the task goes.

    The store is rewritten only if at least one entry was actually removed.
    """
    mapping = _load_snapshots()

    doomed = [
        key
        for key in (_direct_snapshot_key(task_id), task_id)
        if mapping.get(key) is not None
        and (snapshot_id is None or mapping[key] == snapshot_id)
    ]
    if not doomed:
        return

    for key in doomed:
        mapping.pop(key, None)
    _save_snapshots(mapping)


def _resolve_modal_image(image_spec: Any) -> Any:
    """Turn an image spec into something ``modal.Sandbox.create`` accepts.

    Resolution rules, in order:

    * non-string values are returned untouched;
    * strings starting with ``im-`` are loaded via ``modal.Image.from_id``;
    * official ``python:X.Y*`` tags become
      ``modal.Image.debian_slim(python_version=X.Y)``, because the stock
      Docker Hub images lack build tools (gcc/g++) required by Modal's
      internal pip bootstrap;
    * anything else is pulled with ``modal.Image.from_registry`` plus a setup
      command that removes the bundled pip and re-runs ``ensurepip``.
    """
    import modal as _modal

    if isinstance(image_spec, str):
        if image_spec.startswith("im-"):
            return _modal.Image.from_id(image_spec)

        python_tag = _PYTHON_IMAGE_RE.match(image_spec)
        if python_tag is not None:
            return _modal.Image.debian_slim(python_version=python_tag.group(1))

        pip_bootstrap = (
            "RUN rm -rf /usr/local/lib/python*/site-packages/pip* 2>/dev/null; "
            "python -m ensurepip --upgrade --default-pip 2>/dev/null || true"
        )
        return _modal.Image.from_registry(
            image_spec,
            setup_dockerfile_commands=[pip_bootstrap],
        )

    return image_spec


class _AsyncWorker:
    """Background thread with its own event loop for async-safe Modal calls.

    Lifecycle: ``start()`` spawns a daemon thread running a fresh event loop,
    ``run_coroutine()`` executes coroutines on it from any other thread, and
    ``stop()`` shuts the loop down. The loop is closed when its thread exits,
    so its resources are released and later ``run_coroutine()`` calls fail
    fast instead of blocking on a dead loop.
    """

    def __init__(self):
        self._loop: Optional[asyncio.AbstractEventLoop] = None
        self._thread: Optional[threading.Thread] = None
        self._started = threading.Event()

    def start(self):
        """Start the worker thread and wait until its event loop is running.

        Calling ``start()`` while the worker is already alive is a no-op.

        Raises:
            RuntimeError: If the loop does not come up within 30 seconds.
        """
        if self._thread is not None and self._thread.is_alive():
            return
        self._started.clear()
        self._thread = threading.Thread(target=self._run_loop, daemon=True)
        self._thread.start()
        if not self._started.wait(timeout=30):
            raise RuntimeError("AsyncWorker loop failed to start")

    def _run_loop(self):
        """Thread target: run the event loop until stopped, then close it."""
        loop = asyncio.new_event_loop()
        self._loop = loop
        asyncio.set_event_loop(loop)
        # Signal readiness from inside the loop so start() only returns once
        # the loop is really running; stop() relies on loop.is_running().
        loop.call_soon(self._started.set)
        try:
            loop.run_forever()
        finally:
            try:
                # Cancel whatever is still scheduled (e.g. calls that timed
                # out on the caller side) so nothing is destroyed while pending.
                leftovers = asyncio.all_tasks(loop)
                for task in leftovers:
                    task.cancel()
                if leftovers:
                    loop.run_until_complete(
                        asyncio.gather(*leftovers, return_exceptions=True)
                    )
                loop.run_until_complete(loop.shutdown_asyncgens())
            except Exception:
                logging.getLogger(__name__).debug(
                    "AsyncWorker: error while draining event loop", exc_info=True
                )
            finally:
                loop.close()

    def run_coroutine(self, coro, timeout=600):
        """Run *coro* on the worker loop and return its result.

        Args:
            coro: Coroutine object to execute.
            timeout: Seconds to wait for the result.

        Raises:
            RuntimeError: If the worker loop is not running.
            TimeoutError: (``concurrent.futures.TimeoutError``) if the result
                is not available in time; the coroutine is cancelled.
            Exception: Whatever the coroutine itself raises.
        """
        loop = self._loop
        if loop is None or loop.is_closed() or not loop.is_running():
            # Close the never-scheduled coroutine to avoid a
            # "coroutine was never awaited" warning.
            close = getattr(coro, "close", None)
            if callable(close):
                close()
            raise RuntimeError("AsyncWorker loop is not running")
        future = asyncio.run_coroutine_threadsafe(coro, loop)
        try:
            return future.result(timeout=timeout)
        except BaseException:
            # On timeout/interrupt the caller has given up: do not leave the
            # coroutine running in the background.
            if not future.done():
                future.cancel()
            raise

    def stop(self):
        """Stop the event loop and wait up to 10 seconds for the thread to exit.

        Safe to call more than once and before ``start()``.
        """
        loop, thread = self._loop, self._thread
        if loop is not None and loop.is_running():
            try:
                loop.call_soon_threadsafe(loop.stop)
            except RuntimeError:
                # Loop was closed between the check and the call.
                pass
        if thread is not None and thread is not threading.current_thread():
            thread.join(timeout=10)


class ModalEnvironment(BaseEnvironment):
    """Modal cloud execution via native Modal sandboxes.

    Uses the unified spawn-per-call model: _run_bash() returns a
    _ThreadedProcessHandle that wraps Modal's async SDK in a thread + OS pipe,
    satisfying the ProcessHandle protocol for BaseEnvironment._wait_for_process().
    """

    _snapshot_timeout = 60  # Modal sandbox cold start can be slow

    def __init__(
        self,
        image: str,
        cwd: str = "/root",
        timeout: int = 60,
        modal_sandbox_kwargs: Optional[Dict[str, Any]] = None,
        persistent_filesystem: bool = True,
        task_id: str = "default",
    ):
        """Create a Modal sandbox for ``task_id``, restoring a snapshot if any.

        Args:
            image: Base image spec, resolved through ``_resolve_modal_image``.
            cwd: Working directory, forwarded to ``BaseEnvironment``.
            timeout: Default command timeout, forwarded to ``BaseEnvironment``.
            modal_sandbox_kwargs: Extra keyword arguments for
                ``modal.Sandbox.create``. ``"timeout"`` (default 3600) and
                ``"mounts"`` (extended with credential/skill mounts) are
                handled specially.
            persistent_filesystem: When true, a stored snapshot for the task
                is used as the image; if restoring fails the stored entry is
                deleted and the base image is used instead.
            task_id: Key under which snapshots are stored and looked up.

        Raises:
            Exception: Any error from sandbox creation (after the base-image
                retry, when applicable) or from ``init_session()``. The worker
                thread is stopped, and for ``init_session()`` failures the
                sandbox is terminated, before the error propagates.
        """
        super().__init__(cwd=cwd, timeout=timeout)

        self._persistent = persistent_filesystem
        self._task_id = task_id
        self._base_image = image
        self._sandbox = None
        self._app = None
        self._worker = _AsyncWorker()
        self._synced_files: Dict[str, tuple] = {}
        self._last_sync_time: float = 0.0

        sandbox_kwargs = dict(modal_sandbox_kwargs or {})

        restored_snapshot_id = None
        restored_from_legacy_key = False
        if self._persistent:
            restored_snapshot_id, restored_from_legacy_key = _get_snapshot_restore_candidate(
                self._task_id
            )
            if restored_snapshot_id:
                logger.info("Modal: restoring from snapshot %s", restored_snapshot_id[:20])

        import modal as _modal

        # Mounts are best-effort: any failure here is logged and the sandbox
        # is created with whatever mounts were collected so far.
        cred_mounts = []
        try:
            from tools.credential_files import get_credential_file_mounts, iter_skills_files

            for mount_entry in get_credential_file_mounts():
                cred_mounts.append(
                    _modal.Mount.from_local_file(
                        mount_entry["host_path"],
                        remote_path=mount_entry["container_path"],
                    )
                )
                logger.info(
                    "Modal: mounting credential %s -> %s",
                    mount_entry["host_path"],
                    mount_entry["container_path"],
                )

            # Mount individual skill files (symlinks filtered out).
            # Materialize the result: it is iterated, truth-tested and passed
            # to len() below, which a one-shot iterator would not support.
            skills_files = list(iter_skills_files())
            for entry in skills_files:
                cred_mounts.append(
                    _modal.Mount.from_local_file(
                        entry["host_path"],
                        remote_path=entry["container_path"],
                    )
                )
            if skills_files:
                logger.info("Modal: mounting %d skill files", len(skills_files))
        except Exception as e:
            logger.debug("Modal: could not load credential file mounts: %s", e)

        self._worker.start()

        async def _create_sandbox(image_spec: Any):
            app = await _modal.App.lookup.aio("hermes-agent", create_if_missing=True)
            create_kwargs = dict(sandbox_kwargs)
            # Pull "timeout" out explicitly so it cannot be passed twice via
            # **create_kwargs.
            sandbox_timeout = int(create_kwargs.pop("timeout", 3600))
            if cred_mounts:
                existing_mounts = list(create_kwargs.pop("mounts", []))
                existing_mounts.extend(cred_mounts)
                create_kwargs["mounts"] = existing_mounts
            sandbox = await _modal.Sandbox.create.aio(
                "sleep",
                "infinity",
                image=image_spec,
                app=app,
                timeout=sandbox_timeout,
                **create_kwargs,
            )
            return app, sandbox

        try:
            target_image_spec = restored_snapshot_id or image
            try:
                # _resolve_modal_image routes python:X.Y images to
                # debian_slim (avoiding build-tool failures) and applies
                # setup_dockerfile_commands with ensurepip for other
                # registry images.  Snapshot ids restore via from_id().
                effective_image = _resolve_modal_image(target_image_spec)
                self._app, self._sandbox = self._worker.run_coroutine(
                    _create_sandbox(effective_image),
                    timeout=300,
                )
            except Exception as exc:
                if not restored_snapshot_id:
                    raise

                logger.warning(
                    "Modal: failed to restore snapshot %s, retrying with base image: %s",
                    restored_snapshot_id[:20],
                    exc,
                )
                # Drop the unusable snapshot so later sessions skip it.
                _delete_direct_snapshot(self._task_id, restored_snapshot_id)
                base_image = _resolve_modal_image(image)
                self._app, self._sandbox = self._worker.run_coroutine(
                    _create_sandbox(base_image),
                    timeout=300,
                )
            else:
                if restored_snapshot_id and restored_from_legacy_key:
                    _store_direct_snapshot(self._task_id, restored_snapshot_id)
                    logger.info(
                        "Modal: migrated legacy snapshot entry for task %s",
                        self._task_id,
                    )
        except Exception:
            self._worker.stop()
            raise

        logger.info("Modal: sandbox created (task=%s)", self._task_id)

        # Capture login-shell environment into a snapshot for the unified model
        try:
            self.init_session()
        except Exception:
            # The constructor is about to fail, so the caller never receives
            # an object to call cleanup() on: release the sandbox and worker
            # here (without snapshotting the half-initialized filesystem).
            try:
                self._worker.run_coroutine(
                    self._sandbox.terminate.aio(),
                    timeout=15,
                )
            except Exception as term_exc:
                logger.debug(
                    "Modal: sandbox terminate after failed init failed: %s", term_exc
                )
            finally:
                self._worker.stop()
                self._sandbox = None
                self._app = None
            raise

    # ------------------------------------------------------------------
    # File sync
    # ------------------------------------------------------------------

    def _push_file_to_sandbox(self, host_path: str, container_path: str) -> bool:
        """Push a single host file into the sandbox if it changed.

        A file counts as changed when its ``(mtime, size)`` differs from the
        pair recorded at the last successful push to ``container_path``.

        Args:
            host_path: File to read on the host.
            container_path: Destination path inside the sandbox; parent
                directories are created as needed.

        Returns:
            True if the file was written into the sandbox; False if it is
            unchanged, cannot be read on the host, or the remote write exited
            with a non-zero status (in which case it is retried on the next
            sync).

        Raises:
            Exception: Errors from the worker (e.g. timeout after 15 seconds)
                or from the Modal exec call propagate to the caller.
        """
        hp = Path(host_path)
        try:
            stat = hp.stat()
            file_key = (stat.st_mtime, stat.st_size)
        except OSError:
            return False

        if self._synced_files.get(container_path) == file_key:
            return False

        try:
            content = hp.read_bytes()
        except Exception:
            return False

        import base64
        # The content travels base64-encoded inside the command string so
        # arbitrary bytes survive shell quoting.
        b64 = base64.b64encode(content).decode("ascii")
        container_dir = str(Path(container_path).parent)
        cmd = (
            f"mkdir -p {shlex.quote(container_dir)} && "
            f"echo {shlex.quote(b64)} | base64 -d > {shlex.quote(container_path)}"
        )

        async def _write():
            proc = await self._sandbox.exec.aio("bash", "-c", cmd)
            return await proc.wait.aio()

        exit_code = self._worker.run_coroutine(_write(), timeout=15)
        if exit_code:
            # Do not record the file as synced: a failed write must be
            # retried instead of being cached as up to date.
            logger.warning(
                "Modal: failed to write %s into sandbox (exit code %s)",
                container_path,
                exit_code,
            )
            return False

        self._synced_files[container_path] = file_key
        return True

    def _sync_files(self) -> None:
        """Copy credential files and skill files into the running sandbox.

        Called from ``_before_execute``. Each file goes through
        ``_push_file_to_sandbox``, whose mtime+size cache means only changed
        files are actually transferred. Any error aborts the remaining files
        and is logged at debug level instead of being raised.
        """
        try:
            from tools.credential_files import get_credential_file_mounts, iter_skills_files

            # (provider of entries, debug message logged per pushed file)
            sources = (
                (get_credential_file_mounts, "Modal: synced credential %s"),
                (iter_skills_files, "Modal: synced skill file %s"),
            )
            for list_entries, synced_message in sources:
                for item in list_entries():
                    source = item["host_path"]
                    target = item["container_path"]
                    if self._push_file_to_sandbox(source, target):
                        logger.debug(synced_message, target)
        except Exception as e:
            logger.debug("Modal: file sync failed: %s", e)

    # ------------------------------------------------------------------
    # Unified execution hooks
    # ------------------------------------------------------------------

    def _before_execute(self) -> None:
        """Re-sync files into the sandbox, at most once per sync interval."""
        started = _time.monotonic()
        elapsed = started - self._last_sync_time
        if elapsed < _SYNC_INTERVAL_SECONDS:
            return
        self._sync_files()
        # Stamp with the time taken before syncing, not after it finished.
        self._last_sync_time = started

    def _run_bash(self, cmd_string: str, *, timeout: int | None = None,
                  stdin_data: str | None = None):
        """Run ``bash -c <cmd_string>`` in the sandbox and return its handle.

        A falsy ``timeout`` falls back to ``self.timeout``. When ``stdin_data``
        is given it is folded into the command via ``_embed_stdin_heredoc``.
        """
        limit = timeout if timeout else self.timeout
        script = cmd_string
        if stdin_data is not None:
            script = self._embed_stdin_heredoc(script, stdin_data)
        return self._modal_exec(script, limit, login=False)

    def _run_bash_login(self, cmd_string: str, *,
                        timeout: int | None = None):
        """Run ``bash -l -c <cmd_string>`` (login shell) for snapshot creation.

        A falsy ``timeout`` falls back to ``self._snapshot_timeout``.
        """
        limit = timeout if timeout else self._snapshot_timeout
        return self._modal_exec(cmd_string, limit, login=True)

    def _modal_exec(self, cmd_string: str, timeout: int, login: bool):
        """Wrap a Modal sandbox exec in a ``_ThreadedProcessHandle``.

        The handle's callable runs ``bash [-l] -c cmd_string`` in the sandbox
        through the async worker and returns ``(output, exit_code)``, where
        ``output`` is stdout followed by stderr, decoded as UTF-8 with
        replacement when the streams yield bytes. The worker call is given
        ``timeout + 30`` seconds, i.e. 30 seconds beyond the exec timeout.
        """
        from tools.environments.base import _ThreadedProcessHandle

        # Bind the current worker/sandbox now so the handle keeps using them
        # even if the attributes on self are reassigned later.
        worker = self._worker
        sandbox = self._sandbox
        argv = ("bash", "-l", "-c", cmd_string) if login else ("bash", "-c", cmd_string)

        def _as_text(chunk):
            if isinstance(chunk, str):
                return chunk
            return chunk.decode("utf-8", errors="replace")

        async def _collect():
            proc = await sandbox.exec.aio(*argv, timeout=timeout)
            out = await proc.stdout.read.aio()
            err = await proc.stderr.read.aio()
            code = await proc.wait.aio()
            return out, err, code

        def exec_fn():
            out, err, code = worker.run_coroutine(_collect(), timeout=timeout + 30)
            text = _as_text(out)
            if err:
                text += _as_text(err)
            return text, code

        return _ThreadedProcessHandle(exec_fn)

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    def cleanup(self):
        """Snapshot the filesystem (if persistent) then stop the sandbox.

        Every step is best-effort: a failed snapshot or terminate call is
        logged and never raised, and the worker thread is always stopped.
        Calling ``cleanup()`` when no sandbox is attached is a no-op.
        """
        if self._sandbox is None:
            return

        if self._persistent:
            async def _snapshot():
                img = await self._sandbox.snapshot_filesystem.aio()
                return img.object_id

            try:
                snapshot_id = self._worker.run_coroutine(_snapshot(), timeout=60)
                if snapshot_id:
                    _store_direct_snapshot(self._task_id, snapshot_id)
                    logger.info(
                        "Modal: saved filesystem snapshot %s for task %s",
                        snapshot_id[:20],
                        self._task_id,
                    )
                else:
                    logger.warning(
                        "Modal: filesystem snapshot returned no id for task %s",
                        self._task_id,
                    )
            except Exception as e:
                # Covers both the snapshot call and persisting its id: losing
                # the session's state must be visible in the logs.
                logger.warning("Modal: filesystem snapshot failed: %s", e)

        try:
            self._worker.run_coroutine(
                self._sandbox.terminate.aio(),
                timeout=15,
            )
        except Exception as e:
            logger.debug("Modal: sandbox terminate failed: %s", e)
        finally:
            self._worker.stop()
            self._sandbox = None
            self._app = None