"""
build_seveso.py
===============
Fetches all Seveso-classified ICPE sites from the GEORISQUES API and stores
them in the `seveso_sites` table of the MySQL database.

Only sites with "Seveso seuil haut" or "Seveso seuil bas" classification and
valid GPS coordinates are kept.  Non-Seveso ICPE installations are discarded.

Usage:
    python build_seveso.py                  # all of France
    python build_seveso.py --dept 59        # Nord only
    python build_seveso.py --dept 59 31     # Nord + Haute-Garonne

Note: running without --dept fetches every department (~N pages). The table
is always rebuilt from scratch so the script is idempotent.
"""

from __future__ import annotations

import argparse
import asyncio
import logging

import httpx

from domain.core.mysql_db import get_connection, reset_table

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s — %(message)s",
    datefmt="%H:%M:%S",
)
logger = logging.getLogger("build_seveso")

_GEORISQUES_ICPE_URL = "https://www.georisques.gouv.fr/api/v1/installations_classees"
_NON_SEVESO = frozenset(("non seveso", "ns", "non-seveso"))

_DDL = """
CREATE TABLE seveso_sites (
    id               INT NOT NULL AUTO_INCREMENT,
    nom              TEXT         NOT NULL,
    commune          TEXT         NOT NULL,
    code_postal      VARCHAR(10)  NOT NULL,
    code_departement VARCHAR(5)   NOT NULL,
    adresse          TEXT         NOT NULL,
    seveso           TEXT         NOT NULL,
    statut           TEXT         NOT NULL,
    latitude         DOUBLE       NOT NULL,
    longitude        DOUBLE       NOT NULL,
    PRIMARY KEY (id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
"""

_IDX_DEPT = "CREATE INDEX idx_seveso_dept ON seveso_sites (code_departement)"


# ---------------------------------------------------------------------------
# Fetch
# ---------------------------------------------------------------------------

async def _fetch_icpe(client: httpx.AsyncClient, departement: str = "") -> list[dict]:
    """Fetches all pages for one department (or all of France if empty)."""
    all_items: list[dict] = []
    page = 1
    base_params: dict = {"page_size": 1000}
    if departement:
        base_params["departement"] = departement

    while True:
        logger.info("GEORISQUES ICPE — page=%d | dept=%s", page, departement or "France")
        resp = await client.get(
            _GEORISQUES_ICPE_URL,
            params={"page": page, **base_params},
            headers={"User-Agent": "agent-immobilier/1.0"},
        )
        resp.raise_for_status()
        payload     = resp.json()
        items       = payload.get("data", [])
        total_pages = payload.get("total_pages", 1)
        all_items.extend(items)
        logger.debug(
            "page=%d/%d | +%d items | total=%d",
            page, total_pages, len(items), len(all_items),
        )
        if not items or page >= total_pages:
            break
        page += 1

    return all_items


# ---------------------------------------------------------------------------
# Filter
# ---------------------------------------------------------------------------

def _to_dept(code_postal: str) -> str:
    """Extracts the department code from a postal code."""
    cp = code_postal.strip()
    if cp.startswith("97"):
        return cp[:3]   # overseas departments: 971–976
    return cp[:2]


def _filtrer(items: list[dict]) -> list[tuple]:
    """Returns INSERT-ready tuples for Seveso sites with GPS coordinates."""
    rows: list[tuple] = []
    skipped_seveso = skipped_gps = 0

    for site in items:
        seveso = (site.get("statutSeveso") or "").strip()
        if not seveso or seveso.lower() in _NON_SEVESO:
            skipped_seveso += 1
            continue

        lat = site.get("latitude")
        lng = site.get("longitude")
        if not lat or not lng:
            skipped_gps += 1
            logger.debug(
                "Site Seveso sans GPS ignoré | nom=%s | seveso=%s",
                site.get("raisonSociale"), seveso,
            )
            continue

        code_postal = (site.get("codePostal") or "").strip()
        rows.append((
            (site.get("raisonSociale") or "—").strip(),
            (site.get("commune")       or "—").strip(),
            code_postal or "—",
            _to_dept(code_postal) if code_postal else "—",
            (site.get("adresse1")      or "—").strip(),
            seveso,
            (site.get("etatActivite")  or "—").strip(),
            float(lat),
            float(lng),
        ))

    logger.info(
        "%d sites Seveso retenus | %d non-Seveso ignorés | %d sans GPS ignorés",
        len(rows), skipped_seveso, skipped_gps,
    )
    return rows


# ---------------------------------------------------------------------------
# Database
# ---------------------------------------------------------------------------

def _save(rows: list[tuple]) -> None:
    conn = get_connection()
    try:
        reset_table(conn, "seveso_sites", _DDL, indexes=[_IDX_DEPT])
        with conn.cursor() as cur:
            cur.executemany(
                """INSERT INTO seveso_sites
                   (nom, commune, code_postal, code_departement,
                    adresse, seveso, statut, latitude, longitude)
                   VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)""",
                rows,
            )
        conn.commit()
    finally:
        conn.close()
    logger.info("Table seveso_sites : %d lignes insérées → MySQL", len(rows))


# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------

async def main(departements: list[str]) -> None:
    all_items: list[dict] = []
    async with httpx.AsyncClient(timeout=60.0) as client:
        if departements:
            for dept in departements:
                all_items.extend(await _fetch_icpe(client, dept))
        else:
            all_items = await _fetch_icpe(client)

    rows = _filtrer(all_items)
    _save(rows)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Build seveso_sites table from GEORISQUES.")
    parser.add_argument(
        "--dept", nargs="*", default=[],
        metavar="CODE",
        help="Department codes to fetch (e.g. 59 31). Omit for all of France.",
    )
    args = parser.parse_args()
    asyncio.run(main(args.dept))
