"""
Generate data/2026/securite_nord_complet.xlsx
Full Nord (59) commune security index — 1 (very safe) to 10 (Roubaix-level dangerous).

Scoring formula (derived from file columns):
  base  = DENSITE_BASE[Typo.densité]          # urban density proxy
  bonus = ROLE_BONUS[CATEAAV2020]             # commune role in attraction area
  adj   = -1 if zone_competence == ZGN        # rural gendarmerie zones trend safer
  score = clamp(base + bonus + adj, 1, 9)

Hard overrides for cities with well-documented crime levels
(source: Ministère de l'Intérieur statistiques délinquance):
  Roubaix=10, Tourcoing=9, Mons-en-Barœul=8, Croix=7, …
"""

from __future__ import annotations

import pandas as pd

from domain.core.mysql_db import get_connection, reset_table

SRC = r"data\2026\securite-info-complements-data.gouv-2025-geographie2025-produit-le2026-02-03.xlsx"
DST = r"data\2026\securite_nord_complet.xlsx"

_DDL_SECURITY_NORD = """
CREATE TABLE IF NOT EXISTS security_nord (
    CODGEO          VARCHAR(5)   PRIMARY KEY,
    LIBGEO          VARCHAR(100),
    DEP             VARCHAR(3),
    zone_competence VARCHAR(10),
    CATEAAV2020     VARCHAR(100),
    typo_densite    VARCHAR(100),
    danger_score    INT          NOT NULL
)
"""

DENSITE_BASE: dict[str, int] = {
    "Rural à habitat très dispersé": 1,
    "Rural à habitat dispersé":      1,
    "Bourgs ruraux":                  2,
    "Petites villes":                 3,
    "Ceintures urbaines":             4,
    "Centres urbains intermédiaires": 5,
    "Grands centres urbains":         6,
}

ROLE_BONUS: dict[str, int] = {
    "Commune hors attraction des villes": 0,
    "Commune de la couronne":             0,
    "Autre commune du pôle principal":    1,
    "Commune-Centre":                     2,
}

# Hard overrides — based on public crime statistics
OVERRIDES: dict[str, int] = {
    "59512": 10,  # Roubaix
    "59599":  9,  # Tourcoing
    "59410":  8,  # Mons-en-Barœul
    "59183":  8,  # Dunkerque
    "59606":  8,  # Valenciennes
    "59392":  8,  # Maubeuge
    "59163":  7,  # Croix
    "59350":  7,  # Lille
    "59299":  6,  # Hem
    "59009":  5,  # Villeneuve-d'Ascq
    "59646":  4,  # Wasquehal
    "59328":  4,  # Lambersart
    "59378":  3,  # Marcq-en-Barœul
    "59421":  2,  # Mouvaux
    "59386":  2,  # Marquette-lez-Lille
    "59346":  1,  # Lezennes
    "59352":  1,  # Linselles
    "59636":  1,  # Wambrechies
    "59482":  1,  # Quesnoy-sur-Deûle
    "59527":  2,  # Saint-André-lez-Lille
}


def compute_score(row: pd.Series, densite_col: str) -> int:
    codgeo = str(row["CODGEO"]).zfill(5)
    if codgeo in OVERRIDES:
        return OVERRIDES[codgeo]
    densite = str(row[densite_col])
    role    = str(row["CATEAAV2020"])
    zone    = str(row["zone_competence"])
    base    = DENSITE_BASE.get(densite, 2)
    bonus   = ROLE_BONUS.get(role, 0)
    adj     = -1 if zone == "ZGN" else 0
    return max(1, min(9, base + bonus + adj))


def main() -> None:
    df = pd.read_excel(SRC, sheet_name="zonages supracommunaux", engine="openpyxl")
    dep59 = df[df["DEP"].astype(str) == "59"].copy()

    densite_col    = next(c for c in dep59.columns if "Typo" in c and "densit" in c.lower())
    ruralurbain_col = next(c for c in dep59.columns if "Typo" in c and "rural" in c.lower())

    dep59["danger_score"] = dep59.apply(lambda r: compute_score(r, densite_col), axis=1)

    # Clean up encoded column names for portability
    dep59 = dep59.rename(columns={
        densite_col:    "typo_densite",
        ruralurbain_col: "typo_rural_urbain",
    })

    print(f"Total communes Nord (59): {len(dep59)}")
    print("\nScore distribution:")
    dist = dep59["danger_score"].value_counts().sort_index()
    for score, count in dist.items():
        bar = "#" * (count // 10)
        print(f"  {score:2d}/10 - {count:3d} communes  {bar}")

    print("\nTop 10 most dangerous:")
    top = dep59.nlargest(10, "danger_score")[["CODGEO", "LIBGEO", "danger_score", "zone_competence"]]
    print(top.to_string(index=False))

    dep59.to_excel(DST, index=False)
    print(f"\nSaved -> {DST}")

    # Ensure optional columns exist before writing to MySQL
    for col in ["LIBGEO", "DEP", "zone_competence", "CATEAAV2020", "typo_densite"]:
        if col not in dep59.columns:
            dep59[col] = ""

    rows_to_insert = []
    for _, row in dep59.iterrows():
        def _str(v) -> str:
            return str(v) if pd.notna(v) else ""
        rows_to_insert.append({
            "CODGEO":          str(row["CODGEO"]).zfill(5),
            "LIBGEO":          _str(row["LIBGEO"]),
            "DEP":             _str(row["DEP"]),
            "zone_competence": _str(row["zone_competence"]),
            "CATEAAV2020":     _str(row["CATEAAV2020"]),
            "typo_densite":    _str(row["typo_densite"]),
            "danger_score":    int(row["danger_score"]),
        })

    conn = get_connection()
    try:
        reset_table(conn, "security_nord", _DDL_SECURITY_NORD)
        with conn.cursor() as cur:
            cur.executemany(
                """REPLACE INTO security_nord
                   (CODGEO, LIBGEO, DEP, zone_competence, CATEAAV2020,
                    typo_densite, danger_score)
                   VALUES (%(CODGEO)s, %(LIBGEO)s, %(DEP)s, %(zone_competence)s,
                           %(CATEAAV2020)s, %(typo_densite)s, %(danger_score)s)""",
                rows_to_insert,
            )
        conn.commit()
    finally:
        conn.close()
    print(f"Saved -> MySQL security_nord ({len(rows_to_insert)} communes)")


if __name__ == "__main__":
    main()
