"""Build PICARD/SOVAP TSI charts from the CDS VizieR VI/152 composit.dat file.

Source: CDS VizieR catalog VI/152 (Meftah et al. 2016)
Reference: Meftah, M. et al. (2016) SOVAP/Picard, a spaceborne radiometer to measure
           the total solar irradiance. Metrologia 53, 1. DOI: 10.1088/0026-1394/53/1/1
Attribution: PICARD/SOVAP team, CNES; data via CDS VizieR (Strasbourg).
"""
from pathlib import Path

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd


# All paths are anchored at the directory that contains this script.
BASE_DIR = Path(__file__).parent

# Input: the composit.dat file from CDS VizieR catalogue VI/152.
INPUT_FILE = BASE_DIR.joinpath("tsi_raw", "picard_sovap", "raw", "composit.dat")

# Outputs: chart, daily CSV and provenance manifest share one directory.
OUT_DIR = BASE_DIR.joinpath("tsi_satellite_sources")
OUT_PNG = OUT_DIR.joinpath("picard_sovap_tsi_daily_monthly_annual.png")
OUT_CSV = OUT_DIR.joinpath("picard_sovap_tsi_daily.csv")
OUT_MANIFEST = OUT_DIR.joinpath("picard_sovap_tsi_manifest.txt")


def load_daily(path: "Path | str | None" = None) -> pd.DataFrame:
    """Parse composit.dat 6-hourly data and aggregate to daily means.

    Format: space-separated 'YYYY-MM-DD HH:MM:SS TSI e_TSI n channel'

    Args:
        path: Optional file to read instead of the module-level
            ``INPUT_FILE``. Accepts a ``str`` or ``Path``.

    Returns:
        One row per calendar date, sorted by date, with columns ``date``
        (datetime), ``tsi`` (mean of the day's values), ``uncertainty``
        (mean of the day's e_TSI values) and ``n_obs`` (number of values
        averaged). An empty frame with the same columns is returned when
        no line survives parsing and filtering.

    Lines that have fewer than four fields, do not parse, or carry a TSI
    outside the open interval (1200, 1500) are skipped.
    """
    source = INPUT_FILE if path is None else Path(path)

    rows = []
    for line in source.read_text(encoding="latin-1").splitlines():
        parts = line.split()
        if len(parts) < 4:
            continue
        try:
            tsi = float(parts[2])
            unc = float(parts[3])
        except ValueError:
            continue
        # NaN also fails this chained comparison, so it is rejected here.
        if not (1200.0 < tsi < 1500.0):
            continue
        rows.append((parts[0], tsi, unc))

    if not rows:
        # Return a typed empty frame so callers can rely on the schema.
        return pd.DataFrame({
            "date": pd.Series(dtype="datetime64[ns]"),
            "tsi": pd.Series(dtype="float64"),
            "uncertainty": pd.Series(dtype="float64"),
            "n_obs": pd.Series(dtype="int64"),
        })

    raw = pd.DataFrame(rows, columns=["date", "tsi", "uncertainty"])
    # Parse all dates in one call; unparseable dates become NaT and are dropped.
    raw["date"] = pd.to_datetime(raw["date"], format="%Y-%m-%d", errors="coerce")
    raw = raw.dropna(subset=["date"])

    # Average the 6-hourly measurements to daily means
    daily = (raw.groupby("date")
               .agg(tsi=("tsi", "mean"), uncertainty=("uncertainty", "mean"),
                    n_obs=("tsi", "count"))
               .reset_index())
    return daily.sort_values("date").reset_index(drop=True)


def build_chart(frame: pd.DataFrame) -> None:
    """Render the daily/monthly/annual TSI chart and save it to ``OUT_PNG``.

    Args:
        frame: Daily means with a datetime ``date`` column and a numeric
            ``tsi`` column.

    The destination directory is created if it does not exist, so the
    chart can be built before any other output has been written. The
    figure is closed even when plotting or saving fails.
    """
    by_date = frame.set_index("date")["tsi"]
    monthly = by_date.resample("MS").mean().reset_index(name="tsi")
    annual = by_date.resample("YS").mean().reset_index(name="tsi")
    # "YS" bins are labelled at the start of the year; shift the markers by
    # 181 days so they sit near mid-year on the time axis.
    annual["plot_date"] = annual["date"] + pd.offsets.Day(181)

    plt.style.use("seaborn-v0_8-whitegrid")
    fig, ax = plt.subplots(figsize=(12.5, 6.8), dpi=170)

    try:
        ax.scatter(frame["date"], frame["tsi"], s=10, color="#fda4af", alpha=0.55,
                   edgecolors="none", label="Daily means (6-hourly averaged)")
        ax.plot(monthly["date"], monthly["tsi"], color="#e11d48", linewidth=1.8, label="Monthly means")
        ax.plot(annual["plot_date"], annual["tsi"], color="#881337", linewidth=2.4,
                marker="o", markersize=3.4, label="Annual means")

        ax.set_title("PICARD/SOVAP TSI — CDS VizieR VI/152 Composite (2010–2014)", fontsize=15, pad=14)
        ax.set_ylabel("Total Solar Irradiance (W m^-2)")
        ax.set_xlabel("Year")
        ax.xaxis.set_major_locator(mdates.MonthLocator(interval=6))
        ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m"))
        plt.setp(ax.get_xticklabels(), rotation=45, ha="right", fontsize=8.5)
        ax.set_xlim(frame["date"].min(), frame["date"].max())
        ax.legend(loc="upper right", frameon=True)

        note = (
            "Source: composit.dat — CDS VizieR catalog VI/152 (cdsarc.cds.unistra.fr)\n"
            "6-hourly measurements averaged to daily means; TSI ~1362 W/m² (SI-traceable scale)\n"
            "Ref: Meftah et al. 2016, Metrologia 53, 1 · DOI 10.1088/0026-1394/53/1/1\n"
            "Attribution: PICARD/SOVAP team, CNES | data via CDS VizieR, Strasbourg"
        )
        ax.text(0.015, 0.03, note, transform=ax.transAxes, fontsize=8.0,
                bbox=dict(boxstyle="round,pad=0.35", facecolor="white", edgecolor="#cbd5e1", alpha=0.96))

        fig.tight_layout()
        # The output directory may not exist yet when this runs first.
        OUT_PNG.parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(OUT_PNG, bbox_inches="tight")
    finally:
        # Release the figure even if plotting or saving raised.
        plt.close(fig)


def write_outputs(frame: pd.DataFrame) -> None:
    """Write the daily CSV and the provenance manifest into ``OUT_DIR``.

    Args:
        frame: Daily means with a datetime ``date`` column and a ``tsi``
            column; every column is written to the CSV.

    The CSV stores ``date`` as ``YYYY-MM-DD`` text. The manifest records
    the dataset provenance, the date span, the row and bin counts, the
    TSI range and the required attribution.
    """
    OUT_DIR.mkdir(parents=True, exist_ok=True)

    # Format dates on a derived frame so the caller's frame is not modified.
    frame.assign(date=frame["date"].dt.strftime("%Y-%m-%d")).to_csv(OUT_CSV, index=False)

    # Bin counts come from the same resampling rules the chart uses.
    tsi_by_date = frame.set_index("date")["tsi"]
    monthly_rows = len(tsi_by_date.resample("MS").mean())
    annual_rows = len(tsi_by_date.resample("YS").mean())

    provenance = [
        "PICARD/SOVAP TSI manifest",
        "",
        "Dataset: PICARD/SOVAP TSI composite (Meftah et al. 2016), CDS VizieR VI/152",
        "Instrument: SOVAP (Solar VAriability and Planetary temperatures) on PICARD",
        "Platform: PICARD satellite (CNES, 2010-2014)",
        "Publisher: CDS VizieR, Strasbourg (catalogue VI/152)",
        "Source URL: https://cdsarc.cds.unistra.fr/ftp/VI/152/composit.dat",
        "ReadMe: https://cdsarc.cds.unistra.fr/ftp/VI/152/ReadMe",
        "Raw file: tsi_raw/picard_sovap/raw/composit.dat",
        "",
    ]
    statistics = [
        f"Date span: {frame['date'].min().date()} to {frame['date'].max().date()}",
        f"Daily rows (6-hourly averaged): {len(frame)}",
        f"Monthly bins: {monthly_rows}",
        f"Annual bins: {annual_rows}",
        f"TSI range: {frame['tsi'].min():.4f} to {frame['tsi'].max():.4f} W m^-2",
        "",
    ]
    attribution = [
        "Attribution (required):",
        "  Meftah, M., Hauchecorne, A., Irbah, A., et al. (2016)",
        "  SOVAP/Picard, a spaceborne radiometer to measure the total solar irradiance.",
        "  Metrologia 53, 1. DOI: 10.1088/0026-1394/53/1/1",
        "  Acknowledge PICARD/SOVAP team, CNES, and CDS VizieR (Strasbourg).",
        "",
        "Aggregation: 6-hour cadence (03:00, 09:00, 15:00, 21:00 UTC) averaged to daily means.",
    ]

    manifest_text = "\n".join(provenance + statistics + attribution) + "\n"
    OUT_MANIFEST.write_text(manifest_text)


def main() -> None:
    """Load the daily series, build the chart, write outputs, print a summary.

    Raises:
        FileNotFoundError: If ``INPUT_FILE`` does not exist.
        ValueError: If no rows remain after parsing and filtering.
    """
    if not INPUT_FILE.exists():
        raise FileNotFoundError(f"Missing input: {INPUT_FILE}")

    frame = load_daily()
    if frame.empty:
        raise ValueError("No valid composit.dat rows after filtering")

    build_chart(frame)
    write_outputs(frame)

    # Report each artefact by file name, then a short summary of the data.
    for artefact in (OUT_PNG, OUT_CSV, OUT_MANIFEST):
        print("saved", artefact.name)

    dates = frame["date"]
    tsi = frame["tsi"]
    print("daily_rows (averaged)", len(frame))
    print("date_span", dates.min().date(), "to", dates.max().date())
    print("tsi_range", round(tsi.min(), 4), "to", round(tsi.max(), 4))


# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
