"""Build IRMB/C3S composite TSI charts from C3S_RMIB_daily_TSI_composite_ICDR_v3.1.txt.

Source: Royal Meteorological Institute of Belgium (RMIB) / Copernicus C3S
Version: ICDR v3.1, 1979-01-01 to 2023-09-30
Reference: Dewitte, S., Nevens, S. (2016) The Total Solar Irradiance Climate Data
           Record. Astrophysical Journal 830, 25. DOI: 10.3847/0004-637X/830/1/25
Attribution: RMIB / Copernicus Climate Change Service (C3S).
Data URL: https://gerb.oma.be/tsi/C3S_RMIB_daily_TSI_composite_ICDR_v3.1.txt
"""
from pathlib import Path

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd


# All paths below are anchored at the directory that contains this script.
BASE_DIR = Path(__file__).parent

# Raw RMIB/C3S daily composite text file that load_daily() reads.
INPUT_FILE = BASE_DIR.joinpath(
    "tsi_raw",
    "irmb_composite",
    "raw",
    "C3S_RMIB_daily_TSI_composite_ICDR_v3.1.txt",
)

# Output directory and the three artefacts written into it.
OUT_DIR = BASE_DIR.joinpath("tsi_satellite_sources")
OUT_PNG = OUT_DIR.joinpath("irmb_composite_tsi_daily_monthly_annual.png")
OUT_CSV = OUT_DIR.joinpath("irmb_composite_tsi_daily.csv")
OUT_MANIFEST = OUT_DIR.joinpath("irmb_composite_tsi_manifest.txt")


def load_daily(path: "Path | None" = None) -> pd.DataFrame:
    """Parse the C3S/RMIB daily composite file into a data frame.

    Columns (space-separated, comment lines start with #):
      1: fractional year  2: TSI  3: julian day  4: YYYYMMDD
      5: n_instr  6: std.dev  7: dist  8: act.TSI  9: instr.flag  10+: per-instrument

    Only column 2 (TSI), column 4 (date) and column 6 (std.dev, stored as
    ``uncertainty``) are kept.  A row is skipped when it is blank, starts with
    ``#``, has fewer than four fields, has an unparsable TSI or date, or has a
    TSI outside the open interval (1200, 1500).  A missing or unparsable
    std.dev field does not drop the row; its ``uncertainty`` becomes NaN.

    Args:
        path: File to parse.  Defaults to the module-level ``INPUT_FILE``.

    Returns:
        DataFrame with columns ``date``, ``tsi`` and ``uncertainty``, sorted by
        date with a fresh 0..n-1 index.
    """
    source = INPUT_FILE if path is None else Path(path)

    rows = []
    for line in source.read_text(encoding="utf-8").splitlines():
        if line.startswith("#") or not line.strip():
            continue
        parts = line.split()
        if len(parts) < 4:
            continue
        try:
            tsi = float(parts[1])
            date = pd.to_datetime(parts[3], format="%Y%m%d")
        except (ValueError, IndexError):
            continue
        # Plausibility window; also rejects NaN because every comparison
        # against NaN is false.
        if not (1200.0 < tsi < 1500.0):
            continue
        # The std.dev column is optional: a short row or a non-numeric token
        # must not abort the whole parse, so fall back to NaN instead.
        try:
            unc = float(parts[5])
        except (ValueError, IndexError):
            unc = float("nan")
        rows.append((date, tsi, unc))

    df = pd.DataFrame(rows, columns=["date", "tsi", "uncertainty"])
    return df.sort_values("date").reset_index(drop=True)


def build_chart(frame: pd.DataFrame) -> None:
    """Render daily, monthly-mean and annual-mean TSI and save it to OUT_PNG.

    Args:
        frame: Daily data with a datetime ``date`` column and a numeric ``tsi``
            column, as produced by ``load_daily``.

    The matplotlib style is applied only for the duration of this call, and the
    figure is closed even if plotting or saving raises.
    """
    series = frame.set_index("date")["tsi"]
    monthly = series.resample("MS").mean().reset_index(name="tsi")
    annual = series.resample("YS").mean().reset_index(name="tsi")
    # Annual bins are labelled with 1 January; shift the marker by 181 days so
    # it is drawn near the middle of the year it summarises.
    annual["plot_date"] = annual["date"] + pd.offsets.Day(181)

    # Consistent with write_outputs(): make sure the target directory exists.
    OUT_PNG.parent.mkdir(parents=True, exist_ok=True)

    # style.context() restores the global rcParams on exit, unlike style.use().
    with plt.style.context("seaborn-v0_8-whitegrid"):
        fig, ax = plt.subplots(figsize=(12.5, 6.8), dpi=170)
        try:
            ax.scatter(frame["date"], frame["tsi"], s=10, color="#6ee7b7", alpha=0.45,
                       edgecolors="none", label="Daily means")
            ax.plot(monthly["date"], monthly["tsi"], color="#059669", linewidth=1.8,
                    label="Monthly means")
            ax.plot(annual["plot_date"], annual["tsi"], color="#064e3b", linewidth=2.4,
                    marker="o", markersize=3.4, label="Annual means")

            ax.set_title("IRMB/C3S TSI Composite v3.1 — RMIB / Copernicus C3S (1979–2023)",
                         fontsize=15, pad=14)
            ax.set_ylabel("Total Solar Irradiance (W m^-2)")
            ax.set_xlabel("Year")
            ax.xaxis.set_major_locator(mdates.YearLocator(4))
            ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y"))
            plt.setp(ax.get_xticklabels(), rotation=45, ha="right", fontsize=8.5)
            ax.set_xlim(frame["date"].min(), frame["date"].max())
            ax.legend(loc="upper right", frameon=True)

            # Provenance box drawn in the lower-left corner of the axes.
            note = (
                "Source: C3S_RMIB_daily_TSI_composite_ICDR_v3.1.txt — RMIB / Copernicus C3S\n"
                "ICDR v3.1 · 1979–2023 · SI-traceable scale · multi-instrument composite\n"
                "Ref: Dewitte & Nevens (2016) ApJ 830, 25 · DOI 10.3847/0004-637X/830/1/25\n"
                "Attribution: Royal Meteorological Institute of Belgium (RMIB) | Copernicus C3S"
            )
            ax.text(0.015, 0.03, note, transform=ax.transAxes, fontsize=8.0,
                    bbox=dict(boxstyle="round,pad=0.35", facecolor="white",
                              edgecolor="#cbd5e1", alpha=0.96))

            fig.tight_layout()
            fig.savefig(OUT_PNG, bbox_inches="tight")
        finally:
            # Release the figure even when plotting or saving fails.
            plt.close(fig)


def write_outputs(frame: pd.DataFrame) -> None:
    """Write the daily CSV (OUT_CSV) and the text manifest (OUT_MANIFEST).

    Args:
        frame: Daily data with a datetime ``date`` column and a numeric ``tsi``
            column, as produced by ``load_daily``.  It is not modified; the CSV
            is written from a copy whose dates are formatted as YYYY-MM-DD.

    The output directory is created if it does not exist.  The manifest is
    written as UTF-8 explicitly, matching the encoding used to read the raw
    file, so the result does not depend on the platform default.
    """
    OUT_DIR.mkdir(parents=True, exist_ok=True)

    out = frame.copy()
    out["date"] = out["date"].dt.strftime("%Y-%m-%d")
    out.to_csv(OUT_CSV, index=False)

    # Bin counts span first to last date, so empty (NaN-mean) bins are counted.
    series = frame.set_index("date")["tsi"]
    monthly_rows = series.resample("MS").mean().shape[0]
    annual_rows = series.resample("YS").mean().shape[0]

    lines = [
        "IRMB/C3S Composite TSI manifest",
        "",
        "Dataset: C3S/RMIB daily TSI composite ICDR v3.1",
        "Provider: Royal Meteorological Institute of Belgium (RMIB)",
        "Programme: Copernicus Climate Change Service (C3S), Contract C3S2_312a_lot1",
        "Source URL: https://gerb.oma.be/tsi/C3S_RMIB_daily_TSI_composite_ICDR_v3.1.txt",
        "Raw file: tsi_raw/irmb_composite/raw/C3S_RMIB_daily_TSI_composite_ICDR_v3.1.txt",
        "",
        "CDR period (final): 1979-01-01 to 2020-12-31",
        "ICDR period (interim): 2021-01-01 to 2023-09-30",
        f"Date span (parsed): {frame['date'].min().date()} to {frame['date'].max().date()}",
        f"Daily rows: {len(frame)}",
        f"Monthly bins: {monthly_rows}",
        f"Annual bins: {annual_rows}",
        f"TSI range: {frame['tsi'].min():.4f} to {frame['tsi'].max():.4f} W m^-2",
        "",
        "Instruments included in composite: ERB, ACRIM1, ERBS, ACRIM2, DIARAD, PMO06,",
        "  ACRIM3, SORCE/TIM, PREMOS, SOVAP, TCTE, TSIS-1",
        "",
        "Attribution (required):",
        "  Dewitte, S., Nevens, S. (2016) The Total Solar Irradiance Climate Data Record.",
        "  Astrophysical Journal 830, 25. DOI: 10.3847/0004-637X/830/1/25",
        "  Acknowledge RMIB and Copernicus Climate Change Service (C3S).",
    ]
    OUT_MANIFEST.write_text("\n".join(lines) + "\n", encoding="utf-8")


def main() -> None:
    """Run the pipeline: parse the raw file, draw the chart, write the outputs.

    Prints the names of the saved artefacts followed by a short summary of the
    parsed data (row count, date span, TSI range).

    Raises:
        FileNotFoundError: if INPUT_FILE does not exist.
        ValueError: if load_daily() returns an empty frame.
    """
    if not INPUT_FILE.exists():
        raise FileNotFoundError(f"Missing input: {INPUT_FILE}")
    OUT_DIR.mkdir(parents=True, exist_ok=True)

    frame = load_daily()
    if frame.empty:
        raise ValueError("No valid rows parsed from IRMB composite file")

    build_chart(frame)
    write_outputs(frame)

    # Report what was produced.
    for artefact in (OUT_PNG, OUT_CSV, OUT_MANIFEST):
        print("saved", artefact.name)

    # Summarise the parsed data.
    dates = frame["date"]
    values = frame["tsi"]
    print("daily_rows", len(frame))
    print("date_span", dates.min().date(), "to", dates.max().date())
    print("tsi_range", round(values.min(), 4), "to", round(values.max(), 4))


# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
