"""Build ACRIM composite TSI charts from ACRIM3composite_nnaa3.txt (NOAA NGDC).

Source: NOAA NGDC / ACRIM Science Team
Format: 4 columns (no header) — decimal year, TSI, uncertainty, day-of-mission
Scale note: Old ACRIM radiometric scale (~1366 W/m²); ~5 W/m² higher than modern
            SI-traceable scale (Kopp & Lean 2011, GRL 38, L01706).
Reference: Willson, R.C. (1997) Total solar irradiance trend during solar cycles
           21 and 22. Science 277, 1963-1965. DOI: 10.1126/science.277.5334.1963
Attribution: ACRIM Science Team; data via NOAA NCEI/NGDC.
"""
from pathlib import Path
import datetime

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd


# All paths are anchored on the directory that contains this script.
BASE_DIR = Path(__file__).parent

# Raw ACRIM composite text file consumed by load_daily().
INPUT_FILE = BASE_DIR.joinpath(
    "tsi_raw", "acrim_composite", "raw", "ACRIM3composite_nnaa3.txt"
)

# Destination directory and the three artifacts written into it.
OUT_DIR = BASE_DIR.joinpath("tsi_satellite_sources")
OUT_PNG = OUT_DIR.joinpath("acrim_composite_tsi_daily_monthly_annual.png")
OUT_CSV = OUT_DIR.joinpath("acrim_composite_tsi_daily.csv")
OUT_MANIFEST = OUT_DIR.joinpath("acrim_composite_tsi_manifest.txt")


def decimal_year_to_date(dy: float) -> datetime.date:
    """Convert a decimal year (e.g. ``2000.5``) to a calendar date.

    The fractional part is scaled by the actual length of that year in
    seconds, so leap years are handled. Any time-of-day component is
    discarded: the result is the calendar day containing that instant.

    Args:
        dy: Decimal year.

    Returns:
        The ``datetime.date`` containing the instant described by ``dy``.
    """
    whole_year = int(dy)
    jan_first = datetime.datetime(whole_year, 1, 1)
    next_jan_first = datetime.datetime(whole_year + 1, 1, 1)
    # Length of this particular year in seconds (365 or 366 days).
    year_seconds = (next_jan_first - jan_first).total_seconds()
    elapsed = datetime.timedelta(seconds=(dy - whole_year) * year_seconds)
    moment = jan_first + elapsed
    return moment.date()


def load_daily() -> pd.DataFrame:
    """Parse ``INPUT_FILE`` into a date-sorted frame of daily TSI values.

    Each line is split on whitespace. The first token is read as a decimal
    year, the second as TSI and the third (when present) as the uncertainty;
    any further columns are ignored. A line is skipped when it:

    * has fewer than two tokens,
    * contains a non-numeric value in one of the columns that are read,
    * has a TSI outside the open interval (1200, 1500), which also rejects
      NaN, or
    * has a decimal year that cannot be converted to a calendar date
      (NaN, infinite, or a year outside what ``datetime`` supports).

    Returns:
        DataFrame with columns ``date`` (datetime64), ``tsi`` and
        ``uncertainty`` (NaN when the column is missing), sorted by date
        with a fresh 0..n-1 index.
    """
    rows = []
    for line in INPUT_FILE.read_text(encoding="utf-8").splitlines():
        parts = line.split()
        if len(parts) < 2:
            continue
        try:
            dy = float(parts[0])
            tsi = float(parts[1])
            unc = float(parts[2]) if len(parts) > 2 else float("nan")
        except ValueError:
            # Header or other non-numeric line.
            continue
        if not (1200.0 < tsi < 1500.0):
            continue
        try:
            day = decimal_year_to_date(dy)
        except (ValueError, OverflowError):
            # float() accepts "nan"/"inf" and arbitrary magnitudes, none of
            # which map to a calendar date; treat such a row like any other
            # malformed line instead of aborting the whole load.
            continue
        rows.append((day, tsi, unc))

    df = pd.DataFrame(rows, columns=["date", "tsi", "uncertainty"])
    df["date"] = pd.to_datetime(df["date"])
    return df.sort_values("date").reset_index(drop=True)


def build_chart(frame: pd.DataFrame) -> None:
    """Render daily, monthly-mean and annual-mean TSI and save to ``OUT_PNG``.

    Args:
        frame: Daily records with a datetime64 ``date`` column and a numeric
            ``tsi`` column (the shape returned by ``load_daily``).

    Side effects:
        Creates ``OUT_DIR`` if needed and writes ``OUT_PNG``. The plot style
        is applied through a context manager, so matplotlib's global
        rcParams are left untouched once the function returns.
    """
    tsi_by_date = frame.set_index("date")["tsi"]
    monthly = tsi_by_date.resample("MS").mean().reset_index(name="tsi")
    annual = tsi_by_date.resample("YS").mean().reset_index(name="tsi")
    # Annual bins are labelled 1 January; shift the marker 181 days so it is
    # drawn near the middle of the year it summarises.
    annual["plot_date"] = annual["date"] + pd.offsets.Day(181)

    # Make the function usable on its own, not only when called from main().
    OUT_DIR.mkdir(parents=True, exist_ok=True)

    # Scope the style to this figure instead of mutating global rcParams.
    with plt.style.context("seaborn-v0_8-whitegrid"):
        fig, ax = plt.subplots(figsize=(12.5, 6.8), dpi=170)
        try:
            ax.scatter(frame["date"], frame["tsi"], s=10, color="#93c5fd", alpha=0.55,
                       edgecolors="none", label="Daily means")
            ax.plot(monthly["date"], monthly["tsi"], color="#2563eb", linewidth=1.8,
                    label="Monthly means")
            ax.plot(annual["plot_date"], annual["tsi"], color="#1e3a8a", linewidth=2.4,
                    marker="o", markersize=3.4, label="Annual means")

            ax.set_title("ACRIM Composite TSI — ACRIM Science Team / NOAA NGDC (1978–2013)",
                         fontsize=15, pad=14)
            ax.set_ylabel("Total Solar Irradiance (W m^-2)")
            ax.set_xlabel("Year")
            ax.xaxis.set_major_locator(mdates.YearLocator(2))
            ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y"))
            plt.setp(ax.get_xticklabels(), rotation=45, ha="right", fontsize=8.5)
            ax.set_xlim(frame["date"].min(), frame["date"].max())
            ax.legend(loc="upper right", frameon=True)

            note = (
                "Source: ACRIM3composite_nnaa3.txt — NOAA NCEI/NGDC\n"
                "WARNING: Old ACRIM radiometric scale (~1366 W/m2); ~5 W/m2 above modern SI-traceable scale\n"
                "Ref: Willson (1997) Science 277, 1963 · DOI 10.1126/science.277.5334.1963\n"
                "Attribution: ACRIM Science Team | hosted by NOAA NCEI/NGDC"
            )
            ax.text(0.015, 0.03, note, transform=ax.transAxes, fontsize=8.0,
                    bbox=dict(boxstyle="round,pad=0.35", facecolor="white",
                              edgecolor="#cbd5e1", alpha=0.96))

            fig.tight_layout()
            fig.savefig(OUT_PNG, bbox_inches="tight")
        finally:
            # Release the figure even if drawing or saving fails.
            plt.close(fig)


def write_outputs(frame: pd.DataFrame) -> None:
    """Write the daily CSV and the plain-text manifest into ``OUT_DIR``.

    Args:
        frame: Daily records with a datetime64 ``date`` column plus ``tsi``
            (and any other columns, which are copied to the CSV as-is).
            Must be non-empty: the manifest formats the min/max date.

    Side effects:
        Creates ``OUT_DIR`` if needed, writes ``OUT_CSV`` with dates
        formatted as ``YYYY-MM-DD``, and writes ``OUT_MANIFEST`` as UTF-8.
    """
    OUT_DIR.mkdir(parents=True, exist_ok=True)

    # Format dates on a copy so the caller's frame keeps its datetime column.
    out = frame.copy()
    out["date"] = out["date"].dt.strftime("%Y-%m-%d")
    out.to_csv(OUT_CSV, index=False)

    # resample() emits every calendar bin between the first and last date,
    # so these counts include bins that contain no observations.
    tsi_by_date = frame.set_index("date")["tsi"]
    monthly_rows = tsi_by_date.resample("MS").mean().shape[0]
    annual_rows = tsi_by_date.resample("YS").mean().shape[0]

    lines = [
        "ACRIM Composite TSI manifest",
        "",
        "Dataset: ACRIM composite (Willson 1997; updated through ~2013)",
        "File: ACRIM3composite_nnaa3.txt",
        "Instruments: ACRIM I (SMM), ACRIM II (UARS), ACRIM III (ACRIMSAT)",
        "Publisher: ACRIM Science Team; hosted by NOAA NCEI/NGDC",
        # One manifest line; the URL is split only to keep the source narrow.
        (
            "Source URL: https://www.ngdc.noaa.gov/stp/space-weather/solar-data/solar-indices/"
            "total-solar-irradiance/acrim-3/ACRIM3composite_nnaa3.txt"
        ),
        "Raw file: tsi_raw/acrim_composite/raw/ACRIM3composite_nnaa3.txt",
        "",
        f"Date span: {frame['date'].min().date()} to {frame['date'].max().date()}",
        f"Daily rows: {len(frame)}",
        f"Monthly bins: {monthly_rows}",
        f"Annual bins: {annual_rows}",
        f"TSI range: {frame['tsi'].min():.4f} to {frame['tsi'].max():.4f} W m^-2",
        "",
        "Scale note: Old ACRIM radiometric scale (~1366 W/m²), ~5 W/m² above modern",
        "  SI-traceable scale. See Kopp & Lean (2011) GRL 38, L01706.",
        "",
        "Attribution (required):",
        "  Willson, R.C. (1997) Total solar irradiance trend during solar cycles 21 and 22.",
        "  Science 277, 1963-1965. DOI: 10.1126/science.277.5334.1963",
        "  Acknowledge ACRIM Science Team and NOAA NCEI/NGDC.",
    ]
    # The manifest contains non-ASCII text ("²"); pin the encoding rather
    # than depending on the platform's locale default.
    OUT_MANIFEST.write_text("\n".join(lines) + "\n", encoding="utf-8")


def main() -> None:
    """Run the full pipeline: load the data, draw the chart, write outputs.

    Prints the names of the saved artifacts followed by a short summary
    (row count, date span, TSI range).

    Raises:
        FileNotFoundError: If ``INPUT_FILE`` does not exist.
        ValueError: If no valid rows could be parsed from the input.
    """
    if not INPUT_FILE.exists():
        raise FileNotFoundError(f"Missing input: {INPUT_FILE}")
    OUT_DIR.mkdir(parents=True, exist_ok=True)

    frame = load_daily()
    if frame.empty:
        raise ValueError("No valid rows parsed from ACRIM composite file")

    build_chart(frame)
    write_outputs(frame)

    for artifact in (OUT_PNG, OUT_CSV, OUT_MANIFEST):
        print("saved", artifact.name)

    dates = frame["date"]
    tsi = frame["tsi"]
    first_day = dates.min().date()
    last_day = dates.max().date()
    print("daily_rows", len(frame))
    print("date_span", first_day, "to", last_day)
    print("tsi_range", round(tsi.min(), 4), "to", round(tsi.max(), 4))


# Run the pipeline only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
