"""Build ACRIM I / Solar Maximum Mission (SMM) TSI charts from NOAA NGDC SMM.PLT.

Source: NOAA NGDC archive (SMM.PLT), accessed via Wayback Machine HTTPS copy.
Instrument: ACRIM (Active Cavity Radiometer Irradiance Monitor) I on SMM (1980-1989).

Scale note: Values are on the old SMM/ACRIM-I absolute scale (~1367-1368 W/m²),
approximately +6-7 W/m² above the modern SI-traceable scale used by TIM instruments.
This is a well-known systematic offset (Kopp & Lean 2011, GRL). No rescaling is applied.
Attribution: NOAA NCEI / NGDC solar data archive; ACRIM science team (R.C. Willson).
"""
from pathlib import Path

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd


# Every path below is derived from the directory that contains this script.
BASE_DIR = Path(__file__).parent

# Raw ACRIM-I / SMM daily file consumed by the parser.
INPUT_FILE = BASE_DIR.joinpath("tsi_raw", "acrim_smm", "raw", "SMM.PLT")

# Output folder and the three artifacts written into it.
OUT_DIR = BASE_DIR.joinpath("tsi_satellite_sources")
OUT_PNG = OUT_DIR.joinpath("acrim_smm_tsi_daily_monthly_annual.png")
OUT_CSV = OUT_DIR.joinpath("acrim_smm_tsi_daily.csv")
OUT_MANIFEST = OUT_DIR.joinpath("acrim_smm_tsi_manifest.txt")


def load_daily(path: "Path | None" = None) -> pd.DataFrame:
    """Parse an SMM.PLT-style text file into a date-sorted daily TSI frame.

    Each data line is whitespace-separated ``YYYY MM DD TSI [uncertainty]``.
    A line is skipped when it has fewer than four fields, when its date or
    numeric fields fail to parse, or when its TSI value is not strictly
    between 1300 and 1500 (fill / sentinel values).

    Args:
        path: File to read. When omitted, the module-level ``INPUT_FILE``
            is used.

    Returns:
        DataFrame with columns ``date``, ``tsi`` and ``uncertainty``, sorted
        ascending by date with a fresh 0..n-1 index. ``uncertainty`` is NaN
        for lines that carry no fifth field.
    """
    source = INPUT_FILE if path is None else Path(path)

    rows = []
    for line in source.read_text(encoding="latin-1").splitlines():
        parts = line.split()
        if len(parts) < 4:
            continue
        try:
            date = pd.to_datetime(
                f"{parts[0]}-{parts[1].zfill(2)}-{parts[2].zfill(2)}"
            )
            tsi = float(parts[3])
            unc = float(parts[4]) if len(parts) >= 5 else float("nan")
        except ValueError:
            # Header / comment lines and malformed rows land here.
            continue
        if not (1300.0 < tsi < 1500.0):  # exclude fill / sentinel values
            continue
        rows.append((date, tsi, unc))

    frame = pd.DataFrame(rows, columns=["date", "tsi", "uncertainty"])
    # Mergesort is stable, so rows sharing a date keep their file order.
    return frame.sort_values("date", kind="mergesort").reset_index(drop=True)


def build_chart(frame: pd.DataFrame) -> None:
    """Render daily, monthly-mean and annual-mean TSI to ``OUT_PNG``.

    Args:
        frame: Daily data with a datetime ``date`` column and a numeric
            ``tsi`` column.

    Monthly and annual series are calendar-bin means of ``tsi`` (bins start
    on the first day of the month / year). Annual points are drawn 181 days
    after the start of their year so they sit near mid-year on the time
    axis. The figure is always closed, even when plotting or saving fails.
    """
    tsi_by_date = frame.set_index("date")["tsi"]
    monthly = tsi_by_date.resample("MS").mean().reset_index(name="tsi")
    annual = tsi_by_date.resample("YS").mean().reset_index(name="tsi")
    annual["plot_date"] = annual["date"] + pd.offsets.Day(181)

    # savefig does not create missing parent directories.
    OUT_PNG.parent.mkdir(parents=True, exist_ok=True)

    # Apply the style only for this figure instead of mutating global rcParams.
    with plt.style.context("seaborn-v0_8-whitegrid"):
        fig, ax = plt.subplots(figsize=(12.5, 6.8), dpi=170)
        try:
            ax.scatter(frame["date"], frame["tsi"], s=10, color="#fde68a", alpha=0.55,
                       edgecolors="none", label="Daily values (SMM/ACRIM-I)")
            ax.plot(monthly["date"], monthly["tsi"], color="#d97706", linewidth=1.8,
                    label="Monthly means")
            ax.plot(annual["plot_date"], annual["tsi"], color="#92400e", linewidth=2.4,
                    marker="o", markersize=3.4, label="Annual means")

            ax.set_title("ACRIM I / Solar Maximum Mission (SMM) TSI — 1980–1989",
                         fontsize=15, pad=14)
            ax.set_ylabel("Total Solar Irradiance (W m^-2)")
            ax.set_xlabel("Year")
            ax.xaxis.set_major_locator(mdates.YearLocator(1))
            ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y"))
            ax.set_xlim(frame["date"].min(), frame["date"].max())
            ax.legend(loc="upper right", frameon=True)

            # Provenance / scale box in the lower-left corner of the axes.
            note = (
                "Source: SMM.PLT — NOAA NGDC solar irradiance archive (via Wayback Machine HTTPS)\n"
                "Scale: old SMM/ACRIM-I absolute scale (~1367–1368 W/m²);\n"
                "  approximately +6–7 W/m² above modern SI-traceable TIM scale (Kopp & Lean 2011)\n"
                "Attribution: NOAA NCEI/NGDC archive | ACRIM science team (R.C. Willson)"
            )
            ax.text(0.015, 0.03, note, transform=ax.transAxes, fontsize=8.2,
                    bbox=dict(boxstyle="round,pad=0.35", facecolor="white",
                              edgecolor="#cbd5e1", alpha=0.96))

            fig.tight_layout()
            fig.savefig(OUT_PNG, bbox_inches="tight")
        finally:
            plt.close(fig)


def write_outputs(frame: pd.DataFrame) -> None:
    """Write the daily CSV (``OUT_CSV``) and text manifest (``OUT_MANIFEST``).

    Args:
        frame: Non-empty daily data with a datetime ``date`` column plus
            ``tsi`` and ``uncertainty`` columns.

    ``OUT_DIR`` is created if missing. The CSV holds every row of ``frame``
    with dates formatted as ``YYYY-MM-DD``. The manifest records provenance,
    the date span, row / bin counts, the TSI range and the scale note. Bin
    counts are the number of calendar months / years spanned by the resample,
    including any bins that contain no data.
    """
    OUT_DIR.mkdir(parents=True, exist_ok=True)

    daily = frame.copy()
    daily["date"] = daily["date"].dt.strftime("%Y-%m-%d")
    daily.to_csv(OUT_CSV, index=False)

    tsi_by_date = frame.set_index("date")["tsi"]
    monthly_rows = tsi_by_date.resample("MS").mean().shape[0]
    annual_rows = tsi_by_date.resample("YS").mean().shape[0]

    lines = [
        "ACRIM I / SMM TSI manifest",
        "",
        "Dataset: Solar Maximum Mission (SMM) ACRIM-I daily TSI (NOAA NGDC SMM.PLT)",
        "Instrument: ACRIM I (Active Cavity Radiometer Irradiance Monitor) on SMM",
        "Platform: Solar Maximum Mission (SMM, NASA)",
        "Publisher: NOAA National Centers for Environmental Information (NCEI) / NGDC",
        # The label says HTTPS, so the recorded URL uses the https scheme.
        "Source URL (Wayback HTTPS): https://web.archive.org/web/20210109004518id_/ftp://ftp.ngdc.noaa.gov/STP/SOLAR_DATA/SOLAR_IRRADIANCE/SMM.PLT",
        "NCEI landing page: https://www.ncei.noaa.gov/products/space-weather/legacy-data/total-solar-irradiance",
        "Raw file: tsi_raw/acrim_smm/raw/SMM.PLT",
        "",
        f"Date span: {frame['date'].min().date()} to {frame['date'].max().date()}",
        f"Daily rows (valid): {len(frame)}",
        f"Monthly bins: {monthly_rows}",
        f"Annual bins: {annual_rows}",
        f"TSI range: {frame['tsi'].min():.4f} to {frame['tsi'].max():.4f} W m^-2",
        "",
        "Scale note:",
        "  Values on the old SMM/ACRIM-I absolute scale (~1367-1368 W/m^2).",
        "  This is approximately +6-7 W/m^2 above the modern SI-traceable scale used by TIM.",
        "  Reference: Kopp, G. & Lean, J.L. (2011), GRL 38, L01706.",
        "  No rescaling is applied in this pipeline.",
        "",
        "Attribution:",
        "  NOAA NCEI / NGDC solar irradiance archive.",
        "  ACRIM Science Team: Willson, R.C. (1979) Active Cavity Radiometer Type IV, Appl. Opt. 18, 179.",
    ]
    # Explicit encoding keeps the manifest independent of the platform default.
    OUT_MANIFEST.write_text("\n".join(lines) + "\n", encoding="utf-8")


def main() -> None:
    """Run the pipeline: parse SMM.PLT, draw the chart, write CSV and manifest.

    Raises:
        FileNotFoundError: ``INPUT_FILE`` does not exist.
        ValueError: No rows survive parsing and filtering.
    """
    if not INPUT_FILE.exists():
        raise FileNotFoundError(f"Missing input: {INPUT_FILE}")

    frame = load_daily()
    if frame.empty:
        raise ValueError("No valid SMM.PLT rows after filtering")

    # The chart is saved into OUT_DIR before write_outputs() runs, so the
    # directory has to exist up front or a fresh checkout fails on savefig.
    OUT_DIR.mkdir(parents=True, exist_ok=True)

    build_chart(frame)
    write_outputs(frame)

    # Short run summary on stdout.
    print("saved", OUT_PNG.name)
    print("saved", OUT_CSV.name)
    print("saved", OUT_MANIFEST.name)
    print("daily_rows", len(frame))
    print("date_span", frame["date"].min().date(), "to", frame["date"].max().date())
    print("tsi_range", round(frame["tsi"].min(), 4), "to", round(frame["tsi"].max(), 4))


# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
