"""Build SOHO/VIRGO TSI charts from the full-mission Version 8.0 daily file.

Source: NASA SOHO archive  VIRGO_TSI_daily_V8.txt  (1996-2021)
Reference: Finsterle et al. 2021, Sci Rep 11, 7835  https://doi.org/10.1038/s41598-021-87108-y
Attribution: VIRGO Experiment on the cooperative ESA/NASA Mission SoHO, VIRGO Team /
             PMOD/WRC, Davos, Switzerland
"""
from pathlib import Path

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd


# All paths are anchored at the directory that contains this script.
BASE_DIR = Path(__file__).parent

# Input: the raw daily text file read by load_daily().
INPUT_FILE = BASE_DIR.joinpath(
    "tsi_raw", "soho_virgo_v8", "raw", "VIRGO_TSI_daily_V8.txt"
)

# Outputs: chart, daily CSV export and plain-text manifest share one directory.
OUT_DIR = BASE_DIR.joinpath("tsi_satellite_sources")
OUT_PNG = OUT_DIR.joinpath("soho_virgo_v8_tsi_daily_monthly_annual.png")
OUT_CSV = OUT_DIR.joinpath("soho_virgo_v8_tsi_daily.csv")
OUT_MANIFEST = OUT_DIR.joinpath("soho_virgo_v8_tsi_manifest.txt")


def load_daily(path: "Path | None" = None) -> pd.DataFrame:
    """Parse the VIRGO V8 daily file into a date-sorted DataFrame.

    Col 8 (fused new SI-traceable scale, ~1361 W/m²) is the primary ``tsi``
    series; col 5 (original WRR scale) and col 6 (uncertainty of the fused
    series) are carried along for export.

    Args:
        path: File to parse. When omitted, the module-level ``INPUT_FILE`` is
            used, so existing zero-argument callers behave as before.

    Returns:
        DataFrame with columns ``date``, ``tsi``, ``tsi_orig`` and
        ``uncertainty``, sorted by ``date`` with a fresh 0..n-1 index. A line
        is dropped when it is blank, contains ``;``, has fewer than 8
        whitespace-separated fields, has an unparseable date/number, or has a
        col-8 value outside the open interval (1200, 1500).
    """
    source = INPUT_FILE if path is None else Path(path)

    rows = []
    for raw_line in source.read_text(encoding="latin-1").splitlines():
        line = raw_line.strip()
        if not line or ";" in line:      # skip blanks and all comment/header lines
            continue
        parts = line.split()
        if len(parts) < 8:
            continue
        try:
            # Only the calendar day is kept; any "T..." time suffix is discarded.
            date = pd.to_datetime(parts[0].split("T")[0], format="%Y-%m-%d")
            tsi_new = float(parts[7])     # col 8: VIRGO A+B fused, new SI scale
            tsi_org = float(parts[4])     # col 5: VIRGO A+B fused, original WRR scale
            unc = float(parts[5])         # col 6: uncertainty of fused series
        except (ValueError, IndexError):
            continue
        # Plausibility window; NaN also fails this chained comparison and is dropped.
        if not (1200.0 < tsi_new < 1500.0):
            continue
        rows.append((date, tsi_new, tsi_org, unc))

    frame = pd.DataFrame(rows, columns=["date", "tsi", "tsi_orig", "uncertainty"])
    # With zero rows the column would be object-typed; force datetime so that
    # downstream ``.dt`` / resample calls see a consistent dtype.
    frame["date"] = pd.to_datetime(frame["date"])
    return frame.sort_values("date").reset_index(drop=True)


def build_chart(frame: pd.DataFrame) -> None:
    """Plot daily values with monthly and annual means and save to ``OUT_PNG``.

    Args:
        frame: Daily data with a datetime ``date`` column and a numeric
            ``tsi`` column (the layout produced by ``load_daily``).

    The destination directory is created if missing, and the figure is closed
    even when plotting or saving fails.
    """
    series = frame.set_index("date")["tsi"]
    monthly = series.resample("MS").mean().reset_index(name="tsi")
    annual = series.resample("YS").mean().reset_index(name="tsi")
    # Annual bins are labelled at the start of the year; shift the markers by
    # 181 days so each annual mean is drawn near the middle of its year.
    annual["plot_date"] = annual["date"] + pd.offsets.Day(181)

    # savefig does not create missing directories, and this function may run
    # before anything else has created the output folder.
    OUT_PNG.parent.mkdir(parents=True, exist_ok=True)

    plt.style.use("seaborn-v0_8-whitegrid")
    fig, ax = plt.subplots(figsize=(12.5, 6.8), dpi=170)
    try:
        ax.scatter(frame["date"], frame["tsi"], s=6, color="#67e8f9", alpha=0.30,
                   edgecolors="none", label="Daily values (V8 fused, new SI scale)")
        ax.plot(monthly["date"], monthly["tsi"], color="#0891b2", linewidth=1.8,
                label="Monthly means")
        ax.plot(annual["plot_date"], annual["tsi"], color="#155e75", linewidth=2.4,
                marker="o", markersize=3.4, label="Annual means")

        ax.set_title("SOHO/VIRGO TSI — Version 8.0 Full Mission (1996–2021)", fontsize=15, pad=14)
        ax.set_ylabel("Total Solar Irradiance (W m^-2)")
        ax.set_xlabel("Year")
        ax.xaxis.set_major_locator(mdates.YearLocator(2))
        ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y"))
        # Clamp the x-axis to the daily data span.
        ax.set_xlim(frame["date"].min(), frame["date"].max())
        ax.legend(loc="upper right", frameon=True)

        note = (
            "Source: VIRGO_TSI_daily_V8.txt — NASA SOHO archive (soho.nascom.nasa.gov)\n"
            "Plotted: col 8 — VIRGO A+B fused, new SI-traceable scale (~1361 W/m²)\n"
            "Attribution: Finsterle et al. 2021, Sci Rep 11, 7835 | PMOD/WRC / VIRGO Team"
        )
        # Provenance box anchored in axes coordinates (lower-left corner).
        ax.text(0.015, 0.03, note, transform=ax.transAxes, fontsize=8.5,
                bbox=dict(boxstyle="round,pad=0.35", facecolor="white",
                          edgecolor="#cbd5e1", alpha=0.96))

        fig.tight_layout()
        fig.savefig(OUT_PNG, bbox_inches="tight")
    finally:
        # Release the figure even if plotting or saving raised.
        plt.close(fig)


def write_outputs(frame: pd.DataFrame) -> None:
    """Write the daily CSV export and the plain-text manifest.

    Args:
        frame: Daily data with a datetime ``date`` column plus numeric
            ``tsi`` and ``tsi_orig`` columns; every column is exported to
            the CSV.

    Creates ``OUT_DIR`` if needed, then writes ``OUT_CSV`` (dates formatted
    as ``YYYY-MM-DD``) and ``OUT_MANIFEST``.
    """
    OUT_DIR.mkdir(parents=True, exist_ok=True)

    # Work on a copy so the caller's datetime column is not turned into strings.
    daily = frame.copy()
    daily["date"] = daily["date"].dt.strftime("%Y-%m-%d")
    daily.to_csv(OUT_CSV, index=False)

    # Bin counts cover the whole span from first to last date, so bins that
    # contain no valid days are included in these numbers.
    series = frame.set_index("date")["tsi"]
    monthly_rows = series.resample("MS").mean().shape[0]
    annual_rows = series.resample("YS").mean().shape[0]

    lines = [
        "SOHO/VIRGO TSI Version 8.0 manifest",
        "",
        "Dataset: VIRGO Total Solar Irradiance daily, Version 8.0",
        "Instrument: VIRGO (PMO6-V + DIARAD, A+B fused) on SOHO",
        "Platform: SOHO (ESA/NASA cooperative mission)",
        "Publisher: NASA SOHO archive (soho.nascom.nasa.gov)",
        "Source URL: https://soho.nascom.nasa.gov/data/EntireMissionBundles/VIRGO_TSI_daily_V8.txt",
        "Raw file: tsi_raw/soho_virgo_v8/raw/VIRGO_TSI_daily_V8.txt",
        "",
        f"Date span: {frame['date'].min().date()} to {frame['date'].max().date()}",
        f"Daily rows (valid): {len(frame)}",
        f"Monthly bins: {monthly_rows}",
        f"Annual bins: {annual_rows}",
        f"TSI range (new scale, col 8): {frame['tsi'].min():.4f} to {frame['tsi'].max():.4f} W m^-2",
        f"TSI range (orig scale, col 5): {frame['tsi_orig'].min():.4f} to {frame['tsi_orig'].max():.4f} W m^-2",
        "",
        "Attribution (required):",
        "  Finsterle, W., Montillet, J.P., Schmutz, W. et al.",
        "  The total solar irradiance during the recent solar minimum period measured by SOHO/VIRGO.",
        "  Sci Rep 11, 7835 (2021). https://doi.org/10.1038/s41598-021-87108-y",
        '  Acknowledge: "VIRGO Experiment on the cooperative ESA/NASA Mission SoHO',
        '  from VIRGO Team through PMOD/WRC, Davos, Switzerland"',
        "",
        "Scale note:",
        "  Plotted series uses the NEW SI-traceable scale (col 8, ~1361 W/m^2).",
        "  The original WRR scale (col 5, ~1365 W/m^2) is also exported in the CSV.",
        "  Scales differ by ~0.34% / ~4.6 W/m^2 — see Finsterle et al. 2021 Table 2.",
    ]
    # The manifest contains a non-ASCII dash; pin the encoding so the file is
    # identical regardless of the platform's default locale.
    OUT_MANIFEST.write_text("\n".join(lines) + "\n", encoding="utf-8")


def main() -> None:
    """Run the pipeline end to end and print a short summary.

    Loads the daily file, renders the chart, writes the CSV and manifest,
    then reports the saved file names, row count, date span and TSI range.

    Raises:
        FileNotFoundError: If ``INPUT_FILE`` does not exist.
        ValueError: If parsing yields no rows.
    """
    if not INPUT_FILE.exists():
        raise FileNotFoundError(f"Missing input: {INPUT_FILE}")

    daily = load_daily()
    if daily.empty:
        raise ValueError("No valid VIRGO V8 rows after filtering")

    build_chart(daily)
    write_outputs(daily)

    # Report each artifact by bare file name.
    for artifact in (OUT_PNG, OUT_CSV, OUT_MANIFEST):
        print("saved", artifact.name)

    first_day = daily["date"].min().date()
    last_day = daily["date"].max().date()
    tsi_low = round(daily["tsi"].min(), 4)
    tsi_high = round(daily["tsi"].max(), 4)

    print("daily_rows", len(daily))
    print("date_span", first_day, "to", last_day)
    print("tsi_range (new)", tsi_low, "to", tsi_high)


# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
