"""Build ISSI/Dudok de Wit composite TSI charts from TSI_composite_DeWit.txt.

Source: International Space Science Institute (ISSI) TSI team
Version: v1.1, 17-Nov-1978 to 31-Dec-2015
Author: T. Dudok de Wit (University of Orléans)
Reference: Dudok de Wit, T. et al. (2017) Methodology to create a new
           total solar irradiance record: Making a composite out of
           multiple space-based measurements. Journal of Geophysical
           Research: Space Physics 122, 5390-5407.
           DOI: 10.1002/2016JA023492
Attribution: T. Dudok de Wit / ISSI TSI team; data from issibern.ch.
Data URL: https://www.issibern.ch/teams/solarirradiance/TSI_composite_DeWit.txt

Columns (space-separated, comment lines start with ;):
  year  month  day  hour  julian_date  TSIo  dTSIo  TSIc  dTSIc  nobs
  TSIo = original-data composite; TSIc = Fröhlich-corrected composite (PMOD-style)
We plot TSIc (zero-based column index 7) as the primary series: the corrected
version follows the PMOD correction philosophy and is the commonly cited benchmark.
"""
from pathlib import Path

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd


# All paths are resolved relative to the directory containing this script.
BASE_DIR = Path(__file__).parent

# Raw composite file as downloaded from the ISSI team page.
INPUT_FILE = BASE_DIR.joinpath(
    "tsi_raw",
    "issi_dewit_composite",
    "raw",
    "TSI_composite_DeWit.txt",
)

# Generated artifacts (chart, daily CSV, manifest) share one output directory.
OUT_DIR = BASE_DIR.joinpath("tsi_satellite_sources")
OUT_PNG = OUT_DIR.joinpath("issi_dewit_composite_tsi_daily_monthly_annual.png")
OUT_CSV = OUT_DIR.joinpath("issi_dewit_composite_tsi_daily.csv")
OUT_MANIFEST = OUT_DIR.joinpath("issi_dewit_composite_tsi_manifest.txt")


def load_daily(path: "Path | str | None" = None) -> pd.DataFrame:
    """Parse the ISSI/De Wit composite file into a daily DataFrame.

    Data lines hold whitespace-separated fields:
    ``year month day hour julian_date TSIo dTSIo TSIc dTSIc nobs``.
    Lines starting with ``;`` are comments. TSIc (zero-based field 7) is
    the primary plotted series; TSIo is retained for the CSV export.

    Args:
        path: File to parse. When omitted, the module-level ``INPUT_FILE``
            is used.

    Returns:
        A DataFrame sorted by ``date`` with columns ``date``,
        ``tsi_corrected``, ``unc_corrected``, ``tsi_original`` and
        ``unc_original``. Lines are skipped when they are blank, comments,
        have fewer than 9 fields, contain non-numeric fields, do not form
        a valid calendar date, or carry a TSIc outside the open interval
        (1200, 1500); NaN TSIc values fail that range test and are
        dropped as well.
    """
    source = Path(INPUT_FILE if path is None else path)
    # Only ASCII numeric fields are parsed, so undecodable bytes (for example
    # Latin-1 accented names in the ';' header comments) are replaced instead
    # of aborting the whole load with UnicodeDecodeError.
    text = source.read_text(encoding="utf-8", errors="replace")

    rows = []
    for line in text.splitlines():
        stripped = line.strip()
        if not stripped or stripped.startswith(";"):
            continue
        parts = stripped.split()
        if len(parts) < 9:
            continue
        try:
            year = int(parts[0])
            month = int(parts[1])
            day = int(parts[2])
            tsio = float(parts[5])    # original composite (retained in CSV)
            dtsio = float(parts[6])   # uncertainty on original
            tsic = float(parts[7])    # corrected composite (PMOD-style)
            dtsic = float(parts[8])   # uncertainty on corrected
        except (ValueError, IndexError):
            continue
        # Plausibility window; comparisons with NaN are False, so missing
        # values encoded as NaN are rejected here too.
        if not (1200.0 < tsic < 1500.0):
            continue
        try:
            date = pd.Timestamp(year=year, month=month, day=day)
        except ValueError:
            # Impossible calendar date (e.g. month 13): skip the line.
            continue
        rows.append((date, tsic, dtsic, tsio, dtsio))

    df = pd.DataFrame(rows, columns=["date", "tsi_corrected", "unc_corrected",
                                     "tsi_original", "unc_original"])
    return df.sort_values("date").reset_index(drop=True)


def build_chart(frame: pd.DataFrame) -> None:
    """Render the daily, monthly-mean and annual-mean TSIc chart to ``OUT_PNG``.

    Args:
        frame: Daily data with a datetime ``date`` column and a
            ``tsi_corrected`` column (the frame ``main`` obtains from
            ``load_daily``).

    The output directory is created if missing, the plotting style is
    applied only for the duration of this call, and the figure is always
    closed, even when plotting or saving fails.
    """
    daily = frame.set_index("date")["tsi_corrected"]
    monthly = daily.resample("MS").mean().reset_index(name="tsi")
    annual = daily.resample("YS").mean().reset_index(name="tsi")
    # Annual bins are stamped on 1 January; shift the marker to mid-year.
    annual["plot_date"] = annual["date"] + pd.offsets.Day(181)

    # Same guarantee write_outputs gives: do not rely on the caller having
    # created the output directory before savefig runs.
    OUT_DIR.mkdir(parents=True, exist_ok=True)

    # style.context restores the previous rcParams on exit, so the whitegrid
    # style does not leak into other figures created in this process.
    with plt.style.context("seaborn-v0_8-whitegrid"):
        fig, ax = plt.subplots(figsize=(12.5, 6.8), dpi=170)
        try:
            ax.scatter(frame["date"], frame["tsi_corrected"], s=10, color="#c4b5fd",
                       alpha=0.45, edgecolors="none", label="Daily means (TSIc, corrected)")
            ax.plot(monthly["date"], monthly["tsi"], color="#7c3aed", linewidth=1.8,
                    label="Monthly means")
            ax.plot(annual["plot_date"], annual["tsi"], color="#3b0764", linewidth=2.4,
                    marker="o", markersize=3.4, label="Annual means")

            ax.set_title(
                "ISSI/Dudok de Wit TSI Composite v1.1 — Corrected series (TSIc, PMOD-style)\n"
                "ISSI TSI team / T. Dudok de Wit (1978–2015)",
                fontsize=14, pad=14)
            ax.set_ylabel("Total Solar Irradiance (W m^-2)")
            ax.set_xlabel("Year")
            ax.xaxis.set_major_locator(mdates.YearLocator(4))
            ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y"))
            plt.setp(ax.get_xticklabels(), rotation=45, ha="right", fontsize=8.5)
            ax.set_xlim(frame["date"].min(), frame["date"].max())
            ax.legend(loc="upper right", frameon=True)

            # Provenance box drawn inside the axes, bottom-left corner.
            note = (
                "Source: TSI_composite_DeWit.txt — ISSI TSI team / T. Dudok de Wit\n"
                "v1.1 · 1978–2015 · Plotted series: TSIc (Fröhlich-corrected, PMOD-style)\n"
                "Interim stand-in for PMOD composite (official FTP currently unavailable)\n"
                "Ref: Dudok de Wit et al. (2017) JGR Space Physics 122, 5390 · DOI 10.1002/2016JA023492\n"
                "Attribution: T. Dudok de Wit / ISSI TSI team — issibern.ch"
            )
            ax.text(0.015, 0.03, note, transform=ax.transAxes, fontsize=7.8,
                    bbox=dict(boxstyle="round,pad=0.35", facecolor="white",
                              edgecolor="#cbd5e1", alpha=0.96))

            fig.tight_layout()
            fig.savefig(OUT_PNG, bbox_inches="tight")
        finally:
            # Release the figure even if plotting or saving raised.
            plt.close(fig)


def write_outputs(frame: pd.DataFrame) -> None:
    """Write the daily CSV (``OUT_CSV``) and the text manifest (``OUT_MANIFEST``).

    Args:
        frame: Daily data with a datetime ``date`` column plus the
            ``tsi_corrected`` and ``tsi_original`` columns.

    The output directory is created if it does not exist. The manifest is
    written as UTF-8 explicitly because it contains non-ASCII characters
    (accented names, en/em dashes) that a locale-default encoding may be
    unable to encode.
    """
    OUT_DIR.mkdir(parents=True, exist_ok=True)

    # Export dates as ISO strings without mutating the caller's frame.
    out = frame.copy()
    out["date"] = out["date"].dt.strftime("%Y-%m-%d")
    out.to_csv(OUT_CSV, index=False)

    # Bin counts include calendar bins without data, as produced by resample.
    corrected = frame.set_index("date")["tsi_corrected"]
    monthly_rows = corrected.resample("MS").mean().shape[0]
    annual_rows = corrected.resample("YS").mean().shape[0]

    lines = [
        "ISSI/Dudok de Wit Composite TSI manifest",
        "",
        "Dataset: TSI composite v1.1 (ISSI TSI team)",
        "Author: T. Dudok de Wit, University of Orléans",
        "Provider: International Space Science Institute (ISSI)",
        "Source URL: https://www.issibern.ch/teams/solarirradiance/TSI_composite_DeWit.txt",
        "Raw file: tsi_raw/issi_dewit_composite/raw/TSI_composite_DeWit.txt",
        "",
        "Plotted series: TSIc (Fröhlich-corrected composite — PMOD-style corrections)",
        "Also retained in CSV: TSIo (original-data composite)",
        "Interim stand-in for PMOD composite (PMOD official FTP unreachable from build env)",
        "",
        f"Date span (parsed): {frame['date'].min().date()} to {frame['date'].max().date()}",
        f"Daily rows: {len(frame)}",
        f"Monthly bins: {monthly_rows}",
        f"Annual bins: {annual_rows}",
        f"TSIc range: {frame['tsi_corrected'].min():.4f} to {frame['tsi_corrected'].max():.4f} W m^-2",
        f"TSIo range: {frame['tsi_original'].min():.4f} to {frame['tsi_original'].max():.4f} W m^-2",
        "",
        "Method: blind source separation; 1/f noise model; uncertainty per day",
        "Instruments contributing: HF (Nimbus-7), ACRIM1 (SMM), ERBE, ACRIM2 (UARS),",
        "  DIARAD/VIRGO, PMO6V/VIRGO (SOHO), ACRIM3 (ACRIMSAT), TIM (SORCE)",
        "",
        "Attribution (required):",
        "  Dudok de Wit, T., Kopp, G., Fröhlich, C., Schöll, M. (2017)",
        "  Methodology to create a new total solar irradiance record.",
        "  Journal of Geophysical Research: Space Physics 122, 5390–5407.",
        "  DOI: 10.1002/2016JA023492",
        "  Acknowledge T. Dudok de Wit and the ISSI TSI team.",
    ]
    # Explicit UTF-8: the locale default would raise UnicodeEncodeError under
    # an ASCII/C locale because of the non-ASCII characters above.
    OUT_MANIFEST.write_text("\n".join(lines) + "\n", encoding="utf-8")


def main() -> None:
    """Build the chart, CSV and manifest, then print a short run summary.

    Raises:
        FileNotFoundError: If ``INPUT_FILE`` does not exist.
        ValueError: If no valid data rows could be parsed from it.
    """
    if not INPUT_FILE.exists():
        raise FileNotFoundError(f"Missing input: {INPUT_FILE}")
    OUT_DIR.mkdir(parents=True, exist_ok=True)

    daily = load_daily()
    if daily.empty:
        raise ValueError("No valid rows parsed from ISSI/De Wit composite file")

    build_chart(daily)
    write_outputs(daily)

    # Report the generated artifacts in the order they are listed.
    for artifact in (OUT_PNG, OUT_CSV, OUT_MANIFEST):
        print("saved", artifact.name)

    dates = daily["date"]
    corrected = daily["tsi_corrected"]
    first_day = dates.min().date()
    last_day = dates.max().date()
    lowest = round(corrected.min(), 4)
    highest = round(corrected.max(), 4)

    print("daily_rows", len(daily))
    print("date_span", first_day, "to", last_day)
    print("tsic_range", lowest, "to", highest)

# Run the build only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
