"""Build PMOD Composite Data Fusion (CPMDF) TSI charts.

Source: PMOD/WRC via GEO Knowledge Hub (GKH) / AstroMaterials (IEDA)
Version: July 2025 (v1)
File: MergedPMOD_NobaselineScaleCycle23_JPM_July2025_v1.txt
DOI: 10.26022/IEDA/112238 (methodology ref)
Attribution: PMOD/WRC; Montillet et al. (2022)

Columns (space-separated):
  0: Time [Dec. Year]
  1: Time [JD]
  2: TSI [W.m2]
  3: Unc
  4: TSI after corr. [W.m2]
  5: Unc.
We plot 'TSI after corr.' (0-based index 4, i.e. the fifth column) as the
primary fusion product.
"""
from pathlib import Path

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd


# All paths are anchored at the directory that contains this script.
BASE_DIR = Path(__file__).parent

# Raw PMOD fusion text file that the script reads.
INPUT_FILE = BASE_DIR.joinpath(
    "tsi_raw",
    "pmod_composite",
    "raw",
    "MergedPMOD_NobaselineScaleCycle23_JPM_July2025_v1.txt",
)

# Generated artefacts (chart, cleaned CSV, manifest) share one directory.
OUT_DIR = BASE_DIR.joinpath("tsi_satellite_sources")
OUT_PNG = OUT_DIR.joinpath("pmod_composite_tsi_daily_monthly_annual.png")
OUT_CSV = OUT_DIR.joinpath("pmod_composite_tsi_daily.csv")
OUT_MANIFEST = OUT_DIR.joinpath("pmod_composite_tsi_manifest.txt")


def load_daily() -> pd.DataFrame:
    """Read the PMOD fusion file and return the cleaned daily records.

    The file at ``INPUT_FILE`` is parsed as whitespace-separated text with
    its first line skipped and six columns named explicitly. The Julian
    Date column is converted to timestamps rounded to the nearest day.

    Returns:
        A frame with columns ``date``, ``tsi_corr``, ``unc_corr``, ``tsi``
        and ``unc``, sorted by ``date``. Only rows whose ``tsi_corr``
        exceeds 1300 are kept (rows with a missing ``tsi_corr`` fail that
        comparison and are dropped as well).
    """
    column_names = ["dec_year", "jd", "tsi", "unc", "tsi_corr", "unc_corr"]
    # The first line of the file is a header; names are supplied here instead.
    raw = pd.read_csv(
        INPUT_FILE,
        sep="\\s+",
        skiprows=1,
        header=None,
        names=column_names,
    )

    # Julian Date -> Timestamp, then snap to whole days.
    # NOTE(review): round("D") resolves exact-noon values as ties; confirm the
    # JD column does not sit on .0 boundaries, or neighbouring days may merge.
    stamps = pd.to_datetime(raw["jd"], unit="D", origin="julian")
    raw["date"] = stamps.dt.round("D")

    # Presumably the threshold removes fill/placeholder values -- confirm
    # against the data provider's missing-value convention.
    is_plausible = raw["tsi_corr"] > 1300
    kept = raw[is_plausible].copy()

    selected = kept[["date", "tsi_corr", "unc_corr", "tsi", "unc"]]
    return selected.sort_values("date")


def build_chart(frame: pd.DataFrame) -> None:
    """Plot daily, monthly-mean and annual-mean TSI and save it to ``OUT_PNG``.

    Args:
        frame: Daily records with a datetime ``date`` column and a numeric
            ``tsi_corr`` column.

    The output directory is created if needed, and the figure is always
    closed, even when drawing or saving fails.
    """
    daily = frame.set_index("date")["tsi_corr"]
    monthly = daily.resample("MS").mean().reset_index(name="tsi")
    annual = daily.resample("YS").mean().reset_index(name="tsi")
    # Annual bins are labelled at 1 January; shift the marker to mid-year so
    # it sits in the middle of the period it averages.
    annual["plot_date"] = annual["date"] + pd.offsets.Day(181)

    # Do not rely on the caller having created the output directory.
    OUT_PNG.parent.mkdir(parents=True, exist_ok=True)

    plt.style.use("seaborn-v0_8-whitegrid")
    fig, ax = plt.subplots(figsize=(12.5, 6.8), dpi=170)
    try:
        ax.scatter(frame["date"], frame["tsi_corr"], s=8, color="#fca5a5",
                   alpha=0.4, edgecolors="none",
                   label="Daily means (TSI after corr.)")
        ax.plot(monthly["date"], monthly["tsi"], color="#ef4444",
                linewidth=1.6, label="Monthly means")
        ax.plot(annual["plot_date"], annual["tsi"], color="#7f1d1d",
                linewidth=2.2, marker="o", markersize=3.2,
                label="Annual means")

        ax.set_title(
            "PMOD Composite TSI Data Fusion (July 2025 update)\n"
            "PMOD/WRC \u00b7 Composite PMOD Data Fusion (CPMDF)",
            fontsize=14, pad=14)
        ax.set_ylabel("Total Solar Irradiance (W m^-2)")
        ax.set_xlabel("Year")
        ax.xaxis.set_major_locator(mdates.YearLocator(4))
        ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y"))
        plt.setp(ax.get_xticklabels(), rotation=45, ha="right", fontsize=8.5)
        ax.set_xlim(frame["date"].min(), frame["date"].max())
        ax.legend(loc="upper right", frameon=True)

        # Provenance box drawn in the lower-left corner of the axes.
        note = (
            "Source: MergedPMOD_NobaselineScaleCycle23_JPM_July2025_v1.txt\n"
            "PMOD/WRC \u00b7 Modern Data Fusion product (CPMDF) \u00b7 July 2025 v1\n"
            "Plotted series: 'TSI after corr.' (merged using statistical data fusion)\n"
            "Ref: Montillet et al. (2022) / AstroMaterials DOI 10.26022/IEDA/112238\n"
            "Attribution: PMOD/WRC; J.-P. Montillet; M. Sch\u00f6ll"
        )
        ax.text(0.015, 0.03, note, transform=ax.transAxes, fontsize=7.8,
                bbox=dict(boxstyle="round,pad=0.35", facecolor="white",
                          edgecolor="#cbd5e1", alpha=0.96))

        fig.tight_layout()
        fig.savefig(OUT_PNG, bbox_inches="tight")
    finally:
        # pyplot keeps every open figure registered until it is closed, so
        # release it on the failure path as well.
        plt.close(fig)


def write_outputs(frame: pd.DataFrame) -> None:
    """Write the daily series to ``OUT_CSV`` and a summary to ``OUT_MANIFEST``.

    Args:
        frame: Daily records with a datetime ``date`` column and a numeric
            ``tsi_corr`` column; every column of the frame is written to
            the CSV.

    The output directory is created if it does not exist. The manifest is
    always written as UTF-8.
    """
    OUT_DIR.mkdir(parents=True, exist_ok=True)

    # Format dates on a copy so the caller's datetime column stays intact
    # (the manifest below still needs real timestamps).
    out = frame.copy()
    out["date"] = out["date"].dt.strftime("%Y-%m-%d")
    out.to_csv(OUT_CSV, index=False)

    lines = [
        "PMOD Composite TSI Data Fusion manifest",
        "",
        "Dataset: Composite PMOD Data Fusion (CPMDF)",
        "Provider: PMOD/WRC",
        "Source: GEO Knowledge Hub / AstroMaterials (IEDA)",
        f"Raw file: {INPUT_FILE.relative_to(BASE_DIR)}",
        "",
        "Plotted series: 'TSI after corr.' (Column 5) \u2014 the primary fusion product",
        "Also in CSV: 'TSI' (Column 3) and uncertainties",
        "",
        f"Date span: {frame['date'].min().date()} to {frame['date'].max().date()}",
        f"Daily rows: {len(frame)}",
        f"TSI range (corr): {frame['tsi_corr'].min():.4f} to {frame['tsi_corr'].max():.4f} W m^-2",
        "",
        "Method: Statistical Data Fusion (CPMDF)",
        "Reference: Montillet et al. (2022), DOI: 10.26022/IEDA/112238",
        "Attribution: PMOD/WRC; J.-P. Montillet; M. Sch\u00f6ll",
    ]
    # The manifest contains non-ASCII characters (em dash, o-umlaut). Pin the
    # encoding so the bytes written do not depend on the platform locale and
    # the write cannot fail with UnicodeEncodeError.
    OUT_MANIFEST.write_text("\n".join(lines) + "\n", encoding="utf-8")


def main() -> None:
    """Run the pipeline: load the daily data, draw the chart, write outputs.

    Raises:
        FileNotFoundError: If ``INPUT_FILE`` does not exist.
    """
    input_present = INPUT_FILE.exists()
    if not input_present:
        raise FileNotFoundError(f"Missing input: {INPUT_FILE}")

    # Make sure the output directory exists before anything is written.
    OUT_DIR.mkdir(parents=True, exist_ok=True)

    daily = load_daily()
    build_chart(daily)
    write_outputs(daily)

    row_count = len(daily)
    print(f"saved {OUT_PNG.name} ({row_count} rows)")


# Run the pipeline only when this file is executed as a script, so importing
# the module has no side effects.
if __name__ == "__main__":
    main()
