# playoff_efficiency.R ---------------------------------------------------------
# Full franchise management efficiency story -- 1995-2025 (excl. 2020)
#
# Regular charts  → $LAHMANS_DBDIR/charts/
# LinkedIn slides → $LAHMANS_DBDIR/charts/linkedin_NN.png  (personal use only)
#
# Causal arc:
#   Develop rookies (Act 1)
#   → Retain best pre-FA talent + FA efficiency (Act 2)
#   → Avoid dead money (Act 3)
#   → Trade well (Act 4)
#   → Survive bad luck (Act 5 -- confounder)
#   → Results: Get to October (Act 6) → Go deep (Act 7)
#   → Scorecard grades all 6 management dimensions (Act 8)
#
# Slide narrative:
#  01  Key Findings
#  02  Framework: WAR + 6 dimensions + 4 eras + payroll context
#  03  Act 1 -- Rookie development (building the pipeline)
#  04  Act 2 -- Keeping stars + paying for performance (FA efficiency + retention)
#  05  Act 3 -- Dead money (the self-inflicted tax)
#  06  Act 4 -- Trading future stars
#  07  Act 5 -- The great confounder: injury luck
#  08  Act 6 -- The payoff: management decisions → October
#  09  Act 7 -- Going deep in October
#  10  Act 8 -- The complete scorecard
#
# Era breakpoints (for future Shiny dynamic filtering):
#   Early FA      1985-1993  (pre-analytics, relationship era)
#   Pre-Moneyball 1994-2002  (FA costs rise, small markets squeezed)
#   Moneyball     2003-2011  (sabermetrics; undervalued skills surface)
#   Big Data      2012-2025  (Statcast; analytics widespread)

suppressPackageStartupMessages({
  library(data.table)
  library(ggplot2)
  library(ggrepel)
  library(ggtext)
  library(DBI)
  library(duckdb)
  library(scales)
  library(png)
  library(grid)
})
.linkedin_dir <- tryCatch(
  dirname(normalizePath(sub("--file=", "", grep("--file=", commandArgs(FALSE), value = TRUE)[1]))),
  error = function(e) getwd()
)
source(file.path(.linkedin_dir, "chart_theme.R"))

n_distinct <- function(x) length(unique(x[!is.na(x)]))

db_path <- file.path(
  path.expand(Sys.getenv("LAHMANS_DBDIR", "~/Documents/Data/baseball")),
  "baseball.duckdb"
)
CHART_DIR <- file.path(
  path.expand(Sys.getenv("LAHMANS_DBDIR", "~/Documents/Data/baseball")),
  "charts"
)
chart_path <- function(f) file.path(CHART_DIR, f)

con <- dbConnect(duckdb(), db_path, read_only = TRUE)
on.exit(dbDisconnect(con, shutdown = TRUE))

qry <- function(sql) setDT(dbGetQuery(con, sql))

START_YR  <- 1995L
END_YR    <- 2025L
YR_RANGE  <- sprintf("%d\u2013%d (excl. 2020)", START_YR, END_YR)

ERA_BREAKS <- list(
  early_fa      = c(1985L, 1993L),
  pre_moneyball = c(1994L, 2002L),
  moneyball     = c(2003L, 2011L),
  big_data      = c(2012L, 2025L)
)
ERA_LABELS <- c(
  early_fa      = "Early Free Agency (1985\u201393)",
  pre_moneyball = "Pre-Moneyball (1994\u20132002)",
  moneyball     = "Moneyball (2003\u201311)",
  big_data      = "Big Data (2012\u201325)"
)
FA_SVC_YRS <- 6L   # service years before FA eligibility

# ── FRANCHISE LOOKUP (registered once; all SQL queries reference fran_lookup) ──
# Teams caps at 2021 but all 2022-2025 playoff teamIDs exist there -- confirmed.
fran_lookup <- qry("
  SELECT t.teamID, t.franchID, tf.franchName
  FROM (
    SELECT teamID, franchID,
      ROW_NUMBER() OVER (PARTITION BY teamID ORDER BY yearID DESC) AS rn
    FROM Teams
  ) t
  JOIN TeamsFranchises tf ON t.franchID = tf.franchID
  WHERE t.rn = 1
")
duckdb_register(con, "fran_lookup", fran_lookup)

fran_lookup[, short := TEAM_SHORT[franchID]]
fran_lookup[is.na(short), short := franchID]
# Guard: warn if any franchise fell back to raw code (means TEAM_SHORT needs updating)
raw_fallbacks <- fran_lookup[short == franchID, unique(franchID)]
if (length(raw_fallbacks))
  warning("TEAM_SHORT missing franchIDs: ", paste(raw_fallbacks, collapse = ", "), call. = FALSE)
fran_key <- unique(fran_lookup[, .(franchID, franchName, short)])
fran_key <- fran_key[!duplicated(franchID)]

# ── PLAYOFF ACHIEVEMENT SCORES ─────────────────────────────────────────────────
playoff_seasons <- qry("
  WITH all_teams AS (
    SELECT teamIDwinner AS teamID, yearID FROM SeriesPost
    WHERE yearID BETWEEN 1995 AND 2025 AND yearID != 2020
    UNION
    SELECT teamIDloser AS teamID, yearID FROM SeriesPost
    WHERE yearID BETWEEN 1995 AND 2025 AND yearID != 2020
  )
  SELECT a.teamID, a.yearID,
    COALESCE(SUM(CASE
      WHEN sw.round = 'WS'                               THEN 8
      WHEN sw.round IN ('ALCS','NLCS')                   THEN 4
      WHEN sw.round IN ('ALDS1','ALDS2','NLDS1','NLDS2') THEN 2
      WHEN sw.round IN ('ALWC','NLWC')
           OR sw.round LIKE 'ALWC%'
           OR sw.round LIKE 'NLWC%'                     THEN 1
      ELSE 0 END), 0) AS achievement_score
  FROM all_teams a
  LEFT JOIN SeriesPost sw
    ON a.teamID = sw.teamIDwinner AND a.yearID = sw.yearID
    AND sw.yearID BETWEEN 1995 AND 2025 AND sw.yearID != 2020
  GROUP BY a.teamID, a.yearID
")
playoff_seasons <- merge(playoff_seasons,
                          fran_lookup[, .(teamID, franchID)], by = "teamID", all.x = TRUE)
playoff_ty <- unique(playoff_seasons[, .(teamID, yearID)])

# ── TEAM RS WAR (1995-2025) ────────────────────────────────────────────────────
# Primary team: most PA (Batting/Pitching 1995-2021) or salary (SalariesAll 2022-2025).
# Confirmed: FG tables have 1 row per player-season; SalariesAll has 1 teamID per
# player-year for 2022+, so no DISTINCT ON needed for the extension segment.
team_rs_war <- qry("
  WITH player_usage AS (
    SELECT playerID, yearID, teamID,
      SUM(AB + COALESCE(BB,0) + COALESCE(HBP,0) + COALESCE(SF,0) + COALESCE(SH,0)) AS usage
    FROM Batting
    WHERE yearID BETWEEN 1995 AND 2021 AND yearID != 2020
    GROUP BY playerID, yearID, teamID
    UNION ALL
    SELECT playerID, yearID, teamID, SUM(IPouts) AS usage
    FROM Pitching
    WHERE yearID BETWEEN 1995 AND 2021 AND yearID != 2020
    GROUP BY playerID, yearID, teamID
    UNION ALL
    -- 2022-2025: salary is the primary-team proxy (1 teamID per player-year confirmed)
    SELECT playerID, yearID, teamID, salary AS usage
    FROM SalariesAll
    WHERE is_actual = TRUE AND yearID BETWEEN 2022 AND 2025 AND teamID IS NOT NULL
  ),
  primary_team AS (
    SELECT DISTINCT ON (playerID, yearID) playerID, yearID, teamID
    FROM player_usage ORDER BY playerID, yearID, usage DESC
  )
  SELECT pt.teamID, pt.yearID, SUM(pw.total_war) AS rs_war
  FROM primary_team pt
  JOIN PlayerWAR pw ON pt.playerID = pw.playerID AND pt.yearID = pw.yearID
  WHERE pw.total_war IS NOT NULL
  GROUP BY pt.teamID, pt.yearID
")
rs_war_playoff <- team_rs_war[playoff_ty, on = c("teamID","yearID"), nomatch = 0]
rs_war_playoff[, rs_war_rank := frank(-rs_war, ties.method = "average"), by = yearID]

# ── POSTSEASON WAR PROXY (1995-2025) ──────────────────────────────────────────
# RS PA:     Batting 1995-2021 (per-stint, has teamID) +
#            BattingStats FG rows 2022+ (season total, NULL teamID → join SalariesAll)
# RS IPouts: Pitching 1995-2021 +
#            FangraphsPitchingWAR 2022+ via PlayerIDs crosswalk + SalariesAll teamID
# PS PA/IP:  BattingPost / PitchingPost now cover 1995-2025 via Retrosheet
rs_pa <- qry("
  SELECT playerID, yearID, teamID,
    SUM(AB + COALESCE(BB,0) + COALESCE(HBP,0) + COALESCE(SF,0) + COALESCE(SH,0)) AS rs_pa
  FROM Batting
  WHERE yearID BETWEEN 1995 AND 2021 AND yearID != 2020
  GROUP BY playerID, yearID, teamID
  UNION ALL
  SELECT b.playerID, b.yearID, s.teamID, SUM(b.PA) AS rs_pa
  FROM BattingStats b
  JOIN SalariesAll s ON b.playerID = s.playerID AND b.yearID = s.yearID
  WHERE b.yearID BETWEEN 2022 AND 2025
    AND s.is_actual = TRUE AND s.teamID IS NOT NULL
  GROUP BY b.playerID, b.yearID, s.teamID
")
rs_ip <- qry("
  SELECT playerID, yearID, teamID, SUM(IPouts) AS rs_ipouts
  FROM Pitching
  WHERE yearID BETWEEN 1995 AND 2021 AND yearID != 2020
  GROUP BY playerID, yearID, teamID
  UNION ALL
  SELECT pi.playerID, fw.Season AS yearID, s.teamID,
    SUM(fw.IP::DOUBLE * 3) AS rs_ipouts
  FROM FangraphsPitchingWAR fw
  JOIN PlayerIDs pi ON fw.playerid::VARCHAR = pi.fg_id
  JOIN SalariesAll s ON pi.playerID = s.playerID AND fw.Season = s.yearID
  WHERE fw.Season BETWEEN 2022 AND 2025
    AND s.is_actual = TRUE AND s.teamID IS NOT NULL
  GROUP BY pi.playerID, fw.Season, s.teamID
")
ps_pa <- qry("SELECT playerID, yearID, teamID,
  SUM(AB + COALESCE(BB,0) + COALESCE(HBP,0) + COALESCE(SF,0) + COALESCE(SH,0)) AS ps_pa
  FROM BattingPost WHERE yearID BETWEEN 1995 AND 2025 AND yearID != 2020
  GROUP BY playerID, yearID, teamID")
ps_ip <- qry("SELECT playerID, yearID, teamID, SUM(IPouts) AS ps_ipouts
  FROM PitchingPost WHERE yearID BETWEEN 1995 AND 2025 AND yearID != 2020
  GROUP BY playerID, yearID, teamID")
war_data <- qry("SELECT playerID, yearID, bat_war, pit_war FROM PlayerWAR WHERE total_war IS NOT NULL")

bat_proxy <- rs_pa[playoff_ty, on = c("teamID","yearID"), nomatch = 0]
bat_proxy <- bat_proxy[war_data[, .(playerID, yearID, bat_war)],
                        on = c("playerID","yearID"), nomatch = 0]
bat_proxy <- merge(bat_proxy, ps_pa, by = c("playerID","yearID","teamID"), all.x = TRUE)
bat_proxy[is.na(ps_pa), ps_pa := 0L]
bat_proxy[, bat_war_proxy := fifelse(rs_pa > 0 & is.finite(bat_war) & bat_war > 0,
                                      bat_war * (ps_pa / rs_pa), 0)]
bat_team <- bat_proxy[, .(rs_bat_war = sum(bat_war, na.rm = TRUE),
                            ps_bat_war = sum(bat_war_proxy, na.rm = TRUE)),
                       by = .(teamID, yearID)]

pit_proxy <- rs_ip[playoff_ty, on = c("teamID","yearID"), nomatch = 0]
pit_proxy <- pit_proxy[war_data[, .(playerID, yearID, pit_war)],
                        on = c("playerID","yearID"), nomatch = 0]
pit_proxy <- merge(pit_proxy, ps_ip, by = c("playerID","yearID","teamID"), all.x = TRUE)
pit_proxy[is.na(ps_ipouts), ps_ipouts := 0L]
pit_proxy[, pit_war_proxy := fifelse(rs_ipouts > 0 & is.finite(pit_war) & pit_war > 0,
                                      pit_war * (ps_ipouts / rs_ipouts), 0)]
pit_team <- pit_proxy[, .(rs_pit_war = sum(pit_war, na.rm = TRUE),
                            ps_pit_war = sum(pit_war_proxy, na.rm = TRUE)),
                       by = .(teamID, yearID)]

war_ret <- merge(bat_team, pit_team, by = c("teamID","yearID"), all = TRUE)
for (col in c("rs_bat_war","rs_pit_war","ps_bat_war","ps_pit_war"))
  set(war_ret, which(is.na(war_ret[[col]])), col, 0)
war_ret[, rs_war_total := rs_bat_war + rs_pit_war]
war_ret[, ps_war_proxy := ps_bat_war + ps_pit_war]
war_ret[, war_ret_pct := fifelse(rs_war_total > 0,
                                  ps_war_proxy / rs_war_total * 100, NA_real_)]

# ── STAGE 1 & 2 FRANCHISE AGGREGATIONS ────────────────────────────────────────
stage2_raw <- Reduce(function(a,b) merge(a,b,by=c("teamID","yearID"),all.x=TRUE),
  list(playoff_seasons,
       rs_war_playoff[, .(teamID, yearID, rs_war, rs_war_rank)],
       war_ret[, .(teamID, yearID, rs_war_total, ps_war_proxy, war_ret_pct)]))
stage2_raw <- merge(stage2_raw, fran_key, by = "franchID", all.x = TRUE)

stage2 <- stage2_raw[!is.na(franchID), .(
  n_playoffs      = .N,
  avg_achievement = mean(achievement_score, na.rm = TRUE),
  avg_rs_war_rank = mean(rs_war_rank,        na.rm = TRUE),
  avg_war_ret_pct = mean(war_ret_pct,        na.rm = TRUE)
), by = .(franchID, short, franchName)]
setorder(stage2, -avg_achievement)

# ── STAGE 1 EFFICIENCY METRICS (1995-2025) ────────────────────────────────────
# All queries use the registered fran_lookup table -- no more repeated CTE.
fa_cost <- qry("
  SELECT fl.franchID,
    MEDIAN(sp.dollars_per_war / 1e6) AS fa_m_per_war
  FROM SalaryPerWAR sp
  JOIN PlayerAcquisitionType pat ON sp.playerID = pat.playerID AND sp.teamID = pat.teamID
  JOIN fran_lookup fl ON sp.teamID = fl.teamID
  WHERE pat.acq_type = 'veteran_acq'
    AND sp.yearID BETWEEN 1995 AND 2025 AND sp.yearID != 2020
  GROUP BY fl.franchID")

playoff_rate <- qry("
  WITH all_s AS (
    SELECT DISTINCT t.franchID, t.yearID FROM Teams t
    WHERE t.yearID BETWEEN 1995 AND 2021 AND t.yearID != 2020
    UNION
    SELECT DISTINCT fl.franchID, s.yearID
    FROM SalariesAll s JOIN fran_lookup fl ON s.teamID = fl.teamID
    WHERE s.is_actual = TRUE AND s.yearID BETWEEN 2022 AND 2025
  ),
  pf AS (
    SELECT DISTINCT fl.franchID, ps.yearID
    FROM (
      SELECT teamIDwinner AS teamID, yearID FROM SeriesPost
        WHERE yearID BETWEEN 1995 AND 2025 AND yearID != 2020
      UNION
      SELECT teamIDloser, yearID FROM SeriesPost
        WHERE yearID BETWEEN 1995 AND 2025 AND yearID != 2020
    ) ps
    JOIN fran_lookup fl ON ps.teamID = fl.teamID
  )
  SELECT a.franchID,
    COUNT(DISTINCT a.yearID) AS total_seasons,
    COUNT(DISTINCT pf.yearID) AS n_playoff_seasons,
    COUNT(DISTINCT pf.yearID)::DOUBLE / COUNT(DISTINCT a.yearID) AS playoff_rate
  FROM all_s a LEFT JOIN pf USING (franchID, yearID)
  GROUP BY a.franchID")

dead_pct_dt <- qry("
  SELECT fl.franchID,
    SUM(CASE WHEN w.total_war IS NULL OR w.total_war <= 0 THEN s.salary ELSE 0 END)
      / NULLIF(SUM(s.salary), 0) * 100 AS dead_pct
  FROM SalariesAll s
  JOIN fran_lookup fl ON s.teamID = fl.teamID
  LEFT JOIN PlayerWAR w ON s.playerID = w.playerID AND s.yearID = w.yearID
  WHERE s.is_actual = TRUE AND s.salary >= 1e6
    AND s.yearID BETWEEN 1995 AND 2025 AND s.yearID != 2020
  GROUP BY fl.franchID")

hg_war <- qry("
  SELECT fl.franchID,
    SUM(CASE WHEN pat.acq_type = 'homegrown' THEN sp.total_war ELSE 0 END)
      ::DOUBLE / NULLIF(SUM(sp.total_war), 0) * 100 AS hg_war_pct
  FROM SalaryPerWAR sp
  JOIN PlayerAcquisitionType pat ON sp.playerID = pat.playerID AND sp.teamID = pat.teamID
  JOIN fran_lookup fl ON sp.teamID = fl.teamID
  WHERE sp.yearID BETWEEN 1995 AND 2025 AND sp.yearID != 2020
    AND sp.total_war IS NOT NULL
  GROUP BY fl.franchID")

avg_payroll <- qry("
  SELECT fl.franchID,
    SUM(s.salary) / 1e6 / COUNT(DISTINCT s.yearID) AS avg_payroll_M
  FROM SalariesAll s
  JOIN fran_lookup fl ON s.teamID = fl.teamID
  WHERE s.is_actual = TRUE AND s.salary >= 1e6
    AND s.yearID BETWEEN 1995 AND 2025 AND s.yearID != 2020
  GROUP BY fl.franchID")

# ── ROOKIE DEVELOPMENT SCORE (pre-FA WAR accumulation) ───────────────────────
# For each homegrown player: sum WAR produced while with the drafting team AND
# still in their pre-FA window (service years 1–6).  Normalize by the actual
# number of seasons counted rather than requiring a full 6-year window — this
# handles right-truncation for players who debuted recently (e.g. 2021 debut
# has at most 4 seasons through 2025 excl. 2020).
# franchise score = mean of per-player (pre-FA WAR / window) across all slots.
rookie_dev <- qry("
  WITH all_apps AS (
    SELECT playerID, yearID FROM Batting
    UNION
    SELECT playerID, yearID FROM Pitching
  ),
  svc AS (
    -- service year number = rank of this season among all MLB seasons for this player
    SELECT playerID, yearID,
      ROW_NUMBER() OVER (PARTITION BY playerID ORDER BY yearID) AS svc_yr
    FROM all_apps
  ),
  homegrown_pat AS (
    SELECT playerID, teamID AS home_team
    FROM PlayerAcquisitionType
    WHERE acq_type = 'homegrown'
  ),
  -- seasons on drafting team while pre-FA (svc_yr <= 6)
  pre_fa AS (
    SELECT h.home_team AS teamID,
           h.playerID,
           s.yearID,
           s.svc_yr,
           COALESCE(w.total_war, 0) AS war
    FROM homegrown_pat h
    JOIN svc s ON s.playerID = h.playerID AND s.svc_yr <= 6
    -- only seasons where player actually appeared for home team
    JOIN (
      SELECT DISTINCT playerID, teamID, yearID FROM Batting
      UNION
      SELECT DISTINCT playerID, teamID, yearID FROM Pitching
    ) pres ON pres.playerID = h.playerID
          AND pres.teamID   = h.home_team
          AND pres.yearID   = s.yearID
    LEFT JOIN PlayerWAR w ON w.playerID = h.playerID AND w.yearID = s.yearID
    WHERE s.yearID BETWEEN 1995 AND 2025 AND s.yearID != 2020
  ),
  player_dev AS (
    SELECT teamID, playerID,
      SUM(war)   AS pre_fa_war,
      COUNT(*)   AS window_seasons  -- actual seasons available (<=6, handles truncation)
    FROM pre_fa
    GROUP BY teamID, playerID
    HAVING COUNT(*) >= 1
  )
  SELECT fl.franchID,
    -- WAR per pre-FA slot: total_war / actual_window per player, then average across players
    AVG(pre_fa_war::DOUBLE / window_seasons) AS avg_preFA_war_per_season,
    COUNT(DISTINCT playerID) AS n_rookie_slots
  FROM player_dev pd
  JOIN fran_lookup fl ON pd.teamID = fl.teamID
  GROUP BY fl.franchID
  HAVING COUNT(DISTINCT playerID) >= 3
")

rookie_share <- qry("
  WITH all_apps AS (
    SELECT DISTINCT playerID, teamID, yearID
    FROM Batting
    WHERE yearID BETWEEN 1995 AND 2025 AND yearID != 2020
    UNION
    SELECT DISTINCT playerID, teamID, yearID
    FROM Pitching
    WHERE yearID BETWEEN 1995 AND 2025 AND yearID != 2020
  ),
  svc AS (
    SELECT playerID, yearID,
      ROW_NUMBER() OVER (PARTITION BY playerID ORDER BY yearID) AS svc_yr
    FROM (
      SELECT DISTINCT playerID, yearID
      FROM all_apps
    )
  ),
  homegrown_pat AS (
    SELECT playerID, teamID AS home_team
    FROM PlayerAcquisitionType
    WHERE acq_type = 'homegrown'
  ),
  pre_fa_homegrown AS (
    SELECT pres.teamID, pres.yearID, pres.playerID
    FROM homegrown_pat h
    JOIN svc s
      ON s.playerID = h.playerID
     AND s.svc_yr <= 6
    JOIN all_apps pres
      ON pres.playerID = h.playerID
     AND pres.teamID = h.home_team
     AND pres.yearID = s.yearID
  ),
  team_season AS (
    SELECT fl.franchID, a.teamID, a.yearID,
      COUNT(DISTINCT a.playerID) AS roster_n,
      COUNT(DISTINCT CASE WHEN p.playerID IS NOT NULL THEN a.playerID END) AS hg_prefa_n
    FROM all_apps a
    JOIN fran_lookup fl ON a.teamID = fl.teamID
    LEFT JOIN pre_fa_homegrown p
      ON p.teamID = a.teamID
     AND p.yearID = a.yearID
     AND p.playerID = a.playerID
    GROUP BY fl.franchID, a.teamID, a.yearID
  )
  SELECT franchID,
    AVG(hg_prefa_n::DOUBLE / roster_n) AS avg_prefa_roster_share,
    AVG(hg_prefa_n)                    AS avg_hg_prefa_n,
    AVG(roster_n)                      AS avg_roster_n,
    COUNT(*)                           AS n_team_seasons
  FROM team_season
  GROUP BY franchID
")

stage1 <- Reduce(function(a,b) merge(a,b,by="franchID",all.x=TRUE),
  list(fa_cost, playoff_rate, dead_pct_dt, hg_war, avg_payroll))
stage1 <- merge(stage1, fran_key, by="franchID", all.x=TRUE)
stage1 <- stage1[!is.na(short)]

# ── CONTRACT / PLAYER SCOPE COUNTS (used in slide captions) ──────────────────
n_counts <- qry("
  SELECT
    COUNT(*)                                                         AS n_total_contracts,
    COUNT(DISTINCT s.playerID)                                       AS n_total_players,
    SUM(CASE WHEN acq.acq_type = 'veteran_acq' THEN 1 ELSE 0 END)  AS n_fa_contracts,
    COUNT(DISTINCT CASE WHEN acq.acq_type = 'veteran_acq' THEN s.playerID END) AS n_fa_players,
    SUM(CASE WHEN COALESCE(w.total_war, 0) <= 0 AND s.salary > 0 THEN 1 ELSE 0 END) AS n_dead_contracts,
    SUM(CASE WHEN acq.acq_type = 'homegrown' THEN 1 ELSE 0 END)    AS n_hg_contracts,
    COUNT(DISTINCT CASE WHEN acq.acq_type = 'homegrown' THEN s.playerID END) AS n_hg_players
  FROM SalariesAll s
  LEFT JOIN PlayerAcquisitionType acq ON s.playerID = acq.playerID
  LEFT JOIN PlayerWAR w ON s.playerID = w.playerID AND s.yearID = w.yearID
  WHERE s.is_actual = TRUE AND s.yearID BETWEEN 1995 AND 2025 AND s.yearID != 2020
")
n_total_contracts <- as.integer(n_counts$n_total_contracts)
n_total_players   <- as.integer(n_counts$n_total_players)
n_fa_contracts    <- as.integer(n_counts$n_fa_contracts)
n_fa_players      <- as.integer(n_counts$n_fa_players)
n_dead_contracts  <- as.integer(n_counts$n_dead_contracts)
n_hg_players      <- as.integer(n_counts$n_hg_players)
n_rookie_players  <- as.integer(sum(rookie_dev$n_rookie_slots, na.rm = TRUE))
n_rookie_team_seasons <- as.integer(sum(rookie_share$n_team_seasons, na.rm = TRUE))

# Trade count: distinct players classified as young acquisitions (pre-FA cross-team moves)
n_trade_players <- as.integer(qry("
  SELECT COUNT(DISTINCT playerID) AS n FROM PlayerAcquisitionType WHERE acq_type = 'young_acq'
")$n)

# ── SYNTHESIS SCORECARD ────────────────────────────────────────────────────────
# Playoff achievement counts per franchise (winners at each round, 1995-2025)
post_ach <- qry("
  SELECT fl.franchID,
    COUNT(CASE WHEN sp.round = 'WS'                               THEN 1 END) AS ws_wins,
    COUNT(CASE WHEN sp.round IN ('ALCS','NLCS')                   THEN 1 END) AS pennants,
    COUNT(CASE WHEN sp.round IN ('ALDS1','ALDS2','NLDS1','NLDS2') THEN 1 END) AS lcs_appearances
  FROM SeriesPost sp
  JOIN fran_lookup fl ON sp.teamIDwinner = fl.teamID
  WHERE sp.yearID BETWEEN 1995 AND 2025 AND sp.yearID != 2020
  GROUP BY fl.franchID
")

# Total cumulative achievement -- computed once, used in Chart 1 and Scorecard sort
total_ach_by_fran <- playoff_seasons[!is.na(franchID),
  .(total_achievement = sum(achievement_score, na.rm = TRUE), n_playoffs = .N),
  by = franchID]

syn <- Reduce(function(a,b) merge(a,b,by="franchID",all.x=TRUE),
  list(stage1[, .(franchID, short, fa_m_per_war, playoff_rate, dead_pct, hg_war_pct)],
       stage2[, .(franchID, avg_achievement, avg_war_ret_pct, n_playoffs)],
       avg_payroll,
       total_ach_by_fran[, .(franchID, total_achievement)],  # n_playoffs already in stage2
       post_ach,
       rookie_dev[, .(franchID, avg_preFA_war_per_season)],
       rookie_share[, .(franchID, avg_prefa_roster_share, avg_hg_prefa_n, avg_roster_n)]))
syn <- syn[n_playoffs >= 5 & !is.na(fa_m_per_war)]
syn[is.na(ws_wins),        ws_wins        := 0L]
syn[is.na(pennants),       pennants        := 0L]
syn[is.na(lcs_appearances), lcs_appearances := 0L]

# Medal label: 🥇=WS title, 🥈=pennant, 🥉=LCS appearance (counts shown when >1)
syn[, medal_label := mapply(function(w, p, l) {
  g <- if (w > 0) paste0("\U0001F947", if (w > 1) w else "") else ""
  s <- if (p > 0) paste0("\U0001F948", if (p > 1) p else "") else ""
  b <- if (l > 0) paste0("\U0001F949", if (l > 1) l else "") else ""
  parts <- c(g, s, b)[nzchar(c(g, s, b))]
  if (length(parts) == 0L) "--" else paste(parts, collapse = " ")
}, ws_wins, pennants, lcs_appearances, SIMPLIFY = TRUE)]

pct_rank <- function(x, higher_better = TRUE) {
  r <- rank(x, na.last = "keep", ties.method = "average")
  p <- (r - 1) / (sum(!is.na(x)) - 1) * 100
  if (!higher_better) p <- 100 - p
  round(p, 1)
}
syn[, `:=`(
  pct_fa_cost     = pct_rank(fa_m_per_war,            FALSE),
  pct_dead        = pct_rank(dead_pct,                FALSE),
  pct_hg          = pct_rank(hg_war_pct,              TRUE),
  pct_retention   = pct_rank(avg_war_ret_pct,         TRUE),
  pct_achievement = pct_rank(avg_achievement,         TRUE),
  pct_rookie_dev  = pct_rank(avg_preFA_war_per_season, TRUE)
)]
syn[, overall := rowMeans(.SD, na.rm = TRUE),
    .SDcols = c("pct_fa_cost","pct_dead","pct_hg","pct_retention",
                "pct_achievement","pct_rookie_dev")]
syn[, overall_pct := pct_rank(overall, TRUE)]

# Sort by total_achievement (cumulative championship points, 1995-2025)
setorder(syn, -total_achievement)
syn[, short_f := factor(short, levels = rev(short))]

# WS trophy indicator: filled stars per win, empty star for none
syn[, ws_label := fifelse(ws_wins > 0,
                           paste0(strrep("\u2605", ws_wins)),
                           "\u2606")]

# ── SHARED HELPERS ─────────────────────────────────────────────────────────────
# theme_story, COL_LO/HI, PT_SIZE/ALPHA, LBL_SIZE, std_colourbar are all
# loaded from chart_theme.R (sourced at the top of this file).

save_chart <- function(p, name, w = 10, h = 7) {
  path <- chart_path(name)
  ggsave(path, p, width = w, height = h, dpi = 300, device = ragg::agg_png)
  message("Saved: ", path)
}

# ── CHART 1: PAYROLL VS TOTAL PLAYOFF ACHIEVEMENT ─────────────────────────────
# Using TOTAL (cumulative) achievement rather than per-appearance average.
# Per-appearance average distorts small samples: Marlins (2 WS wins in 2 trips)
# look better than Yankees (22 trips, 1 WS win in this window) on a per-game basis.
# Total achievement rewards sustained excellence across the full 30-season window.
total_ach <- merge(total_ach_by_fran, fran_key, by = "franchID")
total_ach <- merge(total_ach, avg_payroll, by = "franchID")

# Compute correlation from data (not hardcoded)
r_payroll  <- round(cor(total_ach$avg_payroll_M, total_ach$total_achievement,
                        use = "complete.obs"), 3)
r2_payroll <- round(r_payroll^2, 2)

p1 <- ggplot(total_ach, aes(avg_payroll_M, total_achievement)) +
  geom_point(aes(size = n_playoffs, colour = avg_payroll_M), alpha = 0.85) +
  geom_smooth(method = "lm", se = TRUE, fill = "grey85",
              colour = "grey40", linetype = "dashed", linewidth = 0.7) +
  geom_label_repel(aes(label = short), size = LBL_SIZE, fontface = "bold",
                   box.padding = 0.5, max.overlaps = Inf, seed = 42,
                   min.segment.length = 0, point.padding = 0.3, force = 2) +
  scale_colour_gradient(low = COL_LO, high = COL_HI,
                        name = "Avg payroll ($M)", labels = dollar_format(suffix="M")) +
  scale_size_continuous(range = c(2, 8), name = "Playoff\nappearances",
                        breaks = c(5, 10, 15, 20)) +
  scale_x_continuous(labels = dollar_format(suffix = "M"),
                     name = paste0("Avg annual payroll (", YR_RANGE, ")")) +
  scale_y_continuous(name = paste0("Total playoff achievement score (", YR_RANGE, ")\n",
                                    "(WC win=1 pt, DS=2, LCS=4, WS=8)")) +
  labs(
    title    = sprintf("Payroll Explains %d%% of Total Playoff Achievement (R\u00b2 = %.2f)",
                       round(r2_payroll * 100), r2_payroll),
    subtitle = paste0(sprintf("Pearson r = %.3f.", r_payroll),
                      if (abs(r_payroll) < 0.1)
                        " Regression slope statistically indistinguishable from zero.\n"
                      else
                        " Weak relationship: payroll alone does not predict rings.\n",
                      "Yankees spend most AND achieve most -- but the Rays, Cardinals, and",
                      " Braves match or\nexceed most big spenders at a fraction of the cost."),
    caption  = paste0("Sources: Lahman Baseball Database, FanGraphs WAR, SalariesAll\n",
                      AUTHOR_LINE)
  ) +
  guides(colour = std_colourbar(),
         size   = guide_legend(title.position = "top")) +
  theme_story() +
  coord_cartesian(clip = "off")
save_chart(p1, "payroll_vs_achievement.png")

# ── CHART 2: STAGE 1 GETTING THERE (SIMPLIFIED) ───────────────────────────────
# Key message: efficient franchises reach October most often with modest payroll.
# Two axes: average payroll (x) vs playoff appearance rate (y).
# Color encodes FA cost efficiency ($/WAR). No size aesthetic (reduces clutter).
p2 <- ggplot(stage1, aes(avg_payroll_M, playoff_rate * 100)) +
  geom_point(aes(colour = fa_m_per_war), size = PT_SIZE, alpha = PT_ALPHA) +
  geom_label_repel(aes(label = short), size = LBL_SIZE, fontface = "bold",
                   box.padding = 0.5, max.overlaps = Inf, seed = 42,
                   min.segment.length = 0, point.padding = 0.3, force = 2) +
  scale_colour_gradient(low = COL_LO, high = COL_HI,
                        name = "Median FA (Free Agent)\ncost per WAR ($M)",
                        labels = dollar_format(suffix = "M")) +
  scale_x_continuous(labels = dollar_format(suffix = "M"),
                     name = "Average annual payroll") +
  scale_y_continuous(name = "Playoff appearance rate (%)",
                     labels = function(x) paste0(x, "%")) +
  labs(title    = "Stage 1: Who Gets to October -- and at What Cost?",
       subtitle = paste0("Blue = efficient (low FA $/WAR); Red = expensive. ",
                         "Upper-left = best: high playoff rate at low cost. ",
                         YR_RANGE),
       caption  = paste0("FA = Free Agent (players signed as veterans with 6+ years MLB service). Sources: Lahman, FanGraphs, Spotrac\n",
                         AUTHOR_LINE)) +
  guides(colour = std_colourbar()) +
  theme_story() +
  coord_cartesian(clip = "off")
save_chart(p2, "stage1_getting_there.png")

# ── CHART 3: STAGE 2 GOING DEEP ────────────────────────────────────────────────
stage2_plot <- stage2[n_playoffs >= 3]
ret_min <- floor(min(stage2_plot$avg_war_ret_pct, na.rm = TRUE))
ret_max <- ceiling(max(stage2_plot$avg_war_ret_pct, na.rm = TRUE))
stage2_plot[, label := short]

p3 <- ggplot(stage2_plot, aes(avg_rs_war_rank, avg_achievement)) +
  geom_point(aes(size = n_playoffs, colour = avg_war_ret_pct), alpha = PT_ALPHA) +
  geom_label_repel(aes(label = label), size = LBL_SIZE, fontface = "bold",
                   box.padding = 0.5, max.overlaps = Inf, seed = 42,
                   min.segment.length = 0, point.padding = 0.3, force = 2) +
  scale_colour_gradient(low = COL_HI, high = COL_LO,
                        name = "Avg WAR retention\n(% RS WAR in playoffs)",
                        limits = c(ret_min, ret_max),
                        labels = function(x) paste0(round(x), "%")) +
  scale_size_continuous(range = c(2, 9), name = "Playoff appearances") +
  scale_x_reverse(name = "Avg RS WAR rank among that year's playoff teams (1 = most WAR)",
                  n.breaks = 8) +
  scale_y_continuous(name = "Avg playoff achievement score per appearance") +
  labs(
    title    = "Stage 2: Who Converts October Appearances to Deep Runs?",
    subtitle = paste0("Playoff teams only, ", YR_RANGE,
                      ". WAR retention = RS production that showed up in October."),
    caption  = paste0("WAR retention proxy: RS WAR \u00d7 (postseason PA/IP \u00f7 RS PA/IP). ",
                      "Players absent from playoffs = 0. WAR retention capped at 2021 (RS stats).",
                      "\nPlayoff achievement extends to 2025 via Retrosheet. Sources: Lahman, Retrosheet, FanGraphs WAR\n",
                      AUTHOR_LINE)
  ) +
  guides(colour = std_colourbar(),
         size   = guide_legend(title.position = "top")) +
  theme_story() +
  coord_cartesian(clip = "off")
save_chart(p3, "stage2_going_deep.png")

# ── CHART 4: SYNTHESIS SCORECARD ──────────────────────────────────────────────
# Columns: 5 percentile dimensions + Achievements (medals) column + WS stars label
metrics_labels <- c(
  pct_rookie_dev  = "Rookie\nDevelopment",
  pct_fa_cost     = "FA\nEfficiency",
  pct_dead        = "Dead\nMoney",
  pct_hg          = "Homegrown\nWAR %",
  pct_retention   = "Oct WAR\nRetention",
  pct_achievement = "Playoff\nAchievement",
  overall_pct     = "Successes"    # shows medal emoji instead of percentile
)
syn_long <- melt(
  syn[, c("franchID","short_f", names(metrics_labels)), with = FALSE],
  id.vars = c("franchID","short_f"), variable.name = "metric", value.name = "pct"
)
syn_long[, metric_label := factor(metrics_labels[as.character(metric)],
                                   levels = metrics_labels)]
syn_long[, is_overall := metric == "overall_pct"]
# Achievements column: grey background (NA fill) instead of percentile gradient
syn_long[is_overall == TRUE, pct := NA_real_]

# Medal annotation data for achievements column
medal_ann <- syn[, .(short_f, medal_label)]
medal_ann[, metric_label := factor("Successes", levels = metrics_labels)]

p4 <- ggplot(syn_long, aes(metric_label, short_f, fill = pct)) +
  geom_tile(aes(colour = is_overall, linewidth = is_overall)) +
  # percentile numbers for the 5 performance columns (NA rows silently skipped)
  geom_text(data = syn_long[!is.na(pct)],
            aes(label = round(pct)), size = 2.6, colour = "white", fontface = "bold") +
  # medal emoji for achievements column
  geom_text(data = medal_ann,
            aes(x = metric_label, y = short_f, label = medal_label),
            inherit.aes = FALSE, size = 3.0, colour = "#333333") +

  scale_fill_gradient2(low = "#D6604D", mid = "gold", high = "#2166AC",
                       midpoint = 50, name = "Percentile", limits = c(0, 100),
                       na.value = "grey93") +
  scale_colour_manual(values = c(`FALSE` = "white", `TRUE` = "#555555"), guide = "none") +
  scale_linewidth_manual(values = c(`FALSE` = 0.5, `TRUE` = 1.5), guide = "none") +
  scale_x_discrete(position = "top") +
  coord_cartesian(clip = "off") +
  labs(
    title    = paste0("Franchise Scorecard: Sorted by Total Playoff Achievement (", YR_RANGE, ")"),
    subtitle = paste0(
      "Rows sorted by total championship points (WC=1, DS=2, LCS=4, WS=8), highest first.\n",
      "Cell values = percentile rank on each dimension (100 = best in class, 0 = worst).\n",
      "\U0001F947 = WS title  \U0001F948 = pennant (WS appearance)  \U0001F949 = LCS appearance.\n",
      "FA = Free Agent veterans; Oct = October postseason; Rookie = avg pre-FA WAR/season (svc yrs 1-6, truncation-adjusted).\nFranchises with \u22655 playoff appearances only."
    ),
    caption  = paste0("Sources: Lahman, FanGraphs, Spotrac, USA Today\n", AUTHOR_LINE)
  ) +
  theme_story(12) +
  theme(axis.title = element_blank(),
        axis.text.y = element_text(size = 10, face = "bold"),
        legend.position = "right",
        panel.grid = element_blank(),
        plot.margin = margin(10, 10, 10, 30))
save_chart(p4, "synthesis_scorecard.png", w = 12, h = 8)

# ── CHART 5: ROOKIE DEVELOPMENT -- PIPELINE QUALITY VS DEPTH ──────────────────
rookie_plot_dt <- syn[!is.na(avg_preFA_war_per_season) & !is.na(avg_prefa_roster_share),
  .(franchID, short, avg_preFA_war_per_season, avg_prefa_roster_share,
    avg_hg_prefa_n, total_achievement)]
rookie_plot_dt[, label := short]
rookie_med_x <- median(rookie_plot_dt$avg_prefa_roster_share, na.rm = TRUE)
rookie_med_y <- median(rookie_plot_dt$avg_preFA_war_per_season, na.rm = TRUE)
rookie_qs <- quad_setup(
  rookie_plot_dt$avg_prefa_roster_share,
  rookie_plot_dt$avg_preFA_war_per_season,
  labels = c(
    "Selective pipeline\nhigh impact",
    "Self-sustaining pipeline\nhigh share, high impact",
    "Thin pipeline\nlow share, low impact",
    "Lots of youth\nnot enough impact"
  )
)

p_rookie <- ggplot(rookie_plot_dt,
                   aes(x = avg_prefa_roster_share, y = avg_preFA_war_per_season)) +
  annotate("rect", xmin = rookie_qs$xlim[1], xmax = rookie_med_x,
           ymin = rookie_med_y, ymax = rookie_qs$ylim[2],
           fill = "#fff3cd", alpha = 0.50) +
  annotate("rect", xmin = rookie_med_x, xmax = rookie_qs$xlim[2],
           ymin = rookie_med_y, ymax = rookie_qs$ylim[2],
           fill = "#d4edda", alpha = 0.45) +
  annotate("rect", xmin = rookie_qs$xlim[1], xmax = rookie_med_x,
           ymin = rookie_qs$ylim[1], ymax = rookie_med_y,
           fill = "#f8d7da", alpha = 0.35) +
  annotate("rect", xmin = rookie_med_x, xmax = rookie_qs$xlim[2],
           ymin = rookie_qs$ylim[1], ymax = rookie_med_y,
           fill = "#fff3cd", alpha = 0.50) +
  geom_vline(xintercept = rookie_med_x, linetype = "dashed", colour = "grey45", linewidth = 0.4) +
  geom_hline(yintercept = rookie_med_y, linetype = "dashed", colour = "grey45", linewidth = 0.4) +
  geom_point(aes(size = avg_hg_prefa_n, colour = total_achievement), alpha = PT_ALPHA) +
  geom_label_repel(
    aes(label = label),
    size = LBL_SIZE, label.padding = 0.15, box.padding = 0.5,
    max.overlaps = Inf, segment.colour = "grey55", segment.size = 0.3,
    seed = 42, min.segment.length = 0, point.padding = 0.3, force = 2
  ) +
  geom_text(
    data = rookie_qs$quad_lbl,
    aes(x = x, y = y, label = label, hjust = hjust, vjust = vjust),
    colour = "grey35", size = QUAD_LBL_SIZE, fontface = "italic",
    lineheight = 0.85, inherit.aes = FALSE
  ) +
  scale_colour_gradient(low = COL_HI, high = COL_LO,
                        name = "Playoff achievement\n(total points)",
                        guide = std_colourbar()) +
  scale_size_continuous(name = "Avg homegrown\npre-FA players",
                        range = c(3, 10),
                        breaks = pretty(rookie_plot_dt$avg_hg_prefa_n, n = 4)) +
  scale_x_continuous(limits = rookie_qs$xlim, expand = expansion(0),
                     labels = label_percent(accuracy = 1)) +
  scale_y_continuous(limits = rookie_qs$ylim, expand = expansion(0)) +
  labs(
    title    = "Building the Pipeline: Homegrown Pre-FA Depth vs Quality (1995\u20132025)",
    subtitle = paste0(
      "Medians split pipeline depth from impact; truncation-adjusted windows keep recent debuts comparable.\n",
      "Only a few franchises live in the top-right."
    ),
    x       = "Avg roster share from homegrown pre-FA players",
    y       = "Avg WAR per pre-FA season slot",
    caption = paste0("Sources: Lahman + FanGraphs fWAR | PlayerAcquisitionType: acq_type = 'homegrown' | ",
                     formatC(n_rookie_team_seasons, format = "d", big.mark = ","), " franchise-seasons; ",
                     formatC(n_rookie_players, format = "d", big.mark = ","), " homegrown player windows\n",
                     AUTHOR_LINE)
  ) +
  theme_story(12) +
  theme(legend.position = "bottom") +
  coord_cartesian(clip = "off")
save_chart(p_rookie, "rookie_dev_chart.png", w = 9, h = 8)


# LINKEDIN SLIDES  (personal use -- NOT committed to lahmanTools package)
# =============================================================================
# Causal arc: Develop → Retain → Allocate → Trade → Survive luck → October
#  01  Key Findings  (punchline first -- 7 data-backed bullets)
#  02  Framework: WAR scale + 6 management dimensions + 4 eras + payroll R² context
#  03  Act 1 -- Building the pipeline: rookie development rankings
#  04  Act 2 -- Keeping stars + paying for performance (retention + FA efficiency)
#  05  Act 3 -- Dead money: the self-inflicted tax
#  06  Act 4 -- Trading future stars (pre-FA talent flow)
#  07  Act 5 -- The great confounder: injury luck
#  08  Act 6 -- The payoff: management decisions → October (payroll r²=0.59)
#  09  Act 7 -- Going deep: October WAR retention
#  10  Act 8 -- The complete scorecard (6 management dimensions)
# =============================================================================
li_theme <- function(base = 20) {
  theme_minimal(base_size = base) +
    theme(
      plot.title.position = "plot",
      plot.title       = element_text(face = "bold", size = base + 1, lineheight = 1.1,
                                      colour = COL_ACCENT,
                                      margin = margin(b = 5)),
      plot.subtitle    = element_text(colour = "grey25", size = base - 4, lineheight = 1.35,
                                      margin = margin(b = 6)),
      plot.caption     = element_text(colour = "grey55", size = base - 7,
                                      hjust = 0, margin = margin(t = 6)),
      plot.margin      = margin(26, 60, 26, 26),
      axis.title       = element_text(size = base - 4),
      axis.text        = element_text(size = base - 5),
      axis.text.y      = ggtext::element_markdown(size = base - 5),
      legend.text      = element_text(size = base - 6),
      legend.title     = element_text(size = base - 5, face = "bold"),
      legend.position  = "bottom",
      legend.box       = "horizontal",
      legend.spacing.x = unit(0.5, "cm"),
      legend.key.width = unit(1.0, "cm"),
      panel.grid.minor = element_blank()
    )
}

save_li <- function(p, slide_num, max_chars = 78L) {
  for (field in c("title", "subtitle")) {
    txt <- p$labels[[field]]
    if (is.null(txt) || !nzchar(txt)) next
    long <- Filter(function(x) nchar(x) > max_chars, strsplit(txt, "\n", fixed = TRUE)[[1]])
    if (length(long))
      warning(sprintf("Slide %02d %s: line too long (%d chars): %s",
                      slide_num, field, nchar(long[[1]]), long[[1]]), call. = FALSE)
  }
  fname <- sprintf("linkedin_%02d.png", slide_num)
  ggsave(chart_path(fname), p, width = 11, height = 10, dpi = 300, device = ragg::agg_png)
  message("Saved LinkedIn slide: ", fname)
}

# Helper: wrap an existing PNG file into a LinkedIn-sized ggplot frame
li_wrap_png <- function(png_file, title, subtitle = "", caption = "", base = 18) {
  img_path <- chart_path(png_file)
  img      <- png::readPNG(img_path)
  g        <- grid::rasterGrob(img, interpolate = TRUE)
  ggplot() +
    annotation_custom(g, xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf) +
    coord_fixed(ratio = nrow(img) / ncol(img),
                xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) +
    labs(title = title, subtitle = subtitle, caption = caption) +
    theme_void(base_size = base) +
    theme(
      plot.title.position = "plot",
      plot.title    = element_text(face = "bold", size = base + 1,
                                   lineheight = 1.1, margin = margin(t = 10, b = 5)),
      plot.subtitle = element_text(colour = "grey25", size = base - 4,
                                   lineheight = 1.3, margin = margin(b = 5)),
      plot.caption  = element_text(colour = "grey55", size = base - 7,
                                   hjust = 0, margin = margin(t = 5)),
      plot.margin   = margin(16, 60, 12, 16)
    )
}

# ── Slide 01: Key Findings ────────────────────────────────────────────────────
# Punchline first: give readers a reason to swipe
takeaway_body <- paste(c(
  "1.  Pipeline edge has two parts -- rookie quality and rookie roster share -- and only a few clubs truly do both",
  "2.  Developing talent AND exporting it pre-FA (Free Agency) pays twice: absorbing development AND replacement costs",
  "3.  Dead money -- salary paid to zero WAR (Wins Above Replacement) players is a tax that hits small markets hardest",
  "4.  Teams that overpay veteran Free Agents AND let homegrown talent walk are paying twice",
  "5.  Payroll mattered over the period (r = 0.77, R\u00b2 \u2248 0.60), but the modern period may be different",
  "6.  Injury luck is real and powerful: some franchises were penalised for decisions they did NOT make",
  "7.  Champions have scored well across several dimensions, but show different strengths and weaknesses"
), collapse = "\n\n")

p_01 <- ggplot() + xlim(0, 1) + ylim(0, 1) +
  # Title in dark navy
  annotate("text", x = 0.5, y = 0.93, hjust = 0.5, vjust = 1,
           label = "7 Things the Data Reveals About\nMLB Franchise Management",
           size = 7.8, fontface = "bold", lineheight = 1.1, colour = COL_ACCENT) +
  # Blue accent rule below title
  annotate("rect", xmin = 0.05, xmax = 0.95, ymin = 0.756, ymax = 0.760,
           fill = COL_LO, alpha = 0.8) +
  # Body text
  annotate("text", x = 0.05, y = 0.73, hjust = 0, vjust = 1,
           label = takeaway_body, size = 4.1, lineheight = 1.45) +
  # Footer with David Lucey
  annotate("text", x = 0.5, y = 0.02, hjust = 0.5, vjust = 0,
           label = paste0(AUTHOR_LINE, "  |  Lahman DB \u00b7 FanGraphs WAR \u00b7 Spotrac/USA Today \u00b7 Retrosheet  |  ",
                          YR_RANGE, "  |  Swipe for the evidence"),
           size = 3.4, colour = "grey45") +
  theme_void() +
  theme(plot.background = element_rect(fill = "#f7f9fc", colour = COL_ACCENT, linewidth = 1.5),
        plot.margin = margin(28, 36, 18, 36))
save_li(p_01, 1)

# ── Slide 02: Framework ────────────────────────────────────────────────────────
# Introduce the causal chain, four eras, and anchor the payroll R² upfront
war_body <- paste0(
  "WAR (Wins Above Replacement) is the universal currency of baseball value.\n",
  "It measures how many wins a player adds vs a freely available minor-league call-up.\n",
  "WAR 5 = 5 extra wins. WAR 0 = replaceable. WAR < 0 = a net drag on the roster.\n\n",
  "THE CAUSAL CHAIN: how front-offices build winners\n",
  "  1. Develop rookies into pre-FA contributors and fill real roster share internally\n",
  "  2. Retain the best pre-FA talent before they reach the open market\n",
  "  3. Allocate payroll wisely: efficient FA signings + avoid dead money\n",
  "  4. Win the trade market for future stars\n",
  "  5. Survive injury luck (the great confounder you cannot fully control)\n",
  "  \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n",
  "  6. Get to playoffs in October  7. Have remaining depth to go deep\n\n",
  "WHAT PAYROLL BUYS: r = 0.77, R\u00b2 \u2248 0.60 (1995-2025)\n",
  "Payroll predicted ~60% playoff achievement over the full 30-year period;\n",
  "but may explain less in the big data era.\n\n",
  "FOUR ANALYTICAL ERAS (context only; full-period scores here use 1995-2025):\n",
  "  Early FA 1985-1993 | Pre-Moneyball 1994-2002 | Moneyball 2003-2011 | Big Data 2012-2025\n\n",
  "SIX MANAGEMENT DIMENSIONS SCORED:\n",
  "  Rookie dev \u00b7 FA efficiency \u00b7 Dead money \u00b7 Homegrown retention \u00b7 Trade quality \u00b7 Oct WAR\n\n",
  sprintf("DATA SCOPE: %s player-season salary records reviewed (%s distinct players, 30 franchises, %s)",
          formatC(n_total_contracts, format = "d", big.mark = ","),
          formatC(n_total_players,   format = "d", big.mark = ","),
          YR_RANGE)
)
p_02 <- ggplot() + xlim(0, 1) + ylim(0, 1) +
  # Title in dark navy
  annotate("text", x = 0.5, y = 0.97, hjust = 0.5, vjust = 1,
           label = "The Framework: WAR + Six Management Dimensions",
           size = 8.4, fontface = "bold", lineheight = 1.1, colour = COL_ACCENT) +
  # Blue accent rule below title
  annotate("rect", xmin = 0.05, xmax = 0.95, ymin = 0.898, ymax = 0.902,
           fill = COL_LO, alpha = 0.8) +
  # Body text (indented slightly)
  annotate("text", x = 0.06, y = 0.874, hjust = 0, vjust = 1,
           label = war_body, size = 4.1, lineheight = 1.45) +
  # Footer with David Lucey
  annotate("text", x = 0.5, y = 0.02, hjust = 0.5, vjust = 0,
           label = paste0(AUTHOR_LINE, "  |  Data: Lahman DB + FanGraphs fWAR + Spotrac/USA Today + Retrosheet  |  ",
                          YR_RANGE),
            size = 3.4, colour = "grey45") +
  theme_void() +
  theme(plot.background = element_rect(fill = "#f7f9fc", colour = COL_ACCENT, linewidth = 1.5),
        plot.margin = margin(28, 36, 18, 36))
save_li(p_02, 2)

# ── Slide 03: Act 1 -- Rookie Development Rankings ───────────────────────────
# First management decision in the causal chain: can you develop rookies?
p_03 <- (p_rookie + li_theme()) +
  labs(
    title    = "Act 1: Building the Pipeline -- Quality vs Depth",
    subtitle = paste0(
      "Few franchises combine pipeline depth with real impact.\n",
      "Most clubs get one without the other; top-right teams hold the cleanest edge.\n",
      YR_RANGE
    ),
    caption  = paste0("Sources: Lahman + FanGraphs fWAR | acq_type = 'homegrown' | Truncation: denominator = min(svc_yrs, 6) | ",
                      formatC(n_rookie_team_seasons, format = "d", big.mark = ","), " franchise-seasons; ",
                      formatC(n_rookie_players, format = "d", big.mark = ","), " homegrown player windows analyzed")
  )
save_li(p_03, 3)

# ── Slide 04: Act 2 -- Keeping Stars + Paying for Performance ────────────────
# Retention + FA efficiency -- the two-way allocation decision
p_04 <- (readRDS(chart_path("talent_allocation.rds")) + li_theme() + coord_cartesian(clip = "off")) +
  labs(
    title    = "Act 2: Keeping Stars + Paying for Performance",
    subtitle = paste0(
      "X-axis reversed: right = efficient FA spending (lower $/WAR).\n",
      "Cracking both FA efficiency AND homegrown retention is rare --\n",
      "sustained contenders do both; others rebuild. ", YR_RANGE
    ),
    caption  = paste0("FA = veterans with 6+ years MLB service time. ",
                      formatC(n_fa_contracts, format = "d", big.mark = ","), " FA contract-seasons | ",
                      formatC(n_fa_players,   format = "d", big.mark = ","), " distinct FA players. ",
                      "Point size = number of FA signings (area-scaled).")
  )
save_li(p_04, 4)

# ── Slide 05: Act 3 -- Dead Money: The Self-Inflicted Tax ─────────────────────
p_05 <- (readRDS(chart_path("dead_money.rds")) + li_theme() + coord_cartesian(clip = "off")) +
  labs(
    title    = "Act 3: Dead Money -- The Self-Inflicted Tax on Bad Contracts",
    subtitle = paste0(
      "Dead money = salary >=$1M paid to <=0 WAR players. One bad long-term\n",
      "deal can consume 25-30% of a small-market budget; large markets absorb it,\n",
      "small markets are crippled. ", YR_RANGE
    ),
    caption  = paste0("Sources: SalariesAll (Lahman + Spotrac + USA Today), FanGraphs WAR | ",
                      formatC(n_dead_contracts, format = "d", big.mark = ","),
                      " zero/negative-WAR contract-seasons identified across 30 franchises")
  )
save_li(p_05, 5)

# ── Slide 06: Act 4 -- Trading Future Stars ───────────────────────────────────
p_06 <- (readRDS(chart_path("trade_scores.rds")) + li_theme() + coord_cartesian(clip = "off")) +
  labs(
    title    = "Act 4: Who Wins the Trade Market for Future Stars?",
    subtitle = paste0(
      "Future Stars = players with 2+ post-trade MLB seasons traded before\n",
      "FA eligibility (< 6 yrs service). Net losers rarely recover without elite\n",
      "draft luck. Pipeline builders outperform FA-only franchises. 1990\u20132025"
    ),
    caption  = paste0("WAR/season measured over post-trade career. ",
                      formatC(n_trade_players, format = "d", big.mark = ","),
                      " pre-FA acquisitions tracked. Caveats: prospect-only trades not captured.")
  )
save_li(p_06, 6)

# ── Slide 07: Act 5 -- The Great Confounder: Injury Luck ─────────────────────
# Acts 1-4 are decisions managers make. Act 5 is what happens TO them.
# Injury luck is a genuine confounder: good managers can look bad if key players
# are hurt; bad managers can look fine when expensive FAs stay healthy.
p_07 <- (readRDS(chart_path("mva_quadrant.rds")) + li_theme() + coord_cartesian(clip = "off")) +
  labs(
    title    = "Act 5: The Great Confounder -- Injury Luck",
    subtitle = paste0(
      "Scores are era-averaged z-scores (Pre-Moneyball/Moneyball/Big Data).\n",
      "Injury luck is the great equalizer -- it makes elite managers look average\n",
      "and masks poor decisions. Acts 1-4 = choices; Act 5 = luck.\n", YR_RANGE
    ),
    caption  = paste0("Injury proxy: player games < 60% of expected given salary/role. ",
                      formatC(n_total_contracts, format = "d", big.mark = ","),
                      " salary records underlying the management composite score.")
  )
save_li(p_07, 7)

# ── Slide 08: Act 6 -- The Payoff: Management Decisions → October ─────────────
# Key message: payroll explains ~60%; efficient franchises land ABOVE the trend line,
# converting the same payroll into more results. No "paradox" -- just a real but
# incomplete relationship with meaningful front-office alpha.
p_08 <- (p2 + li_theme()) +
  labs(
    title    = "Act 6: The Payoff -- Management Decisions Convert Payroll into October",
    subtitle = paste0(
      "Color = median FA $/WAR (blue = efficient). Upper-left = best:\n",
      sprintf("high playoff rate at low cost. Payroll explains ~60%% (r\u00b2 = %.2f) --\n", r2_payroll),
      "efficient franchises punch above their weight. ", YR_RANGE
    )
  )
save_li(p_08, 8)

# ── Slide 09: Act 7 -- Going Deep in October ──────────────────────────────────
# Key message: making the playoffs isn't enough. Franchises with high WAR
# retention (healthy roster that performed in RS) go deepest.
p_09 <- (p3 + li_theme()) +
  labs(
    title    = "Act 7: Going Deep in October",
    subtitle = paste0(
      "Playoff teams only. WAR retention = % of RS WAR that showed up in October\n",
      "(RS WAR x postseason PA/IP ratio). High retention drives deep runs --\n",
      "health in October matters more than RS talent rank. ", YR_RANGE, "\n",
      sprintf("%d playoff appearances by %d franchises scored.",
              nrow(stage2_raw), nrow(stage2))
    )
  )
save_li(p_09, 9)

# ── Slide 10: Act 8 -- The Complete Scorecard ─────────────────────────────────
# Sorted by total championship points. Rookie dev is first management column.
# Right column = medal emojis. Left margin = WS star labels.
p_10 <- (p4 + li_theme(14)) +
  labs(
    title    = paste0("Act 8: The Complete Scorecard -- ", YR_RANGE),
    subtitle = paste0(
      "Rows sorted by total playoff achievement (top = most).\n",
      "Cell values = percentile rank on each dimension (100 = best, 0 = worst).\n",
      "Rookie Dev = pre-FA WAR/slot | FA Eff = $/WAR | Dead Money = unproductive %\n",
      "Homegrown WAR% = self-developed share | Oct Retention = RS WAR in postseason\n",
      "\U0001F947 = WS  \U0001F948 = pennant  \U0001F949 = LCS  |\n",
      sprintf("%s records \u00b7 %d franchises \u00b7 6 dimensions",
              formatC(n_total_contracts, format = "d", big.mark = ","),
              nrow(syn))
    )
  )
save_li(p_10, 10)

message("\nAll charts saved to: ", CHART_DIR)
