suppressPackageStartupMessages({
  library(DBI); library(duckdb); library(data.table); library(ggplot2); library(ggrepel)
})
.linkedin_dir <- tryCatch(
  dirname(normalizePath(sub("--file=", "", grep("--file=", commandArgs(FALSE), value = TRUE)[1]))),
  error = function(e) getwd()
)
source(file.path(.linkedin_dir, "chart_theme.R"))

db_path <- file.path(
  path.expand(Sys.getenv("LAHMANS_DBDIR", "~/Documents/Data/baseball")),
  "baseball.duckdb"
)
con <- dbConnect(duckdb(), db_path, read_only = TRUE)
on.exit(dbDisconnect(con, shutdown = TRUE), add = TRUE)

disc_sql <- "
WITH homegrown AS (
    SELECT playerID, teamID AS home_team
    FROM PlayerAcquisitionType WHERE acq_type = 'homegrown'
),
player_seasons AS (
    SELECT s.playerID, s.teamID, s.yearID,
           COALESCE(w.total_war, 0) AS war
    FROM SalariesAll s
    LEFT JOIN PlayerWAR w ON s.playerID = w.playerID AND s.yearID = w.yearID
    WHERE s.is_actual = TRUE AND s.yearID BETWEEN 1995 AND 2025 AND s.yearID != 2020
),
tagged AS (
    SELECT ps.playerID, ps.teamID, ps.yearID, ps.war, h.home_team,
           CASE WHEN ps.teamID = h.home_team THEN 'retained' ELSE 'exported' END AS phase
    FROM player_seasons ps
    JOIN homegrown h ON ps.playerID = h.playerID
),
with_fran AS (
    SELECT t.playerID, t.yearID, t.war, t.phase,
           fm.franchID
    FROM tagged t
    JOIN (SELECT DISTINCT teamID, franchID FROM Teams) fm ON fm.teamID = t.home_team
)
SELECT franchID AS teamID, phase,
       COUNT(DISTINCT playerID) AS n_players,
       AVG(war) AS avg_war
FROM with_fran
GROUP BY franchID, phase
"
disc_raw <- as.data.table(dbGetQuery(con, disc_sql))
disc <- dcast(disc_raw, teamID ~ phase, value.var = c("avg_war", "n_players"))
disc[, discrimination := avg_war_retained - avg_war_exported]

fa_sql <- "
WITH fa_signings AS (
    SELECT pat.playerID, pat.teamID, pat.first_team_year AS signing_year,
           fm.franchID
    FROM PlayerAcquisitionType pat
    JOIN (SELECT DISTINCT teamID, franchID FROM Teams) fm ON fm.teamID = pat.teamID
    WHERE pat.acq_type = 'veteran_acq' AND pat.first_team_year >= 2000
),
fa_perf AS (
    SELECT fs.franchID,
           COUNT(DISTINCT fs.playerID) AS n_fa,
           SUM(s.salary) AS total_spend,
           SUM(COALESCE(w.total_war, 0)) AS total_war
    FROM fa_signings fs
    JOIN SalariesAll s ON fs.playerID = s.playerID AND s.teamID = fs.teamID
                      AND s.yearID >= fs.signing_year AND s.is_actual = TRUE
                      AND s.yearID BETWEEN 1995 AND 2025 AND s.yearID != 2020
    LEFT JOIN PlayerWAR w ON fs.playerID = w.playerID AND w.yearID = s.yearID
    GROUP BY fs.franchID
)
SELECT franchID AS teamID, n_fa,
       (total_spend / 1e6) / NULLIF(total_war, 0) AS dollars_per_war_M
FROM fa_perf WHERE total_war > 10
"
fa <- as.data.table(dbGetQuery(con, fa_sql))

dt <- merge(disc, fa, by = "teamID")
dt <- dt[!is.na(discrimination) & !is.na(dollars_per_war_M)]
dt[, short := TEAM_SHORT[teamID]]
dt[is.na(short), short := teamID]
raw_fallbacks <- dt[short == teamID, unique(teamID)]
if (length(raw_fallbacks))
  warning("TEAM_SHORT missing teamIDs: ", paste(raw_fallbacks, collapse = ", "), call. = FALSE)
dt[, label := short]   # label ALL teams

med_x <- median(dt$dollars_per_war_M, na.rm = TRUE)
med_y <- median(dt$discrimination, na.rm = TRUE)

qs <- quad_setup(
  dt$dollars_per_war_M, dt$discrimination,
  labels = c(
    "Best allocators\n(efficient FA + smart retention)",
    "Good retention\nbut overpaid for FA",
    "Efficient FA spend\nbut poor retention model",
    "Worst: overpaid FA\n+ let go good players"
  ),
  x_reversed = TRUE
)

p <- ggplot(dt, aes(x = dollars_per_war_M, y = discrimination)) +
  annotate("rect", xmin=-Inf, xmax=med_x, ymin=med_y,  ymax=Inf,  fill="#d4edda", alpha=0.6) +  # best
  annotate("rect", xmin=med_x, xmax=Inf,  ymin=med_y,  ymax=Inf,  fill="#f0f7ee", alpha=0.6) +  # ok
  annotate("rect", xmin=-Inf, xmax=med_x, ymin=-Inf,   ymax=med_y, fill="#fff3cd", alpha=0.6) + # ok
  annotate("rect", xmin=med_x, xmax=Inf,  ymin=-Inf,   ymax=med_y, fill="#f8d7da", alpha=0.6) + # worst
  geom_vline(xintercept=med_x, linetype="dashed", colour="grey60", linewidth=0.4) +
  geom_hline(yintercept=med_y, linetype="dashed", colour="grey60", linewidth=0.4) +
  geom_hline(yintercept=0, linetype="dotted", colour="grey40", linewidth=0.3) +
  geom_point(aes(size = n_fa, colour = dollars_per_war_M), alpha = PT_ALPHA) +
  scale_colour_gradient(low = COL_LO, high = COL_HI,
                        name = "FA $/WAR ($M)\n(lower = efficient)",
                        guide = std_colourbar()) +
  scale_x_reverse(limits = qs$xlim, expand = expansion(0)) +
  scale_y_continuous(limits = qs$ylim, expand = expansion(0)) +
  scale_size_area(name = "# FA signings", max_size = 10,
                  breaks = c(50, 100, 150, 200)) +
  geom_label_repel(
    aes(label = label),
    size = LBL_SIZE, label.padding = 0.15, box.padding = 0.5,
    max.overlaps = Inf, segment.colour = "grey50", segment.size = 0.3,
    seed = 42, min.segment.length = 0, point.padding = 0.3, force = 2
  ) +
  geom_text(data = qs$quad_lbl,
    aes(x = x, y = y, label = label, hjust = hjust, vjust = vjust),
    colour = "grey35", size = QUAD_LBL_SIZE, fontface = "italic",
    lineheight = 0.85, inherit.aes = FALSE) +
  labs(
    title    = "Veteran FA Efficiency vs Homegrown Talent Retention",
    subtitle = "Cracking both simultaneously is rare -- and separates sustained contenders from rebuilding cycles.",
    x        = "FA spending efficiency: $/WAR, $M  (right = lower $/WAR = more efficient)",
    y        = "Homegrown retention score\n(avg WAR/season kept  \u2212  avg WAR/season let go)",
    caption  = paste0(
      "Homegrown seasons 1995\u20132025 (excl. 2020); FA signings 2000\u20132025 | ",
      nrow(dt), " franchises | Point size = # FA signings (area-scaled)\n",
      "TBA: financially forced to export elite homegrown players (Price, Snell, Adames) | ",
      "Dotted line = zero: retained and exported players of equal quality\n",
      AUTHOR_LINE, "  |  Data: Lahman DB, FanGraphs WAR, Spotrac"
    )
  ) +
  theme_story()

out_file <- file.path(
  path.expand(Sys.getenv("LAHMANS_DBDIR", "~/Documents/Data/baseball")),
  "charts", "talent_allocation.png"
)
ggsave(out_file, p, width=10.5, height=7, dpi=300, device=ragg::agg_png)
saveRDS(p, sub("\\.png$", ".rds", out_file))
message("Saved: ", out_file)
