All examples By author By category About

timelyportfolio

Lahman Baseball Salaries | R formattable + sparkline

uploaded with blockbuilder.org

Baseball | Salaries by Team

While revisiting the Lahman package docs, I thought the salary data would be a great subject for my recent experiments with formattable + sparkline.

Fork and Improve

This is far from a polished and finished piece, and there are many ways to improve. Feel free to fork and make this better.

R Code to Replicate

Much of this code comes from ?Lahman::Salaries, so thanks to the fine authors of this great package.

library(Lahman)
library(dplyr)
library(tibble)
library(sparkline)
library(formattable)
library(htmltools)

data(Salaries)
data(TeamsFranchises)

#######################################
# Average salaries by teams, over years
#######################################
# Some franchises are multiply named, so add a new variable
# 'franchise' to the Salaries data as a lookup table
franchise <- c(`ANA` = "LAA", `ARI` = "ARI", `ATL` = "ATL",
               `BAL` = "BAL", `BOS` = "BOS", `CAL` = "LAA",
               `CHA` = "CHA", `CHN` = "CHN", `CIN` = "CIN",
               `CLE` = "CLE", `COL` = "COL", `DET` = "DET",
               `FLO` = "MIA", `HOU` = "HOU", `KCA` = "KCA",
               `LAA` = "LAA", `LAN` = "LAN", `MIA` = "MIA",
               `MIL` = "MIL", `MIN` = "MIN", `ML4` = "MIL",
               `MON` = "WAS", `NYA` = "NYA", `NYM` = "NYN",
               `NYN` = "NYN", `OAK` = "OAK", `PHI` = "PHI",
               `PIT` = "PIT", `SDN` = "SDN", `SEA` = "SEA",
               `SFG` = "SFN", `SFN` = "SFN", `SLN` = "SLN",
               `TBA` = "TBA", `TEX` = "TEX", `TOR` = "TOR",
               `WAS` = "WAS")
Salaries$franchise <- unname(franchise[as.character(Salaries$teamID)])

# Average salaries annual salaries by team, in millions USD
avg_team_salaries <- Salaries %>%
  group_by(yearID, franchise, lgID) %>%
  summarise(salary= mean(salary, na.rm=TRUE)/1e6) %>%
  filter(!(franchise == "CLE" & lgID == "NL"))

# now let's also get team avg - all avg as another column
avg_team_salaries <- avg_team_salaries %>%
  left_join(
    Salaries %>%
      group_by(yearID) %>%
      summarise( all_avg = mean(salary, na.rm=TRUE)/1e6 )
  ) %>%
  mutate(salary_vs = salary - all_avg) 

max_salary <- max(avg_team_salaries$salary)
range_salary_vs <- range(avg_team_salaries$salary_vs)

tbl_salary <- avg_team_salaries %>%
  # use expand.grid to make sure there is a row
  #  for each year for each team
  right_join(
    expand.grid(
      yearID=unique(avg_team_salaries$yearID),
      franchise=unique(avg_team_salaries$franchise),
      stringsAsFactors=FALSE
    )
  ) %>%
  ungroup() %>%
  group_by(franchise) %>%
  summarise(
    salary = as.character(
      as.tags(
        sparkline(salary, type="line", chartRangeMin=0, chartRangeMax=max_salary, height=50)
      )
    ),
    salary_vs = as.character(
      as.tags(
        sparkline(salary_vs, type="bar", chartRangeMin=range_salary_vs[1], chartRangeMax=range_salary_vs[2], height=50)
      )
    )
  ) %>%
  formattable() %>%
  as.htmlwidget(width="50%")

tbl_salary$dependencies <- c(
  tbl_salary$dependencies,
  htmlwidgets:::widget_dependencies("sparkline","sparkline")
)

tbl_salary