uploaded with blockbuilder.org
While revisiting the Lahman
package docs, I thought the salary data would be a great subject for my recent experiments with formattable
+ sparkline
.
This is far from a polished and finished piece, and there are many ways to improve. Feel free to fork and make this better.
Much of this code comes from ?Lahman::Salaries
, so thanks to the fine authors of this great package.
library(Lahman)
library(dplyr)
library(tibble)
library(sparkline)
library(formattable)
library(htmltools)
data(Salaries)
data(TeamsFranchises)
#######################################
# Average salaries by teams, over years
#######################################
# Some franchises are multiply named, so add a new variable
# 'franchise' to the Salaries data as a lookup table
franchise <- c(`ANA` = "LAA", `ARI` = "ARI", `ATL` = "ATL",
`BAL` = "BAL", `BOS` = "BOS", `CAL` = "LAA",
`CHA` = "CHA", `CHN` = "CHN", `CIN` = "CIN",
`CLE` = "CLE", `COL` = "COL", `DET` = "DET",
`FLO` = "MIA", `HOU` = "HOU", `KCA` = "KCA",
`LAA` = "LAA", `LAN` = "LAN", `MIA` = "MIA",
`MIL` = "MIL", `MIN` = "MIN", `ML4` = "MIL",
`MON` = "WAS", `NYA` = "NYA", `NYM` = "NYN",
`NYN` = "NYN", `OAK` = "OAK", `PHI` = "PHI",
`PIT` = "PIT", `SDN` = "SDN", `SEA` = "SEA",
`SFG` = "SFN", `SFN` = "SFN", `SLN` = "SLN",
`TBA` = "TBA", `TEX` = "TEX", `TOR` = "TOR",
`WAS` = "WAS")
Salaries$franchise <- unname(franchise[as.character(Salaries$teamID)])
# Average salaries annual salaries by team, in millions USD
avg_team_salaries <- Salaries %>%
group_by(yearID, franchise, lgID) %>%
summarise(salary= mean(salary, na.rm=TRUE)/1e6) %>%
filter(!(franchise == "CLE" & lgID == "NL"))
# now let's also get team avg - all avg as another column
avg_team_salaries <- avg_team_salaries %>%
left_join(
Salaries %>%
group_by(yearID) %>%
summarise( all_avg = mean(salary, na.rm=TRUE)/1e6 )
) %>%
mutate(salary_vs = salary - all_avg)
max_salary <- max(avg_team_salaries$salary)
range_salary_vs <- range(avg_team_salaries$salary_vs)
tbl_salary <- avg_team_salaries %>%
# use expand.grid to make sure there is a row
# for each year for each team
right_join(
expand.grid(
yearID=unique(avg_team_salaries$yearID),
franchise=unique(avg_team_salaries$franchise),
stringsAsFactors=FALSE
)
) %>%
ungroup() %>%
group_by(franchise) %>%
summarise(
salary = as.character(
as.tags(
sparkline(salary, type="line", chartRangeMin=0, chartRangeMax=max_salary, height=50)
)
),
salary_vs = as.character(
as.tags(
sparkline(salary_vs, type="bar", chartRangeMin=range_salary_vs[1], chartRangeMax=range_salary_vs[2], height=50)
)
)
) %>%
formattable() %>%
as.htmlwidget(width="50%")
tbl_salary$dependencies <- c(
tbl_salary$dependencies,
htmlwidgets:::widget_dependencies("sparkline","sparkline")
)
tbl_salary