Here is the R code for the competition entry mentioned in my previous post. See http://www.datamilk.com/leaderboard_animation.gif for the animation.
library(lubridate) library(plyr) library(sqldf) library(ggplot2) library(animation) #clear everything rm(list=ls(all=TRUE)) # Injest data data <- read.csv("unimelb_public_leaderboard.csv", header=TRUE) # calculate days and date time as numeric data <- data.frame( data , SubmissionDate_datetime = strptime(data$SubmissionDate, format="%m/%d/%Y %H:%M:%S %p") , Submission_day = round(strptime(data$SubmissionDate, format="%m/%d/%Y %H:%M:%S %p"), "day") , Submission_time_num = as.numeric(strptime(data$SubmissionDate, format="%m/%d/%Y %H:%M:%S %p")) ) start_time <- min(na.omit(data$SubmissionDate_datetime)) end_time <- max(na.omit(data$SubmissionDate_datetime)) start_day <- min(na.omit(data$Submission_day)) end_day <- max(na.omit(data$Submission_day)) duration<- round(end_time - start_time,0) team_names <- sqldf("select distinct TeamName from data") num_teams <- nrow(team_names) competition_days<- seq(start_day, b = "days", length = as.numeric(round(end_day - start_day,0))) #################################################### #make a new column with the leaderboard scores in it #################################################### #sort data by team and submission date data <- data[with(data, order(TeamName, Submission_time_num)),] data <- data.frame(data, lb_score = rep(0, nrow(data))) current_team <- data$TeamName[1] current_score <- data$Score[1] data$lb_score[1] <- current_score for (i in 2:nrow(data)){ if(data$TeamName[i]==current_team){ current_score<- max(current_score, data$Score[i]) data$lb_score[i] <- current_score } else{ current_team <- data$TeamName[i] current_score <- data$Score[i] data$lb_score[i] <- current_score } } # Make the animation make_animation <- function(){ start_time <- min(na.omit(data$SubmissionDate_datetime)) end_time <- max(na.omit(data$SubmissionDate_datetime)) start_day <- min(na.omit(data$Submission_day)) end_day <- max(na.omit(data$Submission_day)) duration<- round(end_time - start_time,0) #fix the start time a an exact number of days before the end time t <- end_time - as.numeric(duration)*24*60*60 while(t < end_time){ t <- min(c(end_time, t+24*60*60)) days_left <- round((end_time - t), 0) #extract the data prior to time t temp_dat<- na.omit(data[data$SubmissionDate_datetime <= t,]) #find the top 5 teams top_teams <- sqldf("select TeamName, max(lb_score) as lb_score from temp_dat group by Teamname order by 2 desc limit 7") #get the ranking for each team top_teams <- data.frame(top_teams, Leaderboard = paste(rownames(top_teams), " ", top_teams$TeamName, " (", top_teams$lb_score, ")"), sep = "") # select just the top teams temp_dat <- sqldf("select temp_dat.*, top_teams.Leaderboard as Leaderboard from temp_dat, top_teams where temp_dat.TeamName = top_teams.TeamName and temp_dat.TeamName in (select TeamName from top_teams)") #turn off the scale for alpha sc <- scale_alpha_continuous() sc$legend <- FALSE #plot the data p <- ggplot(temp_dat, aes(x=SubmissionDate_datetime, y=lb_score, group=Leaderboard, colour = Leaderboard, alpha = rank(lb_score))) theme_set(theme_gray(base_size = 18)) print(p+ geom_line(size = 2) + geom_point(size = 4, aes(colour = Leaderboard, alpha = rank(lb_score))) + xlab("Submission Date") + coord_cartesian(xlim = c(min(na.omit(data$SubmissionDate_datetime)), max(na.omit(data$SubmissionDate_datetime))+20*24*60*60) , #ylim = c(max(c(0, min(na.omit(data$Score)))), max(na.omit(data$Score))+0.01) ylim = c(0.85, max(na.omit(data$Score))+0.01) ) #add a verticle line to show the current date + geom_vline(linetype = 2, xintercept = as.numeric(t)) #add a vertical line to show the end date + geom_vline(linetype = 2, xintercept = as.numeric(end_time)) #lable the end date line + annotate("text" , label = paste("Competition ends: " , end_day) , x = as.numeric(end_time)+(48*60*60) , y = 0.86 , hjust=0 , vjust=0 , angle = 90 ) # add a vertical line to show the current submission date + geom_hline(linetype = 2, yintercept = max(temp_dat$Score)) # add a lable to show the current top score + annotate("text" , label = paste("Top Score:" , round(max(temp_dat$Score),4)) , x = as.numeric(t)+12*60*60 , y = max(temp_dat$Score) , hjust=0 , vjust=-1 ) #add a lable to show the number of days left + annotate("text" , label = paste(days_left, "days left.") , x = as.numeric(t)+12*60*60 , y = max(temp_dat$Score)-0.002 , hjust=0 , vjust=1 ) # add a title + opts(title = paste("'Grant Applications Comp' as of", max(temp_dat$Submission_day))) #turn off the legend for alpha + sc ) } } saveGIF( make_animation() , title = "Predict Grant Applications" , description = "Predict Grant Applications" ,ani.width = 900 ,ani.height = 600 ) #create the file saveHTML( make_animation() , title = "Predict Grant Applications" , description = "Predict Grant Applications" ,ani.width = 900 ,ani.height = 600 )
Comments