##############################################################################
# PlotsOfData: Shiny app for plotting and comparing the data
# Created by Joachim Goedhart (@joachimgoedhart), first version 2018
# Takes non-tidy, spreadsheet type data as input or tidy format
# Non-tidy data is converted into tidy format
# For tidy data the x and y variables need to be selected
# Raw data is displayed with user-defined visibility (alpha)
# Summary statistics are displayed with user-defined visibility (alpha)
# Inferential statistics (95%CI) can be added
# The 95%CI of the median is determined by resampling (bootstrap)
# A plot and a table with stats are generated
# Colors can be added to the data and/or the stats
# Several colorblind safe palettes are available
# Ordering of the categorial data is 'as is, based on median or alphabetical
##############################################################################
# Copyright (C) 2018 Joachim Goedhart
# electronic mail address: j #dot# goedhart #at# uva #dot# nl
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
##############################################################################
library(shiny)
library(ggplot2)
library(dplyr)
library(tidyr)
library(readr)
library(magrittr)
library(ggbeeswarm)
library(readxl)
################
#Function that resamples a vector (with replacement) and calculates the median value
boot_median = function(x) {
median(sample(x, replace = TRUE))
}
i=0
#Number of bootstrap samples
nsteps=1000
#Confidence level
Confidence_Percentage = 95
Confidence_level = Confidence_Percentage/100
alpha=1-Confidence_level
lower_percentile=(1-Confidence_level)/2
upper_percentile=1-((1-Confidence_level)/2)
#Several qualitative color palettes that are colorblind friendly
#From Paul Tol: https://personal.sron.nl/~pault/
#Code to generate vectors in R to use these palettes
#Red, Green, Blue, yellow, cyan, purple, grey
Tol_bright <- c('#EE6677', '#228833', '#4477AA', '#CCBB44', '#66CCEE', '#AA3377', '#BBBBBB')
Tol_muted <- c('#88CCEE', '#44AA99', '#117733', '#999933', '#DDCC77', '#CC6677', '#882255', '#AA4499', '#332288', '#DDDDDD')
Tol_light <- c('#BBCC33', '#AAAA00', '#77AADD', '#EE8866', '#EEDD88', '#FFAABB', '#99DDFF', '#44BB99', '#DDDDDD')
#Read a text file (comma separated values)
df_wide_example <- read.csv("Data_wide_example.csv", na.strings = "")
df_tidy_example <- read.csv("Data_tidy_example.csv", na.strings = "")
#######################################
###### Define the User interface #########
ui <- fluidPage(
titlePanel("PlotsOfData - Plots all Of the Data"),
sidebarLayout(
sidebarPanel(width=3,
conditionalPanel(
condition = "input.tabs=='Plot'",
radioButtons("jitter_type", "Data offset", choices = list("Beeswarm" = "beeswarm", "Random" = "random", "None (for small n)" = "none"), selected = "beeswarm"),
sliderInput("alphaInput", "Visibility of the data", 0, 1, 0.3),
# conditionalPanel(
# condition = "input.adjust_jitter == true",
# sliderInput("jitter_width", "Width:", 0,0.5,0.3),
# checkboxInput(inputId = "random_jitter", label = ("Randomize Jitter"), value = TRUE)
# ),
radioButtons("summaryInput", "Statistics", choices = list("Median" = "median", "Mean" = "mean", "Boxplot (minimal n=10)" = "boxplot", "Violin Plot (minimal n=10)" = "violin"), selected = "median"),
# sliderInput("Input_CI", "Confidence Level", 90, 100, 95),
checkboxInput(inputId = "add_CI", label = HTML("Add 95% CI
(minimal n=10)"), value = FALSE),
sliderInput("alphaInput_summ", "Visibility of the statistics", 0, 1, 1),
radioButtons(inputId = "ordered",
label= "Order of the data/statistics:",
choices = list("As supplied" = "none", "By median value" = "median", "By alphabet/number" = "alphabet"),
selected = "none"),
h4("Plot Layout"),
checkboxInput(inputId = "rotate_plot",
label = "Rotate plot 90 degrees",
value = FALSE),
checkboxInput(inputId = "no_grid",
label = "Remove gridlines",
value = FALSE),
checkboxInput(inputId = "adjust_scale",
label = "Adjust scale",
value = FALSE),
conditionalPanel(
condition = "input.adjust_scale == true",
textInput("range", "Range of values (min,max)", value = "0,2")),
checkboxInput("color_data", "Use color for the data", value=FALSE),
checkboxInput("color_stats", "Use color for the stats", value=FALSE),
conditionalPanel(
condition = "input.color_data == true || input.color_stats == true",
########## Choose color from list
#selectInput("colour_list", "Colour:", choices = ""),
radioButtons("adjustcolors", "Color palette:", choices = list("Standard" = 1,"Colorblind safe (bright)" = 2,"Colorblind safe (muted)" = 3,"Colorblind safe (light)" = 4, "User defined"=5) , selected = 1),
conditionalPanel(condition = "input.adjustcolors == 5",
textInput("user_color_list", "List of names or hexadecimal codes", value = "turquoise2,#FF2222,lawngreen"),
h5("",
a("Click here for more info on color names",
href = "http://www.endmemo.com/program/R/color.php", target="_blank"))
)),
numericInput("plot_height", "Height (# pixels): ", value = 480),
numericInput("plot_width", "Width (# pixels):", value = 480),
h4("Labels"),
checkboxInput(inputId = "add_title",
label = "Add title",
value = FALSE),
conditionalPanel(
condition = "input.add_title == true",
textInput("title", "Title:", value = "")
),
checkboxInput(inputId = "label_axes",
label = "Change labels",
value = FALSE),
conditionalPanel(
condition = "input.label_axes == true",
textInput("lab_x", "X-axis:", value = ""),
textInput("lab_y", "Y-axis:", value = "")),
checkboxInput(inputId = "adj_fnt_sz",
label = "Change font size",
value = FALSE),
conditionalPanel(
condition = "input.adj_fnt_sz == true",
numericInput("fnt_sz_ttl", "Size axis titles:", value = 24),
numericInput("fnt_sz_ax", "Size axis labels:", value = 18)),
conditionalPanel(
condition = "input.color_data == true || input.color_stats == true",
checkboxInput(inputId = "add_legend",
label = "Add legend",
value = FALSE))
),
conditionalPanel(
condition = "input.tabs=='Data upload'",
h4("Data upload"),
radioButtons(
"data_input", "",
choices =
list("Example 1 (wide format)" = 1,
"Example 2 (tidy format)" = 2,
"Upload file" = 3,
"Paste data" = 4)
,
selected = 1),
conditionalPanel(
condition = "input.data_input=='1'"
),
conditionalPanel(
condition = "input.data_input=='3'",
h5("Upload file: "),
fileInput("upload", "", multiple = FALSE),
selectInput("file_type", "Type of file:",
list("text (csv)" = "text",
"Excel" = "Excel"
),
selected = "text"),
conditionalPanel(
condition = "input.file_type=='text'",
radioButtons(
"upload_delim", "Delimiter",
choices =
list("Comma" = ",",
"Tab" = "\t",
"Semicolon" = ";",
"Space" = " "),
selected = ",")),
actionButton("submit_datafile_button",
"Submit datafile")),
conditionalPanel(
condition = "input.data_input=='4'",
h5("Paste data below:"),
tags$textarea(id = "data_paste",
placeholder = "Add data here",
rows = 10,
cols = 20, ""),
actionButton("submit_data_button", "Submit data"),
radioButtons(
"text_delim", "Delimiter",
choices =
list("Tab (from Excel)" = "\t",
"Space" = " ",
"Comma" = ",",
"Semicolon" = ";"),
selected = "\t")),
checkboxInput(inputId = "tidyInput",
label = "These data are Tidy",
value = FALSE),
conditionalPanel(
condition = "input.tidyInput==true",
h5("",
a("Click here for more info on tidy data",
href = "http://thenode.biologists.com/converting-excellent-spreadsheets-tidy-data/education/")),
selectInput("x_var", "Conditions to compare:", choices = ""),
selectInput("y_var", "Variables:", choices = "")
)
),
conditionalPanel(
condition = "input.tabs=='About'",
h4("About")
),
conditionalPanel(
condition = "input.tabs=='Data Summary'",
h4("Data summary")
)
),
mainPanel(
tabsetPanel(id="tabs",
tabPanel("Data upload", h4("Data as provided"), dataTableOutput("data_uploaded")),
tabPanel("Plot", downloadButton("downloadPlotPDF", "Download pdf-file"), downloadButton("downloadPlotPNG", "Download png-file"), plotOutput("coolplot")
),
tabPanel("Data Summary", tableOutput('data_summary')),
tabPanel("About", includeHTML("about.html")
)
)
)
)
)
#######################################
server <- function(input, output, session) {
#####################################
###### DATA INPUT ###################
df_upload <- reactive({
if (input$data_input == 1) {
data <- df_wide_example
} else if (input$data_input == 2) {
data <- df_tidy_example
} else if (input$data_input == 3) {
file_in <- input$upload
# Avoid error message while file is not uploaded yet
if (is.null(input$upload)) {
return(data.frame(x = "Select your datafile"))
} else if (input$submit_datafile_button == 0) {
return(data.frame(x = "Press 'submit datafile' button"))
} else {
isolate({
if (input$file_type == "text") {
data <- read_delim(file_in$datapath,
delim = input$upload_delim,
col_names = TRUE)
} else if (input$file_type == "Excel") {
data <- read_excel(file_in$datapath)
}
})
}
} else if (input$data_input == 4) {
if (input$data_paste == "") {
data <- data.frame(x = "Copy your data into the textbox,
select the appropriate delimiter, and
press 'Submit data'")
} else {
if (input$submit_data_button == 0) {
return(data.frame(x = "Press 'submit data' button"))
} else {
isolate({
data <- read_delim(input$data_paste,
delim = input$text_delim,
col_names = TRUE)
})
}
}
}
return(data)
})
#####################################
####################################
##### CONVERT TO TIDY DATA ##########
#Need to tidy the data?!
#Untidy data will be converted to long format with two columns named 'Condition' and 'Value'
#The input for "Condition" will be taken from the header, i.e. first row
#Tidy data will be used as supplied
df_upload_tidy <- reactive({
if(input$tidyInput == FALSE ) {
klaas <- gather(df_upload(), Condition, Value)
}
else if(input$tidyInput == TRUE ) {
klaas <- df_upload()
}
return(klaas)
})
###################################
####################################
##### Get the Variables ##############
observe({
var_names <- names(df_upload_tidy())
var_list <- c("none", var_names)
# updateSelectInput(session, "colour_list", choices = var_list)
updateSelectInput(session, "y_var", choices = var_list)
updateSelectInput(session, "x_var", choices = var_list)
})
###################################
###########################################################
######## Determine and set the order of the Conditions #######
df_sorted <- reactive({
# klaas <- df_upload_tidy()
klaas <- df_selected()
if(input$ordered == "median") {
klaas$Condition <- reorder(klaas$Condition, klaas$Value, median, na.rm = TRUE)
} else if (input$ordered == "none") {
klaas$Condition <- factor(klaas$Condition, levels=unique(klaas$Condition))
} else if (input$ordered == "alphabet") {
klaas$Condition <- factor(klaas$Condition, levels=unique(sort(klaas$Condition)))
}
return(klaas)
})
###########################################################
###########################################################
######## Extract the data for display & summary stats #######
df_selected <- reactive({
if(input$tidyInput == TRUE ) {
df_temp <- df_upload_tidy()
x_choice <- input$x_var
y_choice <- input$y_var
koos <- df_temp %>% select(Condition = !!x_choice , Value = !!y_choice) %>% filter(!is.na(Value))
} else if (input$tidyInput == FALSE ) {
koos <- df_upload_tidy() %>% filter(!is.na(Value))
}
return(koos)
})
###########################################################
#############################################################
#### DISPLAY UPLOADED DATA (exactly as provided) ##################
output$data_uploaded <- renderDataTable({
# observe({ print(input$tidyInput) })
df_upload()
})
#############################################################
##################################################
#### Caluclate Summary of the DATA for the MEAN ####
df_summary_mean <- reactive({
koos <- df_selected()
koos %>%
group_by(Condition) %>%
summarise(n = n(),
mean = mean(Value, na.rm = TRUE),
median = median(Value, na.rm = TRUE),
sd = sd(Value, na.rm = TRUE)) %>%
mutate(sem = sd / sqrt(n - 1),
CI_lo = mean + qt((1-Confidence_level)/2, n - 1) * sem,
CI_hi = mean - qt((1-Confidence_level)/2, n - 1) * sem)
})
#################################################
####################################################
#### Caluclate Summary of the DATA for the Median ####
df_summary_median <- reactive({
kees <- df_selected()
# df_booted <- data.frame(Condition=levels(factor(kees$Condition)), n=tapply(kees$Value, kees$Condition, length), median=tapply(kees$Value, kees$Condition, median))
df_booted <- kees %>%
group_by(Condition) %>%
summarise(n= n(),
median = median(Value, na.rm = TRUE),
MAD= mad(Value, na.rm = TRUE, constant=1))
i=0
df_new_medians <- data.frame(Condition=levels(factor(kees$Condition)), resampled_median=tapply(kees$Value, kees$Condition, boot_median))
#Perform the resampling nsteps number of times (typically 1,000-10,000x)
for (i in 1:nsteps) {
#Caclulate the median from a boostrapped sample (resampled_median) and add to the dataframe
df_boostrapped_median <- data.frame(Condition=levels(factor(kees$Condition)), resampled_median=tapply(kees$Value, kees$Condition, boot_median))
#Add the new median to a datafram that collects all the resampled median values
df_new_medians <- bind_rows(df_new_medians, df_boostrapped_median)
}
df_booted$CI_lo <- tapply(df_new_medians$resampled_median, df_new_medians$Condition, quantile, probs=lower_percentile)
df_booted$CI_hi <- tapply(df_new_medians$resampled_median, df_new_medians$Condition, quantile, probs=upper_percentile)
# observe({ print(df_booted) })
return(df_booted)
})
###################################################
##################################################
#### Caluclate Summary of the DATA for Box (&Violin) ####
df_summary_box <- reactive({
df_selected() %>%
group_by(Condition) %>%
summarise(n = n(),
mean = mean(Value),
SD = sd(Value),
median = median(Value),
MAD = mad(Value, constant=1),
IQR = IQR(Value))
})
#################################################
###########################################
######### DEFINE DOWNLOAD BUTTONS ###########
##### Set width and height of the plot area
width <- reactive ({ input$plot_width })
height <- reactive ({ input$plot_height })
output$downloadPlotPDF <- downloadHandler(
filename <- function() {
paste("ComparisonPlot_", Sys.time(), ".pdf", sep = "")
},
content <- function(file) {
ggsave(file, width = input$plot_width/72,
height = input$plot_height/72, dpi="retina")
},
contentType = "application/pdf" # MIME type of the image
)
output$downloadPlotPNG <- downloadHandler(
filename <- function() {
paste("ComparisonPlot_", Sys.time(), ".png", sep = "")
},
content <- function(file) {
ggsave(file, width = input$plot_width/72,
height = input$plot_height/72)
},
contentType = "application/png" # MIME type of the image
)
###########################################
###########################################
######## PREPARE PLOT FOR DISPLAY ##########
###########################################
output$coolplot <- renderPlot(width = width, height = height, {
####### Read the order from the ordered dataframe #############
koos <- df_sorted()
custom_order <- levels(factor(koos$Condition))
# observe({ print(custom_order) })
########## Define alternative color palettes ##########
newColors <- NULL
if (input$adjustcolors == 2) {
newColors <- Tol_bright
} else if (input$adjustcolors == 3) {
newColors <- Tol_muted
} else if (input$adjustcolors == 4) {
newColors <- Tol_light
} else if (input$adjustcolors == 5) {
newColors <- gsub("\\s","", strsplit(input$user_color_list,",")[[1]])
}
######## Repeat the colors, if number of colors < number of conditions
klaas <- df_selected()
max_colors <- nlevels(as.factor(klaas$Condition))
if(length(newColors) < max_colors) {
newColors<-rep(newColors,times=(round(max_colors/length(newColors)))+1)
}
########## Set default to Plotting "Condition" and "Value"
if (input$x_var == "none") {
x_choice <- "Condition"
} else if (input$x_var != "none") {
x_choice <- as.character(input$x_var)
}
if (input$y_var == "none") {
y_choice <- "Value"
} else if (input$y_var != "none") {
y_choice <- as.character(input$y_var)
}
########## Define if color is used for the data
# observe({ print(class(input$colour_list)) })
if (input$color_data == FALSE) {
kleur <- NULL
} else if (input$color_data == TRUE) {
# kleur <- as.character(input$colour_list)
kleur <- x_choice
}
########## Define if/how color is used for the stats ############
# observe({ print(class(input$colour_list)) })
if (input$color_stats == FALSE) {
kleur_stats <- NULL
} else if (input$color_stats == TRUE && input$summaryInput == "boxplot") {
kleur_stats <- x_choice
} else if (input$color_stats == TRUE && input$summaryInput == "violin") {
kleur_stats <- x_choice
} else if (input$color_stats == TRUE) {
kleur_stats <- "Condition"
}
########## Define minimal n - only plot box/violinplots for min_n>9
df_temp <- df_summary_median()
min_n <- min(df_temp$n)
###############################################
############## GENERATE PLOT LAYERS #############
p <- ggplot(data=df_selected(), aes_string(x="Condition"))
# Setting the order of the x-axis
p <- p + scale_x_discrete(limits=custom_order)
##### plot selected data summary (bottom layer) ####
if (input$summaryInput == "boxplot" && min_n>9) {
p <- p + geom_boxplot(data=df_upload_tidy(), aes_string(x=x_choice, y=y_choice, fill=kleur_stats), notch = input$add_CI, outlier.color=NA, width=0.8, size=0.5, alpha=input$alphaInput_summ)
} else if (input$summaryInput == "violin" && min_n>9) {
p <- p + geom_violin(data=df_upload_tidy(), aes_string(x=x_choice, y=y_choice, fill=kleur_stats),scale = "width", draw_quantiles = c(0.5), width=0.8, size=0.5, alpha=input$alphaInput_summ)
if (input$add_CI == TRUE) {
p <- p + geom_linerange(data=df_summary_median(), aes_string(x="Condition", ymin = "CI_lo", ymax = "CI_hi"), colour="black", size =3,alpha=input$alphaInput_summ)
}
}
#### plot individual measurements (middle layer) ####
if (input$jitter_type == "beeswarm") {
p <- p + geom_quasirandom(data=df_upload_tidy(), aes_string(x=x_choice, y=y_choice, colour = kleur), varwidth = TRUE, cex=3, alpha=input$alphaInput)
} else if (input$jitter_type == "random") {
p <- p + geom_jitter(data=df_upload_tidy(), aes_string(x=x_choice, y=y_choice, colour = kleur), width=0.3, height=0.0, cex=3, alpha=input$alphaInput)
} else if (input$jitter_type == "none") {
p <- p + geom_jitter(data=df_upload_tidy(), aes_string(x=x_choice, y=y_choice, colour = kleur), width=0, height=0.0, cex=3, alpha=input$alphaInput)
}
##### plot selected data summary (top layer) ####
if (input$summaryInput == "median" && input$add_CI == TRUE && min_n>9) {
p <- p + geom_point(data=df_summary_median(), aes_string(x="Condition", y = "median", colour=kleur_stats), shape = 21,fill=NA,size = 8,alpha=input$alphaInput_summ)+
geom_linerange(data=df_summary_median(), aes_string(x="Condition", ymin = "CI_lo", ymax = "CI_hi", colour=kleur_stats), size =3,alpha=input$alphaInput_summ)
}
else if (input$summaryInput == "median" && input$add_CI == FALSE || min_n<10) {
p <- p + geom_errorbar(data=df_summary_median(), aes_string(x="Condition", ymin="median", ymax="median", colour = kleur_stats), width=.8, size=2, alpha=input$alphaInput_summ)
} else if (input$summaryInput == "mean" && input$add_CI == TRUE && min_n>9) {
p <- p + geom_linerange(data=df_summary_mean(), aes_string(x="Condition", ymin = "CI_lo", ymax = "CI_hi", colour=kleur_stats), size =3,alpha=input$alphaInput_summ)+
geom_point(data=df_summary_mean(), aes_string(x="Condition", y = "mean", colour=kleur_stats), shape = 21,fill=NA,size = 8,alpha=input$alphaInput_summ)
} else if (input$summaryInput == "mean" && input$add_CI == FALSE || min_n<10) {
p <- p + geom_errorbar(data=df_summary_mean(), aes_string(x="Condition", ymin="mean", ymax="mean", colour=kleur_stats), width=.8, size=2, alpha=input$alphaInput_summ)
}
########### Do some formatting of the lay-out
p <- p+ theme_light(base_size = 16)
#### If selected, rotate plot 90 degrees CW ####
if (input$rotate_plot == TRUE) { p <- p + coord_flip()}
# if the range of values is specified
if (input$adjust_scale == TRUE) {
rng <- as.numeric(strsplit(input$range,",")[[1]])
p <- p + ylim(rng[1],rng[2])}
# if title specified
if (input$add_title)
p <- p + ggtitle(input$title)
# if labels specified
if (input$label_axes)
p <- p + labs(x = input$lab_x, y = input$lab_y)
# if font size is adjusted
if (input$adj_fnt_sz) {
p <- p + theme(axis.text = element_text(size=input$fnt_sz_ax))
p <- p + theme(axis.title = element_text(size=input$fnt_sz_ttl))
}
#remove legend (if selected)
if (input$add_legend == FALSE) {
p <- p + theme(legend.position="none")
}
#remove gridlines (if selected)
if (input$no_grid == TRUE) {
p <- p+ theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
}
if (input$adjustcolors >1) {
p <- p+ scale_color_manual(values=newColors)
p <- p+ scale_fill_manual(values=newColors)
}
### Output the plot ######
p
}) #close output$coolplot
###########################################
#### Render the data summary as a table ###########
output$data_summary <- renderTable({
df_out <- NULL
if (input$summaryInput == "mean") {
df_out <- df_summary_mean()
df_out$median <- NULL
} else if (input$summaryInput == "median") {
df_out <- df_summary_median()
} else if (input$summaryInput == "boxplot") {
df_out <- df_summary_box()
} else if (input$summaryInput == "violin") {
df_out <- df_summary_box()
}
return(df_out)
})
###########################################
} #close "server"
shinyApp(ui = ui, server = server)