#### R - plots and analyses #### ## Plot geographic, PCA and structure analysis data { setwd("[Your_working_directory]") d <- read.delim("Supplemental_file_1") ## load libraries library(ggplot2) # set graphics parameters alpha = 0.8 font.size = 24 point.size = 5 ### simple theme with no gridlines theme <- theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text=element_text(size=font.size), legend.text=element_text(size=font.size), axis.title=element_text(size=font.size,face="bold"), legend.title=element_text(size=font.size, face="bold"), legend.background = element_rect(fill="transparent"), legend.position = c(0.2,0.8) ) ## plot PCA colored by population structure groups #pop1 filename <- paste(Sys.Date(),"_pca_pop1.pdf", sep="") pdf(filename) pv <- ggplot(data=d,aes(x=-PC1,y=-PC2)) pv + geom_point(aes(x=-PC1,y=-PC2,col=Pop1),na.rm=TRUE,size=point.size,alpha=alpha, shape=16) + labs(x="PC1",y="PC2",color="Pop1") + scale_color_gradient(low="white", high="#FF3300") + coord_flip() + theme dev.off() # pop2 filename <- paste(Sys.Date(),"_pca_pop2.pdf", sep="") pdf(filename) pv <- ggplot(data=d,aes(x=-PC1,y=-PC2)) pv + geom_point(aes(x=-PC1,y=-PC2,col=Pop2),na.rm=TRUE,size=point.size,alpha=alpha, shape=16) + labs(x="PC1",y="PC2",color="Pop2") + scale_color_gradient(low="white", high="#33FF00") + coord_flip() + theme dev.off() #pop3 filename <- paste(Sys.Date(),"_pca_pop3.pdf", sep="") pdf(filename) pv <- ggplot(data=d,aes(x=-PC1,y=-PC2)) pv + geom_point(aes(x=-PC1,y=-PC2,col=Pop3),na.rm=TRUE,size=point.size,alpha=alpha, shape=16) + labs(x="PC1",y="PC2",color="Pop3") + scale_color_gradient(low="white", high="#0099FF") + coord_flip() + theme dev.off() ## plot PCA colored by latitude and longitude #Latitude filename <- paste(Sys.Date(),"_pca_latitude.pdf", sep="") pdf(filename) pv <- ggplot(data=d,aes(x=-PC1,y=-PC2)) pv + geom_point(aes(x=-PC1,y=-PC2,col=Latitude),na.rm=TRUE,size=point.size,alpha=alpha, shape=16) + labs(x="PC1",y="PC2",color="Latitude") + scale_color_gradientn(colours = rainbow(5)) + coord_flip() + theme dev.off() #Longitude filename <- paste(Sys.Date(),"_pca_longitude.pdf", sep="") pdf(filename) pv <- ggplot(data=d,aes(x=-PC1,y=-PC2)) pv + geom_point(aes(x=-PC1,y=-PC2,col=Longitude),na.rm=TRUE,size=point.size,alpha=alpha, shape=16) + labs(x="PC1",y="PC2",color="Longitude") + scale_color_gradientn(colours = rainbow(5)) + coord_flip() + theme #+geom_text(aes(label=V4), size =3, hjust=1, vjust=0) dev.off() ## plot PCA colored by PC1 and PC2 #PC1 filename <- paste(Sys.Date(),"_pca_PC1.pdf", sep="") pdf(filename) pv <- ggplot(data=d,aes(x=PC1,y=PC2)) pv + geom_point(aes(x=-PC1,y=-PC2,col=-PC1),na.rm=TRUE,size=point.size,alpha=alpha, shape=16) + labs(x="PC1",y="PC2",color="PC1") + scale_color_gradientn(colours = rainbow(2)) + coord_flip() + theme dev.off() #PC2 filename <- paste(Sys.Date(),"_pca_PC2.pdf", sep="") pdf(filename) pv <- ggplot(data=d,aes(x=PC1,y=PC2)) pv + geom_point(aes(x=-PC1,y=-PC2,col=-PC2),na.rm=TRUE,size=point.size,alpha=alpha, shape=16) + labs(x="PC1",y="PC2",color="PC2") + scale_color_gradientn(colours = rainbow(2)) + coord_flip() + theme #+geom_text(aes(label=V4), size =3, hjust=1, vjust=0) dev.off() ## Plot maps library(ggplot2) #library(ggmap) library(maps) library(mapdata) ## Prepare map of Japan japan <- map_data("japan") dim(japan) head(japan) gg1 <- ggplot() + geom_polygon(data = japan, aes(x=long, y = lat, group = group), fill = "grey", color = "grey") + coord_fixed(1.3) ## plot map colored by population structure groups filename <- paste(Sys.Date(),"_map_pop1.pdf", sep="") pdf(filename) gg.pop1 <- gg1 + geom_point(data=d, aes(x=Longitude,y=Latitude,col=Pop1),na.rm=TRUE,size=point.size,alpha=alpha, shape=16) + labs(x="Longitude",y="Latitude",color="Pop1") + scale_color_gradient(low="white", high="#FF3300") + theme gg.pop1 dev.off() filename <- paste(Sys.Date(),"_map_pop2.pdf", sep="") pdf(filename) gg.pop2 <- gg1 + geom_point(data=d, aes(x=Longitude,y=Latitude,col=Pop2),na.rm=TRUE,size=point.size,alpha=alpha, shape=16) + labs(x="Longitude",y="Latitude",color="Pop2") + scale_color_gradient(low="white", high="#33FF00") + theme gg.pop2 dev.off() filename <- paste(Sys.Date(),"_map_pop3.pdf", sep="") pdf(filename) gg.pop3 <- gg1 + geom_point(data=d, aes(x=Longitude,y=Latitude,col=Pop3),na.rm=TRUE,size=point.size,alpha=alpha, shape=16) + labs(x="Longitude",y="Latitude",color="Pop3") + scale_color_gradient(low="white", high="#0099FF") + theme gg.pop3 dev.off() ## plot map colored by PC1 and PC2 filename <- paste(Sys.Date(),"_map_PC1.pdf", sep="") pdf(filename) gg.pc1 <- gg1 + geom_point(data=d, aes(x=Longitude,y=Latitude,col=-PC1),na.rm=TRUE,size=point.size,alpha=alpha, shape=16) + labs(x="Longitude",y="Latitude",color="PC1") + scale_color_gradientn(colours = rainbow(2)) + theme gg.pc1 dev.off() filename <- paste(Sys.Date(),"_map_PC2.pdf", sep="") pdf(filename) gg.pc2 <- gg1 + geom_point(data=d, aes(x=Longitude,y=Latitude,col=-PC2),na.rm=TRUE,size=point.size,alpha=alpha, shape=16) + labs(x="Longitude",y="Latitude",color="PC2") + scale_color_gradientn(colours = rainbow(2)) + theme gg.pc2 dev.off() filename <- paste(Sys.Date(),"_map_PC2.zoom.pdf", sep="") pdf(filename, 10, 10) gg.pc2.zoom <- gg1 + geom_point(data=d, aes(x=Longitude,y=Latitude,col=-PC2),na.rm=TRUE,size=3,alpha=alpha, shape=16) + labs(x="Longitude",y="Latitude",color="PC2") + scale_color_gradientn(colours = rainbow(2)) + theme gg.pc2.zoom dev.off() } ### Plot PSMC graphs { ### Plot examples of PSMC curves { # set graphics parameters alpha = 0.8 font.size = 24 point.size = 5 # load libraries library(ggplot2) ### simple theme with no gridlines theme <- theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text=element_text(size=font.size), legend.text=element_text(size=font.size), axis.title=element_text(size=font.size,face="bold"), legend.title=element_text(size=font.size, face="bold"), legend.position = "none" ) # list of accessions analysed accessions <- c("MG-001", "MG-005", "MG-008", "MG-010", "MG-019", "MG-027", "MG-036", "MG-042", "MG-053", "MG-063", "MG-068", "MG-076", "MG-081", "MG-085", "MG-089", "MG-101", "MG-111", "MG-113", "MG-128", "MG-130", "MG-146", "MG-154", "MG017", "MG024", "MG028", "MG035", "MG039", "MG069", "MG078", "MG106", "MG116", "MG121", "MG125", "MG138", "MG141", "MG144", "MG-003", "MG-007", "MG-009", "MG-018", "MG-030", "MG-040", "MG-044", "MG-051", "MG-056", "MG-066", "MG-073", "MG-082", "MG-086", "MG-096", "MG-107", "MG-112", "MG-120", "MG-129", "MG-142", "MG-152", "MG013", "MG022", "MG025", "MG034", "MG038", "MG045", "MG074", "MG084", "MG110", "MG117", "MG124", "MG126", "MG140", "MG143") #get the PSMC output file names setwd("[PSMC_output_directory]") file.names <- dir(pattern="*.0.txt") ### plot PSMC graphs for each accession against all others plots = list() #loop over all accessions used for (i in 1: length(accessions)) { #Subset filenames to get the one versus all comparison file.names.sub <- grep(accessions[i],file.names, fixed=TRUE, value =TRUE) #Get the PSMC data for the filenames in the list setwd("/Volumes/GenomeDK/LotusGenome/faststorage/tomomi/psmc/psmc_out_201808/") j=1 my_list <- list() for (file in file.names.sub) { d <- read.delim(file, header=FALSE) d$file <- rep(file, nrow(d)) my_list[[j]] <- d j=j+1 } #combine in one big data frame big_data = do.call(what = rbind, args = my_list) nrow(big_data) d.sub <- big_data #produce the PSMC curve plot gg <- ggplot(data=d.sub, aes(x=V1, y=V2, color=file)) + geom_line() + scale_x_continuous(breaks=c(1e1,1e2,1e3,1e4,1e5,1e6),trans='log10') + scale_y_continuous(trans='log10')+ geom_hline(yintercept = 5) + annotation_logticks(sides = "lb") + theme plots[[i]] <- gg #save the plot setwd("/Users/au27857/Dropbox/NirajShah/AccessionPaper/31082016/Manuscript/20180808_sua_PSMC/graphs") filename <- paste(Sys.Date(), "_", accessions[i], "_PSMC_curves.pdf", sep="") pdf(filename, 12, 8) print(plots[[i]]) dev.off() } } ## plot PSMC graphs for the main figure { ## set colors col1="#8c510a" col2="#bf812d" col3="#dfc27d" col4="#f6e8c3" col5="#35978f" col6="#80cdc1" # First a custom plot for MG-063 { # list of accessions analysed accessions <- c("MG-063") #accessions excluded because of diverging population histories excluded <- c("Gifu", "MG-080", "MG-023", "MG-049") #make regular expression to match excluded accssions excluded.pattern <- paste(excluded,collapse="|") setwd("/Volumes/GenomeDK/LotusGenome/faststorage/tomomi/psmc/psmc_out_201808/") file.names <- dir(pattern="*.0.txt") ### plot PSMC graphs for each accession against all others plots = list() #loop over all accessions used #Subset filenames to get the one versus all comparison file.names.sub <- grep(accessions,file.names, fixed=TRUE, value =TRUE) #Remove excluded accessions file.names.excluded <- grep(excluded.pattern, file.names.sub, fixed=FALSE, value=TRUE) '%ni%' <- Negate('%in%') file.names.sub <- file.names.sub[file.names.sub %ni% file.names.excluded] #Get the PSMC data for the filenames in the list setwd("[working dir]") j=1 my_list <- list() for (file in file.names.sub) { d <- read.delim(file, header=FALSE) d$file <- rep(file, nrow(d)) my_list[[j]] <- d j=j+1 } #combine in one big data frame big_data = do.call(what = rbind, args = my_list) nrow(big_data) d.sub <- big_data #produce the PSMC curve plot group1 <- c("MG024", "MG-027", "MG-068", "MG025", "MG028", "MG022") group1.pattern <- paste(group1,collapse="|") d.sub1 <- d.sub[grep(group1.pattern, d.sub$file),] group2 <- c("MG138", "MG056", "MG142", "MG144", "MG143", "MG063", "MG140", "MG141", "MG106", "MG066", "MG074", "MG003") group2.pattern <- paste(group2,collapse="|") d.sub2 <- d.sub[grep(group2.pattern, d.sub$file),] group3 <- c("MG126", "MG085", "MG125", "MG124", "MG045", "MG084", "MG082", "MG044", "MG086", "MG007", "MG010", "MG129", "MG042", "MG009", "MG039", "MG040", "MG038", "MG035", "MG096", "MG034", "MG030") group3.pattern <- paste(group3,collapse="|") d.sub3 <- d.sub[grep(group3.pattern, d.sub$file),] color_scale <- c("MG-063.sub_sampling.MG-027.sub_sampling.fq.gz.0.txt" = col1, "MG-063.sub_sampling.MG-027.sub_sampling.fq.gz.0.txt" = col1, "MG-063.sub_sampling.MG-068.sub_sampling.fq.gz.0.txt" = col1, "MG-063.sub_sampling.MG022.fq.gz.0.txt" = col1, "MG-063.sub_sampling.MG024.fq.gz.0.txt" = col1, "MG-063.sub_sampling.MG025.fq.gz.0.txt" = col1, "MG-063.sub_sampling.MG028.fq.gz.0.txt" = col1, "MG-063.sub_sampling.MG074.fq.gz.0.txt" = col3, "MG-063.sub_sampling.MG106.fq.gz.0.txt" = col3, "MG-063.sub_sampling.MG138.fq.gz.0.txt" = col3, "MG-063.sub_sampling.MG140.fq.gz.0.txt" = col3, "MG-063.sub_sampling.MG141.fq.gz.0.txt" = col3, "MG-063.sub_sampling.MG143.fq.gz.0.txt" = col3, "MG-063.sub_sampling.MG144.fq.gz.0.txt" = col3, "MG-063.sub_sampling.MG035.fq.gz.0.txt" = "light blue", "MG-063.sub_sampling.MG038.fq.gz.0.txt" = "light blue", "MG-063.sub_sampling.MG039.fq.gz.0.txt" = "light blue", "MG-063.sub_sampling.MG045.fq.gz.0.txt" = "light blue", "MG-063.sub_sampling.MG084.fq.gz.0.txt" = "light blue", "MG-063.sub_sampling.MG124.fq.gz.0.txt" = "light blue", "MG-063.sub_sampling.MG125.fq.gz.0.txt" = "light blue", "MG-063.sub_sampling.MG126.fq.gz.0.txt" = "light blue", "MG034.sub_sampling.MG-063.sub_sampling.fq.gz.0.txt" = "light blue" ) gg <- ggplot() + geom_line(data=d.sub1, aes(x=V1, y=V2, color=file)) + geom_line(data=d.sub2, aes(x=V1, y=V2, color=file)) + geom_line(data=d.sub3, aes(x=V1, y=V2, color=file)) + scale_color_manual(values=color_scale) + scale_x_continuous(breaks=c(1e1,1e2,1e3,1e4,1e5,1e6),trans='log10') + scale_y_continuous(trans='log10')+ geom_hline(yintercept = 5) + annotation_logticks(sides = "lb") + theme + theme(legend.position="none") + xlim(500,1e6) gg #save the plot setwd("[working dir]") filename <- paste(Sys.Date(), "_", accessions, "_PSMC_curves.pdf", sep="") pdf(filename, 12, 8) gg dev.off() } # Then for for MG-126 { # list of accessions analysed accessions <- c("MG126") #accessions excluded because of diverging population histories excluded <- c("Gifu", "MG-080", "MG-023", "MG-049") #make regular expression to match excluded accssions excluded.pattern <- paste(excluded,collapse="|") setwd("[working dir]") file.names <- dir(pattern="*.0.txt") ### plot PSMC graphs for each accession against all others plots = list() #Subset filenames to get the one versus all comparison file.names.sub <- grep(accessions,file.names, fixed=TRUE, value =TRUE) #Remove excluded accessions file.names.excluded <- grep(excluded.pattern, file.names.sub, fixed=FALSE, value=TRUE) '%ni%' <- Negate('%in%') file.names.sub <- file.names.sub[file.names.sub %ni% file.names.excluded] #Get the PSMC data for the filenames in the list setwd("[working dir]") j=1 my_list <- list() for (file in file.names.sub) { d <- read.delim(file, header=FALSE) d$file <- rep(file, nrow(d)) my_list[[j]] <- d j=j+1 } #combine in one big data frame big_data = do.call(what = rbind, args = my_list) nrow(big_data) d.sub <- big_data #produce the PSMC curve plot group1 <- c("MG024", "MG-027", "MG-068", "MG025", "MG028", "MG022") group1.pattern <- paste(group1,collapse="|") d.sub1 <- d.sub[grep(group1.pattern, d.sub$file),] group2 <- c("MG138", "MG056", "MG142", "MG144", "MG143", "MG063", "MG140", "MG141", "MG106", "MG066", "MG074", "MG003") group2.pattern <- paste(group2,collapse="|") d.sub2 <- d.sub[grep(group2.pattern, d.sub$file),] group3 <- c("MG085", "MG125", "MG124", "MG045", "MG084", "MG082", "MG044", "MG086", "MG007", "MG010", "MG129", "MG042", "MG009", "MG039", "MG040", "MG038", "MG035", "MG096", "MG034", "MG030") group3.pattern <- paste(group3,collapse="|") d.sub3 <- d.sub[grep(group3.pattern, d.sub$file),] color_scale <- c("MG-027.sub_sampling.MG126.fq.gz.0.txt" = col1, "MG-068.sub_sampling.MG126.fq.gz.0.txt" = col1, "MG022.MG126.fq.gz.0.txt" = col1, "MG024.MG126.fq.gz.0.txt" = col1, "MG025.MG126.fq.gz.0.txt" = col1, "MG028.MG126.fq.gz.0.txt" = col1, "MG074.MG126.fq.gz.0.txt" = col3, "MG106.MG126.fq.gz.0.txt" = col3, "MG138.MG126.fq.gz.0.txt" = col3, "MG140.MG126.fq.gz.0.txt" = col3, "MG141.MG126.fq.gz.0.txt" = col3, "MG143.MG126.fq.gz.0.txt" = col3, "MG144.MG126.fq.gz.0.txt" = col3, "MG034.sub_sampling.MG126.fq.gz.0.txt" = "light blue", "MG035.MG126.fq.gz.0.txt" = "light blue", "MG038.MG126.fq.gz.0.txt" = "light blue", "MG039.MG126.fq.gz.0.txt" = "light blue", "MG045.MG126.fq.gz.0.txt" = "light blue", "MG084.MG126.fq.gz.0.txt" = "light blue", "MG124.MG126.fq.gz.0.txt" = "light blue", "MG125.MG126.fq.gz.0.txt" = "light blue" ) gg <- ggplot() + geom_line(data=d.sub1, aes(x=V1, y=V2, color=file)) + geom_line(data=d.sub2, aes(x=V1, y=V2, color=file)) + geom_line(data=d.sub3, aes(x=V1, y=V2, color=file)) + scale_color_manual(values=color_scale) + scale_x_continuous(breaks=c(1e1,1e2,1e3,1e4,1e5,1e6),trans='log10') + scale_y_continuous(trans='log10')+ geom_hline(yintercept = 5) + annotation_logticks(sides = "lb") + theme + theme(legend.position="none") gg #save the plot setwd("[working dir]") filename <- paste(Sys.Date(), "_", accessions, "_PSMC_curves.pdf", sep="") pdf(filename, 12, 8) gg dev.off() } } ## Plot the relevant accessions on the map colored by pop structure { ## read accession metadata setwd("/Users/au27857/Dropbox/NirajShah/AccessionPaper/31082016/Manuscript/20180605_stig_analysis/geo_pca_struc") d <- read.delim("20180612_137LjAcessionData.txt") # set graphics parameters alpha = 0.8 font.size = 24 point.size = 7 # load libraries library(ggplot2) #library(ggmap) library(maps) library(mapdata) ### simple theme with no gridlines theme <- theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text=element_text(size=font.size), legend.text=element_text(size=font.size), axis.title=element_text(size=font.size,face="bold"), legend.title=element_text(size=font.size, face="bold"), legend.position = "none" ) ## generate that map of Japan japan <- map_data("japan") dim(japan) head(japan) gg1 <- ggplot() + geom_polygon(data = japan, aes(x=long, y = lat, group = group), fill = "grey", color = "grey") + coord_fixed(1.3) gg1 ### plot the different groups on the map pop1 <- c("MG024","MG027","MG068","MG025","MG028","MG022") d.sub <- subset(d, Accession %in% pop1) gg.pop1 <- gg1 + geom_point(data=d.sub, aes(x=Longitude,y=Latitude,col=Pop1),na.rm=TRUE,size=point.size,alpha=alpha, shape=16) + labs(x="Longitude",y="Latitude",color="Pop1") + scale_color_gradient(low="white", high="#FF3300", limits=c(0,1)) + theme gg.pop1 filename <- paste(Sys.Date(),"_pop1_psmc.png", sep="") setwd("/Users/au27857/Dropbox/NirajShah/AccessionPaper/31082016/Manuscript/20180808_sua_PSMC") png(filename, 5, 5, units = "in", res = 200) gg.pop1 dev.off() south.east <- c("MG138","MG056","MG142","MG144","MG143","MG063", "MG140", "MG141", "MG106", "MG066", "MG074") d.sub <- subset(d, Accession %in% south.east) gg.south.east <- gg1 + geom_point(data=d.sub, aes(x=Longitude,y=Latitude, col=Pop2),na.rm=TRUE,size=point.size,alpha=alpha, shape=16) + labs(x="Longitude",y="Latitude",color="Pop2") + scale_color_gradient(low="white", high="#29cc00",limits=c(0,1)) + theme gg.south.east filename <- paste(Sys.Date(),"_south.east_psmc.png", sep="") setwd("/Users/au27857/Dropbox/NirajShah/AccessionPaper/31082016/Manuscript/20180808_sua_PSMC") png(filename, 5, 5, units = "in", res = 200) gg.south.east dev.off() south <- c("MG003", "MG001","MG069","MG053","MG008","MG130","MG053") d.sub <- subset(d, Accession %in% south) gg.south <- gg1 + geom_point(data=d.sub, aes(x=Longitude,y=Latitude, col=Pop2),na.rm=TRUE,size=point.size,alpha=alpha, shape=16) + labs(x="Longitude",y="Latitude",color="Pop2") + scale_color_gradient(low="white", high="#29cc00",limits=c(0,1)) + theme gg.south filename <- paste(Sys.Date(),"_south_psmc.png", sep="") setwd("/Users/au27857/Dropbox/NirajShah/AccessionPaper/31082016/Manuscript/20180808_sua_PSMC") png(filename, 5, 5, units = "in", res = 200) gg.south dev.off() central.pop2 <- c("MG110","MG111") central.pop3 <- c("MG112") d.sub.pop2 <- subset(d, Accession %in% central.pop2) d.sub.pop3 <- subset(d, Accession %in% central.pop3) gg.central <- gg1 + geom_point(data=d.sub.pop2, aes(x=Longitude,y=Latitude, col=Pop2),na.rm=TRUE,size=point.size,alpha=alpha, shape=16) + labs(x="Longitude",y="Latitude",color="Pop2") + scale_color_gradient(low="white", high="#29cc00",limits=c(0,1)) + geom_point(data=d.sub.pop3, aes(x=Longitude,y=Latitude), col="#0099FF",na.rm=TRUE,size=point.size,alpha=alpha, shape=16) + theme gg.central filename <- paste(Sys.Date(),"_central_psmc.png", sep="") setwd("/Users/au27857/Dropbox/NirajShah/AccessionPaper/31082016/Manuscript/20180808_sua_PSMC") png(filename, 5, 5, units = "in", res = 200) gg.central dev.off() tsushima <- c("MG152","MG154") d.sub <- subset(d, Accession %in% tsushima) gg.tsushima <- gg1 + geom_point(data=d.sub, aes(x=Longitude,y=Latitude, col=Pop1),na.rm=TRUE,size=point.size,alpha=alpha, shape=16) + labs(x="Longitude",y="Latitude",color="Pop2") + scale_color_gradient(low="white", high="#FF3300", limits=c(0,1)) + theme gg.tsushima filename <- paste(Sys.Date(),"_tsushima_psmc.png", sep="") setwd("/Users/au27857/Dropbox/NirajShah/AccessionPaper/31082016/Manuscript/20180808_sua_PSMC") png(filename, 5, 5, units = "in", res = 200) gg.tsushima dev.off() #pop3 pop3 <- c("MG076" , "MG017" , "MG018" , "MG019" , "MG013" , "MG005" , "MG078" , "MG116" , "MG128" , "MG121" , "MG120" , "MG081" , "MG126" , "MG085" , "MG125" , "MG124" , "MG045" , "MG084" , "MG082" , "MG044" , "MG086" , "MG007" , "MG010" , "MG129" , "MG042" , "MG009" , "MG039" , "MG040" , "MG038" , "MG035" , "MG096" , "MG034" , "MG030") d.sub <- subset(d, Accession %in% pop3) gg.pop3 <- gg1 + geom_point(data=d.sub, aes(x=Longitude,y=Latitude, col=Pop3),na.rm=TRUE,size=point.size,alpha=alpha, shape=16) + labs(x="Longitude",y="Latitude",color="Pop2") + scale_color_gradient(low="white", high="#0099FF", limits=c(0,1)) + theme gg.pop3 filename <- paste(Sys.Date(),"_pop3_psmc.png", sep="") setwd("/Users/au27857/Dropbox/NirajShah/AccessionPaper/31082016/Manuscript/20180808_sua_PSMC") png(filename, 5, 5, units = "in", res = 200) gg.pop3 dev.off() } ## Plot the relevant accessions on the PCA chart colored by pop structure { setwd("/Users/au27857/Dropbox/NirajShah/AccessionPaper/31082016/Manuscript/20180605_stig_analysis/geo_pca_struc") d <- read.delim("20180612_137LjAcessionData.txt") # set graphics parameters alpha = 0.8 font.size = 24 point.size = 7 # load libraries library(ggplot2) # prepare the basic PCA plot with white dots gg1 <- ggplot(data=d,aes(x=-PC1,y=-PC2)) gg1 <- gg1 + geom_point(aes(x=-PC1,y=-PC2,col=Pop1), col="white",na.rm=TRUE,size=point.size,alpha=alpha, shape=16) + labs(x="PC1",y="PC2") + coord_flip() + theme gg1 # Add the central Kyushu and Tshushima lines to the plot central.pop2 <- c("MG110","MG111") central.pop3 <- c("MG112") tsushima <- c("MG152","MG154") d.sub.pop2 <- subset(d, Accession %in% central.pop2) d.sub.pop3 <- subset(d, Accession %in% central.pop3) d.tsushima <- subset(d, Accession %in% tsushima) gg.central <- gg1 + geom_point(data=d.sub.pop2, aes(x=-PC1,y=-PC2, col=Pop2),na.rm=TRUE,size=point.size,alpha=alpha, shape=16) + labs(x="PC1",y="PC2",color="Pop2") + scale_color_gradient(low="white", high="#29cc00",limits=c(0,1)) + geom_point(data=d.sub.pop3, aes(x=-PC1,y=-PC2), col="#0099FF",na.rm=TRUE,size=point.size,alpha=alpha, shape=16) + geom_point(data=d.tsushima, aes(x=-PC1,y=-PC2), col="#ff9980",na.rm=TRUE,size=point.size,alpha=alpha, shape=16) + theme gg.central filename <- paste(Sys.Date(),"_central_tsushima_PCA.png", sep="") setwd("/Users/au27857/Dropbox/NirajShah/AccessionPaper/31082016/Manuscript/20180808_sua_PSMC") png(filename, 5, 5, units = "in", res = 200) gg.central dev.off() } } ### Plot and compare GWAS phenotype data { ## read data setwd("[Your_working_directory]") d <- read.delim("Supplemental_file_1") colnames(d) ## load libraries library(ggplot2) # set graphics parameters alpha=0.6 font.size = 20 point.size = 5 ### simple theme with no gridlines theme <- theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text=element_text(size=font.size), legend.text=element_text(size=font.size), axis.title=element_text(size=font.size,face="bold"), legend.title=element_text(size=font.size, face="bold"), legend.background = element_rect(fill="transparent"), legend.position = c(0.2,0.8) ) ## Plot map colored by gwas library(ggplot2) library(maps) library(mapdata) japan <- map_data("japan") dim(japan) head(japan) gg1 <- ggplot() + geom_polygon(data = japan, aes(x=long, y = lat, group = group), fill = "grey", color = "grey") + coord_fixed(1.3) ## plot PCA colored by GWAS phenoytpe data plots.pca = list() plots.map = list() for (i in 22:ncol(d) ){ d.sub <- subset(d, is.finite(d[,i])) gwas <- d.sub[,i] gwas.name <-colnames(d)[i] pv <- ggplot(data=d.sub,aes(x=PC1,y=PC2)) pv <- pv + geom_point(aes(x=-PC1,y=-PC2,col=gwas),na.rm=TRUE,size=point.size,alpha=alpha, shape=16) + labs(x="PC1",y="PC2",color=gwas.name) + scale_color_gradientn(colours = rainbow(2),na.value="light grey") + coord_flip() + theme plots.pca[[i]] = pv filename <- paste(Sys.Date(),"_",gwas.name,"_pca.pdf", sep="") pdf(filename) print(plots.pca[[i]]) dev.off() # plot maps colored by GWAS phenotype data filename <- paste(Sys.Date(),"_", gwas.name, "_map.pdf", sep="") gg.long <- gg1 + geom_point(data=d.sub, aes(x=Longitude,y=Latitude,col=d.sub[,i]),size=point.size,alpha=alpha, shape=16) + labs(x="Longitude",y="Latitude",color=gwas.name) + scale_color_gradientn(colours = rainbow(2), na.value="light grey") + theme plots.map[[i]] <- gg.long pdf(filename) print(plots.map[[i]]) dev.off() } # Make interactive plots library(plotly) i=15 d.sub <- subset(d, is.finite(d[,i])) gwas <- d.sub[,i] gwas.name <-colnames(d)[i] p.map <- plot_ly(d.sub, x = ~Longitude, y = ~Latitude, type = 'scatter', mode = 'markers', text = ~paste('ID: ', Accession, d.sub[,i]), color = d.sub[,i]) p.map p.pca <- plot_ly(d.sub, x = ~-PC2, y = ~-PC1, type = 'scatter', mode = 'markers', text = ~paste('ID: ', Accession, d.sub[,i]), color = d.sub[,i]) p.pca ## check the correlations library(corrgram) filename <- paste(Sys.Date(),"_", "_corrgram.png", sep="") png(filename,18,18, units="in", res=200) corrgram(d.sub[,c(1:ncol(d.sub))], lower.panel=panel.pts, upper.panel=panel.cor) dev.off() ### plot winter survival by population { # subset to get only non-admixed individuals d.sub <- subset(d, (pop1==1 | pop2==1 | pop3==1)) # generate new pop column pop=NULL for (i in 1:nrow(d.sub)) { if(d.sub$pop1[i]>d.sub$pop2[i] & d.sub$pop1[i]>d.sub$pop3[i]) {pop[i] <- 1} else if (d.sub$pop2[i]>d.sub$pop1[i] & d.sub$pop2[i]>d.sub$pop3[i]) { pop[i] <- 2} else if (d.sub$pop3[i]>d.sub$pop1[i] & d.sub$pop3[i]>d.sub$pop2[i]) {pop[i] <- 3} } d.sub$pop <- pop head(d.sub) ## theme { t <- theme( # Text text = element_text( #family = "Arial", ), # Plot title plot.title = element_text(size=rel(1.5), face="bold", vjust=2), plot.margin = unit(c(1.5,1.5,1.5,1.5),"lines"), # Panel panel.background = element_rect(fill = "#ffffff"), panel.border = element_blank(), panel.grid.major.x = element_blank(), panel.grid.major.y = element_blank(), panel.grid.minor = element_blank(), #panel.margin = unit(1, "lines"), # Legend legend.position = "bottom", legend.key = element_rect(colour = "#000000"), # Facet strip.background = element_rect(fill = "#ffffff"), strip.text = element_text( size = rel(1) ), # Axis axis.text = element_text( color = "#000000", size = rel(1) ), #axis.ticks.x = element_blank(), #axis.ticks.y = element_blank(), axis.title = element_text( size = rel(1.2) #family = "Arial", ) #axis.line.x = element_line(color="black", size = .5) ) } # generate boxplots overlaid with jittered dots { size=4 # p1: All species, separated by clade (1,2,3,4,5) p1 <- ggplot( data = d.sub, aes( y = OW_2014_15, x = pop-0.2, group = pop ) ) + geom_jitter( aes( color = factor(pop) ), size=size, alpha=0.7, position = position_jitter(width = .05), shape=16 ) + geom_boxplot( outlier.shape = NA, width=0.1, alpha=0.5, notch=FALSE, color="dark grey", coef=0 #removes whiskers ) + scale_y_continuous(expand = c(0, 0)) + labs( y = 'Survival', x = 'pop' ) p1 <- p1 + t + xlim(0.5,3.5) + ylim(-0.05,1.05) p2 <- p1 + geom_jitter(data = d.sub, aes( y = OW_2015_16, x = pop, group = pop, color = factor(pop) ), shape=16, size=size, alpha=0.7, position = position_jitter(width = .05) #width=0.2 ) + geom_boxplot(data = d.sub, aes( y = OW_2015_16, x = pop), outlier.shape = NA, width=0.1, alpha=0.5, notch=FALSE, color="dark grey", coef=0 #removes whiskers ) p3 <- p2 + geom_jitter(data = d.sub, aes( y = OW_2016_17, x = pop+.2, group = pop, color = factor(pop) ), shape=16, size=size, alpha=0.7, position = position_jitter(width = .05) ) + geom_boxplot(data = d.sub, aes( y = OW_2016_17, x = pop+.2), outlier.shape = NA, width=0.1, alpha=0.5, notch=FALSE, color="dark grey", coef=0 #removes whiskers ) } ## save the plot ## mean on top, without whiskers! Wider, field pic as insert on map. setwd("/Users/au27857/Dropbox/NirajShah/AccessionPaper/31082016/Manuscript/Figures/_20180614/phenotypes_and_gwas") filename <- paste(Sys.Date(),"survivalByPopAndYear.pdf", sep="_") pdf(filename, width=6, height=6) print(p3) dev.off() } ### Test for significant differences in winter survival between the populations { install.packages("multcomp") library(multcomp) d.sub$pop.factor <- as.factor(pop) d.sub <- d.sub[,c(1,25:27,48)] OW2014 <- d.sub[,c(2,5)] OW2014$year <- as.factor(paste(OW2014$pop, "2014", sep="_")) colnames(OW2014) <- c("OW", "pop", "pop.year") OW2015 <- d.sub[,c(3,5)] OW2015$year <- as.factor(paste(OW2015$pop, "2015", sep="_")) colnames(OW2015) <- c("OW", "pop", "pop.year") OW2016 <- d.sub[,c(4,5)] OW2016$year <- as.factor(paste(OW2016$pop, "2016", sep="_")) colnames(OW2016) <- c("OW", "pop", "pop.year") d.sub.long <- rbind(OW2014, OW2015, OW2016) d.sub.long <- subset(d.sub.long, OW>-1) # all pop-year comparisons pop.glm <- glm(OW ~ pop.year, data=d.sub.long) summary(pop.glm) summary(glht(pop.glm, linfct= mcp(pop.year="Tukey"))) summary.pop.glm <- summary(glht(pop.glm, linfct= mcp(pop.year="Tukey"))) filename <- paste(Sys.Date(), "significance_test.txt", sep ="_") sink(filename) summary.pop.glm sink() # compare pops across all years pop.glm2 <- glm(OW ~ pop, data=d.sub.long) summary(pop.glm2) summary(glht(pop.glm2, linfct= mcp(pop="Tukey"))) summary.pop.glm2 <- summary(glht(pop.glm, linfct= mcp(pop.year="Tukey"))) filename <- paste(Sys.Date(), "byPopSignificance_test.txt", sep ="_") sink(filename) summary.pop.glm2 sink() } } ## read in GWAS and Fst data { ## Fst results setwd("Fst_output_directory") fst.file <- "20180622_notPop3vsPop3_fst_scan.txt.weir.fst" #fst.file <- "20180622_Pop1vsPop2_fst_scan.txt.weir.fst" d.fst <- read.delim(fst.file) colnames(d.fst) <- c("chr","pos","fst") d.fst$key <- paste(d.fst$chr,d.fst$pos,sep="_") d.fst.filt <- subset(d.fst, !(is.na(d.fst$fst)) & is.numeric(d.fst$fst) & d.fst$fst>0) head(d.fst.filt) nrow(d.fst.filt) ## GWAS results setwd("[GWAS_output_directory]") ## filenames have to be adjusted to pid01 etc, to get the right order dataFiles.in <- lapply(Sys.glob("*.pvals"), read.csv) fileList <- Sys.glob("*.pvals") fileList } ## produce GWAS Manhattan plots { #install.packages("qqman") library("qqman") setwd("[Your_working_directory]") # plot for all traits filename <- paste(Sys.Date(),"_","manhat.png", sep="") png(filename, 8, length(fileList)*2, units="in", res=200) par(mfrow=c(length(fileList),1)) for (i in 1:length(fileList)) { main <- fileList[i] gwas.plot <- dataFiles.in[[i]][,c(1,2,3,5)] gwas.plot <- subset(gwas.plot, macs > mac.cutoff & chromosomes != 0) colnames(gwas.plot) <- c("CHR","BP","P", "MAC") head(gwas.plot) manhattan(gwas.plot, main=main, cex=2) } dev.off() } ### Analyse overlaps in GWAS and Fst results at the SNP level { ## loop through all GWAS p-value files and get the Fst overlapping SNPs setwd("[Your_working_directory]") fst.gwas.overlap <- list() fst.gwas.overlap.filt <- list() dataFiles <- list() top.snps=500 fst.cutoff=0.65 mac.cutoff=10 p.cutoff=3 for (i in 1:length(fileList)) { dataFiles.in[[i]]$p <- -log(dataFiles.in[[i]]$scores,10) dataFiles[[i]] <- dataFiles.in[[i]][with(dataFiles.in,order(-dataFiles.in[[i]]$p)),] dataFiles[[i]] <- subset(dataFiles[[i]], macs> mac.cutoff & p>p.cutoff) dataFiles[[i]] <- dataFiles[[i]][1:top.snps,] dataFiles[[i]]$chr <- paste("chr", dataFiles[[i]]$chromosomes, sep="") dataFiles[[i]]$key <- paste(dataFiles[[i]]$chr, dataFiles[[i]]$positions, sep="_") head(dataFiles[[i]]) fst.gwas.overlap[[i]] <- merge(dataFiles[[i]], d.fst.filt, by="key", all.x=TRUE) fst.gwas.overlap.filt[[i]] <- subset(fst.gwas.overlap[[i]], fst>fst.cutoff) } # Summarise GWAS and Fst overlaps setwd("/Users/au27857/Dropbox/NirajShah/AccessionPaper/31082016/Manuscript/20180615_field_data/20180806_results") overlap.counts <- lapply(fst.gwas.overlap.filt, nrow) overlap.summary <- NULL for (i in 1:length(fileList)) { overlap.summary[i] <- overlap.counts[[i]] } overlap.summary <- as.data.frame(cbind(fileList,overlap.summary)) filename <- paste(Sys.Date(), "gwas_fst_overlap", fst.file, top.snps, fst.cutoff, "M", mac.cutoff, "P", p.cutoff, ".txt", sep="_") write.table(overlap.summary, filename, col.names=FALSE, row.names=FALSE, quote=FALSE, sep="\t") overlap.summary # summarise overlaps between different GWAS runs gwas.overlap <- data.frame(matrix(ncol = length(fileList), nrow = length(fileList))) for (n in 1:length(fileList)) { for (m in 1:length(fileList)) { gwas.overlap[n,m] <- nrow(merge(fst.gwas.overlap.filt[[n]], fst.gwas.overlap.filt[[m]], by="key")) } } gwas.overlap$file <- fileList filename <- paste(Sys.Date(), "gwas_overlap", fst.file, top.snps, fst.cutoff, "M", mac.cutoff, "P", p.cutoff,".txt", sep="_") write.table(gwas.overlap, filename, col.names=FALSE, row.names=FALSE, quote=FALSE, sep="\t") gwas.overlap # Do boxplot of Fst values for the top 100 GWAS SNPs for each phenotype fst.boxplot <- data.frame(matrix(ncol = length(fileList), nrow = top.snps)) p.boxplot <- data.frame(matrix(ncol = length(fileList), nrow = top.snps)) for (i in 1:length(fileList)) { fst.boxplot[,i] <- fst.gwas.overlap[[i]]$fst p.boxplot[,i] <- dataFiles[[i]]$p } colnames(fst.boxplot) <- fileList boxplot(fst.boxplot) colnames(p.boxplot) <- fileList boxplot(p.boxplot) fst.means <- as.data.frame(colMeans(fst.boxplot, na.rm = TRUE)) p.means <- as.data.frame(colMeans(p.boxplot, na.rm = TRUE)) p.medians <- as.data.frame(apply(p.boxplot, 2, median,na.rm = TRUE)) filename <- paste(Sys.Date(), "fst.means", fst.file,top.snps, fst.cutoff, "M", mac.cutoff, "P", p.cutoff, ".txt", sep="_") write.table(fst.means, filename, fst.file, col.names=FALSE, row.names=TRUE, quote=FALSE, sep="\t") ## check distribution of all Fst values head(d.fst.filt) hist(d.fst.filt$fst, breaks=50) #compare the Fst distributions of all genes to those of the top GWAS SNPs { ## histogram of fst distribution for all SNPs setwd("[Your_working_directory]") filename <- paste(Sys.Date(), "all_SNPS_fst.distribution.png", sep ="_") cex=2 png(filename, 3, 3, units="in", res=150) par(mfcol=c(1,1), mar=c(3,3,3,3)) hist(d.fst.filt$fst, xlim=c(0,1), main="", breaks=25, xlab="", ylab="", cex.axis=cex, cex.lab=cex) dev.off() ## loop through all traits and plot histograms, ecdfs, and gwas/fst scatter plots for (i in 1: length(fileList)) { ecdf1 <- ecdf(d.fst.filt$fst) #fst, all genes ecdf2 <- ecdf(fst.gwas.overlap[[i]]$fst) #fst, top gwas SNPs setwd("/Users/au27857/Dropbox/NirajShah/AccessionPaper/31082016/Manuscript/20180615_field_data/20180806_results") filename <- paste(Sys.Date(), fileList[i], "M", mac.cutoff, "P", p.cutoff, fst.file, "fst.distributions.png", sep ="_") png(filename, 9, 3, units="in", res=150) par(mfcol=c(1,3), mar=c(5,5.1,5,5)) cex=2.7 hist(fst.gwas.overlap[[i]]$fst, xlim=c(0,1), main=fileList[i], breaks=25, xlab="", ylab="", cex.axis=cex, cex.lab=cex) plot(ecdf1, verticals=TRUE, do.points=FALSE, col="blue", xlim=c(0,1), main=fileList[i], xlab="", ylab="", lwd=3, cex.axis=cex, cex.lab=cex ) plot(ecdf2, verticals=TRUE, do.points=FALSE, add=TRUE, col='orange', xlim=c(0,1), lwd=3, cex.axis=cex, cex.lab=cex) plot(fst.gwas.overlap[[i]]$fst, fst.gwas.overlap[[i]]$p, main = fileList[i], lwd=2, xlab="", ylab="", cex.axis=cex, cex.lab=cex, xlim=c(0,1)) dev.off() } ##Kolmogorov-Smirnov tests for all traits { ## Kolmogorov-Smirnov - Fst, all SNPs versus GWA SNPs for each trait ks.test.summary <- data.frame(matrix(ncol = 3, nrow = 0)) x <- c("Two-sided", "Greater", "Less") colnames(ks.test.summary) <- x for (i in 1:length(fileList)) { ks.two.sided <- ks.test(d.fst.filt$fst, fst.gwas.overlap[[i]]$fst, alternative="two.sided") ks.greater <- ks.test(d.fst.filt$fst, fst.gwas.overlap[[i]]$fst, alternative="greater") ks.less <- ks.test(d.fst.filt$fst, fst.gwas.overlap[[i]]$fst, alternative="less") ks.test.summary[i,] <- c(ks.two.sided$p.value, ks.greater$p.value, ks.less$p.value) } rownames(ks.test.summary) <- fileList ks.test.summary } } } ## Save supplemental files with GWAS and FST results { # set working directory setwd("[Your_working_directory]") # select traits and set trait names traits.list <- c(1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 8, 9, 10, 16, 17, 19, 20, 39:45) trait.names <- c("Altitude","Mean_temperature","Minimum_temperature","Overwintering_2014", "Overwintering_2015", "Overwintering_2016", "FT_greenhouse", "FT_2014", "FT_2015", "FT_2017", "FT_2018", "FT_1st", "FT_2nd", "FT_dur", "K_ctrl", "Na_ctrl", "K_salt", "Na_salt", "Seed_weight", "Seed_size", "Seed_perimeter", "Seed_length", "Seed_width", "Seed_length_width ratio", "Seed_circularity") # save supplemental files n=1 for (i in traits.list) { fst.gwas.overlap.filt <- NULL fst.gwas.overlap.filt <- subset(fst.gwas.overlap[[i]], p>3) fst.gwas.overlap.filt.table <- fst.gwas.overlap.filt[,c(9,3,4,8,5:7,12)] filename <- paste(Sys.Date(), trait.names[n], fst.file,"gwas_results.txt", sep="_") write.table(fst.gwas.overlap.filt.table, file = filename, quote=FALSE, sep="\t", col.names=NA) n=n+1 } } ### Analyse Fst results at the gene level { ## check the original GFF file for gene model length distribution setwd("[Your_working_directory]") gff.original <- read.delim("20130802_Lj30.sorted.igv.gff3", header = FALSE) gff.original.genes <- subset(gff.original,V2=="protein_coding" & V3== "gene")[,c(1:5)] gff.original.genes$length <- gff.original.genes$V5 - gff.original.genes$V4 hist(gff.original.genes$length) nrow(gff.original.genes) gff <- subset(gff.original.genes, length>200 & length < 20000) nrow(gff) hist(gff$length) # average fst and gwas results by gene in R setwd("[Your_working_directory]") fst.by.gene.filename <- paste(fst.file, ".bedgraph.genes.txt", sep="") d <- read.delim(fst.by.gene.filename, header=FALSE) head(d) fst.gene <- aggregate(abs(d$V13), list(d$V14), mean) fst.gene.count <- aggregate(abs(d$V13), list(d$V14), NROW) fst.gene$count <- fst.gene.count$x head(fst.gene) fst.gene.filt <- subset(fst.gene, count >3) nrow(fst.gene) nrow(fst.gene.filt) median(fst.gene.filt$x) fst.gene.filt.order <- fst.gene.filt[order(-fst.gene.filt$x),] subset(fst.gene.filt.order, x> 0.65) filename <- paste(Sys.Date(), "_", fst.file, "_fst_by_gene.txt", sep="") write.table(fst.gene.filt.order, filename, quote=FALSE, sep="\t") #check candidates subset(d, V14=="Lj6g2130160") subset(d, V14=="Lj6g1887780") subset(d, V14=="Lj1g2533770") top.100.fst.genes <-subset(fst.gene.filt.order, x> 0.65) top.100.fst.genes.positions <- merge(top.100.fst.genes, d, by.x="Group.1", by.y="V14") top.100.fst.genes.positions.unique <- unique(top.100.fst.genes.positions[,c(4,7,8)]) nrow(top.100.fst.genes) filename <- paste(Sys.Date(), "_", fst.file, "_fst_by_gene_hist.pdf", sep="") setwd("/Users/au27857/Dropbox/NirajShah/AccessionPaper/31082016/Manuscript/20180605_stig_analysis") pdf(filename) hist(fst.gene.filt$x, breaks=50) abline(v=median(fst.gene.filt$x), col="red", lwd=4 ) abline(v=0.65, col="blue", lwd=4 ) dev.off() # no correlation between SNP count and fst score smoothScatter(fst.gene.filt$x,fst.gene.filt$count, log="y") } ### Produce gwas and Fst charts for the main figure { # Read in libraries library(zoo) library(ggplot2) #install.packages("viridis") library(viridis) library(grid) library(dplyr) # set averaging parameters for rolling mean applied to Fst data by=1 width=10 # set number of decimals for y-axis scaleFUN <- function(x) sprintf("%.2f", x) # set plot theme font.size=20 #theme <- theme(panel.grid.major = element_blank(), legend.position="none", panel.grid.minor = element_blank(), axis.text=element_text(size=font.size), axis.title=element_text(size=font.size,face="bold") ) theme <- theme(axis.text=element_text(size=font.size), axis.title=element_text(size=font.size,face="bold") ) ## plot Fst and GWAS results in the same graph # plots for the figure # chr1: OW_2014 (4) # chr2: FT_gh (11), FT_2014 (12), FT_dur (10) # chr3: OW_2016 (6) # chr4: FT_gh (11) # chr5: NA+ control (17), Seed width (43) # chr6: OW_2014 (4), OW_2015 (5) plots = list() plots.fst = list() traits.list <- list(c(4), c(11,12,10), c(6), c(11), c(17,43), c(4,5)) n=1 gwas.dot.size=3 gwas.circle.size=5 # set plot directory setwd("[Your_working_directory]") # generate the plots for (chr in c(1:6)) { current.chr <- paste("chr",chr, sep="") n=1 for (i in traits.list[[chr]]) { #gwas gwas.chr <- subset(dataFiles[[i]], chr == current.chr) x.gwas <- gwas.chr$positions y.gwas <- gwas.chr$p d.gwas.chr <- as.data.frame(cbind(x.gwas,y.gwas)) q.gwas <- ggplot(d.gwas.chr, aes(x=x.gwas, y=y.gwas)) q.gwas <- q.gwas + geom_point(color="dark grey") + theme #fst d.fst.chr <- subset(d.fst.filt, chr ==current.chr) #fst.genes <- subset(top.100.fst.genes.positions.unique, V1 == current.chr) x.fst <- rollapply(d.fst.chr$pos, width = width, by = by, FUN = mean, align = "left") y.fst <- rollapply(d.fst.chr$fst, width = width, by = by, FUN = mean, align = "left") d.fst.chr.rollmean <- as.data.frame(cbind(x.fst,y.fst)) q.fst <- ggplot(d.fst.chr.rollmean, aes(x=x.fst, y=y.fst)) q.fst <- q.fst + geom_point(color="dark grey") + theme #gwas fst overlap gwas.fst.overlap.chr <- NULL gwas.fst.overlap.chr <- subset(fst.gwas.overlap[[i]], chr.x == current.chr) gwas.fst.overlap.chr <- gwas.fst.overlap.chr[order(gwas.fst.overlap.chr$p),] # produce Fst plot colored by GWAS p-values if (nrow(gwas.fst.overlap.chr)>0) { q.fst.colored <- q.fst + geom_point(data=gwas.fst.overlap.chr, aes(x=pos, y=fst, color=p), size=gwas.circle.size, shape=16 ) + ggtitle(paste(current.chr, fileList[i], sep="_")) + labs(x="Position", y="Fst") + scale_color_viridis() + theme } else { q.fst.colored <- q.fst + theme} plots.fst[[n]] = q.fst.colored n=n+1 } #print plots filename <- paste(Sys.Date(), fst.file, "chr", chr, ".png", sep="_") png(filename, 12,3*length(traits.list[[chr]]), units="in", res=200) grid.newpage() if (length(traits.list[[chr]]) == 2) { grid.draw(rbind(ggplotGrob(plots.fst[[1]]), ggplotGrob(plots.fst[[2]]), size = "last")) } else if (length(traits.list[[chr]]) == 3) { grid.draw(rbind(ggplotGrob(plots.fst[[1]]), ggplotGrob(plots.fst[[2]]), ggplotGrob(plots.fst[[3]]), size = "last"))} else if (length(traits.list[[chr]]) == 1) { grid.draw(rbind(ggplotGrob(plots.fst[[1]]), size = "last")) } dev.off() } } ### Produce gwas and Fst charts for the supplementary figure { # Read in libraries library(zoo) library(ggplot2) #install.packages("viridis") library(viridis) library(grid) library(dplyr) library(gridExtra) # set averaging parameters for rolling mean applied to Fst data by=1 width=10 # set plot theme font.size=16 #theme <- theme(panel.grid.major = element_blank(), legend.position="none", panel.grid.minor = element_blank(), axis.text=element_text(size=font.size), axis.title=element_text(size=font.size,face="bold") ) theme <- theme(axis.text=element_text(size=font.size), axis.title=element_text(size=font.size,face="bold"), legend.text=element_text(size=font.size), legend.title=element_text(size=font.size), plot.title = element_text(size=font.size) ) plots = list() plots.fst = list() traits.list <- c(1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 8, 9, 10, 16, 17, 19, 20, 39:45) trait.names <- c("Altitude","Mean temperature","Minimum temperature","Overwintering 2014", "Overwintering 2015", "Overwintering 2017", "FT greenhouse", "FT 2014", "FT 2015", "FT 2017", "FT 2018", "FT 1st", "FT 2nd", "FT dur", "K ctrl", "Na ctrl", "K salt", "Na salt", "Seed weight", "Seed size", "Seed perimeter", "Seed length", "Seed width", "Seed length/width ratio", "Seed circularity") gwas.dot.size=3 gwas.circle.size=5 # set plot directory setwd("[Your_working_directory]") # generate the plots for (chr in c(1:6)) { current.chr <- paste("chr",chr, sep="") n=1 for (i in traits.list) { #gwas gwas.chr <- subset(dataFiles[[i]], chr == current.chr) x.gwas <- gwas.chr$positions y.gwas <- gwas.chr$p d.gwas.chr <- as.data.frame(cbind(x.gwas,y.gwas)) #fst d.fst.chr <- subset(d.fst.filt, chr ==current.chr) #fst.genes <- subset(top.100.fst.genes.positions.unique, V1 == current.chr) x.fst <- rollapply(d.fst.chr$pos, width = width, by = by, FUN = mean, align = "left") y.fst <- rollapply(d.fst.chr$fst, width = width, by = by, FUN = mean, align = "left") d.fst.chr.rollmean <- as.data.frame(cbind(x.fst,y.fst)) q.fst <- ggplot(d.fst.chr.rollmean, aes(x=x.fst, y=y.fst)) q.fst <- q.fst + geom_point(color="dark grey") + ggtitle(trait.names[n]) + labs(x=paste(current.chr,"position", sep=" "), y="Fst") + theme #gwas fst overlap gwas.fst.overlap.chr <- NULL gwas.fst.overlap.chr <- subset(fst.gwas.overlap[[i]], chr.x == current.chr & fst > -0.5) gwas.fst.overlap.chr <- gwas.fst.overlap.chr[order(gwas.fst.overlap.chr$p),] # produce GWAS plots colored by Fst values and Fst plot colored by GWAS p-values if (nrow(gwas.fst.overlap.chr)>0) { q.gwas <- q.fst + geom_point(data=gwas.fst.overlap.chr, aes(x=pos, y=p/5, color=fst), size=gwas.circle.size, shape=16) + ggtitle(trait.names[n]) + labs(x=paste(current.chr,"position", sep=" "), y="Fst or -log(p)/5") + geom_hline(yintercept = 1, color="blue") + geom_hline(yintercept = 7/5, color="red") + scale_color_viridis(option="plasma") + theme q.fst.colored <- q.fst + geom_point(data=gwas.fst.overlap.chr, aes(x=pos, y=fst, color=p), size=gwas.circle.size, shape=16 ) + ggtitle(trait.names[n]) + labs(x=paste(current.chr,"position", sep=" "), y="Fst") + scale_color_viridis() + theme } else { q.gwas <- ggplot(d.fst.chr.rollmean, aes(x=x.fst, y=y.fst)) + geom_point(aes(color="Fst"), shape=16) + scale_color_manual(breaks = 'Fst', values = 'dark grey', guide = guide_legend(title = NULL)) + ggtitle(trait.names[n]) + labs(x=paste(current.chr,"position", sep=" "), y="Fst") + theme q.fst.colored <- ggplot(d.fst.chr.rollmean, aes(x=x.fst, y=y.fst)) + geom_point(aes(color="Fst"), shape=16) + scale_color_manual(breaks = 'Fst', values = 'dark grey', guide = guide_legend(title = NULL)) + ggtitle(trait.names[n]) + labs(x=paste(current.chr,"position", sep=" "), y="Fst") + theme} plots[[n]] = q.gwas plots.fst[[n]] = q.fst.colored n=n+1 } #print plots #filename <- paste(Sys.Date(), fst.file, "chr", chr, ".png", sep="_") #png(filename, 24,3*length(traits.list), units="in", res=100) #do.call("grid.arrange", c(c(plots.fst,plots), nrow=length(traits.list), ncol=2, as.table=FALSE)) #dev.off() #print plots # fst colored by GWAS filename <- paste(Sys.Date(), fst.file, "chr", chr, ".png", sep="_") png(filename, 12,3*length(traits.list), units="in", res=150) grid.newpage() grid.draw(rbind(ggplotGrob(plots.fst[[1]]), ggplotGrob(plots.fst[[2]]), ggplotGrob(plots.fst[[3]]), ggplotGrob(plots.fst[[4]]), ggplotGrob(plots.fst[[5]]), ggplotGrob(plots.fst[[6]]), ggplotGrob(plots.fst[[7]]), ggplotGrob(plots.fst[[8]]), ggplotGrob(plots.fst[[9]]), ggplotGrob(plots.fst[[10]]), ggplotGrob(plots.fst[[11]]), ggplotGrob(plots.fst[[12]]), ggplotGrob(plots.fst[[13]]), ggplotGrob(plots.fst[[14]]), ggplotGrob(plots.fst[[15]]), ggplotGrob(plots.fst[[16]]), ggplotGrob(plots.fst[[17]]), ggplotGrob(plots.fst[[18]]), ggplotGrob(plots.fst[[19]]), ggplotGrob(plots.fst[[20]]), ggplotGrob(plots.fst[[21]]), ggplotGrob(plots.fst[[22]]), ggplotGrob(plots.fst[[23]]), ggplotGrob(plots.fst[[24]]), ggplotGrob(plots.fst[[25]]), size = "first")) dev.off() # GWAS colored by FST filename <- paste(Sys.Date(), fst.file, "gwa_chr", chr, ".png", sep="_") png(filename, 12,3*length(traits.list), units="in", res=150) grid.newpage() grid.draw(rbind(ggplotGrob(plots[[1]]), ggplotGrob(plots[[2]]), ggplotGrob(plots[[3]]), ggplotGrob(plots[[4]]), ggplotGrob(plots[[5]]), ggplotGrob(plots[[6]]), ggplotGrob(plots[[7]]), ggplotGrob(plots[[8]]), ggplotGrob(plots[[9]]), ggplotGrob(plots[[10]]), ggplotGrob(plots[[11]]), ggplotGrob(plots[[12]]), ggplotGrob(plots[[13]]), ggplotGrob(plots[[14]]), ggplotGrob(plots[[15]]), ggplotGrob(plots[[16]]), ggplotGrob(plots[[17]]), ggplotGrob(plots[[18]]), ggplotGrob(plots[[19]]), ggplotGrob(plots[[20]]), ggplotGrob(plots[[21]]), ggplotGrob(plots[[22]]), ggplotGrob(plots[[23]]), ggplotGrob(plots[[24]]), ggplotGrob(plots[[25]]), size = "first")) dev.off() } }