#Tutorial for creating a scatterplot with vertical and horizontal error bars
#The data are vertebral means for males and females of three populations of Rhoadsia minor and two populations of R. altipinna from western Ecuador. The file also has the standard error of the means and standard deviations.
#You can find the data file (Rhoadsia_Vertebral.txt) at: https://github.com/aguirrelab/r-tutorials.git
# Set the working directory to the folder that holds the data file; replace
# this path with the corresponding folder on your own computer.
setwd("C:/1awinz/R_work/scatter_plot_error_bars")
# Read Rhoadsia_Vertebral.txt and print it to check that it was parsed
# correctly. Substitute the name of your own input file unless you are using
# the data file linked to this tutorial, and make sure it is formatted properly.
# header = TRUE treats the first line of the file as column names; TRUE is
# spelled out because T is an ordinary variable that can be reassigned.
read.table("Rhoadsia_Vertebral.txt", header = TRUE)
## Site_cd Sex Label N avg_PreV sd_PreV se_PreV avg_CdV sd_CdV
## 1 E4 F E4-F 25 13.92000 0.2768875 0.05537750 19.00000 0.5773503
## 2 E4 M E4-M 17 13.94118 0.2425356 0.05882352 19.05882 0.4287465
## 3 E6 F E6-F 6 13.50000 0.5477226 0.22360681 19.66667 0.5163978
## 4 E6 M E6-M 9 13.66667 0.7071068 0.23570227 19.33333 0.5000000
## 5 Pal F Pal-F 57 13.70175 0.4987453 0.06606043 18.50877 0.6302722
## 6 Pal M Pal-M 35 13.74286 0.4434396 0.07495497 18.80000 0.6324555
## 7 SR F SR-F 31 13.74194 0.4448027 0.07988892 18.54839 0.5058794
## 8 SR M SR-M 33 13.84848 0.3641095 0.06338333 18.78788 0.5998737
## 9 Qu All Qu-All 45 13.75556 0.4840903 0.07216392 18.84444 0.5622825
## se_CdV
## 1 0.11547006
## 2 0.10398630
## 3 0.21081852
## 4 0.16666667
## 5 0.08348159
## 6 0.10690449
## 7 0.09085862
## 8 0.10442461
## 9 0.08382013
# Load the data file into a data frame and list its column names.
# attach() is intentionally not used: every plotting call below passes `data`
# explicitly, and attaching would only put a copy of the columns on the search
# path where they can mask, or be masked by, other objects.
data <- read.table("Rhoadsia_Vertebral.txt", header = TRUE)
names(data)
## [1] "Site_cd" "Sex" "Label" "N" "avg_PreV" "sd_PreV"
## [7] "se_PreV" "avg_CdV" "sd_CdV" "se_CdV"
#You only need ggplot2 for this tutorial but tidyverse contains a number of useful packages including the ggplot2 package so by habit I just call the tidyverse package
library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Build the plot in named steps to keep the final command short. The x axis is
# the average number of precaudal vertebrae and the y axis is the average
# number of caudal vertebrae; one point is drawn per row of the data.
a <- ggplot(
  data = data,
  mapping = aes(x = avg_PreV, y = avg_CdV)
) +
  geom_point()
# Create a new plot object with custom shapes and colors: color encodes the
# site code and shape encodes sex, drawn as large (size 5) points. The manual
# values are given positionally. Several of the hex codes are tones of blue,
# chosen so that related samples get similar colors.
anew <- a +
  geom_point(
    mapping = aes(color = Site_cd, shape = Sex),
    size = 5
  ) +
  scale_shape_manual(
    values = c("circle", "square", "triangle")
  ) +
  scale_color_manual(
    values = c("#0066CC", "#003399", "black", "#3399FF", "red")
  )
# Create a new plot object with descriptive axis titles and the classic theme.
anew2 <- anew +
  labs(
    x = "Average Number of Precaudal Vertebrae",
    y = "Average Number of Caudal Vertebrae"
  ) +
  theme_classic()
# Final step: draw the finished plot with vertical and horizontal error bars.
# The data file must supply the standard error (or standard deviation) as
# columns; here each bar spans the mean minus/plus the standard error of the
# mean. The result is not assigned, so it is printed as the script's output.
# NOTE(review): newer ggplot2 releases may deprecate geom_errorbarh() in favour
# of geom_errorbar(orientation = "y") - confirm against the installed version.
anew2 +
  geom_errorbar(
    aes(ymin = avg_CdV - se_CdV, ymax = avg_CdV + se_CdV),
    colour = "black",
    width = 0.025
  ) +
  geom_errorbarh(
    aes(xmin = avg_PreV - se_PreV, xmax = avg_PreV + se_PreV),
    height = 0.05
  )
