#This file was created as an R script and saved in html format using the Compile Report function

#This tutorial shows you how to use dplyr, one of the tidyverse packages, to create summary statistics
#We will compute multiple descriptive statistics by group for a file with multiple groups
#The data are standard length (SL) for female and male oceanic and stream threespine stickleback (four groups)
#The data file is available in our github repository: https://github.com/aguirrelab/r-tutorials
#The key data are the SL (standard length) column and the Grp column, which splits the data into four groups by
#ocean vs. stream and male vs. female

#Preliminaries: set the working directory. Replace "C:/1awinz/R_work/dplyr/descriptives" with the folder where you will work
#(the data file is read from this folder using a relative path).
setwd("C:/1awinz/R_work/dplyr/descriptives")

#Read the data file (1_Stickleback_SL_data.txt) into a data frame named "data".
#header = TRUE tells read.table() that the first line of the file holds the column names.
#TRUE is spelled out because the shorthand T is an ordinary variable that can be reassigned.
data <- read.table("1_Stickleback_SL_data.txt", header = TRUE)

#attach() is intentionally not used: the dplyr functions used below look up columns such as SL and Grp
#inside the data frame they are given, so copies of the columns on the search path are not needed.
#List the column names to confirm the file was read as expected.
names(data)
## [1] "Type" "Sex"  "Grp"  "SL"
#Open the dplyr package.
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#Use dplyr's "group_by" function to group the data by the variable you select.
#Here that variable is Grp, which divides the data into four groups:
#ocean female, ocean male, stream female, stream male.
#The pipe (%>%) hands "data" to group_by() as its first argument.
grouped_data <- data %>% group_by(Grp)

#Now build a summary table, I called mine "descriptives", with one row per group and one column per statistic.
#In this tutorial, we calculate the mean (avg), standard deviation (sd_SL), sample size per group (n_SL),
#standard error of the mean (se_SL), coefficient of variation (cv_SL), median, interquartile range (IQR), and range.
#Columns created earlier in the same summarise() call can be reused by later ones:
#se_SL divides sd_SL by the square root of n_SL, and cv_SL expresses sd_SL as a percentage of avg.
descriptives <- grouped_data %>%
  summarise(
    avg = mean(SL),
    sd_SL = sd(SL),
    n_SL = n(),
    se_SL = sd_SL / sqrt(n_SL),
    cv_SL = 100 * (sd_SL / avg),
    median_SL = median(SL),
    IQR_SL = IQR(SL),
    #range() returns c(min, max), so diff() gives max minus min
    range_SL = diff(range(SL))
  )

#Now print the descriptives table to see the results
descriptives
## # A tibble: 4 x 9
##   Grp        avg sd_SL  n_SL se_SL cv_SL median_SL IQR_SL range_SL
##   <chr>    <dbl> <dbl> <int> <dbl> <dbl>     <int>  <dbl>    <int>
## 1 ocean_f   67.6  4.97    45 0.741  7.35        69      5       30
## 2 ocean_m   65.2  3.48    45 0.519  5.35        65      5       15
## 3 stream_f  41.8  3.75    45 0.559  8.97        41      4       18
## 4 stream_m  40.3  3.83    45 0.571  9.50        40      4       17
#You should see a table with the summary descriptive data by group