#Log Hinge Width Regressions and Predictions
#Taylor Vollman
#Start Date: 28 Feb 2022
#Last Updated: 6 Dec 2023

library(tidyverse)
library(car) #for crPlots()
library(nortest) #for ad.test()
library(readr)
library(ggplot2)

options(digits = 4) #limits to 4 significant figures

#Load dataset
#set working directory Session>Set Working Directory>Choose Directory
library(readxl)
OlyMeasurements <- read_excel("OlyMeasurements.xlsx")


#Make Dorsal_Length Continuous
OlyMeasurements$Dorsal_Length <- as.numeric(as.character(OlyMeasurements$Dorsal_Length))
#Make Hinge_Thickness_Left_Valve Continuous
OlyMeasurements$Hinge_Thickness_Left_Valve <- as.numeric(as.character(OlyMeasurements$Hinge_Thickness_Left_Valve))

#add logs for hinge width
OlyMeasurements$logH <- log(OlyMeasurements$Hinge_Thickness_Left_Valve)


#Bivariate Plot w/ Log values
ggplot(OlyMeasurements, aes(logH, Dorsal_Length)) +
  geom_point() +
  scale_color_manual() +
  xlab("log Hinge Thickness (mm)") +
  ylab("Dorsal Length (mm)") +
  theme() +
  xlim(0,3) +
  ylim(0,60)

####Create Regression model####
X <- OlyMeasurements$Hinge_Thickness_Left_Valve
Y <- OlyMeasurements$Dorsal_Length

# lm fit
Oly.LogH.Regression <- lm(Y ~ log(X))
#To view the ANOVA table:
anova(Oly.LogH.Regression)

#To just view the slope and y-intercept coefficients:
Oly.LogH.Regression

#Summary of Oly model 2
#Residuals, R^2, p-value
summary(Oly.LogH.Regression)

#See Residuals
#Plot Residuals
plot(Oly.LogH.Regression, which = 1) #Residuals vs Fitted
plot(Oly.LogH.Regression, which = 2) #Q-Q Plot
plot(Oly.LogH.Regression, which = 3) #Scale-Location
plot(Oly.LogH.Regression, which = 5) #Residuals vs Leverage
crPlots(Oly.LogH.Regression)

#Normality of residuals
ad.test(Oly.LogH.Regression$residuals)

####CI, PI, and Predicting New Observations####
#CI of the Coefficients
confint(Oly.LogH.Regression, level = 0.95)
#Gives up and lower limits of 95% CI

#Adding Prediction Interval
#Add PI to model first
Pred.Oly <- predict(Oly.LogH.Regression, interval = "prediction")
Plot.Oly <- cbind(OlyMeasurements, Pred.Oly)

#Plot of Regression Line with CI & PI
ggplot(Plot.Oly,
       aes(Hinge_Thickness_Left_Valve, Dorsal_Length)) + 
  geom_point(shape = 1) + 
  stat_smooth(method = "lm", 
              formula = y ~ log(x), 
              col = "blue") +
  geom_line(aes(y = lwr), color = "red",
            linetype = "dashed") +
  geom_line(aes(y = upr), color = "red",
            linetype = "dashed") +
  xlim(0,12) +
  ylim(0,80) +
  xlab("Left Valve Hinge Thickness") +
  ylab("Dorsal Length (mm)") +
  theme_classic()

#####Predicting Dorsal Length####
#> new.data <- data.frame(x_variable = c(newx1, newx2, ...etc.))
#> or Import .xlsx or .csv of Left Valve Hinge Measurements 

#Make Hinge_Thickness_Left_Valve Continuous
YOURDATAFRAMENAME$Hinge_Thickness <- as.numeric(as.character(YOURDATAFRAMENAME$Hinge_Thickness))

#Convert Hinge to Log
#add logs for hinge width
YOURDATAFRAMENAME$logH <- log(YOURDATAFRAMENAME$Hinge_Thickness)


#Predict
predict(Oly.LogH.Regression, newdata = YOURDATAFRAMENAME,
        interval = "prediction")


#Save predicts as object
pred.new.Hinge_Thickness <- predict(Oly.LogH.Regression, newdata = YOURDATAFRAMENAME,
                                    interval = "prediction")

#Export as .csv
write.csv(pred.new.Hinge_Thickness, "Prediction Lengths.csv")