# https://reader.elsevier.com/reader/sd/pii/S0169534720303347?token=D3409E5C3AEC22FDF92433723642EA266B85A0E70254D5949283F57771E0644BF15C0B464781550B58CFEAE81E192653&originRegion=eu-west-1&originCreation=20220118122957
# Evaluating Impact Using Time-Series Data
################################################################################
# second workshop on time-series analysis for ecologists
# 14.00 Italian Time: 21 March 2022
# Ulrike Obertegger, ulrike.obertegger@fmach.it
################################################################################

rm(list=ls())
library(readxl)
library(strucchange)
library(tidyverse)
library(ggplot2)
setwd("/Users/uliobertegger/Documents/Eigene Dateien/nach 2010/Kongresse-Tagungen/2022/SPF workshop")
dat <- read_excel("test.xlsx")
head(dat)

set.seed(123)
dat$unprot <- dat$unprot + rnorm(30, mean = 0, sd = 0.5)
dat$prot   <- dat$prot + rnorm(30, mean = 0, sd = 0.5)

(dati <- dat[,1:3] %>%
  pivot_longer(!year, names_to = "status", values_to = "count"))
dati$obs <- rep(c("phase1", "phase2"),each=30)
head(dati)

ggplot(dati, aes(x=year, y=count, color=status)) +
  geom_point()+
  geom_line() +
  geom_smooth(method=lm)

mod.wrong <- lm(count ~ year * status, data=dati)
summary(mod.wrong)
resi <- residuals(mod.wrong)[seq(1,60,2)]
acf(resi)
plot(resi, type="o")
abline(h=0)

resi <- residuals(mod.wrong)[seq(2,60,2)]
acf(resi)
plot(resi, type="o")
abline(h=0)

ggplot(dati, aes(x=year, y=count, color=status)) +
  geom_point()+
  facet_grid(.~obs, labeller = "label_both") +
  geom_smooth(method=lm)

head(dati)
mod0 <- lm(count ~ year + status + obs + 
                   year : status + year : obs + obs : status +
                   year : status : obs, data=dati)

mod0 <- lm(count ~ year * status * obs , data=dati)

mod1 <- lm(count ~ year + status +  obs + 
                   year : status + year : obs + obs : status, data=dati)
anova(mod0, mod1)
summary(mod0)

# model predictions
pred   <- expand.grid(year   = c(min(dati$year),0, max(dati$year)),
                      status = factor(dati$status)[1:2],
                      obs    = rep(c("phase1", "phase2"), each=1))
pred

(pred$count <- predict(mod0, pred))
# Faktoren beinflussen den Intercept
#
# intercept for status=prot & obs=phase1
coef(mod0)[1]
#intercept for status=prot & obs=phase2
coef(mod0)[1] + coef(mod0)[4]

#intercept for status=unprot & obs=phase1
coef(mod0)[1] + coef(mod0)[3]
#intercept for status=unprot & obs=phase2
coef(mod0)[1] + coef(mod0)[3] + coef(mod0)[4] + coef(mod0)[7]

# understanding the estimates: the intercepts
ggplot(dati,aes(x=year,y=count,color=status))+geom_point()+
  facet_grid(.~obs, labeller = "label_both")+
  stat_smooth(method="lm",se=TRUE)+
  geom_linerange(data=pred[c(2,5),],aes(ymin=count[1],ymax=count[2]),
                 size=2,color="orange")+
  geom_text(data=pred[2,],aes(label="F12"),vjust=4,color="black")+
  geom_linerange(data=pred[c(8,11),],aes(ymin=count[1],ymax=count[2]),
                 size=2,color="orange") +
  geom_text(data=pred[8,],aes(label="F12+\nF12:F22"),vjust=2,color="black") +
  geom_point(data=pred[pred$count==0,],color="black")

#understanding the estimates: the slopes
#slope for status=prot & obs=phase1
coef(mod0)[2] 
#slope for status=prot & obs=phase2
coef(mod0)[2] + coef(mod0)[6] 

#slope for status=unprot & obs=phase1
coef(mod0)[2] + coef(mod0)[5] 

#slope for status=unprot & obs=phase2
coef(mod0)[2] + coef(mod0)[5] + coef(mod0)[6] + coef(mod0)[8] 

mod0 <- lm(count ~ year + status +  obs + 
             year : status + year : obs + obs : status +
             year : status :  obs, data=dati)

dat.phase1 <- dati %>%
  filter(obs=="phase1")
mod.phase1 <- lm(count ~ year * status , data=dat.phase1)

dat.phase2 <- dati %>%
  filter(obs=="phase2")
mod.phase2 <- lm(count ~ year * status , data=dat.phase2)

library(emmeans)
emtrends(mod.phase1, pairwise ~ status, var = "year")
emtrends(mod.phase2, pairwise ~ status, var = "year")

confint(mod0)
# -> phase 2 - unprotected: slope not stat. significant different from 0 

resi <- residuals(mod0)

frame        <- data.frame(resi=resi, dati)
frame.phase1 <- frame[1:30,]  # residuals for phase 1

# residuals for phase 1 and status unprotected
resi1 <- frame.phase1$resi[frame.phase1$status=="unprot"]
acf(resi1)
pacf(resi1)

#  take the residuals from phase 1 and plot versus all factors 
boxplot(frame.phase1$resi ~ frame.phase1$status)
boxplot(frame.phase1$resi ~ frame.phase1$obs)

library("strucchange")
# testing for a change point in the "protected" data
count.prot <- ts(dat$prot, start=1, frequency=1)
year.prot  <- time(count.prot)
fs <- Fstats(count.prot ~ year.prot)
plot(fs)
sctest(fs)

(bp <- breakpoints(count.prot ~ year.prot))
confint(bp)
## inspect fitted coefficients in both segments
coef(bp)
(seg <- breakfactor(bp))

seglm1 <- lm(count.prot ~ seg*year.prot)          # baseline testing
seglm2 <- lm(count.prot ~ seg/year.prot)          # baseline testing
seglm3 <- lm(count.prot ~ -1 + seg/year.prot)     # no baseline testing
summary(seglm1)
summary(seglm2)
summary(seglm3)

resi <- residuals(seglm1)
acf(resi)

## visualization
par(mfrow=c(1,1))
plot(count.prot, col = "black", lwd = 2, ylab="count")
lines(fitted(bp))
lines(confint(bp))

#### unprotected
count.unprot <- ts(dat$unprot, start=1, frequency=1)
year.unprot  <- time(count.unprot)
fs <- Fstats(count.unprot ~ year.unprot)
plot(fs)
sctest(fs)

(bp <- breakpoints(count.unprot ~ year.unprot, breaks=1))
confint(bp)
## inspect fitted coefficients in both segments
coef(bp)
(seg <- breakfactor(bp))

seglm <- lm(count.unprot ~ as.numeric(year.unprot) * seg )    # baseline 
summary(seglm)
resi <- residuals(seglm)
acf(resi)

par(mfrow=c(1,1))
plot(count.unprot, col = "black", lwd = 2, ylab="count")
lines(fitted(bp))
lines(confint(bp))

library(Rmisc)
(RMSE_reg <- rmse(as.numeric(count.unprot), fitted(seglm)))

## segmented regression as alternative to strucchange
library(segmented)
ggplot() + geom_point(aes(x=year.unprot, y=count.unprot))
o    <- lm(count.unprot ~ year.unprot)
o.seg<- segmented(o, seg.Z=~year.unprot, psi=10)

#o    <- lm(count.prot ~ year.prot)
#summary(o)
#o.seg<- segmented(o, seg.Z=~year.prot, psi=15)

summary(o.seg)
confint(o.seg)
# get the fitted data
my.fitted <- fitted(o.seg)
(RMSE_segreg <- rmse(as.numeric(count.unprot), my.fitted))


# what if this is not regression, but a shift in the mean?
fs <- Fstats(count.unprot ~ 1)
plot(fs)
sctest(fs)

(bp <- breakpoints(count.unprot ~ 1))
confint(bp)

(bp <- breakpoints(count.unprot ~ 1, breaks=1))
confint(bp)
## inspect fitted coefficients in both segments
coef(bp)
seg <- breakfactor(bp)

seglm1 <- lm(count.unprot ~ seg/1)          # baseline testing
summary(seglm1)

plot(y=count.unprot,  x=seq(1,30, by=1))
fm0 <- lm(count.unprot ~ 1)
lines(y=fitted(fm0),  x=seq(1,30, by=1), col = 3)
lines(fitted(seglm1), x=seq(1,30, by=1), col = 4, lwd=4)
lines(fitted(seglm),  x=seq(1,30, by=1), col = 5, lwd=4)

library(Rmisc)
(RMSE_shift.in.the.mean <- rmse(as.numeric(count.unprot), fitted(seglm1)))

RMSE_shift.in.the.mean
RMSE_reg
RMSE_segreg

# play with the standard deviation & see what happens
# have fun :o)