# https://reader.elsevier.com/reader/sd/pii/S0169534720303347?token=D3409E5C3AEC22FDF92433723642EA266B85A0E70254D5949283F57771E0644BF15C0B464781550B58CFEAE81E192653&originRegion=eu-west-1&originCreation=20220118122957 # Evaluating Impact Using Time-Series Data ################################################################################ # second workshop on time-series analysis for ecologists # 14.00 Italian Time: 21 March 2022 # Ulrike Obertegger, ulrike.obertegger@fmach.it ################################################################################ rm(list=ls()) library(readxl) library(strucchange) library(tidyverse) library(ggplot2) setwd("/Users/uliobertegger/Documents/Eigene Dateien/nach 2010/Kongresse-Tagungen/2022/SPF workshop") dat <- read_excel("test.xlsx") head(dat) set.seed(123) dat$unprot <- dat$unprot + rnorm(30, mean = 0, sd = 0.5) dat$prot <- dat$prot + rnorm(30, mean = 0, sd = 0.5) (dati <- dat[,1:3] %>% pivot_longer(!year, names_to = "status", values_to = "count")) dati$obs <- rep(c("phase1", "phase2"),each=30) head(dati) ggplot(dati, aes(x=year, y=count, color=status)) + geom_point()+ geom_line() + geom_smooth(method=lm) mod.wrong <- lm(count ~ year * status, data=dati) summary(mod.wrong) resi <- residuals(mod.wrong)[seq(1,60,2)] acf(resi) plot(resi, type="o") abline(h=0) resi <- residuals(mod.wrong)[seq(2,60,2)] acf(resi) plot(resi, type="o") abline(h=0) ggplot(dati, aes(x=year, y=count, color=status)) + geom_point()+ facet_grid(.~obs, labeller = "label_both") + geom_smooth(method=lm) head(dati) mod0 <- lm(count ~ year + status + obs + year : status + year : obs + obs : status + year : status : obs, data=dati) mod0 <- lm(count ~ year * status * obs , data=dati) mod1 <- lm(count ~ year + status + obs + year : status + year : obs + obs : status, data=dati) anova(mod0, mod1) summary(mod0) # model predictions pred <- expand.grid(year = c(min(dati$year),0, max(dati$year)), status = factor(dati$status)[1:2], obs = rep(c("phase1", "phase2"), each=1)) pred (pred$count <- predict(mod0, pred)) # Faktoren beinflussen den Intercept # # intercept for status=prot & obs=phase1 coef(mod0)[1] #intercept for status=prot & obs=phase2 coef(mod0)[1] + coef(mod0)[4] #intercept for status=unprot & obs=phase1 coef(mod0)[1] + coef(mod0)[3] #intercept for status=unprot & obs=phase2 coef(mod0)[1] + coef(mod0)[3] + coef(mod0)[4] + coef(mod0)[7] # understanding the estimates: the intercepts ggplot(dati,aes(x=year,y=count,color=status))+geom_point()+ facet_grid(.~obs, labeller = "label_both")+ stat_smooth(method="lm",se=TRUE)+ geom_linerange(data=pred[c(2,5),],aes(ymin=count[1],ymax=count[2]), size=2,color="orange")+ geom_text(data=pred[2,],aes(label="F12"),vjust=4,color="black")+ geom_linerange(data=pred[c(8,11),],aes(ymin=count[1],ymax=count[2]), size=2,color="orange") + geom_text(data=pred[8,],aes(label="F12+\nF12:F22"),vjust=2,color="black") + geom_point(data=pred[pred$count==0,],color="black") #understanding the estimates: the slopes #slope for status=prot & obs=phase1 coef(mod0)[2] #slope for status=prot & obs=phase2 coef(mod0)[2] + coef(mod0)[6] #slope for status=unprot & obs=phase1 coef(mod0)[2] + coef(mod0)[5] #slope for status=unprot & obs=phase2 coef(mod0)[2] + coef(mod0)[5] + coef(mod0)[6] + coef(mod0)[8] mod0 <- lm(count ~ year + status + obs + year : status + year : obs + obs : status + year : status : obs, data=dati) dat.phase1 <- dati %>% filter(obs=="phase1") mod.phase1 <- lm(count ~ year * status , data=dat.phase1) dat.phase2 <- dati %>% filter(obs=="phase2") mod.phase2 <- lm(count ~ year * status , data=dat.phase2) library(emmeans) emtrends(mod.phase1, pairwise ~ status, var = "year") emtrends(mod.phase2, pairwise ~ status, var = "year") confint(mod0) # -> phase 2 - unprotected: slope not stat. significant different from 0 resi <- residuals(mod0) frame <- data.frame(resi=resi, dati) frame.phase1 <- frame[1:30,] # residuals for phase 1 # residuals for phase 1 and status unprotected resi1 <- frame.phase1$resi[frame.phase1$status=="unprot"] acf(resi1) pacf(resi1) # take the residuals from phase 1 and plot versus all factors boxplot(frame.phase1$resi ~ frame.phase1$status) boxplot(frame.phase1$resi ~ frame.phase1$obs) library("strucchange") # testing for a change point in the "protected" data count.prot <- ts(dat$prot, start=1, frequency=1) year.prot <- time(count.prot) fs <- Fstats(count.prot ~ year.prot) plot(fs) sctest(fs) (bp <- breakpoints(count.prot ~ year.prot)) confint(bp) ## inspect fitted coefficients in both segments coef(bp) (seg <- breakfactor(bp)) seglm1 <- lm(count.prot ~ seg*year.prot) # baseline testing seglm2 <- lm(count.prot ~ seg/year.prot) # baseline testing seglm3 <- lm(count.prot ~ -1 + seg/year.prot) # no baseline testing summary(seglm1) summary(seglm2) summary(seglm3) resi <- residuals(seglm1) acf(resi) ## visualization par(mfrow=c(1,1)) plot(count.prot, col = "black", lwd = 2, ylab="count") lines(fitted(bp)) lines(confint(bp)) #### unprotected count.unprot <- ts(dat$unprot, start=1, frequency=1) year.unprot <- time(count.unprot) fs <- Fstats(count.unprot ~ year.unprot) plot(fs) sctest(fs) (bp <- breakpoints(count.unprot ~ year.unprot, breaks=1)) confint(bp) ## inspect fitted coefficients in both segments coef(bp) (seg <- breakfactor(bp)) seglm <- lm(count.unprot ~ as.numeric(year.unprot) * seg ) # baseline summary(seglm) resi <- residuals(seglm) acf(resi) par(mfrow=c(1,1)) plot(count.unprot, col = "black", lwd = 2, ylab="count") lines(fitted(bp)) lines(confint(bp)) library(Rmisc) (RMSE_reg <- rmse(as.numeric(count.unprot), fitted(seglm))) ## segmented regression as alternative to strucchange library(segmented) ggplot() + geom_point(aes(x=year.unprot, y=count.unprot)) o <- lm(count.unprot ~ year.unprot) o.seg<- segmented(o, seg.Z=~year.unprot, psi=10) #o <- lm(count.prot ~ year.prot) #summary(o) #o.seg<- segmented(o, seg.Z=~year.prot, psi=15) summary(o.seg) confint(o.seg) # get the fitted data my.fitted <- fitted(o.seg) (RMSE_segreg <- rmse(as.numeric(count.unprot), my.fitted)) # what if this is not regression, but a shift in the mean? fs <- Fstats(count.unprot ~ 1) plot(fs) sctest(fs) (bp <- breakpoints(count.unprot ~ 1)) confint(bp) (bp <- breakpoints(count.unprot ~ 1, breaks=1)) confint(bp) ## inspect fitted coefficients in both segments coef(bp) seg <- breakfactor(bp) seglm1 <- lm(count.unprot ~ seg/1) # baseline testing summary(seglm1) plot(y=count.unprot, x=seq(1,30, by=1)) fm0 <- lm(count.unprot ~ 1) lines(y=fitted(fm0), x=seq(1,30, by=1), col = 3) lines(fitted(seglm1), x=seq(1,30, by=1), col = 4, lwd=4) lines(fitted(seglm), x=seq(1,30, by=1), col = 5, lwd=4) library(Rmisc) (RMSE_shift.in.the.mean <- rmse(as.numeric(count.unprot), fitted(seglm1))) RMSE_shift.in.the.mean RMSE_reg RMSE_segreg # play with the standard deviation & see what happens # have fun :o)