From 1c372c4cdc70fa126b1491ad02c1b936af5e0220 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Tue, 22 Feb 2022 09:16:18 +0100
Subject: [PATCH 001/176] Add files via upload
Addition of 'utils' function and a new POST_FATE function for habitat validation in RFate
---
POST_FATE.validation_habitat.R | 169 +++++++++++++++++++++
UTILS.do_habitat_validation.R | 263 +++++++++++++++++++++++++++++++++
UTILS.plot_predicted_habitat.R | 137 +++++++++++++++++
UTILS.train_RF_habitat.R | 221 +++++++++++++++++++++++++++
4 files changed, 790 insertions(+)
create mode 100644 POST_FATE.validation_habitat.R
create mode 100644 UTILS.do_habitat_validation.R
create mode 100644 UTILS.plot_predicted_habitat.R
create mode 100644 UTILS.train_RF_habitat.R
diff --git a/POST_FATE.validation_habitat.R b/POST_FATE.validation_habitat.R
new file mode 100644
index 0000000..75e8b5a
--- /dev/null
+++ b/POST_FATE.validation_habitat.R
@@ -0,0 +1,169 @@
+### HEADER #####################################################################
+##'
+##' @title Compute habitat performance and create a prediction plot of habitat
+##' for a whole map of a \code{FATE} simulation.
+##'
+##' @name POST_FATE.validation.habitat
+##'
+##' @author Matthieu ... & Maxime Delprat
+##'
+##' @description This script compare habitat simulations and observations and
+##' create a map to visualize this comparison with all the the \code{FATE} and
+##' observed data.
+##'
+##' @param name.simulation simulation folder name.
+##' @param sim.version name of the simulation we want to validate (it works with
+##' only one sim.version).
+##' @param obs.path the function needs observed data, please create a folder for them in your
+##' simulation folder and then indicate in this parmeter the access path to this folder.
+##' @param releves.PFG name of file which contain the observed Braund-Blanquet abundance at each site
+##' and each PFG and strata (with extension).
+##' @param releves.site name of the file which contain coordinates and a description of
+##' the habitat associated with the dominant species of each site in the studied map (with extension).
+##' @param hab.obs name of the file which contain the extended studied map in the simulation (with extension).
+##' @param habitat.FATE.map name of the file which contain the restricted studied map in the simulation (with extension).
+##' @param validation.mask name of the file which contain a raster mask that specified which pixels need validation.
+##'
+##' @details
+##'
+##' The observed habitat is derived from the cesbio map, the simulated habitat
+##' is derived from FATE simulated relative abundance, based on a random forest
+##' algorithm trained on CBNA data. To compare observations and simulations, the function
+##' compute confusion matrix between observation and prediction and then compute the TSS
+##' for each habitat h (number of prediction of habitat h/number of observation
+##' of habitat h + number of non-prediction of habitat h/number of non-observation
+##' of habitat h). The final metrics this script use is the mean of TSS per habitat over all
+##' habitats, weighted by the share of each habitat in the observed habitat distribution.
+##'
+##' @return
+##'
+##' Two folders are created in name.simulation folder :
+##' \describe{
+##' \item{\file{VALIDATION/HABITAT/sim.version}}{containing the prepared CBNA data,
+##' RF model, the performance analyzes (confusion matrix and TSS) for the training and
+##' testing parts of the RF model, the habitat performance file, the habitat prediction file with
+##' observed and simulated habitat for each pixel of the whole map and the final prediction plot.}
+##' \item{\file{DATA_OBS}}{maps of observed habitat and csv files of PFG and sites releves.}
+##' }
+##'
+### END OF HEADER ##############################################################
+
+
+POST_FATE.validation_habitat = function(name.simulation
+ , sim.version
+ , obs.path
+ , releves.PFG
+ , releves.sites
+ , hab.obs
+ , habitat.FATE.map
+ , validation.mask)
+{
+
+ ## LIBRARIES
+ require(data.table)
+ require(raster)
+ require(RFate)
+ require(reshape2)
+ require(stringr)
+ require(foreign)
+ require(stringr)
+ require(dplyr)
+ require(sp)
+ options("rgdal_show_exportToProj4_warnings"="none")
+ require(rgdal)
+ require(randomForest)
+ require(ggplot2)
+ require(ggradar)
+ require(tidyverse)
+ require(ggpubr)
+ require(gridExtra)
+ require(vegan)
+ require(parallel)
+ require(scales)
+ require(class)
+ require(caret)
+ require(sampling)
+ require(tidyselect)
+ require(grid)
+ require(gtable)
+ require(scales)
+ require(cowplot)
+ require(sf)
+ require(visNetwork)
+ require(foreach)
+ require(doParallel)
+ require(prettyR)
+ require(vcd)
+
+ ## GLOBAL PARAMETERS
+
+ # Create directories
+ dir.create(paste0(name.simulation, "/VALIDATION"), recursive = TRUE)
+ dir.create(paste0(name.simulation, "/VALIDATION/HABITAT"), recursive = TRUE)
+ dir.create(paste0(name.simulation, "/VALIDATION/HABITAT/", sim.version), recursive = TRUE)
+
+ # General
+ output.path = paste0(name.simulation, "/VALIDATION")
+
+ # Useful elements to extract from the simulation
+ simulation.map=raster(paste0(name.simulation,"/DATA/MASK/MASK_Champsaur.tif"))
+
+ # For habitat validation
+ # CBNA releves data habitat map
+ releves.PFG<-read.csv(paste0(obs.path, releves.PFG),header=T,stringsAsFactors = T)
+ releves.sites<-st_read(paste0(obs.path, releves.sites))
+ hab.obs<-raster(paste0(obs.path, hab.obs))
+ # Habitat mask at FATE simu resolution
+ # hab.obs.modif<-projectRaster(from = hab.obs, to = simulation.map, res = res(hab.obs)[1], crs = crs(projection(simulation.mask)))
+ # habitat.FATE.map<-crop(hab.obs.modif, simulation.map)
+ habitat.FATE.map<-raster(paste0(obs.path, habitat.FATE.map))
+ validation.mask<-raster(paste0(obs.path, validation.mask))
+
+ # Provide a color df
+ col.df<-data.frame(
+ habitat=c("agricultural.grassland","coniferous.forest","deciduous.forest","natural.grassland","woody.heatland"),
+ failure=c("yellow","blueviolet","aquamarine","chartreuse1","lightsalmon"),
+ success=c("darkorange1","blue4","aquamarine3","chartreuse3","firebrick4"))
+
+ # Other
+ studied.habitat=c("coniferous.forest","deciduous.forest","natural.grassland","woody.heatland","agricultural.grassland")
+ RF.param = list(
+ share.training=0.7,
+ ntree=500)
+ predict.all.map<-T
+
+ ## TRAIN A RF ON CBNA DATA
+
+ RF.model <- train.RF.habitat(releves.PFG = releves.PFG
+ , releves.sites = releves.sites
+ , hab.obs = hab.obs
+ , external.training.mask = NULL
+ , studied.habitat = studied.habitat
+ , RF.param = RF.param
+ , output.path = output.path
+ , perStrata = F
+ , sim.version = sim.version)
+
+ ## USE THE RF MODEL TO VALIDATE OUTPUT
+
+ habitats.results <- do.habitat.validation(output.path = output.path
+ , RF.model = RF.model
+ , habitat.FATE.map = habitat.FATE.map
+ , validation.mask = validation.mask
+ , simulation.map = simulation.map
+ , predict.all.map = predict.all.map
+ , sim.version = sim.version
+ , name.simulation = name.simulation
+ , perStrata = F)
+
+ ## AGGREGATE HABITAT PREDICTION AND PLOT PREDICTED HABITAT
+
+ prediction.map <- plot.predicted.habitat(predicted.habitat = habitats.results
+ , col.df = col.df
+ , simulation.map = simulation.map
+ , output.path = output.path
+ , sim.version = sim.version)
+ return(prediction.map)
+
+}
+
diff --git a/UTILS.do_habitat_validation.R b/UTILS.do_habitat_validation.R
new file mode 100644
index 0000000..6a6c07f
--- /dev/null
+++ b/UTILS.do_habitat_validation.R
@@ -0,0 +1,263 @@
+### HEADER #####################################################################
+##'
+##' @title Compare observed and simulated habitat of a \code{FATE} simulation
+##' at the last simulation year.
+##'
+##' @name do.habitat.validation
+##'
+##' @author Matthieu ... & Maxime Delprat
+##'
+##' @description To compare observations and simulations, this function compute
+##' confusion matrix between observation and prediction and then compute the TSS
+##' for each habitat.
+##'
+##' @param output.path access path to the for the folder where output files
+##' will be created.
+##' @param RF.model random forest model trained on CBNA data (train.RF.habitat
+##' function)
+##' @param habitat.FATE.map a raster map of the observed habitat in the
+##' studied area.
+##' @param validation.mask a raster mask that specified which pixels need validation.
+##' @param simulation.map a raster map of the whole studied area use to check
+##' the consistency between simulation map and the observed habitat map.
+##' @param predict.all.map a TRUE/FALSE vector. If TRUE, the script will predict
+##' habitat for the whole map.
+##' @param sim.version name of the simulation we want to validate.
+##' @param name.simulation simulation folder name.
+##' @param perStrata a TRUE/FALSE vector. If TRUE, the PFG abundance is defined
+##' by strata in each pixel. If FALSE, PFG abundance is defined for all strata.
+##'
+##' @details
+##'
+##' After several preliminary checks, the function is going to prepare the observations
+##' database by extracting the observed habitat from a raster map. Then, for each
+##' simulations (sim.version), the script take the evolution abundance for each PFG
+##' and all strata file and predict the habitat for the whole map (if option selected)
+##' thanks to the RF model.Finally, the function compute habitat performance based on
+##' TSS for each habitat.
+##'
+##' @return
+##'
+##' Habitat performance file
+##' If option selected, the function returns an habitat prediction file with
+##' observed and simulated habitat for each pixel of the whole map.
+##'
+### END OF HEADER ##############################################################
+
+
+do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validation.mask, simulation.map, predict.all.map, sim.version, name.simulation, perStrata) {
+
+ #notes
+ # we prepare the relevé data in this function, but in fact we could provide them directly if we adjust the code
+
+ ###########################
+ #I. Preliminary checks
+ ###########################
+
+ #check if strata definition used in the RF model is the same as the one used to analyze FATE output
+ if(perStrata==F){
+ list.strata<-"all"
+ }else{
+ stop("check 'perStrata' parameter and/or the names of strata in param$list.strata.releves & param$list.strata.simul")
+ }
+
+ #initial consistency between habitat.FATE.map and validation.mask (do it before the adjustement of habitat.FATE.map)
+ if(!compareCRS(habitat.FATE.map,validation.mask) | !all(res(habitat.FATE.map)==res(validation.mask))){
+ stop("please provide rasters with same crs and resolution for habitat.FATE.map and validation.mask")
+ }
+
+ #consistency between habitat.FATE.map and simulation.map
+ if(!compareCRS(simulation.map,habitat.FATE.map)){
+ print("reprojecting habitat.FATE.map to match simulation.map crs")
+ habitat.FATE.map<-projectRaster(habitat.FATE.map,crs=crs(simulation.map))
+ }
+ if(!all(res(habitat.FATE.map)==res(simulation.map))){
+ stop("provide habitat.FATE.map with same resolution as simulation.map")
+ }
+ if(extent(simulation.map)!=extent(habitat.FATE.map)){
+ print("cropping habitat.FATE.map to match simulation.map")
+ habitat.FATE.map<-crop(x=habitat.FATE.map,y=simulation.map)
+ }
+ if(!all(origin(simulation.map)==origin(habitat.FATE.map))){
+ print("setting origin habitat.FATE.map to match simulation.map")
+ origin(habitat.FATE.map)<-origin(simulation.map)
+ }
+ if(!compareRaster(simulation.map,habitat.FATE.map)){ #this is crucial to be able to identify pixel by their index and not their coordinates
+ stop("habitat.FATE.map could not be coerced to match simulation.map")
+ }else{
+ print("simulation.map & habitat.FATE.map are (now) consistent")
+ }
+
+ #adjust validation.mask accordingly
+ if(!all(res(habitat.FATE.map)==res(validation.mask))){
+ validation.mask<-projectRaster(from=validation.mask,to=habitat.FATE.map,method = "ngb")
+ }
+ if(extent(validation.mask)!=extent(habitat.FATE.map)){
+ validation.mask<-crop(x=validation.mask,y=habitat.FATE.map)
+ }
+ if(!compareRaster(validation.mask,habitat.FATE.map)){
+ stop("error in correcting validation.mask to match habitat.FATE.map")
+ }else{
+ print("validation.mask is (now) consistent with (modified) habitat.FATE.map")
+ }
+
+ #check consistency for PFG & strata classes between FATE output vs the RF model
+
+ RF.predictors<-rownames(RF.model$importance)
+ RF.PFG<-unique(str_sub(RF.predictors,1,2))
+
+ FATE.PFG<-str_sub(list.files(paste0(name.simulation,"/DATA/PFGS/SUCC")),6,7)
+
+ if(length(setdiff(FATE.PFG,RF.PFG))>0|length(setdiff(RF.PFG,FATE.PFG))>0){
+ stop("The PFG used to train the RF algorithm are not the same as the PFG used to run FATE.")
+ }
+
+
+ #########################################################################################
+ #II. Prepare database for FATE habitat
+ #########################################################################################
+
+ #index of the pixels in the simulation area
+ in.region.pixels<-which(getValues(simulation.map)==1)
+
+ #habitat df for the whole simulation area
+ habitat.whole.area.df<-data.frame(pixel=seq(from=1,to=ncell(habitat.FATE.map),by=1),code.habitat=getValues(habitat.FATE.map),for.validation=getValues(validation.mask))
+ habitat.whole.area.df<-habitat.whole.area.df[in.region.pixels,]
+ habitat.whole.area.df<-merge(habitat.whole.area.df,dplyr::select(levels(habitat.FATE.map)[[1]],c(ID,habitat)),by.x="code.habitat",by.y="ID")
+ habitat.whole.area.df<-filter(habitat.whole.area.df,is.element(habitat,RF.model$classes))
+
+ print(cat("Habitat considered in the prediction exercise: ",c(unique(habitat.whole.area.df$habitat)),"\n",sep="\t"))
+
+ print("Habitat in the simulation area:")
+ table(habitat.whole.area.df$habitat,useNA="always")
+
+ print("Habitat in the subpart of the simulation area used for validation:")
+ table(habitat.whole.area.df$habitat[habitat.whole.area.df$for.validation==1],useNA="always")
+
+ ##############################
+ # III. Loop on simulations
+ #########################
+
+ print("processing simulations")
+
+ registerDoParallel(detectCores()-2)
+ results.simul <- foreach(i=1:length(sim.version),.packages = c("dplyr","forcats","reshape2","randomForest","vcd","caret")) %dopar%{
+
+ ########################"
+ # III.1. Data preparation
+ #########################
+
+ #get simulated abundance per pixel*strata*PFG for pixels in the simulation area
+ simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_", sim.version, ".csv"))
+ simu_PFG = simu_PFG[,-c(3:44)]
+ colnames(simu_PFG) = c("PFG", "pixel", "abs")
+
+ #aggregate per strata group with the correspondance provided in input
+ simu_PFG$new.strata<-NA
+
+ #attribute the "new.strata" value to group FATE strata used in the simulations into strata comparable with CBNA ones (all strata together or per strata)
+ if(perStrata==F){
+ simu_PFG$new.strata<-"A"
+ }
+
+ simu_PFG<-dplyr::rename(simu_PFG,"strata"="new.strata")
+
+ #aggregate all the rows with same pixel, (new) strata and PFG (necessary since possibly several line with the same pixel+strata+PFG after strata grouping)
+ simu_PFG<-aggregate(abs~pixel+strata+PFG,data=simu_PFG,FUN="sum")
+
+ #transform absolute abundance into relative abundance (no pb if all combination PFG*strata are not present, since then the value is 0!)
+ simu_PFG<-simu_PFG %>% group_by(pixel,strata) %>% mutate(relative.abundance= round(prop.table(abs),digits=2)) #those are proportions, not percentages
+ simu_PFG$relative.abundance[is.na(simu_PFG$relative.abundance)]<-0 #NA because abs==0 for some PFG, so put 0 instead of NA (necessary to avoid risk of confusion with NA in pixels because out of the map)
+ simu_PFG<-as.data.frame(simu_PFG)
+
+ #drop the absolute abundance
+ simu_PFG$abs<-NULL
+
+ #set a factor structure
+ simu_PFG$PFG<-as.factor(simu_PFG$PFG)
+ simu_PFG$strata<-as.factor(simu_PFG$strata)
+
+ #correct the levels (to have all PFG and all strata) to make the dcast transfo easier (all PFG*strata combination will be automatically created thanks to the factor structure, even if no line corresponds to it)
+ simu_PFG$PFG<-fct_expand(simu_PFG$PFG,RF.PFG)
+ simu_PFG$strata<-fct_expand(simu_PFG$strata,list.strata)
+
+ #cast
+ simu_PFG<-dcast(simu_PFG,pixel~PFG*strata,value.var=c("relative.abundance"),fill=0,drop=F)
+
+ #merge PFG info and habitat + transform habitat into factor
+
+ #here it is crucial to have exactly the same raster structure for "simulation.map" and "habitat.FATE.map", so as to be able to do the merge on the "pixel" variable
+ data.FATE.PFG.habitat<-merge(simu_PFG,habitat.whole.area.df,by="pixel") #at this stage we have all the pixels in the simulation area
+ data.FATE.PFG.habitat$habitat<-factor(data.FATE.PFG.habitat$habitat,levels=RF.model$classes) #thanks to the "levels" argument, we have the same order for the habitat factor in the RF model and in the FATE outputs
+
+ ############################
+ # III.2. Prediction of habitat with the RF algorithm
+ #################################
+
+ data.validation<-filter(data.FATE.PFG.habitat,for.validation==1)
+ x.validation<-select(data.validation,all_of(RF.predictors))
+ y.validation<-data.validation$habitat
+
+ y.validation.predicted<-predict(object=RF.model,newdata=x.validation,type="response",norm.votes=T)
+
+ ##############################
+ # III.3. Analysis of the results
+ ################################
+
+ confusion.validation<-confusionMatrix(data=y.validation.predicted,reference=fct_expand(y.validation,levels(y.validation.predicted)))
+
+ synthesis.validation<-data.frame(habitat=colnames(confusion.validation$table),sensitivity=confusion.validation$byClass[,1],specificity=confusion.validation$byClass[,2],weight=colSums(confusion.validation$table)/sum(colSums(confusion.validation$table)))
+ synthesis.validation<-synthesis.validation%>%mutate(TSS=round(sensitivity+specificity-1,digits=2))
+
+ aggregate.TSS.validation<-round(sum(synthesis.validation$weight*synthesis.validation$TSS,na.rm=T),digits=2)
+
+ ########################
+ # III.4. Predict habitat for the whole map if option selected (do it only for a small number of simulations)
+ ############################################
+
+ if(predict.all.map==T){
+
+ y.all.map.predicted = predict(object=RF.model,newdata=select(data.FATE.PFG.habitat,all_of(RF.predictors)),type="response",norm.votes=T)
+ y.all.map.predicted = as.data.frame(y.all.map.predicted)
+ y.all.map.predicted$pixel = data.FATE.PFG.habitat$pixel
+ colnames(y.all.map.predicted) = c(sim.version, "pixel")
+
+ }else{
+ y.all.map.predicted<-NULL
+ }
+
+ #prepare outputs
+
+ output.validation<-c(synthesis.validation$TSS,aggregate.TSS.validation)
+ names(output.validation)<-c(synthesis.validation$habitat,"aggregated")
+
+ output<-list(output.validation,y.all.map.predicted)
+ names(output)<-c("output.validation","y.all.map.predicted")
+
+ return(output)
+ }
+ #end of the loop on simulations
+
+ #deal with the results regarding model performance
+ habitat.performance<-as.data.frame(matrix(unlist(lapply(results.simul,"[[",1)),ncol=length(RF.model$classes)+1,byrow=T))
+ names(habitat.performance)<-c(RF.model$classes,"weighted")
+ habitat.performance$simulation<-sim.version
+
+ #save
+ write.csv(habitat.performance,paste0(output.path,"/HABITAT/", sim.version, "/performance.habitat.csv"),row.names=F)
+
+ print("habitat performance saved")
+
+ #deal with the results regarding habitat prediction over the whole map
+ all.map.prediction = results.simul[[1]]$y.all.map.predicted
+ all.map.prediction = merge(all.map.prediction, select(habitat.whole.area.df, c(pixel,habitat)), by = "pixel")
+ all.map.prediction = rename(all.map.prediction,"true.habitat"="habitat")
+
+ #save
+ write.csv(all.map.prediction,paste0(output.path,"/HABITAT/", sim.version, "/habitat.prediction.csv"), row.names=F)
+
+ #return results
+ return(all.map.prediction)
+
+}
+
diff --git a/UTILS.plot_predicted_habitat.R b/UTILS.plot_predicted_habitat.R
new file mode 100644
index 0000000..a415c26
--- /dev/null
+++ b/UTILS.plot_predicted_habitat.R
@@ -0,0 +1,137 @@
+### HEADER #####################################################################
+##'
+##' @title Create a raster map of habitat prediction for a specific \code{FATE}
+##' simulation at the last simulation year.
+##'
+##' @name plot.predicted.habitat
+##'
+##' @author Matthieu ... & Maxime Delprat
+##'
+##' @description This script is designed to create a raster map of habitat prediction
+##' based on a habitat prediction file. For each pixel, the habitat failure or success value
+##' is associated to a color and then, the map is built.
+##'
+##' @param predicted habitat a csv file created by the do.habitat.validation function
+##' which contain, for each pixel of the studied map, the simulated and observed habitat.
+##' @param col.df a data frame with all the colors associated with the failure or
+##' success of each studied habitat prediction.
+##' @param simulation.map a raster map of the whole studied area.
+##' @param output.path access path to the for the folder where output files
+##' will be created.
+##' @param sim.version name of the simulation we want to validate.
+##'
+##' @details
+##'
+##' The function determine true/false prediction ('failure' if false, 'success' if true)
+##' and prepare a dataframe containing color and habitat code. Then, the script merge
+##' the prediction dataframe with the color and code habitat dataframe. Finally,
+##' the function draw a raster map and a plot of prediction habitat over it thanks
+##' to the data prepared before.
+##'
+##' @return
+##'
+##' a synthetic.prediction.png file which contain the final prediction plot.
+### END OF HEADER ##############################################################
+
+
+plot.predicted.habitat<-function(predicted.habitat
+ , col.df
+ , simulation.map
+ , output.path
+ , sim.version)
+{
+
+ #auxiliary function to compute the proportion of simulations lead to the modal prediction
+ count.habitat<-function(df){
+ index<-which(names(df)=="modal.predicted.habitat")
+ prop.simu<-sum(df[-index]==as.character(df[index]))/(length(names(df))-1)
+ return(prop.simu)
+ }
+
+ #compute modal predicted habitat and the proportion of simulations predicting this habitat (for each pixel)
+ predicted.habitat$modal.predicted.habitat<-apply(dplyr::select(predicted.habitat,c(all_of(sim.version))),1,Mode)
+ predicted.habitat$modal.predicted.habitat[predicted.habitat$modal.predicted.habitat==">1 mode"]<-"ambiguous"
+ predicted.habitat$confidence<-apply(dplyr::select(predicted.habitat,c(all_of(sim.version),modal.predicted.habitat)),1,FUN=function(x) count.habitat(x))
+
+
+ #true/false prediction
+ predicted.habitat$prediction.code<-"failure"
+ predicted.habitat$prediction.code[predicted.habitat$modal.predicted.habitat==predicted.habitat$true.habitat]<-"success"
+
+ #prepare a df containing color & habitat code (to facilitate conversion into raster)
+ col.df.long<-data.table::melt(data=setDT(col.df),id.vars="habitat",variable.name="prediction.code",value.name="color")
+
+ habitat.code.df<-unique(dplyr::select(predicted.habitat,c(modal.predicted.habitat,prediction.code)))
+ habitat.code.df$habitat.code<-seq(from=1,to=dim(habitat.code.df)[1],by=1)
+ habitat.code.df<-rename(habitat.code.df,"habitat"="modal.predicted.habitat")
+
+ habitat.code.df<-merge(habitat.code.df,col.df.long,by=c("habitat","prediction.code"))
+ habitat.code.df$label<-paste0(habitat.code.df$habitat," (",habitat.code.df$prediction.code,")")
+
+ #deal with out of scope habitat
+ out.of.scope<-data.frame(habitat="out.of.scope",prediction.code="",habitat.code=0,color="white",label="out of scope")
+ habitat.code.df<-rbind(habitat.code.df,out.of.scope)
+
+ habitat.code.df$label<-as.factor(habitat.code.df$label)
+
+ #order the df
+ habitat.code.df<-habitat.code.df[order(habitat.code.df$label),] #to be sure it's in te right order (useful to ensure the correspondance between label and color in the ggplot function)
+
+
+ #merge the prediction df with the df containing color and habitat code
+ predicted.habitat<-merge(predicted.habitat,habitat.code.df,by.x=c("modal.predicted.habitat","prediction.code"),by.y=c("habitat","prediction.code"))
+
+
+ #plot
+
+ #prepare raster
+ prediction.map<-raster(nrows=nrow(simulation.map),ncols=ncol(simulation.map),crs=crs(simulation.map),ext=extent(simulation.map), resolution=res(simulation.map))
+
+ prediction.map[]<-0 #initialization of the raster, corresponding to "out of scope habitats"
+ prediction.map[predicted.habitat$pixel]<-predicted.habitat$habitat.code
+
+ #ratify
+ prediction.map<-ratify(prediction.map)
+ prediction.map.rat<-levels(prediction.map)[[1]]
+ prediction.map.rat<-merge(prediction.map.rat,habitat.code.df,by.x="ID",by.y="habitat.code")
+ levels(prediction.map)<-prediction.map.rat
+
+ #save the raster
+ writeRaster(prediction.map,filename = paste0(output.path,"/HABITAT/", sim.version, "/synthetic.prediction.grd"),overwrite=T)
+
+
+ #plot on R
+ #convert into xy
+ xy.prediction<-as.data.frame(prediction.map,xy=T)
+ names(xy.prediction)<-c("x","y","habitat","prediction.code","color","label")
+ xy.prediction<-xy.prediction[complete.cases(xy.prediction),]
+
+ #plot
+ prediction.plot<-
+ ggplot(xy.prediction, aes(x=x, y=y, fill=factor(label)))+
+ geom_raster(show.legend = T) +
+ coord_equal()+
+ scale_fill_manual(values = as.character(habitat.code.df$color))+ #ok only if habitat.code.df has been ordered according to "label"
+ ggtitle(paste0("Modal prediction over ",length(sim.version)," simulations"))+
+ guides(fill=guide_legend(nrow=4,byrow=F))+
+ theme(
+ plot.title = element_text(size = 8),
+ legend.text = element_text(size = 8, colour ="black"),
+ legend.title = element_blank(),
+ legend.position = "bottom",
+ axis.title.x=element_blank(),
+ axis.text.x=element_blank(),
+ axis.ticks.x=element_blank(),
+ axis.title.y=element_blank(),
+ axis.text.y=element_blank(),
+ axis.ticks.y=element_blank()
+ )
+
+ #save the map
+ ggsave(filename="synthetic.prediction.png",plot = prediction.plot,path = paste0(output.path, "/HABITAT/", sim.version),scale = 1,dpi = 300,limitsize = F,width = 15,height = 15,units ="cm")
+
+ #return the map
+ return(prediction.plot)
+
+}
+
diff --git a/UTILS.train_RF_habitat.R b/UTILS.train_RF_habitat.R
new file mode 100644
index 0000000..8174f9a
--- /dev/null
+++ b/UTILS.train_RF_habitat.R
@@ -0,0 +1,221 @@
+### HEADER #####################################################################
+##'
+##' @title Create a random forest algorithm trained on CBNA data, in order to
+##' obtain the simulated habitat, derived from a \code{FATE} simulation.
+##'
+##' @name train.RF.habitat
+##'
+##' @author Matthieu & Maxime Delprat
+##'
+##' @description This script is designed to produce a random forest model
+##' trained on observed PFG abundance, sites releves and a map of observed
+##' habitat.
+##'
+##' @param releves.PFG a data frame with Braund-Blanquet abundance at each site
+##' and each PFG and strata.
+##' @param releves.sites a data frame with coordinates and a description of
+##' the habitat associated with the dominant species of each site in the
+##' studied map.
+##' @param hab.obs a raster map of the observed habitat in the
+##' extended studied area.
+##' @param external.training.mask default \code{NULL}. (optional) Keep only
+##' releves data in a specific area.
+##' @param studied.habitat a vector that specifies habitats that we take
+##' into account for the validation.
+##' @param RF.param a list of 2 parameters for random forest model :
+##' share.training defines the size of the trainig part of the data base.
+##' ntree is the number of trees build by the algorithm, it allows to reduce
+##' the prediction error.
+##' @param output.path access path to the for the folder where output files
+##' will be created.
+##' @param perStrata a TRUE/FALSE vector. If TRUE, the PFG abundance is defined
+##' by strata in each site. If FALSE, PFG abundance is defined for all strata.
+##' @param sim.version name of the simulation we want to validate.
+##'
+##' @details
+##'
+##' This function transform PFG Braund-Blanquet abundance in relative abundance,
+##' get habitat information from the releves map, keep only relees on interesting
+##' habitat and then builds de random forest model. Finally, the function analyzes
+##' the model performance with computation of confusion matrix and TSS for
+##' the traning and testing sample.
+##'
+##' @return
+##'
+##' 2 prepared CBNA releves files are created before the building of the random
+##' forest model in a habitat validation folder.
+##' 5 more files are created at the end of the script to save the RF model and
+##' the performance analyzes (confusion matrix and TSS) for the training and
+##' testing parts.
+##'
+### END OF HEADER ##############################################################
+
+
+train.RF.habitat<-function(releves.PFG
+ , releves.sites
+ , hab.obs
+ , external.training.mask=NULL
+ , studied.habitat
+ , RF.param
+ , output.path
+ , perStrata
+ , sim.version)
+{
+
+ #1. Compute relative abundance metric
+ #########################################
+
+ #identify sites with wrong BB values (ie values that cannot be converted by the PRE_FATE.abundBraunBlanquet function)
+ releves.PFG<-filter(releves.PFG,is.element(BB,c(NA, "NA", 0, "+", "r", 1:5)))
+
+ #transformation into coverage percentage
+ releves.PFG$coverage<-PRE_FATE.abundBraunBlanquet(releves.PFG$BB)/100 #as a proportion, not a percentage
+
+ if(perStrata==T){
+ aggregated.releves.PFG<-aggregate(coverage~site+PFG+strata,data=releves.PFG,FUN="sum")
+ }else if(perStrata==F){
+ aggregated.releves.PFG<-aggregate(coverage~site+PFG,data=releves.PFG,FUN="sum")
+ aggregated.releves.PFG$strata<-"A" #"A" is for "all". Important to have a single-letter code here (useful to check consistency between relevés strata and model strata)
+ }
+
+ #transformation into a relative metric (here relative.metric is relative coverage)
+ aggregated.releves.PFG<-as.data.frame(aggregated.releves.PFG %>% group_by(site,strata) %>% mutate(relative.metric= round(prop.table(coverage),digits = 2))) #rel is proportion of total pct_cov, not percentage
+ aggregated.releves.PFG$relative.metric[is.na(aggregated.releves.PFG$relative.metric)]<-0 #NA because abs==0 for some PFG, so put 0 instead of NA (maybe not necessary)
+ aggregated.releves.PFG$coverage<-NULL
+
+ print("releve data have been transformed into a relative metric")
+
+ #2. Cast the df
+ #######################
+
+ #transfo into factor to be sure to create all the combination when doing "dcast"
+ aggregated.releves.PFG$PFG<-as.factor(aggregated.releves.PFG$PFG)
+ aggregated.releves.PFG$strata<-as.factor(aggregated.releves.PFG$strata)
+
+ aggregated.releves.PFG<-dcast(setDT(aggregated.releves.PFG),site~PFG+strata,value.var=c("relative.metric"),fill=0,drop=F)
+
+ #3. Get habitat information
+ ###################################
+
+ #get sites coordinates
+ aggregated.releves.PFG<-merge(dplyr::select(releves.sites,c(site)),aggregated.releves.PFG,by="site")
+
+ #get habitat code and name
+ if(compareCRS(aggregated.releves.PFG,hab.obs)){
+ aggregated.releves.PFG$code.habitat<-raster::extract(x=hab.obs,y=aggregated.releves.PFG)
+ }else{
+ aggregated.releves.PFG<-st_transform(x=aggregated.releves.PFG,crs=crs(hab.obs))
+ aggregated.releves.PFG$code.habitat<-raster::extract(x=hab.obs,y=aggregated.releves.PFG)
+ }
+
+ #correspondance habitat code/habitat name
+ table.habitat.releve<-levels(hab.obs)[[1]]
+
+ aggregated.releves.PFG<-merge(aggregated.releves.PFG,dplyr::select(table.habitat.releve,c(ID,habitat)),by.x="code.habitat",by.y="ID")
+
+ #(optional) keep only releves data in a specific area
+ if(!is.null(external.training.mask)){
+
+ if(compareCRS(aggregated.releves.PFG,external.training.mask)==F){ #as this stage it is not a problem to transform crs(aggregated.releves.PFG) since we have no more merge to do (we have already extracted habitat info from the map)
+ aggregated.releves.PFG<-st_transform(x=aggregated.releves.PFG,crs=crs(external.training.mask))
+ }
+
+ aggregated.releves.PFG<-st_crop(x=aggregated.releves.PFG,y=external.training.mask)
+ print("'releve' map has been cropped to match 'external.training.mask'.")
+ }
+
+
+ # 4. Keep only releve on interesting habitat
+ ###################################################"
+
+ aggregated.releves.PFG<-filter(aggregated.releves.PFG,is.element(habitat,studied.habitat)) #filter non interesting habitat + NA
+
+ print(cat("habitat classes used in the RF algo: ",unique(aggregated.releves.PFG$habitat),"\n",sep="\t"))
+
+ # 5. Save data
+ #####################
+
+ st_write(aggregated.releves.PFG,paste0(output.path,"/HABITAT/", sim.version, "/releve.PFG.habitat.shp"),overwrite=T,append=F)
+ write.csv(aggregated.releves.PFG,paste0(output.path,"/HABITAT/", sim.version, "/CBNA.releves.prepared.csv"),row.names = F)
+
+ # 6. Small adjustment in data structure
+ ##########################################
+
+ aggregated.releves.PFG<-as.data.frame(aggregated.releves.PFG) #get rid of the spatial structure before entering the RF process
+ aggregated.releves.PFG$habitat<-as.factor(aggregated.releves.PFG$habitat)
+
+ # 7.Random forest
+ ######################################
+
+ #separate the database into a training and a test part
+ set.seed(123)
+
+ training.site<-sample(aggregated.releves.PFG$site,size=RF.param$share.training*length(aggregated.releves.PFG$site),replace = F)
+ releves.training<-filter(aggregated.releves.PFG,is.element(site,training.site))
+ releves.testing<-filter(aggregated.releves.PFG,!is.element(site,training.site))
+
+ #train the model (with correction for imbalances in sampling)
+
+ #run optimization algo (careful : optimization over OOB...)
+ mtry.perf<-as.data.frame(
+ tuneRF(
+ x=select(releves.training,-c(code.habitat,site,habitat,geometry)),
+ y=releves.training$habitat,
+ strata=releves.training$habitat,
+ sampsize=min(table(releves.training$habitat)),
+ ntreeTry=RF.param$ntree,
+ stepFactor=2, improve=0.05,doBest=FALSE,plot=F,trace=F
+ )
+ )
+
+ #select mtry
+ mtry<-mtry.perf$mtry[mtry.perf$OOBError==min(mtry.perf$OOBError)][1] #the lowest n achieving minimum OOB
+
+ #run real model
+ model<- randomForest(
+ x=select(releves.training,-c(code.habitat,site,habitat,geometry)),
+ y=releves.training$habitat,
+ xtest=select(releves.testing,-c(code.habitat,site,habitat,geometry)),
+ ytest=releves.testing$habitat,
+ strata=releves.training$habitat,
+ min(table(releves.training$habitat)),
+ ntree=RF.param$ntree,
+ mtry=mtry,
+ norm.votes=TRUE,
+ keep.forest=TRUE
+ )
+
+ #analyse model performance
+
+ # Analysis on the training sample
+
+ confusion.training<-confusionMatrix(data=model$predicted,reference=releves.training$habitat)
+
+ synthesis.training<-data.frame(habitat=colnames(confusion.training$table),sensitivity=confusion.training$byClass[,1],specificity=confusion.training$byClass[,2],weight=colSums(confusion.training$table)/sum(colSums(confusion.training$table))) #warning: prevalence is the weight of predicted habitat, not of observed habitat
+ synthesis.training<-synthesis.training%>%mutate(TSS=round(sensitivity+specificity-1,digits=2))
+
+ aggregate.TSS.training<-round(sum(synthesis.training$weight*synthesis.training$TSS),digits=2)
+
+ # Analysis on the testing sample
+
+ confusion.testing<-confusionMatrix(data=model$test$predicted,reference=releves.testing$habitat)
+
+ synthesis.testing<-data.frame(habitat=colnames(confusion.testing$table),sensitivity=confusion.testing$byClass[,1],specificity=confusion.testing$byClass[,2],weight=colSums(confusion.testing$table)/sum(colSums(confusion.testing$table)))#warning: prevalence is the weight of predicted habitat, not of observed habitat
+ synthesis.testing<-synthesis.testing%>%mutate(TSS=round(sensitivity+specificity-1,digits=2))
+
+ aggregate.TSS.testing<-round(sum(synthesis.testing$weight*synthesis.testing$TSS),digits=2)
+
+
+ # 8. Save and return output
+ #######################################"
+
+ write_rds(model,paste0(output.path,"/HABITAT/", sim.version, "/RF.model.rds"),compress="none")
+ write.csv(synthesis.training,paste0(output.path,"/HABITAT/", sim.version, "/RF_perf.per.hab_training.csv"),row.names=F)
+ write.csv(aggregate.TSS.training,paste0(output.path,"/HABITAT/", sim.version, "/RF_aggregate.TSS_training.csv"),row.names=F)
+ write.csv(synthesis.testing,paste0(output.path,"/HABITAT/", sim.version, "/RF_perf.per.hab_testing.csv"),row.names=F)
+ write.csv(aggregate.TSS.testing,paste0(output.path,"/HABITAT/", sim.version, "/RF_aggregate.TSS_testing.csv"),row.names=F)
+
+ return(model)
+
+}
+
From c56c83923592d68a6ab585fcaffd5a58c098b5cb Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Tue, 22 Feb 2022 09:22:47 +0100
Subject: [PATCH 002/176] Delete POST_FATE.validation_habitat.R
---
POST_FATE.validation_habitat.R | 169 ---------------------------------
1 file changed, 169 deletions(-)
delete mode 100644 POST_FATE.validation_habitat.R
diff --git a/POST_FATE.validation_habitat.R b/POST_FATE.validation_habitat.R
deleted file mode 100644
index 75e8b5a..0000000
--- a/POST_FATE.validation_habitat.R
+++ /dev/null
@@ -1,169 +0,0 @@
-### HEADER #####################################################################
-##'
-##' @title Compute habitat performance and create a prediction plot of habitat
-##' for a whole map of a \code{FATE} simulation.
-##'
-##' @name POST_FATE.validation.habitat
-##'
-##' @author Matthieu ... & Maxime Delprat
-##'
-##' @description This script compare habitat simulations and observations and
-##' create a map to visualize this comparison with all the the \code{FATE} and
-##' observed data.
-##'
-##' @param name.simulation simulation folder name.
-##' @param sim.version name of the simulation we want to validate (it works with
-##' only one sim.version).
-##' @param obs.path the function needs observed data, please create a folder for them in your
-##' simulation folder and then indicate in this parmeter the access path to this folder.
-##' @param releves.PFG name of file which contain the observed Braund-Blanquet abundance at each site
-##' and each PFG and strata (with extension).
-##' @param releves.site name of the file which contain coordinates and a description of
-##' the habitat associated with the dominant species of each site in the studied map (with extension).
-##' @param hab.obs name of the file which contain the extended studied map in the simulation (with extension).
-##' @param habitat.FATE.map name of the file which contain the restricted studied map in the simulation (with extension).
-##' @param validation.mask name of the file which contain a raster mask that specified which pixels need validation.
-##'
-##' @details
-##'
-##' The observed habitat is derived from the cesbio map, the simulated habitat
-##' is derived from FATE simulated relative abundance, based on a random forest
-##' algorithm trained on CBNA data. To compare observations and simulations, the function
-##' compute confusion matrix between observation and prediction and then compute the TSS
-##' for each habitat h (number of prediction of habitat h/number of observation
-##' of habitat h + number of non-prediction of habitat h/number of non-observation
-##' of habitat h). The final metrics this script use is the mean of TSS per habitat over all
-##' habitats, weighted by the share of each habitat in the observed habitat distribution.
-##'
-##' @return
-##'
-##' Two folders are created in name.simulation folder :
-##' \describe{
-##' \item{\file{VALIDATION/HABITAT/sim.version}}{containing the prepared CBNA data,
-##' RF model, the performance analyzes (confusion matrix and TSS) for the training and
-##' testing parts of the RF model, the habitat performance file, the habitat prediction file with
-##' observed and simulated habitat for each pixel of the whole map and the final prediction plot.}
-##' \item{\file{DATA_OBS}}{maps of observed habitat and csv files of PFG and sites releves.}
-##' }
-##'
-### END OF HEADER ##############################################################
-
-
-POST_FATE.validation_habitat = function(name.simulation
- , sim.version
- , obs.path
- , releves.PFG
- , releves.sites
- , hab.obs
- , habitat.FATE.map
- , validation.mask)
-{
-
- ## LIBRARIES
- require(data.table)
- require(raster)
- require(RFate)
- require(reshape2)
- require(stringr)
- require(foreign)
- require(stringr)
- require(dplyr)
- require(sp)
- options("rgdal_show_exportToProj4_warnings"="none")
- require(rgdal)
- require(randomForest)
- require(ggplot2)
- require(ggradar)
- require(tidyverse)
- require(ggpubr)
- require(gridExtra)
- require(vegan)
- require(parallel)
- require(scales)
- require(class)
- require(caret)
- require(sampling)
- require(tidyselect)
- require(grid)
- require(gtable)
- require(scales)
- require(cowplot)
- require(sf)
- require(visNetwork)
- require(foreach)
- require(doParallel)
- require(prettyR)
- require(vcd)
-
- ## GLOBAL PARAMETERS
-
- # Create directories
- dir.create(paste0(name.simulation, "/VALIDATION"), recursive = TRUE)
- dir.create(paste0(name.simulation, "/VALIDATION/HABITAT"), recursive = TRUE)
- dir.create(paste0(name.simulation, "/VALIDATION/HABITAT/", sim.version), recursive = TRUE)
-
- # General
- output.path = paste0(name.simulation, "/VALIDATION")
-
- # Useful elements to extract from the simulation
- simulation.map=raster(paste0(name.simulation,"/DATA/MASK/MASK_Champsaur.tif"))
-
- # For habitat validation
- # CBNA releves data habitat map
- releves.PFG<-read.csv(paste0(obs.path, releves.PFG),header=T,stringsAsFactors = T)
- releves.sites<-st_read(paste0(obs.path, releves.sites))
- hab.obs<-raster(paste0(obs.path, hab.obs))
- # Habitat mask at FATE simu resolution
- # hab.obs.modif<-projectRaster(from = hab.obs, to = simulation.map, res = res(hab.obs)[1], crs = crs(projection(simulation.mask)))
- # habitat.FATE.map<-crop(hab.obs.modif, simulation.map)
- habitat.FATE.map<-raster(paste0(obs.path, habitat.FATE.map))
- validation.mask<-raster(paste0(obs.path, validation.mask))
-
- # Provide a color df
- col.df<-data.frame(
- habitat=c("agricultural.grassland","coniferous.forest","deciduous.forest","natural.grassland","woody.heatland"),
- failure=c("yellow","blueviolet","aquamarine","chartreuse1","lightsalmon"),
- success=c("darkorange1","blue4","aquamarine3","chartreuse3","firebrick4"))
-
- # Other
- studied.habitat=c("coniferous.forest","deciduous.forest","natural.grassland","woody.heatland","agricultural.grassland")
- RF.param = list(
- share.training=0.7,
- ntree=500)
- predict.all.map<-T
-
- ## TRAIN A RF ON CBNA DATA
-
- RF.model <- train.RF.habitat(releves.PFG = releves.PFG
- , releves.sites = releves.sites
- , hab.obs = hab.obs
- , external.training.mask = NULL
- , studied.habitat = studied.habitat
- , RF.param = RF.param
- , output.path = output.path
- , perStrata = F
- , sim.version = sim.version)
-
- ## USE THE RF MODEL TO VALIDATE OUTPUT
-
- habitats.results <- do.habitat.validation(output.path = output.path
- , RF.model = RF.model
- , habitat.FATE.map = habitat.FATE.map
- , validation.mask = validation.mask
- , simulation.map = simulation.map
- , predict.all.map = predict.all.map
- , sim.version = sim.version
- , name.simulation = name.simulation
- , perStrata = F)
-
- ## AGGREGATE HABITAT PREDICTION AND PLOT PREDICTED HABITAT
-
- prediction.map <- plot.predicted.habitat(predicted.habitat = habitats.results
- , col.df = col.df
- , simulation.map = simulation.map
- , output.path = output.path
- , sim.version = sim.version)
- return(prediction.map)
-
-}
-
From 8d05b710c54ac7d176b2032c24a9840e252c3841 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Tue, 22 Feb 2022 09:22:56 +0100
Subject: [PATCH 003/176] Delete UTILS.do_habitat_validation.R
---
UTILS.do_habitat_validation.R | 263 ----------------------------------
1 file changed, 263 deletions(-)
delete mode 100644 UTILS.do_habitat_validation.R
diff --git a/UTILS.do_habitat_validation.R b/UTILS.do_habitat_validation.R
deleted file mode 100644
index 6a6c07f..0000000
--- a/UTILS.do_habitat_validation.R
+++ /dev/null
@@ -1,263 +0,0 @@
-### HEADER #####################################################################
-##'
-##' @title Compare observed and simulated habitat of a \code{FATE} simulation
-##' at the last simulation year.
-##'
-##' @name do.habitat.validation
-##'
-##' @author Matthieu ... & Maxime Delprat
-##'
-##' @description To compare observations and simulations, this function compute
-##' confusion matrix between observation and prediction and then compute the TSS
-##' for each habitat.
-##'
-##' @param output.path access path to the for the folder where output files
-##' will be created.
-##' @param RF.model random forest model trained on CBNA data (train.RF.habitat
-##' function)
-##' @param habitat.FATE.map a raster map of the observed habitat in the
-##' studied area.
-##' @param validation.mask a raster mask that specified which pixels need validation.
-##' @param simulation.map a raster map of the whole studied area use to check
-##' the consistency between simulation map and the observed habitat map.
-##' @param predict.all.map a TRUE/FALSE vector. If TRUE, the script will predict
-##' habitat for the whole map.
-##' @param sim.version name of the simulation we want to validate.
-##' @param name.simulation simulation folder name.
-##' @param perStrata a TRUE/FALSE vector. If TRUE, the PFG abundance is defined
-##' by strata in each pixel. If FALSE, PFG abundance is defined for all strata.
-##'
-##' @details
-##'
-##' After several preliminary checks, the function is going to prepare the observations
-##' database by extracting the observed habitat from a raster map. Then, for each
-##' simulations (sim.version), the script take the evolution abundance for each PFG
-##' and all strata file and predict the habitat for the whole map (if option selected)
-##' thanks to the RF model.Finally, the function compute habitat performance based on
-##' TSS for each habitat.
-##'
-##' @return
-##'
-##' Habitat performance file
-##' If option selected, the function returns an habitat prediction file with
-##' observed and simulated habitat for each pixel of the whole map.
-##'
-### END OF HEADER ##############################################################
-
-
-do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validation.mask, simulation.map, predict.all.map, sim.version, name.simulation, perStrata) {
-
- #notes
- # we prepare the relevé data in this function, but in fact we could provide them directly if we adjust the code
-
- ###########################
- #I. Preliminary checks
- ###########################
-
- #check if strata definition used in the RF model is the same as the one used to analyze FATE output
- if(perStrata==F){
- list.strata<-"all"
- }else{
- stop("check 'perStrata' parameter and/or the names of strata in param$list.strata.releves & param$list.strata.simul")
- }
-
- #initial consistency between habitat.FATE.map and validation.mask (do it before the adjustement of habitat.FATE.map)
- if(!compareCRS(habitat.FATE.map,validation.mask) | !all(res(habitat.FATE.map)==res(validation.mask))){
- stop("please provide rasters with same crs and resolution for habitat.FATE.map and validation.mask")
- }
-
- #consistency between habitat.FATE.map and simulation.map
- if(!compareCRS(simulation.map,habitat.FATE.map)){
- print("reprojecting habitat.FATE.map to match simulation.map crs")
- habitat.FATE.map<-projectRaster(habitat.FATE.map,crs=crs(simulation.map))
- }
- if(!all(res(habitat.FATE.map)==res(simulation.map))){
- stop("provide habitat.FATE.map with same resolution as simulation.map")
- }
- if(extent(simulation.map)!=extent(habitat.FATE.map)){
- print("cropping habitat.FATE.map to match simulation.map")
- habitat.FATE.map<-crop(x=habitat.FATE.map,y=simulation.map)
- }
- if(!all(origin(simulation.map)==origin(habitat.FATE.map))){
- print("setting origin habitat.FATE.map to match simulation.map")
- origin(habitat.FATE.map)<-origin(simulation.map)
- }
- if(!compareRaster(simulation.map,habitat.FATE.map)){ #this is crucial to be able to identify pixel by their index and not their coordinates
- stop("habitat.FATE.map could not be coerced to match simulation.map")
- }else{
- print("simulation.map & habitat.FATE.map are (now) consistent")
- }
-
- #adjust validation.mask accordingly
- if(!all(res(habitat.FATE.map)==res(validation.mask))){
- validation.mask<-projectRaster(from=validation.mask,to=habitat.FATE.map,method = "ngb")
- }
- if(extent(validation.mask)!=extent(habitat.FATE.map)){
- validation.mask<-crop(x=validation.mask,y=habitat.FATE.map)
- }
- if(!compareRaster(validation.mask,habitat.FATE.map)){
- stop("error in correcting validation.mask to match habitat.FATE.map")
- }else{
- print("validation.mask is (now) consistent with (modified) habitat.FATE.map")
- }
-
- #check consistency for PFG & strata classes between FATE output vs the RF model
-
- RF.predictors<-rownames(RF.model$importance)
- RF.PFG<-unique(str_sub(RF.predictors,1,2))
-
- FATE.PFG<-str_sub(list.files(paste0(name.simulation,"/DATA/PFGS/SUCC")),6,7)
-
- if(length(setdiff(FATE.PFG,RF.PFG))>0|length(setdiff(RF.PFG,FATE.PFG))>0){
- stop("The PFG used to train the RF algorithm are not the same as the PFG used to run FATE.")
- }
-
-
- #########################################################################################
- #II. Prepare database for FATE habitat
- #########################################################################################
-
- #index of the pixels in the simulation area
- in.region.pixels<-which(getValues(simulation.map)==1)
-
- #habitat df for the whole simulation area
- habitat.whole.area.df<-data.frame(pixel=seq(from=1,to=ncell(habitat.FATE.map),by=1),code.habitat=getValues(habitat.FATE.map),for.validation=getValues(validation.mask))
- habitat.whole.area.df<-habitat.whole.area.df[in.region.pixels,]
- habitat.whole.area.df<-merge(habitat.whole.area.df,dplyr::select(levels(habitat.FATE.map)[[1]],c(ID,habitat)),by.x="code.habitat",by.y="ID")
- habitat.whole.area.df<-filter(habitat.whole.area.df,is.element(habitat,RF.model$classes))
-
- print(cat("Habitat considered in the prediction exercise: ",c(unique(habitat.whole.area.df$habitat)),"\n",sep="\t"))
-
- print("Habitat in the simulation area:")
- table(habitat.whole.area.df$habitat,useNA="always")
-
- print("Habitat in the subpart of the simulation area used for validation:")
- table(habitat.whole.area.df$habitat[habitat.whole.area.df$for.validation==1],useNA="always")
-
- ##############################
- # III. Loop on simulations
- #########################
-
- print("processing simulations")
-
- registerDoParallel(detectCores()-2)
- results.simul <- foreach(i=1:length(sim.version),.packages = c("dplyr","forcats","reshape2","randomForest","vcd","caret")) %dopar%{
-
- ########################"
- # III.1. Data preparation
- #########################
-
- #get simulated abundance per pixel*strata*PFG for pixels in the simulation area
- simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_", sim.version, ".csv"))
- simu_PFG = simu_PFG[,-c(3:44)]
- colnames(simu_PFG) = c("PFG", "pixel", "abs")
-
- #aggregate per strata group with the correspondance provided in input
- simu_PFG$new.strata<-NA
-
- #attribute the "new.strata" value to group FATE strata used in the simulations into strata comparable with CBNA ones (all strata together or per strata)
- if(perStrata==F){
- simu_PFG$new.strata<-"A"
- }
-
- simu_PFG<-dplyr::rename(simu_PFG,"strata"="new.strata")
-
- #aggregate all the rows with same pixel, (new) strata and PFG (necessary since possibly several line with the same pixel+strata+PFG after strata grouping)
- simu_PFG<-aggregate(abs~pixel+strata+PFG,data=simu_PFG,FUN="sum")
-
- #transform absolute abundance into relative abundance (no pb if all combination PFG*strata are not present, since then the value is 0!)
- simu_PFG<-simu_PFG %>% group_by(pixel,strata) %>% mutate(relative.abundance= round(prop.table(abs),digits=2)) #those are proportions, not percentages
- simu_PFG$relative.abundance[is.na(simu_PFG$relative.abundance)]<-0 #NA because abs==0 for some PFG, so put 0 instead of NA (necessary to avoid risk of confusion with NA in pixels because out of the map)
- simu_PFG<-as.data.frame(simu_PFG)
-
- #drop the absolute abundance
- simu_PFG$abs<-NULL
-
- #set a factor structure
- simu_PFG$PFG<-as.factor(simu_PFG$PFG)
- simu_PFG$strata<-as.factor(simu_PFG$strata)
-
- #correct the levels (to have all PFG and all strata) to make the dcast transfo easier (all PFG*strata combination will be automatically created thanks to the factor structure, even if no line corresponds to it)
- simu_PFG$PFG<-fct_expand(simu_PFG$PFG,RF.PFG)
- simu_PFG$strata<-fct_expand(simu_PFG$strata,list.strata)
-
- #cast
- simu_PFG<-dcast(simu_PFG,pixel~PFG*strata,value.var=c("relative.abundance"),fill=0,drop=F)
-
- #merge PFG info and habitat + transform habitat into factor
-
- #here it is crucial to have exactly the same raster structure for "simulation.map" and "habitat.FATE.map", so as to be able to do the merge on the "pixel" variable
- data.FATE.PFG.habitat<-merge(simu_PFG,habitat.whole.area.df,by="pixel") #at this stage we have all the pixels in the simulation area
- data.FATE.PFG.habitat$habitat<-factor(data.FATE.PFG.habitat$habitat,levels=RF.model$classes) #thanks to the "levels" argument, we have the same order for the habitat factor in the RF model and in the FATE outputs
-
- ############################
- # III.2. Prediction of habitat with the RF algorithm
- #################################
-
- data.validation<-filter(data.FATE.PFG.habitat,for.validation==1)
- x.validation<-select(data.validation,all_of(RF.predictors))
- y.validation<-data.validation$habitat
-
- y.validation.predicted<-predict(object=RF.model,newdata=x.validation,type="response",norm.votes=T)
-
- ##############################
- # III.3. Analysis of the results
- ################################
-
- confusion.validation<-confusionMatrix(data=y.validation.predicted,reference=fct_expand(y.validation,levels(y.validation.predicted)))
-
- synthesis.validation<-data.frame(habitat=colnames(confusion.validation$table),sensitivity=confusion.validation$byClass[,1],specificity=confusion.validation$byClass[,2],weight=colSums(confusion.validation$table)/sum(colSums(confusion.validation$table)))
- synthesis.validation<-synthesis.validation%>%mutate(TSS=round(sensitivity+specificity-1,digits=2))
-
- aggregate.TSS.validation<-round(sum(synthesis.validation$weight*synthesis.validation$TSS,na.rm=T),digits=2)
-
- ########################
- # III.4. Predict habitat for the whole map if option selected (do it only for a small number of simulations)
- ############################################
-
- if(predict.all.map==T){
-
- y.all.map.predicted = predict(object=RF.model,newdata=select(data.FATE.PFG.habitat,all_of(RF.predictors)),type="response",norm.votes=T)
- y.all.map.predicted = as.data.frame(y.all.map.predicted)
- y.all.map.predicted$pixel = data.FATE.PFG.habitat$pixel
- colnames(y.all.map.predicted) = c(sim.version, "pixel")
-
- }else{
- y.all.map.predicted<-NULL
- }
-
- #prepare outputs
-
- output.validation<-c(synthesis.validation$TSS,aggregate.TSS.validation)
- names(output.validation)<-c(synthesis.validation$habitat,"aggregated")
-
- output<-list(output.validation,y.all.map.predicted)
- names(output)<-c("output.validation","y.all.map.predicted")
-
- return(output)
- }
- #end of the loop on simulations
-
- #deal with the results regarding model performance
- habitat.performance<-as.data.frame(matrix(unlist(lapply(results.simul,"[[",1)),ncol=length(RF.model$classes)+1,byrow=T))
- names(habitat.performance)<-c(RF.model$classes,"weighted")
- habitat.performance$simulation<-sim.version
-
- #save
- write.csv(habitat.performance,paste0(output.path,"/HABITAT/", sim.version, "/performance.habitat.csv"),row.names=F)
-
- print("habitat performance saved")
-
- #deal with the results regarding habitat prediction over the whole map
- all.map.prediction = results.simul[[1]]$y.all.map.predicted
- all.map.prediction = merge(all.map.prediction, select(habitat.whole.area.df, c(pixel,habitat)), by = "pixel")
- all.map.prediction = rename(all.map.prediction,"true.habitat"="habitat")
-
- #save
- write.csv(all.map.prediction,paste0(output.path,"/HABITAT/", sim.version, "/habitat.prediction.csv"), row.names=F)
-
- #return results
- return(all.map.prediction)
-
-}
-
From 74647c2188b28dba03fd968752c6f2859c280893 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Tue, 22 Feb 2022 09:23:06 +0100
Subject: [PATCH 004/176] Delete UTILS.plot_predicted_habitat.R
---
UTILS.plot_predicted_habitat.R | 137 ---------------------------------
1 file changed, 137 deletions(-)
delete mode 100644 UTILS.plot_predicted_habitat.R
diff --git a/UTILS.plot_predicted_habitat.R b/UTILS.plot_predicted_habitat.R
deleted file mode 100644
index a415c26..0000000
--- a/UTILS.plot_predicted_habitat.R
+++ /dev/null
@@ -1,137 +0,0 @@
-### HEADER #####################################################################
-##'
-##' @title Create a raster map of habitat prediction for a specific \code{FATE}
-##' simulation at the last simulation year.
-##'
-##' @name plot.predicted.habitat
-##'
-##' @author Matthieu ... & Maxime Delprat
-##'
-##' @description This script is designed to create a raster map of habitat prediction
-##' based on a habitat prediction file. For each pixel, the habitat failure or success value
-##' is associated to a color and then, the map is built.
-##'
-##' @param predicted habitat a csv file created by the do.habitat.validation function
-##' which contain, for each pixel of the studied map, the simulated and observed habitat.
-##' @param col.df a data frame with all the colors associated with the failure or
-##' success of each studied habitat prediction.
-##' @param simulation.map a raster map of the whole studied area.
-##' @param output.path access path to the for the folder where output files
-##' will be created.
-##' @param sim.version name of the simulation we want to validate.
-##'
-##' @details
-##'
-##' The function determine true/false prediction ('failure' if false, 'success' if true)
-##' and prepare a dataframe containing color and habitat code. Then, the script merge
-##' the prediction dataframe with the color and code habitat dataframe. Finally,
-##' the function draw a raster map and a plot of prediction habitat over it thanks
-##' to the data prepared before.
-##'
-##' @return
-##'
-##' a synthetic.prediction.png file which contain the final prediction plot.
-### END OF HEADER ##############################################################
-
-
-plot.predicted.habitat<-function(predicted.habitat
- , col.df
- , simulation.map
- , output.path
- , sim.version)
-{
-
- #auxiliary function to compute the proportion of simulations lead to the modal prediction
- count.habitat<-function(df){
- index<-which(names(df)=="modal.predicted.habitat")
- prop.simu<-sum(df[-index]==as.character(df[index]))/(length(names(df))-1)
- return(prop.simu)
- }
-
- #compute modal predicted habitat and the proportion of simulations predicting this habitat (for each pixel)
- predicted.habitat$modal.predicted.habitat<-apply(dplyr::select(predicted.habitat,c(all_of(sim.version))),1,Mode)
- predicted.habitat$modal.predicted.habitat[predicted.habitat$modal.predicted.habitat==">1 mode"]<-"ambiguous"
- predicted.habitat$confidence<-apply(dplyr::select(predicted.habitat,c(all_of(sim.version),modal.predicted.habitat)),1,FUN=function(x) count.habitat(x))
-
-
- #true/false prediction
- predicted.habitat$prediction.code<-"failure"
- predicted.habitat$prediction.code[predicted.habitat$modal.predicted.habitat==predicted.habitat$true.habitat]<-"success"
-
- #prepare a df containing color & habitat code (to facilitate conversion into raster)
- col.df.long<-data.table::melt(data=setDT(col.df),id.vars="habitat",variable.name="prediction.code",value.name="color")
-
- habitat.code.df<-unique(dplyr::select(predicted.habitat,c(modal.predicted.habitat,prediction.code)))
- habitat.code.df$habitat.code<-seq(from=1,to=dim(habitat.code.df)[1],by=1)
- habitat.code.df<-rename(habitat.code.df,"habitat"="modal.predicted.habitat")
-
- habitat.code.df<-merge(habitat.code.df,col.df.long,by=c("habitat","prediction.code"))
- habitat.code.df$label<-paste0(habitat.code.df$habitat," (",habitat.code.df$prediction.code,")")
-
- #deal with out of scope habitat
- out.of.scope<-data.frame(habitat="out.of.scope",prediction.code="",habitat.code=0,color="white",label="out of scope")
- habitat.code.df<-rbind(habitat.code.df,out.of.scope)
-
- habitat.code.df$label<-as.factor(habitat.code.df$label)
-
- #order the df
- habitat.code.df<-habitat.code.df[order(habitat.code.df$label),] #to be sure it's in te right order (useful to ensure the correspondance between label and color in the ggplot function)
-
-
- #merge the prediction df with the df containing color and habitat code
- predicted.habitat<-merge(predicted.habitat,habitat.code.df,by.x=c("modal.predicted.habitat","prediction.code"),by.y=c("habitat","prediction.code"))
-
-
- #plot
-
- #prepare raster
- prediction.map<-raster(nrows=nrow(simulation.map),ncols=ncol(simulation.map),crs=crs(simulation.map),ext=extent(simulation.map), resolution=res(simulation.map))
-
- prediction.map[]<-0 #initialization of the raster, corresponding to "out of scope habitats"
- prediction.map[predicted.habitat$pixel]<-predicted.habitat$habitat.code
-
- #ratify
- prediction.map<-ratify(prediction.map)
- prediction.map.rat<-levels(prediction.map)[[1]]
- prediction.map.rat<-merge(prediction.map.rat,habitat.code.df,by.x="ID",by.y="habitat.code")
- levels(prediction.map)<-prediction.map.rat
-
- #save the raster
- writeRaster(prediction.map,filename = paste0(output.path,"/HABITAT/", sim.version, "/synthetic.prediction.grd"),overwrite=T)
-
-
- #plot on R
- #convert into xy
- xy.prediction<-as.data.frame(prediction.map,xy=T)
- names(xy.prediction)<-c("x","y","habitat","prediction.code","color","label")
- xy.prediction<-xy.prediction[complete.cases(xy.prediction),]
-
- #plot
- prediction.plot<-
- ggplot(xy.prediction, aes(x=x, y=y, fill=factor(label)))+
- geom_raster(show.legend = T) +
- coord_equal()+
- scale_fill_manual(values = as.character(habitat.code.df$color))+ #ok only if habitat.code.df has been ordered according to "label"
- ggtitle(paste0("Modal prediction over ",length(sim.version)," simulations"))+
- guides(fill=guide_legend(nrow=4,byrow=F))+
- theme(
- plot.title = element_text(size = 8),
- legend.text = element_text(size = 8, colour ="black"),
- legend.title = element_blank(),
- legend.position = "bottom",
- axis.title.x=element_blank(),
- axis.text.x=element_blank(),
- axis.ticks.x=element_blank(),
- axis.title.y=element_blank(),
- axis.text.y=element_blank(),
- axis.ticks.y=element_blank()
- )
-
- #save the map
- ggsave(filename="synthetic.prediction.png",plot = prediction.plot,path = paste0(output.path, "/HABITAT/", sim.version),scale = 1,dpi = 300,limitsize = F,width = 15,height = 15,units ="cm")
-
- #return the map
- return(prediction.plot)
-
-}
-
From 848cab7d4595a0ead65ae998457541f36e6e0490 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Tue, 22 Feb 2022 09:23:15 +0100
Subject: [PATCH 005/176] Delete UTILS.train_RF_habitat.R
---
UTILS.train_RF_habitat.R | 221 ---------------------------------------
1 file changed, 221 deletions(-)
delete mode 100644 UTILS.train_RF_habitat.R
diff --git a/UTILS.train_RF_habitat.R b/UTILS.train_RF_habitat.R
deleted file mode 100644
index 8174f9a..0000000
--- a/UTILS.train_RF_habitat.R
+++ /dev/null
@@ -1,221 +0,0 @@
-### HEADER #####################################################################
-##'
-##' @title Create a random forest algorithm trained on CBNA data, in order to
-##' obtain the simulated habitat, derived from a \code{FATE} simulation.
-##'
-##' @name train.RF.habitat
-##'
-##' @author Matthieu & Maxime Delprat
-##'
-##' @description This script is designed to produce a random forest model
-##' trained on observed PFG abundance, sites releves and a map of observed
-##' habitat.
-##'
-##' @param releves.PFG a data frame with Braund-Blanquet abundance at each site
-##' and each PFG and strata.
-##' @param releves.sites a data frame with coordinates and a description of
-##' the habitat associated with the dominant species of each site in the
-##' studied map.
-##' @param hab.obs a raster map of the observed habitat in the
-##' extended studied area.
-##' @param external.training.mask default \code{NULL}. (optional) Keep only
-##' releves data in a specific area.
-##' @param studied.habitat a vector that specifies habitats that we take
-##' into account for the validation.
-##' @param RF.param a list of 2 parameters for random forest model :
-##' share.training defines the size of the trainig part of the data base.
-##' ntree is the number of trees build by the algorithm, it allows to reduce
-##' the prediction error.
-##' @param output.path access path to the for the folder where output files
-##' will be created.
-##' @param perStrata a TRUE/FALSE vector. If TRUE, the PFG abundance is defined
-##' by strata in each site. If FALSE, PFG abundance is defined for all strata.
-##' @param sim.version name of the simulation we want to validate.
-##'
-##' @details
-##'
-##' This function transform PFG Braund-Blanquet abundance in relative abundance,
-##' get habitat information from the releves map, keep only relees on interesting
-##' habitat and then builds de random forest model. Finally, the function analyzes
-##' the model performance with computation of confusion matrix and TSS for
-##' the traning and testing sample.
-##'
-##' @return
-##'
-##' 2 prepared CBNA releves files are created before the building of the random
-##' forest model in a habitat validation folder.
-##' 5 more files are created at the end of the script to save the RF model and
-##' the performance analyzes (confusion matrix and TSS) for the training and
-##' testing parts.
-##'
-### END OF HEADER ##############################################################
-
-
-train.RF.habitat<-function(releves.PFG
- , releves.sites
- , hab.obs
- , external.training.mask=NULL
- , studied.habitat
- , RF.param
- , output.path
- , perStrata
- , sim.version)
-{
-
- #1. Compute relative abundance metric
- #########################################
-
- #identify sites with wrong BB values (ie values that cannot be converted by the PRE_FATE.abundBraunBlanquet function)
- releves.PFG<-filter(releves.PFG,is.element(BB,c(NA, "NA", 0, "+", "r", 1:5)))
-
- #transformation into coverage percentage
- releves.PFG$coverage<-PRE_FATE.abundBraunBlanquet(releves.PFG$BB)/100 #as a proportion, not a percentage
-
- if(perStrata==T){
- aggregated.releves.PFG<-aggregate(coverage~site+PFG+strata,data=releves.PFG,FUN="sum")
- }else if(perStrata==F){
- aggregated.releves.PFG<-aggregate(coverage~site+PFG,data=releves.PFG,FUN="sum")
- aggregated.releves.PFG$strata<-"A" #"A" is for "all". Important to have a single-letter code here (useful to check consistency between relevés strata and model strata)
- }
-
- #transformation into a relative metric (here relative.metric is relative coverage)
- aggregated.releves.PFG<-as.data.frame(aggregated.releves.PFG %>% group_by(site,strata) %>% mutate(relative.metric= round(prop.table(coverage),digits = 2))) #rel is proportion of total pct_cov, not percentage
- aggregated.releves.PFG$relative.metric[is.na(aggregated.releves.PFG$relative.metric)]<-0 #NA because abs==0 for some PFG, so put 0 instead of NA (maybe not necessary)
- aggregated.releves.PFG$coverage<-NULL
-
- print("releve data have been transformed into a relative metric")
-
- #2. Cast the df
- #######################
-
- #transfo into factor to be sure to create all the combination when doing "dcast"
- aggregated.releves.PFG$PFG<-as.factor(aggregated.releves.PFG$PFG)
- aggregated.releves.PFG$strata<-as.factor(aggregated.releves.PFG$strata)
-
- aggregated.releves.PFG<-dcast(setDT(aggregated.releves.PFG),site~PFG+strata,value.var=c("relative.metric"),fill=0,drop=F)
-
- #3. Get habitat information
- ###################################
-
- #get sites coordinates
- aggregated.releves.PFG<-merge(dplyr::select(releves.sites,c(site)),aggregated.releves.PFG,by="site")
-
- #get habitat code and name
- if(compareCRS(aggregated.releves.PFG,hab.obs)){
- aggregated.releves.PFG$code.habitat<-raster::extract(x=hab.obs,y=aggregated.releves.PFG)
- }else{
- aggregated.releves.PFG<-st_transform(x=aggregated.releves.PFG,crs=crs(hab.obs))
- aggregated.releves.PFG$code.habitat<-raster::extract(x=hab.obs,y=aggregated.releves.PFG)
- }
-
- #correspondance habitat code/habitat name
- table.habitat.releve<-levels(hab.obs)[[1]]
-
- aggregated.releves.PFG<-merge(aggregated.releves.PFG,dplyr::select(table.habitat.releve,c(ID,habitat)),by.x="code.habitat",by.y="ID")
-
- #(optional) keep only releves data in a specific area
- if(!is.null(external.training.mask)){
-
- if(compareCRS(aggregated.releves.PFG,external.training.mask)==F){ #as this stage it is not a problem to transform crs(aggregated.releves.PFG) since we have no more merge to do (we have already extracted habitat info from the map)
- aggregated.releves.PFG<-st_transform(x=aggregated.releves.PFG,crs=crs(external.training.mask))
- }
-
- aggregated.releves.PFG<-st_crop(x=aggregated.releves.PFG,y=external.training.mask)
- print("'releve' map has been cropped to match 'external.training.mask'.")
- }
-
-
- # 4. Keep only releve on interesting habitat
- ###################################################"
-
- aggregated.releves.PFG<-filter(aggregated.releves.PFG,is.element(habitat,studied.habitat)) #filter non interesting habitat + NA
-
- print(cat("habitat classes used in the RF algo: ",unique(aggregated.releves.PFG$habitat),"\n",sep="\t"))
-
- # 5. Save data
- #####################
-
- st_write(aggregated.releves.PFG,paste0(output.path,"/HABITAT/", sim.version, "/releve.PFG.habitat.shp"),overwrite=T,append=F)
- write.csv(aggregated.releves.PFG,paste0(output.path,"/HABITAT/", sim.version, "/CBNA.releves.prepared.csv"),row.names = F)
-
- # 6. Small adjustment in data structure
- ##########################################
-
- aggregated.releves.PFG<-as.data.frame(aggregated.releves.PFG) #get rid of the spatial structure before entering the RF process
- aggregated.releves.PFG$habitat<-as.factor(aggregated.releves.PFG$habitat)
-
- # 7.Random forest
- ######################################
-
- #separate the database into a training and a test part
- set.seed(123)
-
- training.site<-sample(aggregated.releves.PFG$site,size=RF.param$share.training*length(aggregated.releves.PFG$site),replace = F)
- releves.training<-filter(aggregated.releves.PFG,is.element(site,training.site))
- releves.testing<-filter(aggregated.releves.PFG,!is.element(site,training.site))
-
- #train the model (with correction for imbalances in sampling)
-
- #run optimization algo (careful : optimization over OOB...)
- mtry.perf<-as.data.frame(
- tuneRF(
- x=select(releves.training,-c(code.habitat,site,habitat,geometry)),
- y=releves.training$habitat,
- strata=releves.training$habitat,
- sampsize=min(table(releves.training$habitat)),
- ntreeTry=RF.param$ntree,
- stepFactor=2, improve=0.05,doBest=FALSE,plot=F,trace=F
- )
- )
-
- #select mtry
- mtry<-mtry.perf$mtry[mtry.perf$OOBError==min(mtry.perf$OOBError)][1] #the lowest n achieving minimum OOB
-
- #run real model
- model<- randomForest(
- x=select(releves.training,-c(code.habitat,site,habitat,geometry)),
- y=releves.training$habitat,
- xtest=select(releves.testing,-c(code.habitat,site,habitat,geometry)),
- ytest=releves.testing$habitat,
- strata=releves.training$habitat,
- min(table(releves.training$habitat)),
- ntree=RF.param$ntree,
- mtry=mtry,
- norm.votes=TRUE,
- keep.forest=TRUE
- )
-
- #analyse model performance
-
- # Analysis on the training sample
-
- confusion.training<-confusionMatrix(data=model$predicted,reference=releves.training$habitat)
-
- synthesis.training<-data.frame(habitat=colnames(confusion.training$table),sensitivity=confusion.training$byClass[,1],specificity=confusion.training$byClass[,2],weight=colSums(confusion.training$table)/sum(colSums(confusion.training$table))) #warning: prevalence is the weight of predicted habitat, not of observed habitat
- synthesis.training<-synthesis.training%>%mutate(TSS=round(sensitivity+specificity-1,digits=2))
-
- aggregate.TSS.training<-round(sum(synthesis.training$weight*synthesis.training$TSS),digits=2)
-
- # Analysis on the testing sample
-
- confusion.testing<-confusionMatrix(data=model$test$predicted,reference=releves.testing$habitat)
-
- synthesis.testing<-data.frame(habitat=colnames(confusion.testing$table),sensitivity=confusion.testing$byClass[,1],specificity=confusion.testing$byClass[,2],weight=colSums(confusion.testing$table)/sum(colSums(confusion.testing$table)))#warning: prevalence is the weight of predicted habitat, not of observed habitat
- synthesis.testing<-synthesis.testing%>%mutate(TSS=round(sensitivity+specificity-1,digits=2))
-
- aggregate.TSS.testing<-round(sum(synthesis.testing$weight*synthesis.testing$TSS),digits=2)
-
-
- # 8. Save and return output
- #######################################"
-
- write_rds(model,paste0(output.path,"/HABITAT/", sim.version, "/RF.model.rds"),compress="none")
- write.csv(synthesis.training,paste0(output.path,"/HABITAT/", sim.version, "/RF_perf.per.hab_training.csv"),row.names=F)
- write.csv(aggregate.TSS.training,paste0(output.path,"/HABITAT/", sim.version, "/RF_aggregate.TSS_training.csv"),row.names=F)
- write.csv(synthesis.testing,paste0(output.path,"/HABITAT/", sim.version, "/RF_perf.per.hab_testing.csv"),row.names=F)
- write.csv(aggregate.TSS.testing,paste0(output.path,"/HABITAT/", sim.version, "/RF_aggregate.TSS_testing.csv"),row.names=F)
-
- return(model)
-
-}
-
From d3d0ad60ac182870fe4584f36de2e2433b258b03 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Tue, 22 Feb 2022 09:35:20 +0100
Subject: [PATCH 006/176] Add files via upload
Addition of new "Utils" function and a new POST_FATE function for habitat validation of a FATE simulation
---
R/POST_FATE.validation_habitat.R | 169 ++++++++++++++++++++
R/UTILS.do_habitat_validation.R | 263 +++++++++++++++++++++++++++++++
R/UTILS.plot_predicted_habitat.R | 137 ++++++++++++++++
R/UTILS.train_RF_habitat.R | 221 ++++++++++++++++++++++++++
4 files changed, 790 insertions(+)
create mode 100644 R/POST_FATE.validation_habitat.R
create mode 100644 R/UTILS.do_habitat_validation.R
create mode 100644 R/UTILS.plot_predicted_habitat.R
create mode 100644 R/UTILS.train_RF_habitat.R
diff --git a/R/POST_FATE.validation_habitat.R b/R/POST_FATE.validation_habitat.R
new file mode 100644
index 0000000..59ba9a2
--- /dev/null
+++ b/R/POST_FATE.validation_habitat.R
@@ -0,0 +1,169 @@
+### HEADER #####################################################################
+##'
+##' @title Compute habitat performance and create a prediction plot of habitat
+##' for a whole map of a \code{FATE} simulation.
+##'
+##' @name POST_FATE.validation.habitat
+##'
+##' @author Matthieu .. & Maxime Delprat
+##'
+##' @description This script compare habitat simulations and observations and
+##' create a map to visualize this comparison with all the the \code{FATE} and
+##' observed data.
+##'
+##' @param name.simulation simulation folder name.
+##' @param sim.version name of the simulation we want to validate (it works with
+##' only one sim.version).
+##' @param obs.path the function needs observed data, please create a folder for them in your
+##' simulation folder and then indicate in this parmeter the access path to this folder.
+##' @param releves.PFG name of file which contain the observed Braund-Blanquet abundance at each site
+##' and each PFG and strata (with extension).
+##' @param releves.site name of the file which contain coordinates and a description of
+##' the habitat associated with the dominant species of each site in the studied map (with extension).
+##' @param hab.obs name of the file which contain the extended studied map in the simulation (with extension).
+##' @param habitat.FATE.map name of the file which contain the restricted studied map in the simulation (with extension).
+##' @param validation.mask name of the file which contain a raster mask that specified which pixels need validation.
+##'
+##' @details
+##'
+##' The observed habitat is derived from the cesbio map, the simulated habitat
+##' is derived from FATE simulated relative abundance, based on a random forest
+##' algorithm trained on CBNA data. To compare observations and simulations, the function
+##' compute confusion matrix between observation and prediction and then compute the TSS
+##' for each habitat h (number of prediction of habitat h/number of observation
+##' of habitat h + number of non-prediction of habitat h/number of non-observation
+##' of habitat h). The final metrics this script use is the mean of TSS per habitat over all
+##' habitats, weighted by the share of each habitat in the observed habitat distribution.
+##'
+##' @return
+##'
+##' Two folders are created in name.simulation folder :
+##' \describe{
+##' \item{\file{VALIDATION/HABITAT/sim.version}}{containing the prepared CBNA data,
+##' RF model, the performance analyzes (confusion matrix and TSS) for the training and
+##' testing parts of the RF model, the habitat performance file, the habitat prediction file with
+##' observed and simulated habitat for each pixel of the whole map and the final prediction plot.}
+##' \item{\file{DATA_OBS}}{maps of observed habitat and csv files of PFG and sites releves.}
+##' }
+##'
+### END OF HEADER ##############################################################
+
+
+POST_FATE.validation_habitat = function(name.simulation
+ , sim.version
+ , obs.path
+ , releves.PFG
+ , releves.sites
+ , hab.obs
+ , habitat.FATE.map
+ , validation.mask)
+{
+
+ ## LIBRARIES
+ require(data.table)
+ require(raster)
+ require(RFate)
+ require(reshape2)
+ require(stringr)
+ require(foreign)
+ require(stringr)
+ require(dplyr)
+ require(sp)
+ options("rgdal_show_exportToProj4_warnings"="none")
+ require(rgdal)
+ require(randomForest)
+ require(ggplot2)
+ require(ggradar)
+ require(tidyverse)
+ require(ggpubr)
+ require(gridExtra)
+ require(vegan)
+ require(parallel)
+ require(scales)
+ require(class)
+ require(caret)
+ require(sampling)
+ require(tidyselect)
+ require(grid)
+ require(gtable)
+ require(scales)
+ require(cowplot)
+ require(sf)
+ require(visNetwork)
+ require(foreach)
+ require(doParallel)
+ require(prettyR)
+ require(vcd)
+
+ ## GLOBAL PARAMETERS
+
+ # Create directories
+ dir.create(paste0(name.simulation, "/VALIDATION"), recursive = TRUE)
+ dir.create(paste0(name.simulation, "/VALIDATION/HABITAT"), recursive = TRUE)
+ dir.create(paste0(name.simulation, "/VALIDATION/HABITAT/", sim.version), recursive = TRUE)
+
+ # General
+ output.path = paste0(name.simulation, "/VALIDATION")
+
+ # Useful elements to extract from the simulation
+ simulation.map=raster(paste0(name.simulation,"/DATA/MASK/MASK_Champsaur.tif"))
+
+ # For habitat validation
+ # CBNA releves data habitat map
+ releves.PFG<-read.csv(paste0(obs.path, releves.PFG),header=T,stringsAsFactors = T)
+ releves.sites<-st_read(paste0(obs.path, releves.sites))
+ hab.obs<-raster(paste0(obs.path, hab.obs))
+ # Habitat mask at FATE simu resolution
+ # hab.obs.modif<-projectRaster(from = hab.obs, to = simulation.map, res = res(hab.obs)[1], crs = crs(projection(simulation.mask)))
+ # habitat.FATE.map<-crop(hab.obs.modif, simulation.map)
+ habitat.FATE.map<-raster(paste0(obs.path, habitat.FATE.map))
+ validation.mask<-raster(paste0(obs.path, validation.mask))
+
+ # Provide a color df
+ col.df<-data.frame(
+ habitat=c("agricultural.grassland","coniferous.forest","deciduous.forest","natural.grassland","woody.heatland"),
+ failure=c("yellow","blueviolet","aquamarine","chartreuse1","lightsalmon"),
+ success=c("darkorange1","blue4","aquamarine3","chartreuse3","firebrick4"))
+
+ # Other
+ studied.habitat=c("coniferous.forest","deciduous.forest","natural.grassland","woody.heatland","agricultural.grassland")
+ RF.param = list(
+ share.training=0.7,
+ ntree=500)
+ predict.all.map<-T
+
+ ## TRAIN A RF ON CBNA DATA
+
+ RF.model <- train.RF.habitat(releves.PFG = releves.PFG
+ , releves.sites = releves.sites
+ , hab.obs = hab.obs
+ , external.training.mask = NULL
+ , studied.habitat = studied.habitat
+ , RF.param = RF.param
+ , output.path = output.path
+ , perStrata = F
+ , sim.version = sim.version)
+
+ ## USE THE RF MODEL TO VALIDATE OUTPUT
+
+ habitats.results <- do.habitat.validation(output.path = output.path
+ , RF.model = RF.model
+ , habitat.FATE.map = habitat.FATE.map
+ , validation.mask = validation.mask
+ , simulation.map = simulation.map
+ , predict.all.map = predict.all.map
+ , sim.version = sim.version
+ , name.simulation = name.simulation
+ , perStrata = F)
+
+ ## AGGREGATE HABITAT PREDICTION AND PLOT PREDICTED HABITAT
+
+ prediction.map <- plot.predicted.habitat(predicted.habitat = habitats.results
+ , col.df = col.df
+ , simulation.map = simulation.map
+ , output.path = output.path
+ , sim.version = sim.version)
+ return(prediction.map)
+
+}
+
diff --git a/R/UTILS.do_habitat_validation.R b/R/UTILS.do_habitat_validation.R
new file mode 100644
index 0000000..4624baa
--- /dev/null
+++ b/R/UTILS.do_habitat_validation.R
@@ -0,0 +1,263 @@
+### HEADER #####################################################################
+##'
+##' @title Compare observed and simulated habitat of a \code{FATE} simulation
+##' at the last simulation year.
+##'
+##' @name do.habitat.validation
+##'
+##' @author Matthieu .. & Maxime Delprat
+##'
+##' @description To compare observations and simulations, this function compute
+##' confusion matrix between observation and prediction and then compute the TSS
+##' for each habitat.
+##'
+##' @param output.path access path to the for the folder where output files
+##' will be created.
+##' @param RF.model random forest model trained on CBNA data (train.RF.habitat
+##' function)
+##' @param habitat.FATE.map a raster map of the observed habitat in the
+##' studied area.
+##' @param validation.mask a raster mask that specified which pixels need validation.
+##' @param simulation.map a raster map of the whole studied area use to check
+##' the consistency between simulation map and the observed habitat map.
+##' @param predict.all.map a TRUE/FALSE vector. If TRUE, the script will predict
+##' habitat for the whole map.
+##' @param sim.version name of the simulation we want to validate.
+##' @param name.simulation simulation folder name.
+##' @param perStrata a TRUE/FALSE vector. If TRUE, the PFG abundance is defined
+##' by strata in each pixel. If FALSE, PFG abundance is defined for all strata.
+##'
+##' @details
+##'
+##' After several preliminary checks, the function is going to prepare the observations
+##' database by extracting the observed habitat from a raster map. Then, for each
+##' simulations (sim.version), the script take the evolution abundance for each PFG
+##' and all strata file and predict the habitat for the whole map (if option selected)
+##' thanks to the RF model.Finally, the function compute habitat performance based on
+##' TSS for each habitat.
+##'
+##' @return
+##'
+##' Habitat performance file
+##' If option selected, the function returns an habitat prediction file with
+##' observed and simulated habitat for each pixel of the whole map.
+##'
+### END OF HEADER ##############################################################
+
+
+do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validation.mask, simulation.map, predict.all.map, sim.version, name.simulation, perStrata) {
+
+ #notes
+ # we prepare the relevé data in this function, but in fact we could provide them directly if we adjust the code
+
+ ###########################
+ #I. Preliminary checks
+ ###########################
+
+ #check if strata definition used in the RF model is the same as the one used to analyze FATE output
+ if(perStrata==F){
+ list.strata<-"all"
+ }else{
+ stop("check 'perStrata' parameter and/or the names of strata in param$list.strata.releves & param$list.strata.simul")
+ }
+
+ #initial consistency between habitat.FATE.map and validation.mask (do it before the adjustement of habitat.FATE.map)
+ if(!compareCRS(habitat.FATE.map,validation.mask) | !all(res(habitat.FATE.map)==res(validation.mask))){
+ stop("please provide rasters with same crs and resolution for habitat.FATE.map and validation.mask")
+ }
+
+ #consistency between habitat.FATE.map and simulation.map
+ if(!compareCRS(simulation.map,habitat.FATE.map)){
+ print("reprojecting habitat.FATE.map to match simulation.map crs")
+ habitat.FATE.map<-projectRaster(habitat.FATE.map,crs=crs(simulation.map))
+ }
+ if(!all(res(habitat.FATE.map)==res(simulation.map))){
+ stop("provide habitat.FATE.map with same resolution as simulation.map")
+ }
+ if(extent(simulation.map)!=extent(habitat.FATE.map)){
+ print("cropping habitat.FATE.map to match simulation.map")
+ habitat.FATE.map<-crop(x=habitat.FATE.map,y=simulation.map)
+ }
+ if(!all(origin(simulation.map)==origin(habitat.FATE.map))){
+ print("setting origin habitat.FATE.map to match simulation.map")
+ origin(habitat.FATE.map)<-origin(simulation.map)
+ }
+ if(!compareRaster(simulation.map,habitat.FATE.map)){ #this is crucial to be able to identify pixel by their index and not their coordinates
+ stop("habitat.FATE.map could not be coerced to match simulation.map")
+ }else{
+ print("simulation.map & habitat.FATE.map are (now) consistent")
+ }
+
+ #adjust validation.mask accordingly
+ if(!all(res(habitat.FATE.map)==res(validation.mask))){
+ validation.mask<-projectRaster(from=validation.mask,to=habitat.FATE.map,method = "ngb")
+ }
+ if(extent(validation.mask)!=extent(habitat.FATE.map)){
+ validation.mask<-crop(x=validation.mask,y=habitat.FATE.map)
+ }
+ if(!compareRaster(validation.mask,habitat.FATE.map)){
+ stop("error in correcting validation.mask to match habitat.FATE.map")
+ }else{
+ print("validation.mask is (now) consistent with (modified) habitat.FATE.map")
+ }
+
+ #check consistency for PFG & strata classes between FATE output vs the RF model
+
+ RF.predictors<-rownames(RF.model$importance)
+ RF.PFG<-unique(str_sub(RF.predictors,1,2))
+
+ FATE.PFG<-str_sub(list.files(paste0(name.simulation,"/DATA/PFGS/SUCC")),6,7)
+
+ if(length(setdiff(FATE.PFG,RF.PFG))>0|length(setdiff(RF.PFG,FATE.PFG))>0){
+ stop("The PFG used to train the RF algorithm are not the same as the PFG used to run FATE.")
+ }
+
+
+ #########################################################################################
+ #II. Prepare database for FATE habitat
+ #########################################################################################
+
+ #index of the pixels in the simulation area
+ in.region.pixels<-which(getValues(simulation.map)==1)
+
+ #habitat df for the whole simulation area
+ habitat.whole.area.df<-data.frame(pixel=seq(from=1,to=ncell(habitat.FATE.map),by=1),code.habitat=getValues(habitat.FATE.map),for.validation=getValues(validation.mask))
+ habitat.whole.area.df<-habitat.whole.area.df[in.region.pixels,]
+ habitat.whole.area.df<-merge(habitat.whole.area.df,dplyr::select(levels(habitat.FATE.map)[[1]],c(ID,habitat)),by.x="code.habitat",by.y="ID")
+ habitat.whole.area.df<-filter(habitat.whole.area.df,is.element(habitat,RF.model$classes))
+
+ print(cat("Habitat considered in the prediction exercise: ",c(unique(habitat.whole.area.df$habitat)),"\n",sep="\t"))
+
+ print("Habitat in the simulation area:")
+ table(habitat.whole.area.df$habitat,useNA="always")
+
+ print("Habitat in the subpart of the simulation area used for validation:")
+ table(habitat.whole.area.df$habitat[habitat.whole.area.df$for.validation==1],useNA="always")
+
+ ##############################
+ # III. Loop on simulations
+ #########################
+
+ print("processing simulations")
+
+ registerDoParallel(detectCores()-2)
+ results.simul <- foreach(i=1:length(sim.version),.packages = c("dplyr","forcats","reshape2","randomForest","vcd","caret")) %dopar%{
+
+ ########################"
+ # III.1. Data preparation
+ #########################
+
+ #get simulated abundance per pixel*strata*PFG for pixels in the simulation area
+ simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_", sim.version, ".csv"))
+ simu_PFG = simu_PFG[,-c(3:44)]
+ colnames(simu_PFG) = c("PFG", "pixel", "abs")
+
+ #aggregate per strata group with the correspondance provided in input
+ simu_PFG$new.strata<-NA
+
+ #attribute the "new.strata" value to group FATE strata used in the simulations into strata comparable with CBNA ones (all strata together or per strata)
+ if(perStrata==F){
+ simu_PFG$new.strata<-"A"
+ }
+
+ simu_PFG<-dplyr::rename(simu_PFG,"strata"="new.strata")
+
+ #aggregate all the rows with same pixel, (new) strata and PFG (necessary since possibly several line with the same pixel+strata+PFG after strata grouping)
+ simu_PFG<-aggregate(abs~pixel+strata+PFG,data=simu_PFG,FUN="sum")
+
+ #transform absolute abundance into relative abundance (no pb if all combination PFG*strata are not present, since then the value is 0!)
+ simu_PFG<-simu_PFG %>% group_by(pixel,strata) %>% mutate(relative.abundance= round(prop.table(abs),digits=2)) #those are proportions, not percentages
+ simu_PFG$relative.abundance[is.na(simu_PFG$relative.abundance)]<-0 #NA because abs==0 for some PFG, so put 0 instead of NA (necessary to avoid risk of confusion with NA in pixels because out of the map)
+ simu_PFG<-as.data.frame(simu_PFG)
+
+ #drop the absolute abundance
+ simu_PFG$abs<-NULL
+
+ #set a factor structure
+ simu_PFG$PFG<-as.factor(simu_PFG$PFG)
+ simu_PFG$strata<-as.factor(simu_PFG$strata)
+
+ #correct the levels (to have all PFG and all strata) to make the dcast transfo easier (all PFG*strata combination will be automatically created thanks to the factor structure, even if no line corresponds to it)
+ simu_PFG$PFG<-fct_expand(simu_PFG$PFG,RF.PFG)
+ simu_PFG$strata<-fct_expand(simu_PFG$strata,list.strata)
+
+ #cast
+ simu_PFG<-dcast(simu_PFG,pixel~PFG*strata,value.var=c("relative.abundance"),fill=0,drop=F)
+
+ #merge PFG info and habitat + transform habitat into factor
+
+ #here it is crucial to have exactly the same raster structure for "simulation.map" and "habitat.FATE.map", so as to be able to do the merge on the "pixel" variable
+ data.FATE.PFG.habitat<-merge(simu_PFG,habitat.whole.area.df,by="pixel") #at this stage we have all the pixels in the simulation area
+ data.FATE.PFG.habitat$habitat<-factor(data.FATE.PFG.habitat$habitat,levels=RF.model$classes) #thanks to the "levels" argument, we have the same order for the habitat factor in the RF model and in the FATE outputs
+
+ ############################
+ # III.2. Prediction of habitat with the RF algorithm
+ #################################
+
+ data.validation<-filter(data.FATE.PFG.habitat,for.validation==1)
+ x.validation<-select(data.validation,all_of(RF.predictors))
+ y.validation<-data.validation$habitat
+
+ y.validation.predicted<-predict(object=RF.model,newdata=x.validation,type="response",norm.votes=T)
+
+ ##############################
+ # III.3. Analysis of the results
+ ################################
+
+ confusion.validation<-confusionMatrix(data=y.validation.predicted,reference=fct_expand(y.validation,levels(y.validation.predicted)))
+
+ synthesis.validation<-data.frame(habitat=colnames(confusion.validation$table),sensitivity=confusion.validation$byClass[,1],specificity=confusion.validation$byClass[,2],weight=colSums(confusion.validation$table)/sum(colSums(confusion.validation$table)))
+ synthesis.validation<-synthesis.validation%>%mutate(TSS=round(sensitivity+specificity-1,digits=2))
+
+ aggregate.TSS.validation<-round(sum(synthesis.validation$weight*synthesis.validation$TSS,na.rm=T),digits=2)
+
+ ########################
+ # III.4. Predict habitat for the whole map if option selected (do it only for a small number of simulations)
+ ############################################
+
+ if(predict.all.map==T){
+
+ y.all.map.predicted = predict(object=RF.model,newdata=select(data.FATE.PFG.habitat,all_of(RF.predictors)),type="response",norm.votes=T)
+ y.all.map.predicted = as.data.frame(y.all.map.predicted)
+ y.all.map.predicted$pixel = data.FATE.PFG.habitat$pixel
+ colnames(y.all.map.predicted) = c(sim.version, "pixel")
+
+ }else{
+ y.all.map.predicted<-NULL
+ }
+
+ #prepare outputs
+
+ output.validation<-c(synthesis.validation$TSS,aggregate.TSS.validation)
+ names(output.validation)<-c(synthesis.validation$habitat,"aggregated")
+
+ output<-list(output.validation,y.all.map.predicted)
+ names(output)<-c("output.validation","y.all.map.predicted")
+
+ return(output)
+ }
+ #end of the loop on simulations
+
+ #deal with the results regarding model performance
+ habitat.performance<-as.data.frame(matrix(unlist(lapply(results.simul,"[[",1)),ncol=length(RF.model$classes)+1,byrow=T))
+ names(habitat.performance)<-c(RF.model$classes,"weighted")
+ habitat.performance$simulation<-sim.version
+
+ #save
+ write.csv(habitat.performance,paste0(output.path,"/HABITAT/", sim.version, "/performance.habitat.csv"),row.names=F)
+
+ print("habitat performance saved")
+
+ #deal with the results regarding habitat prediction over the whole map
+ all.map.prediction = results.simul[[1]]$y.all.map.predicted
+ all.map.prediction = merge(all.map.prediction, select(habitat.whole.area.df, c(pixel,habitat)), by = "pixel")
+ all.map.prediction = rename(all.map.prediction,"true.habitat"="habitat")
+
+ #save
+ write.csv(all.map.prediction,paste0(output.path,"/HABITAT/", sim.version, "/habitat.prediction.csv"), row.names=F)
+
+ #return results
+ return(all.map.prediction)
+
+}
+
diff --git a/R/UTILS.plot_predicted_habitat.R b/R/UTILS.plot_predicted_habitat.R
new file mode 100644
index 0000000..4f40061
--- /dev/null
+++ b/R/UTILS.plot_predicted_habitat.R
@@ -0,0 +1,137 @@
+### HEADER #####################################################################
+##'
+##' @title Create a raster map of habitat prediction for a specific \code{FATE}
+##' simulation at the last simulation year.
+##'
+##' @name plot.predicted.habitat
+##'
+##' @author Matthieu .. & Maxime Delprat
+##'
+##' @description This script is designed to create a raster map of habitat prediction
+##' based on a habitat prediction file. For each pixel, the habitat failure or success value
+##' is associated to a color and then, the map is built.
+##'
+##' @param predicted habitat a csv file created by the do.habitat.validation function
+##' which contain, for each pixel of the studied map, the simulated and observed habitat.
+##' @param col.df a data frame with all the colors associated with the failure or
+##' success of each studied habitat prediction.
+##' @param simulation.map a raster map of the whole studied area.
+##' @param output.path access path to the for the folder where output files
+##' will be created.
+##' @param sim.version name of the simulation we want to validate.
+##'
+##' @details
+##'
+##' The function determine true/false prediction ('failure' if false, 'success' if true)
+##' and prepare a dataframe containing color and habitat code. Then, the script merge
+##' the prediction dataframe with the color and code habitat dataframe. Finally,
+##' the function draw a raster map and a plot of prediction habitat over it thanks
+##' to the data prepared before.
+##'
+##' @return
+##'
+##' a synthetic.prediction.png file which contain the final prediction plot.
+### END OF HEADER ##############################################################
+
+
+plot.predicted.habitat<-function(predicted.habitat
+ , col.df
+ , simulation.map
+ , output.path
+ , sim.version)
+{
+
+ #auxiliary function to compute the proportion of simulations lead to the modal prediction
+ count.habitat<-function(df){
+ index<-which(names(df)=="modal.predicted.habitat")
+ prop.simu<-sum(df[-index]==as.character(df[index]))/(length(names(df))-1)
+ return(prop.simu)
+ }
+
+ #compute modal predicted habitat and the proportion of simulations predicting this habitat (for each pixel)
+ predicted.habitat$modal.predicted.habitat<-apply(dplyr::select(predicted.habitat,c(all_of(sim.version))),1,Mode)
+ predicted.habitat$modal.predicted.habitat[predicted.habitat$modal.predicted.habitat==">1 mode"]<-"ambiguous"
+ predicted.habitat$confidence<-apply(dplyr::select(predicted.habitat,c(all_of(sim.version),modal.predicted.habitat)),1,FUN=function(x) count.habitat(x))
+
+
+ #true/false prediction
+ predicted.habitat$prediction.code<-"failure"
+ predicted.habitat$prediction.code[predicted.habitat$modal.predicted.habitat==predicted.habitat$true.habitat]<-"success"
+
+ #prepare a df containing color & habitat code (to facilitate conversion into raster)
+ col.df.long<-data.table::melt(data=setDT(col.df),id.vars="habitat",variable.name="prediction.code",value.name="color")
+
+ habitat.code.df<-unique(dplyr::select(predicted.habitat,c(modal.predicted.habitat,prediction.code)))
+ habitat.code.df$habitat.code<-seq(from=1,to=dim(habitat.code.df)[1],by=1)
+ habitat.code.df<-rename(habitat.code.df,"habitat"="modal.predicted.habitat")
+
+ habitat.code.df<-merge(habitat.code.df,col.df.long,by=c("habitat","prediction.code"))
+ habitat.code.df$label<-paste0(habitat.code.df$habitat," (",habitat.code.df$prediction.code,")")
+
+ #deal with out of scope habitat
+ out.of.scope<-data.frame(habitat="out.of.scope",prediction.code="",habitat.code=0,color="white",label="out of scope")
+ habitat.code.df<-rbind(habitat.code.df,out.of.scope)
+
+ habitat.code.df$label<-as.factor(habitat.code.df$label)
+
+ #order the df
+ habitat.code.df<-habitat.code.df[order(habitat.code.df$label),] #to be sure it's in te right order (useful to ensure the correspondance between label and color in the ggplot function)
+
+
+ #merge the prediction df with the df containing color and habitat code
+ predicted.habitat<-merge(predicted.habitat,habitat.code.df,by.x=c("modal.predicted.habitat","prediction.code"),by.y=c("habitat","prediction.code"))
+
+
+ #plot
+
+ #prepare raster
+ prediction.map<-raster(nrows=nrow(simulation.map),ncols=ncol(simulation.map),crs=crs(simulation.map),ext=extent(simulation.map), resolution=res(simulation.map))
+
+ prediction.map[]<-0 #initialization of the raster, corresponding to "out of scope habitats"
+ prediction.map[predicted.habitat$pixel]<-predicted.habitat$habitat.code
+
+ #ratify
+ prediction.map<-ratify(prediction.map)
+ prediction.map.rat<-levels(prediction.map)[[1]]
+ prediction.map.rat<-merge(prediction.map.rat,habitat.code.df,by.x="ID",by.y="habitat.code")
+ levels(prediction.map)<-prediction.map.rat
+
+ #save the raster
+ writeRaster(prediction.map,filename = paste0(output.path,"/HABITAT/", sim.version, "/synthetic.prediction.grd"),overwrite=T)
+
+
+ #plot on R
+ #convert into xy
+ xy.prediction<-as.data.frame(prediction.map,xy=T)
+ names(xy.prediction)<-c("x","y","habitat","prediction.code","color","label")
+ xy.prediction<-xy.prediction[complete.cases(xy.prediction),]
+
+ #plot
+ prediction.plot<-
+ ggplot(xy.prediction, aes(x=x, y=y, fill=factor(label)))+
+ geom_raster(show.legend = T) +
+ coord_equal()+
+ scale_fill_manual(values = as.character(habitat.code.df$color))+ #ok only if habitat.code.df has been ordered according to "label"
+ ggtitle(paste0("Modal prediction over ",length(sim.version)," simulations"))+
+ guides(fill=guide_legend(nrow=4,byrow=F))+
+ theme(
+ plot.title = element_text(size = 8),
+ legend.text = element_text(size = 8, colour ="black"),
+ legend.title = element_blank(),
+ legend.position = "bottom",
+ axis.title.x=element_blank(),
+ axis.text.x=element_blank(),
+ axis.ticks.x=element_blank(),
+ axis.title.y=element_blank(),
+ axis.text.y=element_blank(),
+ axis.ticks.y=element_blank()
+ )
+
+ #save the map
+ ggsave(filename="synthetic.prediction.png",plot = prediction.plot,path = paste0(output.path, "/HABITAT/", sim.version),scale = 1,dpi = 300,limitsize = F,width = 15,height = 15,units ="cm")
+
+ #return the map
+ return(prediction.plot)
+
+}
+
diff --git a/R/UTILS.train_RF_habitat.R b/R/UTILS.train_RF_habitat.R
new file mode 100644
index 0000000..d9b720b
--- /dev/null
+++ b/R/UTILS.train_RF_habitat.R
@@ -0,0 +1,221 @@
+### HEADER #####################################################################
+##'
+##' @title Create a random forest algorithm trained on CBNA data, in order to
+##' obtain the simulated habitat, derived from a \code{FATE} simulation.
+##'
+##' @name train.RF.habitat
+##'
+##' @author Matthieu .. & Maxime Delprat
+##'
+##' @description This script is designed to produce a random forest model
+##' trained on observed PFG abundance, sites releves and a map of observed
+##' habitat.
+##'
+##' @param releves.PFG a data frame with Braund-Blanquet abundance at each site
+##' and each PFG and strata.
+##' @param releves.sites a data frame with coordinates and a description of
+##' the habitat associated with the dominant species of each site in the
+##' studied map.
+##' @param hab.obs a raster map of the observed habitat in the
+##' extended studied area.
+##' @param external.training.mask default \code{NULL}. (optional) Keep only
+##' releves data in a specific area.
+##' @param studied.habitat a vector that specifies habitats that we take
+##' into account for the validation.
+##' @param RF.param a list of 2 parameters for random forest model :
+##' share.training defines the size of the trainig part of the data base.
+##' ntree is the number of trees build by the algorithm, it allows to reduce
+##' the prediction error.
+##' @param output.path access path to the for the folder where output files
+##' will be created.
+##' @param perStrata a TRUE/FALSE vector. If TRUE, the PFG abundance is defined
+##' by strata in each site. If FALSE, PFG abundance is defined for all strata.
+##' @param sim.version name of the simulation we want to validate.
+##'
+##' @details
+##'
+##' This function transform PFG Braund-Blanquet abundance in relative abundance,
+##' get habitat information from the releves map, keep only relees on interesting
+##' habitat and then builds de random forest model. Finally, the function analyzes
+##' the model performance with computation of confusion matrix and TSS for
+##' the traning and testing sample.
+##'
+##' @return
+##'
+##' 2 prepared CBNA releves files are created before the building of the random
+##' forest model in a habitat validation folder.
+##' 5 more files are created at the end of the script to save the RF model and
+##' the performance analyzes (confusion matrix and TSS) for the training and
+##' testing parts.
+##'
+### END OF HEADER ##############################################################
+
+
+train.RF.habitat<-function(releves.PFG
+ , releves.sites
+ , hab.obs
+ , external.training.mask=NULL
+ , studied.habitat
+ , RF.param
+ , output.path
+ , perStrata
+ , sim.version)
+{
+
+ #1. Compute relative abundance metric
+ #########################################
+
+ #identify sites with wrong BB values (ie values that cannot be converted by the PRE_FATE.abundBraunBlanquet function)
+ releves.PFG<-filter(releves.PFG,is.element(BB,c(NA, "NA", 0, "+", "r", 1:5)))
+
+ #transformation into coverage percentage
+ releves.PFG$coverage<-PRE_FATE.abundBraunBlanquet(releves.PFG$BB)/100 #as a proportion, not a percentage
+
+ if(perStrata==T){
+ aggregated.releves.PFG<-aggregate(coverage~site+PFG+strata,data=releves.PFG,FUN="sum")
+ }else if(perStrata==F){
+ aggregated.releves.PFG<-aggregate(coverage~site+PFG,data=releves.PFG,FUN="sum")
+ aggregated.releves.PFG$strata<-"A" #"A" is for "all". Important to have a single-letter code here (useful to check consistency between relevés strata and model strata)
+ }
+
+ #transformation into a relative metric (here relative.metric is relative coverage)
+ aggregated.releves.PFG<-as.data.frame(aggregated.releves.PFG %>% group_by(site,strata) %>% mutate(relative.metric= round(prop.table(coverage),digits = 2))) #rel is proportion of total pct_cov, not percentage
+ aggregated.releves.PFG$relative.metric[is.na(aggregated.releves.PFG$relative.metric)]<-0 #NA because abs==0 for some PFG, so put 0 instead of NA (maybe not necessary)
+ aggregated.releves.PFG$coverage<-NULL
+
+ print("releve data have been transformed into a relative metric")
+
+ #2. Cast the df
+ #######################
+
+ #transfo into factor to be sure to create all the combination when doing "dcast"
+ aggregated.releves.PFG$PFG<-as.factor(aggregated.releves.PFG$PFG)
+ aggregated.releves.PFG$strata<-as.factor(aggregated.releves.PFG$strata)
+
+ aggregated.releves.PFG<-dcast(setDT(aggregated.releves.PFG),site~PFG+strata,value.var=c("relative.metric"),fill=0,drop=F)
+
+ #3. Get habitat information
+ ###################################
+
+ #get sites coordinates
+ aggregated.releves.PFG<-merge(dplyr::select(releves.sites,c(site)),aggregated.releves.PFG,by="site")
+
+ #get habitat code and name
+ if(compareCRS(aggregated.releves.PFG,hab.obs)){
+ aggregated.releves.PFG$code.habitat<-raster::extract(x=hab.obs,y=aggregated.releves.PFG)
+ }else{
+ aggregated.releves.PFG<-st_transform(x=aggregated.releves.PFG,crs=crs(hab.obs))
+ aggregated.releves.PFG$code.habitat<-raster::extract(x=hab.obs,y=aggregated.releves.PFG)
+ }
+
+ #correspondance habitat code/habitat name
+ table.habitat.releve<-levels(hab.obs)[[1]]
+
+ aggregated.releves.PFG<-merge(aggregated.releves.PFG,dplyr::select(table.habitat.releve,c(ID,habitat)),by.x="code.habitat",by.y="ID")
+
+ #(optional) keep only releves data in a specific area
+ if(!is.null(external.training.mask)){
+
+ if(compareCRS(aggregated.releves.PFG,external.training.mask)==F){ #as this stage it is not a problem to transform crs(aggregated.releves.PFG) since we have no more merge to do (we have already extracted habitat info from the map)
+ aggregated.releves.PFG<-st_transform(x=aggregated.releves.PFG,crs=crs(external.training.mask))
+ }
+
+ aggregated.releves.PFG<-st_crop(x=aggregated.releves.PFG,y=external.training.mask)
+ print("'releve' map has been cropped to match 'external.training.mask'.")
+ }
+
+
+ # 4. Keep only releve on interesting habitat
+ ###################################################"
+
+ aggregated.releves.PFG<-filter(aggregated.releves.PFG,is.element(habitat,studied.habitat)) #filter non interesting habitat + NA
+
+ print(cat("habitat classes used in the RF algo: ",unique(aggregated.releves.PFG$habitat),"\n",sep="\t"))
+
+ # 5. Save data
+ #####################
+
+ st_write(aggregated.releves.PFG,paste0(output.path,"/HABITAT/", sim.version, "/releve.PFG.habitat.shp"),overwrite=T,append=F)
+ write.csv(aggregated.releves.PFG,paste0(output.path,"/HABITAT/", sim.version, "/CBNA.releves.prepared.csv"),row.names = F)
+
+ # 6. Small adjustment in data structure
+ ##########################################
+
+ aggregated.releves.PFG<-as.data.frame(aggregated.releves.PFG) #get rid of the spatial structure before entering the RF process
+ aggregated.releves.PFG$habitat<-as.factor(aggregated.releves.PFG$habitat)
+
+ # 7.Random forest
+ ######################################
+
+ #separate the database into a training and a test part
+ set.seed(123)
+
+ training.site<-sample(aggregated.releves.PFG$site,size=RF.param$share.training*length(aggregated.releves.PFG$site),replace = F)
+ releves.training<-filter(aggregated.releves.PFG,is.element(site,training.site))
+ releves.testing<-filter(aggregated.releves.PFG,!is.element(site,training.site))
+
+ #train the model (with correction for imbalances in sampling)
+
+ #run optimization algo (careful : optimization over OOB...)
+ mtry.perf<-as.data.frame(
+ tuneRF(
+ x=select(releves.training,-c(code.habitat,site,habitat,geometry)),
+ y=releves.training$habitat,
+ strata=releves.training$habitat,
+ sampsize=min(table(releves.training$habitat)),
+ ntreeTry=RF.param$ntree,
+ stepFactor=2, improve=0.05,doBest=FALSE,plot=F,trace=F
+ )
+ )
+
+ #select mtry
+ mtry<-mtry.perf$mtry[mtry.perf$OOBError==min(mtry.perf$OOBError)][1] #the lowest n achieving minimum OOB
+
+ #run real model
+ model<- randomForest(
+ x=select(releves.training,-c(code.habitat,site,habitat,geometry)),
+ y=releves.training$habitat,
+ xtest=select(releves.testing,-c(code.habitat,site,habitat,geometry)),
+ ytest=releves.testing$habitat,
+ strata=releves.training$habitat,
+ min(table(releves.training$habitat)),
+ ntree=RF.param$ntree,
+ mtry=mtry,
+ norm.votes=TRUE,
+ keep.forest=TRUE
+ )
+
+ #analyse model performance
+
+ # Analysis on the training sample
+
+ confusion.training<-confusionMatrix(data=model$predicted,reference=releves.training$habitat)
+
+ synthesis.training<-data.frame(habitat=colnames(confusion.training$table),sensitivity=confusion.training$byClass[,1],specificity=confusion.training$byClass[,2],weight=colSums(confusion.training$table)/sum(colSums(confusion.training$table))) #warning: prevalence is the weight of predicted habitat, not of observed habitat
+ synthesis.training<-synthesis.training%>%mutate(TSS=round(sensitivity+specificity-1,digits=2))
+
+ aggregate.TSS.training<-round(sum(synthesis.training$weight*synthesis.training$TSS),digits=2)
+
+ # Analysis on the testing sample
+
+ confusion.testing<-confusionMatrix(data=model$test$predicted,reference=releves.testing$habitat)
+
+ synthesis.testing<-data.frame(habitat=colnames(confusion.testing$table),sensitivity=confusion.testing$byClass[,1],specificity=confusion.testing$byClass[,2],weight=colSums(confusion.testing$table)/sum(colSums(confusion.testing$table)))#warning: prevalence is the weight of predicted habitat, not of observed habitat
+ synthesis.testing<-synthesis.testing%>%mutate(TSS=round(sensitivity+specificity-1,digits=2))
+
+ aggregate.TSS.testing<-round(sum(synthesis.testing$weight*synthesis.testing$TSS),digits=2)
+
+
+ # 8. Save and return output
+ #######################################"
+
+ write_rds(model,paste0(output.path,"/HABITAT/", sim.version, "/RF.model.rds"),compress="none")
+ write.csv(synthesis.training,paste0(output.path,"/HABITAT/", sim.version, "/RF_perf.per.hab_training.csv"),row.names=F)
+ write.csv(aggregate.TSS.training,paste0(output.path,"/HABITAT/", sim.version, "/RF_aggregate.TSS_training.csv"),row.names=F)
+ write.csv(synthesis.testing,paste0(output.path,"/HABITAT/", sim.version, "/RF_perf.per.hab_testing.csv"),row.names=F)
+ write.csv(aggregate.TSS.testing,paste0(output.path,"/HABITAT/", sim.version, "/RF_aggregate.TSS_testing.csv"),row.names=F)
+
+ return(model)
+
+}
+
From 580822b400bf55333c41da0deec3232361ac867d Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Tue, 22 Feb 2022 13:36:51 +0100
Subject: [PATCH 007/176] Update UTILS.plot_predicted_habitat.R
---
R/UTILS.plot_predicted_habitat.R | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/R/UTILS.plot_predicted_habitat.R b/R/UTILS.plot_predicted_habitat.R
index 4f40061..09a2936 100644
--- a/R/UTILS.plot_predicted_habitat.R
+++ b/R/UTILS.plot_predicted_habitat.R
@@ -80,7 +80,8 @@ plot.predicted.habitat<-function(predicted.habitat
#merge the prediction df with the df containing color and habitat code
predicted.habitat<-merge(predicted.habitat,habitat.code.df,by.x=c("modal.predicted.habitat","prediction.code"),by.y=c("habitat","prediction.code"))
-
+ write.csv(x = predicted.habitat, file = paste0(output.path, "/HABITAT/", sim.version, "/hab.pred.csv"))
+
#plot
From d54d0d87835e6371ccc5396b52dc1ef10d893d17 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Tue, 22 Feb 2022 14:22:31 +0100
Subject: [PATCH 008/176] Update POST_FATE.validation_habitat.R
---
R/POST_FATE.validation_habitat.R | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/R/POST_FATE.validation_habitat.R b/R/POST_FATE.validation_habitat.R
index 59ba9a2..9334f27 100644
--- a/R/POST_FATE.validation_habitat.R
+++ b/R/POST_FATE.validation_habitat.R
@@ -5,7 +5,7 @@
##'
##' @name POST_FATE.validation.habitat
##'
-##' @author Matthieu .. & Maxime Delprat
+##' @author Matthieu Combaud, Maxime Delprat
##'
##' @description This script compare habitat simulations and observations and
##' create a map to visualize this comparison with all the the \code{FATE} and
@@ -163,6 +163,15 @@ POST_FATE.validation_habitat = function(name.simulation
, simulation.map = simulation.map
, output.path = output.path
, sim.version = sim.version)
+
+ ## COMPARISON FAILURE/SUCCESS
+
+ hab.pred = read.csv(paste0(output.path, "/HABITAT/", sim.version, "/hab.pred.csv"))
+ failure = as.numeric((table(hab.pred$prediction.code)[1]/sum(table(hab.pred$prediction.code)))*100)
+ success = as.numeric((table(hab.pred$prediction.code)[2]/sum(table(hab.pred$prediction.code)))*100)
+ cat("\n ---------- END OF THE SIMULATION \n")
+ cat(paste0("\n ---------- ", round(failure, digits = 2), "% of habitats are not correctly predicted by ", sim.version, " \n"))
+ cat(paste0("\n ---------- ", round(success, digits = 2), "% of habitats are correctly predicted by ", sim.version, " \n"))
return(prediction.map)
}
From 71a2c933650180af3ee8e675854ed296da618139 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Tue, 22 Feb 2022 14:23:38 +0100
Subject: [PATCH 009/176] Update UTILS.do_habitat_validation.R
---
R/UTILS.do_habitat_validation.R | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/R/UTILS.do_habitat_validation.R b/R/UTILS.do_habitat_validation.R
index 4624baa..d77882d 100644
--- a/R/UTILS.do_habitat_validation.R
+++ b/R/UTILS.do_habitat_validation.R
@@ -5,7 +5,7 @@
##'
##' @name do.habitat.validation
##'
-##' @author Matthieu .. & Maxime Delprat
+##' @author Matthieu Combaud, Maxime Delprat
##'
##' @description To compare observations and simulations, this function compute
##' confusion matrix between observation and prediction and then compute the TSS
From 7068728c29c7ed5d16d60f3180ef7763a067c6d5 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Tue, 22 Feb 2022 14:24:26 +0100
Subject: [PATCH 010/176] Update UTILS.plot_predicted_habitat.R
---
R/UTILS.plot_predicted_habitat.R | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/R/UTILS.plot_predicted_habitat.R b/R/UTILS.plot_predicted_habitat.R
index 09a2936..24666b6 100644
--- a/R/UTILS.plot_predicted_habitat.R
+++ b/R/UTILS.plot_predicted_habitat.R
@@ -5,7 +5,7 @@
##'
##' @name plot.predicted.habitat
##'
-##' @author Matthieu .. & Maxime Delprat
+##' @author Matthieu Combaud, Maxime Delprat
##'
##' @description This script is designed to create a raster map of habitat prediction
##' based on a habitat prediction file. For each pixel, the habitat failure or success value
From 02b95af62ba131032d03e12d066e8bead77d30a8 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Tue, 22 Feb 2022 14:25:06 +0100
Subject: [PATCH 011/176] Update UTILS.train_RF_habitat.R
---
R/UTILS.train_RF_habitat.R | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/R/UTILS.train_RF_habitat.R b/R/UTILS.train_RF_habitat.R
index d9b720b..a277f05 100644
--- a/R/UTILS.train_RF_habitat.R
+++ b/R/UTILS.train_RF_habitat.R
@@ -5,7 +5,7 @@
##'
##' @name train.RF.habitat
##'
-##' @author Matthieu .. & Maxime Delprat
+##' @author Matthieu Combaud, Maxime Delprat
##'
##' @description This script is designed to produce a random forest model
##' trained on observed PFG abundance, sites releves and a map of observed
From 12d50321ec90bd75abbe600846b05583b37b9963 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Tue, 22 Feb 2022 15:52:37 +0100
Subject: [PATCH 012/176] Update POST_FATE.validation_habitat.R
With this update the function take only the extended map of the studied area as argument.
---
R/POST_FATE.validation_habitat.R | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/R/POST_FATE.validation_habitat.R b/R/POST_FATE.validation_habitat.R
index 9334f27..68b87bb 100644
--- a/R/POST_FATE.validation_habitat.R
+++ b/R/POST_FATE.validation_habitat.R
@@ -55,7 +55,6 @@ POST_FATE.validation_habitat = function(name.simulation
, releves.PFG
, releves.sites
, hab.obs
- , habitat.FATE.map
, validation.mask)
{
@@ -114,9 +113,8 @@ POST_FATE.validation_habitat = function(name.simulation
releves.sites<-st_read(paste0(obs.path, releves.sites))
hab.obs<-raster(paste0(obs.path, hab.obs))
# Habitat mask at FATE simu resolution
- # hab.obs.modif<-projectRaster(from = hab.obs, to = simulation.map, res = res(hab.obs)[1], crs = crs(projection(simulation.mask)))
- # habitat.FATE.map<-crop(hab.obs.modif, simulation.map)
- habitat.FATE.map<-raster(paste0(obs.path, habitat.FATE.map))
+ hab.obs.modif<-projectRaster(from = hab.obs, res = res(simulation.map)[1], crs = crs(projection(simulation.map)), method = "ngb")
+ habitat.FATE.map<-crop(hab.obs.modif, simulation.map)
validation.mask<-raster(paste0(obs.path, validation.mask))
# Provide a color df
From ec413be2a84f6a8e350006975338da927d13533b Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Tue, 22 Feb 2022 15:57:13 +0100
Subject: [PATCH 013/176] Update UTILS.do_habitat_validation.R
With this update the function uses the new restricted map of the area and extracts data from it.
---
R/UTILS.do_habitat_validation.R | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/R/UTILS.do_habitat_validation.R b/R/UTILS.do_habitat_validation.R
index d77882d..e4355b3 100644
--- a/R/UTILS.do_habitat_validation.R
+++ b/R/UTILS.do_habitat_validation.R
@@ -123,7 +123,8 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
#habitat df for the whole simulation area
habitat.whole.area.df<-data.frame(pixel=seq(from=1,to=ncell(habitat.FATE.map),by=1),code.habitat=getValues(habitat.FATE.map),for.validation=getValues(validation.mask))
habitat.whole.area.df<-habitat.whole.area.df[in.region.pixels,]
- habitat.whole.area.df<-merge(habitat.whole.area.df,dplyr::select(levels(habitat.FATE.map)[[1]],c(ID,habitat)),by.x="code.habitat",by.y="ID")
+ habitat.whole.area.df<-subset(habitat.whole.area.df, for.validation!="NA")
+ habitat.whole.area.df<-merge(habitat.whole.area.df,dplyr::select(levels(hab.obs)[[1]],c(ID,habitat)),by.x="code.habitat",by.y="ID")
habitat.whole.area.df<-filter(habitat.whole.area.df,is.element(habitat,RF.model$classes))
print(cat("Habitat considered in the prediction exercise: ",c(unique(habitat.whole.area.df$habitat)),"\n",sep="\t"))
From a61ec30958ebaccc4eaa50c2cb3c72a46cad3ea7 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Tue, 22 Feb 2022 16:14:20 +0100
Subject: [PATCH 014/176] Update UTILS.do_habitat_validation.R
---
R/UTILS.do_habitat_validation.R | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/R/UTILS.do_habitat_validation.R b/R/UTILS.do_habitat_validation.R
index e4355b3..83d2880 100644
--- a/R/UTILS.do_habitat_validation.R
+++ b/R/UTILS.do_habitat_validation.R
@@ -45,7 +45,7 @@
### END OF HEADER ##############################################################
-do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validation.mask, simulation.map, predict.all.map, sim.version, name.simulation, perStrata) {
+do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validation.mask, simulation.map, predict.all.map, sim.version, name.simulation, perStrata, hab.obs) {
#notes
# we prepare the relevé data in this function, but in fact we could provide them directly if we adjust the code
From 409fe3c10a61246b89da73ce34540a1d26d06707 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Tue, 22 Feb 2022 16:16:30 +0100
Subject: [PATCH 015/176] Update POST_FATE.validation_habitat.R
---
R/POST_FATE.validation_habitat.R | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/R/POST_FATE.validation_habitat.R b/R/POST_FATE.validation_habitat.R
index 68b87bb..4e99d75 100644
--- a/R/POST_FATE.validation_habitat.R
+++ b/R/POST_FATE.validation_habitat.R
@@ -152,7 +152,8 @@ POST_FATE.validation_habitat = function(name.simulation
, predict.all.map = predict.all.map
, sim.version = sim.version
, name.simulation = name.simulation
- , perStrata = F)
+ , perStrata = F
+ , hab.obs = hab.obs)
## AGGREGATE HABITAT PREDICTION AND PLOT PREDICTED HABITAT
From 625e9d498eff28fa64be575021539b71fa9ac111 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Tue, 22 Feb 2022 16:18:08 +0100
Subject: [PATCH 016/176] Update UTILS.do_habitat_validation.R
Modification of the header with a new param
---
R/UTILS.do_habitat_validation.R | 2 ++
1 file changed, 2 insertions(+)
diff --git a/R/UTILS.do_habitat_validation.R b/R/UTILS.do_habitat_validation.R
index 83d2880..c0dae0e 100644
--- a/R/UTILS.do_habitat_validation.R
+++ b/R/UTILS.do_habitat_validation.R
@@ -26,6 +26,8 @@
##' @param name.simulation simulation folder name.
##' @param perStrata a TRUE/FALSE vector. If TRUE, the PFG abundance is defined
##' by strata in each pixel. If FALSE, PFG abundance is defined for all strata.
+##' @param hab.obs a raster map of the observed habitat in the
+##' extended studied area.
##'
##' @details
##'
From 5c9149e05c954262dcce7e6273bcf3df26f2bd96 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Tue, 22 Feb 2022 16:35:45 +0100
Subject: [PATCH 017/176] Update UTILS.plot_predicted_habitat.R
Correction of a small mistake in the header
---
R/UTILS.plot_predicted_habitat.R | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/R/UTILS.plot_predicted_habitat.R b/R/UTILS.plot_predicted_habitat.R
index 24666b6..80e9be2 100644
--- a/R/UTILS.plot_predicted_habitat.R
+++ b/R/UTILS.plot_predicted_habitat.R
@@ -11,7 +11,7 @@
##' based on a habitat prediction file. For each pixel, the habitat failure or success value
##' is associated to a color and then, the map is built.
##'
-##' @param predicted habitat a csv file created by the do.habitat.validation function
+##' @param predicted.habitat a csv file created by the do.habitat.validation function
##' which contain, for each pixel of the studied map, the simulated and observed habitat.
##' @param col.df a data frame with all the colors associated with the failure or
##' success of each studied habitat prediction.
From 1b6896cfa68366954f2a7954d965dd1c12239959 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Tue, 22 Feb 2022 16:36:25 +0100
Subject: [PATCH 018/176] Update UTILS.train_RF_habitat.R
Modification of the title
---
R/UTILS.train_RF_habitat.R | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/R/UTILS.train_RF_habitat.R b/R/UTILS.train_RF_habitat.R
index a277f05..334e079 100644
--- a/R/UTILS.train_RF_habitat.R
+++ b/R/UTILS.train_RF_habitat.R
@@ -1,7 +1,6 @@
### HEADER #####################################################################
##'
-##' @title Create a random forest algorithm trained on CBNA data, in order to
-##' obtain the simulated habitat, derived from a \code{FATE} simulation.
+##' @title Create a random forest algorithm trained on CBNA data.
##'
##' @name train.RF.habitat
##'
From 5df7fc7cc2a6014e58ed20e9065dfc04b3233c29 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Tue, 22 Feb 2022 16:48:49 +0100
Subject: [PATCH 019/176] Add files via upload
Addition of the new markdown files of the new habitat validation functions
---
man/POST_FATE.validation.habitat.Rd | 67 +++++++++++++++++++++++++++
man/do.habitat.validation.Rd | 69 ++++++++++++++++++++++++++++
man/plot.predicted.habitat.Rd | 41 +++++++++++++++++
man/train.RF.habitat.Rd | 70 +++++++++++++++++++++++++++++
4 files changed, 247 insertions(+)
create mode 100644 man/POST_FATE.validation.habitat.Rd
create mode 100644 man/do.habitat.validation.Rd
create mode 100644 man/plot.predicted.habitat.Rd
create mode 100644 man/train.RF.habitat.Rd
diff --git a/man/POST_FATE.validation.habitat.Rd b/man/POST_FATE.validation.habitat.Rd
new file mode 100644
index 0000000..572efbf
--- /dev/null
+++ b/man/POST_FATE.validation.habitat.Rd
@@ -0,0 +1,67 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/POST_FATE.validation_habitat.R
+\name{POST_FATE.validation.habitat}
+\alias{POST_FATE.validation.habitat}
+\alias{POST_FATE.validation_habitat}
+\title{Compute habitat performance and create a prediction plot of habitat
+for a whole map of a \code{FATE} simulation.}
+\usage{
+POST_FATE.validation_habitat(
+ name.simulation,
+ sim.version,
+ obs.path,
+ releves.PFG,
+ releves.sites,
+ hab.obs,
+ validation.mask
+)
+}
+\arguments{
+\item{name.simulation}{simulation folder name.}
+
+\item{sim.version}{name of the simulation we want to validate (it works with
+only one sim.version).}
+
+\item{obs.path}{the function needs observed data, please create a folder for them in your
+simulation folder and then indicate in this parmeter the access path to this folder.}
+
+\item{releves.PFG}{name of file which contain the observed Braund-Blanquet abundance at each site
+and each PFG and strata (with extension).}
+
+\item{hab.obs}{name of the file which contain the extended studied map in the simulation (with extension).}
+
+\item{validation.mask}{name of the file which contain a raster mask that specified which pixels need validation.}
+
+\item{releves.site}{name of the file which contain coordinates and a description of
+the habitat associated with the dominant species of each site in the studied map (with extension).}
+
+\item{habitat.FATE.map}{name of the file which contain the restricted studied map in the simulation (with extension).}
+}
+\value{
+Two folders are created in name.simulation folder :
+\describe{
+ \item{\file{VALIDATION/HABITAT/sim.version}}{containing the prepared CBNA data,
+ RF model, the performance analyzes (confusion matrix and TSS) for the training and
+testing parts of the RF model, the habitat performance file, the habitat prediction file with
+observed and simulated habitat for each pixel of the whole map and the final prediction plot.}
+ \item{\file{DATA_OBS}}{maps of observed habitat and csv files of PFG and sites releves.}
+}
+}
+\description{
+This script compare habitat simulations and observations and
+create a map to visualize this comparison with all the the \code{FATE} and
+observed data.
+}
+\details{
+The observed habitat is derived from the cesbio map, the simulated habitat
+is derived from FATE simulated relative abundance, based on a random forest
+algorithm trained on CBNA data. To compare observations and simulations, the function
+compute confusion matrix between observation and prediction and then compute the TSS
+for each habitat h (number of prediction of habitat h/number of observation
+of habitat h + number of non-prediction of habitat h/number of non-observation
+of habitat h). The final metrics this script use is the mean of TSS per habitat over all
+habitats, weighted by the share of each habitat in the observed habitat distribution.
+}
+\author{
+Matthieu Combaud, Maxime Delprat
+}
diff --git a/man/do.habitat.validation.Rd b/man/do.habitat.validation.Rd
new file mode 100644
index 0000000..5163b04
--- /dev/null
+++ b/man/do.habitat.validation.Rd
@@ -0,0 +1,69 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/UTILS.do_habitat_validation.R
+\name{do.habitat.validation}
+\alias{do.habitat.validation}
+\title{Compare observed and simulated habitat of a \code{FATE} simulation
+at the last simulation year.}
+\usage{
+do.habitat.validation(
+ output.path,
+ RF.model,
+ habitat.FATE.map,
+ validation.mask,
+ simulation.map,
+ predict.all.map,
+ sim.version,
+ name.simulation,
+ perStrata,
+ hab.obs
+)
+}
+\arguments{
+\item{output.path}{access path to the for the folder where output files
+will be created.}
+
+\item{RF.model}{random forest model trained on CBNA data (train.RF.habitat
+function)}
+
+\item{habitat.FATE.map}{a raster map of the observed habitat in the
+studied area.}
+
+\item{validation.mask}{a raster mask that specified which pixels need validation.}
+
+\item{simulation.map}{a raster map of the whole studied area use to check
+the consistency between simulation map and the observed habitat map.}
+
+\item{predict.all.map}{a TRUE/FALSE vector. If TRUE, the script will predict
+habitat for the whole map.}
+
+\item{sim.version}{name of the simulation we want to validate.}
+
+\item{name.simulation}{simulation folder name.}
+
+\item{perStrata}{a TRUE/FALSE vector. If TRUE, the PFG abundance is defined
+by strata in each pixel. If FALSE, PFG abundance is defined for all strata.}
+
+\item{hab.obs}{a raster map of the observed habitat in the
+extended studied area.}
+}
+\value{
+Habitat performance file
+If option selected, the function returns an habitat prediction file with
+observed and simulated habitat for each pixel of the whole map.
+}
+\description{
+To compare observations and simulations, this function compute
+confusion matrix between observation and prediction and then compute the TSS
+for each habitat.
+}
+\details{
+After several preliminary checks, the function is going to prepare the observations
+database by extracting the observed habitat from a raster map. Then, for each
+simulations (sim.version), the script take the evolution abundance for each PFG
+and all strata file and predict the habitat for the whole map (if option selected)
+thanks to the RF model.Finally, the function compute habitat performance based on
+TSS for each habitat.
+}
+\author{
+Matthieu Combaud & Maxime Delprat
+}
diff --git a/man/plot.predicted.habitat.Rd b/man/plot.predicted.habitat.Rd
new file mode 100644
index 0000000..381bac5
--- /dev/null
+++ b/man/plot.predicted.habitat.Rd
@@ -0,0 +1,41 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/UTILS.plot_predicted_habitat.R
+\name{plot.predicted.habitat}
+\alias{plot.predicted.habitat}
+\title{Create a raster map of habitat prediction for a specific \code{FATE}
+simulation at the last simulation year.}
+\usage{
+\method{plot}{predicted.habitat}(predicted.habitat, col.df, simulation.map, output.path, sim.version)
+}
+\arguments{
+\item{predicted.habitat}{a csv file created by the do.habitat.validation function
+which contain, for each pixel of the studied map, the simulated and observed habitat.}
+
+\item{col.df}{a data frame with all the colors associated with the failure or
+success of each studied habitat prediction.}
+
+\item{simulation.map}{a raster map of the whole studied area.}
+
+\item{output.path}{access path to the for the folder where output files
+will be created.}
+
+\item{sim.version}{name of the simulation we want to validate.}
+}
+\value{
+a synthetic.prediction.png file which contain the final prediction plot.
+}
+\description{
+This script is designed to create a raster map of habitat prediction
+based on a habitat prediction file. For each pixel, the habitat failure or success value
+is associated to a color and then, the map is built.
+}
+\details{
+The function determine true/false prediction ('failure' if false, 'success' if true)
+and prepare a dataframe containing color and habitat code. Then, the script merge
+the prediction dataframe with the color and code habitat dataframe. Finally,
+the function draw a raster map and a plot of prediction habitat over it thanks
+to the data prepared before.
+}
+\author{
+Matthieu Combaud, Maxime Delprat
+}
diff --git a/man/train.RF.habitat.Rd b/man/train.RF.habitat.Rd
new file mode 100644
index 0000000..272141c
--- /dev/null
+++ b/man/train.RF.habitat.Rd
@@ -0,0 +1,70 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/UTILS.train_RF_habitat.R
+\name{train.RF.habitat}
+\alias{train.RF.habitat}
+\title{Create a random forest algorithm trained on CBNA data.}
+\usage{
+train.RF.habitat(
+ releves.PFG,
+ releves.sites,
+ hab.obs,
+ external.training.mask = NULL,
+ studied.habitat,
+ RF.param,
+ output.path,
+ perStrata,
+ sim.version
+)
+}
+\arguments{
+\item{releves.PFG}{a data frame with Braund-Blanquet abundance at each site
+and each PFG and strata.}
+
+\item{releves.sites}{a data frame with coordinates and a description of
+the habitat associated with the dominant species of each site in the
+studied map.}
+
+\item{hab.obs}{a raster map of the observed habitat in the
+extended studied area.}
+
+\item{external.training.mask}{default \code{NULL}. (optional) Keep only
+releves data in a specific area.}
+
+\item{studied.habitat}{a vector that specifies habitats that we take
+into account for the validation.}
+
+\item{RF.param}{a list of 2 parameters for random forest model :
+share.training defines the size of the trainig part of the data base.
+ntree is the number of trees build by the algorithm, it allows to reduce
+the prediction error.}
+
+\item{output.path}{access path to the for the folder where output files
+will be created.}
+
+\item{perStrata}{a TRUE/FALSE vector. If TRUE, the PFG abundance is defined
+by strata in each site. If FALSE, PFG abundance is defined for all strata.}
+
+\item{sim.version}{name of the simulation we want to validate.}
+}
+\value{
+2 prepared CBNA releves files are created before the building of the random
+forest model in a habitat validation folder.
+5 more files are created at the end of the script to save the RF model and
+the performance analyzes (confusion matrix and TSS) for the training and
+testing parts.
+}
+\description{
+This script is designed to produce a random forest model
+trained on observed PFG abundance, sites releves and a map of observed
+habitat.
+}
+\details{
+This function transform PFG Braund-Blanquet abundance in relative abundance,
+get habitat information from the releves map, keep only relees on interesting
+habitat and then builds de random forest model. Finally, the function analyzes
+the model performance with computation of confusion matrix and TSS for
+the traning and testing sample.
+}
+\author{
+Matthieu Combaud, Maxime Delprat
+}
From 05d90b1bad1f25a05c71f14c05ffc9828fc7a654 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Tue, 22 Feb 2022 16:54:29 +0100
Subject: [PATCH 020/176] Add files via upload
Addition of the documentation (.html files) of the new habitat validation functions
---
.../POST_FATE.validation.habitat.html | 235 +++++++++++++++++
docs/reference/do.habitat.validation.html | 236 ++++++++++++++++++
docs/reference/plot.predicted.habitat.html | 209 ++++++++++++++++
docs/reference/train.RF.habitat.html | 235 +++++++++++++++++
4 files changed, 915 insertions(+)
create mode 100644 docs/reference/POST_FATE.validation.habitat.html
create mode 100644 docs/reference/do.habitat.validation.html
create mode 100644 docs/reference/plot.predicted.habitat.html
create mode 100644 docs/reference/train.RF.habitat.html
diff --git a/docs/reference/POST_FATE.validation.habitat.html b/docs/reference/POST_FATE.validation.habitat.html
new file mode 100644
index 0000000..7be1b99
--- /dev/null
+++ b/docs/reference/POST_FATE.validation.habitat.html
@@ -0,0 +1,235 @@
+
+
Compute habitat performance and create a prediction plot of habitat
+for a whole map of a FATE simulation. — POST_FATE.validation.habitat • RFate
+
+
+
name of the simulation we want to validate (it works with
+only one sim.version).
+
obs.path
+
the function needs observed data, please create a folder for them in your
+simulation folder and then indicate in this parmeter the access path to this folder.
+
releves.PFG
+
name of file which contain the observed Braund-Blanquet abundance at each site
+and each PFG and strata (with extension).
+
hab.obs
+
name of the file which contain the extended studied map in the simulation (with extension).
+
validation.mask
+
name of the file which contain a raster mask that specified which pixels need validation.
+
releves.site
+
name of the file which contain coordinates and a description of
+the habitat associated with the dominant species of each site in the studied map (with extension).
+
habitat.FATE.map
+
name of the file which contain the restricted studied map in the simulation (with extension).
+
+
+
Value
+
Two folders are created in name.simulation folder :
VALIDATION/HABITAT/sim.version
+
containing the prepared CBNA data,
+ RF model, the performance analyzes (confusion matrix and TSS) for the training and
+testing parts of the RF model, the habitat performance file, the habitat prediction file with
+observed and simulated habitat for each pixel of the whole map and the final prediction plot.
+
+
DATA_OBS
+
maps of observed habitat and csv files of PFG and sites releves.
+
+
+
+
+
Details
+
The observed habitat is derived from the cesbio map, the simulated habitat
+is derived from FATE simulated relative abundance, based on a random forest
+algorithm trained on CBNA data. To compare observations and simulations, the function
+compute confusion matrix between observation and prediction and then compute the TSS
+for each habitat h (number of prediction of habitat h/number of observation
+of habitat h + number of non-prediction of habitat h/number of non-observation
+of habitat h). The final metrics this script use is the mean of TSS per habitat over all
+habitats, weighted by the share of each habitat in the observed habitat distribution.
+
+
+
Author
+
Matthieu Combaud, Maxime Delprat
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/reference/do.habitat.validation.html b/docs/reference/do.habitat.validation.html
new file mode 100644
index 0000000..9dd1092
--- /dev/null
+++ b/docs/reference/do.habitat.validation.html
@@ -0,0 +1,236 @@
+
+Compare observed and simulated habitat of a FATE simulation
+at the last simulation year. — do.habitat.validation • RFate
+
+
+
To compare observations and simulations, this function compute
+confusion matrix between observation and prediction and then compute the TSS
+for each habitat.
access path to the for the folder where output files
+will be created.
+
RF.model
+
random forest model trained on CBNA data (train.RF.habitat
+function)
+
habitat.FATE.map
+
a raster map of the observed habitat in the
+studied area.
+
validation.mask
+
a raster mask that specified which pixels need validation.
+
simulation.map
+
a raster map of the whole studied area use to check
+the consistency between simulation map and the observed habitat map.
+
predict.all.map
+
a TRUE/FALSE vector. If TRUE, the script will predict
+habitat for the whole map.
+
sim.version
+
name of the simulation we want to validate.
+
name.simulation
+
simulation folder name.
+
perStrata
+
a TRUE/FALSE vector. If TRUE, the PFG abundance is defined
+by strata in each pixel. If FALSE, PFG abundance is defined for all strata.
+
hab.obs
+
a raster map of the observed habitat in the
+extended studied area.
+
+
+
Value
+
Habitat performance file
+If option selected, the function returns an habitat prediction file with
+observed and simulated habitat for each pixel of the whole map.
+
+
+
Details
+
After several preliminary checks, the function is going to prepare the observations
+database by extracting the observed habitat from a raster map. Then, for each
+simulations (sim.version), the script take the evolution abundance for each PFG
+and all strata file and predict the habitat for the whole map (if option selected)
+thanks to the RF model.Finally, the function compute habitat performance based on
+TSS for each habitat.
+
+
+
Author
+
Matthieu Combaud & Maxime Delprat
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/reference/plot.predicted.habitat.html b/docs/reference/plot.predicted.habitat.html
new file mode 100644
index 0000000..678075a
--- /dev/null
+++ b/docs/reference/plot.predicted.habitat.html
@@ -0,0 +1,209 @@
+
+Create a raster map of habitat prediction for a specific FATE
+simulation at the last simulation year. — plot.predicted.habitat • RFate
+
+
+
This script is designed to create a raster map of habitat prediction
+based on a habitat prediction file. For each pixel, the habitat failure or success value
+is associated to a color and then, the map is built.
+
+
+
+
# S3 method for predicted.habitat
+plot(predicted.habitat, col.df, simulation.map, output.path, sim.version)
+
+
+
+
Arguments
+
predicted.habitat
+
a csv file created by the do.habitat.validation function
+which contain, for each pixel of the studied map, the simulated and observed habitat.
+
col.df
+
a data frame with all the colors associated with the failure or
+success of each studied habitat prediction.
+
simulation.map
+
a raster map of the whole studied area.
+
output.path
+
access path to the for the folder where output files
+will be created.
+
sim.version
+
name of the simulation we want to validate.
+
+
+
Value
+
a synthetic.prediction.png file which contain the final prediction plot.
+
+
+
Details
+
The function determine true/false prediction ('failure' if false, 'success' if true)
+and prepare a dataframe containing color and habitat code. Then, the script merge
+the prediction dataframe with the color and code habitat dataframe. Finally,
+the function draw a raster map and a plot of prediction habitat over it thanks
+to the data prepared before.
+
+
+
Author
+
Matthieu Combaud, Maxime Delprat
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/reference/train.RF.habitat.html b/docs/reference/train.RF.habitat.html
new file mode 100644
index 0000000..9c99be9
--- /dev/null
+++ b/docs/reference/train.RF.habitat.html
@@ -0,0 +1,235 @@
+
+Create a random forest algorithm trained on CBNA data. — train.RF.habitat • RFate
+
+
+
a data frame with Braund-Blanquet abundance at each site
+and each PFG and strata.
+
releves.sites
+
a data frame with coordinates and a description of
+the habitat associated with the dominant species of each site in the
+studied map.
+
hab.obs
+
a raster map of the observed habitat in the
+extended studied area.
+
external.training.mask
+
default NULL. (optional) Keep only
+releves data in a specific area.
+
studied.habitat
+
a vector that specifies habitats that we take
+into account for the validation.
+
RF.param
+
a list of 2 parameters for random forest model :
+share.training defines the size of the trainig part of the data base.
+ntree is the number of trees build by the algorithm, it allows to reduce
+the prediction error.
+
output.path
+
access path to the for the folder where output files
+will be created.
+
perStrata
+
a TRUE/FALSE vector. If TRUE, the PFG abundance is defined
+by strata in each site. If FALSE, PFG abundance is defined for all strata.
+
sim.version
+
name of the simulation we want to validate.
+
+
+
Value
+
2 prepared CBNA releves files are created before the building of the random
+forest model in a habitat validation folder.
+5 more files are created at the end of the script to save the RF model and
+the performance analyzes (confusion matrix and TSS) for the training and
+testing parts.
+
+
+
Details
+
This function transform PFG Braund-Blanquet abundance in relative abundance,
+get habitat information from the releves map, keep only relees on interesting
+habitat and then builds de random forest model. Finally, the function analyzes
+the model performance with computation of confusion matrix and TSS for
+the traning and testing sample.
+
+
+
Author
+
Matthieu Combaud, Maxime Delprat
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
From d3d4a8b44ce21cea4d2a41e8c2ccd125f54d77b1 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Tue, 22 Feb 2022 16:58:58 +0100
Subject: [PATCH 021/176] Update _pkgdown.yml
Addition of the new habitat validation functions in the index of the website
---
_pkgdown.yml | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/_pkgdown.yml b/_pkgdown.yml
index b104486..c9d2eda 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -129,6 +129,7 @@ reference:
- "POST_FATE.binaryMaps"
- "POST_FATE.graphic_mapPFGvsHS"
- "POST_FATE.graphic_mapPFG"
+ - "POST_FATE.validation_habitat"
- title: Save FATE simulation
contents:
- "SAVE_FATE.step1_PFG"
@@ -144,3 +145,6 @@ reference:
- ".scaleMaps"
- ".getCutoff"
- ".unzip_ALL"
+ - "do_habitat_validation"
+ - "plot_predicted_habitat"
+ - "train_RF_habitat"
From 910c3458a6e8fbe8572d0c0b5fca5801309e568e Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Wed, 23 Feb 2022 09:16:31 +0100
Subject: [PATCH 022/176] Add files via upload
Correction of mistakes in header
---
docs/reference/POST_FATE.validation.habitat.html | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/docs/reference/POST_FATE.validation.habitat.html b/docs/reference/POST_FATE.validation.habitat.html
index 7be1b99..5b56626 100644
--- a/docs/reference/POST_FATE.validation.habitat.html
+++ b/docs/reference/POST_FATE.validation.habitat.html
@@ -166,19 +166,17 @@
Arguments
only one sim.version).
obs.path
the function needs observed data, please create a folder for them in your
-simulation folder and then indicate in this parmeter the access path to this folder.
+simulation folder and then indicate in this parameter the access path to this new folder.
releves.PFG
name of file which contain the observed Braund-Blanquet abundance at each site
and each PFG and strata (with extension).
hab.obs
name of the file which contain the extended studied map in the simulation (with extension).
validation.mask
-
name of the file which contain a raster mask that specified which pixels need validation.
+
name of the file which contain a raster mask that specified which pixels need validation (with extension).
releves.site
name of the file which contain coordinates and a description of
the habitat associated with the dominant species of each site in the studied map (with extension).
-
habitat.FATE.map
-
name of the file which contain the restricted studied map in the simulation (with extension).
name of the file which contain the extended studied map in the simulation (with extension).
validation.mask
name of the file which contain a raster mask that specified which pixels need validation (with extension).
-
releves.site
+
releves.sites
name of the file which contain coordinates and a description of
the habitat associated with the dominant species of each site in the studied map (with extension).
From 4375b5934a66c659feaffd4238c705381ece7d87 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Wed, 23 Feb 2022 09:19:01 +0100
Subject: [PATCH 024/176] Add files via upload
Correction of mistakes
---
man/POST_FATE.validation.habitat.Rd | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/man/POST_FATE.validation.habitat.Rd b/man/POST_FATE.validation.habitat.Rd
index 572efbf..995d80d 100644
--- a/man/POST_FATE.validation.habitat.Rd
+++ b/man/POST_FATE.validation.habitat.Rd
@@ -23,19 +23,17 @@ POST_FATE.validation_habitat(
only one sim.version).}
\item{obs.path}{the function needs observed data, please create a folder for them in your
-simulation folder and then indicate in this parmeter the access path to this folder.}
+simulation folder and then indicate in this parameter the access path to this new folder.}
\item{releves.PFG}{name of file which contain the observed Braund-Blanquet abundance at each site
and each PFG and strata (with extension).}
\item{hab.obs}{name of the file which contain the extended studied map in the simulation (with extension).}
-\item{validation.mask}{name of the file which contain a raster mask that specified which pixels need validation.}
+\item{validation.mask}{name of the file which contain a raster mask that specified which pixels need validation (with extension).}
\item{releves.site}{name of the file which contain coordinates and a description of
the habitat associated with the dominant species of each site in the studied map (with extension).}
-
-\item{habitat.FATE.map}{name of the file which contain the restricted studied map in the simulation (with extension).}
}
\value{
Two folders are created in name.simulation folder :
From f5607bf8242163425dc5af568c1e8876bc75c6d0 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Wed, 23 Feb 2022 09:20:02 +0100
Subject: [PATCH 025/176] Update POST_FATE.validation.habitat.Rd
---
man/POST_FATE.validation.habitat.Rd | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/man/POST_FATE.validation.habitat.Rd b/man/POST_FATE.validation.habitat.Rd
index 995d80d..6c82615 100644
--- a/man/POST_FATE.validation.habitat.Rd
+++ b/man/POST_FATE.validation.habitat.Rd
@@ -32,7 +32,7 @@ and each PFG and strata (with extension).}
\item{validation.mask}{name of the file which contain a raster mask that specified which pixels need validation (with extension).}
-\item{releves.site}{name of the file which contain coordinates and a description of
+\item{releves.sites}{name of the file which contain coordinates and a description of
the habitat associated with the dominant species of each site in the studied map (with extension).}
}
\value{
From 4d75823065ab2858b095b1630e16a1c941c3d2aa Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Wed, 23 Feb 2022 10:14:22 +0100
Subject: [PATCH 026/176] Add files via upload
Update of the function : addition of an habitat considered by the random forest model to generalized the function to any kind of environment
---
R/POST_FATE.validation_habitat.R | 17 ++++++++---------
1 file changed, 8 insertions(+), 9 deletions(-)
diff --git a/R/POST_FATE.validation_habitat.R b/R/POST_FATE.validation_habitat.R
index 4e99d75..b0524e1 100644
--- a/R/POST_FATE.validation_habitat.R
+++ b/R/POST_FATE.validation_habitat.R
@@ -15,14 +15,13 @@
##' @param sim.version name of the simulation we want to validate (it works with
##' only one sim.version).
##' @param obs.path the function needs observed data, please create a folder for them in your
-##' simulation folder and then indicate in this parmeter the access path to this folder.
+##' simulation folder and then indicate in this parameter the access path to this new folder.
##' @param releves.PFG name of file which contain the observed Braund-Blanquet abundance at each site
##' and each PFG and strata (with extension).
##' @param releves.site name of the file which contain coordinates and a description of
##' the habitat associated with the dominant species of each site in the studied map (with extension).
##' @param hab.obs name of the file which contain the extended studied map in the simulation (with extension).
-##' @param habitat.FATE.map name of the file which contain the restricted studied map in the simulation (with extension).
-##' @param validation.mask name of the file which contain a raster mask that specified which pixels need validation.
+##' @param validation.mask name of the file which contain a raster mask that specified which pixels need validation (with extension).
##'
##' @details
##'
@@ -113,18 +112,18 @@ POST_FATE.validation_habitat = function(name.simulation
releves.sites<-st_read(paste0(obs.path, releves.sites))
hab.obs<-raster(paste0(obs.path, hab.obs))
# Habitat mask at FATE simu resolution
- hab.obs.modif<-projectRaster(from = hab.obs, res = res(simulation.map)[1], crs = crs(projection(simulation.map)), method = "ngb")
- habitat.FATE.map<-crop(hab.obs.modif, simulation.map)
+ hab.obs.modif <- projectRaster(from = hab.obs, res = res(simulation.map)[1], crs = crs(projection(simulation.map)), method = "ngb")
+ habitat.FATE.map <- crop(hab.obs.modif, simulation.map)
validation.mask<-raster(paste0(obs.path, validation.mask))
# Provide a color df
col.df<-data.frame(
- habitat=c("agricultural.grassland","coniferous.forest","deciduous.forest","natural.grassland","woody.heatland"),
- failure=c("yellow","blueviolet","aquamarine","chartreuse1","lightsalmon"),
- success=c("darkorange1","blue4","aquamarine3","chartreuse3","firebrick4"))
+ habitat=c("agricultural.grassland","coniferous.forest","deciduous.forest","natural.grassland","woody.heatland","crops"),
+ failure=c("yellow","blueviolet","aquamarine","chartreuse1","lightsalmon","slategray3"),
+ success=c("darkorange1","blue4","aquamarine3","chartreuse3","firebrick4","slategrey"))
# Other
- studied.habitat=c("coniferous.forest","deciduous.forest","natural.grassland","woody.heatland","agricultural.grassland")
+ studied.habitat=c("coniferous.forest","deciduous.forest","natural.grassland","woody.heatland","agricultural.grassland","crops")
RF.param = list(
share.training=0.7,
ntree=500)
From 95b185a562f42e3c29f83aef848db041373d2b1d Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Wed, 23 Feb 2022 11:58:30 +0100
Subject: [PATCH 027/176] Add files via upload
Small adjustment of the header
---
R/POST_FATE.validation_habitat.R | 1 -
1 file changed, 1 deletion(-)
diff --git a/R/POST_FATE.validation_habitat.R b/R/POST_FATE.validation_habitat.R
index b0524e1..263f5be 100644
--- a/R/POST_FATE.validation_habitat.R
+++ b/R/POST_FATE.validation_habitat.R
@@ -42,7 +42,6 @@
##' RF model, the performance analyzes (confusion matrix and TSS) for the training and
##' testing parts of the RF model, the habitat performance file, the habitat prediction file with
##' observed and simulated habitat for each pixel of the whole map and the final prediction plot.}
-##' \item{\file{DATA_OBS}}{maps of observed habitat and csv files of PFG and sites releves.}
##' }
##'
### END OF HEADER ##############################################################
From a68ff792e9876df7e681935e37d11726dbf06375 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Wed, 23 Feb 2022 11:59:10 +0100
Subject: [PATCH 028/176] Add files via upload
Small correction
---
man/POST_FATE.validation.habitat.Rd | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/man/POST_FATE.validation.habitat.Rd b/man/POST_FATE.validation.habitat.Rd
index 6c82615..6f4905d 100644
--- a/man/POST_FATE.validation.habitat.Rd
+++ b/man/POST_FATE.validation.habitat.Rd
@@ -32,7 +32,7 @@ and each PFG and strata (with extension).}
\item{validation.mask}{name of the file which contain a raster mask that specified which pixels need validation (with extension).}
-\item{releves.sites}{name of the file which contain coordinates and a description of
+\item{releves.site}{name of the file which contain coordinates and a description of
the habitat associated with the dominant species of each site in the studied map (with extension).}
}
\value{
@@ -42,7 +42,6 @@ Two folders are created in name.simulation folder :
RF model, the performance analyzes (confusion matrix and TSS) for the training and
testing parts of the RF model, the habitat performance file, the habitat prediction file with
observed and simulated habitat for each pixel of the whole map and the final prediction plot.}
- \item{\file{DATA_OBS}}{maps of observed habitat and csv files of PFG and sites releves.}
}
}
\description{
From de6cb9ff714ad6b8e8099e9b38808dda4af74d59 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Wed, 23 Feb 2022 11:59:58 +0100
Subject: [PATCH 029/176] Add files via upload
Small correction
---
docs/reference/POST_FATE.validation.habitat.html | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/docs/reference/POST_FATE.validation.habitat.html b/docs/reference/POST_FATE.validation.habitat.html
index 3cbbcab..c6cd0f1 100644
--- a/docs/reference/POST_FATE.validation.habitat.html
+++ b/docs/reference/POST_FATE.validation.habitat.html
@@ -174,7 +174,7 @@
Arguments
name of the file which contain the extended studied map in the simulation (with extension).
validation.mask
name of the file which contain a raster mask that specified which pixels need validation (with extension).
-
releves.sites
+
releves.site
name of the file which contain coordinates and a description of
the habitat associated with the dominant species of each site in the studied map (with extension).
@@ -186,9 +186,6 @@
Value
testing parts of the RF model, the habitat performance file, the habitat prediction file with
observed and simulated habitat for each pixel of the whole map and the final prediction plot.
-
DATA_OBS
-
maps of observed habitat and csv files of PFG and sites releves.
Compute habitat performance and create a prediction plot of habitat
This script compare habitat simulations and observations and
-create a map to visualize this comparison with all the the FATE and
+create a map to visualize this comparison with all the FATE and
observed data.
@@ -174,13 +174,13 @@
Arguments
name of the file which contain the extended studied map in the simulation (with extension).
validation.mask
name of the file which contain a raster mask that specified which pixels need validation (with extension).
-
releves.site
+
releves.sites
name of the file which contain coordinates and a description of
the habitat associated with the dominant species of each site in the studied map (with extension).
Value
-
Two folders are created in name.simulation folder :
VALIDATION/HABITAT/sim.version
+
One folder is created in name.simulation folder :
VALIDATION/HABITAT/sim.version
containing the prepared CBNA data,
RF model, the performance analyzes (confusion matrix and TSS) for the training and
testing parts of the RF model, the habitat performance file, the habitat prediction file with
From a54bf0a5084f4aaf51ce5db89abdc0dcb85b8143 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Mon, 28 Feb 2022 10:39:52 +0100
Subject: [PATCH 031/176] Update UTILS.train_RF_habitat.R
Correction of mistakes
---
R/UTILS.train_RF_habitat.R | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/R/UTILS.train_RF_habitat.R b/R/UTILS.train_RF_habitat.R
index 334e079..d7f2393 100644
--- a/R/UTILS.train_RF_habitat.R
+++ b/R/UTILS.train_RF_habitat.R
@@ -34,8 +34,8 @@
##' @details
##'
##' This function transform PFG Braund-Blanquet abundance in relative abundance,
-##' get habitat information from the releves map, keep only relees on interesting
-##' habitat and then builds de random forest model. Finally, the function analyzes
+##' get habitat information from the relevés map, keep only relevés on interesting
+##' habitat and then builds the random forest model. Finally, the function analyzes
##' the model performance with computation of confusion matrix and TSS for
##' the traning and testing sample.
##'
From 2f1681c9d40cc76e147fc113035c9f6d6902e8f8 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Mon, 28 Feb 2022 10:40:57 +0100
Subject: [PATCH 032/176] Update train.RF.habitat.Rd
correction of mistakes
---
man/train.RF.habitat.Rd | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/man/train.RF.habitat.Rd b/man/train.RF.habitat.Rd
index 272141c..21560e5 100644
--- a/man/train.RF.habitat.Rd
+++ b/man/train.RF.habitat.Rd
@@ -60,8 +60,8 @@ habitat.
}
\details{
This function transform PFG Braund-Blanquet abundance in relative abundance,
-get habitat information from the releves map, keep only relees on interesting
-habitat and then builds de random forest model. Finally, the function analyzes
+get habitat information from the relevés map, keep only relevés on interesting
+habitat and then builds the random forest model. Finally, the function analyzes
the model performance with computation of confusion matrix and TSS for
the traning and testing sample.
}
From 2604496280b78728957771a11d7ee7c2b4fd2b8e Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Mon, 28 Feb 2022 10:42:01 +0100
Subject: [PATCH 033/176] Update train.RF.habitat.html
correction of mistakes
---
docs/reference/train.RF.habitat.html | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/docs/reference/train.RF.habitat.html b/docs/reference/train.RF.habitat.html
index 9c99be9..009f4ec 100644
--- a/docs/reference/train.RF.habitat.html
+++ b/docs/reference/train.RF.habitat.html
@@ -199,8 +199,8 @@
Value
Details
This function transform PFG Braund-Blanquet abundance in relative abundance,
-get habitat information from the releves map, keep only relees on interesting
-habitat and then builds de random forest model. Finally, the function analyzes
+get habitat information from the relevés map, keep only relevés on interesting
+habitat and then builds the random forest model. Finally, the function analyzes
the model performance with computation of confusion matrix and TSS for
the traning and testing sample.
From 463a0372058ce6b05bdec827269fbef310c155e7 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Tue, 1 Mar 2022 16:24:45 +0100
Subject: [PATCH 034/176] Add files via upload
Files corrected
---
R/POST_FATE.validation_habitat.R | 87 +++++++++++++-------------------
R/PRE_FATE.skeletonDirectory.R | 10 ++++
R/UTILS.do_habitat_validation.R | 25 +++++++--
R/UTILS.plot_predicted_habitat.R | 13 ++++-
R/UTILS.train_RF_habitat.R | 35 +++++++++----
5 files changed, 103 insertions(+), 67 deletions(-)
diff --git a/R/POST_FATE.validation_habitat.R b/R/POST_FATE.validation_habitat.R
index 263f5be..b13062f 100644
--- a/R/POST_FATE.validation_habitat.R
+++ b/R/POST_FATE.validation_habitat.R
@@ -8,7 +8,7 @@
##' @author Matthieu Combaud, Maxime Delprat
##'
##' @description This script compare habitat simulations and observations and
-##' create a map to visualize this comparison with all the the \code{FATE} and
+##' create a map to visualize this comparison with all the \code{FATE} and
##' observed data.
##'
##' @param name.simulation simulation folder name.
@@ -22,6 +22,10 @@
##' the habitat associated with the dominant species of each site in the studied map (with extension).
##' @param hab.obs name of the file which contain the extended studied map in the simulation (with extension).
##' @param validation.mask name of the file which contain a raster mask that specified which pixels need validation (with extension).
+##' @param studied.habitat default \code{NULL}. If \code{NULL}, the function will
+##' take into account of all habitats in the hab.obs map. Otherwise, please specify
+##' in a vector the habitats that we take into account for the validation.
+##' @param year year of simulation for validation.
##'
##' @details
##'
@@ -44,6 +48,12 @@
##' observed and simulated habitat for each pixel of the whole map and the final prediction plot.}
##' }
##'
+##' @export
+##'
+##' @importFrom raster raster projectRaster res crs
+##' @importFrom sf st_read
+##' @importFrom utils read.csv
+##'
### END OF HEADER ##############################################################
@@ -53,57 +63,25 @@ POST_FATE.validation_habitat = function(name.simulation
, releves.PFG
, releves.sites
, hab.obs
- , validation.mask)
+ , validation.mask
+ , studied.habitat = NULL
+ , year)
{
- ## LIBRARIES
- require(data.table)
- require(raster)
- require(RFate)
- require(reshape2)
- require(stringr)
- require(foreign)
- require(stringr)
- require(dplyr)
- require(sp)
- options("rgdal_show_exportToProj4_warnings"="none")
- require(rgdal)
- require(randomForest)
- require(ggplot2)
- require(ggradar)
- require(tidyverse)
- require(ggpubr)
- require(gridExtra)
- require(vegan)
- require(parallel)
- require(scales)
- require(class)
- require(caret)
- require(sampling)
- require(tidyselect)
- require(grid)
- require(gtable)
- require(scales)
- require(cowplot)
- require(sf)
- require(visNetwork)
- require(foreach)
- require(doParallel)
- require(prettyR)
- require(vcd)
-
## GLOBAL PARAMETERS
- # Create directories
- dir.create(paste0(name.simulation, "/VALIDATION"), recursive = TRUE)
- dir.create(paste0(name.simulation, "/VALIDATION/HABITAT"), recursive = TRUE)
- dir.create(paste0(name.simulation, "/VALIDATION/HABITAT/", sim.version), recursive = TRUE)
+ dir.create(file.path(name.simulation, "VALIDATION", "HABITAT", sim.version), showWarnings = FALSE)
# General
output.path = paste0(name.simulation, "/VALIDATION")
+ year = year
# Useful elements to extract from the simulation
- simulation.map=raster(paste0(name.simulation,"/DATA/MASK/MASK_Champsaur.tif"))
+ name = .getParam(params.lines = paste0(name.simulation, "/PARAM_SIMUL/Simul_parameters_", str_split(sim.version, "_")[[1]][2], ".txt"),
+ flag = "MASK",
+ flag.split = "^--.*--$",
+ is.num = FALSE)
+ simulation.map = raster(paste0(name))
# For habitat validation
# CBNA releves data habitat map
@@ -115,14 +93,14 @@ POST_FATE.validation_habitat = function(name.simulation
habitat.FATE.map <- crop(hab.obs.modif, simulation.map)
validation.mask<-raster(paste0(obs.path, validation.mask))
- # Provide a color df
- col.df<-data.frame(
- habitat=c("agricultural.grassland","coniferous.forest","deciduous.forest","natural.grassland","woody.heatland","crops"),
- failure=c("yellow","blueviolet","aquamarine","chartreuse1","lightsalmon","slategray3"),
- success=c("darkorange1","blue4","aquamarine3","chartreuse3","firebrick4","slategrey"))
-
# Other
- studied.habitat=c("coniferous.forest","deciduous.forest","natural.grassland","woody.heatland","agricultural.grassland","crops")
+ if(is.null(studied.habitat)){
+ studied.habitat = studied.habitat
+ } else if(is.character(studied.habitat)){
+ studied.habitat = studied.habitat
+ } else{
+ stop("studied.habitat is not a vector of character")
+ }
RF.param = list(
share.training=0.7,
ntree=500)
@@ -151,10 +129,17 @@ POST_FATE.validation_habitat = function(name.simulation
, sim.version = sim.version
, name.simulation = name.simulation
, perStrata = F
- , hab.obs = hab.obs)
+ , hab.obs = hab.obs
+ , year = year)
## AGGREGATE HABITAT PREDICTION AND PLOT PREDICTED HABITAT
+ # Provide a color df
+ col.df<-data.frame(
+ habitat = RF.model$classes,
+ failure = terrain.colors(length(RF.model$classes), alpha = 0.5),
+ success = terrain.colors(length(RF.model$classes), alpha = 1))
+
prediction.map <- plot.predicted.habitat(predicted.habitat = habitats.results
, col.df = col.df
, simulation.map = simulation.map
diff --git a/R/PRE_FATE.skeletonDirectory.R b/R/PRE_FATE.skeletonDirectory.R
index 6201040..919f745 100644
--- a/R/PRE_FATE.skeletonDirectory.R
+++ b/R/PRE_FATE.skeletonDirectory.R
@@ -70,6 +70,13 @@
##' \code{\link{PRE_FATE.params_simulParameters}})}
##' \item{\code{RESULTS}}{this folder will collect all the results produced by the
##' software with a folder for each simulation}
+##' \item{\code{VALIDATION}}{this folder will collect all the validation files produced
+##' by POST_FATE validation functions
+##' \describe{
+##' \item{\code{HABITAT}}{this folder will collect all the validation files produces
+##' by the function POST_FATE.validation.habitat}
+##' }
+##' }
##' }
##'
##' \strong{NB :} \cr
@@ -136,6 +143,9 @@ PRE_FATE.skeletonDirectory = function(name.simulation = "FATE_simulation")
dir.create(file.path(name.simulation, "PARAM_SIMUL"), showWarnings = FALSE)
## the RESULTS dir
dir.create(file.path(name.simulation, "RESULTS"), showWarnings = FALSE)
+ ## the VALIDATION dir
+ dir.create(file.path(name.simulation, "VALIDATION"), showWarnings = FALSE)
+ dir.create(file.path(name.simulation, "VALIDATION", "HABITAT"), showWarnings = FALSE)
message("\n Your directory tree for your FATE simulation ("
, name.simulation, ") is ready!\n")
diff --git a/R/UTILS.do_habitat_validation.R b/R/UTILS.do_habitat_validation.R
index c0dae0e..2296d4e 100644
--- a/R/UTILS.do_habitat_validation.R
+++ b/R/UTILS.do_habitat_validation.R
@@ -5,7 +5,7 @@
##'
##' @name do.habitat.validation
##'
-##' @author Matthieu Combaud, Maxime Delprat
+##' @author Matthieu Combaud & Maxime Delprat
##'
##' @description To compare observations and simulations, this function compute
##' confusion matrix between observation and prediction and then compute the TSS
@@ -22,12 +22,13 @@
##' the consistency between simulation map and the observed habitat map.
##' @param predict.all.map a TRUE/FALSE vector. If TRUE, the script will predict
##' habitat for the whole map.
-##' @param sim.version name of the simulation we want to validate.
+##' @param sim.version name of the simulation to validate.
##' @param name.simulation simulation folder name.
##' @param perStrata a TRUE/FALSE vector. If TRUE, the PFG abundance is defined
##' by strata in each pixel. If FALSE, PFG abundance is defined for all strata.
##' @param hab.obs a raster map of the observed habitat in the
##' extended studied area.
+##' @param year year of simulation for validation.
##'
##' @details
##'
@@ -44,10 +45,24 @@
##' If option selected, the function returns an habitat prediction file with
##' observed and simulated habitat for each pixel of the whole map.
##'
+##' @export
+##'
+##' @importFrom raster compareCRS res projectRaster extent crop origin compareRaster
+##' getValues aggregate predict
+##' @importFrom stringr str_sub
+##' @importFrom dplyr select filter rename group_by %>% mutate rename
+##' @importFrom foreach foreach %dopar%
+##' @importFrom forcats fct_expand
+##' @importFrom reshape2 dcast
+##' @importFrom randomForest
+##' @importFrom vcd
+##' @importFrom caret confusionMatrix
+##' @importFrom utils write.csv
+##'
### END OF HEADER ##############################################################
-do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validation.mask, simulation.map, predict.all.map, sim.version, name.simulation, perStrata, hab.obs) {
+do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validation.mask, simulation.map, predict.all.map, sim.version, name.simulation, perStrata, hab.obs, year) {
#notes
# we prepare the relevé data in this function, but in fact we could provide them directly if we adjust the code
@@ -152,7 +167,7 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
#get simulated abundance per pixel*strata*PFG for pixels in the simulation area
simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_", sim.version, ".csv"))
- simu_PFG = simu_PFG[,-c(3:44)]
+ simu_PFG = simu_PFG[,c("PFG","ID.pixel", paste0("X",year))]
colnames(simu_PFG) = c("PFG", "pixel", "abs")
#aggregate per strata group with the correspondance provided in input
@@ -185,7 +200,7 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
simu_PFG$strata<-fct_expand(simu_PFG$strata,list.strata)
#cast
- simu_PFG<-dcast(simu_PFG,pixel~PFG*strata,value.var=c("relative.abundance"),fill=0,drop=F)
+ simu_PFG<-reshape2::dcast(simu_PFG,pixel~PFG*strata,value.var=c("relative.abundance"),fill=0,drop=F)
#merge PFG info and habitat + transform habitat into factor
diff --git a/R/UTILS.plot_predicted_habitat.R b/R/UTILS.plot_predicted_habitat.R
index 80e9be2..0504edb 100644
--- a/R/UTILS.plot_predicted_habitat.R
+++ b/R/UTILS.plot_predicted_habitat.R
@@ -31,6 +31,17 @@
##' @return
##'
##' a synthetic.prediction.png file which contain the final prediction plot.
+##'
+##' @export
+##'
+##' @importFrom dplyr select all_of
+##' @importFrom data.table melt rename
+##' @importFrom utils write.csv
+##' @importFrom raster raster crs extent res ratify writeRaster
+##' @importFrom stats complete.cases
+##' @importFrom ggplot2 ggplot geom_raster coord_equal scale_fill_manual
+##' ggtitle guides theme ggsave
+##'
### END OF HEADER ##############################################################
@@ -81,7 +92,7 @@ plot.predicted.habitat<-function(predicted.habitat
#merge the prediction df with the df containing color and habitat code
predicted.habitat<-merge(predicted.habitat,habitat.code.df,by.x=c("modal.predicted.habitat","prediction.code"),by.y=c("habitat","prediction.code"))
write.csv(x = predicted.habitat, file = paste0(output.path, "/HABITAT/", sim.version, "/hab.pred.csv"))
-
+
#plot
diff --git a/R/UTILS.train_RF_habitat.R b/R/UTILS.train_RF_habitat.R
index d7f2393..ba833e0 100644
--- a/R/UTILS.train_RF_habitat.R
+++ b/R/UTILS.train_RF_habitat.R
@@ -19,8 +19,9 @@
##' extended studied area.
##' @param external.training.mask default \code{NULL}. (optional) Keep only
##' releves data in a specific area.
-##' @param studied.habitat a vector that specifies habitats that we take
-##' into account for the validation.
+##' @param studied.habitat If \code{NULL}, the function will
+##' take into account of all habitats in the hab.obs map. Otherwise, please specify
+##' in a vector the habitats that we take into account for the validation.
##' @param RF.param a list of 2 parameters for random forest model :
##' share.training defines the size of the trainig part of the data base.
##' ntree is the number of trees build by the algorithm, it allows to reduce
@@ -34,8 +35,8 @@
##' @details
##'
##' This function transform PFG Braund-Blanquet abundance in relative abundance,
-##' get habitat information from the relevés map, keep only relevés on interesting
-##' habitat and then builds the random forest model. Finally, the function analyzes
+##' get habitat information from the releves map, keep only relees on interesting
+##' habitat and then builds de random forest model. Finally, the function analyzes
##' the model performance with computation of confusion matrix and TSS for
##' the traning and testing sample.
##'
@@ -47,13 +48,24 @@
##' the performance analyzes (confusion matrix and TSS) for the training and
##' testing parts.
##'
+##' @export
+##'
+##' @importFrom dplyr filter %>% group_by select
+##' @importFrom data.table dcast setDT
+##' @importFrom raster extract aggregate compareCRS
+##' @importFrom sf st_transform st_crop st_write
+##' @importFrom randomForest randomForest tuneRF
+##' @importFrom caret confusionMatrix
+##' @importFrom tidyverse write_rds
+##' @importFrom utils read.csv
+##'
### END OF HEADER ##############################################################
train.RF.habitat<-function(releves.PFG
, releves.sites
, hab.obs
- , external.training.mask=NULL
+ , external.training.mask = NULL
, studied.habitat
, RF.param
, output.path
@@ -127,9 +139,12 @@ train.RF.habitat<-function(releves.PFG
# 4. Keep only releve on interesting habitat
###################################################"
- aggregated.releves.PFG<-filter(aggregated.releves.PFG,is.element(habitat,studied.habitat)) #filter non interesting habitat + NA
-
- print(cat("habitat classes used in the RF algo: ",unique(aggregated.releves.PFG$habitat),"\n",sep="\t"))
+ if (!is.null(studied.habitat)){
+ aggregated.releves.PFG<-filter(aggregated.releves.PFG,is.element(habitat,studied.habitat)) #filter non interesting habitat + NA
+ print(cat("habitat classes used in the RF algo: ",unique(aggregated.releves.PFG$habitat),"\n",sep="\t"))
+ } else{
+ print(cat("habitat classes used in the RF algo: ",unique(aggregated.releves.PFG$habitat),"\n",sep="\t"))
+ }
# 5. Save data
#####################
@@ -161,7 +176,7 @@ train.RF.habitat<-function(releves.PFG
x=select(releves.training,-c(code.habitat,site,habitat,geometry)),
y=releves.training$habitat,
strata=releves.training$habitat,
- sampsize=min(table(releves.training$habitat)),
+ sampsize=nrow(releves.training),
ntreeTry=RF.param$ntree,
stepFactor=2, improve=0.05,doBest=FALSE,plot=F,trace=F
)
@@ -177,7 +192,7 @@ train.RF.habitat<-function(releves.PFG
xtest=select(releves.testing,-c(code.habitat,site,habitat,geometry)),
ytest=releves.testing$habitat,
strata=releves.training$habitat,
- min(table(releves.training$habitat)),
+ sampsize=nrow(releves.training),
ntree=RF.param$ntree,
mtry=mtry,
norm.votes=TRUE,
From 50fc4de08e59dc77727b3cd7ba17939bef3a8220 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Tue, 1 Mar 2022 16:25:41 +0100
Subject: [PATCH 035/176] Add files via upload
Files corrected
---
man/POST_FATE.validation.habitat.Rd | 12 ++++++++++--
man/PRE_FATE.skeletonDirectory.Rd | 7 +++++++
man/do.habitat.validation.Rd | 9 ++++++---
man/train.RF.habitat.Rd | 11 ++++++-----
4 files changed, 29 insertions(+), 10 deletions(-)
diff --git a/man/POST_FATE.validation.habitat.Rd b/man/POST_FATE.validation.habitat.Rd
index 6f4905d..81d101a 100644
--- a/man/POST_FATE.validation.habitat.Rd
+++ b/man/POST_FATE.validation.habitat.Rd
@@ -13,7 +13,9 @@ POST_FATE.validation_habitat(
releves.PFG,
releves.sites,
hab.obs,
- validation.mask
+ validation.mask,
+ studied.habitat = NULL,
+ year
)
}
\arguments{
@@ -32,6 +34,12 @@ and each PFG and strata (with extension).}
\item{validation.mask}{name of the file which contain a raster mask that specified which pixels need validation (with extension).}
+\item{studied.habitat}{default \code{NULL}. If \code{NULL}, the function will
+take into account of all habitats in the hab.obs map. Otherwise, please specify
+in a vector the habitats that we take into account for the validation.}
+
+\item{year}{year of simulation for validation.}
+
\item{releves.site}{name of the file which contain coordinates and a description of
the habitat associated with the dominant species of each site in the studied map (with extension).}
}
@@ -46,7 +54,7 @@ observed and simulated habitat for each pixel of the whole map and the final pre
}
\description{
This script compare habitat simulations and observations and
-create a map to visualize this comparison with all the the \code{FATE} and
+create a map to visualize this comparison with all the \code{FATE} and
observed data.
}
\details{
diff --git a/man/PRE_FATE.skeletonDirectory.Rd b/man/PRE_FATE.skeletonDirectory.Rd
index 60f61f9..7bc8802 100644
--- a/man/PRE_FATE.skeletonDirectory.Rd
+++ b/man/PRE_FATE.skeletonDirectory.Rd
@@ -76,6 +76,13 @@ The tree structure is detailed below :
\code{\link{PRE_FATE.params_simulParameters}})}
\item{\code{RESULTS}}{this folder will collect all the results produced by the
software with a folder for each simulation}
+ \item{\code{VALIDATION}}{this folder will collect all the validation files produced
+ by POST_FATE validation functions
+ \describe{
+ \item{\code{HABITAT}}{this folder will collect all the validation files produces
+ by the function POST_FATE.validation.habitat}
+ }
+ }
}
\strong{NB :} \cr
diff --git a/man/do.habitat.validation.Rd b/man/do.habitat.validation.Rd
index 5163b04..cccd6f2 100644
--- a/man/do.habitat.validation.Rd
+++ b/man/do.habitat.validation.Rd
@@ -5,7 +5,7 @@
\title{Compare observed and simulated habitat of a \code{FATE} simulation
at the last simulation year.}
\usage{
-do.habitat.validation(
+\method{do}{habitat.validation}(
output.path,
RF.model,
habitat.FATE.map,
@@ -15,7 +15,8 @@ do.habitat.validation(
sim.version,
name.simulation,
perStrata,
- hab.obs
+ hab.obs,
+ year
)
}
\arguments{
@@ -36,7 +37,7 @@ the consistency between simulation map and the observed habitat map.}
\item{predict.all.map}{a TRUE/FALSE vector. If TRUE, the script will predict
habitat for the whole map.}
-\item{sim.version}{name of the simulation we want to validate.}
+\item{sim.version}{name of the simulation to validate.}
\item{name.simulation}{simulation folder name.}
@@ -45,6 +46,8 @@ by strata in each pixel. If FALSE, PFG abundance is defined for all strata.}
\item{hab.obs}{a raster map of the observed habitat in the
extended studied area.}
+
+\item{year}{year of simulation for validation.}
}
\value{
Habitat performance file
diff --git a/man/train.RF.habitat.Rd b/man/train.RF.habitat.Rd
index 21560e5..1e7cd04 100644
--- a/man/train.RF.habitat.Rd
+++ b/man/train.RF.habitat.Rd
@@ -4,7 +4,7 @@
\alias{train.RF.habitat}
\title{Create a random forest algorithm trained on CBNA data.}
\usage{
-train.RF.habitat(
+\method{train}{RF.habitat}(
releves.PFG,
releves.sites,
hab.obs,
@@ -30,8 +30,9 @@ extended studied area.}
\item{external.training.mask}{default \code{NULL}. (optional) Keep only
releves data in a specific area.}
-\item{studied.habitat}{a vector that specifies habitats that we take
-into account for the validation.}
+\item{studied.habitat}{If \code{NULL}, the function will
+take into account of all habitats in the hab.obs map. Otherwise, please specify
+in a vector the habitats that we take into account for the validation.}
\item{RF.param}{a list of 2 parameters for random forest model :
share.training defines the size of the trainig part of the data base.
@@ -60,8 +61,8 @@ habitat.
}
\details{
This function transform PFG Braund-Blanquet abundance in relative abundance,
-get habitat information from the relevés map, keep only relevés on interesting
-habitat and then builds the random forest model. Finally, the function analyzes
+get habitat information from the releves map, keep only relees on interesting
+habitat and then builds de random forest model. Finally, the function analyzes
the model performance with computation of confusion matrix and TSS for
the traning and testing sample.
}
From d4dd212c4ae96bc5efad1074414beea9f84b1e72 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Tue, 1 Mar 2022 16:26:51 +0100
Subject: [PATCH 036/176] Add files via upload
Files corrected
---
.../POST_FATE.validation.habitat.html | 16 +-
.../reference/PRE_FATE.skeletonDirectory.html | 318 ++++++++----------
docs/reference/do.habitat.validation.html | 10 +-
docs/reference/train.RF.habitat.html | 12 +-
4 files changed, 169 insertions(+), 187 deletions(-)
diff --git a/docs/reference/POST_FATE.validation.habitat.html b/docs/reference/POST_FATE.validation.habitat.html
index 1a0d761..b03c91a 100644
--- a/docs/reference/POST_FATE.validation.habitat.html
+++ b/docs/reference/POST_FATE.validation.habitat.html
@@ -2,7 +2,7 @@
Compute habitat performance and create a prediction plot of habitat
for a whole map of a FATE simulation. — POST_FATE.validation.habitat • RFate
-
-
-
-
-
-
-Create the skeleton folder for a FATE simulation — PRE_FATE.skeletonDirectory • RFate
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-Create the skeleton folder for a FATE simulation — PRE_FATE.skeletonDirectory • RFate
-
-
-
-
+
+
-
which is a file containing the names of parameter files,
+
which may themselves contain
parameters (e.g. succession, dispersal files...)
or other file names (e.g. disturbance or environmental change
masks).
-
-
-
The user could give names of files stored everywhere on a machine, and does
-not have to put them all in one same place.
+
The user could give names of files stored everywhere on a machine, and does
+not have to put them all in one same place.
But as this is more practical, this function proposes a way to
organize all those files or parameter files that will or could be
-used by a FATE simulation.
+used by a FATE simulation.
The tree structure is detailed below :
-
-
DATA
this folder will contain all the data or parameters
- that are needed by the model
-
-## Create a skeleton folder with the default name ('FATE_simulation') ------------------------
-PRE_FATE.skeletonDirectory()
+
+
Examples
+
+## Create a skeleton folder with the default name ('FATE_simulation') ------------------------
+PRE_FATE.skeletonDirectory()
-## Create a skeleton folder with a specific name ---------------------------------------------
-PRE_FATE.skeletonDirectory(name.simulation ='FATE_AlpineForest')
+## Create a skeleton folder with a specific name ---------------------------------------------
+PRE_FATE.skeletonDirectory(name.simulation ='FATE_AlpineForest')
-
default NULL. (optional) Keep only
releves data in a specific area.
studied.habitat
-
a vector that specifies habitats that we take
-into account for the validation.
+
If NULL, the function will
+take into account of all habitats in the hab.obs map. Otherwise, please specify
+in a vector the habitats that we take into account for the validation.
RF.param
a list of 2 parameters for random forest model :
share.training defines the size of the trainig part of the data base.
@@ -199,8 +201,8 @@
Value
Details
This function transform PFG Braund-Blanquet abundance in relative abundance,
-get habitat information from the relevés map, keep only relevés on interesting
-habitat and then builds the random forest model. Finally, the function analyzes
+get habitat information from the releves map, keep only relees on interesting
+habitat and then builds de random forest model. Finally, the function analyzes
the model performance with computation of confusion matrix and TSS for
the traning and testing sample.
From 54de8087eb3b281eeb6d7e317363a4c6b8aff37e Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Tue, 1 Mar 2022 16:27:39 +0100
Subject: [PATCH 037/176] Add files via upload
file corrected
---
NAMESPACE | 40 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 40 insertions(+)
diff --git a/NAMESPACE b/NAMESPACE
index 0891e0a..790e668 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,5 +1,8 @@
# Generated by roxygen2: do not edit by hand
+S3method(do,habitat.validation)
+S3method(plot,predicted.habitat)
+S3method(train,RF.habitat)
export(.adaptMaps)
export(.cropMaps)
export(.getCutoff)
@@ -25,6 +28,7 @@ export(POST_FATE.graphic_validationStatistics)
export(POST_FATE.graphics)
export(POST_FATE.relativeAbund)
export(POST_FATE.temporalEvolution)
+export(POST_FATE.validation_habitat)
export(PRE_FATE.abundBraunBlanquet)
export(PRE_FATE.params_PFGdispersal)
export(PRE_FATE.params_PFGdisturbance)
@@ -74,15 +78,28 @@ importFrom(adehabitatHR,kernelUD)
importFrom(adehabitatMA,ascgen)
importFrom(ape,as.phylo)
importFrom(ape,plot.phylo)
+importFrom(caret,confusionMatrix)
importFrom(cluster,silhouette)
importFrom(colorspace,heat_hcl)
importFrom(colorspace,sequential_hcl)
importFrom(cowplot,get_legend)
importFrom(cowplot,ggdraw)
+importFrom(data.table,dcast)
importFrom(data.table,fread)
importFrom(data.table,fwrite)
+importFrom(data.table,melt)
importFrom(data.table,rbindlist)
+importFrom(data.table,rename)
+importFrom(data.table,setDT)
importFrom(doParallel,registerDoParallel)
+importFrom(dplyr,"%>%")
+importFrom(dplyr,all_of)
+importFrom(dplyr,filter)
+importFrom(dplyr,group_by)
+importFrom(dplyr,mutate)
+importFrom(dplyr,rename)
+importFrom(dplyr,select)
+importFrom(forcats,fct_expand)
importFrom(foreach,"%do%")
importFrom(foreach,"%dopar%")
importFrom(foreach,foreach)
@@ -115,6 +132,8 @@ importFrom(ggplot2,geom_smooth)
importFrom(ggplot2,geom_vline)
importFrom(ggplot2,ggplot)
importFrom(ggplot2,ggsave)
+importFrom(ggplot2,ggtitle)
+importFrom(ggplot2,guides)
importFrom(ggplot2,labs)
importFrom(ggplot2,scale_alpha)
importFrom(ggplot2,scale_color_continuous)
@@ -151,29 +170,46 @@ importFrom(huge,huge.npn)
importFrom(methods,as)
importFrom(parallel,mclapply)
importFrom(phyloclim,niche.overlap)
+importFrom(randomForest,)
+importFrom(randomForest,randomForest)
+importFrom(randomForest,tuneRF)
+importFrom(raster,aggregate)
importFrom(raster,as.data.frame)
importFrom(raster,as.matrix)
importFrom(raster,cellFromXY)
importFrom(raster,cellStats)
+importFrom(raster,compareCRS)
+importFrom(raster,compareRaster)
importFrom(raster,coordinates)
importFrom(raster,crop)
+importFrom(raster,crs)
importFrom(raster,extension)
importFrom(raster,extent)
importFrom(raster,extract)
+importFrom(raster,getValues)
importFrom(raster,mask)
importFrom(raster,nlayers)
+importFrom(raster,origin)
+importFrom(raster,predict)
importFrom(raster,projectRaster)
importFrom(raster,projection)
importFrom(raster,raster)
importFrom(raster,rasterToPoints)
+importFrom(raster,ratify)
importFrom(raster,res)
importFrom(raster,stack)
importFrom(raster,writeRaster)
importFrom(raster,xyFromCell)
+importFrom(reshape2,dcast)
importFrom(reshape2,melt)
+importFrom(sf,st_crop)
+importFrom(sf,st_read)
+importFrom(sf,st_transform)
+importFrom(sf,st_write)
importFrom(shiny,runApp)
importFrom(sp,SpatialPoints)
importFrom(stats,as.dist)
+importFrom(stats,complete.cases)
importFrom(stats,cophenetic)
importFrom(stats,cor)
importFrom(stats,cutree)
@@ -188,10 +224,13 @@ importFrom(stats,runif)
importFrom(stats,sd)
importFrom(stats,var)
importFrom(stats,weighted.mean)
+importFrom(stringr,str_sub)
+importFrom(tidyverse,write_rds)
importFrom(utils,combn)
importFrom(utils,download.file)
importFrom(utils,install.packages)
importFrom(utils,packageDescription)
+importFrom(utils,read.csv)
importFrom(utils,read.delim)
importFrom(utils,setTxtProgressBar)
importFrom(utils,tail)
@@ -199,4 +238,5 @@ importFrom(utils,txtProgressBar)
importFrom(utils,write.csv)
importFrom(utils,write.table)
importFrom(utils,zip)
+importFrom(vcd,)
useDynLib(RFate, .registration = TRUE)
From 0b8a1d4f321a6027ef13003925e1564ec963a429 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Fri, 4 Mar 2022 10:13:15 +0100
Subject: [PATCH 038/176] Delete POST_FATE.validation_habitat.R
Creation of a POST_FATE.validation function, which include habitat validation
---
R/POST_FATE.validation_habitat.R | 160 -------------------------------
1 file changed, 160 deletions(-)
delete mode 100644 R/POST_FATE.validation_habitat.R
diff --git a/R/POST_FATE.validation_habitat.R b/R/POST_FATE.validation_habitat.R
deleted file mode 100644
index b13062f..0000000
--- a/R/POST_FATE.validation_habitat.R
+++ /dev/null
@@ -1,160 +0,0 @@
-### HEADER #####################################################################
-##'
-##' @title Compute habitat performance and create a prediction plot of habitat
-##' for a whole map of a \code{FATE} simulation.
-##'
-##' @name POST_FATE.validation.habitat
-##'
-##' @author Matthieu Combaud, Maxime Delprat
-##'
-##' @description This script compare habitat simulations and observations and
-##' create a map to visualize this comparison with all the \code{FATE} and
-##' observed data.
-##'
-##' @param name.simulation simulation folder name.
-##' @param sim.version name of the simulation we want to validate (it works with
-##' only one sim.version).
-##' @param obs.path the function needs observed data, please create a folder for them in your
-##' simulation folder and then indicate in this parameter the access path to this new folder.
-##' @param releves.PFG name of file which contain the observed Braund-Blanquet abundance at each site
-##' and each PFG and strata (with extension).
-##' @param releves.site name of the file which contain coordinates and a description of
-##' the habitat associated with the dominant species of each site in the studied map (with extension).
-##' @param hab.obs name of the file which contain the extended studied map in the simulation (with extension).
-##' @param validation.mask name of the file which contain a raster mask that specified which pixels need validation (with extension).
-##' @param studied.habitat default \code{NULL}. If \code{NULL}, the function will
-##' take into account of all habitats in the hab.obs map. Otherwise, please specify
-##' in a vector the habitats that we take into account for the validation.
-##' @param year year of simulation for validation.
-##'
-##' @details
-##'
-##' The observed habitat is derived from the cesbio map, the simulated habitat
-##' is derived from FATE simulated relative abundance, based on a random forest
-##' algorithm trained on CBNA data. To compare observations and simulations, the function
-##' compute confusion matrix between observation and prediction and then compute the TSS
-##' for each habitat h (number of prediction of habitat h/number of observation
-##' of habitat h + number of non-prediction of habitat h/number of non-observation
-##' of habitat h). The final metrics this script use is the mean of TSS per habitat over all
-##' habitats, weighted by the share of each habitat in the observed habitat distribution.
-##'
-##' @return
-##'
-##' Two folders are created in name.simulation folder :
-##' \describe{
-##' \item{\file{VALIDATION/HABITAT/sim.version}}{containing the prepared CBNA data,
-##' RF model, the performance analyzes (confusion matrix and TSS) for the training and
-##' testing parts of the RF model, the habitat performance file, the habitat prediction file with
-##' observed and simulated habitat for each pixel of the whole map and the final prediction plot.}
-##' }
-##'
-##' @export
-##'
-##' @importFrom raster raster projectRaster res crs
-##' @importFrom sf st_read
-##' @importFrom utils read.csv
-##'
-### END OF HEADER ##############################################################
-
-
-POST_FATE.validation_habitat = function(name.simulation
- , sim.version
- , obs.path
- , releves.PFG
- , releves.sites
- , hab.obs
- , validation.mask
- , studied.habitat = NULL
- , year)
-{
-
- ## GLOBAL PARAMETERS
-
- dir.create(file.path(name.simulation, "VALIDATION", "HABITAT", sim.version), showWarnings = FALSE)
-
- # General
- output.path = paste0(name.simulation, "/VALIDATION")
- year = year
-
- # Useful elements to extract from the simulation
- name = .getParam(params.lines = paste0(name.simulation, "/PARAM_SIMUL/Simul_parameters_", str_split(sim.version, "_")[[1]][2], ".txt"),
- flag = "MASK",
- flag.split = "^--.*--$",
- is.num = FALSE)
- simulation.map = raster(paste0(name))
-
- # For habitat validation
- # CBNA releves data habitat map
- releves.PFG<-read.csv(paste0(obs.path, releves.PFG),header=T,stringsAsFactors = T)
- releves.sites<-st_read(paste0(obs.path, releves.sites))
- hab.obs<-raster(paste0(obs.path, hab.obs))
- # Habitat mask at FATE simu resolution
- hab.obs.modif <- projectRaster(from = hab.obs, res = res(simulation.map)[1], crs = crs(projection(simulation.map)), method = "ngb")
- habitat.FATE.map <- crop(hab.obs.modif, simulation.map)
- validation.mask<-raster(paste0(obs.path, validation.mask))
-
- # Other
- if(is.null(studied.habitat)){
- studied.habitat = studied.habitat
- } else if(is.character(studied.habitat)){
- studied.habitat = studied.habitat
- } else{
- stop("studied.habitat is not a vector of character")
- }
- RF.param = list(
- share.training=0.7,
- ntree=500)
- predict.all.map<-T
-
- ## TRAIN A RF ON CBNA DATA
-
- RF.model <- train.RF.habitat(releves.PFG = releves.PFG
- , releves.sites = releves.sites
- , hab.obs = hab.obs
- , external.training.mask = NULL
- , studied.habitat = studied.habitat
- , RF.param = RF.param
- , output.path = output.path
- , perStrata = F
- , sim.version = sim.version)
-
- ## USE THE RF MODEL TO VALIDATE OUTPUT
-
- habitats.results <- do.habitat.validation(output.path = output.path
- , RF.model = RF.model
- , habitat.FATE.map = habitat.FATE.map
- , validation.mask = validation.mask
- , simulation.map = simulation.map
- , predict.all.map = predict.all.map
- , sim.version = sim.version
- , name.simulation = name.simulation
- , perStrata = F
- , hab.obs = hab.obs
- , year = year)
-
- ## AGGREGATE HABITAT PREDICTION AND PLOT PREDICTED HABITAT
-
- # Provide a color df
- col.df<-data.frame(
- habitat = RF.model$classes,
- failure = terrain.colors(length(RF.model$classes), alpha = 0.5),
- success = terrain.colors(length(RF.model$classes), alpha = 1))
-
- prediction.map <- plot.predicted.habitat(predicted.habitat = habitats.results
- , col.df = col.df
- , simulation.map = simulation.map
- , output.path = output.path
- , sim.version = sim.version)
-
- ## COMPARISON FAILURE/SUCCESS
-
- hab.pred = read.csv(paste0(output.path, "/HABITAT/", sim.version, "/hab.pred.csv"))
- failure = as.numeric((table(hab.pred$prediction.code)[1]/sum(table(hab.pred$prediction.code)))*100)
- success = as.numeric((table(hab.pred$prediction.code)[2]/sum(table(hab.pred$prediction.code)))*100)
- cat("\n ---------- END OF THE SIMULATION \n")
- cat(paste0("\n ---------- ", round(failure, digits = 2), "% of habitats are not correctly predicted by ", sim.version, " \n"))
- cat(paste0("\n ---------- ", round(success, digits = 2), "% of habitats are correctly predicted by ", sim.version, " \n"))
- return(prediction.map)
-
-}
-
From 65bf12a69d0e63eff2984083338241a11df70f6c Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Fri, 4 Mar 2022 10:13:56 +0100
Subject: [PATCH 039/176] Delete POST_FATE.validation.habitat.html
Creation of a POST_FATE.validation function, which include habitat validation
---
.../POST_FATE.validation.habitat.html | 238 ------------------
1 file changed, 238 deletions(-)
delete mode 100644 docs/reference/POST_FATE.validation.habitat.html
diff --git a/docs/reference/POST_FATE.validation.habitat.html b/docs/reference/POST_FATE.validation.habitat.html
deleted file mode 100644
index b03c91a..0000000
--- a/docs/reference/POST_FATE.validation.habitat.html
+++ /dev/null
@@ -1,238 +0,0 @@
-
-Compute habitat performance and create a prediction plot of habitat
-for a whole map of a FATE simulation. — POST_FATE.validation.habitat • RFate
-
-
-
name of the simulation we want to validate (it works with
-only one sim.version).
-
obs.path
-
the function needs observed data, please create a folder for them in your
-simulation folder and then indicate in this parameter the access path to this new folder.
-
releves.PFG
-
name of file which contain the observed Braund-Blanquet abundance at each site
-and each PFG and strata (with extension).
-
hab.obs
-
name of the file which contain the extended studied map in the simulation (with extension).
-
validation.mask
-
name of the file which contain a raster mask that specified which pixels need validation (with extension).
-
studied.habitat
-
default NULL. If NULL, the function will
-take into account of all habitats in the hab.obs map. Otherwise, please specify
-in a vector the habitats that we take into account for the validation.
-
year
-
year of simulation for validation.
-
releves.site
-
name of the file which contain coordinates and a description of
-the habitat associated with the dominant species of each site in the studied map (with extension).
-
-
-
Value
-
Two folders are created in name.simulation folder :
VALIDATION/HABITAT/sim.version
-
containing the prepared CBNA data,
- RF model, the performance analyzes (confusion matrix and TSS) for the training and
-testing parts of the RF model, the habitat performance file, the habitat prediction file with
-observed and simulated habitat for each pixel of the whole map and the final prediction plot.
-
-
-
-
-
Details
-
The observed habitat is derived from the cesbio map, the simulated habitat
-is derived from FATE simulated relative abundance, based on a random forest
-algorithm trained on CBNA data. To compare observations and simulations, the function
-compute confusion matrix between observation and prediction and then compute the TSS
-for each habitat h (number of prediction of habitat h/number of observation
-of habitat h + number of non-prediction of habitat h/number of non-observation
-of habitat h). The final metrics this script use is the mean of TSS per habitat over all
-habitats, weighted by the share of each habitat in the observed habitat distribution.
-
-
-
Author
-
Matthieu Combaud, Maxime Delprat
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
From 8d3898c289d41bcc8dc78389e6ceb74a7e021972 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Fri, 4 Mar 2022 10:14:13 +0100
Subject: [PATCH 040/176] Delete POST_FATE.validation.habitat.Rd
Creation of a POST_FATE.validation function, which include habitat validation
---
man/POST_FATE.validation.habitat.Rd | 72 -----------------------------
1 file changed, 72 deletions(-)
delete mode 100644 man/POST_FATE.validation.habitat.Rd
diff --git a/man/POST_FATE.validation.habitat.Rd b/man/POST_FATE.validation.habitat.Rd
deleted file mode 100644
index 81d101a..0000000
--- a/man/POST_FATE.validation.habitat.Rd
+++ /dev/null
@@ -1,72 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/POST_FATE.validation_habitat.R
-\name{POST_FATE.validation.habitat}
-\alias{POST_FATE.validation.habitat}
-\alias{POST_FATE.validation_habitat}
-\title{Compute habitat performance and create a prediction plot of habitat
-for a whole map of a \code{FATE} simulation.}
-\usage{
-POST_FATE.validation_habitat(
- name.simulation,
- sim.version,
- obs.path,
- releves.PFG,
- releves.sites,
- hab.obs,
- validation.mask,
- studied.habitat = NULL,
- year
-)
-}
-\arguments{
-\item{name.simulation}{simulation folder name.}
-
-\item{sim.version}{name of the simulation we want to validate (it works with
-only one sim.version).}
-
-\item{obs.path}{the function needs observed data, please create a folder for them in your
-simulation folder and then indicate in this parameter the access path to this new folder.}
-
-\item{releves.PFG}{name of file which contain the observed Braund-Blanquet abundance at each site
-and each PFG and strata (with extension).}
-
-\item{hab.obs}{name of the file which contain the extended studied map in the simulation (with extension).}
-
-\item{validation.mask}{name of the file which contain a raster mask that specified which pixels need validation (with extension).}
-
-\item{studied.habitat}{default \code{NULL}. If \code{NULL}, the function will
-take into account of all habitats in the hab.obs map. Otherwise, please specify
-in a vector the habitats that we take into account for the validation.}
-
-\item{year}{year of simulation for validation.}
-
-\item{releves.site}{name of the file which contain coordinates and a description of
-the habitat associated with the dominant species of each site in the studied map (with extension).}
-}
-\value{
-Two folders are created in name.simulation folder :
-\describe{
- \item{\file{VALIDATION/HABITAT/sim.version}}{containing the prepared CBNA data,
- RF model, the performance analyzes (confusion matrix and TSS) for the training and
-testing parts of the RF model, the habitat performance file, the habitat prediction file with
-observed and simulated habitat for each pixel of the whole map and the final prediction plot.}
-}
-}
-\description{
-This script compare habitat simulations and observations and
-create a map to visualize this comparison with all the \code{FATE} and
-observed data.
-}
-\details{
-The observed habitat is derived from the cesbio map, the simulated habitat
-is derived from FATE simulated relative abundance, based on a random forest
-algorithm trained on CBNA data. To compare observations and simulations, the function
-compute confusion matrix between observation and prediction and then compute the TSS
-for each habitat h (number of prediction of habitat h/number of observation
-of habitat h + number of non-prediction of habitat h/number of non-observation
-of habitat h). The final metrics this script use is the mean of TSS per habitat over all
-habitats, weighted by the share of each habitat in the observed habitat distribution.
-}
-\author{
-Matthieu Combaud, Maxime Delprat
-}
From 3772cee19c60d4293f8855afd1658f72dd644bbc Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Fri, 4 Mar 2022 10:15:31 +0100
Subject: [PATCH 041/176] Add files via upload
Addition of a POST_FATE.validation function & updates of NAMESPACE & pkgdown.yml files
---
NAMESPACE | 10 ++++++----
_pkgdown.yml | 10 ++++++----
2 files changed, 12 insertions(+), 8 deletions(-)
diff --git a/NAMESPACE b/NAMESPACE
index 790e668..acd8cd2 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand
+S3method(do,PFG.composition.validation)
S3method(do,habitat.validation)
S3method(plot,predicted.habitat)
S3method(train,RF.habitat)
@@ -28,7 +29,7 @@ export(POST_FATE.graphic_validationStatistics)
export(POST_FATE.graphics)
export(POST_FATE.relativeAbund)
export(POST_FATE.temporalEvolution)
-export(POST_FATE.validation_habitat)
+export(POST_FATE.validation)
export(PRE_FATE.abundBraunBlanquet)
export(PRE_FATE.params_PFGdispersal)
export(PRE_FATE.params_PFGdisturbance)
@@ -59,6 +60,7 @@ export(designLHDNorm)
export(divLeinster)
export(dunn)
export(ecospat.kd)
+export(get.observed.distribution)
importFrom(FD,gowdis)
importFrom(PresenceAbsence,auc)
importFrom(PresenceAbsence,cmx)
@@ -87,7 +89,6 @@ importFrom(cowplot,ggdraw)
importFrom(data.table,dcast)
importFrom(data.table,fread)
importFrom(data.table,fwrite)
-importFrom(data.table,melt)
importFrom(data.table,rbindlist)
importFrom(data.table,rename)
importFrom(data.table,setDT)
@@ -170,7 +171,6 @@ importFrom(huge,huge.npn)
importFrom(methods,as)
importFrom(parallel,mclapply)
importFrom(phyloclim,niche.overlap)
-importFrom(randomForest,)
importFrom(randomForest,randomForest)
importFrom(randomForest,tuneRF)
importFrom(raster,aggregate)
@@ -188,6 +188,7 @@ importFrom(raster,extent)
importFrom(raster,extract)
importFrom(raster,getValues)
importFrom(raster,mask)
+importFrom(raster,ncell)
importFrom(raster,nlayers)
importFrom(raster,origin)
importFrom(raster,predict)
@@ -200,6 +201,7 @@ importFrom(raster,res)
importFrom(raster,stack)
importFrom(raster,writeRaster)
importFrom(raster,xyFromCell)
+importFrom(readr,write_rds)
importFrom(reshape2,dcast)
importFrom(reshape2,melt)
importFrom(sf,st_crop)
@@ -224,6 +226,7 @@ importFrom(stats,runif)
importFrom(stats,sd)
importFrom(stats,var)
importFrom(stats,weighted.mean)
+importFrom(stringr,str_split)
importFrom(stringr,str_sub)
importFrom(tidyverse,write_rds)
importFrom(utils,combn)
@@ -238,5 +241,4 @@ importFrom(utils,txtProgressBar)
importFrom(utils,write.csv)
importFrom(utils,write.table)
importFrom(utils,zip)
-importFrom(vcd,)
useDynLib(RFate, .registration = TRUE)
diff --git a/_pkgdown.yml b/_pkgdown.yml
index c9d2eda..af61946 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -129,7 +129,7 @@ reference:
- "POST_FATE.binaryMaps"
- "POST_FATE.graphic_mapPFGvsHS"
- "POST_FATE.graphic_mapPFG"
- - "POST_FATE.validation_habitat"
+ - "POST_FATE.validation"
- title: Save FATE simulation
contents:
- "SAVE_FATE.step1_PFG"
@@ -145,6 +145,8 @@ reference:
- ".scaleMaps"
- ".getCutoff"
- ".unzip_ALL"
- - "do_habitat_validation"
- - "plot_predicted_habitat"
- - "train_RF_habitat"
+ - "train.RF.habitat"
+ - "do.habitat.validation"
+ - "plot.predicted.habitat"
+ - "get.observed.distribution"
+ - "do.PFG.composition.validation"
From c275cf8e74986aa1fe283c87950a955fa3b2a1b6 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Fri, 4 Mar 2022 10:17:47 +0100
Subject: [PATCH 042/176] Add files via upload
Creation of a new POST_FATE.validation & updates of utils functions associated
---
R/POST_FATE.validation.R | 360 ++++++++++++++++++++++++
R/UTILS.do_PFG_composition_validation.R | 315 +++++++++++++++++++++
R/UTILS.do_habitat_validation.R | 11 +-
R/UTILS.get_observed_distribution.R | 190 +++++++++++++
R/UTILS.plot_predicted_habitat.R | 3 +-
R/UTILS.train_RF_habitat.R | 6 +-
6 files changed, 875 insertions(+), 10 deletions(-)
create mode 100644 R/POST_FATE.validation.R
create mode 100644 R/UTILS.do_PFG_composition_validation.R
create mode 100644 R/UTILS.get_observed_distribution.R
diff --git a/R/POST_FATE.validation.R b/R/POST_FATE.validation.R
new file mode 100644
index 0000000..dd85f77
--- /dev/null
+++ b/R/POST_FATE.validation.R
@@ -0,0 +1,360 @@
+### HEADER ##########################################################################
+##'
+##' @title Computes validation data for habitat, PFG richness and composition for a \code{FATE} simulation.
+##'
+##' @name POST_FATE.validation
+##'
+##' @author Matthieu Combaud, Maxime Delprat
+##'
+##' @description This script is designed to compute validation data for :
+##' \code{Habitat} : compares habitat simulations and observations and
+##' create a map to visualize this comparison with all the \code{FATE} and
+##' observed data.
+##' \code{PFG Composition} : produced a computation of observed distribution
+##' of relative abundance in the simulation area and a computation of distance between
+##' observed and simulated distribution.
+##' \code{PFG Richness} : computes the PFG richness over the whole simulation area
+##' for a \code{FATE} simulation and computes the difference between observed and simulated PFG richness.
+##'
+##' @param name.simulation simulation folder name.
+##' @param sim.version name of the simulation to validate (it works with only one \code{sim.version}).
+##' @param year year of simulation for validation.
+##' @param doHabitat logical. Default \code{TRUE}. If \code{TRUE}, habitat validation module is activated,
+##' if \code{FALSE}, habitat validation module is disabled.
+##' @param obs.path the function needs observed data, please create a folder for them in your
+##' simulation folder and then indicate in this parameter the access path to this new folder (habitat & PFG composition validation).
+##' @param releves.PFG name of file which contain the observed Braund-Blanquet abundance at each site
+##' and each PFG and strata (habitat & PFG composition validation).
+##' @param releves.site name of the file which contain coordinates and a description of
+##' the habitat associated with the dominant species of each site in the studied map (habitat & PFG composition validation).
+##' @param hab.obs name of the file which contain the extended studied map in the simulation (habitat & PFG composition validation).
+##' @param validation.mask name of the file which contain a raster mask that specified which pixels need validation
+##' (habitat & PFG composition validation).
+##' @param studied.habitat default \code{NULL}. If \code{NULL}, the function will
+##' take into account of all habitats in the \code{hab.obs} map. Otherwise, please specify
+##' in a vector habitats that will be take into account for the validation (habitat validation).
+##' @param doComposition logical. Default \code{TRUE}. If \code{TRUE}, PFG composition validation module is activated,
+##' if \code{FALSE}, PFG composition validation module is disabled.
+##' @param PFG.considered_PFG.compo a character vector of the list of PFG considered
+##' in the validation (PFG composition validation).
+##' @param habitat.considered_PFG.compo a character vector of the list of habitat(s)
+##' considered in the validation (PFG composition validation).
+##' @param doRichness logical. Default \code{TRUE}. If \code{TRUE}, PFG richness validation module is activated,
+##' if \code{FALSE}, PFG richness validation module is disabled.
+##' @param list.PFG a character vector which contain all the PFGs taken account in
+##' the simulation and observed in the simulation area (PFG richness validation).
+##' @param exclude.PFG default \code{NULL}. A character vector containing the names
+##' of the PFG you want to exclude from the analysis (PFG richness validation).
+##'
+##' @details
+##'
+##' \describe{
+##' \item{Habitat validation}{The observed habitat is derived from the cesbio map, the simulated habitat
+##' is derived from FATE simulated relative abundance, based on a random forest
+##' algorithm trained on CBNA data. To compare observations and simulations, the function
+##' compute confusion matrix between observation and prediction and then compute the TSS
+##' for each habitat h (number of prediction of habitat h/number of observation
+##' of habitat h + number of non-prediction of habitat h/number of non-observation
+##' of habitat h). The final metrics this script use is the mean of TSS per habitat over all
+##' habitats, weighted by the share of each habitat in the observed habitat distribution.}
+##' \item{PFG composition validation}{This code firstly run the \code{get.observed.distribution} function in order to have
+##' a \code{obs.distri} file which contain the observed distribution per PFG, strata and habitat.
+##' This file is also an argument for the \code{do.PFG.composition.validation} function run next.
+##' This second sub function provide the computation of distance between observed and simulated distribution. \cr
+##' NB : The argument \code{strata.considered_PFG.compo} is by default "A" in the 2 sub functions because
+##' it's easier for a \code{FATE} simulation to provide PFG abundances for all strata. \cr The argument
+##' \code{perStrata.compo} is by default \code{NULL} for the same reasons.}
+##' \item{PFG richness validation}{Firstly, the function updates the \code{list.PFG} with \code{exclude.PFG} vector.
+##' Then, the script takes the abundance per PFG file from the results of the \code{FATE}
+##' simulation and computes the difference between the \code{list.PFG} and all the PFG
+##' which are presents in the abundance file, in order to obtain the PFG richness for a simulation.
+##' The function also determine if an observed PFG is missing in the results of the simulation at
+##' a specific year.}
+##' }
+##'
+##' @return
+##'
+##' Output files :
+##' \describe{
+##' \item{\file{VALIDATION/HABITAT/sim.version}}{containing the prepared CBNA data,
+##' RF model, the performance analyzes (confusion matrix and TSS) for the training and
+##' testing parts of the RF model, the habitat performance file, the habitat prediction file with
+##' observed and simulated habitat for each pixel of the whole map and the final prediction plot.}
+##' }
+##' \describe{
+##' \item{\file{VALIDATION/PFG_COMPOSITION/sim.version}}{1 .csv file which contain the proximity
+##' between observed and simulated data computed for each PFG/strata/habitat. \cr 1 .csv file which
+##' contain the observed relevés transformed into relative metrics. \cr 1 .csv file which contain
+##' the final output with the distribution per PFG, strata and habitat.}
+##' }
+##' \describe{
+##' \item{\file{VALIDATION/PFG_RICHNESS/sim.version}}{1 .csv file of PFG richness in a \code{FATE} simulation.
+##' \cr 1 .csv fie of the PFG extinction frequency in a \code{FATE} simulation. \cr 1 .rds file which is
+##' the abundance per PFG file.
+##' }
+##'
+##' @examples
+##'
+##' ## Habitat validation ---------------------------------------------------------------------------------
+##' POST_FATE.validation(name.simulation = "FATE_Champsaur"
+##' , sim.version = "SIMUL_V4.1"
+##' , year = 2000
+##' , doHabitat = TRUE
+##' , obs.path = "FATE_Champsaur/DATA_OBS/"
+##' , releves.PFG = "releves.PFG.abundance.csv"
+##' , releves.sites = "releves.sites.shp"
+##' , hab.obs = "simplified.cesbio.map.grd"
+##' , validation.mask = "certain.habitat.100m.restricted.grd"
+##' , studied.habitat = NULL
+##' , doComposition = FALSE
+##' , doRichness = FALSE)
+##'
+##' ## PFG composition validation --------------------------------------------------------------------------
+##' list.PFG<-as.factor(c("C1","C2","C3","C4","H1","H2","H3","H4","H5","H6","P1","P2","P3","P4","P5"))
+##' habitat.considered = c("coniferous.forest", "deciduous.forest", "natural.grassland", "woody.heatland")
+##' POST_FATE.validation(name.simulation = "FATE_Champsaur"
+##' , sim.version = "SIMUL_V4.1"
+##' , year = 2000
+##' , doHabitat = FALSE
+##' , obs.path = "FATE_Champsaur/DATA_OBS/"
+##' , releves.PFG = "releves.PFG.abundance.csv"
+##' , releves.sites = "releves.sites.shp"
+##' , hab.obs = "simplified.cesbio.map.grd"
+##' , validation.mask = "certain.habitat.100m.restricted.grd"
+##' , studied.habitat = NULL
+##' , doComposition = TRUE
+##' , PFG.considered_PFG.compo = list.PFG
+##' , habitat.considered_PFG.compo = habitat.considered
+##' , doRichness = FALSE)
+##'
+##' ## PFG richness validation -----------------------------------------------------------------------------
+##' list.PFG<-as.factor(c("C1","C2","C3","C4","H1","H2","H3","H4","H5","H6","P1","P2","P3","P4","P5"))
+##' POST_FATE.validation(name.simulation = "FATE_CHampsaur"
+##' , sim.version = "SIMUL_V4.1"
+##' , year = 2000
+##' , doHabitat = FALSE
+##' , doComposition = FALSE
+##' , doRichness = TRUE
+##' , list.PFG = list.PFG
+##' , exclude.PFG = NULL)
+##'
+##' @export
+##'
+##' @importFrom stringr str_split
+##' @importFrom raster raster projectRaster res crs crop
+##' @importFrom utils read.csv write.csv
+##' @importFrom sf st_read
+##' @foreach foreach foreach %dopar%
+##' @importFrom forcats fct_expand
+##' @importFrom readr write_rds
+##'
+### END OF HEADER ###################################################################
+
+
+POST_FATE.validation = function(name.simulation
+ , sim.version
+ , year
+ , doHabitat = TRUE
+ , obs.path
+ , releves.PFG
+ , releves.sites
+ , hab.obs
+ , validation.mask
+ , studied.habitat = NULL
+ , doComposition = TRUE
+ , PFG.considered_PFG.compo
+ , habitat.considered_PFG.compo
+ , doRichness = TRUE
+ , list.PFG
+ , exclude.PFG = NULL){
+
+ if(doHabitat == TRUE){
+
+ ## GLOBAL PARAMETERS
+
+ dir.create(file.path(name.simulation, "VALIDATION", "HABITAT", sim.version), showWarnings = FALSE)
+
+ # General
+ output.path = paste0(name.simulation, "/VALIDATION")
+ year = year # choice in the year for validation
+
+ # Useful elements to extract from the simulation
+ name = .getParam(params.lines = paste0(name.simulation, "/PARAM_SIMUL/Simul_parameters_", str_split(sim.version, "_")[[1]][2], ".txt"),
+ flag = "MASK",
+ flag.split = "^--.*--$",
+ is.num = FALSE) #isolate the access path to the simulation mask for any FATE simulation
+ simulation.map = raster(paste0(name))
+
+ # For habitat validation
+ # CBNA releves data habitat map
+ releves.PFG<-read.csv(paste0(obs.path, releves.PFG),header=T,stringsAsFactors = T)
+ releves.sites<-st_read(paste0(obs.path, releves.sites))
+ hab.obs<-raster(paste0(obs.path, hab.obs))
+ # Habitat mask at FATE simu resolution
+ hab.obs.modif <- projectRaster(from = hab.obs, res = res(simulation.map)[1], crs = crs(projection(simulation.map)), method = "ngb")
+ habitat.FATE.map <- crop(hab.obs.modif, simulation.map) #reprojection and croping of the extended habitat map in order to have a reduced observed habitat map
+ validation.mask<-raster(paste0(obs.path, validation.mask))
+
+ # Other
+ if(is.null(studied.habitat)){
+ studied.habitat = studied.habitat #if null, the function will study all the habitats in the map
+ } else if(is.character(studied.habitat)){
+ studied.habitat = studied.habitat #if a character vector with habitat names, the functuon will study only the habitats in the vector
+ } else{
+ stop("studied.habitat is not a vector of character")
+ }
+ RF.param = list(
+ share.training=0.7,
+ ntree=500)
+ predict.all.map<-T
+
+ ## TRAIN A RF ON OBSERVED DATA
+
+ RF.model <- train.RF.habitat(releves.PFG = releves.PFG
+ , releves.sites = releves.sites
+ , hab.obs = hab.obs
+ , external.training.mask = NULL
+ , studied.habitat = studied.habitat
+ , RF.param = RF.param
+ , output.path = output.path
+ , perStrata = F
+ , sim.version = sim.version)
+
+ ## USE THE RF MODEL TO VALIDATE FATE OUTPUT
+
+ habitats.results <- do.habitat.validation(output.path = output.path
+ , RF.model = RF.model
+ , habitat.FATE.map = habitat.FATE.map
+ , validation.mask = validation.mask
+ , simulation.map = simulation.map
+ , predict.all.map = predict.all.map
+ , sim.version = sim.version
+ , name.simulation = name.simulation
+ , perStrata = F
+ , hab.obs = hab.obs
+ , year = year)
+
+ ## AGGREGATE HABITAT PREDICTION AND PLOT PREDICTED HABITAT
+
+ # Provide a color df
+ col.df<-data.frame(
+ habitat = RF.model$classes,
+ failure = terrain.colors(length(RF.model$classes), alpha = 0.5),
+ success = terrain.colors(length(RF.model$classes), alpha = 1))
+
+ prediction.map <- plot.predicted.habitat(predicted.habitat = habitats.results
+ , col.df = col.df
+ , simulation.map = simulation.map
+ , output.path = output.path
+ , sim.version = sim.version)
+
+ }
+
+ if(doComposition == TRUE){
+
+ ## GLOBAL PARAMETERS
+
+ if(doHabitat == FALSE){
+
+ # Get observed distribution
+ releves.PFG = read.csv(paste0(obs.path, releves.PFG),header=T,stringsAsFactors = T)
+ releves.sites = st_read(paste0(obs.path, releves.sites))
+ hab.obs = raster(paste0(obs.path, hab.obs))
+ # Do PFG composition validation
+ validation.mask = raster(paste0(obs.path, validation.mask))
+ }
+
+ ## GET OBSERVED DISTRIBUTION
+
+ obs.distri = get.observed.distribution(name.simulation = name.simulation
+ , obs.path = obs.path
+ , releves.PFG = releves.PFG
+ , releves.sites = releves.sites
+ , hab.obs = hab.obs
+ , PFG.considered_PFG.compo = PFG.considered_PFG.compo
+ , strata.considered_PFG.compo = "A"
+ , habitat.considered_PFG.compo = habitat.considered_PFG.compo
+ , perStrata.compo = FALSE
+ , sim.version = sim.version)
+
+ ## DO PFG COMPOSITION VALIDATION
+
+ performance.composition = do.PFG.composition.validation(name.simulation = name.simulation
+ , obs.path = obs.path
+ , sim.version = sim.version
+ , hab.obs = hab.obs
+ , PFG.considered_PFG.compo = PFG.considered_PFG.compo
+ , strata.considered_PFG.compo = "A"
+ , habitat.considered_PFG.compo = habitat.considered_PFG.compo
+ , observed.distribution = obs.distri
+ , perStrata.compo = FALSE
+ , validation.mask = validation.mask
+ , year = year)
+
+ }
+
+ if(doRichness == TRUE){
+
+ output.path = paste0(name.simulation, "/VALIDATION/PFG_RICHNESS/", sim.version)
+
+ #exclude PFG : character vector containing the names of the PFG you want to exclude from the analysis #optional
+
+ #list of PFG of interest
+ list.PFG<-setdiff(list.PFG,exclude.PFG)
+
+ dying.PFG.list<-foreach(i=1:length(sim.version)) %dopar% {
+
+ simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_", sim.version, ".csv"))
+ simu_PFG = simu_PFG[,c("PFG","ID.pixel", paste0("X",year))]
+ colnames(simu_PFG) = c("PFG", "pixel", "abs")
+
+ return(setdiff(list.PFG,unique(simu_PFG$PFG)))
+ }
+
+ #names the results
+ names(dying.PFG.list) = sim.version
+
+ #get table with PFG richness
+ PFG.richness.df<-data.frame(simulation=names(dying.PFG.list),richness=length(list.PFG)-unlist(lapply(dying.PFG.list,FUN="length")))
+
+ #get vector with one occurence per PFG*simulation with dying of the PFG, as factor with completed levels in order to have table with all PFG, including those which never die
+ dyingPFG.vector<-as.factor(unlist(dying.PFG.list))
+ dyingPFG.vector<-fct_expand(dyingPFG.vector,list.PFG)
+ dying.distribution<-round(table(dyingPFG.vector)/length(sim.version),digits=2)
+
+ #output
+ output = list(PFG.richness.df, dying.distribution ,dying.PFG.list)
+ names(output)<-c("PFG.richness.df","dying.distribution","dying.PFG.list")
+
+ dir.create(output.path,recursive = TRUE, showWarnings = FALSE)
+
+ write.csv(PFG.richness.df,paste0(output.path,"/performance.richness.csv"),row.names=F)
+ write.csv(dying.distribution,paste0(output.path,"/PFG.extinction.frequency.csv"),row.names=F)
+ write_rds(dying.PFG.list,file=paste0(output.path,"/dying.PFG.list.rds"),compress="none")
+
+ }
+
+ cat("\n ---------- END OF FUNCTION \n")
+
+ if(doRichness == TRUE){
+ cat("\n ---------- PFG RICHNESS VALIDATION RESULTS \n")
+ cat(paste0("\n Richness at year ", year, " : ", output[[1]][2], "\n"))
+ cat(paste0("\n Number of PFG extinction at year ", year, " : ", sum(output[[2]]), "\n"))
+ } else{cat("\n ---------- PFG RICHNESS VALIDATION DISABLED \n")
+ }
+ if(doHabitat == TRUE){
+ hab.pred = read.csv(paste0(name.simulation, "/VALIDATION/HABITAT/", sim.version, "/hab.pred.csv"))
+ failure = as.numeric((table(hab.pred$prediction.code)[1]/sum(table(hab.pred$prediction.code)))*100)
+ success = as.numeric((table(hab.pred$prediction.code)[2]/sum(table(hab.pred$prediction.code)))*100)
+ cat("\n ---------- HABITAT VALIDATION RESULTS \n")
+ cat(paste0("\n", round(failure, digits = 2), "% of habitats are not correctly predicted by ", sim.version, " \n"))
+ cat(paste0("\n", round(success, digits = 2), "% of habitats are correctly predicted by ", sim.version, " \n"))
+ plot(prediction.map)
+ } else{cat("\n ---------- HABITAT VALIDATION DISABLED \n")
+ }
+ if(doComposition == TRUE){
+ cat("\n ---------- PFG COMPOSITION VALIDATION RESULTS \n")
+ return(performance.composition)
+ } else{cat("\n ---------- PFG COMPOSITION VALIDATION DISABLED \n")
+ }
+}
diff --git a/R/UTILS.do_PFG_composition_validation.R b/R/UTILS.do_PFG_composition_validation.R
new file mode 100644
index 0000000..6c6dea1
--- /dev/null
+++ b/R/UTILS.do_PFG_composition_validation.R
@@ -0,0 +1,315 @@
+### HEADER #####################################################################
+##'
+##' @title Compute distance between observed and simulated distribution
+##'
+##' @name do.PFG.composition.validation
+##'
+##' @author Matthieu Combaud, Maxime Delprat
+##'
+##' @description This script is designed to compare the difference between the
+##' PFG distribution in observed and simulated data. For a set of PFG, strata and
+##' habitats chosen, the function compute distance between observed and simulated
+##' distribution for a precise \code{FATE} simulation.
+##'
+##' @param name.simulation simulation folder name.
+##' @param obs.path the function needs observed data, please create a folder for them in your
+##' simulation folder and then indicate in this parameter the access path to this new folder.
+##' @param sim.version name of the simulation we want to validate (it works with
+##' only one \code{sim.version}).
+##' @param hab.obs file which contain the extended studied map in the simulation.
+##' @param PFG.considered_PFG.compo a character vector of the list of PFG considered
+##' in the validation.
+##' @param strata.considered_PFG.compo a character vector of the list of precise
+##' strata considered in the validation.
+##' @param habitat.considered_PFG.compo a character vector of the list of habitat(s)
+##' considered in the validation.
+##' @param observed.distribution PFG observed distribution table.
+##' @param perStrata.compo Logical. All strata together (FALSE) or per strata (TRUE).
+##' @param validation.mask file which contain a raster mask that specified
+##' which pixels need validation.
+##' @param year year of simulation to validate.
+##'
+##' @details
+##'
+##' After preliminary checks, this code extract observed habitat from the \code{hab.obs}
+##' map and, then, merge it with the simulated PFG abundance file from results of a \code{FATE}
+##' simulation. After filtration of the required PFG, strata and habitats, the function
+##' transform the data into relative metrics and, then, compute distribution per PFG, strata
+##' and habitat (if necessary). Finally, the code computes proximity between observed
+##' and simulated data, per PFG, strata and habitat.
+##'
+##' @return
+##'
+##' 1 file is created in
+##' \describe{
+##' \item{\file{VALIDATION/PFG_COMPOSITION/sim.version} :
+##' A .csv file which contain the proximity between observed and simulated data computed
+##' for each PFG/strata/habitat.
+##'
+##' @export
+##'
+##' @importFrom raster raster projectRaster res crs crop extent origin compareRaster
+##' getValues ncell aggregate compareCRS
+##' @importFrom utils read.csv write.csv
+##' @importFrom dplyr rename filter group_by mutate %>% select
+##' @importFrom data.table setDT
+##'
+### END OF HEADER ##############################################################
+
+
+do.PFG.composition.validation<-function(name.simulation, obs.path, sim.version, hab.obs, PFG.considered_PFG.compo, strata.considered_PFG.compo, habitat.considered_PFG.compo, observed.distribution, perStrata.compo, validation.mask, year){
+
+ output.path = paste0(name.simulation, "/VALIDATION/PFG_COMPOSITION/", sim.version)
+ name = .getParam(params.lines = paste0(name.simulation, "/PARAM_SIMUL/Simul_parameters_", str_split(sim.version, "_")[[1]][2], ".txt"),
+ flag = "MASK",
+ flag.split = "^--.*--$",
+ is.num = FALSE) #isolate the access path to the simulation mask for any FATE simulation
+ simulation.map = raster(paste0(name))
+ hab.obs.modif = projectRaster(from = hab.obs, res = res(simulation.map)[1], crs = crs(projection(simulation.map)), method = "ngb")
+ habitat.FATE.map = crop(hab.obs.modif, simulation.map) #reprojection and croping of the extended habitat map in order to have a reduced observed habitat map
+
+ #Auxiliary function to compute proximity (on a 0 to 1 scale, 1 means quantile equality)
+ compute.proximity<-function(simulated.quantile,observed.quantile){
+ #for a given PFG*habitat*strata, return a "distance", computed as the sum of the absolute gap between observed and simulated quantile
+ return(1-sum(abs(simulated.quantile-observed.quantile))/4)
+ }
+
+ ############################
+ # 1. Preliminary checks
+ ############################
+
+ #check if strata definition used in the RF model is the same as the one used to analyze FATE output
+ if(perStrata.compo==F){
+ list.strata<-"all"
+ }else{
+ stop("check 'perStrata' parameter and/or the names of strata in param$list.strata.releves & param$list.strata.simul")
+ }
+
+ #consistency between habitat.FATE.map and simulation.map
+ if(!compareCRS(simulation.map,habitat.FATE.map)){
+ print("reprojecting habitat.FATE.map to match simulation.map crs")
+ habitat.FATE.map<-projectRaster(habitat.FATE.map,crs=crs(simulation.map))
+ }
+ if(!all(res(habitat.FATE.map)==res(simulation.map))){
+ stop("provide habitat.FATE.map with same resolution as simulation.map")
+ }
+ if(extent(simulation.map)!=extent(habitat.FATE.map)){
+ print("cropping habitat.FATE.map to match simulation.map")
+ habitat.FATE.map<-crop(x=habitat.FATE.map,y=simulation.map)
+ }
+ if(!all(origin(simulation.map)==origin(habitat.FATE.map))){
+ print("setting origin habitat.FATE.map to match simulation.map")
+ origin(habitat.FATE.map)<-origin(simulation.map)
+ }
+ if(!compareRaster(simulation.map,habitat.FATE.map)){ #this is crucial to be able to identify pixel by their index and not their coordinates
+ stop("habitat.FATE.map could not be coerced to match simulation.map")
+ }else{
+ print("simulation.map & habitat.FATE.map are (now) consistent")
+ }
+
+ #adjust validation.mask accordingly
+ if(!all(res(habitat.FATE.map)==res(validation.mask))){
+ validation.mask<-projectRaster(from=validation.mask,to=habitat.FATE.map,method = "ngb")
+ }
+ if(extent(validation.mask)!=extent(habitat.FATE.map)){
+ validation.mask<-crop(x=validation.mask,y=habitat.FATE.map)
+ }
+ if(!compareRaster(validation.mask,habitat.FATE.map)){
+ stop("error in correcting validation.mask to match habitat.FATE.map")
+ }else{
+ print("validation.mask is (now) consistent with (modified) habitat.FATE.map")
+ }
+
+
+ #########################################
+ # 2. Get observed habitat
+ #########################################
+
+ #index of the pixels in the simulation area
+ in.region.pixels<-which(getValues(simulation.map)==1)
+
+ #habitat df for the whole simulation area
+ habitat.whole.area.df<-data.frame(pixel=seq(from=1,to=ncell(habitat.FATE.map),by=1),code.habitat=getValues(habitat.FATE.map),for.validation=getValues(validation.mask))
+ habitat.whole.area.df<-filter(habitat.whole.area.df,is.element(pixel,in.region.pixels)&for.validation==1)
+ habitat.whole.area.df<-merge(habitat.whole.area.df,dplyr::select(levels(hab.obs)[[1]],c(ID,habitat)),by.x="code.habitat",by.y="ID")
+
+ print("Habitat in the simulation area:")
+ table(habitat.whole.area.df$habitat,useNA="always")
+
+
+ ##############################
+ # 3. Loop on simulations
+ ##############################
+
+ print("processing simulations")
+
+ results.simul<-list()
+ for(i in 1:length(sim.version)) {
+
+ # 3.1. Data preparation
+ #########################
+
+ #get simulated abundance per pixel*strata*PFG for pixels in the simulation area
+ simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_", sim.version, ".csv"))
+ simu_PFG = simu_PFG[,c("PFG","ID.pixel", paste0("X",year))]
+ colnames(simu_PFG) = c("PFG", "pixel", "abs")
+
+ #aggregate per strata group with the correspondence provided in input
+ simu_PFG$new.strata<-NA
+
+ #attribute the "new.strata" value to group FATE strata used in the simulations into strata comparable with CBNA ones (all strata together or per strata)
+ if(perStrata.compo==F){
+ simu_PFG$new.strata<-"A"
+ }
+
+ simu_PFG<-dplyr::rename(simu_PFG,"strata"="new.strata")
+
+ #agggregate all the rows with same pixel, (new) strata and PFG (necessary since possibly several line with the same pixel+strata+PFG after strata grouping)
+ simu_PFG<-aggregate(abs~pixel+strata+PFG,data=simu_PFG,FUN="sum") #sum and not mean because for a given CBNA strata some PFG are present in 2 FATE strata (let's say 1 unit in each) and other are present in 3 FATE strata (let's say one unit in each), so taking the mean would suppress the info that the second PFG is more present!
+
+ # 3.2. Merge with habitat
+ ###########################
+
+ #here it is crucial to have exactly the same raster structure for "simulation.map" and "habitat.FATE.map", so as to be able to do the merge on the "pixel" variable
+ simu_PFG<-merge(simu_PFG,habitat.whole.area.df,by="pixel") #at this stage we have all the pixels in the simulation area
+
+ # 3.3. Filter the required PFG, strata and habitat
+ ###################################################
+
+ simu_PFG<-filter(
+ simu_PFG,
+ is.element(PFG,PFG.considered_PFG.compo)&
+ is.element(strata,strata.considered_PFG.compo)&
+ is.element(habitat,habitat.considered_PFG.compo)
+ )
+
+ # 3.4.Transform into a relative metrics (here relative.metric is relative coverage)
+ #####################################################################################
+
+ #important to do it only here, because if we filter some PFG, it changes the value of the relative metric (no impact of filtering for habitat or for strata since we do it per strata, and habitat is constant across a given pixel)
+
+
+ #careful: if several strata/habitat are selected, the computation is made for each strata separately
+ simu_PFG<-as.data.frame(simu_PFG %>% group_by(pixel,strata) %>% mutate(relative.metric= round(prop.table(abs),digits = 2)))
+ simu_PFG$relative.metric[is.na(simu_PFG$relative.metric)]<-0 #NA because abs==0 for some PFG, so put 0 instead of NA (maybe not necessary)
+ simu_PFG$abs<-NULL
+
+ # 3.5. Compute distribution per PFG, and if require per strata/habitat (else all strata/habitat will be considered together)
+ ##############################################################################################################################
+
+
+ #prepare the df storing quantile values
+ simulated.distribution<-expand.grid(
+ PFG=PFG.considered_PFG.compo,
+ habitat=habitat.considered_PFG.compo,
+ strata=strata.considered_PFG.compo
+ )
+
+ null.quantile<-data.frame(rank=seq(0,4,1)) #to have 5 rows per PFG*strata*habitat
+ simulated.distribution<-merge(simulated.distribution,null.quantile,all=T)
+
+ if(dim(simu_PFG)[1]>0){
+
+ distribution<-setDT(simu_PFG)[, quantile(relative.metric), by=c("PFG","habitat","strata")]
+ distribution<-rename(distribution,"quantile"="V1")
+ distribution<-data.frame(distribution,rank=seq(0,4,1)) #add the rank number
+
+ simulated.distribution<-merge(simulated.distribution,distribution,by=c("PFG","habitat","strata","rank"),all.x=T) # add the simulated quantiles, "all.x=T" to keep the unobserved combination (with quantile=NA then)
+
+ simulated.distribution$quantile[is.na(simulated.distribution$quantile)]<-0 # "NA" in the previous line means that the corresponding combination PFG*strata*habitat is not present, so as a null relative abundance !
+
+ }else{
+ simulated.distribution$quantile<-0
+ }
+
+ simulated.distribution$habitat<-as.character(simulated.distribution$habitat) #else may generate problem in ordering the database
+ simulated.distribution$strata<-as.character(simulated.distribution$strata) #else may generate problem in ordering the database
+ simulated.distribution$PFG<-as.character(simulated.distribution$PFG) #else may generate problem in ordering the database
+ simulated.distribution$rank<-as.numeric(simulated.distribution$rank) #else may generate problem in ordering the database
+
+
+ # 3.6. Order the table to be able to have output in the right format
+ #####################################################################
+ simulated.distribution<-setDT(simulated.distribution)
+ simulated.distribution<-simulated.distribution[order(habitat,strata,PFG,rank)]
+
+
+ # 3.7. Rename
+ ##############
+ simulated.distribution<-rename(simulated.distribution,"simulated.quantile"="quantile")
+
+
+ # 3.8 Rename and reorder the observed database
+ ###############################################
+
+ observed.distribution$habitat<-as.character(observed.distribution$habitat) #else may generate problem in ordering the database
+ observed.distribution$strata<-as.character(observed.distribution$strata) #else may generate problem in ordering the database
+ observed.distribution$PFG<-as.character(observed.distribution$PFG) #else may generate problem in ordering the database
+ observed.distribution$rank<-as.numeric(observed.distribution$rank) #else may generate problem in ordering the database
+
+ observed.distribution<-setDT(observed.distribution)
+ observed.distribution<-observed.distribution[order(habitat,strata,PFG,rank)]
+
+ # "if" to check that observed and simulated databases are in the same order
+ if(
+ !(
+ all(simulated.distribution$PFG==observed.distribution$PFG)&
+ all(simulated.distribution$habitat==observed.distribution$habitat)&
+ all(simulated.distribution$strata==observed.distribution$strata)&
+ all(simulated.distribution$rank==observed.distribution$rank)
+ )
+ ){
+ stop("Problem in observed vs simulated database (problem in the PFG*strata*habitat considered or in the database order)")
+ }
+
+ # 3.9. Merge observed and simulated data
+ #########################################
+
+ simulated.distribution<-cbind(simulated.distribution,observed.quantile=observed.distribution$observed.quantile) #quicker than a merge, but we can do it only because we have worked on the order of the DT
+
+ # 3.10 Compute proximity between observed and simulated data, per PFG*strata*habitat
+ #####################################################################################
+
+ #we get rid off rank==0 because there is good chance that it is nearly always equal to zero both in observed and simulated data, and that would provide a favorable bias in the results
+
+ simulated.distribution<-filter(simulated.distribution,rank!=0)
+
+ proximity<-simulated.distribution[,compute.proximity(simulated.quantile=simulated.quantile,observed.quantile=observed.quantile),by=c("PFG","habitat","strata")]
+
+
+ proximity<-rename(proximity,"proximity"="V1")
+
+ proximity<-proximity[order(habitat,strata,PFG)] #to have output in the same order for all simulations
+
+
+ # 3.11. Aggregate results for the different PFG
+ ################################################
+
+ aggregated.proximity<-proximity[,mean(proximity),by=c("habitat","strata")]
+ aggregated.proximity<-rename(aggregated.proximity,"aggregated.proximity"="V1")
+ aggregated.proximity$aggregated.proximity<-round(aggregated.proximity$aggregated.proximity,digits=2)
+ aggregated.proximity$simul<-sim.version
+
+ # return(aggregated.proximity)
+
+ #line added because the foreach method does not work
+ results.simul[[i]]<-aggregated.proximity
+
+ }
+
+ # 4. Put in the output format
+ ##############################
+
+ results<-sapply(results.simul,function(X){X$aggregated.proximity})
+ rownames(results)<-paste0(results.simul[[1]]$habitat,"_",results.simul[[1]]$strata)
+ colnames(results)<-sim.version
+ results<-t(results)
+ results<-as.data.frame(results)
+ results$simulation<-rownames(results)
+
+ #save and return
+ write.csv(results,paste0(output.path,"/performance.composition.csv"),row.names = F)
+
+ return(results)
+}
+
diff --git a/R/UTILS.do_habitat_validation.R b/R/UTILS.do_habitat_validation.R
index 2296d4e..7321724 100644
--- a/R/UTILS.do_habitat_validation.R
+++ b/R/UTILS.do_habitat_validation.R
@@ -54,8 +54,6 @@
##' @importFrom foreach foreach %dopar%
##' @importFrom forcats fct_expand
##' @importFrom reshape2 dcast
-##' @importFrom randomForest
-##' @importFrom vcd
##' @importFrom caret confusionMatrix
##' @importFrom utils write.csv
##'
@@ -167,7 +165,8 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
#get simulated abundance per pixel*strata*PFG for pixels in the simulation area
simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_", sim.version, ".csv"))
- simu_PFG = simu_PFG[,c("PFG","ID.pixel", paste0("X",year))]
+ simu_PFG = simu_PFG[,c("PFG","ID.pixel", paste0("X",year))] #keep only the PFG, ID.pixel and abundance at any year columns
+ #careful : the number of abundance data files to save is to defined in POST_FATE.temporal.evolution function
colnames(simu_PFG) = c("PFG", "pixel", "abs")
#aggregate per strata group with the correspondance provided in input
@@ -213,7 +212,7 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
#################################
data.validation<-filter(data.FATE.PFG.habitat,for.validation==1)
- x.validation<-select(data.validation,all_of(RF.predictors))
+ x.validation<-dplyr::select(data.validation,all_of(RF.predictors))
y.validation<-data.validation$habitat
y.validation.predicted<-predict(object=RF.model,newdata=x.validation,type="response",norm.votes=T)
@@ -235,7 +234,7 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
if(predict.all.map==T){
- y.all.map.predicted = predict(object=RF.model,newdata=select(data.FATE.PFG.habitat,all_of(RF.predictors)),type="response",norm.votes=T)
+ y.all.map.predicted = predict(object=RF.model,newdata=dplyr::select(data.FATE.PFG.habitat,all_of(RF.predictors)),type="response",norm.votes=T)
y.all.map.predicted = as.data.frame(y.all.map.predicted)
y.all.map.predicted$pixel = data.FATE.PFG.habitat$pixel
colnames(y.all.map.predicted) = c(sim.version, "pixel")
@@ -268,7 +267,7 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
#deal with the results regarding habitat prediction over the whole map
all.map.prediction = results.simul[[1]]$y.all.map.predicted
- all.map.prediction = merge(all.map.prediction, select(habitat.whole.area.df, c(pixel,habitat)), by = "pixel")
+ all.map.prediction = merge(all.map.prediction, dplyr::select(habitat.whole.area.df, c(pixel,habitat)), by = "pixel")
all.map.prediction = rename(all.map.prediction,"true.habitat"="habitat")
#save
diff --git a/R/UTILS.get_observed_distribution.R b/R/UTILS.get_observed_distribution.R
new file mode 100644
index 0000000..f7560d6
--- /dev/null
+++ b/R/UTILS.get_observed_distribution.R
@@ -0,0 +1,190 @@
+### HEADER #####################################################################
+##'
+##' @title Compute distribution of relative abundance over observed relevés
+##'
+##' @name get.observed.distribution
+##'
+##' @author Matthieu Combaud, Maxime Delprat
+##'
+##' @description This script is designed to compute distribution, per PFG/strata/habitat,
+##' of relative abundance, from observed data.
+##'
+##' @param name.simulation simulation folder name.
+##' @param obs.path the function needs observed data, please create a folder for them in your
+##' simulation folder and then indicate in this parameter the access path to this new folder.
+##' @param releves.PFG file which contain the observed Braund-Blanquet abundance at each site
+##' and each PFG and strata.
+##' @param releves.sites file which contain coordinates and a description of
+##' the habitat associated with the dominant species of each site in the studied map.
+##' @param hab.obs raster map of the extended studied area in the simulation.
+##' @param PFG.considered_PFG.compo a character vector of the list of PFG considered
+##' in the validation.
+##' @param strata.considered_PFG.compo a character vector of the list of precise
+##' strata considered in the validation.
+##' @param habitat.considered_PFG.compo a character vector of the list of habitat(s)
+##' considered in the validation.
+##' @param perStrata.compo Logical. All strata together (FALSE) or per strata (TRUE).
+##' @param sim.version name of the simulation we want to validate (it works with
+##' only one \code{sim.version}).
+##'
+##' @details
+##'
+##' The function takes the \code{releves.PFG} and \code{releves.sites} files and
+##' aggregate coverage per PFG. Then, the code get habitat information from also
+##' the \code{hab.obs} map, keep only interesting habitat, strata and PFG, and
+##' transform the data into relative metrics. Finally, the script computes distribution
+##' per PFG, and if require per strata/habitat (else all strata/habitat will be considered together).
+##'
+##' @return
+##'
+##' 2 files are created in
+##' \describe{
+##' \item{\file{VALIDATION/PFG_COMPOSITION/sim.version} :
+##' 1 .csv file which contain the observed relevés transformed into relative metrics.
+##' 1 .csv file which contain the final output with the distribution per PFG, strata and habitat.
+##'
+##' @export
+##'
+##' @importFrom dplyr filter select filter group_by mutate %>% rename
+##' @importFrom raster aggregate compareCRS res crs
+##' @importFrom sf st_transform st_crop
+##' @importFrom utils write.csv
+##' @importFrom data.table setDT
+##'
+### END OF HEADER ##############################################################
+
+
+get.observed.distribution<-function(name.simulation
+ , obs.path
+ , releves.PFG
+ , releves.sites
+ , hab.obs
+ , PFG.considered_PFG.compo
+ , strata.considered_PFG.compo
+ , habitat.considered_PFG.compo
+ , perStrata.compo
+ , sim.version){
+
+ composition.mask = NULL
+ output.path = paste0(name.simulation, "/VALIDATION/PFG_COMPOSITION/", sim.version)
+ dir.create(file.path(output.path), recursive = TRUE, showWarnings = FALSE)
+
+ #1. Aggregate coverage per PFG
+ #########################################
+
+ #identify sites with wrong BB values (ie values that cannot be converted by the PRE_FATE.abundBraunBlanquet function)
+ releves.PFG<-filter(releves.PFG,is.element(BB,c(NA, "NA", 0, "+", "r", 1:5)))
+
+ #transformation into coverage percentage
+ releves.PFG$coverage<-PRE_FATE.abundBraunBlanquet(releves.PFG$BB)/100 #as a proportion, not a percentage
+
+ if(perStrata.compo==T){
+ aggregated.releves.PFG<-aggregate(coverage~site+PFG+strata,data=releves.PFG,FUN="sum")
+ }else if(perStrata.compo==F){
+ aggregated.releves.PFG<-aggregate(coverage~site+PFG,data=releves.PFG,FUN="sum")
+ aggregated.releves.PFG$strata<-"A" #"A" is for "all". Important to have a single-letter code here (useful to check consistency between relevés strata and model strata)
+ }
+
+
+ #2. Get habitat information
+ ###################################
+
+ #get sites coordinates
+ aggregated.releves.PFG<-merge(dplyr::select(releves.sites,c(site)),aggregated.releves.PFG,by="site")
+
+ #get habitat code and name
+ if(compareCRS(aggregated.releves.PFG,hab.obs)){
+ aggregated.releves.PFG$code.habitat<-raster::extract(x=hab.obs,y=aggregated.releves.PFG)
+ }else{
+ aggregated.releves.PFG<-st_transform(x=aggregated.releves.PFG,crs=crs(hab.obs))
+ aggregated.releves.PFG$code.habitat<-raster::extract(x=hab.obs,y=aggregated.releves.PFG)
+ }
+
+ #correspondance habitat code/habitat name
+ table.habitat.releve<-levels(hab.obs)[[1]]
+
+ aggregated.releves.PFG<-merge(aggregated.releves.PFG,dplyr::select(table.habitat.releve,c(ID,habitat)),by.x="code.habitat",by.y="ID")
+
+ #(optional) keep only releves data in a specific area
+ if(!is.null(composition.mask)){
+
+ if(compareCRS(aggregated.releves.PFG,composition.mask)==F){ #as this stage it is not a problem to transform crs(aggregated.releves.PFG) since we have no more merge to do (we have already extracted habitat info from the map)
+ aggregated.releves.PFG<-st_transform(x=aggregated.releves.PFG,crs=crs(composition.mask))
+ }
+
+ aggregated.releves.PFG<-st_crop(x=aggregated.releves.PFG,y=composition.mask)
+ print("'releve' map has been cropped to match 'external.training.mask'.")
+ }
+
+
+ # 3. Keep only releve on interesting habitat, strata and PFG
+ ##################################################################"
+
+ aggregated.releves.PFG<-as.data.frame(aggregated.releves.PFG)
+ aggregated.releves.PFG<-dplyr::select(aggregated.releves.PFG,c(site,PFG,strata,coverage,habitat))
+
+ aggregated.releves.PFG<-filter(
+ aggregated.releves.PFG,
+ is.element(PFG,PFG.considered_PFG.compo)&
+ is.element(strata,strata.considered_PFG.compo)&
+ is.element(habitat,habitat.considered_PFG.compo)
+ )
+
+
+ #4.Transform into a relative metrics (here relative.metric is relative coverage)
+ ###################################################################################
+
+ #important to do it only here, because if we filter some PFG, it changes the value of the relative metric
+ #careful: if several strata are selected, the computation is made for each strata separately
+ aggregated.releves.PFG<-as.data.frame(aggregated.releves.PFG %>% group_by(site,strata) %>% mutate(relative.metric= round(prop.table(coverage),digits = 2)))
+ aggregated.releves.PFG$relative.metric[is.na(aggregated.releves.PFG$relative.metric)]<-0 #NA because abs==0 for some PFG, so put 0 instead of NA (maybe not necessary)
+ aggregated.releves.PFG$coverage<-NULL
+
+ print("releve data have been transformed into a relative metric")
+
+
+ # 5. Save data
+ #####################
+ write.csv(aggregated.releves.PFG,paste0(output.path,"/CBNA.releves.prepared.csv"),row.names = F)
+
+
+ # 6. Compute distribution per PFG, and if require per strata/habitat (else all strata/habitat will be considered together)
+ ####################################
+
+ distribution<-setDT(aggregated.releves.PFG)[, quantile(relative.metric), by=c("PFG","habitat","strata")]
+ distribution<-rename(distribution,"quantile"="V1")
+ distribution<-data.frame(distribution,rank=seq(0,5,1)) #to be able to sort on quantile
+
+ # 7. Add the missing PFG*habitat*strata
+ #final distribution is the distribution once the missing combination have been added. For these combination, all quantiles are set to 0
+
+ observed.distribution<-expand.grid(
+ PFG=PFG.considered_PFG.compo,
+ habitat=habitat.considered_PFG.compo,
+ strata=strata.considered_PFG.compo
+ )
+
+ null.quantile<-data.frame(rank=seq(0,4,1)) #to have 5 rows per PFG*strata*habitat
+ observed.distribution<-merge(observed.distribution,null.quantile,all=T)
+
+ observed.distribution<-merge(observed.distribution,distribution,by=c("PFG","habitat","strata","rank"),all.x=T) # "all.x=T" to keep the unobserved combination
+
+ observed.distribution$quantile[is.na(observed.distribution$quantile)]<-0
+
+ # 8. Order the table to be able to have output in the right format
+ observed.distribution<-setDT(observed.distribution)
+ observed.distribution<-observed.distribution[order(habitat,strata,PFG,rank)]
+
+ observed.distribution<-rename(observed.distribution,"observed.quantile"="quantile")
+
+
+ # 9. Save results
+ ##########################################
+ write.csv(observed.distribution,paste0(output.path,"/observed.distribution.csv"),row.names = F)
+
+ # 8. Return
+ ####################
+
+ return(observed.distribution)
+
+}
diff --git a/R/UTILS.plot_predicted_habitat.R b/R/UTILS.plot_predicted_habitat.R
index 0504edb..266d0ca 100644
--- a/R/UTILS.plot_predicted_habitat.R
+++ b/R/UTILS.plot_predicted_habitat.R
@@ -35,12 +35,13 @@
##' @export
##'
##' @importFrom dplyr select all_of
-##' @importFrom data.table melt rename
+##' @importFrom data.table rename
##' @importFrom utils write.csv
##' @importFrom raster raster crs extent res ratify writeRaster
##' @importFrom stats complete.cases
##' @importFrom ggplot2 ggplot geom_raster coord_equal scale_fill_manual
##' ggtitle guides theme ggsave
+##' @importFrom reshape2 melt
##'
### END OF HEADER ##############################################################
diff --git a/R/UTILS.train_RF_habitat.R b/R/UTILS.train_RF_habitat.R
index ba833e0..885e6be 100644
--- a/R/UTILS.train_RF_habitat.R
+++ b/R/UTILS.train_RF_habitat.R
@@ -173,7 +173,7 @@ train.RF.habitat<-function(releves.PFG
#run optimization algo (careful : optimization over OOB...)
mtry.perf<-as.data.frame(
tuneRF(
- x=select(releves.training,-c(code.habitat,site,habitat,geometry)),
+ x=dplyr::select(releves.training,-c(code.habitat,site,habitat,geometry)),
y=releves.training$habitat,
strata=releves.training$habitat,
sampsize=nrow(releves.training),
@@ -187,9 +187,9 @@ train.RF.habitat<-function(releves.PFG
#run real model
model<- randomForest(
- x=select(releves.training,-c(code.habitat,site,habitat,geometry)),
+ x=dplyr::select(releves.training,-c(code.habitat,site,habitat,geometry)),
y=releves.training$habitat,
- xtest=select(releves.testing,-c(code.habitat,site,habitat,geometry)),
+ xtest=dplyr::select(releves.testing,-c(code.habitat,site,habitat,geometry)),
ytest=releves.testing$habitat,
strata=releves.training$habitat,
sampsize=nrow(releves.training),
From 91f44dc67e0933bcfc4198a70b0dd5d91f78378c Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Fri, 4 Mar 2022 10:18:43 +0100
Subject: [PATCH 043/176] Add files via upload
Creation of a new POST_FATE.validation & updates of utils functions associated
---
man/POST_FATE.validation.Rd | 159 +++++++++++++++++++++++++++
man/do.PFG.composition.validation.Rd | 69 ++++++++++++
man/get.observed.distribution.Rd | 64 +++++++++++
3 files changed, 292 insertions(+)
create mode 100644 man/POST_FATE.validation.Rd
create mode 100644 man/do.PFG.composition.validation.Rd
create mode 100644 man/get.observed.distribution.Rd
diff --git a/man/POST_FATE.validation.Rd b/man/POST_FATE.validation.Rd
new file mode 100644
index 0000000..6a9e87f
--- /dev/null
+++ b/man/POST_FATE.validation.Rd
@@ -0,0 +1,159 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/POST_FATE.validation.R
+\name{POST_FATE.validation}
+\alias{POST_FATE.validation}
+\title{Computes validation data for habitat, PFG richness and composition for a \code{FATE} simulation.}
+\usage{
+POST_FATE.validation(
+ name.simulation,
+ sim.version,
+ year,
+ doHabitat = TRUE,
+ obs.path,
+ releves.PFG,
+ releves.sites,
+ hab.obs,
+ validation.mask,
+ studied.habitat = NULL,
+ doComposition = TRUE,
+ PFG.considered_PFG.compo,
+ habitat.considered_PFG.compo,
+ doRichness = TRUE,
+ list.PFG,
+ exclude.PFG = NULL
+)
+}
+\arguments{
+\item{name.simulation}{simulation folder name.}
+
+\item{sim.version}{name of the simulation to validate (it works with only one \code{sim.version}).}
+
+\item{year}{year of simulation for validation.}
+
+\item{doHabitat}{logical. Default \code{TRUE}. If \code{TRUE}, habitat validation module is activated,
+if \code{FALSE}, habitat validation module is disabled.}
+
+\item{obs.path}{the function needs observed data, please create a folder for them in your
+simulation folder and then indicate in this parameter the access path to this new folder (habitat & PFG composition validation).}
+
+\item{releves.PFG}{name of file which contain the observed Braund-Blanquet abundance at each site
+and each PFG and strata (habitat & PFG composition validation).}
+
+\item{hab.obs}{name of the file which contain the extended studied map in the simulation (habitat & PFG composition validation).}
+
+\item{validation.mask}{name of the file which contain a raster mask that specified which pixels need validation
+(habitat & PFG composition validation).}
+
+\item{studied.habitat}{default \code{NULL}. If \code{NULL}, the function will
+take into account of all habitats in the \code{hab.obs} map. Otherwise, please specify
+in a vector habitats that will be take into account for the validation (habitat validation).}
+
+\item{doComposition}{logical. Default \code{TRUE}. If \code{TRUE}, PFG composition validation module is activated,
+if \code{FALSE}, PFG composition validation module is disabled.}
+
+\item{PFG.considered_PFG.compo}{a character vector of the list of PFG considered
+in the validation (PFG composition validation).}
+
+\item{habitat.considered_PFG.compo}{a character vector of the list of habitat(s)
+considered in the validation (PFG composition validation).}
+
+\item{doRichness}{logical. Default \code{TRUE}. If \code{TRUE}, PFG richness validation module is activated,
+if \code{FALSE}, PFG richness validation module is disabled.}
+
+\item{list.PFG}{a character vector which contain all the PFGs taken account in
+the simulation and observed in the simulation area (PFG richness validation).}
+
+\item{exclude.PFG}{default \code{NULL}. A character vector containing the names
+of the PFG you want to exclude from the analysis (PFG richness validation).}
+
+\item{releves.site}{name of the file which contain coordinates and a description of
+the habitat associated with the dominant species of each site in the studied map (habitat & PFG composition validation).}
+}
+\value{
+
+}
+\description{
+This script is designed to compute validation data for :
+\code{Habitat} : compares habitat simulations and observations and
+create a map to visualize this comparison with all the \code{FATE} and
+observed data.
+\code{PFG Composition} : produced a computation of observed distribution
+of relative abundance in the simulation area and a computation of distance between
+observed and simulated distribution.
+\code{PFG Richness} : computes the PFG richness over the whole simulation area
+for a \code{FATE} simulation and computes the difference between observed and simulated PFG richness.
+}
+\details{
+\describe{
+ \item{Habitat validation}{The observed habitat is derived from the cesbio map, the simulated habitat
+is derived from FATE simulated relative abundance, based on a random forest
+algorithm trained on CBNA data. To compare observations and simulations, the function
+compute confusion matrix between observation and prediction and then compute the TSS
+for each habitat h (number of prediction of habitat h/number of observation
+of habitat h + number of non-prediction of habitat h/number of non-observation
+of habitat h). The final metrics this script use is the mean of TSS per habitat over all
+habitats, weighted by the share of each habitat in the observed habitat distribution.}
+ \item{PFG composition validation}{This code firstly run the \code{get.observed.distribution} function in order to have
+a \code{obs.distri} file which contain the observed distribution per PFG, strata and habitat.
+This file is also an argument for the \code{do.PFG.composition.validation} function run next.
+This second sub function provide the computation of distance between observed and simulated distribution. \cr
+NB : The argument \code{strata.considered_PFG.compo} is by default "A" in the 2 sub functions because
+it's easier for a \code{FATE} simulation to provide PFG abundances for all strata. \cr The argument
+\code{perStrata.compo} is by default \code{NULL} for the same reasons.}
+ \item{PFG richness validation}{Firstly, the function updates the \code{list.PFG} with \code{exclude.PFG} vector.
+Then, the script takes the abundance per PFG file from the results of the \code{FATE}
+simulation and computes the difference between the \code{list.PFG} and all the PFG
+which are presents in the abundance file, in order to obtain the PFG richness for a simulation.
+The function also determine if an observed PFG is missing in the results of the simulation at
+a specific year.}
+}
+}
+\examples{
+
+## Habitat validation ---------------------------------------------------------------------------------
+POST_FATE.validation(name.simulation = "FATE_Champsaur"
+ , sim.version = "SIMUL_V4.1"
+ , year = 2000
+ , doHabitat = TRUE
+ , obs.path = "FATE_Champsaur/DATA_OBS/"
+ , releves.PFG = "releves.PFG.abundance.csv"
+ , releves.sites = "releves.sites.shp"
+ , hab.obs = "simplified.cesbio.map.grd"
+ , validation.mask = "certain.habitat.100m.restricted.grd"
+ , studied.habitat = NULL
+ , doComposition = FALSE
+ , doRichness = FALSE)
+
+## PFG composition validation --------------------------------------------------------------------------
+list.PFG<-as.factor(c("C1","C2","C3","C4","H1","H2","H3","H4","H5","H6","P1","P2","P3","P4","P5"))
+habitat.considered = c("coniferous.forest", "deciduous.forest", "natural.grassland", "woody.heatland")
+POST_FATE.validation(name.simulation = "FATE_Champsaur"
+ , sim.version = "SIMUL_V4.1"
+ , year = 2000
+ , doHabitat = FALSE
+ , obs.path = "FATE_Champsaur/DATA_OBS/"
+ , releves.PFG = "releves.PFG.abundance.csv"
+ , releves.sites = "releves.sites.shp"
+ , hab.obs = "simplified.cesbio.map.grd"
+ , validation.mask = "certain.habitat.100m.restricted.grd"
+ , studied.habitat = NULL
+ , doComposition = TRUE
+ , PFG.considered_PFG.compo = list.PFG
+ , habitat.considered_PFG.compo = habitat.considered
+ , doRichness = FALSE)
+
+## PFG richness validation -----------------------------------------------------------------------------
+list.PFG<-as.factor(c("C1","C2","C3","C4","H1","H2","H3","H4","H5","H6","P1","P2","P3","P4","P5"))
+POST_FATE.validation(name.simulation = "FATE_CHampsaur"
+ , sim.version = "SIMUL_V4.1"
+ , year = 2000
+ , doHabitat = FALSE
+ , doComposition = FALSE
+ , doRichness = TRUE
+ , list.PFG = list.PFG
+ , exclude.PFG = NULL)
+
+}
+\author{
+Matthieu Combaud, Maxime Delprat
+}
diff --git a/man/do.PFG.composition.validation.Rd b/man/do.PFG.composition.validation.Rd
new file mode 100644
index 0000000..f4a934b
--- /dev/null
+++ b/man/do.PFG.composition.validation.Rd
@@ -0,0 +1,69 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/UTILS.do_PFG_composition_validation.R
+\name{do.PFG.composition.validation}
+\alias{do.PFG.composition.validation}
+\title{Compute distance between observed and simulated distribution}
+\usage{
+\method{do}{PFG.composition.validation}(
+ name.simulation,
+ obs.path,
+ sim.version,
+ hab.obs,
+ PFG.considered_PFG.compo,
+ strata.considered_PFG.compo,
+ habitat.considered_PFG.compo,
+ observed.distribution,
+ perStrata.compo,
+ validation.mask,
+ year
+)
+}
+\arguments{
+\item{name.simulation}{simulation folder name.}
+
+\item{obs.path}{the function needs observed data, please create a folder for them in your
+simulation folder and then indicate in this parameter the access path to this new folder.}
+
+\item{sim.version}{name of the simulation we want to validate (it works with
+only one \code{sim.version}).}
+
+\item{hab.obs}{file which contain the extended studied map in the simulation.}
+
+\item{PFG.considered_PFG.compo}{a character vector of the list of PFG considered
+in the validation.}
+
+\item{strata.considered_PFG.compo}{a character vector of the list of precise
+strata considered in the validation.}
+
+\item{habitat.considered_PFG.compo}{a character vector of the list of habitat(s)
+considered in the validation.}
+
+\item{observed.distribution}{PFG observed distribution table.}
+
+\item{perStrata.compo}{Logical. All strata together (FALSE) or per strata (TRUE).}
+
+\item{validation.mask}{file which contain a raster mask that specified
+which pixels need validation.}
+
+\item{year}{year of simulation to validate.}
+}
+\value{
+
+}
+\description{
+This script is designed to compare the difference between the
+PFG distribution in observed and simulated data. For a set of PFG, strata and
+habitats chosen, the function compute distance between observed and simulated
+distribution for a precise \code{FATE} simulation.
+}
+\details{
+After preliminary checks, this code extract observed habitat from the \code{hab.obs}
+map and, then, merge it with the simulated PFG abundance file from results of a \code{FATE}
+simulation. After filtration of the required PFG, strata and habitats, the function
+transform the data into relative metrics and, then, compute distribution per PFG, strata
+and habitat (if necessary). Finally, the code computes proximity between observed
+and simulated data, per PFG, strata and habitat.
+}
+\author{
+Matthieu Combaud, Maxime Delprat
+}
diff --git a/man/get.observed.distribution.Rd b/man/get.observed.distribution.Rd
new file mode 100644
index 0000000..6d0c1c8
--- /dev/null
+++ b/man/get.observed.distribution.Rd
@@ -0,0 +1,64 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/UTILS.get_observed_distribution.R
+\name{get.observed.distribution}
+\alias{get.observed.distribution}
+\title{Compute distribution of relative abundance over observed relevés}
+\usage{
+get.observed.distribution(
+ name.simulation,
+ obs.path,
+ releves.PFG,
+ releves.sites,
+ hab.obs,
+ PFG.considered_PFG.compo,
+ strata.considered_PFG.compo,
+ habitat.considered_PFG.compo,
+ perStrata.compo,
+ sim.version
+)
+}
+\arguments{
+\item{name.simulation}{simulation folder name.}
+
+\item{obs.path}{the function needs observed data, please create a folder for them in your
+simulation folder and then indicate in this parameter the access path to this new folder.}
+
+\item{releves.PFG}{file which contain the observed Braund-Blanquet abundance at each site
+and each PFG and strata.}
+
+\item{releves.sites}{file which contain coordinates and a description of
+the habitat associated with the dominant species of each site in the studied map.}
+
+\item{hab.obs}{raster map of the extended studied area in the simulation.}
+
+\item{PFG.considered_PFG.compo}{a character vector of the list of PFG considered
+in the validation.}
+
+\item{strata.considered_PFG.compo}{a character vector of the list of precise
+strata considered in the validation.}
+
+\item{habitat.considered_PFG.compo}{a character vector of the list of habitat(s)
+considered in the validation.}
+
+\item{perStrata.compo}{Logical. All strata together (FALSE) or per strata (TRUE).}
+
+\item{sim.version}{name of the simulation we want to validate (it works with
+only one \code{sim.version}).}
+}
+\value{
+
+}
+\description{
+This script is designed to compute distribution, per PFG/strata/habitat,
+of relative abundance, from observed data.
+}
+\details{
+The function takes the \code{releves.PFG} and \code{releves.sites} files and
+aggregate coverage per PFG. Then, the code get habitat information from also
+the \code{hab.obs} map, keep only interesting habitat, strata and PFG, and
+transform the data into relative metrics. Finally, the script computes distribution
+per PFG, and if require per strata/habitat (else all strata/habitat will be considered together).
+}
+\author{
+Matthieu Combaud, Maxime Delprat
+}
From 7612aed51c997f46c3f9d2f05fdbbf3d9420c183 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Fri, 4 Mar 2022 10:19:50 +0100
Subject: [PATCH 044/176] Add files via upload
Creation of a new POST_FATE.validation & updates of utils functions associated
---
docs/reference/POST_FATE.validation.html | 338 ++++++++++++++++++
.../do.PFG.composition.validation.html | 236 ++++++++++++
docs/reference/get.observed.distribution.html | 228 ++++++++++++
3 files changed, 802 insertions(+)
create mode 100644 docs/reference/POST_FATE.validation.html
create mode 100644 docs/reference/do.PFG.composition.validation.html
create mode 100644 docs/reference/get.observed.distribution.html
diff --git a/docs/reference/POST_FATE.validation.html b/docs/reference/POST_FATE.validation.html
new file mode 100644
index 0000000..450d8ff
--- /dev/null
+++ b/docs/reference/POST_FATE.validation.html
@@ -0,0 +1,338 @@
+
+Computes validation data for habitat, PFG richness and composition for a FATE simulation. — POST_FATE.validation • RFate
+
+
+
This script is designed to compute validation data for :
+Habitat : compares habitat simulations and observations and
+create a map to visualize this comparison with all the FATE and
+observed data.
+PFG Composition : produced a computation of observed distribution
+of relative abundance in the simulation area and a computation of distance between
+observed and simulated distribution.
+PFG Richness : computes the PFG richness over the whole simulation area
+for a FATE simulation and computes the difference between observed and simulated PFG richness.
name of the simulation to validate (it works with only one sim.version).
+
year
+
year of simulation for validation.
+
doHabitat
+
logical. Default TRUE. If TRUE, habitat validation module is activated,
+if FALSE, habitat validation module is disabled.
+
obs.path
+
the function needs observed data, please create a folder for them in your
+simulation folder and then indicate in this parameter the access path to this new folder (habitat & PFG composition validation).
+
releves.PFG
+
name of file which contain the observed Braund-Blanquet abundance at each site
+and each PFG and strata (habitat & PFG composition validation).
+
hab.obs
+
name of the file which contain the extended studied map in the simulation (habitat & PFG composition validation).
+
validation.mask
+
name of the file which contain a raster mask that specified which pixels need validation
+(habitat & PFG composition validation).
+
studied.habitat
+
default NULL. If NULL, the function will
+take into account of all habitats in the hab.obs map. Otherwise, please specify
+in a vector habitats that will be take into account for the validation (habitat validation).
+
doComposition
+
logical. Default TRUE. If TRUE, PFG composition validation module is activated,
+if FALSE, PFG composition validation module is disabled.
+
PFG.considered_PFG.compo
+
a character vector of the list of PFG considered
+in the validation (PFG composition validation).
+
habitat.considered_PFG.compo
+
a character vector of the list of habitat(s)
+considered in the validation (PFG composition validation).
+
doRichness
+
logical. Default TRUE. If TRUE, PFG richness validation module is activated,
+if FALSE, PFG richness validation module is disabled.
+
list.PFG
+
a character vector which contain all the PFGs taken account in
+the simulation and observed in the simulation area (PFG richness validation).
+
exclude.PFG
+
default NULL. A character vector containing the names
+of the PFG you want to exclude from the analysis (PFG richness validation).
+
releves.site
+
name of the file which contain coordinates and a description of
+the habitat associated with the dominant species of each site in the studied map (habitat & PFG composition validation).
+
+
+
Value
+
+
+
+
Details
+
+
Habitat validation
+
The observed habitat is derived from the cesbio map, the simulated habitat
+is derived from FATE simulated relative abundance, based on a random forest
+algorithm trained on CBNA data. To compare observations and simulations, the function
+compute confusion matrix between observation and prediction and then compute the TSS
+for each habitat h (number of prediction of habitat h/number of observation
+of habitat h + number of non-prediction of habitat h/number of non-observation
+of habitat h). The final metrics this script use is the mean of TSS per habitat over all
+habitats, weighted by the share of each habitat in the observed habitat distribution.
+
+
PFG composition validation
+
This code firstly run the get.observed.distribution function in order to have
+a obs.distri file which contain the observed distribution per PFG, strata and habitat.
+This file is also an argument for the do.PFG.composition.validation function run next.
+This second sub function provide the computation of distance between observed and simulated distribution.
+NB : The argument strata.considered_PFG.compo is by default "A" in the 2 sub functions because
+it's easier for a FATE simulation to provide PFG abundances for all strata. The argument
+perStrata.compo is by default NULL for the same reasons.
+
+
PFG richness validation
+
Firstly, the function updates the list.PFG with exclude.PFG vector.
+Then, the script takes the abundance per PFG file from the results of the FATE
+simulation and computes the difference between the list.PFG and all the PFG
+which are presents in the abundance file, in order to obtain the PFG richness for a simulation.
+The function also determine if an observed PFG is missing in the results of the simulation at
+a specific year.
This script is designed to compare the difference between the
+PFG distribution in observed and simulated data. For a set of PFG, strata and
+habitats chosen, the function compute distance between observed and simulated
+distribution for a precise FATE simulation.
the function needs observed data, please create a folder for them in your
+simulation folder and then indicate in this parameter the access path to this new folder.
+
sim.version
+
name of the simulation we want to validate (it works with
+only one sim.version).
+
hab.obs
+
file which contain the extended studied map in the simulation.
+
PFG.considered_PFG.compo
+
a character vector of the list of PFG considered
+in the validation.
+
strata.considered_PFG.compo
+
a character vector of the list of precise
+strata considered in the validation.
+
habitat.considered_PFG.compo
+
a character vector of the list of habitat(s)
+considered in the validation.
+
observed.distribution
+
PFG observed distribution table.
+
perStrata.compo
+
Logical. All strata together (FALSE) or per strata (TRUE).
+
validation.mask
+
file which contain a raster mask that specified
+which pixels need validation.
+
year
+
year of simulation to validate.
+
+
+
Value
+
+
+
+
Details
+
After preliminary checks, this code extract observed habitat from the hab.obs
+map and, then, merge it with the simulated PFG abundance file from results of a FATE
+simulation. After filtration of the required PFG, strata and habitats, the function
+transform the data into relative metrics and, then, compute distribution per PFG, strata
+and habitat (if necessary). Finally, the code computes proximity between observed
+and simulated data, per PFG, strata and habitat.
+
+
+
Author
+
Matthieu Combaud, Maxime Delprat
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/reference/get.observed.distribution.html b/docs/reference/get.observed.distribution.html
new file mode 100644
index 0000000..94f4e09
--- /dev/null
+++ b/docs/reference/get.observed.distribution.html
@@ -0,0 +1,228 @@
+
+Compute distribution of relative abundance over observed relevés — get.observed.distribution • RFate
+
+
+
the function needs observed data, please create a folder for them in your
+simulation folder and then indicate in this parameter the access path to this new folder.
+
releves.PFG
+
file which contain the observed Braund-Blanquet abundance at each site
+and each PFG and strata.
+
releves.sites
+
file which contain coordinates and a description of
+the habitat associated with the dominant species of each site in the studied map.
+
hab.obs
+
raster map of the extended studied area in the simulation.
+
PFG.considered_PFG.compo
+
a character vector of the list of PFG considered
+in the validation.
+
strata.considered_PFG.compo
+
a character vector of the list of precise
+strata considered in the validation.
+
habitat.considered_PFG.compo
+
a character vector of the list of habitat(s)
+considered in the validation.
+
perStrata.compo
+
Logical. All strata together (FALSE) or per strata (TRUE).
+
sim.version
+
name of the simulation we want to validate (it works with
+only one sim.version).
+
+
+
Value
+
+
+
+
Details
+
The function takes the releves.PFG and releves.sites files and
+aggregate coverage per PFG. Then, the code get habitat information from also
+the hab.obs map, keep only interesting habitat, strata and PFG, and
+transform the data into relative metrics. Finally, the script computes distribution
+per PFG, and if require per strata/habitat (else all strata/habitat will be considered together).
+
+
+
Author
+
Matthieu Combaud, Maxime Delprat
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
From fc92bae21e5215938388b6ee0fc8236297f98180 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Fri, 4 Mar 2022 11:40:17 +0100
Subject: [PATCH 045/176] Add files via upload
correction of import errors
---
NAMESPACE | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/NAMESPACE b/NAMESPACE
index acd8cd2..1b9bd0a 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,9 +1,6 @@
# Generated by roxygen2: do not edit by hand
-S3method(do,PFG.composition.validation)
-S3method(do,habitat.validation)
S3method(plot,predicted.habitat)
-S3method(train,RF.habitat)
export(.adaptMaps)
export(.cropMaps)
export(.getCutoff)
@@ -58,9 +55,12 @@ export(betapart.core)
export(cluster.stats)
export(designLHDNorm)
export(divLeinster)
+export(do.PFG.composition.validation)
+export(do.habitat.validation)
export(dunn)
export(ecospat.kd)
export(get.observed.distribution)
+export(train.RF.habitat)
importFrom(FD,gowdis)
importFrom(PresenceAbsence,auc)
importFrom(PresenceAbsence,cmx)
@@ -90,7 +90,6 @@ importFrom(data.table,dcast)
importFrom(data.table,fread)
importFrom(data.table,fwrite)
importFrom(data.table,rbindlist)
-importFrom(data.table,rename)
importFrom(data.table,setDT)
importFrom(doParallel,registerDoParallel)
importFrom(dplyr,"%>%")
@@ -169,6 +168,7 @@ importFrom(grid,unit)
importFrom(gridExtra,grid.arrange)
importFrom(huge,huge.npn)
importFrom(methods,as)
+importFrom(parallel,detectCores)
importFrom(parallel,mclapply)
importFrom(phyloclim,niche.overlap)
importFrom(randomForest,randomForest)
@@ -228,7 +228,6 @@ importFrom(stats,var)
importFrom(stats,weighted.mean)
importFrom(stringr,str_split)
importFrom(stringr,str_sub)
-importFrom(tidyverse,write_rds)
importFrom(utils,combn)
importFrom(utils,download.file)
importFrom(utils,install.packages)
From 8a827f98c961250f087571b68769378df3cec6d6 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Fri, 4 Mar 2022 11:42:21 +0100
Subject: [PATCH 046/176] Add files via upload
correction of packages imports mistakes
---
R/POST_FATE.validation.R | 3 +++
R/PRE_FATE.skeletonDirectory.R | 6 ++++++
R/UTILS.do_habitat_validation.R | 4 +++-
R/UTILS.plot_predicted_habitat.R | 3 +--
R/UTILS.train_RF_habitat.R | 2 +-
5 files changed, 14 insertions(+), 4 deletions(-)
diff --git a/R/POST_FATE.validation.R b/R/POST_FATE.validation.R
index dd85f77..326bcea 100644
--- a/R/POST_FATE.validation.R
+++ b/R/POST_FATE.validation.R
@@ -147,6 +147,8 @@
##' @foreach foreach foreach %dopar%
##' @importFrom forcats fct_expand
##' @importFrom readr write_rds
+##' @importFrom doParallel registerDoParallel
+##' @importFrom parallel detectCores
##'
### END OF HEADER ###################################################################
@@ -302,6 +304,7 @@ POST_FATE.validation = function(name.simulation
#list of PFG of interest
list.PFG<-setdiff(list.PFG,exclude.PFG)
+ registerDoParallel(detectCores()-2)
dying.PFG.list<-foreach(i=1:length(sim.version)) %dopar% {
simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_", sim.version, ".csv"))
diff --git a/R/PRE_FATE.skeletonDirectory.R b/R/PRE_FATE.skeletonDirectory.R
index 919f745..a243359 100644
--- a/R/PRE_FATE.skeletonDirectory.R
+++ b/R/PRE_FATE.skeletonDirectory.R
@@ -75,6 +75,10 @@
##' \describe{
##' \item{\code{HABITAT}}{this folder will collect all the validation files produces
##' by the function POST_FATE.validation.habitat}
+##' \item{\code{PFG_RICHNESS}}{this folder will collect all the validation files produces
+##' by the function POST_FATE.validation_PFG_richness}
+##' \item{\code{PFG_COMPOSITION}}{this folder will collect all the validation files produces
+##' by the function POST_FATE.validation_PFG_composition}
##' }
##' }
##' }
@@ -146,6 +150,8 @@ PRE_FATE.skeletonDirectory = function(name.simulation = "FATE_simulation")
## the VALIDATION dir
dir.create(file.path(name.simulation, "VALIDATION"), showWarnings = FALSE)
dir.create(file.path(name.simulation, "VALIDATION", "HABITAT"), showWarnings = FALSE)
+ dir.create(file.path(name.simulation, "VALIDATION", "PFG_RICHNESS"), showWarnings = FALSE)
+ dir.create(file.path(name.simulation, "VALIDATION", "PFG_COMPOSITION"), showWarnings = FALSE)
message("\n Your directory tree for your FATE simulation ("
, name.simulation, ") is ready!\n")
diff --git a/R/UTILS.do_habitat_validation.R b/R/UTILS.do_habitat_validation.R
index 7321724..b080596 100644
--- a/R/UTILS.do_habitat_validation.R
+++ b/R/UTILS.do_habitat_validation.R
@@ -36,7 +36,7 @@
##' database by extracting the observed habitat from a raster map. Then, for each
##' simulations (sim.version), the script take the evolution abundance for each PFG
##' and all strata file and predict the habitat for the whole map (if option selected)
-##' thanks to the RF model.Finally, the function compute habitat performance based on
+##' thanks to the RF model. Finally, the function computes habitat performance based on
##' TSS for each habitat.
##'
##' @return
@@ -56,6 +56,8 @@
##' @importFrom reshape2 dcast
##' @importFrom caret confusionMatrix
##' @importFrom utils write.csv
+##' @importFrom doParallel registerDoParallel
+##' @importFrom parallel detectCores
##'
### END OF HEADER ##############################################################
diff --git a/R/UTILS.plot_predicted_habitat.R b/R/UTILS.plot_predicted_habitat.R
index 266d0ca..e5903e4 100644
--- a/R/UTILS.plot_predicted_habitat.R
+++ b/R/UTILS.plot_predicted_habitat.R
@@ -34,8 +34,7 @@
##'
##' @export
##'
-##' @importFrom dplyr select all_of
-##' @importFrom data.table rename
+##' @importFrom dplyr select all_of rename
##' @importFrom utils write.csv
##' @importFrom raster raster crs extent res ratify writeRaster
##' @importFrom stats complete.cases
diff --git a/R/UTILS.train_RF_habitat.R b/R/UTILS.train_RF_habitat.R
index 885e6be..6276839 100644
--- a/R/UTILS.train_RF_habitat.R
+++ b/R/UTILS.train_RF_habitat.R
@@ -56,7 +56,7 @@
##' @importFrom sf st_transform st_crop st_write
##' @importFrom randomForest randomForest tuneRF
##' @importFrom caret confusionMatrix
-##' @importFrom tidyverse write_rds
+##' @importFrom readr write_rds
##' @importFrom utils read.csv
##'
### END OF HEADER ##############################################################
From 4a5b3f29e3638f44ff029a420682419882fb6a8f Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Fri, 4 Mar 2022 11:43:25 +0100
Subject: [PATCH 047/176] Add files via upload
correction of mistakes in header
---
man/do.habitat.validation.Rd | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/man/do.habitat.validation.Rd b/man/do.habitat.validation.Rd
index cccd6f2..28c246f 100644
--- a/man/do.habitat.validation.Rd
+++ b/man/do.habitat.validation.Rd
@@ -5,7 +5,7 @@
\title{Compare observed and simulated habitat of a \code{FATE} simulation
at the last simulation year.}
\usage{
-\method{do}{habitat.validation}(
+do.habitat.validation(
output.path,
RF.model,
habitat.FATE.map,
@@ -64,7 +64,7 @@ After several preliminary checks, the function is going to prepare the observati
database by extracting the observed habitat from a raster map. Then, for each
simulations (sim.version), the script take the evolution abundance for each PFG
and all strata file and predict the habitat for the whole map (if option selected)
-thanks to the RF model.Finally, the function compute habitat performance based on
+thanks to the RF model. Finally, the function computes habitat performance based on
TSS for each habitat.
}
\author{
From 7f70f924a396613773946f3e241ccb892cd6db9f Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Fri, 4 Mar 2022 11:45:13 +0100
Subject: [PATCH 048/176] Add files via upload
Correction of small mistakes
---
docs/do.habitat.validation.html | 239 ++++++++++++++++++++++++++++++++
1 file changed, 239 insertions(+)
create mode 100644 docs/do.habitat.validation.html
diff --git a/docs/do.habitat.validation.html b/docs/do.habitat.validation.html
new file mode 100644
index 0000000..b33aa00
--- /dev/null
+++ b/docs/do.habitat.validation.html
@@ -0,0 +1,239 @@
+
+Compare observed and simulated habitat of a FATE simulation
+at the last simulation year. — do.habitat.validation • RFate
+
+
+
To compare observations and simulations, this function compute
+confusion matrix between observation and prediction and then compute the TSS
+for each habitat.
access path to the for the folder where output files
+will be created.
+
RF.model
+
random forest model trained on CBNA data (train.RF.habitat
+function)
+
habitat.FATE.map
+
a raster map of the observed habitat in the
+studied area.
+
validation.mask
+
a raster mask that specified which pixels need validation.
+
simulation.map
+
a raster map of the whole studied area use to check
+the consistency between simulation map and the observed habitat map.
+
predict.all.map
+
a TRUE/FALSE vector. If TRUE, the script will predict
+habitat for the whole map.
+
sim.version
+
name of the simulation to validate.
+
name.simulation
+
simulation folder name.
+
perStrata
+
a TRUE/FALSE vector. If TRUE, the PFG abundance is defined
+by strata in each pixel. If FALSE, PFG abundance is defined for all strata.
+
hab.obs
+
a raster map of the observed habitat in the
+extended studied area.
+
year
+
year of simulation for validation.
+
+
+
Value
+
Habitat performance file
+If option selected, the function returns an habitat prediction file with
+observed and simulated habitat for each pixel of the whole map.
+
+
+
Details
+
After several preliminary checks, the function is going to prepare the observations
+database by extracting the observed habitat from a raster map. Then, for each
+simulations (sim.version), the script take the evolution abundance for each PFG
+and all strata file and predict the habitat for the whole map (if option selected)
+thanks to the RF model. Finally, the function computes habitat performance based on
+TSS for each habitat.
+
+
+
Author
+
Matthieu Combaud & Maxime Delprat
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
From 9fc07046dc10b89a6de0d63afc3360786094a060 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Tue, 8 Mar 2022 11:07:00 +0100
Subject: [PATCH 049/176] Add files via upload
Update of the names space for the validation functions
---
NAMESPACE | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/NAMESPACE b/NAMESPACE
index 1b9bd0a..71d3d62 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -86,7 +86,6 @@ importFrom(colorspace,heat_hcl)
importFrom(colorspace,sequential_hcl)
importFrom(cowplot,get_legend)
importFrom(cowplot,ggdraw)
-importFrom(data.table,dcast)
importFrom(data.table,fread)
importFrom(data.table,fwrite)
importFrom(data.table,rbindlist)
@@ -133,6 +132,7 @@ importFrom(ggplot2,geom_vline)
importFrom(ggplot2,ggplot)
importFrom(ggplot2,ggsave)
importFrom(ggplot2,ggtitle)
+importFrom(ggplot2,guide_legend)
importFrom(ggplot2,guides)
importFrom(ggplot2,labs)
importFrom(ggplot2,scale_alpha)
@@ -171,9 +171,9 @@ importFrom(methods,as)
importFrom(parallel,detectCores)
importFrom(parallel,mclapply)
importFrom(phyloclim,niche.overlap)
+importFrom(prettyR,Mode)
importFrom(randomForest,randomForest)
importFrom(randomForest,tuneRF)
-importFrom(raster,aggregate)
importFrom(raster,as.data.frame)
importFrom(raster,as.matrix)
importFrom(raster,cellFromXY)
@@ -187,6 +187,7 @@ importFrom(raster,extension)
importFrom(raster,extent)
importFrom(raster,extract)
importFrom(raster,getValues)
+importFrom(raster,levels)
importFrom(raster,mask)
importFrom(raster,ncell)
importFrom(raster,nlayers)
@@ -210,6 +211,7 @@ importFrom(sf,st_transform)
importFrom(sf,st_write)
importFrom(shiny,runApp)
importFrom(sp,SpatialPoints)
+importFrom(stats,aggregate)
importFrom(stats,as.dist)
importFrom(stats,complete.cases)
importFrom(stats,cophenetic)
From 29256188e584f27818140ba836a49612cee81e6a Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Tue, 8 Mar 2022 11:08:12 +0100
Subject: [PATCH 050/176] Add files via upload
Update of the names space for the validation functions
---
R/POST_FATE.validation.R | 4 ++--
R/UTILS.do_PFG_composition_validation.R | 9 +++++----
R/UTILS.do_habitat_validation.R | 15 ++++++++-------
R/UTILS.get_observed_distribution.R | 5 +++--
R/UTILS.plot_predicted_habitat.R | 11 ++++++-----
R/UTILS.train_RF_habitat.R | 12 +++++++-----
6 files changed, 31 insertions(+), 25 deletions(-)
diff --git a/R/POST_FATE.validation.R b/R/POST_FATE.validation.R
index 326bcea..a5cc134 100644
--- a/R/POST_FATE.validation.R
+++ b/R/POST_FATE.validation.R
@@ -144,7 +144,7 @@
##' @importFrom raster raster projectRaster res crs crop
##' @importFrom utils read.csv write.csv
##' @importFrom sf st_read
-##' @foreach foreach foreach %dopar%
+##' @importFrom foreach foreach %dopar%
##' @importFrom forcats fct_expand
##' @importFrom readr write_rds
##' @importFrom doParallel registerDoParallel
@@ -191,7 +191,7 @@ POST_FATE.validation = function(name.simulation
# CBNA releves data habitat map
releves.PFG<-read.csv(paste0(obs.path, releves.PFG),header=T,stringsAsFactors = T)
releves.sites<-st_read(paste0(obs.path, releves.sites))
- hab.obs<-raster(paste0(obs.path, hab.obs))
+ hab.obs = raster(paste0(obs.path, hab.obs))
# Habitat mask at FATE simu resolution
hab.obs.modif <- projectRaster(from = hab.obs, res = res(simulation.map)[1], crs = crs(projection(simulation.map)), method = "ngb")
habitat.FATE.map <- crop(hab.obs.modif, simulation.map) #reprojection and croping of the extended habitat map in order to have a reduced observed habitat map
diff --git a/R/UTILS.do_PFG_composition_validation.R b/R/UTILS.do_PFG_composition_validation.R
index 6c6dea1..72c2284 100644
--- a/R/UTILS.do_PFG_composition_validation.R
+++ b/R/UTILS.do_PFG_composition_validation.R
@@ -48,10 +48,11 @@
##'
##' @export
##'
-##' @importFrom raster raster projectRaster res crs crop extent origin compareRaster
-##' getValues ncell aggregate compareCRS
-##' @importFrom utils read.csv write.csv
##' @importFrom dplyr rename filter group_by mutate %>% select
+##' @importFrom raster raster projectRaster res crs crop extent origin compareRaster
+##' getValues ncell compareCRS levels
+##' @importFrom stats aggregate
+##' @importFrom utils read.csv write.csv
##' @importFrom data.table setDT
##'
### END OF HEADER ##############################################################
@@ -99,7 +100,7 @@ do.PFG.composition.validation<-function(name.simulation, obs.path, sim.version,
}
if(!all(origin(simulation.map)==origin(habitat.FATE.map))){
print("setting origin habitat.FATE.map to match simulation.map")
- origin(habitat.FATE.map)<-origin(simulation.map)
+ raster::origin(habitat.FATE.map) <- raster::origin(simulation.map)
}
if(!compareRaster(simulation.map,habitat.FATE.map)){ #this is crucial to be able to identify pixel by their index and not their coordinates
stop("habitat.FATE.map could not be coerced to match simulation.map")
diff --git a/R/UTILS.do_habitat_validation.R b/R/UTILS.do_habitat_validation.R
index b080596..097265e 100644
--- a/R/UTILS.do_habitat_validation.R
+++ b/R/UTILS.do_habitat_validation.R
@@ -47,10 +47,11 @@
##'
##' @export
##'
-##' @importFrom raster compareCRS res projectRaster extent crop origin compareRaster
-##' getValues aggregate predict
+##' @importFrom dplyr filter rename group_by %>% mutate rename select
+##' @importFrom raster compareCRS res projectRaster extent crop origin compareRaster
+##' getValues predict levels
+##' @importFrom stats aggregate
##' @importFrom stringr str_sub
-##' @importFrom dplyr select filter rename group_by %>% mutate rename
##' @importFrom foreach foreach %dopar%
##' @importFrom forcats fct_expand
##' @importFrom reshape2 dcast
@@ -97,7 +98,7 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
}
if(!all(origin(simulation.map)==origin(habitat.FATE.map))){
print("setting origin habitat.FATE.map to match simulation.map")
- origin(habitat.FATE.map)<-origin(simulation.map)
+ raster::origin(habitat.FATE.map) <- raster::origin(simulation.map)
}
if(!compareRaster(simulation.map,habitat.FATE.map)){ #this is crucial to be able to identify pixel by their index and not their coordinates
stop("habitat.FATE.map could not be coerced to match simulation.map")
@@ -141,8 +142,8 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
habitat.whole.area.df<-data.frame(pixel=seq(from=1,to=ncell(habitat.FATE.map),by=1),code.habitat=getValues(habitat.FATE.map),for.validation=getValues(validation.mask))
habitat.whole.area.df<-habitat.whole.area.df[in.region.pixels,]
habitat.whole.area.df<-subset(habitat.whole.area.df, for.validation!="NA")
- habitat.whole.area.df<-merge(habitat.whole.area.df,dplyr::select(levels(hab.obs)[[1]],c(ID,habitat)),by.x="code.habitat",by.y="ID")
- habitat.whole.area.df<-filter(habitat.whole.area.df,is.element(habitat,RF.model$classes))
+ habitat.whole.area.df<-merge(habitat.whole.area.df, dplyr::select(levels(hab.obs)[[1]],c(ID,habitat)), by.x="code.habitat", by.y="ID")
+ habitat.whole.area.df<-filter(habitat.whole.area.df, is.element(habitat,RF.model$classes))
print(cat("Habitat considered in the prediction exercise: ",c(unique(habitat.whole.area.df$habitat)),"\n",sep="\t"))
@@ -159,7 +160,7 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
print("processing simulations")
registerDoParallel(detectCores()-2)
- results.simul <- foreach(i=1:length(sim.version),.packages = c("dplyr","forcats","reshape2","randomForest","vcd","caret")) %dopar%{
+ results.simul <- foreach(i=1:length(sim.version)) %dopar%{
########################"
# III.1. Data preparation
diff --git a/R/UTILS.get_observed_distribution.R b/R/UTILS.get_observed_distribution.R
index f7560d6..1fb9f86 100644
--- a/R/UTILS.get_observed_distribution.R
+++ b/R/UTILS.get_observed_distribution.R
@@ -45,8 +45,9 @@
##'
##' @export
##'
-##' @importFrom dplyr filter select filter group_by mutate %>% rename
-##' @importFrom raster aggregate compareCRS res crs
+##' @importFrom dplyr select filter group_by mutate %>% rename
+##' @importFrom raster compareCRS res crs levels
+##' @importFrom stats aggregate
##' @importFrom sf st_transform st_crop
##' @importFrom utils write.csv
##' @importFrom data.table setDT
diff --git a/R/UTILS.plot_predicted_habitat.R b/R/UTILS.plot_predicted_habitat.R
index e5903e4..b9b1beb 100644
--- a/R/UTILS.plot_predicted_habitat.R
+++ b/R/UTILS.plot_predicted_habitat.R
@@ -34,13 +34,14 @@
##'
##' @export
##'
-##' @importFrom dplyr select all_of rename
+##' @importFrom dplyr all_of rename select
##' @importFrom utils write.csv
-##' @importFrom raster raster crs extent res ratify writeRaster
+##' @importFrom raster raster crs extent res ratify writeRaster levels
##' @importFrom stats complete.cases
-##' @importFrom ggplot2 ggplot geom_raster coord_equal scale_fill_manual
-##' ggtitle guides theme ggsave
+##' @importFrom ggplot2 ggplot geom_raster coord_equal scale_fill_manual
+##' ggtitle guides theme ggsave guide_legend
##' @importFrom reshape2 melt
+##' @importFrom prettyR Mode
##'
### END OF HEADER ##############################################################
@@ -60,7 +61,7 @@ plot.predicted.habitat<-function(predicted.habitat
}
#compute modal predicted habitat and the proportion of simulations predicting this habitat (for each pixel)
- predicted.habitat$modal.predicted.habitat<-apply(dplyr::select(predicted.habitat,c(all_of(sim.version))),1,Mode)
+ predicted.habitat$modal.predicted.habitat<-apply(dplyr::select(predicted.habitat,sim.version),1,Mode)
predicted.habitat$modal.predicted.habitat[predicted.habitat$modal.predicted.habitat==">1 mode"]<-"ambiguous"
predicted.habitat$confidence<-apply(dplyr::select(predicted.habitat,c(all_of(sim.version),modal.predicted.habitat)),1,FUN=function(x) count.habitat(x))
diff --git a/R/UTILS.train_RF_habitat.R b/R/UTILS.train_RF_habitat.R
index 6276839..ddd4f0e 100644
--- a/R/UTILS.train_RF_habitat.R
+++ b/R/UTILS.train_RF_habitat.R
@@ -51,13 +51,15 @@
##' @export
##'
##' @importFrom dplyr filter %>% group_by select
-##' @importFrom data.table dcast setDT
-##' @importFrom raster extract aggregate compareCRS
+##' @importFrom stats aggregate
+##' @importFrom reshape2 dcast
+##' @importFrom data.table setDT
+##' @importFrom raster extract compareCRS levels
##' @importFrom sf st_transform st_crop st_write
##' @importFrom randomForest randomForest tuneRF
##' @importFrom caret confusionMatrix
##' @importFrom readr write_rds
-##' @importFrom utils read.csv
+##' @importFrom utils read.csv write.csv
##'
### END OF HEADER ##############################################################
@@ -109,8 +111,8 @@ train.RF.habitat<-function(releves.PFG
###################################
#get sites coordinates
- aggregated.releves.PFG<-merge(dplyr::select(releves.sites,c(site)),aggregated.releves.PFG,by="site")
-
+ aggregated.releves.PFG<-merge(dplyr::select(releves.sites,c(site)), aggregated.releves.PFG,by="site")
+
#get habitat code and name
if(compareCRS(aggregated.releves.PFG,hab.obs)){
aggregated.releves.PFG$code.habitat<-raster::extract(x=hab.obs,y=aggregated.releves.PFG)
From 5e9b5f6127d9f8a5fb0eedc516535cc2bacc3056 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Tue, 8 Mar 2022 11:09:47 +0100
Subject: [PATCH 051/176] Add files via upload
Update of the names space for the validation functions
From 6043db9072dfade3f058b38d71ba0989dcd62f24 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Wed, 9 Mar 2022 09:06:36 +0100
Subject: [PATCH 052/176] Update POST_FATE.validation.html
Correction
---
docs/reference/POST_FATE.validation.html | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/docs/reference/POST_FATE.validation.html b/docs/reference/POST_FATE.validation.html
index 450d8ff..e0ae816 100644
--- a/docs/reference/POST_FATE.validation.html
+++ b/docs/reference/POST_FATE.validation.html
@@ -231,10 +231,10 @@
Value
Details
Habitat validation
-
The observed habitat is derived from the cesbio map, the simulated habitat
+
The observed habitat is derived from a map of the area, the simulated habitat
is derived from FATE simulated relative abundance, based on a random forest
-algorithm trained on CBNA data. To compare observations and simulations, the function
-compute confusion matrix between observation and prediction and then compute the TSS
+algorithm trained on observed data. To compare observations and simulations, the function
+computes confusion matrix between observation and prediction and then computes the TSS
for each habitat h (number of prediction of habitat h/number of observation
of habitat h + number of non-prediction of habitat h/number of non-observation
of habitat h). The final metrics this script use is the mean of TSS per habitat over all
From 7e051b56ea79898147f4c9b25603b182dd4869ef Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Wed, 9 Mar 2022 09:07:59 +0100
Subject: [PATCH 053/176] Update POST_FATE.validation.R
correction
---
R/POST_FATE.validation.R | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/R/POST_FATE.validation.R b/R/POST_FATE.validation.R
index a5cc134..1bb2ddf 100644
--- a/R/POST_FATE.validation.R
+++ b/R/POST_FATE.validation.R
@@ -49,10 +49,10 @@
##' @details
##'
##' \describe{
-##' \item{Habitat validation}{The observed habitat is derived from the cesbio map, the simulated habitat
+##' \item{Habitat validation}{The observed habitat is derived from a map of the area, the simulated habitat
##' is derived from FATE simulated relative abundance, based on a random forest
-##' algorithm trained on CBNA data. To compare observations and simulations, the function
-##' compute confusion matrix between observation and prediction and then compute the TSS
+##' algorithm trained on observed data. To compare observations and simulations, the function
+##' computes confusion matrix between observation and prediction and then computes the TSS
##' for each habitat h (number of prediction of habitat h/number of observation
##' of habitat h + number of non-prediction of habitat h/number of non-observation
##' of habitat h). The final metrics this script use is the mean of TSS per habitat over all
From 77bc54e1f1693d109f1b026d0c24893dd48d4b56 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Wed, 9 Mar 2022 09:09:04 +0100
Subject: [PATCH 054/176] Update POST_FATE.validation.Rd
correction
---
man/POST_FATE.validation.Rd | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/man/POST_FATE.validation.Rd b/man/POST_FATE.validation.Rd
index 6a9e87f..8eed0d9 100644
--- a/man/POST_FATE.validation.Rd
+++ b/man/POST_FATE.validation.Rd
@@ -85,10 +85,10 @@ for a \code{FATE} simulation and computes the difference between observed and si
}
\details{
\describe{
- \item{Habitat validation}{The observed habitat is derived from the cesbio map, the simulated habitat
+ \item{Habitat validation}{The observed habitat is derived from a map of the area, the simulated habitat
is derived from FATE simulated relative abundance, based on a random forest
-algorithm trained on CBNA data. To compare observations and simulations, the function
-compute confusion matrix between observation and prediction and then compute the TSS
+algorithm trained on observed data. To compare observations and simulations, the function
+computes confusion matrix between observation and prediction and then computes the TSS
for each habitat h (number of prediction of habitat h/number of observation
of habitat h + number of non-prediction of habitat h/number of non-observation
of habitat h). The final metrics this script use is the mean of TSS per habitat over all
From fd81ce0b50a90af5d0bc593453159b5d97743495 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Wed, 9 Mar 2022 09:20:29 +0100
Subject: [PATCH 055/176] Update PRE_FATE.skeletonDirectory.R
correction
---
R/PRE_FATE.skeletonDirectory.R | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/R/PRE_FATE.skeletonDirectory.R b/R/PRE_FATE.skeletonDirectory.R
index a243359..857a4a1 100644
--- a/R/PRE_FATE.skeletonDirectory.R
+++ b/R/PRE_FATE.skeletonDirectory.R
@@ -71,14 +71,14 @@
##' \item{\code{RESULTS}}{this folder will collect all the results produced by the
##' software with a folder for each simulation}
##' \item{\code{VALIDATION}}{this folder will collect all the validation files produced
-##' by POST_FATE validation functions
+##' by POST_FATE.validation function
##' \describe{
##' \item{\code{HABITAT}}{this folder will collect all the validation files produces
-##' by the function POST_FATE.validation.habitat}
+##' by the function POST_FATE.validation with habitat validation activated}
##' \item{\code{PFG_RICHNESS}}{this folder will collect all the validation files produces
-##' by the function POST_FATE.validation_PFG_richness}
+##' by the function POST_FATE.validation with PFG richness validation activated}
##' \item{\code{PFG_COMPOSITION}}{this folder will collect all the validation files produces
-##' by the function POST_FATE.validation_PFG_composition}
+##' by the function POST_FATE.validation with PFG composition validation activated}
##' }
##' }
##' }
From abf55e2ae7307483bb6b9178b4af03933e7daeb0 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Wed, 9 Mar 2022 09:25:33 +0100
Subject: [PATCH 056/176] Add files via upload
correction of documentation
---
man/PRE_FATE.skeletonDirectory.Rd | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/man/PRE_FATE.skeletonDirectory.Rd b/man/PRE_FATE.skeletonDirectory.Rd
index 7bc8802..1e35d48 100644
--- a/man/PRE_FATE.skeletonDirectory.Rd
+++ b/man/PRE_FATE.skeletonDirectory.Rd
@@ -77,10 +77,14 @@ The tree structure is detailed below :
\item{\code{RESULTS}}{this folder will collect all the results produced by the
software with a folder for each simulation}
\item{\code{VALIDATION}}{this folder will collect all the validation files produced
- by POST_FATE validation functions
+ by POST_FATE.validation function
\describe{
\item{\code{HABITAT}}{this folder will collect all the validation files produces
- by the function POST_FATE.validation.habitat}
+ by the function POST_FATE.validation with habitat validation activated}
+ \item{\code{PFG_RICHNESS}}{this folder will collect all the validation files produces
+ by the function POST_FATE.validation with PFG richness validation activated}
+ \item{\code{PFG_COMPOSITION}}{this folder will collect all the validation files produces
+ by the function POST_FATE.validation with PFG composition validation activated}
}
}
}
From 59459c3b34d4446e8a353427bc825312debc4ddf Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Wed, 9 Mar 2022 09:26:15 +0100
Subject: [PATCH 057/176] Add files via upload
correction of documentation
---
docs/reference/PRE_FATE.skeletonDirectory.html | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/docs/reference/PRE_FATE.skeletonDirectory.html b/docs/reference/PRE_FATE.skeletonDirectory.html
index 5b64a49..492b294 100644
--- a/docs/reference/PRE_FATE.skeletonDirectory.html
+++ b/docs/reference/PRE_FATE.skeletonDirectory.html
@@ -235,9 +235,17 @@
Details
VALIDATION
this folder will collect all the validation files produced
- by POST_FATE validation functions
HABITAT
+ by POST_FATE.validation function
HABITAT
this folder will collect all the validation files produces
- by the function POST_FATE.validation.habitat
+ by the function POST_FATE.validation with habitat validation activated
+
+
PFG_RICHNESS
+
this folder will collect all the validation files produces
+ by the function POST_FATE.validation with PFG richness validation activated
+
+
PFG_COMPOSITION
+
this folder will collect all the validation files produces
+ by the function POST_FATE.validation with PFG composition validation activated
From 737f56ce6efdb5554ac8432b9a4caff62ccf8e12 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Fri, 18 Mar 2022 14:15:25 +0100
Subject: [PATCH 058/176] Add files via upload
Update of the validation functions and the temporal evolution function with strata definition
---
R/POST_FATE.temporalEvolution.R | 521 ++++++++++++++++--------
R/POST_FATE.validation.R | 170 +++++---
R/UTILS.do_PFG_composition_validation.R | 57 ++-
R/UTILS.do_habitat_validation.R | 65 ++-
R/UTILS.get_observed_distribution.R | 12 +-
R/UTILS.plot_predicted_habitat.R | 4 +-
R/UTILS.train_RF_habitat.R | 4 +-
7 files changed, 577 insertions(+), 256 deletions(-)
diff --git a/R/POST_FATE.temporalEvolution.R b/R/POST_FATE.temporalEvolution.R
index b32c884..c3420df 100644
--- a/R/POST_FATE.temporalEvolution.R
+++ b/R/POST_FATE.temporalEvolution.R
@@ -25,6 +25,9 @@
##' @param opt.no_CPU (\emph{optional}) default \code{1}. \cr The number of
##' resources that can be used to parallelize the \code{unzip/zip} of raster
##' files, as well as the extraction of values from raster files
+##' @param perStrata default \code{FALSE}. \cr If abundance per PFG & per Strata
+##' activated in global parameters, the function saved a temporal evolution file
+##' per PFG & per Strata.
##'
##'
##' @details
@@ -34,14 +37,17 @@
##' preanalytical tables that can then be used to create graphics}. \cr \cr
##'
##' For each PFG and each selected simulation year, raster maps are retrieved
-##' from the results folder \code{ABUND_perPFG_allStrata} and unzipped.
+##' from the results folder \code{ABUND_perPFG_allStrata} and unzipped. If
+##' \code{perStrata} = \code{TRUE}, raster maps are retrieved from the folder
+##' \code{ABUND_perPFG_perStrata} and unzipped.
##' Informations extracted lead to the production of one table before the maps
##' are compressed again :
##'
##' \itemize{
-##' \item{the value of \strong{abundance for each Plant Functional Group}
-##' for each selected simulation year(s) in every pixel in which the PFG is
-##' present for at least one of the selected simulation year(s) \cr \cr
+##' \item{the value of \strong{abundance for each Plant Functional Group}
+##' for each selected simulation year(s) and, if option selected, each height
+##' stratum in every pixel in which the PFG is present for at least one of
+##' the selected simulation year(s) \cr \cr
##' }
##' }
##'
@@ -95,7 +101,7 @@
##' following columns :
##' \describe{
##' \item{\code{PFG}}{concerned plant functional group (for abundance)}
-##' \item{\code{STRATUM}}{concerned height stratum (for LIGHT)}
+##' \item{\code{STRATUM}}{concerned height stratum (for LIGHT & abundance if option selected)}
##' \item{\code{ID.pixel}}{number of the concerned pixel}
##' \item{\code{X, Y}}{coordinates of the concerned pixel}
##' \item{\code{HAB}}{habitat of the concerned pixel}
@@ -121,7 +127,7 @@
##'
##' @export
##'
-##' @importFrom foreach foreach %do% %dopar%
+##' @importFrom foreach foreach %do% %dopar% %:%
##' @importFrom data.table rbindlist fwrite
##' @importFrom raster raster stack
##' rasterToPoints as.data.frame extract cellFromXY
@@ -137,6 +143,7 @@ POST_FATE.temporalEvolution = function(
, no_years
, opt.ras_habitat = NULL
, opt.no_CPU = 1
+ , perStrata = FALSE
){
#############################################################################
@@ -164,8 +171,7 @@ POST_FATE.temporalEvolution = function(
#############################################################################
- res = foreach (abs.simulParam = abs.simulParams) %do%
- {
+ res = foreach (abs.simulParam = abs.simulParams) %do% {
cat("\n+++++++\n")
cat("\n Simulation name : ", name.simulation)
@@ -192,170 +198,319 @@ POST_FATE.temporalEvolution = function(
}
## Get list of arrays and extract years of simulation -------------------
- raster.perPFG.allStrata = .getRasterNames(years = NULL, "allStrata", "ABUND", GLOB_DIR)
- years = sapply(sub("Abund_YEAR_", "", raster.perPFG.allStrata)
- , function(x) strsplit(as.character(x), "_")[[1]][1])
- years = sort(unique(as.numeric(years)))
- years = years[round(seq(1, length(years)
- , length.out = min(no_years, length(years))))]
- no_years = length(years)
+ if(perStrata == FALSE){
+ raster.perPFG.allStrata = .getRasterNames(years = NULL, "allStrata", "ABUND", GLOB_DIR)
+ years = sapply(sub("Abund_YEAR_", "", raster.perPFG.allStrata)
+ , function(x) strsplit(as.character(x), "_")[[1]][1])
+ years = sort(unique(as.numeric(years)))
+ years = years[round(seq(1, length(years)
+ , length.out = min(no_years, length(years))))]
+ no_years = length(years)
+
+ cat("\n Selected years : ", years)
+ cat("\n Number of years : ", no_years)
+ cat("\n")
+
+ } else if(perStrata == TRUE){
+ raster.perPFG.perStrata = .getRasterNames(years = NULL, "perStrata", "ABUND", GLOB_DIR)
+ years = sapply(sub("Abund_YEAR_", "", raster.perPFG.perStrata)
+ , function(x) strsplit(as.character(x), "_")[[1]][1])
+ years = sort(unique(as.numeric(years)))
+ years = years[round(seq(1, length(years)
+ , length.out = min(no_years, length(years))))]
+ no_years = length(years)
+
+ cat("\n Selected years : ", years)
+ cat("\n Number of years : ", no_years)
+ cat("\n")
+ }
- cat("\n Selected years : ", years)
- cat("\n Number of years : ", no_years)
- cat("\n")
## UNZIP the raster saved -----------------------------------------------
- raster.perPFG.allStrata = .getRasterNames(years, "allStrata", "ABUND", GLOB_DIR)
- .unzip(folder_name = GLOB_DIR$dir.output.perPFG.allStrata
- , list_files = raster.perPFG.allStrata
- , no_cores = opt.no_CPU)
- if (GLOB_SIM$doLight){
- .unzip(folder_name = GLOB_DIR$dir.output.light
- , list_files = list.files(path = GLOB_DIR$dir.output.light
- , pattern = paste0("YEAR_", years, "_", collapse = "|"))
+ if(perStrata == FALSE){
+ raster.perPFG.allStrata = .getRasterNames(years, "allStrata", "ABUND", GLOB_DIR)
+ .unzip(folder_name = GLOB_DIR$dir.output.perPFG.allStrata
+ , list_files = raster.perPFG.allStrata
, no_cores = opt.no_CPU)
- }
- if (GLOB_SIM$doSoil){
- .unzip(folder_name = GLOB_DIR$dir.output.soil
- , list_files = list.files(path = GLOB_DIR$dir.output.soil
- , pattern = paste0("YEAR_", years, collapse = "|"))
+ if (GLOB_SIM$doLight){
+ .unzip(folder_name = GLOB_DIR$dir.output.light
+ , list_files = list.files(path = GLOB_DIR$dir.output.light
+ , pattern = paste0("YEAR_", years, "_", collapse = "|"))
+ , no_cores = opt.no_CPU)
+ }
+ if (GLOB_SIM$doSoil){
+ .unzip(folder_name = GLOB_DIR$dir.output.soil
+ , list_files = list.files(path = GLOB_DIR$dir.output.soil
+ , pattern = paste0("YEAR_", years, collapse = "|"))
+ , no_cores = opt.no_CPU)
+ }
+
+ doWriting.abund = TRUE
+ doWriting.light = ifelse(GLOB_SIM$doLight, TRUE, FALSE)
+ doWriting.soil = ifelse(GLOB_SIM$doSoil, TRUE, FALSE)
+
+ }else if(perStrata == TRUE){
+ raster.perPFG.perStrata = .getRasterNames(years, "perStrata", "ABUND", GLOB_DIR)
+ .unzip(folder_name = GLOB_DIR$dir.output.perPFG.perStrata
+ , list_files = raster.perPFG.perStrata
, no_cores = opt.no_CPU)
+ if (GLOB_SIM$doLight){
+ .unzip(folder_name = GLOB_DIR$dir.output.light
+ , list_files = list.files(path = GLOB_DIR$dir.output.light
+ , pattern = paste0("YEAR_", years, "_", collapse = "|"))
+ , no_cores = opt.no_CPU)
+ }
+ if (GLOB_SIM$doSoil){
+ .unzip(folder_name = GLOB_DIR$dir.output.soil
+ , list_files = list.files(path = GLOB_DIR$dir.output.soil
+ , pattern = paste0("YEAR_", years, collapse = "|"))
+ , no_cores = opt.no_CPU)
+ }
+
+ doWriting.abund = TRUE
+ doWriting.light = ifelse(GLOB_SIM$doLight, TRUE, FALSE)
+ doWriting.soil = ifelse(GLOB_SIM$doSoil, TRUE, FALSE)
}
- doWriting.abund = TRUE
- doWriting.light = ifelse(GLOB_SIM$doLight, TRUE, FALSE)
- doWriting.soil = ifelse(GLOB_SIM$doSoil, TRUE, FALSE)
- ## get the data inside the rasters --------------------------------------
- cat("\n ---------- GETTING ABUNDANCE for pfg")
- if (opt.no_CPU > 1)
- {
- if (.getOS() != "windows")
- {
- registerDoParallel(cores = opt.no_CPU)
- } else
+ ## get the data inside the rasters (abundance) --------------------------------------
+
+ if(perStrata == FALSE){
+
+ cat("\n ---------- GETTING ABUNDANCE for pfg")
+ if (opt.no_CPU > 1)
{
- warning("Parallelisation with `foreach` is not available for Windows. Sorry.")
+ if (.getOS() != "windows")
+ {
+ registerDoParallel(cores = opt.no_CPU)
+ } else
+ {
+ warning("Parallelisation with `foreach` is not available for Windows. Sorry.")
+ }
}
- }
- tabAbund.list = foreach (pfg = GLOB_SIM$PFG) %dopar%
- {
- cat(" ", pfg)
- file_name = paste0(GLOB_DIR$dir.output.perPFG.allStrata,
- "Abund_YEAR_",
- years,
- "_",
- pfg,
- "_STRATA_all")
- if (length(which(file.exists(paste0(file_name, ".tif")))) > 0)
- {
- file_name = paste0(file_name, ".tif")
- } else if (length(which(file.exists(paste0(file_name, ".img")))) > 0)
+ tabAbund.list = foreach (pfg = GLOB_SIM$PFG) %dopar%
+ {
+ cat(" ", pfg)
+ file_name = paste0(GLOB_DIR$dir.output.perPFG.allStrata,
+ "Abund_YEAR_",
+ years,
+ "_",
+ pfg,
+ "_STRATA_all")
+ if (length(which(file.exists(paste0(file_name, ".tif")))) > 0)
+ {
+ file_name = paste0(file_name, ".tif")
+ } else if (length(which(file.exists(paste0(file_name, ".img")))) > 0)
+ {
+ file_name = paste0(file_name, ".img")
+ } else if (length(which(file.exists(paste0(file_name, ".asc")))) > 0)
+ {
+ file_name = paste0(file_name, ".asc")
+ }
+
+ ye = years[which(file.exists(file_name))]
+ file_name = file_name[which(file.exists(file_name))]
+
+ if (length(file_name) > 0)
+ {
+ ras = stack(file_name) * GLOB_MASK$ras.mask
+ ras.df = rasterToPoints(ras)
+ ras.df = as.data.frame(ras.df)
+ colnames(ras.df) = c("X", "Y", ye)
+ ID.abund = rowSums(ras.df[, 3:ncol(ras.df), drop = FALSE])
+ ras.df = ras.df[which(ID.abund > 0), , drop = FALSE]
+
+ if (nrow(ras.df) > 0)
+ {
+ ras.df$ID.pixel = cellFromXY(GLOB_MASK$ras.mask, ras.df[, c("X", "Y")])
+ ras.df$PFG = pfg
+
+ if (exists("ras.habitat"))
+ {
+ ras.df$HAB = extract(ras.habitat, ras.df[, c("X", "Y")])
+ } else
+ {
+ ras.df$HAB = "ALL"
+ }
+ ras.df = ras.df[, c("PFG", "ID.pixel", "X", "Y", "HAB", ye)]
+
+ return(ras.df)
+ }
+ }
+ } ## END loop on PFG
+ cat("\n")
+
+ tabAbund = rbindlist(tabAbund.list, fill = TRUE)
+ tabAbund = as.data.frame(tabAbund, stringsAsFactors = FALSE)
+
+ if (nrow(tabAbund) > 0 && ncol(tabAbund) > 0)
{
- file_name = paste0(file_name, ".img")
- } else if (length(which(file.exists(paste0(file_name, ".asc")))) > 0)
+ fwrite(tabAbund
+ , file = paste0(name.simulation
+ , "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_"
+ , basename(GLOB_DIR$dir.save)
+ , ".csv")
+ , row.names = FALSE)
+ } else
{
- file_name = paste0(file_name, ".asc")
+ tabAbund = NA
+ doWriting.abund = FALSE
+ warning("No abundance values were found! Please check.")
}
- ye = years[which(file.exists(file_name))]
- file_name = file_name[which(file.exists(file_name))]
+ }else if(perStrata == TRUE){
- if (length(file_name) > 0)
+ cat("\n ---------- GETTING ABUNDANCE for pfg")
+ if (opt.no_CPU > 1)
{
- ras = stack(file_name) * GLOB_MASK$ras.mask
- ras.df = rasterToPoints(ras)
- ras.df = as.data.frame(ras.df)
- colnames(ras.df) = c("X", "Y", ye)
- ID.abund = rowSums(ras.df[, 3:ncol(ras.df), drop = FALSE])
- ras.df = ras.df[which(ID.abund > 0), , drop = FALSE]
-
- if (nrow(ras.df) > 0)
+ if (.getOS() != "windows")
{
- ras.df$ID.pixel = cellFromXY(GLOB_MASK$ras.mask, ras.df[, c("X", "Y")])
- ras.df$PFG = pfg
-
- if (exists("ras.habitat"))
+ registerDoParallel(cores = opt.no_CPU)
+ } else
+ {
+ warning("Parallelisation with `foreach` is not available for Windows. Sorry.")
+ }
+ }
+ tabAbund.list = foreach (pfg = GLOB_SIM$PFG) %dopar% {
+ no_strata = NULL
+ for(y in years){
+ strata = list.files(GLOB_DIR$dir.output.perPFG.perStrata, pattern = paste0("YEAR_", y , "_", pfg)) # take all the numbers of abundance files for each years
+ no_strata = c(no_strata, length(strata)/2) # divide it by 2 because there are 2 files for each year*strata
+ }
+ no_strata = max(no_strata) # the maximum is taken to have the number of the highest strata for each PFG
+
+ registerDoParallel(cores = opt.no_CPU)
+ tabAbund = foreach (str = 1:no_strata, .combine = "rbind") %dopar% {
+ cat(paste0(" ", pfg, "_", str))
+ file_name = paste0(GLOB_DIR$dir.output.perPFG.perStrata,
+ "Abund_YEAR_",
+ years,
+ "_",
+ pfg,
+ "_STRATA_",
+ str)
+ if (length(which(file.exists(paste0(file_name, ".tif")))) > 0)
+ {
+ file_name = paste0(file_name, ".tif")
+ } else if (length(which(file.exists(paste0(file_name, ".img")))) > 0)
{
- ras.df$HAB = extract(ras.habitat, ras.df[, c("X", "Y")])
- } else
+ file_name = paste0(file_name, ".img")
+ } else if (length(which(file.exists(paste0(file_name, ".asc")))) > 0)
{
- ras.df$HAB = "ALL"
+ file_name = paste0(file_name, ".asc")
}
- ras.df = ras.df[, c("PFG", "ID.pixel", "X", "Y", "HAB", ye)]
- return(ras.df)
+ ye = years[which(file.exists(file_name))]
+ file_name = file_name[which(file.exists(file_name))]
+
+ if (length(file_name) > 0)
+ {
+ ras = stack(file_name) * GLOB_MASK$ras.mask
+ ras.df = rasterToPoints(ras)
+ ras.df = as.data.frame(ras.df)
+ colnames(ras.df) = c("X", "Y", ye)
+ ID.abund = rowSums(ras.df[, 3:ncol(ras.df), drop = FALSE])
+ ras.df = ras.df[which(ID.abund > 0), , drop = FALSE]
+
+ if (nrow(ras.df) > 0)
+ {
+ ras.df$ID.pixel = cellFromXY(GLOB_MASK$ras.mask, ras.df[, c("X", "Y")])
+ ras.df$PFG = pfg
+ ras.df$strata = str
+
+ if (exists("ras.habitat"))
+ {
+ ras.df$HAB = extract(ras.habitat, ras.df[, c("X", "Y")])
+ } else
+ {
+ ras.df$HAB = "ALL"
+ }
+ if(length(setdiff(years,ye)) > 0){
+ missing = data.frame(matrix(0, ncol = length(setdiff(years,ye)), nrow = nrow(ras.df))) # create a new data frame with value 0 for the missing years in the files for each PFG, if files are missing
+ colnames(missing) = setdiff(years,ye)
+ missing$ID.pixel = ras.df$ID.pixel
+ ras.df = merge(ras.df, missing, by = "ID.pixel") # adding of a common column with ras.df and then merge the two data frame
+ }
+ yea = c(ye, setdiff(years,ye))
+ yea = as.character(sort(as.numeric(yea)))
+
+ ras.df = ras.df[, c("PFG", "ID.pixel", "X", "Y", "HAB", yea, "strata")]
+
+ return(ras.df)
+ }
+ }
}
+ return(tabAbund)
+ } ## END loop on PFG
+ cat("\n")
+
+ tabAbund = rbindlist(tabAbund.list, fill = TRUE)
+ tabAbund = as.data.frame(tabAbund, stringsAsFactors = FALSE)
+
+ if (nrow(tabAbund) > 0 && ncol(tabAbund) > 0)
+ {
+ fwrite(tabAbund
+ , file = paste0(name.simulation
+ , "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_perStrata_"
+ , basename(GLOB_DIR$dir.save)
+ , ".csv")
+ , row.names = FALSE)
+ } else
+ {
+ tabAbund = NA
+ doWriting.abund = FALSE
+ warning("No abundance values were found! Please check.")
}
- } ## END loop on PFG
- cat("\n")
-
- tabAbund = rbindlist(tabAbund.list, fill = TRUE)
- tabAbund = as.data.frame(tabAbund, stringsAsFactors = FALSE)
-
- if (nrow(tabAbund) > 0 && ncol(tabAbund) > 0)
- {
- fwrite(tabAbund
- , file = paste0(name.simulation
- , "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_"
- , basename(GLOB_DIR$dir.save)
- , ".csv")
- , row.names = FALSE)
- } else
- {
- tabAbund = NA
- doWriting.abund = FALSE
- warning("No abundance values were found! Please check.")
}
- ## get the data inside the rasters --------------------------------------
+ ## get the data inside the rasters (light) --------------------------------------
if (GLOB_SIM$doLight)
{
cat("\n ---------- GETTING LIGHT for stratum")
tabLight.list = foreach (stra = c(1:GLOB_SIM$no_STRATA)-1) %dopar%
- {
- cat(" ", stra)
- file_name = paste0(GLOB_DIR$dir.output.light
- , "Light_Resources_YEAR_"
- , years
- , "_STRATA_"
- , stra)
- if (length(which(file.exists(paste0(file_name, ".tif")))) > 0)
- {
- file_name = paste0(file_name, ".tif")
- } else if (length(which(file.exists(paste0(file_name, ".img")))) > 0)
- {
- file_name = paste0(file_name, ".img")
- } else if (length(which(file.exists(paste0(file_name, ".asc")))) > 0)
- {
- file_name = paste0(file_name, ".asc")
- }
- ye = years[which(file.exists(file_name))]
- file_name = file_name[which(file.exists(file_name))]
-
- if (length(file_name) > 0)
{
- ras = stack(file_name) * GLOB_MASK$ras.mask
- ras.df = rasterToPoints(ras)
- ras.df = as.data.frame(ras.df)
- colnames(ras.df) = c("X", "Y", ye)
- ras.df$ID.pixel = cellFromXY(GLOB_MASK$ras.mask, ras.df[, c("X", "Y")])
- ras.df$STRATUM = stra
-
- if (exists("ras.habitat"))
+ cat(" ", stra)
+ file_name = paste0(GLOB_DIR$dir.output.light
+ , "Light_Resources_YEAR_"
+ , years
+ , "_STRATA_"
+ , stra)
+ if (length(which(file.exists(paste0(file_name, ".tif")))) > 0)
{
- ras.df$HAB = extract(ras.habitat, ras.df[, c("X", "Y")])
- } else
+ file_name = paste0(file_name, ".tif")
+ } else if (length(which(file.exists(paste0(file_name, ".img")))) > 0)
{
- ras.df$HAB = "ALL"
+ file_name = paste0(file_name, ".img")
+ } else if (length(which(file.exists(paste0(file_name, ".asc")))) > 0)
+ {
+ file_name = paste0(file_name, ".asc")
}
- ras.df = ras.df[, c("STRATUM", "ID.pixel", "X", "Y", "HAB", ye)]
+ ye = years[which(file.exists(file_name))]
+ file_name = file_name[which(file.exists(file_name))]
- return(ras.df)
- }
- } ## END loop on STRATUM
+ if (length(file_name) > 0)
+ {
+ ras = stack(file_name) * GLOB_MASK$ras.mask
+ ras.df = rasterToPoints(ras)
+ ras.df = as.data.frame(ras.df)
+ colnames(ras.df) = c("X", "Y", ye)
+ ras.df$ID.pixel = cellFromXY(GLOB_MASK$ras.mask, ras.df[, c("X", "Y")])
+ ras.df$STRATUM = stra
+
+ if (exists("ras.habitat"))
+ {
+ ras.df$HAB = extract(ras.habitat, ras.df[, c("X", "Y")])
+ } else
+ {
+ ras.df$HAB = "ALL"
+ }
+ ras.df = ras.df[, c("STRATUM", "ID.pixel", "X", "Y", "HAB", ye)]
+
+ return(ras.df)
+ }
+ } ## END loop on STRATUM
cat("\n")
tabLight = rbindlist(tabLight.list, fill = TRUE)
@@ -381,7 +536,7 @@ POST_FATE.temporalEvolution = function(
} ## END loop for light
- ## get the data inside the rasters --------------------------------------
+ ## get the data inside the rasters (soil) --------------------------------------
if (GLOB_SIM$doSoil)
{
cat("\n ---------- GETTING SOIL")
@@ -444,35 +599,71 @@ POST_FATE.temporalEvolution = function(
## ZIP the raster saved -------------------------------------------------
- .zip_ALL(folder_name = GLOB_DIR$dir.output.perPFG.allStrata, no_cores= opt.no_CPU)
- if (GLOB_SIM$doLight) .zip_ALL(folder_name = GLOB_DIR$dir.output.light, no_cores = opt.no_CPU)
- if (GLOB_SIM$doSoil) .zip_ALL(folder_name = GLOB_DIR$dir.output.soil, no_cores = opt.no_CPU)
-
- cat("\n> Done!\n")
-
- if(doWriting.abund || doWriting.light || doWriting.soil)
- {
- message(paste0("\n The output files \n"
- , ifelse(doWriting.abund
- , paste0(" > POST_FATE_TABLE_PIXEL_evolution_abundance_"
- , basename(GLOB_DIR$dir.save)
- , ".csv \n")
- , "")
- , ifelse(doWriting.light
- , paste0(" > POST_FATE_TABLE_PIXEL_evolution_light_"
- , basename(GLOB_DIR$dir.save)
- , ".csv \n")
- , "")
- , ifelse(doWriting.soil
- , paste0(" > POST_FATE_TABLE_PIXEL_evolution_soil_"
- , basename(GLOB_DIR$dir.save)
- , ".csv \n")
- , "")
- , "have been successfully created !\n"))
+ if(perStrata == FALSE){
+
+ .zip_ALL(folder_name = GLOB_DIR$dir.output.perPFG.allStrata, no_cores= opt.no_CPU)
+ if (GLOB_SIM$doLight) .zip_ALL(folder_name = GLOB_DIR$dir.output.light, no_cores = opt.no_CPU)
+ if (GLOB_SIM$doSoil) .zip_ALL(folder_name = GLOB_DIR$dir.output.soil, no_cores = opt.no_CPU)
+
+ cat("\n> Done!\n")
+
+ if(doWriting.abund || doWriting.light || doWriting.soil)
+ {
+ message(paste0("\n The output files \n"
+ , ifelse(doWriting.abund
+ , paste0(" > POST_FATE_TABLE_PIXEL_evolution_abundance_"
+ , basename(GLOB_DIR$dir.save)
+ , ".csv \n")
+ , "")
+ , ifelse(doWriting.light
+ , paste0(" > POST_FATE_TABLE_PIXEL_evolution_light_"
+ , basename(GLOB_DIR$dir.save)
+ , ".csv \n")
+ , "")
+ , ifelse(doWriting.soil
+ , paste0(" > POST_FATE_TABLE_PIXEL_evolution_soil_"
+ , basename(GLOB_DIR$dir.save)
+ , ".csv \n")
+ , "")
+ , "have been successfully created !\n"))
+
+ return(list(tab.abundance = tabAbund
+ , tab.light = tabLight
+ , tab.soil = tabSoil))
+ }
+
+ }else if(perStrata == TRUE){
+
+ .zip_ALL(folder_name = GLOB_DIR$dir.output.perPFG.perStrata, no_cores= opt.no_CPU)
+ if (GLOB_SIM$doLight) .zip_ALL(folder_name = GLOB_DIR$dir.output.light, no_cores = opt.no_CPU)
+ if (GLOB_SIM$doSoil) .zip_ALL(folder_name = GLOB_DIR$dir.output.soil, no_cores = opt.no_CPU)
+
+ cat("\n> Done!\n")
- return(list(tab.abundance = tabAbund
- , tab.light = tabLight
- , tab.soil = tabSoil))
+ if(doWriting.abund || doWriting.light || doWriting.soil)
+ {
+ message(paste0("\n The output files \n"
+ , ifelse(doWriting.abund
+ , paste0(" > POST_FATE_TABLE_PIXEL_evolution_abundance_perStrata_"
+ , basename(GLOB_DIR$dir.save)
+ , ".csv \n")
+ , "")
+ , ifelse(doWriting.light
+ , paste0(" > POST_FATE_TABLE_PIXEL_evolution_light_"
+ , basename(GLOB_DIR$dir.save)
+ , ".csv \n")
+ , "")
+ , ifelse(doWriting.soil
+ , paste0(" > POST_FATE_TABLE_PIXEL_evolution_soil_"
+ , basename(GLOB_DIR$dir.save)
+ , ".csv \n")
+ , "")
+ , "have been successfully created !\n"))
+
+ return(list(tab.abundance = tabAbund
+ , tab.light = tabLight
+ , tab.soil = tabSoil))
+ }
}
} ## END loop on abs.simulParams
names(res) = abs.simulParams
diff --git a/R/POST_FATE.validation.R b/R/POST_FATE.validation.R
index 1bb2ddf..86c9daa 100644
--- a/R/POST_FATE.validation.R
+++ b/R/POST_FATE.validation.R
@@ -19,7 +19,9 @@
##' @param name.simulation simulation folder name.
##' @param sim.version name of the simulation to validate (it works with only one \code{sim.version}).
##' @param year year of simulation for validation.
-##' @param doHabitat logical. Default \code{TRUE}. If \code{TRUE}, habitat validation module is activated,
+##' @param perStrata \code{Logical}. Default \code{TRUE}. If \code{TRUE}, PFG abundance is defined by strata.
+##' If \code{FALSE}, PFG abundance defined for all strata (habitat & PFG composition & PFG richness validation).
+##' @param doHabitat \code{Logical}. Default \code{TRUE}. If \code{TRUE}, habitat validation module is activated,
##' if \code{FALSE}, habitat validation module is disabled.
##' @param obs.path the function needs observed data, please create a folder for them in your
##' simulation folder and then indicate in this parameter the access path to this new folder (habitat & PFG composition validation).
@@ -33,13 +35,18 @@
##' @param studied.habitat default \code{NULL}. If \code{NULL}, the function will
##' take into account of all habitats in the \code{hab.obs} map. Otherwise, please specify
##' in a vector habitats that will be take into account for the validation (habitat validation).
-##' @param doComposition logical. Default \code{TRUE}. If \code{TRUE}, PFG composition validation module is activated,
+##' @param list.strata.simulations default \code{NULL}. A character vector which contain \code{FATE}
+##' strata definition and correspondence with observed strata definition.
+##' @param doComposition \code{Logical}. Default \code{TRUE}. If \code{TRUE}, PFG composition validation module is activated,
##' if \code{FALSE}, PFG composition validation module is disabled.
##' @param PFG.considered_PFG.compo a character vector of the list of PFG considered
##' in the validation (PFG composition validation).
##' @param habitat.considered_PFG.compo a character vector of the list of habitat(s)
##' considered in the validation (PFG composition validation).
-##' @param doRichness logical. Default \code{TRUE}. If \code{TRUE}, PFG richness validation module is activated,
+##' @param strata.considered_PFG.compo If \code{perStrata} = \code{FALSE}, a character vector with value "A"
+##' (selection of one or several specific strata disabled). If \code{perStrata} = \code{TRUE}, a character
+##' vector with at least one of the observed strata (PFG composition validation).
+##' @param doRichness \code{Logical}. Default \code{TRUE}. If \code{TRUE}, PFG richness validation module is activated,
##' if \code{FALSE}, PFG richness validation module is disabled.
##' @param list.PFG a character vector which contain all the PFGs taken account in
##' the simulation and observed in the simulation area (PFG richness validation).
@@ -50,26 +57,27 @@
##'
##' \describe{
##' \item{Habitat validation}{The observed habitat is derived from a map of the area, the simulated habitat
-##' is derived from FATE simulated relative abundance, based on a random forest
-##' algorithm trained on observed data. To compare observations and simulations, the function
-##' computes confusion matrix between observation and prediction and then computes the TSS
-##' for each habitat h (number of prediction of habitat h/number of observation
-##' of habitat h + number of non-prediction of habitat h/number of non-observation
-##' of habitat h). The final metrics this script use is the mean of TSS per habitat over all
-##' habitats, weighted by the share of each habitat in the observed habitat distribution.}
-##' \item{PFG composition validation}{This code firstly run the \code{get.observed.distribution} function in order to have
-##' a \code{obs.distri} file which contain the observed distribution per PFG, strata and habitat.
-##' This file is also an argument for the \code{do.PFG.composition.validation} function run next.
-##' This second sub function provide the computation of distance between observed and simulated distribution. \cr
-##' NB : The argument \code{strata.considered_PFG.compo} is by default "A" in the 2 sub functions because
-##' it's easier for a \code{FATE} simulation to provide PFG abundances for all strata. \cr The argument
-##' \code{perStrata.compo} is by default \code{NULL} for the same reasons.}
+##' is derived from \code{FATE} simulated relative abundance, based on a random forest
+##' algorithm trained on observed releves data (see \code{\link{train.RF.habitat}}) \cr
+##' To compare observations and simulations, the function computes confusion matrix between
+##' observations and predictions and then compute the TSS for each habitat h
+##' (number of prediction of habitat h/number of observation of habitat h + number of non-prediction
+##' of habitat h/number of non-observation of habitat h). The final metrics this script use is the
+##' mean of TSS per habitat over all habitats, weighted by the share of each habitat in the observed
+##' habitat distribution. The habitat validation also provides a visual comparison of observed and
+##' simulated habitat on the whole studied area (see \code{\link{do.habitat.validation}} &
+##' \code{\link{plot.predicted.habitat}}).} \cr
+##' \item{PFG composition validation}{This code firstly run the \code{get.observed.distribution}
+##' function in order to have a \code{obs.distri} file which contain the observed distribution
+##' per PFG, strata and habitat. This file is also an argument for the \code{do.PFG.composition.validation}
+##' function run next. This second sub function provides the computation of distance between observed
+##' and simulated distribution.}
##' \item{PFG richness validation}{Firstly, the function updates the \code{list.PFG} with \code{exclude.PFG} vector.
-##' Then, the script takes the abundance per PFG file from the results of the \code{FATE}
-##' simulation and computes the difference between the \code{list.PFG} and all the PFG
-##' which are presents in the abundance file, in order to obtain the PFG richness for a simulation.
-##' The function also determine if an observed PFG is missing in the results of the simulation at
-##' a specific year.}
+##' Then, the script takes the abundance per PFG (and per strata if option selected) file from the
+##' results of the \code{FATE} simulation and computes the difference between the \code{list.PFG}
+##' and all the PFG which are presents in the abundance file, in order to obtain the PFG richness
+##' for a simulation. The function also determine if an observed PFG is missing in the results of the
+##' simulation at a specific year.}
##' }
##'
##' @return
@@ -84,7 +92,7 @@
##' \describe{
##' \item{\file{VALIDATION/PFG_COMPOSITION/sim.version}}{1 .csv file which contain the proximity
##' between observed and simulated data computed for each PFG/strata/habitat. \cr 1 .csv file which
-##' contain the observed relevés transformed into relative metrics. \cr 1 .csv file which contain
+##' contain the observed releves transformed into relative metrics. \cr 1 .csv file which contain
##' the final output with the distribution per PFG, strata and habitat.}
##' }
##' \describe{
@@ -96,9 +104,11 @@
##' @examples
##'
##' ## Habitat validation ---------------------------------------------------------------------------------
+##' list.strata.simulations = list(S = c(1,2,3), M = c(4), B = c(5,6,7))
##' POST_FATE.validation(name.simulation = "FATE_Champsaur"
##' , sim.version = "SIMUL_V4.1"
##' , year = 2000
+##' , perStrata = TRUE
##' , doHabitat = TRUE
##' , obs.path = "FATE_Champsaur/DATA_OBS/"
##' , releves.PFG = "releves.PFG.abundance.csv"
@@ -106,15 +116,19 @@
##' , hab.obs = "simplified.cesbio.map.grd"
##' , validation.mask = "certain.habitat.100m.restricted.grd"
##' , studied.habitat = NULL
+##' , list.strata.simulations = list.strata.simulations
##' , doComposition = FALSE
##' , doRichness = FALSE)
##'
##' ## PFG composition validation --------------------------------------------------------------------------
-##' list.PFG<-as.factor(c("C1","C2","C3","C4","H1","H2","H3","H4","H5","H6","P1","P2","P3","P4","P5"))
+##' list.strata.simulations = list(S = c(1,2,3), M = c(4), B = c(5,6,7))
+##' list.PFG = as.factor(c("C1","C2","C3","C4","H1","H2","H3","H4","H5","H6","P1","P2","P3","P4","P5"))
##' habitat.considered = c("coniferous.forest", "deciduous.forest", "natural.grassland", "woody.heatland")
+##' strata.considered_PFG.compo = c("S", "M", "B")
##' POST_FATE.validation(name.simulation = "FATE_Champsaur"
##' , sim.version = "SIMUL_V4.1"
##' , year = 2000
+##' , perStrata = TRUE
##' , doHabitat = FALSE
##' , obs.path = "FATE_Champsaur/DATA_OBS/"
##' , releves.PFG = "releves.PFG.abundance.csv"
@@ -122,16 +136,19 @@
##' , hab.obs = "simplified.cesbio.map.grd"
##' , validation.mask = "certain.habitat.100m.restricted.grd"
##' , studied.habitat = NULL
+##' , list.strata.simulations = list.strata.simulations
##' , doComposition = TRUE
##' , PFG.considered_PFG.compo = list.PFG
##' , habitat.considered_PFG.compo = habitat.considered
+##' , strata.considered_PFG.compo = strata.considered_PFG.compo
##' , doRichness = FALSE)
##'
##' ## PFG richness validation -----------------------------------------------------------------------------
-##' list.PFG<-as.factor(c("C1","C2","C3","C4","H1","H2","H3","H4","H5","H6","P1","P2","P3","P4","P5"))
-##' POST_FATE.validation(name.simulation = "FATE_CHampsaur"
+##' list.PFG = as.factor(c("C1","C2","C3","C4","H1","H2","H3","H4","H5","H6","P1","P2","P3","P4","P5"))
+##' POST_FATE.validation(name.simulation = "FATE_Champsaur"
##' , sim.version = "SIMUL_V4.1"
##' , year = 2000
+##' , perStrata = TRUE
##' , doHabitat = FALSE
##' , doComposition = FALSE
##' , doRichness = TRUE
@@ -156,6 +173,7 @@
POST_FATE.validation = function(name.simulation
, sim.version
, year
+ , perStrata = TRUE
, doHabitat = TRUE
, obs.path
, releves.PFG
@@ -163,15 +181,21 @@ POST_FATE.validation = function(name.simulation
, hab.obs
, validation.mask
, studied.habitat = NULL
+ , list.strata.simulations = NULL
, doComposition = TRUE
, PFG.considered_PFG.compo
, habitat.considered_PFG.compo
+ , strata.considered_PFG.compo
, doRichness = TRUE
, list.PFG
, exclude.PFG = NULL){
if(doHabitat == TRUE){
+ cat("\n\n #------------------------------------------------------------#")
+ cat("\n # HABITAT VALIDATION")
+ cat("\n #------------------------------------------------------------# \n")
+
## GLOBAL PARAMETERS
dir.create(file.path(name.simulation, "VALIDATION", "HABITAT", sim.version), showWarnings = FALSE)
@@ -179,6 +203,7 @@ POST_FATE.validation = function(name.simulation
# General
output.path = paste0(name.simulation, "/VALIDATION")
year = year # choice in the year for validation
+ perStrata = perStrata
# Useful elements to extract from the simulation
name = .getParam(params.lines = paste0(name.simulation, "/PARAM_SIMUL/Simul_parameters_", str_split(sim.version, "_")[[1]][2], ".txt"),
@@ -189,8 +214,17 @@ POST_FATE.validation = function(name.simulation
# For habitat validation
# CBNA releves data habitat map
- releves.PFG<-read.csv(paste0(obs.path, releves.PFG),header=T,stringsAsFactors = T)
- releves.sites<-st_read(paste0(obs.path, releves.sites))
+ releves.PFG = read.csv(paste0(obs.path,releves.PFG),header=T,stringsAsFactors = T)
+
+ if(perStrata==TRUE){
+ list.strata.releves = as.character(unique(releves.PFG$strata))
+ list.strata.simulations = list.strata.simulations
+ }else {
+ list.strata.releves = NULL
+ list.strata.simulations = NULL
+ }
+
+ releves.sites = st_read(paste0(obs.path, releves.sites))
hab.obs = raster(paste0(obs.path, hab.obs))
# Habitat mask at FATE simu resolution
hab.obs.modif <- projectRaster(from = hab.obs, res = res(simulation.map)[1], crs = crs(projection(simulation.map)), method = "ngb")
@@ -201,7 +235,7 @@ POST_FATE.validation = function(name.simulation
if(is.null(studied.habitat)){
studied.habitat = studied.habitat #if null, the function will study all the habitats in the map
} else if(is.character(studied.habitat)){
- studied.habitat = studied.habitat #if a character vector with habitat names, the functuon will study only the habitats in the vector
+ studied.habitat = studied.habitat #if a character vector with habitat names, the function will study only the habitats in the vector
} else{
stop("studied.habitat is not a vector of character")
}
@@ -219,7 +253,7 @@ POST_FATE.validation = function(name.simulation
, studied.habitat = studied.habitat
, RF.param = RF.param
, output.path = output.path
- , perStrata = F
+ , perStrata = perStrata
, sim.version = sim.version)
## USE THE RF MODEL TO VALIDATE FATE OUTPUT
@@ -232,9 +266,11 @@ POST_FATE.validation = function(name.simulation
, predict.all.map = predict.all.map
, sim.version = sim.version
, name.simulation = name.simulation
- , perStrata = F
+ , perStrata = perStrata
, hab.obs = hab.obs
- , year = year)
+ , year = year
+ , list.strata.releves = list.strata.releves
+ , list.strata.simulations = list.strata.simulations)
## AGGREGATE HABITAT PREDICTION AND PLOT PREDICTED HABITAT
@@ -254,16 +290,29 @@ POST_FATE.validation = function(name.simulation
if(doComposition == TRUE){
+ cat("\n\n #------------------------------------------------------------#")
+ cat("\n # PFG COMPOSITION VALIDATION")
+ cat("\n #------------------------------------------------------------# \n")
+
## GLOBAL PARAMETERS
if(doHabitat == FALSE){
+
+ perStrata = perStrata
- # Get observed distribution
- releves.PFG = read.csv(paste0(obs.path, releves.PFG),header=T,stringsAsFactors = T)
- releves.sites = st_read(paste0(obs.path, releves.sites))
- hab.obs = raster(paste0(obs.path, hab.obs))
- # Do PFG composition validation
- validation.mask = raster(paste0(obs.path, validation.mask))
+ # Get observed distribution
+ releves.PFG = read.csv(paste0(obs.path, releves.PFG),header=T,stringsAsFactors = T)
+ releves.sites = st_read(paste0(obs.path, releves.sites))
+ hab.obs = raster(paste0(obs.path, hab.obs))
+ # Do PFG composition validation
+ validation.mask = raster(paste0(obs.path, validation.mask))
+ if(perStrata==TRUE){
+ list.strata.releves = as.character(unique(releves.PFG$strata))
+ list.strata.simulations = list.strata.simulations
+ }else {
+ list.strata.releves = NULL
+ list.strata.simulations = NULL
+ }
}
## GET OBSERVED DISTRIBUTION
@@ -274,9 +323,9 @@ POST_FATE.validation = function(name.simulation
, releves.sites = releves.sites
, hab.obs = hab.obs
, PFG.considered_PFG.compo = PFG.considered_PFG.compo
- , strata.considered_PFG.compo = "A"
+ , strata.considered_PFG.compo = strata.considered_PFG.compo
, habitat.considered_PFG.compo = habitat.considered_PFG.compo
- , perStrata.compo = FALSE
+ , perStrata = perStrata
, sim.version = sim.version)
## DO PFG COMPOSITION VALIDATION
@@ -286,20 +335,25 @@ POST_FATE.validation = function(name.simulation
, sim.version = sim.version
, hab.obs = hab.obs
, PFG.considered_PFG.compo = PFG.considered_PFG.compo
- , strata.considered_PFG.compo = "A"
+ , strata.considered_PFG.compo = strata.considered_PFG.compo
, habitat.considered_PFG.compo = habitat.considered_PFG.compo
, observed.distribution = obs.distri
- , perStrata.compo = FALSE
+ , perStrata = perStrata
, validation.mask = validation.mask
- , year = year)
+ , year = year
+ , list.strata.simulations = list.strata.simulations
+ , list.strata.releves = list.strata.releves)
}
if(doRichness == TRUE){
- output.path = paste0(name.simulation, "/VALIDATION/PFG_RICHNESS/", sim.version)
+ cat("\n\n #------------------------------------------------------------#")
+ cat("\n # PFG RICHNESS VALIDATION")
+ cat("\n #------------------------------------------------------------# \n")
- #exclude PFG : character vector containing the names of the PFG you want to exclude from the analysis #optional
+ output.path = paste0(name.simulation, "/VALIDATION/PFG_RICHNESS/", sim.version)
+ perStrata = perStrata
#list of PFG of interest
list.PFG<-setdiff(list.PFG,exclude.PFG)
@@ -307,9 +361,19 @@ POST_FATE.validation = function(name.simulation
registerDoParallel(detectCores()-2)
dying.PFG.list<-foreach(i=1:length(sim.version)) %dopar% {
- simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_", sim.version, ".csv"))
- simu_PFG = simu_PFG[,c("PFG","ID.pixel", paste0("X",year))]
- colnames(simu_PFG) = c("PFG", "pixel", "abs")
+ if(perStrata==F){
+
+ simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_", sim.version, ".csv"))
+ simu_PFG = simu_PFG[,c("PFG","ID.pixel", paste0("X",year))]
+ colnames(simu_PFG) = c("PFG", "pixel", "abs")
+
+ } else if(perStrata==T){
+
+ simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_perStrata_", sim.version, ".csv"))
+ simu_PFG = simu_PFG[,c("PFG","ID.pixel", "strata", paste0("X", year))]
+ colnames(simu_PFG) = c("PFG", "pixel", "strata", "abs")
+
+ }
return(setdiff(list.PFG,unique(simu_PFG$PFG)))
}
@@ -337,10 +401,12 @@ POST_FATE.validation = function(name.simulation
}
- cat("\n ---------- END OF FUNCTION \n")
+ cat("\n\n #------------------------------------------------------------#")
+ cat("\n # RESULTS : ")
+ cat("\n #------------------------------------------------------------# \n")
if(doRichness == TRUE){
- cat("\n ---------- PFG RICHNESS VALIDATION RESULTS \n")
+ cat("\n ---------- PFG RICHNESS : \n")
cat(paste0("\n Richness at year ", year, " : ", output[[1]][2], "\n"))
cat(paste0("\n Number of PFG extinction at year ", year, " : ", sum(output[[2]]), "\n"))
} else{cat("\n ---------- PFG RICHNESS VALIDATION DISABLED \n")
@@ -349,14 +415,14 @@ POST_FATE.validation = function(name.simulation
hab.pred = read.csv(paste0(name.simulation, "/VALIDATION/HABITAT/", sim.version, "/hab.pred.csv"))
failure = as.numeric((table(hab.pred$prediction.code)[1]/sum(table(hab.pred$prediction.code)))*100)
success = as.numeric((table(hab.pred$prediction.code)[2]/sum(table(hab.pred$prediction.code)))*100)
- cat("\n ---------- HABITAT VALIDATION RESULTS \n")
+ cat("\n ---------- HABITAT : \n")
cat(paste0("\n", round(failure, digits = 2), "% of habitats are not correctly predicted by ", sim.version, " \n"))
cat(paste0("\n", round(success, digits = 2), "% of habitats are correctly predicted by ", sim.version, " \n"))
plot(prediction.map)
} else{cat("\n ---------- HABITAT VALIDATION DISABLED \n")
}
if(doComposition == TRUE){
- cat("\n ---------- PFG COMPOSITION VALIDATION RESULTS \n")
+ cat("\n ---------- PFG COMPOSITION : \n")
return(performance.composition)
} else{cat("\n ---------- PFG COMPOSITION VALIDATION DISABLED \n")
}
diff --git a/R/UTILS.do_PFG_composition_validation.R b/R/UTILS.do_PFG_composition_validation.R
index 72c2284..6cb0c20 100644
--- a/R/UTILS.do_PFG_composition_validation.R
+++ b/R/UTILS.do_PFG_composition_validation.R
@@ -28,13 +28,18 @@
##' @param validation.mask file which contain a raster mask that specified
##' which pixels need validation.
##' @param year year of simulation to validate.
+##' @param list.strata.simulations a character vector which contain \code{FATE}
+##' strata definition and correspondence with observed strata definition.
+##' @param list.strata.releves a character vector which contain the observed strata
+##' definition, extracted from observed PFG releves.
##'
##' @details
##'
##' After preliminary checks, this code extract observed habitat from the \code{hab.obs}
-##' map and, then, merge it with the simulated PFG abundance file from results of a \code{FATE}
-##' simulation. After filtration of the required PFG, strata and habitats, the function
-##' transform the data into relative metrics and, then, compute distribution per PFG, strata
+##' map and, then, merge it with the simulated PFG abundance file (with or without strata definition)
+##' from results of the \code{FATE} simulation selected with \code{sim.version}.
+##' After filtration of the required PFG, strata and habitats, the function transforms
+##' the data into relative metrics and, then, compute distribution per PFG, strata
##' and habitat (if necessary). Finally, the code computes proximity between observed
##' and simulated data, per PFG, strata and habitat.
##'
@@ -54,11 +59,14 @@
##' @importFrom stats aggregate
##' @importFrom utils read.csv write.csv
##' @importFrom data.table setDT
+##' @importFrom tidyselect all_of
##'
### END OF HEADER ##############################################################
-do.PFG.composition.validation<-function(name.simulation, obs.path, sim.version, hab.obs, PFG.considered_PFG.compo, strata.considered_PFG.compo, habitat.considered_PFG.compo, observed.distribution, perStrata.compo, validation.mask, year){
+do.PFG.composition.validation<-function(name.simulation, obs.path, sim.version, hab.obs, PFG.considered_PFG.compo, strata.considered_PFG.compo, habitat.considered_PFG.compo, observed.distribution, perStrata, validation.mask, year, list.strata.simulations, list.strata.releves){
+
+ cat("\n ---------- PFG COMPOSITION VALIDATION \n")
output.path = paste0(name.simulation, "/VALIDATION/PFG_COMPOSITION/", sim.version)
name = .getParam(params.lines = paste0(name.simulation, "/PARAM_SIMUL/Simul_parameters_", str_split(sim.version, "_")[[1]][2], ".txt"),
@@ -80,12 +88,19 @@ do.PFG.composition.validation<-function(name.simulation, obs.path, sim.version,
############################
#check if strata definition used in the RF model is the same as the one used to analyze FATE output
- if(perStrata.compo==F){
- list.strata<-"all"
- }else{
- stop("check 'perStrata' parameter and/or the names of strata in param$list.strata.releves & param$list.strata.simul")
+ if(perStrata==T){
+ if(all(base::intersect(names(list.strata.simulations), list.strata.releves)==names(list.strata.simulations))){
+ list.strata = names(list.strata.simulations)
+ print("strata definition OK")
+ }else {
+ stop("wrong strata definition")
+ }
+ }else if(perStrata==F){
+ list.strata = "all"
+ }else {
+ stop("check 'perStrata' parameter and/or the names of strata in list.strata.releves & list.strata.simulations")
}
-
+
#consistency between habitat.FATE.map and simulation.map
if(!compareCRS(simulation.map,habitat.FATE.map)){
print("reprojecting habitat.FATE.map to match simulation.map crs")
@@ -145,22 +160,36 @@ do.PFG.composition.validation<-function(name.simulation, obs.path, sim.version,
print("processing simulations")
results.simul<-list()
- for(i in 1:length(sim.version)) {
+ for(i in 1:length(all_of(sim.version))) {
# 3.1. Data preparation
#########################
#get simulated abundance per pixel*strata*PFG for pixels in the simulation area
- simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_", sim.version, ".csv"))
- simu_PFG = simu_PFG[,c("PFG","ID.pixel", paste0("X",year))]
- colnames(simu_PFG) = c("PFG", "pixel", "abs")
+ if(perStrata==F){
+
+ simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_", sim.version, ".csv"))
+ simu_PFG = simu_PFG[,c("PFG","ID.pixel", paste0("X",year))]
+ colnames(simu_PFG) = c("PFG", "pixel", "abs")
+
+ }else if(perStrata==T){
+
+ simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_perStrata_", sim.version, ".csv"))
+ simu_PFG = simu_PFG[,c("PFG","ID.pixel", "strata", paste0("X", year))]
+ colnames(simu_PFG) = c("PFG", "pixel", "strata", "abs")
+ }
#aggregate per strata group with the correspondence provided in input
simu_PFG$new.strata<-NA
#attribute the "new.strata" value to group FATE strata used in the simulations into strata comparable with CBNA ones (all strata together or per strata)
- if(perStrata.compo==F){
+ if(perStrata==F){
simu_PFG$new.strata<-"A"
+ }else if(perStrata==T){
+ for(p in 1:length(list.strata.simulations)){
+ simu_PFG$new.strata[is.element(simu_PFG$strata,list.strata.simulations[[p]])] = names(list.strata.simulations)[p]
+ }
+ simu_PFG$strata = NULL
}
simu_PFG<-dplyr::rename(simu_PFG,"strata"="new.strata")
diff --git a/R/UTILS.do_habitat_validation.R b/R/UTILS.do_habitat_validation.R
index 097265e..95612fa 100644
--- a/R/UTILS.do_habitat_validation.R
+++ b/R/UTILS.do_habitat_validation.R
@@ -20,29 +20,33 @@
##' @param validation.mask a raster mask that specified which pixels need validation.
##' @param simulation.map a raster map of the whole studied area use to check
##' the consistency between simulation map and the observed habitat map.
-##' @param predict.all.map a TRUE/FALSE vector. If TRUE, the script will predict
+##' @param predict.all.map \code{Logical}. If TRUE, the script will predict
##' habitat for the whole map.
##' @param sim.version name of the simulation to validate.
##' @param name.simulation simulation folder name.
-##' @param perStrata a TRUE/FALSE vector. If TRUE, the PFG abundance is defined
+##' @param perStrata \code{Logical}. If TRUE, the PFG abundance is defined
##' by strata in each pixel. If FALSE, PFG abundance is defined for all strata.
##' @param hab.obs a raster map of the observed habitat in the
##' extended studied area.
-##' @param year year of simulation for validation.
+##' @param year simulation year selected for validation.
+##' @param list.strata.releves a character vector which contain the observed strata
+##' definition, extracted from observed PFG releves.
+##' @param list.strata.simulations a character vector which contain \code{FATE}
+##' strata definition and correspondence with observed strata definition.
##'
##' @details
##'
##' After several preliminary checks, the function is going to prepare the observations
-##' database by extracting the observed habitat from a raster map. Then, for each
-##' simulations (sim.version), the script take the evolution abundance for each PFG
-##' and all strata file and predict the habitat for the whole map (if option selected)
-##' thanks to the RF model. Finally, the function computes habitat performance based on
-##' TSS for each habitat.
+##' database by extracting the observed habitat from a raster map. Then, for the
+##' simulation \code{sim.version}, the script take the evolution abundance for each PFG
+##' and all strata (or for each PFG & each strata if option selected) file and predict
+##' the habitat for the whole map (if option selected) thanks to the RF model.
+##' Finally, the function computes habitat performance based on TSS for each habitat.
##'
##' @return
##'
-##' Habitat performance file
-##' If option selected, the function returns an habitat prediction file with
+##' Habitat performance file. \cr
+##' If option selected, the function also returns an habitat prediction file with
##' observed and simulated habitat for each pixel of the whole map.
##'
##' @export
@@ -59,11 +63,14 @@
##' @importFrom utils write.csv
##' @importFrom doParallel registerDoParallel
##' @importFrom parallel detectCores
+##' @importFrom tidyselect all_of
##'
### END OF HEADER ##############################################################
-do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validation.mask, simulation.map, predict.all.map, sim.version, name.simulation, perStrata, hab.obs, year) {
+do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validation.mask, simulation.map, predict.all.map, sim.version, name.simulation, perStrata, hab.obs, year, list.strata.releves, list.strata.simulations) {
+
+ cat("\n ---------- FATE OUTPUT ANALYSIS \n")
#notes
# we prepare the relevé data in this function, but in fact we could provide them directly if we adjust the code
@@ -73,10 +80,17 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
###########################
#check if strata definition used in the RF model is the same as the one used to analyze FATE output
- if(perStrata==F){
+ if(perStrata==T){
+ if(all(base::intersect(names(list.strata.simulations), list.strata.releves)==names(list.strata.simulations))){
+ list.strata = names(list.strata.simulations)
+ print("strata definition OK")
+ }else {
+ stop("wrong strata definition")
+ }
+ }else if(perStrata==F){
list.strata<-"all"
}else{
- stop("check 'perStrata' parameter and/or the names of strata in param$list.strata.releves & param$list.strata.simul")
+ stop("check 'perStrata' parameter and/or the names of strata in list.strata.releves & list.strata.simulation")
}
#initial consistency between habitat.FATE.map and validation.mask (do it before the adjustement of habitat.FATE.map)
@@ -160,17 +174,27 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
print("processing simulations")
registerDoParallel(detectCores()-2)
- results.simul <- foreach(i=1:length(sim.version)) %dopar%{
+ results.simul <- foreach(i=1:length(all_of(sim.version))) %dopar%{
########################"
# III.1. Data preparation
#########################
#get simulated abundance per pixel*strata*PFG for pixels in the simulation area
- simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_", sim.version, ".csv"))
- simu_PFG = simu_PFG[,c("PFG","ID.pixel", paste0("X",year))] #keep only the PFG, ID.pixel and abundance at any year columns
- #careful : the number of abundance data files to save is to defined in POST_FATE.temporal.evolution function
- colnames(simu_PFG) = c("PFG", "pixel", "abs")
+ if(perStrata==F){
+
+ simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_", sim.version, ".csv"))
+ simu_PFG = simu_PFG[,c("PFG","ID.pixel", paste0("X",year))] #keep only the PFG, ID.pixel and abundance at any year columns
+ #careful : the number of abundance data files to save is to defined in POST_FATE.temporal.evolution function
+ colnames(simu_PFG) = c("PFG", "pixel", "abs")
+
+ } else if(perStrata==T){
+
+ simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_perStrata_", sim.version, ".csv"))
+ simu_PFG = simu_PFG[,c("PFG","ID.pixel", "strata", paste0("X", year))]
+ colnames(simu_PFG) = c("PFG", "pixel", "strata", "abs")
+
+ }
#aggregate per strata group with the correspondance provided in input
simu_PFG$new.strata<-NA
@@ -178,6 +202,11 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
#attribute the "new.strata" value to group FATE strata used in the simulations into strata comparable with CBNA ones (all strata together or per strata)
if(perStrata==F){
simu_PFG$new.strata<-"A"
+ }else if(perStrata==T){
+ for(i in 1:length(list.strata.simulations)){
+ simu_PFG$new.strata[is.element(simu_PFG$strata, list.strata.simulations[[i]])] = names(list.strata.simulations)[i]
+ }
+ simu_PFG$strata = NULL
}
simu_PFG<-dplyr::rename(simu_PFG,"strata"="new.strata")
diff --git a/R/UTILS.get_observed_distribution.R b/R/UTILS.get_observed_distribution.R
index 1fb9f86..6a64f03 100644
--- a/R/UTILS.get_observed_distribution.R
+++ b/R/UTILS.get_observed_distribution.R
@@ -23,7 +23,7 @@
##' strata considered in the validation.
##' @param habitat.considered_PFG.compo a character vector of the list of habitat(s)
##' considered in the validation.
-##' @param perStrata.compo Logical. All strata together (FALSE) or per strata (TRUE).
+##' @param perStrata \code{Logical}. All strata together (FALSE) or per strata (TRUE).
##' @param sim.version name of the simulation we want to validate (it works with
##' only one \code{sim.version}).
##'
@@ -63,9 +63,11 @@ get.observed.distribution<-function(name.simulation
, PFG.considered_PFG.compo
, strata.considered_PFG.compo
, habitat.considered_PFG.compo
- , perStrata.compo
+ , perStrata
, sim.version){
+ cat("\n ---------- GET OBSERVED DISTRIBUTION \n")
+
composition.mask = NULL
output.path = paste0(name.simulation, "/VALIDATION/PFG_COMPOSITION/", sim.version)
dir.create(file.path(output.path), recursive = TRUE, showWarnings = FALSE)
@@ -79,9 +81,9 @@ get.observed.distribution<-function(name.simulation
#transformation into coverage percentage
releves.PFG$coverage<-PRE_FATE.abundBraunBlanquet(releves.PFG$BB)/100 #as a proportion, not a percentage
- if(perStrata.compo==T){
+ if(perStrata==T){
aggregated.releves.PFG<-aggregate(coverage~site+PFG+strata,data=releves.PFG,FUN="sum")
- }else if(perStrata.compo==F){
+ }else if(perStrata==F){
aggregated.releves.PFG<-aggregate(coverage~site+PFG,data=releves.PFG,FUN="sum")
aggregated.releves.PFG$strata<-"A" #"A" is for "all". Important to have a single-letter code here (useful to check consistency between relevés strata and model strata)
}
@@ -154,7 +156,7 @@ get.observed.distribution<-function(name.simulation
distribution<-setDT(aggregated.releves.PFG)[, quantile(relative.metric), by=c("PFG","habitat","strata")]
distribution<-rename(distribution,"quantile"="V1")
- distribution<-data.frame(distribution,rank=seq(0,5,1)) #to be able to sort on quantile
+ distribution<-data.frame(distribution,rank=seq(0,4,1)) #to be able to sort on quantile
# 7. Add the missing PFG*habitat*strata
#final distribution is the distribution once the missing combination have been added. For these combination, all quantiles are set to 0
diff --git a/R/UTILS.plot_predicted_habitat.R b/R/UTILS.plot_predicted_habitat.R
index b9b1beb..7e3e65e 100644
--- a/R/UTILS.plot_predicted_habitat.R
+++ b/R/UTILS.plot_predicted_habitat.R
@@ -22,7 +22,7 @@
##'
##' @details
##'
-##' The function determine true/false prediction ('failure' if false, 'success' if true)
+##' The function determines true/false prediction ('failure' if false, 'success' if true)
##' and prepare a dataframe containing color and habitat code. Then, the script merge
##' the prediction dataframe with the color and code habitat dataframe. Finally,
##' the function draw a raster map and a plot of prediction habitat over it thanks
@@ -53,6 +53,8 @@ plot.predicted.habitat<-function(predicted.habitat
, sim.version)
{
+ cat("\n ---------- AGGREGATE HABITAT PREDICTION AND PLOT PREDICTED HABITAT \n")
+
#auxiliary function to compute the proportion of simulations lead to the modal prediction
count.habitat<-function(df){
index<-which(names(df)=="modal.predicted.habitat")
diff --git a/R/UTILS.train_RF_habitat.R b/R/UTILS.train_RF_habitat.R
index ddd4f0e..34e159c 100644
--- a/R/UTILS.train_RF_habitat.R
+++ b/R/UTILS.train_RF_habitat.R
@@ -28,7 +28,7 @@
##' the prediction error.
##' @param output.path access path to the for the folder where output files
##' will be created.
-##' @param perStrata a TRUE/FALSE vector. If TRUE, the PFG abundance is defined
+##' @param perStrata \code{Logical}. If TRUE, the PFG abundance is defined
##' by strata in each site. If FALSE, PFG abundance is defined for all strata.
##' @param sim.version name of the simulation we want to validate.
##'
@@ -75,6 +75,8 @@ train.RF.habitat<-function(releves.PFG
, sim.version)
{
+ cat("\n ---------- TRAIN A RANDOM FOREST MODEL ON OBSERVED DATA \n")
+
#1. Compute relative abundance metric
#########################################
From b38567fffab8a3f47b21a7dae0fd6e34115e10be Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Fri, 18 Mar 2022 14:21:55 +0100
Subject: [PATCH 059/176] Add files via upload
Update of the validation functions and the temporal evolution function with strata definition
---
man/POST_FATE.temporalEvolution.Rd | 20 +++++---
man/POST_FATE.validation.Rd | 73 ++++++++++++++++++----------
man/do.PFG.composition.validation.Rd | 25 +++++++---
man/do.habitat.validation.Rd | 30 +++++++-----
man/get.observed.distribution.Rd | 4 +-
man/plot.predicted.habitat.Rd | 2 +-
man/train.RF.habitat.Rd | 4 +-
7 files changed, 103 insertions(+), 55 deletions(-)
diff --git a/man/POST_FATE.temporalEvolution.Rd b/man/POST_FATE.temporalEvolution.Rd
index 0dfd3c2..97589a3 100644
--- a/man/POST_FATE.temporalEvolution.Rd
+++ b/man/POST_FATE.temporalEvolution.Rd
@@ -10,7 +10,8 @@ POST_FATE.temporalEvolution(
file.simulParam = NULL,
no_years,
opt.ras_habitat = NULL,
- opt.no_CPU = 1
+ opt.no_CPU = 1,
+ perStrata = FALSE
)
}
\arguments{
@@ -31,13 +32,17 @@ A \code{string} corresponding to the file name of a raster mask, with an
\item{opt.no_CPU}{(\emph{optional}) default \code{1}. \cr The number of
resources that can be used to parallelize the \code{unzip/zip} of raster
files, as well as the extraction of values from raster files}
+
+\item{perStrata}{default \code{FALSE}. \cr If abundance per PFG & per Strata
+activated in global parameters, the function saved a temporal evolution file
+per PFG & per Strata.}
}
\value{
A \code{list} containing three \code{data.frame} objects with the
following columns :
\describe{
\item{\code{PFG}}{concerned plant functional group (for abundance)}
- \item{\code{STRATUM}}{concerned height stratum (for LIGHT)}
+ \item{\code{STRATUM}}{concerned height stratum (for LIGHT & abundance if option selected)}
\item{\code{ID.pixel}}{number of the concerned pixel}
\item{\code{X, Y}}{coordinates of the concerned pixel}
\item{\code{HAB}}{habitat of the concerned pixel}
@@ -64,14 +69,17 @@ a specific parameter file within this simulation, \strong{one to three
preanalytical tables that can then be used to create graphics}. \cr \cr
For each PFG and each selected simulation year, raster maps are retrieved
-from the results folder \code{ABUND_perPFG_allStrata} and unzipped.
+from the results folder \code{ABUND_perPFG_allStrata} and unzipped. If
+\code{perStrata} = \code{TRUE}, raster maps are retrieved from the folder
+\code{ABUND_perPFG_perStrata} and unzipped.
Informations extracted lead to the production of one table before the maps
are compressed again :
\itemize{
- \item{the value of \strong{abundance for each Plant Functional Group}
- for each selected simulation year(s) in every pixel in which the PFG is
- present for at least one of the selected simulation year(s) \cr \cr
+ \item{the value of \strong{abundance for each Plant Functional Group}
+ for each selected simulation year(s) and, if option selected, each height
+ stratum in every pixel in which the PFG is present for at least one of
+ the selected simulation year(s) \cr \cr
}
}
diff --git a/man/POST_FATE.validation.Rd b/man/POST_FATE.validation.Rd
index 8eed0d9..ad92f2d 100644
--- a/man/POST_FATE.validation.Rd
+++ b/man/POST_FATE.validation.Rd
@@ -8,6 +8,7 @@ POST_FATE.validation(
name.simulation,
sim.version,
year,
+ perStrata = TRUE,
doHabitat = TRUE,
obs.path,
releves.PFG,
@@ -15,9 +16,11 @@ POST_FATE.validation(
hab.obs,
validation.mask,
studied.habitat = NULL,
+ list.strata.simulations = NULL,
doComposition = TRUE,
PFG.considered_PFG.compo,
habitat.considered_PFG.compo,
+ strata.considered_PFG.compo,
doRichness = TRUE,
list.PFG,
exclude.PFG = NULL
@@ -30,7 +33,10 @@ POST_FATE.validation(
\item{year}{year of simulation for validation.}
-\item{doHabitat}{logical. Default \code{TRUE}. If \code{TRUE}, habitat validation module is activated,
+\item{perStrata}{\code{Logical}. Default \code{TRUE}. If \code{TRUE}, PFG abundance is defined by strata.
+If \code{FALSE}, PFG abundance defined for all strata (habitat & PFG composition & PFG richness validation).}
+
+\item{doHabitat}{\code{Logical}. Default \code{TRUE}. If \code{TRUE}, habitat validation module is activated,
if \code{FALSE}, habitat validation module is disabled.}
\item{obs.path}{the function needs observed data, please create a folder for them in your
@@ -48,7 +54,10 @@ and each PFG and strata (habitat & PFG composition validation).}
take into account of all habitats in the \code{hab.obs} map. Otherwise, please specify
in a vector habitats that will be take into account for the validation (habitat validation).}
-\item{doComposition}{logical. Default \code{TRUE}. If \code{TRUE}, PFG composition validation module is activated,
+\item{list.strata.simulations}{default \code{NULL}. A character vector which contain \code{FATE}
+strata definition and correspondence with observed strata definition.}
+
+\item{doComposition}{\code{Logical}. Default \code{TRUE}. If \code{TRUE}, PFG composition validation module is activated,
if \code{FALSE}, PFG composition validation module is disabled.}
\item{PFG.considered_PFG.compo}{a character vector of the list of PFG considered
@@ -57,7 +66,11 @@ in the validation (PFG composition validation).}
\item{habitat.considered_PFG.compo}{a character vector of the list of habitat(s)
considered in the validation (PFG composition validation).}
-\item{doRichness}{logical. Default \code{TRUE}. If \code{TRUE}, PFG richness validation module is activated,
+\item{strata.considered_PFG.compo}{If \code{perStrata} = \code{FALSE}, a character vector with value "A"
+(selection of one or several specific strata disabled). If \code{perStrata} = \code{TRUE}, a character
+vector with at least one of the observed strata (PFG composition validation).}
+
+\item{doRichness}{\code{Logical}. Default \code{TRUE}. If \code{TRUE}, PFG richness validation module is activated,
if \code{FALSE}, PFG richness validation module is disabled.}
\item{list.PFG}{a character vector which contain all the PFGs taken account in
@@ -86,34 +99,37 @@ for a \code{FATE} simulation and computes the difference between observed and si
\details{
\describe{
\item{Habitat validation}{The observed habitat is derived from a map of the area, the simulated habitat
-is derived from FATE simulated relative abundance, based on a random forest
-algorithm trained on observed data. To compare observations and simulations, the function
-computes confusion matrix between observation and prediction and then computes the TSS
-for each habitat h (number of prediction of habitat h/number of observation
-of habitat h + number of non-prediction of habitat h/number of non-observation
-of habitat h). The final metrics this script use is the mean of TSS per habitat over all
-habitats, weighted by the share of each habitat in the observed habitat distribution.}
- \item{PFG composition validation}{This code firstly run the \code{get.observed.distribution} function in order to have
-a \code{obs.distri} file which contain the observed distribution per PFG, strata and habitat.
-This file is also an argument for the \code{do.PFG.composition.validation} function run next.
-This second sub function provide the computation of distance between observed and simulated distribution. \cr
-NB : The argument \code{strata.considered_PFG.compo} is by default "A" in the 2 sub functions because
-it's easier for a \code{FATE} simulation to provide PFG abundances for all strata. \cr The argument
-\code{perStrata.compo} is by default \code{NULL} for the same reasons.}
+is derived from \code{FATE} simulated relative abundance, based on a random forest
+algorithm trained on observed releves data (see \code{\link{train.RF.habitat}}) \cr
+To compare observations and simulations, the function computes confusion matrix between
+observations and predictions and then compute the TSS for each habitat h
+(number of prediction of habitat h/number of observation of habitat h + number of non-prediction
+of habitat h/number of non-observation of habitat h). The final metrics this script use is the
+mean of TSS per habitat over all habitats, weighted by the share of each habitat in the observed
+habitat distribution. The habitat validation also provides a visual comparison of observed and
+simulated habitat on the whole studied area (see \code{\link{do.habitat.validation}} &
+ \code{\link{plot.predicted.habitat}}).} \cr
+ \item{PFG composition validation}{This code firstly run the \code{get.observed.distribution}
+function in order to have a \code{obs.distri} file which contain the observed distribution
+per PFG, strata and habitat. This file is also an argument for the \code{do.PFG.composition.validation}
+function run next. This second sub function provides the computation of distance between observed
+and simulated distribution.}
\item{PFG richness validation}{Firstly, the function updates the \code{list.PFG} with \code{exclude.PFG} vector.
-Then, the script takes the abundance per PFG file from the results of the \code{FATE}
-simulation and computes the difference between the \code{list.PFG} and all the PFG
-which are presents in the abundance file, in order to obtain the PFG richness for a simulation.
-The function also determine if an observed PFG is missing in the results of the simulation at
-a specific year.}
+Then, the script takes the abundance per PFG (and per strata if option selected) file from the
+results of the \code{FATE} simulation and computes the difference between the \code{list.PFG}
+and all the PFG which are presents in the abundance file, in order to obtain the PFG richness
+for a simulation. The function also determine if an observed PFG is missing in the results of the
+simulation at a specific year.}
}
}
\examples{
## Habitat validation ---------------------------------------------------------------------------------
+list.strata.simulations = list(S = c(1,2,3), M = c(4), B = c(5,6,7))
POST_FATE.validation(name.simulation = "FATE_Champsaur"
, sim.version = "SIMUL_V4.1"
, year = 2000
+ , perStrata = TRUE
, doHabitat = TRUE
, obs.path = "FATE_Champsaur/DATA_OBS/"
, releves.PFG = "releves.PFG.abundance.csv"
@@ -121,15 +137,19 @@ POST_FATE.validation(name.simulation = "FATE_Champsaur"
, hab.obs = "simplified.cesbio.map.grd"
, validation.mask = "certain.habitat.100m.restricted.grd"
, studied.habitat = NULL
+ , list.strata.simulations = list.strata.simulations
, doComposition = FALSE
, doRichness = FALSE)
## PFG composition validation --------------------------------------------------------------------------
-list.PFG<-as.factor(c("C1","C2","C3","C4","H1","H2","H3","H4","H5","H6","P1","P2","P3","P4","P5"))
+list.strata.simulations = list(S = c(1,2,3), M = c(4), B = c(5,6,7))
+list.PFG = as.factor(c("C1","C2","C3","C4","H1","H2","H3","H4","H5","H6","P1","P2","P3","P4","P5"))
habitat.considered = c("coniferous.forest", "deciduous.forest", "natural.grassland", "woody.heatland")
+strata.considered_PFG.compo = c("S", "M", "B")
POST_FATE.validation(name.simulation = "FATE_Champsaur"
, sim.version = "SIMUL_V4.1"
, year = 2000
+ , perStrata = TRUE
, doHabitat = FALSE
, obs.path = "FATE_Champsaur/DATA_OBS/"
, releves.PFG = "releves.PFG.abundance.csv"
@@ -137,16 +157,19 @@ POST_FATE.validation(name.simulation = "FATE_Champsaur"
, hab.obs = "simplified.cesbio.map.grd"
, validation.mask = "certain.habitat.100m.restricted.grd"
, studied.habitat = NULL
+ , list.strata.simulations = list.strata.simulations
, doComposition = TRUE
, PFG.considered_PFG.compo = list.PFG
, habitat.considered_PFG.compo = habitat.considered
+ , strata.considered_PFG.compo = strata.considered_PFG.compo
, doRichness = FALSE)
## PFG richness validation -----------------------------------------------------------------------------
-list.PFG<-as.factor(c("C1","C2","C3","C4","H1","H2","H3","H4","H5","H6","P1","P2","P3","P4","P5"))
-POST_FATE.validation(name.simulation = "FATE_CHampsaur"
+list.PFG = as.factor(c("C1","C2","C3","C4","H1","H2","H3","H4","H5","H6","P1","P2","P3","P4","P5"))
+POST_FATE.validation(name.simulation = "FATE_Champsaur"
, sim.version = "SIMUL_V4.1"
, year = 2000
+ , perStrata = TRUE
, doHabitat = FALSE
, doComposition = FALSE
, doRichness = TRUE
diff --git a/man/do.PFG.composition.validation.Rd b/man/do.PFG.composition.validation.Rd
index f4a934b..9d35451 100644
--- a/man/do.PFG.composition.validation.Rd
+++ b/man/do.PFG.composition.validation.Rd
@@ -4,7 +4,7 @@
\alias{do.PFG.composition.validation}
\title{Compute distance between observed and simulated distribution}
\usage{
-\method{do}{PFG.composition.validation}(
+do.PFG.composition.validation(
name.simulation,
obs.path,
sim.version,
@@ -13,9 +13,11 @@
strata.considered_PFG.compo,
habitat.considered_PFG.compo,
observed.distribution,
- perStrata.compo,
+ perStrata,
validation.mask,
- year
+ year,
+ list.strata.simulations,
+ list.strata.releves
)
}
\arguments{
@@ -40,12 +42,18 @@ considered in the validation.}
\item{observed.distribution}{PFG observed distribution table.}
-\item{perStrata.compo}{Logical. All strata together (FALSE) or per strata (TRUE).}
-
\item{validation.mask}{file which contain a raster mask that specified
which pixels need validation.}
\item{year}{year of simulation to validate.}
+
+\item{list.strata.simulations}{a character vector which contain \code{FATE}
+strata definition and correspondence with observed strata definition.}
+
+\item{list.strata.releves}{a character vector which contain the observed strata
+definition, extracted from observed PFG releves.}
+
+\item{perStrata.compo}{Logical. All strata together (FALSE) or per strata (TRUE).}
}
\value{
@@ -58,9 +66,10 @@ distribution for a precise \code{FATE} simulation.
}
\details{
After preliminary checks, this code extract observed habitat from the \code{hab.obs}
-map and, then, merge it with the simulated PFG abundance file from results of a \code{FATE}
-simulation. After filtration of the required PFG, strata and habitats, the function
-transform the data into relative metrics and, then, compute distribution per PFG, strata
+map and, then, merge it with the simulated PFG abundance file (with or without strata definition)
+from results of the \code{FATE} simulation selected with \code{sim.version}.
+After filtration of the required PFG, strata and habitats, the function transforms
+the data into relative metrics and, then, compute distribution per PFG, strata
and habitat (if necessary). Finally, the code computes proximity between observed
and simulated data, per PFG, strata and habitat.
}
diff --git a/man/do.habitat.validation.Rd b/man/do.habitat.validation.Rd
index 28c246f..2a294af 100644
--- a/man/do.habitat.validation.Rd
+++ b/man/do.habitat.validation.Rd
@@ -16,7 +16,9 @@ do.habitat.validation(
name.simulation,
perStrata,
hab.obs,
- year
+ year,
+ list.strata.releves,
+ list.strata.simulations
)
}
\arguments{
@@ -34,24 +36,30 @@ studied area.}
\item{simulation.map}{a raster map of the whole studied area use to check
the consistency between simulation map and the observed habitat map.}
-\item{predict.all.map}{a TRUE/FALSE vector. If TRUE, the script will predict
+\item{predict.all.map}{\code{Logical}. If TRUE, the script will predict
habitat for the whole map.}
\item{sim.version}{name of the simulation to validate.}
\item{name.simulation}{simulation folder name.}
-\item{perStrata}{a TRUE/FALSE vector. If TRUE, the PFG abundance is defined
+\item{perStrata}{\code{Logical}. If TRUE, the PFG abundance is defined
by strata in each pixel. If FALSE, PFG abundance is defined for all strata.}
\item{hab.obs}{a raster map of the observed habitat in the
extended studied area.}
-\item{year}{year of simulation for validation.}
+\item{year}{simulation year selected for validation.}
+
+\item{list.strata.releves}{a character vector which contain the observed strata
+definition, extracted from observed PFG releves.}
+
+\item{list.strata.simulations}{a character vector which contain \code{FATE}
+strata definition and correspondence with observed strata definition.}
}
\value{
-Habitat performance file
-If option selected, the function returns an habitat prediction file with
+Habitat performance file. \cr
+If option selected, the function also returns an habitat prediction file with
observed and simulated habitat for each pixel of the whole map.
}
\description{
@@ -61,11 +69,11 @@ for each habitat.
}
\details{
After several preliminary checks, the function is going to prepare the observations
-database by extracting the observed habitat from a raster map. Then, for each
-simulations (sim.version), the script take the evolution abundance for each PFG
-and all strata file and predict the habitat for the whole map (if option selected)
-thanks to the RF model. Finally, the function computes habitat performance based on
-TSS for each habitat.
+database by extracting the observed habitat from a raster map. Then, for the
+simulation \code{sim.version}, the script take the evolution abundance for each PFG
+and all strata (or for each PFG & each strata if option selected) file and predict
+the habitat for the whole map (if option selected) thanks to the RF model.
+Finally, the function computes habitat performance based on TSS for each habitat.
}
\author{
Matthieu Combaud & Maxime Delprat
diff --git a/man/get.observed.distribution.Rd b/man/get.observed.distribution.Rd
index 6d0c1c8..51917af 100644
--- a/man/get.observed.distribution.Rd
+++ b/man/get.observed.distribution.Rd
@@ -13,7 +13,7 @@ get.observed.distribution(
PFG.considered_PFG.compo,
strata.considered_PFG.compo,
habitat.considered_PFG.compo,
- perStrata.compo,
+ perStrata,
sim.version
)
}
@@ -40,7 +40,7 @@ strata considered in the validation.}
\item{habitat.considered_PFG.compo}{a character vector of the list of habitat(s)
considered in the validation.}
-\item{perStrata.compo}{Logical. All strata together (FALSE) or per strata (TRUE).}
+\item{perStrata}{\code{Logical}. All strata together (FALSE) or per strata (TRUE).}
\item{sim.version}{name of the simulation we want to validate (it works with
only one \code{sim.version}).}
diff --git a/man/plot.predicted.habitat.Rd b/man/plot.predicted.habitat.Rd
index 381bac5..14f4615 100644
--- a/man/plot.predicted.habitat.Rd
+++ b/man/plot.predicted.habitat.Rd
@@ -30,7 +30,7 @@ based on a habitat prediction file. For each pixel, the habitat failure or succe
is associated to a color and then, the map is built.
}
\details{
-The function determine true/false prediction ('failure' if false, 'success' if true)
+The function determines true/false prediction ('failure' if false, 'success' if true)
and prepare a dataframe containing color and habitat code. Then, the script merge
the prediction dataframe with the color and code habitat dataframe. Finally,
the function draw a raster map and a plot of prediction habitat over it thanks
diff --git a/man/train.RF.habitat.Rd b/man/train.RF.habitat.Rd
index 1e7cd04..bfd286b 100644
--- a/man/train.RF.habitat.Rd
+++ b/man/train.RF.habitat.Rd
@@ -4,7 +4,7 @@
\alias{train.RF.habitat}
\title{Create a random forest algorithm trained on CBNA data.}
\usage{
-\method{train}{RF.habitat}(
+train.RF.habitat(
releves.PFG,
releves.sites,
hab.obs,
@@ -42,7 +42,7 @@ the prediction error.}
\item{output.path}{access path to the for the folder where output files
will be created.}
-\item{perStrata}{a TRUE/FALSE vector. If TRUE, the PFG abundance is defined
+\item{perStrata}{\code{Logical}. If TRUE, the PFG abundance is defined
by strata in each site. If FALSE, PFG abundance is defined for all strata.}
\item{sim.version}{name of the simulation we want to validate.}
From 1277847522bdec7c4da31046147eb233c11b4e71 Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Fri, 18 Mar 2022 14:23:36 +0100
Subject: [PATCH 060/176] Add files via upload
Update of the validation functions and the temporal evolution function with strata definition
---
.../POST_FATE.temporalEvolution.html | 318 +++++++-----------
docs/reference/POST_FATE.validation.html | 75 +++--
.../do.PFG.composition.validation.html | 26 +-
docs/reference/do.habitat.validation.html | 32 +-
docs/reference/get.observed.distribution.html | 6 +-
docs/reference/plot.predicted.habitat.html | 2 +-
docs/reference/train.RF.habitat.html | 5 +-
7 files changed, 216 insertions(+), 248 deletions(-)
diff --git a/docs/reference/POST_FATE.temporalEvolution.html b/docs/reference/POST_FATE.temporalEvolution.html
index 861ec8c..df368ef 100644
--- a/docs/reference/POST_FATE.temporalEvolution.html
+++ b/docs/reference/POST_FATE.temporalEvolution.html
@@ -1,72 +1,17 @@
-
-
-
-
-
-
-
-Create tables of pixel temporal evolution of PFG abundances (and
-light and soil resources if activated) for a FATE simulation — POST_FATE.temporalEvolution • RFate
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-Create tables of pixel temporal evolution of PFG abundances (and
+light and soil resources if activated) for a FATE simulation — POST_FATE.temporalEvolution • RFate
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
-
a string corresponding to the main directory
-or simulation name of the FATE simulation
-
-
-
file.simulParam
-
default NULL. A string
+
+
Arguments
+
name.simulation
+
a string corresponding to the main directory
+or simulation name of the FATE simulation
+
file.simulParam
+
default NULL. A string
corresponding to the name of a parameter file that will be contained into
-the PARAM_SIMUL folder of the FATE simulation
-
-
-
no_years
-
an integer corresponding to the number of simulation
-years that will be used to extract PFG abundance / light / soil maps
-
-
-
opt.ras_habitat
-
(optional) default NULL.
+the PARAM_SIMUL folder of the FATE simulation
+
no_years
+
an integer corresponding to the number of simulation
+years that will be used to extract PFG abundance / light / soil maps
+
opt.ras_habitat
+
(optional) default NULL.
A string corresponding to the file name of a raster mask, with an
-integer value within each pixel, corresponding to a specific habitat
-
-
-
opt.no_CPU
-
(optional) default 1. The number of
+integer value within each pixel, corresponding to a specific habitat
+
opt.no_CPU
+
(optional) default 1. The number of
resources that can be used to parallelize the unzip/zip of raster
-files, as well as the extraction of values from raster files
-
-
+files, as well as the extraction of values from raster files
+
perStrata
+
default FALSE. If abundance per PFG & per Strata
+activated in global parameters, the function saved a temporal evolution file
+per PFG & per Strata.
+
+
+
Value
+
A list containing three data.frame objects with the
+following columns :
PFG
+
concerned plant functional group (for abundance)
-
Value
+
STRATUM
+
concerned height stratum (for LIGHT & abundance if option selected)
+
+
ID.pixel
+
number of the concerned pixel
+
+
X, Y
+
coordinates of the concerned pixel
+
+
HAB
+
habitat of the concerned pixel
+
+
years
+
values of the corresponding object (abundance / LIGHT
+ / SOIL) for each selected simulation year(s)
-
A list containing three data.frame objects with the
-following columns :
-
PFG
concerned plant functional group (for abundance)
-
STRATUM
concerned height stratum (for LIGHT)
-
ID.pixel
number of the concerned pixel
-
X, Y
coordinates of the concerned pixel
-
HAB
habitat of the concerned pixel
-
years
values of the corresponding object (abundance / LIGHT
- / SOIL) for each selected simulation year(s)
-
+
One to three POST_FATE_TABLE_PIXEL_evolution_[...].csv files are created :
abundance
+
always
-
One to three POST_FATE_TABLE_PIXEL_evolution_[...].csv files are created :
-
abundance
always
-
light
if light module was activated
-
soil
if soil module was activated
+
light
+
if light module was activated
-
+
soil
+
if soil module was activated
-
Details
+
+
+
Details
This function allows to obtain, for a specific FATE simulation and
a specific parameter file within this simulation, one to three
-preanalytical tables that can then be used to create graphics.
+preanalytical tables that can then be used to create graphics.
For each PFG and each selected simulation year, raster maps are retrieved
-from the results folder ABUND_perPFG_allStrata and unzipped.
+from the results folder ABUND_perPFG_allStrata and unzipped. If
+perStrata = TRUE, raster maps are retrieved from the folder
+ABUND_perPFG_perStrata and unzipped.
Informations extracted lead to the production of one table before the maps
are compressed again :
-
-
the value of abundance for each Plant Functional Group
- for each selected simulation year(s) in every pixel in which the PFG is
- present for at least one of the selected simulation year(s)
the value of abundance for each Plant Functional Group
+ for each selected simulation year(s) and, if option selected, each height
+ stratum in every pixel in which the PFG is present for at least one of
+ the selected simulation year(s)
+
If the light module was activated (see
+PRE_FATE.params_globalParameters), for each height stratum
and each selected simulation year, raster maps are retrieved from the
results folder LIGHT and unzipped.
Informations extracted lead to the production of one table before the maps
are compressed again :
-
-
the value of light resources for each height stratum for
- each selected simulation year(s) in every pixel
the value of light resources for each height stratum for
+ each selected simulation year(s) in every pixel
+
If the soil module was activated (see
+PRE_FATE.params_globalParameters), for each selected
simulation year, raster maps are retrieved from the results folder
SOIL and unzipped.
Informations extracted lead to the production of one table before the maps
are compressed again :
-
-
the value of soil resources for each selected simulation
- year(s) in every pixel
-
-
-
If a raster mask for habitat has been provided, the tables will
-also contain information about the pixel habitat.
+
the value of soil resources for each selected simulation
+ year(s) in every pixel
+
If a raster mask for habitat has been provided, the tables will
+also contain information about the pixel habitat.
These .csv files can then be used by other functions :
-
+
to produce graphics of temporal evolution of modelled abundances
+ and space occupancy at the whole area level (see
+ POST_FATE.graphic_evolutionCoverage)
to produce graphics of temporal evolution of modelled abundances
- and space occupancy at the whole area level (see
- POST_FATE.graphic_evolutionCoverage)
-
to produce graphics of temporal evolution of modelled abundances
- and / or resources at the pixel level (see
- POST_FATE.graphic_evolutionPixels)
Computes validation data for habitat, PFG richness and composition for a name.simulation,
sim.version,
year,
+ perStrata =TRUE,
doHabitat =TRUE,
obs.path,
releves.PFG,
@@ -166,9 +167,11 @@
Computes validation data for habitat, PFG richness and composition for a hab.obs,
validation.mask,
studied.habitat =NULL,
+ list.strata.simulations =NULL,
doComposition =TRUE,
PFG.considered_PFG.compo,
habitat.considered_PFG.compo,
+ strata.considered_PFG.compo,
doRichness =TRUE,
list.PFG,
exclude.PFG =NULL
@@ -183,8 +186,11 @@
Arguments
name of the simulation to validate (it works with only one sim.version).
year
year of simulation for validation.
+
perStrata
+
Logical. Default TRUE. If TRUE, PFG abundance is defined by strata.
+If FALSE, PFG abundance defined for all strata (habitat & PFG composition & PFG richness validation).
doHabitat
-
logical. Default TRUE. If TRUE, habitat validation module is activated,
+
Logical. Default TRUE. If TRUE, habitat validation module is activated,
if FALSE, habitat validation module is disabled.
obs.path
the function needs observed data, please create a folder for them in your
@@ -201,8 +207,11 @@
Arguments
default NULL. If NULL, the function will
take into account of all habitats in the hab.obs map. Otherwise, please specify
in a vector habitats that will be take into account for the validation (habitat validation).
+
list.strata.simulations
+
default NULL. A character vector which contain FATE
+strata definition and correspondence with observed strata definition.
doComposition
-
logical. Default TRUE. If TRUE, PFG composition validation module is activated,
+
Logical. Default TRUE. If TRUE, PFG composition validation module is activated,
if FALSE, PFG composition validation module is disabled.
PFG.considered_PFG.compo
a character vector of the list of PFG considered
@@ -210,8 +219,12 @@
Arguments
habitat.considered_PFG.compo
a character vector of the list of habitat(s)
considered in the validation (PFG composition validation).
+
strata.considered_PFG.compo
+
If perStrata = FALSE, a character vector with value "A"
+(selection of one or several specific strata disabled). If perStrata = TRUE, a character
+vector with at least one of the observed strata (PFG composition validation).
doRichness
-
logical. Default TRUE. If TRUE, PFG richness validation module is activated,
+
Logical. Default TRUE. If TRUE, PFG richness validation module is activated,
if FALSE, PFG richness validation module is disabled.
list.PFG
a character vector which contain all the PFGs taken account in
@@ -232,30 +245,31 @@
Details
Habitat validation
The observed habitat is derived from a map of the area, the simulated habitat
-is derived from FATE simulated relative abundance, based on a random forest
-algorithm trained on observed data. To compare observations and simulations, the function
-computes confusion matrix between observation and prediction and then computes the TSS
-for each habitat h (number of prediction of habitat h/number of observation
-of habitat h + number of non-prediction of habitat h/number of non-observation
-of habitat h). The final metrics this script use is the mean of TSS per habitat over all
-habitats, weighted by the share of each habitat in the observed habitat distribution.
-
+is derived from FATE simulated relative abundance, based on a random forest
+algorithm trained on observed releves data (see train.RF.habitat)
+To compare observations and simulations, the function computes confusion matrix between
+observations and predictions and then compute the TSS for each habitat h
+(number of prediction of habitat h/number of observation of habitat h + number of non-prediction
+of habitat h/number of non-observation of habitat h). The final metrics this script use is the
+mean of TSS per habitat over all habitats, weighted by the share of each habitat in the observed
+habitat distribution. The habitat validation also provides a visual comparison of observed and
+simulated habitat on the whole studied area (see do.habitat.validation &
+ plot.predicted.habitat).
+
PFG composition validation
-
This code firstly run the get.observed.distribution function in order to have
-a obs.distri file which contain the observed distribution per PFG, strata and habitat.
-This file is also an argument for the do.PFG.composition.validation function run next.
-This second sub function provide the computation of distance between observed and simulated distribution.
-NB : The argument strata.considered_PFG.compo is by default "A" in the 2 sub functions because
-it's easier for a FATE simulation to provide PFG abundances for all strata. The argument
-perStrata.compo is by default NULL for the same reasons.
+
This code firstly run the get.observed.distribution
+function in order to have a obs.distri file which contain the observed distribution
+per PFG, strata and habitat. This file is also an argument for the do.PFG.composition.validation
+function run next. This second sub function provides the computation of distance between observed
+and simulated distribution.
PFG richness validation
Firstly, the function updates the list.PFG with exclude.PFG vector.
-Then, the script takes the abundance per PFG file from the results of the FATE
-simulation and computes the difference between the list.PFG and all the PFG
-which are presents in the abundance file, in order to obtain the PFG richness for a simulation.
-The function also determine if an observed PFG is missing in the results of the simulation at
-a specific year.
+Then, the script takes the abundance per PFG (and per strata if option selected) file from the
+results of the FATE simulation and computes the difference between the list.PFG
+and all the PFG which are presents in the abundance file, in order to obtain the PFG richness
+for a simulation. The function also determine if an observed PFG is missing in the results of the
+simulation at a specific year.
Logical. All strata together (FALSE) or per strata (TRUE).
validation.mask
file which contain a raster mask that specified
which pixels need validation.
year
year of simulation to validate.
+
list.strata.simulations
+
a character vector which contain FATE
+strata definition and correspondence with observed strata definition.
+
list.strata.releves
+
a character vector which contain the observed strata
+definition, extracted from observed PFG releves.
+
perStrata.compo
+
Logical. All strata together (FALSE) or per strata (TRUE).
Value
@@ -199,9 +206,10 @@
Value
Details
After preliminary checks, this code extract observed habitat from the hab.obs
-map and, then, merge it with the simulated PFG abundance file from results of a FATE
-simulation. After filtration of the required PFG, strata and habitats, the function
-transform the data into relative metrics and, then, compute distribution per PFG, strata
+map and, then, merge it with the simulated PFG abundance file (with or without strata definition)
+from results of the FATE simulation selected with sim.version.
+After filtration of the required PFG, strata and habitats, the function transforms
+the data into relative metrics and, then, compute distribution per PFG, strata
and habitat (if necessary). Finally, the code computes proximity between observed
and simulated data, per PFG, strata and habitat.
Compare observed and simulated habitat of a FATE simulation
name.simulation,
perStrata,
hab.obs,
- year
+ year,
+ list.strata.releves,
+ list.strata.simulations)
@@ -179,35 +180,40 @@
Arguments
a raster map of the whole studied area use to check
the consistency between simulation map and the observed habitat map.
predict.all.map
-
a TRUE/FALSE vector. If TRUE, the script will predict
+
Logical. If TRUE, the script will predict
habitat for the whole map.
sim.version
name of the simulation to validate.
name.simulation
simulation folder name.
perStrata
-
a TRUE/FALSE vector. If TRUE, the PFG abundance is defined
+
Logical. If TRUE, the PFG abundance is defined
by strata in each pixel. If FALSE, PFG abundance is defined for all strata.
hab.obs
a raster map of the observed habitat in the
extended studied area.
year
-
year of simulation for validation.
+
simulation year selected for validation.
+
list.strata.releves
+
a character vector which contain the observed strata
+definition, extracted from observed PFG releves.
+
list.strata.simulations
+
a character vector which contain FATE
+strata definition and correspondence with observed strata definition.
Value
-
Habitat performance file
-If option selected, the function returns an habitat prediction file with
+
Habitat performance file. If option selected, the function also returns an habitat prediction file with
observed and simulated habitat for each pixel of the whole map.
Details
After several preliminary checks, the function is going to prepare the observations
-database by extracting the observed habitat from a raster map. Then, for each
-simulations (sim.version), the script take the evolution abundance for each PFG
-and all strata file and predict the habitat for the whole map (if option selected)
-thanks to the RF model.Finally, the function compute habitat performance based on
-TSS for each habitat.
+database by extracting the observed habitat from a raster map. Then, for the
+simulation sim.version, the script take the evolution abundance for each PFG
+and all strata (or for each PFG & each strata if option selected) file and predict
+the habitat for the whole map (if option selected) thanks to the RF model.
+Finally, the function computes habitat performance based on TSS for each habitat.
The function determine true/false prediction ('failure' if false, 'success' if true)
+
The function determines true/false prediction ('failure' if false, 'success' if true)
and prepare a dataframe containing color and habitat code. Then, the script merge
the prediction dataframe with the color and code habitat dataframe. Finally,
the function draw a raster map and a plot of prediction habitat over it thanks
diff --git a/docs/reference/train.RF.habitat.html b/docs/reference/train.RF.habitat.html
index c2d6c39..471769c 100644
--- a/docs/reference/train.RF.habitat.html
+++ b/docs/reference/train.RF.habitat.html
@@ -143,8 +143,7 @@
Create a random forest algorithm trained on CBNA data.
access path to the for the folder where output files
will be created.
perStrata
-
a TRUE/FALSE vector. If TRUE, the PFG abundance is defined
+
Logical. If TRUE, the PFG abundance is defined
by strata in each site. If FALSE, PFG abundance is defined for all strata.
sim.version
name of the simulation we want to validate.
From 94b74b8b257eafcaffd45758d75164da46a14d9c Mon Sep 17 00:00:00 2001
From: Maxime Delprat <98315113+maximedelprat@users.noreply.github.com>
Date: Fri, 18 Mar 2022 14:25:57 +0100
Subject: [PATCH 061/176] Add files via upload
Update of the namespace file with the updating of the validation functions and the temporal evolution function
---
NAMESPACE | 2 ++
1 file changed, 2 insertions(+)
diff --git a/NAMESPACE b/NAMESPACE
index 71d3d62..6225478 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -99,6 +99,7 @@ importFrom(dplyr,mutate)
importFrom(dplyr,rename)
importFrom(dplyr,select)
importFrom(forcats,fct_expand)
+importFrom(foreach,"%:%")
importFrom(foreach,"%do%")
importFrom(foreach,"%dopar%")
importFrom(foreach,foreach)
@@ -230,6 +231,7 @@ importFrom(stats,var)
importFrom(stats,weighted.mean)
importFrom(stringr,str_split)
importFrom(stringr,str_sub)
+importFrom(tidyselect,all_of)
importFrom(utils,combn)
importFrom(utils,download.file)
importFrom(utils,install.packages)
From 40cb47cd258d1345bf3a29e0af9cdad98a7d14c8 Mon Sep 17 00:00:00 2001
From: MayaGueguen
Date: Mon, 21 Mar 2022 14:16:49 +0100
Subject: [PATCH 062/176] First corrections and remarks on
UTILS.train_RF_habitat
---
R/UTILS.train_RF_habitat.R | 211 ++++++++++++++++++++++---------------
1 file changed, 125 insertions(+), 86 deletions(-)
diff --git a/R/UTILS.train_RF_habitat.R b/R/UTILS.train_RF_habitat.R
index 34e159c..20ca82b 100644
--- a/R/UTILS.train_RF_habitat.R
+++ b/R/UTILS.train_RF_habitat.R
@@ -77,26 +77,60 @@ train.RF.habitat<-function(releves.PFG
cat("\n ---------- TRAIN A RANDOM FOREST MODEL ON OBSERVED DATA \n")
+ #############################################################################
+
+ ## CHECK parameter releves.PFG
+ if (.testParam_notDf(releves.PFG))
+ {
+ .stopMessage_beDataframe("releves.PFG")
+ } else
+ {
+ releves.PFG = as.data.frame(releves.PFG)
+ if (nrow(releves.PFG) == 0 || ncol(releves.PFG) != 4)
+ {
+ .stopMessage_numRowCol("releves.PFG", c("sites", "PFG", "strata", "BB")) ## TODO : change colnames ?
+ }
+ ## TODO : condition on sites
+ ## TODO : condition on strata
+ ## TODO : condition on PFG
+ .testParam_notInValues.m("releves.PFG$BB", releves.PFG$BB, c(NA, "NA", 0, "+", "r", 1:5))
+ }
+ ## CHECK parameter releves.sites
+ if (.testParam_notDf(releves.sites))
+ {
+ .stopMessage_beDataframe("releves.sites")
+ } else
+ {
+ releves.sites = as.data.frame(releves.sites)
+ if (nrow(releves.sites) == 0 || ncol(releves.sites) != 3)
+ {
+ .stopMessage_numRowCol("releves.sites", c("sites", "x", "y")) ## TODO : change colnames ?
+ }
+ ## TODO : condition on site
+ }
+
+
#1. Compute relative abundance metric
#########################################
- #identify sites with wrong BB values (ie values that cannot be converted by the PRE_FATE.abundBraunBlanquet function)
- releves.PFG<-filter(releves.PFG,is.element(BB,c(NA, "NA", 0, "+", "r", 1:5)))
-
#transformation into coverage percentage
+ ## TODO : Transform in real proportion (per site)
releves.PFG$coverage<-PRE_FATE.abundBraunBlanquet(releves.PFG$BB)/100 #as a proportion, not a percentage
- if(perStrata==T){
- aggregated.releves.PFG<-aggregate(coverage~site+PFG+strata,data=releves.PFG,FUN="sum")
- }else if(perStrata==F){
- aggregated.releves.PFG<-aggregate(coverage~site+PFG,data=releves.PFG,FUN="sum")
- aggregated.releves.PFG$strata<-"A" #"A" is for "all". Important to have a single-letter code here (useful to check consistency between relevés strata and model strata)
+ if (perStrata == TRUE) {
+ mat.PFG.agg <- aggregate(coverage ~ site + PFG + strata, data = releves.PFG, FUN = "sum")
+ } else if (perStrata == FALSE) {
+ mat.PFG.agg <- aggregate(coverage ~ site + PFG, data = releves.PFG, FUN = "sum")
+ mat.PFG.agg$strata <- "A" #"A" is for "all". Important to have a single-letter code here (useful to check consistency between relevés strata and model strata)
}
#transformation into a relative metric (here relative.metric is relative coverage)
- aggregated.releves.PFG<-as.data.frame(aggregated.releves.PFG %>% group_by(site,strata) %>% mutate(relative.metric= round(prop.table(coverage),digits = 2))) #rel is proportion of total pct_cov, not percentage
- aggregated.releves.PFG$relative.metric[is.na(aggregated.releves.PFG$relative.metric)]<-0 #NA because abs==0 for some PFG, so put 0 instead of NA (maybe not necessary)
- aggregated.releves.PFG$coverage<-NULL
+ mat.PFG.agg <-
+ as.data.frame(
+ mat.PFG.agg %>% group_by(site, strata) %>% mutate(relative.metric = round(prop.table(coverage), digits = 2))
+ ) #rel is proportion of total pct_cov, not percentage
+ mat.PFG.agg$relative.metric[is.na(mat.PFG.agg$relative.metric)] <- 0 #NA because abs==0 for some PFG, so put 0 instead of NA (maybe not necessary)
+ mat.PFG.agg$coverage<-NULL
print("releve data have been transformed into a relative metric")
@@ -104,38 +138,39 @@ train.RF.habitat<-function(releves.PFG
#######################
#transfo into factor to be sure to create all the combination when doing "dcast"
- aggregated.releves.PFG$PFG<-as.factor(aggregated.releves.PFG$PFG)
- aggregated.releves.PFG$strata<-as.factor(aggregated.releves.PFG$strata)
-
- aggregated.releves.PFG<-dcast(setDT(aggregated.releves.PFG),site~PFG+strata,value.var=c("relative.metric"),fill=0,drop=F)
+ mat.PFG.agg$PFG <- as.factor(mat.PFG.agg$PFG)
+ mat.PFG.agg$strata <- as.factor(mat.PFG.agg$strata)
+ mat.PFG.agg <- dcast(mat.PFG.agg, site ~ PFG + strata, value.var = "relative.metric", fill = 0, drop = FALSE)
#3. Get habitat information
###################################
#get sites coordinates
- aggregated.releves.PFG<-merge(dplyr::select(releves.sites,c(site)), aggregated.releves.PFG,by="site")
-
+ mat.PFG.agg <- merge(releves.sites, mat.PFG.agg, by = "site") ## TODO : mettre tout directement dans releves.PFG ?
+
#get habitat code and name
- if(compareCRS(aggregated.releves.PFG,hab.obs)){
- aggregated.releves.PFG$code.habitat<-raster::extract(x=hab.obs,y=aggregated.releves.PFG)
- }else{
- aggregated.releves.PFG<-st_transform(x=aggregated.releves.PFG,crs=crs(hab.obs))
- aggregated.releves.PFG$code.habitat<-raster::extract(x=hab.obs,y=aggregated.releves.PFG)
+ mat.PFG.agg$code.habitat <- raster::extract(x = hab.obs, y = mat.PFG.agg[, c("x", "y")])
+ mat.PFG.agg = mat.PFG.agg[which(!is.na(mat.PFG.agg$code.habitat)), ]
+ if (nrow(mat.PFG.agg) == 0) {
+ ## TODO : add stop message
}
#correspondance habitat code/habitat name
- table.habitat.releve<-levels(hab.obs)[[1]]
-
- aggregated.releves.PFG<-merge(aggregated.releves.PFG,dplyr::select(table.habitat.releve,c(ID,habitat)),by.x="code.habitat",by.y="ID")
+ ## ATTENTION ! il faut que la couche de noms du raster existe, et qu'elle s'appelle habitat...
+ ## TODO : soit donner en paramètre un vecteur avec les noms d'habitat, soit les données dans releves.PFG...
+ table.habitat.releve <- levels(hab.obs)[[1]]
+ mat.PFG.agg<-merge(mat.PFG.agg, table.habitat.releve[, c("ID", "habitat")], by.x = "code.habitat", by.y = "ID")
#(optional) keep only releves data in a specific area
- if(!is.null(external.training.mask)){
+ if (!is.null(external.training.mask)) {
+ # if (compareCRS(mat.PFG.agg, external.training.mask) == FALSE) {
+ # #as this stage it is not a problem to transform crs(mat.PFG.agg) since we have no more merge to do (we have already extracted habitat info from the map)
+ # mat.PFG.agg <- st_transform(x = mat.PFG.agg, crs = crs(external.training.mask))
+ # }
+ # mat.PFG.agg <- st_crop(x = mat.PFG.agg, y = external.training.mask)
- if(compareCRS(aggregated.releves.PFG,external.training.mask)==F){ #as this stage it is not a problem to transform crs(aggregated.releves.PFG) since we have no more merge to do (we have already extracted habitat info from the map)
- aggregated.releves.PFG<-st_transform(x=aggregated.releves.PFG,crs=crs(external.training.mask))
- }
-
- aggregated.releves.PFG<-st_crop(x=aggregated.releves.PFG,y=external.training.mask)
+ val.inMask = raster::extract(x = external.training.mask, y = mat.PFG.agg[, c("x", "y")])
+ mat.PFG.agg = mat.PFG.agg[which(!is.na(val.inMask)), ]
print("'releve' map has been cropped to match 'external.training.mask'.")
}
@@ -143,24 +178,26 @@ train.RF.habitat<-function(releves.PFG
# 4. Keep only releve on interesting habitat
###################################################"
- if (!is.null(studied.habitat)){
- aggregated.releves.PFG<-filter(aggregated.releves.PFG,is.element(habitat,studied.habitat)) #filter non interesting habitat + NA
- print(cat("habitat classes used in the RF algo: ",unique(aggregated.releves.PFG$habitat),"\n",sep="\t"))
- } else{
- print(cat("habitat classes used in the RF algo: ",unique(aggregated.releves.PFG$habitat),"\n",sep="\t"))
+ if (!is.null(studied.habitat)) {
+ mat.PFG.agg <- mat.PFG.agg[which(mat.PFG.agg$habitat %in% studied.habitat), ] #filter non interesting habitat + NA
+ if (nrow(mat.PFG.agg) == 0) {
+ ## TODO : add stop message
+ }
}
+ print(cat("habitat classes used in the RF algo: ",unique(mat.PFG.agg$habitat),"\n",sep="\t"))
# 5. Save data
#####################
- st_write(aggregated.releves.PFG,paste0(output.path,"/HABITAT/", sim.version, "/releve.PFG.habitat.shp"),overwrite=T,append=F)
- write.csv(aggregated.releves.PFG,paste0(output.path,"/HABITAT/", sim.version, "/CBNA.releves.prepared.csv"),row.names = F)
+ # st_write(mat.PFG.agg,paste0(output.path,"/HABITAT/", sim.version, "/releve.PFG.habitat.shp"),overwrite=T,append=F)
+ write.csv(mat.PFG.agg,paste0(output.path,"/HABITAT/", sim.version, "/CBNA.releves.prepared.csv"),row.names = FALSE)
+ ## TODO : remove CBNA from file name
# 6. Small adjustment in data structure
##########################################
- aggregated.releves.PFG<-as.data.frame(aggregated.releves.PFG) #get rid of the spatial structure before entering the RF process
- aggregated.releves.PFG$habitat<-as.factor(aggregated.releves.PFG$habitat)
+ mat.PFG.agg<-as.data.frame(mat.PFG.agg) #get rid of the spatial structure before entering the RF process
+ mat.PFG.agg$habitat<-as.factor(mat.PFG.agg$habitat)
# 7.Random forest
######################################
@@ -168,72 +205,74 @@ train.RF.habitat<-function(releves.PFG
#separate the database into a training and a test part
set.seed(123)
- training.site<-sample(aggregated.releves.PFG$site,size=RF.param$share.training*length(aggregated.releves.PFG$site),replace = F)
- releves.training<-filter(aggregated.releves.PFG,is.element(site,training.site))
- releves.testing<-filter(aggregated.releves.PFG,!is.element(site,training.site))
+ training.site <- sample(mat.PFG.agg$site, size = RF.param$share.training * length(mat.PFG.agg$site), replace = FALSE)
+ releves.training <- mat.PFG.agg[which(mat.PFG.agg$site %in% training.site), ]
+ releves.testing <- mat.PFG.agg[-which(mat.PFG.agg$site %in% training.site), ]
#train the model (with correction for imbalances in sampling)
#run optimization algo (careful : optimization over OOB...)
- mtry.perf<-as.data.frame(
- tuneRF(
- x=dplyr::select(releves.training,-c(code.habitat,site,habitat,geometry)),
- y=releves.training$habitat,
- strata=releves.training$habitat,
- sampsize=nrow(releves.training),
- ntreeTry=RF.param$ntree,
- stepFactor=2, improve=0.05,doBest=FALSE,plot=F,trace=F
- )
- )
+ mtry.perf <- tuneRF(x = dplyr::select(releves.training, -c(code.habitat, site, habitat, geometry)),
+ y = releves.training$habitat,
+ strata = releves.training$habitat,
+ sampsize = nrow(releves.training),
+ ntreeTry = RF.param$ntree,
+ stepFactor = 2,
+ improve = 0.05,
+ doBest = FALSE,
+ plot = FALSE,
+ trace = FALSE)
+ mtry.perf = as.data.frame(mtry.perf)
#select mtry
- mtry<-mtry.perf$mtry[mtry.perf$OOBError==min(mtry.perf$OOBError)][1] #the lowest n achieving minimum OOB
+ mtry <- mtry.perf$mtry[mtry.perf$OOBError == min(mtry.perf$OOBError)][1] #the lowest n achieving minimum OOB
#run real model
- model<- randomForest(
- x=dplyr::select(releves.training,-c(code.habitat,site,habitat,geometry)),
- y=releves.training$habitat,
- xtest=dplyr::select(releves.testing,-c(code.habitat,site,habitat,geometry)),
- ytest=releves.testing$habitat,
- strata=releves.training$habitat,
- sampsize=nrow(releves.training),
- ntree=RF.param$ntree,
- mtry=mtry,
- norm.votes=TRUE,
- keep.forest=TRUE
- )
+ model <- randomForest(x = dplyr::select(releves.training, -c(code.habitat, site, habitat, geometry)),
+ y = releves.training$habitat,
+ xtest = dplyr::select(releves.testing, -c(code.habitat, site, habitat, geometry)),
+ ytest = releves.testing$habitat,
+ strata = releves.training$habitat,
+ sampsize = nrow(releves.training),
+ ntree = RF.param$ntree,
+ mtry = mtry,
+ norm.votes = TRUE,
+ keep.forest = TRUE)
#analyse model performance
# Analysis on the training sample
-
- confusion.training<-confusionMatrix(data=model$predicted,reference=releves.training$habitat)
-
- synthesis.training<-data.frame(habitat=colnames(confusion.training$table),sensitivity=confusion.training$byClass[,1],specificity=confusion.training$byClass[,2],weight=colSums(confusion.training$table)/sum(colSums(confusion.training$table))) #warning: prevalence is the weight of predicted habitat, not of observed habitat
- synthesis.training<-synthesis.training%>%mutate(TSS=round(sensitivity+specificity-1,digits=2))
-
- aggregate.TSS.training<-round(sum(synthesis.training$weight*synthesis.training$TSS),digits=2)
+ confusion.training <- confusionMatrix(data = model$predicted, reference = releves.training$habitat)
+ synthesis.training <- data.frame(habitat = colnames(confusion.training$table)
+ , sensitivity = confusion.training$byClass[, 1]
+ , specificity = confusion.training$byClass[, 2]
+ , weight = colSums(confusion.training$table) / sum(colSums(confusion.training$table)))
+ #warning: prevalence is the weight of predicted habitat, not of observed habitat
+ synthesis.training <- synthesis.training %>% mutate(TSS = round(sensitivity + specificity - 1, digits = 2))
+ aggregate.TSS.training <- round(sum(synthesis.training$weight * synthesis.training$TSS), digits = 2)
# Analysis on the testing sample
-
- confusion.testing<-confusionMatrix(data=model$test$predicted,reference=releves.testing$habitat)
-
- synthesis.testing<-data.frame(habitat=colnames(confusion.testing$table),sensitivity=confusion.testing$byClass[,1],specificity=confusion.testing$byClass[,2],weight=colSums(confusion.testing$table)/sum(colSums(confusion.testing$table)))#warning: prevalence is the weight of predicted habitat, not of observed habitat
- synthesis.testing<-synthesis.testing%>%mutate(TSS=round(sensitivity+specificity-1,digits=2))
-
- aggregate.TSS.testing<-round(sum(synthesis.testing$weight*synthesis.testing$TSS),digits=2)
+ confusion.testing <- confusionMatrix(data = model$test$predicted, reference = releves.testing$habitat)
+ synthesis.testing<-data.frame(habitat = colnames(confusion.testing$table)
+ , sensitivity = confusion.testing$byClass[, 1]
+ , specificity = confusion.testing$byClass[, 2]
+ , weight = colSums(confusion.testing$table) / sum(colSums(confusion.testing$table)))
+ #warning: prevalence is the weight of predicted habitat, not of observed habitat
+ synthesis.testing <- synthesis.testing %>% mutate(TSS = round(sensitivity + specificity - 1, digits = 2))
+ aggregate.TSS.testing <- round(sum(synthesis.testing$weight * synthesis.testing$TSS), digits = 2)
# 8. Save and return output
#######################################"
- write_rds(model,paste0(output.path,"/HABITAT/", sim.version, "/RF.model.rds"),compress="none")
- write.csv(synthesis.training,paste0(output.path,"/HABITAT/", sim.version, "/RF_perf.per.hab_training.csv"),row.names=F)
- write.csv(aggregate.TSS.training,paste0(output.path,"/HABITAT/", sim.version, "/RF_aggregate.TSS_training.csv"),row.names=F)
- write.csv(synthesis.testing,paste0(output.path,"/HABITAT/", sim.version, "/RF_perf.per.hab_testing.csv"),row.names=F)
- write.csv(aggregate.TSS.testing,paste0(output.path,"/HABITAT/", sim.version, "/RF_aggregate.TSS_testing.csv"),row.names=F)
+ path.save = paste0(output.path, "/HABITAT/", sim.version)
- return(model)
+ write_rds(model, paste0(path.save, "/RF.model.rds"), compress = "none")
+ write.csv(synthesis.training, paste0(path.save, "/RF_perf.per.hab_training.csv"), row.names = FALSE)
+ write.csv(aggregate.TSS.training, paste0(path.save, "/RF_aggregate.TSS_training.csv"), row.names = FALSE)
+ write.csv(synthesis.testing, paste0(path.save, "/RF_perf.per.hab_testing.csv"), row.names = FALSE)
+ write.csv(aggregate.TSS.testing, paste0(path.save, "/RF_aggregate.TSS_testing.csv"), row.names = FALSE)
+ return(model)
}
From ba5289ea5b576ab3c1d61752503d7c1e9438e4cd Mon Sep 17 00:00:00 2001
From: Maxime Delprat
Date: Tue, 22 Mar 2022 11:28:22 +0100
Subject: [PATCH 063/176] Corrections and adjustments
---
R/UTILS.train_RF_habitat.R | 40 ++++++++++++++++++++++++++++++++++----
1 file changed, 36 insertions(+), 4 deletions(-)
diff --git a/R/UTILS.train_RF_habitat.R b/R/UTILS.train_RF_habitat.R
index 20ca82b..cc11a78 100644
--- a/R/UTILS.train_RF_habitat.R
+++ b/R/UTILS.train_RF_habitat.R
@@ -14,7 +14,7 @@
##' and each PFG and strata.
##' @param releves.sites a data frame with coordinates and a description of
##' the habitat associated with the dominant species of each site in the
-##' studied map.
+##' studied map. Shapefile format.
##' @param hab.obs a raster map of the observed habitat in the
##' extended studied area.
##' @param external.training.mask default \code{NULL}. (optional) Keep only
@@ -60,6 +60,7 @@
##' @importFrom caret confusionMatrix
##' @importFrom readr write_rds
##' @importFrom utils read.csv write.csv
+##' @importFrom stringr str_split
##'
### END OF HEADER ##############################################################
@@ -91,8 +92,22 @@ train.RF.habitat<-function(releves.PFG
.stopMessage_numRowCol("releves.PFG", c("sites", "PFG", "strata", "BB")) ## TODO : change colnames ?
}
## TODO : condition on sites
+ if (!is.numeric(releves.PFG$site))
+ {
+ stop("Sites in releves.PFG are not in the right format. Please make sure you have numeric values")
+ }
## TODO : condition on strata
+ if (!is.character(releves.PFG$strata) | !is.numeric(releves.PFG$strata))
+ {
+ stop("strata definition in releves.PFG is not in the right format. Please make sure you have a character or numeric values")
+ }
## TODO : condition on PFG
+ fate_PFG = .getGraphics_PFG(name.simulation = str_split(output.path, "/")[[1]][1] ## prend le premier terme de output.path qui est le nom de simul (cf POST_FATE.validation)
+ , abs.simulParam = paste0(name.simulation, "/PARAM_SIMUL/Simul_parameters_", str_split(sim.version, "_")[[1]][2], ".txt")) ## fichier de paramètre associé au nom de simulation
+ if (sort(as.factor(unique(releves.PFG$PFG))) != as.factor(fate_PFG$PFG))
+ {
+ stop("PFG list in releves.PFG does not correspond to PFG list in FATE")
+ }
.testParam_notInValues.m("releves.PFG$BB", releves.PFG$BB, c(NA, "NA", 0, "+", "r", 1:5))
}
## CHECK parameter releves.sites
@@ -107,6 +122,10 @@ train.RF.habitat<-function(releves.PFG
.stopMessage_numRowCol("releves.sites", c("sites", "x", "y")) ## TODO : change colnames ?
}
## TODO : condition on site
+ if (!is.numeric(releves.sites$site))
+ {
+ stop("Sites in releves.sites are not in the right format. Please make sure you have numeric values")
+ }
}
@@ -153,13 +172,25 @@ train.RF.habitat<-function(releves.PFG
mat.PFG.agg = mat.PFG.agg[which(!is.na(mat.PFG.agg$code.habitat)), ]
if (nrow(mat.PFG.agg) == 0) {
## TODO : add stop message
+ stop("Code habitat vector is empty. Please verify values of your hab.obs map")
}
#correspondance habitat code/habitat name
## ATTENTION ! il faut que la couche de noms du raster existe, et qu'elle s'appelle habitat...
## TODO : soit donner en paramètre un vecteur avec les noms d'habitat, soit les données dans releves.PFG...
- table.habitat.releve <- levels(hab.obs)[[1]]
- mat.PFG.agg<-merge(mat.PFG.agg, table.habitat.releve[, c("ID", "habitat")], by.x = "code.habitat", by.y = "ID")
+ if (names(raster::levels(hab.obs)[[1]]) != c("ID", "habitat", "colour") | nrow(raster::levels(hab.obs)[[1]]) == 0 & is.data.frame(obs.habitat))
+ {
+ colnames(obs.habitat) = c("ID", "habitat")
+ table.habitat.releve = obs.habitat
+ mat.PFG.agg = merge(mat.PFG.agg, table.habitat.releve[, c("ID", "habitat")], by.x = "code.habitat", by.y = "ID")
+ } else if (names(raster::levels(hab.obs)[[1]]) == c("ID", "habitat", "colour") & nrow(raster::levels(hab.obs)[[1]]) > 0)
+ {
+ table.habitat.releve = levels(hab.obs)[[1]]
+ mat.PFG.agg = merge(mat.PFG.agg, table.habitat.releve[, c("ID", "habitat")], by.x = "code.habitat", by.y = "ID")
+ } else
+ {
+ stop("Habitat definition in hab.obs map is not correct")
+ }
#(optional) keep only releves data in a specific area
if (!is.null(external.training.mask)) {
@@ -182,6 +213,7 @@ train.RF.habitat<-function(releves.PFG
mat.PFG.agg <- mat.PFG.agg[which(mat.PFG.agg$habitat %in% studied.habitat), ] #filter non interesting habitat + NA
if (nrow(mat.PFG.agg) == 0) {
## TODO : add stop message
+ stop("Habitats in studied.habitat parameter are not presents in hab.obs map. Please select others habitats")
}
}
print(cat("habitat classes used in the RF algo: ",unique(mat.PFG.agg$habitat),"\n",sep="\t"))
@@ -190,7 +222,7 @@ train.RF.habitat<-function(releves.PFG
#####################
# st_write(mat.PFG.agg,paste0(output.path,"/HABITAT/", sim.version, "/releve.PFG.habitat.shp"),overwrite=T,append=F)
- write.csv(mat.PFG.agg,paste0(output.path,"/HABITAT/", sim.version, "/CBNA.releves.prepared.csv"),row.names = FALSE)
+ write.csv(mat.PFG.agg,paste0(output.path,"/HABITAT/", sim.version, "/obs.releves.prepared.csv"),row.names = FALSE)
## TODO : remove CBNA from file name
# 6. Small adjustment in data structure
From abfb871b3e3211371b71ccb176d87e9d75b0a762 Mon Sep 17 00:00:00 2001
From: Maxime Delprat
Date: Tue, 22 Mar 2022 13:24:39 +0100
Subject: [PATCH 064/176] correction in train.RF.habitat : preliminary checks,
computing relative abundance & get habitat information
---
R/UTILS.train_RF_habitat.R | 48 ++++++++++++++++++++++++--------------
1 file changed, 31 insertions(+), 17 deletions(-)
diff --git a/R/UTILS.train_RF_habitat.R b/R/UTILS.train_RF_habitat.R
index cc11a78..c48c33f 100644
--- a/R/UTILS.train_RF_habitat.R
+++ b/R/UTILS.train_RF_habitat.R
@@ -89,7 +89,7 @@ train.RF.habitat<-function(releves.PFG
releves.PFG = as.data.frame(releves.PFG)
if (nrow(releves.PFG) == 0 || ncol(releves.PFG) != 4)
{
- .stopMessage_numRowCol("releves.PFG", c("sites", "PFG", "strata", "BB")) ## TODO : change colnames ?
+ .stopMessage_numRowCol("releves.PFG", c("site", "PFG", "strata", "BB")) ## TODO : change colnames ?
}
## TODO : condition on sites
if (!is.numeric(releves.PFG$site))
@@ -119,7 +119,7 @@ train.RF.habitat<-function(releves.PFG
releves.sites = as.data.frame(releves.sites)
if (nrow(releves.sites) == 0 || ncol(releves.sites) != 3)
{
- .stopMessage_numRowCol("releves.sites", c("sites", "x", "y")) ## TODO : change colnames ?
+ .stopMessage_numRowCol("releves.sites", c("site", "x", "y")) ## TODO : change colnames ?
}
## TODO : condition on site
if (!is.numeric(releves.sites$site))
@@ -134,7 +134,17 @@ train.RF.habitat<-function(releves.PFG
#transformation into coverage percentage
## TODO : Transform in real proportion (per site)
- releves.PFG$coverage<-PRE_FATE.abundBraunBlanquet(releves.PFG$BB)/100 #as a proportion, not a percentage
+ # releves.PFG$coverage<-PRE_FATE.abundBraunBlanquet(releves.PFG$BB)/100 #as a proportion, not a percentage
+ if(!is.numeric(releves.PFG$abund))
+ {
+ releves.PFG$coverage = PRE_FATE.abundBraunBlanquet(releves.PFG$abund)/100
+ } else if (is.numeric(releves.PFG$abund) & max(releves.PFG$abund) == 1)
+ {
+ releves.PFG$coverage = releves.PFG$abund
+ } else if (is.numeric(releves.PFG$abund))
+ {
+ releves.PFG$coverage = releves.PFG$abund
+ }
if (perStrata == TRUE) {
mat.PFG.agg <- aggregate(coverage ~ site + PFG + strata, data = releves.PFG, FUN = "sum")
@@ -178,15 +188,19 @@ train.RF.habitat<-function(releves.PFG
#correspondance habitat code/habitat name
## ATTENTION ! il faut que la couche de noms du raster existe, et qu'elle s'appelle habitat...
## TODO : soit donner en paramètre un vecteur avec les noms d'habitat, soit les données dans releves.PFG...
- if (names(raster::levels(hab.obs)[[1]]) != c("ID", "habitat", "colour") | nrow(raster::levels(hab.obs)[[1]]) == 0 & is.data.frame(obs.habitat))
+ if (names(raster::levels(hab.obs)[[1]]) != c("ID", "habitat", "colour") | nrow(raster::levels(hab.obs)[[1]]) == 0 & !is.null(studied.habitat))
{
colnames(obs.habitat) = c("ID", "habitat")
- table.habitat.releve = obs.habitat
+ table.habitat.releve = studied.habitat
+ mat.PFG.agg = mat.PFG.agg[which(mat.PFG.agg$code.habitat %in% studied.habitat$ID), ] #filter non interesting habitat + NA
mat.PFG.agg = merge(mat.PFG.agg, table.habitat.releve[, c("ID", "habitat")], by.x = "code.habitat", by.y = "ID")
- } else if (names(raster::levels(hab.obs)[[1]]) == c("ID", "habitat", "colour") & nrow(raster::levels(hab.obs)[[1]]) > 0)
+ print(cat("habitat classes used in the RF algo: ",unique(mat.PFG.agg$habitat),"\n",sep="\t"))
+ } else if (names(raster::levels(hab.obs)[[1]]) == c("ID", "habitat", "colour") & nrow(raster::levels(hab.obs)[[1]]) > 0 & is.null(studied.habitat))
{
table.habitat.releve = levels(hab.obs)[[1]]
mat.PFG.agg = merge(mat.PFG.agg, table.habitat.releve[, c("ID", "habitat")], by.x = "code.habitat", by.y = "ID")
+ mat.PFG.agg <- mat.PFG.agg[which(mat.PFG.agg$habitat %in% studied.habitat), ] #filter non interesting habitat + NA
+ print(cat("habitat classes used in the RF algo: ",unique(mat.PFG.agg$habitat),"\n",sep="\t"))
} else
{
stop("Habitat definition in hab.obs map is not correct")
@@ -206,17 +220,17 @@ train.RF.habitat<-function(releves.PFG
}
- # 4. Keep only releve on interesting habitat
- ###################################################"
-
- if (!is.null(studied.habitat)) {
- mat.PFG.agg <- mat.PFG.agg[which(mat.PFG.agg$habitat %in% studied.habitat), ] #filter non interesting habitat + NA
- if (nrow(mat.PFG.agg) == 0) {
- ## TODO : add stop message
- stop("Habitats in studied.habitat parameter are not presents in hab.obs map. Please select others habitats")
- }
- }
- print(cat("habitat classes used in the RF algo: ",unique(mat.PFG.agg$habitat),"\n",sep="\t"))
+ # # 4. Keep only releve on interesting habitat
+ # ###################################################"
+ #
+ # if (!is.null(studied.habitat)) {
+ # mat.PFG.agg <- mat.PFG.agg[which(mat.PFG.agg$habitat %in% studied.habitat), ] #filter non interesting habitat + NA
+ # if (nrow(mat.PFG.agg) == 0) {
+ # ## TODO : add stop message
+ # stop("Habitats in studied.habitat parameter are not presents in hab.obs map. Please select others habitats")
+ # }
+ # }
+ # print(cat("habitat classes used in the RF algo: ",unique(mat.PFG.agg$habitat),"\n",sep="\t"))
# 5. Save data
#####################
From 09a85c7690b1b4996a18a9251cbd2ad9a4330a31 Mon Sep 17 00:00:00 2001
From: Maxime Delprat
Date: Tue, 22 Mar 2022 13:52:28 +0100
Subject: [PATCH 065/176] correction of train.RF.habitat
---
R/UTILS.train_RF_habitat.R | 26 +++++++++++++-------------
1 file changed, 13 insertions(+), 13 deletions(-)
diff --git a/R/UTILS.train_RF_habitat.R b/R/UTILS.train_RF_habitat.R
index c48c33f..83bed67 100644
--- a/R/UTILS.train_RF_habitat.R
+++ b/R/UTILS.train_RF_habitat.R
@@ -10,8 +10,8 @@
##' trained on observed PFG abundance, sites releves and a map of observed
##' habitat.
##'
-##' @param releves.PFG a data frame with Braund-Blanquet abundance at each site
-##' and each PFG and strata.
+##' @param releves.PFG a data frame with abundance (column named abund) at each site
+##' and for each PFG and strata.
##' @param releves.sites a data frame with coordinates and a description of
##' the habitat associated with the dominant species of each site in the
##' studied map. Shapefile format.
@@ -20,8 +20,9 @@
##' @param external.training.mask default \code{NULL}. (optional) Keep only
##' releves data in a specific area.
##' @param studied.habitat If \code{NULL}, the function will
-##' take into account of all habitats in the hab.obs map. Otherwise, please specify
-##' in a vector the habitats that we take into account for the validation.
+##' take into account of habitats define in the \code{hab.obs} map. Otherwise, please specify
+##' in a 2 columns data frame the habitats and the ID for each of them which will be taken
+##' into account for the validation.
##' @param RF.param a list of 2 parameters for random forest model :
##' share.training defines the size of the trainig part of the data base.
##' ntree is the number of trees build by the algorithm, it allows to reduce
@@ -34,15 +35,15 @@
##'
##' @details
##'
-##' This function transform PFG Braund-Blanquet abundance in relative abundance,
-##' get habitat information from the releves map, keep only relees on interesting
-##' habitat and then builds de random forest model. Finally, the function analyzes
-##' the model performance with computation of confusion matrix and TSS for
-##' the traning and testing sample.
+##' This function transform PFG abundance in relative abundance,
+##' get habitat information from the releves map of from a vector previously defined,
+##' keep releves on interesting habitat and then builds a random forest model. Finally,
+##' the function analyzes the model performance with computation of confusion matrix and TSS between
+##' the training and testing sample.
##'
##' @return
##'
-##' 2 prepared CBNA releves files are created before the building of the random
+##' 2 prepared observed releves files are created before the building of the random
##' forest model in a habitat validation folder.
##' 5 more files are created at the end of the script to save the RF model and
##' the performance analyzes (confusion matrix and TSS) for the training and
@@ -134,7 +135,6 @@ train.RF.habitat<-function(releves.PFG
#transformation into coverage percentage
## TODO : Transform in real proportion (per site)
- # releves.PFG$coverage<-PRE_FATE.abundBraunBlanquet(releves.PFG$BB)/100 #as a proportion, not a percentage
if(!is.numeric(releves.PFG$abund))
{
releves.PFG$coverage = PRE_FATE.abundBraunBlanquet(releves.PFG$abund)/100
@@ -189,14 +189,14 @@ train.RF.habitat<-function(releves.PFG
## ATTENTION ! il faut que la couche de noms du raster existe, et qu'elle s'appelle habitat...
## TODO : soit donner en paramètre un vecteur avec les noms d'habitat, soit les données dans releves.PFG...
if (names(raster::levels(hab.obs)[[1]]) != c("ID", "habitat", "colour") | nrow(raster::levels(hab.obs)[[1]]) == 0 & !is.null(studied.habitat))
- {
+ { ## cas où pas de levels dans la carte d'habitat et utilisation d'un vecteur d'habitat
colnames(obs.habitat) = c("ID", "habitat")
table.habitat.releve = studied.habitat
mat.PFG.agg = mat.PFG.agg[which(mat.PFG.agg$code.habitat %in% studied.habitat$ID), ] #filter non interesting habitat + NA
mat.PFG.agg = merge(mat.PFG.agg, table.habitat.releve[, c("ID", "habitat")], by.x = "code.habitat", by.y = "ID")
print(cat("habitat classes used in the RF algo: ",unique(mat.PFG.agg$habitat),"\n",sep="\t"))
} else if (names(raster::levels(hab.obs)[[1]]) == c("ID", "habitat", "colour") & nrow(raster::levels(hab.obs)[[1]]) > 0 & is.null(studied.habitat))
- {
+ { ## cas où on utilise les levels définis dans la carte
table.habitat.releve = levels(hab.obs)[[1]]
mat.PFG.agg = merge(mat.PFG.agg, table.habitat.releve[, c("ID", "habitat")], by.x = "code.habitat", by.y = "ID")
mat.PFG.agg <- mat.PFG.agg[which(mat.PFG.agg$habitat %in% studied.habitat), ] #filter non interesting habitat + NA
From ba8772fbee0e3c8df936af257ed6dc39ce909816 Mon Sep 17 00:00:00 2001
From: MayaGueguen
Date: Tue, 22 Mar 2022 15:40:43 +0100
Subject: [PATCH 066/176] Premier check sur UTILS.do_habitat_validation
---
R/UTILS.do_habitat_validation.R | 281 ++++++++++++++++----------------
1 file changed, 138 insertions(+), 143 deletions(-)
diff --git a/R/UTILS.do_habitat_validation.R b/R/UTILS.do_habitat_validation.R
index 95612fa..d1a5a09 100644
--- a/R/UTILS.do_habitat_validation.R
+++ b/R/UTILS.do_habitat_validation.R
@@ -57,7 +57,6 @@
##' @importFrom stats aggregate
##' @importFrom stringr str_sub
##' @importFrom foreach foreach %dopar%
-##' @importFrom forcats fct_expand
##' @importFrom reshape2 dcast
##' @importFrom caret confusionMatrix
##' @importFrom utils write.csv
@@ -68,7 +67,10 @@
### END OF HEADER ##############################################################
-do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validation.mask, simulation.map, predict.all.map, sim.version, name.simulation, perStrata, hab.obs, year, list.strata.releves, list.strata.simulations) {
+do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validation.mask
+ , simulation.map, predict.all.map, sim.version, name.simulation
+ , perStrata, hab.obs, year, list.strata.releves, list.strata.simulations)
+{
cat("\n ---------- FATE OUTPUT ANALYSIS \n")
@@ -80,16 +82,16 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
###########################
#check if strata definition used in the RF model is the same as the one used to analyze FATE output
- if(perStrata==T){
- if(all(base::intersect(names(list.strata.simulations), list.strata.releves)==names(list.strata.simulations))){
+ if (perStrata == TRUE) {
+ if (all(intersect(names(list.strata.simulations), list.strata.releves) == names(list.strata.simulations))) {
list.strata = names(list.strata.simulations)
print("strata definition OK")
- }else {
+ } else {
stop("wrong strata definition")
}
- }else if(perStrata==F){
- list.strata<-"all"
- }else{
+ } else if (perStrata == FALSE) {
+ list.strata <- "all"
+ } else {
stop("check 'perStrata' parameter and/or the names of strata in list.strata.releves & list.strata.simulation")
}
@@ -99,10 +101,11 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
}
#consistency between habitat.FATE.map and simulation.map
- if(!compareCRS(simulation.map,habitat.FATE.map)){
- print("reprojecting habitat.FATE.map to match simulation.map crs")
- habitat.FATE.map<-projectRaster(habitat.FATE.map,crs=crs(simulation.map))
- }
+ ## MUST BE DONE before
+ # if(!compareCRS(simulation.map,habitat.FATE.map)){
+ # print("reprojecting habitat.FATE.map to match simulation.map crs")
+ # habitat.FATE.map<-projectRaster(habitat.FATE.map,crs=crs(simulation.map))
+ # }
if(!all(res(habitat.FATE.map)==res(simulation.map))){
stop("provide habitat.FATE.map with same resolution as simulation.map")
}
@@ -110,10 +113,11 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
print("cropping habitat.FATE.map to match simulation.map")
habitat.FATE.map<-crop(x=habitat.FATE.map,y=simulation.map)
}
- if(!all(origin(simulation.map)==origin(habitat.FATE.map))){
- print("setting origin habitat.FATE.map to match simulation.map")
- raster::origin(habitat.FATE.map) <- raster::origin(simulation.map)
- }
+ ## MUST BE DONE before
+ # if(!all(origin(simulation.map)==origin(habitat.FATE.map))){
+ # print("setting origin habitat.FATE.map to match simulation.map")
+ # raster::origin(habitat.FATE.map) <- raster::origin(simulation.map)
+ # }
if(!compareRaster(simulation.map,habitat.FATE.map)){ #this is crucial to be able to identify pixel by their index and not their coordinates
stop("habitat.FATE.map could not be coerced to match simulation.map")
}else{
@@ -121,24 +125,25 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
}
#adjust validation.mask accordingly
- if(!all(res(habitat.FATE.map)==res(validation.mask))){
- validation.mask<-projectRaster(from=validation.mask,to=habitat.FATE.map,method = "ngb")
- }
+ ## MUST BE DONE before ?
+ # if(!all(res(habitat.FATE.map)==res(validation.mask))){
+ # validation.mask<-projectRaster(from=validation.mask,to=habitat.FATE.map,method = "ngb")
+ # }
if(extent(validation.mask)!=extent(habitat.FATE.map)){
validation.mask<-crop(x=validation.mask,y=habitat.FATE.map)
}
if(!compareRaster(validation.mask,habitat.FATE.map)){
stop("error in correcting validation.mask to match habitat.FATE.map")
}else{
- print("validation.mask is (now) consistent with (modified) habitat.FATE.map")
+ print("validation.mask is (now) consistent with (modified) habitat.FATE.map") ## TODO : change message
}
#check consistency for PFG & strata classes between FATE output vs the RF model
- RF.predictors<-rownames(RF.model$importance)
- RF.PFG<-unique(str_sub(RF.predictors,1,2))
+ RF.predictors <- rownames(RF.model$importance)
+ RF.PFG <- unique(str_sub(RF.predictors, 1, 2))
- FATE.PFG<-str_sub(list.files(paste0(name.simulation,"/DATA/PFGS/SUCC")),6,7)
+ FATE.PFG<-str_sub(list.files(paste0(name.simulation,"/DATA/PFGS/SUCC")),6,7) ## TODO : careful, will not match necessarily all PFG names
if(length(setdiff(FATE.PFG,RF.PFG))>0|length(setdiff(RF.PFG,FATE.PFG))>0){
stop("The PFG used to train the RF algorithm are not the same as the PFG used to run FATE.")
@@ -149,23 +154,23 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
#II. Prepare database for FATE habitat
#########################################################################################
- #index of the pixels in the simulation area
- in.region.pixels<-which(getValues(simulation.map)==1)
-
#habitat df for the whole simulation area
- habitat.whole.area.df<-data.frame(pixel=seq(from=1,to=ncell(habitat.FATE.map),by=1),code.habitat=getValues(habitat.FATE.map),for.validation=getValues(validation.mask))
- habitat.whole.area.df<-habitat.whole.area.df[in.region.pixels,]
- habitat.whole.area.df<-subset(habitat.whole.area.df, for.validation!="NA")
- habitat.whole.area.df<-merge(habitat.whole.area.df, dplyr::select(levels(hab.obs)[[1]],c(ID,habitat)), by.x="code.habitat", by.y="ID")
- habitat.whole.area.df<-filter(habitat.whole.area.df, is.element(habitat,RF.model$classes))
+ habitat.whole.area.df <- data.frame(pixel = seq(1, ncell(habitat.FATE.map), 1)
+ , code.habitat = getValues(habitat.FATE.map)
+ , for.validation = getValues(validation.mask))
+ habitat.whole.area.df <- habitat.whole.area.df[which(getValues(simulation.map) == 1), ] #index of the pixels in the simulation area
+ habitat.whole.area.df <- habitat.whole.area.df[which(!is.na(habitat.whole.area.df$for.validation)), ]
+ habitat.whole.area.df <- merge(habitat.whole.area.df, dplyr::select(levels(hab.obs)[[1]],c(ID,habitat)), by.x = "code.habitat", by.y = "ID")
+ habitat.whole.area.df <- habitat.whole.area.df[which(habitat.whole.area.df$habitat %in% RF.model$classes), ]
- print(cat("Habitat considered in the prediction exercise: ",c(unique(habitat.whole.area.df$habitat)),"\n",sep="\t"))
+ print(cat("Habitat considered in the prediction exercise: ", c(unique(habitat.whole.area.df$habitat)), "\n", sep = "\t"))
print("Habitat in the simulation area:")
- table(habitat.whole.area.df$habitat,useNA="always")
+ table(habitat.whole.area.df$habitat, useNA = "always")
print("Habitat in the subpart of the simulation area used for validation:")
- table(habitat.whole.area.df$habitat[habitat.whole.area.df$for.validation==1],useNA="always")
+ table(habitat.whole.area.df$habitat[habitat.whole.area.df$for.validation == 1], useNA = "always")
+
##############################
# III. Loop on simulations
@@ -173,127 +178,117 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
print("processing simulations")
- registerDoParallel(detectCores()-2)
- results.simul <- foreach(i=1:length(all_of(sim.version))) %dopar%{
-
- ########################"
- # III.1. Data preparation
- #########################
-
- #get simulated abundance per pixel*strata*PFG for pixels in the simulation area
- if(perStrata==F){
+ # registerDoParallel(detectCores()-2) ## TODO : put as optional (like in zip/unzip function)
+ results.simul <- foreach(i = 1:length(all_of(sim.version))) %dopar%
+ {
- simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_", sim.version, ".csv"))
- simu_PFG = simu_PFG[,c("PFG","ID.pixel", paste0("X",year))] #keep only the PFG, ID.pixel and abundance at any year columns
- #careful : the number of abundance data files to save is to defined in POST_FATE.temporal.evolution function
- colnames(simu_PFG) = c("PFG", "pixel", "abs")
+ ########################"
+ # III.1. Data preparation
+ #########################
- } else if(perStrata==T){
+ #get simulated abundance per pixel*strata*PFG for pixels in the simulation area
+ if (perStrata == FALSE) {
+ ## TODO : add test if file exists
+ simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_", sim.version, ".csv"))
+ simu_PFG = simu_PFG[,c("PFG","ID.pixel", paste0("X",year))] #keep only the PFG, ID.pixel and abundance at any year columns
+ #careful : the number of abundance data files to save is to defined in POST_FATE.temporal.evolution function
+ colnames(simu_PFG) = c("PFG", "pixel", "abs")
+ simu_PFG$strata <- "A"
+
+ } else if (perStrata == TRUE) {
+ ## TODO : add test if file exists
+ simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_perStrata_", sim.version, ".csv"))
+ simu_PFG = simu_PFG[, c("PFG", "ID.pixel", "strata", paste0("X", year))]
+ colnames(simu_PFG) = c("PFG", "pixel", "strata", "abs")
+ new.strata <- rep(NA, nrow(simu_PFG))
+ for (i in 1:length(list.strata.simulations)) {
+ ind = which(simu_PFG$strata %in% list.strata.simulations[[i]])
+ new.strata[ind] = names(list.strata.simulations)[i]
+ }
+ simu_PFG$strata = new.strata
+ }
- simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_perStrata_", sim.version, ".csv"))
- simu_PFG = simu_PFG[,c("PFG","ID.pixel", "strata", paste0("X", year))]
- colnames(simu_PFG) = c("PFG", "pixel", "strata", "abs")
+ ## SIMILAR to what was done in train_RF_habitat for the releves
+ #aggregate all the rows with same pixel, (new) strata and PFG (necessary since possibly several line with the same pixel+strata+PFG after strata grouping)
+ simu_PFG <- aggregate(abs ~ pixel + strata + PFG, data = simu_PFG, FUN = "sum")
- }
-
- #aggregate per strata group with the correspondance provided in input
- simu_PFG$new.strata<-NA
-
- #attribute the "new.strata" value to group FATE strata used in the simulations into strata comparable with CBNA ones (all strata together or per strata)
- if(perStrata==F){
- simu_PFG$new.strata<-"A"
- }else if(perStrata==T){
- for(i in 1:length(list.strata.simulations)){
- simu_PFG$new.strata[is.element(simu_PFG$strata, list.strata.simulations[[i]])] = names(list.strata.simulations)[i]
+ #transform absolute abundance into relative abundance (no pb if all combination PFG*strata are not present, since then the value is 0!)
+ simu_PFG <- simu_PFG %>% group_by(pixel,strata) %>% mutate(relative.abundance = round(prop.table(abs), digits = 2)) #those are proportions, not percentages
+ simu_PFG$relative.abundance[is.na(simu_PFG$relative.abundance)] <- 0 #NA because abs==0 for some PFG, so put 0 instead of NA (necessary to avoid risk of confusion with NA in pixels because out of the map)
+ simu_PFG <- as.data.frame(simu_PFG)
+
+ #drop the absolute abundance
+ simu_PFG$abs<-NULL
+
+ #correct the levels (to have all PFG and all strata) to make the dcast transfo easier (all PFG*strata combination will be automatically created thanks to the factor structure, even if no line corresponds to it)
+ simu_PFG$PFG<-as.factor(simu_PFG$PFG)
+ simu_PFG$PFG <- factor(simu_PFG$PFG, sort(unique(c(levels(simu_PFG$PFG)) RF.PFG))))
+ simu_PFG$strata<-as.factor(simu_PFG$strata)
+ simu_PFG$PFG <- factor(simu_PFG$PFG, sort(unique(c(levels(simu_PFG$strata), list.strata))))
+
+ #cast
+ simu_PFG<-reshape2::dcast(simu_PFG, pixel ~ PFG * strata, value.var = c("relative.abundance"), fill = 0, drop = FALSE)
+
+ #merge PFG info and habitat + transform habitat into factor
+
+ #here it is crucial to have exactly the same raster structure for "simulation.map" and "habitat.FATE.map", so as to be able to do the merge on the "pixel" variable
+ data.FATE.PFG.habitat <- merge(simu_PFG, habitat.whole.area.df, by = "pixel") #at this stage we have all the pixels in the simulation area
+ data.FATE.PFG.habitat$habitat <- factor(data.FATE.PFG.habitat$habitat, levels = RF.model$classes) #thanks to the "levels" argument, we have the same order for the habitat factor in the RF model and in the FATE outputs
+
+ ############################
+ # III.2. Prediction of habitat with the RF algorithm
+ #################################
+
+ data.validation <- data.FATE.PFG.habitat[which(data.FATE.PFG.habitat$for.validation == 1), ]
+ x.validation <- dplyr::select(data.validation,all_of(RF.predictors)) ## TODO : change for classic colnames selection but with error message if not fullfilling all names ?
+ y.validation <- data.validation$habitat
+
+ y.validation.predicted <- predict(object = RF.model, newdata = x.validation, type = "response", norm.votes = TRUE)
+
+ ##############################
+ # III.3. Analysis of the results
+ ################################
+
+ confusion.validation <- confusionMatrix(data = y.validation.predicted
+ , reference = factor(y.validation, sort(unique(c(levels(y.validation)) levels(y.validation.predicted)))))
+
+ synthesis.validation <- data.frame(habitat = colnames(confusion.validation$table)
+ , sensitivity = confusion.validation$byClass[, 1]
+ , specificity = confusion.validation$byClass[, 2]
+ , weight = colSums(confusion.validation$table) / sum(colSums(confusion.validation$table)))
+ synthesis.validation <- synthesis.validation %>% mutate(TSS = round(sensitivity + specificity - 1, digits = 2))
+
+ aggregate.TSS.validation <- round(sum(synthesis.validation$weight * synthesis.validation$TSS, na.rm = TRUE), digits = 2)
+
+ ########################
+ # III.4. Predict habitat for the whole map if option selected (do it only for a small number of simulations)
+ ############################################
+
+ if (predict.all.map == TRUE) {
+ y.all.map.predicted = predict(object = RF.model, newdata = dplyr::select(data.FATE.PFG.habitat, all_of(RF.predictors)), type = "response", norm.votes = TRUE)
+ y.all.map.predicted = as.data.frame(y.all.map.predicted)
+ y.all.map.predicted$pixel = data.FATE.PFG.habitat$pixel
+ colnames(y.all.map.predicted) = c(sim.version, "pixel")
+ } else {
+ y.all.map.predicted <- NULL
}
- simu_PFG$strata = NULL
- }
-
- simu_PFG<-dplyr::rename(simu_PFG,"strata"="new.strata")
-
- #aggregate all the rows with same pixel, (new) strata and PFG (necessary since possibly several line with the same pixel+strata+PFG after strata grouping)
- simu_PFG<-aggregate(abs~pixel+strata+PFG,data=simu_PFG,FUN="sum")
-
- #transform absolute abundance into relative abundance (no pb if all combination PFG*strata are not present, since then the value is 0!)
- simu_PFG<-simu_PFG %>% group_by(pixel,strata) %>% mutate(relative.abundance= round(prop.table(abs),digits=2)) #those are proportions, not percentages
- simu_PFG$relative.abundance[is.na(simu_PFG$relative.abundance)]<-0 #NA because abs==0 for some PFG, so put 0 instead of NA (necessary to avoid risk of confusion with NA in pixels because out of the map)
- simu_PFG<-as.data.frame(simu_PFG)
-
- #drop the absolute abundance
- simu_PFG$abs<-NULL
-
- #set a factor structure
- simu_PFG$PFG<-as.factor(simu_PFG$PFG)
- simu_PFG$strata<-as.factor(simu_PFG$strata)
-
- #correct the levels (to have all PFG and all strata) to make the dcast transfo easier (all PFG*strata combination will be automatically created thanks to the factor structure, even if no line corresponds to it)
- simu_PFG$PFG<-fct_expand(simu_PFG$PFG,RF.PFG)
- simu_PFG$strata<-fct_expand(simu_PFG$strata,list.strata)
-
- #cast
- simu_PFG<-reshape2::dcast(simu_PFG,pixel~PFG*strata,value.var=c("relative.abundance"),fill=0,drop=F)
-
- #merge PFG info and habitat + transform habitat into factor
-
- #here it is crucial to have exactly the same raster structure for "simulation.map" and "habitat.FATE.map", so as to be able to do the merge on the "pixel" variable
- data.FATE.PFG.habitat<-merge(simu_PFG,habitat.whole.area.df,by="pixel") #at this stage we have all the pixels in the simulation area
- data.FATE.PFG.habitat$habitat<-factor(data.FATE.PFG.habitat$habitat,levels=RF.model$classes) #thanks to the "levels" argument, we have the same order for the habitat factor in the RF model and in the FATE outputs
-
- ############################
- # III.2. Prediction of habitat with the RF algorithm
- #################################
-
- data.validation<-filter(data.FATE.PFG.habitat,for.validation==1)
- x.validation<-dplyr::select(data.validation,all_of(RF.predictors))
- y.validation<-data.validation$habitat
-
- y.validation.predicted<-predict(object=RF.model,newdata=x.validation,type="response",norm.votes=T)
-
- ##############################
- # III.3. Analysis of the results
- ################################
-
- confusion.validation<-confusionMatrix(data=y.validation.predicted,reference=fct_expand(y.validation,levels(y.validation.predicted)))
-
- synthesis.validation<-data.frame(habitat=colnames(confusion.validation$table),sensitivity=confusion.validation$byClass[,1],specificity=confusion.validation$byClass[,2],weight=colSums(confusion.validation$table)/sum(colSums(confusion.validation$table)))
- synthesis.validation<-synthesis.validation%>%mutate(TSS=round(sensitivity+specificity-1,digits=2))
-
- aggregate.TSS.validation<-round(sum(synthesis.validation$weight*synthesis.validation$TSS,na.rm=T),digits=2)
-
- ########################
- # III.4. Predict habitat for the whole map if option selected (do it only for a small number of simulations)
- ############################################
-
- if(predict.all.map==T){
- y.all.map.predicted = predict(object=RF.model,newdata=dplyr::select(data.FATE.PFG.habitat,all_of(RF.predictors)),type="response",norm.votes=T)
- y.all.map.predicted = as.data.frame(y.all.map.predicted)
- y.all.map.predicted$pixel = data.FATE.PFG.habitat$pixel
- colnames(y.all.map.predicted) = c(sim.version, "pixel")
+ #prepare outputs
+ output.validation <- c(synthesis.validation$TSS, aggregate.TSS.validation)
+ names(output.validation) <- c(synthesis.validation$habitat, "aggregated")
- }else{
- y.all.map.predicted<-NULL
+ return(list(output.validation = output.validation
+ , y.all.map.predicted = y.all.map.predicted))
}
-
- #prepare outputs
-
- output.validation<-c(synthesis.validation$TSS,aggregate.TSS.validation)
- names(output.validation)<-c(synthesis.validation$habitat,"aggregated")
-
- output<-list(output.validation,y.all.map.predicted)
- names(output)<-c("output.validation","y.all.map.predicted")
-
- return(output)
- }
#end of the loop on simulations
#deal with the results regarding model performance
- habitat.performance<-as.data.frame(matrix(unlist(lapply(results.simul,"[[",1)),ncol=length(RF.model$classes)+1,byrow=T))
- names(habitat.performance)<-c(RF.model$classes,"weighted")
- habitat.performance$simulation<-sim.version
+ habitat.performance <- as.data.frame(matrix(unlist(lapply(results.simul, "[[", 1)), ncol = length(RF.model$classes) + 1, byrow = TRUE))
+ names(habitat.performance) <- c(RF.model$classes, "weighted")
+ habitat.performance$simulation <- sim.version
#save
- write.csv(habitat.performance,paste0(output.path,"/HABITAT/", sim.version, "/performance.habitat.csv"),row.names=F)
+ write.csv(habitat.performance, paste0(output.path, "/HABITAT/", sim.version, "/performance.habitat.csv"), row.names = FALSE)
print("habitat performance saved")
@@ -303,7 +298,7 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
all.map.prediction = rename(all.map.prediction,"true.habitat"="habitat")
#save
- write.csv(all.map.prediction,paste0(output.path,"/HABITAT/", sim.version, "/habitat.prediction.csv"), row.names=F)
+ write.csv(all.map.prediction,paste0(output.path,"/HABITAT/", sim.version, "/habitat.prediction.csv"), row.names = FALSE)
#return results
return(all.map.prediction)
From 5c282fcb6f58c5c70df1edc6f1f321a94d69324d Mon Sep 17 00:00:00 2001
From: Maxime Delprat
Date: Tue, 22 Mar 2022 15:59:57 +0100
Subject: [PATCH 067/176] small adjustments
---
R/UTILS.do_habitat_validation.R | 161 ++++++++++++++++----------------
1 file changed, 80 insertions(+), 81 deletions(-)
diff --git a/R/UTILS.do_habitat_validation.R b/R/UTILS.do_habitat_validation.R
index 95612fa..9e604bc 100644
--- a/R/UTILS.do_habitat_validation.R
+++ b/R/UTILS.do_habitat_validation.R
@@ -68,7 +68,7 @@
### END OF HEADER ##############################################################
-do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validation.mask, simulation.map, predict.all.map, sim.version, name.simulation, perStrata, hab.obs, year, list.strata.releves, list.strata.simulations) {
+do.habitat.validation = function(output.path, RF.model, habitat.FATE.map, validation.mask, simulation.map, predict.all.map, sim.version, name.simulation, perStrata, hab.obs, year, list.strata.releves, list.strata.simulations) {
cat("\n ---------- FATE OUTPUT ANALYSIS \n")
@@ -80,39 +80,39 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
###########################
#check if strata definition used in the RF model is the same as the one used to analyze FATE output
- if(perStrata==T){
- if(all(base::intersect(names(list.strata.simulations), list.strata.releves)==names(list.strata.simulations))){
+ if(perStrata == T){
+ if(all(base::intersect(names(list.strata.simulations), list.strata.releves) == names(list.strata.simulations))){
list.strata = names(list.strata.simulations)
print("strata definition OK")
}else {
stop("wrong strata definition")
}
- }else if(perStrata==F){
- list.strata<-"all"
+ }else if(perStrata == F){
+ list.strata = "all"
}else{
stop("check 'perStrata' parameter and/or the names of strata in list.strata.releves & list.strata.simulation")
}
#initial consistency between habitat.FATE.map and validation.mask (do it before the adjustement of habitat.FATE.map)
- if(!compareCRS(habitat.FATE.map,validation.mask) | !all(res(habitat.FATE.map)==res(validation.mask))){
+ if(!compareCRS(habitat.FATE.map,validation.mask) | !all(res(habitat.FATE.map) == res(validation.mask))){
stop("please provide rasters with same crs and resolution for habitat.FATE.map and validation.mask")
}
#consistency between habitat.FATE.map and simulation.map
- if(!compareCRS(simulation.map,habitat.FATE.map)){
+ if(!compareCRS(simulation.map, habitat.FATE.map)){
print("reprojecting habitat.FATE.map to match simulation.map crs")
- habitat.FATE.map<-projectRaster(habitat.FATE.map,crs=crs(simulation.map))
+ habitat.FATE.map = projectRaster(habitat.FATE.map, crs = crs(simulation.map))
}
- if(!all(res(habitat.FATE.map)==res(simulation.map))){
+ if(!all(res(habitat.FATE.map) == res(simulation.map))){
stop("provide habitat.FATE.map with same resolution as simulation.map")
}
- if(extent(simulation.map)!=extent(habitat.FATE.map)){
+ if(extent(simulation.map) != extent(habitat.FATE.map)){
print("cropping habitat.FATE.map to match simulation.map")
- habitat.FATE.map<-crop(x=habitat.FATE.map,y=simulation.map)
+ habitat.FATE.map = crop(x = habitat.FATE.map, y = simulation.map)
}
- if(!all(origin(simulation.map)==origin(habitat.FATE.map))){
+ if(!all(origin(simulation.map) == origin(habitat.FATE.map))){
print("setting origin habitat.FATE.map to match simulation.map")
- raster::origin(habitat.FATE.map) <- raster::origin(simulation.map)
+ raster::origin(habitat.FATE.map) = raster::origin(simulation.map)
}
if(!compareRaster(simulation.map,habitat.FATE.map)){ #this is crucial to be able to identify pixel by their index and not their coordinates
stop("habitat.FATE.map could not be coerced to match simulation.map")
@@ -121,13 +121,13 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
}
#adjust validation.mask accordingly
- if(!all(res(habitat.FATE.map)==res(validation.mask))){
- validation.mask<-projectRaster(from=validation.mask,to=habitat.FATE.map,method = "ngb")
+ if(!all(res(habitat.FATE.map) == res(validation.mask))){
+ validation.mask = projectRaster(from = validation.mask, to = habitat.FATE.map, method = "ngb")
}
- if(extent(validation.mask)!=extent(habitat.FATE.map)){
- validation.mask<-crop(x=validation.mask,y=habitat.FATE.map)
+ if(extent(validation.mask) != extent(habitat.FATE.map)){
+ validation.mask = crop(x = validation.mask, y = habitat.FATE.map)
}
- if(!compareRaster(validation.mask,habitat.FATE.map)){
+ if(!compareRaster(validation.mask, habitat.FATE.map)){
stop("error in correcting validation.mask to match habitat.FATE.map")
}else{
print("validation.mask is (now) consistent with (modified) habitat.FATE.map")
@@ -135,12 +135,12 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
#check consistency for PFG & strata classes between FATE output vs the RF model
- RF.predictors<-rownames(RF.model$importance)
- RF.PFG<-unique(str_sub(RF.predictors,1,2))
+ RF.predictors = rownames(RF.model$importance)
+ RF.PFG = unique(str_sub(RF.predictors,1,2))
- FATE.PFG<-str_sub(list.files(paste0(name.simulation,"/DATA/PFGS/SUCC")),6,7)
+ FATE.PFG = str_sub(list.files(paste0(name.simulation, "/DATA/PFGS/SUCC")), 6, 7)
- if(length(setdiff(FATE.PFG,RF.PFG))>0|length(setdiff(RF.PFG,FATE.PFG))>0){
+ if(length(setdiff(FATE.PFG,RF.PFG)) > 0 | length(setdiff(RF.PFG,FATE.PFG)) > 0){
stop("The PFG used to train the RF algorithm are not the same as the PFG used to run FATE.")
}
@@ -150,147 +150,146 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
#########################################################################################
#index of the pixels in the simulation area
- in.region.pixels<-which(getValues(simulation.map)==1)
+ in.region.pixels = which(getValues(simulation.map) == 1)
#habitat df for the whole simulation area
- habitat.whole.area.df<-data.frame(pixel=seq(from=1,to=ncell(habitat.FATE.map),by=1),code.habitat=getValues(habitat.FATE.map),for.validation=getValues(validation.mask))
- habitat.whole.area.df<-habitat.whole.area.df[in.region.pixels,]
- habitat.whole.area.df<-subset(habitat.whole.area.df, for.validation!="NA")
- habitat.whole.area.df<-merge(habitat.whole.area.df, dplyr::select(levels(hab.obs)[[1]],c(ID,habitat)), by.x="code.habitat", by.y="ID")
- habitat.whole.area.df<-filter(habitat.whole.area.df, is.element(habitat,RF.model$classes))
+ habitat.whole.area.df = data.frame(pixel = seq(from = 1, to = ncell(habitat.FATE.map), by = 1), code.habitat = getValues(habitat.FATE.map), for.validation = getValues(validation.mask))
+ habitat.whole.area.df = habitat.whole.area.df[in.region.pixels,]
+ habitat.whole.area.df = subset(habitat.whole.area.df, for.validation != "NA")
+ habitat.whole.area.df = merge(habitat.whole.area.df, dplyr::select(levels(hab.obs)[[1]], c(ID, habitat)), by.x = "code.habitat", by.y = "ID")
+ habitat.whole.area.df = filter(habitat.whole.area.df, is.element(habitat, RF.model$classes))
- print(cat("Habitat considered in the prediction exercise: ",c(unique(habitat.whole.area.df$habitat)),"\n",sep="\t"))
+ print(cat("Habitat considered in the prediction exercise: ", c(unique(habitat.whole.area.df$habitat)), "\n", sep = "\t"))
print("Habitat in the simulation area:")
- table(habitat.whole.area.df$habitat,useNA="always")
+ table(habitat.whole.area.df$habitat, useNA = "always")
print("Habitat in the subpart of the simulation area used for validation:")
- table(habitat.whole.area.df$habitat[habitat.whole.area.df$for.validation==1],useNA="always")
+ table(habitat.whole.area.df$habitat[habitat.whole.area.df$for.validation == 1], useNA = "always")
##############################
# III. Loop on simulations
- #########################
+ ##############################
print("processing simulations")
registerDoParallel(detectCores()-2)
- results.simul <- foreach(i=1:length(all_of(sim.version))) %dopar%{
+ results.simul = foreach(i = 1:length(all_of(sim.version))) %dopar%{
- ########################"
+ #########################
# III.1. Data preparation
#########################
#get simulated abundance per pixel*strata*PFG for pixels in the simulation area
- if(perStrata==F){
+ if(perStrata == F){
simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_", sim.version, ".csv"))
- simu_PFG = simu_PFG[,c("PFG","ID.pixel", paste0("X",year))] #keep only the PFG, ID.pixel and abundance at any year columns
+ simu_PFG = simu_PFG[,c("PFG", "ID.pixel", paste0("X", year))] #keep only the PFG, ID.pixel and abundance at any year columns
#careful : the number of abundance data files to save is to defined in POST_FATE.temporal.evolution function
colnames(simu_PFG) = c("PFG", "pixel", "abs")
- } else if(perStrata==T){
+ } else if(perStrata == T){
simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_perStrata_", sim.version, ".csv"))
- simu_PFG = simu_PFG[,c("PFG","ID.pixel", "strata", paste0("X", year))]
+ simu_PFG = simu_PFG[,c("PFG", "ID.pixel", "strata", paste0("X", year))]
colnames(simu_PFG) = c("PFG", "pixel", "strata", "abs")
}
#aggregate per strata group with the correspondance provided in input
- simu_PFG$new.strata<-NA
+ simu_PFG$new.strata = NA
#attribute the "new.strata" value to group FATE strata used in the simulations into strata comparable with CBNA ones (all strata together or per strata)
- if(perStrata==F){
- simu_PFG$new.strata<-"A"
- }else if(perStrata==T){
+ if(perStrata == F){
+ simu_PFG$new.strata = "A"
+ }else if(perStrata == T){
for(i in 1:length(list.strata.simulations)){
simu_PFG$new.strata[is.element(simu_PFG$strata, list.strata.simulations[[i]])] = names(list.strata.simulations)[i]
}
simu_PFG$strata = NULL
}
- simu_PFG<-dplyr::rename(simu_PFG,"strata"="new.strata")
+ simu_PFG<-dplyr::rename(simu_PFG, "strata" = "new.strata")
#aggregate all the rows with same pixel, (new) strata and PFG (necessary since possibly several line with the same pixel+strata+PFG after strata grouping)
- simu_PFG<-aggregate(abs~pixel+strata+PFG,data=simu_PFG,FUN="sum")
+ simu_PFG = aggregate(abs ~ pixel + strata + PFG, data = simu_PFG, FUN = "sum")
#transform absolute abundance into relative abundance (no pb if all combination PFG*strata are not present, since then the value is 0!)
- simu_PFG<-simu_PFG %>% group_by(pixel,strata) %>% mutate(relative.abundance= round(prop.table(abs),digits=2)) #those are proportions, not percentages
- simu_PFG$relative.abundance[is.na(simu_PFG$relative.abundance)]<-0 #NA because abs==0 for some PFG, so put 0 instead of NA (necessary to avoid risk of confusion with NA in pixels because out of the map)
- simu_PFG<-as.data.frame(simu_PFG)
+ simu_PFG = simu_PFG %>% group_by(pixel, strata) %>% mutate(relative.abundance = round(prop.table(abs), digits = 2)) #those are proportions, not percentages
+ simu_PFG$relative.abundance[is.na(simu_PFG$relative.abundance)] = 0 #NA because abs==0 for some PFG, so put 0 instead of NA (necessary to avoid risk of confusion with NA in pixels because out of the map)
+ simu_PFG = as.data.frame(simu_PFG)
#drop the absolute abundance
- simu_PFG$abs<-NULL
+ simu_PFG$abs = NULL
#set a factor structure
- simu_PFG$PFG<-as.factor(simu_PFG$PFG)
- simu_PFG$strata<-as.factor(simu_PFG$strata)
+ simu_PFG$PFG = as.factor(simu_PFG$PFG)
+ simu_PFG$strata = as.factor(simu_PFG$strata)
#correct the levels (to have all PFG and all strata) to make the dcast transfo easier (all PFG*strata combination will be automatically created thanks to the factor structure, even if no line corresponds to it)
- simu_PFG$PFG<-fct_expand(simu_PFG$PFG,RF.PFG)
- simu_PFG$strata<-fct_expand(simu_PFG$strata,list.strata)
+ simu_PFG$PFG = fct_expand(simu_PFG$PFG, RF.PFG)
+ simu_PFG$strata = fct_expand(simu_PFG$strata, list.strata)
#cast
- simu_PFG<-reshape2::dcast(simu_PFG,pixel~PFG*strata,value.var=c("relative.abundance"),fill=0,drop=F)
+ simu_PFG = reshape2::dcast(simu_PFG, pixel ~ PFG * strata, value.var = c("relative.abundance"), fill = 0, drop = F)
#merge PFG info and habitat + transform habitat into factor
#here it is crucial to have exactly the same raster structure for "simulation.map" and "habitat.FATE.map", so as to be able to do the merge on the "pixel" variable
- data.FATE.PFG.habitat<-merge(simu_PFG,habitat.whole.area.df,by="pixel") #at this stage we have all the pixels in the simulation area
- data.FATE.PFG.habitat$habitat<-factor(data.FATE.PFG.habitat$habitat,levels=RF.model$classes) #thanks to the "levels" argument, we have the same order for the habitat factor in the RF model and in the FATE outputs
+ data.FATE.PFG.habitat = merge(simu_PFG, habitat.whole.area.df, by = "pixel") #at this stage we have all the pixels in the simulation area
+ data.FATE.PFG.habitat$habitat = factor(data.FATE.PFG.habitat$habitat, levels = RF.model$classes) #thanks to the "levels" argument, we have the same order for the habitat factor in the RF model and in the FATE outputs
- ############################
+ #####################################################
# III.2. Prediction of habitat with the RF algorithm
- #################################
+ #####################################################
- data.validation<-filter(data.FATE.PFG.habitat,for.validation==1)
- x.validation<-dplyr::select(data.validation,all_of(RF.predictors))
- y.validation<-data.validation$habitat
+ data.validation = filter(data.FATE.PFG.habitat, for.validation == 1)
+ x.validation = dplyr::select(data.validation, all_of(RF.predictors))
+ y.validation = data.validation$habitat
- y.validation.predicted<-predict(object=RF.model,newdata=x.validation,type="response",norm.votes=T)
+ y.validation.predicted = predict(object = RF.model, newdata = x.validation, type = "response", norm.votes = T)
- ##############################
+ ################################
# III.3. Analysis of the results
################################
- confusion.validation<-confusionMatrix(data=y.validation.predicted,reference=fct_expand(y.validation,levels(y.validation.predicted)))
+ confusion.validation = confusionMatrix(data = y.validation.predicted, reference = fct_expand(y.validation, levels(y.validation.predicted)))
- synthesis.validation<-data.frame(habitat=colnames(confusion.validation$table),sensitivity=confusion.validation$byClass[,1],specificity=confusion.validation$byClass[,2],weight=colSums(confusion.validation$table)/sum(colSums(confusion.validation$table)))
- synthesis.validation<-synthesis.validation%>%mutate(TSS=round(sensitivity+specificity-1,digits=2))
+ synthesis.validation = data.frame(habitat = colnames(confusion.validation$table), sensitivity = confusion.validation$byClass[,1], specificity = confusion.validation$byClass[,2], weight = colSums(confusion.validation$table)/sum(colSums(confusion.validation$table)))
+ synthesis.validation = synthesis.validation %>% mutate(TSS = round(sensitivity + specificity - 1, digits = 2))
- aggregate.TSS.validation<-round(sum(synthesis.validation$weight*synthesis.validation$TSS,na.rm=T),digits=2)
+ aggregate.TSS.validation = round(sum(synthesis.validation$weight * synthesis.validation$TSS, na.rm=T), digits = 2)
- ########################
+ #############################################################################################################
# III.4. Predict habitat for the whole map if option selected (do it only for a small number of simulations)
- ############################################
+ #############################################################################################################
- if(predict.all.map==T){
-
- y.all.map.predicted = predict(object=RF.model,newdata=dplyr::select(data.FATE.PFG.habitat,all_of(RF.predictors)),type="response",norm.votes=T)
+ if(predict.all.map == T){
+ y.all.map.predicted = predict(object = RF.model, newdata = dplyr::select(data.FATE.PFG.habitat, all_of(RF.predictors)), type = "response", norm.votes = T)
y.all.map.predicted = as.data.frame(y.all.map.predicted)
y.all.map.predicted$pixel = data.FATE.PFG.habitat$pixel
colnames(y.all.map.predicted) = c(sim.version, "pixel")
-
}else{
y.all.map.predicted<-NULL
}
#prepare outputs
- output.validation<-c(synthesis.validation$TSS,aggregate.TSS.validation)
- names(output.validation)<-c(synthesis.validation$habitat,"aggregated")
+ output.validation = c(synthesis.validation$TSS, aggregate.TSS.validation)
+ names(output.validation) = c(synthesis.validation$habitat, "aggregated")
- output<-list(output.validation,y.all.map.predicted)
- names(output)<-c("output.validation","y.all.map.predicted")
+ output = list(output.validation, y.all.map.predicted)
+ names(output) = c("output.validation", "y.all.map.predicted")
return(output)
}
+
#end of the loop on simulations
#deal with the results regarding model performance
- habitat.performance<-as.data.frame(matrix(unlist(lapply(results.simul,"[[",1)),ncol=length(RF.model$classes)+1,byrow=T))
- names(habitat.performance)<-c(RF.model$classes,"weighted")
- habitat.performance$simulation<-sim.version
+ habitat.performance = as.data.frame(matrix(unlist(lapply(results.simul, "[[", 1)), ncol = length(RF.model$classes) + 1, byrow = T))
+ names(habitat.performance) = c(RF.model$classes, "weighted")
+ habitat.performance$simulation = sim.version
#save
write.csv(habitat.performance,paste0(output.path,"/HABITAT/", sim.version, "/performance.habitat.csv"),row.names=F)
@@ -300,10 +299,10 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
#deal with the results regarding habitat prediction over the whole map
all.map.prediction = results.simul[[1]]$y.all.map.predicted
all.map.prediction = merge(all.map.prediction, dplyr::select(habitat.whole.area.df, c(pixel,habitat)), by = "pixel")
- all.map.prediction = rename(all.map.prediction,"true.habitat"="habitat")
+ all.map.prediction = rename(all.map.prediction, "true.habitat" = "habitat")
#save
- write.csv(all.map.prediction,paste0(output.path,"/HABITAT/", sim.version, "/habitat.prediction.csv"), row.names=F)
+ write.csv(all.map.prediction, paste0(output.path, "/HABITAT/", sim.version, "/habitat.prediction.csv"), row.names = F)
#return results
return(all.map.prediction)
From 70ded50178cded339d16e0e6200992781de3d8f7 Mon Sep 17 00:00:00 2001
From: Maxime Delprat
Date: Tue, 22 Mar 2022 16:54:03 +0100
Subject: [PATCH 068/176] Correction and adjustments in do.habitat.validation
(maxime)
---
R/UTILS.do_habitat_validation.R | 242 ++++++--------------------------
1 file changed, 40 insertions(+), 202 deletions(-)
diff --git a/R/UTILS.do_habitat_validation.R b/R/UTILS.do_habitat_validation.R
index d410e12..c7f76a1 100644
--- a/R/UTILS.do_habitat_validation.R
+++ b/R/UTILS.do_habitat_validation.R
@@ -67,14 +67,10 @@
### END OF HEADER ##############################################################
-<<<<<<< HEAD
-do.habitat.validation = function(output.path, RF.model, habitat.FATE.map, validation.mask, simulation.map, predict.all.map, sim.version, name.simulation, perStrata, hab.obs, year, list.strata.releves, list.strata.simulations) {
-=======
do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validation.mask
, simulation.map, predict.all.map, sim.version, name.simulation
, perStrata, hab.obs, year, list.strata.releves, list.strata.simulations)
{
->>>>>>> ba8772fbee0e3c8df936af257ed6dc39ce909816
cat("\n ---------- FATE OUTPUT ANALYSIS \n")
@@ -86,27 +82,16 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
###########################
#check if strata definition used in the RF model is the same as the one used to analyze FATE output
-<<<<<<< HEAD
- if(perStrata == T){
- if(all(base::intersect(names(list.strata.simulations), list.strata.releves) == names(list.strata.simulations))){
-=======
if (perStrata == TRUE) {
if (all(intersect(names(list.strata.simulations), list.strata.releves) == names(list.strata.simulations))) {
->>>>>>> ba8772fbee0e3c8df936af257ed6dc39ce909816
list.strata = names(list.strata.simulations)
print("strata definition OK")
} else {
stop("wrong strata definition")
}
-<<<<<<< HEAD
- }else if(perStrata == F){
- list.strata = "all"
- }else{
-=======
} else if (perStrata == FALSE) {
list.strata <- "all"
} else {
->>>>>>> ba8772fbee0e3c8df936af257ed6dc39ce909816
stop("check 'perStrata' parameter and/or the names of strata in list.strata.releves & list.strata.simulation")
}
@@ -116,38 +101,23 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
}
#consistency between habitat.FATE.map and simulation.map
-<<<<<<< HEAD
- if(!compareCRS(simulation.map, habitat.FATE.map)){
- print("reprojecting habitat.FATE.map to match simulation.map crs")
- habitat.FATE.map = projectRaster(habitat.FATE.map, crs = crs(simulation.map))
- }
- if(!all(res(habitat.FATE.map) == res(simulation.map))){
-=======
## MUST BE DONE before
# if(!compareCRS(simulation.map,habitat.FATE.map)){
# print("reprojecting habitat.FATE.map to match simulation.map crs")
# habitat.FATE.map<-projectRaster(habitat.FATE.map,crs=crs(simulation.map))
# }
if(!all(res(habitat.FATE.map)==res(simulation.map))){
->>>>>>> ba8772fbee0e3c8df936af257ed6dc39ce909816
stop("provide habitat.FATE.map with same resolution as simulation.map")
}
if(extent(simulation.map) != extent(habitat.FATE.map)){
print("cropping habitat.FATE.map to match simulation.map")
habitat.FATE.map = crop(x = habitat.FATE.map, y = simulation.map)
}
-<<<<<<< HEAD
- if(!all(origin(simulation.map) == origin(habitat.FATE.map))){
- print("setting origin habitat.FATE.map to match simulation.map")
- raster::origin(habitat.FATE.map) = raster::origin(simulation.map)
- }
-=======
## MUST BE DONE before
# if(!all(origin(simulation.map)==origin(habitat.FATE.map))){
# print("setting origin habitat.FATE.map to match simulation.map")
# raster::origin(habitat.FATE.map) <- raster::origin(simulation.map)
# }
->>>>>>> ba8772fbee0e3c8df936af257ed6dc39ce909816
if(!compareRaster(simulation.map,habitat.FATE.map)){ #this is crucial to be able to identify pixel by their index and not their coordinates
stop("habitat.FATE.map could not be coerced to match simulation.map")
}else{
@@ -155,20 +125,12 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
}
#adjust validation.mask accordingly
-<<<<<<< HEAD
- if(!all(res(habitat.FATE.map) == res(validation.mask))){
- validation.mask = projectRaster(from = validation.mask, to = habitat.FATE.map, method = "ngb")
- }
- if(extent(validation.mask) != extent(habitat.FATE.map)){
- validation.mask = crop(x = validation.mask, y = habitat.FATE.map)
-=======
## MUST BE DONE before ?
# if(!all(res(habitat.FATE.map)==res(validation.mask))){
# validation.mask<-projectRaster(from=validation.mask,to=habitat.FATE.map,method = "ngb")
# }
if(extent(validation.mask)!=extent(habitat.FATE.map)){
validation.mask<-crop(x=validation.mask,y=habitat.FATE.map)
->>>>>>> ba8772fbee0e3c8df936af257ed6dc39ce909816
}
if(!compareRaster(validation.mask, habitat.FATE.map)){
stop("error in correcting validation.mask to match habitat.FATE.map")
@@ -177,18 +139,11 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
}
#check consistency for PFG & strata classes between FATE output vs the RF model
-
-<<<<<<< HEAD
- RF.predictors = rownames(RF.model$importance)
- RF.PFG = unique(str_sub(RF.predictors,1,2))
-
- FATE.PFG = str_sub(list.files(paste0(name.simulation, "/DATA/PFGS/SUCC")), 6, 7)
-=======
+
RF.predictors <- rownames(RF.model$importance)
RF.PFG <- unique(str_sub(RF.predictors, 1, 2))
FATE.PFG<-str_sub(list.files(paste0(name.simulation,"/DATA/PFGS/SUCC")),6,7) ## TODO : careful, will not match necessarily all PFG names
->>>>>>> ba8772fbee0e3c8df936af257ed6dc39ce909816
if(length(setdiff(FATE.PFG,RF.PFG)) > 0 | length(setdiff(RF.PFG,FATE.PFG)) > 0){
stop("The PFG used to train the RF algorithm are not the same as the PFG used to run FATE.")
@@ -199,17 +154,6 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
#II. Prepare database for FATE habitat
#########################################################################################
-<<<<<<< HEAD
- #index of the pixels in the simulation area
- in.region.pixels = which(getValues(simulation.map) == 1)
-
- #habitat df for the whole simulation area
- habitat.whole.area.df = data.frame(pixel = seq(from = 1, to = ncell(habitat.FATE.map), by = 1), code.habitat = getValues(habitat.FATE.map), for.validation = getValues(validation.mask))
- habitat.whole.area.df = habitat.whole.area.df[in.region.pixels,]
- habitat.whole.area.df = subset(habitat.whole.area.df, for.validation != "NA")
- habitat.whole.area.df = merge(habitat.whole.area.df, dplyr::select(levels(hab.obs)[[1]], c(ID, habitat)), by.x = "code.habitat", by.y = "ID")
- habitat.whole.area.df = filter(habitat.whole.area.df, is.element(habitat, RF.model$classes))
-=======
#habitat df for the whole simulation area
habitat.whole.area.df <- data.frame(pixel = seq(1, ncell(habitat.FATE.map), 1)
, code.habitat = getValues(habitat.FATE.map)
@@ -218,7 +162,6 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
habitat.whole.area.df <- habitat.whole.area.df[which(!is.na(habitat.whole.area.df$for.validation)), ]
habitat.whole.area.df <- merge(habitat.whole.area.df, dplyr::select(levels(hab.obs)[[1]],c(ID,habitat)), by.x = "code.habitat", by.y = "ID")
habitat.whole.area.df <- habitat.whole.area.df[which(habitat.whole.area.df$habitat %in% RF.model$classes), ]
->>>>>>> ba8772fbee0e3c8df936af257ed6dc39ce909816
print(cat("Habitat considered in the prediction exercise: ", c(unique(habitat.whole.area.df$habitat)), "\n", sep = "\t"))
@@ -227,10 +170,6 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
print("Habitat in the subpart of the simulation area used for validation:")
table(habitat.whole.area.df$habitat[habitat.whole.area.df$for.validation == 1], useNA = "always")
-<<<<<<< HEAD
-=======
-
->>>>>>> ba8772fbee0e3c8df936af257ed6dc39ce909816
##############################
# III. Loop on simulations
@@ -238,126 +177,18 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
print("processing simulations")
-<<<<<<< HEAD
- registerDoParallel(detectCores()-2)
- results.simul = foreach(i = 1:length(all_of(sim.version))) %dopar%{
-
- #########################
- # III.1. Data preparation
- #########################
-
- #get simulated abundance per pixel*strata*PFG for pixels in the simulation area
- if(perStrata == F){
-
- simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_", sim.version, ".csv"))
- simu_PFG = simu_PFG[,c("PFG", "ID.pixel", paste0("X", year))] #keep only the PFG, ID.pixel and abundance at any year columns
- #careful : the number of abundance data files to save is to defined in POST_FATE.temporal.evolution function
- colnames(simu_PFG) = c("PFG", "pixel", "abs")
-
- } else if(perStrata == T){
-
- simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_perStrata_", sim.version, ".csv"))
- simu_PFG = simu_PFG[,c("PFG", "ID.pixel", "strata", paste0("X", year))]
- colnames(simu_PFG) = c("PFG", "pixel", "strata", "abs")
-
- }
-
- #aggregate per strata group with the correspondance provided in input
- simu_PFG$new.strata = NA
-
- #attribute the "new.strata" value to group FATE strata used in the simulations into strata comparable with CBNA ones (all strata together or per strata)
- if(perStrata == F){
- simu_PFG$new.strata = "A"
- }else if(perStrata == T){
- for(i in 1:length(list.strata.simulations)){
- simu_PFG$new.strata[is.element(simu_PFG$strata, list.strata.simulations[[i]])] = names(list.strata.simulations)[i]
- }
- simu_PFG$strata = NULL
- }
-
- simu_PFG<-dplyr::rename(simu_PFG, "strata" = "new.strata")
-
- #aggregate all the rows with same pixel, (new) strata and PFG (necessary since possibly several line with the same pixel+strata+PFG after strata grouping)
- simu_PFG = aggregate(abs ~ pixel + strata + PFG, data = simu_PFG, FUN = "sum")
-
- #transform absolute abundance into relative abundance (no pb if all combination PFG*strata are not present, since then the value is 0!)
- simu_PFG = simu_PFG %>% group_by(pixel, strata) %>% mutate(relative.abundance = round(prop.table(abs), digits = 2)) #those are proportions, not percentages
- simu_PFG$relative.abundance[is.na(simu_PFG$relative.abundance)] = 0 #NA because abs==0 for some PFG, so put 0 instead of NA (necessary to avoid risk of confusion with NA in pixels because out of the map)
- simu_PFG = as.data.frame(simu_PFG)
-
- #drop the absolute abundance
- simu_PFG$abs = NULL
-
- #set a factor structure
- simu_PFG$PFG = as.factor(simu_PFG$PFG)
- simu_PFG$strata = as.factor(simu_PFG$strata)
-
- #correct the levels (to have all PFG and all strata) to make the dcast transfo easier (all PFG*strata combination will be automatically created thanks to the factor structure, even if no line corresponds to it)
- simu_PFG$PFG = fct_expand(simu_PFG$PFG, RF.PFG)
- simu_PFG$strata = fct_expand(simu_PFG$strata, list.strata)
-
- #cast
- simu_PFG = reshape2::dcast(simu_PFG, pixel ~ PFG * strata, value.var = c("relative.abundance"), fill = 0, drop = F)
-
- #merge PFG info and habitat + transform habitat into factor
-
- #here it is crucial to have exactly the same raster structure for "simulation.map" and "habitat.FATE.map", so as to be able to do the merge on the "pixel" variable
- data.FATE.PFG.habitat = merge(simu_PFG, habitat.whole.area.df, by = "pixel") #at this stage we have all the pixels in the simulation area
- data.FATE.PFG.habitat$habitat = factor(data.FATE.PFG.habitat$habitat, levels = RF.model$classes) #thanks to the "levels" argument, we have the same order for the habitat factor in the RF model and in the FATE outputs
-
- #####################################################
- # III.2. Prediction of habitat with the RF algorithm
- #####################################################
-
- data.validation = filter(data.FATE.PFG.habitat, for.validation == 1)
- x.validation = dplyr::select(data.validation, all_of(RF.predictors))
- y.validation = data.validation$habitat
-
- y.validation.predicted = predict(object = RF.model, newdata = x.validation, type = "response", norm.votes = T)
-
- ################################
- # III.3. Analysis of the results
- ################################
-
- confusion.validation = confusionMatrix(data = y.validation.predicted, reference = fct_expand(y.validation, levels(y.validation.predicted)))
-
- synthesis.validation = data.frame(habitat = colnames(confusion.validation$table), sensitivity = confusion.validation$byClass[,1], specificity = confusion.validation$byClass[,2], weight = colSums(confusion.validation$table)/sum(colSums(confusion.validation$table)))
- synthesis.validation = synthesis.validation %>% mutate(TSS = round(sensitivity + specificity - 1, digits = 2))
-
- aggregate.TSS.validation = round(sum(synthesis.validation$weight * synthesis.validation$TSS, na.rm=T), digits = 2)
-
- #############################################################################################################
- # III.4. Predict habitat for the whole map if option selected (do it only for a small number of simulations)
- #############################################################################################################
-
- if(predict.all.map == T){
- y.all.map.predicted = predict(object = RF.model, newdata = dplyr::select(data.FATE.PFG.habitat, all_of(RF.predictors)), type = "response", norm.votes = T)
- y.all.map.predicted = as.data.frame(y.all.map.predicted)
- y.all.map.predicted$pixel = data.FATE.PFG.habitat$pixel
- colnames(y.all.map.predicted) = c(sim.version, "pixel")
- }else{
- y.all.map.predicted<-NULL
- }
-
- #prepare outputs
-
- output.validation = c(synthesis.validation$TSS, aggregate.TSS.validation)
- names(output.validation) = c(synthesis.validation$habitat, "aggregated")
-
- output = list(output.validation, y.all.map.predicted)
- names(output) = c("output.validation", "y.all.map.predicted")
-
- return(output)
- }
-
- #end of the loop on simulations
- #deal with the results regarding model performance
- habitat.performance = as.data.frame(matrix(unlist(lapply(results.simul, "[[", 1)), ncol = length(RF.model$classes) + 1, byrow = T))
- names(habitat.performance) = c(RF.model$classes, "weighted")
- habitat.performance$simulation = sim.version
-=======
# registerDoParallel(detectCores()-2) ## TODO : put as optional (like in zip/unzip function)
+ if (opt.no_CPU > 1)
+ {
+ if (.getOS() != "windows")
+ {
+ registerDoParallel(cores = opt.no_CPU)
+ } else
+ {
+ warning("Parallelisation with `foreach` is not available for Windows. Sorry.")
+ }
+ }
results.simul <- foreach(i = 1:length(all_of(sim.version))) %dopar%
{
@@ -367,24 +198,36 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
#get simulated abundance per pixel*strata*PFG for pixels in the simulation area
if (perStrata == FALSE) {
- ## TODO : add test if file exists
- simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_", sim.version, ".csv"))
- simu_PFG = simu_PFG[,c("PFG","ID.pixel", paste0("X",year))] #keep only the PFG, ID.pixel and abundance at any year columns
- #careful : the number of abundance data files to save is to defined in POST_FATE.temporal.evolution function
- colnames(simu_PFG) = c("PFG", "pixel", "abs")
- simu_PFG$strata <- "A"
+ ## TODO : add test if file exists
+ if(file.exists(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_", sim.version, ".csv")))
+ {
+ simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_", sim.version, ".csv"))
+ simu_PFG = simu_PFG[,c("PFG","ID.pixel", paste0("X",year))] #keep only the PFG, ID.pixel and abundance at any year columns
+ #careful : the number of abundance data files to save is to defined in POST_FATE.temporal.evolution function
+ colnames(simu_PFG) = c("PFG", "pixel", "abs")
+ simu_PFG$strata <- "A"
+ }else
+ {
+ stop("Simulated abundance file does not exist")
+ }
} else if (perStrata == TRUE) {
- ## TODO : add test if file exists
- simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_perStrata_", sim.version, ".csv"))
- simu_PFG = simu_PFG[, c("PFG", "ID.pixel", "strata", paste0("X", year))]
- colnames(simu_PFG) = c("PFG", "pixel", "strata", "abs")
- new.strata <- rep(NA, nrow(simu_PFG))
- for (i in 1:length(list.strata.simulations)) {
- ind = which(simu_PFG$strata %in% list.strata.simulations[[i]])
- new.strata[ind] = names(list.strata.simulations)[i]
+ ## TODO : add test if file exists
+ if(file.exists(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_perStrata_", sim.version, ".csv")))
+ {
+ simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_perStrata_", sim.version, ".csv"))
+ simu_PFG = simu_PFG[, c("PFG", "ID.pixel", "strata", paste0("X", year))]
+ colnames(simu_PFG) = c("PFG", "pixel", "strata", "abs")
+ new.strata <- rep(NA, nrow(simu_PFG))
+ for (i in 1:length(list.strata.simulations)) {
+ ind = which(simu_PFG$strata %in% list.strata.simulations[[i]])
+ new.strata[ind] = names(list.strata.simulations)[i]
+ }
+ simu_PFG$strata = new.strata
+ }else
+ {
+ stop("Simulated abundance file does not exist")
}
- simu_PFG$strata = new.strata
}
## SIMILAR to what was done in train_RF_habitat for the releves
@@ -401,7 +244,7 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
#correct the levels (to have all PFG and all strata) to make the dcast transfo easier (all PFG*strata combination will be automatically created thanks to the factor structure, even if no line corresponds to it)
simu_PFG$PFG<-as.factor(simu_PFG$PFG)
- simu_PFG$PFG <- factor(simu_PFG$PFG, sort(unique(c(levels(simu_PFG$PFG)) RF.PFG))))
+ simu_PFG$PFG <- factor(simu_PFG$PFG, sort(unique(c(levels(simu_PFG$PFG), RF.PFG))))
simu_PFG$strata<-as.factor(simu_PFG$strata)
simu_PFG$PFG <- factor(simu_PFG$PFG, sort(unique(c(levels(simu_PFG$strata), list.strata))))
@@ -429,7 +272,7 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
################################
confusion.validation <- confusionMatrix(data = y.validation.predicted
- , reference = factor(y.validation, sort(unique(c(levels(y.validation)) levels(y.validation.predicted)))))
+ , reference = factor(y.validation, sort(unique(c(levels(y.validation), levels(y.validation.predicted))))))
synthesis.validation <- data.frame(habitat = colnames(confusion.validation$table)
, sensitivity = confusion.validation$byClass[, 1]
@@ -465,7 +308,6 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
habitat.performance <- as.data.frame(matrix(unlist(lapply(results.simul, "[[", 1)), ncol = length(RF.model$classes) + 1, byrow = TRUE))
names(habitat.performance) <- c(RF.model$classes, "weighted")
habitat.performance$simulation <- sim.version
->>>>>>> ba8772fbee0e3c8df936af257ed6dc39ce909816
#save
write.csv(habitat.performance, paste0(output.path, "/HABITAT/", sim.version, "/performance.habitat.csv"), row.names = FALSE)
@@ -478,11 +320,7 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
all.map.prediction = rename(all.map.prediction, "true.habitat" = "habitat")
#save
-<<<<<<< HEAD
- write.csv(all.map.prediction, paste0(output.path, "/HABITAT/", sim.version, "/habitat.prediction.csv"), row.names = F)
-=======
write.csv(all.map.prediction,paste0(output.path,"/HABITAT/", sim.version, "/habitat.prediction.csv"), row.names = FALSE)
->>>>>>> ba8772fbee0e3c8df936af257ed6dc39ce909816
#return results
return(all.map.prediction)
From 11cf905258bc691f4540a9fe1404ff128f0340d7 Mon Sep 17 00:00:00 2001
From: Maxime Delprat
Date: Tue, 22 Mar 2022 16:54:56 +0100
Subject: [PATCH 069/176] small adjustments
---
R/POST_FATE.validation.R | 71 ++++++++++++--------
R/UTILS.plot_predicted_habitat.R | 90 ++++++++++++-------------
R/UTILS.train_RF_habitat.R | 111 ++++++++++++-------------------
3 files changed, 131 insertions(+), 141 deletions(-)
diff --git a/R/POST_FATE.validation.R b/R/POST_FATE.validation.R
index 86c9daa..7c940a0 100644
--- a/R/POST_FATE.validation.R
+++ b/R/POST_FATE.validation.R
@@ -227,9 +227,9 @@ POST_FATE.validation = function(name.simulation
releves.sites = st_read(paste0(obs.path, releves.sites))
hab.obs = raster(paste0(obs.path, hab.obs))
# Habitat mask at FATE simu resolution
- hab.obs.modif <- projectRaster(from = hab.obs, res = res(simulation.map)[1], crs = crs(projection(simulation.map)), method = "ngb")
- habitat.FATE.map <- crop(hab.obs.modif, simulation.map) #reprojection and croping of the extended habitat map in order to have a reduced observed habitat map
- validation.mask<-raster(paste0(obs.path, validation.mask))
+ hab.obs.modif = projectRaster(from = hab.obs, res = res(simulation.map)[1], crs = crs(projection(simulation.map)), method = "ngb")
+ habitat.FATE.map = crop(hab.obs.modif, simulation.map) #reprojection and croping of the extended habitat map in order to have a reduced observed habitat map
+ validation.mask = raster(paste0(obs.path, validation.mask))
# Other
if(is.null(studied.habitat)){
@@ -240,13 +240,13 @@ POST_FATE.validation = function(name.simulation
stop("studied.habitat is not a vector of character")
}
RF.param = list(
- share.training=0.7,
- ntree=500)
- predict.all.map<-T
+ share.training = 0.7,
+ ntree = 500)
+ predict.all.map = T
## TRAIN A RF ON OBSERVED DATA
- RF.model <- train.RF.habitat(releves.PFG = releves.PFG
+ RF.model = train.RF.habitat(releves.PFG = releves.PFG
, releves.sites = releves.sites
, hab.obs = hab.obs
, external.training.mask = NULL
@@ -258,7 +258,7 @@ POST_FATE.validation = function(name.simulation
## USE THE RF MODEL TO VALIDATE FATE OUTPUT
- habitats.results <- do.habitat.validation(output.path = output.path
+ habitats.results = do.habitat.validation(output.path = output.path
, RF.model = RF.model
, habitat.FATE.map = habitat.FATE.map
, validation.mask = validation.mask
@@ -275,12 +275,12 @@ POST_FATE.validation = function(name.simulation
## AGGREGATE HABITAT PREDICTION AND PLOT PREDICTED HABITAT
# Provide a color df
- col.df<-data.frame(
+ col.df = data.frame(
habitat = RF.model$classes,
failure = terrain.colors(length(RF.model$classes), alpha = 0.5),
success = terrain.colors(length(RF.model$classes), alpha = 1))
- prediction.map <- plot.predicted.habitat(predicted.habitat = habitats.results
+ prediction.map = plot.predicted.habitat(predicted.habitat = habitats.results
, col.df = col.df
, simulation.map = simulation.map
, output.path = output.path
@@ -356,18 +356,18 @@ POST_FATE.validation = function(name.simulation
perStrata = perStrata
#list of PFG of interest
- list.PFG<-setdiff(list.PFG,exclude.PFG)
+ list.PFG = setdiff(list.PFG,exclude.PFG)
registerDoParallel(detectCores()-2)
- dying.PFG.list<-foreach(i=1:length(sim.version)) %dopar% {
+ dying.PFG.list = foreach(i=1:length(sim.version)) %dopar% {
- if(perStrata==F){
+ if(perStrata == F){
simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_", sim.version, ".csv"))
simu_PFG = simu_PFG[,c("PFG","ID.pixel", paste0("X",year))]
colnames(simu_PFG) = c("PFG", "pixel", "abs")
- } else if(perStrata==T){
+ } else if(perStrata == T){
simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_perStrata_", sim.version, ".csv"))
simu_PFG = simu_PFG[,c("PFG","ID.pixel", "strata", paste0("X", year))]
@@ -382,22 +382,22 @@ POST_FATE.validation = function(name.simulation
names(dying.PFG.list) = sim.version
#get table with PFG richness
- PFG.richness.df<-data.frame(simulation=names(dying.PFG.list),richness=length(list.PFG)-unlist(lapply(dying.PFG.list,FUN="length")))
+ PFG.richness.df = data.frame(simulation = names(dying.PFG.list), richness = length(list.PFG) - unlist(lapply(dying.PFG.list, FUN = "length")))
#get vector with one occurence per PFG*simulation with dying of the PFG, as factor with completed levels in order to have table with all PFG, including those which never die
- dyingPFG.vector<-as.factor(unlist(dying.PFG.list))
- dyingPFG.vector<-fct_expand(dyingPFG.vector,list.PFG)
- dying.distribution<-round(table(dyingPFG.vector)/length(sim.version),digits=2)
+ dyingPFG.vector = as.factor(unlist(dying.PFG.list))
+ dyingPFG.vector = fct_expand(dyingPFG.vector, list.PFG)
+ dying.distribution = round(table(dyingPFG.vector)/length(sim.version), digits = 2)
#output
- output = list(PFG.richness.df, dying.distribution ,dying.PFG.list)
- names(output)<-c("PFG.richness.df","dying.distribution","dying.PFG.list")
+ output = list(PFG.richness.df, dying.distribution , dying.PFG.list)
+ names(output) = c("PFG.richness.df", "dying.distribution", "dying.PFG.list")
dir.create(output.path,recursive = TRUE, showWarnings = FALSE)
- write.csv(PFG.richness.df,paste0(output.path,"/performance.richness.csv"),row.names=F)
- write.csv(dying.distribution,paste0(output.path,"/PFG.extinction.frequency.csv"),row.names=F)
- write_rds(dying.PFG.list,file=paste0(output.path,"/dying.PFG.list.rds"),compress="none")
+ write.csv(PFG.richness.df, paste0(output.path, "/performance.richness.csv"), row.names = F)
+ write.csv(dying.distribution, paste0(output.path, "/PFG.extinction.frequency.csv"), row.names = F)
+ write_rds(dying.PFG.list, file = paste0(output.path, "/dying.PFG.list.rds"), compress = "none")
}
@@ -406,24 +406,43 @@ POST_FATE.validation = function(name.simulation
cat("\n #------------------------------------------------------------# \n")
if(doRichness == TRUE){
+
cat("\n ---------- PFG RICHNESS : \n")
cat(paste0("\n Richness at year ", year, " : ", output[[1]][2], "\n"))
cat(paste0("\n Number of PFG extinction at year ", year, " : ", sum(output[[2]]), "\n"))
- } else{cat("\n ---------- PFG RICHNESS VALIDATION DISABLED \n")
+
+ } else{
+
+ cat("\n ---------- PFG RICHNESS VALIDATION DISABLED \n")
+
}
+
if(doHabitat == TRUE){
+
hab.pred = read.csv(paste0(name.simulation, "/VALIDATION/HABITAT/", sim.version, "/hab.pred.csv"))
failure = as.numeric((table(hab.pred$prediction.code)[1]/sum(table(hab.pred$prediction.code)))*100)
success = as.numeric((table(hab.pred$prediction.code)[2]/sum(table(hab.pred$prediction.code)))*100)
+
cat("\n ---------- HABITAT : \n")
cat(paste0("\n", round(failure, digits = 2), "% of habitats are not correctly predicted by ", sim.version, " \n"))
cat(paste0("\n", round(success, digits = 2), "% of habitats are correctly predicted by ", sim.version, " \n"))
plot(prediction.map)
- } else{cat("\n ---------- HABITAT VALIDATION DISABLED \n")
+
+ } else{
+
+ cat("\n ---------- HABITAT VALIDATION DISABLED \n")
+
}
+
if(doComposition == TRUE){
+
cat("\n ---------- PFG COMPOSITION : \n")
return(performance.composition)
- } else{cat("\n ---------- PFG COMPOSITION VALIDATION DISABLED \n")
+
+ } else{
+
+ cat("\n ---------- PFG COMPOSITION VALIDATION DISABLED \n")
+
}
+
}
diff --git a/R/UTILS.plot_predicted_habitat.R b/R/UTILS.plot_predicted_habitat.R
index 7e3e65e..10cb65b 100644
--- a/R/UTILS.plot_predicted_habitat.R
+++ b/R/UTILS.plot_predicted_habitat.R
@@ -46,7 +46,7 @@
### END OF HEADER ##############################################################
-plot.predicted.habitat<-function(predicted.habitat
+plot.predicted.habitat = function(predicted.habitat
, col.df
, simulation.map
, output.path
@@ -56,94 +56,94 @@ plot.predicted.habitat<-function(predicted.habitat
cat("\n ---------- AGGREGATE HABITAT PREDICTION AND PLOT PREDICTED HABITAT \n")
#auxiliary function to compute the proportion of simulations lead to the modal prediction
- count.habitat<-function(df){
- index<-which(names(df)=="modal.predicted.habitat")
- prop.simu<-sum(df[-index]==as.character(df[index]))/(length(names(df))-1)
+ count.habitat = function(df){
+ index = which(names(df) == "modal.predicted.habitat")
+ prop.simu = sum(df[-index] == as.character(df[index]))/(length(names(df))-1)
return(prop.simu)
}
#compute modal predicted habitat and the proportion of simulations predicting this habitat (for each pixel)
- predicted.habitat$modal.predicted.habitat<-apply(dplyr::select(predicted.habitat,sim.version),1,Mode)
- predicted.habitat$modal.predicted.habitat[predicted.habitat$modal.predicted.habitat==">1 mode"]<-"ambiguous"
- predicted.habitat$confidence<-apply(dplyr::select(predicted.habitat,c(all_of(sim.version),modal.predicted.habitat)),1,FUN=function(x) count.habitat(x))
+ predicted.habitat$modal.predicted.habitat = apply(dplyr::select(predicted.habitat, sim.version), 1, Mode)
+ predicted.habitat$modal.predicted.habitat[predicted.habitat$modal.predicted.habitat == ">1 mode"] = "ambiguous"
+ predicted.habitat$confidence <- apply(dplyr::select(predicted.habitat, c(all_of(sim.version), modal.predicted.habitat)), 1 , FUN = function(x) count.habitat(x))
#true/false prediction
- predicted.habitat$prediction.code<-"failure"
- predicted.habitat$prediction.code[predicted.habitat$modal.predicted.habitat==predicted.habitat$true.habitat]<-"success"
+ predicted.habitat$prediction.code = "failure"
+ predicted.habitat$prediction.code[predicted.habitat$modal.predicted.habitat == predicted.habitat$true.habitat] = "success"
#prepare a df containing color & habitat code (to facilitate conversion into raster)
- col.df.long<-data.table::melt(data=setDT(col.df),id.vars="habitat",variable.name="prediction.code",value.name="color")
+ col.df.long = data.table::melt(data = setDT(col.df), id.vars = "habitat", variable.name = "prediction.code", value.name = "color")
- habitat.code.df<-unique(dplyr::select(predicted.habitat,c(modal.predicted.habitat,prediction.code)))
- habitat.code.df$habitat.code<-seq(from=1,to=dim(habitat.code.df)[1],by=1)
- habitat.code.df<-rename(habitat.code.df,"habitat"="modal.predicted.habitat")
+ habitat.code.df = unique(dplyr::select(predicted.habitat, c(modal.predicted.habitat, prediction.code)))
+ habitat.code.df$habitat.code = seq(from = 1, to = dim(habitat.code.df)[1], by = 1)
+ habitat.code.df = rename(habitat.code.df, "habitat" = "modal.predicted.habitat")
- habitat.code.df<-merge(habitat.code.df,col.df.long,by=c("habitat","prediction.code"))
- habitat.code.df$label<-paste0(habitat.code.df$habitat," (",habitat.code.df$prediction.code,")")
+ habitat.code.df = merge(habitat.code.df, col.df.long, by = c("habitat", "prediction.code"))
+ habitat.code.df$label = paste0(habitat.code.df$habitat, " (", habitat.code.df$prediction.code, ")")
#deal with out of scope habitat
- out.of.scope<-data.frame(habitat="out.of.scope",prediction.code="",habitat.code=0,color="white",label="out of scope")
- habitat.code.df<-rbind(habitat.code.df,out.of.scope)
+ out.of.scope = data.frame(habitat = "out.of.scope", prediction.code = "", habitat.code = 0, color = "white", label = "out of scope")
+ habitat.code.df = rbind(habitat.code.df, out.of.scope)
- habitat.code.df$label<-as.factor(habitat.code.df$label)
+ habitat.code.df$label = as.factor(habitat.code.df$label)
#order the df
- habitat.code.df<-habitat.code.df[order(habitat.code.df$label),] #to be sure it's in te right order (useful to ensure the correspondance between label and color in the ggplot function)
+ habitat.code.df = habitat.code.df[order(habitat.code.df$label),] #to be sure it's in te right order (useful to ensure the correspondance between label and color in the ggplot function)
#merge the prediction df with the df containing color and habitat code
- predicted.habitat<-merge(predicted.habitat,habitat.code.df,by.x=c("modal.predicted.habitat","prediction.code"),by.y=c("habitat","prediction.code"))
+ predicted.habitat = merge(predicted.habitat, habitat.code.df, by.x = c("modal.predicted.habitat", "prediction.code"), by.y = c("habitat", "prediction.code"))
write.csv(x = predicted.habitat, file = paste0(output.path, "/HABITAT/", sim.version, "/hab.pred.csv"))
#plot
#prepare raster
- prediction.map<-raster(nrows=nrow(simulation.map),ncols=ncol(simulation.map),crs=crs(simulation.map),ext=extent(simulation.map), resolution=res(simulation.map))
+ prediction.map = raster(nrows = nrow(simulation.map), ncols = ncol(simulation.map), crs = crs(simulation.map), ext = extent(simulation.map), resolution = res(simulation.map))
- prediction.map[]<-0 #initialization of the raster, corresponding to "out of scope habitats"
- prediction.map[predicted.habitat$pixel]<-predicted.habitat$habitat.code
+ prediction.map[] = 0 #initialization of the raster, corresponding to "out of scope habitats"
+ prediction.map[predicted.habitat$pixel] = predicted.habitat$habitat.code
#ratify
- prediction.map<-ratify(prediction.map)
- prediction.map.rat<-levels(prediction.map)[[1]]
- prediction.map.rat<-merge(prediction.map.rat,habitat.code.df,by.x="ID",by.y="habitat.code")
- levels(prediction.map)<-prediction.map.rat
+ prediction.map = ratify(prediction.map)
+ prediction.map.rat = levels(prediction.map)[[1]]
+ prediction.map.rat = merge(prediction.map.rat, habitat.code.df, by.x = "ID", by.y = "habitat.code")
+ levels(prediction.map) = prediction.map.rat
#save the raster
- writeRaster(prediction.map,filename = paste0(output.path,"/HABITAT/", sim.version, "/synthetic.prediction.grd"),overwrite=T)
+ writeRaster(prediction.map, filename = paste0(output.path, "/HABITAT/", sim.version, "/synthetic.prediction.grd"), overwrite = T)
#plot on R
#convert into xy
- xy.prediction<-as.data.frame(prediction.map,xy=T)
- names(xy.prediction)<-c("x","y","habitat","prediction.code","color","label")
- xy.prediction<-xy.prediction[complete.cases(xy.prediction),]
+ xy.prediction = as.data.frame(prediction.map, xy = T)
+ names(xy.prediction) = c("x", "y", "habitat", "prediction.code", "color", "label")
+ xy.prediction = xy.prediction[complete.cases(xy.prediction),]
#plot
- prediction.plot<-
- ggplot(xy.prediction, aes(x=x, y=y, fill=factor(label)))+
+ prediction.plot =
+ ggplot(xy.prediction, aes(x = x, y = y, fill = factor(label))) +
geom_raster(show.legend = T) +
- coord_equal()+
- scale_fill_manual(values = as.character(habitat.code.df$color))+ #ok only if habitat.code.df has been ordered according to "label"
- ggtitle(paste0("Modal prediction over ",length(sim.version)," simulations"))+
- guides(fill=guide_legend(nrow=4,byrow=F))+
+ coord_equal() +
+ scale_fill_manual(values = as.character(habitat.code.df$color)) + #ok only if habitat.code.df has been ordered according to "label"
+ ggtitle(paste0("Modal prediction over ", length(sim.version), " simulations")) +
+ guides(fill = guide_legend(nrow = 4, byrow = F)) +
theme(
plot.title = element_text(size = 8),
- legend.text = element_text(size = 8, colour ="black"),
+ legend.text = element_text(size = 8, colour = "black"),
legend.title = element_blank(),
legend.position = "bottom",
- axis.title.x=element_blank(),
- axis.text.x=element_blank(),
- axis.ticks.x=element_blank(),
- axis.title.y=element_blank(),
- axis.text.y=element_blank(),
- axis.ticks.y=element_blank()
+ axis.title.x = element_blank(),
+ axis.text.x = element_blank(),
+ axis.ticks.x = element_blank(),
+ axis.title.y = element_blank(),
+ axis.text.y = element_blank(),
+ axis.ticks.y = element_blank()
)
#save the map
- ggsave(filename="synthetic.prediction.png",plot = prediction.plot,path = paste0(output.path, "/HABITAT/", sim.version),scale = 1,dpi = 300,limitsize = F,width = 15,height = 15,units ="cm")
+ ggsave(filename = "synthetic.prediction.png", plot = prediction.plot, path = paste0(output.path, "/HABITAT/", sim.version), scale = 1, dpi = 300, limitsize = F, width = 15, height = 15, units ="cm")
#return the map
return(prediction.plot)
diff --git a/R/UTILS.train_RF_habitat.R b/R/UTILS.train_RF_habitat.R
index 83bed67..123ecb2 100644
--- a/R/UTILS.train_RF_habitat.R
+++ b/R/UTILS.train_RF_habitat.R
@@ -66,11 +66,11 @@
### END OF HEADER ##############################################################
-train.RF.habitat<-function(releves.PFG
+train.RF.habitat = function(releves.PFG
, releves.sites
, hab.obs
, external.training.mask = NULL
- , studied.habitat
+ , studied.habitat = NULL
, RF.param
, output.path
, perStrata
@@ -90,21 +90,18 @@ train.RF.habitat<-function(releves.PFG
releves.PFG = as.data.frame(releves.PFG)
if (nrow(releves.PFG) == 0 || ncol(releves.PFG) != 4)
{
- .stopMessage_numRowCol("releves.PFG", c("site", "PFG", "strata", "BB")) ## TODO : change colnames ?
+ .stopMessage_numRowCol("releves.PFG", c("site", "PFG", "strata", "BB"))
}
- ## TODO : condition on sites
if (!is.numeric(releves.PFG$site))
{
stop("Sites in releves.PFG are not in the right format. Please make sure you have numeric values")
}
- ## TODO : condition on strata
if (!is.character(releves.PFG$strata) | !is.numeric(releves.PFG$strata))
{
stop("strata definition in releves.PFG is not in the right format. Please make sure you have a character or numeric values")
}
- ## TODO : condition on PFG
- fate_PFG = .getGraphics_PFG(name.simulation = str_split(output.path, "/")[[1]][1] ## prend le premier terme de output.path qui est le nom de simul (cf POST_FATE.validation)
- , abs.simulParam = paste0(name.simulation, "/PARAM_SIMUL/Simul_parameters_", str_split(sim.version, "_")[[1]][2], ".txt")) ## fichier de paramètre associé au nom de simulation
+ fate_PFG = .getGraphics_PFG(name.simulation = str_split(output.path, "/")[[1]][1]
+ , abs.simulParam = paste0(name.simulation, "/PARAM_SIMUL/Simul_parameters_", str_split(sim.version, "_")[[1]][2], ".txt"))
if (sort(as.factor(unique(releves.PFG$PFG))) != as.factor(fate_PFG$PFG))
{
stop("PFG list in releves.PFG does not correspond to PFG list in FATE")
@@ -120,9 +117,8 @@ train.RF.habitat<-function(releves.PFG
releves.sites = as.data.frame(releves.sites)
if (nrow(releves.sites) == 0 || ncol(releves.sites) != 3)
{
- .stopMessage_numRowCol("releves.sites", c("site", "x", "y")) ## TODO : change colnames ?
+ .stopMessage_numRowCol("releves.sites", c("site", "x", "y"))
}
- ## TODO : condition on site
if (!is.numeric(releves.sites$site))
{
stop("Sites in releves.sites are not in the right format. Please make sure you have numeric values")
@@ -134,32 +130,31 @@ train.RF.habitat<-function(releves.PFG
#########################################
#transformation into coverage percentage
- ## TODO : Transform in real proportion (per site)
- if(!is.numeric(releves.PFG$abund))
+ if(!is.numeric(releves.PFG$abund)) # Braun-Blanquet abundance
{
releves.PFG$coverage = PRE_FATE.abundBraunBlanquet(releves.PFG$abund)/100
- } else if (is.numeric(releves.PFG$abund) & max(releves.PFG$abund) == 1)
+ } else if (is.numeric(releves.PFG$abund) & max(releves.PFG$abund) == 1) # presence-absence data
{
releves.PFG$coverage = releves.PFG$abund
- } else if (is.numeric(releves.PFG$abund))
+ } else if (is.numeric(releves.PFG$abund)) # absolute abundance
{
releves.PFG$coverage = releves.PFG$abund
}
if (perStrata == TRUE) {
- mat.PFG.agg <- aggregate(coverage ~ site + PFG + strata, data = releves.PFG, FUN = "sum")
+ mat.PFG.agg = aggregate(coverage ~ site + PFG + strata, data = releves.PFG, FUN = "sum")
} else if (perStrata == FALSE) {
- mat.PFG.agg <- aggregate(coverage ~ site + PFG, data = releves.PFG, FUN = "sum")
- mat.PFG.agg$strata <- "A" #"A" is for "all". Important to have a single-letter code here (useful to check consistency between relevés strata and model strata)
+ mat.PFG.agg = aggregate(coverage ~ site + PFG, data = releves.PFG, FUN = "sum")
+ mat.PFG.agg$strata = "A" #"A" is for "all". Important to have a single-letter code here (useful to check consistency between relevés strata and model strata)
}
#transformation into a relative metric (here relative.metric is relative coverage)
- mat.PFG.agg <-
+ mat.PFG.agg =
as.data.frame(
mat.PFG.agg %>% group_by(site, strata) %>% mutate(relative.metric = round(prop.table(coverage), digits = 2))
) #rel is proportion of total pct_cov, not percentage
mat.PFG.agg$relative.metric[is.na(mat.PFG.agg$relative.metric)] <- 0 #NA because abs==0 for some PFG, so put 0 instead of NA (maybe not necessary)
- mat.PFG.agg$coverage<-NULL
+ mat.PFG.agg$coverage = NULL
print("releve data have been transformed into a relative metric")
@@ -167,40 +162,37 @@ train.RF.habitat<-function(releves.PFG
#######################
#transfo into factor to be sure to create all the combination when doing "dcast"
- mat.PFG.agg$PFG <- as.factor(mat.PFG.agg$PFG)
- mat.PFG.agg$strata <- as.factor(mat.PFG.agg$strata)
- mat.PFG.agg <- dcast(mat.PFG.agg, site ~ PFG + strata, value.var = "relative.metric", fill = 0, drop = FALSE)
+ mat.PFG.agg$PFG = as.factor(mat.PFG.agg$PFG)
+ mat.PFG.agg$strata = as.factor(mat.PFG.agg$strata)
+ mat.PFG.agg = dcast(mat.PFG.agg, site ~ PFG + strata, value.var = "relative.metric", fill = 0, drop = FALSE)
#3. Get habitat information
###################################
#get sites coordinates
- mat.PFG.agg <- merge(releves.sites, mat.PFG.agg, by = "site") ## TODO : mettre tout directement dans releves.PFG ?
+ mat.PFG.agg = merge(releves.sites, mat.PFG.agg, by = "site")
#get habitat code and name
- mat.PFG.agg$code.habitat <- raster::extract(x = hab.obs, y = mat.PFG.agg[, c("x", "y")])
+ mat.PFG.agg$code.habitat = raster::extract(x = hab.obs, y = mat.PFG.agg[, c("x", "y")])
mat.PFG.agg = mat.PFG.agg[which(!is.na(mat.PFG.agg$code.habitat)), ]
if (nrow(mat.PFG.agg) == 0) {
- ## TODO : add stop message
stop("Code habitat vector is empty. Please verify values of your hab.obs map")
}
- #correspondance habitat code/habitat name
- ## ATTENTION ! il faut que la couche de noms du raster existe, et qu'elle s'appelle habitat...
- ## TODO : soit donner en paramètre un vecteur avec les noms d'habitat, soit les données dans releves.PFG...
+ #correspondence habitat code/habitat name
if (names(raster::levels(hab.obs)[[1]]) != c("ID", "habitat", "colour") | nrow(raster::levels(hab.obs)[[1]]) == 0 & !is.null(studied.habitat))
- { ## cas où pas de levels dans la carte d'habitat et utilisation d'un vecteur d'habitat
+ { # cas où pas de levels dans la carte d'habitat et utilisation d'un vecteur d'habitat
colnames(obs.habitat) = c("ID", "habitat")
table.habitat.releve = studied.habitat
- mat.PFG.agg = mat.PFG.agg[which(mat.PFG.agg$code.habitat %in% studied.habitat$ID), ] #filter non interesting habitat + NA
+ mat.PFG.agg = mat.PFG.agg[which(mat.PFG.agg$code.habitat %in% studied.habitat$ID), ] # filter non interesting habitat + NA
mat.PFG.agg = merge(mat.PFG.agg, table.habitat.releve[, c("ID", "habitat")], by.x = "code.habitat", by.y = "ID")
print(cat("habitat classes used in the RF algo: ",unique(mat.PFG.agg$habitat),"\n",sep="\t"))
} else if (names(raster::levels(hab.obs)[[1]]) == c("ID", "habitat", "colour") & nrow(raster::levels(hab.obs)[[1]]) > 0 & is.null(studied.habitat))
- { ## cas où on utilise les levels définis dans la carte
+ { # cas où on utilise les levels définis dans la carte
table.habitat.releve = levels(hab.obs)[[1]]
mat.PFG.agg = merge(mat.PFG.agg, table.habitat.releve[, c("ID", "habitat")], by.x = "code.habitat", by.y = "ID")
- mat.PFG.agg <- mat.PFG.agg[which(mat.PFG.agg$habitat %in% studied.habitat), ] #filter non interesting habitat + NA
- print(cat("habitat classes used in the RF algo: ",unique(mat.PFG.agg$habitat),"\n",sep="\t"))
+ mat.PFG.agg = mat.PFG.agg[which(mat.PFG.agg$habitat %in% studied.habitat), ]
+ print(cat("habitat classes used in the RF algo: ", unique(mat.PFG.agg$habitat), "\n", sep = "\t"))
} else
{
stop("Habitat definition in hab.obs map is not correct")
@@ -208,42 +200,21 @@ train.RF.habitat<-function(releves.PFG
#(optional) keep only releves data in a specific area
if (!is.null(external.training.mask)) {
- # if (compareCRS(mat.PFG.agg, external.training.mask) == FALSE) {
- # #as this stage it is not a problem to transform crs(mat.PFG.agg) since we have no more merge to do (we have already extracted habitat info from the map)
- # mat.PFG.agg <- st_transform(x = mat.PFG.agg, crs = crs(external.training.mask))
- # }
- # mat.PFG.agg <- st_crop(x = mat.PFG.agg, y = external.training.mask)
-
val.inMask = raster::extract(x = external.training.mask, y = mat.PFG.agg[, c("x", "y")])
mat.PFG.agg = mat.PFG.agg[which(!is.na(val.inMask)), ]
print("'releve' map has been cropped to match 'external.training.mask'.")
}
-
- # # 4. Keep only releve on interesting habitat
- # ###################################################"
- #
- # if (!is.null(studied.habitat)) {
- # mat.PFG.agg <- mat.PFG.agg[which(mat.PFG.agg$habitat %in% studied.habitat), ] #filter non interesting habitat + NA
- # if (nrow(mat.PFG.agg) == 0) {
- # ## TODO : add stop message
- # stop("Habitats in studied.habitat parameter are not presents in hab.obs map. Please select others habitats")
- # }
- # }
- # print(cat("habitat classes used in the RF algo: ",unique(mat.PFG.agg$habitat),"\n",sep="\t"))
-
# 5. Save data
#####################
- # st_write(mat.PFG.agg,paste0(output.path,"/HABITAT/", sim.version, "/releve.PFG.habitat.shp"),overwrite=T,append=F)
write.csv(mat.PFG.agg,paste0(output.path,"/HABITAT/", sim.version, "/obs.releves.prepared.csv"),row.names = FALSE)
- ## TODO : remove CBNA from file name
# 6. Small adjustment in data structure
##########################################
- mat.PFG.agg<-as.data.frame(mat.PFG.agg) #get rid of the spatial structure before entering the RF process
- mat.PFG.agg$habitat<-as.factor(mat.PFG.agg$habitat)
+ mat.PFG.agg = as.data.frame(mat.PFG.agg) #get rid of the spatial structure before entering the RF process
+ mat.PFG.agg$habitat = as.factor(mat.PFG.agg$habitat)
# 7.Random forest
######################################
@@ -251,14 +222,14 @@ train.RF.habitat<-function(releves.PFG
#separate the database into a training and a test part
set.seed(123)
- training.site <- sample(mat.PFG.agg$site, size = RF.param$share.training * length(mat.PFG.agg$site), replace = FALSE)
- releves.training <- mat.PFG.agg[which(mat.PFG.agg$site %in% training.site), ]
- releves.testing <- mat.PFG.agg[-which(mat.PFG.agg$site %in% training.site), ]
+ training.site = sample(mat.PFG.agg$site, size = RF.param$share.training * length(mat.PFG.agg$site), replace = FALSE)
+ releves.training = mat.PFG.agg[which(mat.PFG.agg$site %in% training.site), ]
+ releves.testing = mat.PFG.agg[-which(mat.PFG.agg$site %in% training.site), ]
#train the model (with correction for imbalances in sampling)
#run optimization algo (careful : optimization over OOB...)
- mtry.perf <- tuneRF(x = dplyr::select(releves.training, -c(code.habitat, site, habitat, geometry)),
+ mtry.perf = tuneRF(x = dplyr::select(releves.training, -c(code.habitat, site, habitat, geometry)),
y = releves.training$habitat,
strata = releves.training$habitat,
sampsize = nrow(releves.training),
@@ -271,10 +242,10 @@ train.RF.habitat<-function(releves.PFG
mtry.perf = as.data.frame(mtry.perf)
#select mtry
- mtry <- mtry.perf$mtry[mtry.perf$OOBError == min(mtry.perf$OOBError)][1] #the lowest n achieving minimum OOB
+ mtry = mtry.perf$mtry[mtry.perf$OOBError == min(mtry.perf$OOBError)][1] #the lowest n achieving minimum OOB
#run real model
- model <- randomForest(x = dplyr::select(releves.training, -c(code.habitat, site, habitat, geometry)),
+ model = randomForest(x = dplyr::select(releves.training, -c(code.habitat, site, habitat, geometry)),
y = releves.training$habitat,
xtest = dplyr::select(releves.testing, -c(code.habitat, site, habitat, geometry)),
ytest = releves.testing$habitat,
@@ -288,24 +259,24 @@ train.RF.habitat<-function(releves.PFG
#analyse model performance
# Analysis on the training sample
- confusion.training <- confusionMatrix(data = model$predicted, reference = releves.training$habitat)
- synthesis.training <- data.frame(habitat = colnames(confusion.training$table)
+ confusion.training = confusionMatrix(data = model$predicted, reference = releves.training$habitat)
+ synthesis.training = data.frame(habitat = colnames(confusion.training$table)
, sensitivity = confusion.training$byClass[, 1]
, specificity = confusion.training$byClass[, 2]
, weight = colSums(confusion.training$table) / sum(colSums(confusion.training$table)))
#warning: prevalence is the weight of predicted habitat, not of observed habitat
- synthesis.training <- synthesis.training %>% mutate(TSS = round(sensitivity + specificity - 1, digits = 2))
- aggregate.TSS.training <- round(sum(synthesis.training$weight * synthesis.training$TSS), digits = 2)
+ synthesis.training = synthesis.training %>% mutate(TSS = round(sensitivity + specificity - 1, digits = 2))
+ aggregate.TSS.training = round(sum(synthesis.training$weight * synthesis.training$TSS), digits = 2)
# Analysis on the testing sample
- confusion.testing <- confusionMatrix(data = model$test$predicted, reference = releves.testing$habitat)
- synthesis.testing<-data.frame(habitat = colnames(confusion.testing$table)
+ confusion.testing = confusionMatrix(data = model$test$predicted, reference = releves.testing$habitat)
+ synthesis.testing = data.frame(habitat = colnames(confusion.testing$table)
, sensitivity = confusion.testing$byClass[, 1]
, specificity = confusion.testing$byClass[, 2]
, weight = colSums(confusion.testing$table) / sum(colSums(confusion.testing$table)))
#warning: prevalence is the weight of predicted habitat, not of observed habitat
- synthesis.testing <- synthesis.testing %>% mutate(TSS = round(sensitivity + specificity - 1, digits = 2))
- aggregate.TSS.testing <- round(sum(synthesis.testing$weight * synthesis.testing$TSS), digits = 2)
+ synthesis.testing = synthesis.testing %>% mutate(TSS = round(sensitivity + specificity - 1, digits = 2))
+ aggregate.TSS.testing = round(sum(synthesis.testing$weight * synthesis.testing$TSS), digits = 2)
# 8. Save and return output
From a3b2c06952d62a02900e0042a91dbacfe9d88b22 Mon Sep 17 00:00:00 2001
From: Maxime Delprat
Date: Wed, 23 Mar 2022 15:53:28 +0100
Subject: [PATCH 070/176] Corrections of validation functions (23/03)
---
R/POST_FATE.validation.R | 186 ++++++++++++-----
R/UTILS.do_PFG_composition_validation.R | 264 +++++++++++-------------
R/UTILS.do_habitat_validation.R | 83 +++-----
R/UTILS.get_observed_distribution.R | 149 +++++++------
R/UTILS.train_RF_habitat.R | 14 +-
5 files changed, 382 insertions(+), 314 deletions(-)
diff --git a/R/POST_FATE.validation.R b/R/POST_FATE.validation.R
index 7c940a0..0ed228d 100644
--- a/R/POST_FATE.validation.R
+++ b/R/POST_FATE.validation.R
@@ -21,22 +21,26 @@
##' @param year year of simulation for validation.
##' @param perStrata \code{Logical}. Default \code{TRUE}. If \code{TRUE}, PFG abundance is defined by strata.
##' If \code{FALSE}, PFG abundance defined for all strata (habitat & PFG composition & PFG richness validation).
+##' @param opt.no_CPU default \code{1}. \cr The number of resources that can be used to
+##' parallelize the computation of prediction performance for habitat & richness validation.
+##'
##' @param doHabitat \code{Logical}. Default \code{TRUE}. If \code{TRUE}, habitat validation module is activated,
##' if \code{FALSE}, habitat validation module is disabled.
-##' @param obs.path the function needs observed data, please create a folder for them in your
-##' simulation folder and then indicate in this parameter the access path to this new folder (habitat & PFG composition validation).
-##' @param releves.PFG name of file which contain the observed Braund-Blanquet abundance at each site
-##' and each PFG and strata (habitat & PFG composition validation).
-##' @param releves.site name of the file which contain coordinates and a description of
-##' the habitat associated with the dominant species of each site in the studied map (habitat & PFG composition validation).
-##' @param hab.obs name of the file which contain the extended studied map in the simulation (habitat & PFG composition validation).
-##' @param validation.mask name of the file which contain a raster mask that specified which pixels need validation
-##' (habitat & PFG composition validation).
+##' @param releves.PFG a data frame with abundance (column named abund) at each site
+##' and for each PFG and strata (habitat & PFG composition validation).
+##' @param releves.sites a data frame with coordinates and a description of the habitat associated with
+##' the dominant species of each site in the studied map (habitat & PFG composition validation).
+##' @param hab.obs a raster map of the extended studied map in the simulation, with same projection
+##' & resolution than simulation mask (habitat & PFG composition validation).
+##' @param validation.mask a raster mask that specified which pixels need validation, with same projection
+##' & resolution than simulation mask (habitat & PFG composition validation).
##' @param studied.habitat default \code{NULL}. If \code{NULL}, the function will
-##' take into account of all habitats in the \code{hab.obs} map. Otherwise, please specify
-##' in a vector habitats that will be take into account for the validation (habitat validation).
+##' take into account of habitats define in the \code{hab.obs} map. Otherwise, please specify
+##' in a 2 columns data frame the habitats (2nd column) and the ID (1st column) for each of them which will be taken
+##' into account for the validation (habitat validation).
##' @param list.strata.simulations default \code{NULL}. A character vector which contain \code{FATE}
##' strata definition and correspondence with observed strata definition.
+##'
##' @param doComposition \code{Logical}. Default \code{TRUE}. If \code{TRUE}, PFG composition validation module is activated,
##' if \code{FALSE}, PFG composition validation module is disabled.
##' @param PFG.considered_PFG.compo a character vector of the list of PFG considered
@@ -46,6 +50,7 @@
##' @param strata.considered_PFG.compo If \code{perStrata} = \code{FALSE}, a character vector with value "A"
##' (selection of one or several specific strata disabled). If \code{perStrata} = \code{TRUE}, a character
##' vector with at least one of the observed strata (PFG composition validation).
+##'
##' @param doRichness \code{Logical}. Default \code{TRUE}. If \code{TRUE}, PFG richness validation module is activated,
##' if \code{FALSE}, PFG richness validation module is disabled.
##' @param list.PFG a character vector which contain all the PFGs taken account in
@@ -158,9 +163,8 @@
##' @export
##'
##' @importFrom stringr str_split
-##' @importFrom raster raster projectRaster res crs crop
+##' @importFrom raster raster projectRaster res crs crop origin
##' @importFrom utils read.csv write.csv
-##' @importFrom sf st_read
##' @importFrom foreach foreach %dopar%
##' @importFrom forcats fct_expand
##' @importFrom readr write_rds
@@ -174,8 +178,8 @@ POST_FATE.validation = function(name.simulation
, sim.version
, year
, perStrata = TRUE
+ , opt.no_CPU = 1
, doHabitat = TRUE
- , obs.path
, releves.PFG
, releves.sites
, hab.obs
@@ -204,18 +208,11 @@ POST_FATE.validation = function(name.simulation
output.path = paste0(name.simulation, "/VALIDATION")
year = year # choice in the year for validation
perStrata = perStrata
-
- # Useful elements to extract from the simulation
- name = .getParam(params.lines = paste0(name.simulation, "/PARAM_SIMUL/Simul_parameters_", str_split(sim.version, "_")[[1]][2], ".txt"),
- flag = "MASK",
- flag.split = "^--.*--$",
- is.num = FALSE) #isolate the access path to the simulation mask for any FATE simulation
- simulation.map = raster(paste0(name))
+ opt.no_CPU = opt.no_CPU
# For habitat validation
- # CBNA releves data habitat map
- releves.PFG = read.csv(paste0(obs.path,releves.PFG),header=T,stringsAsFactors = T)
-
+ # Observed releves data
+ releves.PFG = releves.PFG
if(perStrata==TRUE){
list.strata.releves = as.character(unique(releves.PFG$strata))
list.strata.simulations = list.strata.simulations
@@ -223,14 +220,45 @@ POST_FATE.validation = function(name.simulation
list.strata.releves = NULL
list.strata.simulations = NULL
}
+ releves.sites = releves.sites
+
+ # Habitat map
+ hab.obs = hab.obs
+ validation.mask = validation.mask
- releves.sites = st_read(paste0(obs.path, releves.sites))
- hab.obs = raster(paste0(obs.path, hab.obs))
- # Habitat mask at FATE simu resolution
- hab.obs.modif = projectRaster(from = hab.obs, res = res(simulation.map)[1], crs = crs(projection(simulation.map)), method = "ngb")
- habitat.FATE.map = crop(hab.obs.modif, simulation.map) #reprojection and croping of the extended habitat map in order to have a reduced observed habitat map
- validation.mask = raster(paste0(obs.path, validation.mask))
+ # Simulation mask
+ name = .getParam(params.lines = paste0(name.simulation, "/PARAM_SIMUL/Simul_parameters_", str_split(sim.version, "_")[[1]][2], ".txt"),
+ flag = "MASK",
+ flag.split = "^--.*--$",
+ is.num = FALSE) #isolate the access path to the simulation mask for any FATE simulation
+ simulation.map = raster(paste0(name))
+ # Check hab.obs map
+ if(!compareCRS(simulation.map, hab.obs) | !all(res(habitat.FATE.map)==res(simulation.map))){
+ stop(paste0("Projection & resolution of hab.obs map does not match with simulation mask. Please reproject hab.obs map with projection & resolution of ", names(simulation.map)))
+ }else if(extent(simulation.map) != extent(hab.obs)){
+ habitat.FATE.map = crop(hab.obs, simulation.map)
+ }else {
+ habitat.FATE.map = hab.obs
+ }
+ if(!all(origin(simulation.map) == origin(habitat.FATE.map))){
+ print("setting origin habitat.FATE.map to match simulation.map")
+ raster::origin(habitat.FATE.map) <- raster::origin(simulation.map)
+ }
+
+ # Check validation mask
+ if(!compareCRS(simulation.map, validation.mask) | !all(res(validation.mask)==res(simulation.map))){
+ stop(paste0("Projection & resolution of validation mask does not match with simulation mask. Please reproject validation mask with projection & resolution of ", names(simulation.map)))
+ }else if(extent(validation.mask) != extent(simulation.map)){
+ validation.mask = crop(validation.mask, simulation.map)
+ }else {
+ validation.mask = validation.mask
+ }
+ if(!all(origin(simulation.map) == origin(validation.mask))){
+ print("setting origin validation mask to match simulation.map")
+ raster::origin(validation.mask) <- raster::origin(simulation.map)
+ }
+
# Other
if(is.null(studied.habitat)){
studied.habitat = studied.habitat #if null, the function will study all the habitats in the map
@@ -270,7 +298,9 @@ POST_FATE.validation = function(name.simulation
, hab.obs = hab.obs
, year = year
, list.strata.releves = list.strata.releves
- , list.strata.simulations = list.strata.simulations)
+ , list.strata.simulations = list.strata.simulations
+ , opt.no_CPU = opt.no_CPU
+ , studied.habitat = studied.habitat)
## AGGREGATE HABITAT PREDICTION AND PLOT PREDICTED HABITAT
@@ -299,13 +329,50 @@ POST_FATE.validation = function(name.simulation
if(doHabitat == FALSE){
perStrata = perStrata
-
+ opt.no_CPU = opt.no_CPU
+
+ # Habitat map
+ hab.obs = hab.obs
+ validation.mask = validation.mask
+
+ # Simulation mask
+ name = .getParam(params.lines = paste0(name.simulation, "/PARAM_SIMUL/Simul_parameters_", str_split(sim.version, "_")[[1]][2], ".txt"),
+ flag = "MASK",
+ flag.split = "^--.*--$",
+ is.num = FALSE) #isolate the access path to the simulation mask for any FATE simulation
+ simulation.map = raster(paste0(name))
+
+ # Check hab.obs map
+ if(!compareCRS(simulation.map, hab.obs) | !all(res(habitat.FATE.map)==res(simulation.map))){
+ stop(paste0("Projection & resolution of hab.obs map does not match with simulation mask. Please reproject hab.obs map with projection & resolution of ", names(simulation.map)))
+ }else if(extent(simulation.map) != extent(hab.obs)){
+ habitat.FATE.map = crop(hab.obs, simulation.map)
+ }else {
+ habitat.FATE.map = hab.obs
+ }
+ if(!all(origin(simulation.map) == origin(habitat.FATE.map))){
+ print("setting origin habitat.FATE.map to match simulation.map")
+ raster::origin(habitat.FATE.map) <- raster::origin(simulation.map)
+ }
+
+ # Check validation mask
+ if(!compareCRS(simulation.map, validation.mask) | !all(res(validation.mask)==res(simulation.map))){
+ stop(paste0("Projection & resolution of validation mask does not match with simulation mask. Please reproject validation mask with projection & resolution of ", names(simulation.map)))
+ }else if(extent(validation.mask) != extent(simulation.map)){
+ validation.mask = crop(validation.mask, simulation.map)
+ }else {
+ validation.mask = validation.mask
+ }
+ if(!all(origin(simulation.map) == origin(validation.mask))){
+ print("setting origin validation mask to match simulation.map")
+ raster::origin(validation.mask) <- raster::origin(simulation.map)
+ }
+
# Get observed distribution
- releves.PFG = read.csv(paste0(obs.path, releves.PFG),header=T,stringsAsFactors = T)
- releves.sites = st_read(paste0(obs.path, releves.sites))
- hab.obs = raster(paste0(obs.path, hab.obs))
+ releves.PFG = releves.PFG
+ releves.sites = releves.sites
+
# Do PFG composition validation
- validation.mask = raster(paste0(obs.path, validation.mask))
if(perStrata==TRUE){
list.strata.releves = as.character(unique(releves.PFG$strata))
list.strata.simulations = list.strata.simulations
@@ -313,15 +380,25 @@ POST_FATE.validation = function(name.simulation
list.strata.releves = NULL
list.strata.simulations = NULL
}
+
+ # Studied.habitat
+ if(is.null(studied.habitat)){
+ studied.habitat = studied.habitat #if null, the function will study all the habitats in the map
+ } else if(is.character(studied.habitat)){
+ studied.habitat = studied.habitat #if a character vector with habitat names, the function will study only the habitats in the vector
+ } else{
+ stop("studied.habitat is not a vector of character")
+ }
+
}
## GET OBSERVED DISTRIBUTION
obs.distri = get.observed.distribution(name.simulation = name.simulation
- , obs.path = obs.path
, releves.PFG = releves.PFG
, releves.sites = releves.sites
, hab.obs = hab.obs
+ , studied.habitat = studied.habitat
, PFG.considered_PFG.compo = PFG.considered_PFG.compo
, strata.considered_PFG.compo = strata.considered_PFG.compo
, habitat.considered_PFG.compo = habitat.considered_PFG.compo
@@ -331,7 +408,6 @@ POST_FATE.validation = function(name.simulation
## DO PFG COMPOSITION VALIDATION
performance.composition = do.PFG.composition.validation(name.simulation = name.simulation
- , obs.path = obs.path
, sim.version = sim.version
, hab.obs = hab.obs
, PFG.considered_PFG.compo = PFG.considered_PFG.compo
@@ -342,7 +418,8 @@ POST_FATE.validation = function(name.simulation
, validation.mask = validation.mask
, year = year
, list.strata.simulations = list.strata.simulations
- , list.strata.releves = list.strata.releves)
+ , list.strata.releves = list.strata.releves
+ , habitat.FATE.map = habitat.FATE.map)
}
@@ -358,20 +435,35 @@ POST_FATE.validation = function(name.simulation
#list of PFG of interest
list.PFG = setdiff(list.PFG,exclude.PFG)
- registerDoParallel(detectCores()-2)
+ print("processing simulations")
+
+ if (opt.no_CPU > 1)
+ {
+ if (.getOS() != "windows")
+ {
+ registerDoParallel(cores = opt.no_CPU)
+ } else
+ {
+ warning("Parallelisation with `foreach` is not available for Windows. Sorry.")
+ }
+ }
dying.PFG.list = foreach(i=1:length(sim.version)) %dopar% {
- if(perStrata == F){
+ if(perStrata == FALSE){
- simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_", sim.version, ".csv"))
- simu_PFG = simu_PFG[,c("PFG","ID.pixel", paste0("X",year))]
- colnames(simu_PFG) = c("PFG", "pixel", "abs")
+ if(file.exists(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_", sim.version, ".csv"))){
+ simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_", sim.version, ".csv"))
+ simu_PFG = simu_PFG[,c("PFG","ID.pixel", paste0("X",year))]
+ colnames(simu_PFG) = c("PFG", "pixel", "abs")
+ }
} else if(perStrata == T){
- simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_perStrata_", sim.version, ".csv"))
- simu_PFG = simu_PFG[,c("PFG","ID.pixel", "strata", paste0("X", year))]
- colnames(simu_PFG) = c("PFG", "pixel", "strata", "abs")
+ if(file.exists(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_perStrata_", sim.version, ".csv"))){
+ simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_perStrata_", sim.version, ".csv"))
+ simu_PFG = simu_PFG[,c("PFG","ID.pixel", "strata", paste0("X", year))]
+ colnames(simu_PFG) = c("PFG", "pixel", "strata", "abs")
+ }
}
diff --git a/R/UTILS.do_PFG_composition_validation.R b/R/UTILS.do_PFG_composition_validation.R
index 6cb0c20..01ab98f 100644
--- a/R/UTILS.do_PFG_composition_validation.R
+++ b/R/UTILS.do_PFG_composition_validation.R
@@ -12,26 +12,31 @@
##' distribution for a precise \code{FATE} simulation.
##'
##' @param name.simulation simulation folder name.
-##' @param obs.path the function needs observed data, please create a folder for them in your
-##' simulation folder and then indicate in this parameter the access path to this new folder.
##' @param sim.version name of the simulation we want to validate (it works with
##' only one \code{sim.version}).
-##' @param hab.obs file which contain the extended studied map in the simulation.
+##' @param hab.obs a raster map of the extended studied map in the simulation, with same projection
+##' & resolution than simulation mask.
##' @param PFG.considered_PFG.compo a character vector of the list of PFG considered
##' in the validation.
##' @param strata.considered_PFG.compo a character vector of the list of precise
##' strata considered in the validation.
##' @param habitat.considered_PFG.compo a character vector of the list of habitat(s)
##' considered in the validation.
-##' @param observed.distribution PFG observed distribution table.
-##' @param perStrata.compo Logical. All strata together (FALSE) or per strata (TRUE).
-##' @param validation.mask file which contain a raster mask that specified
-##' which pixels need validation.
+##' @param observed.distribution PFG observed distribution table provides by \code{get.observed.distribution} function.
+##' @param perStrata.compo \code{Logical}. All strata together (FALSE) or per strata (TRUE).
+##' @param validation.mask a raster mask that specified
+##' which pixels need validation, with same projection & resolution than simulation mask.
##' @param year year of simulation to validate.
##' @param list.strata.simulations a character vector which contain \code{FATE}
##' strata definition and correspondence with observed strata definition.
##' @param list.strata.releves a character vector which contain the observed strata
##' definition, extracted from observed PFG releves.
+##' @param habitat.FATE.map a raster map of the observed habitat in the
+##' studied area with same projection & resolution than validation mask and simulation mask.
+##' @param studied.habitat default \code{NULL}. If \code{NULL}, the function will
+##' take into account of habitats define in the \code{hab.obs} map. Otherwise, please specify
+##' in a 2 columns data frame the habitats (2nd column) and the ID (1st column) for each of them which will be taken
+##' into account for the validation.
##'
##' @details
##'
@@ -64,18 +69,14 @@
### END OF HEADER ##############################################################
-do.PFG.composition.validation<-function(name.simulation, obs.path, sim.version, hab.obs, PFG.considered_PFG.compo, strata.considered_PFG.compo, habitat.considered_PFG.compo, observed.distribution, perStrata, validation.mask, year, list.strata.simulations, list.strata.releves){
+do.PFG.composition.validation<-function(name.simulation, sim.version, hab.obs, PFG.considered_PFG.compo
+ , strata.considered_PFG.compo, habitat.considered_PFG.compo, observed.distribution
+ , perStrata, validation.mask, year, list.strata.simulations, list.strata.releves
+ , habitat.FATE.map, studied.habitat){
cat("\n ---------- PFG COMPOSITION VALIDATION \n")
output.path = paste0(name.simulation, "/VALIDATION/PFG_COMPOSITION/", sim.version)
- name = .getParam(params.lines = paste0(name.simulation, "/PARAM_SIMUL/Simul_parameters_", str_split(sim.version, "_")[[1]][2], ".txt"),
- flag = "MASK",
- flag.split = "^--.*--$",
- is.num = FALSE) #isolate the access path to the simulation mask for any FATE simulation
- simulation.map = raster(paste0(name))
- hab.obs.modif = projectRaster(from = hab.obs, res = res(simulation.map)[1], crs = crs(projection(simulation.map)), method = "ngb")
- habitat.FATE.map = crop(hab.obs.modif, simulation.map) #reprojection and croping of the extended habitat map in order to have a reduced observed habitat map
#Auxiliary function to compute proximity (on a 0 to 1 scale, 1 means quantile equality)
compute.proximity<-function(simulated.quantile,observed.quantile){
@@ -88,69 +89,48 @@ do.PFG.composition.validation<-function(name.simulation, obs.path, sim.version,
############################
#check if strata definition used in the RF model is the same as the one used to analyze FATE output
- if(perStrata==T){
- if(all(base::intersect(names(list.strata.simulations), list.strata.releves)==names(list.strata.simulations))){
+ if (perStrata == TRUE) {
+ if (all(intersect(names(list.strata.simulations), list.strata.releves) == names(list.strata.simulations))) {
list.strata = names(list.strata.simulations)
print("strata definition OK")
- }else {
+ } else {
stop("wrong strata definition")
}
- }else if(perStrata==F){
- list.strata = "all"
- }else {
- stop("check 'perStrata' parameter and/or the names of strata in list.strata.releves & list.strata.simulations")
+ } else if (perStrata == FALSE) {
+ list.strata <- "all"
+ } else {
+ stop("check 'perStrata' parameter and/or the names of strata in list.strata.releves & list.strata.simulation")
}
- #consistency between habitat.FATE.map and simulation.map
- if(!compareCRS(simulation.map,habitat.FATE.map)){
- print("reprojecting habitat.FATE.map to match simulation.map crs")
- habitat.FATE.map<-projectRaster(habitat.FATE.map,crs=crs(simulation.map))
+ #initial consistency between habitat.FATE.map and validation.mask (do it before the adjustement of habitat.FATE.map)
+ if(!compareCRS(habitat.FATE.map,validation.mask) | !all(res(habitat.FATE.map) == res(validation.mask))){
+ stop("please provide rasters with same crs and resolution for habitat.FATE.map and validation.mask")
}
- if(!all(res(habitat.FATE.map)==res(simulation.map))){
- stop("provide habitat.FATE.map with same resolution as simulation.map")
- }
- if(extent(simulation.map)!=extent(habitat.FATE.map)){
- print("cropping habitat.FATE.map to match simulation.map")
- habitat.FATE.map<-crop(x=habitat.FATE.map,y=simulation.map)
- }
- if(!all(origin(simulation.map)==origin(habitat.FATE.map))){
- print("setting origin habitat.FATE.map to match simulation.map")
- raster::origin(habitat.FATE.map) <- raster::origin(simulation.map)
- }
- if(!compareRaster(simulation.map,habitat.FATE.map)){ #this is crucial to be able to identify pixel by their index and not their coordinates
- stop("habitat.FATE.map could not be coerced to match simulation.map")
- }else{
- print("simulation.map & habitat.FATE.map are (now) consistent")
- }
-
- #adjust validation.mask accordingly
- if(!all(res(habitat.FATE.map)==res(validation.mask))){
- validation.mask<-projectRaster(from=validation.mask,to=habitat.FATE.map,method = "ngb")
- }
- if(extent(validation.mask)!=extent(habitat.FATE.map)){
- validation.mask<-crop(x=validation.mask,y=habitat.FATE.map)
- }
- if(!compareRaster(validation.mask,habitat.FATE.map)){
- stop("error in correcting validation.mask to match habitat.FATE.map")
- }else{
- print("validation.mask is (now) consistent with (modified) habitat.FATE.map")
- }
-
#########################################
# 2. Get observed habitat
#########################################
- #index of the pixels in the simulation area
- in.region.pixels<-which(getValues(simulation.map)==1)
+ habitat.whole.area.df <- data.frame(pixel = seq(1, ncell(habitat.FATE.map), 1)
+ , code.habitat = getValues(habitat.FATE.map)
+ , for.validation = getValues(validation.mask))
+ habitat.whole.area.df <- habitat.whole.area.df[which(getValues(simulation.map) == 1), ] #index of the pixels in the simulation area
+ habitat.whole.area.df <- habitat.whole.area.df[which(!is.na(habitat.whole.area.df$for.validation)), ]
+ if (!is.null(studied.habitat) & nrow(studied.habitat) > 0 & ncol(studied.habitat) == 2){
+ habitat.whole.area.df <- merge(habitat.whole.area.df, dplyr::select(studied.habitat,c(ID,habitat)), by.x = "code.habitat", by.y = "ID")
+ habitat.whole.area.df <- habitat.whole.area.df[which(habitat.whole.area.df$habitat %in% RF.model$classes), ]
+ } else if (names(raster::levels(hab.obs)[[1]]) == c("ID", "habitat", "colour") & nrow(raster::levels(hab.obs)[[1]]) > 0 & is.null(studied.habitat)){
+ habitat.whole.area.df <- merge(habitat.whole.area.df, dplyr::select(levels(hab.obs)[[1]],c(ID,habitat)), by.x = "code.habitat", by.y = "ID")
+ habitat.whole.area.df <- habitat.whole.area.df[which(habitat.whole.area.df$habitat %in% RF.model$classes), ]
+ }
- #habitat df for the whole simulation area
- habitat.whole.area.df<-data.frame(pixel=seq(from=1,to=ncell(habitat.FATE.map),by=1),code.habitat=getValues(habitat.FATE.map),for.validation=getValues(validation.mask))
- habitat.whole.area.df<-filter(habitat.whole.area.df,is.element(pixel,in.region.pixels)&for.validation==1)
- habitat.whole.area.df<-merge(habitat.whole.area.df,dplyr::select(levels(hab.obs)[[1]],c(ID,habitat)),by.x="code.habitat",by.y="ID")
+ print(cat("Habitat considered in the prediction exercise: ", c(unique(habitat.whole.area.df$habitat)), "\n", sep = "\t"))
print("Habitat in the simulation area:")
- table(habitat.whole.area.df$habitat,useNA="always")
+ table(habitat.whole.area.df$habitat, useNA = "always")
+
+ print("Habitat in the subpart of the simulation area used for validation:")
+ table(habitat.whole.area.df$habitat[habitat.whole.area.df$for.validation == 1], useNA = "always")
##############################
@@ -166,42 +146,48 @@ do.PFG.composition.validation<-function(name.simulation, obs.path, sim.version,
#########################
#get simulated abundance per pixel*strata*PFG for pixels in the simulation area
- if(perStrata==F){
+ if(perStrata == FALSE){
- simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_", sim.version, ".csv"))
- simu_PFG = simu_PFG[,c("PFG","ID.pixel", paste0("X",year))]
- colnames(simu_PFG) = c("PFG", "pixel", "abs")
+ if(file.exists(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_", sim.version, ".csv"))){
+
+ simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_", sim.version, ".csv"))
+ simu_PFG = simu_PFG[,c("PFG","ID.pixel", paste0("X",year))]
+ colnames(simu_PFG) = c("PFG", "pixel", "abs")
+ simu_PFG$strata <- "A"
+
+ }else {
+
+ stop("Simulated abundance file does not exist")
+ }
- }else if(perStrata==T){
+ }else if(perStrata == TRUE){
- simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_perStrata_", sim.version, ".csv"))
- simu_PFG = simu_PFG[,c("PFG","ID.pixel", "strata", paste0("X", year))]
- colnames(simu_PFG) = c("PFG", "pixel", "strata", "abs")
- }
-
- #aggregate per strata group with the correspondence provided in input
- simu_PFG$new.strata<-NA
-
- #attribute the "new.strata" value to group FATE strata used in the simulations into strata comparable with CBNA ones (all strata together or per strata)
- if(perStrata==F){
- simu_PFG$new.strata<-"A"
- }else if(perStrata==T){
- for(p in 1:length(list.strata.simulations)){
- simu_PFG$new.strata[is.element(simu_PFG$strata,list.strata.simulations[[p]])] = names(list.strata.simulations)[p]
+ if(file.exists(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_perStrata_", sim.version, ".csv"))){
+
+ simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_perStrata_", sim.version, ".csv"))
+ simu_PFG = simu_PFG[,c("PFG","ID.pixel", "strata", paste0("X", year))]
+ colnames(simu_PFG) = c("PFG", "pixel", "strata", "abs")
+ new.strata <- rep(NA, nrow(simu_PFG))
+ for (i in 1:length(list.strata.simulations)) {
+ ind = which(simu_PFG$strata %in% list.strata.simulations[[i]])
+ new.strata[ind] = names(list.strata.simulations)[i]
+ }
+ simu_PFG$strata = new.strata
+
+ }else {
+
+ stop("Simulated abundance file does not exist")
}
- simu_PFG$strata = NULL
}
- simu_PFG<-dplyr::rename(simu_PFG,"strata"="new.strata")
-
- #agggregate all the rows with same pixel, (new) strata and PFG (necessary since possibly several line with the same pixel+strata+PFG after strata grouping)
- simu_PFG<-aggregate(abs~pixel+strata+PFG,data=simu_PFG,FUN="sum") #sum and not mean because for a given CBNA strata some PFG are present in 2 FATE strata (let's say 1 unit in each) and other are present in 3 FATE strata (let's say one unit in each), so taking the mean would suppress the info that the second PFG is more present!
+ #aggregate all the rows with same pixel, (new) strata and PFG (necessary since possibly several line with the same pixel+strata+PFG after strata grouping)
+ simu_PFG <- aggregate(abs ~ pixel + strata + PFG, data = simu_PFG, FUN = "sum") #sum and not mean because for a given CBNA strata some PFG are present in 2 FATE strata (let's say 1 unit in each) and other are present in 3 FATE strata (let's say one unit in each), so taking the mean would suppress the info that the second PFG is more present!
# 3.2. Merge with habitat
###########################
#here it is crucial to have exactly the same raster structure for "simulation.map" and "habitat.FATE.map", so as to be able to do the merge on the "pixel" variable
- simu_PFG<-merge(simu_PFG,habitat.whole.area.df,by="pixel") #at this stage we have all the pixels in the simulation area
+ simu_PFG <- merge(simu_PFG, habitat.whole.area.df, by = "pixel") #at this stage we have all the pixels in the simulation area
# 3.3. Filter the required PFG, strata and habitat
###################################################
@@ -217,76 +203,74 @@ do.PFG.composition.validation<-function(name.simulation, obs.path, sim.version,
#####################################################################################
#important to do it only here, because if we filter some PFG, it changes the value of the relative metric (no impact of filtering for habitat or for strata since we do it per strata, and habitat is constant across a given pixel)
-
-
#careful: if several strata/habitat are selected, the computation is made for each strata separately
- simu_PFG<-as.data.frame(simu_PFG %>% group_by(pixel,strata) %>% mutate(relative.metric= round(prop.table(abs),digits = 2)))
- simu_PFG$relative.metric[is.na(simu_PFG$relative.metric)]<-0 #NA because abs==0 for some PFG, so put 0 instead of NA (maybe not necessary)
- simu_PFG$abs<-NULL
+ simu_PFG <- as.data.frame(simu_PFG %>% group_by(pixel, strata) %>% mutate(relative.metric = round(prop.table(abs), digits = 2)))
+ simu_PFG$relative.metric[is.na(simu_PFG$relative.metric)] <- 0 #NA because abs==0 for some PFG, so put 0 instead of NA (maybe not necessary)
+ simu_PFG$abs <- NULL
# 3.5. Compute distribution per PFG, and if require per strata/habitat (else all strata/habitat will be considered together)
##############################################################################################################################
#prepare the df storing quantile values
- simulated.distribution<-expand.grid(
- PFG=PFG.considered_PFG.compo,
- habitat=habitat.considered_PFG.compo,
- strata=strata.considered_PFG.compo
+ simulated.distribution <- expand.grid(
+ PFG = PFG.considered_PFG.compo,
+ habitat = habitat.considered_PFG.compo,
+ strata = strata.considered_PFG.compo
)
- null.quantile<-data.frame(rank=seq(0,4,1)) #to have 5 rows per PFG*strata*habitat
- simulated.distribution<-merge(simulated.distribution,null.quantile,all=T)
+ null.quantile <- data.frame(rank = seq(0, 4, 1)) #to have 5 rows per PFG*strata*habitat
+ simulated.distribution <- merge(simulated.distribution, null.quantile, all = TRUE)
- if(dim(simu_PFG)[1]>0){
+ if(dim(simu_PFG)[1] > 0){
- distribution<-setDT(simu_PFG)[, quantile(relative.metric), by=c("PFG","habitat","strata")]
- distribution<-rename(distribution,"quantile"="V1")
- distribution<-data.frame(distribution,rank=seq(0,4,1)) #add the rank number
+ distribution <- setDT(simu_PFG)[, quantile(relative.metric), by = c("PFG", "habitat", "strata")]
+ distribution <- rename(distribution, "quantile" = "V1")
+ distribution <- data.frame(distribution, rank = seq(0, 4, 1)) #add the rank number
- simulated.distribution<-merge(simulated.distribution,distribution,by=c("PFG","habitat","strata","rank"),all.x=T) # add the simulated quantiles, "all.x=T" to keep the unobserved combination (with quantile=NA then)
+ simulated.distribution <- merge(simulated.distribution, distribution, by = c("PFG", "habitat", "strata", "rank"), all.x = TRUE) # add the simulated quantiles, "all.x=T" to keep the unobserved combination (with quantile=NA then)
- simulated.distribution$quantile[is.na(simulated.distribution$quantile)]<-0 # "NA" in the previous line means that the corresponding combination PFG*strata*habitat is not present, so as a null relative abundance !
+ simulated.distribution$quantile[is.na(simulated.distribution$quantile)] <- 0 # "NA" in the previous line means that the corresponding combination PFG*strata*habitat is not present, so as a null relative abundance !
}else{
- simulated.distribution$quantile<-0
+ simulated.distribution$quantile <- 0
}
- simulated.distribution$habitat<-as.character(simulated.distribution$habitat) #else may generate problem in ordering the database
- simulated.distribution$strata<-as.character(simulated.distribution$strata) #else may generate problem in ordering the database
- simulated.distribution$PFG<-as.character(simulated.distribution$PFG) #else may generate problem in ordering the database
- simulated.distribution$rank<-as.numeric(simulated.distribution$rank) #else may generate problem in ordering the database
+ simulated.distribution$habitat <- as.character(simulated.distribution$habitat) #else may generate problem in ordering the database
+ simulated.distribution$strata <- as.character(simulated.distribution$strata) #else may generate problem in ordering the database
+ simulated.distribution$PFG <- as.character(simulated.distribution$PFG) #else may generate problem in ordering the database
+ simulated.distribution$rank <- as.numeric(simulated.distribution$rank) #else may generate problem in ordering the database
# 3.6. Order the table to be able to have output in the right format
#####################################################################
- simulated.distribution<-setDT(simulated.distribution)
- simulated.distribution<-simulated.distribution[order(habitat,strata,PFG,rank)]
+ simulated.distribution <- setDT(simulated.distribution)
+ simulated.distribution <- simulated.distribution[order(habitat, strata, PFG, rank)]
# 3.7. Rename
##############
- simulated.distribution<-rename(simulated.distribution,"simulated.quantile"="quantile")
+ simulated.distribution <- rename(simulated.distribution, "simulated.quantile" = "quantile")
# 3.8 Rename and reorder the observed database
###############################################
- observed.distribution$habitat<-as.character(observed.distribution$habitat) #else may generate problem in ordering the database
- observed.distribution$strata<-as.character(observed.distribution$strata) #else may generate problem in ordering the database
- observed.distribution$PFG<-as.character(observed.distribution$PFG) #else may generate problem in ordering the database
- observed.distribution$rank<-as.numeric(observed.distribution$rank) #else may generate problem in ordering the database
+ observed.distribution$habitat <- as.character(observed.distribution$habitat) #else may generate problem in ordering the database
+ observed.distribution$strata <- as.character(observed.distribution$strata) #else may generate problem in ordering the database
+ observed.distribution$PFG <- as.character(observed.distribution$PFG) #else may generate problem in ordering the database
+ observed.distribution$rank <- as.numeric(observed.distribution$rank) #else may generate problem in ordering the database
- observed.distribution<-setDT(observed.distribution)
- observed.distribution<-observed.distribution[order(habitat,strata,PFG,rank)]
+ observed.distribution <- setDT(observed.distribution)
+ observed.distribution <- observed.distribution[order(habitat, strata, PFG, rank)]
# "if" to check that observed and simulated databases are in the same order
if(
!(
- all(simulated.distribution$PFG==observed.distribution$PFG)&
- all(simulated.distribution$habitat==observed.distribution$habitat)&
- all(simulated.distribution$strata==observed.distribution$strata)&
- all(simulated.distribution$rank==observed.distribution$rank)
+ all(simulated.distribution$PFG == observed.distribution$PFG)&
+ all(simulated.distribution$habitat == observed.distribution$habitat)&
+ all(simulated.distribution$strata == observed.distribution$strata)&
+ all(simulated.distribution$rank == observed.distribution$rank)
)
){
stop("Problem in observed vs simulated database (problem in the PFG*strata*habitat considered or in the database order)")
@@ -295,50 +279,50 @@ do.PFG.composition.validation<-function(name.simulation, obs.path, sim.version,
# 3.9. Merge observed and simulated data
#########################################
- simulated.distribution<-cbind(simulated.distribution,observed.quantile=observed.distribution$observed.quantile) #quicker than a merge, but we can do it only because we have worked on the order of the DT
+ simulated.distribution <- cbind(simulated.distribution, observed.quantile = observed.distribution$observed.quantile) #quicker than a merge, but we can do it only because we have worked on the order of the DT
# 3.10 Compute proximity between observed and simulated data, per PFG*strata*habitat
#####################################################################################
#we get rid off rank==0 because there is good chance that it is nearly always equal to zero both in observed and simulated data, and that would provide a favorable bias in the results
- simulated.distribution<-filter(simulated.distribution,rank!=0)
+ simulated.distribution <- filter(simulated.distribution, rank != 0)
- proximity<-simulated.distribution[,compute.proximity(simulated.quantile=simulated.quantile,observed.quantile=observed.quantile),by=c("PFG","habitat","strata")]
+ proximity <- simulated.distribution[,compute.proximity(simulated.quantile = simulated.quantile, observed.quantile = observed.quantile), by = c("PFG", "habitat", "strata")]
- proximity<-rename(proximity,"proximity"="V1")
+ proximity <- rename(proximity, "proximity" = "V1")
- proximity<-proximity[order(habitat,strata,PFG)] #to have output in the same order for all simulations
+ proximity <- proximity[order(habitat, strata, PFG)] #to have output in the same order for all simulations
# 3.11. Aggregate results for the different PFG
################################################
- aggregated.proximity<-proximity[,mean(proximity),by=c("habitat","strata")]
- aggregated.proximity<-rename(aggregated.proximity,"aggregated.proximity"="V1")
- aggregated.proximity$aggregated.proximity<-round(aggregated.proximity$aggregated.proximity,digits=2)
- aggregated.proximity$simul<-sim.version
+ aggregated.proximity <- proximity[,mean(proximity), by = c("habitat", "strata")]
+ aggregated.proximity <- rename(aggregated.proximity, "aggregated.proximity" = "V1")
+ aggregated.proximity$aggregated.proximity <- round(aggregated.proximity$aggregated.proximity, digits = 2)
+ aggregated.proximity$simul <- sim.version
# return(aggregated.proximity)
#line added because the foreach method does not work
- results.simul[[i]]<-aggregated.proximity
+ results.simul[[i]] <- aggregated.proximity
}
# 4. Put in the output format
##############################
- results<-sapply(results.simul,function(X){X$aggregated.proximity})
- rownames(results)<-paste0(results.simul[[1]]$habitat,"_",results.simul[[1]]$strata)
- colnames(results)<-sim.version
- results<-t(results)
- results<-as.data.frame(results)
- results$simulation<-rownames(results)
+ results <- sapply(results.simul, function(X){X$aggregated.proximity})
+ rownames(results) <- paste0(results.simul[[1]]$habitat, "_", results.simul[[1]]$strata)
+ colnames(results) <- sim.version
+ results <- t(results)
+ results <- as.data.frame(results)
+ results$simulation <- rownames(results)
#save and return
- write.csv(results,paste0(output.path,"/performance.composition.csv"),row.names = F)
+ write.csv(results, paste0(output.path, "/performance.composition.csv"), row.names = FALSE)
return(results)
}
diff --git a/R/UTILS.do_habitat_validation.R b/R/UTILS.do_habitat_validation.R
index c7f76a1..ad4ae4f 100644
--- a/R/UTILS.do_habitat_validation.R
+++ b/R/UTILS.do_habitat_validation.R
@@ -16,23 +16,30 @@
##' @param RF.model random forest model trained on CBNA data (train.RF.habitat
##' function)
##' @param habitat.FATE.map a raster map of the observed habitat in the
-##' studied area.
-##' @param validation.mask a raster mask that specified which pixels need validation.
-##' @param simulation.map a raster map of the whole studied area use to check
-##' the consistency between simulation map and the observed habitat map.
+##' studied area with same projection & resolution than validation mask and simulation mask.
+##' @param validation.mask a raster mask that specified
+##' which pixels need validation, with same projection & resolution than simulation mask.
+##' @param simulation.map a raster map of the whole studied area (provides by FATE parameters functions).
##' @param predict.all.map \code{Logical}. If TRUE, the script will predict
##' habitat for the whole map.
##' @param sim.version name of the simulation to validate.
##' @param name.simulation simulation folder name.
##' @param perStrata \code{Logical}. If TRUE, the PFG abundance is defined
##' by strata in each pixel. If FALSE, PFG abundance is defined for all strata.
-##' @param hab.obs a raster map of the observed habitat in the
-##' extended studied area.
+##' @param hab.obs a raster map of the extended studied map in the simulation, with same projection
+##' & resolution than simulation mask.
##' @param year simulation year selected for validation.
##' @param list.strata.releves a character vector which contain the observed strata
##' definition, extracted from observed PFG releves.
##' @param list.strata.simulations a character vector which contain \code{FATE}
##' strata definition and correspondence with observed strata definition.
+##' @param opt.no_CPU default \code{1}. \cr The number of
+##' resources that can be used to parallelize the computation of performance of
+##' habitat prediction.
+##' @param studied.habitat default \code{NULL}. If \code{NULL}, the function will
+##' take into account of habitats define in the \code{hab.obs} map. Otherwise, please specify
+##' in a 2 columns data frame the habitats (2nd column) and the ID (1st column) for each of them which will be taken
+##' into account for the validation.
##'
##' @details
##'
@@ -55,7 +62,7 @@
##' @importFrom raster compareCRS res projectRaster extent crop origin compareRaster
##' getValues predict levels
##' @importFrom stats aggregate
-##' @importFrom stringr str_sub
+##' @importFrom stringr str_sub str_split
##' @importFrom foreach foreach %dopar%
##' @importFrom reshape2 dcast
##' @importFrom caret confusionMatrix
@@ -69,7 +76,8 @@
do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validation.mask
, simulation.map, predict.all.map, sim.version, name.simulation
- , perStrata, hab.obs, year, list.strata.releves, list.strata.simulations)
+ , perStrata, hab.obs, year, list.strata.releves, list.strata.simulations
+ , opt.no_CPU = 1, studied.habitat = NULL)
{
cat("\n ---------- FATE OUTPUT ANALYSIS \n")
@@ -100,50 +108,12 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
stop("please provide rasters with same crs and resolution for habitat.FATE.map and validation.mask")
}
- #consistency between habitat.FATE.map and simulation.map
- ## MUST BE DONE before
- # if(!compareCRS(simulation.map,habitat.FATE.map)){
- # print("reprojecting habitat.FATE.map to match simulation.map crs")
- # habitat.FATE.map<-projectRaster(habitat.FATE.map,crs=crs(simulation.map))
- # }
- if(!all(res(habitat.FATE.map)==res(simulation.map))){
- stop("provide habitat.FATE.map with same resolution as simulation.map")
- }
- if(extent(simulation.map) != extent(habitat.FATE.map)){
- print("cropping habitat.FATE.map to match simulation.map")
- habitat.FATE.map = crop(x = habitat.FATE.map, y = simulation.map)
- }
- ## MUST BE DONE before
- # if(!all(origin(simulation.map)==origin(habitat.FATE.map))){
- # print("setting origin habitat.FATE.map to match simulation.map")
- # raster::origin(habitat.FATE.map) <- raster::origin(simulation.map)
- # }
- if(!compareRaster(simulation.map,habitat.FATE.map)){ #this is crucial to be able to identify pixel by their index and not their coordinates
- stop("habitat.FATE.map could not be coerced to match simulation.map")
- }else{
- print("simulation.map & habitat.FATE.map are (now) consistent")
- }
-
- #adjust validation.mask accordingly
- ## MUST BE DONE before ?
- # if(!all(res(habitat.FATE.map)==res(validation.mask))){
- # validation.mask<-projectRaster(from=validation.mask,to=habitat.FATE.map,method = "ngb")
- # }
- if(extent(validation.mask)!=extent(habitat.FATE.map)){
- validation.mask<-crop(x=validation.mask,y=habitat.FATE.map)
- }
- if(!compareRaster(validation.mask, habitat.FATE.map)){
- stop("error in correcting validation.mask to match habitat.FATE.map")
- }else{
- print("validation.mask is (now) consistent with (modified) habitat.FATE.map") ## TODO : change message
- }
-
#check consistency for PFG & strata classes between FATE output vs the RF model
-
RF.predictors <- rownames(RF.model$importance)
RF.PFG <- unique(str_sub(RF.predictors, 1, 2))
- FATE.PFG<-str_sub(list.files(paste0(name.simulation,"/DATA/PFGS/SUCC")),6,7) ## TODO : careful, will not match necessarily all PFG names
+ FATE.PFG <- .getGraphics_PFG(name.simulation = str_split(output.path, "/")[[1]][1]
+ , abs.simulParam = paste0(name.simulation, "/PARAM_SIMUL/Simul_parameters_", str_split(sim.version, "_")[[1]][2], ".txt"))
if(length(setdiff(FATE.PFG,RF.PFG)) > 0 | length(setdiff(RF.PFG,FATE.PFG)) > 0){
stop("The PFG used to train the RF algorithm are not the same as the PFG used to run FATE.")
@@ -159,9 +129,14 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
, code.habitat = getValues(habitat.FATE.map)
, for.validation = getValues(validation.mask))
habitat.whole.area.df <- habitat.whole.area.df[which(getValues(simulation.map) == 1), ] #index of the pixels in the simulation area
- habitat.whole.area.df <- habitat.whole.area.df[which(!is.na(habitat.whole.area.df$for.validation)), ]
- habitat.whole.area.df <- merge(habitat.whole.area.df, dplyr::select(levels(hab.obs)[[1]],c(ID,habitat)), by.x = "code.habitat", by.y = "ID")
- habitat.whole.area.df <- habitat.whole.area.df[which(habitat.whole.area.df$habitat %in% RF.model$classes), ]
+ habitat.whole.area.df <- habitat.whole.area.df[which(!is.na(habitat.whole.area.df$for.validation)), ]
+ if (!is.null(studied.habitat) & nrow(studied.habitat) > 0 & ncol(studied.habitat) == 2){
+ habitat.whole.area.df <- merge(habitat.whole.area.df, dplyr::select(studied.habitat,c(ID,habitat)), by.x = "code.habitat", by.y = "ID")
+ habitat.whole.area.df <- habitat.whole.area.df[which(habitat.whole.area.df$habitat %in% RF.model$classes), ]
+ } else if (names(raster::levels(hab.obs)[[1]]) == c("ID", "habitat", "colour") & nrow(raster::levels(hab.obs)[[1]]) > 0 & is.null(studied.habitat)){
+ habitat.whole.area.df <- merge(habitat.whole.area.df, dplyr::select(levels(hab.obs)[[1]],c(ID,habitat)), by.x = "code.habitat", by.y = "ID")
+ habitat.whole.area.df <- habitat.whole.area.df[which(habitat.whole.area.df$habitat %in% RF.model$classes), ]
+ }
print(cat("Habitat considered in the prediction exercise: ", c(unique(habitat.whole.area.df$habitat)), "\n", sep = "\t"))
@@ -177,8 +152,6 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
print("processing simulations")
-
- # registerDoParallel(detectCores()-2) ## TODO : put as optional (like in zip/unzip function)
if (opt.no_CPU > 1)
{
if (.getOS() != "windows")
@@ -198,7 +171,6 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
#get simulated abundance per pixel*strata*PFG for pixels in the simulation area
if (perStrata == FALSE) {
- ## TODO : add test if file exists
if(file.exists(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_", sim.version, ".csv")))
{
simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_", sim.version, ".csv"))
@@ -212,7 +184,6 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
}
} else if (perStrata == TRUE) {
- ## TODO : add test if file exists
if(file.exists(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_perStrata_", sim.version, ".csv")))
{
simu_PFG = read.csv(paste0(name.simulation, "/RESULTS/POST_FATE_TABLE_PIXEL_evolution_abundance_perStrata_", sim.version, ".csv"))
@@ -262,7 +233,7 @@ do.habitat.validation<-function(output.path, RF.model, habitat.FATE.map, validat
#################################
data.validation <- data.FATE.PFG.habitat[which(data.FATE.PFG.habitat$for.validation == 1), ]
- x.validation <- dplyr::select(data.validation,all_of(RF.predictors)) ## TODO : change for classic colnames selection but with error message if not fullfilling all names ?
+ x.validation <- dplyr::select(data.validation,all_of(RF.predictors))
y.validation <- data.validation$habitat
y.validation.predicted <- predict(object = RF.model, newdata = x.validation, type = "response", norm.votes = TRUE)
diff --git a/R/UTILS.get_observed_distribution.R b/R/UTILS.get_observed_distribution.R
index 6a64f03..856b9e6 100644
--- a/R/UTILS.get_observed_distribution.R
+++ b/R/UTILS.get_observed_distribution.R
@@ -10,13 +10,16 @@
##' of relative abundance, from observed data.
##'
##' @param name.simulation simulation folder name.
-##' @param obs.path the function needs observed data, please create a folder for them in your
-##' simulation folder and then indicate in this parameter the access path to this new folder.
-##' @param releves.PFG file which contain the observed Braund-Blanquet abundance at each site
-##' and each PFG and strata.
-##' @param releves.sites file which contain coordinates and a description of
-##' the habitat associated with the dominant species of each site in the studied map.
-##' @param hab.obs raster map of the extended studied area in the simulation.
+##' @param releves.PFG a data frame with abundance (column named abund) at each site
+##' and for each PFG and strata.
+##' @param releves.sites a data frame with coordinates and a description of the habitat associated with
+##' the dominant species of each site in the studied map.
+##' @param hab.obs a raster map of the extended studied map in the simulation, with same projection
+##' & resolution than simulation mask.
+##' @param studied.habitat default \code{NULL}. If \code{NULL}, the function will
+##' take into account of habitats define in the \code{hab.obs} map. Otherwise, please specify
+##' in a 2 columns data frame the habitats (2nd column) and the ID (1st column) for each of them which will be taken
+##' into account for the validation.
##' @param PFG.considered_PFG.compo a character vector of the list of PFG considered
##' in the validation.
##' @param strata.considered_PFG.compo a character vector of the list of precise
@@ -56,10 +59,10 @@
get.observed.distribution<-function(name.simulation
- , obs.path
, releves.PFG
, releves.sites
, hab.obs
+ , studied.habitat = NULL
, PFG.considered_PFG.compo
, strata.considered_PFG.compo
, habitat.considered_PFG.compo
@@ -75,62 +78,80 @@ get.observed.distribution<-function(name.simulation
#1. Aggregate coverage per PFG
#########################################
- #identify sites with wrong BB values (ie values that cannot be converted by the PRE_FATE.abundBraunBlanquet function)
- releves.PFG<-filter(releves.PFG,is.element(BB,c(NA, "NA", 0, "+", "r", 1:5)))
-
#transformation into coverage percentage
- releves.PFG$coverage<-PRE_FATE.abundBraunBlanquet(releves.PFG$BB)/100 #as a proportion, not a percentage
-
- if(perStrata==T){
- aggregated.releves.PFG<-aggregate(coverage~site+PFG+strata,data=releves.PFG,FUN="sum")
- }else if(perStrata==F){
- aggregated.releves.PFG<-aggregate(coverage~site+PFG,data=releves.PFG,FUN="sum")
- aggregated.releves.PFG$strata<-"A" #"A" is for "all". Important to have a single-letter code here (useful to check consistency between relevés strata and model strata)
+ if(!is.numeric(releves.PFG$abund)) # Braun-Blanquet abundance
+ {
+ releves.PFG <- filter(releves.PFG,is.element(abund,c(NA, "NA", 0, "+", "r", 1:5)))
+ releves.PFG$coverage = PRE_FATE.abundBraunBlanquet(releves.PFG$abund)/100
+ } else if (is.numeric(releves.PFG$abund) & max(releves.PFG$abund) == 1) # presence-absence data
+ {
+ releves.PFG$coverage = releves.PFG$abund
+ } else if (is.numeric(releves.PFG$abund)) # absolute abundance
+ {
+ releves.PFG$coverage = releves.PFG$abund
}
+ if(perStrata == T){
+ mat.PFG.agg <- aggregate(coverage ~ site + PFG + strata, data = releves.PFG, FUN = "sum")
+ }else if(perStrata == F){
+ mat.PFG.agg <- aggregate(coverage ~ site + PFG, data = releves.PFG, FUN = "sum")
+ mat.PFG.agg$strata <- "A" #"A" is for "all".
+ }
#2. Get habitat information
###################################
#get sites coordinates
- aggregated.releves.PFG<-merge(dplyr::select(releves.sites,c(site)),aggregated.releves.PFG,by="site")
+ mat.PFG.agg = merge(releves.sites, mat.PFG.agg, by = "site")
#get habitat code and name
- if(compareCRS(aggregated.releves.PFG,hab.obs)){
- aggregated.releves.PFG$code.habitat<-raster::extract(x=hab.obs,y=aggregated.releves.PFG)
- }else{
- aggregated.releves.PFG<-st_transform(x=aggregated.releves.PFG,crs=crs(hab.obs))
- aggregated.releves.PFG$code.habitat<-raster::extract(x=hab.obs,y=aggregated.releves.PFG)
+ mat.PFG.agg$code.habitat = raster::extract(x = hab.obs, y = mat.PFG.agg[, c("x", "y")])
+ mat.PFG.agg = mat.PFG.agg[which(!is.na(mat.PFG.agg$code.habitat)), ]
+ if (nrow(mat.PFG.agg) == 0) {
+ stop("Code habitat vector is empty. Please verify values of your hab.obs map")
}
#correspondance habitat code/habitat name
- table.habitat.releve<-levels(hab.obs)[[1]]
-
- aggregated.releves.PFG<-merge(aggregated.releves.PFG,dplyr::select(table.habitat.releve,c(ID,habitat)),by.x="code.habitat",by.y="ID")
-
- #(optional) keep only releves data in a specific area
- if(!is.null(composition.mask)){
-
- if(compareCRS(aggregated.releves.PFG,composition.mask)==F){ #as this stage it is not a problem to transform crs(aggregated.releves.PFG) since we have no more merge to do (we have already extracted habitat info from the map)
- aggregated.releves.PFG<-st_transform(x=aggregated.releves.PFG,crs=crs(composition.mask))
- }
-
- aggregated.releves.PFG<-st_crop(x=aggregated.releves.PFG,y=composition.mask)
- print("'releve' map has been cropped to match 'external.training.mask'.")
- }
+ if (!is.null(studied.habitat) & nrow(studied.habitat) > 0 & ncol(studied.habitat) == 2)
+ { # cas où pas de levels dans la carte d'habitat et utilisation d'un vecteur d'habitat
+ colnames(obs.habitat) = c("ID", "habitat")
+ table.habitat.releve = studied.habitat
+ mat.PFG.agg = merge(mat.PFG.agg, table.habitat.releve[, c("ID", "habitat")], by.x = "code.habitat", by.y = "ID")
+ print(cat("habitat classes used in the RF algo: ", unique(mat.PFG.agg$habitat), "\n", sep = "\t"))
+ } else if (names(raster::levels(hab.obs)[[1]]) == c("ID", "habitat", "colour") & nrow(raster::levels(hab.obs)[[1]]) > 0 & is.null(studied.habitat))
+ { # cas où on utilise les levels définis dans la carte
+ table.habitat.releve = levels(hab.obs)[[1]]
+ mat.PFG.agg = merge(mat.PFG.agg, table.habitat.releve[, c("ID", "habitat")], by.x = "code.habitat", by.y = "ID")
+ mat.PFG.agg = mat.PFG.agg[which(mat.PFG.agg$habitat %in% studied.habitat), ]
+ print(cat("habitat classes used in the RF algo: ", unique(mat.PFG.agg$habitat), "\n", sep = "\t"))
+ } else
+ {
+ stop("Habitat definition in hab.obs map is not correct")
+ }
+
+ # #(optional) keep only releves data in a specific area
+ # if(!is.null(composition.mask)){
+ #
+ # if(compareCRS(mat.PFG.agg,composition.mask)==F){ #as this stage it is not a problem to transform crs(mat.PFG.agg) since we have no more merge to do (we have already extracted habitat info from the map)
+ # mat.PFG.agg<-st_transform(x=mat.PFG.agg,crs=crs(composition.mask))
+ # }
+ #
+ # mat.PFG.agg<-st_crop(x=mat.PFG.agg,y=composition.mask)
+ # print("'releve' map has been cropped to match 'external.training.mask'.")
+ # }
# 3. Keep only releve on interesting habitat, strata and PFG
##################################################################"
- aggregated.releves.PFG<-as.data.frame(aggregated.releves.PFG)
- aggregated.releves.PFG<-dplyr::select(aggregated.releves.PFG,c(site,PFG,strata,coverage,habitat))
+ mat.PFG.agg <- as.data.frame(mat.PFG.agg)
+ mat.PFG.agg <- dplyr::select(mat.PFG.agg,c(site,PFG,strata,coverage,habitat))
- aggregated.releves.PFG<-filter(
- aggregated.releves.PFG,
- is.element(PFG,PFG.considered_PFG.compo)&
- is.element(strata,strata.considered_PFG.compo)&
- is.element(habitat,habitat.considered_PFG.compo)
+ mat.PFG.agg <- filter(
+ mat.PFG.agg,
+ is.element(PFG, PFG.considered_PFG.compo) &
+ is.element(strata, strata.considered_PFG.compo) &
+ is.element(habitat, habitat.considered_PFG.compo)
)
@@ -139,51 +160,51 @@ get.observed.distribution<-function(name.simulation
#important to do it only here, because if we filter some PFG, it changes the value of the relative metric
#careful: if several strata are selected, the computation is made for each strata separately
- aggregated.releves.PFG<-as.data.frame(aggregated.releves.PFG %>% group_by(site,strata) %>% mutate(relative.metric= round(prop.table(coverage),digits = 2)))
- aggregated.releves.PFG$relative.metric[is.na(aggregated.releves.PFG$relative.metric)]<-0 #NA because abs==0 for some PFG, so put 0 instead of NA (maybe not necessary)
- aggregated.releves.PFG$coverage<-NULL
+ mat.PFG.agg <- as.data.frame(mat.PFG.agg %>% group_by(site, strata) %>% mutate(relative.metric = round(prop.table(coverage), digits = 2)))
+ mat.PFG.agg$relative.metric[is.na(mat.PFG.agg$relative.metric)] <- 0 #NA because abs==0 for some PFG, so put 0 instead of NA (maybe not necessary)
+ mat.PFG.agg$coverage <- NULL
print("releve data have been transformed into a relative metric")
# 5. Save data
#####################
- write.csv(aggregated.releves.PFG,paste0(output.path,"/CBNA.releves.prepared.csv"),row.names = F)
+ write.csv(mat.PFG.agg, paste0(output.path, "/obs.releves.prepared.csv"), row.names = F)
# 6. Compute distribution per PFG, and if require per strata/habitat (else all strata/habitat will be considered together)
####################################
- distribution<-setDT(aggregated.releves.PFG)[, quantile(relative.metric), by=c("PFG","habitat","strata")]
- distribution<-rename(distribution,"quantile"="V1")
- distribution<-data.frame(distribution,rank=seq(0,4,1)) #to be able to sort on quantile
+ distribution <- setDT(mat.PFG.agg)[, quantile(relative.metric), by = c("PFG", "habitat", "strata")]
+ distribution <- rename(distribution, "quantile" = "V1")
+ distribution <- data.frame(distribution, rank = seq(0, 4, 1)) #to be able to sort on quantile
# 7. Add the missing PFG*habitat*strata
#final distribution is the distribution once the missing combination have been added. For these combination, all quantiles are set to 0
- observed.distribution<-expand.grid(
- PFG=PFG.considered_PFG.compo,
- habitat=habitat.considered_PFG.compo,
- strata=strata.considered_PFG.compo
+ observed.distribution <- expand.grid(
+ PFG = PFG.considered_PFG.compo,
+ habitat = habitat.considered_PFG.compo,
+ strata = strata.considered_PFG.compo
)
- null.quantile<-data.frame(rank=seq(0,4,1)) #to have 5 rows per PFG*strata*habitat
- observed.distribution<-merge(observed.distribution,null.quantile,all=T)
+ null.quantile <- data.frame(rank = seq(0, 4, 1)) #to have 5 rows per PFG*strata*habitat
+ observed.distribution <- merge(observed.distribution, null.quantile, all = TRUE)
- observed.distribution<-merge(observed.distribution,distribution,by=c("PFG","habitat","strata","rank"),all.x=T) # "all.x=T" to keep the unobserved combination
+ observed.distribution <- merge(observed.distribution, distribution, by = c("PFG", "habitat", "strata", "rank"), all.x = TRUE) # "all.x=T" to keep the unobserved combination
- observed.distribution$quantile[is.na(observed.distribution$quantile)]<-0
+ observed.distribution$quantile[is.na(observed.distribution$quantile)] <- 0
# 8. Order the table to be able to have output in the right format
- observed.distribution<-setDT(observed.distribution)
- observed.distribution<-observed.distribution[order(habitat,strata,PFG,rank)]
+ observed.distribution <- setDT(observed.distribution)
+ observed.distribution <- observed.distribution[order(habitat, strata, PFG, rank)]
- observed.distribution<-rename(observed.distribution,"observed.quantile"="quantile")
+ observed.distribution <- rename(observed.distribution, "observed.quantile" = "quantile")
# 9. Save results
##########################################
- write.csv(observed.distribution,paste0(output.path,"/observed.distribution.csv"),row.names = F)
+ write.csv(observed.distribution, paste0(output.path, "/observed.distribution.csv"), row.names = F)
# 8. Return
####################
diff --git a/R/UTILS.train_RF_habitat.R b/R/UTILS.train_RF_habitat.R
index 123ecb2..131d679 100644
--- a/R/UTILS.train_RF_habitat.R
+++ b/R/UTILS.train_RF_habitat.R
@@ -14,14 +14,14 @@
##' and for each PFG and strata.
##' @param releves.sites a data frame with coordinates and a description of
##' the habitat associated with the dominant species of each site in the
-##' studied map. Shapefile format.
+##' studied map.
##' @param hab.obs a raster map of the observed habitat in the
##' extended studied area.
##' @param external.training.mask default \code{NULL}. (optional) Keep only
##' releves data in a specific area.
-##' @param studied.habitat If \code{NULL}, the function will
+##' @param studied.habitat default \code{NULL}. If \code{NULL}, the function will
##' take into account of habitats define in the \code{hab.obs} map. Otherwise, please specify
-##' in a 2 columns data frame the habitats and the ID for each of them which will be taken
+##' in a 2 columns data frame the habitats (2nd column) and the ID (1st column) for each of them which will be taken
##' into account for the validation.
##' @param RF.param a list of 2 parameters for random forest model :
##' share.training defines the size of the trainig part of the data base.
@@ -56,7 +56,7 @@
##' @importFrom reshape2 dcast
##' @importFrom data.table setDT
##' @importFrom raster extract compareCRS levels
-##' @importFrom sf st_transform st_crop st_write
+##' @importFrom sf st_transform st_crop
##' @importFrom randomForest randomForest tuneRF
##' @importFrom caret confusionMatrix
##' @importFrom readr write_rds
@@ -90,7 +90,7 @@ train.RF.habitat = function(releves.PFG
releves.PFG = as.data.frame(releves.PFG)
if (nrow(releves.PFG) == 0 || ncol(releves.PFG) != 4)
{
- .stopMessage_numRowCol("releves.PFG", c("site", "PFG", "strata", "BB"))
+ .stopMessage_numRowCol("releves.PFG", c("site", "PFG", "strata", "abund"))
}
if (!is.numeric(releves.PFG$site))
{
@@ -106,7 +106,6 @@ train.RF.habitat = function(releves.PFG
{
stop("PFG list in releves.PFG does not correspond to PFG list in FATE")
}
- .testParam_notInValues.m("releves.PFG$BB", releves.PFG$BB, c(NA, "NA", 0, "+", "r", 1:5))
}
## CHECK parameter releves.sites
if (.testParam_notDf(releves.sites))
@@ -132,6 +131,7 @@ train.RF.habitat = function(releves.PFG
#transformation into coverage percentage
if(!is.numeric(releves.PFG$abund)) # Braun-Blanquet abundance
{
+ releves.PFG <- filter(releves.PFG,is.element(abund,c(NA, "NA", 0, "+", "r", 1:5)))
releves.PFG$coverage = PRE_FATE.abundBraunBlanquet(releves.PFG$abund)/100
} else if (is.numeric(releves.PFG$abund) & max(releves.PFG$abund) == 1) # presence-absence data
{
@@ -180,7 +180,7 @@ train.RF.habitat = function(releves.PFG
}
#correspondence habitat code/habitat name
- if (names(raster::levels(hab.obs)[[1]]) != c("ID", "habitat", "colour") | nrow(raster::levels(hab.obs)[[1]]) == 0 & !is.null(studied.habitat))
+ if (!is.null(studied.habitat) & nrow(studied.habitat) > 0 & ncol(studied.habitat) == 2)
{ # cas où pas de levels dans la carte d'habitat et utilisation d'un vecteur d'habitat
colnames(obs.habitat) = c("ID", "habitat")
table.habitat.releve = studied.habitat
From 827fe8af6f35f04d8dee6d0be74632b2f2894227 Mon Sep 17 00:00:00 2001
From: Maxime Delprat
Date: Wed, 23 Mar 2022 16:07:22 +0100
Subject: [PATCH 071/176] Corrections in documentation of validation functions
---
DESCRIPTION | 2 +-
NAMESPACE | 2 -
docs/reference/POST_FATE.validation.html | 30 +-
.../do.PFG.composition.validation.html | 27 +-
docs/reference/do.habitat.validation.html | 25 +-
docs/reference/get.observed.distribution.html | 21 +-
docs/reference/index.html | 447 +++++-------------
docs/reference/train.RF.habitat.html | 25 +-
man/POST_FATE.validation.Rd | 28 +-
man/do.PFG.composition.validation.Rd | 27 +-
man/do.habitat.validation.Rd | 25 +-
man/get.observed.distribution.Rd | 19 +-
man/train.RF.habitat.Rd | 25 +-
src/libs/iostreams/src/zlib.o | Bin 243648 -> 244552 bytes
14 files changed, 268 insertions(+), 435 deletions(-)
diff --git a/DESCRIPTION b/DESCRIPTION
index 0d4ee18..2c87e7a 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -35,7 +35,7 @@ Description: Wrapper of the C++ model FATE. FATE is a vegetation model based on
1) gather, prepare and format data to be used with FATE ;
2) run FATE simulation(s)
3) process and analyze data produced by FATE.
-RoxygenNote: 7.1.1
+RoxygenNote: 7.1.2
Encoding: UTF-8
NeedsCompilation: yes
License:
diff --git a/NAMESPACE b/NAMESPACE
index 6225478..579d50c 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -207,9 +207,7 @@ importFrom(readr,write_rds)
importFrom(reshape2,dcast)
importFrom(reshape2,melt)
importFrom(sf,st_crop)
-importFrom(sf,st_read)
importFrom(sf,st_transform)
-importFrom(sf,st_write)
importFrom(shiny,runApp)
importFrom(sp,SpatialPoints)
importFrom(stats,aggregate)
diff --git a/docs/reference/POST_FATE.validation.html b/docs/reference/POST_FATE.validation.html
index 3f549d0..c230721 100644
--- a/docs/reference/POST_FATE.validation.html
+++ b/docs/reference/POST_FATE.validation.html
@@ -160,8 +160,8 @@
Computes validation data for habitat, PFG richness and composition for a sim.version,
year,
perStrata =TRUE,
+ opt.no_CPU =1,
doHabitat =TRUE,
- obs.path,
releves.PFG,
releves.sites,
hab.obs,
@@ -189,24 +189,29 @@
Arguments
perStrata
Logical. Default TRUE. If TRUE, PFG abundance is defined by strata.
If FALSE, PFG abundance defined for all strata (habitat & PFG composition & PFG richness validation).
+
opt.no_CPU
+
default 1. The number of resources that can be used to
+parallelize the computation of prediction performance for habitat & richness validation.
doHabitat
Logical. Default TRUE. If TRUE, habitat validation module is activated,
if FALSE, habitat validation module is disabled.
-
obs.path
-
the function needs observed data, please create a folder for them in your
-simulation folder and then indicate in this parameter the access path to this new folder (habitat & PFG composition validation).
releves.PFG
-
name of file which contain the observed Braund-Blanquet abundance at each site
-and each PFG and strata (habitat & PFG composition validation).
+
a data frame with abundance (column named abund) at each site
+and for each PFG and strata (habitat & PFG composition validation).
+
releves.sites
+
a data frame with coordinates and a description of the habitat associated with
+the dominant species of each site in the studied map (habitat & PFG composition validation).
hab.obs
-
name of the file which contain the extended studied map in the simulation (habitat & PFG composition validation).
+
a raster map of the extended studied map in the simulation, with same projection
+& resolution than simulation mask (habitat & PFG composition validation).
validation.mask
-
name of the file which contain a raster mask that specified which pixels need validation
-(habitat & PFG composition validation).
+
a raster mask that specified which pixels need validation, with same projection
+& resolution than simulation mask (habitat & PFG composition validation).
studied.habitat
default NULL. If NULL, the function will
-take into account of all habitats in the hab.obs map. Otherwise, please specify
-in a vector habitats that will be take into account for the validation (habitat validation).
+take into account of habitats define in the hab.obs map. Otherwise, please specify
+in a 2 columns data frame the habitats (2nd column) and the ID (1st column) for each of them which will be taken
+into account for the validation (habitat validation).
list.strata.simulations
default NULL. A character vector which contain FATE
strata definition and correspondence with observed strata definition.
@@ -232,9 +237,6 @@
Arguments
exclude.PFG
default NULL. A character vector containing the names
of the PFG you want to exclude from the analysis (PFG richness validation).
-
releves.site
-
name of the file which contain coordinates and a description of
-the habitat associated with the dominant species of each site in the studied map (habitat & PFG composition validation).
Compute distance between observed and simulated distribution
Arguments
name.simulation
simulation folder name.
-
obs.path
-
the function needs observed data, please create a folder for them in your
-simulation folder and then indicate in this parameter the access path to this new folder.
sim.version
name of the simulation we want to validate (it works with
only one sim.version).
hab.obs
-
file which contain the extended studied map in the simulation.
+
a raster map of the extended studied map in the simulation, with same projection
+& resolution than simulation mask.
PFG.considered_PFG.compo
a character vector of the list of PFG considered
in the validation.
@@ -184,10 +183,10 @@
Arguments
a character vector of the list of habitat(s)
considered in the validation.
observed.distribution
-
PFG observed distribution table.
+
PFG observed distribution table provides by get.observed.distribution function.
validation.mask
-
file which contain a raster mask that specified
-which pixels need validation.
+
a raster mask that specified
+which pixels need validation, with same projection & resolution than simulation mask.
year
year of simulation to validate.
list.strata.simulations
@@ -196,8 +195,16 @@
Arguments
list.strata.releves
a character vector which contain the observed strata
definition, extracted from observed PFG releves.
+
habitat.FATE.map
+
a raster map of the observed habitat in the
+studied area with same projection & resolution than validation mask and simulation mask.
+
studied.habitat
+
default NULL. If NULL, the function will
+take into account of habitats define in the hab.obs map. Otherwise, please specify
+in a 2 columns data frame the habitats (2nd column) and the ID (1st column) for each of them which will be taken
+into account for the validation.
perStrata.compo
-
Logical. All strata together (FALSE) or per strata (TRUE).
+
Logical. All strata together (FALSE) or per strata (TRUE).
Compare observed and simulated habitat of a FATE simulation
hab.obs,
year,
list.strata.releves,
- list.strata.simulations
+ list.strata.simulations,
+ opt.no_CPU =1,
+ studied.habitat =NULL)
@@ -173,12 +175,12 @@
Arguments
function)
habitat.FATE.map
a raster map of the observed habitat in the
-studied area.
+studied area with same projection & resolution than validation mask and simulation mask.
validation.mask
-
a raster mask that specified which pixels need validation.
+
a raster mask that specified
+which pixels need validation, with same projection & resolution than simulation mask.
simulation.map
-
a raster map of the whole studied area use to check
-the consistency between simulation map and the observed habitat map.
+
a raster map of the whole studied area (provides by FATE parameters functions).
predict.all.map
Logical. If TRUE, the script will predict
habitat for the whole map.
@@ -190,8 +192,8 @@
Arguments
Logical. If TRUE, the PFG abundance is defined
by strata in each pixel. If FALSE, PFG abundance is defined for all strata.
hab.obs
-
a raster map of the observed habitat in the
-extended studied area.
+
a raster map of the extended studied map in the simulation, with same projection
+& resolution than simulation mask.
year
simulation year selected for validation.
list.strata.releves
@@ -200,6 +202,15 @@
Arguments
list.strata.simulations
a character vector which contain FATE
strata definition and correspondence with observed strata definition.
+
opt.no_CPU
+
default 1. The number of
+resources that can be used to parallelize the computation of performance of
+habitat prediction.
+
studied.habitat
+
default NULL. If NULL, the function will
+take into account of habitats define in the hab.obs map. Otherwise, please specify
+in a 2 columns data frame the habitats (2nd column) and the ID (1st column) for each of them which will be taken
+into account for the validation.
Compute distribution of relative abundance over observed relevés
Arguments
name.simulation
simulation folder name.
-
obs.path
-
the function needs observed data, please create a folder for them in your
-simulation folder and then indicate in this parameter the access path to this new folder.
releves.PFG
-
file which contain the observed Braund-Blanquet abundance at each site
-and each PFG and strata.
+
a data frame with abundance (column named abund) at each site
+and for each PFG and strata.
releves.sites
-
file which contain coordinates and a description of
-the habitat associated with the dominant species of each site in the studied map.
+
a data frame with coordinates and a description of the habitat associated with
+the dominant species of each site in the studied map.
hab.obs
-
raster map of the extended studied area in the simulation.
+
a raster map of the extended studied map in the simulation, with same projection
+& resolution than simulation mask.
+
studied.habitat
+
default NULL. If NULL, the function will
+take into account of habitats define in the hab.obs map. Otherwise, please specify
+in a 2 columns data frame the habitats (2nd column) and the ID (1st column) for each of them which will be taken
+into account for the validation.
PFG.considered_PFG.compo
a character vector of the list of PFG considered
in the validation.
Create a graphical representation of several statistics for each PFG
to asses the quality of the model for one (or several) specific year of a
FATE simulation
Create maps of both habitat suitability and simulated occurrences of
each Plant Functional Group for one (or several) specific year of a
FATE simulation
Create a map related to plant functional group results (richness,
relative cover, light or soil CWM) for one (or several) specific year of a
FATE simulation
Create a random forest algorithm trained on CBNA data.
Arguments
releves.PFG
-
a data frame with Braund-Blanquet abundance at each site
-and each PFG and strata.
+
a data frame with abundance (column named abund) at each site
+and for each PFG and strata.
releves.sites
a data frame with coordinates and a description of
the habitat associated with the dominant species of each site in the
@@ -172,9 +172,10 @@
Arguments
default NULL. (optional) Keep only
releves data in a specific area.
studied.habitat
-
If NULL, the function will
-take into account of all habitats in the hab.obs map. Otherwise, please specify
-in a vector the habitats that we take into account for the validation.
+
default NULL. If NULL, the function will
+take into account of habitats define in the hab.obs map. Otherwise, please specify
+in a 2 columns data frame the habitats (2nd column) and the ID (1st column) for each of them which will be taken
+into account for the validation.
RF.param
a list of 2 parameters for random forest model :
share.training defines the size of the trainig part of the data base.
@@ -191,7 +192,7 @@
Arguments
Value
-
2 prepared CBNA releves files are created before the building of the random
+
2 prepared observed releves files are created before the building of the random
forest model in a habitat validation folder.
5 more files are created at the end of the script to save the RF model and
the performance analyzes (confusion matrix and TSS) for the training and
@@ -199,11 +200,11 @@
Value
Details
-
This function transform PFG Braund-Blanquet abundance in relative abundance,
-get habitat information from the releves map, keep only relees on interesting
-habitat and then builds de random forest model. Finally, the function analyzes
-the model performance with computation of confusion matrix and TSS for
-the traning and testing sample.
+
This function transform PFG abundance in relative abundance,
+get habitat information from the releves map of from a vector previously defined,
+keep releves on interesting habitat and then builds a random forest model. Finally,
+the function analyzes the model performance with computation of confusion matrix and TSS between
+the training and testing sample.