#### R script as supplementary information ####
# title:
# A 'fuzzy clustering' approach to conceptual confusion: how to classify natural ecological associations
# authors:
# Dario Fiorentino, Roland Pesch, Carmen-Pia Guenther, Lars Gutow, Jan Holstein, Jennifer Dannheim, Brigitte Ebbe, Tim Bildstein, Winfried Schroeder, Bastian Schuchardt, Thomas Brey, Karen Helen Wiltshire.
# short description:
# the script allows one to reproduce the analyses and content (figures and tables) presented in the main manuscript and in the further supplementary material.

#### 01. data import ####
# prior to data import, species names were harmonized with the "match taxa" tool
# available in the World Register of Marine Species (http://www.marinespecies.org/aphia.php?p=match)
data.original<-read.table("/file.csv", header=T, sep=";", dec=".")

# download bathymetry at https://www.ngdc.noaa.gov/mgg/shorelines/gshhs.html
library(raster)
depth<- raster("/file.tif")

# download coast line at https://www.ngdc.noaa.gov/mgg/shorelines/gshhs.html
library(maptools)
coast<- readShapePoly("/file.shp")
proj4string(coast)<- CRS("+init=epsg:4326") # WGS84
coast<- crop(coast, extent(-12, 70, 28, 72))
coast<- spTransform(coast, CRSobj=CRS("+proj=utm +zone=32 +ellps=WGS84 +units=m +no_defs")) # in WGS84 / UTM zone 32
coast<- crop(coast, extent(-1.5e+06, 3.5e+06, 3.5e+06, 8.2e+06))

# download German EEZ at http://www.marineregions.org/gazetteer.php?p=details&id=5669
eez<- readShapeLines("/file.shp")
proj4string(eez)<- CRS("+init=epsg:4326") # WGS84
eez<- crop(eez, extent(3.35, 9, 53, 55.91928))
eez<- spTransform(eez, CRSobj=CRS("+proj=utm +zone=32 +ellps=WGS84 +units=m +no_defs")) # in WGS84 / UTM zone 32

# the study area was edited by hand according to the coverage of the point sample data
study.area <- readShapePoly("/study_area.shp")
proj4string(study.area) = CRS("+proj=utm +zone=32 +ellps=WGS84 +units=m +no_defs") # in WGS84 / UTM zone 32

#### 02.01 Data preparation - remove samples older than 2000 ####
# give the right format to temporal information
data.original[,8]<- as.Date(as.character(data.original[,8]), format="%d/%m/%Y")
data.2000<- data.original[(which(data.original[,8]>= as.Date("01/01/2000", format="%d/%m/%Y"))),]

#### 02.02 Data preparation - remove NA in species names ####
any(is.na(data.2000$SCIENTIFICNAME_INCLUDED))
data.2000<- data.2000[-which(is.na(data.2000$SCIENTIFICNAME_INCLUDED)),]
any(is.na(data.2000$SCIENTIFICNAME_INCLUDED)) # False

#### 02.03 Data preparation - remove NA species density ####
any(is.na(data.2000$DENSITYupdated)) # True
nrow(data.2000[which(is.na(data.2000$DENSITYupdated)),])
data.2000<- data.2000[-which(is.na(data.2000$DENSITYupdated)),]
any(is.na(data.2000$DENSITYupdated)) # False

#### 02.04 Data preparation - negative and zero values check ####
any(data.2000$DENSITYupdated<0) # False
any(data.2000$DENSITYupdated==0) # False

#### 02.05 Data preparation - remove data falling in the Baltic Sea ####
data.ns <- data.2000[which(data.2000$LONG< 10),]
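# optional check (addition, not part of the original script): after the longitude filter above,
# every remaining record should lie west of 10 degrees E, i.e. in the North Sea
stopifnot(all(data.ns$LONG < 10))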
#### 02.06 Data preparation - giving temporal entries ####
library(lubridate)
data.ns$STATIONENYEAR<-year(data.ns$STATIONENDDATE)
data.ns$STATIONENMON<-month(data.ns$STATIONENDDATE)
# give season entries
data.ns$STATIONENSEASON<- data.ns$STATIONENMON
data.ns$STATIONENSEASON[which(data.ns$STATIONENSEASON==1 | data.ns$STATIONENSEASON==2 | data.ns$STATIONENSEASON==3)]<-"winter"
data.ns$STATIONENSEASON[which(data.ns$STATIONENSEASON==4 | data.ns$STATIONENSEASON==5 | data.ns$STATIONENSEASON==6)]<-"spring"
data.ns$STATIONENSEASON[which(data.ns$STATIONENSEASON==7 | data.ns$STATIONENSEASON==8 | data.ns$STATIONENSEASON==9)]<-"summer"
data.ns$STATIONENSEASON[which(data.ns$STATIONENSEASON==10 | data.ns$STATIONENSEASON==11 | data.ns$STATIONENSEASON==12)]<-"fall"

#### 02.07 Data preparation - data are averaged for organisms with a shared scientific name ####
library(plyr)
data.avg1<-ddply(data.ns, .(LONG, LAT, PROJECT, STATIONNAME, STATIONENDDATE, STATIONENMON, STATIONENSEASON, STATIONENYEAR, SCIENTIFICNAME_INCLUDED, ClassificationPhylum,ClassificationClass,ClassificationOrder,ClassificationFamily,ClassificationGenus), summarize, DENSITY=mean(DENSITYupdated, na.rm=T)) # 47368

#### 02.08 Data preparation - data are averaged for organisms with shared project and station name ####
data.avg2<-ddply(data.avg1, .(LONG, LAT, STATIONENDDATE, STATIONENMON, STATIONENSEASON, STATIONENYEAR, SCIENTIFICNAME_INCLUDED, ClassificationPhylum,ClassificationClass,ClassificationOrder,ClassificationFamily,ClassificationGenus), summarize, DENSITY=mean(DENSITY, na.rm=T)) # 47301

#### 02.09 Data preparation - data are averaged according to temporal information ####
# removing STATIONENDDATE allows averaging within months, seasons and years
data.avg3<-ddply(data.avg2, .(LONG, LAT, STATIONENMON, STATIONENSEASON, STATIONENYEAR, SCIENTIFICNAME_INCLUDED, ClassificationPhylum,ClassificationClass,ClassificationOrder,ClassificationFamily,ClassificationGenus), summarize, DENSITY=mean(DENSITY, na.rm=T)) # 47275
# removing STATIONENMON allows averaging within seasons and years
data.avg4<-ddply(data.avg3, .(LONG, LAT, STATIONENSEASON, STATIONENYEAR, SCIENTIFICNAME_INCLUDED, ClassificationPhylum,ClassificationClass,ClassificationOrder,ClassificationFamily,ClassificationGenus), summarize, DENSITY=mean(DENSITY, na.rm=T)) # 47275: this means that there are no samples within the same month
# removing STATIONENSEASON allows averaging within years
data.avg5<-ddply(data.avg4, .(LONG, LAT, STATIONENYEAR, SCIENTIFICNAME_INCLUDED, ClassificationPhylum,ClassificationClass,ClassificationOrder,ClassificationFamily,ClassificationGenus), summarize, DENSITY=mean(DENSITY, na.rm=T)) # 44658
# removing STATIONENYEAR allows averaging across years, so that the temporal information is lost entirely
data.avg6<-ddply(data.avg5, .(LONG, LAT, SCIENTIFICNAME_INCLUDED, ClassificationPhylum,ClassificationClass,ClassificationOrder,ClassificationFamily,ClassificationGenus), summarize, DENSITY=mean(DENSITY, na.rm=T)) # 39420

#### 02.10 Data preparation - cleaning up ####
data.avg<- data.avg6
rm(data.2000, data.avg1, data.avg2, data.avg3, data.avg4, data.avg5, data.avg6, data.ns, data.original)

#### 02.11 Data preparation - new grid 2000 x 2000 (m2) ####
library(rgeos)
library(reshape2)
st<- dcast(data.avg, LONG+ LAT ~., value.var="DENSITY", fun.aggregate=mean, na.rm=TRUE, fill=0)
st<- st[,-3]
coordinates(st)<- ~LONG+LAT
proj4string(st) = CRS("+init=epsg:4326")
st <-spTransform(st, CRSobj=CRS("+proj=utm +zone=32 +ellps=WGS84 +units=m +no_defs ")) # in WGS84 / UTM zone 32
# a polygon largely containing the dataset has been drawn
bb<- matrix(c(80122.3, 5941232.33, 80122.3, 6241232.33, 498122.3, 6241232.33, 498122.30, 5941232.33, 80122.3, 5941232.33), ncol=2, byrow=T)
bb = Polygon(bb)
bb<- SpatialPolygons(list(Polygons(list(bb), ID = "a")), proj4string=CRS("+proj=utm +zone=32 +ellps=WGS84 +units=m +no_defs "))
bb<- bbox(bb)
colnames(bb)<- c("min", "max")
rownames(bb)<- c("x", "y")
cs<- c(2000, 2000) # cell size
cc<- bb[,1]+ (cs/2)
cd<- ceiling(diff(t(bb))/cs)
topo<- GridTopology(cellcentre.offset=cc, cellsize=cs, cells.dim=cd)
new.grid<- SpatialGrid(topo, proj4string=CRS("+proj=utm +zone=32 +ellps=WGS84 +units=m +no_defs"))
rm(bb,cs,cc,cd,topo)
new.grid<- raster(new.grid)
new.grid<- setValues(new.grid, values=1: length(new.grid) )
id<- extract(new.grid, st)
data.matrix<- dcast(data.avg, LONG+ LAT ~SCIENTIFICNAME_INCLUDED, value.var="DENSITY", fun.aggregate=mean, na.rm=TRUE, fill=0)
tmp<- data.frame(coordinates(new.grid)[id,], data.matrix[,1:2])
data.gridded<- merge(data.avg, tmp, by=c("LONG","LAT"))
data.matrix.gridded<- dcast(data.gridded, x + y ~ SCIENTIFICNAME_INCLUDED, value.var="DENSITY", fun.aggregate=mean, na.rm=TRUE, fill=0) # 832
rm(id, new.grid, tmp, data.gridded, st, data.avg, data.matrix)

#### 02.12 Data preparation - removal of rare species ####
st<- data.frame(x= data.matrix.gridded$x, y= data.matrix.gridded$y)
data.matrix.gridded<- data.matrix.gridded[,!(names(data.matrix.gridded) %in% c("x", "y"))] # drop the coordinate columns
data.matrix.gridded.norare3 <-matrix(,nrow(data.matrix.gridded),(ncol(data.matrix.gridded)))
# species contributing less than 3% to the total density of a grid cell are set to zero
for (i in 1: nrow(data.matrix.gridded)) for (j in 1: ncol(data.matrix.gridded)) {
  if(data.matrix.gridded[i,j]/sum(data.matrix.gridded[i,])<0.03) data.matrix.gridded.norare3[i,j]=0 else data.matrix.gridded.norare3[i,j]<-data.matrix.gridded[i,j]}
colnames(data.matrix.gridded.norare3)<- colnames(data.matrix.gridded)
rownames(data.matrix.gridded.norare3)<- rownames(data.matrix.gridded)
data.matrix.gridded.norare3<- data.frame(data.matrix.gridded.norare3)
any(apply(data.matrix.gridded.norare3,2,sum)==0)
data.matrix.gridded.norare3<- data.matrix.gridded.norare3[,-which(apply(data.matrix.gridded.norare3,2,sum)==0)]
rm(i,j)

#### 02.13 Data preparation - outliers removal ####
library(outliers)
data.matrix.gridded.norare3.out<- rm.outlier(data.matrix.gridded.norare3,fill=T)
any(apply(data.matrix.gridded.norare3.out,2,sum)==0) # TRUE!
data.matrix.gridded.norare3.out<- as.data.frame(data.matrix.gridded.norare3.out[,-which(apply(data.matrix.gridded.norare3.out,2,sum)==0)])
any(apply(data.matrix.gridded.norare3.out,2,sum)==0) # FALSE!
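# note (addition, not part of the original script): the resampling loops below (sections 03.01,
# 13.03 and 13.04) draw random subsets inside %dopar% without a parallel-safe random seed, so
# repeated runs can give slightly different numbers; if fully reproducible runs are required,
# one option is the doRNG package (assumed to be installed), e.g.:
# library(doRNG)
# registerDoRNG(123)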
#### 03.01 Fuzzy clustering and validation ####
library(vegan)
library(cluster)
library(foreach)
library(doMC)
registerDoMC(cores=4)
best.res1000<- foreach(k = 1:1000, .combine=rbind, .multicombine=TRUE) %dopar% {
  index<- sample(dim(data.matrix.gridded.norare3.out)[1],replace=F)
  data.sam<- data.matrix.gridded.norare3.out[index,]
  d.bray<- vegdist(sqrt(sqrt(data.sam)), "bray", na.rm=T, upper=T)
  res<- data.frame()
  for (i in 2:10) {
    super=fanny(d.bray,k=i, diss=T, maxit=500, memb.exp=1.1)
    res=rbind(res, c(i, super$convergence[1], super$coeff[2], super$silinfo$avg.width))
    colnames(res)<-c("i", "inter", "dunn.index.normalized", "silhouette.averaged.width")
  }
  return(list(res))
}
combined_list<-do.call("rbind",best.res1000)
best.res1000.mean<- data.frame(apply(combined_list[,2:4], 2, function(x) tapply(x, factor(combined_list$i), mean)))
best.res1000.sd<- data.frame(apply(combined_list[,2:4], 2, function(x) tapply(x, factor(combined_list$i), sd)))
# plotting results
# box plot
pdf("/file.pdf", width=3.15,height=6, family="Helvetica", onefile=T, bg="white")
{layout(matrix(c(1,2), 2, 1, byrow=T), heights=c(7.1, 8.12))
#layout.show(2)
par(cex=.6)
par(mar=c(0.5,4.3,0.8,1)) #
Hmisc::errbar(2:10, best.res1000.mean[,2], best.res1000.mean[,2]+best.res1000.sd[,2], best.res1000.mean[,2]-best.res1000.sd[,2], add=F, pch=16, xlab= "", ylab= "Normalized dunn index", xaxt="n")
rect(1, 0.785, 2.2, 0.9, col = "white", border= "black", lwd=1)
text(1.93,0.79, "A", col="black", font=2)
box()
par(mar=c(4.3,4.2,0.2,1),new=F)
Hmisc::errbar(2:10, best.res1000.mean[,3], best.res1000.mean[,3]+best.res1000.sd[,3], best.res1000.mean[,3]-best.res1000.sd[,3], add=F, pch=16, xlab= "Number of clusters", ylab= "Silhouette averaged width")
axis(1, 2:10)
rect(1, 0.1781, 2.21, 0.185, col = "white", border= "black", lwd=1)
text(1.958,0.17937, "B", col="black", font=2)
box()}
dev.off()
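# note (addition, not part of the original script): the MDS figure below uses a full-data
# dissimilarity matrix d.bray and fuzzy partitions fcl2 ... fcl10 that are not created elsewhere
# in this script; a minimal sketch of how they could be obtained, assuming the same fanny
# settings as in the validation loop above, is:
d.bray<- vegdist(sqrt(sqrt(data.matrix.gridded.norare3.out)), "bray", na.rm=T, upper=T)
for (i in 2:10) assign(paste0("fcl", i), fanny(d.bray, k=i, diss=T, maxit=500, memb.exp=1.1))
rm(i)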
# MDS
pdf("/file.pdf", width=6.54,height=6.3, family="Helvetica", onefile=T, bg="white")
{layout(matrix(c(1,2,3,4,5,6,7,8,9), 3, 3, byrow=T), widths=c(6.33,5.13,5.13), heights=c(4.93, 4.93, 6.13))
par(cex=.6)
par(mar=c(0,4,.5,.5))
plot(scores(cmdscale(d.bray), choice=c(1,2)), type="n", xaxt="n", xlab="")
for (i in 1:2){
  dc.scores<- scores(cmdscale(d.bray))
  gg<- dc.scores[fcl2$cluster==i,]
  hpts<- chull(gg)
  hpts<- c(hpts, hpts[1])
  polygon(gg[hpts,], col=adjustcolor(i+1, alpha.f=.3))
  lines(gg[hpts,], col=i+1, lwd=2)
  points(scores(cmdscale(d.bray)), pch=16, col=fcl2$cluster+1)}
text(.25,-.52, paste("clusters n =", i ), font=2)
par(mar=c(0,0,.5,.5)) # c(bottom, left, top, right)
plot(scores(cmdscale(d.bray), choice=c(1,2)), type="n", xaxt="n", yaxt="n", xlab="", ylab="")
for (i in 1:3){
  dc.scores<- scores(cmdscale(d.bray))
  gg<- dc.scores[fcl3$cluster==i,]
  hpts<- chull(gg)
  hpts<- c(hpts, hpts[1])
  polygon(gg[hpts,], col=adjustcolor(i+1, alpha.f=.3))
  lines(gg[hpts,], col=i+1, lwd=2)
  points(scores(cmdscale(d.bray)), pch=16, col=fcl3$cluster+1)}
text(.25,-.52, paste("clusters n =", i ), font=2)
par(mar=c(0,0,.5,.5)) # c(bottom, left, top, right)
plot(scores(cmdscale(d.bray), choice=c(1,2)), type="n", xaxt="n", yaxt="n", xlab="", ylab="")
for (i in 1:4){
  dc.scores<- scores(cmdscale(d.bray))
  gg<- dc.scores[fcl4$cluster==i,]
  hpts<- chull(gg)
  hpts<- c(hpts, hpts[1])
  polygon(gg[hpts,], col=adjustcolor(i+1, alpha.f=.3))
  lines(gg[hpts,], col=i+1, lwd=2)
  points(scores(cmdscale(d.bray)), pch=16, col=fcl4$cluster+1)}
text(.25,-.52, paste("clusters n =", i ), font=2)
par(mar=c(0,4,.5,.5)) # c(bottom, left, top, right)
plot(scores(cmdscale(d.bray), choice=c(1,2)), type="n", xaxt="n", xlab="")
for (i in 1:5){
  dc.scores<- scores(cmdscale(d.bray))
  gg<- dc.scores[fcl5$cluster==i,]
  hpts<- chull(gg)
  hpts<- c(hpts, hpts[1])
  polygon(gg[hpts,], col=adjustcolor(i+1, alpha.f=.3))
  lines(gg[hpts,], col=i+1, lwd=2)
  points(scores(cmdscale(d.bray)), pch=16, col=fcl5$cluster+1)}
text(.25,-.52, paste("clusters n =", i ), font=2)
par(mar=c(0,0,.5,.5)) # c(bottom, left, top, right)
plot(scores(cmdscale(d.bray), choice=c(1,2)), type="n", xaxt="n", yaxt="n", xlab="", ylab="")
for (i in 1:6){
  dc.scores<- scores(cmdscale(d.bray))
  gg<- dc.scores[fcl6$cluster==i,]
  hpts<- chull(gg)
  hpts<- c(hpts, hpts[1])
  polygon(gg[hpts,], col=adjustcolor(i+1, alpha.f=.3))
  lines(gg[hpts,], col=i+1, lwd=2)
  points(scores(cmdscale(d.bray)), pch=16, col=fcl6$cluster+1)}
text(.25,-.52, paste("clusters n =", i ), font=2)
par(mar=c(0,0,.5,.5)) # c(bottom, left, top, right)
plot(scores(cmdscale(d.bray), choice=c(1,2)), type="n", xaxt="n", yaxt="n", xlab="", ylab="")
for (i in 1:7){
  dc.scores<- scores(cmdscale(d.bray))
  gg<- dc.scores[fcl7$cluster==i,]
  hpts<- chull(gg)
  hpts<- c(hpts, hpts[1])
  polygon(gg[hpts,], col=adjustcolor(i+1, alpha.f=.3))
  lines(gg[hpts,], col=i+1, lwd=2)
  points(scores(cmdscale(d.bray)), pch=16, col=fcl7$cluster+1)}
text(.25,-.52, paste("clusters n =", i ), font=2)
par(mar=c(4,4,.5,.5)) # c(bottom, left, top, right)
plot(scores(cmdscale(d.bray), choice=c(1,2)), type="n")
for (i in 1:8){
  dc.scores<- scores(cmdscale(d.bray))
  gg<- dc.scores[fcl8$cluster==i,]
  hpts<- chull(gg)
  hpts<- c(hpts, hpts[1])
  polygon(gg[hpts,], col=adjustcolor(i+1, alpha.f=.3))
  lines(gg[hpts,], col=i+1, lwd=2)
  points(scores(cmdscale(d.bray)), pch=16, col=fcl8$cluster+1)}
text(.25,-.52, paste("clusters n =", i ), font=2)
par(mar=c(4,0,.5,.5)) # c(bottom, left, top, right)
plot(scores(cmdscale(d.bray), choice=c(1,2)), type="n", yaxt="n", ylab="")
for (i in 1:9){
  dc.scores<- scores(cmdscale(d.bray))
  gg<- dc.scores[fcl9$cluster==i,]
  hpts<- chull(gg)
  hpts<- c(hpts, hpts[1])
  polygon(gg[hpts,], col=adjustcolor(i+1, alpha.f=.3))
  lines(gg[hpts,], col=i+1, lwd=2)
  points(scores(cmdscale(d.bray)), pch=16, col=fcl9$cluster+1)}
text(.25,-.52, paste("clusters n =", i ), font=2)
par(mar=c(4,0,.5,.5)) # c(bottom, left, top, right)
plot(scores(cmdscale(d.bray), choice=c(1,2)), type="n", yaxt="n", ylab="")
for (i in 1:10){
  dc.scores<- scores(cmdscale(d.bray))
  gg<- dc.scores[fcl10$cluster==i,]
  hpts<- chull(gg)
  hpts<- c(hpts, hpts[1])
  polygon(gg[hpts,], col=adjustcolor(i+1, alpha.f=.3))
  lines(gg[hpts,], col=i+1, lwd=2)
  points(scores(cmdscale(d.bray)), pch=16, col=fcl10$cluster+1)}
text(.25,-.52, paste("clusters n =", i ), font=2)
}
dev.off()

#### 03.02 Fuzzy clustering with best partition ####
d.bray<- vegdist(sqrt(sqrt(data.matrix.gridded.norare3.out)), "bray", na.rm=T, upper=T)
set.seed(123)
fuzzy<-fanny(d.bray,k=4,diss=T, maxit=1000, memb.exp=1.1)$clustering
groups.fuzzy<- as.data.frame(fuzzy)
membership<- as.data.frame(fanny(d.bray,k=4,diss=T, maxit=1000, memb.exp=1.1)$membership)
names(membership)<-c("memb1", "memb2", "memb3", "memb4")
rm(d.bray)

#### 03.03 Confusion index ####
for (i in 1: nrow(membership)){
  membership$CI[i] <- (sort(t(membership[i,1:4]), decreasing = T)[2] / sort(t(membership[i,1:4]), decreasing = T)[1])}
rm(i)
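# worked example (addition, not part of the original script): for a station with memberships
# (0.70, 0.20, 0.06, 0.04) the confusion index is the second largest membership divided by the
# largest one; CI is close to 0 for unambiguous assignments and close to 1 where two
# communities are almost equally supported
sort(c(0.70, 0.20, 0.06, 0.04), decreasing = TRUE)[2] / sort(c(0.70, 0.20, 0.06, 0.04), decreasing = TRUE)[1] # 0.2857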
#### 04.01 Characteristic species ####
# criteria were taken from Rachor et al. 2007
# characteristic species were calculated only for core community locations, i.e.
# where the confusion index was lower than 0.5
data.clust<- data.frame(data.matrix.gridded.norare3.out, groups.fuzzy)
any(apply(data.clust,2,sum)==0) #

#### 04.02 Characteristic species - Fidelity in Abundance (FA) ####
k<-length(levels(as.factor(data.clust$fuzzy)))
# total individual number of a species within a cluster / total individual number of the same species in the survey
FA<- matrix(,1,1)
for (i in 1:k) {
  FA[i]<- list( apply(data.clust[which(data.clust$fuzzy==i),1:(ncol(data.clust)-1)],2,sum)/ apply(data.clust[,1:(ncol(data.clust)-1)],2,sum))}
FA<- as.data.frame(FA)
colnames(FA)<- paste("group", seq(1,k,1))
rownames(FA)<- names(data.clust)[1:(ncol(data.clust)-1)]
any(is.na(FA))

#### 04.03 Characteristic species - Presence (P) ####
# percentage of stations within a cluster in which the species was found
P<- matrix(,1,1)
for (i in 1:k) {
  P[i]<- list(apply( decostand(data.clust[which(data.clust$fuzzy==i),1:(ncol(data.clust)-1)],"pa"),2,sum)/ length(which(data.clust$fuzzy==i)))}
P<- as.data.frame(P)
colnames(P)<- paste("group", seq(1,k,1))
rownames(P)<- names(data.clust)[1:(ncol(data.clust)-1)]
any(is.na(P))

#### 04.04 Characteristic species - Fidelity in Presence (FP) ####
# number of presence stations within a cluster / number of presence stations within the survey
FP<- matrix(,1,1)
for (i in 1:k) {
  FP[i]<- list((apply( decostand(data.clust[which(data.clust$fuzzy==i),1:(ncol(data.clust)-1)],"pa"),2,sum))/ apply( decostand(data.clust[,1:(ncol(data.clust)-1)],"pa"),2,sum))}
FP<- as.data.frame(FP)
colnames(FP)<- paste("group", seq(1,k,1))
rownames(FP)<- names(data.clust)[1:(ncol(data.clust)-1)]
any(is.na(FP))

#### 04.05 Characteristic species - Numerical Dominance (ND) ####
# ratio of a species' abundance in a cluster to the total abundance in the same cluster
ND<- matrix(,1,1)
for (i in 1:k) {
  ND[i]= list((apply( data.clust[which(data.clust$fuzzy==i),1:(ncol(data.clust)-1)],2,sum))/ # gives the total abundance of each species for each cluster
    sum(data.clust[which(data.clust$fuzzy==i),1:(ncol(data.clust)-1)]))} # gives the total abundance for each cluster
ND<- as.data.frame(ND)
colnames(ND)<- paste("group", seq(1,k,1))
rownames(ND)<- names(data.clust)[1:(ncol(data.clust)-1)]
any(is.na(ND))

#### 04.07 Characteristic species - Thresholds application ####
# each index was evaluated against a given threshold:
#             ND     FA     P      FP
# this work   >1%    >50%   >60%   >50%
FA.rank<- data.frame(matrix(,ncol(data.clust)-1,k))
P.rank<- data.frame(matrix(,ncol(data.clust)-1, k))
FP.rank<-data.frame(matrix(,ncol(data.clust)-1, k))
ND.rank<- data.frame(matrix(,ncol(data.clust)-1,k))
for (i in 1: k) { # i indexes the clusters (the same indices i and j are used again below)
  for (j in 1:(ncol(data.clust)-1)) { # for each cluster, j identifies the species, as each index has to be evaluated for each species
    if (FA[j,i]<0.5) FA.rank[j,i]=0 else FA.rank[j,i]=1
    if (P[j,i]>0.6) P.rank[j,i]=1 else P.rank[j,i]=0
    if (FP[j,i]<0.5) FP.rank[j,i]=0 else FP.rank[j,i]=1
    if (ND[j,i]<=0.01) ND.rank[j,i]=0 else ND.rank[j,i]=1}}
colnames(FA.rank)<- paste("group", seq(1,k,1))
rownames(FA.rank)<- names(data.clust)[1:(ncol(data.clust)-1)]
colnames(P.rank)<- paste("group", seq(1,k,1))
rownames(P.rank)<- names(data.clust)[1:(ncol(data.clust)-1)]
colnames(FP.rank)<- paste("group", seq(1,k,1))
rownames(FP.rank)<- names(data.clust)[1:(ncol(data.clust)-1)]
colnames(ND.rank)<- paste("group", seq(1,k,1))
rownames(ND.rank)<- names(data.clust)[1:(ncol(data.clust)-1)]
# here the index ranks are summed for each species and each cluster
ch.sp<- (matrix(,ncol(data.clust)-1,k))
for (i in 1: k) { # i indexes the clusters
  for (j in 1:(ncol(data.clust)-1)) { # j identifies the species (each index is evaluated for each species)
    ch.sp[j,i]=(FA.rank[j,i]+ P.rank[j,i] +FP.rank[j,i] + ND.rank[j,i] )}}
colnames(ch.sp)<- paste("group", seq(1,k,1))
rownames(ch.sp)<- names(data.clust)[1:(ncol(data.clust)-1)]

#### 04.08 Characteristic species - The list ####
# if a species exceeds the threshold for at least 3 of the criteria, it is labelled as characteristic of the selected cluster
chara.fuzzy<- list()
for (i in 1:k) {chara.fuzzy[i]<-list(data.frame(characteristic.taxa=names(which(ch.sp[,i]>=3))))}
names(chara.fuzzy)<- paste("group", seq(1,k,1))
chara.fuzzy
rm(i,j,k, FA, FA.rank, FP, FP.rank, ND, ND.rank, P, P.rank, data.clust)

#### 05.01 Statistical test on beta diversity and fuzzy clustering ####
d<- vegdist(data.matrix.gridded.norare3.out, method="jaccard", binary=T)
permdisp<-betadisper(d=d, group=groups.fuzzy[,1], type = "centroid")
beta.perm<- permutest(permdisp, pairwise = TRUE, permutations = 999)
beta.perm
rm(d)

#### 05.02 Boxplot of beta diversity along different clusters ####
# boxplot
pdf("/file.pdf", width=3.15,height=3, family="Helvetica", onefile=T, bg="white")
{ par(mfrow=c(1,1))
par(mar=c(4.5,4.5,1,1), cex=0.6) # c(bottom, left, top, right)
boxplot(permdisp, notch=T,names= c("a", "b", "c", "d"), xlab="Communities")}
dev.off()
rm(permdisp)

#### 06.01 Interpolation - Spatial grid 1000 x 1000 (m2) resolution ####
bb<- matrix(c(110000, 5900000, 520000, 6300000),2,2)
colnames(bb)<- c("min", "max")
rownames(bb)<- c("x", "y")
cs<- c(1000, 1000) # cell size
cc<- bb[,1]+ (cs/2)
cd<- ceiling(diff(t(bb))/cs)
topo<- GridTopology(cellcentre.offset=cc, cellsize=cs, cells.dim=cd)
new.grid<- SpatialGrid(topo, proj4string=CRS("+proj=utm +zone=32 +ellps=WGS84 +units=m +no_defs"))
rm(bb,cs,cc,cd,topo)

#### 06.02 Interpolation - Intersection with depth ####
# retain bathymetry data only for the North Sea
depth.ns<-crop(depth,extent(c(2.5, 9, 53, 56.5)), snap="out")
# transform into WGS84 / UTM zone 32
depth.ns.utm<- projectRaster(depth.ns, crs= "+proj=utm +zone=32 +ellps=WGS84 +units=m +no_defs")
depth.ns.utm<- as(depth.ns.utm, "SpatialPixelsDataFrame")
rm(depth.ns,depth)
new.grid<- as(new.grid, "SpatialGridDataFrame")
index<- (over(new.grid, depth.ns.utm, returnList=F) )
new.grid@data<- data.frame(matrix(,length(new.grid),1))
new.grid@data[1]<- index
names(new.grid)<- "depth"
new.grid<- as.data.frame(new.grid)
new.grid<- new.grid[complete.cases(new.grid),]
coordinates(new.grid)<- ~x+y
proj4string(new.grid)<-CRS("+proj=utm +zone=32 +ellps=WGS84 +units=m
+no_defs") gridded(new.grid)<- T results.v1.sp<- data.frame(id= seq(1,nrow(data.matrix.gridded.norare3.out),1), x=st[,1], y=st[,2], groups=groups.fuzzy, membership) coordinates(results.v1.sp)<- ~x+y proj4string(results.v1.sp)<-CRS("+proj=utm +zone=32 +ellps=WGS84 +units=m +no_defs") results.v1.sp@data$depth<- unlist(over(results.v1.sp, depth.ns.utm) ) any(is.na(results.v1.sp@data)) rm(index, membership, groups.fuzzy) #### 06.03 Interpolation - Best formula: memb1 ~ x+y #### library(gstat) # here only the piece of script ending up with the lowest RMSE is reported results.v1.sp$memb1.trans6<- sqrt(sqrt(results.v1.sp$memb1)) var.trans6<- variogram(memb1.trans6 ~ x+y, results.v1.sp) vgm.trans6<- vgm( nugget=0, model="Sph",range=sqrt(diff(results.v1.sp@bbox["x",])^2 + diff(results.v1.sp@bbox["y",])^2)/4, psill=var(results.v1.sp@data$memb1.trans6)) fit.trans6<- fit.variogram(var.trans6, vgm.trans6) plot(var.trans6, fit.trans6) rk.memb1.trans6<-krige.cv(memb1.trans6 ~ x+y, results.v1.sp, new.grid, model=fit.trans6, nfold=5) sqrt(mean((rk.memb1.trans6$var1.pred-rk.memb1.trans6$observed)^2)/length(rk.memb1.trans6$var1.pred)) # RMSE = 0.005430016 rk.memb1.trans6<-krige(memb1.trans6 ~ x+y, results.v1.sp, new.grid, model=fit.trans6) # back transformation rk.memb1.trans6@data$var1.pred.back<- rk.memb1.trans6@data$var1.pred^4 rk.memb1.trans6@data$var1.pred.back<- decostand(rk.memb1.trans6@data$var1.pred.back, method="range") # this is to rescale the data between 0 and 1 rm(var.trans6, vgm.trans6, fit.trans6) #### 06.04 Interpolation - Best formula: memb2 ~ 1 #### # here only the piece of script ending up with the lowest RMSE is reported results.v1.sp$memb2.trans6<- sqrt(sqrt(results.v1.sp$memb2)) var.trans6<- variogram(memb2.trans6 ~ 1, results.v1.sp) vgm.trans6<- vgm( nugget=0, model="Sph", range=sqrt(diff(results.v1.sp@bbox["x",])^2 + diff(results.v1.sp@bbox["y",])^2)/4, psill=var(results.v1.sp@data$memb2.trans6)) # RMSE = 0.007985304 fit.trans6<- fit.variogram(var.trans6, vgm.trans6) plot(var.trans6, fit.trans6) rk.memb2.trans6<-krige.cv(memb2.trans6 ~ 1, results.v1.sp, new.grid, model=fit.trans6, nfold=5) sqrt(mean((rk.memb2.trans6$var1.pred-rk.memb2.trans6$observed)^2)/length(rk.memb2.trans6$var1.pred)) # RMSE = 0.007985304 rk.memb2.trans6<-krige(memb2.trans6 ~ 1, results.v1.sp, new.grid, model=fit.trans6) # back transformation rk.memb2.trans6@data$var1.pred.back<- rk.memb2.trans6@data$var1.pred^4 rk.memb2.trans6@data$var1.pred.back<- decostand(rk.memb2.trans6@data$var1.pred.back, method="range") # this is to rescale the data between 0 and 1 rm(var.trans6, vgm.trans6,fit.trans6) #### 06.05 Interpolation - Best formula: memb3 ~ (x^2)+(y^2)+(x*y)+x+y #### # here only the piece of script ending up with the lowest RMSE is reported results.v1.sp$memb3.trans6<- sqrt(sqrt(results.v1.sp$memb3)) var.trans6<- variogram(memb3.trans6 ~ (x^2)+(y^2)+(x*y)+x+y, results.v1.sp) vgm.trans6<- vgm(nugget=0, model="Sph", range=sqrt(diff(results.v1.sp@bbox["x",])^2 + diff(results.v1.sp@bbox["y",])^2)/4, psill=var(results.v1.sp@data$memb3.trans6)) fit.trans6<- fit.variogram(var.trans6, vgm.trans6) plot(var.trans6, fit.trans6) rk.memb3.trans6<-krige.cv(memb3.trans6 ~(x^2)+(y^2)+(x*y)+x+y, results.v1.sp, new.grid, model=fit.trans6, nfold=5) sqrt(mean((rk.memb3.trans6$var1.pred-rk.memb3.trans6$observed)^2)/length(rk.memb3.trans6$var1.pred)) # RMSE = 0.007783309 rk.memb3.trans6<-krige(memb3.trans6 ~ (x^2)+(y^2)+(x*y)+x+y, results.v1.sp, new.grid, model=fit.trans6) # back transformation rk.memb3.trans6@data$var1.pred.back<- 
rk.memb3.trans6@data$var1.pred^4 rk.memb3.trans6@data$var1.pred.back<- decostand(rk.memb3.trans6@data$var1.pred.back, method="range") # this is to rescale the data between 0 and 1 rm(var.trans6, vgm.trans6,fit.trans6) #### 06.06 Interpolation - Best formula: memb4 ~ depth #### # here only the piece of script ending up with the lowest RMSE is reported results.v1.sp$memb4.trans6<- sqrt(sqrt(results.v1.sp$memb4)) var.trans6<- variogram(memb4.trans6 ~ depth, results.v1.sp) vgm.trans6<- vgm(nugget=0, model="Sph", range=sqrt(diff(results.v1.sp@bbox["x",])^2 + diff(results.v1.sp@bbox["y",])^2)/4, psill=var(results.v1.sp@data$memb4.trans6)) fit.trans6<- fit.variogram(var.trans6, vgm.trans6) plot(var.trans6, fit.trans6) rk.memb4.trans6<-krige.cv(memb4.trans6 ~ depth, results.v1.sp, new.grid, model=fit.trans6, nfold=5) sqrt(mean((rk.memb4.trans6$var1.pred-rk.memb4.trans6$observed)^2)/length(rk.memb4.trans6$var1.pred)) # RMSE = 0.006514325 rk.memb4.trans6<-krige(memb4.trans6 ~ depth, results.v1.sp, new.grid, model=fit.trans6) # back transformation rk.memb4.trans6@data$var1.pred.back<- rk.memb4.trans6@data$var1.pred^4 rk.memb4.trans6@data$var1.pred.back<- decostand(rk.memb4.trans6@data$var1.pred.back, method="range") # this is to rescale the data between 0 and 1 rm(var.trans6, vgm.trans6,fit.trans6) #### 07.01 Communities distribution #### library(plotKML) pdf("/file.pdf", width=6.54,height=6, family="Helvetica", onefile=T, bg="white") { layout(matrix(c(1,2,3,4), 2, 2, byrow=T), heights=c(7.1, 8.14)) par(cex=.6) options("scipen"=-2, "digits"=4) par(mar=c(0.8,5,1,0.5)) image(raster(rk.memb1.trans6["var1.pred.back"]), col=SAGA_pal[[16]], main="", ylab="Latitude (m)", xlab="", xaxt="n") rect(110000, 6250000, 147000, 6280000, col = "white", border= "black", lwd=1) text(129000,6265000, "A", col="black", font=2) plot(eez, add=T, col="grey") plot(coast, add=T, col="grey") box() scalebar(100000, type="bar", xy=c(129000, 5920000),adj=c(0.5,-1.3), below="kilometers",divs=4, lonlat=F, label=c(0, 50, 100), cex=1, font=1) par(mar=c(0.8,.5,1,5)) image(raster(rk.memb2.trans6["var1.pred.back"]), col=SAGA_pal[[16]], main="", ylab="", xlab="",xaxt="n", yaxt="n") rect(110000, 6250000, 147000, 6280000, col = "white", border= "black", lwd=1) text(129000,6265000, "B", col="black", font=2) plot(eez, add=T, col="grey") plot(coast, add=T, col="grey") box() par(mar=c(5,5,.2,.5)) image(raster(rk.memb3.trans6["var1.pred.back"]), col=SAGA_pal[[16]], main="", xlab="Longitude (m)", ylab="Latitude (m)") rect(110000, 6250000, 147000, 6280000, col = "white", border= "black", lwd=1) text(129000,6265000, "C", col="black", font=2) plot(eez, add=T, col="grey") plot(coast, add=T, col="grey") box() par(mar=c(5,.5,.2,5)) image(raster(rk.memb4.trans6["var1.pred.back"]), col=SAGA_pal[[16]], main="", xlab="Longitude (m)", ylab="", yaxt="n") rect(110000, 6250000, 147000, 6280000, col = "white", border= "black", lwd=1) text(129000,6265000, "D", col="black", font=2) plot(eez, add=T, col="grey") plot(coast, add=T, col="grey") box() par(mar=c(4.8,0,0,11), cex=.6, new=FALSE) plot(raster(rk.memb4.trans6["var1.pred.back"]), legend.only=T, legend.width=2, legend.shrink=.98, legend.mar=0, col=SAGA_pal[[16]]) } dev.off() #### 07.02 Communities eveness #### raster.ovelap<- data.frame(coordinates(rk.memb1.trans6), rk.memb1.trans6@data["var1.pred.back"], rk.memb2.trans6@data["var1.pred.back"], rk.memb3.trans6@data["var1.pred.back"], rk.memb4.trans6@data["var1.pred.back"]) names(raster.ovelap)<- c("x", "y", "memb1", "memb2", "memb3", "memb4") raster.ovelap$j 
<- diversity(raster.ovelap[,3:6], index = "shannon")/log(specnumber(raster.ovelap[,3:6])) coordinates(raster.ovelap)<- ~x+y proj4string(raster.ovelap)<- CRS("+proj=utm +zone=32 +ellps=WGS84 +units=m +no_defs") # in WGS84 / UTM zone 32 gridded(raster.ovelap)<- T pdf("/file.pdf", width=3.15,height=3, family="Helvetica", onefile=T, bg="white") par(mfrow=c(1,1), mar=c(2.5,2.5,.9,2.5), cex=0.6) options("scipen"=-2, "digits"=4) plot(raster(raster.ovelap["j"]), axes=T, xlab="Longitude (m)", ylab="Latitude (m)", legend=F, col=SAGA_pal[[1]], legend.mar=0) plot(eez, add=T, col="grey", lwd=1) plot(coast, add=T, col="grey") scalebar(100000, type="bar", xy=c(129000, 5920000),adj=c(0.5,-1.3), below="kilometers",divs=4, lonlat=F, label=c(0, 50, 100), cex=0.6, font=1) par(cex=0.6, new=FALSE) plot(raster(raster.ovelap["j"]), legend.only=T, legend.width=.8, legend.shrink=1, legend.mar=2, col=SAGA_pal[[1]]) box() dev.off() #### 07.03 Communities with sharp boundaries & Confusion #### # pixel classification # memb1 = Amphiura-filiformis # memb2 = Bathyporeia-Tellina # memb3 = Gonadiella-Spisula # memb4 = Phoronis communities.raster<- data.frame("Amphiura.filiformis"= rk.memb1.trans6@data$var1.pred.back, "Bathyporeia.Tellina"= rk.memb2.trans6@data$var1.pred.back, "Gonadiella.Spisula"= rk.memb3.trans6@data$var1.pred.back, "Phoronis"= rk.memb4.trans6@data$var1.pred.back) for (i in 1: nrow(communities.raster)){ communities.raster$winner[i]<- colnames(communities.raster)[which(communities.raster[i,1:4]==max(communities.raster[i,1:4]))]} rm(i) communities.raster$winner<- as.factor(communities.raster$winner) communities.raster$winner.num<-communities.raster$winner levels(communities.raster$winner.num)<- c("1", "2", "3", "4") communities.raster$winner.num<- as.numeric(as.character(communities.raster$winner.num)) names(communities.raster) for (i in 1: nrow(communities.raster)){communities.raster$Amphiura.filiformis.rescaled[i]<- communities.raster[i,1]/sum(communities.raster[i,1:4])} for (i in 1: nrow(communities.raster)){communities.raster$Bathyporeia.Tellina.rescaled[i]<- communities.raster[i,2]/sum(communities.raster[i,1:4])} for (i in 1: nrow(communities.raster)){communities.raster$Gonadiella.Spisula.rescaled[i]<- communities.raster[i,3]/sum(communities.raster[i,1:4])} for (i in 1: nrow(communities.raster)){communities.raster$Phoronis.rescaled[i]<- communities.raster[i,4]/sum(communities.raster[i,1:4])} for (i in 1: nrow(communities.raster)){ communities.raster$ci[i]<- (sort(t(communities.raster[i,7:10]), decreasing = T)[2] / sort(t(communities.raster[i,7:10]), decreasing = T)[1])} rm(i) communities.raster.sp<-new.grid communities.raster.sp@data$winner.num<- communities.raster$winner.num communities.raster.sp@data$ci<- communities.raster$ci rm(communities.raster) pdf("/file.pdf", width=3.15,height=3, family="Helvetica", onefile=T, bg="white") par(mfrow=c(1,1), mar=c(2.5,2.5,.5,0.5), cex=0.6) # c(bottom, left, top, right) 2.5,2.5,0.5,0 options("scipen"=-2, "digits"=4) plot(raster(communities.raster.sp["winner.num"]), axes=T, xlab="Longitude (m)", ylab="Latitude (m)", legend=F, col=c("red","orange", "green", "blue"), legend.mar=0) plot(eez, add=T, col="grey") plot(coast, add=T, col="grey") scalebar(100000, type="bar", xy=c(129000, 6010000),adj=c(0.5,-1.3), below="kilometers",divs=4, lonlat=F, label=c(0, 50, 100), cex=.6, font=1) legend("bottomleft", c("Amphiura-filiformis","Bathyporeia-Tellina","Gonadiella-Spisula","Phoronis"), fill = c("red","orange", "green", "blue"), bg = "white", text.font=1, xjust=0, horiz=F, 
xpd=T, cex=0.6, bty="n") box() dev.off() #### 08.01 Amount of core areas and of not core areas #### r.sel<- mask(raster(raster.ovelap["j"]), study.area) r.sel<- as(r.sel, "SpatialGridDataFrame") for( i in 1:length(r.sel@data$j)) { if (is.na(r.sel@data$j[i])) r.sel@data$areaTypes[i]=NA else {if(r.sel@data$j[i]< 0.4) r.sel@data$areaTypes[i]<- 1 else r.sel@data$areaTypes[i]<- 4}} # 4 means that more than 1 community is occurring # 1 means taht only 1 community is occurring length(which(r.sel@data$areaTypes==1))/(length(which(r.sel@data$areaTypes==4))+length(which(r.sel@data$areaTypes==1)))*100 # 38.22364 % length(which(r.sel@data$areaTypes==4))/(length(which(r.sel@data$areaTypes==4))+length(which(r.sel@data$areaTypes==1)))*100 # 61.77636 % # Area types plus: i.e. 1 community (the core), 2nd degree of instability and 3rd degree of instability (more than 2 communities) for( i in 1:length(r.sel@data$j)) { if (is.na(r.sel@data$j[i])) r.sel@data$areaTypes.p[i]=NA else {if(r.sel@data$j[i] > 0.6) r.sel@data$areaTypes.p[i]<- 3 else {if(r.sel@data$j[i] <= 0.6 & r.sel@data$j[i]> 0.4) r.sel@data$areaTypes.p[i]<- 2 else {if(r.sel@data$j[i] <= 0.4) r.sel@data$areaTypes.p[i]<- 1 else r.sel@data$areaTypes.p[i]<- -999}}}} # 3 means more than 2 communities occurring # 2 means about 2 communities occurring # 1 means about 1 community occurring any(r.sel@data$areaTypes.p==-999) # no -999: OK !!! #### 08.02 Diversity pattern from core to transition areas - Data preparation #### results.v1.sp$ci<- results.v1.sp %over% communities.raster.sp["ci"] div<- data.frame(coordinates(results.v1.sp), "j.comm"=extract(raster(raster.ovelap["j"]), results.v1.sp), "winner"= results.v1.sp$fuzzy, "memb1"= extract(raster(raster.ovelap["memb1"]), results.v1.sp), "memb2" =extract(raster(raster.ovelap["memb2"]), results.v1.sp), "memb3"= extract(raster(raster.ovelap["memb3"]), results.v1.sp), "memb4"= extract(raster(raster.ovelap["memb4"]), results.v1.sp), data.matrix.gridded.norare3.out) for (i in 1: nrow(div)){ if (div[i,"j.comm"] <= 0.4) div[i,"core"]<- "1" else if (div[i,"j.comm"] > 0.4 & div[i,"j.comm"] <= 0.6) div[i,"core"]<- "2" else if (div[i,"j.comm"] > 0.6) div[i,"core"]<- "3" else div[i,"core"]<- NA} any(is.na(div$core)) # OK! 
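# quick look (addition, not part of the original script): 'core' classes every station by the
# local evenness of the interpolated memberships (1 = low evenness, core area; 2 = medium;
# 3 = high evenness, transition area); table() shows how many stations fall into each class
table(div$core)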
div$spp<- specnumber(div[,9:156]) #### 08.03 Diversity pattern from core to transition areas - Plots #### pdf("/file.pdf", width=6.54,height=6, family="Helvetica", onefile=T, bg="white") { layout(matrix(c(1,2,3,4), 2, 2, byrow=T), heights=c(7.1, 8.14)) par(cex=.6) options("scipen"=-2, "digits"=4) par(mar=c(0.8,5,1,0.5)) image(raster(r.sel["areaTypes.p"]), col=c("blue", "green", "red"), main="", ylab="Latitude (m)", xlab="", xaxt="n") rect(110000, 6250000, 147000, 6280000, col = "white", border= "black", lwd=1) text(129000,6265000, "A", col="black", font=2) plot(eez, add=T, col="grey") plot(coast, add=T, col="grey") box() points(div[div$memb1 >= 0.1 ,1:2], pch=21, bg="grey", cex=0.8) scalebar(100000, type="bar", xy=c(129000, 5920000),adj=c(0.5,-1.3), below="kilometers",divs=4, lonlat=F, label=c(0, 50, 100), cex=1, font=1) legend(x=5e+05, y=6.28e+06, legend=c("low","medium","high"), fill= c("blue", "green", "red"),bg = "white", text.font=1, , xjust=1, horiz=F, xpd=T, cex=1, box.lwd=1) par(mar=c(0.8,.5,1,5)) image(raster(r.sel["areaTypes.p"]), col=c("blue", "green", "red"), main="", ylab="", xlab="",xaxt="n", yaxt="n") rect(110000, 6250000, 147000, 6280000, col = "white", border= "black", lwd=1) text(129000,6265000, "B", col="black", font=2) plot(eez, add=T, col="grey") plot(coast, add=T, col="grey") points(div[div$memb2 >= 0.1 ,1:2], pch=21, bg="grey", cex=0.8) box() par(mar=c(5,5,.2,.5)) # c(bottom, left, top, right) image(raster(r.sel["areaTypes.p"]), col=c("blue", "green", "red"), main="", xlab="Longitude (m)", ylab="Latitude (m)") # 1, 16, 22 rect(110000, 6250000, 147000, 6280000, col = "white", border= "black", lwd=1) # xleft, ybottom, xright, ytop text(129000,6265000, "C", col="black", font=2) plot(eez, add=T, col="grey") plot(coast, add=T, col="grey") points(div[div$memb3 >= 0.1 ,1:2], pch=21, bg="grey", cex=0.8) box() par(mar=c(5,.5,.2,5)) # c(bottom, left, top, right) image(raster(r.sel["areaTypes.p"]), col=c("blue", "green", "red"), main="", xlab="Longitude (m)", ylab="", yaxt="n") # Default is 5.1 for a vertical legend and 3.1 for a horizontal legend rect(110000, 6250000, 147000, 6280000, col = "white", border= "black", lwd=1) # xleft, ybottom, xright, ytop text(129000,6265000, "D", col="black", font=2) plot(eez, add=T, col="grey") plot(coast, add=T, col="grey") points(div[div$memb4 >= 0.1 ,1:2], pch=21, bg="grey", cex=0.8) box() } dev.off() #### 08.04 Diversity pattern from core to transition areas - Beta diversity test #### # AMPHIURA-FILIFORMIS d.sp<- vegdist(div[div$memb1 >=0.1,9:156], method="bray") permdisp1<-betadisper(d=d.sp, group=as.factor(as.matrix(div[div$memb1 >=0.1, "core"])), type = "centroid") beta.perm1<- permutest(permdisp1, pairwise = "true", permutations = 999) # beta.perm1 # BATHYPOREIA-TELLINA d.sp<- vegdist(div[div$memb2 >=0.1,9:156], method="bray") permdisp2<-betadisper(d=d.sp, group=as.factor(as.matrix(div[div$memb2 >=0.1, "core"]))) beta.perm2<- permutest(permdisp2, pairwise = "true", permutations = 999) # beta.perm2 # GONIADELLA-SPISULA d.sp<- vegdist(div[div$memb3 >=0.1,9:156], method="bray") permdisp3<-betadisper(d=d.sp, group=as.factor(as.matrix(div[div$memb3 >=0.1, "core"]))) beta.perm3<- permutest(permdisp3, pairwise = "true", permutations = 999) # beta.perm3 #PHORONIS d.sp<- vegdist(div[div$memb4 >=0.095, 9:156], method="bray") permdisp4<-betadisper(d=d.sp, group=as.factor(as.matrix(div[div$memb4 >=0.095, "core"]))) beta.perm4<- permutest(permdisp4, pairwise = "true", permutations = 999) # beta.perm4 permdisp<- rbind( cbind(permdisp1$distances, 
permdisp1$group, "comm1"), cbind(permdisp2$distances, permdisp2$group, "comm2"), cbind(permdisp3$distances, permdisp3$group, "comm3"), cbind(permdisp4$distances, permdisp4$group, "comm4")) rm(d.sp) permdisp<- as.data.frame(as.matrix(permdisp)) names(permdisp)<- c("distances", "group", "community") permdisp[,1] <- as.numeric(as.character(permdisp[,1])) quartz() { boxplot(distances ~ interaction(group,community), data=permdisp, col="grey", xaxt="n", ylim=c(0.2,0.85), pch=16, ylab="Distance to centroid", xlab="Degree of equitability") axis(1, at=2, "Aᴍᴘʜɪᴜʀᴀ-ғɪʟɪғᴏʀᴍɪs", padj=-4, hadj=0.5, las=1, tick = F) axis(1, at=5, "Bᴀᴛʜʏᴘᴏʀᴇɪᴀ-Tᴇʟʟɪɴᴀ", padj=-4, hadj=0.5, las=1, tick = F) axis(1, at=8, "Gᴏɴɪᴀᴅᴇʟʟᴀ-Sᴘɪsᴜʟᴀ", padj=-4, hadj=0.5, las=1, tick = F) axis(1, at=11, "Pʜᴏʀᴏɴɪs", padj=-4, hadj=0.5, las=1, tick = F) axis(1, at=c(1,2,3), c("low \n ≤ 0.4", "medium \n > 0.4, ≤ 0.6", "high \n > 0.6"), tick = T, padj=0.5) axis(1, at=c(4,5,6), c("low", "medium", "high"), tick = T) axis(1, at=c(7,8,9), c("low", "medium", "high"), tick = T) axis(1, at=c(10,11,12), c("low", "medium", "high"), tick = T) text(1,0.845, "a", cex=1.2, col="red") ; text(2,0.845, "b", cex=1.2, col="red") ; text(3,0.845, "b", cex=1.2, col="red") text(4,0.845, "a", cex=1.2, col="red") ; text(5,0.845, "b", cex=1.2, col="red") ; text(6,0.845, "b", cex=1.2, col="red") text(7,0.845, "a", cex=1.2, col="red") ; text(8,0.845, "b", cex=1.2, col="red") ; text(9,0.845, "c", cex=1.2, col="red") text(10,0.845, "a", cex=1.2, col="red") ; text(11,0.845, "b", cex=1.2, col="red") ; text(12,0.845, "b", cex=1.2, col="red") } dev.off() #### 10.01 Taxonomic diversity - data preparation #### library(taxize) sp.list<- names(data.matrix.gridded.norare3.out) sp.list<- gsub("..", ".", sp.list, fixed = TRUE) sp.list<- gsub(".", " ", sp.list, fixed = TRUE) sp.class<- tax_name(query = sp.list, get = c("genus","family","order"), db = "both", pref="ncbi") sp.class.na<- sp.class[complete.cases(sp.class),] sp.class.na<- sp.class.na[-which(duplicated(sp.class.na$query)), ] sp.class.data <- cbind(sp.class.na, t(data.matrix.gridded.norare3.out)[which(sp.list %in% sp.class.na$query),]) sp.gen <- aggregate(sp.class.data[,6: ncol(sp.class.data)], by= list(sp.class.data$genus), FUN= mean) sp.gen.df<- data.frame(t(sp.gen[,2: ncol(sp.gen)])) names(sp.gen.df)<- sp.gen[,1] sp.fam <- aggregate(sp.class.data[,6: ncol(sp.class.data)], by= list(sp.class.data$family), FUN= mean) sp.fam.df<- data.frame(t(sp.fam[,2: ncol(sp.fam)])) names(sp.fam.df)<- sp.fam[,1] sp.ord <- aggregate(sp.class.data[,6: ncol(sp.class.data)], by= list(sp.class.data$order), FUN= mean) sp.ord.df<- data.frame(t(sp.ord[,2: ncol(sp.ord)])) names(sp.ord.df)<- sp.ord[,1] rm(sp.class.na,sp.class.data, sp.gen, sp.fam, sp.ord) names(div) sp.gen.div<- data.frame(sp.gen.df, div[,c(1:8,157)]) sp.fam.div<- data.frame(sp.fam.df, div[,c(1:8,157)]) sp.ord.div<- data.frame(sp.ord.df, div[,c(1:8,157)]) rm(sp.gen.df, sp.fam.df, sp.ord.df) #### 10.01 Taxonomic diversity - univariate tests #### k=0.1 # Amphiura-filiformis mod1b.sp<- glm(div[div$memb1 >=k , "spp"] ~ div[div$memb1 >=k , "j.comm"], family = poisson(link="log")) car::Anova(mod1b.sp, type="III") # 0.45 NS # Bathyporeia-Tellina mod2.sp<-glm(div[div$memb2 >=k, "spp"] ~ div[div$memb2 >=k, "j.comm"], family = poisson(link="log")) car::Anova(mod2.sp, type="III") # 0.006134 ** # Gonadiella-spisula mod3b.sp<- glm(div[div$memb3 >=k, "spp"] ~ div[div$memb3 >=k, "j.comm"], family = poisson(link="log")) car::Anova(mod3b.sp, type="III") # 0.2822 NS # Phoronis mod4.sp<- 
glm(div[div$memb4 >=k, "spp"] ~ div[div$memb4 >=k, "j.comm"], family = poisson(link="log")) car::Anova(mod4.sp, type="III") # 1.124e-14 *** #### 13.01 Sampling design evaluation - example for changing scale and constant information #### library(spatstat) index<- sample.int(832, replace= F) dist.st<-dist(data.frame(results.v1.sp[index,1:2])) dist.st<- as.matrix(dist.st,index) dist.st[lower.tri(dist.st, diag=T)]<-NA d2000.s<- matrix(,1,1) for (i in as.numeric(rownames(dist.st))){ ifelse(any((dist.st[i,]<2828.427), na.rm=T), d2000.s[i]<- 1, d2000.s[i]<-0) } d2000.s<-data.frame(keep=d2000.s) d2000.s$id<- as.numeric(rownames(dist.st)) d2000.s<-d2000.s$id[which(d2000.s$keep==0)] if (length(d2000.s)<110) d2000.s<- c(d2000.s, rep(NA, 110-length(d2000.s))) else d2000.s<- sample(d2000.s, size=110, replace= F) d3000.s<- matrix(,1,1) for (i in as.numeric(rownames(dist.st))){ ifelse(any((dist.st[i,]<=2828.427), na.rm=T), d3000.s[i]<- 1, d3000.s[i]<-0) } d3000.s<-data.frame(keep=d3000.s) d3000.s$id<- as.numeric(rownames(dist.st)) d3000.s<-d3000.s$id[which(d3000.s$keep==0)] if (length(d3000.s)<110) d3000.s<- c(d3000.s, rep(NA, 110-length(d3000.s))) else d3000.s<- sample(d3000.s, size=110, replace= F) d5000.s<- matrix(,1,1) for (i in as.numeric(rownames(dist.st))){ ifelse(any(dist.st[i,]<=4472.136, na.rm=T), d5000.s[i]<- 1, d5000.s[i]<-0) } d5000.s<-data.frame(keep=d5000.s) d5000.s$id<- as.numeric(rownames(dist.st)) d5000.s<-d5000.s$id[which(d5000.s$keep==0)] if (length(d5000.s)<110) d5000.s<- c(d5000.s, rep(NA, 110-length(d5000.s))) else d5000.s<- sample(d5000.s, size=110, replace= F) d6000.s<- matrix(,1,1) for (i in as.numeric(rownames(dist.st))){ ifelse(any(dist.st[i,]<=6324.555, na.rm=T), d6000.s[i]<- 1, d6000.s[i]<-0) } d6000.s<-data.frame(keep=d6000.s) d6000.s$id<- as.numeric(rownames(dist.st)) d6000.s<-d6000.s$id[which(d6000.s$keep==0)] if (length(d6000.s)<110) d6000.s<- c(d6000.s, rep(NA, 110-length(d6000.s))) else d6000.s<- sample(d6000.s, size=110, replace= F) d10000.s<- matrix(,1,1) for (i in 1:nrow(dist.st)){ if (any(dist.st[i,]<=10198.039, na.rm=T)) d10000.s[i]<- 1 else d10000.s[i]<-0 } d10000.s<-data.frame(keep=d10000.s) d10000.s$id<- as.numeric(rownames(dist.st)) d10000.s<-d10000.s$id[which(d10000.s$keep==0)] if (length(d10000.s)<110) d10000.s<- c(d10000.s, rep(NA, 110-length(d10000.s))) else d10000.s<- sample(d10000.s, size=110, replace= F) d12000.s<- matrix(,1,1) for (i in 1:nrow(dist.st)){ if (any(dist.st[i,]<=11661.904, na.rm=T)) d12000.s[i]<- 1 else d12000.s[i]<-0 } d12000.s<-data.frame(keep=d12000.s) d12000.s$id<- as.numeric(rownames(dist.st)) d12000.s<-d12000.s$id[which(d12000.s$keep==0)] if (length(d12000.s)<110) d12000.s<- c(d12000.s, rep(NA, 110-length(d12000.s))) else d12000.s<- sample(d12000.s, size=110, replace= F) d12000.s<- d12000.s[!is.na(d12000.s)] length(d2000.s) # 110 length(d3000.s) # 110 length(d5000.s) # 110 length(d6000.s) # 110 length(d10000.s) # 110 length(d12000.s) # 107 min(nndist(data.frame(coordinates(results.v1.sp)))) # 2000 min(nndist(coordinates(results.v1.sp)[d3000.s,])) # 2828 min(nndist(coordinates(results.v1.sp)[d5000.s,])) # 4472 min(nndist(coordinates(results.v1.sp)[d6000.s,]))# 6325 min(nndist(coordinates(results.v1.sp)[d10000.s,])) # 10198 min(nndist(coordinates(results.v1.sp)[d12000.s,])) # 11662 # plottig stations in the case of changing of spatial scale pdf("/file.pdf", width=6.54,height=6, family="Helvetica", onefile=T, bg="white") { layout(matrix(c(1,2,3,4,5,6), 2, 3, byrow=T), widths=c(3,2.5,2.5), heights=c(7.1, 8.12)) par(cex=.6) 
par(mar=c(1,4.5,2,1)) options("scipen"=-2, "digits"=4) plot(results.v1.sp[d2000.s,], pch=21, cex=.8, bg="grey", xlab="", ylab="Latitude (m)", main="110 stations - d > 2 km", axes=T, xaxt="n", xlim=c(100000, 500000), ylim=c(6000000, 6200000)) plot(eez, add=T, col="black") plot(coast, add=T, col="grey") box() raster::scalebar(100000, type="bar", xy=c(105000, 5900000),adj=c(0.5,-1), below="kilometers",divs=4, lonlat=F, label=c(0, 50, 100), font=1) par(mar=c(1,1,2,1), new=F) # plot(results.v1.sp[d3000.s,], pch=21, cex=.8, bg="grey", xlab="", ylab="", main="110 stations - d > 3 km", axes=T, xaxt="n", yaxt="n", xlim=c(100000, 500000),ylim=c(6000000, 6200000)) plot(eez, add=T, col="black") plot(coast, add=T, col="grey") box() par(mar=c(1,1,2,1), new=F) plot(results.v1.sp[d5000.s,], pch=21, cex=.8, bg="grey", xlab="", ylab="", main="110 stations - d > 5 km", axes=T, xaxt="n", yaxt="n", xlim=c(100000, 500000), ylim=c(6000000, 6200000)) plot(eez, add=T, col="black") plot(coast, add=T, col="grey") box() par(mar=c(4.5,4.5,2,1)) # plot(results.v1.sp[d6000.s,], pch=21, cex=.8, bg="grey", xlab="Longitude (m)", ylab="Latitude (m)", main="110 stations - d > 6 km", axes=T, xlim=c(100000, 500000), ylim=c(6000000, 6200000)) plot(eez, add=T, col="black") plot(coast, add=T, col="grey") box() par(mar=c(4.5,1,2,1), new=F) plot(results.v1.sp[d10000.s,], pch=21, cex=.8, bg="grey", xlab="Longitude (m)", ylab="", main="110 stations - d > 10 km", axes=T, yaxt="n", xlim=c(100000, 500000), ylim=c(6000000, 6200000)) plot(eez, add=T, col="black") plot(coast, add=T, col="grey") box() par(mar=c(4.5,1,2,1), new=F) plot(results.v1.sp[d12000.s,], pch=21, cex=.8, bg="grey", xlab="Longitude (m)", ylab="", main="110 stations - d > 12 km", axes=T, yaxt="n", xlim=c(100000, 500000), ylim=c(6000000, 6200000)) plot(eez, add=T, col="black") plot(coast, add=T, col="grey") box() } dev.off() #### 13.02 Sampling design evaluation - example for changing information and constant spatia scale #### d3000.i<- sample(1:832, size=600, replace= F) d5000.i<- sample(1:832, size=330, replace= F) d6000.i<- sample(1:832, size=260, replace= F) d10000.i<- sample(1:832, size=130, replace= F) d12000.i<- sample(1:832, size=110, replace= F) # plottig stations in the case of changing of information pdf("/file.pdf", width=6.54,height=6, family="Helvetica", onefile=T, bg="white") { layout(matrix(c(1,2,3,4,5,6), 2, 3, byrow=T), widths=c(3,2.5,2.5), heights=c(7.1, 8.12)) par(cex=.6) par(mar=c(1,4.5,2,1)) options("scipen"=-2, "digits"=4) plot(results.v1.sp, cex=.8, pch=21, bg="grey", xlab="", ylab="Latitude (m)", main="832 stations", axes=T, xaxt="n", xlim=c(100000, 500000), ylim=c(6000000, 6200000)) plot(eez, add=T, col="black") plot(coast, add=T, col="grey") box() raster::scalebar(100000, type="bar", xy=c(105000, 5900000),adj=c(0.5,-1), below="kilometers",divs=4, lonlat=F, label=c(0, 50, 100), font=1) par(mar=c(1,1,2,1), new=F) plot(results.v1.sp[d3000.i,], cex=.8, pch=21, bg="grey", xlab="", ylab="", main="600 stations", axes=T, xaxt="n", yaxt="n", xlim=c(100000, 500000),ylim=c(6000000, 6200000)) plot(eez, add=T, col="black") plot(coast, add=T, col="grey") box() par(mar=c(1,1,2,1), new=F) plot(results.v1.sp[d5000.i,], cex=.8, pch=21, bg="grey", xlab="", ylab="", main="330 stations", axes=T, xaxt="n", yaxt="n", xlim=c(100000, 500000), ylim=c(6000000, 6200000)) plot(eez, add=T, col="black") plot(coast, add=T, col="grey") box() par(mar=c(4.5,4.5,2,1)) plot(results.v1.sp[d6000.i,], cex=.8, pch=21, bg="grey", xlab="Longitude (m)", ylab="Latitude (m)", main="250 
stations", axes=T, xlim=c(100000, 500000), ylim=c(6000000, 6200000))
plot(eez, add=T, col="black")
plot(coast, add=T, col="grey")
box()
par(mar=c(4.5,1,2,1), new=F)
plot(results.v1.sp[d10000.i,], cex=.8, pch=21, bg="grey", xlab="Longitude (m)", ylab="", main="150 stations", axes=T, yaxt="n", xlim=c(100000, 500000), ylim=c(6000000, 6200000))
plot(eez, add=T, col="black")
plot(coast, add=T, col="grey")
box()
par(mar=c(4.5,1,2,1), new=F)
plot(results.v1.sp[d12000.i,], cex=.8, pch=21, bg="grey", xlab="Longitude (m)", ylab="", main="110 stations", axes=T, yaxt="n", xlim=c(100000, 500000), ylim=c(6000000, 6200000))
plot(eez, add=T, col="black")
plot(coast, add=T, col="grey")
box() }
dev.off()

#### 13.03 Sampling design evaluation - data subselection, bootstrap for changing scale and constant information ####
table.scale<- foreach(j = 1:500, .combine=rbind, .multicombine=TRUE) %dopar% {
index<- sample.int(832, replace= F) ; dist.st<-dist(data.frame(st[index,]))
dist.st<- (as.matrix(dist.st,index)) ; dist.st[lower.tri(dist.st, diag=T)]<-NA
d2000.s<- matrix(,1,1)
for (i in as.numeric(rownames(dist.st))){ ifelse(any((dist.st[i,]<2828.427), na.rm=T), d2000.s[i]<- 1, d2000.s[i]<-0) }
d2000.s<-data.frame(keep=d2000.s)
d2000.s$id<- as.numeric(rownames(dist.st))
d2000.s<-d2000.s$id[which(d2000.s$keep==0)]
if (length(d2000.s)<110) d2000.s<- c(d2000.s, sample(d2000.s, size=(110-length(d2000.s)), replace= T)) else d2000.s<- sample(d2000.s, size=110, replace= F)
d3000.s<- matrix(,1,1)
for (i in as.numeric(rownames(dist.st))){ ifelse(any(dist.st[i,]<=2828.427, na.rm=T), d3000.s[i]<- 1, d3000.s[i]<-0) }
d3000.s<-data.frame(keep=d3000.s)
d3000.s$id<- as.numeric(rownames(dist.st))
d3000.s<-d3000.s$id[which(d3000.s$keep==0)]
if (length(d3000.s)<110) d3000.s<- c(d3000.s, sample(d3000.s, size=(110-length(d3000.s)), replace= T)) else d3000.s<- sample(d3000.s, size=110, replace= F)
d5000.s<- matrix(,1,1)
for (i in as.numeric(rownames(dist.st))){ifelse(any(dist.st[i,]<=4472.136, na.rm=T), d5000.s[i]<- 1, d5000.s[i]<-0) }
d5000.s<-data.frame(keep=d5000.s)
d5000.s$id<- as.numeric(rownames(dist.st))
d5000.s<-d5000.s$id[which(d5000.s$keep==0)]
if (length(d5000.s)<110) d5000.s<- c(d5000.s, sample(d5000.s, size=(110-length(d5000.s)), replace= T)) else d5000.s<- sample(d5000.s, size=110, replace= F)
d6000.s<- matrix(,1,1)
for (i in as.numeric(rownames(dist.st))){ ifelse(any(dist.st[i,]<=6324.555, na.rm=T), d6000.s[i]<- 1, d6000.s[i]<-0) }
d6000.s<-data.frame(keep=d6000.s)
d6000.s$id<- as.numeric(rownames(dist.st))
d6000.s<-d6000.s$id[which(d6000.s$keep==0)]
if (length(d6000.s)<110) d6000.s<- c(d6000.s, sample(d6000.s, size=(110-length(d6000.s)), replace= T)) else d6000.s<- sample(d6000.s, size=110, replace= F)
d10000.s<- matrix(,1,1)
for (i in 1:nrow(dist.st)){ if (any(dist.st[i,]<=8944.272, na.rm=T)) d10000.s[i]<- 1 else d10000.s[i]<-0 }
d10000.s<-data.frame(keep=d10000.s)
d10000.s$id<- as.numeric(rownames(dist.st))
d10000.s<-d10000.s$id[which(d10000.s$keep==0)]
if (length(d10000.s)<110) d10000.s<- c(d10000.s, sample(d10000.s, size=(110-length(d10000.s)), replace= T)) else d10000.s<- sample(d10000.s, size=110, replace= F)
d12000.s<- matrix(,1,1)
for (i in 1:nrow(dist.st)){ if (any(dist.st[i,]<=11661.904, na.rm=T)) d12000.s[i]<- 1 else d12000.s[i]<-0 }
d12000.s<-data.frame(keep=d12000.s)
d12000.s$id<- as.numeric(rownames(dist.st))
d12000.s<-d12000.s$id[which(d12000.s$keep==0)]
if (length(d12000.s)<110) d12000.s<- c(d12000.s, sample(d12000.s, size=(110-length(d12000.s)), replace= T)) else d12000.s<- sample(d12000.s, size=110,
replace= F) index<- as.vector(apply(data.matrix.gridded.norare3.out[d2000.s,],2,sum,na.rm=T)!=0) ifelse(any(index==F), grid2000<- data.matrix.gridded.norare3.out[d2000.s,which(index)], grid2000<- data.matrix.gridded.norare3.out[d2000.s,]) index<- as.vector(apply(grid2000,1,sum,na.rm=T)!=0) ifelse(any(index==F), grid2000<- grid2000[which(index),], grid2000<- grid2000) index<- as.vector(apply(data.matrix.gridded.norare3.out[d3000.s,],2,sum,na.rm=T)!=0) ifelse(any(index==F), grid3000<- data.matrix.gridded.norare3.out[d3000.s,which(index)], grid3000<- data.matrix.gridded.norare3.out[d3000.s,]) index<- as.vector(apply(grid3000,1,sum,na.rm=T)!=0) ifelse(any(index==F), grid3000<- grid3000[which(index),], grid3000<- grid3000) index<- as.vector(apply(data.matrix.gridded.norare3.out[d5000.s,],2,sum,na.rm=T)!=0) ifelse (any(index==F), grid5000<- data.matrix.gridded.norare3.out[d5000.s,which(index)], grid5000<- data.matrix.gridded.norare3.out[d5000.s,]) index<- as.vector(apply(grid5000,1,sum,na.rm=T)!=0) ifelse(any(index==F), grid5000<- grid5000[which(index),], grid5000<- grid5000) index<- as.vector(apply(data.matrix.gridded.norare3.out[d6000.s,],2,sum,na.rm=T)!=0) ifelse (any(index==F), grid6000<- data.matrix.gridded.norare3.out[d6000.s,which(index)], grid6000<- data.matrix.gridded.norare3.out[d6000.s,]) index<- as.vector(apply(grid6000,1,sum,na.rm=T)!=0) ifelse(any(index==F), grid6000<- grid6000[which(index),], grid6000<- grid6000) index<- as.vector(apply(data.matrix.gridded.norare3.out[d10000.s,],2,sum,na.rm=T)!=0) ifelse (any(index==F), grid10000<- data.matrix.gridded.norare3.out[d10000.s,which(index)], grid10000<- data.matrix.gridded.norare3.out[d10000.s,]) index<- as.vector(apply(grid10000,1,sum,na.rm=T)!=0) ifelse(any(index==F), grid10000<- grid10000[which(index),], grid10000<- grid10000) index<- as.vector(apply(data.matrix.gridded.norare3.out[d12000.s,],2,sum,na.rm=T)!=0) ifelse (any(index==F), grid12000<- data.matrix.gridded.norare3.out[d12000.s,which(index)], grid12000<- data.matrix.gridded.norare3.out[d12000.s,]) index<- as.vector(apply(grid12000,1,sum,na.rm=T)!=0) ifelse(any(index==F), grid12000<- grid12000[which(index),], grid12000<- grid12000) d.bray2000<- vegdist(sqrt(sqrt( grid2000[complete.cases(grid2000),] )), "bray", na.rm=T, upper=T) d.bray3000<- vegdist(sqrt(sqrt( grid3000[complete.cases(grid3000),] )), "bray", na.rm=T, upper=T) d.bray5000<- vegdist(sqrt(sqrt( grid5000[complete.cases(grid5000),] )), "bray", na.rm=T, upper=T) d.bray6000<- vegdist(sqrt(sqrt( grid6000[complete.cases(grid6000),] )), "bray", na.rm=T, upper=T) d.bray10000<- vegdist(sqrt(sqrt( grid10000[complete.cases(grid10000),] )), "bray", na.rm=T, upper=T) d.bray12000<- vegdist(sqrt(sqrt( grid12000[complete.cases(grid12000),] )), "bray", na.rm=T, upper=T) fuzzy2000<- data.frame(fanny(d.bray2000,k=4,diss=T, maxit=500, memb.exp=1.1)$clustering, as.data.frame(fanny(d.bray2000,k=4,diss=T, maxit=500, memb.exp=1.1)$membership)) fuzzy3000<- data.frame(fanny(d.bray3000,k=4,diss=T, maxit=500, memb.exp=1.1)$clustering, as.data.frame(fanny(d.bray3000,k=4,diss=T, maxit=500, memb.exp=1.1)$membership)) fuzzy5000<- data.frame(fanny(d.bray5000,k=4,diss=T, maxit=500, memb.exp=1.1)$clustering, as.data.frame(fanny(d.bray5000,k=4,diss=T, maxit=500, memb.exp=1.1)$membership)) fuzzy6000<- data.frame(fanny(d.bray6000,k=4,diss=T, maxit=500, memb.exp=1.1)$clustering, as.data.frame(fanny(d.bray6000,k=4,diss=T, maxit=500, memb.exp=1.1)$membership)) fuzzy10000<- data.frame(fanny(d.bray10000,k=4,diss=T, maxit=500, memb.exp=1.1)$clustering, 
as.data.frame(fanny(d.bray10000,k=4,diss=T, maxit=500, memb.exp=1.1)$membership)) fuzzy12000<- data.frame(fanny(d.bray12000,k=4,diss=T, maxit=500, memb.exp=1.1)$clustering, as.data.frame(fanny(d.bray12000,k=4,diss=T, maxit=500, memb.exp=1.1)$membership)) names(fuzzy2000)<-c("cluster", "memb1", "memb2", "memb3", "memb4") names(fuzzy3000)<-c("cluster", "memb1", "memb2", "memb3", "memb4") names(fuzzy5000)<-c("cluster", "memb1", "memb2", "memb3", "memb4") names(fuzzy6000)<-c("cluster", "memb1", "memb2", "memb3", "memb4") names(fuzzy10000)<-c("cluster", "memb1", "memb2", "memb3", "memb4") names(fuzzy12000)<-c("cluster", "memb1", "memb2", "memb3", "memb4") ci<- matrix(,max(nrow(fuzzy2000), nrow(fuzzy3000), nrow(fuzzy5000), nrow(fuzzy6000), nrow(fuzzy10000), nrow(fuzzy12000)),6) for (i in 1: nrow(fuzzy2000)){ci[i,1] <- (sort(t(fuzzy2000[i,2:5]), decreasing = T)[2] / sort(t(fuzzy2000[i,2:5]), decreasing = T)[1])} for (i in 1: nrow(fuzzy3000)){ci[i,2] <- (sort(t(fuzzy3000[i,2:5]), decreasing = T)[2] / sort(t(fuzzy3000[i,2:5]), decreasing = T)[1])} for (i in 1: nrow(fuzzy5000)){ci[i,3] <- (sort(t(fuzzy5000[i,2:5]), decreasing = T)[2] / sort(t(fuzzy5000[i,2:5]), decreasing = T)[1])} for (i in 1: nrow(fuzzy6000)){ci[i,4] <- (sort(t(fuzzy6000[i,2:5]), decreasing = T)[2] / sort(t(fuzzy6000[i,2:5]), decreasing = T)[1])} for (i in 1: nrow(fuzzy10000)){ci[i,5] <- (sort(t(fuzzy10000[i,2:5]), decreasing = T)[2] / sort(t(fuzzy10000[i,2:5]), decreasing = T)[1])} for (i in 1: nrow(fuzzy12000)){ci[i,6] <- (sort(t(fuzzy12000[i,2:5]), decreasing = T)[2] / sort(t(fuzzy12000[i,2:5]), decreasing = T)[1])} ci<- as.data.frame(ci) names(ci)<- c("2000", "3000", "5000", "6000", "10000", "12000") memb<- matrix(,max(nrow(fuzzy2000), nrow(fuzzy3000), nrow(fuzzy5000), nrow(fuzzy6000), nrow(fuzzy10000), nrow(fuzzy12000)),6) memb<- matrix(,nrow(fuzzy2000),6) for (i in 1: nrow(fuzzy2000)){memb[i,1] <- sort(t(fuzzy2000[i,2:5]), decreasing = T)[1]} for (i in 1: nrow(fuzzy3000)){memb[i,2] <- sort(t(fuzzy3000[i,2:5]), decreasing = T)[1]} for (i in 1: nrow(fuzzy5000)){memb[i,3] <- sort(t(fuzzy5000[i,2:5]), decreasing = T)[1]} for (i in 1: nrow(fuzzy6000)){memb[i,4] <- sort(t(fuzzy6000[i,2:5]), decreasing = T)[1]} for (i in 1: nrow(fuzzy10000)){memb[i,5] <- sort(t(fuzzy10000[i,2:5]), decreasing = T)[1]} for (i in 1: nrow(fuzzy12000)){memb[i,6] <- sort(t(fuzzy12000[i,2:5]), decreasing = T)[1]} memb<- as.data.frame(memb) names(memb)<- c("2000", "3000", "5000", "6000", "10000", "12000") return(list("confusion"=apply(ci, 2, mean, na.rm=T), "membership"=apply(memb, 2, mean, na.rm=T))) } ci.scale<- t(data.frame(table.scale[,1])) #### 13.04 Sampling design evaluation - data subselection, bootstrap for changing information and constant spatia scale #### table.info<- foreach(j = 1:500, .combine=rbind, .multicombine=TRUE) %dopar% { d2000.i<- sample(1:832, size=832, replace= F) d3000.i<- sample(1:832, size=598, replace= F) d5000.i<- sample(1:832, size=326, replace= F) d6000.i<- sample(1:832, size=251, replace= F) d10000.i<- sample(1:832, size=147, replace= F) d12000.i<- sample(1:832, size=110, replace= F) index<- as.vector(apply(data.matrix.gridded.norare3.out[d2000.i,],2,sum,na.rm=T)!=0) ifelse(any(index==F), grid2000<- data.matrix.gridded.norare3.out[d2000.i,which(index)], grid2000<- data.matrix.gridded.norare3.out[d2000.i,]) index<- as.vector(apply(grid2000,1,sum,na.rm=T)!=0) ifelse(any(index==F), grid2000<- grid2000[which(index),], grid2000<- grid2000) index<- as.vector(apply(data.matrix.gridded.norare3.out[d3000.i,],2,sum,na.rm=T)!=0) 
#### 13.04 Sampling design evaluation - data subselection, bootstrap for changing information and constant spatial scale ####
# vary the number of stations while keeping the 2000 m grid (constant spatial scale)
table.info<- foreach(j = 1:500, .combine=rbind, .multicombine=TRUE) %dopar% {
d2000.i<- sample(1:832, size=832, replace= F)
d3000.i<- sample(1:832, size=598, replace= F)
d5000.i<- sample(1:832, size=326, replace= F)
d6000.i<- sample(1:832, size=251, replace= F)
d10000.i<- sample(1:832, size=147, replace= F)
d12000.i<- sample(1:832, size=110, replace= F)
# as above: drop species (columns) and stations (rows) with zero total density in each subsample
index<- as.vector(apply(data.matrix.gridded.norare3.out[d2000.i,],2,sum,na.rm=T)!=0)
ifelse(any(index==F), grid2000<- data.matrix.gridded.norare3.out[d2000.i,which(index)], grid2000<- data.matrix.gridded.norare3.out[d2000.i,])
index<- as.vector(apply(grid2000,1,sum,na.rm=T)!=0)
ifelse(any(index==F), grid2000<- grid2000[which(index),], grid2000<- grid2000)
index<- as.vector(apply(data.matrix.gridded.norare3.out[d3000.i,],2,sum,na.rm=T)!=0)
ifelse(any(index==F), grid3000<- data.matrix.gridded.norare3.out[d3000.i,which(index)], grid3000<- data.matrix.gridded.norare3.out[d3000.i,])
index<- as.vector(apply(grid3000,1,sum,na.rm=T)!=0)
ifelse(any(index==F), grid3000<- grid3000[which(index),], grid3000<- grid3000)
index<- as.vector(apply(data.matrix.gridded.norare3.out[d5000.i,],2,sum,na.rm=T)!=0)
ifelse(any(index==F), grid5000<- data.matrix.gridded.norare3.out[d5000.i,which(index)], grid5000<- data.matrix.gridded.norare3.out[d5000.i,])
index<- as.vector(apply(grid5000,1,sum,na.rm=T)!=0)
ifelse(any(index==F), grid5000<- grid5000[which(index),], grid5000<- grid5000)
index<- as.vector(apply(data.matrix.gridded.norare3.out[d6000.i,],2,sum,na.rm=T)!=0)
ifelse(any(index==F), grid6000<- data.matrix.gridded.norare3.out[d6000.i,which(index)], grid6000<- data.matrix.gridded.norare3.out[d6000.i,])
index<- as.vector(apply(grid6000,1,sum,na.rm=T)!=0)
ifelse(any(index==F), grid6000<- grid6000[which(index),], grid6000<- grid6000)
index<- as.vector(apply(data.matrix.gridded.norare3.out[d10000.i,],2,sum,na.rm=T)!=0)
ifelse(any(index==F), grid10000<- data.matrix.gridded.norare3.out[d10000.i,which(index)], grid10000<- data.matrix.gridded.norare3.out[d10000.i,])
index<- as.vector(apply(grid10000,1,sum,na.rm=T)!=0)
ifelse(any(index==F), grid10000<- grid10000[which(index),], grid10000<- grid10000)
index<- as.vector(apply(data.matrix.gridded.norare3.out[d12000.i,],2,sum,na.rm=T)!=0)
ifelse(any(index==F), grid12000<- data.matrix.gridded.norare3.out[d12000.i,which(index)], grid12000<- data.matrix.gridded.norare3.out[d12000.i,])
index<- as.vector(apply(grid12000,1,sum,na.rm=T)!=0)
ifelse(any(index==F), grid12000<- grid12000[which(index),], grid12000<- grid12000)
d.bray2000<- vegdist(sqrt(sqrt( grid2000 )), "bray", na.rm=T, upper=T)
d.bray3000<- vegdist(sqrt(sqrt( grid3000 )), "bray", na.rm=T, upper=T)
d.bray5000<- vegdist(sqrt(sqrt( grid5000 )), "bray", na.rm=T, upper=T)
d.bray6000<- vegdist(sqrt(sqrt( grid6000 )), "bray", na.rm=T, upper=T)
d.bray10000<- vegdist(sqrt(sqrt( grid10000 )), "bray", na.rm=T, upper=T)
d.bray12000<- vegdist(sqrt(sqrt( grid12000 )), "bray", na.rm=T, upper=T)
fuzzy2000<- data.frame(fanny(d.bray2000, k=4, diss=T, maxit=500, memb.exp=1.1)$clustering,
                       as.data.frame(fanny(d.bray2000, k=4, diss=T, maxit=500, memb.exp=1.1)$membership))
fuzzy3000<- data.frame(fanny(d.bray3000, k=4, diss=T, maxit=500, memb.exp=1.1)$clustering,
                       as.data.frame(fanny(d.bray3000, k=4, diss=T, maxit=500, memb.exp=1.1)$membership))
fuzzy5000<- data.frame(fanny(d.bray5000, k=4, diss=T, maxit=500, memb.exp=1.1)$clustering,
                       as.data.frame(fanny(d.bray5000, k=4, diss=T, maxit=500, memb.exp=1.1)$membership))
fuzzy6000<- data.frame(fanny(d.bray6000, k=4, diss=T, maxit=500, memb.exp=1.1)$clustering,
                       as.data.frame(fanny(d.bray6000, k=4, diss=T, maxit=500, memb.exp=1.1)$membership))
fuzzy10000<- data.frame(fanny(d.bray10000, k=4, diss=T, maxit=500, memb.exp=1.1)$clustering,
                        as.data.frame(fanny(d.bray10000, k=4, diss=T, maxit=500, memb.exp=1.1)$membership))
fuzzy12000<- data.frame(fanny(d.bray12000, k=4, diss=T, maxit=500, memb.exp=1.1)$clustering,
                        as.data.frame(fanny(d.bray12000, k=4, diss=T, maxit=500, memb.exp=1.1)$membership))
names(fuzzy2000)<- c("cluster", "memb1", "memb2", "memb3", "memb4")
names(fuzzy3000)<- c("cluster", "memb1", "memb2", "memb3", "memb4")
names(fuzzy5000)<- c("cluster", "memb1", "memb2", "memb3", "memb4")
names(fuzzy6000)<- c("cluster", "memb1", "memb2", "memb3", "memb4")
names(fuzzy10000)<- c("cluster", "memb1", "memb2", "memb3", "memb4")
names(fuzzy12000)<- c("cluster", "memb1", "memb2", "memb3", "memb4")
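# note: each fanny() model above is fitted twice (once for $clustering, once for
# $membership); storing the fitted object once would roughly halve the run time of every
# bootstrap replicate. a sketch of that pattern, shown as comments so the bootstrap itself
# is left unchanged (the object name fit2000 is hypothetical):
# fit2000<- fanny(d.bray2000, k=4, diss=T, maxit=500, memb.exp=1.1)
# fuzzy2000<- data.frame(fit2000$clustering, as.data.frame(fit2000$membership))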
ci<- matrix(,max(nrow(fuzzy2000), nrow(fuzzy3000), nrow(fuzzy5000), nrow(fuzzy6000), nrow(fuzzy10000), nrow(fuzzy12000)), 6)
for (i in 1:nrow(fuzzy2000)){ci[i,1]<- (sort(t(fuzzy2000[i,2:5]), decreasing = T)[2] / sort(t(fuzzy2000[i,2:5]), decreasing = T)[1])}
for (i in 1:nrow(fuzzy3000)){ci[i,2]<- (sort(t(fuzzy3000[i,2:5]), decreasing = T)[2] / sort(t(fuzzy3000[i,2:5]), decreasing = T)[1])}
for (i in 1:nrow(fuzzy5000)){ci[i,3]<- (sort(t(fuzzy5000[i,2:5]), decreasing = T)[2] / sort(t(fuzzy5000[i,2:5]), decreasing = T)[1])}
for (i in 1:nrow(fuzzy6000)){ci[i,4]<- (sort(t(fuzzy6000[i,2:5]), decreasing = T)[2] / sort(t(fuzzy6000[i,2:5]), decreasing = T)[1])}
for (i in 1:nrow(fuzzy10000)){ci[i,5]<- (sort(t(fuzzy10000[i,2:5]), decreasing = T)[2] / sort(t(fuzzy10000[i,2:5]), decreasing = T)[1])}
for (i in 1:nrow(fuzzy12000)){ci[i,6]<- (sort(t(fuzzy12000[i,2:5]), decreasing = T)[2] / sort(t(fuzzy12000[i,2:5]), decreasing = T)[1])}
memb<- matrix(,max(nrow(fuzzy2000), nrow(fuzzy3000), nrow(fuzzy5000), nrow(fuzzy6000), nrow(fuzzy10000), nrow(fuzzy12000)), 6)
for (i in 1:nrow(fuzzy2000)){memb[i,1]<- sort(t(fuzzy2000[i,2:5]), decreasing = T)[1]}
for (i in 1:nrow(fuzzy3000)){memb[i,2]<- sort(t(fuzzy3000[i,2:5]), decreasing = T)[1]}
for (i in 1:nrow(fuzzy5000)){memb[i,3]<- sort(t(fuzzy5000[i,2:5]), decreasing = T)[1]}
for (i in 1:nrow(fuzzy6000)){memb[i,4]<- sort(t(fuzzy6000[i,2:5]), decreasing = T)[1]}
for (i in 1:nrow(fuzzy10000)){memb[i,5]<- sort(t(fuzzy10000[i,2:5]), decreasing = T)[1]}
for (i in 1:nrow(fuzzy12000)){memb[i,6]<- sort(t(fuzzy12000[i,2:5]), decreasing = T)[1]}
ci<- as.data.frame(ci)
names(ci)<- c("2000", "3000", "5000", "6000", "10000", "12000")
memb<- as.data.frame(memb)
names(memb)<- c("2000", "3000", "5000", "6000", "10000", "12000")
return(list("confusion"=apply(ci, 2, mean, na.rm=T), "membership"=apply(memb, 2, mean, na.rm=T)))
}
ci.info<- t(data.frame(table.info[,1]))
colnames(ci.info)<- c("832", "600", "330", "250", "150", "110")

#### 13.05 Sampling design evaluation - statistical test on bootstraps ####
test.info<- melt(ci.info)
names(test.info)<- c("replicate", "info.factor", "ci.value")
test.info$info.factor<- as.factor(test.info$info.factor)
summary(aov(ci.value ~ info.factor, data = test.info))
TukeyHSD(aov(ci.value ~ info.factor, data = test.info))
test.scale<- melt(ci.scale)
names(test.scale)<- c("replicate", "info.factor", "ci.value")
test.scale$info.factor<- as.factor(test.scale$info.factor)
summary(aov(ci.value ~ info.factor, data = test.scale))
TukeyHSD(aov(ci.value ~ info.factor, data = test.scale))
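# the ANOVA / Tukey HSD calls above print their results but do not keep them; a small
# sketch for storing the fits and turning the Tukey tables into data frames for reporting
# (the object names aov.info, tukey.info, aov.scale, tukey.scale are hypothetical):
aov.info<- aov(ci.value ~ info.factor, data = test.info)
tukey.info<- as.data.frame(TukeyHSD(aov.info)$info.factor)     # diff, lwr, upr, p adj per pair
aov.scale<- aov(ci.value ~ info.factor, data = test.scale)
tukey.scale<- as.data.frame(TukeyHSD(aov.scale)$info.factor)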
#### 13.06 Plotting bootstraps ####
# boxplots of the confusion index: panel A, changing number of stations; panel B, changing spatial scale (distance)
pdf("/file.pdf", width=4.92, height=3, family="Helvetica", onefile=T, bg="white", compress=T)
{
layout(matrix(c(1,2), 1, 2, byrow=T), widths=c(3,2.5))
par(cex=.6)
par(mar=c(4.5,4.5,1,.2))
boxplot(ci.info, pch=16, col="grey", notch=T, xlab= "Number of stations", ylab="Confusion index", ylim=c(0.05,0.26))
text(1,0.245, "a", cex=1.5, col="red")
text(2,0.246, "ab", cex=1.5, col="red")
text(3,0.246, "bc", cex=1.5, col="red")
text(4,0.245, "c", cex=1.5, col="red")
text(5,0.246, "d", cex=1.5, col="red")
text(6,0.245, "e", cex=1.5, col="red")
rect(.1, 0.25, .8, .3, col = "white", border= "black", lwd=1)
text(.53,0.2585, "A", col="black", font=2)
box()
par(mar=c(4.5,0.2,1,1), new=F)
boxplot(ci.scale, pch=16, col="grey", notch=T, ylab="", xlab= "Distance (km)", names=c("2", "3", "5", "6", "10", "12"), ylim=c(0.05,0.26), yaxt="n")
text(1,0.245, "a", cex=1.5, col="red")
text(2,0.245, "a", cex=1.5, col="red")
text(3,0.246, "b", cex=1.5, col="red")
text(4,0.245, "c", cex=1.5, col="red")
text(5,0.246, "d", cex=1.5, col="red")
text(6,0.245, "e", cex=1.5, col="red")
rect(.1, 0.25, .8, .3, col = "white", border= "black", lwd=1)
text(.53,0.2585, "B", col="black", font=2)
box()
}
dev.off()

#### end ####