Remote Sensing & Classification

Hurricane Inundation

Lesson 3 with Benoit Parmentier

Introduction to Remote Sensing
Flood Mapping Analyses

####################################  Introduction to Remote Sensing   #######################################
############################  Analyze and map flooding from RITA hurricane  #######################################
#This script performs analyses for the Exercise 5 of the Short Course using reflectance data derived from MODIS.
#The goal is to map flooding from RITA using various reflectance bands from Remote Sensing platforms.
#Additional data is provided including FEMA flood region. 
#
#AUTHORS: Benoit Parmentier                                             
#DATE CREATED: 03/05/2018 
#DATE MODIFIED: 03/31/2018
#Version: 1
#PROJECT: SESYNC and AAG 2018 Geospatial Short Course and workshop preparation
#TO DO:
#
#COMMIT: clean up of code
#
#################################################################################################

###Loading R library and packages                                                      

library(sp) # spatial/geographfic objects and functions
library(rgdal) #GDAL/OGR binding for R with functionalities

rgdal: version: 1.2-18, (SVN revision 718)
 Geospatial Data Abstraction Library extensions to R successfully loaded
 Loaded GDAL runtime: GDAL 2.1.3, released 2017/20/01
 Path to GDAL shared files: /usr/share/gdal/2.1
 GDAL binary built with GEOS: TRUE 
 Loaded PROJ.4 runtime: Rel. 4.9.2, 08 September 2015, [PJ_VERSION: 492]
 Path to PROJ.4 shared files: (autodetected)
 Linking to sp version: 1.2-7 

library(spdep) #spatial analyses operations, functions etc.

Loading required package: Matrix

Loading required package: spData

To access larger datasets in this package, install the spDataLarge
package with: `install.packages('spDataLarge',
repos='https://nowosad.github.io/drat/', type='source'))`

library(gtools) # contains mixsort and other useful functions
library(maptools) # tools to manipulate spatial data

Checking rgeos availability: TRUE

library(parallel) # parallel computation, part of base package no
library(rasterVis) # raster visualization operations

Loading required package: raster

Loading required package: lattice

Loading required package: latticeExtra

Loading required package: RColorBrewer

library(raster) # raster functionalities
library(forecast) #ARIMA forecasting
library(xts) #extension for time series object and analyses

Loading required package: zoo


Attaching package: 'zoo'

The following objects are masked from 'package:base':

    as.Date, as.Date.numeric

library(zoo) # time series object and analysis
library(lubridate) # dates functionality


Attaching package: 'lubridate'

The following object is masked from 'package:base':

    date

library(colorRamps) #contains matlab.like color palette
library(rgeos) #contains topological operations

rgeos version: 0.3-26, (SVN revision 560)
 GEOS runtime version: 3.5.1-CAPI-1.9.1 r4246 
 Linking to sp version: 1.2-5 
 Polygon checking: TRUE 

library(sphet) #contains spreg, spatial regression modeling


Attaching package: 'sphet'

The following object is masked from 'package:raster':

    distance

library(BMS) #contains hex2bin and bin2hex, Bayesian methods
library(bitops) # function for bitwise operations
library(foreign) # import datasets from SAS, spss, stata and other sources
library(gdata) #read xls, dbf etc., not recently updated but useful

gdata: read.xls support for 'XLS' (Excel 97-2004) files ENABLED.

gdata: read.xls support for 'XLSX' (Excel 2007+) files ENABLED.


Attaching package: 'gdata'

The following objects are masked from 'package:xts':

    first, last

The following objects are masked from 'package:raster':

    resample, trim

The following object is masked from 'package:stats':

    nobs

The following object is masked from 'package:utils':

    object.size

The following object is masked from 'package:base':

    startsWith

library(classInt) #methods to generate class limits
library(plyr) #data wrangling: various operations for splitting, combining data


Attaching package: 'plyr'

The following object is masked from 'package:lubridate':

    here

#library(gstat) #spatial interpolation and kriging methods
library(readxl) #functionalities to read in excel type data
library(psych) #pca/eigenvector decomposition functionalities


Attaching package: 'psych'

The following object is masked from 'package:gtools':

    logit

library(sf) # spatial objects classes

Linking to GEOS 3.5.1, GDAL 2.1.3, proj.4 4.9.2

library(plotrix) #various graphic functions e.g. draw.circle


Attaching package: 'plotrix'

The following object is masked from 'package:psych':

    rescale

###### Functions used in this script

create_dir_fun <- function(outDir,out_suffix=NULL){
  #if out_suffix is not null then append out_suffix string
  if(!is.null(out_suffix)){
    out_name <- paste("output_",out_suffix,sep="")
    outDir <- file.path(outDir,out_name)
  }
  #create if does not exists
  if(!file.exists(outDir)){
    dir.create(outDir)
  }
  return(outDir)
}

#####  Parameters and argument set up ###########

#in_dir_reflectance <- "data/reflectance_RITA"
in_dir_var <- "../data"
out_dir <- "."

#region coordinate reference system
#http://spatialreference.org/ref/epsg/nad83-texas-state-mapping-system/proj4/
CRS_reg <- "+proj=lcc +lat_1=27.41666666666667 +lat_2=34.91666666666666 +lat_0=31.16666666666667 +lon_0=-100 +x_0=1000000 +y_0=1000000 +ellps=GRS80 +datum=NAD83 +units=m +no_defs" 
file_format <- ".tif" # Output format for raster images that are written out.
NA_flag_val <- -9999
out_suffix <-"exercise5_03312018" #output suffix for the files and ouptu folder #PARAM 8
create_out_dir_param <- TRUE 

### Input data files used:
infile_reg_outline <- "new_strata_rita_10282017" # Region outline and FEMA zones
infile_modis_bands_information <- "df_modis_band_info.txt" # MOD09 bands information.
nlcd_2006_filename <- "nlcd_2006_RITA.tif" # NLCD2006 Land cover data aggregated at ~ 1km.
infile_name_nlcd_legend <- "nlcd_legend.txt" #Legend information for 2006 NLCD.
#MOD09 surface reflectance product on 2005-09-22 or day of year 2005265
infile_reflectance_date1 <- "mosaiced_MOD09A1_A2005265__006_reflectance_masked_RITA_reg_1km.tif"
#MOD09 surface reflectance product on 2005-09-30 or day of year 2005273
infile_reflectance_date2 <- "mosaiced_MOD09A1_A2005273__006_reflectance_masked_RITA_reg_1km.tif"

################# START SCRIPT ###############################

### PART I: READ AND PREPARE DATA FOR ANALYSES #######

## First create an output directory

if(is.null(out_dir)){
  out_dir <- dirname(in_dir) #output will be created in the input dir
}

out_suffix_s <- out_suffix #can modify name of output suffix
if(create_out_dir_param==TRUE){
  out_dir <- create_dir_fun(out_dir,out_suffix_s)
  setwd(out_dir)
}else{
  setwd(out_dir) #use previoulsy defined directory
}

##### PART I: DISPLAY AND EXPLORE DATA ##############

###### Read in MOD09 reflectance images before and after Hurrican Rita.
r_before <- brick(file.path(in_dir_var,infile_reflectance_date1)) # Before RITA, Sept. 22, 2005.
r_after <- brick(file.path(in_dir_var,infile_reflectance_date2)) # After RITA, Sept 30, 2005.

plot(r_before) # Note that this is a multibands image.

plot of chunk unnamed-chunk-1

reg_sf <- st_read(file.path(in_dir_var,infile_reg_outline))

Reading layer `new_strata_rita_10282017' from data source `/nfs/public-data/training/new_strata_rita_10282017' using driver `ESRI Shapefile'
Simple feature collection with 2 features and 17 fields
geometry type:  MULTIPOLYGON
dimension:      XY
bbox:           xmin: -93.92921 ymin: 29.47236 xmax: -91.0826 ymax: 30.49052
epsg (SRID):    4269
proj4string:    +proj=longlat +datum=NAD83 +no_defs

reg_sf <- st_transform(reg_sf,
                       crs=CRS_reg)
reg_sp <-as(reg_sf, "Spatial") #Convert to sp object before rasterization

r_ref <- rasterize(reg_sp,
                   r_before,
                   field="OBJECTID_1",
                   fun="first")
plot(r_ref) # zone 2 is flooded and zone 1 is not flooded

plot of chunk unnamed-chunk-1

#### Let's examine a location within FEMA flooded zone and outside: use centroids
centroids_sf <- st_centroid(reg_sf)

df_before <- extract(r_before,centroids_sf)
df_after <- extract(r_after,centroids_sf)

### Plot values of bands before and after for flooded region:
plot(df_before[2,],type="l")
lines(df_after[2,],col="red")

plot of chunk unnamed-chunk-1

## Read band information since it is more informative!!
df_modis_band_info <- read.table(file.path(in_dir_var,infile_modis_bands_information),
                                 sep=",",
                                 stringsAsFactors = F)
print(df_modis_band_info)

  band_name band_number start_wlength end_wlength
     Red           3           620         670
     NIR           4           841         876
    Blue           1           459         479
   Green           2           545         565
   SWIR1           5          1230        1250
   SWIR2           6          1628        1652
   SWIR3           7          2105        2155

names(r_before) <- df_modis_band_info$band_name
names(r_after) <- df_modis_band_info$band_name

### Order band in terms of wavelenth:

df_modis_band_info <- df_modis_band_info[order(df_modis_band_info$start_wlength),]
#SWIR1 (1230–1250 nm), SWIR2 (1628–1652 nm) and SWIR3 (2105–2155 nm).
band_refl_order <- df_modis_band_info$band_number

plot(df_before[2,band_refl_order],type="l",
     xlab="Bands",
     ylab="Reflectance",
     main="Reflectance profile for centroid of flooded area")
lines(df_after[2,band_refl_order],col="red")
names_vals <- c("Before Rita","After Rita")
legend("topleft",legend=names_vals,
       pt.cex=0.7,cex=0.7,col=c("black","red"),
       lty=1, #add circle symbol to line
       bty="n")

plot of chunk unnamed-chunk-1

###############################################
##### PART II: Examine spectral class signatures for land cover NLCD classes ##############

###### Read in NLDC 2006, note that it was aggregated at 1km.
nlcd2006_reg <- raster(file.path(in_dir_var,nlcd_2006_filename))
#### Extract zonal averages by NLCD classes using zonal raster
avg_reflectance_nlcd <- as.data.frame(zonal(r_before,nlcd2006_reg,fun="mean"))

#### read in NLCD legend to add to the average information
lc_legend_df <- read.table(file.path(in_dir_var,infile_name_nlcd_legend),
                           stringsAsFactors = F,
                           sep=",")
print(avg_reflectance_nlcd)

   zone        Red        NIR       Blue      Green      SWIR1      SWIR2
   0 0.03458904 0.01535499 0.03581749 0.05163573 0.01262342 0.01771813
  11 0.07307984 0.06784635 0.06036733 0.07956521 0.05991636 0.04664948
  21 0.07276522 0.28171285 0.04420428 0.07726309 0.31805232 0.23408753
  22 0.08235028 0.27960481 0.05117458 0.08538413 0.30907538 0.23123385
  23 0.10574247 0.26636831 0.06789142 0.10388469 0.29315783 0.23319375
  24 0.12368051 0.24758425 0.08174059 0.11712968 0.27622949 0.23576209
  31 0.09874217 0.13020133 0.07870421 0.09989436 0.12222435 0.09422932
  41 0.05644301 0.26516752 0.03530746 0.06259356 0.28388245 0.18083792
  42 0.03990187 0.27645189 0.02177976 0.04993706 0.28957741 0.17166508
 43 0.03941129 0.28346313 0.02211763 0.05035833 0.30668417 0.18516430
 52 0.04540521 0.27760067 0.02405570 0.05334825 0.30194988 0.19117841
 71 0.05569214 0.27195001 0.03005173 0.06076262 0.30610093 0.20940236
 81 0.07483913 0.27795804 0.04250243 0.07692526 0.32523050 0.24754272
 82 0.08456354 0.27472078 0.04777970 0.08379540 0.31051877 0.24309384
 90 0.03852322 0.23273819 0.02317667 0.04775357 0.25516940 0.15667656
 95 0.06208044 0.19687418 0.03674701 0.06434722 0.21390883 0.15373826
        SWIR3
0.01260556
0.02672107
0.11926512
0.12615517
0.14802151
0.15738439
0.05498403
0.07959769
0.07041540
0.07498740
0.08314305
0.09997039
0.12366634
0.13108070
0.06237482
0.07421595

names(lc_legend_df)

[1] "ID"                         "COUNT"                     
[3] "Red"                        "Green"                     
[5] "Blue"                       "NLCD.2006.Land.Cover.Class"
[7] "Opacity"                   

### Add relevant categories
lc_legend_df_subset <- subset(lc_legend_df,select=c("ID","NLCD.2006.Land.Cover.Class"))
names(lc_legend_df_subset) <- c("ID","cat_name")

avg_reflectance_nlcd <- merge(avg_reflectance_nlcd,lc_legend_df_subset,by.x="zone",by.y="ID",all.y=F)
head(avg_reflectance_nlcd)

  zone        Red        NIR       Blue      Green      SWIR1      SWIR2
  0 0.03458904 0.01535499 0.03581749 0.05163573 0.01262342 0.01771813
 11 0.07307984 0.06784635 0.06036733 0.07956521 0.05991636 0.04664948
 21 0.07276522 0.28171285 0.04420428 0.07726309 0.31805232 0.23408753
 22 0.08235028 0.27960481 0.05117458 0.08538413 0.30907538 0.23123385
 23 0.10574247 0.26636831 0.06789142 0.10388469 0.29315783 0.23319375
 24 0.12368051 0.24758425 0.08174059 0.11712968 0.27622949 0.23576209
       SWIR3                    cat_name
0.01260556                Unclassified
0.02672107                  Open Water
0.11926512       Developed, Open Space
0.12615517    Developed, Low Intensity
0.14802151 Developed, Medium Intensity
0.15738439   Developed, High Intensity

names(avg_reflectance_nlcd)

[1] "zone"     "Red"      "NIR"      "Blue"     "Green"    "SWIR1"   
[7] "SWIR2"    "SWIR3"    "cat_name"

col_ordering <- band_refl_order + 1

plot(as.numeric(avg_reflectance_nlcd[9,col_ordering]),
     type="l",
     col="green",
     xlab="bands",
     ylab="reflectance") #42 evergreen forest
lines(as.numeric(avg_reflectance_nlcd[6,col_ordering]),
      type="l",
      col="darkred") #22 developed,High intensity

plot of chunk unnamed-chunk-1

####### Explore data in Feature space:

#Feature space NIR1 and Red
plot(r_before$Red, r_before$NIR)

plot of chunk unnamed-chunk-1

plot(r_before$Green,r_before$Red)

plot of chunk unnamed-chunk-1

plot(r_before$SWIR1,r_before$NIR)

plot of chunk unnamed-chunk-1

plot(r_before$Red,r_before$SWIR1)

plot of chunk unnamed-chunk-1

df_r_before_nlcd <- as.data.frame(stack(r_before,nlcd2006_reg))
head(df_r_before_nlcd)

         Red       NIR       Blue      Green     SWIR1     SWIR2
0.03599200 0.2599244 0.02133671 0.04703750 0.2810231 0.1553841
0.02928869 0.2715889 0.01696460 0.04409311 0.2720489 0.1396950
0.02671438 0.2838178 0.01695321 0.04189149 0.2874987 0.1507025
0.02691096 0.2825994 0.01606729 0.04149844 0.2870458 0.1528974
0.04274514 0.2800327 0.02311076 0.05212086 0.3009017 0.1827510
0.03583299 0.2768207 0.02156925 0.04826682 0.2877768 0.1600456
       SWIR3 nlcd_2006_RITA
0.05624928             NA
0.04715122             NA
0.04982105             NA
0.05246148             NA
0.07394192             NA
0.05998935             NA

#Evergreen Forest:
df_subset <- subset(df_r_before_nlcd,nlcd_2006_RITA==42)
plot(df_subset$Green,
     df_subset$Red,
     col="green",
     cex=0.15,
     xlab="Green",
     ylab="Red",
     main="Green-Red feature space")

#Urban: dense
df_subset <- subset(df_r_before_nlcd,nlcd_2006_RITA==22)
points(df_subset$Green,
       df_subset$Red,
       col="darkred",
       cex=0.15)

#Water: 11 for NLCD
df_subset <- subset(df_r_before_nlcd,nlcd_2006_RITA==22)
points(df_subset$Green,
       df_subset$Red,
       col="blue",
       cex=0.15)

title("Feature space Green-Red")
names_vals <- c("Evergreen Forest","Developed High Intensity","Water")
legend("topright",legend=names_vals,
       pt.cex=0.7,cex=0.7,col=c("green","darkred","blue"),
       pch=1, #add circle symbol to line
       bty="n")

plot of chunk unnamed-chunk-1

#### Feature space Red and NIR

#Evergreen Forest:
df_subset <- subset(df_r_before_nlcd,nlcd_2006_RITA==42)
plot(df_subset$Red,
     df_subset$NIR,
     col="green",
     cex=0.15,
     xlab="Red",
     ylab="NIR")

#Developed, High Intensity
df_subset <- subset(df_r_before_nlcd,nlcd_2006_RITA==22)
points(df_subset$Red,
       df_subset$NIR,
       col="darkred",
       cex=0.15)

#Water
df_subset <- subset(df_r_before_nlcd,nlcd_2006_RITA==11)
points(df_subset$Red,
       df_subset$NIR,
       col="blue",
       cex=0.15)

title("Feature space Red-NIR")
names_vals <- c("Evergreen Forest","Developed High Intensity","Water")
legend("topright",legend=names_vals,
       pt.cex=0.7,cex=0.7,col=c("green","darkred","blue"),
       pch=1, #add circle symbol to line
       bty="n")

plot of chunk unnamed-chunk-1

#### Feature space Blue and Red

#Water:
df_subset <- subset(df_r_before_nlcd,nlcd_2006_RITA==11)
plot(df_subset$Blue,
       df_subset$Red,
       col="blue",
       cex=0.15,
       xlab="Blue",
       ylab="Red")

#Evergreen Forest:
df_subset <- subset(df_r_before_nlcd,nlcd_2006_RITA==42)
points(df_subset$Blue,
       df_subset$Red,
       col="green",
       cex=0.15)

#Urban dense:
df_subset <- subset(df_r_before_nlcd,nlcd_2006_RITA==22)
points(df_subset$Blue,
       df_subset$Red,
       col="darkred",
       cex=0.15)

title("Feature space Blue-Red")
names_vals <- c("Evergreen Forest","Developed High Intensity","Water")
legend("topright",legend=names_vals,
       pt.cex=0.7,cex=0.7,col=c("green","darkred","blue"),
       pch=1, #add circle symbol to line
       bty="n")

plot of chunk unnamed-chunk-1

#### Generate Color composites
### True color composite

plotRGB(r_before,
        r=1,
        g=4,
        b=3,
        scale=0.6,
        stretch="hist")

plot of chunk unnamed-chunk-1

### False color composite:

plotRGB(r_before,
        r=2,
        g=1,
        b=4,
        scale=0.6,
        stretch="hist")

plot of chunk unnamed-chunk-1

plotRGB(r_after,
        r=2,
        g=1,
        b=4,
        scale=0.6,
        stretch="hist")

plot of chunk unnamed-chunk-1

### Note the effect of flooding is particularly visible in the false color composites.
### Let's examine different band combination to enhance features in the image, including flooded areas.

###############################################
##### PART III: Band combination: Indices and thresholding for flood mapping ##############

### NIR experiment with threshold to  map water/flooding

plot(subset(r_before,"NIR"))

plot of chunk unnamed-chunk-1

plot(subset(r_after,"NIR"))

plot of chunk unnamed-chunk-1

### Lower NIR often correlates to areas with high water fraction or inundated:
r_rec_NIR_before <- subset(r_before,"NIR") < 0.2
r_rec_NIR_after <- subset(r_after,"NIR") < 0.2

### THis is suggesting flooding!!!
plot(r_rec_NIR_before, main="Before RITA, NIR > 0.2")

plot of chunk unnamed-chunk-1

plot(r_rec_NIR_after, main="After RITA, NIR > 0.2")

plot of chunk unnamed-chunk-1

#Compare to actual flooding data
freq_fema_zones <- as.data.frame(freq(r_ref))
xtab_threshold <- crosstab(r_ref,r_rec_NIR_after,long=T)

## % overlap between the flooded area and values below 0.2 in NIR
(xtab_threshold[5,3]/freq_fema_zones[2,2])*100 #agreement with FEMA flooded area in %.

[1] 76.17498

############## Generating indices based on raster algebra of original bands
## Let's generate a series of indices, we list a few possibility from the literature.
#1) NDVI = (NIR - Red)/(NIR+Red)
#2) NDWI = (Green - NIR)/(Green + NIR)
#3) MNDWI = Green - SWIR2 / Green + SWIR2
#4) NDWI2 (LSWIB5) =  (NIR - SWIR1)/(NIR + SWIR1)
#5) LSWI (LSWIB5) =  (NIR - SWIR2)/(NIR + SWIR2)

names(r_before)

[1] "Red"   "NIR"   "Blue"  "Green" "SWIR1" "SWIR2" "SWIR3"

r_before_NDVI <- (r_before$NIR - r_before$Red) / (r_before$NIR + r_before$Red)
r_after_NDVI <- subset(r_after,"NIR") - subset(r_after,"Red")/(subset(r_after,"NIR") + subset(r_after,"Red"))

plot(r_before_NDVI,zlim=c(-1,1),col=matlab.like(255))

plot of chunk unnamed-chunk-1

plot(r_after_NDVI,zlim=c(-1,1),col=matlab.like2(255))

plot of chunk unnamed-chunk-1

### Experiment with different threshold values:
# THis is suggesting flooding!!!

histogram(r_before_NDVI) # Examine histogram to look for potential threshold values

plot of chunk unnamed-chunk-1

histogram(r_after_NDVI) # Examine histogram to look for poential threshold values

plot of chunk unnamed-chunk-1

plot(r_before_NDVI < -0.5)

plot of chunk unnamed-chunk-1

plot(r_after_NDVI < -0.5)

plot of chunk unnamed-chunk-1

plot(r_before_NDVI < -0.1)

plot of chunk unnamed-chunk-1

plot(r_after_NDVI < -0.1)

plot of chunk unnamed-chunk-1

#2) NDWI = (Green - NIR)/(Green + NIR)
#3) MNDWI = Green - SWIR2 / Green + SWIR2

#Modified NDWI known as MNDWI : Green - SWIR2/(Green + SWIR2)

#According to Gao (1996), NDWI is a good indicator for vegetation liquid water content and 
# is less sensitive to atmospheric scattering effects than NDVI. In some studies, MODIS band 6 
# is used for the NDWI calculation, because it is sensitive to water types and contents 
# (Li et al., 2011), while band 5 is sensitive to vegetation liquid water content (Gao, 1996).

r_before_MNDWI <- (r_before$Green - r_before$SWIR2) / (r_before$Green + r_before$SWIR2)
r_after_MNDWI <- (r_after$Green - r_after$SWIR2) / (r_after$Green + r_after$SWIR2)

plot(r_before_MNDWI,zlim=c(-1,1),col=rev(matlab.like(255)))

plot of chunk unnamed-chunk-1

plot(r_after_MNDWI,zlim=c(-1,1),col=rev(matlab.like(255)))

plot of chunk unnamed-chunk-1

### THis is suggesting flooding!!!
plot(r_before_MNDWI > 0.5)

plot of chunk unnamed-chunk-1

plot(r_after_MNDWI > 0.5)

plot of chunk unnamed-chunk-1

plot(r_before_MNDWI > 0.1)

plot of chunk unnamed-chunk-1

plot(r_after_MNDWI > 0.1)

plot of chunk unnamed-chunk-1

r_diff_MNDWI <- r_after_MNDWI - r_before_MNDWI
plot(r_diff_MNDWI)

plot of chunk unnamed-chunk-1

plot(r_diff_MNDWI > 0.2) ## Increase in MNDWI: Areas that may have become flooded

plot of chunk unnamed-chunk-1

#Standardize image:
mean_val <- cellStats(r_diff_MNDWI,"mean")
sd_val <- cellStats(r_diff_MNDWI,"sd")

r_diff_std <- (r_diff_MNDWI - mean_val)/ sd_val
plot(r_diff_std)

plot of chunk unnamed-chunk-1

histogram(r_diff_std) 

plot of chunk unnamed-chunk-1

plot(r_diff_std > 2) ## increase in MNDWI 2 standard deiation away

plot of chunk unnamed-chunk-1

plot(r_diff_std > 1) ## increase in MNDWI 2 standard deiation away

plot of chunk unnamed-chunk-1

### Generate a map of flooding with MNDWI and compare to FEMA map:

r_after_flood <- r_diff_std > 1 
plot(r_after_flood)

#reclass in zero/1!!!

df <- data.frame(id=c(1,2), v=c(0,1))
r_ref_rec <- subs(r_ref, df)
plot(r_ref_rec)

plot of chunk unnamed-chunk-1

xtab_tb <- crosstab(r_after_flood,r_ref_rec)

## Examine overlap between flooded area and FEMA:

plot(r_after_flood)

plot of chunk unnamed-chunk-1

r_overlap <- r_ref_rec * r_after_flood
plot(r_overlap) ## Flood map area of aggreement with FEMA

plot of chunk unnamed-chunk-1

###############################################
##### PART IV: Principal Component Analysis and band transformation
## Another avenue to combine original bands into indices is to use linear transformation.
## We examine Principal Component Analysis and the Tassel Cap Transform.

#Correlate long term mean to PC!
cor_mat_layerstats <- layerStats(r_before, 'pearson', na.rm=T)
cor_matrix <- cor_mat_layerstats$`pearson correlation coefficient`
class(cor_matrix)

[1] "matrix"

print(cor_matrix) #note size is 7x7

            Red       NIR         Blue     Green        SWIR1     SWIR2
Red   1.0000000 0.2398870  0.903113075 0.9707865  0.256771113 0.4605483
NIR   0.2398870 1.0000000  0.013412596 0.2318272  0.978567657 0.8911699
Blue  0.9031131 0.0134126  1.000000000 0.9498225 -0.002725252 0.1571903
Green 0.9707865 0.2318272  0.949822516 1.0000000  0.218488306 0.3842003
SWIR1 0.2567711 0.9785677 -0.002725252 0.2184883  1.000000000 0.9474207
SWIR2 0.4605483 0.8911699  0.157190340 0.3842003  0.947420706 1.0000000
SWIR3 0.6443259 0.7292226  0.343071136 0.5438923  0.805069686 0.9448527
          SWIR3
Red   0.6443259
NIR   0.7292226
Blue  0.3430711
Green 0.5438923
SWIR1 0.8050697
SWIR2 0.9448527
SWIR3 1.0000000

pca_mod <- principal(cor_matrix,nfactors=7,rotate="none")
class(pca_mod$loadings)

[1] "loadings"

print(pca_mod$loadings)

Loadings:
      PC1    PC2    PC3    PC4    PC5    PC6    PC7   
Red    0.774  0.616                                   
NIR    0.779 -0.556  0.282                            
Blue   0.551  0.812  0.140  0.131                     
Green  0.734  0.665  0.101                            
SWIR1  0.806 -0.576  0.123                            
SWIR2  0.905 -0.403 -0.121                            
SWIR3  0.933 -0.162 -0.315                            

                 PC1   PC2   PC3   PC4   PC5   PC6   PC7
SS loadings    4.387 2.310 0.246 0.034 0.011 0.010 0.003
Proportion Var 0.627 0.330 0.035 0.005 0.002 0.001 0.000
Cumulative Var 0.627 0.957 0.992 0.997 0.998 1.000 1.000

plot(pca_mod$loadings[,1][band_refl_order],type="b",
     xlab="time steps",
     ylab="PC loadings",
     ylim=c(-1,1),
     col="blue")
lines(-1*(pca_mod$loadings[,2][band_refl_order]),type="b",col="red")
lines(pca_mod$loadings[,3][band_refl_order],type="b",col="black")
title("Loadings for the first three components using T-mode")

##Make this a time series
loadings_df <- as.data.frame(pca_mod$loadings[,1:7])
title("Loadings for the first three components using T-mode")
names_vals <- c("pc1","pc2","pc3")
legend("topright",legend=names_vals,
       pt.cex=0.8,cex=1.1,col=c("blue","red","black"),
       lty=c(1,1), # set legend symbol as lines
       pch=1, #add circle symbol to line
       lwd=c(1,1),bty="n")

plot of chunk unnamed-chunk-1

## Add scree plot
plot(pca_mod$values,main="Scree plot: Variance explained",type="b")

plot of chunk unnamed-chunk-1

### Generate scores from eigenvectors
### Using predict function: this is recommended for raster.
r_pca <- predict(r_before, pca_mod, index=1:7,filename="pc_scores.tif",overwrite=T) # fast
plot(r_pca,y=2,zlim=c(-2,2))

plot of chunk unnamed-chunk-1

plot(r_pca,y=1,zlim=c(-2,2))

plot of chunk unnamed-chunk-1

plot(r_pca,y=3,zlim=c(-2,2))

plot of chunk unnamed-chunk-1

plot(subset(r_pca,1),subset(r_pca,2))

plot of chunk unnamed-chunk-1

plot(subset(r_pca,2),subset(r_pca,3))

plot of chunk unnamed-chunk-1

#### Generate a plot for PCA with loadings and compare to Tassel Cap

var_labels <- rownames(loadings_df)

plot(loadings_df[,1],loadings_df[,2],
     type="p",
     pch = 20,
     col ="blue",
     xlab=names(loadings_df)[1],
     ylab=names(loadings_df)[2],
     ylim=c(-1,1),
     xlim=c(-1,1),
     axes = FALSE,
     cex.lab = 1.2)
axis(1, at=seq(-1,1,0.2),cex=1.2)
axis(2, las=1,at=seq(-1,1,0.2),cex=1.2) # "1' for side=below, the axis is drawned  on the right at location 0 and 1

box()    #This draws a box...

title(paste0("Loadings for component ", names(loadings_df)[1]," and " ,names(loadings_df)[2] ))
draw.circle(0,0,c(1.0,1.0),nv=500)#,border="purple",
text(loadings_df[,1],loadings_df[,2],var_labels,pos=1,cex=1)            
grid(2,2)

plot of chunk unnamed-chunk-1

##### plot feature space:

df_raster_val <- as.data.frame(stack(r_after,r_pca,nlcd2006_reg))
head(df_raster_val)

         Red       NIR       Blue      Green     SWIR1     SWIR2
0.04558113 0.3027903 0.02379627 0.05476730 0.3318335 0.1902746
0.04460510 0.3043112 0.02354558 0.05586702 0.3138186 0.1842244
0.04348749 0.2998908 0.02374887 0.05381133 0.3123330 0.1855501
0.04507260 0.3017146 0.02426019 0.05503851 0.3193893 0.1913280
0.04715690 0.3239896 0.02511090 0.05962222 0.3417773 0.1930779
0.03485106 0.2683164 0.01941371 0.04450204 0.2855803 0.1609111
       SWIR3 pc_scores.1 pc_scores.2 pc_scores.3 pc_scores.4 pc_scores.5
0.07314926  -0.7644424  -0.1749840   0.4895376  -0.3940996  -0.7156922
0.06792310  -0.9065449  -0.3173643   0.9340215  -1.2679656  -3.0844703
0.07248760  -0.7900366  -0.5316336   1.2609810  -0.8686562  -2.8946190
0.07654412  -0.7877789  -0.5503042   1.1144350  -0.9439425  -2.8750505
0.07032830  -0.3890644  -0.3253821   0.6932408  -0.6264383  -1.0684590
0.05962812  -0.6346722  -0.3010963   0.9234368  -0.6627523  -2.1239798
  pc_scores.6 pc_scores.7 nlcd_2006_RITA
 0.9251045  1.48893476             NA
 1.4692621  0.02715476             NA
 1.9787420 -0.02099684             NA
 1.8841606 -0.05822323             NA
 1.0431399  0.46276042             NA
 1.4554931  0.37810266             NA

#Water
plot(df_raster_val[df_raster_val$nlcd_2006_RITA==11,c("pc_scores.1")],
     df_raster_val[df_raster_val$nlcd_2006_RITA==11,c("pc_scores.2")],
     col="blue",cex=0.15,
     ylim=c(-2,2),
     xlim=c(-2,2))

#Urban: dense
points(df_raster_val[df_raster_val$nlcd_2006_RITA==22,c("pc_scores.1")],
       df_raster_val[df_raster_val$nlcd_2006_RITA==22,c("pc_scores.2")],
       col="brown",cex=0.15)

#Forest:
points(df_raster_val[df_raster_val$nlcd_2006_RITA==42,c("pc_scores.1")],
       df_raster_val[df_raster_val$nlcd_2006_RITA==42,c("pc_scores.2")],
       col="green",cex=0.15)

plot of chunk unnamed-chunk-1

### Note the negative values are related to Forest and positve to water on PC2
plot(r_pca$pc_scores.2 > 0.1) #water related?

plot of chunk unnamed-chunk-1

plot(r_pca$pc_scores.2 < - 0.1) #vegetation related?

plot of chunk unnamed-chunk-1

#### Examine relationship with Tasselcap transform adapted for MODIS:
#6) TCWI =  0.10839 * Red+ 0.0912 * NIR +0.5065 * Blue+ 0.404 * Green 
#            - 0.241 * SWIR1- 0.4658 * SWIR2-
#           0.5306 * SWIR3
#7) TCBI = 0.3956 * Red + 0.4718 * NIR +0.3354 * Blue+ 0.3834 * Green
#           + 0.3946 * SWIR1 + 0.3434 * SWIR2+ 0.2964 * SWIR3

r_TCWI =  0.10839 * r_before$Red + 0.0912 * r_before$NIR +0.5065 * r_before$Blue+ 0.404 * r_before$Green 
- 0.241 * r_before$SWIR1- 0.4658 * r_before$SWIR2 - 0.5306 * r_before$SWIR3

class       : RasterLayer 
dimensions  : 116, 298, 34568  (nrow, ncol, ncell)
resolution  : 926.6254, 926.6254  (x, y)
extent      : 1585224, 1861359, 842226.7, 949715.2  (xmin, xmax, ymin, ymax)
coord. ref. : +proj=lcc +lat_1=27.41666666666667 +lat_2=34.91666666666666 +lat_0=31.16666666666667 +lon_0=-100 +x_0=1000000 +y_0=1000000 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs 
data source : in memory
names       : layer 
values      : -0.5969822, -0.00232494  (min, max)

r_TCBI = 0.3956 * r_before$Red + 0.4718 * r_before$NIR +0.3354 * r_before$Blue+ 0.3834 * r_before$Green
          + 0.3946 * r_before$SWIR1 + 0.3434 * r_before$SWIR2+ 0.2964 * r_before$SWIR3

class       : RasterLayer 
dimensions  : 116, 298, 34568  (nrow, ncol, ncell)
resolution  : 926.6254, 926.6254  (x, y)
extent      : 1585224, 1861359, 842226.7, 949715.2  (xmin, xmax, ymin, ymax)
coord. ref. : +proj=lcc +lat_1=27.41666666666667 +lat_2=34.91666666666666 +lat_0=31.16666666666667 +lon_0=-100 +x_0=1000000 +y_0=1000000 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs 
data source : in memory
names       : layer 
values      : 0.00172096, 0.4989139  (min, max)

#r_TCWI <- r_TCWI*10
histogram(r_TCWI)

plot of chunk unnamed-chunk-1

plot(r_TCWI)

plot of chunk unnamed-chunk-1

plot(r_TCWI,zlim=c(0,0.2))

plot of chunk unnamed-chunk-1

plot(r_TCWI>0.1)

plot of chunk unnamed-chunk-1

histogram(r_TCBI)

plot of chunk unnamed-chunk-1

plot(r_TCBI)

plot of chunk unnamed-chunk-1

plot(r_TCBI,zlim=c(0,0.2))

plot of chunk unnamed-chunk-1

r_stack <- stack(r_TCWI,r_TCBI, r_pca)
names(r_stack) <- c("TCWI","TCBI",names(r_pca))
cor_r_TC <- layerStats(r_stack,"pearson",na.rm=T)
cor_r_TC <- as.data.frame(cor_r_TC$`pearson correlation coefficient`)
print(cor_r_TC)

                  TCWI         TCBI pc_scores.1  pc_scores.2 pc_scores.3
TCWI        1.00000000 0.8283253068  0.83971868  0.548005396  0.26578247
TCBI        0.82832531 1.0000000000  0.87114254  0.106891136  0.28142299
pc_scores.1 0.83971868 0.8711425415  1.00000000  0.082280569  0.13336455
pc_scores.2 0.54800540 0.1068911362  0.08228057  1.000000000  0.02282614
pc_scores.3 0.26578247 0.2814229943  0.13336455  0.022826135  1.00000000
pc_scores.4 0.13531034 0.1067056440  0.25180545 -0.008046805 -0.26120385
pc_scores.5 0.06877618 0.0001461868  0.14499957  0.078513273 -0.28695036
pc_scores.6 0.14641088 0.1400215251  0.06428065  0.070531136  0.32528627
pc_scores.7 0.03469429 0.0428780431  0.11454351 -0.114043686  0.03411671
             pc_scores.4   pc_scores.5 pc_scores.6 pc_scores.7
TCWI         0.135310342  0.0687761767  0.14641088  0.03469429
TCBI         0.106705644  0.0001461868  0.14002153  0.04287804
pc_scores.1  0.251805447  0.1449995703  0.06428065  0.11454351
pc_scores.2 -0.008046805  0.0785132730  0.07053114 -0.11404369
pc_scores.3 -0.261203854 -0.2869503646  0.32528627  0.03411671
pc_scores.4  1.000000000  0.2583954010 -0.18593196  0.12719391
pc_scores.5  0.258395401  1.0000000000 -0.31388314  0.35960982
pc_scores.6 -0.185931965 -0.3138831416  1.00000000  0.05074089
pc_scores.7  0.127193911  0.3596098185  0.05074089  1.00000000

##### Plot on the loading space using TCBI and TCWI as supplementary variables
var_labels <- rownames(loadings_df)

plot(loadings_df[,1],loadings_df[,2],
     type="p",
     pch = 20,
     col ="blue",
     xlab=names(loadings_df)[1],
     ylab=names(loadings_df)[2],
     ylim=c(-1,1),
     xlim=c(-1,1),
     axes = FALSE,
     cex.lab = 1.2)

points(cor_r_TC$pc_scores.1[1],cor_r_TC$pc_scores.2[1],col="red")
points(cor_r_TC$pc_scores.1[2],cor_r_TC$pc_scores.2[2],col="green")

axis(1, at=seq(-1,1,0.2),cex=1.2)
axis(2, las=1,at=seq(-1,1,0.2),cex=1.2) # "1' for side=below, the axis is drawned  on the right at location 0 and 1
box()    #This draws a box...

title(paste0("Loadings for component ", names(loadings_df)[1]," and " ,names(loadings_df)[2] ))
draw.circle(0,0,c(1.0,1.0),nv=500)#,border="purple",
text(loadings_df[,1],loadings_df[,2],var_labels,pos=1,cex=1)            
text(cor_r_TC$pc_scores.1[1],cor_r_TC$pc_scores.2[1],"TCWI",pos=1,cex=1)
text(cor_r_TC$pc_scores.1[2],cor_r_TC$pc_scores.2[2],"TCBI",pos=1,cex=1)
grid(2,2)

plot of chunk unnamed-chunk-1

########################## End of Script ###################################

Top of Section

####################################    Flood Mapping Analyses   #######################################
############################  Analyze and map flooding from RITA hurricane  #######################################
#This script performs analyses for the Exercise 6 of the Short Course using reflectance data derived from MODIS.
#The goal is to map flooding from RITA using various reflectance bands from Remote Sensing platforms.
#Additional data is provided including FEMA flood region. 
#
#AUTHORS: Benoit Parmentier                                             
#DATE CREATED: 03/13/2018 
#DATE MODIFIED: 04/01/2018
#Version: 1
#PROJECT: SESYNC and AAG 2018 workshop/Short Course preparation
#TO DO:
#
#COMMIT: setting up input data
#
#################################################################################################

###Loading R library and packages                                                      

library(sp) # spatial/geographfic objects and functions
library(rgdal) #GDAL/OGR binding for R with functionalities
library(spdep) #spatial analyses operations, functions etc.
library(gtools) # contains mixsort and other useful functions
library(maptools) # tools to manipulate spatial data
library(parallel) # parallel computation, part of base package no
library(rasterVis) # raster visualization operations
library(raster) # raster functionalities
library(forecast) #ARIMA forecasting
library(xts) #extension for time series object and analyses
library(zoo) # time series object and analysis
library(lubridate) # dates functionality
library(colorRamps) #contains matlab.like color palette
library(rgeos) #contains topological operations
library(sphet) #contains spreg, spatial regression modeling
library(BMS) #contains hex2bin and bin2hex, Bayesian methods
library(bitops) # function for bitwise operations
library(foreign) # import datasets from SAS, spss, stata and other sources
#library(gdata) #read xls, dbf etc., not recently updated but useful
library(classInt) #methods to generate class limits
library(plyr) #data wrangling: various operations for splitting, combining data
#library(gstat) #spatial interpolation and kriging methods
library(readxl) #functionalities to read in excel type data
library(psych) #pca/eigenvector decomposition functionalities
library(sf)
library(plotrix) #various graphic functions e.g. draw.circle
library(nnet)
library(rpart)
library(e1071)


Attaching package: 'e1071'

The following object is masked from 'package:raster':

    interpolate

The following object is masked from 'package:gtools':

    permutations

library(caret)

Loading required package: ggplot2


Attaching package: 'ggplot2'

The following objects are masked from 'package:psych':

    %+%, alpha

The following object is masked from 'package:forecast':

    autolayer

The following object is masked from 'package:latticeExtra':

    layer

The following object is masked from 'package:raster':

    calc

###### Functions used in this script

create_dir_fun <- function(outDir,out_suffix=NULL){
  #if out_suffix is not null then append out_suffix string
  if(!is.null(out_suffix)){
    out_name <- paste("output_",out_suffix,sep="")
    outDir <- file.path(outDir,out_name)
  }
  #create if does not exists
  if(!file.exists(outDir)){
    dir.create(outDir)
  }
  return(outDir)
}

#Used to load RData object saved within the functions produced.
load_obj <- function(f){
  env <- new.env()
  nm <- load(f, env)[1]
  env[[nm]]
}

#####  Parameters and argument set up ###########

in_dir_var <- "../data"
out_dir <- "."

#region coordinate reference system
#http://spatialreference.org/ref/epsg/nad83-texas-state-mapping-system/proj4/
CRS_reg <- "+proj=lcc +lat_1=27.41666666666667 +lat_2=34.91666666666666 +lat_0=31.16666666666667 +lon_0=-100 +x_0=1000000 +y_0=1000000 +ellps=GRS80 +datum=NAD83 +units=m +no_defs" 
file_format <- ".tif" #PARAM5
NA_flag_val <- -9999 #PARAM6
out_suffix <-"exercise6_03312018" #output suffix for the files and ouptu folder #PARAM 8
create_out_dir_param=TRUE #PARAM9

#ARG9
#local raster name defining resolution, extent
infile_RITA_reflectance_date2 <- "mosaiced_MOD09A1_A2005273__006_reflectance_masked_RITA_reg_1km.tif"
infile_reg_outline <- "new_strata_rita_10282017" # Region outline and FEMA zones
infile_modis_bands_information <- "df_modis_band_info.txt" # MOD09 bands information.
nlcd_2006_filename <- "nlcd_2006_RITA.tif" # NLCD2006 Land cover data aggregated at ~ 1km.

###########################  START SCRIPT  ##############################

####### SET UP OUTPUT DIRECTORY #######

## First create an output directory

if(is.null(out_dir)){
  out_dir <- dirname(in_dir) #output will be created in the input dir
}

out_suffix_s <- out_suffix #can modify name of output suffix
if(create_out_dir_param==TRUE){
  out_dir <- create_dir_fun(out_dir,out_suffix_s)
  setwd(out_dir)
}else{
  setwd(out_dir) #use previoulsy defined directory
}

#####################################
##### PART I: DISPLAY AND EXPLORE DATA ##############

#### MOD09 raster image after hurricane Rita
r_after <- brick(file.path(in_dir_var,infile_RITA_reflectance_date2))

## Read band information since it is more informative!!
df_modis_band_info <- read.table(file.path(in_dir_var,infile_modis_bands_information),
                                 sep=",",
                                 stringsAsFactors = F)
print(df_modis_band_info)

  band_name band_number start_wlength end_wlength
     Red           3           620         670
     NIR           4           841         876
    Blue           1           459         479
   Green           2           545         565
   SWIR1           5          1230        1250
   SWIR2           6          1628        1652
   SWIR3           7          2105        2155

df_modis_band_info$band_number <- c(3,4,1,2,5,6,7)
write.table(df_modis_band_info,file.path(in_dir_var,infile_modis_bands_information),
            sep=",")

band_refl_order <- df_modis_band_info$band_number

names(r_after) <- df_modis_band_info$band_name

## Use subset instead of $ if you want to wrap code into function
r_after_MNDWI <- (subset(r_after,"Green") - subset(r_after,"SWIR2")) / (subset(r_after,"Green") + subset(r_after,"SWIR2"))
plot(r_after_MNDWI,zlim=c(-1,1))

plot of chunk unnamed-chunk-1

r_after_NDVI <- (subset(r_after,"NIR") - subset(r_after,"Red")) / (subset(r_after,"NIR") + subset(r_after,"Red"))
plot(r_after_NDVI)

plot of chunk unnamed-chunk-1

data_type_str <- dataType(r_after_NDVI)

NAvalue(r_after_NDVI) <- NA_flag_val
out_filename <- paste("ndvi_post_Rita","_",out_suffix,file_format,sep="")
out_filename <- file.path(out_dir,out_filename)
writeRaster(r_after_NDVI,
            filename=out_filename,
            datatype=data_type_str,
            overwrite=T)

Error in .local(x, filename, ...): Attempting to write a file to a path that does not exist:
  ./output_exercise6_03312018

NAvalue(r_after_MNDWI) <- NA_flag_val
out_filename <- paste("mndwi_post_Rita","_",out_suffix,file_format,sep="")
out_filename <- file.path(out_dir,out_filename)
writeRaster(r_after_MNDWI,
            filename=out_filename,
            datatype=data_type_str,
            overwrite=T)

Error in .local(x, filename, ...): Attempting to write a file to a path that does not exist:
  ./output_exercise6_03312018

#training_data_sf <- st_read("training1.shp")
#1) vegetation and other (small water fraction)
#2) Flooded vegetation
#3) Flooded area, or water (lake etc)

class1_data_sf <- st_read(file.path(in_dir_var,"class1_sites"))

Reading layer `class1_sites' from data source `/nfs/public-data/training/class1_sites' using driver `ESRI Shapefile'
Simple feature collection with 6 features and 2 fields
geometry type:  POLYGON
dimension:      XY
bbox:           xmin: 1588619 ymin: 859495.9 xmax: 1830803 ymax: 941484.1
epsg (SRID):    NA
proj4string:    NA

class2_data_sf <- st_read(file.path(in_dir_var,"class2_sites"))

Reading layer `class2_sites' from data source `/nfs/public-data/training/class2_sites' using driver `ESRI Shapefile'
Simple feature collection with 8 features and 2 fields
geometry type:  MULTIPOLYGON
dimension:      XY
bbox:           xmin: 1605433 ymin: 848843.8 xmax: 1794265 ymax: 884027.8
epsg (SRID):    NA
proj4string:    NA

class3_data_sf <- st_read(file.path(in_dir_var,"class3_sites"))

Reading layer `class3_sites' from data source `/nfs/public-data/training/class3_sites' using driver `ESRI Shapefile'
Simple feature collection with 9 features and 2 fields
geometry type:  POLYGON
dimension:      XY
bbox:           xmin: 1587649 ymin: 845271.3 xmax: 1777404 ymax: 891073.1
epsg (SRID):    NA
proj4string:    NA

### combine object, note they should be in the same projection system
class_data_sf <- rbind(class1_data_sf,class2_data_sf,class3_data_sf)
class_data_sf$poly_ID <- 1:nrow(class_data_sf) #unique ID for each polygon
nrow(class_data_sf) # 23 different polygons used at ground truth data

[1] 23

class_data_sp <- as(class_data_sf,"Spatial")

r_x <- init(r_after,"x") #raster with coordinates x
r_y <- init(r_after,"x") #raster with coordiates y

r_stack <- stack(r_x,r_y,r_after,r_after_NDVI,r_after_MNDWI)
names(r_stack) <- c("x","y","Red","NIR","Blue","Green","SWIR1","SWIR2","SWIR3","NDVI","MNDWI")
pixels_extracted_df <- extract(r_stack,class_data_sp,df=T)

Warning in merge.data.frame(as.data.frame(x), as.data.frame(y), ...):
column name 'x' is duplicated in the result

Warning in merge.data.frame(as.data.frame(x), as.data.frame(y), ...):
column name 'x' is duplicated in the result

Warning in merge.data.frame(as.data.frame(x), as.data.frame(y), ...):
column name 'x' is duplicated in the result

Warning in merge.data.frame(as.data.frame(x), as.data.frame(y), ...):
column name 'x' is duplicated in the result

Warning in merge.data.frame(as.data.frame(x), as.data.frame(y), ...):
column name 'x' is duplicated in the result

Warning in merge.data.frame(as.data.frame(x), as.data.frame(y), ...):
column name 'x' is duplicated in the result

Warning in merge.data.frame(as.data.frame(x), as.data.frame(y), ...):
column name 'x' is duplicated in the result

Warning in merge.data.frame(as.data.frame(x), as.data.frame(y), ...):
column name 'x' is duplicated in the result

Warning in merge.data.frame(as.data.frame(x), as.data.frame(y), ...):
column name 'x' is duplicated in the result

Warning in merge.data.frame(as.data.frame(x), as.data.frame(y), ...):
column name 'x' is duplicated in the result

Warning in merge.data.frame(as.data.frame(x), as.data.frame(y), ...):
column name 'x' is duplicated in the result

Warning in merge.data.frame(as.data.frame(x), as.data.frame(y), ...):
column name 'x' is duplicated in the result

Warning in merge.data.frame(as.data.frame(x), as.data.frame(y), ...):
column name 'x' is duplicated in the result

Warning in merge.data.frame(as.data.frame(x), as.data.frame(y), ...):
column name 'x' is duplicated in the result

Warning in merge.data.frame(as.data.frame(x), as.data.frame(y), ...):
column name 'x' is duplicated in the result

Warning in merge.data.frame(as.data.frame(x), as.data.frame(y), ...):
column name 'x' is duplicated in the result

Warning in merge.data.frame(as.data.frame(x), as.data.frame(y), ...):
column name 'x' is duplicated in the result

Warning in merge.data.frame(as.data.frame(x), as.data.frame(y), ...):
column name 'x' is duplicated in the result

Warning in merge.data.frame(as.data.frame(x), as.data.frame(y), ...):
column name 'x' is duplicated in the result

Warning in merge.data.frame(as.data.frame(x), as.data.frame(y), ...):
column name 'x' is duplicated in the result

Warning in merge.data.frame(as.data.frame(x), as.data.frame(y), ...):
column name 'x' is duplicated in the result

Warning in merge.data.frame(as.data.frame(x), as.data.frame(y), ...):
column name 'x' is duplicated in the result

Warning in merge.data.frame(as.data.frame(x), as.data.frame(y), ...):
column name 'x' is duplicated in the result

dim(pixels_extracted_df) #We have 1547 pixels extracted

[1] 1547   12

class_data_df <- class_data_sf
st_geometry(class_data_df) <- NULL #this will coerce the sf object into a data.frame

pixels_df <- merge(pixels_extracted_df,class_data_df,by.x="ID",by.y="poly_ID")

head(pixels_df)

  ID       x       y        Red       NIR       Blue      Green     SWIR1
1 1615340 1615340 0.05304649 0.3035397 0.02652384 0.05662384 0.3101271
1 1614413 1614413 0.05782417 0.3295924 0.02751791 0.06305390 0.3453724
1 1615340 1615340 0.05735013 0.3037102 0.02596963 0.05654212 0.3176476
1 1616266 1616266 0.05805403 0.3081988 0.02711912 0.05750944 0.3229090
1 1617193 1617193 0.04147716 0.3058677 0.01839499 0.05076184 0.3003513
1 1618120 1618120 0.04209801 0.3089238 0.01832494 0.05200353 0.2943777
      SWIR2      SWIR3      NDVI      MNDWI record_ID class_ID
0.1936452 0.07864757 0.7024759 -0.5474962         1        1
0.2102322 0.08679578 0.7014884 -0.5385502         1        1
0.2073480 0.08732812 0.6823238 -0.5714722         1        1
0.2079238 0.08741243 0.6829839 -0.5666749         1        1
0.1692552 0.06590501 0.7611759 -0.5385644         1        1
0.1697936 0.06790353 0.7601402 -0.5310712         1        1

table(pixels_df$class_ID) # count by class of pixels ground truth data

  1   2   3 
508 285 754 

######## Examining sites data used for the classification

#Water
x_range <- range(pixels_df$Green,na.rm=T)
y_range <- range(pixels_df$NIR,na.rm=T)

###Add legend?
plot(NIR~Green,xlim=x_range,ylim=y_range,cex=0.2,col="blue",subset(pixels_df,class_ID==1))
points(NIR~Green,col="green",cex=0.2,subset(pixels_df,class_ID==2))
points(NIR~Green,col="red",cex=0.2,subset(pixels_df,class_ID==3))
names_vals <- c("water class 1","water class 2","water class 3")
legend("topleft",legend=names_vals,
       pt.cex=0.7,cex=0.7,col=c("blue","green","red"),
       pch=20, #add circle symbol to line
       bty="n")

plot of chunk unnamed-chunk-1

## Let's use a palette that reflects wetness or level of water 
col_palette <- c("green","blue","darkblue")
plot(NDVI ~ MNDWI,
     xlim=c(-1,1),ylim=c(-1,1),
     cex=0.2,
     col=col_palette[1],
     subset(pixels_df,class_ID==1))

points(NDVI ~ MNDWI,
       cex=0.2,
       col=col_palette[2],
       subset(pixels_df,class_ID==2))

points(NDVI ~ MNDWI,
       cex=0.2,
       col=col_palette[3],
       subset(pixels_df,class_ID==3))

names_vals <- c("vegetation","wetland","water")
legend("topright",legend=names_vals,
       pt.cex=0.7,cex=0.7,col=col_palette,
       pch=20, #add circle symbol to line
       bty="n")

plot of chunk unnamed-chunk-1

histogram(r_after)

plot of chunk unnamed-chunk-1

pixels_df$class_ID <- factor(pixels_df$class_ID,
                    levels = c(1,2,3),
                    labels = names_vals)

boxplot(MNDWI~class_ID,
        pixels_df,
        xlab="category",
        main="Boxplot for MNDWI per class")

plot of chunk unnamed-chunk-1

###############################################
##### PART II: Split training and testing ##############

##let's keep 30% of data for testing for each class

pixels_df$pix_ID <- 1:nrow(pixels_df)
prop <- 0.3
table(pixels_df$class_ID)

vegetation    wetland      water 
       508        285        754 

set.seed(100) ## set random seed for reproducibility

### This is for one class:
##Better as a function but we use a loop for clarity here:

list_data_df <- vector("list",length=3)
level_labels <- names_vals

for(i in 1:3){
  data_df <- subset(pixels_df,class_ID==level_labels[i])
  data_df$pix_id <- 1:nrow(data_df)
  indices <- as.vector(createDataPartition(data_df$pix_ID,p=0.7,list=F))
  data_df$training <-  as.numeric(data_df$pix_id %in% indices)
  list_data_df[[i]] <- data_df
}

data_df <- do.call(rbind,list_data_df)

dim(data_df)

[1] 1547   17

head(data_df)

  ID       x       y        Red       NIR       Blue      Green     SWIR1
1 1615340 1615340 0.05304649 0.3035397 0.02652384 0.05662384 0.3101271
1 1614413 1614413 0.05782417 0.3295924 0.02751791 0.06305390 0.3453724
1 1615340 1615340 0.05735013 0.3037102 0.02596963 0.05654212 0.3176476
1 1616266 1616266 0.05805403 0.3081988 0.02711912 0.05750944 0.3229090
1 1617193 1617193 0.04147716 0.3058677 0.01839499 0.05076184 0.3003513
1 1618120 1618120 0.04209801 0.3089238 0.01832494 0.05200353 0.2943777
      SWIR2      SWIR3      NDVI      MNDWI record_ID   class_ID pix_ID
0.1936452 0.07864757 0.7024759 -0.5474962         1 vegetation      1
0.2102322 0.08679578 0.7014884 -0.5385502         1 vegetation      2
0.2073480 0.08732812 0.6823238 -0.5714722         1 vegetation      3
0.2079238 0.08741243 0.6829839 -0.5666749         1 vegetation      4
0.1692552 0.06590501 0.7611759 -0.5385644         1 vegetation      5
0.1697936 0.06790353 0.7601402 -0.5310712         1 vegetation      6
  pix_id training
    1        0
    2        1
    3        0
    4        0
    5        1
    6        0

data_training <- subset(data_df,training==1)

###############################################
##### PART III: Generate classification using CART and SVM ##############

############### Using Classification and Regression Tree model (CART) #########

## Fit model using training data for CART
mod_rpart <- rpart(class_ID ~ Red + NIR + Blue + Green + SWIR1 + SWIR2 + SWIR3,
                   method="class", 
                   data=data_training)

# Plot the fitted  classification tree
plot(mod_rpart, uniform=TRUE, main="Classification Tree")
text(mod_rpart, cex=.8)

plot of chunk unnamed-chunk-1

# Now predict the subset data based on the model; prediction for entire area takes longer time
raster_out_filename <- paste0("r_predicted_rpart_",out_suffix,file_format)
r_predicted_rpart <- predict(r_stack,mod_rpart, 
                             type='class',
                             filename=raster_out_filename,
                             progress = 'text',
                             overwrite=T)

  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |================                                                 |  25%
  |                                                                       
  |================================                                 |  50%
  |                                                                       
  |=================================================                |  75%
  |                                                                       
  |=================================================================| 100%

plot(r_predicted_rpart)

plot of chunk unnamed-chunk-1

r_predicted_rpart <- ratify(r_predicted_rpart)
rat <- levels(r_predicted_rpart)[[1]]
rat$legend <- c("vegetation","wetland","water")
levels(r_predicted_rpart) <- rat
levelplot(r_predicted_rpart, maxpixels = 1e6,
          col.regions = c("green","blue","darkblue"),
          scales=list(draw=FALSE),
          main = "Classification Tree")

plot of chunk unnamed-chunk-1

############### Using Support Vector Machine #########

## set class_ID as factor to generate classification

mod_svm <- svm(class_ID ~ Red +NIR + Blue + Green + SWIR1 + SWIR2 + SWIR3,
               data=data_training,
               method="C-classification",
               kernel="linear") # can be radial

summary(mod_svm)

Call:
svm(formula = class_ID ~ Red + NIR + Blue + Green + SWIR1 + SWIR2 + 
    SWIR3, data = data_training, method = "C-classification", 
    kernel = "linear")


Parameters:
   SVM-Type:  C-classification 
 SVM-Kernel:  linear 
       cost:  1 
      gamma:  0.1428571 

Number of Support Vectors:  110

 ( 12 54 44 )


Number of Classes:  3 

Levels: 
 vegetation wetland water

# Now predict the subset data based on the model; prediction for entire area takes longer time
raster_outfilename <- paste0("r_predicted_svm_",out_suffix,file_format)
r_predicted_svm <- predict(r_stack, mod_svm,
                           progress = 'text',
                           filename=raster_outfilename,
                           overwrite=T)

  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |================                                                 |  25%
  |                                                                       
  |================================                                 |  50%
  |                                                                       
  |=================================================                |  75%
  |                                                                       
  |=================================================================| 100%

plot(r_predicted_svm)

plot of chunk unnamed-chunk-1

histogram(r_predicted_svm)

plot of chunk unnamed-chunk-1

r_predicted_svm <- ratify(r_predicted_svm)
rat <- levels(r_predicted_svm)[[1]]
rat$legend <- c("vegetation","wetland","water")
levels(r_predicted_svm) <- rat
levelplot(r_predicted_svm, maxpixels = 1e6,
          col.regions = c("green","blue","darkblue"),
          scales=list(draw=FALSE),
          main = "SVM classification")

plot of chunk unnamed-chunk-1

###############################################
##### PART IV: Compare methods for the performance ##############

dim(data_df) # full dataset, let's use data points for testing

[1] 1547   17

#omit values that contain NA, because may be problematic with SVM.
data_testing <- na.omit(subset(data_df,training==0)) 
dim(data_testing)

[1] 450  17

#### Predict on testing data using rpart model fitted with training data
testing_rpart <- predict(mod_rpart, data_testing,type='class')
#### Predict on testing data using SVM model fitted with training data
testing_svm <- predict(mod_svm,data_testing, type='class')

## Predicted classes:
table(testing_svm)

testing_svm
vegetation    wetland      water 
       150         78        222 

#### Generate confusion matrix to assess the performance of the model

tb_rpart <- table(testing_rpart,data_testing$class_ID)
tb_svm <- table(testing_svm,data_testing$class_ID)

#testing_rpart: map prediction in the rows
#data_test$class_ID: ground truth data in the columns
#http://spatial-analyst.net/ILWIS/htm/ilwismen/confusion_matrix.htm
#Producer accuracy: it is the fraction of correctly classified pixels with regard to all pixels 
#of that ground truth class. 

table(testing_rpart) #classification, map results

testing_rpart
vegetation    wetland      water 
       149         77        224 

table(data_testing$class_ID) #reference, ground truth in columns

vegetation    wetland      water 
       149         84        217 

tb_rpart[1]/sum(tb_rpart[,1]) #producer accuracy

[1] 0.9798658

tb_svm[1]/sum(tb_svm[,1]) #producer accuracy

[1] 1

#overall accuracy for svm
sum(diag(tb_svm))/sum(table(testing_svm))

[1] 0.9644444

#overall accuracy for rpart
sum(diag(tb_rpart))/sum(table(testing_rpart))

[1] 0.9444444

#Generate more accuracy measurements from CARET
accuracy_info_svm <- confusionMatrix(testing_svm,data_testing$class_ID, positive = NULL)
accuracy_info_rpart <- confusionMatrix(testing_rpart,data_testing$class_ID, positive = NULL)

accuracy_info_rpart$overall

      Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
  9.444444e-01   9.101603e-01   9.190787e-01   9.637284e-01   4.822222e-01 
AccuracyPValue  McnemarPValue 
 1.266887e-101            NaN 

accuracy_info_svm$overall

      Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
  9.644444e-01   9.425947e-01   9.429011e-01   9.795429e-01   4.822222e-01 
AccuracyPValue  McnemarPValue 
 9.645095e-114            NaN 

#### write out the results:
write.table(accuracy_info_rpart$table,"confusion_matrix_rpart.txt",sep=",")
write.table(accuracy_info_svm$table,"confusion_matrix_svm.txt",sep=",")

############################ End of Script ###################################

Top of Section

Remote Sensing & Classification

Hurricane Inundation

Contents