Klimaoverblikk/01_Check_data.md at master · NIVANorge/Klimaoverblikk

title

Overview and some plots of the data

output

html_document

keep_md	toc	toc_depth	toc_float	code_folding	df_print
true	true	3	true	hide	paged

Getting an overview of the data

0. Libraries

library(tidyverse)

## -- Attaching packages ---------------------------------------------------------------------------------------------------------------------- tidyverse 1.2.1 --

## v ggplot2 2.2.1     v purrr   0.2.4
## v tibble  1.4.2     v dplyr   0.7.6
## v tidyr   0.8.1     v stringr 1.3.1
## v readr   1.1.1     v forcats 0.3.0

## -- Conflicts ------------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(readxl)
library(pander)

1. Folders and files

Content of folders (see code)

# Top-level contents of the data folder
list.files("Datasett")

## [1] "Bløtbunn"              "Eksempel datafil.xlsx" "hardbunn_kopi"        
## [4] "hydrografi"            "Plankton"              "River data (from OKA)"

# Contents of the river data folder
list.files("Datasett/River data (from OKA)")

## [1] "Annual mean flows"                   
## [2] "Concentrations (individual samples)" 
## [3] "Monthly flow-weighted concentrations"
## [4] "Monthly loads"

# Contents of the hydrography folder
list.files("Datasett/hydrografi")

## [1] "Arendal_allvars_1990_2016.csv"   "Arendal_allvars_1990_2016.Rdata"
## [3] "R scripts"                       "Rådata"

# Contents of the soft-bottom ("Bløtbunn") folder
list.files("Datasett/Bløtbunn")

## [1] "Beskrivelse av bløtbunndata.docx"                   
## [2] "Klimaoverblikk bløtbunn_data til Helene og Dag.xlsx"

# Contents of the hard-bottom folder
list.files("Datasett/hardbunn_kopi")

## [1] "HBanalysesett.csv" "other docs"        "r workspace"

# Contents of the plankton folder
list.files("Datasett/Plankton")

## [1] "Beskrivelse av planktondata.docx" "Planteplankton Arendal.xlsx"

2. River data

Content of folders (see code)

# River data: files with annual mean flows
list.files("Datasett/River data (from OKA)/Annual mean flows")

## [1] "Mean annual flow.xlsx"

# River data: files with monthly loads
list.files("Datasett/River data (from OKA)/Monthly loads")

## [1] "Gjerstadelva_Nidelva_monthly loads.xlsx"
## [2] "RIDx5_monthly loads.xlsx"               
## [3] "Storelva_monthly loads.xlsx"

# River data: files with concentrations of individual samples
list.files("Datasett/River data (from OKA)/Concentrations (individual samples)")

## [1] "Gjerstadelva_Nidelva_conc.xlsx" "RIDx5_conc.xlsx"               
## [3] "Storelva_conc.xlsx"

# River data: files with monthly flow-weighted concentrations
list.files("Datasett/River data (from OKA)/Monthly flow-weighted concentrations")

## [1] "Gjerstadelva_Nidelva_flow-weighted mean_month.xlsx"
## [2] "RIDx5_flow-weighted mean_month.xlsx"               
## [3] "Storelva_flow-weighted mean_month.xlsx"

a. Data; example: Storelva loads

# Monthly loads for Storelva. The first row after the header is dropped
# (presumably a units/sub-header row -- TODO confirm against the Excel file).
df <- read_excel("Datasett/River data (from OKA)/Monthly loads/Storelva_monthly loads.xlsx")
df <- as.data.frame(df)[-1, ]

# Columns to convert to numeric
# (the column names can be listed with: colnames(df) %>% dput())
vars <- c("TrspTot TOTN", "TrspTot NO3-N", "TrspTot NH4-N", "TrspTot TOTP",
          "TrspTot TOC", "TrspTot ALK", "TrspTot Ca", "DisTot")
df[vars] <- lapply(df[vars], as.numeric)

# Build a date for each row from Year and Month, using day 15 of the month
df$Time <- lubridate::ymd(paste(df[["Year"]], df[["Month"]], "15"))

# Add "_" in column names (TrspTot Ca -> TrspTot_Ca)
colnames(df) <- sub(" ", "_", colnames(df), fixed = TRUE)

# Coverage table: number of non-missing values per year and variable
tb <- df %>%
  gather("Variable", Value, TrspTot_TOTN:DisTot) %>%
  filter(!is.na(Value)) %>%
  xtabs(~Year + Variable, data = .)
pandoc.table(tb, style = "rmarkdown")

	DisTot	TrspTot_ALK	TrspTot_Ca	TrspTot_NH4-N
1990	12	0	0	0
1991	12	0	0	0
1992	12	0	0	0
1993	12	0	12	0
1994	12	0	12	0
1995	12	0	12	0
1996	12	4	12	0
1997	12	12	12	0
1998	12	12	12	0
1999	12	12	12	0
2000	12	12	12	0
2001	12	12	12	0
2002	12	12	12	0
2003	12	12	12	0
2004	12	12	12	0
2005	12	12	12	12
2006	12	12	12	5
2007	12	12	12	0
2008	12	12	12	0
2009	12	12	12	0
2010	12	12	12	0
2011	12	12	12	0
2012	12	12	12	0
2013	12	12	12	0
2014	12	12	12	0
2015	12	12	12	0

Table: Table continues below

	TrspTot_NO3-N	TrspTot_TOC	TrspTot_TOTN	TrspTot_TOTP
1990	0	0	0	0
1991	0	0	0	0
1992	0	0	0	0
1993	12	12	12	11
1994	12	12	12	12
1995	12	12	12	12
1996	12	12	12	12
1997	12	12	12	12
1998	12	12	12	12
1999	12	12	12	12
2000	12	12	12	12
2001	12	12	12	12
2002	12	12	12	12
2003	12	12	12	12
2004	12	12	12	12
2005	12	12	12	12
2006	12	12	12	12
2007	12	12	12	12
2008	12	12	12	12
2009	12	12	12	12
2010	12	12	12	12
2011	12	12	12	12
2012	12	12	12	12
2013	12	12	12	12
2014	12	12	12	12
2015	12	12	12	12

b. Plot Storelva loads

# Time series of every load/discharge variable, one panel per variable,
# each panel with its own y axis.
gg <- ggplot(
  data = gather(df, "Variable", Value, TrspTot_TOTN:DisTot),
  mapping = aes(x = Time, y = Value)
) +
  geom_line() +
  facet_wrap(~Variable, scales = "free_y")
print(gg)

## Warning: Removed 12 rows containing missing values (geom_path).

c. Plot annual flow

# Annual mean flow for all stations. The first row after the header is
# dropped (presumably a units/sub-header row -- TODO confirm).
df <- read_excel("Datasett/River data (from OKA)/Annual mean flows/Mean annual flow.xlsx")
df <- as.data.frame(df)[-1, ]

# Replace the first space in each column name with "_"
# (the column names can be listed with: colnames(df) %>% dput())
colnames(df) <- sub(" ", "_", colnames(df), fixed = TRUE)

# Convert the flow column to numeric
vars <- "DisMean"
df[vars] <- lapply(df[vars], as.numeric)

# Annual mean flow over time, one panel per station
ggplot(data = df, mapping = aes(x = Year, y = DisMean)) +
  geom_line() +
  facet_wrap(~Station_name)

3. Hydrografi

a. Data

Tables of data coverage by depth and month, and by year and month

# Load the Arendal hydrography data; the code below expects the .Rdata file
# to provide the data frame `Df.Arendal`.
# The folder is named "hydrografi" (lower case) on disk, so the path must use
# the same case to work on case-sensitive file systems.
load("Datasett/hydrografi/Arendal_allvars_1990_2016.Rdata")

# Derive numeric Month and Year from the text form of the date column
# (assumes as.character(Dato) starts with "YYYY-MM" -- TODO confirm)
Df.Arendal$Month <- Df.Arendal$Dato %>% as.character() %>% substr(6,7) %>% as.numeric()
Df.Arendal$Year <- Df.Arendal$Dato %>% as.character() %>% substr(1,4) %>% as.numeric()

# summary(Df.Arendal)
# Number of observations per depth and month
tb <- xtabs(~Depth + Month, Df.Arendal)
pandoc.table(tb, style = "rmarkdown")

	1	2	3	4	5	6	7	8	9	10	11	12
0	40	47	55	45	53	51	49	56	48	57	50	36
5	40	47	55	45	53	51	49	56	48	57	50	36
10	40	47	55	45	53	51	49	56	48	57	50	36
20	40	47	55	45	53	51	49	56	48	57	50	36
30	40	47	55	45	53	51	49	56	48	57	50	36
50	40	47	55	45	53	51	49	56	48	55	50	36
75	40	47	55	45	53	51	48	56	48	55	50	36

# Number of observations per year and month
tb <- xtabs(formula = ~ Year + Month, data = Df.Arendal)
pandoc.table(t = tb, style = "rmarkdown")

	1	2	3	4	5	6	7	8	9	10	11	12
1990	7	7	7	14	7	14	7	14	7	14	14	7
1991	14	7	14	7	14	14	14	14	7	21	7	14
1992	14	14	14	14	14	14	14	14	14	14	14	7
1993	14	14	14	7	14	14	14	14	7	14	14	14
1994	14	14	14	14	7	14	7	21	7	14	14	14
1995	7	7	14	14	14	14	14	14	14	14	14	14
1996	14	21	7	14	14	14	7	14	14	14	14	7
1997	14	14	14	14	14	14	14	14	7	14	14	7
1998	7	14	14	14	7	14	7	21	14	21	7	7
1999	7	14	14	7	14	14	20	14	14	7	14	14
2000	7	7	14	7	21	14	14	14	7	31	14	14
2001	21	7	14	14	14	7	14	14	14	14	14	7
2002	7	21	14	14	7	7	14	14	14	14	14	7
2003	14	14	14	7	21	7	14	14	14	14	14	7
2004	14	7	14	7	14	14	7	14	14	14	14	7
2005	7	14	14	14	14	14	14	14	14	14	7	14
2006	7	14	21	7	14	14	14	14	14	14	7	14
2007	7	14	21	7	14	14	14	14	14	14	14	7
2008	14	7	21	14	14	14	7	14	14	7	14	14
2009	14	14	14	14	14	14	14	14	14	14	14	7
2010	7	0	14	7	21	14	14	14	14	14	14	7
2011	7	14	14	14	14	14	14	14	14	14	21	7
2012	7	14	14	14	14	14	14	14	14	14	14	7
2013	7	14	14	14	14	14	14	14	14	14	14	7
2014	7	14	14	14	14	14	14	14	14	14	14	7
2015	7	14	14	14	14	14	14	14	14	14	14	7
2016	14	14	14	14	14	14	14	14	14	14	7	7

b. Plot salinity, depth 10 m

# Salinity against date for the rows where Depth equals 10
Df.Arendal %>%
  filter(Depth == 10) %>%
  ggplot(mapping = aes(x = Dato, y = Salt)) +
    geom_line() +
    geom_point()

c. Salinity, time series by month and depth

# Mean salinity per year, month and depth, drawn as one panel per
# month (rows) and depth (columns).
# Only Salt is summarised: it is the only value plotted, and averaging every
# column (summarise_all) would also try to average the date column and any
# other column for which a mean is not meaningful.
Df.Arendal %>%
  group_by(Year, Month, Depth) %>%
  summarise(Salt = mean(Salt)) %>%
  ggplot(aes(Year, Salt)) +
    geom_line() +
    facet_grid(Month ~ Depth)

4. Soft bottom fauna

a. Data

# All soft-bottom data are sheets of the same Excel workbook
blot_file <- "Datasett/Bløtbunn/Klimaoverblikk bløtbunn_data til Helene og Dag.xlsx"

# Species lists for stations B35 and B05. The first column is renamed to
# "Species"; the remaining columns are treated as one column per year
# (they are gathered into `Year` further down in this script).
df_blot_b35 <- read_excel(blot_file, sheet = "B35_artsliste")
colnames(df_blot_b35)[1] <- "Species"

df_blot_b05 <- read_excel(blot_file, sheet = "B05_artsliste")
colnames(df_blot_b05)[1] <- "Species"

# Sheet with indices and sediment parameters
df_blot_ind <- read_excel(blot_file, sheet = "indekser_sedimentparametere")

# The "Species" column is not a year, so it is excluded from the year count
cat("Number of species:", nrow(df_blot_b35),
    "\nNumber of years:", ncol(df_blot_b35) - 1, "\n")

## Number of species: 266 
## Number of years: 28

b. Station B35, boxplot of the 10% most abundant species

# Station B35 in long format (one row per species and year). Missing counts
# are set to 0, and the median count over years is attached to every row of
# the species. The result stays grouped by Species.
df <- df_blot_b35 %>%
  gather(key = "Year", value = "N", -Species) %>%
  group_by(Species) %>%
  mutate(
    N = replace(N, is.na(N), 0),
    Median_per_species = median(N)
  )

# Header line for the percentile output
cat("Percentiles of median abundance per species:", "\n", sep = "")

## Percentiles of median abundance per species:

# Deciles (0%, 10%, ..., 100%) of the per-species median abundance
quantile(df[["Median_per_species"]], probs = seq(0, 10) / 10, na.rm = TRUE)

##   0%  10%  20%  30%  40%  50%  60%  70%  80%  90% 100% 
##    0    0    0    0    0    0    0    0    0    2  261

# Boxplot of counts per species, restricted to species whose median count
# is at least 2; species labels are rotated 90 degrees.
ggplot(
  data = filter(df, Median_per_species >= 2),
  mapping = aes(x = Species, y = N)
) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

5. Hard-bottom fauna

a. Data (already chewed a bit on)

# Helpers for inspecting the raw file:
#   dir("Datasett/hardbunn_kopi")
#   readLines("Datasett/hardbunn_kopi/HBanalysesett.csv", 2)
# read_csv2() is used because it reads the file with ',' as decimal mark.
df <- readr::read_csv2(file = "Datasett/hardbunn_kopi/HBanalysesett.csv")

## Using ',' as decimal and '.' as grouping mark. Use read_delim() for more control.

## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   DCA1 = col_double(),
##   DCA2 = col_double(),
##   DCA3 = col_double(),
##   DCA4 = col_double(),
##   NMDS1 = col_double(),
##   NMDS2 = col_double(),
##   DCA1strand = col_double(),
##   DCA2strand = col_double(),
##   DCA3strand = col_double(),
##   DCA4strand = col_double(),
##   NMDS1strand = col_double(),
##   NMDS2strand = col_double(),
##   midTurf = col_double(),
##   midBarren = col_double(),
##   midFilter = col_double(),
##   midHerb = col_double()
## )

## See spec(...) for full column specifications.

b. Plot DCA

# DCA1-DCA4 against year: one row of panels per site, one column per axis
ggplot(
  data = gather(df, "DCA_axis", "Value", DCA1:DCA4),
  mapping = aes(x = Year, y = Value)
) +
  geom_line() +
  facet_grid(Site ~ DCA_axis)

c. Plot NMDS

# NMDS1-NMDS2 against year: one row of panels per site, one column per axis
ggplot(
  data = gather(df, "NMDS_axis", "Value", NMDS1:NMDS2),
  mapping = aes(x = Year, y = Value)
) +
  geom_line() +
  facet_grid(Site ~ NMDS_axis)

d. Plot DCAstrand

# DCA1strand-DCA4strand against year: one row of panels per site,
# one column per axis
ggplot(
  data = gather(df, "DCA_axis", "Value", DCA1strand:DCA4strand),
  mapping = aes(x = Year, y = Value)
) +
  geom_line() +
  facet_grid(Site ~ DCA_axis)

## Warning: Removed 20 rows containing missing values (geom_path).

e. Plot species

# Time series of the species columns: one row of panels per site, one column
# per species. The "LGD." prefix of the column names is stripped so that the
# panel labels show only the species name. The prefix is matched literally
# (fixed = TRUE); otherwise "." would act as a regex wildcard.
df %>%
  gather("Species", "Value", `LGD.Halidrys siliquosa`:`LGD.Rhodomela confervoides`) %>%
  mutate(Species = sub("LGD.", "", Species, fixed = TRUE)) %>%
  ggplot(aes(Year, Value)) +
    geom_line() +
    facet_grid(Site~Species)

6. Phytoplankton

a. Data

Note: the empty second row and one empty column were deleted manually from the Excel file.
In addition, the data (only the range containing data, A1:V471) were copied to a new sheet.

# Read the phytoplankton data (the data occupy range "A1:V471") and add
# Year and Month derived from the date column.
df <- read_excel("Datasett/Plankton/Planteplankton Arendal.xlsx") %>%
  mutate(
    Year = lubridate::year(Dato),
    Month = lubridate::month(Dato)
  )

# Number of samples per year and month
xtabs(formula = ~ Year + Month, data = df)

##       Month
## Year   1 2 3 4 5 6 7 8 9 10 11 12
##   1994 2 2 2 2 1 2 1 2 1  1  1  2
##   1995 1 1 2 2 2 2 1 2 2  1  1  1
##   1996 2 3 1 2 2 2 1 2 2  2  2  1
##   1997 2 2 2 2 2 2 2 2 1  2  2  1
##   1998 1 2 2 2 1 2 1 3 2  2  1  1
##   1999 1 2 2 1 2 2 3 2 2  1  2  2
##   2000 0 0 2 1 2 1 2 1 1  1  0  0
##   2001 0 1 2 1 1 1 1 1 1  1  1  0
##   2002 0 3 2 2 2 1 2 2 1  1  1  0
##   2003 2 4 2 1 3 1 2 2 2  2  2  1
##   2004 2 1 3 1 2 2 1 2 2  2  2  1
##   2005 1 2 6 2 2 1 2 6 2  2  1  2
##   2006 1 2 3 1 2 2 2 2 2  2  1  2
##   2007 1 2 3 1 2 2 2 2 2  2  2  1
##   2008 2 1 3 2 2 2 1 2 2  1  2  1
##   2009 2 2 2 2 2 2 2 2 1  2  2  1
##   2010 1 0 2 1 3 2 2 2 2  2  2  1
##   2011 0 1 2 2 2 2 2 2 2  2  2  1
##   2012 1 2 2 2 2 2 1 2 2  1  2  0
##   2013 1 2 2 2 2 2 2 2 2  2  2  1
##   2014 1 2 2 2 2 2 2 2 2  2  2  1
##   2015 1 2 2 2 2 2 2 2 2  2  2  1
##   2016 2 2 2 2 2 2 2 2 2  2  1  1

b. Plot time series of main groups

# Time series of the main groups (columns Kiselalger to Flagellater), one
# panel per group with its own y axis.
# `scales` is spelled out in full instead of relying on partial argument
# matching of `scale`.
df %>%
  gather("Group", "Value", Kiselalger:Flagellater) %>%
  ggplot(aes(Dato, Value)) +
    geom_line() +
    facet_wrap(~Group, scales = "free_y")

c. Plot time series of main groups by month

# Mean value per year, month and main group, shown for the even months
# February-October: one row of panels per group, one column per month.
# `scales` is spelled out in full instead of relying on partial argument
# matching of `scale`.
df %>%
  gather("Group", "Value", Kiselalger:Flagellater) %>%
  group_by(Year, Month, Group) %>%
  summarize(Value = mean(Value)) %>%
  filter(Month %in% c(2,4,6,8,10)) %>%
  ggplot(aes(Year, Value)) +
    geom_line() +
    facet_grid(Group~Month, scales = "free_y")

d. Plot time series of 6 main taxa (by median abundance) by month

# Mean and median abundance per taxon (columns Cyanophycea to Chlorophycea),
# sorted from highest to lowest. `mean_abundance` is not used further in this
# script; only the median ranking selects the taxa.
mean_abundance <- apply(df %>% select(Cyanophycea:Chlorophycea), 2, mean, na.rm = TRUE) %>% sort() %>% rev()
median_abundance <- apply(df %>% select(Cyanophycea:Chlorophycea), 2, median, na.rm = TRUE) %>% sort() %>% rev()

# The 6 taxa with the highest median abundance
taxa <- names(median_abundance) %>% head(6)

# Mean value per year, month and taxon for the selected taxa and the even
# months February-October: one row of panels per taxon, one column per month.
# `scales` is spelled out in full instead of relying on partial argument
# matching of `scale`.
df %>%
  gather("Taxon", "Value", Cyanophycea:Chlorophycea) %>%
  group_by(Year, Month, Taxon) %>%
  summarize(Value = mean(Value)) %>%
  filter(Month %in% c(2,4,6,8,10) & Taxon %in% taxa) %>%
  ggplot(aes(Year, Value)) +
    geom_line() +
    facet_grid(Taxon~Month, scales = "free_y")

e. Plot time series of 5 lesser taxa (by median abundance) by month

# The 5 taxa with the lowest median abundance (`median_abundance` is sorted
# from highest to lowest earlier in this script)
taxa <- names(median_abundance) %>% tail(5)

# Mean value per year, month and taxon for the selected taxa and the even
# months February-October: one row of panels per taxon, one column per month.
# `scales` is spelled out in full instead of relying on partial argument
# matching of `scale`.
df %>%
  gather("Taxon", "Value", Cyanophycea:Chlorophycea) %>%
  group_by(Year, Month, Taxon) %>%
  summarize(Value = mean(Value)) %>%
  filter(Month %in% c(2,4,6,8,10) & Taxon %in% taxa) %>%
  ggplot(aes(Year, Value)) +
    geom_line() +
    facet_grid(Taxon~Month, scales = "free_y")

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

0. Libraries

1. Folders and files

2. River data

a. Data; example: Storelva loads

b. Plot Storelva loads

c. Plot annual flow

3. Hydrografi

a. Data

b. Plot salinity, depth 10 m

c. Salinity, time series by month and depth

4. Soft bottom fauna

a. Data

b. Station B35, boxplot of the 10% most abundant species

5. Hard-bottom fauna

a. Data (already chewed a bit on)

b. Plot DCA

c. Plot NMDS

d. Plot DCAstrand

e. Plot species

6. Phytoplankton

a. Data

b. Plot time series of main groups

c. Plot time series of main groups by month

d. Plot time series of 6 main taxa (by median abundance) by month

e. Plot time series of 5 lesser taxa (by median abundance) by month

	1	2	3	4	5	6	7	8	9	10	11	12
0	40	47	55	45	53	51	49	56	48	57	50	36
5	40	47	55	45	53	51	49	56	48	57	50	36
10	40	47	55	45	53	51	49	56	48	57	50	36
20	40	47	55	45	53	51	49	56	48	57	50	36
30	40	47	55	45	53	51	49	56	48	57	50	36
50	40	47	55	45	53	51	49	56	48	55	50	36
75	40	47	55	45	53	51	48	56	48	55	50	36

	1	2	3	4	5	6	7	8	9	10	11	12
0	40	47	55	45	53	51	49	56	48	57	50	36
5	40	47	55	45	53	51	49	56	48	57	50	36
10	40	47	55	45	53	51	49	56	48	57	50	36
20	40	47	55	45	53	51	49	56	48	57	50	36
30	40	47	55	45	53	51	49	56	48	57	50	36
50	40	47	55	45	53	51	49	56	48	55	50	36
75	40	47	55	45	53	51	48	56	48	55	50	36

FilesExpand file tree

01_Check_data.md

Latest commit

History

01_Check_data.md

File metadata and controls

0. Libraries

1. Folders and files

2. River data

a. Data; example: Storelva loads

b. Plot Storelva loads

c. Plot annual flow

3. Hydrografi

a. Data

b. Plot salinity, depth 10 m

c. Salinity, time series by month and depth

4. Soft bottom fauna

a. Data

b. Station B35, boxplot of the 10% most abundant species

5. Hard-bottom fauna

a. Data (already chewed a bit on)

b. Plot DCA

c. Plot NMDS

d. Plot DCAstrand

e. Plot species

6. Phytoplankton

a. Data

b. Plot time series of main groups

c. Plot time series of main groups by month

d. Plot time series of 6 main taxa (by median abundance) by month

e. Plot time series of 5 lesser taxa (by median abundance) by month

	1	2	3	4	5	6	7	8	9	10	11	12
0	40	47	55	45	53	51	49	56	48	57	50	36
5	40	47	55	45	53	51	49	56	48	57	50	36
10	40	47	55	45	53	51	49	56	48	57	50	36
20	40	47	55	45	53	51	49	56	48	57	50	36
30	40	47	55	45	53	51	49	56	48	57	50	36
50	40	47	55	45	53	51	49	56	48	55	50	36
75	40	47	55	45	53	51	48	56	48	55	50	36