Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add penguins and penguins_raw to datasets package #154

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions src/library/datasets/data-raw/penguins.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Build the cleaned `penguins` data set from `penguins_raw`.
#
# Code adapted from the palmerpenguins package
# by Allison Horst, Alison Hill, and Kristen Gorman
# https://github.com/allisonhorst/palmerpenguins

# Provides `penguins_raw`; the path is relative to the current working
# directory.
load("./src/library/datasets/data/penguins_raw.rda")

# Extract the first match of `pattern` from every element of `x`.
#
# x:       vector that is, or can be coerced to, a character vector.
# pattern: regular expression passed to regexpr().
# Returns a character vector of the same length as `x`; elements that are NA
# or contain no match yield NA.
#
# A bare regmatches(x, regexpr(pattern, x)) drops such elements instead, and
# the shortened result can then be recycled into a data frame column,
# silently misaligning values with rows.
first_match <- function(x, pattern) {
  x <- as.character(x)
  hit <- regexpr(pattern, x)
  found <- !is.na(hit) & hit > -1L
  out <- rep(NA_character_, length(x))
  out[found] <- regmatches(x, hit)
  out
}

# Keep the eight columns of interest and give them short snake_case names.
penguins <- penguins_raw[, c("Species", "Island",
                             "Culmen Length (mm)", "Culmen Depth (mm)",
                             "Flipper Length (mm)", "Body Mass (g)",
                             "Sex", "Date Egg")]
colnames(penguins) <- c(
  "species", "island", "bill_length_mm", "bill_depth_mm", "flipper_length_mm",
  "body_mass_g", "sex", "year"
)

# species: keep only the leading word of the label
penguins$species <- as.factor(first_match(penguins$species, "^\\w+\\b"))
penguins$island <- as.factor(penguins$island)
penguins$flipper_length_mm <- as.integer(penguins$flipper_length_mm)
penguins$body_mass_g <- as.integer(penguins$body_mass_g)
penguins$sex <- as.factor(tolower(penguins$sex))
# year: first run of four digits in the character form of the egg date
penguins$year <- as.integer(first_match(penguins$year, "\\d{4}"))

save(penguins, file = "./src/library/datasets/data/penguins.rda")

# Check identical with version palmerpenguins package
# rm(penguins)
# load("./src/library/datasets/data/penguins.rda")
# identical(penguins, palmerpenguins:::penguins_df)
106 changes: 106 additions & 0 deletions src/library/datasets/data-raw/penguins_raw.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# Code adapted from the palmerpenguins package
# by Allison Horst, Alison Hill, and Kristen Gorman
# https://github.com/allisonhorst/palmerpenguins

# penguins raw ------------------------------------------------------------

# Locations of the raw CSV tables, one per species.

# Adelie: https://doi.org/10.6073/pasta/abc50eed9138b75f54eaada0841b9b86
uri_adelie <- "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-pal.219.3&entityid=002f3893385f710df69eeebe893144ff"

# Gentoo: https://doi.org/10.6073/pasta/2b1cff60f81640f182433d23e68541ce
uri_gentoo <- "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-pal.220.3&entityid=e03b43c924f226486f2f0ab6709d2381"

# Chinstrap: https://doi.org/10.6073/pasta/409c808f8fc9899d02401bdb04580af7
uri_chinstrap <- "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-pal.221.2&entityid=fe853aa8f7a59aa84cdd3197619ef462"

# Read each table and stack them into one data frame.  The row order is
# therefore Adelie, then Gentoo, then Chinstrap.
penguins_raw <- do.call(
  rbind,
  lapply(c(uri_adelie, uri_gentoo, uri_chinstrap), read.csv)
)

# Bring column types, missing-value markers and column names in line with
# palmerpenguins:::penguins_raw.  Until the renaming at the end, the columns
# still carry the names they were read in with.

# Columns stored as double
numeric_cols <- c("Sample.Number", "Flipper.Length..mm.", "Body.Mass..g.")
penguins_raw[numeric_cols] <- lapply(penguins_raw[numeric_cols], as.numeric)

penguins_raw$Date.Egg <- as.Date(penguins_raw$Date.Egg)

# Empty strings (and "." for Sex) stand for missing values
sex_missing <- penguins_raw$Sex %in% c("", ".")
penguins_raw$Sex[sex_missing] <- NA
comment_missing <- penguins_raw$Comments == ""
penguins_raw$Comments[comment_missing] <- NA

# Human-readable column names, assigned by position
names(penguins_raw) <- c(
  "studyName", "Sample Number", "Species", "Region", "Island", "Stage",
  "Individual ID", "Clutch Completion", "Date Egg", "Culmen Length (mm)",
  "Culmen Depth (mm)", "Flipper Length (mm)", "Body Mass (g)", "Sex",
  "Delta 15 N (o/oo)", "Delta 13 C (o/oo)", "Comments"
)

# add sample numbers that correspond to test/train set in Gorman et al. (2014)
# these have been provided by Kristen Gorman
#
# Each vector below lists sample numbers for one species and one set
# ("train" or "test").  Later in this script they are used as positions
# within that species' block of rows; numbers that appear in neither vector
# of a species leave the corresponding entry as NA.

# Adelie birds, training set
ADPE_train_sample_nums <- c(
1, 2, 3, 5, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 31, 32, 33,
34, 41, 42, 43, 45, 46, 47, 49, 50, 52, 56, 57, 61, 62, 63, 64, 66, 67, 71,
73, 74, 76, 78, 81, 84, 85, 88, 89, 91, 92, 93, 94, 95, 96, 98, 99, 102,
104, 105, 107, 108, 112, 113, 115, 116, 117, 118, 119, 120, 123, 124, 125,
128, 129, 130, 133, 136, 138, 142, 143, 144, 145, 147, 148, 149, 150, 151,
152
)

# Adelie birds, test set
ADPE_test_sample_nums <- c(
6, 13, 15, 28, 35, 36, 37, 38, 44, 51, 53, 54, 55, 58, 59, 60, 65, 68, 72,
75, 77, 79, 80, 82, 83, 86, 87, 90, 97, 100, 101, 103, 106, 109, 110, 111,
114, 126, 127, 134, 135, 137, 141, 146
)

# Chinstrap birds, training set
CHPE_train_sample_nums <- c(
3, 5, 6, 7, 8, 9, 13, 15, 16, 19, 22, 29, 30, 32, 33, 34, 35, 37, 41, 42,
43, 45, 47, 48, 50, 52, 53, 54, 55, 56, 57, 58, 61, 63, 67, 68
)

# Chinstrap birds, test set
CHPE_test_sample_nums <- c(4, 10, 11, 12, 14, 20, 21, 31, 36, 38, 44, 46, 49,
51, 59, 60, 62, 64)

# Gentoo birds, training set
GEPE_train_sample_nums <- c(
2, 4, 5, 7, 9, 10, 13, 14, 15, 20, 21, 22, 24, 25, 26, 28, 30, 31, 32, 33,
34, 35, 36, 37, 38, 39, 40, 44, 49, 50, 52, 53, 54, 55, 60, 62, 63, 64, 65,
66, 69, 70, 73, 75, 76, 77, 78, 79, 82, 84, 85, 86, 89, 90, 91, 93, 94, 95,
97, 98, 99, 101, 102, 103, 106, 109, 110, 112, 114, 115, 118, 121, 123, 124
)

# Gentoo birds, test set
GEPE_test_sample_nums <- c(
1, 3, 6, 8, 16, 17, 18, 19, 23, 29, 43, 45, 46, 51, 56, 57, 58, 59, 61, 68,
71, 72, 74, 80, 81, 83, 87, 88, 92, 96, 100, 104, 107, 108, 111, 113, 116,
122
)

# Build the train/test labels for one species.
#
# species:    text searched for in penguins_raw$Species to count the rows
#             of that species.
# train_nums: positions (within the species) labelled "train".
# test_nums:  positions (within the species) labelled "test".
# Returns a vector with one entry per row of the species; positions listed
# in neither set stay NA.
label_species_sample <- function(species, train_nums, test_nums) {
  n_rows <- sum(grepl(species, penguins_raw$Species))
  labels <- rep(NA, n_rows)
  labels[train_nums] <- "train"
  labels[test_nums] <- "test"
  labels
}

# Add sample column to penguins_raw: the per-species label vectors are
# concatenated in the order Adelie, Gentoo, Chinstrap.
penguins_raw$Sample <- c(
  label_species_sample("Adelie", ADPE_train_sample_nums,
                       ADPE_test_sample_nums),
  label_species_sample("Gentoo", GEPE_train_sample_nums,
                       GEPE_test_sample_nums),
  label_species_sample("Chinstrap", CHPE_train_sample_nums,
                       CHPE_test_sample_nums)
)

save(penguins_raw, file = "./src/library/datasets/data/penguins_raw.rda")

# Check identical with version palmerpenguins package
# rm(penguins_raw)
# load("./src/library/datasets/data/penguins_raw.rda")
# pp_penguins_raw <- palmerpenguins:::penguins_raw_df
# attr(pp_penguins_raw, "spec") <- NULL
# identical(penguins_raw[, 1:17], pp_penguins_raw) # TRUE without Sample col
# all.equal(tibble::as_tibble(penguins_raw[, 1:17]), palmerpenguins::penguins_raw, check.attributes = FALSE) # without sample TRUE

Binary file added src/library/datasets/data/penguins.rda
Binary file not shown.
Binary file added src/library/datasets/data/penguins_raw.rda
Binary file not shown.
38 changes: 38 additions & 0 deletions src/library/datasets/man/penguins.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
\name{penguins}
\encoding{UTF-8}
\docType{data}
\alias{penguins}
\title{Size Measurements for Adult Foraging Penguins near Palmer Station, Antarctica}
\description{
Includes measurements for penguin species, island in Palmer Archipelago,
size (flipper length, body mass, bill dimensions), and sex.
This is a subset of \code{\link{penguins_raw}}.
}
\usage{penguins}
\format{
A data frame with 344 rows and 8 variables:
\describe{
\item{species}{a factor denoting penguin species (Adelie, Chinstrap and Gentoo)}
\item{island}{a factor denoting island in Palmer Archipelago, Antarctica (Biscoe, Dream or Torgersen)}
\item{bill_length_mm}{a number denoting bill length (millimeters)}
\item{bill_depth_mm}{a number denoting bill depth (millimeters)}
\item{flipper_length_mm}{an integer denoting flipper length (millimeters)}
\item{body_mass_g}{an integer denoting body mass (grams)}
\item{sex}{a factor denoting penguin sex (female, male)}
\item{year}{an integer denoting the study year (2007, 2008, or 2009)}
}
}
\source{
\enc{Adélie}{Adelie} penguins: Palmer Station Antarctica LTER and K. Gorman. 2020. Structural size measurements and isotopic signatures of foraging among adult male and female \enc{Adélie}{Adelie} penguins (Pygoscelis adeliae) nesting along the Palmer Archipelago near Palmer Station, 2007-2009 ver 5. Environmental Data Initiative, \doi{10.6073/pasta/98b16d7d563f265cb52372c8ca99e60f}.

Gentoo penguins: Palmer Station Antarctica LTER and K. Gorman. 2020. Structural size measurements and isotopic signatures of foraging among adult male and female Gentoo penguin (Pygoscelis papua) nesting along the Palmer Archipelago near Palmer Station, 2007-2009 ver 5. Environmental Data Initiative, \doi{10.6073/pasta/7fca67fb28d56ee2ffa3d9370ebda689}.

Chinstrap penguins: Palmer Station Antarctica LTER and K. Gorman. 2020. Structural size measurements and isotopic signatures of foraging among adult male and female Chinstrap penguin (Pygoscelis antarcticus) nesting along the Palmer Archipelago near Palmer Station, 2007-2009 ver 6. Environmental Data Initiative, \doi{10.6073/pasta/c14dfcfada8ea13a17536e73eb6fbe9e}.
}
\references{
Gorman, K. B., Williams, T. D. and Fraser, W. R. (2014) Ecological Sexual Dimorphism and Environmental Variability within a Community of Antarctic Penguins (Genus Pygoscelis). PLoS ONE \bold{9}, 3, e90081. \doi{10.1371/journal.pone.0090081}.
}
\note{
This data is also available in the \CRANpkg{palmerpenguins} package. See also \url{https://allisonhorst.github.io/palmerpenguins/} for further details and resources.
}
\keyword{datasets}
46 changes: 46 additions & 0 deletions src/library/datasets/man/penguins_raw.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
\name{penguins_raw}
\encoding{UTF-8}
\docType{data}
\alias{penguins_raw}
\title{Penguin Size, Clutch, and Blood Isotope Data for Foraging Adults near Palmer Station, Antarctica}
\description{
Includes nesting observations, penguin size data, and isotope measurements from blood samples for adult \enc{Adélie}{Adelie}, Chinstrap, and Gentoo penguins.
}
\usage{penguins_raw}
\format{
A data frame with 344 rows and 18 variables:
\describe{
\item{studyName}{Sampling expedition from which data were collected, generated, etc.}
\item{Sample Number}{an integer denoting the continuous numbering sequence for each sample}
\item{Species}{a character string denoting the penguin species}
\item{Region}{a character string denoting the region of Palmer LTER sampling grid}
\item{Island}{a character string denoting the island near Palmer Station where samples were collected}
\item{Stage}{a character string denoting reproductive stage at sampling}
\item{Individual ID}{a character string denoting the unique ID for each individual in dataset}
\item{Clutch Completion}{a character string denoting if the study nest was observed with a full clutch, i.e., 2 eggs}
\item{Date Egg}{a date denoting the date the study nest was observed with 1 egg (sampled)}
\item{Culmen Length}{a number denoting the length of the dorsal ridge of a bird's bill (millimeters)}
\item{Culmen Depth}{a number denoting the depth of the dorsal ridge of a bird's bill (millimeters)}
\item{Flipper Length}{an integer denoting the length of the penguin flipper (millimeters)}
\item{Body Mass}{an integer denoting the penguin body mass (grams)}
\item{Sex}{a character string denoting the sex of an animal}
\item{Delta 15 N}{a number denoting the measure of the ratio of stable isotopes 15N:14N}
\item{Delta 13 C}{a number denoting the measure of the ratio of stable isotopes 13C:12C}
\item{Comments}{a character string with text providing additional relevant information for data}
\item{Sample}{a character string denoting whether the bird featured in the test or train set (or neither) in the original analysis (see References).}
}
}
\source{
\enc{Adélie}{Adelie} penguins: Palmer Station Antarctica LTER and K. Gorman. 2020. Structural size measurements and isotopic signatures of foraging among adult male and female \enc{Adélie}{Adelie} penguins (Pygoscelis adeliae) nesting along the Palmer Archipelago near Palmer Station, 2007-2009 ver 5. Environmental Data Initiative, \doi{10.6073/pasta/98b16d7d563f265cb52372c8ca99e60f}.

Gentoo penguins: Palmer Station Antarctica LTER and K. Gorman. 2020. Structural size measurements and isotopic signatures of foraging among adult male and female Gentoo penguin (Pygoscelis papua) nesting along the Palmer Archipelago near Palmer Station, 2007-2009 ver 5. Environmental Data Initiative, \doi{10.6073/pasta/7fca67fb28d56ee2ffa3d9370ebda689}.

Chinstrap penguins: Palmer Station Antarctica LTER and K. Gorman. 2020. Structural size measurements and isotopic signatures of foraging among adult male and female Chinstrap penguin (Pygoscelis antarcticus) nesting along the Palmer Archipelago near Palmer Station, 2007-2009 ver 6. Environmental Data Initiative, \doi{10.6073/pasta/c14dfcfada8ea13a17536e73eb6fbe9e}.
}
\references{
Gorman, K. B., Williams, T. D. and Fraser, W. R. (2014) Ecological Sexual Dimorphism and Environmental Variability within a Community of Antarctic Penguins (Genus Pygoscelis). PLoS ONE \bold{9}, 3, e90081. \doi{10.1371/journal.pone.0090081}.
}
\note{
This data is also available in the \CRANpkg{palmerpenguins} package. See also \url{https://allisonhorst.github.io/palmerpenguins/} for further details and resources.
}
\keyword{datasets}