-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsupplemental_file_1_markdown.Rmd
167 lines (143 loc) · 5.13 KB
/
supplemental_file_1_markdown.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
---
title: "supplemental_file_1_markdown"
---
Corton et al. "Biological Thresholds Derived from Common Measures in Rat Studies are Predictive of Liver Tumorigenic Chemicals."
Supplemental File 1: The R code used to extract information from ToxRefDB version 2.0.
Katie Paul Friedman ([email protected]) and Jason Brown ([email protected])
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(RSQLite)
library(tidyr)
library(dplyr)
library(DT)
```
```{r load}
# build all queries and extract info from toxref database
con <- dbConnect(SQLite(), dbname="data/tox_ref.sqlite")
#all doses for specific effects
q1 <- "
SELECT
dose.study_id,
dtg.dose_adjusted,
dtg.dose_adjusted_unit,
tg_effect.tg_id,
dose.dose_level,
effect_val,
effect_val_unit,
endpoint.endpoint_target,
effect.effect_desc
FROM
dose
INNER JOIN
dtg ON dose.dose_id = dtg.dose_id
INNER JOIN
dtg_effect ON dtg_effect.dtg_id = dtg.dtg_id
INNER JOIN
tg_effect ON tg_effect.tg_effect_id = dtg_effect.tg_effect_id
INNER JOIN
effect ON effect.effect_id = tg_effect.effect_id
INNER JOIN
endpoint ON endpoint.endpoint_id = effect.endpoint_id
WHERE
effect_desc IN ('relative to body weight' , 'alanine aminotransferase (alt/sgpt)',
'aspartate aminotransferase (ast/sgot)')
AND endpoint_target IN ('alanine aminotransferase (alt/sgpt)' , 'aspartate aminotransferase (ast/sgot)',
'liver')
AND effect_val IS NOT NULL
AND dose.study_id IS NOT NULL
ORDER BY dose.study_id , effect_desc , dose.dose_level;
"
# long format pod info
q2 <- "
SELECT
pod.study_id,tg_id,pod_type,pod_value,pod_unit,endpoint_target,effect_desc,study_type,admin_route,chemical.chemical_id,casrn,preferred_name
FROM
endpoint
INNER JOIN
effect ON endpoint.endpoint_id = effect.endpoint_id
INNER JOIN
tg_effect ON effect.effect_id = tg_effect.effect_id
INNER JOIN
pod_tg_effect ON tg_effect.tg_effect_id = pod_tg_effect.tg_effect_id
INNER JOIN
pod ON pod_tg_effect.pod_id = pod.pod_id
LEFT JOIN
study ON pod.study_id = study.study_id
INNER JOIN
chemical ON study.chemical_id = chemical.chemical_id
WHERE
endpoint_target IN ('alanine aminotransferase (alt/sgpt)' , 'aspartate aminotransferase (ast/sgot)',
'liver')
AND pod_type IN ('lel' , 'loael')
AND effect_desc IN ('relative to body weight' , 'alanine aminotransferase (alt/sgpt)',
'aspartate aminotransferase (ast/sgot)')
and study.study_id is not null
ORDER BY study.study_id, effect_desc;"
# Study citation information
q3 <- "
SELECT
study.*
FROM
endpoint
INNER JOIN
effect ON endpoint.endpoint_id = effect.endpoint_id
INNER JOIN
tg_effect ON effect.effect_id = tg_effect.effect_id
INNER JOIN
pod_tg_effect ON tg_effect.tg_effect_id = pod_tg_effect.tg_effect_id
INNER JOIN
pod ON pod_tg_effect.pod_id = pod.pod_id
LEFT JOIN
study ON pod.study_id = study.study_id
INNER JOIN
chemical ON study.chemical_id = chemical.chemical_id
WHERE
endpoint_target IN ('alanine aminotransferase (alt/sgpt)' , 'aspartate aminotransferase (ast/sgot)',
'liver')
AND pod_type IN ('lel' , 'loael')
AND effect_desc IN ('relative to body weight' , 'alanine aminotransferase (alt/sgpt)',
'aspartate aminotransferase (ast/sgot)')
AND study.study_id IS NOT NULL
ORDER BY study.study_id , effect_desc;"
#load all tables into memory
t1 <- dbGetQuery(con,q1)
t2 <- dbGetQuery(con,q2)
t3 <- dbGetQuery(con,q3)
dbDisconnect(con)
#remove duplicates
t3 <- unique(t3)
t3 <- t3[-1,]
#select relevant columns
t3 <- t3 %>% select(study_id,study_citation,study_year,study_source,study_type:study_comment)
```
# Study information/citations for associated studies
```{r}
datatable(t3)
```
```{r}
#go from long format to wide format for the LEL and LOAEL values
t2 <- unique(t2)
t2 <- t2 %>% spread(pod_type,pod_value)
#join both tables
res <- t1 %>% left_join(t2, by = c("study_id","endpoint_target","effect_desc","tg_id"))
#sort by study_id and tg_id
res <- res %>% arrange(study_id,tg_id)
#extract the control data
control <- res %>% filter(dose_level == 0) %>% mutate(control = effect_val) %>% select(-dose_level,-effect_val,-dose_adjusted)
#join back to results table, control is NA where no dose_level 0
res <- res %>% left_join(control)
#calculate fold change
res <- res %>% mutate(fold_change = ifelse(grepl("control",effect_val_unit),effect_val/100,(effect_val - control)/control))
#rearrange table for easier view
res <- res %>% select(study_id:effect_val_unit,fold_change,lel,loael,pod_unit,everything()) %>% mutate(control_unit = effect_val_unit)
```
# Unfiltered fold change values for all dose levels (not just LEL and/or LOAELS
```{r}
datatable(res)
```
# Fold change values for associated LELS and LOAELS based on the effect value at dose level 0. Those studies that did not report control values will not have associated fold change data
```{r}
#pod table
res_pod <- res %>% filter(dose_adjusted==lel | dose_adjusted==loael)
datatable(res_pod)
```