-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata-loader-aml-annotated-cell-types.R
More file actions
120 lines (97 loc) · 2.87 KB
/
data-loader-aml-annotated-cell-types.R
File metadata and controls
120 lines (97 loc) · 2.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# load packages
pkgs = c('flowCore',
'anndata',
'tidyverse',
'magrittr',
'httr')
for(i in pkgs) suppressPackageStartupMessages(library(i, character.only = TRUE))
# define the python interpreter to use
# A non-empty RETICULATE_PYTHON environment variable replaces the
# machine-specific default path below, so the script can run on machines where
# that conda environment lives elsewhere. Without the variable the behaviour is
# unchanged.
python.path = Sys.getenv('RETICULATE_PYTHON', unset = '')
if (!nzchar(python.path)) {
  python.path = '/usr/local/Caskroom/miniconda/base/envs/r-reticulate-env/bin/python'
}
reticulate::use_python(python.path)
# figshare api request and local cache location
# PATH is the directory the downloaded files and the final .h5ad are written to.
PATH <- '~/.scQUEST/Levine_CyTOF_AML_AnnotatedCellTypes/'
# project_id is kept for reference only; nothing in this script reads it.
project_id <- 140062
# article holding the annotated data set (19867021 is the healthy vs AML one)
articel_id <- 19867114
# endpoint that lists the files attached to the article
REQUEST <- paste0('https://api.figshare.com/v2/articles/', articel_id, '/files')
# helpers
# Build one lookup table per non-key column of a data frame.
#
# key: name (single string) of the column whose values become the lookup names.
# df:  data frame / tibble containing `key` and the value columns.
#
# Returns a named list with one entry per column other than `key`; each entry
# is a list of that column's values, named by the corresponding `key` values,
# so that mapping[[column]][[key_value]] yields the value in that row.
# Stops with an error when `key` is not a column of `df`.
mapper <- function(key, df) {
  cols <- colnames(df)
  if (length(key) != 1 || !key %in% cols) {
    stop("key column '", paste(key, collapse = ", "),
         "' not found in data frame", call. = FALSE)
  }
  mapping <- list()
  for (col in cols[cols != key]) {
    # as.list() splits the column into one list element per row
    lookup <- as.list(df[[col]])
    names(lookup) <- df[[key]]
    mapping[[col]] <- lookup
  }
  mapping
}
# Download every file listed by a figshare "article files" endpoint.
#
# path:    directory the files are stored in (created if missing).
# request: URL that returns the file listing; each entry is expected to carry
#          `download_url` and `name` fields.
#
# Files that already exist locally are not downloaded again.
# Returns a list with one character vector c(download_url, local_path) per
# listed file. Stops if the listing request or any download fails.
download_files <- function(path = PATH, request = REQUEST) {
  path <- path.expand(path)
  # local paths are built by concatenation, so make sure the separator is there
  if (!endsWith(path, "/")) {
    path <- paste0(path, "/")
  }
  # only create the directory when needed; avoids a warning on every re-run
  if (!dir.exists(path)) {
    dir.create(path, showWarnings = TRUE, recursive = TRUE, mode = "0777")
  }

  resp <- GET(request)
  stop_for_status(resp)
  listing <- content(resp, "parsed")

  map(listing, function(x) {
    url <- x$download_url
    fpath <- paste0(path, x$name)
    if (!file.exists(fpath)) {
      # download to a temporary name first so that an interrupted transfer is
      # never mistaken for a complete file on the next run
      tmp <- paste0(fpath, ".part")
      # mode = "wb": the FCS files are binary and must not be written in text mode
      status <- download.file(url, tmp, mode = "wb")
      if (status != 0 || !file.rename(tmp, fpath)) {
        unlink(tmp)
        stop("download failed for ", url, call. = FALSE)
      }
    }
    c(url, fpath)
  })
}
# download files (already present files are reused) and keep their local paths
files = download_files(PATH, REQUEST)
# every entry is c(download_url, local_path); the second element is the path
files = vapply(files, function(x) x[2], character(1), USE.NAMES = FALSE)
# match on the file name only, with the dot of the extension escaped, so that
# neither directory names nor names like 'xfcs' can produce false hits
files.names = basename(files)
files.fcs = files[grepl('\\.fcs$', files.names)]
files.csv = files[grepl('\\.csv$', files.names)]
files.sampleID = files.csv[grepl('sampleID', basename(files.csv), fixed = TRUE)]
files.channels = files.csv[grepl('channels', basename(files.csv), fixed = TRUE)]
# fail here with a clear message instead of further down with an obscure one
if (length(files.fcs) == 0) stop('no .fcs files found in download listing')
if (length(files.sampleID) == 0) stop('no sampleID .csv file found in download listing')
if (length(files.channels) == 0) stop('no channels .csv file found in download listing')
# mappings
# Each csv is read into a table and turned into per-column lookup lists:
# map.sample is named by the 'sample_id' column, map.channel by 'channel'.
rename.sample = files.sampleID %>% readr::read_csv()
rename.channel = files.channels %>% readr::read_csv()
map.sample = mapper(key = 'sample_id', df = rename.sample)
map.channel = mapper(key = 'channel', df = rename.channel)
# load FCS files
# For every FCS file this collects
#   X          - expression matrix (rows of all files stacked),
#   OBS        - one annotation row per event (file name + sample mappings),
#   VAR        - channel metadata, required to be the same in every file,
#   fcs.header - header keywords per file, keyed by the file's base name.
# Per-file pieces are gathered in lists and bound once after the loop, which
# avoids re-copying the growing matrix / table on every iteration.
n.files = length(files.fcs)
X.parts = vector('list', n.files)
OBS.parts = vector('list', n.files)
VAR = NULL
fcs.header = list()
for(f.count in seq_along(files.fcs)){
  f = files.fcs[[f.count]]
  f.base = basename(f)
  cat(f.count, '/', n.files, ' reading in file ', f.base, '\n')

  # headers are keyed by base name, so duplicates must be checked on the base
  # name as well (checking the full path could never match an existing key)
  stopifnot(!f.base %in% names(fcs.header))

  header = read.FCSheader(f)
  header.tbl = tibble(keyword = names(header[[1]]), value = header[[1]])

  # NOTE(review): read.FCS runs with its default transformation / range
  # truncation settings - confirm these are intended for this data set
  fcs = read.FCS(f)
  x = exprs(fcs)

  # one annotation row for the file, repeated once per event (row of x)
  obs = list(fcs_file = f.base)
  for(i in names(map.sample)){
    obs[[i]] = map.sample[[i]][[f.base]]
  }
  obs = do.call(tibble, obs)
  obs = obs %>% slice(rep(1, each = nrow(x)))

  var = parameters(fcs)@data %>% select(-range, -minRange, -maxRange)
  if(is.null(VAR)){
    VAR = var
  } else {
    # cells are equal when the values match or both are NA; a plain
    # all(VAR == var) turns NA and aborts even for identical metadata
    same = (VAR == var) | (is.na(VAR) & is.na(var))
    stopifnot(isTRUE(all(same)))
  }

  X.parts[[f.count]] = x
  OBS.parts[[f.count]] = obs
  fcs.header[[f.base]] = header.tbl
}
X = do.call(rbind, X.parts)
OBS = do.call(rbind, OBS.parts)
# tidy data
# Rename the FCS parameter columns, then add one column per channel mapping.
VAR = VAR %>% rename(channel = name, marker = desc)
for(i in names(map.channel)){
  # unlist() drops channels that have no entry in the mapping, which would
  # either fail with an unhelpful length error or (for a single remaining
  # value) be recycled silently; report the missing channels explicitly
  unmapped = setdiff(as.character(VAR$channel), names(map.channel[[i]]))
  if(length(unmapped) > 0){
    stop('channels without an entry in mapping "', i, '": ',
         paste(unmapped, collapse = ', '))
  }
  VAR = VAR %>% mutate("{i}" := unlist(map.channel[[i]][channel]))
}
# create AnnData object
# X holds the stacked expression values, OBS the per-row annotations, VAR the
# channel metadata; the FCS headers travel along as unstructured data.
ad = AnnData(X = X, obs = OBS, var = VAR, uns = list(fcs_header = fcs.header))
# write the object next to the downloaded files
path.output = path.expand(paste0(PATH, 'ad_annotated_cell_types.h5ad'))
ad %>% anndata::write_h5ad(path.output)