forked from HeatherARobinson/EHR-reshaping
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathChecking cohort properties in SIR
More file actions
95 lines (81 loc) · 4.49 KB
/
Checking cohort properties in SIR
File metadata and controls
95 lines (81 loc) · 4.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# Load the saved workspace image; later sections expect it to supply the
# `sir.data` data frame.
load("sir.data.rda")
# NOTE(review): memory.size() and memory.limit() are Windows-only and have been
# non-functional stubs since R 4.2.0. memory.size() reports memory usage; if
# the aim was to raise the memory cap, memory.limit() was probably intended -
# confirm.
memory.size(50000)
# READ CONDITION FILES - SUFFIX ALL NAMES WITH A 1 THEN YOU CAN IMPORT THEM ALL
# TOGETHER
# `pattern` is a regular expression, not a shell glob, so anchor it to file
# names that really end in "1.csv".
temp <- list.files(pattern = "1\\.csv$")
# Each file is read into a data frame named after the file itself (for example
# `Dialysis1.csv`), which is how later sections refer to the code lists.
# Iterating over the names directly does nothing when no file matches, whereas
# 1:length(temp) would loop over c(1, 0).
for (csv_name in temp) {
  assign(csv_name, read.csv(csv_name))
}
# Code list used below to pick out heart-failure records (ReadCode column).
heart <- read.csv("HeartFailure.csv")
library(zoo)
library(plyr)
library(tidyverse)
library(zoo)
library(data.table)
library(survival)
#####
# SELECT HEART FAILURE PATIENTS WITH CREATININE DATA
# Records whose Read code appears in the heart-failure code list.
hf <- sir.data[sir.data$ReadCode %in% heart$ReadCode, ]
# Records whose Read code appears in the dialysis code list; patients with any
# such record are excluded below.
dialysis <- sir.data[sir.data$ReadCode %in% Dialysis1.csv$ReadCode, ]
# Records entered in 2008 or later (EntryDate is parsed as yyyymmdd).
entry_year <- as.numeric(year(strptime(sir.data$EntryDate, format = "%Y%m%d")))
post2008 <- sir.data[entry_year >= 2008, ]
# Keep every record of patients who have no dialysis record, at least one
# heart-failure record and at least one record from 2008 onwards.
in_cohort <- !sir.data$PatientID %in% dialysis$PatientID &
  sir.data$PatientID %in% hf$PatientID &
  sir.data$PatientID %in% post2008$PatientID
crea <- sir.data[in_cohort, ]
# Values that cannot be read as numbers become NA (with a coercion warning).
crea$CodeValue <- as.numeric(as.character(crea$CodeValue))
crea$EntryDate <- as.Date(
  as.character(strptime(crea$EntryDate, format = "%Y%m%d"))
)
# Create a date format entry date
# EntryDate is already a Date at this point, so copy it directly. Splitting it
# into day/month/year strings and re-parsing gives the same dates, but fails
# outright when the first EntryDate is NA.
crea$event.date <- crea$EntryDate
# Code list for BNP tests; its ReadCode and Sensitive columns are used below.
BNP <- read.csv("BNP.csv")
# From here on `heart` holds the sir.data records carrying a heart-failure
# code, replacing the code list that was read earlier.
heart <- sir.data[sir.data$ReadCode %in% heart$ReadCode, ]
# Records entered in 2008 or later (EntryDate is parsed as yyyymmdd).
entry_year <- as.numeric(year(strptime(sir.data$EntryDate, format = "%Y%m%d")))
post2008 <- sir.data[entry_year >= 2008, ]
# Compare thresholds against a numeric copy of CodeValue, converted the same
# way as elsewhere in this script; non-numeric entries become NA.
code_value <- as.numeric(as.character(sir.data$CodeValue))
# Codes flagged Sensitive == 1 use a threshold of 2000, codes flagged
# Sensitive == 0 a threshold of 100. Both clauses must match on ReadCode:
# testing CodeValue against the code list can never select the second group.
sensitive_codes <- BNP$ReadCode[which(BNP$Sensitive == 1)]
other_codes <- BNP$ReadCode[which(BNP$Sensitive == 0)]
is_raised_bnp <-
  (sir.data$ReadCode %in% sensitive_codes & code_value >= 2000) |
  (sir.data$ReadCode %in% other_codes & code_value >= 100)
# which() drops rows whose comparison is NA instead of returning all-NA rows.
BNPs <- sir.data[which(is_raised_bnp), ]
# Number of distinct patients with a qualifying BNP record, a record from 2008
# onwards and a heart-failure record.
# Original author's recorded result, taken before the ReadCode correction
# above, so it needs re-checking: 9328 patients with a post 2008 creatinine
# test, HF code and a BNP based on codes
in_all_three <- sir.data$PatientID %in% BNPs$PatientID &
  sir.data$PatientID %in% post2008$PatientID &
  sir.data$PatientID %in% heart$PatientID
length(unique(sir.data$PatientID[in_all_three]))
# WHICH PATIENTS HAVE A RECORDED TIME RANGE OF 4 YEARS OR MORE
# Earliest and latest record date per patient, as day counts. Converting to
# numeric up front makes the division below independent of how aggregate()
# treats Date values. The result has the patient id in column 1 and a
# two-column matrix `x` (min, max).
ranges <- aggregate(as.numeric(crea$event.date), list(crea$PatientID), range)
# Span between first and last record, in years of 365 days.
ranges$range <- ranges$x[, 2] / 365 - ranges$x[, 1] / 365
# NOTE(review): `<= 4` also removes a span of exactly 4 years, although the
# heading says "4 years or more" - confirm which is intended.
range_short_ids <- ranges[which(ranges$range <= 4), 1]
# Use a logical mask rather than -which(...): a negative index built from an
# empty which() selects zero rows, which would discard the whole cohort when no
# patient has a short span.
allsp <- crea[!crea$PatientID %in% range_short_ids, ]
# List the patient ids that remain.
levels(as.factor(allsp$PatientID))
# WHICH PATIENTS HAVE A POST 2008 TIME RANGE OF 4 YEARS OR MORE
# Only records dated 2008 or later count towards the span here; without this
# filter the section would simply repeat the whole-record calculation.
# which() also drops records whose date is NA.
post_rows <- which(crea$event.date >= as.Date("2008-01-01"))
# Earliest and latest post-2008 record date per patient, as day counts. The
# result has the patient id in column 1 and a two-column matrix `x` (min, max).
ranges <- aggregate(
  as.numeric(crea$event.date[post_rows]),
  list(crea$PatientID[post_rows]),
  range
)
# Span between first and last post-2008 record, in years of 365 days.
ranges$range <- ranges$x[, 2] / 365 - ranges$x[, 1] / 365
# NOTE(review): `<= 4` also removes a span of exactly 4 years, although the
# heading says "4 years or more" - confirm which is intended.
range_short_ids <- ranges[which(ranges$range <= 4), 1]
# Use a logical mask rather than -which(...): a negative index built from an
# empty which() selects zero rows, which would discard the whole cohort when no
# patient has a short span.
allsp <- crea[!crea$PatientID %in% range_short_ids, ]
# List the patient ids that remain.
levels(as.factor(allsp$PatientID))
# Rebuild the cohort from sir.data: every record of patients with no dialysis
# record, at least one heart-failure record and at least one record from 2008
# onwards.
in_cohort <- !sir.data$PatientID %in% dialysis$PatientID &
  sir.data$PatientID %in% hf$PatientID &
  sir.data$PatientID %in% post2008$PatientID
crea <- sir.data[in_cohort, ]
# Keep only records with Read code 44J3. (treated as creatinine below) that
# were entered in 2008 or later. which() drops records whose date cannot be
# parsed instead of returning all-NA rows for them.
entry_year <- as.numeric(year(strptime(crea$EntryDate, format = "%Y%m%d")))
crea <- crea[which(entry_year >= 2008 & crea$ReadCode == "44J3."), ]
# REMOVE DUPLICATES
# Any non numeric values are marked NA (with a coercion warning).
crea$CodeValue <- as.numeric(as.character(crea$CodeValue))
# Every remaining row has Read code 44J3., so the creatinine value is simply
# the numeric CodeValue.
crea$Creatinine <- crea$CodeValue
crea$EntryDate <- as.Date(
  as.character(strptime(crea$EntryDate, format = "%Y%m%d"))
)
MONTH <- format(crea$EntryDate, "%m")
YEAR <- format(crea$EntryDate, "%Y")
# Calendar month of the test, stored on `crea` itself because the duplicate
# check below reads crea$EntryPeriod.
crea$EntryPeriod <- paste(MONTH, YEAR)
# Delete duplicate test results
library(lubridate)
# A row is a duplicate when the same patient already has the same creatinine
# value in the same month. The three columns must be compared together:
# AND-ing three separate duplicated() calls flags rows whose values merely
# occurred somewhere earlier, possibly for different patients.
is_repeat <- duplicated(crea[, c("PatientID", "EntryPeriod", "Creatinine")])
crea <- crea[!is_repeat, ]
# Creatinine values less than 18 are considered not valid. Rows with a missing
# value are kept, as before. A logical mask is used because -which(...) selects
# zero rows when no value is below 18.
crea <- crea[is.na(crea$CodeValue) | crea$CodeValue >= 18, ]
# Per patient id: TRUE when that patient has exactly one row in `crea`.
rare <- table(crea$PatientID) == 1
# Patient ids in the same order as `rare`.
ids <- names(rare)
# How many patients have a single row versus more than one.
table(rare)
# Keep only rows of patients that are not in the single-row set.
crea.rep <- crea[is.na(match(crea$PatientID, ids[rare])), ]
# WHICH PATIENTS HAVE A RECORDED TIME RANGE OF UNDER 1 YEAR
# NOTE(review): `alls` is not created anywhere in this script - presumably it
# comes from the loaded workspace or should be `crea.rep`; confirm.
# Earliest and latest EntryDate per patient; the result has the patient id in
# its first column (Group.1) and a two-column matrix `x` (min, max).
ranges <- aggregate(alls$EntryDate, list(alls$PatientID), range)
# Span between first and last record, in years of 365 days.
ranges$range <- ranges$x[, 2] / 365 - ranges$x[, 1] / 365
# Ids of patients whose span is below one year.
short_rows <- which(ranges$range < 1)
range_short_ids <- ranges$Group.1[short_rows]
# Unlike the 4-year sections, this keeps the short-span patients.
allsp <- alls[alls$PatientID %in% range_short_ids, ]
# List their patient ids.
levels(as.factor(allsp$PatientID))
# CHECK FOR SENSITIVITY OF SPECULATIVE HEART FAILURE CODES
###################################################################
# HEART FAILURE ALL CODES ANY HF AT ANY POINT
# Records whose Read code is in the "all" heart-failure code list.
# NOTE(review): this list is read through its `Code` column while the other
# code lists use `ReadCode` - confirm the column name in HeartFailureAll1.csv.
data2 <- sir.data[sir.data$ReadCode %in% HeartFailureAll1.csv$Code, ]
# 1 when the patient has at least one such record, otherwise 0.
crea.rep$HeartFailureAll <- ifelse(
  crea.rep$PatientID %in% data2$PatientID, 1, 0
)
rm(data2)
###################################################################
# HEART FAILURE CONFIRMED CODES ANY HF AT ANY POINT
# Records whose Read code is in the "confirmed" heart-failure code list.
data2 <- sir.data[
  sir.data$ReadCode %in% HeartFailureConfirmed1.csv$ReadCode,
]
# 1 when the patient has at least one such record, otherwise 0.
crea.rep$HeartFailureConf <- ifelse(
  crea.rep$PatientID %in% data2$PatientID, 1, 0
)
rm(data2)
# Rows of patients flagged by the "all" list but not by the "confirmed" list.
specheartpatients <- crea.rep[
  crea.rep$HeartFailureConf == 0 & crea.rep$HeartFailureAll == 1,
]
# List those patient ids.
levels(as.factor(specheartpatients$PatientID))
# Row counts by "all" heart-failure flag.
table(crea.rep$HeartFailureAll)