Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
20fc928
Initial cleanup
QuanliWang Mar 14, 2019
d4a99d3
Code refactor: make an R package first
QuanliWang Mar 14, 2019
9fee34e
Code Refactor: remove global variable vl
QuanliWang Mar 14, 2019
8da1087
Fix library imports
QuanliWang Mar 14, 2019
0315f17
Code refactor: remove global variables counters for now: might need a…
QuanliWang Mar 14, 2019
c40962f
Code refactor: remove global variable opt
QuanliWang Mar 14, 2019
620ea37
ginire history
QuanliWang Mar 14, 2019
5eea081
hide some global varibles
QuanliWang Mar 14, 2019
8a9a3c6
Use package env for counters
QuanliWang Mar 14, 2019
2d3a28f
Fix all global variables
QuanliWang Mar 14, 2019
b13aa1d
remove unwanted file
QuanliWang Mar 14, 2019
f947f6f
remove histtory file
QuanliWang Mar 14, 2019
ef7fcb7
ignore Rhistory
QuanliWang Mar 14, 2019
e5ddaad
remove Rhisitory at the top level
QuanliWang Mar 14, 2019
174c5d4
Code fefactor
QuanliWang Mar 14, 2019
5c2725b
Code Refactor: reorgnize files/functions based on objects
QuanliWang Mar 15, 2019
2fadc11
remove duplicate file
QuanliWang Mar 15, 2019
f9c36ba
Code clean and Rd for input file heders
QuanliWang Mar 15, 2019
e7d6ecf
Code refactor and documentation
QuanliWang Mar 15, 2019
36d191b
Code refactor
QuanliWang Mar 15, 2019
e2b1213
Hide testContinuous2
QuanliWang Mar 15, 2019
08327b8
Add more documents
QuanliWang Mar 15, 2019
390b2bf
Code refactor and fix the buggy "end" word in function setOrderString
QuanliWang Mar 15, 2019
c55bbea
Code clean for exposure related functions
QuanliWang Mar 15, 2019
0871bb4
Code refactor
QuanliWang Mar 15, 2019
0bc16c2
Code refactor and documentation: continued
QuanliWang Mar 16, 2019
66b719a
Restore original code structure
QuanliWang Mar 16, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@

.DS_Store
.Rproj.user
PHESANT/.Rhistory
2 changes: 2 additions & 0 deletions PHESANT/.Rbuildignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
^.*\.Rproj$
^\.Rproj\.user$
10 changes: 10 additions & 0 deletions PHESANT/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Package: PHESANT
Type: Package
Title: Reimplements PHESANT
Version: 0.1
Date: 2019-03-14
Author: Quanli Wang
Maintainer: Quanli Wang <quanliwang20@hotmail.com>
Description: This will reimplement the original PHESANT as an R package, potentially with some optimizations.
License: GPL (>= 3)
Depends: MASS, lmtest, data.table, nnet, optparse
10 changes: 10 additions & 0 deletions PHESANT/NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
exportPattern("^[[:alpha:]]+")
import(MASS)
import(lmtest)
import(data.table)
import(nnet)
import(optparse)
importFrom("stats", "coef", "complete.cases", "confint", "end", "glm",
"lm", "model.matrix", "na.omit", "pnorm", "qnorm",
"quantile", "relevel")
importFrom("utils", "read.table", "type.convert", "write.table")
17 changes: 17 additions & 0 deletions PHESANT/PHESANT.Rproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
Version: 1.0

RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8

RnwWeave: Sweave
LaTeX: pdfLaTeX

BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
127 changes: 127 additions & 0 deletions PHESANT/R/PHESANT-internal.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
.Random.seed <-
c(403L, 10L, -2055377151L, -1261003152L, -653149682L, 1435497081L,
1855348235L, -576966854L, 341399100L, 778157439L, -1613771051L,
1531682540L, 412921922L, 1086953373L, 325854743L, -1838982274L,
-1413973704L, -1064806885L, 323434361L, 1468101064L, -205967882L,
-631271375L, -660989341L, -707934670L, 2008175492L, -1116261945L,
385783965L, -399175660L, -563583302L, 625440517L, 1609831647L,
1445418342L, -1909055248L, 1313776659L, 1749100977L, -1356585888L,
1455157758L, 1388664201L, 2074088123L, -1846870390L, -1243661972L,
343572207L, 1950927141L, 1541089244L, -359217742L, -1348217683L,
413594727L, 726502286L, 1026280552L, -622679957L, 1120416265L,
1076264088L, -2134790202L, 2005156641L, 902238131L, -1253660894L,
770078996L, 1084119447L, -1439872627L, 590254244L, -1844983158L,
-1204420363L, -1441798321L, 1447714902L, 1974291200L, -2095841725L,
-585513503L, -1731227312L, 937445294L, 934640985L, -1011479701L,
913148634L, 1909599132L, 1373632415L, 178208245L, -821178228L,
-2087284382L, -1997959875L, 1589640631L, 764332638L, 1962285976L,
261272315L, -1048315239L, -201784536L, -1601408554L, -2012176815L,
668146947L, -1514712942L, 30025252L, -873383705L, 764988733L,
-731944524L, 1454405146L, 904033637L, -33915905L, -515351930L,
2009025040L, 617304243L, 158108497L, -1127143680L, -258595362L,
-1664987607L, 1106011611L, 724134634L, -2001639284L, 262526927L,
499109765L, 1971379260L, 1522683154L, -712324595L, -9821881L,
-1493900498L, 744160904L, -1168410741L, 426661737L, 1753729336L,
-490466330L, 555131777L, 1754946323L, -813456190L, -416563340L,
1511400311L, 1291424877L, 2086259460L, 1989440042L, 735829909L,
-250657745L, -759055050L, -2138913440L, 601457571L, 2098922689L,
1814889648L, -126393394L, 2014160697L, 1337096011L, 2010817786L,
94510588L, -753603009L, -1139018987L, 612225068L, 1440518402L,
1251727837L, -859002665L, 1183175614L, -1121028616L, -388640293L,
850886713L, -2082842232L, -286673610L, -1376702607L, 289504675L,
1370276082L, 2064014404L, 1143236359L, 244238045L, -1750603820L,
2003793658L, 251030085L, -1643296097L, 813770790L, 1153809584L,
368946515L, -1173546895L, 2066941344L, 2125658430L, -1106106295L,
-1619944197L, 56489162L, -1429099988L, -702894545L, 2097114853L,
-1469324388L, -1305795086L, 1942503533L, -783513177L, 979614670L,
-464289496L, -2063539413L, -1102053047L, -571165096L, -1266678010L,
1462313697L, 866272883L, 863080162L, -2029530540L, -1653605545L,
-1657831603L, -1705729308L, -581874998L, -1662598347L, 317032847L,
-1302323562L, -710869440L, 2045748995L, 1157095201L, 1237002256L,
609251310L, 481136537L, -2050820053L, -149807846L, -1435701284L,
712831455L, 124114357L, 1680823116L, 408859042L, -1960140547L,
-134820361L, -212387042L, -1368816168L, 1844444987L, 1914468569L,
1771902056L, -1979266666L, -1699702511L, 427268803L, 1609642706L,
-2067868828L, 137279143L, -1601814531L, -127387148L, 59483866L,
1411659813L, 223425855L, -1177511226L, 1897576272L, 414222195L,
1107216785L, -426198080L, 904228254L, -675370654L, 1407568616L,
-1648012084L, -773699076L, 1481535986L, 1360236672L, 1007529460L,
-1342715256L, -1851472070L, -949733040L, 729675884L, -570554988L,
1833331794L, 2089554400L, -318619444L, 193276576L, 1429041538L,
-2126949144L, -306398484L, 1763143996L, -1277055886L, 1554374256L,
421975588L, 2032640056L, 211760410L, 1924362448L, -1268650692L,
-1928376428L, 1611904898L, -603361344L, -1687638468L, -1350393456L,
1079395362L, 248160520L, 1618027660L, -1988603108L, 665800850L,
1214202240L, -187286508L, -2088061144L, 545226298L, -1092967984L,
-1418993300L, 567848884L, 746534418L, -772762656L, 1200707020L,
1496597728L, 1776060450L, -568946904L, -433355252L, -639612100L,
368270322L, 706859760L, 1953071684L, -1698060712L, -1286869190L,
-834147344L, -415867204L, 1347345684L, -740441918L, -1348860832L,
284594556L, 1966673872L, -535664350L, 656744104L, 677991308L,
-464448324L, -598055118L, -1857457600L, 548719796L, -660904888L,
123257274L, 2067885584L, -2115319572L, 1984745812L, 381529874L,
-298453600L, 2077870796L, -1096423840L, -1618591806L, 1879972648L,
-1549703764L, 2022029628L, 206182258L, 1161427248L, 2028039972L,
1369963832L, 1577974490L, 1466738576L, 1352827644L, -1536618604L,
-1325287358L, 1242152192L, 574403644L, -1861933744L, 1895827618L,
1347786568L, -1926394100L, 604801756L, 1377291090L, -2066652544L,
-127154092L, -1934056472L, 1591219962L, -180620976L, 189311788L,
1286340468L, -37364846L, 1854013024L, 2103385996L, 1819648864L,
543637474L, -1091715160L, -1094826804L, -477037060L, 1789887410L,
255111664L, -654716476L, -1160838824L, 146863482L, -1985572432L,
894295996L, -496635820L, -159435838L, 685579296L, -314557252L,
-8424880L, 438237154L, -1256534424L, -1636173236L, -235171588L,
8093682L, -1930136320L, 695349108L, 1842151432L, -1619477062L,
-851016240L, 881351660L, -1716387948L, -115207214L, -1791137952L,
-33112756L, 324058400L, 157033346L, 208630504L, -594262804L,
1896161852L, 265798514L, -1382278544L, 1861693476L, -602374344L,
1108468890L, 484203216L, 484362428L, -709216620L, -1557066110L,
-1252221760L, 683766076L, -2027837296L, -1045503454L, -228385272L,
2107578636L, 1733003932L, -1349392878L, -558103168L, -496880108L,
1038675496L, 1056519482L, 2093201360L, -2010307988L, -1508511948L,
-377087342L, -1249011360L, -1160312500L, 1870274528L, -232228958L,
1435512744L, -1369185396L, -243099972L, 1927674738L, -2106238736L,
-554463164L, 1986329944L, -1455619014L, 1574239088L, -1919521732L,
914670356L, 445555138L, 2084602080L, -778729604L, -1345243440L,
199575202L, -1920988120L, 1905594636L, -253809732L, -1626538702L,
1548744640L, -677629388L, -2139472952L, 519623866L, -1041137776L,
1637101036L, 2136944724L, -1615245678L, 2128794400L, -59988532L,
-1540817952L, 1147640514L, -44053976L, -882973012L, 1048963772L,
698567154L, -1162995408L, -453493596L, -1281574216L, -413576358L,
846591376L, 1607352700L, -716217068L, 72880450L, 930200320L,
291130300L, -596200624L, 568531234L, 1941716296L, 23336972L,
1518573143L, 1694002593L, 1330335334L, -367935900L, -591315563L,
459321023L, -743172520L, -1153875338L, 1744498307L, -1595034939L,
-2103107614L, -546796976L, -202584199L, 390984875L, 2084477980L,
1812200266L, -1649591489L, -1192012407L, 1048710558L, 440534092L,
222078493L, 670313383L, -1613058352L, 1954390126L, -688183525L,
1469139069L, 1553757802L, -316926200L, -1165826127L, -1885802461L,
1929285924L, -477141550L, -2023655897L, 2073149553L, -958355786L,
-1588544940L, -616261915L, 311055087L, 1339211304L, -692618618L,
-890616653L, -1587672555L, 1473830002L, 1794930880L, -1751978711L,
-706341989L, -174078868L, 1009748090L, -577465937L, -1331312455L,
954243790L, -898918372L, -550806643L, 1301792247L, -1906050816L,
-471410850L, 243503147L, -964325587L, 2040009626L, 165040472L,
4277441L, -1684012173L, -2112323436L, 707749154L, 1246311479L,
1815489793L, -1161228282L, -106256828L, 953040757L, -1505234209L,
-1434948040L, -2082533866L, -1478655453L, 1141738981L, 620266498L,
181872240L, -1541364711L, -1821358581L, -1404373764L, -1721304790L,
430858079L, 403881705L, 62217726L, 1339273708L, -1570424515L,
1231956231L, -1980606224L, 458605710L, -1919336773L, 1922633373L,
-2114897334L, -53110680L, 313559121L, 954907459L, -1235980732L,
-683520782L, -1188665145L, 595888785L, 787797014L, 181722740L,
-550557435L, -172300465L, 1876414792L, -1850278682L, -642551277L,
-1898592907L, -657802926L, 1363939744L, 1281517705L, 1436188859L,
-1660478708L, 526215834L, -359997425L, -1166039975L, 401238254L,
-391058564L, -739133459L, 28807319L, 686529376L, -1191093442L,
-252455925L, -1116826483L, -1645730374L, -1123120136L, 1970423713L,
243630419L, 640639092L, -818695678L, -795186025L, -2076447135L,
1631671974L, -1814995292L, -831882411L, -1899210753L, -1279039848L,
185248694L, 455811139L, -1657661051L, -1742312926L, -1304084208L,
1057480889L, -1839299221L, 2022677596L, 1940418954L, -2044848769L,
1693505737L, 134421854L, 1079041548L, -855240227L, -1810362137L,
-98392816L, -1493717970L, -373067045L, 798565309L, -890258134L,
-1339358776L, 374128369L, -1103474589L, -1752112028L, -1493973102L,
746695783L, -1192483791L, -800576138L, 303344276L, 1196185765L,
-1719902801L, 1792706152L, 1735704134L, -396461069L, -1485459313L
)
121 changes: 121 additions & 0 deletions PHESANT/R/binaryLogisticRegression.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
# The MIT License (MIT)
# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without
# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or substantial portions
# of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.


# Perform binary logistic regression
#
# Fits a binomial glm of the phenotype held in column `phenoStartIdx` of
# `thisdata` on the `geno` column, adjusted for the columns
# 3 .. (phenoStartIdx - 1) of `thisdata`, and appends one line to the
# 'results-logistic-binary-<varTypeArg>.txt' file in opt$resDir.
#
# Arguments:
#   opt           list of options; this function reads opt$save,
#                 opt$standardise, opt$confidenceintervals, opt$resDir and
#                 opt$varTypeArg.
#   varName       name of the variable; written to the logs and result file.
#   varType       type label written to the result file.
#   thisdata      data frame with "userID" and "geno" columns, the adjustment
#                 columns, and the phenotype at column `phenoStartIdx`.
#   isExposure    when TRUE an extra exposure success counter is incremented.
#   phenoStartIdx column index of the phenotype in `thisdata`.
#
# The phenotype is skipped (with a log message and a counter increment) when
# it does not have exactly two levels, when either level has fewer than 10
# examples, or when fewer than 500 values are non-missing.
#
# NOTE(review): the sink() calls below pop the current output sink before
# redirecting to the model-fit / result log files, so this presumably expects
# a sink to be active on entry - confirm against the caller.
binaryLogisticRegression <- function(opt, varName, varType, thisdata, isExposure, phenoStartIdx) {

  phenoFactor <- factor(thisdata[, phenoStartIdx])
  facLevels <- levels(phenoFactor)

  # The variable must have exactly two distinct values. Stop here instead of
  # falling through, otherwise a phenotype with 3+ levels would still be
  # fitted as if it were binary.
  if (length(facLevels) != 2) {
    cat("BINARY-NOT2LEVELS- (", length(facLevels), ") || ", sep = "")
    .incrementCounter("binary.nottwolevels")
    return(invisible(NULL))
  }

  # Number of examples per level and number of non-missing values.
  idxTrue <- length(which(phenoFactor == facLevels[1]))
  idxFalse <- length(which(phenoFactor == facLevels[2]))
  numNotNA <- length(which(!is.na(phenoFactor)))

  if (idxTrue < 10 || idxFalse < 10) {
    cat("BINARY-LOGISTIC-SKIP-10 (", idxTrue, "/", idxFalse, ") || ", sep = "")
    .incrementCounter("binary.10")
  } else if (numNotNA < 500) {
    cat("BINARY-LOGISTIC-SKIP-500 (", numNotNA, ") || ", sep = "")
    .incrementCounter("binary.500")
  } else {

    cat("sample ", idxTrue, "/", idxFalse, "(", numNotNA, ") || ", sep = "")

    if (opt$save == TRUE) {
      # Save mode: store the derived phenotype instead of testing it.
      .storeNewVar(thisdata[, "userID"], phenoFactor, varName, 'bin')
      cat("SUCCESS results-logistic-binary ")
      .incrementCounter("success.binary")
    } else {

      # Optionally use standardised geno values. The name `geno` matters: it
      # is referenced by the model formula and by the coefficient lookups.
      if (opt$standardise == TRUE) {
        geno <- scale(thisdata[, "geno"])
      } else {
        geno <- thisdata[, "geno"]
      }
      confounders <- thisdata[, 3:(phenoStartIdx - 1), drop = FALSE]

      # Send anything printed while fitting to the model-fit log.
      sink()
      sink(pkg.env$modelFitLogFile, append = TRUE)
      print("--------------")
      print(varName)

      ###### BEGIN TRYCATCH
      tryCatch({

        mylogit <- glm(phenoFactor ~ geno + ., data = confounders, family = "binomial")

        # Fitting is done: switch output back to the result log.
        sink()
        sink(pkg.env$resLogFile, append = TRUE)

        sumx <- summary(mylogit)

        pvalue <- sumx$coefficients['geno', 'Pr(>|z|)']
        beta <- sumx$coefficients["geno", "Estimate"]

        if (opt$confidenceintervals == TRUE) {
          cis <- confint(mylogit, "geno", level = 0.95)
          lower <- cis["2.5 %"]
          upper <- cis["97.5 %"]
        } else {
          lower <- NA
          upper <- NA
        }

        ## save result to file
        resultLine <- paste(varName, varType,
                            paste0(idxTrue, "/", idxFalse, "(", numNotNA, ")"),
                            beta, lower, upper, pvalue, sep = ",")
        resultFile <- paste0(opt$resDir, "results-logistic-binary-", opt$varTypeArg, ".txt")
        write(resultLine, file = resultFile, append = TRUE)
        cat("SUCCESS results-logistic-binary ")

        .incrementCounter("success.binary")

        if (isExposure == TRUE) {
          .incrementCounter("success.exposure.binary")
        }

        ## END TRYCATCH
      }, error = function(e) {
        # Whichever log was active when the error occurred, pop it and make
        # sure the result log is the active sink again.
        sink()
        sink(pkg.env$resLogFile, append = TRUE)
        # Log only the error message (line breaks stripped), not the call.
        cat(paste("ERROR:", varName, gsub("[\r\n]", "", conditionMessage(e)), sep = " "))
        .incrementCounter("binary.error")
      })
    }
  }
}

95 changes: 95 additions & 0 deletions PHESANT/R/cleanData.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# The MIT License (MIT)
# Copyright (c) 2017 Louise AC Millard, MRC Integrative Epidemiology Unit, University of Bristol
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without
# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or substantial portions
# of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
# TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

# Reassigns values as specified in the data coding info file.
#
# Arguments:
#   vl      list holding `phenoInfo` (columns FieldID, DATA_CODING are read)
#           and `dataCodeInfo` (columns dataCode, reassignments are read).
#   pheno   phenotype values (vector or matrix-like) to recode.
#   varName field ID used to look up the data coding of this variable.
#
# The `reassignments` entry is a "|"-separated list of "old=new" pairs; every
# occurrence of `old` in pheno is replaced by `new` (parsed as an integer).
# If, after recoding, every non-missing value can be read as a number, the
# flattened numeric vector is returned; otherwise pheno is returned with only
# the replacements applied. pheno is returned unchanged when the variable has
# no (or an empty) reassignments entry.
.reassignValue <- function(vl, pheno, varName) {
  # get data code info - whether this data code is ordinal or not and any reordering and reassignments
  dataPheno <- vl$phenoInfo[which(vl$phenoInfo$FieldID == varName), ]
  dataCode <- dataPheno$DATA_CODING

  # not all variables will have a data code info row
  dataCodeRow <- which(vl$dataCodeInfo$dataCode == dataCode)
  if (length(dataCodeRow) == 0) {
    # do nothing
  } else if (length(dataCodeRow) == 1) {
    dataDataCode <- vl$dataCodeInfo[dataCodeRow, ]
    reassignments <- as.character(dataDataCode$reassignments)

    # can be NA if row not included in data coding info file
    if (!is.na(reassignments) && nchar(reassignments) > 0) {
      reassignParts <- unlist(strsplit(reassignments, "\\|"))
      cat(paste("reassignments: ", reassignments, " || ", sep = ""))

      # do each "old=new" reassignment
      for (part in reassignParts) {
        oldNew <- unlist(strsplit(part, "="))

        # matrix version
        idx <- which(pheno == oldNew[1], arr.ind = TRUE)
        pheno[idx] <- strtoi(oldNew[2])
      }

      ## see if type has changed (this happens for field 216 (X changed to -1))
      ## as.numeric will set non numeric to NA so we know if it's ok to do this
      ## by seeing if there are extra NA's after the conversion.
      ## The coercion warning is the expected signal here, so it is silenced.
      pNum <- suppressWarnings(as.numeric(unlist(pheno)))

      ## Count NAs with sum(): which(..., arr.ind=TRUE) returns a two-column
      ## index matrix for matrix input, so its length() is twice the NA count.
      isNum <- sum(is.na(pheno)) == sum(is.na(pNum))
      if (isNum) {
        pheno <- pNum
      }
    }
  } else {
    cat("WARNING: >1 ROWS IN DATA CODE INFO FILE || ")
  }
  return(pheno)
}

# Replace NaN and empty values with NA in pheno.
#
# For a factor, entries equal to the string "NaN" are set to NA; for any
# other input, entries for which is.nan() is TRUE are set to NA. In both
# cases entries equal to the empty string "" are then set to NA as well.
# Returns the cleaned copy of pheno.
.replaceNaN <- function(pheno) {
  cleaned <- pheno

  # First pass: the NaN marker depends on how the values are stored.
  if (is.factor(pheno)) {
    nanIdx <- which(cleaned == "NaN")
  } else {
    nanIdx <- which(is.nan(cleaned))
  }
  cleaned[nanIdx] <- NA

  # Second pass: empty strings are treated as missing for every input type.
  emptyIdx <- which(cleaned == "")
  cleaned[emptyIdx] <- NA

  cleaned
}

# Sets to NA every value of pheno that occurs fewer than 10 times.
#
# Each distinct non-missing value is visited in order of first appearance and
# a short message is printed saying whether it was kept (with its count) or
# removed. Returns pheno with the rare values replaced by NA.
.testNumExamples <- function(pheno) {
  distinctValues <- unique(na.omit(pheno))
  for (value in distinctValues) {
    matches <- which(pheno == value)
    nMatches <- length(matches)
    if (nMatches >= 10) {
      cat(paste0("Inc(>=10): ", value, "(", nMatches, ") || "))
    } else {
      # too few examples of this value: blank them out
      pheno[matches] <- NA
      cat(paste0("Removed ", value, ": ", nMatches, "<10 examples || "))
    }
  }
  pheno
}

Loading