sgfin.github.io_old/index.html at master · sgfin/sgfin.github.io_old · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
<!DOCTYPE html>
<html lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />


  <meta name="description" content="Samuel Finlayson's personal webpage -- machine learning and medicine" />

<meta name="keywords" content= "research, medicine, machine learning"/>
<link rel="stylesheet" type="text/css" href="./static/about.css" media="all" />

<!--[if lte IE 7]>
<link rel="stylesheet" href="new2011/styles/np-ie67.css" type="text/css" media="all" />
<![endif]-->

<!-- Third-party libraries from CDNs. jQuery (1.6.2) comes first; none of these tags use async/defer, so they execute in document order. -->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.6.2/jquery.min.js"></script>
<!-- jQuery plugins: easing functions and throttle/debounce helpers. -->
<script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/jquery-easing/1.3/jquery.easing.min.js"></script>
<script type='text/javascript' src="https://cdnjs.cloudflare.com/ajax/libs/jquery-throttle-debounce/1.1/jquery.ba-throttle-debounce.min.js"></script>
<!-- Site-local scripts (contents not shown in this file); presumably depend on the libraries above - confirm before reordering. -->
<script type="text/javascript" src="./static/js/layout.js"></script>
<script type="text/javascript" src="./static/js/common.js"></script>
<!-- MathJax 2.7.1 with the TeX-AMS-MML_HTMLorMML configuration. -->
<script type="text/javascript"
    src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS-MML_HTMLorMML">
</script>

<!-- toggle abstracts -->
<script>
// Abstract toggling, wired up once the DOM is ready.
// jQuery passes itself to the ready callback, so `$` below is always jQuery
// even if the global `$` has been released or rebound by another script.
jQuery(function ($) {
  // Clicking "#showall" reveals every element with class "targetDiv".
  $('#showall').click(function () {
    $('.targetDiv').show();
  });

  // Each ".showSingle" link carries a `target` attribute holding a number N;
  // clicking it slides the panel with id "divN" open or closed over 100 ms.
  $('.showSingle').click(function () {
    var panelId = '#div' + $(this).attr('target');
    $(panelId).slideToggle(100);
  });
});
</script>

<!-- Global site tag (gtag.js) - Google Analytics -->
<script async src="https://www.googletagmanager.com/gtag/js?id=UA-122144402-1"></script>
<script>
  // Reuse the existing dataLayer queue if one is already defined; otherwise start an empty one.
  window.dataLayer = window.dataLayer || [];
  // gtag() appends its `arguments` object to the dataLayer queue.
  function gtag(){dataLayer.push(arguments);}
  gtag('js', new Date());

  // Same property ID as the gtag.js loader URL in the preceding script tag.
  gtag('config', 'UA-122144402-1');
</script>

<!-- set title / toggle blog vs not blog -->

  <title>Samuel Finlayson</title>

</head>

<body class="white">


<div id="container">

    <div class="storey border-top">

    <div class='nav-left'> <!--class="column w405 left">-->

    <h1>Sam Finlayson</h1>

    </div>
    <div class="nav-right"> <!-- class="column w548 right extra-pad"> -->

      <div id="nav">

        <!-- <a href="#publications">Publications</a> <br>
        <a href="#talks">Talks</a> <br>
        <a href="#projects">Projects</a> <br>
        -->

        <a href="https://sgfin.github.io/learning-resources/">ML Resources</a> <br>
        <a href="https://sgfin.github.io/posts/">Posts</a> <br>
        <a href="./assets/sgf_cv.pdf">CV</a> <br>

        <!-- Profile icons. Each image is the only content of its control, so its alt text is the control's accessible name. -->
        <div class='iconwrapper'>
            <div class='icon'>
            <a href="https://github.com/sgfin">
            <img src="./assets/GitHub-Mark-120px-plus.png"
                 alt="GitHub"
                 style="width:40px;height:40px;">
            </a>
            </div>
            <div class='icon'>
            <a href="https://scholar.google.com/citations?user=7__yMlEAAAAJ&amp;hl=en">
            <img src="./assets/gscholar_logo.jpg"
                 alt="Google Scholar"
                 style="width:40px;height:40px;">
            </a>
            </div>
            <div class='icon'>
            <!-- NOTE(review): no handler for #email-button is defined in this file; presumably a site script reveals #email-div - confirm. -->
            <input id="email-button" style="outline: none;" type="image"
                   src="./assets/email-14-128.png"
                   alt="Email"
                   width="40" height="40">
            </div>
        </div>
        <div id="email-div" style="display:none; color:#000066;">
            samuel_finlayson "at" hms.harvard.edu
        </div>

      </div> <!-- #nav -->

    </div> <!-- .column -->

    <div style="clear:both;"> </div>

</div> <!-- .storey (header) -->


    <div class="storey border-top">

      <div style="display:table;">
        <div class="headshot">
          <!-- alt text gives non-visual users the content of the portrait. -->
          <img src="./static/Finlayson_Headshot.jpg" alt="Portrait of Samuel Finlayson" style="width:100%; max-width:180px;">
          <!--style="width:200px;float:left;padding:10px; padding-right:30px; height:100%;">-->
        </div>

        <div class="standfirst">
          MD-PhD Candidate<br>

          <div style="font-size:18px">
          Harvard Medical School, MIT
          </div>

          <p class="intro">
             I am a medical student in the <a href="https://hst.mit.edu/">Harvard-MIT HST program</a>.  Recently, I also completed my PhD under the direction of <a href="http://dbmi.hms.harvard.edu/person/faculty/zak-kohane"> Isaac Kohane (Harvard DBMI)</a> and <a href = "https://mit-medg.github.io/index.html"> Peter Szolovits (MIT CSAIL)</a>.  My research centers on biomedical applications of machine learning, which has allowed me to participate in a range of really fun projects leveraging medical images, medical text, EHR and claims data, and various 'omics.
             <br><br>
             In addition to my research, I am very involved in the Hydrocephalus community by way of <a href="http://teamhydro.org">Team Hydro</a>, a non-profit organization that my family and I started to raise money and awareness for the condition in honor of my sister, <a href="http://teamhydro.org/kate-finlayson/">Kate</a>. I also do consulting work as a data scientist and deep learning engineer, with clients including tech, biotech, and pharmaceutical companies.
             <br><br>
             For a more formal account of my academic work, see my <a href="/assets/sgf_cv.pdf"> Curriculum Vitae</a> or <a href="https://scholar.google.com/citations?user=7__yMlEAAAAJ&hl=en">Google Scholar</a>.
          </p>
        </div> <!-- .standfirst -->
      </div><!-- table container -->
    </div> <!-- story border-top -->

    <div class="container" style="font-size:18px; font-weight:300;margin-top:15px;margin-bottom:15px;">

</div>


<div class="container">
  <!-- Education timeline, newest entry first. Each entry is one .timelineitem inside #timeline. -->
  <div id="timeline">
    <div class="timelineitem">
      <div class="tdate">2022 (Anticipated)</div>
      <div class="ttitle">Harvard Medical School, MIT</div>
      <div class="tdesc">MD, <span class="thigh">HST Program</span></div>
      <div class="tdesc">MSTP Fellowship</div>
    </div>
    <div class="timelineitem">
      <div class="tdate">2020</div>
      <div class="ttitle">Harvard, MIT</div>
      <div class="tdesc">PhD, <span class="thigh">Quantitative Biology</span></div>
      <div class="tdesc">Advisors: Isaac Kohane (HMS) and Peter Szolovits (MIT)</div>
      <div class="tdesc">Thesis: <a href="https://www.dropbox.com/s/slw2vkxajgwgp6i/PhD_Thesis.pdf?dl=0"> Learning Inductive Representations of Biomedical Data</a> </div>
    </div>
    <div class="timelineitem">
      <div class="tdate">2014</div>
      <div class="ttitle">Stanford University</div>
      <div class="tdesc">MS, <span class="thigh">Biomedical Informatics</span></div>
      <div class="tdesc">Research Mentor: Nigam Shah</div>
    </div>
    <div class="timelineitem">
      <div class="tdate">2013</div>
      <div class="ttitle">Stanford University</div>
      <div class="tdesc">BA, <span class="thigh">Human Biology - Biocomputation</span></div>
      <div class="tdesc">Research Mentor: Daniel Rubin</div>
    </div>
  </div>
</div>

<br>

<hr class="soft">


<div id="container" class="container" style="margin-top:50px; ">
    <!-- PUBLICATIONS ROW -->
    <h2 id="publications">Selected Research</h2>
    <div class="pub">
    <ul>
        <li>
          <img src="./assets/subgnn/subgnn_fig1.png" class="pub-image">
          <div class="pub-info">
            <h3>Subgraph Neural Networks</h3>
            <!-- Authors, venue, and links. The showSingle link's target number selects the #div27 panel. -->
            <p>Emily Alsentzer*, <u>Samuel Finlayson*</u>, Michelle Li, Marinka Zitnik <br>
              <i>In Submission</i>, 2020<br>
              <i>*Co-first, listed alphabetically</i><br>
              [<a class="showSingle" target="27">Abstract</a>]
              [<a href="https://arxiv.org/pdf/2006.10538.pdf">Paper</a>]
            </p>
            <div id="div27" class="abstract">
              Deep learning methods for graphs achieve remarkable performance on many node-level and graph-level prediction tasks. However, despite the proliferation of the methods and their success, prevailing Graph Neural Networks (GNNs) neglect subgraphs, rendering subgraph prediction tasks challenging to tackle in many impactful applications. Further, subgraph prediction tasks present several unique challenges: subgraphs can have non-trivial internal topology, but also carry a notion of position and external connectivity information relative to the underlying graph in which they exist.
              <br/>
              Here, we introduce Sub-GNN, a subgraph neural network to learn disentangled subgraph representations. We propose a novel subgraph routing mechanism that propagates neural messages between the subgraph’s components and randomly sampled anchor patches from the underlying graph, yielding highly accurate subgraph representations. Sub-GNN specifies three channels, each designed to capture a distinct aspect of subgraph topology, and we provide empirical evidence that the channels encode their intended properties. We design a series of new synthetic and real-world subgraph datasets. Empirical results for subgraph classification on eight datasets show that Sub-GNN achieves considerable performance gains, outperforming strong baseline methods, including node-level and graph-level GNNs, by 12.4% over the strongest baseline. Sub-GNN performs well on challenging biomedical datasets when subgraphs have complex topology and even comprise multiple disconnected components.
            </div>
          </div>
        </li>
        <li>
          <img src="./assets/small_mol_ge_coord/figure1.png" class="pub-image">
          <div class="pub-info">
            <h3>Cross-Modal Representation Alignment of Transcriptional Profiles and Small Molecule Therapeutics</h3>
            <!-- Authors, venue, and links. The showSingle link's target number selects the #div26 panel. -->
            <p><u>Samuel Finlayson</u>, Matthew McDermott, Alex Pickering, Scott Lipnick, Isaac Kohane <br>
              <i>In Submission</i>, 2020<br>
              [<a class="showSingle" target="26">Abstract</a>]
              <!-- TODO(review): href is empty, so this link just reloads the current page; add the paper URL when available. -->
              [<a href="">Paper</a>]
            </p>
            <div id="div26" class="abstract">
              Modeling the relationship between chemical structure and molecular activity is a key goal in drug development and precision medicine. Many benchmark tasks have been proposed for molecular property prediction of specific biophysical/structural properties, but few efforts have sought to measure chemical representations' ability to simultaneously encode functional information of broad physiological relevance. In this work, we propose a new chemical modeling task inspired by connectivity mapping, a framework for modern drug development that leverages transcriptomics. More specifically, we seek chemical representations that can encode the full space of transcriptional changes induced by perturbation with a small molecule. We develop this task formally as multi-view alignment problem, and present a coordinated deep learning approach that jointly optimizes representations of both chemical structure and gene expression. We probe the changes in chemical representations that are induced by this training task, and benchmark our results against oracle models and principled baselines.
            </div>
          </div>
        </li>
        <li>
        <img src="./assets/misc_paper_figures/cpt.png" class="pub-image">
        <div class="pub-info">
          <h3>Examining the Use of Real-World Evidence in the Regulatory Process</h3>
          <!-- Authors, venue, and links. The showSingle link's target number selects the #div25 panel. -->
          <p>Brett Beaulieu-Jones, <u>Samuel Finlayson</u>, William Yuan, Russ Altman, Isaac Kohane, Vinay Prasad, Kun-Hsing Yu<br>
            <i>Clinical Pharmacology and Therapeutics</i>, 2019<br>
            [<a class="showSingle" target="25">Abstract</a>]
            [<a href="https://ascpt.onlinelibrary.wiley.com/doi/pdf/10.1002/cpt.1658">Paper</a>]
          </p>
          <div id="div25" class="abstract">
            The 21st Century Cures Act passed by the United States Congress mandates the Food and Drug Administration to develop guidance to evaluate the use of real-world evidence (RWE) to support the regulatory process. RWE has generated important medical discoveries, especially in areas where traditional clinical trials would be unethical or infeasible. However, RWE suffers from several issues that hinder its ability to provide proof of treatment efficacy at a level comparable to randomized controlled trials. In this review article, we summarized the advantages and limitations of RWE, identified the key opportunities for RWE, and pointed the way forward to maximize the potential of RWE for regulatory purposes.
          </div>
        </div>
      </li>
        <li>
        <img src="./assets/adversarial_examples_paper/science_fig.png" class="pub-image">
        <div class="pub-info">
          <h3>Adversarial attacks on medical machine learning</h3>
          <!-- Authors, venue, and links. The showSingle link's target number selects the #div23 panel. -->
          <p><u>Samuel Finlayson</u>, John Bowers, Joi Ito, Jonathan Zittrain, Andrew Beam, Isaac Kohane<br>
            <i>Science</i>, 2019<br>
            [<a class="showSingle" target="23">Abstract</a>]
            [<a href="http://science.sciencemag.org/cgi/content/full/363/6433/1287?ijkey=OXnSsEp.Iagl6">Paper</a>]
            [<a href="https://github.com/sgfin/adversarial-medicine">GitHub</a>]
            <br>
            [<a href="https://arxiv.org/abs/1804.05296">Preprint/Supplement</a> (more technical)]
            <br>
            [<a href="https://sgfin.github.io/2019/03/21/FAQ-On-Adversarial-Science-Paper/">FAQ</a>]
            <!-- Every press link is closed individually; anchors must not be nested. -->
            [Sample press:
            <a href="https://www.nytimes.com/2019/03/21/science/health-medicine-artificial-intelligence.html">NYT</a>,
            <a href="https://www.newyorker.com/tech/annals-of-technology/the-hidden-costs-of-automated-thinking">New Yorker</a>,
            <a href="https://spectrum.ieee.org/the-human-os/telecom/security/bracing-medical-ai-systems-for-attacks">IEEE</a>,
            <a href="https://www.vox.com/future-perfect/2019/4/8/18297410/ai-tesla-self-driving-cars-adversarial-machine-learning">Vox</a>,
            <a href="https://www.axios.com/medical-ai-vulnerability-adversarial-attack-b0d79abc-335e-4bd3-b706-d587c7c44978.html">Axios</a>]
          </p>
          <div id="div23" class="abstract">
            With public and academic attention increasingly focused on the new role of machine learning in the health information economy, an unusual and no-longer-esoteric category of vulnerabilities in machine learning systems could prove significant. These vulnerabilities allow a small, carefully-designed change in how inputs are presented to a system to completely alter its output, causing it to confidently arrive at manifestly wrong conclusions. These advanced techniques to subvert otherwise-reliable machine learning systems – so-called adversarial attacks – have, to date, been of interest primarily to computer science researchers. However, the landscape of often-competing interests within healthcare, and billions of dollars at stake in systems’ outputs, implies considerable problems. We outline motivations that various players in the healthcare system may have to employ adversarial attacks, and begin a discussion of what to do about them. Far from discouraging continued innovation with medical machine learning, we call for active engagement of medical, technical, legal, and ethical experts in pursuit of efficient, broadly-available, and effective health care that machine learning will enable.
          </div>
        </div>
      </li>
      <li>
        <img src="./assets/misc_paper_figures/lipnick_cwas.png" class="pub-image">
        <div class="pub-info">
          <h3>Systemic nature of spinal muscular atrophy revealed by studying insurance claims</h3>
          <!-- Authors, venue, and links. The showSingle link's target number selects the #div24 panel. -->
          <p>Scott Lipnick, Denis Agniel, Rahul Aggarwal, Nina Makhortova, <u>Samuel Finlayson</u>... Isaac Kohane, Lee Rubin<br>
            <i>Plos One</i>, 2019<br>
            [<a class="showSingle" target="24">Abstract</a>]
            [<a href="https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0213680">Paper</a>]
          </p>
          <div id="div24" class="abstract">
            <u>Objective:</u> We investigated the presence of non-neuromuscular phenotypes in patients affected by Spinal Muscular Atrophy (SMA), a disorder caused by a mutation in the Survival of Motor Neuron (SMN) gene, and whether these phenotypes may be clinically detectable prior to clinical signs of neuromuscular degeneration and therefore independent of muscle weakness.
            <br>
            <u>Methods:</u> We utilized a de-identified database of insurance claims to explore the health of 1,038 SMA patients compared to controls. Two analyses were performed: (1) claims from the entire insurance coverage window; and (2) for SMA patients, claims prior to diagnosis of any neuromuscular disease or evidence of major neuromuscular degeneration to increase the chance that phenotypes could be attributed directly to reduced SMN levels. Logistic regression was used to determine whether phenotypes were diagnosed at significantly different rates between SMA patients and controls and to obtain covariate-adjusted odds ratios.
            <br>
            <u>Results:</u> Results from the entire coverage window revealed a broad spectrum of phenotypes that are differentially diagnosed in SMA subjects compared to controls. Moreover, data from SMA patients prior to their first clinical signs of neuromuscular degeneration revealed numerous non-neuromuscular phenotypes including defects within the cardiovascular, gastrointestinal, metabolic, reproductive, and skeletal systems. Furthermore, our data provide evidence of a potential ordering of disease progression beginning with these non-neuromuscular phenotypes.
            <br>
            <u>Conclusions:</u> Our data point to a direct relationship between early, detectable non-neuromuscular symptoms and SMN deficiency. Our findings are particularly important for evaluating the efficacy of SMN-increasing therapies for SMA, comparing the effectiveness of local versus systemically delivered therapeutics, and determining the optimal therapeutic treatment window prior to irreversible neuromuscular damage.
          </div>
        </div>
      </li>
      <li>
        <img src="./assets/fracgan/fracgan.png" class="pub-image">
        <div class="pub-info">
          <h3>Towards generative adversarial networks as a new paradigm for radiology education</h3>
          <!-- Authors, venue, and links. The showSingle links' target numbers select the #div22 and #div21 panels. -->
          <p><u>Samuel Finlayson</u>, Hyunkwang Lee, Isaac Kohane, Luke Oakden-Rayner<br>
            <i>Machine Learning for Health (NeurIPS Workshop)</i>, 2018 <br>
            [<a class="showSingle" target="22">Abstract</a>]
            [<a href="https://arxiv.org/abs/1812.01547">Paper</a>]
            [<a class="showSingle" target="21">Description/Backstory</a>]
          </p>
          <div id="div22" class="abstract">
            Medical students and radiology trainees typically view thousands of images in order to ”train their eye” to detect the subtle visual patterns necessary for diagnosis. Nevertheless, infrastructural and legal constraints often make it difficult to access and quickly query an abundance of images with a user-specified feature set. In this paper, we use a conditional generative adversarial network (GAN) to synthesize 1024 × 1024 pixel pelvic radiographs that can be queried with conditioning on fracture status. We demonstrate that the conditional GAN learns features that distinguish fractures from non-fractures by training a convolutional neural network exclusively on images sampled from the GAN and achieving an AUC of > 0.95 on a held-out set of real images. We conduct additional analysis of the images sampled from the GAN and describe ongoing work to validate educational efficacy.
          </div>
          <div id="div21" class="abstract">
            Generative adversarial networks (GANs) are a very cool technique that uses dueling neural networks to try to learn to approximate a data distribution. They have been used to create some beautiful images, but some researchers have shown that current techniques don't appear to learn the full data distribution they're trying to approximate. As a researcher, this makes me nervous about using GAN-sampled images as data substitutes in high-stakes clinical applications, but I'm still very interested in considering ways we could find productive uses for GANs while we're figuring out how to solve their limitations.
            <br>
            As a medical student on a radiology rotation, I spent many hours staring at websites like <a href="http://www.chestx-ray.com/index.php/education/normal-cxr-module-train-your-eye#!1">this one</a>, trying to proverbially "train my eye." Note that the above tool--which I found indispensable--only shows 500 *normal* chest x-rays. This produced the mild epiphany that in radiology education, individual tools can be useful even if they just help students see many variants of a thin subset of the data distribution. What I wanted as a student was to pre-specify a combo of medical features, and then cook up an arbitrary number of slightly different images that had them all. In theory, a conditional GAN could do just this, with limited storage and no active link to hospital IT systems.
            <br>
            Back in grad school, I met Hyunkwang Lee during a class with Sasha Rush, and we decided to work on conditional image generation using GANs. We reached out to Luke Oakden-Rayner, who gave us access to an amazing dataset. We played around on this using the StackGAN framework during the class project, and have since extended it into this work using Progressive Growing of GANs.
          </div>
        </div>
      </li>
      <li>
        <img src="./assets/diffp.png" class="pub-image">
        <div class="pub-info">
          <h3>Privacy-Preserving Distributed Deep Learning for Clinical Data</h3>
          <!-- Authors, venue, and links. The showSingle link's target number selects the #div20 panel. -->
          <p>Brett Beaulieu-Jones, William Yuan, <u>Samuel Finlayson</u>, Zhiwei Steven Wu <br>
            <i>Machine Learning for Health (NeurIPS Workshop)</i>, 2018 <br>
            [<a class="showSingle" target="20">Abstract</a>]
            [<a href="https://arxiv.org/abs/1812.01484">Paper</a>]
          </p>
          <div id="div20" class="abstract">
            Deep learning with medical data often requires larger sample sizes than are available at single providers. While data sharing among institutions is desirable to train more accurate and sophisticated models, it can lead to severe privacy concerns due to the sensitive nature of the data. This problem has motivated a number of studies on distributed training of neural networks that do not require direct sharing of the training data. However, simple distributed training does not offer provable privacy guarantees to satisfy technical safe standards and may reveal information about the underlying patients. We present a method to train neural networks for clinical data in a distributed fashion under differential privacy. We demonstrate these methods on two datasets that include information from multiple independent sites, the eICU collaborative Research Database and The Cancer Genome Atlas.
          </div>
        </div>
      </li>
       <li>
        <img src="./assets/microbiome/gurry_fig2.png" class="pub-image">
        <div class="pub-info">
          <h3>Predictability and persistence of prebiotic dietary supplementation in a healthy human cohort</h3>
          <!-- Authors, venue, and links. The showSingle links' target numbers select the #div18 and #div19 panels. -->
          <p>Thomas Gurry, <u>HST Microbiome Consortium</u>, ... Eric Alm<br>
            <i>Scientific Reports</i>, 2018 <br>
            [<a class="showSingle" target="18">Abstract</a>]
            [<a href="https://www.nature.com/articles/s41598-018-30783-1">Paper</a>]
            [<a class="showSingle" target="19">Description/Backstory</a>]
          </p>
          <div id="div18" class="abstract">
            Dietary interventions to manipulate the human gut microbiome for improved health have received increasing attention. However, their design has been limited by a lack of understanding of the quantitative impact of diet on a host’s microbiota. We present a highly controlled diet perturbation experiment in a healthy, human cohort in which individual micronutrients are spiked in against a standardized background. We identify strong and predictable responses of specific microbes across participants consuming prebiotic spike-ins, at the level of both strains and functional genes, suggesting fine-scale resource partitioning in the human gut. No predictable responses to non-prebiotic micronutrients were found. Surprisingly, we did not observe decreases in day-to-day variability of the microbiota compared to a complex, varying diet, and instead found evidence of diet-induced stress and an associated loss of biodiversity. Our data offer insights into the effect of a low complexity diet on the gut microbiome, and suggest that effective personalized dietary interventions will rely on functional, strain-level characterization of a patient’s microbiota.
          </div>
          <div id="div19" class="abstract">
            This was a really fun project with a long story behind it. In brief, during the first year of medical school, some friends and I began wondering if we could execute a clinical experiment on ourselves. Inspired by the use of Soylent by some of our classmates -- and some reservations that we had about the quality of control arms in microbiome studies -- we came up with an idea: recruit about 50 graduate students, place them on an identical all-liquid diet, and run a seven-arm clinical experiment with 6 arms receiving a spike-in of a single macronutrient for the second half of the study period. Thus the "HST microbiome consortium" was born. As it turned out, Thomas Gurry and Eric Alm (researchers at MIT), had been thinking along similar lines as well. So we teamed up, ran the experiment together, and the result was this paper! We wrote a bit more on the backstory <a href="http://imes.mit.edu/microbiome-milkshakes-the-story-behind-the-study/">here</a>.
          </div>
        </div>
      </li>
<!--
      <li>
        <img src="./assets/adversarial_examples_paper/taxonomy.png" class="pub-image">
        <div class="pub-info">
          <h3>Adversarial Attacks Against Medical Deep Learning Systems</h3>
          <p><u>Samuel Finlayson</u>, Hyung Won Chung, Isaac Kohane, Andrew Beam<br>
            <i>Preprint</i>, 2018 <br>
            [<a class="showSingle", target="17">Abstract</a>]
            [<a href="https://arxiv.org/abs/1804.05296">Paper</a>]
            [<a href="https://github.com/sgfin/adversarial-medicine">GitHub</a>]
            [<a href="https://spectrum.ieee.org/the-human-os/biomedical/imaging/medical-imaging-ai-software-vulnerable-to-covert-attacks">IEE</a>]
          </p>
          <div id="div17" class="abstract">
            The discovery of adversarial examples has raised concerns about the practical deployment of deep learning systems. In this paper, we demonstrate that adversarial examples are capable of manipulating deep learning systems across three clinical domains. For each of our representative medical deep learning classifiers, both white and black box attacks were highly successful. Our models are representative of the current state of the art in medical computer vision and, in some cases, directly reflect architectures already seeing deployment in real world clinical settings. In addition to the technical contribution of our paper, we synthesize a large body of knowledge about the healthcare system to argue that medicine may be uniquely susceptible to adversarial attacks, both in terms of monetary incentives and technical vulnerability. To this end, we outline the healthcare economy and the incentives it creates for fraud and provide concrete examples of how and why such attacks could be realistically carried out. We urge practitioners to be aware of current vulnerabilities when deploying deep learning systems in clinical settings, and encourage the machine learning community to further investigate the domain-specific characteristics of medical learning system
          </div>
        </div>
      </li>
    -->
      <li>
        <img src="./static/m_ofx27002-fig1.jpeg" class="pub-image">
        <div class="pub-info">
          <h3>Potential Adverse Effects of Broad-Spectrum Antimicrobial Exposure in the Intensive Care Unit</h3>
          <!-- Authors, venue, and links. The showSingle link's target number selects the #div16 panel. -->
          <p>Jenna Wiens*, Graham Snyder*, <u>Samuel Finlayson</u>, Monica Mahoney, Leo Celi<br>
            <i>Open Forum Infectious Diseases</i>, 2018 <br>
            [<a class="showSingle" target="16">Abstract</a>]
            [<a href="https://academic.oup.com/ofid/article/5/2/ofx270/4762255">Paper</a>]
          </p>
          <div id="div16" class="abstract">
            <u>Background:</u> The potential adverse effects of empiric broad-spectrum antimicrobial use among patients with suspected but subsequently excluded infection have not been fully characterized. We sought novel methods to quantify the risk of adverse effects of broad-spectrum antimicrobial exposure among patients admitted to an intensive care unit (ICU).
            <br>
            <u>Methods:</u> Among all adult patients admitted to ICUs at a single institution, we selected patients with negative blood cultures who also received ≥1 broad-spectrum antimicrobials. Broad-spectrum antimicrobials were categorized in ≥1 of 5 categories based on their spectrum of activity against potential pathogens. We performed, in serial, 5 cohort studies to measure the effect of each broad-spectrum category on patient outcomes. Exposed patients were defined as those receiving a specific category of broad-spectrum antimicrobial; nonexposed were all other patients in the cohort. The primary outcome was 30-day mortality. Secondary outcomes included length of hospital and ICU stay and nosocomial acquisition of antimicrobial-resistant bacteria (ARB) or Clostridium difficile within 30 days of admission.
            <br>
            <u>Results:</u>  Among the study cohort of 1918 patients, 316 (16.5%) died within 30 days, 821 (42.8%) had either a length of hospital stay >7 days or an ICU length of stay >3 days, and 106 (5.5%) acquired either a nosocomial ARB or C. difficile. The short-term use of broad-spectrum antimicrobials in any of the defined broad-spectrum categories was not significantly associated with either primary or secondary outcomes.
            <br>
            <u>Conclusions:</u> The prompt and brief empiric use of defined categories of broad-spectrum antimicrobials could not be associated with additional patient harm.
          </div>
        </div>
      </li>
      <li>
        <img src="./static/nihms756241f3-mrlu1.jpg" class="pub-image" alt="">
        <div class="pub-info">
          <h3>Toward rapid learning in cancer treatment selection: An interactive analytical engine for clinical oncology</h3>
          <p><u>Samuel Finlayson</u>, Mia Levy, Sunil Reddy, Daniel Rubin<br>
            <i>Journal of Biomedical Informatics</i>, 2016 <br>
            [<a class="showSingle" target="15">Abstract</a>]
            [<a href="https://www.ncbi.nlm.nih.gov/pubmed/26836975">Paper</a>]
            [<a href="https://github.com/sgfin/mrlu">GitHub</a>]
          </p>
          <div id="div15" class="abstract">
            <u>OBJECTIVE:</u> Wide-scale adoption of electronic medical records (EMRs) has created an unprecedented opportunity for the implementation of Rapid Learning Systems (RLSs) that leverage primary clinical data for real-time decision support. In cancer, where large variations among patient features leave gaps in traditional forms of medical evidence, the potential impact of a RLS is particularly promising. We developed the Melanoma Rapid Learning Utility (MRLU), a component of the RLS, providing an analytical engine and user interface that enables physicians to gain clinical insights by rapidly identifying and analyzing cohorts of patients similar to their own.
            <br>
            <u>MATERIALS AND METHODS:</u> A new approach for clinical decision support in Melanoma was developed and implemented, in which patient-centered cohorts are generated from practice-based evidence and used to power on-the-fly stratified survival analyses. A database to underlie the system was generated from clinical, pharmaceutical, and molecular data from 237 patients with metastatic melanoma from two academic medical centers. The system was assessed in two ways: (1) ability to rediscover known knowledge and (2) potential clinical utility and usability through a user study of 13 practicing oncologists.
            <br>
            <u>RESULTS:</u> The MRLU enables physician-driven cohort selection and stratified survival analysis. The system successfully identified several known clinical trends in melanoma, including frequency of BRAF mutations, survival rate of patients with BRAF mutant tumors in response to BRAF inhibitor therapy, and sex-based trends in prevalence and survival. Surveyed physician users expressed great interest in using such on-the-fly evidence systems in practice (mean response from relevant survey questions 4.54/5.0), and generally found the MRLU in particular to be both useful (mean score 4.2/5.0) and useable (4.42/5.0).
            <br>
            <u>DISCUSSION:</u> The MRLU is an RLS analytical engine and user interface for Melanoma treatment planning that presents design principles useful in building RLSs. Further research is necessary to evaluate when and how to best use this functionality within the EMR clinical workflow for guiding clinical decision making.
            <br>
            <u>CONCLUSION:</u> The MRLU is an important component in building a RLS for data driven precision medicine in Melanoma treatment that could be generalized to other clinical disorders.
          </div>
        </div>
      </li>
      <li>
        <img src="./static/tamang_unplanned.png" class="pub-image" alt="">
        <div class="pub-info">
          <h3>Detecting Unplanned Care From Clinician Notes in Electronic Health Records</h3>
          <p>Suzanne Tamang, Manali Patel, Douglas Blayney, Julie Kuznetsov, <u>Samuel Finlayson</u>, ... Nigam Shah<br>
            <i>Journal of Oncology Practice</i>, 2015 <br>
            [<a class="showSingle" target="12">Abstract</a>]
            [<a href="http://ascopubs.org/doi/pdfdirect/10.1200/JOP.2014.002741">Paper</a>]
          </p>
          <div id="div12" class="abstract">
            <u>Purpose:</u> Reduction in unplanned episodes of care, such as emergency department visits and unplanned hospitalizations, are important quality outcome measures. However, many events are only documented in free-text clinician notes and are labor intensive to detect by manual medical record review.
            <br>
            <u>Methods:</u> We studied 308,096 free-text machine-readable documents linked to individual entries in our electronic health records, representing care for patients with breast, GI, or thoracic cancer, whose treatment was initiated at one academic medical center, Stanford Health Care (SHC). Using a clinical text-mining tool, we detected unplanned episodes documented in clinician notes (for non-SHC visits) or in coded encounter data for SHC-delivered care and the most frequent symptoms documented in emergency department (ED) notes.
            <br>
            <u>Results:</u> Combined reporting increased the identification of patients with one or more unplanned care visits by 32% (15% using coded data; 20% using all the data) among patients with 3 months of follow-up and by 21% (23% using coded data; 28% using all the data) among those with 1 year of follow-up. Based on the textual analysis of SHC ED notes, pain (75%), followed by nausea (54%), vomiting (47%), infection (36%), fever (28%), and anemia (27%), were the most frequent symptoms mentioned. Pain, nausea, and vomiting co-occur in 35% of all ED encounter notes.
            <br>
            <u>Conclusion:</u> The text-mining methods we describe can be applied to automatically review free-text clinician notes to detect unplanned episodes of care mentioned in these notes. These methods have broad application for quality improvement efforts in which events of interest occur outside of a network that allows for patient data sharing.
          </div>
        </div>
      </li>
      <li>
        <img src="./static/sdata201432-f1.jpg" class="pub-image" alt="">
        <div class="pub-info">
          <h3>Building the Graph of Medicine from Millions of Clinical Narratives</h3>
          <p><u>Samuel Finlayson</u>, Paea LePendu, Nigam Shah<br>
            <i>Scientific Data</i>, 2014 <br>
            [<a class="showSingle" target="14">Abstract</a>]
            [<a href="https://www.nature.com/articles/sdata201432">Paper</a>]
            [<a href="https://datadryad.org/resource/doi:10.5061/dryad.jp917">Data</a>]
          </p>
          <div id="div14" class="abstract">
            Electronic health records (EHR) represent a rich and relatively untapped resource for characterizing the true nature of clinical practice and for quantifying the degree of inter-relatedness of medical entities such as drugs, diseases, procedures and devices. We provide a unique set of co-occurrence matrices, quantifying the pairwise mentions of 3 million terms mapped onto 1 million clinical concepts, calculated from the raw text of 20 million clinical notes spanning 19 years of data. Co-frequencies were computed by means of a parallelized annotation, hashing, and counting pipeline that was applied over clinical notes from Stanford Hospitals and Clinics. The co-occurrence matrix quantifies the relatedness among medical concepts which can serve as the basis for many statistical tests, and can be used to directly compute Bayesian conditional probabilities, association rules, as well as a range of test statistics such as relative risks and odds ratios. This dataset can be leveraged to quantitatively assess comorbidity, drug-drug, and drug-disease patterns for a range of clinical, epidemiological, and financial applications.
          </div>
        </div>
      </li>


    </ul> <!-- end publication list! -->
    </div> <!-- end div#pub -->

    </div> <!-- publications storey row -->

</div> <!-- #container -->

</body>


<script>
// E-mail reveal: each click on #email-button animates #email-div between
// shown and hidden using jQuery's 'slow' duration.
(function () {
  var emailPanel = '#email-div';

  $('#email-button').click(function () {
    // Looked up at click time, exactly as before; no completion callback is
    // needed because nothing runs after the animation.
    $(emailPanel).toggle('slow');
  });
})();
</script>

<script>
// Publication cards: manage the "pub-full" class on every div.pub-info.
// (The styling behind "pub-full" is not visible in this part of the file;
// presumably it is the expanded card layout -- confirm against the stylesheet.)
//
// Hover uses separate enter/leave handlers instead of one toggling handler.
// A single-argument .hover(fn) runs fn on BOTH mouseenter and mouseleave, so
// a click made while the pointer was over the card flipped the parity and the
// class ended up applied after the pointer left (and removed on the next
// hover). Adding on enter and removing on leave keeps the state deterministic.
$( "div.pub-info" ).hover(
    function() {
        // Pointer entered the card.
        $(this).addClass("pub-full");
    },
    function() {
        // Pointer left the card: always clear, whatever clicks did meanwhile.
        $(this).removeClass("pub-full");
    }
);

// A click still flips the current state of the clicked card.
$( "div.pub-info" ).click(function() {
    $(this).toggleClass("pub-full");
});
</script>

</html>