Binary_classification_phase_separation/a-few-examples.html at master · rafael-a-monteiro-math/Binary_classification_phase_separation · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
<!DOCTYPE html>
<html lang="en" xml:lang="en">
<head>

  <meta charset="utf-8" />
  <meta http-equiv="X-UA-Compatible" content="IE=edge" />
  <title> 2 Nonlinear diffusion equations: a numerical example | Binary classification as a phase separation process - a short tutorial</title>
  <meta name="description" content="A short tutorial on the Phase Separation Binary Classifier (PSBC): a numerical example of the Allen-Cahn nonlinear diffusion equation and of forward propagation with randomly generated coefficients." />
  <meta name="generator" content="bookdown 0.20 and GitBook 2.6.7" />

  <meta property="og:title" content=" 2 Nonlinear diffusion equations: a numerical example | Binary classification as a phase separation process - a short tutorial" />
  <meta property="og:type" content="book" />


  <meta property="og:description" content="A short tutorial on the Phase Separation Binary Classifier (PSBC): a numerical example of the Allen-Cahn nonlinear diffusion equation and of forward propagation with randomly generated coefficients." />


  <meta name="twitter:card" content="summary" />
  <meta name="twitter:title" content=" 2 Nonlinear diffusion equations: a numerical example | Binary classification as a phase separation process - a short tutorial" />

  <meta name="twitter:description" content="A short tutorial on the Phase Separation Binary Classifier (PSBC): a numerical example of the Allen-Cahn nonlinear diffusion equation and of forward propagation with randomly generated coefficients." />


<meta name="date" content="2021-09-27" />

  <meta name="viewport" content="width=device-width, initial-scale=1" />
  <meta name="apple-mobile-web-app-capable" content="yes" />
  <meta name="apple-mobile-web-app-status-bar-style" content="black" />


<link rel="prev" href="index.html"/>
<link rel="next" href="sec-PSBC.html"/>
<script src="libs/jquery-2.2.3/jquery.min.js"></script>
<link href="libs/gitbook-2.6.7/css/style.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-table.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-bookdown.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-highlight.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-search.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-fontsettings.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-clipboard.css" rel="stylesheet" />


<script src="libs/accessible-code-block-0.0.1/empty-anchor.js"></script>


<link rel="stylesheet" href="style.css" type="text/css" />
</head>

<body>


  <div class="book without-animation with-summary font-size-2 font-family-1" data-basepath=".">

    <div class="book-summary">
      <nav role="navigation">

<ul class="summary">
<li><a href="./">Binary classification as a phase separation process</a></li>

<li class="divider"></li>
<li class="chapter" data-level="1" data-path="index.html"><a href="index.html"><i class="fa fa-check"></i><b>1</b> Introduction</a></li>
<li class="chapter" data-level="2" data-path="a-few-examples.html"><a href="a-few-examples.html"><i class="fa fa-check"></i><b>2</b> Nonlinear diffusion equations: a numerical example</a><ul>
<li class="chapter" data-level="2.1" data-path="a-few-examples.html"><a href="a-few-examples.html#propagation-with-randomly-generated-coefficients"><i class="fa fa-check"></i><b>2.1</b> Propagation with randomly generated coefficients</a></li>
</ul></li>
<li class="chapter" data-level="3" data-path="sec-PSBC.html"><a href="sec-PSBC.html"><i class="fa fa-check"></i><b>3</b> A glimpse at the PSBC model</a></li>
<li class="chapter" data-level="4" data-path="sec-mnist.html"><a href="sec-mnist.html"><i class="fa fa-check"></i><b>4</b> Applying the PSBC to pairs of digits of the MNIST database</a><ul>
<li class="chapter" data-level="4.1" data-path="sec-mnist.html"><a href="sec-mnist.html#multiclass-classification-using-the-psbc"><i class="fa fa-check"></i><b>4.1</b> Multiclass classification using the PSBC</a><ul>
<li class="chapter" data-level="4.1.1" data-path="sec-mnist.html"><a href="sec-mnist.html#how-committees-vote"><i class="fa fa-check"></i><b>4.1.1</b> How committees vote</a></li>
</ul></li>
</ul></li>
<li class="chapter" data-level="5" data-path="the-phase-separation-binary-classifier-where-to-read-more-about-it.html"><a href="the-phase-separation-binary-classifier-where-to-read-more-about-it.html"><i class="fa fa-check"></i><b>5</b> The Phase Separation Binary Classifier: where to read more about it</a></li>
<li class="divider"></li>
<li><a href="https://github.com/rstudio/bookdown" target="_blank" rel="noopener">Published with bookdown</a></li>
<li><a href="https://sites.google.com/view/rafaelmonteiro-math/home" target="_blank" rel="noopener">Rafael Monteiro's website</a></li>

</ul>

      </nav>
    </div>

    <div class="book-body">
      <div class="body-inner">
        <div class="book-header" role="navigation">
          <h1>
            <i class="fa fa-circle-o-notch fa-spin"></i><a href="./">Binary classification as a phase separation process - a short tutorial</a>
          </h1>
        </div>

        <div class="page-wrapper" tabindex="-1" role="main">
          <div class="page-inner">

            <section class="normal" id="section-">
<div id="a_few_examples" class="section level1">
<h1><span class="header-section-number"> 2</span> Nonlinear diffusion equations: a numerical example</h1>
<p>As discussed in Section 1.1 of the paper, nonlinear diffusion processes are in the backdrop of this model. The heart of the model is the Allen-Cahn equation <span class="citation">(Fife <a href="#ref-Fife" role="doc-biblioref">1979</a>)</span>, <span class="citation">(Aronson and Weinberger <a href="#ref-Ar_Wein" role="doc-biblioref">1978</a>)</span>, <span class="citation">(Allen and Cahn <a href="#ref-allen1979microscopic" role="doc-biblioref">1979</a>)</span>, a well-known equation in the field of pattern formation. To show how part of the PSBC’s implementation can be used to evolve an initial boundary value problem with Neumann boundary conditions, we shall take</p>
<p><span class="math display" id="eq:1">\[\begin{equation}
u_0(x) = \frac{1- \sin(\pi(2x - 1))}{2}\tag{2.1}
\end{equation}\]</span></p>
<p>as an initial condition to the Allen-Cahn equation</p>
<p><span class="math display" id="eq:2">\[\begin{equation}
\partial_tu(x, t) = \varepsilon^2 \partial_x^2u(x, t) + u(x, t)(1 -  u(x, t))(u(x, t) - \alpha(x)).\tag{2.2}
\end{equation}\]</span></p>
<p>The parameter <span class="math inline">\(\alpha(\cdot)\)</span> embodies medium heterogeneity. In the case shown here, we choose <span class="math inline">\(\alpha(x) = -2\)</span> when <span class="math inline">\(x &lt; 0.5\)</span>, and <span class="math inline">\(\alpha(x) = 2\)</span> when <span class="math inline">\(x \geq 0.5\)</span>.</p>
<!-- Parameters to the model are assigned below: -->
<!-- ```python -->
<!-- Nu = 20 -->
<!-- x = np.linspace(0, 1, N, endpoint = True) -->
<!-- V_0 = 1/2 - 1/2 * np.reshape(np.sin(np.pi * (2 * x - 1)) , (-1,1)) -->
<!-- prop = Propagate() -->
<!-- dt, eps, Nx, Nt = 0.1, .3,  N, 400 -->
<!-- dx, ptt_cardnlty, weigths_k_sharing = x[1]-x[0], Nx, Nt -->
<!-- ``` -->
<!-- ```python -->
<!-- for i in range(param["Nt"]): param["alpha_x_t"][:,i] = -2 * (x < .5) + 2 * (x >= .5) -->
<!-- ``` -->
<!-- which we now run, using the numerical scheme (1.7a) in the paper. Recall that this step is the same as doing a forward propagation in a feed forward network: that's the reason why you see the method "prop.forward" in the code below. -->
<!-- ```python -->
<!-- flow, waterfall, time = prop.forward(V_0, param, waterfall_save = True , Flow_save = True) -->
<!-- time = np.arange(Nt + 1) -->
<!-- X, Y = np.meshgrid(x, time) -->
<!-- flow = np.squeeze(flow, axis = 1) -->
<!-- ``` -->
<!-- ```{python, eval=FALSE, echo=FALSE} -->
<!-- fig = plt.figure(figsize = (15,8)) -->
<!-- ax = fig.add_subplot(111, projection = '3d') -->
<!-- color = plt.cm.viridis(np.arange(N))  -->
<!-- surf = ax.plot_wireframe(X, Y, flow.T, rstride = 10, cstride = 1,\ -->
<!--                          alpha = None, antialiased = True, linewidth = 3) -->
<!-- ax.view_init(60, -40) -->
<!-- plt.draw() -->
<!-- surf.set_edgecolors(color) -->
<!-- ax.set_xlabel('X Label') -->
<!-- ax.set_ylabel('Y Label') -->
<!-- ax.tick_params(which = 'both', labelsize = 16) -->
<!-- ax.set_xlabel('x', size = 22, labelpad = 30) -->
<!-- ax.set_ylabel('t', size = 22, labelpad = 30) -->
<!-- ax.set_zlabel('u', size = 22, labelpad = 10) -->
<!-- ax.set_zlim([0,1]) -->
<!-- plt.show() -->
<!-- ``` -->
<p>When we plot the evolution of <span class="math inline">\(u_0(\cdot)\)</span> through the Allen-Cahn equation as a surface, we get the plot below.</p>
<div class="figure">
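<img src="figures/output_11_0.png" alt="Surface plot of the solution u(x, t) of the Allen-Cahn equation evolving in time from the initial condition u_0" />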
<p class="caption">The initial condition <span class="math inline">\(u_0(\cdot)\)</span> shown in Equation <a href="a-few-examples.html#eq:1">(2.1)</a>, as it evolves according to the Allen-Cahn equation <a href="a-few-examples.html#eq:2">(2.2)</a>.</p>
</div>
<p>Actually, we shall mostly treat the coefficients of this PDE as trainable weights: parameters that we will tweak in order to get the desired asymptotic behavior and, in this way, do binary classification. This is what we explain next.</p>
<p>One of the first motivations for this project can be found in the interesting paper <span class="citation">(Angenent, Mallet-Paret, and Peletier <a href="#ref-Ang_Mal" role="doc-biblioref">1987</a>)</span>, where the authors show that several nontrivial layered patterns (stationary solutions to equation <a href="a-few-examples.html#eq:2">(2.2)</a> that, roughly speaking, get “concentrated” around the values 0 and 1, displaying layers in between these values) can be found if <span class="math inline">\(\alpha(\cdot)\)</span> is non-homogeneous in space. There is an extensive discussion about why this is interesting, and we refer the reader to Section 1.1 in the paper.</p>
<blockquote>
<p><strong>Remark:</strong> You can read more about pattern formation in the book <span class="citation">(Nishiura <a href="#ref-Nishiura" role="doc-biblioref">2002</a>)</span> (Chapter 4.2 deals with the Allen-Cahn model), and also in the very nice article Arnd Scheel wrote for the Princeton Companion to Applied Mathematics, Section IV.27 <span class="citation">(Dennis et al. <a href="#ref-Arnd_companion" role="doc-biblioref">2015</a>)</span>.</p>
</blockquote>
<div id="propagation-with-randomly-generated-coefficients" class="section level2">
<h2><span class="header-section-number">2.1</span> Propagation with randomly generated coefficients</h2>
<p>Next, for illustrative purposes, we would like to evolve and plot the evolution of several different initial conditions in the interval [0,1] (this is not shown in the paper, but the code can be found in version 1 of this project, in “<a href="https://github.com/rafael-a-monteiro-math/Binary_classification_phase_separation/blob/master/PSBC_v1/Notebook_PSBC_examples.ipynb">Notebook_examples.ipynb</a>”). The model that we use is, initially, a discretization of <a href="a-few-examples.html#eq:2">(2.2)</a>, with <span class="math inline">\(\varepsilon = 0\)</span>, which then becomes an ODE:</p>
<p><span class="math display" id="eq:3">\[\begin{equation}
U^{[n+1]} = U^{[n]} + \Delta_t^{u}f(U^{[n]},\alpha^{[n]}),\tag{2.3}
\end{equation}\]</span></p>
<p>where <span class="math inline">\(f(U^{[n]},\alpha^{[n]}):= U^{[n]}(1 - U^{[n]})(U^{[n]} - \alpha^{[n]} )\)</span>.</p>
<p>It is good to have in mind that the coefficients in the above ODE will play the role of trainable weights in Machine Learning: we will “adjust” the coefficients in <span class="math inline">\(\alpha(\cdot)\)</span> in order to achieve a certain final, target end state.</p>
<p>As mentioned earlier, there is a clear correspondence between the initial value (ODE/PDE) problem and forward propagation and, consequently, the stability of <a href="a-few-examples.html#eq:3">(2.3)</a> has to be considered. The discretization it presents is known as the (explicit) Euler method, which is known to be (linearly) unstable in many cases. A good part of the paper was devoted to showing that there is some kind of <em>nonlinear stabilization</em> mechanism that prevents solutions from blowing up, a condition referred to as the <em>Invariant Region Enforcing Condition</em>, which establishes a critical threshold for the size of <span class="math inline">\(\Delta_t^u\)</span>, beyond which solutions can blow up.</p>
<!-- This is discussed at length in the [paper](https://arxiv.org/abs/2009.02467). -->
<p>To get this critical value for <span class="math inline">\(\Delta_t^u\)</span> it is necessary to quantify</p>
<p><span class="math display" id="eq:boundedness">\[\begin{equation}
\max_{1\leq k \leq \mathrm{N_t}}\max\{1,\vert \alpha^{[k]}\vert\} &lt;+\infty,\tag{2.4}
\end{equation}\]</span></p>
<p>based on which we adjust the parameter <span class="math inline">\(\Delta_t^u\)</span> accordingly, in a nontrivial way.<a href="#fn1" class="footnote-ref" id="fnref1"><sup>1</sup></a> As a result, the evolution of <span class="math inline">\(U^{[\cdot]}\)</span> does not end up in a floating point overflow (in other words, a blow-up in the <span class="math inline">\(\ell^{\infty}\)</span> norm).</p>
<p>We set up a little experiment in which some runs obey the <em>Invariant Region Enforcing Condition</em> and some do not. We take several initial conditions in the interval <span class="math inline">\([0,1]\)</span> and evolve them according to <a href="a-few-examples.html#eq:3">(2.3)</a>.</p>
<!-- Parameters are as follows: -->
<!-- ```python -->
<!-- N = 1 -->
<!-- init = Initialize_parameters() -->
<!-- prop = Propagate() -->
<!-- dt_vec = np.array([.1,.3,.57,1.5,3,4]) -->
<!-- dt, eps, Nx, Nt, dx = .1, 0, N, 20, 1 -->
<!-- ptt_cardnlty, weights_k_sharing = Nx, Nt -->
<!-- ``` -->
<p>As discussed in Appendix A of the paper, the PSBC randomly initializes its trainable weights as realizations of a Normal random variable with mean 0.5 and variance 0.1. In this experiment we instead draw them as uniform random variables in the interval [0,1], which implies that the left-hand side of <a href="a-few-examples.html#eq:boundedness">(2.4)</a> is bounded by 1.</p>
<!-- ```python -->
<!-- param = init.dictionary(N, eps, dt, dx, Nt, ptt_cardnlty, weights_k_sharing) -->
<!-- for i in range(param["Nt"]): param["alpha_x_t"][:,i] = np.random.uniform(0,1) -->
<!-- n_points = 10     -->
<!-- V_0 = np.reshape(1/n_points * np.arange(0, n_points + 1), (1, -1)) -->
<!-- flow, waterfall, time = prop.forward(V_0, param, waterfall_save = True , Flow_save = True) -->
<!-- ``` -->
<p>We obtain the following figure.</p>
<div class="figure">
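<img src="figures/output_16_0.png" alt="Orbits of initial conditions in the interval [0,1] evolved by the discretized Allen-Cahn ODE with uniformly random coefficients, for several values of the time step" />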
<p class="caption">The evolution of initial conditions in the interval <span class="math inline">\([0,1]\)</span> through the Allen-Cahn equation <a href="a-few-examples.html#eq:2">(2.2)</a> with <span class="math inline">\(\varepsilon = 0\)</span> (that is, an ODE) in 1D, with coefficients randomly generated from a uniform distribution in the interval <span class="math inline">\([0,1]\)</span>.</p>
</div>
<p>In this figure we can observe that for small values of <span class="math inline">\(\mathrm{\Delta_{t}^u}\)</span> the orbits obey some monotonic behavior. Of course, existence and uniqueness theorems for ODEs guarantee that at the continuum level, but translating such a property to the discrete model is a different matter: in fact, in many cases this property does not hold. The reasoning behind the existence of critical values of <span class="math inline">\(\mathrm{\Delta_{t}^u}\)</span> under which the solution is “well behaved” (that is, the solution is always bounded) goes back to the idea of <em>Invariant regions</em>, exploited extensively in PDEs: we refer the reader to Chapter 14 in <span class="citation">(Smoller <a href="#ref-Smoller" role="doc-biblioref">1994</a>)</span>; if you want to see how it applies in the discrete setting, especially in finite-difference schemes for reaction diffusion models, see <span class="citation">(Hoff <a href="#ref-Hoff" role="doc-biblioref">1978</a>)</span> and Appendix C of the <a href="https://arxiv.org/abs/2009.02467">paper</a>.</p>
<blockquote>
<p><strong>Remark:</strong> although the PSBC is a Recurrent Neural Network (RNN) type of model, there are considerable differences. Indeed, an RNN’s parameters are adjusted with the sole intention of optimizing a cost function, whereas in the PSBC we adjust <span class="math inline">\(\mathrm{\Delta_{t}^u}\)</span> in order to tame the growth of evolved features through forward propagation. We address the similarities in Section 4 of the <a href="https://arxiv.org/abs/2009.02467">paper</a>.</p>
</blockquote>

</div>
</div>
<h3>References</h3>
<div id="refs" class="references">
<div id="ref-allen1979microscopic">
<p>Allen, Samuel M, and John W Cahn. 1979. “A Microscopic Theory for Antiphase Boundary Motion and Its Application to Antiphase Domain Coarsening.” <em>Acta Metallurgica</em> 27 (6): 1085–95.</p>
</div>
<div id="ref-Ang_Mal">
<p>Angenent, S. B., J. Mallet-Paret, and L. A. Peletier. 1987. “Stable Transition Layers in a Semilinear Boundary Value Problem.” <em>J. Differential Equations</em> 67 (2): 212–42. <a href="https://doi.org/10.1016/0022-0396(87)90147-1">https://doi.org/10.1016/0022-0396(87)90147-1</a>.</p>
</div>
<div id="ref-Ar_Wein">
<p>Aronson, D. G., and H. F. Weinberger. 1978. “Multidimensional Nonlinear Diffusion Arising in Population Genetics.” <em>Adv. In Math.</em> 30 (1): 33–76. <a href="https://doi.org/10.1016/0001-8708(78)90130-5">https://doi.org/10.1016/0001-8708(78)90130-5</a>.</p>
</div>
<div id="ref-Arnd_companion">
<p>Dennis, Mark R., Paul Glendinning, Paul A. Martin, Fadil Santosa, and Jared Tanner, eds. 2015. <em>The Princeton Companion to Applied Mathematics</em>. Princeton University Press, Princeton, NJ. <a href="https://doi.org/10.1515/9781400874477">https://doi.org/10.1515/9781400874477</a>.</p>
</div>
<div id="ref-Fife">
<p>Fife, Paul C. 1979. <em>Mathematical Aspects of Reacting and Diffusing Systems</em>. Vol. 28. Lecture Notes in Biomathematics. Springer-Verlag, Berlin-New York.</p>
</div>
<div id="ref-Hoff">
<p>Hoff, David. 1978. “Stability and Convergence of Finite Difference Methods for Systems of Nonlinear Reaction-Diffusion Equations.” <em>SIAM J. Numer. Anal.</em> 15 (6): 1161–77. <a href="https://doi.org/10.1137/0715077">https://doi.org/10.1137/0715077</a>.</p>
</div>
<div id="ref-Nishiura">
<p>Nishiura, Yasumasa. 2002. <em>Far-from-Equilibrium Dynamics</em>. Vol. 209. Translations of Mathematical Monographs. American Mathematical Society, Providence, RI.</p>
</div>
<div id="ref-Smoller">
<p>Smoller, Joel. 1994. <em>Shock Waves and Reaction-Diffusion Equations</em>. Second. Vol. 258. Grundlehren Der Mathematischen Wissenschaften [Fundamental Principles of Mathematical Sciences]. Springer-Verlag, New York. <a href="https://doi.org/10.1007/978-1-4612-0873-0">https://doi.org/10.1007/978-1-4612-0873-0</a>.</p>
</div>
</div>
<div class="footnotes">
<hr />
<ol start="1">
<li id="fn1"><p>Because, of course, <span class="math inline">\(\Delta_t^u =0\)</span> also does the job, but does not deliver what we want.<a href="a-few-examples.html#fnref1" class="footnote-back">↩︎</a></p></li>
</ol>
</div>
            </section>

          </div>
        </div>
      </div>
<a href="index.html" class="navigation navigation-prev " aria-label="Previous page"><i class="fa fa-angle-left"></i></a>
<a href="sec-PSBC.html" class="navigation navigation-next " aria-label="Next page"><i class="fa fa-angle-right"></i></a>
    </div>
  </div>
<script src="libs/gitbook-2.6.7/js/app.min.js"></script>
<script src="libs/gitbook-2.6.7/js/lunr.js"></script>
<script src="libs/gitbook-2.6.7/js/clipboard.min.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-search.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-sharing.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-fontsettings.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-bookdown.js"></script>
<script src="libs/gitbook-2.6.7/js/jquery.highlight.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-clipboard.js"></script>
<script>
// Start the GitBook front end once its "gitbook" module is available.
// The option names below are consumed by the GitBook/bookdown plugin scripts
// loaded earlier on the page; their exact semantics are defined there.
gitbook.require(["gitbook"], function (gitbook) {
  // Per-network switches plus the list offered under "all".
  var sharingOptions = {
    github: false,
    facebook: true,
    twitter: true,
    linkedin: false,
    weibo: false,
    instapaper: false,
    vk: false,
    all: ["facebook", "twitter", "linkedin", "weibo", "instapaper"]
  };

  // Initial reader font settings.
  var fontOptions = {
    theme: "white",
    family: "sans",
    size: 2
  };

  // Key order matches the generated configuration.
  var startOptions = {
    sharing: sharingOptions,
    fontsettings: fontOptions,
    // No edit / history / view links are configured for this book.
    edit: { link: null, text: null },
    history: { link: null, text: null },
    view: { link: null, text: null },
    download: ["Website.pdf"],
    toc: { collapse: "subsection" }
  };

  gitbook.start(startOptions);
});
</script>

<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
  // Inject the MathJax loader into <head> at runtime so the \( ... \) and
  // \[ ... \] spans on this page get typeset.
  (function () {
    // Always request MathJax over HTTPS. A protocol-relative URL would fall
    // back to plain http whenever the page itself is served over http, which
    // lets the script be tampered with in transit; an absolute https URL works
    // from http, https and file: pages alike.
    var mathjaxUrl = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-MML-AM_CHTML";

    // No explicit type is needed: JavaScript is the default for <script>.
    var script = document.createElement("script");
    script.src = mathjaxUrl;
    document.head.appendChild(script);
  })();
</script>
</body>

</html>