diff --git a/README.md b/README.md
index f7d1f5f..f6a9d1d 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 This benchmark is about reading pure PDF files - notscanned documents and not documents that applied OCR.
 
 ## Benchmarking machine
- Intel(R) Core(TM) i7-6700HQ CPU @ 2.60GHz
+ Intel(R) Core(TM) i7 CPU         860  @ 2.80GHz
 
 ## Input Documents
 | #  |                                               Name                                               | File Size | Pages |
@@ -18,73 +18,77 @@ This benchmark is about reading pure PDF files - notscanned documents and not do
 |  9 | [2201.00201](https://arxiv.org/pdf/2201.00201.pdf)                                               |    1.3MiB |     9 |
 | 10 | [1602.06541](https://arxiv.org/pdf/1602.06541.pdf)                                               |    2.9MiB |    16 |
 | 11 | [2201.00200](https://arxiv.org/pdf/2201.00200.pdf)                                               |  284.8KiB |     7 |
-| 12 | [2201.00022](https://arxiv.org/pdf/2201.00022.pdf)                                               |    1.1MiB |    11 |
+| 12 | [2201.00022](https://arxiv.org/pdf/2201.00022.pdf)                                               |    1.2MiB |    14 |
 | 13 | [2201.00029](https://arxiv.org/pdf/2201.00029.pdf)                                               |  797.6KiB |    12 |
 | 14 | [1601.03642](https://arxiv.org/pdf/1601.03642.pdf)                                               | 1004.9KiB |     8 |
 
 ## Libraries
 |     Name     | Last PyPI Release |             License             | Version  |                       Dependencies                        |
 | -----------: | :---------------- | ------------------------------: | -------: | :-------------------------------------------------------- |
-|         Borb | 2023-06-23        |                 AGPL/Commercial |   2.1.16 |                                                           |
-|    pypdfium2 | 2023-07-04        |      Apache-2.0 or BSD-3-Clause |   4.18.0 | PDFium (Foxit/Google)                                     |
-| pdfminer.six | 2022-11-05        |                           MIT/X | 20221105 |                                                           |
-|   pdfplumber | 2023-07-29        |                             MIT |   0.10.2 | pdfminer.six                                              |
+|         Borb | 2024-08-03        |                 AGPL/Commercial |   2.1.16 |                                                           |
+|    pypdfium2 | 2024-12-19        |      Apache-2.0 or BSD-3-Clause |   4.30.1 | PDFium (Foxit/Google)                                     |
+| pdfminer.six | 2024-07-06        |                           MIT/X | 20250327 |                                                           |
+|   pdfplumber | 2025-01-01        |                             MIT |   0.11.6 | pdfminer.six                                              |
 |        pdfrw | 2017-09-18        |                             MIT |      0.4 |                                                           |
 |    pdftotext | -                 |                             GPL |   0.86.1 | build-essential libpoppler-cpp-dev pkg-config python3-dev |
-|      PyMuPDF | 2023-08-24        | GNU AFFERO GPL 3.0 / Commerical |   1.23.1 | MuPDF                                                     |
-|        pypdf | 2023-08-26        |                    BSD 3-Clause |   3.15.4 |                                                           |
-|         Tika | 2023-01-01        |                       Apache v2 |    2.6.0 | Apache Tika                                               |
+|        playa | 2025-02-20        |                             MIT |    0.5.0 |                                                           |
+|      PyMuPDF | 2025-02-06        | GNU AFFERO GPL 3.0 / Commercial |   1.25.5 | MuPDF                                                     |
+|        pypdf | 2025-02-09        |                    BSD 3-Clause |    5.5.0 |                                                           |
+|         Tika | 2023-01-01        |                       Apache v2 |    3.1.0 | Apache Tika                                               |
 
 
 ## Text Extraction Speed
 
 | #  |                          Library                          | Average | [   1   ](https://arxiv.org/pdf/2201.00214.pdf) | [   2   ](https://github.com/py-pdf/sample-files/raw/main/009-pdflatex-geotopo/GeoTopo.pdf) | [   3   ](https://arxiv.org/pdf/2201.00151.pdf) | [   4   ](https://arxiv.org/pdf/1707.09725.pdf) | [   5   ](https://arxiv.org/pdf/2201.00021.pdf) | [   6   ](https://arxiv.org/pdf/2201.00037.pdf) | [   7   ](https://arxiv.org/pdf/2201.00069.pdf) | [   8   ](https://arxiv.org/pdf/2201.00178.pdf) | [   9   ](https://arxiv.org/pdf/2201.00201.pdf) | [  10   ](https://arxiv.org/pdf/1602.06541.pdf) | [  11   ](https://arxiv.org/pdf/2201.00200.pdf) | [  12   ](https://arxiv.org/pdf/2201.00022.pdf) | [  13   ](https://arxiv.org/pdf/2201.00029.pdf) | [  14   ](https://arxiv.org/pdf/1601.03642.pdf) |
 | :- | :-------------------------------------------------------- | :------ | :---------------------------------------------- | :------------------------------------------------------------------------------------------ | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- |
-| 1  | [PyMuPDF        ](https://pypi.org/project/PyMuPDF/)      |    0.1s | 0.4s                                            | 0.2s                                                                                        | 0.2s                                            | 0.2s                                            | 0.0s                                            | 0.1s                                            | 0.0s                                            | 0.0s                                            | 0.0s                                            | 0.0s                                            | 0.0s                                            | 0.0s                                            | 0.0s                                            | 0.0s                                            |
-| 2  | [pypdfium2      ](https://pypi.org/project/pypdfium2/)    |    0.2s | 1.9s                                            | 0.2s                                                                                        | 0.2s                                            | 0.2s                                            | 0.0s                                            | 0.1s                                            | 0.1s                                            | 0.1s                                            | 0.0s                                            | 0.1s                                            | 0.0s                                            | 0.0s                                            | 0.0s                                            | 0.0s                                            |
-| 3  | [pdftotext      ](https://poppler.freedesktop.org/)       |    0.3s | 0.8s                                            | 1.0s                                                                                        | 0.3s                                            | 0.8s                                            | 0.1s                                            | 0.2s                                            | 0.2s                                            | 0.1s                                            | 0.0s                                            | 0.1s                                            | 0.1s                                            | 0.1s                                            | 0.0s                                            | 0.0s                                            |
-| 4  | [Tika           ](https://pypi.org/project/tika/)         |    1.1s | 12.9s                                           | 0.9s                                                                                        | 0.6s                                            | 0.4s                                            | 0.1s                                            | 0.3s                                            | 0.2s                                            | 0.1s                                            | 0.1s                                            | 0.1s                                            | 0.1s                                            | 0.1s                                            | 0.0s                                            | 0.0s                                            |
-| 5  | [pypdf          ](https://pypi.org/project/pypdf/)        |    2.6s | 18.7s                                           | 4.8s                                                                                        | 5.3s                                            | 2.3s                                            | 0.7s                                            | 0.9s                                            | 0.4s                                            | 0.5s                                            | 0.3s                                            | 0.6s                                            | 0.5s                                            | 0.4s                                            | 0.4s                                            | 0.2s                                            |
-| 6  | [pdfminer.six   ](https://pypi.org/project/pdfminer.six/) |    4.5s | 26.0s                                           | 12.9s                                                                                       | 8.0s                                            | 4.6s                                            | 1.3s                                            | 2.1s                                            | 1.0s                                            | 1.2s                                            | 0.8s                                            | 1.5s                                            | 0.9s                                            | 0.9s                                            | 0.6s                                            | 0.6s                                            |
-| 7  | [pdfplumber     ](https://pypi.org/project/pdfplumber/)   |    6.7s | 41.7s                                           | 10.9s                                                                                       | 11.5s                                           | 8.4s                                            | 2.4s                                            | 4.3s                                            | 2.0s                                            | 1.9s                                            | 1.9s                                            | 2.7s                                            | 1.8s                                            | 1.7s                                            | 1.0s                                            | 1.2s                                            |
-| 8  | [Borb           ](https://pypi.org/project/borb/)         |   34.7s | 111.2s                                          | 105.0s                                                                                      | 1.4s                                            | 87.2s                                           | 21.1s                                           | 7.4s                                            | 83.5s                                           | 16.4s                                           | 20.3s                                           | 5.4s                                            | 3.4s                                            | 18.8s                                           | 3.2s                                            | 2.1s                                            |
+| 1  | [pypdfium2      ](https://pypi.org/project/pypdfium2/)    |    0.2s | 1.0s                                            | 0.3s                                                                                        | 0.2s                                            | 0.2s                                            | 0.0s                                            | 0.1s                                            | 0.1s                                            | 0.1s                                            | 0.0s                                            | 0.1s                                            | 0.0s                                            | 0.1s                                            | 0.0s                                            | 0.0s                                            |
+| 2  | [PyMuPDF        ](https://pypi.org/project/PyMuPDF/)      |    0.3s | 1.3s                                            | 0.4s                                                                                        | 0.7s                                            | 0.3s                                            | 0.1s                                            | 0.2s                                            | 0.1s                                            | 0.1s                                            | 0.1s                                            | 0.1s                                            | 0.1s                                            | 0.1s                                            | 0.1s                                            | 0.1s                                            |
+| 3  | [pdftotext      ](https://poppler.freedesktop.org/)       |    0.3s | 1.0s                                            | 1.1s                                                                                        | 0.3s                                            | 0.8s                                            | 0.1s                                            | 0.3s                                            | 0.2s                                            | 0.1s                                            | 0.1s                                            | 0.1s                                            | 0.1s                                            | 0.1s                                            | 0.0s                                            | 0.1s                                            |
+| 4  | [playa          ](https://pypi.org/project/playa-pdf/)    |    2.5s | 17.1s                                           | 5.5s                                                                                        | 4.4s                                            | 2.4s                                            | 0.7s                                            | 1.2s                                            | 0.6s                                            | 0.6s                                            | 0.4s                                            | 0.7s                                            | 0.6s                                            | 0.6s                                            | 0.5s                                            | 0.3s                                            |
+| 5  | [pypdf          ](https://pypi.org/project/pypdf/)        |    4.1s | 28.6s                                           | 8.0s                                                                                        | 8.2s                                            | 4.0s                                            | 1.2s                                            | 1.8s                                            | 0.9s                                            | 0.8s                                            | 0.6s                                            | 1.0s                                            | 0.9s                                            | 0.8s                                            | 0.7s                                            | 0.4s                                            |
+| 6  | [pdfminer.six   ](https://pypi.org/project/pdfminer.six/) |    9.5s | 60.6s                                           | 24.4s                                                                                       | 18.6s                                           | 9.1s                                            | 2.4s                                            | 4.1s                                            | 1.8s                                            | 2.1s                                            | 1.4s                                            | 2.6s                                            | 1.8s                                            | 2.0s                                            | 1.1s                                            | 0.9s                                            |
+| 7  | [pdfplumber     ](https://pypi.org/project/pdfplumber/)   |   13.0s | 86.8s                                           | 22.2s                                                                                       | 24.0s                                           | 14.3s                                           | 4.0s                                            | 7.2s                                            | 3.3s                                            | 3.2s                                            | 2.9s                                            | 4.4s                                            | 3.2s                                            | 3.6s                                            | 1.8s                                            | 1.7s                                            |
+| 8  | [Tika           ](https://pypi.org/project/tika/)         |   23.7s | 14.1s                                           | 100.1s                                                                                      | 0.6s                                            | 23.4s                                           | 47.5s                                           | 48.3s                                           | 26.2s                                           | 34.6s                                           | 0.1s                                            | 13.2s                                           | 0.1s                                            | 24.0s                                           | 0.1s                                            | 0.1s                                            |
+| 9  | [Borb           ](https://pypi.org/project/borb/)         |   53.5s | 189.5s                                          | 151.8s                                                                                      | 2.3s                                            | 128.6s                                          | 34.0s                                           | 11.8s                                           | 118.7s                                          | 25.8s                                           | 31.9s                                           | 8.4s                                            | 5.8s                                            | 32.3s                                           | 5.0s                                            | 2.9s                                            |
 
 
 ## Image Extraction Speed
 
 | #  |                          Library                          | Average | [   1   ](https://arxiv.org/pdf/2201.00214.pdf) | [   2   ](https://github.com/py-pdf/sample-files/raw/main/009-pdflatex-geotopo/GeoTopo.pdf) | [   3   ](https://arxiv.org/pdf/2201.00151.pdf) | [   4   ](https://arxiv.org/pdf/1707.09725.pdf) | [   5   ](https://arxiv.org/pdf/2201.00021.pdf) | [   6   ](https://arxiv.org/pdf/2201.00037.pdf) | [   7   ](https://arxiv.org/pdf/2201.00069.pdf) | [   8   ](https://arxiv.org/pdf/2201.00178.pdf) | [   9   ](https://arxiv.org/pdf/2201.00201.pdf) | [  10   ](https://arxiv.org/pdf/1602.06541.pdf) | [  11   ](https://arxiv.org/pdf/2201.00200.pdf) | [  12   ](https://arxiv.org/pdf/2201.00022.pdf) | [  13   ](https://arxiv.org/pdf/2201.00029.pdf) | [  14   ](https://arxiv.org/pdf/1601.03642.pdf) |
 | :- | :-------------------------------------------------------- | :------ | :---------------------------------------------- | :------------------------------------------------------------------------------------------ | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- |
-| 1  | [PyMuPDF        ](https://pypi.org/project/PyMuPDF/)      |    0.5s | 0.3s                                            | 0.5s                                                                                        | 0.0s                                            | 1.7s                                            | 0.4s                                            | 0.0s                                            | 3.2s                                            | 0.4s                                            | 0.4s                                            | 0.1s                                            | 0.0s                                            | 0.3s                                            | 0.2s                                            | 0.0s                                            |
-| 2  | [pypdf          ](https://pypi.org/project/pypdf/)        |    2.8s | 16.4s                                           | 2.1s                                                                                        | 0.8s                                            | 9.2s                                            | 1.1s                                            | 0.0s                                            | 6.7s                                            | 0.9s                                            | 0.9s                                            | 0.4s                                            | 0.0s                                            | 0.7s                                            | 0.2s                                            | 0.1s                                            |
-| 3  | [pdfminer.six   ](https://pypi.org/project/pdfminer.six/) |    6.5s | 31.8s                                           | 13.7s                                                                                       | 9.2s                                            | 24.0s                                           | 1.5s                                            | 2.3s                                            | 1.5s                                            | 1.4s                                            | 0.9s                                            | 1.5s                                            | 0.9s                                            | 1.0s                                            | 0.6s                                            | 0.5s                                            |
+| 1  | [PyMuPDF        ](https://pypi.org/project/PyMuPDF/)      |    0.6s | 0.3s                                            | 0.7s                                                                                        | 0.0s                                            | 2.3s                                            | 0.6s                                            | 0.0s                                            | 3.3s                                            | 0.5s                                            | 0.5s                                            | 0.1s                                            | 0.0s                                            | 0.4s                                            | 0.3s                                            | 0.0s                                            |
+| 2  | [pypdfium2      ](https://pypi.org/project/pypdfium2/)    |    1.3s | 1.6s                                            | 2.3s                                                                                        | 0.1s                                            | 4.3s                                            | 1.1s                                            | 0.2s                                            | 5.7s                                            | 0.9s                                            | 0.8s                                            | 0.3s                                            | 0.0s                                            | 0.7s                                            | 0.3s                                            | 0.0s                                            |
+| 3  | [pypdf          ](https://pypi.org/project/pypdf/)        |    5.3s | 24.7s                                           | 7.0s                                                                                        | 6.7s                                            | 19.1s                                           | 1.6s                                            | 0.7s                                            | 7.7s                                            | 1.5s                                            | 1.6s                                            | 0.8s                                            | 0.2s                                            | 1.3s                                            | 0.3s                                            | 0.3s                                            |
+| 4  | [pdfminer.six   ](https://pypi.org/project/pdfminer.six/) |   12.2s | 72.4s                                           | 25.7s                                                                                       | 21.0s                                           | 30.1s                                           | 2.7s                                            | 4.3s                                            | 2.4s                                            | 2.3s                                            | 1.5s                                            | 2.7s                                            | 2.0s                                            | 2.1s                                            | 1.1s                                            | 0.9s                                            |
 
 
 ## Watermarking Speed
 
 | #  |                       Library                        | Average | [   1   ](https://arxiv.org/pdf/2201.00214.pdf) | [   2   ](https://github.com/py-pdf/sample-files/raw/main/009-pdflatex-geotopo/GeoTopo.pdf) | [   3   ](https://arxiv.org/pdf/2201.00151.pdf) | [   4   ](https://arxiv.org/pdf/1707.09725.pdf) | [   5   ](https://arxiv.org/pdf/2201.00021.pdf) | [   6   ](https://arxiv.org/pdf/2201.00037.pdf) | [   7   ](https://arxiv.org/pdf/2201.00069.pdf) | [   8   ](https://arxiv.org/pdf/2201.00178.pdf) | [   9   ](https://arxiv.org/pdf/2201.00201.pdf) | [  10   ](https://arxiv.org/pdf/1602.06541.pdf) | [  11   ](https://arxiv.org/pdf/2201.00200.pdf) | [  12   ](https://arxiv.org/pdf/2201.00022.pdf) | [  13   ](https://arxiv.org/pdf/2201.00029.pdf) | [  14   ](https://arxiv.org/pdf/1601.03642.pdf) |
 | :- | :--------------------------------------------------- | :------ | :---------------------------------------------- | :------------------------------------------------------------------------------------------ | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- |
-| 1  | [PyMuPDF        ](https://pypi.org/project/PyMuPDF/) |    0.0s | 0.0s                                            | 0.1s                                                                                        | 0.0s                                            | 0.1s                                            | 0.0s                                            | 0.0s                                            | 0.0s                                            | 0.0s                                            | 0.0s                                            | 0.0s                                            | 0.0s                                            | 0.0s                                            | 0.0s                                            | 0.0s                                            |
-| 2  | [pdfrw          ](https://pypi.org/project/pdfrw/)   |    0.1s | 0.0s                                            | 0.4s                                                                                        | 0.0s                                            | 0.3s                                            | 0.1s                                            | 0.1s                                            | 0.1s                                            | 0.1s                                            | 0.1s                                            | 0.1s                                            | 0.0s                                            | 0.1s                                            | 0.0s                                            | 0.0s                                            |
-| 3  | [pypdf          ](https://pypi.org/project/pypdf/)   |    0.4s | 0.6s                                            | 1.7s                                                                                        | 0.4s                                            | 0.9s                                            | 0.2s                                            | 0.3s                                            | 0.4s                                            | 0.3s                                            | 0.2s                                            | 0.3s                                            | 0.1s                                            | 0.2s                                            | 0.0s                                            | 0.2s                                            |
+| 1  | [pdfrw          ](https://pypi.org/project/pdfrw/)   |    0.1s | 0.1s                                            | 0.6s                                                                                        | 0.1s                                            | 0.4s                                            | 0.1s                                            | 0.1s                                            | 0.1s                                            | 0.1s                                            | 0.1s                                            | 0.1s                                            | 0.1s                                            | 0.2s                                            | 0.0s                                            | 0.1s                                            |
+| 2  | [PyMuPDF        ](https://pypi.org/project/PyMuPDF/) |    0.2s | 0.5s                                            | 0.7s                                                                                        | 0.2s                                            | 0.5s                                            | 0.1s                                            | 0.1s                                            | 0.1s                                            | 0.1s                                            | 0.1s                                            | 0.1s                                            | 0.0s                                            | 0.1s                                            | 0.0s                                            | 0.0s                                            |
+| 3  | [pypdf          ](https://pypi.org/project/pypdf/)   |    0.6s | 0.7s                                            | 2.3s                                                                                        | 0.5s                                            | 1.8s                                            | 0.4s                                            | 0.6s                                            | 0.3s                                            | 0.5s                                            | 0.2s                                            | 0.6s                                            | 0.2s                                            | 0.6s                                            | 0.1s                                            | 0.1s                                            |
 
 
 ## Watermarking File Size
 
 | #  |                       Library                        | Average | [   1   ](https://arxiv.org/pdf/2201.00214.pdf) | [   2   ](https://github.com/py-pdf/sample-files/raw/main/009-pdflatex-geotopo/GeoTopo.pdf) | [   3   ](https://arxiv.org/pdf/2201.00151.pdf) | [   4   ](https://arxiv.org/pdf/1707.09725.pdf) | [   5   ](https://arxiv.org/pdf/2201.00021.pdf) | [   6   ](https://arxiv.org/pdf/2201.00037.pdf) | [   7   ](https://arxiv.org/pdf/2201.00069.pdf) | [   8   ](https://arxiv.org/pdf/2201.00178.pdf) | [   9   ](https://arxiv.org/pdf/2201.00201.pdf) | [  10   ](https://arxiv.org/pdf/1602.06541.pdf) | [  11   ](https://arxiv.org/pdf/2201.00200.pdf) | [  12   ](https://arxiv.org/pdf/2201.00022.pdf) | [  13   ](https://arxiv.org/pdf/2201.00029.pdf) | [  14   ](https://arxiv.org/pdf/1601.03642.pdf) |
 | :- | :--------------------------------------------------- | :------ | :---------------------------------------------- | :------------------------------------------------------------------------------------------ | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- |
-| 1  | [pdfrw          ](https://pypi.org/project/pdfrw/)   | 3.4MB   | 2.5MB                                           | 5.7MB                                                                                       | 1.6MB                                           | 7.3MB                                           | 2.7MB                                           | 3.1MB                                           | 15.4MB                                          | 2.4MB                                           | 1.3MB                                           | 3.0MB                                           | 0.3MB                                           | 1.1MB                                           | 0.8MB                                           | 1.0MB                                           |
-| 2  | [pypdf          ](https://pypi.org/project/pypdf/)   | 3.5MB   | 2.5MB                                           | 5.7MB                                                                                       | 1.6MB                                           | 7.3MB                                           | 2.7MB                                           | 3.1MB                                           | 15.4MB                                          | 2.4MB                                           | 1.3MB                                           | 3.0MB                                           | 0.3MB                                           | 1.1MB                                           | 0.8MB                                           | 1.0MB                                           |
-| 3  | [PyMuPDF        ](https://pypi.org/project/PyMuPDF/) | 3.7MB   | 2.7MB                                           | 6.8MB                                                                                       | 1.7MB                                           | 8.5MB                                           | 2.8MB                                           | 3.4MB                                           | 15.5MB                                          | 2.5MB                                           | 1.4MB                                           | 3.2MB                                           | 0.3MB                                           | 1.2MB                                           | 0.9MB                                           | 1.1MB                                           |
+| 1  | [pypdf          ](https://pypi.org/project/pypdf/)   | 3.4MB   | 2.5MB                                           | 5.6MB                                                                                       | 1.6MB                                           | 7.2MB                                           | 2.7MB                                           | 3.1MB                                           | 15.4MB                                          | 2.4MB                                           | 1.3MB                                           | 3.0MB                                           | 0.3MB                                           | 1.2MB                                           | 0.8MB                                           | 1.0MB                                           |
+| 2  | [pdfrw          ](https://pypi.org/project/pdfrw/)   | 3.5MB   | 2.5MB                                           | 5.7MB                                                                                       | 1.6MB                                           | 7.3MB                                           | 2.7MB                                           | 3.1MB                                           | 15.4MB                                          | 2.4MB                                           | 1.3MB                                           | 3.0MB                                           | 0.3MB                                           | 1.2MB                                           | 0.8MB                                           | 1.0MB                                           |
+| 3  | [PyMuPDF        ](https://pypi.org/project/PyMuPDF/) | 3.7MB   | 2.7MB                                           | 6.9MB                                                                                       | 1.7MB                                           | 8.5MB                                           | 2.8MB                                           | 3.4MB                                           | 15.5MB                                          | 2.5MB                                           | 1.4MB                                           | 3.2MB                                           | 0.3MB                                           | 1.3MB                                           | 0.9MB                                           | 1.1MB                                           |
 
 ## Text Extraction Quality
 
 | #  |                          Library                          | Average | [   1   ](https://arxiv.org/pdf/2201.00214.pdf) | [   2   ](https://github.com/py-pdf/sample-files/raw/main/009-pdflatex-geotopo/GeoTopo.pdf) | [   3   ](https://arxiv.org/pdf/2201.00151.pdf) | [   4   ](https://arxiv.org/pdf/1707.09725.pdf) | [   5   ](https://arxiv.org/pdf/2201.00021.pdf) | [   6   ](https://arxiv.org/pdf/2201.00037.pdf) | [   7   ](https://arxiv.org/pdf/2201.00069.pdf) | [   8   ](https://arxiv.org/pdf/2201.00178.pdf) | [   9   ](https://arxiv.org/pdf/2201.00201.pdf) | [  10   ](https://arxiv.org/pdf/1602.06541.pdf) | [  11   ](https://arxiv.org/pdf/2201.00200.pdf) | [  12   ](https://arxiv.org/pdf/2201.00022.pdf) | [  13   ](https://arxiv.org/pdf/2201.00029.pdf) | [  14   ](https://arxiv.org/pdf/1601.03642.pdf) |
 | :- | :-------------------------------------------------------- | :------ | :---------------------------------------------- | :------------------------------------------------------------------------------------------ | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- |
-| 1  | [pypdfium2      ](https://pypi.org/project/pypdfium2/)    |  98%    |  99%                                            |  97%                                                                                        |  94%                                            |  99%                                            |  98%                                            |  96%                                            |  99%                                            |  98%                                            |  99%                                            |  99%                                            |  98%                                            |  98%                                            |  99%                                            |  99%                                            |
-| 2  | [pypdf          ](https://pypi.org/project/pypdf/)        |  97%    |  98%                                            |  93%                                                                                        |  94%                                            |  98%                                            |  98%                                            |  96%                                            |  97%                                            |  98%                                            |  99%                                            |  99%                                            |  98%                                            |  98%                                            |  98%                                            |  99%                                            |
-| 3  | [PyMuPDF        ](https://pypi.org/project/PyMuPDF/)      |  97%    |  98%                                            |  96%                                                                                        |  93%                                            |  97%                                            |  98%                                            |  96%                                            |  98%                                            |  98%                                            |  98%                                            |  98%                                            |  97%                                            |  97%                                            |  98%                                            |  99%                                            |
-| 4  | [Tika           ](https://pypi.org/project/tika/)         |  96%    |  99%                                            |  98%                                                                                        |  92%                                            |  97%                                            |  98%                                            |  96%                                            |  93%                                            |  97%                                            |  98%                                            |  93%                                            |  98%                                            |  93%                                            |  98%                                            |  96%                                            |
-| 5  | [pdftotext      ](https://poppler.freedesktop.org/)       |  93%    |  96%                                            |  93%                                                                                        |  91%                                            |  94%                                            |  92%                                            |  96%                                            |  96%                                            |  96%                                            |  97%                                            |  83%                                            |  94%                                            |  96%                                            |  96%                                            |  79%                                            |
-| 6  | [pdfminer.six   ](https://pypi.org/project/pdfminer.six/) |  90%    |  95%                                            |  79%                                                                                        |  86%                                            |  92%                                            |  86%                                            |  93%                                            |  95%                                            |  93%                                            |  92%                                            |  92%                                            |  93%                                            |  86%                                            |  98%                                            |  86%                                            |
-| 7  | [pdfplumber     ](https://pypi.org/project/pdfplumber/)   |  75%    |  94%                                            |  84%                                                                                        |  61%                                            |  97%                                            |  61%                                            |  93%                                            |  61%                                            |  89%                                            |  57%                                            |  59%                                            |  67%                                            |  59%                                            |  98%                                            |  67%                                            |
-| 8  | [Borb           ](https://pypi.org/project/borb/)         |  45%    |  70%                                            |  79%                                                                                        |   0%                                            |  40%                                            |  48%                                            |  92%                                            |   0%                                            |  64%                                            |  51%                                            |  41%                                            |  55%                                            |  43%                                            |   0%                                            |  53%                                            |
+| 1  | [pypdfium2      ](https://pypi.org/project/pypdfium2/)    |  97%    |  99%                                            |  97%                                                                                        |  94%                                            |  99%                                            |  98%                                            |  96%                                            |  99%                                            |  99%                                            |  99%                                            |  99%                                            |  98%                                            |  78%                                            |  99%                                            |  99%                                            |
+| 2  | [pypdf          ](https://pypi.org/project/pypdf/)        |  96%    |  99%                                            |  95%                                                                                        |  93%                                            |  98%                                            |  99%                                            |  96%                                            |  97%                                            |  99%                                            |  99%                                            |  99%                                            |  99%                                            |  78%                                            | 100%                                            |  99%                                            |
+| 3  | [playa          ](https://pypi.org/project/playa-pdf/)    |  96%    |  99%                                            |  95%                                                                                        |  94%                                            |  98%                                            |  98%                                            |  96%                                            |  98%                                            |  98%                                            |  99%                                            |  99%                                            |  99%                                            |  78%                                            |  98%                                            |  99%                                            |
+| 4  | [PyMuPDF        ](https://pypi.org/project/PyMuPDF/)      |  96%    |  98%                                            |  96%                                                                                        |  93%                                            |  97%                                            |  98%                                            |  95%                                            |  99%                                            |  98%                                            |  98%                                            |  98%                                            |  97%                                            |  77%                                            |  98%                                            |  99%                                            |
+| 5  | [pdfplumber     ](https://pypi.org/project/pdfplumber/)   |  93%    |  96%                                            |  88%                                                                                        |  88%                                            |  98%                                            |  92%                                            |  94%                                            |  93%                                            |  95%                                            |  93%                                            |  97%                                            |  94%                                            |  76%                                            |  99%                                            |  98%                                            |
+| 6  | [pdftotext      ](https://poppler.freedesktop.org/)       |  92%    |  96%                                            |  94%                                                                                        |  91%                                            |  95%                                            |  92%                                            |  96%                                            |  96%                                            |  96%                                            |  97%                                            |  83%                                            |  94%                                            |  77%                                            |  96%                                            |  79%                                            |
+| 7  | [pdfminer.six   ](https://pypi.org/project/pdfminer.six/) |  89%    |  95%                                            |  79%                                                                                        |  86%                                            |  92%                                            |  86%                                            |  93%                                            |  95%                                            |  93%                                            |  92%                                            |  92%                                            |  93%                                            |  71%                                            |  98%                                            |  86%                                            |
+| 8  | [Tika           ](https://pypi.org/project/tika/)         |  83%    |  99%                                            |   0%                                                                                        |  92%                                            |  95%                                            |  77%                                            |  86%                                            |  82%                                            |  82%                                            |  98%                                            |  88%                                            |  98%                                            |  67%                                            |  98%                                            |  96%                                            |
+| 9  | [Borb           ](https://pypi.org/project/borb/)         |  45%    |  70%                                            |  79%                                                                                        |   0%                                            |  40%                                            |  48%                                            |  92%                                            |   0%                                            |  64%                                            |  51%                                            |  41%                                            |  55%                                            |  41%                                            |   0%                                            |  53%                                            |
diff --git a/benchmark.py b/benchmark.py
index aa8ef82..c609257 100644
--- a/benchmark.py
+++ b/benchmark.py
@@ -15,6 +15,7 @@
 import pdfminer
 import pdfplumber
 import pdfrw
+import playa
 import pypdf
 import pypdfium2
 import tika
@@ -30,6 +31,7 @@
     pdfplubmer_get_text,
     pdfrw_watermarking,
     pdftotext_get_text,
+    playa_get_text,
     pymupdf_get_text,
     pymupdf_image_extraction,
     pymupdf_watermarking,
@@ -172,7 +174,7 @@ def write_single_result(
             version=pypdf.__version__,
             watermarking_function=pypdf_watermarking,
             license="BSD 3-Clause",
-            last_release_date="2023-08-26",
+            last_release_date="2025-02-09",
             image_extraction_function=pypdf_image_extraction,
         ),
         "pdfminer": Library(
@@ -182,7 +184,7 @@ def write_single_result(
             text_extraction_function=lambda n: pdfminder_extract_text(BytesIO(n)),
             version=pdfminer.__version__,
             license="MIT/X",
-            last_release_date="2022-11-05",
+            last_release_date="2024-07-06",
             image_extraction_function=pdfminer_image_extraction,
         ),
         "pdfplumber": Library(
@@ -192,7 +194,7 @@ def write_single_result(
             text_extraction_function=pdfplubmer_get_text,
             version=pdfplumber.__version__,
             license="MIT",
-            last_release_date="2023-07-29",
+            last_release_date="2025-01-01",
             dependencies="pdfminer.six",
         ),
         "pymupdf": Library(
@@ -205,7 +207,7 @@ def write_single_result(
             image_extraction_function=pymupdf_image_extraction,
             dependencies="MuPDF",
             license="GNU AFFERO GPL 3.0 / Commerical",
-            last_release_date="2023-08-24",
+            last_release_date="2025-02-06",
         ),
         "pdftotext": Library(
             "pdftotext",
@@ -226,7 +228,7 @@ def write_single_result(
             version="2.1.16",
             watermarking_function=None,
             license="AGPL/Commercial",
-            last_release_date="2023-06-23",
+            last_release_date="2024-08-03",
         ),
         "pdfium": Library(
             "pypdfium2",
@@ -237,7 +239,7 @@ def write_single_result(
             watermarking_function=None,
             image_extraction_function=pdfium_image_extraction,
             license="Apache-2.0 or BSD-3-Clause",
-            last_release_date="2023-07-04",
+            last_release_date="2024-12-19",
             dependencies="PDFium (Foxit/Google)",
         ),
         "pdfrw": Library(
@@ -251,5 +253,14 @@ def write_single_result(
             last_release_date="2017-09-18",
             dependencies="",
         ),
+        "playa": Library(
+            "playa",
+            "playa",
+            "https://pypi.org/project/playa-pdf/",
+            text_extraction_function=playa_get_text,
+            version=playa.__version__,
+            license="MIT",
+            last_release_date="2025-02-20",
+        ),
     }
     main(docs, libraries)
diff --git a/cache.json b/cache.json
index 521253c..91973c0 100644
--- a/cache.json
+++ b/cache.json
@@ -1,601 +1,675 @@
 {
     "benchmark_times": {
         "borb": {
-            "1601.03642": {
-                "read": 2.053179979324341
+            "2201.00214": {
+                "read": 189.48876190185547
             },
-            "1602.06541": {
-                "read": 5.41968560218811
+            "GeoTopo-book": {
+                "read": 151.83953523635864
+            },
+            "2201.00151": {
+                "read": 2.2889041900634766
             },
             "1707.09725": {
-                "read": 87.24084305763245
+                "read": 128.60176134109497
             },
             "2201.00021": {
-                "read": 21.095511198043823
-            },
-            "2201.00022": {
-                "read": 18.75901699066162
-            },
-            "2201.00029": {
-                "read": 3.2207601070404053
+                "read": 33.952091693878174
             },
             "2201.00037": {
-                "read": 7.43989634513855
+                "read": 11.81212306022644
             },
             "2201.00069": {
-                "read": 83.48481893539429
-            },
-            "2201.00151": {
-                "read": 1.3735122680664062
+                "read": 118.73726058006287
             },
             "2201.00178": {
-                "read": 16.361470699310303
+                "read": 25.80863618850708
+            },
+            "2201.00201": {
+                "read": 31.945479154586792
+            },
+            "1602.06541": {
+                "read": 8.378407716751099
             },
             "2201.00200": {
-                "read": 3.4077320098876953
+                "read": 5.813374042510986
             },
-            "2201.00201": {
-                "read": 20.287503480911255
+            "2201.00022": {
+                "read": 32.266849517822266
             },
-            "2201.00214": {
-                "read": 111.19087290763855
+            "2201.00029": {
+                "read": 5.010681629180908
             },
-            "GeoTopo-book": {
-                "read": 105.00666165351868
+            "1601.03642": {
+                "read": 2.859659194946289
             }
         },
         "pdfium": {
-            "1601.03642": {
-                "read": 0.026182889938354492
+            "2201.00214": {
+                "read": 1.0257675647735596,
+                "image_extraction": 1.6196339130401611
             },
-            "1602.06541": {
-                "read": 0.05526089668273926
+            "GeoTopo-book": {
+                "read": 0.3268585205078125,
+                "image_extraction": 2.269947052001953
+            },
+            "2201.00151": {
+                "read": 0.22581076622009277,
+                "image_extraction": 0.05360603332519531
             },
             "1707.09725": {
-                "read": 0.19156575202941895
+                "read": 0.22593140602111816,
+                "image_extraction": 4.305531740188599
             },
             "2201.00021": {
-                "read": 0.04799509048461914
-            },
-            "2201.00022": {
-                "read": 0.0395662784576416
-            },
-            "2201.00029": {
-                "read": 0.02202439308166504
+                "read": 0.04949045181274414,
+                "image_extraction": 1.0980193614959717
             },
             "2201.00037": {
-                "read": 0.10238933563232422
+                "read": 0.11467123031616211,
+                "image_extraction": 0.22275733947753906
             },
             "2201.00069": {
-                "read": 0.05537557601928711
-            },
-            "2201.00151": {
-                "read": 0.16690778732299805
+                "read": 0.0616908073425293,
+                "image_extraction": 5.682236671447754
             },
             "2201.00178": {
-                "read": 0.0502932071685791
+                "read": 0.05685162544250488,
+                "image_extraction": 0.8708248138427734
+            },
+            "2201.00201": {
+                "read": 0.04024791717529297,
+                "image_extraction": 0.8477842807769775
+            },
+            "1602.06541": {
+                "read": 0.06260371208190918,
+                "image_extraction": 0.3279297351837158
             },
             "2201.00200": {
-                "read": 0.030122041702270508
+                "read": 0.03628039360046387,
+                "image_extraction": 0.04786324501037598
             },
-            "2201.00201": {
-                "read": 0.03543281555175781
+            "2201.00022": {
+                "read": 0.054438114166259766,
+                "image_extraction": 0.686147928237915
             },
-            "2201.00214": {
-                "read": 1.9107882976531982
+            "2201.00029": {
+                "read": 0.02482748031616211,
+                "image_extraction": 0.3170650005340576
             },
-            "GeoTopo-book": {
-                "read": 0.24455952644348145
+            "1601.03642": {
+                "read": 0.027425289154052734,
+                "image_extraction": 0.03217649459838867
             }
         },
         "pdfminer": {
-            "1601.03642": {
-                "image_extraction": 0.5113904476165771,
-                "read": 0.5691556930541992
+            "2201.00214": {
+                "read": 60.60245084762573,
+                "image_extraction": 72.40467977523804
             },
-            "1602.06541": {
-                "image_extraction": 1.5237865447998047,
-                "read": 1.5191314220428467
+            "GeoTopo-book": {
+                "read": 24.425355911254883,
+                "image_extraction": 25.683483839035034
+            },
+            "2201.00151": {
+                "read": 18.612955808639526,
+                "image_extraction": 20.973793506622314
             },
             "1707.09725": {
-                "image_extraction": 23.978344202041626,
-                "read": 4.634711742401123
+                "read": 9.125925540924072,
+                "image_extraction": 30.07633948326111
             },
             "2201.00021": {
-                "image_extraction": 1.4553284645080566,
-                "read": 1.3015577793121338
-            },
-            "2201.00022": {
-                "image_extraction": 0.9866993427276611,
-                "read": 0.8724193572998047
-            },
-            "2201.00029": {
-                "image_extraction": 0.6153700351715088,
-                "read": 0.5640342235565186
+                "read": 2.3772432804107666,
+                "image_extraction": 2.6542723178863525
             },
             "2201.00037": {
-                "image_extraction": 2.2505035400390625,
-                "read": 2.127037763595581
+                "read": 4.093456506729126,
+                "image_extraction": 4.28325629234314
             },
             "2201.00069": {
-                "image_extraction": 1.5143694877624512,
-                "read": 0.9530880451202393
-            },
-            "2201.00151": {
-                "image_extraction": 9.24244236946106,
-                "read": 7.973270893096924
+                "read": 1.8156311511993408,
+                "image_extraction": 2.380213737487793
             },
             "2201.00178": {
-                "image_extraction": 1.3969669342041016,
-                "read": 1.2367455959320068
+                "read": 2.0833513736724854,
+                "image_extraction": 2.259970188140869
+            },
+            "2201.00201": {
+                "read": 1.3995592594146729,
+                "image_extraction": 1.5048816204071045
+            },
+            "1602.06541": {
+                "read": 2.6256520748138428,
+                "image_extraction": 2.7129485607147217
             },
             "2201.00200": {
-                "image_extraction": 0.9476666450500488,
-                "read": 0.9224035739898682
+                "read": 1.8143031597137451,
+                "image_extraction": 1.967012882232666
             },
-            "2201.00201": {
-                "image_extraction": 0.8980197906494141,
-                "read": 0.7907443046569824
+            "2201.00022": {
+                "read": 1.9715628623962402,
+                "image_extraction": 2.0709922313690186
             },
-            "2201.00214": {
-                "image_extraction": 31.77288317680359,
-                "read": 25.975944757461548
+            "2201.00029": {
+                "read": 1.0807011127471924,
+                "image_extraction": 1.1380336284637451
             },
-            "GeoTopo-book": {
-                "image_extraction": 13.708941459655762,
-                "read": 12.885846138000488
+            "1601.03642": {
+                "read": 0.932868480682373,
+                "image_extraction": 0.9043912887573242
             }
         },
         "pdfplumber": {
-            "1601.03642": {
-                "read": 1.150937557220459
+            "2201.00214": {
+                "read": 86.757169008255
             },
-            "1602.06541": {
-                "read": 2.7273688316345215
+            "GeoTopo-book": {
+                "read": 22.199340105056763
+            },
+            "2201.00151": {
+                "read": 24.04423952102661
             },
             "1707.09725": {
-                "read": 8.424808502197266
+                "read": 14.33962893486023
             },
             "2201.00021": {
-                "read": 2.4490716457366943
-            },
-            "2201.00022": {
-                "read": 1.7107877731323242
-            },
-            "2201.00029": {
-                "read": 1.0425853729248047
+                "read": 4.038215637207031
             },
             "2201.00037": {
-                "read": 4.317584991455078
+                "read": 7.1929755210876465
             },
             "2201.00069": {
-                "read": 2.015364170074463
-            },
-            "2201.00151": {
-                "read": 11.465008974075317
+                "read": 3.2600326538085938
             },
             "2201.00178": {
-                "read": 1.9202370643615723
+                "read": 3.171274185180664
+            },
+            "2201.00201": {
+                "read": 2.876185178756714
+            },
+            "1602.06541": {
+                "read": 4.379940986633301
             },
             "2201.00200": {
-                "read": 1.7538721561431885
+                "read": 3.1631577014923096
             },
-            "2201.00201": {
-                "read": 1.8756508827209473
+            "2201.00022": {
+                "read": 3.5800487995147705
             },
-            "2201.00214": {
-                "read": 41.712098360061646
+            "2201.00029": {
+                "read": 1.8037359714508057
             },
-            "GeoTopo-book": {
-                "read": 10.938571691513062
+            "1601.03642": {
+                "read": 1.7201035022735596
             }
         },
         "pdfrw": {
-            "1601.03642": {
-                "watermark": 0.036574363708496094
+            "2201.00214": {
+                "watermark": 0.06628918647766113
             },
-            "1602.06541": {
-                "watermark": 0.09327936172485352
+            "GeoTopo-book": {
+                "watermark": 0.5555062294006348
+            },
+            "2201.00151": {
+                "watermark": 0.05751514434814453
             },
             "1707.09725": {
-                "watermark": 0.27477574348449707
+                "watermark": 0.4335949420928955
             },
             "2201.00021": {
-                "watermark": 0.0827476978302002
-            },
-            "2201.00022": {
-                "watermark": 0.08185839653015137
-            },
-            "2201.00029": {
-                "watermark": 0.013228416442871094
+                "watermark": 0.11416792869567871
             },
             "2201.00037": {
-                "watermark": 0.053519248962402344
+                "watermark": 0.08194208145141602
             },
             "2201.00069": {
-                "watermark": 0.08611893653869629
-            },
-            "2201.00151": {
-                "watermark": 0.0400242805480957
+                "watermark": 0.12613558769226074
             },
             "2201.00178": {
-                "watermark": 0.0774388313293457
+                "watermark": 0.11217474937438965
+            },
+            "2201.00201": {
+                "watermark": 0.08137369155883789
+            },
+            "1602.06541": {
+                "watermark": 0.13975119590759277
             },
             "2201.00200": {
-                "watermark": 0.038611650466918945
+                "watermark": 0.05994772911071777
             },
-            "2201.00201": {
-                "watermark": 0.05524253845214844
+            "2201.00022": {
+                "watermark": 0.16331195831298828
             },
-            "2201.00214": {
-                "watermark": 0.0414888858795166
+            "2201.00029": {
+                "watermark": 0.02135634422302246
             },
-            "GeoTopo-book": {
-                "watermark": 0.35054945945739746
+            "1601.03642": {
+                "watermark": 0.05025434494018555
             }
         },
         "pdftotext": {
-            "1601.03642": {
-                "read": 0.04276108741760254
+            "2201.00214": {
+                "read": 0.983544111251831
             },
-            "1602.06541": {
-                "read": 0.12382841110229492
+            "GeoTopo-book": {
+                "read": 1.0938339233398438
+            },
+            "2201.00151": {
+                "read": 0.32129335403442383
             },
             "1707.09725": {
-                "read": 0.8157875537872314
+                "read": 0.828188419342041
             },
             "2201.00021": {
-                "read": 0.07499313354492188
+                "read": 0.09987092018127441
+            },
+            "2201.00037": {
+                "read": 0.25408291816711426
+            },
+            "2201.00069": {
+                "read": 0.2126762866973877
+            },
+            "2201.00178": {
+                "read": 0.14757943153381348
+            },
+            "2201.00201": {
+                "read": 0.07124114036560059
+            },
+            "1602.06541": {
+                "read": 0.12359809875488281
+            },
+            "2201.00200": {
+                "read": 0.07798433303833008
             },
             "2201.00022": {
-                "read": 0.07213330268859863
+                "read": 0.1180119514465332
             },
             "2201.00029": {
-                "read": 0.030113697052001953
+                "read": 0.049498558044433594
             },
-            "2201.00037": {
-                "read": 0.21310901641845703
+            "1601.03642": {
+                "read": 0.05458521842956543
+            }
+        },
+        "playa": {
+            "2201.00214": {
+                "read": 17.097771167755127
             },
-            "2201.00069": {
-                "read": 0.24145245552062988
+            "GeoTopo-book": {
+                "read": 5.519521951675415
             },
             "2201.00151": {
-                "read": 0.2548847198486328
+                "read": 4.4114460945129395
+            },
+            "1707.09725": {
+                "read": 2.4427313804626465
+            },
+            "2201.00021": {
+                "read": 0.7357511520385742
+            },
+            "2201.00037": {
+                "read": 1.1612508296966553
+            },
+            "2201.00069": {
+                "read": 0.6288008689880371
             },
             "2201.00178": {
-                "read": 0.12105298042297363
+                "read": 0.6197876930236816
+            },
+            "2201.00201": {
+                "read": 0.4411795139312744
+            },
+            "1602.06541": {
+                "read": 0.678673505783081
             },
             "2201.00200": {
-                "read": 0.052548885345458984
+                "read": 0.5687770843505859
             },
-            "2201.00201": {
-                "read": 0.04883241653442383
+            "2201.00022": {
+                "read": 0.5955770015716553
             },
-            "2201.00214": {
-                "read": 0.76462721824646
+            "2201.00029": {
+                "read": 0.47310757637023926
             },
-            "GeoTopo-book": {
-                "read": 1.0103209018707275
+            "1601.03642": {
+                "read": 0.27886438369750977
             }
         },
         "pymupdf": {
-            "1601.03642": {
-                "image_extraction": 0.0022954940795898438,
-                "read": 0.02459883689880371,
-                "watermark": 0.012035369873046875
+            "2201.00214": {
+                "read": 1.2560763359069824,
+                "watermark": 0.4893491268157959,
+                "image_extraction": 0.2912435531616211
             },
-            "1602.06541": {
-                "image_extraction": 0.08666229248046875,
-                "read": 0.04839634895324707,
-                "watermark": 0.027688026428222656
+            "GeoTopo-book": {
+                "read": 0.41425490379333496,
+                "watermark": 0.6550765037536621,
+                "image_extraction": 0.667384147644043
+            },
+            "2201.00151": {
+                "read": 0.6849265098571777,
+                "watermark": 0.20680928230285645,
+                "image_extraction": 0.0033898353576660156
             },
             "1707.09725": {
-                "image_extraction": 1.6604242324829102,
-                "read": 0.1567060947418213,
-                "watermark": 0.11259746551513672
+                "read": 0.31463146209716797,
+                "watermark": 0.5394175052642822,
+                "image_extraction": 2.269129753112793
             },
             "2201.00021": {
-                "image_extraction": 0.4480102062225342,
-                "read": 0.03892374038696289,
-                "watermark": 0.019346952438354492
-            },
-            "2201.00022": {
-                "image_extraction": 0.2813708782196045,
-                "read": 0.036031246185302734,
-                "watermark": 0.021375656127929688
-            },
-            "2201.00029": {
-                "image_extraction": 0.20908689498901367,
-                "read": 0.023555278778076172,
-                "watermark": 0.008962631225585938
+                "read": 0.12572789192199707,
+                "watermark": 0.08519124984741211,
+                "image_extraction": 0.6188168525695801
             },
             "2201.00037": {
-                "image_extraction": 0.0018870830535888672,
-                "read": 0.0897064208984375,
-                "watermark": 0.028612375259399414
+                "read": 0.2286970615386963,
+                "watermark": 0.14116668701171875,
+                "image_extraction": 0.003799915313720703
             },
             "2201.00069": {
-                "image_extraction": 3.1560046672821045,
-                "read": 0.03975987434387207,
-                "watermark": 0.02903914451599121
-            },
-            "2201.00151": {
-                "image_extraction": 0.0021486282348632812,
-                "read": 0.16691279411315918,
-                "watermark": 0.013991832733154297
+                "read": 0.0737600326538086,
+                "watermark": 0.10507345199584961,
+                "image_extraction": 3.3033792972564697
             },
             "2201.00178": {
-                "image_extraction": 0.3780343532562256,
-                "read": 0.04298090934753418,
-                "watermark": 0.02316737174987793
+                "read": 0.07315778732299805,
+                "watermark": 0.10161709785461426,
+                "image_extraction": 0.45417189598083496
+            },
+            "2201.00201": {
+                "read": 0.061261653900146484,
+                "watermark": 0.06329631805419922,
+                "image_extraction": 0.47312164306640625
+            },
+            "1602.06541": {
+                "read": 0.0915529727935791,
+                "watermark": 0.10732769966125488,
+                "image_extraction": 0.0887451171875
             },
             "2201.00200": {
-                "image_extraction": 0.001901388168334961,
-                "read": 0.028005599975585938,
-                "watermark": 0.01051187515258789
+                "read": 0.06952333450317383,
+                "watermark": 0.046387434005737305,
+                "image_extraction": 0.003120899200439453
             },
-            "2201.00201": {
-                "image_extraction": 0.3551938533782959,
-                "read": 0.031032323837280273,
-                "watermark": 0.015187978744506836
+            "2201.00022": {
+                "read": 0.09096360206604004,
+                "watermark": 0.10521316528320312,
+                "image_extraction": 0.3852880001068115
             },
-            "2201.00214": {
-                "image_extraction": 0.2810511589050293,
-                "read": 0.41155457496643066,
-                "watermark": 0.01828455924987793
+            "2201.00029": {
+                "read": 0.06443285942077637,
+                "watermark": 0.03647136688232422,
+                "image_extraction": 0.25585365295410156
             },
-            "GeoTopo-book": {
-                "image_extraction": 0.5082950592041016,
-                "read": 0.24191975593566895,
-                "watermark": 0.12025952339172363
+            "1601.03642": {
+                "read": 0.06111550331115723,
+                "watermark": 0.048032283782958984,
+                "image_extraction": 0.003945112228393555
             }
         },
         "pypdf": {
-            "1601.03642": {
-                "image_extraction": 0.05196261405944824,
-                "read": 0.1702582836151123,
-                "watermark": 0.20092368125915527
+            "2201.00214": {
+                "read": 28.589249849319458,
+                "watermark": 0.7176375389099121,
+                "image_extraction": 24.738558769226074
             },
-            "1602.06541": {
-                "image_extraction": 0.35231685638427734,
-                "read": 0.6377561092376709,
-                "watermark": 0.3079540729522705
+            "GeoTopo-book": {
+                "read": 7.980882167816162,
+                "watermark": 2.3296380043029785,
+                "image_extraction": 7.01096773147583
+            },
+            "2201.00151": {
+                "read": 8.237623691558838,
+                "watermark": 0.45946359634399414,
+                "image_extraction": 6.705368518829346
             },
             "1707.09725": {
-                "image_extraction": 9.18139910697937,
-                "read": 2.2707440853118896,
-                "watermark": 0.9489884376525879
+                "read": 3.9532535076141357,
+                "watermark": 1.8308773040771484,
+                "image_extraction": 19.12919783592224
             },
             "2201.00021": {
-                "image_extraction": 1.0856575965881348,
-                "read": 0.6681113243103027,
-                "watermark": 0.20781850814819336
-            },
-            "2201.00022": {
-                "image_extraction": 0.6631152629852295,
-                "read": 0.42851996421813965,
-                "watermark": 0.2411966323852539
-            },
-            "2201.00029": {
-                "image_extraction": 0.20132708549499512,
-                "read": 0.4068417549133301,
-                "watermark": 0.04504132270812988
+                "read": 1.1517627239227295,
+                "watermark": 0.35332202911376953,
+                "image_extraction": 1.6376030445098877
             },
             "2201.00037": {
-                "image_extraction": 0.02179741859436035,
-                "read": 0.8668158054351807,
-                "watermark": 0.28296732902526855
+                "read": 1.8098814487457275,
+                "watermark": 0.598846435546875,
+                "image_extraction": 0.6979629993438721
             },
             "2201.00069": {
-                "image_extraction": 6.685812473297119,
-                "read": 0.36688804626464844,
-                "watermark": 0.37302637100219727
-            },
-            "2201.00151": {
-                "image_extraction": 0.8311829566955566,
-                "read": 5.328065633773804,
-                "watermark": 0.43613553047180176
+                "read": 0.9310543537139893,
+                "watermark": 0.32545042037963867,
+                "image_extraction": 7.657184362411499
             },
             "2201.00178": {
-                "image_extraction": 0.9407749176025391,
-                "read": 0.5250120162963867,
-                "watermark": 0.2806210517883301
+                "read": 0.8240063190460205,
+                "watermark": 0.48301267623901367,
+                "image_extraction": 1.478360891342163
+            },
+            "2201.00201": {
+                "read": 0.621835470199585,
+                "watermark": 0.21535801887512207,
+                "image_extraction": 1.6354153156280518
+            },
+            "1602.06541": {
+                "read": 0.9688084125518799,
+                "watermark": 0.5814881324768066,
+                "image_extraction": 0.8102591037750244
             },
             "2201.00200": {
-                "image_extraction": 0.005396366119384766,
-                "read": 0.5119338035583496,
-                "watermark": 0.14427518844604492
+                "read": 0.9108552932739258,
+                "watermark": 0.15725207328796387,
+                "image_extraction": 0.18536925315856934
             },
-            "2201.00201": {
-                "image_extraction": 0.9274122714996338,
-                "read": 0.27799558639526367,
-                "watermark": 0.1510756015777588
+            "2201.00022": {
+                "read": 0.8084313869476318,
+                "watermark": 0.6330957412719727,
+                "image_extraction": 1.3017487525939941
             },
-            "2201.00214": {
-                "image_extraction": 16.357728481292725,
-                "read": 18.680219888687134,
-                "watermark": 0.6249253749847412
+            "2201.00029": {
+                "read": 0.6995418071746826,
+                "watermark": 0.07724857330322266,
+                "image_extraction": 0.26739954948425293
             },
-            "GeoTopo-book": {
-                "image_extraction": 2.1184744834899902,
-                "read": 4.759061336517334,
-                "watermark": 1.6904757022857666
+            "1601.03642": {
+                "read": 0.35368847846984863,
+                "watermark": 0.12895679473876953,
+                "image_extraction": 0.3331313133239746
             }
         },
         "tika": {
-            "1601.03642": {
-                "read": 0.04623913764953613
+            "2201.00214": {
+                "read": 14.126012086868286
             },
-            "1602.06541": {
-                "read": 0.1097116470336914
+            "GeoTopo-book": {
+                "read": 100.12433242797852
+            },
+            "2201.00151": {
+                "read": 0.5522549152374268
             },
             "1707.09725": {
-                "read": 0.44649672508239746
+                "read": 23.391923666000366
             },
             "2201.00021": {
-                "read": 0.130723237991333
-            },
-            "2201.00022": {
-                "read": 0.0659487247467041
-            },
-            "2201.00029": {
-                "read": 0.04696822166442871
+                "read": 47.53182625770569
             },
             "2201.00037": {
-                "read": 0.31470751762390137
+                "read": 48.28274869918823
             },
             "2201.00069": {
-                "read": 0.1581587791442871
-            },
-            "2201.00151": {
-                "read": 0.5808892250061035
+                "read": 26.16471815109253
             },
             "2201.00178": {
-                "read": 0.11357355117797852
+                "read": 34.607691526412964
+            },
+            "2201.00201": {
+                "read": 0.09984779357910156
+            },
+            "1602.06541": {
+                "read": 13.2386314868927
             },
             "2201.00200": {
-                "read": 0.09151124954223633
+                "read": 0.13814258575439453
             },
-            "2201.00201": {
-                "read": 0.07536077499389648
+            "2201.00022": {
+                "read": 24.02240300178528
             },
-            "2201.00214": {
-                "read": 12.93626356124878
+            "2201.00029": {
+                "read": 0.09504485130310059
             },
-            "GeoTopo-book": {
-                "read": 0.947016716003418
+            "1601.03642": {
+                "read": 0.08772063255310059
             }
         }
     },
     "read_quality": {
         "borb": {
-            "1601.03642": 0.5295431890832847,
-            "1602.06541": 0.405852417302799,
+            "2201.00214": 0.7037028842821007,
+            "GeoTopo-book": 0.7910254212656228,
+            "2201.00151": 0.0,
             "1707.09725": 0.40052709687324084,
             "2201.00021": 0.4769067796610169,
-            "2201.00022": 0.4301739518287243,
-            "2201.00029": 0.0,
             "2201.00037": 0.9182362504460923,
             "2201.00069": 0.0,
-            "2201.00151": 0.0,
             "2201.00178": 0.643753339745645,
-            "2201.00200": 0.5542067356599346,
             "2201.00201": 0.5095382561142038,
-            "2201.00214": 0.7037028842821007,
-            "GeoTopo-book": 0.7910254212656228
+            "1602.06541": 0.405852417302799,
+            "2201.00200": 0.5542067356599346,
+            "2201.00022": 0.41112858259133134,
+            "2201.00029": 0.0,
+            "1601.03642": 0.5295431890832847
         },
         "pdfium": {
-            "1601.03642": 0.9935736623251659,
-            "1602.06541": 0.9919005142642908,
-            "1707.09725": 0.9869033794742829,
-            "2201.00021": 0.9825806792373105,
-            "2201.00022": 0.9782950402996555,
-            "2201.00029": 0.988813497157528,
-            "2201.00037": 0.9617606084193095,
-            "2201.00069": 0.9894269749096088,
-            "2201.00151": 0.9371048049607478,
-            "2201.00178": 0.9849444987879046,
-            "2201.00200": 0.9836863694438841,
-            "2201.00201": 0.9860127582372564,
-            "2201.00214": 0.9932975353472919,
-            "GeoTopo-book": 0.9656593310168123
+            "2201.00214": 0.9934491487758218,
+            "GeoTopo-book": 0.9662662046401999,
+            "2201.00151": 0.9371906885806807,
+            "1707.09725": 0.9868031113572143,
+            "2201.00021": 0.9834949300569906,
+            "2201.00037": 0.9621590520170431,
+            "2201.00069": 0.9897771809137501,
+            "2201.00178": 0.9850943175410412,
+            "2201.00201": 0.9860800858410964,
+            "1602.06541": 0.9920750604984221,
+            "2201.00200": 0.9838848533763788,
+            "2201.00022": 0.7772343606810399,
+            "2201.00029": 0.9934588336455487,
+            "1601.03642": 0.9936929660245063
         },
         "pdfminer": {
-            "1601.03642": 0.8623963054819123,
-            "1602.06541": 0.9230412447205493,
-            "1707.09725": 0.9189976553065722,
+            "2201.00214": 0.9487280293804596,
+            "GeoTopo-book": 0.7883172614741182,
+            "2201.00151": 0.8602045202371076,
+            "1707.09725": 0.9189694626000253,
             "2201.00021": 0.8588197275011207,
-            "2201.00022": 0.856704693395706,
-            "2201.00029": 0.975523516322736,
             "2201.00037": 0.9301479087658201,
             "2201.00069": 0.9540472289854548,
-            "2201.00151": 0.8602045202371076,
             "2201.00178": 0.9286101949651401,
-            "2201.00200": 0.9338492127465206,
             "2201.00201": 0.9153569694026227,
-            "2201.00214": 0.9487280293804596,
-            "GeoTopo-book": 0.7882974400650142
+            "1602.06541": 0.9230412447205493,
+            "2201.00200": 0.9338492127465206,
+            "2201.00022": 0.7090353973857456,
+            "2201.00029": 0.975523516322736,
+            "1601.03642": 0.8623963054819123
         },
         "pdfplumber": {
-            "1601.03642": 0.6740574957927792,
-            "1602.06541": 0.5866773388981397,
-            "1707.09725": 0.9664367136584459,
-            "2201.00021": 0.6088302790069504,
-            "2201.00022": 0.5887380987010662,
-            "2201.00029": 0.984571971904426,
-            "2201.00037": 0.9338000304367676,
-            "2201.00069": 0.6146320998483967,
-            "2201.00151": 0.6123003519582357,
-            "2201.00178": 0.8864967748757534,
-            "2201.00200": 0.6680035900600213,
-            "2201.00201": 0.5674785100286532,
-            "2201.00214": 0.9386228438413943,
-            "GeoTopo-book": 0.8423270156489054
+            "2201.00214": 0.9617839460759947,
+            "GeoTopo-book": 0.881156947575813,
+            "2201.00151": 0.8834078325527807,
+            "1707.09725": 0.9778034125495448,
+            "2201.00021": 0.9165505666048686,
+            "2201.00037": 0.9398528606089066,
+            "2201.00069": 0.931573476258142,
+            "2201.00178": 0.9505142881280757,
+            "2201.00201": 0.931133252859218,
+            "1602.06541": 0.9735710510150145,
+            "2201.00200": 0.9366531687427314,
+            "2201.00022": 0.7638770612371294,
+            "2201.00029": 0.9926702855215138,
+            "1601.03642": 0.9819316802496966
         },
         "pdfrw": {},
         "pdftotext": {
-            "1601.03642": 0.7876688841643235,
-            "1602.06541": 0.832311127441282,
-            "1707.09725": 0.9445130343025355,
-            "2201.00021": 0.9194266776433834,
-            "2201.00022": 0.9633196241682549,
-            "2201.00029": 0.9649219467401285,
-            "2201.00037": 0.9555825890870249,
-            "2201.00069": 0.9580918006489482,
-            "2201.00151": 0.9134287661895024,
-            "2201.00178": 0.962171435833351,
-            "2201.00200": 0.9386013327051221,
+            "2201.00214": 0.9600762653108389,
+            "GeoTopo-book": 0.9411707401930939,
+            "2201.00151": 0.9134194729880964,
+            "1707.09725": 0.9452373645570218,
+            "2201.00021": 0.919541928333949,
+            "2201.00037": 0.9555313782701153,
+            "2201.00069": 0.9586758781603748,
+            "2201.00178": 0.9634509272301712,
             "2201.00201": 0.9652466695944957,
-            "2201.00214": 0.9600477616539399,
-            "GeoTopo-book": 0.9329012382167732
+            "1602.06541": 0.8323735364569717,
+            "2201.00200": 0.9386551793767248,
+            "2201.00022": 0.7741910594589381,
+            "2201.00029": 0.9649219467401285,
+            "1601.03642": 0.7867700010287713
+        },
+        "playa": {
+            "2201.00214": 0.9892356749444391,
+            "GeoTopo-book": 0.9539066558356663,
+            "2201.00151": 0.9364750771229096,
+            "1707.09725": 0.9849325894160281,
+            "2201.00021": 0.9833233125534002,
+            "2201.00037": 0.9599506720927364,
+            "2201.00069": 0.9795408772361502,
+            "2201.00178": 0.9805815986016209,
+            "2201.00201": 0.9875833300845971,
+            "1602.06541": 0.991808318903064,
+            "2201.00200": 0.9857781717094396,
+            "2201.00022": 0.7775499398315283,
+            "2201.00029": 0.9772836250299688,
+            "1601.03642": 0.9931662087912088
         },
         "pymupdf": {
-            "1601.03642": 0.988502191286414,
-            "1602.06541": 0.9798295776242781,
-            "1707.09725": 0.9705185650275407,
-            "2201.00021": 0.9773729808638253,
-            "2201.00022": 0.9744584545748465,
+            "2201.00214": 0.9780473882293753,
+            "GeoTopo-book": 0.957868684569868,
+            "2201.00151": 0.9261222831606744,
+            "1707.09725": 0.9700781181218339,
+            "2201.00021": 0.9771989038544963,
+            "2201.00037": 0.9543154784114144,
+            "2201.00069": 0.9856916902090933,
+            "2201.00178": 0.9783809778252739,
+            "2201.00201": 0.980766604896128,
+            "1602.06541": 0.9796117742003992,
+            "2201.00200": 0.9745551529519525,
+            "2201.00022": 0.7739993052070868,
             "2201.00029": 0.9771271181366386,
-            "2201.00037": 0.9550639423053028,
-            "2201.00069": 0.9811348240949814,
-            "2201.00151": 0.9262640520751881,
-            "2201.00178": 0.9792454038818782,
-            "2201.00200": 0.9749010314275711,
-            "2201.00201": 0.9810750465567505,
-            "2201.00214": 0.9780968228783716,
-            "GeoTopo-book": 0.9644376202326115
+            "1601.03642": 0.9884500360936372
         },
         "pypdf": {
-            "1601.03642": 0.9927797833935018,
-            "1602.06541": 0.9879715846375909,
-            "1707.09725": 0.9799128437947946,
-            "2201.00021": 0.9806264058057124,
-            "2201.00022": 0.979681702355939,
-            "2201.00029": 0.9798789064888997,
-            "2201.00037": 0.959331208757123,
-            "2201.00069": 0.9668886543437042,
-            "2201.00151": 0.9366784193042167,
-            "2201.00178": 0.9825861828182239,
-            "2201.00200": 0.9839537609635827,
-            "2201.00201": 0.9865737079024715,
-            "2201.00214": 0.984773043075498,
-            "GeoTopo-book": 0.9267843483814432
+            "2201.00214": 0.9875784744753969,
+            "GeoTopo-book": 0.9519678772970627,
+            "2201.00151": 0.9316025356320911,
+            "1707.09725": 0.9834021823012359,
+            "2201.00021": 0.9852542946602353,
+            "2201.00037": 0.9643816837117355,
+            "2201.00069": 0.9718067652608781,
+            "2201.00178": 0.9882277602860344,
+            "2201.00201": 0.9880860613748353,
+            "1602.06541": 0.9921834657310404,
+            "2201.00200": 0.9866232437960919,
+            "2201.00022": 0.779580889163322,
+            "2201.00029": 0.9953323774367735,
+            "1601.03642": 0.9937638509508839
         },
         "tika": {
-            "1601.03642": 0.9551993153165015,
-            "1602.06541": 0.92860998828161,
-            "1707.09725": 0.9691487650775417,
-            "2201.00021": 0.9807331664030006,
-            "2201.00022": 0.9299007574327018,
-            "2201.00029": 0.9828859664925239,
-            "2201.00037": 0.9633315975122081,
-            "2201.00069": 0.9327143795813528,
+            "2201.00214": 0.9905843784546182,
+            "GeoTopo-book": 0.00047713271306082383,
             "2201.00151": 0.9216462958343726,
-            "2201.00178": 0.9663575232885726,
-            "2201.00200": 0.9774490203918432,
+            "1707.09725": 0.9520265054911324,
+            "2201.00021": 0.770705041657062,
+            "2201.00037": 0.8572065203619317,
+            "2201.00069": 0.8213549890969246,
+            "2201.00178": 0.8169214856278629,
             "2201.00201": 0.981721720946443,
-            "2201.00214": 0.9905843784546182,
-            "GeoTopo-book": 0.9826887048907266
+            "1602.06541": 0.8827184830564161,
+            "2201.00200": 0.9774490203918432,
+            "2201.00022": 0.6698371085569961,
+            "2201.00029": 0.9828859664925239,
+            "1601.03642": 0.9551993153165015
         }
     },
     "watermarking_result_file_size": {
@@ -604,54 +678,55 @@
         "pdfminer": {},
         "pdfplumber": {},
         "pdfrw": {
-            "1601.03642": 1026759,
-            "1602.06541": 3029173,
-            "1707.09725": 7251530,
-            "2201.00021": 2725055,
-            "2201.00022": 1093465,
-            "2201.00029": 828767,
-            "2201.00037": 3086248,
-            "2201.00069": 15393345,
-            "2201.00151": 1582521,
-            "2201.00178": 2379988,
-            "2201.00200": 288194,
-            "2201.00201": 1329452,
-            "2201.00214": 2515466,
-            "GeoTopo-book": 5738184
+            "2201.00214": 2515466.0,
+            "GeoTopo-book": 5738184.0,
+            "2201.00151": 1582521.0,
+            "1707.09725": 7251530.0,
+            "2201.00021": 2725055.0,
+            "2201.00037": 3086248.0,
+            "2201.00069": 15393345.0,
+            "2201.00178": 2379988.0,
+            "2201.00201": 1329452.0,
+            "1602.06541": 3029173.0,
+            "2201.00200": 288194.0,
+            "2201.00022": 1187087.0,
+            "2201.00029": 828767.0,
+            "1601.03642": 1026759.0
         },
         "pdftotext": {},
+        "playa": {},
         "pymupdf": {
-            "1601.03642": 1091306,
-            "1602.06541": 3163793,
-            "1707.09725": 8524289,
-            "2201.00021": 2802599,
-            "2201.00022": 1180780,
-            "2201.00029": 935908,
-            "2201.00037": 3395981,
-            "2201.00069": 15520607,
-            "2201.00151": 1682101,
-            "2201.00178": 2518436,
-            "2201.00200": 341709,
-            "2201.00201": 1400680,
-            "2201.00214": 2716298,
-            "GeoTopo-book": 6838694
+            "2201.00214": 2716298.0,
+            "GeoTopo-book": 6857999.0,
+            "2201.00151": 1682101.0,
+            "1707.09725": 8546399.0,
+            "2201.00021": 2804209.0,
+            "2201.00037": 3401294.0,
+            "2201.00069": 15523022.0,
+            "2201.00178": 2521012.0,
+            "2201.00201": 1402129.0,
+            "1602.06541": 3166433.0,
+            "2201.00200": 342836.0,
+            "2201.00022": 1302162.0,
+            "2201.00029": 935908.0,
+            "1601.03642": 1092594.0
         },
         "pypdf": {
-            "1601.03642": 1021439,
-            "1602.06541": 3024013,
-            "1707.09725": 7273085,
-            "2201.00021": 2727836,
-            "2201.00022": 1113365,
-            "2201.00029": 830633,
-            "2201.00037": 3113158,
-            "2201.00069": 15399764,
-            "2201.00151": 1575105,
-            "2201.00178": 2398354,
-            "2201.00200": 285365,
-            "2201.00201": 1327004,
-            "2201.00214": 2511916,
-            "GeoTopo-book": 5732063
+            "2201.00214": 2503373.0,
+            "GeoTopo-book": 5642629.0,
+            "2201.00151": 1568954.0,
+            "1707.09725": 7197331.0,
+            "2201.00021": 2713512.0,
+            "2201.00037": 3094398.0,
+            "2201.00069": 15384192.0,
+            "2201.00178": 2365349.0,
+            "2201.00201": 1315587.0,
+            "1602.06541": 3000282.0,
+            "2201.00200": 278934.0,
+            "2201.00022": 1170781.0,
+            "2201.00029": 830154.0,
+            "1601.03642": 1014378.0
         },
         "tika": {}
     }
-}
+}
\ No newline at end of file
diff --git a/pdf_benchmark/library_code.py b/pdf_benchmark/library_code.py
index 3e8a302..ab3a8cc 100644
--- a/pdf_benchmark/library_code.py
+++ b/pdf_benchmark/library_code.py
@@ -6,6 +6,7 @@
 import fitz as PyMuPDF
 import pdfminer
 import pdfplumber
+import playa
 import pypdf
 import pypdfium2 as pdfium
 from borb.pdf.pdf import PDF
@@ -16,6 +17,19 @@
 from .text_extraction_post_processing import postprocess, PDFIUM_ZERO_WIDTH_NO_BREAK_SPACE
 
 
+def playa_get_text(data: bytes) -> str:
+    """Return the postprocessed text that PLAYA extracts from the PDF bytes in ``data``."""
+    with tempfile.TemporaryDirectory() as workdir:
+        pdf_path = os.path.join(workdir, "pdf.pdf")  # PLAYA is handed a path, so spill the bytes to disk
+        with open(pdf_path, "wb") as sink:
+            sink.write(data)
+        with playa.open(pdf_path, max_workers=2) as document:
+            page_list = document.pages
+            labels = [pg.label for pg in page_list]  # collected before the text, same order as the pages
+            page_texts = list(page_list.map(playa.Page.extract_text))
+        return postprocess(page_texts, labels)
+
+
 def pymupdf_get_text(data: bytes) -> str:
     with PyMuPDF.open(stream=data, filetype="pdf") as doc:
         text = ""
@@ -190,7 +204,7 @@ def pdfplubmer_get_text(data: bytes) -> str:
     text = ""
     with pdfplumber.open(BytesIO(data)) as pdf:
         for page in pdf.pages:
-            text += page.extract_text()
+            text += page.extract_text(use_text_flow=True)
             text += "\n"
     return text
 
diff --git a/read/results/borb/2201.00022.txt b/read/results/borb/2201.00022.txt
index 2a59027..d8229b2 100644
Binary files a/read/results/borb/2201.00022.txt and b/read/results/borb/2201.00022.txt differ
diff --git a/read/results/pdfium/1601.03642.txt b/read/results/pdfium/1601.03642.txt
index 706658e..631c030 100644
--- a/read/results/pdfium/1601.03642.txt
+++ b/read/results/pdfium/1601.03642.txt
@@ -1,592 +1,592 @@
-1
-Creativity in Machine Learning
-Martin Thoma
-E-Mail: info@martin-thoma.de
-Abstract—Recent machine learning techniques can be modified
-to produce creative results. Those results did not exist before; it
-is not a trivial combination of the data which was fed into the
-machine learning system. The obtained results come in multiple
-forms: As images, as text and as audio.
-This paper gives a high level overview of how they are created
-and gives some examples. It is meant to be a summary of the
-current work and give people who are new to machine learning
-some starting points.
-I. INTRODUCTION
-According to [Gad06] creativity is “the ability to use your
-imagination to produce new ideas, make things etc.” and
-imagination is “the ability to form pictures or ideas in your
-mind”.
-Recent advances in machine learning produce results which the
-author would intuitively call creative. A high-level overview
-over several of those algorithms are described in the following.
-This paper is structured as follows: Section II introduces the
-reader on a very simple and superficial level to machine
-learning, Section III gives examples of creativity with images,
-Section IV gives examples of machines producing textual
-content, and Section V gives examples of machine learning
-and music. A discussion follows in Section VI.
-II. BASICS OF MACHINE LEARNING
-The traditional approach of solving problems with software
-is to program machines to do so. The task is divided in as
-simple sub-tasks as possible, the subtasks are analyzed and the
-machine is instructed to process the input with human-designed
-algorithms to produce the desired output. However, for some
-tasks like object recognition this approach is not feasible. There
-are way to many different objects, different lighting situations,
-variations in rotation and the arrangement of a scene for a
-human to think of all of them and model them. But with the
-internet, cheap computers, cameras, crowd-sourcing platforms
-like Wikipedia and lots of Websites, services like Amazon
-Mechanical Turk and several other changes in the past decades
-a lot of data has become available. The idea of machine learning
-is to make use of this data.
-A formal definition of the field of Machine Learning is given
-by Tom Mitchel [Mit97]:
-A computer program is said to learn from experi￾ence E with respect to some class of tasks T and
-performance measure P, if its performance at tasks
-in T, as measured by P, improves with experience E.
-Σ ϕ
-x0
-x1
-x2
-x3
-xn
-w0
-w1
-w2
-w3
-wn
-.
-.
-.
-(a) Example of an artificial neuron unit.
-xi are the input signals and wi are
-weights which have to get learned.
-Each input signal gets multiplied
-with its weight, everything gets
-summed up and the activation func￾tion ϕ is applied.
-(b) A visualization of a simple feed￾forward neural network. The 5 in￾put nodes are red, the 2 bias nodes
-are gray, the 3 hidden units are
-green and the single output node
-is blue.
-Fig. 1: Neural networks are based on simple units which get
-combined to complex networks.
-This means that machine learning programs adjust internal
-parameters to fit the data they are given. Those computer
-programs are still developed by software developers, but the
-developer writes them in a way which makes it possible to
-adjust them without having to re-program everything. Machine
-learning programs should generally improve when they are fed
-with more data.
-The field of machine learning is related to statistics. Some
-algorithms directly try to find models which are based on well￾known distribution assumptions of the developer, others are
-more general.
-A common misunderstanding of people who are not related
-in this field is that the developers don’t understand what their
-machine learning program is doing. It is understood very well
-in the sense that the developer, given only a pen, lots of paper
-and a calculator could calculate the same result as the machine
-does when he gets the same data. And lots of time, of course. It
-is not understood in the sense that it is hard to make predictions
-how the algorithm behaves without actually trying it. However,
-this is similar to expecting from an electrical engineer to
-explain how a computer works. The electrical engineer could
-probably get the knowledge he needs to do so, but the amount
-of time required to understand such a complex system from
-basic building blocks is a time-intensive and difficult task.
-An important group of machine learning algorithms was
-inspired by biological neurons and are thus called artificial
-neural networks. Those networks are based on mathematical
-functions called artificial neurons which take n ∈ N num￾bers x1, . . . , xn ∈ R as input, multiply them with weights
-w1, . . . , wn ∈ R, add them and apply a so called activation
-function ϕ as visualized in Figure 1(a). One example of such
-an activation function is the sigmoid function ϕ(x) = 1
-1+e−x .
-Those functions act as building blocks for more complex
-systems as they can be chained and grouped in layers as
-visualized in Figure 1(b). The interesting question is how
-the parameters wi are learned. This is usually done by an
-optimization technique called gradient descent. The gradient
-descent algorithm takes a function which has to be derivable,
-starts at any point of the surface of this error function and
+
+Creativity in Machine Learning
+Martin Thoma
+E-Mail: info@martin-thoma.de
+Abstract—Recent machine learning techniques can be modified
+to produce creative results. Those results did not exist before; it
+is not a trivial combination of the data which was fed into the
+machine learning system. The obtained results come in multiple
+forms: As images, as text and as audio.
+This paper gives a high level overview of how they are created
+and gives some examples. It is meant to be a summary of the
+current work and give people who are new to machine learning
+some starting points.
+I. INTRODUCTION
+According to [Gad06] creativity is “the ability to use your
+imagination to produce new ideas, make things etc.” and
+imagination is “the ability to form pictures or ideas in your
+mind”.
+Recent advances in machine learning produce results which the
+author would intuitively call creative. A high-level overview
+over several of those algorithms are described in the following.
+This paper is structured as follows: Section II introduces the
+reader on a very simple and superficial level to machine
+learning, Section III gives examples of creativity with images,
+Section IV gives examples of machines producing textual
+content, and Section V gives examples of machine learning
+and music. A discussion follows in Section VI.
+II. BASICS OF MACHINE LEARNING
+The traditional approach of solving problems with software
+is to program machines to do so. The task is divided in as
+simple sub-tasks as possible, the subtasks are analyzed and the
+machine is instructed to process the input with human-designed
+algorithms to produce the desired output. However, for some
+tasks like object recognition this approach is not feasible. There
+are way to many different objects, different lighting situations,
+variations in rotation and the arrangement of a scene for a
+human to think of all of them and model them. But with the
+internet, cheap computers, cameras, crowd-sourcing platforms
+like Wikipedia and lots of Websites, services like Amazon
+Mechanical Turk and several other changes in the past decades
+a lot of data has become available. The idea of machine learning
+is to make use of this data.
+A formal definition of the field of Machine Learning is given
+by Tom Mitchel [Mit97]:
+A computer program is said to learn from experience E with respect to some class of tasks T and
+performance measure P, if its performance at tasks
+in T, as measured by P, improves with experience E.
+Σ ϕ
+x0
+x1
+x2
+x3
+xn
+w0
+w1
+w2
+w3
+wn
+.
+.
+.
+(a) Example of an artificial neuron unit.
+xi are the input signals and wi are
+weights which have to get learned.
+Each input signal gets multiplied
+with its weight, everything gets
+summed up and the activation function ϕ is applied.
+(b) A visualization of a simple feedforward neural network. The 5 input nodes are red, the 2 bias nodes
+are gray, the 3 hidden units are
+green and the single output node
+is blue.
+Fig. 1: Neural networks are based on simple units which get
+combined to complex networks.
+This means that machine learning programs adjust internal
+parameters to fit the data they are given. Those computer
+programs are still developed by software developers, but the
+developer writes them in a way which makes it possible to
+adjust them without having to re-program everything. Machine
+learning programs should generally improve when they are fed
+with more data.
+The field of machine learning is related to statistics. Some
+algorithms directly try to find models which are based on wellknown distribution assumptions of the developer, others are
+more general.
+A common misunderstanding of people who are not related
+in this field is that the developers don’t understand what their
+machine learning program is doing. It is understood very well
+in the sense that the developer, given only a pen, lots of paper
+and a calculator could calculate the same result as the machine
+does when he gets the same data. And lots of time, of course. It
+is not understood in the sense that it is hard to make predictions
+how the algorithm behaves without actually trying it. However,
+this is similar to expecting from an electrical engineer to
+explain how a computer works. The electrical engineer could
+probably get the knowledge he needs to do so, but the amount
+of time required to understand such a complex system from
+basic building blocks is a time-intensive and difficult task.
+An important group of machine learning algorithms was
+inspired by biological neurons and are thus called artificial
+neural networks. Those networks are based on mathematical
+functions called artificial neurons which take n ∈ N numbers x1, . . . , xn ∈ R as input, multiply them with weights
+w1, . . . , wn ∈ R, add them and apply a so called activation
+function ϕ as visualized in Figure 1(a). One example of such
+an activation function is the sigmoid function ϕ(x) = 1
+1+e−x .
+Those functions act as building blocks for more complex
+systems as they can be chained and grouped in layers as
+visualized in Figure 1(b). The interesting question is how
+the parameters wi are learned. This is usually done by an
+optimization technique called gradient descent. The gradient
+descent algorithm takes a function which has to be derivable,
+starts at any point of the surface of this error function and
 arXiv:1601.03642v1 [cs.CV] 12 Jan 2016
-2
-makes a step in the direction which goes downwards. Hence
-it tries to find a minimum of this high-dimensional function.
-There is, of course, a lot more to say about machine learning.
-The interested reader might want to read the introduction given
-by Mitchell [Mit97].
-III. IMAGE DATA
-Applying a simple neural network on image data directly can
-work, but the number of parameters gets extraordinary large.
-One would take one neuron per pixel and channel. This means
-for 500 px×500 px RGB images one would get 750,000 input
-signals. To approach this problem, so called Convolutional
-Neural Networks (CNNs) were introduced. Instead of learning
-the full connection between the input layer and the first
-hidden layer, those networks make use of convolution layers.
-Convolution layers learn a convolution; this means they learn
-the weights of an image filter. An additional advantage is that
-CNNs make use of spacial relationships of the pixels instead
-of flattening the image to a stream of single numbers.
-An excellent introduction into CNNs is given by [Nie15].
-A. Google DeepDream
-The gradient descent algorithm which optimizes most of the
-parameters in neural networks is well-understood. However, the
-effect it has on the recognition system is difficult to estimate.
-[MOT15] proposes a technique to analyze the weights learned
-by such a network. A similar idea was applied by [VKMT13].
-For example, consider a neural network which was trained to
-recognize various images like bananas. This technique turns
-the network upside down and starts with random noise. To
-analyze what the network considers bananas to look like, the
-random noise image is gradually tweaked so that it generates
-the output “banana”. Additionally, the changes can be restricted
-in a way that the statistics of the input image have to be similar
-to natural images. One example of this is that neighboring
-pixels are correlated.
-Another technique is to amplify the output of layers. This was
-described in [MOT15]:
-We ask the network: “Whatever you see there, I want
-more of it!” This creates a feedback loop: if a cloud
-looks a little bit like a bird, the network will make
-it look more like a bird. This in turn will make the
-network recognize the bird even more strongly on
-the next pass and so forth, until a highly detailed
-bird appears, seemingly out of nowhere.
-The name “Inceptionism” in the title of [MOT15] comes from
-the science-fiction movie “Inception” (2010). One reason it
-might be chosen is because neural networks are structured
-in layers. Recent publications tend to have more and more
-layers [HZRS15]. The used jargon is to say they get “deeper”.
-As this technique as published by Google engineers, the
-technique is called Google DeepDream.
-Fig. 2: Aurelia aurita
-Fig. 3: DeepDream impression of Aurelia aurita
-It has become famous in the internet [Red]. Usually, the images
-are generated in iterations and in each iteration it is zoomed
-into the image.
-Images and videos published by the Google engineers can be
-seen at [goo15]. Figure 2 shows the original image from which
-Figure 3 was created with the deep dream algorithm.
-B. Artistic Style Imitation
-A key idea of neural networks is that they learn different
-representations of the data in each layer. In the case of
-CNNs, this can easily be visualized as it was done in various
-papers [ZF14]. Usually, one finds that the network learned
-to build edge detectors in the first layer and more complex
-structures in the upper layers.
-Gatys, Ecker and Bethge showed in [GEB15] that with a clever
-choice of features it is possible to separate the general style of
-an image in terms of local image appearance from the content
-of an image. They support their claim by applying the style of
+
+makes a step in the direction which goes downwards. Hence
+it tries to find a minimum of this high-dimensional function.
+There is, of course, a lot more to say about machine learning.
+The interested reader might want to read the introduction given
+by Mitchell [Mit97].
+III. IMAGE DATA
+Applying a simple neural network on image data directly can
+work, but the number of parameters gets extraordinary large.
+One would take one neuron per pixel and channel. This means
+for 500 px×500 px RGB images one would get 750,000 input
+signals. To approach this problem, so called Convolutional
+Neural Networks (CNNs) were introduced. Instead of learning
+the full connection between the input layer and the first
+hidden layer, those networks make use of convolution layers.
+Convolution layers learn a convolution; this means they learn
+the weights of an image filter. An additional advantage is that
+CNNs make use of spacial relationships of the pixels instead
+of flattening the image to a stream of single numbers.
+An excellent introduction into CNNs is given by [Nie15].
+A. Google DeepDream
+The gradient descent algorithm which optimizes most of the
+parameters in neural networks is well-understood. However, the
+effect it has on the recognition system is difficult to estimate.
+[MOT15] proposes a technique to analyze the weights learned
+by such a network. A similar idea was applied by [VKMT13].
+For example, consider a neural network which was trained to
+recognize various images like bananas. This technique turns
+the network upside down and starts with random noise. To
+analyze what the network considers bananas to look like, the
+random noise image is gradually tweaked so that it generates
+the output “banana”. Additionally, the changes can be restricted
+in a way that the statistics of the input image have to be similar
+to natural images. One example of this is that neighboring
+pixels are correlated.
+Another technique is to amplify the output of layers. This was
+described in [MOT15]:
+We ask the network: “Whatever you see there, I want
+more of it!” This creates a feedback loop: if a cloud
+looks a little bit like a bird, the network will make
+it look more like a bird. This in turn will make the
+network recognize the bird even more strongly on
+the next pass and so forth, until a highly detailed
+bird appears, seemingly out of nowhere.
+The name “Inceptionism” in the title of [MOT15] comes from
+the science-fiction movie “Inception” (2010). One reason it
+might be chosen is because neural networks are structured
+in layers. Recent publications tend to have more and more
+layers [HZRS15]. The used jargon is to say they get “deeper”.
+As this technique as published by Google engineers, the
+technique is called Google DeepDream.
+Fig. 2: Aurelia aurita
+Fig. 3: DeepDream impression of Aurelia aurita
+It has become famous in the internet [Red]. Usually, the images
+are generated in iterations and in each iteration it is zoomed
+into the image.
+Images and videos published by the Google engineers can be
+seen at [goo15]. Figure 2 shows the original image from which
+Figure 3 was created with the deep dream algorithm.
+B. Artistic Style Imitation
+A key idea of neural networks is that they learn different
+representations of the data in each layer. In the case of
+CNNs, this can easily be visualized as it was done in various
+papers [ZF14]. Usually, one finds that the network learned
+to build edge detectors in the first layer and more complex
+structures in the upper layers.
+Gatys, Ecker and Bethge showed in [GEB15] that with a clever
+choice of features it is possible to separate the general style of
+an image in terms of local image appearance from the content
+of an image. They support their claim by applying the style of
 different artists to an arbitrary image of their choice.
-3
-(a) Original Image (b) Style image
-(c) The artistic style of Van Gogh’s “Starry Night” applied to the photograph
-of a Scottish Highland Cattle.
-Fig. 4: The algorithm takes both, the original image and the
-style image to produce the result.
-This artistic style imitation can be seen itself as creative work.
-An example is given by Figure 4. The code which created this
-example is available under [Joh16].
-Something similar was done by [SPB+14], where the style of
-a portrait photograph was transferred to another photograph.
-A demo can be seen on [Shi14].
-C. Drawing Robots
-Patrick Tresset and Frdric Fol Leymarie created a system called
-AIKON (Automatic IKONic drawing) which can automatically
-generated sketches for portraits [TL05]. AIKON takes a digital
-photograph, detects faces on them and sketches them with a
-pen-plotter.
-Tresset and Leymaire use k-means clustering [KMN+02] to
-segment regions of the photograph with similar color which,
-in turn, will get a similar shading.
-Such a drawing robot could apply machine learning techniques
-known from computer vision for detecting the human. It
-could apply self-learning techniques to draw results most
-similar to the artists impression of the image. However, the
-system described in [TL05] seems not to be a machine
-learning computer program according to the definition by Tom
-Mitchell [Mit97].
-IV. TEXT DATA
-Digital text is the first form of natural communication which
-involved computers. It is used in the form of chats, websites,
-on collaborative projects like Wikipedia, in scientific literature.
-Of course, it was used in pre-digital times, too: In newspaper,
-in novels, in dramas, in religious texts like the bible, in books
-for education, in notes from conversations.
-This list could be continued and most of these kinds of texts
-are now available in digital form. This digital form can be
-used to teach machines to generate similar texts.
-The most simple language model which is of use is an n-gram
-model. This model makes use of sequences of the length n to
-model language. It can be used to get the probability of a third
-word, given the previous two words. This way, a complete text
-can be generated word by word. Refinements and extensions
-to this model are discussed in the field of Natural Language
-Processing (NLP).
-However, there are much more sophisticated models. One
-of those are character predictors based on Recurrent Neural
-Networks (RNNs). Those character predictors take a sequence
-of characters as input and predict the next character. In that
-sense they are similar to the n-gram model, but operate on
-a lower level. Using such a predictor, one can generate texts
-character by character. If the model is good, the text can have
-the correct punctuation. This would not be possible with a
-word predictor.
-Character predictors can be implemented with RNNs. In con￾trast to standard feed-forward neural networks like multilayer
-Perceptrons (MLPs) which was shown in Figure 1(b), those
-networks are trained to take their output at some point as well as
-the normal input. This means they can keep some information
-over time. One of the most common variant to implement
-RNNs is by using so called Long short-term memory (LSTM)
-cells [HS97].
-Recurrent networks apply two main ideas in order to learn: The
-first is called unrolling and means that an recurrent network
-is imagined to be an infinite network over time. At each time
-step the recurrent neurons get duplicated. The second idea is
-weight sharing which means that those unrolled neurons share
-the same weight.
-A. Similar Texts Generation
-Karpathy trained multiple character RNNs on different datasets
-and gave an excellent introduction [Kar15b]. He trained it on
-Paul Graham’s essays, all the works of Shakespeare, the Hutter
-Prize [hut] 100 MB dataset of raw Wikipedia articles, the raw
-LATEX source file of a book about algebraic stacks and geometry
-and Linux C code.
-With that training data, the models can generate similar texts.
-New works which look like Shakespeare plays, new Wikipedia
-articles, new Linux code and new papers about algebraic
+
+(a) Original Image (b) Style image
+(c) The artistic style of Van Gogh’s “Starry Night” applied to the photograph
+of a Scottish Highland Cattle.
+Fig. 4: The algorithm takes both, the original image and the
+style image to produce the result.
+This artistic style imitation can be seen itself as creative work.
+An example is given by Figure 4. The code which created this
+example is available under [Joh16].
+Something similar was done by [SPB+14], where the style of
+a portrait photograph was transferred to another photograph.
+A demo can be seen on [Shi14].
+C. Drawing Robots
+Patrick Tresset and Frédéric Fol Leymarie created a system called
+AIKON (Automatic IKONic drawing) which can automatically
+generate sketches for portraits [TL05]. AIKON takes a digital
+photograph, detects faces on them and sketches them with a
+pen-plotter.
+Tresset and Leymarie use k-means clustering [KMN+02] to
+segment regions of the photograph with similar color which,
+in turn, will get a similar shading.
+Such a drawing robot could apply machine learning techniques
+known from computer vision for detecting the human. It
+could apply self-learning techniques to draw results most
+similar to the artist’s impression of the image. However, the
+system described in [TL05] seems not to be a machine
+learning computer program according to the definition by Tom
+Mitchell [Mit97].
+IV. TEXT DATA
+Digital text is the first form of natural communication which
+involved computers. It is used in the form of chats, websites,
+on collaborative projects like Wikipedia, in scientific literature.
+Of course, it was used in pre-digital times, too: In newspaper,
+in novels, in dramas, in religious texts like the bible, in books
+for education, in notes from conversations.
+This list could be continued and most of these kinds of texts
+are now available in digital form. This digital form can be
+used to teach machines to generate similar texts.
+The most simple language model which is of use is an n-gram
+model. This model makes use of sequences of the length n to
+model language. It can be used to get the probability of a third
+word, given the previous two words. This way, a complete text
+can be generated word by word. Refinements and extensions
+to this model are discussed in the field of Natural Language
+Processing (NLP).
+However, there are much more sophisticated models. One
+of those are character predictors based on Recurrent Neural
+Networks (RNNs). Those character predictors take a sequence
+of characters as input and predict the next character. In that
+sense they are similar to the n-gram model, but operate on
+a lower level. Using such a predictor, one can generate texts
+character by character. If the model is good, the text can have
+the correct punctuation. This would not be possible with a
+word predictor.
+Character predictors can be implemented with RNNs. In contrast to standard feed-forward neural networks like multilayer
+Perceptrons (MLPs) which was shown in Figure 1(b), those
+networks are trained to take their output at some point as well as
+the normal input. This means they can keep some information
+over time. One of the most common variants to implement
+RNNs is by using so called Long short-term memory (LSTM)
+cells [HS97].
+Recurrent networks apply two main ideas in order to learn: The
+first is called unrolling and means that a recurrent network
+is imagined to be an infinite network over time. At each time
+step the recurrent neurons get duplicated. The second idea is
+weight sharing which means that those unrolled neurons share
+the same weight.
+A. Similar Texts Generation
+Karpathy trained multiple character RNNs on different datasets
+and gave an excellent introduction [Kar15b]. He trained it on
+Paul Graham’s essays, all the works of Shakespeare, the Hutter
+Prize [hut] 100 MB dataset of raw Wikipedia articles, the raw
+LaTeX source file of a book about algebraic stacks and geometry
+and Linux C code.
+With that training data, the models can generate similar texts.
+New works which look like Shakespeare plays, new Wikipedia
+articles, new Linux code and new papers about algebraic
 geometry can thus automatically be generated. At a first
-4
-glance, they do look authentic. The syntax was mostly used
-correctly, the formatting looks as expected, the sentences are
-grammatically correct. However, when one looks at the broader
-context it is easy to recognize that the algorithm has no insight
-in what it is doing. It does match patterns really well, but it
-fails to follow a central theme. In the context of C code this
-means that new variables are introduced, but not used. At the
-same time, variables which were not declared are used. In
-the context of Shakespear plays this means that a lot of new
-characters are introduced, but they don’t speak with each other
-or about each other.
-The code used to generate these examples is available and
-ready to use through [Kar15a]. A couple of examples are
-in Section A.
-B. Chatbots
-Chatbots are computer programs which participate in chat
-rooms as autonomous agents. This means they have similar
-permissions and possibilities as usual human users have, but
-users can trigger a set of commands to make the bot give them
-valuable information or features.
-A special category of chatbots are such bots which actively
-participate in the conversation, which is usually not the case.
-One of the earliest programs in this category is ELIZA, a bot
-created by Joseph Weizenbaum in the 1960s [Wei76]. This
-program had a set of patterns implemented to which it would
-reply in a seemingly smart way in a psychologists fashion.
-This means quite often the program would simply repeat the
-last sentence and add something meaningless like “How do
-you feel about it?”. According to [Cur14], Weizenbaum once
-found his secretary — who was aware of the fact that this is
-a computer program — chatting with the machine. When he
-looked over her shoulder, she asked him “would you mind
-leaving the room”.
-Today, much more sophisticated chatbots exist. They make use
-of the vast amount of data which is available by millions of
-Wikipedia articles, chat protocols, websites, help desk protocols,
-subtitles of movies as well as the astonishing increase in
-computing power to train RNNs and language models similar
-to the ones described before.
-Interesting results like the following were obtained by [VL15]:
-Human: what is the purpose of life ?
-Machine: to serve the greater good .
-Human: what is the purpose of living ?
-Machine: to live forever .
-V. AUDIO DATA
-Common machine learning tasks which involve audio data
-are speech recognition, speaker identification, identification of
-songs. This leads to some less-common, but interesting topics:
-The composition of music, the synthesizing of audio as art.
-While the composition might be considered in Section IV,
-we will now investigate the work which was done in audio
-synthesization.
-A. Emily Howell
-David Cope created a project called “Experiments in Musical
-Intelligence” (short: EMI or Emmy) in 1984 [Cop87]. He
-introduces the idea of seeing music as a language which
-can be analyzed with natural language processing (NLP)
-methods. Cope mentions that EMI was more useful to him,
-when he used the system to “create small phrase-size textures
-as next possibilities using its syntactic dictionary and rule
-base” [Cop87].
-In 2003, Cope started a new project which was based on EMI:
-Emily Howell [Cop13]. This program is able to “creat[e] both
-highly authentic replications and novel music compositions”.
-The reader might want to listen to [Cop12] to get an impression
-of the beauty of the created music.
-According to Cope, an essential part of music is “a set of
-instructions for creating different, but highly related self￾replications”. Emmy was programmed to find this set of
-instructions. It tries to find the “signature” of a composer,
-which Cope describes as “contiguous patterns that recur in two
-or more works of the composer”.
-The new feature of Emily Howell compared to Emmy is that
-Emily Howell does not necessarily remain in a single, already
-known style.
-Emily Howell makes use of association network. Cope empha￾sizes that this is not a form of a neural network. However, it
-is not clear from [Cop13] how exactly an association network
-is trained. Cope mentions that Emily Howell is explained in
-detail in [Cop05].
-B. GRUV
-Recurrent neural networks — LSTM networks, to be exact
-— are used in [NV15] together with Gated Recurrent Units
-(GRU) to build a network which can be trained to generate
-music. Instead of taking notes directly or MIDI files, Nayebi
-and Vitelli took raw audio waveforms as input. Those audio
-waveforms are feature vectors given for time steps 0, 1, . . . , t−
-1, t. The network is given those feature vectors X1, . . . , Xt
-and has to predict the following feature vector Xt+1. This
-means it continues the music. As the input is continuous, the
-problem was modeled as a regression task. Discrete Fourier
-Transformation (DFT) was used on chunks of length N of the
-music to obtain features in the frequency domain.
-An implementation can be found at [VN15] and a demonstration
-can be found at [Vit15].
-C. Audio Synthesization
-Audio synthesization is generating new audio files. This can
+
+glance, they do look authentic. The syntax was mostly used
+correctly, the formatting looks as expected, the sentences are
+grammatically correct. However, when one looks at the broader
+context it is easy to recognize that the algorithm has no insight
+in what it is doing. It does match patterns really well, but it
+fails to follow a central theme. In the context of C code this
+means that new variables are introduced, but not used. At the
+same time, variables which were not declared are used. In
+the context of Shakespeare plays this means that a lot of new
+characters are introduced, but they don’t speak with each other
+or about each other.
+The code used to generate these examples is available and
+ready to use through [Kar15a]. A couple of examples are
+in Section A.
+B. Chatbots
+Chatbots are computer programs which participate in chat
+rooms as autonomous agents. This means they have similar
+permissions and possibilities as usual human users have, but
+users can trigger a set of commands to make the bot give them
+valuable information or features.
+A special category of chatbots are such bots which actively
+participate in the conversation, which is usually not the case.
+One of the earliest programs in this category is ELIZA, a bot
+created by Joseph Weizenbaum in the 1960s [Wei76]. This
+program had a set of patterns implemented to which it would
+reply in a seemingly smart way in a psychologist’s fashion.
+This means quite often the program would simply repeat the
+last sentence and add something meaningless like “How do
+you feel about it?”. According to [Cur14], Weizenbaum once
+found his secretary — who was aware of the fact that this is
+a computer program — chatting with the machine. When he
+looked over her shoulder, she asked him “would you mind
+leaving the room”.
+Today, much more sophisticated chatbots exist. They make use
+of the vast amount of data which is available by millions of
+Wikipedia articles, chat protocols, websites, help desk protocols,
+subtitles of movies as well as the astonishing increase in
+computing power to train RNNs and language models similar
+to the ones described before.
+Interesting results like the following were obtained by [VL15]:
+Human: what is the purpose of life ?
+Machine: to serve the greater good .
+Human: what is the purpose of living ?
+Machine: to live forever .
+V. AUDIO DATA
+Common machine learning tasks which involve audio data
+are speech recognition, speaker identification, identification of
+songs. This leads to some less-common, but interesting topics:
+The composition of music, the synthesizing of audio as art.
+While the composition might be considered in Section IV,
+we will now investigate the work which was done in audio
+synthesization.
+A. Emily Howell
+David Cope created a project called “Experiments in Musical
+Intelligence” (short: EMI or Emmy) in 1984 [Cop87]. He
+introduces the idea of seeing music as a language which
+can be analyzed with natural language processing (NLP)
+methods. Cope mentions that EMI was more useful to him,
+when he used the system to “create small phrase-size textures
+as next possibilities using its syntactic dictionary and rule
+base” [Cop87].
+In 2003, Cope started a new project which was based on EMI:
+Emily Howell [Cop13]. This program is able to “creat[e] both
+highly authentic replications and novel music compositions”.
+The reader might want to listen to [Cop12] to get an impression
+of the beauty of the created music.
+According to Cope, an essential part of music is “a set of
+instructions for creating different, but highly related self-replications”. Emmy was programmed to find this set of
+instructions. It tries to find the “signature” of a composer,
+which Cope describes as “contiguous patterns that recur in two
+or more works of the composer”.
+The new feature of Emily Howell compared to Emmy is that
+Emily Howell does not necessarily remain in a single, already
+known style.
+Emily Howell makes use of an association network. Cope emphasizes that this is not a form of a neural network. However, it
+is not clear from [Cop13] how exactly an association network
+is trained. Cope mentions that Emily Howell is explained in
+detail in [Cop05].
+B. GRUV
+Recurrent neural networks — LSTM networks, to be exact
+— are used in [NV15] together with Gated Recurrent Units
+(GRU) to build a network which can be trained to generate
+music. Instead of taking notes directly or MIDI files, Nayebi
+and Vitelli took raw audio waveforms as input. Those audio
+waveforms are feature vectors given for time steps $0, 1, \dots, t-1, t$.
+The network is given those feature vectors $X_1, \dots, X_t$
+and has to predict the following feature vector $X_{t+1}$. This
+means it continues the music. As the input is continuous, the
+problem was modeled as a regression task. Discrete Fourier
+Transformation (DFT) was used on chunks of length N of the
+music to obtain features in the frequency domain.
+An implementation can be found at [VN15] and a demonstration
+can be found at [Vit15].
+C. Audio Synthesization
+Audio synthesization is generating new audio files. This can
 either be music or speech. With the techniques described before,
-5
-neural networks can be trained to generate music note by note.
-However, it is desirable to allow multiple notes being played
-at the same time.
-This idea and some others were applied by Daniel Johnson. He
-wrote a very good introduction into neural networks for music
-composition which explains those ideas [Joh15b]. Example
-compositions are available there, too. He also made the code for
-his Biaxial Recurrent Neural Network available under [Joh15a].
-VI. DISCUSSION
-What does these examples mean for our understanding of
-creativity? Does it influence how much we value art? Could
-we define art and creativity better after having those and similar
-results?
-I think we might readjust our understanding of creativity just
-like we adjusted our understanding of algorithmically hard
-problems after Deep Blue won against the reigning world
-chess champion Garry Kasparov in 1997.
-However, by now it is obvious that machine learning algorithms
-cannot compete with human artists. Today’s state of the art
-algorithms which are purely based on machine learning don’t
-follow a central theme. They lack the ability to plan. Although
-clever algorithms were implemented for composing music, it
-seems as if there is still a lot of supervision involved.
-REFERENCES
-[Cop87] D. Cope, “Experiments in music intelligence (emi),” 1987.
-[Online]. Available: http://hdl.handle.net/2027/spo.bbp2372.1987.
-025
-[Cop05] ——, Computer models of musical creativity. MIT Press
-Cambridge, 2005.
-[Cop12] ——, “Emily howell fugue,” YouTube, Oct. 2012. [Online].
-Available: https://www.youtube.com/watch?v=jLR- c uCwI
-[Cop13] ——, “The well-programmed clavier: Style in computer music
-composition,” XRDS: Crossroads, The ACM Magazine for
-Students, vol. 19, no. 4, pp. 16–20, 2013. [Online]. Available:
-http://dl.acm.org/citation.cfm?id=2460444
-[Cur14] A. Curtis, “Now then,” BBC, Jul. 2014. [On￾line]. Available: http://www.bbc.co.uk/blogs/adamcurtis/entries/
-78691781-c9b7-30a0-9a0a-3ff76e8bfe58
-[Gad06] A. Gadsby, Ed., Dictionary of Contemporary English. Pearson
-Education Limited, 2006.
-[GEB15] L. A. Gatys, A. S. Ecker, and M. Bethge, “A neural algorithm of
-artistic style,” arXiv preprint arXiv:1508.06576, 2015. [Online].
-Available: http://arxiv.org/abs/1508.06576
-[goo15] “Inceptionism: Going deeper into neural networks,” Google
-Photos, Jun. 2015. [Online]. Available: https://goo.gl/Bydofw
-[HS97] S. Hochreiter and J. Schmidhuber, “Long short-term memory,”
-Neural computation, vol. 9, no. 8, pp. 1735–1780, 1997.
-[Online]. Available: http://ieeexplore.ieee.org/xpl/freeabs all.jsp?
-arnumber=6795963
-[hut] “50’000 euro prize for compressing human knowledge.” [Online].
-Available: http://prize.hutter1.net/
-[HZRS15] K. He, X. Zhang, S. Ren, and J. Sun, “Deep residual learning
-for image recognition,” arXiv preprint arXiv:1512.03385, 2015.
-[Online]. Available: http://arxiv.org/abs/1512.03385
-[Joh15a] D. Johnson, “Biaxial recurrent neural network for music
-composition,” GitHub, Aug. 2015. [Online]. Available: https:
-//github.com/hexahedria/biaxial-rnn-music-composition
-[Joh15b] ——, “Composing music with recurrent neu￾ral networks,” Personal Blog, Aug. 2015. [On￾line]. Available: http://www.hexahedria.com/2015/08/03/
-composing-music-with-recurrent-neural-networks/
-[Joh16] J. Johnson, “neural-style,” GitHub, Jan. 2016. [Online]. Available:
-https://github.com/jcjohnson/neural-style
-[Kar15a] A. Karpathy, “char-rnn,” GitHub, Nov. 2015. [Online]. Available:
-https://github.com/karpathy/char-rnn
-[Kar15b] ——, “The unreasonable effectiveness of recurrent neural
-networks,” Personal Blog, May 2015. [Online]. Available:
-http://karpathy.github.io/2015/05/21/rnn-effectiveness/
-[KMN+02] T. Kanungo, D. Mount, N. Netanyahu, C. Piatko, R. Silverman,
-and A. Wu, “An efficient k-means clustering algorithm: analysis
-and implementation,” Pattern Analysis and Machine Intelligence,
-IEEE Transactions on, vol. 24, no. 7, pp. 881–892, Jul 2002.
-[Mit97] T. M. Mitchell, Machine learning, ser. McGraw Hill series in
-computer science. McGraw-Hill, 1997.
-[MOT15] A. Mordvintsev, C. Olah, and M. Tyka, “Inceptionism: Going
-deeper into neural networks,” googleresearch.blogspot.co.uk,
-Jun. 2015. [Online]. Available: http://googleresearch.blogspot.de/
-2015/06/inceptionism-going-deeper-into-neural.html
-[Nie15] M. A. Nielsen, Neural Networks and Deep Learn￾ing. Determination Press, 2015. [Online]. Avail￾able: http://neuralnetworksanddeeplearning.com/chap6.html#
-introducing convolutional networks
-[NV15] A. Nayebi and M. Vitelli, “GRUV: Algorithmic music generation
-using recurrent neural networks,” 2015. [Online]. Available:
-http://cs224d.stanford.edu/reports/NayebiAran.pdf
-[Red] “Deepdream,” Reddit. [Online]. Available: https://www.reddit.
-com/r/deepdream/
-[Shi14] Y. Shih, “Style transfer for headshot portraits,” YouTube, Jun.
-2014. [Online]. Available: https://www.youtube.com/watch?v=
-Hj5lGFzlubU
-[SPB+14] Y. Shih, S. Paris, C. Barnes, W. T. Freeman, and F. Durand,
-“Style transfer for headshot portraits,” ACM Transactions on
-Graphics (TOG), vol. 33, no. 4, p. 148, 2014. [Online]. Available:
-http://dl.acm.org/citation.cfm?id=2601137
-[TL05] P. Tresset and F. F. Leymarie, “Generative portrait sketching,” in
-Proceedings of VSMM, 2005, pp. 739–748.
-[Vit15] M. Vitelli, “Algorithmic music generation with recurrent
-neural networks,” YouTube, Jun. 2015. [Online]. Available:
-https://youtu.be/0VTI1BBLydE
-[VKMT13] C. Vondrick, A. Khosla, T. Malisiewicz, and A. Torralba,
-“Hoggles: Visualizing object detection features,” in Computer
-Vision (ICCV), 2013 IEEE International Conference on. IEEE,
-2013, pp. 1–8. [Online]. Available: http://ieeexplore.ieee.org/
-xpls/abs all.jsp?arnumber=6751109
-[VL15] O. Vinyals and Q. Le, “A neural conversational model,”
-arXiv preprint arXiv:1506.05869, Jul. 2015. [Online]. Available:
-http://arxiv.org/abs/1506.05869v2
-[VN15] M. Vitelli and A. Nayebi, “GRUV,” Aug. 2015. [Online].
-Available: https://github.com/MattVitelli/GRUV
-[Wei76] J. Weizenbaum, Computer Power and Human Reason: From
-Judgement to Calculation. W.H.Freeman & Co Ltd, 1976.
-[ZF14] M. D. Zeiler and R. Fergus, “Visualizing and understanding con￾volutional networks,” in Computer Vision–ECCV 2014. Springer,
+
+neural networks can be trained to generate music note by note.
+However, it is desirable to allow multiple notes being played
+at the same time.
+This idea and some others were applied by Daniel Johnson. He
+wrote a very good introduction into neural networks for music
+composition which explains those ideas [Joh15b]. Example
+compositions are available there, too. He also made the code for
+his Biaxial Recurrent Neural Network available under [Joh15a].
+VI. DISCUSSION
+What do these examples mean for our understanding of
+creativity? Does it influence how much we value art? Could
+we define art and creativity better after having those and similar
+results?
+I think we might readjust our understanding of creativity just
+like we adjusted our understanding of algorithmically hard
+problems after Deep Blue won against the reigning world
+chess champion Garry Kasparov in 1997.
+However, by now it is obvious that machine learning algorithms
+cannot compete with human artists. Today’s state of the art
+algorithms which are purely based on machine learning don’t
+follow a central theme. They lack the ability to plan. Although
+clever algorithms were implemented for composing music, it
+seems as if there is still a lot of supervision involved.
+REFERENCES
+[Cop87] D. Cope, “Experiments in music intelligence (emi),” 1987.
+[Online]. Available: http://hdl.handle.net/2027/spo.bbp2372.1987.
+025
+[Cop05] ——, Computer models of musical creativity. MIT Press
+Cambridge, 2005.
+[Cop12] ——, “Emily howell fugue,” YouTube, Oct. 2012. [Online].
+Available: https://www.youtube.com/watch?v=jLR-_c_uCwI
+[Cop13] ——, “The well-programmed clavier: Style in computer music
+composition,” XRDS: Crossroads, The ACM Magazine for
+Students, vol. 19, no. 4, pp. 16–20, 2013. [Online]. Available:
+http://dl.acm.org/citation.cfm?id=2460444
+[Cur14] A. Curtis, “Now then,” BBC, Jul. 2014. [Online]. Available: http://www.bbc.co.uk/blogs/adamcurtis/entries/
+78691781-c9b7-30a0-9a0a-3ff76e8bfe58
+[Gad06] A. Gadsby, Ed., Dictionary of Contemporary English. Pearson
+Education Limited, 2006.
+[GEB15] L. A. Gatys, A. S. Ecker, and M. Bethge, “A neural algorithm of
+artistic style,” arXiv preprint arXiv:1508.06576, 2015. [Online].
+Available: http://arxiv.org/abs/1508.06576
+[goo15] “Inceptionism: Going deeper into neural networks,” Google
+Photos, Jun. 2015. [Online]. Available: https://goo.gl/Bydofw
+[HS97] S. Hochreiter and J. Schmidhuber, “Long short-term memory,”
+Neural computation, vol. 9, no. 8, pp. 1735–1780, 1997.
+[Online]. Available: http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?
+arnumber=6795963
+[hut] “50’000 euro prize for compressing human knowledge.” [Online].
+Available: http://prize.hutter1.net/
+[HZRS15] K. He, X. Zhang, S. Ren, and J. Sun, “Deep residual learning
+for image recognition,” arXiv preprint arXiv:1512.03385, 2015.
+[Online]. Available: http://arxiv.org/abs/1512.03385
+[Joh15a] D. Johnson, “Biaxial recurrent neural network for music
+composition,” GitHub, Aug. 2015. [Online]. Available: https:
+//github.com/hexahedria/biaxial-rnn-music-composition
+[Joh15b] ——, “Composing music with recurrent neural networks,” Personal Blog, Aug. 2015. [Online]. Available: http://www.hexahedria.com/2015/08/03/
+composing-music-with-recurrent-neural-networks/
+[Joh16] J. Johnson, “neural-style,” GitHub, Jan. 2016. [Online]. Available:
+https://github.com/jcjohnson/neural-style
+[Kar15a] A. Karpathy, “char-rnn,” GitHub, Nov. 2015. [Online]. Available:
+https://github.com/karpathy/char-rnn
+[Kar15b] ——, “The unreasonable effectiveness of recurrent neural
+networks,” Personal Blog, May 2015. [Online]. Available:
+http://karpathy.github.io/2015/05/21/rnn-effectiveness/
+[KMN+02] T. Kanungo, D. Mount, N. Netanyahu, C. Piatko, R. Silverman,
+and A. Wu, “An efficient k-means clustering algorithm: analysis
+and implementation,” Pattern Analysis and Machine Intelligence,
+IEEE Transactions on, vol. 24, no. 7, pp. 881–892, Jul 2002.
+[Mit97] T. M. Mitchell, Machine learning, ser. McGraw Hill series in
+computer science. McGraw-Hill, 1997.
+[MOT15] A. Mordvintsev, C. Olah, and M. Tyka, “Inceptionism: Going
+deeper into neural networks,” googleresearch.blogspot.co.uk,
+Jun. 2015. [Online]. Available: http://googleresearch.blogspot.de/
+2015/06/inceptionism-going-deeper-into-neural.html
+[Nie15] M. A. Nielsen, Neural Networks and Deep Learning. Determination Press, 2015. [Online]. Available: http://neuralnetworksanddeeplearning.com/chap6.html#
+introducing_convolutional_networks
+[NV15] A. Nayebi and M. Vitelli, “GRUV: Algorithmic music generation
+using recurrent neural networks,” 2015. [Online]. Available:
+http://cs224d.stanford.edu/reports/NayebiAran.pdf
+[Red] “Deepdream,” Reddit. [Online]. Available: https://www.reddit.
+com/r/deepdream/
+[Shi14] Y. Shih, “Style transfer for headshot portraits,” YouTube, Jun.
+2014. [Online]. Available: https://www.youtube.com/watch?v=
+Hj5lGFzlubU
+[SPB+14] Y. Shih, S. Paris, C. Barnes, W. T. Freeman, and F. Durand,
+“Style transfer for headshot portraits,” ACM Transactions on
+Graphics (TOG), vol. 33, no. 4, p. 148, 2014. [Online]. Available:
+http://dl.acm.org/citation.cfm?id=2601137
+[TL05] P. Tresset and F. F. Leymarie, “Generative portrait sketching,” in
+Proceedings of VSMM, 2005, pp. 739–748.
+[Vit15] M. Vitelli, “Algorithmic music generation with recurrent
+neural networks,” YouTube, Jun. 2015. [Online]. Available:
+https://youtu.be/0VTI1BBLydE
+[VKMT13] C. Vondrick, A. Khosla, T. Malisiewicz, and A. Torralba,
+“Hoggles: Visualizing object detection features,” in Computer
+Vision (ICCV), 2013 IEEE International Conference on. IEEE,
+2013, pp. 1–8. [Online]. Available: http://ieeexplore.ieee.org/
+xpls/abs_all.jsp?arnumber=6751109
+[VL15] O. Vinyals and Q. Le, “A neural conversational model,”
+arXiv preprint arXiv:1506.05869, Jul. 2015. [Online]. Available:
+http://arxiv.org/abs/1506.05869v2
+[VN15] M. Vitelli and A. Nayebi, “GRUV,” Aug. 2015. [Online].
+Available: https://github.com/MattVitelli/GRUV
+[Wei76] J. Weizenbaum, Computer Power and Human Reason: From
+Judgement to Calculation. W.H.Freeman & Co Ltd, 1976.
+[ZF14] M. D. Zeiler and R. Fergus, “Visualizing and understanding convolutional networks,” in Computer Vision–ECCV 2014. Springer,
 2014, pp. 818–833.
-6
-APPENDIX A
-AUTOMATICALLY GENERATED TEXTS
-The following texts were generated by [Kar15a] and published by Karpathy on [Kar15b]. This is meant to be a copy for
-convenience and the case that the website is not reachable.
-A. Shakespeare
-PANDARUS:
-Alas, I think he shall be come approached and the day
-When little srain would be attain’d into being never fed,
-And who is but a chain and subjects of his death,
-I should not sleep.
-Second Senator:
-They are away this miseries, produced upon my soul,
-Breaking and strongly should be buried, when I perish
-The earth and thoughts of many states.
-DUKE VINCENTIO:
-Well, your wit is in the care of side and that.
-Second Lord:
-They would be ruled after this chamber, and
-my fair nues begun out of the fact, to be conveyed,
-Whose noble souls I’ll have the heart of the wars.
-Clown:
-Come, sir, I will make did behold your worship.
-VIOLA:
-I’ll drink it.
-B. Wikipeda
-Naturalism and decision for the majority of Arab countries’ capitalide was grounded
-by the Irish language by [[John Clair]], [[An Imperial Japanese Revolt]], associated
-with Guangzham’s sovereignty. His generals were the powerful ruler of the Portugal
-in the [[Protestant Immineners]], which could be said to be directly in Cantonese
-Communication, which followed a ceremony and set inspired prison, training. The
-emperor travelled back to [[Antioch, Perth, October 25|21]] to note, the Kingdom
-of Costa Rica, unsuccessful fashioned the [[Thrales]], [[Cynth’s Dajoard]], known
-in western [[Scotland]], near Italy to the conquest of India with the conflict.
-Copyright was the succession of independence in the slop of Syrian influence that
-was a famous German movement based on a more popular servicious, non-doctrinal
-and sexual power post. Many governments recognize the military housing of the
-[[Civil Liberalization and Infantry Resolution 265 National Party in Hungary]],
-that is sympathetic to be to the [[Punjab Resolution]]
-(PJS)[http://www.humah.yahoo.com/guardian.
-cfm/7754800786d17551963s89.htm Official economics Adjoint for the Nazism, Montgomery
-was swear to advance to the resources for those Socialism’s rule,
-was starting to signing a major tripad of aid exile.]]
-C. Linux Code, 1
+
+APPENDIX A
+AUTOMATICALLY GENERATED TEXTS
+The following texts were generated by [Kar15a] and published by Karpathy on [Kar15b]. This is meant to be a copy for
+convenience and the case that the website is not reachable.
+A. Shakespeare
+PANDARUS:
+Alas, I think he shall be come approached and the day
+When little srain would be attain’d into being never fed,
+And who is but a chain and subjects of his death,
+I should not sleep.
+Second Senator:
+They are away this miseries, produced upon my soul,
+Breaking and strongly should be buried, when I perish
+The earth and thoughts of many states.
+DUKE VINCENTIO:
+Well, your wit is in the care of side and that.
+Second Lord:
+They would be ruled after this chamber, and
+my fair nues begun out of the fact, to be conveyed,
+Whose noble souls I’ll have the heart of the wars.
+Clown:
+Come, sir, I will make did behold your worship.
+VIOLA:
+I’ll drink it.
+B. Wikipeda
+Naturalism and decision for the majority of Arab countries’ capitalide was grounded
+by the Irish language by [[John Clair]], [[An Imperial Japanese Revolt]], associated
+with Guangzham’s sovereignty. His generals were the powerful ruler of the Portugal
+in the [[Protestant Immineners]], which could be said to be directly in Cantonese
+Communication, which followed a ceremony and set inspired prison, training. The
+emperor travelled back to [[Antioch, Perth, October 25|21]] to note, the Kingdom
+of Costa Rica, unsuccessful fashioned the [[Thrales]], [[Cynth’s Dajoard]], known
+in western [[Scotland]], near Italy to the conquest of India with the conflict.
+Copyright was the succession of independence in the slop of Syrian influence that
+was a famous German movement based on a more popular servicious, non-doctrinal
+and sexual power post. Many governments recognize the military housing of the
+[[Civil Liberalization and Infantry Resolution 265 National Party in Hungary]],
+that is sympathetic to be to the [[Punjab Resolution]]
+(PJS)[http://www.humah.yahoo.com/guardian.
+cfm/7754800786d17551963s89.htm Official economics Adjoint for the Nazism, Montgomery
+was swear to advance to the resources for those Socialism’s rule,
+was starting to signing a major tripad of aid exile.]]
+C. Linux Code, 1
 /*
-7
-* Increment the size file of the new incorrect UI_FILTER group information
-* of the size generatively.
-*/
-static int indicate_policy(void)
-{
-int error;
-if (fd == MARN_EPT) {
-/*
-* The kernel blank will coeld it to userspace.
-*/
-if (ss->segment < mem_total)
-unblock_graph_and_set_blocked();
-else
-ret = 1;
-goto bail;
-}
-segaddr = in_SB(in.addr);
-selector = seg / 16;
-setup_works = true;
-for (i = 0; i < blocks; i++) {
-seq = buf[i++];
-bpf = bd->bd.next + i * search;
-if (fd) {
-current = blocked;
-}
-}
-rw->name = "Getjbbregs";
-bprm_self_clearl(&iv->version);
-regs->new = blocks[(BPF_STATS << info->historidac)] | PFMR_CLOBATHINC_SECONDS << 12;
-return segtable;
-}
-D. Linux Code, 2
-/*
-* Copyright (c) 2006-2010, Intel Mobile Communications. All rights reserved.
-*
-* This program is free software; you can redistribute it and/or modify it
-* under the terms of the GNU General Public License version 2 as published by
-* the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-*
-* GNU General Public License for more details.
-*
-* You should have received a copy of the GNU General Public License
-* along with this program; if not, write to the Free Software Foundation,
-* Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-*/
-#include <linux/kexec.h>
-#include <linux/errno.h>
-#include <linux/io.h>
-#include <linux/platform_device.h>
-#include <linux/multi.h>
-8
-#include <linux/ckevent.h>
-#include <asm/io.h>
-#include <asm/prom.h>
-#include <asm/e820.h>
-#include <asm/system_info.h>
-#include <asm/setew.h>
-#include <asm/pgproto.h>
-#define REG_PG vesa_slot_addr_pack
-#define PFM_NOCOMP AFSR(0, load)
-#define STACK_DDR(type) (func)
-#define SWAP_ALLOCATE(nr) (e)
-#define emulate_sigs() arch_get_unaligned_child()
-#define access_rw(TST) asm volatile("movd %%esp, %0, %3" : : "r" (0)); \
-if (__type & DO_READ)
-static void stat_PC_SEC __read_mostly offsetof(struct seq_argsqueue, \
-pC>[1]);
-static void
-os_prefix(unsigned long sys)
-{
-#ifdef CONFIG_PREEMPT
-PUT_PARAM_RAID(2, sel) = get_state_state();
-set_pid_sum((unsigned long)state, current_state_str(),
-(unsigned long)-1->lr_full; low;
+
+* Increment the size file of the new incorrect UI_FILTER group information
+* of the size generatively.
+*/
+static int indicate_policy(void)
+{
+int error;
+if (fd == MARN_EPT) {
+/*
+* The kernel blank will coeld it to userspace.
+*/
+if (ss->segment < mem_total)
+unblock_graph_and_set_blocked();
+else
+ret = 1;
+goto bail;
+}
+segaddr = in_SB(in.addr);
+selector = seg / 16;
+setup_works = true;
+for (i = 0; i < blocks; i++) {
+seq = buf[i++];
+bpf = bd->bd.next + i * search;
+if (fd) {
+current = blocked;
+}
 }
+rw->name = "Getjbbregs";
+bprm_self_clearl(&iv->version);
+regs->new = blocks[(BPF_STATS << info->historidac)] | PFMR_CLOBATHINC_SECONDS << 12;
+return segtable;
+}
+D. Linux Code, 2
+/*
+* Copyright (c) 2006-2010, Intel Mobile Communications. All rights reserved.
+*
+* This program is free software; you can redistribute it and/or modify it
+* under the terms of the GNU General Public License version 2 as published by
+* the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+*
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software Foundation,
+* Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+#include <linux/kexec.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/multi.h>
+
+#include <linux/ckevent.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/e820.h>
+#include <asm/system_info.h>
+#include <asm/setew.h>
+#include <asm/pgproto.h>
+#define REG_PG vesa_slot_addr_pack
+#define PFM_NOCOMP AFSR(0, load)
+#define STACK_DDR(type) (func)
+#define SWAP_ALLOCATE(nr) (e)
+#define emulate_sigs() arch_get_unaligned_child()
+#define access_rw(TST) asm volatile("movd %%esp, %0, %3" : : "r" (0)); \
+if (__type & DO_READ)
+static void stat_PC_SEC __read_mostly offsetof(struct seq_argsqueue, \
+pC>[1]);
+static void
+os_prefix(unsigned long sys)
+{
+#ifdef CONFIG_PREEMPT
+PUT_PARAM_RAID(2, sel) = get_state_state();
+set_pid_sum((unsigned long)state, current_state_str(),
+(unsigned long)-1->lr_full; low;
+}
\ No newline at end of file
diff --git a/read/results/pdfium/1602.06541.txt b/read/results/pdfium/1602.06541.txt
index 8d5991e..5abc5ff 100644
--- a/read/results/pdfium/1602.06541.txt
+++ b/read/results/pdfium/1602.06541.txt
@@ -1,1610 +1,1595 @@
-1
-A Survey of Semantic Segmentation
-Martin Thoma
-info@martin-thoma.de
-Abstract—This survey gives an overview over different
-techniques used for pixel-level semantic segmentation.
-Metrics and datasets for the evaluation of segmenta￾tion algorithms and traditional approaches for segmen￾tation such as unsupervised methods, Decision Forests
-and SVMs are described and pointers to the relevant
-papers are given. Recently published approaches with
-convolutional neural networks are mentioned and typical
-problematic situations for segmentation algorithms are
-examined. A taxonomy of segmentation algorithms is
-given.
-I. INTRODUCTION
-Semantic segmentation is the task of clustering
-parts of images together which belong to the same
-object class. This type of algorithm has several use￾cases such as detecting road signs [MBLAGJ+07],
-detecting tumors [MBVLG02], detecting medical in￾struments in operations [WAH97], colon crypts segmen￾tation [CRSS14], land use and land cover classifica￾tion [HDT02]. In contrast, non-semantic segmentation
-only clusters pixels together based on general character￾istics of single objects. Hence the task of non-semantic
-segmentation is not well-defined, as many different
-segmentations might be acceptable.
-Several applications of segmentation in medicine are
-listed in [PXP00].
-Object detection, in comparison to semantic seg￾mentation, has to distinguish different instances of the
-same object. While having a semantic segmentation
-is certainly a big advantage when trying to get object
-instances, there are a couple of problems: neighboring
-pixels of the same class might belong to different object
-instances and regions which are not connected my
-belong to the same object instance. For example, a
-tree in front of a car which visually divides the car into
-two parts.
-This paper is organized as follows: It begins by giving
-a taxonomy of segmentation algorithms in Section II.
-A summary of quality measures and datasets which are
-used for semantic segmentation follows in Section III.
-A summary of traditional segmentation algorithms and
-their characteristics follows in Section V, as well as a
-brief, non-exhaustive summary of recently published
-semantic segmentation algorithms which are based on
-neural networks in Section VI. Finally, Section VII
-informs the reader about typical problematic cases for
-segmentation algorithms.
-II. TAXONOMY OF SEGMENTATION ALGORITHMS
-The computer vision community has published a
-wide range of segmentation algorithms so far. Those
-algorithms can be grouped by the kind of data they
-operate on and the kind of segmentation they are able
-to produce.
-The following subsections will give four different
-criteria by which segmentation algorithms can be
-classified.
-This survey describes fixed-class (see Section II-A),
-single-class affiliation (see Section II-B) algorithms
-which work on grayscale or colored single pixel images
-(see Section II-C) in a completely automated, passive
-fashion (see Section II-D).
-A. Allowed classes
-Semantic segmentation is a classification task. As
-such, the classes on which the algorithm is trained is a
-central design decision.
-Most algorithms work with a fixed set of classes;
-some even only work on binary classes like fore￾ground vs background [RM07], [CS10] or street vs
-no street [BKTT15].
-However, there are also unsupervised segmentation
-algorithms which do not distinguish classes at all (see
-Section V-B) as well as segmentation algorithms which
-are able to recognize when they don’t know a class.
-For example, in [GRC+08] a void class was added
-for classes which were not in the training set. Such
-a void class was also used in the MSRCv2 dataset
-(see Section III-B2) to make it possible to make more
-coarse segmentations and thus having to spend less
-time annotating the image.
-B. Class affiliation of pixels
-Humans do an incredible job when looking at the
-world. For example, when we see a glass of water
-standing on a table we can automatically say that there
-is the glass and behind it the table, even if we only had a
-single image and were not allowed to move. This means
-we simultaneously two labels to the coordinates of the
-glass: Glass and table. Although there is much more
-work being done on single class affiliation segmenta￾tion algorithms, there is a publication about multiple
-class affiliation segmentation [LRAL08]. Similarly,
-recent publications in pixel-level object segmentation
-used layered models [YHRF12].
+
+A Survey of Semantic Segmentation
+Martin Thoma
+info@martin-thoma.de
+Abstract—This survey gives an overview over different
+techniques used for pixel-level semantic segmentation.
+Metrics and datasets for the evaluation of segmentation algorithms and traditional approaches for segmentation such as unsupervised methods, Decision Forests
+and SVMs are described and pointers to the relevant
+papers are given. Recently published approaches with
+convolutional neural networks are mentioned and typical
+problematic situations for segmentation algorithms are
+examined. A taxonomy of segmentation algorithms is
+given.
+I. INTRODUCTION
+Semantic segmentation is the task of clustering
+parts of images together which belong to the same
+object class. This type of algorithm has several usecases such as detecting road signs [MBLAGJ+07],
+detecting tumors [MBVLG02], detecting medical instruments in operations [WAH97], colon crypts segmentation [CRSS14], land use and land cover classification [HDT02]. In contrast, non-semantic segmentation
+only clusters pixels together based on general characteristics of single objects. Hence the task of non-semantic
+segmentation is not well-defined, as many different
+segmentations might be acceptable.
+Several applications of segmentation in medicine are
+listed in [PXP00].
+Object detection, in comparison to semantic segmentation, has to distinguish different instances of the
+same object. While having a semantic segmentation
+is certainly a big advantage when trying to get object
+instances, there are a couple of problems: neighboring
+pixels of the same class might belong to different object
+instances and regions which are not connected my
+belong to the same object instance. For example, a
+tree in front of a car which visually divides the car into
+two parts.
+This paper is organized as follows: It begins by giving
+a taxonomy of segmentation algorithms in Section II.
+A summary of quality measures and datasets which are
+used for semantic segmentation follows in Section III.
+A summary of traditional segmentation algorithms and
+their characteristics follows in Section V, as well as a
+brief, non-exhaustive summary of recently published
+semantic segmentation algorithms which are based on
+neural networks in Section VI. Finally, Section VII
+informs the reader about typical problematic cases for
+segmentation algorithms.
+II. TAXONOMY OF SEGMENTATION ALGORITHMS
+The computer vision community has published a
+wide range of segmentation algorithms so far. Those
+algorithms can be grouped by the kind of data they
+operate on and the kind of segmentation they are able
+to produce.
+The following subsections will give four different
+criteria by which segmentation algorithms can be
+classified.
+This survey describes fixed-class (see Section II-A),
+single-class affiliation (see Section II-B) algorithms
+which work on grayscale or colored single pixel images
+(see Section II-C) in a completely automated, passive
+fashion (see Section II-D).
+A. Allowed classes
+Semantic segmentation is a classification task. As
+such, the classes on which the algorithm is trained is a
+central design decision.
+Most algorithms work with a fixed set of classes;
+some even only work on binary classes like foreground vs background [RM07], [CS10] or street vs
+no street [BKTT15].
+However, there are also unsupervised segmentation
+algorithms which do not distinguish classes at all (see
+Section V-B) as well as segmentation algorithms which
+are able to recognize when they don’t know a class.
+For example, in [GRC+08] a void class was added
+for classes which were not in the training set. Such
+a void class was also used in the MSRCv2 dataset
+(see Section III-B2) to make it possible to make more
+coarse segmentations and thus having to spend less
+time annotating the image.
+B. Class affiliation of pixels
+Humans do an incredible job when looking at the
+world. For example, when we see a glass of water
+standing on a table we can automatically say that there
+is the glass and behind it the table, even if we only had a
+single image and were not allowed to move. This means
+we simultaneously two labels to the coordinates of the
+glass: Glass and table. Although there is much more
+work being done on single class affiliation segmentation algorithms, there is a publication about multiple
+class affiliation segmentation [LRAL08]. Similarly,
+recent publications in pixel-level object segmentation
+used layered models [YHRF12].
 arXiv:1602.06541v2 [cs.CV] 11 May 2016
-2
-C. Input Data
-The available data which can be used for the
-inference of a segmentation varies by application.
-• Grayscale vs colored: Grayscale images are
-commonly used in medical imaging such as
-magnetic resonance (MR) imaging or ultrasonog￾raphy whereas colored photographs are obviously
-widespread.
-• Excluding or including depth data: RGB-D,
-sometimes also called range [HJBJ+96] is avail￾able in robotics, autonomous cars and recently
-also in consumer electronics such as Microsoft
-Kinect [Zha12].
-• Single image vs stereo images vs co￾segmentation: Single image segmentation is the
-most wide-spread kind of segmentation, but using
-stereo images was already tried in [BVZ01]. It can
-be seen as a more natural way of segmentation as
-most mammals have two eyes. It can also be seen
-as being related to having depth data.
-Co-segmentation as in [RMBK06], [CXGS12] is
-the problem of finding a consistent segmentation
-for multiple images. This problem can be seen
-in two ways: One the one hand, it can be seen
-as the problem of finding common objects in at
-least two images. On the other hand, every image
-after the first can be used as an additional source
-of information to find a meaningful segmentation.
-This idea can be extended to time series such as
-videos.
-• 2D vs 3D: Segmenting images is a 2D segmenta￾tion task where the smallest unit is called a pixel.
-In 3D data, such as volumetric X-ray CT images
-as they were used in [HHR01], the smallest unit
-is called a voxel.
-D. Operation state
-The operation state of the classifying machine can
-either be active as in [SUM+11], [SSA12] where robots
-can move objects to find a segmentation or passive,
-where the received image cannot be influenced. Among
-the passive algorithms, some segment in a completely
-automatic fashion, others work in an interactive mode.
-One example would be a system where the user clicks
-on the background or marks a coarse segmentation and
-the algorithm finds a fine-grained segmentation. [BJ00],
-[RKB04], [PS07] describe systems which work in an
-interactive mode.
-(a) Example Scene (b) Visualization of a found seg￾mentation
-Figure 1: An example of a scene and a possible visu￾alization of a found segmentation.
-III. EVALUATION AND DATASETS
-A. Quality measures for evaluation
-A performance measure is a crucial part of any
-machine learning system. As users of a semantic
-segmentation system expect correct results, the accuracy
-is the most commonly used performance measure, but
-there are other measures of quality which matter when
-segmentation algorithms are compared. This section
-gives an overview of those quality measures.
-1) Accuracy: Showing the correctness of the segmen￾tation hypotheses is done in most publications about
-semantic segmentation. However, there are a couple
-of different ways how this accuracy can be displayed.
-One way to give readers a first qualitative impression
-of the obtained segmentations is by showing examples
-such as Figure 1.
-However, this can only support the explanation of
-particular problems or showcase special situation. For
-meaningful information about the overall accuracy, there
-are a couple of metrics how accuracy can be defined.
-For this section, let k ∈ N be the number of classes,
-nij ∈ N0 with i, j ∈ 1, . . . , k be the number of pixels
-which belong to class i and were labeled as class j.
-(nij ) is called a confusion matrix. Let ti =
-Pk
-j=1 nij
-be the total number of pixels of class i.
-One way to compare segmentation algorithms is by
-the pixel-wise accuracy of the predicted segmentation
-as done in many publications [SWRC06], [CP08],
-[LSD14]. This is also called per-pixel rate and de￾fined as
-Pk
-Pi=1 nii
-k
-i=1 ti
-. Taking the pixel-wise classification
-accuracy has two major drawbacks:
-P1 Tasks like segmenting images for autonomous cars
-have large regions which have one class. This
-makes achieving classification accuracies of more
-than 30 % with a priori knowledge only possible.
-For example, a system might learn that a certain
-position of the image is most of the time “sky”
+
+C. Input Data
+The available data which can be used for the
+inference of a segmentation varies by application.
+• Grayscale vs colored: Grayscale images are
+commonly used in medical imaging such as
+magnetic resonance (MR) imaging or ultrasonography whereas colored photographs are obviously
+widespread.
+• Excluding or including depth data: RGB-D,
+sometimes also called range [HJBJ+96] is available in robotics, autonomous cars and recently
+also in consumer electronics such as Microsoft
+Kinect [Zha12].
+• Single image vs stereo images vs cosegmentation: Single image segmentation is the
+most wide-spread kind of segmentation, but using
+stereo images was already tried in [BVZ01]. It can
+be seen as a more natural way of segmentation as
+most mammals have two eyes. It can also be seen
+as being related to having depth data.
+Co-segmentation as in [RMBK06], [CXGS12] is
+the problem of finding a consistent segmentation
+for multiple images. This problem can be seen
+in two ways: One the one hand, it can be seen
+as the problem of finding common objects in at
+least two images. On the other hand, every image
+after the first can be used as an additional source
+of information to find a meaningful segmentation.
+This idea can be extended to time series such as
+videos.
+• 2D vs 3D: Segmenting images is a 2D segmentation task where the smallest unit is called a pixel.
+In 3D data, such as volumetric X-ray CT images
+as they were used in [HHR01], the smallest unit
+is called a voxel.
+D. Operation state
+The operation state of the classifying machine can
+either be active as in [SUM+11], [SSA12] where robots
+can move objects to find a segmentation or passive,
+where the received image cannot be influenced. Among
+the passive algorithms, some segment in a completely
+automatic fashion, others work in an interactive mode.
+One example would be a system where the user clicks
+on the background or marks a coarse segmentation and
+the algorithm finds a fine-grained segmentation. [BJ00],
+[RKB04], [PS07] describe systems which work in an
+interactive mode.
+(a) Example Scene (b) Visualization of a found segmentation
+Figure 1: An example of a scene and a possible visualization of a found segmentation.
+III. EVALUATION AND DATASETS
+A. Quality measures for evaluation
+A performance measure is a crucial part of any
+machine learning system. As users of a semantic
+segmentation system expect correct results, the accuracy
+is the most commonly used performance measure, but
+there are other measures of quality which matter when
+segmentation algorithms are compared. This section
+gives an overview of those quality measures.
+1) Accuracy: Showing the correctness of the segmentation hypotheses is done in most publications about
+semantic segmentation. However, there are a couple
+of different ways how this accuracy can be displayed.
+One way to give readers a first qualitative impression
+of the obtained segmentations is by showing examples
+such as Figure 1.
+However, this can only support the explanation of
+particular problems or showcase special situation. For
+meaningful information about the overall accuracy, there
+are a couple of metrics how accuracy can be defined.
+For this section, let k ∈ N be the number of classes,
+nij ∈ N0 with i, j ∈ 1, . . . , k be the number of pixels
+which belong to class i and were labeled as class j.
+(nij ) is called a confusion matrix. Let ti =
+Pk
+j=1 nij
+be the total number of pixels of class i.
+One way to compare segmentation algorithms is by
+the pixel-wise accuracy of the predicted segmentation
+as done in many publications [SWRC06], [CP08],
+[LSD14]. This is also called per-pixel rate and defined as
+Pk
+Pi=1 nii
+k
+i=1 ti
+. Taking the pixel-wise classification
+accuracy has two major drawbacks:
+P1 Tasks like segmenting images for autonomous cars
+have large regions which have one class. This
+makes achieving classification accuracies of more
+than 30 % with a priori knowledge only possible.
+For example, a system might learn that a certain
+position of the image is most of the time “sky”
 while another position is most of the time “road”.
-3
-P2 The manually labeled images could have a more
-coarse labeling. For example, a human classifier
-could have labeled a region as “car” and the
-algorithm could have split that region into the
-general “car” and the more specific “wheel of a
-car”
-Three accuracy metrics which do not suffer from
-problem P1 are used in [LSD14]:
-• mean accuracy:
-1
-k
-·
-Pk
-i=1
-nii
-ti
-∈ [0, 1]
-• mean intersection over union:
-1
-k
-·
-Pk
-i=1
-nii
-ti−nii+
-Pk
-j=1 nji
-∈ [0, 1]
-• frequency weighted intersection over union:
-(
-Pk
-i=1 ti)
-−1 Pk
-i=1 ti
-·
-nii
-ti−nii+
-Pk
-j=1 nji
-∈ [0, 1]
-Another problem might be pixels which cannot be
-assigned to one of the known classes. For this reason,
-[SWRC06] makes use of a void class. This class gets
-completely ignored for all quality measures. Hence the
-total number of pixels is assumed to be width·height−
-number of void pixels.
-One way to deal with problem P1 and problem P2
-is giving the confusion matrix as done in [SWRC06].
-However, this approach is not feasible if many classes
-are given.
-The F-measure is useful for binary classifica￾tion task such as the KITTI road segmentation
-benchmark [FKG13] or crypt segmentation as done
-by [CRSS14]. It is calculated as “the harmonic mean
-of the precision and recall” [PH05]:
-Fβ = (1 + β)
-2
-tp
-(1 + β
-2) · tp + β
-2 · fn + fp
-where β = 1 is chosen in most cases and tp means
-true positive, fn means false negative and fp means
-false positive.
-Finally, it should be noted that a lot of other measures
-for the accuracy of segmentations were proposed for
-non-semantic segmentation. One of those accuracy
-measures is Normalized Probabilistic Rand (NPR)
-index which was introduced in [UPH05] and eval￾uated in [CSI+09] on dermoscopy images. Other
-non-semantic segmentation measures were introduced
-in [MFTM01], but the reason for creating them seems to
-be to deal with the under-defined task description of non￾semantic segmentation. These accuracy measures try to
-deal with different levels of coarsity of the segmentation.
-This is much less of a problem in semantic segmentation
-and thus those measures are not explained here.
-2) Speed: A maximum upper bound on the execution
-time for the inference on a single image is a hard
-requirement for some applications. For example, in the
-case of autonomous cars an algorithm which classifies
-pixel as street or no-street and thus makes a semantic
-segmentation, every image needs to be processed within
-20 ms [BKTT15]. This time is called latency.
-Most papers do not give exact values for the time
-their application needs. One reason might be that this is
-very hardware, implementation and in some cases even
-data specific. For example, [HJBJ+96] notes that their
-algorithm needs 10 s on a Sun SparcStation 20. The
-fastest CPU ever produced for this system had 200 MHz.
-Comparing this directly with results which were ob￾tained using an Intel i7-4820K with 3.9 GHz would not
-be meaningful.
-However, it does still make sense to mention the
-execution time as well as the hardware in individual
-papers. This gives the interested reader the possibility to
-estimate how difficult it might be to adjust the algorithm
-to work in the required time-constraints.
-Besides the latency, the throughput is another
-relevant characteristic of algorithms and implementa￾tions for semantic segmentation. For example, for the
-automatic description of images in order to enable text
-search the throughput is of much higher importance
-than latency.
-3) Stability: A reasonable requirement on semantic
-segmentation algorithms is the stability of a segmen￾tation over slight changes in the input image. When
-the image data is sightly blurred by smoke such as
-in Figure 4(c), the segmentation should not change.
-Also, two images which show a slight change in
-perspective should also only result in slight changes in
-the segmentation [PH05].
-4) Memory usage: Peak memory usage matters
-when segmentation algorithms are used in devices like
-smartphones or cameras, or when the algorithms have
-to finish in a given time frame, run on the graphics
-processing unit (GPU) and consume so much memory
-for single image segmentation that only the latest
-graphic cards can be used. However, no publication
-were available mentioning the peak memory usage.
-B. Datasets
-The computer vision community produced a couple
-of different datasets which are publicly available. In
-the following, only the most widely used ones as well
-as three medical databases are described. An overview
-over the quantity and the kind of data is given by
-Table I.
-1) PASCAL VOC: The PASCAL1 VOC2
-challenge
-was organized eight times with different datasets:
-Once every year from 2005 to 2012 [EVGW+b].
-1pattern analysis, statistical modelling and computational learning,
-an EU network of excellence
+
+P2 The manually labeled images could have a more
+coarse labeling. For example, a human classifier
+could have labeled a region as “car” and the
+algorithm could have split that region into the
+general “car” and the more specific “wheel of a
+car”
+Three accuracy metrics which do not suffer from
+problem P1 are used in [LSD14]:
+• mean accuracy:
+1
+k
+·
+Pk
+i=1
+nii
+ti
+∈ [0, 1]
+• mean intersection over union: $\frac{1}{k} \cdot \sum_{i=1}^{k} \frac{n_{ii}}{t_i - n_{ii} + \sum_{j=1}^{k} n_{ji}} \in [0, 1]$
+• frequency weighted intersection over union: $\left(\sum_{i=1}^{k} t_i\right)^{-1} \sum_{i=1}^{k} t_i \cdot \frac{n_{ii}}{t_i - n_{ii} + \sum_{j=1}^{k} n_{ji}} \in [0, 1]$
+Another problem might be pixels which cannot be
+assigned to one of the known classes. For this reason,
+[SWRC06] makes use of a void class. This class gets
+completely ignored for all quality measures. Hence the
+total number of pixels is assumed to be width·height−
+number of void pixels.
+One way to deal with problem P1 and problem P2
+is giving the confusion matrix as done in [SWRC06].
+However, this approach is not feasible if many classes
+are given.
+The F-measure is useful for binary classification tasks such as the KITTI road segmentation
+benchmark [FKG13] or crypt segmentation as done
+by [CRSS14]. It is calculated as “the harmonic mean
+of the precision and recall” [PH05]:
+$F_\beta = \frac{(1 + \beta^2) \cdot tp}{(1 + \beta^2) \cdot tp + \beta^2 \cdot fn + fp}$
+where β = 1 is chosen in most cases and tp means
+true positive, fn means false negative and fp means
+false positive.
+Finally, it should be noted that a lot of other measures
+for the accuracy of segmentations were proposed for
+non-semantic segmentation. One of those accuracy
+measures is Normalized Probabilistic Rand (NPR)
+index which was introduced in [UPH05] and evaluated in [CSI+09] on dermoscopy images. Other
+non-semantic segmentation measures were introduced
+in [MFTM01], but the reason for creating them seems to
+be to deal with the under-defined task description of non-semantic segmentation. These accuracy measures try to
+deal with different levels of coarseness of the segmentation.
+This is much less of a problem in semantic segmentation
+and thus those measures are not explained here.
+2) Speed: An upper bound on the execution
+time for the inference on a single image is a hard
+requirement for some applications. For example, in the
+case of autonomous cars, where an algorithm classifies
+each pixel as street or no-street and thus makes a semantic
+segmentation, every image needs to be processed within
+20 ms [BKTT15]. This time is called latency.
+Most papers do not give exact values for the time
+their application needs. One reason might be that this is
+very hardware, implementation and in some cases even
+data specific. For example, [HJBJ+96] notes that their
+algorithm needs 10 s on a Sun SparcStation 20. The
+fastest CPU ever produced for this system had 200 MHz.
+Comparing this directly with results which were obtained using an Intel i7-4820K with 3.9 GHz would not
+be meaningful.
+However, it does still make sense to mention the
+execution time as well as the hardware in individual
+papers. This gives the interested reader the possibility to
+estimate how difficult it might be to adjust the algorithm
+to work in the required time-constraints.
+Besides the latency, the throughput is another
+relevant characteristic of algorithms and implementations for semantic segmentation. For example, for the
+automatic description of images in order to enable text
+search the throughput is of much higher importance
+than latency.
+3) Stability: A reasonable requirement on semantic
+segmentation algorithms is the stability of a segmentation over slight changes in the input image. When
+the image data is slightly blurred by smoke such as
+in Figure 4(c), the segmentation should not change.
+Also, two images which show a slight change in
+perspective should also only result in slight changes in
+the segmentation [PH05].
+4) Memory usage: Peak memory usage matters
+when segmentation algorithms are used in devices like
+smartphones or cameras, or when the algorithms have
+to finish in a given time frame, run on the graphics
+processing unit (GPU) and consume so much memory
+for single image segmentation that only the latest
+graphics cards can be used. However, no publications
+were available mentioning the peak memory usage.
+B. Datasets
+The computer vision community produced a couple
+of different datasets which are publicly available. In
+the following, only the most widely used ones as well
+as three medical databases are described. An overview
+over the quantity and the kind of data is given by
+Table I.
+1) PASCAL VOC: The PASCAL¹ VOC² challenge
+was organized eight times with different datasets:
+Once every year from 2005 to 2012 [EVGW+b].
+1pattern analysis, statistical modelling and computational learning,
+an EU network of excellence
 2Visual Object Classes
-4
-Beginning with 2007, a segmentation challenge was
-added [EVGW+a].
-The dataset consists of annotated photographs from
-www.flicker.com, a photo sharing website. There are
-multiple challenges for PASCAL VOC. The 2012
-competition had five challenges of which one is a
-segmentation challenge where a single class label was
-given for each pixel. The classes are: aeroplane, bicycle,
-bird, boat, bottle, bus, car, cat, chair, cow, dining table,
-dog, horse, motorbike, person, potted plant, sheep, sofa,
-train, tv/monitor.
-Although no new competitions will be held, new
-algorithms can be evaluated on the 2010, 2011 and
-2012 data via http://host.robots.ox.ac.uk:8080/
-The PASCAL VOC segmentation challenges use the
-segmentation over union criterion (see Section III-A).
-2) MSRCv2: Microsoft Research has published a
-database of 591 photographs with pixel-level annotation
-of 21 classes: aeroplane, bike, bird, boat, body, book,
-building, car, cat, chair, cow, dog, face, flower, grass,
-road, sheep, sign, sky, tree, water. Additionally, there
-is a void label for pixels which do not belong to
-any of the 21 classes or which are close to the
-segmentation boundary. This allows a “rough and quick
-hand-segmentation which does not align exactly with
-the object boundaries” [SWRC06].
-3) Medical Databases: The Warwick-QU Dataset
-consists of 165 images with pixel-level annotation of
-5 classes: “healthy, adenomatous, moderately differen￾tiated, moderately-to-poorly differentiated, and poorly
-differentiated” [CSM09]. This dataset is part of the
-Gland Segmentation (GlaS) challenge.
-The DIARETDB1 [KKV+14] is a dataset of 89 im￾ages fundus images. Those images show the interior
-surface of the eye. Fundus images can be used to detect
-diabetic retinopathy. The images have four classes of
-coarse annotations: hard and soft exudates, hemorrhages
-and red small dots.
-20 test and additionally 20 training retinal fun￾dus images are available through the DRIVE data
-set [SAN+04]. The vessels were annotated. Addition￾ally, [AP11] added vascular features.
-The Open-CAS Endoscopic Datasets [MHMK+14]
-are 60 images taken from laparoscopic adrenalectomies
-and 60 images taken from laparoscopic pancreatic
-resections. Those are from 3 surgical procedures each.
-Half of the data was annotated by a medical expert for
-“medial instrument” and “no medical instrument”. All
-images were labeled by anonymous untrained workers
-to which they refer to as knowledge workers (KWs).
-One crowd annotation was obtained for each image by
-a majority vote on a pixel basis of 10 segmentations
-given by 10 different KWs.
-Training
-Prediction
-Post￾processing
-Window-wise
-Classification
-Window
-extraction
-Data
-augmentation Feature extraction
-Preprocessing
-Figure 2: A typical segmentation pipeline gets raw
-pixel data, applies preprocessing techniques
-like scaling and feature extraction like HOG
-features. For training, data augmentation
-techniques such as image rotation can be
-applied. For every single image, patches of
-the image called windows are extracted and
-those windows are classified. The resulting
-semantic segmentation can be refined by
-simple morphologic operations or by more
-complex approaches such as Markov Random
-Fields (MRFs).
-IV. SEGMENTATION PIPELINE
-Typically, semantic segmentation is done with a
-classifier which operates on fixed-size feature inputs
-and a sliding-window approach [DT05], [YBCK10],
-[SCZ08]. This means a classifier is trained on images
-of a fixed size. The trained classifier is then fed with
-rectangular regions of the image which are called win￾dows. Although the classifier gets an image patch of e.g.
-51 px×51 px of the environment, it might only classify
-the center pixel or a subset of the complete window.
-This segmentation pipeline is visualized in Figure 2.
-This approach was taken by [BKTT15] and a major￾ity of the VOC2007 participants [EVGW+a]. As this
-approach has to apply the patch classifier 512 · 512 =
-262 144 times for images of size 512 px×512 px, there
-are techniques for speeding it up such as applying a
-stride and interpolating the results.
-Neural networks are able to apply the sliding window
-approach in a very efficient way by handling a trained
-network as a convolution and applying the convolution
-on the complete image.
-However, there are alternatives. Namely MRFs and
-Conditional Random Fields (CRFs) which take the
-information of the complete image and segment it in
+
+Beginning with 2007, a segmentation challenge was
+added [EVGW+a].
+The dataset consists of annotated photographs from
+www.flickr.com, a photo sharing website. There are
+multiple challenges for PASCAL VOC. The 2012
+competition had five challenges of which one is a
+segmentation challenge where a single class label was
+given for each pixel. The classes are: aeroplane, bicycle,
+bird, boat, bottle, bus, car, cat, chair, cow, dining table,
+dog, horse, motorbike, person, potted plant, sheep, sofa,
+train, tv/monitor.
+Although no new competitions will be held, new
+algorithms can be evaluated on the 2010, 2011 and
+2012 data via http://host.robots.ox.ac.uk:8080/
+The PASCAL VOC segmentation challenges use the
+intersection over union criterion (see Section III-A).
+2) MSRCv2: Microsoft Research has published a
+database of 591 photographs with pixel-level annotation
+of 21 classes: aeroplane, bike, bird, boat, body, book,
+building, car, cat, chair, cow, dog, face, flower, grass,
+road, sheep, sign, sky, tree, water. Additionally, there
+is a void label for pixels which do not belong to
+any of the 21 classes or which are close to the
+segmentation boundary. This allows a “rough and quick
+hand-segmentation which does not align exactly with
+the object boundaries” [SWRC06].
+3) Medical Databases: The Warwick-QU Dataset
+consists of 165 images with pixel-level annotation of
+5 classes: “healthy, adenomatous, moderately differentiated, moderately-to-poorly differentiated, and poorly
+differentiated” [CSM09]. This dataset is part of the
+Gland Segmentation (GlaS) challenge.
+The DIARETDB1 [KKV+14] is a dataset of 89 fundus images. Those images show the interior
+surface of the eye. Fundus images can be used to detect
+diabetic retinopathy. The images have four classes of
+coarse annotations: hard and soft exudates, hemorrhages
+and red small dots.
+20 test and additionally 20 training retinal fundus images are available through the DRIVE data
+set [SAN+04]. The vessels were annotated. Additionally, [AP11] added vascular features.
+The Open-CAS Endoscopic Datasets [MHMK+14]
+are 60 images taken from laparoscopic adrenalectomies
+and 60 images taken from laparoscopic pancreatic
+resections. Those are from 3 surgical procedures each.
+Half of the data was annotated by a medical expert for
+“medical instrument” and “no medical instrument”. All
+images were labeled by anonymous untrained workers,
+whom the authors refer to as knowledge workers (KWs).
+One crowd annotation was obtained for each image by
+a majority vote on a pixel basis of 10 segmentations
+given by 10 different KWs.
+Training
+Prediction
+Postprocessing
+Window-wise
+Classification
+Window
+extraction
+Data
+augmentation Feature extraction
+Preprocessing
+Figure 2: A typical segmentation pipeline gets raw
+pixel data, applies preprocessing techniques
+like scaling and feature extraction like HOG
+features. For training, data augmentation
+techniques such as image rotation can be
+applied. For every single image, patches of
+the image called windows are extracted and
+those windows are classified. The resulting
+semantic segmentation can be refined by
+simple morphologic operations or by more
+complex approaches such as Markov Random
+Fields (MRFs).
+IV. SEGMENTATION PIPELINE
+Typically, semantic segmentation is done with a
+classifier which operates on fixed-size feature inputs
+and a sliding-window approach [DT05], [YBCK10],
+[SCZ08]. This means a classifier is trained on images
+of a fixed size. The trained classifier is then fed with
+rectangular regions of the image which are called windows. Although the classifier gets an image patch of e.g.
+51 px×51 px of the environment, it might only classify
+the center pixel or a subset of the complete window.
+This segmentation pipeline is visualized in Figure 2.
+This approach was taken by [BKTT15] and a majority of the VOC2007 participants [EVGW+a]. As this
+approach has to apply the patch classifier 512 · 512 =
+262 144 times for images of size 512 px×512 px, there
+are techniques for speeding it up such as applying a
+stride and interpolating the results.
+Neural networks are able to apply the sliding window
+approach in a very efficient way by handling a trained
+network as a convolution and applying the convolution
+on the complete image.
+However, there are alternatives. Namely MRFs and
+Conditional Random Fields (CRFs) which take the
+information of the complete image and segment it in
 an holistic approach.
-5
-V. TRADITIONAL APPROACHES
-Image segmentation algorithms which use traditional
-approaches, hence don’t apply neural networks and
-make heavy use of domain knowledge, are wide-spread
-in the computer vision community. Features which can
-be used for segmentation are described in Section V-A,
-a very brief overview of unsupervised, non-semantic
-segmentation is given in Section V-B, Random Decision
-Forests are described in Section V-C, Markov Random
-Fields in Section V-E and Support Vector Machines
-(SVMs) in Section V-D. Postprocessing is covered in
-Section V-G.
-It should be noted that algorithms can use combina￾tion of methods. For example, [TNL14] makes use of a
-combination of a SVM and a MRF. Also, auto-encoders
-can be used to learn features which in turn can be used
-by any classifier.
-A. Features and Preprocessing methods
-The choice of features is very important in traditional
-approaches. The most commonly used local and global
-features are explained in the following as well as feature
-dimensionality reduction algorithms.
-1) Pixel Color: Pixel color in different image spaces
-(e.g. 3 features for RGB, 3 features for HSV, 1 feature
-for the gray-value) are the most widely used features. A
-typical image is in the RGB color space, but depending
-on the classifier and the problem another color space
-might result in better segmentations. RGB, YcBcr, HSL,
-Lab and YIQ are some examples used by [CRSS14].
-No single color space has been proven to be superior
-to all others in all contexts [CJSW01]. However, the
-most common choices seem to be RGB and HSI.
-Reasons for choosing RGB is simplicity and the support
-by programming languages, whereas the choice of
-the HSI color space might make it simpler for the
-classifier to become invariant to illumination. One
-reason for choosing CIE-L*a*b* color space is that it
-approximates human perception of brightness [KP92].
-It follows that choosing the L*a*b color space helps
-algorithms to detect structures which are seen by
-humans. Another way of improving the structure within
-an image is histogram equalization, which can be
-applied to improve contrast [PAA+87], [RM07].
-2) Histogram of oriented Gradients: Histogram of
-oriented gradients (HOG) features interpret the image
-as a discrete function I : N
-2 → { 0, . . . , 255 } which
-maps the position (x, y) to a color. For each pixel, there
-are two gradients: The partial derivative of x and y.
-Now the original image is transformed to two feature
-maps of equal size which represents the gradient. These
-feature maps are splitted into patches and a histogram of
-the directions is calculated for each patch. HOG features
-were proposed in [DT05] and are used in [BMBM10],
-[FGMR10] for segmentation tasks.
-3) SIFT: Scale-invariant feature transform (SIFT)
-feature descriptors describe keypoints in an image. The
-image patch of the size 16 × 16 around the keypoint
-is taken. This patch is divided in 16 distinct parts of
-the size 4 × 4. For each of those parts a histogram of
-8 orientations is calculated similar as for HOG features.
-This results in a 128-dimensional feature vector for
-each keypoint.
-It should be emphasized that SIFT is a global feature
-for a complete image.
-SIFT is described in detail in [Low04] and are used
-in [PTN09].
-4) BOV: Bag-of-visual-words (BOV), also called
-bag of keypoints, is based on vector quantization.
-Similar to HOG features, BOV features are histograms
-which count the number of occurrences of certain
-patterns within a patch of the image. BOV are described
-in [CDF+04] and used in combination with SIFT
-feature descriptors in [CP08].
-5) Poselets: Poselets rely on manually added extra
-keypoints such as “right shoulder”, “left shoulder”,
-“right knee” and “left knee”. They were originally
-used for human pose estimation. Finding those extra
-keypoints is easily possible for well-known image
-classes like humans. However, it is difficult for classes
-like airplanes, ships, organs or cells where the human
-annotators do not know the keypoints. Additionally, the
-keypoints have to be chosen for every single class. There
-are strategies to deal with those problems like viewpoint￾dependent keypoints. Poselets were used in [BMBM10]
-to detect people and in [BBMM11] for general object
-detection of the PASCAL VOC dataset.
-6) Textons: A texton is the minimal building block
-of vision. The computer vision literature does not give a
-strict definition for textons, but edge detectors could be
-one example. One might argue that deep learning tech￾niques with Convolution Neuronal Networks (CNNs)
-learn textons in the first filters.
-An excellent explanation of textons can be found
-in [ZGWX05].
-7) Dimensionality Reduction: High-resolution im￾ages have a lot of pixels. Having one or more feature per
-pixel results in well over a million features. This makes
-training difficult while the higher resolution might not
-contain much more information. A simple approach
-to deal with this is downsampling the high-resolution
-image to a low-resolution variant. Another way of
-doing dimensionality reduction is principal component
-analysis (PCA), which is applied by [COWR11]. The
+
+V. TRADITIONAL APPROACHES
+Image segmentation algorithms which use traditional
+approaches, hence don’t apply neural networks and
+make heavy use of domain knowledge, are wide-spread
+in the computer vision community. Features which can
+be used for segmentation are described in Section V-A,
+a very brief overview of unsupervised, non-semantic
+segmentation is given in Section V-B, Random Decision
+Forests are described in Section V-C, Markov Random
+Fields in Section V-E and Support Vector Machines
+(SVMs) in Section V-D. Postprocessing is covered in
+Section V-G.
+It should be noted that algorithms can use a combination of methods. For example, [TNL14] makes use of a
+combination of an SVM and an MRF. Also, auto-encoders
+can be used to learn features which in turn can be used
+by any classifier.
+A. Features and Preprocessing methods
+The choice of features is very important in traditional
+approaches. The most commonly used local and global
+features are explained in the following as well as feature
+dimensionality reduction algorithms.
+1) Pixel Color: Pixel color in different image spaces
+(e.g. 3 features for RGB, 3 features for HSV, 1 feature
+for the gray-value) are the most widely used features. A
+typical image is in the RGB color space, but depending
+on the classifier and the problem another color space
+might result in better segmentations. RGB, YCbCr, HSL,
+Lab and YIQ are some examples used by [CRSS14].
+No single color space has been proven to be superior
+to all others in all contexts [CJSW01]. However, the
+most common choices seem to be RGB and HSI.
+Reasons for choosing RGB are simplicity and the support
+by programming languages, whereas the choice of
+the HSI color space might make it simpler for the
+classifier to become invariant to illumination. One
+reason for choosing CIE-L*a*b* color space is that it
+approximates human perception of brightness [KP92].
+It follows that choosing the L*a*b* color space helps
+algorithms to detect structures which are seen by
+humans. Another way of improving the structure within
+an image is histogram equalization, which can be
+applied to improve contrast [PAA+87], [RM07].
+2) Histogram of oriented Gradients: Histogram of
+oriented gradients (HOG) features interpret the image
+as a discrete function $I : \mathbb{N}^2 \to \{0, \dots, 255\}$ which
+maps the position (x, y) to a color. For each pixel, there
+are two gradients: The partial derivatives with respect to x and y.
+Now the original image is transformed to two feature
+maps of equal size which represent the gradient. These
+feature maps are split into patches and a histogram of
+the directions is calculated for each patch. HOG features
+were proposed in [DT05] and are used in [BMBM10],
+[FGMR10] for segmentation tasks.
+3) SIFT: Scale-invariant feature transform (SIFT)
+feature descriptors describe keypoints in an image. The
+image patch of the size 16 × 16 around the keypoint
+is taken. This patch is divided into 16 distinct parts of
+the size 4 × 4. For each of those parts a histogram of
+8 orientations is calculated, similarly to HOG features.
+This results in a 128-dimensional feature vector for
+each keypoint.
+It should be emphasized that SIFT is a global feature
+for a complete image.
+SIFT is described in detail in [Low04] and is used
+in [PTN09].
+4) BOV: Bag-of-visual-words (BOV), also called
+bag of keypoints, is based on vector quantization.
+Similar to HOG features, BOV features are histograms
+which count the number of occurrences of certain
+patterns within a patch of the image. BOV are described
+in [CDF+04] and used in combination with SIFT
+feature descriptors in [CP08].
+5) Poselets: Poselets rely on manually added extra
+keypoints such as “right shoulder”, “left shoulder”,
+“right knee” and “left knee”. They were originally
+used for human pose estimation. Finding those extra
+keypoints is easily possible for well-known image
+classes like humans. However, it is difficult for classes
+like airplanes, ships, organs or cells where the human
+annotators do not know the keypoints. Additionally, the
+keypoints have to be chosen for every single class. There
+are strategies to deal with those problems like viewpoint-dependent keypoints. Poselets were used in [BMBM10]
+to detect people and in [BBMM11] for general object
+detection of the PASCAL VOC dataset.
+6) Textons: A texton is the minimal building block
+of vision. The computer vision literature does not give a
+strict definition for textons, but edge detectors could be
+one example. One might argue that deep learning techniques with Convolutional Neural Networks (CNNs)
+learn textons in the first filters.
+An excellent explanation of textons can be found
+in [ZGWX05].
+7) Dimensionality Reduction: High-resolution images have a lot of pixels. Having one or more features per
+pixel results in well over a million features. This makes
+training difficult while the higher resolution might not
+contain much more information. A simple approach
+to deal with this is downsampling the high-resolution
+image to a low-resolution variant. Another way of
+doing dimensionality reduction is principal component
+analysis (PCA), which is applied by [COWR11]. The
 idea behind PCA is to find a hyperplane on which all
-6
-feature vectors can be projected with a minimal loss
-of information. A detailed description of PCA is given
-by [Smi02].
-One problem of PCA is the fact that it does not
-distinguish different classes. This means it can happen
-that a perfectly linearly separable set of feature vectors
-becomes not separable at all after applying PCA.
-There are many other techniques for dimensionality
-reduction. An overview and a comparison over some
-of them is given by [vdMPvdH09].
-B. Unsupervised Segmentation
-Unsupervised segmentation algorithms can be used
-in supervised segmentation as another source of infor￾mation or to refine a segmentation. While unsupervised
-segmentation algorithms can never be semantic, they are
-well-studied and deserve at least a very brief overview.
-Semantic segmentation algorithms store information
-about the classes they were trained to segment while
-non-semantic segmentation algorithms try to detect
-consistent regions or region boundaries.
-1) Clustering Algorithms: Clustering algorithms can
-directly be applied on the pixels, when one gives a
-feature vector per pixel. Two clustering algorithms are
-k-means and the mean-shift algorithm.
-The k-means algorithm is a general-purpose cluster￾ing algorithm which requires the number of clusters to
-be given beforehand. Initially, it places the k centroids
-randomly in the feature space. Then it assigns each
-data point to the nearest centroid, moves the centroid
-to the center of the cluster and continues the process
-until a stopping criterion is reached. A faster variant is
-described in [Har75].
-k-means was applied by [CLP98] for medical image
-segmentation.
-Another clustering algorithm is the mean-shift algo￾rithm which was introduced by [CM02] for segmen￾tation tasks. The algorithm finds the cluster centers
-by initializing centroids at random seed points and
-iteratively shifting them to the mean coordinate within
-a certain range. Instead of taking a hard range constraint,
-the mean can also be calculated by using any kernel.
-This effectively applies a weight to the coordinates
-of the points. The mean shift algorithm finds cluster
-centers at positions with a highest local density of
-points.
-2) Graph Based Image Segmentation: Graph-based
-image segmentation algorithms typically interpret pixels
-as vertices and an edge weight is a measure of
-dissimilarity such as the difference in color [FH04],
-[Fel]. There are several different candidates for edges.
-The 4-neighborhood (north, east, south west) or an 8-
-neighborhood (north, north-east, east, south-east, south,
-south-west, west, north-west) are plausible choices.
-One way to cut the edges is by building a minimum
-spanning tree and removing edges above a threshold.
-This threshold can either be constant, adapted to the
-graph or adjusted by the user. After the edge-cutting
-step, the connected components are the segments.
-A graph-based method which ranked 2nd in the
-Pascal VOC 2010 challenge [EVGW+10] is described
-in [CS10]. The system makes heavy use of the multi￾cue contour detector globalPb [MAFM08] and needs
-about 10 GB of main memory [CS11].
-3) Random Walks: Random walks belong to the
-graph-based image segmentation algorithms. Random
-walk image segmentation usually works as follows:
-Seed points are placed on the image for the different
-objects in the image. From every single pixel, the
-probability to reach the different seed points by a
-random walk is calculated. This is done by taking
-image gradients as described in Section V-A for HOG
-features. The class of the pixel is the class of which a
-seed point will be reached with highest probability. At
-first, this is an interactive segmentation method, but it
-can be extended to be non-interactive by using another
-segmentation methods output as seed points.
-4) Active Contour Models: Active contour models
-(ACMs) are algorithms which segment images roughly
-along edges, but also try to find a border which is
-smooth. This is done by defining a so called energy
-function which will be minimized. They were initially
-described in [KWT88]. ACMs can be used to segment
-an image or to refine segmentation as it was done
-in [AM98] for brain MR images.
-5) Watershed Segmentation: The watershed algo￾rithm takes a grayscale image and interprets it as a
-height map. Low values are catchment basins and
-the higher values between two neighboring catchment
-basins is the watershed. The catchment basins should
-contain what the developer wants to capture. This
-implies that those areas must be dark on grayscale
-images. The algorithm starts to fill the basins from
-the lowest point. When two basins are connected, a
-watershed is found. The algorithm stops when the
-highest point is reached.
-A detailed description of the watershed segmentation
-algorithm is given in [RM00].
-The watershed segmentation was used in [JLD03] to
-segment white blood cells. As the authors describe,
-the segmentation by watershed transform has two
-flaws: Over-segmentation due to local minima and thick
+
+feature vectors can be projected with a minimal loss
+of information. A detailed description of PCA is given
+by [Smi02].
+One problem of PCA is the fact that it does not
+distinguish different classes. This means it can happen
+that a perfectly linearly separable set of feature vectors
+becomes not separable at all after applying PCA.
+There are many other techniques for dimensionality
+reduction. An overview and a comparison of some
+of them is given by [vdMPvdH09].
+B. Unsupervised Segmentation
+Unsupervised segmentation algorithms can be used
+in supervised segmentation as another source of information or to refine a segmentation. While unsupervised
+segmentation algorithms can never be semantic, they are
+well-studied and deserve at least a very brief overview.
+Semantic segmentation algorithms store information
+about the classes they were trained to segment while
+non-semantic segmentation algorithms try to detect
+consistent regions or region boundaries.
+1) Clustering Algorithms: Clustering algorithms can
+directly be applied on the pixels, when one gives a
+feature vector per pixel. Two clustering algorithms are
+k-means and the mean-shift algorithm.
+The k-means algorithm is a general-purpose clustering algorithm which requires the number of clusters to
+be given beforehand. Initially, it places the k centroids
+randomly in the feature space. Then it assigns each
+data point to the nearest centroid, moves the centroid
+to the center of the cluster and continues the process
+until a stopping criterion is reached. A faster variant is
+described in [Har75].
+k-means was applied by [CLP98] for medical image
+segmentation.
+Another clustering algorithm is the mean-shift algorithm which was introduced by [CM02] for segmentation tasks. The algorithm finds the cluster centers
+by initializing centroids at random seed points and
+iteratively shifting them to the mean coordinate within
+a certain range. Instead of taking a hard range constraint,
+the mean can also be calculated by using any kernel.
+This effectively applies a weight to the coordinates
+of the points. The mean shift algorithm finds cluster
+centers at positions with the highest local density of
+points.
+2) Graph Based Image Segmentation: Graph-based
+image segmentation algorithms typically interpret pixels
+as vertices and an edge weight is a measure of
+dissimilarity such as the difference in color [FH04],
+[Fel]. There are several different candidates for edges.
+The 4-neighborhood (north, east, south, west) or an 8-neighborhood
+(north, north-east, east, south-east, south,
+south-west, west, north-west) are plausible choices.
+One way to cut the edges is by building a minimum
+spanning tree and removing edges above a threshold.
+This threshold can either be constant, adapted to the
+graph or adjusted by the user. After the edge-cutting
+step, the connected components are the segments.
+A graph-based method which ranked 2nd in the
+Pascal VOC 2010 challenge [EVGW+10] is described
+in [CS10]. The system makes heavy use of the multicue contour detector globalPb [MAFM08] and needs
+about 10 GB of main memory [CS11].
+3) Random Walks: Random walks belong to the
+graph-based image segmentation algorithms. Random
+walk image segmentation usually works as follows:
+Seed points are placed on the image for the different
+objects in the image. From every single pixel, the
+probability to reach the different seed points by a
+random walk is calculated. This is done by taking
+image gradients as described in Section V-A for HOG
+features. The class of the pixel is the class of which a
+seed point will be reached with highest probability. At
+first, this is an interactive segmentation method, but it
+can be extended to be non-interactive by using another
+segmentation method's output as seed points.
+4) Active Contour Models: Active contour models
+(ACMs) are algorithms which segment images roughly
+along edges, but also try to find a border which is
+smooth. This is done by defining a so-called energy
+function which will be minimized. They were initially
+described in [KWT88]. ACMs can be used to segment
+an image or to refine segmentation as it was done
+in [AM98] for brain MR images.
+5) Watershed Segmentation: The watershed algorithm takes a grayscale image and interprets it as a
+height map. Low values are catchment basins and
+the higher values between two neighboring catchment
+basins are the watershed. The catchment basins should
+contain what the developer wants to capture. This
+implies that those areas must be dark on grayscale
+images. The algorithm starts to fill the basins from
+the lowest point. When two basins are connected, a
+watershed is found. The algorithm stops when the
+highest point is reached.
+A detailed description of the watershed segmentation
+algorithm is given in [RM00].
+The watershed segmentation was used in [JLD03] to
+segment white blood cells. As the authors describe,
+the segmentation by watershed transform has two
+flaws: Over-segmentation due to local minima and thick
 watersheds due to plateaus.
-7
-C. Random Decision Forests
-Random Decision Forests were first proposed
-in [Ho95]. This type of classifier applies techniques
-called ensemble learning, where multiple classifiers
-are trained and a combination of their hypotheses is
-used. One ensemble learning technique is the random
-subspaces method where each classifier is trained
-on a random subspace of the feature space. Another
-ensemble learning technique is bagging, which is
-training the trees on random subsets of the training set.
-In the case of Random Decision Forests, the classifiers
-are decision trees. A decision tree is a tree where each
-inner node uses one or more features to decide in which
-branch to descend. Each leaf is a class.
-One strength of Random Decision Forests compared
-to many other classifiers like SVMs and neural networks
-is that the scale of measure of the features (nominal,
-ordinal, interval, ratio) can be arbitrary. Another advan￾tage of Random Decision Forests compared to SVMs,
-for example, is the speed of training and classification.
-Decision trees were extensively studied in the past
-20 years and a multitude of training algorithms have
-been proposed (e.g. ID3 in [Qui86], C4.5 in [Qui93]).
-Possible training hyperparameters are the measure to
-evaluate the “goodness of split” [Min89], the number of
-decision trees being used, and if the depth of the trees
-is restricted. Typically in the context of classification,
-decision trees are trained by adding new nodes until
-each leaf contains only nodes of a single class or until it
-is not possible to split further. This is called a stopping
-criterion.
-There are two typical training modes: Central axis
-projection and perceptron training. In training, for
-each node a hyperplane is searched which is optimal
-according to an error function.
-Random Decision Forests with texton features (see
-Section V-A6) are applied in [SJC08] for segmentation.
-In the [MSC] dataset, they report a per-pixel accuracy
-rate of 66.9 % for their best system. This system
-requires 415 ms for the segmentation of 320 px×213 px
-images on a single 2.7 GHz core. On the Pascal
-VOC 2007 dataset, they report an average per-pixel
-accuracy for their best segmentation system of 42 %.
-An excellent introduction to Random Decision
-Forests for semantic segmentation is given by [SCZ08].
-D. SVMs
-SVMs are well-studied binary classifiers which can
-be described by five central ideas. For those ideas, the
-training data is represented as (xi
-, yi) where xi
-is the
-feature vector and yi ∈ { −1, 1 } the binary label for
-training example i ∈ { 1, . . . , m }.
-1) If data is linearly separable, it can be separated
-by a hyperplane. There is one hyperplane which
-maximizes the distance to the next datapoints
-(support vectors). This hyperplane should be taken:
-minimize
-w,b
-1
-2
-kwk
-2
-s.t. ∀
-m
-i=1yi
-· (hw, xii + b)
-| {z }
-sgn applied to this gives the classification
-≥ 1
-2) Even if the underlying process which generates the
-features for the two classes is linearly separable,
-noise can make the data not separable. The intro￾duction of slack variables to relax the requirement
-of linear separability solves this problem. The
-trade-off between accepting some errors and a
-more complex model is weighted by a parameter
-C ∈ R
-+
-0
-. The bigger C, the more errors are
-accepted. The new optimization problem is:
-minimize
-w
-1
-2
-kwk
-2 + C ·
-Xm
-i=1
-ξi
-s.t. ∀
-m
-i=1yi
-· (hw, xii + b) ≥ 1 − ξi
-Note that 0 ≤ ξi ≤ 1 means that the data point
-is within the margin, whereas ξi ≥ 1 means it is
-misclassified. An SVM with C > 0 is also called
-a soft-margin SVM.
-3) The primal problem is to find the normal vector
-w and the bias b. The dual problem is to express
-w as a linear combination of the training data xi
-:
-w =
-Xm
-i=1
-αiyixi
-where yi ∈ { −1, 1 } represents the class of the
-training example and αi are Lagrange multipliers.
-The usage of Lagrange multipliers is explained
-with some examples in [Smi04]. The usage of the
-Lagrange multipliers αi changes the optimization
-problem depend on the αi which are weights for
-the feature vectors. It turns out that most αi will
-be zero. The non-zero weighted vectors are called
-support vectors.
-The optimization problem is now, according
-to [Bur98]:
-maximize
-αi
-Xm
-i=1
-αi −
-1
-2
-Xm
-i=1
-Xm
-j=1
-αiαjyiyj hxi
-, xj i
-s.t. ∀
-m
-i=10 ≤ αi ≤ C
-s.t. Xm
-i=1
+
+C. Random Decision Forests
+Random Decision Forests were first proposed
+in [Ho95]. This type of classifier applies techniques
+called ensemble learning, where multiple classifiers
+are trained and a combination of their hypotheses is
+used. One ensemble learning technique is the random
+subspaces method where each classifier is trained
+on a random subspace of the feature space. Another
+ensemble learning technique is bagging, which is
+training the trees on random subsets of the training set.
+In the case of Random Decision Forests, the classifiers
+are decision trees. A decision tree is a tree where each
+inner node uses one or more features to decide in which
+branch to descend. Each leaf is a class.
+One strength of Random Decision Forests compared
+to many other classifiers like SVMs and neural networks
+is that the scale of measure of the features (nominal,
+ordinal, interval, ratio) can be arbitrary. Another advantage of Random Decision Forests compared to SVMs,
+for example, is the speed of training and classification.
+Decision trees were extensively studied in the past
+20 years and a multitude of training algorithms have
+been proposed (e.g. ID3 in [Qui86], C4.5 in [Qui93]).
+Possible training hyperparameters are the measure to
+evaluate the “goodness of split” [Min89], the number of
+decision trees being used, and if the depth of the trees
+is restricted. Typically in the context of classification,
+decision trees are trained by adding new nodes until
+each leaf contains only nodes of a single class or until it
+is not possible to split further. This is called a stopping
+criterion.
+There are two typical training modes: Central axis
+projection and perceptron training. In training, for
+each node a hyperplane is searched which is optimal
+according to an error function.
+Random Decision Forests with texton features (see
+Section V-A6) are applied in [SJC08] for segmentation.
+In the [MSC] dataset, they report a per-pixel accuracy
+rate of 66.9 % for their best system. This system
+requires 415 ms for the segmentation of 320 px×213 px
+images on a single 2.7 GHz core. On the Pascal
+VOC 2007 dataset, they report an average per-pixel
+accuracy for their best segmentation system of 42 %.
+An excellent introduction to Random Decision
+Forests for semantic segmentation is given by [SCZ08].
+D. SVMs
+SVMs are well-studied binary classifiers which can
+be described by five central ideas. For those ideas, the
+training data is represented as (xi, yi) where xi is the
+feature vector and yi ∈ { −1, 1 } the binary label for
+training example i ∈ { 1, . . . , m }.
+1) If data is linearly separable, it can be separated
+by a hyperplane. There is one hyperplane which
+maximizes the distance to the next datapoints
+(support vectors). This hyperplane should be taken:
+$$\underset{w,b}{\text{minimize}} \quad \frac{1}{2} \|w\|^2$$
+$$\text{s.t.} \quad \forall_{i=1}^{m}: \; y_i \cdot \underbrace{(\langle w, x_i \rangle + b)}_{\text{sgn applied to this gives the classification}} \geq 1$$
+2) Even if the underlying process which generates the
+features for the two classes is linearly separable,
+noise can make the data not separable. The introduction of slack variables to relax the requirement
+of linear separability solves this problem. The
+trade-off between accepting some errors and a
+more complex model is weighted by a parameter
+$C \in \mathbb{R}_0^+$. The bigger C, the more errors are
+accepted. The new optimization problem is:
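+$$\underset{w}{\text{minimize}} \quad \frac{1}{2} \|w\|^2 + C \cdot \sum_{i=1}^{m} \xi_i$$
+$$\text{s.t.} \quad \forall_{i=1}^{m}: \; y_i \cdot (\langle w, x_i \rangle + b) \geq 1 - \xi_i$$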
+Note that 0 ≤ ξi ≤ 1 means that the data point
+is within the margin, whereas ξi ≥ 1 means it is
+misclassified. An SVM with C > 0 is also called
+a soft-margin SVM.
+3) The primal problem is to find the normal vector
+w and the bias b. The dual problem is to express
+w as a linear combination of the training data xi:
+$$w = \sum_{i=1}^{m} \alpha_i y_i x_i$$
+where yi ∈ { −1, 1 } represents the class of the
+training example and αi are Lagrange multipliers.
+The usage of Lagrange multipliers is explained
+with some examples in [Smi04]. The usage of the
+Lagrange multipliers αi changes the optimization
+problem to depend on the αi which are weights for
+the feature vectors. It turns out that most αi will
+be zero. The non-zero weighted vectors are called
+support vectors.
+The optimization problem is now, according
+to [Bur98]:
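+$$\underset{\alpha_i}{\text{maximize}} \quad \sum_{i=1}^{m} \alpha_i - \frac{1}{2} \sum_{i=1}^{m} \sum_{j=1}^{m} \alpha_i \alpha_j y_i y_j \langle x_i, x_j \rangle$$
+$$\text{s.t.} \quad \forall_{i=1}^{m}: \; 0 \leq \alpha_i \leq C$$
+s.t. $\sum_{i=1}^{m}$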
 αiyi = 0
-8
-4) Not every dataset is linearly separable. This prob￾lem is approached by transforming the feature
-vectors x with a non-linear mapping Φ into
-a higher dimensional (probably ∞-dimensional)
-space. As the feature vectors x are only used
-within scalar product hxi
-, xj i, it is not necessary
-to do the transformation. It is enough to do the
-calculation
-K(xi
-, xj ) = hxi
-, xj i
-This function K is called a kernel. The idea of
-never explicitly transforming the vectors xi
-to the
-higher dimensional space is called the kernel trick.
-Common kernels include the polynomial kernel
-KP (xi
-, xj ) = (hxi
-, xj i + r)
-p
-of degree p and coefficient r, the Gaussian radial
-basis function (RBF) kernel
-KGauss(xi
-, xj ) = e
-−γkxi−xj k
-2
-2σ2
-and the sigmoid kernel
-Ktanh(xi
-, xj ) = tanh(γhxi
-, xj i − r)
-where the parameter γ determines how much
-influence single training examples have.
-5) The described SVMs can only distinguish between
-two classes. Common strategies to expand those
-binary classifiers to multi-class classification is
-the one-vs-all and the one-vs-one strategy. In the
-one-vs-all strategy n classifiers have to be trained
-which can distinguish one of the n classes against
-all other classes. In the one-vs-one strategy n
-2−n
-2
-classifiers are trained; one classifier for each pair
-of classes.
-A detailed description of SVMs can be found
-in [Bur98].
-SVMs are used by [YHRF12] on the 2009 and 2010
-PASCAL segmentation challenge [EVGW+10]. They
-did not hand their classifier in to the challenge itself,
-but calculated an average rank of 7 among the different
-categories.
-[FGMR10] also used an SVM based method with
-HOG features and achieved the 7th rank in the 2010
-PASCAL segmentation challenge by mean accuracy. It
-needs about 2 s on a 2.8 GHz 8-core Intel processor.
-E. Markov Random Fields
-MRFs are undirected probabilistic graphical models
-which are wide-spread model in computer vision. The
-overall idea of MRFs is to assign a random variable for
-each feature and a random variable for each pixel which
-x1 x2 x3
-x4 x5 x6
-x7 x8 x9
-y1 y2 y3
-y4 y5 y6
-y7 y8 y9
-x1 x2 x3
-x4 x5 x6
-x7 x8 x9
-y1 y2 y3
-y4 y5 y6
-y7 y8 y9
-Figure 3: CRF with 4-neighborhood. Each node xi
-represents a pixel and each node yi represents
-a label.
-gets labeled as shown in Figure 3. For example, a MRF
-which is trained on images of the size 224 px×224 pixel
-and gets the raw RGB values as features has
-224 · 224 · 3
-| {z }
-input
-+ 224 · 224
-| {z }
-output
-= 200 704
-random variables. Those random variables are condi￾tionally independent, given their local neighborhood.
-These (in)dependencies can be expressed with a graph.
-Let G = (V, E) be the associated undirected graph
-of an MRF and C be the set of all maximal cliques in
-that graph. Nodes represent random variables x, y and
-edges represent conditional dependencies. Just like in
-he 4-neighborhood [SWRC06] and the 8-neighborhood
-are reasonable choices for constructing the graph.
-Typically, random variables y represent the class of a
-single pixel, random variables x represent a pixel values
-and edges represent pixel neighborhood in computer
-vision problems segmentation problems where MRFs
-are used. Accordingly, the random variables y live
-on 1, . . . , nr of classes and the random variables x
-typically live on 0, . . . , 255 or [0, 1].
-The probability of x, y can be expressed as
-P(x, y) = 1
-Z
-e
-−E(x,y)
-where Z =
-P
-x,y
-e
-−E(x,y)
-is a normalization term
-called the partition function and E is called the energy
-function. A common choice for the energy function is
-E(x, y) = X
-c∈C
-ψc(x, y)
-where ψ is called a clique potential. One choice for
-cliques of size two x, y = (x1, x2) is [KP06]
-ψc(x1, x2) = wδ(x1, x2) = (
-+w if x1 6= x2
-−w if x1 = x2
-According to [Mur12], the most common way of
-inference over the posterior MRF in computer vision
+
+4) Not every dataset is linearly separable. This problem is approached by transforming the feature
+vectors x with a non-linear mapping Φ into
+a higher dimensional (probably ∞-dimensional)
+space. As the feature vectors x are only used
+within the scalar product $\langle x_i, x_j \rangle$, it is not necessary
+to do the transformation. It is enough to do the
+calculation
+$$K(x_i, x_j) = \langle x_i, x_j \rangle$$
+This function K is called a kernel. The idea of
+never explicitly transforming the vectors $x_i$ to the
+higher dimensional space is called the kernel trick.
+Common kernels include the polynomial kernel
+$$K_P(x_i, x_j) = (\langle x_i, x_j \rangle + r)^p$$
+of degree p and coefficient r, the Gaussian radial
+basis function (RBF) kernel
+$$K_{\text{Gauss}}(x_i, x_j) = e^{\frac{-\gamma \|x_i - x_j\|^2}{2\sigma^2}}$$
+and the sigmoid kernel
+$$K_{\tanh}(x_i, x_j) = \tanh(\gamma \langle x_i, x_j \rangle - r)$$
+where the parameter γ determines how much
+influence single training examples have.
+5) The described SVMs can only distinguish between
+two classes. Common strategies to expand those
+binary classifiers to multi-class classification is
+the one-vs-all and the one-vs-one strategy. In the
+one-vs-all strategy n classifiers have to be trained
+which can distinguish one of the n classes against
+all other classes. In the one-vs-one strategy $\frac{n^2 - n}{2}$
+classifiers are trained; one classifier for each pair
+of classes.
+A detailed description of SVMs can be found
+in [Bur98].
+SVMs are used by [YHRF12] on the 2009 and 2010
+PASCAL segmentation challenge [EVGW+10]. They
+did not hand their classifier in to the challenge itself,
+but calculated an average rank of 7 among the different
+categories.
+[FGMR10] also used an SVM based method with
+HOG features and achieved the 7th rank in the 2010
+PASCAL segmentation challenge by mean accuracy. It
+needs about 2 s on a 2.8 GHz 8-core Intel processor.
+E. Markov Random Fields
+MRFs are undirected probabilistic graphical models
+which are a widespread model in computer vision. The
+overall idea of MRFs is to assign a random variable for
+each feature and a random variable for each pixel which
+x1 x2 x3
+x4 x5 x6
+x7 x8 x9
+y1 y2 y3
+y4 y5 y6
+y7 y8 y9
+x1 x2 x3
+x4 x5 x6
+x7 x8 x9
+y1 y2 y3
+y4 y5 y6
+y7 y8 y9
+Figure 3: CRF with 4-neighborhood. Each node xi
+represents a pixel and each node yi represents
+a label.
+gets labeled as shown in Figure 3. For example, an MRF
+which is trained on images of the size 224 px × 224 px
+and gets the raw RGB values as features has
+$$\underbrace{224 \cdot 224 \cdot 3}_{\text{input}} + \underbrace{224 \cdot 224}_{\text{output}} = 200\,704$$
+random variables. Those random variables are conditionally independent, given their local neighborhood.
+These (in)dependencies can be expressed with a graph.
+Let G = (V, E) be the associated undirected graph
+of an MRF and C be the set of all maximal cliques in
+that graph. Nodes represent random variables x, y and
+edges represent conditional dependencies. Just like in
+graph-based image segmentation, the 4-neighborhood [SWRC06] and the 8-neighborhood
+are reasonable choices for constructing the graph.
+Typically, random variables y represent the class of a
+single pixel, random variables x represent pixel values
+and edges represent pixel neighborhood in computer
+vision segmentation problems where MRFs
+are used. Accordingly, the random variables y live
+on 1, . . . , nr of classes and the random variables x
+typically live on 0, . . . , 255 or [0, 1].
+The probability of x, y can be expressed as
+$$P(x, y) = \frac{1}{Z} e^{-E(x, y)}$$
+where $Z = \sum_{x,y} e^{-E(x, y)}$ is a normalization term
+called the partition function and E is called the energy
+function. A common choice for the energy function is
+$$E(x, y) = \sum_{c \in C} \psi_c(x, y)$$
+where ψ is called a clique potential. One choice for
+cliques of size two x, y = (x1, x2) is [KP06]
+$$\psi_c(x_1, x_2) = w \delta(x_1, x_2) = \begin{cases} +w & \text{if } x_1 \neq x_2 \\ -w & \text{if } x_1 = x_2 \end{cases}$$
+According to [Mur12], the most common way of
+inference over the posterior MRF in computer vision
 problems is Maximum A Posteriori (MAP) estimation.
-9
-Detailed introductions to MRFs are given by
-[BKR11], [Mur12]. MRFs are used by [ZBS01] and
-[MSB12] for image segmentation.
-F. Conditional Random Fields
-CRFs are MRFs where all clique potentials are
-conditioned on input features [Mur12]. This means,
-instead of learning the distribution P(y, x), the task
-is reformulated to learn the distribution P(y|x). One
-consequence of this reformulation is that CRFs need
-much less parameters as the distribution of x does
-not have to be estimated. Another advantage of CRFs
-compared to MRFs is that no distribution assumption
-about x has to be made.
-A CRF has the partition function Z:
-Z(x) = X
-y
-P(x, y)
-and joint probability distribution
-P(y|x) = 1
-Z(x)
-Y
-c∈C
-ψc(yc|x)
-The simplest way to define the clique potentials ψ is
-the count of the class yc given x added with a positive
-smoothing constant to prevent the complete term from
-getting zero.
-CRFs as described in [LRKT09] have reached top
-performance in PASCAL VOC 2010 [VOC10] and
-are also used in [HZCP04], [SWRC06] for semantic
-segmentation.
-A method similar to CRFs was proposed
-in [GBVdW+10]. The system of Gonfaus et.al.
-ranked 1st by mean accuracy in the segmentation task
-of the PASCAL VOC 2010 challenge [EVGW+10].
-An introduction to CRFs is given by [SM11].
-G. Post-processing methods
-Post-processing refine a found segmentation and
-remove obvious errors. For example, the morphological
-operations opening and closing can remove noise. The
-opening operation is a dilation followed by a erosion.
-This removes tiny segments. The closing operation is a
-erosion followed by a dilation. This removes tiny gaps
-in otherwise filled regions. They were used in [CLP98]
-for biomedical image segmentation.
-Another way of refinement of the found segmentation
-is by adjusting the segmentation to match close edges.
-This was used in [BBMM11] with an ultra-metric
-contour map [AMFM09].
-Active contour models are another example of a
-post-processing method [KWT88].
-VI. NEURAL NETWORKS FOR SEMANTIC
-SEGMENTATION
-Artificial neural networks are classifiers which are
-inspired by biologic neurons. Every single artificial
-neuron has some inputs which are weighted and sumed
-up. Then, the neuron applies a so called activation
-function to the weighted sum and gives an output. Those
-neurons can take either a feature vector as input or the
-output of other neurons. In this way, they build up
-feature hierarchies.
-The parameters they learn are the weights w ∈ R.
-They are learned by gradient descent. To do so, an error
-function — usually cross-entropy or mean squared error
-— is necessary. For the gradient descent algorithm, one
-sees the labeled training data as given, the weights
-as variables and the error function as a surface in
-this weight-space. Minimizing the error function in the
-weight space adapts the neural network to the problem.
-There are lots of ideas around neural networks like
-regularization, better optimization algorithms, automat￾ically building up architectures, design choices for
-activation functions. This is not explained in detail here,
-but some of the mayor breakthroughs are outlined.
-CNNs are neural networks which learn image filters.
-They drastically reduce the number of parameters which
-have to be learned while being still general enough for
-the problem domain of images. This was shown by Alex
-Krizhevsky et al. in [KSH12]. One major idea was a
-clever regularization called dropout training, which set
-the output of neurons while training randomly to zero.
-Another contribution was the usage of an activation
-function called rectified linear unit:
-ϕReLU(x) = max(0, x)
-Those are much faster to train than the commonly used
-sigmoid activation functions
-ϕSigmoid(x) = 1
-e−x + 1
-Krizhevsky et al. implemented those ideas and partici￾pated in the ImageNet Large-Scale Visual Recognition
-Challenge (ILSVRC). The best other system, which
-used SIFT features and Fisher Vectors, had a perfor￾mance of about 25.7 % while the network by Alex
-Krizhevsky et al. got 17.0 % error rate on the ILSVRC￾2010 dataset. As a preprocessing step, they downsam￾pled all images to a fixed size of 256 px×256 px before
-they fed the features into their network. This network
-is commonly known as AlexNet.
-Since AlexNet was developed, a lot of different
-neural networks have been proposed. One interesting
-example is [PC13], where a recurrent CNN for semantic
+
+Detailed introductions to MRFs are given by
+[BKR11], [Mur12]. MRFs are used by [ZBS01] and
+[MSB12] for image segmentation.
+F. Conditional Random Fields
+CRFs are MRFs where all clique potentials are
+conditioned on input features [Mur12]. This means,
+instead of learning the distribution P(y, x), the task
+is reformulated to learn the distribution P(y|x). One
+consequence of this reformulation is that CRFs need
+far fewer parameters as the distribution of x does
+not have to be estimated. Another advantage of CRFs
+compared to MRFs is that no distribution assumption
+about x has to be made.
+A CRF has the partition function Z:
+$$Z(x) = \sum_{y} P(x, y)$$
+and joint probability distribution
+$$P(y|x) = \frac{1}{Z(x)} \prod_{c \in C} \psi_c(y_c|x)$$
+The simplest way to define the clique potentials ψ is
+the count of the class yc given x added with a positive
+smoothing constant to prevent the complete term from
+getting zero.
+CRFs as described in [LRKT09] have reached top
+performance in PASCAL VOC 2010 [VOC10] and
+are also used in [HZCP04], [SWRC06] for semantic
+segmentation.
+A method similar to CRFs was proposed
+in [GBVdW+10]. The system of Gonfaus et al.
+ranked 1st by mean accuracy in the segmentation task
+of the PASCAL VOC 2010 challenge [EVGW+10].
+An introduction to CRFs is given by [SM11].
+G. Post-processing methods
+Post-processing methods refine a found segmentation and
+remove obvious errors. For example, the morphological
+operations opening and closing can remove noise. The
+opening operation is a dilation followed by an erosion.
+This removes tiny segments. The closing operation is an
+erosion followed by a dilation. This removes tiny gaps
+in otherwise filled regions. They were used in [CLP98]
+for biomedical image segmentation.
+Another way of refinement of the found segmentation
+is by adjusting the segmentation to match close edges.
+This was used in [BBMM11] with an ultra-metric
+contour map [AMFM09].
+Active contour models are another example of a
+post-processing method [KWT88].
+VI. NEURAL NETWORKS FOR SEMANTIC
+SEGMENTATION
+Artificial neural networks are classifiers which are
+inspired by biologic neurons. Every single artificial
+neuron has some inputs which are weighted and summed
+up. Then, the neuron applies a so-called activation
+function to the weighted sum and gives an output. Those
+neurons can take either a feature vector as input or the
+output of other neurons. In this way, they build up
+feature hierarchies.
+The parameters they learn are the weights w ∈ R.
+They are learned by gradient descent. To do so, an error
+function — usually cross-entropy or mean squared error
+— is necessary. For the gradient descent algorithm, one
+sees the labeled training data as given, the weights
+as variables and the error function as a surface in
+this weight-space. Minimizing the error function in the
+weight space adapts the neural network to the problem.
+There are lots of ideas around neural networks like
+regularization, better optimization algorithms, automatically building up architectures, design choices for
+activation functions. This is not explained in detail here,
+but some of the major breakthroughs are outlined.
+CNNs are neural networks which learn image filters.
+They drastically reduce the number of parameters which
+have to be learned while being still general enough for
+the problem domain of images. This was shown by Alex
+Krizhevsky et al. in [KSH12]. One major idea was a
+clever regularization called dropout training, which set
+the output of neurons while training randomly to zero.
+Another contribution was the usage of an activation
+function called rectified linear unit:
+ϕReLU(x) = max(0, x)
+Those are much faster to train than the commonly used
+sigmoid activation functions
+$$\varphi_{\text{Sigmoid}}(x) = \frac{1}{e^{-x} + 1}$$
+Krizhevsky et al. implemented those ideas and participated in the ImageNet Large-Scale Visual Recognition
+Challenge (ILSVRC). The best other system, which
+used SIFT features and Fisher Vectors, had a performance of about 25.7 % while the network by Alex
+Krizhevsky et al. got 17.0 % error rate on the ILSVRC-2010 dataset. As a preprocessing step, they downsampled all images to a fixed size of 256 px×256 px before
+they fed the features into their network. This network
+is commonly known as AlexNet.
+Since AlexNet was developed, a lot of different
+neural networks have been proposed. One interesting
+example is [PC13], where a recurrent CNN for semantic
 segmentation is presented.
-10
-Another notable paper is [LSD14]. The algorithm
-presented there makes use of a classifying network such
-as AlexNet, but applies the complete network as an
-image filter. This way, each pixel gets a probability
-distribution for each of the trained classes. By taking
-the most likely class, a semantic segmentation can be
-done with arbitrary image sizes.
-A very recent publication by Dai et al. [DHS15]
-showed that segmentation with much deeper networks
-is possible and achieves better results.
-More detailed explanations to neural networks for
-visual recognition is given by [LKJ15].
-VII. POSSIBLE PROBLEMS IN THE DATA FOR
-SEGMENTATION ALGORITHMS
-Different segmentation workflows have different
-problems. However, there are a couple of special cases
-which should be tested. Those cases might not occur
-often in the training data, but it could still happen in
-the productive system.
-I am not aware of any systematic work which exam￾ined the influence of problems such as the following.
-A. Lens Flare
-Lens flare is the effect of light getting scattered in
-the lens system of the camera. The testing data set of
-the KITTI road evaluation benchmark [FKG13] has a
-couple of photos with this problem. Figure 4(a) shows
-an extreme example of lens flare.
-B. Vignetting
-Vignetting is the effect of a photograph getting darker
-in the corners. This can have many reasons, for example
-filters on the camera blocking light at the corners.
-C. Blurred images
-Images can be blurred for a couple of reasons. A
-problem with the lenses mechanics, focusing on the
-wrong point, too quick movement, smoke or foam. One
-example of a blurred image is Figure 4(c), which was
-taken during an in vivo porcine procedure of diaphragm
-dissection. The smoke was caused by cauterization.
-D. Other Problems
-If the following effects can occur at all and if they
-are problems depends heavily on the problem domain
-and the used model.
-1) Partial Occlusions: Segmentation systems which
-employ a model of the objects which should be
-segmented might suffer from partial occlusions.
-(a) Lens Flare
-Image by [Hus07]
-(b) Vignetting
-Image by [Man12]
-(c) Smoke by cauterization
-Image by [GVSY13]
-(d) Camouflage
-Image by [Kaf07]
-(e) Transparency (f) Viewpoint
-Figure 4: Examples of images which might cause
-semantic segmentation systems to fail.
-2) Camouflage: Some objects, like animals in the
-wild, actively try to hide (see Figure 4(d) as an example).
-In other cases it might just be bad luck that objects
-are hard for humans to detect. This problem has two
-interesting aspects: On the one hand, the segmenting
-system might suffer from the same problems as humans
-do. On the other hand, the segmenting system might be
-better than humans are, but it is forced to learn from
-images labeled by humans. If the labels are wrong, the
-system is forced to learn something wrong.
-3) Semi-transparent Occlusion: Some objects like
-drinking glasses can be visible and still leave the object
-behind them visible as shown in Figure 4(e). This is
-mainly a definition problem: Is the seen pixel the glass
-label or the smartphone label?
-4) Viewpoints: Changes in viewpoints can be a
-problem, if they don’t occur in the training data. For
-example, an image captioning system which was trained
-on photographs of professional photographers might
-not have photos from the point of view of a child. This
+
+Another notable paper is [LSD14]. The algorithm
+presented there makes use of a classifying network such
+as AlexNet, but applies the complete network as an
+image filter. This way, each pixel gets a probability
+distribution for each of the trained classes. By taking
+the most likely class, a semantic segmentation can be
+done with arbitrary image sizes.
+A very recent publication by Dai et al. [DHS15]
+showed that segmentation with much deeper networks
+is possible and achieves better results.
+A more detailed explanation of neural networks for
+visual recognition is given by [LKJ15].
+VII. POSSIBLE PROBLEMS IN THE DATA FOR
+SEGMENTATION ALGORITHMS
+Different segmentation workflows have different
+problems. However, there are a couple of special cases
+which should be tested. Those cases might not occur
+often in the training data, but they could still occur in
+the production system.
+I am not aware of any systematic work which examined the influence of problems such as the following.
+A. Lens Flare
+Lens flare is the effect of light getting scattered in
+the lens system of the camera. The testing data set of
+the KITTI road evaluation benchmark [FKG13] has a
+couple of photos with this problem. Figure 4(a) shows
+an extreme example of lens flare.
+B. Vignetting
+Vignetting is the effect of a photograph getting darker
+in the corners. This can have many reasons, for example
+filters on the camera blocking light at the corners.
+C. Blurred images
+Images can be blurred for a couple of reasons: a
+problem with the lens mechanics, focusing on the
+wrong point, too quick movement, smoke or foam. One
+example of a blurred image is Figure 4(c), which was
+taken during an in vivo porcine procedure of diaphragm
+dissection. The smoke was caused by cauterization.
+D. Other Problems
+Whether the following effects can occur at all, and whether
+they are problems, depends heavily on the problem domain
+and the model used.
+1) Partial Occlusions: Segmentation systems which
+employ a model of the objects which should be
+segmented might suffer from partial occlusions.
+(a) Lens Flare
+Image by [Hus07]
+(b) Vignetting
+Image by [Man12]
+(c) Smoke by cauterization
+Image by [GVSY13]
+(d) Camouflage
+Image by [Kaf07]
+(e) Transparency (f) Viewpoint
+Figure 4: Examples of images which might cause
+semantic segmentation systems to fail.
+2) Camouflage: Some objects, like animals in the
+wild, actively try to hide (see Figure 4(d) as an example).
+In other cases it might just be bad luck that objects
+are hard for humans to detect. This problem has two
+interesting aspects: On the one hand, the segmenting
+system might suffer from the same problems as humans
+do. On the other hand, the segmenting system might be
+better than humans are, but it is forced to learn from
+images labeled by humans. If the labels are wrong, the
+system is forced to learn something wrong.
+3) Semi-transparent Occlusion: Some objects like
+drinking glasses can be visible and still leave the object
+behind them visible as shown in Figure 4(e). This is
+mainly a definition problem: Is the seen pixel the glass
+label or the smartphone label?
+4) Viewpoints: Changes in viewpoints can be a
+problem, if they don’t occur in the training data. For
+example, an image captioning system which was trained
+on photographs of professional photographers might
+not have photos from the point of view of a child. This
 is visualized in Figure 4(f).
-11
-VIII. DISCUSSION
-Ohta et al. wrote [OKS78] 38 years ago. It is one
-of the first papers mentioning semantic segmentation.
-In this time, a lot of work was done and many
-different directions have been explored. Different kinds
-of semantic segmentation have emerged.
-This paper presents a taxonomy of those kinds
-of semantic segmentation and a brief overview of
-completely automatic, passive, semantic segmentation
-algorithms.
-Future work includes a comparative study of
-those algorithms on publicly available dataset such
-as the ones presented in Table I. Another open
-question is the influence of the problems described
-in Section VII. This could be done using a subset of the
-thousands of images of Wikipedia Commons, such as
-https://commons.wikimedia.org/wiki/Category:Blurring
-for blurred images.
-A combination of different classifiers in an ensemble
-would be an interesting option to explore in order to
-improve accuracy. Another direction which is currently
-studied is combining classifiers such as neural networks
-with CRFs [ZJRP+15].
-REFERENCES
-[AM98] M. S. Atkins and B. T. Mackiewich, “Fully
-automatic segmentation of the brain in
-mri,” Medical Imaging, IEEE Transactions
-on, vol. 17, no. 1, pp. 98–107, Feb. 1998.
-[Online]. Available: http://ieeexplore.ieee.org/xpls/
-abs_all.jsp?arnumber=668699
-[AMFM09] P. Arbelaez, M. Maire, C. Fowlkes, and
-J. Malik, “From contours to regions: An
-empirical evaluation,” in Computer Vision and
-Pattern Recognition, 2009. CVPR 2009. IEEE
-Conference on. IEEE, Jun. 2009, pp. 2294–2301.
-[Online]. Available: http://ieeexplore.ieee.org/xpls/
-abs_all.jsp?arnumber=5206707
-[AP11] G. Azzopardi and N. Petkov, “Detection of
-retinal vascular bifurcations by trainable v4-like
-filters,” in Computer Analysis of Images and
-Patterns. Springer, 2011, pp. 451–459. [Online].
-Available: http://www.cs.rug.nl/~imaging/databases/
-retina_database/retinalfeatures_database.html
-[BBMM11] T. Brox, L. Bourdev, S. Maji, and J. Malik,
-“Object segmentation by alignment of poselet
-activations to image contours,” in Computer Vision
-and Pattern Recognition (CVPR), 2011 IEEE
-Conference on. IEEE, Jun. 2011, pp. 2225–2232.
-[Online]. Available: http://ieeexplore.ieee.org/xpls/
-abs_all.jsp?arnumber=5995659
-[BJ00] Y. Boykov and M.-P. Jolly, “Interactive organ
-segmentation using graph cuts,” in Medical Image
-Computing and Computer-Assisted Intervention–
-MICCAI 2000. Springer, 2000, pp. 276–
-286. [Online]. Available: http://link.springer.com/
-chapter/10.1007/978-3-540-40899-4_28
-[BKR11] A. Blake, P. Kohli, and C. Rother, Markov random
-fields for vision and image processing. Mit Press,
-2011.
-[BKTT15] S. Bittel, V. Kaiser, M. Teichmann, and M. Thoma,
-“Pixel-wise segmentation of street with neural
-networks,” arXiv preprint arXiv:1511.00513, 2015.
-[Online]. Available: http://arxiv.org/abs/1511.00513
-[BMBM10] L. Bourdev, S. Maji, T. Brox, and J. Malik,
-“Detecting people using mutually consistent
-poselet activations,” in Computer Vision–ECCV
-2010. Springer, 2010, pp. 168–181. [Online].
-Available: http://link.springer.com/chapter/10.1007/
-978-3-642-15567-3_13#page-1
-[Bur98] C. J. Burges, “A tutorial on support vector machines
-for pattern recognition,” Data mining and knowledge
-discovery, vol. 2, no. 2, pp. 121–167, 1998.
-[BVZ01] Y. Boykov, O. Veksler, and R. Zabih, “Fast
-approximate energy minimization via graph cuts,”
-Pattern Analysis and Machine Intelligence, IEEE
-Transactions on, vol. 23, no. 11, pp. 1222–1239,
-2001. [Online]. Available: http://ieeexplore.ieee.org/
-xpls/abs_all.jsp?arnumber=969114
-[CDF+04] G. Csurka, C. Dance, L. Fan, J. Willamowski,
-and C. Bray, “Visual categorization with bags of
-keypoints,” in Workshop on statistical learning in
-computer vision, ECCV, vol. 1, no. 1-22. Prague,
-2004, pp. 1–2.
-[CJSW01] H.-D. Cheng, X. Jiang, Y. Sun, and J. Wang,
-“Color image segmentation: advances and prospects,”
-Pattern recognition, vol. 34, no. 12, pp. 2259–2281,
-2001.
-[CLP98] C. W. Chen, J. Luo, and K. J. Parker, “Image
-segmentation via adaptive k-mean clustering and
-knowledge-based morphological operations with
-biomedical applications,” Image Processing, IEEE
+
+VIII. DISCUSSION
+Ohta et al. wrote [OKS78] 38 years ago. It is one
+of the first papers mentioning semantic segmentation.
+In this time, a lot of work was done and many
+different directions have been explored. Different kinds
+of semantic segmentation have emerged.
+This paper presents a taxonomy of those kinds
+of semantic segmentation and a brief overview of
+completely automatic, passive, semantic segmentation
+algorithms.
+Future work includes a comparative study of
+those algorithms on publicly available datasets such
+as the ones presented in Table I. Another open
+question is the influence of the problems described
+in Section VII. This could be done using a subset of the
+thousands of images of Wikipedia Commons, such as
+https://commons.wikimedia.org/wiki/Category:Blurring
+for blurred images.
+A combination of different classifiers in an ensemble
+would be an interesting option to explore in order to
+improve accuracy. Another direction which is currently
+studied is combining classifiers such as neural networks
+with CRFs [ZJRP+15].
+REFERENCES
+[AM98] M. S. Atkins and B. T. Mackiewich, “Fully
+automatic segmentation of the brain in
+mri,” Medical Imaging, IEEE Transactions
+on, vol. 17, no. 1, pp. 98–107, Feb. 1998.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=668699
+[AMFM09] P. Arbelaez, M. Maire, C. Fowlkes, and
+J. Malik, “From contours to regions: An
+empirical evaluation,” in Computer Vision and
+Pattern Recognition, 2009. CVPR 2009. IEEE
+Conference on. IEEE, Jun. 2009, pp. 2294–2301.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=5206707
+[AP11] G. Azzopardi and N. Petkov, “Detection of
+retinal vascular bifurcations by trainable v4-like
+filters,” in Computer Analysis of Images and
+Patterns. Springer, 2011, pp. 451–459. [Online].
+Available: http://www.cs.rug.nl/~imaging/databases/
+retina_database/retinalfeatures_database.html
+[BBMM11] T. Brox, L. Bourdev, S. Maji, and J. Malik,
+“Object segmentation by alignment of poselet
+activations to image contours,” in Computer Vision
+and Pattern Recognition (CVPR), 2011 IEEE
+Conference on. IEEE, Jun. 2011, pp. 2225–2232.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=5995659
+[BJ00] Y. Boykov and M.-P. Jolly, “Interactive organ
+segmentation using graph cuts,” in Medical Image
+Computing and Computer-Assisted Intervention–
+MICCAI 2000. Springer, 2000, pp. 276–
+286. [Online]. Available: http://link.springer.com/
+chapter/10.1007/978-3-540-40899-4_28
+[BKR11] A. Blake, P. Kohli, and C. Rother, Markov random
+fields for vision and image processing. Mit Press,
+2011.
+[BKTT15] S. Bittel, V. Kaiser, M. Teichmann, and M. Thoma,
+“Pixel-wise segmentation of street with neural
+networks,” arXiv preprint arXiv:1511.00513, 2015.
+[Online]. Available: http://arxiv.org/abs/1511.00513
+[BMBM10] L. Bourdev, S. Maji, T. Brox, and J. Malik,
+“Detecting people using mutually consistent
+poselet activations,” in Computer Vision–ECCV
+2010. Springer, 2010, pp. 168–181. [Online].
+Available: http://link.springer.com/chapter/10.1007/
+978-3-642-15567-3_13#page-1
+[Bur98] C. J. Burges, “A tutorial on support vector machines
+for pattern recognition,” Data mining and knowledge
+discovery, vol. 2, no. 2, pp. 121–167, 1998.
+[BVZ01] Y. Boykov, O. Veksler, and R. Zabih, “Fast
+approximate energy minimization via graph cuts,”
+Pattern Analysis and Machine Intelligence, IEEE
+Transactions on, vol. 23, no. 11, pp. 1222–1239,
+2001. [Online]. Available: http://ieeexplore.ieee.org/
+xpls/abs_all.jsp?arnumber=969114
+[CDF+04] G. Csurka, C. Dance, L. Fan, J. Willamowski,
+and C. Bray, “Visual categorization with bags of
+keypoints,” in Workshop on statistical learning in
+computer vision, ECCV, vol. 1, no. 1-22. Prague,
+2004, pp. 1–2.
+[CJSW01] H.-D. Cheng, X. Jiang, Y. Sun, and J. Wang,
+“Color image segmentation: advances and prospects,”
+Pattern recognition, vol. 34, no. 12, pp. 2259–2281,
+2001.
+[CLP98] C. W. Chen, J. Luo, and K. J. Parker, “Image
+segmentation via adaptive k-mean clustering and
+knowledge-based morphological operations with
+biomedical applications,” Image Processing, IEEE
 Transactions on, vol. 7, no. 12, pp. 1673–1683, Dec.
-12
-1998. [Online]. Available: http://ieeexplore.ieee.org/
-xpls/abs_all.jsp?arnumber=730379
-[CM02] D. Comaniciu and P. Meer, “Mean shift: A
-robust approach toward feature space analysis,”
-Pattern Analysis and Machine Intelligence, IEEE
-Transactions on, vol. 24, no. 5, pp. 603–619, 2002.
-[Online]. Available: http://ieeexplore.ieee.org/xpl/
-login.jsp?tp=&arnumber=1000236
-[COWR11] C. Chen, J. Ozolek, W. Wang, and G. K. Rohde,
-“A pixel classification system for segmenting
-biomedical images using intensity neighborhoods
-and dimension reduction,” in Biomedical Imaging:
-From Nano to Macro, 2011 IEEE International
-Symposium on. IEEE, 2011, pp. 1649–1652.
-[Online]. Available: https://www.andrew.cmu.edu/
-user/gustavor/chen_isbi_11.pdf
-[CP08] G. Csurka and F. Perronnin, “A simple high
-performance approach to semantic segmentation.”
-in BMVC, 2008, pp. 1–10. [Online]. Avail￾able: http://www.xrce.xerox.com/layout/set/print/
-content/download/16654/118653/file/2008-023.pdf
-[CRSS] A. Cohen, E. Rivlin, I. Shimshoni, and
-E. Sabo, “Colon crypt segmentation website.” [On￾line]. Available: http://mis.haifa.ac.il/~ishimshoni/
-SegmentCrypt/Download.htm
-[CRSS14] ——, “Memory based active contour algorithm
-using pixel-level classified images for colon crypt
-segmentation,” Computerized Medical Imaging
-and Graphics, Nov. 2014. [Online]. Available:
-http://mis.haifa.ac.il/~ishimshoni/SegmentCrypt/
-Active%20contour%20based%20on%20pixel￾level%20classified%20image%20for%20colon%
-20crypts%20segmentation.pdf
-[CS10] J. Carreira and C. Sminchisescu, “Constrained
-parametric min-cuts for automatic object segmenta￾tion,” in Computer Vision and Pattern Recognition
-(CVPR), 2010 IEEE Conference on. IEEE, 2010,
-pp. 3241–3248.
-[CS11] ——, “Cpmc: Constrained parametric min-cuts for
-automatic object segmentation,” Feb. 2011. [Online].
-Available: http://www.maths.lth.se/matematiklth/
-personal/sminchis/code/cpmc/
-[CSI+09] M. E. Celebi, G. Schaefer, H. Iyatomi, W. V.
-Stoecker, J. M. Malters, and J. M. Grichnik, “An
-improved objective evaluation measure for border
-detection in dermoscopy images,” Skin Research
-and Technology, vol. 15, no. 4, pp. 444–450, 2009.
-[Online]. Available: http://arxiv.org/abs/1009.1020
-[CSM09] L. P. Coelho, A. Shariff, and R. F. Murphy, “Nuclear
-segmentation in microscope cell images: a hand￾segmented dataset and comparison of algorithms,”
-in Biomedical Imaging: From Nano to Macro,
-2009. ISBI’09. IEEE International Symposium on.
-IEEE, 2009, pp. 518–521. [Online]. Available:
-http://murphylab.web.cmu.edu/data
-[CXGS12] M. D. Collins, J. Xu, L. Grady, and V. Singh,
-“Random walks based multi-image segmentation:
-Quasiconvexity results and gpu-based solutions,”
-in Computer Vision and Pattern Recognition
-(CVPR), 2012 IEEE Conference on. IEEE,
-2012, pp. 1656–1663. [Online]. Available: http:
-//pages.cs.wisc.edu/~jiaxu/pub/rwcoseg.pdf
-[DHS15] J. Dai, K. He, and J. Sun, “Instance-aware seman￾tic segmentation via multi-task network cascades,”
-arXiv preprint arXiv:1512.04412, 2015.
-[DT05] N. Dalal and B. Triggs, “Histograms of oriented
-gradients for human detection,” in Computer
-Vision and Pattern Recognition, 2005. CVPR
-2005. IEEE Computer Society Conference on,
-vol. 1, June 2005, pp. 886–893 vol. 1.
-[Online]. Available: http://ieeexplore.ieee.org/xpls/
-abs_all.jsp?arnumber=1467360
-[EVGW+a] M. Everingham, L. Van Gool, C. K. I.
-Williams, J. Winn, and A. Zisserman, “The
-PASCAL Visual Object Classes Challenge
-2007 (VOC2007) Results,” http://www.pascal￾network.org/challenges/VOC/voc2007/workshop/index.html.
-[Online]. Available: http://host.robots.ox.ac.uk:
-8080/pascal/VOC/voc2007/index.html
-[EVGW+b] ——, “The PASCAL Visual Object Classes Chal￾lenge 2012 (VOC2012) Results,” http://www.pascal￾network.org/challenges/VOC/voc2012/workshop/index.html.
-[Online]. Available: http://host.robots.ox.ac.uk:
-8080/pascal/VOC/voc2012/index.html
-[EVGW+10] M. Everingham, L. Van Gool, C. K. Williams,
-J. Winn, and A. Zisserman, “The pascal visual object
-classes (voc) challenge,” International journal of
-computer vision, vol. 88, no. 2, pp. 303–338, 2010.
-[EVGW+12] M. Everingham, L. Van Gool, C. K. I. Williams,
-J. Winn, and A. Zisserman, “Visual object
-classes challenge 2012 (voc2012),” 2012. [Online].
-Available: http://host.robots.ox.ac.uk:8080/pascal/
-VOC/voc2012/index.html
-[Fel] P. F. Felzenszwalb, “Graph based im￾age segmentation.” [Online]. Available: http:
-//cs.brown.edu/~pff/segment/
-[FGMR10] P. F. Felzenszwalb, R. B. Girshick, D. McAllester,
-and D. Ramanan, “Object detection with discrimina￾tively trained part-based models,” Pattern Analysis
-and Machine Intelligence, IEEE Transactions on,
-vol. 32, no. 9, pp. 1627–1645, 2010.
-[FH04] P. F. Felzenszwalb and D. P. Huttenlocher,
-“Efficient graph-based image segmentation,”
-International Journal of Computer Vision,
-vol. 59, no. 2, pp. 167–181, 2004. [Online].
-Available: http://link.springer.com/article/10.1023/
-B:VISI.0000022288.19776.77
-[FKG13] J. Fritsch, T. Kuehnl, and A. Geiger, “A
-new performance measure and evaluation
-benchmark for road detection algorithms,” in
-International Conference on Intelligent Transporta￾tion Systems (ITSC), 2013. [Online]. Available:
-http://www.cvlibs.net/datasets/kitti/eval_road.php
-[GBVdW+10] J. M. Gonfaus, X. Boix, J. Van de Weijer, A. D.
-Bagdanov, J. Serrat, and J. Gonzalez, “Harmony po￾tentials for joint classification and segmentation,” in
-Computer Vision and Pattern Recognition (CVPR),
-2010 IEEE Conference on. IEEE, 2010, pp. 3280–
-3287.
-[GRC+08] S. Gould, J. Rodgers, D. Cohen, G. Elidan, and
-D. Koller, “Multi-class segmentation with relative
-location prior,” International Journal of Computer
-Vision, vol. 80, no. 3, pp. 300–316, Apr. 2008.
-[GVSY13] S. Giannarou, M. Visentini-Scarzanella, and G.-
-Z. Yang, “Probabilistic tracking of affine-invariant
-anisotropic regions,” Pattern Analysis and Machine
-Intelligence, IEEE Transactions on, vol. 35, no. 1,
-pp. 130–143, 2013.
-[Har75] J. A. Hartigan, Clustering algorithms. John Wiley
-& Sons, Inc., 1975.
-[HDT02] C. Huang, L. Davis, and J. Townshend, “An
-assessment of support vector machines for land
-cover classification,” International Journal of remote
-sensing, vol. 23, no. 4, pp. 725–749, 2002.
-[HHR01] S. Hu, E. Hoffman, and J. Reinhardt, “Automatic
-lung segmentation for accurate quantitation of
+
+1998. [Online]. Available: http://ieeexplore.ieee.org/
+xpls/abs_all.jsp?arnumber=730379
+[CM02] D. Comaniciu and P. Meer, “Mean shift: A
+robust approach toward feature space analysis,”
+Pattern Analysis and Machine Intelligence, IEEE
+Transactions on, vol. 24, no. 5, pp. 603–619, 2002.
+[Online]. Available: http://ieeexplore.ieee.org/xpl/
+login.jsp?tp=&arnumber=1000236
+[COWR11] C. Chen, J. Ozolek, W. Wang, and G. K. Rohde,
+“A pixel classification system for segmenting
+biomedical images using intensity neighborhoods
+and dimension reduction,” in Biomedical Imaging:
+From Nano to Macro, 2011 IEEE International
+Symposium on. IEEE, 2011, pp. 1649–1652.
+[Online]. Available: https://www.andrew.cmu.edu/
+user/gustavor/chen_isbi_11.pdf
+[CP08] G. Csurka and F. Perronnin, “A simple high
+performance approach to semantic segmentation.”
+in BMVC, 2008, pp. 1–10. [Online]. Available: http://www.xrce.xerox.com/layout/set/print/
+content/download/16654/118653/file/2008-023.pdf
+[CRSS] A. Cohen, E. Rivlin, I. Shimshoni, and
+E. Sabo, “Colon crypt segmentation website.” [Online]. Available: http://mis.haifa.ac.il/~ishimshoni/
+SegmentCrypt/Download.htm
+[CRSS14] ——, “Memory based active contour algorithm
+using pixel-level classified images for colon crypt
+segmentation,” Computerized Medical Imaging
+and Graphics, Nov. 2014. [Online]. Available:
+http://mis.haifa.ac.il/~ishimshoni/SegmentCrypt/
+Active%20contour%20based%20on%20pixel-level%20classified%20image%20for%20colon%
+20crypts%20segmentation.pdf
+[CS10] J. Carreira and C. Sminchisescu, “Constrained
+parametric min-cuts for automatic object segmentation,” in Computer Vision and Pattern Recognition
+(CVPR), 2010 IEEE Conference on. IEEE, 2010,
+pp. 3241–3248.
+[CS11] ——, “Cpmc: Constrained parametric min-cuts for
+automatic object segmentation,” Feb. 2011. [Online].
+Available: http://www.maths.lth.se/matematiklth/
+personal/sminchis/code/cpmc/
+[CSI+09] M. E. Celebi, G. Schaefer, H. Iyatomi, W. V.
+Stoecker, J. M. Malters, and J. M. Grichnik, “An
+improved objective evaluation measure for border
+detection in dermoscopy images,” Skin Research
+and Technology, vol. 15, no. 4, pp. 444–450, 2009.
+[Online]. Available: http://arxiv.org/abs/1009.1020
+[CSM09] L. P. Coelho, A. Shariff, and R. F. Murphy, “Nuclear
+segmentation in microscope cell images: a hand-segmented dataset and comparison of algorithms,”
+in Biomedical Imaging: From Nano to Macro,
+2009. ISBI’09. IEEE International Symposium on.
+IEEE, 2009, pp. 518–521. [Online]. Available:
+http://murphylab.web.cmu.edu/data
+[CXGS12] M. D. Collins, J. Xu, L. Grady, and V. Singh,
+“Random walks based multi-image segmentation:
+Quasiconvexity results and gpu-based solutions,”
+in Computer Vision and Pattern Recognition
+(CVPR), 2012 IEEE Conference on. IEEE,
+2012, pp. 1656–1663. [Online]. Available: http:
+//pages.cs.wisc.edu/~jiaxu/pub/rwcoseg.pdf
+[DHS15] J. Dai, K. He, and J. Sun, “Instance-aware semantic segmentation via multi-task network cascades,”
+arXiv preprint arXiv:1512.04412, 2015.
+[DT05] N. Dalal and B. Triggs, “Histograms of oriented
+gradients for human detection,” in Computer
+Vision and Pattern Recognition, 2005. CVPR
+2005. IEEE Computer Society Conference on,
+vol. 1, June 2005, pp. 886–893 vol. 1.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=1467360
+[EVGW+a] M. Everingham, L. Van Gool, C. K. I.
+Williams, J. Winn, and A. Zisserman, “The
+PASCAL Visual Object Classes Challenge
+2007 (VOC2007) Results,” http://www.pascal-network.org/challenges/VOC/voc2007/workshop/index.html.
+[Online]. Available: http://host.robots.ox.ac.uk:
+8080/pascal/VOC/voc2007/index.html
+[EVGW+b] ——, “The PASCAL Visual Object Classes Challenge 2012 (VOC2012) Results,” http://www.pascal-network.org/challenges/VOC/voc2012/workshop/index.html.
+[Online]. Available: http://host.robots.ox.ac.uk:
+8080/pascal/VOC/voc2012/index.html
+[EVGW+10] M. Everingham, L. Van Gool, C. K. Williams,
+J. Winn, and A. Zisserman, “The pascal visual object
+classes (voc) challenge,” International journal of
+computer vision, vol. 88, no. 2, pp. 303–338, 2010.
+[EVGW+12] M. Everingham, L. Van Gool, C. K. I. Williams,
+J. Winn, and A. Zisserman, “Visual object
+classes challenge 2012 (voc2012),” 2012. [Online].
+Available: http://host.robots.ox.ac.uk:8080/pascal/
+VOC/voc2012/index.html
+[Fel] P. F. Felzenszwalb, “Graph based image segmentation.” [Online]. Available: http:
+//cs.brown.edu/~pff/segment/
+[FGMR10] P. F. Felzenszwalb, R. B. Girshick, D. McAllester,
+and D. Ramanan, “Object detection with discriminatively trained part-based models,” Pattern Analysis
+and Machine Intelligence, IEEE Transactions on,
+vol. 32, no. 9, pp. 1627–1645, 2010.
+[FH04] P. F. Felzenszwalb and D. P. Huttenlocher,
+“Efficient graph-based image segmentation,”
+International Journal of Computer Vision,
+vol. 59, no. 2, pp. 167–181, 2004. [Online].
+Available: http://link.springer.com/article/10.1023/
+B:VISI.0000022288.19776.77
+[FKG13] J. Fritsch, T. Kuehnl, and A. Geiger, “A
+new performance measure and evaluation
+benchmark for road detection algorithms,” in
+International Conference on Intelligent Transportation Systems (ITSC), 2013. [Online]. Available:
+http://www.cvlibs.net/datasets/kitti/eval_road.php
+[GBVdW+10] J. M. Gonfaus, X. Boix, J. Van de Weijer, A. D.
+Bagdanov, J. Serrat, and J. Gonzalez, “Harmony potentials for joint classification and segmentation,” in
+Computer Vision and Pattern Recognition (CVPR),
+2010 IEEE Conference on. IEEE, 2010, pp. 3280–
+3287.
+[GRC+08] S. Gould, J. Rodgers, D. Cohen, G. Elidan, and
+D. Koller, “Multi-class segmentation with relative
+location prior,” International Journal of Computer
+Vision, vol. 80, no. 3, pp. 300–316, Apr. 2008.
+[GVSY13] S. Giannarou, M. Visentini-Scarzanella, and G.-Z.
+Yang, “Probabilistic tracking of affine-invariant
+anisotropic regions,” Pattern Analysis and Machine
+Intelligence, IEEE Transactions on, vol. 35, no. 1,
+pp. 130–143, 2013.
+[Har75] J. A. Hartigan, Clustering algorithms. John Wiley
+& Sons, Inc., 1975.
+[HDT02] C. Huang, L. Davis, and J. Townshend, “An
+assessment of support vector machines for land
+cover classification,” International Journal of remote
+sensing, vol. 23, no. 4, pp. 725–749, 2002.
+[HHR01] S. Hu, E. Hoffman, and J. Reinhardt, “Automatic
+lung segmentation for accurate quantitation of
 volumetric x-ray ct images,” Medical Imaging, IEEE
-13
-Transactions on, vol. 20, no. 6, pp. 490–498, Jun.
-2001.
-[HJBJ+96] A. Hoover, G. Jean-Baptiste, X. Jiang, P. J.
-Flynn, H. Bunke, D. B. Goldgof, K. Bowyer,
-D. W. Eggert, A. Fitzgibbon, and R. B.
-Fisher, “An experimental comparison of range
-image segmentation algorithms,” Pattern Analysis
-and Machine Intelligence, IEEE Transactions
-on, vol. 18, no. 7, pp. 673–689, Jul. 1996.
-[Online]. Available: http://ieeexplore.ieee.org/xpls/
-abs_all.jsp?arnumber=506791
-[Ho95] T. K. Ho, “Random decision forests,” in
-Document Analysis and Recognition, 1995.,
-Proceedings of the Third International Conference
-on, vol. 1. IEEE, 1995, pp. 278–282.
-[Online]. Available: http://ect.bell-labs.com/who/
-tkh/publications/papers/odt.pdf
-[Hus07] Hustvedt, “File:cctv lens flare.jpg,” Wikipedia
-Commons, Nov. 2007. [Online]. Avail￾able: https://commons.wikimedia.org/wiki/File:
-CCTV_Lens_flare.jpg
-[HZCP04] X. He, R. Zemel, and M. Carreira-Perpindn,
-“Multiscale conditional random fields for image
-labeling,” in Computer Vision and Pattern
-Recognition, 2004. CVPR 2004. Proceedings
-of the 2004 IEEE Computer Society Conference
-on, vol. 2, Jun. 2004, pp. II–695–II–702 Vol.2.
-[Online]. Available: http://ieeexplore.ieee.org/xpl/
-login.jsp?tp=&arnumber=1315232
-[JLD03] K. Jiang, Q.-M. Liao, and S.-Y. Dai, “A novel white
-blood cell segmentation scheme using scale-space
-filtering and watershed clustering,” in Machine
-Learning and Cybernetics, 2003 International
-Conference on, vol. 5, Nov 2003, pp. 2820–2825
-Vol.5. [Online]. Available: http://ieeexplore.ieee.org/
-xpl/login.jsp?tp=&arnumber=1260033
-[Kaf07] L. Kaffer, “File:great male leopard in south afrika￾jd.jpg,” Wikipedia Commons, Jul. 2007. [Online].
-Available: https://commons.wikimedia.org/wiki/File:
-Great_male_Leopard_in_South_Afrika-JD.JPG
-[KKV+14] V. Kalesnykiene, J.-k. Kamarainen, R. Voutilainen,
-J. Pietilä, H. Kälviäinen, and H. Uusitalo,
-“Diaretdb1 diabetic retinopathy database and
-evaluation protocol,” 2014. [Online]. Available:
-http://www2.it.lut.fi/project/imageret/diaretdb1/
-[KP92] J. M. Kasson and W. Plouffe, “An analysis of
-selected computer interchange color spaces,” ACM
-Transactions on Graphics (TOG), vol. 11, no. 4, pp.
-373–405, 1992.
-[KP06] Z. Kato and T.-C. Pong, “A markov random
-field image segmentation model for color
-textured images,” Image and Vision Computing,
-vol. 24, no. 10, pp. 1103–1114, 2006. [Online].
-Available: http://www.sciencedirect.com/science/
-article/pii/S0262885606001223
-[KSH12] A. Krizhevsky, I. Sutskever, and G. E. Hinton,
-“Imagenet classification with deep convolutional
-neural networks,” in Advances in neural information
-processing systems, 2012, pp. 1097–1105.
-[KWT88] M. Kass, A. Witkin, and D. Terzopoulos,
-“Snakes: Active contour models,” International
-journal of computer vision, vol. 1, no. 4, pp.
-321–331, Jan. 1988. [Online]. Available: http:
-//link.springer.com/article/10.1007/BF00133570
-[LKJ15] F.-F. Li, A. Karpathy, and J. Johnson,
-“CS231n: Convolutional neural networks for
-visual recognition,” 2015. [Online]. Available:
-http://cs231n.stanford.edu/
-[Low04] D. Lowe, “Distinctive image features from scale￾invariant keypoints,” International Journal of
-Computer Vision, vol. 60, no. 2, pp. 91–110, 2004.
-[Online]. Available: http://dx.doi.org/10.1023/B%
-3AVISI.0000029664.99615.94
-[LRAL08] A. Levin, A. Rav-Acha, and D. Lischinski,
-“Spectral matting,” Pattern Analysis and
-Machine Intelligence, IEEE Transactions on,
-vol. 30, no. 10, pp. 1699–1712, 2008.
-[Online]. Available: http://ieeexplore.ieee.org/xpls/
-abs_all.jsp?arnumber=4547428
-[LRKT09] L. Ladický, C. Russell, P. Kohli, and P. Torr,
-“Associative hierarchical crfs for object class image
-segmentation,” in Computer Vision, 2009 IEEE 12th
-International Conference on, 2009, pp. 739–746.
-[Online]. Available: http://ieeexplore.ieee.org/xpls/
-abs_all.jsp?arnumber=5459248
-[LSD14] J. Long, E. Shelhamer, and T. Darrell, “Fully
-convolutional networks for semantic segmentation,”
-arXiv preprint arXiv:1411.4038, 2014. [Online].
-Available: http://arxiv.org/abs/1411.4038
-[MAFM08] M. Maire, P. Arbelaez, C. Fowlkes, and
-J. Malik, “Using contours to detect and localize
-junctions in natural images,” in Computer Vision
-and Pattern Recognition, 2008. CVPR 2008.
-IEEE Conference on, June 2008, pp. 1–8.
-[Online]. Available: http://ieeexplore.ieee.org/xpls/
-abs_all.jsp?arnumber=4587420
-[Man12] M. Manske, “File:randabschattung mikroskop
-kamera 6.jpg,” Wikipedia Com￾mons, Dec. 2012. [Online]. Avail￾able: https://commons.wikimedia.org/wiki/File:
-Randabschattung_Mikroskop_Kamera_6.JPG
-[MBLAGJ+07] S. Maldonado-Bascon, S. Lafuente-Arroyo, P. Gil￾Jimenez, H. Gomez-Moreno, and F. Lopez￾Ferreras, “Road-sign detection and recognition
-based on support vector machines,” Intelligent
-Transportation Systems, IEEE Transactions on,
-vol. 8, no. 2, pp. 264–278, Jun. 2007.
-[Online]. Available: http://ieeexplore.ieee.org/xpls/
-abs_all.jsp?arnumber=4220659
-[MBVLG02] N. Moon, E. Bullitt, K. Van Leemput, and G. Gerig,
-“Automatic brain and tumor segmentation,” in Med￾ical Image Computing and Computer-Assisted In￾tervention—MICCAI 2002. Springer, 2002, pp.
-372–379.
-[MFTM01] D. Martin, C. Fowlkes, D. Tal, and J. Malik,
-“A database of human segmented natural
-images and its application to evaluating
-segmentation algorithms and measuring ecological
-statistics,” in Computer Vision, 2001. ICCV
-2001. Proceedings. Eighth IEEE International
-Conference on, vol. 2. IEEE, 2001, pp. 416–423.
-[Online]. Available: http://ieeexplore.ieee.org/xpls/
-abs_all.jsp?arnumber=937655
-[MHMK+14] L. Maier-Hein, S. Mersmann, D. Kondermann,
-S. Bodenstedt, A. Sanchez, C. Stock, H. G.
-Kenngott, M. Eisenmann, and S. Speidel, “Can
-masses of non-experts train highly accurate
-image classifiers?” in Medical Image Computing
-and Computer-Assisted Intervention–MICCAI 2014.
-Springer, 2014, pp. 438–445. [Online]. Available:
-http://opencas.webarchiv.kit.edu/?q=node/26
-[Min89] J. Mingers, “An empirical comparison of selection
-measures for decision-tree induction,” Machine
-Learning, vol. 3, no. 4, pp. 319–342, 1989.
-[Online]. Available: http://dx.doi.org/10.1023/A%
-3A1022645801436
-[MSB12] G. Moser, S. B. Serpico, and J. A. Benediktsson,
+
+Transactions on, vol. 20, no. 6, pp. 490–498, Jun.
+2001.
+[HJBJ+96] A. Hoover, G. Jean-Baptiste, X. Jiang, P. J.
+Flynn, H. Bunke, D. B. Goldgof, K. Bowyer,
+D. W. Eggert, A. Fitzgibbon, and R. B.
+Fisher, “An experimental comparison of range
+image segmentation algorithms,” Pattern Analysis
+and Machine Intelligence, IEEE Transactions
+on, vol. 18, no. 7, pp. 673–689, Jul. 1996.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=506791
+[Ho95] T. K. Ho, “Random decision forests,” in
+Document Analysis and Recognition, 1995.,
+Proceedings of the Third International Conference
+on, vol. 1. IEEE, 1995, pp. 278–282.
+[Online]. Available: http://ect.bell-labs.com/who/
+tkh/publications/papers/odt.pdf
+[Hus07] Hustvedt, “File:cctv lens flare.jpg,” Wikipedia
+Commons, Nov. 2007. [Online]. Available: https://commons.wikimedia.org/wiki/File:
+CCTV_Lens_flare.jpg
+[HZCP04] X. He, R. Zemel, and M. Carreira-Perpindn,
+“Multiscale conditional random fields for image
+labeling,” in Computer Vision and Pattern
+Recognition, 2004. CVPR 2004. Proceedings
+of the 2004 IEEE Computer Society Conference
+on, vol. 2, Jun. 2004, pp. II–695–II–702 Vol.2.
+[Online]. Available: http://ieeexplore.ieee.org/xpl/
+login.jsp?tp=&arnumber=1315232
+[JLD03] K. Jiang, Q.-M. Liao, and S.-Y. Dai, “A novel white
+blood cell segmentation scheme using scale-space
+filtering and watershed clustering,” in Machine
+Learning and Cybernetics, 2003 International
+Conference on, vol. 5, Nov 2003, pp. 2820–2825
+Vol.5. [Online]. Available: http://ieeexplore.ieee.org/
+xpl/login.jsp?tp=&arnumber=1260033
+[Kaf07] L. Kaffer, “File:great male leopard in south afrikajd.jpg,” Wikipedia Commons, Jul. 2007. [Online].
+Available: https://commons.wikimedia.org/wiki/File:
+Great_male_Leopard_in_South_Afrika-JD.JPG
+[KKV+14] V. Kalesnykiene, J.-k. Kamarainen, R. Voutilainen,
+J. Pietilä, H. Kälviäinen, and H. Uusitalo,
+“Diaretdb1 diabetic retinopathy database and
+evaluation protocol,” 2014. [Online]. Available:
+http://www2.it.lut.fi/project/imageret/diaretdb1/
+[KP92] J. M. Kasson and W. Plouffe, “An analysis of
+selected computer interchange color spaces,” ACM
+Transactions on Graphics (TOG), vol. 11, no. 4, pp.
+373–405, 1992.
+[KP06] Z. Kato and T.-C. Pong, “A markov random
+field image segmentation model for color
+textured images,” Image and Vision Computing,
+vol. 24, no. 10, pp. 1103–1114, 2006. [Online].
+Available: http://www.sciencedirect.com/science/
+article/pii/S0262885606001223
+[KSH12] A. Krizhevsky, I. Sutskever, and G. E. Hinton,
+“Imagenet classification with deep convolutional
+neural networks,” in Advances in neural information
+processing systems, 2012, pp. 1097–1105.
+[KWT88] M. Kass, A. Witkin, and D. Terzopoulos,
+“Snakes: Active contour models,” International
+journal of computer vision, vol. 1, no. 4, pp.
+321–331, Jan. 1988. [Online]. Available: http:
+//link.springer.com/article/10.1007/BF00133570
+[LKJ15] F.-F. Li, A. Karpathy, and J. Johnson,
+“CS231n: Convolutional neural networks for
+visual recognition,” 2015. [Online]. Available:
+http://cs231n.stanford.edu/
+[Low04] D. Lowe, “Distinctive image features from scaleinvariant keypoints,” International Journal of
+Computer Vision, vol. 60, no. 2, pp. 91–110, 2004.
+[Online]. Available: http://dx.doi.org/10.1023/B%
+3AVISI.0000029664.99615.94
+[LRAL08] A. Levin, A. Rav-Acha, and D. Lischinski,
+“Spectral matting,” Pattern Analysis and
+Machine Intelligence, IEEE Transactions on,
+vol. 30, no. 10, pp. 1699–1712, 2008.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=4547428
+[LRKT09] L. Ladický, C. Russell, P. Kohli, and P. Torr,
+“Associative hierarchical crfs for object class image
+segmentation,” in Computer Vision, 2009 IEEE 12th
+International Conference on, 2009, pp. 739–746.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=5459248
+[LSD14] J. Long, E. Shelhamer, and T. Darrell, “Fully
+convolutional networks for semantic segmentation,”
+arXiv preprint arXiv:1411.4038, 2014. [Online].
+Available: http://arxiv.org/abs/1411.4038
+[MAFM08] M. Maire, P. Arbelaez, C. Fowlkes, and
+J. Malik, “Using contours to detect and localize
+junctions in natural images,” in Computer Vision
+and Pattern Recognition, 2008. CVPR 2008.
+IEEE Conference on, June 2008, pp. 1–8.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=4587420
+[Man12] M. Manske, “File:randabschattung mikroskop
+kamera 6.jpg,” Wikipedia Commons, Dec. 2012. [Online]. Available: https://commons.wikimedia.org/wiki/File:
+Randabschattung_Mikroskop_Kamera_6.JPG
+[MBLAGJ+07] S. Maldonado-Bascon, S. Lafuente-Arroyo, P. GilJimenez, H. Gomez-Moreno, and F. LopezFerreras, “Road-sign detection and recognition
+based on support vector machines,” Intelligent
+Transportation Systems, IEEE Transactions on,
+vol. 8, no. 2, pp. 264–278, Jun. 2007.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=4220659
+[MBVLG02] N. Moon, E. Bullitt, K. Van Leemput, and G. Gerig,
+“Automatic brain and tumor segmentation,” in Medical Image Computing and Computer-Assisted Intervention—MICCAI 2002. Springer, 2002, pp.
+372–379.
+[MFTM01] D. Martin, C. Fowlkes, D. Tal, and J. Malik,
+“A database of human segmented natural
+images and its application to evaluating
+segmentation algorithms and measuring ecological
+statistics,” in Computer Vision, 2001. ICCV
+2001. Proceedings. Eighth IEEE International
+Conference on, vol. 2. IEEE, 2001, pp. 416–423.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=937655
+[MHMK+14] L. Maier-Hein, S. Mersmann, D. Kondermann,
+S. Bodenstedt, A. Sanchez, C. Stock, H. G.
+Kenngott, M. Eisenmann, and S. Speidel, “Can
+masses of non-experts train highly accurate
+image classifiers?” in Medical Image Computing
+and Computer-Assisted Intervention–MICCAI 2014.
+Springer, 2014, pp. 438–445. [Online]. Available:
+http://opencas.webarchiv.kit.edu/?q=node/26
+[Min89] J. Mingers, “An empirical comparison of selection
+measures for decision-tree induction,” Machine
+Learning, vol. 3, no. 4, pp. 319–342, 1989.
+[Online]. Available: http://dx.doi.org/10.1023/A%
+3A1022645801436
+[MSB12] G. Moser, S. B. Serpico, and J. A. Benediktsson,
 “Markov random field models for supervised land
-14
-cover classification from very high resolution
-multispectral remote sensing images,” in Advances
-in Radar and Remote Sensing (TyWRRS), 2012
-Tyrrhenian Workshop on. IEEE, 2012, pp. 235–
-242. [Online]. Available: http://ieeexplore.ieee.org/
-xpl/login.jsp?tp=&arnumber=6381135
-[MSC] “Object class recognition image database.”
-[Online]. Available: http://research.microsoft.com/
-vision/cambridge/recognition/
-[MSR] “Image understanding - research data,”
-Microsoft Research. [Online]. Avail￾able: http://research.microsoft.com/en-us/projects/
-objectclassrecognition/
-[Mur12] K. P. Murphy, Machine learning: a probabilistic
-perspective. MIT press, 2012.
-[OKS78] Y.-i. Ohta, T. Kanade, and T. Sakai, “An analysis
-system for scenes containing objects with substruc￾tures,” in Proceedings of the Fourth International
-Joint Conference on Pattern Recognitions, 1978, pp.
-752–754.
-[PAA+87] S. M. Pizer, E. P. Amburn, J. D. Austin,
-R. Cromartie, A. Geselowitz, T. Greer, B. ter
-Haar Romeny, J. B. Zimmerman, and K. Zuiderveld,
-“Adaptive histogram equalization and its variations,”
-Computer vision, graphics, and image processing,
-vol. 39, no. 3, pp. 355–368, 1987. [Online].
-Available: http://www.sciencedirect.com/science/
-article/pii/S0734189X8780186X
-[PC13] P. H. Pinheiro and R. Collobert, “Recurrent
-convolutional neural networks for scene parsing,”
-arXiv preprint arXiv:1306.2795, 2013. [Online].
-Available: http://arxiv.org/abs/1306.2795v1
-[PH05] C. Pantofaru and M. Hebert, “A
-comparison of image segmentation algorithms,”
-Robotics Institute, p. 336, 2005. [Online].
-Available: http://riweb-backend.ri.cmu.edu/
-pub_files/pub4/pantofaru_caroline_2005_1/
-pantofaru_caroline_2005_1.pdf
-[PS07] A. Protiere and G. Sapiro, “Interactive
-image segmentation via adaptive weighted
-distances,” Image Processing, IEEE Transactions
-on, vol. 16, no. 4, pp. 1046–1057, 2007.
-[Online]. Available: http://ieeexplore.ieee.org/xpls/
-abs_all.jsp?arnumber=4130436
-[PTN09] N. Plath, M. Toussaint, and S. Nakajima, “Multi￾class image segmentation using conditional random
-fields and global classification,” in Proceedings
-of the 26th Annual International Conference on
-Machine Learning. ACM, 2009, pp. 817–824.
-[PXP00] D. L. Pham, C. Xu, and J. L. Prince, “A
-survey of current methods in medical image
-segmentation,” Annual Review of Biomedical
-Engineering, vol. 2, no. 1, pp. 315–337, 2000,
-pMID: 11701515. [Online]. Available: http://
-dx.doi.org/10.1146/annurev.bioeng.2.1.315
-[Qui86] J. R. Quinlan, “Induction of decision trees,”
-Machine learning, vol. 1, no. 1, pp. 81–106,
-Aug. 1986. [Online]. Available: http://dx.doi.org/
-10.1023/A%3A1022643204877
-[Qui93] ——, C4.5: Programs for Machine Learning, P. Lan￾gley, Ed. Morgan Kaufmann Publishers, Inc., 1993.
-[RKB04] C. Rother, V. Kolmogorov, and A. Blake, “Grabcut:
-Interactive foreground extraction using iterated
-graph cuts,” ACM Transactions on Graphics
-(TOG), vol. 23, no. 3, pp. 309–314, 2004. [Online].
-Available: http://delivery.acm.org/10.1145/1020000/
-1015720/p309-rother.pdf
-[RM00] J. B. Roerdink and A. Meijster, “The watershed
-transform: Definitions, algorithms and paralleliza￾tion strategies,” Fundam. Inform., vol. 41, no. 1-2,
-pp. 187–228, 2000.
-[RM07] J. Reynolds and K. Murphy, “Figure-ground
-segmentation using a hierarchical conditional
-random field,” in Computer and Robot
-Vision, 2007. CRV ’07. Fourth Canadian
-Conference on, May 2007, pp. 175–182.
-[Online]. Available: http://ieeexplore.ieee.org/xpls/
-abs_all.jsp?arnumber=4228537
-[RMBK06] C. Rother, T. Minka, A. Blake, and V. Kolmogorov,
-“Cosegmentation of image pairs by histogram
-matching - incorporating a global constraint
-into mrfs,” in Computer Vision and Pattern
-Recognition, 2006 IEEE Computer Society
-Conference on, vol. 1, June 2006, pp. 993–
-1000. [Online]. Available: http://ieeexplore.ieee.org/
-xpls/abs_all.jsp?arnumber=1640859
-[SAN+04] J. Staal, M. D. Abràmoff, M. Niemeijer,
-M. Viergever, B. Van Ginneken et al., “Ridge-based
-vessel segmentation in color images of the retina,”
-Medical Imaging, IEEE Transactions on, vol. 23,
-no. 4, pp. 501–509, 2004. [Online]. Available:
-http://www.isi.uu.nl/Research/Databases/DRIVE/
-[SCZ08] F. Schroff, A. Criminisi, and A. Zisserman,
-“Object class segmentation using random
-forests.” in BMVC, 2008, pp. 1–10. [On￾line]. Available: http://research.microsoft.com/pubs/
-72423/Criminisi_bmvc2008.pdf
-[SJC08] J. Shotton, M. Johnson, and R. Cipolla,
-“Semantic texton forests for image categorization
-and segmentation,” in Computer vision and
-pattern recognition, 2008. CVPR 2008. IEEE
-Conference on. IEEE, Jun. 2008, pp. 1–8.
-[Online]. Available: http://ieeexplore.ieee.org/xpls/
-abs_all.jsp?arnumber=4587503
-[SM11] C. Sutton and A. McCallum, “An introduction
-to conditional random fields,” Machine Learning,
-vol. 4, no. 4, pp. 267–373, 2011. [Online].
-Available: http://homepages.inf.ed.ac.uk/csutton/
-publications/crftutv2.pdf
-[Smi02] L. I. Smith, “A tutorial on principal components
-analysis,” Cornell University, USA, vol. 51, p. 52,
-2002.
-[Smi04] B. T. Smith, “Lagrange multipliers tutorial in the
-context of support vector machines,” Memorial Uni￾versity of Newfoundland St. John’s, Newfoundland,
-Canada, Jun. 2004.
-[SSA12] D. Schiebener, J. Schill, and T. Asfour, “Discovery,
-segmentation and reactive grasping of unknown
-objects.” in Humanoids, 2012, pp. 71–77. [On￾line]. Available: http://h2t.anthropomatik.kit.edu/
-pdf/Schiebener2012.pdf
-[SUM+11] D. Schiebener, A. Ude, J. Morimotot,
-T. Asfour, and R. Dillmann, “Segmentation
-and learning of unknown objects through physical
-interaction,” in Humanoid Robots (Humanoids),
-2011 11th IEEE-RAS International Conference
-on. IEEE, 2011, pp. 500–506. [Online].
-Available: http://ieeexplore.ieee.org/ielx5/6086637/
-6100798/06100843.pdf
-[SWRC06] J. Shotton, J. Winn, C. Rother, and A. Criminisi,
-“Textonboost: Joint appearance, shape and context
-modeling for multi-class object recognition and
-segmentation,” in Computer Vision–ECCV 2006.
-Springer, 2006, pp. 1–15. [Online]. Available: http:
-//link.springer.com/chapter/10.1007/11744023_1
-[TNL14] J. Tighe, M. Niethammer, and S. Lazebnik,
-“Scene parsing with object instances and
+
+cover classification from very high resolution
+multispectral remote sensing images,” in Advances
+in Radar and Remote Sensing (TyWRRS), 2012
+Tyrrhenian Workshop on. IEEE, 2012, pp. 235–
+242. [Online]. Available: http://ieeexplore.ieee.org/
+xpl/login.jsp?tp=&arnumber=6381135
+[MSC] “Object class recognition image database.”
+[Online]. Available: http://research.microsoft.com/
+vision/cambridge/recognition/
+[MSR] “Image understanding - research data,”
+Microsoft Research. [Online]. Available: http://research.microsoft.com/en-us/projects/
+objectclassrecognition/
+[Mur12] K. P. Murphy, Machine learning: a probabilistic
+perspective. MIT press, 2012.
+[OKS78] Y.-i. Ohta, T. Kanade, and T. Sakai, “An analysis
+system for scenes containing objects with substructures,” in Proceedings of the Fourth International
+Joint Conference on Pattern Recognitions, 1978, pp.
+752–754.
+[PAA+87] S. M. Pizer, E. P. Amburn, J. D. Austin,
+R. Cromartie, A. Geselowitz, T. Greer, B. ter
+Haar Romeny, J. B. Zimmerman, and K. Zuiderveld,
+“Adaptive histogram equalization and its variations,”
+Computer vision, graphics, and image processing,
+vol. 39, no. 3, pp. 355–368, 1987. [Online].
+Available: http://www.sciencedirect.com/science/
+article/pii/S0734189X8780186X
+[PC13] P. H. Pinheiro and R. Collobert, “Recurrent
+convolutional neural networks for scene parsing,”
+arXiv preprint arXiv:1306.2795, 2013. [Online].
+Available: http://arxiv.org/abs/1306.2795v1
+[PH05] C. Pantofaru and M. Hebert, “A
+comparison of image segmentation algorithms,”
+Robotics Institute, p. 336, 2005. [Online].
+Available: http://riweb-backend.ri.cmu.edu/
+pub_files/pub4/pantofaru_caroline_2005_1/
+pantofaru_caroline_2005_1.pdf
+[PS07] A. Protiere and G. Sapiro, “Interactive
+image segmentation via adaptive weighted
+distances,” Image Processing, IEEE Transactions
+on, vol. 16, no. 4, pp. 1046–1057, 2007.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=4130436
+[PTN09] N. Plath, M. Toussaint, and S. Nakajima, “Multiclass image segmentation using conditional random
+fields and global classification,” in Proceedings
+of the 26th Annual International Conference on
+Machine Learning. ACM, 2009, pp. 817–824.
+[PXP00] D. L. Pham, C. Xu, and J. L. Prince, “A
+survey of current methods in medical image
+segmentation,” Annual Review of Biomedical
+Engineering, vol. 2, no. 1, pp. 315–337, 2000,
+pMID: 11701515. [Online]. Available: http://
+dx.doi.org/10.1146/annurev.bioeng.2.1.315
+[Qui86] J. R. Quinlan, “Induction of decision trees,”
+Machine learning, vol. 1, no. 1, pp. 81–106,
+Aug. 1986. [Online]. Available: http://dx.doi.org/
+10.1023/A%3A1022643204877
+[Qui93] ——, C4.5: Programs for Machine Learning, P. Langley, Ed. Morgan Kaufmann Publishers, Inc., 1993.
+[RKB04] C. Rother, V. Kolmogorov, and A. Blake, “Grabcut:
+Interactive foreground extraction using iterated
+graph cuts,” ACM Transactions on Graphics
+(TOG), vol. 23, no. 3, pp. 309–314, 2004. [Online].
+Available: http://delivery.acm.org/10.1145/1020000/
+1015720/p309-rother.pdf
+[RM00] J. B. Roerdink and A. Meijster, “The watershed
+transform: Definitions, algorithms and parallelization strategies,” Fundam. Inform., vol. 41, no. 1-2,
+pp. 187–228, 2000.
+[RM07] J. Reynolds and K. Murphy, “Figure-ground
+segmentation using a hierarchical conditional
+random field,” in Computer and Robot
+Vision, 2007. CRV ’07. Fourth Canadian
+Conference on, May 2007, pp. 175–182.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=4228537
+[RMBK06] C. Rother, T. Minka, A. Blake, and V. Kolmogorov,
+“Cosegmentation of image pairs by histogram
+matching - incorporating a global constraint
+into mrfs,” in Computer Vision and Pattern
+Recognition, 2006 IEEE Computer Society
+Conference on, vol. 1, June 2006, pp. 993–
+1000. [Online]. Available: http://ieeexplore.ieee.org/
+xpls/abs_all.jsp?arnumber=1640859
+[SAN+04] J. Staal, M. D. Abràmoff, M. Niemeijer,
+M. Viergever, B. Van Ginneken et al., “Ridge-based
+vessel segmentation in color images of the retina,”
+Medical Imaging, IEEE Transactions on, vol. 23,
+no. 4, pp. 501–509, 2004. [Online]. Available:
+http://www.isi.uu.nl/Research/Databases/DRIVE/
+[SCZ08] F. Schroff, A. Criminisi, and A. Zisserman,
+“Object class segmentation using random
+forests.” in BMVC, 2008, pp. 1–10. [Online]. Available: http://research.microsoft.com/pubs/
+72423/Criminisi_bmvc2008.pdf
+[SJC08] J. Shotton, M. Johnson, and R. Cipolla,
+“Semantic texton forests for image categorization
+and segmentation,” in Computer vision and
+pattern recognition, 2008. CVPR 2008. IEEE
+Conference on. IEEE, Jun. 2008, pp. 1–8.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=4587503
+[SM11] C. Sutton and A. McCallum, “An introduction
+to conditional random fields,” Machine Learning,
+vol. 4, no. 4, pp. 267–373, 2011. [Online].
+Available: http://homepages.inf.ed.ac.uk/csutton/
+publications/crftutv2.pdf
+[Smi02] L. I. Smith, “A tutorial on principal components
+analysis,” Cornell University, USA, vol. 51, p. 52,
+2002.
+[Smi04] B. T. Smith, “Lagrange multipliers tutorial in the
+context of support vector machines,” Memorial University of Newfoundland St. John’s, Newfoundland,
+Canada, Jun. 2004.
+[SSA12] D. Schiebener, J. Schill, and T. Asfour, “Discovery,
+segmentation and reactive grasping of unknown
+objects.” in Humanoids, 2012, pp. 71–77. [Online]. Available: http://h2t.anthropomatik.kit.edu/
+pdf/Schiebener2012.pdf
+[SUM+11] D. Schiebener, A. Ude, J. Morimotot,
+T. Asfour, and R. Dillmann, “Segmentation
+and learning of unknown objects through physical
+interaction,” in Humanoid Robots (Humanoids),
+2011 11th IEEE-RAS International Conference
+on. IEEE, 2011, pp. 500–506. [Online].
+Available: http://ieeexplore.ieee.org/ielx5/6086637/
+6100798/06100843.pdf
+[SWRC06] J. Shotton, J. Winn, C. Rother, and A. Criminisi,
+“Textonboost: Joint appearance, shape and context
+modeling for multi-class object recognition and
+segmentation,” in Computer Vision–ECCV 2006.
+Springer, 2006, pp. 1–15. [Online]. Available: http:
+//link.springer.com/chapter/10.1007/11744023_1
+[TNL14] J. Tighe, M. Niethammer, and S. Lazebnik,
+“Scene parsing with object instances and
 occlusion ordering,” in Computer Vision and
-15
-Pattern Recognition (CVPR), 2014 IEEE
-Conference on. IEEE, 2014, pp. 3748–3755.
-[Online]. Available: http://ieeexplore.ieee.org/xpls/
-abs_all.jsp?arnumber=6909874
-[UPH05] R. Unnikrishnan, C. Pantofaru, and M. Hebert,
-“A measure for objective evaluation of
-image segmentation algorithms,” in Computer
-Vision and Pattern Recognition-Workshops, 2005.
-CVPR Workshops. IEEE Computer Society
-Conference on. IEEE, 2005, pp. 34–34.
-[Online]. Available: http://repository.cmu.edu/cgi/
-viewcontent.cgi?article=1365&context=robotics
-[vdMPvdH09] L. J. van der Maaten, E. O. Postma, and H. J.
-van den Herik, “Dimensionality reduction: A com￾parative review,” Journal of Machine Learning
-Research, vol. 10, no. 1-41, pp. 66–71, 2009.
-[VOC10] “Voc2010 preliminary results,” 2010. [Online].
-Available: http://host.robots.ox.ac.uk/pascal/VOC/
-voc2010/results/index.html
-[WAH97] G.-Q. Wei, K. Arbter, and G. Hirzinger, “Automatic
-tracking of laparoscopic instruments by color
-coding,” in CVRMed-MRCAS’97, ser. Lecture
-Notes in Computer Science, J. Troccaz, E. Grimson,
-and R. Mösges, Eds. Springer Berlin Heidelberg,
-1997, vol. 1205, pp. 357–366. [Online]. Available:
-http://dx.doi.org/10.1007/BFb0029257
-[YBCK10] Z. Yin, R. Bise, M. Chen, and T. Kanade, “Cell
-segmentation in microscopy imagery using a
-bag of local bayesian classifiers,” in Biomedical
-Imaging: From Nano to Macro, 2010 IEEE
-International Symposium on, Apr. 2010, pp. 125–
-128. [Online]. Available: http://ieeexplore.ieee.org/
-xpls/abs_all.jsp?arnumber=5490399
-[YHRF12] Y. Yang, S. Hallman, D. Ramanan, and
-C. C. Fowlkes, “Layered object models for
-image segmentation,” Pattern Analysis and
-Machine Intelligence, IEEE Transactions on,
-vol. 34, no. 9, pp. 1731–1743, Sep. 2012.
-[Online]. Available: http://ieeexplore.ieee.org/xpls/
-abs_all.jsp?arnumber=6042883
-[ZBS01] Y. Zhang, M. Brady, and S. Smith, “Segmentation
-of brain MR images through a hidden Markov
-random field model and the expectation￾maximization algorithm,” Medical Imaging, IEEE
-Transactions on, vol. 20, no. 1, pp. 45–57, 2001.
-[Online]. Available: http://ieeexplore.ieee.org/xpls/
-abs_all.jsp?arnumber=906424
-[ZGWX05] S.-C. Zhu, C.-E. Guo, Y. Wang, and Z. Xu, “What
-are textons?” International Journal of Computer
-Vision, vol. 62, no. 1-2, pp. 121–143, 2005.
-[Zha12] Z. Zhang, “Microsoft kinect sensor and its effect,”
-MultiMedia, IEEE, vol. 19, no. 2, pp. 4–10, Feb.
-2012.
-[ZJRP+15] S. Zheng, S. Jayasumana, B. Romera-Paredes,
-V. Vineet, Z. Su, D. Du, C. Huang, and
-P. H. Torr, “Conditional random fields as
-recurrent neural networks,” in Proceedings
-of the IEEE International Conference on
-Computer Vision, 2015, pp. 1529–1537. [Online].
-Available: http://www.robots.ox.ac.uk/~szheng/
-papers/CRFasRNN.pdf
-GLOSSARY
-ACM active contour model. 6
-BOV bag-of-visual-words. 5
-CNN Convolution Neuronal Network. 5, 9
-CRF Conditional Random Field. 4, 8, 9, 11
-GPU graphics processing unit. 3
-HOG histogram of oriented gradients. 5, 6, 8
-ILSVRC ImageNet Large-Scale Visual Recognition
-Challenge. 9
-MAP Maximum A Posteriori. 8
-MR magnetic resonance. 2, 6
-MRF Markov Random Field. 4, 8
-PCA principal component analysis. 5
-RBF radial basis function. 8
-SIFT scale-invariant feature transform. 5
+
+Pattern Recognition (CVPR), 2014 IEEE
+Conference on. IEEE, 2014, pp. 3748–3755.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=6909874
+[UPH05] R. Unnikrishnan, C. Pantofaru, and M. Hebert,
+“A measure for objective evaluation of
+image segmentation algorithms,” in Computer
+Vision and Pattern Recognition-Workshops, 2005.
+CVPR Workshops. IEEE Computer Society
+Conference on. IEEE, 2005, pp. 34–34.
+[Online]. Available: http://repository.cmu.edu/cgi/
+viewcontent.cgi?article=1365&context=robotics
+[vdMPvdH09] L. J. van der Maaten, E. O. Postma, and H. J.
+van den Herik, “Dimensionality reduction: A comparative review,” Journal of Machine Learning
+Research, vol. 10, no. 1-41, pp. 66–71, 2009.
+[VOC10] “Voc2010 preliminary results,” 2010. [Online].
+Available: http://host.robots.ox.ac.uk/pascal/VOC/
+voc2010/results/index.html
+[WAH97] G.-Q. Wei, K. Arbter, and G. Hirzinger, “Automatic
+tracking of laparoscopic instruments by color
+coding,” in CVRMed-MRCAS’97, ser. Lecture
+Notes in Computer Science, J. Troccaz, E. Grimson,
+and R. Mösges, Eds. Springer Berlin Heidelberg,
+1997, vol. 1205, pp. 357–366. [Online]. Available:
+http://dx.doi.org/10.1007/BFb0029257
+[YBCK10] Z. Yin, R. Bise, M. Chen, and T. Kanade, “Cell
+segmentation in microscopy imagery using a
+bag of local bayesian classifiers,” in Biomedical
+Imaging: From Nano to Macro, 2010 IEEE
+International Symposium on, Apr. 2010, pp. 125–
+128. [Online]. Available: http://ieeexplore.ieee.org/
+xpls/abs_all.jsp?arnumber=5490399
+[YHRF12] Y. Yang, S. Hallman, D. Ramanan, and
+C. C. Fowlkes, “Layered object models for
+image segmentation,” Pattern Analysis and
+Machine Intelligence, IEEE Transactions on,
+vol. 34, no. 9, pp. 1731–1743, Sep. 2012.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=6042883
+[ZBS01] Y. Zhang, M. Brady, and S. Smith, “Segmentation
+of brain MR images through a hidden Markov
+random field model and the expectationmaximization algorithm,” Medical Imaging, IEEE
+Transactions on, vol. 20, no. 1, pp. 45–57, 2001.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=906424
+[ZGWX05] S.-C. Zhu, C.-E. Guo, Y. Wang, and Z. Xu, “What
+are textons?” International Journal of Computer
+Vision, vol. 62, no. 1-2, pp. 121–143, 2005.
+[Zha12] Z. Zhang, “Microsoft kinect sensor and its effect,”
+MultiMedia, IEEE, vol. 19, no. 2, pp. 4–10, Feb.
+2012.
+[ZJRP+15] S. Zheng, S. Jayasumana, B. Romera-Paredes,
+V. Vineet, Z. Su, D. Du, C. Huang, and
+P. H. Torr, “Conditional random fields as
+recurrent neural networks,” in Proceedings
+of the IEEE International Conference on
+Computer Vision, 2015, pp. 1529–1537. [Online].
+Available: http://www.robots.ox.ac.uk/~szheng/
+papers/CRFasRNN.pdf
+GLOSSARY
+ACM active contour model. 6
+BOV bag-of-visual-words. 5
+CNN Convolution Neuronal Network. 5, 9
+CRF Conditional Random Field. 4, 8, 9, 11
+GPU graphics processing unit. 3
+HOG histogram of oriented gradients. 5, 6, 8
+ILSVRC ImageNet Large-Scale Visual Recognition
+Challenge. 9
+MAP Maximum A Posteriori. 8
+MR magnetic resonance. 2, 6
+MRF Markov Random Field. 4, 8
+PCA principal component analysis. 5
+RBF radial basis function. 8
+SIFT scale-invariant feature transform. 5
 SVM Support Vector Machine. 4, 6–8
-16
-APPENDIX A
-TABLES
-Database Image Resolution (width × height)
-Number
-of
-Images
-Number
-of
-Classes
-Channels Data source
-Colon Crypt DB (302 px − 1116 px) × (349 px − 875 px) 389 2 3 [CRSS]
-DIARETDB1 1500 px × 1500 px 89 4 3 [KKV+14]
-KITTI Road (1226 px − 1242 px) × (370 px − 376 px) 289 2 3 [FKG13]
-MSRCv1 (213 px − 320 px) × (213 px − 320 px) 240 9 3 [MSR]
-MSRCv2 (213 px − 320 px) × (162 px − 320 px) 591 23 3 [MSR]
-Open-CAS Endoscopic Datasets 640 px × 480 px 120 2 3 [MHMK+14]
-PASCAL VOC 2012 (142 px − 500 px) × ( 71 px − 500 px) 2913 20 3 [EVGW+12]
-Warwick-QU (567 px − 775 px) × (430 px − 522 px) 165 5 3 [CSM09]
-Table I: An overview over publicly available image databases with a semantic segmentation ground trouth.
+
+APPENDIX A
+TABLES
+Database Image Resolution (width × height)
+Number
+of
+Images
+Number
+of
+Classes
+Channels Data source
+Colon Crypt DB (302 px − 1116 px) × (349 px − 875 px) 389 2 3 [CRSS]
+DIARETDB1 1500 px × 1500 px 89 4 3 [KKV+14]
+KITTI Road (1226 px − 1242 px) × (370 px − 376 px) 289 2 3 [FKG13]
+MSRCv1 (213 px − 320 px) × (213 px − 320 px) 240 9 3 [MSR]
+MSRCv2 (213 px − 320 px) × (162 px − 320 px) 591 23 3 [MSR]
+Open-CAS Endoscopic Datasets 640 px × 480 px 120 2 3 [MHMK+14]
+PASCAL VOC 2012 (142 px − 500 px) × ( 71 px − 500 px) 2913 20 3 [EVGW+12]
+Warwick-QU (567 px − 775 px) × (430 px − 522 px) 165 5 3 [CSM09]
+Table I: An overview over publicly available image databases with a semantic segmentation ground trouth.
\ No newline at end of file
diff --git a/read/results/pdfium/1707.09725.txt b/read/results/pdfium/1707.09725.txt
index f499152..6c84dcd 100644
--- a/read/results/pdfium/1707.09725.txt
+++ b/read/results/pdfium/1707.09725.txt
@@ -1,4193 +1,4149 @@
-Analysis and Optimization of
-Convolutional Neural Network
-Architectures
-Master Thesis of
-Martin Thoma
-Department of Computer Science
-Institute for Anthropomatics
-and
-FZI Research Center for Information Technology
-Reviewer: Prof. Dr.–Ing. R. Dillmann
-Second reviewer: Prof. Dr.–Ing. J. M. Zöllner
-Advisor: Dipl.–Inform. Michael Weber
-Research Period: 03. May 2017 – 03. August 2017
-KIT – University of the State of Baden-Wuerttemberg and National Research Center of the Helmholtz Association www.kit.edu
+Analysis and Optimization of
+Convolutional Neural Network
+Architectures
+Master Thesis of
+Martin Thoma
+Department of Computer Science
+Institute for Anthropomatics
+and
+FZI Research Center for Information Technology
+Reviewer: Prof. Dr.–Ing. R. Dillmann
+Second reviewer: Prof. Dr.–Ing. J. M. Zöllner
+Advisor: Dipl.–Inform. Michael Weber
+Research Period: 03. May 2017 – 03. August 2017
+KIT – University of the State of Baden-Wuerttemberg and National Research Center of the Helmholtz Association www.kit.edu
 arXiv:1707.09725v1 [cs.CV] 31 Jul 2017
 
-Analysis and Optimization of Convolutional Neural
-Network Architectures
-by
-Martin Thoma
-Master Thesis
+Analysis and Optimization of Convolutional Neural
+Network Architectures
+by
+Martin Thoma
+Master Thesis
 August 2017
-Master Thesis, FZI
-Department of Computer Science, 2017
-Gutachter: Prof. Dr.–Ing. R. Dillmann, Prof. Dr.–Ing. J. M. Zöllner
-Abteilung Technisch Kognitive Assistenzsysteme
+Master Thesis, FZI
+Department of Computer Science, 2017
+Gutachter: Prof. Dr.–Ing. R. Dillmann, Prof. Dr.–Ing. J. M. Zöllner
+Abteilung Technisch Kognitive Assistenzsysteme
 FZI Research Center for Information Technology
-Affirmation
-Ich versichere wahrheitsgemäß, die Arbeit selbstständig angefertigt, alle benutzten Hilfs￾mittel vollständig und genau angegeben und alles kenntlich gemacht zu haben, was aus
-Arbeiten anderer unverändert oder mit Abänderungen entnommen wurde.
-Karlsruhe, Martin Thoma
-August 2017
-v
-
-Abstract
-Convolutional Neural Networks (CNNs) dominate various computer vision tasks since
-Alex Krizhevsky showed that they can be trained effectively and reduced the top-5 error
-from 26.2 % to 15.3 % on the ImageNet large scale visual recognition challenge. Many
-aspects of CNNs are examined in various publications, but literature about the analysis
-and construction of neural network architectures is rare. This work is one step to close this
-gap. A comprehensive overview over existing techniques for CNN analysis and topology
-construction is provided. A novel way to visualize classification errors with confusion
-matrices was developed. Based on this method, hierarchical classifiers are described and
-evaluated. Additionally, some results are confirmed and quantified for CIFAR-100. For
-example, the positive impact of smaller batch sizes, averaging ensembles, data augmentation
-and test-time transformations on the accuracy. Other results, such as the positive impact of
-learned color transformation on the test accuracy could not be confirmed. A model which
-has only one million learned parameters for an input size of 32 × 32 × 3 and 100 classes and
-which beats the state of the art on the benchmark dataset Asirra, GTSRB, HASYv2 and
-STL-10 was developed.
-vii
-Zusammenfassung
-Modelle welche auf Convolutional Neural Networks (CNNs) basieren sind in verschiedenen
-Aufgaben der Computer Vision dominant seit Alex Krizhevsky gezeigt hat dass diese
-effektiv trainiert werden können und er den Top-5 Fehler in dem ImageNet large scale visual
-recognition challenge Benchmark von 26.2 % auf 15.3 % drücken konnte. Viele Aspekte
-von CNNs wurden in verschiedenen Publikationen untersucht, aber es wurden vergleich￾sweise wenige Arbeiten über die Analyse und die Konstruktion von Neuronalen Netzen
-geschrieben. Diese Masterarbeit stellt einen Schritt dar um diese Lücke zu schließen. Eine
-umfassende Überblick über Analyseverfahren und Topologielernverfahren wird gegeben. Ein
-neues Verfahren zur Visualisierung der Klassifikationsfehler mit Konfusionsmatrizen wurde
-entwickelt. Basierend auf diesem Verfahren wurden hierarchische Klassifizierer eingeführt
-und evaluiert. Zusätzlich wurden einige bereits in der Literatur beschriebene Beobachtun￾gen wie z.B. der positive Einfluss von kleinen Batch-Größen, Ensembles, Erhöhung der
-Trainingsdatenmenge durch künstliche Transformationen (Data Augmentation) und die In￾varianzbildung durch künstliche Transformationen zur Test-Zeit (Test-time transformations)
-experimentell bestätigt. Andere Beobachtungen, wie beispielsweise der positive Einfluss
-gelernter Farbraumtransformationen konnten nicht bestätigt werden. Ein Modell welches
-weniger als eine Millionen Parameter nutzt und auf den Benchmark-Datensätzen Asirra,
+Affirmation
+Ich versichere wahrheitsgemäß, die Arbeit selbstständig angefertigt, alle benutzten Hilfsmittel vollständig und genau angegeben und alles kenntlich gemacht zu haben, was aus
+Arbeiten anderer unverändert oder mit Abänderungen entnommen wurde.
+Karlsruhe, Martin Thoma
+August 2017
+
+
+Abstract
+Convolutional Neural Networks (CNNs) dominate various computer vision tasks since
+Alex Krizhevsky showed that they can be trained effectively and reduced the top-5 error
+from 26.2 % to 15.3 % on the ImageNet large scale visual recognition challenge. Many
+aspects of CNNs are examined in various publications, but literature about the analysis
+and construction of neural network architectures is rare. This work is one step to close this
+gap. A comprehensive overview over existing techniques for CNN analysis and topology
+construction is provided. A novel way to visualize classification errors with confusion
+matrices was developed. Based on this method, hierarchical classifiers are described and
+evaluated. Additionally, some results are confirmed and quantified for CIFAR-100. For
+example, the positive impact of smaller batch sizes, averaging ensembles, data augmentation
+and test-time transformations on the accuracy. Other results, such as the positive impact of
+learned color transformation on the test accuracy could not be confirmed. A model which
+has only one million learned parameters for an input size of 32 × 32 × 3 and 100 classes and
+which beats the state of the art on the benchmark dataset Asirra, GTSRB, HASYv2 and
+STL-10 was developed.
+
+Zusammenfassung
+Modelle welche auf Convolutional Neural Networks (CNNs) basieren sind in verschiedenen
+Aufgaben der Computer Vision dominant seit Alex Krizhevsky gezeigt hat dass diese
+effektiv trainiert werden können und er den Top-5 Fehler in dem ImageNet large scale visual
+recognition challenge Benchmark von 26.2 % auf 15.3 % drücken konnte. Viele Aspekte
+von CNNs wurden in verschiedenen Publikationen untersucht, aber es wurden vergleichsweise wenige Arbeiten über die Analyse und die Konstruktion von Neuronalen Netzen
+geschrieben. Diese Masterarbeit stellt einen Schritt dar um diese Lücke zu schließen. Eine
+umfassende Überblick über Analyseverfahren und Topologielernverfahren wird gegeben. Ein
+neues Verfahren zur Visualisierung der Klassifikationsfehler mit Konfusionsmatrizen wurde
+entwickelt. Basierend auf diesem Verfahren wurden hierarchische Klassifizierer eingeführt
+und evaluiert. Zusätzlich wurden einige bereits in der Literatur beschriebene Beobachtungen wie z.B. der positive Einfluss von kleinen Batch-Größen, Ensembles, Erhöhung der
+Trainingsdatenmenge durch künstliche Transformationen (Data Augmentation) und die Invarianzbildung durch künstliche Transformationen zur Test-Zeit (Test-time transformations)
+experimentell bestätigt. Andere Beobachtungen, wie beispielsweise der positive Einfluss
+gelernter Farbraumtransformationen konnten nicht bestätigt werden. Ein Modell welches
+weniger als eine Millionen Parameter nutzt und auf den Benchmark-Datensätzen Asirra,
 GTSRB, HASYv2 und STL-10 den Stand der Technik neu definiert wurde entwickelt.
-Acknowledgment
-I would like to thank Stephan Gocht and Marvin Teichmann for the many inspiring
-conversations we had about various topics, including machine learning.
-I also want to thank my father for the support he gave me. He made it possible for me to
-study without having to worry about anything besides my studies. Thank you!
-Finally, I want to thank Timothy Gebhard, Daniel Schütz and Yang Zhang for proof-reading
-my masters thesis and Stephan Gocht for giving me access to a GTX 1070.
-ix
-This work can be cited the following way:
-@MastersThesis{Thoma:2017,
-Title = {Analysis and Optimization of Convolutional Neural Network
-Architectures},
-Author = {Martin Thoma},
-School = {Karlsruhe Institute of Technology},
-Year = {2017},
-Address = {Karlsruhe, Germany},
-Month = jun,
-Type = {Masters’s Thesis},
-Keywords = {machine learning; artificial neural networks;
-classification; supervised learning; CNNs},
-Url = {https://martin-thoma.com/msthesis/}
-}
-A DVD with a digital version of this master thesis and the source code as well as the used
+Acknowledgment
+I would like to thank Stephan Gocht and Marvin Teichmann for the many inspiring
+conversations we had about various topics, including machine learning.
+I also want to thank my father for the support he gave me. He made it possible for me to
+study without having to worry about anything besides my studies. Thank you!
+Finally, I want to thank Timothy Gebhard, Daniel Schütz and Yang Zhang for proof-reading
+my masters thesis and Stephan Gocht for giving me access to a GTX 1070.
+
+This work can be cited the following way:
+@MastersThesis{Thoma:2017,
+Title = {Analysis and Optimization of Convolutional Neural Network
+Architectures},
+Author = {Martin Thoma},
+School = {Karlsruhe Institute of Technology},
+Year = {2017},
+Address = {Karlsruhe, Germany},
+Month = jun,
+Type = {Masters’s Thesis},
+Keywords = {machine learning; artificial neural networks;
+classification; supervised learning; CNNs},
+Url = {https://martin-thoma.com/msthesis/}
+}
+A DVD with a digital version of this master thesis and the source code as well as the used
 data is part of this work.
-Contents
-1 Introduction 1
-2 Convolutional Neural Networks 3
-2.1 Linear Image Filters . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 3
-2.2 CNN Layer Types . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 4
-2.2.1 Convolutional Layers . . . . . . . . . . . . . . . . . . . . . . . . . . . 5
-2.2.2 Pooling Layers . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 7
-2.2.3 Dropout . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 9
-2.2.4 Normalization Layers . . . . . . . . . . . . . . . . . . . . . . . . . . 9
-2.3 CNN Blocks . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11
-2.3.1 Residual Blocks . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11
-2.3.2 Aggregation Blocks . . . . . . . . . . . . . . . . . . . . . . . . . . . 12
-2.3.3 Dense Blocks . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 13
-2.4 Transition Layers . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 14
-2.5 Analysis Techniques . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 15
-2.5.1 Qualitative Analysis by Example . . . . . . . . . . . . . . . . . . . . 15
-2.5.2 Confusion Matrices . . . . . . . . . . . . . . . . . . . . . . . . . . . 16
-2.5.3 Validation Curves: Accuracy, loss and other metrics . . . . . . . . . 16
-2.5.4 Learning Curves . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 20
-2.5.5 Input-feature based model explanations . . . . . . . . . . . . . . . . 21
-2.5.6 Argmax Method . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 22
-2.5.7 Feature Map Reconstructions . . . . . . . . . . . . . . . . . . . . . . 22
-2.5.8 Filter comparison . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 23
-2.5.9 Weight update tracking . . . . . . . . . . . . . . . . . . . . . . . . . 23
-2.6 Accuracy boosting techniques . . . . . . . . . . . . . . . . . . . . . . . . . . 24
-3 Topology Learning 27
-3.1 Growing approaches . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 27
-3.1.1 Cascade-Correlation . . . . . . . . . . . . . . . . . . . . . . . . . . . 27
-3.1.2 Meiosis Networks . . . . . . . . . . . . . . . . . . . . . . . . . . . . 28
-3.1.3 Automatic Structure Optimization . . . . . . . . . . . . . . . . . . . . 29
-3.2 Pruning approaches . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 29
-3.3 Genetic approaches . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 30
-3.4 Reinforcement Learning . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 30
-xi
-3.5 Convolutional Neural Fabrics . . . . . . . . . . . . . . . . . . . . . . . . . . 31
-4 Hierarchical Classification 33
-4.1 Advantages of classifier hierarchies . . . . . . . . . . . . . . . . . . . . . . 34
-4.2 Clustering classes . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 34
-5 Experimental Evaluation 37
-5.1 Baseline Model and Training setup . . . . . . . . . . . . . . . . . . . . . . . 38
-5.1.1 Baseline Evaluation . . . . . . . . . . . . . . . . . . . . . . . . . . . 40
-5.1.2 Weight distribution . . . . . . . . . . . . . . . . . . . . . . . . . . . . 41
-5.1.3 Training behavior . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 45
-5.2 Confusion Matrix Ordering . . . . . . . . . . . . . . . . . . . . . . . . . . . . 48
-5.3 Spectral Clustering vs CMO . . . . . . . . . . . . . . . . . . . . . . . . . . . 51
-5.4 Hierarchy of Classifiers . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 53
-5.5 Increased width for faster learning . . . . . . . . . . . . . . . . . . . . . . . 54
-5.6 Weight updates . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 55
-5.7 Multiple narrow layers vs One wide layer . . . . . . . . . . . . . . . . . . . . 56
-5.8 Batch Normalization . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 57
-5.9 Batch size . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 59
-5.10 Bias . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 59
-5.11 Learned Color Space Transformation . . . . . . . . . . . . . . . . . . . . . . 60
-5.12 Pooling . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 60
-5.13 Activation Functions . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 60
-5.14 Label smoothing . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 64
-5.15 Optimized Classifier . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 66
-5.16 Early Stopping vs More Data . . . . . . . . . . . . . . . . . . . . . . . . . . 68
-5.17 Regularization . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 68
-6 Conclusion and Outlook 71
-A Figures, Tables and Algorithms 75
-B Hyperparameters 79
-B.1 Preprocessing . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 79
-B.2 Data augmentation . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 80
-B.3 Initialization . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 81
-B.4 Objective function . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 81
-B.5 Optimization Techniques . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 82
-B.6 Network Design . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 84
-B.7 Regularization . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 85
-C Calculating Network Characteristics 87
+Contents
+1 Introduction 1
+2 Convolutional Neural Networks 3
+2.1 Linear Image Filters . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 3
+2.2 CNN Layer Types . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 4
+2.2.1 Convolutional Layers . . . . . . . . . . . . . . . . . . . . . . . . . . . 5
+2.2.2 Pooling Layers . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 7
+2.2.3 Dropout . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 9
+2.2.4 Normalization Layers . . . . . . . . . . . . . . . . . . . . . . . . . . 9
+2.3 CNN Blocks . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11
+2.3.1 Residual Blocks . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11
+2.3.2 Aggregation Blocks . . . . . . . . . . . . . . . . . . . . . . . . . . . 12
+2.3.3 Dense Blocks . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 13
+2.4 Transition Layers . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 14
+2.5 Analysis Techniques . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 15
+2.5.1 Qualitative Analysis by Example . . . . . . . . . . . . . . . . . . . . 15
+2.5.2 Confusion Matrices . . . . . . . . . . . . . . . . . . . . . . . . . . . 16
+2.5.3 Validation Curves: Accuracy, loss and other metrics . . . . . . . . . 16
+2.5.4 Learning Curves . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 20
+2.5.5 Input-feature based model explanations . . . . . . . . . . . . . . . . 21
+2.5.6 Argmax Method . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 22
+2.5.7 Feature Map Reconstructions . . . . . . . . . . . . . . . . . . . . . . 22
+2.5.8 Filter comparison . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 23
+2.5.9 Weight update tracking . . . . . . . . . . . . . . . . . . . . . . . . . 23
+2.6 Accuracy boosting techniques . . . . . . . . . . . . . . . . . . . . . . . . . . 24
+3 Topology Learning 27
+3.1 Growing approaches . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 27
+3.1.1 Cascade-Correlation . . . . . . . . . . . . . . . . . . . . . . . . . . . 27
+3.1.2 Meiosis Networks . . . . . . . . . . . . . . . . . . . . . . . . . . . . 28
+3.1.3 Automatic Structure Optimization . . . . . . . . . . . . . . . . . . . . 29
+3.2 Pruning approaches . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 29
+3.3 Genetic approaches . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 30
+3.4 Reinforcement Learning . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 30
+
+3.5 Convolutional Neural Fabrics . . . . . . . . . . . . . . . . . . . . . . . . . . 31
+4 Hierarchical Classification 33
+4.1 Advantages of classifier hierarchies . . . . . . . . . . . . . . . . . . . . . . 34
+4.2 Clustering classes . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 34
+5 Experimental Evaluation 37
+5.1 Baseline Model and Training setup . . . . . . . . . . . . . . . . . . . . . . . 38
+5.1.1 Baseline Evaluation . . . . . . . . . . . . . . . . . . . . . . . . . . . 40
+5.1.2 Weight distribution . . . . . . . . . . . . . . . . . . . . . . . . . . . . 41
+5.1.3 Training behavior . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 45
+5.2 Confusion Matrix Ordering . . . . . . . . . . . . . . . . . . . . . . . . . . . . 48
+5.3 Spectral Clustering vs CMO . . . . . . . . . . . . . . . . . . . . . . . . . . . 51
+5.4 Hierarchy of Classifiers . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 53
+5.5 Increased width for faster learning . . . . . . . . . . . . . . . . . . . . . . . 54
+5.6 Weight updates . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 55
+5.7 Multiple narrow layers vs One wide layer . . . . . . . . . . . . . . . . . . . . 56
+5.8 Batch Normalization . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 57
+5.9 Batch size . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 59
+5.10 Bias . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 59
+5.11 Learned Color Space Transformation . . . . . . . . . . . . . . . . . . . . . . 60
+5.12 Pooling . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 60
+5.13 Activation Functions . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 60
+5.14 Label smoothing . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 64
+5.15 Optimized Classifier . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 66
+5.16 Early Stopping vs More Data . . . . . . . . . . . . . . . . . . . . . . . . . . 68
+5.17 Regularization . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 68
+6 Conclusion and Outlook 71
+A Figures, Tables and Algorithms 75
+B Hyperparameters 79
+B.1 Preprocessing . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 79
+B.2 Data augmentation . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 80
+B.3 Initialization . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 81
+B.4 Objective function . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 81
+B.5 Optimization Techniques . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 82
+B.6 Network Design . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 84
+B.7 Regularization . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 85
+C Calculating Network Characteristics 87
 C.1 Parameter Numbers . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 87
-C.2 FLOPs . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 87
-C.3 Memory Footprint . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 88
-D Common Architectures 89
-D.1 LeNet-5 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 90
-D.2 AlexNet . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 91
-D.3 VGG-16 D . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 92
-D.4 GoogleNet, Inception v2 and v3 . . . . . . . . . . . . . . . . . . . . . . . . . 94
-D.5 Inception-v4 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 95
-E Datasets 97
-F List of Tables 99
-G List of Figures 101
-H Bibliography 103
+C.2 FLOPs . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 87
+C.3 Memory Footprint . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 88
+D Common Architectures 89
+D.1 LeNet-5 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 90
+D.2 AlexNet . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 91
+D.3 VGG-16 D . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 92
+D.4 GoogleNet, Inception v2 and v3 . . . . . . . . . . . . . . . . . . . . . . . . . 94
+D.5 Inception-v4 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 95
+E Datasets 97
+F List of Tables 99
+G List of Figures 101
+H Bibliography 103
 I Glossary 119
 
-1. Introduction
-Computer vision is the academic field which aims to gain a high-level understanding of the
-low-level information given by raw pixels from digital images.
-Robots, search engines, self-driving cars, surveillance agencies and many others have
-applications which include one of the following six problems in computer vision as sub￾problems:
-• Classification:
-1 The algorithm is given an image and k possible classes. The task is
-to decide which of the k classes the image belongs to. For example, an image from
-a self-driving cars on-board camera contains either paved road, unpaved road or
-no road: Which of those given three classes is in the image?
-• Localization: The algorithm is given an image and one class k. The task is to find
-bounding boxes for all instances of k.
-• Detection: Given an image and k classes, find bounding boxes for all instances of
-those classes.
-• Semantic Segmentation: Given an image and k classes, classify each pixel.
-• Instance segmentation: Given an image and k classes, classify each pixel as one of
-the k classes, but distinguish different instances of the classes.
-• Content-based Image Retrieval: Given an image x and n images in a database,
-find the top u images which are most similar to x.
-There are many techniques to approach those problems, but since AlexNet [KSH12] was
-published, all of those problems have high-quality solutions which make use of Convolutional
-Neural Networks (CNNs) [HZRS15a, LAE+16, RFB15, DHS16, SKP15].
-Today, most neural networks are constructed by rules of thumb and gut feeling. The
-architectures evolved and got deeper, more hyperparameters were added. Although there
-are methods for analyzing CNNs, those methods are not enough to determine all steps in
-the development of network architectures without gut feeling. A detailed introduction to
-CNNs as well as nine methods for analysis of CNNs is given in Chapter 2.
-1Classification is also called identification if the classes are humans. Another name is object recognition,
-although the classes can be humans and animals as well.
-1
-1. Introduction
-Despite the fact that most researchers and developers do not use topology learning, a couple
-of algorithms have been proposed for this task. Five classes of topology learning algorithms
-are introduced in Chapter 3.
-When datasets and the number of classes are large, evaluating a single idea how to improve
-the network can take several weeks just for the training. Hence the idea of building a
-hierarchy of classifiers which allows to split the classification task into various sub-tasks
-that can easily be combined is evaluated in Chapter 4.
-Confusion Matrix Ordering (CMO), the hierarchical classifier, 9 types of hyperparameters
-and label smoothing are evaluated in Chapter 5.
-This work focuses on classification problems to keep the presented ideas as pure and
-simple as possible. The described techniques are relevant to all six described computer
-vision problems due to the fact that Encoder-Decoder architectures are one component of
-state-of-the-art algorithms for all six of them.
+. Introduction
+Computer vision is the academic field which aims to gain a high-level understanding of the
+low-level information given by raw pixels from digital images.
+Robots, search engines, self-driving cars, surveillance agencies and many others have
+applications which include one of the following six problems in computer vision as subproblems:
+• Classification:
+1 The algorithm is given an image and k possible classes. The task is
+to decide which of the k classes the image belongs to. For example, an image from
+a self-driving cars on-board camera contains either paved road, unpaved road or
+no road: Which of those given three classes is in the image?
+• Localization: The algorithm is given an image and one class k. The task is to find
+bounding boxes for all instances of k.
+• Detection: Given an image and k classes, find bounding boxes for all instances of
+those classes.
+• Semantic Segmentation: Given an image and k classes, classify each pixel.
+• Instance segmentation: Given an image and k classes, classify each pixel as one of
+the k classes, but distinguish different instances of the classes.
+• Content-based Image Retrieval: Given an image x and n images in a database,
+find the top u images which are most similar to x.
+There are many techniques to approach those problems, but since AlexNet [KSH12] was
+published, all of those problems have high-quality solutions which make use of Convolutional
+Neural Networks (CNNs) [HZRS15a, LAE+16, RFB15, DHS16, SKP15].
+Today, most neural networks are constructed by rules of thumb and gut feeling. The
+architectures evolved and got deeper, more hyperparameters were added. Although there
+are methods for analyzing CNNs, those methods are not enough to determine all steps in
+the development of network architectures without gut feeling. A detailed introduction to
+CNNs as well as nine methods for analysis of CNNs is given in Chapter 2.
+1Classification is also called identification if the classes are humans. Another name is object recognition,
+although the classes can be humans and animals as well.
+
+1. Introduction
+Despite the fact that most researchers and developers do not use topology learning, a couple
+of algorithms have been proposed for this task. Five classes of topology learning algorithms
+are introduced in Chapter 3.
+When datasets and the number of classes are large, evaluating a single idea how to improve
+the network can take several weeks just for the training. Hence the idea of building a
+hierarchy of classifiers which allows to split the classification task into various sub-tasks
+that can easily be combined is evaluated in Chapter 4.
+Confusion Matrix Ordering (CMO), the hierarchical classifier, 9 types of hyperparameters
+and label smoothing are evaluated in Chapter 5.
+This work focuses on classification problems to keep the presented ideas as pure and
+simple as possible. The described techniques are relevant to all six described computer
+vision problems due to the fact that Encoder-Decoder architectures are one component of
+state-of-the-art algorithms for all six of them.
+
+2. Convolutional Neural Networks
+In the following, it is assumed that the reader knows what a multilayer perceptron (MLP)
+is and how they are designed for classification problems, what activation functions are and
+how gradient descent works. In case the reader needs a refresher on any of those topics, I
+recommend chapter 4.3 and 4.4 of [Tho14a] as well as [LBH15].
+This chapter introduces linear image filters in Section 2.1, then standard layer types of
+CNNs are explained in Section 2.2. The layer block pattern is described in Section 2.3,
+transition layers in Section 2.4 and nine ways to analyze CNNs are described in Section 2.5.
+2.1. Linear Image Filters
+A linear image filter (also called a filter bank or a kernel) is an element F ∈ R
+kw×kh×d
+,
+where kw represents the filter’s width, kh the filter’s height and d the number of input
+channels. The filter F is convolved with the image I ∈ R
+w×h×d
+to produce a new image I
+0
+.
+The output image I
+0 has only one channel. Each pixel I0
+(x, y) of the output image gets
+calculated by point-wise multiplication of one filter element with one element of the original
+image I:
+I
+0
+(x, y) =
+b
+kw
+2X
+c
+ix=1−d kw
 2
-2. Convolutional Neural Networks
-In the following, it is assumed that the reader knows what a multilayer perceptron (MLP)
-is and how they are designed for classification problems, what activation functions are and
-how gradient descent works. In case the reader needs a refresher on any of those topics, I
-recommend chapter 4.3 and 4.4 of [Tho14a] as well as [LBH15].
-This chapter introduces linear image filters in Section 2.1, then standard layer types of
-CNNs are explained in Section 2.2. The layer block pattern is described in Section 2.3,
-transition layers in Section 2.4 and nine ways to analyze CNNs are described in Section 2.5.
-2.1. Linear Image Filters
-A linear image filter (also called a filter bank or a kernel) is an element F ∈ R
-kw×kh×d
-,
-where kw represents the filter’s width, kh the filter’s height and d the number of input
-channels. The filter F is convolved with the image I ∈ R
-w×h×d
-to produce a new image I
-0
-.
-The output image I
-0 has only one channel. Each pixel I
-0
-(x, y) of the output image gets
-calculated by point-wise multiplication of one filter element with one element of the original
-image I:
-I
-0
-(x, y) =
-b
-kw
-2X
-c
-ix=1−d kw
-2
-e
-b
-kh
-2X
-c
-iy=1−d kh
-2
-e
-X
-d
-ic=1
-I(x + ix, y + iy, ic) · F(ix, iy, ic)
-This procedure is explained by Figure 2.1. It is essentially a discrete convolution.
-I ∈ R
-7×7
-Filter kernel
-F ∈ R
-3×3
-Result of point-wise
-multiplication
-I
-0 ∈ R
-7×7
-104
-116
-116
-112
-58
-47
-47
-109
-97
-114
-116
-105
-110
-45
-116
-104
-111
-109
-97
-46
-100
-101
-47
-109
-97
-115
-116
-101
-114
-47
-99
-97
-116
-99
-97
-116
-99
-97
-116
-46
-112
-104
-112
-63
-118
-61
-49
-46
-48
-9
--3
--1
--6
-5
-3
-2
--8
-0
-936
--333
--109
--282
-545
-291
-94
--792
-0
--4
--254
--498
--662
--849
--642
-187
--520
-45
-240
-211
-388
-215
--861
--340
-559
--105
-185
--138
--180
-503
--718
-429
-350
-173
-251
-268
--655
--567
--53
--75
-80
-571
--128
-24
--408
-596
--550
-368
-26
-976
-156
-302
-647
-879
-223
-811
-54
-660
-Figure 2.1.: Visualization of the application of a linear k × k × 1 image filter. For each pixel of the
-output image, $k^2$ multiplications and $k^2$ additions of the products have to be calculated.
-3
-2. Convolutional Neural Networks
-One important detail is how boundaries are treated. There are four common ways of
-boundary treatment:
-• don’t compute: The image $I'$ will be smaller than the original image.
-$I' \in \mathbb{R}^{(w - k_w + 1) \times (h - k_h + 1) \times d_3}$, to be exact.
-• zero padding: The image I is padded by zeros where the filter would access elements
-which do not exist. This will result in edges being detected at the border if the border
-pixels are not black, but doesn’t need any computation.
-• nearest: Repeat the pixel which is closest to the boundary.
-• reflect: Reflect the image at the boundaries.
-Common tasks that can be done with linear filters include edge detection, corner detection,
-smoothing, sharpening, median filtering, box filtering. See Figure A.1 for five examples.
-Please note that the result of a filtering operation is again an image. This means filters
-can be applied successively. While each pixel after one filtering operation with a 3 × 3
-filter got influenced by 3 · 3 = 9 pixels of the original image, two successively applied 3 × 3
-filters increase the area of the original image which influenced the output. The output is
-then influenced by 25 pixels. This is called the receptive field. The kind of pattern which is
-detected by a filter is called a feature. The bigger the receptive field is, the more complex
-can features get as they are able to consider more of the original image. Instead of taking
-one 5 × 5 filter with 25 parameters, one might consider to take two successive 3 × 3 filters
-with 2 · (3 · 3) = 18 parameters. The 5 × 5 filter is a strict superset of possible filtering
-operations compared to the two 3 × 3 filters, but the relevance of this technique will become
-clear in Section 2.2.
-2.2. CNN Layer Types
-While the idea behind deep MLPs is that feature hierarchies capture the important parts
-of the input more easily, CNNs are inspired by the idea of translational invariance: Many
-features in an image are translationally invariant. For example, if a car is developed, one
-could try to detect it by its parts [FGMR10]. But then there are many positions at which
-the wheels could be. Combining those, it is desirable to capture low-level, translationally
-invariant features at lower layers of an artificial neural network (ANN) and in higher layers
-high-level features which are combinations of the low-level features.
-Also, models should utilize the fact that the pixels of images are ordered. One way to use
-this is by learning image filters in so called convolutional layers.
-While MLPs vectorize the input, the input of a layer in a CNN are feature maps. A feature
-map is a matrix $m \in \mathbb{R}^{w \times h}$, but typically the width equals the height (w = h). For an RGB
-4
-2.2. CNN Layer Types
-input image, the number of feature maps is d = 3. Each color channel is a feature map.
-Since AlexNet [KSH12] almost halved the error in the ImageNet challenge, CNNs are
-state-of-the-art in various computer vision tasks.
-Traditional CNNs have three important building tools:
-• Convolutional layers with a non-linear activation function as described in Section 2.2.1,
-• pooling layers as described in Section 2.2.2 and
-• normalization layers as described in Section 2.2.4.
-2.2.1. Convolutional Layers
-Convolutional layers take several feature maps as input and produce n feature maps1 as
-output, where n is the number of filters in the convolution layer. The filter weights of
-the linear convolutions are the parameters which are adapted to the training data. The
-number n of filters as well as the filter’s size $k_w \times k_h$ are hyperparameters of convolutional
-layers. Sometimes, it is denoted as $n@k_w \times k_h$. Although the filter depth is usually omitted
-in the notation, the filters are of dimension $k_w \times k_h \times d^{(i-1)}$, where $d^{(i-1)}$ is the number of
-feature maps of the input layer (i − 1).
-Another hyperparameter of convolution layers is the stride s ∈ N≥1 and the padding.
-Padding (usually zero-padding [SCL12, SEZ+13, HZRS15a]) is used to make sure that the
-size of the feature maps doesn’t change.
-The hyperparameters of convolutional layers are
-• the number of filters n ∈ N≥1,
-• $k_w, k_h \in \mathbb{N}_{\geq 1}$ of the filter size $k_w \times k_h \times d^{(i-1)}$,
-• the activation function of the layer (see Table B.3) and
-• the stride s ∈ N≥1
-Typical choices are n ∈ { 32, 64, 128 }, kw = kh = k ∈ { 1, 3, 5, 11 } such as in [KSH12,
-SZ14, SLJ+15], rectified linear unit (ReLU) activation and s = 1.
-The concept of weight sharing is crucial for CNNs. This concept was introduced in [WHH+89].
-With weight sharing, the filters can be learned with stochastic gradient descent (SGD) just
-like MLPs. In fact, every CNN has an equivalent MLP which computes the same function
-if only the flattened output is compared.
-1
-also called activation maps or channels
-5
-2. Convolutional Neural Networks
-This is easier to see when the filtering operation is denoted formally:
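-$$o^{(i)}(x) = b + \sum_{j=1}^{k} w_{ij} \cdot x_j \quad \text{with } i \in \{1, \dots, w\} \times \{1, \dots, h\} \times \{1, \dots, d\} \qquad [2.1]$$
-$$o^{(x,y,z)}(I) = b + \sum_{i_x = 1 - \lceil \frac{k_w}{2} \rceil}^{\lfloor \frac{k_w}{2} \rfloor} \; \sum_{i_y = 1 - \lceil \frac{k_h}{2} \rceil}^{\lfloor \frac{k_h}{2} \rfloor} \; \sum_{i_c = 1}^{d} F_z(i_x, i_y, i_c) \cdot I(x + i_x, y + i_y, i_c) \qquad [2.2]$$
-with a bias $b \in \mathbb{R}$, $x \in \{1, \dots, w\}$, $y \in \{1, \dots, h\}$ and $z \in \{1, \dots, d\}$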
-One can see that most weights of the equivalent MLP are zero and many weights are
-equivalent. Hence the advantage of CNNs compared to MLPs is the reduction of parameters.
-The effect of fewer parameters is that less training data is necessary to get suitable
-estimations for those. This means a MLP which is able to compute the same functions as a
-CNN will likely have worse results on the same dataset, if a CNN architecture is suitable
-for the dataset.
-See Figure 2.2 for a visualization of the application of a convolutional layer.
-3 feature maps
-(e.g. RGB) n feature maps
-n filters of
-size k × k × 3
-width w
-width w
-height
-h
-height
-h
-neural
-network
-data
-apply
-. . .
-. . .
-. . .
-. . .
-. . .
-. . .
-Figure 2.2.: Application of a single convolutional layer with n filters of size k × k × 3 with stride
-s = 1 to input data of size width × height with three channels.
-6
-2.2. CNN Layer Types
-A convolutional layer with n filters of size $k_w \times k_h$ and SAME padding after $d^{(i-1)}$ feature
-maps of size $s_x \times s_y$ has $n \cdot d^{(i-1)} \cdot (k_w \cdot k_h)$ parameters if no bias is used. In contrast, a fully
-connected layer which produces the same output size and does not use a bias would have
-$n \cdot d^{(i-1)} \cdot (s_x \times s_y)^2$ parameters. This means a convolutional layer has drastically fewer
-parameters. On the one hand, this means it can learn less complex decision boundaries. On
-the other hand, it means fewer parameters have to be learned and hence the optimization
-procedure needs fewer examples and the optimization objective is simpler.
-It is particularly interesting to notice that even a convolutional layer of 1 × 1 filters does
-learn a linear combination of the d input feature maps. This can be used for dimensionality
-reduction, if there are fewer 1 × 1 filters in a convolutional layer than input feature maps.
-Another insight recently got important: Every fully connected layer has an equivalent
-convolutional layer which has the same weights.2 This way, one can use the complete
-classification network as a very complex non-linear image filter which can be used for
-semantic segmentation.
-A fully connected layer with d ∈ N≥1 inputs and n ∈ N≥1 nodes can be interpreted as a
-convolutional layer with an input of shape 1 × 1 × d and n filters of size 1 × 1. This will
-produce an output shape 1 × 1 × n. Every single output is connected to all of the inputs.
-When a convolutional layer is followed by a fully connected layer, it is necessary to vectorize
-the feature maps. If the 1 × 1 convolutional filter layer is applied to the vectorized output,
-it is completely equivalent to a fully connected layer. However, the vectorization can be
-omitted if a convolution layer without padding and a filter size equal to the feature maps
-size is applied. This was used by [LSD15].
-2.2.2. Pooling Layers
-Pooling summarizes a p × p area of the input feature map. Just like convolutional layers,
-pooling can be used with a stride of s ∈ N>1. As s ≥ 2 is the usual choice, pooling layers
-are sometimes also called subsampling layers. Typically, p ∈ { 2, 3, 4, 5 } and s = 2 such as
-for AlexNet [KSH12] and VGG-16 [SZ14].
-The type of summary for the set of activations A varies between the functions listed
-in Table 2.1, spatial pyramid pooling as introduced in [HZRS14] and generalizing pooling
-functions as introduced in [LGT16].
-2But convolutional layers only have equivalent fully connected layers if the output feature map is 1 × 1
-7
-2. Convolutional Neural Networks
-Name Definition Used by
-Max pooling $\max \{ a \in A \}$ [BPL10, KSH12]
-Average / mean pooling $\frac{1}{|A|} \sum_{a \in A} a$ LeNet-5 [LBBH98] and [KSlB+10]
-$\ell_2$ pooling $\sqrt{\sum_{a \in A} a^2}$ [Le13]
-Stochastic pooling * [ZF13]
-Table 2.1.: Pooling types for a set A of activations a ∈ R.
-(*) For stochastic pooling, each of the p×p activation values $a_i$ in the pooling region gets
-picked with probability $p_i = \frac{a_i}{\sum_{a_j \in A} a_j}$. This assumes the activations $a_i$ are non-negative.
-Pooling is applied for three reasons: To get local translational invariance, to get invariance
-against minor local changes and, most important, for data reduction to $\frac{1}{s^2}$th of the data by
-using strides of s > 1.
-See Figure 2.3 for a visualization of max pooling.
-7 9 3 5 9 4
-0 7 0 0 9 0
-5 0 9 3 7 5
-9 2 9 6 4 3
-2 × 2 max pooling
-9 5 9
-9 9 7
-2
-2
-Figure 2.3.: 2 × 2 max pooling applied to a feature map of size 6 × 4 with stride s = 2 and padding.
-Average pooling of p × p areas with stride s can be replaced by a convolutional layer. If
-the input of the pooling layer are $d^{(i-1)}$ feature maps, the convolutional layer has to have
-$d^{(i-1)}$ filters of size p × p and stride s. The ith filter has the values
-$$\begin{pmatrix} \frac{1}{p^2} & \dots & \frac{1}{p^2} \\ \vdots & \ddots & \vdots \\ \frac{1}{p^2} & \dots & \frac{1}{p^2} \end{pmatrix}$$
-for the dimension i and the zero matrix
-$$\begin{pmatrix} 0 & \dots & 0 \\ \vdots & \ddots & \vdots \\ 0 & \dots & 0 \end{pmatrix}$$
-for all other dimensions $j \in \{1, \dots, d^{(i-1)}\}$ with $j \neq i$.
-8
-2.2. CNN Layer Types
-2.2.3. Dropout
-Dropout is a technique used to prevent overfitting and co-adaptations of neurons by setting
-the output of any neuron to zero with probability p. It was introduced in [HSK+12] and is
-well-described in [SHK+14].
-A Dropout layer can be implemented as follows: For an input in of any shape s, a tensor of
-the same shape $D \in \{0, 1\}^s$ is sampled, where each element $d_i$ is sampled independently
-from a Bernoulli distribution. The results are element-wise multiplied to calculate the
-output out of the Dropout layer:
-$$\text{out} = D \odot \text{in} \quad \text{with } d_i \sim B(1, p)$$
-where $\odot$ is the Hadamard product
-$$(A \odot B)_{i,j} := (A)_{i,j} (B)_{i,j}$$
-Hence every value of the input gets set to zero with a dropout probability of p. Typically,
-Dropout is used with p = 0.5. Layers closer to the input usually have a lower dropout probability
-than later layers. In order to keep the expected output at the same value, the
-output of a dropout layer is multiplied with $\frac{1}{1-p}$ when dropout is enabled [Las17, tf-16b].
-At inference time, dropout is disabled.
-Dropout is usually only applied after fully connected layers, but not after convolutional
-layers as it usually increases the test error as pointed out in [GG16].
-Models which use Dropout can be interpreted as an ensemble of models with different
-numbers of neurons in each layer, but also with weight sharing.
-Conceptually similar are DropConnect and networks with stochastic depth. DropConnect
-[WZZ+13] is a generalization of Dropout, which sets weights to zero in contrast to
-setting the output of a neuron to zero. Networks with stochastic depth as introduced
-in [HSL+16] drop out only complete layers. This can be done by having Residual networks
-which have one identity connection and one residual feature connection. Hence the residual
-features can be dropped out and the identity connection remains.
-2.2.4. Normalization Layers
-One problem when training deep neural networks is internal covariate shift: While the
-parameters of layers close to the output are adapted to some input produced by lower layers,
-those lower layers' parameters are also adapted. This leads to the parameters in the upper
-layers being worse. A very low learning rate has to be chosen to adjust for the fact that the
-input features might drastically change over time.
-9
-2. Convolutional Neural Networks
-One way to approach this problem is by normalizing mini-batches as described in [IS15]. A
-Batch Normalization layer with d-dimensional input $x = (x^{(1)}, \dots, x^{(d)})$ is first normalized
-point-wise to
-$$\hat{x}^{(k)} = \frac{x^{(k)} - \bar{x}^{(k)}}{\sqrt{s'[x^{(k)}]^2 + \varepsilon}}$$
-with $\bar{x}^{(k)} = \frac{1}{m} \sum_{i=1}^{m} x_i^{(k)}$ being the sample mean and
-$s'[x^{(k)}]^2 = \frac{1}{m} \sum_{i=1}^{m} (x_i^{(k)} - \bar{x}^{(k)})^2$ the
-sample variance where $m \in \mathbb{N}_{\geq 1}$ is the number of training samples per mini-batch, ε > 0
-being a small constant to prevent division by zero and $x_i^{(k)}$ is the activation of neuron k for
-training sample i.
-Additionally, for each activation $x^{(k)}$ two parameters $\gamma^{(k)}, \beta^{(k)}$ are introduced which scale
-and shift the feature:
-$$y^{(k)} = \gamma^{(k)} \cdot \hat{x}^{(k)} + \beta^{(k)}$$
-In the case of fully connected layers, this is applied to the activation, before the non-linearity
-is applied. If it is applied after the activation, it harms the training in early stages. For
-convolution, only one γ and one β is learned per feature map.
-One important special case is $\gamma^{(k)} = \sqrt{s'[x^{(k)}]^2 + \varepsilon}$ and $\beta^{(k)} = \bar{x}^{(k)}$, which would make the
-Batch Normalization layer an identity layer.
-During evaluation time,3
-the expected value and the variance are calculated once for the
-complete dataset. An unbiased estimate of the empirical variance is used.
-The question where Batch Normalization layers (BN) should be applied and for which
-reasons is still open. For Dropout, it doesn’t matter if it is applied before or after the
-activation function. Considering this, the possible options for the order are:
-1. CONV / FC → BN → activation function → Dropout → . . .
-2. CONV / FC → activation function → BN → Dropout → . . .
-3. CONV / FC → activation function → Dropout → BN → . . .
-4. CONV / FC → Dropout → BN → activation function → . . .
-The authors of [IS15] suggest to use Batch Normalization before the activation function
-as in Items 1 and 4. Batch Normalization after the activation led to better results in
-https://github.com/ducha-aiki/caffenet-benchmark/blob/master/batchnorm.md
-Another normalization layer is Local Response Normalization as described in [KSH12],
-which includes $\ell_2$ normalization as described in [WWQ13]. Those two normalization layers,
-however, are superseded by Batch Normalization.
-3
-also called inference time
-10
-2.3. CNN Blocks
-2.3. CNN Blocks
-This section describes more complex building blocks than simple layers. CNN blocks act
-similar to a layer, but they are themselves composed of layers.
-2.3.1. Residual Blocks
-Residual blocks as introduced in [HZRS15a] are a milestone in computer vision. They
-enabled the computer vision community to go from about 16 layers as in VGG 16-D (see
-Appendix D.3) to several hundred layers. The key idea of deep residual networks (ResNets)
-as introduced in [HZRS15a] is to add an identity connection which skips two layers. This
-identity connection adds the feature maps onto the other feature maps and thus requires
-the output of the input layer of the residual block to be of the same dimension as the last layer
-of the residual block.
-Formally, it can be described as follows. If xi are the feature maps after layer i and x0 is
-the input image, H is a non-linear transformation of feature maps, then
-y = H(x)
-describes a traditional CNN. Note that this could be multiple layers. A residual block as
-visualized in Figure 2.4 is described by
-y = H(x) + x
-In [HZRS15a], they only used residual skip connections to skip two layers. Hence, if
-$\text{conv}_i(x_i)$ describes the application of the convolutional layer i to the input $x_i$ without the
-nonlinearity, then such a residual block is
-$$x_{i+2} = \text{conv}_{i+1}(\text{ReLU}(\text{conv}_i(x_i))) + x_i$$
-Figure 2.4.: ResNet module
-Image source: [HZRS15a]
-[HM16] provides some insights why deep residual networks are successful.
-11
-2. Convolutional Neural Networks
-2.3.2. Aggregation Blocks
-Two common ways to add more parameters to neural networks are increasing their depth
-by adding more layers or increasing their width by adding more neurons / filters. Inception
-blocks [AM15] implicitly started a new idea which was explicitly described in [XGD+16] as
-“ResNeXt block”: Increasing the cardinality C ∈ N≥1. By cardinality, the authors describe
-the concept of having C small convolutional networks with the same topology but different
-weights. This concept is visualized in Figure 2.5. Please note that Figure 2.5 does not
-combine aggregation blocks with residual blocks as the authors did.
-256-d in
-concatenate
-total 32
-groups
-. . .
-128-d out
-4 @ 1 × 1 × 256
-4 @ 3 × 3 × 4
-4 @ 1 × 1 × 256
-4 @ 3 × 3 × 4
-4 @ 1 × 1 × 256
-4 @ 3 × 3 × 4
-Figure 2.5.: Aggregation block with a cardinality of C = 32. Each of the 32 groups is a 2-layer
-convolutional network. The first layer receives 256 feature maps and applies four 1 × 1
-filters to it. The second layer applies four 3 × 3 filters. Although every group has
-the same topology, the learned weights are different. The outputs of the groups are
-concatenated.
-The hyperparameters of an aggregation block are:
-• The topology of the group members.
-• The cardinality C ∈ N≥1. Note that a cardinality of C = 1 is equivalent in every
-aspect to using the group network without an aggregation block.
-12
-2.3. CNN Blocks
-2.3.3. Dense Blocks
-Dense blocks are collections of convolutional layers which are introduced in [HLW16]. The
-idea is to connect each convolutional layer directly to subsequent convolutional layers.
-Traditional CNNs with L layers and one input layer have L connections between layers,
-but dense blocks have $\frac{L(L+1)}{2}$ connections between layers. The input feature maps are
-concatenated in depth. According to the authors, this prevents features from being re-learned
-and allows much fewer filters per convolutional layer. Where AlexNet and VGG-16
-have several hundred filters per convolutional layer (see Tables D.2 and D.3), the authors
-used only on the order of 12 feature maps per layer.
-A dense block is visualized in Figure 2.6.
-256-d in
-k @ 3 × 3
-concatenate
-k @ 3 × 3
-concatenate
-256-d
-k-d
-(256 + k)-d
-k-d
-(256 + L · k)-d out
-Figure 2.6.: Dense block with L = 2 layers and a growth factor of k.
-Dense blocks have four hyperparameters:
-• The activation function being used. The authors use ReLU.
-• The size kw × kh of filters. The authors use kw = kh = 3.
-• The number of layers L, where L = 2 is a simple convolutional layer.
-• The number k of filters added per layer (called growth rate in the paper)
-It might be necessary to use 1 × 1 convolutions to reduce the number of L · k feature maps.
-13
-2. Convolutional Neural Networks
-2.4. Transition Layers
-Transition layers are used to overcome constraints imposed by resource limitations or
-architectural design choices. One constraint is the number of feature maps (see Appendix C.3
-for details). In order to reduce the number of feature maps while still keeping as much
-relevant information as possible in the network, a convolutional layer i with ki filters of
-the shape 1 × 1 × ki−1 is added. The number of filters ki directly controls the number of
-generated feature maps.
-In order to reduce the dimensionality (width and height) of the feature maps, one typically
-applies pooling.
-Global pooling is another type of transition layer. It applies pooling over the complete
-feature map size to shrink the input to a constant 1 × 1 feature map and hence allows one
-network to have different input sizes.
-14
-2.5. Analysis Techniques
-2.5. Analysis Techniques
-CNNs have dozens of hyperparameters and ways to tune them. Although there are
-automatic methods like random search [BB12], grid search [LBOM98], gradient-based
-hyperparameter optimization [MDA15] and Hyperband [LJD+16] some actions need a
-manual investigation to improve the model’s quality. For this reason, analysis techniques
-which guide developers and researchers to the important hyperparameters are necessary. In
-the following, nine diagnostic techniques are explained.
-A machine learning developer has the following choices to improve the model’s quality:
-(I1) Change the problem definition (e.g., the classes which are to be distinguished)
-(I2) Get more training data
-(I3) Clean the training data
-(I4) Change the preprocessing (see Appendix B.1)
-(I5) Augment the training data set (see Appendix B.2)
-(I6) Change the training setup (see Appendices B.3 to B.5)
-(I7) Change the model (see Appendices B.6 and B.7)
-The preprocessing is usually not changed in modern architectures. However, this still leaves
-six very different ways to improve the classifier. Changing the training setup and the model
-each have too many possible choices to explore them completely. Thus, techniques are
-necessary to guide the developer to changes which are most promising to improve the model.
-For all of the following methods, it is important to use only the training set and the
-validation set.
-2.5.1. Qualitative Analysis by Example
-The most basic analysis technique which should always be used is looking at examples
-which the network correctly predicted with a high certainty and what the classifier got
-wrong with a high certainty. Those examples can be arranged by applying t-SNE [MH08].
-On the one hand, this might reveal errors in the training data. Most of the time, training
-data is manually labeled by humans who make mistakes. If a model is fit to those errors,
-its quality decreases.
-On the other hand, this can show differences in the distribution of validation data which
-are not covered by the training set and thus indicate the need to collect more data.
-15
-2. Convolutional Neural Networks
-2.5.2. Confusion Matrices
-A confusion matrix is a matrix $(c)_{ij} \in \mathbb{N}_{\geq 0}^{K \times K}$, where $K \in \mathbb{N}_{\geq 2}$ is the number of classes,
-which contains all correct and wrong classifications. The item $c_{ij}$ is the number of times
-items of class i were classified as class j. This means the correct classification is on the
-diagonal $c_{ii}$ and all wrong classifications are off the diagonal. The sum $\sum_{i=1}^{K} \sum_{j=1}^{K} c_{ij}$ is the
-total number of samples which were evaluated and
-$\frac{\sum_{i=1}^{K} c_{ii}}{\sum_{i=1}^{K} \sum_{j=1}^{K} c_{ij}}$
-is the accuracy.
-The sums $r(i) = \sum_{j=1}^{K} c_{ij}$ of each class i are worth being investigated as they show if the
-classes are skewed. If the number of samples of one class dominates the data set, then the
-classifier can get a high accuracy by simply always predicting the most common class. If
-the accuracy of the classifier is close to the a priori probability of the most common class,
-techniques to deal with skewed classes might help.
-An automatic criterion to check for this problem is
-$$\text{accuracy} \leq \frac{\max(\{ r(i) \mid i = 1, \dots, K \})}{\sum_{i=1}^{K} r(i)} + \varepsilon$$
-where ε is a small value to compensate the fact that some examples might be correct just
-by chance.
-Other values which should be checked are the class-wise sensitivities:
-$$s(k) = \frac{\text{\# correctly identified instances of class } k}{\text{\# instances of class } k} = \frac{c_{kk}}{r(k)} \in [0, 1]$$
-If s(i) is much lower than s(j), it is an indicator that more or cleaner training data is
-necessary for class i.
-The class-wise confusion
-$$f_{\text{confusability}}(k_1, k_2) = \frac{c_{k_1 k_2}}{\sum_{j=1}^{K} c_{k_1 j}}$$
-indicates if class k1 gets often classified as class k2. The highest values here can indicate
-if two classes should be merged or a specialized model for separating those classes could
-improve the overall system.
-2.5.3. Validation Curves: Accuracy, loss and other metrics
-Validation curves display a hyperparameter (e.g., the training epoch) on the horizontal
-axis and a quality metric on the vertical axis. Accuracy, error = (1 − accuracy) or loss are
-typical quality metrics. Other quality metrics can be found in [OHIL16].
-In case that the number of training epochs is used as the examined hyperparameter,
-validation curves give an indicator if training longer improves the model’s performance. By
-16
-2.5. Analysis Techniques
-plotting the error on the training set as well as the error on a validation set, one can also
-estimate if overfitting might become a problem. See Figure 2.7 for an example.
-10 20 30 40 50 60 70 80 90 100
-0.2
-0.4
-0.6
-0.8
-overfitting
-Epochs
-Error Training set
-Validation set
-Figure 2.7.: A typical validation curve: In this case, the hyperparameter is the number of epochs
-and the quality metric is the error (1 − accuracy). The longer the network is trained,
-the better it gets on the training set. At some point the network is fit too well to the
-training data and loses its capability to generalize. At this point the quality curve of
-the training set and the validation set diverge. While the classifier is still improving on
-the training set, it gets worse on the validation and the test set.
-When the epoch-loss validation curve has plateaus as in Figure 2.8, this means the optimization
-process did not improve for several epochs. Three possible ways to reduce the
-problem of plateaus are (i) to change weight initialization if the plateau was at the beginning,
-(ii) regularizing the model or (iii) changing the optimization algorithm.
-Loss functions
-The loss function (also called error function or cost function) is a function which assigns a
-real value to a complex event like the predicted class of a feature vector. It is used to define
-the objective function. For classification problems the loss function is typically cross-entropy
-with $\ell_1$ or $\ell_2$ regularization, as it was described in [NH92]:
-$$E_{CE}(W) = \underbrace{-\sum_{x \in X} \sum_{k=1}^{K} \left[ t_k^x \log(o_k^x) + (1 - t_k^x) \log(1 - o_k^x) \right]}_{\text{cross-entropy data loss}} + \underbrace{\lambda_1 \cdot \overbrace{\sum_{w \in W} |w|}^{\ell_1} + \lambda_2 \cdot \overbrace{\sum_{w \in W} w^2}^{\ell_2}}_{\text{model complexity loss}}$$
-where W are the weights, X is the training data set, $K \in \mathbb{N}_{\geq 0}$ is the number of classes and
-$t_k^x$ indicates if the training example x is of class k. $o_k^x$ is the output of the classification
-algorithm which depends on the weights. $\lambda_1, \lambda_2 \in [0, \infty)$ weight the regularization and are
-typically smaller than 0.1.
-17
-2. Convolutional Neural Networks
-Figure 2.8.: Example for a validation curve (plotted loss function) with plateaus. The dark orange
-curve is smoothed, but the non-smoothed curve is also plotted in light orange.
-The data loss is positive whenever the classification is not correct, whereas the model
-complexity loss is higher for more complex models. The model complexity loss exists due
-to the intuition of Occam’s razor : If two models explain the same data with an accuracy of
-100 %, the simpler model is to be preferred.
-A reason to show the loss for the validation curve technique instead of other quality metrics
-is that it contains more information about the quality of the model. A reason against the
-loss is that it has no upper bound like the accuracy and can be hard to interpret. The
-loss only shows relative learning progress whereas the accuracy shows absolute progress to
-human readers.
-There are three observations in the loss validation curve which can help to improve the
-network:
-• If the loss does not decrease for several epochs, the learning rate might be too low.
-The optimization process might also be stuck in a local minimum.
-• Loss being NAN might be due to too high learning rates. Another reason is division
-by zero or taking the logarithm of zero. In both cases, adding a small constant like
-$10^{-7}$ fixes the problem.
-• If the loss-epoch validation curve has a plateau at the beginning, the weight initialization
-might be bad.
-18
-2.5. Analysis Techniques
-Quality criteria
-There are several quality criteria for classification models. Most quality criteria are based
-on the confusion matrix c which denotes at $c_{ij}$ the number of times the real class was i and j
-was predicted. This means the diagonal contains the number of correct predictions. For
-the following, let $t_i = \sum_{j=1}^{k} c_{ij}$ be the number of training samples for class i. The most
-common quality criterion is accuracy:
-$$\text{accuracy}(c) = \frac{\sum_{i=1}^{k} c_{ii}}{\sum_{i=1}^{k} t_i} \in [0, 1]$$
-One problem of accuracy as a quality criterion are skewed classes. If one class is by far
-more common than all other classes, then the simplest way to achieve a high score is to
-always classify everything as the most common class.
-In order to fix this problem, one can use the mean accuracy:
-$$\text{mean-accuracy}(c) = \frac{1}{k} \cdot \sum_{i=1}^{k} \frac{c_{ii}}{t_i} \in [0, 1]$$
-For two-class problems there are many other metrics like precision, recall and Fβ-score.
-Quality criteria for semantic segmentation are explained in [Tho16].
-Besides the quality of the classification result, several other quality criteria are important
-in practice:
-• Speed of evaluation for new images,
-• latency,
-• power consumption,
-• robustness against (non)random perturbations in the training data (see [SZS+13,
-PMW+15]),
-• robustness against (non)random perturbations in the training labels (see [NDRT13,
-XXE12]),
-• model size
-As reducing the floating point accuracy allows to process more data on a given device [Har15],
-analysis under this aspect is also highly relevant in some scenarios.
-However, the following focuses on the quality of the classification result.
-19
-2. Convolutional Neural Networks
-2.5.4. Learning Curves
-A learning curve is a plot where the horizontal axis displays the number of training samples
-given to the network and the vertical axis displays the error. Two curves are plotted: The
-error on the training set (of which the size is given by the horizontal axis) and the error on
-the test set (which is of fixed size). See Figure 2.9 for an example. The learning curve for the
-validation set is an indicator if more training data without any other changes will improve
-the network's performance. Having the training set’s learning curve, it is possible to estimate
-if the capacity of the model to fit the data is high enough for the desired classification error.
-The error on the validation set should never be expected to be significantly lower than the
-error on the training set. If the error on the training set is too high, then more data will
-not help. Instead, the model or the training algorithm need to be adjusted.
-If the training set’s learning curve is significantly higher than the validation set’s learning
-curve, then removing features (e.g., by decreasing the image’s resolution), more training
-samples or more regularization will help.
-10 20 30 40 50 60 70 80 90 100
-0.2
-0.4
-0.6
-avoidable bias
-variance
-human-level error
-Training samples
-Error Validation set
-Training set
-Figure 2.9.: A typical learning curve: The more data is used for training, the more errors a given
-architecture will make to fit the given training data. At the same time, it is expected
-that the training data gets more similar to the true distribution of the data which
-should be captured by the test data. At some point, the error on the training and
-test set should be about the same. The term “avoidable bias” was coined by Andrew
-Ng [Ng16]. In some cases it is not possible to classify data correctly by the given
-features. If humans can classify the data given the features correctly, however, then
-the bias is avoidable by building a better classifier.
-The major drawback of this analysis technique is its computational intensity. In order to
-get one point on the training curve and one point on the testing curve, a complete training
-has to be executed. On the full data set, this can be several days on high-end computers.
-20
-2.5. Analysis Techniques
-2.5.5. Input-feature based model explanations
-Understanding which clues the model took to come to its prediction is crucial to check if
-the model actually learns what the developer thinks it learns. For example, a model which
-has to distinguish sled dogs from Chihuahuas might simply look at the background and
-check if there is snow. Depending on the training and test data, this works exceptionally
-well. However, it is not the desired solution.
-For classification problems in computer vision, there are two types of visualizations which
-help to diagnose such problems. Both color superpixels of the original image to convey
-information how the model used those superpixels:
-• Correct class heatmap: The probability of the correct class is encoded to give a
-heat map which superpixels are important for the correct class. This can also be done
-by setting the opacity accordingly.
-• Most-likely class image: Each of the most likely classes for all superpixels is
-represented by a color. The colored image thus gives clues why different predictions
-were assigned a high probability.
-Two methods to generate such images are explained in the following.
-Occlusion Sensitivity Analysis
-Occlusion sensitivity analysis is described in [ZF14]. The idea is to occlude a part of the
-image by something. This could be a gray square as in [ZF14] or a black superpixel as
-in [RSG16]. Then the classifier is run on the image again. This is done for each region (e.g.,
-superpixel or position of the square) and the regions are then colored to generate either a
-correct class heatmap or the most-likely class image. It is important to note that the color
-at region $r_i$ denotes the result if $r_i$ is occluded.
-Both visualizations are shown in Figure 2.10. One can see that the network makes sensible
-predictions for this image of the class “Pomeranian”. However, the image of the class “Afghan
-Hound” gets confused with “Ice lolly”, which is a sign that this needs further investigation.
-Gradient-based approaches
-In [SVZ13], a gradient-based approach was used to generate image-specific class saliency
-maps. The authors describe the problem as a ranking problem, where each pixel of the
-image $I_0$ is assigned a score $S_c(I_0)$ for a class $c$ of interest. CNNs are non-linear functions,
-but they can be approximated by the first order Taylor expansion $S_c(I) \approx w^T I + b$ where
-$w$ is the derivative of $S_c$ at $I_0$.
-21
-2. Convolutional Neural Networks
-2.5.6. Argmax Method
-The argmax method has two variants:
-• Fixed class argmax: Propagate all elements of a given class through the network
-and analyze which neurons are activated most often / have the highest activation.
-• Fixed neuron argmax: Propagate the data through the network and find the n
-data elements which cause the highest activation for a given neuron.
-Note that a “neuron” is a filter in a CNN. The amount of activation of a filter F by an
-image I is calculated by applying F to I and calculating the element-wise sum of the result.
-Fixed-neuron argmax was applied in [ZF14]. However, they did not stop with that. Besides
-showing the 9 images which caused the highest activation, they also trained a deconvolutional
-neural network to project the activation of the filter back into pixel space.
-The fixed neuron argmax can be used qualitatively to get an impression of the kind of
-features which are learned. This is useful to diagnose problems, for example in [AM15] it is
-described that the network recognized the class “dumbbell” only if a hand was present, too.
-Fixed neuron argmax can also be used quantitatively to estimate the amount of parameters
-being shared between classes or how many parameters are mainly assigned to which classes.
-Going one step further from the fixed neuron argmax method is using an optimization
-algorithm to change an initial image minimally in such a way that any desired class gets
-predicted. This is called caricaturization in [MV16].
-2.5.7. Feature Map Reconstructions
-Feature map visualizations such as the ones made in [ZF14] (see Figure 2.11) give insights
-into the learned features. This shows what the network emphasizes. However, it is not
-necessarily the case that the feature maps allow direct and easy conclusions about the
-learned features. This technique is called inversion in [MV16].
-A key idea of feature map visualizations is to reconstruct a layer’s input, given its activation.
-This makes it possible to find which inputs would cause neurons to activate with extremely
-high or low values.
-More recent work like [NYC16] tries to make the reconstructions’ appearance look more
-natural.
-22
-2.5. Analysis Techniques
-2.5.8. Filter comparison
-One question which might lead to some insight is how robust the features are which
-are learned. If the same network is trained with the same data, but different weight
-initializations, the learned weights should still be comparable.
-If the set of learned filters changes with initialization, this might be an indicator for too
-little capacity of that layer. Hence adding more filters to that layer could improve the
-performance.
-Filters can be compared with the k-translation correlation as introduced in [ZCZL16]:
-$$\rho_k(W_i, W_j) = \max_{(x,y) \in \{-k,\dots,k\}^2 \setminus (0,0)} \frac{\langle W_i, T(W_j, x, y) \rangle_f}{\|W_i\|_2 \, \|W_j\|_2} \in [-1, 1],$$
-where $T(\cdot, x, y)$ denotes the translation of the first operand by (x, y), with zero padding at
-the borders to keep the shape. $\langle \cdot, \cdot \rangle_f$ denotes the flattened inner product, where the two
-operands are flattened into column vectors before applying the standard inner product. The
-closer the absolute value of the k-translation correlation to one, the more similar two filters
-$W_i$, $W_j$ are. According to [ZCZL16], standard CNNs like AlexNet (see Appendix D.2) and
-VGG-16 (see Appendix D.3) have many filters which are highly correlated. They found
-this by comparing the averaged maximum k-translational correlation of the networks with
-Gaussian-distributed initialized filters. The averaged maximum k-translational correlation
-is defined as
-$$\bar{\rho}_k(W) = \frac{1}{N} \sum_{i=1}^{N} \max_{j=1, j \neq i}^{N} \rho_k(W_i, W_j)$$
-where N is the number of filters in the layer W and Wi denotes the ith filter.
-2.5.9. Weight update tracking
-Andrej Karpathy proposed in the 5th lecture of CS231n to track weight updates to check if
-the learning rate is well-chosen. He suggests that the weight update should be in the order
-of $10^{-3}$. If the weight update is too high, then the learning rate has to be decreased. If the
-weight update is too low, then the learning rate has to be increased.
-The order of the weight updates as well as possible implications highly depend on the model
-and the training algorithm. See Appendix B.5 for a short overview of training algorithms
-for neural networks.
-23
-2. Convolutional Neural Networks
-2.6. Accuracy boosting techniques
-There are techniques which can almost always be applied to improve accuracy of CNN
-classifiers:
-• Ensembles [CMS12]
-• Training-time augmentation (see Appendix B.2)
-• Test-time transformations [DDFK16, How13, HZRS15b]
-• Pre-training and fine-tuning [ZDGD14, GDDM14]
-One of the most simple ensemble techniques which was introduced in [CMS12] is averaging
-the prediction of n classifiers. This improves the accuracy even if the classifiers use exactly
-the same training setup by reducing variance.
-Data augmentation techniques give the optimizer the possibility to take invariances like
-rotation into account by generating artificial training samples from real training samples.
-Data augmentation hence reduces bias and variance with no cost at inference time.
-Data augmentation at inference time reduces the variance of the classifier. Similar to using
-an ensemble, it increases the computational cost of inference.
-Pretraining the classifier on another dataset to start from a good position or finetuning
-a model which was originally created for another task is also a common technique.
-24
-2.6. Accuracy boosting techniques
-Figure 2.10.: Occlusion sensitivity analysis by [ZF14]: The left column shows three example images,
-where a gray square occluded a part of the image. This gray square’s center (x, y) was
-moved over the complete image and the classifier was run on each of the occluded
-images. The probability of the correct class, depending on the gray square’s position,
-is shown in the middle column. One can see that the predicted probability of the
-correct class “Pomeranian” drops if the face of the dog is occluded. The last image
-gives the class with the highest predicted probability. In the case of the Pomeranian,
-it always predicts the correct class if the head is visible. However, if the head of the
-dog is occluded, it predicts other classes.
-25
-2. Convolutional Neural Networks
-Figure 2.11.: Filter visualization from [ZF14]: The filters themselves as well as the input feature
-maps which caused the highest activation are displayed.
-26
-3. Topology Learning
-The topology of a neural network is crucial for the number of parameters, the number
-of floating point operations (FLOPs), the required memory, as well as the features being
-learned. The choice of the topology, however, is still mainly done by trial-and-error.
-This chapter introduces four general approaches to automatic topology learning: Growing a
-network from a minimal network in Section 3.1, pruning in Section 3.2, genetic approaches
-in Section 3.3 and reinforcement learning approaches in Section 3.4.
-3.1. Growing approaches
-Growing approaches for topology learning start with a minimal network, which only has
-the necessary number of input nodes and the number of output nodes which are determined
-by the application and the features of the input. They then apply a criterion to insert new
-layers / neurons into the network.
-In the following, Cascade-Correlation, Meiosis Networks and Automatic Structure Optimization are introduced.
-3.1.1. Cascade-Correlation
-Cascade-Correlation was introduced in [FL89]. It generates a cascading architecture which
-is similar to dense block described in Section 2.3.3.
-Cascade-Correlation works as follows:
-1. Initialization: The number of input nodes and the number of output nodes are
-defined by the problem. Create a minimal, fully connected network for those.
-2. Training: Train the network until the error no longer decreases.
-3. Candidate Generation: Generate candidate nodes. Each candidate node is connected to all inputs. They are not connected to other candidate nodes and not
-connected to the output nodes.
-27
-3. Topology Learning
-4. Correlation Maximization: Train the weights of the candidates by maximizing S,
-the correlation between the candidate’s output value V and the network’s residual error:
-$$S = \sum_{o \in O} \left| \sum_{p \in T} (V_p - \bar{V}) (E_{p,o} - \bar{E}_o) \right|$$
-where O is the set of output nodes, T is the training set, $V_p$ is the candidate neuron’s
-activation for a training pattern p. $E_{p,o}$ is the residual output error at node o for
-pattern p. $\bar{V}$ and $\bar{E}_o$ are averaged values over all elements of T. This step is finished
-when the correlation no longer increases.
-5. Candidate selection: Keep the candidate node with the highest correlation, freeze
-its incoming weights and add connections to the output nodes.
-6. Continue: If the error is higher than desired, continue with step 2.
-One network with three hidden nodes trained by Cascade-Correlation is shown in Figure 3.1.
-1
-Figure 3.1.: A Cascade-Correlation network with three input nodes (red) and one bias node (gray)
-to the left, three hidden nodes (green) in the middle and two output nodes in the upper
-right corner. The black squares represent frozen weights which are found by correlation
-maximization whereas the white squares are trainable weights.
-3.1.2. Meiosis Networks
-Meiosis Networks are introduced in [Han89]. In contrast to most MLPs and CNNs, where
-weights are deterministic and fixed at prediction time, each weight wij in Meiosis networks
-follows a normal distribution:
-$$w_{ij} \sim \mathcal{N}(\mu_{ij}, \sigma_{ij}^2)$$
-2 
-3.2. Pruning approaches
-Hence every connection has two learned parameters: $\mu_{ij}$ and $\sigma_{ij}^2$.
-The key idea of Meiosis networks is to allow neurons to perform Meiosis, which is cell
-division. A node j is split when the random part dominates the value of the sampled
-weights:
-$$\frac{\sum_i \sigma_{ij}}{\sum_i \mu_{ij}} > 1 \quad \text{and} \quad \frac{\sum_k \sigma_{jk}}{\sum_k \mu_{jk}} > 1$$
-The mean of the new nodes is sampled around the old mean, half the variance is assigned
-to the new connections.
-Hence Meiosis networks only change the number of neurons per layer. They do not add
-layers or add skip connections.
-3.1.3. Automatic Structure Optimization
-Automatic Structure Optimization (ASO) was introduced in [BM93] for the task of on-line
-handwriting recognition. It makes use of the confusion matrix $C = (c_{ij}) \in \mathbb{N}_{\geq 0}^{k \times k}$
-(see Section 2.5.2) to guide the topology learning. They define a confusion-symmetry matrix
-S with $s_{ij} = s_{ji} = c_{ij} \cdot c_{ji}$. The maximum of S defines where the ASO algorithm adds
-more parameters. The details how the resources are added are not transferable to CNNs.
-3.2. Pruning approaches
-Pruning approaches start with a network which is bigger than necessary and prune it. The
-motivation to prune a network which has the desired accuracy is to save storage for easier
-model sharing, memory for easier deployment and FLOPs to reduce inference time and
-energy consumption. Especially for embedded systems, deployment is a challenge and low
-energy consumption is important.
-Pruning generally works as follows:
-1. Train a given network until a reasonable solution is obtained,
-2. prune weights according to a pruning criterion and
-3. retrain the pruned network.
-This procedure can be repeated.
-One family of pruning criteria uses the Hessian matrix. One example is Optimal Brain
-Damage (OBD) as introduced in [LDS+89]. For every single parameter k, OBD calculates
-the effect on the objective function of deleting k. The authors call the effect of the deletion
-29
-3. Topology Learning
-of parameter k the saliency sk. The parameters with the lowest saliency are deleted, which
-means they are set to 0 and are not updated anymore.
-A follow-up method called Optimal Brain Surgeon [HSW93] claims to choose the weights
-in a much better way. This requires, however, to calculate the inverse Hessian matrix
-$H^{-1} \in \mathbb{R}^{n \times n}$ where $n \in \mathbb{N}$ is typically $n > 10^6$.
-A much simpler and computationally cheaper pruning criterion is the weight magnitude.
-[HPTD15] prunes all weights w which are below a threshold θ:
-$$w \leftarrow \begin{cases} w & \text{if } w \geq \theta \\ 0 & \text{otherwise} \end{cases}$$
-3.3. Genetic approaches
-The general idea of genetic algorithms (GAs) is to encode the solution space as genes, which
-can recombine themselves via crossover and inversion. An introduction to such algorithms
-is given in [ES03].
-Commonly used techniques to generate neural networks by GAs are NEAT [SM02] and its
-successors HyperNEAT [SDG09] and ES-HyperNEAT [RLS10].
-The results, however, are of unacceptable quality: On MNIST (see Appendix E), where
-random chance gives 10 % accuracy, even simple topologies trained with SGD achieve
-about 92 % accuracy [TF-16a] and state of the art is 99.79 % [WZZ+13], the HyperNEAT
-algorithm achieves only 23.9 % accuracy [VH13].
-Kocmánek shows in [Koc15] that HyperNEAT approaches can achieve 96.47 % accuracy
-on MNIST. Kocmánek mentions that HyperNEAT becomes slower with each hidden layer
-so that not more than three hidden layers could be trained. At the same time, VGG-19 [SZ14] already has 19 hidden layers and ResNets are successfully trained with 1202 layers
-in [HZRS15a].
-[LX17] shows that Genetic algorithms can achieve competitive results on MNIST and
-SVHN, but the best results on CIFAR-10 were 7.10 % error whereas the state of the art is
-at 3.74 % [HLW16]. Similarly, the Genetic algorithm achieves 29.03 % error on CIFAR-100,
-but the state of the art is 17.18 % [HLW16].
-3.4. Reinforcement Learning
-Reinforcement learning is a sub-field of machine learning, which focuses on the question
-how to choose actions that lead to high rewards.
-30
-3.5. Convolutional Neural Fabrics
-One can think of the search for good neural network topologies as a reinforcement learning
-problem. The agent is a recurrent neural network which can generate bitstrings. Those
-variable-length bitstrings encode neural network topologies.
-In 2016, this approach was applied to construct neural networks for computer vision.
-In [BGNR16], Q-learning with an ε-greedy exploration was applied.
-In [ZL16], the REINFORCE algorithm from [Wil92] was used to train state of the art models
-for CIFAR-10 and the Penn Treebank dataset. A drawback of this method is that enormous
-amounts of computational resources were used to obtain those results.
-3.5. Convolutional Neural Fabrics
-Convolutional Neural Fabrics are introduced in [SV16]. They side-step hard decisions
-about topologies by learning an ensemble of different CNN architectures. The idea is to
-define a single architecture as a trellis through a 3D grid of nodes. Each node represents a
-convolutional layer. One dimension is the index of the layer, the other two dimensions are
-the amount of filters and the feature size. Each node is connected to nine other nodes and
-thus represents nine possible choices of convolutional layers:
-• Resolution: (i) convolution with stride=1 or (ii) convolution with stride=2 or
-(iii) deconvolution (doubling the resolution)
-• Channels: (i) half the number of filters than the layer before (ii) the same number
-of filters as the layer before (iii) double the number of filters than the layer before
-They always use ReLU as an activation function and they always use filters of size 3 × 3.
-They don’t use pooling at all.
-31
-3. Topology Learning
-32
-4. Hierarchical Classification
-Designing a classifier for a new dataset is hard for two main reasons: Many design choices are
-not clearly superior to others and evaluating one design choice takes much time. Especially
-CNNs are known to take several days [KSH12, SLJ+15] or even weeks [SZ14] to train.
-Additionally, some methods for analyzing a dataset become harder to use with more classes
-and more training samples. Examples are t-SNE, the manual inspection of errors and
-confusion matrices, and the argmax method.
-One idea to approach this problem is by building a hierarchy of classifiers. The root
-classifier distinguishes clusters of classes, whereas the leaf classifiers distinguish single
-classes. Figure 4.1 gives an example for a hierarchy of classifiers.
-Figure 4.1.: Example for a hierarchy of classifiers. Each classifier is visualized by a rounded rectangle.
-The root classifier C0 has to distinguish six coarse classes (pedestrian, four+-wheelers,
-traffic signs, two-wheelers, street, other) or 17 fine-grained classes. If C0 predicts a
-pedestrian, another classifier has to predict if it is an adult or a child. Similarly, if C0
-predicts traffic sign, then another classifier has to predict if it is a speed limit, a
-sign indicating danger or something else. If C0, however, predicts road, then no other
-classifier will become active.
-In this example, the problem has 17 classes. The hierarchical approach introduces
-7 clusters of classes and thus uses 8 classifiers.
-Such a hierarchy of classifiers needs clusters of classes.
-33
-4. Hierarchical Classification
-4.1. Advantages of classifier hierarchies
-Having a classifier hierarchy has five advantages:
-• Division of labor: Different teams can work together. Instead of having a monolithic
-task, the solutions can be combined.
-• Guarantees: Changing a classifier will only change the prediction of itself and its
-children. Siblings are not affected. In the example from Figure 4.1, the classifier
-which distinguishes traffic signs can be changed while the classification as pedestrian,
-four+-wheelers, traffic sign, street, other will not be affected. Also, the
-classification between speed limits, danger signs and other signs will not change.
-• Faster training: Except for the root classifier C0, each other classifier will have
-less than the total amount of training data. Depending on the combined classes, the
-models could also be simpler. Hence the training time is reduced.
-• Weighting of errors: In practice, some errors are more severe than others. For
-example, it could be acceptable if the two-wheelers classifier has an error rate of
-40 %. But it is not acceptable if the speed limit classifier has such a high error rate.
-• Post-hoc explanations: The simpler a model is, the easier it is to explain why a
-classification is made the way it is made.
-4.2. Clustering classes
-There are two ways to cluster classes: By similarity or by semantics. While semantic
-clustering needs either additional information or manual work, the similarity can be
-automatically inferred from the data. As pointed out in [XZY+14], semantically similar
-classes are often also visually similar. For example, in the ImageNet dataset most dogs
-are semantically and visually more similar to each other than to non-dogs. An example
-where this is obviously not the case are symbols: The summation symbol \sum is identical
-in appearance to the Greek letter \Sigma, but semantically much closer to the addition
-operator +.
-One approach to cluster classes by similarity is to train a classifier and examine its
-predictions. Each class is represented in the confusion matrix by one row. Those rows
-can be clustered directly with standard clustering algorithms such as k-means, DBSCAN [EKS+96],
-OPTICS [ABKS99], CLARANS [NH02], DIANA [KR09], AHC (see [HPK11]) or spectral
-clustering as in [XZY+14]. Those clusterings, however, are hard to interpret and most of
-them do not allow a human to improve the found clustering manually.
-The confusion matrix $(c)_{ij} \in \mathbb{N}^{k \times k}$ states how often class i was present and class j was
-34
-4.2. Clustering classes
-predicted. The more often this confusion happens, the more similar those two classes are to
-the classifier. Based on the confusion matrix, the classes can be clustered as explained in
-the following.
-[HAE16] indicates that more classes make it easier to generalize, but the accuracy gains
-diminish after a critical point of classes is reached. Hence a binary tree might not be a
-good choice. As an alternative, an approach which allows building arbitrarily many clusters,
-is proposed.
-The proposed algorithm has two main ideas:
-• The order of columns and rows in the confusion matrix is arbitrary. This means one
-can swap rows and columns. If row i and j are swapped, then the columns i and j
-have to be swapped, too, in order to keep the same confusion matrix.
-• If two classes are confused often, then they are similar to the classifier.
-Hence the order of the classes is permuted in such a way that the highest errors are close
-to the diagonal. One possible objective function to be minimized is
-$$f(C) = \sum_{i=1}^{n} \sum_{j=1}^{n} C_{ij} \cdot |i - j| \tag{4.1}$$
-which punishes errors linearly with the distance to the diagonal. This method is called CMO
-in the following.
-As pointed out by Tobias Ribizel (personal communication), this optimization problem
-is a weighted version of the Optimal Linear Arrangement problem. That problem is NP-complete [GJ02, GJS76]. Simulated Annealing as described in Algorithm 1, however,
-produces reasonable clusterings as well as visually appealing confusion matrices. The
-algorithm works as follows: First, decide with probability 0.5 if only two random rows are
-swapped or a block is swapped. If two rows are swapped, choose both of them randomly.
-If a block is swapped, then choose the start randomly and the end of the block randomly
-after the start. The insert position has to be a valid position considering the block length,
-but besides that it is also chosen uniformly random.
-Simple row-swapping can exploit local improvements. For example, in the context of
-ImageNet, it can swap the dog-class Silky Terrier to the dog-class Yorkshire terrier
-and both dog classes Dalmatian and Greyhound next to each other. Both the two clusters
-of dog breeds could be separated by car and bus due to random chance. Moving any single
-class increases the score, but moving either one of the dog breed clusters or the vehicle
-cluster decreases the score. Hence it is beneficial to implement block moving.
-One advantage of permutating the classes in order to minimize Equation (4.1) in comparison
-to spectral clustering as used in [XZY+14] is that the adjusted confusion matrix can be
-35
-4. Hierarchical Classification
-split into many much smaller matrices along the diagonal. In the case of many classes (e.g.,
-1000 classes of ImageNet or 369 classes of HASYv2) this permutation makes it possible to
-visualize the types of errors made. If the errors are systematic due to visual similarity, many
-confusions are not made and thus many elements of the confusion matrix are close to 0.
-Those will be moved to the corners of the confusion matrix by optimizing Equation (4.1).
-Once a permutation of the classes is found which has a low score according to Equation (4.1), the clusters
-can either be made by hand by deciding which classes should not be in one cluster. With
-such a permutation, only n − 1 binary decisions have to be made and hence only the list of
-classes has to be read. Alternatively, one can calculate the confusions $C'_{i,i+1} + C'_{i+1,i}$ for
-each pair of classes which are neighbors in the confusion matrix. The higher this value, the
-more similar are the classes according to the classifier. Hence a threshold θ can be applied.
-θ can either be set automatically (e.g., such that 10 % of all pairs are above the threshold)
-or semi-automatically by asking the user for information if two classes belong to the same
-cluster. Such an approach only needs log(n) binary decisions from the user where n is the
-number of classes.
-Please note that CMO only works if the classifier is neither too bad nor too good. A classifier
-which does not solve the task at all might just give almost uniform predictions whereas the
-confusion matrix of an extremely good classifier is almost diagonal and thus contains no
-information about the similarity of classes. One possible solution to this problem is to take
-the prediction of the class in contrast to using only the argmax in order to find a useful
-permutation.
-36
-5. Experimental Evaluation
-All experiments are implemented using Keras 2.0 [Cho15] with Tensorflow 1.0 [AAB+16]
-and cuDNN 5.1 [CWV+14] as the backend. The experiments were run on different machines
-with different Nvidia graphics processing units (GPUs), including the Titan Black, GeForce
-GTX 970 and GeForce 940MX.
-The GTSRB [SSSI12], SVHN [NWC+11b], CIFAR-10 and CIFAR-100 [Kri], MNIST [YL98],
-HASYv2 [Tho17a], STL-10 [CLN10] datasets are used for the evaluation. Those datasets are
-used as their size is small enough to be trained within a day. Other classification datasets
-which were considered are listed in Appendix E.
-CIFAR-10 (Canadian Institute for Advanced Research 10) is a 10-class dataset of color
-images of the size 32 px × 32 px. Its ten classes are airplane, automobile, bird, cat, deer,
-dog, frog, horse, ship, truck. The state of the art achieves an accuracy of 96.54 % [HLW16].
-According to [Kar11], human accuracy is at about 94 %.
-CIFAR-100 is a 100-class dataset of color images of the size 32 px × 32 px. Its 100 classes
-are grouped to 20 superclasses. It includes animals, people, plants, outdoor scenes, vehicles
-and other items. CIFAR-100 is not a superset of CIFAR-10, as CIFAR-100 does not contain
-the class airplane. The state of the art achieves an accuracy of 82.82 % [HLW16].
-GTSRB (German Traffic Sign Recognition Benchmark) is a 43-class dataset of traffic signs.
-The 51 839 images are in color and of a minimum size of 25 px×25 px up to 266 px×232 px.
-The state of the art achieves 99.46 % accuracy with an ensemble of 25 CNNs [SL11].
-According to [SSSI], human performance is at 98.84 %.
-HASYv2 (Handwritten Symbols version 2) is a 369 class dataset of black-and-white images
-of the size 32 px × 32 px. The 369 classes contain the Latin and Greek letters, arrows,
-mathematical symbols. The state of the art achieves an accuracy of 82.00 % [Tho17a].
-STL-10 (self-taught learning 10) is a 10-class dataset of color images of the size 96 px×96 px.
-Its ten classes are airplane, bird, car, cat, deer, dog, horse, monkey, ship, truck. The state
-of the art achieves an accuracy of 74.80 % [ZMGL15]. It contains 100 000 unlabeled images
-for unsupervised training and 500 images per class for supervised training.
-SVHN (Street View House Numbers) exists in two formats. For the following experiments,
-the cropped digit format was used. It contains the 10 digits cropped from photos of Google
-Street View. The images are in color and of size 32 px × 32 px. The state of the art
-37
-5. Experimental Evaluation
-achieves an accuracy of 98.41 % [HLW16]. According to [NWC+11a], human performance
-is at 98.0 %.
-As a preprocessing step, the pixel-features were divided by 255 to obtain values in [0, 1].
-For GTSRB, the training and test data was scaled to 32 px × 32 px.
-5.1. Baseline Model and Training setup
-The baseline model is trained with Adam [KB14], an initial learning rate of $10^{-4}$, a batch
-size of 64 for at most 1000 epochs with data augmentation. The kind of data augmentation
-depends on the dataset:
-• CIFAR-10, CIFAR-100 and STL-10: Random width and height shift by at most
-±3 pixels in either direction; Random horizontal flip.
-• GTSRB, MNIST: Random width and height shift by at most ±5 pixels in either
-direction; random rotation by at most ±15 degrees; random channel shift; random
-zoom in [0.5, 1.5]; random shear by at most 6 degrees.
-• HASYv2: Random width and height shift by at most ±5 pixels in either direction;
-random rotation by at most ±5 degrees.
-• SVHN: No data augmentation.
-If the dataset does not define a training/test set, a stratified 67 % / 33 % split is applied. If
-the dataset does not define a validation set, the training set is split in a stratified manner
-into 90 % training set / 10 % validation set.
-Early stopping [Pre98] with the validation accuracy as a stopping criterion and a patience of
-10 epochs is applied. After this, the model is trained without data augmentation for at most
-1000 epochs with early stopping and the validation accuracy as a stopping criterion and a
-patience of 10 epochs. Kernel weights are initialized according to the uniform initialization
-scheme of He [HZRS15b] (see Appendix B.3).
-The architecture of the baseline model uses a pattern of
-Conv-Block(n) = (Convolution − Batch Normalization − Activation)^n
-− Pooling
-The activation function is the Exponential Linear Unit (ELU) (see Table B.3), except for
-the last layer where softmax is used. Before the last two convolutional layers, a dropout
-layer with dropout probability 0.5 is applied. The architecture is given in detail in Table 5.1.
-Please note that the number of input- and output channels of the network depends on
-the dataset. If the input image is larger than 32 px × 32 px, for each power of two a
-Conv-Block(2) is added at the input. For MNIST, the images are bilinearly upsampled to
-32 px × 32 px.
-38
-5.1. Baseline Model and Training setup
-# Type Filters @
-Patch size / stride
-Parameters FLOPs Output size
-Input 0 0 3 @ 32 × 32
-1 Convolution 32 @ 3 × 3 × 3 / 1 896 1 736 704 32 @ 32 × 32
-2 BN + ELU 64 163 904 32 @ 32 × 32
-3 Convolution 32 @ 3 × 3 × 32 / 1 9 248 18 841 600 32 @ 32 × 32
-4 BN + ELU 64 163 904 32 @ 32 × 32
-Max pooling 2 × 2 / 2 0 40 960 32 @ 16 × 16
-5 Convolution 64 @ 3 × 3 × 32 / 1 18 496 9 420 800 64 @ 16 × 16
-6 BN + ELU 128 82 048 64 @ 16 × 16
-7 Convolution 64 @ 3 × 3 × 64 / 1 36 928 18 857 984 64 @ 16 × 16
-8 BN + ELU 128 82 048 64 @ 16 × 16
-Max pooling 2 × 2 / 2 20 480 64 @ 8 × 8
-9 Convolution 64 @ 3 × 3 × 64 / 1 36 928 4 714 496 64 @ 8 × 8
-10 BN + ELU 128 20 608 64 @ 8 × 8
-Max pooling 2 × 2 / 2 5 120 64 @ 4 × 4
-11 Convolution (v) 512 @ 4 × 4 × 64 / 1 524 800 1 048 064 512 @ 1 × 1
-12 BN + ELU 1 024 3 584 512 @ 1 × 1
-Dropout 0.5 0 0 512 @ 1 × 1
-13 Convolution 512 @ 1 × 1 × 512 / 1 262 656 523 776 512 @ 1 × 1
-14 BN + ELU 1 024 3 584 512 @ 1 × 1
-Dropout 0.5 0 0 512 @ 1 × 1
-15 Convolution k @ 1 × 1 × 512 / 1 k · (512 + 1) 1024 · k k @ 1 × 1
-Global avg Pooling 1 × 1 0 k k @ 1 × 1
-16 BN + Softmax 2k 7k k @ 1 × 1
-P 515k
-+892 512
-1032k
-+55 729 664 103 424+2k
-Table 5.1.: Baseline architecture with 3 input channels of size 32 × 32. All convolutional layers
-use SAME padding, except for layer 11 which used VALID padding in order to decrease
-the feature map size to 1 × 1. If the input feature map is bigger than 32 × 32, for
-each power of two there are two Convolution + BN + ELU blocks and one Max pooling
-block added. This is the framed part in the table.
-32 × 32
-Input
-C 32@3 × 3/1
-BN + ELU
-C 32@3 × 3/1
-BN + ELU
-16 × 16
-max pooling 2 × 2/2
-C 64@3 × 3/1
-BN + ELU
-C 64@3 × 3/1
-BN + ELU
-8 × 8
-max pooling 2 × 2/2
-C 64@3 × 3/1
-BN + ELU
-4 × 4
-max pooling 2 × 2/2
-C 512@4 × 4/1 (V)
-BN + ELU
-Dropout, p = 0.5
-1 × 1
-C 512@1 × 1/1
-BN + ELU
-Dropout, p = 0.5
-C k@1 × 1/1
-Global AVG pooling
-BN + Softmax
-Figure 5.1.: Architecture of the baseline model. C 32@3×3/1 is a convolutional layer with 32 filters
-of kernel size 3 × 3 with stride 1.
-39
-5. Experimental Evaluation
-5.1.1. Baseline Evaluation
-The results for the baseline model evaluated on eight datasets are given in Table 5.2. The
-speed for inference for different GPUs is given in Table 5.3.
-Dataset Single Model Accuracy Ensemble of 10
-Training Set Test Set Training Set Test Set
-Asirra 94.22 % σ = 3.49 94.37 % σ = 3.47 97.07 % 97.37 %
-CIFAR-10 91.23 % σ = 1.10 85.84 % σ = 0.87 92.36 % 86.75 %
-CIFAR-100 76.64 % σ = 1.48 63.38 % σ = 0.55 78.30 % 64.70 %
-GTSRB 100.00 % σ = 0.00 99.18 % σ = 0.11 100.00 % 99.46 %
-HASYv2 89.49 % σ = 0.42 85.35 % σ = 0.10 89.94 % 86.03 %
-MNIST 99.93 % σ = 0.07 99.53 % σ = 0.06 99.99 % 99.58 %
-STL-10 94.12 % σ = 0.87 75.67 % σ = 0.34 96.35 % 77.62 %
-SVHN 99.02 % σ = 0.07 96.28 % σ = 0.10 99.42 % 97.20 %
-Table 5.2.: Baseline model accuracy on eight datasets. The single model accuracy is the 10 models
-used in the ensemble. The empirical standard deviation σ of the accuracy is also given.
-CIFAR-10, CIFAR-100 and STL-10 models use test-time transformations. None of the
-models uses unlabeled data or data from other datasets. For HASYv2 no test time
-transformations are used.
-Network GPU Tensorflow Inference per Training
-1 Image 128 images time / epoch
-Baseline Default Intel i7-4930K 3 ms 244 ms 231.0 s
-Baseline Optimized Intel i7-4930K 2 ms 143 ms 149.0 s
-Baseline Default GeForce 940MX 4 ms 120 ms 145.6 s
-Baseline Default GTX 970 6 ms 32 ms 25.0 s-26.3 s
-Baseline Default GTX 980 3 ms 24 ms 20.5 s-21.1 s
-Baseline Default GTX 980 Ti 5 ms 27 ms 22.0 s-22.1 s
-Baseline Default GTX 1070 2 ms 15 ms 14.4 s-14.5 s
-Baseline Default Titan Black 4 ms 25 ms 28.1 s-28.1 s
-Baseline Optimized Titan Black 3 ms 22 ms 24.4 s-24.4 s
-DenseNet-40-12 Default GeForce 940MX 27 ms 2403 ms —
-Table 5.3.: Speed comparison of the baseline model on CIFAR-10. The baseline model is evaluated on
-six Nvidia GPUs and one CPU. The weights for DenseNet-40-12 are taken from [Maj17].
-Weights of the baseline model can be found at [Tho17b]. The optimized Tensorflow build
-makes use of SSE4.X, AVX, AVX2 and FMA instructions.
-40
-5.1. Baseline Model and Training setup
-5.1.2. Weight distribution
-The distribution of filter weights by layer is visualized in Figure 5.2 and the distribution
-of bias weights by layer is shown in Figure 5.3. Although both figures only show the
-distribution for one specific model trained on CIFAR-100, the following observed patterns
-are consistent for 70 models (7 datasets and 10 models per dataset):
-• The empiric [0.5 − percentile, 99.5 − percentile] interval which contains 99 % of the
-filter weights is almost symmetric around zero. The same is true for the bias weights.
-• The farther away a layer is from the input, the smaller the 99-percentile interval is,
-except for the last layer (see Table A.1).
-• The 99-percentile interval of the first layers filter weights is about [−0.5, +0.5], except
-for MNIST and HASYv2 where it is in [−0.8, 0.8].
-• The 99-percentile interval of the first layers bias weights is always in [−0.2, 0.2].
-• The distribution of filter weights of the last convolutional layer is not symmetric. In
-some cases the distribution is also not unimodal.
-• The bias weights of the last three layers are very close to zero. The absolute value of
-most of them is smaller than
-$10^{-2}$.
-Similarly, Figure 5.4 and Figure 5.5 show the distribution of the γ and the β parameter of
-Batch Normalization. It is expected that γ is close to 1 and β is close to 0. In those cases,
-the Batch Normalization layer equals the identity and thus is only relevant for the training.
-While γ and β do not show as clear patterns as the filter and bias weights of convolutional
-layers, some observations are also consistent through all models even for different datasets:
-• γ of the last layer (layer 16) is bigger than 1.3.
-• The 99-percentile interval for β of the last layer is longer than the other 99-percentile
-intervals.
-• The 99-percentile interval for β of the fourth-last (layer 14 for STL-10, layer 10 for
-all other models) is more negative than all other layers.
-Finally, the distribution of filter weight ranges is plotted in Figure 5.6 for each convolutional
-layer. The ranges are calculated for each channel and filter separately. The smaller the
-values are, the less information is lost if the filters are replaced by smaller filters.
-41
-5. Experimental Evaluation
-Figure 5.2.: Violin plots of the distribution of filter weights of a baseline model trained on CIFAR-100. The weights of the first layer are relatively evenly spread in the interval [−0.4, +0.4].
-With every layer the interval which contains 95 % of the weights and is centered around
-the mean becomes smaller, especially with layer 11 where the feature maps are of
-size 1 × 1. In contrast to the other layers, the last convolutional layer has a bimodal
-distribution.
-This plot indicates that the network might benefit from bigger filters in the first layer,
-whereas the filters in layers 7 – 11 could potentially be smaller.
-Figure 5.3.: Violin plots of the distribution of bias weights of a baseline model trained on CIFAR-100.
-While the first layers biases are in [−0.1, +0.1], after each max-pooling layer the interval
-which contains 95 % of the weights and is centered around the mean becomes smaller.
-In the last three convolutional layers, most bias weights are in [−0.005, +0.005].
-42
-5.1. Baseline Model and Training setup
-Figure 5.4.: Violin plots of the distribution of the γ parameter of Batch Normalization layers of a
-baseline model trained on CIFAR-100.
-Figure 5.5.: The distribution of the β parameter of Batch Normalization layers of a baseline model
-trained on CIFAR-100.
-43
-5. Experimental Evaluation
-Figure 5.6.: The distribution of the range of values (max - min) of filters by channel and layer. For
-each filter, the range of values is recorded by channel. The smaller this range is, the
-less information is lost if a n × n filter is replaced by a (n − 1) × (n − 1) filter.
-44
-5.1. Baseline Model and Training setup
-5.1.3. Training behavior
-Due to early stopping, the number of epochs for which a model was trained differs. The number
-of epochs trained with augmentation ranged from 133 epochs to 182 epochs with a standard
-deviation of 17.3 epochs for CIFAR-100.
-Figure 5.7 shows the worst and the best validation accuracy during the training with
-augmented data. Different initializations lead to very similar validation accuracies during
-training. The image might lead to the wrong conclusion that models which are better at
-the start are also better at the end. In order to check this hypothesis, the relative order of
-validation accuracies for the 10 CIFAR-100 models was examined. If the relative ordering
-stays approximately the same, then it can be considered to run the first few epochs many
-times and only train the best models to the end. For 10 models, there can be
-$\frac{10^2 - 10}{2} = 45$
-pair-wise changes in the ordering at maximum if the relative order of validation accuracies
-is reversed. For the baseline model, 21.8 changes in the relative order of accuracies occurred
-in average for each pair of epochs (i, i + 1). This means if one knows only the relative order
-of the validation accuracy of two models $m$ and $m'$
-in epoch $i$, it is doubtful if one can
-make any statement about the ordering of $m$ and $m'$
-in epoch $i + 1$.
-0
-10
-20
-30
-40
-50
-60
-70
-80
-90
-100
-110
-120
-130
-140
-0.2
-0.3
-0.4
-0.5
-0.6
-0.7
-epoch
-validation accuracy
-maximum validation accuracy
-minimum validation accuracy
-1.5
-2
-2.5
-3
-3.5
-4
-4.5
-loss
-maximum validation accuracy
-minimum validation accuracy
-mean loss
-Figure 5.7.: Minimum and maximum validation accuracy of the 10 trained models by epoch. The
-differences do not exceed 1 % and do not increase by training epoch. Four models
-stopped the first training stage at epoch 133 which causes the shift in the loss and the
-maximum validation accuracy.
-Figures 5.8 to 5.10 show how the weights changed while training on CIFAR-100. It was
-expected that the absolute value of weight updates during epochs (sum, max, and mean)
-decrease in later training stages. The intuition was that weights need to be adjusted in a
-coarse way first. After that, the intuition was that only slight modifications are applied by
-45
-5. Experimental Evaluation
-the SGD based training algorithm (ADAM). The mean, max and sum of weight updates as
-displayed in Figures 5.8 to 5.10, however, do not show such a clear pattern. The biggest
-change happens as expected in the first epoch after the weights are initialized. The change
-from augmented training to non-augmented training was at epoch 156 to epoch 157.
-It can be observed that layers which receive more input feature maps get larger weight
-updates in mean. As layers which are closer to the output take more input feature maps,
-their weight updates are larger. This pattern does not occur when SGD is used as the
-optimizer.
-Figure 5.8.: Mean weight updates of the baseline model between epochs by layer.
-46
-5.1. Baseline Model and Training setup
-Figure 5.9.: Maximum weight updates of the baseline model between epochs by layer.
-Figure 5.10.: Sum of weight updates of the baseline model between epochs by layer.
-47
-5. Experimental Evaluation
-5.2. Confusion Matrix Ordering
-The visualization of the confusion matrix can give valuable information about which part
-of the task is hard. For more than about 10 classes, however, it becomes hard to visualize
-and read.
-For CIFAR-10, the proposed method groups the four object classes and the six animal
-classes together (see Figure 5.11a).
-(a) CIFAR-10 Test set (b) Random
-Figure 5.11.: Figure 5.11a shows an ordered confusion matrix of the CIFAR-10 dataset. The diagonal
-elements are set to 0 in order to make other elements easier to see.
-Figure 5.11b shows a confusion matrix with random mistakes.
-The first image of Figure 5.12 shows one example of a classifier with only 97.13 % test
-accuracy where a good permutation was found. Please note that this is not the best classifier.
-The confusion matrix which resulted from a baseline classifier with 99.32 % test accuracy is
-displayed as the second image.
-Those results suggest that the ordering of classes is a valuable tool to make patterns easier
-to see. Humans, however, are good at finding patterns even if they come from random noise.
-Hence, for comparison, a confusion matrix of a classifier with 30 classes, 60 % accuracy
-and 40 % uniformly random errors of a balanced dataset is created, optimized according to
-Equation (4.1) and shown in Figure 5.11b. It clearly looks different than Figure 5.11a.
-On the HASYv2 dataset the class-ordering is necessary to see anything as most possible
-confusions do not happen. See Figure 5.13 for comparison of the first 50 classes of the
-unsorted confusion matrix and the sorted confusion matrix. If confusion matrices of a
-maximum size of 50 × 50 are displayed, the ordered method can show only 8 matrices
-because the off-diagonal matrices are almost 0. Without sorting, 64 matrices have to be
-displayed.
-48
-5.2. Confusion Matrix Ordering
-Figure 5.12.: The first image shows the confusion matrix for the test set of GTSRB after optimization
-to Equation (4.1). The diagonal elements are set to 0 in order to make other elements
-easier to see. The symbols next to the label on the vertical axis indicate the shape
-and the color of the signs.
-The second image shows the same, but with baseline model.
-Best viewed in electronic form. 49
-Figure 5.13.: The first 50 entries of the confusion matrix of the HASYv2 dataset. The diagonal
-elements are set to 0 in order to make other elements easier to see. The top image
-shows arbitrary class ordering, the bottom image shows the optimized ordering.
-5.3. Spectral Clustering vs CMO
-5.3. Spectral Clustering vs CMO
-This section evaluates the clustering quality of CMO in comparison to the clustering quality
-of spectral clustering.
-The evaluated model achieves 70.50 % training accuracy and 53.16 % test accuracy on
-CIFAR-100. Figure 5.14 shows the sorted confusion matrix.
-Figure 5.14.: The first 50 entries of the ordered confusion matrix of the CIFAR-100 dataset. The
-diagonal elements are set to 0 in order to make other elements easier to see. Best
-viewed in electronic form.
-CIFAR-100 has pre-defined coarse classes. Those are used as a ground truth for the clusters
-which are to be found. The number of errors is determined by (i) Join all n clusters which
-contain the classes of the coarse class C to a set M. The error is n. (ii) Within M, find the
-set of classes M− which do not belong to C. (iii) The final error is n + |M−|. As can be
-seen in Table 5.4, both clustering methods find reasonable clusters. CMO, however, has
-only half the error of spectral clustering.
-The results for the HASYv2 dataset are qualitatively similar (see Table 5.5). It should be
-noted that the number of clusters was determined by using the semi-automatic method
-based on CMO as described in Section 4.2.
-51
-5. Experimental Evaluation
-Cluster Spectral clustering Errors CMO Errors
-fish aquarium fish, orchid + flatfish
-+ ray, shark + trout, lion
-5 aquarium fish, orchid + flatfish
-+ ray + shark, trout
-4
-flowers orchid, aquarium fish + sunflower + poppy, tulip + rose,
-train
-5 orchid, aquarium fish + sunflower, poppy, tulip, rose
-2
-people baby, boy, man + girl + woman 2 baby, boy, girl, woman, man 0
-reptiles crocodile, plain, road, table,
-wardrobe + dinosaur + lizard
-+ snake, worm + turtle
-9 crocodile, lizard, lobster, caterpillar + dinosaur + snake + turtle, crab
-6
-trees maple, oak, pine + willow, forest
-+ palm
-3 palm, willow, pine, maple, oak 0
-Total 24 12
-Table 5.4.: Differences in spectral clustering and CMO. Classes in a cluster are separated by ,
-whereas clusters are separated by +.
-Cluster Spectral clustering Errors CMO Errors
-A A, A, A 0 A, A, A , Å 1
-B B, B 0 B, B 0
-C C, c, ⊂ and C , ξ, E and C 4 C, c, ⊂, C and C 1
-D D, D, D, . 1 D, D, D 0
-E E and E, ε 2 E and E, ε, , ∈ 4
-F F and F, F 1 F and F, F 1
-H H and H , κ and H 3 H and H, H 1
-K K, κ 0 K, κ 0
-L L, b and L, L 1 L, b and L, L 1
-M M and M and M 2 M and µ, M and M 3
-N N and N, N and N 2 N and N, N and N , ℵ 3
-O O, O, 0, ◦, °, # and o 1 O, O, 0, ◦, ° and # and o 2
-P P, P and p, ρ and P and ℘ 3 P and P, P, ℘ and p, ρ 2
-Q Q, Q, Q, ι, t, &, `, =, Æ, 1 7 Q and Q, Q 1
-R R, R and R, R, k and < 3 R and <, R, R, R 1
-S S, s, S 0 S, s, S 0
-T T, > and T , τ 1 T, > and T , τ 1
-U U, ∪ and u, U, A 1 U, u, U, A and ∪ 2
-V V , v, ∨ 0 V , v, ∨ 0
-W W, w, ω 0 W, w and ω 1
-X X, x, X , χ, × 0 X, x, X , χ, × 0
-Y Y and y 1 Y , y 0
-Z Z, z, Z and Z, Z 1 Z, z, Z, Z, Z 0
-Total 34 25
-Table 5.5.: Differences in spectral clustering and CMO.
-52
-5.4. Hierarchy of Classifiers
-5.4. Hierarchy of Classifiers
-In a first step, a classifier is trained on the 100 classes of CIFAR-100. The fine-grained root
-classifier achieves an accuracy of 65.29 % with test-time transformations. The accuracy on
-the found sub-classes are listed in Table 5.6. The fact that the root classifier achieves better
-results within a cluster than the specialized leaf classifiers in 13 of 14 cases could either
-be due to limited training data, overfitting or the small size of 32 px × 32 px of the data.
-The experiment also shows that most of the errors are due to not identifying the correct
-cluster. Hence, in this case, more work in improving the root classifier is necessary rather
-than improving the discrimination of classes within a cluster.
-Although the classes within a cluster capture most of the classifications, many misclassifications happen outside of the clusters. For example, in cluster 3, a perfect leaf classifier would
-push the accuracy in the full column only to 63.50 % due to errors of the root classifier
-where the root classifier does not predict the correct cluster.
-The leaf classifiers use the same topology as the root classifier. By initializing them with
-the root classifier's weights, their performance can be pushed to about the inner accuracy.
-They are, however, only useful if their accuracy is well above the inner accuracy of the root
-classifier. Hence, for CIFAR-100, building hierarchies of classifiers is not useful.
-Cluster Classes
-accuracy
-root classifier leaf classifier
-cluster identified class identified | cluster class identified | cluster
-1 3 69.67 % 84.27 % 72.98 %
-2 5 46.60 % 58.54 % 43.47 %
-3 2 58.50 % 92.13 % 83.46 %
-4 2 50.50 % 87.83 % 81.74 %
-5 3 44.67 % 79.29 % 71.01 %
-6 2 29.50 % 78.67 % 72.00 %
-7 2 52.50 % 92.11 % 87.72 %
-8 2 59.50 % 86.23 % 81.88 %
-9 2 59.00 % 90.08 % 87.79 %
-10 2 62.00 % 85.52 % 73.10 %
-11 2 67.00 % 87.01 % 75.32 %
-12 2 72.50 % 94.77 % 76.77 %
-13 2 64.00 % 82.58 % 86.27 %
-14 2 79.67 % 89.85 % 89.10 %
-Table 5.6.: Accuracies of the root classifier trained on the full set of 100 classes evaluated on
-14 clusters of classes. Each class has 100 elements to test. The column cluster identified
-gives the percentage that the root classifiers argmax prediction is within the correct
-cluster, but not necessarily the correct class. The columns class identified | cluster only
-consider data points where the root classifier correctly identified the cluster.
-53
-5. Experimental Evaluation
-5.5. Increased width for faster learning
-More filters in one layer could simplify the optimization problem as each filter needs smaller
-updates. Hence a CNN $N$ with $n_i$ filters in layer $i$ is expected to take more epochs than a
-CNN $N'$ with $2 \cdot n_i$ filters in layer $i$ to achieve the same validation accuracy.
-This hypothesis can be falsified by training a CNN $N$ and a CNN $N'$ and comparing the
-trained number of epochs. As more filters can lead to different results depending on the
-layer where they are added, five models are trained. The details about those models are
-given in Table 5.7.
-Name Layer Filter count Total
-Baseline New parameters
-m9 9 64 638 5 978 566
-m0
-9
-9 64 974 8 925 622
-m11 11 512 3786 5 982 698
-m0
-11 11 512 1024 1 731 980
-m13 13 512 8704 5 982 092
-Table 5.7.: Models which are identical to the baseline, except that the number of filters of one layer
-was increased.
-The detailed results are given in Table 5.8. As expected, the number of training epochs of
-the models with increased numbers of parameters is lower. The wall-clock time, however, is
-higher due to the increase in computation per forward- and backward-pass.
-For m9, m11 and m13, the filter weight range of the layer with increased capacity decreases
-compared to Figure 5.6, the filter weights of the layer with increased capacity are more
-concentrated around zero compared to Figure 5.2. For model m13, the distribution of
-weight of the output layer changed to a more bell-shaped distribution. Except for this, the
-distribution of filter weights in other layers did not change for all three models compared to
-the baseline.
-Model Parameters
-Accuracy Training
-Single Model Ensemble Mean Epochs Mean Time
-Mean std
-baseline 944 012 63.38 % 0.55 64.70 % 154.7 3856 s
-m9 5 978 566 65.53 % 0.37 66.72 % 105.7 4472 s
-m0
-9
-8 925 622 65.10 % 1.09 66.54 % 95.6 5261 s
-m11 5 982 698 65.73 % 0.77 67.38 % 149.2 5450 s
-m0
-11 1 731 980 62.12 % 0.48 62.89 % 143.6 3665 s
-m13 5 982 092 62.39 % 0.66 63.77 % 147.8 4485 s
-Table 5.8.: Training time in epochs and wall-clock time for the baseline and models m9, m11, m13
-as well as their accuracies.
-54
-5.6. Weight updates
-5.6. Weight updates
-Section 5.5 shows that wider networks learn faster. One hypothesis why this happens is
-that every single weight update can be smaller to learn the same function. Thus the loss
-function is smoother and thus gradient descent based optimization algorithms lead to more
-consistent weight updates.
-Consequently, it is expected that layers with fewer filters have more erratic updates. If
-there are many filters, the weights of a filter which does not contribute much to the end
-results or is even harmful can gradually be set to zero, essentially removing one path
-in the network.
-In order to test the hypothesis, the baseline model was adjusted. The number of filters in
-layer 5 was reduced from 64 filters to 3 filters. As one can see in Figure 5.15, the mean
-weight update of the layers 1, 3, 5, 7 and 9 have a far bigger range than the layers 11, 13 and
-15 after epoch 50. Compared to the baseline models mean updates (Figure 5.8, Page 46),
-the mean weight updates of layers 1 and 3 are higher, the range of the mean weight update
-from epoch 50 is higher for layer 5 and the range of mean updates of layer 7 is higher.
-For the maximum and the sum, no similar pattern could be observed (see Figures A.3
-and A.4).
-Figure 5.15.: Mean weight updates between epochs by layer. The model is the baseline model, but
-with layer 5 reduced to 3 filters.
-55
-5. Experimental Evaluation
-5.7. Multiple narrow layers vs One wide layer
-On a given feature map size one can have an arbitrary number of convolutional layers with
-SAME padding and each layer can have an arbitrary number of filters. A convolutional layer
-with more filters is called wider [ZK16], a convolutional layer with fewer filters is thus called
-narrower and the number of filters in a convolutional layer is the layer's width.
-If the number of parameters which may be used for the feature map scale is fixed and high
-enough, there are still many combinations. If $n_i$ with $i = 0, \dots, k$ is the number of output
-feature maps of layer i where i = 0 is the input layer and all filters are 3 × 3 filters without
-a bias, then the number of parameters is
-\[
-\text{Parameters}
-=
-\sum_{i=1}^{k}
-\Bigl(
-(n_{i-1} \cdot 3^{2} + 1) \cdot n_i
-\Bigr)
-\]
-Hence the width of one layer does not only influence the parameters in this layer, but also
-in the next layer.
-The number of possible subsequent layers of one feature map size is enormous, even if
-constraints are placed on the number of parameters. For example, the first convolutional
-layer of the baseline model has 896 parameters. If one assumes that less than 3 filters per
-layer are not desirable, one keeps all layers having a bias and all layers only use 3 × 3 filters,
-then the maximum depth is 10. If one furthermore assumes that at least 800 parameters
-should be used, there are still 120 possible layer combinations. As experimentally evaluating
-one layer combination takes about 10 hours on a GTX 970 for CIFAR-100 it is not possible
-to evaluate all layer combinations. In the following, a couple of changes to the network
-width / depth will be evaluated.
-Each layer expands the perceptive field. Hence deeper layers can use more of the input for
-every single output value. But deeper networks need more time for inference as the output
-of layer i has to be computed before the output of i + 1 can be computed. Hence there is
-less potential to parallelize computations. Each filter can be seen as a concept which can
-be learned. The deeper the filter is in the network, the higher is the abstraction level of the
-concept. In most cases, both are necessary: Many different concepts (width) and high-level
-concepts (depth).
-Reducing the two first convolutional layers of the baseline model (see Page 39) to one
-convolutional layer of 48 filters (944 396 parameters in total, whereas the baseline model
-has 944 012 parameters) resulted in a mean accuracy of 61.64 % (-1.74 %) and a standard
-deviation of σ = 1.12 (+0.57). The ensemble achieved 63.18 % (-1.52 %). As expected,
-the training time per epoch was reduced. For the GTX 980, it was reduced from 22.0 s of
-the baseline model to 15 s of the model with one less convolutional layer, one less Batch
-Normalization and one less activation layer. The inference time was also reduced from 6 ms
-56
-5.8. Batch Normalization
-to 4 ms for 1 image and from 32 ms to 23 ms for 128 images. Due to the loss in accuracy of
-more than one percentage point of the mean model and the increased standard deviation of
-the models' performance, at least two convolutional layers on the 32 px × 32 px feature
-map scale are recommendable for CIFAR-100.
-Changing the baseline to have fewer filters but more layers is another option. This was tried
-for the first block at the 32 px × 32 px feature map scale. The two convolutional layers
-(layers 1 – 4 in Page 39) were replaced by two convolutional layers with 27 filters and one
-convolutional layer with 26 filters in the convolution - BN - ELU pattern. The model
-has 944 132 parameters. Compared to the baseline model, the time for inference was the
-same. This is unexpected, because the inference time changed when a layer was removed at
-this scale. The mean test accuracy was 63.66 % (+0.28) and the standard deviation was
-σ = 1.03 (+0.48). The ensemble achieved 64.91 % test accuracy (+0.21).
-Having two nonlinearities at each feature map scale could be important to learn nonlinear
-transformations at that scale. As the baseline model does only have one nonlinearity at the
-8 × 8 feature maps scale, another convolutional layer with 64 filters, Batch Normalization
-and ELU was added. To keep the number of parameters constant, layer 11 of the baseline
-model was reduced from 512 filters to 488 filters. The new model achieves a mean accuracy
-of 63.09 % (-0.29) with a standard deviation of σ = 0.70 (+0.15). The ensemble achieves
-an accuracy of 64.39 % (-0.31). This could indicate that having two convolutional layers
-is more important for layers close to the input than for intermediate layers. Alternatively, the
-parameters could be more important in layer 11 than having a new convolutional layer after
-layer 9.
-In order to control the hypothesis that having two convolutional layers is less important in
-the middle of a network, the second convolutional layer at the 16 × 16 feature map scale is
-removed. The first convolutional layer was increased from 32 filters to 59 filters, the second
-convolutional layer was increased from 32 filters to 58 filters in order to keep the amount of
-parameters of the model constant. The adjusted model achieved 62.72 % (-0.66) mean test
-accuracy with a standard deviation of σ = 0.84 (+0.29). The ensemble achieved 63.88 %
-test accuracy (-0.82).
-Even more extreme, if both convolutional layers are removed from the 16 × 16 feature map
-scale, the mean test accuracy drops to 61.21 % (-2.17) with a standard deviation of σ = 0.51
-(-0.04). The ensemble achieves a test accuracy of 63.07 % (-1.63). Thus it is very important
-to have at least one convolutional layer at this feature map scale.
-5.8. Batch Normalization
-In [CUH15], the authors write that Batch Normalization does not improve ELU networks.
-Hence the effect of removing Batch Normalization from the baseline is investigated in this
-57
-5. Experimental Evaluation
-experiment.
-As before, 10 models are trained on CIFAR-100. The training setup and the model mno-bn
-are identical to the baseline model m, except that in mno-bn the Batch Normalization layers
-are removed.
-One notable difference is the training time: While m needs 21 ms per epoch in average on
-a GTX 980, mno-bn only needs 21 ms per epoch. The number of epochs used for training,
-however, also increased noticeably from 149 epochs to 178 epochs in average. The standard
-deviation of trained epochs is 17.3 epochs for the baseline model and 23.4 epochs for mno-bn.
-The mean accuracy of mno-bn is 62.86 % and hence 0.52 percentage points worse. The
-standard deviation between models increased from 0.55 to 0.61. This is likely a result of the
-early stopping policy and the differences in training epochs. This can potentially be fixed
-by retraining the models which stopped earlier than the model which was trained for the
-biggest amount of epochs. The ensemble test accuracy is 63.88 % and hence 0.82 percentage
-points worse than the baseline.
-The filter weight range and distribution is approximately the same as Figure 5.6 and
-Figure 5.2, but the distribution of bias weights changed noticeably: While the bias weights of
-the baseline are spread out in the first layer and much more concentrated in subsequent layers
-(see Figure 5.3), the model without Batch Normalization has rather concentrated weights
-in the first layers and only the bias weights of the last layer are spread out (see Figure A.2).
-Another model $m'_{\text{no-bn}}$ has one more filter in each of the convolutional layers 1, 3, 5, and 7 to
-compensate for the loss of parameters in Batch Normalization. The mean test accuracy of
-10 such models is 62.87 % which is 0.51 percentage points worse than the baseline. The
-ensemble of $m'_{\text{no-bn}}$ achieves 64.33 % which is 0.37 percentage points worse than the baseline.
-The mean training time was 14 s per epoch and 157.4 epochs with a standard deviation of
-20.7 epochs.
-Hence it is not advisable to remove Batch Normalization for the final model. It could,
-however, be possible to remove Batch Normalization for the experiments to iterate quicker
-through different ideas if the relative performance changes behave the same with or without
-Batch Normalization.
+e
+b
+kh
+2X
+c
+iy=1−d kh
+2
+e
+X
+d
+ic=1
+I(x + ix, y + iy, ic) · F(ix, iy, ic)
+This procedure is explained by Figure 2.1. It is essentially a discrete convolution.
+$I \in \mathbb{R}^{7 \times 7}$
+Filter kernel
+$F \in \mathbb{R}^{3 \times 3}$
+Result of point-wise
+multiplication
+$I' \in \mathbb{R}^{7 \times 7}$
+104
+116
+116
+112
 58
-5.9. Batch size
-5.9. Batch size
-The mini-batch size m ∈ N≥1 influences
-• Epochs until convergence: The smaller m, the more often the model is updated
-in one epoch. Those updates, however, are based on fewer samples of the dataset.
-Hence the gradients of different mini-batches can noticeably differ. In the literature,
-this is referred to as gradient noise [KMN+16].
-• Training time per epoch: The smaller the batch size, the higher the training time
-per epoch as the hardware is not optimally utilized.
-• Resulting model quality: The choice of the hyperparameter m influences the
-accuracy of the classifier when training is finished. [KMN+16] supports the view that
-smaller m result in less sharp minima. Hence smaller m lead to better generalization.
-Empiric evaluation results can be found in Table 5.9. Those results confirm the claim
-of [KMN+16] that lower batch sizes generalize better.
-m
-Training
-Epochs
-Mean total Single model Ensemble
-time training time Accuracy std Accuracy
-8 118 s
-epoch 81 – 153 14 131 s 61.93 % σ = 1.03 65.68 %
-16 62 s
-epoch 103 – 173 8349 s 64.16 % σ = 0.81 66.98 %
-32 35 s
-epoch 119 – 179 5171 s 64.11 % σ = 0.75 65.89 %
-64 25 s
-epoch 133 – 195 2892 s 63.38 % σ = 0.55 64.70 %
-128 18 s
-epoch 145 – 239 3126 s 62.23 % σ = 0.73 63.55 %
-Table 5.9.: Training time per epoch and single model test set accuracy (mean and standard deviation)
-of baseline models trained with different mini-batch sizes m on GTX 970 GPUs on
-CIFAR-100.
-5.10. Bias
-Figure 5.3 suggests that the bias is not important for the layers 11, 13 and 15. Hence a
-model mno-bias is created which is identical to the baseline model m, except that the bias of
-layers 11, 13 and 15 is removed.
-The mean test accuracy of 10 trained mno-bias is 63.74 % which is an improvement of
-0.36 percentage points over the baseline. The ensemble achieves a test accuracy of 65.13 %
-which is 0.43 percentage points better than the baseline. Hence the bias can safely be
-removed.
-Removing the biases did not have a noticeable effect on the filter weight range, the filter
-weight distribution or the distribution of the remaining biases. Also, the γ and β parameters
-of the Batch Normalization layers did not noticeably change.
-59
-5. Experimental Evaluation
-5.11. Learned Color Space Transformation
-In [MSM16] it is described that placing one convolutional layer with 10 filters of size 1 × 1
-directly after the input and then another convolutional layer with 3 filters of size 1 × 1 acts
-as a learned transformation in another color space and boosts the accuracy.
-This approach was evaluated on CIFAR-100 by adding a convolutional layer with ELU activation and 10 filters followed by another convolutional layer with ELU activation and
-3 filters. The mean accuracy of 10 models was 63.31 % with a standard deviation of 1.37.
-The standard deviation is noticeably higher than the standard deviation of the baseline
-model (0.55) and the accuracy also decreased by 0.07 percentage points. The accuracy of
-the ensemble is at 64.77 % and hence 0.07 percentage points higher than the accuracy of
-the baseline models.
-The inference time for 1 image and for 128 images did not change compared to the baseline.
-The training time per epoch increased from 26 s to 30 s on the GTX 970.
-Hence it is not advisable to use the learned color space transformation.
-5.12. Pooling
-An alternative to max pooling with stride 2 with a 2 × 2 kernel is using a 3 × 3 kernel with
-stride 2.
-This approach was evaluated on CIFAR-100 by replacing all max pooling layers with the
-3×3 kernel max pooling (and SAME padding). The mean accuracy of 10 models was 63.32 %
-(−0.06) and the standard deviation was 0.57 (+0.02). The ensemble achieved 65.15 % test
-accuracy (+0.45).
-The training time per epoch decreased from 20.5 s-21.1 s to 18.6 s (mean of 10 training runs)
-on the Nvidia GTX 970. The time for inference increased from 25 ms to 26 ms for a batch
-of 128 images.
-5.13. Activation Functions
-Nonlinear, differentiable activation functions are important for neural networks to allow them
-to learn nonlinear decision boundaries. One of the simplest and most widely used activation
-functions for CNNs is ReLU [KSH12], but others such as ELU [CUH15], parametrized
-rectified linear unit (PReLU) [HZRS15b], softplus [ZYL+15] and softsign [BDLB09] have
-been proposed. The baseline uses ELU.
-60
-5.13. Activation Functions
-Activation functions differ in the range of values and the derivative. The definitions and
-other comparisons of eleven activation functions are given in Table B.3.
-Theoretical explanations why one activation function is preferable to another in some
-scenarios are the following:
-• Vanishing Gradient: Activation functions like tanh and the logistic function saturate outside of the interval [−5, 5]. This means weight updates are very small for
-preceding neurons, which is especially a problem for very deep or recurrent networks as
-described in [BSF94]. Even if the neurons learn eventually, learning is slower [KSH12].
-• Dying ReLU: The dying ReLU problem is similar to the vanishing gradient problem.
-The gradient of the ReLU function is 0 for all non-positive values. This means if all
-elements of the training set lead to a negative input for one neuron at any point in the
-training process, this neuron does not get any update and hence does not participate
-in the training process. This problem is addressed in [MHN13].
-• Mean unit activation: Some publications like [CUH15, IS15] claim that mean
-unit activations close to 0 are desirable. They claim that this speeds up learning
-by reducing the bias shift effect. The speedup of learning is supported by many
-experiments. Hence the possibility of negative activations is desirable.
-Those considerations are listed in Table 5.10 for 11 activation functions. Besides the
-theoretical properties, empiric results are provided in Tables 5.11 and 5.12. The baseline
-network was adjusted so that every activation function except the one of the output layer
-was replaced by one of the 11 activation functions.
-As expected, PReLU and ELU performed best. Unexpected was that the logistic function,
-tanh and softplus performed worse than the identity and it is unclear why the pure-softmax
-network performed so much better than the logistic function. One hypothesis why the
-logistic function performs so badly is that it cannot produce negative outputs. Hence the
-logistic− function was developed:
-$$\text{logistic}^{-}(x) = \frac{1}{1 + e^{-x}} - 0.5$$
-The logistic− function has the same derivative as the logistic function and hence still suffers
-from the vanishing gradient problem. The network with the logistic− function achieves an
-accuracy which is 11.30 % better than the network with the logistic function, but is still
-5.54 % worse than the ELU.
-Similarly, ReLU was adjusted to have a negative output:
-ReLU−(x) = max(−1, x) = ReLU(x + 1) − 1
-The results of ReLU− are much worse on the training set, but perform similar on the test
-61
-5. Experimental Evaluation
-set. The result indicates that the possibility of hard zero and thus a sparse representation
-is either not important or similarly important as the possibility to produce negative outputs.
-This contradicts [GBB11, SMGS14].
-A key difference between the logistic− function and ELU is that ELU neither suffers
-from the vanishing gradient problem nor is its range of values bound. For this reason, the
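-S2ReLU activation function was developed. It is defined as
-$$\text{S2ReLU}(x) = \text{ReLU}\left(\frac{x}{2} + 1\right) - \text{ReLU}\left(-\frac{x}{2} + 1\right) = \begin{cases} \frac{x}{2} - 1 & \text{if } x \leq -2 \\ x & \text{if } -2 \leq x \leq 2 \\ \frac{x}{2} + 1 & \text{if } x > 2 \end{cases}$$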
-This function is similar to SReLUs as introduced in [JXF+16]. The difference is that S2ReLU
-does not introduce learnable parameters. The S2ReLU was designed to be symmetric, be
-the identity close to zero and have a smaller absolute value than the identity farther away.
-It is easy to compute and easy to implement.
-Those results — not only the absolute values, but also the relative comparison — might
-depend on the network architecture, the training algorithm, the initialization and the
-dataset. Results for MNIST can be found in Table 5.13 and for HASYv2 in Table A.2. For
-both datasets, the logistic function has a much shorter training time and a noticeably lower
-test accuracy.
-Function Vanishing Gradient Negative Activation possible Bound activation
-Identity No Yes No
-Logistic Yes No Yes
-Logistic− Yes Yes Yes
-Softmax Yes Yes Yes
-tanh Yes Yes Yes
-Softsign Yes Yes Yes
-ReLU Yes1 No Half-sided
-Softplus No No Half-sided
-S2ReLU No Yes No
-LReLU/PReLU No Yes No
-ELU No Yes No
-Table 5.10.: Properties of activation functions.
-1The dying ReLU problem is similar to the vanishing gradient problem.
-62
-5.13. Activation Functions
-Function
-Single model Ensemble of 10
-Training set Test set Training set Test set
-Identity 66.25 % σ = 0.77 56.74 % σ = 0.51 68.77 % 58.78 %
-Logistic 51.87 % σ = 3.64 46.54 % σ = 3.22 61.19 % 54.58 %
-Logistic− 66.49 % σ = 1.99 57.84 % σ = 1.15 69.04 % 60.10 %
-Softmax 75.22 % σ = 2.41 59.49 % σ = 1.25 78.87 % 63.06 %
-Tanh 67.27 % σ = 2.38 55.70 % σ = 1.44 70.21 % 58.10 %
-Softsign 66.43 % σ = 1.74 55.75 % σ = 0.93 69.78 % 58.40 %
-ReLU 78.62 % σ = 2.15 62.18 % σ = 0.99 81.81 % 64.57 %
-ReLU− 76.01 % σ = 2.31 62.87 % σ = 1.08 78.18 % 64.81 %
-Softplus 66.75 % σ = 2.45 56.68 % σ = 1.32 71.27 % 60.26 %
-S2ReLU 63.32 % σ = 1.69 56.99 % σ = 1.14 65.80 % 59.20 %
-LReLU 74.92 % σ = 2.49 61.86 % σ = 1.23 77.67 % 64.01 %
-PReLU 80.01 % σ = 2.03 62.16 % σ = 0.73 83.50 % 64.79 %
-ELU 76.64 % σ = 1.48 63.38 % σ = 0.55 78.30 % 64.70 %
-Table 5.11.: Training and test accuracy of adjusted baseline models trained with different activation
-functions on CIFAR-100. For LReLU, α = 0.3 was chosen.
-Function
-Inference per Training
-Epochs
-Mean total
-1 Image 128 time training time
-Identity 8 ms 42 ms 31 s
-epoch 108 – 148 3629 s
-Logistic 6 ms 31 ms 24 s
-epoch 101 – 167 2234 s
-Logistic− 6 ms 31 ms 22 s
-epoch 133 – 255 3421 s
-Softmax 7 ms 37 ms 33 s
-epoch 127 – 248 5250 s
-Tanh 6 ms 31 ms 23 s
-epoch 125 – 211 3141 s
-Softsign 6 ms 31 ms 23 s
-epoch 122 – 205 3505 s
-ReLU 6 ms 31 ms 23 s
-epoch 118 – 192 3449 s
-Softplus 6 ms 31 ms 24 s
-epoch 101 – 165 2718 s
-S2ReLU 5 ms 32 ms 26 s
-epoch 108 – 209 3231 s
-LReLU 7 ms 34 ms 25 s
-epoch 109 – 198 3388 s
-PReLU 7 ms 34 ms 28 s
-epoch 131 – 215 3970 s
-ELU 6 ms 31 ms 23 s
-epoch 146 – 232 3692 s
-Table 5.12.: Training time and inference time of adjusted baseline models trained with different
-activation functions on GTX 970 GPUs on CIFAR-100. It was expected that the
-identity is the fastest function. This result is likely an implementation specific problem
-of Keras 2.0.4 or Tensorflow 1.1.0.
-63
-5. Experimental Evaluation
-Function
-Single model Ensemble Epochs
-Accuracy std Accuracy Range Mean
-Identity 99.45 % σ = 0.09 99.63 % 55 – 77 62.2
-Logistic 97.27 % σ = 2.10 99.48 % 37 – 76 54.5
-Softmax 99.60 % σ = 0.03 99.63 % 44 – 73 55.6
-Tanh 99.40 % σ = 0.09 99.57 % 56 – 80 67.6
-Softsign 99.40 % σ = 0.08 99.57 % 72 – 101 84.0
-ReLU 99.62 % σ = 0.04 99.73 % 51 – 94 71.7
-Softplus 99.52 % σ = 0.05 99.62 % 62 – 70 68.9
-PReLU 99.57 % σ = 0.07 99.73 % 44 – 89 71.2
-ELU 99.53 % σ = 0.06 99.58 % 45 – 111 72.5
-Table 5.13.: Test accuracy of adjusted baseline models trained with different activation functions
-on MNIST.
-5.14. Label smoothing
-Ensembles consisting of n models trained by the same procedure on the same data but
-initialized with different weights and trained with a different order of the training data
-perform consistently better than single models. One drawback of ensembles in applications
-such as self-driving cars is that they increase the computation by a factor of n. One idea
-why they improve the test accuracy is by reducing the variance.
-The idea of label smoothing is to use the ensemble prediction of the training data as labels
-for another classifier. For every element x of the training set, the one-hot encoded target
-t(x) is smoothed by the ensemble prediction yE(x)
-$$t'(x) = \alpha \cdot t(x) + (1 - \alpha) \cdot y_E(x)$$
-where α ∈ [0, 1] is the smoothing factor.
-There are three reasons why label smoothing could be beneficial:
-• Training speed: The ensemble prediction contains more information about the
-image than binary class decisions. Classifiers in computer vision predict how similar
-the input looks to other input of the classes they are trained on. By smoothing the
-labels, the information that one image could also belong to another class is passed to
-the optimizer. In early stages of the optimization this could lead to a lower loss on
-the non-smoothed validation set.
-• Higher accuracy: Using smoothed labels for the optimization could lead to a higher
-accuracy of the base-classifier due to a smoothed error surface. It might be less likely
-64
-5.14. Label smoothing
-that the classifier gets into bad local minima.
-• Label noise: Depending on the way how the labels are obtained, it might not always
-be clear which label is the correct one. Also, labeling errors can be present in training
-datasets. Those errors severely harm the training. By smoothing the labels, errors
-could be relaxed.
-10 models msmooth are trained with the α = 0.5 smoothed labels from the prediction
-of an ensemble of 10 baseline models. The mean accuracy of the models trained on the
-smoothed training set labels was 63.61 % (+0.23 %) and the standard deviation was σ = 0.72
-(+0.17 %). The ensemble of 10 msmooth models achieved 64.79 % accuracy (+0.09 %). Hence
-the effect of this kind of label smoothing on the final accuracy is questionable.
-The training speed didn’t noticeably change either: The number of trained epochs ranged
-from 144 to 205, the mean number of epochs was 177. The baseline training ranged from
-146 to 232 epochs with a mean of 174 epochs. After 10, 30 and 80 epochs, the accuracy of both training
-methods differed by less than one percentage point. Hence it is unlikely that label
-smoothing has a positive effect on the training speed.
-Hinton et al. called this method distillation in [HVD15]. Hinton et al. used smooth and
-hard labels for training, this work only used smoothed labels.
-65
-5. Experimental Evaluation
-5.15. Optimized Classifier
-In comparison to the baseline classifier, the following changes are applied to the optimized
-classifier:
-• Remove the bias for the last layers: For all layers which output a 1 × 1 feature
-map, the bias is removed
-• Increase the max pooling kernel to 3 × 3
-• More filters in the first layers
-The detailed architecture is given in Table 5.14 and visualized in Figure 5.16. The evaluation
-is given in Table 5.15 and the timing comparison is given in Table 5.16.
-# Type Filters @
-Patch size / stride
-Parameters FLOPs Output size
-Input 0 0 3 @ 32 × 32
-1 Convolution 69 @ 3 × 3 × 3 / 1 1 932 3 744 768 69 @ 32 × 32
-2 BN + ELU 138 353 418 69 @ 32 × 32
-3 Convolution 69 @ 3 × 3 × 32 / 1 42 918 37 684 096 69 @ 32 × 32
-4 BN + ELU 138 353 418 69 @ 32 × 32
-Max pooling 2 × 2 / 2 0 40 960 32 @ 16 × 16
-5 Convolution 64 @ 3 × 3 × 32 / 1 39 808 20 332 544 64 @ 16 × 16
-6 BN + ELU 128 82 048 64 @ 16 × 16
-7 Convolution 64 @ 3 × 3 × 64 / 1 36 928 18 857 984 64 @ 16 × 16
-8 BN + ELU 128 82 048 64 @ 16 × 16
-Max pooling 2 × 2 / 2 20 480 64 @ 8 × 8
-9 Convolution 64 @ 3 × 3 × 64 / 1 36 928 4 714 496 64 @ 8 × 8
-10 BN + ELU 128 20 608 64 @ 8 × 8
-Max pooling 2 × 2 / 2 5 120 64 @ 4 × 4
-11 Convolution (v) 512 @ 4 × 4 × 64 / 1 524 288 1 048 064 512 @ 1 × 1
-12 BN + ELU 1 024 3 584 512 @ 1 × 1
-Dropout 0.5 0 0 512 @ 1 × 1
-13 Convolution 512 @ 1 × 1 × 512 / 1 262 144 523 776 512 @ 1 × 1
-14 BN + ELU 1 024 3 584 512 @ 1 × 1
-Dropout 0.5 0 0 512 @ 1 × 1
-15 Convolution k @ 1 × 1 × 512 / 1 512 · k 512 · k k @ 1 × 1
-Global avg Pooling 1 × 1 0 k k @ 1 × 1
-16 BN + Softmax 2k 7k k @ 1 × 1
-P 514k
-+947 654
-520k
-+87 870 996 179 200+2k
-Table 5.14.: Optimized architecture with 3 input channels of size 32 × 32. All convolutional layers
-use SAME padding, except for layer 11 which used VALID padding in order to decrease
-the feature map size to 1 × 1. If the input feature map is bigger than 32 × 32, for each
-power of two there are two Convolution + BN + ELU blocks and one Max pooling
-block added. This is the framed part in the table.
-66
-5.15. Optimized Classifier
-32 × 32
-Input
-C 69@3 × 3/1
-BN + ELU
-C 69@3 × 3/1
-BN + ELU
-16 × 16
-max pooling 3 × 3/2
-C 64@3 × 3/1
-BN + ELU
-C 64@3 × 3/1
-BN + ELU
-8 × 8
-max pooling 3 × 3/2
-C 64@3 × 3/1
-BN + ELU
-4 × 4
-max pooling 3 × 3/2
-C* 512@4 × 4/1 (V)
-BN + ELU
-Dropout, p = 0.5
-1 × 1
-C* 512@1 × 1/1
-BN + ELU
-Dropout, p = 0.5
-C* k@1 × 1/1
-Global AVG pooling
-BN + Softmax
-Figure 5.16.: Architecture of the optimized model. C 32@3 × 3/1 is a convolutional layer with
-32 filters of kernel size 3 × 3 with stride 1. The * indicates that no bias is used.
-Dataset Single Model Accuracy Ensemble of 10
-Training Set Test Set Training Set Test Set
-Asirra 95.83 % σ = 4.70 90.75 % σ = 4.73 98.78 % 93.09 %
-CIFAR-10 94.58 % σ = 0.70 87.92 % σ = 0.46 96.47 % 89.86 %
-CIFAR-100 77.96 % σ = 2.18 64.42 % σ = 0.73 81.44 % 67.03 %
-GTSRB 100.00 % σ = 0.00 99.28 % σ = 0.10 100.00 % 99.51 %
-HASYv2 88.79 % σ = 0.45 85.36 % σ = 0.15 89.36 % 85.92 %
-MNIST 99.88 % σ = 0.10 99.48 % σ = 0.13 99.99 % 99.67 %
-STL-10 95.43 % σ = 3.57 75.09 % σ = 2.39 98.54 % 78.66 %
-SVHN 99.08 % σ = 0.07 96.37 % σ = 0.12 99.50 % 97.47 %
-Table 5.15.: Optimized model accuracy on eight datasets. The single model accuracy is the mean accuracy of the 10 models
-used in the ensemble. The empirical standard deviation σ of the accuracy is also given.
-CIFAR-10, CIFAR-100 and STL-10 models use test-time transformations. None of the
-models uses unlabeled data or data from other datasets. For MNIST, GTSRB, SVHN
-and HASY, no test time transformations are used.
-Network GPU Tensorflow Inference per Training
-1 Image 128 images time / epoch
-Optimized Default Intel i7-4930K 5 ms 432 ms 386 s
-Optimized Optimized Intel i7-4930K 4 ms 307 ms 315 s
-Optimized Default GeForce 940MX 4 ms 205 ms 192 s
-Optimized Default GTX 970 6 ms 41 ms 35 s
-Optimized Default GTX 980 3 ms 35 ms 27 s
-Optimized Default GTX 980 Ti 6 ms 36 ms 26 s
-Optimized Default GTX 1070 2 ms 24 ms 21 s
-Optimized Default Titan Black 4 ms 46 ms 43 s
-Table 5.16.: Speed comparison of the optimized model on CIFAR-10. The baseline model is
-evaluated on six Nvidia GPUs and one CPU. The weights for DenseNet-40-12 are taken
-from [Maj17]. Weights of the baseline model can be found at [Tho17b]. The optimized
-Tensorflow build makes use of SSE4.X, AVX, AVX2 and FMA instructions.
-67
-5. Experimental Evaluation
-5.16. Early Stopping vs More Data
-A separate validation set is necessary for two reasons: (1) Early stopping and (2) preventing
-overfitting due to many experiments. To prevent overfitting, a different dataset can be used.
-For example, all decisions about hyperparameters in this thesis are based on CIFAR-100,
-but the network is finally trained and evaluated with the same hyperparameters on all
-datasets.2 The validation set can hence be removed if early stopping is removed. Instead,
-the validation data is used in a first run to determine the number of epochs necessary for
-training. In a second training run the validation data is added to the training set. The
-number of used epochs for the second run is given in Table 5.17.
-Dataset Mean epochs Train data classes average data / class
-Asirra 60 15 075 2 7538
-MNIST 41 54 000 10 5400
-SVHN 45 543 949 10 54 395
-CIFAR-10 84 45 000 10 4500
-HASYv2 92 136 116 369 369
-GTSRB 97 35 288 43 821
-STL-10 116 4500 10 450
-CIFAR-100 155 45 000 100 450
-Table 5.17.: Mean number of training epochs for the optimized model. For comparison, the total
-amount of used training data, the number of classes of the dataset and the average
-amount of data per class is given.
-Alternatively, the model can be trained with early stopping (ES) purely on the training
-loss. All three methods – early stopping on the validation set accuracy, early stopping on
-the training loss and training a fixed number of epochs are evaluated. While having more
-data helped with Asirra and CIFAR-100, the results as shown in Table 5.18 on the other
-datasets are only marginally different. For CIFAR-10, training with more data did not
-improve the results when the number of epochs is fixed, but notably improved the results
-when the training loss was used as the early stopping criterion.
-5.17. Regularization
-Stronger regularization might even improve the results when using the training loss as an
-early stopping criterion. $\ell_2$ regularization with a weighting factor of λ = 0.0001 is used in
-all other experiments. While the accuracy as shown in Table 5.19 does not show a clear
-pattern, the number of epochs increases with lower model regularization (see Table 5.20).
-2Except data augmentation and test time transformations.
-3Only 1 model is trained due to the long training time of 581 epochs and 12 hours for this model.
-4Only 3 models are in this ensemble due to the long training time of more than 8 hours per model.
-68
-5.17. Regularization
-Dataset Early Stopping Fixed epochs
-val. acc train loss
-Asirra 93.09 % 96.01 %3 96.01 %
-CIFAR-10 89.86 % 91.75 % 88.88 %
-CIFAR-100 67.03 % 71.01 % 69.08 %
-HASYv2 85.92 % 82.89 %4 85.05 %
-MNIST 99.67 % 99.64 % 99.57 %
-STL-10 78.66 % 83.25 % 78.64 %
-Table 5.18.: Comparisons of trained optimized models with early stopping on the validation accuracy
-compared to training setups without a validation set and thus more training data. The
-second column uses the training loss as a stopping criterion, the third column uses a
-fixed number of epochs which is equal to the mean number of training epochs of the
-models with early stopping on the validation set accuracy.
-λ
-Single Model Accuracy Ensemble of 10
-Training Set Test Set Training Set Test Set
-λ = 0.01 73.83 % σ = 1.78 58.94 % σ = 1.33 87.78 % 69.98 %
-λ = 0.001 82.86 % σ = 0.89 63.03 % σ = 0.67 91.86 % 71.02 %
-λ = 0.0001 77.96 % σ = 2.18 64.42 % σ = 0.73 81.44 % 67.03 %
-Table 5.19.: Different choices of $\ell_2$ model regularization applied to the optimized model.
-λ min max mean std
-λ = 0.01 457 503 404.6 37.2
-λ = 0.001 516 649 588.4 41.6
-λ = 0.0001 579 833 696.1 79.1
-Table 5.20.: Training time in epochs of models with early stopping on training loss by different
-choices of $\ell_2$ model regularization applied to the optimized model.
-69
-5. Experimental Evaluation
-70
-6. Conclusion and Outlook
-This master thesis gave an extensive overview over the design patterns of CNNs in Chapter 2,
-the methods how CNNs can be analyzed and the principal directions of topology learning
-algorithms in Chapter 3.
-Confusion Matrix Ordering (CMO), originally developed as a method to make visualizations
-of confusion matrices easier to read (see Figure 5.13), was introduced as a class clustering
-algorithm in Chapter 4 and evaluated in Sections 4.2 and 5.4. The important insights are:
-• Ordering the classes in the confusion matrix allows to display the relevant parts even
-for several hundred classes.
-• A hierarchy of classifiers based on the classes does not improve the results on CIFAR-100. There are three possible reasons for this:
-– 32 px × 32 px is too low dimensional
-– 100 classes are not enough for this approach
-– More classes are always easier to distinguish if each new class comes with more
-data. One reason why this might be the case is that distinguishing the object
-from background has similar properties even for different classes.
-• Label smoothing had only a minor effect on the accuracy and no effect on the training
-time when a single base classifier was used to train with the smoothed labels by an
-ensemble of base classifiers.
-A baseline model was defined and evaluated on eight publicly available datasets. The
-baseline's topology and training setup are described in detail as well as its behavior during
-training and properties of the weights of the trained model.
-The influence of various hyperparameters is examined in Sections 5.5 to 5.12 for CIFAR-100.
-The insights of those experiments are:
-• Averaging ensembles of 10 base classifiers of the same architecture and trained with the
-same setup consistently improve the accuracy. The amount of improvement depends
-on the base classifiers, but the ensemble tends to improve the test accuracy by about
-one percentage point.
-• Wider networks learn in fewer epochs. This, however, does not mean that the
-71
-6. Conclusion and Outlook
-wall-clock time is lower due to increased computation in forward- and backward
-passes.
-• Batch Normalization increases the training time noticeably. For the described ELU
-baseline model it also increases accuracy, which contradicts [CUH15].
-• The lower the batch size, the longer the time for each epoch of training and the fewer
-epochs need to be trained. Higher accuracy by lower batch sizes was empirically
-confirmed. The batch size, however, can also be too low.
-• An analysis of the weights of the baseline indicated that the bias of layers close to
-the output layer can be removed. This was experimentally confirmed.
-• It could not be confirmed that learned color space transformation, as described
-in [MSM16], improves the network. Neither with ELU nor with leaky rectified linear
-unit (LReLU) and α = 0.3.
-• It could be confirmed that ELU networks give better results than any other activation
-function on CIFAR-100. For the character datasets MNIST and HASYv2, however,
-ReLU, LReLU, PReLU, Softplus and ELU all performed similar.
-• Changing the activation functions to the identity had very little impact on the HASYv2
-and MNIST classifiers. Note that those networks are still able to learn nonlinear
-decision boundaries due to max-pooling and SAME padding. For CIFAR-100, however,
-the accuracy drops by 6.64 % when ELU is replaced by the identity.
-Based on the results of those experiments, an optimized classifier was developed and
-evaluated on all eight datasets.
-The state of the art of STL-10 was improved from 74.80 % [ZMGL15] to 78.66 % without
-using the unlabeled part of the dataset. The state of the art of HASYv2 was improved
-from 81.00 % [Tho17a] to 85.92 %, for GTSRB the state of the art was improved from
-99.46 % [SL11] to 99.51 %, for Asirra it was improved from 82.7 % [Gol08] to 93.09 %.
-1
-This was mainly achieved by the combination of ELU, Dropout, ensembles, training data
-augmentation and test-time transformations. The removal of the bias of layers close to the
-output and re-usage of those parameters in layers close to the input as well as using 3 × 3
-pooling instead of 2 × 2 pooling improved the baseline.
-While writing this master's thesis, several related questions could not be answered:
-• Deeper CNNs have generally higher accuracy, if trained long enough and if overfitting
-is not a problem. But at which subsampling-level does having more layers have the
-biggest effect? Can this question be answered before a deeper network is trained?
-• Is label smoothing helpful for noisy labels?
-1The baseline is better than the optimized model on Asirra and on HASYv2.
-72
-• How does the choice of activation functions influence residual architectures? Could the
-results be the same for different activation functions in architectures with hundreds
-of layers?
-• The results for the pooling kernel were inconclusive. Larger pooling kernels might be
-advantageous as well as fractional max pooling [Gra15].
-• Why is the mean weight update (see Figure 5.8) not decreasing? Is this an effect that
-can and should be fixed?
-• Why is softmax so much better than the logistic function? Can the reason be used to
-further improve ELU?
-Besides those questions, the influence of optimizers on time per epoch, epochs until
-convergence, total training time, memory consumption, accuracy of the models and standard
-deviation of the models was not evaluated. This, and the stopping criterion for training
-might be crucial for the model's quality.
-73
-74
-A. Figures, Tables and Algorithms
-(a) Original image (b) Smoothing filter (c) Laplace edge detection filter
-(d) Sobel edge detection filter (e) Prewitt edge detection filter (f) Canny filter
-Figure A.1.: Examples of image filters. Best viewed in electronic form.
-Layer 99-percentile interval
-filter bias
-1 [-0.50, 0.48] [-0.06, 0.07]
-3 [-0.21, 0.19] [-0.07, 0.07]
-5 [-0.20, 0.17] [-0.07, 0.05]
-7 [-0.15, 0.14] [-0.05, 0.06]
-9 [-0.14, 0.15] [-0.04, 0.03]
-11 [-0.08, 0.08] [-0.00, 0.00]
-13 [-0.08, 0.08] [-0.00, 0.00]
-15 [-0.10, 0.11] [-0.01, 0.01]
-Table A.1.: 99-percentile intervals for filter weights and bias weights by layer of a baseline model
-trained on CIFAR-100.
-75
-Figure A.2.: The distribution of bias weights of a model without batch normalization trained on
-CIFAR-100.
-Algorithm 1 Simulated Annealing for minimizing Equation (4.1).
-Require: $C \in \mathbb{N}^{n \times n}$, steps $\in \mathbb{N}$, $T \in \mathbb{R}^{+}$, $c \in (0, 1)$
-procedure SimulatedAnnealing(C, steps, T, c)
-bestScore ← accuracy(C)
-bestC ← C
-for i = 0; i < steps; i ← i + 1 do
-p ← randomFloat(0, 1)
-if p < 0.5 then ▷ Swap rows
-i ← randomInteger(1, . . . , n)
-j ← randomInteger(1, . . . , n) \ { i }
-p ← randomUniform(0, 1)
-C′ ← swap(C, i, j)
-s ← accuracy(C′)
-if p < exp((s − bestScore) / T) then
-C ← C′
-if s > bestScore then
-bestScore ← s
-bestC ← C
-T ← T · c
-else ▷ Move Block
-s ← randomInteger(1, . . . , n) ▷ Block start
-e ← randomInteger(s, . . . , n) ▷ Block end
-i ← randomInteger(1, . . . , n − (e − s)) ▷ Block insert position
-Move Block (s, . . . , e) to position i
-return bestC
-76
-Figure A.3.: Maximum weight updates between epochs by layer. The model is the baseline model,
-but with layer 5 reduced to 3 filters.
-Function
-Single model Ensemble of 10 Epochs
-Training set Test set Train Test Range Mean
-Identity 87.92 % σ = 0.40 84.69 % σ = 0.08 88.59 % 85.43 % 92 – 140 114.5
-Logistic 81.46 % σ = 5.08 79.67 % σ = 4.85 86.38 % 84.60 % 58 – 91 77.3
-Softmax 88.19 % σ = 0.31 84.70 % σ = 0.15 88.69 % 85.43 % 124 – 171 145.8
-Tanh 88.41 % σ = 0.36 84.46 % σ = 0.27 89.24 % 85.45 % 89 – 123 108.7
-Softsign 88.00 % σ = 0.47 84.46 % σ = 0.23 88.77 % 85.33 % 77 – 119 104.1
-ReLU 88.93 % σ = 0.46 85.35 % σ = 0.21 89.35 % 85.95 % 96 – 132 102.8
-Softplus 88.42 % σ = 0.29 85.16 % σ = 0.15 88.90 % 85.73 % 108 – 143 121.0
-LReLU 88.61 % σ = 0.41 85.21 % σ = 0.05 89.07 % 85.83 % 87 – 117 104.5
-PReLU 89.62 % σ = 0.41 85.35 % σ = 0.17 90.10 % 86.01 % 85 – 111 100.5
-ELU 89.49 % σ = 0.42 85.35 % σ = 0.10 89.94 % 86.03 % 73 – 113 92.4
-Table A.2.: Test accuracy of adjusted baseline models trained with different activation functions on
-HASYv2. For LReLU, α = 0.3 was chosen.
-77
-Figure A.4.: Sum of weight updates between epochs by layer. The model is the baseline model, but
-with layer 5 reduced to 3 filters.
-Function
-Single model Ensemble of 10 Epochs
-Training set Test set Train Test Range Mean
-Identity 87.49 % σ = 2.50 69.86 % σ = 1.41 89.78 % 71.90 % 51 – 65 53.4
-Logistic 45.32 % σ = 14.88 40.85 % σ = 12.56 51.06 % 45.49 % 38 – 93 74.6
-Softmax 87.90 % σ = 3.58 67.91 % σ = 2.32 91.51 % 70.96 % 108 – 150 127.5
-Tanh 85.38 % σ = 4.04 67.65 % σ = 2.01 90.47 % 71.29 % 48 – 92 65.2
-Softsign 88.57 % σ = 4.00 69.32 % σ = 1.68 93.04 % 72.40 % 55 – 117 83.2
-ReLU 94.35 % σ = 3.38 71.01 % σ = 1.63 98.20 % 74.85 % 52 – 98 75.5
-Softplus 83.03 % σ = 2.07 68.28 % σ = 1.74 93.04 % 75.99 % 56 – 89 68.9
-LReLU 93.83 % σ = 3.89 74.66 % σ = 2.11 97.56 % 78.08 % 52 – 120 80.1
-PReLU 95.53 % σ = 1.92 71.69 % σ = 1.37 98.17 % 74.69 % 59 – 101 78.8
-ELU 95.42 % σ = 3.57 75.09 % σ = 2.39 98.54 % 78.66 % 66 – 72 67.2
-Table A.3.: Test accuracy of adjusted baseline models trained with different activation functions on
-STL-10. For LReLU, α = 0.3 was chosen.
-78
-B. Hyperparameters
-Hyperparameters are parameters of models which are not optimized automatically (e.g., by
-gradient descent), but by methods like random search [BB12], grid search [LBOM98] or
-manual search.
-B.1. Preprocessing
-Preprocessing used to be of major importance in machine learning. However, with the
-availability of data sets with hundreds of examples per class and the ability of CNNs to
-learn features themselves, most models today rely on raw pixel values. The only common
-preprocessing is size normalization. In order to get a fixed input-size for a CNN, the
-following procedure can be used:
-• Take one or multiple crops of the image which have the desired aspect ratio.
-• Scale the crop(s) to the desired size.
-• In training, all crops can be used independently. In testing, all crops can be passed
-through the network and the output probability distributions can be fused, for
-example by averaging.
-Other preprocessing methods are:
-• Color space transformations (RGB, HSV, etc.)
-• Mean subtraction
-• Standardization of pixel values to [0, 1] by dividing by 255 (used by [HLW16])
-• Dimensionality reduction
-– Principal component analysis (PCA): An unsupervised linear transformation
-which can be learned in the first hidden layer. It is hence doubtful if PCA
-improves the network.
-– Linear discriminant analysis (LDA)
-• Zero Components Analysis (ZCA) whitening (used by [KH09])
-79
-B.2. Data augmentation
-Data augmentation techniques aim at artificially creating more data from real data items by
-applying invariances. For computer vision, they include:
-Name Augmentation Factor Used by
-Horizontal flip 2 [KSH12, WYS+15]
-Vertical flip 2 [DWD15]¹
-Rotation ∼ 40 (δ = 20) [DSRB14]
-Scaling ∼ 14 (δ ∈ [0.7, 1.4]) [DSRB14]
-Crops 32² = 1024 [KSH12, WYS+15]
-Shearing [Gra15]
-GANs [BCW+17]
-Brightness ∼ 20 (δ ∈ [0.5, 1.5]) [How13]
-Hue 51 (δ = 0.1) [MRM15, DSRB14]
-Saturation ∼ 20 (δ = 0.5) [DSRB14]
-Contrast ∼ 20 (δ ∈ [0.5, 1.5]) [How13]
-Channel shift [KSH12]
-Table B.1.: Overview of data augmentation techniques. The augmentation factor is calculated for
-typical situations. For example, the augmentation factor for random crops is calculated
-for 256 px × 256 px images which are cropped to 224 px × 224 px.
-Taking several scales if the original is of higher resolution than desired is another technique.
-Combinations of the techniques above can also be applied. Please note that the order of
-operations does matter in many cases and hence the order is another augmentation factor.
-Less common, but also reasonable are:
-• Adding noise
-• Elastic deformations
-• Color casting (used by [WYS+15])
-• Vignetting (used by [WYS+15])
-• Lens distortion (used by [WYS+15])
-¹ Vertical flipping combined with a 180° rotation is equivalent to horizontal flipping.
-80
-B.3. Initialization
-Weight initializations are usually chosen to be small and centered around zero. One way to
-characterize many initialization schemes is by
-w ∼ α · U[−1, 1] + β · N (0, 1) + γ with α, β, γ ≥ 0
-Table B.2 shows six commonly used weight initialization schemes. Several schemes use the
-same idea, that unit-variance is desired for each layer as the training converges faster [IS15].
-Name α β γ Reference
-Constant α = 0 β = 0 γ ≥ 0 used by [ZF14]
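-Xavier/Glorot uniform $\alpha = \sqrt{\frac{6}{n_{in} + n_{out}}}$ $\beta = 0$ $\gamma = 0$ [GB10]
-Xavier/Glorot normal $\alpha = 0$ $\beta = \left(\frac{2}{n_{in} + n_{out}}\right)^{2}$ $\gamma = 0$ [GB10]
-He $\alpha = 0$ $\beta = \frac{2}{n_{in}}$ $\gamma = 0$ [HZRS15b]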
-Orthogonal — — γ = 0 [SMG13]
-LSUV — — γ = 0 [MM15]
-Table B.2.: Weight initialization schemes of the form w ∼ α · U[−1, 1] + β · N (0, 1) + γ.
-nin, nout are the number of units in the previous layer and the next layer. Typically,
-biases are initialized with constant 0 and weights by one of the other schemes to prevent
-unit-coadaptation. However, dropout makes it possible to use constant initialization for
-all parameters.
-LSUV and Orthogonal initialization cannot be described with this simple pattern.
-B.4. Objective function
-For classification tasks, the cross-entropy
-$$E_{CE}(W) = -\sum_{x \in X} \sum_{k=1}^{K} \left[ t_k^x \log(o_k^x) + (1 - t_k^x) \log(1 - o_k^x) \right]$$
-is by far the most commonly used objective function (e.g., used by [ZF14]). In this equation,
-$X$ is the set of training examples, $K$ is the number of classes, $t_k^x \in \{0, 1\}$ indicates whether the
-training example $x$ is of class $k$, and $o_k^x$ is the output of the classifier for the training example $x$
-and class $k$.
-However, regularization terms weighted with a constant λ ∈ (0, +∞) are sometimes added:
-• LASSO: $\ell_1$ (e.g., used in [HPTD15])
-• Weight decay: $\ell_2$ (e.g., λ = 0.0005 as in [MSM16])
-• Orthogonality regularization ($|W^T \cdot W - I|$, see [VTKP17])
-81
-B.5. Optimization Techniques
-Most relevant optimization techniques for CNNs are based on SGD, which updates the
-weights according to the rule
-$$w_{ji} \leftarrow w_{ji} + \Delta w_{ji} \quad \text{with} \quad \Delta w_{ji} = -\eta \frac{\partial E_x}{\partial w_{ji}}$$
-where η ∈ (0, 1), typically 0.01 (e.g., [MSM16]), is called the learning rate.
-A slight variation of SGD is mini-batch gradient descent with the mini-batch B (typically
-mini-batch sizes are |B| ∈ { 32, 64, 128, 256, 512 }, e.g. [ZF14]). Larger mini-batch sizes
-lead to sharp minima and thus poor generalization [KMN+16]. Smaller mini-batch sizes
-lead to longer training times due to computational overhead and to more training steps due
-to gradient noise.
-$$w_{ji} \leftarrow w_{ji} + \Delta w_{ji} \quad \text{with} \quad \Delta w_{ji} = -\eta \frac{\partial E_B}{\partial w_{ji}}$$
-Nine variations which adjust the learning rate during training are:
-• Momentum:
-$$w_{ji}^{(t+1)} \leftarrow w_{ji}^{(t)} + \Delta w_{ji}^{(t+1)} \quad \text{with} \quad \Delta w_{ji}^{(t+1)} = -\eta \frac{\partial E_B}{\partial w_{ji}} + \alpha \Delta w_{ji}^{(t)}$$
-with α ∈ [0, 1], typically 0.9 (e.g., [ZF14, MSM16])
-• Adagrad [DHS11]
-• RProp and the mini-batch version RMSProp [TH12]
-• Adadelta [Zei12]
-• Power Scheduling [Xu11]: $\eta(t) = \eta(0) (1 + a \cdot t)^{-c}$, where $t \in \mathbb{N}_0$ is the training step and
-$a, c$ are constants.
-• Performance Scheduling [SHY+13]: Measure the error on the cross validation set and
-decrease the learning rate when the algorithm's improvement is below a threshold.
-• Exponential Decay Learning Rate [SHY+13]: $\eta(t) = \eta(0) \cdot 10^{-t/k}$, where $t \in \mathbb{N}_0$ is the
-training step, $\eta(0)$ is the initial learning rate, and $k \in \mathbb{N}_{\geq 1}$ is the number of training steps
-until the learning rate is decreased to $\frac{1}{10}$ of its value.
-• Newbob Scheduling [new00]: Start with Performance Scheduling, then use Exponential
-Decay Scheduling.
-• Adam and AdaMax [KB14]
-82
-• Nadam [Doz15]
-Some of those are explained in [Rud16].
-Other first-order gradient optimization methods are:
-• Quickprop [Fah88]
-• Nesterov Accelerated Momentum (NAG) [Nes83]
-• Conjugate Gradient method [Cha92]: Combines a line search for the step size with
-the gradient's direction.
-Higher-order gradient methods like Newton's method or quasi-Newton methods like BFGS
-and L-BFGS need the inverse of the Hessian matrix which is intractable for today’s CNNs.
-However, there are alternatives which do not use gradient information:
-• Genetic algorithms such as NeuroEvolution of Augmenting Topologies (NEAT) [SM02]
-• Simulated Annealing [vLA87]
-• Twiddle: A local hill-climbing algorithm explained by Sebastian Thrun and described
-on [Tho14b]
-There are also approaches which learn the optimization algorithm [ADG+16, LM16].
-83
-B.6. Network Design
-CNNs have the following hyperparameters:
-• Depth: The number of layers
-• Width: The number of filters per layer
-• Layer and block connectivity graph
-• Layer and block hyperparameters:
-– Activation Functions as shown in Table B.3
-– For more, see Sections 2.2 and 2.3.
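-Name Function $\varphi(x)$ Range of Values $\varphi'(x)$ Used by
-Sign function† $\begin{cases} +1 & \text{if } x \geq 0 \\ -1 & \text{if } x < 0 \end{cases}$ $\{-1, 1\}$ $0$ [KS02]
-Heaviside step function† $\begin{cases} +1 & \text{if } x > 0 \\ 0 & \text{if } x < 0 \end{cases}$ $\{0, 1\}$ $0$ [MP43]
-Logistic function $\frac{1}{1 + e^{-x}}$ $[0, 1]$ $\frac{e^x}{(e^x + 1)^2}$ [DJ99]
-Tanh $\frac{e^x - e^{-x}}{e^x + e^{-x}} = \tanh(x)$ $[-1, 1]$ $\operatorname{sech}^2(x)$ [LBBH98, Tho14a]
-ReLU† $\max(0, x)$ $[0, +\infty)$ $\begin{cases} 1 & \text{if } x > 0 \\ 0 & \text{if } x < 0 \end{cases}$ [KSH12]
-LReLU†² (PReLU) $\varphi(x) = \max(\alpha x, x)$ $(-\infty, +\infty)$ $\begin{cases} 1 & \text{if } x > 0 \\ \alpha & \text{if } x < 0 \end{cases}$ [MHN13, HZRS15b]
-Softplus $\log(e^x + 1)$ $(0, +\infty)$ $\frac{e^x}{e^x + 1}$ [DBB+01, GBB11]
-ELU $\begin{cases} x & \text{if } x > 0 \\ \alpha(e^x - 1) & \text{if } x \leq 0 \end{cases}$ $(-\infty, +\infty)$ $\begin{cases} 1 & \text{if } x > 0 \\ \alpha e^x & \text{otherwise} \end{cases}$ [CUH15]
-Softmax‡ $o(x)_j = \frac{e^{x_j}}{\sum_{k=1}^{K} e^{x_k}}$ $[0, 1]^K$ $o(x)_j \cdot \frac{\sum_{k=1}^{K} e^{x_k} - e^{x_j}}{\sum_{k=1}^{K} e^{x_k}}$ [KSH12, Tho14a]
-Maxout‡ $o(\mathbf{x}) = \max_{x \in \mathbf{x}} x$ $(-\infty, +\infty)$ $\begin{cases} 1 & \text{if } x_i = \max \mathbf{x} \\ 0 & \text{otherwise} \end{cases}$ [GWFM+13]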
-Table B.3.: Overview of activation functions. Functions marked with † are not differentiable at 0
-and functions marked with ‡ operate on all elements of a layer simultaneously. The
-hyperparameters α ∈ (0, 1) of Leaky ReLU and ELU are typically α = 0.01. Other
-activation function like randomized leaky ReLUs exist [XWCL15], but are far less
-commonly used.
-Some functions are smoothed versions of others, like the logistic function for the
-Heaviside step function, tanh for the sign function, softplus for ReLU.
-Softmax is the standard activation function for the last layer of a classification network
-as it produces a probability distribution. See Figure B.1 for a plot of some of them.
-² α is a hyperparameter in leaky ReLU, but a learnable parameter in the parametric ReLU function.
-84
-−2.0 −1.5 −1.0 −0.5 0.5 1.0 1.5 2.0
-−1.0
-−0.5
-0.5
-1.0
-1.5
-2.0
-x
-y
-$\varphi_1(x) = \frac{1}{1 + e^{-x}}$
-ϕ2(x) = tanh(x)
-ϕ3(x) = max(0, x)
-$\varphi_4(x) = \log(e^x + 1)$
-$\varphi_5(x) = \max(x, e^x - 1)$
-Figure B.1.: Activation functions plotted in [−2, +2]. tanh and ELU are able to produce negative
-numbers. The image of ELU, ReLU and Softplus is not bound on the positive side,
-whereas tanh and the logistic function are always below 1.
-B.7. Regularization
-Regularization techniques aim to make the fitted function smoother and reduce overfitting.
-Regularization techniques are:
-• $\ell_1$, $\ell_2$, and Orthogonality regularization: See Appendix B.4
-• Max-norm regularization (e.g. used in [SHK+14])
-• Dropout (introduced in [SHK+14]), DropConnect (see [WZZ+13]), Stochastic Depth
-(see [HSL+16])
-• Feature scale clipping (see [ZF14])
-• Data augmentation (according to [ZBH+16])
-• Global average pooling (according to [ZKL+15])
-• Dense-Sparse-Dense training (see [HPN+16])
-• Soft targets (see [HVD15])
-85
-86
-C. Calculating Network Characteristics
-C.1. Parameter Numbers
-• A fully connected layer with n nodes, k inputs has n · (k + 1) parameters. The +1 is
-due to the bias.
-• A convolutional layer $i$ with $k_i$ filters of size $n \times m$ being applied to $k_{i-1}$ feature maps
-has $k_i \cdot k_{i-1} (n \cdot m + 1)$ parameters. The +1 is due to the bias.
-• A fully connected layer with n nodes after k feature maps of size m1 × m2 has
-n · (k · m1 · m2 + 1) parameters.
-• A dense block with a depth of $L$, a growth rate of $n$ and $3 \times 3$ filters has
-$L + n \cdot 3^2 + 3^2 \cdot n^2 \sum_{i=0}^{L} (L - i) = L + 9n + 9n^2 \frac{L^2 - L}{2}$ parameters.
-According to [HPTD15], AlexNet has 60 million parameters which is roughly the number
-calculated in Table D.2.
-C.2. FLOPs
-The FLOPs of a layer depend on the implementation, the compiler and the hardware. Hence
-the following numbers give only rough estimates.
-In the following, nϕ denotes the number of FLOPs to compute the non-linearity ϕ. For
-simplicity, nϕ = 5 was chosen.
-• A fully connected layer with $n$ nodes and $k$ inputs has to calculate $\varphi(W \cdot x + b)$ with
-$W \in \mathbb{R}^{n \times k}$, $x \in \mathbb{R}^{k \times 1}$, $b \in \mathbb{R}^{n \times 1}$. It hence needs about $n \cdot (k + (k - 1) + 1) = 2nk$
-additions / multiplications before the non-linearity $\varphi$ is calculated. The total number
-of FLOPs is $2 \cdot n \cdot k + n \cdot n_\varphi$.
-• In the following, biases are ignored. A convolutional layer with $k_i$ filters of size $n \times m$
-being applied to $k_{i-1}$ filter maps of size $w \times h$ results in $k_i$ filter maps of size $w \times h$ if
-padding is applied. For each element of each filter map, $n \cdot m \cdot k_{i-1}$ multiplications and
-$(n \cdot m \cdot k_{i-1} - 1)$ additions have to be made. This results in $(2nmk_{i-1} - 1) \cdot (k_i \cdot w \cdot h)$
-operations. The total number of FLOPs is $(2 \cdot n \cdot m \cdot k_{i-1} - 1) \cdot (k_i \cdot w \cdot h) + k_i \cdot w \cdot h \cdot n_\varphi$.
-This is, of course, a naive way of calculating a convolution. There are other ways of
-calculating convolutions [LG16].
-87
-• A fully connected layer with n nodes after k feature maps of size w×h needs 2n(k·w·h)
-FLOPs. The total number of FLOPs is 2n · (k · w · h) + n · nϕ.
-• As Dropout is only calculated during training, the number of FLOPs was set to 0.
-• The number of FLOPs for max pooling is dominated by the number of positions to
-which the pooling kernel is applied. For a feature map of size $w \times h$ a max pooling
-filter with stride $s$ gets applied $\frac{w \cdot h}{s^2}$ times. The number of FLOPs per application depends
-on the kernel size. A 2 × 2 kernel is assumed to need 5 FLOPs.
-• The number of FLOPs for Batch Normalization is the same as the number of its
-parameters.
-Here are some references which give information for the FLOPs:
-• AlexNet
-– 1.5B in total [HPTD15].
-– 725M in total [KPY+15].
-– 3300M in total in Table D.2
-• VGG-16:
-– 15484M in total [HPTD15].
-– 31000M in total in Table D.3.
-• GoogleNet: 1566M in total [HPTD15].
-One can see that the numbers reported for the same network differ by a factor of 2 up to
-a factor of 4.
-C.3. Memory Footprint
-The memory footprint of CNNs determines when networks can be used at all and if they
-can be trained efficiently. In order to be able to train CNNs efficiently, one weight update
-step has to fit in the memory of the GPU. This includes the following:
-• Activations: All activations of one mini-batch in order to calculate the gradients
-in the backward pass. This is the number of floats in the feature maps of all weight
-layers combined.
-• Weights
-• Optimization algorithm: The optimization algorithm introduces some overhead.
-For example, Adam stores two parameters per weight.
-At inference time, every two consecutive layers have to fit into memory. When the forward
-pass of layer A to layer B is calculated, the memory can be freed if no skip connections are
-used.
-88
-D. Common Architectures
-In the following, some of the most important CNN architectures are explained. Understanding the development of these architectures helps in understanding the critical insights the machine
-learning community got in the past years for convolutional networks for image recognition.
-It starts with LeNet-5 from 1998, continues with AlexNet from 2012, VGG-16 D from
-2014, the Inception modules v1 to v3 as well as ResNets in 2015. The recently developed
-Inception-v4 is also covered.
-The summation row gives the sum of all floats for the output size column. This allows
-conclusions about the maximum mini-batch size which can be in memory for training.
-89
-D.1. LeNet-5
-One of the first CNNs used was LeNet-5 [LBBH98]. LeNet-5 uses two times the common
-pattern of a single convolutional layer with tanh as a non-linear activation function followed
-by a pooling layer and three fully connected layers. One fully connected layer is used to
-get the right output dimension, another one is necessary to allow the network to learn a
-non-linear combination of the features of the feature maps.
-Its exact architecture is shown in Figure D.1 and described in Table D.1. It reaches a test
-error rate of 0.8 % on MNIST.
-Figure D.1.: Architecture of LeNet-5 as shown in [LBBH98].
-# Type Filters @
-Patch size / stride
-Parameters FLOPs Output size
-Input 0 0 1 @ 32 × 32
-1 Convolution 6 @ 5 × 5 × 1 / 1 156 307 800 6 @ 28 × 28
-2 Scaled average pooling 2 × 2 / 2 2 336 6 @ 14 × 14
-3 Convolution 16 @ 5 × 5 × 6 / 1 2 416 942 400 16 @ 10 × 10
-4 Scaled average pooling 2 × 2 / 2 2 1 600 16 @ 5 × 5
-5 Fully Connected 120 neurons 48 120 240 000 120
-6 Fully Connected 84 neurons 10 164 20 580 84
-7 Fully Connected (output) 10 neurons 850 1 730 10
-Σ 61 710 15 144 446 9118
-Table D.1.: LeNet-5 architecture: After layers 1, 3, 5 and 6 the tanh activation function is applied.
-After layer 7, the softmax function is applied. One can see that convolutional layers
-need much fewer parameters, but an order of magnitude more FLOPs per parameter
-than fully connected layers.
-90
-D.2. AlexNet
-The first CNN which achieved major improvements on the ImageNet dataset was AlexNet [KSH12].
-Its architecture is shown in Figure D.2 and described in Table D.2. It has about $60 \cdot 10^6$ parameters.
-A trained AlexNet can be downloaded at www.cs.toronto.edu/~guerzhoy/tf_alexnet.
-Note that the uncompressed size is at least 60 965 224 floats · 32 bit/float ≈ 244 MB.
-Figure D.2.: Architecture of AlexNet as shown in [KSH12]: Convolutional Layers are followed
-by pooling layers multiple times. At the end, a fully connected network is applied.
-Conceptually, it is identical to the architecture of LeNet-5 (see Figure D.1).
-# Type Filters @
-Patch size / stride
-Parameters FLOPs Output size
-Input 3 @ 224 × 224
-1 Convolution 96 @ 11 × 11 × 3 / 4 34 944 211 M 96 @ 55 × 55
-LCN 12 M 96 @ 55 × 55
-2 Max pooling 3 × 3 / 2 0 301 k 96 @ 27 × 27
-3 Convolution 256 @ 5 × 5 × 48 / 1 307 456 448M 256 @ 13 × 13
-LCN 3 M 256 @ 13 × 13
-4 Max pooling 3 × 3 / 2 0 50 k 256 @ 13 × 13
-5 Convolution 384 @ 3 × 3 × 256 / 1 885 120 299 M 384 @ 13 × 13
-7 Convolution 384 @ 3 × 3 × 192 / 1 663 936 224 M 384 @ 13 × 13
-9 Convolution 256 @ 3 × 3 × 192 / 1 442 624 150 M 256 @ 13 × 13
-10 Max pooling 3 × 3 / 2 0 50 k 256 @ 6 × 6
-11 FC 4096 neurons 37 752 832 75 M 4096
-12 FC 4096 neurons 16 781 312 34 M 4096
-13 FC 1000 neurons 4 097 000 8 M 1000
-Σ 60 965 224 3300 M 1 122 568
-Table D.2.: AlexNet architecture: One special case of AlexNet is grouping of convolutions due to
-computational restrictions at the time of its development. This also reduces the number
-of parameters and allows parallel computation on separate GPUs. However, to make
-the architecture easier to compare, this grouping was ignored for the parameter count.
-The FLOPs are taken from [HPTD15] and combined with rough estimates for Local
-Contrast Normalization and max pooling.
-The calculated number of parameters was checked against the downloaded version. It
-also has 60 965 224 parameters.
-91
-D.3. VGG-16 D
-Another widespread architecture is the VGG-16 (D) [SZ14]. VGG comes from the Visual
-Geometry Group in Oxford which developed this architecture. It has 16 layers which can
-learn parameters. A major difference compared to AlexNet is that VGG-16 uses only 3 × 3
-filters and is much deeper. A visualization of the architecture is shown in Figure D.3 and a
-detailed textual description is given in Table D.3.
-A trained VGG-16 D for Tensorflow can be downloaded at https://github.com/machrisaa/
-tensorflow-vgg. Note that the uncompressed size is at least 138 357 544 floats · 32 bit/float ≈
-520 MB. The downloaded Numpy binary file npz needs 553 MB without compression and
-514 MB with compression.
-224 × 224
-Input
-C 64@3 × 3/1
-C 64@3 × 3/1
-112 × 112
-max pooling 2 × 2/1
-C 128@3 × 3/1
-C 128@3 × 3/1
-56 × 56
-max pooling 2 × 2/1
-C 256@3 × 3/1
-C 256@3 × 3/1
-C 256@3 × 3/1
-28 × 28
-max pooling 2 × 2/1
-C 512@3 × 3/1
-C 512@3 × 3/1
-C 512@3 × 3/1
-14 × 14
-max pooling 2 × 2/1
-C 512@3 × 3/1
-C 512@3 × 3/1
-C 512@3 × 3/1
-7 × 7
-max pooling 2 × 2/1
-Fully Connected 4096
-Dropout, p = 0.5
-Fully Connected 4096
-Dropout, p = 0.5
-Fully Connected 1000
-Figure D.3.: Architecture of VGG-16 D. C 512@3 × 3/1 is a convolutional layer with 512 filters of
-kernel size 3 × 3 with stride 1. All convolutional layers use SAME padding.
-92
-# Type Filters @
-Patch size / stride
-Parameters FLOPs Output size
-Input 3 @ 224 × 224
-1 Convolution 64 @ 3 × 3 × 3 / 1 1 792 186 M 64 @ 224 × 224
-2 Convolution 64 @ 3 × 3 × 64 / 1 36 928 3712M 64 @ 224 × 224
-Max pooling 2 × 2 / 2 0 2 M 64 @ 112 × 112
-3 Convolution 128 @ 3 × 3 × 64 / 1 73 856 1856 M 128 @ 112 × 112
-4 Convolution 128 @ 3 × 3 × 128 / 1 147 584 3705 M 128 @ 112 × 112
-Max pooling 2 × 2 / 2 0 1 M 128 @ 56 × 56
-5 Convolution 256 @ 3 × 3 × 128 / 1 295 168 1853 M 256 @ 56 × 56
-6 Convolution 256 @ 3 × 3 × 256 / 1 590 080 3703 M 256 @ 56 × 56
-7 Convolution 256 @ 3 × 3 × 256 / 1 590 080 3703 M 256 @ 56 × 56
-Max pooling 2 × 2 / 2 0 <1 M 256 @ 28 × 28
-8 Convolution 512 @ 3 × 3 × 256 / 1 1 180 160 1851 M 512 @ 28 × 28
-9 Convolution 512 @ 3 × 3 × 512 / 1 2 359 808 3701 M 512 @ 28 × 28
-10 Convolution 512 @ 3 × 3 × 512 / 1 2 359 808 3701 M 512 @ 28 × 28
-Max pooling 2 × 2 / 2 0 <1 M 512 @ 14 × 14
-11 Convolution 512 @ 3 × 3 × 512 / 1 2 359 808 925 M 512 @ 14 × 14
-12 Convolution 512 @ 3 × 3 × 512 / 1 2 359 808 925 M 512 @ 14 × 14
-13 Convolution 512 @ 3 × 3 × 512 / 1 2 359 808 925 M 512 @ 14 × 14
-Max pooling 2 × 2 / 2 0 <1 M 512 @ 7 × 7
-14 FC 4096 neurons 102 764 544 206 M 4096
-Dropout 0 0 4096
-15 FC 4096 neurons 16 781 312 34 M 4096
-Dropout 0 0 4096
-16 FC 1000 neurons 4 097 000 8 M 1000
-Σ 138 357 544 31 000 M 15 245 800
-Table D.3.: VGG-16 D architecture: The authors chose to number only the layers which have
-learnable parameters. All convolutions are zero padded to prevent size changes and
-use ReLU activation functions. The channels mean is subtracted from each pixel as
-a preprocessing step (−103.939, −116.779, −123.68). As Dropout is only calculated
-during training time, the number of FLOPs is 0. The dropout probability is 0.5.
-The calculated number of parameters was checked against the downloaded version. It
-also has 138 357 544 parameters.
-93
-D.4. GoogleNet, Inception v2 and v3
-The large number of parameters and operations is a problem when such models should get
-applied in practice to thousands of images. In order to reduce the computational cost while
-maintaining the classification quality, GoogleNet [SLJ+15] and the Inception module were
-developed. The Inception module essentially only computes 1 × 1 filters, 3 × 3 filters and
-5 × 5 filters in parallel, but applies bottleneck 1 × 1 filters beforehand to reduce the number of
-parameters. It is shown in Figure D.4.
-Figure D.4.: Inception module
-Image source: [SLJ+15]
-Compared to GoogleNet, Inception v2 [SVI+15] removed the 5 × 5 filters and replaced
-them by two successive layers of 3 × 3 filters. A visualization of an Inception v2 module
-is given in Figure D.5. Additionally, Inception v2 applies successive asymmetric filters to
-approximate symmetric filters with fewer parameters. The authors call this approach filter
-factorization.
-Inception v3 introduced Batch Normalization to the network [SVI+15].
-Figure D.5.: Inception v2 module
-Image source: [SVI+15]
-94
-D.5. Inception-v4
-Inception-v4 as described in [SIV16] consists of four main building blocks: The stem,
-Inception A, Inception B and Inception C. To quote the authors: Inception-v4 is a deeper,
-wider and more uniform simplified architecture than Inception-v3. The stem, Reduction A
-and Reduction B use max-pooling, whereas Inception A, Inception B and Inception C use
-average pooling. The stem, module B and module C use separable convolutions.
-# × Type Parameters Output size
-Input 3 @ 299 × 299
-1 Stem 605 728 384 @ 35 × 35
-2 4× Inception A 317 632 384 @ 35 × 35
-3 Reduction A 2 306 112 1024 @ 17 × 17
-4 7× Inception B 2 936 256 1024 @ 17 × 17
-5 Reduction B 2 747 392 1536 @ 8 × 8
-6 3× Inception C 4 553 088 1536 @ 8 × 8
-Global Average Pooling 0 1536 @ 1 × 1
-Dropout (p=0.8) 0 1536 @ 1 × 1
-7 Softmax 1 537 000 1000
-Σ 42 679 816
-Table D.4.: Inception-v4 network.
-95
-96
-E. Datasets
-Well-known benchmark datasets for classification problems in computer vision are listed
-in Table E.1. The best results known to me are given in Table E.2. However, every semantic
-segmentation dataset (e.g., PASCAL VOC) can also be used to benchmark image classifiers
-using Algorithm 2.
-Database
-Image Resolution
-(width × height)
-Number
-of
-Images
-Number
-of
-Classes
-Channels Data source
-MNIST 28 px × 28 px 70 000 10 1 [YL98, LBBH98]
-HASYv2 32 px × 32 px 168 233 369 1 [Tho17a]
-SVHN 32 px × 32 px 630 420 10 3
-[NWC+11b],
-[NWC+11a]
-CIFAR-10 32 px × 32 px 60 000 10 3 [Kri, KH09]
-CIFAR-100 32 px × 32 px 60 000 100 3 [Kri, KH09]
-STL-10 96 px × 96 px 13 000 10 3 [CLN11, CLN10]
-Caltech-101 (80 px − 3481 px)
-×(92 px − 3999 px) 9144 102 3 [FFP03, FFFP06]
-Caltech-256 (75 px − 7913 px)
-×(75 px − 7913 px) 30 607 257 3 [Gri06, GG07]
-ILSVRC 2012¹ (8 px − 9331 px) × (10 px − 6530 px) 1.2 · 10⁶ 1000 3 [Ima12, RDS+14]
-Places365² (290 px − 3158 px) × (225 px − 2630 px) 1.8 · 10⁶ 365 3 [Zho16, ZKL+16]
-GTSRB (25 px − 266 px)
-×(25 px − 232 px) 51 839 43 3 [SSSI, SSSI12]
-Asirra³ (4 px − 500 px) × (4 px − 500 px) 25 000 2 3 [Asi17, EDHS07]
-Graz-02 480 px × 640 px
-and 640 px × 480 px 1096 3 3 [Mar08, MS07]
-Table E.1.: An overview of publicly available image databases for classification. The number
-of images column gives the sum of the training and the test images. Some datasets, like
-SVHN, have additional unlabeled data which is not given in this table.
-¹ ImageNet Large Scale Visual Recognition Competition
-² The dimensions are only calculated for the validation set.
-³ Asirra is a CAPTCHA created by Microsoft and was used in the “Cats vs Dogs” competition on Kaggle
+47
+47
+109
 97
-Dataset Model type / name Result Score Achieved /
-Claimed by
-MNIST — 0.21 % error [WZZ+13]
-HASYv2 TF-CNN 81.00 % accuracy [Tho17a]
-SVHN DenseNet (k = 24) 1.59 % error [HLW16]
-CIFAR-10 DenseNet-BC (k = 40) 3.46 % error [HLW16]
-CIFAR-100 WRN-28-10 16.21 % error [LH16]
-STL-10 SWWAE-4layer 74.80 % accuracy [ZMGL15]
-Caltech-101 SPP-net (pretrained) 93.42 %±0.5 % accuracy [HZRS14]
-Caltech-256 ZF-Net (pretrained) 74.2 %±0.3 % accuracy [ZF14]
-ImageNet 2012 ResNet ensemble 3.57 % Top-5 error [HZRS15a]
-GTSRB MCDNN 99.46 % accuracy [SL11]
-Asirra SVM 82.7 % accuracy [Gol08]
-Graz-02 Optimal NBNN 78.98 % accuracy [BMDP10]
-Table E.2.: An overview of state-of-the-art results achieved on computer vision datasets.
-Algorithm 2 Create a classification dataset from a semantic segmentation dataset
-Require: Semantic segmentation dataset (DS)
-procedure CreateDataset(Annotated dataset DS)
-DC ← List
-w ← desired image width
-h ← desired image height
-for Image and associated label (x, y) in DS do
-i ← randint(0, L.width − w)
-j ← randint(0, L.height − h)
-cL ← crop(y,(i, j),(i + w, j + h))
-if at least 50% of the pixels of cL are of one class then
-cI ← crop(x,(i, j),(i + w, j + h))
-DC.append((cI , cL))
-return (DC)
-98
-F. List of Tables
-2.1 Pooling types . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 8
-5.1 Baseline architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 39
-5.2 Baseline model evaluation . . . . . . . . . . . . . . . . . . . . . . . . . . . . 40
-5.3 Baseline model speed comparison . . . . . . . . . . . . . . . . . . . . . . . . 40
-5.4 Clustering errors for spectral clustering and CMO on CIFAR-100 . . . . . . 52
-5.5 Differences in spectral clustering and CMO. . . . . . . . . . . . . . . . . . . 52
-5.6 Accuracies for hierarchy of classifiers on CIFAR-100 . . . . . . . . . . . . . . 53
-5.7 Parameters of models with increased capacity . . . . . . . . . . . . . . . . . 54
-5.8 Training time for models with increased capacity . . . . . . . . . . . . . . . 54
-5.9 Baseline model training time . . . . . . . . . . . . . . . . . . . . . . . . . . 59
-5.10 Activation function properties . . . . . . . . . . . . . . . . . . . . . . . . . . 62
-5.11 Activation function evaluation results on CIFAR-100 . . . . . . . . . . . . . 63
-5.12 Activation function timing results on CIFAR-100 . . . . . . . . . . . . . . . 63
-5.13 Activation function evaluation results on MNIST . . . . . . . . . . . . . . . 64
-5.14 Optimized architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 66
-5.15 Optimized model evaluation results . . . . . . . . . . . . . . . . . . . . . . . 67
-5.16 Optimized model speed comparison . . . . . . . . . . . . . . . . . . . . . . . 67
-5.17 Optimized model mean training epochs . . . . . . . . . . . . . . . . . . . . . 68
-5.18 Optimized model trained with early stopping vs training with more data . . 69
-5.19 Model regularization with early stopping on training loss . . . . . . . . . . . 69
-5.20 Model regularization with early stopping on training loss - Training time . . 69
-A.1 99-percentile intervals for filter weights on CIFAR-100 . . . . . . . . . . . . 75
-A.2 Activation function evaluation results on HASYv2 . . . . . . . . . . . . . . . 77
-A.3 Activation function evaluation results on STL-10 . . . . . . . . . . . . . . . 78
-B.1 Data augmentation techniques . . . . . . . . . . . . . . . . . . . . . . . . . . 80
-B.2 Weight initialization schemes . . . . . . . . . . . . . . . . . . . . . . . . . . 81
-B.3 Activation functions . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 84
-D.1 LeNet-5 architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 90
-D.2 AlexNet architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 91
-D.3 VGG-16 D architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 93
-D.4 Inception-v4 network . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 95
-99
-E.1 Image Benchmark datasets . . . . . . . . . . . . . . . . . . . . . . . . . . . . 97
-E.2 State of the Art results . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 98
-100
-G. List of Figures
-2.1 Application of a single image filter (Convolution) . . . . . . . . . . . . . . . 3
-2.2 Application of a convolutional layer . . . . . . . . . . . . . . . . . . . . . . . 6
-2.3 Max pooling . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 8
-2.4 ResNet module . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11
-2.5 Aggregation block . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 12
-2.6 Dense block . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 13
-2.7 Validation curve . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 17
-2.8 Validation curve with plateaus . . . . . . . . . . . . . . . . . . . . . . . . . 18
-2.9 Learning curve . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 20
-2.10 Occlusion analysis . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 25
-2.11 Filter visualization . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 26
-3.1 Cascade-correlation network . . . . . . . . . . . . . . . . . . . . . . . . . . . 28
-4.1 Class Tree . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 33
-5.1 Baseline architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 39
-5.2 Baseline model filter weight distribution . . . . . . . . . . . . . . . . . . . . 42
-5.3 Baseline model bias weight distribution . . . . . . . . . . . . . . . . . . . . . 42
-5.4 Baseline model γ distribution . . . . . . . . . . . . . . . . . . . . . . . . . . 43
-5.5 Baseline model β distribution . . . . . . . . . . . . . . . . . . . . . . . . . . 43
-5.6 Baseline model filter weight range distribution . . . . . . . . . . . . . . . . . 44
-5.7 Baseline model CIFAR-100 validation accuracy . . . . . . . . . . . . . . . . 45
-5.8 Baseline Weight updates (mean) . . . . . . . . . . . . . . . . . . . . . . . . 46
-5.9 Baseline Weight updates (maximum) . . . . . . . . . . . . . . . . . . . . . . 47
-5.10 Baseline Weight updates (sum) . . . . . . . . . . . . . . . . . . . . . . . . . 47
-5.11 Confusion matrices for CIFAR-10 . . . . . . . . . . . . . . . . . . . . . . . . 48
-5.12 Confusion matrices for GTSRB . . . . . . . . . . . . . . . . . . . . . . . . . 49
-5.13 Confusion matrices for HASYv2 . . . . . . . . . . . . . . . . . . . . . . . . . 50
-5.14 Confusion matrix of CIFAR-100 . . . . . . . . . . . . . . . . . . . . . . . . . 51
-5.15 Mean weight updates of model with bottleneck . . . . . . . . . . . . . . . . 55
-5.16 Optimized architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 67
-A.1 Image Filters . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 75
-A.2 Bias weight distribution without BN . . . . . . . . . . . . . . . . . . . . . . 76
-101
-A.3 Maximum weight updates of baseline with bottleneck . . . . . . . . . . . . . 77
-A.4 Sum of weight updates of baseline with bottleneck . . . . . . . . . . . . . . 78
-B.1 Activation functions . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 85
-D.1 LeNet-5 architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 90
-D.2 AlexNet architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 91
-D.3 VGG-16 D architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 92
-D.4 Inception module . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 94
-D.5 Inception v2 module . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 94
-102
-H. Bibliography
-[AAB+16] M. Abadi, A. Agarwal et al., “Tensorflow: Large-scale machine learning on
-heterogeneous distributed systems,” arXiv preprint arXiv:1603.04467, Mar.
-2016. [Online]. Available: https://arxiv.org/abs/1603.04467
-[ABKS99] M. Ankerst, M. M. Breunig et al., “OPTICS: Ordering points to identify the
-clustering structure,” in ACM Sigmod record, vol. 28, no. 2. ACM, 1999, pp.
-49–60.
-[ADG+16] M. Andrychowicz, M. Denil et al., “Learning to learn by gradient descent by
-gradient descent,” in Advances in Neural Information Processing Systems 29
-(NIPS), D. D. Lee, M. Sugiyama et al., Eds. Curran Associates, Inc., Mar.
-2016, pp. 3981–3989. [Online]. Available: http://papers.nips.cc/paper/6461-
-learning-to-learn-by-gradient-descent-by-gradient-descent.pdf
-[AM15] M. T. Alexander Mordvintsev, Christopher Olah, “Inceptionism:
-Going deeper into neural networks,” Jun. 2015. [Online]. Avail￾able: https://research.googleblog.com/2015/06/inceptionism-going-deeper￾into-neural.html
-[Asi17] “Kaggle cats and dogs dataset,” Oct. 2017. [Online]. Available: https:
-//www.microsoft.com/en-us/download/details.aspx?id=54765
-[BB12] J. Bergstra and Y. Bengio, “Random search for hyper-parameter optimization,”
-Journal of Machine Learning Research, vol. 13, no. Feb, pp. 281–305,
-Feb. 2012. [Online]. Available: http://jmlr.csail.mit.edu/papers/volume13/
-bergstra12a/bergstra12a.pdf
-[BCW+17] J. Bao, D. Chen et al., “CVAE-GAN: Fine-grained image generation through
-asymmetric training,” arXiv preprint arXiv:1703.10155, Mar. 2017. [Online].
-Available: https://arxiv.org/abs/1703.10155
-[BDLB09] J. Bergstra, G. Desjardins et al., “Quadratic polynomials learn better im￾age features,” Département d’Informatique et de Recherche Opérationnelle,
-Université de Montréal, Tech. Rep. 1337, 2009.
-[BGNR16] B. Baker, O. Gupta et al., “Designing neural network architectures using
-reinforcement learning,” arXiv preprint arXiv:1611.02167, Nov. 2016. [Online].
-Available: https://arxiv.org/abs/1611.02167
-103
-[BM93] U. Bodenhausen and S. Manke, Automatically Structured Neural
-Networks For Handwritten Character And Word Recognition. London:
-Springer London, Sep. 1993, pp. 956–961. [Online]. Available: http:
-//dx.doi.org/10.1007/978-1-4471-2063-6_283
-[BMDP10] R. Behmo, P. Marcombes et al., “Towards optimal naive Bayes nearest
-neighbor,” in European Conference on Computer Vision (ECCV). Springer,
-2010, pp. 171–184.
-[BPL10] Y.-L. Boureau, J. Ponce, and Y. LeCun, “A theoretical analysis of
-feature pooling in visual recognition,” in International Conference on
-Machine Learning (ICML), no. 27, 2010, pp. 111–118. [Online]. Available:
-http://yann.lecun.com/exdb/publis/pdf/boureau-icml-10.pdf
-[BSF94] Y. Bengio, P. Simard, and P. Frasconi, “Learning long-term dependencies
-with gradient descent is difficult,” IEEE transactions on neural networks,
-vol. 5, no. 2, pp. 157–166, 1994.
-[Cha92] C. Charalambous, “Conjugate gradient algorithm for efficient training
-of artificial neural networks,” IEEE Proceedings G-Circuits, Devices
-and Systems, vol. 139, no. 3, pp. 301–310, 1992. [Online]. Available:
-http://ieeexplore.ieee.org/document/143326/
-[Cho15] F. Chollet, “Keras,” https://github.com/fchollet/keras, 2015.
-[CLN10] A. Coates, H. Lee, and A. Y. Ng, “An analysis of single-layer networks
-in unsupervised feature learning,” Ann Arbor, vol. 1001, no. 48109,
-p. 2, 2010. [Online]. Available: http://cs.stanford.edu/~acoates/papers/
-coatesleeng_aistats_2011.pdf
-[CLN11] A. Coates, H. Lee, and A. Y. Ng, “STL-10 dataset,” 2011. [Online]. Available:
-http://cs.stanford.edu/~acoates/stl10
-[CMS12] D. Ciregan, U. Meier, and J. Schmidhuber, “Multi-column deep neural
-networks for image classification,” in Conference on Computer Vision and
-Pattern Recognition (CVPR). IEEE, Feb. 2012, pp. 3642–3649. [Online].
-Available: https://arxiv.org/abs/1202.2745v1
-[CUH15] D.-A. Clevert, T. Unterthiner, and S. Hochreiter, “Fast and accurate
-deep network learning by exponential linear units (ELUs),” arXiv
-preprint arXiv:1511.07289, Nov. 2015. [Online]. Available: https:
-//arxiv.org/abs/1511.07289
-[CWV+14] S. Chetlur, C. Woolley et al., “cuDNN: Efficient primitives for deep
-learning,” arXiv preprint arXiv:1410.0759, Oct. 2014. [Online]. Available:
-https://arxiv.org/abs/1410.0759
-104
-[DBB+01] C. Dugas, Y. Bengio et al., “Incorporating second-order functional
-knowledge for better option pricing,” in Advances in Neural Infor￾mation Processing Systems 13 (NIPS), T. K. Leen, T. G. Dietterich,
-and V. Tresp, Eds. MIT Press, 2001, pp. 472–478. [Online].
-Available: http://papers.nips.cc/paper/1920-incorporating-second-order￾functional-knowledge-for-better-option-pricing.pdf
-[DDFK16] S. Dieleman, J. De Fauw, and K. Kavukcuoglu, “Exploiting cyclic symmetry
-in convolutional neural networks,” arXiv preprint arXiv:1602.02660, Feb.
-2016. [Online]. Available: https://arxiv.org/abs/1602.02660
-[DHS11] J. Duchi, E. Hazan, and Y. Singer, “Adaptive subgradient methods for
-online learning and stochastic optimization,” Journal of Machine Learning
-Research, vol. 12, no. Jul, pp. 2121–2159, 2011. [Online]. Available:
-http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf
-[DHS16] J. Dai, K. He, and J. Sun, “Instance-aware semantic segmentation via
-multi-task network cascades,” in Conference on Computer Vision and Pattern
-Recognition (CVPR). IEEE, 2016, pp. 3150–3158. [Online]. Available:
-https://arxiv.org/abs/1512.04412
-[DJ99] W. Duch and N. Jankowski, “Survey of neural transfer functions,” Neural
-Computing Surveys, vol. 2, no. 1, pp. 163–212, 1999. [Online]. Available:
-ftp://ftp.icsi.berkeley.edu/pub/ai/jagota/vol2_6.pdf
-[Doz15] T. Dozat, “Incorporating Nesterov momentum into Adam,” Stanford
-University, Tech. Rep., 2015. [Online]. Available: http://cs229.stanford.edu/
-proj2015/054_report.pdf
-[DSRB14] A. Dosovitskiy, J. T. Springenberg et al., “Discriminative unsupervised
-feature learning with convolutional neural networks,” in Advances in Neural
-Information Processing Systems 27 (NIPS), Z. Ghahramani, M. Welling
-et al., Eds. Curran Associates, Inc., 2014, pp. 766–774. [Online].
-Available: http://papers.nips.cc/paper/5548-discriminative-unsupervised￾feature-learning-with-convolutional-neural-networks.pdf
-[DWD15] S. Dieleman, K. W. Willett, and J. Dambre, “Rotation-invariant convolutional
-neural networks for galaxy morphology prediction,” Monthly notices of the
-royal astronomical society, vol. 450, no. 2, pp. 1441–1459, 2015.
-[EDHS07] J. Elson, J. J. Douceur et al., “Asirra: A CAPTCHA that
-exploits interest-aligned manual image categorization,” in ACM Con￾ference on Computer and Communications Security (CCS), no. 14.
-Association for Computing Machinery, Inc., Oct. 2007. [Online].
+114
+116
 105
-Available: https://www.microsoft.com/en-us/research/publication/asirra-a￾captcha-that-exploits-interest-aligned-manual-image-categorization/
-[EKS+96] M. Ester, H.-P. Kriegel et al., “A density-based algorithm for discovering
-clusters in large spatial databases with noise.” in Kdd, vol. 96, no. 34, 1996,
-pp. 226–231.
-[ES03] A. E. Eiben and J. E. Smith, Introduction to evolutionary computing.
-Springer, 2003, vol. 53. [Online]. Available: https://dx.doi.org/10.1007/978-3-
-662-44874-8
-[Fah88] S. E. Fahlman, “An empirical study of learning speed in back-propagation
-networks,” 1988. [Online]. Available: http://repository.cmu.edu/cgi/
-viewcontent.cgi?article=2799&context=compsci
-[FFFP06] L. Fei-Fei, R. Fergus, and P. Perona, “One-shot learning of object
-categories,” IEEE transactions on pattern analysis and machine intelligence,
-vol. 28, no. 4, pp. 594–611, Apr. 2006. [Online]. Available: http:
-//vision.stanford.edu/documents/Fei-FeiFergusPerona2006.pdf
-[FFP03] R. F. Fei-Fei and P. Perona, “Caltech 101,” 2003. [Online]. Available: http:
-//www.vision.caltech.edu/Image_Datasets/Caltech101/Caltech101.html
-[FGMR10] P. F. Felzenszwalb, R. B. Girshick et al., “Object detection with discrimina￾tively trained part-based models,” IEEE transactions on pattern analysis and
-machine intelligence, vol. 32, no. 9, pp. 1627–1645, 2010.
-[FL89] S. E. Fahlman and C. Lebiere, “The cascade-correlation learning architecture,”
-1989. [Online]. Available: http://repository.cmu.edu/compsci/1938/
-[GB10] X. Glorot and Y. Bengio, “Understanding the difficulty of training deep
-feedforward neural networks.” in Aistats, vol. 9, 2010, pp. 249–256. [Online].
-Available: http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf
-[GBB11] X. Glorot, A. Bordes, and Y. Bengio, “Deep sparse rectifier neural
-networks.” in Aistats, vol. 15, no. 106, 2011, p. 275. [Online]. Available:
-http://www.jmlr.org/proceedings/papers/v15/glorot11a/glorot11a.pdf
-[GDDM14] R. Girshick, J. Donahue et al., “Rich feature hierarchies for accurate object
-detection and semantic segmentation,” in Conference on Computer Vision
-and Pattern Recognition (CVPR). IEEE, 2014, pp. 580–587. [Online].
-Available: https://arxiv.org/abs/1311.2524
-[GG07] P. P. Greg Griffin, Alex Holub, “Caltech-256 object category dataset,” Apr.
-2007. [Online]. Available: http://authors.library.caltech.edu/7694/
-106
-[GG16] Y. Gal and Z. Ghahramani, “Bayesian convolutional neural networks with
-Bernoulli approximate variational inference,” arXiv preprint arXiv:1506.02158,
-Jan. 2016. [Online]. Available: https://arxiv.org/abs/1506.02158v6
-[GJ02] M. R. Garey and D. S. Johnson, Computers and intractability. wh freeman
-New York, 2002, vol. 29.
-[GJS76] M. R. Garey, D. S. Johnson, and L. Stockmeyer, “Some simplified NP-complete
-graph problems,” Theoretical computer science, vol. 1, no. 3, pp. 237–267,
-1976.
-[Gol08] P. Golle, “Machine learning attacks against the Asirra CAPTCHA,” in ACM
-conference on Computer and communications security (CCS), no. 15. ACM,
-2008, pp. 535–542.
-[Gra15] B. Graham, “Fractional max-pooling,” arXiv preprint arXiv:1412.6071, May
-2015. [Online]. Available: https://arxiv.org/abs/1412.6071
-[Gri06] A. P. Griffin, G. Holub, “Caltech 256,” 2006. [Online]. Available:
-http://www.vision.caltech.edu/Image_Datasets/Caltech256/
-[GWFM+13] I. J. Goodfellow, D. Warde-Farley et al., “Maxout networks.” ICML,
-vol. 28, no. 3, pp. 1319–1327, 2013. [Online]. Available: http:
-//www.jmlr.org/proceedings/papers/v28/goodfellow13.pdf
-[HAE16] M. Huh, P. Agrawal, and A. A. Efros, “What makes ImageNet good for
-transfer learning?” arXiv preprint arXiv:1608.08614, Aug. 2016. [Online].
-Available: https://arxiv.org/abs/1608.08614
-[Han89] S. J. Hanson, “Meiosis networks.” in NIPS, 1989, pp. 533–541. [Online].
-Available: http://papers.nips.cc/paper/227-meiosis-networks.pdf
-[Har15] M. Harris, “New features in CUDA 7.5,” Jul. 2015. [Online]. Available:
-https://devblogs.nvidia.com/parallelforall/new-features-cuda-7-5/
-[HLW16] G. Huang, Z. Liu, and K. Q. Weinberger, “Densely connected convolutional
-networks,” arXiv preprint arXiv:1608.06993, Aug. 2016. [Online]. Available:
-https://arxiv.org/abs/1608.06993v1
-[HM16] M. Hardt and T. Ma, “Identity matters in deep learning,” arXiv
-preprint arXiv:1611.04231, Nov. 2016. [Online]. Available: https:
-//arxiv.org/abs/1611.04231
-[How13] A. G. Howard, “Some improvements on deep convolutional neural network
-based image classification,” arXiv preprint arXiv:1312.5402, Dec. 2013.
-[Online]. Available: https://arxiv.org/abs/1312.5402
-107
-[HPK11] J. Han, J. Pei, and M. Kamber, Data mining: concepts and techniques.
-Elsevier, 2011.
-[HPN+16] S. Han, J. Pool et al., “DSD: Regularizing deep neural networks with
-dense-sparse-dense training flow,” arXiv preprint arXiv:1607.04381, Jul. 2016.
-[Online]. Available: https://arxiv.org/abs/1607.04381
-[HPTD15] S. Han, J. Pool et al., “Learning both weights and connections for efficient
-neural network,” in Advances in Neural Information Processing Systems 28
-(NIPS), C. Cortes, N. D. Lawrence et al., Eds. Curran Associates, Inc., Jun.
-2015, pp. 1135–1143. [Online]. Available: http://papers.nips.cc/paper/5784-
-learning-both-weights-and-connections-for-efficient-neural-network.pdf
-[HSK+12] G. E. Hinton, N. Srivastava et al., “Improving neural networks by preventing
-co-adaptation of feature detectors,” arXiv preprint arXiv:1207.0580, Jul.
-2012. [Online]. Available: https://arxiv.org/abs/1207.0580
-[HSL+16] G. Huang, Y. Sun et al., “Deep networks with stochastic depth,”
-arXiv preprint arXiv:1603.09382, Mar. 2016. [Online]. Available: https:
-//arxiv.org/abs/1603.09382
-[HSW93] B. Hassibi, D. G. Stork, and G. J. Wolff, “Optimal brain surgeon
-and general network pruning,” in International Conference on Neural
-Networks. IEEE, 1993, pp. 293–299. [Online]. Available: http:
-//ee.caltech.edu/Babak/pubs/conferences/00298572.pdf
-[HVD15] G. Hinton, O. Vinyals, and J. Dean, “Distilling the knowledge in a neural
-network,” arXiv preprint arXiv:1503.02531, Mar. 2015. [Online]. Available:
-https://arxiv.org/abs/1503.02531
-[HZRS14] K. He, X. Zhang et al., “Spatial pyramid pooling in deep convolutional
-networks for visual recognition,” in European Conference on Computer
-Vision (ECCV). Springer, 2014, pp. 346–361. [Online]. Available:
-https://arxiv.org/abs/1406.4729
-[HZRS15a] K. He, X. Zhang et al., “Deep residual learning for image recognition,”
-arXiv preprint arXiv:1512.03385, Dec. 2015. [Online]. Available: https:
-//arxiv.org/abs/1512.03385v1
-[HZRS15b] K. He, X. Zhang et al., “Delving deep into rectifiers: Surpassing human-level
-performance on imagenet classification,” in International Conference on
-Computer Vision (ICCV), Feb. 2015, pp. 1026–1034. [Online]. Available:
-https://arxiv.org/abs/1502.01852
-[Ima12] “Imagenet large scale visual recognition challenge 2012 (ILSVRC2012),”
-108
-2012. [Online]. Available: http://www.image-net.org/challenges/LSVRC/
-2012/nonpub-downloads
-[IS15] S. Ioffe and C. Szegedy, “Batch normalization: Accelerating deep network
-training by reducing internal covariate shift,” arXiv preprint arXiv:1502.03167,
-Feb. 2015. [Online]. Available: https://arxiv.org/abs/1502.03167
-[JXF+16] X. Jin, C. Xu et al., “Deep learning with s-shaped rectified linear activation
-units,” in Thirtieth AAAI Conference on Artificial Intelligence, Dec. 2016.
-[Online]. Available: https://arxiv.org/abs/1512.07030
-[Kar11] A. Karpathy, “Lessons learned from manually classifying CIFAR-10,” Apr.
-2011. [Online]. Available: http://karpathy.github.io/2011/04/27/manually￾classifying-cifar10/
-[KB14] D. Kingma and J. Ba, “Adam: A method for stochastic optimization,”
-arXiv preprint arXiv:1412.6980, Dec. 2014. [Online]. Available: https:
-//arxiv.org/abs/1412.6980
-[KH09] A. Krizhevsky and G. Hinton, “Learning multiple layers of features from tiny
-images,” Apr. 2009. [Online]. Available: https://www.cs.toronto.edu/~kriz/
-learning-features-2009-TR.pdf
-[KMN+16] N. S. Keskar, D. Mudigere et al., “On large-batch training for deep learning:
-Generalization gap and sharp minima,” arXiv preprint arXiv:1609.04836,
-Sep. 2016. [Online]. Available: https://arxiv.org/abs/1609.04836
-[Koc15] T. Kocmánek, “HyperNEAT and novelty search for image recognition,” Ph.D.
-dissertation, Master’s thesis, Czech Technical University in Prague, 2015.
-[Online]. Available: http://kocmi.tk/photos/DiplomaThesis.pdf
-[KPY+15] Y.-D. Kim, E. Park et al., “Compression of deep convolutional neural networks
-for fast and low power mobile applications,” arXiv preprint arXiv:1511.06530,
-Nov. 2015. [Online]. Available: https://arxiv.org/abs/1511.06530
-[KR09] L. Kaufman and P. J. Rousseeuw, Finding groups in data: an introduction to
-cluster analysis. John Wiley & Sons, 2009, vol. 344.
-[Kri] A. Krizhevsky, “The CIFAR-10 dataset.” [Online]. Available: https:
-//www.cs.toronto.edu/~kriz/cifar.html
-[KS02] V. Kurkova and M. Sanguineti, “Comparison of worst case errors in linear
-and neural network approximation,” IEEE Transactions on Information
-Theory, vol. 48, no. 1, pp. 264–275, Jan. 2002. [Online]. Available:
-http://ieeexplore.ieee.org/abstract/document/971754/
-109
-[KSH12] A. Krizhevsky, I. Sutskever, and G. E. Hinton, “Imagenet classification
-with deep convolutional neural networks,” in Advances in Neural
-Information Processing Systems 25 (NIPS), F. Pereira, C. J. C. Burges
-et al., Eds. Curran Associates, Inc., 2012, pp. 1097–1105. [Online].
-Available: http://papers.nips.cc/paper/4824-imagenet-classification-with￾deep-convolutional-neural-networks.pdf
-[KSlB+10] K. Kavukcuoglu, P. Sermanet et al., “Learning convolutional feature
-hierarchies for visual recognition,” in Advances in Neural Information
-Processing Systems 23 (NIPS), J. D. Lafferty, C. K. I. Williams
-et al., Eds. Curran Associates, Inc., 2010, pp. 1090–1098. [Online].
-Available: http://papers.nips.cc/paper/4133-learning-convolutional-feature￾hierarchies-for-visual-recognition.pdf
-[LAE+16] W. Liu, D. Anguelov et al., “SSD: Single shot multibox detector,” in
-European Conference on Computer Vision (ECCV). Springer, 2016, pp.
-21–37. [Online]. Available: https://arxiv.org/abs/1512.02325
-[Las17] “Noise layers,” Jan. 2017. [Online]. Available: http://lasagne.readthedocs.io/
-en/latest/modules/layers/noise.html#lasagne.layers.DropoutLayer
-[LBBH98] Y. LeCun, L. Bottou et al., “Gradient-based learning applied to document
-recognition,” Proceedings of the IEEE, vol. 86, no. 11, pp. 2278–2324, Nov.
-1998. [Online]. Available: http://yann.lecun.com/exdb/publis/pdf/lecun￾01a.pdf
-[LBH15] Y. LeCun, Y. Bengio, and G. Hinton, “Deep learning,” Nature,
-vol. 521, no. 7553, pp. 436–444, May 2015. [Online]. Available:
-http://www.nature.com/nature/journal/v521/n7553/abs/nature14539.html
-[LBOM98] Y. A. LeCun, L. Bottou et al., Efficient BackProp, ser. Lecture Notes in
-Computer Science. Berlin, Heidelberg: Springer Berlin Heidelberg, 1998, vol.
-1524, pp. 9–50. [Online]. Available: http://dx.doi.org/10.1007/3-540-49430-8
-[LDS+89] Y. LeCun, J. S. Denker et al., “Optimal brain damage.” in NIPs, vol. 2, 1989,
-pp. 598–605. [Online]. Available: http://yann.lecun.com/exdb/publis/pdf/
-lecun-90b.pdf
-[Le13] Q. V. Le, “Building high-level features using large scale unsupervised
-learning,” in International conference on acoustics, speech and signal
-processing. IEEE, 2013, pp. 8595–8598. [Online]. Available: http:
-//ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=6639343
-[LG16] A. Lavin and S. Gray, “Fast algorithms for convolutional neural networks,” in
 110
-Conference on Computer Vision and Pattern Recognition (CVPR). IEEE, Sep.
-2016, pp. 4013–4021. [Online]. Available: https://arxiv.org/abs/1509.09308
-[LGT16] C.-Y. Lee, P. W. Gallagher, and Z. Tu, “Generalizing pooling functions in
-convolutional neural networks: Mixed, gated, and tree,” in International
-Conference on Artificial Intelligence and Statistics, 2016. [Online]. Available:
-https://arxiv.org/abs/1509.08985v2
-[LH16] I. Loshchilov and F. Hutter, “SGDR: stochastic gradient descent
-with warm restarts,” Learning, Aug. 2016. [Online]. Available: https:
-//arxiv.org/abs/1608.03983
-[LJD+16] L. Li, K. Jamieson et al., “Hyperband: A novel bandit-based approach to
-hyperparameter optimization,” arXiv preprint arXiv:1603.06560, Mar. 2016.
-[Online]. Available: https://arxiv.org/abs/1603.06560
-[LM16] K. Li and J. Malik, “Learning to optimize,” arXiv preprint arXiv:1606.01885,
-Jun. 2016. [Online]. Available: https://arxiv.org/abs/1606.01885
-[LSD15] J. Long, E. Shelhamer, and T. Darrell, “Fully convolutional networks for
-semantic segmentation,” in Conference on Computer Vision and Pattern
-Recognition (CVPR). IEEE, Mar. 2015, pp. 3431–3440. [Online]. Available:
-https://arxiv.org/abs/1411.4038v2
-[LX17] A. Y. Lingxi Xie, “Genetic CNN,” arXiv preprint arXiv:1703.01513, Mar.
-2017. [Online]. Available: https://arxiv.org/abs/1703.01513
-[Maj17] S. Majumdar, “Densenet,” GitHub, Feb. 2017. [Online]. Available:
-https://github.com/titu1994/DenseNet
-[Mar08] M. Marszałek, “INRIA annotations for Graz-02 (IG02),” Oct. 2008. [Online].
-Available: http://lear.inrialpes.fr/people/marszalek/data/ig02/
-[MDA15] D. Maclaurin, D. Duvenaud, and R. Adams, “Gradient-based hyperparameter
-optimization through reversible learning,” in International Conference on
-Machine Learning (ICML), 2015, pp. 2113–2122.
-[MH08] L. v. d. Maaten and G. Hinton, “Visualizing data using t-SNE,” Journal of
-Machine Learning Research, vol. 9, no. Nov, pp. 2579–2605, 2008.
-[MHN13] A. L. Maas, A. Y. Hannun, and A. Y. Ng, “Rectifier nonlinearities
-improve neural network acoustic models,” in Proc. ICML, vol. 30,
-no. 1, 2013. [Online]. Available: https://web.stanford.edu/~awni/papers/
-relu_hybrid_icml2013_final.pdf
-[MM15] D. Mishkin and J. Matas, “All you need is a good init,” arXiv
+45
+116
+104
 111
-preprint arXiv:1511.06422, Nov. 2015. [Online]. Available: https:
-//arxiv.org/abs/1511.06422
-[MP43] W. S. McCulloch and W. Pitts, “A logical calculus of the ideas immanent in
-nervous activity,” The bulletin of mathematical biophysics, vol. 5, no. 4, pp.
-115–133, 1943.
-[MRM15] N. McLaughlin, J. M. D. Rincon, and P. Miller, “Data-augmentation for
-reducing dataset bias in person re-identification,” in International Conference
-on Advanced Video and Signal Based Surveillance (AVSS), no. 12, Aug. 2015,
-pp. 1–6. [Online]. Available: http://ieeexplore.ieee.org/abstract/document/
-7301739/
-[MS07] M. Marszalek and C. Schmid, “Accurate object localization with
-shape masks,” in Conference on Computer Vision and Pattern
-Recognition (CVPR). IEEE, 2007, pp. 1–8. [Online]. Available: http:
-//ieeexplore.ieee.org/document/4270110/
-[MSM16] D. Mishkin, N. Sergievskiy, and J. Matas, “Systematic evaluation of CNN
-advances on the ImageNet,” arXiv preprint arXiv:1606.02228, Jun. 2016.
-[Online]. Available: https://arxiv.org/abs/1606.02228
-[MV16] A. Mahendran and A. Vedaldi, “Visualizing deep convolutional neural
-networks using natural pre-images,” International Journal of Computer Vision,
-pp. 1–23, Apr. 2016. [Online]. Available: https://arxiv.org/abs/1512.02017
-[NDRT13] N. Natarajan, I. S. Dhillon et al., “Learning with noisy labels,” in Advances
-in Neural Information Processing Systems 26 (NIPS), C. J. C. Burges,
-L. Bottou et al., Eds. Curran Associates, Inc., 2013, pp. 1196–1204. [Online].
-Available: http://papers.nips.cc/paper/5073-learning-with-noisy-labels.pdf
-[Nes83] Y. Nesterov, “A method of solving a convex programming problem with
-convergence rate o (1/k2),” in Soviet Mathematics Doklady, vol. 27, no. 2,
-1983, pp. 372–376.
-[new00] “The training performed by qnstrn,” Aug. 2000. [Online]. Available:
-http://www1.icsi.berkeley.edu/Speech/faq/nn-train.html
-[Ng16] A. Ng, “Nuts and bolts of building ai applications using deep learning,” NIPS
-Talk, Dec. 2016.
-[NH92] S. J. Nowlan and G. E. Hinton, “Simplifying neural networks by soft
-weight-sharing,” Neural computation, vol. 4, no. 4, pp. 473–493, 1992.
-[Online]. Available: https://www.cs.toronto.edu/~hinton/absps/sunspots.pdf
-[NH02] R. T. Ng and J. Han, “CLARANS: A method for clustering objects for spatial
-112
-data mining,” IEEE transactions on knowledge and data engineering, vol. 14,
-no. 5, pp. 1003–1016, 2002.
-[NWC+11a] Y. Netzer, T. Wang et al., “Reading digits in natural images with
-unsupervised feature learning,” in NIPS workshop on deep learning and
-unsupervised feature learning, vol. 2011, no. 2, 2011, p. 5. [Online]. Available:
-http://ufldl.stanford.edu/housenumbers/nips2011_housenumbers.pdf
-[NWC+11b] Y. Netzer, T. Wang et al., “The street view house numbers (SVHN) dataset,”
-2011. [Online]. Available: http://ufldl.stanford.edu/housenumbers/
-[NYC16] A. Nguyen, J. Yosinski, and J. Clune, “Multifaceted feature visualization:
-Uncovering the different types of features learned by each neuron in deep
-neural networks,” arXiv preprint arXiv:1602.03616, May 2016. [Online].
-Available: https://arxiv.org/abs/1602.03616
-[OHIL16] J. Ortigosa-Hernández, I. Inza, and J. A. Lozano, “Towards competitive
-classifiers for unbalanced classification problems: A study on the performance
-scores,” arXiv preprint arXiv:1608.08984, Aug. 2016. [Online]. Available:
-https://arxiv.org/abs/1608.08984
-[PMW+15] N. Papernot, P. McDaniel et al., “Distillation as a defense to adversarial
-perturbations against deep neural networks,” arXiv preprint arXiv:1511.04508,
-Nov. 2015. [Online]. Available: https://arxiv.org/abs/1511.04508
-[Pre98] L. Prechelt, Early Stopping - But When? Berlin, Heidelberg: Springer
-Berlin Heidelberg, 1998, pp. 55–69. [Online]. Available: http://dx.doi.org/
-10.1007/3-540-49430-8_3
-[RDS+14] O. Russakovsky, J. Deng et al., “Imagenet large scale visual recognition
-challenge,” arXiv preprint arXiv:1409.0575, vol. 115, no. 3, pp. 211–252, Sep.
-2014. [Online]. Available: https://arxiv.org/abs/1409.0575
-[RFB15] O. Ronneberger, P. Fischer, and T. Brox, “U-net: Convolutional networks
-for biomedical image segmentation,” in International Conference on Medical
-Image Computing and Computer-Assisted Intervention. Springer, 2015, pp.
-234–241. [Online]. Available: https://arxiv.org/abs/1505.04597
-[RLS10] S. Risi, J. Lehman, and K. O. Stanley, “Evolving the placement and den￾sity of neurons in the hyperneat substrate,” in Conference on Genetic and
-evolutionary computation, no. 12. ACM, 2010, pp. 563–570.
-[RSG16] M. T. Ribeiro, S. Singh, and C. Guestrin, “"why should i trust you?":
-Explaining the predictions of any classifier,” arXiv preprint arXiv:1602.04938,
-Feb. 2016. [Online]. Available: https://arxiv.org/abs/1602.04938
-113
-[Rud16] S. Ruder, “An overview of gradient descent optimization algorithms,”
-arXiv preprint arXiv:1609.04747, Sep. 2016. [Online]. Available: https:
-//arxiv.org/abs/1609.04747
-[SCL12] P. Sermanet, S. Chintala, and Y. LeCun, “Convolutional neural networks
-applied to house numbers digit classification,” in International Conference
-on Pattern Recognition (ICPR), no. 21. IEEE, Apr. 2012, pp. 3288–3291.
-[Online]. Available: https://arxiv.org/abs/1204.3968
-[SDG09] K. O. Stanley, D. B. D’Ambrosio, and J. Gauci, “A hypercube-based encoding
-for evolving large-scale neural networks,” Artificial life, vol. 15, no. 2, pp. 185–
-212, 2009. [Online]. Available: http://ieeexplore.ieee.org/document/6792316/
-[SEZ+13] P. Sermanet, D. Eigen et al., “Overfeat: Integrated recognition, localization
-and detection using convolutional networks,” arXiv preprint arXiv:1312.6229,
-Feb. 2013. [Online]. Available: https://arxiv.org/abs/1312.6229v4
-[SHK+14] N. Srivastava, G. E. Hinton et al., “Dropout: a simple way to
-prevent neural networks from overfitting.” Journal of Machine Learning
-Research, vol. 15, no. 1, pp. 1929–1958, 2014. [Online]. Available:
-https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf
-[SHY+13] A. Senior, G. Heigold et al., “An empirical study of learning rates in deep
-neural networks for speech recognition,” in International Conference on
-Acoustics, Speech and Signal Processing. IEEE, 2013, pp. 6724–6728. [Online].
-Available: http://ieeexplore.ieee.org/document/6638963/?arnumber=6638963
-[SIV16] C. Szegedy, S. Ioffe, and V. Vanhoucke, “Inception-v4, inception-resnet and the
-impact of residual connections on learning,” arXiv preprint arXiv:1602.07261,
-Feb. 2016. [Online]. Available: https://arxiv.org/abs/1602.07261
-[SKP15] F. Schroff, D. Kalenichenko, and J. Philbin, “Facenet: A unified embedding
-for face recognition and clustering,” in Conference on Computer Vision
-and Pattern Recognition (CVPR). IEEE, Mar. 2015, pp. 815–823. [Online].
-Available: https://arxiv.org/abs/1503.03832
-[SL11] P. Sermanet and Y. LeCun, “Traffic sign recognition with multi-scale
-convolutional networks,” in International Joint Conference on Neural
-Networks (IJCNN), Jul. 2011, pp. 2809–2813. [Online]. Available:
-http://ieeexplore.ieee.org/document/6033589/
-[SLJ+15] C. Szegedy, W. Liu et al., “Going deeper with convolutions,” in Conference
-on Computer Vision and Pattern Recognition (CVPR). IEEE, Sep. 2015, pp.
-1–9. [Online]. Available: https://arxiv.org/abs/1409.4842
-[SM02] K. O. Stanley and R. Miikkulainen, “Evolving neural networks through
-114
-augmenting topologies,” Evolutionary computation, vol. 10, no. 2, pp. 99–127,
-2002. [Online]. Available: http://www.mitpressjournals.org/doi/abs/10.1162/
-106365602320169811
-[SMG13] A. M. Saxe, J. L. McClelland, and S. Ganguli, “Exact solutions to
-the nonlinear dynamics of learning in deep linear neural networks,”
-arXiv preprint arXiv:1312.6120, Dec. 2013. [Online]. Available: https:
-//arxiv.org/abs/1312.6120
-[SMGS14] R. K. Srivastava, J. Masci et al., “Understanding locally competitive
-networks,” arXiv preprint arXiv:1410.1165, Oct. 2014. [Online]. Available:
-https://arxiv.org/abs/1410.1165
-[SSSI] J. Stallkamp, M. Schlipsing et al., “The german traffic sign recognition
-benchmark.” [Online]. Available: http://benchmark.ini.rub.de/?section=
-gtsrb&subsection=news
-[SSSI12] J. Stallkamp, M. Schlipsing et al., “Man vs. computer: Benchmarking
-machine learning algorithms for traffic sign recognition,” Neural Networks,
-no. 0, pp. –, 2012. [Online]. Available: http://www.sciencedirect.com/science/
-article/pii/S0893608012000457
-[SV16] S. Saxena and J. Verbeek, “Convolutional neural fabrics,” arXiv preprint
-arXiv:1606.02492, 2016. [Online]. Available: https://arxiv.org/abs/1606.02492
-[SVI+15] C. Szegedy, V. Vanhoucke et al., “Rethinking the inception architecture
-for computer vision,” arXiv preprint arXiv:1512.00567, Dec. 2015. [Online].
-Available: https://arxiv.org/abs/1512.00567v3
-[SVZ13] K. Simonyan, A. Vedaldi, and A. Zisserman, “Deep inside convolutional
-networks: Visualising image classification models and saliency maps,”
-arXiv preprint arXiv:1312.6034, Dec. 2013. [Online]. Available: https:
-//arxiv.org/abs/1312.6034
-[SZ14] K. Simonyan and A. Zisserman, “Very deep convolutional networks for
-large-scale image recognition,” arXiv preprint arXiv:1409.1556, Sep. 2014.
-[Online]. Available: https://arxiv.org/abs/1409.1556
-[SZS+13] C. Szegedy, W. Zaremba et al., “Intriguing properties of neural
-networks,” arXiv preprint arXiv:1312.6199, Dec. 2013. [Online]. Available:
-https://arxiv.org/abs/1312.6199v4
-[TF-16a] “MNIST for ML beginners,” Dec. 2016. [Online]. Available: https:
-//www.tensorflow.org/tutorials/mnist/beginners/
+109
+97
+46
+100
+101
+47
+109
+97
 115
-[tf-16b] “tf.nn.dropout,” Dec. 2016. [Online]. Available: https://www.tensorflow.org/
-api_docs/python/nn/activation_functions_#dropout
-[TH12] T. Tieleman and G. Hinton, “Lecture 6.5-rmsprop: Divide the gradient
-by a running average of its recent magnitude,” COURSERA: Neural
-Networks for Machine Learning, vol. 4, no. 2, 2012. [Online]. Available:
-http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf
-[Tho14a] M. Thoma, “On-line recognition of handwritten mathematical symbols,”
-Karlsruhe, Germany, Nov. 2014. [Online]. Available: http://martin￾thoma.com/write-math
-[Tho14b] M. Thoma, “The Twiddle algorithm,” Sep. 2014. [Online]. Available:
-https://martin-thoma.com/twiddle/
-[Tho16] M. Thoma, “A survey of semantic segmentation,” arXiv preprint
-arXiv:1602.06541, Feb. 2016. [Online]. Available: https://arxiv.org/abs/
-1602.06541
-[Tho17a] M. Thoma, “The HASYv2 dataset,” arXiv preprint arXiv:1701.08380, Jan.
-2017. [Online]. Available: https://arxiv.org/abs/1701.08380
-[Tho17b] M. Thoma, “Master thesis (blog post),” Apr. 2017. [Online]. Available:
-https://martin-thoma.com/msthesis
-[VH13] P. Verbancsics and J. Harguess, “Generative neuroevolution for deep
-learning,” arXiv preprint arXiv:1312.5355, Dec. 2013. [Online]. Available:
-https://arxiv.org/abs/1312.5355
-[vLA87] P. J. M. van Laarhoven and E. H. L. Aarts, Simulated annealing.
-Dordrecht: Springer Netherlands, 1987, pp. 7–15. [Online]. Available:
-http://dx.doi.org/10.1007/978-94-015-7744-1_2
-[VTKP17] E. Vorontsov, C. Trabelsi et al., “On orthogonality and learning recurrent
-networks with long term dependencies,” arXiv preprint arXiv:1702.00071,
-Jan. 2017. [Online]. Available: https://arxiv.org/abs/1702.00071
-[WHH+89] A. Waibel, T. Hanazawa et al., “Phoneme recognition using time-delay
-neural networks,” IEEE transactions on acoustics, speech, and signal
-processing, vol. 37, no. 3, pp. 328–339, Aug. 1989. [Online]. Available:
-http://ieeexplore.ieee.org/document/21701/
-[Wil92] R. J. Williams, “Simple statistical gradient-following algorithms for connec￾tionist reinforcement learning,” Machine learning, vol. 8, no. 3-4, pp. 229–256,
-1992.
 116
-[WWQ13] X. Wang, L. Wang, and Y. Qiao, A Comparative Study of Encoding, Pooling
-and Normalization Methods for Action Recognition. Berlin, Heidelberg:
-Springer Berlin Heidelberg, Nov. 2013, no. 11, pp. 572–585. [Online].
-Available: http://dx.doi.org/10.1007/978-3-642-37431-9_44
-[WYS+15] R. Wu, S. Yan et al., “Deep image: Scaling up image recognition,” arXiv
-preprint arXiv:1501.02876, vol. 7, no. 8, Jul. 2015. [Online]. Available:
-https://arxiv.org/abs/1501.02876v4
-[WZZ+13] L. Wan, M. Zeiler et al., “Regularization of neural networks using dropconnect,”
-in International Conference on Machine Learning (ICML), no. 30, 2013,
-pp. 1058–1066. [Online]. Available: http://www.matthewzeiler.com/pubs/
-icml2013/icml2013.pdf
-[XGD+16] S. Xie, R. Girshick et al., “Aggregated residual transformations for deep
-neural networks,” arXiv preprint arXiv:1611.05431, Nov. 2016. [Online].
-Available: https://arxiv.org/abs/1611.05431v1
-[Xu11] W. Xu, “Towards optimal one pass large scale learning with averaged
-stochastic gradient descent,” arXiv preprint arXiv:1107.2490, Jul. 2011.
-[Online]. Available: https://arxiv.org/abs/1107.2490
-[XWCL15] B. Xu, N. Wang et al., “Empirical evaluation of rectified activations in
-convolutional network,” arXiv preprint arXiv:1505.00853, May 2015. [Online].
-Available: https://arxiv.org/abs/1505.00853
-[XXE12] H. Xiao, H. Xiao, and C. Eckert, “Adversarial label flips attack on
-support vector machines.” in ECAI, 2012, pp. 870–875. [Online]. Available:
-https://www.sec.in.tum.de/assets/Uploads/ecai2.pdf
-[XZY+14] T. Xiao, J. Zhang et al., “Error-driven incremental learning in deep convolu￾tional neural network for large-scale image classification,” in International
-Conference on Multimedia, no. 22. ACM, 2014, pp. 177–186.
-[YL98] C. J. B. Yann LeCun, Corinna Cortes, “The MNIST database of handwritten
-digits,” 1998. [Online]. Available: http://yann.lecun.com/exdb/mnist/
-[ZBH+16] C. Zhang, S. Bengio et al., “Understanding deep learning requires rethinking
-generalization,” arXiv preprint arXiv:1611.03530, Nov. 2016. [Online].
-Available: https://arxiv.org/abs/1611.03530
-[ZCZL16] S. Zhai, Y. Cheng et al., “Doubly convolutional neural networks,” in
-Advances in Neural Information Processing Systems 29 (NIPS), D. D. Lee,
-M. Sugiyama et al., Eds. Curran Associates, Inc., Oct. 2016, pp. 1082–1090.
-[Online]. Available: http://papers.nips.cc/paper/6340-doubly-convolutional￾neural-networks.pdf
-117
-[ZDGD14] N. Zhang, J. Donahue et al., “Part-based R-CNNs for fine-grained category
-detection,” in European Conference on Computer Vision (ECCV). Springer,
-Jul. 2014, pp. 834–849. [Online]. Available: https://arxiv.org/abs/1407.3867
-[Zei12] M. D. Zeiler, “Adadelta: an adaptive learning rate method,” arXiv preprint
-arXiv:1212.5701, Dec. 2012. [Online]. Available: https://arxiv.org/abs/
-1212.5701v1
-[ZF13] M. D. Zeiler and R. Fergus, “Stochastic pooling for regularization of deep
-convolutional neural networks,” arXiv preprint arXiv:1301.3557, Jan. 2013.
-[Online]. Available: https://arxiv.org/abs/1301.3557v1
-[ZF14] M. D. Zeiler and R. Fergus, “Visualizing and understanding convolutional
-networks,” in European Conference on Computer Vision (ECCV). Springer,
-Nov. 2014, pp. 818–833. [Online]. Available: https://arxiv.org/abs/1311.2901
-[Zho16] B. Zhou, “Places2 download,” 2016. [Online]. Available: http://
-places2.csail.mit.edu/download.html
-[ZK16] S. Zagoruyko and N. Komodakis, “Wide residual networks,” arXiv
-preprint arXiv:1605.07146, May 2016. [Online]. Available: https:
-//arxiv.org/abs/1605.07146
-[ZKL+15] B. Zhou, A. Khosla et al., “Learning deep features for discriminative
-localization,” arXiv preprint arXiv:1512.04150, Dec. 2015. [Online]. Available:
-https://arxiv.org/abs/1512.04150
-[ZKL+16] B. Zhou, A. Khosla et al., “Places: An image database for deep scene
-understanding,” arXiv preprint arXiv:1610.02055, Oct. 2016. [Online].
-Available: https://arxiv.org/abs/1610.02055
-[ZL16] B. Zoph and Q. V. Le, “Neural architecture search with reinforcement
-learning,” arXiv preprint arXiv:1611.01578, Nov. 2016. [Online]. Available:
-https://arxiv.org/abs/1611.01578
-[ZMGL15] J. Zhao, M. Mathieu et al., “Stacked what-where auto-encoders,”
-arXiv preprint arXiv:1506.02351, Jun. 2015. [Online]. Available: https:
-//arxiv.org/abs/1506.02351v1
-[ZYL+15] H. Zheng, Z. Yang et al., “Improving deep neural networks using softplus
-units,” in International Joint Conference on Neural Networks (IJCNN), Jul.
-2015, pp. 1–4.
+101
+114
+47
+99
+97
+116
+99
+97
+116
+99
+97
+116
+46
+112
+104
+112
+63
 118
-I. Glossary
-ANN artificial neural network. 4
-ASO Automatic Structure Optimization. 29
-CMO Confusion Matrix Ordering. 2, 35, 36, 51, 52, 71
-CNN Convolutional Neural Network. 1, 3–6, 11, 13, 15, 21–23, 28, 29, 31, 33, 37, 54, 60,
-71, 72, 79, 82–84, 88–91
-ELU Exponential Linear Unit. 38, 57, 60–64, 72, 73, 77, 78, 84
-ES early stopping. 68
-FC Fully Connected. 91, 93
-FLOP floating point operation. 27, 29, 87, 88, 90, 91, 93
-GA genetic algorithm. 30
-GAN Generative Adverserial Network. 80
-GPU graphics processing unit. 37, 40, 59, 63, 67, 88, 91
-HSV hue, saturation, value. 79
-LCN Local Contrast Normalization. 91
-LDA linear discriminant analysis. 79
-LReLU leaky rectified linear unit. 63, 72, 77, 78, 84
-MLP multilayer perceptron. 3–6, 28
-NAG Nesterov Accellerated Momentum. 83
-NEAT NeuroEvolution of Augmenting Topologies. 83
-OBD Optimal Brain Damage. 29
-119
-PCA principal component analysis. 79
-PReLU parametrized rectified linear unit. 60, 61, 63, 64, 72, 77, 78, 84
-ReLU rectified linear unit. 5, 13, 60, 61, 63, 64, 72, 77, 78, 84
-SGD stochastic gradient descent. 5, 30, 45, 46, 82
-ZCA Zero Components Analysis. 79
-120
+61
+49
+46
+48
+9
+-3
+-1
+-6
+5
+3
+2
+-8
+0
+936
+-333
+-109
+-282
+545
+291
+94
+-792
+0
+-4
+-254
+-498
+-662
+-849
+-642
+187
+-520
+45
+240
+211
+388
+215
+-861
+-340
+559
+-105
+185
+-138
+-180
+503
+-718
+429
+350
+173
+251
+268
+-655
+-567
+-53
+-75
+80
+571
+-128
+24
+-408
+596
+-550
+368
+26
+976
+156
+302
+647
+879
+223
+811
+54
+660
+Figure 2.1.: Visualization of the application of a linear k × k × 1 image filter. For each pixel of the
+output image, $k^2$ multiplications and $k^2$ additions of the products have to be calculated.
+
+2. Convolutional Neural Networks
+One important detail is how boundaries are treated. There are four common ways of
+boundary treatment:
+• don’t compute: The image $I'$ will be smaller than the original image:
+$I' \in \mathbb{R}^{(w - k_w + 1) \times (h - k_h + 1) \times d_3}$, to be exact.
+• zero padding: The image I is padded by zeros where the filter would access elements
+which do not exist. This will result in edges being detected at the border if the border
+pixels are not black, but doesn’t need any computation.
+• nearest: Repeat the pixel which is closest to the boundary.
+• reflect: Reflect the image at the boundaries.
+Common tasks that can be done with linear filters include edge detection, corner detection,
+smoothing, sharpening, median filtering, box filtering. See Figure A.1 for five examples.
+Please note that the result of a filtering operation is again an image. This means filters
+can be applied successively. While each pixel after one filtering operation with a 3 × 3
+filter got influenced by 3 · 3 = 9 pixels of the original image, two successively applied 3 × 3
+filters increase the area of the original image which influenced the output. The output is
+then influenced by 25 pixels. This is called the receptive field. The kind of pattern which is
+detected by a filter is called a feature. The bigger the receptive field is, the more complex
+features can get as they are able to consider more of the original image. Instead of taking
+one 5 × 5 filter with 25 parameters, one might consider to take two successive 3 × 3 filters
+with 2 · (3 · 3) = 18 parameters. The 5 × 5 filter is a strict superset of possible filtering
+operations compared to the two 3 × 3 filters, but the relevance of this technique will become
+clear in Section 2.2.
+2.2. CNN Layer Types
+While the idea behind deep MLPs is that feature hierarchies capture the important parts
+of the input more easily, CNNs are inspired by the idea of translational invariance: Many
+features in an image are translationally invariant. For example, if a car is developed, one
+could try to detect it by its parts [FGMR10]. But then there are many positions at which
+the wheels could be. Combining those, it is desirable to capture low-level, translationally
+invariant features at lower layers of an artificial neural network (ANN) and in higher layers
+high-level features which are combinations of the low-level features.
+Also, models should utilize the fact that the pixels of images are ordered. One way to use
+this is by learning image filters in so called convolutional layers.
+While MLPs vectorize the input, the inputs of a layer in a CNN are feature maps. A feature
+map is a matrix $m \in \mathbb{R}^{w \times h}$, but typically the width equals the height (w = h). For an RGB
+
+2.2. CNN Layer Types
+input image, the number of feature maps is d = 3. Each color channel is a feature map.
+Since AlexNet [KSH12] almost halved the error in the ImageNet challenge, CNNs are
+state-of-the-art in various computer vision tasks.
+Traditional CNNs have three important building tools:
+• Convolutional layers with a non-linear activation function as described in Section 2.2.1,
+• pooling layers as described in Section 2.2.2 and
+• normalization layers as described in Section 2.2.4.
+2.2.1. Convolutional Layers
+Convolutional layers take several feature maps as input and produce n feature maps1 as
+output, where n is the number of filters in the convolution layer. The filter weights of
+the linear convolutions are the parameters which are adapted to the training data. The
+number n of filters as well as the filter’s size $k_w \times k_h$ are hyperparameters of convolutional
+layers. Sometimes, it is denoted as $n @ k_w \times k_h$. Although the filter depth is usually omitted
+in the notation, the filters are of dimension $k_w \times k_h \times d^{(i-1)}$, where $d^{(i-1)}$ is the number of
+feature maps of the input layer (i − 1).
+Other hyperparameters of convolution layers are the stride $s \in \mathbb{N}_{\geq 1}$ and the padding.
+Padding (usually zero-padding [SCL12, SEZ+13, HZRS15a]) is used to make sure that the
+size of the feature maps doesn’t change.
+The hyperparameters of convolutional layers are
+• the number of filters n ∈ N≥1,
+• $k_w, k_h \in \mathbb{N}_{\geq 1}$ of the filter size $k_w \times k_h \times d^{(i-1)}$,
+• the activation function of the layer (see Table B.3) and
+• the stride s ∈ N≥1
+Typical choices are n ∈ { 32, 64, 128 }, kw = kh = k ∈ { 1, 3, 5, 11 } such as in [KSH12,
+SZ14, SLJ+15], rectified linear unit (ReLU) activation and s = 1.
+The concept of weight sharing is crucial for CNNs. This concept was introduced in [WHH+89].
+With weight sharing, the filters can be learned with stochastic gradient descent (SGD) just
+like MLPs. In fact, every CNN has an equivalent MLP which computes the same function
+if only the flattened output is compared.
+1
+also called activation maps or channels
+
+2. Convolutional Neural Networks
+This is easier to see when the filtering operation is denoted formally:
+$$o^{(i)}(x) = b + \sum_{j=1}^{k} w_{ij} \cdot x_j \quad \text{with } i \in \{1, \dots, w\} \times \{1, \dots, h\} \times \{1, \dots, d\} \tag{2.1}$$
+$$o^{(x,y,z)}(I) = b + \sum_{i_x = 1 - \lceil k_w / 2 \rceil}^{\lfloor k_w / 2 \rfloor} \; \sum_{i_y = 1 - \lceil k_h / 2 \rceil}^{\lfloor k_h / 2 \rfloor} \; \sum_{i_c = 1}^{d} F_z(i_x, i_y, i_c) \cdot I(x + i_x, y + i_y, i_c) \tag{2.2}$$
+with a bias $b \in \mathbb{R}$, $x \in \{1, \dots, w\}$, $y \in \{1, \dots, h\}$ and $z \in \{1, \dots, d\}$.
+One can see that most weights of the equivalent MLP are zero and many weights are
+equivalent. Hence the advantage of CNNs compared to MLPs is the reduction of parameters.
+The effect of fewer parameters is that less training data is necessary to get suitable
+estimations for those. This means a MLP which is able to compute the same functions as a
+CNN will likely have worse results on the same dataset, if a CNN architecture is suitable
+for the dataset.
+See Figure 2.2 for a visualization of the application of a convolutional layer.
+3 feature maps
+(e.g. RGB) n feature maps
+n filters of
+size k × k × 3
+width w
+width w
+height
+h
+height
+h
+neural
+network
+data
+apply
+. . .
+. . .
+. . .
+. . .
+. . .
+. . .
+Figure 2.2.: Application of a single convolutional layer with n filters of size k × k × 3 with stride
+s = 1 to input data of size width × height with three channels.
+
+2.2. CNN Layer Types
+A convolutional layer with n filters of size $k_w \times k_h$ and SAME padding after $d^{(i-1)}$ feature
+maps of size $s_x \times s_y$ has $n \cdot d^{(i-1)} \cdot (k_w \cdot k_h)$ parameters if no bias is used. In contrast, a fully
+connected layer which produces the same output size and does not use a bias would have
+$n \cdot d^{(i-1)} \cdot (s_x \cdot s_y)^2$ parameters. This means a convolutional layer has drastically fewer
+parameters. On the one hand, this means it can learn less complex decision boundaries. On
+the other hand, it means fewer parameters have to be learned and hence the optimization
+procedure needs fewer examples and the optimization objective is simpler.
+It is particularly interesting to notice that even a convolutional layer of 1 × 1 filters does
+learn a linear combination of the d input feature maps. This can be used for dimensionality
+reduction, if there are fewer 1 × 1 filters in a convolutional layer than input feature maps.
+Another insight recently got important: Every fully connected layer has an equivalent
+convolutional layer which has the same weights.2 This way, one can use the complete
+classification network as a very complex non-linear image filter which can be used for
+semantic segmentation.
+A fully connected layer with d ∈ N≥1 inputs and n ∈ N≥1 nodes can be interpreted as a
+convolutional layer with an input of shape 1 × 1 × d and n filters of size 1 × 1. This will
+produce an output shape 1 × 1 × n. Every single output is connected to all of the inputs.
+When a convolutional layer is followed by a fully connected layer, it is necessary to vectorize
+the feature maps. If the 1 × 1 convolutional filter layer is applied to the vectorized output,
+it is completely equivalent to a fully connected layer. However, the vectorization can be
+omitted if a convolution layer without padding and a filter size equal to the feature maps
+size is applied. This was used by [LSD15].
+2.2.2. Pooling Layers
+Pooling summarizes a p × p area of the input feature map. Just like convolutional layers,
+pooling can be used with a stride of s ∈ N>1. As s ≥ 2 is the usual choice, pooling layers
+are sometimes also called subsampling layers. Typically, p ∈ { 2, 3, 4, 5 } and s = 2 such as
+for AlexNet [KSH12] and VGG-16 [SZ14].
+The type of summary for the set of activations A varies between the functions listed
+in Table 2.1, spatial pyramid pooling as introduced in [HZRS14] and generalizing pooling
+functions as introduced in [LGT16].
+2But convolutional layers only have equivalent fully connected layers if the output feature map is 1 × 1
+
+2. Convolutional Neural Networks
+Name | Definition | Used by
+Max pooling | $\max \{ a \in A \}$ | [BPL10, KSH12]
+Average / mean pooling | $\frac{1}{\lvert A \rvert} \sum_{a \in A} a$ | LeNet-5 [LBBH98] and [KSlB+10]
+$\ell_2$ pooling | $\sqrt{\sum_{a \in A} a^2}$ | [Le13]
+Stochastic pooling | * | [ZF13]
+Table 2.1.: Pooling types for a set A of activations $a \in \mathbb{R}$.
+(*) For stochastic pooling, each of the p × p activation values $a_i$ in the pooling region gets
+picked with probability $p_i = \frac{a_i}{\sum_{a_j \in A} a_j}$. This assumes the activations $a_i$ are non-negative.
+Pooling is applied for three reasons: To get local translational invariance, to get invariance
+against minor local changes and, most importantly, for data reduction to $\frac{1}{s^2}$-th of the data by
+using strides of s > 1.
+See Figure 2.3 for a visualization of max pooling.
+7 9 3 5 9 4
+0 7 0 0 9 0
+5 0 9 3 7 5
+9 2 9 6 4 3
+2 × 2 max pooling
+9 5 9
+9 9 7
+2
+2
+Figure 2.3.: 2 × 2 max pooling applied to a feature map of size 6 × 4 with stride s = 2 and padding.
+Average pooling of p × p areas with stride s can be replaced by a convolutional layer. If
+the input of the pooling layer consists of $d^{(i-1)}$ feature maps, the convolutional layer has to have
+$d^{(i-1)}$ filters of size p × p and stride s. The ith filter has the values
+$$\begin{pmatrix} \frac{1}{p^2} & \cdots & \frac{1}{p^2} \\ \vdots & \ddots & \vdots \\ \frac{1}{p^2} & \cdots & \frac{1}{p^2} \end{pmatrix}$$
+for the dimension i and the zero matrix
+$$\begin{pmatrix} 0 & \cdots & 0 \\ \vdots & \ddots & \vdots \\ 0 & \cdots & 0 \end{pmatrix}$$
+for all other dimensions $j \in \{ 1, \dots, d^{(i-1)} \}$ with $j \neq i$.
+
+2.2. CNN Layer Types
+2.2.3. Dropout
+Dropout is a technique used to prevent overfitting and co-adaptations of neurons by setting
+the output of any neuron to zero with probability p. It was introduced in [HSK+12] and is
+well-described in [SHK+14].
+A Dropout layer can be implemented as follows: For an input in of any shape s, a tensor of
+the same shape $D \in \{ 0, 1 \}^s$ is sampled, where each element $d_i$ is sampled independently
+from a Bernoulli distribution. The results are element-wise multiplied to calculate the
+output out of the Dropout layer:
+$$\text{out} = D \odot \text{in} \quad \text{with } d_i \sim B(1, 1 - p)$$
+where $\odot$ is the Hadamard product
+$$(A \odot B)_{i,j} := (A)_{i,j} \, (B)_{i,j}$$
+Hence every value of the input gets set to zero with a dropout probability of p. Typically,
+Dropout is used with p = 0.5. Layers closer to the input usually have a lower dropout probability than later layers. In order to keep the expected output at the same value, the
+output of a dropout layer is multiplied with $\frac{1}{1-p}$ when dropout is enabled [Las17, tf-16b].
+At inference time, dropout is disabled.
+Dropout is usually only applied after fully connected layers, but not after convolutional
+layers as it usually increases the test error as pointed out in [GG16].
+Models which use Dropout can be interpreted as an ensemble of models with different
+numbers of neurons in each layer, but also with weight sharing.
+Conceptually similar are DropConnect and networks with stochastic depth. DropConnect [WZZ+13] is a generalization of Dropout, which sets weights to zero in contrast to
+setting the output of a neuron to zero. Networks with stochastic depth as introduced
+in [HSL+16] drop out only complete layers. This can be done by having residual networks
+which have one identity connection and one residual feature connection. Hence the residual
+features can be dropped out and the identity connection remains.
+2.2.4. Normalization Layers
+One problem when training deep neural networks is internal covariate shift: While the
+parameters of layers close to the output are adapted to some input produced by lower layers,
+those lower layers’ parameters are also adapted. This leads to the parameters in the upper
+layers being worse. A very low learning rate has to be chosen to adjust for the fact that the
+input features might drastically change over time.
+
+2. Convolutional Neural Networks
+One way to approach this problem is by normalizing mini-batches as described in [IS15]. In a
+Batch Normalization layer, the d-dimensional input $x = (x^{(1)}, \dots, x^{(d)})$ is first normalized
+point-wise to
+$$\hat{x}^{(k)} = \frac{x^{(k)} - \bar{x}^{(k)}}{\sqrt{s'[x^{(k)}]^2 + \varepsilon}}$$
+with $\bar{x}^{(k)} = \frac{1}{m} \sum_{i=1}^{m} x_i^{(k)}$ being the sample mean and
+$s'[x^{(k)}]^2 = \frac{1}{m} \sum_{i=1}^{m} \left( x_i^{(k)} - \bar{x}^{(k)} \right)^2$ the
+sample variance where $m \in \mathbb{N}_{\geq 1}$ is the number of training samples per mini-batch, $\varepsilon > 0$
+being a small constant to prevent division by zero and $x_i^{(k)}$ is the activation of neuron k for
+training sample i.
+Additionally, for each activation $x^{(k)}$ two parameters $\gamma^{(k)}, \beta^{(k)}$ are introduced which scale
+and shift the feature:
+$$y^{(k)} = \gamma^{(k)} \cdot \hat{x}^{(k)} + \beta^{(k)}$$
+In the case of fully connected layers, this is applied to the activation, before the non-linearity
+is applied. If it is applied after the activation, it harms the training in early stages. For
+convolution, only one γ and one β is learned per feature map.
+One important special case is $\gamma^{(k)} = \sqrt{s'[x^{(k)}]^2 + \varepsilon}$ and $\beta^{(k)} = \bar{x}^{(k)}$, which would make the
+Batch Normalization layer an identity layer.
+During evaluation time,3the expected value and the variance are calculated once for the
+complete dataset. An unbiased estimate of the empirical variance is used.
+The question where Batch Normalization layers (BN) should be applied and for which
+reasons is still open. For Dropout, it doesn’t matter if it is applied before or after the
+activation function. Considering this, the possible options for the order are:
+1. CONV / FC → BN → activation function → Dropout → . . .
+2. CONV / FC → activation function → BN → Dropout → . . .
+3. CONV / FC → activation function → Dropout → BN → . . .
+4. CONV / FC → Dropout → BN → activation function → . . .
+The authors of [IS15] suggest using Batch Normalization before the activation function
+as in Items 1 and 4. Batch Normalization after the activation led to better results in
+https://github.com/ducha-aiki/caffenet-benchmark/blob/master/batchnorm.md
+Another normalization layer is Local Response Normalization as described in [KSH12],
+which includes $\ell_2$ normalization as described in [WWQ13]. Those two normalization layers,
+however, are superseded by Batch Normalization.
+3
+also called inference time
+
+2.3. CNN Blocks
+2.3. CNN Blocks
+This section describes more complex building blocks than simple layers. CNN blocks act
+similar to a layer, but they are themselves composed of layers.
+2.3.1. Residual Blocks
+Residual blocks as introduced in [HZRS15a] are a milestone in computer vision. They
+enabled the computer vision community to go from about 16 layers as in VGG 16-D (see
+Appendix D.3) to several hundred layers. The key idea of deep residual networks (ResNets)
+as introduced in [HZRS15a] is to add an identity connection which skips two layers. This
+identity connection adds the feature maps onto the other feature maps and thus requires
+the output of the input layer of the residual block to be of the same dimension as the last layer
+of the residual block.
+Formally, it can be described as follows. If $x_i$ are the feature maps after layer i and $x_0$ is
+the input image, H is a non-linear transformation of feature maps, then
+$$y = H(x)$$
+describes a traditional CNN. Note that this could be multiple layers. A residual block as
+visualized in Figure 2.4 is described by
+$$y = H(x) + x$$
+In [HZRS15a], they only used residual skip connections to skip two layers. Hence, if
+$\text{conv}_i(x_i)$ describes the application of the convolutional layer i to the input $x_i$ without the
+nonlinearity, then such a residual block is
+$$x_{i+2} = \text{conv}_{i+1}(\text{ReLU}(\text{conv}_i(x_i))) + x_i$$
+Figure 2.4.: ResNet module
+Image source: [HZRS15a]
+[HM16] provides some insights why deep residual networks are successful.
+
+2. Convolutional Neural Networks
+2.3.2. Aggregation Blocks
+Two common ways to add more parameters to neural networks are increasing their depth
+by adding more layers or increasing their width by adding more neurons / filters. Inception
+blocks [AM15] implicitly started a new idea which was explicitly described in [XGD+16] as
+“ResNeXt block”: Increasing the cardinality C ∈ N≥1. By cardinality, the authors describe
+the concept of having C small convolutional networks with the same topology but different
+weights. This concept is visualized in Figure 2.5. Please note that Figure 2.5 does not
+combine aggregation blocks with residual blocks as the authors did.
+256-d in
+concatenate
+total 32
+groups
+. . .
+128-d out
+4 @ 1 × 1 × 256
+4 @ 3 × 3 × 4
+4 @ 1 × 1 × 256
+4 @ 3 × 3 × 4
+4 @ 1 × 1 × 256
+4 @ 3 × 3 × 4
+Figure 2.5.: Aggregation block with a cardinality of C = 32. Each of the 32 groups is a 2-layer
+convolutional network. The first layer receives 256 feature maps and applies four 1 × 1
+filters to it. The second layer applies four 3 × 3 filters. Although every group has
+the same topology, the learned weights are different. The outputs of the groups are
+concatenated.
+The hyperparameters of an aggregation block are:
+• The topology of the group members.
+• The cardinality C ∈ N≥1. Note that a cardinality of C = 1 is equivalent in every
+aspect to using the group network without an aggregation block.
+
+2.3. CNN Blocks
+2.3.3. Dense Blocks
+Dense blocks are collections of convolutional layers which are introduced in [HLW16]. The
+idea is to connect each convolutional layer directly to subsequent convolutional layers.
+Traditional CNNs with L layers and one input layer have L connections between layers,
+but dense blocks have $\frac{L(L+1)}{2}$ connections between layers. The input feature maps are
+concatenated in depth. According to the authors, this prevents features from being relearned and allows much fewer filters per convolutional layer. Where AlexNet and VGG-16
+have several hundred filters per convolutional layer (see Tables D.2 and D.3), the authors
+used only on the order of 12 feature maps per layer.
+A dense block is visualized in Figure 2.6.
+256-d in
+k @ 3 × 3
+concatenate
+k @ 3 × 3
+concatenate
+256-d
+k-d
+(256 + k)-d
+k-d
+(256 + L · k)-d out
+Figure 2.6.: Dense block with L = 2 layers and a growth factor of k.
+Dense blocks have four hyperparameters:
+• The activation function being used. The authors use ReLU.
+• The size kw × kh of filters. The authors use kw = kh = 3.
+• The number of layers L, where L = 2 is a simple convolutional layer.
+• The number k of filters added per layer (called growth rate in the paper)
+It might be necessary to use 1 × 1 convolutions to reduce the number of L · k feature maps.
+
+2. Convolutional Neural Networks
+2.4. Transition Layers
+Transition layers are used to overcome constraints imposed by resource limitations or
+architectural design choices. One constraint is the number of feature maps (see Appendix C.3
+for details). In order to reduce the number of feature maps while still keeping as much
+relevant information as possible in the network, a convolutional layer i with $k_i$ filters of
+the shape $1 \times 1 \times k_{i-1}$ is added. The number of filters $k_i$ directly controls the number of
+generated feature maps.
+In order to reduce the dimensionality (width and height) of the feature maps, one typically
+applies pooling.
+Global pooling is another type of transition layer. It applies pooling over the complete
+feature map size to shrink the input to a constant 1 × 1 feature map and hence allows one
+network to have different input sizes.
+
+2.5. Analysis Techniques
+2.5. Analysis Techniques
+CNNs have dozens of hyperparameters and ways to tune them. Although there are
+automatic methods like random search [BB12], grid search [LBOM98], gradient-based
+hyperparameter optimization [MDA15] and Hyperband [LJD+16] some actions need a
+manual investigation to improve the model’s quality. For this reason, analysis techniques
+which guide developers and researchers to the important hyperparameters are necessary. In
+the following, nine diagnostic techniques are explained.
+A machine learning developer has the following choices to improve the model’s quality:
+(I1) Change the problem definition (e.g., the classes which are to be distinguished)
+(I2) Get more training data
+(I3) Clean the training data
+(I4) Change the preprocessing (see Appendix B.1)
+(I5) Augment the training data set (see Appendix B.2)
+(I6) Change the training setup (see Appendices B.3 to B.5)
+(I7) Change the model (see Appendices B.6 and B.7)
+The preprocessing is usually not changed in modern architectures. However, this still leaves
+six very different ways to improve the classifier. Changing the training setup and the model
+each have too many possible choices to explore them completely. Thus, techniques are
+necessary to guide the developer to changes which are most promising to improve the model.
+For all of the following methods, it is important to use only the training set and the
+validation set.
+2.5.1. Qualitative Analysis by Example
+The most basic analysis technique which should always be used is looking at examples
+which the network correctly predicted with a high certainty and what the classifier got
+wrong with a high certainty. Those examples can be arranged by applying t-SNE [MH08].
+On the one hand, this might reveal errors in the training data. Most of the time, training
+data is manually labeled by humans who make mistakes. If a model is fit to those errors,
+its quality decreases.
+On the other hand, this can show differences in the distribution of validation data which
+are not covered by the training set and thus indicate the need to collect more data.
+
+2. Convolutional Neural Networks
+2.5.2. Confusion Matrices
+A confusion matrix is a matrix $(c)_{ij} \in \mathbb{N}_{\geq 0}^{K \times K}$, where $K \in \mathbb{N}_{\geq 2}$ is the number of classes,
+which contains all correct and wrong classifications. The item $c_{ij}$ is the number of times
+items of class $i$ were classified as class $j$. This means the correct classifications are on the
+diagonal $c_{ii}$ and all wrong classifications are off the diagonal. The sum $\sum_{i=1}^{K} \sum_{j=1}^{K} c_{ij}$ is the
+total number of samples which were evaluated and
+$$\frac{\sum_{i=1}^{K} c_{ii}}{\sum_{i=1}^{K} \sum_{j=1}^{K} c_{ij}}$$
+is the accuracy.
+The sums $r(i) = \sum_{j=1}^{K} c_{ij}$ of each class $i$ are worth being investigated as they show if the
+classes are skewed. If the number of samples of one class dominates the data set, then the
+classifier can get a high accuracy by simply always predicting the most common class. If
+the accuracy of the classifier is close to the a priori probability of the most common class,
+techniques to deal with skewed classes might help.
+An automatic criterion to check for this problem is
+$$\text{accuracy} \leq \frac{\max(\{\, r(i) \mid i = 1, \dots, K \,\})}{\sum_{i=1}^{K} r(i)} + \varepsilon$$
+where ε is a small value to compensate the fact that some examples might be correct just
+by chance.
+Other values which should be checked are the class-wise sensitivities:
+$$s(k) = \frac{\#\text{ correctly identified instances of class } k}{\#\text{ instances of class } k} = \frac{c_{kk}}{r(k)} \in [0, 1]$$
+If $s(i)$ is much lower than $s(j)$, it is an indicator that more or cleaner training data is
+necessary for class $i$.
+The class-wise confusion
+$$f_{\text{confusability}}(k_1, k_2) = \frac{c_{k_1 k_2}}{\sum_{j=1}^{K} c_{k_1 j}}$$
+indicates if class k1 gets often classified as class k2. The highest values here can indicate
+if two classes should be merged or a specialized model for separating those classes could
+improve the overall system.
+2.5.3. Validation Curves: Accuracy, loss and other metrics
+Validation curves display a hyperparameter (e.g., the training epoch) on the horizontal
+axis and a quality metric on the vertical axis. Accuracy, error = (1 − accuracy) or loss are
+typical quality metrics. Other quality metrics can be found in [OHIL16].
+In case that the number of training epochs is used as the examined hyperparameter,
+validation curves give an indicator if training longer improves the model’s performance. By
+
+2.5. Analysis Techniques
+plotting the error on the training set as well as the error on a validation set, one can also
+estimate if overfitting might become a problem. See Figure 2.7 for an example.
+10 20 30 40 50 60 70 80 90 100
+0.2
+0.4
+0.6
+0.8
+overfitting
+Epochs
+Error Training set
+Validation set
+Figure 2.7.: A typical validation curve: In this case, the hyperparameter is the number of epochs
+and the quality metric is the error (1 − accuracy). The longer the network is trained,
+the better it gets on the training set. At some point the network is fit too well to the
+training data and loses its capability to generalize. At this point the quality curve of
+the training set and the validation set diverge. While the classifier is still improving on
+the training set, it gets worse on the validation and the test set.
+When the epoch-loss validation curve has plateaus as in Figure 2.8, this means the optimization process did not improve for several epochs. Three possible ways to reduce the
+problem of plateaus are (i) to change weight initialization if the plateau was at the beginning,
+(ii) regularizing the model or (iii) changing the optimization algorithm.
+Loss functions
+The loss function (also called error function or cost function) is a function which assigns a
+real value to a complex event like the predicted class of a feature vector. It is used to define
+the objective function. For classification problems the loss function is typically cross-entropy
+with $\ell_1$ or $\ell_2$ regularization, as it was described in [NH92]:
+$$E_{CE}(W) = \underbrace{-\sum_{x \in X} \sum_{k=1}^{K} \left[ t_k^x \log(o_k^x) + (1 - t_k^x) \log(1 - o_k^x) \right]}_{\text{cross-entropy data loss}} + \underbrace{\lambda_1 \cdot \overbrace{\sum_{w \in W} |w|}^{\ell_1} + \lambda_2 \cdot \overbrace{\sum_{w \in W} w^2}^{\ell_2}}_{\text{model complexity loss}}$$
+where $W$ are the weights, $X$ is the training data set, $K \in \mathbb{N}_{\geq 0}$ is the number of classes and
+$t_k^x$ indicates if the training example $x$ is of class $k$. $o_k^x$ is the output of the classification
+algorithm which depends on the weights. $\lambda_1, \lambda_2 \in [0, \infty)$ weight the regularization and are
+typically smaller than 0.1.
+
+2. Convolutional Neural Networks
+Figure 2.8.: Example for a validation curve (plotted loss function) with plateaus. The dark orange
+curve is smoothed, but the non-smoothed curve is also plotted in light orange.
+The data loss is positive whenever the classification is not correct, whereas the model
+complexity loss is higher for more complex models. The model complexity loss exists due
+to the intuition of Occam’s razor : If two models explain the same data with an accuracy of
+100 %, the simpler model is to be preferred.
+A reason to show the loss for the validation curve technique instead of other quality metrics
+is that it contains more information about the quality of the model. A reason against the
+loss is that it has no upper bound like the accuracy and can be hard to interpret. The
+loss only shows relative learning progress whereas the accuracy shows absolute progress to
+human readers.
+There are three observations in the loss validation curve which can help to improve the
+network:
+• If the loss does not decrease for several epochs, the learning rate might be too low.
+The optimization process might also be stuck in a local minimum.
+• Loss being NaN might be due to too high learning rates. Another reason is division
+by zero or taking the logarithm of zero. In both cases, adding a small constant like
+$10^{-7}$ fixes the problem.
+• If the loss-epoch validation curve has a plateau at the beginning, the weight initialization might be bad.
+
+2.5. Analysis Techniques
+Quality criteria
+There are several quality criteria for classification models. Most quality criteria are based
+on the confusion matrix $c$ which denotes at $c_{ij}$ the number of times the real class was $i$ and $j$
+was predicted. This means the diagonal contains the number of correct predictions. For
+the following, let $t_i = \sum_{j=1}^{k} c_{ij}$ be the number of training samples for class $i$. The most
+common quality criterion is accuracy:
+$$\text{accuracy}(c) = \frac{\sum_{i=1}^{k} c_{ii}}{\sum_{i=1}^{k} t_i} \in [0, 1]$$
+One problem of accuracy as a quality criterion are skewed classes. If one class is by far
+more common than all other classes, then the simplest way to achieve a high score is to
+always classify everything as the most common class.
+In order to fix this problem, one can use the mean accuracy:
+$$\text{mean-accuracy}(c) = \frac{1}{k} \cdot \sum_{i=1}^{k} \frac{c_{ii}}{t_i} \in [0, 1]$$
+For two-class problems there are many other metrics like precision, recall and Fβ-score.
+Quality criteria for semantic segmentation are explained in [Tho16].
+Besides the quality of the classification result, several other quality criteria are important
+in practice:
+• Speed of evaluation for new images,
+• latency,
+• power consumption,
+• robustness against (non)random perturbations in the training data (see [SZS+13,
+PMW+15]),
+• robustness against (non)random perturbations in the training labels (see [NDRT13,
+XXE12]),
+• model size
+As reducing the floating point accuracy allows processing more data on a given device [Har15],
+analysis under this aspect is also highly relevant in some scenarios.
+However, the following focuses on the quality of the classification result.
+
+2. Convolutional Neural Networks
+2.5.4. Learning Curves
+A learning curve is a plot where the horizontal axis displays the number of training samples
+given to the network and the vertical axis displays the error. Two curves are plotted: The
+error on the training set (of which the size is given by the horizontal axis) and the error on
+the test set (which is of fixed size). See Figure 2.9 for an example. The learning curve for the
+validation set is an indicator if more training data without any other changes will improve
+the network’s performance. Having the training set’s learning curve, it is possible to estimate
+if the capacity of the model to fit the data is high enough for the desired classification error.
+The error on the validation set should never be expected to be significantly lower than the
+error on the training set. If the error on the training set is too high, then more data will
+not help. Instead, the model or the training algorithm need to be adjusted.
+If the training set’s learning curve is significantly higher than the validation set’s learning
+curve, then removing features (e.g., by decreasing the image’s resolution), more training
+samples or more regularization will help.
+10 20 30 40 50 60 70 80 90 100
+0.2
+0.4
+0.6
+avoidable bias
+variance
+human-level error
+Training samples
+Error Validation set
+Training set
+Figure 2.9.: A typical learning curve: The more data is used for training, the more errors a given
+architecture will make to fit the given training data. At the same time, it is expected
+that the training data gets more similar to the true distribution of the data which
+should be captured by the test data. At some point, the error on the training and
+test set should be about the same. The term “avoidable bias” was coined by Andrew
+Ng [Ng16]. In some cases it is not possible to classify data correctly by the given
+features. If humans can classify the data given the features correctly, however, then
+the bias is avoidable by building a better classifier.
+The major drawback of this analysis technique is its computational intensity. In order to
+get one point on the training curve and one point on the testing curve, a complete training
+has to be executed. On the full data set, this can be several days on high-end computers.
+
+2.5. Analysis Techniques
+2.5.5. Input-feature based model explanations
+Understanding which clues the model took to come to its prediction is crucial to check if
+the model actually learns what the developer thinks it learns. For example, a model which
+has to distinguish sled dogs from Chihuahuas might simply look at the background and
+check if there is snow. Depending on the training and test data, this works exceptionally
+well. However, it is not the desired solution.
+For classification problems in computer vision, there are two types of visualizations which
+help to diagnose such problems. Both color superpixels of the original image to convey
+information how the model used those superpixels:
+• Correct class heatmap: The probability of the correct class is encoded to give a
+heat map which superpixels are important for the correct class. This can also be done
+by setting the opacity accordingly.
+• Most-likely class image: Each of the most likely classes for all superpixels is
+represented by a color. The colored image thus gives clues why different predictions
+were assigned a high probability.
+Two methods to generate such images are explained in the following.
+Occlusion Sensitivity Analysis
+Occlusion sensitivity analysis is described in [ZF14]. The idea is to occlude a part of the
+image by something. This could be a gray square as in [ZF14] or a black superpixel as
+in [RSG16]. Then the classifier is run on the image again. This is done for each region (e.g.,
+superpixel or position of the square) and the regions are then colored to generate either a
+correct class heatmap or the most-likely class image. It is important to note that the color
+at region $r_i$ denotes the result if $r_i$ is occluded.
+Both visualizations are shown in Figure 2.10. One can see that the network makes sensible
+predictions for this image of the class “Pomeranian”. However, the image of the class “Afghan
+Hound” gets confused with “Ice lolly”, which is a sign that this needs further investigation.
+Gradient-based approaches
+In [SVZ13], a gradient-based approach was used to generate image-specific class saliency
+maps. The authors describe the problem as a ranking problem, where each pixel of the
+image $I_0$ is assigned a score $S_c(I_0)$ for a class $c$ of interest. CNNs are non-linear functions,
+but they can be approximated by the first order Taylor expansion $S_c(I) \approx w^T I + b$ where
+$w$ is the derivative of $S_c$ at $I_0$.
+
+2. Convolutional Neural Networks
+2.5.6. Argmax Method
+The argmax method has two variants:
+• Fixed class argmax: Propagate all elements of a given class through the network
+and analyze which neurons are activated most often / have the highest activation.
+• Fixed neuron argmax: Propagate the data through the network and find the n
+data elements which cause the highest activation for a given neuron.
+Note that a “neuron” is a filter in a CNN. The amount of activation of a filter F by an
+image I is calculated by applying F to I and calculating the element-wise sum of the result.
+Fixed-neuron argmax was applied in [ZF14]. However, they did not stop with that. Besides
+showing the 9 images which caused the highest activation, they also trained a deconvolutional
+neural network to project the activation of the filter back into pixel space.
+The fixed neuron argmax can be used qualitatively to get an impression of the kind of
+features which are learned. This is useful to diagnose problems, for example in [AM15] it is
+described that the network recognized the class “dumbbell” only if a hand was present, too.
+Fixed neuron argmax can also be used quantitatively to estimate the amount of parameters
+being shared between classes or how many parameters are mainly assigned to which classes.
+Going one step further from the fixed neuron argmax method is using an optimization
+algorithm to change an initial image minimally in such a way that any desired class gets
+predicted. This is called caricaturization in [MV16].
+2.5.7. Feature Map Reconstructions
+Feature map visualizations such as the ones made in [ZF14] (see Figure 2.11) give insights
+into the learned features. This shows what the network emphasizes. However, it is not
+necessarily the case that the feature maps allow direct and easy conclusions about the
+learned features. This technique is called inversion in [MV16].
+A key idea of feature map visualizations is to reconstruct a layer’s input, given its activation.
+This makes it possible to find which inputs would cause neurons to activate with extremely
+high or low values.
+More recent work like [NYC16] tries to make the reconstructions’ appearance look more
+natural.
+
+2.5. Analysis Techniques
+2.5.8. Filter comparison
+One question which might lead to some insight is how robust the features are which
+are learned. If the same network is trained with the same data, but different weight
+initializations, the learned weights should still be comparable.
+If the set of learned filters changes with initialization, this might be an indicator for too
+little capacity of that layer. Hence adding more filters to that layer could improve the
+performance.
+Filters can be compared with the k-translation correlation as introduced in [ZCZL16]:
+$$\rho_k(W_i, W_j) = \max_{(x,y) \in \{-k, \dots, k\}^2 \setminus (0,0)} \frac{\langle W_i, T(W_j, x, y) \rangle_f}{\|W_i\|_2 \, \|W_j\|_2} \in [-1, 1],$$
+where $T(\cdot, x, y)$ denotes the translation of the first operand by $(x, y)$, with zero padding at
+the borders to keep the shape. $\langle \cdot, \cdot \rangle_f$ denotes the flattened inner product, where the two
+operands are flattened into column vectors before applying the standard inner product. The
+closer the absolute value of the k-translation correlation to one, the more similar two filters
+Wi, Wj are. According to [ZCZL16], standard CNNs like AlexNet (see Appendix D.2) and
+VGG-16 (see Appendix D.3) have many filters which are highly correlated. They found
+this by comparing the averaged maximum k-translational correlation of the networks with
+Gaussian-distributed initialized filters. The averaged maximum k-translational correlation
+is defined as
+$$\bar{\rho}_k(W) = \frac{1}{N} \sum_{i=1}^{N} \max_{j=1,\, j \neq i}^{N} \rho_k(W_i, W_j)$$
+where N is the number of filters in the layer W and Wi denotes the ith filter.
+2.5.9. Weight update tracking
+Andrej Karpathy proposed in the 5th lecture of CS231n to track weight updates to check if
+the learning rate is well-chosen. He suggests that the weight update should be in the order
+of $10^{-3}$. If the weight update is too high, then the learning rate has to be decreased. If the
+weight update is too low, then the learning rate has to be increased.
+The order of the weight updates as well as possible implications highly depend on the model
+and the training algorithm. See Appendix B.5 for a short overview of training algorithms
+for neural networks.
+
+2. Convolutional Neural Networks
+2.6. Accuracy boosting techniques
+There are techniques which can almost always be applied to improve accuracy of CNN
+classifiers:
+• Ensembles [CMS12]
+• Training-time augmentation (see Appendix B.2)
+• Test-time transformations [DDFK16, How13, HZRS15b]
+• Pre-training and fine-tuning [ZDGD14, GDDM14]
+One of the most simple ensemble techniques which was introduced in [CMS12] is averaging
+the prediction of n classifiers. This improves the accuracy even if the classifiers use exactly
+the same training setup by reducing variance.
+Data augmentation techniques give the optimizer the possibility to take invariances like
+rotation into account by generating artificial training samples from real training samples.
+Data augmentation hence reduces bias and variance with no cost at inference time.
+Data augmentation at inference time reduces the variance of the classifier. Similar to using
+an ensemble, it increases the computational cost of inference.
+Pretraining the classifier on another dataset to start from a good position or finetuning
+a model which was originally created for another task is also a common technique.
+
+2.6. Accuracy boosting techniques
+Figure 2.10.: Occlusion sensitivity analysis by [ZF14]: The left column shows three example images,
+where a gray square occluded a part of the image. This gray square’s center (x, y) was
+moved over the complete image and the classifier was run on each of the occluded
+images. The probability of the correct class, depending on the gray square’s position,
+is shown in the middle column. One can see that the predicted probability of the
+correct class “Pomeranian” drops if the face of the dog is occluded. The last image
+gives the class with the highest predicted probability. In the case of the Pomeranian,
+it always predicts the correct class if the head is visible. However, if the head of the
+dog is occluded, it predicts other classes.
+
+2. Convolutional Neural Networks
+Figure 2.11.: Filter visualization from [ZF14]: The filters themselves as well as the input feature
+maps which caused the highest activation are displayed.
+
+3. Topology Learning
+The topology of a neural network is crucial for the number of parameters, the number
+of floating point operations (FLOPs), the required memory, as well as the features being
+learned. The choice of the topology, however, is still mainly done by trial-and-error.
+This chapter introduces four general approaches to automatic topology learning: Growing a
+network from a minimal network in Section 3.1, pruning in Section 3.2, genetic approaches
+in Section 3.3 and reinforcement learning approaches in Section 3.4.
+3.1. Growing approaches
+Growing approaches for topology learning start with a minimal network, which only has
+the necessary number of input nodes and the number of output nodes which are determined
+by the application and the features of the input. They then apply a criterion to insert new
+layers / neurons into the network.
+In the following, Cascade-Correlation, Meiosis Networks and Automatic Structure Optimization are introduced.
+3.1.1. Cascade-Correlation
+Cascade-Correlation was introduced in [FL89]. It generates a cascading architecture which
+is similar to the dense block described in Section 2.3.3.
+Cascade-Correlation works as follows:
+1. Initialization: The number of input nodes and the number of output nodes are
+defined by the problem. Create a minimal, fully connected network for those.
+2. Training: Train the network until the error no longer decreases.
+3. Candidate Generation: Generate candidate nodes. Each candidate node is connected to all inputs. They are not connected to other candidate nodes and not
+connected to the output nodes.
+
+3. Topology Learning
+4. Correlation Maximization: Train the weights of the candidates by maximizing $S$,
+the correlation between the candidate’s output value $V$ and the network’s residual error:
+$$S = \sum_{o \in O} \left| \sum_{p \in T} \left( V_p - \bar{V} \right) \left( E_{p,o} - \bar{E}_o \right) \right|$$
+where $O$ is the set of output nodes, $T$ is the training set, $V_p$ is the candidate neuron’s
+activation for a training pattern $p$. $E_{p,o}$ is the residual output error at node $o$ for
+pattern $p$. $\bar{V}$ and $\bar{E}_o$ are averaged values over all elements of $T$. This step is finished
+when the correlation no longer increases.
+5. Candidate selection: Keep the candidate node with the highest correlation, freeze
+its incoming weights and add connections to the output nodes.
+6. Continue: If the error is higher than desired, continue with step 2.
+One network with three hidden nodes trained by Cascade-Correlation is shown in Figure 3.1.
+1
+Figure 3.1.: A Cascade-Correlation network with three input nodes (red) and one bias node (gray)
+to the left, three hidden nodes (green) in the middle and two output nodes in the upper
+right corner. The black squares represent frozen weights which are found by correlation
+maximization whereas the white squares are trainable weights.
+3.1.2. Meiosis Networks
+Meiosis Networks are introduced in [Han89]. In contrast to most MLPs and CNNs, where
+weights are deterministic and fixed at prediction time, each weight wij in Meiosis networks
+follows a normal distribution:
+$$w_{ij} \sim \mathcal{N}(\mu_{ij}, \sigma_{ij}^2)$$
+
+3.2. Pruning approaches
+Hence every connection has two learned parameters: $\mu_{ij}$ and $\sigma_{ij}^2$.
+The key idea of Meiosis networks is to allow neurons to perform Meiosis, which is cell
+division. A node $j$ is split when the random part dominates the value of the sampled
+weights:
+$$\frac{\sum_i \sigma_{ij}}{\sum_i \mu_{ij}} > 1 \quad \text{and} \quad \frac{\sum_k \sigma_{jk}}{\sum_k \mu_{jk}} > 1$$
+The mean of the new nodes is sampled around the old mean, half the variance is assigned
+to the new connections.
+Hence Meiosis networks only change the number of neurons per layer. They do not add
+layers or add skip connections.
+3.1.3. Automatic Structure Optimization
+Automatic Structure Optimization (ASO) was introduced in [BM93] for the task of online handwriting recognition. It makes use of the confusion matrix $C = (c_{ij}) \in \mathbb{N}_{\geq 0}^{k \times k}$
+(see Section 2.5.2) to guide the topology learning. They define a confusion-symmetry matrix
+$S$ with $s_{ij} = s_{ji} = c_{ij} \cdot c_{ji}$. The maximum of $S$ defines where the ASO algorithm adds
+more parameters. The details how the resources are added are not transferable to CNNs.
+3.2. Pruning approaches
+Pruning approaches start with a network which is bigger than necessary and prune it. The
+motivation to prune a network which has the desired accuracy is to save storage for easier
+model sharing, memory for easier deployment and FLOPs to reduce inference time and
+energy consumption. Especially for embedded systems, deployment is a challenge and low
+energy consumption is important.
+Pruning generally works as follows:
+1. Train a given network until a reasonable solution is obtained,
+2. prune weights according to a pruning criterion and
+3. retrain the pruned network.
+This procedure can be repeated.
+One family of pruning criteria uses the Hessian matrix. For example, Optimal Brain
+Damage (OBD) as introduced in [LDS+89]. For every single parameter k, OBD calculates
+the effect on the objective function of deleting k. The authors call the effect of the deletion
+
+3. Topology Learning
+of parameter k the saliency sk. The parameters with the lowest saliency are deleted, which
+means they are set to 0 and are not updated anymore.
+A follow-up method called Optimal Brain Surgeon [HSW93] claims to choose the weights
+in a much better way. This requires, however, to calculate the inverse Hessian matrix
+$H^{-1} \in \mathbb{R}^{n \times n}$ where $n \in \mathbb{N}$ is typically $n > 10^6$.
+A much simpler and computationally cheaper pruning criterion is the weight magnitude.
+[HPTD15] prunes all weights w which are below a threshold θ:
+$$w \leftarrow \begin{cases} w & \text{if } w \geq \theta \\ 0 & \text{otherwise} \end{cases}$$
+3.3. Genetic approaches
+The general idea of genetic algorithms (GAs) is to encode the solution space as genes, which
+can recombine themselves via crossover and inversion. An introduction to such algorithms
+is given in [ES03].
+Commonly used techniques to generate neural networks by GAs are NEAT [SM02] and its
+successors HyperNEAT [SDG09] and ES-HyperNEAT [RLS10].
+The results, however, are of unacceptable quality: On MNIST (see Appendix E), where
+random chance gives 10 % accuracy, even simple topologies trained with SGD achieve
+about 92 % accuracy [TF-16a] and state of the art is 99.79 % [WZZ+13], the HyperNEAT
+algorithm achieves only 23.9 % accuracy [VH13].
+Kocmánek shows in [Koc15] that HyperNEAT approaches can achieve 96.47 % accuracy
+on MNIST. Kocmánek mentions that HyperNEAT becomes slower with each hidden layer
+so that not more than three hidden layers could be trained. At the same time, VGG19 [SZ14] already has 19 hidden layers and ResNets are successfully trained with 1202 layers
+in [HZRS15a].
+[LX17] shows that Genetic algorithms can achieve competitive results on MNIST and
+SVHN, but the best results on CIFAR-10 were 7.10 % error whereas the state of the art is
+at 3.74 % [HLW16]. Similarly, the Genetic algorithm achieves 29.03 % error on CIFAR-100,
+but the state of the art is 17.18 % [HLW16].
+3.4. Reinforcement Learning
+Reinforcement learning is a sub-field of machine learning, which focuses on the question
+how to choose actions that lead to high rewards.
+
+3.5. Convolutional Neural Fabrics
+One can think of the search for good neural network topologies as a reinforcement learning
+problem. The agent is a recurrent neural network which can generate bitstrings. Those
+variable-length bitstrings encode neural network topologies.
+In 2016, this approach was applied to construct neural networks for computer vision.
+In [BGNR16], Q-learning with an ε-greedy exploration was applied.
+In [ZL16], the REINFORCE algorithm from [Wil92] was used to train state of the art models
+for CIFAR-10 and the Penn Treebank dataset. A drawback of this method is that enormous
+amounts of computational resources were used to obtain those results.
+3.5. Convolutional Neural Fabrics
+Convolutional Neural Fabrics are introduced in [SV16]. They side-step hard decisions
+about topologies by learning an ensemble of different CNN architectures. The idea is to
+define a single architecture as a trellis through a 3D grid of nodes. Each node represents a
+convolutional layer. One dimension is the index of the layer, the other two dimensions are
+the amount of filters and the feature size. Each node is connected to nine other nodes and
+thus represents nine possible choices of convolutional layers:
+• Resolution: (i) convolution with stride=1 or (ii) convolution with stride=2 or
+(iii) deconvolution (doubling the resolution)
+• Channels: (i) half the number of filters than the layer before (ii) the same number
+of filters as the layer before (iii) double the number of filters than the layer before
+They always use ReLU as an activation function and they always use filters of size 3 × 3.
+They don’t use pooling at all.
+
+3. Topology Learning
+
+4. Hierarchical Classification
+Designing a classifier for a new dataset is hard for two main reasons: Many design choices are
+not clearly superior to others and evaluating one design choice takes much time. Especially
+CNNs are known to take several days [KSH12, SLJ+15] or even weeks [SZ14] to train.
+Additionally, some methods for analyzing a dataset become harder to use with more classes
+and more training samples. Examples are t-SNE, the manual inspection of errors and
+confusion matrices, and the argmax method.
+One idea to approach this problem is by building a hierarchy of classifiers. The root
+classifier distinguishes clusters of classes, whereas the leaf classifiers distinguish single
+classes. Figure 4.1 gives an example for a hierarchy of classifiers.
+Figure 4.1.: Example for a hierarchy of classifiers. Each classifier is visualized by a rounded rectangle.
+The root classifier C0 has to distinguish six coarse classes (pedestrian, four+-wheelers,
+traffic signs, two-wheelers, street, other) or 17 fine-grained classes. If C0 predicts a
+pedestrian, another classifier has to predict if it is an adult or a child. Similarly, if C0
+predicts traffic sign, then another classifier has to predict if it is a speed limit, a
+sign indicating danger or something else. If C0, however, predicts road, then no other
+classifier will become active.
+In this example, the problem has 17 classes. The hierarchical approach introduces
+7 clusters of classes and thus uses 8 classifiers.
+Such a hierarchy of classifiers needs clusters of classes.
+
+4. Hierarchical Classification
+4.1. Advantages of classifier hierarchies
+Having a classifier hierarchy has five advantages:
+• Division of labor: Different teams can work together. Instead of having a monolithic
+task, the solutions can be combined.
+• Guarantees: Changing a classifier will only change the prediction of itself and its
+children. Siblings are not affected. In the example from Figure 4.1, the classifier
+which distinguishes traffic signs can be changed while the classification as pedestrian,
+four+-wheelers, traffic sign, street, other will not be affected. Also, the
+classification between speed limits, danger signs and other signs will not change.
+• Faster training: Except for the root classifier C0, each other classifier will have
+less than the total amount of training data. Depending on the combined classes, the
+models could also be simpler. Hence the training time is reduced.
+• Weighting of errors: In practice, some errors are more severe than others. For
+example, it could be acceptable if the two-wheelers classifier has an error rate of
+40 %. But it is not acceptable if the speed limit classifier has such a high error rate.
+• Post-hoc explanations: The simpler a model is, the easier it is to explain why a
+classification is made the way it is made.
+4.2. Clustering classes
+There are two ways to cluster classes: By similarity or by semantics. While semantic
+clustering needs either additional information or manual work, the similarity can be
+automatically inferred from the data. As pointed out in [XZY+14], semantically similar
+classes are often also visually similar. For example, in the ImageNet dataset most dogs
+are semantically and visually more similar to each other than to non-dogs. An example
+where this is obviously not the case are symbols: The summation symbol \sum is identical
+in appearance to the Greek letter \Sigma, but semantically much closer to the addition
+operator +.
+One approach to cluster classes by similarity is to train a classifier and examine its
+predictions. Each class is represented in the confusion matrix by one row. Those rows
+can be directly clustered with standard clustering algorithms such as k-means, DBSCAN [EKS+96],
+OPTICS [ABKS99], CLARANS [NH02], DIANA [KR09], AHC (see [HPK11]) or spectral
+clustering as in [XZY+14]. Those clusterings, however, are hard to interpret and most of
+them do not allow a human to improve the found clustering manually.
+The confusion matrix $(c)_{ij} \in \mathbb{N}^{k \times k}$ states how often class i was present and class j was
+
+4.2. Clustering classes
+predicted. The more often this confusion happens, the more similar those two classes are to
+the classifier. Based on the confusion matrix, the classes can be clustered as explained in
+the following.
+[HAE16] indicates that more classes make it easier to generalize, but the accuracy gains
+diminish after a critical point of classes is reached. Hence a binary tree might not be a
+good choice. As an alternative, an approach which allows building arbitrarily many clusters
+is proposed.
+The proposed algorithm has two main ideas:
+• The order of columns and rows in the confusion matrix is arbitrary. This means one
+can swap rows and columns. If row i and j are swapped, then the columns i and j
+have to be swapped too in order to keep the same confusion matrix.
+• If two classes are confused often, then they are similar to the classifier.
+Hence the order of the classes is permutated in such a way that the highest errors are close
+to the diagonal. One possible objective function to be minimized is
+$$f(C) = \sum_{i=1}^{n} \sum_{j=1}^{n} C_{ij} \cdot |i - j| \tag{4.1}$$
+which punishes errors linearly with the distance to the diagonal. This method is called CMO
+in the following.
+As pointed out by Tobias Ribizel (personal communication), this optimization problem
+is a weighted version of the Optimal Linear Arrangement problem. That problem is NP-complete [GJ02, GJS76]. Simulated Annealing as described in Algorithm 1, however,
+produces reasonable clusterings as well as visually appealing confusion matrices. The
+algorithm works as follows: First, decide with probability 0.5 if only two random rows are
+swapped or a block is swapped. If two rows are swapped, choose both of them randomly.
+If a block is swapped, then choose the start randomly and the end of the block randomly
+after the start. The insert position has to be a valid position considering the block length,
+but besides that it is also chosen uniformly random.
+Simple row-swapping can exploit local improvements. For example, in the context of
+ImageNet, it can move the dog class Silky Terrier next to the dog class Yorkshire Terrier
+and the two dog classes Dalmatian and Greyhound next to each other. The two clusters
+of dog breeds could, however, still be separated by car and bus due to random chance. Moving any single
+class increases the score, but moving either one of the dog breed clusters or the vehicle
+cluster decreases the score. Hence it is beneficial to implement block moving.
+One advantage of permutating the classes in order to minimize Equation (4.1) in comparison
+to spectral clustering as used in [XZY+14] is that the adjusted confusion matrix can be
+
+4. Hierarchical Classification
+split into many much smaller matrices along the diagonal. In the case of many classes (e.g.,
+1000 classes of ImageNet or 369 classes of HASYv2) this permutation makes it possible to
+visualize the types of errors made. If the errors are systematic due to visual similarity, many
+confusions are not made and thus many elements of the confusion matrix are close to 0.
+Those will be moved to the corners of the confusion matrix by optimizing Equation (4.1).
+Once a permutation of the classes is found which has a low score according to Equation (4.1), the clusters
+can be made by hand by deciding which neighboring classes should not be in one cluster. With
+such a permutation, only n − 1 binary decisions have to be made and hence only the list of
+classes has to be read. Alternatively, one can calculate the confusions $C'_{i,i+1} + C'_{i+1,i}$ for
+each pair of classes which are neighbors in the confusion matrix. The higher this value, the
+more similar are the classes according to the classifier. Hence a threshold θ can be applied.
+θ can either be set automatically (e.g., such that 10 % of all pairs are above the threshold)
+or semi-automatically by asking the user for information if two classes belong to the same
+cluster. Such an approach only needs log(n) binary decisions from the user where n is the
+number of classes.
+Please note that CMO only works if the classifier is neither too bad nor too good. A classifier
+which does not solve the task at all might just give almost uniform predictions whereas the
+confusion matrix of an extremely good classifier is almost diagonal and thus contains no
+information about the similarity of classes. One possible solution to this problem is to take
+the prediction of the class in contrast to using only the argmax in order to find a useful
+permutation.
+
+5. Experimental Evaluation
+All experiments are implemented using Keras 2.0 [Cho15] with Tensorflow 1.0 [AAB+16]
+and cuDNN 5.1 [CWV+14] as the backend. The experiments were run on different machines
+with different Nvidia graphics processing units (GPUs), including the Titan Black, GeForce
+GTX 970 and GeForce 940MX.
+The GTSRB [SSSI12], SVHN [NWC+11b], CIFAR-10 and CIFAR-100 [Kri], MNIST [YL98],
+HASYv2 [Tho17a] and STL-10 [CLN10] datasets are used for the evaluation. Those datasets are
+used as their size is small enough to be trained within a day. Other classification datasets
+which were considered are listed in Appendix E.
+CIFAR-10 (Canadian Institute for Advanced Research 10) is a 10-class dataset of color
+images of the size 32 px × 32 px. Its ten classes are airplane, automobile, bird, cat, deer,
+dog, frog, horse, ship, truck. The state of the art achieves an accuracy of 96.54 % [HLW16].
+According to [Kar11], human accuracy is at about 94 %.
+CIFAR-100 is a 100-class dataset of color images of the size 32 px × 32 px. Its 100 classes
+are grouped to 20 superclasses. It includes animals, people, plants, outdoor scenes, vehicles
+and other items. CIFAR-100 is not a superset of CIFAR-10, as CIFAR-100 does not contain
+the class airplane. The state of the art achieves an accuracy of 82.82 % [HLW16].
+GTSRB (German Traffic Sign Recognition Benchmark) is a 43-class dataset of traffic signs.
+The 51 839 images are in color and of a minimum size of 25 px×25 px up to 266 px×232 px.
+The state of the art achieves 99.46 % accuracy with an ensemble of 25 CNNs [SL11].
+According to [SSSI], human performance is at 98.84 %.
+HASYv2 (Handwritten Symbols version 2) is a 369 class dataset of black-and-white images
+of the size 32 px × 32 px. The 369 classes contain the Latin and Greek letters, arrows,
+mathematical symbols. The state of the art achieves an accuracy of 82.00 % [Tho17a].
+STL-10 (self-taught learning 10) is a 10-class dataset of color images of the size 96 px×96 px.
+Its ten classes are airplane, bird, car, cat, deer, dog, horse, monkey, ship, truck. The state
+of the art achieves an accuracy of 74.80 % [ZMGL15]. It contains 100 000 unlabeled images
+for unsupervised training and 500 images per class for supervised training.
+SVHN (Street View House Numbers) exists in two formats. For the following experiments,
+the cropped digit format was used. It contains the 10 digits cropped from photos of Google
+Street View. The images are in color and of size 32 px × 32 px. The state of the art
+
+5. Experimental Evaluation
+achieves an accuracy of 98.41 % [HLW16]. According to [NWC+11a], human performance
+is at 98.0 %.
+As a preprocessing step, the pixel-features were divided by 255 to obtain values in [0, 1].
+For GTSRB, the training and test data was scaled to 32 px × 32 px.
+5.1. Baseline Model and Training setup
+The baseline model is trained with Adam [KB14], an initial learning rate of $10^{-4}$, a batch
+size of 64 for at most 1000 epochs with data augmentation. The kind of data augmentation
+depends on the dataset:
+• CIFAR-10, CIFAR-100 and STL-10: Random width and height shift by at most
+±3 pixels in either direction; Random horizontal flip.
+• GTSRB, MNIST: Random width and height shift by at most ±5 pixels in either
+direction; random rotation by at most ±15 degrees; random channel shift; random
+zoom in [0.5, 1.5]; random shear by at most 6 degrees.
+• HASYv2: Random width and height shift by at most ±5 pixels in either direction;
+random rotation by at most ±5 degrees.
+• SVHN: No data augmentation.
+If the dataset does not define a training/test set, a stratified 67 % / 33 % split is applied. If
+the dataset does not define a validation set, the training set is split in a stratified manner
+into 90 % training set / 10 % validation set.
+Early stopping [Pre98] with the validation accuracy as a stopping criterion and a patience of
+10 epochs is applied. After this, the model is trained without data augmentation for at most
+1000 epochs with early stopping and the validation accuracy as a stopping criterion and a
+patience of 10 epochs. Kernel weights are initialized according to the uniform initialization
+scheme of He [HZRS15b] (see Appendix B.3).
+The architecture of the baseline model uses a pattern of
+$$\text{Conv-Block}(n) = (\text{Convolution} - \text{Batch Normalization} - \text{Activation})^{n} - \text{Pooling}$$
+The activation function is the Exponential Linear Unit (ELU) (see Table B.3), except for
+the last layer where softmax is used. Before the last two convolutional layers, a dropout
+layer with dropout probability 0.5 is applied. The architecture is given in detail in Table 5.1.
+Please note that the number of input- and output channels of the network depends on
+the dataset. If the input image is larger than 32 px × 32 px, for each power of two a
+Conv-Block(2) is added at the input. For MNIST, the images are bilinearly upsampled to
+32 px × 32 px.
+
+5.1. Baseline Model and Training setup
+# Type Filters @
+Patch size / stride
+Parameters FLOPs Output size
+Input 0 0 3 @ 32 × 32
+1 Convolution 32 @ 3 × 3 × 3 / 1 896 1 736 704 32 @ 32 × 32
+2 BN + ELU 64 163 904 32 @ 32 × 32
+3 Convolution 32 @ 3 × 3 × 32 / 1 9 248 18 841 600 32 @ 32 × 32
+4 BN + ELU 64 163 904 32 @ 32 × 32
+Max pooling 2 × 2 / 2 0 40 960 32 @ 16 × 16
+5 Convolution 64 @ 3 × 3 × 32 / 1 18 496 9 420 800 64 @ 16 × 16
+6 BN + ELU 128 82 048 64 @ 16 × 16
+7 Convolution 64 @ 3 × 3 × 64 / 1 36 928 18 857 984 64 @ 16 × 16
+8 BN + ELU 128 82 048 64 @ 16 × 16
+Max pooling 2 × 2 / 2 20 480 64 @ 8 × 8
+9 Convolution 64 @ 3 × 3 × 64 / 1 36 928 4 714 496 64 @ 8 × 8
+10 BN + ELU 128 20 608 64 @ 8 × 8
+Max pooling 2 × 2 / 2 5 120 64 @ 4 × 4
+11 Convolution (v) 512 @ 4 × 4 × 64 / 1 524 800 1 048 064 512 @ 1 × 1
+12 BN + ELU 1 024 3 584 512 @ 1 × 1
+Dropout 0.5 0 0 512 @ 1 × 1
+13 Convolution 512 @ 1 × 1 × 512 / 1 262 656 523 776 512 @ 1 × 1
+14 BN + ELU 1 024 3 584 512 @ 1 × 1
+Dropout 0.5 0 0 512 @ 1 × 1
+15 Convolution k @ 1 × 1 × 512 / 1 k · (512 + 1) 1024 · k k @ 1 × 1
+Global avg Pooling 1 × 1 0 k k @ 1 × 1
+16 BN + Softmax 2k 7k k @ 1 × 1
+P 515k
++892 512
+1032k
++55 729 664 103 424+2k
+Table 5.1.: Baseline architecture with 3 input channels of size 32 × 32. All convolutional layers
+use SAME padding, except for layer 11 which used VALID padding in order to decrease
+the feature map size to 1 × 1. If the input feature map is bigger than 32 × 32, for
+each power of two there are two Convolution + BN + ELU blocks and one Max pooling
+block added. This is the framed part in the table.
+32 × 32
+Input
+C 32@3 × 3/1
+BN + ELU
+C 32@3 × 3/1
+BN + ELU
+16 × 16
+max pooling 2 × 2/2
+C 64@3 × 3/1
+BN + ELU
+C 64@3 × 3/1
+BN + ELU
+8 × 8
+max pooling 2 × 2/2
+C 64@3 × 3/1
+BN + ELU
+4 × 4
+max pooling 2 × 2/2
+C 512@4 × 4/1 (V)
+BN + ELU
+Dropout, p = 0.5
+1 × 1
+C 512@1 × 1/1
+BN + ELU
+Dropout, p = 0.5
+C k@1 × 1/1
+Global AVG pooling
+BN + Softmax
+Figure 5.1.: Architecture of the baseline model. C 32@3×3/1 is a convolutional layer with 32 filters
+of kernel size 3 × 3 with stride 1.
+
+5. Experimental Evaluation
+5.1.1. Baseline Evaluation
+The results for the baseline model evaluated on eight datasets are given in Table 5.2. The
+speed for inference for different GPUs is given in Table 5.3.
+Dataset Single Model Accuracy Ensemble of 10
+Training Set Test Set Training Set Test Set
+Asirra 94.22 % σ = 3.49 94.37 % σ = 3.47 97.07 % 97.37 %
+CIFAR-10 91.23 % σ = 1.10 85.84 % σ = 0.87 92.36 % 86.75 %
+CIFAR-100 76.64 % σ = 1.48 63.38 % σ = 0.55 78.30 % 64.70 %
+GTSRB 100.00 % σ = 0.00 99.18 % σ = 0.11 100.00 % 99.46 %
+HASYv2 89.49 % σ = 0.42 85.35 % σ = 0.10 89.94 % 86.03 %
+MNIST 99.93 % σ = 0.07 99.53 % σ = 0.06 99.99 % 99.58 %
+STL-10 94.12 % σ = 0.87 75.67 % σ = 0.34 96.35 % 77.62 %
+SVHN 99.02 % σ = 0.07 96.28 % σ = 0.10 99.42 % 97.20 %
+Table 5.2.: Baseline model accuracy on eight datasets. The single model accuracy is the mean accuracy of the 10 models
+used in the ensemble. The empirical standard deviation σ of the accuracy is also given.
+CIFAR-10, CIFAR-100 and STL-10 models use test-time transformations. None of the
+models uses unlabeled data or data from other datasets. For HASYv2 no test time
+transformations are used.
+Network GPU Tensorflow Inference per Training
+1 Image 128 images time / epoch
+Baseline Default Intel i7-4930K 3 ms 244 ms 231.0 s
+Baseline Optimized Intel i7-4930K 2 ms 143 ms 149.0 s
+Baseline Default GeForce 940MX 4 ms 120 ms 145.6 s
+Baseline Default GTX 970 6 ms 32 ms 25.0 s-26.3 s
+Baseline Default GTX 980 3 ms 24 ms 20.5 s-21.1 s
+Baseline Default GTX 980 Ti 5 ms 27 ms 22.0 s-22.1 s
+Baseline Default GTX 1070 2 ms 15 ms 14.4 s-14.5 s
+Baseline Default Titan Black 4 ms 25 ms 28.1 s-28.1 s
+Baseline Optimized Titan Black 3 ms 22 ms 24.4 s-24.4 s
+DenseNet-40-12 Default GeForce 940MX 27 ms 2403 ms —
+Table 5.3.: Speed comparison of the baseline model on CIFAR-10. The baseline model is evaluated on
+six Nvidia GPUs and one CPU. The weights for DenseNet-40-12 are taken from [Maj17].
+Weights of the baseline model can be found at [Tho17b]. The optimized Tensorflow build
+makes use of SSE4.X, AVX, AVX2 and FMA instructions.
+
+5.1. Baseline Model and Training setup
+5.1.2. Weight distribution
+The distribution of filter weights by layer is visualized in Figure 5.2 and the distribution
+of bias weights by layer is shown in Figure 5.3. Although both figures only show the
+distribution for one specific model trained on CIFAR-100, the following observed patterns
+are consistent for 70 models (7 datasets and 10 models per dataset):
+• The empiric [0.5 − percentile, 99.5 − percentile] interval which contains 99 % of the
+filter weights is almost symmetric around zero. The same is true for the bias weights.
+• The farther away a layer is from the input, the smaller the 99-percentile interval is,
+except for the last layer (see Table A.1).
+• The 99-percentile interval of the first layer's filter weights is about [−0.5, +0.5], except
+for MNIST and HASYv2 where it is in [−0.8, 0.8].
+• The 99-percentile interval of the first layer's bias weights is always in [−0.2, 0.2].
+• The distribution of filter weights of the last convolutional layer is not symmetric. In
+some cases the distribution is also not unimodal.
+• The bias weights of the last three layers are very close to zero. The absolute value of
+most of them is smaller than $10^{-2}$.
+Similarly, Figure 5.4 and Figure 5.5 show the distribution of the γ and the β parameter of
+Batch Normalization. It is expected that γ is close to 1 and β is close to 0. In those cases,
+the Batch Normalization layer equals the identity and thus is only relevant for the training.
+While γ and β do not show as clear patterns as the filter and bias weights of convolutional
+layers, some observations are also consistent through all models even for different datasets:
+• γ of the last layer (layer 16) is bigger than 1.3.
+• The 99-percentile interval for β of the last layer is longer than the other 99-percentile
+intervals.
+• The 99-percentile interval for β of the fourth-last layer (layer 14 for STL-10, layer 10 for
+all other models) is more negative than that of all other layers.
+Finally, the distribution of filter weight ranges is plotted in Figure 5.6 for each convolutional
+layer. The ranges are calculated for each channel and filter separately. The smaller the
+values are, the less information is lost if the filters are replaced by smaller filters.
+
+5. Experimental Evaluation
+Figure 5.2.: Violin plots of the distribution of filter weights of a baseline model trained on CIFAR-100. The weights of the first layer are relatively evenly spread in the interval [−0.4, +0.4].
+With every layer the interval which contains 95 % of the weights and is centered around
+the mean becomes smaller, especially with layer 11 where the feature maps are of
+size 1 × 1. In contrast to the other layers, the last convolutional layer has a bimodal
+distribution.
+This plot indicates that the network might benefit from bigger filters in the first layer,
+whereas the filters in layers 7 – 11 could potentially be smaller.
+Figure 5.3.: Violin plots of the distribution of bias weights of a baseline model trained on CIFAR-100.
+While the first layer's biases are in [−0.1, +0.1], after each max-pooling layer the interval
+which contains 95 % of the weights and is centered around the mean becomes smaller.
+In the last three convolutional layers, most bias weights are in [−0.005, +0.005].
+
+5.1. Baseline Model and Training setup
+Figure 5.4.: Violin plots of the distribution of the γ parameter of Batch Normalization layers of a
+baseline model trained on CIFAR-100.
+Figure 5.5.: The distribution of the β parameter of Batch Normalization layers of a baseline model
+trained on CIFAR-100.
+
+5. Experimental Evaluation
+Figure 5.6.: The distribution of the range of values (max - min) of filters by channel and layer. For
+each filter, the range of values is recorded by channel. The smaller this range is, the
+less information is lost if an n × n filter is replaced by an (n − 1) × (n − 1) filter.
+
+5.1. Baseline Model and Training setup
+5.1.3. Training behavior
+Due to early stopping, the number of epochs for which a model was trained differs. The number
+of epochs trained with augmentation ranged from 133 epochs to 182 epochs with a standard
+deviation of 17.3 epochs for CIFAR-100.
+Figure 5.7 shows the worst and the best validation accuracy during the training with
+augmented data. Different initializations lead to very similar validation accuracies during
+training. The image might lead to the wrong conclusion that models which are better at
+the start are also better at the end. In order to check this hypothesis, the relative order of
+validation accuracies for the 10 CIFAR-100 models was examined. If the relative ordering
+stays approximately the same, then it can be considered to run the first few epochs many
+times and only train the best models to the end. For 10 models, there can be $\frac{10^2 - 10}{2} = 45$
+pair-wise changes in the ordering at maximum if the relative order of validation accuracies
+is reversed. For the baseline model, 21.8 changes in the relative order of accuracies occurred
+on average for each pair of epochs (i, i + 1). This means if one knows only the relative order
+of the validation accuracy of two models m and m' in epoch i, it is doubtful if one can
+make any statement about the ordering of m and m' in epoch i + 1.
+0
+102030405060708090
+100110120130
+140
+0.2
+0.3
+0.4
+0.5
+0.6
+0.7
+epoch
+validation accuracy
+maximum validation accuracy
+minimum validation accuracy
+1.5
+2
+2.5
+3
+3.5
+4
+4.5
+loss
+maximum validation accuracy
+minimum validation accuracy
+mean loss
+Figure 5.7.: Minimum and maximum validation accuracy of the 10 trained models by epoch. The
+differences do not exceed 1 % and do not increase by training epoch. Four models
+stopped the first training stage at epoch 133 which causes the shift in the loss and the
+maximum validation accuracy.
+Figures 5.8 to 5.10 show how the weights changed while training on CIFAR-100. It was
+expected that the absolute value of weight updates during epochs (sum, max, and mean)
+decrease in later training stages. The intuition was that weights need to be adjusted in a
+coarse way first. After that, the intuition was that only slight modifications are applied by
+
+5. Experimental Evaluation
+the SGD based training algorithm (ADAM). The mean, max and sum of weight updates as
+displayed in Figures 5.8 to 5.10, however, do not show such a clear pattern. The biggest
+change happens as expected in the first epoch after the weights are initialized. The change
+from augmented training to non-augmented training was at epoch 156 to epoch 157.
+It can be observed that layers which receive more input feature maps get larger weight
+updates in mean. As layers which are closer to the output take more input feature maps,
+their weight updates are larger. This pattern does not occur when SGD is used as the
+optimizer.
+Figure 5.8.: Mean weight updates of the baseline model between epochs by layer.
+
+5.1. Baseline Model and Training setup
+Figure 5.9.: Maximum weight updates of the baseline model between epochs by layer.
+Figure 5.10.: Sum of weight updates of the baseline model between epochs by layer.
+
+5. Experimental Evaluation
+5.2. Confusion Matrix Ordering
+The visualization of the confusion matrix can give valuable information about which part
+of the task is hard. For more than about 10 classes, however, it becomes hard to visualize
+and read.
+For CIFAR-10, the proposed method groups the four object classes and the six animal
+classes together (see Figure 5.11a).
+(a) CIFAR-10 Test set (b) Random
+Figure 5.11.: Figure 5.11a shows an ordered confusion matrix of the CIFAR-10 dataset. The diagonal
+elements are set to 0 in order to make other elements easier to see.
+Figure 5.11b shows a confusion matrix with random mistakes.
+The first image of Figure 5.12 shows one example of a classifier with only 97.13 % test
+accuracy where a good permutation was found. Please note that this is not the best classifier.
+The confusion matrix which resulted from a baseline classifier with 99.32 % test accuracy is
+displayed as the second image.
+Those results suggest that the ordering of classes is a valuable tool to make patterns easier
+to see. Humans, however, are good at finding patterns even if they come from random noise.
+Hence, for comparison, a confusion matrix of a classifier with 30 classes, 60 % accuracy
+and 40 % uniformly random errors of a balanced dataset is created, optimized according to
+Equation (4.1) and shown in Figure 5.11b. It clearly looks different than Figure 5.11a.
+On the HASYv2 dataset the class-ordering is necessary to see anything as most possible
+confusions do not happen. See Figure 5.13 for comparison of the first 50 classes of the
+unsorted confusion matrix and the sorted confusion matrix. If confusion matrices of a
+maximum size of 50 × 50 are displayed, the ordered method can show only 8 matrices
+because the off-diagonal matrices are almost 0. Without sorting, 64 matrices have to be
+displayed.
+
+5.2. Confusion Matrix Ordering
+Figure 5.12.: The first image shows the confusion matrix for the test set of GTSRB after optimization
+according to Equation (4.1). The diagonal elements are set to 0 in order to make other elements
+easier to see. The symbols next to the label on the vertical axis indicate the shape
+and the color of the signs.
+The second image shows the same, but with baseline model.
+Best viewed in electronic form. 
+Figure 5.13.: The first 50 entries of the confusion matrix of the HASYv2 dataset. The diagonal
+elements are set to 0 in order to make other elements easier to see. The top image
+shows arbitrary class ordering, the bottom image shows the optimized ordering.
+5.3. Spectral Clustering vs CMO
+5.3. Spectral Clustering vs CMO
+This section evaluates the clustering quality of CMO in comparison to the clustering quality
+of spectral clustering.
+The evaluated model achieves 70.50 % training accuracy and 53.16 % test accuracy on
+CIFAR-100. Figure 5.14 shows the sorted confusion matrix.
+Figure 5.14.: The first 50 entries of the ordered confusion matrix of the CIFAR-100 dataset. The
+diagonal elements are set to 0 in order to make other elements easier to see. Best
+viewed in electronic form.
+CIFAR-100 has pre-defined coarse classes. Those are used as a ground truth for the clusters
+which are to be found. The number of errors is determined by (i) Join all n clusters which
+contain the classes of the coarse class C to a set M. The error is n. (ii) Within M, find the
+set of classes M− which do not belong to C. (iii) The final error is n + |M−|. As can be
+seen in Table 5.4, both clustering methods find reasonable clusters. CMO, however, has
+only half the error of spectral clustering.
+The results for the HASYv2 dataset are qualitatively similar (see Table 5.5). It should be
+noted that the number of clusters was determined by using the semi-automatic method
+based on CMO as described in Section 4.2.
+
+5. Experimental Evaluation
+Cluster Spectral clustering Errors CMO Errors
+fish aquarium fish, orchid + flatfish
++ ray, shark + trout, lion
+5 aquarium fish, orchid + flatfish
++ ray + shark, trout
+4
+flowers orchid, aquarium fish + sunflower + poppy, tulip + rose,
+train
+5 orchid, aquarium fish + sunflower, poppy, tulip, rose 2
+people baby, boy, man + girl + woman 2 baby, boy, girl, woman, man 0
+reptiles crocodile, plain, road, table,
+wardrobe + dinosaur + lizard
++ snake, worm + turtle
+9 crocodile, lizard, lobster, caterpillar + dinosaur + snake + turtle, crab 6
+trees maple, oak, pine + willow, forest
++ palm
+3 palm, willow, pine, maple, oak 0
+Total 24 12
+Table 5.4.: Differences in spectral clustering and CMO. Classes in a cluster are separated by ,
+whereas clusters are separated by +.
+Cluster Spectral clustering Errors CMO Errors
+A A, A, A 0 A, A, A , Å 1
+B B, B 0 B, B 0
+C C, c, ⊂ and C , ξ, E and C 4 C, c, ⊂, C and C 1
+D D, D, D, . 1 D, D, D 0
+E E and E, ε 2 E and E, ε, , ∈ 4
+F F and F, F 1 F and F, F 1
+H H and H , κ and H 3 H and H, H 1
+K K, κ 0 K, κ 0
+L L, b and L, L 1 L, b and L, L 1
+M M and M and M 2 M and µ, M and M 3
+N N and N, N and N 2 N and N, N and N , ℵ 3
+O O, O, 0, ◦, °, # and o 1 O, O, 0, ◦, ° and # and o 2
+P P, P and p, ρ and P and ℘ 3 P and P, P, ℘ and p, ρ 2
+Q Q, Q, Q, ι, t, &, `, =, Æ, 1 7 Q and Q, Q 1
+R R, R and R, R, k and < 3 R and <, R, R, R 1
+S S, s, S 0 S, s, S 0
+T T, > and T , τ 1 T, > and T , τ 1
+U U, ∪ and u, U, A 1 U, u, U, A and ∪ 2
+V V , v, ∨ 0 V , v, ∨ 0
+W W, w, ω 0 W, w and ω 1
+X X, x, X , χ, × 0 X, x, X , χ, × 0
+Y Y and y 1 Y , y 0
+Z Z, z, Z and Z, Z 1 Z, z, Z, Z, Z 0
+Total 34 25
+Table 5.5.: Differences in spectral clustering and CMO.
+
+5.4. Hierarchy of Classifiers
+5.4. Hierarchy of Classifiers
+In a first step, a classifier is trained on the 100 classes of CIFAR-100. The fine-grained root
+classifier achieves an accuracy of 65.29 % with test-time transformations. The accuracies on
+the found sub-classes are listed in Table 5.6. The fact that the root classifier achieves better
+results within a cluster than the specialized leaf classifiers in 13 of 14 cases could either
+be due to limited training data, overfitting or the small size of 32 px × 32 px of the data.
+The experiment also shows that most of the errors are due to not identifying the correct
+cluster. Hence, in this case, more work in improving the root classifier is necessary rather
+than improving the discrimination of classes within a cluster.
+Although the classes within a cluster capture most of the classifications, many misclassifications happen outside of the clusters. For example, in cluster 3, a perfect leaf classifier would
+push the accuracy in the full column only to 63.50 % due to errors of the root classifier
+where the root classifier does not predict the correct cluster.
+The leaf classifiers use the same topology as the root classifier. By initializing them with
+the root classifier's weights, their performance can be pushed to about the inner accuracy.
+They are, however, only useful if their accuracy is well above the inner accuracy of the root
+classifier. Hence, for CIFAR-100, building hierarchies of classifiers is not useful.
+Cluster Classes
+accuracy
+root classifier leaf classifier
+cluster identified class identified | cluster class identified | cluster
+1 3 69.67 % 84.27 % 72.98 %
+2 5 46.60 % 58.54 % 43.47 %
+3 2 58.50 % 92.13 % 83.46 %
+4 2 50.50 % 87.83 % 81.74 %
+5 3 44.67 % 79.29 % 71.01 %
+6 2 29.50 % 78.67 % 72.00 %
+7 2 52.50 % 92.11 % 87.72 %
+8 2 59.50 % 86.23 % 81.88 %
+9 2 59.00 % 90.08 % 87.79 %
+10 2 62.00 % 85.52 % 73.10 %
+11 2 67.00 % 87.01 % 75.32 %
+12 2 72.50 % 94.77 % 76.77 %
+13 2 64.00 % 82.58 % 86.27 %
+14 2 79.67 % 89.85 % 89.10 %
+Table 5.6.: Accuracies of the root classifier trained on the full set of 100 classes evaluated on
+14 clusters of classes. Each class has 100 elements to test. The column cluster identified
+gives the percentage that the root classifiers argmax prediction is within the correct
+cluster, but not necessarily the correct class. The columns class identified | cluster only
+consider data points where the root classifier correctly identified the cluster.
+
+5. Experimental Evaluation
+5.5. Increased width for faster learning
+More filters in one layer could simplify the optimization problem as each filter needs smaller
+updates. Hence a CNN N with $n_i$ filters in layer i is expected to take more epochs than a
+CNN N' with $2 \cdot n_i$ filters in layer i to achieve the same validation accuracy.
+This hypothesis can be falsified by training a CNN N and a CNN N' and comparing the
+trained number of epochs. As more filters can lead to different results depending on the
+layer where they are added, five models are trained. The details about those models are
+given in Table 5.7.
+Name Layer Filter count Total
+Baseline New parameters
+m9 9 64 638 5 978 566
+m0
+9
+9 64 974 8 925 622
+m11 11 512 3786 5 982 698
+m0
+11 11 512 1024 1 731 980
+m13 13 512 8704 5 982 092
+Table 5.7.: Models which are identical to the baseline, except that the number of filters of one layer
+was increased.
+The detailed results are given in Table 5.8. As expected, the number of training epochs of
+the models with increased numbers of parameters is lower. The wall-clock time, however, is
+higher due to the increase in computation per forward- and backward-pass.
+For m9, m11 and m13, the filter weight range of the layer with increased capacity decreases
+compared to Figure 5.6, and the filter weights of the layer with increased capacity are more
+concentrated around zero compared to Figure 5.2. For model m13, the distribution of
+weight of the output layer changed to a more bell-shaped distribution. Except for this, the
+distribution of filter weights in other layers did not change for all three models compared to
+the baseline.
+Model Parameters
+Accuracy Training
+Single Model Ensemble Mean Epochs Mean Time
+Mean std
+baseline 944 012 63.38 % 0.55 64.70 % 154.7 3856 s
+m9 5 978 566 65.53 % 0.37 66.72 % 105.7 4472 s
+m0
+9
+8 925 622 65.10 % 1.09 66.54 % 95.6 5261 s
+m11 5 982 698 65.73 % 0.77 67.38 % 149.2 5450 s
+m0
+11 1 731 980 62.12 % 0.48 62.89 % 143.6 3665 s
+m13 5 982 092 62.39 % 0.66 63.77 % 147.8 4485 s
+Table 5.8.: Training time in epochs and wall-clock time for the baseline and models m9, m11, m13
+as well as their accuracies.
+
+5.6. Weight updates
+5.6. Weight updates
+Section 5.5 shows that wider networks learn faster. One hypothesis why this happens is
+that every single weight update can be smaller to learn the same function. Thus the loss
+function is smoother and thus gradient descent based optimization algorithms lead to more
+consistent weight updates.
+Consequently, it is expected that layers with fewer filters have more erratic updates. If
+there are many filters, the weights of a filter which does not contribute much to the end
+result or is even harmful can gradually be set to zero, essentially removing one path
+in the network.
+In order to test the hypothesis, the baseline model was adjusted. The number of filters in
+layer 5 was reduced from 64 filters to 3 filters. As one can see in Figure 5.15, the mean
+weight update of the layers 1, 3, 5, 7 and 9 have a far bigger range than the layers 11, 13 and
+15 after epoch 50. Compared to the baseline model's mean updates (Figure 5.8, Page 46),
+the mean weight updates of layers 1 and 3 are higher, the range of the mean weight update
+from epoch 50 is higher for layer 5 and the range of mean updates of layer 7 is higher.
+For the maximum and the sum, no similar pattern could be observed (see Figures A.3
+and A.4).
+Figure 5.15.: Mean weight updates between epochs by layer. The model is the baseline model, but
+with layer 5 reduced to 3 filters.
+
+5. Experimental Evaluation
+5.7. Multiple narrow layers vs One wide layer
+On a given feature map size one can have an arbitrary number of convolutional layers with
+SAME padding and each layer can have an arbitrary number of filters. A convolutional layer
+with more filters is called wider [ZK16], a convolutional layer with fewer filters is thus called
+narrower and the number of filters in a convolutional layer is the layer's width.
+If the number of parameters which may be used for the feature map scale is fixed and high
+enough, there are still many combinations. If ni with i = 0, . . . , k is the number of output
+feature maps of layer i where i = 0 is the input layer and all filters are 3 × 3 filters without
+a bias, then the number of parameters is
+$$\text{Parameters} = \sum_{i=1}^{k} \left(n_{i-1} \cdot 3^2 + 1\right) \cdot n_i$$
+
+Hence the width of one layer does not only influence the parameters in this layer, but also
+in the next layer.
+The number of possible subsequent layers of one feature map size is enormous, even if
+constraints are placed on the number of parameters. For example, the first convolutional
+layer of the baseline model has 896 parameters. If one assumes that less than 3 filters per
+layer are not desirable, one keeps all layers having a bias and all layers only use 3 × 3 filters,
+then the maximum depth is 10. If one furthermore assumes that at least 800 parameters
+should be used, there are still 120 possible layer combinations. As experimentally evaluating
+one layer combination takes about 10 hours on a GTX 970 for CIFAR-100 it is not possible
+to evaluate all layer combinations. In the following, a couple of changes to the network
+width / depth will be evaluated.
+Each layer expands the perceptive field. Hence deeper layers can use more of the input for
+every single output value. But deeper networks need more time for inference as the output
+of layer i has to be computed before the output of i + 1 can be computed. Hence there is
+less potential to parallelize computations. Each filter can be seen as a concept which can
+be learned. The deeper the filter is in the network, the higher is the abstraction level of the
+concept. In most cases, both are necessary: Many different concepts (width) and high-level
+concepts (depth).
+Reducing the two first convolutional layers of the baseline model (see Page 39) to one
+convolutional layer of 48 filters (944 396 parameters in total, whereas the baseline model
+has 944 012 parameters) resulted in a mean accuracy of 61.64 % (-1.74 %) and a standard
+deviation of σ = 1.12 (+0.57). The ensemble achieved 63.18 % (-1.52 %). As expected,
+the training time per epoch was reduced. For the GTX 980, it was reduced from 22.0 s of
+the baseline model to 15 s of the model with one less convolutional layer, one less Batch
+Normalization and one less activation layer. The inference time was also reduced from 6 ms
+
+5.8. Batch Normalization
+to 4 ms for 1 image and from 32 ms to 23 ms for 128 images. Due to the loss in accuracy of
+more than one percentage point of the mean model and the increased standard deviation of
+the model's performance, at least two convolutional layers on the 32 px × 32 px feature
+map scale are recommendable for CIFAR-100.
+Changing the baseline to have fewer filters but more layers is another option. This was tried
+for the first block at the 32 px × 32 px feature map scale. The two convolutional layers
+(layers 1 – 4 in Page 39) were replaced by two convolutional layers with 27 filters and one
+convolutional layer with 26 filters in the convolution - BN - ELU pattern. The model
+has 944 132 parameters. Compared to the baseline model, the time for inference was the
+same. This is unexpected, because the inference time changed when a layer was removed at
+this scale. The mean test accuracy was 63.66 % (+0.28) and the standard deviation was
+σ = 1.03 (+0.48). The ensemble achieved 64.91 % test accuracy (+0.21).
+Having two nonlinearities at each feature map scale could be important to learn nonlinear
+transformations at that scale. As the baseline model does only have one nonlinearity at the
+8 × 8 feature maps scale, another convolutional layer with 64 filters, Batch Normalization
+and ELU was added. To keep the number of parameters constant, layer 11 of the baseline
+model was reduced from 512 filters to 488 filters. The new model achieves a mean accuracy
+of 63.09 % (-0.29) with a standard deviation of σ = 0.70 (+0.15). The ensemble achieves
+an accuracy of 64.39 % (-0.31). This could indicate that having two convolutional layers
+is more important for layers close to the input than for intermediate layers. Alternatively, the
+parameters could be more important in layer 11 than having a new convolutional layer after
+layer 9.
+In order to test the hypothesis that having two convolutional layers is less important in
+the middle of a network, the second convolutional layer at the 16 × 16 feature map scale is
+removed. The first convolutional layer was increased from 32 filters to 59 filters, the second
+convolutional layer was increased from 32 filters to 58 filters in order to keep the amount of
+parameters of the model constant. The adjusted model achieved 62.72 % (-0.66) mean test
+accuracy with a standard deviation of σ = 0.84 (+0.29). The ensemble achieved 63.88 %
+test accuracy (-0.82).
+Even more extreme, if both convolutional layers are removed from the 16 × 16 feature map
+scale, the mean test accuracy drops to 61.21 % (-2.17) with a standard deviation of σ = 0.51
+(-0.04). The ensemble achieves a test accuracy of 63.07 % (-1.63). Thus it is very important
+to have at least one convolutional layer at this feature map scale.
+5.8. Batch Normalization
+In [CUH15], the authors write that Batch Normalization does not improve ELU networks.
+Hence the effect of removing Batch Normalization from the baseline is investigated in this
+
+5. Experimental Evaluation
+experiment.
+As before, 10 models are trained on CIFAR-100. The training setup and the model mno-bn
+are identical to the baseline model m, except that in mno-bn the Batch Normalization layers
+are removed.
+One notable difference is the training time: While m needs 21 ms per epoch in average on
+a GTX 980, mno-bn only needs 21 ms per epoch. The number of epochs used for training,
+however, also increased noticeably from 149 epochs to 178 epochs in average. The standard
+deviation of trained epochs is 17.3 epochs for the baseline model and 23.4 epochs for mno-bn.
+The mean accuracy of mno-bn is 62.86 % and hence 0.52 percentage points worse. The
+standard deviation between models increased from 0.55 to 0.61. This is likely a result of the
+early stopping policy and the differences in training epochs. This can potentially be fixed
+by retraining the models which stopped earlier than the model which was trained for the
+biggest amount of epochs. The ensemble test accuracy is 63.88 % and hence 0.82 percentage
+points worse than the baseline.
+The filter weight range and distribution is approximately the same as Figure 5.6 and
+Figure 5.2, but the distribution of bias weights changed noticeably: While the bias weights of
+the baseline are spread out in the first layer and much more concentrated in subsequent layers
+(see Figure 5.3), the model without Batch Normalization has rather concentrated weights
+in the first layers and only the bias weights of the last layer are spread out (see Figure A.2).
+Another model $m'_{\text{no-bn}}$ has one more filter in each of the convolutional layers 1, 3, 5, and 7 to
+compensate for the loss of parameters in Batch Normalization. The mean test accuracy of
+10 such models is 62.87 % which is 0.51 percentage points worse than the baseline. The
+ensemble of $m'_{\text{no-bn}}$ achieves 64.33 % which is 0.37 percentage points worse than the baseline.
+The mean training time was 14 s per epoch and 157.4 epochs with a standard deviation of
+20.7 epochs.
+Hence it is not advisable to remove Batch Normalization for the final model. It could,
+however, be possible to remove Batch Normalization for the experiments to iterate quicker
+through different ideas if the relative performance changes behave the same with or without
+Batch Normalization.
+
+5.9. Batch size
+5.9. Batch size
+The mini-batch size m ∈ N≥1 influences
+• Epochs until convergence: The smaller m, the more often the model is updated
+in one epoch. Those updates, however, are based on fewer samples of the dataset.
+Hence the gradients of different mini-batches can noticeably differ. In the literature,
+this is referred to as gradient noise [KMN+16].
+• Training time per epoch: The smaller the batch size, the higher the training time
+per epoch as the hardware is not optimally utilized.
+• Resulting model quality: The choice of the hyperparameter m influences the
+accuracy of the classifier when training is finished. [KMN+16] supports the view that
+smaller m result in less sharp minima. Hence smaller m lead to better generalization.
+Empiric evaluation results can be found in Table 5.9. Those results confirm the claim
+of [KMN+16] that lower batch sizes generalize better.
+m
+Training
+Epochs
+Mean total Single model Ensemble
+time training time Accuracy std Accuracy
+8 118 s
+epoch 81 – 153 14 131 s 61.93 % σ = 1.03 65.68 %
+16 62 s
+epoch 103 – 173 8349 s 64.16 % σ = 0.81 66.98 %
+32 35 s
+epoch 119 – 179 5171 s 64.11 % σ = 0.75 65.89 %
+64 25 s
+epoch 133 – 195 2892 s 63.38 % σ = 0.55 64.70 %
+128 18 s
+epoch 145 – 239 3126 s 62.23 % σ = 0.73 63.55 %
+Table 5.9.: Training time per epoch and single model test set accuracy (mean and standard deviation)
+of baseline models trained with different mini-batch sizes m on GTX 970 GPUs on
+CIFAR-100.
+5.10. Bias
+Figure 5.3 suggests that the bias is not important for the layers 11, 13 and 15. Hence a
+model mno-bias is created which is identical to the baseline model m, except that the bias of
+layers 11, 13 and 15 is removed.
+The mean test accuracy of 10 trained mno-bias is 63.74 % which is an improvement of
+0.36 percentage points over the baseline. The ensemble achieves a test accuracy of 65.13 %
+which is 0.43 percentage points better than the baseline. Hence the bias can safely be
+removed.
+Removing the biases did not have a noticeable effect on the filter weight range, the filter
+weight distribution or the distribution of the remaining biases. Also, the γ and β parameters
+of the Batch Normalization layers did not noticeably change.
+
+5. Experimental Evaluation
+5.11. Learned Color Space Transformation
+In [MSM16] it is described that placing one convolutional layer with 10 filters of size 1 × 1
+directly after the input and then another convolutional layer with 3 filters of size 1 × 1 acts
+as a learned transformation in another color space and boosts the accuracy.
+This approach was evaluated on CIFAR-100 by adding a convolutional layer with ELU activation and 10 filters followed by another convolutional layer with ELU activation and
+3 filters. The mean accuracy of 10 models was 63.31 % with a standard deviation of 1.37.
+The standard deviation is noticeably higher than the standard deviation of the baseline
+model (0.55) and the accuracy also decreased by 0.07 percentage points. The accuracy of
+the ensemble is at 64.77 % and hence 0.07 percentage points higher than the accuracy of
+the baseline models.
+The inference time for 1 image and for 128 images did not change compared to the baseline.
+The training time per epoch increased from 26 s to 30 s on the GTX 970.
+Hence it is not advisable to use the learned color space transformation.
+5.12. Pooling
+An alternative to max pooling with stride 2 with a 2 × 2 kernel is using a 3 × 3 kernel with
+stride 2.
+This approach was evaluated on CIFAR-100 by replacing all max pooling layers with the
+3×3 kernel max pooling (and SAME padding). The mean accuracy of 10 models was 63.32 %
+(−0.06) and the standard deviation was 0.57 (+0.02). The ensemble achieved 65.15 % test
+accuracy (+0.45).
+The training time per epoch decreased from 20.5 s-21.1 s to 18.6 s (mean of 10 training runs)
+on the Nvidia GTX 970. The time for inference increased from 25 ms to 26 ms for a batch
+of 128 images.
+5.13. Activation Functions
+Nonlinear, differentiable activation functions are important for neural networks to allow them
+to learn nonlinear decision boundaries. One of the simplest and most widely used activation
+functions for CNNs is ReLU [KSH12], but others such as ELU [CUH15], parametrized
+rectified linear unit (PReLU) [HZRS15b], softplus [ZYL+15] and softsign [BDLB09] have
+been proposed. The baseline uses ELU.
+
+5.13. Activation Functions
+Activation functions differ in the range of values and the derivative. The definitions and
+other comparisons of eleven activation functions are given in Table B.3.
+Theoretical explanations why one activation function is preferable to another in some
+scenarios are the following:
+• Vanishing Gradient: Activation functions like tanh and the logistic function saturate outside of the interval [−5, 5]. This means weight updates are very small for
+preceding neurons, which is especially a problem for very deep or recurrent networks as
+described in [BSF94]. Even if the neurons learn eventually, learning is slower [KSH12].
+• Dying ReLU: The dying ReLU problem is similar to the vanishing gradient problem.
+The gradient of the ReLU function is 0 for all non-positive values. This means if all
+elements of the training set lead to a negative input for one neuron at any point in the
+training process, this neuron does not get any update and hence does not participate
+in the training process. This problem is addressed in [MHN13].
+• Mean unit activation: Some publications like [CUH15, IS15] claim that mean
+unit activations close to 0 are desirable. They claim that this speeds up learning
+by reducing the bias shift effect. The speedup of learning is supported by many
+experiments. Hence the possibility of negative activations is desirable.
+Those considerations are listed in Table 5.10 for 11 activation functions. Besides the
+theoretical properties, empiric results are provided in Tables 5.11 and 5.12. The baseline
+network was adjusted so that every activation function except the one of the output layer
+was replaced by one of the 11 activation functions.
+As expected, PReLU and ELU performed best. Unexpected was that the logistic function,
+tanh and softplus performed worse than the identity and it is unclear why the pure-softmax
+network performed so much better than the logistic function. One hypothesis why the
+logistic function performs so badly is that it cannot produce negative outputs. Hence the
+logistic− function was developed:
+$$\text{logistic}^{-}(x) = \frac{1}{1 + e^{-x}} - 0.5$$
+The logistic− function has the same derivative as the logistic function and hence still suffers
+from the vanishing gradient problem. The network with the logistic− function achieves an
+accuracy which is 11.30 percentage points better than the network with the logistic function, but is still
+5.54 percentage points worse than the ELU.
+Similarly, ReLU was adjusted to have a negative output:
+ReLU−(x) = max(−1, x) = ReLU(x + 1) − 1
+The results of ReLU− are much worse on the training set, but perform similar on the test
+
+5. Experimental Evaluation
+set. The result indicates that the possibility of hard zero and thus a sparse representation
+is either not important or similarly important as the possibility to produce negative outputs.
+This contradicts [GBB11, SMGS14].
+A key difference between the logistic− function and ELU is that ELU neither suffers
+from the vanishing gradient problem nor is its range of values bounded. For this reason, the
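+S2ReLU activation function was designed. It is defined as
+$$\text{S2ReLU}(x) = \text{ReLU}\left(\frac{x}{2} + 1\right) - \text{ReLU}\left(-\frac{x}{2} + 1\right) =
+\begin{cases}
+\frac{x}{2} - 1 & \text{if } x \leq -2 \\
+x & \text{if } -2 \leq x \leq 2 \\
+\frac{x}{2} + 1 & \text{if } x > 2
+\end{cases}$$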
+This function is similar to SReLUs as introduced in [JXF+16]. The difference is that S2ReLU
+does not introduce learnable parameters. The S2ReLU was designed to be symmetric, be
+the identity close to zero and have a smaller absolute value than the identity farther away.
+It is easy to compute and easy to implement.
+Those results — not only the absolute values, but also the relative comparison — might
+depend on the network architecture, the training algorithm, the initialization and the
+dataset. Results for MNIST can be found in Table 5.13 and for HASYv2 in Table A.2. For
+both datasets, the logistic function has a much shorter training time and a noticeably lower
+test accuracy.
+Function Vanishing Gradient Negative Activation possible Bound activation
+Identity No Yes No
+Logistic Yes No Yes
+Logistic− Yes Yes Yes
+Softmax Yes Yes Yes
+tanh Yes Yes Yes
+Softsign Yes Yes Yes
+ReLU Yes1 No Half-sided
+Softplus No No Half-sided
+S2ReLU No Yes No
+LReLU/PReLU No Yes No
+ELU No Yes No
+Table 5.10.: Properties of activation functions.
+1The dying ReLU problem is similar to the vanishing gradient problem.
+
+5.13. Activation Functions
+Function
+Single model Ensemble of 10
+Training set Test set Training set Test set
+Identity 66.25 % σ = 0.77 56.74 % σ = 0.51 68.77 % 58.78 %
+Logistic 51.87 % σ = 3.64 46.54 % σ = 3.22 61.19 % 54.58 %
+Logistic− 66.49 % σ = 1.99 57.84 % σ = 1.15 69.04 % 60.10 %
+Softmax 75.22 % σ = 2.41 59.49 % σ = 1.25 78.87 % 63.06 %
+Tanh 67.27 % σ = 2.38 55.70 % σ = 1.44 70.21 % 58.10 %
+Softsign 66.43 % σ = 1.74 55.75 % σ = 0.93 69.78 % 58.40 %
+ReLU 78.62 % σ = 2.15 62.18 % σ = 0.99 81.81 % 64.57 %
+ReLU− 76.01 % σ = 2.31 62.87 % σ = 1.08 78.18 % 64.81 %
+Softplus 66.75 % σ = 2.45 56.68 % σ = 1.32 71.27 % 60.26 %
+S2ReLU 63.32 % σ = 1.69 56.99 % σ = 1.14 65.80 % 59.20 %
+LReLU 74.92 % σ = 2.49 61.86 % σ = 1.23 77.67 % 64.01 %
+PReLU 80.01 % σ = 2.03 62.16 % σ = 0.73 83.50 % 64.79 %
+ELU 76.64 % σ = 1.48 63.38 % σ = 0.55 78.30 % 64.70 %
+Table 5.11.: Training and test accuracy of adjusted baseline models trained with different activation
+functions on CIFAR-100. For LReLU, α = 0.3 was chosen.
+Function
+Inference per Training
+Epochs
+Mean total
+1 Image 128 time training time
+Identity 8 ms 42 ms 31 s
+epoch 108 – 148 3629 s
+Logistic 6 ms 31 ms 24 s
+epoch 101 – 167 2234 s
+Logistic− 6 ms 31 ms 22 s
+epoch 133 – 255 3421 s
+Softmax 7 ms 37 ms 33 s
+epoch 127 – 248 5250 s
+Tanh 6 ms 31 ms 23 s
+epoch 125 – 211 3141 s
+Softsign 6 ms 31 ms 23 s
+epoch 122 – 205 3505 s
+ReLU 6 ms 31 ms 23 s
+epoch 118 – 192 3449 s
+Softplus 6 ms 31 ms 24 s
+epoch 101 – 165 2718 s
+S2ReLU 5 ms 32 ms 26 s
+epoch 108 – 209 3231 s
+LReLU 7 ms 34 ms 25 s
+epoch 109 – 198 3388 s
+PReLU 7 ms 34 ms 28 s
+epoch 131 – 215 3970 s
+ELU 6 ms 31 ms 23 s
+epoch 146 – 232 3692 s
+Table 5.12.: Training time and inference time of adjusted baseline models trained with different
+activation functions on GTX 970 GPUs on CIFAR-100. It was expected that the
+identity is the fastest function. This result is likely an implementation specific problem
+of Keras 2.0.4 or Tensorflow 1.1.0.
+
+5. Experimental Evaluation
+Function
+Single model Ensemble Epochs
+Accuracy std Accuracy Range Mean
+Identity 99.45 % σ = 0.09 99.63 % 55 – 77 62.2
+Logistic 97.27 % σ = 2.10 99.48 % 37 – 76 54.5
+Softmax 99.60 % σ = 0.03 99.63 % 44 – 73 55.6
+Tanh 99.40 % σ = 0.09 99.57 % 56 – 80 67.6
+Softsign 99.40 % σ = 0.08 99.57 % 72 – 101 84.0
+ReLU 99.62 % σ = 0.04 99.73 % 51 – 94 71.7
+Softplus 99.52 % σ = 0.05 99.62 % 62 – 70 68.9
+PReLU 99.57 % σ = 0.07 99.73 % 44 – 89 71.2
+ELU 99.53 % σ = 0.06 99.58 % 45 – 111 72.5
+Table 5.13.: Test accuracy of adjusted baseline models trained with different activation functions
+on MNIST.
+5.14. Label smoothing
+Ensembles consisting of n models trained by the same procedure on the same data but
+initialized with different weights and trained with a different order of the training data
+perform consistently better than single models. One drawback of ensembles in applications
+such as self-driving cars is that they increase the computation by a factor of n. One idea
+why they improve the test accuracy is by reducing the variance.
+The idea of label smoothing is to use the ensemble prediction of the training data as labels
+for another classifier. For every element x of the training set, the one-hot encoded target
+t(x) is smoothed by the ensemble prediction yE(x)
+$$t'(x) = \alpha \cdot t(x) + (1 - \alpha) \cdot y_E(x)$$
+where α ∈ [0, 1] is the smoothing factor.
+There are three reasons why label smoothing could be beneficial:
+• Training speed: The ensemble prediction contains more information about the
+image than binary class decisions. Classifiers in computer vision predict how similar
+the input looks to other input of the classes they are trained on. By smoothing the
+labels, the information that one image could also belong to another class is passed to
+the optimizer. In early stages of the optimization this could lead to a lower loss on
+the non-smoothed validation set.
+• Higher accuracy: Using smoothed labels for the optimization could lead to a higher
+accuracy of the base-classifier due to a smoothed error surface. It might be less likely
+
+5.14. Label smoothing
+that the classifier gets into bad local minima.
+• Label noise: Depending on the way how the labels are obtained, it might not always
+be clear which label is the correct one. Also, labeling errors can be present in training
+datasets. Those errors severely harm the training. By smoothing the labels errors
+could be relaxed.
+10 models msmooth are trained with the α = 0.5 smoothed labels from the prediction
+of an ensemble of 10 baseline models. The mean accuracy of the models trained on the
+smoothed training set labels was 63.61 % (+0.23 %) and the standard deviation was σ = 0.72
+(+0.17 %). The ensemble of 10 msmooth models achieved 64.79 % accuracy (+0.09 %). Hence
+the effect of this kind of label smoothing on the final accuracy is questionable.
+The training speed didn’t noticeably change either: The number of trained epochs ranged
+from 144 to 205, the mean number of epochs was 177. The baseline training ranged from
+146 to 232 epochs with a mean of 174 epochs. After 10, 30 and 80 epochs both training
+methods' accuracies differed by less than one percentage point. Hence it is unlikely that label
+smoothing has a positive effect on the training speed.
+Hinton et al. called this method distillation in [HVD15]. Hinton et al. used smooth and
+hard labels for training, this work only used smoothed labels.
+
+5. Experimental Evaluation
+5.15. Optimized Classifier
+In comparison to the baseline classifier, the following changes are applied to the optimized
+classifier:
+• Remove the bias for the last layers: For all layers which output a 1 × 1 feature
+map, the bias is removed
+• Increase the max pooling kernel to 3 × 3
+• More filters in the first layers
+The detailed architecture is given in Table 5.14 and visualized in Figure 5.16. The evaluation
+is given in Table 5.15 and the timing comparison is given in Table 5.16.
+# Type Filters @
+Patch size / stride
+Parameters FLOPs Output size
+Input 0 0 3 @ 32 × 32
+1 Convolution 69 @ 3 × 3 × 3 / 1 1 932 3 744 768 69 @ 32 × 32
+2 BN + ELU 138 353 418 69 @ 32 × 32
+3 Convolution 69 @ 3 × 3 × 32 / 1 42 918 37 684 096 69 @ 32 × 32
+4 BN + ELU 138 353 418 69 @ 32 × 32
+Max pooling 2 × 2 / 2 0 40 960 32 @ 16 × 16
+5 Convolution 64 @ 3 × 3 × 32 / 1 39 808 20 332 544 64 @ 16 × 16
+6 BN + ELU 128 82 048 64 @ 16 × 16
+7 Convolution 64 @ 3 × 3 × 64 / 1 36 928 18 857 984 64 @ 16 × 16
+8 BN + ELU 128 82 048 64 @ 16 × 16
+Max pooling 2 × 2 / 2 20 480 64 @ 8 × 8
+9 Convolution 64 @ 3 × 3 × 64 / 1 36 928 4 714 496 64 @ 8 × 8
+10 BN + ELU 128 20 608 64 @ 8 × 8
+Max pooling 2 × 2 / 2 5 120 64 @ 4 × 4
+11 Convolution (v) 512 @ 4 × 4 × 64 / 1 524 288 1 048 064 512 @ 1 × 1
+12 BN + ELU 1 024 3 584 512 @ 1 × 1
+Dropout 0.5 0 0 512 @ 1 × 1
+13 Convolution 512 @ 1 × 1 × 512 / 1 262 144 523 776 512 @ 1 × 1
+14 BN + ELU 1 024 3 584 512 @ 1 × 1
+Dropout 0.5 0 0 512 @ 1 × 1
+15 Convolution k @ 1 × 1 × 512 / 1 512 · k 512 · k k @ 1 × 1
+Global avg Pooling 1 × 1 0 k k @ 1 × 1
+16 BN + Softmax 2k 7k k @ 1 × 1
+P 514k
++947 654
+520k
++87 870 996 179 200+2k
+Table 5.14.: Optimized architecture with 3 input channels of size 32 × 32. All convolutional layers
+use SAME padding, except for layer 11 which used VALID padding in order to decrease
+the feature map size to 1 × 1. If the input feature map is bigger than 32 × 32, for each
+power of two there are two Convolution + BN + ELU blocks and one Max pooling
+block added. This is the framed part in the table.
+
+5.15. Optimized Classifier
+32 × 32
+Input
+C 69@3 × 3/1
+BN + ELU
+C 69@3 × 3/1
+BN + ELU
+16 × 16
+max pooling 3 × 3/2
+C 64@3 × 3/1
+BN + ELU
+C 64@3 × 3/1
+BN + ELU
+8 × 8
+max pooling 3 × 3/2
+C 64@3 × 3/1
+BN + ELU
+4 × 4
+max pooling 3 × 3/2
+C* 512@4 × 4/1 (V)
+BN + ELU
+Dropout, p = 0.5
+1 × 1
+C* 512@1 × 1/1
+BN + ELU
+Dropout, p = 0.5
+C* k@1 × 1/1
+Global AVG pooling
+BN + Softmax
+Figure 5.16.: Architecture of the optimized model. C 32@3 × 3/1 is a convolutional layer with
+32 filters of kernel size 3 × 3 with stride 1. The * indicates that no bias is used.
+Dataset Single Model Accuracy Ensemble of 10
+Training Set Test Set Training Set Test Set
+Asirra 95.83 % σ = 4.70 90.75 % σ = 4.73 98.78 % 93.09 %
+CIFAR-10 94.58 % σ = 0.70 87.92 % σ = 0.46 96.47 % 89.86 %
+CIFAR-100 77.96 % σ = 2.18 64.42 % σ = 0.73 81.44 % 67.03 %
+GTSRB 100.00 % σ = 0.00 99.28 % σ = 0.10 100.00 % 99.51 %
+HASYv2 88.79 % σ = 0.45 85.36 % σ = 0.15 89.36 % 85.92 %
+MNIST 99.88 % σ = 0.10 99.48 % σ = 0.13 99.99 % 99.67 %
+STL-10 95.43 % σ = 3.57 75.09 % σ = 2.39 98.54 % 78.66 %
+SVHN 99.08 % σ = 0.07 96.37 % σ = 0.12 99.50 % 97.47 %
+Table 5.15.: Optimized model accuracy on eight datasets. The single model accuracy is the mean over the 10 models
+used in the ensemble. The empirical standard deviation σ of the accuracy is also given.
+CIFAR-10, CIFAR-100 and STL-10 models use test-time transformations. None of the
+models uses unlabeled data or data from other datasets. For MNIST, GTSRB, SVHN
+and HASY, no test time transformations are used.
+Network GPU Tensorflow Inference per Training
+1 Image 128 images time / epoch
+Optimized Default Intel i7-4930K 5 ms 432 ms 386 s
+Optimized Optimized Intel i7-4930K 4 ms 307 ms 315 s
+Optimized Default GeForce 940MX 4 ms 205 ms 192 s
+Optimized Default GTX 970 6 ms 41 ms 35 s
+Optimized Default GTX 980 3 ms 35 ms 27 s
+Optimized Default GTX 980 Ti 6 ms 36 ms 26 s
+Optimized Default GTX 1070 2 ms 24 ms 21 s
+Optimized Default Titan Black 4 ms 46 ms 43 s
+Table 5.16.: Speed comparison of the optimized model on CIFAR-10. The baseline model is
+evaluated on six Nvidia GPUs and one CPU. The weights for DenseNet-40-12 are taken
+from [Maj17]. Weights of the baseline model can be found at [Tho17b]. The optimized
+Tensorflow build makes use of SSE4.X, AVX, AVX2 and FMA instructions.
+
+5. Experimental Evaluation
+5.16. Early Stopping vs More Data
+A separate validation set is necessary for two reasons: (1) Early stopping and (2) preventing
+overfitting due to many experiments. To prevent overfitting, a different dataset can be used.
+For example, all decisions about hyperparameters in this thesis are based on CIFAR-100,
+but the network is finally trained and evaluated with the same hyperparameters on all
+datasets.2 The validation set can hence be removed if early stopping is removed. Instead,
+the validation data is used in a first run to determine the number of epochs necessary for
+training. In a second training run the validation data is added to the training set. The
+number of used epochs for the second run is given in Table 5.17.
+Dataset Mean epochs Train data classes average data / class
+Asirra 60 15 075 2 7538
+MNIST 41 54 000 10 5400
+SVHN 45 543 949 10 54 395
+CIFAR-10 84 45 000 10 4500
+HASYv2 92 136 116 369 369
+GTSRB 97 35 288 43 821
+STL-10 116 4500 10 450
+CIFAR-100 155 45 000 100 450
+Table 5.17.: Mean number of training epochs for the optimized model. For comparison, the total
+amount of used training data, the number of classes of the dataset and the average
+amount of data per class is given.
+Alternatively, the model can be trained with early stopping (ES) purely on the training
+loss. All three methods – early stopping on the validation set accuracy, early stopping on
+the training loss and training a fixed number of epochs are evaluated. While having more
+data helped with Asirra and CIFAR-100, the results as shown in Table 5.18 on the other
+datasets are only marginally different. For CIFAR-10, training with more data did not
+improve the results when the number of epochs is fixed, but notably improved the results
+when the training loss was used as the early stopping criterion.
+5.17. Regularization
+Stronger regularization might even improve the results when using the training loss as an
+early stopping criterion. ℓ2 regularization with a weighting factor of λ = 0.0001 is used in
+all other experiments. While the accuracy as shown in Table 5.19 does not show a clear
+pattern, the number of epochs increases with lower model regularization (see Table 5.20).
+2Except data augmentation and test time transformations.
+3Only 1 model is trained due to the long training time of 581 epochs and 12 hours for this model.
+4Only 3 models are in this ensemble due to the long training time of more than 8 hours per model.
+
+5.17. Regularization
+Dataset Early Stopping Fixed epochs
+val. acc train loss
+Asirra 93.09 % 96.01 %3 96.01 %
+CIFAR-10 89.86 % 91.75 % 88.88 %
+CIFAR-100 67.03 % 71.01 % 69.08 %
+HASYv2 85.92 % 82.89 %4 85.05 %
+MNIST 99.67 % 99.64 % 99.57 %
+STL-10 78.66 % 83.25 % 78.64 %
+Table 5.18.: Comparisons of trained optimized models with early stopping on the validation accuracy
+compared to training setups without a validation set and thus more training data. The
+second column uses the training loss as a stopping criterion, the third column uses a
+fixed number of epochs which is equal to the mean number of training epochs of the
+models with early stopping on the validation set accuracy.
+λ
+Single Model Accuracy Ensemble of 10
+Training Set Test Set Training Set Test Set
+λ = 0.01 73.83 % σ = 1.78 58.94 % σ = 1.33 87.78 % 69.98 %
+λ = 0.001 82.86 % σ = 0.89 63.03 % σ = 0.67 91.86 % 71.02 %
+λ = 0.0001 77.96 % σ = 2.18 64.42 % σ = 0.73 81.44 % 67.03 %
+Table 5.19.: Different choices of ℓ2 model regularization applied to the optimized model.
+λ min max mean std
+λ = 0.01 457 503 404.6 37.2
+λ = 0.001 516 649 588.4 41.6
+λ = 0.0001 579 833 696.1 79.1
+Table 5.20.: Training time in epochs of models with early stopping on training loss by different
+choices of ℓ2 model regularization applied to the optimized model.
+
+5. Experimental Evaluation
+
+6. Conclusion and Outlook
+This master thesis gave an extensive overview over the design patterns of CNNs in Chapter 2,
+the methods how CNNs can be analyzed and the principal directions of topology learning
+algorithms in Chapter 3.
+Confusion Matrix Ordering (CMO), originally developed as a method to make visualizations
+of confusion matrices easier to read (see Figure 5.13), was introduced as a class clustering
+algorithm in Chapter 4 and evaluated in Sections 4.2 and 5.4. The important insights are:
+• Ordering the classes in the confusion matrix allows to display the relevant parts even
+for several hundred classes.
+• A hierarchy of classifiers based on the classes does not improve the results on CIFAR-100. There are three possible reasons for this:
+– 32 px × 32 px is too low dimensional
+– 100 classes are not enough for this approach
+– More classes are always easier to distinguish if each new class comes with more
+data. One reason why this might be the case is that distinguishing the object
+from background has similar properties even for different classes.
+• Label smoothing had only a minor effect on the accuracy and no effect on the training
+time when a single base classifier was used to train with the smoothed labels by an
+ensemble of base classifiers.
+A baseline model was defined and evaluated on eight publicly available datasets. The
+baseline's topology and training setup are described in detail as well as its behavior during
+training and properties of the weights of the trained model.
+The influence of various hyperparameters is examined in Sections 5.5 to 5.12 for CIFAR-100.
+The insights of those experiments are:
+• Averaging ensembles of 10 base classifiers of the same architecture and trained with the
+same setup consistently improve the accuracy. The amount of improvement depends
+on the base classifiers, but the ensemble tends to improve the test accuracy by about
+one percentage point.
+• Wider networks learn in fewer epochs. This, however, does not mean that the
+
+6. Conclusion and Outlook
+wall-clock time is lower due to increased computation in forward- and backward
+passes.
+• Batch Normalization increases the training time noticeably. For the described ELU
+baseline model it also increases accuracy, which contradicts [CUH15].
+• The lower the batch size, the longer the time for each epoch of training and the fewer
+epochs need to be trained. Higher accuracy by lower batch sizes was empirically
+confirmed. The batch size, however, can also be too low.
+• An analysis of the weights of the baseline indicated that the bias of layers close to
+the output layer can be removed. This was experimentally confirmed.
+• It could not be confirmed that learned color space transformation, as described
+in [MSM16], improves the network. Neither with ELU nor with leaky rectified linear
+unit (LReLU) and α = 0.3.
+• It could be confirmed that ELU networks give better results than any other activation
+function on CIFAR-100. For the character datasets MNIST and HASYv2, however,
+ReLU, LReLU, PReLU, Softplus and ELU all performed similarly.
+• Changing the activation functions to the identity had very little impact on the HASYv2
+and MNIST classifiers. Note that those networks are still able to learn nonlinear
+decision boundaries due to max-pooling and SAME padding. For CIFAR-100, however,
+the accuracy drops by 6.64 % when ELU is replaced by the identity.
+Based on the results of those experiments, an optimized classifier was developed and
+evaluated on all eight datasets.
+The state of the art of STL-10 was improved from 74.80 % [ZMGL15] to 78.66 % without
+using the unlabeled part of the dataset. The state of the art of HASYv2 was improved
+from 81.00 % [Tho17a] to 85.92 %, for GTSRB the state of the art was improved from
+99.46 % [SL11] to 99.51 %, for Asirra it was improved from 82.7 % [Gol08] to 93.09 %.¹
+This was mainly achieved by the combination of ELU, Dropout, ensembles, training data
+augmentation and test-time transformations. The removal of the bias of layers close to the
+output and re-usage of those parameters in layers close to the input as well as using 3 × 3
+pooling instead of 2 × 2 pooling improved the baseline.
+While writing this master's thesis, several related questions could not be answered:
+• Deeper CNNs have generally higher accuracy, if trained long enough and if overfitting
+is not a problem. But at which subsampling-level does having more layers have the
+biggest effect? Can this question be answered before a deeper network is trained?
+• Is label smoothing helpful for noisy labels?
+¹The baseline is better than the optimized model on Asirra and on HASYv2.
+
+• How does the choice of activation functions influence residual architectures? Could the
+results be the same for different activation functions in architectures with hundreds
+of layers?
+• The results for the pooling kernel were inconclusive. Larger pooling kernels might be
+advantageous as well as fractional max pooling [Gra15].
+• Why is the mean weight update (see Figure 5.8) not decreasing? Is this an effect that
+can and should be fixed?
+• Why is softmax so much better than the logistic function? Can the reason be used to
+further improve ELU?
+Besides those questions, the influence of optimizers on time per epoch, epochs until
+convergence, total training time, memory consumption, accuracy of the models and standard
+deviation of the models was not evaluated. This, and the stopping criterion for training
+might be crucial for the model's quality.
+
+
+A. Figures, Tables and Algorithms
+(a) Original image (b) Smoothing filter (c) Laplace edge detection filter
+(d) Sobel edge detection filter (e) Prewitt edge detection filter (f) Canny filter
+Figure A.1.: Examples of image filters. Best viewed in electronic form.
+Layer 99-percentile interval
+filter bias
+1 [-0.50, 0.48] [-0.06, 0.07]
+3 [-0.21, 0.19] [-0.07, 0.07]
+5 [-0.20, 0.17] [-0.07, 0.05]
+7 [-0.15, 0.14] [-0.05, 0.06]
+9 [-0.14, 0.15] [-0.04, 0.03]
+11 [-0.08, 0.08] [-0.00, 0.00]
+13 [-0.08, 0.08] [-0.00, 0.00]
+15 [-0.10, 0.11] [-0.01, 0.01]
+Table A.1.: 99-percentile intervals for filter weights and bias weights by layer of a baseline model
+trained on CIFAR-100.
+
+Figure A.2.: The distribution of bias weights of a model without batch normalization trained on
+CIFAR-100.
+Algorithm 1 Simulated Annealing for minimizing Equation (4.1).
+Require: C ∈ ℕ^{n×n}, steps ∈ ℕ, T ∈ ℝ^+, c ∈ (0, 1)
+procedure SimulatedAnnealing(C, steps, T, c)
+bestScore ← accuracy(C)
+bestC ← C
+for i = 0; i < steps; i ← i + 1 do
+p ← randomFloat(0, 1)
+if p < 0.5 then ▷ Swap rows
+i ← randomInteger(1, . . . , n)
+j ← randomInteger(1, . . . , n) \ { i }
+p ← randomUniform(0, 1)
+C′ ← swap(C, i, j)
+s ← accuracy(C′)
+if p < exp((s − bestScore) / T) then
+C ← C′
+if s > bestScore then
+bestScore ← s
+bestC ← C
+T ← T · c
+else ▷ Move Block
+s ← randomInteger(1, . . . , n) ▷ Block start
+e ← randomInteger(s, . . . , n) ▷ Block end
+i ← randomInteger(1, . . . , n − (e − s)) ▷ Block insert position
+Move Block (s, . . . , e) to position i
+return bestC
+
+Figure A.3.: Maximum weight updates between epochs by layer. The model is the baseline model,
+but with layer 5 reduced to 3 filters.
+Function
+Single model Ensemble of 10 Epochs
+Training set Test set Train Test Range Mean
+Identity 87.92 % σ = 0.40 84.69 % σ = 0.08 88.59 % 85.43 % 92 – 140 114.5
+Logistic 81.46 % σ = 5.08 79.67 % σ = 4.85 86.38 % 84.60 % 58 – 91 77.3
+Softmax 88.19 % σ = 0.31 84.70 % σ = 0.15 88.69 % 85.43 % 124 – 171 145.8
+Tanh 88.41 % σ = 0.36 84.46 % σ = 0.27 89.24 % 85.45 % 89 – 123 108.7
+Softsign 88.00 % σ = 0.47 84.46 % σ = 0.23 88.77 % 85.33 % 77 – 119 104.1
+ReLU 88.93 % σ = 0.46 85.35 % σ = 0.21 89.35 % 85.95 % 96 – 132 102.8
+Softplus 88.42 % σ = 0.29 85.16 % σ = 0.15 88.90 % 85.73 % 108 – 143 121.0
+LReLU 88.61 % σ = 0.41 85.21 % σ = 0.05 89.07 % 85.83 % 87 – 117 104.5
+PReLU 89.62 % σ = 0.41 85.35 % σ = 0.17 90.10 % 86.01 % 85 – 111 100.5
+ELU 89.49 % σ = 0.42 85.35 % σ = 0.10 89.94 % 86.03 % 73 – 113 92.4
+Table A.2.: Test accuracy of adjusted baseline models trained with different activation functions on
+HASYv2. For LReLU, α = 0.3 was chosen.
+
+Figure A.4.: Sum of weight updates between epochs by layer. The model is the baseline model, but
+with layer 5 reduced to 3 filters.
+Function
+Single model Ensemble of 10 Epochs
+Training set Test set Train Test Range Mean
+Identity 87.49 % σ = 2.50 69.86 % σ = 1.41 89.78 % 71.90 % 51 – 65 53.4
+Logistic 45.32 % σ = 14.88 40.85 % σ = 12.56 51.06 % 45.49 % 38 – 93 74.6
+Softmax 87.90 % σ = 3.58 67.91 % σ = 2.32 91.51 % 70.96 % 108 – 150 127.5
+Tanh 85.38 % σ = 4.04 67.65 % σ = 2.01 90.47 % 71.29 % 48 – 92 65.2
+Softsign 88.57 % σ = 4.00 69.32 % σ = 1.68 93.04 % 72.40 % 55 – 117 83.2
+ReLU 94.35 % σ = 3.38 71.01 % σ = 1.63 98.20 % 74.85 % 52 – 98 75.5
+Softplus 83.03 % σ = 2.07 68.28 % σ = 1.74 93.04 % 75.99 % 56 – 89 68.9
+LReLU 93.83 % σ = 3.89 74.66 % σ = 2.11 97.56 % 78.08 % 52 – 120 80.1
+PReLU 95.53 % σ = 1.92 71.69 % σ = 1.37 98.17 % 74.69 % 59 – 101 78.8
+ELU 95.42 % σ = 3.57 75.09 % σ = 2.39 98.54 % 78.66 % 66 – 72 67.2
+Table A.3.: Test accuracy of adjusted baseline models trained with different activation functions on
+STL-10. For LReLU, α = 0.3 was chosen.
+
+B. Hyperparameters
+Hyperparameters are parameters of models which are not optimized automatically (e.g., by
+gradient descent), but by methods like random search [BB12], grid search [LBOM98] or
+manual search.
+B.1. Preprocessing
+Preprocessing used to be of major importance in machine learning. However, with the
+availability of data sets with hundreds of examples per class and the possibility of CNNs to
+learn features themselves, most models today rely on raw pixel values. The only common
+preprocessing is size normalization. In order to get a fixed input-size for a CNN, the
+following procedure can be used:
+• Take one or multiple crops of the image which have the desired aspect ratio.
+• Scale the crop(s) to the desired size.
+• In training, all crops can be used independently. In testing, all crops can be passed
+through the network and the output probability distributions can be fused, for
+example by averaging.
+Other preprocessing methods are:
+• Color space transformations (RGB, HSV, etc.)
+• Mean subtraction
+• Standardization of pixel-values to [0, 1] by dividing through 255 (used by [HLW16])
+• Dimensionality reduction
+– Principal component analysis (PCA): An unsupervised linear transformation
+which can be learned in the first hidden layer. It is hence doubtful if PCA
+improves the network.
+– Linear discriminant analysis (LDA)
+• Zero Components Analysis (ZCA) whitening (used by [KH09])
+
+B.2. Data augmentation
+Data augmentation techniques aim at making artificially more data from real data items by
+applying invariances. For computer vision, they include:
+Name Augmentation Factor Used by
+Horizontal flip 2 [KSH12, WYS+15]
+Vertical flip 2 [DWD15]¹
+Rotation ∼ 40 (δ = 20) [DSRB14]
+Scaling ∼ 14 (δ ∈ [0.7, 1.4]) [DSRB14]
+Crops 32² = 1024 [KSH12, WYS+15]
+Shearing [Gra15]
+GANs [BCW+17]
+Brightness ∼ 20 (δ ∈ [0.5, 1.5]) [How13]
+Hue 51 (δ = 0.1) [MRM15, DSRB14]
+Saturation ∼ 20 (δ = 0.5) [DSRB14]
+Contrast ∼ 20 (δ ∈ [0.5, 1.5]) [How13]
+Channel shift [KSH12]
+Table B.1.: Overview of data augmentation techniques. The augmentation factor is calculated for
+typical situations. For example, the augmentation factor for random crops is calculated
+for 256 px × 256 px images which are cropped to 224 px × 224 px.
+Taking several scales if the original is of higher resolution than desired is another technique.
+Combinations of the techniques above can also be applied. Please note that the order of
+operations does matter in many cases and hence the order is another augmentation factor.
+Less common, but also reasonable are:
+• Adding noise
+• Elastic deformations
+• Color casting (used by [WYS+15])
+• Vignetting (used by [WYS+15])
+• Lens distortion (used by [WYS+15])
+¹Vertical flipping combined with 180° rotation is equivalent to horizontal flipping
+
+B.3. Initialization
+Weight initializations are usually chosen to be small and centered around zero. One way to
+characterize many initialization schemes is by
+w ∼ α · U[−1, 1] + β · N (0, 1) + γ with α, β, γ ≥ 0
+Table B.2 shows six commonly used weight initialization schemes. Several schemes use the
+same idea, that unit-variance is desired for each layer as the training converges faster [IS15].
+Name α β γ Reference
+Constant α = 0 β = 0 γ ≥ 0 used by [ZF14]
+Xavier/Glorot uniform α = $\sqrt{\frac{6}{n_{in}+n_{out}}}$ β = 0 γ = 0 [GB10]
+Xavier/Glorot normal α = 0 β = $\left(\frac{2}{n_{in}+n_{out}}\right)^2$ γ = 0 [GB10]
+He α = 0 β = $\frac{2}{n_{in}}$ γ = 0 [HZRS15b]
+Orthogonal — — γ = 0 [SMG13]
+LSUV — — γ = 0 [MM15]
+Table B.2.: Weight initialization schemes of the form w ∼ α · U[−1, 1] + β · N (0, 1) + γ.
+$n_{in}$, $n_{out}$ are the number of units in the previous layer and the next layer. Typically,
+biases are initialized with constant 0 and weights by one of the other schemes to prevent
+unit-coadaptation. However, dropout makes it possible to use constant initialization for
+all parameters.
+LSUV and Orthogonal initialization cannot be described with this simple pattern.
+B.4. Objective function
+For classification tasks, the cross-entropy
+$$E_{CE}(W) = -\sum_{x \in X} \sum_{k=1}^{K} \left[ t_k^x \log(o_k^x) + (1 - t_k^x) \log(1 - o_k^x) \right]$$
+is by far the most commonly used objective function (e.g., used by [ZF14]). In this equation,
+X is the set of training examples, K is the number of classes, $t_k^x \in \{0, 1\}$ indicates if the
+training example x is of class k, $o_k^x$ is the output of the classifier for the training example x
+and class k.
+However, regularization terms weighted with a constant λ ∈ (0, +∞) are sometimes added:
+• LASSO: ℓ1 (e.g., used in [HPTD15])
+• Weight decay: ℓ2 (e.g., λ = 0.0005 as in [MSM16])
+• Orthogonality regularization ($|W^T \cdot W - I|$, see [VTKP17])
+
+B.5. Optimization Techniques
+Most relevant optimization techniques for CNNs are based on SGD, which updates the
+weights according to the rule
+$$w_{ji} \leftarrow w_{ji} + \Delta w_{ji} \quad \text{with} \quad \Delta w_{ji} = -\eta \frac{\partial E_x}{\partial w_{ji}}$$
+where η ∈ (0, 1), typically 0.01 (e.g., [MSM16]), is called the learning rate.
+A slight variation of SGD is mini-batch gradient descent with the mini-batch B (typically
+mini-batch sizes are |B| ∈ { 32, 64, 128, 256, 512 }, e.g. [ZF14]). Larger mini-batch sizes
+lead to sharp minima and thus poor generalization [KMN+16]. Smaller mini-batch sizes
+lead to longer training times due to computational overhead and to more training steps due
+to gradient noise.
+$$w_{ji} \leftarrow w_{ji} + \Delta w_{ji} \quad \text{with} \quad \Delta w_{ji} = -\eta \frac{\partial E_B}{\partial w_{ji}}$$
+Nine variations which adjust the learning rate during training are:
+• Momentum:
+$$w_{ji}^{(t+1)} \leftarrow w_{ji}^{(t)} + \Delta w_{ji}^{(t+1)} \quad \text{with} \quad \Delta w_{ji}^{(t+1)} = -\eta \frac{\partial E_B}{\partial w_{ji}} + \alpha \Delta w_{ji}^{(t)}$$
+with α ∈ [0, 1], typically 0.9 (e.g., [ZF14, MSM16])
+• Adagrad [DHS11]
+• RProp and the mini-batch version RMSProp [TH12]
+• Adadelta [Zei12]
+• Power Scheduling [Xu11]: $\eta(t) = \eta(0) (1 + a \cdot t)^{-c}$, where $t \in \mathbb{N}_0$ is the training step,
+a, c are constants.
+• Performance Scheduling [SHY+13]: Measure the error on the cross validation set and
+decrease the learning rate when the algorithm's improvement is below a threshold.
+• Exponential Decay Learning Rate [SHY+13]: $\eta(t) = \eta(0) \cdot 10^{-t/k}$ where $t \in \mathbb{N}_0$ is the
+training step, η(0) is the initial learning rate, $k \in \mathbb{N}_{\geq 1}$ is the number of training steps
+until the learning rate is decreased by $\frac{1}{10}$th.
+• Newbob Scheduling [new00]: Start with Performance Scheduling, then use Exponential
+Decay Scheduling.
+• Adam and AdaMax [KB14]
+
+• Nadam [Doz15]
+Some of those are explained in [Rud16].
+Other first-order gradient optimization methods are:
+• Quickprop [Fah88]
+• Nesterov Accelerated Momentum (NAG) [Nes83]
+• Conjugate Gradient method [Cha92]: Combines a line search for the step size with
+the gradient's direction.
+Higher-order gradient methods like Newton's method or quasi-Newton methods like BFGS
+and L-BFGS need the inverse of the Hessian matrix which is intractable for today’s CNNs.
+However, there are alternatives which do not use gradient information:
+• Genetic algorithms such as NeuroEvolution of Augmenting Topologies (NEAT) [SM02]
+• Simulated Annealing [vLA87]
+• Twiddle: A local hill-climbing algorithm explained by Sebastian Thrun and described
+on [Tho14b]
+There are also approaches which learn the optimization algorithm [ADG+16, LM16].
+
+B.6. Network Design
+CNNs have the following hyperparameters:
+• Depth: The number of layers
+• Width: The number of filters per layer
+• Layer and block connectivity graph
+• Layer and block hyperparameters:
+– Activation Functions as shown in Table B.3
+– For more, see Sections 2.2 and 2.3.
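+Name | Function ϕ(x) | Range of Values | ϕ′(x) | Used by
+Sign function† | $\begin{cases} +1 & \text{if } x \geq 0 \\ -1 & \text{if } x < 0 \end{cases}$ | { −1, 1 } | 0 | [KS02]
+Heaviside step function† | $\begin{cases} +1 & \text{if } x > 0 \\ 0 & \text{if } x < 0 \end{cases}$ | { 0, 1 } | 0 | [MP43]
+Logistic function | $\frac{1}{1+e^{-x}}$ | [0, 1] | $\frac{e^x}{(e^x+1)^2}$ | [DJ99]
+Tanh | $\frac{e^x - e^{-x}}{e^x + e^{-x}} = \tanh(x)$ | [−1, 1] | $\operatorname{sech}^2(x)$ | [LBBH98, Tho14a]
+ReLU† | $\max(0, x)$ | [0, +∞) | $\begin{cases} 1 & \text{if } x > 0 \\ 0 & \text{if } x < 0 \end{cases}$ | [KSH12]
+LReLU†² (PReLU) | $\varphi(x) = \max(\alpha x, x)$ | (−∞, +∞) | $\begin{cases} 1 & \text{if } x > 0 \\ \alpha & \text{if } x < 0 \end{cases}$ | [MHN13, HZRS15b]
+Softplus | $\log(e^x + 1)$ | (0, +∞) | $\frac{e^x}{e^x+1}$ | [DBB+01, GBB11]
+ELU | $\begin{cases} x & \text{if } x > 0 \\ \alpha(e^x - 1) & \text{if } x \leq 0 \end{cases}$ | (−∞, +∞) | $\begin{cases} 1 & \text{if } x > 0 \\ \alpha e^x & \text{otherwise} \end{cases}$ | [CUH15]
+Softmax‡ | $o(\mathbf{x})_j = \frac{e^{x_j}}{\sum_{k=1}^{K} e^{x_k}}$ | $[0, 1]^K$ | $o(\mathbf{x})_j \cdot \frac{\sum_{k=1}^{K} e^{x_k} - e^{x_j}}{\sum_{k=1}^{K} e^{x_k}}$ | [KSH12, Tho14a]
+Maxout‡ | $o(\mathbf{x}) = \max_{x \in \mathbf{x}} x$ | (−∞, +∞) | $\begin{cases} 1 & \text{if } x_i = \max \mathbf{x} \\ 0 & \text{otherwise} \end{cases}$ | [GWFM+13]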
+Table B.3.: Overview of activation functions. Functions marked with † are not differentiable at 0
+and functions marked with ‡ operate on all elements of a layer simultaneously. The
+hyperparameters α ∈ (0, 1) of Leaky ReLU and ELU are typically α = 0.01. Other
+activation functions like randomized leaky ReLUs exist [XWCL15], but are far less
+commonly used.
+Some functions are smoothed versions of others, like the logistic function for the
+Heaviside step function, tanh for the sign function, softplus for ReLU.
+Softmax is the standard activation function for the last layer of a classification network
+as it produces a probability distribution. See Figure B.1 for a plot of some of them.
+²α is a hyperparameter in leaky ReLU, but a learnable parameter in the parametric ReLU function.
+
+−2.0 −1.5 −1.0 −0.5 0.5 1.0 1.5 2.0
+−1.0
+−0.5
+0.5
+1.0
+1.5
+2.0
+x
+y
+$\varphi_1(x) = \frac{1}{1+e^{-x}}$
+$\varphi_2(x) = \tanh(x)$
+$\varphi_3(x) = \max(0, x)$
+$\varphi_4(x) = \log(e^x + 1)$
+$\varphi_5(x) = \max(x, e^x - 1)$
+Figure B.1.: Activation functions plotted in [−2, +2]. tanh and ELU are able to produce negative
+numbers. The image of ELU, ReLU and Softplus is not bound on the positive side,
+whereas tanh and the logistic function are always below 1.
+B.7. Regularization
+Regularization techniques aim to make the fitted function smoother and reduce overfitting.
+Regularization techniques are:
+• ℓ1, ℓ2, and Orthogonality regularization: See Appendix B.4
+• Max-norm regularization (e.g. used in [SHK+14])
+• Dropout (introduced in [SHK+14]), DropConnect (see [WZZ+13]), Stochastic Depth
+(see [HSL+16])
+• Feature scale clipping (see [ZF14])
+• Data augmentation (according to [ZBH+16])
+• Global average pooling (according to [ZKL+15])
+• Dense-Sparse-Dense training (see [HPN+16])
+• Soft targets (see [HVD15])
+
+
+C. Calculating Network Characteristics
+C.1. Parameter Numbers
+• A fully connected layer with n nodes, k inputs has n · (k + 1) parameters. The +1 is
+due to the bias.
+• A convolutional layer i with $k_i$ filters of size n × m being applied to $k_{i-1}$ feature maps
+has $k_i \cdot k_{i-1} (n \cdot m + 1)$ parameters. The +1 is due to the bias.
+• A fully connected layer with n nodes after k feature maps of size m1 × m2 has
+n · (k · m1 · m2 + 1) parameters.
+• A dense block with a depth of L, a growth rate of n and 3 × 3 filters has
+$L + n \cdot 3^2 + 3^2 \cdot n^2 \sum_{i=0}^{L} (L - i) = L + 9n + 9n^2 \frac{L^2 - L}{2}$
+parameters.
+According to [HPTD15], AlexNet has 60 million parameters which is roughly the number
+calculated in Table D.2.
+C.2. FLOPs
+The FLOPs of a layer depend on the implementation, the compiler and the hardware. Hence
+the following numbers give only rough estimates.
+In the following, nϕ denotes the number of FLOPs to compute the non-linearity ϕ. For
+simplicity, nϕ = 5 was chosen.
+• A fully connected layer with n nodes and k inputs has to calculate ϕ(W · x + b) with
+$W \in \mathbb{R}^{n \times k}$, $x \in \mathbb{R}^{k \times 1}$, $b \in \mathbb{R}^{n \times 1}$. It hence needs about n · (k + (k − 1) + 1) = 2nk
+additions / multiplications before the non-linearity ϕ is calculated. The total number
+of FLOPs is 2 · n · k + n · nϕ.
+• In the following, biases are ignored. A convolutional layer with $k_i$ filters of size n × m
+being applied to $k_{i-1}$ filter maps of size w × h results in $k_i$ filter maps of size w × h if
+padding is applied. For each element of each filter map, $n \cdot m \cdot k_{i-1}$ multiplications and
+$(n \cdot m \cdot k_{i-1} - 1)$ additions have to be made. This results in $(2 n m k_{i-1} - 1) \cdot (k_i \cdot w \cdot h)$
+operations. The total number of FLOPs is $(2 \cdot n \cdot m \cdot k_{i-1} - 1) \cdot (k_i \cdot w \cdot h) + k_i \cdot w \cdot h \cdot n_\varphi$.
+This is, of course, a naive way of calculating a convolution. There are other ways of
+calculating convolutions [LG16].
+
+• A fully connected layer with n nodes after k feature maps of size w×h needs 2n(k·w·h)
+FLOPs. The total number of FLOPs is 2n · (k · w · h) + n · nϕ.
+• As Dropout is only calculated during training, the number of FLOPs was set to 0.
+• The number of FLOPs for max pooling is dominated by the number of positions to
+which the pooling kernel is applied. For a feature map of size w × h a max pooling
+filter with stride s gets applied $\frac{w \cdot h}{s^2}$ times. The number of FLOPs per application depends
+on the kernel size. A 2 × 2 kernel is assumed to need 5 FLOPs.
+• The number of FLOPs for Batch Normalization is the same as the number of its
+parameters.
+Here are some references which give information for the FLOPs:
+• AlexNet
+– 1.5B in total [HPTD15].
+– 725M in total [KPY+15].
+– 3300M in total in Table D.2
+• VGG-16:
+– 15484M in total [HPTD15].
+– 31000M in total in Table D.3.
+• GoogleNet: 1566M in total [HPTD15].
+One can see that the numbers differ by a factor of 2 up to a factor of 4 for the same
+network.
+C.3. Memory Footprint
+The memory footprint of CNNs determines when networks can be used at all and if they
+can be trained efficiently. In order to be able to train CNNs efficiently, one weight update
+step has to fit in the memory of the GPU. This includes the following:
+• Activations: All activations of one mini-batch in order to calculate the gradients
+in the backward pass. This is the number of floats in the feature maps of all weight
+layers combined.
+• Weights
+• Optimization algorithm: The optimization algorithm introduces some overhead.
+For example, Adam stores two parameters per weight.
+At inference time, every two consecutive layers have to fit into memory. When the forward
+pass of layer A to layer B is calculated, the memory can be freed if no skip connections are
+used.
+
+D. Common Architectures
+In the following, some of the most important CNN architectures are explained. Understanding the development of these architectures helps understanding critical insights the machine
+learning community got in the past years for convolutional networks for image recognition.
+It starts with LeNet-5 from 1998, continues with AlexNet from 2012, VGG-16 D from
+2014, the Inception modules v1 to v3 as well as ResNets in 2015. The recently developed
+Inception-v4 is also covered.
+The summation row gives the sum of all floats for the output size column. This allows
+conclusions about the maximum mini-batch size which can be in memory for training.
+
+D.1. LeNet-5
+One of the first CNNs used was LeNet-5 [LBBH98]. LeNet-5 uses two times the common
+pattern of a single convolutional layer with tanh as a non-linear activation function followed
+by a pooling layer and three fully connected layers. One fully connected layer is used to
+get the right output dimension, another one is necessary to allow the network to learn a
+non-linear combination of the features of the feature maps.
+Its exact architecture is shown in Figure D.1 and described in Table D.1. It reaches a test
+error rate of 0.8 % on MNIST.
+Figure D.1.: Architecture of LeNet-5 as shown in [LBBH98].
+# Type Filters @
+Patch size / stride
+Parameters FLOPs Output size
+Input 0 0 1 @ 32 × 32
+1 Convolution 6 @ 5 × 5 × 1 / 1 156 307 800 6 @ 28 × 28
+2 Scaled average pooling 2 × 2 / 2 2 336 6 @ 14 × 14
+3 Convolution 16 @ 5 × 5 × 6 / 1 2 416 942 400 16 @ 10 × 10
+4 Scaled average pooling 2 × 2 / 2 2 1 600 16 @ 5 × 5
+5 Fully Connected 120 neurons 48 120 240 000 120
+6 Fully Connected 84 neurons 10 164 20 580 84
+7 Fully Connected (output) 10 neurons 850 1 730 10
+∑ 61 710 1 514 446 9118
+Table D.1.: LeNet-5 architecture: After layers 1, 3, 5 and 6 the tanh activation function is applied.
+After layer 7, the softmax function is applied. One can see that convolutional layers
+need much fewer parameters, but an order of magnitude more FLOPs per parameter
+than fully connected layers.
+
+D.2. AlexNet
+The first CNN which achieved major improvements on the ImageNet dataset was AlexNet [KSH12].
+Its architecture is shown in Figure D.2 and described in Table D.2. It has about $60 \cdot 10^6$ parameters. A trained AlexNet can be downloaded at www.cs.toronto.edu/~guerzhoy/tf_alexnet.
+Note that the uncompressed size is at least 60 965 224 floats · 32 bit/float ≈ 244 MB.
+Figure D.2.: Architecture of AlexNet as shown in [KSH12]: Convolutional Layers are followed
+by pooling layers multiple times. At the end, a fully connected network is applied.
+Conceptually, it is identical to the architecture of LeNet-5 (see Figure D.1).
+# Type Filters @
+Patch size / stride
+Parameters FLOPs Output size
+Input 3 @ 224 × 224
+1 Convolution 96 @ 11 × 11 × 3 / 4 34 944 211 M 96 @ 55 × 55
+LCN 12 M 96 @ 55 × 55
+2 Max pooling 3 × 3 / 2 0 301 k 96 @ 27 × 27
+3 Convolution 256 @ 5 × 5 × 48 / 1 307 456 448M 256 @ 13 × 13
+LCN 3 M 256 @ 13 × 13
+4 Max pooling 3 × 3 / 2 0 50 k 256 @ 13 × 13
+5 Convolution 384 @ 3 × 3 × 256 / 1 885 120 299 M 384 @ 13 × 13
+7 Convolution 384 @ 3 × 3 × 192 / 1 663 936 224 M 384 @ 13 × 13
+9 Convolution 256 @ 3 × 3 × 192 / 1 442 624 150 M 256 @ 13 × 13
+10 Max pooling 3 × 3 / 2 0 50 k 256 @ 6 × 6
+11 FC 4096 neurons 37 752 832 75 M 4096
+12 FC 4096 neurons 16 781 312 34 M 4096
+13 FC 1000 neurons 4 097 000 8 M 1000
+∑ 60 965 224 3300 M 1 122 568
+Table D.2.: AlexNet architecture: One special case of AlexNet is grouping of convolutions due to
+computational restrictions at the time of its development. This also reduces the number
+of parameters and allows parallel computation on separate GPUs. However, to make
+the architecture easier to compare, this grouping was ignored for the parameter count.
+The FLOPs are taken from [HPTD15] and combined with rough estimates for Local
+Contrast Normalization and max pooling.
+The calculated number of parameters was checked against the downloaded version. It
+also has 60 965 224 parameters.
+
+D.3. VGG-16 D
+Another widespread architecture is the VGG-16 (D) [SZ14]. VGG comes from the Visual
+Geometry Group in Oxford which developed this architecture. It has 16 layers which can
+learn parameters. A major difference compared to AlexNet is that VGG-16 uses only 3 × 3
+filters and is much deeper. A visualization of the architecture is shown in Figure D.3 and a
+detailed textual description is given in Table D.3.
+A trained VGG-16 D for Tensorflow can be downloaded at https://github.com/machrisaa/
+tensorflow-vgg. Note that the uncompressed size is at least 138 357 544 floats · 32 bit/float ≈
+520 MB. The downloaded Numpy binary file npz needs 553 MB without compression and
+514 MB with compression.
+224 × 224
+Input
+C 64@3 × 3/1
+C 64@3 × 3/1
+112 × 112
+max pooling 2 × 2/1
+C 128@3 × 3/1
+C 128@3 × 3/1
+56 × 56
+max pooling 2 × 2/1
+C 256@3 × 3/1
+C 256@3 × 3/1
+C 256@3 × 3/1
+28 × 28
+max pooling 2 × 2/1
+C 512@3 × 3/1
+C 512@3 × 3/1
+C 512@3 × 3/1
+14 × 14
+max pooling 2 × 2/1
+C 512@3 × 3/1
+C 512@3 × 3/1
+C 512@3 × 3/1
+7 × 7
+max pooling 2 × 2/1
+Fully Connected 4096
+Dropout, p = 0.5
+Fully Connected 4096
+Dropout, p = 0.5
+Fully Connected 1000
+Figure D.3.: Architecture of VGG-16 D. C 512@3 × 3/1 is a convolutional layer with 512 filters of
+kernel size 3 × 3 with stride 1. All convolutional layers use SAME padding.
+
+# Type Filters @
+Patch size / stride
+Parameters FLOPs Output size
+Input 3 @ 224 × 224
+1 Convolution 64 @ 3 × 3 × 3 / 1 1 792 186 M 64 @ 224 × 224
+2 Convolution 64 @ 3 × 3 × 64 / 1 36 928 3712M 64 @ 224 × 224
+Max pooling 2 × 2 / 2 0 2 M 64 @ 112 × 112
+3 Convolution 128 @ 3 × 3 × 64 / 1 73 856 1856 M 128 @ 112 × 112
+4 Convolution 128 @ 3 × 3 × 128 / 1 147 584 3705 M 128 @ 112 × 112
+Max pooling 2 × 2 / 2 0 1 M 128 @ 56 × 56
+5 Convolution 256 @ 3 × 3 × 128 / 1 295 168 1853 M 256 @ 56 × 56
+6 Convolution 256 @ 3 × 3 × 256 / 1 590 080 3703 M 256 @ 56 × 56
+7 Convolution 256 @ 3 × 3 × 256 / 1 590 080 3703 M 256 @ 56 × 56
+Max pooling 2 × 2 / 2 0 <1 M 256 @ 28 × 28
+8 Convolution 512 @ 3 × 3 × 256 / 1 1 180 160 1851 M 512 @ 28 × 28
+9 Convolution 512 @ 3 × 3 × 512 / 1 2 359 808 3701 M 512 @ 28 × 28
+10 Convolution 512 @ 3 × 3 × 512 / 1 2 359 808 3701 M 512 @ 28 × 28
+Max pooling 2 × 2 / 2 0 <1 M 512 @ 14 × 14
+11 Convolution 512 @ 3 × 3 × 512 / 1 2 359 808 925 M 512 @ 14 × 14
+12 Convolution 512 @ 3 × 3 × 512 / 1 2 359 808 925 M 512 @ 14 × 14
+13 Convolution 512 @ 3 × 3 × 512 / 1 2 359 808 925 M 512 @ 14 × 14
+Max pooling 2 × 2 / 2 0 <1 M 512 @ 7 × 7
+14 FC 4096 neurons 102 764 544 206 M 4096
+Dropout 0 0 4096
+15 FC 4096 neurons 16 781 312 34 M 4096
+Dropout 0 0 4096
+16 FC 1000 neurons 4 097 000 8 M 1000
+∑ 138 357 544 31 000 M 15 245 800
+Table D.3.: VGG-16 D architecture: The authors chose to give only layers a number which have
+learnable parameters. All convolutions are zero padded to prevent size changes and
+use ReLU activation functions. The channels mean is subtracted from each pixel as
+a preprocessing step (−103.939, −116.779, −123.68). As Dropout is only calculated
+during training time, the number of FLOPs is 0. The dropout probability is 0.5.
+The calculated number of parameters was checked against the downloaded version. It
+also has 138 357 544 parameters.
+
+D.4. GoogleNet, Inception v2 and v3
+The large number of parameters and operations is a problem when such models should get
+applied in practice to thousands of images. In order to reduce the computational cost while
+maintaining the classification quality, GoogleNet [SLJ+15] and the Inception module were
+developed. The Inception module essentially only computes 1 × 1 filters, 3 × 3 filters and
+5 × 5 filters in parallel, but applied bottleneck 1 × 1 filters before to reduce the number of
+parameters. It is shown in Figure D.4.
+Figure D.4.: Inception module
+Image source: [SLJ+15]
+Compared to GoogleNet, Inception v2 [SVI+15] removed the 5 × 5 filters and replaced
+them by two successive layers of 3 × 3 filters. A visualization of an Inception v2 module
+is given in Figure D.5. Additionally, Inception v2 applies successive asymmetric filters to
+approximate symmetric filters with fewer parameters. The authors call this approach filter
+factorization.
+Inception v3 introduced Batch Normalization to the network [SVI+15].
+Figure D.5.: Inception v2 module
+Image source: [SVI+15]
+
+D.5. Inception-v4
+Inception-v4 as described in [SIV16] consists of four main building blocks: The stem,
+Inception A, Inception B and Inception C. To quote the authors: Inception-v4 is a deeper,
+wider and more uniform simplified architecture than Inception-v3. The stem, Reduction A
+and Reduction B use max-pooling, whereas Inception A, Inception B and Inception C use
+average pooling. The stem, module B and module C use separable convolutions.
+# × Type Parameters Output size
+Input 3 @ 299 × 299
+1 Stem 605 728 384 @ 35 × 35
+2 4× Inception A 317 632 384 @ 35 × 35
+3 Reduction A 2 306 112 1024 @ 17 × 17
+4 7× Inception B 2 936 256 1024 @ 17 × 17
+5 Reduction B 2 747 392 1536 @ 8 × 8
+6 3× Inception C 4 553 088 1536 @ 8 × 8
+Global Average Pooling 0 1536 @ 1 × 1
+Dropout (p=0.8) 0 1536 @ 1 × 1
+7 Softmax 1 537 000 1000
+∑ 42 679 816
+Table D.4.: Inception-v4 network.
+
+
+E. Datasets
+Well-known benchmark datasets for classification problems in computer vision are listed
+in Table E.1. The best results known to me are given in Table E.2. However, every semantic
+segmentation dataset (e.g., PASCAL VOC) can also be used to benchmark image classifiers
+using Algorithm 2.
+Database
+Image Resolution
+(width × height)
+Number
+of
+Images
+Number
+of
+Classes
+Channels Data source
+MNIST 28 px × 28 px 70 000 10 1 [YL98, LBBH98]
+HASYv2 32 px × 32 px 168 233 369 1 [Tho17a]
+SVHN 32 px × 32 px 630 420 10 3
+[NWC+11b],
+[NWC+11a]
+CIFAR-10 32 px × 32 px 60 000 10 3 [Kri, KH09]
+CIFAR-100 32 px × 32 px 60 000 100 3 [Kri, KH09]
+STL-10 96 px × 96 px 13 000 10 3 [CLN11, CLN10]
+Caltech-101 (80 px − 3481 px)
+×(92 px − 3999 px) 9144 102 3 [FFP03, FFFP06]
+Caltech-256 (75 px − 7913 px)
+×(75 px − 7913 px) 30 607 257 3 [Gri06, GG07]
+ILSVRC 2012¹
+(8 px − 9331 px)
+×(10 px − 6530 px) 1.2 · 10^6 1000 3 [Ima12, RDS+14]
+Places365²
+(290 px − 3158 px)
+×(225 px − 2630 px)
+1.8 · 10^6 365 3 [Zho16, ZKL+16]
+GTSRB (25 px − 266 px)
+×(25 px − 232 px) 51 839 43 3 [SSSI, SSSI12]
+Asirra³
+(4 px − 500 px)
+×(4 px − 500 px) 25 000 2 3 [Asi17, EDHS07]
+Graz-02 480 px × 640 px
+and 640 px × 480 px 1096 3 3 [Mar08, MS07]
+Table E.1.: An overview of publicly available image databases for classification. The number
+of images column gives the sum of the training and the test images. Some datasets, like
+SVHN, have additional unlabeled data which is not given in this table.
+¹ ImageNet Large Scale Visual Recognition Competition
+² The dimensions are only calculated for the validation set.
+³ Asirra is a CAPTCHA created by Microsoft and was used in the “Cats vs Dogs” competition on Kaggle.
+
+Dataset Model type / name Result Score Achieved /
+Claimed by
+MNIST — 0.21 % error [WZZ+13]
+HASYv2 TF-CNN 81.00 % accuracy [Tho17a]
+SVHN DenseNet (k = 24) 1.59 % error [HLW16]
+CIFAR-10 DenseNet-BC (k = 40) 3.46 % error [HLW16]
+CIFAR-100 WRN-28-10 16.21 % error [LH16]
+STL-10 SWWAE-4layer 74.80 % accuracy [ZMGL15]
+Caltech-101 SPP-net (pretrained) 93.42 %±0.5 % accuracy [HZRS14]
+Caltech-256 ZF-Net (pretrained) 74.2 %±0.3 % accuracy [ZF14]
+ImageNet 2012 ResNet ensemble 3.57 % Top-5 error [HZRS15a]
+GTSRB MCDNN 99.46 % accuracy [SL11]
+Asirra SVM 82.7 % accuracy [Gol08]
+Graz-02 Optimal NBNN 78.98 % accuracy [BMDP10]
+Table E.2.: An overview of state-of-the-art results achieved on computer vision datasets.
+Algorithm 2 Create a classification dataset from a semantic segmentation dataset
+Require: Semantic segmentation dataset (DS)
+procedure CreateDataset(Annotated dataset DS)
+DC ← List
+w ← desired image width
+h ← desired image height
+for Image and associated label (x, y) in DS do
+i ← randint(0, x.width − w)
+j ← randint(0, x.height − h)
+cL ← crop(y,(i, j),(i + w, j + h))
+if at least 50% of the pixels in cL are of one class then
+cI ← crop(x,(i, j),(i + w, j + h))
+DC.append((cI , cL))
+return (DC)
+
+F. List of Tables
+2.1 Pooling types . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 8
+5.1 Baseline architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 39
+5.2 Baseline model evaluation . . . . . . . . . . . . . . . . . . . . . . . . . . . . 40
+5.3 Baseline model speed comparison . . . . . . . . . . . . . . . . . . . . . . . . 40
+5.4 Clustering errors for spectral clustering and CMO on CIFAR-100 . . . . . . 52
+5.5 Differences in spectral clustering and CMO. . . . . . . . . . . . . . . . . . . 52
+5.6 Accuracies for hierarchy of classifiers on CIFAR-100 . . . . . . . . . . . . . . 53
+5.7 Parameters of models with increased capacity . . . . . . . . . . . . . . . . . 54
+5.8 Training time for models with increased capacity . . . . . . . . . . . . . . . 54
+5.9 Baseline model training time . . . . . . . . . . . . . . . . . . . . . . . . . . 59
+5.10 Activation function properties . . . . . . . . . . . . . . . . . . . . . . . . . . 62
+5.11 Activation function evaluation results on CIFAR-100 . . . . . . . . . . . . . 63
+5.12 Activation function timing results on CIFAR-100 . . . . . . . . . . . . . . . 63
+5.13 Activation function evaluation results on MNIST . . . . . . . . . . . . . . . 64
+5.14 Optimized architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 66
+5.15 Optimized model evaluation results . . . . . . . . . . . . . . . . . . . . . . . 67
+5.16 Optimized model speed comparison . . . . . . . . . . . . . . . . . . . . . . . 67
+5.17 Optimized model mean training epochs . . . . . . . . . . . . . . . . . . . . . 68
+5.18 Optimized model trained with early stopping vs training with more data . . 69
+5.19 Model regularization with early stopping on training loss . . . . . . . . . . . 69
+5.20 Model regularization with early stopping on training loss - Training time . . 69
+A.1 99-percentile intervals for filter weights on CIFAR-100 . . . . . . . . . . . . 75
+A.2 Activation function evaluation results on HASYv2 . . . . . . . . . . . . . . . 77
+A.3 Activation function evaluation results on STL-10 . . . . . . . . . . . . . . . 78
+B.1 Data augmentation techniques . . . . . . . . . . . . . . . . . . . . . . . . . . 80
+B.2 Weight initialization schemes . . . . . . . . . . . . . . . . . . . . . . . . . . 81
+B.3 Activation functions . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 84
+D.1 LeNet-5 architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 90
+D.2 AlexNet architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 91
+D.3 VGG-16 D architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 93
+D.4 Inception-v4 network . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 95
+
+E.1 Image Benchmark datasets . . . . . . . . . . . . . . . . . . . . . . . . . . . . 97
+E.2 State of the Art results . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 98
+
+G. List of Figures
+2.1 Application of a single image filter (Convolution) . . . . . . . . . . . . . . . 3
+2.2 Application of a convolutional layer . . . . . . . . . . . . . . . . . . . . . . . 6
+2.3 Max pooling . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 8
+2.4 ResNet module . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11
+2.5 Aggregation block . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 12
+2.6 Dense block . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 13
+2.7 Validation curve . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 17
+2.8 Validation curve with plateaus . . . . . . . . . . . . . . . . . . . . . . . . . 18
+2.9 Learning curve . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 20
+2.10 Occlusion analysis . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 25
+2.11 Filter visualization . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 26
+3.1 Cascade-correlation network . . . . . . . . . . . . . . . . . . . . . . . . . . . 28
+4.1 Class Tree . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 33
+5.1 Baseline architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 39
+5.2 Baseline model filter weight distribution . . . . . . . . . . . . . . . . . . . . 42
+5.3 Baseline model bias weight distribution . . . . . . . . . . . . . . . . . . . . . 42
+5.4 Baseline model γ distribution . . . . . . . . . . . . . . . . . . . . . . . . . . 43
+5.5 Baseline model β distribution . . . . . . . . . . . . . . . . . . . . . . . . . . 43
+5.6 Baseline model filter weight range distribution . . . . . . . . . . . . . . . . . 44
+5.7 Baseline model CIFAR-100 validation accuracy . . . . . . . . . . . . . . . . 45
+5.8 Baseline Weight updates (mean) . . . . . . . . . . . . . . . . . . . . . . . . 46
+5.9 Baseline Weight updates (maximum) . . . . . . . . . . . . . . . . . . . . . . 47
+5.10 Baseline Weight updates (sum) . . . . . . . . . . . . . . . . . . . . . . . . . 47
+5.11 Confusion matrices for CIFAR-10 . . . . . . . . . . . . . . . . . . . . . . . . 48
+5.12 Confusion matrices for GTSRB . . . . . . . . . . . . . . . . . . . . . . . . . 49
+5.13 Confusion matrices for HASYv2 . . . . . . . . . . . . . . . . . . . . . . . . . 50
+5.14 Confusion matrix of CIFAR-100 . . . . . . . . . . . . . . . . . . . . . . . . . 51
+5.15 Mean weight updates of model with bottleneck . . . . . . . . . . . . . . . . 55
+5.16 Optimized architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 67
+A.1 Image Filters . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 75
+A.2 Bias weight distribution without BN . . . . . . . . . . . . . . . . . . . . . . 76
+
+A.3 Maximum weight updates of baseline with bottleneck . . . . . . . . . . . . . 77
+A.4 Sum of weight updates of baseline with bottleneck . . . . . . . . . . . . . . 78
+B.1 Activation functions . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 85
+D.1 LeNet-5 architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 90
+D.2 AlexNet architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 91
+D.3 VGG-16 D architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 92
+D.4 Inception module . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 94
+D.5 Inception v2 module . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 94
+
+H. Bibliography
+[AAB+16] M. Abadi, A. Agarwal et al., “Tensorflow: Large-scale machine learning on
+heterogeneous distributed systems,” arXiv preprint arXiv:1603.04467, Mar.
+2016. [Online]. Available: https://arxiv.org/abs/1603.04467
+[ABKS99] M. Ankerst, M. M. Breunig et al., “OPTICS: Ordering points to identify the
+clustering structure,” in ACM Sigmod record, vol. 28, no. 2. ACM, 1999, pp.
+49–60.
+[ADG+16] M. Andrychowicz, M. Denil et al., “Learning to learn by gradient descent by
+gradient descent,” in Advances in Neural Information Processing Systems 29
+(NIPS), D. D. Lee, M. Sugiyama et al., Eds. Curran Associates, Inc., Mar.
+2016, pp. 3981–3989. [Online]. Available: http://papers.nips.cc/paper/6461-learning-to-learn-by-gradient-descent-by-gradient-descent.pdf
+
+[AM15] M. T. Alexander Mordvintsev, Christopher Olah, “Inceptionism:
+Going deeper into neural networks,” Jun. 2015. [Online]. Available: https://research.googleblog.com/2015/06/inceptionism-going-deeper-into-neural.html
+[Asi17] “Kaggle cats and dogs dataset,” Oct. 2017. [Online]. Available: https:
+//www.microsoft.com/en-us/download/details.aspx?id=54765
+[BB12] J. Bergstra and Y. Bengio, “Random search for hyper-parameter optimization,”
+Journal of Machine Learning Research, vol. 13, no. Feb, pp. 281–305,
+Feb. 2012. [Online]. Available: http://jmlr.csail.mit.edu/papers/volume13/
+bergstra12a/bergstra12a.pdf
+[BCW+17] J. Bao, D. Chen et al., “CVAE-GAN: Fine-grained image generation through
+asymmetric training,” arXiv preprint arXiv:1703.10155, Mar. 2017. [Online].
+Available: https://arxiv.org/abs/1703.10155
+[BDLB09] J. Bergstra, G. Desjardins et al., “Quadratic polynomials learn better image features,” Département d’Informatique et de Recherche Opérationnelle,
+Université de Montréal, Tech. Rep. 1337, 2009.
+[BGNR16] B. Baker, O. Gupta et al., “Designing neural network architectures using
+reinforcement learning,” arXiv preprint arXiv:1611.02167, Nov. 2016. [Online].
+Available: https://arxiv.org/abs/1611.02167
+
+[BM93] U. Bodenhausen and S. Manke, Automatically Structured Neural
+Networks For Handwritten Character And Word Recognition. London:
+Springer London, Sep. 1993, pp. 956–961. [Online]. Available: http:
+//dx.doi.org/10.1007/978-1-4471-2063-6_283
+[BMDP10] R. Behmo, P. Marcombes et al., “Towards optimal naive Bayes nearest
+neighbor,” in European Conference on Computer Vision (ECCV). Springer,
+2010, pp. 171–184.
+[BPL10] Y.-L. Boureau, J. Ponce, and Y. LeCun, “A theoretical analysis of
+feature pooling in visual recognition,” in International Conference on
+Machine Learning (ICML), no. 27, 2010, pp. 111–118. [Online]. Available:
+http://yann.lecun.com/exdb/publis/pdf/boureau-icml-10.pdf
+[BSF94] Y. Bengio, P. Simard, and P. Frasconi, “Learning long-term dependencies
+with gradient descent is difficult,” IEEE transactions on neural networks,
+vol. 5, no. 2, pp. 157–166, 1994.
+[Cha92] C. Charalambous, “Conjugate gradient algorithm for efficient training
+of artificial neural networks,” IEEE Proceedings G-Circuits, Devices
+and Systems, vol. 139, no. 3, pp. 301–310, 1992. [Online]. Available:
+http://ieeexplore.ieee.org/document/143326/
+[Cho15] F. Chollet, “Keras,” https://github.com/fchollet/keras, 2015.
+[CLN10] A. Coates, H. Lee, and A. Y. Ng, “An analysis of single-layer networks
+in unsupervised feature learning,” Ann Arbor, vol. 1001, no. 48109,
+p. 2, 2010. [Online]. Available: http://cs.stanford.edu/~acoates/papers/
+coatesleeng_aistats_2011.pdf
+[CLN11] A. Coates, H. Lee, and A. Y. Ng, “STL-10 dataset,” 2011. [Online]. Available:
+http://cs.stanford.edu/~acoates/stl10
+[CMS12] D. Ciregan, U. Meier, and J. Schmidhuber, “Multi-column deep neural
+networks for image classification,” in Conference on Computer Vision and
+Pattern Recognition (CVPR). IEEE, Feb. 2012, pp. 3642–3649. [Online].
+Available: https://arxiv.org/abs/1202.2745v1
+[CUH15] D.-A. Clevert, T. Unterthiner, and S. Hochreiter, “Fast and accurate
+deep network learning by exponential linear units (ELUs),” arXiv
+preprint arXiv:1511.07289, Nov. 2015. [Online]. Available: https:
+//arxiv.org/abs/1511.07289
+[CWV+14] S. Chetlur, C. Woolley et al., “cuDNN: Efficient primitives for deep
+learning,” arXiv preprint arXiv:1410.0759, Oct. 2014. [Online]. Available:
+https://arxiv.org/abs/1410.0759
+
+[DBB+01] C. Dugas, Y. Bengio et al., “Incorporating second-order functional
+knowledge for better option pricing,” in Advances in Neural Information Processing Systems 13 (NIPS), T. K. Leen, T. G. Dietterich,
+and V. Tresp, Eds. MIT Press, 2001, pp. 472–478. [Online].
+Available: http://papers.nips.cc/paper/1920-incorporating-second-order-functional-knowledge-for-better-option-pricing.pdf
+[DDFK16] S. Dieleman, J. De Fauw, and K. Kavukcuoglu, “Exploiting cyclic symmetry
+in convolutional neural networks,” arXiv preprint arXiv:1602.02660, Feb.
+2016. [Online]. Available: https://arxiv.org/abs/1602.02660
+[DHS11] J. Duchi, E. Hazan, and Y. Singer, “Adaptive subgradient methods for
+online learning and stochastic optimization,” Journal of Machine Learning
+Research, vol. 12, no. Jul, pp. 2121–2159, 2011. [Online]. Available:
+http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf
+[DHS16] J. Dai, K. He, and J. Sun, “Instance-aware semantic segmentation via
+multi-task network cascades,” in Conference on Computer Vision and Pattern
+Recognition (CVPR). IEEE, 2016, pp. 3150–3158. [Online]. Available:
+https://arxiv.org/abs/1512.04412
+[DJ99] W. Duch and N. Jankowski, “Survey of neural transfer functions,” Neural
+Computing Surveys, vol. 2, no. 1, pp. 163–212, 1999. [Online]. Available:
+ftp://ftp.icsi.berkeley.edu/pub/ai/jagota/vol2_6.pdf
+[Doz15] T. Dozat, “Incorporating Nesterov momentum into Adam,” Stanford
+University, Tech. Rep., 2015. [Online]. Available: http://cs229.stanford.edu/
+proj2015/054_report.pdf
+[DSRB14] A. Dosovitskiy, J. T. Springenberg et al., “Discriminative unsupervised
+feature learning with convolutional neural networks,” in Advances in Neural
+Information Processing Systems 27 (NIPS), Z. Ghahramani, M. Welling
+et al., Eds. Curran Associates, Inc., 2014, pp. 766–774. [Online].
+Available: http://papers.nips.cc/paper/5548-discriminative-unsupervised-feature-learning-with-convolutional-neural-networks.pdf
+[DWD15] S. Dieleman, K. W. Willett, and J. Dambre, “Rotation-invariant convolutional
+neural networks for galaxy morphology prediction,” Monthly notices of the
+royal astronomical society, vol. 450, no. 2, pp. 1441–1459, 2015.
+[EDHS07] J. Elson, J. J. Douceur et al., “Asirra: A CAPTCHA that
+exploits interest-aligned manual image categorization,” in ACM Conference on Computer and Communications Security (CCS), no. 14.
+Association for Computing Machinery, Inc., Oct. 2007. [Online].
+
+Available: https://www.microsoft.com/en-us/research/publication/asirra-a-captcha-that-exploits-interest-aligned-manual-image-categorization/
+[EKS+96] M. Ester, H.-P. Kriegel et al., “A density-based algorithm for discovering
+clusters in large spatial databases with noise.” in Kdd, vol. 96, no. 34, 1996,
+pp. 226–231.
+[ES03] A. E. Eiben and J. E. Smith, Introduction to evolutionary computing.
+Springer, 2003, vol. 53. [Online]. Available: https://dx.doi.org/10.1007/978-3-662-44874-8
+
+[Fah88] S. E. Fahlman, “An empirical study of learning speed in back-propagation
+networks,” 1988. [Online]. Available: http://repository.cmu.edu/cgi/
+viewcontent.cgi?article=2799&context=compsci
+[FFFP06] L. Fei-Fei, R. Fergus, and P. Perona, “One-shot learning of object
+categories,” IEEE transactions on pattern analysis and machine intelligence,
+vol. 28, no. 4, pp. 594–611, Apr. 2006. [Online]. Available: http:
+//vision.stanford.edu/documents/Fei-FeiFergusPerona2006.pdf
+[FFP03] R. F. Fei-Fei and P. Perona, “Caltech 101,” 2003. [Online]. Available: http:
+//www.vision.caltech.edu/Image_Datasets/Caltech101/Caltech101.html
+[FGMR10] P. F. Felzenszwalb, R. B. Girshick et al., “Object detection with discriminatively trained part-based models,” IEEE transactions on pattern analysis and
+machine intelligence, vol. 32, no. 9, pp. 1627–1645, 2010.
+[FL89] S. E. Fahlman and C. Lebiere, “The cascade-correlation learning architecture,”
+1989. [Online]. Available: http://repository.cmu.edu/compsci/1938/
+[GB10] X. Glorot and Y. Bengio, “Understanding the difficulty of training deep
+feedforward neural networks.” in Aistats, vol. 9, 2010, pp. 249–256. [Online].
+Available: http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf
+[GBB11] X. Glorot, A. Bordes, and Y. Bengio, “Deep sparse rectifier neural
+networks.” in Aistats, vol. 15, no. 106, 2011, p. 275. [Online]. Available:
+http://www.jmlr.org/proceedings/papers/v15/glorot11a/glorot11a.pdf
+[GDDM14] R. Girshick, J. Donahue et al., “Rich feature hierarchies for accurate object
+detection and semantic segmentation,” in Conference on Computer Vision
+and Pattern Recognition (CVPR). IEEE, 2014, pp. 580–587. [Online].
+Available: https://arxiv.org/abs/1311.2524
+[GG07] P. P. Greg Griffin, Alex Holub, “Caltech-256 object category dataset,” Apr.
+2007. [Online]. Available: http://authors.library.caltech.edu/7694/
+
+[GG16] Y. Gal and Z. Ghahramani, “Bayesian convolutional neural networks with
+Bernoulli approximate variational inference,” arXiv preprint arXiv:1506.02158,
+Jan. 2016. [Online]. Available: https://arxiv.org/abs/1506.02158v6
+[GJ02] M. R. Garey and D. S. Johnson, Computers and intractability. wh freeman
+New York, 2002, vol. 29.
+[GJS76] M. R. Garey, D. S. Johnson, and L. Stockmeyer, “Some simplified NP-complete
+graph problems,” Theoretical computer science, vol. 1, no. 3, pp. 237–267,
+1976.
+[Gol08] P. Golle, “Machine learning attacks against the Asirra CAPTCHA,” in ACM
+conference on Computer and communications security (CCS), no. 15. ACM,
+2008, pp. 535–542.
+[Gra15] B. Graham, “Fractional max-pooling,” arXiv preprint arXiv:1412.6071, May
+2015. [Online]. Available: https://arxiv.org/abs/1412.6071
+[Gri06] A. P. Griffin, G. Holub, “Caltech 256,” 2006. [Online]. Available:
+http://www.vision.caltech.edu/Image_Datasets/Caltech256/
+[GWFM+13] I. J. Goodfellow, D. Warde-Farley et al., “Maxout networks.” ICML,
+vol. 28, no. 3, pp. 1319–1327, 2013. [Online]. Available: http:
+//www.jmlr.org/proceedings/papers/v28/goodfellow13.pdf
+[HAE16] M. Huh, P. Agrawal, and A. A. Efros, “What makes ImageNet good for
+transfer learning?” arXiv preprint arXiv:1608.08614, Aug. 2016. [Online].
+Available: https://arxiv.org/abs/1608.08614
+[Han89] S. J. Hanson, “Meiosis networks.” in NIPS, 1989, pp. 533–541. [Online].
+Available: http://papers.nips.cc/paper/227-meiosis-networks.pdf
+[Har15] M. Harris, “New features in CUDA 7.5,” Jul. 2015. [Online]. Available:
+https://devblogs.nvidia.com/parallelforall/new-features-cuda-7-5/
+[HLW16] G. Huang, Z. Liu, and K. Q. Weinberger, “Densely connected convolutional
+networks,” arXiv preprint arXiv:1608.06993, Aug. 2016. [Online]. Available:
+https://arxiv.org/abs/1608.06993v1
+[HM16] M. Hardt and T. Ma, “Identity matters in deep learning,” arXiv
+preprint arXiv:1611.04231, Nov. 2016. [Online]. Available: https:
+//arxiv.org/abs/1611.04231
+[How13] A. G. Howard, “Some improvements on deep convolutional neural network
+based image classification,” arXiv preprint arXiv:1312.5402, Dec. 2013.
+[Online]. Available: https://arxiv.org/abs/1312.5402
+
+[HPK11] J. Han, J. Pei, and M. Kamber, Data mining: concepts and techniques.
+Elsevier, 2011.
+[HPN+16] S. Han, J. Pool et al., “DSD: Regularizing deep neural networks with
+dense-sparse-dense training flow,” arXiv preprint arXiv:1607.04381, Jul. 2016.
+[Online]. Available: https://arxiv.org/abs/1607.04381
+[HPTD15] S. Han, J. Pool et al., “Learning both weights and connections for efficient
+neural network,” in Advances in Neural Information Processing Systems 28
+(NIPS), C. Cortes, N. D. Lawrence et al., Eds. Curran Associates, Inc., Jun.
+2015, pp. 1135–1143. [Online]. Available: http://papers.nips.cc/paper/5784-learning-both-weights-and-connections-for-efficient-neural-network.pdf
+
+[HSK+12] G. E. Hinton, N. Srivastava et al., “Improving neural networks by preventing
+co-adaptation of feature detectors,” arXiv preprint arXiv:1207.0580, Jul.
+2012. [Online]. Available: https://arxiv.org/abs/1207.0580
+[HSL+16] G. Huang, Y. Sun et al., “Deep networks with stochastic depth,”
+arXiv preprint arXiv:1603.09382, Mar. 2016. [Online]. Available: https:
+//arxiv.org/abs/1603.09382
+[HSW93] B. Hassibi, D. G. Stork, and G. J. Wolff, “Optimal brain surgeon
+and general network pruning,” in International Conference on Neural
+Networks. IEEE, 1993, pp. 293–299. [Online]. Available: http:
+//ee.caltech.edu/Babak/pubs/conferences/00298572.pdf
+[HVD15] G. Hinton, O. Vinyals, and J. Dean, “Distilling the knowledge in a neural
+network,” arXiv preprint arXiv:1503.02531, Mar. 2015. [Online]. Available:
+https://arxiv.org/abs/1503.02531
+[HZRS14] K. He, X. Zhang et al., “Spatial pyramid pooling in deep convolutional
+networks for visual recognition,” in European Conference on Computer
+Vision (ECCV). Springer, 2014, pp. 346–361. [Online]. Available:
+https://arxiv.org/abs/1406.4729
+[HZRS15a] K. He, X. Zhang et al., “Deep residual learning for image recognition,”
+arXiv preprint arXiv:1512.03385, Dec. 2015. [Online]. Available: https:
+//arxiv.org/abs/1512.03385v1
+[HZRS15b] K. He, X. Zhang et al., “Delving deep into rectifiers: Surpassing human-level
+performance on imagenet classification,” in International Conference on
+Computer Vision (ICCV), Feb. 2015, pp. 1026–1034. [Online]. Available:
+https://arxiv.org/abs/1502.01852
+[Ima12] “Imagenet large scale visual recognition challenge 2012 (ILSVRC2012),”
+
+2012. [Online]. Available: http://www.image-net.org/challenges/LSVRC/
+2012/nonpub-downloads
+[IS15] S. Ioffe and C. Szegedy, “Batch normalization: Accelerating deep network
+training by reducing internal covariate shift,” arXiv preprint arXiv:1502.03167,
+Feb. 2015. [Online]. Available: https://arxiv.org/abs/1502.03167
+[JXF+16] X. Jin, C. Xu et al., “Deep learning with s-shaped rectified linear activation
+units,” in Thirtieth AAAI Conference on Artificial Intelligence, Dec. 2016.
+[Online]. Available: https://arxiv.org/abs/1512.07030
+[Kar11] A. Karpathy, “Lessons learned from manually classifying CIFAR-10,” Apr.
+2011. [Online]. Available: http://karpathy.github.io/2011/04/27/manually-classifying-cifar10/
+[KB14] D. Kingma and J. Ba, “Adam: A method for stochastic optimization,”
+arXiv preprint arXiv:1412.6980, Dec. 2014. [Online]. Available: https:
+//arxiv.org/abs/1412.6980
+[KH09] A. Krizhevsky and G. Hinton, “Learning multiple layers of features from tiny
+images,” Apr. 2009. [Online]. Available: https://www.cs.toronto.edu/~kriz/
+learning-features-2009-TR.pdf
+[KMN+16] N. S. Keskar, D. Mudigere et al., “On large-batch training for deep learning:
+Generalization gap and sharp minima,” arXiv preprint arXiv:1609.04836,
+Sep. 2016. [Online]. Available: https://arxiv.org/abs/1609.04836
+[Koc15] T. Kocmánek, “HyperNEAT and novelty search for image recognition,” Ph.D.
+dissertation, Master’s thesis, Czech Technical University in Prague, 2015.
+[Online]. Available: http://kocmi.tk/photos/DiplomaThesis.pdf
+[KPY+15] Y.-D. Kim, E. Park et al., “Compression of deep convolutional neural networks
+for fast and low power mobile applications,” arXiv preprint arXiv:1511.06530,
+Nov. 2015. [Online]. Available: https://arxiv.org/abs/1511.06530
+[KR09] L. Kaufman and P. J. Rousseeuw, Finding groups in data: an introduction to
+cluster analysis. John Wiley & Sons, 2009, vol. 344.
+[Kri] A. Krizhevsky, “The CIFAR-10 dataset.” [Online]. Available: https:
+//www.cs.toronto.edu/~kriz/cifar.html
+[KS02] V. Kurkova and M. Sanguineti, “Comparison of worst case errors in linear
+and neural network approximation,” IEEE Transactions on Information
+Theory, vol. 48, no. 1, pp. 264–275, Jan. 2002. [Online]. Available:
+http://ieeexplore.ieee.org/abstract/document/971754/
+
+[KSH12] A. Krizhevsky, I. Sutskever, and G. E. Hinton, “Imagenet classification
+with deep convolutional neural networks,” in Advances in Neural
+Information Processing Systems 25 (NIPS), F. Pereira, C. J. C. Burges
+et al., Eds. Curran Associates, Inc., 2012, pp. 1097–1105. [Online].
+Available: http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf
+[KSlB+10] K. Kavukcuoglu, P. Sermanet et al., “Learning convolutional feature
+hierarchies for visual recognition,” in Advances in Neural Information
+Processing Systems 23 (NIPS), J. D. Lafferty, C. K. I. Williams
+et al., Eds. Curran Associates, Inc., 2010, pp. 1090–1098. [Online].
+Available: http://papers.nips.cc/paper/4133-learning-convolutional-feature-hierarchies-for-visual-recognition.pdf
+[LAE+16] W. Liu, D. Anguelov et al., “SSD: Single shot multibox detector,” in
+European Conference on Computer Vision (ECCV). Springer, 2016, pp.
+21–37. [Online]. Available: https://arxiv.org/abs/1512.02325
+[Las17] “Noise layers,” Jan. 2017. [Online]. Available: http://lasagne.readthedocs.io/
+en/latest/modules/layers/noise.html#lasagne.layers.DropoutLayer
+[LBBH98] Y. LeCun, L. Bottou et al., “Gradient-based learning applied to document
+recognition,” Proceedings of the IEEE, vol. 86, no. 11, pp. 2278–2324, Nov.
+1998. [Online]. Available: http://yann.lecun.com/exdb/publis/pdf/lecun01a.pdf
+[LBH15] Y. LeCun, Y. Bengio, and G. Hinton, “Deep learning,” Nature,
+vol. 521, no. 7553, pp. 436–444, May 2015. [Online]. Available:
+http://www.nature.com/nature/journal/v521/n7553/abs/nature14539.html
+[LBOM98] Y. A. LeCun, L. Bottou et al., Efficient BackProp, ser. Lecture Notes in
+Computer Science. Berlin, Heidelberg: Springer Berlin Heidelberg, 1998, vol.
+1524, pp. 9–50. [Online]. Available: http://dx.doi.org/10.1007/3-540-49430-8
+[LDS+89] Y. LeCun, J. S. Denker et al., “Optimal brain damage.” in NIPs, vol. 2, 1989,
+pp. 598–605. [Online]. Available: http://yann.lecun.com/exdb/publis/pdf/
+lecun-90b.pdf
+[Le13] Q. V. Le, “Building high-level features using large scale unsupervised
+learning,” in International conference on acoustics, speech and signal
+processing. IEEE, 2013, pp. 8595–8598. [Online]. Available: http:
+//ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=6639343
+[LG16] A. Lavin and S. Gray, “Fast algorithms for convolutional neural networks,” in
+
+Conference on Computer Vision and Pattern Recognition (CVPR). IEEE, Sep.
+2016, pp. 4013–4021. [Online]. Available: https://arxiv.org/abs/1509.09308
+[LGT16] C.-Y. Lee, P. W. Gallagher, and Z. Tu, “Generalizing pooling functions in
+convolutional neural networks: Mixed, gated, and tree,” in International
+Conference on Artificial Intelligence and Statistics, 2016. [Online]. Available:
+https://arxiv.org/abs/1509.08985v2
+[LH16] I. Loshchilov and F. Hutter, “SGDR: stochastic gradient descent
+with warm restarts,” Learning, Aug. 2016. [Online]. Available: https:
+//arxiv.org/abs/1608.03983
+[LJD+16] L. Li, K. Jamieson et al., “Hyperband: A novel bandit-based approach to
+hyperparameter optimization,” arXiv preprint arXiv:1603.06560, Mar. 2016.
+[Online]. Available: https://arxiv.org/abs/1603.06560
+[LM16] K. Li and J. Malik, “Learning to optimize,” arXiv preprint arXiv:1606.01885,
+Jun. 2016. [Online]. Available: https://arxiv.org/abs/1606.01885
+[LSD15] J. Long, E. Shelhamer, and T. Darrell, “Fully convolutional networks for
+semantic segmentation,” in Conference on Computer Vision and Pattern
+Recognition (CVPR). IEEE, Mar. 2015, pp. 3431–3440. [Online]. Available:
+https://arxiv.org/abs/1411.4038v2
+[LX17] A. Y. Lingxi Xie, “Genetic CNN,” arXiv preprint arXiv:1703.01513, Mar.
+2017. [Online]. Available: https://arxiv.org/abs/1703.01513
+[Maj17] S. Majumdar, “Densenet,” GitHub, Feb. 2017. [Online]. Available:
+https://github.com/titu1994/DenseNet
+[Mar08] M. Marszałek, “INRIA annotations for Graz-02 (IG02),” Oct. 2008. [Online].
+Available: http://lear.inrialpes.fr/people/marszalek/data/ig02/
+[MDA15] D. Maclaurin, D. Duvenaud, and R. Adams, “Gradient-based hyperparameter
+optimization through reversible learning,” in International Conference on
+Machine Learning (ICML), 2015, pp. 2113–2122.
+[MH08] L. v. d. Maaten and G. Hinton, “Visualizing data using t-SNE,” Journal of
+Machine Learning Research, vol. 9, no. Nov, pp. 2579–2605, 2008.
+[MHN13] A. L. Maas, A. Y. Hannun, and A. Y. Ng, “Rectifier nonlinearities
+improve neural network acoustic models,” in Proc. ICML, vol. 30,
+no. 1, 2013. [Online]. Available: https://web.stanford.edu/~awni/papers/
+relu_hybrid_icml2013_final.pdf
+[MM15] D. Mishkin and J. Matas, “All you need is a good init,” arXiv
+
+preprint arXiv:1511.06422, Nov. 2015. [Online]. Available: https:
+//arxiv.org/abs/1511.06422
+[MP43] W. S. McCulloch and W. Pitts, “A logical calculus of the ideas immanent in
+nervous activity,” The bulletin of mathematical biophysics, vol. 5, no. 4, pp.
+115–133, 1943.
+[MRM15] N. McLaughlin, J. M. D. Rincon, and P. Miller, “Data-augmentation for
+reducing dataset bias in person re-identification,” in International Conference
+on Advanced Video and Signal Based Surveillance (AVSS), no. 12, Aug. 2015,
+pp. 1–6. [Online]. Available: http://ieeexplore.ieee.org/abstract/document/
+7301739/
+[MS07] M. Marszalek and C. Schmid, “Accurate object localization with
+shape masks,” in Conference on Computer Vision and Pattern
+Recognition (CVPR). IEEE, 2007, pp. 1–8. [Online]. Available: http:
+//ieeexplore.ieee.org/document/4270110/
+[MSM16] D. Mishkin, N. Sergievskiy, and J. Matas, “Systematic evaluation of CNN
+advances on the ImageNet,” arXiv preprint arXiv:1606.02228, Jun. 2016.
+[Online]. Available: https://arxiv.org/abs/1606.02228
+[MV16] A. Mahendran and A. Vedaldi, “Visualizing deep convolutional neural
+networks using natural pre-images,” International Journal of Computer Vision,
+pp. 1–23, Apr. 2016. [Online]. Available: https://arxiv.org/abs/1512.02017
+[NDRT13] N. Natarajan, I. S. Dhillon et al., “Learning with noisy labels,” in Advances
+in Neural Information Processing Systems 26 (NIPS), C. J. C. Burges,
+L. Bottou et al., Eds. Curran Associates, Inc., 2013, pp. 1196–1204. [Online].
+Available: http://papers.nips.cc/paper/5073-learning-with-noisy-labels.pdf
+[Nes83] Y. Nesterov, “A method of solving a convex programming problem with
+convergence rate O(1/k^2),” in Soviet Mathematics Doklady, vol. 27, no. 2,
+1983, pp. 372–376.
+[new00] “The training performed by qnstrn,” Aug. 2000. [Online]. Available:
+http://www1.icsi.berkeley.edu/Speech/faq/nn-train.html
+[Ng16] A. Ng, “Nuts and bolts of building ai applications using deep learning,” NIPS
+Talk, Dec. 2016.
+[NH92] S. J. Nowlan and G. E. Hinton, “Simplifying neural networks by soft
+weight-sharing,” Neural computation, vol. 4, no. 4, pp. 473–493, 1992.
+[Online]. Available: https://www.cs.toronto.edu/~hinton/absps/sunspots.pdf
+[NH02] R. T. Ng and J. Han, “CLARANS: A method for clustering objects for spatial
+
+data mining,” IEEE transactions on knowledge and data engineering, vol. 14,
+no. 5, pp. 1003–1016, 2002.
+[NWC+11a] Y. Netzer, T. Wang et al., “Reading digits in natural images with
+unsupervised feature learning,” in NIPS workshop on deep learning and
+unsupervised feature learning, vol. 2011, no. 2, 2011, p. 5. [Online]. Available:
+http://ufldl.stanford.edu/housenumbers/nips2011_housenumbers.pdf
+[NWC+11b] Y. Netzer, T. Wang et al., “The street view house numbers (SVHN) dataset,”
+2011. [Online]. Available: http://ufldl.stanford.edu/housenumbers/
+[NYC16] A. Nguyen, J. Yosinski, and J. Clune, “Multifaceted feature visualization:
+Uncovering the different types of features learned by each neuron in deep
+neural networks,” arXiv preprint arXiv:1602.03616, May 2016. [Online].
+Available: https://arxiv.org/abs/1602.03616
+[OHIL16] J. Ortigosa-Hernández, I. Inza, and J. A. Lozano, “Towards competitive
+classifiers for unbalanced classification problems: A study on the performance
+scores,” arXiv preprint arXiv:1608.08984, Aug. 2016. [Online]. Available:
+https://arxiv.org/abs/1608.08984
+[PMW+15] N. Papernot, P. McDaniel et al., “Distillation as a defense to adversarial
+perturbations against deep neural networks,” arXiv preprint arXiv:1511.04508,
+Nov. 2015. [Online]. Available: https://arxiv.org/abs/1511.04508
+[Pre98] L. Prechelt, Early Stopping - But When? Berlin, Heidelberg: Springer
+Berlin Heidelberg, 1998, pp. 55–69. [Online]. Available: http://dx.doi.org/
+10.1007/3-540-49430-8_3
+[RDS+14] O. Russakovsky, J. Deng et al., “Imagenet large scale visual recognition
+challenge,” arXiv preprint arXiv:1409.0575, vol. 115, no. 3, pp. 211–252, Sep.
+2014. [Online]. Available: https://arxiv.org/abs/1409.0575
+[RFB15] O. Ronneberger, P. Fischer, and T. Brox, “U-net: Convolutional networks
+for biomedical image segmentation,” in International Conference on Medical
+Image Computing and Computer-Assisted Intervention. Springer, 2015, pp.
+234–241. [Online]. Available: https://arxiv.org/abs/1505.04597
+[RLS10] S. Risi, J. Lehman, and K. O. Stanley, “Evolving the placement and density of neurons in the hyperneat substrate,” in Conference on Genetic and
+evolutionary computation, no. 12. ACM, 2010, pp. 563–570.
+[RSG16] M. T. Ribeiro, S. Singh, and C. Guestrin, “"why should i trust you?":
+Explaining the predictions of any classifier,” arXiv preprint arXiv:1602.04938,
+Feb. 2016. [Online]. Available: https://arxiv.org/abs/1602.04938
+
+[Rud16] S. Ruder, “An overview of gradient descent optimization algorithms,”
+arXiv preprint arXiv:1609.04747, Sep. 2016. [Online]. Available: https:
+//arxiv.org/abs/1609.04747
+[SCL12] P. Sermanet, S. Chintala, and Y. LeCun, “Convolutional neural networks
+applied to house numbers digit classification,” in International Conference
+on Pattern Recognition (ICPR), no. 21. IEEE, Apr. 2012, pp. 3288–3291.
+[Online]. Available: https://arxiv.org/abs/1204.3968
+[SDG09] K. O. Stanley, D. B. D’Ambrosio, and J. Gauci, “A hypercube-based encoding
+for evolving large-scale neural networks,” Artificial life, vol. 15, no. 2, pp. 185–
+212, 2009. [Online]. Available: http://ieeexplore.ieee.org/document/6792316/
+[SEZ+13] P. Sermanet, D. Eigen et al., “Overfeat: Integrated recognition, localization
+and detection using convolutional networks,” arXiv preprint arXiv:1312.6229,
+Feb. 2013. [Online]. Available: https://arxiv.org/abs/1312.6229v4
+[SHK+14] N. Srivastava, G. E. Hinton et al., “Dropout: a simple way to
+prevent neural networks from overfitting.” Journal of Machine Learning
+Research, vol. 15, no. 1, pp. 1929–1958, 2014. [Online]. Available:
+https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf
+[SHY+13] A. Senior, G. Heigold et al., “An empirical study of learning rates in deep
+neural networks for speech recognition,” in International Conference on
+Acoustics, Speech and Signal Processing. IEEE, 2013, pp. 6724–6728. [Online].
+Available: http://ieeexplore.ieee.org/document/6638963/?arnumber=6638963
+[SIV16] C. Szegedy, S. Ioffe, and V. Vanhoucke, “Inception-v4, inception-resnet and the
+impact of residual connections on learning,” arXiv preprint arXiv:1602.07261,
+Feb. 2016. [Online]. Available: https://arxiv.org/abs/1602.07261
+[SKP15] F. Schroff, D. Kalenichenko, and J. Philbin, “Facenet: A unified embedding
+for face recognition and clustering,” in Conference on Computer Vision
+and Pattern Recognition (CVPR). IEEE, Mar. 2015, pp. 815–823. [Online].
+Available: https://arxiv.org/abs/1503.03832
+[SL11] P. Sermanet and Y. LeCun, “Traffic sign recognition with multi-scale
+convolutional networks,” in International Joint Conference on Neural
+Networks (IJCNN), Jul. 2011, pp. 2809–2813. [Online]. Available:
+http://ieeexplore.ieee.org/document/6033589/
+[SLJ+15] C. Szegedy, W. Liu et al., “Going deeper with convolutions,” in Conference
+on Computer Vision and Pattern Recognition (CVPR). IEEE, Sep. 2015, pp.
+1–9. [Online]. Available: https://arxiv.org/abs/1409.4842
+[SM02] K. O. Stanley and R. Miikkulainen, “Evolving neural networks through
+
+augmenting topologies,” Evolutionary computation, vol. 10, no. 2, pp. 99–127,
+2002. [Online]. Available: http://www.mitpressjournals.org/doi/abs/10.1162/
+106365602320169811
+[SMG13] A. M. Saxe, J. L. McClelland, and S. Ganguli, “Exact solutions to
+the nonlinear dynamics of learning in deep linear neural networks,”
+arXiv preprint arXiv:1312.6120, Dec. 2013. [Online]. Available: https:
+//arxiv.org/abs/1312.6120
+[SMGS14] R. K. Srivastava, J. Masci et al., “Understanding locally competitive
+networks,” arXiv preprint arXiv:1410.1165, Oct. 2014. [Online]. Available:
+https://arxiv.org/abs/1410.1165
+[SSSI] J. Stallkamp, M. Schlipsing et al., “The german traffic sign recognition
+benchmark.” [Online]. Available: http://benchmark.ini.rub.de/?section=
+gtsrb&subsection=news
+[SSSI12] J. Stallkamp, M. Schlipsing et al., “Man vs. computer: Benchmarking
+machine learning algorithms for traffic sign recognition,” Neural Networks,
+no. 0, pp. –, 2012. [Online]. Available: http://www.sciencedirect.com/science/
+article/pii/S0893608012000457
+[SV16] S. Saxena and J. Verbeek, “Convolutional neural fabrics,” arXiv preprint
+arXiv:1606.02492, 2016. [Online]. Available: https://arxiv.org/abs/1606.02492
+[SVI+15] C. Szegedy, V. Vanhoucke et al., “Rethinking the inception architecture
+for computer vision,” arXiv preprint arXiv:1512.00567, Dec. 2015. [Online].
+Available: https://arxiv.org/abs/1512.00567v3
+[SVZ13] K. Simonyan, A. Vedaldi, and A. Zisserman, “Deep inside convolutional
+networks: Visualising image classification models and saliency maps,”
+arXiv preprint arXiv:1312.6034, Dec. 2013. [Online]. Available: https:
+//arxiv.org/abs/1312.6034
+[SZ14] K. Simonyan and A. Zisserman, “Very deep convolutional networks for
+large-scale image recognition,” arXiv preprint arXiv:1409.1556, Sep. 2014.
+[Online]. Available: https://arxiv.org/abs/1409.1556
+[SZS+13] C. Szegedy, W. Zaremba et al., “Intriguing properties of neural
+networks,” arXiv preprint arXiv:1312.6199, Dec. 2013. [Online]. Available:
+https://arxiv.org/abs/1312.6199v4
+[TF-16a] “MNIST for ML beginners,” Dec. 2016. [Online]. Available: https:
+//www.tensorflow.org/tutorials/mnist/beginners/
+
+[tf-16b] “tf.nn.dropout,” Dec. 2016. [Online]. Available: https://www.tensorflow.org/
+api_docs/python/nn/activation_functions_#dropout
+[TH12] T. Tieleman and G. Hinton, “Lecture 6.5-rmsprop: Divide the gradient
+by a running average of its recent magnitude,” COURSERA: Neural
+Networks for Machine Learning, vol. 4, no. 2, 2012. [Online]. Available:
+http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf
+[Tho14a] M. Thoma, “On-line recognition of handwritten mathematical symbols,”
+Karlsruhe, Germany, Nov. 2014. [Online]. Available: http://martin-thoma.com/write-math
+[Tho14b] M. Thoma, “The Twiddle algorithm,” Sep. 2014. [Online]. Available:
+https://martin-thoma.com/twiddle/
+[Tho16] M. Thoma, “A survey of semantic segmentation,” arXiv preprint
+arXiv:1602.06541, Feb. 2016. [Online]. Available: https://arxiv.org/abs/
+1602.06541
+[Tho17a] M. Thoma, “The HASYv2 dataset,” arXiv preprint arXiv:1701.08380, Jan.
+2017. [Online]. Available: https://arxiv.org/abs/1701.08380
+[Tho17b] M. Thoma, “Master thesis (blog post),” Apr. 2017. [Online]. Available:
+https://martin-thoma.com/msthesis
+[VH13] P. Verbancsics and J. Harguess, “Generative neuroevolution for deep
+learning,” arXiv preprint arXiv:1312.5355, Dec. 2013. [Online]. Available:
+https://arxiv.org/abs/1312.5355
+[vLA87] P. J. M. van Laarhoven and E. H. L. Aarts, Simulated annealing.
+Dordrecht: Springer Netherlands, 1987, pp. 7–15. [Online]. Available:
+http://dx.doi.org/10.1007/978-94-015-7744-1_2
+[VTKP17] E. Vorontsov, C. Trabelsi et al., “On orthogonality and learning recurrent
+networks with long term dependencies,” arXiv preprint arXiv:1702.00071,
+Jan. 2017. [Online]. Available: https://arxiv.org/abs/1702.00071
+[WHH+89] A. Waibel, T. Hanazawa et al., “Phoneme recognition using time-delay
+neural networks,” IEEE transactions on acoustics, speech, and signal
+processing, vol. 37, no. 3, pp. 328–339, Aug. 1989. [Online]. Available:
+http://ieeexplore.ieee.org/document/21701/
+[Wil92] R. J. Williams, “Simple statistical gradient-following algorithms for connectionist reinforcement learning,” Machine learning, vol. 8, no. 3-4, pp. 229–256,
+1992.
+
+[WWQ13] X. Wang, L. Wang, and Y. Qiao, A Comparative Study of Encoding, Pooling
+and Normalization Methods for Action Recognition. Berlin, Heidelberg:
+Springer Berlin Heidelberg, Nov. 2013, no. 11, pp. 572–585. [Online].
+Available: http://dx.doi.org/10.1007/978-3-642-37431-9_44
+[WYS+15] R. Wu, S. Yan et al., “Deep image: Scaling up image recognition,” arXiv
+preprint arXiv:1501.02876, vol. 7, no. 8, Jul. 2015. [Online]. Available:
+https://arxiv.org/abs/1501.02876v4
+[WZZ+13] L. Wan, M. Zeiler et al., “Regularization of neural networks using dropconnect,”
+in International Conference on Machine Learning (ICML), no. 30, 2013,
+pp. 1058–1066. [Online]. Available: http://www.matthewzeiler.com/pubs/
+icml2013/icml2013.pdf
+[XGD+16] S. Xie, R. Girshick et al., “Aggregated residual transformations for deep
+neural networks,” arXiv preprint arXiv:1611.05431, Nov. 2016. [Online].
+Available: https://arxiv.org/abs/1611.05431v1
+[Xu11] W. Xu, “Towards optimal one pass large scale learning with averaged
+stochastic gradient descent,” arXiv preprint arXiv:1107.2490, Jul. 2011.
+[Online]. Available: https://arxiv.org/abs/1107.2490
+[XWCL15] B. Xu, N. Wang et al., “Empirical evaluation of rectified activations in
+convolutional network,” arXiv preprint arXiv:1505.00853, May 2015. [Online].
+Available: https://arxiv.org/abs/1505.00853
+[XXE12] H. Xiao, H. Xiao, and C. Eckert, “Adversarial label flips attack on
+support vector machines.” in ECAI, 2012, pp. 870–875. [Online]. Available:
+https://www.sec.in.tum.de/assets/Uploads/ecai2.pdf
+[XZY+14] T. Xiao, J. Zhang et al., “Error-driven incremental learning in deep convolutional neural network for large-scale image classification,” in International
+Conference on Multimedia, no. 22. ACM, 2014, pp. 177–186.
+[YL98] C. J. B. Yann LeCun, Corinna Cortes, “The MNIST database of handwritten
+digits,” 1998. [Online]. Available: http://yann.lecun.com/exdb/mnist/
+[ZBH+16] C. Zhang, S. Bengio et al., “Understanding deep learning requires rethinking
+generalization,” arXiv preprint arXiv:1611.03530, Nov. 2016. [Online].
+Available: https://arxiv.org/abs/1611.03530
+[ZCZL16] S. Zhai, Y. Cheng et al., “Doubly convolutional neural networks,” in
+Advances in Neural Information Processing Systems 29 (NIPS), D. D. Lee,
+M. Sugiyama et al., Eds. Curran Associates, Inc., Oct. 2016, pp. 1082–1090.
+[Online]. Available: http://papers.nips.cc/paper/6340-doubly-convolutional-neural-networks.pdf
+
+[ZDGD14] N. Zhang, J. Donahue et al., “Part-based R-CNNs for fine-grained category
+detection,” in European Conference on Computer Vision (ECCV). Springer,
+Jul. 2014, pp. 834–849. [Online]. Available: https://arxiv.org/abs/1407.3867
+[Zei12] M. D. Zeiler, “Adadelta: an adaptive learning rate method,” arXiv preprint
+arXiv:1212.5701, Dec. 2012. [Online]. Available: https://arxiv.org/abs/
+1212.5701v1
+[ZF13] M. D. Zeiler and R. Fergus, “Stochastic pooling for regularization of deep
+convolutional neural networks,” arXiv preprint arXiv:1301.3557, Jan. 2013.
+[Online]. Available: https://arxiv.org/abs/1301.3557v1
+[ZF14] M. D. Zeiler and R. Fergus, “Visualizing and understanding convolutional
+networks,” in European Conference on Computer Vision (ECCV). Springer,
+Nov. 2014, pp. 818–833. [Online]. Available: https://arxiv.org/abs/1311.2901
+[Zho16] B. Zhou, “Places2 download,” 2016. [Online]. Available: http://
+places2.csail.mit.edu/download.html
+[ZK16] S. Zagoruyko and N. Komodakis, “Wide residual networks,” arXiv
+preprint arXiv:1605.07146, May 2016. [Online]. Available: https:
+//arxiv.org/abs/1605.07146
+[ZKL+15] B. Zhou, A. Khosla et al., “Learning deep features for discriminative
+localization,” arXiv preprint arXiv:1512.04150, Dec. 2015. [Online]. Available:
+https://arxiv.org/abs/1512.04150
+[ZKL+16] B. Zhou, A. Khosla et al., “Places: An image database for deep scene
+understanding,” arXiv preprint arXiv:1610.02055, Oct. 2016. [Online].
+Available: https://arxiv.org/abs/1610.02055
+[ZL16] B. Zoph and Q. V. Le, “Neural architecture search with reinforcement
+learning,” arXiv preprint arXiv:1611.01578, Nov. 2016. [Online]. Available:
+https://arxiv.org/abs/1611.01578
+[ZMGL15] J. Zhao, M. Mathieu et al., “Stacked what-where auto-encoders,”
+arXiv preprint arXiv:1506.02351, Jun. 2015. [Online]. Available: https:
+//arxiv.org/abs/1506.02351v1
+[ZYL+15] H. Zheng, Z. Yang et al., “Improving deep neural networks using softplus
+units,” in International Joint Conference on Neural Networks (IJCNN), Jul.
+2015, pp. 1–4.
+
+I. Glossary
+ANN artificial neural network. 4
+ASO Automatic Structure Optimization. 29
+CMO Confusion Matrix Ordering. 2, 35, 36, 51, 52, 71
+CNN Convolutional Neural Network. 1, 3–6, 11, 13, 15, 21–23, 28, 29, 31, 33, 37, 54, 60,
+71, 72, 79, 82–84, 88–91
+ELU Exponential Linear Unit. 38, 57, 60–64, 72, 73, 77, 78, 84
+ES early stopping. 68
+FC Fully Connected. 91, 93
+FLOP floating point operation. 27, 29, 87, 88, 90, 91, 93
+GA genetic algorithm. 30
+GAN Generative Adversarial Network. 80
+GPU graphics processing unit. 37, 40, 59, 63, 67, 88, 91
+HSV hue, saturation, value. 79
+LCN Local Contrast Normalization. 91
+LDA linear discriminant analysis. 79
+LReLU leaky rectified linear unit. 63, 72, 77, 78, 84
+MLP multilayer perceptron. 3–6, 28
+NAG Nesterov Accelerated Momentum. 83
+NEAT NeuroEvolution of Augmenting Topologies. 83
+OBD Optimal Brain Damage. 29
+
+PCA principal component analysis. 79
+PReLU parametrized rectified linear unit. 60, 61, 63, 64, 72, 77, 78, 84
+ReLU rectified linear unit. 5, 13, 60, 61, 63, 64, 72, 77, 78, 84
+SGD stochastic gradient descent. 5, 30, 45, 46, 82
+ZCA Zero Components Analysis. 79
diff --git a/read/results/pdfium/2201.00021.txt b/read/results/pdfium/2201.00021.txt
index 8ef5766..be0f404 100644
--- a/read/results/pdfium/2201.00021.txt
+++ b/read/results/pdfium/2201.00021.txt
@@ -1,808 +1,724 @@
-Astronomy & Astrophysics manuscript no. mainArxiv ©ESO 2022
-April 12, 2022
-Discovery of ammonia (9,6) masers in two high-mass star-forming
-regions
-Y. T. Yan (闫耀庭)
-1,?, C. Henkel1, 2, 3
-, K. M. Menten1
-, Y. Gong (龚龑)
-1
-, J. Ott4
-, T. L. Wilson1
-, A. Wootten4
-, A.
-Brunthaler1
-, J. S. Zhang (张江水)
-5
-, J. L. Chen (陈家梁)
-5
-, and K. Yang (杨楷)
-6, 7
-1 Max-Planck-Institut für Radioastronomie, Auf dem Hügel 69, 53121 Bonn, Germany
-e-mail: yyan@mpifr-bonn.mpg.de
-2 Astronomy Department, Faculty of Science, King Abdulaziz University, P. O. Box 80203, Jeddah 21589, Saudi Arabia
-3 Xinjiang Astronomical Observatory, Chinese Academy of Sciences, 830011 Urumqi, PR China
-4 National Radio Astronomy Observatory, 520 Edgemont Road, Charlottesville, VA 22903-2475, USA
-5 Center for Astrophysics, Guangzhou University, 510006 Guangzhou, People’s Republic of China
-6 School of Astronomy and Space Science, Nanjing University, 163 Xianlin Avenue, Nanjing 210023, People’s Republic of China
-7 Key Laboratory of Modern Astronomy and Astrophysics (Nanjing University), Ministry of Education, Nanjing 210023, People’s
-Republic of China
-Received 13 December 2021 / Accepted 30 December 2021
-ABSTRACT
-Context. Molecular maser lines are signposts of high-mass star formation, probing the excitation and kinematics of very compact
-regions in the close environment of young stellar objects and providing useful targets for trigonometric parallax measurements.
-Aims. Only a few NH3 (9,6) masers are known so far, and their origin is still poorly understood. Here we aim to find new NH3 (9,6)
-masers to provide a better observational basis for studying their role in high-mass star-forming regions.
-Methods. We carried out NH3 (9,6) observations toward Cepheus A and G34.26+0.15 with the Effelsberg 100-meter telescope (beam
-size 4900) and the Karl G. Jansky Very Large Array (JVLA; beam size about 100
-.2).
-Results. We discovered new NH3 (9,6) masers in Cep A and G34.26+0.15, which increases the number of known high-mass star￾forming regions hosting NH3 (9,6) masers from five to seven. Long-term monitoring (20 months) at Effelsberg shows that the intensity
-of the (9,6) maser in G34.26+0.15 is decreasing, while the Cep A maser remains stable. Compared to the Effelsberg data and assuming
-linear variations between the epochs of observation, the JVLA data indicate no missing flux. This suggests that the NH3 (9,6) emission
-arises from single compact emission regions that are not resolved by the interferometric measurements. As JVLA imaging shows, the
-NH3 (9,6) emission in Cep A originates from a sub-arcsecond-sized region, slightly to the west (000
-.28 ± 0
-00
-.10) of the peak position
-of the 1.36 cm continuum object, HW2. In G34.26+0.15, three NH3 (9,6) maser spots are observed: one is close to the head of the
-cometary ultracompact H ii region C, and the other two are emitted from a compact region to the west of the hypercompact H ii region
-A.
-Conclusions. The newly found (9,6) masers appear to be related to outflows. The higher angular resolution of JVLA and very long
-baseline interferometry observations are needed to provide more accurate positions and constraints for pumping scenarios.
-Key words. Masers – ISM: clouds – ISM: individual objects: Cep A, G34.26+0.15 – ISM: H ii regions – Radio lines: ISM
-1. Introduction
-Since its discovery more than five decades ago (Cheung et al.
-1968), ammonia (NH3) has been a most valuable molecule for
-investigating the physical properties of molecular clouds (e.g.,
-Ho & Townes 1983). While thermally excited transitions in
-the centimeter-wavelength inversion transitions of ammonia are
-regarded as a reliable thermometer of molecular clouds (e.g.,
-Walmsley & Ungerechts 1983; Danby et al. 1988), ammonia
-masers have attracted attention since the first detection of maser
-action in the (J, K) = (3,3) metastable (J = K) line toward the
-massive star-forming region W33 (Wilson et al. 1982). Subse￾quent observations have led to the detection of new metastable
-ammonia masers, including 15NH3 (3,3) (Mauersberger et al.
-1986), NH3 (1,1) (Gaume et al. 1996), NH3 (2,2) (Mills et al.
-2018), NH3 (5,5) (Cesaroni et al. 1992), NH3 (6,6) (Beuther
-? Member of the International Max Planck Research School (IM￾PRS) for Astronomy and Astrophysics at the universities of Bonn and
-Cologne.
-et al. 2007), NH3 (7,7), NH3 (9,9), and NH3 (12,12) (Henkel
-et al. 2013). These have led to the discovery of metastable maser
-lines in 22 different regions (Mauersberger et al. 1986, 1987;
-Wilson & Henkel 1988; Wilson et al. 1990; Pratap et al. 1991;
-Cesaroni et al. 1992; Wilson & Schilke 1993; Mangum & Woot￾ten 1994; Kraemer & Jackson 1995; Zhang & Ho 1995; Zhang
-et al. 1999; Walsh et al. 2007; Hunter et al. 2008; Galván-Madrid
-et al. 2009; Brogan et al. 2011; Urquhart et al. 2011; Walsh
-et al. 2011; Wang et al. 2012; Henkel et al. 2013; Hoffman &
-Joyce 2014; McEwen et al. 2016; Mills et al. 2018; Hogge et al.
-2019; Mei et al. 2020; Towner et al. 2021). Compared with the
-metastable ammonia masers, detected non-metastable (J > K)
-ammonia maser transitions are more numerous. The first highly
-excited non-metastable ammonia maser was detected by Mad￾den et al. (1986) in the (J, K) = (9,6) and (6,3) lines. Thereafter,
-many other NH3 non-metastable inversion transition lines have
-been identified as masers, including the (5,3), (5,4), (6,1), (6,2),
-(6,4), (6,5), (7,3), (7,4), (7,5) (7,6), (8,3), (8,4), (8,5), (8,6), (9,3),
-(9,4), (9,5), (9,7), (9,8), (10,7), (10,8), (10,9), and (11,9) transi￾Article number, page 1 of 10
+Astronomy & Astrophysics manuscript no. mainArxiv ©ESO 2022
+April 12, 2022
+Discovery of ammonia (9,6) masers in two high-mass star-forming
+regions
+Y. T. Yan (闫耀庭)$^{1,\star}$, C. Henkel$^{1,2,3}$, K. M. Menten$^{1}$, Y. Gong (龚龑)$^{1}$, J. Ott$^{4}$, T. L. Wilson$^{1}$, A. Wootten$^{4}$, A.
+Brunthaler$^{1}$, J. S. Zhang (张江水)$^{5}$, J. L. Chen (陈家梁)$^{5}$, and K. Yang (杨楷)$^{6,7}$
+1 Max-Planck-Institut für Radioastronomie, Auf dem Hügel 69, 53121 Bonn, Germany
+e-mail: yyan@mpifr-bonn.mpg.de
+2 Astronomy Department, Faculty of Science, King Abdulaziz University, P. O. Box 80203, Jeddah 21589, Saudi Arabia
+3 Xinjiang Astronomical Observatory, Chinese Academy of Sciences, 830011 Urumqi, PR China
+4 National Radio Astronomy Observatory, 520 Edgemont Road, Charlottesville, VA 22903-2475, USA
+5 Center for Astrophysics, Guangzhou University, 510006 Guangzhou, People’s Republic of China
+6 School of Astronomy and Space Science, Nanjing University, 163 Xianlin Avenue, Nanjing 210023, People’s Republic of China
+7 Key Laboratory of Modern Astronomy and Astrophysics (Nanjing University), Ministry of Education, Nanjing 210023, People’s
+Republic of China
+Received 13 December 2021 / Accepted 30 December 2021
+ABSTRACT
+Context. Molecular maser lines are signposts of high-mass star formation, probing the excitation and kinematics of very compact
+regions in the close environment of young stellar objects and providing useful targets for trigonometric parallax measurements.
+Aims. Only a few NH3 (9,6) masers are known so far, and their origin is still poorly understood. Here we aim to find new NH3 (9,6)
+masers to provide a better observational basis for studying their role in high-mass star-forming regions.
+Methods. We carried out NH3 (9,6) observations toward Cepheus A and G34.26+0.15 with the Effelsberg 100-meter telescope (beam
+size 49″) and the Karl G. Jansky Very Large Array (JVLA; beam size about 1.2″).
+Results. We discovered new NH3 (9,6) masers in Cep A and G34.26+0.15, which increases the number of known high-mass star-forming regions hosting NH3 (9,6) masers from five to seven. Long-term monitoring (20 months) at Effelsberg shows that the intensity
+of the (9,6) maser in G34.26+0.15 is decreasing, while the Cep A maser remains stable. Compared to the Effelsberg data and assuming
+linear variations between the epochs of observation, the JVLA data indicate no missing flux. This suggests that the NH3 (9,6) emission
+arises from single compact emission regions that are not resolved by the interferometric measurements. As JVLA imaging shows, the
+NH3 (9,6) emission in Cep A originates from a sub-arcsecond-sized region, slightly to the west (0.28″ ± 0.10″) of the peak position
+of the 1.36 cm continuum object, HW2. In G34.26+0.15, three NH3 (9,6) maser spots are observed: one is close to the head of the
+cometary ultracompact H ii region C, and the other two are emitted from a compact region to the west of the hypercompact H ii region
+A.
+Conclusions. The newly found (9,6) masers appear to be related to outflows. The higher angular resolution of JVLA and very long
+baseline interferometry observations are needed to provide more accurate positions and constraints for pumping scenarios.
+Key words. Masers – ISM: clouds – ISM: individual objects: Cep A, G34.26+0.15 – ISM: H ii regions – Radio lines: ISM
+1. Introduction
+Since its discovery more than five decades ago (Cheung et al.
+1968), ammonia (NH3) has been a most valuable molecule for
+investigating the physical properties of molecular clouds (e.g.,
+Ho & Townes 1983). While thermally excited transitions in
+the centimeter-wavelength inversion transitions of ammonia are
+regarded as a reliable thermometer of molecular clouds (e.g.,
+Walmsley & Ungerechts 1983; Danby et al. 1988), ammonia
+masers have attracted attention since the first detection of maser
+action in the (J, K) = (3,3) metastable (J = K) line toward the
+massive star-forming region W33 (Wilson et al. 1982). Subsequent observations have led to the detection of new metastable
+ammonia masers, including 15NH3 (3,3) (Mauersberger et al.
+1986), NH3 (1,1) (Gaume et al. 1996), NH3 (2,2) (Mills et al.
+2018), NH3 (5,5) (Cesaroni et al. 1992), NH3 (6,6) (Beuther
+⋆ Member of the International Max Planck Research School (IMPRS) for Astronomy and Astrophysics at the universities of Bonn and
+Cologne.
+et al. 2007), NH3 (7,7), NH3 (9,9), and NH3 (12,12) (Henkel
+et al. 2013). These have led to the discovery of metastable maser
+lines in 22 different regions (Mauersberger et al. 1986, 1987;
+Wilson & Henkel 1988; Wilson et al. 1990; Pratap et al. 1991;
+Cesaroni et al. 1992; Wilson & Schilke 1993; Mangum & Wootten 1994; Kraemer & Jackson 1995; Zhang & Ho 1995; Zhang
+et al. 1999; Walsh et al. 2007; Hunter et al. 2008; Galván-Madrid
+et al. 2009; Brogan et al. 2011; Urquhart et al. 2011; Walsh
+et al. 2011; Wang et al. 2012; Henkel et al. 2013; Hoffman &
+Joyce 2014; McEwen et al. 2016; Mills et al. 2018; Hogge et al.
+2019; Mei et al. 2020; Towner et al. 2021). Compared with the
+metastable ammonia masers, detected non-metastable (J > K)
+ammonia maser transitions are more numerous. The first highly
+excited non-metastable ammonia maser was detected by Madden et al. (1986) in the (J, K) = (9,6) and (6,3) lines. Thereafter,
+many other NH3 non-metastable inversion transition lines have
+been identified as masers, including the (5,3), (5,4), (6,1), (6,2),
+(6,4), (6,5), (7,3), (7,4), (7,5) (7,6), (8,3), (8,4), (8,5), (8,6), (9,3),
+(9,4), (9,5), (9,7), (9,8), (10,7), (10,8), (10,9), and (11,9) transi-
+Article number, page 1 of 10
 arXiv:2201.00021v3 [astro-ph.GA] 9 Apr 2022
-A&A proofs: manuscript no. mainArxiv
-tions (e.g., Mauersberger et al. 1987, 1988; Walsh et al. 2007;
-Henkel et al. 2013; Mei et al. 2020). Except for the NH3 (3,3)
-masers proposed to be associated with four supernova remnants
-(McEwen et al. 2016), almost all the other ammonia masers are
-detected in high-mass star-forming regions (HMSFRs). How￾ever, while many HMSFRs host water (H2O), hydroxyl (OH),
-or methanol (CH3OH) masers, ammonia masers are quite rare
-in these sources, and the role that the environment of a young
-high-mass star plays in their excitation remains unclear. There￾fore, dedicated searches for ammonia masers in HMSFRs are
-indispensable in regard to their overall incidence and associa￾tion with different environments, which can provide additional
-constraints on the pumping mechanism of ammonia masers.
-So far, a total of 32 NH3 inversion transitions (∆K = 0
-and ∆J = 0) have been identified as masers. Among these, and
-despite arising from energy levels as high as 1090 K above
-the ground state, the NH3 (9,6) maser stands out as being the
-strongest and most variable one in W51-IRS2 (e.g., Henkel et al.
-2013). Maser emission in this line has only been detected in five
-HMSFRs, W51, NGC7538, W49, DR21 (OH) (Madden et al.
-1986), and Sgr B2(N) (Mei et al. 2020). The NH3 (3,3) masers
-are thought to be collisionally excited (e.g., Flower et al. 1990;
-Mangum & Wootten 1994); in contrast, the pumping mecha￾nism of NH3 (9,6) masers is less well constrained (Madden et al.
-1986). Brown & Cragg (1991) have studied ortho-ammonia and
-found that it could possibly pump the (6,3) inversion line, but
-they did not extend their model to the (9,6) transition due to the
-fact that collision rates are only known for inversion levels up to
-J = 6 (e.g., Danby et al. 1988).
-NH3 (9,6) masers are found to be strongly variable, similar to
-H2O masers (Madden et al. 1986; Pratap et al. 1991; Henkel et al.
-2013). In W51-IRS2, Henkel et al. (2013) found that the (9,6)
-line showed significant variation in line shape within a time in￾terval of only two days. Mapping of the (9,6) maser toward W51
-with very long baseline interferometry (VLBI) suggests that the
-masers are closer to the H2O masers than to the OH masers or
-to ultracompact (UC) H ii regions (Pratap et al. 1991). While
-Henkel et al. (2013) and Goddi et al. (2015) showed that the SiO
-and NH3 masers in W51-IRS2 are very close to each other, their
-positions, differing by 000
-.065 (∼0.015 pc), do not fully coincide.
-In this paper we report the discovery of NH3 (9,6) masers
-in two HMSFRs, Cepheus A and G34.26+0.15. This increases
-the number of (9,6) maser detections in our Galaxy from five
-to seven. In Sect. 2 observations with the Effelsberg 100-meter
-telescope and the Karl G. Jansky Very Large Array (JVLA) are
-described. Results are presented in Sect. 3. The morphology of
-Cep A and G34.26+0.15 as well as a comparison of the emission
-distributions of different tracers with the NH3 (9,6) masers are
-presented in Sect. 4. Our main results are summarized in Sect. 5.
-2. Observations and data reduction
-2.1. Effelsberg observations and data reduction
-The NH3 (9,6) line was observed toward Cep A and
-G34.26+0.15 with the 100-meter Effelsberg telescope1
-in 2020
-January and 2021 February, July, and August. The S14mm dou￾ble beam secondary focus receiver was employed. The full width
-at half maximum (FWHM) beam size is 4900 at 18.5 GHz, the
-frequency of the target line. The observations were performed in
-position switching mode, and the off position was 100
-in azimuth
-1 Based on observations with the 100-meter telescope of the MPIfR
-(Max-Planck-Institut für Radioastronomie) at Effelsberg.
-away from the source. For observations made before 2021 Au￾gust, we used a spectrometer that covered 2 GHz wide backends
-with a channel width of 38.1 kHz, corresponding to ∼0.62 km s−1
-at the line’s rest frequency, 18.49939 GHz (Poynter & Kakar
-1975). A high spectral resolution backend with 65536 channels
-and a bandwidth of 300 MHz was employed in 2021 August,
-providing a channel width of 0.07 km s−1
-at 18.5 GHz. Point￾ing was checked every 2 hours using 3C 286 or NGC 7027.
-Focus calibrations were done at the beginning of the observa￾tions and during sunset and sunrise toward the abovementioned
-pointing sources. The system temperatures were 100–130 K on
-a main-beam brightness temperature, TMB, scale. This flux den￾sity was calibrated assuming a TMB/S ratio of 1.95 K/Jy, derived
-from continuum cross scans of NGC 7027 (the flux density was
-adopted from Ott et al. 1994). Calibration uncertainties are esti￾mated to be ∼ 10%.
-We used the GILDAS/CLASS2 package (Pety 2005) to re￾duce the spectral line data. A first-order polynomial was sub￾tracted from each spectrum for baseline removal.
-2.2. JVLA observations and data reduction
-Observations of the NH3 (9,6) line toward Cep A and
-G34.26+0.15 were obtained on 2021 July 13 with the JVLA
-of the National Radio Astronomy Observatory3
-(NRAO) in the
-C configuration (project ID: 21A-157, PI: Yaoting Yan). We
-employed 27 antennas for the observations. The primary beam
-of the JVLA antennas is 15000 (FWHM) at 18.5 GHz. A mix￾ture of mixed three-bit and eight-bit samplers were used to per￾form the observations. For the NH3 (9,6) line observations, we
-used one subband with the eight-bit sampler covering a band￾width of 16 MHz with full polarization, eight recirculations, and
-four baseline board pairs (BIBPs) to provide a velocity range
-of 260 km s−1 with a channel spacing of 0.13 km s−1
-. Two
-additional subbands of bandwidth 16 MHz were used to cover
-the NH3 (8,5) and (10,7) lines. The three-bit sampler with 32
-subbands, each with a bandwidth of 128 MHz to cover a to￾tal range of 4 GHz between 20–24 GHz, was used to mea￾sure the continuum emission. 3C 286 with a flux density of
-2.89 Jy at 18.5 GHz (Perley & Butler 2013) was used as a
-calibrator for pointing, flux density, bandpass, and polarization.
-J2230+6946 and J1851+0035 served as gain calibrators for Cep
-A and G34.26+0.15, respectively. The on-source times were
-4
-m30s
-and 4m50s
-toward Cep A and G34.26+0.15, respectively.
-Data from two antennas were lost due to technical is￾sues. The data from the remaining 25 antennas were reduced
-through the Common Astronomy Software Applications pack￾age (CASA4
-; McMullin et al. 2007). We calibrated the data with
-the JVLA CASA calibration pipeline using CASA 6.1.2. The
-results were obtained after flagging data that contain artifacts.
-We inspected the phase, amplitude, and bandpass variations of
-the calibrated visibility data to search for additional artifacts be￾fore imaging. Then, the uvcontsub task in CASA was used to
-separate the calibrated visibilities into two parts, one with line￾only data and the other with the continuum data. The tclean task
-with a cell size of 000
-.2 and Briggs weighting with robust=0 was
-used to produce the images of spectral line and continuum emis￾sion. The synthesized beams for NH3 (9,6) are 100
-.47 × 0
-00
-.99 at
-2 https://www.iram.fr/IRAMFR/GILDAS/
-3 The National Radio Astronomy Observatory is a facility of the Na￾tional Science Foundation operated under cooperative agreement by As￾sociated Universities, Inc.
-4 https://casa.nrao.edu/
+A&A proofs: manuscript no. mainArxiv
+tions (e.g., Mauersberger et al. 1987, 1988; Walsh et al. 2007;
+Henkel et al. 2013; Mei et al. 2020). Except for the NH3 (3,3)
+masers proposed to be associated with four supernova remnants
+(McEwen et al. 2016), almost all the other ammonia masers are
+detected in high-mass star-forming regions (HMSFRs). However, while many HMSFRs host water (H2O), hydroxyl (OH),
+or methanol (CH3OH) masers, ammonia masers are quite rare
+in these sources, and the role that the environment of a young
+high-mass star plays in their excitation remains unclear. Therefore, dedicated searches for ammonia masers in HMSFRs are
+indispensable in regard to their overall incidence and association with different environments, which can provide additional
+constraints on the pumping mechanism of ammonia masers.
+So far, a total of 32 NH3 inversion transitions (∆K = 0
+and ∆J = 0) have been identified as masers. Among these, and
+despite arising from energy levels as high as 1090 K above
+the ground state, the NH3 (9,6) maser stands out as being the
+strongest and most variable one in W51-IRS2 (e.g., Henkel et al.
+2013). Maser emission in this line has only been detected in five
+HMSFRs, W51, NGC7538, W49, DR21 (OH) (Madden et al.
+1986), and Sgr B2(N) (Mei et al. 2020). The NH3 (3,3) masers
+are thought to be collisionally excited (e.g., Flower et al. 1990;
+Mangum & Wootten 1994); in contrast, the pumping mechanism of NH3 (9,6) masers is less well constrained (Madden et al.
+1986). Brown & Cragg (1991) have studied ortho-ammonia and
+found that it could possibly pump the (6,3) inversion line, but
+they did not extend their model to the (9,6) transition due to the
+fact that collision rates are only known for inversion levels up to
+J = 6 (e.g., Danby et al. 1988).
+NH3 (9,6) masers are found to be strongly variable, similar to
+H2O masers (Madden et al. 1986; Pratap et al. 1991; Henkel et al.
+2013). In W51-IRS2, Henkel et al. (2013) found that the (9,6)
+line showed significant variation in line shape within a time interval of only two days. Mapping of the (9,6) maser toward W51
+with very long baseline interferometry (VLBI) suggests that the
+masers are closer to the H2O masers than to the OH masers or
+to ultracompact (UC) H ii regions (Pratap et al. 1991). While
+Henkel et al. (2013) and Goddi et al. (2015) showed that the SiO
+and NH3 masers in W51-IRS2 are very close to each other, their
+positions, differing by 0″.065 (∼0.015 pc), do not fully coincide.
+In this paper we report the discovery of NH3 (9,6) masers
+in two HMSFRs, Cepheus A and G34.26+0.15. This increases
+the number of (9,6) maser detections in our Galaxy from five
+to seven. In Sect. 2 observations with the Effelsberg 100-meter
+telescope and the Karl G. Jansky Very Large Array (JVLA) are
+described. Results are presented in Sect. 3. The morphology of
+Cep A and G34.26+0.15 as well as a comparison of the emission
+distributions of different tracers with the NH3 (9,6) masers are
+presented in Sect. 4. Our main results are summarized in Sect. 5.
+2. Observations and data reduction
+2.1. Effelsberg observations and data reduction
+The NH3 (9,6) line was observed toward Cep A and
+G34.26+0.15 with the 100-meter Effelsberg telescope¹ in 2020
+January and 2021 February, July, and August. The S14mm double beam secondary focus receiver was employed. The full width
+at half maximum (FWHM) beam size is 49″ at 18.5 GHz, the
+frequency of the target line. The observations were performed in
+position switching mode, and the off position was 10′ in azimuth
+1 Based on observations with the 100-meter telescope of the MPIfR
+(Max-Planck-Institut für Radioastronomie) at Effelsberg.
+away from the source. For observations made before 2021 August, we used a spectrometer that covered 2 GHz wide backends
+with a channel width of 38.1 kHz, corresponding to ∼0.62 km s−1
+at the line’s rest frequency, 18.49939 GHz (Poynter & Kakar
+1975). A high spectral resolution backend with 65536 channels
+and a bandwidth of 300 MHz was employed in 2021 August,
+providing a channel width of 0.07 km s−1 at 18.5 GHz. Pointing was checked every 2 hours using 3C 286 or NGC 7027.
+Focus calibrations were done at the beginning of the observations and during sunset and sunrise toward the abovementioned
+pointing sources. The system temperatures were 100–130 K on
+a main-beam brightness temperature, TMB, scale. This flux density was calibrated assuming a TMB/S ratio of 1.95 K/Jy, derived
+from continuum cross scans of NGC 7027 (the flux density was
+adopted from Ott et al. 1994). Calibration uncertainties are estimated to be ∼ 10%.
+We used the GILDAS/CLASS² package (Pety 2005) to reduce the spectral line data. A first-order polynomial was subtracted from each spectrum for baseline removal.
+2.2. JVLA observations and data reduction
+Observations of the NH3 (9,6) line toward Cep A and
+G34.26+0.15 were obtained on 2021 July 13 with the JVLA
+of the National Radio Astronomy Observatory³ (NRAO)
+in the
+C configuration (project ID: 21A-157, PI: Yaoting Yan). We
+employed 27 antennas for the observations. The primary beam
+of the JVLA antennas is 150″ (FWHM) at 18.5 GHz. A mixture of mixed three-bit and eight-bit samplers were used to perform the observations. For the NH3 (9,6) line observations, we
+used one subband with the eight-bit sampler covering a bandwidth of 16 MHz with full polarization, eight recirculations, and
+four baseline board pairs (BIBPs) to provide a velocity range
+of 260 km s−1 with a channel spacing of 0.13 km s−1. Two
+additional subbands of bandwidth 16 MHz were used to cover
+the NH3 (8,5) and (10,7) lines. The three-bit sampler with 32
+subbands, each with a bandwidth of 128 MHz to cover a total range of 4 GHz between 20–24 GHz, was used to measure the continuum emission. 3C 286 with a flux density of
+2.89 Jy at 18.5 GHz (Perley & Butler 2013) was used as a
+calibrator for pointing, flux density, bandpass, and polarization.
+J2230+6946 and J1851+0035 served as gain calibrators for Cep
+A and G34.26+0.15, respectively. The on-source times were
+4m30s
+and 4m50s
+toward Cep A and G34.26+0.15, respectively.
+Data from two antennas were lost due to technical issues. The data from the remaining 25 antennas were reduced
+through the Common Astronomy Software Applications package (CASA⁴;
+McMullin et al. 2007). We calibrated the data with
+the JVLA CASA calibration pipeline using CASA 6.1.2. The
+results were obtained after flagging data that contain artifacts.
+We inspected the phase, amplitude, and bandpass variations of
+the calibrated visibility data to search for additional artifacts before imaging. Then, the uvcontsub task in CASA was used to
+separate the calibrated visibilities into two parts, one with lineonly data and the other with the continuum data. The tclean task
+with a cell size of 0″.2 and Briggs weighting with robust=0 was
+used to produce the images of spectral line and continuum emission.
+The synthesized beams
+for NH3 (9,6) are
+1″.47 × 0″.99 at
+2 https://www.iram.fr/IRAMFR/GILDAS/
+3 The National Radio Astronomy Observatory is a facility of the National Science Foundation operated under cooperative agreement by Associated Universities, Inc.
+4 https://casa.nrao.edu/
 Article number, page 2 of 10
-Y. T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions
-P.A. = 58◦
-.79 and 100
-.33 × 1
-00
-.06 at P.A. = 5
-◦
-.36 toward Cep A
-and G34.26+0.15, respectively. For the 1.36 cm (20–24 GHz)
-continuum emission, the synthesized beams are 100
-.08 × 0
-00
-.67 at
-P.A. = 60◦
-.64 and 000
-.95 × 0
-00
-.71 at P.A. = 5
-◦
-.91 toward Cep A and
-G34.26+0.15. The typical absolute astrometric accuracy of the
-JVLA is ∼10% of the synthesized beam5
-. The flux density scale
-calibration accuracy is estimated to be within 15%.
-Fig. 1. Spectra from NH3 (9,6) transition lines. Left: Top to bottom:
-Time sequence of NH3 (9,6) profiles observed toward Cep A with the
-Effelsberg 100-meter telescope (after subtracting a first-order polyno￾mial baseline). A JVLA spectrum is interspersed. The systemic veloc￾ity from CO and HCO+
-lines is indicated by a dashed blue line. The
-two dashed red lines at LSR velocities, VLSR, of −0.90 km s−1
-and
-−0.28 km s−1
-indicate the central velocities of the two major compo￾nents. Right: NH3 (9,6) spectra from G34.26+0.15. The systemic ve￾locity from C17O is indicated by a dashed blue line. The three dashed
-red lines at VLSR = 54.1 km s−1
-, 55.8 km s−1
-, and 62.5 km s−1
-show the
-central velocities of the main ammonia emission components.
-3. Results
-The spectra from different epochs are shown in Figs. 1 and 2.
-Toward Cep A, the NH3 (9,6) line profile from the JVLA is ex￾tracted from an Effelsberg-beam-sized region (FWHM, 4900). In
-the case of G34.26+0.15, the NH3 spectrum is below the noise
-level if a similarly large beam size is used. Therefore, we de￾rived the JVLA NH3 (9,6) spectrum from a smaller region, with
-radius 300
-.5, that contains all the detected NH3 (9,6) emission. In
-Table A.1, the observed NH3 (9,6) line parameters obtained by
-Gaussian fits are listed. NH3 (8,5) and (10,7) emission is not de￾tected by our JVLA observations. The 3σ upper limits for the
-NH3 (8,5) and (10,7) lines toward Cep A are 23.2 mJy beam−1
-5 https://science.nrao.edu/facilities/vla/docs/manuals/oss/performance-
-/positional-accuracy
-Fig. 2. NH3 (9,6) line profiles emphasizing, in contrast to the spectra
-in Fig. 1, weaker features. Cep A spectra are presented on the left,
-G34.26+0.15 spectra on the right. The two dashed red lines in the left
-panels indicate VLSR = 1.48 km s−1
-and 2.89 km s−1
-. In the right panels,
-the two dashed red lines refer to 54.1 km s−1
-and 55.8 km s−1
-.
-and 27.2 mJy beam−1
-, respectively. In G34.26+0.15, the corre￾sponding 3σ upper limits for the NH3 (8,5) and (10,7) lines are
-22.1 mJy beam−1
-and 30.4 mJy beam−1
-. For both sources, sen￾sitivity levels refer to emission from a single channel of width
-0.13 km s−1
-. Taking the larger measured line widths of the (9,6)
-maser features (see Table A.1), these limits could be further low￾ered by factors of two to four.
-3.1. Centimeter-continuum emission
-The 1.36 cm continuum, derived from our JVLA observations,
-toward Cep A is presented in Fig. 3. Six published compact
-sources, HW2, HW3a, HW3b, HW3c, HW3d, and HW9, are de￾tected in our observations. Figure 4 shows the 1.36 cm contin￾uum in G34.26+0.15. Three main continuum objects, A, B, and
-C, are detected. By using the imfit task in CASA, we measured
-the continuum flux at 1.36 cm toward individual compact source
-components in Cep A and G34.26+0.15. Details are given in Ta￾ble A.2.
-3.2. NH3 (9,6) emission in Cep A
-In 2020 January, NH3 (9,6) emission with a peak flux density of
-0.67 ± 0.07 Jy was first detected with the Effelsberg 100-meter
-telescope in Cep A. Emission with similar strength was also de￾tected in 2021 February and August with the same telescope.
-Higher velocity resolution data, which were obtained in 2021
-August, again with the Effelsberg 100-meter telescope, show
-that the (9,6) emission contains two main velocity components.
-Overall, the flux densities of the NH3 (9,6) emission line mea￾sured with the Effelsberg 100-meter telescope are, within the cal￾ibration uncertainties, unchanged. This is valid for the time inter￾val between 2020 January and August 2021, when we smoothed
-the obtained spectra to the same velocity resolution. We also
-see another two weaker components. Figure 2 emphasizes these
-weak components with an expanded flux density scale.
-Higher angular resolution data from the JVLA pinpoint the
-position of the NH3 (9,6) emission with an offset of (−0
-00
-.28,
-0
-00
-.02) relative to the 1.36 cm continuum peak of Cep A HW2
-(Fig. 3). The deconvolved NH3 (9,6) component size is (000
-.29 ±
-0
-00
-.15) × (000
-.19 ± 0
-00
-.14) at P.A. = 174◦
-, derived with the imfit task
-in CASA, and can thus be considered, accounting for the uncer￾tainties, as unresolved.
+Y. T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions
+P.A. = 58°.79 and
+1″.33 × 1″.06
+at
+P.A. = 5°.36
+toward Cep A
+and G34.26+0.15, respectively. For the 1.36 cm (20–24 GHz)
+continuum emission, the synthesized beams are
+1″.08 × 0″.67
+at
+P.A. = 60°.64 and
+0″.95 × 0″.71
+at
+P.A. = 5°.91
+toward Cep A and
+G34.26+0.15. The typical absolute astrometric accuracy of the
+JVLA is ∼10% of the synthesized beam⁵. The flux density scale
+calibration accuracy is estimated to be within 15%.
+Fig. 1. Spectra from NH3 (9,6) transition lines. Left: Top to bottom:
+Time sequence of NH3 (9,6) profiles observed toward Cep A with the
+Effelsberg 100-meter telescope (after subtracting a first-order polynomial baseline). A JVLA spectrum is interspersed. The systemic velocity from CO and HCO+
+lines is indicated by a dashed blue line. The
+two dashed red lines at LSR velocities, VLSR, of −0.90 km s−1 and
+−0.28 km s−1 indicate the central velocities of the two major components. Right: NH3 (9,6) spectra from G34.26+0.15. The systemic velocity from C17O is indicated by a dashed blue line. The three dashed
+red lines at VLSR = 54.1 km s−1, 55.8 km s−1, and 62.5 km s−1 show the
+central velocities of the main ammonia emission components.
+3. Results
+The spectra from different epochs are shown in Figs. 1 and 2.
+Toward Cep A, the NH3 (9,6) line profile from the JVLA is extracted from an Effelsberg-beam-sized region (FWHM, 49″). In
+the case of G34.26+0.15, the NH3 spectrum is below the noise
+level if a similarly large beam size is used. Therefore, we derived the JVLA NH3 (9,6) spectrum from a smaller region, with
+radius 3″.5, that contains all the detected NH3 (9,6) emission. In
+Table A.1, the observed NH3 (9,6) line parameters obtained by
+Gaussian fits are listed. NH3 (8,5) and (10,7) emission is not detected by our JVLA observations. The 3σ upper limits for the
+NH3 (8,5) and (10,7) lines toward Cep A are 23.2 mJy beam−1
+5 https://science.nrao.edu/facilities/vla/docs/manuals/oss/performance/positional-accuracy
+
+Fig. 2. NH3 (9,6) line profiles emphasizing, in contrast to the spectra
+in Fig. 1, weaker features. Cep A spectra are presented on the left,
+G34.26+0.15 spectra on the right. The two dashed red lines in the left
+panels indicate VLSR = 1.48 km s−1 and 2.89 km s−1. In the right panels,
+the two dashed red lines refer to 54.1 km s−1 and 55.8 km s−1.
+and 27.2 mJy beam−1, respectively. In G34.26+0.15, the corresponding 3σ upper limits for the NH3 (8,5) and (10,7) lines are
+22.1 mJy beam−1 and 30.4 mJy beam−1. For both sources, sensitivity levels refer to emission from a single channel of width
+0.13 km s−1. Taking the larger measured line widths of the (9,6)
+maser features (see Table A.1), these limits could be further lowered by factors of two to four.
+3.1. Centimeter-continuum emission
+The 1.36 cm continuum, derived from our JVLA observations,
+toward Cep A is presented in Fig. 3. Six published compact
+sources, HW2, HW3a, HW3b, HW3c, HW3d, and HW9, are detected in our observations. Figure 4 shows the 1.36 cm continuum in G34.26+0.15. Three main continuum objects, A, B, and
+C, are detected. By using the imfit task in CASA, we measured
+the continuum flux at 1.36 cm toward individual compact source
+components in Cep A and G34.26+0.15. Details are given in Table A.2.
+3.2. NH3 (9,6) emission in Cep A
+In 2020 January, NH3 (9,6) emission with a peak flux density of
+0.67 ± 0.07 Jy was first detected with the Effelsberg 100-meter
+telescope in Cep A. Emission with similar strength was also detected in 2021 February and August with the same telescope.
+Higher velocity resolution data, which were obtained in 2021
+August, again with the Effelsberg 100-meter telescope, show
+that the (9,6) emission contains two main velocity components.
+Overall, the flux densities of the NH3 (9,6) emission line measured with the Effelsberg 100-meter telescope are, within the calibration uncertainties, unchanged. This is valid for the time interval between 2020 January and August 2021, when we smoothed
+the obtained spectra to the same velocity resolution. We also
+see another two weaker components. Figure 2 emphasizes these
+weak components with an expanded flux density scale.
+Higher angular resolution data from the JVLA pinpoint the
+position of the NH3 (9,6) emission
+with an offset of
+(−0″.28, 0″.02)
+relative to the
+1.36 cm continuum peak
+of Cep A HW2
+(Fig. 3). The deconvolved NH3 (9,6)
+component size is
+(0″.29 ± 0″.15) × (0″.19 ± 0″.14)
+at P.A. = 174°,
+derived with the imfit task
+in CASA, and can thus be considered,
+accounting for the uncertainties, as unresolved.
 Article number, page 3 of 10
-A&A proofs: manuscript no. mainArxiv
-Fig. 3. Cepheus A. White contours mark the 1.36 cm JVLA continuum map of Cep A; levels are −5, 5, 10, 20, 30, 40, 50, 70, 90,
-and 110 × 0.125 mJy beam−1
-. The background image is the Spitzer 4.5 µm emission, taken from the Galactic Legacy Infrared Mid-Plane
-Survey Extraordinaire (GLIMPSE; Benjamin et al. 2003; Churchwell et al. 2009). The reference position is αJ2000 = 22h56m17s
-.972, and
-δJ2000 = 62◦0104900
-.587, the peak position of the continuum map, is marked with a black cross. Slightly to the west of the cross is the black
-ellipse denoting the position of the NH3 (9,6) emission with a purple star at its center. OH (Bartkiewicz et al. 2005), H2O (Sobolev et al. 2018),
-and CH3OH (Sanna et al. 2017) masers are presented as diamonds, circles, and squares, respectively. The color bar on the right-hand side indicates
-the LSR velocity range of the maser spots.
-Fig. 4. 1.36 cm JVLA continuum map of G34.26+0.15 presented as white contours with levels of −5, 5, 10, 20, 30, 40, 50, 70, 90, 110, 130,
-150, 180, and 200 × 5.0 mJy beam−1
-. The background image is the Spitzer 4.5 µm emission, taken from GLIMPSE. The reference position is
-αJ2000 = 18h53m18s
-.560, and δJ2000 = 01◦1405800
-.201, the peak position, is marked by a black cross. The black ellipses show the positions of NH3
-(9,6) emissions with stars at their center (i.e., M1, M2, and M3). OH (Zheng et al. 2000), H2O (Imai et al. 2011), and CH3OH (Bartkiewicz et al.
-2016) masers are presented as diamonds, circles, and squares, respectively. The color bar indicates the velocity range (VLSR) of maser spots.
-In view of the constancy of the flux densities obtained at Ef￾felsberg and the similar JVLA flux density, measured in 2021
-July, there is no missing interferometric flux density in the JVLA
-data.
-3.3. NH3 (9,6) emission in G34.26+0.15
-The NH3 (9,6) emission was first detected toward G34.26+0.15
-in 2020 January with the Effelsberg 100-meter telescope. Higher
-velocity resolution data from 2021 August show the NH3 (9,6)
-emission to be composed of two different components. The spec￾tra of weak components on a smaller flux density scale are pre￾sented in Fig. 2.
-Three different locations showing NH3 (9,6) emission are
-found toward G34.26+0.15 (Fig. 4). The deconvolved NH3 (9,6)
-component sizes are (100
-.42±0
-00
-.43)×(000
-.54±0
-00
-.62) at P.A. = 97◦
-(M1), (000
-.42 ± 0
-00
-.27) × (000
-.15 ± 0
-00
-.27) at P.A. = 150◦
-(M2), and
+A&A proofs: manuscript no. mainArxiv
+Fig. 3. Cepheus A. White contours mark the 1.36 cm JVLA continuum map of Cep A; levels are −5, 5, 10, 20, 30, 40, 50, 70, 90,
+and 110 × 0.125 mJy beam−1. The background image is the Spitzer 4.5 µm emission, taken from the Galactic Legacy Infrared Mid-Plane
+Survey Extraordinaire (GLIMPSE; Benjamin et al. 2003; Churchwell et al. 2009). The reference position is αJ2000 = 22h56m17s.972, and
+δJ2000 = 62°01′49″.587, the peak position of the continuum map, is marked with a black cross. Slightly to the west of the cross is the black
+ellipse denoting the position of the NH3 (9,6) emission with a purple star at its center. OH (Bartkiewicz et al. 2005), H2O (Sobolev et al. 2018),
+and CH3OH (Sanna et al. 2017) masers are presented as diamonds, circles, and squares, respectively. The color bar on the right-hand side indicates
+the LSR velocity range of the maser spots.
+Fig. 4. 1.36 cm JVLA continuum map of G34.26+0.15 presented as white contours with levels of −5, 5, 10, 20, 30, 40, 50, 70, 90, 110, 130,
+150, 180, and 200 × 5.0 mJy beam−1. The background image is the Spitzer 4.5 µm emission, taken from GLIMPSE. The reference position is
+αJ2000 = 18h53m18s.560, and δJ2000 = 01°14′58″.201, the peak position, is marked by a black cross. The black ellipses show the positions of NH3
+(9,6) emissions with stars at their center (i.e., M1, M2, and M3). OH (Zheng et al. 2000), H2O (Imai et al. 2011), and CH3OH (Bartkiewicz et al.
+2016) masers are presented as diamonds, circles, and squares, respectively. The color bar indicates the velocity range (VLSR) of maser spots.
+In view of the constancy of the flux densities obtained at Effelsberg and the similar JVLA flux density, measured in 2021
+July, there is no missing interferometric flux density in the JVLA
+data.
+3.3. NH3 (9,6) emission in G34.26+0.15
+The NH3 (9,6) emission was first detected toward G34.26+0.15
+in 2020 January with the Effelsberg 100-meter telescope. Higher
+velocity resolution data from 2021 August show the NH3 (9,6)
+emission to be composed of two different components. The spectra of weak components on a smaller flux density scale are presented in Fig. 2.
+Three different locations showing NH3 (9,6) emission are
+found toward G34.26+0.15 (Fig. 4). The deconvolved NH3 (9,6)
+component sizes are
+(1″.42 ± 0″.43)
+× (0″.54 ± 0″.62)
+at P.A. = 97°
+(M1),
+(0″.42 ± 0″.27)
+× (0″.15 ± 0″.27)
+at P.A. = 150°
+(M2),
+and
 Article number, page 4 of 10
-Y. T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions
-(100
-.17 ± 0
-00
-.34) × (000
-.27 ± 0
-00
-.46) at P.A. = 53◦
-(M3) and are thus
-comparable to or smaller than the beam size.
-Overall, the NH3 (9,6) line from G34.26+0.15 weakened
-during the time interval from 2020 January to 2021 August by
-about 70%. A comparison between the JVLA spectrum and the
-Effelsberg data, assuming a linear decrease in the integrated in￾tensity as a function of time between different epochs of the
-100-meter observations, suggests there is no missing flux in the
-JVLA data. This is similar to the situation in Cep A.
-4. Discussion
-4.1. Morphology of Cep A and G34.26+0.15
-Cep A, at a trigonometric parallax distance of 0.70±0.04 kpc
-(Moscadelli et al. 2009; Dzib et al. 2011), is the second closest
-HMSFR (after Orion) and by far the closest NH3 (9,6) maser
-known. About 16 compact (∼1
-00) radio sources (e.g., Hughes &
-Wouterloot 1984; Hughes 1991; Garay et al. 1996) have been
-identified in Cep A. Hughes & Wouterloot (1984) discovered
-these targets at radio wavelengths, which are UC and hypercom￾pact (HC) H ii regions and/or stellar wind sources, subsequently
-named as HW sources. The HW2 object is one of the best known
-examples of a protostellar jet or disk system driving a powerful
-outflow (e.g., Rodriguez et al. 1980; Güsten et al. 1984; Torrelles
-et al. 1986; Curiel et al. 2006; Carrasco-González et al. 2021).
-The observed NH3 (9,6) emission is slightly offset (−0
-00
-.28, 000
-.02)
-from the center of HW2 (see Fig. 3).
-G34.26+0.15 is an HMSFR located at a distance of 3.3 kpc
-(Kuchar & Bania 1994). It hosts four radio continuum compo￾nents named A, B, C, and D. Component C is a prototypical
-cometary UC H ii region containing a compact head and a diffuse
-tail that extends from east to west (e.g., Reid & Ho 1985; Garay
-et al. 1986; Sewilo et al. 2004; Sewiło et al. 2011). Components
-A and B are HC H ii regions, located to the east of component
-C. An extended ring-like H ii region, called component D, is lo￾cated southeast of components A-C. One of the three observed
-NH3 (9,6) emission line sources, M1, is close to the head of com￾ponent C, whereas M2 and M3 originate from another compact
-region in the west of the HC H ii component A (see Fig. 4).
-4.2. NH3 (9,6) emission possibly caused by maser action
-As shown in Fig. 1, the NH3 (9,6) profiles in Cep A and
-G34.26+0.15 are narrow (∆V1/2 ≤2.0 km s−1
-), much narrower
-than the expected line widths (&4 km s−1
-) of thermal lines ob￾served at a similar angular resolution (e.g., Torrelles et al. 1985,
-1986, 1993, 1999; Henkel et al. 1987; Comito et al. 2007; Mook￾erjea et al. 2007; Wyrowski et al. 2012; Beuther et al. 2018). Ve￾locity shifts with respect to the systemic velocities of the two
-sources are both observed, that is, V ∼10 km s−1
-in Cep A and
-V ∼4 km s−1
-in G34.26+0.15 (see details in Sect. 4.3). Further￾more, time variability is observed in the case of G34.26+0.15,
-which is also a characteristic feature of maser emission.
-Additional evidence of their maser nature is the high bright￾ness temperatures of the (9,6) emission spots toward Cep A and
-G34.26+0.15. The spectral parameters are listed in Table A.3.
-Because at least a significant part of the NH3 (9,6) emission
-is not resolved by our JVLA observations, the derived bright￾ness temperatures are only lower limits. Nevertheless, the lower
-limits on the brightness temperature are >800 K in Cep A (see
-Table A.3), which is much higher than the expected thermal
-gas temperature of ∼250 K (e.g., Patel et al. 2005; Comito
-et al. 2007; Beuther et al. 2018). This strongly suggests that
-the NH3 (9,6) emission in Cep A is due to maser action. Be￾cause G34.26+0.15 is located at about five times the distance to
-Cep A, beam dilution effects reduce the lower main beam bright￾ness temperature limit to 400 K in G34.26+0.15 (M2) (see Ta￾ble A.3). We also note that the luminosity of the NH3 (9,6) emis￾sion in G34.26+0.15 is higher than or comparable to that in Cep
-A, depending on the epoch of our observations.
-Finally, the non-detections of the (8,5) and (10,7) lines also
-indicate that the (9,6) line is special. This allows us to derive
-lower 3σ limits of the (9,6)/(8,5) and (9,6)/(10,7) line intensity
-ratios. The (9,6) line arises from ortho-NH3 (K = 3n), whereas
-the NH3 (8,5) and (10,7) lines are para-NH3 (K , 3n) lines.
-The minimum ortho-to-para ratios are in the range 12–42 and 1–
-8 toward Cep A and G34.26+0.15, respectively. The statistical
-weights for the ortho states are twice as large as those for the
-para states (e.g., Umemoto et al. 1999; Goddi et al. 2011; Henkel
-et al. 2013). In Cep A, the line intensity ratios are far higher than
-this factor of two. Thus, at least in Cep A the higher main beam
-brightness peak temperature of the (9,6) emission is caused by
-maser action, perhaps involving exponential amplification, and
-the case of G34.26+0.15 is likely similar.
-4.3. Comparison of NH3 (9,6) masers with previously
-published (quasi-)thermal NH3 emission
-The metastable (1,1), (2,2), (3,3), and (4,4) ammonia lines
-show thermal emission toward Cep A over a velocity range of
-−13 km s−1 ≤ VLSR ≤ −4 km s−1
-(Brown et al. 1981; Güsten
-et al. 1984; Torrelles et al. 1985, 1986, 1993, 1999). An average
-NH3 column density of ∼5×1015 cm−2 was estimated for a region
-of 300 around HW2 (Torrelles et al. 1999). This high NH3 abun￾dance could provide a suitable environment for maser species.
-Large line widths (∆V1/2 '7.0 km s−1
-) with VLSR ∼ −10 km s−1
-in both (1,1) and (2,2) lines were found toward HW2 (Torrelles
-et al. 1993). The velocity is similar to the cloud’s systemic lo￾cal standard of rest (LSR) velocity of −11.2 km s−1
-, which
-is based on CO (Narayanan & Walker 1996) and HCO+ ob￾servations (Gómez et al. 1999). Our (9,6) maser is redshifted
-(−0.9 km s−1 ≤ VLSR ≤2.9 km s−1
-) and shares positions with
-the outflowing gas seen in CO and HCO+ with similarly red￾shifted velocities. Therefore, we argue that the (9,6) masers are
-related to outflowing gas.
-In G34.26+0.15, a large NH3 column density,
-1018.5±0.2
-cm−2
-, and a kinetic temperature of 225±75 K
-were derived by Henkel et al. (1987) based on measurements
-of 15 NH3 inversion transitions in the frequency range of
-22.0–26.0 GHz. These did not include the (9,6) transition.
-While these lines were measured with a beam size of about
-4000, a comparison of the peak intensities of the optically thick
-lines with the kinetic temperature reveals the size of the hot,
-ammonia-emitting core to be only ∼2.500. All those measured
-NH3 lines were quasi-thermal and had LSR velocities of
-∼ 58.5 km s−1
-, close to the systemic velocity of ∼ 58.1 km s−1
-obtained from C17O observations (Wyrowski et al. 2012).
-Their line widths (∆V1/2 ≥3.6 km s−1
-) are larger than what
-we find (0.35 km s−1 ≤ ∆V1/2 ≤ 0.94 km s−1
-) for each (9,6)
-maser component (see details in Table A.3). In all, we may
-have observed four different (9,6) velocity features. Three
-are blueshifted at VLSR ∼ 53.8 km s−1
-, 55.8 km s−1
-, and
-56.8 km s−1
-, and a fourth, tentatively detected, at 62.5 km s−1
-.
-This tentative redshifted feature was only potentially detected
-with Effelsberg in 2020 January. The velocity is similar to that
-of the JVLA measurements on the NH3 (1,1) absorption line
-against continuum source C (∼ 7
-00 resolution; Keto et al. 1987)
+Y. T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions
+(1″.17 ± 0″.34)
+× (0″.27 ± 0″.46)
+at P.A. = 53°
+(M3)
+and are thus
+comparable to or smaller than the beam size.
+Overall, the NH3 (9,6) line from G34.26+0.15 weakened
+during the time interval from 2020 January to 2021 August by
+about 70%. A comparison between the JVLA spectrum and the
+Effelsberg data, assuming a linear decrease in the integrated intensity as a function of time between different epochs of the
+100-meter observations, suggests there is no missing flux in the
+JVLA data. This is similar to the situation in Cep A.
+4. Discussion
+4.1. Morphology of Cep A and G34.26+0.15
+Cep A, at a trigonometric parallax distance of 0.70±0.04 kpc
+(Moscadelli et al. 2009; Dzib et al. 2011), is the second closest
+HMSFR (after Orion) and by far the closest NH3 (9,6) maser
+known. About 16 compact (∼1″) radio sources (e.g., Hughes &
+Wouterloot 1984; Hughes 1991; Garay et al. 1996) have been
+identified in Cep A. Hughes & Wouterloot (1984) discovered
+these targets at radio wavelengths, which are UC and hypercompact (HC) H ii regions and/or stellar wind sources, subsequently
+named as HW sources. The HW2 object is one of the best known
+examples of a protostellar jet or disk system driving a powerful
+outflow (e.g., Rodriguez et al. 1980; Güsten et al. 1984; Torrelles
+et al. 1986; Curiel et al. 2006; Carrasco-González et al. 2021).
+The observed NH3 (9,6) emission is slightly offset (−0″.28, 0″.02)
+from the center of HW2 (see Fig. 3).
+G34.26+0.15 is an HMSFR located at a distance of 3.3 kpc
+(Kuchar & Bania 1994). It hosts four radio continuum components named A, B, C, and D. Component C is a prototypical
+cometary UC H ii region containing a compact head and a diffuse
+tail that extends from east to west (e.g., Reid & Ho 1985; Garay
+et al. 1986; Sewilo et al. 2004; Sewiło et al. 2011). Components
+A and B are HC H ii regions, located to the east of component
+C. An extended ring-like H ii region, called component D, is located southeast of components A-C. One of the three observed
+NH3 (9,6) emission line sources, M1, is close to the head of component C, whereas M2 and M3 originate from another compact
+region in the west of the HC H ii component A (see Fig. 4).
+4.2. NH3 (9,6) emission possibly caused by maser action
+As shown in Fig. 1, the NH3 (9,6) profiles in Cep A and
+G34.26+0.15 are narrow (∆V1/2 ≤2.0 km s−1), much narrower
+than the expected line widths (≳4 km s−1) of thermal lines observed at a similar angular resolution (e.g., Torrelles et al. 1985,
+1986, 1993, 1999; Henkel et al. 1987; Comito et al. 2007; Mookerjea et al. 2007; Wyrowski et al. 2012; Beuther et al. 2018). Velocity shifts with respect to the systemic velocities of the two
+sources are both observed, that is, V ∼10 km s−1 in Cep A and
+V ∼4 km s−1 in G34.26+0.15 (see details in Sect. 4.3). Furthermore, time variability is observed in the case of G34.26+0.15,
+which is also a characteristic feature of maser emission.
+Additional evidence of their maser nature is the high brightness temperatures of the (9,6) emission spots toward Cep A and
+G34.26+0.15. The spectral parameters are listed in Table A.3.
+Because at least a significant part of the NH3 (9,6) emission
+is not resolved by our JVLA observations, the derived brightness temperatures are only lower limits. Nevertheless, the lower
+limits on the brightness temperature are >800 K in Cep A (see
+Table A.3), which is much higher than the expected thermal
+gas temperature of ∼250 K (e.g., Patel et al. 2005; Comito
+et al. 2007; Beuther et al. 2018). This strongly suggests that
+the NH3 (9,6) emission in Cep A is due to maser action. Because G34.26+0.15 is located at about five times the distance to
+Cep A, beam dilution effects reduce the lower main beam brightness temperature limit to 400 K in G34.26+0.15 (M2) (see Table A.3). We also note that the luminosity of the NH3 (9,6) emission in G34.26+0.15 is higher than or comparable to that in Cep
+A, depending on the epoch of our observations.
+Finally, the non-detections of the (8,5) and (10,7) lines also
+indicate that the (9,6) line is special. This allows us to derive
+lower 3σ limits of the (9,6)/(8,5) and (9,6)/(10,7) line intensity
+ratios. The (9,6) line arises from ortho-NH3 (K = 3n), whereas
+the NH3 (8,5) and (10,7) lines are para-NH3 (K ≠ 3n) lines.
+The minimum ortho-to-para ratios are in the range 12–42 and 1–
+8 toward Cep A and G34.26+0.15, respectively. The statistical
+weights for the ortho states are twice as large as those for the
+para states (e.g., Umemoto et al. 1999; Goddi et al. 2011; Henkel
+et al. 2013). In Cep A, the line intensity ratios are far higher than
+this factor of two. Thus, at least in Cep A the higher main beam
+brightness peak temperature of the (9,6) emission is caused by
+maser action, perhaps involving exponential amplification, and
+the case of G34.26+0.15 is likely similar.
+4.3. Comparison of NH3 (9,6) masers with previously
+published (quasi-)thermal NH3 emission
+The metastable (1,1), (2,2), (3,3), and (4,4) ammonia lines
+show thermal emission toward Cep A over a velocity range of
+−13 km s−1 ≤ VLSR ≤ −4 km s−1(Brown et al. 1981; Güsten
+et al. 1984; Torrelles et al. 1985, 1986, 1993, 1999). An average
+NH3 column density of ∼5×10^15 cm−2 was estimated for a region
+of 3″ around HW2 (Torrelles et al. 1999). This high NH3 abundance could provide a suitable environment for maser species.
+Large line widths (∆V1/2 ≃7.0 km s−1) with VLSR ∼ −10 km s−1
+in both (1,1) and (2,2) lines were found toward HW2 (Torrelles
+et al. 1993). The velocity is similar to the cloud’s systemic local standard of rest (LSR) velocity of −11.2 km s−1,
+which
+is based on CO (Narayanan & Walker 1996) and HCO+ observations (Gómez et al. 1999). Our (9,6) maser is redshifted
+(−0.9 km s−1 ≤ VLSR ≤2.9 km s−1) and shares positions with
+the outflowing gas seen in CO and HCO+ with similarly redshifted velocities. Therefore, we argue that the (9,6) masers are
+related to outflowing gas.
+In G34.26+0.15, a large NH3 column density,
+10^(18.5±0.2) cm−2, and a kinetic temperature of 225±75 K
+were derived by Henkel et al. (1987) based on measurements
+of 15 NH3 inversion transitions in the frequency range of
+22.0–26.0 GHz. These did not include the (9,6) transition.
+While these lines were measured with a beam size of about
+40″, a comparison of the peak intensities of the optically thick
+lines with the kinetic temperature reveals the size of the hot,
+ammonia-emitting core to be only ∼2.5″. All those measured
+NH3 lines were quasi-thermal and had LSR velocities of
+∼ 58.5 km s−1, close to the systemic velocity of ∼ 58.1 km s−1
+obtained from C17O observations (Wyrowski et al. 2012).
+Their line widths (∆V1/2 ≥3.6 km s−1) are larger than what
+we find (0.35 km s−1 ≤ ∆V1/2 ≤ 0.94 km s−1) for each (9,6)
+maser component (see details in Table A.3). In all, we may
+have observed four different (9,6) velocity features. Three
+are blueshifted at VLSR ∼ 53.8 km s−1, 55.8 km s−1, and
+56.8 km s−1,
+and a fourth, tentatively detected, at 62.5 km s−1.
+This tentative redshifted feature was only potentially detected
+with Effelsberg in 2020 January. The velocity is similar to that
+of the JVLA measurements on the NH3 (1,1) absorption line
+against continuum source C (∼7″ resolution; Keto et al. 1987)
 Article number, page 5 of 10
-A&A proofs: manuscript no. mainArxiv
-and the NH3 (3,3) emission surrounding continuum source B as
-well as the head of C (100
-.4×1
-00
-.2 resolution; Heaton et al. 1989).
-However, we did not find this redshifted component in our
-JVLA observations. Therefore, its position within G34.26+0.15
-cannot be determined. The blueshifted (9,6) masers with a
-velocity range of 53.8–56.8 km s−1
-(M1, M2, and M3) show
-velocities compatible with those of the NH3 (3,3) emission at
-the proper positions (Heaton et al. 1989), which might be a
-suitable environment for maser species.
-4.4. Comparison of NH3 (9,6) masers with other maser lines
-To characterize the environment of NH3 (9,6) masers, we can
-compare their positions with respect to those of other maser
-species (i.e., OH, H2O, and CH3OH). Toward Cep A HW2,
-many CH3OH (e.g., Menten 1991; Sugiyama et al. 2008; Sanna
-et al. 2017) and H2O maser spots (e.g., Torrelles et al. 1998,
-2011; Sobolev et al. 2018) are detected and are associated with
-its disk. Sobolev et al. (2018) also found that most of the H2O
-maser flux is associated with the compact H ii region HW3d. OH
-maser features close to the H ii regions are also seen in HW2
-(e.g., Cohen & Brebner 1985; Bartkiewicz et al. 2005). These
-three kinds of masers in Cep A have a large velocity range of
-−25 km s−1 ≤ VLSR ≤ −2 km s−1
-and are widespread around
-HW2 and HW3, while NH3 (9,6) emission is only detected at
-−0.9 km s−1 ≤ VLSR ≤2.9 km s−1
-toward a sub-arcsecond￾sized region to the west of the peak continuum position of HW2
-(see Fig. 3). This suggests that the NH3 (9,6) maser in Cep A
-is unique and not related to maser spots seen in other molecular
-species.
-In G34.26+0.15, OH (Zheng et al. 2000), H2O (Imai et al.
-2011), and CH3OH (Bartkiewicz et al. 2016) masers have been
-detected east of source C (Fig. 4), and none of them coincides
-with the head of C. The NH3 (9,6) maser M1 is also found
-slightly off the head of source C. This could suggest that M1
-is powered by continuum source C or by an outflow. Near com￾ponent B, there are some OH and CH3OH masers but no H2O
-or NH3 masers. A group of H2O masers, well-known tracers
-of outflows, with a large velocity distribution of 43 km s−1 ≤
-VLSR ≤54 km s−1
-, was found to the west of the centimeter￾continuum source A and close to the peak of the millimeter￾continuum emission (see details in our Fig. A.2 and also in Fig. 5
-of Imai et al. 2011). The closeness of NH3 (9,6) maser spots M2
-and M3 to this group of water masers and their similar velocities
-again suggest an association of NH3 (9,6) masers with outflow
-activity.
-4.5. Constraints on pumping scenarios
-Our observations have resulted in the detection of NH3 (9,6)
-masers in Cep A and G34.26+0.15. The new detections could
-provide additional constraints on the maser line’s pumping
-mechanism. As mentioned in Sect. 1, the pumping mechanism
-of the (9,6) maser is unclear (Madden et al. 1986; Brown &
-Cragg 1991). Previous studies have suggested that there are three
-main pumping scenarios to explain the observed NH3 maser
-lines (Madden et al. 1986; Henkel et al. 2013): (1) infrared ra￾diation from the dust continuum emission, (2) line overlap, and
-(3) collisional pumping.
-For the first mechanism, infrared photons near 10 µm are
-needed for vibrational excitation. The high dust temperature
-(∼300 K) of W51-IRS2 can provide substantial infrared pho￾tons near 10 µm, which is used for radiative pumping (Henkel
-et al. 2013). Both Cep A and G34.26+0.15 have similar kinetic
-temperatures of &200 K (Henkel et al. 1987; Patel et al. 2005;
-Comito et al. 2007; Beuther et al. 2018). This suggests that
-high kinetic temperatures are needed to excite NH3 (9,6) masers.
-However, it should be noted that the silicate dust absorption fea￾ture might dominate at 10 µm (see the spectral energy distribu￾tion of Cep A in De Buizer et al. 2017). Additionally, there is
-no bright infrared emission around the two (9,6) masers, M2 and
-M3, in G34.26+0.15 (see Fig. 4; see also Fig. 11 in De Buizer
-et al. 2003 for a 10.5 µm map). This indicates that the pumping
-mechanism via infrared photons near 10 µm may not be viable
-to explain the (9,6) masers in Cep A and G34.26+0.15. Further￾more, Wilson & Schilke (1993) argued that radiative pumping by
-dust emission tends to excite multiple adjacent ammonia maser
-transitions, which appears to contradict our failure to detect the
-adjacent (8,5) and (10,7) lines (with respect to quantum numbers
-and frequency) and to only measure the (9,6) transitions in Cep
-A and G34.26+0.15. Therefore, we suggest that infrared radia￾tion from dust is not the main pumping source.
-Madden et al. (1986) suggested that there might be some
-line overlaps between the rotational NH3 transitions in the far￾infrared band. However, this would be unlikely to affect only the
-(9,6) line. Nevertheless, far-infrared spectral observations will
-be needed to clarify this scenario.
-Based on our observations, the (9,6) maser spots are close
-to, but not coincident with, the peaks of the radio continuum
-emission in Cep A and G34.26+0.15. Furthermore, the (9,6)
-masers show velocity offsets with respect to their systemic ve￾locities. This indicates that the (9,6) masers are located at the
-base of outflows, similar to the H2O masers. This is supported
-by VLBI observations that show that (9,6) masers tend to be
-closely associated with H2O masers (Pratap et al. 1991). The ob￾served time variability in G34.26+0.15 and W51-IRS2 can also
-be attributed to episodic molecular outflows. This indicates that
-collisional pumping could be the driver of the (9,6) maser. On
-the other hand, collisional pumping has been successfully used
-to explain the NH3 (3,3) maser (Walmsley & Ungerechts 1983;
-Flower et al. 1990; Mangum & Wootten 1994). Collisions tend to
-pump from the K=0 level to the K=3 level with parity changes,
-that is, the upper level of the (3,3) metastable transition will be
-overpopulated. NH3 (9,6) arises from the ortho species, so a sim￾ilar mechanism might also occur in the case of the (9,6) transi￾tion. Further measurements of collisional rates of ammonia will
-allow us to test this scenario.
-5. Summary
-We report the discovery of NH3 (9,6) masers in two HMSFRs,
-Cep A and G34.26+0.15. The narrow line width of the emis￾sion features (∆V1/2 ≤2.0 km s−1
-) and their high brightness tem￾peratures (> 400 K) indicate the maser nature of the lines.
-The intensity of the (9,6) maser in G34.26+0.15 is decreasing
-with time, while toward Cep A the maser is stable based on 20
-months of monitoring at Effelsberg. Linearly interpolating the
-integrated intensities obtained at Effelsberg as a function of time,
-the JVLA measurements show that there is no missing flux den￾sity on scales on the order of 1.2 arcsec (4 ×10−3
-and 2 ×10−2 pc)
-to the total single-dish flux. The JVLA-detected emission in￾dicates that the NH3 (9,6) maser in Cep A originates from a
-sub-arcsecond-sized region slightly (000
-.28 ± 0
-00
-.10) to the west
-of the peak position of the 1.36 cm continuum object, HW2. In
-G34.26+0.15, three NH3 (9,6) maser spots are observed: one is
-close to the head of the cometary UC H ii region C, and the other
-two are emitted from a compact region to the west of the HC H ii
+A&A proofs: manuscript no. mainArxiv
+and the NH3 (3,3) emission surrounding continuum source B as
+well as the head of C (1″.4 × 1″.2 resolution; Heaton et al. 1989).
+However, we did not find this redshifted component in our
+JVLA observations. Therefore, its position within G34.26+0.15
+cannot be determined. The blueshifted (9,6) masers with a
+velocity range of 53.8–56.8 km s−1(M1, M2, and M3) show
+velocities compatible with those of the NH3 (3,3) emission at
+the proper positions (Heaton et al. 1989), which might be a
+suitable environment for maser species.
+4.4. Comparison of NH3 (9,6) masers with other maser lines
+To characterize the environment of NH3 (9,6) masers, we can
+compare their positions with respect to those of other maser
+species (i.e., OH, H2O, and CH3OH). Toward Cep A HW2,
+many CH3OH (e.g., Menten 1991; Sugiyama et al. 2008; Sanna
+et al. 2017) and H2O maser spots (e.g., Torrelles et al. 1998,
+2011; Sobolev et al. 2018) are detected and are associated with
+its disk. Sobolev et al. (2018) also found that most of the H2O
+maser flux is associated with the compact H ii region HW3d. OH
+maser features close to the H ii regions are also seen in HW2
+(e.g., Cohen & Brebner 1985; Bartkiewicz et al. 2005). These
+three kinds of masers in Cep A have a large velocity range of
+−25 km s−1 ≤ VLSR ≤ −2 km s−1 and are widespread around
+HW2 and HW3, while NH3 (9,6) emission is only detected at
+−0.9 km s−1 ≤ VLSR ≤2.9 km s−1 toward a sub-arcsecond-sized region to the west of the peak continuum position of HW2
+(see Fig. 3). This suggests that the NH3 (9,6) maser in Cep A
+is unique and not related to maser spots seen in other molecular
+species.
+In G34.26+0.15, OH (Zheng et al. 2000), H2O (Imai et al.
+2011), and CH3OH (Bartkiewicz et al. 2016) masers have been
+detected east of source C (Fig. 4), and none of them coincides
+with the head of C. The NH3 (9,6) maser M1 is also found
+slightly off the head of source C. This could suggest that M1
+is powered by continuum source C or by an outflow. Near component B, there are some OH and CH3OH masers but no H2O
+or NH3 masers. A group of H2O masers, well-known tracers
+of outflows, with a large velocity distribution of 43 km s−1 ≤
+VLSR ≤54 km s−1, was found to the west of the centimeter-continuum source A and close to the peak of the millimeter-continuum emission (see details in our Fig. A.2 and also in Fig. 5
+of Imai et al. 2011). The closeness of NH3 (9,6) maser spots M2
+and M3 to this group of water masers and their similar velocities
+again suggest an association of NH3 (9,6) masers with outflow
+activity.
+4.5. Constraints on pumping scenarios
+Our observations have resulted in the detection of NH3 (9,6)
+masers in Cep A and G34.26+0.15. The new detections could
+provide additional constraints on the maser line’s pumping
+mechanism. As mentioned in Sect. 1, the pumping mechanism
+of the (9,6) maser is unclear (Madden et al. 1986; Brown &
+Cragg 1991). Previous studies have suggested that there are three
+main pumping scenarios to explain the observed NH3 maser
+lines (Madden et al. 1986; Henkel et al. 2013): (1) infrared radiation from the dust continuum emission, (2) line overlap, and
+(3) collisional pumping.
+For the first mechanism, infrared photons near 10 µm are
+needed for vibrational excitation. The high dust temperature
+(∼300 K) of W51-IRS2 can provide substantial infrared photons near 10 µm, which is used for radiative pumping (Henkel
+et al. 2013). Both Cep A and G34.26+0.15 have similar kinetic
+temperatures of ≳200 K (Henkel et al. 1987; Patel et al. 2005;
+Comito et al. 2007; Beuther et al. 2018). This suggests that
+high kinetic temperatures are needed to excite NH3 (9,6) masers.
+However, it should be noted that the silicate dust absorption feature might dominate at 10 µm (see the spectral energy distribution of Cep A in De Buizer et al. 2017). Additionally, there is
+no bright infrared emission around the two (9,6) masers, M2 and
+M3, in G34.26+0.15 (see Fig. 4; see also Fig. 11 in De Buizer
+et al. 2003 for a 10.5 µm map). This indicates that the pumping
+mechanism via infrared photons near 10 µm may not be viable
+to explain the (9,6) masers in Cep A and G34.26+0.15. Furthermore, Wilson & Schilke (1993) argued that radiative pumping by
+dust emission tends to excite multiple adjacent ammonia maser
+transitions, which appears to contradict our failure to detect the
+adjacent (8,5) and (10,7) lines (with respect to quantum numbers
+and frequency) and to only measure the (9,6) transitions in Cep
+A and G34.26+0.15. Therefore, we suggest that infrared radiation from dust is not the main pumping source.
+Madden et al. (1986) suggested that there might be some
+line overlaps between the rotational NH3 transitions in the far-infrared band. However, this would be unlikely to affect only the
+(9,6) line. Nevertheless, far-infrared spectral observations will
+be needed to clarify this scenario.
+Based on our observations, the (9,6) maser spots are close
+to, but not coincident with, the peaks of the radio continuum
+emission in Cep A and G34.26+0.15. Furthermore, the (9,6)
+masers show velocity offsets with respect to their systemic velocities. This indicates that the (9,6) masers are located at the
+base of outflows, similar to the H2O masers. This is supported
+by VLBI observations that show that (9,6) masers tend to be
+closely associated with H2O masers (Pratap et al. 1991). The observed time variability in G34.26+0.15 and W51-IRS2 can also
+be attributed to episodic molecular outflows. This indicates that
+collisional pumping could be the driver of the (9,6) maser. On
+the other hand, collisional pumping has been successfully used
+to explain the NH3 (3,3) maser (Walmsley & Ungerechts 1983;
+Flower et al. 1990; Mangum & Wootten 1994). Collisions tend to
+pump from the K=0 level to the K=3 level with parity changes,
+that is, the upper level of the (3,3) metastable transition will be
+overpopulated. NH3 (9,6) arises from the ortho species, so a similar mechanism might also occur in the case of the (9,6) transition. Further measurements of collisional rates of ammonia will
+allow us to test this scenario.
+5. Summary
+We report the discovery of NH3 (9,6) masers in two HMSFRs,
+Cep A and G34.26+0.15. The narrow line width of the emission features (∆V1/2 ≤2.0 km s−1)
+and their high brightness temperatures (> 400 K) indicate the maser nature of the lines.
+The intensity of the (9,6) maser in G34.26+0.15 is decreasing
+with time, while toward Cep A the maser is stable based on 20
+months of monitoring at Effelsberg. Linearly interpolating the
+integrated intensities obtained at Effelsberg as a function of time,
+the JVLA measurements show that there is no missing flux density on scales on the order of 1.2 arcsec (4 × 10^−3
+and 2 × 10^−2 pc)
+to the total single-dish flux. The JVLA-detected emission indicates that the NH3 (9,6) maser in Cep A originates from a
+sub-arcsecond-sized region slightly (0″.28 ± 0″.10) to the west
+of the peak position of the 1.36 cm continuum object, HW2. In
+G34.26+0.15, three NH3 (9,6) maser spots are observed: one is
+close to the head of the cometary UC H ii region C, and the other
+two are emitted from a compact region to the west of the HC H ii
 Article number, page 6 of 10
-Y. T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions
-region A. We suggest that the (9,6) masers may be connected to
-outflowing gas. Higher angular resolution JVLA and VLBI ob￾servations are planned to provide more accurate positions and
-constraints on pumping scenarios.
-Acknowledgements. We would like to thank the anonymous referee for the use￾ful comments that improve the manuscript. Y.T.Y. is a member of the Interna￾tional Max Planck Research School (IMPRS) for Astronomy and Astrophysics
-at the Universities of Bonn and Cologne. Y.T.Y. would like to thank the China
-Scholarship Council (CSC) for its support. We would like to thank the staff at
-the Effelsberg for their help provided during the observations. We thank the staff
-of the JVLA, especially Tony Perreault and Edward Starr, for their assistance
-with the observations and data reduction. This research has made use of the
-NASA/IPAC Infrared Science Archive, which is funded by the National Aero￾nautics and Space Administration and operated by the California Institute of
-Technology.
-References
-Bartkiewicz, A., Szymczak, M., Cohen, R. J., & Richards, A. M. S. 2005, MN￾RAS, 361, 623
-Bartkiewicz, A., Szymczak, M., & van Langevelde, H. J. 2016, A&A, 587, A104
-Benjamin, R. A., Churchwell, E., Babler, B. L., et al. 2003, PASP, 115, 953
-Beuther, H., Mottram, J. C., Ahmadi, A., et al. 2018, A&A, 617, A100
-Beuther, H., Walsh, A. J., Thorwirth, S., et al. 2007, A&A, 466, 989
-Brogan, C. L., Hunter, T. R., Cyganowski, C. J., et al. 2011, ApJ, 739, L16
-Brown, A. T., Little, L. T., MacDonald, G. H., Riley, P. W., & Matheson, D. N.
-1981, MNRAS, 195, 607
-Brown, R. D. & Cragg, D. M. 1991, ApJ, 378, 445
-Carrasco-González, C., Sanna, A., Rodríguez-Kamenetzky, A., et al. 2021, ApJ,
-914, L1
-Cesaroni, R., Walmsley, C. M., & Churchwell, E. 1992, A&A, 256, 618
-Cheung, A. C., Rank, D. M., Townes, C. H., Thornton, D. D., & Welch, W. J.
-1968, Phys. Rev. Lett., 21, 1701
-Churchwell, E., Babler, B. L., Meade, M. R., et al. 2009, PASP, 121, 213
-Cohen, R. J. & Brebner, G. C. 1985, MNRAS, 216, 51P
-Comito, C., Schilke, P., Endesfelder, U., Jiménez-Serra, I., & Martín-Pintado, J.
-2007, A&A, 469, 207
-Curiel, S., Ho, P. T. P., Patel, N. A., et al. 2006, ApJ, 638, 878
-Danby, G., Flower, D. R., Valiron, P., Schilke, P., & Walmsley, C. M. 1988,
-MNRAS, 235, 229
-De Buizer, J. M., Liu, M., Tan, J. C., et al. 2017, ApJ, 843, 33
-De Buizer, J. M., Radomski, J. T., Telesco, C. M., & Piña, R. K. 2003, ApJ, 598,
-1127
-Dzib, S., Loinard, L., Rodríguez, L. F., Mioduszewski, A. J., & Torres, R. M.
-2011, ApJ, 733, 71
-Flower, D. R., Offer, A., & Schilke, P. 1990, MNRAS, 244, 4P
-Galván-Madrid, R., Keto, E., Zhang, Q., et al. 2009, ApJ, 706, 1036
-Garay, G., Ramirez, S., Rodriguez, L. F., Curiel, S., & Torrelles, J. M. 1996, ApJ,
-459, 193
-Garay, G., Rodriguez, L. F., & van Gorkom, J. H. 1986, ApJ, 309, 553
-Gaume, R. A., Wilson, T. L., & Johnston, K. J. 1996, ApJ, 457, L47
-Goddi, C., Greenhill, L. J., Humphreys, E. M. L., Chandler, C. J., & Matthews,
-L. D. 2011, ApJ, 739, L13
-Goddi, C., Henkel, C., Zhang, Q., Zapata, L., & Wilson, T. L. 2015, A&A, 573,
-A109
-Gómez, J. F., Sargent, A. I., Torrelles, J. M., et al. 1999, ApJ, 514, 287
-Güsten, R., Chini, R., & Neckel, T. 1984, A&A, 138, 205
-Heaton, B. D., Little, L. T., & Bishop, I. S. 1989, A&A, 213, 148
-Henkel, C., Wilson, T. L., Asiri, H., & Mauersberger, R. 2013, A&A, 549, A90
-Henkel, C., Wilson, T. L., & Mauersberger, R. 1987, A&A, 182, 137
-Ho, P. T. P. & Townes, C. H. 1983, ARA&A, 21, 239
-Hoffman, I. M. & Joyce, S. A. 2014, ApJ, 782, 83
-Hogge, T. G., Jackson, J. M., Allingham, D., et al. 2019, ApJ, 887, 79
-Hughes, V. A. 1991, ApJ, 383, 280
-Hughes, V. A. & Wouterloot, J. G. A. 1984, ApJ, 276, 204
-Hunter, T. R., Brogan, C. L., Indebetouw, R., & Cyganowski, C. J. 2008, ApJ,
-680, 1271
-Imai, H., Omi, R., Kurayama, T., et al. 2011, PASJ, 63, 1293
-Keto, E. R., Ho, P. T. P., & Reid, M. J. 1987, ApJ, 323, L117
-Kraemer, K. E. & Jackson, J. M. 1995, ApJ, 439, L9
-Kuchar, T. A. & Bania, T. M. 1994, ApJ, 436, 117
-Madden, S. C., Irvine, W. M., Matthews, H. E., Brown, R. D., & Godfrey, P. D.
-1986, ApJ, 300, L79
-Mangum, J. G. & Wootten, A. 1994, ApJ, 428, L33
-Mauersberger, R., Henkel, C., & Wilson, T. L. 1987, A&A, 173, 352
-Mauersberger, R., Wilson, T. L., & Henkel, C. 1986, A&A, 160, L13
-Mauersberger, R., Wilson, T. L., & Henkel, C. 1988, A&A, 201, 123
-McEwen, B. C., Pihlström, Y. M., & Sjouwerman, L. O. 2016, ApJ, 826, 189
-McMullin, J. P., Waters, B., Schiebel, D., Young, W., & Golap, K. 2007, in As￾tronomical Society of the Pacific Conference Series, Vol. 376, Astronomical
-Data Analysis Software and Systems XVI, ed. R. A. Shaw, F. Hill, & D. J.
-Bell, 127
-Mei, Y., Chen, X., Shen, Z.-Q., & Li, B. 2020, ApJ, 898, 157
-Menten, K. M. 1991, ApJ, 380, L75
-Mills, E. A. C., Ginsburg, A., Clements, A. R., et al. 2018, ApJ, 869, L14
-Mookerjea, B., Casper, E., Mundy, L. G., & Looney, L. W. 2007, ApJ, 659, 447
-Moscadelli, L., Reid, M. J., Menten, K. M., et al. 2009, ApJ, 693, 406
-Narayanan, G. & Walker, C. K. 1996, ApJ, 466, 844
-Ott, M., Witzel, A., Quirrenbach, A., et al. 1994, A&A, 284, 331
-Patel, N. A., Curiel, S., Sridharan, T. K., et al. 2005, Nature, 437, 109
-Perley, R. A. & Butler, B. J. 2013, ApJS, 204, 19
-Pety, J. 2005, in SF2A-2005: Semaine de l’Astrophysique Francaise, ed. F. Ca￾soli, T. Contini, J. M. Hameury, & L. Pagani, 721
-Poynter, R. L. & Kakar, R. K. 1975, ApJS, 29, 87
-Pratap, P., Menten, K. M., Reid, M. J., Moran, J. M., & Walmsley, C. M. 1991,
-ApJ, 373, L13
-Reid, M. J. & Ho, P. T. P. 1985, ApJ, 288, L17
-Rodriguez, L. F., Ho, P. T. P., & Moran, J. M. 1980, ApJ, 240, L149
-Sanna, A., Moscadelli, L., Surcis, G., et al. 2017, A&A, 603, A94
-Sewilo, M., Churchwell, E., Kurtz, S., Goss, W. M., & Hofner, P. 2004, ApJ,
-605, 285
-Sewiło, M., Churchwell, E., Kurtz, S., Goss, W. M., & Hofner, P. 2011, ApJS,
-194, 44
-Sobolev, A. M., Moran, J. M., Gray, M. D., et al. 2018, ApJ, 856, 60
-Sugiyama, K., Fujisawa, K., Doi, A., et al. 2008, PASJ, 60, 1001
-Torrelles, J. M., Gómez, J. F., Garay, G., et al. 1998, ApJ, 509, 262
-Torrelles, J. M., Gómez, J. F., Garay, G., et al. 1999, MNRAS, 307, 58
-Torrelles, J. M., Ho, P. T. P., Rodriguez, L. F., & Canto, J. 1985, ApJ, 288, 595
-Torrelles, J. M., Ho, P. T. P., Rodriguez, L. F., & Canto, J. 1986, ApJ, 305, 721
-Torrelles, J. M., Patel, N. A., Curiel, S., et al. 2011, MNRAS, 410, 627
-Torrelles, J. M., Verdes-Montenegro, L., Ho, P. T. P., Rodriguez, L. F., & Canto,
-J. 1993, ApJ, 410, 202
-Towner, A. P. M., Brogan, C. L., Hunter, T. R., & Cyganowski, C. J. 2021, ApJ,
-923, 263
-Umemoto, T., Mikami, H., Yamamoto, S., & Hirano, N. 1999, ApJ, 525, L105
-Urquhart, J. S., Morgan, L. K., Figura, C. C., et al. 2011, MNRAS, 418, 1689
-Walmsley, C. M. & Ungerechts, H. 1983, A&A, 122, 164
-Walsh, A. J., Breen, S. L., Britton, T., et al. 2011, MNRAS, 416, 1764
-Walsh, A. J., Longmore, S. N., Thorwirth, S., Urquhart, J. S., & Purcell, C. R.
-2007, MNRAS, 382, L35
-Wang, K., Zhang, Q., Wu, Y., Li, H.-b., & Zhang, H. 2012, ApJ, 745, L30
-Wilson, T. L., Batrla, W., & Pauls, T. A. 1982, A&A, 110, L20
-Wilson, T. L. & Henkel, C. 1988, A&A, 206, L26
-Wilson, T. L., Johnston, K. J., & Henkel, C. 1990, A&A, 229, L1
-Wilson, T. L. & Schilke, P. 1993, in Lecture Notes in Physics, Astrophysical
-Masers, ed. A. W. Clegg & G. E. Nedoluha, Vol. 412, 123–126
-Wyrowski, F., Güsten, R., Menten, K. M., Wiesemeyer, H., & Klein, B. 2012,
-A&A, 542, L15
-Zhang, Q. & Ho, P. T. P. 1995, ApJ, 450, L63
-Zhang, Q., Hunter, T. R., Sridharan, T. K., & Cesaroni, R. 1999, ApJ, 527, L117
-Zheng, X. W., Moran, J. M., & Reid, M. J. 2000, MNRAS, 317, 192
+Y. T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions
+region A. We suggest that the (9,6) masers may be connected to
+outflowing gas. Higher angular resolution JVLA and VLBI observations are planned to provide more accurate positions and
+constraints on pumping scenarios.
+Acknowledgements. We would like to thank the anonymous referee for the useful comments that improve the manuscript. Y.T.Y. is a member of the International Max Planck Research School (IMPRS) for Astronomy and Astrophysics
+at the Universities of Bonn and Cologne. Y.T.Y. would like to thank the China
+Scholarship Council (CSC) for its support. We would like to thank the staff at
+the Effelsberg for their help provided during the observations. We thank the staff
+of the JVLA, especially Tony Perreault and Edward Starr, for their assistance
+with the observations and data reduction. This research has made use of the
+NASA/IPAC Infrared Science Archive, which is funded by the National Aeronautics and Space Administration and operated by the California Institute of
+Technology.
+References
+Bartkiewicz, A., Szymczak, M., Cohen, R. J., & Richards, A. M. S. 2005, MNRAS, 361, 623
+Bartkiewicz, A., Szymczak, M., & van Langevelde, H. J. 2016, A&A, 587, A104
+Benjamin, R. A., Churchwell, E., Babler, B. L., et al. 2003, PASP, 115, 953
+Beuther, H., Mottram, J. C., Ahmadi, A., et al. 2018, A&A, 617, A100
+Beuther, H., Walsh, A. J., Thorwirth, S., et al. 2007, A&A, 466, 989
+Brogan, C. L., Hunter, T. R., Cyganowski, C. J., et al. 2011, ApJ, 739, L16
+Brown, A. T., Little, L. T., MacDonald, G. H., Riley, P. W., & Matheson, D. N.
+1981, MNRAS, 195, 607
+Brown, R. D. & Cragg, D. M. 1991, ApJ, 378, 445
+Carrasco-González, C., Sanna, A., Rodríguez-Kamenetzky, A., et al. 2021, ApJ,
+914, L1
+Cesaroni, R., Walmsley, C. M., & Churchwell, E. 1992, A&A, 256, 618
+Cheung, A. C., Rank, D. M., Townes, C. H., Thornton, D. D., & Welch, W. J.
+1968, Phys. Rev. Lett., 21, 1701
+Churchwell, E., Babler, B. L., Meade, M. R., et al. 2009, PASP, 121, 213
+Cohen, R. J. & Brebner, G. C. 1985, MNRAS, 216, 51P
+Comito, C., Schilke, P., Endesfelder, U., Jiménez-Serra, I., & Martín-Pintado, J.
+2007, A&A, 469, 207
+Curiel, S., Ho, P. T. P., Patel, N. A., et al. 2006, ApJ, 638, 878
+Danby, G., Flower, D. R., Valiron, P., Schilke, P., & Walmsley, C. M. 1988,
+MNRAS, 235, 229
+De Buizer, J. M., Liu, M., Tan, J. C., et al. 2017, ApJ, 843, 33
+De Buizer, J. M., Radomski, J. T., Telesco, C. M., & Piña, R. K. 2003, ApJ, 598,
+1127
+Dzib, S., Loinard, L., Rodríguez, L. F., Mioduszewski, A. J., & Torres, R. M.
+2011, ApJ, 733, 71
+Flower, D. R., Offer, A., & Schilke, P. 1990, MNRAS, 244, 4P
+Galván-Madrid, R., Keto, E., Zhang, Q., et al. 2009, ApJ, 706, 1036
+Garay, G., Ramirez, S., Rodriguez, L. F., Curiel, S., & Torrelles, J. M. 1996, ApJ,
+459, 193
+Garay, G., Rodriguez, L. F., & van Gorkom, J. H. 1986, ApJ, 309, 553
+Gaume, R. A., Wilson, T. L., & Johnston, K. J. 1996, ApJ, 457, L47
+Goddi, C., Greenhill, L. J., Humphreys, E. M. L., Chandler, C. J., & Matthews,
+L. D. 2011, ApJ, 739, L13
+Goddi, C., Henkel, C., Zhang, Q., Zapata, L., & Wilson, T. L. 2015, A&A, 573,
+A109
+Gómez, J. F., Sargent, A. I., Torrelles, J. M., et al. 1999, ApJ, 514, 287
+Güsten, R., Chini, R., & Neckel, T. 1984, A&A, 138, 205
+Heaton, B. D., Little, L. T., & Bishop, I. S. 1989, A&A, 213, 148
+Henkel, C., Wilson, T. L., Asiri, H., & Mauersberger, R. 2013, A&A, 549, A90
+Henkel, C., Wilson, T. L., & Mauersberger, R. 1987, A&A, 182, 137
+Ho, P. T. P. & Townes, C. H. 1983, ARA&A, 21, 239
+Hoffman, I. M. & Joyce, S. A. 2014, ApJ, 782, 83
+Hogge, T. G., Jackson, J. M., Allingham, D., et al. 2019, ApJ, 887, 79
+Hughes, V. A. 1991, ApJ, 383, 280
+Hughes, V. A. & Wouterloot, J. G. A. 1984, ApJ, 276, 204
+Hunter, T. R., Brogan, C. L., Indebetouw, R., & Cyganowski, C. J. 2008, ApJ,
+680, 1271
+Imai, H., Omi, R., Kurayama, T., et al. 2011, PASJ, 63, 1293
+Keto, E. R., Ho, P. T. P., & Reid, M. J. 1987, ApJ, 323, L117
+Kraemer, K. E. & Jackson, J. M. 1995, ApJ, 439, L9
+Kuchar, T. A. & Bania, T. M. 1994, ApJ, 436, 117
+Madden, S. C., Irvine, W. M., Matthews, H. E., Brown, R. D., & Godfrey, P. D.
+1986, ApJ, 300, L79
+Mangum, J. G. & Wootten, A. 1994, ApJ, 428, L33
+Mauersberger, R., Henkel, C., & Wilson, T. L. 1987, A&A, 173, 352
+Mauersberger, R., Wilson, T. L., & Henkel, C. 1986, A&A, 160, L13
+Mauersberger, R., Wilson, T. L., & Henkel, C. 1988, A&A, 201, 123
+McEwen, B. C., Pihlström, Y. M., & Sjouwerman, L. O. 2016, ApJ, 826, 189
+McMullin, J. P., Waters, B., Schiebel, D., Young, W., & Golap, K. 2007, in Astronomical Society of the Pacific Conference Series, Vol. 376, Astronomical
+Data Analysis Software and Systems XVI, ed. R. A. Shaw, F. Hill, & D. J.
+Bell, 127
+Mei, Y., Chen, X., Shen, Z.-Q., & Li, B. 2020, ApJ, 898, 157
+Menten, K. M. 1991, ApJ, 380, L75
+Mills, E. A. C., Ginsburg, A., Clements, A. R., et al. 2018, ApJ, 869, L14
+Mookerjea, B., Casper, E., Mundy, L. G., & Looney, L. W. 2007, ApJ, 659, 447
+Moscadelli, L., Reid, M. J., Menten, K. M., et al. 2009, ApJ, 693, 406
+Narayanan, G. & Walker, C. K. 1996, ApJ, 466, 844
+Ott, M., Witzel, A., Quirrenbach, A., et al. 1994, A&A, 284, 331
+Patel, N. A., Curiel, S., Sridharan, T. K., et al. 2005, Nature, 437, 109
+Perley, R. A. & Butler, B. J. 2013, ApJS, 204, 19
+Pety, J. 2005, in SF2A-2005: Semaine de l’Astrophysique Francaise, ed. F. Casoli, T. Contini, J. M. Hameury, & L. Pagani, 721
+Poynter, R. L. & Kakar, R. K. 1975, ApJS, 29, 87
+Pratap, P., Menten, K. M., Reid, M. J., Moran, J. M., & Walmsley, C. M. 1991,
+ApJ, 373, L13
+Reid, M. J. & Ho, P. T. P. 1985, ApJ, 288, L17
+Rodriguez, L. F., Ho, P. T. P., & Moran, J. M. 1980, ApJ, 240, L149
+Sanna, A., Moscadelli, L., Surcis, G., et al. 2017, A&A, 603, A94
+Sewilo, M., Churchwell, E., Kurtz, S., Goss, W. M., & Hofner, P. 2004, ApJ,
+605, 285
+Sewiło, M., Churchwell, E., Kurtz, S., Goss, W. M., & Hofner, P. 2011, ApJS,
+194, 44
+Sobolev, A. M., Moran, J. M., Gray, M. D., et al. 2018, ApJ, 856, 60
+Sugiyama, K., Fujisawa, K., Doi, A., et al. 2008, PASJ, 60, 1001
+Torrelles, J. M., Gómez, J. F., Garay, G., et al. 1998, ApJ, 509, 262
+Torrelles, J. M., Gómez, J. F., Garay, G., et al. 1999, MNRAS, 307, 58
+Torrelles, J. M., Ho, P. T. P., Rodriguez, L. F., & Canto, J. 1985, ApJ, 288, 595
+Torrelles, J. M., Ho, P. T. P., Rodriguez, L. F., & Canto, J. 1986, ApJ, 305, 721
+Torrelles, J. M., Patel, N. A., Curiel, S., et al. 2011, MNRAS, 410, 627
+Torrelles, J. M., Verdes-Montenegro, L., Ho, P. T. P., Rodriguez, L. F., & Canto,
+J. 1993, ApJ, 410, 202
+Towner, A. P. M., Brogan, C. L., Hunter, T. R., & Cyganowski, C. J. 2021, ApJ,
+923, 263
+Umemoto, T., Mikami, H., Yamamoto, S., & Hirano, N. 1999, ApJ, 525, L105
+Urquhart, J. S., Morgan, L. K., Figura, C. C., et al. 2011, MNRAS, 418, 1689
+Walmsley, C. M. & Ungerechts, H. 1983, A&A, 122, 164
+Walsh, A. J., Breen, S. L., Britton, T., et al. 2011, MNRAS, 416, 1764
+Walsh, A. J., Longmore, S. N., Thorwirth, S., Urquhart, J. S., & Purcell, C. R.
+2007, MNRAS, 382, L35
+Wang, K., Zhang, Q., Wu, Y., Li, H.-b., & Zhang, H. 2012, ApJ, 745, L30
+Wilson, T. L., Batrla, W., & Pauls, T. A. 1982, A&A, 110, L20
+Wilson, T. L. & Henkel, C. 1988, A&A, 206, L26
+Wilson, T. L., Johnston, K. J., & Henkel, C. 1990, A&A, 229, L1
+Wilson, T. L. & Schilke, P. 1993, in Lecture Notes in Physics, Astrophysical
+Masers, ed. A. W. Clegg & G. E. Nedoluha, Vol. 412, 123–126
+Wyrowski, F., Güsten, R., Menten, K. M., Wiesemeyer, H., & Klein, B. 2012,
+A&A, 542, L15
+Zhang, Q. & Ho, P. T. P. 1995, ApJ, 450, L63
+Zhang, Q., Hunter, T. R., Sridharan, T. K., & Cesaroni, R. 1999, ApJ, 527, L117
+Zheng, X. W., Moran, J. M., & Reid, M. J. 2000, MNRAS, 317, 192
 Article number, page 7 of 10
-A&A proofs: manuscript no. mainArxiv
-Appendix A:
-Table A.1. Summary of NH3 (9, 6) maser observations.
-Source Telescope Beam Epoch Channel S ν rms R
-S νdv VLSR ∆V1/2
-size spacing
-(km s−1
-) (Jy) (mJy) (Jy km s−1
-) (km s−1
-)
-Cep A Effelsberg 4900 2020, Jan. 04 0.62 0.67 3.41 1.19 ± 0.02 -1.11 ± 0.02 1.67 ± 0.04
-Effelsberg 4900 2021, Feb. 11 0.62 0.59 5.97 1.08 ± 0.02 -0.74 ± 0.02 1.70 ± 0.04
-Effelsberg 4900 2021, Feb. 15 0.62 0.65 10.98 1.11 ± 0.03 -0.75 ± 0.02 1.60 ± 0.05
-JVLAa 1
-00
-.47 × 0
-00
-.99 2021, Jul. 13 0.13 1.13 144 0.89 ± 0.09 -0.86 ± 0.03 0.74 ± 0.12
-Effelsberg 4900 2021, Aug. 11 0.07 0.98 13.36 0.49 ± 0.02 -0.90 ± 0.01 0.47 ± 0.01
-0.35 0.26 ± 0.02 -0.28 ± 0.02 0.69 ± 0.05
-Effelsberg 4900 2021, Aug. 12 0.07 0.98 13.35 0.50 ± 0.01 -0.89 ± 0.07 0.48 ± 0.07
-0.35 0.20 ± 0.01 -0.29 ± 0.07 0.54 ± 0.07
-0.06 0.07 ± 0.01 0.51 ± 0.07 1.09 ± 0.07
-0.02 0.02 ± 0.01 2.15 ± 0.07 0.80 ± 0.07
-0.07 0.06 ± 0.01 2.89 ± 0.07 0.92 ± 0.07
-G34.26+0.15 Effelsberg 4900 2020, Jan. 03 0.62 0.30 1.26 0.65 ± 0.03 62.50 ± 0.05 2.05 ± 0.13
-Effelsberg 4900 2021, Feb. 11 0.62 0.24 2.42 0.40 ± 0.02 55.76 ± 0.04 1.60 ± 0.12
-Effelsberg 4900 2021, Feb. 15 0.62 0.20 4.86 0.38 ± 0.02 55.71 ± 0.05 1.80 ± 0.14
-JVLAb 1
-00
-.33 × 1
-00
-.06 2021, Jul. 13 0.13 0.23 37.1 0.09 ± 0.02 54.41 ± 0.03 0.38 ± 0.09
-0.22 0.22 ± 0.02 55.82 ± 0.05 0.95 ± 0.12
-0.15 0.06 ± 0.01 57.21 ± 0.04 0.35 ± 0.08
-Effelsberg 4900 2021, Aug. 11 0.07 0.08 13.92 0.06 ± 0.007 54.10 ± 0.05 0.68 ± 0.12
-0.07 0.02 ± 0.006 54.82 ± 0.03 0.31 ± 0.09
-0.12 0.10 ± 0.006 55.85 ± 0.02 0.75 ± 0.06
-Effelsberg 4900 2021, Aug. 12 0.07 0.16 27.40 0.09 ± 0.008 55.83 ± 0.02 0.56 ± 0.05
-Notes. The spectral parameters are obtained from Gaussian fitting. (a) The JVLA spectrum toward Cep A is extracted from the Effelsberg-beam￾sized region (FWHM 4900). (b) For G34.26+0.15, the JVLA beam samples the NH3 (9,6) spectrum over a region of radius 300
-.5, which contains all
-detected NH3 (9,6) emissions.
-Table A.2. 1.36 cm JVLA flux densities of individual continuum sources.
-Source R.A. Dec. Size P.A. S ν
-(h m s) (◦ 0 00) (arcsec) (deg) (mJy)
-Cep A HW2 22 56 17.972 ± 0.003 +62 01 49.587 ± 0.015 (0.45 ± 0.19) × (0.22 ± 0.10) 50.0 20.2 ± 1.4
-HW3a 22 56 17.420 ± 0.022 +62 01 44.576 ± 0.076 (2.35 ± 0.45) × (0.55 ± 0.14) 66.6 4.75 ± 0.74
-HW3b 22 56 17.578 ± 0.009 +62 01 45.041 ± 0.043 (1.43 ± 0.24) × (0.45 ± 0.10) 59.9 3.19 ± 0.36
-HW3c 22 56 17.956 ± 0.016 +62 01 46.224 ± 0.038 (1.44 ± 0.37) × (0.36 ± 0.19) 86.0 9.90 ± 1.7
-HW3d 22 56 18.195 ± 0.005 +62 01 46.325 ± 0.014 (1.26 ± 0.12) × (0.30 ± 0.19) 102.5 13.75 ± 0.92
-HW9 22 56 18.626 ± 0.014 +62 01 47.851 ± 0.137 (1.53 ± 0.51) × (0.29 ± 0.30) 28.0 3.26 ± 0.78
-G34.26+0.15 A 18 53 18.774 ± 0.005 +01 14 56.208 ± 0.125 (0.66 ± 0.49) × (0.50 ± 0.33) 10.0 94 ± 33
-B 18 53 18.649 ± 0.005 +01 15 00.071 ± 0.180 (2.31 ± 0.49) × (0.85 ± 0.21) 17.4 597 ± 110
-C 18 53 18.560 ± 0.004 +01 14 58.201 ± 0.112 (2.03 ± 0.30) × (1.34 ± 0.20) 178.0 5070 ± 660
+A&A proofs: manuscript no. mainArxiv
+Appendix A:
+Table A.1. Summary of NH3 (9, 6) maser observations.
+Source Telescope Beam Epoch Channel S ν rms RS νdv VLSR ∆V1/2
+size spacing
+(km s−1) (Jy) (mJy) (Jy km s−1) (km s−1)
+Cep A Effelsberg 4900 2020, Jan. 04 0.62 0.67 3.41 1.19 ± 0.02 -1.11 ± 0.02 1.67 ± 0.04
+Effelsberg 4900 2021, Feb. 11 0.62 0.59 5.97 1.08 ± 0.02 -0.74 ± 0.02 1.70 ± 0.04
+Effelsberg 4900 2021, Feb. 15 0.62 0.65 10.98 1.11 ± 0.03 -0.75 ± 0.02 1.60 ± 0.05
+JVLAa 1
+00
+.47 × 0
+00
+.99 2021, Jul. 13 0.13 1.13 144 0.89 ± 0.09 -0.86 ± 0.03 0.74 ± 0.12
+Effelsberg 4900 2021, Aug. 11 0.07 0.98 13.36 0.49 ± 0.02 -0.90 ± 0.01 0.47 ± 0.01
+0.35 0.26 ± 0.02 -0.28 ± 0.02 0.69 ± 0.05
+Effelsberg 4900 2021, Aug. 12 0.07 0.98 13.35 0.50 ± 0.01 -0.89 ± 0.07 0.48 ± 0.07
+0.35 0.20 ± 0.01 -0.29 ± 0.07 0.54 ± 0.07
+0.06 0.07 ± 0.01 0.51 ± 0.07 1.09 ± 0.07
+0.02 0.02 ± 0.01 2.15 ± 0.07 0.80 ± 0.07
+0.07 0.06 ± 0.01 2.89 ± 0.07 0.92 ± 0.07
+G34.26+0.15 Effelsberg 4900 2020, Jan. 03 0.62 0.30 1.26 0.65 ± 0.03 62.50 ± 0.05 2.05 ± 0.13
+Effelsberg 4900 2021, Feb. 11 0.62 0.24 2.42 0.40 ± 0.02 55.76 ± 0.04 1.60 ± 0.12
+Effelsberg 4900 2021, Feb. 15 0.62 0.20 4.86 0.38 ± 0.02 55.71 ± 0.05 1.80 ± 0.14
+JVLAb 1
+00
+.33 × 1
+00
+.06 2021, Jul. 13 0.13 0.23 37.1 0.09 ± 0.02 54.41 ± 0.03 0.38 ± 0.09
+0.22 0.22 ± 0.02 55.82 ± 0.05 0.95 ± 0.12
+0.15 0.06 ± 0.01 57.21 ± 0.04 0.35 ± 0.08
+Effelsberg 4900 2021, Aug. 11 0.07 0.08 13.92 0.06 ± 0.007 54.10 ± 0.05 0.68 ± 0.12
+0.07 0.02 ± 0.006 54.82 ± 0.03 0.31 ± 0.09
+0.12 0.10 ± 0.006 55.85 ± 0.02 0.75 ± 0.06
+Effelsberg 4900 2021, Aug. 12 0.07 0.16 27.40 0.09 ± 0.008 55.83 ± 0.02 0.56 ± 0.05
+Notes. The spectral parameters are obtained from Gaussian fitting. (a) The JVLA spectrum toward Cep A is extracted from the Effelsberg-beam-sized region (FWHM 49″). (b) For G34.26+0.15, the JVLA beam samples the NH3 (9,6) spectrum over a region of radius
+3″.5, which contains all
+detected NH3 (9,6) emissions.
+Table A.2. 1.36 cm JVLA flux densities of individual continuum sources.
+Source R.A. Dec. Size P.A. S ν
+(h m s) (° ′ ″) (arcsec) (deg) (mJy)
+Cep A HW2 22 56 17.972 ± 0.003 +62 01 49.587 ± 0.015 (0.45 ± 0.19) × (0.22 ± 0.10) 50.0 20.2 ± 1.4
+HW3a 22 56 17.420 ± 0.022 +62 01 44.576 ± 0.076 (2.35 ± 0.45) × (0.55 ± 0.14) 66.6 4.75 ± 0.74
+HW3b 22 56 17.578 ± 0.009 +62 01 45.041 ± 0.043 (1.43 ± 0.24) × (0.45 ± 0.10) 59.9 3.19 ± 0.36
+HW3c 22 56 17.956 ± 0.016 +62 01 46.224 ± 0.038 (1.44 ± 0.37) × (0.36 ± 0.19) 86.0 9.90 ± 1.7
+HW3d 22 56 18.195 ± 0.005 +62 01 46.325 ± 0.014 (1.26 ± 0.12) × (0.30 ± 0.19) 102.5 13.75 ± 0.92
+HW9 22 56 18.626 ± 0.014 +62 01 47.851 ± 0.137 (1.53 ± 0.51) × (0.29 ± 0.30) 28.0 3.26 ± 0.78
+G34.26+0.15 A 18 53 18.774 ± 0.005 +01 14 56.208 ± 0.125 (0.66 ± 0.49) × (0.50 ± 0.33) 10.0 94 ± 33
+B 18 53 18.649 ± 0.005 +01 15 00.071 ± 0.180 (2.31 ± 0.49) × (0.85 ± 0.21) 17.4 597 ± 110
+C 18 53 18.560 ± 0.004 +01 14 58.201 ± 0.112 (2.03 ± 0.30) × (1.34 ± 0.20) 178.0 5070 ± 660
 Article number, page 8 of 10
-Y. T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions
-Table A.3. NH3 (9,6) maser positions derived from the JVLA observations.
-Source R.A. Dec. S ν TMB VLSR ∆V1/2
-(h m s) (◦ 0 00) (mJy beam−1
-) (K) (km s−1
-)
-Cep A M 22 56 17.933 ± 0.002 +62 01 49.608 ± 0.011 985.2 2464.8 -0.88 ± 0.01 0.51 ± 0.02
-343.2 829.5 -0.24 ± 0.03 0.63 ± 0.05
-G34.26+0.15 M1 18 53 18.569 ± 0.007 +01 14 57.997 ± 0.056 37.1 94.5 56.82 ± 0.06 0.68 ± 0.14
-M2 18 53 18.696 ± 0.002 +01 14 55.807 ± 0.034 48.4 122.4 53.77 ± 0.05 0.35 ± 0.08
-57.8 146.2 54.35 ± 0.07 0.83 ± 0.14
-180.8 457.6 55.83 ± 0.01 0.59 ± 0.03
-M3 18 53 18.667 ± 0.005 +01 14 55.348 ± 0.066 78.1 197.2 54.22 ± 0.04 0.94 ± 0.08
-73.7 186.3 55.78 ± 0.04 0.79 ± 0.08
-Fig. A.1. Cepheus A. The grey shaded areas mark the 1.36 cm JVLA continuum map of Cep A. The reference position is αJ2000 = 22h56m17s
-.972,
-and δJ2000 = 62◦0104900
-.587, the peak position of the continuum map, is marked by a red cross. Slightly to the west of the cross is the white ellipse
-denoting the position of the NH3 (9,6) emission with a purple star at its center. The red contours show the NOrthern Extended Millimeter Array
-(NOEMA) 1.37 mm continuum, taken from Beuther et al. (2018). Contour levels are -5, 5, 10, 20, 40, 80, 100, 150, and 200 × 2.43 mJy beam−1
-.
-OH (Bartkiewicz et al. 2005), H2O (Sobolev et al. 2018), and CH3OH (Sanna et al. 2017) masers are presented as diamonds, circles, and squares,
-respectively. The color bar on the right-hand side indicates the velocity range (VLSR) of maser spots.
+Y. T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions
+Table A.3. NH3 (9,6) maser positions derived from the JVLA observations.
+Source R.A. Dec. S ν TMB VLSR ∆V1/2
+(h m s) (° ′ ″) (mJy beam−1) (K) (km s−1)
+Cep A M 22 56 17.933 ± 0.002 +62 01 49.608 ± 0.011 985.2 2464.8 -0.88 ± 0.01 0.51 ± 0.02
+343.2 829.5 -0.24 ± 0.03 0.63 ± 0.05
+G34.26+0.15 M1 18 53 18.569 ± 0.007 +01 14 57.997 ± 0.056 37.1 94.5 56.82 ± 0.06 0.68 ± 0.14
+M2 18 53 18.696 ± 0.002 +01 14 55.807 ± 0.034 48.4 122.4 53.77 ± 0.05 0.35 ± 0.08
+57.8 146.2 54.35 ± 0.07 0.83 ± 0.14
+180.8 457.6 55.83 ± 0.01 0.59 ± 0.03
+M3 18 53 18.667 ± 0.005 +01 14 55.348 ± 0.066 78.1 197.2 54.22 ± 0.04 0.94 ± 0.08
+73.7 186.3 55.78 ± 0.04 0.79 ± 0.08
+Fig. A.1. Cepheus A. The grey shaded areas mark the 1.36 cm JVLA continuum map of Cep A. The reference position is αJ2000 = 22h56m17s.972,
+and δJ2000 = 62°01′49″.587, the peak position of the continuum map, is marked by a red cross. Slightly to the west of the cross is the white ellipse
+denoting the position of the NH3 (9,6) emission with a purple star at its center. The red contours show the NOrthern Extended Millimeter Array
+(NOEMA) 1.37 mm continuum, taken from Beuther et al. (2018). Contour levels are -5, 5, 10, 20, 40, 80, 100, 150, and 200 × 2.43 mJy beam−1.
+OH (Bartkiewicz et al. 2005), H2O (Sobolev et al. 2018), and CH3OH (Sanna et al. 2017) masers are presented as diamonds, circles, and squares,
+respectively. The color bar on the right-hand side indicates the velocity range (VLSR) of maser spots.
 Article number, page 9 of 10
-A&A proofs: manuscript no. mainArxiv
-Fig. A.2. 1.36 cm JVLA continuum map of G34.26+0.15 presented as gray shaded areas. The reference position is αJ2000 = 18h53m18s
-.560, and
-δJ2000 = 01◦1405800
-.201, the peak position, is marked by a red cross. The red ellipses show the positions of NH3 (9,6) emission with stars at their
-center (i.e., M1, M2, and M3). The blue contours show the Berkeley-Illinois-Maryland Association (BIMA) array 2.8 mm continuum, taken from
-Mookerjea et al. (2007). Contour levels are -3, 3, 10, 20, 30, 40, 50, 70, 90, 100, 120, and 140 × 20 mJy beam−1
-. OH (Zheng et al. 2000), H2O (Imai
-et al. 2011), and CH3OH (Bartkiewicz et al. 2016) masers are presented as diamonds, circles, and squares, respectively. The color bar indicates
-the velocity range (VLSR) of maser spots.
-Article number, page 10 of 10
+A&A proofs: manuscript no. mainArxiv
+Fig. A.2. 1.36 cm JVLA continuum map of G34.26+0.15 presented as gray shaded areas. The reference position is αJ2000 = 18h53m18s.560, and
+δJ2000 = 01°14′58″.201, the peak position, is marked by a red cross. The red ellipses show the positions of NH3 (9,6) emission with stars at their
+center (i.e., M1, M2, and M3). The blue contours show the Berkeley-Illinois-Maryland Association (BIMA) array 2.8 mm continuum, taken from
+Mookerjea et al. (2007). Contour levels are -3, 3, 10, 20, 30, 40, 50, 70, 90, 100, 120, and 140 × 20 mJy beam−1. OH (Zheng et al. 2000), H2O (Imai
+et al. 2011), and CH3OH (Bartkiewicz et al. 2016) masers are presented as diamonds, circles, and squares, respectively. The color bar indicates
+the velocity range (VLSR) of maser spots.
+Article number, page 10 of 10
\ No newline at end of file
diff --git a/read/results/pdfium/2201.00022.txt b/read/results/pdfium/2201.00022.txt
index 7fbd35c..49255d3 100644
--- a/read/results/pdfium/2201.00022.txt
+++ b/read/results/pdfium/2201.00022.txt
@@ -1,849 +1,1098 @@
-Draft version January 4, 2022
-Typeset using LATEX twocolumn style in AASTeX631
-The Formation of Intermediate Mass Black Holes in Galactic Nuclei
-Sanaea C. Rose,1, 2 Smadar Naoz,1, 2 Re’em Sari,3 and Itai Linial3
-1Department of Physics and Astronomy, University of California, Los Angeles, CA 90095, USA
-2Mani L. Bhaumik Institute for Theoretical Physics, University of California, Los Angeles, CA 90095, USA
-3Racah Institute for Physics, The Hebrew University, Jerusalem 91904, Israel
-ABSTRACT
-Most stellar evolution models predict that black holes (BHs) should not exist above approximately
-50−70 M. However, recent LIGO/Virgo detections indicate the existence of BHs with masses at and
-above this threshold. We suggest that massive BHs, including intermediate mass black holes (IMBHs),
-can form in galactic nuclei through collisions between stellar-mass black holes and the surrounding
-main-sequence stars. Considering dynamical processes such as collisions, mass segregation, and relax￾ation, we find that this channel can be quite efficient, forming IMBHs as massive as 104 M. Our
-results suggest that massive black holes and IMBHs may be ubiquitous in galactic centres. This for￾mation channel also has implications for observations. Collisions between stars and BHs can produce
-electromagnetic signatures, for example, from x-ray binaries and tidal disruption events. Additionally,
-formed through this channel, both black holes in the mass gap and IMBHs can merge with the super￾massive black hole at the center of a galactic nucleus through gravitational waves. These gravitational
-wave events are extreme and intermediate mass ratio inspirals (EMRIs and IMRIs, respectively).
-1. INTRODUCTION
-The recently detected gravitational wave source
-GW190521 (The LIGO Scientific Collaboration et al.
-2020a,b) produced an intermediate mass black hole of
-approximately 142 M. This event may have also had a
-85 M progenitor, which falls within the pair-instability
-mass gap that limits stellar black holes (BHs) to no
-more than ∼
-< 50 M (e.g., Heger et al. 2003; Woosley
-2017)
-1
-. Similarly, the merger products of GW150914,
-GW170104, and GW170814 fall within the mass gap
-(e.g., Abbott et al. 2016, 2017a,b). BH mergers that
-form second generation BHs and, in some cases, inter￾mediate mass BHs (IMBHs), these gravitational wave
-(GW) events can occur in globular clusters, young stel￾lar clusters, or the field (e.g., Rodriguez et al. 2018; Ro￾driguez et al. 2019; Fishbach et al. 2020; Mapelli et al.
-2021b,a; Di Carlo et al. 2019, 2021; Dall’Amico et al.
-2021; Arca Sedda et al. 2021). However, IMBHs are
-not limited to these locations and may reside in galac￾Corresponding author: Sanaea C. Rose
-srose@astro.ucla.edu
-1 Note that the exact lower and upper limits may be sensitive to
-metallicity of the progenitor (e.g., Woosley 2017; Spera & Mapelli
-2017a; Limongi & Chieffi 2018a; Sakstein et al. 2020; Belczynski
-et al. 2020a; Renzo et al. 2020; Vink et al. 2021).
-tic nuclei as well. Several studies propose that our
-own galactic center may host an IMBH in the inner pc
-(e.g., Hansen & Milosavljevi´c 2003; Maillard et al. 2004;
-G¨urkan & Rasio 2005; Gualandris & Merritt 2009; Chen
-& Liu 2013; Generozov & Madigan 2020; Fragione et al.
-2020a; Zheng et al. 2020; Naoz et al. 2020; GRAVITY
-Collaboration et al. 2020).
-Several IMBH formation channels have been suggested
-in the literature. For example, IMBHs may have a cos￾mological origin, forming in the early universe either
-as a result of the very first stars (e.g., Madau & Rees
-2001; Schneider et al. 2002; Johnson & Bromm 2007;
-Valiante et al. 2016) or from direct collapse of accumu￾lated gas (e.g., Begelman et al. 2006; Yue et al. 2014;
-Ferrara et al. 2014; Choi et al. 2015; Shlosman et al.
-2016). These high redshift IMBHs would need to sur￾vive galaxy evolution and mergers to present day (e.g.,
-Rashkov & Madau 2014), with significant effects on their
-stellar and even dark matter surroundings (e.g., Bertone
-et al. 2009; Chen & Liu 2013; Bringmann et al. 2012; Eda
-et al. 2013; Naoz & Silk 2014; Naoz et al. 2019). Another
-popular formation channel relies on the coalescence of
-many stellar-mass black holes. For example, IMBHs
-may form in the centers of globular clusters, where few￾body interactions lead to the merger of stellar-mass BHs
-(e.g., O’Leary et al. 2006; G¨urkan et al. 2006; Blecha
-et al. 2006; Freitag et al. 2006; Umbreit et al. 2012; Ro￾arXiv:2201.00022v1 [astro-ph.GA] 31 Dec 2021
-2 Rose et al.
-driguez et al. 2018; Rodriguez et al. 2019; Fragione et al.
-2020b). Other formation mechanisms invoke successive
-collisions and mergers of massive stars (e.g., Portegies
-Zwart & McMillan 2002; Portegies Zwart et al. 2004;
-Freitag et al. 2006; Kremer et al. 2020; Gonz´alez et al.
-2021; Di Carlo et al. 2021).
-The main obstacle to sequential BH mergers in clus￾ters is that the merger recoil velocity kick often exceeds
-the escape velocity from the cluster (e.g., Schnittman
-& Buonanno 2007; Centrella et al. 2010; O’Leary et al.
-2006; Baibhav et al. 2020, Rom & Sari, in prep.). How￾ever, nuclear star clusters at the centers of galaxies do
-not encounter this problem. For example, Fragione et al.
-(2021) explore repeated BH-BH mergers in nuclear star
-clusters without a SMBH. They considered BH binary￾single interactions, binary BH GW merger, and GW
-merger recoil kicks. The post-kick merger product sinks
-back towards the cluster center over a dynamical fric￾tion timescale. Using this approach, they showed that
-103 − 104 M IMBHs can form efficiently over the life￾time of a cluster.
-However, as discussed in Section 2.2, direct star-BH
-collisions are much more frequent than BH-BH collision
-in galactic nuclei, making the former a promising chan￾nel for BH growth. We propose that IMBHs can form
-naturally within the central pc of a SMBH in a galactic
-center. Specifically, these IMBHs form through repeated
-collisions with main sequence stars, accreting some or
-all of the star’s mass depending on the details of the
-collision. We demonstrate that this channel can create
-IMBHs with masses as large as 104 M, depending on
-the density profile of the surrounding stars.
-The paper is structured as follows: we describe rele￾vant physical processes and our approach in Section 2.
-In particular, we provide an overview of collisions in
-Section 2.2 and present our statistical approach in Sec￾tion 2.3. Section 2.4 discusses our treatment of the
-mass growth with each collision and presents analytic
-solutions to our equations in two different regimes, ef￾ficient collisions and inefficient collisions We compare
-these solutions to our statistical results. Sections 2.5
-and 2.7 discuss implications for GW merger events be￾tween IMBHs and the SMBH. We then incorporate re￾laxation processes and discuss the subsequent results in
-Section 2.8. Finally, we discuss and summarize our find￾ings in Section 3.
-2. METHODOLOGY
-We consider a population of stellar mass BHs embed￾ded in a cluster of 1 M stars. When stars and BHs
-collide, the BHs can accrete mass. The growth rate de￾pends on the physical processes outlined below. We use
-a statistical approach to estimate the stellar encounters
-and final IMBH masses.
-2.1. Physical Picture
-We consider a population of BHs within the inner few
-parsecs of the SMBH in a galactic nucleus (GN). We as￾sume that the BH mass distribution follows that of the
-stars from which they originate, a Kroupa initial mass
-function dN/dm ∝ m−2.35. While this choice represents
-a gross oversimplification, it has very little bearing on
-our final results. Future work may address the particu￾lars of the BH mass distribution, but we do not expect
-that it will significantly alter the outcome. The upper
-and lower limits of the BH mass distribution are 5 and
-50 M, respectively. We select the upper limit to en￾compass the range of upper bounds predicted by stellar
-evolution models, which vary between 40 and 125 M
-depending on the metallicity (Heger et al. 2003; Woosley
-2017; Spera & Mapelli 2017b; Limongi & Chieffi 2018b;
-Belczynski et al. 2020b; Renzo et al. 2020). We assume
-that the orbits of the BHs follow a thermal eccentricity
-distribution. We draw their semimajor axes, a•, from a
-uniform distribution in log distance, dN/d(log r) being
-constant. While this distribution is not necessarily rep￾resentative of actual conditions in the GN, we use it to
-build a comprehensive physical picture of BH growth at
-all distances from the SMBH, including within 0.01 pc.
-Otherwise, the innermost region of the GN would be
-poorly represented in our sample. We consider other
-observationally motivated distributions in Section 2.8,
-but reserve a more detailed examination of the distribu￾tion’s impact for future work.
-2.2. Direct Collisions
-BHs in the GN can undergo direct collisions with other
-objects. The timescale for this process, tcoll, can be es￾timated using a simple rate calculation: t
-−1
-coll = nσA,
-where n is the number density of objects, σ is the ve￾locity dispersion, and A is the cross-section. We use the
-collision timescale from Rose et al. (2020):
-t
-−1
-coll =πn(a•)σ(a•)
-×
-
-f1(e•)r
-2
-c + f2(e•)rc
-2G(mBH + m?)
-σ(a•)
-2
-
-. (1)
-where G is the gravitational constant and rc is the sum
-of the radii of the interacting objects, a black hole with
-mass mBH and a star with mass m?. Detailed in Rose
-et al. (2020), f1(e•) and f2(e•) account for the effect of
-the eccentricity of the BH’s orbit about the SMBH on
-the collision rate, while n and σ are simply evaluated
+Draft version July 7, 2022
+Typeset using LATEX twocolumn style in AASTeX631
+The Formation of Intermediate Mass Black Holes in Galactic Nuclei
+Sanaea C. Rose,1, 2 Smadar Naoz,1, 2 Re’em Sari,3 and Itai Linial3
+1Department of Physics and Astronomy, University of California, Los Angeles, CA 90095, USA
+2Mani L. Bhaumik Institute for Theoretical Physics, University of California, Los Angeles, CA 90095, USA
+3Racah Institute for Physics, The Hebrew University, Jerusalem 91904, Israel
+ABSTRACT
+Most stellar evolution models predict that black holes (BHs) should not exist above approximately
+50 − 70 M, the lower limit of the pair-instability mass gap. However, recent LIGO/Virgo detections
+indicate the existence of BHs with masses at and above this threshold. We suggest that massive
+BHs, including intermediate mass black holes (IMBHs), can form in galactic nuclei through collisions
+between stellar-mass black holes and the surrounding main-sequence stars. Considering dynamical
+processes such as collisions, mass segregation, and relaxation, we find that this channel can be quite
+efficient, forming IMBHs as massive as 10⁴ M⊙. This upper limit assumes that (1) the BHs accrete a
+substantial fraction of the stellar mass captured during each collision and (2) that the rate at which
+new stars are introduced into the region near the SMBH is high enough to offset depletion by stellar
+disruptions and star-star collisions. We discuss deviations from these key assumptions in the text. Our
+results suggest that BHs in the pair-instability mass gap and IMBHs may be ubiquitous in galactic
+centers. This formation channel has implications for observations. Collisions between stars and BHs
+can produce electromagnetic signatures, for example, from x-ray binaries and tidal disruption events.
+Additionally, formed through this channel, both black holes in the mass gap and IMBHs can merge
+with the supermassive black hole at the center of a galactic nucleus through gravitational waves.
+These gravitational wave events are extreme and intermediate mass ratio inspirals (EMRIs and IMRIs,
+respectively).
+1. INTRODUCTION
+The recently detected gravitational wave source
+GW190521 (The LIGO Scientific Collaboration et al.
+2020a,b) produced an intermediate mass black hole of
+approximately 142 M. This event may have also had a
+85 M progenitor, which falls within the pair-instability
+mass gap that limits stellar black holes (BHs) to no
+more than
+≲ 50 M⊙ (e.g., Heger et al. 2003; Woosley
+2017)
+1
+. Similarly, the merger products of GW150914,
+GW170104, and GW170814 fall within the mass gap
+(e.g., Abbott et al. 2016, 2017a,b). BH mergers that
+form second generation BHs and, in some cases, intermediate mass BHs (IMBHs), these gravitational wave
+(GW) events can occur in globular clusters, young stelCorresponding author: Sanaea C. Rose
+srose@astro.ucla.edu
+1 Note that the exact lower and upper limits may be sensitive to
+metallicity of the progenitor (e.g., Woosley 2017; Spera & Mapelli
+2017a; Limongi & Chieffi 2018a; Sakstein et al. 2020; Belczynski
+et al. 2020a; Renzo et al. 2020; Vink et al. 2021).
+lar clusters, or the field (e.g., Rodriguez et al. 2018; Rodriguez et al. 2019; Fishbach et al. 2020; Mapelli et al.
+2021b,a; Di Carlo et al. 2019, 2021; Dall’Amico et al.
+2021; Arca Sedda et al. 2021). However, IMBHs are
+not limited to these locations and may reside in galactic nuclei as well. Several studies propose that our
+own galactic center may host an IMBH in the inner pc
+(e.g., Hansen & Milosavljević 2003; Maillard et al. 2004;
+Gürkan & Rasio 2005; Gualandris & Merritt 2009; Chen
+& Liu 2013; Generozov & Madigan 2020; Fragione et al.
+2020a; Zheng et al. 2020; Naoz et al. 2020; GRAVITY
+Collaboration et al. 2020).
+Several IMBH formation channels have been suggested
+in the literature. For example, IMBHs may have a cosmological origin, forming in the early universe either
+as a result of the very first stars (e.g., Madau & Rees
+2001; Schneider et al. 2002; Johnson & Bromm 2007;
+Valiante et al. 2016) or from direct collapse of accumulated gas (e.g., Begelman et al. 2006; Yue et al. 2014;
+Ferrara et al. 2014; Choi et al. 2015; Shlosman et al.
+2016). These high redshift IMBHs would need to survive galaxy evolution and mergers to present day (e.g.,
+arXiv:2201.00022v2 [astro-ph.GA] 6 Jul 2022
+ Rose et al.
+Rashkov & Madau 2014), with significant effects on their
+stellar and even dark matter surroundings (e.g., Bertone
+et al. 2009; Chen & Liu 2013; Bringmann et al. 2012; Eda
+et al. 2013; Naoz & Silk 2014; Naoz et al. 2019). Another
+popular formation channel relies on the coalescence of
+many stellar-mass black holes, which may seed objects
+as massive as SMBHs (e.g., Kroupa et al. 2020). IMBHs
+may form in the centers of globular clusters, where few-body interactions lead to the merger of stellar-mass BHs
+(e.g., O’Leary et al. 2006; Gürkan et al. 2006; Blecha
+et al. 2006; Freitag et al. 2006; Umbreit et al. 2012; Rodriguez et al. 2018; Rodriguez et al. 2019; Fragione et al.
+2020b). Other formation mechanisms invoke successive
+collisions and mergers of massive stars (e.g., Ebisuzaki
+et al. 2001; Portegies Zwart & McMillan 2002; Portegies
+Zwart et al. 2004; Freitag et al. 2006; Sakurai et al. 2017;
+Kremer et al. 2020; González et al. 2021; Di Carlo et al.
+2021; Das et al. 2021a,b; Escala 2021).
+The main obstacle to sequential BH mergers in clusters is that the merger recoil velocity kick often exceeds
+the escape velocity from the cluster (e.g., Schnittman
+& Buonanno 2007; Centrella et al. 2010; O’Leary et al.
+2006; Baibhav et al. 2020, Rom & Sari, in prep.). However, nuclear star clusters at the centers of galaxies do
+not encounter this problem. For example, Fragione et al.
+(2021) explore repeated BH-BH mergers in nuclear star
+clusters without a SMBH. They considered BH binary-single interactions, binary BH GW merger, and GW
+merger recoil kicks. The post-kick merger product sinks
+back towards the cluster center over a dynamical friction timescale. Using this approach, they showed that
+$10^3$–$10^4\,M_\odot$ IMBHs can form efficiently over the lifetime of a cluster.
+However, as discussed in Section 2.2, direct BH-star
+collisions are much more frequent than BH-BH collisions
+in galactic nuclei, making the former a promising channel for BH growth. In an N-body study of young star
+clusters, Rizzuto et al. (2022) find that BH-star collisions are a main contributor to the formation of BHs
+in the mass gap and IMBHs. In a similar vein, Stone
+et al. (2017) demonstrate that massive BHs can form
+from repeated tidal encounters between stars and BHs.
+More generally, several studies have explored the role of
+collisions in a GN, with implications for the stellar and
+red giant populations (e.g., Dale & Davies 2006; Dale
+et al. 2009; Balberg et al. 2013; Mastrobuono-Battisti
+et al. 2021). We propose that IMBHs can form naturally
+within the central pc of a galactic center through repeated collisions between BHs and main sequence stars.
+During a collision, the BH can accrete some portion of
+the star’s mass. Over many collisions, it can grow appreciably in size. We demonstrate that this channel can
+create IMBHs with masses as large as $10^4\,M_\odot$, an upper
+limit that depends on the density profile of the surrounding stars and the efficiency of the accretion.
+The paper is structured as follows: we describe relevant physical processes and our approach in Section 2.
+In particular, we provide an overview of collisions in
+Section 2.2 and present our statistical approach in Section 2.3. Section 2.4 discusses our treatment of the
+mass growth with each collision and presents analytic
+solutions to our equations in two different regimes, efficient collisions and inefficient collisions. We compare
+these solutions to our statistical results. Sections 2.6
+and 2.8 discuss implications for GW merger events between IMBHs and the SMBH. We then incorporate relaxation processes and discuss the subsequent results in
+Section 2.9. Finally, we discuss and summarize our findings in Section 3.
+2. METHODOLOGY
+We consider a population of stellar mass BHs embedded in a cluster of 1 $M_\odot$ stars. When stars and BHs
+collide, the BHs can accrete mass. The growth rate depends on the physical processes outlined below. We use
+a statistical approach to estimate the stellar encounters
+and final IMBH masses.
+2.1. Physical Picture
+We consider a population of BHs within the inner few
+parsecs of the SMBH in a galactic nucleus (GN). We assume that the BH mass distribution follows that of the
+stars from which they originate, a Kroupa initial mass
+function $dN/dm \propto m^{-2.35}$. While this choice represents
+a gross oversimplification, it has very little bearing on
+our final results. Future work may address the particulars of the BH mass distribution, but we do not expect
+that it will significantly alter the outcome. The lower
+and upper limits of the BH mass distribution are 5 and
+50 $M_\odot$, respectively. We select the upper limit to encompass the range of upper bounds predicted by stellar
+evolution models, which vary between 40 and 125 $M_\odot$
+depending on the metallicity (Heger et al. 2003; Woosley
+2017; Spera & Mapelli 2017b; Limongi & Chieffi 2018b;
+Belczynski et al. 2020b; Renzo et al. 2020). We assume
+that the orbits of the BHs follow a thermal eccentricity
+distribution. We draw their semimajor axes, a•, from a
+uniform distribution in log distance, dN/d(log r) being
+constant. While this distribution is not necessarily representative of actual conditions in the GN, we use it to
+build a comprehensive physical picture of BH growth at
+all distances from the SMBH, including within 0.01 pc.
+Otherwise, the innermost region of the GN would be
+poorly represented in our sample. We consider other
+IMBH Formation in Galactic Nuclei 3
+Figure 1. We plot the relevant timescales, including collision (green), relaxation (gold), and BH-BH GW capture
+(purple), for a single BH in the GN as a function of distance
+from the SMBH. For the collision timescale, we assume the
+BH is on a circular orbit. The timescales depend on the
+density, so we adopt a range of density profiles, bounded by
+α = 1 (dashed curve) to α = 2 (dark, solid curve). The dark
+blue line represents the time for a $10^5\,M_\odot$ BH to merge with
+the SMBH through GW emission.
+observationally motivated distributions in Section 2.9,
+but reserve a more detailed examination of the distribution’s impact for future work.
+2.2. Direct Collisions
+BHs in the GN can undergo direct collisions with other
+objects. The timescale for this process, $t_{\rm coll}$, can be estimated using a simple rate calculation: $t_{\rm coll}^{-1} = n\sigma A$,
+where n is the number density of objects, σ is the velocity dispersion, and A is the cross-section. We use the
+collision timescale from Rose et al. (2020):
+$$t_{\rm coll}^{-1} = \pi\, n(a_\bullet)\, \sigma(a_\bullet) \times \left[ f_1(e_\bullet)\, r_c^2 + f_2(e_\bullet)\, r_c\, \frac{2G(m_{\rm BH} + m_\star)}{\sigma(a_\bullet)^2} \right] . \qquad (1)$$
+where G is the gravitational constant and rc is the sum
+of the radii of the interacting objects, a black hole with
+mass $m_{\rm BH}$ and a star with mass $m_\star$. Detailed in Rose
+et al. (2020), f1(e•) and f2(e•) account for the effect of
+the eccentricity of the BH’s orbit about the SMBH on
+the collision rate, while n and σ are simply evaluated
 at the semimajor axis of the orbit (see below). Note
-IMBH Formation in Galactic Nuclei 3
-Figure 1. We plot the relevant timescales, including col￾lision (green), relaxation (gold), and BH-BH GW capture
-(purple), for a single BH in the GN as a function of distance
-from the SMBH. For the collision timescale, we assume the
-BH is on a circular orbit. The timescales depend on the
-density, so we adopt a range of density profiles, bounded by
-α = 1 (dashed curve) to α = 2 (dark, solid curve). The dark
-blue line represents the time for a 105 M BH to merge with
-the SMBH through GW emission.
-that this timescale equation includes the effects of grav￾itational focusing, which enhances the cross-section of
-interaction.
-Assuming a circular orbit for simplicity, we plot the
-timescale for a BH orbiting in the GN to collide with
-a 1 M star as a function of distance from the SMBH
-in Figure 1.
-2 As this timescale depends on the density
-of surrounding stars, we adopt a density profile of the
-form:
-ρ(r•) = ρ0
-
-r•
-r0
-−α
-, (2)
-where r• denotes the distance from the SMBH. We adopt
-a SMBH mass of 4 × 106 M such that our fiducial GN
-matches our own galactic center (e.g., Ghez et al. 2005;
-Genzel et al. 2003). In this case, the normalization in
-Eq. (2) is ρ0 = 1.35×106 M/pc3 at r0 = 0.25 pc (Gen￾zel et al. 2010). Additionally, in Eq. (2), α gives the
-slope of the power law. We assume that a uniform pop￾ulation of solar mass stars account for most of the mass
-in the GN, making the stellar number density:
-n(r•) = ρ(r•)
-1 M
-. (3)
-2 We note that the eccentricity has a very minor effect on the
-collision timescale (Rose et al. 2020).
-The collision timescale also depends on the velocity dis￾persion, which we express as:
-σ(r•) = s
-GM•
-r•(1 + α)
-, (4)
-where α is the slope of the density profile and M• de￾notes the mass of the SMBH (Alexander 1999; Alexan￾der & Pfuhl 2014). As mentioned above, Eq. (1) depends
-on the sum of the radii of the colliding objects, rc. We
-take rc = 1 R because these interactions involve a BH
-and a star, and the former has a much smaller physi￾cal cross-section. For example, the Schwarzschild radius
-of a 10 M BH is only 30 km, or 4.31 × 10−5 R. For
-this reason, direct collisions between compact objects
-are very rare and not included in our model.
-We note that direct collisions between BHs, via GW
-emission, were shown to be efficient in nuclear star clus￾ters without SMBHs (e.g., Portegies Zwart & McMil￾lan 2000; O’Leary et al. 2006; Rodriguez et al. 2016).
-However, in the GN, star-BH collisions are much more
-frequent than direct BH-BH collisions. As depicted in
-Figure 1, the star-BH collision timescale for a range
-of density profiles is many orders of magnitude shorter
-than the BH-BH GW collision timescale (for the rele￾vant equations, see O’Leary et al. 2009; Gond´an et al.
-2018, for example). Thus, we expect that star-BH col￾lisions will be the main driver of IMBH growth in the
-GN.
-2.3. Statistical Approach to Collisions
-We simulate the mass growth of a population of BHs
-with initial conditions detailed in Section 2.1. Over an
-increment ∆t of 106 yr, we calculate the probability of
-a collision occurring, given by ∆t/tcoll. This choice of
-∆t is motivated by our galactic center’s star formation
-timescale (e.g., Lu et al. 2009), allowing for regular re￾plenishment of the stellar population in the GN. We have
-checked that the results are not sensitive to this choice
-of ∆t, omitted here to avoid clutter. We draw a number
-between 0 and 1 using a random number generator. If
-that number is less than or equal to the probability, we
-increase the BH’s mass by ∆m, the mass that the BH is
-expected to accrete in a single collision (see Section 2.4
-for details). We recalculate the collision timescale using
-the updated BH mass and repeat this process until the
-time elapsed equals the simulation time of 10 Gyr3
-.
-3 Closer to the SMBH, ∆t may exceed the collision timescale by
-a factor of a few for steep density profiles. We include a safe￾guard in our code which takes the ratio tcoll/∆t and rounds it
-to the nearest integer. We take this integer to be the number of
+that this timescale equation includes the effects of gravitational focusing, which enhances the cross-section of
+interaction.
+Assuming a circular orbit for simplicity, we plot the
+timescale for a BH orbiting in the GN to collide with
+a 1 $M_\odot$ star as a function of distance from the SMBH
+in Figure 1.$^2$ As this timescale depends on the density
+of surrounding stars, we adopt a density profile of the
+form:
+$$\rho(r_\bullet) = \rho_0 \left( \frac{r_\bullet}{r_0} \right)^{-\alpha} , \qquad (2)$$
+where r• denotes the distance from the SMBH. We adopt
+a SMBH mass of $4 \times 10^6\,M_\odot$ such that our fiducial GN
+matches our own galactic center (e.g., Ghez et al. 2005;
+Genzel et al. 2003). In this case, the normalization in
+Eq. (2) is $\rho_0 = 1.35 \times 10^6\,M_\odot/{\rm pc}^3$ at $r_0 = 0.25$ pc (Genzel et al. 2010). Additionally, in Eq. (2), α gives the
+slope of the power law. We assume that a uniform population of solar mass stars accounts for most of the mass
+in the GN, making the stellar number density:
+$$n(r_\bullet) = \frac{\rho(r_\bullet)}{1\,M_\odot} . \qquad (3)$$
+The collision timescale also depends on the velocity dispersion, which we express as:
+$$\sigma(r_\bullet) = \sqrt{ \frac{G M_\bullet}{r_\bullet (1 + \alpha)} } , \qquad (4)$$
+where α is the slope of the density profile and M• denotes the mass of the SMBH (Alexander 1999; Alexander & Pfuhl 2014). As mentioned above, Eq. (1) depends
+on the sum of the radii of the colliding objects, rc. We
+take $r_c = 1\,R_\odot$ because these interactions involve a BH
+and a star, and the former has a much smaller physical cross-section. For example, the Schwarzschild radius
+of a 10 $M_\odot$ BH is only 30 km, or $4.31 \times 10^{-5}\,R_\odot$. For
+this reason, direct collisions between compact objects
+are very rare and not included in our model.
+We note that direct collisions between BHs, via GW
+emission, were shown to be efficient in nuclear star clusters without SMBHs (e.g., Portegies Zwart & McMillan 2000; O’Leary et al. 2006; Rodriguez et al. 2016).
+However, in the GN, star-BH collisions are much more
+frequent than direct BH-BH collisions. As depicted in
+Figure 1, the star-BH collision timescale for a range
+of density profiles is many orders of magnitude shorter
+than the BH-BH GW collision timescale (for the relevant equations, see O’Leary et al. 2009; Gondán et al.
+2018, for example). Thus, we expect that star-BH collisions will be the main driver of IMBH growth in the
+GN.
+2 We note that the eccentricity has a very minor effect on the
+collision timescale (Rose et al. 2020).
+ Rose et al.
+2.3. Statistical Approach to Collisions
+We simulate the mass growth of a population of BHs
+with initial conditions detailed in Section 2.1. Over an
+increment ∆t of $10^6$ yr, we calculate the probability of
+a collision occurring, given by ∆t/tcoll. This choice of
+∆t is motivated by our galactic center’s star formation
+timescale (e.g., Lu et al. 2009), allowing for regular replenishment of the stellar population in the GN. We have
+checked that the results are not sensitive to this choice
+of ∆t, omitted here to avoid clutter. We draw a number
+between 0 and 1 using a random number generator. If
+that number is less than or equal to the probability, we
+increase the BH’s mass by ∆m, the mass that the BH is
+expected to accrete in a single collision (see Section 2.4
+for details). We recalculate the collision timescale using
+the updated BH mass and repeat this process until the
+time elapsed equals the simulation time of 10 Gyr.$^3$
+2.4. Mass Growth
+When a BH collides with a star, it may accrete material and grow in mass. The details of the accretion
+depend on the relative velocity between the BH and
+star. For simplicity, this calculation assumes that the
+two objects experience a head on collision, with the BH
+passing through the star’s center. We begin by considering the escape velocity from the BH at the star’s
+outermost point, its surface, which corresponds to the
+maximum impact parameter 1 R. Qualitatively, one
+might expect that the BH could capture the entire star
+(i.e., ∆m ∼ 1 M) if the relative velocity is smaller than
+the escape velocity from the BH at this point. However,
+in the vicinity of the SMBH, the dispersion velocity of
+the stars may be much larger than the escape velocity
+from the BH at the star’s surface. In this case, the BH
+captures a “tunnel” of material through the star. This
+tunnel has radius equal to the Bondi radius and length
+approximately 1 R. For the purposes of this study, we
+assume that the BH accretes all of the material that
+it captures. The details of the accretion are uncertain,
+however, and it may be much less efficient than our results imply. We discuss accretion in Section 2.5.
+To estimate ∆m, we begin with the Bondi-Hoyle accretion rate, $\dot{m}$, given by:
+$$\dot{m} = \frac{4\pi G^2 m_{\rm BH}^2\, \rho_{\rm star}}{(c_s^2 + \sigma^2)^{3/2}} , \qquad (5)$$
+3 Closer to the SMBH, ∆t may exceed the collision timescale by
+a factor of a few for steep density profiles. We include a safeguard in our code which takes the ratio tcoll/∆t and rounds it
+to the nearest integer. We take this integer to be the number of
 collisions and increase the BH mass accordingly.
-4 Rose et al.
-2.4. Mass Growth
-When a BH collides with a star, it may accrete ma￾terial and grow in mass. The details of the accretion
-depend on the relative velocity between the BH and
-star. For simplicity, this calculation assumes that the
-two objects experience a head on collision, with the BH
-passing through the star’s center. We begin by con￾sidering the escape velocity from the BH at the star’s
-outermost point, its surface, which corresponds to the
-maximum impact parameter 1 R. Qualitatively, one
-might expect that the BH could accrete the entire star
-(i.e., ∆m ∼ 1 M) if the relative velocity is smaller than
-the escape velocity from the BH at this point. However,
-in the vicinity of the SMBH, the dispersion velocity of
-the stars may be much larger than the escape velocity
-from the BH at the star’s surface. In this case, the BH
-accretes a “tunnel” of material through the star. This
-tunnel has radius equal to the Bondi radius and length
-approximately 1 R.
-To estimate ∆m, we begin with the Bondi-Hoyle ac￾cretion rate, ˙m, given by:
-m˙ =
-4πG2m2
-BHρstar
-(c
-2
-s + σ
-2)
-3/2
-, (5)
-where cs is the speed of sound in the star and ρstar is its
-density (e.g., Bondi 1952; Bondi & Hoyle 1944; Shima
-et al. 1985; Edgar 2004, see latter for a review). We
-approximate the density as 1 M/(4πR3
-/3) and take
-the conservative value of cs = 500 km s−1
-, which is
-consistent with the sound speed inside a 1 M star
-(Christensen-Dalsgaard et al. 1996) and allows us to set
-a lower limit on ∆m. To find ∆m, at each collision, we
-have:
-∆m = min( ˙m × t?,cross, 1 M) , (6)
-where t?,cross ∼ R/σ is the crossing time of the BH in
-the star. We take the minimum between ˙m×t?,cross and
-1 M because the BH cannot accrete more mass than
-one star at each collision.
-Figure 2 juxtaposes the expected growth using Bondi￾Hoyle-Lyttleton accretion (blue small points) with a
-much simpler model in which the BH accretes the star’s
-entire mass, 1 M (red large points). Both examples
-start with identical populations of 10 M BHs (grey)
-and simulate growth through collisions using a statisti￾cal approach. As the BHs grow, the collision timescale,
-which depends on mBH, decreases. Simultaneously,
-∆m, which also depends on mBH, increases. The re￾sult is exponential growth (see discussion and details
-surrounding Eq. (8)). In Figure 2, however, the simula￾tions assume α = 1 for the stellar density profile, ensur￾ing the collision timescale is long compared to the sim￾ulation time, 10 Gyr. Therefore, the BHs grow slowly,
-Figure 2. We consider an example that highlights the mass
-growth as a function of distance from the SMBH. Grey dots
-represent the initial masses and distances from the SMBH
-of the BHs involved in the simulation. For simplicity, we set
-the inital mass equal to 10 M for all of the BHs. Assuming
-the density profile of stars has α = 1, we consider two cases:
-BHs accrete all of the star’s mass during a collision (red) and
-only a portion of the star’s mass is accreted during a collision
-given by Eq. 6 (blue). The latter case results in less growth
-closer to the SMBH where the velocity dispersion becomes
-high. The shaded regions and dashed lines represent the
-analytical predictions detailed in Section 2.4.
-and their final masses can be approximated using the
-following equation:
-mfinal(tcoll → const.) = minitial + ∆m
-T
-tcoll
-, (7)
-in which T represents the simulation time and ∆m and
-tcoll remain constant, approximated as their initial val￾ues.
-This equation is plotted in Figure 2 for both cases,
-∆m = 1 M (red) and ∆m from Bondi-Hoyle-Lyttleton
-accretion (blue), and the curves coincide with the cor￾responding simulated results. The shaded regions rep￾resent one standard deviation from Eq. (7), calculated
-using the square root of the number of collisions, T /tcoll.
-As indicated by the results in red, in the absence of
-Bondi-Hoyle-Lyttleton accretion, the BHs closest to the
-SMBH experience the most growth because they have
-shorter collision timescales. However, Bondi-Hoyle￾Lyttleton accretion becomes important closer to the
-SMBH, where the velocity dispersion is large compared
-with the stars’ escape velocity, and curtails the mass
-growth for BHs in this region. Outside of 10−2 pc, a BH
-consumes the star’s entire mass: the accretion-limited
-∆m governed by Eq. (7) is greater than or equal to the
-star’s mass.
-Eq. 7 does not apply for other values of α. When the
-collision timescale is shorter, corresponding to a larger
+Figure 2. We consider an example that highlights the mass
+growth as a function of distance from the SMBH. Grey dots
+represent the initial masses and distances from the SMBH
+of the BHs involved in the simulation. For simplicity, we set
+the initial mass equal to 10 $M_\odot$ for all of the BHs. Assuming
+the density profile of stars has α = 1, we consider two cases:
+BHs accrete all of the star’s mass during a collision (red) and
+only a portion of the star’s mass is accreted during a collision
+given by Eq. 6 (blue). The latter case results in less growth
+closer to the SMBH where the velocity dispersion becomes
+high. The shaded regions and dashed lines represent the
+analytical predictions detailed in Section 2.4.
+where cs is the speed of sound in the star and ρstar is its
+density (e.g., Bondi 1952; Bondi & Hoyle 1944; Shima
+et al. 1985; Edgar 2004, see latter for a review). We
+approximate the density as $1\,M_\odot / (4\pi R_\odot^3/3)$ and take
+the conservative value of $c_s = 500$ km s$^{-1}$, which is
+consistent with the sound speed inside a 1 M star
+(Christensen-Dalsgaard et al. 1996) and allows us to set
+a lower limit on ∆m. To find ∆m, at each collision, we
+have:
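+$$\Delta m = \min\left( \dot{m} \times t_{\star,{\rm cross}},\ 1\,M_\odot \right) , \qquad (6)$$
+where $t_{\star,{\rm cross}} \sim R_\odot/\sigma$ is the crossing time of the BH in
+the star. We take the minimum between $\dot{m} \times t_{\star,{\rm cross}}$ and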
+1 M because the BH cannot accrete more mass than
+one star at each collision.
+Figure 2 juxtaposes the expected growth using Bondi-Hoyle-Lyttleton accretion (blue small points) with a
+much simpler model in which the BH accretes the star’s
+entire mass, 1 M (red large points). Both examples
+start with identical populations of 10 M BHs (grey)
+and simulate growth through collisions using a statistical approach. As the BHs grow, the collision timescale,
+which depends on mBH, decreases. Simultaneously,
+∆m, which also depends on mBH, increases. The result is exponential growth (see discussion and details
+surrounding Eq. (8)). In Figure 2, however, the simulations assume α = 1 for the stellar density profile, ensuring the collision timescale is long compared to the sim-
+IMBH Formation in Galactic Nuclei 5
+ulation time, 10 Gyr. Therefore, the BHs grow slowly,
+and their final masses can be approximated using the
+following equation:
+$$m_{\rm final}(t_{\rm coll} \to {\rm const.}) = m_{\rm initial} + \Delta m\, \frac{T}{t_{\rm coll}} , \qquad (7)$$
+in which T represents the simulation time and ∆m and
+tcoll remain constant, approximated as their initial values.
+This equation is plotted in Figure 2 for both cases,
+∆m = 1 M (red) and ∆m from Bondi-Hoyle-Lyttleton
+accretion (blue), and the curves coincide with the corresponding simulated results. The shaded regions represent one standard deviation from Eq. (7), calculated
+using the square root of the number of collisions, T /tcoll.
+As indicated by the results in red, in the absence of
+Bondi-Hoyle-Lyttleton accretion, the BHs closest to the
+SMBH experience the most growth because they have
+shorter collision timescales. However, Bondi-Hoyle-Lyttleton accretion becomes important closer to the
+SMBH, where the velocity dispersion is large compared
+with the stars’ escape velocity, and curtails the mass
+growth for BHs in this region. Outside of $10^{-2}$ pc, a BH
+consumes the star’s entire mass: the accretion-limited
+∆m governed by Eq. (7) is greater than or equal to the
+star’s mass.
+Eq. 7 does not apply for other values of α. When the
+collision timescale is shorter, corresponding to a larger
 index α in the density profile (see Figure 1), the growth
-IMBH Formation in Galactic Nuclei 5
-is very efficient and ∆m quickly approaches 1 M. Con￾sequently, while we can now assume ∆m = 1 M, we
-can no longer assume the collision timescale is constant.
-The final mass grows exponentially as a result. For
-∆m = 1M, the general solution is reached by solving
-the differential equation dm/dt = 1 M/tcoll(m), which
-gives:
-mfinal(∆m → 1 M) =−A + (minitial + A) e
-CT (8)
-where A = σ
-2Rstar/G and C = 2πGnstarRstar/σ. As an
-example, we plot this curve in purple for the α = 2 case,
-in Figure 3, which agrees with the simulated masses.
-2.5. GW Inspiral
-When a BH is close to the SMBH, GW emission can
-circularize and shrink its orbit. We implement the ef￾fects of GW emission on the BH’s semimajor axis and
-eccentricity following Peters & Mathews (1963a). The
-characteristic timescale to merge a BH with an SMBH
-is given by:
-tGW ≈2.9 × 1012 yr 
-M•
-106 M
-−1 
-mBH
-106 M
-−1
-×
-
-M• + mBH
-2 × 106 M
-−1 
-a•
-10−4 pc4
-×f(e•)(1 − e
-2
-•
-)
-7/2
-, (9)
-where f(e•) is a function of e•. For all values of e•,
-f(e•) is between 0.979 and 1.81 (Blaes et al. 2002). We
-plot this timescale for a 1 × 105 M BH in Figure 1 in
-blue.
-In our simulations, we assume a BH has merged with
-the SMBH when the condition tGW < telapsed is met.
-When this condition is satisfied, we terminate mass
-growth through collisions for that BH.4
-2.6. IMBH growth
-As detailed above, BH-stellar collisions can increase
-the BH masses as a function of time. Here, we examine
-the sensitivity of the BH growth to the density power
-law. From Eq. (1), it is clear that the growth rate de￾pends on the stellar density profile, governed by the in￾dex α. We expect that higher values of α, or steeper
-profiles, will result in more efficient mass growth. In
-Figure 1, larger values of α lead to collision timescales
-in the GN’s inner region, inwards of 0.25 pc, that are
-4 For comparison, we also incrementally changed the semimajor
-axis and eccentricity from GW emission following the equations
-in Peters & Mathews (1963b). This method leads to a slight
-increase in the final IMBH masses because it accounts for the
-collisions that take place while the orbit is gradually shrinking.
-much smaller that the 10 Gyr simulation time. Figure 3
-confirms this expectation. It depicts the mass growth of
-a uniform distribution of BHs with initial conditions de￾tailed in Section 2.1 for five α values, spanning 1 (green)
-to 2 (purple). The most massive IMBHs form inwards
-of 0.25 pc for the α = 2 case.
-2.7. Gravitational Wave Mergers and Intermediate
-and Extreme Mass Ratio Inspiral Candidates
-Towards the SMBH, efficient collisions can create BHs
-massive enough to merge with the SMBH through GWs.
-Following the method detailed in Section 2.5, when a
-given BH meets the criterion tGW < telapsed, we mark
-it as merged with the SMBH. We assume that at this
-point the dynamics of the BH will be determined by GW
-emission, shrinking and circularizing the BHs orbit un￾til it undergoes an extreme or intermediate mass ratio
-inspiral (EMRI and IMRI, respectively). The righthand
-plot in Figure 3 shows the BH masses versus time of
-merger. It is interesting to note that even in the ab￾sence of relaxation processes, which are often invoked
-to explain the formation of EMRIs, EMRIs and notably
-IMRIs can form in this region.
-2.8. Two Body Relaxation Processes
-A BH orbiting the SMBH experiences weak gravita￾tional interactions with other objects in the GN. Over a
-relaxation time, these interactions alter its orbit about
-the SMBH. The two-body relaxation timescale for a
-single-mass system is:
-trelax = 0.34 σ
-3
-G2ρhM∗iln Λrlx
-, (10)
-where ln Λrlx is the Coulomb logarithm and hM∗i is the
-average mass of the surrounding objects, here assumed
-to be 1 M (Spitzer 1987; Binney & Tremaine 2008,
-Eq. (7.106)). This equation represents the approximate
-timescale for a BH on a semi-circular orbit to change
-its orbital energy and angular momentum by order of
-themselves. The BH experiences diffusion in its angular
-momentum and energy as a function of time (depending
-on the eccentricity of the orbit, this process can be more
-efficient Fragione & Sari 2018; Sari & Fragione 2019). In
-Figure 1, we plot the relaxation timescale in gold for a
-range of α. We note that the Bahcall & Wolf (1976) pro￾file, α = 7/4, corresponds to zero net flux and therefore
-does not preferentially migrate objects inward.
-Additionally, because they are more massive on
-average than the surrounding objects, BHs are ex￾pected to segregate inwards in the GN (e.g., Shapiro
-& Marchant 1978; Cohn & Kulsrud 1978; Morris 1993;
+is very efficient and ∆m quickly approaches 1 M. Consequently, while we can now assume ∆m = 1 M, we
+can no longer assume the collision timescale is constant.
+The final mass grows exponentially as a result. For
+∆m = 1M, the general solution is reached by solving
+the differential equation dm/dt = 1 M/tcoll(m), which
+gives:
+mfinal(∆m → 1 M) =−A + (minitial + A) e
+CT (8)
+where A = σ
+2Rstar/G and C = 2πGnstarRstar/σ. As an
+example, we plot this curve in purple for the α = 2 case,
+in Figure 3, which agrees with the simulated masses.
+2.5. Uncertainties in Accretion
+We note that the ∆m calculated in this proof-of-concept study assumes that the BH accretes all of the
+material that it captures. Estimating the true fraction
+of the material accreted by the BH is very challenging; this complex problem requires numerically solving
+the generalized GR fluid equations with cooling, heating, and radiative transfer, etc. and remains an active
+field of research (e.g., Blandford & Begelman 1999; Park
+& Ostriker 2001; Narayan et al. 2003; Igumenshchev
+et al. 2003; Ohsuga et al. 2005; Yuan et al. 2012; Jiang
+et al. 2014; McKinney et al. 2014; Narayan et al. 2022).
+Heuristically, if a collision between a BH and a star results in an accretion disk, the disk’s viscous timescale
+may be as low as days. The resultant luminosity can
+unbind most of the captured material, though details
+such as the amount accreted and peak luminosity remain uncertain (e.g., Yuan et al. (2012); Jiang et al.
+(2014), see also the discussion in Stone et al. (2017),
+Rizzuto et al. (2022), and Kremer et al. (2022)). The
+question becomes whether or not a BH can still accumulate significant amounts of mass over many collisions
+even if it accretes very little in a single one. We explore the viability of our channel using a physically motivated inefficient accretion model. Several studies have
+invoked momentum-driven winds in BH accretion (e.g.,
+Murray et al. 2005; Ostriker et al. 2010; Brennan et al.
+2018). We thus estimate the fraction of captured mass
+accreted to be approximately vesc/(cη), where vesc is
+the escape velocity from the BH at 1 R and η is the
+accretion efficiency at the ISCO. We take η to be 0.1
+(e.g., Yu & Tremaine 2002). This expression for the
+fraction accreted is consistent with Kremer et al. (2022)
+equation 19 for s = 0.5, which is a reasonable value for
+s, a free parameter between 0.2 and 0.8. We discuss
+the results of the momentum-driven winds estimate in
+Section 3. We note that the accretion process may be
+more efficient than this estimate implies if, for example,
+jets or other instabilities result in the beaming of radiation away from the captured material (e.g., Blandford
+& Znajek 1977; Begelman 1979; De Villiers et al. 2005;
+McKinney & Gammie 2004; McKinney 2006; Igumenshchev 2008; Begelman 2012a,b; McKinney et al. 2014).
+2.6. GW Inspiral
+When a BH is close to the SMBH, GW emission can
+circularize and shrink its orbit. We implement the effects of GW emission on the BH’s semimajor axis and
+eccentricity following Peters & Mathews (1963a). The
+characteristic timescale to merge a BH with an SMBH
+is given by:
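+$$t_{\rm GW} \approx 2.9 \times 10^{12}\ {\rm yr} \left( \frac{M_\bullet}{10^6\,M_\odot} \right)^{-1} \left( \frac{m_{\rm BH}}{10^6\,M_\odot} \right)^{-1} \times \left( \frac{M_\bullet + m_{\rm BH}}{2 \times 10^6\,M_\odot} \right)^{-1} \left( \frac{a_\bullet}{10^{-2}\ {\rm pc}} \right)^{4} \times f(e_\bullet)\, (1 - e_\bullet^2)^{7/2} , \qquad (9)$$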
+where f(e•) is a function of e•. For all values of e•,
+f(e•) is between 0.979 and 1.81 (Blaes et al. 2002). We
+plot this timescale for a $1 \times 10^5\,M_\odot$ BH in Figure 1 in
+blue.
+ Rose et al.
+Figure 3. On the right, we plot final masses of 500 BHs using different values of α in the density profile, shallow (α = 1) to
+cuspy (α = 2). For the latter case, the purple line shows the analytical result from Eq. 8, taking minitial to be the average mass
+of the population. Faded stars indicate BHs that merged with the SMBH through GWs. On the left, we plot the masses and
+merger times of these BHs.
+In our simulations, we assume a BH has merged with
+the SMBH when the condition tGW < telapsed is met.
+When this condition is satisfied, we terminate mass
+growth through collisions for that BH.4
+2.7. IMBH growth
+As detailed above, BH-stellar collisions can increase
+the BH masses as a function of time. Here, we examine
+the sensitivity of the BH growth to the density power
+law. From Eq. (1), it is clear that the growth rate depends on the stellar density profile, governed by the index α. We expect that higher values of α, or steeper
+profiles, will result in more efficient mass growth. In
+Figure 1, larger values of α lead to collision timescales
+in the GN’s inner region, inwards of 0.25 pc, that are
+much smaller than the 10 Gyr simulation time. Figure 3
+confirms this expectation. It depicts the mass growth of
+a uniform distribution of BHs with initial conditions detailed in Section 2.1 for five α values, spanning 1 (green)
+to 2 (purple). The most massive IMBHs form inwards
+of 0.25 pc for the α = 2 case.
+2.8. Gravitational Wave Mergers and Intermediate
+and Extreme Mass Ratio Inspiral Candidates
+Towards the SMBH, efficient collisions can create BHs
+massive enough to merge with the SMBH through GWs.
+Following the method detailed in Section 2.6, when a
+given BH meets the criterion tGW < telapsed, we mark
+4 For comparison, we also incrementally changed the semimajor
+axis and eccentricity from GW emission following the equations
+in Peters & Mathews (1963b). This method leads to a slight
+increase in the final IMBH masses because it accounts for the
+collisions that take place while the orbit is gradually shrinking.
+it as merged with the SMBH. We assume that at this
+point the dynamics of the BH will be determined by GW
+emission, shrinking and circularizing the BH’s orbit until it undergoes an extreme or intermediate mass ratio
+inspiral (EMRI and IMRI, respectively). The righthand
+plot in Figure 3 shows the BH masses versus time of
+merger. It is interesting to note that even in the absence of relaxation processes, which are often invoked
+to explain the formation of EMRIs, EMRIs and notably
+IMRIs can form in this region.
+2.9. Two Body Relaxation Processes
+A BH orbiting the SMBH experiences weak gravitational interactions with other objects in the GN. Over a
+relaxation time, these interactions alter its orbit about
+the SMBH. The two-body relaxation timescale for a
+single-mass system is:
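+$$t_{\rm relax} = 0.34\,\frac{\sigma^3}{G^2 \rho \langle M_* \rangle \ln \Lambda_{\rm rlx}}, \quad (10)$$
+where $\ln \Lambda_{\rm rlx}$ is the Coulomb logarithm and $\langle M_* \rangle$ is the
+average mass of the surrounding objects, here assumed
+to be $1\,M_\odot$ (Spitzer 1987; Binney & Tremaine 2008,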
+Eq. (7.106)). This equation represents the approximate
+timescale for a BH on a semi-circular orbit to change
+its orbital energy and angular momentum by order of
+themselves. The BH experiences diffusion in its angular
+momentum and energy as a function of time (depending
+on the eccentricity of the orbit, this process can be more
+efficient Fragione & Sari 2018; Sari & Fragione 2019).
+Relaxation can cause the orbit of an object in a GN to
+reach high eccentricities. If the object is a BH, it can
+spiral into the SMBH and form an EMRI, while a star
+IMBH Formation in Galactic Nuclei 7
+can be tidally disrupted by the SMBH (e.g. Magorrian
+& Tremaine 1999; Wang & Merritt 2004; Hopman &
+Alexander 2005; Aharon & Perets 2016; Stone & Metzger 2016; Amaro-Seoane 2018; Sari & Fragione 2019;
+Naoz et al. 2022). The relaxation process is therefore
+crucial to our study. In Figure 1, we plot the relaxation
+timescale in gold for a range of α. We note that the Bahcall & Wolf (1976) profile, α = 7/4, corresponds to zero
+net flux and therefore does not preferentially migrate
+objects inward.
+Additionally, because BHs are more massive on average than the surrounding objects, they are expected
+to segregate inwards in the GN (e.g., Shapiro &
+Marchant 1978; Cohn & Kulsrud 1978; Morris 1993;
 Miralda-Escudé & Gould 2000; Baumgardt et al. 2004).
-6 Rose et al.
-Figure 3. On the right, we plot final masses of 500 BHs using different values of α in the density profile, shallow (α = 1) to
-cuspy (α = 2). For the latter case, the purple line shows the analytical result from Eq. 8, taking minitial to be the average mass
-of the population. Faded stars indicate BHs that merged with the SMBH through GWs. On the left, we plot the masses and
-merger times of these BHs.
-They sink toward the SMBH on the mass segregation
-timescale, tseg ≈ hM∗i/mBH × trelax (e.g., Spitzer 1987;
-Fregeau et al. 2002; Merritt 2006), which is typically an
-order of magnitude smaller than the relaxation timescale
-plotted in Figure 1.
-We incorporate relaxation processes by introducing a
-small change in the BH’s energy and angular momen￾tum each time it orbits the SMBH. We apply a small
-instantaneous velocity kick to the BH, denoted as ∆v.
-We draw ∆v from a Guassian distribution with average
-of zero and a standard deviation of ∆vrlx/
-√
-3, where
-∆vrlx = v•
-p
-P•/trlx (see Bradnick et al. 2017, for an
-approach to changes in the angular momentum). The
-new orbital parameters can be calculated following Lu
-& Naoz (2019), and see Naoz et al. in prep for full set
-of equations.
-We account for the effects of relaxation processes,
-including mass-segregation, using a multi-faceted ap￾proach. We begin by migrating each BH towards the
-center over its mass-segregation timescale, shifting it in￾crementally inward such that its orbital energy changes
-by order of itself within the segregation timescale.
-As the BHs segregate down the potential well, their
-abundance with respect to stars increases, until at some
-turnover radius, BHs become the dominant source of
-scattering for both black holes and stars. Within this ra￾dius, BH self-interaction dominates over two-body scat￾terings with the now rarer main-sequence stars. The
-BHs will then settle onto a Bahcall-Wolf profile, while
-the stars may follow a shallower profile, with approx￾imately n? ∝ r
-−1.5
-, inwards of the transition radius
-(Linial & Sari in prep.).
-Therefore, after the initial mass segregation, we allow
-the BHs to begin diffusing over a relaxation timescale,
-their orbital parameters changing slowly through a ran￾dom process. In this random process, some of the BHs
-may migrate closer to the SMBH. We terminate mass
-growth when the BH enters the inner 200 au of the GN,
-within which the density of stars is uncertain. This cut￾off is based on the 120 au pericenter of S0-2, the closest
-known star to the SMBH (e.g., Ghez et al. 2005).
-Another physical process that causes inward migra￾tion is dynamical friction. A cursory derivation based
-on the dynamical friction equations described in Binney
-& Tremaine (2008) reveals the process to have a simi￾lar timescale to mass segregation. If a BH diffuses to
-a distance greater than 2 pc from the SMBH, exiting
-the sphere of influence, we have it sink inwards, back
-towards the center, over a dynamical friction timescale.
-After one dynamical friction timescale has passed, we
-restart diffusion.
-We note that our prescription ignores self-interactions
-between the BHs. As mentioned above, as the BHs sink
-towards the SMBH, their concentration in the inner re￾gion of the GN increases, allowing them to dominate the
-scattering. We reserve the inclusion of these interactions
-for future study.
-2.9. Effect of Relaxation Processes
-As depicted in Figure 4, two-body relaxation processes
-result in more EMRIs and IMRIs events. These pro￾cesses allow BHs that begin further from the SMBH
-to migrate inwards and grow more efficiently in mass.
-However, it also impedes the growth of BHs that are
-initially closer to the SMBH by allowing them to dif-
-IMBH Formation in Galactic Nuclei 7
-Figure 4. Similar to Figure 3, we plot the initial masses versus initial distance (grey) and final mass versus final distance (red)
-for 500 BHs. This simulation includes relaxation processes, including mass segregation, diffusion, and dynamical friction. We
-assume α = 1.75 for the GN density profile. Faded stars represent BHs that merged with the SMBH. As a result of inward
-migration, BHs merge more quickly with the SMBH, before they can become as massive as those in Figure 3. Additionally, more
-BHs become EMRIs and IMRIs. Additionally, in the third panel, we show a histogram of the simulated IMBH masses for two
-different values of α, 1.5 (orange, solid), α, 1.75 (red, dashed), and 2 (purple, dash-dotted), accounting for relaxation processes.
-The dashed, faded lines represent the corresponding initial histograms. We assume α = 1.75 for the GN density profile. Faded
-stars represent BHs that merged with the SMBH.
-fuse out of the inner region where collisions are efficient.
-As can be seen in Figure 4, the net result is that more
-BHs grow, but the maximum mass is lower compared
-to the scenario that ignores two-body relaxation. The
-histogram in Figure 4 presents the final BH mass distri￾butions for different power law indices α. As expected,
-the two-body relaxation suppresses the α dependence
-highlighted in Figure 3. In fact, using a KS test, we
-find that we cannot reject the hypothesis that the two
-distributions were drawn from the same sample for the
-α = 1.75 and α = 2 results. Interestingly, a BH mass
-IMF with an average of 10 M leads to a final distri￾bution with an average of ∼ 200 M and a median of
-∼ 45 M, which lies within the mass gap.
-3. DISCUSSION AND PREDICTIONS
-We explore the feasibility of forming IMBHs in a
-GN through successive collisions between a stellar-mass
-BH and main-sequence stars. Taking both a statisti￾cal and analytic approach, we show that this channel
-can produce IMBHs efficiently with masses as high as
-103−4 M and may result in many IMBH-SMBH merg￾ers (intermediate-mass ratio inspiral, IMRIs) and EM￾RIs.
-As the stellar mass BH collides with a star, the BH
-will grow in mass. The increase may equal star’s en￾tire mass if the relative velocity is smaller than the es￾cape velocity from the BH at 1 R. However, near the
-SMBH, the velocity dispersion may be larger than the
-escape velocity from the BH at the star’s radius. In this
-limit, the BH accretes a “tunnel” of material through
-the star, estimated using Bondi-Hoyle-Lyttleton accre￾tion. In our statistical analysis, we account for Bondi￾Hoyle-Lyttleton accretion and find that BHs outside of
-10−2 pc from the SMBH can accrete the entire star (see
-Figure 2).
-The efficiency of collisions, and therefore IMBH,
-EMRI, and IMRI formation as well, are sensitive to
-the underlying stellar density. As shown in Figure 3, a
-steeper density profile results in larger IMBHs. This be￾havior can be understood from the collision timescale’s
-dependence on the stellar density profile. A steeper pro￾file yields shorter collision timescales near the SMBH.
-However, the inclusion of relaxation processes in the
-simulations dampens the influence of the stellar density
-profile by allowing BHs to diffuse into regions of more
-or less efficient growth. As a result, more BHs grow in
-mass, but their maximum mass is smaller (∼ 104 M).
-Additionally, the final masses have no apparent depen￾dence on distance from the SMBH (see Figure 4).
-Mass growth through BH-main-sequence star colli￾sions may act in concert with other IMBH formation
-channels, such as compact object binary mergers (e.g.,
-Hoang et al. 2018; Stephan et al. 2019; Fragione et al.
-2021; Wang et al. 2021). While in some cases colli￾sions can unbind a binary (e.g., Sigurdsson & Phinney
-1993; Fregeau et al. 2004), BH binaries can be tightly
-bound enough to withstand the collisions. Wide bina￾ries may also become unbound due to interactions with
-the neighboring stars and compact objects (e.g., Binney
-& Tremaine 1987; Rose et al. 2020, see latter study for
-the timescale for an arbitrary eccentricity). However,
-as highlighted in previous studies, a substantial frac￾tion of these binaries may merge due to the Eccentric
-Kozai Lidov mechanism, leaving behind a single star or
-a single compact object (e.g., Stephan et al. 2016, 2019;
-Hoang et al. 2018). Additionally, to be susceptible to
-evaporation, BH binaries must have a wider configura￾tion. Otherwise, they will be more tightly bound that
-8 Rose et al.
-the average kinetic energy of the surrounding objects,
-and will only harden through weak gravitational inter￾actions with neighboring stars (see for example Figure
-6 in Rose et al. 2020).
-Not included in this study, collisions between the BH
-and other compact objects will increase the BH growth
-rate. BH-BH mergers (e.g., O’Leary et al. 2009; Fra￾gione et al. 2021) and even neutron star BH mergers
-(e.g., Hoang et al. 2020) become more likely as the BHs
-increase in mass through stellar collisions. As a result,
-the BH-BH collision timescale, discussed in Section 2.2,
-will become relevant to our simulations, allowing the
-BHs to grow through this channel in addition to stel￾lar collisions. Additionally, this compact object mergers
-result in GW recoil, which may have a large impact on
-the dynamics (e.g., Baibhav et al. 2020; Fragione et al.
-2021)
-The BH’s mass growth increases GW emission, which
-dissipates energy from the orbit. Along with relaxation
-processes, GW emission causes BHs to sink towards the
-SMBH and eventually undergo a merger. As a result,
-the GN environment is conducive to the formation of
-EMRIs and IMRIs. The GW emission from EMRIs and
-IMRIs is expected to be at mHz frequencies, making
-them promising candidates for LISA to observe. While
-the exact rate calculation is beyond the scope of this
-study, the mechanism outlined here seems very promis￾ing.
-Our results also suggest that IMBHs are likely to ex￾ists in many galactic nuclei, as well as within our own
-galactic center. This implication seems to be consis￾tent with recent observational and theoretical studies
-(e.g., Hansen & Milosavljevi´c 2003; Maillard et al. 2004;
-G¨urkan & Rasio 2005; Gualandris & Merritt 2009; Chen
-& Liu 2013; Generozov & Madigan 2020; Fragione et al.
-2020a; Zheng et al. 2020; Naoz et al. 2020; GRAVITY
-Collaboration et al. 2020).
-Lastly, the collisions between stellar mass BHs and
-stars may contribute to the x-ray emission from our
-galactic centre (e.g., Muno et al. 2005, 2009; Hailey et al.
-2018; Zhu et al. 2018; Cheng et al. 2018)
-5
-. These inter￾actions, in particular grazing collisions, may also result
-in tidal disruption events (e.g., Perets et al. 2016; Sam￾sing et al. 2019; Kremer et al. 2021). Thus, the process
-outlined here may produce electromagnetic signatures
-in addition to GW mergers.
-SR thanks the Charles E Young fellowship, the Nina
-Byers Fellowship, and the Michael A. Jura Memorial
-Graduate Award for support. SR and SN acknowledge
-the partial support from NASA ATP 80NSSC20K0505.
-SN thanks Howard and Astrid Preston for their gener￾ous support. IL thanks support from the Adams Fellow￾ship. SN and RS thank the Bhaumik Institute visitor
-program.
-REFERENCES
-Abbott, B. P., Abbott, R., Abbott, T. D., et al. 2016,
-PhRvL, 116, 241102,
-doi: 10.1103/PhysRevLett.116.241102
-—. 2017a, PhRvL, 118, 221101,
-doi: 10.1103/PhysRevLett.118.221101
-—. 2017b, PhRvL, 119, 141101,
-doi: 10.1103/PhysRevLett.119.141101
-Alexander, T. 1999, ApJ, 527, 835, doi: 10.1086/308129
-Alexander, T., & Pfuhl, O. 2014, ApJ, 780, 148,
-doi: 10.1088/0004-637X/780/2/148
-Arca Sedda, M., Mapelli, M., Benacquista, M., & Spera, M.
-2021, arXiv e-prints, arXiv:2109.12119.
-https://arxiv.org/abs/2109.12119
-Bahcall, J. N., & Wolf, R. A. 1976, ApJ, 209, 214,
-doi: 10.1086/154711
-5 The connection between the observed X-ray sources at the Galac￾tic Center and tidal capture has been suggested by Generozov
-et al. (2018), but see Zhu et al. (2018); Stephan et al. (2019) for
-alternative channels.
-Baibhav, V., Gerosa, D., Berti, E., et al. 2020, PhRvD, 102,
-043002, doi: 10.1103/PhysRevD.102.043002
-Baumgardt, H., Makino, J., & Ebisuzaki, T. 2004, ApJ,
-613, 1143, doi: 10.1086/423299
-Begelman, M. C., Volonteri, M., & Rees, M. J. 2006,
-MNRAS, 370, 289, doi: 10.1111/j.1365-2966.2006.10467.x
-Belczynski, K., Hirschi, R., Kaiser, E. A., et al. 2020a, ApJ,
-890, 113, doi: 10.3847/1538-4357/ab6d77
-—. 2020b, ApJ, 890, 113, doi: 10.3847/1538-4357/ab6d77
-Bertone, G., Fornasa, M., Taoso, M., & Zentner, A. R.
-2009, New Journal of Physics, 11, 105016,
-doi: 10.1088/1367-2630/11/10/105016
-Binney, J., & Tremaine, S. 1987, Galactic dynamics
-—. 2008, Galactic Dynamics: Second Edition
-Blaes, O., Lee, M. H., & Socrates, A. 2002, ApJ, 578, 775,
-doi: 10.1086/342655
-Blecha, L., Ivanova, N., Kalogera, V., et al. 2006, ApJ, 642,
-427, doi: 10.1086/500727
-Bondi, H. 1952, MNRAS, 112, 195,
+They sink toward the SMBH on the mass segregation
+timescale, $t_{\rm seg} \approx \langle M_* \rangle / m_{\rm BH} \times t_{\rm relax}$ (e.g., Spitzer 1987;
+Fregeau et al. 2002; Merritt 2006), which is typically an
+order of magnitude smaller than the relaxation timescale
+plotted in Figure 1.
+We incorporate relaxation processes by introducing a
+small change in the BH’s energy and angular momentum each time it orbits the SMBH. We apply a small
+instantaneous velocity kick to the BH, denoted as ∆v.
+We draw ∆v from a Gaussian distribution with an average
+of zero and a standard deviation of $\Delta v_{\rm rlx}/\sqrt{3}$, where
+$\Delta v_{\rm rlx} = v_\bullet \sqrt{P_\bullet / t_{\rm rlx}}$ (see Bradnick et al. 2017, for an
+approach to changes in the angular momentum). The
+new orbital parameters can be calculated following Lu
+& Naoz (2019), and see Naoz et al. (2022) for the full
+set of equations.
+We account for the effects of relaxation processes,
+including mass-segregation, using a multi-faceted approach. We begin by migrating each BH towards the
+center over its mass-segregation timescale, shifting it incrementally inward such that its orbital energy changes
+by order of itself within the segregation timescale.
+As the BHs segregate down the potential well, their
+abundance with respect to stars increases, until at some
+turnover radius, BHs become the dominant source of
+scattering for both black holes and stars. Within this radius, BH self-interaction dominates over two-body scatterings with the now rarer main-sequence stars. The
+BHs will then settle onto a Bahcall-Wolf profile, while
+the stars may follow a shallower profile, with approximately $n_\star \propto r^{-1.5}$, inwards of the transition radius
+(Linial & Sari in prep.).
+Therefore, after the initial mass segregation, we allow
+the BHs to begin diffusing over a relaxation timescale,
+their orbital parameters changing slowly through a random process. In this random process, some of the BHs
+may migrate closer to the SMBH. We terminate mass
+growth when the BH enters the inner 200 au of the GN,
+within which the density of stars is uncertain. This cutoff is based on the 120 au pericenter of S0-2, the closest
+known star to the SMBH (e.g., Ghez et al. 2005).
+Another physical process that causes inward migration is dynamical friction. A cursory derivation based
+on the dynamical friction equations described in Binney
+& Tremaine (2008) reveals the process to have a similar timescale to mass segregation. If a BH diffuses to
+a distance greater than 2 pc from the SMBH, exiting
+the sphere of influence, we have it sink inwards, back
+towards the center, over a dynamical friction timescale.
+After one dynamical friction timescale has passed, we
+restart diffusion.
+We note that our prescription ignores self-interactions
+between the BHs. As mentioned above, as the BHs sink
+towards the SMBH, their concentration in the inner region of the GN increases, allowing them to dominate the
+scattering. We reserve the inclusion of these interactions
+for future study.
+2.10. Effect of Relaxation Processes
+As depicted in Figure 4, two-body relaxation processes
+result in more EMRI and IMRI events. These processes allow BHs that begin further from the SMBH
+to migrate inwards and grow more efficiently in mass.
+However, they also impede the growth of BHs that are
+initially closer to the SMBH by allowing them to diffuse out of the inner region where collisions are efficient.
+As can be seen in Figure 4, the net result is that more
+BHs grow, but the maximum mass is lower compared
+to the scenario that ignores two-body relaxation. The
+histogram in Figure 4 presents the final BH mass distributions for different power law indices α. As expected,
+the two-body relaxation suppresses the α dependence
+highlighted in Figure 3. In fact, using a KS test, we
+find that we cannot reject the hypothesis that the two
+distributions were drawn from the same sample for the
+α = 1.75 and α = 2 results. Interestingly, a BH mass
+IMF with an average of $10\,M_\odot$ leads to a final distribution with an average of $\sim 200\,M_\odot$ and a median of
+$\sim 45\,M_\odot$, which lies within the mass gap.
+3. DISCUSSION AND PREDICTIONS
+We explore the feasibility of forming IMBHs in a
+GN through successive collisions between a stellar-mass
+BH and main-sequence stars. Taking both a statistical and analytic approach, we show that this channel
+can produce IMBHs efficiently with masses as high as
+$10^{3-4}\,M_\odot$ and may result in many IMBH-SMBH mergers (intermediate-mass ratio inspirals, or IMRIs) and
+EMRIs.
+ Rose et al.
+Figure 4. Similar to Figure 3, we plot the initial masses versus initial distance (grey) and final mass versus final distance
+(red) for 500 BHs. This simulation includes relaxation processes, including mass segregation, diffusion, and dynamical friction.
+We assume α = 1.75 for the GN density profile. Faded stars represent BHs that merge with the SMBH. As a result of inward
+migration, BHs merge more quickly with the SMBH, before they can become as massive as those in Figure 3. Additionally,
+more BHs become EMRIs and IMRIs. Additionally, in the third panel, we show a histogram of the simulated IMBH masses
+for three different values of α, 1.5 (orange, solid), 1.75 (red, dashed), and 2 (purple, dash-dotted), accounting for relaxation
+processes. We also show the results for a simulation with α = 1.75 that accounts for momentum-driven winds (black, dotted).
+Despite the substantially reduced accretion, BHs in the mass gap still form.
+As the stellar mass BH collides with a star, the BH
+will grow in mass. The increase may equal the star’s entire mass if the relative velocity is smaller than the escape velocity from the BH at $1\,R_\odot$. However, near the
+SMBH, the velocity dispersion may be larger than the
+escape velocity from the BH at the star’s radius. In this
+limit, the BH captures a “tunnel” of material through
+the star, estimated using Bondi-Hoyle-Lyttleton accretion. In our statistical analysis, we account for Bondi-Hoyle-Lyttleton accretion and find that BHs outside of
+$10^{-2}$ pc from the SMBH can capture the entire star (see
+Figure 2).
+The efficiency of collisions, and therefore IMBH,
+EMRI, and IMRI formation as well, are sensitive to
+the underlying stellar density. As shown in Figure 3, a
+steeper density profile results in larger IMBHs. This behavior can be understood from the collision timescale’s
+dependence on the stellar density profile. A steeper profile yields shorter collision timescales near the SMBH.
+However, the inclusion of relaxation processes in the
+simulations dampens the influence of the stellar density
+profile by allowing BHs to diffuse into regions of more
+or less efficient growth. As a result, more BHs grow in
+mass, but their maximum mass is smaller ($\sim 10^4\,M_\odot$).
+Additionally, the final masses have no apparent dependence on distance from the SMBH (see Figure 4).
+Most simulations in our study assume that the BHs
+accrete all of the mass that they capture. The final BH
+masses can be taken as an upper limit. We note that
+the accretion is a highly uncertain process and represents an active field of study (e.g., Blandford & Begelman 1999; Park & Ostriker 2001; Narayan et al. 2003;
+Igumenshchev et al. 2003; Ohsuga et al. 2005; Yuan
+et al. 2012; Jiang et al. 2014; McKinney et al. 2014;
+Narayan et al. 2022). To assess the limits of our model,
+we also consider a physically motivated accretion model,
+momentum-driven winds (Section 2.5). We present the
+final mass distribution for momentum-driven winds in
+Figure 4. Importantly, we find that BHs within the
+mass gap still form naturally despite the substantially
+reduced accretion. About 5% of the BHs grow by 10
+to 100 M. Furthermore, if we increase this ∆M estimate by a factor of 2 (i.e., use η = 0.05), the simulation produces a 3.5×103 M IMBH for the same initial
+conditions. Our proof-of-concept demonstrates that collisions between BH and stars are an important process
+that should be taken into account in dense places such
+as a GN.
+Mass growth through BH-main-sequence star collisions may act in concert with other IMBH formation
+channels, such as compact object binary mergers (e.g.,
+Hoang et al. 2018; Stephan et al. 2019; Fragione et al.
+2021; Wang et al. 2021). While in some cases collisions can unbind a binary (e.g., Sigurdsson & Phinney
+1993; Fregeau et al. 2004), BH binaries can be tightly
+bound enough to withstand the collisions. Wide binaries may also become unbound due to interactions with
+the neighboring stars and compact objects (e.g., Binney
+& Tremaine 1987; Rose et al. 2020, see latter study for
+the timescale for an arbitrary eccentricity). However,
+as highlighted in previous studies, a substantial fraction of these binaries may merge due to the Eccentric
+Kozai Lidov mechanism, leaving behind a single star or
+a single compact object (e.g., Stephan et al. 2016, 2019;
+Hoang et al. 2018). Additionally, to be susceptible to
+evaporation, BH binaries must have a wider configuration. Otherwise, they will be more tightly bound than
+the average kinetic energy of the surrounding objects
+and will only harden through weak gravitational inter-
+IMBH Formation in Galactic Nuclei 9
+actions with neighboring stars (see for example Figure
+6 in Rose et al. 2020).
+We note that we assume a steady-state and treat the
+stars as a reservoir in this model. Future work will take a
+more nuanced approach to the background stars, whose
+density as a function of time can be influenced by several
+factors. Firstly, the relaxation of the stellar population
+occurs on Gyr timescales. Some studies have suggested
+that in situ star formation can occur in the Galactic
+Center as close as 0.04 pc from the SMBH (e.g., Levin
+& Beloborodov 2003; Paumard et al. 2006), and star
+formation episodes can occur as often as every ∼ 5 Myr
+(e.g. Lu et al. 2009). Therefore, we expect that after
+the first Gyr, stars within $\lesssim 0.01$ pc will be replenished
+at intervals consistent with the star formation episodes;
+the infalling populations of stars are separated by ∼
+5−10 Myr, which is shorter than the collision timescale.
+However, star-star collisions may complicate this picture within ∼ 0.01 pc. As discussed above, regular star
+formation ensures the BHs always have a stellar population to interact with outside of ∼ 0.01 pc.5 At 0.01 pc,
+however, the kinetic energy during a collision between
+two $1\,M_\odot$ stars is larger than their binding energies.
+Collisions can therefore thin out the stellar populations
+during the time it takes them to diffuse to these small
+radii, $\lesssim 0.01$ pc, and may reduce the BH growth in the
+innermost region. We reserve the inclusion of star-star
+collisions for future work. We also note that the disruption of binary stars by the SMBH may help replenish
+the stellar population even as collisions work to deplete
+it (e.g., Balberg et al. 2013); when a binary is disrupted,
+one of the stars is captured on a tightly bound orbit
+about the SMBH.
+An IMBH may also affect the stellar density profile.
+As it spirals into the SMBH, it can perturb stellar orbits,
+and these interactions can lead to hypervelocity stars
+(e.g., Baumgardt et al. 2006a; Löckmann & Baumgardt
+2008). Löckmann & Baumgardt (2008) show that an
+IMBH can modify an initially steep stellar density profile to become consistent with the flatter cusp observed
+in the Galactic Center. The stars may then be replenished on 100 Myr timescales (Baumgardt et al. 2006a).
+Therefore, after the formation of the first few IMBHs,
+subsequent BH growth may occur in bursts, coinciding
+with replenishment of the stars.
+While there are many competing dynamical processes
+that shape the stellar density profile, we stress that α
+5
+In fact, the star-star collision timescale is greater than 10 Myr
+for the entire parameter space, save at 0.001 pc for larger values
+of α; the BH-star collision timescale plotted in Fig. 1 is the same
+order of magnitude as the star-star collision timescale.
+can simply be chosen to encapsulate all of the relevant
+physics. A value for α that is constrained by observations must already reflect ongoing processes like star-star collisions and replenishment. Schödel et al. (2018)
+find the observed stellar mass enclosed within 0.01 pc of
+the Milky Way’s Galactic Center to be approximately
+$180\,M_\odot$. This estimate is consistent to order of magnitude with our α = 1.25 case. In a simulation like those
+depicted in Figure 4, which include relaxation, α = 1.25
+leads to a maximum IMBH mass of $140\,M_\odot$. Furthermore, while the stellar mass within 0.01 pc may be a
+few hundred $M_\odot$, Do et al. (2019) and GRAVITY Collaboration et al. (2020) set an upper limit on the mass
+enclosed within the orbit of S0-2 to be about a few thousand $M_\odot$, or 0.1% of the central mass. This upper limit
+can include mass that was previously in stars but is now
+in BHs. In that case, the $180\,M_\odot$ is what remains of the
+stars, while BHs and IMBHs make up the $\sim 1000\,M_\odot$
+in the innermost region.
+Also not included in this study, collisions between the
+BH and other compact objects will increase the BH
+growth rate. BH-BH mergers (e.g., O’Leary et al. 2009;
+Fragione et al. 2021) and even neutron star BH mergers
+(e.g., Hoang et al. 2020) become more likely as the BHs
+increase in mass through stellar collisions. As a result,
+the BH-BH collision timescale, discussed in Section 2.2,
+will become relevant to our simulations, allowing the
+BHs to grow through this channel in addition to stellar collisions. Additionally, these compact object mergers
+result in GW recoil, which may have a large impact on
+the dynamics (e.g., Baibhav et al. 2020; Fragione et al.
+2021).
+The BH’s mass growth increases GW emission, which
+dissipates energy from the orbit. Along with relaxation,
+GW emission causes BHs to sink towards the SMBH
+and eventually undergo a merger. As a result, the GN
+environment is conducive to the formation of EMRIs
+and IMRIs. The GW emission from EMRIs and IMRIs is expected to be at mHz frequencies, making them
+promising candidates for LISA to observe. While the
+exact rate calculation is beyond the scope of this study,
+the mechanism outlined here seems very promising.
+Our results also suggest that BHs within the mass gap
+as well as IMBHs likely exist in many galactic nuclei, as
+well as within our own galactic center. This implication
+seems to be consistent with recent observational and
+theoretical studies (e.g., Hansen & Milosavljević 2003;
+Maillard et al. 2004; Gürkan & Rasio 2005; Gualandris
+& Merritt 2009; Chen & Liu 2013; Generozov & Madigan 2020; Fragione et al. 2020a; Zheng et al. 2020; Naoz
+et al. 2020; GRAVITY Collaboration et al. 2020).
+ Rose et al.
+Lastly, the collisions between stellar mass BHs and
+stars may contribute to the x-ray emission from our
+galactic centre (e.g., Muno et al. 2005, 2009; Hailey
+et al. 2018; Zhu et al. 2018; Cheng et al. 2018, see Kremer et al. (2022) for a discussion of electromagnetic signatures from BH-star collisions)6
+. These interactions,
+in particular grazing collisions, may also result in tidal
+disruption events (e.g., Baumgardt et al. 2006b; Perets
+et al. 2016; Stone et al. 2017; Samsing et al. 2019; Kremer et al. 2021). Thus, the process outlined here may
+produce electromagnetic signatures in addition to GW
+mergers.
+We thank the anonymous referee for useful comments.
+We also thank Jessica Lu, Fred Rasio, Kyle Kremer,
+Ryosuke Hirai, Ilya Mandel, and Erez Michaely for useful discussion.
+SR thanks the Charles E. Young Fellowship, the Nina
+Byers Fellowship, and the Michael A. Jura Memorial
+Graduate Award for support. SR and SN acknowledge
+the partial support from NASA ATP 80NSSC20K0505.
+SN thanks Howard and Astrid Preston for their generous support. IL thanks support from the Adams Fellowship. SN and RS thank the Bhaumik Institute visitor
+program. This work was performed in part at the Aspen Center for Physics, which is supported by National
+Science Foundation grant PHY-1607611.
+REFERENCES
+Abbott, B. P., Abbott, R., Abbott, T. D., et al. 2016,
+PhRvL, 116, 241102,
+doi: 10.1103/PhysRevLett.116.241102
+—. 2017a, PhRvL, 118, 221101,
+doi: 10.1103/PhysRevLett.118.221101
+—. 2017b, PhRvL, 119, 141101,
+doi: 10.1103/PhysRevLett.119.141101
+Aharon, D., & Perets, H. B. 2016, ApJL, 830, L1,
+doi: 10.3847/2041-8205/830/1/L1
+Alexander, T. 1999, ApJ, 527, 835, doi: 10.1086/308129
+Alexander, T., & Pfuhl, O. 2014, ApJ, 780, 148,
+doi: 10.1088/0004-637X/780/2/148
+Amaro-Seoane, P. 2018, Living Reviews in Relativity, 21, 4,
+doi: 10.1007/s41114-018-0013-8
+6 The connection between the observed X-ray sources at the Galactic Center and tidal capture has been suggested by Generozov
+et al. (2018), but see Zhu et al. (2018); Stephan et al. (2019) for
+alternative channels.
+Arca Sedda, M., Mapelli, M., Benacquista, M., & Spera, M.
+2021, arXiv e-prints, arXiv:2109.12119.
+https://arxiv.org/abs/2109.12119
+Bahcall, J. N., & Wolf, R. A. 1976, ApJ, 209, 214,
+doi: 10.1086/154711
+Baibhav, V., Gerosa, D., Berti, E., et al. 2020, PhRvD, 102,
+043002, doi: 10.1103/PhysRevD.102.043002
+Balberg, S., Sari, R., & Loeb, A. 2013, MNRAS, 434, L26,
+doi: 10.1093/mnrasl/slt071
+Baumgardt, H., Gualandris, A., & Portegies Zwart, S.
+2006a, MNRAS, 372, 174,
+doi: 10.1111/j.1365-2966.2006.10818.x
+Baumgardt, H., Hopman, C., Portegies Zwart, S., &
+Makino, J. 2006b, MNRAS, 372, 467,
+doi: 10.1111/j.1365-2966.2006.10885.x
+Baumgardt, H., Makino, J., & Ebisuzaki, T. 2004, ApJ,
+613, 1143, doi: 10.1086/423299
+Begelman, M. C. 1979, MNRAS, 187, 237,
+doi: 10.1093/mnras/187.2.237
+—. 2012a, ApJL, 749, L3, doi: 10.1088/2041-8205/749/1/L3
+IMBH Formation in Galactic Nuclei 11
+—. 2012b, MNRAS, 420, 2912,
+doi: 10.1111/j.1365-2966.2011.20071.x
+Begelman, M. C., Volonteri, M., & Rees, M. J. 2006,
+MNRAS, 370, 289, doi: 10.1111/j.1365-2966.2006.10467.x
+Belczynski, K., Hirschi, R., Kaiser, E. A., et al. 2020a, ApJ,
+890, 113, doi: 10.3847/1538-4357/ab6d77
+—. 2020b, ApJ, 890, 113, doi: 10.3847/1538-4357/ab6d77
+Bertone, G., Fornasa, M., Taoso, M., & Zentner, A. R.
+2009, New Journal of Physics, 11, 105016,
+doi: 10.1088/1367-2630/11/10/105016
+Binney, J., & Tremaine, S. 1987, Galactic dynamics
+—. 2008, Galactic Dynamics: Second Edition
+Blaes, O., Lee, M. H., & Socrates, A. 2002, ApJ, 578, 775,
+doi: 10.1086/342655
+Blandford, R. D., & Begelman, M. C. 1999, MNRAS, 303,
+L1, doi: 10.1046/j.1365-8711.1999.02358.x
+Blandford, R. D., & Znajek, R. L. 1977, MNRAS, 179, 433,
+doi: 10.1093/mnras/179.3.433
+Blecha, L., Ivanova, N., Kalogera, V., et al. 2006, ApJ, 642,
+427, doi: 10.1086/500727
+Bondi, H. 1952, MNRAS, 112, 195,
 doi: 10.1093/mnras/112.2.195
-IMBH Formation in Galactic Nuclei 9
-Bondi, H., & Hoyle, F. 1944, MNRAS, 104, 273,
-doi: 10.1093/mnras/104.5.273
-Bradnick, B., Mandel, I., & Levin, Y. 2017, MNRAS, 469,
-2042, doi: 10.1093/mnras/stx1007
-Bringmann, T., Huang, X., Ibarra, A., Vogl, S., & Weniger,
-C. 2012, JCAP, 2012, 054,
-doi: 10.1088/1475-7516/2012/07/054
-Centrella, J., Baker, J. G., Kelly, B. J., & van Meter, J. R.
-2010, Reviews of Modern Physics, 82, 3069,
-doi: 10.1103/RevModPhys.82.3069
-Chen, X., & Liu, F. K. 2013, ApJ, 762, 95,
-doi: 10.1088/0004-637X/762/2/95
-Cheng, Z., Li, Z., Xu, X., & Li, X. 2018, ApJ, 858, 33,
-doi: 10.3847/1538-4357/aaba16
-Choi, J.-H., Shlosman, I., & Begelman, M. C. 2015,
-MNRAS, 450, 4411, doi: 10.1093/mnras/stv694
-Christensen-Dalsgaard, J., Dappen, W., Ajukov, S. V.,
-et al. 1996, Science, 272, 1286,
-doi: 10.1126/science.272.5266.1286
-Cohn, H., & Kulsrud, R. M. 1978, ApJ, 226, 1087,
-doi: 10.1086/156685
-Dall’Amico, M., Mapelli, M., Di Carlo, U. N., et al. 2021,
-MNRAS, 508, 3045, doi: 10.1093/mnras/stab2783
-Di Carlo, U. N., Giacobbo, N., Mapelli, M., et al. 2019,
-MNRAS, 487, 2947, doi: 10.1093/mnras/stz1453
-Di Carlo, U. N., Mapelli, M., Pasquato, M., et al. 2021,
-MNRAS, 507, 5132, doi: 10.1093/mnras/stab2390
-Eda, K., Itoh, Y., Kuroyanagi, S., & Silk, J. 2013, PhRvL,
-110, 221101, doi: 10.1103/PhysRevLett.110.221101
-Edgar, R. 2004, NewAR, 48, 843,
-doi: 10.1016/j.newar.2004.06.001
-Ferrara, A., Salvadori, S., Yue, B., & Schleicher, D. 2014,
-Monthly Notices of the Royal Astronomical Society, 443,
-2410, doi: 10.1093/mnras/stu1280
-Fishbach, M., Farr, W. M., & Holz, D. E. 2020, ApJL, 891,
-L31, doi: 10.3847/2041-8213/ab77c9
-Fragione, G., Kocsis, B., Rasio, F. A., & Silk, J. 2021,
-arXiv e-prints, arXiv:2107.04639.
-https://arxiv.org/abs/2107.04639
-Fragione, G., Loeb, A., Kremer, K., & Rasio, F. A. 2020a,
-ApJ, 897, 46, doi: 10.3847/1538-4357/ab94b2
-Fragione, G., Loeb, A., & Rasio, F. A. 2020b, ApJL, 902,
-L26, doi: 10.3847/2041-8213/abbc0a
-Fragione, G., & Sari, R. 2018, ApJ, 852, 51,
-doi: 10.3847/1538-4357/aaa0d7
-Fregeau, J. M., Cheung, P., Portegies Zwart, S. F., &
-Rasio, F. A. 2004, MNRAS, 352, 1,
-doi: 10.1111/j.1365-2966.2004.07914.x
-Fregeau, J. M., Joshi, K. J., Portegies Zwart, S. F., &
-Rasio, F. A. 2002, ApJ, 570, 171, doi: 10.1086/339576
-Freitag, M., Amaro-Seoane, P., & Kalogera, V. 2006, ApJ,
-649, 91, doi: 10.1086/506193
-Generozov, A., & Madigan, A.-M. 2020, ApJ, 896, 137,
-doi: 10.3847/1538-4357/ab94bc
-Generozov, A., Stone, N. C., Metzger, B. D., & Ostriker,
-J. P. 2018, MNRAS, 478, 4030,
-doi: 10.1093/mnras/sty1262
-Genzel, R., Eisenhauer, F., & Gillessen, S. 2010, Reviews of
-Modern Physics, 82, 3121,
-doi: 10.1103/RevModPhys.82.3121
-Genzel, R., Sch¨odel, R., Ott, T., et al. 2003, ApJ, 594, 812,
-doi: 10.1086/377127
-Ghez, A. M., Salim, S., Hornstein, S. D., et al. 2005, ApJ,
-620, 744, doi: 10.1086/427175
-Gond´an, L., Kocsis, B., Raffai, P., & Frei, Z. 2018, ApJ,
-860, 5, doi: 10.3847/1538-4357/aabfee
-Gonz´alez, E., Kremer, K., Chatterjee, S., et al. 2021, ApJL,
-908, L29, doi: 10.3847/2041-8213/abdf5b
-GRAVITY Collaboration, Abuter, R., Amorim, A., et al.
-2020, A&A, 636, L5, doi: 10.1051/0004-6361/202037813
-Gualandris, A., & Merritt, D. 2009, ApJ, 705, 361,
-doi: 10.1088/0004-637X/705/1/361
-G¨urkan, M. A., Fregeau, J. M., & Rasio, F. A. 2006, ApJL,
-640, L39, doi: 10.1086/503295
-G¨urkan, M. A., & Rasio, F. A. 2005, ApJ, 628, 236,
-doi: 10.1086/430694
-Hailey, C. J., Mori, K., Bauer, F. E., et al. 2018, Nature,
-556, 70, doi: 10.1038/nature25029
-Hansen, B. M. S., & Milosavljevi´c, M. 2003, ApJL, 593,
-L77, doi: 10.1086/378182
-Heger, A., Fryer, C. L., Woosley, S. E., Langer, N., &
-Hartmann, D. H. 2003, ApJ, 591, 288,
-doi: 10.1086/375341
-Hoang, B.-M., Naoz, S., Kocsis, B., Rasio, F. A., &
-Dosopoulou, F. 2018, ApJ, 856, 140,
-doi: 10.3847/1538-4357/aaafce
-Hoang, B.-M., Naoz, S., & Kremer, K. 2020, ApJ, 903, 8,
-doi: 10.3847/1538-4357/abb66a
-Johnson, J. L., & Bromm, V. 2007, Monthly Notices of the
-Royal Astronomical Society, 374, 1557,
-doi: 10.1111/j.1365-2966.2006.11275.x
-Kremer, K., Lu, W., Piro, A. L., et al. 2021, ApJ, 911, 104,
-doi: 10.3847/1538-4357/abeb14
-Kremer, K., Spera, M., Becker, D., et al. 2020, ApJ, 903,
-45, doi: 10.3847/1538-4357/abb945
-Limongi, M., & Chieffi, A. 2018a, ApJS, 237, 13,
-doi: 10.3847/1538-4365/aacb24
-—. 2018b, ApJS, 237, 13, doi: 10.3847/1538-4365/aacb24
-Lu, C. X., & Naoz, S. 2019, MNRAS, 484, 1506,
+Bondi, H., & Hoyle, F. 1944, MNRAS, 104, 273,
+doi: 10.1093/mnras/104.5.273
+Bradnick, B., Mandel, I., & Levin, Y. 2017, MNRAS, 469,
+2042, doi: 10.1093/mnras/stx1007
+Brennan, R., Choi, E., Somerville, R. S., et al. 2018, ApJ,
+860, 14, doi: 10.3847/1538-4357/aac2c4
+Bringmann, T., Huang, X., Ibarra, A., Vogl, S., & Weniger,
+C. 2012, JCAP, 2012, 054,
+doi: 10.1088/1475-7516/2012/07/054
+Centrella, J., Baker, J. G., Kelly, B. J., & van Meter, J. R.
+2010, Reviews of Modern Physics, 82, 3069,
+doi: 10.1103/RevModPhys.82.3069
+Chen, X., & Liu, F. K. 2013, ApJ, 762, 95,
+doi: 10.1088/0004-637X/762/2/95
+Cheng, Z., Li, Z., Xu, X., & Li, X. 2018, ApJ, 858, 33,
+doi: 10.3847/1538-4357/aaba16
+Choi, J.-H., Shlosman, I., & Begelman, M. C. 2015,
+MNRAS, 450, 4411, doi: 10.1093/mnras/stv694
+Christensen-Dalsgaard, J., Dappen, W., Ajukov, S. V.,
+et al. 1996, Science, 272, 1286,
+doi: 10.1126/science.272.5266.1286
+Cohn, H., & Kulsrud, R. M. 1978, ApJ, 226, 1087,
+doi: 10.1086/156685
+Dale, J. E., & Davies, M. B. 2006, MNRAS, 366, 1424,
+doi: 10.1111/j.1365-2966.2005.09937.x
+Dale, J. E., Davies, M. B., Church, R. P., & Freitag, M.
+2009, MNRAS, 393, 1016,
+doi: 10.1111/j.1365-2966.2008.14254.x
+Dall’Amico, M., Mapelli, M., Di Carlo, U. N., et al. 2021,
+MNRAS, 508, 3045, doi: 10.1093/mnras/stab2783
+Das, A., Schleicher, D. R. G., Basu, S., & Boekholt, T.
+C. N. 2021a, MNRAS, 505, 2186,
+doi: 10.1093/mnras/stab1428
+Das, A., Schleicher, D. R. G., Leigh, N. W. C., & Boekholt,
+T. C. N. 2021b, MNRAS, 503, 1051,
+doi: 10.1093/mnras/stab402
+De Villiers, J.-P., Hawley, J. F., Krolik, J. H., & Hirose, S.
+2005, ApJ, 620, 878, doi: 10.1086/427142
+Di Carlo, U. N., Giacobbo, N., Mapelli, M., et al. 2019,
+MNRAS, 487, 2947, doi: 10.1093/mnras/stz1453
+Di Carlo, U. N., Mapelli, M., Pasquato, M., et al. 2021,
+MNRAS, 507, 5132, doi: 10.1093/mnras/stab2390
+Do, T., Hees, A., Ghez, A., et al. 2019, Science, 365, 664,
+doi: 10.1126/science.aav8137
+Ebisuzaki, T., Makino, J., Tsuru, T. G., et al. 2001, ApJL,
+562, L19, doi: 10.1086/338118
+Eda, K., Itoh, Y., Kuroyanagi, S., & Silk, J. 2013, PhRvL,
+110, 221101, doi: 10.1103/PhysRevLett.110.221101
+Edgar, R. 2004, NewAR, 48, 843,
+doi: 10.1016/j.newar.2004.06.001
+Escala, A. 2021, ApJ, 908, 57,
+doi: 10.3847/1538-4357/abd93c
+Ferrara, A., Salvadori, S., Yue, B., & Schleicher, D. 2014,
+Monthly Notices of the Royal Astronomical Society, 443,
+2410, doi: 10.1093/mnras/stu1280
+Fishbach, M., Farr, W. M., & Holz, D. E. 2020, ApJL, 891,
+L31, doi: 10.3847/2041-8213/ab77c9
+Fragione, G., Kocsis, B., Rasio, F. A., & Silk, J. 2021,
+arXiv e-prints, arXiv:2107.04639.
+https://arxiv.org/abs/2107.04639
+Fragione, G., Loeb, A., Kremer, K., & Rasio, F. A. 2020a,
+ApJ, 897, 46, doi: 10.3847/1538-4357/ab94b2
+Fragione, G., Loeb, A., & Rasio, F. A. 2020b, ApJL, 902,
+L26, doi: 10.3847/2041-8213/abbc0a
+Fragione, G., & Sari, R. 2018, ApJ, 852, 51,
+doi: 10.3847/1538-4357/aaa0d7
+Fregeau, J. M., Cheung, P., Portegies Zwart, S. F., &
+Rasio, F. A. 2004, MNRAS, 352, 1,
+doi: 10.1111/j.1365-2966.2004.07914.x
+Fregeau, J. M., Joshi, K. J., Portegies Zwart, S. F., &
+Rasio, F. A. 2002, ApJ, 570, 171, doi: 10.1086/339576
+Freitag, M., Amaro-Seoane, P., & Kalogera, V. 2006, ApJ,
+649, 91, doi: 10.1086/506193
+Generozov, A., & Madigan, A.-M. 2020, ApJ, 896, 137,
+doi: 10.3847/1538-4357/ab94bc
+Generozov, A., Stone, N. C., Metzger, B. D., & Ostriker,
+J. P. 2018, MNRAS, 478, 4030,
+doi: 10.1093/mnras/sty1262
+ Rose et al.
+Genzel, R., Eisenhauer, F., & Gillessen, S. 2010, Reviews of
+Modern Physics, 82, 3121,
+doi: 10.1103/RevModPhys.82.3121
+Genzel, R., Schödel, R., Ott, T., et al. 2003, ApJ, 594, 812,
+doi: 10.1086/377127
+Ghez, A. M., Salim, S., Hornstein, S. D., et al. 2005, ApJ,
+620, 744, doi: 10.1086/427175
+Gondán, L., Kocsis, B., Raffai, P., & Frei, Z. 2018, ApJ,
+860, 5, doi: 10.3847/1538-4357/aabfee
+González, E., Kremer, K., Chatterjee, S., et al. 2021, ApJL,
+908, L29, doi: 10.3847/2041-8213/abdf5b
+GRAVITY Collaboration, Abuter, R., Amorim, A., et al.
+2020, A&A, 636, L5, doi: 10.1051/0004-6361/202037813
+Gualandris, A., & Merritt, D. 2009, ApJ, 705, 361,
+doi: 10.1088/0004-637X/705/1/361
+Gürkan, M. A., Fregeau, J. M., & Rasio, F. A. 2006, ApJL,
+640, L39, doi: 10.1086/503295
+Gürkan, M. A., & Rasio, F. A. 2005, ApJ, 628, 236,
+doi: 10.1086/430694
+Hailey, C. J., Mori, K., Bauer, F. E., et al. 2018, Nature,
+556, 70, doi: 10.1038/nature25029
+Hansen, B. M. S., & Milosavljević, M. 2003, ApJL, 593,
+L77, doi: 10.1086/378182
+Heger, A., Fryer, C. L., Woosley, S. E., Langer, N., &
+Hartmann, D. H. 2003, ApJ, 591, 288,
+doi: 10.1086/375341
+Hoang, B.-M., Naoz, S., Kocsis, B., Rasio, F. A., &
+Dosopoulou, F. 2018, ApJ, 856, 140,
+doi: 10.3847/1538-4357/aaafce
+Hoang, B.-M., Naoz, S., & Kremer, K. 2020, ApJ, 903, 8,
+doi: 10.3847/1538-4357/abb66a
+Hopman, C., & Alexander, T. 2005, ApJ, 629, 362,
+doi: 10.1086/431475
+Igumenshchev, I. V. 2008, ApJ, 677, 317,
+doi: 10.1086/529025
+Igumenshchev, I. V., Narayan, R., & Abramowicz, M. A.
+2003, ApJ, 592, 1042, doi: 10.1086/375769
+Jiang, Y.-F., Stone, J. M., & Davis, S. W. 2014, ApJ, 796,
+106, doi: 10.1088/0004-637X/796/2/106
+Johnson, J. L., & Bromm, V. 2007, Monthly Notices of the
+Royal Astronomical Society, 374, 1557,
+doi: 10.1111/j.1365-2966.2006.11275.x
+Kremer, K., Lombardi, James C., J., Lu, W., Piro, A. L., &
+Rasio, F. A. 2022, arXiv e-prints, arXiv:2201.12368.
+https://arxiv.org/abs/2201.12368
+Kremer, K., Lu, W., Piro, A. L., et al. 2021, ApJ, 911, 104,
+doi: 10.3847/1538-4357/abeb14
+Kremer, K., Spera, M., Becker, D., et al. 2020, ApJ, 903,
+45, doi: 10.3847/1538-4357/abb945
+Kroupa, P., Subr, L., Jerabkova, T., & Wang, L. 2020,
+MNRAS, 498, 5652, doi: 10.1093/mnras/staa2276
+Levin, Y., & Beloborodov, A. M. 2003, ApJL, 590, L33,
+doi: 10.1086/376675
+Limongi, M., & Chieffi, A. 2018a, ApJS, 237, 13,
+doi: 10.3847/1538-4365/aacb24
+—. 2018b, ApJS, 237, 13, doi: 10.3847/1538-4365/aacb24
+Löckmann, U., & Baumgardt, H. 2008, MNRAS, 384, 323,
+doi: 10.1111/j.1365-2966.2007.12699.x
+Lu, C. X., & Naoz, S. 2019, MNRAS, 484, 1506,
 doi: 10.1093/mnras/stz036
-10 Rose et al.
-Lu, J. R., Ghez, A. M., Hornstein, S. D., et al. 2009, ApJ,
-690, 1463, doi: 10.1088/0004-637X/690/2/1463
-Madau, P., & Rees, M. J. 2001, ApJL, 551, L27,
-doi: 10.1086/319848
-Maillard, J. P., Paumard, T., Stolovy, S. R., & Rigaut, F.
-2004, A&A, 423, 155, doi: 10.1051/0004-6361:20034147
-Mapelli, M., Bouffanais, Y., Santoliquido, F., Arca Sedda,
-M., & Artale, M. C. 2021a, arXiv e-prints,
-arXiv:2109.06222. https://arxiv.org/abs/2109.06222
-Mapelli, M., Dall’Amico, M., Bouffanais, Y., et al. 2021b,
-MNRAS, 505, 339, doi: 10.1093/mnras/stab1334
-Merritt, D. 2006, Reports on Progress in Physics, 69, 2513,
-doi: 10.1088/0034-4885/69/9/R01
-Miralda-Escud´e, J., & Gould, A. 2000, ApJ, 545, 847,
-doi: 10.1086/317837
-Morris, M. 1993, ApJ, 408, 496, doi: 10.1086/172607
-Muno, M. P., Pfahl, E., Baganoff, F. K., et al. 2005, ApJL,
-622, L113, doi: 10.1086/429721
-Muno, M. P., Bauer, F. E., Baganoff, F. K., et al. 2009,
-ApJS, 181, 110, doi: 10.1088/0067-0049/181/1/110
-Naoz, S., & Silk, J. 2014, ApJ, 795, 102,
-doi: 10.1088/0004-637X/795/2/102
-Naoz, S., Silk, J., & Schnittman, J. D. 2019, ApJL, 885,
-L35, doi: 10.3847/2041-8213/ab4fed
-Naoz, S., Will, C. M., Ramirez-Ruiz, E., et al. 2020, ApJL,
-888, L8, doi: 10.3847/2041-8213/ab5e3b
-O’Leary, R. M., Kocsis, B., & Loeb, A. 2009, MNRAS, 395,
-2127, doi: 10.1111/j.1365-2966.2009.14653.x
-O’Leary, R. M., Rasio, F. A., Fregeau, J. M., Ivanova, N.,
-& O’Shaughnessy, R. 2006, ApJ, 637, 937,
-doi: 10.1086/498446
-Perets, H. B., Li, Z., Lombardi, James C., J., & Milcarek,
-Stephen R., J. 2016, ApJ, 823, 113,
-doi: 10.3847/0004-637X/823/2/113
-Peters, P. C., & Mathews, J. 1963a, Physical Review, 131,
-435, doi: 10.1103/PhysRev.131.435
-—. 1963b, Physical Review, 131, 435,
-doi: 10.1103/PhysRev.131.435
-Portegies Zwart, S. F., Baumgardt, H., Hut, P., Makino, J.,
-& McMillan, S. L. W. 2004, Nature, 428, 724,
-doi: 10.1038/nature02448
-Portegies Zwart, S. F., & McMillan, S. L. W. 2000, ApJL,
-528, L17, doi: 10.1086/312422
-—. 2002, ApJ, 576, 899, doi: 10.1086/341798
-Rashkov, V., & Madau, P. 2014, ApJ, 780, 187,
-doi: 10.1088/0004-637X/780/2/187
-Renzo, M., Farmer, R., Justham, S., et al. 2020, A&A, 640,
-A56, doi: 10.1051/0004-6361/202037710
-Rodriguez, C. L., Amaro-Seoane, P., Chatterjee, S., &
-Rasio, F. A. 2018, PhRvL, 120, 151101,
-doi: 10.1103/PhysRevLett.120.151101
-Rodriguez, C. L., Chatterjee, S., & Rasio, F. A. 2016,
-PhRvD, 93, 084029, doi: 10.1103/PhysRevD.93.084029
-Rodriguez, C. L., Zevin, M., Amaro-Seoane, P., et al. 2019,
-Phys. Rev. D, 100, 043027,
-doi: 10.1103/PhysRevD.100.043027
-Rose, S. C., Naoz, S., Gautam, A. K., et al. 2020, ApJ, 904,
-113, doi: 10.3847/1538-4357/abc557
-Sakstein, J., Croon, D., McDermott, S. D., Straight, M. C.,
-& Baxter, E. J. 2020, arXiv e-prints, arXiv:2009.01213.
-https://arxiv.org/abs/2009.01213
-Samsing, J., Venumadhav, T., Dai, L., et al. 2019, PhRvD,
-100, 043009, doi: 10.1103/PhysRevD.100.043009
-Sari, R., & Fragione, G. 2019, ApJ, 885, 24,
-doi: 10.3847/1538-4357/ab43df
-Schneider, R., Ferrara, A., Natarajan, P., & Omukai, K.
-2002, The Astrophysical Journal, 571, 30,
-doi: 10.1086/339917
-Schnittman, J. D., & Buonanno, A. 2007, ApJL, 662, L63,
-doi: 10.1086/519309
-Shapiro, S. L., & Marchant, A. B. 1978, ApJ, 225, 603,
-doi: 10.1086/156521
-Shima, E., Matsuda, T., Takeda, H., & Sawada, K. 1985,
-MNRAS, 217, 367, doi: 10.1093/mnras/217.2.367
-Shlosman, I., Choi, J.-H., Begelman, M. C., & Nagamine,
-K. 2016, MNRAS, 456, 500, doi: 10.1093/mnras/stv2700
-Sigurdsson, S., & Phinney, E. S. 1993, ApJ, 415, 631,
-doi: 10.1086/173190
-Spera, M., & Mapelli, M. 2017a, MNRAS, 470, 4739,
-doi: 10.1093/mnras/stx1576
-—. 2017b, MNRAS, 470, 4739, doi: 10.1093/mnras/stx1576
-Spitzer, L. 1987, Dynamical evolution of globular clusters
-Stephan, A. P., Naoz, S., Ghez, A. M., et al. 2016, ArXiv
-e-prints. https://arxiv.org/abs/1603.02709
-—. 2019, ApJ, 878, 58, doi: 10.3847/1538-4357/ab1e4d
-The LIGO Scientific Collaboration, the Virgo
-Collaboration, Abbott, R., et al. 2020a, arXiv e-prints,
-arXiv:2009.01075. https://arxiv.org/abs/2009.01075
-—. 2020b, arXiv e-prints, arXiv:2009.01190.
-https://arxiv.org/abs/2009.01190
-Umbreit, S., Fregeau, J. M., Chatterjee, S., & Rasio, F. A.
-2012, ApJ, 750, 31, doi: 10.1088/0004-637X/750/1/31
-Valiante, R., Schneider, R., Volonteri, M., & Omukai, K.
-2016, Monthly Notices of the Royal Astronomical
-Society, 457, 3356, doi: 10.1093/mnras/stw225
-Vink, J. S., Higgins, E. R., Sander, A. A. C., & Sabhahit,
-G. N. 2021, MNRAS, 504, 146,
+Lu, J. R., Ghez, A. M., Hornstein, S. D., et al. 2009, ApJ,
+690, 1463, doi: 10.1088/0004-637X/690/2/1463
+Madau, P., & Rees, M. J. 2001, ApJL, 551, L27,
+doi: 10.1086/319848
+Magorrian, J., & Tremaine, S. 1999, MNRAS, 309, 447,
+doi: 10.1046/j.1365-8711.1999.02853.x
+Maillard, J. P., Paumard, T., Stolovy, S. R., & Rigaut, F.
+2004, A&A, 423, 155, doi: 10.1051/0004-6361:20034147
+Mapelli, M., Bouffanais, Y., Santoliquido, F., Arca Sedda,
+M., & Artale, M. C. 2021a, arXiv e-prints,
+arXiv:2109.06222. https://arxiv.org/abs/2109.06222
+Mapelli, M., Dall’Amico, M., Bouffanais, Y., et al. 2021b,
+MNRAS, 505, 339, doi: 10.1093/mnras/stab1334
+Mastrobuono-Battisti, A., Church, R. P., & Davies, M. B.
+2021, MNRAS, 505, 3314, doi: 10.1093/mnras/stab1409
+McKinney, J. C. 2006, MNRAS, 368, 1561,
+doi: 10.1111/j.1365-2966.2006.10256.x
+McKinney, J. C., & Gammie, C. F. 2004, ApJ, 611, 977,
+doi: 10.1086/422244
+McKinney, J. C., Tchekhovskoy, A., Sadowski, A., &
+Narayan, R. 2014, MNRAS, 441, 3177,
+doi: 10.1093/mnras/stu762
+Merritt, D. 2006, Reports on Progress in Physics, 69, 2513,
+doi: 10.1088/0034-4885/69/9/R01
+Miralda-Escudé, J., & Gould, A. 2000, ApJ, 545, 847,
+doi: 10.1086/317837
+Morris, M. 1993, ApJ, 408, 496, doi: 10.1086/172607
+Muno, M. P., Pfahl, E., Baganoff, F. K., et al. 2005, ApJL,
+622, L113, doi: 10.1086/429721
+Muno, M. P., Bauer, F. E., Baganoff, F. K., et al. 2009,
+ApJS, 181, 110, doi: 10.1088/0067-0049/181/1/110
+Murray, N., Quataert, E., & Thompson, T. A. 2005, ApJ,
+618, 569, doi: 10.1086/426067
+Naoz, S., Rose, S. C., Michaely, E., et al. 2022, ApJL, 927,
+L18, doi: 10.3847/2041-8213/ac574b
+Naoz, S., & Silk, J. 2014, ApJ, 795, 102,
+doi: 10.1088/0004-637X/795/2/102
+Naoz, S., Silk, J., & Schnittman, J. D. 2019, ApJL, 885,
+L35, doi: 10.3847/2041-8213/ab4fed
+IMBH Formation in Galactic Nuclei 13
+Naoz, S., Will, C. M., Ramirez-Ruiz, E., et al. 2020, ApJL,
+888, L8, doi: 10.3847/2041-8213/ab5e3b
+Narayan, R., Chael, A., Chatterjee, K., Ricarte, A., &
+Curd, B. 2022, MNRAS, 511, 3795,
+doi: 10.1093/mnras/stac285
+Narayan, R., Igumenshchev, I. V., & Abramowicz, M. A.
+2003, PASJ, 55, L69, doi: 10.1093/pasj/55.6.L69
+Ohsuga, K., Mori, M., Nakamoto, T., & Mineshige, S. 2005,
+ApJ, 628, 368, doi: 10.1086/430728
+O’Leary, R. M., Kocsis, B., & Loeb, A. 2009, MNRAS, 395,
+2127, doi: 10.1111/j.1365-2966.2009.14653.x
+O’Leary, R. M., Rasio, F. A., Fregeau, J. M., Ivanova, N.,
+& O’Shaughnessy, R. 2006, ApJ, 637, 937,
+doi: 10.1086/498446
+Ostriker, J. P., Choi, E., Ciotti, L., Novak, G. S., & Proga,
+D. 2010, ApJ, 722, 642,
+doi: 10.1088/0004-637X/722/1/642
+Park, M.-G., & Ostriker, J. P. 2001, ApJ, 549, 100,
+doi: 10.1086/319042
+Paumard, T., Genzel, R., Martins, F., et al. 2006, ApJ, 643,
+1011, doi: 10.1086/503273
+Perets, H. B., Li, Z., Lombardi, James C., J., & Milcarek,
+Stephen R., J. 2016, ApJ, 823, 113,
+doi: 10.3847/0004-637X/823/2/113
+Peters, P. C., & Mathews, J. 1963a, Physical Review, 131,
+435, doi: 10.1103/PhysRev.131.435
+—. 1963b, Physical Review, 131, 435,
+doi: 10.1103/PhysRev.131.435
+Portegies Zwart, S. F., Baumgardt, H., Hut, P., Makino, J.,
+& McMillan, S. L. W. 2004, Nature, 428, 724,
+doi: 10.1038/nature02448
+Portegies Zwart, S. F., & McMillan, S. L. W. 2000, ApJL,
+528, L17, doi: 10.1086/312422
+—. 2002, ApJ, 576, 899, doi: 10.1086/341798
+Rashkov, V., & Madau, P. 2014, ApJ, 780, 187,
+doi: 10.1088/0004-637X/780/2/187
+Renzo, M., Farmer, R., Justham, S., et al. 2020, A&A, 640,
+A56, doi: 10.1051/0004-6361/202037710
+Rizzuto, F. P., Naab, T., Spurzem, R., et al. 2022,
+MNRAS, doi: 10.1093/mnras/stac231
+Rodriguez, C. L., Amaro-Seoane, P., Chatterjee, S., &
+Rasio, F. A. 2018, PhRvL, 120, 151101,
+doi: 10.1103/PhysRevLett.120.151101
+Rodriguez, C. L., Chatterjee, S., & Rasio, F. A. 2016,
+PhRvD, 93, 084029, doi: 10.1103/PhysRevD.93.084029
+Rodriguez, C. L., Zevin, M., Amaro-Seoane, P., et al. 2019,
+Phys. Rev. D, 100, 043027,
+doi: 10.1103/PhysRevD.100.043027
+Rose, S. C., Naoz, S., Gautam, A. K., et al. 2020, ApJ, 904,
+113, doi: 10.3847/1538-4357/abc557
+Sakstein, J., Croon, D., McDermott, S. D., Straight, M. C.,
+& Baxter, E. J. 2020, arXiv e-prints, arXiv:2009.01213.
+https://arxiv.org/abs/2009.01213
+Sakurai, Y., Yoshida, N., Fujii, M. S., & Hirano, S. 2017,
+MNRAS, 472, 1677, doi: 10.1093/mnras/stx2044
+Samsing, J., Venumadhav, T., Dai, L., et al. 2019, PhRvD,
+100, 043009, doi: 10.1103/PhysRevD.100.043009
+Sari, R., & Fragione, G. 2019, ApJ, 885, 24,
+doi: 10.3847/1538-4357/ab43df
+Schneider, R., Ferrara, A., Natarajan, P., & Omukai, K.
+2002, The Astrophysical Journal, 571, 30,
+doi: 10.1086/339917
+Schnittman, J. D., & Buonanno, A. 2007, ApJL, 662, L63,
+doi: 10.1086/519309
+Schödel, R., Gallego-Cano, E., Dong, H., et al. 2018, A&A,
+609, A27, doi: 10.1051/0004-6361/201730452
+Shapiro, S. L., & Marchant, A. B. 1978, ApJ, 225, 603,
+doi: 10.1086/156521
+Shima, E., Matsuda, T., Takeda, H., & Sawada, K. 1985,
+MNRAS, 217, 367, doi: 10.1093/mnras/217.2.367
+Shlosman, I., Choi, J.-H., Begelman, M. C., & Nagamine,
+K. 2016, MNRAS, 456, 500, doi: 10.1093/mnras/stv2700
+Sigurdsson, S., & Phinney, E. S. 1993, ApJ, 415, 631,
+doi: 10.1086/173190
+Spera, M., & Mapelli, M. 2017a, MNRAS, 470, 4739,
+doi: 10.1093/mnras/stx1576
+—. 2017b, MNRAS, 470, 4739, doi: 10.1093/mnras/stx1576
+Spitzer, L. 1987, Dynamical evolution of globular clusters
+Stephan, A. P., Naoz, S., Ghez, A. M., et al. 2016, ArXiv
+e-prints. https://arxiv.org/abs/1603.02709
+—. 2019, ApJ, 878, 58, doi: 10.3847/1538-4357/ab1e4d
+Stone, N. C., Küpper, A. H. W., & Ostriker, J. P. 2017,
+MNRAS, 467, 4180, doi: 10.1093/mnras/stx097
+Stone, N. C., & Metzger, B. D. 2016, MNRAS, 455, 859,
+doi: 10.1093/mnras/stv2281
+The LIGO Scientific Collaboration, the Virgo
+Collaboration, Abbott, R., et al. 2020a, arXiv e-prints,
+arXiv:2009.01075. https://arxiv.org/abs/2009.01075
+—. 2020b, arXiv e-prints, arXiv:2009.01190.
+https://arxiv.org/abs/2009.01190
+Umbreit, S., Fregeau, J. M., Chatterjee, S., & Rasio, F. A.
+2012, ApJ, 750, 31, doi: 10.1088/0004-637X/750/1/31
+Valiante, R., Schneider, R., Volonteri, M., & Omukai, K.
+2016, Monthly Notices of the Royal Astronomical
+Society, 457, 3356, doi: 10.1093/mnras/stw225
+Vink, J. S., Higgins, E. R., Sander, A. A. C., & Sabhahit,
+G. N. 2021, MNRAS, 504, 146,
 doi: 10.1093/mnras/stab842
-IMBH Formation in Galactic Nuclei 11
-Wang, H., Stephan, A. P., Naoz, S., Hoang, B.-M., &
-Breivik, K. 2021, ApJ, 917, 76,
-doi: 10.3847/1538-4357/ac088d
-Woosley, S. E. 2017, ApJ, 836, 244,
-doi: 10.3847/1538-4357/836/2/244
-Yue, B., Ferrara, A., Salvaterra, R., Xu, Y., & Chen, X.
-2014, Monthly Notices of the Royal Astronomical
-Society, 440, 1263, doi: 10.1093/mnras/stu351
-Zheng, X., Lin, D. N. C., & Mao, S. 2020, arXiv e-prints,
-arXiv:2011.04653. https://arxiv.org/abs/2011.04653
-Zhu, Z., Li, Z., & Morris, M. R. 2018, ApJS, 235, 26,
-doi: 10.3847/1538-4365/aab14f
+ Rose et al.
+Wang, H., Stephan, A. P., Naoz, S., Hoang, B.-M., &
+Breivik, K. 2021, ApJ, 917, 76,
+doi: 10.3847/1538-4357/ac088d
+Wang, J., & Merritt, D. 2004, ApJ, 600, 149,
+doi: 10.1086/379767
+Woosley, S. E. 2017, ApJ, 836, 244,
+doi: 10.3847/1538-4357/836/2/244
+Yu, Q., & Tremaine, S. 2002, MNRAS, 335, 965,
+doi: 10.1046/j.1365-8711.2002.05532.x
+Yuan, F., Wu, M., & Bu, D. 2012, ApJ, 761, 129,
+doi: 10.1088/0004-637X/761/2/129
+Yue, B., Ferrara, A., Salvaterra, R., Xu, Y., & Chen, X.
+2014, Monthly Notices of the Royal Astronomical
+Society, 440, 1263, doi: 10.1093/mnras/stu351
+Zheng, X., Lin, D. N. C., & Mao, S. 2020, arXiv e-prints,
+arXiv:2011.04653. https://arxiv.org/abs/2011.04653
+Zhu, Z., Li, Z., & Morris, M. R. 2018, ApJS, 235, 26,
+doi: 10.3847/1538-4365/aab14f
\ No newline at end of file
diff --git a/read/results/pdfium/2201.00029.txt b/read/results/pdfium/2201.00029.txt
index 48f509c..8803826 100644
--- a/read/results/pdfium/2201.00029.txt
+++ b/read/results/pdfium/2201.00029.txt
@@ -1,273 +1,273 @@
- 1
-Exploring new techniques for analyzing variability in white dwarf KIC 8626021
-Thomas Huckans, Peter Stine
-Department of Physics and Engineering, Bloomsburg University of Pennsylvania, 400 E 2nd St., 
+
+Exploring new techniques for analyzing variability in white dwarf KIC 8626021
+Thomas Huckans, Peter Stine
+Department of Physics and Engineering, Bloomsburg University of Pennsylvania, 400 E 2nd St.,
 Bloomsburg, PA 17815
-2
-Abstract
-As is common with the collection of astronomical data, signals are frequently dominated 
-by noise. However, when performing FTs of light curves, re-binning data can improve the signal￾to-noise ratio (SNR) at lower frequencies. Using data collected from the Kepler space telescope, 
-we sequentially re-binned data three times to investigate the SNR improvement of lower frequency 
-(< 17 µHz) variability in white dwarf KIC 8626021. We found that the SNR at approximately 5.8
-µHz greatly improved through this process, and we postulate that this frequency is linked to the 
-rotation of KIC 8626021.
-Introduction
-First detected in 1862, white dwarfs long posed a mystery for early observers. When the 
-companion to Sirius was detected, apparent contradictions concerning the mass, luminosities, and 
-densities baffled astronomers. Lacking full understanding of atomic structures and the energy 
-states of electrons, these early researchers believed white dwarfs too dense to exist. However, new 
-discoveries at the turn of the 20th century explained the existence of these stars, and between the 
-world wars white dwarfs were increasingly studied and modeled (Holberg, 2009).
-As stars age, those that lack the mass to become neutron stars and black holes become 
-white dwarf stars, representing 98% of the stars in our galaxy (Winget & Kepler, 2008). They are 
-composed of a core of carbon and oxygen ions that slowly cools over billions of years, and the 
-light emanating from these stars is a result of thermal energy. White dwarf stars are no longer 
-supported against the force of gravity by fusion, so the stars collapse into an electron-degenerate 
-state where the electrons in the carbon and oxygen atoms occupy the lowest energy levels. As two 
-electrons cannot occupy the same quantum state, Pauli repulsion keeps white dwarfs from 
-collapsing entirely.
-For many years, accurate detection of light variability in white dwarfs was difficult due to 
-a lack of adequate instruments. However, the launch of the Kepler space telescope in 2009 made 
-capturing the light of distant stars much more efficient and effective (Basri et al., 2010). Kepler 
-was initially developed with the intention of surveying our region of the Milky Way galaxy in 
-order to find potentially habitable planets. The purpose of the mission was to identify key traits for 
-such planets by determining the number of planets in habitable zones, the sizes and shapes of orbits, 
-and the characteristics of the stars being orbited. Over the lifespan of its first mission, Kepler 
-observed approximately 1.5 x 105 stars (Johnson, 2018), affording scientists excellent 
-opportunities to research stellar variability. Due to the loss of a second reaction wheel in 2013, 
-NASA developed the K2 mission, a way to prolong Kepler’s assistance to astronomy and 
-astrophysics.
-Utilizing Kepler’s ability to maintain three-dimensional control, NASA proceeded to use 
-the telescope to collect photometry data of certain sections of our galaxy, although the number of 
-targets was significantly reduced. In addition, the K2 mission was designed to be community￾oriented, with the scientific community having an influence on the fields observed and serving as 
-the analysts of the vast amounts of data being received (Howell et al., 2014). Although Kepler was 
-deactivated in 2018, the data used in this paper came from observations during 2010 and 2012 of 
-white dwarf KIC 8626021 and was obtained from the Kepler Asteroseismic Science Operations 
+
+Abstract
+As is common with the collection of astronomical data, signals are frequently dominated
+by noise. However, when performing FTs of light curves, re-binning data can improve the signalto-noise ratio (SNR) at lower frequencies. Using data collected from the Kepler space telescope,
+we sequentially re-binned data three times to investigate the SNR improvement of lower frequency
+(< 17 µHz) variability in white dwarf KIC 8626021. We found that the SNR at approximately 5.8
+µHz greatly improved through this process, and we postulate that this frequency is linked to the
+rotation of KIC 8626021.
+Introduction
+First detected in 1862, white dwarfs long posed a mystery for early observers. When the
+companion to Sirius was detected, apparent contradictions concerning the mass, luminosities, and
+densities baffled astronomers. Lacking full understanding of atomic structures and the energy
+states of electrons, these early researchers believed white dwarfs too dense to exist. However, new
+discoveries at the turn of the 20th century explained the existence of these stars, and between the
+world wars white dwarfs were increasingly studied and modeled (Holberg, 2009).
+As stars age, those that lack the mass to become neutron stars and black holes become
+white dwarf stars, representing 98% of the stars in our galaxy (Winget & Kepler, 2008). They are
+composed of a core of carbon and oxygen ions that slowly cools over billions of years, and the
+light emanating from these stars is a result of thermal energy. White dwarf stars are no longer
+supported against the force of gravity by fusion, so the stars collapse into an electron-degenerate
+state where the electrons in the carbon and oxygen atoms occupy the lowest energy levels. As two
+electrons cannot occupy the same quantum state, Pauli repulsion keeps white dwarfs from
+collapsing entirely.
+For many years, accurate detection of light variability in white dwarfs was difficult due to
+a lack of adequate instruments. However, the launch of the Kepler space telescope in 2009 made
+capturing the light of distant stars much more efficient and effective (Basri et al., 2010). Kepler
+was initially developed with the intention of surveying our region of the Milky Way galaxy in
+order to find potentially habitable planets. The purpose of the mission was to identify key traits for
+such planets by determining the number of planets in habitable zones, the sizes and shapes of orbits,
+and the characteristics of the stars being orbited. Over the lifespan of its first mission, Kepler
+observed approximately 1.5 x 105 stars (Johnson, 2018), affording scientists excellent
+opportunities to research stellar variability. Due to the loss of a second reaction wheel in 2013,
+NASA developed the K2 mission, a way to prolong Kepler’s assistance to astronomy and
+astrophysics.
+Utilizing Kepler’s ability to maintain three-dimensional control, NASA proceeded to use
+the telescope to collect photometry data of certain sections of our galaxy, although the number of
+targets was significantly reduced. In addition, the K2 mission was designed to be communityoriented, with the scientific community having an influence on the fields observed and serving as
+the analysts of the vast amounts of data being received (Howell et al., 2014). Although Kepler was
+deactivated in 2018, the data used in this paper came from observations during 2010 and 2012 of
+white dwarf KIC 8626021 and was obtained from the Kepler Asteroseismic Science Operations
 Center (KASOC).
-3
-The DBV white dwarf KIC 8626021 has an atmosphere rich in helium. Building upon 
-previous studies, this research investigated novel techniques of analyzing variability in white 
-dwarfs. The dwarf KIC 8626021 was chosen due to the large amount of preexisting research on 
-the star, allowing for the validation of results using our methods. KIC 8626021 has an effective 
-temperature of 29,700 K, log g = 7.890, and mass of 0.56 M☉ (Córsico, 2020). Other research
-has found that this white dwarf is the DBV with the highest known temperature, and its helium 
-layer is the thinnest (Bischoff-Kim et al., 2015). Despite the long-cadence light curve being too 
-noisy to draw many conclusions, other FTs of short-cadence data have been performed to find 
-variability in the dwarf. Analyses at high frequencies of KIC 8626021 yielded pulsations with 
-frequencies of 4309.89 µHz, 5073.26 µHz, 3681.87 µHz, 3294.22 µHz and 2658.85 µHz
-(Østensen et al., 2011). These findings confirm the classification of the white dwarf as a V777 
-Herculis, although our research focuses on low frequencies using long-cadence data.
-Methods
-All data were downloaded from the KASOC database, and the long-cadence (data 
-sampled approximately every thirty minutes) measurements of Corrected Flux (ppm) were 
-analyzed. All computations were made in Wolfram Mathematica and Microsoft Excel, and FTs
-were performed in Mathematica. The re-binning process consisted of summing adjacent light 
-curve data points in each quarter, therefore doubling the sampling interval from 0.5 hour to one
-hour, and then repeating this process on the data sample for a total of three times. In addition, a 
-significant detection was defined as being 3 above the mean of the relative flux, and 0 on the 
-graphs below represents this 3 cutoff. (Koch, D. G., 2010), (Wolfram Research, Inc., 2021). To 
-find the SNR, we converted to decibels. Using these SNRs, we were able to easily identify 
-improvement in signal strength.
-Results
-Figure 1 presents the lightcurves constructed for quarters seven (Q7) and thirteen (Q13), 
-with corrected flux magnitude (ppm) plotted versus time (Julian days). Figure 2 presents the FTs 
-of the first iteration and three successive re-bins for Q7, while Figure 3 presents the FTs of the 
-same for Q13.
-Tables 1 and 2 both show the hypothesized frequency corresponding to the rotation of 
-KIC 8626021 that is found in the FTs of the first iteration and subsequent re-bins for Q7 and 
-Q13. Tables 3 and 4 show all data values < 17 µHz found in the first iterations and re-bins of Q7 
+
+The DBV white dwarf KIC 8626021 has an atmosphere rich in helium. Building upon
+previous studies, this research investigated novel techniques of analyzing variability in white
+dwarfs. The dwarf KIC 8626021 was chosen due to the large amount of preexisting research on
+the star, allowing for the validation of results using our methods. KIC 8626021 has an effective
+temperature of 29,700 K, log g = 7.890, and mass of 0.56 M☉ (Córsico, 2020). Other research
+has found that this white dwarf is the DBV with the highest known temperature, and its helium
+layer is the thinnest (Bischoff-Kim et al., 2015). Despite the long-cadence light curve being too
+noisy to draw many conclusions, other FTs of short-cadence data have been performed to find
+variability in the dwarf. Analyses at high frequencies of KIC 8626021 yielded pulsations with
+frequencies of 4309.89 µHz, 5073.26 µHz, 3681.87 µHz, 3294.22 µHz and 2658.85 µHz
+(Østensen et al., 2011). These findings confirm the classification of the white dwarf as a V777
+Herculis, although our research focuses on low frequencies using long-cadence data.
+Methods
+All data were downloaded from the KASOC database, and the long-cadence (data
+sampled approximately every thirty minutes) measurements of Corrected Flux (ppm) were
+analyzed. All computations were made in Wolfram Mathematica and Microsoft Excel, and FTs
+were performed in Mathematica. The re-binning process consisted of summing adjacent light
+curve data points in each quarter, therefore doubling the sampling interval from 0.5 hour to one
+hour, and then repeating this process on the data sample for a total of three times. In addition, a
+significant detection was defined as being 3 above the mean of the relative flux, and 0 on the
+graphs below represents this 3 cutoff. (Koch, D. G., 2010), (Wolfram Research, Inc., 2021). To
+find the SNR, we converted to decibels. Using these SNRs, we were able to easily identify
+improvement in signal strength.
+Results
+Figure 1 presents the lightcurves constructed for quarters seven (Q7) and thirteen (Q13),
+with corrected flux magnitude (ppm) plotted versus time (Julian days). Figure 2 presents the FTs
+of the first iteration and three successive re-bins for Q7, while Figure 3 presents the FTs of the
+same for Q13.
+Tables 1 and 2 both show the hypothesized frequency corresponding to the rotation of
+KIC 8626021 that is found in the FTs of the first iteration and subsequent re-bins for Q7 and
+Q13. Tables 3 and 4 show all data values < 17 µHz found in the first iterations and re-bins of Q7
 and Q13.
-4
-FIG. 1: Pictured top is the light curve constructed for Q7, below is the light curve for Q13. Q7 
-lasted from September 24 – December 13, 2010, and Q13 was from March 29 – June 23, 2012.
-Both graphs were constructed by plotting corrected flux magnitude (flux corrected for 
-instrumental artifacts) versus time in Excel, and gaps in the data were filled in by interpolating 
+
+FIG. 1: Pictured top is the light curve constructed for Q7, below is the light curve for Q13. Q7
+lasted from September 24 – December 13, 2010, and Q13 was from March 29 – June 23, 2012.
+Both graphs were constructed by plotting corrected flux magnitude (flux corrected for
+instrumental artifacts) versus time in Excel, and gaps in the data were filled in by interpolating
 between points. Q7 had forty-three interpolated points, and Q13 had sixty-six.
-5
-FIG. 2: The graphs show the initial FTs of Q7, and then the FTs of the three successive re-bins of 
-the light curve data. The significant frequencies of 5.886 µHz and 5.889 µHz are circled. The 
-disappearance of the frequency in the last FT is most likely a byproduct of the method, and the 
-spurious frequency of 5.464 µHz in the last FT most probably represents an artifact of the re￾binning process.
-6
-FIG. 3: The graphs show the initial FT of Q13, and then the FTs of the three successive re-bins 
-of the light curve data. The significant frequencies of 5.784 µHz and 5.787 µHz are circled. In 
-addition, in the third re-bin, the frequencies 11.641 µHz and 16.823 µHz rise above 3 and are 
-nearly perfect integer multiples of 5.787 µHz. These harmonics are potentially indications of a 
+
+FIG. 2: The graphs show the initial FTs of Q7, and then the FTs of the three successive re-bins of
+the light curve data. The significant frequencies of 5.886 µHz and 5.889 µHz are circled. The
+disappearance of the frequency in the last FT is most likely a byproduct of the method, and the
+spurious frequency of 5.464 µHz in the last FT most probably represents an artifact of the rebinning process.
+
+FIG. 3: The graphs show the initial FT of Q13, and then the FTs of the three successive re-bins
+of the light curve data. The significant frequencies of 5.784 µHz and 5.787 µHz are circled. In
+addition, in the third re-bin, the frequencies 11.641 µHz and 16.823 µHz rise above 3 and are
+nearly perfect integer multiples of 5.787 µHz. These harmonics are potentially indications of a
 starspot (Santos et al., 2017).
-7
-Q7 Significant 
-Data Points
-Light 
-Variability 
-Frequency 
-(µHz)
-Corrected Flux 
-Magnitude 
-(ppm)
-Period (days) Signal-to-Noise
-(dB)
-Q7 First 
-Iteration
-5.886 -1.198 1.966 9.9
-Q7 Re-bin 1 5.886 -1.477 1.966 12.8
-Q7 Re-bin 2 5.889 0.597 1.965 19.2
-TABLE I: The table displays the various frequencies collected from Q7 and the information 
-found through calculations to find period and SNR. The frequency of 5.464 µHz is not included, 
-and therefore was not used in any calculations determining the average period of rotation. The
-values under corrected flux magnitude are relative to our significant frequency cutoff of 3, thus 
-negative numbers are under the cutoff.
-Q13 Significant 
-Data Points
-Light 
-Variability 
-Frequency 
-(µHz)
-Corrected Flux 
-Magnitude 
-(ppm)
-Period (days) Signal-to-Noise 
-(dB)
-Q13 First 
-Iteration
-5.784 1.555 2.001 15.6
-Q13 Re-bin 1 5.784 2.873 2.001 17.7
-Q13 Re-bin 2 5.787 4.938 2.000 22.6
-Q13 Re-bin 3 5.787 6.909 2.000 26.3
-Q13 Re-bin 3 11.641 7.073 0.994 26.4
-Q13 Re-bin 3 16.823 2.299 0.688 24.1
-TABLE II: The table displays the various frequencies collected from Q13 and the information 
-found through calculations to find period and SNR. The last two significant frequencies (11.641
-µHz and 16.823 µHz) for Q13 Re-bin 3 represent potential harmonics, which are discussed in 
-further detail in the Conclusions section of this paper. The values under corrected flux magnitude
+
+Q7 Significant
+Data Points
+Light
+Variability
+Frequency
+(µHz)
+Corrected Flux
+Magnitude
+(ppm)
+Period (days) Signal-to-Noise
+(dB)
+Q7 First
+Iteration
+5.886 -1.198 1.966 9.9
+Q7 Re-bin 1 5.886 -1.477 1.966 12.8
+Q7 Re-bin 2 5.889 0.597 1.965 19.2
+TABLE I: The table displays the various frequencies collected from Q7 and the information
+found through calculations to find period and SNR. The frequency of 5.464 µHz is not included,
+and therefore was not used in any calculations determining the average period of rotation. The
+values under corrected flux magnitude are relative to our significant frequency cutoff of 3, thus
+negative numbers are under the cutoff.
+Q13 Significant
+Data Points
+Light
+Variability
+Frequency
+(µHz)
+Corrected Flux
+Magnitude
+(ppm)
+Period (days) Signal-to-Noise
+(dB)
+Q13 First
+Iteration
+5.784 1.555 2.001 15.6
+Q13 Re-bin 1 5.784 2.873 2.001 17.7
+Q13 Re-bin 2 5.787 4.938 2.000 22.6
+Q13 Re-bin 3 5.787 6.909 2.000 26.3
+Q13 Re-bin 3 11.641 7.073 0.994 26.4
+Q13 Re-bin 3 16.823 2.299 0.688 24.1
+TABLE II: The table displays the various frequencies collected from Q13 and the information
+found through calculations to find period and SNR. The last two significant frequencies (11.641
+µHz and 16.823 µHz) for Q13 Re-bin 3 represent potential harmonics, which are discussed in
+further detail in the Conclusions section of this paper. The values under corrected flux magnitude
 are relative to our significant frequency cutoff of 3, thus negative numbers are under the cutoff.
-8
-First Iteration (µHz) First Re-bin (µHz) Second Re-bin (µHz) Third Re-bin (µHz)
-0.933 0.933 0.215 0.216
-1.148 1.148 0.575 0.575
-1.364 1.364 0.934 0.935
-1.507 1.507 1.005 1.006
-12.561 12.561 1.149 1.150
-16.581 16.581 1.221 1.222
-1.364 1.366
-1.508 1.509
-1.580 1.582
-1.724 1.725
-1.795 1.797
-5.889 2.085
-6.822 5.392
-9.192 5.464
-9.479 7.476
-11.203 9.489
-12.568 11.215
-14.291 12.581
-16.230 13.084
-16.589 13.443
-13.659
-14.018
-14.809
-15.097
-16.031
-16.463
-16.894
-TABLE III: The table displays all frequencies of Q7 that had a corrected flux magnitude (ppm)
-above the cutoff of 3. The minor shifting of significant frequencies between re-bins is a by￾product of the method, and we calculated for such errors when finding our average.
-9
-First Iteration (µHz) First Re-bin (µHz) Second Re-bin (µHz) Third Re-bin (µHz)
-3.094 2.018 2.019 1.951
-5.784 3.094 3.095 2.019
-9.080 5.784 5.787 2.442
-13.519 7.667 7.671 2.759
-15.671 9.080 9.084 3.095
-16.209 11.165 11.641 3.634
-16.411 13.519 13.526 4.374
-15.469 15.477 4.778
-15.671 15.679 4.912
-16.209 15.881 5.047
-16.411 16.419 5.787
-8.479
-9.084
-10.565
-11.641
-13.526
-15.544
-15.881
-16.823
-TABLE IV: The table displays all frequencies of Q13 that had a corrected flux magnitude (ppm)
-above the cutoff of 3. The minor shifting of significant frequencies between re-bins is a by￾product of the method, and we calculated for such errors when finding our average.
-Conclusions
-As our research used the long-cadence data from Kepler, much of the high-frequency 
-variability due to gravitational wave pulsations is lost. However, this presents an opportunity to 
-verify our results with the work of research groups that analyzed short-cadence data.With the 
-data analyzed, the lower frequencies between 5-6 µHz emerged. After finding the average of the 
-periods and accounting for a 1 margin of error, our research hypothesizes that the rotation 
-period of KIC 8626021 is 1.99 ± 0.02 days. Other short-cadence research has found the rotation 
-period to be 1.8 ± 0.4 days, by analyzing the structures of independent modes (Bischoff-Kim et 
-al., 2015). Other calculated periods of rotation have been ≈ 1.7 days (Østensen et al., 2011), and 
-these periods indicate that the more precise significant period identified through our re-binning 
-relates to the rotation of the white dwarf.
-Through the re-binning process, the SNR clearly improves for both quarters, and for Q7 it
-improves by approximately 1.3 dB, except for the last data re-bin. In the last re-bin, the previous 
-10
-significant frequency disappears, which becomes increasingly likely after successive re-binning
-processes. The frequency 5.464 µHz rises as another significant frequency; however, we believe 
-that this new frequency is simply an artifact of the re-binning process. In Q13, we saw SNR
-improvement ranging from 1.1 dB to 1.3 dB. 
-Through the re-binning process, more lines, or significant frequencies, appeared above 
-the 3 cutoff, particularly at lower frequencies. These findings suggest that as an alternative to 
-short-cadence analysis, the re-binning process of long-cadence data can be used to identify 
-significant lower frequencies in white dwarfs. The methods we used are also simple and 
-replicable, which allows even those with less experience to quickly analyze the large amounts of 
-data being collected by orbiting telescopes, such as the currently active TESS (Transiting 
-Exoplanet Survey Satellite) telescope.
-The presence of possible harmonics in the third re-bin of Q13 also indicates the possible 
-presence of a previously unseen starspot in KIC 8626021 caused by magnetic activity. These 
-spots are darker, cooler, and modulate stellar light curves, and with confirmation of its existence, 
-the harmonic frequencies can be used to calculate the spot’s rotation rate, size, latitude, and 
-contrast (Santos et al., 2017). Using the process of re-binning, a starspot signal, previously 
+
+First Iteration (µHz) First Re-bin (µHz) Second Re-bin (µHz) Third Re-bin (µHz)
+0.933 0.933 0.215 0.216
+1.148 1.148 0.575 0.575
+1.364 1.364 0.934 0.935
+1.507 1.507 1.005 1.006
+12.561 12.561 1.149 1.150
+16.581 16.581 1.221 1.222
+1.364 1.366
+1.508 1.509
+1.580 1.582
+1.724 1.725
+1.795 1.797
+5.889 2.085
+6.822 5.392
+9.192 5.464
+9.479 7.476
+11.203 9.489
+12.568 11.215
+14.291 12.581
+16.230 13.084
+16.589 13.443
+13.659
+14.018
+14.809
+15.097
+16.031
+16.463
+16.894
+TABLE III: The table displays all frequencies of Q7 that had a corrected flux magnitude (ppm)
+above the cutoff of 3. The minor shifting of significant frequencies between re-bins is a byproduct of the method, and we calculated for such errors when finding our average.
+
+First Iteration (µHz) First Re-bin (µHz) Second Re-bin (µHz) Third Re-bin (µHz)
+3.094 2.018 2.019 1.951
+5.784 3.094 3.095 2.019
+9.080 5.784 5.787 2.442
+13.519 7.667 7.671 2.759
+15.671 9.080 9.084 3.095
+16.209 11.165 11.641 3.634
+16.411 13.519 13.526 4.374
+15.469 15.477 4.778
+15.671 15.679 4.912
+16.209 15.881 5.047
+16.411 16.419 5.787
+8.479
+9.084
+10.565
+11.641
+13.526
+15.544
+15.881
+16.823
+TABLE IV: The table displays all frequencies of Q13 that had a corrected flux magnitude (ppm)
+above the cutoff of 3. The minor shifting of significant frequencies between re-bins is a byproduct of the method, and we calculated for such errors when finding our average.
+Conclusions
+As our research used the long-cadence data from Kepler, much of the high-frequency
+variability due to gravitational wave pulsations is lost. However, this presents an opportunity to
+verify our results with the work of research groups that analyzed short-cadence data.With the
+data analyzed, the lower frequencies between 5-6 µHz emerged. After finding the average of the
+periods and accounting for a 1 margin of error, our research hypothesizes that the rotation
+period of KIC 8626021 is 1.99 ± 0.02 days. Other short-cadence research has found the rotation
+period to be 1.8 ± 0.4 days, by analyzing the structures of independent modes (Bischoff-Kim et
+al., 2015). Other calculated periods of rotation have been ≈ 1.7 days (Østensen et al., 2011), and
+these periods indicate that the more precise significant period identified through our re-binning
+relates to the rotation of the white dwarf.
+Through the re-binning process, the SNR clearly improves for both quarters, and for Q7 it
+improves by approximately 1.3 dB, except for the last data re-bin. In the last re-bin, the previous
+
+significant frequency disappears, which becomes increasingly likely after successive re-binning
+processes. The frequency 5.464 µHz rises as another significant frequency; however, we believe
+that this new frequency is simply an artifact of the re-binning process. In Q13, we saw SNR
+improvement ranging from 1.1 dB to 1.3 dB.
+Through the re-binning process, more lines, or significant frequencies, appeared above
+the 3 cutoff, particularly at lower frequencies. These findings suggest that as an alternative to
+short-cadence analysis, the re-binning process of long-cadence data can be used to identify
+significant lower frequencies in white dwarfs. The methods we used are also simple and
+replicable, which allows even those with less experience to quickly analyze the large amounts of
+data being collected by orbiting telescopes, such as the currently active TESS (Transiting
+Exoplanet Survey Satellite) telescope.
+The presence of possible harmonics in the third re-bin of Q13 also indicates the possible
+presence of a previously unseen starspot in KIC 8626021 caused by magnetic activity. These
+spots are darker, cooler, and modulate stellar light curves, and with confirmation of its existence,
+the harmonic frequencies can be used to calculate the spot’s rotation rate, size, latitude, and
+contrast (Santos et al., 2017). Using the process of re-binning, a starspot signal, previously
 dominated by noise, may have been discovered.
-11
-Acknowledgments
-We wish to thank Bloomsburg University of Pennsylvania for its continued support of our 
-research.
-This paper includes data collected by the Kepler mission and obtained from the MAST 
-data archive at the Space Telescope Science Institute (STScI). Funding for the Kepler mission is 
-provided by the NASA Science Mission Directorate. STScI is operated by the Association of 
-Universities for Research in Astronomy, Inc., under NASA contract NAS 5–26555.
-References
-Basri, G., Walkowicz, L. M., Batalha, N., Gilliland, R. L., Jenkins, J., Borucki, W. J., Koch, D., 
-Caldwell, D., Dupree, A. K., Latham, D. W., Meibom, S., Howell, S., & Brown, T. (2010). 
-PHOTOMETRIC VARIABILITY IN KEPLER TARGET stars: THE SUN AMONG 
-stars—a FIRST LOOK. The Astrophysical Journal, 713(2), L155-L159. 
-https://doi.org/10.1088/2041-8205/713/2/L155
-Bischoff-Kim, A., Østensen, R. H., Hermes, J.j., & Provencal, J. L. (2015). Seven-Period 
-asteroseismic fit of KIC 8626021. EPJ Web of Conferences, 101, 06009. 
-https://doi.org/10.1051/epjconf/201510106009
-Córsico, A. H. (2020). White-Dwarf asteroseismology with the kepler space telescope. Frontiers 
-in Astronomy and Space Sciences, 7. https://doi.org/10.3389/fspas.2020.00047
-Holberg, J. B. (2009). The discovery of the existence of white dwarf stars: 1862 to 1930. Journal 
-for the History of Astronomy, 40(2), 137-154. 
-https://doi.org/10.1177%2F002182860904000201
-Howell, S. B., Sobeck, C., Haas, M., Still, M., Barclay, T., Mullally, F., Troeltzsch, J., Aigrain, S., 
-Bryson, S. T., Caldwell, D., Chaplin, W. J., Cochran, W. D., Huber, D., Marcy, G. W., 
-Miglio, A., Najita, J. R., Smith, M., Twicken, J. D., & Fortney, J. J. (2014). The k2 mission: 
-Characterization and early results. Publications of the Astronomical Society of the Pacific, 
-126(938), 398-408. https://doi.org/10.1086/676406
-Johnson, M. (Ed.). (2018, October 30). Mission overview. National Aeronautics and Space 
-Administration. Retrieved September 2, 2021, from 
-https://www.nasa.gov/mission_pages/kepler/overview/index.html
-Koch, D. G., Borucki, W. J., Basri, G., Batalha, N. M., Brown, T. M., Caldwell, D., Christensen￾dalsgaard, J., Cochran, W. D., Devore, E., Dunham, E. W., Gautier, T. N., Geary, J. C., 
-Gilliland, R. L., Gould, A., Jenkins, J., Kondo, Y., Latham, D. W., Lissauer, J. J., Marcy, 
-G., . . . Morrison, D. (2010). KEPLER MISSION design, REALIZED PHOTOMETRIC 
-performance, AND EARLY SCIENCE. The Astrophysical Journal, 713(2), L79-L86. 
-https://dx.doi.org/10.1088/2041-8205/713/2/L79
-Østensen, R. H., Bloemen, S., Vučković, M., Aerts, C., Oreiro, R., Kinemuchi, K., Still, M., & 
-Koester, D. (2011). AT last—a v777 HER PULSATOR IN THE KEPLER FIELD. The 
-Astrophysical Journal, 736(2), L39. https://doi.org/10.1088/2041-8205/736/2/L39 
-Santos, A. R. G., Cunha, M. S., Avelino, P. P., García, R. A., & Mathur, S. (2017). Starspot 
-signature on the light curve. Astronomy & Astrophysics, 599, A1. 
+
+Acknowledgments
+We wish to thank Bloomsburg University of Pennsylvania for its continued support of our
+research.
+This paper includes data collected by the Kepler mission and obtained from the MAST
+data archive at the Space Telescope Science Institute (STScI). Funding for the Kepler mission is
+provided by the NASA Science Mission Directorate. STScI is operated by the Association of
+Universities for Research in Astronomy, Inc., under NASA contract NAS 5–26555.
+References
+Basri, G., Walkowicz, L. M., Batalha, N., Gilliland, R. L., Jenkins, J., Borucki, W. J., Koch, D.,
+Caldwell, D., Dupree, A. K., Latham, D. W., Meibom, S., Howell, S., & Brown, T. (2010).
+PHOTOMETRIC VARIABILITY IN KEPLER TARGET stars: THE SUN AMONG
+stars—a FIRST LOOK. The Astrophysical Journal, 713(2), L155-L159.
+https://doi.org/10.1088/2041-8205/713/2/L155
+Bischoff-Kim, A., Østensen, R. H., Hermes, J.j., & Provencal, J. L. (2015). Seven-Period
+asteroseismic fit of KIC 8626021. EPJ Web of Conferences, 101, 06009.
+https://doi.org/10.1051/epjconf/201510106009
+Córsico, A. H. (2020). White-Dwarf asteroseismology with the kepler space telescope. Frontiers
+in Astronomy and Space Sciences, 7. https://doi.org/10.3389/fspas.2020.00047
+Holberg, J. B. (2009). The discovery of the existence of white dwarf stars: 1862 to 1930. Journal
+for the History of Astronomy, 40(2), 137-154.
+https://doi.org/10.1177%2F002182860904000201
+Howell, S. B., Sobeck, C., Haas, M., Still, M., Barclay, T., Mullally, F., Troeltzsch, J., Aigrain, S.,
+Bryson, S. T., Caldwell, D., Chaplin, W. J., Cochran, W. D., Huber, D., Marcy, G. W.,
+Miglio, A., Najita, J. R., Smith, M., Twicken, J. D., & Fortney, J. J. (2014). The k2 mission:
+Characterization and early results. Publications of the Astronomical Society of the Pacific,
+126(938), 398-408. https://doi.org/10.1086/676406
+Johnson, M. (Ed.). (2018, October 30). Mission overview. National Aeronautics and Space
+Administration. Retrieved September 2, 2021, from
+https://www.nasa.gov/mission_pages/kepler/overview/index.html
+Koch, D. G., Borucki, W. J., Basri, G., Batalha, N. M., Brown, T. M., Caldwell, D., Christensendalsgaard, J., Cochran, W. D., Devore, E., Dunham, E. W., Gautier, T. N., Geary, J. C.,
+Gilliland, R. L., Gould, A., Jenkins, J., Kondo, Y., Latham, D. W., Lissauer, J. J., Marcy,
+G., . . . Morrison, D. (2010). KEPLER MISSION design, REALIZED PHOTOMETRIC
+performance, AND EARLY SCIENCE. The Astrophysical Journal, 713(2), L79-L86.
+https://dx.doi.org/10.1088/2041-8205/713/2/L79
+Østensen, R. H., Bloemen, S., Vučković, M., Aerts, C., Oreiro, R., Kinemuchi, K., Still, M., &
+Koester, D. (2011). AT last—a v777 HER PULSATOR IN THE KEPLER FIELD. The
+Astrophysical Journal, 736(2), L39. https://doi.org/10.1088/2041-8205/736/2/L39
+Santos, A. R. G., Cunha, M. S., Avelino, P. P., García, R. A., & Mathur, S. (2017). Starspot
+signature on the light curve. Astronomy & Astrophysics, 599, A1.
 https://doi.org/10.1051/0004-6361/201629923
-12
-Winget, D.e., & Kepler, S.o. (2008). Pulsating white dwarf stars and precision asteroseismology. 
-Annual Review of Astronomy and Astrophyics, 46(1), 157-199. 
-https://doi.org/10.1146/annurev.astro.46.060407.145250
-Wolfram Research, Inc., Mathematica, Version 12.3.1, Champaign, IL (2021).
+
+Winget, D.e., & Kepler, S.o. (2008). Pulsating white dwarf stars and precision asteroseismology.
+Annual Review of Astronomy and Astrophyics, 46(1), 157-199.
+https://doi.org/10.1146/annurev.astro.46.060407.145250
+Wolfram Research, Inc., Mathematica, Version 12.3.1, Champaign, IL (2021).
\ No newline at end of file
diff --git a/read/results/pdfium/2201.00037.txt b/read/results/pdfium/2201.00037.txt
index 7dea8ef..1a6d063 100644
--- a/read/results/pdfium/2201.00037.txt
+++ b/read/results/pdfium/2201.00037.txt
@@ -1,2075 +1,2016 @@
-Confidential manuscript submitted to JGR-Planets
-The influence of a fluid core and a solid inner core on the
-Cassini sate of Mercury
-Mathieu Dumberry 1
-1Department of Physics, University of Alberta, Edmonton, Alberta, Canada.
-Key Points:
-• The Cassini state obliquity of Mercury’s mantle spin axis deviates from that of a rigid
-planet by no more than 0.01 arcmin.
-• For a core magnetic field above 0.3 mT, electromagnetic coupling locks the fluid and solid
-cores into a common precession motion.
-• The larger the inner core is, the more the obliquity of the polar moment of inertia ap￾proaches that expected for a rigid planet.
-Corresponding author: Mathieu Dumberry, dumberry@ualberta.ca
-–1–
-arXiv:2201.00037v1 [astro-ph.EP] 31 Dec 2021
-Confidential manuscript submitted to JGR-Planets
-Abstract
-We present a model of the Cassini state of Mercury that comprises an inner core, a fluid core
-and a mantle. Our model includes inertial and gravitational torques between interior regions,
-and viscous and electromagnetic (EM) coupling at the boundaries of the fluid core. We show
-that the coupling between Mercury’s interior regions is sufficiently strong that the obliquity of
-the mantle spin axis deviates from that of a rigid planet by no more than 0.01 arcmin. The man￾tle obliquity decreases with increasing inner core size, but the change between a large and no
-inner core is limited to 0.015 arcmin. EM coupling is stronger than viscous coupling at the in￾ner core boundary and, if the core magnetic field strength is above 0.3 mT, locks the fluid and
-solid cores into a common precession motion. Because of the strong gravitational coupling be￾tween the mantle and inner core, the larger the inner core is, the more this co-precessing core
-is brought into an alignment with the mantle, and the more the obliquity of the polar moment
-of inertia approaches that expected for a rigid planet. The misalignment between the polar mo￾ment of inertia and mantle spin axis increases with inner core size, but is limited to 0.007 ar￾cmin. Our results imply that the measured obliquities of the mantle spin axis and polar mo￾ment of inertia should coincide at the present-day level of measurement errors, and cannot be
-distinguished from the obliquity of a rigid planet.
-Plain language summary: The plane of Mercury’s orbit around the Sun is slowly precess￾ing about an axis fixed in space. This entrains a precession of the spin axis of Mercury at the
-same rate, an equilibrium known as a Cassini state. The angle between the spin axis and the
-normal to the orbital plane is known as the obliquity and remains fixed. Observations have con￾firmed that Mercury’s obliquity matches, within measurement errors, the theoretical predic￾tion based on an entirely rigid planet. However, we know that Mercury has a large metallic core
-which is liquid, although the central part may be solid. In this work, we investigate how the
-presence of a fluid and solid core affect the Cassini state of Mercury. We show that the inter￾nal coupling between the solid core, fluid core and the mantle is sufficiently strong that the obliq￾uity of the mantle does not depart from that of a rigid planet by more than 0.01 arcmin, an
-offset smaller than the present-day error in measurements. We also show that the larger the
-solid inner core is, the more the planet behaves as if it were precessing as an entirely rigid body.
-1 Introduction
-Mercury is expected to be in a Cassini state (Figure 1) whereby its orbit normal and spin￾symmetry axis are both coplanar with, and precess about, the normal to the Laplace plane [Colombo,
-1966; Peale, 1969, 2006]. The orientation of the Laplace plane varies on long timescales, but
-its present-day orientation can be reconstructed from ephemerides data [Yseboodt and Margot,
-2006; Baland et al., 2017]. Likewise, the rate of precession is also not observed directly, but is
-reconstructed by ephemerides data. The latest estimate is a retrograde precession period of 325,513
-yr with an inclination angle of I = 8.5330◦ between the orbit and Laplace plane normals [Ba￾land et al., 2017]. Measurements of the obliquity εm, defined as the angle of misalignment be￾tween the spin-symmetry axis and the orbit normal, have been obtained by different techniques,
-including ground based radar observations [Margot et al., 2007, 2012], and stereo digital ter￾rain images [Stark et al., 2015a] and radio tracking data [Mazarico et al., 2014; Verma and Mar￾got, 2016; Genova et al., 2019; Konopliv et al., 2020] from the MErcury Surface Space ENvi￾ronment GEochemistry and Ranging (MESSENGER) spacecraft. Within measurement errors,
-all techniques yield an obliquity which is coplanar with the orbit and Laplace plane normals
-and consistent with a Cassini state. Furthermore, the observed obliquity angle (2.042 ± 0.08
+Confidential manuscript submitted to JGR-Planets
+The influence of a fluid core and a solid inner core on the
+Cassini state of Mercury
+Mathieu Dumberry 1
+1Department of Physics, University of Alberta, Edmonton, Alberta, Canada.
+Key Points:
+• The Cassini state obliquity of Mercury’s mantle spin axis deviates from that of a rigid
+planet by no more than 0.01 arcmin.
+• For a core magnetic field above 0.3 mT, electromagnetic coupling locks the fluid and solid
+cores into a common precession motion.
+• The larger the inner core is, the more the obliquity of the polar moment of inertia approaches that expected for a rigid planet.
+Corresponding author: Mathieu Dumberry, dumberry@ualberta.ca
+–1–
+arXiv:2201.00037v1 [astro-ph.EP] 31 Dec 2021
+Confidential manuscript submitted to JGR-Planets
+Abstract
+We present a model of the Cassini state of Mercury that comprises an inner core, a fluid core
+and a mantle. Our model includes inertial and gravitational torques between interior regions,
+and viscous and electromagnetic (EM) coupling at the boundaries of the fluid core. We show
+that the coupling between Mercury’s interior regions is sufficiently strong that the obliquity of
+the mantle spin axis deviates from that of a rigid planet by no more than 0.01 arcmin. The mantle obliquity decreases with increasing inner core size, but the change between a large and no
+inner core is limited to 0.015 arcmin. EM coupling is stronger than viscous coupling at the inner core boundary and, if the core magnetic field strength is above 0.3 mT, locks the fluid and
+solid cores into a common precession motion. Because of the strong gravitational coupling between the mantle and inner core, the larger the inner core is, the more this co-precessing core
+is brought into an alignment with the mantle, and the more the obliquity of the polar moment
+of inertia approaches that expected for a rigid planet. The misalignment between the polar moment of inertia and mantle spin axis increases with inner core size, but is limited to 0.007 arcmin. Our results imply that the measured obliquities of the mantle spin axis and polar moment of inertia should coincide at the present-day level of measurement errors, and cannot be
+distinguished from the obliquity of a rigid planet.
+Plain language summary: The plane of Mercury’s orbit around the Sun is slowly precessing about an axis fixed in space. This entrains a precession of the spin axis of Mercury at the
+same rate, an equilibrium known as a Cassini state. The angle between the spin axis and the
+normal to the orbital plane is known as the obliquity and remains fixed. Observations have confirmed that Mercury’s obliquity matches, within measurement errors, the theoretical prediction based on an entirely rigid planet. However, we know that Mercury has a large metallic core
+which is liquid, although the central part may be solid. In this work, we investigate how the
+presence of a fluid and solid core affect the Cassini state of Mercury. We show that the internal coupling between the solid core, fluid core and the mantle is sufficiently strong that the obliquity of the mantle does not depart from that of a rigid planet by more than 0.01 arcmin, an
+offset smaller than the present-day error in measurements. We also show that the larger the
+solid inner core is, the more the planet behaves as if it were precessing as an entirely rigid body.
+1 Introduction
+Mercury is expected to be in a Cassini state (Figure 1) whereby its orbit normal and spin-symmetry axis are both coplanar with, and precess about, the normal to the Laplace plane [Colombo,
+1966; Peale, 1969, 2006]. The orientation of the Laplace plane varies on long timescales, but
+its present-day orientation can be reconstructed from ephemerides data [Yseboodt and Margot,
+2006; Baland et al., 2017]. Likewise, the rate of precession is also not observed directly, but is
+reconstructed by ephemerides data. The latest estimate is a retrograde precession period of 325,513
+yr with an inclination angle of I = 8.5330◦ between the orbit and Laplace plane normals [Baland et al., 2017]. Measurements of the obliquity εm, defined as the angle of misalignment between the spin-symmetry axis and the orbit normal, have been obtained by different techniques,
+including ground based radar observations [Margot et al., 2007, 2012], and stereo digital terrain images [Stark et al., 2015a] and radio tracking data [Mazarico et al., 2014; Verma and Margot, 2016; Genova et al., 2019; Konopliv et al., 2020] from the MErcury Surface Space ENvironment GEochemistry and Ranging (MESSENGER) spacecraft. Within measurement errors,
+all techniques yield an obliquity which is coplanar with the orbit and Laplace plane normals
+and consistent with a Cassini state. Furthermore, the observed obliquity angle (2.042 ± 0.08
 –2–
-Confidential manuscript submitted to JGR-Planets
-I
-descending
-node of orbit
-Ωp
-ê3
-I
-I
-ê3
-εm L
-I ê3
-p
-ascending
-node of orbit
-descending
-node of equator
-equatorial 
-plane
-orbital
-direction
-S
-ê3
-I ê3
-L
-M
-εm
-orbital plane
-Figure 1. The orbit of Mercury (M) around Sun (S) with respect to the Laplace plane (grey shaded
-rectangle) and the Cassini state of Mercury. The normal to the orbital plane (eˆ
-I
-3) is offset from the nor￾mal to the Laplace plane (eˆ
-L
-3 ) by an angle I = 8.5330◦
-. The symmetry axis of the mantle eˆ
-p
-3
-is offset
-from eˆ
-I
-3 by εm ≈ 2 arcmin. eˆ
-I
-3 and eˆ
-p
-3 are coplanar with, and precess about, eˆ
-L
-3 in a retrograde direction
-at frequency Ωp = 2π/325, 513 yr−1
-. The blue (orange) shaded region indicates the portion of the orbit
-when Mercury is above (below) the Laplace plane. Angles are not drawn to scale.
-arcmin [Margot et al., 2012], 2.029±0.085 arcmin [Stark et al., 2015a] and 1.968±0.027 [Gen￾ova et al., 2019] to list a few) matches that expected if Mercury occupies Cassini state 1.
-The prediction of Mercury’s obliquity is based on the assumption that the whole planet
-precesses as a single body. However, we know that Mercury has a fluid core from two main lines
-of evidence. First, Mercury’s large scale magnetic field is intrinsic, and must be maintained by
-dynamo action [Anderson et al., 2011, 2012; Johnson et al., 2012]. This requires fluid motion
-in its metallic core, and hence that Mercury’s core is at least partially liquid. Second, the ob￾served amplitude of the 88-day longitudinal libration is approximately twice as large as that
-expected if Mercury were librating as a rigid body [Margot et al., 2007, 2012; Stark et al., 2015a].
-This indicates that it is only the mantle that librates, and that the outer part of the core is fluid.
-These evidences do not necessarily imply that the whole of Mercury’s core is fluid, but only that
-its outermost part must be. A solid inner core may have nucleated at the centre although its
-size is not well constrained. Inner core growth leads to planetary contraction, and the inferred
-radial contraction of ∼ 7 km since the late heavy bombardment [Byrne et al., 2014] places an
-approximate limit of 800 km on the inner core radius [Grott et al., 2011]. However, the inner
-core could be larger if a significant fraction of its growth occurred earlier in Mercury’s history.
+Confidential manuscript submitted to JGR-Planets
+I
+descending
+node of orbit
+Ωp
+ê3
+I
+I
+ê3
+εm L
+I ê3
+p
+ascending
+node of orbit
+descending
+node of equator
+equatorial
+plane
+orbital
+direction
+S
+ê3
+I ê3
+L
+M
+εm
+orbital plane
+Figure 1. The orbit of Mercury (M) around Sun (S) with respect to the Laplace plane (grey shaded rectangle) and the Cassini state of Mercury. The normal to the orbital plane ($\hat{e}_3^I$) is offset from the normal to the Laplace plane ($\hat{e}_3^L$) by an angle $I = 8.5330^\circ$. The symmetry axis of the mantle $\hat{e}_3^p$ is offset from $\hat{e}_3^I$ by $\varepsilon_m \approx 2$ arcmin. $\hat{e}_3^I$ and $\hat{e}_3^p$ are coplanar with, and precess about, $\hat{e}_3^L$ in a retrograde direction at frequency $\Omega_p = 2\pi/325{,}513$ yr$^{-1}$. The blue (orange) shaded region indicates the portion of the orbit
+when Mercury is above (below) the Laplace plane. Angles are not drawn to scale.
+arcmin [Margot et al., 2012], 2.029±0.085 arcmin [Stark et al., 2015a] and 1.968±0.027 [Genova et al., 2019] to list a few) matches that expected if Mercury occupies Cassini state 1.
+The prediction of Mercury’s obliquity is based on the assumption that the whole planet
+precesses as a single body. However, we know that Mercury has a fluid core from two main lines
+of evidence. First, Mercury’s large scale magnetic field is intrinsic, and must be maintained by
+dynamo action [Anderson et al., 2011, 2012; Johnson et al., 2012]. This requires fluid motion
+in its metallic core, and hence that Mercury’s core is at least partially liquid. Second, the observed amplitude of the 88-day longitudinal libration is approximately twice as large as that
+expected if Mercury were librating as a rigid body [Margot et al., 2007, 2012; Stark et al., 2015a].
+This indicates that it is only the mantle that librates, and that the outer part of the core is fluid.
+These evidences do not necessarily imply that the whole of Mercury’s core is fluid, but only that
+its outermost part must be. A solid inner core may have nucleated at the centre although its
+size is not well constrained. Inner core growth leads to planetary contraction, and the inferred
+radial contraction of ∼ 7 km since the late heavy bombardment [Byrne et al., 2014] places an
+approximate limit of 800 km on the inner core radius [Grott et al., 2011]. However, the inner
+core could be larger if a significant fraction of its growth occurred earlier in Mercury’s history.
 –3–
-Confidential manuscript submitted to JGR-Planets
-With a fluid core, and possibly a solid inner core, the observed obliquity εm reflects the
-orientation of the spin-symmetry axis of the precessing mantle and crust alone. Neglecting dis￾sipation, and at equilibrium in the Cassini state, the spin axis of the fluid core and the spin￾symmetry axis of the inner core should both also precess about the normal to the Laplace plane
-in a retrograde direction with a period of 325,513 yr. Both of these axes should also lie in the
-plane that defines the equilibrium Cassini state [e.g. Dumberry and Wieczorek, 2016], although
-their obliquity angles may be different than εm. Whether the spin axis of the fluid core is brought
-into an alignment with the mantle obliquity depends primarily on the pressure torque (also re￾ferred to as the inertial torque) exerted by the centrifugal force of the rotating fluid core on the
-misaligned elliptical shape of the core-mantle boundary (CMB) [Poincar´e, 1910]. The more flat￾tened the CMB is, the stronger the pressure torque is, and the more the fluid core is entrained
-into a co-precession at a similar obliquity to that of the mantle. The flattening of Mercury’s
-CMB is not known. But if one assumes that the topography of the CMB coincides with an equipo￾tential surface at hydrostatic equilibrium with the imposed frozen-in mass anomalies in the up￾per mantle and crust, then the pressure torque at the CMB is sufficient to bring the fluid core
-into a close alignment with the mantle [Peale et al., 2014]. The spin axis of the fluid core is not
-expected to be exactly aligned with the spin-symmetry axis of the mantle, but sufficiently close
-that the resulting mantle obliquity does not differ much from that of a single body planet. Fur￾thermore, viscous and electromagnetic (EM) coupling at the CMB can further restrict the mis￾alignment between the mantle and core [Peale et al., 2014].
-If an inner core is present, its obliquity angle is determined by the sum of the torques act￾ing on it. This includes the gravitational torque from the Sun acting on its tilted figure, anal￾ogous to the torque applied on the tilted mantle that sets the obliquity εm. In addition, the
-tilt of the inner core also depends on the gravitational torque imposed by the mantle and the
-pressure torque at the inner core boundary (ICB) imposed by the fluid core. If the mantle grav￾itational torque dominates, the inner core tilt is expected to remain closely aligned with the
-mantle. Conversely, if the pressure torque at the ICB is the largest, the inner core should in￾stead be closely aligned with the spin axis of the fluid core. A strong viscous and/or EM cou￾pling at the ICB should also enforce a closer alignment between the rotation vectors of the in￾ner core and fluid core.
-It is on the basis of the observed mantle obliquity that the polar moment of inertia of Mer￾cury is inferred [e.g. Peale, 1976; Margot et al., 2018]. Inherent in this calculation is the built￾in assumption that the mantle obliquity does not deviate from that of a rigid planet by a sub￾stantial amount. However, the recent study by Peale et al. [2016] suggests that the inner core
-can be misaligned from the mantle by a few arcmin and that a large inner core can perturb the
-orientation of the spin vector of the mantle by as much as 0.1 arcmin. This challenges the as￾sumption that the observed obliquity reflects the orientation of the whole planet.
-Furthermore, if a large inner core is misaligned with the mantle, then the mantle spin axis
-does not coincide with the orientation of the polar moment of inertia of the whole planet. This
-can introduce a systematic offset between different types of obliquity measurements. Those based
-on tracking topographic features [Margot et al., 2007, 2012; Stark et al., 2015a] capture the obliq￾uity of the mantle spin axis. While those based on the orientation of the gravity field [Mazarico
-et al., 2014; Verma and Margot, 2016; Genova et al., 2019; Konopliv et al., 2020] are instead
-tied to the orientation of the principal moment of inertia of the whole planet. An offset of the
-obliquity of the mantle spin axis with respect to the gravity field could be used to constrain the
-size of the inner core, even though this is difficult to do at present because the different esti￾mates of the obliquity of the gravity field do not match well with one another.
+Confidential manuscript submitted to JGR-Planets
+With a fluid core, and possibly a solid inner core, the observed obliquity εm reflects the
+orientation of the spin-symmetry axis of the precessing mantle and crust alone. Neglecting dissipation, and at equilibrium in the Cassini state, the spin axis of the fluid core and the spin-symmetry axis of the inner core should both also precess about the normal to the Laplace plane
+in a retrograde direction with a period of 325,513 yr. Both of these axes should also lie in the
+plane that defines the equilibrium Cassini state [e.g. Dumberry and Wieczorek, 2016], although
+their obliquity angles may be different than εm. Whether the spin axis of the fluid core is brought
+into an alignment with the mantle obliquity depends primarily on the pressure torque (also referred to as the inertial torque) exerted by the centrifugal force of the rotating fluid core on the
+misaligned elliptical shape of the core-mantle boundary (CMB) [Poincaré, 1910]. The more flattened the CMB is, the stronger the pressure torque is, and the more the fluid core is entrained
+into a co-precession at a similar obliquity to that of the mantle. The flattening of Mercury’s
+CMB is not known. But if one assumes that the topography of the CMB coincides with an equipotential surface at hydrostatic equilibrium with the imposed frozen-in mass anomalies in the upper mantle and crust, then the pressure torque at the CMB is sufficient to bring the fluid core
+into a close alignment with the mantle [Peale et al., 2014]. The spin axis of the fluid core is not
+expected to be exactly aligned with the spin-symmetry axis of the mantle, but sufficiently close
+that the resulting mantle obliquity does not differ much from that of a single body planet. Furthermore, viscous and electromagnetic (EM) coupling at the CMB can further restrict the misalignment between the mantle and core [Peale et al., 2014].
+If an inner core is present, its obliquity angle is determined by the sum of the torques acting on it. This includes the gravitational torque from the Sun acting on its tilted figure, analogous to the torque applied on the tilted mantle that sets the obliquity εm. In addition, the
+tilt of the inner core also depends on the gravitational torque imposed by the mantle and the
+pressure torque at the inner core boundary (ICB) imposed by the fluid core. If the mantle gravitational torque dominates, the inner core tilt is expected to remain closely aligned with the
+mantle. Conversely, if the pressure torque at the ICB is the largest, the inner core should instead be closely aligned with the spin axis of the fluid core. A strong viscous and/or EM coupling at the ICB should also enforce a closer alignment between the rotation vectors of the inner core and fluid core.
+It is on the basis of the observed mantle obliquity that the polar moment of inertia of Mercury is inferred [e.g. Peale, 1976; Margot et al., 2018]. Inherent in this calculation is the built-in assumption that the mantle obliquity does not deviate from that of a rigid planet by a substantial amount. However, the recent study by Peale et al. [2016] suggests that the inner core
+can be misaligned from the mantle by a few arcmin and that a large inner core can perturb the
+orientation of the spin vector of the mantle by as much as 0.1 arcmin. This challenges the assumption that the observed obliquity reflects the orientation of the whole planet.
+Furthermore, if a large inner core is misaligned with the mantle, then the mantle spin axis
+does not coincide with the orientation of the polar moment of inertia of the whole planet. This
+can introduce a systematic offset between different types of obliquity measurements. Those based
+on tracking topographic features [Margot et al., 2007, 2012; Stark et al., 2015a] capture the obliquity of the mantle spin axis. While those based on the orientation of the gravity field [Mazarico
+et al., 2014; Verma and Margot, 2016; Genova et al., 2019; Konopliv et al., 2020] are instead
+tied to the orientation of the principal moment of inertia of the whole planet. An offset of the
+obliquity of the mantle spin axis with respect to the gravity field could be used to constrain the
+size of the inner core, even though this is difficult to do at present because the different estimates of the obliquity of the gravity field do not match well with one another.
 –4–
-Confidential manuscript submitted to JGR-Planets
-There is thus a significant interest in properly assessing how the presence of a solid in￾ner core at the centre of Mercury may affect its Cassini state equilibrium. Here, we present a
-model of Mercury’s Cassini state that comprises a fluid core and solid inner core. The model
-is an adaptation of a similar model developed to study the Cassini state of the Moon [Dumb￾erry and Wieczorek, 2016; Stys and Dumberry, 2018; Organowski and Dumberry, 2020]. The
-specific questions that motivate our study are the following. First, we want to determine how
-large the misaligned obliquities of the fluid core and solid inner core can be and how they de￾pend on model parameters. Second, we want to assess by how much the mantle obliquity may
-differ from that of an entirely rigid Mercury, and third, by how much the obliquities of the spin￾symmetry axis of the mantle and gravity field may differ.
-2 Theory
-2.1 The interior structure of Mercury
-Our model of Mercury consists of four layers of uniform density: a solid inner core, a fluid
-outer core, a solid mantle, and a thin crust. The outer radii of each of these layers, are denoted
-by rs, rf , rm, and R, and their densities by ρs, ρf , ρm, and ρc, respectively. The inner core ra￾dius rs corresponds to the ICB radius, the fluid core radius rf to the CMB radius, and R =
-2439.36 km to the planetary radius of Mercury. Compressibility effects from increasing pres￾sure with depth are not negligible in the core of Mercury. However adopting uniform densities
-simplifies the analytical expressions of the model while still capturing the first order rotational
-dynamics. Uniform densities were also adopted by Peale et al. [2016] and following the same
-strategy facilitates comparisons between our results.
-We build our interior model as detailed in Peale et al. [2016]. We first specify rs, ρs (or
-a density contrast at the ICB), the crustal density ρc and crustal thickness h = R−rm. The
-three unknowns rf , ρf and ρm are then solved such that the interior model is consistent with
-the known mass M and chosen values of the moments of inertia of the whole planet C and that
-of the mantle and crust Cm.
-Each layer is triaxial in shape. We denote the polar flattening (or geometrical ellipticity)
-by i
-, defined as the difference between the mean equatorial and polar radii, divided by the mean
-spherical radius. Likewise, we denote the equatorial flattening by the variable ξi
-, defined as the
-difference between the maximum and minimum equatorial radii, divided by the mean spher￾ical radius. As above, we use the subscript i = s, f, m and r, to denote the polar or equa￾torial flattenings at the ICB, CMB, crust-mantle boundary (CrMB), and surface.
-The measured polar and equatorial flattenings are taken from Perry et al. [2015] and their
-numerical values are given in Table 1. We then assume that the ICB and CMB are both at hy￾drostatic equilibrium with the imposed gravitational potential induced by the flattenings at the
-CrMB and surface. The flattenings at all interior boundaries are specified such that they are
-consistent with the observed degree 2 spherical harmonic coefficients of gravity J2 and C22; their
-numerical values are given in Table 1. Specifically, J2 and C22 are connected to the principal
-moments of inertia of Mercury (C > B > A) and to the polar and equatorial flattenings by
-J2 =
-C − A¯
-MR2
-=
-8π
-15
-1
-MR2
-
-(ρs − ρf )r
-5
-s
-s + (ρf − ρm)r
-5
-f
-f + (ρm − ρc)r
-5
-mm + ρcR
-5
-r
-
-, (1a)
-C22 =
-B − A
-4MR2
-=
-8π
-15
-1
-4MR2
-
-(ρs − ρf )r
-5
-s
-ξs + (ρf − ρm)r
-5
-f
-ξf + (ρm − ρc)r
-5
-mξm + ρcR
-5
-ξr
-
-. (1b)
-where A¯ is the mean equatorial moment of inertia defined below. The same procedure was used
-in Peale et al. [2016] and the mathematical details are given in Equations (18-20) of Dumberry
-–5–    
-Confidential manuscript submitted to JGR-Planets
-Mercury Parameter Numerical value Reference
-mean motion, n 2π/87.96935 day−1 Stark et al. [2015b]
-rotation rate, Ωo = 1.5n 2π/58.64623 day−1 Stark et al. [2015b]
-orbit precession rate, Ωp 2π/325, 513 yr−1 Baland et al. [2017]
-Poincar´e number, δω = Ωp/Ωo 4.9327 × 10−7
-orbital eccentricity, ec 0.20563 Baland et al. [2017]
-orbital inclination, I 8.5330◦ Baland et al. [2017]
-mean planetary radius, R 2439.360 km Perry et al. [2015]
-mass, M 3.3012 × 1023 kg Genova et al. [2019]
-mean density, ¯ρ 5429.5 kg m−3
-J2 5.0291 × 10−5 Genova et al. [2019]
-C22 8.0415 × 10−6 Genova et al. [2019]
-polar surface flattening, r 6.7436 × 10−4 Perry et al. [2015]
-equatorial surface flattening, ξr 5.1243 × 10−4 Perry et al. [2015]
-Table 1. Reference parameters for Mercury. The mass M is computed from GM = 22031.8636 × 109
-m3
-/s2
-taken from Genova et al. [2019]. The mean density is calculated from 4π
-3
-ρR¯
-3 = M. The numerical
-values of r and ξr are calculated from r = (¯a − c)/R and ξr = (a − b)/R, where ¯a =
-1
-2
-(a + b) and where
-a = 2440.53 km, b = 2439.28 km and c = 2438.26 km are the semimajor, intermediate and semiminor
-axes of the trixial ellipsoidal shape of Mercury taken from Table 2 of Perry et al. [2015]. J2 and C22 are
-computed from Equation (4) in the Supporting Information of Genova et al. [2019].
-and Wieczorek [2016] who adopted the same strategy in their interior modelling of the Moon.
-Note that we neglect the misalignment between the triaxial shape of Mercury’s surface topog￾raphy and the axes of the principal moments of inertia, which amount to a polar offset of ∼ 2
-◦
-and an equatorial offset of ∼ 15◦
-[Perry et al., 2015].
-Once the densities and flattenings of all interior regions are known, we can specify the mo￾ments of inertia of the fluid core (Cf > Bf > Af ) and solid inner core (Cs > Bs > As)
-along with the mean equatorial moments of inertia
-A¯ =
-1
-2
-(A + B), A¯
-f =
-1
-2
-(Af + Bf ), A¯
-s =
-1
-2
-(As + Bs). (2)
-From these, we define the polar (e, ef , es) and equatorial (γ, γs) dynamical ellipticities of the
-whole planet (no subscript), fluid core (subscript f) and solid inner core (subscript s), which
-enter our rotational model,
-e =
-C − A¯
-A¯
-ef =
-Cf − A¯
-f
-A¯
-f
-es =
-Cs − A¯
-s
-A¯
-s
-, (3a)
-γ =
-B − A
-A¯
-γs =
-Bs − As
-A¯
-s
-. (3b)
-We further note that e and γ are connected to J2 and C22 by
-e =
-MR2
-A¯
-J2 , γ =
-4MR2
-A¯
-C22 . (4)
+Confidential manuscript submitted to JGR-Planets
+There is thus a significant interest in properly assessing how the presence of a solid inner core at the centre of Mercury may affect its Cassini state equilibrium. Here, we present a
+model of Mercury’s Cassini state that comprises a fluid core and solid inner core. The model
+is an adaptation of a similar model developed to study the Cassini state of the Moon [Dumberry and Wieczorek, 2016; Stys and Dumberry, 2018; Organowski and Dumberry, 2020]. The
+specific questions that motivate our study are the following. First, we want to determine how
+large the misaligned obliquities of the fluid core and solid inner core can be and how they depend on model parameters. Second, we want to assess by how much the mantle obliquity may
+differ from that of an entirely rigid Mercury, and third, by how much the obliquities of the spin-symmetry axis of the mantle and gravity field may differ.
+2 Theory
+2.1 The interior structure of Mercury
+Our model of Mercury consists of four layers of uniform density: a solid inner core, a fluid
+outer core, a solid mantle, and a thin crust. The outer radii of each of these layers, are denoted
+by rs, rf , rm, and R, and their densities by ρs, ρf , ρm, and ρc, respectively. The inner core radius rs corresponds to the ICB radius, the fluid core radius rf to the CMB radius, and R =
+2439.36 km to the planetary radius of Mercury. Compressibility effects from increasing pressure with depth are not negligible in the core of Mercury. However adopting uniform densities
+simplifies the analytical expressions of the model while still capturing the first order rotational
+dynamics. Uniform densities were also adopted by Peale et al. [2016] and following the same
+strategy facilitates comparisons between our results.
+We build our interior model as detailed in Peale et al. [2016]. We first specify rs, ρs (or
+a density contrast at the ICB), the crustal density ρc and crustal thickness h = R−rm. The
+three unknowns rf , ρf and ρm are then solved such that the interior model is consistent with
+the known mass M and chosen values of the moments of inertia of the whole planet C and that
+of the mantle and crust Cm.
+Each layer is triaxial in shape. We denote the polar flattening (or geometrical ellipticity)
+by $\epsilon_i$, defined as the difference between the mean equatorial and polar radii, divided by the mean
+spherical radius. Likewise, we denote the equatorial flattening by the variable $\xi_i$, defined as the
+difference between the maximum and minimum equatorial radii, divided by the mean spherical radius. As above, we use the subscript $i = s, f, m$ and $r$ to denote the polar or equatorial flattenings at the ICB, CMB, crust-mantle boundary (CrMB), and surface.
+The measured polar and equatorial flattenings are taken from Perry et al. [2015] and their
+numerical values are given in Table 1. We then assume that the ICB and CMB are both at hydrostatic equilibrium with the imposed gravitational potential induced by the flattenings at the
+CrMB and surface. The flattenings at all interior boundaries are specified such that they are
+consistent with the observed degree 2 spherical harmonic coefficients of gravity J2 and C22; their
+numerical values are given in Table 1. Specifically, J2 and C22 are connected to the principal
+moments of inertia of Mercury (C > B > A) and to the polar and equatorial flattenings by
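+$$J_2 = \frac{C - \bar{A}}{M R^2} = \frac{8\pi}{15} \frac{1}{M R^2} \left[ (\rho_s - \rho_f) r_s^5 \epsilon_s + (\rho_f - \rho_m) r_f^5 \epsilon_f + (\rho_m - \rho_c) r_m^5 \epsilon_m + \rho_c R^5 \epsilon_r \right] , \tag{1a}$$
+$$C_{22} = \frac{B - A}{4 M R^2} = \frac{8\pi}{15} \frac{1}{4 M R^2} \left[ (\rho_s - \rho_f) r_s^5 \xi_s + (\rho_f - \rho_m) r_f^5 \xi_f + (\rho_m - \rho_c) r_m^5 \xi_m + \rho_c R^5 \xi_r \right] , \tag{1b}$$
+where $\bar{A}$ is the mean equatorial moment of inertia defined below. The same procedure was used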
+in Peale et al. [2016] and the mathematical details are given in Equations (18-20) of Dumberry
+–5–
+Confidential manuscript submitted to JGR-Planets
+Mercury Parameter Numerical value Reference
+mean motion, $n$ $2\pi/87.96935$ day$^{-1}$ Stark et al. [2015b]
+rotation rate, $\Omega_o = 1.5n$ $2\pi/58.64623$ day$^{-1}$ Stark et al. [2015b]
+orbit precession rate, $\Omega_p$ $2\pi/325{,}513$ yr$^{-1}$ Baland et al. [2017]
+Poincaré number, $\delta\omega = \Omega_p/\Omega_o$ $4.9327 \times 10^{-7}$
+orbital eccentricity, $e_c$ 0.20563 Baland et al. [2017]
+orbital inclination, $I$ $8.5330^\circ$ Baland et al. [2017]
+mean planetary radius, $R$ 2439.360 km Perry et al. [2015]
+mass, $M$ $3.3012 \times 10^{23}$ kg Genova et al. [2019]
+mean density, $\bar{\rho}$ 5429.5 kg m$^{-3}$
+$J_2$ $5.0291 \times 10^{-5}$ Genova et al. [2019]
+$C_{22}$ $8.0415 \times 10^{-6}$ Genova et al. [2019]
+polar surface flattening, $\epsilon_r$ $6.7436 \times 10^{-4}$ Perry et al. [2015]
+equatorial surface flattening, $\xi_r$ $5.1243 \times 10^{-4}$ Perry et al. [2015]
+Table 1. Reference parameters for Mercury. The mass $M$ is computed from $GM = 22031.8636 \times 10^{9}$
+m$^3$/s$^2$ taken from Genova et al. [2019]. The mean density is calculated from $\frac{4\pi}{3} \bar{\rho} R^3 = M$. The numerical
+values of $\epsilon_r$ and $\xi_r$ are calculated from $\epsilon_r = (\bar{a} - c)/R$ and $\xi_r = (a - b)/R$, where $\bar{a} = \frac{1}{2}(a + b)$ and where
+$a = 2440.53$ km, $b = 2439.28$ km and $c = 2438.26$ km are the semimajor, intermediate and semiminor
+axes of the triaxial ellipsoidal shape of Mercury taken from Table 2 of Perry et al. [2015]. $J_2$ and $C_{22}$ are
+computed from Equation (4) in the Supporting Information of Genova et al. [2019].
+and Wieczorek [2016] who adopted the same strategy in their interior modelling of the Moon.
+Note that we neglect the misalignment between the triaxial shape of Mercury’s surface topography and the axes of the principal moments of inertia, which amounts to a polar offset of $\sim 2^\circ$
+and an equatorial offset of $\sim 15^\circ$ [Perry et al., 2015].
+Once the densities and flattenings of all interior regions are known, we can specify the moments of inertia of the fluid core (Cf > Bf > Af ) and solid inner core (Cs > Bs > As)
+along with the mean equatorial moments of inertia
+$$\bar{A} = \frac{1}{2}(A + B), \quad \bar{A}_f = \frac{1}{2}(A_f + B_f), \quad \bar{A}_s = \frac{1}{2}(A_s + B_s). \tag{2}$$
+From these, we define the polar (e, ef , es) and equatorial (γ, γs) dynamical ellipticities of the
+whole planet (no subscript), fluid core (subscript f) and solid inner core (subscript s), which
+enter our rotational model,
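+$$e = \frac{C - \bar{A}}{\bar{A}} , \quad e_f = \frac{C_f - \bar{A}_f}{\bar{A}_f} , \quad e_s = \frac{C_s - \bar{A}_s}{\bar{A}_s} , \tag{3a}$$
+$$\gamma = \frac{B - A}{\bar{A}} , \quad \gamma_s = \frac{B_s - A_s}{\bar{A}_s} . \tag{3b}$$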
+We further note that e and γ are connected to J2 and C22 by
+$$e = \frac{M R^2}{\bar{A}} J_2 , \quad \gamma = \frac{4 M R^2}{\bar{A}} C_{22} . \tag{4}$$
 –6–
-Confidential manuscript submitted to JGR-Planets
-θm
-θn
-θs
-θf
-Ω
-Ωs
-Ωf
-ê3
-p
-ê3
-ê3 s
-I
-I
-εm
-θp
-ê3
-L
-ê1
-p
-ê2
-p
-Cassini plane
-ωΩot
-ê3
-I
-I εm
-ê3
-p
-ê1
-ê2
-p
-ê3
-L
-a) b)
-Figure 2. Geometry of the Cassini state model of Mercury viewed (a) in the Cassini plane and (b)
-in a frame attached to the rotating mantle. The orbit normal (eˆ
-I
-3) is tilted by an angle I = 8.533◦
-from
-the Laplace normal (eˆ
-L
-3 ) and the symmetry axis of Mercury’s mantle (eˆ
-p
-3
-) is tilted by an obliquity εm
-with respect to eˆ
-I
-3. Shown in (a) are the orientations of the symmetry axis of the inner core (eˆ
-s
-3), the
-rotation rate vectors of the mantle (Ω), fluid core (Ωf ) and inner core (Ωf ) and angles θp, θn, θm, θf
-and θs in their Cassini state equilibrium. All vectors and angles are in a common plane which we refer
-to as the Cassini plane. The light grey, white, and dark grey ellipsoid represent a polar cross-section of
-the mantle, fluid core and inner core, respectively; blue shaded parts show an equatorial cross section.
-The black curved arrow in the equatorial plane in (a) indicates the direction of rotation of the equatorial
-mantle axes eˆ
-p
-1 and eˆ
-p
-2 with respect to the Cassini plane. Viewed in the frame attached to the rotating
-mantle (b), the Cassini plane is rotating at frequency ωΩo = −Ωo − Ωp cos I in the longitudinal direc￾tion. The oblateness of all three regions and the amplitude of all angles are exaggerated for purpose of
-illustration.
+Confidential manuscript submitted to JGR-Planets
+θm
+θn
+θs
+θf
+Ω
+Ωs
+Ωf
+ê3
+p
+ê3
+ê3 s
+I
+I
+εm
+θp
+ê3
+L
+ê1
+p
+ê2
+p
+Cassini plane
+ωΩot
+ê3
+I
+I εm
+ê3
+p
+ê1
+ê2
+p
+ê3
+L
+a) b)
+Figure 2. Geometry of the Cassini state model of Mercury viewed (a) in the Cassini plane and (b)
+in a frame attached to the rotating mantle. The orbit normal ($\hat{e}_3^I$) is tilted by an angle $I = 8.533^\circ$ from
+the Laplace normal ($\hat{e}_3^L$) and the symmetry axis of Mercury’s mantle ($\hat{e}_3^p$) is tilted by an obliquity $\varepsilon_m$
+with respect to $\hat{e}_3^I$. Shown in (a) are the orientations of the symmetry axis of the inner core ($\hat{e}_3^s$), the
+rotation rate vectors of the mantle ($\boldsymbol{\Omega}$), fluid core ($\boldsymbol{\Omega}_f$) and inner core ($\boldsymbol{\Omega}_s$) and angles $\theta_p$, $\theta_n$, $\theta_m$, $\theta_f$
+and $\theta_s$ in their Cassini state equilibrium. All vectors and angles are in a common plane which we refer
+to as the Cassini plane. The light grey, white, and dark grey ellipsoids represent a polar cross-section of
+the mantle, fluid core and inner core, respectively; blue shaded parts show an equatorial cross section.
+The black curved arrow in the equatorial plane in (a) indicates the direction of rotation of the equatorial
+mantle axes $\hat{e}_1^p$ and $\hat{e}_2^p$ with respect to the Cassini plane. Viewed in the frame attached to the rotating
+mantle (b), the Cassini plane is rotating at frequency $\omega\Omega_o = -\Omega_o - \Omega_p \cos I$ in the longitudinal direction. The oblateness of all three regions and the amplitude of all angles are exaggerated for purpose of
+illustration.
 –7–
-Confidential manuscript submitted to JGR-Planets
-2.2 The rotational model
-Mercury’s rotation is characterized by a 3:2 spin-orbit resonance in which it completes
-3 rotations around itself for every 2 orbital revolutions around the Sun. The orbital period is
-87.96935 day and the sidereal rotation period is 58.64623 day [Stark et al., 2015b]. These de￾fine the mean motion n = 2π/87.96935 day−1 and the sidereal frequency Ωo = 2π/58.64623
-day−1
-, with Ωo = 1.5 n. Mercury’s rotational state is also characterized by a Cassini state whereby
-the orientations of the orbit normal (eˆ
-I
-3
-) and of the mantle symmetry axis (eˆ
-p
-3
-) are both copla￾nar with, and precess about, the normal to the Laplace plane (eˆ
-L
-3
-). The orientation of the Laplace
-plane varies on long timescales, but it can be taken as invariable in inertial space for our present
-purpose. The Cassini state of Mercury is illustrated in Figure 1. The angle between eˆ
-L
-3
-and eˆ
-I
-3
-is the orbital inclination I = 8.5330◦
-[Baland et al., 2017], the angle between eˆ
-I
-3
-and eˆ
-p
-3
-is the
-obliquity εm and the angle between eˆ
-L
-3
-and eˆ
-p
-3
-is θp = I + εm. The precession of eˆ
-I
-3
-and eˆ
-p
-3
-about the Laplace pole is retrograde with frequency Ωp = 2π/325, 513 yr−1
-[Baland et al., 2017].
-The mantle and crust are welded together and form a single rotating region which we re￾fer to as the ‘mantle’ in the context of our rotational model. The rotation and symmetry axes
-of the mantle are expected to remain in close alignment, but they do not coincide exactly. We
-define the rotation rate vector of the mantle by Ω, and its misalignment from eˆ
-p
-3 by an angle
-θm. Note that θm  εm and it is often the spin axis of Mercury which is used to define the
-obliquity εm [e.g. Baland et al., 2017]. If Mercury were an entirely rigid planet, eˆ
-p
-3
-and Ω would
-characterize the symmetry and rotation axes of the whole of Mercury, not just its mantle, and
-the angles I, εm and θm would completely describe the Cassini state. The presence of a fluid
-outer core and solid inner core require three additional orientation vectors and angles. The sym￾metry axis of the inner core is defined by unit vector eˆ
-s
-3
-and its misalignment from eˆ
-p
-3 by an
-angle θn. The rotation vectors of the fluid core and inner core are defined as Ωf and Ωs, re￾spectively, and their misalignment from the rotation vector of the mantle Ω are defined by an￾gles θf and θs (see Figure 2a). The rotation and symmetry axes of the inner core remain in close
-alignment, so θn ≈ θs. To be formal in our definition of the different angles of misalignment,
-for I defined positive as depicted on Figure 2a, all angles are defined positive in the clockwise
-direction.
-At equilibrium in the Cassini state, the three orientation vectors (eˆ
-I
-3
-, eˆ
-p
-3
-, eˆ
-s
-3
-) and three
-rotation vectors (Ω, Ωf , Ωs) are forced to precess about eˆ
-L
-3
-at the same frequency. If we ne￾glect dissipation, all vectors lie on the same plane, which we refer to as the Cassini plane. Viewed
-in inertial space, the Cassini plane is rotating in a retrograde direction at frequency Ωp. Viewed
-in the frame attached to the mantle rotating at sidereal frequency Ωo, the Cassini plane is ro￾tating in a retrograde direction at frequency ωΩo (see Figure 2b), where ω, expressed in cycles
-per Mercury day, is equal to
-ω = −1 − δω cos(θp). (5)
-The factor δω = Ωp/Ωo = 4.933 × 10−7
-is the Poincar´e number, expressing the ratio of the
-forced precession to sidereal rotation frequencies. The invariance of the Laplace plane normal
-as seen in the mantle frame is expressed as
-d
-dteˆ
-L
-3 + Ω × eˆ
-L
-3 = 0 , (6)
-or equivalently, by Equation (19e) of Stys and Dumberry [2018],
-ω sin(θp) + sin(θm + θp) = 0 . (7)
+Confidential manuscript submitted to JGR-Planets
+2.2 The rotational model
+Mercury’s rotation is characterized by a 3:2 spin-orbit resonance in which it completes
+3 rotations around itself for every 2 orbital revolutions around the Sun. The orbital period is
+87.96935 day and the sidereal rotation period is 58.64623 day [Stark et al., 2015b]. These define the mean motion $n = 2\pi/87.96935$ day$^{-1}$ and the sidereal frequency $\Omega_o = 2\pi/58.64623$
+day$^{-1}$, with $\Omega_o = 1.5\,n$. Mercury’s rotational state is also characterized by a Cassini state whereby
+the orientations of the orbit normal ($\hat{e}_3^I$) and of the mantle symmetry axis ($\hat{e}_3^p$) are both coplanar with, and precess about, the normal to the Laplace plane ($\hat{e}_3^L$). The orientation of the Laplace
+plane varies on long timescales, but it can be taken as invariable in inertial space for our present
+purpose. The Cassini state of Mercury is illustrated in Figure 1. The angle between $\hat{e}_3^L$ and $\hat{e}_3^I$
+is the orbital inclination $I = 8.5330^\circ$ [Baland et al., 2017], the angle between $\hat{e}_3^I$ and $\hat{e}_3^p$ is the
+obliquity $\varepsilon_m$ and the angle between $\hat{e}_3^L$ and $\hat{e}_3^p$ is $\theta_p = I + \varepsilon_m$. The precession of $\hat{e}_3^I$ and $\hat{e}_3^p$
+about the Laplace pole is retrograde with frequency $\Omega_p = 2\pi/325{,}513$ yr$^{-1}$ [Baland et al., 2017].
+The mantle and crust are welded together and form a single rotating region which we refer to as the ‘mantle’ in the context of our rotational model. The rotation and symmetry axes
+of the mantle are expected to remain in close alignment, but they do not coincide exactly. We
+define the rotation rate vector of the mantle by $\boldsymbol{\Omega}$, and its misalignment from $\hat{e}_3^p$ by an angle
+$\theta_m$. Note that $\theta_m \ll \varepsilon_m$ and it is often the spin axis of Mercury which is used to define the
+obliquity $\varepsilon_m$ [e.g. Baland et al., 2017]. If Mercury were an entirely rigid planet, $\hat{e}_3^p$ and $\boldsymbol{\Omega}$ would
+characterize the symmetry and rotation axes of the whole of Mercury, not just its mantle, and
+the angles $I$, $\varepsilon_m$ and $\theta_m$ would completely describe the Cassini state. The presence of a fluid
+outer core and solid inner core requires three additional orientation vectors and angles. The symmetry axis of the inner core is defined by unit vector $\hat{e}_3^s$ and its misalignment from $\hat{e}_3^p$ by an
+angle $\theta_n$. The rotation vectors of the fluid core and inner core are defined as $\boldsymbol{\Omega}_f$ and $\boldsymbol{\Omega}_s$, respectively, and their misalignments from the rotation vector of the mantle $\boldsymbol{\Omega}$ are defined by angles $\theta_f$ and $\theta_s$ (see Figure 2a). The rotation and symmetry axes of the inner core remain in close
+alignment, so $\theta_n \approx \theta_s$. To be formal in our definition of the different angles of misalignment,
+for $I$ defined positive as depicted on Figure 2a, all angles are defined positive in the clockwise
+direction.
+At equilibrium in the Cassini state, the three orientation vectors ($\hat{e}_3^I$, $\hat{e}_3^p$, $\hat{e}_3^s$) and three
+rotation vectors ($\boldsymbol{\Omega}$, $\boldsymbol{\Omega}_f$, $\boldsymbol{\Omega}_s$) are forced to precess about $\hat{e}_3^L$ at the same frequency. If we neglect dissipation, all vectors lie on the same plane, which we refer to as the Cassini plane. Viewed
+in inertial space, the Cassini plane is rotating in a retrograde direction at frequency $\Omega_p$. Viewed
+in the frame attached to the mantle rotating at sidereal frequency $\Omega_o$, the Cassini plane is rotating in a retrograde direction at frequency $\omega\Omega_o$ (see Figure 2b), where $\omega$, expressed in cycles
+per Mercury day, is equal to
+$$\omega = -1 - \delta\omega \cos(\theta_p) . \tag{5}$$
+The factor $\delta\omega = \Omega_p/\Omega_o = 4.933 \times 10^{-7}$ is the Poincaré number, expressing the ratio of the
+forced precession to sidereal rotation frequencies. The invariance of the Laplace plane normal
+as seen in the mantle frame is expressed as
+$$\frac{d}{dt}\hat{e}_3^L + \boldsymbol{\Omega} \times \hat{e}_3^L = 0 , \tag{6}$$
+or equivalently, by Equation (19e) of Stys and Dumberry [2018],
+ω sin(θp) + sin(θm + θp) = 0 . (7)
 –8–
-Confidential manuscript submitted to JGR-Planets
-This expresses a formal connection between θp and θm which is independent of the interior struc￾ture of Mercury. Using Equation (5) and cos(θm) → 1, this connection can be rewritten as
-sin(θm) = δω sin(θp). (8)
-and thus the relative amplitudes of θm and θp depend of the Poincar´e number δω.
-To investigate Mercury’s response to the gravitational torque from the Sun, we take ad￾vantage of the framework developed in Mathews et al. [1991] to model the forced nutations of
-Earth [see also Mathews et al., 2002; Dehant and Mathews, 2015]. This model takes into ac￾count the pressure torque (also referred to as the inertial torque) that results when the spin axis
-of the fluid core is misaligned from the symmetry axes of the elliptical surfaces of the CMB and
-ICB. It also includes the gravitational torque exerted on the inner core when it is misaligned
-with the mantle. Electromagnetic and viscous torques at both the CMB and ICB have been
-incorporated into the framework [e.g Buffett, 1992; Buffett et al., 2002; Mathews and Guo, 2005;
-Deleplace and Cardin, 2006]. The framework was adapted to model the Cassini state of the Moon
-in Dumberry and Wieczorek [2016] and further developed in Stys and Dumberry [2018] and Organowski
-and Dumberry [2020]. We adapt it here to capture the Cassini state of Mercury.
-Because the forced precession period is much longer than the rotation and orbital peri￾ods of Mercury, the gravitational solar torque that is relevant to the Cassini state is the mean
-torque averaged over one orbit. This mean torque is perpendicular to the Cassini plane, point￾ing in the same direction as the vector connecting the Sun to the descending node of Mercury’s
-orbit in Figure 1. Hence, viewed from the mantle frame, the orientation of this mean torque
-is periodic, rotating at frequency ωΩo. Setting the equatorial directions eˆ
-p
-1
-and eˆ
-p
-2
-to correspond
-to the real and imaginary axes of the complex plane, respectively, we can write the equatorial
-components of this periodic applied torque in a compact form as
-Γ1(t) + iΓ2(t) = −i Γ( ˜ ω) exp[iωΩot] , (9)
-where Γ( ˜ ω) represents the amplitude of the torque at frequency ωΩo. In response to this torque,
-the axes defining all angles (θp, εm, θm, θf , θs, θn) as viewed in the mantle frame are also ro￾tating at frequency ωΩo (see Figure 2). The longitudinal direction of each of these angles at
-a specific time t can then also be written in the equatorial complex plane and is proportional
-to exp[iωΩot]. For instance, the two equatorial time-dependent components θm1 and θm2 of the
-angle θm, as seen in the mantle frame, can be written as
-θm1(t) + iθm2(t) = ˜m exp[iωΩot] , (10a)
-where
-m˜ ≡ m˜ (ω) = Re[ ˜m] + iIm[ ˜m] , (10b)
-is the amplitude at frequency ωΩo. Equivalent definitions apply for all other angles, with the
-connection as follows:
-θm ⇔ m , θ ˜ f ⇔ m˜ f , θs ⇔ m˜ s , θn ⇔ n˜s , θp ⇔ p , ε ˜ m ⇔ ε˜m . (11)
-The notation ˜m, ˜mf , ˜ms, ˜ns follows that introduced in the original model of Mathews et al. [1991].
-Note that all tilded amplitudes are complex: their imaginary part reflects the out-of-phase re￾sponse to the applied torque as a result of dissipation, for instance from viscous or EM coupling
+Confidential manuscript submitted to JGR-Planets
+This expresses a formal connection between θp and θm which is independent of the interior structure of Mercury. Using Equation (5) and cos(θm) → 1, this connection can be rewritten as
+sin(θm) = δω sin(θp). (8)
+and thus the relative amplitudes of $\theta_m$ and $\theta_p$ depend on the Poincaré number $\delta\omega$.
+To investigate Mercury’s response to the gravitational torque from the Sun, we take advantage of the framework developed in Mathews et al. [1991] to model the forced nutations of
+Earth [see also Mathews et al., 2002; Dehant and Mathews, 2015]. This model takes into account the pressure torque (also referred to as the inertial torque) that results when the spin axis
+of the fluid core is misaligned from the symmetry axes of the elliptical surfaces of the CMB and
+ICB. It also includes the gravitational torque exerted on the inner core when it is misaligned
+with the mantle. Electromagnetic and viscous torques at both the CMB and ICB have been
+incorporated into the framework [e.g Buffett, 1992; Buffett et al., 2002; Mathews and Guo, 2005;
+Deleplace and Cardin, 2006]. The framework was adapted to model the Cassini state of the Moon
+in Dumberry and Wieczorek [2016] and further developed in Stys and Dumberry [2018] and Organowski
+and Dumberry [2020]. We adapt it here to capture the Cassini state of Mercury.
+Because the forced precession period is much longer than the rotation and orbital periods of Mercury, the gravitational solar torque that is relevant to the Cassini state is the mean
+torque averaged over one orbit. This mean torque is perpendicular to the Cassini plane, pointing in the same direction as the vector connecting the Sun to the descending node of Mercury’s
+orbit in Figure 1. Hence, viewed from the mantle frame, the orientation of this mean torque
+is periodic, rotating at frequency $\omega\Omega_o$. Setting the equatorial directions $\hat{e}_1^p$ and $\hat{e}_2^p$ to correspond
+to the real and imaginary axes of the complex plane, respectively, we can write the equatorial
+components of this periodic applied torque in a compact form as
+$$\Gamma_1(t) + i\Gamma_2(t) = -i\,\tilde{\Gamma}(\omega) \exp[i\omega\Omega_o t] , \tag{9}$$
+where $\tilde{\Gamma}(\omega)$ represents the amplitude of the torque at frequency $\omega\Omega_o$. In response to this torque,
+the axes defining all angles (θp, εm, θm, θf , θs, θn) as viewed in the mantle frame are also rotating at frequency ωΩo (see Figure 2). The longitudinal direction of each of these angles at
+a specific time t can then also be written in the equatorial complex plane and is proportional
+to exp[iωΩot]. For instance, the two equatorial time-dependent components θm1 and θm2 of the
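+angle $\theta_m$, as seen in the mantle frame, can be written as
+$$\theta_{m1}(t) + i\theta_{m2}(t) = \tilde{m} \exp[i\omega\Omega_o t] , \tag{10a}$$
+where
+$$\tilde{m} \equiv \tilde{m}(\omega) = \mathrm{Re}[\tilde{m}] + i\,\mathrm{Im}[\tilde{m}] , \tag{10b}$$
+is the amplitude at frequency $\omega\Omega_o$. Equivalent definitions apply for all other angles, with the
+connection as follows:
+$$\theta_m \Leftrightarrow \tilde{m} , \quad \theta_f \Leftrightarrow \tilde{m}_f , \quad \theta_s \Leftrightarrow \tilde{m}_s , \quad \theta_n \Leftrightarrow \tilde{n}_s , \quad \theta_p \Leftrightarrow \tilde{p} , \quad \varepsilon_m \Leftrightarrow \tilde{\varepsilon}_m . \tag{11}$$
+The notation $\tilde{m}$, $\tilde{m}_f$, $\tilde{m}_s$, $\tilde{n}_s$ follows that introduced in the original model of Mathews et al. [1991].
+Note that all tilded amplitudes are complex: their imaginary part reflects the out-of-phase response to the applied torque as a result of dissipation, for instance from viscous or EM coupling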
 –9–
-Confidential manuscript submitted to JGR-Planets
-at the boundaries of the fluid core. In the absence of dissipation, all tilded variables are purely
-real. We concentrate our analysis in this work on the real part of the solutions, which corre￾sponds to the mutual alignment of these five rotation angles in the Cassini plane. As such, ˜εm
-corresponds to the observed obliquity of the mantle symmetry axis. It is thus equivalent to εm,
-though we keep the tilde notation in the presentation of our results to emphasize that it rep￾resents the real part of the solution from our system. Furthermore, since ˜m  ε˜m, we often
-refer to ˜εm as the orientation of spin axis of the mantle, since the Cassini state of Mercury is
-more customarily described in terms of the latter in the literature.
-The model of Mathews et al. [1991] is developed under the assumption of small angles as
-appropriate for the nutations on Earth. The details on how the equations of the model are de￾rived can found in Mathews et al. [1991] and in Dumberry and Wieczorek [2016]. Three equa￾tions describe, respectively, the time rate of change of the angular momenta of the whole of Mer￾cury, the fluid core, and the inner core in the reference frame of the rotating mantle. These three
-equations are
-(ω − e) ˜m + (1 + ω)
-"
-A¯
-f
-A¯
-m˜ f +
-A¯
-s
-A¯
-m˜ s + α3es
-A¯
-s
-A¯
-n˜s
-#
-=
-1
-iΩ2
-oA¯
-
-Γ˜
-sun
-, (12a)
-ωm˜ + (1 + ω + ef ) ˜mf − ωα1es
-A¯
-s
-A¯
-f
-n˜s =
-1
-iΩ2
-oA¯
-f
-
-− Γ˜
-cmb − Γ˜
-icb
-, (12b)
-(ω − α3es) ˜m + α1esm˜ f + (1 + ω) ˜ms + (1 + ω − α2) esn˜s =
-1
-iΩ2
-oA¯
-s
-
-Γ˜s
-sun + Γ˜
-icb
-, (12c)
-and a fourth equation consists of a kinematic relation that expresses the change in the orien￾tation of the inner core figure as a result of its own rotation,
-m˜ s + ωn˜s = 0 . (12d)
-In these equations, the parameters α1, α2 and α3 involve the density contrast at the ICB
-and are given by
-α1 =
-ρf
-ρs
-, α3 = 1 − α1 , α2 = α1 − α3αg , (13a)
-where the parameter αg is a measure of the ratio of the gravitational to inertial torque applied
-on the inner core,
-αg =
-8πG
-5Ω2
-o
-[ρc(r − m) + ρm(m − f ) + ρf f ] , (13b)
-where G is the gravitational constant.
-Γ˜
-sun is the amplitude of the gravitational torque by the Sun on the whole of Mercury. For
-a small mantle obliquity ˜εm and a small inner core tilt ˜ns, it is given by
-Γ˜
-sun = −iΩ
-2
-oA¯
-
-φmε˜m +
-A¯
-s
-A¯
-α3φsn˜s
-
-, (14)
-where
+Confidential manuscript submitted to JGR-Planets
+at the boundaries of the fluid core. In the absence of dissipation, all tilded variables are purely
+real. We concentrate our analysis in this work on the real part of the solutions, which corresponds to the mutual alignment of these five rotation angles in the Cassini plane. As such, $\tilde{\varepsilon}_m$
+corresponds to the observed obliquity of the mantle symmetry axis. It is thus equivalent to $\varepsilon_m$,
+though we keep the tilde notation in the presentation of our results to emphasize that it represents the real part of the solution from our system. Furthermore, since $\tilde{m} \ll \tilde{\varepsilon}_m$, we often
+refer to $\tilde{\varepsilon}_m$ as the orientation of the spin axis of the mantle, since the Cassini state of Mercury is
+more customarily described in terms of the latter in the literature.
+The model of Mathews et al. [1991] is developed under the assumption of small angles as
+appropriate for the nutations on Earth. The details on how the equations of the model are derived can be found in Mathews et al. [1991] and in Dumberry and Wieczorek [2016]. Three equations describe, respectively, the time rate of change of the angular momenta of the whole of Mercury, the fluid core, and the inner core in the reference frame of the rotating mantle. These three
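+equations are
+$$(\omega - e)\,\tilde{m} + (1 + \omega) \left[ \frac{\bar{A}_f}{\bar{A}} \tilde{m}_f + \frac{\bar{A}_s}{\bar{A}} \tilde{m}_s + \alpha_3 e_s \frac{\bar{A}_s}{\bar{A}} \tilde{n}_s \right] = \frac{1}{i\Omega_o^2 \bar{A}} \left( \tilde{\Gamma}_{\rm sun} \right) , \tag{12a}$$
+$$\omega \tilde{m} + (1 + \omega + e_f)\,\tilde{m}_f - \omega \alpha_1 e_s \frac{\bar{A}_s}{\bar{A}_f} \tilde{n}_s = \frac{1}{i\Omega_o^2 \bar{A}_f} \left( -\tilde{\Gamma}_{\rm cmb} - \tilde{\Gamma}_{\rm icb} \right) , \tag{12b}$$
+$$(\omega - \alpha_3 e_s)\,\tilde{m} + \alpha_1 e_s \tilde{m}_f + (1 + \omega)\,\tilde{m}_s + (1 + \omega - \alpha_2)\, e_s \tilde{n}_s = \frac{1}{i\Omega_o^2 \bar{A}_s} \left( \tilde{\Gamma}^{s}_{\rm sun} + \tilde{\Gamma}_{\rm icb} \right) , \tag{12c}$$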
+and a fourth equation consists of a kinematic relation that expresses the change in the orientation of the inner core figure as a result of its own rotation,
+m˜ s + ωn˜s = 0 . (12d)
+In these equations, the parameters α1, α2 and α3 involve the density contrast at the ICB
+and are given by
+$$\alpha_1 = \frac{\rho_f}{\rho_s} , \quad \alpha_3 = 1 - \alpha_1 , \quad \alpha_2 = \alpha_1 - \alpha_3 \alpha_g , \tag{13a}$$
+where the parameter αg is a measure of the ratio of the gravitational to inertial torque applied
+on the inner core,
+$$\alpha_g = \frac{8\pi G}{5\Omega_o^2} \left[ \rho_c(\epsilon_r - \epsilon_m) + \rho_m(\epsilon_m - \epsilon_f) + \rho_f \epsilon_f \right] , \tag{13b}$$
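+where $G$ is the gravitational constant.
+$\tilde{\Gamma}_{\rm sun}$ is the amplitude of the gravitational torque by the Sun on the whole of Mercury. For
+a small mantle obliquity $\tilde{\varepsilon}_m$ and a small inner core tilt $\tilde{n}_s$, it is given by
+$$\tilde{\Gamma}_{\rm sun} = -i\Omega_o^2 \bar{A} \left( \phi_m \tilde{\varepsilon}_m + \frac{\bar{A}_s}{\bar{A}} \alpha_3 \phi_s \tilde{n}_s \right) , \tag{14}$$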
+where
 –10–
-Confidential manuscript submitted to JGR-Planets
-φm =
-3
-2
-n
-2
-Ω2
-o
-
-G210 e +
-1
-2
-G201 γ
-
-, (15a)
-φs =
-3
-2
-n
-2
-Ω2
-o
-
-G210 es +
-1
-2
-G201 γs
-
-, (15b)
-and where G210 and G201 are functions of the orbital eccentricity ec,
-G210 =
-1
-(1 − e
-2
-c
-)
-3/2
-, (16a)
-G201 =
-7
-2
-ec −
-123
-16
-e
-3
-c +
-489
-128
-e
-5
-c
-. (16b)
-The gravitational torque by the Sun acting on the inner core alone, Γ˜s
-sun, is
-Γ˜s
-sun = −iΩ
-2
-oA¯
-sα3φs(˜εm + ˜ns). (17)
-Γ˜
-cmb and Γ˜
-icb are the torques from tangential stresses by the fluid core on the mantle at the
-CMB and on the inner core at the ICB, respectively. These torques can be parameterized in
-terms of dimensionless complex coupling constants Kicb and Kcmb and the differential angu￾lar velocities at each boundary [e.g Buffett, 1992; Buffett et al., 2002],
-Γ˜
-icb = iΩ
-2
-oA¯
-sKicb( ˜mf − m˜ s), (18a)
-Γ˜
-cmb = iΩ
-2
-oA¯
-fKcmb m˜ f . (18b)
-Specific expressions for Kicb and Kcmb are delayed to sections 4 and 5 when we consider the
-effects of viscous and EM coupling, respectively.
-A fifth equation is required to connect this interior model to the obliquity of the mantle,
-and this is provided by Equation (7). For small angles θm and θp, this gives [e.g. Mathews et al.,
-1991; Dumberry and Wieczorek, 2016; Baland et al., 2019]
-m˜ + (1 + ω)˜p = 0 . (19)
-For Mercury, it is more convenient to connect the internal model with ˜εm instead of ˜p. This
-is because θp ≈ 8.567◦ whereas ˜εm ≈ 2 arcmin and thus the latter obeys more strictly the
-condition of small angles assumed in our framework. Furthermore, the external torques act￾ing on the whole planet (Equation 14) and inner core (Equation 17) depend linearly on ˜εm. Writ￾ten in terms of ˜εm, and with the approximation of ˜εm  1 and ˜m  1, Equation (7) becomes
-m˜ + (1 + ω)˜εm = −(1 + ω) tan I . (20)
-Likewise, the frequency ω from Equation (5) can be written simply in terms of I,
-ω = −1 − δω cos I . (21)
-The set of four Equations (12) with the addition of Equation (20) form a linear system
-of equations for the five rotational variables ˜m, ˜mf , ˜ms, ˜ns and ˜εm. It captures the response
-of Mercury, in the frequency domain, when subject to a periodic solar torque applied at fre￾quency ω. The system can be written in a matrix form as
+Confidential manuscript submitted to JGR-Planets
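+$$\phi_m = \frac{3}{2} \frac{n^2}{\Omega_o^2} \left[ G_{210}\, e + \frac{1}{2} G_{201}\, \gamma \right] , \tag{15a}$$
+$$\phi_s = \frac{3}{2} \frac{n^2}{\Omega_o^2} \left[ G_{210}\, e_s + \frac{1}{2} G_{201}\, \gamma_s \right] , \tag{15b}$$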
+and where G210 and G201 are functions of the orbital eccentricity ec,
+$$G_{210} = \frac{1}{(1 - e_c^2)^{3/2}} , \tag{16a}$$
+$$G_{201} = \frac{7}{2} e_c - \frac{123}{16} e_c^3 + \frac{489}{128} e_c^5 . \tag{16b}$$
+The gravitational torque by the Sun acting on the inner core alone, $\tilde{\Gamma}^{s}_{\rm sun}$, is
+$$\tilde{\Gamma}^{s}_{\rm sun} = -i\Omega_o^2 \bar{A}_s \alpha_3 \phi_s (\tilde{\varepsilon}_m + \tilde{n}_s) . \tag{17}$$
+$\tilde{\Gamma}_{\rm cmb}$ and $\tilde{\Gamma}_{\rm icb}$ are the torques from tangential stresses by the fluid core on the mantle at the
+CMB and on the inner core at the ICB, respectively. These torques can be parameterized in
+terms of dimensionless complex coupling constants $K_{icb}$ and $K_{cmb}$ and the differential angular velocities at each boundary [e.g. Buffett, 1992; Buffett et al., 2002],
+$$\tilde{\Gamma}_{\rm icb} = i\Omega_o^2 \bar{A}_s K_{icb} (\tilde{m}_f - \tilde{m}_s) , \tag{18a}$$
+$$\tilde{\Gamma}_{\rm cmb} = i\Omega_o^2 \bar{A}_f K_{cmb}\, \tilde{m}_f . \tag{18b}$$
+Specific expressions for Kicb and Kcmb are delayed to sections 4 and 5 when we consider the
+effects of viscous and EM coupling, respectively.
+A fifth equation is required to connect this interior model to the obliquity of the mantle,
+and this is provided by Equation (7). For small angles $\theta_m$ and $\theta_p$, this gives [e.g. Mathews et al.,
+1991; Dumberry and Wieczorek, 2016; Baland et al., 2019]
+$$\tilde{m} + (1 + \omega)\,\tilde{p} = 0 . \tag{19}$$
+For Mercury, it is more convenient to connect the internal model with $\tilde{\varepsilon}_m$ instead of $\tilde{p}$. This
+is because $\theta_p \approx 8.567^\circ$ whereas $\tilde{\varepsilon}_m \approx 2$ arcmin and thus the latter obeys more strictly the
+condition of small angles assumed in our framework. Furthermore, the external torques acting on the whole planet (Equation 14) and inner core (Equation 17) depend linearly on $\tilde{\varepsilon}_m$. Written in terms of $\tilde{\varepsilon}_m$, and with the approximation of $\tilde{\varepsilon}_m \ll 1$ and $\tilde{m} \ll 1$, Equation (7) becomes
+$$\tilde{m} + (1 + \omega)\,\tilde{\varepsilon}_m = -(1 + \omega) \tan I . \tag{20}$$
+Likewise, the frequency $\omega$ from Equation (5) can be written simply in terms of $I$,
+$$\omega = -1 - \delta\omega \cos I . \tag{21}$$
+The set of four Equations (12) with the addition of Equation (20) form a linear system
+of equations for the five rotational variables $\tilde{m}$, $\tilde{m}_f$, $\tilde{m}_s$, $\tilde{n}_s$ and $\tilde{\varepsilon}_m$. It captures the response
+of Mercury, in the frequency domain, when subject to a periodic solar torque applied at frequency $\omega$. The system can be written in a matrix form as
 –11–
-Confidential manuscript submitted to JGR-Planets
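-$$\mathsf{M} \cdot \mathbf{x} = \mathbf{y} , \tag{22a}$$
-where the solution ($\mathbf{x}$) and forcing ($\mathbf{y}$) vectors are
-$$\mathbf{x}^T = [\tilde{m}, \tilde{m}_f, \tilde{m}_s, \tilde{n}_s, \tilde{\varepsilon}_m] , \tag{22b}$$
-$$\mathbf{y}^T = [0, 0, 0, 0, -(1 + \omega) \tan I] , \tag{22c}$$
-and the elements of matrix $\mathsf{M}$ are
-$$\mathsf{M} = \begin{pmatrix}
-\omega - e & (1 + \omega)\frac{\bar{A}_f}{\bar{A}} & (1 + \omega)\frac{\bar{A}_s}{\bar{A}} & \frac{\bar{A}_s}{\bar{A}} \alpha_3 \left( (1 + \omega) e_s + \phi_s \right) & \phi_m \\
-\omega & 1 + \omega + e_f + K_{cmb} + \frac{\bar{A}_s}{\bar{A}_f} K_{icb} & -\frac{\bar{A}_s}{\bar{A}_f} K_{icb} & -\omega e_s \alpha_1 \frac{\bar{A}_s}{\bar{A}_f} & 0 \\
-\omega - \alpha_3 e_s & \alpha_1 e_s - K_{icb} & 1 + \omega + K_{icb} & (1 + \omega - \alpha_2) e_s + \alpha_3 \phi_s & \alpha_3 \phi_s \\
-0 & 0 & 1 & \omega & 0 \\
-1 & 0 & 0 & 0 & (1 + \omega)
-\end{pmatrix} . \tag{22d}$$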
-Solutions of the homogeneous system (i.e. y = 0) represent free modes of precession. Three
-modes have periods which, when seen in inertial space, are typically in the range of a few hun￾dred to a few thousand years. The first is the free axial precession of Mercury maintained by
-the solar torque acting on its elliptical figure [e.g. Peale, 2005]. The second is the free core nu￾tation (FCN), which is the free precession of the spin axis of the fluid core about the symme￾try axis of the CMB [e.g. Mathews et al., 1991]. The third is the free inner core nutation (FICN),
-a free mode of rotation similar to the FCN but associated with the inner core [e.g. Mathews et al.,
-1991].
-A few remarks on our model are important to point out before we proceed further. First,
-although we have retained the triaxial shape of Mercury in the expression of the solar torque,
-we treat its angular momentum response as if it were an axially symmetric body. This is con￾venient as the two equatorial angular momentum equations for each region can be combined
-into a single equation. To first order, the frequency of the free precession of Mercury is not largely
-altered by triaxiality [e.g. Peale, 2005]. Baland et al. [2019] showed that the frequencies of the
-FCN and FICN for a triaxial planetary body may be slightly different than those for an axi￾ally symmetric body, but not by large factor. As the response of Mercury to the solar torque
-is largely determined by the resonant amplification due to the presence of these three modes,
-our model should capture correctly the first order Cassini state of Mercury. Considering the
-triaxial shape of Mercury may alter the numerical results, but not our general conclusions.
-Second, our modelling approach is different than in the studies of Peale et al. [2014] and
-Peale et al. [2016]. In these two studies, dynamical models of Mercury’s Cassini state are de￾veloped and must then be integrated in time. The equilibrium Cassini state is the quasi-steady
-state that remains after transient effects associated with the initial conditions have decayed away.
-An advantage of these models compared to ours is that the complete triaxial dynamics of Mer￾cury, including its longitudinal librations, are retained. However, the numerical integration can
-be lengthy if dissipation is weak, which restricts the number of possible interior models of Mer￾cury that can be tested. In contrast, our model is a simple linear system in the frequency do￾main, focused on one specific frequency: the forced precession associated with the Cassini state.
-Solutions are straightforward to obtain for a given interior model, and this allows us to cover
-a larger span of the parameter space. One drawback, however, is that our model does not cap￾ture time-dependent variations at any other frequencies, including the precession of the peri￾center of Mercury’s orbit about the Sun.
-–12 
-Confidential manuscript submitted to JGR-Planets
-2.3 Analytical solutions and limiting cases
-2.3.1 The Cassini state of a single-body, rigid Mercury
-For a rigid planet with no fluid and solid cores, our system of equations reduces to Equa￾tions (12a) and (20),
-(ω − e) ˜m + φm ε˜m = 0 , (23a)
-m˜ + (1 + ω)˜εm = −(1 + ω) tan I . (23b)
-Using Equation (21), δω  1, and the approximation A¯(1 +e+δω cos I) = C +Aδω ¯ cos I ≈
-C, these can be written as
-Cm˜ = Aφ¯ m ε˜m , (24a)
-m˜ = δω￾
-sin I + cos I ε˜m
-
-. (24b)
-Equation (24b) gives a direct relationship between ˜m and ˜εm. For I = 8.5330◦
-, δω =
-4.9327×10−7 and taking ˜εm = 2.04 arcmin, this gives ˜m = 2.52×10−4 arcmin, much smaller
-than ˜εm: the offset of the rotation axis of the mantle with respect to its symmetry axis is very
-small. Substituting Equation (24b) in Equation (24a) gives
-CΩp
-￾
-sin I + cos I ε˜m
-
-= A¯Ωoφmε˜m , (25)
-and isolating for ˜εm,
-ε˜m =
-CΩp sin I
-−CΩp cos I + A¯Ωoφm
-. (26)
-Upon using Equations (4), (15a), and Ωo =
-3
-2
-n, we can write
-ε˜m =
-CΩp sin I
-−CΩp cos I + nMR2 (G210J2 + 2G201C22)
-. (27)
-This is the standard prediction for the obliquity of a rigid Mercury occupying Cassini state 1
-[see for instance Equation (1) of Baland et al., 2017, where their definition of Ω is equal to ˙ −Ωp].
-Hence, in the absence of a fluid core and inner core, our system retrieves the Cassini state of
-Mercury correctly. Equation (27) can be manipulated to solve instead for the normalized mo￾ment of inertia Cˆ,
-Cˆ =
-C
-MR2
-=
-n
-Ωp
-G210J2 + 2G201C22
-cos I + sin I/ε˜m
-. (28)
-which is equivalent to Equation (89) of Van Hoolst [2015]. It is based on the latter equation
-that a measurement of the obliquity gives a constraint on Cˆ.
-Two free modes of precession are found by setting y = 0 in Equation (23). One mode cor￾responds to the Eulerian wobble, or Chandler wobble, and represents the prograde precession
-of the rotation axis about the symmetry axis. The second mode is the free retrograde axial pre￾cession of Mercury. As seen in the inertial frame, its frequency is given by
-–1  
-Confidential manuscript submitted to JGR-Planets
-ωfp = n
-MR2
-C
-
-G210J2 + 2G201C22
-, (29)
-which is equivalent to the prediction by Peale [2005] when neglecting its small elliptical com￾ponent. Note that in Peale [2005] it was assumed that only the mantle was involved in the solid￾body precession and hence C was replaced by Cm. Using C = 0.346 · MR2
-[Margot et al.,
-2012] and the numerical values for n, J2, C22 and ec given in Table 1, we obtain a free preces￾sion period of Tfp = 2π/ωfp = 1298 yr. If we use Cm instead of C in Equation (29), and take
-Cm = 0.431·C = 0.431·0.346·MR2
-[Margot et al., 2012], we obtain Tfp = 2π/ωfp = 560 yr.
-These estimates are similar to those obtained by Peale [2005]. Because the CMB is elliptical,
-the pressure torque exerted on the fluid core by the mantle leads to an entrainment of the fluid
-core, the degree of which depends on the amplitude of the pole-to-equator CMB flattening. The
-true free precession period lies somewhere between 560 and 1298 yr. Regardless of its exact value,
-the free precession period is much shorter than the forcing period of 325 kyr. Using Equation
-(29), Equation (27) can be written as [e.g. Baland et al., 2017]
-ε˜m =
-Ωp sin I
-−Ωp cos I + ωfp
-. (30)
-The obliquity of Mercury is thus determined by how the forcing frequency Ωp compares with
-the free precession frequency ωfp. Because ωfp > Ωp, Mercury occupies Cassini state 1 [Peale,
-1974]. Furthermore, Equation (30) shows that a large obliquity can be generated by resonant
-amplification if Ωp ≈ ωfp. Since ωfp  Ωp, resonant amplification is minimal and the re￾sulting obliquity, ˜εm ≈ 2 arcmin, is much smaller than the inclination angle I ≈ 8.5
-◦
-.
-2.3.2 The misalignment of the fluid and solid cores
-With ω = −1 − δω cos I and δω  1, Equation (12d) gives ˜ns ≈ m˜ s; as for the mantle,
-the rotation and symmetry axes of the inner core remain closely aligned in the Cassini state.
-The relationship between ˜m and ˜εm of Equation (24b) is independent of the interior structure,
-so it remains unchanged when a fluid and a solid cores are present. Substituting it in Equa￾tion (12a), and setting ˜ns = ˜ms, the angular momentum equation of the whole planet becomes
-CΩp
-￾
-sin I + cos I ε˜m
-
-+ (A¯
-f cos I Ωp) ˜mf + A¯
-s(cos I Ωp − Ωoα3φs)˜ns = A¯Ωoφmε˜m . (31)
-This latter equation shows how the misaligned inner core and fluid core can lead to a modifi￾cation of the mantle obliquity ˜εm. Approximate analytical solutions of ˜ns and ˜mf are given by
-n˜s ≈
-Ωp
-κλs
-
-1 +
-Ωo(Kicb − α1es)
-λf
-
-￾
-sin I + cos I ε˜m
-
-−
-Ωoα3φs
-κλs
-ε˜m , (32a)
-m˜ f ≈
-Ωp
-λf
-￾
-sin I + cos I ε˜m
-
-+
-Ωo
-λf
-A¯
-s
-A¯
-f
-￾
-Kicb − α1es
-
-n˜s , (32b)
-where
-κ = 1 −
-A¯
-s
-A¯
-f
-Ω
-2
-o
-￾
-Kicb − α1es
-2
-λs λf
-, (33a)
-λf = ¯σf − Ωp cos I , (33b)
-λs = ¯σs − Ωp cos I , (33c)     
-Confidential manuscript submitted to JGR-Planets
-and where we have introduced the frequencies
-σ¯f = Ωo
-
-ef + Kcmb +
-A¯
-s
-A¯
-f
-Kicb
-, (33d)
-σ¯s = Ωo
-
-esα3αg − esα1 + α3φs + Kicb
-. (33e)
-These solutions are good approximations for all the results that we present in section 3. For
-an observed mantle obliquity ˜εm and for a chosen set of interior model parameters, they pro￾vide useful predictions of ˜ns and ˜mf .
-In the limit of a very strong coupling between the fluid core, solid core and mantle, ¯σs 
-Ωp and ¯σf  Ωp, so that ˜ns → 0, ˜mf → 0 and Equation (31) reverts back to Equation (25)
-for a rigid planet. In the opposite limit of no coupling between the fluid core, solid core and
-mantle (i.e. for spherical internal boundaries, ef = es = γs = 0 and no viscous or EM cou￾pling, Kcmb = Kicb = 0), then
-φs = 0 , κ = 1 , λf = λs = −Ωp cos I , m˜ f = ˜ns = −(tan I + ˜εm). (34)
-Inserting these in Equation (31), and with the moment of inertia of the mantle equal to Cm =
-C − A¯
-f − A¯
-s, we obtain
-Cm Ωp
-￾
-sin I + cos I ε˜m
-
-= A¯Ωoφmε˜m . (35)
-which describes, as expected, a forced precession of the mantle alone. If this was the case for
-Mercury, taking Cm/C = 0.431, the obliquity should be ˜εm ≈ 0.88 arcmin, substantially smaller
-than the observed obliquity of ˜εm ≈ 2 arcmin.
-If ¯σf ≈ Ωp (and thus λf → 0) and/or ¯σs ≈ Ωp (and thus λs → 0) resonant amplifica￾tion leads to large amplitudes for ˜mf , ˜ns and the mantle obliquity ˜εm. The frequencies ¯σf and
-σ¯s are closely related to the FCN and FICN frequencies ωf cn and ωf icn, respectively. Hence,
-just as a large mantle obliquity can result from resonant amplification when the forcing frequency
-approaches the free precession frequency, a large mantle obliquity can likewise result from res￾onant amplification when the forcing frequency approaches the FCN or FICN frequencies. These
-frequencies depend on the interior density structure and are not known. However, we will show
-that for reasonable interior models of Mercury, the FCN and FICN periods are in the range of
-a few hundred yr. This is sufficiently far from the forcing period (325 kyr) that we do not ex￾pect an important amplification effect. Furthermore, since ωf cn, ωf icn  Ωp, then ¯σf  Ωp
-and ¯σs  Ωp, and we are in the strong coupling limit. The mantle obliquity should be close
-to that expected for a rigid planet, as observations suggest. Therefore, we expect that ˜mf and
-n˜s should be of the order of ˜εm or smaller. This further justifies the assumption of small an￾gles that we have adopted.
-3 Results
-3.1 Geodetic constraints and interior density structure
-All our interior models are constrained to match the mass M of Mercury and specific choices
-of Cˆ = C/MR2 and Cm/C. The choice of Cˆ is determined from Equation (28). For the pa￾rameters listed in Table 1, and an observed obliquity of εm = 2.04 arcmin [Margot et al., 2012],
-this gives Cˆ = C/MR2 = 0.3455 and all our interior models are consistent with this choice.
-Obviously, this reflects a Cassini state equilibrium in which the fluid core and inner core are
-–15 
-Confidential manuscript submitted to JGR-Planets
-perfectly aligned with the mantle, which is not strictly correct. Hence, we make an error in es￾timating Cˆ from Equation (28), or conversely in predicting εm based on a given choice for Cˆ.
-Part of the objective of our study is to estimate how large this error is. The ratio Cm/C is ob￾tained from the amplitude of the 88-day longitudinal mantle libration φo, which is given by
-φo = 6 · f(ec)C22
-MR2
-C
-C
-Cm
-1
-1 + ζ
-, (36)
-where
-f(ec) = 1 − 11e
-2
-c +
-959
-48
-e
-4
-c
-, (37)
-and where ζ is a correction that takes into account the entrainment of the inner core in the li￾bration [Van Hoolst et al., 2012; Dumberry et al., 2013; Dumberry and Rivoldini, 2015]; this cor￾rection is small and, to simplify, we neglect it here. Taking the observed libration amplitude
-to be 38.5 arcsec [Margot et al., 2012], Cˆ = C/MR2 = 0.3455 and C22 and ec from Table 1,
-this corresponds to a ratio Cm/C = 0.4269, or equivalently Cˆm = Cm/MR2 = 0.1475.
-For all results presented in our study, the crustal density is set at ρc = 2974 kg m−3
-[Sori,
-2018]. Our standard choice for the crustal thickness is h = 26 km [Sori, 2018], although in
-section 3.2 we also present some results with other choices of h. We have considered two pos￾sible prescriptions connected to the density of the inner core. First, for all the results presented
-in sections 3.2, 3.3 and 3.4, we have used a fixed inner core density of ρs = 8800 kg m−3 ap￾proximately that obtained in Dumberry and Rivoldini [2015] under the assumption of a pure
-Fe composition in face-centered cubic phase. This captures an end-member scenario where the
-core composition is an Fe-S alloy; at Mercury’s core conditions, crystallization of Fe is relatively
-free of S on the Fe-rich side of the eutectic [Li et al., 2001]. If the core composition is instead
-an Fe-Si alloy, approximately equal partitioning of Si between the liquid and solid phase [e.g.
-Schaefer et al., 2017] implies a weak chemical contrast at the ICB. The density jump across the
-ICB is expected to be small, although since density increases with depth, the contrast between
-the mean densities of the fluid and solid cores is larger. It is these mean densities that enter
-our Mercury model with uniform density layers. To capture this other end-member core com￾position scenario, in section 3.5 we present results where we instead prescribe a fixed density
-contrast between the fluid and solid core; specifically, we set the numerical value of α3.
-For a given choice of inner core radius rs, the densities of the mantle (ρm) and fluid core
-(ρf ) and the radius of the CMB (rf ) are determined such that the interior model matches M,
-Cˆ = 0.3455 and Cˆm = 0.1475. Figure 3a shows how ρm, ρf and rf vary as a function of in￾ner core radius rs for each of the two inner core density scenarios: a fixed ρs, or a fixed α3. When
-the inner core is small, its presence has a limited influence on the resulting density structure,
-and we find ρm = 3197 kg m−3
-, ρf = 7263 kg m−3 and rf = 2000 km in each of the two
-scenarios. When ρs is fixed to 8800 kg m−3
-, as the inner core reaches 1500 km in size, rf in￾creases to above 2100 km, ρm approaches 4000 kg m−3 and ρf is reduced to below 5000 kg m−3
-.
-Figure 3a illustrates that when adopting a fixed ρs, there is a limit in the possible inner core
-size, as otherwise ρm gets unreasonably large and ρf gets inappropriately small (as it would
-require an excessively large concentration of light elements). When adopting instead a fixed den￾sity contrast, with α3 = 0.1, the changes in rf , ρm and ρf with inner core radius are more mod￾est, allowing larger possible inner core sizes. Different assumptions on ρc and h would alter the
-numerical values shown on Figure 3a but not their trends with rs.
-Figure 3b shows how the FCN and FICN periods vary with rs for each of the two inner
-core density scenarios and in the absence of viscous and EM coupling (i.e. Kcmb = Kicb =
+Confidential manuscript submitted to JGR-Planets
+M · x = y , (22a)
+where the solution (x) and forcing (y) vectors are
+$$\mathbf{x}^T = [\,\tilde{m},\ \tilde{m}_f,\ \tilde{m}_s,\ \tilde{n}_s,\ \tilde{\varepsilon}_m\,]\,, \tag{22b}$$
+$$\mathbf{y}^T = [\,0,\ 0,\ 0,\ 0,\ -(1+\omega)\tan I\,]\,, \tag{22c}$$
+and the elements of matrix M are
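+$$
+\mathsf{M} = \begin{pmatrix}
+\omega - e & (1+\omega)\dfrac{\bar{A}_f}{\bar{A}} & (1+\omega)\dfrac{\bar{A}_s}{\bar{A}} & \dfrac{\bar{A}_s}{\bar{A}}\,\alpha_3\big[(1+\omega)e_s + \phi_s\big] & \phi_m \\
+\omega & 1+\omega+e_f+K_{cmb}+\dfrac{\bar{A}_s}{\bar{A}_f}K_{icb} & -\dfrac{\bar{A}_s}{\bar{A}_f}K_{icb} & -\omega\,e_s\,\alpha_1\,\dfrac{\bar{A}_s}{\bar{A}_f} & 0 \\
+\omega - \alpha_3 e_s & \alpha_1 e_s - K_{icb} & 1+\omega+K_{icb} & (1+\omega-\alpha_2)\,e_s + \alpha_3\phi_s & \alpha_3\phi_s \\
+0 & 0 & 1 & \omega & 0 \\
+1 & 0 & 0 & 0 & (1+\omega)
+\end{pmatrix}. \tag{22d}
+$$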
+Solutions of the homogeneous system (i.e. y = 0) represent free modes of precession. Three
+modes have periods which, when seen in inertial space, are typically in the range of a few hundred to a few thousand years. The first is the free axial precession of Mercury maintained by
+the solar torque acting on its elliptical figure [e.g. Peale, 2005]. The second is the free core nutation (FCN), which is the free precession of the spin axis of the fluid core about the symmetry axis of the CMB [e.g. Mathews et al., 1991]. The third is the free inner core nutation (FICN),
+a free mode of rotation similar to the FCN but associated with the inner core [e.g. Mathews et al.,
+1991].
+A few remarks on our model are important to point out before we proceed further. First,
+although we have retained the triaxial shape of Mercury in the expression of the solar torque,
+we treat its angular momentum response as if it were an axially symmetric body. This is convenient as the two equatorial angular momentum equations for each region can be combined
+into a single equation. To first order, the frequency of the free precession of Mercury is not largely
+altered by triaxiality [e.g. Peale, 2005]. Baland et al. [2019] showed that the frequencies of the
+FCN and FICN for a triaxial planetary body may be slightly different than those for an axially symmetric body, but not by a large factor. As the response of Mercury to the solar torque
+is largely determined by the resonant amplification due to the presence of these three modes,
+our model should capture correctly the first order Cassini state of Mercury. Considering the
+triaxial shape of Mercury may alter the numerical results, but not our general conclusions.
+Second, our modelling approach is different than in the studies of Peale et al. [2014] and
+Peale et al. [2016]. In these two studies, dynamical models of Mercury’s Cassini state are developed and must then be integrated in time. The equilibrium Cassini state is the quasi-steady
+state that remains after transient effects associated with the initial conditions have decayed away.
+An advantage of these models compared to ours is that the complete triaxial dynamics of Mercury, including its longitudinal librations, are retained. However, the numerical integration can
+be lengthy if dissipation is weak, which restricts the number of possible interior models of Mercury that can be tested. In contrast, our model is a simple linear system in the frequency domain, focused on one specific frequency: the forced precession associated with the Cassini state.
+Solutions are straightforward to obtain for a given interior model, and this allows us to cover
+a larger span of the parameter space. One drawback, however, is that our model does not capture time-dependent variations at any other frequencies, including the precession of the pericenter of Mercury’s orbit about the Sun.
+–12–
+Confidential manuscript submitted to JGR-Planets
+2.3 Analytical solutions and limiting cases
+2.3.1 The Cassini state of a single-body, rigid Mercury
+For a rigid planet with no fluid and solid cores, our system of equations reduces to Equations (12a) and (20),
+(ω − e) ˜m + φm ε˜m = 0 , (23a)
+m˜ + (1 + ω)˜εm = −(1 + ω) tan I . (23b)
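+Using Equation (21), $\delta\omega \ll 1$, and the approximation $\bar{A}(1 + e + \delta\omega\cos I) = C + \bar{A}\,\delta\omega\cos I \approx C$, these can be written as
+$$C\,\tilde{m} = \bar{A}\,\phi_m\,\tilde{\varepsilon}_m\,, \tag{24a}$$
+$$\tilde{m} = \delta\omega\,\big(\sin I + \cos I\,\tilde{\varepsilon}_m\big)\,. \tag{24b}$$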
+Equation (24b) gives a direct relationship between ˜m and ˜εm. For I = 8.5330◦, δω =
+4.9327×10−7 and taking ˜εm = 2.04 arcmin, this gives ˜m = 2.52×10−4 arcmin, much smaller
+than ˜εm: the offset of the rotation axis of the mantle with respect to its symmetry axis is very
+small. Substituting Equation (24b) in Equation (24a) gives
+$$C\,\Omega_p\,\big(\sin I + \cos I\,\tilde{\varepsilon}_m\big) = \bar{A}\,\Omega_o\,\phi_m\,\tilde{\varepsilon}_m\,, \tag{25}$$
+and isolating for ˜εm,
+$$\tilde{\varepsilon}_m = \frac{C\,\Omega_p \sin I}{-C\,\Omega_p \cos I + \bar{A}\,\Omega_o\,\phi_m}\,. \tag{26}$$
+Upon using Equations (4), (15a), and $\Omega_o = \tfrac{3}{2}\,n$, we can write
+$$\tilde{\varepsilon}_m = \frac{C\,\Omega_p \sin I}{-C\,\Omega_p \cos I + n\,MR^2\,\big(G_{210}J_2 + 2\,G_{201}C_{22}\big)}\,. \tag{27}$$
+This is the standard prediction for the obliquity of a rigid Mercury occupying Cassini state 1
+[see for instance Equation (1) of Baland et al., 2017, where their definition of Ω̇ is equal to −Ωp].
+Hence, in the absence of a fluid core and inner core, our system retrieves the Cassini state of
+Mercury correctly. Equation (27) can be manipulated to solve instead for the normalized moment of inertia Cˆ,
+$$\hat{C} = \frac{C}{MR^2} = \frac{n}{\Omega_p}\,\frac{G_{210}J_2 + 2\,G_{201}C_{22}}{\cos I + \sin I/\tilde{\varepsilon}_m}\,, \tag{28}$$
+which is equivalent to Equation (89) of Van Hoolst [2015]. It is based on the latter equation
+that a measurement of the obliquity gives a constraint on Cˆ.
+Two free modes of precession are found by setting y = 0 in Equation (23). One mode corresponds to the Eulerian wobble, or Chandler wobble, and represents the prograde precession
+of the rotation axis about the symmetry axis. The second mode is the free retrograde axial precession of Mercury. As seen in the inertial frame, its frequency is given by
+–13–
+Confidential manuscript submitted to JGR-Planets
+$$\omega_{fp} = n\,\frac{MR^2}{C}\,\big(G_{210}J_2 + 2\,G_{201}C_{22}\big)\,, \tag{29}$$
+which is equivalent to the prediction by Peale [2005] when neglecting its small elliptical component. Note that in Peale [2005] it was assumed that only the mantle was involved in the solid-body precession and hence C was replaced by Cm. Using C = 0.346 · MR²
+[Margot et al.,
+2012] and the numerical values for n, J2, C22 and ec given in Table 1, we obtain a free precession period of Tfp = 2π/ωfp = 1298 yr. If we use Cm instead of C in Equation (29), and take
+Cm = 0.431·C = 0.431·0.346·MR2[Margot et al., 2012], we obtain Tfp = 2π/ωfp = 560 yr.
+These estimates are similar to those obtained by Peale [2005]. Because the CMB is elliptical,
+the pressure torque exerted on the fluid core by the mantle leads to an entrainment of the fluid
+core, the degree of which depends on the amplitude of the pole-to-equator CMB flattening. The
+true free precession period lies somewhere between 560 and 1298 yr. Regardless of its exact value,
+the free precession period is much shorter than the forcing period of 325 kyr. Using Equation
+(29), Equation (27) can be written as [e.g. Baland et al., 2017]
+$$\tilde{\varepsilon}_m = \frac{\Omega_p \sin I}{-\Omega_p \cos I + \omega_{fp}}\,. \tag{30}$$
+The obliquity of Mercury is thus determined by how the forcing frequency Ωp compares with
+the free precession frequency ωfp. Because ωfp > Ωp, Mercury occupies Cassini state 1 [Peale,
+1974]. Furthermore, Equation (30) shows that a large obliquity can be generated by resonant
+amplification if Ωp ≈ ωfp. Since ωfp ≫ Ωp, resonant amplification is minimal and the resulting obliquity, ˜εm ≈ 2 arcmin, is much smaller than the inclination angle I ≈ 8.5°.
+2.3.2 The misalignment of the fluid and solid cores
+With ω = −1 − δω cos I and δω ≪ 1, Equation (12d) gives ˜ns ≈ ˜ms; as for the mantle,
+the rotation and symmetry axes of the inner core remain closely aligned in the Cassini state.
+The relationship between ˜m and ˜εm of Equation (24b) is independent of the interior structure,
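+so it remains unchanged when a fluid core and a solid core are present. Substituting it in Equation (12a), and setting ˜ns = ˜ms, the angular momentum equation of the whole planet becomes
+$$C\,\Omega_p\,\big(\sin I + \cos I\,\tilde{\varepsilon}_m\big) + \big(\bar{A}_f \cos I\,\Omega_p\big)\,\tilde{m}_f + \bar{A}_s\,\big(\cos I\,\Omega_p - \Omega_o\,\alpha_3\,\phi_s\big)\,\tilde{n}_s = \bar{A}\,\Omega_o\,\phi_m\,\tilde{\varepsilon}_m\,. \tag{31}$$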
+This latter equation shows how the misaligned inner core and fluid core can lead to a modification of the mantle obliquity ˜εm. Approximate analytical solutions of ˜ns and ˜mf are given by
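+$$\tilde{n}_s \approx \frac{\Omega_p}{\kappa\,\lambda_s}\left[1 + \frac{\Omega_o\,(K_{icb} - \alpha_1 e_s)}{\lambda_f}\right]\big(\sin I + \cos I\,\tilde{\varepsilon}_m\big) - \frac{\Omega_o\,\alpha_3\,\phi_s}{\kappa\,\lambda_s}\,\tilde{\varepsilon}_m\,, \tag{32a}$$
+$$\tilde{m}_f \approx \frac{\Omega_p}{\lambda_f}\,\big(\sin I + \cos I\,\tilde{\varepsilon}_m\big) + \frac{\Omega_o}{\lambda_f}\,\frac{\bar{A}_s}{\bar{A}_f}\,\big(K_{icb} - \alpha_1 e_s\big)\,\tilde{n}_s\,, \tag{32b}$$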
+where
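+$$\kappa = 1 - \frac{\bar{A}_s}{\bar{A}_f}\,\frac{\Omega_o^2\,\big(K_{icb} - \alpha_1 e_s\big)^2}{\lambda_s\,\lambda_f}\,, \tag{33a}$$
+$$\lambda_f = \bar{\sigma}_f - \Omega_p \cos I\,, \tag{33b}$$
+$$\lambda_s = \bar{\sigma}_s - \Omega_p \cos I\,, \tag{33c}$$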
+–14–
+Confidential manuscript submitted to JGR-Planets
+and where we have introduced the frequencies
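+$$\bar{\sigma}_f = \Omega_o\left(e_f + K_{cmb} + \frac{\bar{A}_s}{\bar{A}_f}\,K_{icb}\right), \tag{33d}$$
+$$\bar{\sigma}_s = \Omega_o\,\big(e_s\,\alpha_3\,\alpha_g - e_s\,\alpha_1 + \alpha_3\,\phi_s + K_{icb}\big)\,. \tag{33e}$$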
+These solutions are good approximations for all the results that we present in section 3. For
+an observed mantle obliquity ˜εm and for a chosen set of interior model parameters, they provide useful predictions of ˜ns and ˜mf .
+In the limit of a very strong coupling between the fluid core, solid core and mantle, ¯σs ≫
+Ωp and ¯σf ≫ Ωp, so that ˜ns → 0, ˜mf → 0 and Equation (31) reverts back to Equation (25)
+for a rigid planet. In the opposite limit of no coupling between the fluid core, solid core and
+mantle (i.e. for spherical internal boundaries, ef = es = γs = 0 and no viscous or EM coupling, Kcmb = Kicb = 0), then
+φs = 0 , κ = 1 , λf = λs = −Ωp cos I , m˜ f = ˜ns = −(tan I + ˜εm). (34)
+Inserting these in Equation (31), and with the moment of inertia of the mantle equal to $C_m = C - \bar{A}_f - \bar{A}_s$, we obtain
+$$C_m\,\Omega_p\,\big(\sin I + \cos I\,\tilde{\varepsilon}_m\big) = \bar{A}\,\Omega_o\,\phi_m\,\tilde{\varepsilon}_m\,, \tag{35}$$
+which describes, as expected, a forced precession of the mantle alone. If this was the case for
+Mercury, taking Cm/C = 0.431, the obliquity should be ˜εm ≈ 0.88 arcmin, substantially smaller
+than the observed obliquity of ˜εm ≈ 2 arcmin.
+If ¯σf ≈ Ωp (and thus λf → 0) and/or ¯σs ≈ Ωp (and thus λs → 0) resonant amplification leads to large amplitudes for ˜mf , ˜ns and the mantle obliquity ˜εm. The frequencies ¯σf and
+σ¯s are closely related to the FCN and FICN frequencies ωfcn and ωficn, respectively. Hence,
+just as a large mantle obliquity can result from resonant amplification when the forcing frequency
+approaches the free precession frequency, a large mantle obliquity can likewise result from resonant amplification when the forcing frequency approaches the FCN or FICN frequencies. These
+frequencies depend on the interior density structure and are not known. However, we will show
+that for reasonable interior models of Mercury, the FCN and FICN periods are in the range of
+a few hundred yr. This is sufficiently far from the forcing period (325 kyr) that we do not expect an important amplification effect. Furthermore, since ωfcn, ωficn ≫ Ωp, then ¯σf ≫ Ωp
+and ¯σs ≫ Ωp, and we are in the strong coupling limit. The mantle obliquity should be close
+to that expected for a rigid planet, as observations suggest. Therefore, we expect that ˜mf and
+n˜s should be of the order of ˜εm or smaller. This further justifies the assumption of small angles that we have adopted.
+3 Results
+3.1 Geodetic constraints and interior density structure
+All our interior models are constrained to match the mass M of Mercury and specific choices
+of Cˆ = C/MR2 and Cm/C. The choice of Cˆ is determined from Equation (28). For the parameters listed in Table 1, and an observed obliquity of εm = 2.04 arcmin [Margot et al., 2012],
+this gives Cˆ = C/MR2 = 0.3455 and all our interior models are consistent with this choice.
+Obviously, this reflects a Cassini state equilibrium in which the fluid core and inner core are
+–15–
+Confidential manuscript submitted to JGR-Planets
+perfectly aligned with the mantle, which is not strictly correct. Hence, we make an error in estimating Cˆ from Equation (28), or conversely in predicting εm based on a given choice for Cˆ.
+Part of the objective of our study is to estimate how large this error is. The ratio Cm/C is obtained from the amplitude of the 88-day longitudinal mantle libration φo, which is given by
+$$\phi_o = 6\,f(e_c)\,C_{22}\,\frac{MR^2}{C}\,\frac{C}{C_m}\,\frac{1}{1+\zeta}\,, \tag{36}$$
+where
+$$f(e_c) = 1 - 11\,e_c^2 + \frac{959}{48}\,e_c^4\,, \tag{37}$$
+and where ζ is a correction that takes into account the entrainment of the inner core in the libration [Van Hoolst et al., 2012; Dumberry et al., 2013; Dumberry and Rivoldini, 2015]; this correction is small and, to simplify, we neglect it here. Taking the observed libration amplitude
+to be 38.5 arcsec [Margot et al., 2012], Cˆ = C/MR2 = 0.3455 and C22 and ec from Table 1,
+this corresponds to a ratio Cm/C = 0.4269, or equivalently Cˆm = Cm/MR2 = 0.1475.
+For all results presented in our study, the crustal density is set at ρc = 2974 kg m−3 [Sori,
+2018]. Our standard choice for the crustal thickness is h = 26 km [Sori, 2018], although in
+section 3.2 we also present some results with other choices of h. We have considered two possible prescriptions connected to the density of the inner core. First, for all the results presented
+in sections 3.2, 3.3 and 3.4, we have used a fixed inner core density of ρs = 8800 kg m−3, approximately that obtained in Dumberry and Rivoldini [2015] under the assumption of a pure
+Fe composition in face-centered cubic phase. This captures an end-member scenario where the
+core composition is an Fe-S alloy; at Mercury’s core conditions, crystallization of Fe is relatively
+free of S on the Fe-rich side of the eutectic [Li et al., 2001]. If the core composition is instead
+an Fe-Si alloy, approximately equal partitioning of Si between the liquid and solid phase [e.g.
+Schaefer et al., 2017] implies a weak chemical contrast at the ICB. The density jump across the
+ICB is expected to be small, although since density increases with depth, the contrast between
+the mean densities of the fluid and solid cores is larger. It is these mean densities that enter
+our Mercury model with uniform density layers. To capture this other end-member core composition scenario, in section 3.5 we present results where we instead prescribe a fixed density
+contrast between the fluid and solid core; specifically, we set the numerical value of α3.
+For a given choice of inner core radius rs, the densities of the mantle (ρm) and fluid core
+(ρf ) and the radius of the CMB (rf ) are determined such that the interior model matches M,
+Cˆ = 0.3455 and Cˆm = 0.1475. Figure 3a shows how ρm, ρf and rf vary as a function of inner core radius rs for each of the two inner core density scenarios: a fixed ρs, or a fixed α3. When
+the inner core is small, its presence has a limited influence on the resulting density structure,
+and we find ρm = 3197 kg m−3, ρf = 7263 kg m−3 and rf = 2000 km in each of the two
+scenarios. When ρs is fixed to 8800 kg m−3, as the inner core reaches 1500 km in size, rf increases to above 2100 km, ρm approaches 4000 kg m−3 and ρf is reduced to below 5000 kg m−3.
+Figure 3a illustrates that when adopting a fixed ρs, there is a limit in the possible inner core
+size, as otherwise ρm gets unreasonably large and ρf gets inappropriately small (as it would
+require an excessively large concentration of light elements). When adopting instead a fixed density contrast, with α3 = 0.1, the changes in rf , ρm and ρf with inner core radius are more modest, allowing larger possible inner core sizes. Different assumptions on ρc and h would alter the
+numerical values shown on Figure 3a but not their trends with rs.
+Figure 3b shows how the FCN and FICN periods vary with rs for each of the two inner
+core density scenarios and in the absence of viscous and EM coupling (i.e. Kcmb = Kicb =
 –16–
-Confidential manuscript submitted to JGR-Planets
-0
-200
-400
-600
-800
-1000
-1200
-1400
-period (yr)
-0 200 400 600 800 1000 1200 1400
-Inner core radius (km)
-3000
-4000
-5000
-6000
-7000
-8000
-density (kg/m
-3)
-0 200 400 600 800 1000 1200 1400
-Inner core radius (km)
-2000
-2020
-2040
-2060
-2080
-2100
-Fluid core radius (km)
-fluid core density
-CMB radius
-FICN
-FCNint
-mantle density
-a b
-FCN
-Figure 3. a) Fluid core density (red), mantle density (blue), fluid core radius (orange, right-hand
-side scale) and b) FICN (blue) and FCN (red) periods as a function of inner core radius. The FCN
-period when the external torque is set to zero (FCNint) is shown in orange. Solid lines correspond to
-a scenario where the density of the inner core is set to 8800 kg m−3
-; thin dashed lines correspond to a
-scenario where the density contrast between the fluid and solid cores is set to α3 = 0.1.
-0). Both of these free modes are retrograde. The FCN period is close to 400 yr for a small in￾ner core, increasing to approximately 600 yr at the largest rs. The FICN period is shorter, close
-to 100 yr (160 yr) for a small inner core and decreasing to approximately 40 yr (120 yr) at the
-largest rs under the fixed ρs (fixed α3) scenario. This confirms that the FCN and FICN peri￾ods are both much shorter than the forcing precession period of 325 kyr and sufficiently far away
-from it that we do not expect large ˜mf and ˜ns from resonant amplification.
-The FCN and FICN periods that we have computed include the influence of the exter￾nal torque. As shown by Baland et al. [2019], the external torque allow solid regions to have
-a free motion in inertial space thereby affecting the free rotational modes. To a good approx￾imation, the FCN and FICN frequencies (as seen in an inertial frame) for Kcmb = Kicb = 0
-are given by
-ωf cn ≈ −Ωo
-
-A¯
-A¯m + A¯
-s
- 
-ef + φm
-
-+ Ωo
-efφm
-(ef + φm)
-, (38a)
-ωf icn ≈ Ωo
-
-A¯ + A¯
-s
-A¯ − A¯
-s
- 
-esα1 − esα3αg − α3φs
-
-. (38b)
-The expression of the FICN frequency involves the inertial torque (term esα1) and the grav￾itational torque from the rest of Mercury (esα3αg) and the Sun (α3φs) acting on the inner core.
-For both of our inner core density scenarios (and our choices of ρs = 8800 kg m−3 and α3 =
-0.1), the internal gravitational torque dominates that from the Sun. Furthermore, α3αg  α1;
-the gravitational torque dominates the inertial torque, in large part because of the slow rota￾tion rate of Mercury. As a result the FICN frequency is negative (i.e. the precession motion
-is retrograde). This is also the case for the Moon [e.g. Dumberry and Wieczorek, 2016; Stys and
-Dumberry, 2018], but it is different for Earth, where α1 > α3αg because of its faster rotation
-and the FICN mode is prograde [Mathews et al., 1991]. Note also that our approximate expres-
-–17–
-Confidential manuscript submitted to JGR-Planets
-sion for the FICN differs by a factor (A¯+A¯
-s)/(A¯−A¯
-s) compared to that given in Dumberry
-and Wieczorek [2016] and Stys and Dumberry [2018] for the Moon.
-The expression for FCN frequency differs from the usual expression for Earth. First, it
-involves the external torque from the Sun captured by the parameter φm. If we set φm = 0,
-we obtain the FCN frequency for a decoupled model in which only interior torques contribute,
-ωf cn,int ≈ −Ωo
-
-A¯
-A¯m + A¯
-s
-
-ef . (38c)
-This frequency is slightly different from the usual expression for Earth, involving the ratio A/¯ (A¯m+
-A¯
-s) rather than A/¯ A¯m. This is because of the relatively thin mantle of Mercury; for the largest
-rs considered, the moment of inertia of the inner core can get close to 40% of that of the man￾tle and is not negligible. The period of the FCN when only interior torques contribute is shown
-in Figure 3b. It is close to 1100 yr for a small inner core, increasing to approximately 1500 yr
-at the largest rs. Hence, the influence of the solar torque reduces the FCN period by a factor
-of approximately 3. We note that the FICN period, in contrast, is not altered substantially when
-the external torque is set to zero.
-3.2 Gravitational and inertial coupling
-Let us now investigate the obliquities of the mantle, fluid core and inner core in their equi￾librium Cassini state. We assume a fixed inner core density scenario in this section, with ρs =
-8800 kg m−3
-. Viscous and EM coupling are set to zero in order to isolate the influence of grav￾itational and inertial coupling. Figure 4 shows how ˜εm, ˜mf and ˜ns vary as functions of inner
-core radius. We show calculations for three different choices of crustal thickness, but let us con￾centrate first on the case for h = 26 km. For small rs, we retrieve an obliquity of ˜εm = 2.0494
-arcmin (Figure 4a). ˜εm decreases with rs, but not substantially; at the largest rs (1500 km),
-ε˜m = 2.0460 arcmin, a decrease of 0.0034 arcmin. The maximum difference from ˜εm = 2.04
-arcmin, the obliquity that we used in setting the constraint for Cˆ – and hence the prediction
-we should recover for a rigid planet – is an overestimate of approximately 0.01 arcmin which
-occurs for small inner cores.
-The deviation of ˜εm from that of a rigid planet is due to the misalignments of the fluid
-core ( ˜mf ) and solid inner core (˜ns) with respect to the mantle (Figure 4b). The misalignment
-of the fluid core spin axis from the mantle is significant: ˜mf is approximately 4.02 arcmin for
-a small inner core and does not vary substantially with inner core size; it drops to 3.97 arcmin
-at the largest rs. Recall that ˜mf is measured with respect to the mantle rotation axis (which
-coincides closely with the symmetry axis), so the obliquity of the spin axis of the fluid core with
-respect to the orbit normal is ˜εm+ ˜mf ≈ 6 arcmin. The reason why the obliquity of the spin
-axis of the fluid core is larger than that of the mantle can be understood from Equation (32b),
-which shows that ˜mf is determined by the resonant amplification of the FCN mode at the forc￾ing frequency. When the FCN frequency is much larger than the forcing frequency, as is the
-case for Mercury, the resonant amplification is very weak but remains present and ˜mf is larger
-than zero.
-In contrast to ˜mf , the misalignment of the inner core with respect to the mantle is much
-smaller; ˜ns is approximately between 0.023-0.025 arcmin, a factor 80 times smaller than ˜εm.
-Physically, this is because the gravitational torque acting on the inner core when it is tilted from
-the mantle is much stronger than the inertial torque acting at the ICB. As a result, the inner
-core must remain in close alignment with the mantle. Presented differently, since the FICN pe￾riod is more than 3000 times shorter than the forced precession period, the inner core can eas-
-–18–
-Confidential manuscript submitted to JGR-Planets
-2.038
-2.040
-2.042
-2.044
-2.046
-2.048
-2.050
-Obliquity angle (arcmin)
-0 200 400 600 800 1000 1200 1400
-Inner core radius (km)
-1.5
-2.0
-2.5
-3.0
-3.5
-4.0
-4.5
-Obliquity angle (arcmin)
-0 200 400 600 800 1000 1200 1400
-Inner core radius (km)
-crustal thickness
-16 km
-36 km
-26 km
-crustal thickness
-16 km
-36 km
-26 km
-εm
-εg
-εm for a rigid planet
-mf
-ns (x100)
-a b
-Figure 4. a) Obliquity of the mantle (˜εm, solid lines) and of the principal moment of inertia (˜εg,
-dashed line) b) ˜mf (solid lines) and ˜ns (dashed lines, x100) as a function of inner core radius and for
-different choices of crustal thickness.
-ily follow the forced precession of the mantle and remains gravitationally locked to it. ˜ns does
-not change substantially as the inner core increases in size.
-When Kicb = Kcmb = 0, a good approximation of ˜εm is given by
-ε˜m =
-C
-0Ωp sin I
-−C0Ωp cos I + A¯Ωoφm
-, (39)
-which is identical to the prediction of Equation (26) for a rigid Mercury, except C is replaced
-by C
-0
-. The latter represents an effective moment of inertia that accounts for the coupling of
-the core to the mantle,
-C
-0 = C + A¯
-cχ , (40)
-where A¯
-c = A¯
-f + A¯
-s and
-χ =
-Ωp cos I
-A¯
-c
-
-A¯
-f
-(¯σf − Ωp cos I)
-+
-A¯
-s
-(¯σs − Ωp cos I)
-
-−
-A¯
-s
-A¯
-c
-Ωoα3φs
-(¯σs − Ωp cos I)
-. (41)
-The frequencies ¯σf and ¯σs are given in Equations (33d-33e) and closely approximate the FCN
-and FICN frequencies of Equations (38c) and (38b), respectively. The factor χ captures then
-how the core is entrained to precess with the mantle, with the coupling between the two ex￾pressed in terms of the resonant amplification of the FCN and FICN frequencies. In the limit
-of ¯σf , σ¯s → 0, then χ = −1, C
-0 = Cm, the core is fully decoupled from the mantle and we
-retrieve Equation (35). If instead ¯σf , σ¯s → ∞, then χ = 0, C
-0 = C and we retrieve the pre￾diction for a rigid planet. When both the FCN and FICN frequencies are much larger than Ωp,
-as is the case here, resonant amplification is weak, χ is small and positive, C
-0 > C and this
-leads to a slightly larger ˜εm compared to a rigid planet. Because the inner core core is grav￾itationally locked to the mantle, deviations from a rigid planet are dominantly caused by the
-misalignment of the fluid core. In Equation (41), ¯σs  σ¯f , so to a good approximation
+Confidential manuscript submitted to JGR-Planets
+0
+200
+400
+600
+800
+1000
+1200
+1400
+period (yr)
+0 200 400 600 800 1000 1200 1400
+Inner core radius (km)
+3000
+4000
+5000
+6000
+7000
+8000
+density (kg/m
+3)
+0 200 400 600 800 1000 1200 1400
+Inner core radius (km)
+2000
+2020
+2040
+2060
+2080
+2100
+Fluid core radius (km)
+fluid core density
+CMB radius
+FICN
+FCNint
+mantle density
+a b
+FCN
+Figure 3. a) Fluid core density (red), mantle density (blue), fluid core radius (orange, right-hand
+side scale) and b) FICN (blue) and FCN (red) periods as a function of inner core radius. The FCN
+period when the external torque is set to zero (FCNint) is shown in orange. Solid lines correspond to
+a scenario where the density of the inner core is set to 8800 kg m−3; thin dashed lines correspond to a
+scenario where the density contrast between the fluid and solid cores is set to α3 = 0.1.
+0). Both of these free modes are retrograde. The FCN period is close to 400 yr for a small inner core, increasing to approximately 600 yr at the largest rs. The FICN period is shorter, close
+to 100 yr (160 yr) for a small inner core and decreasing to approximately 40 yr (120 yr) at the
+largest rs under the fixed ρs (fixed α3) scenario. This confirms that the FCN and FICN periods are both much shorter than the forcing precession period of 325 kyr and sufficiently far away
+from it that we do not expect large ˜mf and ˜ns from resonant amplification.
+The FCN and FICN periods that we have computed include the influence of the external torque. As shown by Baland et al. [2019], the external torque allows solid regions to have
+a free motion in inertial space thereby affecting the free rotational modes. To a good approximation, the FCN and FICN frequencies (as seen in an inertial frame) for Kcmb = Kicb = 0
+are given by
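+\begin{equation}
+\omega_{fcn} \approx -\Omega_o
+\left(
+\frac{\bar{A}}{\bar{A}_m + \bar{A}_s}
+\right)
+\left(
+e_f + \phi_m
+\right)
++ \Omega_o
+\frac{e_f \phi_m}{\left( e_f + \phi_m \right)}
+\, , \tag{38a}
+\end{equation}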
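+\begin{equation}
+\omega_{ficn} \approx \Omega_o
+\left(
+\frac{\bar{A} + \bar{A}_s}{\bar{A} - \bar{A}_s}
+\right)
+\left(
+e_s \alpha_1 - e_s \alpha_3 \alpha_g - \alpha_3 \phi_s
+\right)
+\, . \tag{38b}
+\end{equation}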
+The expression of the FICN frequency involves the inertial torque (term esα1) and the gravitational torque from the rest of Mercury (esα3αg) and the Sun (α3φs) acting on the inner core.
+For both of our inner core density scenarios (and our choices of ρs = 8800 kg m−3 and α3 =
+0.1), the internal gravitational torque dominates that from the Sun. Furthermore, α3αg ≫ α1;
+the gravitational torque dominates the inertial torque, in large part because of the slow rotation rate of Mercury. As a result the FICN frequency is negative (i.e. the precession motion
+is retrograde). This is also the case for the Moon [e.g. Dumberry and Wieczorek, 2016; Stys and
+Dumberry, 2018], but it is different for Earth, where α1 > α3αg because of its faster rotation
+and the FICN mode is prograde [Mathews et al., 1991]. Note also that our approximate expres-
+–17–
+Confidential manuscript submitted to JGR-Planets
+sion for the FICN differs by a factor $(\bar{A}+\bar{A}_s)/(\bar{A}-\bar{A}_s)$
+compared to that given in Dumberry
+and Wieczorek [2016] and Stys and Dumberry [2018] for the Moon.
+The expression for FCN frequency differs from the usual expression for Earth. First, it
+involves the external torque from the Sun captured by the parameter φm. If we set φm = 0,
+we obtain the FCN frequency for a decoupled model in which only interior torques contribute,
+\begin{equation}
+\omega_{fcn,int} \approx -\Omega_o
+\left(
+\frac{\bar{A}}{\bar{A}_m + \bar{A}_s}
+\right)
+e_f \, . \tag{38c}
+\end{equation}
+This frequency is slightly different from the usual expression for Earth, involving the ratio
+$\bar{A}/(\bar{A}_m + \bar{A}_s)$
+rather than $\bar{A}/\bar{A}_m$. This is because of the relatively thin mantle of Mercury; for the largest
+rs considered, the moment of inertia of the inner core can get close to 40% of that of the mantle and is not negligible. The period of the FCN when only interior torques contribute is shown
+in Figure 3b. It is close to 1100 yr for a small inner core, increasing to approximately 1500 yr
+at the largest rs. Hence, the influence of the solar torque reduces the FCN period by a factor
+of approximately 3. We note that the FICN period, in contrast, is not altered substantially when
+the external torque is set to zero.
+3.2 Gravitational and inertial coupling
+Let us now investigate the obliquities of the mantle, fluid core and inner core in their equilibrium Cassini state. We assume a fixed inner core density scenario in this section, with ρs =
+8800 kg m−3. Viscous and EM coupling are set to zero in order to isolate the influence of gravitational and inertial coupling. Figure 4 shows how ˜εm, ˜mf and ˜ns vary as functions of inner
+core radius. We show calculations for three different choices of crustal thickness, but let us concentrate first on the case for h = 26 km. For small rs, we retrieve an obliquity of ˜εm = 2.0494
+arcmin (Figure 4a). ˜εm decreases with rs, but not substantially; at the largest rs (1500 km),
+ε˜m = 2.0460 arcmin, a decrease of 0.0034 arcmin. The maximum difference from ˜εm = 2.04
+arcmin, the obliquity that we used in setting the constraint for Cˆ – and hence the prediction
+we should recover for a rigid planet – is an overestimate of approximately 0.01 arcmin which
+occurs for small inner cores.
+The deviation of ˜εm from that of a rigid planet is due to the misalignments of the fluid
+core ( ˜mf ) and solid inner core (˜ns) with respect to the mantle (Figure 4b). The misalignment
+of the fluid core spin axis from the mantle is significant: ˜mf is approximately 4.02 arcmin for
+a small inner core and does not vary substantially with inner core size; it drops to 3.97 arcmin
+at the largest rs. Recall that ˜mf is measured with respect to the mantle rotation axis (which
+coincides closely with the symmetry axis), so the obliquity of the spin axis of the fluid core with
+respect to the orbit normal is ˜εm+ ˜mf ≈ 6 arcmin. The reason why the obliquity of the spin
+axis of the fluid core is larger than that of the mantle can be understood from Equation (32b),
+which shows that ˜mf is determined by the resonant amplification of the FCN mode at the forcing frequency. When the FCN frequency is much larger than the forcing frequency, as is the
+case for Mercury, the resonant amplification is very weak but remains present and ˜mf is larger
+than zero.
+In contrast to ˜mf , the misalignment of the inner core with respect to the mantle is much
+smaller; ˜ns is approximately between 0.023-0.025 arcmin, a factor 80 times smaller than ˜εm.
+Physically, this is because the gravitational torque acting on the inner core when it is tilted from
+the mantle is much stronger than the inertial torque acting at the ICB. As a result, the inner
+core must remain in close alignment with the mantle. Presented differently, since the FICN period is more than 3000 times shorter than the forced precession period, the inner core can eas-
+–18–
+Confidential manuscript submitted to JGR-Planets
+2.038
+2.040
+2.042
+2.044
+2.046
+2.048
+2.050
+Obliquity angle (arcmin)
+0 200 400 600 800 1000 1200 1400
+Inner core radius (km)
+1.5
+2.0
+2.5
+3.0
+3.5
+4.0
+4.5
+Obliquity angle (arcmin)
+0 200 400 600 800 1000 1200 1400
+Inner core radius (km)
+crustal thickness
+16 km
+36 km
+26 km
+crustal thickness
+16 km
+36 km
+26 km
+εm
+εg
+εm for a rigid planet
+mf
+ns (x100)
+a b
+Figure 4. a) Obliquity of the mantle (˜εm, solid lines) and of the principal moment of inertia (˜εg,
+dashed line) b) ˜mf (solid lines) and ˜ns (dashed lines, x100) as a function of inner core radius and for
+different choices of crustal thickness.
+ily follow the forced precession of the mantle and remains gravitationally locked to it. ˜ns does
+not change substantially as the inner core increases in size.
+When Kicb = Kcmb = 0, a good approximation of ˜εm is given by
+\begin{equation}
+\tilde{\varepsilon}_m =
+\frac{C' \Omega_p \sin I}{-C' \Omega_p \cos I + \bar{A} \Omega_o \phi_m}
+\, , \tag{39}
+\end{equation}
+which is identical to the prediction of Equation (26) for a rigid Mercury, except $C$ is replaced
+by $C'$.
+The latter represents an effective moment of inertia
+that accounts for the coupling of
+the core to the mantle,
+\begin{equation}
+C' = C + \bar{A}_c \chi \, ,
+\tag{40}
+\end{equation}
+where $\bar{A}_c = \bar{A}_f + \bar{A}_s$ and
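+\begin{equation}
+\chi
+=
+\frac{\Omega_p \cos I}
+{\bar{A}_c}
+\left[
+\frac{\bar{A}_f}
+{\left( \bar{\sigma}_f - \Omega_p \cos I \right)}
++
+\frac{\bar{A}_s}
+{\left( \bar{\sigma}_s - \Omega_p \cos I \right)}
+\right]
+-
+\frac{\bar{A}_s}
+{\bar{A}_c}
+\,
+\frac{\Omega_o \alpha_3 \phi_s}
+{\left( \bar{\sigma}_s - \Omega_p \cos I \right)}
+\, .
+\tag{41}
+\end{equation}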
+The frequencies ¯σf and ¯σs are given in Equations (33d-33e) and closely approximate the FCN
+and FICN frequencies of Equations (38c) and (38b), respectively. The factor χ captures then
+how the core is entrained to precess with the mantle, with the coupling between the two expressed in terms of the resonant amplification of the FCN and FICN frequencies. In the limit
+of $\bar{\sigma}_f, \bar{\sigma}_s \to 0$, then $\chi = -1$,
+$C' = C_m$, the core is fully decoupled from the mantle and we
+retrieve Equation (35). If instead $\bar{\sigma}_f, \bar{\sigma}_s \to \infty$, then $\chi = 0$,
+$C' = C$ and we retrieve the prediction for a rigid planet. When both the FCN and FICN frequencies are much larger than $\Omega_p$,
+as is the case here, resonant amplification is weak, $\chi$ is small and positive,
+$C' > C$ and this
+leads to a slightly larger ˜εm compared to a rigid planet. Because the inner core is gravitationally locked to the mantle, deviations from a rigid planet are dominantly caused by the
+misalignment of the fluid core. In Equation (41), ¯σs ≫ σ¯f , so to a good approximation
 –19–
-Confidential manuscript submitted to JGR-Planets
-χ ≈
-A¯
-f
-A¯
-c
-Ωo cos I
-(¯σf − Ωp cos I)
-. (42)
-For a small inner core, χ ≈ 7.55×10−3
-. As the inner core grows, A¯
-f decreases, and the com￾bination A¯
-cχ also decreases. This implies that C
-0 decreases with inner core size and, consequently,
-ε˜m also decreases with inner core size, as seen in Figure 4a, though it remains larger than the
-prediction for a rigid planet.
-The specific predictions of ˜εm, ˜mf and ˜ns on Figure 4 depend sensitively on the assumed
-interior density model and on the dynamical ellipticities of the inner core (es) and fluid core
-(ef ). Hence, it depends on the choices we have made for the inner core density ρs, the crustal
-density ρc and its thickness h. Changing ρs, ρc and/or h requires a different combination of ρf ,
-ρm and rf in order to match M, Cˆ and Cˆm. In turn, this leads to different ellipticities at in￾terior boundary in order to match J2 and C22, and thus different predictions for ˜εm, ˜mf and
-n˜s. To illustrate this, we show on Figure 4 two additional predictions computed with crustal
-thicknesses changed to h = 16 and 36 km. The change in ˜εm remains modest, ∼ 0.025%, but
-the changes in ˜mf and ˜ns are more substantial, ∼ 5% and ∼ 10%, respectively.
-We also show on Figure 4a (only for h = 26 km) the obliquity of the principal moment
-of inertia of the whole planet, which we denote by ˜εg. A difference between ˜εg and ˜εm occurs
-if the inner core is misaligned with the mantle. As seen in the mantle frame, a tilted inner core
-(with ˜ns assumed small) leads to an off-diagonal component of the moment of inertia tensor
-of (Cs−A¯
-s)α3n˜s = A¯
-sesα3n˜s. The angle by which the mantle frame must be rotated so that
-the moment of inertia of the whole planet is purely diagonal is (A¯
-sesα3n˜s)/(Ae¯ ), and hence a
-good approximation of ˜εg is
-ε˜g = ˜εm +
-A¯
-ses
-Ae¯
-α3n˜s . (43)
-Since the inner core is gravitationally forced into a close alignment with the mantle, the dif￾ference between ˜εg and ˜εm remains very small. For the largest inner core radius that we have
-considered, ˜εg differs from ˜εm only by approximately 0.001 arcmin.
-3.3 Viscous coupling
-We now investigate how viscous coupling at the CMB and ICB affects the equilibrium Cassini
-state. Peale et al. [2014] present two different parameterizations of viscous coupling based on
-the timescale of attenuation of the differential rotation between the fluid core and mantle. More
-complete analytical solutions for the flow resulting from a differentially precessing shell have
-been derived [e.g. Stewartson and Roberts, 1963; Busse, 1968; Rochester , 1976] and we exploit
-these solutions here. The parametrization of the viscous coupling constants Kcmb and Kicb based
-on them are given in Mathews and Guo [2005],
-Kcmb =
-πρf r
-4
-f
-A¯
-f
-r ν
-2Ωo
-
-0.195 − 1.976i
-
-, (44a)
-Kicb =
-πρf r
-4
-s
-A¯
-s
-r ν
-2Ωo
-
-0.195 − 1.976i
-
-, (44b)
-where ν is the kinematic viscosity. The appropriate numerical value for ν in planetary inte￾rior is not well known but based on theoretical and experimental studies it is expected to be
-of the order of 10−6 m2
-s
-−1
-[e.g. Gans, 1972; de Wijs et al., 1998; Alf`e et al., 2000; Rutter et al.,
-2002a,b].
+Confidential manuscript submitted to JGR-Planets
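+\begin{equation}
+\chi \approx
+\frac{\bar{A}_f}
+{\bar{A}_c}
+\,
+\frac{\Omega_o \cos I}{\left( \bar{\sigma}_f - \Omega_p \cos I \right)}
+\, . \tag{42}
+\end{equation}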
+For a small inner core, $\chi \approx 7.55 \times 10^{-3}$. As the inner core grows,
+$\bar{A}_f$ decreases, and the combination
+$\bar{A}_c \chi$ also decreases. This implies that
+$C'$ decreases with inner core size and, consequently,
+ε˜m also decreases with inner core size, as seen in Figure 4a, though it remains larger than the
+prediction for a rigid planet.
+The specific predictions of ˜εm, ˜mf and ˜ns on Figure 4 depend sensitively on the assumed
+interior density model and on the dynamical ellipticities of the inner core (es) and fluid core
+(ef ). Hence, it depends on the choices we have made for the inner core density ρs, the crustal
+density ρc and its thickness h. Changing ρs, ρc and/or h requires a different combination of ρf ,
+ρm and rf in order to match M, Cˆ and Cˆm. In turn, this leads to different ellipticities at interior boundary in order to match J2 and C22, and thus different predictions for ˜εm, ˜mf and
+n˜s. To illustrate this, we show on Figure 4 two additional predictions computed with crustal
+thicknesses changed to h = 16 and 36 km. The change in ˜εm remains modest, ∼ 0.025%, but
+the changes in ˜mf and ˜ns are more substantial, ∼ 5% and ∼ 10%, respectively.
+We also show on Figure 4a (only for h = 26 km) the obliquity of the principal moment
+of inertia of the whole planet, which we denote by ˜εg. A difference between ˜εg and ˜εm occurs
+if the inner core is misaligned with the mantle. As seen in the mantle frame, a tilted inner core
+(with ˜ns assumed small) leads to an off-diagonal component of the moment of inertia tensor
+of $(C_s - \bar{A}_s)\,\alpha_3 \tilde{n}_s = \bar{A}_s e_s \alpha_3 \tilde{n}_s$.
+The angle by which the mantle frame must be rotated so that
+the moment of inertia of the whole planet is purely diagonal is
+$(\bar{A}_s e_s \alpha_3 \tilde{n}_s)/(\bar{A} e)$, and hence a
+good approximation of ˜εg is
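+\begin{equation}
+\tilde{\varepsilon}_g = \tilde{\varepsilon}_m +
+\frac{\bar{A}_s e_s}{\bar{A} e}
+\alpha_3 \tilde{n}_s \, . \tag{43}
+\end{equation}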
+Since the inner core is gravitationally forced into a close alignment with the mantle, the difference between ˜εg and ˜εm remains very small. For the largest inner core radius that we have
+considered, ˜εg differs from ˜εm only by approximately 0.001 arcmin.
+3.3 Viscous coupling
+We now investigate how viscous coupling at the CMB and ICB affects the equilibrium Cassini
+state. Peale et al. [2014] present two different parameterizations of viscous coupling based on
+the timescale of attenuation of the differential rotation between the fluid core and mantle. More
+complete analytical solutions for the flow resulting from a differentially precessing shell have
+been derived [e.g. Stewartson and Roberts, 1963; Busse, 1968; Rochester , 1976] and we exploit
+these solutions here. The parametrization of the viscous coupling constants Kcmb and Kicb based
+on them are given in Mathews and Guo [2005],
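+\begin{equation}
+K_{cmb} =
+\frac{\pi \rho_f r_f^4}
+{\bar{A}_f}
+\,
+\sqrt{\frac{\nu}{2 \Omega_o}}
+\left(
+0.195 - 1.976\,i
+\right)
+\, ,
+\tag{44a}
+\end{equation}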
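+\begin{equation}
+K_{icb} =
+\frac{\pi \rho_f r_s^4}
+{\bar{A}_s}
+\,
+\sqrt{\frac{\nu}{2 \Omega_o}}
+\left(
+0.195 - 1.976\,i
+\right)
+\, ,
+\tag{44b}
+\end{equation}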
+where ν is the kinematic viscosity. The appropriate numerical value for ν in planetary interiors
+is not well known but based on theoretical and experimental studies it is expected to be
+of the order of $10^{-6}$ m$^2$ s$^{-1}$
+[e.g. Gans, 1972; de Wijs et al., 1998; Alf`e et al., 2000; Rutter et al.,
+2002a,b].
 –20–
-Confidential manuscript submitted to JGR-Planets
-The above parameterizations are valid only under the assumption that the flow in the bound￾ary layer remains laminar. Whether this is reasonable can be assessed by evaluating the Reynolds
-number Re = rf∆uf /ν, associated with the differential velocity ∆uf = rfΩom˜ f at the CMB.
-For rf = 2000 km, and taking ˜mf = 4 arcmin ≈ 0.001 rad from the results in the previous
-section, we get ∆uf ∼ 2 mm/s and Re ∼ 6 × 109
-. Such a large Reynolds number indicates
-that the viscous friction between the fluid core and mantle should induce turbulent flows, as
-is the case for the Cassini state of the Moon [Yoder , 1981; Williams et al., 2001; C´ebron et al.,
-2019]. For a boundary layer that involves turbulent flows, the viscous torque should be inde￾pendent of the fluid viscosity and proportional to the square of the differential velocity. The
-coupling constant Kcmb should be in the form
-Kcmb = fcmb
-
-m˜ f
-
-
-
-0.195 − 1.976i
-
-, (45)
-where fcmb is a numerical factor that depends among other things on surface roughness. In￾corporating a viscous coupling of this form in our rotational model is more challenging not only
-because fcmb is not known but also because the viscous torque is no longer linear in ˜mf . One
-strategy is to find solutions through an iterative process. The simpler alternative strategy that
-we adopt is to use the laminar formulas of Equation (44) but with the understanding that ν
-represents an effective turbulent viscosity.
-To give an estimate of an appropriate turbulent value for ν, we turn to the Cassini state
-of the Moon. A measure of the viscous dissipation at the CMB of the Moon has been obtained
-by fitting a rotation model to the librations of the Moon observed by Lunar Laser Ranging (LLR)
-[Williams et al., 2001, 2014; Williams and Boggs, 2015]. Viscous dissipation is reported in terms
-of a coupling parameter K and a recent estimate is K/CL = (1.41±0.34)×10−8 day−1
-[Williams
-and Boggs, 2015], where CL is the lunar polar moment of inertia. The connection between K
-and Kcmb is
-
-
-
-Im[Kcmb]
-
-
- =
-K
-CL
-CL
-CfL
-1
-ΩL
-, (46)
-where CfL is the moment of inertia of the lunar core and ΩL = 2.66 × 10−6
-s
-−1
-the lunar
-rotation rate. With CfL/CL ∼ 7 × 10−4
-[e.g. Williams et al., 2014], this gives |Im[Kcmb]| ∼
-9×10−5
-. In order to match this amplitude in Equation (44a), with lunar parameters and as￾suming a lunar core radius of 400 km, the required turbulent viscosity is ν ≈ 5 × 10−4 m2
-s
-−1
-, about 500 times larger than the laminar viscosity. Note that the differential velocity at the
-CMB of the Moon is closer to 3 cm/s [Yoder , 1981; Williams et al., 2001], more than 10 times
-larger than our estimate for Mercury above. Since the effective turbulent coupling constant Kcmb
-is proportional to the differential velocity, the effective turbulent viscosity appropriate for Mer￾cury should be smaller. Thus, ν ≈ 5×10−4 m2
-s
-−1 gives a conservative upper bound for the
-possible effective turbulent viscosity that can be expected for Mercury.
-Figure 5 shows how ˜εm, ˜mf and ˜ns vary as functions of inner core radius for different choices
-of effective viscosities. For ν = 10−5 m2
-s
-−1
-, viscous coupling is too weak to affect ˜εm and
-m˜ f and they are essentially unchanged from the solutions shown in Figure 4. With increasing
-ν, the stronger viscous coupling between the core and the mantle reduces their differential ve￾locity, and ˜mf is reduced. With the reduced differential velocity at the CMB, the prediction
-of ˜εm gets closer to 2.04 arcmin, the obliquity expected for a rigid planet. Although our CMB
-viscous coupling model is different than the one used by Peale et al. [2014], our results for ˜εm
-and ˜mf are qualitatively similar: viscous coupling at the CMB acts to reduce the offset of the
-fluid spin axis from the mantle symmetry axis. Considering the upper bound in turbulent vis￾cosity that we have identified above (i.e ν ≈ 5 × 10−4 m2
-s
-−1
-), the influence of viscous cou-
-–21–
-Confidential manuscript submitted to JGR-Planets
-εm
-εg
-mf
-ns
-2.038
-2.040
-2.042
-2.044
-2.046
-2.048
-2.050
-Obliquity angle (arcmin)
-0 200 400 600 800 1000 1200 1400
-Inner core radius (km)
-0.0
-0.5
-1.0
-1.5
-2.0
-2.5
-3.0
-3.5
-4.0
-4.5
-Obliquity angle (arcmin)
-0 200 400 600 800 1000 1200 1400
-Inner core radius (km)
-kinematic viscosity: 0.01 m2 s-1 0.00001 m2 0.0001 m2 s-1 0.0005 m2 s-1 0.001 m2 s-1 s-1
-a b
-εm for a rigid planet
-Figure 5. a) Obliquity of the mantle (˜εm, solid lines) and gravity field (˜εg, dashed lines) b) ˜mf
-(solid lines) and ˜ns (dashed lines) as a function of inner core radius and for different choices of kinematic
-viscosity (color in legend).
-pling on ˜εm remains modest, reducing its amplitude by a maximum of approximately 0.0015
-arcmin.
-The inclusion of viscous coupling at the ICB can lead to a substantial change in inner core
-tilt. A larger viscosity leads to stronger viscous coupling and to a closer alignment of the in￾ner core with the fluid core spin axis. The viscous coupling strength is inversely proportional
-to rs, so a larger viscosity results in a larger inner core radius at which viscous coupling is of
-a similar magnitude to gravitational coupling. Taking again an upper bound of ν = 5×10−4
-m2
-s
-−1
-, Figure 5 indicates that ˜ns may be 1 arcmin or larger only if the inner core radius is
-smaller than approximately 100 km. For an inner core of a few hundred km in radius, gravi￾tational coupling is much larger than viscous coupling, and the inner core tilt is limited to a
-fraction of 1 arcmin.
-The larger inner core tilt observed with increasing effective viscosity results in a larger
-offset between the obliquity of the principal moment of inertia ˜εg and that of the mantle ˜εm,
-though it remains limited. For the upper bound of ν = 5 × 10−4 m2
-s
-−1
-, and for rs = 1500
-km, the difference between ˜εg and ˜εm is limited to 0.0013 arcmin.
-The conclusion that emerges from Figure 5 is that the larger the inner core is, the smaller
-the misalignments of both the fluid core and inner core are with respect to the mantle. This
-implies that the larger the inner core is, the more we approach a planet precessing as a rigid
-body, although the misalignment of the spin axis of the fluid core remains important, approx￾imately 3-4 arcmin away from the mantle symmetry axis. The specific way in which ˜εm, ˜mf
-and ˜ns change with inner core size would certainly be different for a turbulent model of viscous
-coupling. But the general conclusion remains that the addition of viscous coupling at the CMB
-and ICB does not significantly modify the Cassini state equilibrium angle of the mantle.
+Confidential manuscript submitted to JGR-Planets
+The above parameterizations are valid only under the assumption that the flow in the boundary layer remains laminar. Whether this is reasonable can be assessed by evaluating the Reynolds
+number Re = rf∆uf /ν, associated with the differential velocity ∆uf = rfΩom˜ f at the CMB.
+For rf = 2000 km, and taking ˜mf = 4 arcmin ≈ 0.001 rad from the results in the previous
+section, we get ∆uf ∼ 2 mm/s and Re ∼ $6 \times 10^{9}$. Such a large Reynolds number indicates
+that the viscous friction between the fluid core and mantle should induce turbulent flows, as
+is the case for the Cassini state of the Moon [Yoder , 1981; Williams et al., 2001; C´ebron et al.,
+2019]. For a boundary layer that involves turbulent flows, the viscous torque should be independent of the fluid viscosity and proportional to the square of the differential velocity. The
+coupling constant Kcmb should be in the form
+\begin{equation}
+K_{cmb}
+= f_{cmb}
+\left| \tilde{m}_f \right|
+\left(
+0.195 - 1.976\,i
+\right)
+\, , \tag{45}
+\end{equation}
+where fcmb is a numerical factor that depends among other things on surface roughness. Incorporating a viscous coupling of this form in our rotational model is more challenging not only
+because fcmb is not known but also because the viscous torque is no longer linear in ˜mf . One
+strategy is to find solutions through an iterative process. The simpler alternative strategy that
+we adopt is to use the laminar formulas of Equation (44) but with the understanding that ν
+represents an effective turbulent viscosity.
+To give an estimate of an appropriate turbulent value for ν, we turn to the Cassini state
+of the Moon. A measure of the viscous dissipation at the CMB of the Moon has been obtained
+by fitting a rotation model to the librations of the Moon observed by Lunar Laser Ranging (LLR)
+[Williams et al., 2001, 2014; Williams and Boggs, 2015]. Viscous dissipation is reported in terms
+of a coupling parameter $K$ and a recent estimate is $K/C_L = (1.41 \pm 0.34) \times 10^{-8}$ day$^{-1}$ [Williams
+and Boggs, 2015], where CL is the lunar polar moment of inertia. The connection between K
+and Kcmb is
+$$\left| \mathrm{Im}[K_{cmb}] \right| = \frac{K}{C_L} \, \frac{C_L}{C_{fL}} \, \frac{1}{\Omega_L} , \qquad (46)$$
+where $C_{fL}$ is the moment of inertia of the lunar core and $\Omega_L = 2.66 \times 10^{-6}$ s$^{-1}$ the lunar
+rotation rate. With $C_{fL}/C_L \sim 7 \times 10^{-4}$ [e.g. Williams et al., 2014], this gives $|\mathrm{Im}[K_{cmb}]| \sim 9 \times 10^{-5}$.
+In order to match this amplitude in Equation (44a), with lunar parameters and assuming a lunar core radius of 400 km, the required turbulent viscosity is $\nu \approx 5 \times 10^{-4}$ m$^2$ s$^{-1}$,
+about 500 times larger than the laminar viscosity. Note that the differential velocity at the
+CMB of the Moon is closer to 3 cm/s [Yoder , 1981; Williams et al., 2001], more than 10 times
+larger than our estimate for Mercury above. Since the effective turbulent coupling constant Kcmb
+is proportional to the differential velocity, the effective turbulent viscosity appropriate for Mercury should be smaller. Thus, $\nu \approx 5 \times 10^{-4}$ m$^2$ s$^{-1}$ gives a conservative upper bound for the
+possible effective turbulent viscosity that can be expected for Mercury.
+Figure 5 shows how ˜εm, ˜mf and ˜ns vary as functions of inner core radius for different choices
+of effective viscosities. For $\nu = 10^{-5}$ m$^2$ s$^{-1}$, viscous coupling is too weak to affect $\tilde{\varepsilon}_m$ and
+$\tilde{m}_f$ and they are essentially unchanged from the solutions shown in Figure 4. With increasing
+ν, the stronger viscous coupling between the core and the mantle reduces their differential velocity, and ˜mf is reduced. With the reduced differential velocity at the CMB, the prediction
+of ˜εm gets closer to 2.04 arcmin, the obliquity expected for a rigid planet. Although our CMB
+viscous coupling model is different than the one used by Peale et al. [2014], our results for ˜εm
+and ˜mf are qualitatively similar: viscous coupling at the CMB acts to reduce the offset of the
+fluid spin axis from the mantle symmetry axis. Considering the upper bound in turbulent viscosity that we have identified above (i.e. $\nu \approx 5 \times 10^{-4}$ m$^2$ s$^{-1}$), the influence of viscous cou-
+–21–
+
+Confidential manuscript submitted to JGR-Planets
+εm
+εg
+mf
+ns
+2.038
+2.040
+2.042
+2.044
+2.046
+2.048
+2.050
+Obliquity angle (arcmin)
+0 200 400 600 800 1000 1200 1400
+Inner core radius (km)
+0.0
+0.5
+1.0
+1.5
+2.0
+2.5
+3.0
+3.5
+4.0
+4.5
+Obliquity angle (arcmin)
+0 200 400 600 800 1000 1200 1400
+Inner core radius (km)
+kinematic viscosity: 0.01 m$^2$ s$^{-1}$ 0.00001 m$^2$ s$^{-1}$ 0.0001 m$^2$ s$^{-1}$ 0.0005 m$^2$ s$^{-1}$ 0.001 m$^2$ s$^{-1}$
+a b
+εm for a rigid planet
+Figure 5. a) Obliquity of the mantle (˜εm, solid lines) and gravity field (˜εg, dashed lines) b) ˜mf
+(solid lines) and ˜ns (dashed lines) as a function of inner core radius and for different choices of kinematic
+viscosity (color in legend).
+pling on ˜εm remains modest, reducing its amplitude by a maximum of approximately 0.0015
+arcmin.
+The inclusion of viscous coupling at the ICB can lead to a substantial change in inner core
+tilt. A larger viscosity leads to stronger viscous coupling and to a closer alignment of the inner core with the fluid core spin axis. The viscous coupling strength is inversely proportional
+to rs, so a larger viscosity results in a larger inner core radius at which viscous coupling is of
+a similar magnitude to gravitational coupling. Taking again an upper bound of $\nu = 5 \times 10^{-4}$
+m$^2$ s$^{-1}$, Figure 5 indicates that $\tilde{n}_s$ may be 1 arcmin or larger only if the inner core radius is
+smaller than approximately 100 km. For an inner core of a few hundred km in radius, gravitational coupling is much larger than viscous coupling, and the inner core tilt is limited to a
+fraction of 1 arcmin.
+The larger inner core tilt observed with increasing effective viscosity results in a larger
+offset between the obliquity of the principal moment of inertia ˜εg and that of the mantle ˜εm,
+though it remains limited. For the upper bound of $\nu = 5 \times 10^{-4}$ m$^2$ s$^{-1}$, and for $r_s = 1500$
+km, the difference between $\tilde{\varepsilon}_g$ and $\tilde{\varepsilon}_m$ is limited to 0.0013 arcmin.
+The conclusion that emerges from Figure 5 is that the larger the inner core is, the smaller
+the misalignments of both the fluid core and inner core are with respect to the mantle. This
+implies that the larger the inner core is, the more we approach a planet precessing as a rigid
+body, although the misalignment of the spin axis of the fluid core remains important, approximately 3-4 arcmin away from the mantle symmetry axis. The specific way in which ˜εm, ˜mf
+and ˜ns change with inner core size would certainly be different for a turbulent model of viscous
+coupling. But the general conclusion remains that the addition of viscous coupling at the CMB
+and ICB does not significantly modify the Cassini state equilibrium angle of the mantle.
 –22–
-Confidential manuscript submitted to JGR-Planets
-3.4 Electromagnetic coupling
-Let us now turn to electromagnetic (EM) coupling. To focus on its role in the equilibrium
-Cassini state, we set the viscous coupling back to zero. Because magnetic field lines tend to re￾main attached to electrically conducting materials, a differential tangential motion between two
-electrically conducting regions stretches existing magnetic field lines that thread their interface.
-This induces a secondary magnetic field (or equivalently, an electrical current) and an associ￾ated tangential EM stress resisting the differential motion. EM coupling at the CMB and ICB
-acts then in a similar way to viscous coupling, and this ’magnetic friction’ depends on the strength
-of the radial magnetic field Br and the electrical conductivity σ on either side of the bound￾ary [Rochester , 1960, 1962, 1968].
-The parametrization of EM coupling in terms of the coupling constants Kcmb and Kicb
-has been developed in a few studies [e.g. Buffett, 1992; Buffett et al., 2002; Dumberry and Koot,
-2012]. Assuming a dominating axial dipole field, with a radial component at the CMB given
-by Br =
-√
-3
+Confidential manuscript submitted to JGR-Planets
+3.4 Electromagnetic coupling
+Let us now turn to electromagnetic (EM) coupling. To focus on its role in the equilibrium
+Cassini state, we set the viscous coupling back to zero. Because magnetic field lines tend to remain attached to electrically conducting materials, a differential tangential motion between two
+electrically conducting regions stretches existing magnetic field lines that thread their interface.
+This induces a secondary magnetic field (or equivalently, an electrical current) and an associated tangential EM stress resisting the differential motion. EM coupling at the CMB and ICB
+acts then in a similar way to viscous coupling, and this ’magnetic friction’ depends on the strength
+of the radial magnetic field Br and the electrical conductivity σ on either side of the boundary [Rochester , 1960, 1962, 1968].
+The parametrization of EM coupling in terms of the coupling constants Kcmb and Kicb
+has been developed in a few studies [e.g. Buffett, 1992; Buffett et al., 2002; Dumberry and Koot,
+2012]. Assuming a dominating axial dipole field, with a radial component at the CMB given
+by $B_r = \sqrt{3} \, \langle B_r^d \rangle \cos\theta$, where $\langle B_r^d \rangle$ is the r.m.s. strength of the field, the coupling constant
+$K_{cmb}$ can be written in the form
+$$K_{cmb} = 3 (1 - i) \, F_{cmb} \, \langle B_r^d \rangle^{2} , \qquad (47)$$
+where
+$$F_{cmb} = \frac{1}{\Omega_o \rho_f r_f} \left( \frac{1}{\sigma_m \delta_m} + \frac{1}{\sigma_f \delta_f} \right)^{-1} , \qquad (48)$$
+and where $\sigma_m$, $\delta_m = \sqrt{2/(\sigma_m \mu \Omega_o)}$ and $\sigma_f$, $\delta_f = \sqrt{2/(\sigma_f \mu \Omega_o)}$ are the electrical conductivities and magnetic skin depths in the mantle and fluid core, respectively, with $\mu = 4\pi \times 10^{-7}$
+N A$^{-2}$ the magnetic permeability of free space. The r.m.s. field strength $\langle B_r^d \rangle$ is connected to
+the Gauss coefficient $g_1^0$ of the surface magnetic field by
 
-
-Bd
-r
-
-cos θ, where 
-
-Bd
-r
-
-is the r.m.s. strength of the field, the coupling constant
-Kcmb can be written is the form
-Kcmb = 3(1 − i)Fcmb 
-
-B
-d
-r
-2
-, (47)
-where
-Fcmb =
-1
-Ωoρf rf
-
-1
-σmδm
-+
-1
-σf δf
-−1
-, (48)
-and where σm, δm =
-p
-2/(σmµΩo) and σf , δf =
-p
-2/(σfµΩo) are the electrical conductivi￾ties and magnetic skin depths in the mantle and fluid core, respectively, with µ = 4π ×10−7
-N A−2
-the magnetic permeability of free space. The r.m.s. field strength 
-
-Bd
-r
-
-is connected to
-the Gauss coefficient g
-0
-1 of the surface magnetic field by
 
-
-B
-d
-r
-
-=
-2
-√
-3
-
-R
-rf
-3
-
-g
-0
-1
-
-
-. (49)
-We can readily build an estimate of the amplitude of Kcmb. The electrical conductivity
-of common mantle minerals in Earth’s mantle at the pressure and temperature corresponding
-to the CMB of Mercury is in the range of σm ∼ 0.01 − 1 S m−1
-[Constable, 2015]. In con￾trast, the electrical conductivity of Fe in planetary cores is expected to be close σf ∼ 106 S
-m−1
-[Pozzo et al., 2012; de Koker et al., 2012]. This implies that (σmδm)
-−1  (σf δf )
-−1
-. Tak￾ing σm = 1 S m−1
-,
-
-g
-0
-1
-
- = 190 nT for Mercury’s dipole field [Anderson et al., 2012], rf =
-2000 km, ρf = 7000 kg m−3
-, this gives Kcmb ≈ (3.1 × 10−11)·(1 − i). To put this amplitude
-in perspective, taking a molecular viscosity of ν = 10−6 m2
-s
-−1
-in Equation (44a) gives a vis￾cous coupling constant of Kcmb ≈ (6.0 × 10−7
-)·(0.195 − 1.976i). Hence, EM coupling at the
-CMB is much weaker than viscous coupling, even if we include other spherical harmonic com￾ponents of the radial magnetic field.
-EM coupling can be enhanced if strongly stratified pockets of core fluid are trapped by
-CMB cavities [Buffett, 2010; Glane and Buffett, 2018], in which case the effective σm could be
-closer to σf . Likewise, σm can be increased if a more electrically conducting layer has formed
-at the bottom of Mercury’s mantle, for instance by the upward sedimentation and compaction
-of solid FeS crystals precipitating out of the fluid core [e.g. Hauck et al., 2013]. However, even
-in the extreme case of σm = σf = 106 S m−1
-, Kcmb ≈ (1.6 × 10−8
-) · (1 − i), which remains
+$$\left\langle B_r^d \right\rangle = \frac{2}{\sqrt{3}} \left( \frac{R}{r_f} \right)^{3} \left| g_1^0 \right| . \qquad (49)$$
+We can readily build an estimate of the amplitude of Kcmb. The electrical conductivity
+of common mantle minerals in Earth’s mantle at the pressure and temperature corresponding
+to the CMB of Mercury is in the range of $\sigma_m \sim 0.01 - 1$ S m$^{-1}$ [Constable, 2015]. In contrast, the electrical conductivity of Fe in planetary cores is expected to be close to $\sigma_f \sim 10^{6}$ S
+m$^{-1}$ [Pozzo et al., 2012; de Koker et al., 2012]. This implies that $(\sigma_m \delta_m)^{-1} \gg (\sigma_f \delta_f)^{-1}$.
+Taking $\sigma_m = 1$ S m$^{-1}$, $|g_1^0| = 190$ nT for Mercury’s dipole field [Anderson et al., 2012], $r_f = 2000$ km,
+$\rho_f = 7000$ kg m$^{-3}$, this gives $K_{cmb} \approx (3.1 \times 10^{-11}) \cdot (1 - i)$. To put this amplitude
+in perspective, taking a molecular viscosity of $\nu = 10^{-6}$ m$^2$ s$^{-1}$ in Equation (44a) gives a viscous
+coupling constant of $K_{cmb} \approx (6.0 \times 10^{-7}) \cdot (0.195 - 1.976i)$. Hence, EM coupling at the
+CMB is much weaker than viscous coupling, even if we include other spherical harmonic components of the radial magnetic field.
+EM coupling can be enhanced if strongly stratified pockets of core fluid are trapped by
+CMB cavities [Buffett, 2010; Glane and Buffett, 2018], in which case the effective σm could be
+closer to σf . Likewise, σm can be increased if a more electrically conducting layer has formed
+at the bottom of Mercury’s mantle, for instance by the upward sedimentation and compaction
+of solid FeS crystals precipitating out of the fluid core [e.g. Hauck et al., 2013]. However, even
+in the extreme case of $\sigma_m = \sigma_f = 10^{6}$ S m$^{-1}$, $K_{cmb} \approx (1.6 \times 10^{-8}) \cdot (1 - i)$, which remains
 –23–
-Confidential manuscript submitted to JGR-Planets
-smaller by a factor ∼ 60 than the smallest possible viscous coupling constant. Viscous forces
-dominate the tangential stress on the CMB of Mercury.
-At the ICB, because we can expect the electrical conductivity in both the solid inner core
-and fluid core to be similar, and because the radial magnetic field is likely much stronger, EM
-coupling can be much larger and dominate viscous coupling. We assume that the magnetic field
-morphology at the ICB is dominantly comprised of small spatial scales for example as predicted
-by the dynamo model of Christensen [2006]. EM coupling in this case can be parametrized in
-terms of an equivalent uniform radial magnetic field hBri capturing its r.m.s. strength [Buf￾fett et al., 2002; Dumberry and Koot, 2012]. Assuming an electrical conductivity σ equal in the
-fluid and solid core, the coupling constant Kicb can be written in the form
-Kicb =
-5
-4
-(1 − i)Ficb hBri
-2
-, (50)
-where
-Ficb =
-σδ
-Ωoρsrs
-, (51)
-and where δ =
-p
-2/(σµΩo) is the magnetic skin depth. As Ficb is inversely proportional to
-rs, Kicb is inversely proportional to inner core size. Note that computing the EM coupling based
-on the r.m.s. strength hBri rather than a true field morphology tends to overestimate the strength
-of the coupling [Koot and Dumberry, 2013]. However, since the strength of the radial magnetic
-field at the ICB of Mercury is largely unknown, imperfections of the EM coupling model are
-absorbed in the range of possible hBri values.
-The parametrization of Equation (50) is only valid in a ’weak field’ regime [Buffett et al.,
-2002], when the feedback from the Lorentz force on the flow in the fluid core can be neglected.
-When hBri is sufficiently large, this is no longer the case. EM coupling then enters a ’strong
-field’ regime [Buffett et al., 2002; Dumberry and Koot, 2012; Koot and Dumberry, 2013] in which
-Kicb increases linearly with hBri instead of quadratically. A good approximation of Kicb cal￾culated for Earth can be extracted from Figure 6a of Dumberry and Koot [2012],
-KE
-icb = (0.175 − i0.138)hBri , (52)
-where hBri is in units of Tesla. The superscript E emphasizes that the numerical factors are
-appropriate for the parameter values adopted for Earth in the computation of Dumberry and
-Koot [2012]. To adapt these numerical factors to Mercury, we write,
-Kicb = (0.175 − i0.138)Ficb
-F E
-icb
-hBri , (53)
-where F
-E
-icb is defined as in Equation (51) but using the parameters for Earth as defined in Dumb￾erry and Koot [2012]. These are Ωo = 7.292 × 10−5
-s
-−1
-, ρs = 12846 kg m−3
-, rs = 1221.5
-km, σ = 5 × 105 S m−1
-, which gives F
-E
-icb = 90.36 T−2
-.
-To compute Ficb, we assume an electrical conductivity of σ = 106 S m−1
-in the core of
-Mercury [e.g. de Koker et al., 2012; Deng et al., 2013]. The transition between the weak and
-strong field regime occurs when hBri ≈ 1.53 mT for the real part of Kicb. hBri at the ICB
-of Mercury is unknown. The dynamo model of Christensen [2006] showed that the field geom￾etry inside the core could be dominated by small length scales, yet only the weaker lower har￾monics of the field would penetrate through a thermally stratified layer in the upper region of
+Confidential manuscript submitted to JGR-Planets
+smaller by a factor ∼ 60 than the smallest possible viscous coupling constant. Viscous forces
+dominate the tangential stress on the CMB of Mercury.
+At the ICB, because we can expect the electrical conductivity in both the solid inner core
+and fluid core to be similar, and because the radial magnetic field is likely much stronger, EM
+coupling can be much larger and dominate viscous coupling. We assume that the magnetic field
+morphology at the ICB is dominantly comprised of small spatial scales for example as predicted
+by the dynamo model of Christensen [2006]. EM coupling in this case can be parametrized in
+terms of an equivalent uniform radial magnetic field $\langle B_r \rangle$ capturing its r.m.s. strength [Buffett et al., 2002; Dumberry and Koot, 2012]. Assuming an electrical conductivity $\sigma$ equal in the
+fluid and solid core, the coupling constant $K_{icb}$ can be written in the form
+$$K_{icb} = \frac{5}{4} (1 - i) \, F_{icb} \, \langle B_r \rangle^{2} , \qquad (50)$$
+where
+$$F_{icb} = \frac{\sigma \delta}{\Omega_o \rho_s r_s} , \qquad (51)$$
+and where $\delta = \sqrt{2/(\sigma \mu \Omega_o)}$ is the magnetic skin depth. As $F_{icb}$ is inversely proportional to
+$r_s$, $K_{icb}$ is inversely proportional to inner core size. Note that computing the EM coupling based
+on the r.m.s. strength $\langle B_r \rangle$ rather than a true field morphology tends to overestimate the strength
+of the coupling [Koot and Dumberry, 2013]. However, since the strength of the radial magnetic
+field at the ICB of Mercury is largely unknown, imperfections of the EM coupling model are
+absorbed in the range of possible $\langle B_r \rangle$ values.
+The parametrization of Equation (50) is only valid in a ’weak field’ regime [Buffett et al.,
+2002], when the feedback from the Lorentz force on the flow in the fluid core can be neglected.
+When $\langle B_r \rangle$ is sufficiently large, this is no longer the case. EM coupling then enters a ’strong
+field’ regime [Buffett et al., 2002; Dumberry and Koot, 2012; Koot and Dumberry, 2013] in which
+$K_{icb}$ increases linearly with $\langle B_r \rangle$ instead of quadratically. A good approximation of $K_{icb}$ calculated for Earth can be extracted from Figure 6a of Dumberry and Koot [2012],
+$$K_{icb}^{E} = (0.175 - 0.138i) \, \langle B_r \rangle , \qquad (52)$$
+where $\langle B_r \rangle$ is in units of Tesla. The superscript E emphasizes that the numerical factors are
+appropriate for the parameter values adopted for Earth in the computation of Dumberry and
+Koot [2012]. To adapt these numerical factors to Mercury, we write,
+$$K_{icb} = (0.175 - 0.138i) \, \frac{F_{icb}}{F_{icb}^{E}} \, \langle B_r \rangle , \qquad (53)$$
+where $F_{icb}^{E}$ is defined as in Equation (51) but using the parameters for Earth as defined in Dumberry and Koot [2012]. These are $\Omega_o = 7.292 \times 10^{-5}$ s$^{-1}$, $\rho_s = 12846$ kg m$^{-3}$, $r_s = 1221.5$
+km, $\sigma = 5 \times 10^{5}$ S m$^{-1}$, which gives $F_{icb}^{E} = 90.36$ T$^{-2}$.
+To compute $F_{icb}$, we assume an electrical conductivity of $\sigma = 10^{6}$ S m$^{-1}$ in the core of
+Mercury [e.g. de Koker et al., 2012; Deng et al., 2013]. The transition between the weak and
+strong field regime occurs when $\langle B_r \rangle \approx 1.53$ mT for the real part of $K_{icb}$. $\langle B_r \rangle$ at the ICB
+of Mercury is unknown. The dynamo model of Christensen [2006] showed that the field geometry inside the core could be dominated by small length scales, yet only the weaker lower harmonics of the field would penetrate through a thermally stratified layer in the upper region of
 –24–
-Confidential manuscript submitted to JGR-Planets
-the fluid core and reach the surface. If so, the field strength inside the core can exceed the sur￾face field strength by a factor 1000. Taking a surface field strength equal to ∼ 300 nT [e.g An￾derson et al., 2012], hBri at the ICB could be as large as 0.3 mT, corresponding to approxi￾mately 10% of the field strength within Earth’s core. Given that it is perhaps unlikely that Mer￾cury’s field can be as high as that in Earth’s core, in all likelihood EM coupling at the ICB of
-Mercury remains in the weak field regime.
-Figure 6 shows how ˜εm, ˜mf and ˜ns vary as functions of inner core radius for different choices
-of hBri. The larger hBri is, the stronger is the EM coupling at the ICB, and the smaller is the
-differential rotation between the fluid core and inner core. The inner core and fluid core are vir￾tually locked into a common precession motion when hBri > 0.3 mT. Further increasing hBri
-above 1 mT does not change the solution as EM coupling already dominates all other torques
-on the inner core. This is the case even when EM coupling transitions into the strong field regime.
-EM coupling at the CMB is included in these calculations, with σm = 1 S m−1 and
-
-g
-0
-1
-
- =
-190 nT, but remains much weaker than the inertial torque at the CMB, so for a small inner core
-we retrieved the solutions of ˜εm and ˜mf shown in Figure 4.
-As the inner core radius is increased, both ˜εm and ˜mf get smaller, as it was the case with
-viscous coupling alone, although the addition of EM coupling lead to more substantial changes.
-The inner core needs to be larger than approximately 500 km for changes in the Cassini state
-equilibrium to be noticeable. It is important to point out that ˜mf is reduced not because of
-EM coupling at the CMB, but rather from the combination of EM coupling at the ICB, which
-pulls the fluid core towards an alignment with the inner core, and gravitational coupling on the
-inner core, which pulls the latter to align with the mantle. The larger the EM coupling is, the
-greater is the reduction in ˜εm and ˜mf .
-When the EM coupling at the ICB is sufficiently strong that the fluid and solid cores are
-locked into a common precession motion, a good approximation of ˜εm is given by the same pre￾diction as Equations (39-40) involving the effective moment of inertia C
-0
-, except χ is now given
-by
-χ =
-A¯
-cΩp cos I − A¯
-sΩoα3φs
-A¯
-fΩo(ef + Kcmb) + A¯
-sΩoesα3αg − A¯
-cΩp cos I
-. (54)
-For a small inner core, A¯
-cΩp cos I > A¯
-sΩoα3φs and χ is positive. Because A¯
-sΩoα3φs increases
-with inner core size, χ gets smaller, and so do C
-0 and ˜εm. The mantle obliquity drops from 2.049
-arcmin for a small inner core to 2.034 arcmin for an inner core of 1500 km, a reduction of 0.015
-arcmin. For an inner core larger than ≈ 1000 km, A¯
-cΩp cos I < A¯
-sΩoα3φs, so χ becomes neg￾ative, C
-0 becomes smaller than the moment of inertia of a rigid Mercury C, and ˜εm becomes
-smaller than the prediction based on a rigid planet.
-The larger the inner core is, the smaller are the misalignments of the fluid and solid cores
-with respect to the mantle. Hence, the general conclusion we reached for viscous coupling alone
-is not altered with the addition of EM coupling but further strengthened; the larger the inner
-core is, the closer we approach a planet precessing as a rigid body. This is best revealed by the
-obliquity of the gravity field ˜εg which, for a large inner core, asymptotically approaches the obliq￾uity expected for a rigid planet. Note that with strong EM coupling at the ICB, the offset be￾tween ˜εm and ˜εg can be as large as 0.008 arcmin for a large inner core.
-3.5 Fixed inner core density versus fixed ICB density contrast
-Coupling models when viscous and EM stresses are both present have been presented in
-Mathews and Guo [2005] and Deleplace and Cardin [2006]. However, in the light of our results,
+Confidential manuscript submitted to JGR-Planets
+the fluid core and reach the surface. If so, the field strength inside the core can exceed the surface field strength by a factor of 1000. Taking a surface field strength equal to ∼ 300 nT [e.g. Anderson et al., 2012], $\langle B_r \rangle$ at the ICB could be as large as 0.3 mT, corresponding to approximately 10% of the field strength within Earth’s core. Given that it is perhaps unlikely that Mercury’s field can be as high as that in Earth’s core, in all likelihood EM coupling at the ICB of
+Mercury remains in the weak field regime.
+Figure 6 shows how ˜εm, ˜mf and ˜ns vary as functions of inner core radius for different choices
+of $\langle B_r \rangle$. The larger $\langle B_r \rangle$ is, the stronger is the EM coupling at the ICB, and the smaller is the
+differential rotation between the fluid core and inner core. The inner core and fluid core are virtually locked into a common precession motion when $\langle B_r \rangle > 0.3$ mT. Further increasing $\langle B_r \rangle$
+above 1 mT does not change the solution as EM coupling already dominates all other torques
+on the inner core. This is the case even when EM coupling transitions into the strong field regime.
+EM coupling at the CMB is included in these calculations, with $\sigma_m = 1$ S m$^{-1}$ and $|g_1^0| = 190$ nT,
+but remains much weaker than the inertial torque at the CMB, so for a small inner core
+we retrieved the solutions of ˜εm and ˜mf shown in Figure 4.
+As the inner core radius is increased, both $\tilde{\varepsilon}_m$ and $\tilde{m}_f$ get smaller, as was the case with
+viscous coupling alone, although the addition of EM coupling leads to more substantial changes.
+The inner core needs to be larger than approximately 500 km for changes in the Cassini state
+equilibrium to be noticeable. It is important to point out that ˜mf is reduced not because of
+EM coupling at the CMB, but rather from the combination of EM coupling at the ICB, which
+pulls the fluid core towards an alignment with the inner core, and gravitational coupling on the
+inner core, which pulls the latter to align with the mantle. The larger the EM coupling is, the
+greater is the reduction in ˜εm and ˜mf .
+When the EM coupling at the ICB is sufficiently strong that the fluid and solid cores are
+locked into a common precession motion, a good approximation of $\tilde{\varepsilon}_m$ is given by the same prediction as Equations (39-40) involving the effective moment of inertia $C'$, except $\chi$ is now given
+by
+$$\chi = \frac{\bar{A}_c \Omega_p \cos I - \bar{A}_s \Omega_o \alpha_3 \phi_s}{\bar{A}_f \Omega_o (e_f + K_{cmb}) + \bar{A}_s \Omega_o e_s \alpha_3 \alpha_g - \bar{A}_c \Omega_p \cos I} . \qquad (54)$$
+For a small inner core, $\bar{A}_c \Omega_p \cos I > \bar{A}_s \Omega_o \alpha_3 \phi_s$ and $\chi$ is positive. Because $\bar{A}_s \Omega_o \alpha_3 \phi_s$ increases
+with inner core size, $\chi$ gets smaller, and so do $C'$ and $\tilde{\varepsilon}_m$. The mantle obliquity drops from 2.049
+arcmin for a small inner core to 2.034 arcmin for an inner core of 1500 km, a reduction of 0.015
+arcmin. For an inner core larger than $\approx 1000$ km, $\bar{A}_c \Omega_p \cos I < \bar{A}_s \Omega_o \alpha_3 \phi_s$, so $\chi$ becomes negative, $C'$ becomes smaller than the moment of inertia of a rigid Mercury $C$, and $\tilde{\varepsilon}_m$ becomes
+smaller than the prediction based on a rigid planet.
+The larger the inner core is, the smaller are the misalignments of the fluid and solid cores
+with respect to the mantle. Hence, the general conclusion we reached for viscous coupling alone
+is not altered with the addition of EM coupling but further strengthened; the larger the inner
+core is, the closer we approach a planet precessing as a rigid body. This is best revealed by the
+obliquity of the gravity field ˜εg which, for a large inner core, asymptotically approaches the obliquity expected for a rigid planet. Note that with strong EM coupling at the ICB, the offset between ˜εm and ˜εg can be as large as 0.008 arcmin for a large inner core.
+3.5 Fixed inner core density versus fixed ICB density contrast
+Coupling models when viscous and EM stresses are both present have been presented in
+Mathews and Guo [2005] and Deleplace and Cardin [2006]. However, in the light of our results,
 –25–
-Confidential manuscript submitted to JGR-Planets
-2.032
-2.034
-2.036
-2.038
-2.040
-2.042
-2.044
-2.046
-2.048
-2.050
-Obliquity angle (arcmin)
-0 200 400 600 800 1000 1200 1400
-Inner core radius (km)
-0.0
-0.5
-1.0
-1.5
-2.0
-2.5
-3.0
-3.5
-4.0
-4.5
-Obliquity angle (arcmin)
-0 200 400 600 800 1000 1200 1400
-Inner core radius (km)
-Br at ICB: 1 mT 0.3 mT 0.1 mT 0.03 mT 0.01 mT
-εm
-εg
-mf
-ns
-a b
-εm for a rigid planet
-Figure 6. a) Obliquity of the mantle (˜εm, solid lines) and gravity field (˜εg, dashed lines) b) ˜mf
-(solid lines) and ˜ns (dashed lines) as a function of inner core radius and for different choices of Br
-(colour in legend).
-for the Cassini state equilibrium of Mercury, the tangential stress at the CMB is dominated by
-viscous forces, and that at the ICB should be dominated by EM forces. To simplify, we con￾sider a model where Kcmb is purely from viscous coupling and Kicb purely from EM coupling.
-We choose an effective viscosity at the CMB of ν = 10−4 m2
-s
-−1
-, which we believe to be a
-representative value given the comparison with the Moon (see section 3.3). We take a radial
-field strength at the ICB of hBri = 0.3 mT, approximately the field strength expected under
-the dynamo scenario of Christensen [2006]. We adopt these values as those of a ‘representa￾tive’ coupling model, although the uncertainty on ν and hBri obviously remains high.
-Figure 7 shows how ˜εm, ˜mf and ˜ns vary with inner core radius for the ’representative’
-coupling model (black lines) under the fixed inner core density scenario that we have used in
-sections 3.2, 3.3 and 3.4. Figure 7 also shows how the results change when, for the same rep￾resentative coupling model, we adopt instead a fixed density contrast between the fluid and solid
-cores and for different choices of α3 (coloured lines). For a relatively high density contrast (α3 =
-0.2), the results are qualitatively similar to the fixed inner core density scenario. For a smaller
-α3, the point at which the orientation of the co-precessing fluid and inner cores begins to be
-pulled into an alignment with the mantle is pushed to a larger inner core radius. However, the
-general behaviour of ˜εm, ˜mf and ˜ns as functions of inner core radius is unchanged. Hence, all
-our results in the previous three sections would be qualitatively similar under a fixed density
-contrast scenario. A smaller density contrast at the ICB only implies that a larger inner core
-is required in order to produce an equivalent change in the Cassini state equilibrium.
-4 Discussion
-The study of Peale et al. [2016] also presented predictions of the obliquities of the man￾tle, fluid core and inner core associated with the equilibrium Cassini state of Mercury. Their
-model included the tangential viscous stress at the ICB and CMB, but not the EM stress. Their
-Table 1 gives the obliquities of the mantle, fluid core and inner core, denoted respectively as
+Confidential manuscript submitted to JGR-Planets
+2.032
+2.034
+2.036
+2.038
+2.040
+2.042
+2.044
+2.046
+2.048
+2.050
+Obliquity angle (arcmin)
+0 200 400 600 800 1000 1200 1400
+Inner core radius (km)
+0.0
+0.5
+1.0
+1.5
+2.0
+2.5
+3.0
+3.5
+4.0
+4.5
+Obliquity angle (arcmin)
+0 200 400 600 800 1000 1200 1400
+Inner core radius (km)
+Br at ICB: 1 mT 0.3 mT 0.1 mT 0.03 mT 0.01 mT
+εm
+εg
+mf
+ns
+a b
+εm for a rigid planet
+Figure 6. a) Obliquity of the mantle (˜εm, solid lines) and gravity field (˜εg, dashed lines) b) ˜mf
+(solid lines) and ˜ns (dashed lines) as a function of inner core radius and for different choices of Br
+(colour in legend).
+for the Cassini state equilibrium of Mercury, the tangential stress at the CMB is dominated by
+viscous forces, and that at the ICB should be dominated by EM forces. To simplify, we consider a model where Kcmb is purely from viscous coupling and Kicb purely from EM coupling.
+We choose an effective viscosity at the CMB of $\nu = 10^{-4}$ m$^2$ s$^{-1}$, which we believe to be a
+representative value given the comparison with the Moon (see section 3.3). We take a radial
+field strength at the ICB of $\langle B_r \rangle = 0.3$ mT, approximately the field strength expected under
+the dynamo scenario of Christensen [2006]. We adopt these values as those of a ‘representative’ coupling model, although the uncertainty on $\nu$ and $\langle B_r \rangle$ obviously remains high.
+Figure 7 shows how ˜εm, ˜mf and ˜ns vary with inner core radius for the ’representative’
+coupling model (black lines) under the fixed inner core density scenario that we have used in
+sections 3.2, 3.3 and 3.4. Figure 7 also shows how the results change when, for the same representative coupling model, we adopt instead a fixed density contrast between the fluid and solid
+cores and for different choices of α3 (coloured lines). For a relatively high density contrast (α3 =
+0.2), the results are qualitatively similar to the fixed inner core density scenario. For a smaller
+α3, the point at which the orientation of the co-precessing fluid and inner cores begins to be
+pulled into an alignment with the mantle is pushed to a larger inner core radius. However, the
+general behaviour of ˜εm, ˜mf and ˜ns as functions of inner core radius is unchanged. Hence, all
+our results in the previous three sections would be qualitatively similar under a fixed density
+contrast scenario. A smaller density contrast at the ICB only implies that a larger inner core
+is required in order to produce an equivalent change in the Cassini state equilibrium.
+4 Discussion
+The study of Peale et al. [2016] also presented predictions of the obliquities of the mantle, fluid core and inner core associated with the equilibrium Cassini state of Mercury. Their
+model included the tangential viscous stress at the ICB and CMB, but not the EM stress. Their
+Table 1 gives the obliquities of the mantle, fluid core and inner core, denoted respectively as
 –26–
-Confidential manuscript submitted to JGR-Planets
-2.032
-2.034
-2.036
-2.038
-2.040
-2.042
-2.044
-2.046
-2.048
-2.050
-Obliquity angle (arcmin)
-0 200 400 600 800 1000 1200 1400
-Inner core radius (km)
-0.0
-0.5
-1.0
-1.5
-2.0
-2.5
-3.0
-3.5
-4.0
-4.5
-Obliquity angle (arcmin)
-0 200 400 600 800 1000 1200 1400
-Inner core radius (km)
-εm for a rigid planet
-a b
-ρs = 8800 kg m α3: 0.20 0.15 0.10 0.05 0.01 -3
-mf
-ns
-εm
-εg
-Figure 7. a) Obliquity of the mantle (˜εm, solid lines) and gravity field (˜εg, dashed lines) b) ˜mf
-(solid lines) and ˜ns (dashed lines) as a function of inner core radius, for a fixed inner core density of
-8800 kg m−3
-(black lines) and for different choices of α3 (coloured lines).
-i
-0
-m, i
-0
-f
-and i
-0
-s
-; these represent the obliquities with respect to the orbital plane and are connected
-to our variables by: i
-0
-m = ˜εm, i
-0
-f = ˜εm + ˜m+ ˜mf ≈ ε˜m + ˜mf and i
-0
-s = ˜εm + ˜ns. To summarize
-their results, i
-0
-f
-and i
-0
-s vary substantially for different inner core sizes, are always of compara￾ble amplitude, and i
-0
-s
-is always larger than i
-0
-f
-. Furthermore, they find that as the inner core
-size is increased, the mantle obliquity i
-0
-m gets progressively larger and is displaced further away
-from its expected orientation based of a rigid planet (see their Figure 6). The change in i
-0
-m they
-obtain between a case with no inner core and an inner core radius equal to 0.6 times the plan￾etary radius (≈ 1463 km, close to the maximum inner core size of 1500 km we have considered),
-is approximately an increase of 5 × 10−5
-rad = 0.17 arcmin. This also corresponds approxi￾mately to the deviation of the obliquity with respect to that of a rigid planet.
-When only viscous stress is included in our model (section 3.3), our results are substan￾tially different. As illustrated in Figure 4, we find instead that the obliquity of the fluid core
-gets smaller with inner core size and that the change is very modest. In contrast with the re￾sults of Peale et al. [2016], we find that the inner core obliquity is typically smaller than that
-of the fluid core, except when the inner core is very small or when the effective viscosity is un￾reasonably large. We also find that as the inner core size is increased, the mantle obliquity gets
-smaller, opposite to the results of Peale et al. [2016], and that the changes remain small, at most
-of the order of 0.005 arcmin. A part of the difference is due to the different viscous coupling
-model that we use. But even when we adopt their model parameters and use their viscosity model,
-we were not able to reproduce their results.
-In the absence of viscous and EM coupling, the strong gravitational torque exerted on the
-inner core by the mantle should prevent any large misalignment between the two. This is cap￾tured by the period of the FICN, which is of the order of 100 yr, much shorter than the forc￾ing period of 325 kyr. Viscous and/or EM coupling at the ICB can counteract the gravitational
-torque (and alter the period of the FICN), but only for a small inner core. The ratio of the viscous￾EM torque to the gravitational torque decreases with inner core size, so a large inner core should
-be more strongly aligned with the mantle. The more strongly the inner core and mantle are
+Confidential manuscript submitted to JGR-Planets
+2.032
+2.034
+2.036
+2.038
+2.040
+2.042
+2.044
+2.046
+2.048
+2.050
+Obliquity angle (arcmin)
+0 200 400 600 800 1000 1200 1400
+Inner core radius (km)
+0.0
+0.5
+1.0
+1.5
+2.0
+2.5
+3.0
+3.5
+4.0
+4.5
+Obliquity angle (arcmin)
+0 200 400 600 800 1000 1200 1400
+Inner core radius (km)
+εm for a rigid planet
+a b
+$\rho_s = 8800$ kg m$^{-3}$; $\alpha_3$: 0.20 0.15 0.10 0.05 0.01
+mf
+ns
+εm
+εg
+Figure 7. a) Obliquity of the mantle (˜εm, solid lines) and gravity field (˜εg, dashed lines) b) ˜mf
+(solid lines) and ˜ns (dashed lines) as a function of inner core radius, for a fixed inner core density of
+8800 kg m$^{-3}$ (black lines) and for different choices of α3 (coloured lines).
+$i'_m$, $i'_f$ and $i'_s$; these represent the obliquities with respect to the orbital plane and are connected
+to our variables by: $i'_m = \tilde{\varepsilon}_m$, $i'_f = \tilde{\varepsilon}_m + \tilde{m} + \tilde{m}_f \approx \tilde{\varepsilon}_m + \tilde{m}_f$ and $i'_s = \tilde{\varepsilon}_m + \tilde{n}_s$. To summarize
+their results, $i'_f$ and $i'_s$ vary substantially for different inner core sizes, are always of comparable amplitude, and $i'_s$ is always larger than $i'_f$. Furthermore, they find that as the inner core
+size is increased, the mantle obliquity $i'_m$ gets progressively larger and is displaced further away
+from its expected orientation based on a rigid planet (see their Figure 6). The change in $i'_m$ they
+obtain between a case with no inner core and an inner core radius equal to 0.6 times the planetary radius (≈ 1463 km, close to the maximum inner core size of 1500 km we have considered),
+is approximately an increase of $5 \times 10^{-5}$ rad = 0.17 arcmin. This also corresponds approximately to the deviation of the obliquity with respect to that of a rigid planet.
+When only viscous stress is included in our model (section 3.3), our results are substantially different. As illustrated in Figure 4, we find instead that the obliquity of the fluid core
+gets smaller with inner core size and that the change is very modest. In contrast with the results of Peale et al. [2016], we find that the inner core obliquity is typically smaller than that
+of the fluid core, except when the inner core is very small or when the effective viscosity is unreasonably large. We also find that as the inner core size is increased, the mantle obliquity gets
+smaller, opposite to the results of Peale et al. [2016], and that the changes remain small, at most
+of the order of 0.005 arcmin. A part of the difference is due to the different viscous coupling
+model that we use. But even when we adopt their model parameters and use their viscosity model,
+we were not able to reproduce their results.
+In the absence of viscous and EM coupling, the strong gravitational torque exerted on the
+inner core by the mantle should prevent any large misalignment between the two. This is captured by the period of the FICN, which is of the order of 100 yr, much shorter than the forcing period of 325 kyr. Viscous and/or EM coupling at the ICB can counteract the gravitational
+torque (and alter the period of the FICN), but only for a small inner core. The ratio of the viscous-EM torque to the gravitational torque decreases with inner core size, so a large inner core should
+be more strongly aligned with the mantle. The more strongly the inner core and mantle are
 –27–
-Confidential manuscript submitted to JGR-Planets
-gravitationally locked together, the more they behave as a single rigid body in response to the
-external torque from the Sun. We expect then that the obliquity of the mantle should be brought
-closer to that of a rigid planet when the inner core is larger. Hence, we find puzzling the re￾sults of Peale et al. [2016], which suggest the opposite.
-We showed that EM coupling is most likely larger than viscous coupling at the ICB, even
-though our knowledge of the radial magnetic field strength inside Mercury (on which EM cou￾pling depends) remains poor. If the magnetic field strength at the ICB is above 0.3 mT, EM
-coupling is sufficiently strong to bring the fluid and solid cores into a locked procession motion.
-The larger the inner core is, the more this co-precessing core is forced into an alignment with
-the mantle because of the mantle gravitational torque on the inner core. As a result, the larger
-the inner core is, the closer we approach a situation resembling a whole planet precessing as
-a rigid body. The addition of EM coupling at the ICB does not change the overall picture that
-we observe with viscous coupling alone; the mantle obliquity decreases with inner core size. The
-amplitude of the decrease can be as large as 0.015 arcmin, 3 times larger than for viscous cou￾pling alone; this remains a factor 10 smaller than the changes suggested in Peale et al. [2016],
-and again, importantly, in the reverse direction.
-Our results suggest then that the presence and size of an inner core leads to only mod￾est changes of the mantle obliquity εm compared to the obliquity predicted on the basis of an
-entirely rigid planet (ε
-r
-m). Let us denote this difference as ∆εm = εm−ε
-r
-m. The largest ∆εm
-occurs for a small or no inner core, and is ∆εm ≈ 0.01 arcmin. This difference is decreased
-as the inner core size is increased. For a sufficiently large inner core, in the case of a strong EM
-coupling and large density contrast at the ICB, ∆εm can be negative, but its absolute value
-remains smaller than 0.01 arcmin.
-To put these results in perspective, the uncertainty in the measurement of the mantle obliq￾uity reported by Margot et al. [2012] and Stark et al. [2015a] is of the order of 0.08 arcmin, much
-larger than this difference. This means that, at the current level of precision, it is not possi￾ble to distinguish the position of the mantle obliquity from the obliquity of a rigid planet. This
-is consistent with the fact that the observed obliquity falls close to that expected from a rigid
-planet. But it also implies that the observed obliquity cannot be used to place constraints on
-the inner core size.
-Nevertheless, our results show that the presence of a fluid core and inner core affect the
-resulting mantle obliquity by as much as 0.01 arcmin. This is of the same order as the change
-in obliquity caused by elastic tidal deformation, which is of the order of 0.35 arcsec (≈ 0.006
-arcmin) [Baland et al., 2017]. This is also of the same order as the amplitude of the nutation
-motion about the mean equilibrium Cassini state forced by the precession of the pericenter, which
-is approximately 0.85 arcsec (≈ 0.014 arcmin) [Baland et al., 2017]. The precision on the obliq￾uity from the upcoming BepiColombo satellite mission is expected to be ≤ 0.5 arcsec (≤ 0.008
-arcmin) [Cical`o et al., 2016]. Thus, in addition to including tidal deformation and the preces￾sion of the pericenter, a Cassini state model that includes a fluid and solid core will then be
-necessary in order to properly tie Mercury’s obliquity to its interior structure. In turn, this opens
-the possibility of further constraining the interior structure of Mercury on the basis of its obliq￾uity.
-Obliquity measurements based on tracking topographic features reflect the orientation of
-the spin-symmetry axis of the mantle (εm). Measurements based on tracking the gravity field
-of Mercury reflect instead the orientation of the principal moment of the whole planet (εg). These
-two orientations do not coincide when an inner core is present and is misaligned from the man￾tle. Since gravitational coupling prevents a large inner core tilt with respect to the mantle, we
+Confidential manuscript submitted to JGR-Planets
+gravitationally locked together, the more they behave as a single rigid body in response to the
+external torque from the Sun. We expect then that the obliquity of the mantle should be brought
+closer to that of a rigid planet when the inner core is larger. Hence, we find puzzling the results of Peale et al. [2016], which suggest the opposite.
+We showed that EM coupling is most likely larger than viscous coupling at the ICB, even
+though our knowledge of the radial magnetic field strength inside Mercury (on which EM coupling depends) remains poor. If the magnetic field strength at the ICB is above 0.3 mT, EM
+coupling is sufficiently strong to bring the fluid and solid cores into a locked precession motion.
+The larger the inner core is, the more this co-precessing core is forced into an alignment with
+the mantle because of the mantle gravitational torque on the inner core. As a result, the larger
+the inner core is, the closer we approach a situation resembling a whole planet precessing as
+a rigid body. The addition of EM coupling at the ICB does not change the overall picture that
+we observe with viscous coupling alone; the mantle obliquity decreases with inner core size. The
+amplitude of the decrease can be as large as 0.015 arcmin, 3 times larger than for viscous coupling alone; this remains a factor 10 smaller than the changes suggested in Peale et al. [2016],
+and again, importantly, in the reverse direction.
+Our results suggest then that the presence and size of an inner core leads to only modest changes of the mantle obliquity εm compared to the obliquity predicted on the basis of an
+entirely rigid planet ($\varepsilon_m^r$). Let us denote this difference as $\Delta\varepsilon_m = \varepsilon_m - \varepsilon_m^r$. The largest ∆εm
+occurs for a small or no inner core, and is ∆εm ≈ 0.01 arcmin. This difference is decreased
+as the inner core size is increased. For a sufficiently large inner core, in the case of a strong EM
+coupling and large density contrast at the ICB, ∆εm can be negative, but its absolute value
+remains smaller than 0.01 arcmin.
+To put these results in perspective, the uncertainty in the measurement of the mantle obliquity reported by Margot et al. [2012] and Stark et al. [2015a] is of the order of 0.08 arcmin, much
+larger than this difference. This means that, at the current level of precision, it is not possible to distinguish the position of the mantle obliquity from the obliquity of a rigid planet. This
+is consistent with the fact that the observed obliquity falls close to that expected from a rigid
+planet. But it also implies that the observed obliquity cannot be used to place constraints on
+the inner core size.
+Nevertheless, our results show that the presence of a fluid core and inner core affects the
+resulting mantle obliquity by as much as 0.01 arcmin. This is of the same order as the change
+in obliquity caused by elastic tidal deformation, which is of the order of 0.35 arcsec (≈ 0.006
+arcmin) [Baland et al., 2017]. This is also of the same order as the amplitude of the nutation
+motion about the mean equilibrium Cassini state forced by the precession of the pericenter, which
+is approximately 0.85 arcsec (≈ 0.014 arcmin) [Baland et al., 2017]. The precision on the obliquity from the upcoming BepiColombo satellite mission is expected to be ≤ 0.5 arcsec (≤ 0.008
+arcmin) [Cicalò et al., 2016]. Thus, in addition to including tidal deformation and the precession of the pericenter, a Cassini state model that includes a fluid and solid core will then be
+necessary in order to properly tie Mercury’s obliquity to its interior structure. In turn, this opens
+the possibility of further constraining the interior structure of Mercury on the basis of its obliquity.
+Obliquity measurements based on tracking topographic features reflect the orientation of
+the spin-symmetry axis of the mantle (εm). Measurements based on tracking the gravity field
+of Mercury reflect instead the orientation of the principal moment of the whole planet (εg). These
+two orientations do not coincide when an inner core is present and is misaligned from the mantle. Since gravitational coupling prevents a large inner core tilt with respect to the mantle, we
 –28–
-Confidential manuscript submitted to JGR-Planets
-find that the misalignment ∆εg = εg − εm is limited. The maximum offset that we obtain
-is approximately ∆εg ≈ 0.007 arcmin. This limited magnitude of offset is important in the
-light of the recent obliquity of the gravity field estimated in Genova et al. [2019], εg = 1.968±
-0.027 arcmin. This is substantially smaller than the two mesurements of the obliquity of the
-spin-symmetry axis of the mantle: εm = 2.04 ± 0.08 arcmin [Margot et al., 2012] and εm =
-2.029±0.085 arcmin [Stark et al., 2015a], although all three measurements remain consistent
-with one another within their error estimates. In their interpretation, Genova et al. [2019] sug￾gest that the different central value of the obliquity that they obtain (smaller by ∼ 0.07 ar￾cmin) is perhaps explained by an offset ∆εg due to the presence of a (possibly large) solid in￾ner core. However, this is one order of magnitude larger than the maximum magnitude of ∆εg
-that we predict. Moreover, we predict that the obliquity of the gravity field should be larger
-than that of the mantle spin axis, not smaller. Hence, at the present-day level of the precision
-of the measurements, εg and εm should coincide, and their difference cannot be interpreted as
-reflecting the misalignment between the polar moment of inertia of the whole planet and the
-mantle spin axis.
-Lastly, we have concentrated our efforts on the mutual orientations of the different spin
-and symmetry axes in the Cassini plane. Dissipation at the CMB and ICB introduced by vis￾cous and EM coupling also lead to a displacement of these axes in the direction perpendicu￾lar to the Cassini plane [e.g Peale et al., 2014]. Indeed, the two measurements based on track￾ing surface topographic features from Margot et al. [2012] and Stark et al. [2015a] suggest that
-the mantle spin axis lags behind the Cassini plane by approximately 2 arcsec (∼ 0.03 arcmin).
-Although this offset is smaller than the measurement errors, so that the observed obliquity is
-still consistent with no deviation away from the Cassini plane, some amount of dissipation in￾variably takes place. These measurements give then a measure of the possible amplitude of the
-dissipation. One source of dissipation is from anelastic tidal deformation [Baland et al., 2017],
-but viscous and EM coupling at the boundaries of the fluid core is another. Hence, the out-of￾plane component of the observed obliquity may further help to quantify and constrain the in￾terior coupling mechanisms. This will be the subject of a future study.
-5 Conclusion
-We have investigated how the presence of a fluid core and solid inner core affects the Cassini
-state equilibrium of Mercury. Our general conclusion is that the coupling strength between Mer￾cury’s interior regions is sufficiently strong that the obliquity of the mantle spin-symmetry axis
-does not deviate from that of a rigid planet by more than 0.01 arcmin. This largest offset oc￾curs for a small or no inner core. The larger the inner core is, the more it is forced into an align￾ment with the mantle because of the strong gravitational torque between the two, and the closer
-we approach a situation resembling a whole planet precessing as a rigid body. The misalign￾ment between the polar moment of inertia and mantle spin axis increases with inner core size,
-but is limited to approximately 0.007 arcmin. These conclusions apply irrespective of the core
-composition and thus of the partitioning of light elements into the solid core; a smaller den￾sity contrast at the ICB only implies that a larger inner core is required in order to produce
-an equivalent change in the Cassini state equilibrium.
-Our results imply that the obliquities of the mantle spin axis and polar moment of iner￾tia (or, equivalently, the gravity field) should coincide at the present-day level of measurement
-errors. Moreover, neither of these can be distinguished from the obliquity predicted on the ba￾sis of a rigid planet. However, the smaller measurement errors expected from the upcoming Bepi￾Columbo satellite mission may permit this distinction, and thus provide further constraints on
-Mercury’s interior structure.
+Confidential manuscript submitted to JGR-Planets
+find that the misalignment ∆εg = εg − εm is limited. The maximum offset that we obtain
+is approximately ∆εg ≈ 0.007 arcmin. This limited magnitude of offset is important in the
+light of the recent obliquity of the gravity field estimated in Genova et al. [2019], εg = 1.968±
+0.027 arcmin. This is substantially smaller than the two measurements of the obliquity of the
+spin-symmetry axis of the mantle: εm = 2.04 ± 0.08 arcmin [Margot et al., 2012] and εm =
+2.029±0.085 arcmin [Stark et al., 2015a], although all three measurements remain consistent
+with one another within their error estimates. In their interpretation, Genova et al. [2019] suggest that the different central value of the obliquity that they obtain (smaller by ∼ 0.07 arcmin) is perhaps explained by an offset ∆εg due to the presence of a (possibly large) solid inner core. However, this is one order of magnitude larger than the maximum magnitude of ∆εg
+that we predict. Moreover, we predict that the obliquity of the gravity field should be larger
+than that of the mantle spin axis, not smaller. Hence, at the present-day level of the precision
+of the measurements, εg and εm should coincide, and their difference cannot be interpreted as
+reflecting the misalignment between the polar moment of inertia of the whole planet and the
+mantle spin axis.
+Lastly, we have concentrated our efforts on the mutual orientations of the different spin
+and symmetry axes in the Cassini plane. Dissipation at the CMB and ICB introduced by viscous and EM coupling also leads to a displacement of these axes in the direction perpendicular to the Cassini plane [e.g., Peale et al., 2014]. Indeed, the two measurements based on tracking surface topographic features from Margot et al. [2012] and Stark et al. [2015a] suggest that
+the mantle spin axis lags behind the Cassini plane by approximately 2 arcsec (∼ 0.03 arcmin).
+Although this offset is smaller than the measurement errors, so that the observed obliquity is
+still consistent with no deviation away from the Cassini plane, some amount of dissipation invariably takes place. These measurements give then a measure of the possible amplitude of the
+dissipation. One source of dissipation is from anelastic tidal deformation [Baland et al., 2017],
+but viscous and EM coupling at the boundaries of the fluid core is another. Hence, the out-of-plane component of the observed obliquity may further help to quantify and constrain the interior coupling mechanisms. This will be the subject of a future study.
+5 Conclusion
+We have investigated how the presence of a fluid core and solid inner core affects the Cassini
+state equilibrium of Mercury. Our general conclusion is that the coupling strength between Mercury’s interior regions is sufficiently strong that the obliquity of the mantle spin-symmetry axis
+does not deviate from that of a rigid planet by more than 0.01 arcmin. This largest offset occurs for a small or no inner core. The larger the inner core is, the more it is forced into an alignment with the mantle because of the strong gravitational torque between the two, and the closer
+we approach a situation resembling a whole planet precessing as a rigid body. The misalignment between the polar moment of inertia and mantle spin axis increases with inner core size,
+but is limited to approximately 0.007 arcmin. These conclusions apply irrespective of the core
+composition and thus of the partitioning of light elements into the solid core; a smaller density contrast at the ICB only implies that a larger inner core is required in order to produce
+an equivalent change in the Cassini state equilibrium.
+Our results imply that the obliquities of the mantle spin axis and polar moment of inertia (or, equivalently, the gravity field) should coincide at the present-day level of measurement
+errors. Moreover, neither of these can be distinguished from the obliquity predicted on the basis of a rigid planet. However, the smaller measurement errors expected from the upcoming BepiColombo satellite mission may permit this distinction, and thus provide further constraints on
+Mercury’s interior structure.
 –29–
-Confidential manuscript submitted to JGR-Planets
-Acknowledgments
-Figures were created using the GMT software [Wessel et al., 2013]. The source codes, GMT
-scripts and data files to reproduce all figures are freely accessible in Dumberry [2020]. This work
-was supported by an NSERC/CRSNG Discovery Grant.
-References
-Alf`e, D., G. Kresse, and M. Gillan (2000), Structure and dynamics of liquid iron under core
-conditions, Phys. Rev., B61, 132–142.
-Anderson, B. J., C. L. Johnson, H. Korth, M. E. Purucker, R. M. Winslow, J. A. Slavin,
-S. C. Solomon, R. L. McNutt, M. Raines, Jim, and T. H. Zurbuchen (2011), The global
-magnetic field of Mercury from MESSENGER orbital observations, Science, 333, 1859–
-1862.
-Anderson, B. J., C. L. Johnson, H. Korth, R. M. Winslow, J. E. Borovsky, M. E. Pu￾rucker, J. A. Slavin, S. C. Solomon, M. T. Zuber, and R. L. McNutt (2012), Low￾degree structure in mercury’s planetary magnetic field, J. Geophys. Res., 117, E00L12,
-doi:10.1029/2012JE004159.
-Baland, R.-M., A. Yseboodt, M. Rivoldini, and T. Van Hoolst (2017), Obliquity of Mer￾cury: Influence of the precession of the pericenter and of tides, Icarus, 291, 136–159.
-Baland, R.-M., A. Coyette, and T. Van Hoolst (2019), Coupling between the spin pre￾cession and polar motion of a synchronously rotating satellite: application to Titan,
-Celestial Mechanics and Dynamical Astronomy, 131 (11), 1–50.
-Buffett, B. A. (1992), Constraints on magnetic energy and mantle conductivity from the
-forced nutations of the Earth, J. Geophys. Res., 97, 19,581–19,597.
-Buffett, B. A. (2010), Chemical stratification at the top of earth’s core: Constraints from
-observations of nutations, Earth Planet. Sci. Lett., 296, 367–372.
-Buffett, B. A., P. M. Mathews, and T. A. Herring (2002), Modeling of nutation-precession:
-effects of electromagnetic coupling, J. Geophys. Res., 107, doi:10.1029/2001JB000056.
-Busse, F. H. (1968), Steady fluid flow in a precessing spheroidal shell, J. Fluid Mech., 33,
-739–751.
-Byrne, P. K., C. Klimczak, A. M. C. Seng¨or, S. C. Solomon, T. R. Watters, and S. A.
-Hauck (2014), Mercury’s global contraction much greater than earlier estimates, Nature
-Geosci., 7, 301–307.
-C´ebron, D., R. Laguerre, J. Noir, and N. Schaeffer (2019), Precessing spherical shells:
-flows, dissipation, dynamo and the lunar core, Geophys. J. Int., 219 (Supplement 1),
-S34–S57, doi:10.1093/gji/ggz037.
-Christensen, U. R. (2006), A deep dynamo generating Mercury’s magnetic field, Nature,
-444, 1056–1058.
-Cical`o, S., G. Schettino, S. Di Ruzza, E. M. Alessi, G. Tommei, and A. Milani (2016), The
-BepiColombo MORE gravimetry and rotation experiments with the ORBIT14 software,
-Month. N. Roy. Astr. Soc., 457, 1507–1521.
-Colombo, G. (1966), Cassini’s second and third laws, Astron. J., 71, 891–896.
-Constable, S. (2015), Geomagnetic induction studies, in Treatise on Geophysics, Second
-Edition, vol. 5, edited by G. Schubert and M. Kono, chap. 7, pp. 219–254, Elsevier, Ox￾ford.
-de Koker, N., G. Seinle-Neumann, and V. Vlˇcek (2012), Electrical resistivity and thermal
-conductivity of liquid Fe alloys at high P and T, and heat flux in Earth’s core, Proc.
-Nat. Acad. Sci., 109, 4070–4073.
+Confidential manuscript submitted to JGR-Planets
+Acknowledgments
+Figures were created using the GMT software [Wessel et al., 2013]. The source codes, GMT
+scripts and data files to reproduce all figures are freely accessible in Dumberry [2020]. This work
+was supported by an NSERC/CRSNG Discovery Grant.
+References
+Alfè, D., G. Kresse, and M. Gillan (2000), Structure and dynamics of liquid iron under core
+conditions, Phys. Rev., B61, 132–142.
+Anderson, B. J., C. L. Johnson, H. Korth, M. E. Purucker, R. M. Winslow, J. A. Slavin,
+S. C. Solomon, R. L. McNutt, J. M. Raines, and T. H. Zurbuchen (2011), The global
+magnetic field of Mercury from MESSENGER orbital observations, Science, 333, 1859–
+1862.
+Anderson, B. J., C. L. Johnson, H. Korth, R. M. Winslow, J. E. Borovsky, M. E. Purucker, J. A. Slavin, S. C. Solomon, M. T. Zuber, and R. L. McNutt (2012), Low-degree structure in mercury’s planetary magnetic field, J. Geophys. Res., 117, E00L12,
+doi:10.1029/2012JE004159.
+Baland, R.-M., A. Yseboodt, M. Rivoldini, and T. Van Hoolst (2017), Obliquity of Mercury: Influence of the precession of the pericenter and of tides, Icarus, 291, 136–159.
+Baland, R.-M., A. Coyette, and T. Van Hoolst (2019), Coupling between the spin precession and polar motion of a synchronously rotating satellite: application to Titan,
+Celestial Mechanics and Dynamical Astronomy, 131 (11), 1–50.
+Buffett, B. A. (1992), Constraints on magnetic energy and mantle conductivity from the
+forced nutations of the Earth, J. Geophys. Res., 97, 19,581–19,597.
+Buffett, B. A. (2010), Chemical stratification at the top of earth’s core: Constraints from
+observations of nutations, Earth Planet. Sci. Lett., 296, 367–372.
+Buffett, B. A., P. M. Mathews, and T. A. Herring (2002), Modeling of nutation-precession:
+effects of electromagnetic coupling, J. Geophys. Res., 107, doi:10.1029/2001JB000056.
+Busse, F. H. (1968), Steady fluid flow in a precessing spheroidal shell, J. Fluid Mech., 33,
+739–751.
+Byrne, P. K., C. Klimczak, A. M. C. Şengör, S. C. Solomon, T. R. Watters, and S. A.
+Hauck (2014), Mercury’s global contraction much greater than earlier estimates, Nature
+Geosci., 7, 301–307.
+Cébron, D., R. Laguerre, J. Noir, and N. Schaeffer (2019), Precessing spherical shells:
+flows, dissipation, dynamo and the lunar core, Geophys. J. Int., 219 (Supplement 1),
+S34–S57, doi:10.1093/gji/ggz037.
+Christensen, U. R. (2006), A deep dynamo generating Mercury’s magnetic field, Nature,
+444, 1056–1058.
+Cicalò, S., G. Schettino, S. Di Ruzza, E. M. Alessi, G. Tommei, and A. Milani (2016), The
+BepiColombo MORE gravimetry and rotation experiments with the ORBIT14 software,
+Month. N. Roy. Astr. Soc., 457, 1507–1521.
+Colombo, G. (1966), Cassini’s second and third laws, Astron. J., 71, 891–896.
+Constable, S. (2015), Geomagnetic induction studies, in Treatise on Geophysics, Second
+Edition, vol. 5, edited by G. Schubert and M. Kono, chap. 7, pp. 219–254, Elsevier, Oxford.
+de Koker, N., G. Steinle-Neumann, and V. Vlček (2012), Electrical resistivity and thermal
+conductivity of liquid Fe alloys at high P and T, and heat flux in Earth’s core, Proc.
+Nat. Acad. Sci., 109, 4070–4073.
 –30–
-Confidential manuscript submitted to JGR-Planets
-de Wijs, G. A., G. Kresse, L. Voˇcadlo, D. Dobson, D. Alf´e, M. J. Gillan, and G. D. Price
-(1998), The viscosity of liquid iron at the physical conditions of the Earth’s core, Nature,
-392, 805–807.
-Dehant, V., and P. Mathews (2015), Earth rotation variations, in Treatise on Geophysics,
-vol. 3, edited by G. Schubert, chap. 10, pp. 263–305, Elsevier, Oxford.
-Deleplace, B., and P. Cardin (2006), Viscomagnetic torque at the core mantle boundary,
-Geophys. J. Int., 167, 557–566.
-Deng, L., C. Seagle, Y. Fei, and A. Shahar (2013), High pressure and temperature electrical
-resistivity of iron and implications for planetary cores, Geophys. Res. Lett., 40, 33–37,
-doi:10.1029/2012GL054347.
-Dumberry, M. (2020), Replication Data for: The influence of a fluid core and a solid in￾ner core on the Cassini sate of Mercury, https://doi.org/10.7939/DVN/903HUV, UAL
-Dataverse, V2.
-Dumberry, M., and L. Koot (2012), A global model of electromagnetic coupling for nuta￾tions, Geophys. J. Int., 191, 530–544.
-Dumberry, M., and A. Rivoldini (2015), Mercury’s inner core size and core-crystallization
-regime, Icarus, 248, 254–268.
-Dumberry, M., and M. A. Wieczorek (2016), The forced precession of the Moon’s inner
-core, J. Geophys. Res. Planets, 121, 1264–1292.
-Dumberry, M., A. Rivoldini, T. Van Hoolst, and M. Yseboodt (2013), The role of Mer￾cury’s core density structure on its longitudinal librations, Icarus, 225, 62–74.
-Gans, R. F. (1972), Viscosity of the Earth’s core, J. Geophys. Res., 77, 360–366.
-Genova, A., S. Goossens, E. Mazarico, F. G. Lemoine, G. A. Neumann, W. Kuang,
-T. J. Sabaka, S. A. Hauck II, D. E. Smith, S. C. Solomon, and M. T. Zuber (2019),
-Geodetic evidence that Mercury has a solid inner core, Geophys. Res. Lett., 46,
-doi:10.1029/2018GL081135.
-Glane, S., and B. A. Buffett (2018), Enhanced core-mantle coupling due to stratification at
-the top of the core, Frontiers in Earth Science, 6, 171, doi:10.3389/feart.2018.00171.
-Grott, M., D. Breuer, and M. Laneuville (2011), Thermo-chemical evolution and global
-contraction of Mercury, Earth Planet. Sci. Lett., 307, 135–146.
-Hauck, S. A., J.-L. Margot, S. C. Solomon, R. J. Phillips, C. L. Johnson, F. G. Lemoine,
-E. Mazarico, T. J. McCoy, S. Padovan, S. J. Peale, M. E. Perry, D. E. Smith, and M. T.
-Zuber (2013), The curious case of Mercury’s internal structure, J. Geophys. Res., 118,
-doi:10.1002/jgre.20091.
-Johnson, C. L., M. E. Purucker, H. Korth, B. J. Anderson, R. M. Winslow, M. M. H.
-Al Asad, J. A. Slavin, I. I. Alexeev, R. J. Phillips, M. T. Zuber, and S. C. Solomon
-(2012), MESSENGER observations of mercury’s magnetic field structure, J. Geophys.
-Res., 117, E00L14, doi:10.1029/2012JE004217.
-Konopliv, A. S., R. S. Park, and A. I. Ermakov (2020), The Mercury gravity field, orien￾tation, love number, and ephemeris from the MESSENGER radiometric tracking data,
-Icarus, 335, 113,386.
-Koot, L., and M. Dumberry (2013), The role of the magnetic field morphology on the
-electromagnetic coupling for nutations, Geophys. J. Int., 195, 200–210.
-Li, J., Y. Fei, H. Mao, K. Hirose, and S. Shieh (2001), Sulfur in Earth’s inner core, Earth
-Planet. Sci. Lett., 193, 509–514.
-Margot, J. L., S. J. Peale, R. F. Jurgens, M. A. Slade, and I. V. Holin (2007), Large longi￾tude libration of Mercury reveals a molten core, Science, 316, 710–714.
-Margot, J. L., S. J. Peale, S. C. Solomon, S. A. Hauck, F. D. Ghigo, R. F. Jurgens,
-M. Yseboodt, J. D. Giorgini, S. Padovan, and D. B. Campbell (2012), Mercury’s
+Confidential manuscript submitted to JGR-Planets
+de Wijs, G. A., G. Kresse, L. Vočadlo, D. Dobson, D. Alfé, M. J. Gillan, and G. D. Price
+(1998), The viscosity of liquid iron at the physical conditions of the Earth’s core, Nature,
+392, 805–807.
+Dehant, V., and P. Mathews (2015), Earth rotation variations, in Treatise on Geophysics,
+vol. 3, edited by G. Schubert, chap. 10, pp. 263–305, Elsevier, Oxford.
+Deleplace, B., and P. Cardin (2006), Viscomagnetic torque at the core mantle boundary,
+Geophys. J. Int., 167, 557–566.
+Deng, L., C. Seagle, Y. Fei, and A. Shahar (2013), High pressure and temperature electrical
+resistivity of iron and implications for planetary cores, Geophys. Res. Lett., 40, 33–37,
+doi:10.1029/2012GL054347.
+Dumberry, M. (2020), Replication Data for: The influence of a fluid core and a solid inner core on the Cassini state of Mercury, https://doi.org/10.7939/DVN/903HUV, UAL
+Dataverse, V2.
+Dumberry, M., and L. Koot (2012), A global model of electromagnetic coupling for nutations, Geophys. J. Int., 191, 530–544.
+Dumberry, M., and A. Rivoldini (2015), Mercury’s inner core size and core-crystallization
+regime, Icarus, 248, 254–268.
+Dumberry, M., and M. A. Wieczorek (2016), The forced precession of the Moon’s inner
+core, J. Geophys. Res. Planets, 121, 1264–1292.
+Dumberry, M., A. Rivoldini, T. Van Hoolst, and M. Yseboodt (2013), The role of Mercury’s core density structure on its longitudinal librations, Icarus, 225, 62–74.
+Gans, R. F. (1972), Viscosity of the Earth’s core, J. Geophys. Res., 77, 360–366.
+Genova, A., S. Goossens, E. Mazarico, F. G. Lemoine, G. A. Neumann, W. Kuang,
+T. J. Sabaka, S. A. Hauck II, D. E. Smith, S. C. Solomon, and M. T. Zuber (2019),
+Geodetic evidence that Mercury has a solid inner core, Geophys. Res. Lett., 46,
+doi:10.1029/2018GL081135.
+Glane, S., and B. A. Buffett (2018), Enhanced core-mantle coupling due to stratification at
+the top of the core, Frontiers in Earth Science, 6, 171, doi:10.3389/feart.2018.00171.
+Grott, M., D. Breuer, and M. Laneuville (2011), Thermo-chemical evolution and global
+contraction of Mercury, Earth Planet. Sci. Lett., 307, 135–146.
+Hauck, S. A., J.-L. Margot, S. C. Solomon, R. J. Phillips, C. L. Johnson, F. G. Lemoine,
+E. Mazarico, T. J. McCoy, S. Padovan, S. J. Peale, M. E. Perry, D. E. Smith, and M. T.
+Zuber (2013), The curious case of Mercury’s internal structure, J. Geophys. Res., 118,
+doi:10.1002/jgre.20091.
+Johnson, C. L., M. E. Purucker, H. Korth, B. J. Anderson, R. M. Winslow, M. M. H.
+Al Asad, J. A. Slavin, I. I. Alexeev, R. J. Phillips, M. T. Zuber, and S. C. Solomon
+(2012), MESSENGER observations of mercury’s magnetic field structure, J. Geophys.
+Res., 117, E00L14, doi:10.1029/2012JE004217.
+Konopliv, A. S., R. S. Park, and A. I. Ermakov (2020), The Mercury gravity field, orientation, love number, and ephemeris from the MESSENGER radiometric tracking data,
+Icarus, 335, 113,386.
+Koot, L., and M. Dumberry (2013), The role of the magnetic field morphology on the
+electromagnetic coupling for nutations, Geophys. J. Int., 195, 200–210.
+Li, J., Y. Fei, H. Mao, K. Hirose, and S. Shieh (2001), Sulfur in Earth’s inner core, Earth
+Planet. Sci. Lett., 193, 509–514.
+Margot, J. L., S. J. Peale, R. F. Jurgens, M. A. Slade, and I. V. Holin (2007), Large longitude libration of Mercury reveals a molten core, Science, 316, 710–714.
+Margot, J. L., S. J. Peale, S. C. Solomon, S. A. Hauck, F. D. Ghigo, R. F. Jurgens,
+M. Yseboodt, J. D. Giorgini, S. Padovan, and D. B. Campbell (2012), Mercury’s
 –31–
-Confidential manuscript submitted to JGR-Planets
-moment of inertia from spin and gravity data, J. Geophys. Res., 117, E00L09,
-doi:10.1029/2012JE004161.
-Margot, J. L., S. A. Hauck II, E. Mazarico, S. Padovan, and S. J. Peale (2018), Mercury’s
-internal structure, in Mercury: The View after MESSENGER, edited by S. Solomon,
-L. Nittler, and B. Anderson, pp. 85–113, Cambridge University Press, Cambridge, doi:
-10.1017/9781316650684.005.
-Mathews, P. M., and J. Guo (2005), Viscoelectromagnetic coupling in precession-nutation
-theory, J. Geophys. Res., 110 (B02402), doi:10.1029/2003JB002915.
-Mathews, P. M., B. A. Buffett, T. A. Herring, and I. I. Shapiro (1991), Forced nutations of
-the Earth: Influence of inner core dynamics. 1. theory, J. Geophys. Res., 96, 8219–8242.
-Mathews, P. M., T. A. Herring, and B. A. Buffett (2002), Modeling of nutations and pre￾cession: New nutation series for nonrigid Earth and insights into the Earth’s interior, J.
-Geophys. Res., 107, doi:10.1029/2004JB000390.
-Mazarico, E., A. Genova, S. Goossens, F. G. Lemoine, G. A. Neumann, M. T. Zuber,
-D. E. Smith, and S. C. Solomon (2014), The gravity field, orientation, and ephemeris of
-Mercury from MESSENGER observations after three years in orbit, J. Geophys. Res.
-Planets, 119, 2417–2436.
-Organowski, O., and M. Dumberry (2020), Viscoelastic relaxation within the Moon
-and the phase lead of its Cassini state, Journal of Geophysical Research Planets, 125,
-e2020JE006386.
-Peale, S. J. (1969), Generalized Cassini’s laws, Astron. J., 74, 483–489.
-Peale, S. J. (1974), Possible histories of the obliquity of Mercury, Astron. J., 79, 722–744.
-Peale, S. J. (1976), Does Mercury have a molten core?, Nature, 262, 765–766.
-Peale, S. J. (2005), The free precession and libration of Mercury, Icarus, 178, 4–18.
-Peale, S. J. (2006), The proximity of Mercury’s spin to Cassini state 1 from adiabatic in￾variance, Icarus, 181, 338–347.
-Peale, S. J., J. L. Margot, S. A. Hauck II, and S. C. Solomon (2014), Effect of core-mantle
-and tidal torques on Mercury’s spin axis orientation, Icarus, 231, 206–220.
-Peale, S. J., J. L. Margot, S. A. Hauck II, and S. C. Solomon (2016), Consequences of a
-solid inner core on Mercury’s spin configuration, Icarus, 264, 443–455.
-Perry, M. E., G. A. Neumann, R. J. Phillips, and et al. (2015), The low-degree shape of
-Mercury, Geophys. Res. Lett., 42, 6951–6958.
-Poincar´e, H. (1910), Sur la pr´ecession des corps d´eformables, Bull. Astron. Ser. 1, 27,
-321–356.
-Pozzo, M., C. Davies, D. Gubbins, and D. Alf´e (2012), Thermal and electrical conductivity
-of iron at Earth’s core conditions, Nature, 485, 355–358.
-Rochester, M. G. (1960), Geomagnetic westward drift and irregularities in the Earth’s
-rotation, Phil. Trans. R. Soc. Lond., A, 252, 531–555.
-Rochester, M. G. (1962), Geomagnetic core-mantle coupling, J. Geophys. Res., 67, 4833–
-4836.
-Rochester, M. G. (1968), Perturbations in the Earth’s rotation and geomagnetic core￾mantle coupling, J. Geomag. Geoelectr., 20, 387–402.
-Rochester, M. G. (1976), The secular decrease of obliquity due to dissipative core-mantle
-coupling, Geophys. J. R. Astron. Soc., 46, 109–126.
-Rutter, M., R. Secco, T. Uchida, H. Liu, Y. Wang, M. Rivers, and S. Sutton (2002a), To￾wards evaluating the viscosity of the Earth’s outer core: an experimental high pressure
-study of liquid Fe-S (8.5 wt. per cent S), Geophys. Res. Lett., 29, 080,000–1.
-Rutter, M. D., R. A. Secco, H. Liu, T. Uchida, M. Rivers, S. Sutton, and Y. Wang
-(2002b), Viscosity of liquid Fe at high pressure, Phys. Rev. B, 66, 060,102,
+Confidential manuscript submitted to JGR-Planets
+moment of inertia from spin and gravity data, J. Geophys. Res., 117, E00L09,
+doi:10.1029/2012JE004161.
+Margot, J. L., S. A. Hauck II, E. Mazarico, S. Padovan, and S. J. Peale (2018), Mercury’s
+internal structure, in Mercury: The View after MESSENGER, edited by S. Solomon,
+L. Nittler, and B. Anderson, pp. 85–113, Cambridge University Press, Cambridge, doi:
+10.1017/9781316650684.005.
+Mathews, P. M., and J. Guo (2005), Viscoelectromagnetic coupling in precession-nutation
+theory, J. Geophys. Res., 110 (B02402), doi:10.1029/2003JB002915.
+Mathews, P. M., B. A. Buffett, T. A. Herring, and I. I. Shapiro (1991), Forced nutations of
+the Earth: Influence of inner core dynamics. 1. theory, J. Geophys. Res., 96, 8219–8242.
+Mathews, P. M., T. A. Herring, and B. A. Buffett (2002), Modeling of nutations and precession: New nutation series for nonrigid Earth and insights into the Earth’s interior, J.
+Geophys. Res., 107, doi:10.1029/2004JB000390.
+Mazarico, E., A. Genova, S. Goossens, F. G. Lemoine, G. A. Neumann, M. T. Zuber,
+D. E. Smith, and S. C. Solomon (2014), The gravity field, orientation, and ephemeris of
+Mercury from MESSENGER observations after three years in orbit, J. Geophys. Res.
+Planets, 119, 2417–2436.
+Organowski, O., and M. Dumberry (2020), Viscoelastic relaxation within the Moon
+and the phase lead of its Cassini state, Journal of Geophysical Research Planets, 125,
+e2020JE006386.
+Peale, S. J. (1969), Generalized Cassini’s laws, Astron. J., 74, 483–489.
+Peale, S. J. (1974), Possible histories of the obliquity of Mercury, Astron. J., 79, 722–744.
+Peale, S. J. (1976), Does Mercury have a molten core?, Nature, 262, 765–766.
+Peale, S. J. (2005), The free precession and libration of Mercury, Icarus, 178, 4–18.
+Peale, S. J. (2006), The proximity of Mercury’s spin to Cassini state 1 from adiabatic invariance, Icarus, 181, 338–347.
+Peale, S. J., J. L. Margot, S. A. Hauck II, and S. C. Solomon (2014), Effect of core-mantle
+and tidal torques on Mercury’s spin axis orientation, Icarus, 231, 206–220.
+Peale, S. J., J. L. Margot, S. A. Hauck II, and S. C. Solomon (2016), Consequences of a
+solid inner core on Mercury’s spin configuration, Icarus, 264, 443–455.
+Perry, M. E., G. A. Neumann, R. J. Phillips, and et al. (2015), The low-degree shape of
+Mercury, Geophys. Res. Lett., 42, 6951–6958.
+Poincaré, H. (1910), Sur la précession des corps déformables, Bull. Astron. Ser. 1, 27,
+321–356.
+Pozzo, M., C. Davies, D. Gubbins, and D. Alfé (2012), Thermal and electrical conductivity
+of iron at Earth’s core conditions, Nature, 485, 355–358.
+Rochester, M. G. (1960), Geomagnetic westward drift and irregularities in the Earth’s
+rotation, Phil. Trans. R. Soc. Lond., A, 252, 531–555.
+Rochester, M. G. (1962), Geomagnetic core-mantle coupling, J. Geophys. Res., 67, 4833–
+4836.
+Rochester, M. G. (1968), Perturbations in the Earth’s rotation and geomagnetic core-mantle coupling, J. Geomag. Geoelectr., 20, 387–402.
+Rochester, M. G. (1976), The secular decrease of obliquity due to dissipative core-mantle
+coupling, Geophys. J. R. Astron. Soc., 46, 109–126.
+Rutter, M., R. Secco, T. Uchida, H. Liu, Y. Wang, M. Rivers, and S. Sutton (2002a), Towards evaluating the viscosity of the Earth’s outer core: an experimental high pressure
+study of liquid Fe-S (8.5 wt. per cent S), Geophys. Res. Lett., 29, 080,000–1.
+Rutter, M. D., R. A. Secco, H. Liu, T. Uchida, M. Rivers, S. Sutton, and Y. Wang
+(2002b), Viscosity of liquid Fe at high pressure, Phys. Rev. B, 66, 060,102,
 –32–
-Confidential manuscript submitted to JGR-Planets
-doi:10.1029/2001GL014392.
-Schaefer, L., S. B. Jacobsen, J. L. Remo, M. I. Petaev, and D. D. Sasselov (2017), Metal￾silicate partitioning and its role in core formation and composition on Super-Earths,
-Astrophys. J., 835, 234.
-Sori, M. M. (2018), A thin, dense crust for Mercury, Earth Planet. Sci. Lett., 489, 92–99.
-Stark, A., J. Oberst, F. Preusker, S. J. Peale, J.-L. Margot, R. J. Phillips, G. A. Neumann,
-S. D. E., M. T. Zuber, and S. C. Solomon (2015a), First MESSENGER orbital observa￾tions of Mercury’s librations, Geophys. Res. Lett., 42, 7881–7889.
-Stark, A., J. Oberst, and H. Hussmann (2015b), Mercury’s resonant rotation from secular
-orbital elements, Celest. Mech. Dyn. Astr., 123, 263–277.
-Stewartson, K., and P. H. Roberts (1963), On the motion of a liquid in a spheroidal cavity
-of a precessing rigid body, J. Fluid Mech., 17, 1–20.
-Stys, C., and M. Dumberry (2018), The cassini state of the Moon’s inner core, J. Geophys.
-Res. Planets, 123, 1–25, doi:10.1029/2018JE005607.
-Van Hoolst, T. (2015), Rotation of the terrestrial planets, in Treatise on Geophysics,
-vol. 10, edited by G. Schubert, chap. 4, pp. 121 – 151, Elsevier, Oxford.
-Van Hoolst, T., A. Rivoldini, R.-M. Baland, and M. Yseboodt (2012), The effects of tides
-and an inner core on the forced libration of mercury, Earth Planet. Sci. Lett., 333–334,
-83–90.
-Verma, A. K., and J. L. Margot (2016), Mercury’s gravity, tides, and spin from MESSEN￾GER radio science data, J. Geophys. Res. Planets, 121, 1627–1640.
-Wessel, P., W. H. F. Smith, R. Scharroo, J. Luis, and F. Wobbe (2013), Generic Mapping
-Tools: Improved version released, EOS Trans. AGU, 94, 409–410.
-Williams, J. G., and D. H. Boggs (2015), Tides on the Moon: theory and determination of
-dissipation, J. Geophys. Res. Planets, 120 (4), 689–724, doi:10.1002/2014JE004755.
-Williams, J. G., D. H. Boggs, C. F. Yoder, J. T. Ratcliff, and J. O. Dickey (2001), Lunar
-rotational dissipation in solid body and molten core, J. Geophys. Res., 106, 27,933–
-27,968.
-Williams, J. G., A. S. Konopliv, D. H. Boggs, R. S. Park, D.-N. Yuan, F. G. Lemoine,
-S. Goossens, E. Mazarico, F. Nimmo, R. C. Weber, S. W. Asmar, H. J. Melosh, G. A.
-Neumann, R. J. Phillips, D. E. Smith, S. C. Solomon, M. M. Watkins, M. A. Wieczorek,
-J. C. Andrews-Hanna, J. W. Head, W. S. Kiefer, I. Matsuyama, P. J. McGovern, G. J.
-Taylor, and M. T. Zuber (2014), Lunar interior properties from the GRAIL mission, J.
-Geophys. Res. Planets, 119 (7), 1546–1578, doi:10.1002/2013JE004559.
-Yoder, C. F. (1981), The free librations of a dissipative Moon, Phil. Trans. R. Soc. Lond.
-A, 303, 327–338.
-Yseboodt, M., and J. L. Margot (2006), Evolution of Mercury’s obliquity, Icarus, 181,
-327–337.
-–33–
+Confidential manuscript submitted to JGR-Planets
+doi:10.1029/2001GL014392.
+Schaefer, L., S. B. Jacobsen, J. L. Remo, M. I. Petaev, and D. D. Sasselov (2017), Metal-silicate partitioning and its role in core formation and composition on Super-Earths,
+Astrophys. J., 835, 234.
+Sori, M. M. (2018), A thin, dense crust for Mercury, Earth Planet. Sci. Lett., 489, 92–99.
+Stark, A., J. Oberst, F. Preusker, S. J. Peale, J.-L. Margot, R. J. Phillips, G. A. Neumann,
+S. D. E., M. T. Zuber, and S. C. Solomon (2015a), First MESSENGER orbital observations of Mercury’s librations, Geophys. Res. Lett., 42, 7881–7889.
+Stark, A., J. Oberst, and H. Hussmann (2015b), Mercury’s resonant rotation from secular
+orbital elements, Celest. Mech. Dyn. Astr., 123, 263–277.
+Stewartson, K., and P. H. Roberts (1963), On the motion of a liquid in a spheroidal cavity
+of a precessing rigid body, J. Fluid Mech., 17, 1–20.
+Stys, C., and M. Dumberry (2018), The cassini state of the Moon’s inner core, J. Geophys.
+Res. Planets, 123, 1–25, doi:10.1029/2018JE005607.
+Van Hoolst, T. (2015), Rotation of the terrestrial planets, in Treatise on Geophysics,
+vol. 10, edited by G. Schubert, chap. 4, pp. 121 – 151, Elsevier, Oxford.
+Van Hoolst, T., A. Rivoldini, R.-M. Baland, and M. Yseboodt (2012), The effects of tides
+and an inner core on the forced libration of mercury, Earth Planet. Sci. Lett., 333–334,
+83–90.
+Verma, A. K., and J. L. Margot (2016), Mercury’s gravity, tides, and spin from MESSENGER radio science data, J. Geophys. Res. Planets, 121, 1627–1640.
+Wessel, P., W. H. F. Smith, R. Scharroo, J. Luis, and F. Wobbe (2013), Generic Mapping
+Tools: Improved version released, EOS Trans. AGU, 94, 409–410.
+Williams, J. G., and D. H. Boggs (2015), Tides on the Moon: theory and determination of
+dissipation, J. Geophys. Res. Planets, 120 (4), 689–724, doi:10.1002/2014JE004755.
+Williams, J. G., D. H. Boggs, C. F. Yoder, J. T. Ratcliff, and J. O. Dickey (2001), Lunar
+rotational dissipation in solid body and molten core, J. Geophys. Res., 106, 27,933–
+27,968.
+Williams, J. G., A. S. Konopliv, D. H. Boggs, R. S. Park, D.-N. Yuan, F. G. Lemoine,
+S. Goossens, E. Mazarico, F. Nimmo, R. C. Weber, S. W. Asmar, H. J. Melosh, G. A.
+Neumann, R. J. Phillips, D. E. Smith, S. C. Solomon, M. M. Watkins, M. A. Wieczorek,
+J. C. Andrews-Hanna, J. W. Head, W. S. Kiefer, I. Matsuyama, P. J. McGovern, G. J.
+Taylor, and M. T. Zuber (2014), Lunar interior properties from the GRAIL mission, J.
+Geophys. Res. Planets, 119 (7), 1546–1578, doi:10.1002/2013JE004559.
+Yoder, C. F. (1981), The free librations of a dissipative Moon, Phil. Trans. R. Soc. Lond.
+A, 303, 327–338.
+Yseboodt, M., and J. L. Margot (2006), Evolution of Mercury’s obliquity, Icarus, 181,
+327–337.
+–33–
\ No newline at end of file
diff --git a/read/results/pdfium/2201.00069.txt b/read/results/pdfium/2201.00069.txt
index 2a505bd..30fcbbc 100644
--- a/read/results/pdfium/2201.00069.txt
+++ b/read/results/pdfium/2201.00069.txt
@@ -1,913 +1,862 @@
-MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 1
-A MeerKAT, e-MERLIN, H.E.S.S. and Swift search for persistent
-and transient emission associated with three localised FRBs
-J. O. Chibueze,1,2★ M. Caleb,3,4† L. Spitler,5 H. Ashkar,6,17 F. Schüssler,6 B. W. Stappers,4
-C. Venter,1
-I. Heywood,7,8,9 A. M. S. Richards,3 D. R. A. Williams,3 M. Kramer,3,5
-R. Beswick,3 M. C. Bezuidenhout,3 R. P. Breton,3 L. N. Driessen,3 F. Jankowski,3
-E. F. Keane,10 M. Malenta,3 M. Mickaliger,3 V. Morello3
-, H. Qiu,11 K. Rajwade,3
-S. Sanidas,3 M. Surnis,3 T. W. Scragg,3 C. R. H. Walker,5
-and N. Wrigley,3
-H.E.S.S. Collaboration: F. Aharonian,12,13,14 F. Ait Benkhali,15 E.O. Angüner,16 M. Backes,18,1
-V. Baghmanyan,19 V. Barbosa Martins,20 R. Batzofin,21 Y. Becherini,22,23 D. Berge,20
-M. Böttcher,1 C. Boisson,24 J. Bolmont,25 M. de Bony de Lavergne,26 M. Breuhaus,13
-R. Brose,12 F. Brun,6 T. Bulik,27 F. Cangemi,25 S. Caroff,25 S. Casanova,19
-J. Catalano,28 M. Cerruti,22 T. Chand,1 A. Chen,21 O.U. Chibueze,1
-G. Cotter,29 P. Cristofari,24 J. Damascene Mbarubucyeye,20 J. Devin,30 A. Djannati-Ataï,22
-A. Dmytriiev,1 K. Egberts,31 J.-P. Ernenwein,16 A. Fiasson,26 G. Fichet de Clairfontaine,24
-G. Fontaine,17 S. Funk,28 S. Gabici,22 S. Ghafourizadeh,15 G. Giavitto,20
-D. Glawion,28 M.-H. Grondin,30 M. Hörbe,29 C. Hoischen,31 T. L. Holch,20
-Zhiqiu Huang,13 M. Jamrozy,32 F. Jankowsky,15 I. Jung-Richardt,28 E. Kasai,18
-K. Katarzyński,33 U. Katz,28 B. Khélifi,22 W. Kluźniak,34 Nu. Komin,21
-K. Kosack,6 D. Kostunin,20 A. Lemière,22 J.-P. Lenain,25 F. Leuschner,35
-T. Lohse,36 A. Luashvili,24 I. Lypova,15 J. Mackey,12 D. Malyshev,35
-V. Marandon,13 P. Marchegiani,21 A. Marcowith,37 G. Martí-Devesa,38 R. Marx,15
-A. Mitchell,28,13 R. Moderski,34 L. Mohrmann,13 E. Moulin,6
-J. Muller,17
-K. Nakashima,28 M. de Naurois,17 A. Nayerhoda,19 J. Niemiec,19 A. Priyana Noel,32
-P. O’Brien,39 S. Ohm,20 L. Olivera-Nieto,13 E. de Ona Wilhelmi,20 M. Ostrowski,32
-S. Panny,38 R.D. Parsons,36 S. Pita,22 V. Poireau,26 D.A. Prokhorov,40
-H. Prokoph,20 G. Pühlhofer,35 A. Quirrenbach,15 P. Reichherzer,6 A. Reimer,38
-O. Reimer,38 G. Rowell,41 B. Rudak,34 E. Ruiz-Velasco,13 V. Sahakian,42
-S. Sailer,13 H. Salzmann,35 D.A. Sanchez,26 A. Santangelo,35 M. Sasaki,28
-H.M. Schutte,1 U. Schwanke,36 J.N.S. Shapopi,18 A. Specovius,28
-S. Spencer,29 R. Steenkamp,18 S. Steinmassl,13 T. Takahashi,43 T. Tanaka,44
-C. Thorpe-Morgan,35 N. Tsuji,45 C. van Eldik,28 J. Veh,28
-J. Vink,40 S.J. Wagner,15 A. Wierzcholska,19 Yu Wun Wong,28 A. Yusafzai,28
-M. Zacharias,24,1 D. Zargaryan,12,14 A.A. Zdziarski,34 A. Zech,24 S.J. Zhu,20
-S. Zouari,22 N. Żywucka,1
-Accepted XXX. Received YYY; in original form ZZZ
-MNRAS 000, 1–15 (2021)
-arXiv:2201.00069v1 [astro-ph.HE] 31 Dec 2021
-MNRAS 000, 1–15 (2021) Preprint 4 January 2022 Compiled using MNRAS LATEX style file v3.0
-ABSTRACT
-We report on a search for persistent radio emission from the one-off Fast Radio Burst (FRB)
-20190714A, as well as from two repeating FRBs, 20190711A and 20171019A, using the
-MeerKAT radio telescope. For FRB 20171019A we also conducted simultaneous observations
-with the High Energy Stereoscopic System (H.E.S.S.) in very high energy gamma rays and
-searched for signals in the ultraviolet, optical, and X-ray bands. For this FRB, we obtain a UV
-flux upper limit of 1.39×10−16 erg cm−2
-s
-−1Å
-−1
-, X-ray limit of ∼ 6.6×10−14 erg cm−2
-s
-−1
-and
-a limit on the very-high-energy gamma-ray flux Φ(𝐸 > 120 GeV) < 1.7 × 10−12 erg cm−2
-s
-−1
-.
-We obtain a radio upper limit of ∼15𝜇Jy beam−1
-for persistent emission at the locations of both
-FRBs 20190711A and 20171019A, but detect diffuse radio emission with a peak brightness
-of ∼53𝜇Jy beam−1
-associated with FRB 20190714A at 𝑧 = 0.2365. This represents the first
-detection of the radio continuum emission potentially associated with the host (galaxy) of FRB
-20190714A, and is only the third known FRB to have such an association. Given the possible
-association of a faint persistent source, FRB 20190714A may potentially be a repeating FRB
-whose age lies between that of FRB 20121102A and FRB 20180916A. A parallel search for
-repeat bursts from these FRBs revealed no new detections down to a fluence of 0.08 Jy ms for
-a 1 ms duration burst.
-Key words: fast radio bursts – radio continuum: galaxies – radiation mechanisms: non-thermal
-1 INTRODUCTION
-Fast radio bursts (FRBs) are luminous transients that last for mi￾croseconds to milliseconds and occur at extragalactic to cosmo￾logical distances (e.g. Lorimer et al. 2007; Thornton et al. 2013;
-Macquart et al. 2020). The estimated high radio luminosities and
-associated brightness temperatures required to produce these short￾timescale energetic events at such distances are what makes them
-intriguing (Petroff et al. 2021;Caleb & Keane 2021). They have been
-observed to emit from ∼ 110 MHz − 8 GHz, though not yet across
-a wide and continuous frequency band due to the variable band￾limited spectra of the single pulses. Over 600 FRBs have been dis￾covered1 of which ∼ 20 have been seen to repeat, and it is presently
-uncertain whether they all do (Caleb et al. 2019; James et al. 2020).
-The extraordinary observed characteristics of the repeating and non￾repeating FRBs have led to various progenitor models with the bulk
-of them favouring neutron stars. Progenitor theories include binary
-neutron star mergers and collisions (Totani 2013; Yamasaki et al.
-2018), giant pulses from extragalactic pulsars (Cordes & Wasser￾man 2016; Popov & Pshirkov 2016), hyperflares and giant flares
-from magnetars (Popov & Postnov 2013; Popov et al. 2018), binary
-white dwarf mergers (Kashiyama et al. 2013), neutron star “comb￾ing" (Zhang 2018) and interactions of neutron stars with active
-galactic nuclei (Vieyro et al. 2017) (see Platts et al. (2019) for a list
-of potential progenitors). Some of these models predict radio after￾glows accompanying an FRB with timescales of days to years. Liu
-et al. (2016) propose that the merger of a Kerr-Newman black hole
-binary is one of the plausible central engines for FRBs and their
-afterglows. Dai et al. (2017), however, suggest that the persistent
-emission is due to an ultra-relativistic pulsar wind nebula sweeping
-up its ambient medium with FRBs repeatedly produced through one
-of several potential mechanisms. In the magnetar model by Margalit
-et al. (2019), FRBs produced by binary neutron star mergers and
-accretion induced collapse are expected to be accompanied by per￾sistent radio continuum emission on timescales of months to years.
-★ james.chibueze@nwu.ac.za
-† manisha.caleb@manchester.ac.uk
-1 https://www.wis-tns.org/
-The persistent emission is powered by the nebula of relativistic elec￾trons and magnetic fields inflated by the magnetar flares (Margalit
-et al. 2019). The existence of persistent emission associated with
-FRBs could provide vital clues to their origin. Moreover, potential
-candidates and models for FRB progenitors predict counterparts
-in the X-ray an TeV bands. For example, a model by Lyubarsky
-(2014) predicts millisecond outbursts of TeV emission accompany￾ing FRBs from magnetars. In 2020, FRB 20200428 was discovered
-for the first time from a galactic magnetar, SGR 1935+2154. Fur￾thermore, an X-ray counterpart to this FRB was deteced for the first
-time by several instruments (Tavani et al. 2021; Ridnaia et al. 2021;
-Mereghetti et al. 2020; Insight-HXMT 2020).
-Of the 19 FRBs that have been associated with host galax￾ies2
-, only the sub-arcsecond localisation of the repeating FRB
-20121102A to a host galaxy at a redshift of 𝑧 = 0.19273 ± 0.0008
-(Tendulkar et al. 2017; Bassa et al. 2017) showed that it is physi￾cally associated with a compact (≤ 0.7 pc), persistent radio source
-of luminosity 𝜈𝐿𝜈 ∼ 1039 erg s−1
-at a few GHz (Marcote et al.
-2017). This source is detectable from 300 MHz – 26 GHz (Resmi
-et al. 2020; Chatterjee et al. 2017) and is seen to exhibit ∼ 10% vari￾ability on day timescales. In contrast, a similar sub-milliarcsecond
-localisation of another repeating FRB 20180916B to a nearby mas￾sive spiral galaxy at 𝑧 = 0.0337 ± 0.0002 (Marcote et al. 2020)
-showed no associated persistent radio emission. This places a strong
-upper limit on the persistent source luminosity of 𝜈𝐿𝜈 . 7.6×1035
-erg s−1
-at 1.6 GHz, which is three orders of magnitude lower than
-that of FRB 20121102A. Recently, the CHIME/FRB collaboration
-announced heightened activity in the repeating FRB 20201124A
-(Chime/FRB Collaboration 2021), which was localised to a host
-galaxy at a redshift of 𝑧 = 0.0979 ± 0.0001 (Fong et al. 2021).
-Persistent radio emission was detected by the upgraded Giant Me￾trewave Radio Telescope (uGMRT) (Wharton et al. 2021) and the
-Karl G. Jansky Very Large Array (JVLA) (Ricci et al. 2021) on
-angular scales of a few arcseconds, but resolved out to scales of
-∼ 0.1 arcseconds with the European VLBI Network (Marcote et al.
-2021).
-Localisations of four one-off FRBs through imaging of
-2 https://frbhosts.org/
-© 2021 The Authors
-MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 3
-buffered raw voltage data at 1.4 GHz (Bannister et al. 2019;
-Prochaska et al. 2019; Macquart et al. 2020) by the Australian
-SKA Pathfinder (ASKAP) telescope did not yield persistent radio
-continuum emission from the host galaxies (Bhandari et al. 2020).
-Australian Telescope Compact Array (ATCA) observations of FRBs
-20180924B, 20181112A, 20190102C and 20190608B were con￾ducted at a centre frequency of 6.5 GHz. No persistent emission as
-luminous as the one associated with FRB 20121102A was detected
-for the ASKAP FRBs (Bhandari et al. 2020). While the true age of
-FRB 121102A is unknown, models based on polarization studies
-predict the age to be ∼ 6 − 17 years (Hilmarsson et al. 2021). It is
-possible that younger, more active FRBs like FRB 20121102A are
-associated with persistent radio emission while the emission might
-have faded over time for the older ones. The possibility of repeating
-FRBs not being so uncommon after all (Ravi 2019) along with the
-increasing arcsecond localisations suggests that we are entering an
-era where we can begin to look for evidence of multiple classes by
-studying FRB host galaxies and multi-wavelength counterparts.
-In this paper, we report on the search for persistent radio emis￾sion in the host galaxies of one apparent one-off source (FRB
-20190714A) and two repeating sources (FRBs 20171019A and
-20190711A) (Kumar et al. 2019, 2021) using MeerKAT. In case
-of the latter, we also conducted simultaneous observations with the
-High Energy Stereoscopic System (H.E.S.S.) in very high energy
-gamma rays. In addition, we searched for signals in the ultraviolet,
-optical, and X-ray bands. The paper is structured as follows. In Sec￾tion 2, we discuss our observations and data reduction; in Section 3,
-we discuss the single radio continuum detection and derived multi￾wavelength upper limits. Our discussion and conclusions follow in
-Section 4 and 5.
-2 OBSERVATIONS AND DATA REDUCTION
-2.1 MeerKAT observations
-The MeerKAT 64-parabolic-dish array (Jonas & MeerKAT Team
-2016; Mauch et al. 2020) is located in the Northern Karoo desert
-near Carnarvon, South Africa. Each “offset Gregorian" parabolic
-dish antenna has an effective diameter of 13.5 m. The inner core of
-the array contains 48 of the 64 dishes in a 1 km radius, while the
-remaining 16 dishes are spread outward up to 8 km. The shortest and
-longest baselines of the MeerKAT array are 29 m and 8 km, respec￾tively, providing angular scales of 500 to 270
-at the central frequency,
-of the L-band receiver used here, of 1283 MHz. Multi-epoch ob￾servations of the FRB fields were conducted with the MeerKAT
-array (Project ID: SCI-20190418-VC-01) at L-band (856 MHz to
-1712 MHz). Details of the MeerKAT observations are presented
-in Table 1. Only Stokes I (total intensity) of the MeerKAT ob￾servations are considered in this paper. The data correlation was
-done with the SKARAB correlator (Hickish et al. 2016) in 4k mode
-which gives 4096 channels across the 856 MHz bandwidth resulting
-in a frequency resolution of ∼209 kHz. The data were reduced us￾ing the semi-automated MeerKAT data analysis pipelines - 𝑜𝑥𝑘𝑎𝑡3
-(Heywood 2020).
-2.1.1 Imaging analysis
-The 𝑜𝑥𝑘𝑎𝑡 pipeline employs a collection of publicly available ra￾dio interferometry data reduction software. The final data prod￾3 https://ascl.net/code/v/2627
-ucts, including reduced and calibrated visibility data (including
-self-calibration), continuum (including sub-band) images as well
-as diagnostic plots, are provided by the pipeline. The customary
-configuration of the 𝑜𝑥𝑘𝑎𝑡 pipeline incorporates flagging, cross￾calibration and self-calibration processes. In the flagging process,
-the low-gain bandpass edges (856 MHz to 880 MHz and 1658 MHz
-to 1800 MHz) are flagged on all baselines, along with the location of
-the Galactic neutral hydrogen line at 1419.8 MHz to 1421.3 MHz.
-Several other radio frequency interference (RFI) prone regions of
-the spectrum are then flagged on baselines shorter than 600 m.
-Then, other possible RFI affected data are flagged out using the
-CASA routines rflag and tfcrop for the calibrators, and using the
-tricolour package for the target fields.
-The cross-calibration steps using 𝑜𝑥𝑘𝑎𝑡 were standard, includ￾ing setting the flux scale and deriving corrections for residual delay
-calibration, bandpass and time-varying gain. The 𝑜𝑥𝑘𝑎𝑡 pipeline
-uses the customary tasks from the CASA (McMullin et al. 2007)
-suite for cross-calibration. After applying all the corrections to the
-target field, we channel-averaged the dataset by a factor of five chan￾nels before splitting out the science target. This is consistent with our
-science goals, since the relic sources we target are in the central parts
-of our fields, reducing the effect of smearing through the channel
-averaging. To deconvolve and image the target data, the WSClean
-imager (Offringa et al. 2014) was used, with the multiscale and
-wideband deconvolution algorithms enabled to better allow imag￾ing of the diffuse emission present in the our fields. Deconvolution
-was performed in ten sub-band images of each 82 MHz wide-band.
-WSClean generates the multi-frequency synthesis (MFS) map, in
-joined-channel deconvolution mode, with a central frequency of
-1283 MHz. In other words, the MFS map is a full bandwidth map.
-In WSClean, each of the sub-bands is deconvolved separately with
-an initially high mask of 20𝜎rms (using the auto masking function
-provided by WSClean), to generate an artefact-free model of the
-target field for the self-calibration process. This masking threshold
-was iteratively reduced to a value of 3𝜎rms in the final iteration
-of imaging. The 𝑜𝑥𝑘𝑎𝑡 pipeline uses the customary tasks from the
-Cubical software (Kenyon et al. 2018) for self-calibration.
-2.1.2 Single pulse searches
-In addition to obtaining correlated data, the output data stream of
-the F-engine are captured, delay corrected, phased and channelised
-before being sent over the central beamforming network to the beam￾forming User Supplied Equipment (FBFUSE) that was designed and
-developed at the Max Planck Institute for Radio Astronomy in Bonn.
-For this project, FBFUSE combined the data into 764 total-intensity
-tied-array beams which were used to populate the primary beam of
-∼ 1 deg2 of the array. The data are then captured at 306.24 μs time
-resolution by the Transient User Supplied Equipment (TUSE), a
-real-time transient detection backend instrument developed by the
-MeerTRAP4
-team at the University of Manchester. More details on
-TUSE will be presented in an upcoming paper (Stappers et al. in
-prep). The GPU-based single pulse search pipeline AstroAcceler￾ate5
-(Dimoudi & Armour 2015; Adámek & Armour 2016; Adámek
-et al. 2017; Dimoudi et al. 2018; Adámek & Armour 2019) was used
-to search for bursts in real-time after incoherently de-dispersing the
-data in the DM range 0–5118.4 pc cm−3
-(see Caleb et al. 2020, for
-more details).
-4 https://www.meertrap.org/
-5 https://github.com/AstroAccelerateOrg/astro-accelerate
+MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 1
+A MeerKAT, e-MERLIN, H.E.S.S. and Swift search for persistent
+and transient emission associated with three localised FRBs
+J. O. Chibueze,1,2★ M. Caleb,3,4† L. Spitler,5 H. Ashkar,6,17 F. Schüssler,6 B. W. Stappers,4
+C. Venter,1 I. Heywood,7,8,9 A. M. S. Richards,3 D. R. A. Williams,3 M. Kramer,3,5
+R. Beswick,3 M. C. Bezuidenhout,3 R. P. Breton,3 L. N. Driessen,3 F. Jankowski,3
+E. F. Keane,10 M. Malenta,3 M. Mickaliger,3 V. Morello,3 H. Qiu,11 K. Rajwade,3
+S. Sanidas,3 M. Surnis,3 T. W. Scragg,3 C. R. H. Walker,5 and N. Wrigley,3
+H.E.S.S. Collaboration: F. Aharonian,12,13,14 F. Ait Benkhali,15 E.O. Angüner,16 M. Backes,18,1
+V. Baghmanyan,19 V. Barbosa Martins,20 R. Batzofin,21 Y. Becherini,22,23 D. Berge,20
+M. Böttcher,1 C. Boisson,24 J. Bolmont,25 M. de Bony de Lavergne,26 M. Breuhaus,13
+R. Brose,12 F. Brun,6 T. Bulik,27 F. Cangemi,25 S. Caroff,25 S. Casanova,19
+J. Catalano,28 M. Cerruti,22 T. Chand,1 A. Chen,21 O.U. Chibueze,1
+G. Cotter,29 P. Cristofari,24 J. Damascene Mbarubucyeye,20 J. Devin,30 A. Djannati-Ataï,22
+A. Dmytriiev,1 K. Egberts,31 J.-P. Ernenwein,16 A. Fiasson,26 G. Fichet de Clairfontaine,24
+G. Fontaine,17 S. Funk,28 S. Gabici,22 S. Ghafourizadeh,15 G. Giavitto,20
+D. Glawion,28 M.-H. Grondin,30 M. Hörbe,29 C. Hoischen,31 T. L. Holch,20
+Zhiqiu Huang,13 M. Jamrozy,32 F. Jankowsky,15 I. Jung-Richardt,28 E. Kasai,18
+K. Katarzyński,33 U. Katz,28 B. Khélifi,22 W. Kluźniak,34 Nu. Komin,21
+K. Kosack,6 D. Kostunin,20 A. Lemière,22 J.-P. Lenain,25 F. Leuschner,35
+T. Lohse,36 A. Luashvili,24 I. Lypova,15 J. Mackey,12 D. Malyshev,35
+V. Marandon,13 P. Marchegiani,21 A. Marcowith,37 G. Martí-Devesa,38 R. Marx,15
+A. Mitchell,28,13 R. Moderski,34 L. Mohrmann,13 E. Moulin,6 J. Muller,17
+K. Nakashima,28 M. de Naurois,17 A. Nayerhoda,19 J. Niemiec,19 A. Priyana Noel,32
+P. O’Brien,39 S. Ohm,20 L. Olivera-Nieto,13 E. de Ona Wilhelmi,20 M. Ostrowski,32
+S. Panny,38 R.D. Parsons,36 S. Pita,22 V. Poireau,26 D.A. Prokhorov,40
+H. Prokoph,20 G. Pühlhofer,35 A. Quirrenbach,15 P. Reichherzer,6 A. Reimer,38
+O. Reimer,38 G. Rowell,41 B. Rudak,34 E. Ruiz-Velasco,13 V. Sahakian,42
+S. Sailer,13 H. Salzmann,35 D.A. Sanchez,26 A. Santangelo,35 M. Sasaki,28
+H.M. Schutte,1 U. Schwanke,36 J.N.S. Shapopi,18 A. Specovius,28
+S. Spencer,29 R. Steenkamp,18 S. Steinmassl,13 T. Takahashi,43 T. Tanaka,44
+C. Thorpe-Morgan,35 N. Tsuji,45 C. van Eldik,28 J. Veh,28
+J. Vink,40 S.J. Wagner,15 A. Wierzcholska,19 Yu Wun Wong,28 A. Yusafzai,28
+M. Zacharias,24,1 D. Zargaryan,12,14 A.A. Zdziarski,34 A. Zech,24 S.J. Zhu,20
+S. Zouari,22 N. Żywucka,1
+Accepted XXX. Received YYY; in original form ZZZ
 MNRAS 000, 1–15 (2021)
-4 Chibueze et al.
-2.2 e-MERLIN Observations
-To constrain the position of the persistent continuum emission
-associated with FRB 20190714A, we conducted L-band (centre
-frequency of 1.51 GHz) observations of the target with the en￾hanced Multi-Element Remote-Linked Interferometer Network, e￾MERLIN array in the United Kingdom (project code: CY10003)
-on 13 January, 2021 (see Section 3.1.2). Six antennas were used
-including the 75-m Lovell telescope and the target pointing cen￾tre was R.A. = 12ℎ15𝑚55𝑠
-.12, Dec. = −13◦01015.
-007. 1407+2827
-was used as the bandpass calibrator, 1331+3030 as the flux cal￾ibrator and 1216−1033 as the phase calibrator. The angular sep￾aration between the target and the phase calibrator is 2.47◦
-. The
-data reduction was done following standard e-MERLIN calibra￾tion procedures6 with additional flagging of bad visibilities fol￾lowed by imaging. We found two confusing sources in the field,
-at R.A. = 12ℎ15𝑚44𝑠
-.669, Dec. = −12◦57059.
-0056 and R.A. =
-12ℎ15𝑚37𝑠
-.216, Dec. = −13◦09033.
-0044 at 4.10
-and 9.40
-from the
-pointing centre, respectively. They had apparent flux densities of 4
-and 1.3 mJy without primary beam correction. We used these for
-self-calibration of the field and then subtracted them before final
-imaging. The final image synthesized beam is 0.
-0065 × 0.
-0015, posi￾tion angle 15◦
-elongated in the Declination direction due to the low
-target elevation from the UK.
-2.3 The Swift satellite: UVOT and XRT observations
-Neil Gehrels Swift Observatory (Swift) is a multi-wavelength NASA
-space mission operating in soft-X-rays and optical/UV. Here we
-use data from the X-ray Telescope (XRT) (Burrows et al. 2005)
-which operates in the soft X-ray domain of 0.3 − 10 keV as well as
-data taken by the UV/Optical Telescope (UVOT) (Roming et al.
-2005) operating in the UV to optical domain (170 − 600 nm).
-During the FRB 20171019A multi-wavelength (MWL) observing
-campaign, two 2 ks target-of-opportunity (ToO) observations were
-performed with Swift from 2019-09-28 18:37:02 to 2019-09-28
-21:52:54 and 2019-10-18 18:03:00 to 2019-10-18 20:03:00 on the
-FRB 20171019A localisation region. Simultaneously with Swift￾XRT, five UVOT images were taken with the UVM2 filter (central
-wavelengh = 2246 Å) over the 2 epochs with a total exposure of 4 ks.
-The images are aspect-corrected and summed with the uvotimsum
-tool (HEASOFT 6.26). Observations were performed with Swift￾XRT in the standard Photon Counting observing mode (PC). The
-XRT PC data are processed with xrtpipeline (HEASOFT 6.26).
-A summed image is extracted with xselect.
-2.4 Very-high energy gamma-ray observations with H.E.S.S.
-Observations of FRB 20171019A were also obtained in the very￾high energy gamma-ray domain with the H.E.S.S. imaging atmo￾spheric Cherenkov telescope array, sensitive in the range between a
-few tens of GeVs and 100 TeV. H.E.S.S. is located on the Khomas
-Highland plateau of Namibia (23◦1601800 South, 16◦3000000 East),
-at an elevation of ∼1800 m above sea level. Observations took place
-contemporaneously to the first epoch of MeerKAT observations of
-FRB 20171019A described above. The data set was obtained with
-the H.E.S.S. phase II array, including the upgraded 12 m-diameter
-CT1-4 telescopes (Ashton et al. 2020) and the large 28 m-diameter
-6 https://github.com/e-merlin/eMERLIN_CASA_pipeline
-CT5 telescope (Bolmont et al. 2014). A standard data quality selec￾tion was applied to the data (Aharonian et al. 2006). The events have
-then been selected and their direction and energy reconstructed us￾ing a log-likelihood minimization comparing the recorded shower
-images of all triggered telescopes (requiring at least two telescopes
-to see the same gamma-ray event) to a semi-analytical model of air
-showers (de Naurois & Rolland 2009).
-We define a circular region-of-interest centered on the position
-of FRB 20171019A with a radius of 0.12◦
-, optimal for a point-like
-source of emission as expected from FRB 20171019A. The back￾ground level in this ON region was determined using the standard
-“ring background” technique (Berge et al. 2007) based on a radially
-symmetric ring around the source position. This technique allows us
-to derive the background level from the same field of view and as￾sures that the gamma-ray signal and background are estimated with
-the same acceptance and under the same observation conditions.
-3 RESULTS
-3.1 MeerKAT
-The theoretical thermal noise of the MeerKAT can be calculated as
-𝑆rms =
-1
-𝜂𝑐
-SEFD
-√︃
-𝑛pol × 𝑁(𝑁 − 1) × Δ𝜈 × 𝑡int
-. (1)
-The system equivalent flux density (SEFD) of MeerKAT at the
-1.28 GHz is 443 Jy and 𝜂𝑐 is the correlator efficiency. We used 𝑛pol
-= 2 polarisation products (XX and YY), N = 64 telescopes, Δ𝜈 =
-856 MHz bandwidth and 𝑡int = 21600 sec observing time for one
-epoch. This gives the theoretical rms of ∼ 2 𝜇Jy beam−1
-. The typical
-image rms obtained from our residual images is ∼ 5 𝜇Jy beam−1
-,
-which is 2.5 times the expected theoretical rms. The wideband MFS
-image does not allow primary beam correction procedure as this can
-only be done on the sub-band images with limited rms for detection
-of the sources. However, our sources are the phase centres of our
-fields and thus unaffected by the effect of the primary beam.
-Due to the lack of MeerKAT primary beam correction, we
-did not compare the flux densities of the discrete sources with
-their NRAO (National Radio Astronomy Observatory) VLA (Very
-Large Array) Sky Survey (NVSS) counterparts. However, Chibueze
-et al. (2021, submitted) confirmed that the overall flux densities
-obtained with MeerKAT and NVSS are in good agreement with
-each other within errors of ∼ 5%. We compared the astrometry of
-the discrete radio sources obtained with MeerKAT and NVSS in
-Figure 1. The position uncertainty of the MeerKAT ranges from
-0.
-002 (close to the centre of the primary beam) to a few arcseconds
-towards the edge of the primary beam. The scatter observed in
-Figure 1 is mostly due to the probability of the centroids of emission
-in the ∼4500 NVSS resolution being different from the centroids at
-MeerKAT’s resolution and partly due to higher position uncertainty
-of the fainter sources. Therefore, we conclude that our MeerKAT
-data are well calibrated and the flux density and astrometry are as
-accurate as the errors indicate.
-3.1.1 Looking for persistent continuum emission associated with
-the FRB fields
-Considering the results of the astrometric comparison with NVSS
-(see Figure 1), we considered potential associations of contin￾uum sources in the MeerKAT observations with the FRB loca￾MNRAS 000, 1–15 (2021)
-MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 5
-tion to sources within 500. Using this spatial coincidence criterion,
-we identified a persistent 1283 MHz continuum source near FRB
-20190714A, detected in both the 14 September 2019 and the 28
-September 2019 epoch. The peak of the MeerKAT radio emission
-is offset by ∼ 2
-00.1 from the peak of the 𝑖-band magnitude of the op￾tical galaxy identified in the Panoramic Survey Telescope and Rapid
-Response System (PanSTARRS, located at Haleakala Observatory)
-image (shown as contours in Figures 2 and 3). The MeerKAT ra￾dio source is offset by 1.
-0068 from the localisation region of FRB
-20190714 (cyan circle in Figures 2 and 3).
-3.1.2 e-MERLIN detection of compact emission towards
-FRB 20190714
-Compact persistent emission was detected in the 1.51 GHz e￾MERLIN image at R.A. = 12ℎ15𝑚55𝑠
-.116, Dec. = −13◦01014.
-0048
-at 86 𝜇Jy beam−1 by e-MERLIN. The stochastic position uncer￾tainty is (0.04, 0.15) arcsec and the uncertainty (due to the sepa￾ration between phase-calibrator and target, and antenna position
-uncertainty) is (0.013, 0.056) arcsec, giving a total astrometric
-uncertainty of (0.04, 0.16) arcsec in R.A. and Dec., respectively.
-The offset from the FRB position is negligible in R.A. and 1.2
-arcsec in Dec. The rms in this region (of full primary beam sen￾sitivity) is 20 𝜇Jy beam−1
-, making this a 4.3𝜎rms detection. It is
-∼1.5𝜎rms higher than that of the MeerKAT detection. Although the
-e-MERLIN flux scale nominal uncertainty is ∼5%, in these data it
-is possibly higher due to the low declination of the phase-reference
-source and to the strong RFI which were removed from the data
-but may have affected the linearity of the receiver response. The
-peak of the e-MERLIN radio emission is offset by ∼ 1.
-004 from the
-peak of the PanSTARRS 𝑖-band emission in Figures 2 and 3. The
-e-MERLIN radio source (shown by the cyan cross in Figures 2 and
-3) is offset by 0.
-0053 from the localised position of FRB 20190714.
-We estimate the probability of a chance alignment of a back￾ground persistent radio source and the host galaxy, following the
-procedure of Eftekhari et al. (2018). Instead of using the FRB lo￾calisation region, we use the area of the galaxy, which is taken as
-2
-00 × 2
-00, twice the half light radius from Heintz et al. (2020). Given
-the source has a flux density of ∼ 90𝜇Jy we estimate the chance
-alignment probability of 0.0008, which corresponds to 3.4𝜎. The
-flux density threshold, assuming 3𝜎, for an unresolved radio source
-is ∼ 15 𝜇Jy. If instead we consider the probability of detecting any
-radio source above our flux density threshold of 15𝜇Jy, the probabil￾ity of a chance alignment is, therefore, approximately 0.8%, making
-the statistical significance of our detection 2.6𝜎. This represents the
-first detection of radio continuum emission associated with the host
-(galaxy) of FRB 20190714A (see Figure 2 and 3).
-3.1.3 MeerKAT non-detections
-No continuum emission was detected near FRBs 20171019A and
-20190711A. As each of the images of these sources has an rms
-of ∼ 5 𝜇Jy beam−1
-, the 3𝜎 intensity upper limit of any emission
-associated with FRBs 20171019A and 20190711A will be ∼ 15 𝜇Jy
-beam−1
-(see Table 1).
-Candidate pulses above a signal-to-noise (S/N) of 10 from the
-single pulse search with MeerTRAP were visually inspected offline.
-No new FRBs or repeat bursts from the known FRBs were detected
-above a fluence threshold of 0.08 Jy ms assuming a 1 ms duration
-burst.
-3.2 Swift
-The UVOT summed image is presented in Figure 4. The UVOT
-field of view corresponds roughly to the uncertainty7 of the locali￾sation region of FRB 20171019A (RA = 7.50
-and DEC = 70
-). Using
-uvotdetect, we find 30 sources above the 5𝜎 level and within the
-FRB 20171019A uncertainty region. Using a 3 arcsec maximum
-separation, which is slightly larger than the UVOT PSF (Breeveld
-et al. 2010), these sources are cross-matched with known catalogue
-sources. We find that out of the 30 sources detected by UVOT, 28
-are spatially coincident with stars catalogued in the SDSS catalogue
-(DR12; Alam et al. 2015), and one source is coincident with a galaxy
-(AGN broadline SDSS ID: 1237652599570890948 at 𝑧 ∼ 0.156).
-This galaxy is also detected by the MeerKAT radio observations. We
-use the NASA/IPAC Extragalactic Database (NED)8
-to search for
-known galaxies in the FRB 20171019A uncertainty regions. We find
-multiple galaxies with unknown redshifts, therefore we cannot draw
-conclusions on the host galaxy from our observations. Using a 5000
-circular ON region centred on the position of FRB 20171019A and
-a 5000 OFF region that does not contain any of the detected sources,
-we run the uvotsource tool with a 5𝜎 background threshold and
-obtain a flux upper limit of 1.4 × 10−16 erg cm−2
-s
-−1Å
-−1 without
-applying a Calactic extinction correction.
-The XRT summed image is shown in Figure 5. At the edge
-of the field-of-view, we detect a source spatially coincident with
-the Wolf 1561 star. As we consider this source unrelated to the
-FRB, we use the online Swift-XRT data products generator (Evans
-et al. 2007) (Evans et al. 2009) to derive upper limits in the 0.3-
-10 keV range on the count rate of 0.001885 counts.s
-−1
-. Using
-WebPIMMS9
-(v4.11a) and assuming a weighted average 𝑁H = 5.12×
-1020 cm−2
-from the direction of the source estimated from the
-NASA’s HEASARC 10 online tools (HI4PI Collaboration et al.
-2016) and a power law model with a photon index = 2, this upper
-limit translates to an energy flux of 6.6×10−14 erg cm−2
-s
-−1
-(8.3×
-10−14 erg cm−2
-s
-−1 unabsorbed).
-3.3 H.E.S.S.
-No significant gamma-ray excess above the expected background
-is detected from the direction of FRB 20171019A, with 52 gamma
-candidate events from the source region and 524 background event.
-A second analysis using an independent event calibration and recon￾struction (Parsons & Hinton 2014) confirms this result. A search for
-variable emission on timescales ranging from milliseconds to sev￾eral minutes with tools provided in (Brun et al. 2020) does not reveal
-any variability above 2.2 𝜎. For the total data set of 1.8 h, 95% confi￾dence level (C. L.) upper limits on the photon flux are derived using
-the method described by Rolke et al. (2005). The energy threshold
-of the data is highly dependent on the zenith angle of the observa￾tions. For these observations, the zenith angles range from 15 to 25
-deg, which leads to an energy threshold for the stacked data set of
-𝐸th = 120 GeV. The upper limit on the Very High Energy (VHE)
-7 https://www.wis-tns.org/object/20171019a
-8 https://ned.ipac.caltech.edu; NED is funded by the National
-Aeronautics and Space Administration and operated by the California Insti￾tute of Technology
-9 https://heasarc.gsfc.nasa.gov/cgi-bin/Tools/w3pimms/
-w3pimms.pl
-10 https://heasarc.gsfc.nasa.gov/cgi-bin/Tools/w3nh/w3nh.
-pl
+arXiv:2201.00069v1 [astro-ph.HE] 31 Dec 2021
+MNRAS 000, 1–15 (2021) Preprint 4 January 2022 Compiled using MNRAS LATEX style file v3.0
+ABSTRACT
+We report on a search for persistent radio emission from the one-off Fast Radio Burst (FRB)
+20190714A, as well as from two repeating FRBs, 20190711A and 20171019A, using the
+MeerKAT radio telescope. For FRB 20171019A we also conducted simultaneous observations
+with the High Energy Stereoscopic System (H.E.S.S.) in very high energy gamma rays and
+searched for signals in the ultraviolet, optical, and X-ray bands. For this FRB, we obtain a UV
+flux upper limit of $1.39\times10^{-16}$ erg cm$^{-2}$ s$^{-1}$ Å$^{-1}$, X-ray limit of $\sim 6.6\times10^{-14}$ erg cm$^{-2}$ s$^{-1}$ and
+a limit on the very-high-energy gamma-ray flux $\Phi(E > 120\,\mathrm{GeV}) < 1.7 \times 10^{-12}$ erg cm$^{-2}$ s$^{-1}$.
+We obtain a radio upper limit of $\sim 15\,\mu$Jy beam$^{-1}$ for persistent emission at the locations of both
+FRBs 20190711A and 20171019A, but detect diffuse radio emission with a peak brightness
+of $\sim 53\,\mu$Jy beam$^{-1}$ associated with FRB 20190714A at $z = 0.2365$. This represents the first
+detection of the radio continuum emission potentially associated with the host (galaxy) of FRB
+20190714A, and is only the third known FRB to have such an association. Given the possible
+association of a faint persistent source, FRB 20190714A may potentially be a repeating FRB
+whose age lies between that of FRB 20121102A and FRB 20180916A. A parallel search for
+repeat bursts from these FRBs revealed no new detections down to a fluence of 0.08 Jy ms for
+a 1 ms duration burst.
+Key words: fast radio bursts – radio continuum: galaxies – radiation mechanisms: non-thermal
+1 INTRODUCTION
+Fast radio bursts (FRBs) are luminous transients that last for microseconds to milliseconds and occur at extragalactic to cosmological distances (e.g. Lorimer et al. 2007; Thornton et al. 2013;
+Macquart et al. 2020). The estimated high radio luminosities and
+associated brightness temperatures required to produce these short-timescale energetic events at such distances are what make them
+intriguing (Petroff et al. 2021; Caleb & Keane 2021). They have been
+observed to emit from ∼ 110 MHz − 8 GHz, though not yet across
+a wide and continuous frequency band due to the variable band-limited spectra of the single pulses. Over 600 FRBs have been discovered$^1$, of which ∼ 20 have been seen to repeat, and it is presently
+uncertain whether they all do (Caleb et al. 2019; James et al. 2020).
+The extraordinary observed characteristics of the repeating and non-repeating FRBs have led to various progenitor models with the bulk
+of them favouring neutron stars. Progenitor theories include binary
+neutron star mergers and collisions (Totani 2013; Yamasaki et al.
+2018), giant pulses from extragalactic pulsars (Cordes & Wasserman 2016; Popov & Pshirkov 2016), hyperflares and giant flares
+from magnetars (Popov & Postnov 2013; Popov et al. 2018), binary
+white dwarf mergers (Kashiyama et al. 2013), neutron star “combing" (Zhang 2018) and interactions of neutron stars with active
+galactic nuclei (Vieyro et al. 2017) (see Platts et al. (2019) for a list
+of potential progenitors). Some of these models predict radio afterglows accompanying an FRB with timescales of days to years. Liu
+et al. (2016) propose that the merger of a Kerr-Newman black hole
+binary is one of the plausible central engines for FRBs and their
+afterglows. Dai et al. (2017), however, suggest that the persistent
+emission is due to an ultra-relativistic pulsar wind nebula sweeping
+up its ambient medium with FRBs repeatedly produced through one
+of several potential mechanisms. In the magnetar model by Margalit
+et al. (2019), FRBs produced by binary neutron star mergers and
+accretion induced collapse are expected to be accompanied by persistent radio continuum emission on timescales of months to years.
+★ james.chibueze@nwu.ac.za
+† manisha.caleb@manchester.ac.uk
+1 https://www.wis-tns.org/
+The persistent emission is powered by the nebula of relativistic electrons and magnetic fields inflated by the magnetar flares (Margalit
+et al. 2019). The existence of persistent emission associated with
+FRBs could provide vital clues to their origin. Moreover, potential
+candidates and models for FRB progenitors predict counterparts
+in the X-ray and TeV bands. For example, a model by Lyubarsky
+(2014) predicts millisecond outbursts of TeV emission accompanying FRBs from magnetars. In 2020, FRB 20200428 was discovered
+for the first time from a galactic magnetar, SGR 1935+2154. Furthermore, an X-ray counterpart to this FRB was detected for the first
+time by several instruments (Tavani et al. 2021; Ridnaia et al. 2021;
+Mereghetti et al. 2020; Insight-HXMT 2020).
+Of the 19 FRBs that have been associated with host galaxies$^2$, only the sub-arcsecond localisation of the repeating FRB
+20121102A to a host galaxy at a redshift of 𝑧 = 0.19273 ± 0.0008
+(Tendulkar et al. 2017; Bassa et al. 2017) showed that it is physically associated with a compact (≤ 0.7 pc), persistent radio source
+of luminosity $\nu L_\nu \sim 10^{39}$ erg s$^{-1}$ at a few GHz (Marcote et al.
+2017). This source is detectable from 300 MHz – 26 GHz (Resmi
+et al. 2020; Chatterjee et al. 2017) and is seen to exhibit ∼ 10% variability on day timescales. In contrast, a similar sub-milliarcsecond
+localisation of another repeating FRB 20180916B to a nearby massive spiral galaxy at 𝑧 = 0.0337 ± 0.0002 (Marcote et al. 2020)
+showed no associated persistent radio emission. This places a strong
+upper limit on the persistent source luminosity of $\nu L_\nu \lesssim 7.6\times10^{35}$
+erg s$^{-1}$ at 1.6 GHz, which is three orders of magnitude lower than
+that of FRB 20121102A. Recently, the CHIME/FRB collaboration
+announced heightened activity in the repeating FRB 20201124A
+(Chime/FRB Collaboration 2021), which was localised to a host
+galaxy at a redshift of 𝑧 = 0.0979 ± 0.0001 (Fong et al. 2021).
+Persistent radio emission was detected by the upgraded Giant Metrewave Radio Telescope (uGMRT) (Wharton et al. 2021) and the
+Karl G. Jansky Very Large Array (JVLA) (Ricci et al. 2021) on
+angular scales of a few arcseconds, but resolved out to scales of
+∼ 0.1 arcseconds with the European VLBI Network (Marcote et al.
+2021).
+Localisations of four one-off FRBs through imaging of
+2 https://frbhosts.org/
+© 2021 The Authors
+MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 3
+buffered raw voltage data at 1.4 GHz (Bannister et al. 2019;
+Prochaska et al. 2019; Macquart et al. 2020) by the Australian
+SKA Pathfinder (ASKAP) telescope did not yield persistent radio
+continuum emission from the host galaxies (Bhandari et al. 2020).
+Australian Telescope Compact Array (ATCA) observations of FRBs
+20180924B, 20181112A, 20190102C and 20190608B were conducted at a centre frequency of 6.5 GHz. No persistent emission as
+luminous as the one associated with FRB 20121102A was detected
+for the ASKAP FRBs (Bhandari et al. 2020). While the true age of
+FRB 20121102A is unknown, models based on polarization studies
+predict the age to be ∼ 6 − 17 years (Hilmarsson et al. 2021). It is
+possible that younger, more active FRBs like FRB 20121102A are
+associated with persistent radio emission while the emission might
+have faded over time for the older ones. The possibility of repeating
+FRBs not being so uncommon after all (Ravi 2019) along with the
+increasing arcsecond localisations suggests that we are entering an
+era where we can begin to look for evidence of multiple classes by
+studying FRB host galaxies and multi-wavelength counterparts.
+In this paper, we report on the search for persistent radio emission in the host galaxies of one apparent one-off source (FRB
+20190714A) and two repeating sources (FRBs 20171019A and
+20190711A) (Kumar et al. 2019, 2021) using MeerKAT. In case
+of the latter, we also conducted simultaneous observations with the
+High Energy Stereoscopic System (H.E.S.S.) in very high energy
+gamma rays. In addition, we searched for signals in the ultraviolet,
+optical, and X-ray bands. The paper is structured as follows. In Section 2, we discuss our observations and data reduction; in Section 3,
+we discuss the single radio continuum detection and derived multi-wavelength upper limits. Our discussion and conclusions follow in
+Sections 4 and 5.
+2 OBSERVATIONS AND DATA REDUCTION
+2.1 MeerKAT observations
+The MeerKAT 64-parabolic-dish array (Jonas & MeerKAT Team
+2016; Mauch et al. 2020) is located in the Northern Karoo desert
+near Carnarvon, South Africa. Each “offset Gregorian" parabolic
+dish antenna has an effective diameter of 13.5 m. The inner core of
+the array contains 48 of the 64 dishes in a 1 km radius, while the
+remaining 16 dishes are spread outward up to 8 km. The shortest and
+longest baselines of the MeerKAT array are 29 m and 8 km, respectively, providing angular scales of $5''$ to $27'$ at the central frequency,
+of the L-band receiver used here, of 1283 MHz. Multi-epoch observations of the FRB fields were conducted with the MeerKAT
+array (Project ID: SCI-20190418-VC-01) at L-band (856 MHz to
+1712 MHz). Details of the MeerKAT observations are presented
+in Table 1. Only Stokes I (total intensity) of the MeerKAT observations are considered in this paper. The data correlation was
+done with the SKARAB correlator (Hickish et al. 2016) in 4k mode
+which gives 4096 channels across the 856 MHz bandwidth resulting
+in a frequency resolution of ∼209 kHz. The data were reduced using the semi-automated MeerKAT data analysis pipelines - 𝑜𝑥𝑘𝑎𝑡3
+(Heywood 2020).
+2.1.1 Imaging analysis
+The 𝑜𝑥𝑘𝑎𝑡 pipeline employs a collection of publicly available radio interferometry data reduction software. The final data prod3 https://ascl.net/code/v/2627
+ucts, including reduced and calibrated visibility data (including
+self-calibration), continuum (including sub-band) images as well
+as diagnostic plots, are provided by the pipeline. The customary
+configuration of the 𝑜𝑥𝑘𝑎𝑡 pipeline incorporates flagging, cross-calibration and self-calibration processes. In the flagging process,
+the low-gain bandpass edges (856 MHz to 880 MHz and 1658 MHz
+to 1800 MHz) are flagged on all baselines, along with the location of
+the Galactic neutral hydrogen line at 1419.8 MHz to 1421.3 MHz.
+Several other radio frequency interference (RFI) prone regions of
+the spectrum are then flagged on baselines shorter than 600 m.
+Then, other possible RFI affected data are flagged out using the
+CASA routines rflag and tfcrop for the calibrators, and using the
+tricolour package for the target fields.
+The cross-calibration steps using 𝑜𝑥𝑘𝑎𝑡 were standard, including setting the flux scale and deriving corrections for residual delay
+calibration, bandpass and time-varying gain. The 𝑜𝑥𝑘𝑎𝑡 pipeline
+uses the customary tasks from the CASA (McMullin et al. 2007)
+suite for cross-calibration. After applying all the corrections to the
+target field, we channel-averaged the dataset by a factor of five channels before splitting out the science target. This is consistent with our
+science goals, since the relic sources we target are in the central parts
+of our fields, reducing the effect of smearing through the channel
+averaging. To deconvolve and image the target data, the WSClean
+imager (Offringa et al. 2014) was used, with the multiscale and
+wideband deconvolution algorithms enabled to better allow imaging of the diffuse emission present in our fields. Deconvolution
+was performed in ten sub-band images of each 82 MHz wide-band.
+WSClean generates the multi-frequency synthesis (MFS) map, in
+joined-channel deconvolution mode, with a central frequency of
+1283 MHz. In other words, the MFS map is a full bandwidth map.
+In WSClean, each of the sub-bands is deconvolved separately with
+an initially high mask of 20𝜎rms (using the auto masking function
+provided by WSClean), to generate an artefact-free model of the
+target field for the self-calibration process. This masking threshold
+was iteratively reduced to a value of 3𝜎rms in the final iteration
+of imaging. The 𝑜𝑥𝑘𝑎𝑡 pipeline uses the customary tasks from the
+Cubical software (Kenyon et al. 2018) for self-calibration.
+2.1.2 Single pulse searches
+In addition to obtaining correlated data, the output data stream of
+the F-engine are captured, delay corrected, phased and channelised
+before being sent over the central beamforming network to the beamforming User Supplied Equipment (FBFUSE) that was designed and
+developed at the Max Planck Institute for Radio Astronomy in Bonn.
+For this project, FBFUSE combined the data into 764 total-intensity
+tied-array beams which were used to populate the primary beam of
+∼ 1 deg$^2$ of the array. The data are then captured at 306.24 μs time
+resolution by the Transient User Supplied Equipment (TUSE), a
+real-time transient detection backend instrument developed by the
+MeerTRAP$^4$ team at the University of Manchester. More details on
+TUSE will be presented in an upcoming paper (Stappers et al. in
+prep). The GPU-based single pulse search pipeline AstroAccelerate$^5$
+(Dimoudi & Armour 2015; Adámek & Armour 2016; Adámek
+et al. 2017; Dimoudi et al. 2018; Adámek & Armour 2019) was used
+to search for bursts in real-time after incoherently de-dispersing the
+data in the DM range 0–5118.4 pc cm$^{-3}$ (see Caleb et al. 2020, for
+more details).
+4 https://www.meertrap.org/
+5 https://github.com/AstroAccelerateOrg/astro-accelerate
 MNRAS 000, 1–15 (2021)
-6 Chibueze et al.
-Figure 1. Astrometric comparison between MeerKAT and NVSS discrete compact sources.The open circles represent the difference in position between the
-MeerKAT and NVSS sources.
-gamma-ray flux above that threshold and assuming an energy depen￾dence following 𝐸
-−2
-is Φ(𝐸 > 120 GeV) < 2.10 × 10−12 cm−2
-s
-−1
-or Φ(𝐸 > 120 GeV) < 1.7 × 10−12 erg cm−2
-s
-−1
-. A variation of
-± 0.5 of the assumed spectral index leads to a variation in the upper
-limit of less than ± 19%. A map of energy flux upper limits covering
-the full region accessible within the H.E.S.S. field of view above
-120 GeV is given in Figure 6.
-4 DISCUSSION
-Of the targeted FRB fields reported here, only FRB 20190714A
-is observed to be spatially coincident with a persistent radio con￾tinuum source. We obtain an upper limit of ∼ 15 𝜇Jy beam−1
-for
-FRBs 20190711A and 20171019A, respectively, and a peak inten￾sity of ∼ 53 𝜇Jy beam−1
-for the emission coincident with FRB
-20190714A. This source is detected at both epochs with similar
-intensities within the measured rms of the images (see Tables 1 and
-2 for details). The values in the Table 2 are derived by carrying
-out 2D Gaussian fit using similar ellipses enclosing the detected
-persistent emission. The average flux density is ∼ 3 times less than
-that of the persistent source associated with FRBs 20121102A, one
-of the most prolific repeaters, located at 𝑧 = 0.19273(8). Persistent
-radio emission from FRB 20201124A was detected by the uGMRT
-(Wharton et al. 2021) and the JVLA (Ricci et al. 2021) on angular
-scales of a few arcseconds. However, it is resolved out at scales of
-∼ 0.1 arcseconds with the European VLBI Network (Marcote et al.
-2021) suggesting that it is not a compact source directly associated
-with the FRB. In contrast, the other localised, prolific repeating
-FRB 20180916A has no persistent radio counterpart.
-In the image in Figure 3 one can see that the persistent radio
-source lies at the edge of the optical extent of the host galaxy
-as seen in PanSTARRS observations (Heintz et al. 2020). Our
-derived 1283 MHz peak position with MeerKAT places it just
-1.
-0068 away from the position of FRB 20190714A (𝛼𝐽2000, 𝛿𝐽2000
-= 12ℎ15𝑚55𝑠
-.12, -13◦01015.
-0070; Heintz et al. 2020). The posi￾tional uncertainty on the FRB position is 0.
-00283. Similarly, the peak
-1.51 GHz e-MERLIN position of the persistent radio source is sepa￾rated from the position of FRB 20190714A by 0.
-0053. The persistent
-source near FRB 20190714A has a flux broadly consistent with the
-MeerKAT flux and is unresolved on the e-MERLIN baselines. The
+ Chibueze et al.
+2.2 e-MERLIN Observations
+To constrain the position of the persistent continuum emission
+associated with FRB 20190714A, we conducted L-band (centre
+frequency of 1.51 GHz) observations of the target with the enhanced Multi-Element Remote-Linked Interferometer Network (e-MERLIN) array in the United Kingdom (project code: CY10003)
+on 13 January, 2021 (see Section 3.1.2). Six antennas were used
+including the 75-m Lovell telescope and the target pointing centre was
+R.A. = $12^{\rm h}15^{\rm m}55.12^{\rm s}$, Dec. = $-13^\circ01'15.7''$.
+1407+2827
+was used as the bandpass calibrator, 1331+3030 as the flux calibrator and 1216−1033 as the phase calibrator. The angular separation between the target and the phase calibrator is $2.47^\circ$.
+The
+data reduction was done following standard e-MERLIN calibration procedures$^6$ with additional flagging of bad visibilities followed by imaging. We found two confusing sources in the field,
+at R.A. = $12^{\rm h}15^{\rm m}44.669^{\rm s}$,
+Dec. = $-12^\circ57'59.56''$ and
+R.A. = $12^{\rm h}15^{\rm m}37.216^{\rm s}$,
+Dec. = $-13^\circ09'33.44''$,
+at $4.1'$ and $9.4'$ from the
+pointing centre, respectively. They had apparent flux densities of 4
+and 1.3 mJy without primary beam correction. We used these for
+self-calibration of the field and then subtracted them before final
+imaging. The final image synthesized beam is
+$0.65'' \times 0.15''$, position angle $15^\circ$,
+elongated in the Declination direction due to the low
+target elevation from the UK.
+2.3 The Swift satellite: UVOT and XRT observations
+Neil Gehrels Swift Observatory (Swift) is a multi-wavelength NASA
+space mission operating in soft-X-rays and optical/UV. Here we
+use data from the X-ray Telescope (XRT) (Burrows et al. 2005)
+which operates in the soft X-ray domain of 0.3 − 10 keV as well as
+data taken by the UV/Optical Telescope (UVOT) (Roming et al.
+2005) operating in the UV to optical domain (170 − 600 nm).
+During the FRB 20171019A multi-wavelength (MWL) observing
+campaign, two 2 ks target-of-opportunity (ToO) observations were
+performed with Swift from 2019-09-28 18:37:02 to 2019-09-28
+21:52:54 and 2019-10-18 18:03:00 to 2019-10-18 20:03:00 on the
+FRB 20171019A localisation region. Simultaneously with Swift-XRT, five UVOT images were taken with the UVM2 filter (central
+wavelength = 2246 Å) over the 2 epochs with a total exposure of 4 ks.
+The images are aspect-corrected and summed with the uvotimsum
+tool (HEASOFT 6.26). Observations were performed with Swift-XRT in the standard Photon Counting observing mode (PC). The
+XRT PC data are processed with xrtpipeline (HEASOFT 6.26).
+A summed image is extracted with xselect.
+2.4 Very-high energy gamma-ray observations with H.E.S.S.
+Observations of FRB 20171019A were also obtained in the very-high energy gamma-ray domain with the H.E.S.S. imaging atmospheric Cherenkov telescope array, sensitive in the range between a
+few tens of GeV and 100 TeV. H.E.S.S. is located on the Khomas
+Highland plateau of Namibia ($23^\circ16'18''$ South, $16^\circ30'00''$ East),
+at an elevation of ∼1800 m above sea level. Observations took place
+contemporaneously to the first epoch of MeerKAT observations of
+FRB 20171019A described above. The data set was obtained with
+the H.E.S.S. phase II array, including the upgraded 12 m-diameter
+CT1-4 telescopes (Ashton et al. 2020) and the large 28 m-diameter
+6 https://github.com/e-merlin/eMERLIN_CASA_pipeline
+CT5 telescope (Bolmont et al. 2014). A standard data quality selection was applied to the data (Aharonian et al. 2006). The events have
+then been selected and their direction and energy reconstructed using a log-likelihood minimization comparing the recorded shower
+images of all triggered telescopes (requiring at least two telescopes
+to see the same gamma-ray event) to a semi-analytical model of air
+showers (de Naurois & Rolland 2009).
+We define a circular region-of-interest centered on the position
+of FRB 20171019A with a radius of 0.12◦, optimal for a point-like
+source of emission as expected from FRB 20171019A. The background level in this ON region was determined using the standard
+“ring background” technique (Berge et al. 2007) based on a radially
+symmetric ring around the source position. This technique allows us
+to derive the background level from the same field of view and assures that the gamma-ray signal and background are estimated with
+the same acceptance and under the same observation conditions.
+3 RESULTS
+3.1 MeerKAT
+The theoretical thermal noise of the MeerKAT can be calculated as
+$$
+S_{\rm rms} = \frac{1}{\eta_c}\,
+\frac{\mathrm{SEFD}}{
+\sqrt{n_{\rm pol} \times N(N-1) \times \Delta\nu \times t_{\rm int}}
+}
+. \tag{1}
+$$
+The system equivalent flux density (SEFD) of MeerKAT at the
+1.28 GHz is 443 Jy and 𝜂𝑐 is the correlator efficiency. We used 𝑛pol
+= 2 polarisation products (XX and YY), N = 64 telescopes, Δ𝜈 =
+856 MHz bandwidth and 𝑡int = 21600 sec observing time for one
+epoch. This gives the theoretical rms of ∼ 2 𝜇Jy beam−1. The typical
+image rms obtained from our residual images is ∼ 5 𝜇Jy beam−1,
+which is 2.5 times the expected theoretical rms. The wideband MFS
+image does not allow primary beam correction procedure as this can
+only be done on the sub-band images with limited rms for detection
+of the sources. However, our sources are at the phase centres of our
+fields and thus unaffected by the effect of the primary beam.
+Due to the lack of MeerKAT primary beam correction, we
+did not compare the flux densities of the discrete sources with
+their NRAO (National Radio Astronomy Observatory) VLA (Very
+Large Array) Sky Survey (NVSS) counterparts. However, Chibueze
+et al. (2021, submitted) confirmed that the overall flux densities
+obtained with MeerKAT and NVSS are in good agreement with
+each other within errors of ∼ 5%. We compared the astrometry of
+the discrete radio sources obtained with MeerKAT and NVSS in
+Figure 1. The position uncertainty of the MeerKAT
+ranges from
+$0.2''$ (close to the centre of the primary beam) to a few arcseconds
+towards the edge of the primary beam. The scatter observed in
+Figure 1 is mostly due to the probability of the centroids of emission
+in the ∼ $45''$ NVSS resolution being different from the centroids at
+MeerKAT’s resolution and partly due to higher position uncertainty
+of the fainter sources. Therefore, we conclude that our MeerKAT
+data are well calibrated and the flux density and astrometry are as
+accurate as the errors indicate.
+3.1.1 Looking for persistent continuum emission associated with
+the FRB fields
+Considering the results of the astrometric comparison with NVSS
+(see Figure 1), we considered potential associations of continuum sources in the MeerKAT observations with the FRB locaMNRAS 000, 1–15 (2021)
+MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 5
+tion to sources within $5''$. Using this spatial coincidence criterion,
+we identified a persistent 1283 MHz continuum source near FRB
+20190714A, detected in both the 14 September 2019 and the 28
+September 2019 epoch. The peak of the MeerKAT radio emission
+is offset by
+∼ $2.1''$ from the peak of the 𝑖-band magnitude of the optical galaxy identified in the Panoramic Survey Telescope and Rapid
+Response System (PanSTARRS, located at Haleakala Observatory)
+image (shown as contours in Figures 2 and 3). The MeerKAT radio source is offset by
+$1.68''$ from the localisation region of FRB
+20190714A (cyan circle in Figures 2 and 3).
+3.1.2 e-MERLIN detection of compact emission towards
+FRB 20190714
+Compact persistent emission was detected in the 1.51 GHz e-MERLIN image at
+R.A. = $12^{\rm h}15^{\rm m}55.116^{\rm s}$,
+Dec. = $-13^\circ01'14.48''$
+at 86 𝜇Jy beam$^{-1}$ by e-MERLIN. The stochastic position uncertainty is (0.04, 0.15) arcsec and the uncertainty (due to the separation between phase-calibrator and target, and antenna position
+uncertainty) is (0.013, 0.056) arcsec, giving a total astrometric
+uncertainty of (0.04, 0.16) arcsec in R.A. and Dec., respectively.
+The offset from the FRB position is negligible in R.A. and 1.2
+arcsec in Dec. The rms in this region (of full primary beam sensitivity) is 20 𝜇Jy beam$^{-1}$,
+making this a 4.3$\sigma_{\rm rms}$ detection. It is
+∼1.5𝜎rms higher than that of the MeerKAT detection. Although the
+e-MERLIN flux scale nominal uncertainty is ∼5%, in these data it
+is possibly higher due to the low declination of the phase-reference
+source and to the strong RFI which were removed from the data
+but may have affected the linearity of the receiver response. The
+peak of the e-MERLIN radio emission is offset by
+∼ $1.4''$ from the
+peak of the PanSTARRS 𝑖-band emission in Figures 2 and 3. The
+e-MERLIN radio source (shown by the cyan cross in Figures 2 and
+3) is offset by
+$0.53''$ from the localised position of FRB 20190714A.
+We estimate the probability of a chance alignment of a background persistent radio source and the host galaxy, following the
+procedure of Eftekhari et al. (2018). Instead of using the FRB localisation region, we use the area of the galaxy, which is taken as
+$2'' \times 2''$,
+twice the half light radius from Heintz et al. (2020). Given
+the source has a flux density of ∼ 90𝜇Jy we estimate the chance
+alignment probability of 0.0008, which corresponds to 3.4𝜎. The
+flux density threshold, assuming 3𝜎, for an unresolved radio source
+is ∼ 15 𝜇Jy. If instead we consider the probability of detecting any
+radio source above our flux density threshold of 15𝜇Jy, the probability of a chance alignment is, therefore, approximately 0.8%, making
+the statistical significance of our detection 2.6𝜎. This represents the
+first detection of radio continuum emission associated with the host
+(galaxy) of FRB 20190714A (see Figure 2 and 3).
+3.1.3 MeerKAT non-detections
+No continuum emission was detected near FRBs 20171019A and
+20190711A. As each of the images of these sources has an rms
+of ∼ 5 𝜇Jy beam−1, the 3𝜎 intensity upper limit of any emission
+associated with FRBs 20171019A and 20190711A will be ∼ 15 𝜇Jy
+beam$^{-1}$ (see Table 1).
+Candidate pulses above a signal-to-noise (S/N) of 10 from the
+single pulse search with MeerTRAP were visually inspected offline.
+No new FRBs or repeat bursts from the known FRBs were detected
+above a fluence threshold of 0.08 Jy ms assuming a 1 ms duration
+burst.
+3.2 Swift
+The UVOT summed image is presented in Figure 4. The UVOT
+field of view corresponds roughly to the uncertainty$^7$ of the localisation region of FRB 20171019A (RA = $7.5'$
+and DEC = $7'$). Using
+uvotdetect, we find 30 sources above the 5𝜎 level and within the
+FRB 20171019A uncertainty region. Using a 3 arcsec maximum
+separation, which is slightly larger than the UVOT PSF (Breeveld
+et al. 2010), these sources are cross-matched with known catalogue
+sources. We find that out of the 30 sources detected by UVOT, 28
+are spatially coincident with stars catalogued in the SDSS catalogue
+(DR12; Alam et al. 2015), and one source is coincident with a galaxy
+(AGN broadline SDSS ID: 1237652599570890948 at 𝑧 ∼ 0.156).
+This galaxy is also detected by the MeerKAT radio observations. We
+use the NASA/IPAC Extragalactic Database (NED)$^8$ to search for
+known galaxies in the FRB 20171019A uncertainty regions. We find
+multiple galaxies with unknown redshifts, therefore we cannot draw
+conclusions on the host galaxy from our observations. Using a $50''$
+circular ON region centred on the position of FRB 20171019A and
+a $50''$ OFF region that does not contain any of the detected sources,
+we run the uvotsource tool with a 5𝜎 background threshold and
+obtain a flux upper limit of $1.4 \times 10^{-16}$
+erg cm$^{-2}$ s$^{-1}$ Å$^{-1}$ without
+applying a Galactic extinction correction.
+The XRT summed image is shown in Figure 5. At the edge
+of the field-of-view, we detect a source spatially coincident with
+the Wolf 1561 star. As we consider this source unrelated to the
+FRB, we use the online Swift-XRT data products generator (Evans
+et al. 2007, 2009) to derive upper limits in the
+0.3–10 keV range on the count rate of
+0.001885 counts s$^{-1}$.
+Using
+WebPIMMS$^9$ (v4.11a) and assuming a weighted average
+$N_{\rm H} = 5.12 \times 10^{20}$ cm$^{-2}$ from the direction of the source estimated from the
+NASA’s HEASARC$^{10}$ online tools (HI4PI Collaboration et al.
+2016) and a power law model with a photon index = 2, this upper
+limit translates to an energy flux of
+$6.6 \times 10^{-14}$
+erg cm$^{-2}$ s$^{-1}$
+($8.3 \times 10^{-14}$
+erg cm$^{-2}$ s$^{-1}$ unabsorbed).
+3.3 H.E.S.S.
+No significant gamma-ray excess above the expected background
+is detected from the direction of FRB 20171019A, with 52 gamma
+candidate events from the source region and 524 background events.
+A second analysis using an independent event calibration and reconstruction (Parsons & Hinton 2014) confirms this result. A search for
+variable emission on timescales ranging from milliseconds to several minutes with tools provided in Brun et al. (2020) does not reveal
+any variability above 2.2 𝜎. For the total data set of 1.8 h, 95% confidence level (C. L.) upper limits on the photon flux are derived using
+the method described by Rolke et al. (2005). The energy threshold
+of the data is highly dependent on the zenith angle of the observations. For these observations, the zenith angles range from 15 to 25
+deg, which leads to an energy threshold for the stacked data set of
+𝐸th = 120 GeV. The upper limit on the Very High Energy (VHE)
+7 https://www.wis-tns.org/object/20171019a
+8 https://ned.ipac.caltech.edu; NED is funded by the National
+Aeronautics and Space Administration and operated by the California Institute of Technology
+9 https://heasarc.gsfc.nasa.gov/cgi-bin/Tools/w3pimms/
+w3pimms.pl
+10 https://heasarc.gsfc.nasa.gov/cgi-bin/Tools/w3nh/w3nh.
+pl
 MNRAS 000, 1–15 (2021)
-MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 7
-Figure 2. FRB 20190714A MeerKAT epoch I image (top) and a zoom-in (bottom) around the position of the FRB indicated by the cyan circle. White contours
-(levels: 300, 500, 900, 1200, 1600 counts) represent the PanSTARRS 𝑖-band optical counterpart coincident in position with the persistent radio emission. The
-white ellipse in the bottom left corner represents the beam size of MeerKAT. The cyan cross indicates the position of the detected compact emission in our
-e-MERLIN observations.
+ Chibueze et al.
+Figure 1. Astrometric comparison between MeerKAT and NVSS discrete compact sources.The open circles represent the difference in position between the
+MeerKAT and NVSS sources.
+gamma-ray flux above that threshold and assuming an energy
+dependence following $E^{-2}$ is
+$\Phi(E > 120\,\mathrm{GeV}) < 2.10 \times 10^{-12}$
+cm$^{-2}$ s$^{-1}$
+or
+$\Phi(E > 120\,\mathrm{GeV}) < 1.7 \times 10^{-12}$
+erg cm$^{-2}$ s$^{-1}$.
+A variation of
+± 0.5 of the assumed spectral index leads to a variation in the upper
+limit of less than ± 19%. A map of energy flux upper limits covering
+the full region accessible within the H.E.S.S. field of view above
+120 GeV is given in Figure 6.
+4 DISCUSSION
+Of the targeted FRB fields reported here, only FRB 20190714A
+is observed to be spatially coincident with a persistent radio continuum source. We obtain an upper limit of
+∼ 15 𝜇Jy beam$^{-1}$ for
+FRBs 20190711A and 20171019A, respectively, and a peak intensity of ∼ 53 𝜇Jy beam$^{-1}$
+for the emission coincident with FRB
+20190714A. This source is detected at both epochs with similar
+intensities within the measured rms of the images (see Tables 1 and
+2 for details). The values in Table 2 are derived by carrying
+out 2D Gaussian fits using similar ellipses enclosing the detected
+persistent emission. The average flux density is ∼ 3 times less than
+that of the persistent source associated with FRB 20121102A, one
+of the most prolific repeaters, located at 𝑧 = 0.19273(8). Persistent
+radio emission from FRB 20201124A was detected by the uGMRT
+(Wharton et al. 2021) and the JVLA (Ricci et al. 2021) on angular
+scales of a few arcseconds. However, it is resolved out at scales of
+∼ 0.1 arcseconds with the European VLBI Network (Marcote et al.
+2021) suggesting that it is not a compact source directly associated
+with the FRB. In contrast, the other localised, prolific repeating
+FRB 20180916A has no persistent radio counterpart.
+In the image in Figure 3 one can see that the persistent radio
+source lies at the edge of the optical extent of the host galaxy
+as seen in PanSTARRS observations (Heintz et al. 2020). Our
+derived 1283 MHz peak position with MeerKAT places it just
+$1.68''$ away from the position of FRB 20190714A
+($\alpha_{\rm J2000}$, $\delta_{\rm J2000}$
+= $12^{\rm h}15^{\rm m}55.12^{\rm s}$, $-13^\circ01'15.70''$;
+Heintz et al. 2020). The positional uncertainty on the FRB position is
+$0.283''$. Similarly, the peak
+1.51 GHz e-MERLIN position of the persistent radio source is separated from the position of FRB 20190714A by
+$0.53''$. The persistent
+source near FRB 20190714A has a flux broadly consistent with the
+MeerKAT flux and is unresolved on the e-MERLIN baselines. The
 MNRAS 000, 1–15 (2021)
-8 Chibueze et al.
-Figure 3. FRB 20190714A MeerKAT epoch II image (top) and a zoom-in (bottom) around the position of the FRB indicated by the cyan circle. White contours
-(levels: 300, 500, 900, 1200, 1600 counts) represent the PanSTARRS 𝑖-band optical counterpart coincident in position with the persistent radio emission. The
-white ellipse in the bottom left corner represents the beam size of MeerKAT. The cyan cross indicates the position of the detected compact emission in our
-e-MERLIN observations.
+MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 7
+Figure 2. FRB 20190714A MeerKAT epoch I image (top) and a zoom-in (bottom) around the position of the FRB indicated by the cyan circle. White contours
+(levels: 300, 500, 900, 1200, 1600 counts) represent the PanSTARRS 𝑖-band optical counterpart coincident in position with the persistent radio emission. The
+white ellipse in the bottom left corner represents the beam size of MeerKAT. The cyan cross indicates the position of the detected compact emission in our
+e-MERLIN observations.
 MNRAS 000, 1–15 (2021)
-MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 9
-Figure 4. UVOT summed image of FRB 20171019A region taken during the MWL observation campaign in September-October 2019. The white circles
-indicate sources detected above 5𝜎. The cyan dot denotes the location of FRB 20171019A, the circle around it indicates the region used to derive the upper
-limits while the magenta region indicates the background region used. The green box indicates FRB 20171019A 90% localisation region as reported in Kumar
-et al. (2019).
-Table 1. Details of the FRB fields observed with MeerKAT.
-Field name Observation date Synthesized beam rms (𝜇Jy beam−1
-) Detected?
-FRB 20171019A 28 September 2019 – No (calibration failure)
-FRB 20171019A 18 October 2019 6.
-008 × 5.
-000 5.2 < 15𝜇Jy beam−1
-FRB 20190711A 23 August 2019 11.
-007 × 4.
-009 4.9 < 15𝜇Jy beam−1
-FRB 20190711A 09 September 2019 12.
-005 × 4.
-009 4.6 < 15𝜇Jy beam−1
-FRB 20190714A 14 September 2019 7.
-001 × 6.
-002 4.2 54.4 𝜇Jy beam−1
-FRB 20190714A 28 September 2019 6.
-005 × 5.
-001 5.8 52.0 𝜇Jy beam−1
-Table 2. Details of the radio continuum source associated with FRB 20190714A.
-Field name Observation date Telescope 𝜈centre (GHz) 𝛼J2000 𝛿J2000 Maj. × min. axis Pos. angle Int. flux density
-FRB 20190714A 28 September 2019 MeerKAT 1.283 12ℎ15𝑚55𝑠
-.154 -13◦01017.
-0030 9.
-006 × 7.
-004 88.7◦ 87.4 𝜇Jy
-FRB 20190714A 18 October 2019 MeerKAT 1.283 12ℎ15𝑚55𝑠
-.193 −13◦01017.
-0018 8.
-002 × 6.
-004 12.2◦ 80.7 𝜇Jy
-FRB 20190714A 13 January 2021 e-MERLIN 1.510 12ℎ15𝑚55𝑠
-.116 −13◦01014.
-0051 0.
-0015 × 0.
-0065 17.6◦ 107.5 𝜇Jy
-large offset from the centre of the galaxy makes the persistent source
-unlikely to be an AGN. So far this FRB has not been seen to repeat.
-Higher resolution imaging will be required to be certain of a direct
-association of the persistent source with the FRB. We did not have
-sufficient sensitivity in the sub-band images, thus, we are unable to
-derive the spectral index of the emission of the host galaxy.
-Our e-MERLIN observations probe a different spatial
-scale than the size of the persistent radio source associated
-with FRB 20121102A. At the angular diameter distance of
+ Chibueze et al.
+Figure 3. FRB 20190714A MeerKAT epoch II image (top) and a zoom-in (bottom) around the position of the FRB indicated by the cyan circle. White contours
+(levels: 300, 500, 900, 1200, 1600 counts) represent the PanSTARRS 𝑖-band optical counterpart coincident in position with the persistent radio emission. The
+white ellipse in the bottom left corner represents the beam size of MeerKAT. The cyan cross indicates the position of the detected compact emission in our
+e-MERLIN observations.
 MNRAS 000, 1–15 (2021)
-10 Chibueze et al.
-Figure 5. XRT summed image of FRB 20171019A region taken during the MWL observation campaign in September - October 2019. The position of the
-Wolf 1561 star is shown in cyan and is labelled. The green box indicates FRB 20171019A 90% localisation region as reported in Kumar et al. (2019).
-FRB 20190714A (780 Mpc), an unresolved source with an an￾gular size of 0.
-006 corresponds to a physical extent of .2.3 kpc. The
-uGMRT reported the detection of an unresolved radio emission at
-650 MHz with a flux density of 700±100 𝜇Jy (Wharton et al. 2021),
-while the JVLA detected persistent emission with a flux density of
-340 ± 30 𝜇Jy at 3 GHz (Ricci et al. 2021). Assuming the estimated
-spectral index between these frequencies (∼ −0.5, Ricci et al. 2021),
-the 1.3 GHz flux density would be ∼ 500 𝜇Jy (similar to the 3-𝜎
-upper limit on observations from 1 − 2 GHz; Law et al. 2021). The
-flux density we measured for FRB 20190714A is a factor of ∼10
-lower than FRB20201124A, but FRB 20190714A is also a factor
-2.6 more distant. Therefore, the flux densities would be comparable
-if they were at similar distances.
-Given the resolution of MeerKAT we are unable to defini￾tively state whether the persistent emission is associated with a
-star-forming region or the FRB itself. However, the increased reso￾lution with the e-MERLIN baselines would tend to favour a compact
-source similar to the one observed in FRB 20121102A. One of the
-leading models to explain the bursts from, and radio counterpart
-to FRB 20121102A, is a young nebula powered flaring magnetar
-embedded in a 20–50 year-old supernova remnant (Beloborodov
-2017; Metzger et al. 2019). The lack of a bright persistent radio
-source associated with the repeater FRB 20180916A suggests that
-it is comparatively older at & 200 − 500 years and the persistent
-radio source may have faded. In the model by Metzger et al. (2019),
-the nebula is suggested to contribute significantly to the rotation
-measure and dispersion measure (DM), as well as to the persis￾tent radio luminosity. These values are expected to decrease on a
-timescale of a few decades to centuries. Given the association of a
-comparatively fainter persistent source, FRB 20190714A may po￾tentially be a repeating FRB whose age lies between that of FRB
-20121102A and FRB 20180916A. Millisecond magnetars formed
-through standard astrophysical channels such as hydrogen poor su￾perluminous supernovae and long duration gamma-ray bursts are
-consistent with the progenitors of FRBs expected in low-metallicity
-dwarf galaxies with high specific star-formation rate such as for
-FRB 20121102A. However, Margalit et al. (2019) note that it is also
-possible to form such sources through a variety of channels, includ￾ing binary neutron star mergers and accretion induced collapse of
-white dwarfs in environments and host galaxy demographics differ￾ent to FRB 20121102A. Such suggestions are consistent with recent
-localisations (e.g. Heintz et al. 2020).
-The X-ray and VHE observations with Swift and H.E.S.S.
-allows us to probe non-thermal persistent emission associated to
-the FRB host galaxy or its source. Recently, H.E.S.S. observed
-SGR1935+2154 (H.E.S.S. collaboration 2021) that is a Galactic
-magnetar linked to a repeating FRB and its first X-ray counterpart.
-Magnetar X-ray flares could in fact be non-thermal in nature (Li et al.
-2021) indicating the presence of particle acceleration that could po￾tentially reach the VHE domain. The inverse Compton process is a
-primary candidate for the production of VHE non-thermal emission.
-H.E.S.S. observations did not lead to a detection of a persistent or a
-transient source associated to FRB 20171019A. We found no X-ray
-counterparts and thus derived the upper limits to constrain these
+MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 9
+Figure 4. UVOT summed image of FRB 20171019A region taken during the MWL observation campaign in September-October 2019. The white circles
+indicate sources detected above 5𝜎. The cyan dot denotes the location of FRB 20171019A, the circle around it indicates the region used to derive the upper
+limits while the magenta region indicates the background region used. The green box indicates FRB 20171019A 90% localisation region as reported in Kumar
+et al. (2019).
+Table 1. Details of the FRB fields observed with MeerKAT.
+Field name Observation date Synthesized beam rms (𝜇Jy beam$^{-1}$) Detected?
+FRB 20171019A 28 September 2019 – No (calibration failure)
+FRB 20171019A 18 October 2019
+$6.8'' \times 5.0''$ 5.2 < 15 𝜇Jy beam$^{-1}$
+FRB 20190711A 23 August 2019
+$11.7'' \times 4.9''$ 4.9 < 15 𝜇Jy beam$^{-1}$
+FRB 20190711A 09 September 2019
+$12.5'' \times 4.9''$ 4.6 < 15 𝜇Jy beam$^{-1}$
+FRB 20190714A 14 September 2019
+$7.1'' \times 6.2''$ 4.2 54.4 𝜇Jy beam$^{-1}$
+FRB 20190714A 28 September 2019
+$6.5'' \times 5.1''$ 5.8 52.0 𝜇Jy beam$^{-1}$
+Table 2. Details of the radio continuum source associated with FRB 20190714A.
+Field name Observation date Telescope $\nu_{\rm centre}$ (GHz) $\alpha_{\rm J2000}$ $\delta_{\rm J2000}$ Maj. × min. axis Pos. angle Int. flux density
+FRB 20190714A 28 September 2019 MeerKAT 1.283 $12^{\rm h}15^{\rm m}55.154^{\rm s}$ $-13^\circ01'17.30''$
+$9.6'' \times 7.4''$ $88.7^\circ$ 87.4 𝜇Jy
+FRB 20190714A 18 October 2019 MeerKAT 1.283 $12^{\rm h}15^{\rm m}55.193^{\rm s}$ $-13^\circ01'17.18''$
+$8.2'' \times 6.4''$ $12.2^\circ$ 80.7 𝜇Jy
+FRB 20190714A 13 January 2021 e-MERLIN 1.510 $12^{\rm h}15^{\rm m}55.116^{\rm s}$ $-13^\circ01'14.51''$
+$0.15'' \times 0.65''$ $17.6^\circ$ 107.5 𝜇Jy
+large offset from the centre of the galaxy makes the persistent source
+unlikely to be an AGN. So far this FRB has not been seen to repeat.
+Higher resolution imaging will be required to be certain of a direct
+association of the persistent source with the FRB. We did not have
+sufficient sensitivity in the sub-band images, thus, we are unable to
+derive the spectral index of the emission of the host galaxy.
+Our e-MERLIN observations probe a different spatial
+scale than the size of the persistent radio source associated
+with FRB 20121102A. At the angular diameter distance of
 MNRAS 000, 1–15 (2021)
-MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 11
-Figure 6. Map of upper limits on the VHE gamma-ray energy flux derived from the H.E.S.S. observations. The limits are valid above 120 GeV and assume
-a photon flux distribution following an 𝐸
-−2 dependence. The green box indicates the FRB 20171019A 90% localisation region as reported in Kumar et al.
-(2019).The oversampling radius is 0.1◦
-.
-emissions. In the case of existence of X-ray non-thermal outbursts,
-the lack of VHE detection could indicate that inverse Compton is
-weak in the vicinity of the magnetars or that the VHE gamma-ray
-emission is quenched. This latter scenario could be explained by the
-fact that inverse Compton is taking place too close to the magne￾tar’s surface, where pair production and photon splitting could be
-responsible for significant energy losses (Hu et al. 2019), preventing
-energetic particles and photons to reach the nebula.
-No persistent emissions were detected towards FRB
-20190711A and FRB 20171019A in our MeerKAT observations
-(see Figures 7, 8, and 9), therefore no follow up observations were
-conducted towards those FRBs.
-5 CONCLUSIONS
-Several FRB models envision persistent emission to be associated
-with these sources. In this paper, we conducted radio observations
-of three FRBs (FRB 20190714A, 20190711A and 20171019A),
-and also a multi-wavelength campaign on one of these (FRB
-20171019A).
-We detected persistent compact radio emission associated with
-FRB 20190714A (at 𝑧 = 0.2365) using the MeerKAT and e￾MERLIN radio telescope. This represents the first detection of the
-radio continuum emission associated with the host (galaxy) of FRB
-20190714A and is only the third known FRB to have such an as￾sociation. We furthermore obtained a radio upper limit of∼ 15𝜇Jy
-beam−1
-for the repeating FRBs 20190711A and 20171019A.
-We also performed UV, X-ray and VHE observations with the
-Swift and H.E.S.S. instruments and obtained upper limits in the three
-domains constraining the MWL emissions from FRB 20171019A.
-The search for FRB MWL counterparts is ongoing within the
-H.E.S.S. collaboration and more results will be published in fu￾ture works.
-Given the association of a comparatively fainter persistent
-source, FRB 20190714A may potentially be a repeating FRB whose
-age lies between that of FRB 20121102A and FRB 20180916A.
-ACKNOWLEDGEMENTS
-This paper makes use of the MeerKAT data (Project ID: SCI￾20190418-VC-01). The MeerKAT telescope is operated by the
-South African Radio Astronomy Observatory, which is a facility
-of the National Research Foundation, an agency of the Depart￾ment of Science and Innovation (DSI). This work made use of the
-Inter-University Institute for Data Intensive Astronomy (IDIA) vi￾sualization lab https://vislab.idia.ac.za. IDIA is a partnership of the
-University of Cape Town, the University of Pretoria, the University
-of the Western Cape and the South African Radio astronomy Obser￾vatory. e-MERLIN is a National Facility operated by the University
-of Manchester at Jodrell Bank Observatory on behalf of STFC.
-The authors acknowledge funding from the European Research
-Council (ERC) under the European Union’s Horizon 2020 research
-and innovation programme (grant agreement No 694745). The sup￾port of the Namibian authorities and of the University of Namibia
-in facilitating the construction and operation of H.E.S.S. is grate￾fully acknowledged, as is the support by the German Ministry for
+10 Chibueze et al.
+Figure 5. XRT summed image of FRB 20171019A region taken during the MWL observation campaign in September - October 2019. The position of the
+Wolf 1561 star is shown in cyan and is labelled. The green box indicates FRB 20171019A 90% localisation region as reported in Kumar et al. (2019).
+FRB 20190714A (780 Mpc), an unresolved source with an angular size of
+$0''.6$ corresponds to a physical extent of $\lesssim 2.3$ kpc. The
+uGMRT reported the detection of an unresolved radio emission at
+650 MHz with a flux density of 700±100 𝜇Jy (Wharton et al. 2021),
+while the JVLA detected persistent emission with a flux density of
+340 ± 30 𝜇Jy at 3 GHz (Ricci et al. 2021). Assuming the estimated
+spectral index between these frequencies (∼ −0.5, Ricci et al. 2021),
+the 1.3 GHz flux density would be ∼ 500 𝜇Jy (similar to the 3-𝜎
+upper limit on observations from 1 − 2 GHz; Law et al. 2021). The
+flux density we measured for FRB 20190714A is a factor of ∼10
+lower than FRB20201124A, but FRB 20190714A is also a factor
+2.6 more distant. Therefore, the flux densities would be comparable
+if they were at similar distances.
+Given the resolution of MeerKAT we are unable to definitively state whether the persistent emission is associated with a
+star-forming region or the FRB itself. However, the increased resolution with the e-MERLIN baselines would tend to favour a compact
+source similar to the one observed in FRB 20121102A. One of the
+leading models to explain the bursts from, and radio counterpart
+to FRB 20121102A, is a young nebula powered flaring magnetar
+embedded in a 20–50 year-old supernova remnant (Beloborodov
+2017; Metzger et al. 2019). The lack of a bright persistent radio
+source associated with the repeater FRB 20180916A suggests that
+it is comparatively older at $\gtrsim$ 200–500 years and the persistent
+radio source may have faded. In the model by Metzger et al. (2019),
+the nebula is suggested to contribute significantly to the rotation
+measure and dispersion measure (DM), as well as to the persistent radio luminosity. These values are expected to decrease on a
+timescale of a few decades to centuries. Given the association of a
+comparatively fainter persistent source, FRB 20190714A may potentially be a repeating FRB whose age lies between that of FRB
+20121102A and FRB 20180916A. Millisecond magnetars formed
+through standard astrophysical channels such as hydrogen poor superluminous supernovae and long duration gamma-ray bursts are
+consistent with the progenitors of FRBs expected in low-metallicity
+dwarf galaxies with high specific star-formation rate such as for
+FRB 20121102A. However, Margalit et al. (2019) note that it is also
+possible to form such sources through a variety of channels, including binary neutron star mergers and accretion induced collapse of
+white dwarfs in environments and host galaxy demographics different to FRB 20121102A. Such suggestions are consistent with recent
+localisations (e.g. Heintz et al. 2020).
+The X-ray and VHE observations with Swift and H.E.S.S.
+allow us to probe non-thermal persistent emission associated with
+the FRB host galaxy or its source. Recently, H.E.S.S. observed
+SGR1935+2154 (H.E.S.S. collaboration 2021) that is a Galactic
+magnetar linked to a repeating FRB and its first X-ray counterpart.
+Magnetar X-ray flares could in fact be non-thermal in nature (Li et al.
+2021) indicating the presence of particle acceleration that could potentially reach the VHE domain. The inverse Compton process is a
+primary candidate for the production of VHE non-thermal emission.
+H.E.S.S. observations did not lead to a detection of a persistent or a
+transient source associated to FRB 20171019A. We found no X-ray
+counterparts and thus derived the upper limits to constrain these
 MNRAS 000, 1–15 (2021)
-12 Chibueze et al.
-Figure 7. FRB 20171019A MeerKAT image and a zoom-in (insert) around the position of the FRB. The white ellipse on the bottom left corner of the insert
-represent the beam size of MeerKAT.
-Education and Research (BMBF), the Max Planck Society, the
-German Research Foundation (DFG), the Helmholtz Association,
-the Alexander von Humboldt Foundation, the French Ministry of
-Higher Education, Research and Innovation, the Centre National
-de la Recherche Scientifique (CNRS/IN2P3 and CNRS/INSU),
-the Commissariat à l’énergie atomique et aux énergies alterna￾tives (CEA), the U.K. Science and Technology Facilities Council
-(STFC), the Knut and Alice Wallenberg Foundation, the National
-Science Centre, Poland grant no. 2016/22/M/ST9/00382, the South
-African Department of Science and Technology and National Re￾search Foundation, the University of Namibia, the National Com￾mission on Research, Science & Technology of Namibia (NCRST),
-the Austrian Federal Ministry of Education, Science and Research
-and the Austrian Science Fund (FWF), the Australian Research
-Council (ARC), the Japan Society for the Promotion of Science
-and by the University of Amsterdam. We appreciate the excellent
-work of the technical support staff in Berlin, Zeuthen, Heidelberg,
-Palaiseau, Paris, Saclay, Tübingen and in Namibia in the construc￾tion and operation of the equipment. This work benefited from
-services provided by the H.E.S.S. Virtual Organisation, supported
-by the national resource providers of the EGI Federation.
-DATA AVAILABILITY
-The data underlying this article will be shared on reasonable request
-to the corresponding authors.
-REFERENCES
-Adámek K., Armour W., 2016, arXiv e-prints, p. arXiv:1611.09704
-Adámek K., Armour W., 2019, A GPU Implementation of the Harmonic
-Sum Algorithm. p. 489
-Adámek K., Dimoudi S., Giles M., Armour W., 2017, arXiv e-prints, p.
-arXiv:1711.10855
-Aharonian F., et al., 2006, A&A, 457, 899
-Alam S., et al., 2015, The Astrophysical Journal Supplement Series, 219, 12
-Ashton T., et al., 2020, arXiv e-prints, p. arXiv:2001.04510
-Bannister K. W., et al., 2019, Science, 365, 565
-Bassa C. G., et al., 2017, ApJ, 843, L8
-Beloborodov A. M., 2017, ApJ, 843, L26
-Berge D., Funk S., Hinton J., 2007, A&A, 466, 1219
-Bhandari S., et al., 2020, ApJ, 895, L37
-Bolmont J., et al., 2014, Nuclear Instruments and Methods in Physics Re￾search Section A: Accelerators, Spectrometers, Detectors and Associ￾ated Equipment, 761, 46–57
-Breeveld A. A., et al., 2010, Monthly Notices of the Royal Astronomical
-Society, 406, 1687
-Brun F., Piel Q., de Naurois M., Bernhard S., 2020, Astropart.Phys., 118,
-102429
-Burrows D. N., et al., 2005, Space Sci. Rev., 120, 165
-Caleb M., Keane E., 2021, Universe, 7, 453
-Caleb M., Stappers B. W., Rajwade K., Flynn C., 2019, MNRAS, 484, 5500
-Caleb M., et al., 2020, MNRAS, 496, 4565
-Chatterjee S., et al., 2017, Nature, 541, 58
-Chime/FRB Collaboration 2021, The Astronomer’s Telegram, 14497, 1
-Cordes J. M., Wasserman I., 2016, MNRAS, 457, 232
-Dai Z. G., Wang J. S., Yu Y. W., 2017, ApJ, 838, L7
-Dimoudi S., Armour W., 2015, arXiv e-prints, p. arXiv:1511.07343
-Dimoudi S., Adamek K., Thiagaraj P., Ransom S. M., Karastergiou A.,
-Armour W., 2018, ApJS, 239, 28
+MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 11
+Figure 6. Map of upper limits on the VHE gamma-ray energy flux derived from the H.E.S.S. observations. The limits are valid above 120 GeV and assume
+a photon flux distribution following an
+$E^{-2}$ dependence. The green box indicates the FRB 20171019A 90% localisation region as reported in Kumar et al.
+(2019). The oversampling radius is $0.1^{\circ}$.
+emissions. In the case of existence of X-ray non-thermal outbursts,
+the lack of VHE detection could indicate that inverse Compton is
+weak in the vicinity of the magnetars or that the VHE gamma-ray
+emission is quenched. This latter scenario could be explained by the
+fact that inverse Compton is taking place too close to the magnetar’s surface, where pair production and photon splitting could be
+responsible for significant energy losses (Hu et al. 2019), preventing
+energetic particles and photons to reach the nebula.
+No persistent emissions were detected towards FRB
+20190711A and FRB 20171019A in our MeerKAT observations
+(see Figures 7, 8, and 9), therefore no follow up observations were
+conducted towards those FRBs.
+5 CONCLUSIONS
+Several FRB models envision persistent emission to be associated
+with these sources. In this paper, we conducted radio observations
+of three FRBs (FRB 20190714A, 20190711A and 20171019A),
+and also a multi-wavelength campaign on one of these (FRB
+20171019A).
+We detected persistent compact radio emission associated with
+FRB 20190714A (at 𝑧 = 0.2365) using the MeerKAT and e-MERLIN radio telescope. This represents the first detection of the
+radio continuum emission associated with the host (galaxy) of FRB
+20190714A and is only the third known FRB to have such an association. We furthermore obtained a radio upper limit of ∼15 𝜇Jy
+beam$^{-1}$ for the repeating FRBs 20190711A and 20171019A.
+We also performed UV, X-ray and VHE observations with the
+Swift and H.E.S.S. instruments and obtained upper limits in the three
+domains constraining the MWL emissions from FRB 20171019A.
+The search for FRB MWL counterparts is ongoing within the
+H.E.S.S. collaboration and more results will be published in future works.
+Given the association of a comparatively fainter persistent
+source, FRB 20190714A may potentially be a repeating FRB whose
+age lies between that of FRB 20121102A and FRB 20180916A.
+ACKNOWLEDGEMENTS
+This paper makes use of the MeerKAT data (Project ID: SCI-20190418-VC-01). The MeerKAT telescope is operated by the
+South African Radio Astronomy Observatory, which is a facility
+of the National Research Foundation, an agency of the Department of Science and Innovation (DSI). This work made use of the
+Inter-University Institute for Data Intensive Astronomy (IDIA) visualization lab https://vislab.idia.ac.za. IDIA is a partnership of the
+University of Cape Town, the University of Pretoria, the University
+of the Western Cape and the South African Radio astronomy Observatory. e-MERLIN is a National Facility operated by the University
+of Manchester at Jodrell Bank Observatory on behalf of STFC.
+The authors acknowledge funding from the European Research
+Council (ERC) under the European Union’s Horizon 2020 research
+and innovation programme (grant agreement No 694745). The support of the Namibian authorities and of the University of Namibia
+in facilitating the construction and operation of H.E.S.S. is gratefully acknowledged, as is the support by the German Ministry for
 MNRAS 000, 1–15 (2021)
-MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 13
-Figure 8. FRB 20190711A MeerKAT epoch I image and a zoom-in (insert) around the position of the FRB. The white ellipse on the bottom left corner of the
-insert represent the beam size of MeerKAT.
-Eftekhari T., Berger E., Williams P. K. G., Blanchard P. K., 2018, ApJ, 860,
-73
-Evans P. A., et al., 2007, A&A, 469, 379
-Evans P. A., et al., 2009, MNRAS, 397, 1177
-Fong W.-f., et al., 2021, ApJ, 919, L23
-H.E.S.S. collaboration 2021, ApJ, 919, 106
-HI4PI Collaboration et al., 2016, A&A, 594, A116
-Heintz K. E., et al., 2020, ApJ, 903, 152
-Heywood I., 2020, oxkat: Semi-automated imaging of MeerKAT observa￾tions (ascl:2009.003)
-Hickish J., et al., 2016,Journal of Astronomical Instrumentation, 5, 1641001
-Hilmarsson G. H., et al., 2021, ApJ, 908, L10
-Hu K., Baring M. G., Wadiasingh Z., Harding A. K., 2019, MNRAS, 486,
-3327–3349
-Insight-HXMT 2020, SGR J1935+2154 burst list, http://hxmten.ihep.
-ac.cn/bfy/331.jhtml
-James C. W., et al., 2020, MNRAS, 495, 2416
-Jonas J., MeerKAT Team 2016, in MeerKAT Science: On the Pathway to
-the SKA. p. 1
-Kashiyama K., Ioka K., Mészáros P., 2013, ApJ, 776, L39
-Kenyon J. S., Smirnov O. M., Grobler T. L., Perkins S. J., 2018, MNRAS,
-478, 2399
-Kumar P., et al., 2019, ApJ, 887, L30
-Kumar P., et al., 2021, MNRAS, 500, 2525
-Law C., Tendulkar S., Clarke T., Aggarwal K., Bethapudy S., 2021, The
-Astronomer’s Telegram, 14526, 1
-Li C. K., et al., 2021, Nature Astronomy,
-Liu T., Romero G. E., Liu M.-L., Li A., 2016, ApJ, 826, 82
-Lorimer D. R., Bailes M., McLaughlin M. A., Narkevic D. J., Crawford F.,
-2007, Science, 318, 777
-Lyubarsky Y., 2014, MNRAS: Letters, 442, L9
-Macquart J. P., et al., 2020, Nature, 581, 391
-Marcote B., et al., 2017, ApJ, 834, L8
-Marcote B., et al., 2020, Nature, 577, 190
-Marcote B., et al., 2021, The Astronomer’s Telegram, 14603, 1
-Margalit B., Berger E., Metzger B. D., 2019, ApJ, 886, 110
-Mauch T., et al., 2020, ApJ, 888, 61
-McMullin J. P., Waters B., Schiebel D., Young W., Golap K., 2007, in
-Shaw R. A., Hill F., Bell D. J., eds, Astronomical Society of the Pacific
-Conference Series Vol. 376, Astronomical Data Analysis Software and
-Systems XVI. p. 127
-Mereghetti S., et al., 2020, ApJ, 898, L29
-Metzger B. D., Margalit B., Sironi L., 2019, MNRAS, 485, 4091
-Offringa A. R., et al., 2014, MNRAS, 444, 606
-Parsons R. D., Hinton J. A., 2014, Astroparticle Physics, 56, 26
-Petroff E., Hessels J. W. T., Lorimer D. R., 2021, arXiv e-prints, p.
-arXiv:2107.10113
-Platts E., Weltman A., Walters A., Tendulkar S. P., Gordin J. E. B., Kandhai
-S., 2019, Phys. Rep., 821, 1
-Popov S. B., Postnov K. A., 2013, arXiv e-prints, p. arXiv:1307.4924
-Popov S. B., Pshirkov M. S., 2016, MNRAS, 462, L16
-Popov S., Postnov K., Pshirkov M., 2018, International Journal of Modern
-Physics D, 27, 1844016
-Prochaska J. X., et al., 2019, Science, 366, 231
-Ravi V., 2019, Nature Astronomy, 3, 928
-Resmi L., Vink J., Ishwara-Chandra C. H., 2020, arXiv e-prints, p.
-arXiv:2010.14334
-Ricci R., Piro L., Panessa F., O’Connor B., Lotti S., Bruni G., Zhang B.,
-2021, The Astronomer’s Telegram, 14549, 1
-Ridnaia A., et al., 2021, Nature Astronomy, in press
-Rolke W. A., López A. M., Conrad J., 2005, Nuclear Instruments and Meth￾ods in Physics Research A, 551, 493
-Roming P. W. A., et al., 2005, Space Science Reviews, 120, 95–142
-Tavani M., et al., 2021, Nature Astronomy, 5, 401–407
+12 Chibueze et al.
+Figure 7. FRB 20171019A MeerKAT image and a zoom-in (insert) around the position of the FRB. The white ellipse on the bottom left corner of the insert
+represent the beam size of MeerKAT.
+Education and Research (BMBF), the Max Planck Society, the
+German Research Foundation (DFG), the Helmholtz Association,
+the Alexander von Humboldt Foundation, the French Ministry of
+Higher Education, Research and Innovation, the Centre National
+de la Recherche Scientifique (CNRS/IN2P3 and CNRS/INSU),
+the Commissariat à l’énergie atomique et aux énergies alternatives (CEA), the U.K. Science and Technology Facilities Council
+(STFC), the Knut and Alice Wallenberg Foundation, the National
+Science Centre, Poland grant no. 2016/22/M/ST9/00382, the South
+African Department of Science and Technology and National Research Foundation, the University of Namibia, the National Commission on Research, Science & Technology of Namibia (NCRST),
+the Austrian Federal Ministry of Education, Science and Research
+and the Austrian Science Fund (FWF), the Australian Research
+Council (ARC), the Japan Society for the Promotion of Science
+and by the University of Amsterdam. We appreciate the excellent
+work of the technical support staff in Berlin, Zeuthen, Heidelberg,
+Palaiseau, Paris, Saclay, Tübingen and in Namibia in the construction and operation of the equipment. This work benefited from
+services provided by the H.E.S.S. Virtual Organisation, supported
+by the national resource providers of the EGI Federation.
+DATA AVAILABILITY
+The data underlying this article will be shared on reasonable request
+to the corresponding authors.
+REFERENCES
+Adámek K., Armour W., 2016, arXiv e-prints, p. arXiv:1611.09704
+Adámek K., Armour W., 2019, A GPU Implementation of the Harmonic
+Sum Algorithm. p. 489
+Adámek K., Dimoudi S., Giles M., Armour W., 2017, arXiv e-prints, p.
+arXiv:1711.10855
+Aharonian F., et al., 2006, A&A, 457, 899
+Alam S., et al., 2015, The Astrophysical Journal Supplement Series, 219, 12
+Ashton T., et al., 2020, arXiv e-prints, p. arXiv:2001.04510
+Bannister K. W., et al., 2019, Science, 365, 565
+Bassa C. G., et al., 2017, ApJ, 843, L8
+Beloborodov A. M., 2017, ApJ, 843, L26
+Berge D., Funk S., Hinton J., 2007, A&A, 466, 1219
+Bhandari S., et al., 2020, ApJ, 895, L37
+Bolmont J., et al., 2014, Nuclear Instruments and Methods in Physics Research Section A: Accelerators, Spectrometers, Detectors and Associated Equipment, 761, 46–57
+Breeveld A. A., et al., 2010, Monthly Notices of the Royal Astronomical
+Society, 406, 1687
+Brun F., Piel Q., de Naurois M., Bernhard S., 2020, Astropart.Phys., 118,
+102429
+Burrows D. N., et al., 2005, Space Sci. Rev., 120, 165
+Caleb M., Keane E., 2021, Universe, 7, 453
+Caleb M., Stappers B. W., Rajwade K., Flynn C., 2019, MNRAS, 484, 5500
+Caleb M., et al., 2020, MNRAS, 496, 4565
+Chatterjee S., et al., 2017, Nature, 541, 58
+Chime/FRB Collaboration 2021, The Astronomer’s Telegram, 14497, 1
+Cordes J. M., Wasserman I., 2016, MNRAS, 457, 232
+Dai Z. G., Wang J. S., Yu Y. W., 2017, ApJ, 838, L7
+Dimoudi S., Armour W., 2015, arXiv e-prints, p. arXiv:1511.07343
+Dimoudi S., Adamek K., Thiagaraj P., Ransom S. M., Karastergiou A.,
+Armour W., 2018, ApJS, 239, 28
 MNRAS 000, 1–15 (2021)
-14 Chibueze et al.
-Figure 9. FRB 20190711A MeerKAT epoch II image and a zoom-in (insert) around the position of the FRB. The white ellipse on the bottom left corner of
-the insert represent the beam size of MeerKAT.
-Tendulkar S. P., et al., 2017, ApJ, 834, L7
-Thornton D., et al., 2013, Science, 341, 53
-Totani T., 2013, PASJ, 65, L12
-Vieyro F. L., Romero G. E., Bosch-Ramon V., Marcote B., del Valle M. V.,
-2017, A&A, 602, A64
-Wharton R., et al., 2021, The Astronomer’s Telegram, 14529, 1
-Yamasaki S., Totani T., Kiuchi K., 2018, PASJ, 70, 39
-Zhang B., 2018, ApJ, 854, L21
-de Naurois M., Rolland L., 2009, Astroparticle Physics, 32, 231
-APPENDIX A: AUTHOR AFFILIATIONS
-1Centre for Space Research, North-West University, Potchefstroom
-2531, South Africa
-2Department of Physics and Astronomy, Faculty of Physical Sci￾ences, University of Nigeria, Carver Building, 1 University Road,
-Nsukka 410001, Nigeria
-3
-Jodrell Bank Centre for Astrophysics, Department of Physics and
-Astronomy, University of Manchester, Manchester M13 9PL, UK
-4Sydney Institute for Astronomy, School of Physics, The University
-of Sydney, NSW 2006, Australia
-5Max-Planck-Institut für Radioastronomie, Auf dem Hügel 69, D￾53121 Bonn, Germany
-6
-IRFU, CEA, Université Paris-Saclay, F-91191 Gif-sur-Yvette,
-France
-7Department of Physics and Electronics, Rhodes University, PO
-Box 94, Grahamstown 6140, South Africa
-8South African Radio Astronomy Observatory, Black River Park, 2
-Fir Street, Observatory, Cape Town 7925, South Africa
-9Astrophysics, Department of Physics, University of Oxford, Keble
-Road, Oxford OX1 3RH, UK
-10National University of Ireland Galway, University Road, Galway,
-H91 TK33, Ireland
-11SKA Observatory, Jodrell Bank Observatory, Macclesfield,
-Cheshire SK11 9DL, UK
-12Dublin Institute for Advanced Studies, 31 Fitzwilliam Place,
-Dublin 2, Ireland
-13Max-Planck-Institut für Kernphysik, P.O. Box 103980, D 69029
-Heidelberg, Germany
-14High Energy Astrophysics Laboratory, RAU, 123 Hovsep Emin
-St Yerevan 0051, Armenia
-15Landessternwarte, Universität Heidelberg, Königstuhl, D 69117
-Heidelberg, Germany
-16Aix Marseille Université, CNRS/IN2P3, CPPM, Marseille,
-France
-17Laboratoire Leprince-Ringuet, École Polytechnique, CNRS, In￾stitut Polytechnique de Paris, F-91128 Palaiseau, France
-18University of Namibia, Department of Physics, Private Bag
-13301, Windhoek 10005, Namibia
-19Instytut Fizyki Ja¸drowej PAN, ul. Radzikowskiego 152, 31-342
-Kraków, Poland
-20DESY, D-15738 Zeuthen, Germany
-21School of Physics, University of the Witwatersrand, 1 Jan Smuts
-Avenue, Braamfontein, Johannesburg, 2050 South Africa
-22Université de Paris, CNRS, Astroparticule et Cosmologie, F￾75013 Paris, France
-23Department of Physics and Electrical Engineering, Linnaeus Uni￾versity, 351 95 Växjö, Sweden
-24Laboratoire Univers et Théories, Observatoire de Paris, Univer￾sité PSL, CNRS, Université de Paris, 92190 Meudon, France
+MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 13
+Figure 8. FRB 20190711A MeerKAT epoch I image and a zoom-in (insert) around the position of the FRB. The white ellipse on the bottom left corner of the
+insert represent the beam size of MeerKAT.
+Eftekhari T., Berger E., Williams P. K. G., Blanchard P. K., 2018, ApJ, 860,
+73
+Evans P. A., et al., 2007, A&A, 469, 379
+Evans P. A., et al., 2009, MNRAS, 397, 1177
+Fong W.-f., et al., 2021, ApJ, 919, L23
+H.E.S.S. collaboration 2021, ApJ, 919, 106
+HI4PI Collaboration et al., 2016, A&A, 594, A116
+Heintz K. E., et al., 2020, ApJ, 903, 152
+Heywood I., 2020, oxkat: Semi-automated imaging of MeerKAT observations (ascl:2009.003)
+Hickish J., et al., 2016,Journal of Astronomical Instrumentation, 5, 1641001
+Hilmarsson G. H., et al., 2021, ApJ, 908, L10
+Hu K., Baring M. G., Wadiasingh Z., Harding A. K., 2019, MNRAS, 486,
+3327–3349
+Insight-HXMT 2020, SGR J1935+2154 burst list, http://hxmten.ihep.
+ac.cn/bfy/331.jhtml
+James C. W., et al., 2020, MNRAS, 495, 2416
+Jonas J., MeerKAT Team 2016, in MeerKAT Science: On the Pathway to
+the SKA. p. 1
+Kashiyama K., Ioka K., Mészáros P., 2013, ApJ, 776, L39
+Kenyon J. S., Smirnov O. M., Grobler T. L., Perkins S. J., 2018, MNRAS,
+478, 2399
+Kumar P., et al., 2019, ApJ, 887, L30
+Kumar P., et al., 2021, MNRAS, 500, 2525
+Law C., Tendulkar S., Clarke T., Aggarwal K., Bethapudy S., 2021, The
+Astronomer’s Telegram, 14526, 1
+Li C. K., et al., 2021, Nature Astronomy,
+Liu T., Romero G. E., Liu M.-L., Li A., 2016, ApJ, 826, 82
+Lorimer D. R., Bailes M., McLaughlin M. A., Narkevic D. J., Crawford F.,
+2007, Science, 318, 777
+Lyubarsky Y., 2014, MNRAS: Letters, 442, L9
+Macquart J. P., et al., 2020, Nature, 581, 391
+Marcote B., et al., 2017, ApJ, 834, L8
+Marcote B., et al., 2020, Nature, 577, 190
+Marcote B., et al., 2021, The Astronomer’s Telegram, 14603, 1
+Margalit B., Berger E., Metzger B. D., 2019, ApJ, 886, 110
+Mauch T., et al., 2020, ApJ, 888, 61
+McMullin J. P., Waters B., Schiebel D., Young W., Golap K., 2007, in
+Shaw R. A., Hill F., Bell D. J., eds, Astronomical Society of the Pacific
+Conference Series Vol. 376, Astronomical Data Analysis Software and
+Systems XVI. p. 127
+Mereghetti S., et al., 2020, ApJ, 898, L29
+Metzger B. D., Margalit B., Sironi L., 2019, MNRAS, 485, 4091
+Offringa A. R., et al., 2014, MNRAS, 444, 606
+Parsons R. D., Hinton J. A., 2014, Astroparticle Physics, 56, 26
+Petroff E., Hessels J. W. T., Lorimer D. R., 2021, arXiv e-prints, p.
+arXiv:2107.10113
+Platts E., Weltman A., Walters A., Tendulkar S. P., Gordin J. E. B., Kandhai
+S., 2019, Phys. Rep., 821, 1
+Popov S. B., Postnov K. A., 2013, arXiv e-prints, p. arXiv:1307.4924
+Popov S. B., Pshirkov M. S., 2016, MNRAS, 462, L16
+Popov S., Postnov K., Pshirkov M., 2018, International Journal of Modern
+Physics D, 27, 1844016
+Prochaska J. X., et al., 2019, Science, 366, 231
+Ravi V., 2019, Nature Astronomy, 3, 928
+Resmi L., Vink J., Ishwara-Chandra C. H., 2020, arXiv e-prints, p.
+arXiv:2010.14334
+Ricci R., Piro L., Panessa F., O’Connor B., Lotti S., Bruni G., Zhang B.,
+2021, The Astronomer’s Telegram, 14549, 1
+Ridnaia A., et al., 2021, Nature Astronomy, in press
+Rolke W. A., López A. M., Conrad J., 2005, Nuclear Instruments and Methods in Physics Research A, 551, 493
+Roming P. W. A., et al., 2005, Space Science Reviews, 120, 95–142
+Tavani M., et al., 2021, Nature Astronomy, 5, 401–407
 MNRAS 000, 1–15 (2021)
-MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 15
-25Sorbonne Université, Université Paris Diderot, Sorbonne Paris
-Cité, CNRS/IN2P3, Laboratoire de Physique Nucléaire et de Hautes
-Energies,
-LPNHE, 4 Place Jussieu, F-75252 Paris, France
-26Université Savoie Mont Blanc, CNRS, Laboratoire d’Annecy de
-Physique des Particules - IN2P3, 74000 Annecy, France
-27Astronomical Observatory, The University of Warsaw, Al. Ujaz￾dowskie 4, 00-478 Warsaw, Poland
-28Friedrich-Alexander-Universität Erlangen-Nürnberg, Erlangen
-Centre for Astroparticle Physics, Erwin-Rommel-Str. 1, D 91058
-Erlangen, Germany
-29University of Oxford, Department of Physics, Denys Wilkinson
-Building, Keble Road, Oxford OX1 3RH, UK
-30Université Bordeaux, CNRS/IN2P3, Centre d’Études Nucléaires
-de Bordeaux Gradignan, 33175 Gradignan, France
-31Institut für Physik und Astronomie, Universität Potsdam, Karl￾Liebknecht-Strasse 24/25, D 14476 Potsdam, Germany
-32Obserwatorium Astronomiczne, Uniwersytet Jagielloński, ul.
-Orla 171, 30-244 Kraków, Poland
-33Institute of Astronomy, Faculty of Physics, Astronomy and In￾formatics, Nicolaus Copernicus University, Grudziadzka 5, 87-100
-Torun, Poland
-34Nicolaus Copernicus Astronomical Center, Polish Academy of
-Sciences, ul. Bartycka 18, 00-716 Warsaw, Poland
-35Institut für Astronomie und Astrophysik, Universität Tübingen,
-Sand 1, D 72076 Tübingen, Germany
-36Institut für Physik, Humboldt-Universität zu Berlin, Newtonstr.
-15, D 12489 Berlin, Germany
-37Laboratoire Univers et Particules de Montpellier, Université
-Montpellier, CNRS/IN2P3, CC 72, Place Eugène Bataillon, F￾34095 Montpellier Cedex 5, France
-38Institut für Astro- und Teilchenphysik, Leopold-Franzens￾Universität Innsbruck, A-6020 Innsbruck, Austria
-39Department of Physics and Astronomy, The University of Leices￾ter, University Road, Leicester, LE1 7RH, United Kingdom
-40GRAPPA, Anton Pannekoek Institute for Astronomy, University
-of Amsterdam, Science Park 904, 1098 XH Amsterdam, The Nether￾lands
-41School of Physical Sciences, University of Adelaide, Adelaide
-5005, Australia
-42Yerevan Physics Institute, 2 Alikhanian Brothers St., 375036
-Yerevan, Armenia
-43Kavli Institute for the Physics and Mathematics of the Universe
-(WPI), The University of Tokyo Institutes for Advanced Study
-(UTIAS),
-The University of Tokyo, 5-1-5 Kashiwa-no-Ha, Kashiwa, Chiba,
-277-8583, Japan
-44Department of Physics, Konan University, 8-9-1 Okamoto, Hi￾gashinada, Kobe, Hyogo 658-8501, Japan
-45RIKEN, 2-1 Hirosawa, Wako, Saitama 351-0198, Japan
-This paper has been typeset from a TEX/LATEX file prepared by the author.
+14 Chibueze et al.
+Figure 9. FRB 20190711A MeerKAT epoch II image and a zoom-in (insert) around the position of the FRB. The white ellipse on the bottom left corner of
+the insert represent the beam size of MeerKAT.
+Tendulkar S. P., et al., 2017, ApJ, 834, L7
+Thornton D., et al., 2013, Science, 341, 53
+Totani T., 2013, PASJ, 65, L12
+Vieyro F. L., Romero G. E., Bosch-Ramon V., Marcote B., del Valle M. V.,
+2017, A&A, 602, A64
+Wharton R., et al., 2021, The Astronomer’s Telegram, 14529, 1
+Yamasaki S., Totani T., Kiuchi K., 2018, PASJ, 70, 39
+Zhang B., 2018, ApJ, 854, L21
+de Naurois M., Rolland L., 2009, Astroparticle Physics, 32, 231
+APPENDIX A: AUTHOR AFFILIATIONS
+1Centre for Space Research, North-West University, Potchefstroom
+2531, South Africa
+2Department of Physics and Astronomy, Faculty of Physical Sciences, University of Nigeria, Carver Building, 1 University Road,
+Nsukka 410001, Nigeria
+3
+Jodrell Bank Centre for Astrophysics, Department of Physics and
+Astronomy, University of Manchester, Manchester M13 9PL, UK
+4Sydney Institute for Astronomy, School of Physics, The University
+of Sydney, NSW 2006, Australia
+5Max-Planck-Institut für Radioastronomie, Auf dem Hügel 69, D-53121 Bonn, Germany
+6
+IRFU, CEA, Université Paris-Saclay, F-91191 Gif-sur-Yvette,
+France
+7Department of Physics and Electronics, Rhodes University, PO
+Box 94, Grahamstown 6140, South Africa
+8South African Radio Astronomy Observatory, Black River Park, 2
+Fir Street, Observatory, Cape Town 7925, South Africa
+9Astrophysics, Department of Physics, University of Oxford, Keble
+Road, Oxford OX1 3RH, UK
+10National University of Ireland Galway, University Road, Galway,
+H91 TK33, Ireland
+11SKA Observatory, Jodrell Bank Observatory, Macclesfield,
+Cheshire SK11 9DL, UK
+12Dublin Institute for Advanced Studies, 31 Fitzwilliam Place,
+Dublin 2, Ireland
+13Max-Planck-Institut für Kernphysik, P.O. Box 103980, D 69029
+Heidelberg, Germany
+14High Energy Astrophysics Laboratory, RAU, 123 Hovsep Emin
+St Yerevan 0051, Armenia
+15Landessternwarte, Universität Heidelberg, Königstuhl, D 69117
+Heidelberg, Germany
+16Aix Marseille Université, CNRS/IN2P3, CPPM, Marseille,
+France
+17Laboratoire Leprince-Ringuet, École Polytechnique, CNRS, Institut Polytechnique de Paris, F-91128 Palaiseau, France
+18University of Namibia, Department of Physics, Private Bag
+13301, Windhoek 10005, Namibia
+19Instytut Fizyki Jądrowej PAN, ul. Radzikowskiego 152, 31-342
+Kraków, Poland
+20DESY, D-15738 Zeuthen, Germany
+21School of Physics, University of the Witwatersrand, 1 Jan Smuts
+Avenue, Braamfontein, Johannesburg, 2050 South Africa
+22Université de Paris, CNRS, Astroparticule et Cosmologie, F-75013 Paris, France
+23Department of Physics and Electrical Engineering, Linnaeus University, 351 95 Växjö, Sweden
+24Laboratoire Univers et Théories, Observatoire de Paris, Université PSL, CNRS, Université de Paris, 92190 Meudon, France
 MNRAS 000, 1–15 (2021)
+MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 15
+25Sorbonne Université, Université Paris Diderot, Sorbonne Paris
+Cité, CNRS/IN2P3, Laboratoire de Physique Nucléaire et de Hautes
+Energies,
+LPNHE, 4 Place Jussieu, F-75252 Paris, France
+26Université Savoie Mont Blanc, CNRS, Laboratoire d’Annecy de
+Physique des Particules - IN2P3, 74000 Annecy, France
+27Astronomical Observatory, The University of Warsaw, Al. Ujazdowskie 4, 00-478 Warsaw, Poland
+28Friedrich-Alexander-Universität Erlangen-Nürnberg, Erlangen
+Centre for Astroparticle Physics, Erwin-Rommel-Str. 1, D 91058
+Erlangen, Germany
+29University of Oxford, Department of Physics, Denys Wilkinson
+Building, Keble Road, Oxford OX1 3RH, UK
+30Université Bordeaux, CNRS/IN2P3, Centre d’Études Nucléaires
+de Bordeaux Gradignan, 33175 Gradignan, France
+31Institut für Physik und Astronomie, Universität Potsdam, Karl-Liebknecht-Strasse 24/25, D 14476 Potsdam, Germany
+32Obserwatorium Astronomiczne, Uniwersytet Jagielloński, ul.
+Orla 171, 30-244 Kraków, Poland
+33Institute of Astronomy, Faculty of Physics, Astronomy and Informatics, Nicolaus Copernicus University, Grudziadzka 5, 87-100
+Torun, Poland
+34Nicolaus Copernicus Astronomical Center, Polish Academy of
+Sciences, ul. Bartycka 18, 00-716 Warsaw, Poland
+35Institut für Astronomie und Astrophysik, Universität Tübingen,
+Sand 1, D 72076 Tübingen, Germany
+36Institut für Physik, Humboldt-Universität zu Berlin, Newtonstr.
+15, D 12489 Berlin, Germany
+37Laboratoire Univers et Particules de Montpellier, Université
+Montpellier, CNRS/IN2P3, CC 72, Place Eugène Bataillon, F-34095 Montpellier Cedex 5, France
+38Institut für Astro- und Teilchenphysik, Leopold-Franzens-Universität Innsbruck, A-6020 Innsbruck, Austria
+39Department of Physics and Astronomy, The University of Leicester, University Road, Leicester, LE1 7RH, United Kingdom
+40GRAPPA, Anton Pannekoek Institute for Astronomy, University
+of Amsterdam, Science Park 904, 1098 XH Amsterdam, The Netherlands
+41School of Physical Sciences, University of Adelaide, Adelaide
+5005, Australia
+42Yerevan Physics Institute, 2 Alikhanian Brothers St., 375036
+Yerevan, Armenia
+43Kavli Institute for the Physics and Mathematics of the Universe
+(WPI), The University of Tokyo Institutes for Advanced Study
+(UTIAS),
+The University of Tokyo, 5-1-5 Kashiwa-no-Ha, Kashiwa, Chiba,
+277-8583, Japan
+44Department of Physics, Konan University, 8-9-1 Okamoto, Higashinada, Kobe, Hyogo 658-8501, Japan
+45RIKEN, 2-1 Hirosawa, Wako, Saitama 351-0198, Japan
+This paper has been typeset from a TEX/LATEX file prepared by the author.
+MNRAS 000, 1–15 (2021)
\ No newline at end of file
diff --git a/read/results/pdfium/2201.00151.txt b/read/results/pdfium/2201.00151.txt
index c2ba066..2b542c7 100644
--- a/read/results/pdfium/2201.00151.txt
+++ b/read/results/pdfium/2201.00151.txt
@@ -1,1383 +1,1378 @@
-arXiv:2201.00151v1 [astro-ph.GA] 1 Jan 2022
-Astronomy & Astrophysics manuscript no. Populations4 ©ESO 2022
-January 4, 2022
-Multiple stellar populations in Schwarzschild modeling
-and the application to the Fornax dwarf
-Klaudia Kowalczyk and Ewa L. Łokas
-Nicolaus Copernicus Astronomical Center, Polish Academy of Sciences, Bartycka 18, 00-716 Warsaw, Poland
-e-mail: klaudia.kowalczyk@gmail.com, lokas@camk.edu.pl
-January 4, 2022
-ABSTRACT
-Dwarf spheroidal (dSph) galaxies are believed to be strongly dark matter dominated and thus are considered perfect objects to study
-dark matter distribution and test theories of structure formation. They possess resolved, multiple stellar populations that offer new
-possibilities for modeling. A promising tool for the dynamical modeling of these objects is the Schwarzschild orbit superposition
-method. In this work we extend our previous implementation of the scheme to include more than one population of stars and a more
-general form of the mass-to-light ratio function. We tested the improved approach on a nearly spherical, gas-free galaxy formed in
-the cosmological context from the Illustris simulation. We modeled the binned velocity moments for stars split into two populations
-by metallicity and demonstrate that in spite of larger sampling errors the increased number of constraints leads to significantly tighter
-confidence regions on the recovered density and velocity anisotropy profiles. We then applied the method to the Fornax dSph galaxy
-with stars similarly divided into two populations. In comparison with our earlier work, we find the anisotropy parameter to be slightly
-increasing, rather than decreasing, with radius and more strongly constrained. We are also able to infer anisotropy for each stellar
-population separately and find them to be significantly different.
-Key words. galaxies: kinematics and dynamics – galaxies: structure – galaxies: fundamental parameters – galaxies: dwarf – galaxies:
-star clusters: individual: Fornax
-1. Introduction
-Dwarf spheroidal (dSph) galaxies of the Local Group (Mateo
-1998; Tolstoy et al. 2009) are considered to be a perfect tool to
-test our current theories of structure formation involving dark
-matter in the context of near-field cosmology. The objects are
-believed to be strongly dark matter dominated with mass-to-light
-ratios even on the order of a few hundred solar units. Due to their
-proximity they are also the only extragalactic systems where in￾dividual stars can be resolved and their velocities measured of￾fering the possibility to create interesting dynamical modeling
-techniques.
-The first estimates of dark matter content in dSph galaxies
-were based on a single measurement of the line-of-sight velocity
-dispersion of the stars and the application of the virial theorem.
-As the samples of the stars with kinematic measurements grew,
-it became possible to estimate the profile of the velocity disper￾sion and model it using the Jeans equation (Binney & Tremaine
-2008). Since the stars in the galaxy can move on a variety
-of orbits, from circular to radial, the degeneracy between the
-anisotropy of the orbits and the mass distribution is inherent in
-this type of modeling. The reason for this lies in the fact that
-different combinations of these quantities can reproduce the ve￾locity dispersion profile equally well.
-A way to overcome this issue, at least partially, is to resort to
-higher order line-of-sight velocity moments, such as the kurto￾sis, and use the corresponding Jeans equations. Since the kurto￾sis is more sensitive to the velocity anisotropy than to the mass
-distribution, useful constraints can be obtained on both. Still, the
-method requires large kinematic samples to estimate the velocity
-moments reliably and some assumption on the functional form
-of the anisotropy (Łokas 2002; Łokas et al. 2005).
-The Schwarzschild modeling technique (Schwarzschild
-1979) offers a different approach to estimate the properties of
-dSph galaxies without prior assumptions on the type of orbits.
-It relies on building a galaxy model out of a set of best-fitting
-orbits probed in the range of energy and angular momenta. In
-this method, the anisotropy of the stellar orbits comes out as a
-result of the modeling in the same way as the density profile. Al￾though it has been originally developed for large elliptical galax￾ies (van der Marel et al. 1998; Valluri et al. 2004; Gebhardt et al.
-2015), it has recently been adopted for use on discrete data
-characteristic of dSph galaxies and applied to a number of
-dwarfs, including Carina, Draco, Fornax, Sculptor, and Sextans
-(Jardel & Gebhardt 2008; Jardel et al. 2013; Breddels & Helmi
-2013; Breddels et al. 2013; Kowalczyk et al. 2019).
-Many dSph galaxies show signs of the presence of multiple
-stellar populations resulting from a few star formation episodes
-(Bellazzini et al. 2001; del Pino et al. 2015; Fabrizio et al. 2016;
-Pace et al. 2020). This observation offers a way to improve the
-modeling methods since, assuming dynamical equilibrium, all
-populations are supposed to be influenced by the same under￾lying gravitational potential of the galaxy, but they have dif￾ferent distributions so more constraints can be imposed during
-the modeling. This approach was first used by Battaglia et al.
-(2008) to model the mass distribution in the Sculptor dSph
-galaxy. A few attempts have also been made to constrain the
-inner slope of the dark matter profile in dSph galaxies using
-this technique (Walker & Peñarrubia 2011; Amorisco & Evans
-2012; Hayashi et al. 2018) in order to resolve the so-called cusp￾core problem. It has been shown to be difficult, however, due
+arXiv:2201.00151v1 [astro-ph.GA] 1 Jan 2022
+Astronomy & Astrophysics manuscript no. Populations4 ©ESO 2022
+January 4, 2022
+Multiple stellar populations in Schwarzschild modeling
+and the application to the Fornax dwarf
+Klaudia Kowalczyk and Ewa L. Łokas
+Nicolaus Copernicus Astronomical Center, Polish Academy of Sciences, Bartycka 18, 00-716 Warsaw, Poland
+e-mail: klaudia.kowalczyk@gmail.com, lokas@camk.edu.pl
+January 4, 2022
+ABSTRACT
+Dwarf spheroidal (dSph) galaxies are believed to be strongly dark matter dominated and thus are considered perfect objects to study
+dark matter distribution and test theories of structure formation. They possess resolved, multiple stellar populations that offer new
+possibilities for modeling. A promising tool for the dynamical modeling of these objects is the Schwarzschild orbit superposition
+method. In this work we extend our previous implementation of the scheme to include more than one population of stars and a more
+general form of the mass-to-light ratio function. We tested the improved approach on a nearly spherical, gas-free galaxy formed in
+the cosmological context from the Illustris simulation. We modeled the binned velocity moments for stars split into two populations
+by metallicity and demonstrate that in spite of larger sampling errors the increased number of constraints leads to significantly tighter
+confidence regions on the recovered density and velocity anisotropy profiles. We then applied the method to the Fornax dSph galaxy
+with stars similarly divided into two populations. In comparison with our earlier work, we find the anisotropy parameter to be slightly
+increasing, rather than decreasing, with radius and more strongly constrained. We are also able to infer anisotropy for each stellar
+population separately and find them to be significantly different.
+Key words. galaxies: kinematics and dynamics – galaxies: structure – galaxies: fundamental parameters – galaxies: dwarf – galaxies:
+star clusters: individual: Fornax
+1. Introduction
+Dwarf spheroidal (dSph) galaxies of the Local Group (Mateo
+1998; Tolstoy et al. 2009) are considered to be a perfect tool to
+test our current theories of structure formation involving dark
+matter in the context of near-field cosmology. The objects are
+believed to be strongly dark matter dominated with mass-to-light
+ratios even on the order of a few hundred solar units. Due to their
+proximity they are also the only extragalactic systems where individual stars can be resolved and their velocities measured offering the possibility to create interesting dynamical modeling
+techniques.
+The first estimates of dark matter content in dSph galaxies
+were based on a single measurement of the line-of-sight velocity
+dispersion of the stars and the application of the virial theorem.
+As the samples of the stars with kinematic measurements grew,
+it became possible to estimate the profile of the velocity dispersion and model it using the Jeans equation (Binney & Tremaine
+2008). Since the stars in the galaxy can move on a variety
+of orbits, from circular to radial, the degeneracy between the
+anisotropy of the orbits and the mass distribution is inherent in
+this type of modeling. The reason for this lies in the fact that
+different combinations of these quantities can reproduce the velocity dispersion profile equally well.
+A way to overcome this issue, at least partially, is to resort to
+higher order line-of-sight velocity moments, such as the kurtosis, and use the corresponding Jeans equations. Since the kurtosis is more sensitive to the velocity anisotropy than to the mass
+distribution, useful constraints can be obtained on both. Still, the
+method requires large kinematic samples to estimate the velocity
+moments reliably and some assumption on the functional form
+of the anisotropy (Łokas 2002; Łokas et al. 2005).
+The Schwarzschild modeling technique (Schwarzschild
+1979) offers a different approach to estimate the properties of
+dSph galaxies without prior assumptions on the type of orbits.
+It relies on building a galaxy model out of a set of best-fitting
+orbits probed in the range of energy and angular momenta. In
+this method, the anisotropy of the stellar orbits comes out as a
+result of the modeling in the same way as the density profile. Although it has been originally developed for large elliptical galaxies (van der Marel et al. 1998; Valluri et al. 2004; Gebhardt et al.
+2015), it has recently been adopted for use on discrete data
+characteristic of dSph galaxies and applied to a number of
+dwarfs, including Carina, Draco, Fornax, Sculptor, and Sextans
+(Jardel & Gebhardt 2008; Jardel et al. 2013; Breddels & Helmi
+2013; Breddels et al. 2013; Kowalczyk et al. 2019).
+Many dSph galaxies show signs of the presence of multiple
+stellar populations resulting from a few star formation episodes
+(Bellazzini et al. 2001; del Pino et al. 2015; Fabrizio et al. 2016;
+Pace et al. 2020). This observation offers a way to improve the
+modeling methods since, assuming dynamical equilibrium, all
+populations are supposed to be influenced by the same underlying gravitational potential of the galaxy, but they have different distributions so more constraints can be imposed during
+the modeling. This approach was first used by Battaglia et al.
+(2008) to model the mass distribution in the Sculptor dSph
+galaxy. A few attempts have also been made to constrain the
+inner slope of the dark matter profile in dSph galaxies using
+this technique (Walker & Peñarrubia 2011; Amorisco & Evans
+2012; Hayashi et al. 2018) in order to resolve the so-called cuspcore problem. It has been shown to be difficult, however, due
 Article number, page 1 of 12
-A&A proofs: manuscript no. Populations4
-Table 1. Properties of the Illustris galaxy used to create mock data.
-Property Value
-Subhalo ID 16960
-Number of stellar particles (N⋆) 70446
-Number of dark matter particles (NDM) 78448
-Stellar mass (M⋆) 5.74 × 1010 M⊙
-Dark matter mass (MDM) 4.91 × 1011 M⊙
-Mean mass of stellar particles 815808 M⊙
-Stellar half-mass radius 9.99 kpc
-Stellar half-number radius (r1/2) 9.6 kpc
-Axis ratio c/a within r1/2 0.907
-Axis ratio b/a within r1/2 0.949
-Triaxiality 0.56
-to the nonsphericity of the dwarfs that introduces biases in such
-measurements (Kowalczyk et al. 2013; Genina et al. 2018).
-In our recent papers (Kowalczyk et al. 2017, 2018, 2019) we
-developed the Schwarzschild technique in the form applicable to
-binned velocity moments of a single tracer and verified its abil￾ity to reproduce the mass distribution and velocity anisotropy of
-simulated galaxies. We have also studied biases resulting from
-the nonsphericity of the modeled objects. Later, we applied the
-method to model the kinematics of the Fornax dSph galaxy esti￾mating its mass and anisotropy profiles with unprecedented pre￾cision.
-In this paper we extend our Schwarzschild modeling tech￾nique to include multiple stellar populations with the aim to
-constrain the properties of dSph galaxies even more strongly.
-We test our approach on a realistic simulated galaxy formed in
-the cosmological context, originating from the Illustris project
-(Vogelsberger et al. 2014a). Although no precise analogues of
-dSph galaxies are available in this simulation because of the res￾olution, we use a more massive galaxy but with properties oth￾erwise similar to dSphs. The reliability of the modeling does not
-depend on the particular value of the mass so we believe these
-tests to be viable. We do not attempt to constrain the inner dark
-matter density profile (which is poorly resolved anyway) but try
-to put tighter limits on the estimates of the mass and anisotropy
-profiles. Finally, we apply the improved method to the available
-kinematic data for the distinct stellar populations of the Fornax
-dSph.
-This paper is organized as follows. In Section 2 we present
-the data for the simulated galaxy as well as their splitting into
-stellar populations and mock observations along the main axes.
-Section 3 contains an overview of our modeling method, the ap￾plication of the method to all stars and to two populations, and
-a comparison of the results obtained with these two approaches.
-The results of the application of the method to the Fornax dSph
-galaxy are presented in Section 4. We discuss our findings and
-summarize the paper in Section 5.
-2. Mock data
-2.1. Selection of the simulated galaxy
-In order to test our modeling method on realistic simulated
-data, we decided to use a galaxy from the Illustris project
-(Vogelsberger et al. 2014a,b; Genel et al. 2014; Nelson et al.
-2015), namely the Illustris-1 cosmological simulation. This sim￾ulation follows the formation and evolution of galaxies from the
-early Universe to the present by solving gravity and hydrody￾namics, as well as modeling of star formation, galactic winds,
-SFR [M
-⊙ yr
--1
-]
-t [Gyr]
- 0
- 4
- 8
- 12
- 16
- 0 2 4 6 8 10 12
-Fig. 1. Star formation rate as a function of the age of the Universe in
-the simulated galaxy from the Illustris project used to create mock data.
-The black and gray vertical arrows indicate the last mergers which the
-galaxy underwent, wet and dry, respectively. t [Gyr]
-Z [Z⊙]
- 0
- 2
- 4
- 6
- 8
- 10
- 0 1 2 3 4 5
- 0
- 2
- 4
- 6
-N [10
-2
-]
-Fig. 2. Number of stars as a function of their metallicity and time of
-formation (the age of the Universe) in the simulated galaxy. The vertical
-line indicates the applied split into stellar populations.
-magnetic fields, and the feedback from black holes. Although
-dwarf galaxies that are of our interest here are not resolved in the
-suite, this can be easily overcome with the appropriate choice of
-the object and the treatment of data.
-As the key properties of dSph galaxy equivalents we iden￾tified: the lack of gas, the lack of a black hole, a low spin,
-the stellar mass much smaller than the dark matter mass and a
-nearly spherical shape. The last condition was adopted in an at￾tempt to avoid any strong bias introduced by the spherical mod￾eling of a nonspherical object. Moreover, we required the galaxy
-to possess a significant number of both stellar and dark mat￾ter particles (over 105
-), and a well resolved center. Due to the
-large softening scale for dark matter particles in the simulation
-(ǫDM = 1.42 kpc), we looked for an object in which even the
-more concentrated stellar population (see Section 2.2) extended
-over 43 kpc so that the region affected by the numerical artifacts
-was enclosed within 2-3 innermost data bins (we used 20 linearly
-spaced spatial bins, see Section 3.1).
-Out of 27345 galaxies listed in the catalog of stellar circu￾larities, angular momenta, and axis ratios published by the Illus￾tris team (Genel et al. 2015) containing subhalos with the stellar
-mass larger than 109 M⊙, only a few met our restrictive require￾Article number, page 2 of 12
-K. Kowalczyk & E. L. Łokas: Multiple stellar populations in Schwarzschild modeling
--80
--40
-0
-40
-80
-POPULATION I
-[kpc]
-major intermediate minor
- 5.3
- 5.9
- 6.5
- 7.1
- 7.7
-log(Σ) [M⊙/kpc
-2
-]
--80
--40
-0
-40
-POPULATION II
-[kpc]
--160
--80
- 0
- 80
- 160
-V [km/s]
--80
--40
-0
-40
--80 -40 0 40
-POPULATION II
-[kpc]
-[kpc]
--80 -40 0 40
-POPULATION II
-[kpc]
--80 -40 0 40 80
-POPULATION II
-[kpc]
- 0
- 30
- 60
- 90
-σ [km/s]
--80
--40
-0
-40
-80
-POPULATION II
-[kpc]
-major intermediate minor
- 5.3
- 5.9
- 6.5
- 7.1
- 7.7
-log(Σ) [M⊙/kpc
-2
-]
--80
--40
-0
-40
-POPULATION II
-[kpc]
--160
--80
- 0
- 80
- 160
-V [km/s]
--80
--40
-0
-40
--80 -40 0 40
-POPULATION II
-[kpc]
-[kpc]
--80 -40 0 40
-POPULATION II
-[kpc]
--80 -40 0 40 80
-POPULATION II
-[kpc]
- 0
- 30
- 60
- 90
-σ [km/s]
-Fig. 3. Maps of the projected stellar density, mean stellar velocity, and stellar velocity dispersion (in rows) for two stellar populations: the metal￾rich population I (left-hand side panels) and the metal-poor population II (right-hand side), and observations along the principal axes determined
-for all stars (in columns, along the major, the intermediate, and the minor axis, respectively).
--1
--0.5
- 0
- 0.5
- 1
- 1 10 100
-β(r)
-r [kpc]
--1
--0.5
- 0
- 0.5
- 1
- 0 10 20 30 40 50
-β(r)
-r [kpc]
-all stars
-pop I
-pop II
- 40
- 60
- 80
- 100
- 120
- 1 10 100
-σr(r)
-r [kpc]
- 40
- 60
- 80
- 100
- 120
- 0 10 20 30 40 50
-σr(r)
-r [kpc]
- 40
- 60
- 80
- 100
- 120
- 1 10 100
-σt(r)
-r [kpc]
- 40
- 60
- 80
- 100
- 120
- 0 10 20 30 40 50
-σt(r)
-r [kpc]
-Fig. 4. Profiles of the velocity anisotropy parameter, radial velocity dispersion, and tangential velocity dispersion (in consecutive columns) calcu￾lated from all stars (in red), including only population I (in orange), and only population II (in blue). The upper row shows the profiles using the
-logarithmic distance scale and reaching the outskirts of the galaxy whereas the bottom row presents in the linear scale only the radial range used
-in the modeling.
-ments. We decided to use a galaxy labeled as subhalo 16960.
-All the relevant properties of the galaxy are given in Table 1,
-including numbers of particles and total masses for both compo￾nents, and details on the shape of the stellar component: the axis
-ratios minor to major (shortest to longest) c/a, intermediate to
-major b/a, and the triaxiality parameter T = (a
-2 − b
-2
-)/(a
-2 − c
-2
-).
-We distinguish between the half-mass radius provided in the Il￾lustris database and the half-number radius r1/2, which we use
-for further calculations in this paper. The difference between the
-two comes from a small gradient in the stellar mass-to-light ratio
-with the distance from the galactic center. Since in our approach
-we treat stars as equal-mass particles and refer to number den￾sities (multiplied by the mean mass of a stellar particle when
-needed), the application of the half-number radius is more self￾consistent.
+A&A proofs: manuscript no. Populations4
+Table 1. Properties of the Illustris galaxy used to create mock data.
+Property Value
+Subhalo ID 16960
+Number of stellar particles (N⋆) 70446
+Number of dark matter particles (NDM) 78448
+Stellar mass (M⋆) 5.74 × 1010 M⊙
+Dark matter mass (MDM) 4.91 × 1011 M⊙
+Mean mass of stellar particles 815808 M⊙
+Stellar half-mass radius 9.99 kpc
+Stellar half-number radius (r1/2) 9.6 kpc
+Axis ratio c/a within r1/2 0.907
+Axis ratio b/a within r1/2 0.949
+Triaxiality 0.56
+to the nonsphericity of the dwarfs that introduces biases in such
+measurements (Kowalczyk et al. 2013; Genina et al. 2018).
+In our recent papers (Kowalczyk et al. 2017, 2018, 2019) we
+developed the Schwarzschild technique in the form applicable to
+binned velocity moments of a single tracer and verified its ability to reproduce the mass distribution and velocity anisotropy of
+simulated galaxies. We have also studied biases resulting from
+the nonsphericity of the modeled objects. Later, we applied the
+method to model the kinematics of the Fornax dSph galaxy estimating its mass and anisotropy profiles with unprecedented precision.
+In this paper we extend our Schwarzschild modeling technique to include multiple stellar populations with the aim to
+constrain the properties of dSph galaxies even more strongly.
+We test our approach on a realistic simulated galaxy formed in
+the cosmological context, originating from the Illustris project
+(Vogelsberger et al. 2014a). Although no precise analogues of
+dSph galaxies are available in this simulation because of the resolution, we use a more massive galaxy but with properties otherwise similar to dSphs. The reliability of the modeling does not
+depend on the particular value of the mass so we believe these
+tests to be viable. We do not attempt to constrain the inner dark
+matter density profile (which is poorly resolved anyway) but try
+to put tighter limits on the estimates of the mass and anisotropy
+profiles. Finally, we apply the improved method to the available
+kinematic data for the distinct stellar populations of the Fornax
+dSph.
+This paper is organized as follows. In Section 2 we present
+the data for the simulated galaxy as well as their splitting into
+stellar populations and mock observations along the main axes.
+Section 3 contains an overview of our modeling method, the application of the method to all stars and to two populations, and
+a comparison of the results obtained with these two approaches.
+The results of the application of the method to the Fornax dSph
+galaxy are presented in Section 4. We discuss our findings and
+summarize the paper in Section 5.
+2. Mock data
+2.1. Selection of the simulated galaxy
+In order to test our modeling method on realistic simulated
+data, we decided to use a galaxy from the Illustris project
+(Vogelsberger et al. 2014a,b; Genel et al. 2014; Nelson et al.
+2015), namely the Illustris-1 cosmological simulation. This simulation follows the formation and evolution of galaxies from the
+early Universe to the present by solving gravity and hydrodynamics, as well as modeling of star formation, galactic winds,
+SFR [M
+⊙ yr
+-1
+]
+t [Gyr]
+ 0
+ 4
+ 8
+ 12
+ 16
+ 0 2 4 6 8 10 12
+Fig. 1. Star formation rate as a function of the age of the Universe in
+the simulated galaxy from the Illustris project used to create mock data.
+The black and gray vertical arrows indicate the last mergers which the
+galaxy underwent, wet and dry, respectively. t [Gyr]
+Z [Z⊙]
+ 0
+ 2
+ 4
+ 6
+ 8
+ 10
+ 0 1 2 3 4 5
+ 0
+ 2
+ 4
+ 6
+N [10
+2
+]
+Fig. 2. Number of stars as a function of their metallicity and time of
+formation (the age of the Universe) in the simulated galaxy. The vertical
+line indicates the applied split into stellar populations.
+magnetic fields, and the feedback from black holes. Although
+dwarf galaxies that are of our interest here are not resolved in the
+suite, this can be easily overcome with the appropriate choice of
+the object and the treatment of data.
+As the key properties of dSph galaxy equivalents we identified: the lack of gas, the lack of a black hole, a low spin,
+the stellar mass much smaller than the dark matter mass and a
+nearly spherical shape. The last condition was adopted in an attempt to avoid any strong bias introduced by the spherical modeling of a nonspherical object. Moreover, we required the galaxy
+to possess a significant number of both stellar and dark matter particles (over 105
+), and a well resolved center. Due to the
+large softening scale for dark matter particles in the simulation
+(ǫDM = 1.42 kpc), we looked for an object in which even the
+more concentrated stellar population (see Section 2.2) extended
+over 43 kpc so that the region affected by the numerical artifacts
+was enclosed within 2-3 innermost data bins (we used 20 linearly
+spaced spatial bins, see Section 3.1).
+Out of 27345 galaxies listed in the catalog of stellar circularities, angular momenta, and axis ratios published by the Illustris team (Genel et al. 2015) containing subhalos with the stellar
+mass larger than 109 M⊙, only a few met our restrictive requireArticle number, page 2 of 1
+K. Kowalczyk & E. L. Łokas: Multiple stellar populations in Schwarzschild modeling
+-80
+-40
+0
+40
+80
+POPULATION I
+[kpc]
+major intermediate minor
+ 5.3
+ 5.9
+ 6.5
+ 7.1
+ 7.7
+log(Σ) [M⊙/kpc
+2
+]
+-80
+-40
+0
+40
+POPULATION II
+[kpc]
+-160
+-80
+ 0
+ 80
+ 160
+V [km/s]
+-80
+-40
+0
+40
+-80 -40 0 40
+POPULATION II
+[kpc]
+[kpc]
+-80 -40 0 40
+POPULATION II
+[kpc]
+-80 -40 0 40 80
+POPULATION II
+[kpc]
+ 0
+ 30
+ 60
+ 90
+σ [km/s]
+-80
+-40
+0
+40
+80
+POPULATION II
+[kpc]
+major intermediate minor
+ 5.3
+ 5.9
+ 6.5
+ 7.1
+ 7.7
+log(Σ) [M⊙/kpc
+2
+]
+-80
+-40
+0
+40
+POPULATION II
+[kpc]
+-160
+-80
+ 0
+ 80
+ 160
+V [km/s]
+-80
+-40
+0
+40
+-80 -40 0 40
+POPULATION II
+[kpc]
+[kpc]
+-80 -40 0 40
+POPULATION II
+[kpc]
+-80 -40 0 40 80
+POPULATION II
+[kpc]
+ 0
+ 30
+ 60
+ 90
+σ [km/s]
+Fig. 3. Maps of the projected stellar density, mean stellar velocity, and stellar velocity dispersion (in rows) for two stellar populations: the metalrich population I (left-hand side panels) and the metal-poor population II (right-hand side), and observations along the principal axes determined
+for all stars (in columns, along the major, the intermediate, and the minor axis, respectively).
+-1
+-0.5
+ 0
+ 0.5
+ 1
+ 1 10 100
+β(r)
+r [kpc]
+-1
+-0.5
+ 0
+ 0.5
+ 1
+ 0 10 20 30 40 50
+β(r)
+r [kpc]
+all stars
+pop I
+pop II
+ 40
+ 60
+ 80
+ 100
+ 120
+ 1 10 100
+σr(r)
+r [kpc]
+ 40
+ 60
+ 80
+ 100
+ 120
+ 0 10 20 30 40 50
+σr(r)
+r [kpc]
+ 40
+ 60
+ 80
+ 100
+ 120
+ 1 10 100
+σt(r)
+r [kpc]
+ 40
+ 60
+ 80
+ 100
+ 120
+ 0 10 20 30 40 50
+σt(r)
+r [kpc]
+Fig. 4. Profiles of the velocity anisotropy parameter, radial velocity dispersion, and tangential velocity dispersion (in consecutive columns) calculated from all stars (in red), including only population I (in orange), and only population II (in blue). The upper row shows the profiles using the
+logarithmic distance scale and reaching the outskirts of the galaxy whereas the bottom row presents in the linear scale only the radial range used
+in the modeling.
+ments. We decided to use a galaxy labeled as subhalo 16960.
+All the relevant properties of the galaxy are given in Table 1,
+including numbers of particles and total masses for both components, and details on the shape of the stellar component: the axis
+ratios minor to major (shortest to longest) c/a, intermediate to
+major b/a, and the triaxiality parameter T = (a
+2 − b2
+)/(a
+2 − c2
+).
+We distinguish between the half-mass radius provided in the Illustris database and the half-number radius r1/2, which we use
+for further calculations in this paper. The difference between the
+two comes from a small gradient in the stellar mass-to-light ratio
+with the distance from the galactic center. Since in our approach
+we treat stars as equal-mass particles and refer to number densities (multiplied by the mean mass of a stellar particle when
+needed), the application of the half-number radius is more selfconsistent.
 Article number, page 3 of 12
-A&A proofs: manuscript no. Populations4
-10-3
-10-1
-101
-103
- 10 100
-n⋆(R) [kpc-2
-]
-R [kpc]
-major
- 10 100
-R [kpc]
-intermediate
- 10 100
-R [kpc]
-minor
-all stars
-pop I
-pop II
-Fig. 5. Surface number density profiles of the stellar data samples for the simulated galaxy observed along different lines of sight (from the left to
-the right). Different lines show profiles for all available stars (in red), the metal-rich population I (in orange), and the metal-poor population II (in
-blue). Thin vertical lines indicate r0 (see text) and the outer boundary of the spectroscopic data.
-2.2. Splitting the stars into populations
-Our chosen galaxy shows a complex formation history under￾going multiple mergers which result in extended star formation
-with a few star formation bursts. The last wet merger, that is a
-merger with an object containing gas, happens at 6.9 Gyr from
-the beginning of the simulation, whereas the last dry merger (no
-gas transfer) at 12.1 Gyr, giving the galaxy enough time to regain
-dynamical equilibrium. We present the star formation rate (SFR)
-as a function of time (the age of the Universe) in Fig. 1, where
-these last mergers are indicated with black and gray vertical ar￾rows. In Fig. 2 we show the distribution of stars as a function of
-their metallicity (in solar units) and the time of formation. In or￾der to divide the stellar sample into two populations we cut it in
-half based on the metallicity index of each stellar particle. This
-split is indicated in Fig. 2 with the vertical line. With satisfying
-accuracy it separates the stars born before and after 4 Gyr since
-the start of the simulation, which corresponds to the formation
-time before and after the end of the second major star burst, as
-shown in Fig. 1. We refer to the metal-rich stars as population I
-and to the metal-poor as population II, following the commonly
-used nomenclature in astronomy.
-In Fig. 3 we present maps of the projected stellar mass den￾sity, line-of-sight velocity, and line-of-sight velocity dispersion
-for both populations obtained by projecting the galaxy along its
-principal axes. The orientation was determined from the iner￾tia tensor calculated from all stars within the half-number radius
-r1/2 and therefore is the same in both panels. The two popula￾tions differ significantly in the spatial distribution and kinemat￾ics with the metal-rich (considered to be younger) population I
-being more concentrated but having lower central velocity dis￾persion. Both populations show a weak rotation signal at large
-distances from the center.
-The velocity anisotropy parameter β(r) = 1 − (σ
-2
-θ +
-σ
-2
-φ
-)/(2σ
-2
-r
-), where σi are velocity dispersions in spherical coordi￾nates (Binney & Tremaine 2008), describes the orbital structure
-of galaxies. It is one of the most important dynamical properties
-of bound systems which cannot be inferred directly from ob￾servations and has to be recovered by dynamical modeling. The
-profiles of the anisotropy parameter β as well as the radial σr
-and tangential σt = [(σ
-2
-θ +σ
-2
-φ
-)/2]1/2 velocity dispersions for our
-simulated galaxy are presented in the consecutive columns of
-Fig. 4. Throughout the paper we use red, orange, and blue colors
-to indicate values calculated or recovered for all stars, popula￾tion I, and population II, respectively. The two rows of the figure
-show the behavior of the parameters at different scales. The top
-row plots the profiles with the distance from the center of the
-galaxy in the logarithmic scale and shows the drop of anisotropy
-at the outer edges of the object. The bottom row uses the linear
-distance scale and focuses on the main body of the galaxy.
-Figure 5 shows the surface number density profiles of the
-stars as measured in different directions. We can see that while
-the different subsamples have quite distinguishable profiles, the
-difference between the lines of sight is small because the galaxy
-is close to spherical.
-2.3. Observables
-We generated nine sets of mock data by observing all stars and
-each population separately along the principal axes determined
-from all stars. For the observables to be used in the modeling we
-divided the stars into 20 bins spaced linearly in distance from
-the center of the galaxy up to 50 kpc, measuring the fraction
-of the total number of stars and the 2nd, 3rd, and 4th proper
-moments of the line-of-sight velocity defined in Eq. 8 and 9
-of Kowalczyk et al. (2018). The profiles of these quantities are
-shown in consecutive rows in Fig. 6. Columns correspond to dif￾ferent lines of sight, from the left to the right: along the major,
-intermediate, and minor axis of the galaxy. For clarity of the fig￾ure, in each panel we indicate only the error bars for one of the
-data sets. However, as the number of stars in a sample remains
-roughly constant between the lines of sight, the error bars are
-very similar among the panels in a given row.
-Although in our previous studies of the reliability of
-the Schwarzschild modeling and its applications to real data
-(Kowalczyk et al. 2017, 2018, 2019) we approximated the den￾sity profile of the tracer with the Sérsic formula, we found that it
-does not provide a good approximation of the data for the simu￾lated galaxy considered here. We therefore fit the projected den￾sity profile with the King formula (King 1962)
-I(R) = I0
-
-
-1
-p
-1 + (R/Rc)
-2
-−
-1
-p
-1 + (Rt/Rc)
-2
-
-
-2
-, (1)
+A&A proofs: manuscript no. Populations4
+10-3
+10-1
+101
+103
+ 10 100
+n⋆(R) [kpc-2
+]
+R [kpc]
+major
+ 10 100
+R [kpc]
+intermediate
+ 10 100
+R [kpc]
+minor
+all stars
+pop I
+pop II
+Fig. 5. Surface number density profiles of the stellar data samples for the simulated galaxy observed along different lines of sight (from the left to
+the right). Different lines show profiles for all available stars (in red), the metal-rich population I (in orange), and the metal-poor population II (in
+blue). Thin vertical lines indicate r0 (see text) and the outer boundary of the spectroscopic data.
+2.2. Splitting the stars into populations
+Our chosen galaxy shows a complex formation history undergoing multiple mergers which result in extended star formation
+with a few star formation bursts. The last wet merger, that is a
+merger with an object containing gas, happens at 6.9 Gyr from
+the beginning of the simulation, whereas the last dry merger (no
+gas transfer) at 12.1 Gyr, giving the galaxy enough time to regain
+dynamical equilibrium. We present the star formation rate (SFR)
+as a function of time (the age of the Universe) in Fig. 1, where
+these last mergers are indicated with black and gray vertical arrows. In Fig. 2 we show the distribution of stars as a function of
+their metallicity (in solar units) and the time of formation. In order to divide the stellar sample into two populations we cut it in
+half based on the metallicity index of each stellar particle. This
+split is indicated in Fig. 2 with the vertical line. With satisfying
+accuracy it separates the stars born before and after 4 Gyr since
+the start of the simulation, which corresponds to the formation
+time before and after the end of the second major star burst, as
+shown in Fig. 1. We refer to the metal-rich stars as population I
+and to the metal-poor as population II, following the commonly
+used nomenclature in astronomy.
+In Fig. 3 we present maps of the projected stellar mass density, line-of-sight velocity, and line-of-sight velocity dispersion
+for both populations obtained by projecting the galaxy along its
+principal axes. The orientation was determined from the inertia tensor calculated from all stars within the half-number radius
+r1/2 and therefore is the same in both panels. The two populations differ significantly in the spatial distribution and kinematics with the metal-rich (considered to be younger) population I
+being more concentrated but having lower central velocity dispersion. Both populations show a weak rotation signal at large
+distances from the center.
+The velocity anisotropy parameter $\beta(r) = 1 - (\sigma_\theta^2 + \sigma_\phi^2)/(2\sigma_r^2)$, where $\sigma_i$ are velocity dispersions in spherical coordinates (Binney & Tremaine 2008), describes the orbital structure
+of galaxies. It is one of the most important dynamical properties
+of bound systems which cannot be inferred directly from observations and has to be recovered by dynamical modeling. The
+profiles of the anisotropy parameter β as well as the radial σr
+and tangential $\sigma_t = [(\sigma_\theta^2 + \sigma_\phi^2)/2]^{1/2}$ velocity dispersions for our
+simulated galaxy are presented in the consecutive columns of
+Fig. 4. Throughout the paper we use red, orange, and blue colors
+to indicate values calculated or recovered for all stars, population I, and population II, respectively. The two rows of the figure
+show the behavior of the parameters at different scales. The top
+row plots the profiles with the distance from the center of the
+galaxy in the logarithmic scale and shows the drop of anisotropy
+at the outer edges of the object. The bottom row uses the linear
+distance scale and focuses on the main body of the galaxy.
+Figure 5 shows the surface number density profiles of the
+stars as measured in different directions. We can see that while
+the different subsamples have quite distinguishable profiles, the
+difference between the lines of sight is small because the galaxy
+is close to spherical.
+2.3. Observables
+We generated nine sets of mock data by observing all stars and
+each population separately along the principal axes determined
+from all stars. For the observables to be used in the modeling we
+divided the stars into 20 bins spaced linearly in distance from
+the center of the galaxy up to 50 kpc, measuring the fraction
+of the total number of stars and the 2nd, 3rd, and 4th proper
+moments of the line-of-sight velocity defined in Eq. 8 and 9
+of Kowalczyk et al. (2018). The profiles of these quantities are
+shown in consecutive rows in Fig. 6. Columns correspond to different lines of sight, from the left to the right: along the major,
+intermediate, and minor axis of the galaxy. For clarity of the figure, in each panel we indicate only the error bars for one of the
+data sets. However, as the number of stars in a sample remains
+roughly constant between the lines of sight, the error bars are
+very similar among the panels in a given row.
+Although in our previous studies of the reliability of
+the Schwarzschild modeling and its applications to real data
+(Kowalczyk et al. 2017, 2018, 2019) we approximated the density profile of the tracer with the Sérsic formula, we found that it
+does not provide a good approximation of the data for the simulated galaxy considered here. We therefore fit the projected density profile with the King formula (King 1962)
+$$I(R) = I_0 \left[ \frac{1}{\sqrt{1 + (R/R_c)^2}} - \frac{1}{\sqrt{1 + (R_t/R_c)^2}} \right]^2, \qquad (1)$$
 Article number, page 4 of 12
-K. Kowalczyk & E. L. Łokas: Multiple stellar populations in Schwarzschild modeling
-10-3
-10-2
-10-1
-100
- 0 10 20 30 40
-M(R)
-R [kpc]
-major
- 0 10 20 30 40
-R [kpc]
-intermediate
- 0 10 20 30 40 50
-R [kpc]
-minor
-3
-6
-9
-12
- 0 10 20 30 40
-m2(R)[10
-3(km s-1
-)
-2
-]
-R [kpc]
- 0 10 20 30 40
-R [kpc]
- 0 10 20 30 40 50
-R [kpc]
--10
--5
-0
-5
-10
- 0 10 20 30 40
-m3(R)[10
-4(km s-1
-)
-3
-]
-R [kpc]
- 0 10 20 30 40
-R [kpc]
- 0 10 20 30 40 50
-R [kpc]
-0
-1
-2
-3
-4
- 0 10 20 30 40
-m4(R)[10
-8(km s-1
-)
-4
-]
-R [kpc]
- 0 10 20 30 40
-R [kpc]
- 0 10 20 30 40 50
-R [kpc]
-all stars
-pop I
-pop II
-Fig. 6. Observables used in our Schwarzschild modeling scheme of the simulated galaxy. In rows: the fraction of the total number of stars, 2nd,
-3rd, and 4th velocity moment. In columns: mock data from the simulated galaxy along the major, intermediate, and minor axis. In red we present
-the values obtained for all stars whereas in orange and blue those for populations I and II, respectively. For clarity of the figure, in each panel we
-indicate only the error bars for one of the data sets.
-where I0, Rc, and Rt are the model parameters. The profile can
-be analytically deprojected to obtain the 3D density
-ρ(r) =
-ρ0
-z
-2
-"
-1
-z
-arccos(z) −
-p
-1 − z
-2
-#
-, (2)
-where
-ρ0 =
-I0
-πRc[1 + (Rt/Rc)
-2
-]
-3/2
-(3)
-and
-z =
-s
-r
-2 + R
-2
-c
-R
-2
-c + R
-2
-t
-. (4)
-3. Schwarzschild modeling
-In this section we briefly present our modeling method and its
-application to the data sets derived for all stars and the two pop￾ulations of the simulated galaxy separately. In both cases our
-aim was to recover the profiles of the total mass and the velocity
-anisotropy.
-3.1. Overview of the method
-We follow the approach introduced in Kowalczyk et al. (2018),
-namely we model the total mass profile with the mass-to-light
-ratio Υ varying with radius:
-logΥ(r) =
-(
-log(Υ0) r ≤ r0
-a(log r − log r0)
-c + log(Υ0) r > r0
-(5)
+K. Kowalczyk & E. L. Łokas: Multiple stellar populations in Schwarzschild modeling
+10-3
+10-2
+10-1
+100
+ 0 10 20 30 40
+M(R)
+R [kpc]
+major
+ 0 10 20 30 40
+R [kpc]
+intermediate
+ 0 10 20 30 40 50
+R [kpc]
+minor
+3
+6
+9
+12
+ 0 10 20 30 40
+m2(R)[10
+3(km s-1
+)
+2
+]
+R [kpc]
+ 0 10 20 30 40
+R [kpc]
+ 0 10 20 30 40 50
+R [kpc]
+-10
+-5
+0
+5
+10
+ 0 10 20 30 40
+m3(R)[10
+4(km s-1
+)
+3
+]
+R [kpc]
+ 0 10 20 30 40
+R [kpc]
+ 0 10 20 30 40 50
+R [kpc]
+0
+1
+2
+3
+4
+ 0 10 20 30 40
+m4(R)[10
+8(km s-1
+)
+4
+]
+R [kpc]
+ 0 10 20 30 40
+R [kpc]
+ 0 10 20 30 40 50
+R [kpc]
+all stars
+pop I
+pop II
+Fig. 6. Observables used in our Schwarzschild modeling scheme of the simulated galaxy. In rows: the fraction of the total number of stars, 2nd,
+3rd, and 4th velocity moment. In columns: mock data from the simulated galaxy along the major, intermediate, and minor axis. In red we present
+the values obtained for all stars whereas in orange and blue those for populations I and II, respectively. For clarity of the figure, in each panel we
+indicate only the error bars for one of the data sets.
+where I0, Rc, and Rt are the model parameters. The profile can
+be analytically deprojected to obtain the 3D density
+$$\rho(r) = \frac{\rho_0}{z^2} \left[ \frac{1}{z} \arccos(z) - \sqrt{1 - z^2} \right], \qquad (2)$$
+where
+$$\rho_0 = \frac{I_0}{\pi R_c \left[ 1 + (R_t/R_c)^2 \right]^{3/2}} \qquad (3)$$
+and
+$$z = \sqrt{\frac{r^2 + R_c^2}{R_c^2 + R_t^2}}. \qquad (4)$$
+3. Schwarzschild modeling
+In this section we briefly present our modeling method and its
+application to the data sets derived for all stars and the two populations of the simulated galaxy separately. In both cases our
+aim was to recover the profiles of the total mass and the velocity
+anisotropy.
+3.1. Overview of the method
+We follow the approach introduced in Kowalczyk et al. (2018),
+namely we model the total mass profile with the mass-to-light
+ratio Υ varying with radius:
+$$\log\Upsilon(r) = \begin{cases} \log(\Upsilon_0) & r \le r_0 \\ a(\log r - \log r_0)^c + \log(\Upsilon_0) & r > r_0 \end{cases} \qquad (5)$$
 Article number, page 5 of 12
-A&A proofs: manuscript no. Populations4
- 1
- 2
- 3
- 0
- 0.5
- 1
- 1
- 2
- 3
-ALL
-Υ0
-a
-c
- 1
- 2
- 3
- 0
- 0.5
- 1
- 1
- 2
- 3
-POPULATIONS
-Υ0
-a
-c
- 10
- 100
-χ
-2
- 1
- 2
- 3
- 0
- 0.5
- 1
- 1
- 2
- 3
-POP I
-Υ0
-a
-c
- 1
- 2
- 3
- 0
- 0.5
- 1
- 1
- 2
- 3
-POP II
-Υ0
-a
-c
- 10
- 100
-χ
-2
-Fig. 7. Absolute values of χ
-2 obtained from the fits of three data sets: all stars (top left panel), population I (bottom left), and population II (bottom
-right) for the observations along the major axis of the simulated galaxy. The results for the modeling of two populations (top right) were obtained
-as an algebraic sum of values for populations I and II. To avoid large numbers in the figure, Υ0 was divided by the mean mass of a stellar particle.
-where r is the distance from the center of the galaxy, r0 is a
-constant, while Υ0, a, and c are the parameters of a model. We
-have assumed log r0 = 0.33 which corresponds to three softening
-scales for stellar particles in the Illustris simulation.
-We probed the parameter a ∈ [0 : 1.3] with a step ∆a = 0.04
-and c ∈ [1.1 : 2.9] with a step ∆c = 0.2, imposing the require￾ment on the total density profile to be monotonically decreasing
-with radius. For each set of parameters and for each line of sight
-we generated 1200 orbits using 100 values of energy (expressed
-with the radius of a circular orbit) spaced logarithmically and
-12 values of the relative angular momentum spaced linearly. The
-outer radius of the orbit library, that is the apocenter of the most
-extended orbit, was set to rout = 165 kpc in order to cover over
-0.999 of the total stellar mass based on the fitted King profile
-parameters.
-We fit the kinematics weighted with the fraction of mass with
-the constrained least squares algorithm where different values
-of Υ0 were obtained with a simple transformation of velocities
-given by Eq. 12, 13, and 15 in Kowalczyk et al. (2018). In or￾der to smooth out the numerical artifacts, the three-dimensional
-χ
-2
-spaces were then interpolated with 12-order polynomials
-(∼ a
-4
-c
-4Υ
-4
-0
-) that were further used to determine the global min￾imums (identified as the best-fitting models) and 1, 2, 3σ con￾fidence levels which for three parameters correspond to ∆χ
-2 =
-3.53, 8.02, 14.2 (Press et al. 1992).
-3.2. Application to mock data
-In the following we present the direct and inferred results of
-the Schwarzschild modeling of the data sets described in Sec￾tion 2.3.
-First, Fig. 7 shows the distribution of the absolute values of
-the χ
-2
-as a function of three parameters of the mass-to-light ra￾tio. In order to avoid unnecessary repetitions, we include only
-the plot for the mock data obtained by observing the Illustris
-galaxy along its major axis as the others are qualitatively similar.
-The four panels refer to fits for all stars (top left), the metal-rich
-population I (bottom left), the metal-poor population II (bottom
-right), and the one named "populations" (top right) which is the
-algebraic sum of values for both populations.
-As our parametrization of the mass-to-light ratio is not intu￾itive we present its profiles explicitly in the first rows of the left￾Article number, page 6 of 12
-K. Kowalczyk & E. L. Łokas: Multiple stellar populations in Schwarzschild modeling
-106
-107
-108
-109
-1010
- 10 100
-ALL
-Υ(r) [M⊙/L⊙]
-r [kpc]
-major
- 10 100
-ALL
-r [kpc]
-intermediate
- 10 100
-r [kpc]
-minor
-3σ
-2σ
-1σ
-best model
-data
-104
-106
-108
- 10 100
-ALL
-νtot(r) [M⊙ kpc-3
-]
-r [kpc]
- 10 100
-ALL
-r [kpc]
- 10 100
-r [kpc]
-1010
-1011
-1012
- 10 100
-ALL
-Mtot(r) [M⊙]
-r [kpc]
- 10 100
-ALL
-r [kpc]
- 10 100
-r [kpc]
--2
--1
-0
-1
- 0 10 20 30 40
-ALL
-β(r)
-r [kpc]
- 0 10 20 30 40
-ALL
-r [kpc]
- 0 10 20 30 40 50
-ALL
-r [kpc]
-106
-107
-108
-109
-1010
- 10 100
-POPULATIONS
-Υ(r) [M⊙/L⊙]
-r [kpc]
-major
- 10 100
-POPULATIONS
-r [kpc]
-intermediate
- 10 100
-r [kpc]
-minor
-3σ
-2σ
-1σ
-best model
-data
-104
-106
-108
- 10 100
-POPULATIONS
-νtot(r) [M⊙ kpc-3
-]
-r [kpc]
- 10 100
-POPULATIONS
-r [kpc]
- 10 100
-r [kpc]
-1010
-1011
-1012
- 10 100
-POPULATIONS
-Mtot(r) [M⊙]
-r [kpc]
- 10 100
-POPULATIONS
-r [kpc]
- 10 100
-r [kpc]
--2
--1
-0
-1
- 0 10 20 30 40
-POPULATIONS
-β(r)
-r [kpc]
- 0 10 20 30 40
-POPULATIONS
-r [kpc]
- 0 10 20 30 40 50
-POPULATIONS
-r [kpc]
-Fig. 8. Left-hand side: results of Schwarzschild modeling of three mock data sets obtained by observing the simulated galaxy along the principal
-axes. In rows: derived mass-to-light ratio, total density, total mass, and anisotropy parameter. In columns: observations along the major, interme￾diate, and minor axis, respectively. Green lines indicate values for the best-fit models whereas the colored areas of decreasing intensity show the
-1, 2, and 3σ confidence levels. The true values are presented as black lines. Thin vertical lines mark the values of r0 and the outer range of the
-data sets, from left to right. Right-hand side: same as left but for the fit of two stellar populations.
-and right-hand side panels of Fig. 8 for the results obtained for
-all stars and the populations, respectively. We further calculate
-the total density (second rows) and the total mass content (third
-rows). We include the obtained orbit anisotropy within the mod￾eled range in the bottom rows. The consecutive columns present
-the results for the observations along the major, intermediate,
-and minor axis. Green lines indicate values for the best-fit mod￾els whereas the colored areas of decreasing intensity correspond
-to 1, 2, and 3σ confidence regions obtained as extreme values al￾lowed by the models with χ
-2 within a given region. In each panel
-the true values from the simulation are presented with black lines
-while thin vertical lines mark the values of r0 and the outer range
-of the data sets beyond which the reliability of results drops sig￾nificantly. The true mass-to-light ratio profile was obtained by
-dividing the total mass by the fitted King profiles, therefore the
-drop at 100 kpc is the numerical artifact occurring at the very
-outskirts of the galaxy.
-Whereas in the right-hand side panels of Fig. 8 the resulting
-anisotropy is obtained from the fit of all stars and uses only the
-location of global minimum and confidence levels from two pop￾ulations (as in the top right panel of Fig. 7), in Fig. 9 we present
-another method of calculating the anisotropy. In the second and
-third row we show the derived profiles for population I and II
-separately and combine them as stellar mass weighted average
-in the top row. As in previous figures, three columns refer to the
-different lines of sight whereas the narrow fourth one shows the
-behavior of the true profiles outside the modeled range which, as
-we noticed in our previous studies, in a limited way influences
-the results. Such an impact is understandable since the stars at
-larger distances from the center are still included in the line-of￾sight measurements.
-3.3. Comparison of fitting results
-The main strength of the two populations method comes from
-tracing the underlying gravitational potential at different scales.
-As can be seen in the bottom panels of Fig. 7, population I, which
-is more concentrated, is also more sensitive to Υ0, but gives
-weaker constraints on a or c. On the other hand, population II
-attempts to reproduce the total mass content at larger distances
-as well, therefore showing stronger coupling between the param￾eters.
-The global minimums of the χ
-2 distributions for both ap￾proaches, that is modeling one and two populations, which we
-identify as the best-fitting models, closely coincide showing that
-there is no internal bias in the improved method. However, sig￾nificant differences can be observed when comparing the confi￾dence levels, mainly at 1 and 3 σ. Namely, we find that using
-two populations, the constraints we obtain on the density and
-anisotropy profile are much stronger.
-Additionally, the more accurate method allows us to study
-other effects and biases, for example the consequences of the
-nonsphericity of the modeled object. Whereas for the fit of all
-stars the true values of the density, mass, and anisotropy profiles
-are contained within 1 σ confidence regions, the results for the
-populations are more or less biased depending on the axis. They
-are well reproduced for the observation along the intermediate
-axis, for which the effects of nonsphericity seem to cancel out,
-and more biased for the remaining lines of sight. We notice a
-trend from under- to overestimation of the anisotropy when go￾ing from the major to the minor axis.
+A&A proofs: manuscript no. Populations4
+ 1
+ 2
+ 3
+ 0
+ 0.5
+ 1
+ 1
+ 2
+ 3
+ALL
+Υ0
+a
+c
+ 1
+ 2
+ 3
+ 0
+ 0.5
+ 1
+ 1
+ 2
+ 3
+POPULATIONS
+Υ0
+a
+c
+ 10
+ 100
+χ
+2
+ 1
+ 2
+ 3
+ 0
+ 0.5
+ 1
+ 1
+ 2
+ 3
+POP I
+Υ0
+a
+c
+ 1
+ 2
+ 3
+ 0
+ 0.5
+ 1
+ 1
+ 2
+ 3
+POP II
+Υ0
+a
+c
+ 10
+ 100
+χ
+2
+Fig. 7. Absolute values of $\chi^2$ obtained from the fits of three data sets: all stars (top left panel), population I (bottom left), and population II (bottom
+right) for the observations along the major axis of the simulated galaxy. The results for the modeling of two populations (top right) were obtained
+as an algebraic sum of values for populations I and II. To avoid large numbers in the figure, Υ0 was divided by the mean mass of a stellar particle.
+where r is the distance from the center of the galaxy, r0 is a
+constant, while Υ0, a, and c are the parameters of a model. We
+have assumed log r0 = 0.33 which corresponds to three softening
+scales for stellar particles in the Illustris simulation.
+We probed the parameter a ∈ [0 : 1.3] with a step ∆a = 0.04
+and c ∈ [1.1 : 2.9] with a step ∆c = 0.2, imposing the requirement on the total density profile to be monotonically decreasing
+with radius. For each set of parameters and for each line of sight
+we generated 1200 orbits using 100 values of energy (expressed
+with the radius of a circular orbit) spaced logarithmically and
+12 values of the relative angular momentum spaced linearly. The
+outer radius of the orbit library, that is the apocenter of the most
+extended orbit, was set to rout = 165 kpc in order to cover over
+0.999 of the total stellar mass based on the fitted King profile
+parameters.
+We fit the kinematics weighted with the fraction of mass with
+the constrained least squares algorithm where different values
+of Υ0 were obtained with a simple transformation of velocities
+given by Eq. 12, 13, and 15 in Kowalczyk et al. (2018). In order to smooth out the numerical artifacts, the three-dimensional
+$\chi^2$ spaces were then interpolated with 12-order polynomials
+($\sim a^4 c^4 \Upsilon_0^4$) that were further used to determine the global minimums (identified as the best-fitting models) and 1, 2, 3σ confidence levels which for three parameters correspond to $\Delta\chi^2 = 3.53, 8.02, 14.2$ (Press et al. 1992).
+3.2. Application to mock data
+In the following we present the direct and inferred results of
+the Schwarzschild modeling of the data sets described in Section 2.3.
+First, Fig. 7 shows the distribution of the absolute values of
+the $\chi^2$ as a function of three parameters of the mass-to-light ratio. In order to avoid unnecessary repetitions, we include only
+the plot for the mock data obtained by observing the Illustris
+galaxy along its major axis as the others are qualitatively similar.
+The four panels refer to fits for all stars (top left), the metal-rich
+population I (bottom left), the metal-poor population II (bottom
+right), and the one named "populations" (top right) which is the
+algebraic sum of values for both populations.
+As our parametrization of the mass-to-light ratio is not intuitive we present its profiles explicitly in the first rows of the left-
+Article number, page 6 of 12
+K. Kowalczyk & E. L. Łokas: Multiple stellar populations in Schwarzschild modeling
+106
+107
+108
+109
+1010
+ 10 100
+ALL
+Υ(r) [M⊙/L⊙]
+r [kpc]
+major
+ 10 100
+ALL
+r [kpc]
+intermediate
+ 10 100
+r [kpc]
+minor
+3σ
+2σ
+1σ
+best model
+data
+104
+106
+108
+ 10 100
+ALL
+νtot(r) [M⊙ kpc-3
+]
+r [kpc]
+ 10 100
+ALL
+r [kpc]
+ 10 100
+r [kpc]
+1010
+1011
+1012
+ 10 100
+ALL
+Mtot(r) [M⊙]
+r [kpc]
+ 10 100
+ALL
+r [kpc]
+ 10 100
+r [kpc]
+-2
+-1
+0
+1
+ 0 10 20 30 40
+ALL
+β(r)
+r [kpc]
+ 0 10 20 30 40
+ALL
+r [kpc]
+ 0 10 20 30 40 50
+ALL
+r [kpc]
+106
+107
+108
+109
+1010
+ 10 100
+POPULATIONS
+Υ(r) [M⊙/L⊙]
+r [kpc]
+major
+ 10 100
+POPULATIONS
+r [kpc]
+intermediate
+ 10 100
+r [kpc]
+minor
+3σ
+2σ
+1σ
+best model
+data
+104
+106
+108
+ 10 100
+POPULATIONS
+νtot(r) [M⊙ kpc-3
+]
+r [kpc]
+ 10 100
+POPULATIONS
+r [kpc]
+ 10 100
+r [kpc]
+1010
+1011
+1012
+ 10 100
+POPULATIONS
+Mtot(r) [M⊙]
+r [kpc]
+ 10 100
+POPULATIONS
+r [kpc]
+ 10 100
+r [kpc]
+-2
+-1
+0
+1
+ 0 10 20 30 40
+POPULATIONS
+β(r)
+r [kpc]
+ 0 10 20 30 40
+POPULATIONS
+r [kpc]
+ 0 10 20 30 40 50
+POPULATIONS
+r [kpc]
+Fig. 8. Left-hand side: results of Schwarzschild modeling of three mock data sets obtained by observing the simulated galaxy along the principal
+axes. In rows: derived mass-to-light ratio, total density, total mass, and anisotropy parameter. In columns: observations along the major, intermediate, and minor axis, respectively. Green lines indicate values for the best-fit models whereas the colored areas of decreasing intensity show the
+1, 2, and 3σ confidence levels. The true values are presented as black lines. Thin vertical lines mark the values of r0 and the outer range of the
+data sets, from left to right. Right-hand side: same as left but for the fit of two stellar populations.
+and right-hand side panels of Fig. 8 for the results obtained for
+all stars and the populations, respectively. We further calculate
+the total density (second rows) and the total mass content (third
+rows). We include the obtained orbit anisotropy within the modeled range in the bottom rows. The consecutive columns present
+the results for the observations along the major, intermediate,
+and minor axis. Green lines indicate values for the best-fit models whereas the colored areas of decreasing intensity correspond
+to 1, 2, and 3σ confidence regions obtained as extreme values allowed by the models with $\chi^2$ within a given region. In each panel
+the true values from the simulation are presented with black lines
+while thin vertical lines mark the values of r0 and the outer range
+of the data sets beyond which the reliability of results drops significantly. The true mass-to-light ratio profile was obtained by
+dividing the total mass by the fitted King profiles, therefore the
+drop at 100 kpc is the numerical artifact occurring at the very
+outskirts of the galaxy.
+Whereas in the right-hand side panels of Fig. 8 the resulting
+anisotropy is obtained from the fit of all stars and uses only the
+location of global minimum and confidence levels from two populations (as in the top right panel of Fig. 7), in Fig. 9 we present
+another method of calculating the anisotropy. In the second and
+third row we show the derived profiles for population I and II
+separately and combine them as stellar mass weighted average
+in the top row. As in previous figures, three columns refer to the
+different lines of sight whereas the narrow fourth one shows the
+behavior of the true profiles outside the modeled range which, as
+we noticed in our previous studies, in a limited way influences
+the results. Such an impact is understandable since the stars at
+larger distances from the center are still included in the line-of-sight measurements.
+3.3. Comparison of fitting results
+The main strength of the two populations method comes from
+tracing the underlying gravitational potential at different scales.
+As can be seen in the bottom panels of Fig. 7, population I, which
+is more concentrated, is also more sensitive to Υ0, but gives
+weaker constraints on a or c. On the other hand, population II
+attempts to reproduce the total mass content at larger distances
+as well, therefore showing stronger coupling between the parameters.
+The global minimums of the $\chi^2$ distributions for both approaches, that is modeling one and two populations, which we
+identify as the best-fitting models, closely coincide showing that
+there is no internal bias in the improved method. However, significant differences can be observed when comparing the confidence levels, mainly at 1 and 3 σ. Namely, we find that using
+two populations, the constraints we obtain on the density and
+anisotropy profile are much stronger.
+Additionally, the more accurate method allows us to study
+other effects and biases, for example the consequences of the
+nonsphericity of the modeled object. Whereas for the fit of all
+stars the true values of the density, mass, and anisotropy profiles
+are contained within 1 σ confidence regions, the results for the
+populations are more or less biased depending on the axis. They
+are well reproduced for the observation along the intermediate
+axis, for which the effects of nonsphericity seem to cancel out,
+and more biased for the remaining lines of sight. We notice a
+trend from under- to overestimation of the anisotropy when going from the major to the minor axis.
 Article number, page 7 of 12
-A&A proofs: manuscript no. Populations4
--1
-0
-1
- 0 10 20 30 40
-POP I + POP II
-β(r)
-r [kpc]
-major
- 0 10 20 30 40
-r [kpc]
-intermediate
- 0 10 20 30 40
-r [kpc]
-minor
- 50 60 70 80
--1
-0
-1
- 0 10 20 30 40
-POP I
-β(r)
-r [kpc]
- 0 10 20 30 40
-r [kpc]
- 0 10 20 30 40
-r [kpc]
- 50 60 70 80
--1
-0
-1
- 0 10 20 30 40
-POP II
-β(r)
-r [kpc]
- 0 10 20 30 40
-r [kpc]
- 0 10 20 30 40
-r [kpc]
- 50 60 70 80
-data
-best model
-1σ
-2σ
-3σ
-Fig. 9. Profiles of the anisotropy parameter obtained with the Schwarzschild modeling of two stellar populations of the simulated galaxy. In rows:
-results for all stars (calculated as the superposition of two populations), population I, and population II. Colors follow the convention used in
-previous figures. In columns: observations along the major, intermediate, and minor axis. The last narrower column shows the data (black lines)
-outside the modeled radial range. Color lines indicate values for the best-fit models whereas the colored areas of decreasing intensity show the 1,
-2, and 3σ confidence regions.
-4. Modeling Fornax dSph
-In this section we present the application of our Schwarzschild
-modeling scheme to the observational data for the Fornax dSph
-galaxy obtained by del Pino et al. (2015) and del Pino et al.
-(2017). This study is a follow-up of the work of Kowalczyk et al.
-(2019) and can be directly compared to the results presented
-there. Moreover, we refer the reader to these previous publica￾tions for details on the origin of data and our procedures used
-for cleaning the spectroscopic sample.
-Similarly to the approach introduced in Section 2.2, we di￾vided all available stars into two equal-size populations based on
-their metallicity and then cross-correlated the samples with the
-data used in Kowalczyk et al. (2019). The metallicity histogram
-of the final spectroscopic sample is shown in Fig. 10. Addition￾ally, we color-coded each bin with the population it has been
-assigned to, namely orange or blue for population I or II. Inter￾estingly, the case of Fornax is similar to our simulated galaxy
-as the split at [Fe/H]= −1 also captures an important feature
-of the object’s star formation history, separating stars into sub￾samples older and younger than 6 Gyr, as shown in Fig. 12 of
-del Pino et al. (2015) and Fig. 8 of del Pino et al. (2017). The
-numbers of stars contained in the samples of all stars, popula￾tion I, and population II are given in Table 2, where the indices
-"phot" and "spec" refer to the photometric and kinematic sam￾ples. The sum of stars in the populations is lower than in the
-sample of all stars since only stars with reliable measurements
-of metallicity could be included.
-N
-[Fe/H]
-pop I
-pop II
- 0
- 20
- 40
- 60
- 80
- 100
--2.5 -2 -1.5 -1 -0.5 0
-Fig. 10. Metallicity histogram of the final spectroscopic sample used in
-the modeling of two stellar populations in the Fornax dSph. Each bin is
-color-coded according to the population it has been assigned to, orange
-or blue for population I and II, respectively.
-As we have shown in our earlier work, the light profile of the
-Fornax dSph can be well reproduced with the three-parameter
+A&A proofs: manuscript no. Populations4
+-1
+0
+1
+ 0 10 20 30 40
+POP I + POP II
+β(r)
+r [kpc]
+major
+ 0 10 20 30 40
+r [kpc]
+intermediate
+ 0 10 20 30 40
+r [kpc]
+minor
+ 50 60 70 80
+-1
+0
+1
+ 0 10 20 30 40
+POP I
+β(r)
+r [kpc]
+ 0 10 20 30 40
+r [kpc]
+ 0 10 20 30 40
+r [kpc]
+ 50 60 70 80
+-1
+0
+1
+ 0 10 20 30 40
+POP II
+β(r)
+r [kpc]
+ 0 10 20 30 40
+r [kpc]
+ 0 10 20 30 40
+r [kpc]
+ 50 60 70 80
+data
+best model
+1σ
+2σ
+3σ
+Fig. 9. Profiles of the anisotropy parameter obtained with the Schwarzschild modeling of two stellar populations of the simulated galaxy. In rows:
+results for all stars (calculated as the superposition of two populations), population I, and population II. Colors follow the convention used in
+previous figures. In columns: observations along the major, intermediate, and minor axis. The last narrower column shows the data (black lines)
+outside the modeled radial range. Color lines indicate values for the best-fit models whereas the colored areas of decreasing intensity show the 1,
+2, and 3σ confidence regions.
+4. Modeling Fornax dSph
+In this section we present the application of our Schwarzschild
+modeling scheme to the observational data for the Fornax dSph
+galaxy obtained by del Pino et al. (2015) and del Pino et al.
+(2017). This study is a follow-up of the work of Kowalczyk et al.
+(2019) and can be directly compared to the results presented
+there. Moreover, we refer the reader to these previous publications for details on the origin of data and our procedures used
+for cleaning the spectroscopic sample.
+Similarly to the approach introduced in Section 2.2, we divided all available stars into two equal-size populations based on
+their metallicity and then cross-correlated the samples with the
+data used in Kowalczyk et al. (2019). The metallicity histogram
+of the final spectroscopic sample is shown in Fig. 10. Additionally, we color-coded each bin with the population it has been
+assigned to, namely orange or blue for population I or II. Interestingly, the case of Fornax is similar to our simulated galaxy
+as the split at [Fe/H]= −1 also captures an important feature
+of the object’s star formation history, separating stars into subsamples older and younger than 6 Gyr, as shown in Fig. 12 of
+del Pino et al. (2015) and Fig. 8 of del Pino et al. (2017). The
+numbers of stars contained in the samples of all stars, population I, and population II are given in Table 2, where the indices
+"phot" and "spec" refer to the photometric and kinematic samples. The sum of stars in the populations is lower than in the
+sample of all stars since only stars with reliable measurements
+of metallicity could be included.
+N
+[Fe/H]
+pop I
+pop II
+ 0
+ 20
+ 40
+ 60
+ 80
+ 100
+-2.5 -2 -1.5 -1 -0.5 0
+Fig. 10. Metallicity histogram of the final spectroscopic sample used in
+the modeling of two stellar populations in the Fornax dSph. Each bin is
+color-coded according to the population it has been assigned to, orange
+or blue for population I and II, respectively.
+As we have shown in our earlier work, the light profile of the
+Fornax dSph can be well reproduced with the three-parameter
 Article number, page 8 of 12
-K. Kowalczyk & E. L. Łokas: Multiple stellar populations in Schwarzschild modeling
-Table 2. Properties of the data samples for the Fornax dSph.
-Property ALL POP I POP II
-Number of stars (Nphot) 65 797 14 882 49 205
-Number of stars (Nspec) 3286 1136 1151
-Stars within 1.8 kpc 3268 1134 1130
-Fitted normalization (N0) [×104
-] 6.95 1.81 5.45
-Sérsic radius (RS) [kpc] 0.454 0.429 0.420
-Sérsic parameter (m) 0.808 0.807 0.898
-102
-103
-104
-105
- 0.1 0.2 0.5 1 2
-n
-⋆(R) [kpc-2
-]
-R [kpc]
-all stars
-popI
-popII
-Fig. 11. Surface number density profiles of the photometric data sam￾ples for the Fornax dSph: all available stars (in red), the metal-rich pop￾ulation I (in orange), and the metal-poor population II (in blue). Thin
-vertical lines indicate r0 (see text) and the outer boundary of the spec￾troscopic data.
-Sérsic formula (Sérsic 1968). The profiles of number density for
-all stars and both populations together with the best-fitting Sérsic
-profiles are presented in Fig. 11. The colors follow the conven￾tion introduced in previous sections. Thin vertical lines indicate
-the innermost data point for the light profile for all stars and
-the outer boundary of the kinematic sample. The former, set at
-log r = −0.16, is also used as the minimum of the mass-to-light
-ratio profile (r0 in Eq. 5). The fitted parameters of the profiles,
-that is the normalization N0, the Sérsic radius RS, and the Sérsic
-parameter m, are included in the second part of Table 2.
-Figure 12 presents the profiles of the observables used in the
-Schwarzschild modeling: the fraction of stars and the 2nd, 3rd,
-and 4th velocity moments (top to bottom) for the three data sam￾ples: all stars, population I, and population II (in red, orange, and
-blue, respectively). The error bars indicate 1 σ sampling errors.
-The parameter space for Υ(r) has been probed as follows:
-a ∈ [0 : 1.85] with a step ∆a = 0.05 and c ∈ [1.2 : 6] with a
-step ∆c = 0.2. We point out that in Kowalczyk et al. (2019) the
-parameter c was fixed at c = 3 and now we fit it as a free pa￾rameter. As for the mock data in Section 3.2, different values of
-Υ0 were obtained with the transformation of velocity moments
-within the χ
-2 fitting routine. The values of ∆χ
-2
-for all stars and
-the populations are shown in the two panels of Fig. 13 (left and
-right-hand side, respectively). Due to the dense coverage of the
-grid, we decided to include only the values within 3σ from the
-fitted minimums (see Section 3.1).
-The profiles of the mass-to-light ratio, total density, total
-mass, and velocity anisotropy resulting from the χ
-2 distributions
-are presented in the consecutive rows of Fig. 14. The anisotropy
-profile for the populations is based on the fit of all stars but using
- 0
- 0.05
- 0.1
- 0.15
- 0.2
- 0.25
- 0 0.4 0.8 1.2 1.6
-M(R)
-R [kpc]
-all stars
-pop I
-pop II
-0
-40
-80
-120
-160
-200
- 0 0.4 0.8 1.2 1.6
-m2(R)[(km s-1
-)
-2
-]
-R [kpc]
--16
--8
-0
-8
-16
- 0 0.4 0.8 1.2 1.6
-m3(R)[10
-2(km s-1
-)
-3
-]
-R [kpc]
-0
-4
-8
-12
-16
- 0 0.4 0.8 1.2 1.6
-m4(R)[10
-4(km s-1
-)
-4
-]
-R [kpc]
-Fig. 12. Observables of the Fornax dSph used in our Schwarzschild
-modeling scheme. In rows: the fraction of the total number of stars, the
-2nd, 3rd, and 4th velocity moment. In red we present the values obtained
-for all stars whereas in orange and blue those for populations I and II,
-respectively.
-the confidence levels on Υ from the fit of two populations. Green
-lines indicate the values for the best-fitting models whereas the
-colored areas of decreasing intensity show the 1, 2, and 3 σ con￾fidence regions. Additionally, with black dashed lines we include
-the results from Kowalczyk et al. (2019) for comparison.
-As a result of freeing the steepness of the mass-to-light
-ratio profile (parameter c) with respect to the previous study
+K. Kowalczyk & E. L. Łokas: Multiple stellar populations in Schwarzschild modeling
+Table 2. Properties of the data samples for the Fornax dSph.
+Property ALL POP I POP II
+Number of stars (Nphot) 65 797 14 882 49 205
+Number of stars (Nspec) 3286 1136 1151
+Stars within 1.8 kpc 3268 1134 1130
+Fitted normalization (N0) [×10^4] 6.95 1.81 5.45
+Sérsic radius (RS) [kpc] 0.454 0.429 0.420
+Sérsic parameter (m) 0.808 0.807 0.898
+102
+103
+104
+105
+ 0.1 0.2 0.5 1 2
+n
+⋆(R) [kpc-2
+]
+R [kpc]
+all stars
+popI
+popII
+Fig. 11. Surface number density profiles of the photometric data samples for the Fornax dSph: all available stars (in red), the metal-rich population I (in orange), and the metal-poor population II (in blue). Thin
+vertical lines indicate r0 (see text) and the outer boundary of the spectroscopic data.
+Sérsic formula (Sérsic 1968). The profiles of number density for
+all stars and both populations together with the best-fitting Sérsic
+profiles are presented in Fig. 11. The colors follow the convention introduced in previous sections. Thin vertical lines indicate
+the innermost data point for the light profile for all stars and
+the outer boundary of the kinematic sample. The former, set at
+log r = −0.16, is also used as the minimum of the mass-to-light
+ratio profile (r0 in Eq. 5). The fitted parameters of the profiles,
+that is the normalization N0, the Sérsic radius RS, and the Sérsic
+parameter m, are included in the second part of Table 2.
+Figure 12 presents the profiles of the observables used in the
+Schwarzschild modeling: the fraction of stars and the 2nd, 3rd,
+and 4th velocity moments (top to bottom) for the three data samples: all stars, population I, and population II (in red, orange, and
+blue, respectively). The error bars indicate 1 σ sampling errors.
+The parameter space for Υ(r) has been probed as follows:
+a ∈ [0 : 1.85] with a step ∆a = 0.05 and c ∈ [1.2 : 6] with a
+step ∆c = 0.2. We point out that in Kowalczyk et al. (2019) the
+parameter c was fixed at c = 3 and now we fit it as a free parameter. As for the mock data in Section 3.2, different values of
+Υ0 were obtained with the transformation of velocity moments
+within the $\chi^2$
+fitting routine. The values of $\Delta\chi^2$
+for all stars and
+the populations are shown in the two panels of Fig. 13 (left and
+right-hand side, respectively). Due to the dense coverage of the
+grid, we decided to include only the values within 3σ from the
+fitted minimums (see Section 3.1).
+The profiles of the mass-to-light ratio, total density, total
+mass, and velocity anisotropy resulting from the $\chi^2$
+distributions
+are presented in the consecutive rows of Fig. 14. The anisotropy
+profile for the populations is based on the fit of all stars but using
+ 0
+ 0.05
+ 0.1
+ 0.15
+ 0.2
+ 0.25
+ 0 0.4 0.8 1.2 1.6
+M(R)
+R [kpc]
+all stars
+pop I
+pop II
+0
+40
+80
+120
+160
+200
+ 0 0.4 0.8 1.2 1.6
+m2(R)[(km s-1
+)
+2
+]
+R [kpc]
+-16
+-8
+0
+8
+16
+ 0 0.4 0.8 1.2 1.6
+m3(R)[10
+2(km s-1
+)
+3
+]
+R [kpc]
+0
+4
+8
+12
+16
+ 0 0.4 0.8 1.2 1.6
+m4(R)[10
+4(km s-1
+)
+4
+]
+R [kpc]
+Fig. 12. Observables of the Fornax dSph used in our Schwarzschild
+modeling scheme. In rows: the fraction of the total number of stars, the
+2nd, 3rd, and 4th velocity moment. In red we present the values obtained
+for all stars whereas in orange and blue those for populations I and II,
+respectively.
+the confidence levels on Υ from the fit of two populations. Green
+lines indicate the values for the best-fitting models whereas the
+colored areas of decreasing intensity show the 1, 2, and 3 σ confidence regions. Additionally, with black dashed lines we include
+the results from Kowalczyk et al. (2019) for comparison.
+As a result of freeing the steepness of the mass-to-light
+ratio profile (parameter c) with respect to the previous study
 Article number, page 9 of 12
-A&A proofs: manuscript no. Populations4
- 0
- 0.5
- 1
- 1.5
- 0
- 0.5
- 1
- 1.5
- 2
- 3
- 4
- 5
- 6
-ALL
-Υ0
-a
-c
- 0
- 0.5
- 1
- 1.5
- 0
- 0.5
- 1
- 1.5
- 2
- 3
- 4
- 5
- 6
-POPULATIONS
-Υ0
-a
-c
- 0
- 3
- 6
- 9
- 12
-χ
-2
--χ
-2
-min
-Fig. 13. Values of χ
-2
-relative to the fitted minimum within the range of 3σ confidence level for all stars (left panel) and for the populations (right
-panel) for the Fornax dSph.
-(Kowalczyk et al. 2019), we obtained higher estimates of the en￾closed total mass at larger radii. In particular, for the mass en￾closed within 1.8 kpc we get Mall(< 1.8 kpc) = 3.87+1.48
-−1.56 × 108
-M⊙ from the fit for all stars and Mpops(< 1.8 kpc) = 4.71+0.87
-−1.13 ×
-108 M⊙ from the fit of populations, while previously we had
-Mold(< 1.8 kpc) = 3.7
-+1.4
-−1.3
-× 108 M⊙.
-Interestingly, despite the significant shift of the position of
-χ
-2
-min (to c = 4.2 for all stars and 3.6 for populations), the ob￾tained profile of the anisotropy parameter remains decreasing or
-flat for all stars but changes to increasing from 0 to 0.5 for the
-populations. Nevertheless, even in the latter case the previous
-result agrees with the new finding within 1σ.
-The detailed analysis of the anisotropy is shown in Fig. 15
-where the middle and bottom panels present the profiles ob￾tained for each population separately. We notice that the profile
-for population I is decreasing or has a local minimum whereas
-for population II is increasing (from −0.25 to 0.5 for the best￾fitting model). Since population I is more concentrated, the last
-bins contain very few stars, which limits their credibility. The
-top panel of Fig. 15 presents the anisotropy of all stars calcu￾lated as a weighted superposition of two populations. With such
-approach we still obtain the increasing profile (from 0 to 0.5) but
-the previous result agrees with it only within 2σ.
-Since Fornax dSph is significantly elongated with the pro￾jected ellipticity of ǫ = 0.30 ± 0.01 (Irwin & Hatzidimitriou
-1995), we anticipate some bias in the obtained results caused
-by the spherically symmetric modeling. Kowalczyk et al. (2018)
-studied such bias in an axisymmetric simulated object qualita￾tively similar to Fornax and identified differences in the system￾atic errors depending on whether the galaxy was observed along
-its major or minor axis. Assuming that Fornax is observed along
-the line of sight in between these extremes, we expect the total
-mass profile to be slightly overestimated and the anisotropy to be
-underestimated, further strengthening the likelihood of the real
-anisotropy to be radial and its profile to be growing with radius
-with respect to the results of Kowalczyk et al. (2019).
-Both constant (like for our population I) and growing (pop￾ulation II) anisotropy profiles can arise from biased modeling
-of the real growing profile by observing an object along the
-minor and major axis, respectively. However, for the bias to
-occur in two populations presented here, their inner orienta￾tions would need to be opposite. Since such morphological fea￾tures are not supported by the photometric studies of Fornax
-(del Pino et al. 2015; Wang et al. 2019) which rather find a good
-spatial alignment between the stellar populations, we conclude
-that the anisotropy profiles of the two populations modeled in
-this work are indeed significantly distinct.
-Finally, it is worth noticing that the so-called mass-follows￾light model, that is the one following from the assumption that
-the total density traces the stellar distribution, is no longer sup￾ported by the fit of the populations. With our parametrization,
-the mass-follows-light model corresponds to a = 0 and whereas
-it is enclosed within 3σ for the fit of all stars, as was the case
-in Kowalczyk et al. (2019), the allowed values for the improved
-method are much larger, as demonstrated by the right panel of
-Fig. 13.
-5. Summary and discussion
-Building on the previously created implementation of the
-Schwarzschild orbit superposition method focused on modeling
-dSph galaxies of the Local Group (Kowalczyk et al. 2017, 2018,
-2019), we improved our tool by introducing multiple stellar pop￾ulations. Such an improvement is desirable and justified since
-many of the dwarfs show signs of multiple star formation bursts
-or extended star formation episodes. As the different populations
-trace the common underlying gravitational potential, one may
-expect a significant improvement in the estimates of not only the
-total mass content but also the orbit anisotropy since this robust
-modeling technique reproduces the anisotropy as a by-product
-of the modeling rather than taking it as an assumption.
-We have tested our hypothesis by modeling mock data gener￾ated from a galaxy formed in the Illustris simulation. Due to the
-limitations of the resolution, we chose a galaxy of mass a few or￾ders of magnitude larger than the estimated masses of classical
-dwarfs. Still, the galaxy possessed appropriate qualitative char￾acteristics, such as the lack of gas and an almost spherical shape,
+A&A proofs: manuscript no. Populations4
+ 0
+ 0.5
+ 1
+ 1.5
+ 0
+ 0.5
+ 1
+ 1.5
+ 2
+ 3
+ 4
+ 5
+ 6
+ALL
+Υ0
+a
+c
+ 0
+ 0.5
+ 1
+ 1.5
+ 0
+ 0.5
+ 1
+ 1.5
+ 2
+ 3
+ 4
+ 5
+ 6
+POPULATIONS
+Υ0
+a
+c
+ 0
+ 3
+ 6
+ 9
+ 12
+χ
+2
+-χ
+2
+min
+Fig. 13. Values of $\chi^2$
+relative to the fitted minimum within the range of 3σ confidence level
+for all stars (left panel) and for the populations (right
+panel) for the Fornax dSph.
+(Kowalczyk et al. 2019), we obtained higher estimates of the
+enclosed total mass at larger radii. In particular, for the
+mass enclosed within 1.8 kpc we get
+$M_{\rm all}(<1.8\,{\rm kpc}) = 3.87^{+1.48}_{-1.56} \times 10^{8}\,M_\odot$
+from the fit for all stars and
+$M_{\rm pops}(<1.8\,{\rm kpc}) = 4.71^{+0.87}_{-1.13} \times 10^{8}\,M_\odot$
+from the fit of populations,
+while previously we had
+$M_{\rm old}(<1.8\,{\rm kpc}) = 3.7^{+1.4}_{-1.3} \times 10^{8}\,M_\odot$.
+Interestingly, despite the significant shift of the position of
+$\chi^2_{\rm min}$ (to c = 4.2 for all stars and 3.6 for
+populations), the obtained profile of the
+anisotropy parameter remains decreasing or
+flat for all stars but changes to increasing from 0 to 0.5 for the
+populations. Nevertheless, even in the latter case the previous
+result agrees with the new finding within 1σ.
+The detailed analysis of the anisotropy is shown in Fig. 15
+where the middle and bottom panels present the profiles obtained for each population separately. We notice that the profile
+for population I is decreasing or has a local minimum whereas
+for population II is increasing (from −0.25 to 0.5 for the best-fitting model). Since population I is more concentrated, the last
+bins contain very few stars, which limits their credibility. The
+top panel of Fig. 15 presents the anisotropy of all stars calculated as a weighted superposition of two populations. With such
+approach we still obtain the increasing profile (from 0 to 0.5) but
+the previous result agrees with it only within 2σ.
+Since Fornax dSph is significantly elongated with the projected ellipticity of ϵ = 0.30 ± 0.01 (Irwin & Hatzidimitriou
+1995), we anticipate some bias in the obtained results caused
+by the spherically symmetric modeling. Kowalczyk et al. (2018)
+studied such bias in an axisymmetric simulated object qualitatively similar to Fornax and identified differences in the systematic errors depending on whether the galaxy was observed along
+its major or minor axis. Assuming that Fornax is observed along
+the line of sight in between these extremes, we expect the total
+mass profile to be slightly overestimated and the anisotropy to be
+underestimated, further strengthening the likelihood of the real
+anisotropy to be radial and its profile to be growing with radius
+with respect to the results of Kowalczyk et al. (2019).
+Both constant (like for our population I) and growing (population II) anisotropy profiles can arise from biased modeling
+of the real growing profile by observing an object along the
+minor and major axis, respectively. However, for the bias to
+occur in two populations presented here, their inner orientations would need to be opposite. Since such morphological features are not supported by the photometric studies of Fornax
+(del Pino et al. 2015; Wang et al. 2019) which rather find a good
+spatial alignment between the stellar populations, we conclude
+that the anisotropy profiles of the two populations modeled in
+this work are indeed significantly distinct.
+Finally, it is worth noticing that the so-called mass-follows-light model, that is the one following from the assumption that
+the total density traces the stellar distribution, is no longer supported by the fit of the populations. With our parametrization,
+the mass-follows-light model corresponds to a = 0 and whereas
+it is enclosed within 3σ for the fit of all stars, as was the case
+in Kowalczyk et al. (2019), the allowed values for the improved
+method are much larger, as demonstrated by the right panel of
+Fig. 13.
+5. Summary and discussion
+Building on the previously created implementation of the
+Schwarzschild orbit superposition method focused on modeling
+dSph galaxies of the Local Group (Kowalczyk et al. 2017, 2018,
+2019), we improved our tool by introducing multiple stellar populations. Such an improvement is desirable and justified since
+many of the dwarfs show signs of multiple star formation bursts
+or extended star formation episodes. As the different populations
+trace the common underlying gravitational potential, one may
+expect a significant improvement in the estimates of not only the
+total mass content but also the orbit anisotropy since this robust
+modeling technique reproduces the anisotropy as a by-product
+of the modeling rather than taking it as an assumption.
+We have tested our hypothesis by modeling mock data generated from a galaxy formed in the Illustris simulation. Due to the
+limitations of the resolution, we chose a galaxy of mass a few orders of magnitude larger than the estimated masses of classical
+dwarfs. Still, the galaxy possessed appropriate qualitative characteristics, such as the lack of gas and an almost spherical shape,
 Article number, page 10 of 12
-K. Kowalczyk & E. L. Łokas: Multiple stellar populations in Schwarzschild modeling
-101
-103
-105
- 0.1 1
-Υ(r) [M⊙/L⊙]
-r [kpc]
-ALL
- 0.1 1
-r [kpc]
-POPULATIONS
-3σ
-2σ
-1σ
-best model
-K19
-104
-106
-108
- 0.1 1
-νtot(r) [M⊙ kpc-3
-]
-r [kpc]
- 0.1 1
-r [kpc]
-105
-107
-109
- 0.1 1
-Mtot(r) [M⊙]
-r [kpc]
- 0.1 1
-r [kpc]
--3
--2
--1
-0
-1
- 0 0.4 0.8 1.2 1.6
-β(r)
-r [kpc]
- 0 0.4 0.8 1.2 1.6
-r [kpc]
-Fig. 14. Results of Schwarzschild modeling of the Fornax dSph.
-In rows: derived mass-to-light ratio, total density, total mass, and
-anisotropy parameter. In columns: results for all stars and the popula￾tions, respectively. Green lines indicate the values for the best-fit models
-whereas the colored areas of decreasing intensity show the 1, 2, and 3σ
-confidence regions. The best-fitting values obtained by Kowalczyk et al.
-(2019) are shown with black dashed lines.
-that made it a good test bed for modeling techniques applica￾ble to dSph galaxies. We applied our approach to all data and
-to two stellar populations separately, comparing the accuracy of
-the obtained results. Although the addition of the second tracer
-seemingly increases the number of constraints twice, the incre￾ment is somewhat compromised by the sampling errors since the
-number of stars in each sample is then reduced. Still, we found
-strong improvements in the accuracy of the method when us￾ing two populations. The results of the modeling show that the
-density and velocity anisotropy profiles are more strongly con￾strained, most importantly at the 3 σ level, that is the range of
-allowed values is much narrower.
-Similarly to the conclusions of Kowalczyk et al. (2018) who
-explored the effects of nonsphericity using large and small
-data samples, the comparison of results presented in the left￾and right-hand side panels of Fig. 8 suggests that the improved
-method using two stellar populations gives more precise but less
-accurate outcome. However, in both studies the apparent dete￾rioration of the reliability is a consequence of modeling of a
-nonspherical object. In both cases, a simpler approach (much
-smaller data samples or using one stellar population) resulted
--2
--1
-0
-1
- 0 0.4 0.8 1.2 1.6
-POP I + POP II
-β(r)
-r [kpc]
--2
--1
-0
-1
- 0 0.4 0.8 1.2 1.6
-POP I
-β(r)
-r [kpc]
--2
--1
-0
-1
- 0 0.4 0.8 1.2 1.6
-POP II
-β(r)
-r [kpc]
-best model
-1σ
-2σ
-3σ
-K19
-Fig. 15. Profiles of the anisotropy parameter obtained with the
-Schwarzschild modeling of two stellar populations for the Fornax dSph.
-In rows: results for all stars (calculated as the superposition of two pop￾ulations), population I, and population II. Color lines indicate values
-for the best-fit models whereas the colored areas of decreasing intensity
-show the 1, 2, and 3σ confidence regions. The dashed black line shows
-the result from Kowalczyk et al. (2019) for comparison.
-in larger final uncertainties, usually containing the true values
-within 1 σ confidence region. On the other hand, the improved
-methods exhibit substantially reduced uncertainties, highlighting
-the underlying bias.
-Our method parametrizes the total mass content with the
-mass-to-light ratio varying with radius as a power-law in the log￾log scale. We made two main changes with respect to our previ￾ous work: we added a third parameter c controlling the steepness
-of the mass-to-light ratio profile (previously fixed at the value of
-3) and allowed for different stellar density profiles (previously
-only Sérsic, now also King). These changes are of course cou￾pled since different density profiles require different exponents to
-reproduce the same mass profile. It is visible also in our results
-since the King profile applied in the simulated galaxy gave us
-values of c lower than 3. Nevertheless, we decided to use differ￾ent density profiles to make our method more general and appli￾cable to objects, such as our Illustris galaxy, for which the Sérsic
-formula does not provide a good approximation of the density
-distribution.
-Finally, we applied the improved method to the data for the
-Fornax dSph galaxy. Due to the addition of another free param￾eter in our functional form for the mass-to-light ratio, our re￾sults for modeling all stars are slightly different from the ones
+K. Kowalczyk & E. L. Łokas: Multiple stellar populations in Schwarzschild modeling
+101
+103
+105
+ 0.1 1
+Υ(r) [M⊙/L⊙]
+r [kpc]
+ALL
+ 0.1 1
+r [kpc]
+POPULATIONS
+3σ
+2σ
+1σ
+best model
+K19
+104
+106
+108
+ 0.1 1
+νtot(r) [M⊙ kpc-3
+]
+r [kpc]
+ 0.1 1
+r [kpc]
+105
+107
+109
+ 0.1 1
+Mtot(r) [M⊙]
+r [kpc]
+ 0.1 1
+r [kpc]
+-3
+-2
+-1
+0
+1
+ 0 0.4 0.8 1.2 1.6
+β(r)
+r [kpc]
+ 0 0.4 0.8 1.2 1.6
+r [kpc]
+Fig. 14. Results of Schwarzschild modeling of the Fornax dSph.
+In rows: derived mass-to-light ratio, total density, total mass, and
+anisotropy parameter. In columns: results for all stars and the populations, respectively. Green lines indicate the values for the best-fit models
+whereas the colored areas of decreasing intensity show the 1, 2, and 3σ
+confidence regions. The best-fitting values obtained by Kowalczyk et al.
+(2019) are shown with black dashed lines.
+that made it a good test bed for modeling techniques applicable to dSph galaxies. We applied our approach to all data and
+to two stellar populations separately, comparing the accuracy of
+the obtained results. Although the addition of the second tracer
+seemingly increases the number of constraints twice, the increment is somewhat compromised by the sampling errors since the
+number of stars in each sample is then reduced. Still, we found
+strong improvements in the accuracy of the method when using two populations. The results of the modeling show that the
+density and velocity anisotropy profiles are more strongly constrained, most importantly at the 3 σ level, that is the range of
+allowed values is much narrower.
+Similarly to the conclusions of Kowalczyk et al. (2018) who
+explored the effects of nonsphericity using large and small
+data samples, the comparison of results presented in the left- and right-hand side panels of Fig. 8 suggests that the improved
+method using two stellar populations gives more precise but less
+accurate outcome. However, in both studies the apparent deterioration of the reliability is a consequence of modeling of a
+nonspherical object. In both cases, a simpler approach (much
+smaller data samples or using one stellar population) resulted
+-2
+-1
+0
+1
+ 0 0.4 0.8 1.2 1.6
+POP I + POP II
+β(r)
+r [kpc]
+-2
+-1
+0
+1
+ 0 0.4 0.8 1.2 1.6
+POP I
+β(r)
+r [kpc]
+-2
+-1
+0
+1
+ 0 0.4 0.8 1.2 1.6
+POP II
+β(r)
+r [kpc]
+best model
+1σ
+2σ
+3σ
+K19
+Fig. 15. Profiles of the anisotropy parameter obtained with the
+Schwarzschild modeling of two stellar populations for the Fornax dSph.
+In rows: results for all stars (calculated as the superposition of two populations), population I, and population II. Color lines indicate values
+for the best-fit models whereas the colored areas of decreasing intensity
+show the 1, 2, and 3σ confidence regions. The dashed black line shows
+the result from Kowalczyk et al. (2019) for comparison.
+in larger final uncertainties, usually containing the true values
+within 1 σ confidence region. On the other hand, the improved
+methods exhibit substantially reduced uncertainties, highlighting
+the underlying bias.
+Our method parametrizes the total mass content with the
+mass-to-light ratio varying with radius as a power-law in the log-log scale. We made two main changes with respect to our previous work: we added a third parameter c controlling the steepness
+of the mass-to-light ratio profile (previously fixed at the value of
+3) and allowed for different stellar density profiles (previously
+only Sérsic, now also King). These changes are of course coupled since different density profiles require different exponents to
+reproduce the same mass profile. It is visible also in our results
+since the King profile applied in the simulated galaxy gave us
+values of c lower than 3. Nevertheless, we decided to use different density profiles to make our method more general and applicable to objects, such as our Illustris galaxy, for which the Sérsic
+formula does not provide a good approximation of the density
+distribution.
+Finally, we applied the improved method to the data for the
+Fornax dSph galaxy. Due to the addition of another free parameter in our functional form for the mass-to-light ratio, our results for modeling all stars are slightly different from the ones
 Article number, page 11 of 12
-A&A proofs: manuscript no. Populations4
-obtained in Kowalczyk et al. (2019). However, in terms of the
-total density and mass distribution the estimates obtained here
-agree very well with those earlier results in the range covered
-by the data. Therefore, the detailed comparison with other esti￾mates from the literature presented in Kowalczyk et al. (2019) is
-still valid and we do not repeat it here.
-A more significant difference with respect to these previous
-estimates is seen in the results of modeling two populations in
-Fornax. In this case we find the anisotropy to be slightly increas￾ing rather than decreasing with radius and, most importantly, the
-confidence regions for this parameter, as well as for the den￾sity, are much narrower. We were thus able to obtain tighter con￾straints on the properties of Fornax, which means that the im￾proved method is successful. For the first time, we were also able
-to deduce the velocity anisotropy profiles for each of the popula￾tions separately. We found that the more concentrated, metal-rich
-population I has a decreasing anisotropy profile while the more
-extended, metal-poor population II has the anisotropy increasing
-with radius. This finding may partially explain the large spread
-of the anisotropy values obtained in the literature and summa￾rized in Table 2 and 3 of Kowalczyk et al. (2019), which were
-often based on modeling subsamples of our spectroscopic data
-set.
-For both studied objects we split the stars into two popula￾tions by dividing them in half based on their metallicity, Z (in
-solar units), for the Illustris galaxy and [Fe/H] for Fornax. Such
-a method is approximate but justified. Both galaxies have com￾plex star formation history with multiple star formation bursts, as
-demonstrated by Fig. 1 in this work and Fig. 7 in del Pino et al.
-(2013), producing multiple stellar populations which cannot be
-easily tracked as the metallicity is a good but not perfect proxy
-for the stellar age. Moreover, the metallicity histograms for both
-objects are approximately unimodal not allowing for a conve￾nient separation. More refined methods of division have been
-suggested in the literature, for example in the form of the likeli￾hood function based on the position, velocity, and metallicity in￾dex (Walker & Peñarrubia 2011). However, the likelihood func￾tion requires many assumptions which introduce additional un￾certainties into the treatment of the data. On the other hand, our
-approach ensures the maximization of each sample (and there￾fore minimization of sampling errors) while capturing the im￾portant features of the star formation history.
-Further improvements to the Schwarzschild modeling
-method are certainly possible. One way to proceed would be to
-include the modeling of the proper motions of the stars. For now,
-measurements of transverse velocities are available only for the
-brightest stars in dSph galaxies, but even small samples of this
-type could provide further constraints on the models, as demon￾strated by Strigari et al. (2007) and Massari et al. (2020).
-Acknowledgements. We are grateful to Andrés del Pino for providing the data for
-the Fornax dSph and to the Illustris team for making their simulations publicly
-available. Useful comments from the anonymous referee are kindly appreciated.
-This research was supported by the Polish National Science Center under grant
-2018/28/C/ST9/00529.
-References
-Amorisco, N. C., & Evans, N. W. 2012, MNRAS, 419, 184
-Battaglia, G., Helmi, A., Tolstoy, E., et al. 2008, ApJ, 681, L13
-Bellazzini, M., Ferraro, F. R., & Pancino, E. 2001, MNRAS, 327, L15
-Binney, J., & Tremaine, S. 2008, Galactic Dynamics, 2nd edn. (Princeton Uni￾versity Press, Princeton)
-Breddels, M. A., & Helmi, A. 2013, A&A, 558, A35
-Breddels, M. A., Helmi, A., van den Bosch, R. C. E., van de Ven, G., & Battaglia,
-G. 2013, MNRAS, 433, 3173
-del Pino, A., Hidalgo, S. L., Aparicio, A., et al. 2013, MNRAS, 433, 1505
-del Pino, A., Aparicio, A., & Hidalgo, S. L. 2015, MNRAS, 454, 3996
-del Pino, A., Aparicio, A., Hidalgo, S. L., & Łokas, E. L. 2017, MNRAS, 465,
-3708
-Fabrizio, M., Bono, G., Nonino, M., et al. 2016, ApJ, 830, 126
-Gebhardt, K., Richstone, D., Tremaine, S., et al. 2003, ApJ, 583, 92
-Genel, S., Fall, S. M., Hernquist, L., et al. 2015, ApJ, 804, L40
-Genel, S., Vogelsberger, M., Springel, V., et al. 2014, MNRAS, 445, 175
-Genina, A., Benitez-Llambay, A., Frenk, C. S., et al. 2018, MNRAS, 474, 1398
-Hayashi, K., Fabrizio, M., Łokas, E. L., et al. 2018, MNRAS, 481, 250
-Irwin, M., & Hatzidimitriou, D. 1995, MNRAS, 277, 1354
-Jardel, J. R., & Gebhardt, K. 2012, ApJ, 746, 89
-Jardel, J. R., Gebhardt, K., Fabricius, M. H., Drory, N., & Williams, M. J. 2013,
-ApJ, 763, 91
-King, I. 1962, AJ, 67, 471
-Kowalczyk, K., Łokas, E. L., Kazantzidis, S., & Mayer, L. 2013, MNRAS, 431,
-2796
-Kowalczyk, K., Łokas, E. L., & Valluri, M. 2017, MNRAS, 470, 3959
-Kowalczyk, K., Łokas, E. L., & Valluri, M. 2018, MNRAS, 476, 2918
-Kowalczyk, K., del Pino, A., Łokas, E. L., & Valluri, M. 2019, MNRAS, 482,
-5241
-Łokas, E. L., 2002, MNRAS, 333, 697
-Łokas, E. L., Mamon, G. A., & Prada, F. 2005, MNRAS, 363, 918
-Massari, D., Helmi, A., Mucciarelli, A. et al. 2020, A&A, 633, A36
-Mateo, M. 1998, ARA&A, 36, 435
-Nelson, D., Pillepich, A., Genel, S., et al. 2015, Astronomy and Computing, 13,
-12
-Pace, A. B., Kaplinghat, M., Kirby, E., et al. 2020, MNRAS, 495, 3022
-Press, W. H., Teukolsky, S. A., Vetterling, W. T., & Flannery, B. P. 1992, Numer￾ical Recipes in C, 2nd edn. (Cambridge University Press, Cambridge)
-Schwarzschild, M. 1979, ApJ, 232, 236
-Sérsic, J. L. 1968, Atlas de Galaxias Australes (Observatorio Astronomico, Cor￾doba, Argentina)
-Strigari, L. E., Bullock, J. S., & Kaplinghat, M. 2007, ApJ, 657, L1
-Tolstoy, E., Hill, V., & Tosi, M. 2009, ARA&A, 47, 371
-Valluri, M., Merritt, D., & Emsellem, E. 2004, ApJ, 602, 66
-van der Marel, R. P., Cretton, N., de Zeeuw, P. T., & Rix, H.-W. 1998, ApJ, 493,
-613
-Vogelsberger, M., Genel, S., Springel, V., et al. 2014a, Nature, 509, 177
-Vogelsberger, M., Genel, S., Springel, V., et al. 2014b, MNRAS, 444, 1518
-Walker, M. G., & Peñarrubia, J. 2011, ApJ, 742, 20
-Wang, M. Y., de Boer, T., Pieres, A., et al. 2019, ApJ, 881, 118
-Article number, page 12 of 12
+A&A proofs: manuscript no. Populations4
+obtained in Kowalczyk et al. (2019). However, in terms of the
+total density and mass distribution the estimates obtained here
+agree very well with those earlier results in the range covered
+by the data. Therefore, the detailed comparison with other estimates from the literature presented in Kowalczyk et al. (2019) is
+still valid and we do not repeat it here.
+A more significant difference with respect to these previous
+estimates is seen in the results of modeling two populations in
+Fornax. In this case we find the anisotropy to be slightly increasing rather than decreasing with radius and, most importantly, the
+confidence regions for this parameter, as well as for the density, are much narrower. We were thus able to obtain tighter constraints on the properties of Fornax, which means that the improved method is successful. For the first time, we were also able
+to deduce the velocity anisotropy profiles for each of the populations separately. We found that the more concentrated, metal-rich
+population I has a decreasing anisotropy profile while the more
+extended, metal-poor population II has the anisotropy increasing
+with radius. This finding may partially explain the large spread
+of the anisotropy values obtained in the literature and summarized in Table 2 and 3 of Kowalczyk et al. (2019), which were
+often based on modeling subsamples of our spectroscopic data
+set.
+For both studied objects we split the stars into two populations by dividing them in half based on their metallicity, Z (in
+solar units), for the Illustris galaxy and [Fe/H] for Fornax. Such
+a method is approximate but justified. Both galaxies have complex star formation history with multiple star formation bursts, as
+demonstrated by Fig. 1 in this work and Fig. 7 in del Pino et al.
+(2013), producing multiple stellar populations which cannot be
+easily tracked as the metallicity is a good but not perfect proxy
+for the stellar age. Moreover, the metallicity histograms for both
+objects are approximately unimodal not allowing for a convenient separation. More refined methods of division have been
+suggested in the literature, for example in the form of the likelihood function based on the position, velocity, and metallicity index (Walker & Peñarrubia 2011). However, the likelihood function requires many assumptions which introduce additional uncertainties into the treatment of the data. On the other hand, our
+approach ensures the maximization of each sample (and therefore minimization of sampling errors) while capturing the important features of the star formation history.
+Further improvements to the Schwarzschild modeling
+method are certainly possible. One way to proceed would be to
+include the modeling of the proper motions of the stars. For now,
+measurements of transverse velocities are available only for the
+brightest stars in dSph galaxies, but even small samples of this
+type could provide further constraints on the models, as demonstrated by Strigari et al. (2007) and Massari et al. (2020).
+Acknowledgements. We are grateful to Andrés del Pino for providing the data for
+the Fornax dSph and to the Illustris team for making their simulations publicly
+available. Useful comments from the anonymous referee are kindly appreciated.
+This research was supported by the Polish National Science Center under grant
+2018/28/C/ST9/00529.
+References
+Amorisco, N. C., & Evans, N. W. 2012, MNRAS, 419, 184
+Battaglia, G., Helmi, A., Tolstoy, E., et al. 2008, ApJ, 681, L13
+Bellazzini, M., Ferraro, F. R., & Pancino, E. 2001, MNRAS, 327, L15
+Binney, J., & Tremaine, S. 2008, Galactic Dynamics, 2nd edn. (Princeton University Press, Princeton)
+Breddels, M. A., & Helmi, A. 2013, A&A, 558, A35
+Breddels, M. A., Helmi, A., van den Bosch, R. C. E., van de Ven, G., & Battaglia,
+G. 2013, MNRAS, 433, 3173
+del Pino, A., Hidalgo, S. L., Aparicio, A., et al. 2013, MNRAS, 433, 1505
+del Pino, A., Aparicio, A., & Hidalgo, S. L. 2015, MNRAS, 454, 3996
+del Pino, A., Aparicio, A., Hidalgo, S. L., & Łokas, E. L. 2017, MNRAS, 465,
+3708
+Fabrizio, M., Bono, G., Nonino, M., et al. 2016, ApJ, 830, 126
+Gebhardt, K., Richstone, D., Tremaine, S., et al. 2003, ApJ, 583, 92
+Genel, S., Fall, S. M., Hernquist, L., et al. 2015, ApJ, 804, L40
+Genel, S., Vogelsberger, M., Springel, V., et al. 2014, MNRAS, 445, 175
+Genina, A., Benitez-Llambay, A., Frenk, C. S., et al. 2018, MNRAS, 474, 1398
+Hayashi, K., Fabrizio, M., Łokas, E. L., et al. 2018, MNRAS, 481, 250
+Irwin, M., & Hatzidimitriou, D. 1995, MNRAS, 277, 1354
+Jardel, J. R., & Gebhardt, K. 2012, ApJ, 746, 89
+Jardel, J. R., Gebhardt, K., Fabricius, M. H., Drory, N., & Williams, M. J. 2013,
+ApJ, 763, 91
+King, I. 1962, AJ, 67, 471
+Kowalczyk, K., Łokas, E. L., Kazantzidis, S., & Mayer, L. 2013, MNRAS, 431,
+2796
+Kowalczyk, K., Łokas, E. L., & Valluri, M. 2017, MNRAS, 470, 3959
+Kowalczyk, K., Łokas, E. L., & Valluri, M. 2018, MNRAS, 476, 2918
+Kowalczyk, K., del Pino, A., Łokas, E. L., & Valluri, M. 2019, MNRAS, 482,
+5241
+Łokas, E. L., 2002, MNRAS, 333, 697
+Łokas, E. L., Mamon, G. A., & Prada, F. 2005, MNRAS, 363, 918
+Massari, D., Helmi, A., Mucciarelli, A. et al. 2020, A&A, 633, A36
+Mateo, M. 1998, ARA&A, 36, 435
+Nelson, D., Pillepich, A., Genel, S., et al. 2015, Astronomy and Computing, 13,
+12
+Pace, A. B., Kaplinghat, M., Kirby, E., et al. 2020, MNRAS, 495, 3022
+Press, W. H., Teukolsky, S. A., Vetterling, W. T., & Flannery, B. P. 1992, Numerical Recipes in C, 2nd edn. (Cambridge University Press, Cambridge)
+Schwarzschild, M. 1979, ApJ, 232, 236
+Sérsic, J. L. 1968, Atlas de Galaxias Australes (Observatorio Astronomico, Cordoba, Argentina)
+Strigari, L. E., Bullock, J. S., & Kaplinghat, M. 2007, ApJ, 657, L1
+Tolstoy, E., Hill, V., & Tosi, M. 2009, ARA&A, 47, 371
+Valluri, M., Merritt, D., & Emsellem, E. 2004, ApJ, 602, 66
+van der Marel, R. P., Cretton, N., de Zeeuw, P. T., & Rix, H.-W. 1998, ApJ, 493,
+613
+Vogelsberger, M., Genel, S., Springel, V., et al. 2014a, Nature, 509, 177
+Vogelsberger, M., Genel, S., Springel, V., et al. 2014b, MNRAS, 444, 1518
+Walker, M. G., & Peñarrubia, J. 2011, ApJ, 742, 20
+Wang, M. Y., de Boer, T., Pieres, A., et al. 2019, ApJ, 881, 118
+Article number, page 12 of 12
\ No newline at end of file
diff --git a/read/results/pdfium/2201.00178.txt b/read/results/pdfium/2201.00178.txt
index dcee01b..f8a31e6 100644
--- a/read/results/pdfium/2201.00178.txt
+++ b/read/results/pdfium/2201.00178.txt
@@ -1,1077 +1,1033 @@
-Draft version January 4, 2022
-Typeset using LATEX default style in AASTeX631
-Imaging the Sun’s near-surface flows using mode-coupling analysis
-Prasad Mani ,
-1 Chris S. Hanson ,
-2 and Shravan Hanasoge 1, 2
-1Department of Astronomy and Astrophysics, Tata Institute of Fundamental Research, Mumbai, India
-2Center for Space Science, NYUAD Institute, New York University Abu Dhabi, Abu Dhabi, UAE
-ABSTRACT
-The technique of normal-mode coupling is a powerful tool with which to seismically image non￾axisymmetric phenomena in the Sun. Here we apply mode coupling in the Cartesian approximation to
-probe steady, near-surface flows in the Sun. Using Doppler cubes obtained from the Helioseismic and
-Magnetic Imager onboard the Solar Dynamics Observatory, we perform inversions on mode-coupling
-measurements to show that the resulting divergence and radial vorticity maps at supergranular length
-scales (∼30 Mm) near the surface compare extremely well with those obtained using the Local Corre￾lation Tracking method. We find that the Pearson correlation coefficient is ≥ 0.9 for divergence flows,
-while ≥ 0.8 is obtained for the radial vorticity.
-Keywords: Helioseismology (709); Solar physics (1476); Supergranulation (1662)
-1. INTRODUCTION
-Helioseismology is the study of the Sun’s internal structure and its properties, by means of interpreting its effect
-on solar oscillations (see Christensen-Dalsgaard 2002, for a review). These are resonant normal modes of the Sun,
-behaving as standing waves in a cavity bounded by the solar surface and a depth that depends on the wavenumber
-of the oscillation. As these waves penetrate the interior, they register information of the properties and dynamics of
-the solar interior and return to the surface, where they are observed. The internal structure of the Sun can then be
-retrieved through meticulous inversions of these seismic measurements.
-Several important flow systems on the Sun have been inferred using various global and local helioseismic methods.
-Of those, the most notable global helioseismic results include inferences on the solar differential rotation, through
-global mode frequency splitting (Thompson et al. 1996; Schou et al. 1998), and the resolving the neutrino problem
-(Bahcall & Pinsonneault 1992). Notable local helioseismic results include imaging of the meridional flow (Giles et al.
-1997; Gizon et al. 2020) through time-distance helioseismology (Duvall et al. 1993), and farside imaging of active
-regions (Braun & Lindsey 2001) and their near side emergence (Birch et al. 2016), through helioseismic holography
-(Lindsey & Braun 2000). The recent discovery of various inertial waves (Gizon et al. 2021), including the equatorial
-Rossby wave (L¨optien et al. 2018), has been achieved through local helioseismic ring-diagram analysis (Hill 1988) and
-the non-helioseismic local correlation tracking (LCT, November & Simon 1988) of granulation.
-In recent years, the use of global mode-coupling helioseismology (Woodard 1989; Lavely & Ritzwoller 1992) has
-received attention, with many studies seeking to validate and demonstrate the importance of such a technique for
-investigating numerous solar phenomena. While the derivation of the mode-coupling technique is mathematically
-challenging, the data analysis is simple and utilizes all the information registered by the mode. Thus far, global
-mode-coupling has been validated through observations of the meridional flow (Vorontsov 2011; Woodard et al. 2013),
-differential rotation (Schad & Roth 2020; Kashyap et al. 2021), global-scale convection (Woodard 2014, 2016; Hanasoge
-et al. 2020; Mani & Hanasoge 2021) and Rossby modes (Hanasoge & Mandal 2019; Mandal & Hanasoge 2020; Mandal
-et al. 2021). Local mode-coupling analysis in the Cartesian approximation, formulated by Woodard (2006), was
-validated by Hanson et al. (2021) (hereafter H21) by examining the power-spectrum of supergranular waves and
-comparing with previous time-distance studies (Langfellner et al. 2018).
-prasad.subramanian@tifr.res.in
+Draft version January 4, 2022
+Typeset using LATEX default style in AASTeX631
+Imaging the Sun’s near-surface flows using mode-coupling analysis
+Prasad Mani,1 Chris S. Hanson,2 and Shravan Hanasoge1, 2
+1Department of Astronomy and Astrophysics, Tata Institute of Fundamental Research, Mumbai, India
+2Center for Space Science, NYUAD Institute, New York University Abu Dhabi, Abu Dhabi, UAE
+ABSTRACT
+The technique of normal-mode coupling is a powerful tool with which to seismically image nonaxisymmetric phenomena in the Sun. Here we apply mode coupling in the Cartesian approximation to
+probe steady, near-surface flows in the Sun. Using Doppler cubes obtained from the Helioseismic and
+Magnetic Imager onboard the Solar Dynamics Observatory, we perform inversions on mode-coupling
+measurements to show that the resulting divergence and radial vorticity maps at supergranular length
+scales (∼30 Mm) near the surface compare extremely well with those obtained using the Local Correlation Tracking method. We find that the Pearson correlation coefficient is ≥ 0.9 for divergence flows,
+while ≥ 0.8 is obtained for the radial vorticity.
+Keywords: Helioseismology (709); Solar physics (1476); Supergranulation (1662)
+1. INTRODUCTION
+Helioseismology is the study of the Sun’s internal structure and its properties, by means of interpreting its effect
+on solar oscillations (see Christensen-Dalsgaard 2002, for a review). These are resonant normal modes of the Sun,
+behaving as standing waves in a cavity bounded by the solar surface and a depth that depends on the wavenumber
+of the oscillation. As these waves penetrate the interior, they register information of the properties and dynamics of
+the solar interior and return to the surface, where they are observed. The internal structure of the Sun can then be
+retrieved through meticulous inversions of these seismic measurements.
+Several important flow systems on the Sun have been inferred using various global and local helioseismic methods.
+Of those, the most notable global helioseismic results include inferences on the solar differential rotation, through
+global mode frequency splitting (Thompson et al. 1996; Schou et al. 1998), and the resolving the neutrino problem
+(Bahcall & Pinsonneault 1992). Notable local helioseismic results include imaging of the meridional flow (Giles et al.
+1997; Gizon et al. 2020) through time-distance helioseismology (Duvall et al. 1993), and farside imaging of active
+regions (Braun & Lindsey 2001) and their near side emergence (Birch et al. 2016), through helioseismic holography
+(Lindsey & Braun 2000). The recent discovery of various inertial waves (Gizon et al. 2021), including the equatorial
+Rossby wave (Löptien et al. 2018), has been achieved through local helioseismic ring-diagram analysis (Hill 1988) and
+the non-helioseismic local correlation tracking (LCT, November & Simon 1988) of granulation.
+In recent years, the use of global mode-coupling helioseismology (Woodard 1989; Lavely & Ritzwoller 1992) has
+received attention, with many studies seeking to validate and demonstrate the importance of such a technique for
+investigating numerous solar phenomena. While the derivation of the mode-coupling technique is mathematically
+challenging, the data analysis is simple and utilizes all the information registered by the mode. Thus far, global
+mode-coupling has been validated through observations of the meridional flow (Vorontsov 2011; Woodard et al. 2013),
+differential rotation (Schad & Roth 2020; Kashyap et al. 2021), global-scale convection (Woodard 2014, 2016; Hanasoge
+et al. 2020; Mani & Hanasoge 2021) and Rossby modes (Hanasoge & Mandal 2019; Mandal & Hanasoge 2020; Mandal
+et al. 2021). Local mode-coupling analysis in the Cartesian approximation, formulated by Woodard (2006), was
+validated by Hanson et al. (2021) (hereafter H21) by examining the power-spectrum of supergranular waves and
+comparing with previous time-distance studies (Langfellner et al. 2018).
+prasad.subramanian@tifr.res.in
 arXiv:2201.00178v1 [astro-ph.SR] 1 Jan 2022
-2 Mani et al.
-Normal-mode coupling refers to the concept of expressing solar-oscillation eigenfunctions as a linear weighted combi￾nation of model-eigenfunctions (e.g., Model S Christensen-Dalsgaard 2021). The model eigenfunctions form a complete
-and orthogonal basis. By design, the model Sun is spherically symmetric, adiabatic, free from rotation, magnetism and
-flows. In this state, the oscillations are considered to be uncoupled. The weights needed to express the solar-oscillation
-eigenfunctions would then encode all the perturbations that are absent in the model. The forward problem then
-reduces to relating observed seismic measurements to the perturbations that we want to infer. The surface wavefield
-cross-correlation is the primary measurement in the mode-coupling analysis and can be directly related to the weights
-(Woodard 2016). As mode coupling is a Fourier domain technique, wavefields are cross-correlated at different spatial
-and temporal frequencies, leaving us with measurements sensitive to different quantities of interest.
-In this study, we extend the spectral analysis of H21 and develop the method to produce near-surface flow maps
-at supergranulation length scales. A part of the formalism that was used to derive the forward model in H21 is
-reworked, primarily to image steady flows. Measurements are then constructed, and inversions to infer divergence flow
-and radial vorticity are described. We also demonstrate signal associated with supergranular flow in a radial-order
-coupling (p2-p2), which was not shown in H21. This helps in localizing the measurement sensitivity to the surface.
-We compare our results with flows obtained using the Local Correlation Tracking method on solar granules.
-1.1. Forward problem
-In favor of algebraic brevity, we only show crucial steps here and refer the interested reader to Appendix A for a
-complete derivation of the forward problem. Working in the plane-parallel atmosphere (see also Woodard 2006), we
-denote the horizontal unit vectors ex and ey in our local Cartesian domain as pointing towards west and north on the
-solar surface, respectively, and ez points outwards. This approximation is valid when observing patches of the surface
-that are small when compared to the solar radius. When imaging steady, near-surface flows in the neighbourhood
-of the supergranular scale (∼ 30 Mm), we expect the measured spectral cross-correlation signal to peak around the
-horizontal wavenumber qR ≈ 120 (Rincon & Rieutord 2018), where q = |q| = |(qx, qy)| is the vector horizontal
-wavenumber of the flow. Accordingly, the goal is to relate measurements (linearly, to facilitate inversion) to the flow
-perturbation described in a horizontal Fourier domain. Supergranular velocities are subsonic (300-400 m/s, see Rincon
-& Rieutord 2018), permitting us to model the flow vector u = (ux, uy, uz) in the Cartesian domain like so (Unno et al.
-1989; Woodard 2006)
-u
-σ = ∇×[∇×(P ez)] + ∇×(T ez), (1)
-where P = P
-σ
-(x) and T = T
-σ
-(x) are poloidal and toroidal scalar functions, varying with position x and temporal
-frequency σ. ∇ is the 3D gradient operator. While mode-coupling can easily be extended to study time-varying
-perturbations (see Woodard 2016; Mani & Hanasoge 2020; Hanasoge et al. 2020; Mandal & Hanasoge 2020, for
-example), here we only consider the frequency bin σ = 0, denoting the temporally averaged flow over the period
-of analysis. We therefore suppress σ from all terms this point forward, remembering that temporal dynamics of
-perturbations may also be studied using the same model outlined in the following paragraphs. Simplifying eq 1 using
-vector calculus results in
-u = −∇2Pez + ∇(∂zP) + ∇hT×ez, (2)
-where ∇h refers to derivatives only in the horizontal direction. Mode-coupling helioseismology is performed in the
-Fourier domain, and since we wish to image horizontal flows on a small patch of the surface, we describe the flow as a
-function of horizontal wavenumber q and depth zez. Hence the poloidal and toroidal flows are described by Pq(z) and
-Tq(z), respectively. Furthermore, we parametrize the flow along ez using basis functions f(z) (Chebyshev, B-spline,
-etc). This is expressed as
-P ≡ Pq(z) = X
-j
-fj (z) Pqj , T ≡ Tq(z) = X
-j
-fj (z) Tqj . (3)
-The flow coefficients Pqj and Tqj , represented by the discrete indices q and j, become ideal candidates for inversions,
-where the flow for each wavenumber q can be inverted for independently; parallelization in computation can thus be
-exploited to expedite inversions. Note that Pqj = P
-∗
-−qj
-and Tqj = T
-∗
-−qj
-for the flow field to be real in the spatio￾temporal domain.
+2 Mani et al.
+Normal-mode coupling refers to the concept of expressing solar-oscillation eigenfunctions as a linear weighted combination of model-eigenfunctions (e.g., Model S Christensen-Dalsgaard 2021). The model eigenfunctions form a complete
+and orthogonal basis. By design, the model Sun is spherically symmetric, adiabatic, free from rotation, magnetism and
+flows. In this state, the oscillations are considered to be uncoupled. The weights needed to express the solar-oscillation
+eigenfunctions would then encode all the perturbations that are absent in the model. The forward problem then
+reduces to relating observed seismic measurements to the perturbations that we want to infer. The surface wavefield
+cross-correlation is the primary measurement in the mode-coupling analysis and can be directly related to the weights
+(Woodard 2016). As mode coupling is a Fourier domain technique, wavefields are cross-correlated at different spatial
+and temporal frequencies, leaving us with measurements sensitive to different quantities of interest.
+In this study, we extend the spectral analysis of H21 and develop the method to produce near-surface flow maps
+at supergranulation length scales. A part of the formalism that was used to derive the forward model in H21 is
+reworked, primarily to image steady flows. Measurements are then constructed, and inversions to infer divergence flow
+and radial vorticity are described. We also demonstrate signal associated with supergranular flow in a radial-order
+coupling (p2-p2), which was not shown in H21. This helps in localizing the measurement sensitivity to the surface.
+We compare our results with flows obtained using the Local Correlation Tracking method on solar granules.
+1.1. Forward problem
+In favor of algebraic brevity, we only show crucial steps here and refer the interested reader to Appendix A for a
+complete derivation of the forward problem. Working in the plane-parallel atmosphere (see also Woodard 2006), we
+denote the horizontal unit vectors ex and ey in our local Cartesian domain as pointing towards west and north on the
+solar surface, respectively, and ez points outwards. This approximation is valid when observing patches of the surface
+that are small when compared to the solar radius. When imaging steady, near-surface flows in the neighbourhood
+of the supergranular scale (∼ 30 Mm), we expect the measured spectral cross-correlation signal to peak around the
+horizontal wavenumber qR ≈ 120 (Rincon & Rieutord 2018), where q = |q| = |(qx, qy)| is the vector horizontal
+wavenumber of the flow. Accordingly, the goal is to relate measurements (linearly, to facilitate inversion) to the flow
+perturbation described in a horizontal Fourier domain. Supergranular velocities are subsonic (300-400 m/s, see Rincon
+& Rieutord 2018), permitting us to model the flow vector u = (ux, uy, uz) in the Cartesian domain like so (Unno et al.
+1989; Woodard 2006)
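+$$\mathbf{u}^{\sigma} = \nabla\times[\nabla\times(P\,\mathbf{e}_z)] + \nabla\times(T\,\mathbf{e}_z), \qquad (1)$$
+where $P = P^{\sigma}(\mathbf{x})$ and $T = T^{\sigma}(\mathbf{x})$ are poloidal and toroidal scalar functions, varying with position $\mathbf{x}$ and temporal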
+frequency σ. ∇ is the 3D gradient operator. While mode-coupling can easily be extended to study time-varying
+perturbations (see Woodard 2016; Mani & Hanasoge 2020; Hanasoge et al. 2020; Mandal & Hanasoge 2020, for
+example), here we only consider the frequency bin σ = 0, denoting the temporally averaged flow over the period
+of analysis. We therefore suppress σ from all terms this point forward, remembering that temporal dynamics of
+perturbations may also be studied using the same model outlined in the following paragraphs. Simplifying eq 1 using
+vector calculus results in
+$$\mathbf{u} = -\nabla^{2}P\,\mathbf{e}_z + \nabla(\partial_z P) + \nabla_h T\times\mathbf{e}_z, \qquad (2)$$
+where ∇h refers to derivatives only in the horizontal direction. Mode-coupling helioseismology is performed in the
+Fourier domain, and since we wish to image horizontal flows on a small patch of the surface, we describe the flow as a
+function of horizontal wavenumber q and depth zez. Hence the poloidal and toroidal flows are described by Pq(z) and
+Tq(z), respectively. Furthermore, we parametrize the flow along ez using basis functions f(z) (Chebyshev, B-spline,
+etc). This is expressed as
+$$P \equiv P_{\mathbf{q}}(z) = \sum_j f_j(z)\,P_{\mathbf{q}j}, \qquad T \equiv T_{\mathbf{q}}(z) = \sum_j f_j(z)\,T_{\mathbf{q}j}. \qquad (3)$$
+The flow coefficients Pqj and Tqj , represented by the discrete indices q and j, become ideal candidates for inversions,
+where the flow for each wavenumber q can be inverted for independently; parallelization in computation can thus be
+exploited to expedite inversions. Note that $P_{\mathbf{q}j} = P^{*}_{-\mathbf{q}j}$ and $T_{\mathbf{q}j} = T^{*}_{-\mathbf{q}j}$ for the flow field to be real in the spatio-temporal domain.
 To infer flows from wavefields φ scattered by a perturbation of length scale q, cross-correlate them in the manner
-Imaging near-surface flows using mode-coupling analysis 3
-φ
-ω∗
-k φ
-ω
-k+q
-, where k is the oscillation mode wavenumber (kx, ky) and ω is the temporal frequency. Relate φ
-ω∗
-k φ
-ω
-k+q
-thus
-to the flow coefficients Pqj and Tqj (see eq A7)
-hφ
-ω∗
-k φ
-ω
-k+q
-i = Hω
-kk0nn0
-X
-j
-Cqj,kPqj + Dqj,kTqj . (4)
-The weight factor Hω (see eq A8) is a function of frequency, capturing information about the extent of coupling between
-the two modes [n, k] and [n
-0
-, k0
-], where n and n
-0 are the radial orders of the modes, and k = |k| and k
-0 = |k
-0
-| = |k+q|.
-The spectral profile of the mode (see eq A9) is approximated using a Lorentzian (Anderson et al. 1990). The more the
-Lorentzians of the two modes overlap, the stronger the coupling. Finally, the real terms Cqj,k and Dqj,k are poloidal
-and toroidal flow sensitivity kernels respectively, that allow us to relate the flows in question to the measurements
-and are derived from the solar model see Appendix A. They possess the symmetry relation: Cqj,k = C−qj,−k and
-Dqj,k = D−qj,−k (see eq A6). The kernels, as flows, are expressed on the basis fj (z).
-1.2. Least-squares of cross-correlation
-Even though φ
-ω∗
-k φ
-ω
-k+q
-isolates the effect of flow perturbations at individual wavenumbers q, a more compact mea￾surement, known in mode-coupling literature as ’B-coefficients’, is much better designed for inversion as it reduces the
-dimension of the problem. A least-squares fit to the cross-correlation φ
-ω∗
-k φ
-ω
-k+q
-(see Woodard 2006, 2014, 2016) results
-in the B-coefficients Bk,q, according to
-Bk,q =
-P
-ω
-Hω∗
-kk0nn0φ
-ω∗
-k φ
-ω
-k+q
-P
-ω
-|Hω
-kk0nn0 |
-2
-. (5)
-Multiplying eq 4 on both sides by Hω∗
-kk0nn0 and substituting by eq 5 on the left-hand-side results in a concisely defined
-forward problem (compare with eq 4)
-Bk,q =
-X
-j
-Cqj,kPqj + Dqj,kTqj . (6)
-In eq 5, Woodard (2007) and H21 thus far only considered positive-frequency components in the summation over ω.
-Here, we sum over both ±ω within a few mode linewidths Γ. Denoting the resonant frequency of a mode using ωnk,
-|ω| ∈ 
-ωnk − Γnk/2, ωnk + Γnk/2
-
-or
-|ω| ∈ 
-ωn0k0 − Γn0k0/2, ωn0k0 + Γn0k0/2
-
-. (7)
-Summing over ±ω guarantees that the parity Bk,q = B∗
-−k,−q
-(see Appendix A for derivation) is obeyed, thereby
-ensuring that the flow field on the right-hand-side of eq 6 is a real physical quantity in the spatio-temporal domain.
-Taking the complex conjugate on both sides of eq 6 and considering the negative wavenumber components −q and
-−k,
-B
-∗
-−k,−q =
-X
-j
-C−qj,−kP
-∗
-−qj + D−qj,−kT
-∗
-−qj
-. (8)
-Substituting parity and symmetry relations for all terms in the above results in eq 6. As Bk,q is constructed by a
-least-squares fitting, it is noteworthy that summing over −ω will also lead to improvement in its signal-to-noise as a
-by-product.
-1.3. Noise model
-In the addition to the sensitivity kernels, a systematic background noise model is required to infer the flows from
-the observed B-coefficients. For estimating the contribution from realization noise to the measurements, we make the
-following assumptions (Gizon & Birch 2004): that the excitation of the wavefield is modelled as a multivariate Gaussian
-random process and the wavefields are uncorrelated across wavenumber and frequency in the absence of perturbations.
-Every independent realization of a mode can be understood as the output of a damped harmonic oscillator driven by a
-random forcing function (see Duvall & Harvey 1986). Modes are thus generated with random phases and amplitudes
+Imaging near-surface flows using mode-coupling analysis 3
+φ
+ω∗
+k φ
+ω
+k+q
+, where k is the oscillation mode wavenumber (kx, ky) and ω is the temporal frequency. Relate φ
+ω∗
+k φ
+ω
+k+q
+thus
+to the flow coefficients Pqj and Tqj (see eq A7)
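+$$\langle \phi^{\omega*}_{\mathbf{k}}\, \phi^{\omega}_{\mathbf{k}+\mathbf{q}} \rangle = H^{\omega}_{kk'nn'} \sum_j C_{\mathbf{q}j,\mathbf{k}} P_{\mathbf{q}j} + D_{\mathbf{q}j,\mathbf{k}} T_{\mathbf{q}j}. \quad (4)$$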
+The weight factor Hω (see eq A8) is a function of frequency, capturing information about the extent of coupling between
+the two modes $[n, k]$ and $[n', k']$, where $n$ and $n'$ are the radial orders of the modes, and $k = |\mathbf{k}|$ and $k' = |\mathbf{k}'| = |\mathbf{k}+\mathbf{q}|$.
+The spectral profile of the mode (see eq A9) is approximated using a Lorentzian (Anderson et al. 1990). The more the
+Lorentzians of the two modes overlap, the stronger the coupling. Finally, the real terms Cqj,k and Dqj,k are poloidal
+and toroidal flow sensitivity kernels respectively, that allow us to relate the flows in question to the measurements
+and are derived from the solar model (see Appendix A). They possess the symmetry relation: Cqj,k = C−qj,−k and
+Dqj,k = D−qj,−k (see eq A6). The kernels, as flows, are expressed on the basis fj (z).
+1.2. Least-squares of cross-correlation
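+Even though $\phi^{\omega*}_{\mathbf{k}} \phi^{\omega}_{\mathbf{k}+\mathbf{q}}$ isolates the effect of flow perturbations at individual wavenumbers $\mathbf{q}$, a more compact measurement, known in mode-coupling literature as ’B-coefficients’, is much better designed for inversion as it reduces the
+dimension of the problem. A least-squares fit to the cross-correlation $\phi^{\omega*}_{\mathbf{k}} \phi^{\omega}_{\mathbf{k}+\mathbf{q}}$ (see Woodard 2006, 2014, 2016) results
+in the B-coefficients $B_{\mathbf{k},\mathbf{q}}$, according to
+$$B_{\mathbf{k},\mathbf{q}} = \frac{\sum_{\omega} H^{\omega*}_{kk'nn'}\, \phi^{\omega*}_{\mathbf{k}}\, \phi^{\omega}_{\mathbf{k}+\mathbf{q}}}{\sum_{\omega} |H^{\omega}_{kk'nn'}|^{2}}. \quad (5)$$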
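+Multiplying eq 4 on both sides by $H^{\omega*}_{kk'nn'}$ and substituting by eq 5 on the left-hand-side results in a concisely defined
+forward problem (compare with eq 4)
+$$B_{\mathbf{k},\mathbf{q}} = \sum_j C_{\mathbf{q}j,\mathbf{k}} P_{\mathbf{q}j} + D_{\mathbf{q}j,\mathbf{k}} T_{\mathbf{q}j}. \quad (6)$$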
+In eq 5, Woodard (2007) and H21 thus far only considered positive-frequency components in the summation over ω.
+Here, we sum over both ±ω within a few mode linewidths Γ. Denoting the resonant frequency of a mode using ωnk,
+|ω| ∈ ωnk − Γnk/2, ωnk + Γnk/2
+
+or
+|ω| ∈ ωn0k0 − Γn0k0/2, ωn0k0 + Γn0k0/2
+
+. (7)
+Summing over ±ω guarantees that the parity $B_{\mathbf{k},\mathbf{q}} = B^{*}_{-\mathbf{k},-\mathbf{q}}$ (see Appendix A for derivation) is obeyed, thereby
+ensuring that the flow field on the right-hand-side of eq 6 is a real physical quantity in the spatio-temporal domain.
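+Taking the complex conjugate on both sides of eq 6 and considering the negative wavenumber components $-\mathbf{q}$ and $-\mathbf{k}$,
+$$B^{*}_{-\mathbf{k},-\mathbf{q}} = \sum_j C_{-\mathbf{q}j,-\mathbf{k}} P^{*}_{-\mathbf{q}j} + D_{-\mathbf{q}j,-\mathbf{k}} T^{*}_{-\mathbf{q}j}. \quad (8)$$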
+Substituting parity and symmetry relations for all terms in the above results in eq 6. As Bk,q is constructed by a
+least-squares fitting, it is noteworthy that summing over −ω will also lead to improvement in its signal-to-noise as a
+by-product.
+1.3. Noise model
+In addition to the sensitivity kernels, a systematic background noise model is required to infer the flows from
+the observed B-coefficients. For estimating the contribution from realization noise to the measurements, we make the
+following assumptions (Gizon & Birch 2004): that the excitation of the wavefield is modelled as a multivariate Gaussian
+random process and the wavefields are uncorrelated across wavenumber and frequency in the absence of perturbations.
+Every independent realization of a mode can be understood as the output of a damped harmonic oscillator driven by a
+random forcing function (see Duvall & Harvey 1986). Modes are thus generated with random phases and amplitudes
 and with finite lifetimes. This stochasticity leads to realization noise in repeated measurements of mode parameters
-4 Mani et al.
-Figure 1. Dispersion relation for the radial orders used in this analysis; f (blue), p1 (orange) and p2 (green). The shaded
-regions of the same colours indicate 1-linewidth Γ about the mode frequency. The yellow shaded region indicates the range of
-kR and ω/2π to which we have restricted ourselves in this analysis. Beyond kR of 2000, it is seen that the theoretical fitting
-of mode frequencies start deviating from the observed dispersion relation for the f-mode.
-such as its amplitude, frequency and linewidth, and consequently in Bk,q in our case. We use the same noise model
-as in H21, which was motivated by the above discussion,
-Gk,q ≡ h|Bk,q|
-2
-i, (9)
-where, unlike H21, we again sum over ±ω. Gk,q is real, with the symmetry relation Gk,q = G−k,−q (see Appendix A
-for explanation).
-2. DATA ANALYSIS
-In order to examine near surface flows we build a time-series cube of Doppler images that are obtained from the
-Helioseismic Magnetic Imager aboard the Solar Dynamics Observatory (HMI/SDO, Scherrer et al. 2012). Each image
-is Postel projected, with a spatial resolution of approximately 0.48Mm, sperated in time by 45 seconds, and is tracked
-at the (Snodgrass 1984) rotation rate. Here, we select a patch that is 194.4 × 194.4 Mm2
-in size, tracked for 24 hours
-and crosses the disk-center in the middle of observation time on the 14 Novemeber 2017 (Carrington rotation number
-2197, Carrington longitude 90◦
-). This Dopplercube is considered as the physical wavefield φ(x, y;t). The Fourier-space
-wavefield φ
-ω
-k
-(and subsequently, the cross-correlation φ
-ω∗
-k φ
-ω
-k+q
-) is obtained by computing the 3D spatial and temporal
-Fourier transform of the Dopplercube.
-The duration of the observed region is long enough to provide sufficient frequency bins with which to sum over in
-Eq 6, while short enough that supergranules do not substantially evolve (lifetime is purported to be 1.6 days; Rincon
-& Rieutord 2018) over this period. Our observation region is close to the disk center to also avoid any contamination
-from center-to-limb systematics (Zhao et al. 2012; Langfellner et al. 2015).
-Maximum signal can be extracted from the weighted summation of the cross correlations (eq 5) when the spectral
-profiles of the two modes [n, k] and [n
-0
-, k0
-] closely align in ω space. This implies that their mode frequencies should be
-sufficiently close (|ωnk − ωn0k0 | ≤ δ, the separation parameter). Since Lorentzians decay rapidly, the summation over
-±ω is significant only over a few linewidths (, the summation parameter; see eq 7). We have empirically found and
-tabulated δ in Table 1 for the radial order couplings n-n
-0 ∈ f-f, p1-p1, and p2-p2 (the signal strength depends only
-weakly on ; we set it to 3 line widths).
-Figure 1 shows that for any two adjacent ridges (adjacent n and n
-0
-), mode frequencies ωnk and ωn0k become spaced
-farther apart with increasing wavenumber kR. It is also known that mode linewidth Γ grows with radial orders for
-a given kR. Moreover, holding the spatial and temporal sampling rates constant, the spatial size and duration of
-observation set the total number of modes within a range of kR (and ω/2π) that can be clearly observed, thereby
-affecting the quality of the seismic measurements. Owing to these factors, to maximize signal-to-noise (by visually
-inspecting the power-spectrum), the parameters describing the extent of coupling over different ranges of kR at fixed
-radial order are different. In wavenumber, we restrict our analysis to within 200 ≤ kR ≤ 2000 and qR ≤ 300. Our
+ Mani et al.
+Figure 1. Dispersion relation for the radial orders used in this analysis; f (blue), p1 (orange) and p2 (green). The shaded
+regions of the same colours indicate 1-linewidth Γ about the mode frequency. The yellow shaded region indicates the range of
+kR and ω/2π to which we have restricted ourselves in this analysis. Beyond kR of 2000, it is seen that the theoretical fitting
+of mode frequencies starts deviating from the observed dispersion relation for the f-mode.
+such as its amplitude, frequency and linewidth, and consequently in Bk,q in our case. We use the same noise model
+as in H21, which was motivated by the above discussion,
+$$G_{\mathbf{k},\mathbf{q}} \equiv \langle |B_{\mathbf{k},\mathbf{q}}|^{2} \rangle, \quad (9)$$
+where, unlike H21, we again sum over ±ω. Gk,q is real, with the symmetry relation Gk,q = G−k,−q (see Appendix A
+for explanation).
+2. DATA ANALYSIS
+In order to examine near surface flows we build a time-series cube of Doppler images that are obtained from the
+Helioseismic Magnetic Imager aboard the Solar Dynamics Observatory (HMI/SDO, Scherrer et al. 2012). Each image
+is Postel projected, with a spatial resolution of approximately 0.48 Mm, separated in time by 45 seconds, and is tracked
+at the (Snodgrass 1984) rotation rate. Here, we select a patch that is 194.4 × 194.4 Mm$^2$ in size, tracked for 24 hours
+and crosses the disk-center in the middle of observation time on 14 November 2017 (Carrington rotation number
+2197, Carrington longitude 90◦). This Dopplercube is considered as the physical wavefield φ(x, y;t). The Fourier-space
+wavefield $\phi^{\omega}_{\mathbf{k}}$ (and subsequently, the cross-correlation $\phi^{\omega*}_{\mathbf{k}} \phi^{\omega}_{\mathbf{k}+\mathbf{q}}$) is obtained by computing the 3D spatial and temporal
+Fourier transform of the Dopplercube.
+The duration of the observed region is long enough to provide sufficient frequency bins with which to sum over in
+Eq 6, while short enough that supergranules do not substantially evolve (lifetime is purported to be 1.6 days; Rincon
+& Rieutord 2018) over this period. Our observation region is close to the disk center to also avoid any contamination
+from center-to-limb systematics (Zhao et al. 2012; Langfellner et al. 2015).
+Maximum signal can be extracted from the weighted summation of the cross correlations (eq 5) when the spectral
+profiles of the two modes $[n, k]$ and $[n', k']$ closely align in ω space. This implies that their mode frequencies should be
+sufficiently close ($|\omega_{nk} - \omega_{n'k'}| \le \delta$, the separation parameter). Since Lorentzians decay rapidly, the summation over
+±ω is significant only over a few linewidths (, the summation parameter; see eq 7). We have empirically found and
+tabulated δ in Table 1 for the radial order couplings $n$-$n'$ ∈ f-f, p1-p1, and p2-p2 (the signal strength depends only
+weakly on ; we set it to 3 line widths).
+Figure 1 shows that for any two adjacent ridges (adjacent $n$ and $n'$), mode frequencies $\omega_{nk}$ and $\omega_{n'k}$ become spaced
+farther apart with increasing wavenumber kR. It is also known that mode linewidth Γ grows with radial orders for
+a given kR. Moreover, holding the spatial and temporal sampling rates constant, the spatial size and duration of
+observation set the total number of modes within a range of kR (and ω/2π) that can be clearly observed, thereby
+affecting the quality of the seismic measurements. Owing to these factors, to maximize signal-to-noise (by visually
+inspecting the power-spectrum), the parameters describing the extent of coupling over different ranges of kR at fixed
+radial order are different. In wavenumber, we restrict our analysis to within 200 ≤ kR ≤ 2000 and qR ≤ 300. Our
 frequency range is confined to span the range over which acoustic modes are observed (2 ≤ ω/2π ≤ 5 in mHz).
-Imaging near-surface flows using mode-coupling analysis 5
-Coupling kR range # of δ
-modes
-f-f [400,1000] 5240 4
-[1000,1500] 7784 1.1
-[1500,2000] 10940 0.4
-p1-p1 [400,1000] 5240 4.5
-[1000,1750] 12852 2
-p2-p2 [200,1000] 5886 3
-[1000,1300] 4280 3
-Table 1. Total number of modes, and separation parameter (in number of linewidths) for different couplings, for different
-ranges of kR.
-3. INVERSION
-The final step to producing near-surface flow maps in Cartesian mode-coupling is to invert the measurements
-Bk,q from the linear relation in eq 6. We describe inversion using regularized-least-squares (RLS) method here and
-leave Subtractive Optimally Localized Averages (SOLA, Pijpers & Thompson 1994) for Appendix B. The methods
-complement each other (see Sekii 1997), where RLS tries to minimize the misfit between data and model, whereas
-SOLA gives better localization. For total number of modes M, RLS scales as MxJ where J is the number of basis
-functions fj (z) (J  M; see eq 3 and section 3.1), whereas SOLA scales as M2
-(see Appendix B). For M > 5000,
-computation starts to quickly become expensive for SOLA.
-Mode eigenfunctions peak near the surface, with higher radial orders possessing smaller peaks in the interior. While
-f-f coupling alone has enough sensitivity to probe perturbations at supergranular scales close to surface, signal is
-present even in p1-p1, and p2-p2 (see Figure 3), and possibly other higher order self- and cross-couplings. Since we are
-interested in only surface flows, we leave higher order coupling to future work.
-It bears mentioning that the slopes of the ridges in the kR-ν spectrum (Figure 1) increase with radial order. This
-limits us to low-to-intermediate kR (< 1000) for these higher radial orders if we are to remain under the acoustic cut￾off frequency of 5.3mHz. It also becomes imperative to use a spatially larger observation patch to gain access to signals
-from low kR - too large an observation region could possibly render invalid the Cartesian geometry approximation.
-Regardless, in addition to performing inversions using all the couplings stacked together, we also demonstrate inversions
-separately for the three couplings (see Table 2) in order to account for the full gamut of mode-coupling as a signal-rich
-helioseismic technique.
-3.1. RLS
-For given q, the forward problem may be stated as
-KU = B, (10)
-with the aim to minimize the misfit P
-k
-||KU − B||2, with || ||2 denoting the L2 norm. Here, K is the matrix formed
-by the sensitivity kernels: {Cqj,k, Dqj,k}. U is a vector composed of the flow coefficients: {Pqj , Tqj} and B is a vector
-composed of computed B-coefficients: {Bk,q}. The least-squares problem is solved simultaneously for poloidal and
-toroidal flow. We use B-spline basis functions as our fj (z), comprising 11 knots spaced uniformly in acoustic radius,
-for both poloidal and toroidal coefficients. Hence, for M modes (total number of k for a given q is M) and 11 basis
-functions for each poloidal and toroidal, the dimensions of K, U and B are thus M ×22, 22×1, and M ×1 respectively.
-Normalizing both sides of eq 10 by the noise covariance Λ (a diagonal matrix with the entries Gk,q; see eq 9; dimension
-M × M) and pre-multiplying by K|
-,
-(K|Λ
-−1K)U =(K|Λ
-−1
-)B, (11)
-U =(K|Λ
-−1K)
-−1K|Λ
-−1B. (12)
-6 Mani et al.
-Figure 2. Left: Averaging kernel for poloidal flow (see section B.2, eq B17, and left panel of Figure 8) for qR = [−112, −45],
-at the depth zo = −0.41 Mm. Right: L-curve for the mode qR = [−112, −45]; the knee (λ = 2.48) is marked by a blue
-diamond.
-Since the least-squares problem is typically ill-posed, we restate the minimization as P
-k
-||KU − B||2 + λ||U||2 with
-the regularization parameter λ which this results in a trade-off between misfit reduction (first term) and solution
-norm minimization (second term). Under-regularizing can lead to a solution U that is dominated by errors in the
-data and on the other hand, over-regularizing may smooth or damp the solution more than necessary. Including this
-regularization makes the problem better conditioned and is now defined as
-U = (K|Λ
-−1K + λI)
-−1K|Λ
-−1B, (13)
-where I is the identity matrix for L1 regularization. The knee-point of the L-curve (Hansen 1992), a curve formed
-by plotting ||U||2 vs ||KU − B||2 for different values of λ (see right panel of Figure 2), is usually chosen as the
-regularization parameter. After successfully inverting for U, we reconstruct the flow using eq 3. Results for poloidal
-flow Pq are shown in Figure 3.
-4. LCT
-To improve confidence in the imaged near-surface flows through mode-coupling, we compare them with flows obtained
-from Local Correlation Tracking method (LCT; November & Simon 1988). LCT provides surface-flow maps by
-examining the advection of convective granules (1.2 Mm, qR ≈ 3500; Hathaway et al. 2015) by underlying larger￾scale flow systems. Since granules are used as tracers, which are much smaller in size than supergranules (≈ 35 Mm),
-LCT is an effective method (see Rieutord et al. 2001) to produce surface horizontal flow maps of supergranulation.
-Time series of intensity images from HMI, with the same properties of the Dopplercubes described in section 2
-(tracking rate, date, location, size and duration of observed patch, spatial and temporal sampling rate), are ob￾tained and Postel projected. The horizontal flows are deduced by tracking the proper motions of granules between
-consecutive intensity images, which we denote as I1, I2. The LCT method selects a patch in two images each
-(I1 = I1e
-(x−xij )
-2/2 sigma2
-, I2 = I2e
-(x−xij )
-2/2 sigma2
-) that observe the same granule at the grid point xij = (xi
-, yj ).
-A Gaussian of width sigma allows to isolate a small region surrounding the grid point of interest as the distance
-moved by granules are usually in sub-pixel regime. The convention for the direction of x is the same as described in
-section 1.1. The two patches I1, I2 are then cross correlated for different values of position shifts ∆x,
-Cij (∆x, ∆y) = Z
-dx I
-∗
-1
-(−x)I2(∆x − x). (14)
-The shift ∆x = (∆x, ∆y) that maximizes the cross-correlation Cij is taken to be the proper motion of the granule.
-Provided that the time difference ∆t, here 45 seconds, between the images is less than the lifetime of granules (< 10
-min), the velocities are given by vx = ∆x/∆t and vy = ∆y/∆t. This exercise is repeated for all grid points in the
-images I1, I2 and for each consecutive pair of images in the cube.
-In practice, we use the Fourier LCT algorithm (FLCT, Fisher & Welsch 2008) for computing vx and vy. FLCT
+Imaging near-surface flows using mode-coupling analysis 5
+Coupling kR range # of δ
+modes
+f-f [400,1000] 5240 4
+[1000,1500] 7784 1.1
+[1500,2000] 10940 0.4
+p1-p1 [400,1000] 5240 4.5
+[1000,1750] 12852 2
+p2-p2 [200,1000] 5886 3
+[1000,1300] 4280 3
+Table 1. Total number of modes, and separation parameter (in number of linewidths) for different couplings, for different
+ranges of kR.
+3. INVERSION
+The final step to producing near-surface flow maps in Cartesian mode-coupling is to invert the measurements
+Bk,q from the linear relation in eq 6. We describe inversion using regularized-least-squares (RLS) method here and
+leave Subtractive Optimally Localized Averages (SOLA, Pijpers & Thompson 1994) for Appendix B. The methods
+complement each other (see Sekii 1997), where RLS tries to minimize the misfit between data and model, whereas
+SOLA gives better localization. For total number of modes $M$, RLS scales as $M \times J$ where $J$ is the number of basis
+functions $f_j(z)$ ($J \ll M$; see eq 3 and section 3.1), whereas SOLA scales as $M^2$ (see Appendix B). For $M > 5000$,
+computation starts to quickly become expensive for SOLA.
+Mode eigenfunctions peak near the surface, with higher radial orders possessing smaller peaks in the interior. While
+f-f coupling alone has enough sensitivity to probe perturbations at supergranular scales close to surface, signal is
+present even in p1-p1, and p2-p2 (see Figure 3), and possibly other higher order self- and cross-couplings. Since we are
+interested in only surface flows, we leave higher order coupling to future work.
+It bears mentioning that the slopes of the ridges in the kR-ν spectrum (Figure 1) increase with radial order. This
+limits us to low-to-intermediate kR (< 1000) for these higher radial orders if we are to remain under the acoustic cutoff frequency of 5.3mHz. It also becomes imperative to use a spatially larger observation patch to gain access to signals
+from low kR - too large an observation region could possibly render invalid the Cartesian geometry approximation.
+Regardless, in addition to performing inversions using all the couplings stacked together, we also demonstrate inversions
+separately for the three couplings (see Table 2) in order to account for the full gamut of mode-coupling as a signal-rich
+helioseismic technique.
+3.1. RLS
+For given q, the forward problem may be stated as
+KU = B, (10)
+with the aim to minimize the misfit $\sum_{\mathbf{k}} \|KU - B\|_2$, with $\|\cdot\|_2$ denoting the L2 norm. Here, K is the matrix formed
+by the sensitivity kernels: {Cqj,k, Dqj,k}. U is a vector composed of the flow coefficients: {Pqj , Tqj} and B is a vector
+composed of computed B-coefficients: {Bk,q}. The least-squares problem is solved simultaneously for poloidal and
+toroidal flow. We use B-spline basis functions as our fj (z), comprising 11 knots spaced uniformly in acoustic radius,
+for both poloidal and toroidal coefficients. Hence, for M modes (total number of k for a given q is M) and 11 basis
+functions for each poloidal and toroidal, the dimensions of K, U and B are thus M ×22, 22×1, and M ×1 respectively.
+Normalizing both sides of eq 10 by the noise covariance Λ (a diagonal matrix with the entries Gk,q; see eq 9; dimension
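+M × M) and pre-multiplying by $K^{\top}$,
+$$(K^{\top} \Lambda^{-1} K)\, U = (K^{\top} \Lambda^{-1})\, B, \quad (11)$$
+$$U = (K^{\top} \Lambda^{-1} K)^{-1} K^{\top} \Lambda^{-1} B. \quad (12)$$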
+ Mani et al.
+Figure 2. Left: Averaging kernel for poloidal flow (see section B.2, eq B17, and left panel of Figure 8) for qR = [−112, −45],
+at the depth zo = −0.41 Mm. Right: L-curve for the mode qR = [−112, −45]; the knee (λ = 2.48) is marked by a blue
+diamond.
+Since the least-squares problem is typically ill-posed, we restate the minimization as $\sum_{\mathbf{k}} \|KU - B\|_2 + \lambda \|U\|_2$ with
+the regularization parameter λ, which results in a trade-off between misfit reduction (first term) and solution
+norm minimization (second term). Under-regularizing can lead to a solution U that is dominated by errors in the
+data and on the other hand, over-regularizing may smooth or damp the solution more than necessary. Including this
+regularization makes the problem better conditioned and is now defined as
+$$U = (K^{\top} \Lambda^{-1} K + \lambda I)^{-1} K^{\top} \Lambda^{-1} B, \quad (13)$$
+where I is the identity matrix for L1 regularization. The knee-point of the L-curve (Hansen 1992), a curve formed
+by plotting ||U||2 vs ||KU − B||2 for different values of λ (see right panel of Figure 2), is usually chosen as the
+regularization parameter. After successfully inverting for U, we reconstruct the flow using eq 3. Results for poloidal
+flow Pq are shown in Figure 3.
+4. LCT
+To improve confidence in the imaged near-surface flows through mode-coupling, we compare them with flows obtained
+from the Local Correlation Tracking method (LCT; November & Simon 1988). LCT provides surface-flow maps by
+examining the advection of convective granules (1.2 Mm, qR ≈ 3500; Hathaway et al. 2015) by underlying larger-scale flow systems. Since granules are used as tracers, which are much smaller in size than supergranules (≈ 35 Mm),
+LCT is an effective method (see Rieutord et al. 2001) to produce surface horizontal flow maps of supergranulation.
+Time series of intensity images from HMI, with the same properties of the Dopplercubes described in section 2
+(tracking rate, date, location, size and duration of observed patch, spatial and temporal sampling rate), are obtained and Postel projected. The horizontal flows are deduced by tracking the proper motions of granules between
+consecutive intensity images, which we denote as I1, I2. The LCT method selects a patch in two images each
+($I_1 = I_1 e^{(\mathbf{x}-\mathbf{x}_{ij})^2/2\,\mathrm{sigma}^2}$, $I_2 = I_2 e^{(\mathbf{x}-\mathbf{x}_{ij})^2/2\,\mathrm{sigma}^2}$) that observe the same granule at the grid point $\mathbf{x}_{ij} = (x_i, y_j)$.
+A Gaussian of width sigma allows to isolate a small region surrounding the grid point of interest as the distance
+moved by granules are usually in sub-pixel regime. The convention for the direction of x is the same as described in
+section 1.1. The two patches I1, I2 are then cross correlated for different values of position shifts ∆x,
+$$C_{ij}(\Delta x, \Delta y) = \int d\mathbf{x}\; I_1^{*}(-\mathbf{x})\, I_2(\Delta\mathbf{x} - \mathbf{x}). \quad (14)$$
+The shift ∆x = (∆x, ∆y) that maximizes the cross-correlation Cij is taken to be the proper motion of the granule.
+Provided that the time difference ∆t, here 45 seconds, between the images is less than the lifetime of granules (< 10
+min), the velocities are given by vx = ∆x/∆t and vy = ∆y/∆t. This exercise is repeated for all grid points in the
+images I1, I2 and for each consecutive pair of images in the cube.
+In practice, we use the Fourier LCT algorithm (FLCT, Fisher & Welsch 2008) for computing vx and vy. FLCT
 requires the input sigma, which we set to 4 pix, that captures the extent of localization desired, and depends on the
-Imaging near-surface flows using mode-coupling analysis 7
-Figure 3. Top: Inverted poloidal flow power-spectrum for the three couplings f-f, p1-p1, and p2-p2 as a function of qxR and
-qyR. Bottom: Corresponding power-spectrum averaged over the azimuthal angle. Shaded region shows ±1σ error around the
-mean. Total power appears to increase through the radial orders. Power is in units of m2
-/s4
-.
-dominant length scale of the velocity field in the images. The Postel-projected intensity images are fed as input to the
-FLCT code. vx and vy are then computed for consecutive pairs of images and are averaged over the entire day.
-5. MAPS OF HORIZONTAL DIVERGENCE AND RADIAL VORTICITY
-For mode-coupling, horizontal divergence (hereafter div) and radial vorticity (hereafter curl) are computed by
-substituting P and T from eq 3 into eq 2 as below -
-u(q, z) = −∇2Pez + ∇(∂zP) + ∇hT×ez,
-= −(0, 0, ∂2
-xP + ∂
-2
-yP + ∂
-2
-zP) + (∂x∂zP, ∂y∂zP, ∂2
-zP) + (∂yT, −∂xT, 0). (15)
-Setting ∂
-2
-x + ∂
-2
-y = q
-2
-, div is given by,
-∇h · u(q, z) = q
-2
-∂zP, (16)
-and curl is given by,
-h
-∇ × u(q, z)
-i
-z
-= q
-2T. (17)
-We follow similar steps to those taken in Langfellner et al. (2015) for comparison of flow maps with LCT. The
-essential step for comparison at different length scales is to bandpass filter the Fourier-space flow around the qR of
-interest (see Figure 4), and subsequently convert it to real space.
-We seek to show comparisons (see Figures 5, 6, and 7) for qR = 100, 150, 200 and 250. To sufficiently delineate
-flows at these length scales, we apply a Gaussian filter (see Figure 4) to flows obtained from eqns 16 and 17. The
-Gaussian is centered at the desired wavenumber with a half-width of 25. We then perform a 2D Fourier transform to
+Imaging near-surface flows using mode-coupling analysis 7
+Figure 3. Top: Inverted poloidal flow power-spectrum for the three couplings f-f, p1-p1, and p2-p2 as a function of qxR and
+qyR. Bottom: Corresponding power-spectrum averaged over the azimuthal angle. Shaded region shows ±1σ error around the
+mean. Total power appears to increase through the radial orders. Power is in units of m$^2$/s$^4$.
+dominant length scale of the velocity field in the images. The Postel-projected intensity images are fed as input to the
+FLCT code. vx and vy are then computed for consecutive pairs of images and are averaged over the entire day.
+5. MAPS OF HORIZONTAL DIVERGENCE AND RADIAL VORTICITY
+For mode-coupling, horizontal divergence (hereafter div) and radial vorticity (hereafter curl) are computed by
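+substituting P and T from eq 3 into eq 2 as below:
+$$\mathbf{u}(\mathbf{q}, z) = -\nabla^{2} P\, \mathbf{e}_z + \nabla(\partial_z P) + \nabla_h T \times \mathbf{e}_z,$$
+$$= -(0,\ 0,\ \partial_x^{2} P + \partial_y^{2} P + \partial_z^{2} P) + (\partial_x \partial_z P,\ \partial_y \partial_z P,\ \partial_z^{2} P) + (\partial_y T,\ -\partial_x T,\ 0). \quad (15)$$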
+Setting $\partial_x^{2} + \partial_y^{2} = q^{2}$, div is given by,
+$$\nabla_h \cdot \mathbf{u}(\mathbf{q}, z) = q^{2}\, \partial_z P, \quad (16)$$
+and curl is given by,
+$$\left[ \nabla \times \mathbf{u}(\mathbf{q}, z) \right]_z = q^{2}\, T. \quad (17)$$
+We follow similar steps to those taken in Langfellner et al. (2015) for comparison of flow maps with LCT. The
+essential step for comparison at different length scales is to bandpass filter the Fourier-space flow around the qR of
+interest (see Figure 4), and subsequently convert it to real space.
+We seek to show comparisons (see Figures 5, 6, and 7) for qR = 100, 150, 200 and 250. To sufficiently delineate
+flows at these length scales, we apply a Gaussian filter (see Figure 4) to flows obtained from eqns 16 and 17. The
+Gaussian is centered at the desired wavenumber with a half-width of 25. We then perform a 2D Fourier transform to
 obtain a real-space steady-flow map.
-8 Mani et al.
-Figure 4. Left: Divergence-flow power spectrum |div|
-2
-, from eqn 16, obtained from inversion using all the couplings. The
-power-spectrum is then filtered with a bandpass centered around qR = 150 (middle panel). The resulting spectra is shown in
-the right panel. The units of |div|
-2
-are in s−2
-. For illustration, we show the action of the filter on the power-spectrum |div|
-2
-since it is a real quantity, but recall that it is the Fourier-space flow div (a complex quantity) on which we apply the filter.
-For LCT, we first apply a Gaussian smoothing to vx and vy to average over small-scale features; the extent of
-smoothing depends on the length scale qR to be compared with mode-coupling. div and curl are then simply
-computed by
-div = ∂xvx + ∂yvy, (18)
-curl = ∂xvy − ∂yvx. (19)
-We then perform a 2D Fourier transform on eqns 18 and 19, apply the same Gaussian filters as for mode-coupling,
-and transform back to real space.
-Condensing all of the above, the following sequence of operations to compare flows at desired length scales are
-performed for mode-coupling (M-C) and for LCT -
-M-C : φ(x, y;t)
-3D FFT =====⇒ φ
-ω
-k
-, Bk,q
-inversion ======⇒ P, T ∇h·
-===⇒
-∇×
-eqns 16, 17 Filter,
-=====⇒
-2D FFT
-div, curl
-LCT : I1, I2
-FLCT ====⇒ vx, vy
-smooth,
-======⇒
-∇h· ∇×
-eqns 18, 19 2D FFT,
-======⇒
-Filter
-Filtered,
-Fourier-space
-flows
-2D FFT =====⇒ div, curl
-6. RESULTS
-Table 2 summarizes the results of the comparison between flows obtained from mode-coupling and LCT. Figure 5,
-where we have used all the couplings to perform inversions, shows a 97% correlation between divergence flows from
-the two methods near supergranular scale (qR ≈ 100). Near-surface flows are imaged most faithfully when all the
-couplings are used. Since vortical flows are imaged at a region near the equator, it is possible that the source of
-vorticity is something other than Coriolis force. Nevertheless, there is also a very good agreement (87%) between
-the vortical flows as inferred from the two methods, despite being an order of magnitude weaker than the divergence
-flows (this is consistent with the results of Hathaway et al. 2015; Langfellner et al. 2015; Rincon et al. 2017). Due to
-insufficient modes for the p2-p2 case (see Table 1), we are unable to infer vortical flows with conviction other than near
-the supergranular scale, as can be seen from Table 2. Figure 6 also aligns with what we believe can be accomplished
-through mode-coupling helioseismology - using f-f or p1-p1 alone to seismically infer near-surface divergence and vortical
-flows at different scales (qR = 100, 150) can yield extremely good agreement with LCT. As the length scale of the
-inferred flow moves further away from that of supergranules (Figure 7), the demand on signal-to-noise also increases.
-An adequate number of modes (and coupling strength between higher radial-orders) thus becomes a necessity to
-comment substantively on the flows at these scales.
+ Mani et al.
+Figure 4. Left: Divergence-flow power spectrum $|\mathrm{div}|^2$, from eqn 16, obtained from inversion using all the couplings. The
+power-spectrum is then filtered with a bandpass centered around qR = 150 (middle panel). The resulting spectrum is shown in
+the right panel. The units of $|\mathrm{div}|^2$ are in s$^{-2}$. For illustration, we show the action of the filter on the power-spectrum $|\mathrm{div}|^2$
+since it is a real quantity, but recall that it is the Fourier-space flow div (a complex quantity) on which we apply the filter.
+For LCT, we first apply a Gaussian smoothing to vx and vy to average over small-scale features; the extent of
+smoothing depends on the length scale qR to be compared with mode-coupling. div and curl are then simply
+computed by
+div = ∂xvx + ∂yvy, (18)
+curl = ∂xvy − ∂yvx. (19)
+We then perform a 2D Fourier transform on eqns 18 and 19, apply the same Gaussian filters as for mode-coupling,
+and transform back to real space.
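+Condensing all of the above, the following sequence of operations is performed to compare flows at desired length
+scales, for mode-coupling (M-C) and for LCT:
+$$\text{M-C}:\ \phi(x, y; t) \xrightarrow{\text{3D FFT}} \phi^{\omega}_{\mathbf{k}},\ B_{\mathbf{k},\mathbf{q}} \xrightarrow{\text{inversion}} P,\ T \xrightarrow{\nabla_h\cdot,\ \nabla\times} \text{eqns 16, 17} \xrightarrow{\text{Filter, 2D FFT}} \mathrm{div},\ \mathrm{curl}$$
+$$\text{LCT}:\ I_1,\ I_2 \xrightarrow{\text{FLCT}} v_x,\ v_y \xrightarrow{\text{smooth},\ \nabla_h\cdot,\ \nabla\times} \text{eqns 18, 19} \xrightarrow{\text{2D FFT, Filter}} \text{filtered Fourier-space flows} \xrightarrow{\text{2D FFT}} \mathrm{div},\ \mathrm{curl}$$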
+6. RESULTS
+Table 2 summarizes the results of the comparison between flows obtained from mode-coupling and LCT. Figure 5,
+where we have used all the couplings to perform inversions, shows a 97% correlation between divergence flows from
+the two methods near supergranular scale (qR ≈ 100). Near-surface flows are imaged most faithfully when all the
+couplings are used. Since vortical flows are imaged at a region near the equator, it is possible that the source of
+vorticity is something other than Coriolis force. Nevertheless, there is also a very good agreement (87%) between
+the vortical flows as inferred from the two methods, despite being an order of magnitude weaker than the divergence
+flows (this is consistent with the results of Hathaway et al. 2015; Langfellner et al. 2015; Rincon et al. 2017). Due to
+insufficient modes for the p2-p2 case (see Table 1), we are unable to infer vortical flows with conviction other than near
+the supergranular scale, as can be seen from Table 2. Figure 6 also aligns with what we believe can be accomplished
+through mode-coupling helioseismology - using f-f or p1-p1 alone to seismically infer near-surface divergence and vortical
+flows at different scales (qR = 100, 150) can yield extremely good agreement with LCT. As the length scale of the
+inferred flow moves further away from that of supergranules (Figure 7), the demand on signal-to-noise also increases.
+An adequate number of modes (and coupling strength between higher radial-orders) thus becomes a necessity to
+comment substantively on the flows at these scales.
 6.1. Amplitudes of mode-coupling flows
-Imaging near-surface flows using mode-coupling analysis 9
-(a) qR = 100, f-f + p1-p1 + p2-p2
-Figure 5. Real-space divergence flows (left column, in units of 10−5
-s
-−1
-) and radial vorticity (right column, in units of 10−6
-s
-−1
-)
-for LCT (top row), and mode-coupling inversions through RLS using all the couplings (middle row), bandpass filtered around
-qR = 100 (see Figure 4). Corresponding scatter plots and correlation coefficients are shown in the bottom row. We cut edges
-out from the flow maps and compare a circular region of diameter ≈175 Mm. The slopes of the best-fit line through the scatter
-plots are 0.51 for divergence and 0.01 for vorticity. The vorticity flow maps are saturated to show only 40% of the maximum
-values.
-For both LCT and mode-coupling divergence and vorticity maps, numerous factors, arising from the associated
-numerous data processing steps, can influence the final inference of flow amplitudes, making it difficult to put forward
-a precise statement on them. H21 reported a 60% greater amplitude for p1-p1 over f-f coupling (Figure 3 reflects a
-similar conclusion), another element to consider when combining different radial orders. The choice of regularization
-(see right panel of Figure 2) has the potential to affect the amplitudes of the inverted flows to some degree. Flow
-amplitudes also vary with depth, implying that different radial orders and LCT will measure different flow averages.
-This variability emerges as a natural consequence of any helioseismic inversion procedure necessitating the use of a
-radial grid along which kernels and flows tend to be described.
-Thus, the amplitudes of the mode-coupling flows (and the correlation coefficient) depend upon the following factors:
-• Coupling(s) used,
-• Regularization parameter in the inversion,
-• Smoothing applied to LCT flows (indirectly; see below paragraph),
-• The depth at which flows are inferred.
-Here, we report in Table 2 only the maximum correlation found from among the points in the radial grid close
-to the surface (within ±0.5 Mm from z=0). For a desired comparison length scale qR, we first fix the coupling(s)
+Imaging near-surface flows using mode-coupling analysis 9
+(a) qR = 100, f-f + p1-p1 + p2-p2
+Figure 5. Real-space divergence flows (left column, in units of $10^{-5}\,\mathrm{s}^{-1}$) and radial vorticity (right column, in units of $10^{-6}\,\mathrm{s}^{-1}$)
+for LCT (top row), and mode-coupling inversions through RLS using all the couplings (middle row), bandpass filtered around
+qR = 100 (see Figure 4). Corresponding scatter plots and correlation coefficients are shown in the bottom row. We cut edges
+out from the flow maps and compare a circular region of diameter ≈175 Mm. The slopes of the best-fit line through the scatter
+plots are 0.51 for divergence and 0.01 for vorticity. The vorticity flow maps are saturated to show only 40% of the maximum
+values.
+For both LCT and mode-coupling divergence and vorticity maps, numerous factors, arising from the associated
+numerous data processing steps, can influence the final inference of flow amplitudes, making it difficult to put forward
+a precise statement on them. H21 reported a 60% greater amplitude for p1-p1 over f-f coupling (Figure 3 reflects a
+similar conclusion), another element to consider when combining different radial orders. The choice of regularization
+(see right panel of Figure 2) has the potential to affect the amplitudes of the inverted flows to some degree. Flow
+amplitudes also vary with depth, implying that different radial orders and LCT will measure different flow averages.
+This variability emerges as a natural consequence of any helioseismic inversion procedure necessitating the use of a
+radial grid along which kernels and flows tend to be described.
+Thus, the amplitudes of the mode-coupling flows (and the correlation coefficient) depend upon the following factors:
+• Coupling(s) used,
+• Regularization parameter in the inversion,
+• Smoothing applied to LCT flows (indirectly; see below paragraph),
+• The depth at which flows are inferred.
+Here, we report in Table 2 only the maximum correlation found from among the points in the radial grid close
+to the surface (within ±0.5 Mm from z=0). For a desired comparison length scale qR, we first fix the coupling(s)
 and the regularization parameter to be used in the inversion. We then separately compute filtered divergence and
-10 Mani et al.
-(a) qR = 100, f-f (b) qR = 150, p1-p1
-Figure 6. Real-space divergence flows (left column, in units of 10−5
-s
-−1
-) and radial vorticity (right column, in units of 10−6
-s
-−1
-)
-for LCT (top row), and mode-coupling inversion through RLS using (a) f-f coupling (bottom row), bandpass filtered around
-qR = 100, and using (b) p1-p1 coupling (bottom row), bandpass filtered around qR = 150. We cut edges out from the flow
-maps and compare a circular region of diameter ≈175 Mm.
-(a) qR = 200, f-f + p1-p1 + p2-p2 (b) qR = 250, f-f + p1-p1 + p2-p2
-Figure 7. Real-space divergence flows (left column, in units of 10−5
-s
-−1
-) and radial vorticity (right column, in units of 10−6
-s
-−1
-)
-for LCT (top row), and mode-coupling inversion through RLS using all the couplings (bottom row), bandpass filtered around
-(a) qR = 200, and (b) qR = 250. We cut edges out from the flow maps and compare a circular region of diameter ≈175 Mm.
-vorticity maps for LCT for different values of smoothing. These flow maps are then compared with those obtained
-from inversions at all depths in the radial grid that are within 0.5 Mm from the surface. The highest correlation
-(corresponding to the above depths and smoothing) is noted and comparison flow maps are plotted for the desired
-qR.
-It has been shown (see De Rosa & Toomre 2004; Langfellner et al. 2015) that line-of-sight velocity from Dopplergrams
-and LCT agree closely in amplitudes. But, to recapitulate, a host of factors described above can skew the amplitudes
-for divergence flows owing to the multi-step process involved in obtaining them. For example, there has been a history
-(see, e.g., De Rosa et al. 2000; Sekii et al. 2007; Zhao et al. 2007; Langfellner et al. 2018; B¨oning et al. 2020; Korda
+ Mani et al.
+(a) qR = 100, f-f (b) qR = 150, p1-p1
+Figure 6. Real-space divergence flows (left column, in units of $10^{-5}\,\mathrm{s}^{-1}$) and radial vorticity (right column, in units of $10^{-6}\,\mathrm{s}^{-1}$)
+for LCT (top row), and mode-coupling inversion through RLS using (a) f-f coupling (bottom row), bandpass filtered around
+qR = 100, and using (b) p1-p1 coupling (bottom row), bandpass filtered around qR = 150. We cut edges out from the flow
+maps and compare a circular region of diameter ≈175 Mm.
+(a) qR = 200, f-f + p1-p1 + p2-p2 (b) qR = 250, f-f + p1-p1 + p2-p2
+Figure 7. Real-space divergence flows (left column, in units of $10^{-5}\,\mathrm{s}^{-1}$) and radial vorticity (right column, in units of $10^{-6}\,\mathrm{s}^{-1}$)
+for LCT (top row), and mode-coupling inversion through RLS using all the couplings (bottom row), bandpass filtered around
+(a) qR = 200, and (b) qR = 250. We cut edges out from the flow maps and compare a circular region of diameter ≈175 Mm.
+vorticity maps for LCT for different values of smoothing. These flow maps are then compared with those obtained
+from inversions at all depths in the radial grid that are within 0.5 Mm from the surface. The highest correlation
+(corresponding to the above depths and smoothing) is noted and comparison flow maps are plotted for the desired
+qR.
+It has been shown (see De Rosa & Toomre 2004; Langfellner et al. 2015) that line-of-sight velocity from Dopplergrams
+and LCT agree closely in amplitudes. But, to recapitulate, a host of factors described above can skew the amplitudes
+for divergence flows owing to the multi-step process involved in obtaining them. For example, there has been a history
+(see, e.g., De Rosa et al. 2000; Sekii et al. 2007; Zhao et al. 2007; Langfellner et al. 2018; Böning et al. 2020; Korda
 & Svanda ˇ 2021) of using travel-time difference as only a proxy for horizontal divergence. However, Langfellner et al.
-Imaging near-surface flows using mode-coupling analysis 11
-Coupling qR div curl
-f-f 100 0.97 0.87
-+ p1-p1 150 0.95 0.76
-+ p2-p2 200 0.92 0.76
-250 0.85 0.65
-f-f 100 0.96 0.85
-150 0.93 0.76
-200 0.89 0.69
-250 0.77 0.58
-p1-p1 100 0.95 0.83
-150 0.95 0.75
-200 0.92 0.75
-250 0.85 0.61
-p2-p2 100 0.94 0.7
-150 0.91 0.39
-200 0.79 0.3
-250 0.55 0.3
-Table 2. Correlation between mode-coupling flow maps and LCT maps derived from HMI Dopplergrams and intensity images,
-respectively.
-(2015), Birch et al. (2016) and Birch et al. (2019) use empirically determined conversion factors to align flow amplitudes
-from travel-time measurements with those of LCT, while acknowledging that LCT underestimates magnitudes (see
-Verma et al. 2013; L¨optien et al. 2016). Even for the case of supergranulation divergence maps obtained through
-ring-diagram helioseismology, Greer et al. (2016) only report normalized amplitudes.
-In this work, we have developed inversions to show that the Cartesian approximation of mode-coupling can be used
-with great confidence to investigate flows near the surface. Careful inversions of mode-coupling measurements, built
-using a sufficiently large modeset that penetrates into the deeper layers of the convection zone, can also enable probing
-of the depth structure and time-evolution of supergranules, part of future work. With enough modes to improve
-signal-to-noise through larger observation sizes, we suggest that Cartesian mode-coupling can find local helioseismic
-applications to investigate other depth- and time-varying features such as giant cell flows (see Hathaway et al. 2013;
-Hanson et al. 2020), emerging active regions, meridional flows and Rossby waves.
-APPENDIX
-A. DERIVATION OF THE FORWARD MODEL
-As described in section 1.1, we seek to describe the flow u as a function of q along ez. To that end, substituting
-eq 3 into eq 2,
-u
-σ
-q
-(z) = X
-j
-
-q
-2
-fjez + iq f
-0
-j
-	
-P
-σ
-jq + iq×ez fjT
-σ
-jq
-. (A1)
-For flows in the anelastic limit (u  speed of sound), we can denote the flow perturbation operator as δL
-σ =
-−2iωρu
-σ
-· ∇ (see Hanasoge et al. 2017). Substituting Eq. A1 into the operator, we get,
-δL
-σ
-q = −2iω ρ (iu
-σ
-q
-· k + u
-σ
-q
-· ez∂z), (A2)
-=−2iωρP
-j
-
-−k · q f
-0
-jP
-σ
-jq − k · (q×ez) fjT
-σ
-jq + q
-2 fjP
-σ
-jq ∂z
-	
+Imaging near-surface flows using mode-coupling analysis 11
+Coupling qR div curl
+f-f 100 0.97 0.87
++ p1-p1 150 0.95 0.76
++ p2-p2 200 0.92 0.76
+250 0.85 0.65
+f-f 100 0.96 0.85
+150 0.93 0.76
+200 0.89 0.69
+250 0.77 0.58
+p1-p1 100 0.95 0.83
+150 0.95 0.75
+200 0.92 0.75
+250 0.85 0.61
+p2-p2 100 0.94 0.7
+150 0.91 0.39
+200 0.79 0.3
+250 0.55 0.3
+Table 2. Correlation between mode-coupling flow maps and LCT maps derived from HMI Dopplergrams and intensity images,
+respectively.
+(2015), Birch et al. (2016) and Birch et al. (2019) use empirically determined conversion factors to align flow amplitudes
+from travel-time measurements with those of LCT, while acknowledging that LCT underestimates magnitudes (see
+Verma et al. 2013; Löptien et al. 2016). Even for the case of supergranulation divergence maps obtained through
+ring-diagram helioseismology, Greer et al. (2016) only report normalized amplitudes.
+In this work, we have developed inversions to show that the Cartesian approximation of mode-coupling can be used
+with great confidence to investigate flows near the surface. Careful inversions of mode-coupling measurements, built
+using a sufficiently large modeset that penetrates into the deeper layers of the convection zone, can also enable probing
+of the depth structure and time-evolution of supergranules, part of future work. With enough modes to improve
+signal-to-noise through larger observation sizes, we suggest that Cartesian mode-coupling can find local helioseismic
+applications to investigate other depth- and time-varying features such as giant cell flows (see Hathaway et al. 2013;
+Hanson et al. 2020), emerging active regions, meridional flows and Rossby waves.
+APPENDIX
+A. DERIVATION OF THE FORWARD MODEL
+As described in section 1.1, we seek to describe the flow u as a function of q along ez. To that end, substituting
+eq 3 into eq 2,
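+$$\mathbf{u}^{\sigma}_{\mathbf{q}}(z) = \sum_j \left[ \left( q^2 f_j\,\mathbf{e}_z + i\mathbf{q}\,f'_j \right) P^{\sigma}_{j\mathbf{q}} + i\mathbf{q}\times\mathbf{e}_z\, f_j\, T^{\sigma}_{j\mathbf{q}} \right]. \tag{A1}$$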
+For flows in the anelastic limit ($u \ll$ speed of sound), we can denote the flow perturbation operator as
+$\delta\mathcal{L}^{\sigma} = -2i\omega\rho\,\mathbf{u}^{\sigma}\cdot\nabla$ (see Hanasoge et al. 2017). Substituting Eq. A1 into the operator, we get,
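+$$\delta\mathcal{L}^{\sigma}_{\mathbf{q}} = -2i\omega\rho \left( i\,\mathbf{u}^{\sigma}_{\mathbf{q}}\cdot\mathbf{k} + \mathbf{u}^{\sigma}_{\mathbf{q}}\cdot\mathbf{e}_z\,\partial_z \right), \tag{A2}$$
+$$= -2i\omega\rho \sum_j \left\{ -\mathbf{k}\cdot\mathbf{q}\, f'_j\, P^{\sigma}_{j\mathbf{q}} - \mathbf{k}\cdot(\mathbf{q}\times\mathbf{e}_z)\, f_j\, T^{\sigma}_{j\mathbf{q}} + q^2 f_j\, P^{\sigma}_{j\mathbf{q}}\,\partial_z \right\}$$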
 . (A3)
-12 Mani et al.
-Express the mode eigenfunction describing oscillations in the Cartesian domain by (see Woodard 2006)
-ξk ≡ ξnk(z) = ikˆHnk(z)ez + ˆzVnk(z), (A4)
-where H and V are real-valued functions; n and n
-0 are dropped for compactness of notation. Then the coupling of
-two modes ξk and ξk0 (k
-0 = k + q), by the flow perturbation operator δL
-σ
-q
-, denoted by coupling integral Λk
-k0 (σ), is
-given by
-Λ
-k
-k0 (σ) ≡
-Z
-dx (δL
-σ
-q ξk
-) · ξ
-∗
-k0 =
-Z
-dx
-"
-− 2iωρX
-j
-n
-q
-2
-fjP
-σ
-jq
-(kˆ · kˆ
-0
-H0
-kH∗
-k0 + V
-0
-kV
-∗
-k0 )
-−
-
-k · q f
-0
-jP
-σ
-jq + k · (q×ez) fjT
-σ
-jq
-
-(kˆ · kˆ
-0
-HkH∗
-k0 + VkV
-∗
-k0 )
-o
-#
-(A5)
-We desire to linearly relate the coupling integral in the above equation to the flows P and T, through poloidal and
-toroidal sensitivity kernels, Cqj,k and Dqj,k respectively. Hence, they are given by
-Cqj,k =
-Z
-dz ρ h
-q
-2
-fj (kˆ · kˆ
-0
-H0
-kH∗
-k0 + V
-0
-kV
-∗
-k0 )
-−k · q f
-0
-j
-(kˆ · kˆ
-0
-HkH∗
-k0 + VkV
-∗
-k0 )
-i
-,
-Dqj,k = k · (q×ez)
-Z
-dz ρ fj (kˆ · kˆ
-0
-HkH∗
-k0 + VkV
-∗
-k0 ). (A6)
-Note the symmetry Cqj,k = C−qj,−k and Dqj,k = D−qj,−k. This coupling integral contributes to the cross-spectral
-measurement between modes k and k + q From eq 8 of Woodard (2014), we write the first-order effect of flow on
-wavefield cross-correlation as
-hφ
-ω∗
-k φ
-ω+σ
-k+q
-i = Hω
-kk0σΛ
-k
-k0 (σ), (A7)
-where the function H is given by
-Hω
-kk0σ = −2iω(Nk|R
-ω
-k
-|
-2 R
-ω+σ
-k0 + Nk0 |R
-ω+σ
-k0 |
-2 R
-ω∗
-k
-). (A8)
-We absorb the factor −2iω into the definition of H. Substitute eq A6 in right-hand-side of eq A7 to obtain eq 4.
-The mode spectral profile R is a Lorentzian, given by
-R
-ω
-k =
-1
-ω
-2
-nk − ω2 − iωγnk/2
-, (A9)
-where ωnk is the resonant frequency of the mode, and γnk is the mode linewidth. Eq A9 can be derived by introducing
-mode damping −iωγρ as an operator in the differential equation that governs undamped, driven oscillations (see eq
-5 of Hanasoge et al. 2017), and then deriving the effects of first-order perturbations to the wavefield cross-correlation.
-Also, the parity Hω
-kk0σ = H
-−ω∗
-kk0−σ
-and Rω
-k = R
-−ω∗
-k
-are established. Mode normalization N is given by
-Nk =
-1
-Q
-X
-Q
-k
-P
-ω
-|φ
-ω
-k
-|
-2
-P
-ω
-Rω
-k
-, (A10)
-where the 1
-Q
-P
-Q
-k
-on the right-hand-side implies average over all [kx, ky] (Q terms in all) such that k = |k| is constant.
-This forces N to be isotropic, i.e., to only depend on k, and not k. The sum over ω is within five linewidths of ωnk.
-Note that Eq. A8 through A10 are modified from H21 to ensure parity and that flow maps are real.
-The three equations A8 through A10, along with the symmetry relation for kernels, and summation over ±ω, serve
-to establish the parity Bσ
-k,q = B
-∗−σ
-−k,−q
-. This allows for obtaining P
-σ
-q = P
-∗−σ
-−q
-, and subsequently, purely real flow in
-the real domain. Setting σ = 0 gives us the linear, invertible equation eq 6. Substituting eqns A8 through A10 into
-the noise model obtained in H21 and summing over ±ω establishes the symmetry Gσ
-k,q = G
-−σ
-−k,−q
-.  
-Imaging near-surface flows using mode-coupling analysis 13
-B. SOLA INVERSIONS
-Subtractive Optimally Localized Averages (SOLA, Pijpers & Thompson 1994) aims to obtain a set of weight factors
-P
-for the mode q and depth zo, which we will call αk,zo. A linear weighted sum of the measurements Bk,q in the fashion
-k
-αk,zoBk,q allows for an average value of the flow Pq(z) to be estimated at the depth zo. To obtain the coefficients
-αk,zo, it is assumed that a set of sensitivity kernels Kk,q(z) for the mode q can be summed up coherently to give an
-’averaging kernel’ that is localized at the depth zo. Conventionally, a Gaussian centered at zo and a width ∆ is chosen
-which the averaging kernel should resemble after performing inversion.
-B.1. Kernels in the integral form
-Since the kernels in eq A6 are manifest as coefficients on a basis fj (z), we first derive kernels that can be expressed
-as a function of depth z (see Figure 8). It is convenient to derive in matrix form. Thus, with the following definitions:
-P ≡ Pq(z), p ≡ Pqj , F ≡ fj (z), B ≡ Bk,q C ≡ Cqj,k and K ≡ Kk,q(z), we write (assume only poloidal flow for
-simplicity, the same derivations hold true for toroidal flow as well)
-P = F p (B11)
-The size of P is thus the same as the length of the radial grid z.
-Now, pre-multiply by F
-T and integrate over z on both sides (drop the integral notation for compactness),
-F
-T P = (F
-T F)p
-p = (F
-T F)
-−1 F
-T P (B12)
-Now, substituting eq B12 into the forward problem eq 6,
-B = Cp
-= (F
-T F)
-−1F
-T CP
-= KP (B13)
-where
-K = (F
-T F)
-−1F
-T C,
-i.e., Kk,q(z) = X
-j,j0
-h Z
-dz fj (z)fj
-0 (z)
-i−1
-fj
-0 (z)Cqj
-0
-,k (B14)
-B.2. Obtaining the coefficients α
-Now, demand that the averaging kernel should resemble a unimodulus target Gaussian centered at zo
-T (z, zo) = 1
-√
-2π∆2
-expz − zo
-2∆2
-
-. (B15)
-This can be achieved by solving the optimization problem
-minimize X =
-Z
-dz
-h
-T (z, zo) − Θq(z, zo)
-i2
-, (B16)
-where we introduce the averaging kernel for mode q thus
-Θq(z, zo) = X
-k
-αk,zoKk,q(z). (B17)
-As an aside, we note that averaging kernels can similarly be constructed for RLS (see section 3.1) using eqns 13
+ Mani et al.
+Express the mode eigenfunction describing oscillations in the Cartesian domain by (see Woodard 2006)
+ξk ≡ ξnk(z) = ikˆHnk(z)ez + ˆzVnk(z), (A4)
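+where $H$ and $V$ are real-valued functions; $n$ and $n'$ are dropped for compactness of notation. Then the coupling of
+two modes $\boldsymbol{\xi}_{\mathbf{k}}$ and $\boldsymbol{\xi}_{\mathbf{k}'}$ ($\mathbf{k}' = \mathbf{k} + \mathbf{q}$), by the flow perturbation operator $\delta\mathcal{L}^{\sigma}_{\mathbf{q}}$, denoted by coupling integral $\Lambda^{\mathbf{k}}_{\mathbf{k}'}(\sigma)$, is
+given by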
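+$$\Lambda^{\mathbf{k}}_{\mathbf{k}'}(\sigma) \equiv \int d\mathbf{x}\, \left( \delta\mathcal{L}^{\sigma}_{\mathbf{q}}\, \boldsymbol{\xi}_{\mathbf{k}} \right) \cdot \boldsymbol{\xi}^{*}_{\mathbf{k}'} = \int d\mathbf{x}\, \left[ -2i\omega\rho \sum_j \left\{ q^2 f_j\, P^{\sigma}_{j\mathbf{q}} \left( \hat{\mathbf{k}}\cdot\hat{\mathbf{k}}'\, H'_{k} H^{*}_{k'} + V'_{k} V^{*}_{k'} \right) - \left( \mathbf{k}\cdot\mathbf{q}\, f'_j\, P^{\sigma}_{j\mathbf{q}} + \mathbf{k}\cdot(\mathbf{q}\times\mathbf{e}_z)\, f_j\, T^{\sigma}_{j\mathbf{q}} \right) \left( \hat{\mathbf{k}}\cdot\hat{\mathbf{k}}'\, H_{k} H^{*}_{k'} + V_{k} V^{*}_{k'} \right) \right\} \right] \tag{A5}$$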
+We desire to linearly relate the coupling integral in the above equation to the flows P and T, through poloidal and
+toroidal sensitivity kernels, Cqj,k and Dqj,k respectively. Hence, they are given by
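+$$C_{\mathbf{q}j,\mathbf{k}} = \int dz\, \rho \left[ q^2 f_j \left( \hat{\mathbf{k}}\cdot\hat{\mathbf{k}}'\, H'_{k} H^{*}_{k'} + V'_{k} V^{*}_{k'} \right) - \mathbf{k}\cdot\mathbf{q}\, f'_j \left( \hat{\mathbf{k}}\cdot\hat{\mathbf{k}}'\, H_{k} H^{*}_{k'} + V_{k} V^{*}_{k'} \right) \right],$$
+$$D_{\mathbf{q}j,\mathbf{k}} = \mathbf{k}\cdot(\mathbf{q}\times\mathbf{e}_z) \int dz\, \rho\, f_j \left( \hat{\mathbf{k}}\cdot\hat{\mathbf{k}}'\, H_{k} H^{*}_{k'} + V_{k} V^{*}_{k'} \right). \tag{A6}$$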
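+Note the symmetry $C_{\mathbf{q}j,\mathbf{k}} = C_{-\mathbf{q}j,-\mathbf{k}}$ and $D_{\mathbf{q}j,\mathbf{k}} = D_{-\mathbf{q}j,-\mathbf{k}}$. This coupling integral contributes to the cross-spectral
+measurement between modes $\mathbf{k}$ and $\mathbf{k}+\mathbf{q}$. From eq 8 of Woodard (2014), we write the first-order effect of flow on
+wavefield cross-correlation as
+$$\left\langle \phi^{\omega *}_{\mathbf{k}}\, \phi^{\omega+\sigma}_{\mathbf{k}+\mathbf{q}} \right\rangle = H^{\omega}_{kk'\sigma}\, \Lambda^{\mathbf{k}}_{\mathbf{k}'}(\sigma), \tag{A7}$$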
+where the function H is given by
+$$H^{\omega}_{kk'\sigma} = -2i\omega \left( N_{k}\, |R^{\omega}_{k}|^{2}\, R^{\omega+\sigma}_{k'} + N_{k'}\, |R^{\omega+\sigma}_{k'}|^{2}\, R^{\omega *}_{k} \right). \tag{A8}$$
+We absorb the factor −2iω into the definition of H. Substitute eq A6 in right-hand-side of eq A7 to obtain eq 4.
+The mode spectral profile R is a Lorentzian, given by
+$$R^{\omega}_{k} = \frac{1}{\omega_{nk}^{2} - \omega^{2} - i\omega\gamma_{nk}/2}, \tag{A9}$$
+where ωnk is the resonant frequency of the mode, and γnk is the mode linewidth. Eq A9 can be derived by introducing
+mode damping −iωγρ as an operator in the differential equation that governs undamped, driven oscillations (see eq
+5 of Hanasoge et al. 2017), and then deriving the effects of first-order perturbations to the wavefield cross-correlation.
+Also, the parity $H^{\omega}_{kk'\sigma} = H^{-\omega *}_{kk'-\sigma}$ and $R^{\omega}_{k} = R^{-\omega *}_{k}$ are established. Mode normalization $N$ is given by
+$$N_{k} = \frac{1}{Q} \sum_{\mathbf{k}}^{Q} \frac{\sum_{\omega} |\phi^{\omega}_{\mathbf{k}}|^{2}}{\sum_{\omega} R^{\omega}_{k}}, \tag{A10}$$
+where the $\frac{1}{Q}\sum_{\mathbf{k}}^{Q}$ on the right-hand side implies an average over all $[k_x, k_y]$ ($Q$ terms in all) such that $k = |\mathbf{k}|$ is constant.
+This forces $N$ to be isotropic, i.e., to depend only on the magnitude $k$, and not on the vector $\mathbf{k}$. The sum over $\omega$ is within five linewidths of $\omega_{nk}$.
+Note that Eq. A8 through A10 are modified from H21 to ensure parity and that flow maps are real.
+The three equations A8 through A10, along with the symmetry relation for kernels, and summation over $\pm\omega$, serve
+to establish the parity $B^{\sigma}_{\mathbf{k},\mathbf{q}} = B^{*\,-\sigma}_{-\mathbf{k},-\mathbf{q}}$. This allows for obtaining $P^{\sigma}_{\mathbf{q}} = P^{*\,-\sigma}_{-\mathbf{q}}$, and subsequently, purely real flow in
+the real domain. Setting $\sigma = 0$ gives us the linear, invertible equation eq 6. Substituting eqns A8 through A10 into
+the noise model obtained in H21 and summing over $\pm\omega$ establishes the symmetry $G^{\sigma}_{\mathbf{k},\mathbf{q}} = G^{-\sigma}_{-\mathbf{k},-\mathbf{q}}$.
+Imaging near-surface flows using mode-coupling analysis 13
+B. SOLA INVERSIONS
+Subtractive Optimally Localized Averages (SOLA, Pijpers & Thompson 1994) aims to obtain a set of weight factors
+for the mode $\mathbf{q}$ and depth $z_o$, which we will call $\alpha_{\mathbf{k},z_o}$. A linear weighted sum of the measurements $B_{\mathbf{k},\mathbf{q}}$ in the fashion
+$\sum_{\mathbf{k}} \alpha_{\mathbf{k},z_o} B_{\mathbf{k},\mathbf{q}}$ allows for an average value of the flow $P_{\mathbf{q}}(z)$ to be estimated at the depth $z_o$. To obtain the coefficients
+$\alpha_{\mathbf{k},z_o}$, it is assumed that a set of sensitivity kernels $K_{\mathbf{k},\mathbf{q}}(z)$ for the mode $\mathbf{q}$ can be summed up coherently to give an
+'averaging kernel' that is localized at the depth $z_o$. Conventionally, a Gaussian centered at $z_o$ with a width $\Delta$ is chosen,
+which the averaging kernel should resemble after performing inversion.
+B.1. Kernels in the integral form
+Since the kernels in eq A6 are manifest as coefficients on a basis fj (z), we first derive kernels that can be expressed
+as a function of depth z (see Figure 8). It is convenient to derive in matrix form. Thus, with the following definitions:
+P ≡ Pq(z), p ≡ Pqj , F ≡ fj (z), B ≡ Bk,q, C ≡ Cqj,k and K ≡ Kk,q(z), we write (assume only poloidal flow for
+simplicity, the same derivations hold true for toroidal flow as well)
+P = F p (B11)
+The size of P is thus the same as the length of the radial grid z.
+Now, pre-multiply by $F^{T}$ and integrate over $z$ on both sides (drop the integral notation for compactness),
+$$F^{T} P = (F^{T} F)\, p$$
+$$p = (F^{T} F)^{-1} F^{T} P \tag{B12}$$
+Now, substituting eq B12 into the forward problem eq 6,
+$$B = C p$$
+$$= (F^{T} F)^{-1} F^{T} C\, P$$
+$$= K P \tag{B13}$$
+where
+$$K = (F^{T} F)^{-1} F^{T} C,$$
+$$\text{i.e.,}\quad K_{\mathbf{k},\mathbf{q}}(z) = \sum_{j,j'} \left[ \int dz\, f_{j}(z)\, f_{j'}(z) \right]^{-1} f_{j'}(z)\, C_{\mathbf{q}j',\mathbf{k}} \tag{B14}$$
+B.2. Obtaining the coefficients α
+Now, demand that the averaging kernel should resemble a unimodulus target Gaussian centered at zo
+$$\mathcal{T}(z, z_o) = \frac{1}{\sqrt{2\pi\Delta^{2}}} \exp\left[ -\frac{(z - z_o)^{2}}{2\Delta^{2}} \right]. \tag{B15}$$
+This can be achieved by solving the optimization problem
+$$\text{minimize}\quad \mathcal{X} = \int dz\, \left[ \mathcal{T}(z, z_o) - \Theta_{\mathbf{q}}(z, z_o) \right]^{2}, \tag{B16}$$
+where we introduce the averaging kernel for mode q thus
+$$\Theta_{\mathbf{q}}(z, z_o) = \sum_{\mathbf{k}} \alpha_{\mathbf{k},z_o}\, K_{\mathbf{k},\mathbf{q}}(z). \tag{B17}$$
+As an aside, we note that averaging kernels can similarly be constructed for RLS (see section 3.1) using eqns 13
 and B14.
-14 Mani et al.
-Figure 8. Left: Kernel Kk,q(z) (eq B14) shown vs depth z for the three radial order couplings f-f, p1-p1, and p2-p2. qR =
-[−112, −45] and kR = [−853, −157] is chosen for all the radial order couplings for comparison. Right: Averaging kernel
-(eq B17) using SOLA, for qR = [−112, −45] at depth z0 = −0.48 Mm, and the corresponding target Gaussian (eq B15).
-Integral of the averaging kernel over z is 0.89.
-Setting ∂X
-∂α → 0 gives us the matrix problem to be solved
-A{α} = v,
-{α} =
-h
-A + µIi−1
-v, (B18)
-where the square matrix A =
-R
-dz Kk,q(z)Kk0
-,q(z) and v =
-R
-dz Kk,q(z)T (z, zo). Here, k
-0
-is just a dummy index for
-denoting elements in the matrix A, (k
-0
-6= k+q). In the last line of eq B18, we introduce regularization using an Identity
-matrix I, with the regularization parameter µ - purpose being the same as that described in section 3.1. Obtaining
-α thus becomes a highly expensive computationally for very large number of modes (see section 3). Substitute α
-obtained from eq B18 into last line of eq B13, and P
-k
-on both sides
-X
-k
-αk,zoB
-σ
-k,q =
-X
-k
-αk,zo
-Z
-dz Kk,q(z)P
-σ
-q
-(z),
-=
-Z
-dz Θq(z, zo)P
-σ
-q
-(z),
-≈ hP
-σ
-q
-(zo)i (B19)
-Inversions can similarly be performed for multiple depths by choosing suitable widths for the target Gaussians. Di￾vergence flow can then be obtained from eq 16. Results are shown in Figures 9 and 10.
-REFERENCES
-Anderson, E. R., Duvall, Thomas L., J., & Jefferies, S. M.
-1990, ApJ, 364, 699, doi: 10.1086/169452
-Bahcall, J. N., & Pinsonneault, M. H. 1992, Reviews of
-Modern Physics, 64, 885,
-doi: 10.1103/RevModPhys.64.885
-Birch, A. C., Schunker, H., Braun, D. C., et al. 2016,
-Science Advances, 2, e1600557,
-doi: 10.1126/sciadv.1600557
-Birch, A. C., Schunker, H., Braun, D. C., & Gizon, L. 2019,
-A&A, 628, A37, doi: 10.1051/0004-6361/201935591
-B¨oning, V. G. A., Birch, A. C., Gizon, L., Duvall, T. L., &
-Schou, J. 2020, A&A, 635, A181,
-doi: 10.1051/0004-6361/201937331
-Braun, D. C., & Lindsey, C. 2001, ApJL, 560, L189,
-doi: 10.1086/324323
-Christensen-Dalsgaard, J. 2002, Reviews of Modern
-Physics, 74, 1073, doi: 10.1103/RevModPhys.74.1073
-—. 2021, Living Reviews in Solar Physics, 18, 2,
+ Mani et al.
+Figure 8. Left: Kernel Kk,q(z) (eq B14) shown vs depth z for the three radial order couplings f-f, p1-p1, and p2-p2. qR =
+[−112, −45] and kR = [−853, −157] is chosen for all the radial order couplings for comparison. Right: Averaging kernel
+(eq B17) using SOLA, for qR = [−112, −45] at depth z0 = −0.48 Mm, and the corresponding target Gaussian (eq B15).
+Integral of the averaging kernel over z is 0.89.
+Setting $\partial\mathcal{X}/\partial\alpha \to 0$ gives us the matrix problem to be solved
+$$A\{\alpha\} = v,$$
+$$\{\alpha\} = \left[ A + \mu I \right]^{-1} v, \tag{B18}$$
+where the square matrix $A = \int dz\, K_{\mathbf{k},\mathbf{q}}(z)\, K_{\mathbf{k}',\mathbf{q}}(z)$ and $v = \int dz\, K_{\mathbf{k},\mathbf{q}}(z)\, \mathcal{T}(z, z_o)$. Here, $\mathbf{k}'$ is just a dummy index for
+denoting elements in the matrix $A$ ($\mathbf{k}' \neq \mathbf{k}+\mathbf{q}$). In the last line of eq B18, we introduce regularization using an identity
+matrix $I$, with the regularization parameter $\mu$, its purpose being the same as that described in section 3.1. Obtaining
+$\alpha$ thus becomes highly expensive computationally for a very large number of modes (see section 3). Substitute $\alpha$
+obtained from eq B18 into the last line of eq B13, and apply $\sum_{\mathbf{k}}$ on both sides:
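+$$\sum_{\mathbf{k}} \alpha_{\mathbf{k},z_o}\, B^{\sigma}_{\mathbf{k},\mathbf{q}} = \sum_{\mathbf{k}} \alpha_{\mathbf{k},z_o} \int dz\, K_{\mathbf{k},\mathbf{q}}(z)\, P^{\sigma}_{\mathbf{q}}(z),$$
+$$= \int dz\, \Theta_{\mathbf{q}}(z, z_o)\, P^{\sigma}_{\mathbf{q}}(z),$$
+$$\approx \left\langle P^{\sigma}_{\mathbf{q}}(z_o) \right\rangle \tag{B19}$$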
+Inversions can similarly be performed for multiple depths by choosing suitable widths for the target Gaussians. Divergence flow can then be obtained from eq 16. Results are shown in Figures 9 and 10.
+REFERENCES
+Anderson, E. R., Duvall, Thomas L., J., & Jefferies, S. M.
+1990, ApJ, 364, 699, doi: 10.1086/169452
+Bahcall, J. N., & Pinsonneault, M. H. 1992, Reviews of
+Modern Physics, 64, 885,
+doi: 10.1103/RevModPhys.64.885
+Birch, A. C., Schunker, H., Braun, D. C., et al. 2016,
+Science Advances, 2, e1600557,
+doi: 10.1126/sciadv.1600557
+Birch, A. C., Schunker, H., Braun, D. C., & Gizon, L. 2019,
+A&A, 628, A37, doi: 10.1051/0004-6361/201935591
+Böning, V. G. A., Birch, A. C., Gizon, L., Duvall, T. L., &
+Schou, J. 2020, A&A, 635, A181,
+doi: 10.1051/0004-6361/201937331
+Braun, D. C., & Lindsey, C. 2001, ApJL, 560, L189,
+doi: 10.1086/324323
+Christensen-Dalsgaard, J. 2002, Reviews of Modern
+Physics, 74, 1073, doi: 10.1103/RevModPhys.74.1073
+—. 2021, Living Reviews in Solar Physics, 18, 2,
 doi: 10.1007/s41116-020-00028-3
-Imaging near-surface flows using mode-coupling analysis 15
-Figure 9. Left: Poloidal flow power-spectrum for f-f as a function of qxR and qyR. Right: Corresponding power-spectrum
-averaged over the azimuthal angle. Shaded region shows ±1 − σ error around the mean. Power is in units of m2
-/s4
-.
-Figure 10. Real-space divergence flows (in units of 10−5
-s
-−1
-) for mode-coupling inversion through SOLA using f-f coupling,
-and LCT, bandpass filtered around qR = 100. We cut edges out from the flow maps and compare a circular region of diameter
-≈175 Mm. The scatter plot shows the agreement between the maps. The slopes of the best-fit line through the scatter plot is
-1.05. For demonstration, we show inversions only for poloidal flow using SOLA.
-De Rosa, M., Duvall, T. L., J., & Toomre, J. 2000, SoPh,
-192, 351, doi: 10.1023/A:1005269001739
-De Rosa, M. L., & Toomre, J. 2004, ApJ, 616, 1242,
-doi: 10.1086/424920
-Duvall, T. L., J., & Harvey, J. W. 1986, in NATO Advanced
-Study Institute (ASI) Series C, Vol. 169, Seismology of
-the Sun and the Distant Stars, ed. D. O. Gough, 105–116
-Duvall, T. L., J., Jefferies, S. M., Harvey, J. W., &
-Pomerantz, M. A. 1993, Nature, 362, 430,
-doi: 10.1038/362430a0
-Fisher, G. H., & Welsch, B. T. 2008, in Astronomical
-Society of the Pacific Conference Series, Vol. 383,
-Subsurface and Atmospheric Influences on Solar Activity,
-ed. R. Howe, R. W. Komm, K. S. Balasubramaniam, &
-G. J. D. Petrie, 373. https://arxiv.org/abs/0712.4289
-Giles, P. M., Duvall, T. L., Scherrer, P. H., & Bogart, R. S.
-1997, Nature, 390, 52, doi: 10.1038/36294
-Gizon, L., & Birch, A. C. 2004, ApJ, 614, 472,
-doi: 10.1086/423367
-Gizon, L., Cameron, R. H., Pourabdian, M., et al. 2020,
-Science, 368, 1469, doi: 10.1126/science.aaz7119
-Gizon, L., Cameron, R. H., Bekki, Y., et al. 2021, A&A,
-652, L6, doi: 10.1051/0004-6361/202141462
-Greer, B. J., Hindman, B. W., & Toomre, J. 2016, ApJ,
-824, 128, doi: 10.3847/0004-637X/824/2/128
-Hanasoge, S., & Mandal, K. 2019, ApJL, 871, L32,
-doi: 10.3847/2041-8213/aaff60
-Hanasoge, S. M., Hotta, H., & Sreenivasan, K. R. 2020,
-Science Advances, 6, eaba9639,
-doi: 10.1126/sciadv.aba9639
-Hanasoge, S. M., Woodard, M., Antia, H. M., Gizon, L., &
-Sreenivasan, K. R. 2017, MNRAS, 470, 1404,
-doi: 10.1093/mnras/stx1298
-Hansen, P. C. 1992, SIAM review, 34, 561
-Hanson, C. S., Duvall, T. L., Birch, A. C., Gizon, L., &
-Sreenivasan, K. R. 2020, A&A, 644, A103,
-doi: 10.1051/0004-6361/202039108
-Hanson, C. S., Hanasoge, S., & Sreenivasan, K. R. 2021,
-ApJ, 910, 156, doi: 10.3847/1538-4357/abe770
-Hathaway, D. H., Teil, T., Norton, A. A., & Kitiashvili, I.
+Imaging near-surface flows using mode-coupling analysis 15
+Figure 9. Left: Poloidal flow power-spectrum for f-f as a function of qxR and qyR. Right: Corresponding power-spectrum
+averaged over the azimuthal angle. Shaded region shows ±1 − σ error around the mean. Power is in units of m2/s4.
+Figure 10. Real-space divergence flows (in units of 10−5s
+−1
+) for mode-coupling inversion through SOLA using f-f coupling,
+and LCT, bandpass filtered around qR = 100. We cut edges out from the flow maps and compare a circular region of diameter
+≈175 Mm. The scatter plot shows the agreement between the maps. The slopes of the best-fit line through the scatter plot is
+1.05. For demonstration, we show inversions only for poloidal flow using SOLA.
+De Rosa, M., Duvall, T. L., J., & Toomre, J. 2000, SoPh,
+192, 351, doi: 10.1023/A:1005269001739
+De Rosa, M. L., & Toomre, J. 2004, ApJ, 616, 1242,
+doi: 10.1086/424920
+Duvall, T. L., J., & Harvey, J. W. 1986, in NATO Advanced
+Study Institute (ASI) Series C, Vol. 169, Seismology of
+the Sun and the Distant Stars, ed. D. O. Gough, 105–116
+Duvall, T. L., J., Jefferies, S. M., Harvey, J. W., &
+Pomerantz, M. A. 1993, Nature, 362, 430,
+doi: 10.1038/362430a0
+Fisher, G. H., & Welsch, B. T. 2008, in Astronomical
+Society of the Pacific Conference Series, Vol. 383,
+Subsurface and Atmospheric Influences on Solar Activity,
+ed. R. Howe, R. W. Komm, K. S. Balasubramaniam, &
+G. J. D. Petrie, 373. https://arxiv.org/abs/0712.4289
+Giles, P. M., Duvall, T. L., Scherrer, P. H., & Bogart, R. S.
+1997, Nature, 390, 52, doi: 10.1038/36294
+Gizon, L., & Birch, A. C. 2004, ApJ, 614, 472,
+doi: 10.1086/423367
+Gizon, L., Cameron, R. H., Pourabdian, M., et al. 2020,
+Science, 368, 1469, doi: 10.1126/science.aaz7119
+Gizon, L., Cameron, R. H., Bekki, Y., et al. 2021, A&A,
+652, L6, doi: 10.1051/0004-6361/202141462
+Greer, B. J., Hindman, B. W., & Toomre, J. 2016, ApJ,
+824, 128, doi: 10.3847/0004-637X/824/2/128
+Hanasoge, S., & Mandal, K. 2019, ApJL, 871, L32,
+doi: 10.3847/2041-8213/aaff60
+Hanasoge, S. M., Hotta, H., & Sreenivasan, K. R. 2020,
+Science Advances, 6, eaba9639,
+doi: 10.1126/sciadv.aba9639
+Hanasoge, S. M., Woodard, M., Antia, H. M., Gizon, L., &
+Sreenivasan, K. R. 2017, MNRAS, 470, 1404,
+doi: 10.1093/mnras/stx1298
+Hansen, P. C. 1992, SIAM review, 34, 561
+Hanson, C. S., Duvall, T. L., Birch, A. C., Gizon, L., &
+Sreenivasan, K. R. 2020, A&A, 644, A103,
+doi: 10.1051/0004-6361/202039108
+Hanson, C. S., Hanasoge, S., & Sreenivasan, K. R. 2021,
+ApJ, 910, 156, doi: 10.3847/1538-4357/abe770
+Hathaway, D. H., Teil, T., Norton, A. A., & Kitiashvili, I.
 2015, ApJ, 811, 105, doi: 10.1088/0004-637X/811/2/105
-16 Mani et al.
-Hathaway, D. H., Upton, L., & Colegrove, O. 2013, Science,
-342, 1217, doi: 10.1126/science.1244682
-Hill, F. 1988, ApJ, 333, 996, doi: 10.1086/166807
-Kashyap, S. G., Das, S. B., Hanasoge, S. M., Woodard,
-M. F., & Tromp, J. 2021, ApJS, 253, 47,
-doi: 10.3847/1538-4365/abdf5e
-Korda, D., & Svanda, M. 2021, A&A, 646, A184, ˇ
-doi: 10.1051/0004-6361/202039928
-Langfellner, J., Birch, A. C., & Gizon, L. 2018, A&A, 617,
-A97, doi: 10.1051/0004-6361/201732471
-Langfellner, J., Gizon, L., & Birch, A. C. 2015, A&A, 581,
-A67, doi: 10.1051/0004-6361/201526024
-Lavely, E. M., & Ritzwoller, M. H. 1992, Philosophical
-Transactions of the Royal Society of London Series A,
-339, 431, doi: 10.1098/rsta.1992.0048
-Lindsey, C., & Braun, D. C. 2000, SoPh, 192, 261,
-doi: 10.1023/A:1005227200911
-L¨optien, B., Birch, A. C., Duvall, T. L., Gizon, L., &
-Schou, J. 2016, A&A, 587, A9,
-doi: 10.1051/0004-6361/201526805
-L¨optien, B., Gizon, L., Birch, A. C., et al. 2018, Nature
-Astronomy, 2, 568, doi: 10.1038/s41550-018-0460-x
-Mandal, K., & Hanasoge, S. 2020, ApJ, 891, 125,
-doi: 10.3847/1538-4357/ab7227
-Mandal, K., Hanasoge, S. M., & Gizon, L. 2021, A&A, 652,
-A96, doi: 10.1051/0004-6361/202141044
-Mani, P., & Hanasoge, S. 2020, ApJ, 901, 139,
-doi: 10.3847/1538-4357/abb133
-—. 2021, ApJ, 920, 36, doi: 10.3847/1538-4357/ac1ad6
-November, L. J., & Simon, G. W. 1988, ApJ, 333, 427,
-doi: 10.1086/166758
-Pijpers, F. P., & Thompson, M. J. 1994, A&A, 281, 231
-Rieutord, M., Roudier, T., Ludwig, H. G., Nordlund, ˚A., &
-Stein, R. 2001, A&A, 377, L14,
-doi: 10.1051/0004-6361:20011160
-Rincon, F., & Rieutord, M. 2018, Living Reviews in Solar
-Physics, 15, 6, doi: 10.1007/s41116-018-0013-5
-Rincon, F., Roudier, T., Schekochihin, A. A., & Rieutord,
-M. 2017, A&A, 599, A69,
-doi: 10.1051/0004-6361/201629747
-Schad, A., & Roth, M. 2020, ApJ, 890, 32,
-doi: 10.3847/1538-4357/ab65ec
-Scherrer, P. H., Schou, J., Bush, R. I., et al. 2012, SoPh,
-275, 207, doi: 10.1007/s11207-011-9834-2
-Schou, J., Antia, H. M., Basu, S., et al. 1998, ApJ, 505,
-390, doi: 10.1086/306146
-Sekii, T. 1997, in Sounding Solar and Stellar Interiors, ed.
-J. Provost & F.-X. Schmider, Vol. 181, ISBN0792348389
-Sekii, T., Kosovichev, A. G., Zhao, J., et al. 2007, PASJ,
-59, S637, doi: 10.1093/pasj/59.sp3.S637
-Snodgrass, H. B. 1984, SoPh, 94, 13,
-doi: 10.1007/BF00154804
-Thompson, M. J., Toomre, J., Anderson, E. R., et al. 1996,
-Science, 272, 1300, doi: 10.1126/science.272.5266.1300
-Unno, W., Osaki, Y., Ando, H., Saio, H., & Shibahashi, H.
-1989, Nonradial oscillations of stars
-Verma, M., Steffen, M., & Denker, C. 2013, A&A, 555,
-A136, doi: 10.1051/0004-6361/201321628
-Vorontsov, S. V. 2011, MNRAS, 418, 1146,
-doi: 10.1111/j.1365-2966.2011.19564.x
-Woodard, M. 2014, SoPh, 289, 1085,
-doi: 10.1007/s11207-013-0386-5
-Woodard, M., Schou, J., Birch, A. C., & Larson, T. P.
-2013, SoPh, 287, 129, doi: 10.1007/s11207-012-0075-9
-Woodard, M. F. 1989, ApJ, 347, 1176, doi: 10.1086/168206
-—. 2006, ApJ, 649, 1140, doi: 10.1086/506927
-—. 2007, ApJ, 668, 1189, doi: 10.1086/521391
-—. 2016, MNRAS, 460, 3292, doi: 10.1093/mnras/stw1223
-Zhao, J., Georgobiani, D., Kosovichev, A. G., et al. 2007,
-ApJ, 659, 848, doi: 10.1086/512009
-Zhao, J., Nagashima, K., Bogart, R. S., Kosovichev, A. G.,
-& Duvall, T. L., J. 2012, ApJL, 749, L5,
-doi: 10.1088/2041-8205/749/1/L5
+ Mani et al.
+Hathaway, D. H., Upton, L., & Colegrove, O. 2013, Science,
+342, 1217, doi: 10.1126/science.1244682
+Hill, F. 1988, ApJ, 333, 996, doi: 10.1086/166807
+Kashyap, S. G., Das, S. B., Hanasoge, S. M., Woodard,
+M. F., & Tromp, J. 2021, ApJS, 253, 47,
+doi: 10.3847/1538-4365/abdf5e
+Korda, D., & Svanda, M. 2021, A&A, 646, A184, ˇ
+doi: 10.1051/0004-6361/202039928
+Langfellner, J., Birch, A. C., & Gizon, L. 2018, A&A, 617,
+A97, doi: 10.1051/0004-6361/201732471
+Langfellner, J., Gizon, L., & Birch, A. C. 2015, A&A, 581,
+A67, doi: 10.1051/0004-6361/201526024
+Lavely, E. M., & Ritzwoller, M. H. 1992, Philosophical
+Transactions of the Royal Society of London Series A,
+339, 431, doi: 10.1098/rsta.1992.0048
+Lindsey, C., & Braun, D. C. 2000, SoPh, 192, 261,
+doi: 10.1023/A:1005227200911
+L¨optien, B., Birch, A. C., Duvall, T. L., Gizon, L., &
+Schou, J. 2016, A&A, 587, A9,
+doi: 10.1051/0004-6361/201526805
+L¨optien, B., Gizon, L., Birch, A. C., et al. 2018, Nature
+Astronomy, 2, 568, doi: 10.1038/s41550-018-0460-x
+Mandal, K., & Hanasoge, S. 2020, ApJ, 891, 125,
+doi: 10.3847/1538-4357/ab7227
+Mandal, K., Hanasoge, S. M., & Gizon, L. 2021, A&A, 652,
+A96, doi: 10.1051/0004-6361/202141044
+Mani, P., & Hanasoge, S. 2020, ApJ, 901, 139,
+doi: 10.3847/1538-4357/abb133
+—. 2021, ApJ, 920, 36, doi: 10.3847/1538-4357/ac1ad6
+November, L. J., & Simon, G. W. 1988, ApJ, 333, 427,
+doi: 10.1086/166758
+Pijpers, F. P., & Thompson, M. J. 1994, A&A, 281, 231
+Rieutord, M., Roudier, T., Ludwig, H. G., Nordlund, ˚A., &
+Stein, R. 2001, A&A, 377, L14,
+doi: 10.1051/0004-6361:20011160
+Rincon, F., & Rieutord, M. 2018, Living Reviews in Solar
+Physics, 15, 6, doi: 10.1007/s41116-018-0013-5
+Rincon, F., Roudier, T., Schekochihin, A. A., & Rieutord,
+M. 2017, A&A, 599, A69,
+doi: 10.1051/0004-6361/201629747
+Schad, A., & Roth, M. 2020, ApJ, 890, 32,
+doi: 10.3847/1538-4357/ab65ec
+Scherrer, P. H., Schou, J., Bush, R. I., et al. 2012, SoPh,
+275, 207, doi: 10.1007/s11207-011-9834-2
+Schou, J., Antia, H. M., Basu, S., et al. 1998, ApJ, 505,
+390, doi: 10.1086/306146
+Sekii, T. 1997, in Sounding Solar and Stellar Interiors, ed.
+J. Provost & F.-X. Schmider, Vol. 181, ISBN0792348389
+Sekii, T., Kosovichev, A. G., Zhao, J., et al. 2007, PASJ,
+59, S637, doi: 10.1093/pasj/59.sp3.S637
+Snodgrass, H. B. 1984, SoPh, 94, 13,
+doi: 10.1007/BF00154804
+Thompson, M. J., Toomre, J., Anderson, E. R., et al. 1996,
+Science, 272, 1300, doi: 10.1126/science.272.5266.1300
+Unno, W., Osaki, Y., Ando, H., Saio, H., & Shibahashi, H.
+1989, Nonradial oscillations of stars
+Verma, M., Steffen, M., & Denker, C. 2013, A&A, 555,
+A136, doi: 10.1051/0004-6361/201321628
+Vorontsov, S. V. 2011, MNRAS, 418, 1146,
+doi: 10.1111/j.1365-2966.2011.19564.x
+Woodard, M. 2014, SoPh, 289, 1085,
+doi: 10.1007/s11207-013-0386-5
+Woodard, M., Schou, J., Birch, A. C., & Larson, T. P.
+2013, SoPh, 287, 129, doi: 10.1007/s11207-012-0075-9
+Woodard, M. F. 1989, ApJ, 347, 1176, doi: 10.1086/168206
+—. 2006, ApJ, 649, 1140, doi: 10.1086/506927
+—. 2007, ApJ, 668, 1189, doi: 10.1086/521391
+—. 2016, MNRAS, 460, 3292, doi: 10.1093/mnras/stw1223
+Zhao, J., Georgobiani, D., Kosovichev, A. G., et al. 2007,
+ApJ, 659, 848, doi: 10.1086/512009
+Zhao, J., Nagashima, K., Bogart, R. S., Kosovichev, A. G.,
+& Duvall, T. L., J. 2012, ApJL, 749, L5,
+doi: 10.1088/2041-8205/749/1/L5
\ No newline at end of file
diff --git a/read/results/pdfium/2201.00200.txt b/read/results/pdfium/2201.00200.txt
index 2074c3d..d1a20de 100644
--- a/read/results/pdfium/2201.00200.txt
+++ b/read/results/pdfium/2201.00200.txt
@@ -1,536 +1,527 @@
-Astronomy & Astrophysics manuscript no. solar˙model˙v10˙corrected © ESO 2022
-January 4, 2022
-Local heating due to convective overshooting and the solar
-modelling problem
-I. Baraffe
-1,2
-, T. Constantino1
-, J. Clarke1
-, A. Le Saux1,2
-, T. Goffrey4
-, T. Guillet1
-, J. Pratt3
-, D. G. Vlaykov1
-1 University of Exeter, Physics and Astronomy, EX4 4QL Exeter, UK (e-mail: i.baraffe@ex.ac.uk)
-2 Ecole Normale Sup ´ erieure, Lyon, CRAL (UMR CNRS 5574), Universit ´ e de Lyon, France ´
-3 Department of Physics and Astronomy, Georgia State University, Atlanta GA 30303, USA
-4 Centre for Fusion, Space and Astrophysics, Department of Physics, University of Warwick, Coventry, CV4 7AL, UK
-ABSTRACT
-Recent hydrodynamical simulations of convection in a solar-like model suggest that penetrative convective flows at the boundary
-of the convective envelope modify the thermal background in the overshooting layer. Based on these results, we implement in one￾dimensional stellar evolution codes a simple prescription to modify the temperature gradient below the convective boundary of a
-solar model. This simple prescription qualitatively reproduces the behaviour found in the hydrodynamical simulations, namely a
-local heating and smoothing of the temperature gradient below the convective boundary. We show that introducing local heating in
-the overshooting layer can reduce the sound-speed discrepancy usually reported between solar models and the structure of the Sun
-inferred from helioseismology. It also affects key quantities in the convective envelope, such as the density, the entropy, and the
-speed of sound. These effects could help reduce the discrepancies between solar models and observed constraints based on seismic
-inversions of the Ledoux discriminant. Since mixing due to overshooting and local heating are the result of the same convective
-penetration process, the goal of this work is to invite solar modellers to consider both processes for a more consistent approach.
-Key words. Convection – Hydrodynamics – Stars: evolution – Sun: evolution - helioseismology - interior
-1. Introduction
-Modelling the internal structure of the Sun is still a challenge.
-A recent review by Christensen-Dalsgaard (2021) describes in
-detail the long-standing efforts to improve solar models. The so￾lar modelling problem refers to the discrepancy between helio￾seismology and solar interior models that adopt low metallici￾ties predicted by the three-dimensional (3D) atmosphere models
-of, for example, Asplund et al. (2009) and Caffau et al. (2011),
-in contrast to the high metallicities based on previous litera￾ture compilations by, for example, Anders & Grevesse (1989)
-and Grevesse & Noels (1993). Asplund et al. (2021) have re￾cently confirmed with state-of-the-art 3D simulations the rela￾tively low metal abundances for the Sun. Asplund et al. (2021)
-consider that their study yields the most reliable solar abun￾dances available today, suggesting that the solar modelling prob￾lem is no longer a problem of abundances but rather a problem
-of stellar physics. The treatment of mixing below the convective
-zone is one of the key processes that could improve solar mod￾els. Several studies indeed reveal that the process of convective
-penetration, also called overshooting, at the bottom of the con￾vective envelope could play an important role in improving the
-agreement between solar models and helioseismic constraints
-(see for example Christensen-Dalsgaard et al. 2011; Zhang et al.
-2012; Buldgen et al. 2019b). Overshooting in solar models has
-most often been treated using diffusive or instantaneous chemi￾cal mixing. A temperature gradient that sharply transitions from
-a nearly adiabatic form to a radiative form is usually assumed,
-as suggested by the theoretical work of Zahn (1991). Models
-with a smoother transition have also been investigated. Based
-on the analysis of models with different stratifications near the
-Send offprint requests to: I. Baraffe
-base of the convective zone, Christensen-Dalsgaard et al. (2011)
-found that models that better fit the helioseismic data have a
-weakly sub-adiabatic temperature gradient in the lower part of
-the convective zone and a smooth transition to the radiative gra￾dient in the overshooting layer. But Christensen-Dalsgaard et al.
-(2011) noted that the required temperature stratification is diffi￾cult to reconcile with existing overshooting models and numer￾ical simulations. They concluded that only non-local turbulent
-convection models could produce the desired degree of smooth￾ness in the transition (see for example Zhang & Li 2012; Zhang
-et al. 2012). But these non-local models remain uncertain, and
-their description of overshooting under the conditions found at
-the base of the solar convective zone is yet to be validated.
-Zhang et al. (2019) explored the impact of overshooting by
-introducing a parametrised turbulent kinetic energy flux based
-on a model with parameters that are adjusted to improve the
-helioseismic properties. They suggest that amelioration can be
-obtained specifically below the convective envelope. However,
-Zhang et al. (2019) find that this model cannot solve the whole
-solar problem because such a flux worsens the sound-speed pro￾file in the deep radiative interior of their solar model. Given the
-uncertainties regarding the temperature stratification of the over￾shooting region, solar modellers have considered these effects as
-secondary and have focused their efforts on exploring the impact
-of solar abundances, microphysics (opacities, equations of state,
-nuclear reaction rates), and chemical mixing and diffusion (see
-details and references in the review of Buldgen et al. 2019a).
-Additional, more exotic effects such as early disk accretion or
-solar-wind mass loss (Zhang et al. 2019; Kunitomo & Guillot
-2021) are also attracting increasing attention.
-To reinvigorate the debate, Buldgen et al. (2019b) recently
-highlighted once again how the transition of the temperature gra￾1
+Astronomy & Astrophysics manuscript no. solar˙model˙v10˙corrected © ESO 2022
+January 4, 2022
+Local heating due to convective overshooting and the solar
+modelling problem
+I. Baraffe
+1,2
+, T. Constantino1, J. Clarke1, A. Le Saux1,2, T. Goffrey4, T. Guillet1, J. Pratt3, D. G. Vlaykov1
+1 University of Exeter, Physics and Astronomy, EX4 4QL Exeter, UK (e-mail: i.baraffe@ex.ac.uk)
+2 Ecole Normale Sup ´ erieure, Lyon, CRAL (UMR CNRS 5574), Universit ´ e de Lyon, France ´
+3 Department of Physics and Astronomy, Georgia State University, Atlanta GA 30303, USA
+4 Centre for Fusion, Space and Astrophysics, Department of Physics, University of Warwick, Coventry, CV4 7AL, UK
+ABSTRACT
+Recent hydrodynamical simulations of convection in a solar-like model suggest that penetrative convective flows at the boundary
+of the convective envelope modify the thermal background in the overshooting layer. Based on these results, we implement in onedimensional stellar evolution codes a simple prescription to modify the temperature gradient below the convective boundary of a
+solar model. This simple prescription qualitatively reproduces the behaviour found in the hydrodynamical simulations, namely a
+local heating and smoothing of the temperature gradient below the convective boundary. We show that introducing local heating in
+the overshooting layer can reduce the sound-speed discrepancy usually reported between solar models and the structure of the Sun
+inferred from helioseismology. It also affects key quantities in the convective envelope, such as the density, the entropy, and the
+speed of sound. These effects could help reduce the discrepancies between solar models and observed constraints based on seismic
+inversions of the Ledoux discriminant. Since mixing due to overshooting and local heating are the result of the same convective
+penetration process, the goal of this work is to invite solar modellers to consider both processes for a more consistent approach.
+Key words. Convection – Hydrodynamics – Stars: evolution – Sun: evolution - helioseismology - interior
+1. Introduction
+Modelling the internal structure of the Sun is still a challenge.
+A recent review by Christensen-Dalsgaard (2021) describes in
+detail the long-standing efforts to improve solar models. The solar modelling problem refers to the discrepancy between helioseismology and solar interior models that adopt low metallicities predicted by the three-dimensional (3D) atmosphere models
+of, for example, Asplund et al. (2009) and Caffau et al. (2011),
+in contrast to the high metallicities based on previous literature compilations by, for example, Anders & Grevesse (1989)
+and Grevesse & Noels (1993). Asplund et al. (2021) have recently confirmed with state-of-the-art 3D simulations the relatively low metal abundances for the Sun. Asplund et al. (2021)
+consider that their study yields the most reliable solar abundances available today, suggesting that the solar modelling problem is no longer a problem of abundances but rather a problem
+of stellar physics. The treatment of mixing below the convective
+zone is one of the key processes that could improve solar models. Several studies indeed reveal that the process of convective
+penetration, also called overshooting, at the bottom of the convective envelope could play an important role in improving the
+agreement between solar models and helioseismic constraints
+(see for example Christensen-Dalsgaard et al. 2011; Zhang et al.
+2012; Buldgen et al. 2019b). Overshooting in solar models has
+most often been treated using diffusive or instantaneous chemical mixing. A temperature gradient that sharply transitions from
+a nearly adiabatic form to a radiative form is usually assumed,
+as suggested by the theoretical work of Zahn (1991). Models
+with a smoother transition have also been investigated. Based
+on the analysis of models with different stratifications near the
+Send offprint requests to: I. Baraffe
+base of the convective zone, Christensen-Dalsgaard et al. (2011)
+found that models that better fit the helioseismic data have a
+weakly sub-adiabatic temperature gradient in the lower part of
+the convective zone and a smooth transition to the radiative gradient in the overshooting layer. But Christensen-Dalsgaard et al.
+(2011) noted that the required temperature stratification is difficult to reconcile with existing overshooting models and numerical simulations. They concluded that only non-local turbulent
+convection models could produce the desired degree of smoothness in the transition (see for example Zhang & Li 2012; Zhang
+et al. 2012). But these non-local models remain uncertain, and
+their description of overshooting under the conditions found at
+the base of the solar convective zone is yet to be validated.
+Zhang et al. (2019) explored the impact of overshooting by
+introducing a parametrised turbulent kinetic energy flux based
+on a model with parameters that are adjusted to improve the
+helioseismic properties. They suggest that amelioration can be
+obtained specifically below the convective envelope. However,
+Zhang et al. (2019) find that this model cannot solve the whole
+solar problem because such a flux worsens the sound-speed profile in the deep radiative interior of their solar model. Given the
+uncertainties regarding the temperature stratification of the overshooting region, solar modellers have considered these effects as
+secondary and have focused their efforts on exploring the impact
+of solar abundances, microphysics (opacities, equations of state,
+nuclear reaction rates), and chemical mixing and diffusion (see
+details and references in the review of Buldgen et al. 2019a).
+Additional, more exotic effects such as early disk accretion or
+solar-wind mass loss (Zhang et al. 2019; Kunitomo & Guillot
+2021) are also attracting increasing attention.
+To reinvigorate the debate, Buldgen et al. (2019b) recently
+highlighted once again how the transition of the temperature gra1
 arXiv:2201.00200v1 [astro-ph.SR] 1 Jan 2022
-Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem
-dient just below the convective envelope can significantly impact
-the disagreement between solar models and helioseismic con￾straints. Their results, based on a method that combines multi￾ple structural inversions, suggest that the transition in temper￾ature gradient is improperly reproduced by adopting either an
-adiabatic or a radiative temperature gradient in the overshoot￾ing layer. The solution should be somewhere in between these
-two extremes. Christensen-Dalsgaard et al. (2018) also note that
-an increase in the temperature at the transition would remove
-a remaining small sharp dip in the speed of sound immediately
-beneath the convective zone of the model. A major difficulty is
-to disentangle the effects of overshoot from the effects of opaci￾ties, which can also alter the temperature gradient in these layers.
-Given the large number of parameters to deal with in order to im￾prove solar models and the current lack of strong arguments in
-favour of modifying the thermal stratification in the overshoot￾ing layer, there has been no real motivation to deviate from the
-traditional picture of a sharp transition as formalised by Zahn
-(1991).
-The present work is motivated by arguments inspired by hy￾drodynamical simulations of convection and convective penetra￾tion in solar-like models. Recent hydrodynamical simulations by
-Baraffe et al. (2021, hereafter B21) highlight the process of local
-heating in the overshooting region due to penetrating convective
-motions across the convective boundary. In the following, we
-analyse the potential impact of this feature on one-dimensional
-(1D) stellar evolution structures in the context of solar models.
-The hydrodynamical results of B21 are briefly summarised in
-Sect. 2, and their impact on 1D models are analysed in Sect. 3
-and discussed in Sect. 4.
-2. Modification of the thermal background in the
-overshooting layer: Results from
-two-dimensional hydrodynamical simulations
-B21 performed two-dimensional (2D) fully compressible time￾implicit simulations of convection and convective penetration in
-a solar-like model with the MUlti-dimensional Stellar Implicit
-Code MUSIC (Viallet et al. 2011, 2016; Goffrey et al. 2017).
-The main motivation was to explore the impact of an artificial
-increase in the stellar luminosity on the properties of convection
-and convective penetration. This procedure is a common tactic
-adopted in hydrodynamical simulations of convection (Rogers
-et al. 2006; Meakin & Arnett 2007; Brun et al. 2011; Hotta 2017;
-Edelmann et al. 2019). The experiments of B21 highlight the im￾pact of penetrative downflows on the local thermal background
-in the overshooting layer. They illustrate how convective down￾flows, when penetrating the region below the convective bound￾ary of the envelope, can induce a local heating and a modification
-of the temperature gradient as a result of compression and shear
-in the overshooting layer. This modification of the local back￾ground is connected to a local increase in the radiative flux to
-counterbalance the negative enthalpy flux (or heat flux) produced
-by penetrating flows. The negative peak of the enthalpy flux
-and the positive bump of the radiative flux below the convective
-boundary are well-known features described in many numeri￾cal works (Hurlburt et al. 1986; Muthsam et al. 1995; Brummell
-et al. 2002; Brun et al. 2011; Hotta 2017; Kapyl ¨ a 2019; Cai ¨
-2020). A few works (Rogers et al. 2006; Viallet et al. 2013; Korre
-et al. 2019; Higl et al. 2021) have also reported a modification
-of the local thermal background in the overshooting region, but
-without providing a detailed description. The simulations of B21
-provide a physical explanation that links the convective penetra￾tion process to the local heating and to the radiative bump in the
-overshooting layer. The solar-like star simulated in B21 is based
-on a model that is not thermally relaxed. It is reasonable to as￾sume that the local heating seen in B21 is present in stars because
-the negative heat flux in the overshooting layer and the bump in
-the radiative flux that compensates for this feature are persistent.
-These two features are also commonly observed in other hydro￾dynamical simulations, as mentioned above. An exploration of
-the impact of this heating on stellar evolution models may reveal
-that heating is a necessary aspect of models for overshooting.
-Fig. 1. Radial profile of the temperature departure ∆T/T0 from
-the initial profile T0 and of the sub-adiabaticity (∇−∇ad) close to
-the convective boundary predicted by 2D hydrodynamical simu￾lations (B21) of solar-like models. The lower panel corresponds
-to the model with a realistic stellar luminosity and the upper
-panel to a model with luminosity enhanced by a factor of ten.
-The dash-dotted red lines show ∆T/T0 (in %), the relative dif￾ference between the time and space averages of the temperature,
-T, and the initial temperature, T0. The solid blue lines show the
-time and space averages of the sub-adiabaticity (∇ − ∇ad). The
-dashed black lines show the initial profile of the sub-adiabaticity,
-(∇−∇ad)init. The convective boundary is indicated by the vertical
-solid line (see details in B21)
-The behaviour of the thermal profile below the convective
-boundary found in the simulations of B21 is illustrated in Fig.
-1. It is displayed for the model with a realistic stellar luminosity
-(lower panel). We also show the results for a model with an artifi￾cial enhancement in the luminosity by a factor of ten because the
-features are intensified in these ‘boosted’ models (upper panel).
-The figure shows the local heating in the overshooting layer and
-its impact on the sub-adiabaticity (∇ − ∇ad), with ∇ =
-d log T
-d log P
-the
+Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem
+dient just below the convective envelope can significantly impact
+the disagreement between solar models and helioseismic constraints. Their results, based on a method that combines multiple structural inversions, suggest that the transition in temperature gradient is improperly reproduced by adopting either an
+adiabatic or a radiative temperature gradient in the overshooting layer. The solution should be somewhere in between these
+two extremes. Christensen-Dalsgaard et al. (2018) also note that
+an increase in the temperature at the transition would remove
+a remaining small sharp dip in the speed of sound immediately
+beneath the convective zone of the model. A major difficulty is
+to disentangle the effects of overshoot from the effects of opacities, which can also alter the temperature gradient in these layers.
+Given the large number of parameters to deal with in order to improve solar models and the current lack of strong arguments in
+favour of modifying the thermal stratification in the overshooting layer, there has been no real motivation to deviate from the
+traditional picture of a sharp transition as formalised by Zahn
+(1991).
+The present work is motivated by arguments inspired by hydrodynamical simulations of convection and convective penetration in solar-like models. Recent hydrodynamical simulations by
+Baraffe et al. (2021, hereafter B21) highlight the process of local
+heating in the overshooting region due to penetrating convective
+motions across the convective boundary. In the following, we
+analyse the potential impact of this feature on one-dimensional
+(1D) stellar evolution structures in the context of solar models.
+The hydrodynamical results of B21 are briefly summarised in
+Sect. 2, and their impact on 1D models are analysed in Sect. 3
+and discussed in Sect. 4.
+2. Modification of the thermal background in the
+overshooting layer: Results from
+two-dimensional hydrodynamical simulations
+B21 performed two-dimensional (2D) fully compressible timeimplicit simulations of convection and convective penetration in
+a solar-like model with the MUlti-dimensional Stellar Implicit
+Code MUSIC (Viallet et al. 2011, 2016; Goffrey et al. 2017).
+The main motivation was to explore the impact of an artificial
+increase in the stellar luminosity on the properties of convection
+and convective penetration. This procedure is a common tactic
+adopted in hydrodynamical simulations of convection (Rogers
+et al. 2006; Meakin & Arnett 2007; Brun et al. 2011; Hotta 2017;
+Edelmann et al. 2019). The experiments of B21 highlight the impact of penetrative downflows on the local thermal background
+in the overshooting layer. They illustrate how convective downflows, when penetrating the region below the convective boundary of the envelope, can induce a local heating and a modification
+of the temperature gradient as a result of compression and shear
+in the overshooting layer. This modification of the local background is connected to a local increase in the radiative flux to
+counterbalance the negative enthalpy flux (or heat flux) produced
+by penetrating flows. The negative peak of the enthalpy flux
+and the positive bump of the radiative flux below the convective
+boundary are well-known features described in many numerical works (Hurlburt et al. 1986; Muthsam et al. 1995; Brummell
+et al. 2002; Brun et al. 2011; Hotta 2017; Käpylä 2019; Cai
+2020). A few works (Rogers et al. 2006; Viallet et al. 2013; Korre
+et al. 2019; Higl et al. 2021) have also reported a modification
+of the local thermal background in the overshooting region, but
+without providing a detailed description. The simulations of B21
+provide a physical explanation that links the convective penetration process to the local heating and to the radiative bump in the
+overshooting layer. The solar-like star simulated in B21 is based
+on a model that is not thermally relaxed. It is reasonable to assume that the local heating seen in B21 is present in stars because
+the negative heat flux in the overshooting layer and the bump in
+the radiative flux that compensates for this feature are persistent.
+These two features are also commonly observed in other hydrodynamical simulations, as mentioned above. An exploration of
+the impact of this heating on stellar evolution models may reveal
+that heating is a necessary aspect of models for overshooting.
+Fig. 1. Radial profile of the temperature departure ∆T/T0 from
+the initial profile T0 and of the sub-adiabaticity (∇−∇ad) close to
+the convective boundary predicted by 2D hydrodynamical simulations (B21) of solar-like models. The lower panel corresponds
+to the model with a realistic stellar luminosity and the upper
+panel to a model with luminosity enhanced by a factor of ten.
+The dash-dotted red lines show ∆T/T0 (in %), the relative difference between the time and space averages of the temperature,
+T, and the initial temperature, T0. The solid blue lines show the
+time and space averages of the sub-adiabaticity (∇ − ∇ad). The
+dashed black lines show the initial profile of the sub-adiabaticity,
+(∇−∇ad)init. The convective boundary is indicated by the vertical
+solid line (see details in B21)
+The behaviour of the thermal profile below the convective
+boundary found in the simulations of B21 is illustrated in Fig.
+1. It is displayed for the model with a realistic stellar luminosity
+(lower panel). We also show the results for a model with an artificial enhancement in the luminosity by a factor of ten because the
+features are intensified in these ‘boosted’ models (upper panel).
+The figure shows the local heating in the overshooting layer and
+its impact on the sub-adiabaticity $(\nabla - \nabla_{\rm ad})$, with $\nabla = \frac{d \log T}{d \log P}$ the
+
+Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem
+temperature gradient and $\nabla_{\rm ad} = \left.\frac{d \log T}{d \log P}\right|_S$ the adiabatic gradient.
+The initial stratification below the convective boundary (located
+at r = 0.6734 × Rstar for this specific stellar model) is set by
+the stable radiative gradient, ∇rad (see the dashed black line below the convective boundary in Fig. 1). B21 show that, as a result of the local heating below the convective boundary characterised by the bump in temperature difference ∆T/T0 displayed
+in Fig. 1, the temperature gradient becomes less sub-adiabatic
+immediately below the convective boundary$^1$. The net result is
+a smoother transition just below the convective boundary with
+a temperature gradient that has an intermediate value between
+the radiative temperature gradient and the adiabatic one. In the
+next section we analyse the impact of this local heating on 1D
+solar structures by adopting a simple prescription that mimics
+the behaviour of the temperature gradient suggested by hydrodynamical simulations.
+3. Impact on one-dimensional solar structure
+models
+3.1. Helioseismic constraints
+Our primary goal in this short paper is to illustrate the potential,
+qualitative impact of the local heating produced by overshooting. We adopted a strategy inspired by the analysis of Buldgen
+et al. (2020), who constructed a static structure of the Sun in
+agreement with seismic inversions of the Ledoux discriminant
+defined by
+$$A = \frac{1}{\Gamma_1}\frac{d \ln P}{d \ln r} - \frac{d \ln \rho}{d \ln r}, \qquad (1)$$
+with $\Gamma_1 = (\partial \ln P/\partial \ln \rho)_{\rm ad}$. Starting from a reference evolutionary model, Buldgen et al. (2020) used an inversion procedure to iteratively reconstruct a solar model. Successive inversions of the Ledoux discriminant allowed them to obtain a
+model-independent profile for this quantity. Their reconstruction
+method also gives solar structures that are in excellent agreement with other structural inversions, namely the entropy, S , the
+square of the speed of sound, c
 2
-Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem
-temperature gradient and ∇ad =
-d log T
-d log P
-|S the adiabatic gradient.
-The initial stratification below the convective boundary (located
-at r = 0.6734 × Rstar for this specific stellar model) is set by
-the stable radiative gradient, ∇rad (see the dashed black line be￾low the convective boundary in Fig. 1). B21 show that, as a re￾sult of the local heating below the convective boundary charac￾terised by the bump in temperature difference ∆T/T0 displayed
-in Fig. 1, the temperature gradient becomes less sub-adiabatic
-immediately below the convective boundary1
-. The net result is
-a smoother transition just below the convective boundary with
-a temperature gradient that has an intermediate value between
-the radiative temperature gradient and the adiabatic one. In the
-next section we analyse the impact of this local heating on 1D
-solar structures by adopting a simple prescription that mimics
-the behaviour of the temperature gradient suggested by hydro￾dynamical simulations.
-3. Impact on one-dimensional solar structure
-models
-3.1. Helioseismic constraints
-Our primary goal in this short paper is to illustrate the potential,
-qualitative impact of the local heating produced by overshoot￾ing. We adopted a strategy inspired by the analysis of Buldgen
-et al. (2020), who constructed a static structure of the Sun in
-agreement with seismic inversions of the Ledoux discriminant
-defined by
-A =
-1
-Γ1
-d ln P
-d ln r
-−
-d ln ρ
-d ln r
-, (1)
-with Γ1 = (∂ ln P/∂ ln ρ)ad. Starting from a reference evolu￾tionary model, Buldgen et al. (2020) used an inversion pro￾cedure to iteratively reconstruct a solar model. Successive in￾versions of the Ledoux discriminant allowed them to obtain a
-model-independent profile for this quantity. Their reconstruction
-method also gives solar structures that are in excellent agree￾ment with other structural inversions, namely the entropy, S , the
-square of the speed of sound, c
-2
-s
-, and the density, ρ. To illustrate
-the convergence of their reconstruction procedure, they show
-(right panels of their Figs. 3-6) the successive iterations that con￾verge to an excellent level of agreement for the four structural
-inversions (A, S , c
-2
-s
-, ρ) starting from the initial reference model
-adopted in their work. The differences found between the recon￾structed model and the reference model are useful as they indi￾cate the modifications of the reference model that are required to
-converge towards a solar model in agreement with helioseismic
-data. We recall here the major trends found by Buldgen et al.
-(2020) for the four structural quantities, which are used for our
-analysis in Sect. 3.2.
-The first concerns the Ledoux discriminant. The major dis￾crepancy between the Sun and the reference model occurs just
-below the convective boundary, with a large positive bump for
-the quantity (ASun - Aref).
-The second concerns the speed of sound. The same positive
-bump at the same location as for the Ledoux discriminant, A, is
-observed for the quantity (c
-2
-s,Sun − c
-2
-s,ref)/c
-2
-s,ref. The corrections
-applied to A during the reconstruction procedure also reduce the
-discrepancy in the speed of sound in the radiative region.
-The third concerns the entropy. Large discrepancies are ob￾served in both the radiative region and the convective zone. The
-1 Less sub-adiabatic means that |∇ − ∇ad| decreases compared to the
-initial profile.
-entropy discrepancy (S Sun − S ref)/S ref has two positive peaks in
-the radiative zone, one just below the overshooting region and a
-larger peak deeper at ∼ 40% of the stellar radius. This discrep￾ancy is negative in the convective zone. The corrections applied
-to A help reduce these entropy discrepancies in both regions.
-The fourth concerns the density. The quantity (ρSun −
-ρref)/ρref has a negative peak in the radiative region, at ∼ 35%
-of the stellar radius, and is positive in the convective zone.
-Importantly, Buldgen et al. (2020) mention that their recon￾struction procedure gives similar Ledoux discriminant profiles
-for a wide range of initial reference models. We used these re￾sults to gauge whether the modifications of the thermal profile
-predicted by B21 can help in qualitatively improving all the
-structural quantities used by Buldgen et al. (2020).
-3.2. Testing one-dimensional solar models
-Our main motivation is to show the potential impact of the local
-heating described in Sect. 2 on stellar models. We are not aim￾ing in this short work at constructing the best solar model to fit
-helioseismic constraints. Using stellar evolution codes, we have
-adopted two different methods that can be found in the litera￾ture to construct solar models (e.g. Zhang et al. 2012; Vinyoles
-et al. 2017). Our first method relies on the thermal relaxation
-of a reference model with solar radius and luminosity that is
-modified to reproduce the temperature gradient in the overshoot￾ing layer suggested by hydrodynamical simulations. In this case,
-the chemical abundances are not modified by nuclear reactions,
-mixing, or microscopic diffusion during the relaxation process.
-For these tests, we used the 1D Lyon stellar evolution code
-(Baraffe et al. 1998). We repeated this experiment based on ther￾mal relaxation with the stellar evolution code MONSTAR (e.g.
-Constantino et al. 2014) and obtained the same qualitative re￾sults.
-The second method considers models that account for the
-modification of the temperature gradient in the overshooting
-layer from the zero age main sequence (ZAMS). The models
-are then evolved until they reach the solar radius and luminosity.
-With this approach, changes in the chemical abundances from
-nuclear reactions, microscopic diffusion, and overshooting mix￾ing are also consistent with any modification of the structure
-induced by the forced local heating in the overshooting layer.
-These tests were performed with MONSTAR as it includes the
-treatment of microscopic diffusion.
-The first method allows the impact of local heating in
-the overshooting layer after thermal relaxation to be isolated.
-The second method provides evolutionary models that are self￾consistent since the effect of the modification of the temperature
-gradient is accounted for during their evolution on the main se￾quence.
-In the following, we adopt a modification of the local temper￾ature gradient in the overshooting layer that qualitatively repro￾duces the behaviour displayed in Fig. 1. We define an overshoot￾ing length dov = αovHP,CB, with HP,CB the pressure scale height
-at the convective boundary and αov a free parameter. We also de￾fine two radial locations, rov = rCB − dov and rmid = rCB − dov/2,
-with rCB the radial location of the convective boundary. The tem￾perature gradient is modified as follows. For rmid ≤ r < rCB, we
-use
-∇ = g(r)∇ad + (1 − g(r))∇rad, (2)
-with
-g(r) = sin{[(r − rmid)/(rCB − rmid)]a × π/2}. (3)
-3
-Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem
-For rov ≤ r < rmid, we use
-∇ = ∇rad − h(r)∇ad, (4)
-with
-h(r) = b × sin{[(rmid − r)/(rmid − rov)] × π}. (5)
-Sine functions are used in Eqs. (3) and (5) to reproduce the
-smooth variations in the temperature gradient below the convec￾tive boundary produced by the hydrodynamical simulations. We
-have verified that the results are insensitive to the smoothness of
-these variations and to the exact shape of the temperature gra￾dient radial profile.We adopted a=0.3 in Eq. (3) as it provides a
-behaviour for the temperature gradient very close to the one dis￾played in Fig. 1. Results are rather insensitive to variations in the
-values of a between 0.2 and 0.4. We adopted b=0.03 in Eq. (5),
-which also provides a close visual match to the hydrodynamical
-results, but we note that the results are insensitive to the value of
-b.
-3.2.1. Thermal equilibrium models
-The details of the procedure for the first method are the follow￾ing. We calculate the evolution of a 1 M model with an initial
-helium mass fraction of 0.28, metallicity Z = 0.02, and a mix￾ing length lmix = 1.9HP. We use a reference model that is in
-thermal equilibrium2
-and has the luminosity and radius of the
-current Sun. Starting from this reference model, the tempera￾ture gradient is modified over a prescribed depth to mimic the
-impact of overshooting according to the hydrodynamical sim￾ulations described in Sect. 2. We adopt the prescription given
-by Eqs. (2)-(5) over a distance dov below the convective bound￾ary. We show the results in Fig. 2 for αov = 0.15 and αov= 0.20.
-These overshooting widths are in good agreement with the maxi￾mal depth reached by downflows below the convective boundary
-predicted by the hydrodynamical simulations for the solar-like
-model investigated in B21. We note that the stellar model used
-in B21 is slightly under-luminous compared to the Sun (see B21
-for details). B21 also mention that one should be cautious when
-directly applying the overshooting depths predicted by their sim￾ulations to real stars since the final relaxed state for these simula￾tions may have different properties from non-thermally relaxed
-states. We varied αov between 0.15 and 0.35 and find that the
-results do not change qualitatively. However, the amplitude of
-the variations in the model properties depends on dov (see be￾low). As shown below, this simple prescription implemented in
-a stellar evolution code yields a local increase in the tempera￾ture below the convective boundary, similar to that observed in
-the hydrodynamical simulations. We stress that Eqs. (2)-(5) have
-been chosen for simplicity. They are only a rough approximation
-that can mimic the thermal profile behaviour suggested in the 2D
-simulations.
-The model with a modified temperature gradient is then ther￾mally relaxed, that is to say, it is evolved over many thermal
-timescales without any modification of the abundances from nu￾clear reactions until thermal equilibrium is reached. The temper￾ature gradient is modified in the overshooting layer during the
-whole relaxation process, and this is referred to as a ‘forced local
-heating’. This procedure ensures that the model with a modified
-temperature gradient can be consistently compared to the refer￾ence model. As shown in Fig. 2, the simple prescription given
-2 Thermal equilibrium means that the total nuclear energy produced
-in the central regions balances the radiative losses at the surface, i.e. the
-total nuclear luminosity, Lnuc, equals the total stellar luminosity, L.
-by Eqs. (2)-(5) yields similar qualitative changes in the temper￾ature and the sub-adiabaticity close to the convective boundary
-that was found in the hydrodynamical simulations of B21.
-Fig. 2. Radial profile of the temperature difference and of the
-sub-adiabaticity of a 1D solar-like structure with a modified tem￾perature gradient in the overshooting layer according to Eqs.
-(2)-(5). The temperature gradient is modified over a distance
-dov = αovHP,CB, with αov=0.15 in the lower panel and αov=0.20
-in the upper panel. The dash-dotted red lines show the percent￾age relative temperature difference, ∆T/Tref, with ∆T = T −Tref.
-The solid blue lines correspond to the sub-adiabaticity (∇−∇ad).
-The dashed black lines show the sub-adiabaticity of the refer￾ence model. The convective boundary is indicated by the vertical
-solid line. The vertical dashed line in each panel is located at a
-distance dov below the convective boundary.
-The impact on the whole stellar structure was quantified by
-comparing the four structural quantities (A, S , c
-2
-s
-, ρ) between the
-modified and the reference model. The results are displayed in
-Fig. 3, with ∆X defined as (X−Xref) for any structural quantity X.
-The forced local heating in the overshooting layer produces sim￾ilar positive peaks for ∆A, ∆S , and ∆c
-2
-s
-, as found for the temper￾ature. The modification thus provides the correction required to
-improve the discrepancy for the Ledoux discriminant described
-in the first of the trends outlined in Sect. 3.1. Unsurprisingly,
-such a modification of the temperature gradient is expected to
-improve the agreement with helioseismic constraints and help
-4
-Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem
-remove the sound speed anomaly below the convective bound￾ary (second trend in Sect. 3.1), as suggested by the results of
-Christensen-Dalsgaard et al. (2011). But it is also interesting to
-note that such a modification yields a slight cooling of the con￾vective zone (see Fig. 2) and thus a negative difference for the
-entropy (see Fig. 3). A negative difference in the convective en￾velope is in agreement with the correction required for the ref￾erence model of Buldgen et al. (2020) to better match the Sun
-(see third trend in Sect. 3.1). Regarding the density, the modifi￾cation of the temperature gradient has an interesting impact in
-the radiative zone, with a large decrease in the density compared
-to the reference model over a broad region below the convective
-boundary. The impact on the density in the convective region for
-this specific model is partly in agreement with the correction re￾quired for this quantity in the Buldgen et al. (2020) study, with a
-positive difference found only in the upper part of the convective
-envelope (see the fourth trend in Sect. 3.1).
-These trends are insensitive to the depth over which the tem￾perature gradient is modified. Increasing the depth increases the
-magnitude of the differences but has no impact on their sign. We
-find that the maximum variation in the model properties, such as
-the speed of sound, ∆c
-2
-s
-/c
-2
-s,ref, roughly scales with d
-2
-ov. This scal￾ing is linked to the integrated area between the modified temper￾ature gradient curve and the one for the reference (non-modified)
-temperature gradient, which roughly decreases linearly with r.
-This area is proportional to the square of the overshooting depth,
-and consequently, the maximum variation in the model proper￾ties is also proportional to d
-2
-ov. The qualitative trends also remain
-the same whether overshooting mixing in the reference model
-is ignored or included using a step function (with instantaneous
-mixing) or an exponential decay for the diffusion coefficient (e.g.
-Freytag et al. 1996).
-3.2.2. Self-consistent evolutionary models
-For the tests based on the second method, we ran different sets
-of models with different combinations of assumptions, including
-or not microscopic diffusion and with or without overshooting
-mixing. When overshooting mixing was included in the over￾shooting layer, it was based either on a step function or on an
-exponential decay for the diffusion coefficient. Microscopic dif￾fusion for H and He was implemented according to Thoul et al.
-(1994). For these tests, the temperature gradient was modified
-according to Eqs. (2)-(5). All models start from the ZAMS and
-are evolved until they reach the solar radius and luminosity at the
-same age. This was achieved by making small adjustments to the
-mixing length, lmix. The models with temperature gradient mod￾ifications were compared to the relevant reference model, which
-has no modification of the temperature gradient but everything
-else is the same (i.e. the same treatment of microscopic diffu￾sion and of overshooting mixing). The evolutionary models with
-temperature gradient modifications are thus self-consistent. The
-main difference between this approach and the one in the previ￾ous section is that these models accumulate small differences in,
-for example, central H abundance when compared to their ref￾erence model. These tests produce the same trends in the over￾shooting layer as found for the tests based on the first method
-(Sect. 3.2.1), independently of the treatment of overshooting
-mixing and whether microscopic diffusion is included or not.
-In the convective zone, all models give a positive difference for
-the density between the model with a modified temperature gra￾dient and the relevant reference model. For the other quantities
-(S , c
-2
-s
-), the differences in the convective zone are very sensitive
-Fig. 3. Difference of various structural quantities between a
-model with a modified temperature gradient in the overshoot￾ing layer and a reference model calculated with the Lyon stellar
-evolution code. The temperature gradient in the modified model
-is changed over a distance dov = αovHP,CB below the convec￾tive boundary (indicated by the vertical solid line). The lower
-panel shows the results for αov = 0.15 and the upper panel for
-αov = 0.20.
-to the assumptions regarding whether overshooting mixing is in￾cluded or not. But at least we find solutions that are compatible
-with the four trends found by Buldgen et al. (2020) for the four
-structural quantities. This is illustrated in Fig. 4 with a model
-that accounts for step function overshooting mixing over a dis￾tance dov = 0.15HP,CB (lower panel) and dov = 0.20HP,CB (upper
-panel).
-4. Conclusion
-The tests performed in Sect. 3 are based on different methods
-(relaxed models versus consistent evolution) that can be used to
-construct solar models. Independently of the method used, the
-tests show that a local increase in the temperature in the over￾shooting region due to convective penetration provides the quali￾tative effects required to improve the speed of sound discrepancy
-below the convective boundary. This discrepancy is persistent in
-5
-Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem
-Fig. 4. Difference of various structural quantities between a
-modified model and a reference model calculated with the
-MONSTAR stellar evolution code. The reference model is
-evolved from the ZAMS with microscopic diffusion and step
-function overshooting mixing over a distance dov = αovHP,CB be￾low the convective boundary. The lower panel shows the results
-for αov = 0.15 and the upper panel for αov = 0.20. The models
-with a modified temperature gradient in the overshooting layer
-(same microscopic diffusion and overshooting mixing treatment
-as the reference model) are evolved similarly from the ZAMS.
-The convective boundary is indicated by the vertical solid line.
-solar models that use low solar metal abundances. This is not
-surprising because an increase in the temperature in this spe￾cific region has previously been invoked in the literature to solve
-this problem, as mentioned in Sect. 1. However, the details of
-the physical process responsible for this local heating have been
-lacking, whereas we can now suggest an explanation based on
-the B21 results. The trends that we find for the four structural
-quantities (A, S , c
-2
-s
-, ρ) are robust below the convective bound￾ary and in a large fraction of the radiative core, independently of
-the treatment of mixing and diffusion and of the method for con￾structing the models in Sects. 3.2.1 and 3.2.2. Our experiments
-additionally show that such a local change in the temperature,
-despite being made over a very limited region below the convec￾tive boundary, can also affect the density, the entropy, and the
-speed of sound in the convective envelope after thermal relax￾ation or evolution on the main sequence. How these quantities
-are affected in the convective envelope compared to a reference
-model with no local heating depends on the strategy for building
-solar models and on the treatment of overshooting mixing. This
-mixing is obviously linked to the local heating given that both
-result from the same dynamical process. A combined testing of
-both effects in stellar models could provide more constraints on
-the general process of overshooting.
-Increasingly, efforts are now devoted to characterising the
-process of convective boundary mixing in stellar models based
-on multi-dimensional hydrodynamical simulations. More work
-is required to obtain reliable determinations of an overshooting
-depth and to describe quantitatively the mixing and impact on
-the temperature gradient. Understanding the effects of rotation
-and magnetic fields on overshooting is a significantly more dif￾ficult theoretical and numerical problem to address; however,
-efforts to study these combined non-linear effects are ongoing
-(Hotta 2017; Korre et al. 2021). Despite the limitations of ex￾isting hydrodynamical simulations, they are already providing
-constraints on physical processes usually treated with several
-free parameters in 1D stellar evolution models. They can thus
-limit the degrees of freedom in a problem as complex as so￾lar modelling. Our primary goal in this work is to highlight the
-potential impact of convective penetration on the thermal back￾ground in the overshooting region. The processes studied in B21
-that produce a local change in the temperature gradient are also
-responsible for the mixing in this region. Because much observa￾tional evidence points towards the need for extra mixing at con￾vective boundaries, for example lithium depletion in solar-like
-stars (Baraffe et al. 2017), the size of convective cores (Claret
-& Torres 2016), and colour-magnitude diagrams (Castro et al.
-2014), solar modellers often include this extra mixing in their
-models. But a consistent approach should also require account￾ing for a local change in the temperature gradient. The impact of
-this local heating goes in the right direction to improve not only
-the discrepancies of solar models below the convective bound￾ary, but also in the convective envelope. This effect offers an in￾teresting step forward for solving the solar modelling problem.
-In this exploratory work, we adopt a simple prescription for the
-local heating in the overshooting layer since the main goal is
-to highlight its qualitative impact on stellar models. However,
-this effect should not be considered as another free parameter in
-the solar modelling problem. Future multi-dimensional hydro￾dynamical simulations will enable this process, and its treatment
-in 1D stellar evolution codes, to be better constrained.
-5. Acknowledgements
-We thank our anonymous referee for valuable comments which
-helped improving the manuscript. This work is supported by the
-ERC grant No. 787361-COBOM and the consolidated STFC
-grant ST/R000395/1. IB thanks the Max Planck Institut fur¨
-Astrophysics (Garching) for warm hospitality during completion
-of part of this work. The authors would like to acknowledge the
-use of the University of Exeter High-Performance Computing
-(HPC) facility ISCA and of the DiRAC Data Intensive service
-at Leicester, operated by the University of Leicester IT Services,
-which forms part of the STFC DiRAC HPC Facility. The equip￾ment was funded by BEIS capital funding via STFC capital
-grants ST/K000373/1 and ST/R002363/1 and STFC DiRAC
-Operations grant ST/R001014/1. DiRAC is part of the National
-e-Infrastructure.
-6
-Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem
-References
-Anders, E. & Grevesse, N. 1989, Geochim. Cosmochim. Acta, 53, 197
-Asplund, M., Amarsi, A. M., & Grevesse, N. 2021, A&A, 653, A141
-Asplund, M., Grevesse, N., Sauval, A. J., & Scott, P. 2009, ARA&A, 47, 481
-Baraffe, I., Chabrier, G., Allard, F., & Hauschildt, P. H. 1998, A&A, 337, 403
-Baraffe, I., Pratt, J., Goffrey, T., et al. 2017, ApJ, 845, L6
-Baraffe, I., Pratt, J., Vlaykov, D. G., et al. 2021, A&A, 654, A126
-Brummell, N. H., Clune, T. L., & Toomre, J. 2002, ApJ, 570, 825
-Brun, A. S., Miesch, M. S., & Toomre, J. 2011, ApJ, 742, 79
-Buldgen, G., Eggenberger, P., Baturin, V. A., et al. 2020, A&A, 642, A36
-Buldgen, G., Salmon, S., & Noels, A. 2019a, Frontiers in Astronomy and Space
-Sciences, 6, 42
-Buldgen, G., Salmon, S. J. A. J., Noels, A., et al. 2019b, A&A, 621, A33
-Caffau, E., Ludwig, H. G., Steffen, M., Freytag, B., & Bonifacio, P. 2011,
-Sol. Phys., 268, 255
-Cai, T. 2020, ApJ, 888, 46
-Castro, N., Fossati, L., Langer, N., et al. 2014, A&A, 570, L13
-Christensen-Dalsgaard, J. 2021, Living Reviews in Solar Physics, 18, 2
-Christensen-Dalsgaard, J., Gough, D. O., & Knudstrup, E. 2018, MNRAS, 477,
-3845
-Christensen-Dalsgaard, J., Monteiro, M. J. P. F. G., Rempel, M., & Thompson,
-M. J. 2011, MNRAS, 414, 1158
-Claret, A. & Torres, G. 2016, A&A, 592, A15
-Constantino, T., Campbell, S., Gil-Pons, P., & Lattanzio, J. 2014, ApJ, 784, 56
-Edelmann, P. V. F., Ratnasingam, R. P., Pedersen, M. G., et al. 2019, ApJ, 876, 4
-Freytag, B., Ludwig, H. G., & Steffen, M. 1996, A&A, 313, 497
-Goffrey, T., Pratt, J., Viallet, M., et al. 2017, A&A, 600, A7
-Grevesse, N. & Noels, A. 1993, in Origin and Evolution of the Elements, ed.
-N. Prantzos, E. Vangioni-Flam, & M. Casse, 15–25
-Higl, J., Muller, E., & Weiss, A. 2021, A&A, 646, A133 ¨
-Hotta, H. 2017, ApJ, 843, 52
-Hurlburt, N. E., Toomre, J., & Massaguer, J. M. 1986, ApJ, 311, 563
-Kapyl ¨ a, P. J. 2019, A&A, 631, A122 ¨
-Korre, L., Brummell, N., Garaud, P., & Guervilly, C. 2021, MNRAS, 503, 362
-Korre, L., Garaud, P., & Brummell, N. H. 2019, MNRAS, 484, 1220
-Kunitomo, M. & Guillot, T. 2021, arXiv e-prints, arXiv:2109.06492
-Meakin, C. A. & Arnett, D. 2007, ApJ, 667, 448
-Muthsam, H. J., Goeb, W., Kupka, F., Liebich, W., & Zoechling, J. 1995, A&A,
-293, 127
-Rogers, T. M., Glatzmaier, G. A., & Jones, C. A. 2006, ApJ, 653, 765
-Thoul, A. A., Bahcall, J. N., & Loeb, A. 1994, ApJ, 421, 828
-Viallet, M., Baraffe, I., & Walder, R. 2011, A&A, 531, A86
-Viallet, M., Goffrey, T., Baraffe, I., et al. 2016, A&A, 586, A153
-Viallet, M., Meakin, C., Arnett, D., & Mocak, M. 2013, ApJ, 769, 1 ´
-Vinyoles, N., Serenelli, A. M., Villante, F. L., et al. 2017, ApJ, 835, 202
-Zahn, J. P. 1991, A&A, 252, 179
-Zhang, C., Deng, L., Xiong, D., & Christensen-Dalsgaard, J. 2012, ApJ, 759,
-L14
-Zhang, Q. S. & Li, Y. 2012, ApJ, 746, 50
-Zhang, Q.-S., Li, Y., & Christensen-Dalsgaard, J. 2019, ApJ, 881, 103
-7
+s
+, and the density, ρ. To illustrate
+the convergence of their reconstruction procedure, they show
+(right panels of their Figs. 3-6) the successive iterations that converge to an excellent level of agreement for the four structural
+inversions $(A, S, c_s^2, \rho)$ starting from the initial reference model
+adopted in their work. The differences found between the reconstructed model and the reference model are useful as they indicate the modifications of the reference model that are required to
+converge towards a solar model in agreement with helioseismic
+data. We recall here the major trends found by Buldgen et al.
+(2020) for the four structural quantities, which are used for our
+analysis in Sect. 3.2.
+The first concerns the Ledoux discriminant. The major discrepancy between the Sun and the reference model occurs just
+below the convective boundary, with a large positive bump for
+the quantity $(A_{\rm Sun} - A_{\rm ref})$.
+The second concerns the speed of sound. The same positive
+bump at the same location as for the Ledoux discriminant, A, is
+observed for the quantity $(c_{s,{\rm Sun}}^2 - c_{s,{\rm ref}}^2)/c_{s,{\rm ref}}^2$. The corrections
+applied to A during the reconstruction procedure also reduce the
+discrepancy in the speed of sound in the radiative region.
+The third concerns the entropy. Large discrepancies are observed in both the radiative region and the convective zone. The
+1 Less sub-adiabatic means that |∇ − ∇ad| decreases compared to the
+initial profile.
+entropy discrepancy $(S_{\rm Sun} - S_{\rm ref})/S_{\rm ref}$ has two positive peaks in
+the radiative zone, one just below the overshooting region and a
+larger peak deeper at ∼ 40% of the stellar radius. This discrepancy is negative in the convective zone. The corrections applied
+to A help reduce these entropy discrepancies in both regions.
+The fourth concerns the density. The quantity
+$(\rho_{\rm Sun} - \rho_{\rm ref})/\rho_{\rm ref}$ has a negative peak in the radiative region, at ∼ 35%
+of the stellar radius, and is positive in the convective zone.
+Importantly, Buldgen et al. (2020) mention that their reconstruction procedure gives similar Ledoux discriminant profiles
+for a wide range of initial reference models. We used these results to gauge whether the modifications of the thermal profile
+predicted by B21 can help in qualitatively improving all the
+structural quantities used by Buldgen et al. (2020).
+3.2. Testing one-dimensional solar models
+Our main motivation is to show the potential impact of the local
+heating described in Sect. 2 on stellar models. We are not aiming in this short work at constructing the best solar model to fit
+helioseismic constraints. Using stellar evolution codes, we have
+adopted two different methods that can be found in the literature to construct solar models (e.g. Zhang et al. 2012; Vinyoles
+et al. 2017). Our first method relies on the thermal relaxation
+of a reference model with solar radius and luminosity that is
+modified to reproduce the temperature gradient in the overshooting layer suggested by hydrodynamical simulations. In this case,
+the chemical abundances are not modified by nuclear reactions,
+mixing, or microscopic diffusion during the relaxation process.
+For these tests, we used the 1D Lyon stellar evolution code
+(Baraffe et al. 1998). We repeated this experiment based on thermal relaxation with the stellar evolution code MONSTAR (e.g.
+Constantino et al. 2014) and obtained the same qualitative results.
+The second method considers models that account for the
+modification of the temperature gradient in the overshooting
+layer from the zero age main sequence (ZAMS). The models
+are then evolved until they reach the solar radius and luminosity.
+With this approach, changes in the chemical abundances from
+nuclear reactions, microscopic diffusion, and overshooting mixing are also consistent with any modification of the structure
+induced by the forced local heating in the overshooting layer.
+These tests were performed with MONSTAR as it includes the
+treatment of microscopic diffusion.
+The first method allows the impact of local heating in
+the overshooting layer after thermal relaxation to be isolated.
+The second method provides evolutionary models that are selfconsistent since the effect of the modification of the temperature
+gradient is accounted for during their evolution on the main sequence.
+In the following, we adopt a modification of the local temperature gradient in the overshooting layer that qualitatively reproduces the behaviour displayed in Fig. 1. We define an overshooting length $d_{\rm ov} = \alpha_{\rm ov} H_{P,{\rm CB}}$, with $H_{P,{\rm CB}}$ the pressure scale height
+at the convective boundary and $\alpha_{\rm ov}$ a free parameter. We also define two radial locations, $r_{\rm ov} = r_{\rm CB} - d_{\rm ov}$ and $r_{\rm mid} = r_{\rm CB} - d_{\rm ov}/2$,
+with $r_{\rm CB}$ the radial location of the convective boundary. The temperature gradient is modified as follows. For $r_{\rm mid} \le r < r_{\rm CB}$, we
+use
+$\nabla = g(r)\,\nabla_{\rm ad} + (1 - g(r))\,\nabla_{\rm rad}$, (2)
+with
+$g(r) = \sin\{[(r - r_{\rm mid})/(r_{\rm CB} - r_{\rm mid})]^{a} \times \pi/2\}$. (3)
+
+Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem
+For $r_{\rm ov} \le r < r_{\rm mid}$, we use
+$\nabla = \nabla_{\rm rad} - h(r)\,\nabla_{\rm ad}$, (4)
+with
+$h(r) = b \times \sin\{[(r_{\rm mid} - r)/(r_{\rm mid} - r_{\rm ov})] \times \pi\}$. (5)
+Sine functions are used in Eqs. (3) and (5) to reproduce the
+smooth variations in the temperature gradient below the convective boundary produced by the hydrodynamical simulations. We
+have verified that the results are insensitive to the smoothness of
+these variations and to the exact shape of the temperature gradient radial profile. We adopted a=0.3 in Eq. (3) as it provides a
+behaviour for the temperature gradient very close to the one displayed in Fig. 1. Results are rather insensitive to variations in the
+values of a between 0.2 and 0.4. We adopted b=0.03 in Eq. (5),
+which also provides a close visual match to the hydrodynamical
+results, but we note that the results are insensitive to the value of
+b.
+3.2.1. Thermal equilibrium models
+The details of the procedure for the first method are the following. We calculate the evolution of a 1 $M_\odot$ model with an initial
+helium mass fraction of 0.28, metallicity Z = 0.02, and a mixing length $l_{\rm mix} = 1.9 H_P$. We use a reference model that is in
+thermal equilibrium$^2$ and has the luminosity and radius of the
+current Sun. Starting from this reference model, the temperature gradient is modified over a prescribed depth to mimic the
+impact of overshooting according to the hydrodynamical simulations described in Sect. 2. We adopt the prescription given
+by Eqs. (2)-(5) over a distance dov below the convective boundary. We show the results in Fig. 2 for αov = 0.15 and αov= 0.20.
+These overshooting widths are in good agreement with the maximal depth reached by downflows below the convective boundary
+predicted by the hydrodynamical simulations for the solar-like
+model investigated in B21. We note that the stellar model used
+in B21 is slightly under-luminous compared to the Sun (see B21
+for details). B21 also mention that one should be cautious when
+directly applying the overshooting depths predicted by their simulations to real stars since the final relaxed state for these simulations may have different properties from non-thermally relaxed
+states. We varied αov between 0.15 and 0.35 and find that the
+results do not change qualitatively. However, the amplitude of
+the variations in the model properties depends on dov (see below). As shown below, this simple prescription implemented in
+a stellar evolution code yields a local increase in the temperature below the convective boundary, similar to that observed in
+the hydrodynamical simulations. We stress that Eqs. (2)-(5) have
+been chosen for simplicity. They are only a rough approximation
+that can mimic the thermal profile behaviour suggested in the 2D
+simulations.
+The model with a modified temperature gradient is then thermally relaxed, that is to say, it is evolved over many thermal
+timescales without any modification of the abundances from nuclear reactions until thermal equilibrium is reached. The temperature gradient is modified in the overshooting layer during the
+whole relaxation process, and this is referred to as a ‘forced local
+heating’. This procedure ensures that the model with a modified
+temperature gradient can be consistently compared to the reference model. As shown in Fig. 2, the simple prescription given
+2 Thermal equilibrium means that the total nuclear energy produced
+in the central regions balances the radiative losses at the surface, i.e. the
+total nuclear luminosity, Lnuc, equals the total stellar luminosity, L.
+by Eqs. (2)-(5) yields similar qualitative changes in the temperature and the sub-adiabaticity close to the convective boundary
+that was found in the hydrodynamical simulations of B21.
+Fig. 2. Radial profile of the temperature difference and of the
+sub-adiabaticity of a 1D solar-like structure with a modified temperature gradient in the overshooting layer according to Eqs.
+(2)-(5). The temperature gradient is modified over a distance
+$d_{\rm ov} = \alpha_{\rm ov} H_{P,{\rm CB}}$, with $\alpha_{\rm ov} = 0.15$ in the lower panel and $\alpha_{\rm ov} = 0.20$
+in the upper panel. The dash-dotted red lines show the percentage relative temperature difference, $\Delta T/T_{\rm ref}$, with $\Delta T = T - T_{\rm ref}$.
+The solid blue lines correspond to the sub-adiabaticity ($\nabla - \nabla_{\rm ad}$).
+The dashed black lines show the sub-adiabaticity of the reference model. The convective boundary is indicated by the vertical
+solid line. The vertical dashed line in each panel is located at a
+distance dov below the convective boundary.
+The impact on the whole stellar structure was quantified by
+comparing the four structural quantities ($A$, $S$, $c_s^2$, $\rho$) between the
+modified and the reference model. The results are displayed in
+Fig. 3, with ∆X defined as (X−Xref) for any structural quantity X.
+The forced local heating in the overshooting layer produces similar positive peaks for $\Delta A$, $\Delta S$, and $\Delta c_s^2$, as found for the temperature. The modification thus provides the correction required to
+improve the discrepancy for the Ledoux discriminant described
+in the first of the trends outlined in Sect. 3.1. Unsurprisingly,
+such a modification of the temperature gradient is expected to
+improve the agreement with helioseismic constraints and help
+
+Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem
+remove the sound speed anomaly below the convective boundary (second trend in Sect. 3.1), as suggested by the results of
+Christensen-Dalsgaard et al. (2011). But it is also interesting to
+note that such a modification yields a slight cooling of the convective zone (see Fig. 2) and thus a negative difference for the
+entropy (see Fig. 3). A negative difference in the convective envelope is in agreement with the correction required for the reference model of Buldgen et al. (2020) to better match the Sun
+(see third trend in Sect. 3.1). Regarding the density, the modification of the temperature gradient has an interesting impact in
+the radiative zone, with a large decrease in the density compared
+to the reference model over a broad region below the convective
+boundary. The impact on the density in the convective region for
+this specific model is partly in agreement with the correction required for this quantity in the Buldgen et al. (2020) study, with a
+positive difference found only in the upper part of the convective
+envelope (see the fourth trend in Sect. 3.1).
+These trends are insensitive to the depth over which the temperature gradient is modified. Increasing the depth increases the
+magnitude of the differences but has no impact on their sign. We
+find that the maximum variation in the model properties, such as
+the speed of sound, $\Delta c_s^2/c_{s,{\rm ref}}^2$, roughly scales with $d_{\rm ov}^2$. This scaling is linked to the integrated area between the modified temperature gradient curve and the one for the reference (non-modified)
+temperature gradient, which roughly decreases linearly with r.
+This area is proportional to the square of the overshooting depth,
+and consequently, the maximum variation in the model properties is also proportional to $d_{\rm ov}^2$. The qualitative trends also remain
+the same whether overshooting mixing in the reference model
+is ignored or included using a step function (with instantaneous
+mixing) or an exponential decay for the diffusion coefficient (e.g.
+Freytag et al. 1996).
+3.2.2. Self-consistent evolutionary models
+For the tests based on the second method, we ran different sets
+of models with different combinations of assumptions, including
+or not microscopic diffusion and with or without overshooting
+mixing. When overshooting mixing was included in the overshooting layer, it was based either on a step function or on an
+exponential decay for the diffusion coefficient. Microscopic diffusion for H and He was implemented according to Thoul et al.
+(1994). For these tests, the temperature gradient was modified
+according to Eqs. (2)-(5). All models start from the ZAMS and
+are evolved until they reach the solar radius and luminosity at the
+same age. This was achieved by making small adjustments to the
+mixing length, lmix. The models with temperature gradient modifications were compared to the relevant reference model, which
+has no modification of the temperature gradient but everything
+else is the same (i.e. the same treatment of microscopic diffusion and of overshooting mixing). The evolutionary models with
+temperature gradient modifications are thus self-consistent. The
+main difference between this approach and the one in the previous section is that these models accumulate small differences in,
+for example, central H abundance when compared to their reference model. These tests produce the same trends in the overshooting layer as found for the tests based on the first method
+(Sect. 3.2.1), independently of the treatment of overshooting
+mixing and whether microscopic diffusion is included or not.
+In the convective zone, all models give a positive difference for
+the density between the model with a modified temperature gradient and the relevant reference model. For the other quantities
+($S$, $c_s^2$), the differences in the convective zone are very sensitive
+Fig. 3. Difference of various structural quantities between a
+model with a modified temperature gradient in the overshooting layer and a reference model calculated with the Lyon stellar
+evolution code. The temperature gradient in the modified model
+is changed over a distance dov = αovHP,CB below the convective boundary (indicated by the vertical solid line). The lower
+panel shows the results for αov = 0.15 and the upper panel for
+αov = 0.20.
+to the assumptions regarding whether overshooting mixing is included or not. But at least we find solutions that are compatible
+with the four trends found by Buldgen et al. (2020) for the four
+structural quantities. This is illustrated in Fig. 4 with a model
+that accounts for step function overshooting mixing over a distance dov = 0.15HP,CB (lower panel) and dov = 0.20HP,CB (upper
+panel).
+4. Conclusion
+The tests performed in Sect. 3 are based on different methods
+(relaxed models versus consistent evolution) that can be used to
+construct solar models. Independently of the method used, the
+tests show that a local increase in the temperature in the overshooting region due to convective penetration provides the qualitative effects required to improve the speed of sound discrepancy
+below the convective boundary. This discrepancy is persistent in
+
+Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem
+Fig. 4. Difference of various structural quantities between a
+modified model and a reference model calculated with the
+MONSTAR stellar evolution code. The reference model is
+evolved from the ZAMS with microscopic diffusion and step
+function overshooting mixing over a distance dov = αovHP,CB below the convective boundary. The lower panel shows the results
+for αov = 0.15 and the upper panel for αov = 0.20. The models
+with a modified temperature gradient in the overshooting layer
+(same microscopic diffusion and overshooting mixing treatment
+as the reference model) are evolved similarly from the ZAMS.
+The convective boundary is indicated by the vertical solid line.
+solar models that use low solar metal abundances. This is not
+surprising because an increase in the temperature in this specific region has previously been invoked in the literature to solve
+this problem, as mentioned in Sect. 1. However, the details of
+the physical process responsible for this local heating have been
+lacking, whereas we can now suggest an explanation based on
+the B21 results. The trends that we find for the four structural
+quantities ($A$, $S$, $c_s^2$, $\rho$) are robust below the convective boundary and in a large fraction of the radiative core, independently of
+the treatment of mixing and diffusion and of the method for constructing the models in Sects. 3.2.1 and 3.2.2. Our experiments
+additionally show that such a local change in the temperature,
+despite being made over a very limited region below the convective boundary, can also affect the density, the entropy, and the
+speed of sound in the convective envelope after thermal relaxation or evolution on the main sequence. How these quantities
+are affected in the convective envelope compared to a reference
+model with no local heating depends on the strategy for building
+solar models and on the treatment of overshooting mixing. This
+mixing is obviously linked to the local heating given that both
+result from the same dynamical process. A combined testing of
+both effects in stellar models could provide more constraints on
+the general process of overshooting.
+Increasingly, efforts are now devoted to characterising the
+process of convective boundary mixing in stellar models based
+on multi-dimensional hydrodynamical simulations. More work
+is required to obtain reliable determinations of an overshooting
+depth and to describe quantitatively the mixing and impact on
+the temperature gradient. Understanding the effects of rotation
+and magnetic fields on overshooting is a significantly more difficult theoretical and numerical problem to address; however,
+efforts to study these combined non-linear effects are ongoing
+(Hotta 2017; Korre et al. 2021). Despite the limitations of existing hydrodynamical simulations, they are already providing
+constraints on physical processes usually treated with several
+free parameters in 1D stellar evolution models. They can thus
+limit the degrees of freedom in a problem as complex as solar modelling. Our primary goal in this work is to highlight the
+potential impact of convective penetration on the thermal background in the overshooting region. The processes studied in B21
+that produce a local change in the temperature gradient are also
+responsible for the mixing in this region. Because much observational evidence points towards the need for extra mixing at convective boundaries, for example lithium depletion in solar-like
+stars (Baraffe et al. 2017), the size of convective cores (Claret
+& Torres 2016), and colour-magnitude diagrams (Castro et al.
+2014), solar modellers often include this extra mixing in their
+models. But a consistent approach should also require accounting for a local change in the temperature gradient. The impact of
+this local heating goes in the right direction to improve not only
+the discrepancies of solar models below the convective boundary, but also in the convective envelope. This effect offers an interesting step forward for solving the solar modelling problem.
+In this exploratory work, we adopt a simple prescription for the
+local heating in the overshooting layer since the main goal is
+to highlight its qualitative impact on stellar models. However,
+this effect should not be considered as another free parameter in
+the solar modelling problem. Future multi-dimensional hydrodynamical simulations will enable this process, and its treatment
+in 1D stellar evolution codes, to be better constrained.
+5. Acknowledgements
+We thank our anonymous referee for valuable comments that
+helped improve the manuscript. This work is supported by the
+ERC grant No. 787361-COBOM and the consolidated STFC
+grant ST/R000395/1. IB thanks the Max Planck Institut für
+Astrophysics (Garching) for warm hospitality during completion
+of part of this work. The authors would like to acknowledge the
+use of the University of Exeter High-Performance Computing
+(HPC) facility ISCA and of the DiRAC Data Intensive service
+at Leicester, operated by the University of Leicester IT Services,
+which forms part of the STFC DiRAC HPC Facility. The equipment was funded by BEIS capital funding via STFC capital
+grants ST/K000373/1 and ST/R002363/1 and STFC DiRAC
+Operations grant ST/R001014/1. DiRAC is part of the National
+e-Infrastructure.
+
+Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem
+References
+Anders, E. & Grevesse, N. 1989, Geochim. Cosmochim. Acta, 53, 197
+Asplund, M., Amarsi, A. M., & Grevesse, N. 2021, A&A, 653, A141
+Asplund, M., Grevesse, N., Sauval, A. J., & Scott, P. 2009, ARA&A, 47, 481
+Baraffe, I., Chabrier, G., Allard, F., & Hauschildt, P. H. 1998, A&A, 337, 403
+Baraffe, I., Pratt, J., Goffrey, T., et al. 2017, ApJ, 845, L6
+Baraffe, I., Pratt, J., Vlaykov, D. G., et al. 2021, A&A, 654, A126
+Brummell, N. H., Clune, T. L., & Toomre, J. 2002, ApJ, 570, 825
+Brun, A. S., Miesch, M. S., & Toomre, J. 2011, ApJ, 742, 79
+Buldgen, G., Eggenberger, P., Baturin, V. A., et al. 2020, A&A, 642, A36
+Buldgen, G., Salmon, S., & Noels, A. 2019a, Frontiers in Astronomy and Space
+Sciences, 6, 42
+Buldgen, G., Salmon, S. J. A. J., Noels, A., et al. 2019b, A&A, 621, A33
+Caffau, E., Ludwig, H. G., Steffen, M., Freytag, B., & Bonifacio, P. 2011,
+Sol. Phys., 268, 255
+Cai, T. 2020, ApJ, 888, 46
+Castro, N., Fossati, L., Langer, N., et al. 2014, A&A, 570, L13
+Christensen-Dalsgaard, J. 2021, Living Reviews in Solar Physics, 18, 2
+Christensen-Dalsgaard, J., Gough, D. O., & Knudstrup, E. 2018, MNRAS, 477,
+3845
+Christensen-Dalsgaard, J., Monteiro, M. J. P. F. G., Rempel, M., & Thompson,
+M. J. 2011, MNRAS, 414, 1158
+Claret, A. & Torres, G. 2016, A&A, 592, A15
+Constantino, T., Campbell, S., Gil-Pons, P., & Lattanzio, J. 2014, ApJ, 784, 56
+Edelmann, P. V. F., Ratnasingam, R. P., Pedersen, M. G., et al. 2019, ApJ, 876, 4
+Freytag, B., Ludwig, H. G., & Steffen, M. 1996, A&A, 313, 497
+Goffrey, T., Pratt, J., Viallet, M., et al. 2017, A&A, 600, A7
+Grevesse, N. & Noels, A. 1993, in Origin and Evolution of the Elements, ed.
+N. Prantzos, E. Vangioni-Flam, & M. Casse, 15–25
+Higl, J., Müller, E., & Weiss, A. 2021, A&A, 646, A133
+Hotta, H. 2017, ApJ, 843, 52
+Hurlburt, N. E., Toomre, J., & Massaguer, J. M. 1986, ApJ, 311, 563
+Käpylä, P. J. 2019, A&A, 631, A122
+Korre, L., Brummell, N., Garaud, P., & Guervilly, C. 2021, MNRAS, 503, 362
+Korre, L., Garaud, P., & Brummell, N. H. 2019, MNRAS, 484, 1220
+Kunitomo, M. & Guillot, T. 2021, arXiv e-prints, arXiv:2109.06492
+Meakin, C. A. & Arnett, D. 2007, ApJ, 667, 448
+Muthsam, H. J., Goeb, W., Kupka, F., Liebich, W., & Zoechling, J. 1995, A&A,
+293, 127
+Rogers, T. M., Glatzmaier, G. A., & Jones, C. A. 2006, ApJ, 653, 765
+Thoul, A. A., Bahcall, J. N., & Loeb, A. 1994, ApJ, 421, 828
+Viallet, M., Baraffe, I., & Walder, R. 2011, A&A, 531, A86
+Viallet, M., Goffrey, T., Baraffe, I., et al. 2016, A&A, 586, A153
+Viallet, M., Meakin, C., Arnett, D., & Mocák, M. 2013, ApJ, 769, 1
+Vinyoles, N., Serenelli, A. M., Villante, F. L., et al. 2017, ApJ, 835, 202
+Zahn, J. P. 1991, A&A, 252, 179
+Zhang, C., Deng, L., Xiong, D., & Christensen-Dalsgaard, J. 2012, ApJ, 759,
+L14
+Zhang, Q. S. & Li, Y. 2012, ApJ, 746, 50
+Zhang, Q.-S., Li, Y., & Christensen-Dalsgaard, J. 2019, ApJ, 881, 103
diff --git a/read/results/pdfium/2201.00201.txt b/read/results/pdfium/2201.00201.txt
index 5bb1ebd..f0f0d8e 100644
--- a/read/results/pdfium/2201.00201.txt
+++ b/read/results/pdfium/2201.00201.txt
@@ -1,720 +1,715 @@
-Astronomy & Astrophysics manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs ©ESO 2022
-January 19, 2022
-Letter to the Editor
-The period-age relation of long-period variables
-M. Trabucchi1,?, N. Mowlavi1
-Department of Astronomy, University of Geneva, Ch. Pegasi 51, 1290 Versoix, Switzerland
-December 2021
-ABSTRACT
-Context. Pieces of empirical evidence suggest the existence of a period-age relation for long-period variables (LPVs). Yet, this
-property has hardly been studied on theoretical grounds thus far.
-Aims. We aim to examine the period-age relation using the results from recent nonlinear pulsation calculations.
-Methods. We combined isochrone models with theoretical periods to simulate the distribution of fundamental mode LPV pulsators,
-which include Miras, in the period-age plane, and we compared it with observations of LPVs in Galactic and Magellanic Clouds’
-clusters.
-Results. In agreement with observations, models predict that the fundamental mode period decreases with increasing age because of
-the dominant role of mass in shaping stellar structure and evolution. At a given age, the period distribution shows a non-negligible
-width and is skewed toward short periods, except for young C-rich stars. As a result, the period-age relations of O-rich and C￾rich models are predicted to have different slopes. We derived best-fit relations describing age and initial mass as a function of the
-fundamental mode period for both O- and C-rich models.
-Conclusions. The study confirms the power of the period-age relations to study populations of LPVs of specific types, either O-rich
-or C-rich, on statistical grounds. In doing so, it is recommended not to limit a study to Miras, which would make it prone to selection
-biases, but rather to include semi-regular variables that pulsate predominantly in the fundamental mode. The use of the relations to
-study individual LPVs, on the other hand, requires more care given the scatter in the period distribution predicted at any given age.
-Key words. stars: AGB and post-AGB – stars: evolution – stars: variables: general – Galaxy: stellar content – Galaxy: globular
-clusters: general – Magellanic Clouds
-1. Introduction
-Low- to intermediate-mass stars approach the end of their lives
-through the asymptotic giant branch (AGB) evolutionary phase,
-during which they exhibit pulsations with timescales up to sev￾eral hundreds of days, and they are hence known as long-period
-variables (LPVs). If their V-band amplitude exceeds 2.5 mag,
-they are classified as Miras, which have a rather regular periodic￾ity and they are believed to pulsate only in the radial fundamen￾tal mode (FM). If their photometric amplitude is smaller, they
-are known as semi-regular variables (SRVs), which are thought
-to be the progenitors of Miras. The name stems from the lesser
-degree of regularity of their light curves, likely due to the fact
-that they can pulsate in multiple modes simultaneously.
-The notion that younger LPVs tend to display longer periods
-compared to older ones, often referred to as the period-age (PA)
-relation, is rooted in the empirical evidence from stellar kinemat￾ics in the solar neighborhood. The first such piece of evidence
-is probably due to Merrill (1923), who pointed out that M-type
-LPVs increasingly lag behind the local standard of rest (i.e., pos￾sess a higher asymmetric drift) as their period decreases. Later
-studies (as summarized by Wyatt & Cahn 1983) confirmed this
-behavior (also using proper motion data, e.g., Wilson & Mer￾rill 1942), and showed that the shorter periods are also accom￾panied by a higher velocity dispersion. Furthermore, groups of
-LPVs with relatively short periods are characterized by a greater
-scale height above the Galactic plane. This was shown, using for
-? Corresponding author: M. Trabucchi
-(michele.trabucchi@unige.ch)
-the first time the radial velocity of LPVs in the southern hemi￾sphere, by Feast (1963). In this seminal paper, Feast realized
-that LPVs with shorter periods must be members of older stellar
-populations and emphasized their highly promising applications
-for both Galactic and extra-galactic studies over a wide range
-of stellar ages. It should be noted that the PA relation is con￾nected with the existence of a period-metallicity relation (Lloyd
-Evans & Menzies 1973; Lloyd Evans 1983b; Feast 1981; Feast
-& Whitelock 2000a, and references therein).
-A number of subsequent works have corroborated the PA
-relation on empirical grounds, or have exploited it to interpret
-observational results. Relevant examples are studies of LPVs in
-globular clusters (e.g., Feast 1966; Lloyd Evans 1983b; White￾lock 1986), toward the galactic center and bulge (Lloyd Evans
-1976; Feast et al. 1980; Whitelock et al. 1991) or at high galactic
-latitude (Jura & Kleinmann 1992; Whitelock et al. 1994). Of par￾ticular interest is the recent effort to extend the analysis of LPVs
-to dwarf galaxies in the Local Group (Menzies et al. 2002, 2008;
-Whitelock et al. 2009; Menzies et al. 2010, 2011; Sakamoto et al.
-2012; Battinelli & Demers 2012, 2013; Whitelock et al. 2013;
-Menzies et al. 2015).
-The Hipparcos mission provided the means to refine the re￾sults on the period-kinematics connection. This was done by
-Feast & Whitelock (2000b), who found evidence supporting the
-existence of a bar-like structure in the Bulge from the orbits of
-local LPVs. A similar study dedicated to C-rich LPVs was per￾formed by Feast et al. (2006), who provided quantitative age
-estimates for these stars. A summary of the main results and
-prospects emerging from these Hipparcos-era studies is given by
-Article number, page 1 of 9
+Astronomy & Astrophysics manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs ©ESO 2022
+January 19, 2022
+Letter to the Editor
+The period-age relation of long-period variables
+M. Trabucchi$^{1,\star}$, N. Mowlavi$^{1}$
+Department of Astronomy, University of Geneva, Ch. Pegasi 51, 1290 Versoix, Switzerland
+December 2021
+ABSTRACT
+Context. Pieces of empirical evidence suggest the existence of a period-age relation for long-period variables (LPVs). Yet, this
+property has hardly been studied on theoretical grounds thus far.
+Aims. We aim to examine the period-age relation using the results from recent nonlinear pulsation calculations.
+Methods. We combined isochrone models with theoretical periods to simulate the distribution of fundamental mode LPV pulsators,
+which include Miras, in the period-age plane, and we compared it with observations of LPVs in Galactic and Magellanic Clouds’
+clusters.
+Results. In agreement with observations, models predict that the fundamental mode period decreases with increasing age because of
+the dominant role of mass in shaping stellar structure and evolution. At a given age, the period distribution shows a non-negligible
+width and is skewed toward short periods, except for young C-rich stars. As a result, the period-age relations of O-rich and C-rich models are predicted to have different slopes. We derived best-fit relations describing age and initial mass as a function of the
+fundamental mode period for both O- and C-rich models.
+Conclusions. The study confirms the power of the period-age relations to study populations of LPVs of specific types, either O-rich
+or C-rich, on statistical grounds. In doing so, it is recommended not to limit a study to Miras, which would make it prone to selection
+biases, but rather to include semi-regular variables that pulsate predominantly in the fundamental mode. The use of the relations to
+study individual LPVs, on the other hand, requires more care given the scatter in the period distribution predicted at any given age.
+Key words. stars: AGB and post-AGB – stars: evolution – stars: variables: general – Galaxy: stellar content – Galaxy: globular
+clusters: general – Magellanic Clouds
+1. Introduction
+Low- to intermediate-mass stars approach the end of their lives
+through the asymptotic giant branch (AGB) evolutionary phase,
+during which they exhibit pulsations with timescales up to several hundreds of days, and they are hence known as long-period
+variables (LPVs). If their V-band amplitude exceeds 2.5 mag,
+they are classified as Miras, which have a rather regular periodicity and they are believed to pulsate only in the radial fundamental mode (FM). If their photometric amplitude is smaller, they
+are known as semi-regular variables (SRVs), which are thought
+to be the progenitors of Miras. The name stems from the lesser
+degree of regularity of their light curves, likely due to the fact
+that they can pulsate in multiple modes simultaneously.
+The notion that younger LPVs tend to display longer periods
+compared to older ones, often referred to as the period-age (PA)
+relation, is rooted in the empirical evidence from stellar kinematics in the solar neighborhood. The first such piece of evidence
+is probably due to Merrill (1923), who pointed out that M-type
+LPVs increasingly lag behind the local standard of rest (i.e., possess a higher asymmetric drift) as their period decreases. Later
+studies (as summarized by Wyatt & Cahn 1983) confirmed this
+behavior (also using proper motion data, e.g., Wilson & Merrill 1942), and showed that the shorter periods are also accompanied by a higher velocity dispersion. Furthermore, groups of
+LPVs with relatively short periods are characterized by a greater
+scale height above the Galactic plane. This was shown, using for
+$\star$ Corresponding author: M. Trabucchi
+(michele.trabucchi@unige.ch)
+the first time the radial velocity of LPVs in the southern hemisphere, by Feast (1963). In this seminal paper, Feast realized
+that LPVs with shorter periods must be members of older stellar
+populations and emphasized their highly promising applications
+for both Galactic and extra-galactic studies over a wide range
+of stellar ages. It should be noted that the PA relation is connected with the existence of a period-metallicity relation (Lloyd
+Evans & Menzies 1973; Lloyd Evans 1983b; Feast 1981; Feast
+& Whitelock 2000a, and references therein).
+A number of subsequent works have corroborated the PA
+relation on empirical grounds, or have exploited it to interpret
+observational results. Relevant examples are studies of LPVs in
+globular clusters (e.g., Feast 1966; Lloyd Evans 1983b; Whitelock 1986), toward the galactic center and bulge (Lloyd Evans
+1976; Feast et al. 1980; Whitelock et al. 1991) or at high galactic
+latitude (Jura & Kleinmann 1992; Whitelock et al. 1994). Of particular interest is the recent effort to extend the analysis of LPVs
+to dwarf galaxies in the Local Group (Menzies et al. 2002, 2008;
+Whitelock et al. 2009; Menzies et al. 2010, 2011; Sakamoto et al.
+2012; Battinelli & Demers 2012, 2013; Whitelock et al. 2013;
+Menzies et al. 2015).
+The Hipparcos mission provided the means to refine the results on the period-kinematics connection. This was done by
+Feast & Whitelock (2000b), who found evidence supporting the
+existence of a bar-like structure in the Bulge from the orbits of
+local LPVs. A similar study dedicated to C-rich LPVs was performed by Feast et al. (2006), who provided quantitative age
+estimates for these stars. A summary of the main results and
+prospects emerging from these Hipparcos-era studies is given by
+Article number, page 1 of 9
 arXiv:2201.00201v2 [astro-ph.SR] 17 Jan 2022
-A&A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs
-Feast (2007). More recently, the study of the Galaxy with LPVs
-has been stimulated by the wealth of data acquired by large-scale
-surveys (e.g., Catchpole et al. 2016; Urago et al. 2020), espe￾cially the Gaia mission (Grady et al. 2019, 2020).
-It seems relevant that just a few years after the study of Feast
-(1963), Kippenhahn & Smith (1969) predicted the PA relation
-of classical Cepheids from stellar evolution and pulsation mod￾els. The theoretical modeling of Cepheids and of their period￾luminosity (PL) and PA relations is now an active field of re￾search (e.g., Bono et al. 2005; Anderson et al. 2016; De Somma
-et al. 2020). In contrast, when it comes to theoretical assessments
-of the LPV PA relation, the literature is surprisingly scarce (espe￾cially in comparison with the significant effort put into empirical
-studies). In fact, we were able to identify only two relevant stud￾ies addressing this subject (Wyatt & Cahn 1983; Eggen 1998).
-The discrepancy in period predictions between linear and nonlin￾ear pulsation models (e.g., Ya’Ari & Tuchman 1996; Lebzelter
-& Wood 2005; Trabucchi et al. 2021b), and more generally the
-difficulty in modeling the structure of evolved red giants, likely
-played a role in hampering the theoretical investigation of the PA
-relation of LPVs.
-Motivated by the release of updated AGB evolutionary mod￾els (Pastorelli et al. 2019, 2020) and the availability of new, ac￾curate model predictions for the FM period of AGB stars (Tra￾bucchi et al. 2019, 2021b), we decided to investigate the nature
-of the PA relation of LPVs on theoretical grounds. The adopted
-models and observed data are described in Sect. 2, while in
-Sect. 3 we present the results, which are discussed in Sect. 4.
-We summarize our conclusions in Sect. 5.
-2. Methods
-2.1. Models
-We employed PARSEC-COLIBRI isochrones (Marigo et al.
-2017) with stellar evolutionary models from Pastorelli et al.
-(2019, 2020) for the thermally pulsing asymptotic giant branch
-(TP-AGB) phase, and from PARSEC (Bressan et al. 2012, ver￾sion 1.2S) for the preceding evolution. The adopted set of
-isochrones covers the range 0.001 to 0.016 in initial metal￾licity (Zi), with a 0.001 step, while it spans the age interval
-8.00 ≤ log(τ/yr) ≤ 10.45 with a step of 0.05. Since the AGB
-phase is short-lived, it only spans a small range of initial masses
-for each given isochrone, of order of 10−2 M at most.
-The adopted isochrones include linear pulsation periods from
-Trabucchi et al. (2019) for overtone modes and nonlinear periods
-computed with the period-mass-radius relation from Trabucchi
-et al. (2021b) for the FM1
-. Pulsation properties were computed
-along both the early-AGB and the TP-AGB. We did not extend
-our analysis to red supergiant stars as the pulsation prescription
-we employed are strictly valid only below 7 M.
-We recall that, with the adopted nonlinear relation, the period
-increases with radius (R) as a broken power law, whose exponent
-decreases as soon as the “bending radius” Rb is exceeded, it and
-becomes zero when the “saturation radius” Rs > Rb is reached
-(i.e., the period becomes independent of radius). The exact val￾ues of Rb and Rs
-, as well as of the exponents, depend on the
-current mass (M). We assume that the FM is dominant if the
-stellar radius is larger than the critical value Rdom,0, which we
-computed from the current stellar mass using Eq. 4 of Trabucchi
-et al. (2021b).
-1 Hereinafter, whenever we discuss periods, it should be understood
-that we refer to FM periods on which this work is focused.
-2.2. Data
-As a first set of data, we considered the cluster-LPV pairs used
-by Grady et al. (2019, see their tables 1 and 2). These consist of
-19 clusters in the Large Magellanic Cloud, hosting a total of 20
-potential LPV members, and eight Galactic clusters each hosting
-a potential LPV member.
-We expanded this list with data for LPVs in a few populous
-clusters, namely the Galactic clusters NGC 362, NGC 2808, 47
-Tuc (NGC 104), and ω Cen (NGC 5139); the LMC clusters NGC
-1978 and NGC 1846; and the cluster NGC 419 in the Small Mag￾ellanic Cloud (SMC). The source lists were taken from Lebzel￾ter & Wood (2005, 2007, 2011, 2016) and Kamath et al. (2010),
-whose notation for the sources names is adopted here. After ex￾cluding the star LW3 in NGC 1846 and the star V129 in ω Cen,
-which are unlikely cluster members (cf. Lebzelter & Wood 2007,
-2016), we reached a total of 203 sources.
-The aforementioned studies also provide a lot of informa￾tion, possibly including JHK photometry, one or more periods,
-and a spectral type. In order to expand on the available data,
-we crossmatched the selected sample with the Two Micron All￾Sky Survey (2MASS, Skrutskie et al. 2006), the all-sky data
-release of the Wide-field Infrared Survey Explorer (AllWISE,
-Cutri et al. 2013), the catalog of variable stars from the All￾Sky Automated Survey for SuperNovae (ASAS-SN Jayasinghe
-et al. 2020), the catalogs of LPVs in the Magellanic Clouds from
-the third phase of the Optical Gravitational Lensing Experiment
-(OGLE-III, Soszynski et al. ´ 2009, 2011), the early third data re￾lease from the Gaia mission (Gaia EDR3, Gaia Collaboration
-et al. 2021), and the catalog of LPV candidates from Gaia DR2
-(Mowlavi et al. 2018).
-Following Grady et al. (2019), we took ages from
-Kharchenko et al. (2016) and Baumgardt et al. (2013) for clusters
-in the Galaxy and LMC, respectively, thereby ensuring that ages
-would be homogeneously derived for clusters in both galaxies.
-Age uncertainties from Baumgardt et al. (2013), provided for
-each cluster, are generally around σlog(τ) ' 0.05. Kharchenko
-et al. (2016) do not provide age uncertainties, but a reasonable
-upper limit for their method should be σlog(τ) = 0.2 based on
-the analysis of Kharchenko et al. (2005) (the same value was
-adopted by Grady et al. 2019, in their Fig. 7).
-As discussed by Kamath et al. (2010), the age of the SMC
-cluster NGC 419 is believed to be around 1.4-1.6 Gyr. This is
-consistent with the value τ = 1.45 ± 0.05 Gyr from Goudfrooij
-et al. (2014), while it is as young as τ ' 0.89 ± 0.015 Gyr ac￾cording to Perren et al. (2017). Since an accurate estimate is not
-necessary for our exploratory analysis, we took a rough average
-and assumed log(τ/yr) = 9.1 ± 0.1. NGC 419 and NGC 1846
-likely exhibit TP-AGB boosting (Girardi et al. 2013). We note
-that some clusters show multiple stellar populations, whose age
-spread has been estimated in some cases (e.g., Mackey & Broby
-Nielsen 2007; Joo & Lee 2013; Villanova et al. 2014) and is con￾sistent with the age uncertainties we adopted.
-Distances of Galactic clusters were also taken from
-Kharchenko et al. (2016), while for the Magellanic Clouds and
-their clusters we adopted the distance moduli µLMC = 18.49 ±
-0.09 mag and µSMC = 18.96 ± 0.02 mag from de Grijs et al.
-(2017). We searched for data on interstellar extinction from sev￾eral literature works (e.g., Nayak et al. 2016; Kharchenko et al.
-2016; Perren et al. 2017), all of which suggest that extinction
-in the Ks filter is smaller than ∼ 0.1 mag for most of the clus￾ters we considered, and at most as large as ∼ 0.3 mag, which is
-negligible for our purposes.
+A&A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs
+Feast (2007). More recently, the study of the Galaxy with LPVs
+has been stimulated by the wealth of data acquired by large-scale
+surveys (e.g., Catchpole et al. 2016; Urago et al. 2020), especially the Gaia mission (Grady et al. 2019, 2020).
+It seems relevant that just a few years after the study of Feast
+(1963), Kippenhahn & Smith (1969) predicted the PA relation
+of classical Cepheids from stellar evolution and pulsation models. The theoretical modeling of Cepheids and of their period-luminosity (PL) and PA relations is now an active field of research (e.g., Bono et al. 2005; Anderson et al. 2016; De Somma
+et al. 2020). In contrast, when it comes to theoretical assessments
+of the LPV PA relation, the literature is surprisingly scarce (especially in comparison with the significant effort put into empirical
+studies). In fact, we were able to identify only two relevant studies addressing this subject (Wyatt & Cahn 1983; Eggen 1998).
+The discrepancy in period predictions between linear and nonlinear pulsation models (e.g., Ya’Ari & Tuchman 1996; Lebzelter
+& Wood 2005; Trabucchi et al. 2021b), and more generally the
+difficulty in modeling the structure of evolved red giants, likely
+played a role in hampering the theoretical investigation of the PA
+relation of LPVs.
+Motivated by the release of updated AGB evolutionary models (Pastorelli et al. 2019, 2020) and the availability of new, accurate model predictions for the FM period of AGB stars (Trabucchi et al. 2019, 2021b), we decided to investigate the nature
+of the PA relation of LPVs on theoretical grounds. The adopted
+models and observed data are described in Sect. 2, while in
+Sect. 3 we present the results, which are discussed in Sect. 4.
+We summarize our conclusions in Sect. 5.
+2. Methods
+2.1. Models
+We employed PARSEC-COLIBRI isochrones (Marigo et al.
+2017) with stellar evolutionary models from Pastorelli et al.
+(2019, 2020) for the thermally pulsing asymptotic giant branch
+(TP-AGB) phase, and from PARSEC (Bressan et al. 2012, version 1.2S) for the preceding evolution. The adopted set of
+isochrones covers the range 0.001 to 0.016 in initial metallicity (Zi), with a 0.001 step, while it spans the age interval
+8.00 ≤ log(τ/yr) ≤ 10.45 with a step of 0.05. Since the AGB
+phase is short-lived, it only spans a small range of initial masses
+for each given isochrone, of order of $10^{-2}\,M_\odot$ at most.
+The adopted isochrones include linear pulsation periods from
+Trabucchi et al. (2019) for overtone modes and nonlinear periods
+computed with the period-mass-radius relation from Trabucchi
+et al. (2021b) for the FM$^{1}$. Pulsation properties were computed
+along both the early-AGB and the TP-AGB. We did not extend
+our analysis to red supergiant stars as the pulsation prescriptions
+we employed are strictly valid only below $7\,M_\odot$.
+We recall that, with the adopted nonlinear relation, the period
+increases with radius (R) as a broken power law, whose exponent
+decreases as soon as the “bending radius” $R_b$ is exceeded, and it
+becomes zero when the “saturation radius” $R_s > R_b$ is reached
+(i.e., the period becomes independent of radius). The exact values of $R_b$ and $R_s$,
+as well as of the exponents, depend on the
+current mass (M). We assume that the FM is dominant if the
+stellar radius is larger than the critical value $R_{\rm dom,0}$, which we
+computed from the current stellar mass using Eq. 4 of Trabucchi
+et al. (2021b).
+1 Hereinafter, whenever we discuss periods, it should be understood
+that we refer to FM periods on which this work is focused.
+2.2. Data
+As a first set of data, we considered the cluster-LPV pairs used
+by Grady et al. (2019, see their tables 1 and 2). These consist of
+19 clusters in the Large Magellanic Cloud, hosting a total of 20
+potential LPV members, and eight Galactic clusters each hosting
+a potential LPV member.
+We expanded this list with data for LPVs in a few populous
+clusters, namely the Galactic clusters NGC 362, NGC 2808, 47
+Tuc (NGC 104), and ω Cen (NGC 5139); the LMC clusters NGC
+1978 and NGC 1846; and the cluster NGC 419 in the Small Magellanic Cloud (SMC). The source lists were taken from Lebzelter & Wood (2005, 2007, 2011, 2016) and Kamath et al. (2010),
+whose notation for the sources names is adopted here. After excluding the star LW3 in NGC 1846 and the star V129 in ω Cen,
+which are unlikely cluster members (cf. Lebzelter & Wood 2007,
+2016), we reached a total of 203 sources.
+The aforementioned studies also provide a lot of information, possibly including JHK photometry, one or more periods,
+and a spectral type. In order to expand on the available data,
+we crossmatched the selected sample with the Two Micron All-Sky Survey (2MASS, Skrutskie et al. 2006), the all-sky data
+release of the Wide-field Infrared Survey Explorer (AllWISE,
+Cutri et al. 2013), the catalog of variable stars from the All-Sky Automated Survey for SuperNovae (ASAS-SN Jayasinghe
+et al. 2020), the catalogs of LPVs in the Magellanic Clouds from
+the third phase of the Optical Gravitational Lensing Experiment
+(OGLE-III, Soszyński et al. 2009, 2011), the early third data release from the Gaia mission (Gaia EDR3, Gaia Collaboration
+et al. 2021), and the catalog of LPV candidates from Gaia DR2
+(Mowlavi et al. 2018).
+Following Grady et al. (2019), we took ages from
+Kharchenko et al. (2016) and Baumgardt et al. (2013) for clusters
+in the Galaxy and LMC, respectively, thereby ensuring that ages
+would be homogeneously derived for clusters in both galaxies.
+Age uncertainties from Baumgardt et al. (2013), provided for
+each cluster, are generally around $\sigma_{\log(\tau)} \simeq 0.05$. Kharchenko
+et al. (2016) do not provide age uncertainties, but a reasonable
+upper limit for their method should be $\sigma_{\log(\tau)} = 0.2$ based on
+the analysis of Kharchenko et al. (2005) (the same value was
+adopted by Grady et al. 2019, in their Fig. 7).
+As discussed by Kamath et al. (2010), the age of the SMC
+cluster NGC 419 is believed to be around 1.4-1.6 Gyr. This is
+consistent with the value τ = 1.45 ± 0.05 Gyr from Goudfrooij
+et al. (2014), while it is as young as $\tau \simeq 0.89 \pm 0.015$ Gyr according to Perren et al. (2017). Since an accurate estimate is not
+necessary for our exploratory analysis, we took a rough average
+and assumed log(τ/yr) = 9.1 ± 0.1. NGC 419 and NGC 1846
+likely exhibit TP-AGB boosting (Girardi et al. 2013). We note
+that some clusters show multiple stellar populations, whose age
+spread has been estimated in some cases (e.g., Mackey & Broby
+Nielsen 2007; Joo & Lee 2013; Villanova et al. 2014) and is consistent with the age uncertainties we adopted.
+Distances of Galactic clusters were also taken from
+Kharchenko et al. (2016), while for the Magellanic Clouds and
+their clusters we adopted the distance moduli $\mu_{\rm LMC} = 18.49 \pm 0.09$ mag
+and $\mu_{\rm SMC} = 18.96 \pm 0.02$ mag from de Grijs et al.
+(2017). We searched for data on interstellar extinction from several literature works (e.g., Nayak et al. 2016; Kharchenko et al.
+2016; Perren et al. 2017), all of which suggest that extinction
+in the Ks filter is smaller than ∼ 0.1 mag for most of the clusters we considered, and at most as large as ∼ 0.3 mag, which is
+negligible for our purposes.
 Article number, page 2 of 9
-Trabucchi et al.: The period-age relation of LPVs
-A detailed membership verification is beyond the scope of
-this work, and we relied on the checks performed by authors
-whose source lists we adopted. It should be kept in mind that
-some sources may not be real cluster members.
-For sources without a spectral type, we used the Gaia￾2MASS diagram (Lebzelter et al. 2018, 2019) to determine
-whether they are O- or C-rich. We used the near-infrared period￾luminosity diagram to identify the most likely pulsation mode
-associated with each period of each observed source. We se￾lected only FM periods and rejected long secondary periods and
-periods attributed to overtone mode pulsation. The details of
-these classification steps are provided in Appendix A. Out of
-203 sources from the initial list, we identified 95 LPVs pulsat￾ing in the FM, consisting of 40 C-rich and 55 O-rich sources.
-They consist of 29 Miras, 33 semi-regular variables, and 33 other
-sources (most likely LPVs) whose variability type has not been
-determined. We note that, with the exception of Gaia DR2, the
-sources of variability data considered here do not report the un￾certainty associated with observed periods. However, since peri￾ods were derived in most cases from well-sampled, high-quality
-variability observations, relative period uncertainties are most
-likely negligible compared with those associated with age.
-3. Results
-Panel (a) of Fig. 1 shows a comparison between model predic￾tions and observations in the PFM–log(τ/yr) plane. The former
-are displayed by a density map showing the expected number
-NFM of LPVs pulsating in the FM in each period-age bin, nor￾malized to maximum. Model predictions are in good agreement
-with data derived from observations (i.e., individual LPVs in
-clusters, represented by symbols), and they show that the pe￾riod of LPVs pulsating in the FM decreases with increasing age.
-Crosses mark the average properties of the three groups of C￾rich LPVs from Feast et al. (2006, their table 4), which fit the
-general pattern with the exception of their group 3, estimated to
-be older than what our models predict at P ' 650.
-We also show a linear best-fit to the models distribution
-(weighted by NFM), which shows a fairly good agreement with
-the best-fit to observations by Grady et al. (2019, also shown).
-However, the best-fit line does not fully capture the properties
-of the predictions, nor of the observed trend. Indeed, models are
-indicative of a substantial dispersion around the relation. For in￾stance, at 1 Gyr, the FM period ranges from ∼ 200 days to ∼ 550
-days. Conversely, LPVs pulsating in the FM with a period of 350
-days are predicted to be at least ∼200 Myr old, but they can be as
-old as ∼3 Gyr. Observed data are consistent with the predicted
-spread, although the agreement cannot be considered as the ob￾served sample adopted is not complete.
-Nonetheless, it is relevant that some clusters host multiple
-LPVs, which are thus almost coeval, and they do span a wide
-period range. Some of these clusters host multiple stellar popu￾lations that are believed to have formed over a time comparable
-with the age uncertainties we adopted. This means that longer￾period (more massive) LPVs in these clusters probably lean to￾ward the lower age limit assumed for their host cluster, and the
-opposite is true at shorter periods. This tends to strengthen the
-agreement between models and observations.
-Our data set samples the intermediate-age range (NGC 419
-and NGC 1846) relatively well as well as old ages (ω Cen, 47
-Tuc, NGC 362, and NGC 2808). This provides us with the op￾portunity to study the period distribution at these ages, and for
-a more detailed comparison between models and observations.
-On the basis of the average age of these two groups of clus￾ters and the associated uncertainty, and taking the discrete age
-sampling of the isochrones into account, we considered the age
-ranges log(τ/yr) = 9.15±0.10 and log(τ/yr) = 10.10±0.20. Pe￾riod distributions at those ages are displayed in panels (b) and (c)
-of Fig. 1, respectively, showing good agreement between model
-predictions and observations. We note that in both cases, the dis￾tribution is skewed toward short periods, which seems to be true
-at all ages for O-rich stars. This can be seen in panel (a) of Fig. 2,
-which is a version of the PA plane limited to an O-rich compo￾sition2
-. Indeed, although at τ . 5 Gyr the observed sample is
-very scarce, it appears to be consistent with models predicting a
-more densely populated region in the shorter-period half of the
-PA distribution.
-The case of C-stars, shown in panel (b) of Fig. 2, is differ￾ent. They only form over a restricted range of initial masses
-and ages, so their occurrence in a given stellar population is an
-age indicator on its own. Toward the low-mass (old age) side
-of the C-star regime, the behavior is similar to the O-rich case
-with a concentration around relatively short periods. C-rich mod￾els tend to have a lower surface temperature and larger radii,
-at a given mass, compared to O-rich models, and thus they at￾tain longer periods more easily. This occurs in particular toward
-higher masses, so that younger C-rich models are more concen￾trated at longer periods, leading to a steeper PA relation com￾pared with the O-rich case. These predictions agree with ob￾servations on the old side of the period distribution, while the
-scarcity of C stars at τ ' 0.6 Gyr prevents us from performing a
-comparison at younger ages.
-In appendix B, we provide analytic PA relations by fitting the
-high-density parts of the O- and C-rich models’ distribution. We
-emphasize that, because of the large scatter of the relation, ages
-estimated in this way for individual LPVs are bound to be highly
-uncertain. As a way to assess the error in age determination, we
-also provide analytic best-fit relations to the boundaries of the
-PA distribution of the models in the appendix. These relations
-are displayed in Fig. 2.
-4. Discussion
-In general agreement with observations, models confirm that
-LPVs pulsating predominantly in the FM follow a PA relation,
-which exhibits a non-negligible dispersion. Thanks to the newly
-available nonlinear period predictions, we were able to better ex￾amine the nature of this relation and the origin of its scatter.
-The PA relation is intimately connected with the PL relation,
-both patterns emerging because of the prominent role of mass in
-shaping stellar structure and evolution. Indeed, stellar mass de￾termines the lifetimes of the main evolutionary stages, and thus
-the age of stars in the AGB phase. Pulsation models (Trabuc￾chi et al. 2021b) show that the radius Rdom,0 (and correspond￾ing luminosity) at the onset of dominant FM pulsation (DFMP)
-increases with mass, so that the most massive FM-dominated
-LPVs are brighter. They also have longer periods, as this in￾creases with radius. In other words, the period, luminosity, and
-age near the tip of the AGB are all functions of initial stellar
-mass (at least to a good approximation).
-We note that this would not be the case if the FM were dom￾inant along the entire AGB, as the large change in radius during
-this phase would result in a wide range of periods at a given age.
-It is the very fact that DFMP occurs only during the final portion
-2 A further version of the PA plane highlighting both chemical types
-can be found in Fig. A.2 of appendix A.1.
+Trabucchi et al.: The period-age relation of LPVs
+A detailed membership verification is beyond the scope of
+this work, and we relied on the checks performed by authors
+whose source lists we adopted. It should be kept in mind that
+some sources may not be real cluster members.
+For sources without a spectral type, we used the Gaia-2MASS diagram (Lebzelter et al. 2018, 2019) to determine
+whether they are O- or C-rich. We used the near-infrared period-luminosity diagram to identify the most likely pulsation mode
+associated with each period of each observed source. We selected only FM periods and rejected long secondary periods and
+periods attributed to overtone mode pulsation. The details of
+these classification steps are provided in Appendix A. Out of
+203 sources from the initial list, we identified 95 LPVs pulsating in the FM, consisting of 40 C-rich and 55 O-rich sources.
+They consist of 29 Miras, 33 semi-regular variables, and 33 other
+sources (most likely LPVs) whose variability type has not been
+determined. We note that, with the exception of Gaia DR2, the
+sources of variability data considered here do not report the uncertainty associated with observed periods. However, since periods were derived in most cases from well-sampled, high-quality
+variability observations, relative period uncertainties are most
+likely negligible compared with those associated with age.
+3. Results
+Panel (a) of Fig. 1 shows a comparison between model predictions and observations in the PFM–log(τ/yr) plane. The former
+are displayed by a density map showing the expected number
+NFM of LPVs pulsating in the FM in each period-age bin, normalized to maximum. Model predictions are in good agreement
+with data derived from observations (i.e., individual LPVs in
+clusters, represented by symbols), and they show that the period of LPVs pulsating in the FM decreases with increasing age.
+Crosses mark the average properties of the three groups of C-rich LPVs from Feast et al. (2006, their table 4), which fit the
+general pattern with the exception of their group 3, estimated to
+be older than what our models predict at P ≃ 650.
+We also show a linear best-fit to the models distribution
+(weighted by NFM), which shows a fairly good agreement with
+the best-fit to observations by Grady et al. (2019, also shown).
+However, the best-fit line does not fully capture the properties
+of the predictions, nor of the observed trend. Indeed, models are
+indicative of a substantial dispersion around the relation. For instance, at 1 Gyr, the FM period ranges from ∼ 200 days to ∼ 550
+days. Conversely, LPVs pulsating in the FM with a period of 350
+days are predicted to be at least ∼200 Myr old, but they can be as
+old as ∼3 Gyr. Observed data are consistent with the predicted
+spread, although the agreement cannot be considered conclusive as the observed sample adopted is not complete.
+Nonetheless, it is relevant that some clusters host multiple
+LPVs, which are thus almost coeval, and they do span a wide
+period range. Some of these clusters host multiple stellar populations that are believed to have formed over a time comparable
+with the age uncertainties we adopted. This means that longer-period (more massive) LPVs in these clusters probably lean toward the lower age limit assumed for their host cluster, and the
+opposite is true at shorter periods. This tends to strengthen the
+agreement between models and observations.
+Our data set samples the intermediate-age range (NGC 419
+and NGC 1846) relatively well as well as old ages (ω Cen, 47
+Tuc, NGC 362, and NGC 2808). This provides us with the opportunity to study the period distribution at these ages, and for
+a more detailed comparison between models and observations.
+On the basis of the average age of these two groups of clusters and the associated uncertainty, and taking the discrete age
+sampling of the isochrones into account, we considered the age
+ranges log(τ/yr) = 9.15±0.10 and log(τ/yr) = 10.10±0.20. Period distributions at those ages are displayed in panels (b) and (c)
+of Fig. 1, respectively, showing good agreement between model
+predictions and observations. We note that in both cases, the distribution is skewed toward short periods, which seems to be true
+at all ages for O-rich stars. This can be seen in panel (a) of Fig. 2,
+which is a version of the PA plane limited to an O-rich composition².
+Indeed, although at τ ≲ 5 Gyr the observed sample is
+very scarce, it appears to be consistent with models predicting a
+more densely populated region in the shorter-period half of the
+PA distribution.
+The case of C-stars, shown in panel (b) of Fig. 2, is different. They only form over a restricted range of initial masses
+and ages, so their occurrence in a given stellar population is an
+age indicator on its own. Toward the low-mass (old age) side
+of the C-star regime, the behavior is similar to the O-rich case
+with a concentration around relatively short periods. C-rich models tend to have a lower surface temperature and larger radii,
+at a given mass, compared to O-rich models, and thus they attain longer periods more easily. This occurs in particular toward
+higher masses, so that younger C-rich models are more concentrated at longer periods, leading to a steeper PA relation compared with the O-rich case. These predictions agree with observations on the old side of the period distribution, while the
+scarcity of C stars at τ ≃ 0.6 Gyr prevents us from performing a
+comparison at younger ages.
+In appendix B, we provide analytic PA relations by fitting the
+high-density parts of the O- and C-rich models’ distribution. We
+emphasize that, because of the large scatter of the relation, ages
+estimated in this way for individual LPVs are bound to be highly
+uncertain. As a way to assess the error in age determination, we
+also provide analytic best-fit relations to the boundaries of the
+PA distribution of the models in the appendix. These relations
+are displayed in Fig. 2.
+4. Discussion
+In general agreement with observations, models confirm that
+LPVs pulsating predominantly in the FM follow a PA relation,
+which exhibits a non-negligible dispersion. Thanks to the newly
+available nonlinear period predictions, we were able to better examine the nature of this relation and the origin of its scatter.
+The PA relation is intimately connected with the PL relation,
+both patterns emerging because of the prominent role of mass in
+shaping stellar structure and evolution. Indeed, stellar mass determines the lifetimes of the main evolutionary stages, and thus
+the age of stars in the AGB phase. Pulsation models (Trabucchi et al. 2021b) show that the radius Rdom,0 (and corresponding luminosity) at the onset of dominant FM pulsation (DFMP)
+increases with mass, so that the most massive FM-dominated
+LPVs are brighter. They also have longer periods, as this increases with radius. In other words, the period, luminosity, and
+age near the tip of the AGB are all functions of initial stellar
+mass (at least to a good approximation).
+We note that this would not be the case if the FM were dominant along the entire AGB, as the large change in radius during
+this phase would result in a wide range of periods at a given age.
+It is the very fact that DFMP occurs only during the final portion
+2 A further version of the PA plane highlighting both chemical types
+can be found in Fig. A.2 of appendix A.1.
 Article number, page 3 of 9
-A&A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs
-Fig. 1. Period-age diagram. Panel (a) shows the predicted period-age distribution (darker tones indicate a higher expected number of LPVs on
-a linear scale, normalized to maximum). Symbols represent observed LPVs (green: SRVs; purple: Miras; white: unclassified) with the shape
-indicating their host cluster or literature source as indicated in the legend. The age uncertainties are marked by the error bars. The groups of
-galactic C-stars of Feast et al. (2006) are marked by crosses annotated with the group number. The solid and dotted line represent a linear best-fit
-to models and the best-fit by Grady et al. (2019), respectively. Period distributions at selected ages are compared in panels (b) and (c) and marked
-in panel (a) by the blue and red shaded areas (at log(τ/yr) ∼ 9.15 and ∼ 10.10, respectively). For clarity, the effect of the TP-AGB boosting is
-suppressed in panel (a).
-Fig. 2. Similar to Fig. 1, but limited to O-rich (left panel) and C-rich (right panel) LPVs. The solid line marks the best fit to the models, while
-dashed lines are best fits to the edges of the model distribution (see the text for more details).
-of the AGB that limits the range of periods a FM-pulsating LPV
-can have at a given age. Yet, the DFMP part of the AGB is long
-enough for significant variations in radius to occur, which result
-in the dispersion of the PA relation seen in Fig. 1.
-At a given initial metallicity Zi
-, the shape of the period dis￾tribution primarily results from the fact that, throughout the TP￾AGB (the stage during which the FM is normally excited), the
-envelope expansion accelerates, while the period becomes pro￾gressively less sensitive to changes in radius (see Appendix C).
-In particular, the slope of the period-radius relation decreases
-sharply at Pb = P(Rb). The FM period distribution is roughly
-symmetric around that value, but at its short-period side, the FM
-is not dominant. Therefore, when only FM-dominated LPVs are
-considered, as is done here, the observed period distribution ap￾pears skewed toward short periods.
-This feature is strengthened when a set of isochrones is con￾sidered which spans a range of initial metallicities because the
-adopted criterion for the onset of DFMP does not depend on
-metallicity, but the FM period does as metal-poor LPVs are
-warmer and have smaller radii compared with metal-rich ones.
-As a consequence, the bulk of the period distribution of metal￾poor LPVs is at periods shorter than Pb, so they only contribute
-to the global distribution (i.e., at all Zi at a given age) over a
-small period range at P & Pb. In contrast, metal-rich LPVs have
-periods well beyond Pb, so they contribute both at that value and
-at longer periods. The result is an excess of FM-dominated LPVs
-near Pb, that is to say on the short side of the overall period dis￾tribution.
-We note that, in contrast with the prescription we adopted,
-the onset of DFMP in reality is probably sensitive to metallic￾Article number, page 4 of 9
-Trabucchi et al.: The period-age relation of LPVs
-ity. While the good degree of agreement with observations sug￾gests that the dependence is weak at most, it is possible for
-any discrepancy to be smeared out by the fact that our set of
-isochrone implicitly assumes a flat star-formation rate with no
-age-metallicity relation, so it is not an accurate representation of
-any realistic stellar environment. In this sense, the PA relation is
-environment-dependent, and it is not necessarily universal.
-A further point of uncertainty stems from the fact that the
-prescription we adopted assumes that the FM period only de￾pends upon the mass and radius, and that it is affected by a
-change in composition only through the effect that such a varia￾tion has on the radius. While this is true to a good approximation,
-linear models show a small dependence of periods on metallic￾ity at a fixed mass and radius, but the quantitative impact in the
-nonlinear case is unknown. We can only estimate, based on the
-results of Trabucchi et al. (2019), an uncertainty of ±10% at most
-with respect to the prescriptions adopted here.
-Qualitatively, a realistic age-metallicity relation and the
-metallicity dependence of the period and of the onset of DFMP
-are all expected to result in a steeper PA relation than the one
-we predict, but it is difficult to assess the relative importance of
-these effects. In this sense, the composition probably affects the
-shape of the PA relation more than its dispersion. The latter is
-likely affected by the composition indirectly through mass loss,
-the analysis of which is beyond the scope of this study. How￾ever, we point out that mass loss represents a source of scatter in
-combination with the occurrence of thermal pulses, because it re￾duces the minimum radius for the onset of DFMP. Thus, during
-the luminosity dips associated with thermal pulses, a LPV can
-have a period shorter than the one it had when it first entered the
-DFMP regime (see Appendix C). An additional source of uncer￾tainty, which we disregarded, is rotation (or other processes that
-induce extra mixing in the core) which causes a spread in ages
-at a given initial mass (cf. Anderson et al. 2016, for the case of
-classical Cepheids).
-The fairly good agreement between models and observations
-encourages the use of LPVs as age indicators, but the scatter of
-the PA relation hampers this application. We attempted to reduce
-the scatter through corrections involving photometric properties,
-as is customarily done for classical Cepheids with a color term
-(e.g., Bono et al. 2005), but with unsatisfactory results. A correc￾tion dependent on the photometric amplitude of variability rep￾resents a promising alternative, but it cannot be pursued at the
-moment. Indeed, for computational efficiency, current pulsation
-models include only a crude treatment of the atmospheric layers
-as they do not affect pulsation periods. On the other hand, the
-atmosphere is crucial in determining the spectral energy distri￾bution and its variation throughout the pulsation cycle, and hence
-the amplitude of variability. At the same time, the observational
-sample adopted here is too heterogeneous for a self-consistent
-investigation of amplitude, but this kind of study could be made
-possible by the upcoming data release 3 of the Gaia mission
-(Gaia Collaboration et al. 2021) and the future Legacy Survey
-of Space and Time (LSST, Ivezic et al. ´ 2019) of the Vera Rubin
-Observatory.
-It is worth noting that our analysis applies to Miras as well
-as SRVs, provided that they predominantly pulsate in the FM.
-The limitation of PA relation studies to Miras, as has mainly
-been done in literature so far, undoubtedly has some advan￾tages: to begin with, the fact that Miras are typically easier to
-detect than SRVs, and their light curves are easier to process
-as they tend to be more regular. Moreover, Miras represent the
-end-point of AGB evolution, so in principle they correspond to a
-smaller range of stellar parameters compared to the full extent of
-the DFMP regime, and they display a smaller range of periods
-at a given age (cf. Feast & Whitelock 2000b). In other words,
-they should exhibit a relatively narrow PA relation (even though,
-based on the observational data set we adopted, there is no con￾clusive evidence that considering only Miras reduces the scatter
-of the PA relation).
-Nonetheless, we caution against this approach as it is prone
-to introducing uncontrolled biases, as the traditional distinction
-between SRVs and Miras is arbitrary (see Trabucchi et al. 2021a,
-and references therein). As such, it disregards the physical pro￾cesses at the origin of the range of amplitudes characterizing
-LPVs. In particular, photometric amplitudes are largely deter￾mined by the formation and dissociation of molecules in the stel￾lar atmosphere, and they are likely to be metallicity-dependent.
-It is therefore reasonable to assume that metal-poor (old) Mira
-analogs might be classified as SRVs, thereby undermining the
-potential application of the PA relation if restricted to Miras.
-This seems to be supported by the fact that the bulk of old LPVs
-in our sample are classified as SRVs. Therefore, studies involv￾ing PA relations of LPVs would advantageously include both
-Miras and FM-pulsating SRVs.
-The challenge associated with SRVs stems from the fact that
-they are often multiperiodic (even when predominantly pulsat￾ing in the FM), a property that complicates the light curve anal￾ysis and period extraction. At the same time, this feature could
-potentially improve age determinations as overtone modes are
-expected to display a PA relation as well.
-5. Conclusions
-We used the results from recent nonlinear pulsation calculations
-and combined them with state-of-the-art isochrone models to in￾vestigate the PA relation of FM-dominated LPVs, finding good
-agreement with the distribution of observed LPVs in star clus￾ters. The theoretical PA relation displays a non-negligible scat￾ter, whose origin we identified due to the fact that, despite being
-very brief, the portion of AGB evolution during which the FM
-becomes dominant shows a relatively large range in mass and
-radius at a given age.
-The theoretical distribution of FM periods is roughly sym￾metric, but the FM is not dominant at the shortest periods. As a
-result, models predict that the distribution of dominant FM peri￾ods at a given age is skewed toward short periods, in agreement
-with observations. Depending on stellar populations, metallicity
-may enhance this feature as metal-poor LPVs, which tend to be
-warmer and more compact, only contribute near short periods.
-We provide the best-fit PA relation separately for O-rich and
-C-rich FM-pulsating LPVs. The latter LPVs show a steeper PA
-relation because of their lower surface temperatures, which allow
-them to reach longer periods more easily.
-Our analysis concerns all LPVs predominantly pulsating in
-the FM, regardless of whether they are classified as Miras or
-SRVs. We discourage such a distinction in that it is arbitrary and
-prone to selection biases that risk compromising the use of LPVs
-as age indicators.
-The main limitation in the use of the PA relation for age de￾terminations of individual LPVs stems from its relatively large
-scatter. We suggest that corrective terms, involving the ampli￾tude of variability, might help to reduce this scatter and antici￾pate that upcoming data from ongoing and future surveys dedi￾cated to time-domain astronomy will be highly valuable to probe
-this possibility. A study of the impact of metallicity on nonlinear
-pulsation is highly desirable to pursue this line of investigation,
+A&A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs
+Fig. 1. Period-age diagram. Panel (a) shows the predicted period-age distribution (darker tones indicate a higher expected number of LPVs on
+a linear scale, normalized to maximum). Symbols represent observed LPVs (green: SRVs; purple: Miras; white: unclassified) with the shape
+indicating their host cluster or literature source as indicated in the legend. The age uncertainties are marked by the error bars. The groups of
+galactic C-stars of Feast et al. (2006) are marked by crosses annotated with the group number. The solid and dotted lines represent a linear best-fit
+to models and the best-fit by Grady et al. (2019), respectively. Period distributions at selected ages are compared in panels (b) and (c) and marked
+in panel (a) by the blue and red shaded areas (at log(τ/yr) ∼ 9.15 and ∼ 10.10, respectively). For clarity, the effect of the TP-AGB boosting is
+suppressed in panel (a).
+Fig. 2. Similar to Fig. 1, but limited to O-rich (left panel) and C-rich (right panel) LPVs. The solid line marks the best fit to the models, while
+dashed lines are best fits to the edges of the model distribution (see the text for more details).
+of the AGB that limits the range of periods a FM-pulsating LPV
+can have at a given age. Yet, the DFMP part of the AGB is long
+enough for significant variations in radius to occur, which result
+in the dispersion of the PA relation seen in Fig. 1.
+At a given initial metallicity Zi, the shape of the period distribution primarily results from the fact that, throughout the TP-AGB (the stage during which the FM is normally excited), the
+envelope expansion accelerates, while the period becomes progressively less sensitive to changes in radius (see Appendix C).
+In particular, the slope of the period-radius relation decreases
+sharply at Pb = P(Rb). The FM period distribution is roughly
+symmetric around that value, but at its short-period side, the FM
+is not dominant. Therefore, when only FM-dominated LPVs are
+considered, as is done here, the observed period distribution appears skewed toward short periods.
+This feature is strengthened when a set of isochrones is considered which spans a range of initial metallicities because the
+adopted criterion for the onset of DFMP does not depend on
+metallicity, but the FM period does as metal-poor LPVs are
+warmer and have smaller radii compared with metal-rich ones.
+As a consequence, the bulk of the period distribution of metal-poor LPVs is at periods shorter than Pb, so they only contribute
+to the global distribution (i.e., at all Zi at a given age) over a
+small period range at P ≳ Pb. In contrast, metal-rich LPVs have
+periods well beyond Pb, so they contribute both at that value and
+at longer periods. The result is an excess of FM-dominated LPVs
+near Pb, that is to say on the short side of the overall period distribution.
+We note that, in contrast with the prescription we adopted,
+the onset of DFMP in reality is probably sensitive to metallicity.
+Article number, page 4 of 9
+Trabucchi et al.: The period-age relation of LPVs
+While the good degree of agreement with observations suggests that the dependence is weak at most, it is possible for
+any discrepancy to be smeared out by the fact that our set of
+isochrones implicitly assumes a flat star-formation rate with no
+age-metallicity relation, so it is not an accurate representation of
+any realistic stellar environment. In this sense, the PA relation is
+environment-dependent, and it is not necessarily universal.
+A further point of uncertainty stems from the fact that the
+prescription we adopted assumes that the FM period only depends upon the mass and radius, and that it is affected by a
+change in composition only through the effect that such a variation has on the radius. While this is true to a good approximation,
+linear models show a small dependence of periods on metallicity at a fixed mass and radius, but the quantitative impact in the
+nonlinear case is unknown. We can only estimate, based on the
+results of Trabucchi et al. (2019), an uncertainty of ±10% at most
+with respect to the prescriptions adopted here.
+Qualitatively, a realistic age-metallicity relation and the
+metallicity dependence of the period and of the onset of DFMP
+are all expected to result in a steeper PA relation than the one
+we predict, but it is difficult to assess the relative importance of
+these effects. In this sense, the composition probably affects the
+shape of the PA relation more than its dispersion. The latter is
+likely affected by the composition indirectly through mass loss,
+the analysis of which is beyond the scope of this study. However, we point out that mass loss represents a source of scatter in
+combination with the occurrence of thermal pulses, because it reduces the minimum radius for the onset of DFMP. Thus, during
+the luminosity dips associated with thermal pulses, a LPV can
+have a period shorter than the one it had when it first entered the
+DFMP regime (see Appendix C). An additional source of uncertainty, which we disregarded, is rotation (or other processes that
+induce extra mixing in the core) which causes a spread in ages
+at a given initial mass (cf. Anderson et al. 2016, for the case of
+classical Cepheids).
+The fairly good agreement between models and observations
+encourages the use of LPVs as age indicators, but the scatter of
+the PA relation hampers this application. We attempted to reduce
+the scatter through corrections involving photometric properties,
+as is customarily done for classical Cepheids with a color term
+(e.g., Bono et al. 2005), but with unsatisfactory results. A correction dependent on the photometric amplitude of variability represents a promising alternative, but it cannot be pursued at the
+moment. Indeed, for computational efficiency, current pulsation
+models include only a crude treatment of the atmospheric layers
+as they do not affect pulsation periods. On the other hand, the
+atmosphere is crucial in determining the spectral energy distribution and its variation throughout the pulsation cycle, and hence
+the amplitude of variability. At the same time, the observational
+sample adopted here is too heterogeneous for a self-consistent
+investigation of amplitude, but this kind of study could be made
+possible by the upcoming data release 3 of the Gaia mission
+(Gaia Collaboration et al. 2021) and the future Legacy Survey
+of Space and Time (LSST, Ivezić et al. 2019) of the Vera Rubin
+Observatory.
+It is worth noting that our analysis applies to Miras as well
+as SRVs, provided that they predominantly pulsate in the FM.
+The limitation of PA relation studies to Miras, as has mainly
+been done in literature so far, undoubtedly has some advantages: to begin with, the fact that Miras are typically easier to
+detect than SRVs, and their light curves are easier to process
+as they tend to be more regular. Moreover, Miras represent the
+end-point of AGB evolution, so in principle they correspond to a
+smaller range of stellar parameters compared to the full extent of
+the DFMP regime, and they display a smaller range of periods
+at a given age (cf. Feast & Whitelock 2000b). In other words,
+they should exhibit a relatively narrow PA relation (even though,
+based on the observational data set we adopted, there is no conclusive evidence that considering only Miras reduces the scatter
+of the PA relation).
+Nonetheless, we caution against this approach as it is prone
+to introducing uncontrolled biases, as the traditional distinction
+between SRVs and Miras is arbitrary (see Trabucchi et al. 2021a,
+and references therein). As such, it disregards the physical processes at the origin of the range of amplitudes characterizing
+LPVs. In particular, photometric amplitudes are largely determined by the formation and dissociation of molecules in the stellar atmosphere, and they are likely to be metallicity-dependent.
+It is therefore reasonable to assume that metal-poor (old) Mira
+analogs might be classified as SRVs, thereby undermining the
+potential application of the PA relation if restricted to Miras.
+This seems to be supported by the fact that the bulk of old LPVs
+in our sample are classified as SRVs. Therefore, studies involving PA relations of LPVs would advantageously include both
+Miras and FM-pulsating SRVs.
+The challenge associated with SRVs stems from the fact that
+they are often multiperiodic (even when predominantly pulsating in the FM), a property that complicates the light curve analysis and period extraction. At the same time, this feature could
+potentially improve age determinations as overtone modes are
+expected to display a PA relation as well.
+5. Conclusions
+We used the results from recent nonlinear pulsation calculations
+and combined them with state-of-the-art isochrone models to investigate the PA relation of FM-dominated LPVs, finding good
+agreement with the distribution of observed LPVs in star clusters. The theoretical PA relation displays a non-negligible scatter, whose origin we identified due to the fact that, despite being
+very brief, the portion of AGB evolution during which the FM
+becomes dominant shows a relatively large range in mass and
+radius at a given age.
+The theoretical distribution of FM periods is roughly symmetric, but the FM is not dominant at the shortest periods. As a
+result, models predict that the distribution of dominant FM periods at a given age is skewed toward short periods, in agreement
+with observations. Depending on stellar populations, metallicity
+may enhance this feature as metal-poor LPVs, which tend to be
+warmer and more compact, only contribute near short periods.
+We provide the best-fit PA relation separately for O-rich and
+C-rich FM-pulsating LPVs. The latter LPVs show a steeper PA
+relation because of their lower surface temperatures, which allow
+them to reach longer periods more easily.
+Our analysis concerns all LPVs predominantly pulsating in
+the FM, regardless of whether they are classified as Miras or
+SRVs. We discourage such a distinction in that it is arbitrary and
+prone to selection biases that risk compromising the use of LPVs
+as age indicators.
+The main limitation in the use of the PA relation for age determinations of individual LPVs stems from its relatively large
+scatter. We suggest that corrective terms, involving the amplitude of variability, might help to reduce this scatter and anticipate that upcoming data from ongoing and future surveys dedicated to time-domain astronomy will be highly valuable to probe
+this possibility. A study of the impact of metallicity on nonlinear
+pulsation is highly desirable to pursue this line of investigation,
 Article number, page 5 of 9
-A&A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs
-as would be a theoretical investigation of the dependence of pho￾tometric amplitudes upon global stellar parameters.
-Acknowledgements. M.T. and N.M. acknowledge the support provided by the
-Swiss National Science Foundation through grant Nr. 188697. We are grateful
-to the anonymous referee for the constructive comments that helped improving
-this paper, and to Léo Girardi for helping with the computation and interpre￾tation of isochrones. This research has made use of: data from the OGLE-III
-Catalog of Variable Stars; data products from the Two Micron All Sky Sur￾vey, which is a joint project of the University of Massachusetts and the In￾frared Processing and Analysis Center/California Institute of Technology, funded
-by the National Aeronautics and Space Administration and the National Sci￾ence Foundation; data from the European Space Agency (ESA) mission Gaia
-(https://www.cosmos.esa.int/gaia), processed by the Gaia Data Process￾ing and Analysis Consortium (DPAC, https://www.cosmos.esa.int/web/
-gaia/dpac/consortium). Funding for the DPAC has been provided by na￾tional institutions, in particular the institutions participating in the Gaia Multi￾lateral Agreement. This research has made use of the following free/open source
-software and/or libraries: the Starlink Tables Infrastructure Library (STILTS and
-Topcat, Taylor 2006); IPython (Pérez & Granger 2007) and Jupyter (Kluyver
-et al. 2016) notebooks; the Python libraries NumPy (Harris et al. 2020), SciPy
-(Virtanen et al. 2020), matplotlib (a Python library for publication quality graph￾ics, Hunter 2007), and Astropy (a community-developed core Python package
-for Astronomy, Astropy Collaboration et al. 2018). This research has made use of
-NASA’s Astrophysics Data System Bibliographic Services, and of the following
-services provided by CDS, Strasbourg: the SIMBAD data base, VizieR catalogue
-access tool (DOI: 10.26093/cds/vizier, Ochsenbein et al. 2000), the “Aladin sky
-atlas” (Bonnarel et al. 2000), and the cross-match service (Boch et al. 2012;
-Pineau et al. 2020).
-References
-Anderson, R. I., Saio, H., Ekström, S., Georgy, C., & Meynet, G. 2016, A&A,
-591, A8
-Astropy Collaboration, Price-Whelan, A. M., Sipocz, B. M., et al. 2018, AJ, 156, ˝
-123
-Battinelli, P. & Demers, S. 2012, A&A, 544, A10
-Battinelli, P. & Demers, S. 2013, A&A, 553, A93
-Baumgardt, H., Parmentier, G., Anders, P., & Grebel, E. K. 2013, MNRAS, 430,
-676
-Boch, T., Pineau, F., & Derriere, S. 2012, in Astronomical Society of the Pa￾cific Conference Series, Vol. 461, Astronomical Data Analysis Software and
-Systems XXI, ed. P. Ballester, D. Egret, & N. P. F. Lorente, 291
-Bonnarel, F., Fernique, P., Bienaymé, O., et al. 2000, A&AS, 143, 33
-Bono, G., Marconi, M., Cassisi, S., et al. 2005, ApJ, 621, 966
-Bressan, A., Marigo, P., Girardi, L., et al. 2012, MNRAS, 427, 127
-Catchpole, R. M., Whitelock, P. A., Feast, M. W., et al. 2016, MNRAS, 455,
-2216
-Cutri, R. M., Wright, E. L., Conrow, T., et al. 2013, Explanatory Supplement
-to the AllWISE Data Release Products, Explanatory Supplement to the All￾WISE Data Release Products
-de Grijs, R., Courbin, F., Martínez-Vázquez, C. E., et al. 2017, Space Sci. Rev.,
-212, 1743
-De Somma, G., Marconi, M., Cassisi, S., et al. 2020, MNRAS, 496, 5039
-Eggen, O. J. 1998, AJ, 115, 2435
-Feast, M. 2007, in Astronomical Society of the Pacific Conference Series, Vol.
-378, Why Galaxies Care About AGB Stars: Their Importance as Actors and
-Probes, ed. F. Kerschbaum, C. Charbonnel, & R. F. Wing, 479
-Feast, M. & Whitelock, P. 2000a, in Astrophysics and Space Science Library,
-Vol. 255, Astrophysics and Space Science Library, ed. F. Matteucci & F. Gio￾vannelli, 229
-Feast, M. W. 1963, MNRAS, 125, 367
-Feast, M. W. 1966, The Observatory, 86, 120
-Feast, M. W. 1981, in Astrophysics and Space Science Library, Vol. 88, Physical
-Processes in Red Giants, ed. J. Iben, I. & A. Renzini, 193–204
-Feast, M. W., Robertson, B. S. C., & Black, C. 1980, MNRAS, 190, 227
-Feast, M. W. & Whitelock, P. A. 2000b, MNRAS, 317, 460
-Feast, M. W., Whitelock, P. A., & Menzies, J. W. 2006, MNRAS, 369, 791
-Gaia Collaboration, Brown, A. G. A., Vallenari, A., et al. 2021, A&A, 649, A1
-Girardi, L., Marigo, P., Bressan, A., & Rosenfield, P. 2013, ApJ, 777, 142
-Goudfrooij, P., Girardi, L., Kozhurina-Platais, V., et al. 2014, ApJ, 797, 35
-Grady, J., Belokurov, V., & Evans, N. W. 2019, MNRAS, 483, 3022
-Grady, J., Belokurov, V., & Evans, N. W. 2020, MNRAS, 492, 3128
-Harris, C. R., Millman, K. J., van der Walt, S. J., et al. 2020, Nature, 585, 357
-Hunter, J. D. 2007, Computing in Science & Engineering, 9, 90
-Ivezic, Ž., Kahn, S. M., Tyson, J. A., et al. 2019, ApJ, 873, 111 ´
-Jayasinghe, T., Stanek, K. Z., Kochanek, C. S., et al. 2020, MNRAS, 491, 13
-Joo, S.-J. & Lee, Y.-W. 2013, ApJ, 762, 36
-Jura, M. & Kleinmann, S. G. 1992, ApJS, 79, 105
-Kamath, D., Wood, P. R., Soszynski, I., & Lebzelter, T. 2010, MNRAS, 408, 522 ´
-Kharchenko, N. V., Piskunov, A. E., Röser, S., Schilbach, E., & Scholz, R. D.
-2005, A&A, 438, 1163
-Kharchenko, N. V., Piskunov, A. E., Schilbach, E., Röser, S., & Scholz, R. D.
-2016, A&A, 585, A101
-Kippenhahn, R. & Smith, L. 1969, A&A, 1, 142
-Kluyver, T., Ragan-Kelley, B., Pérez, F., et al. 2016, in Positioning and Power
-in Academic Publishing: Players, Agents and Agendas, ed. F. Loizides &
-B. Scmidt (Netherlands: IOS Press), 87–90
-Lebzelter, T., Mowlavi, N., Marigo, P., et al. 2018, A&A, 616, L13
-Lebzelter, T., Trabucchi, M., Mowlavi, N., et al. 2019, A&A, 631, A24
-Lebzelter, T. & Wood, P. R. 2005, A&A, 441, 1117
-Lebzelter, T. & Wood, P. R. 2007, A&A, 475, 643
-Lebzelter, T. & Wood, P. R. 2011, A&A, 529, A137
-Lebzelter, T. & Wood, P. R. 2016, A&A, 585, A111
-Lloyd Evans, T. 1976, MNRAS, 174, 169
-Lloyd Evans, T. 1983a, MNRAS, 204, 985
-Lloyd Evans, T. 1983b, MNRAS, 204, 961
-Lloyd Evans, T. & Menzies, J. W. 1973, in Astrophysics and Space Science Li￾brary, Vol. 36, IAU Colloq. 21: Variable Stars in Globular Clusters and in
-Related Systems, ed. J. D. Fernie, 151
-Mackey, A. D. & Broby Nielsen, P. 2007, MNRAS, 379, 151
-Marigo, P., Girardi, L., Bressan, A., et al. 2017, ApJ, 835, 77
-Menzies, J., Feast, M., Tanabé, T., Whitelock, P., & Nakada, Y. 2002, MNRAS,
-335, 923
-Menzies, J., Feast, M., Whitelock, P., et al. 2008, MNRAS, 385, 1045
-Menzies, J. W., Feast, M. W., Whitelock, P. A., & Matsunaga, N. 2011, MNRAS,
-414, 3492
-Menzies, J. W., Whitelock, P. A., & Feast, M. W. 2015, MNRAS, 452, 910
-Menzies, J. W., Whitelock, P. A., Feast, M. W., & Matsunaga, N. 2010, MNRAS,
-406, 86
-Merrill, P. W. 1923, ApJ, 58, 215
-Mowlavi, N., Lecoeur-Taïbi, I., Lebzelter, T., et al. 2018, A&A, 618, A58
-Nayak, P. K., Subramaniam, A., Choudhury, S., Indu, G., & Sagar, R. 2016,
-MNRAS, 463, 1446
-Ochsenbein, F., Bauer, P., & Marcout, J. 2000, A&AS, 143, 23
-Pastorelli, G., Marigo, P., Girardi, L., et al. 2020, MNRAS, 498, 3283
-Pastorelli, G., Marigo, P., Girardi, L., et al. 2019, MNRAS, 485, 5666
-Pérez, F. & Granger, B. E. 2007, Computing in Science and Engineering, 9, 21
-Perren, G. I., Piatti, A. E., & Vázquez, R. A. 2017, A&A, 602, A89
-Pineau, F.-X., Boch, T., Derrière, S., & Schaaff, A. 2020, in Astronomical So￾ciety of the Pacific Conference Series, Vol. 522, Astronomical Data Analysis
-Software and Systems XXVII, ed. P. Ballester, J. Ibsen, M. Solar, & K. Short￾ridge, 125
-Sakamoto, T., Matsunaga, N., Hasegawa, T., & Nakada, Y. 2012, ApJ, 761, L10
-Skrutskie, M. F., Cutri, R. M., Stiening, R., et al. 2006, AJ, 131, 1163
-Soszynski, I., Olechowska, A., Ratajczak, M., et al. 2021, ApJ, 911, L22 ´
-Soszynski, I., Udalski, A., Szyma ´ nski, M. K., et al. 2009, Acta Astron., 59, 239 ´
-Soszynski, I., Udalski, A., Szyma ´ nski, M. K., et al. 2011, Acta Astron., 61, 217 ´
-Taylor, M. B. 2006, in Astronomical Society of the Pacific Conference Se￾ries, Vol. 351, Astronomical Data Analysis Software and Systems XV, ed.
-C. Gabriel, C. Arviset, D. Ponz, & S. Enrique, 666
-Trabucchi, M., Mowlavi, N., & Lebzelter, T. 2021a, A&A, 656, A66
-Trabucchi, M., Wood, P. R., Montalbán, J., et al. 2017, ApJ, 847, 139
-Trabucchi, M., Wood, P. R., Montalbán, J., et al. 2019, MNRAS, 482, 929
-Trabucchi, M., Wood, P. R., Mowlavi, N., et al. 2021b, MNRAS, 500, 1575
-Urago, R., Omodaka, T., Nagayama, T., et al. 2020, ApJ, 891, 50
-Villanova, S., Geisler, D., Gratton, R. G., & Cassisi, S. 2014, ApJ, 791, 107
-Virtanen, P., Gommers, R., Oliphant, T. E., et al. 2020, Nature Methods, 17, 261
-Wenger, M., Ochsenbein, F., Egret, D., et al. 2000, A&AS, 143, 9
-Whitelock, P., Feast, M., & Catchpole, R. 1991, MNRAS, 248, 276
-Whitelock, P., Menzies, J., Feast, M., et al. 1994, MNRAS, 267, 711
-Whitelock, P. A. 1986, MNRAS, 219, 525
-Whitelock, P. A., Menzies, J. W., Feast, M. W., et al. 2009, MNRAS, 394, 795
-Whitelock, P. A., Menzies, J. W., Feast, M. W., Nsengiyumva, F., & Matsunaga,
-N. 2013, MNRAS, 428, 2216
-Wilson, R. E. & Merrill, P. W. 1942, ApJ, 95, 248
-Wyatt, S. P. & Cahn, J. H. 1983, ApJ, 275, 225
-Ya’Ari, A. & Tuchman, Y. 1996, ApJ, 456, 350
+A&A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs
+as would be a theoretical investigation of the dependence of photometric amplitudes upon global stellar parameters.
+Acknowledgements. M.T. and N.M. acknowledge the support provided by the
+Swiss National Science Foundation through grant Nr. 188697. We are grateful
+to the anonymous referee for the constructive comments that helped improve
+this paper, and to Léo Girardi for helping with the computation and interpretation of isochrones. This research has made use of: data from the OGLE-III
+Catalog of Variable Stars; data products from the Two Micron All Sky Survey, which is a joint project of the University of Massachusetts and the Infrared Processing and Analysis Center/California Institute of Technology, funded
+by the National Aeronautics and Space Administration and the National Science Foundation; data from the European Space Agency (ESA) mission Gaia
+(https://www.cosmos.esa.int/gaia), processed by the Gaia Data Processing and Analysis Consortium (DPAC, https://www.cosmos.esa.int/web/
+gaia/dpac/consortium). Funding for the DPAC has been provided by national institutions, in particular the institutions participating in the Gaia Multilateral Agreement. This research has made use of the following free/open source
+software and/or libraries: the Starlink Tables Infrastructure Library (STILTS and
+Topcat, Taylor 2006); IPython (Pérez & Granger 2007) and Jupyter (Kluyver
+et al. 2016) notebooks; the Python libraries NumPy (Harris et al. 2020), SciPy
+(Virtanen et al. 2020), matplotlib (a Python library for publication quality graphics, Hunter 2007), and Astropy (a community-developed core Python package
+for Astronomy, Astropy Collaboration et al. 2018). This research has made use of
+NASA’s Astrophysics Data System Bibliographic Services, and of the following
+services provided by CDS, Strasbourg: the SIMBAD data base, VizieR catalogue
+access tool (DOI: 10.26093/cds/vizier, Ochsenbein et al. 2000), the “Aladin sky
+atlas” (Bonnarel et al. 2000), and the cross-match service (Boch et al. 2012;
+Pineau et al. 2020).
+References
+Anderson, R. I., Saio, H., Ekström, S., Georgy, C., & Meynet, G. 2016, A&A,
+591, A8
+Astropy Collaboration, Price-Whelan, A. M., Sipőcz, B. M., et al. 2018, AJ, 156,
+123
+Battinelli, P. & Demers, S. 2012, A&A, 544, A10
+Battinelli, P. & Demers, S. 2013, A&A, 553, A93
+Baumgardt, H., Parmentier, G., Anders, P., & Grebel, E. K. 2013, MNRAS, 430,
+676
+Boch, T., Pineau, F., & Derriere, S. 2012, in Astronomical Society of the Pacific Conference Series, Vol. 461, Astronomical Data Analysis Software and
+Systems XXI, ed. P. Ballester, D. Egret, & N. P. F. Lorente, 291
+Bonnarel, F., Fernique, P., Bienaymé, O., et al. 2000, A&AS, 143, 33
+Bono, G., Marconi, M., Cassisi, S., et al. 2005, ApJ, 621, 966
+Bressan, A., Marigo, P., Girardi, L., et al. 2012, MNRAS, 427, 127
+Catchpole, R. M., Whitelock, P. A., Feast, M. W., et al. 2016, MNRAS, 455,
+2216
+Cutri, R. M., Wright, E. L., Conrow, T., et al. 2013, Explanatory Supplement
+to the AllWISE Data Release Products, Explanatory Supplement to the AllWISE Data Release Products
+de Grijs, R., Courbin, F., Martínez-Vázquez, C. E., et al. 2017, Space Sci. Rev.,
+212, 1743
+De Somma, G., Marconi, M., Cassisi, S., et al. 2020, MNRAS, 496, 5039
+Eggen, O. J. 1998, AJ, 115, 2435
+Feast, M. 2007, in Astronomical Society of the Pacific Conference Series, Vol.
+378, Why Galaxies Care About AGB Stars: Their Importance as Actors and
+Probes, ed. F. Kerschbaum, C. Charbonnel, & R. F. Wing, 479
+Feast, M. & Whitelock, P. 2000a, in Astrophysics and Space Science Library,
+Vol. 255, Astrophysics and Space Science Library, ed. F. Matteucci & F. Giovannelli, 229
+Feast, M. W. 1963, MNRAS, 125, 367
+Feast, M. W. 1966, The Observatory, 86, 120
+Feast, M. W. 1981, in Astrophysics and Space Science Library, Vol. 88, Physical
+Processes in Red Giants, ed. J. Iben, I. & A. Renzini, 193–204
+Feast, M. W., Robertson, B. S. C., & Black, C. 1980, MNRAS, 190, 227
+Feast, M. W. & Whitelock, P. A. 2000b, MNRAS, 317, 460
+Feast, M. W., Whitelock, P. A., & Menzies, J. W. 2006, MNRAS, 369, 791
+Gaia Collaboration, Brown, A. G. A., Vallenari, A., et al. 2021, A&A, 649, A1
+Girardi, L., Marigo, P., Bressan, A., & Rosenfield, P. 2013, ApJ, 777, 142
+Goudfrooij, P., Girardi, L., Kozhurina-Platais, V., et al. 2014, ApJ, 797, 35
+Grady, J., Belokurov, V., & Evans, N. W. 2019, MNRAS, 483, 3022
+Grady, J., Belokurov, V., & Evans, N. W. 2020, MNRAS, 492, 3128
+Harris, C. R., Millman, K. J., van der Walt, S. J., et al. 2020, Nature, 585, 357
+Hunter, J. D. 2007, Computing in Science & Engineering, 9, 90
+Ivezić, Ž., Kahn, S. M., Tyson, J. A., et al. 2019, ApJ, 873, 111
+Jayasinghe, T., Stanek, K. Z., Kochanek, C. S., et al. 2020, MNRAS, 491, 13
+Joo, S.-J. & Lee, Y.-W. 2013, ApJ, 762, 36
+Jura, M. & Kleinmann, S. G. 1992, ApJS, 79, 105
+Kamath, D., Wood, P. R., Soszyński, I., & Lebzelter, T. 2010, MNRAS, 408, 522
+Kharchenko, N. V., Piskunov, A. E., Röser, S., Schilbach, E., & Scholz, R. D.
+2005, A&A, 438, 1163
+Kharchenko, N. V., Piskunov, A. E., Schilbach, E., Röser, S., & Scholz, R. D.
+2016, A&A, 585, A101
+Kippenhahn, R. & Smith, L. 1969, A&A, 1, 142
+Kluyver, T., Ragan-Kelley, B., Pérez, F., et al. 2016, in Positioning and Power
+in Academic Publishing: Players, Agents and Agendas, ed. F. Loizides &
+B. Schmidt (Netherlands: IOS Press), 87–90
+Lebzelter, T., Mowlavi, N., Marigo, P., et al. 2018, A&A, 616, L13
+Lebzelter, T., Trabucchi, M., Mowlavi, N., et al. 2019, A&A, 631, A24
+Lebzelter, T. & Wood, P. R. 2005, A&A, 441, 1117
+Lebzelter, T. & Wood, P. R. 2007, A&A, 475, 643
+Lebzelter, T. & Wood, P. R. 2011, A&A, 529, A137
+Lebzelter, T. & Wood, P. R. 2016, A&A, 585, A111
+Lloyd Evans, T. 1976, MNRAS, 174, 169
+Lloyd Evans, T. 1983a, MNRAS, 204, 985
+Lloyd Evans, T. 1983b, MNRAS, 204, 961
+Lloyd Evans, T. & Menzies, J. W. 1973, in Astrophysics and Space Science Library, Vol. 36, IAU Colloq. 21: Variable Stars in Globular Clusters and in
+Related Systems, ed. J. D. Fernie, 151
+Mackey, A. D. & Broby Nielsen, P. 2007, MNRAS, 379, 151
+Marigo, P., Girardi, L., Bressan, A., et al. 2017, ApJ, 835, 77
+Menzies, J., Feast, M., Tanabé, T., Whitelock, P., & Nakada, Y. 2002, MNRAS,
+335, 923
+Menzies, J., Feast, M., Whitelock, P., et al. 2008, MNRAS, 385, 1045
+Menzies, J. W., Feast, M. W., Whitelock, P. A., & Matsunaga, N. 2011, MNRAS,
+414, 3492
+Menzies, J. W., Whitelock, P. A., & Feast, M. W. 2015, MNRAS, 452, 910
+Menzies, J. W., Whitelock, P. A., Feast, M. W., & Matsunaga, N. 2010, MNRAS,
+406, 86
+Merrill, P. W. 1923, ApJ, 58, 215
+Mowlavi, N., Lecoeur-Taïbi, I., Lebzelter, T., et al. 2018, A&A, 618, A58
+Nayak, P. K., Subramaniam, A., Choudhury, S., Indu, G., & Sagar, R. 2016,
+MNRAS, 463, 1446
+Ochsenbein, F., Bauer, P., & Marcout, J. 2000, A&AS, 143, 23
+Pastorelli, G., Marigo, P., Girardi, L., et al. 2020, MNRAS, 498, 3283
+Pastorelli, G., Marigo, P., Girardi, L., et al. 2019, MNRAS, 485, 5666
+Pérez, F. & Granger, B. E. 2007, Computing in Science and Engineering, 9, 21
+Perren, G. I., Piatti, A. E., & Vázquez, R. A. 2017, A&A, 602, A89
+Pineau, F.-X., Boch, T., Derrière, S., & Schaaff, A. 2020, in Astronomical Society of the Pacific Conference Series, Vol. 522, Astronomical Data Analysis
+Software and Systems XXVII, ed. P. Ballester, J. Ibsen, M. Solar, & K. Shortridge, 125
+Sakamoto, T., Matsunaga, N., Hasegawa, T., & Nakada, Y. 2012, ApJ, 761, L10
+Skrutskie, M. F., Cutri, R. M., Stiening, R., et al. 2006, AJ, 131, 1163
+Soszyński, I., Olechowska, A., Ratajczak, M., et al. 2021, ApJ, 911, L22
+Soszyński, I., Udalski, A., Szymański, M. K., et al. 2009, Acta Astron., 59, 239
+Soszyński, I., Udalski, A., Szymański, M. K., et al. 2011, Acta Astron., 61, 217
+Taylor, M. B. 2006, in Astronomical Society of the Pacific Conference Series, Vol. 351, Astronomical Data Analysis Software and Systems XV, ed.
+C. Gabriel, C. Arviset, D. Ponz, & S. Enrique, 666
+Trabucchi, M., Mowlavi, N., & Lebzelter, T. 2021a, A&A, 656, A66
+Trabucchi, M., Wood, P. R., Montalbán, J., et al. 2017, ApJ, 847, 139
+Trabucchi, M., Wood, P. R., Montalbán, J., et al. 2019, MNRAS, 482, 929
+Trabucchi, M., Wood, P. R., Mowlavi, N., et al. 2021b, MNRAS, 500, 1575
+Urago, R., Omodaka, T., Nagayama, T., et al. 2020, ApJ, 891, 50
+Villanova, S., Geisler, D., Gratton, R. G., & Cassisi, S. 2014, ApJ, 791, 107
+Virtanen, P., Gommers, R., Oliphant, T. E., et al. 2020, Nature Methods, 17, 261
+Wenger, M., Ochsenbein, F., Egret, D., et al. 2000, A&AS, 143, 9
+Whitelock, P., Feast, M., & Catchpole, R. 1991, MNRAS, 248, 276
+Whitelock, P., Menzies, J., Feast, M., et al. 1994, MNRAS, 267, 711
+Whitelock, P. A. 1986, MNRAS, 219, 525
+Whitelock, P. A., Menzies, J. W., Feast, M. W., et al. 2009, MNRAS, 394, 795
+Whitelock, P. A., Menzies, J. W., Feast, M. W., Nsengiyumva, F., & Matsunaga,
+N. 2013, MNRAS, 428, 2216
+Wilson, R. E. & Merrill, P. W. 1942, ApJ, 95, 248
+Wyatt, S. P. & Cahn, J. H. 1983, ApJ, 275, 225
+Ya’Ari, A. & Tuchman, Y. 1996, ApJ, 456, 350
 Article number, page 6 of 9
-Trabucchi et al.: The period-age relation of LPVs
-Fig. A.1. Absolute-Ks Gaia-2MASS diagram for the stars with or with￾out a spectral type (left and right panels, respectively) in the selected
-sample. Symbol colors and shapes indicate the spectral type and host
-cluster described in the legend, respectively, which also reports the num￾ber of sources displayed (i.e., having both optical and NIR photometry).
-The dashed line marks the separation between O- and C-rich sources
-according to Lebzelter et al. (2018). An arrow marks the source MSX
-LMC 124 in NGC 1830 that, having WBP,RP−WJ,Ks = 9.73 mag, lies out￾side the plot area. Background dots are LPVs in the LMC from OGLE￾III (light gray) and Mowlavi et al. (2018) (darker gray).
-Appendix A: Classification of observed LPVs
-Appendix A.1: Spectral type
-We adopted the spectral types provided by Lebzelter & Wood
-(2007) and Kamath et al. (2010) for 52 of the LPVs they studied
-in NGC 1846, NGC 1978, and NGC 419. The only exception
-is the star 5-3 in NGC 419, for which we adopted the S-type as
-reported by Lloyd Evans (1983a).
-We also searched the SIMBAD astronomical database
-(Wenger et al. 2000) for spectral type information, which we
-found for 26 more stars. We used the Gaia-2MASS diagram of
-Lebzelter et al. (2018) to confirm the chemical type classification
-taken from literature and to characterize the surface chemistry of
-sources of an unknown spectral type (see Fig. A.1). Among the
-latter, we identified 13 C-rich stars and 106 O-rich sources.
-Three of the sources without a spectral type lack Gaia pho￾tometry, so they cannot be classified with the Gaia-2MASS. Two
-of them (LW5 and LW22 in 47 Tuc) have no match in Gaia
-EDR3, but they have NIR data and are probably O-rich based on
-their position in the J − Ks versus Ks color-magnitude diagram.
-The third source is one of the two stars in NGC 1903 from the
-list of Grady et al. (2019), which we identified with the 2MASS
-source J05171633-6920298. It is likely C-rich according to the
-NIR color-magnitude diagram.
-Finally, the sources V138 in ω Cen, LW15 in NGC 2808,
-and LW4 in NGC 362 lack NIR data. They cannot be placed in
-the NIR PL diagram, upon which we relied to assign pulsation
-modes to periods, so we excluded them from the sample. The
-distribution of O- and C-rich sources in the period-age diagram
-is shown in Fig. A.2.
-Appendix A.2: Variability
-For variability information, we complemented the data from
-Lebzelter & Wood and Kamath et al. (2010) with the catalogs
-from OGLE-III, ASAS-SN, and Gaia DR2. Combining these
-data sets, we found at least one period for each of the 176 sources
-in our sample.
-In order to identify the pulsation mode most likely respon￾sible for periods in a given source, we assumed that the second
-overtone mode is associated with sequence A, the first overtone
-mode with sequences B and C0
-, and the fundamental mode with
-sequence C (e.g., Trabucchi et al. 2017). We excluded long sec￾ondary periods on sequence D as they are not due to stellar pul￾sation (Soszynski et al. ´ 2021, and references therein), and we
-used the pattern of PL sequences in the LMC as a reference to
-guide the mode identification (cf. Trabucchi et al. 2021a).
-We performed this classification separately for periods com￾ing from each distinct data set. If two or more periods from dif￾ferent data sets were assigned to the same pulsation mode, we
-retained only one of those periods, with priority to the values
-from Lebzelter & Wood and Kamath et al. (2010). If the latter
-authors do not provide this information, we adopted the period
-from OGLE-III if available, and otherwise from ASAS-SN or
-from Gaia DR2.
-For some sources, the periods reported in different catalogs
-were assigned to the same mode through this procedure. In most
-cases, these periods are reasonably similar to each other. Only
-in a few cases were they significantly different, but this did not
-alter our conclusions.
-When available, the variability type was taken from OGLE￾III or ASAS-SN. We note that we are only interested in whether
-a star is classified as a Mira or semi-regular variable. In many
-cases, this type is not given or the star is simply considered, for
-instance, as an LPV or AGB in SIMBAD, in which case we con￾sidered the variability type as undetermined.
-Appendix B: Fitting relations
-We obtained analytic expressions for the PA relations separately
-for O- and C-rich stars, proceeding as follows. For each bin of
-log(τ/yr), we modeled the period distribution with a Gaussian
-kernel density estimator (KDE) and identified the peak of the
-distribution. To describe the boundaries of the PA relation, we
-adopted, at each age, the values of the period at which the dis￾tribution equals 25% of its maximum. We selected this arbitrary
-value upon visual inspection of the PA plane. We modeled the
-central trend of the PA relation, as well as its short- and long￾period edges, with linear or quadratic functions in the form
-log(τ/yr) = a0 + a1 (P/P˜) + a2 (P/P˜)
-2
-, (B.1)
-(where P˜ = 350 days) and employed a Lenvenberg-Marquardt
-nonlinear regression algorithm3
-to derive the best-fit coefficients,
-which are listed in Table B.1. We remark that these best-fit ex￾pressions are only valid in the intervals 8.0 ≤ log(τ/yr) ≤ 10.3
-and 20 < P/days < 700 for O-rich composition, and within
-3 We made use of the Python library SciPy to perform Gaussian KDE
-modeling and best-fit, respectively, by means of the gaussian_kde
-tool from the stats module and the curve_fit function from the
-optimize module.
+Trabucchi et al.: The period-age relation of LPVs
+Fig. A.1. Absolute-Ks Gaia-2MASS diagram for the stars with or without a spectral type (left and right panels, respectively) in the selected
+sample. Symbol colors and shapes indicate the spectral type and host
+cluster described in the legend, respectively, which also reports the number of sources displayed (i.e., having both optical and NIR photometry).
+The dashed line marks the separation between O- and C-rich sources
+according to Lebzelter et al. (2018). An arrow marks the source MSX
+LMC 124 in NGC 1830 that, having $W_{\rm BP,RP} - W_{J,K_{\rm s}} = 9.73$ mag, lies outside the plot area. Background dots are LPVs in the LMC from OGLE-III (light gray) and Mowlavi et al. (2018) (darker gray).
+Appendix A: Classification of observed LPVs
+Appendix A.1: Spectral type
+We adopted the spectral types provided by Lebzelter & Wood
+(2007) and Kamath et al. (2010) for 52 of the LPVs they studied
+in NGC 1846, NGC 1978, and NGC 419. The only exception
+is the star 5-3 in NGC 419, for which we adopted the S-type as
+reported by Lloyd Evans (1983a).
+We also searched the SIMBAD astronomical database
+(Wenger et al. 2000) for spectral type information, which we
+found for 26 more stars. We used the Gaia-2MASS diagram of
+Lebzelter et al. (2018) to confirm the chemical type classification
+taken from literature and to characterize the surface chemistry of
+sources of an unknown spectral type (see Fig. A.1). Among the
+latter, we identified 13 C-rich stars and 106 O-rich sources.
+Three of the sources without a spectral type lack Gaia photometry, so they cannot be classified with the Gaia-2MASS diagram. Two
+of them (LW5 and LW22 in 47 Tuc) have no match in Gaia
+EDR3, but they have NIR data and are probably O-rich based on
+their position in the J − Ks versus Ks color-magnitude diagram.
+The third source is one of the two stars in NGC 1903 from the
+list of Grady et al. (2019), which we identified with the 2MASS
+source J05171633-6920298. It is likely C-rich according to the
+NIR color-magnitude diagram.
+Finally, the sources V138 in ω Cen, LW15 in NGC 2808,
+and LW4 in NGC 362 lack NIR data. They cannot be placed in
+the NIR PL diagram, upon which we relied to assign pulsation
+modes to periods, so we excluded them from the sample. The
+distribution of O- and C-rich sources in the period-age diagram
+is shown in Fig. A.2.
+Appendix A.2: Variability
+For variability information, we complemented the data from
+Lebzelter & Wood and Kamath et al. (2010) with the catalogs
+from OGLE-III, ASAS-SN, and Gaia DR2. Combining these
+data sets, we found at least one period for each of the 176 sources
+in our sample.
+In order to identify the pulsation mode most likely responsible for periods in a given source, we assumed that the second
+overtone mode is associated with sequence A, the first overtone
+mode with sequences B and C′, and the fundamental mode with
+sequence C (e.g., Trabucchi et al. 2017). We excluded long secondary periods on sequence D as they are not due to stellar pulsation (Soszyński et al. 2021, and references therein), and we
+used the pattern of PL sequences in the LMC as a reference to
+guide the mode identification (cf. Trabucchi et al. 2021a).
+We performed this classification separately for periods coming from each distinct data set. If two or more periods from different data sets were assigned to the same pulsation mode, we
+retained only one of those periods, with priority to the values
+from Lebzelter & Wood and Kamath et al. (2010). If the latter
+authors do not provide this information, we adopted the period
+from OGLE-III if available, and otherwise from ASAS-SN or
+from Gaia DR2.
+For some sources, the periods reported in different catalogs
+were assigned to the same mode through this procedure. In most
+cases, these periods are reasonably similar to each other. Only
+in a few cases were they significantly different, but this did not
+alter our conclusions.
+When available, the variability type was taken from OGLE-III or ASAS-SN. We note that we are only interested in whether
+a star is classified as a Mira or semi-regular variable. In many
+cases, this type is not given or the star is simply considered, for
+instance, as an LPV or AGB in SIMBAD, in which case we considered the variability type as undetermined.
+Appendix B: Fitting relations
+We obtained analytic expressions for the PA relations separately
+for O- and C-rich stars, proceeding as follows. For each bin of
+log(τ/yr), we modeled the period distribution with a Gaussian
+kernel density estimator (KDE) and identified the peak of the
+distribution. To describe the boundaries of the PA relation, we
+adopted, at each age, the values of the period at which the distribution equals 25% of its maximum. We selected this arbitrary
+value upon visual inspection of the PA plane. We modeled the
+central trend of the PA relation, as well as its short- and long-period edges, with linear or quadratic functions in the form
+$$\log(\tau/\mathrm{yr}) = a_0 + a_1\,(P/\tilde{P}) + a_2\,(P/\tilde{P})^2, \qquad \mathrm{(B.1)}$$
+(where $\tilde{P} = 350$ days) and employed a Levenberg-Marquardt
+nonlinear regression algorithm$^{3}$ to derive the best-fit coefficients,
+which are listed in Table B.1. We remark that these best-fit expressions are only valid in the intervals 8.0 ≤ log(τ/yr) ≤ 10.3
+and 20 < P/days < 700 for O-rich composition, and within
+3 We made use of the Python library SciPy to perform Gaussian KDE
+modeling and best-fit, respectively, by means of the gaussian_kde
+tool from the stats module and the curve_fit function from the
+optimize module.
 Article number, page 7 of 9
-A&A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs
-Fig. A.2. Similar to Fig. 1, except each source is color-coded according to whether it has been classified as O-rich (blue) or C-rich (red).
-Table B.1. Best-fit coefficients for the PA relation and its boundaries in
-the form given in Eq. B.1.
-Sp. type relation a0 a1 a2
-O-rich
-center 10.78 -2.660 0.5953
-lower edge 10.46 -2.818 0.6578
-upper edge 10.54 -0.8187 -0.2335
-C-rich
-center 9.755 -0.7532
-lower edge 9.982 -1.698
-upper edge 8.498 -1.827 -0.9959
-8.6 ≤ log(τ/yr) ≤ 9.3 and 140 < P/days < 620 in the C-rich
-case.
-Because of the connection between age and initial mass, the
-PA relation can be translated into a period-initial mass relation,
-which we derived using the same approach described above, and
-assuming the form
-log(Mi/M) = b0 + b1 (P/P˜) + b2 (P/P˜)
-2
-. (B.2)
-The resulting best-fit lines are displayed in Fig. B.1, and the co￾efficients are given in Table B.2.
-We remark that both the PA and the period-initial mass rela￾tions depend on model assumptions, in particular mass loss and
-mixing, as well as on the properties of the population of LPVs,
-namely the star-formation history and age-metallicity relation.
-Appendix C: The shape of the period distribution
-As an example case, we consider an isochrone of age log(τ/yr) =
-8.3 and initial metallicity Zi = 0.006. Stars on the TP-AGB have
-initial masses Mi ' 3.85 M over a small range of ∼ 10−3 M.
-The relation between period and initial mass is displayed in
-panel (a) of Fig. C.1, where isochrone portions undergoing
-Table B.2. Best-fit coefficients for the period-initial mass relation and
-its boundaries in the form given in Eq. B.2.
-Sp. type relation b0 b1 b2
-O-rich
-center -0.2790 0.8958 -0.1828
-lower edge -0.1772 0.9975 -0.2203
-upper edge -0.1740 0.2783 0.8247
-C-rich
-center -0.0304 0.2885
-lower edge -0.0131 0.5752
-upper edge -0.2245 -0.2720 0.2343
-DFMP are indicated by solid lines. Panel (b) shows the period
-distributions for a few different cases.
-It is instructive, to begin with, to ignore the effect of thermal
-pulses and consider only the quiescent evolution (green lines in
-Fig. C.1). The smallest initial mass corresponds to a star that just
-entered the TP-AGB, when the FM has a period of ∼ 240 days
-but is not dominant. It only becomes dominant above a threshold
-radius Rdom,0, that is for periods longer than a (mass-dependent)
-critical period Pdom,0 (the solid gray line in Fig. C.1). The least
-evolved (quiescent) model with dominant FM has PFM ' 360
-days (green circle and horizontal line), corresponding to a sharp
-cut in the period distribution shown in panel (b) of Fig. C.1.
-As a star evolves along the AGB it expands, and its period be￾comes longer in response to the increase in radius. Models with
-a higher initial mass are more evolved, hence they have a larger
-radius and a longer period. The rate at which a period increases
-with radius is not fixed, but rather decreases with evolution. Ac￾cording to the prescription of Trabucchi et al. (2021b), a period
-grows with radius as a broken power-law with exponent α ' 1.8
-if R < Rb, and with α ' 1.25 at larger radii.
-This is equivalent to saying that the period grows more
-slowly after it exceeds a critical value Pb = P(Rb), marked by
-the gray dotted line in Fig. C.1. The isochrone reaches it at
+A&A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs
+Fig. A.2. Similar to Fig. 1, except each source is color-coded according to whether it has been classified as O-rich (blue) or C-rich (red).
+Table B.1. Best-fit coefficients for the PA relation and its boundaries in
+the form given in Eq. B.1.
+Sp. type relation a0 a1 a2
+O-rich
+center 10.78 -2.660 0.5953
+lower edge 10.46 -2.818 0.6578
+upper edge 10.54 -0.8187 -0.2335
+C-rich
+center 9.755 -0.7532
+lower edge 9.982 -1.698
+upper edge 8.498 -1.827 -0.9959
+8.6 ≤ log(τ/yr) ≤ 9.3 and 140 < P/days < 620 in the C-rich
+case.
+Because of the connection between age and initial mass, the
+PA relation can be translated into a period-initial mass relation,
+which we derived using the same approach described above, and
+assuming the form
+log(Mi/M) = b0 + b1 (P/P˜) + b2 (P/P˜)
+2
+. (B.2)
+The resulting best-fit lines are displayed in Fig. B.1, and the coefficients are given in Table B.2.
+We remark that both the PA and the period-initial mass relations depend on model assumptions, in particular mass loss and
+mixing, as well as on the properties of the population of LPVs,
+namely the star-formation history and age-metallicity relation.
+Appendix C: The shape of the period distribution
+As an example case, we consider an isochrone of age log(τ/yr) =
+8.3 and initial metallicity Zi = 0.006. Stars on the TP-AGB have
+initial masses $M_{\rm i} \simeq 3.85\,M_\odot$ over a small range of $\sim 10^{-3}\,M_\odot$.
+The relation between period and initial mass is displayed in
+panel (a) of Fig. C.1, where isochrone portions undergoing
+Table B.2. Best-fit coefficients for the period-initial mass relation and
+its boundaries in the form given in Eq. B.2.
+Sp. type relation b0 b1 b2
+O-rich
+center -0.2790 0.8958 -0.1828
+lower edge -0.1772 0.9975 -0.2203
+upper edge -0.1740 0.2783 0.8247
+C-rich
+center -0.0304 0.2885
+lower edge -0.0131 0.5752
+upper edge -0.2245 -0.2720 0.2343
+DFMP are indicated by solid lines. Panel (b) shows the period
+distributions for a few different cases.
+It is instructive, to begin with, to ignore the effect of thermal
+pulses and consider only the quiescent evolution (green lines in
+Fig. C.1). The smallest initial mass corresponds to a star that just
+entered the TP-AGB, when the FM has a period of ∼ 240 days
+but is not dominant. It only becomes dominant above a threshold
+radius Rdom,0, that is for periods longer than a (mass-dependent)
+critical period Pdom,0 (the solid gray line in Fig. C.1). The least
+evolved (quiescent) model with dominant FM has $P_{\rm FM} \simeq 360$
+days (green circle and horizontal line), corresponding to a sharp
+cut in the period distribution shown in panel (b) of Fig. C.1.
+As a star evolves along the AGB it expands, and its period becomes longer in response to the increase in radius. Models with
+a higher initial mass are more evolved, hence they have a larger
+radius and a longer period. The rate at which a period increases
+with radius is not fixed, but rather decreases with evolution. According to the prescription of Trabucchi et al. (2021b), a period
+grows with radius as a broken power-law with exponent $\alpha \simeq 1.8$
+if $R < R_{\rm b}$, and with $\alpha \simeq 1.25$ at larger radii.
+This is equivalent to saying that the period grows more
+slowly after it exceeds a critical value Pb = P(Rb), marked by
+the gray dotted line in Fig. C.1. The isochrone reaches it at
 Article number, page 8 of 9
-Trabucchi et al.: The period-age relation of LPVs
-Fig. B.1. Similar to Fig. 2, but showing initial mass Mi
-in place of age. The best-fit lines to the most populated band and edges of the theoretical
-PFM – Mi relation are shown.
-Fig. C.1. Period distribution at fixed age and metallicity. Panel (a) shows
-period as a function of initial mass (current mass on the top axis) on the
-TP-AGB for a ∼ 200 Myr old isochrone with Zi = 0.006. Red lines
-show full thermal pulses, while blue lines ignore luminosity spikes and
-green lines show only the quiescent evolution. The same color code
-is used for the period distributions (normalized to their maximum) on
-panel (b). Solid lines indicate that the FM is dominant. Circles indi￾cate the earliest onset of DFMP accounting for (red) or ignoring (green)
-luminosity spikes, and the shortest period of the dominant FM (blue).
-Gray lines mark the critical values of periods at which the FM becomes
-dominant (solid line), less sensitive to radius (dotted line, which occurs
-at the vertical line for this specific isochrone), and independent of radius
-(dashed line).
-Mi ' 3.8524 M (vertical gray line), when PFM ' 420 days. In
-models with a smaller initial mass, the period is still increasing
-at a relatively large rate as the envelope expands, while in more
-massive models the period has already become less sensitive to
-changes in radius. This is reflected by a slight inflection of the
-green curve, which corresponds to the maximum in the period
-distribution shown in panel (b) of Fig. C.1. The period distri￾bution of the full TP-AGB range is roughly symmetric around
-this maximum, while limiting the selection to DFMP, produces
-a distribution skewed toward short periods, as found in Sect. 3.
-If the luminosity dips following thermal pulses are taken
-into account (blue lines), the corresponding envelope contrac￾tion causes the period to decrease, and the cut at ∼ 360 days
-becomes less sharp. Because of mass loss, the threshold period
-Pdom,0 is lowered, so that the shortest period associated with
-DFMP does not correspond to the least evolved model (green
-circle), but rather to the luminosity dip of a thermal pulse (blue
-circle).
-To be precise, the earliest occurrence of DFMP is on the left￾most luminosity spike (red circle), whose duration is so short that
-it is unlikely to be observed. Indeed, the inclusion of luminosity
-spikes alters the period distribution at long periods very little.
-Luminosity spikes are relevant only for relatively massive and
-young TP-AGB stars, and they give rise to the poorly populated
-portion of the PA relation at the longest periods, as seen in panel
-(a) of Fig. 2.
-Article number, page 9 of 9
+Trabucchi et al.: The period-age relation of LPVs
+Fig. B.1. Similar to Fig. 2, but showing initial mass Mi in place of age. The best-fit lines to the most populated band and edges of the theoretical
+PFM – Mi relation are shown.
+Fig. C.1. Period distribution at fixed age and metallicity. Panel (a) shows
+period as a function of initial mass (current mass on the top axis) on the
+TP-AGB for a ∼ 200 Myr old isochrone with Zi = 0.006. Red lines
+show full thermal pulses, while blue lines ignore luminosity spikes and
+green lines show only the quiescent evolution. The same color code
+is used for the period distributions (normalized to their maximum) on
+panel (b). Solid lines indicate that the FM is dominant. Circles indicate the earliest onset of DFMP accounting for (red) or ignoring (green)
+luminosity spikes, and the shortest period of the dominant FM (blue).
+Gray lines mark the critical values of periods at which the FM becomes
+dominant (solid line), less sensitive to radius (dotted line, which occurs
+at the vertical line for this specific isochrone), and independent of radius
+(dashed line).
+$M_{\rm i} \simeq 3.8524\,M_\odot$ (vertical gray line), when $P_{\rm FM} \simeq 420$ days. In
+models with a smaller initial mass, the period is still increasing
+at a relatively large rate as the envelope expands, while in more
+massive models the period has already become less sensitive to
+changes in radius. This is reflected by a slight inflection of the
+green curve, which corresponds to the maximum in the period
+distribution shown in panel (b) of Fig. C.1. The period distribution of the full TP-AGB range is roughly symmetric around
+this maximum, while limiting the selection to DFMP, produces
+a distribution skewed toward short periods, as found in Sect. 3.
+If the luminosity dips following thermal pulses are taken
+into account (blue lines), the corresponding envelope contraction causes the period to decrease, and the cut at ∼ 360 days
+becomes less sharp. Because of mass loss, the threshold period
+Pdom,0 is lowered, so that the shortest period associated with
+DFMP does not correspond to the least evolved model (green
+circle), but rather to the luminosity dip of a thermal pulse (blue
+circle).
+To be precise, the earliest occurrence of DFMP is on the leftmost luminosity spike (red circle), whose duration is so short that
+it is unlikely to be observed. Indeed, the inclusion of luminosity
+spikes alters the period distribution at long periods very little.
+Luminosity spikes are relevant only for relatively massive and
+young TP-AGB stars, and they give rise to the poorly populated
+portion of the PA relation at the longest periods, as seen in panel
+(a) of Fig. 2.
+Article number, page 9 of 9
\ No newline at end of file
diff --git a/read/results/pdfium/2201.00214.txt b/read/results/pdfium/2201.00214.txt
index 85a597e..b1bf082 100644
--- a/read/results/pdfium/2201.00214.txt
+++ b/read/results/pdfium/2201.00214.txt
@@ -1,953 +1,940 @@
-arXiv:2201.00214v1 [astro-ph.SR] 1 Jan 2022
-Temperature Analysis of Flaring
-(AR11283) and non-Flaring (AR12194)
-Coronal Loops
-N. Fathalian1
-, S. S. Hosseini Rad2
-, N. Alipour2
-, H. Safari2
-1Department of Physics, Payame Noor University (PNU), 19395-3697, Tehran, Iran.
-2Department of Physics, Faculty of Science, University of Zanjan, 45195-313, Zanjan, Iran.
-e-mail: narges_fathalian@alum.sharif.edu
-January 4, 2022
-Abstract
-Here, we study the temperature structure of flaring and non-flaring coronal loops, using extracted
-loops from images taken in six extreme ultraviolet (EUV) channels recorded by Atmospheric Imaging
-Assembly (AIA)/ Solar Dynamic Observatory (SDO). We use data for loops of X2.1-class-flaring active
-region (AR11283) during 22:10UT till 23:00UT, on 2011, September 6; and non-flaring active region
-(AR12194) during 08:00:00UT till 09:00:00UT on 2014, October 26. By using spatially-synthesized
-Gaussian DEM forward-fitting method, we calculate the peak temperatures for each strip of the loops.
-We apply the Lomb-Scargle method to compute the oscillations periods for the temperature series of each
-strip. The periods of the temperature oscillations for the flaring loops are ranged from 7 min to 28.4
-min. These temperature oscillations show very close behavior to the slow-mode oscillation. We observe
-that the temperature oscillations in the flaring loops are started at least around 10 minutes before the
-transverse oscillations and continue for a long time duration even after the transverse oscillations are
-ended. The temperature amplitudes are increased at the flaring time (during 20 min) in the flaring loops.
-The periods of the temperatures obtained for the non-flaring loops are ranged from 8.5 min to 30 min,but
-their significances are less (below 0.5) in comparison with the flaring ones (near to one). Hence the
-detected temperature periods for the non-flaring loops’ strips are less probable in comparison with the
-flaring ones, and maybe they are just fluctuations. Based on our confined observations, it seems that the
-flaring loops’ periods show more diversity and their temperatures have wider ranges of variation than the
-non-flaring ones. More accurate commentary in this respect requires more extensive statistical research
-and broader observations.
-Coronal Loops,Temperature Analysis, Temperature Oscillations,Flaring and non-Flaring Active Regions
-I. Introduction
-Analyzing the thermal structure of coronal loops is of considerable interest, especially as these
-magnetic loops have an essential role in heating the solar chromosphere and corona. Such anal￾ysis can help to describe how the process of solar flaring is correlated with the loop’s thermal
-structure.
-Detections of coronal waves have a historical preview and have been reported for several times
-(e.g., Aschwanden et al. (1999); Nakariakov et al. (1999);Wang et al. (2003); Wang & Solanki (2004);
-Berghmans & Clette (1999); De Moortel et al. (2000), Verwichte et al. (2004), De Moortel & Brady
-(2007), Ballai et al. (2011)). Coronal seismology and MHD waves have been reviewed widely by
-1
-De Moortel (2005), Nakariakov & Verwichte (2005), Aschwanden (2006), Banerjee et al. (2007) and
-De Moortel & Nakariakov (2012). Along with the development of the observations, transverse
-and longitudinal oscillations have also been studied theoretically (e.g., Gruszecki et al. (2006),
-Pascoe et al. (2007), Fathalian et al. (2010); Luna et al. (2010); Fathalian & Safari (2010). Coronal
-seismology techniques help to elicit the information from observations of oscillatory phenomena
-and the results to be interpreted by using theoretical models (see for e.g., Roberts et al. (1984);
-Goossens et al. (1992)). Oscillatory patterns and processes which happen during solar flares, were
-interesting and subject of investigations from different approaches (e.g., Nakariakov et al. (2010),
-Nisticò et al. (2013), Anfinogentov et al. (2013), Hindman & Jain (2014), Russell et al. (2015)). As
-we know the transverse loops oscillations usually occur in response to a close filament or flare
-(Wills-Davey & Thompson (1999)).
-Rapidly decaying long-period oscillations are mostly interpreted as global (or fundamen￾tal mode) standing slow magnetoacoustic waves (reviewed by Liu & Ofman (2014), and Wang
-(2011), also see Ofman & Wang (2002), and for slow-mode observed in fan-loops see Pant et al.
-(2017)). They often occur in hot coronal loops of active regions, associated with tiny (or micro-)
-flares.Increasing evidence has suggested that the harmonic type of decaying pulsations detected
-in intensity plots of solar and stellar flares are possibly caused by standing slow-mode waves (see
-reviews by Van Doorsselaere et al. (2016), and McLaughlin et al. (2018)).Excitation, propagation,
-and damping mechanisms of slow-mode waves have been studied theoretically (e.g., Wang et al.
-(2007); Wang et al. (2015); Jess et al. (2016); Nakariakov et al. (2017); Nisticò et al. (2017); Kolotkov
-et al. (2019); Krishna Prasad et al. (2019); Reale et al. (2019); Wang & Ofman (2019)). To have
-a complete overview of slow-mode magnetoacoustic waves in coronal loops see the review by
-Wang et al. (2021).
-Investigating and comparing the thermal structures and oscillations of coronal loops in loops
-of flaring and non-flaring active regions could help us in better understanding the loops’ material
-oscillations and the flare impact on them. Several different methods have been developed to in￾vestigate the thermal structure of the coronal loops and loop strands. The thermal stability of the
-coronal loops was the subject of research, done by Habbal & Rosner (1979) (and references cited
-therein). McClymont & Craig (1985) stated that a pressure fluctuation must assist asymmetric
-coronal temperature perturbation. They concluded that coronal loops are impartially stable in
-the case of uniform heating. Van Doorsselaere et al. (2011) used spectroscopic line ratios to obtain
-the required temperature (via CHIANTI code) and estimated the adiabatic index of the corona.
-The dependence of coronal loop temperature on loop length and magnetic field strength is also
-a favorite topic. For instance, Dahlburg et al. (2018) probed the temperature properties of solar
-coronal loops over a wide range of lengths and magnetic field strengths via numerical simula￾tions and observed a very high correlation between magnetic field strength and a maximum of
-the temperature. The effect of temperature inhomogeneity on the periods and the damping times
-of the standing slow-modes in stratified solar coronal loops was studied either (e.g., Abedini et al.
-(2012)). Fathalian (2019) estimated the loop temperature using the intensity ratios and the AIA re￾sponse functions in different wavelengths. Different emission measure (DEM) computations and
-methods have been developed to estimate the temperature in the corona, which led to various
-discussions. Schmelz et al. (2010) analyzed a coronal loop, which was observed on 2010 August
-3, by AIA. They took some differential emission measure (DEM) curves, claiming a multithermal
-rather than an isothermal DEM distribution (for the cross-sectional temperature of the loop). Af￾ter that, Aschwanden & Boerner (2011) criticized the method of background subtraction which
-Schmelz et al. had applied. They claimed that the background subtraction method caused their
-inferred result of a multithermal loop. Aschwanden & Boerner (2011) analyzed a set of hundred
+arXiv:2201.00214v1 [astro-ph.SR] 1 Jan 2022
+Temperature Analysis of Flaring
+(AR11283) and non-Flaring (AR12194)
+Coronal Loops
+N. Fathalian1, S. S. Hosseini Rad2, N. Alipour2, H. Safari2
+1Department of Physics, Payame Noor University (PNU), 19395-3697, Tehran, Iran.
+2Department of Physics, Faculty of Science, University of Zanjan, 45195-313, Zanjan, Iran.
+e-mail: narges_fathalian@alum.sharif.edu
+January 4, 2022
+Abstract
+Here, we study the temperature structure of flaring and non-flaring coronal loops, using extracted
+loops from images taken in six extreme ultraviolet (EUV) channels recorded by Atmospheric Imaging
+Assembly (AIA)/ Solar Dynamic Observatory (SDO). We use data for loops of X2.1-class-flaring active
+region (AR11283) during 22:10UT till 23:00UT, on 2011, September 6; and non-flaring active region
+(AR12194) during 08:00:00UT till 09:00:00UT on 2014, October 26. By using spatially-synthesized
+Gaussian DEM forward-fitting method, we calculate the peak temperatures for each strip of the loops.
+We apply the Lomb-Scargle method to compute the oscillations periods for the temperature series of each
+strip. The periods of the temperature oscillations for the flaring loops are ranged from 7 min to 28.4
+min. These temperature oscillations show very close behavior to the slow-mode oscillation. We observe
+that the temperature oscillations in the flaring loops are started at least around 10 minutes before the
+transverse oscillations and continue for a long time duration even after the transverse oscillations are
+ended. The temperature amplitudes are increased at the flaring time (during 20 min) in the flaring loops.
+The periods of the temperatures obtained for the non-flaring loops are ranged from 8.5 min to 30 min, but
+their significances are less (below 0.5) in comparison with the flaring ones (near to one). Hence the
+detected temperature periods for the non-flaring loops’ strips are less probable in comparison with the
+flaring ones, and maybe they are just fluctuations. Based on our confined observations, it seems that the
+flaring loops’ periods show more diversity and their temperatures have wider ranges of variation than the
+non-flaring ones. More accurate commentary in this respect requires more extensive statistical research
+and broader observations.
+Coronal Loops, Temperature Analysis, Temperature Oscillations, Flaring and non-Flaring Active Regions
+I. Introduction
+Analyzing the thermal structure of coronal loops is of considerable interest, especially as these
+magnetic loops have an essential role in heating the solar chromosphere and corona. Such analysis can help to describe how the process of solar flaring is correlated with the loop’s thermal
+structure.
+Detections of coronal waves have a historical preview and have been reported for several times
+(e.g., Aschwanden et al. (1999); Nakariakov et al. (1999); Wang et al. (2003); Wang & Solanki (2004);
+Berghmans & Clette (1999); De Moortel et al. (2000), Verwichte et al. (2004), De Moortel & Brady
+(2007), Ballai et al. (2011)). Coronal seismology and MHD waves have been reviewed widely by
+
+De Moortel (2005), Nakariakov & Verwichte (2005), Aschwanden (2006), Banerjee et al. (2007) and
+De Moortel & Nakariakov (2012). Along with the development of the observations, transverse
+and longitudinal oscillations have also been studied theoretically (e.g., Gruszecki et al. (2006),
+Pascoe et al. (2007), Fathalian et al. (2010); Luna et al. (2010); Fathalian & Safari (2010)). Coronal
+seismology techniques help to elicit the information from observations of oscillatory phenomena
+and the results to be interpreted by using theoretical models (see for e.g., Roberts et al. (1984);
+Goossens et al. (1992)). Oscillatory patterns and processes which happen during solar flares, were
+interesting and subject of investigations from different approaches (e.g., Nakariakov et al. (2010),
+Nisticò et al. (2013), Anfinogentov et al. (2013), Hindman & Jain (2014), Russell et al. (2015)). As
+we know the transverse loops oscillations usually occur in response to a close filament or flare
+(Wills-Davey & Thompson (1999)).
+Rapidly decaying long-period oscillations are mostly interpreted as global (or fundamental mode) standing slow magnetoacoustic waves (reviewed by Liu & Ofman (2014), and Wang
+(2011), also see Ofman & Wang (2002), and for slow-mode observed in fan-loops see Pant et al.
+(2017)). They often occur in hot coronal loops of active regions, associated with tiny (or micro-)
+flares. Increasing evidence has suggested that the harmonic type of decaying pulsations detected
+in intensity plots of solar and stellar flares are possibly caused by standing slow-mode waves (see
+reviews by Van Doorsselaere et al. (2016), and McLaughlin et al. (2018)). Excitation, propagation,
+and damping mechanisms of slow-mode waves have been studied theoretically (e.g., Wang et al.
+(2007); Wang et al. (2015); Jess et al. (2016); Nakariakov et al. (2017); Nisticò et al. (2017); Kolotkov
+et al. (2019); Krishna Prasad et al. (2019); Reale et al. (2019); Wang & Ofman (2019)). To have
+a complete overview of slow-mode magnetoacoustic waves in coronal loops see the review by
+Wang et al. (2021).
+Investigating and comparing the thermal structures and oscillations of coronal loops in loops
+of flaring and non-flaring active regions could help us in better understanding the loops’ material
+oscillations and the flare impact on them. Several different methods have been developed to investigate the thermal structure of the coronal loops and loop strands. The thermal stability of the
+coronal loops was the subject of research, done by Habbal & Rosner (1979) (and references cited
+therein). McClymont & Craig (1985) stated that a pressure fluctuation must assist asymmetric
+coronal temperature perturbation. They concluded that coronal loops are impartially stable in
+the case of uniform heating. Van Doorsselaere et al. (2011) used spectroscopic line ratios to obtain
+the required temperature (via CHIANTI code) and estimated the adiabatic index of the corona.
+The dependence of coronal loop temperature on loop length and magnetic field strength is also
+a favorite topic. For instance, Dahlburg et al. (2018) probed the temperature properties of solar
+coronal loops over a wide range of lengths and magnetic field strengths via numerical simulations and observed a very high correlation between magnetic field strength and a maximum of
+the temperature. The effect of temperature inhomogeneity on the periods and the damping times
+of the standing slow-modes in stratified solar coronal loops was also studied (e.g., Abedini et al.
+(2012)). Fathalian (2019) estimated the loop temperature using the intensity ratios and the AIA response functions in different wavelengths. Different emission measure (DEM) computations and
+methods have been developed to estimate the temperature in the corona, which led to various
+discussions. Schmelz et al. (2010) analyzed a coronal loop, which was observed on 2010 August
+3, by AIA. They took some differential emission measure (DEM) curves, claiming a multithermal
+rather than an isothermal DEM distribution (for the cross-sectional temperature of the loop). After that, Aschwanden & Boerner (2011) criticized the method of background subtraction which
+Schmelz et al. had applied. They claimed that the background subtraction method caused their
+inferred result of a multithermal loop. Aschwanden & Boerner (2011) analyzed a set of hundred
 loops and understood that 66% of the loops could be fitted with a narrowband single-Gaussian
-DEM model. In this regard, some attention was paid to the instrumental limitations and abil￾ity of AIA and Guennou et al. (2012a,b) discussed on the accuracy of the differential emission
-measure diagnostics of solar plasmas in respect of the AIA instrument of SDO. The abovemen￾tioned controversy of whether the cross-field temperatures of coronal loops are multithermal or
-isothermal, continued by Schmelz et al. (2013) (similar to Schmelz et al. (2011)). They analyzed
-twelve loops to understand the cross-field temperature distributions of them and reveal the loops’
-substructure. Based on their achievements, the warmer loops entail broader DEMs. Thereafter,
-Schmelz et al. (2014) found indications of a relationship between the DEM weighted-temperature
-and the cross-field DEM width for coronal loops. They argued that cooler loops tend to have
-narrower DEM widths. This could imply that fewer strands are seen emitting in the later cool￾ing phase, which they claim could potentially resolve the abovementioned controversy. In this
-subject, Aschwanden et al. (2015) (as well as 2013 (Aschwanden, 2013)) developed a method to
-extract the loop temperature which is based on Gaussian fit for Differential Emission Measure,
-named spatially-synthesized Gaussian DEM forward-fitting method (DEM hereafter).
-This paper aims to analyze and compare thermal oscillations of coronal loops in flaring and
-non-flaring active regions, 11283 and 12194, respectively. The contents of this paper are as follows:
-In section II, data, we introduce the considered flaring and non-flaring active regions and describe
-the data employed and the time and properties of the flare, occurred in the active region. In
-section III, we explain the method we use to analyze the time-series of temperatures in different
-strips of the loops. Section IV is specified to our results, obtained related to flaring and non￾flaring regions. In section V we briefly state a summary of this work.
-II. Data
-We investigate the thermal structure and treatment of loops in a flaring region to see if it follows
-the transverse oscillations of the loops, and we examine the thermal fluctuations at the flare time.
-For this purpose, we select a high energy flare x2.1 which the transverse oscillations of two loops
-of it have been analyzed by Jain et al. (2015). They analyzed intensity variations in the wavelength
-171 in two coronal loops of this region and detected obvious transverse oscillation with periods
-of roughly 2 minutes and decay times of 5 minutes for these loops at the flare time. To see
-the specific thermal properties of the flaring loops, as a blind test, we select a non-flaring active
-region, extract its loops and analyze their thermal treatment. Then we compare the temperature
-treatment of the loops at the flaring region with the loops of the non-flaring region to see the
-differences.
-The temperature analysis done here uses EUV images from the AIA onboard the SDO. AIA
-has ten different wavelength channels, three in white light and UV, and the other seven in EUV
-channels. Between these seven, the 304 filter, which is mostly sensitive to chromospheric temper￾atures (in order of T = 104.7K), not the corona, is not taken into account (Aschwanden et al. 2015).
-Therefore, we consider the images of the events in the six wavelengths (94, 131, 171, 193, 211, 335
-). These are covering the coronal temperature range from T ≈ 0.6 to T ≥ 16MK.
-The two below data sets are finally selected to study thermal variations and coronal loops
-oscillations in flaring or non-flaring active regions. A few distinct loops are visible in the regions.
-Finally, these loops are chosen:
-– Three loops of the x-flaring active region 11283: Observationally, the X-class flares are rarely
-happening around the loops with the specification we are looking for. So this selected LOS
-X-flare, which occurs near the loops is of rare cases. We consider EUV images of NOAA
+DEM model. In this regard, some attention was paid to the instrumental limitations and ability of AIA and Guennou et al. (2012a,b) discussed on the accuracy of the differential emission
+measure diagnostics of solar plasmas in respect of the AIA instrument of SDO. The abovementioned controversy of whether the cross-field temperatures of coronal loops are multithermal or
+isothermal, continued by Schmelz et al. (2013) (similar to Schmelz et al. (2011)). They analyzed
+twelve loops to understand the cross-field temperature distributions of them and reveal the loops’
+substructure. Based on their achievements, the warmer loops entail broader DEMs. Thereafter,
+Schmelz et al. (2014) found indications of a relationship between the DEM weighted-temperature
+and the cross-field DEM width for coronal loops. They argued that cooler loops tend to have
+narrower DEM widths. This could imply that fewer strands are seen emitting in the later cooling phase, which they claim could potentially resolve the abovementioned controversy. In this
+subject, Aschwanden et al. (2015) (as well as 2013 (Aschwanden, 2013)) developed a method to
+extract the loop temperature which is based on Gaussian fit for Differential Emission Measure,
+named spatially-synthesized Gaussian DEM forward-fitting method (DEM hereafter).
+This paper aims to analyze and compare thermal oscillations of coronal loops in flaring and
+non-flaring active regions, 11283 and 12194, respectively. The contents of this paper are as follows:
+In section II, data, we introduce the considered flaring and non-flaring active regions and describe
+the data employed and the time and properties of the flare, occurred in the active region. In
+section III, we explain the method we use to analyze the time-series of temperatures in different
+strips of the loops. Section IV is specified to our results, obtained related to flaring and non-flaring regions. In section V we briefly state a summary of this work.
+II. Data
+We investigate the thermal structure and treatment of loops in a flaring region to see if it follows
+the transverse oscillations of the loops, and we examine the thermal fluctuations at the flare time.
+For this purpose, we select a high energy flare x2.1 which the transverse oscillations of two loops
+of it have been analyzed by Jain et al. (2015). They analyzed intensity variations in the wavelength
+171 in two coronal loops of this region and detected obvious transverse oscillation with periods
+of roughly 2 minutes and decay times of 5 minutes for these loops at the flare time. To see
+the specific thermal properties of the flaring loops, as a blind test, we select a non-flaring active
+region, extract its loops and analyze their thermal treatment. Then we compare the temperature
+treatment of the loops at the flaring region with the loops of the non-flaring region to see the
+differences.
+The temperature analysis done here uses EUV images from the AIA onboard the SDO. AIA
+has ten different wavelength channels, three in white light and UV, and the other seven in EUV
+channels. Between these seven, the 304 filter, which is mostly sensitive to chromospheric temperatures (in order of $T = 10^{4.7}$ K), not the corona, is not taken into account (Aschwanden et al. 2015).
+Therefore, we consider the images of the events in the six wavelengths (94, 131, 171, 193, 211, 335
+Å). These are covering the coronal temperature range from T ≈ 0.6 to T ≥ 16 MK.
+The two below data sets are finally selected to study thermal variations and coronal loops
+oscillations in flaring or non-flaring active regions. A few distinct loops are visible in the regions.
+Finally, these loops are chosen:
+– Three loops of the X-flaring active region 11283: Observationally, X-class flares rarely
+happen around loops with the specifications we are looking for, so this selected LOS
+X-flare, which occurs near the loops, is a rare case. We consider EUV images of NOAA
 AR 11283, in the time period of 22:10UT till 23:00UT of 2011 September 6 with the cadence
-of 12 sec. This period of time is selected since no other flare is happening during it. A
-few distinct loops are visible and follow-able here during this period. Loop shapes in our
-active region change permanently; therefore, it is difficult or impossible to follow a loop
-over a very long time. Hence, it is not useful to extend the time interval of this region
-to the time before the flare. The transverse oscillations of two loops in this region were
-analyzed before by Jain et al. (2015). We mark these loops by A and B in Figure 1 b. They
-detected fundamental mode oscillation with periods of roughly 2 minutes and decay time
-of 5 minutes for these loops. We are curious to see the loops’ thermal oscillations (if any)
-or thermal fluctuations in this condition. Figure 1a (left) displays AR 11283 and the area,
-indicated by the white box is featured in a zoom-in view in Figure 1.b (right) and the five
-selected parts of the center of the three chosen loops are shown by red lines (the movie of
-the region is available in this link). As it is clear in the movie, these three loops oscillate
-together and their oscillations decay simultaneously. The center of figure 1.a is coordinated
-at (230, 165) arcsec and its width and height are 450′′
-× 456′′ /750 × 775 pixels. The flare
-occurring in this active region is an X2.1 class flare located close to the disk center at latitude
-14◦ north and longitude 18◦ west (269.9 arcsec, 129.9 arcsec). This flare initiates at 22:12UT,
-ends about 22:24UT with the peak at 22:20UT, and associates with a coronal mass ejection
-(CME) which occurs from 2011 September 6, 21:36:05T to 2011 September 7, 02:24:05T, with
-the radial velocity of 469 km/s,angular width of 252 deg, and position angle of 275 deg (for
-more details look at LASCO CME catalogue.) 1
-– Three loops of non-flaring active region 12194: As a blind test, we select three loops of the
-non-flaring (nonf hereafter) active region 12194 in the smooth time period of 08:00:00UT till
-09:00:00UT of 2014 October 26. The center of figure 2.a is coordinated at (0, -264) arcsec
-and its width and height are 615′′
-× 615′′ /1025 × 1025 pixels. We consider the images of
-the selected area with the cadence of 12 sec in the same six wavelengths mentioned above.
-These loops are relatively motionless and do not show any transversal oscillation (see the
-region’s movie in the link). We select the loops in such a way that they do not have any
-crossing over the neighbor loops (in our perspective) during this time. In figure 2 the
-selected loops are distinguished in red in the mentioned active region. The size of the final
-cut of non-flaring region (represented in the right) is 351 × 401 pixels.
-The data set are primarily downloaded at level 1 with a pixel resolution of 0.6 arcsec. We use
-the standard aia_prep.pro subroutine available in SDO package SolarSoftWare library to adjust
-the screen scale between the four arms of the AIA. This pre-processing step increases the data
-level from 1 to 1.5, so that finally no jump or sudden movement is observed in the image series.
-We also used drot_map.pro subroutine to correct the differential rotation effect. According to the
-movie made by pre-processed images, the most obvious loops (marked in the abovementioned
-figures) are selected in each region (with obvious transversal oscillations in the case of the flaring
-active region).
-III. Temperature Analysis Method
-We extract the selected loop segment pixels, for each loop, and calculate the normal vectors
-to each point of the loop’s direction. Then by using these data, we straighten each loop in a
-considered box with the thickness of 15 to 40 pixels (macro-pixels, depending on the available
-empty area around each loop and the distance to the neighbor loop). The area around the
-loop is needed for calculations of background subtraction. The selected loop segment is cut in
+of 12 sec. This period of time is selected since no other flare is happening during it. A
+few distinct loops are visible and follow-able here during this period. Loop shapes in our
+active region change permanently; therefore, it is difficult or impossible to follow a loop
+over a very long time. Hence, it is not useful to extend the time interval of this region
+to the time before the flare. The transverse oscillations of two loops in this region were
+analyzed before by Jain et al. (2015). We mark these loops by A and B in Figure 1 b. They
+detected fundamental mode oscillation with periods of roughly 2 minutes and decay time
+of 5 minutes for these loops. We are curious to see the loops’ thermal oscillations (if any)
+or thermal fluctuations in this condition. Figure 1a (left) displays AR 11283 and the area,
+indicated by the white box is featured in a zoom-in view in Figure 1.b (right) and the five
+selected parts of the center of the three chosen loops are shown by red lines (the movie of
+the region is available in this link). As it is clear in the movie, these three loops oscillate
+together and their oscillations decay simultaneously. The center of figure 1.a is coordinated
+at (230, 165) arcsec and its width and height are 450′′× 456′′ /750 × 775 pixels. The flare
+occurring in this active region is an X2.1 class flare located close to the disk center at latitude
+14° north and longitude 18° west (269.9 arcsec, 129.9 arcsec). This flare initiates at 22:12UT,
+ends about 22:24UT with the peak at 22:20UT, and associates with a coronal mass ejection
+(CME) which occurs from 2011 September 6, 21:36:05 UT to 2011 September 7, 02:24:05 UT, with
+a radial velocity of 469 km/s, an angular width of 252 deg, and a position angle of 275 deg (for
+more details, see the LASCO CME catalogue). 1
+– Three loops of non-flaring active region 12194: As a blind test, we select three loops of the
+non-flaring (nonf hereafter) active region 12194 in the smooth time period of 08:00:00UT till
+09:00:00UT of 2014 October 26. The center of figure 2.a is coordinated at (0, -264) arcsec
+and its width and height are 615′′× 615′′ /1025 × 1025 pixels. We consider the images of
+the selected area with the cadence of 12 sec in the same six wavelengths mentioned above.
+These loops are relatively motionless and do not show any transversal oscillation (see the
+region’s movie in the link). We select the loops in such a way that they do not have any
+crossing over the neighbor loops (in our perspective) during this time. In figure 2 the
+selected loops are distinguished in red in the mentioned active region. The size of the final
+cut of non-flaring region (represented in the right) is 351 × 401 pixels.
+The data sets are initially downloaded at level 1 with a pixel resolution of 0.6 arcsec. We use
+the standard aia_prep.pro subroutine available in SDO package SolarSoftWare library to adjust
+the screen scale between the four arms of the AIA. This pre-processing step increases the data
+level from 1 to 1.5, so that finally no jump or sudden movement is observed in the image series.
+We also used drot_map.pro subroutine to correct the differential rotation effect. According to the
+movie made by pre-processed images, the most obvious loops (marked in the abovementioned
+figures) are selected in each region (with obvious transversal oscillations in the case of the flaring
+active region).
+III. Temperature Analysis Method
+We extract the selected loop segment pixels, for each loop, and calculate the normal vectors
+to each point of the loop’s direction. Then by using these data, we straighten each loop in a
+considered box with the thickness of 15 to 40 pixels (macro-pixels, depending on the available
+empty area around each loop and the distance to the neighbor loop). The area around the
+loop is needed for calculations of background subtraction. The selected loop segment is cut in
 1Based on data on these WebSites: https://solarflare.njit.edu/webapp.html, and https://www.swpc.noaa.gov/
-all wavelengths and at the same considered box from the images set. These loop images are
-necessary entrances for our thermal analysis process. Then the loop is divided into different
-strips and its best division in terms of pixel intervals is considered. To do thermal analysis, we
-use the spatially-synthesized Gaussian DEM forward-fitting method founded by Aschwanden
-et al. (2015).
-The images in the above six wavelength filters are considered to calculate the temperature in
-each strip of the loop. The DEM function is considered a single-Gaussian function relative to the
-temperature determined by the forward fitting method. To obtain the temperature for each loop,
-we divided the loop into narrow strips, and then the intensity flux was averaged over each strip.
-The number of each strip is displayed with the index i. One of the usual methods to subtract
-the background from observed data is fitting a single-Gaussian cospatial function with a linear
-function on the flux profile. The DEM for each strip is considered to be single-Gaussian DEM
-in terms of the logarithm of the temperature, which has three free parameters (Aschwanden &
-Boerner, 2011):
-DEMi =
-dEMi
-dT = EMp,i exp (−
-[log (T) − log (Tp,i
-)
-2σ
-2
-T,i
-). (1)
-In which, Tp,i
-is the DEM peak temperature, EMp,i
-is the peak EM function, and σT,i
-is the
-logarithmic width of the temperature for that strip. To calculate the background-subtracted fluxes
-(for each strip) we use Eq.6 of Aschwanden & Boerner (2011) (in below):
-F0λ =
-Z
-dEM(T)
-dT Rλ(T)dT = ∑
-k
-EM(Tk
-)Rλ(Tk
-). (2)
-Here, Rλ(T) is the instrumental temperature response function of each wavelength filter λ, which
-is obtained by the code aia_get_response.pro in the SSW package. As time has passed, the AIA
-response functions calibration has partly changed. Here, we use the updated calibration of the
-temperature response functions, for each of the AIA temperature filters, according to the CHI￾ANTI Version 2019 code available in the Solar SoftWare (SSW). After forward-fitting the Gaussian
-DEM to the background-subtracted observed fluxes in multiple wavelengths, the three-fitting pa￾rameters, temperature width (σT,i
-), peak of temperature (Tp,i
-), and peak emission measure (EMp,i
-)
-are found by minimizing χ
-2
-i
-.
-Our data sample is uneven because of omitting some damaged images in between. There￾fore to analyze the temperature oscillations, we use the Lomb-Scargle method. This method is
-developed to use the technique periodogram, in the case where the observation times are un￾evenly spaced (Scargle, 1982). The Lomb-Scargle periodogram method is useful in cases where
-the periodicity of data treatment is not immediately apparent. This method allows efficient com￾putation of a Fourier-like power spectrum estimator from unevenly-sampled data, resulting in
-an intuitive means of determining the period of oscillation (VanderPlas, 2018). Therefore we use
-Lomb-Scargle Periodogram to evaluate and estimate the efficient periods of temperature oscilla￾tions in our loops. We select the first period related to the highest power frequency, which is
-obtained by this method.We considered the achieved periods with the highest significances and
-amplitudes. The most significant (highest) periods observed in temperature (minute) for flaring
-and non-flaring loops are listed in Tables 1 and 2, respectively. To estimate the significance of
-the periods, we computed the probability values (p-values). In the Lomb-Scargle method, the
-significance returned here is the false alarm probability of the null hypothesis, i.e., as the data
-is composed of independent Gaussian random variables. Accordingly, low probability values
+all wavelengths and at the same considered box from the images set. These loop images are
+necessary entrances for our thermal analysis process. Then the loop is divided into different
+strips and its best division in terms of pixel intervals is considered. To do thermal analysis, we
+use the spatially-synthesized Gaussian DEM forward-fitting method founded by Aschwanden
+et al. (2015).
+The images in the above six wavelength filters are considered to calculate the temperature in
+each strip of the loop. The DEM function is considered a single-Gaussian function relative to the
+temperature determined by the forward fitting method. To obtain the temperature for each loop,
+we divided the loop into narrow strips, and then the intensity flux was averaged over each strip.
+The number of each strip is displayed with the index i. One of the usual methods to subtract
+the background from observed data is fitting a single-Gaussian cospatial function with a linear
+function on the flux profile. The DEM for each strip is considered to be single-Gaussian DEM
+in terms of the logarithm of the temperature, which has three free parameters (Aschwanden &
+Boerner, 2011):
+$$
+\mathrm{DEM}_i = \frac{dEM_i}{dT}
+= EM_{p,i}
+\exp\left(
+-\frac{[\log(T) - \log(T_{p,i})]^2}
+{2\sigma_{T,i}^2}
+\right). \tag{1}
+$$
+Here, $T_{p,i}$ is the DEM peak temperature, $EM_{p,i}$ is the peak emission measure, and $\sigma_{T,i}$ is the
+logarithmic width of the temperature for that strip. To calculate the background-subtracted fluxes
+(for each strip) we use Eq.6 of Aschwanden & Boerner (2011) (in below):
+$$
+F_{0\lambda} = \int \frac{dEM(T)}{dT}
+R_\lambda(T)\, dT
+= \sum_k EM(T_k)
+R_\lambda(T_k). \tag{2}
+$$
+Here, Rλ(T) is the instrumental temperature response function of each wavelength filter λ, which
+is obtained by the code aia_get_response.pro in the SSW package. As time has passed, the AIA
+response functions calibration has partly changed. Here, we use the updated calibration of the
+temperature response functions, for each of the AIA temperature filters, according to the CHIANTI Version 2019 code available in the Solar SoftWare (SSW). After forward-fitting the Gaussian
+DEM to the background-subtracted observed fluxes in multiple wavelengths, the three fitting parameters,
+temperature width ($\sigma_{T,i}$),
+peak temperature ($T_{p,i}$),
+and peak emission measure ($EM_{p,i}$),
+are found by minimizing
+$\chi_i^2$.
+Our data sample is uneven because of omitting some damaged images in between. Therefore to analyze the temperature oscillations, we use the Lomb-Scargle method. This method is
+developed to use the technique periodogram, in the case where the observation times are unevenly spaced (Scargle, 1982). The Lomb-Scargle periodogram method is useful in cases where
+the periodicity of data treatment is not immediately apparent. This method allows efficient computation of a Fourier-like power spectrum estimator from unevenly-sampled data, resulting in
+an intuitive means of determining the period of oscillation (VanderPlas, 2018). Therefore we use
+Lomb-Scargle Periodogram to evaluate and estimate the efficient periods of temperature oscillations in our loops. We select the first period related to the highest power frequency, which is
+obtained by this method. We considered the achieved periods with the highest significances and
+amplitudes. The most significant (highest) periods observed in temperature (minute) for flaring
+and non-flaring loops are listed in Tables 1 and 2, respectively. To estimate the significance of
+the periods, we computed the probability values (p-values). In the Lomb-Scargle method, the
+significance returned here is the false alarm probability of the null hypothesis, i.e., as the data
+is composed of independent Gaussian random variables. Accordingly, low probability values
 (p-value less than 0.05) indicate a high degree of significance in the associated periodic signal.
-IV. Results
-i. Temperature Analysis of Flaring Active Region Loops
-Thenceforth the temperature time-series of different strips of the selected loops are calculated
-using the method described in section 3. In the following figures, the vertical axis shows the
-logarithm of the temperature and the horizontal axis shows the time duration. To be comparable
-by eyes, all the forthcoming figures (which show the loops temperature oscillations) have been co￾scaled in the (log) temperature range of 5.7 to 6.9. The color maps are shown for each temperature
-map. Loops A, B1, B2, C1, and C2 are subdivided into 25, 11, 8, 12, and 6 strips, respectively. Each
-strip’s length is equal to 4 pixels (macro-pixel), for all loops in this paper. For brevity, a few strips’
-temperature oscillations are presented here. Figure 3 displays the time-series of temperature
-oscillations for the first 3 strips of Loop A, and first 2 strips of loops B1. We calculated the
-errors for each point (temperature) but removed in the presentation to avoid overcrowding of the
-figures. As we observe in Figures 3 and 4), the temperature oscillations are started and increase
-around 22:12 before the flare peak time (22:20) and are mostly continuing after the flare ended
-(22:24). These temperature oscillations follow the transverse loop oscillations observed by Jain
-et al. (2015). As Jain et al. reported, LoopA and B have a transverse oscillation with periods
-of roughly 2 minutes and decay times of 5 minutes, starting at 22:18 around the flare peak time
-(23:20) and decaying after the flare ended (22:24). So as we observe, the temperature oscillations in
-these flaring loops happen before the start of their transverse oscillations and are continuing even
-in the time interval after the transverse oscillations decay. Although the temperature oscillations
-do not decay as rapid as the transverse oscillations do, and conversely, the loop temperature
-increases at the end of the oscillating mode (see Fig.4, the temperature map of the loop A, for
-instance)
-We calculate the temperature oscillations periods, using Lomb-Scargle method. We consider
-the thermal oscillations periods with the highest significances. As this method shows, the most
-powerful period in the range of data time-series (listed in Table1) are from 7 to 28.4 minutes
-observed in the strips of the marked loops of this flaring region. These loops of flaring region
-also show some short periods in temperature oscillations which some are less than 10 minutes
-(listed in Table1). These short periods are more frequently observed in the loops of the flaring
-active region. Such short periods are very scarce for the loops of the non-flaring active region
-(compare Tables1 and 2).
-The first column in Table1 is the number of every strip along the loop. The second column is
-the period of the most powerful frequency observed for the loop strips, calculated by the Lomb￾Scargle method. The third column shows the maximum of log(T) minus its minimum in each
-strip. The columns of Table2 are exactly the same as Table1; the only difference is that Table2 is
-for the non-flaring loops.
-The loop A, has the length of 42.3 (Mm) which is the length of the selected part of the loop
-marked in Figure 1.b. The mean of the parameter (Max(log T)-Min(log T)) for the strips of loop A
-is 1.21. Mean of the temperature (log) of this loop over time is 6.15 ± 0.25. The loop B1, divided
-into 11 strips, has the length of 20.24 (Mm). The mean of (Max(log T)-Min(log T)) and the mean
-of the temperature for this loop are, 1.10, and 6.28 ± 0.22 respectively. The loop B2, which has 8
-strips, with the length of 15.61 (Mm), has the mean temperature (log) of 6.21 ± 0.21. The mean
-of (Max(log T)-Min(log T)) is 0.81 through this loop segment. The loops C1 and C2, divided into
-12, and 6 strips, have the lengths of 22.08 and 11.06 (Mm), the mean temperatures of 6.25 ± 0.22,
-and 6.14 ± 0.25 (log), and the mean (Max(log T)-Min(log T)) of 1.48, 0.88, respectively.
+IV. Results
+i. Temperature Analysis of Flaring Active Region Loops
+Thenceforth the temperature time-series of different strips of the selected loops are calculated
+using the method described in section 3. In the following figures, the vertical axis shows the
+logarithm of the temperature and the horizontal axis shows the time duration. To be comparable
+by eye, all the forthcoming figures (which show the loops' temperature oscillations) have been co-scaled to the (log) temperature range of 5.7 to 6.9. The color maps are shown for each temperature
+map. Loops A, B1, B2, C1, and C2 are subdivided into 25, 11, 8, 12, and 6 strips, respectively. Each
+strip’s length is equal to 4 pixels (macro-pixel), for all loops in this paper. For brevity, a few strips’
+temperature oscillations are presented here. Figure 3 displays the time-series of temperature
+oscillations for the first 3 strips of Loop A, and first 2 strips of loops B1. We calculated the
+errors for each point (temperature) but removed in the presentation to avoid overcrowding of the
+figures. As we observe in Figures 3 and 4, the temperature oscillations start and increase
+around 22:12 before the flare peak time (22:20) and are mostly continuing after the flare ended
+(22:24). These temperature oscillations follow the transverse loop oscillations observed by Jain
+et al. (2015). As Jain et al. reported, LoopA and B have a transverse oscillation with periods
+of roughly 2 minutes and decay times of 5 minutes, starting at 22:18 around the flare peak time
+(22:20) and decaying after the flare ended (22:24). So as we observe, the temperature oscillations in
+these flaring loops happen before the start of their transverse oscillations and are continuing even
+in the time interval after the transverse oscillations decay. Moreover, the temperature oscillations
+do not decay as rapidly as the transverse oscillations do; on the contrary, the loop temperature
+increases at the end of the oscillating mode (see Fig. 4, the temperature map of loop A, for
+instance).
+We calculate the temperature oscillations periods, using Lomb-Scargle method. We consider
+the thermal oscillations periods with the highest significances. As this method shows, the most
+powerful period in the range of data time-series (listed in Table1) are from 7 to 28.4 minutes
+observed in the strips of the marked loops of this flaring region. These loops of flaring region
+also show some short periods in temperature oscillations which some are less than 10 minutes
+(listed in Table1). These short periods are more frequently observed in the loops of the flaring
+active region. Such short periods are very scarce for the loops of the non-flaring active region
+(compare Tables1 and 2).
+The first column in Table1 is the number of every strip along the loop. The second column is
+the period of the most powerful frequency observed for the loop strips, calculated by the Lomb-Scargle method. The third column shows the maximum of log(T) minus its minimum in each
+strip. The columns of Table2 are exactly the same as Table1; the only difference is that Table2 is
+for the non-flaring loops.
+The loop A, has the length of 42.3 (Mm) which is the length of the selected part of the loop
+marked in Figure 1.b. The mean of the parameter (Max(log T)-Min(log T)) for the strips of loop A
+is 1.21. Mean of the temperature (log) of this loop over time is 6.15 ± 0.25. The loop B1, divided
+into 11 strips, has the length of 20.24 (Mm). The mean of (Max(log T)-Min(log T)) and the mean
+of the temperature for this loop are, 1.10, and 6.28 ± 0.22 respectively. The loop B2, which has 8
+strips, with the length of 15.61 (Mm), has the mean temperature (log) of 6.21 ± 0.21. The mean
+of (Max(log T)-Min(log T)) is 0.81 through this loop segment. The loops C1 and C2, divided into
+12, and 6 strips, have the lengths of 22.08 and 11.06 (Mm), the mean temperatures of 6.25 ± 0.22,
+and 6.14 ± 0.25 (log), and the mean (Max(log T)-Min(log T)) of 1.48, 0.88, respectively.
 We observe that despite the temperature oscillations, the flaring loops show a temperature
-rise at the end of the considered time interval (figure3). As their temperature maps also show,
-the oscillations follow with a relatively sensible rise in the final temperature of the loop segments
-(Figures 4). Although in the case of the transverse oscillations, the loops oscillate as the flare
-occurs and then the oscillations decay and stop, in the case of temperature oscillations, the tem￾peratures of the various strips of the loops oscillate and at the end of the flare occurrence, they
-get to a relatively higher value of temperature in average.
-Figure 4 shows the temperature maps of the flaring loops A, B1, B2, C1, and C2, respectively
-as a time series. In each plot, the vertical axis is the distance along the loop segment in Mm, and
-the horizontal axis shows time. The color bar (in the left) shows the temperature range. Each
-separated grid part on the map is standing for one strip. Figure 4 shows that the temperature
-for most of the strips increased, bypassing a few oscillations. Before the end of the time duration,
-some strips become hotter (yellow ones) and some cooler (blue ones). The loop B1 is colder at
-the early times of the duration and becomes hotter at the middle and end times with a swing
-to lower temperatures again (see Fig. 4). There are some temperature fluctuations at the middle
-times (the red and green stripes) while at the end the strips temperatures are smoother with less
-fluctuations. The temperature map of the loop segment B2 (Fig.4) shows that at the beginning of
-the time duration, the first strips of the loop are hotter, and the last ones are colder, but at the end
-times this pattern is reversed in this loop segment. In loop segment C1 (Fig.4), the temperature
-fluctuations are mainly observed to start after the end of the flare (22:24), and at the end time
-(23:00) the temperature is much higher than the beginning. The temperature is increasing after
-the flare time (22:24) for the loop C2 either (see Fig.4). This happens with some oscillations in
-the strips’ temperatures. So as figure 4 shows, the temperature increases with some fluctuation
-in most of the flaring loops’ strips after the flare time. According to these temperature maps,
-the temperature fluctuations in the flaring loops are increasing at the flaring time and around 20
-minutes after that.
-We expect the flaring loops to cool down as a result of heat conduction and radiative cooling.
-Hence this relative temperature increase should be scrutinized. As we probed, this temperature
-rise is also followed in intensity time-series. As the intensity time-series show, the related intensity
-in the Loop A of the flaring AR increases at the end of the time duration. To be assured, the
-authors also checked the wavelength of Fe XV I I I which has a peak formation temperature of
-7 × 106 ◦K (Ugarte-Urra & Warren (2014)). By using the method developed by Warren et al. (2012)
-the contribution of the Fe XV I I I emission line can be isolated from the AIA 94 , to analyze the
-evolution of hot plasma in the loops. We do it to omit the contamination from the cooler plasma
-(mostly around 1MK) which also contributes to this AIA channel Boerner et al. (2012). This is
-done by subtracting the contaminating warm (i.e., around 1MK) component to the bandpass.
-This warm contribution is calculated from a weighted combination of the emission from the AIA
-171 and 193 channels dominated by Fe X and Fe X I I emission, respectively. This intensity
-analysis is done directly and it has not gone through any other process like the thermal analysis.
-For this purpose, we applied the formulation (1) used by Li et al. (2015). Plots in Figure 5 show
-the intensity map, and the mean intensity variation of the wavelength Fe XV I I I, for Loop A of
-the flaring region, respectively. As these plots show, this intensity is also higher at the end of
-the time duration in respect of the flare time. It seems to us that the expected cooling has not
-occurred in these flaring loops yet, even after the flare occurrence in the probed duration due to
-some plausible reasons. We consider that the mentioned simultaneous CME (see sectionII) which
-this flare is associated with could cause this increase in temperature. We can be sure that the
-source of this CME is AR 11283 (Romano et al. (2015)). This CME is in our flare region, hence
-the loops receive energy even after the flare occurrence and it is probably the reason why the
+rise at the end of the considered time interval (figure3). As their temperature maps also show,
+the oscillations follow with a relatively sensible rise in the final temperature of the loop segments
+(Figures 4). Although in the case of the transverse oscillations, the loops oscillate as the flare
+occurs and then the oscillations decay and stop, in the case of temperature oscillations, the temperatures of the various strips of the loops oscillate and at the end of the flare occurrence, they
+get to a relatively higher value of temperature in average.
+Figure 4 shows the temperature maps of the flaring loops A, B1, B2, C1, and C2, respectively
+as a time series. In each plot, the vertical axis is the distance along the loop segment in Mm, and
+the horizontal axis shows time. The color bar (in the left) shows the temperature range. Each
+separated grid part on the map is standing for one strip. Figure 4 shows that the temperature
+for most of the strips increased, bypassing a few oscillations. Before the end of the time duration,
+some strips become hotter (yellow ones) and some cooler (blue ones). The loop B1 is colder at
+the early times of the duration and becomes hotter at the middle and end times with a swing
+to lower temperatures again (see Fig. 4). There are some temperature fluctuations at the middle
+times (the red and green stripes) while at the end the strips temperatures are smoother with less
+fluctuations. The temperature map of the loop segment B2 (Fig.4) shows that at the beginning of
+the time duration, the first strips of the loop are hotter, and the last ones are colder, but at the end
+times this pattern is reversed in this loop segment. In loop segment C1 (Fig.4), the temperature
+fluctuations are mainly observed to start after the end of the flare (22:24), and at the end time
+(23:00) the temperature is much higher than the beginning. The temperature is increasing after
+the flare time (22:24) for loop C2 as well (see Fig. 4). This happens with some oscillations in
+the strips’ temperatures. So as figure 4 shows, the temperature increases with some fluctuation
+in most of the flaring loops’ strips after the flare time. According to these temperature maps,
+the temperature fluctuations in the flaring loops are increasing at the flaring time and around 20
+minutes after that.
+We expect the flaring loops to cool down as a result of heat conduction and radiative cooling.
+Hence this relative temperature increase should be scrutinized. As we probed, this temperature
+rise is also followed in intensity time-series. As the intensity time-series show, the related intensity
+in the Loop A of the flaring AR increases at the end of the time duration. To be assured, the
+authors also checked the Fe XVIII emission line, which has a peak formation temperature of
+$7 \times 10^{6}$ K (Ugarte-Urra & Warren 2014). By using the method developed by Warren et al. (2012),
+the contribution of the Fe XVIII emission line can be isolated from the AIA 94 Å channel, to analyze the
+evolution of hot plasma in the loops. We do it to omit the contamination from the cooler plasma
+(mostly around 1MK) which also contributes to this AIA channel Boerner et al. (2012). This is
+done by subtracting the contaminating warm (i.e., around 1MK) component to the bandpass.
+This warm contribution is calculated from a weighted combination of the emission from the AIA
+171 and 193 channels dominated by Fe X and Fe XII emission, respectively. This intensity
+analysis is done directly and it has not gone through any other process like the thermal analysis.
+For this purpose, we applied the formulation (1) used by Li et al. (2015). Plots in Figure 5 show
+the intensity map, and the mean intensity variation of the wavelength Fe XVIII, for Loop A of
+the flaring region, respectively. As these plots show, this intensity is also higher at the end of
+the time duration in respect of the flare time. It seems to us that the expected cooling has not
+occurred in these flaring loops yet, even after the flare occurrence in the probed duration due to
+some plausible reasons. We consider that the mentioned simultaneous CME (see sectionII) which
+this flare is associated with could cause this increase in temperature. We can be sure that the
+source of this CME is AR 11283 (Romano et al. (2015)). This CME is in our flare region, hence
+the loops receive energy even after the flare occurrence and it is probably the reason why the
 expected cooling does not occur.
-The thermal oscillations periods obtained the Lomb-Scargle method, do not have the same
-significance in all strips of the loops, but for most strips of the flaring loops, the significances are
-very near to one. To be assured about these oscillations, we probed the intensity time-series for
-each strip of the loops and we observed that this loop’s intensities shows intensity oscillations
-too (i.e., alongside the loop). The most probable dominant periods observed in intensity, for
-wavelength of 171 is 18.22, and 16.7 min for strips of F-Loop A, 16.7, and 18.22 min for strips of
-F-Loop B1, 16.70, and 12.52 for F-Loop B2, and 16.7 for F-Loop C1 and F-Loop C2. These periods
-are in the same order of the observed thermal oscillation periods. The intensity in this time series
-has not passed any thermal process but still shows oscillation periods close to thermal ones. So
-we think these results confirm the observation of thermal oscillations.
-ii. Temperature Analysis of non-Flaring Active Region Loops
-The temperature time-series for different strips of the selected loops of the non-flaring active
-region 12194 are calculated using the Lomb-Scargle method. In the following figures (Fig. 6),
-the vertical axis shows the logarithm of the temperature and the horizontal axis shows the time
-duration. Figure 6 displays the time-series of temperature variations for the first two strips of
-the non-flaring Loops A, and B. These figures are all co-scaled in the range of 5.7 to 6.9 for the
-logarithm of temperature (like the flaring loops range). The most powerful periods, observed in
-most of these non-flaring loops’ strips (listed in Table2) are from 8.5 min. to 30 min. Comparing
-the periods of the loops in the flaring region (Table1) with the non-flaring one (Table2), we see
-that the temperature periods of the flaring loops have lower values on average and have more
-diversity than the non-flaring ones. As Tables 1 and 2 show, the mean temperatures of nonf￾loops are lower in comparison with the f-loops, a fact we also expected from common sense.
-The parameter (Max(log T)-Min(log T)) in nonf-loops’ strips is less than that for the flaring loops’
-strips.
-Nonf-loop A, divided into 11 strips, has the length of 19.91 (Mm) which is the length of the
-selected part of the loop marked in Figure 2b. The mean of (Max(log T)-Min(log T)) for the strips
-of nonf-loop A is 0.81. Mean of the temperature (log) of this loop segment over time is 5.93 ± 0.10.
-Nonf-Loop B, divided into 6 strips, has the length of 11.11 (Mm), and the mean temperature (log),
-and the mean of (Max(log T)-Min(log T)) for this loop are, 5.99 ± 0.13 and 0.62 respectively. Nonf￾loop C, which has 5 strips, with the length of 10.13 (Mm), has the mean temperature (log) of
-5.82 ± 0.12, and the mean (Max(log T)-Min(log T)) of 0.56.
-The first highest period observed for the temperature oscillations of these non-flaring loops’
-strips is reported in Table2. As we observe the temperature periods in these non-flaring loops
-are mostly longer than those of the flaring loops (compare the values listed in Table1 and Table2).
-Therefore the temperature oscillations of these loops are a little slower than the flaring ones.
-Figure 7 shows the temperature maps of the non-flaring loops A, B, and C, respectively as a
-time series. In each plot, the vertical axis is the distance along the loop in Mm, and the horizontal
-axis is the time. The color bar in the left shows the colors considered for the temperature range.
-Each separated colored part in the map is one strip. These color maps are plotted totally at the
-same color range of the loops of the flaring region either.
-As figure 7 shows, the strips’ temperature of these non-flaring loops have fewer temperature
-fluctuations and are smoother in comparison with the flaring ones (Fig. 4). Furthermore, that
-much increase in the temperatures of the strips, which was obvious in the loops of the flaring
-region toward the end times, is not observed here. The temperatures are also totally lower in the
-nonf-loops in comparison with the flaring loops. Conversely, it seems that different strips of the
+The thermal oscillation periods obtained by the Lomb-Scargle method do not have the same
+significance in all strips of the loops, but for most strips of the flaring loops, the significances are
+very near to one. To be assured about these oscillations, we probed the intensity time-series for
+each strip of the loops and we observed that this loop’s intensities shows intensity oscillations
+too (i.e., alongside the loop). The most probable dominant periods observed in intensity, for
+wavelength of 171 is 18.22, and 16.7 min for strips of F-Loop A, 16.7, and 18.22 min for strips of
+F-Loop B1, 16.70, and 12.52 for F-Loop B2, and 16.7 for F-Loop C1 and F-Loop C2. These periods
+are in the same order of the observed thermal oscillation periods. The intensity in this time series
+has not passed any thermal process but still shows oscillation periods close to thermal ones. So
+we think these results confirm the observation of thermal oscillations.
+ii. Temperature Analysis of non-Flaring Active Region Loops
+The temperature time-series for different strips of the selected loops of the non-flaring active
+region 12194 are calculated using the Lomb-Scargle method. In the following figures (Fig. 6),
+the vertical axis shows the logarithm of the temperature and the horizontal axis shows the time
+duration. Figure 6 displays the time-series of temperature variations for the first two strips of
+the non-flaring Loops A, and B. These figures are all co-scaled in the range of 5.7 to 6.9 for the
+logarithm of temperature (like the flaring loops range). The most powerful periods, observed in
+most of these non-flaring loops’ strips (listed in Table2) are from 8.5 min. to 30 min. Comparing
+the periods of the loops in the flaring region (Table1) with the non-flaring one (Table2), we see
+that the temperature periods of the flaring loops have lower values on average and have more
+diversity than the non-flaring ones. As Tables 1 and 2 show, the mean temperatures of nonf-loops are lower in comparison with the f-loops, a fact we also expected from common sense.
+The parameter (Max(log T)-Min(log T)) in nonf-loops’ strips is less than that for the flaring loops’
+strips.
+Nonf-loop A, divided into 11 strips, has the length of 19.91 (Mm) which is the length of the
+selected part of the loop marked in Figure 2b. The mean of (Max(log T)-Min(log T)) for the strips
+of nonf-loop A is 0.81. Mean of the temperature (log) of this loop segment over time is 5.93 ± 0.10.
+Nonf-Loop B, divided into 6 strips, has the length of 11.11 (Mm), and the mean temperature (log),
+and the mean of (Max(log T)-Min(log T)) for this loop are, 5.99 ± 0.13 and 0.62 respectively. Nonf-loop C, which has 5 strips, with the length of 10.13 (Mm), has the mean temperature (log) of
+5.82 ± 0.12, and the mean (Max(log T)-Min(log T)) of 0.56.
+The first highest period observed for the temperature oscillations of these non-flaring loops’
+strips is reported in Table2. As we observe the temperature periods in these non-flaring loops
+are mostly longer than those of the flaring loops (compare the values listed in Table1 and Table2).
+Therefore the temperature oscillations of these loops are a little slower than the flaring ones.
+Figure 7 shows the temperature maps of the non-flaring loops A, B, and C, respectively as a
+time series. In each plot, the vertical axis is the distance along the loop in Mm, and the horizontal
+axis is the time. The color bar in the left shows the colors considered for the temperature range.
+Each separated colored part in the map is one strip. These color maps are plotted totally at the
+same color range of the loops of the flaring region either.
+As figure 7 shows, the strips’ temperature of these non-flaring loops have fewer temperature
+fluctuations and are smoother in comparison with the flaring ones (Fig. 4). Furthermore, that
+much increase in the temperatures of the strips, which was obvious in the loops of the flaring
+region toward the end times, is not observed here. The temperatures are also totally lower in the
+nonf-loops in comparison with the flaring loops. Conversely, it seems that different strips of the
 non-flaring loops have relatively more similar temperature fluctuations.
-As figure 8 shows, the peaks of the observed temperature periods for the loops’ strips of the
-flaring active region (blue ones), and non-flaring active region (red ones), are around 18 minutes,
-and 30 minutes, respectively. The temperature periods’ diversity is higher in the loops’ strips of
-the flaring active region, and shorter temperature periods (less than 10 minutes, nearer to the
-transverse oscillations periods) are observed in the case of the flaring loops’ strips in comparison
-with the non-flaring ones. And figure 9 shows that the increasing and decreasing of temperature
-range, or the difference between maximum and minimum of the temperature value (max(log(T))-
-min(log(T))), is much higher on average for the loops’ strips of the flaring AR in comparison with
-the loops’ strips of the non-flaring one.
-V. Summery
-We reported the temperature oscillations of coronal loops of a flaring active region. We selected
-the flaring active region 11283 to investigate the thermal structure and treatment of its loops. This
-region includes a high energy flare x2.1 and the transverse oscillations of two loops of it have been
-analyzed before by Jain et al. (2015). They analyzed intensity variations in the wavelength 171
-in two coronal loops of this region and detected obvious transverse oscillation with periods of
-roughly 2 minutes and decay times of 5 minutes for these loops (loops A and B in Figure.1b)
-at the flare time. We were curious to know if the temperature variations follow the transverse
-oscillations of the loops, or there is any relation or correlation between them. We also wanted to
-investigate the thermal fluctuations at the flare time. As a blind test to see the specific thermal
-properties of the flaring loops, we selected a LOS non-flaring active region (12194), extracted three
-segments of its loops and analyzed their thermal treatment. Then we compared the temperature
-treatment of the loops at the flaring region with the loops of the non-flaring region to see the
-differences. We were eager to observe the probable discrepancies between flaring and non-flaring
-loops in this respect.
-Here we used data of three loops of the flaring active region (AR11283) around the time of the
-Flare X2.1, from 22:10UT till 23:00UT on 2011 September 6, plus three loops of the non-flaring
-active region (AR12194), from 08:00:00UT till 09:00:00UT of 2014 October 26 (marked in figures
-1 and 2). To calculate the time series of the loop temperature values, we first extracted the loop
-pixels in each image and then displayed the loop straightly for all the images in the time series
-of different wavelengths. To do thermal analysis, we used the spatially-synthesized Gaussian
-DEM forward-fitting method founded by Aschwanden et al. (2015). We calculated the peak
-temperatures for each strip of the loops. Then we applied the Lomb-Scargle method to analyze
-temperature oscillations of the time-series for each strip of the loops.
-We observed temperature oscillations which are following the transverse loop oscillations
-observed by Jain et al. (2015) for the flaring loops. Furthermore, the temperature oscillations in
-these flaring loops happen before the transverse oscillations start and continue even in the time
-duration after the transverse oscillations decay. As observed, the temperature oscillations do not
-decay as rapidly as the transverse oscillations do. Conversely, the strips’ temperatures increase
-at the end of the oscillating mode and a rather sensible rise is observed in the final temperatures
-of the f-loops’ segments. The ranges of the obtained periods are from 7 min. to 28.4 min. for the
-flaring loops, and from 8.5 min. to 30 min. for the non-flaring loops. With the onset of X-flare in
-the F-loopA, which has a distinct transverse oscillation in the flaring time with period of roughly
-2 minutes and decay time of 5 minutes, a temperature oscillation is observed with periods of
-roughly 10 to 28.5 minutes in different segments of this loop. And as the transverse oscillation
-decays in this interval, no special definite decay is observed in its temperature oscillations.
+As figure 8 shows, the peaks of the observed temperature periods for the loops’ strips of the
+flaring active region (blue ones), and non-flaring active region (red ones), are around 18 minutes,
+and 30 minutes, respectively. The temperature periods’ diversity is higher in the loops’ strips of
+the flaring active region, and shorter temperature periods (less than 10 minutes, nearer to the
+transverse oscillations periods) are observed in the case of the flaring loops’ strips in comparison
+with the non-flaring ones. And figure 9 shows that the increasing and decreasing of temperature
+range, or the difference between maximum and minimum of the temperature value (max(log(T))-min(log(T))),
+is much higher on average for the loops’ strips of the flaring AR in comparison with
+the loops’ strips of the non-flaring one.
+V. Summary
+We reported the temperature oscillations of coronal loops of a flaring active region. We selected
+the flaring active region 11283 to investigate the thermal structure and treatment of its loops. This
+region includes a high energy flare x2.1 and the transverse oscillations of two loops of it have been
+analyzed before by Jain et al. (2015). They analyzed intensity variations in the wavelength 171
+in two coronal loops of this region and detected obvious transverse oscillation with periods of
+roughly 2 minutes and decay times of 5 minutes for these loops (loops A and B in Figure.1b)
+at the flare time. We were curious to know if the temperature variations follow the transverse
+oscillations of the loops, or there is any relation or correlation between them. We also wanted to
+investigate the thermal fluctuations at the flare time. As a blind test to see the specific thermal
+properties of the flaring loops, we selected a LOS non-flaring active region (12194), extracted three
+segments of its loops and analyzed their thermal treatment. Then we compared the temperature
+treatment of the loops at the flaring region with the loops of the non-flaring region to see the
+differences. We were eager to observe the probable discrepancies between flaring and non-flaring
+loops in this respect.
+Here we used data of three loops of the flaring active region (AR11283) around the time of the
+Flare X2.1, from 22:10UT till 23:00UT on 2011 September 6, plus three loops of the non-flaring
+active region (AR12194), from 08:00:00UT till 09:00:00UT of 2014 October 26 (marked in figures
+1 and 2). To calculate the time series of the loop temperature values, we first extracted the loop
+pixels in each image and then displayed the loop straightly for all the images in the time series
+of different wavelengths. To do thermal analysis, we used the spatially-synthesized Gaussian
+DEM forward-fitting method founded by Aschwanden et al. (2015). We calculated the peak
+temperatures for each strip of the loops. Then we applied the Lomb-Scargle method to analyze
+temperature oscillations of the time-series for each strip of the loops.
+We observed temperature oscillations which are following the transverse loop oscillations
+observed by Jain et al. (2015) for the flaring loops. Furthermore, the temperature oscillations in
+these flaring loops happen before the transverse oscillations start and continue even in the time
+duration after the transverse oscillations decay. As observed, the temperature oscillations do not
+decay as rapidly as the transverse oscillations do. Conversely, the strips’ temperatures increase
+at the end of the oscillating mode and a rather sensible rise is observed in the final temperatures
+of the f-loops’ segments. The ranges of the obtained periods are from 7 min. to 28.4 min. for the
+flaring loops, and from 8.5 min. to 30 min. for the non-flaring loops. With the onset of X-flare in
+the F-loopA, which has a distinct transverse oscillation in the flaring time with period of roughly
+2 minutes and decay time of 5 minutes, a temperature oscillation is observed with periods of
+roughly 10 to 28.5 minutes in different segments of this loop. And as the transverse oscillation
+decays in this interval, no special definite decay is observed in its temperature oscillations.
 The temperature periods of the flaring loops are rather shorter than the temperature periods
-of the non-flaring loops. The loops of the flaring region show some short temperature oscillations
-periods in which some are less than 10 minutes (Table1). These kind of short periods are more
-frequently observed for the loops of the flaring active region and in the case of the non-flaring
-ones, are very scarce. We observed that the periods of the flaring loops have more diversity
-than those of the non-flaring ones. Based on our confined observations, the non-flaring loops’
-periods are longer and their temperatures’ values are totally lower. So our research showed that
-thermal structures of the flaring loops differ from the non-flaring ones in the ways described
-above. As temperature maps show, the temperature fluctuations are increasing at the flaring time
-and around 20 min. after, in the flaring loops. This happens with some oscillations in strips’
-temperature. Conversely, it seems that different strips of the non-flaring loops have relatively
-more similar temperature fluctuations. The temperatures are either higher in average in the flar￾ing loops’ segments as expected. The significances of the periods, obtained by the Lomb-Scargle
-method, are calculated for each strip of each loop and the results show that these significances
-for the loops’ strips of the flaring region are high and close to one, while for the loops’ strips of
-the non-flaring region are less than 0.5. Hence the detected periods in the flaring loops’ strips
-have high significances (near to one) and are oscillations. Whereas the detected periods in the
-non-flaring loops’ strips have less significances in comparison with the flaring ones, and maybe
-they are just fluctuations.
-Using this method for the coronal loops showed that the oscillation modes obtained for the
-temperatures of the flaring loops are very close to those of the spatial slow-mode oscillations of
-the coronal loops. So the origin of temperature oscillation is probably slow-mode waves. These
-kind of oscillations often occur in hot coronal loops (log(T) > 6) of active regions especially the
-ones associated with small (or micro-) flares (Wang et al. (2021)). The loops of our flaring active
-region are also hot loops with the mean temperature above this range. They also show intensity
-oscillations. Hence we think the above evidence confirms the slow-mode oscillations for flaring
-loops. The temperature of the non-flaring loops are lower (log(T) < 6) and as discussed above,
-we believe that the observed oscillation-like periods in non-flaring loops should be more probably
-related to the high amplitude fluctuations.
-Comparing the loops of the flaring and non-flaring regions, we observed that the amplitudes
-of the fluctuations show a discrepancy. Mean of the parameter (Max(log T)-Min(log T)) in the
-FloopA, , FloopB1, FLoopB2, FloopC1, and FloopC2, are 1.21, 1.10, 0.81, 1.48, and 0.88, respec￾tively. And for non-flaring region, mean of (Max(log T)-Min(log T)), are 0.81, 0.62, and 0.56, for
-nonfloopA, B, and C respectively. Therefore the values of the quantity mean of (Max(log T)-
-Min(log T)) for these non-flaring loops show a difference from the flaring ones and are lower.
-Loops of the non-flaring active region 12194 have a relatively uniform temperature at the
-beginning of the time interval, which rises slightly at its end. As the Solar Monitor reports in the
-neighborhood of this region, the flaring active region 12192 exists of which between its multiple
-flares, there is a c4.6 class flare occurring at 9:44UT. Therefore, it could be a possible suggestion
-that the abovementioned slight temperature rise in the loops of AR 12194 (in the time interval
-8:00 to 9:00) originated from the influence of an increase in the energy at the pre-flare conditions
-exist in the AR 12192.
-Hence as our study shows, the temperature of coronal loops of flaring AR changes in an
-oscillatory manner. Compared with these non-flaring loops, the flaring loops show higher tem￾peratures on average and higher oscillation periods with higher peaks and deeper valleys. More
-accurate commentary in this respect requires more extensive statistical research and broader ob￾servations.
-arcsec
-arcsec
-79 154 229 304 379 454
-−68
-25
-118
-211
-304
-397
-a
-arcsec
-arcsec
-114.6 171.2 227.8 284.4 341
-171.4
-206.3
-241.2
-276.1
-311
-Loop B1 Loop A
-Loop C2
-Loop C1
-b
-Loop B2
-Figure 1: (a) AIA image of the AR 11283 on 2011 September 6, 22:10 UT as seen in the 171 filter. (b) Zoom-in view
-of the area marked by a box in the left. The selected loops are distinguished in red. The loops A and B are
-the same loops studied by Jain et al. (2015) (see Fig.3a in Jain et al. (2015)).
-arcsec
-arcsec
-−154 0 154 308
-−572
-−418
-−264
-−110
-44
-a
-arcsec
-arcsec
-−202 −134 −66 2 70
-−396
-−338
-−280
-−221
-−162
-nonf−LoopA
-nonf−LoopB
-nonf−LoopC
-b
-Figure 2: (a) The NOAA AR12194 on 2014 October 26, at 08:00:00UT in 171 recorded by AIA/SDO. (b) Zoom-in
+of the non-flaring loops. The loops of the flaring region show some short temperature oscillations
+periods in which some are less than 10 minutes (Table1). These kind of short periods are more
+frequently observed for the loops of the flaring active region and in the case of the non-flaring
+ones, are very scarce. We observed that the periods of the flaring loops have more diversity
+than those of the non-flaring ones. Based on our confined observations, the non-flaring loops’
+periods are longer and their temperatures’ values are totally lower. So our research showed that
+thermal structures of the flaring loops differ from the non-flaring ones in the ways described
+above. As temperature maps show, the temperature fluctuations are increasing at the flaring time
+and around 20 min. after, in the flaring loops. This happens with some oscillations in strips’
+temperature. Conversely, it seems that different strips of the non-flaring loops have relatively
+more similar temperature fluctuations. The temperatures are either higher in average in the flaring loops’ segments as expected. The significances of the periods, obtained by the Lomb-Scargle
+method, are calculated for each strip of each loop and the results show that these significances
+for the loops’ strips of the flaring region are high and close to one, while for the loops’ strips of
+the non-flaring region are less than 0.5. Hence the detected periods in the flaring loops’ strips
+have high significances (near to one) and are oscillations. Whereas the detected periods in the
+non-flaring loops’ strips have less significances in comparison with the flaring ones, and maybe
+they are just fluctuations.
+Using this method for the coronal loops showed that the oscillation modes obtained for the
+temperatures of the flaring loops are very close to those of the spatial slow-mode oscillations of
+the coronal loops. So the origin of temperature oscillation is probably slow-mode waves. These
+kind of oscillations often occur in hot coronal loops (log(T) > 6) of active regions especially the
+ones associated with small (or micro-) flares (Wang et al. (2021)). The loops of our flaring active
+region are also hot loops with the mean temperature above this range. They also show intensity
+oscillations. Hence we think the above evidence confirms the slow-mode oscillations for flaring
+loops. The temperature of the non-flaring loops are lower (log(T) < 6) and as discussed above,
+we believe that the observed oscillation-like periods in non-flaring loops should be more probably
+related to the high amplitude fluctuations.
+Comparing the loops of the flaring and non-flaring regions, we observed that the amplitudes
+of the fluctuations show a discrepancy. Mean of the parameter (Max(log T)-Min(log T)) in the
+FloopA, FloopB1, FLoopB2, FloopC1, and FloopC2, are 1.21, 1.10, 0.81, 1.48, and 0.88, respectively. And for non-flaring region, mean of (Max(log T)-Min(log T)), are 0.81, 0.62, and 0.56, for
+nonfloopA, B, and C respectively. Therefore the values of the quantity mean of (Max(log T)-Min(log T))
+for these non-flaring loops show a difference from the flaring ones and are lower.
+Loops of the non-flaring active region 12194 have a relatively uniform temperature at the
+beginning of the time interval, which rises slightly at its end. As the Solar Monitor reports in the
+neighborhood of this region, the flaring active region 12192 exists of which between its multiple
+flares, there is a c4.6 class flare occurring at 9:44UT. Therefore, it could be a possible suggestion
+that the abovementioned slight temperature rise in the loops of AR 12194 (in the time interval
+8:00 to 9:00) originated from the influence of an increase in the energy at the pre-flare conditions
+exist in the AR 12192.
+Hence as our study shows, the temperature of coronal loops of flaring AR changes in an
+oscillatory manner. Compared with these non-flaring loops, the flaring loops show higher temperatures on average and higher oscillation periods with higher peaks and deeper valleys. More
+accurate commentary in this respect requires more extensive statistical research and broader observations.
+arcsec
+arcsec
+79 154 229 304 379 454
+−68
+25
+118
+211
+304
+397
+a
+arcsec
+arcsec
+114.6 171.2 227.8 284.4 341
+171.4
+206.3
+241.2
+276.1
+311
+Loop B1 Loop A
+Loop C2
+Loop C1
+b
+Loop B2
+Figure 1: (a) AIA image of the AR 11283 on 2011 September 6, 22:10 UT as seen in the 171 filter. (b) Zoom-in view
+of the area marked by a box in the left. The selected loops are distinguished in red. The loops A and B are
+the same loops studied by Jain et al. (2015) (see Fig.3a in Jain et al. (2015)).
+arcsec
+arcsec
+−154 0 154 308
+−572
+−418
+−264
+−110
+44
+a
+arcsec
+arcsec
+−202 −134 −66 2 70
+−396
+−338
+−280
+−221
+−162
+nonf−LoopA
+nonf−LoopB
+nonf−LoopC
+b
+Figure 2: (a) The NOAA AR12194 on 2014 October 26, at 08:00:00UT in 171 recorded by AIA/SDO. (b) Zoom-in
 view of the area, marked by a box in the left, the loops are distinguished in red.
-5.8
-6
-6.2
-6.4
-6.6
-6.8
-LogT
-F−LoopA
-5.8
-6
-6.2
-6.4
-6.6
-6.8
-LogT
-22:10 22:20 22:30 22:40 22:50 23:00
-5.8
-6
-6.2
-6.4
-6.6
-6.8
-time
-LogT
-5.8
-6
-6.2
-6.4
-6.6
-6.8
-LogT
-F−LoopB1
-22:10 22:20 22:30 22:40 22:50 23:00
-5.8
-6
-6.2
-6.4
-6.6
-6.8
-time
-LogT
-Figure 3: From up to down: The time-series of the temperature oscillations for the first 3 strips of Loop A (strip 1 to
-3 from top to down), and the first 2 strips of LoopB1. Horizontal axis is the time and the vertical axis is the
+5.8
+6
+6.2
+6.4
+6.6
+6.8
+LogT
+F−LoopA
+5.8
+6
+6.2
+6.4
+6.6
+6.8
+LogT
+22:10 22:20 22:30 22:40 22:50 23:00
+5.8
+6
+6.2
+6.4
+6.6
+6.8
+time
+LogT
+5.8
+6
+6.2
+6.4
+6.6
+6.8
+LogT
+F−LoopB1
+22:10 22:20 22:30 22:40 22:50 23:00
+5.8
+6
+6.2
+6.4
+6.6
+6.8
+time
+LogT
+Figure 3: From up to down: The time-series of the temperature oscillations for the first 3 strips of Loop A (strip 1 to
+3 from top to down), and the first 2 strips of LoopB1. Horizontal axis is the time and the vertical axis is the
 logarithm of the temperature. The red lines mark the initial and final time of the flare x2.1.
-22:10 22:20 22:30 22:40 22:50 23:00
-0 
-11
-21
-32
-42
-F−loopA
-Time
-Loop Length(Mm)
-5.8
-6
-6.2
-6.4
-6.6
-6.8
-22:10 22:20 22:30 22:40 22:50 23:00
-0 
-5 
-10
-15
-20
-F−loopB1
-Time
-Loop Length(Mm)
-6
-6.05
-6.1
-6.15
-6.2
-6.25
-6.3
-6.35
-6.4
-6.45
-6.5
-22:10 22:20 22:30 22:40 22:50 23:00
-0 
-4 
-8 
-12
-16
-F−loopB2
-Time
-Loop Length(Mm)
-5.8
-6
-6.2
-6.4
-6.6
-6.8
-22:10 22:20 22:30 22:40 22:50 23:00
-0 
-6 
-11
-17
-22
-F−loopC1
-Time
-Loop Length(Mm)
-5.6
-5.8
-6
-6.2
-6.4
-6.6
-6.8
-22:10 22:20 22:30 22:40 22:50 23:00
-0 
-3 
-6 
-8 
-11
-F−loopC2
-Time
-Loop Length(Mm)
-5.8
-6
-6.2
-6.4
-6.6
-6.8
-Figure 4: Temperature map of the flaring loops A, B1, B2, C1, and C2 (from top to down) as a time series. The vertical
-axis is the distance along the loop in Mm, and the horizontal axis is the time. The colorbar in the left shows
+22:10 22:20 22:30 22:40 22:50 23:00
+0
+11
+21
+32
+42
+F−loopA
+Time
+Loop Length(Mm)
+5.8
+6
+6.2
+6.4
+6.6
+6.8
+22:10 22:20 22:30 22:40 22:50 23:00
+0
+5
+10
+15
+20
+F−loopB1
+Time
+Loop Length(Mm)
+6
+6.05
+6.1
+6.15
+6.2
+6.25
+6.3
+6.35
+6.4
+6.45
+6.5
+22:10 22:20 22:30 22:40 22:50 23:00
+0
+4
+8
+12
+16
+F−loopB2
+Time
+Loop Length(Mm)
+5.8
+6
+6.2
+6.4
+6.6
+6.8
+22:10 22:20 22:30 22:40 22:50 23:00
+0
+6
+11
+17
+22
+F−loopC1
+Time
+Loop Length(Mm)
+5.6
+5.8
+6
+6.2
+6.4
+6.6
+6.8
+22:10 22:20 22:30 22:40 22:50 23:00
+0
+3
+6
+8
+11
+F−loopC2
+Time
+Loop Length(Mm)
+5.8
+6
+6.2
+6.4
+6.6
+6.8
+Figure 4: Temperature map of the flaring loops A, B1, B2, C1, and C2 (from top to down) as a time series. The vertical
+axis is the distance along the loop in Mm, and the horizontal axis is the time. The colorbar in the left shows
 the colors considered for the temperature range.
-Table 1: The properties observed for the loop segments of the flaring AR.
-FLoopA
-(Strip Number)
-The highest
-Temp.’s period
-observed
-Max(log(T))-
-Min(log(T))
-FLoopB2
-(Strip Number)
-The highest
-Temp.’s period
-observed
-Max(log(T))-
-Min(log(T))
-1 9.94 1.09 1 18.07 0.68
-2 16.57 0.79 2 24.85 0.83
-3 8.46 0.65 3 24.85 0.85
-4 28.4 1.11 4 7.36 0.84
-5 28.4 0.75 5 8.64 0.85
-6 24.85 0.76 6 8.28 0.93
-7 22.09 0.58 7 18.07 0.84
-8 18.07 1.55 8 28.4 0.73
-9 18.07 1.6 FLoopC1 - -
-10 12.42 1.57 1 28.4 1.46
-11 12.42 1.42 2 22.09 1.34
-12 24.85 1.56 3 16.57 1.36
-13 19.88 1.6 4 28.04 1.49
-14 19.88 1.24 5 24.85 1.6
-15 18.07 1.58 6 24.85 1.42
-16 19.88 1.45 7 15.29 1.6
-17 16.57 0.7 8 13.25 1.56
-18 7.36 1.6 9 13.25 1.6
-19 8.64 0.95 10 16.57 1.6
-20 16.57 1.54 11 16.57 1.6
-21 7.36 1.18 12 9.46 1.13
-22 7.36 1.51 FLoopC1 - -
-23 18.07 1.58 1 18.07 0.88
-24 22.09 1.33 2 28.4 0.8
-25 24.85 0.72 3 15.29 0.87
-FLoopB1 - - 4 16.57 0.93
-1 18.07 1.43 5 18.07 1.22
-2 15.29 0.76 6 28.4 0.58
-3 18.07 0.76
-4 18.07 0.75
-5 18.07 0.59
-6 19.88 0.8
-7 19.88 0.91
-8 19.88 1.36
-9 11.04 1.6
-10 18.07 1.6
+Table 1: The properties observed for the loop segments of the flaring AR.
+FLoopA
+(Strip Number)
+The highest
+Temp.’s period
+observed
+Max(log(T))Min(log(T))
+
+FLoopB2
+(Strip Number)
+The highest
+Temp.’s period
+observed
+Max(log(T))Min(log(T))
+
+1 9.94 1.09 1 18.07 0.68
+2 16.57 0.79 2 24.85 0.83
+3 8.46 0.65 3 24.85 0.85
+4 28.4 1.11 4 7.36 0.84
+5 28.4 0.75 5 8.64 0.85
+6 24.85 0.76 6 8.28 0.93
+7 22.09 0.58 7 18.07 0.84
+8 18.07 1.55 8 28.4 0.73
+9 18.07 1.6 FLoopC1 - 10
+ 12.42 1.57 1 28.4 1.46
+11 12.42 1.42 2 22.09 1.34
+12 24.85 1.56 3 16.57 1.36
+13 19.88 1.6 4 28.04 1.49
+14 19.88 1.24 5 24.85 1.6
+15 18.07 1.58 6 24.85 1.42
+16 19.88 1.45 7 15.29 1.6
+17 16.57 0.7 8 13.25 1.56
+18 7.36 1.6 9 13.25 1.6
+19 8.64 0.95 10 16.57 1.6
+20 16.57 1.54 11 16.57 1.6
+21 7.36 1.18 12 9.46 1.13
+22 7.36 1.51 FLoopC1 - 23
+ 18.07 1.58 1 18.07 0.88
+24 22.09 1.33 2 28.4 0.8
+25 24.85 0.72 3 15.29 0.87
+FLoopB1 - - 4 16.57 0.93
+1 18.07 1.43 5 18.07 1.22
+2 15.29 0.76 6 28.4 0.58
+3 18.07 0.76
+4 18.07 0.75
+5 18.07 0.59
+6 19.88 0.8
+7 19.88 0.91
+8 19.88 1.36
+9 11.04 1.6
+10 18.07 1.6
 11 18.07 1.6
-Table 2: The properties observed for the loop segments of the non flaring AR.
-Nonf-LoopA
-(Strip Number)
-The highest
-Temp.’s period
-observed
-Max(log(T))-
-Min(log(T))
-1 24 0.61
-2 30 0.95
-3 30 0.81
-4 20 1.51
-5 20 0.77
-6 20 0.81
-7 11.42 0.71
-8 12 0.73
-9 30 0.72
-10 30 0.77
-11 30 0.61
-Nonf-LoopB
-(Strip Number)
-The highest
-Temp.’s period
-observed
-Max(log(T))-
-Min(log(T))
-1 26.66 0.36
-2 26.66 0.64
-3 10.43 0.45
-4 12 0.62
-5 30 0.98
-6 8.57 0.67
-Nonf-LoopC
-(Strip Number)
-The highest
-Temp.’s period
-observed
-Max(log(T))-
-Min(log(T))
-1 26.66 0.76
-2 26.66 0.75
-3 26.66 0.26
-4 30 0.27
+Table 2: The properties observed for the loop segments of the non flaring AR.
+Nonf-LoopA
+(Strip Number)
+The highest
+Temp.’s period
+observed
+Max(log(T))Min(log(T))
+
+1 24 0.61
+2 30 0.95
+3 30 0.81
+4 20 1.51
+5 20 0.77
+6 20 0.81
+7 11.42 0.71
+8 12 0.73
+9 30 0.72
+10 30 0.77
+11 30 0.61
+Nonf-LoopB
+(Strip Number)
+The highest
+Temp.’s period
+observed
+Max(log(T))Min(log(T))
+
+1 26.66 0.36
+2 26.66 0.64
+3 10.43 0.45
+4 12 0.62
+5 30 0.98
+6 8.57 0.67
+Nonf-LoopC
+(Strip Number)
+The highest
+Temp.’s period
+observed
+Max(log(T))Min(log(T))
+
+1 26.66 0.76
+2 26.66 0.75
+3 26.66 0.26
+4 30 0.27
 5 30 0.8
-22:10 22:20 22:30 22:40 22:50 23:00
-0 
-11
-22
-32
-43
-Int−Fe−LoopA
-Time
-Loop Length(Mm)
-0
-0.02
-0.04
-0.06
-0.08
-0.1
-0.12
-0.14
-0.16
-0.18
-0.2
-22:10 22:20 22:30 22:40 22:50 23:00
-0
-0.1
-0.2
-0.3
-0.4
-0.5
-0.6
-0.7
-0.8
-0.9
-1
-Int−Fe−LoopA
-Time
-Normalized Intensity Fe XVIII
-Figure 5: Normalized intensity map of the flaring loop A for the wavelength Fe XV I I I, and mean intensity of Fe
-XV I I I (from top to down). The vertical axis is the distance along the loop in Mm for the first plot, and
-normalized intensity for the second. The horizontal axis is the time. The colorbar in the left shows the colors
-considered for the Intensity range.
-VI. acknowledgements
-The author Narges Fathalian wishes to also express her thanks for the technical support and
-comments which has received from Dr.Farhad Daii and Dr.Mohsen Javaherian regarding to this
+22:10 22:20 22:30 22:40 22:50 23:00
+0
+11
+22
+32
+43
+Int−Fe−LoopA
+Time
+Loop Length(Mm)
+0
+0.02
+0.04
+0.06
+0.08
+0.1
+0.12
+0.14
+0.16
+0.18
+0.2
+22:10 22:20 22:30 22:40 22:50 23:00
+0
+0.1
+0.2
+0.3
+0.4
+0.5
+0.6
+0.7
+0.8
+0.9
+1
+Int−Fe−LoopA
+Time
+Normalized Intensity Fe XVIII
+Figure 5: Normalized intensity map of the flaring loop A for the wavelength Fe XV I I I, and mean intensity of Fe
+XV I I I (from top to down). The vertical axis is the distance along the loop in Mm for the first plot, and
+normalized intensity for the second. The horizontal axis is the time. The colorbar in the left shows the colors
+considered for the Intensity range.
+VI. acknowledgements
+The author Narges Fathalian wishes to also express her thanks for the technical support and
+comments which has received from Dr.Farhad Daii and Dr.Mohsen Javaherian regarding to this
 work.
-5.8
-6
-6.2
-6.4
-6.6
-6.8
-LogT
-NonF−LoopA
-8:00 8:10 8:20 8:30 8:40 8:50 9:00
-5.8
-6
-6.2
-6.4
-6.6
-6.8
-time
-LogT
-5.8
-6
-6.2
-6.4
-6.6
-6.8
-LogT
-NonF−LoopB
-8:00 8:10 8:20 8:30 8:40 8:50 9:00
-5.8
-6
-6.2
-6.4
-6.6
-6.8
-time
-LogT
-Figure 6: from top to down: The time-series of the temperature for the first 2 strips (from top to down) of the non￾flaring Loops A and B. Horizontal axis is the time and the vertical axis is the logarithm of the temperature.
-8:10 8:20 8:30 8:40 8:50 9:00
-0 
-5 
-10
-15
-20
-NonF−loopA
-Time
-Loop Length(Mm)
-5.8
-6
-6.2
-6.4
-6.6
-6.8
-8:10 8:20 8:30 8:40 8:50 9:00
-0 
-5 
-9 
-14
-18
-NonF−loopB
-Time
-Loop Length(Mm)
-5.8
-6
-6.2
-6.4
-6.6
-6.8
-8:10 8:20 8:30 8:40 8:50 9:00
-0 
-3 
-5 
-8 
-10
-NonF−loopC
-Time
-Loop Length(Mm)
-5.8
-6
-6.2
-6.4
-6.6
-6.8
-Figure 7: from top to down: Temperature map of the non-flaring loops A, B and C as a time-series. The vertical axis
-is the distance along the loop in Mm, and the horizontal axis is the time. The color-bar in the left shows the
+5.8
+6
+6.2
+6.4
+6.6
+6.8
+LogT
+NonF−LoopA
+8:00 8:10 8:20 8:30 8:40 8:50 9:00
+5.8
+6
+6.2
+6.4
+6.6
+6.8
+time
+LogT
+5.8
+6
+6.2
+6.4
+6.6
+6.8
+LogT
+NonF−LoopB
+8:00 8:10 8:20 8:30 8:40 8:50 9:00
+5.8
+6
+6.2
+6.4
+6.6
+6.8
+time
+LogT
+Figure 6: from top to down: The time-series of the temperature for the first 2 strips (from top to down) of the nonflaring Loops A and B. Horizontal axis is the time and the vertical axis is the logarithm of the temperature.
+8:10 8:20 8:30 8:40 8:50 9:00
+0
+5
+10
+15
+20
+NonF−loopA
+Time
+Loop Length(Mm)
+5.8
+6
+6.2
+6.4
+6.6
+6.8
+8:10 8:20 8:30 8:40 8:50 9:00
+0
+5
+9
+14
+18
+NonF−loopB
+Time
+Loop Length(Mm)
+5.8
+6
+6.2
+6.4
+6.6
+6.8
+8:10 8:20 8:30 8:40 8:50 9:00
+0
+3
+5
+8
+10
+NonF−loopC
+Time
+Loop Length(Mm)
+5.8
+6
+6.2
+6.4
+6.6
+6.8
+Figure 7: from top to down: Temperature map of the non-flaring loops A, B and C as a time-series. The vertical axis
+is the distance along the loop in Mm, and the horizontal axis is the time. The color-bar in the left shows the
 colors considered for the temperature range.
-6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
-0
-0.05
-0.1
-0.15
-0.2
-0.25
-0.3
-0.35
-0.4
-Temp. Period (min)
-Percentage of Temp. Periods
-Figure 8: Hisogram of the temperature periods percentages for the loops’ strips of the flaring (blue bars) and non￾flaring (red bars) ARs. The horizontal axis shows the temperature periods in minute.
-0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1 1.1 1.2 1.3 1.4 1.5 1.6 1.7
-0
-2
-4
-6
-8
-10
-12
-max(log(T))−min(log(T))
-Number
-Figure 9: Hisogram of the parameter of (max(log(T))-min(log(T))) for each strip of the loops of the flaring (blue bars)
+6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
+0
+0.05
+0.1
+0.15
+0.2
+0.25
+0.3
+0.35
+0.4
+Temp. Period (min)
+Percentage of Temp. Periods
+Figure 8: Hisogram of the temperature periods percentages for the loops’ strips of the flaring (blue bars) and nonflaring (red bars) ARs. The horizontal axis shows the temperature periods in minute.
+0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1 1.1 1.2 1.3 1.4 1.5 1.6 1.7
+0
+2
+4
+6
+8
+10
+12
+max(log(T))−min(log(T))
+Number
+Figure 9: Hisogram of the parameter of (max(log(T))-min(log(T))) for each strip of the loops of the flaring (blue bars)
 and non-flaring (red bars) ARs.
-References
-Abedini, A., Safari, H., & Nasiri, S. 2012, Solar Physics, 280
-Anfinogentov, S., Nakariakov, V. M., Mathioudakis, M., Van Doorsselaere, T., & Kowalski, A. F.
-2013, ApJ, 773, 156
-Aschwanden, M., B. P. S. C. M. A. 2013, Solar Physics, 283, 5
-Aschwanden, M. J. 2006, Philosophical Transactions of the Royal Society of London Series A, 364,
-417
-Aschwanden, M. J., & Boerner, P. 2011, The Astrophysical Journal, 732, 81
-Aschwanden, M. J., Boerner, P., Ryan, D., et al. 2015, The Astrophysical Journal, 802, 53
-Aschwanden, M. J., Fletcher, L., Schrijver, C. J., & Alexander, D. 1999, ApJ, 520, 880
-Ballai, I., Jess, D. B., & Douglas, M. 2011, A&A, 534, A13
-Banerjee, D., Erdélyi, R., Oliver, R., & O’Shea, E. 2007, Solar Physics, 246, 3
-Berghmans, D., & Clette, F. 1999, Solar Physics, 186, 207
-Boerner, P., Edwards, C., Lemen, J., et al. 2012, Solar Physics, 275, 41
-Dahlburg, R. B., Einaudi, G., Ugarte-Urra, I., Rappazzo, A. F., & Velli, M. 2018, ApJ, 868, 116
-De Moortel, I. 2005, Philosophical Transactions of the Royal Society of London Series A, 363, 2743
-De Moortel, I., & Brady, C. S. 2007, ApJ, 664, 1210
-De Moortel, I., Ireland, J., & Walsh, R. W. 2000, A&A, 355, L23
-De Moortel, I., & Nakariakov, V. M. 2012, Philosophical Transactions of the Royal Society of
-London Series A, 370, 3193
-Fathalian, N. 2019, arXiv e-prints, arXiv:1908.11369
-Fathalian, N., & Safari, H. 2010, ApJ, 724, 411
-Fathalian, N., Safari, H., & Nasiri, S. 2010, New Astronomy, 15, 403
-Goossens, M., Hollweg, J. V., & Sakurai, T. 1992, Solar Physics, 138, 233
-Gruszecki, M., Murawski, K., Selwa, M., & Ofman, L. 2006, A&A, 460, 887
-Guennou, C., Auchère, F., Soubrié, E., et al. 2012a, ApJ, 203, 25
-Guennou, C., Auchère, F., Soubrié, E., et al. 2012b, ApJ, 203, 26
-Habbal, S. R., & Rosner, R. 1979, ApJ, 234, 1113
-Hindman, B. W., & Jain, R. 2014, ApJ, 784, 103
-Jain, R., Maurya, R. A., & Hindman, B. W. 2015, ApJ, 804, L19
+References
+Abedini, A., Safari, H., & Nasiri, S. 2012, Solar Physics, 280
+Anfinogentov, S., Nakariakov, V. M., Mathioudakis, M., Van Doorsselaere, T., & Kowalski, A. F.
+2013, ApJ, 773, 156
+Aschwanden, M., B. P. S. C. M. A. 2013, Solar Physics, 283, 5
+Aschwanden, M. J. 2006, Philosophical Transactions of the Royal Society of London Series A, 364,
+417
+Aschwanden, M. J., & Boerner, P. 2011, The Astrophysical Journal, 732, 81
+Aschwanden, M. J., Boerner, P., Ryan, D., et al. 2015, The Astrophysical Journal, 802, 53
+Aschwanden, M. J., Fletcher, L., Schrijver, C. J., & Alexander, D. 1999, ApJ, 520, 880
+Ballai, I., Jess, D. B., & Douglas, M. 2011, A&A, 534, A13
+Banerjee, D., Erdélyi, R., Oliver, R., & O’Shea, E. 2007, Solar Physics, 246, 3
+Berghmans, D., & Clette, F. 1999, Solar Physics, 186, 207
+Boerner, P., Edwards, C., Lemen, J., et al. 2012, Solar Physics, 275, 41
+Dahlburg, R. B., Einaudi, G., Ugarte-Urra, I., Rappazzo, A. F., & Velli, M. 2018, ApJ, 868, 116
+De Moortel, I. 2005, Philosophical Transactions of the Royal Society of London Series A, 363, 2743
+De Moortel, I., & Brady, C. S. 2007, ApJ, 664, 1210
+De Moortel, I., Ireland, J., & Walsh, R. W. 2000, A&A, 355, L23
+De Moortel, I., & Nakariakov, V. M. 2012, Philosophical Transactions of the Royal Society of
+London Series A, 370, 3193
+Fathalian, N. 2019, arXiv e-prints, arXiv:1908.11369
+Fathalian, N., & Safari, H. 2010, ApJ, 724, 411
+Fathalian, N., Safari, H., & Nasiri, S. 2010, New Astronomy, 15, 403
+Goossens, M., Hollweg, J. V., & Sakurai, T. 1992, Solar Physics, 138, 233
+Gruszecki, M., Murawski, K., Selwa, M., & Ofman, L. 2006, A&A, 460, 887
+Guennou, C., Auchère, F., Soubrié, E., et al. 2012a, ApJ, 203, 25
+Guennou, C., Auchère, F., Soubrié, E., et al. 2012b, ApJ, 203, 26
+Habbal, S. R., & Rosner, R. 1979, ApJ, 234, 1113
+Hindman, B. W., & Jain, R. 2014, ApJ, 784, 103
+Jain, R., Maurya, R. A., & Hindman, B. W. 2015, ApJ, 804, L19
 Jess, D. B., Reznikova, V. E., Ryans, R. S. I., et al. 2016, Nature Physics, 12, 179
-Kolotkov, D. Y., Nakariakov, V. M., & Zavershinskii, D. I. 2019, A&A, 628, A133
-Krishna Prasad, S., Jess, D. B., & Van Doorsselaere, T. 2019, Frontiers in Astronomy and Space
-Sciences, 6, 57
-Li, L. P., Peter, H., Chen, F., & Zhang, J. 2015, A&A, 583, A109
-Liu, W., & Ofman, L. 2014, Solar Physics, 289, 3233–3277
-Luna, M., Terradas, J., Oliver, R., & Ballester, J. L. 2010, ApJ, 716, 1371
-McClymont, A. N., & Craig, I. J. D. 1985, ApJ, 289, 834
-McLaughlin, J. A., Nakariakov, V. M., Dominique, M., Jelínek, P., & Takasao, S. 2018, Space
-Science Reviews volume, 214, 45
-Nakariakov, V. M., Afanasyev, A. N., Kumar, S., & Moon, Y. J. 2017, ApJ, 849, 62
-Nakariakov, V. M., Inglis, A. R., Zimovets, I. V., et al. 2010, Plasma Physics and Controlled Fusion,
-52, 124009
-Nakariakov, V. M., Ofman, L., Deluca, E. E., Roberts, B., & Davila, J. M. 1999, Science, 285, 862
-Nakariakov, V. M., & Verwichte, E. 2005, Living Reviews in Solar Physics, 2, 3
-Nisticò, G., Nakariakov, V. M., & Verwichte, E. 2013, A&A, 552, A57
-Nisticò, G., Polito, V., Nakariakov, V. M., & Del Zanna, G. 2017, A&A, 600, A37
-Ofman, L., & Wang, T. 2002, ApJ, 580, L85
-Pant, V., Tiwari, A., Yuan, D., & Banerjee, D. 2017, ApJ, 847, L5
-Pascoe, D. J., Nakariakov, V. M., & Arber, T. D. 2007, Solar Physics, 246, 165
-Reale, F., Testa, P., Petralia, A., & Kolotkov, D. Y. 2019, ApJ, 884, 131
-Roberts, B., Edwin, P. M., & Benz, A. O. 1984, ApJ, 279, 857
-Romano, P., Zuccarello, F., Guglielmino, S. L., et al. 2015, A&A, 582, A55
-Russell, A. J. B., Simões, P. J. A., & Fletcher, L. 2015, A&A, 581, A8
-Scargle, J. D. 1982, ApJ, 263, 835
-Schmelz, J. T., Jenkins, B. S., Worley, B. T., et al. 2011, ApJ, 731, 49
-Schmelz, J. T., Kimble, J. A., Jenkins, B. S., et al. 2010, ApJ, 725, L34
-Schmelz, J. T., Pathak, S., Brooks, D. H., Christian, G. M., & Dhaliwal, R. S. 2014, ApJ, 795, 171
-Schmelz, J. T., Pathak, S., Jenkins, B. S., & Worley, B. T. 2013, ApJ, 764, 53
-Ugarte-Urra, I., & Warren, H. P. 2014, ApJ, 783, 12
-Van Doorsselaere, T., Kupriyanova, E. G., & Yuan, D. 2016, Solar Physics, 291, 3143
+Kolotkov, D. Y., Nakariakov, V. M., & Zavershinskii, D. I. 2019, A&A, 628, A133
+Krishna Prasad, S., Jess, D. B., & Van Doorsselaere, T. 2019, Frontiers in Astronomy and Space
+Sciences, 6, 57
+Li, L. P., Peter, H., Chen, F., & Zhang, J. 2015, A&A, 583, A109
+Liu, W., & Ofman, L. 2014, Solar Physics, 289, 3233–3277
+Luna, M., Terradas, J., Oliver, R., & Ballester, J. L. 2010, ApJ, 716, 1371
+McClymont, A. N., & Craig, I. J. D. 1985, ApJ, 289, 834
+McLaughlin, J. A., Nakariakov, V. M., Dominique, M., Jelínek, P., & Takasao, S. 2018, Space
+Science Reviews volume, 214, 45
+Nakariakov, V. M., Afanasyev, A. N., Kumar, S., & Moon, Y. J. 2017, ApJ, 849, 62
+Nakariakov, V. M., Inglis, A. R., Zimovets, I. V., et al. 2010, Plasma Physics and Controlled Fusion,
+52, 124009
+Nakariakov, V. M., Ofman, L., Deluca, E. E., Roberts, B., & Davila, J. M. 1999, Science, 285, 862
+Nakariakov, V. M., & Verwichte, E. 2005, Living Reviews in Solar Physics, 2, 3
+Nisticò, G., Nakariakov, V. M., & Verwichte, E. 2013, A&A, 552, A57
+Nisticò, G., Polito, V., Nakariakov, V. M., & Del Zanna, G. 2017, A&A, 600, A37
+Ofman, L., & Wang, T. 2002, ApJ, 580, L85
+Pant, V., Tiwari, A., Yuan, D., & Banerjee, D. 2017, ApJ, 847, L5
+Pascoe, D. J., Nakariakov, V. M., & Arber, T. D. 2007, Solar Physics, 246, 165
+Reale, F., Testa, P., Petralia, A., & Kolotkov, D. Y. 2019, ApJ, 884, 131
+Roberts, B., Edwin, P. M., & Benz, A. O. 1984, ApJ, 279, 857
+Romano, P., Zuccarello, F., Guglielmino, S. L., et al. 2015, A&A, 582, A55
+Russell, A. J. B., Simões, P. J. A., & Fletcher, L. 2015, A&A, 581, A8
+Scargle, J. D. 1982, ApJ, 263, 835
+Schmelz, J. T., Jenkins, B. S., Worley, B. T., et al. 2011, ApJ, 731, 49
+Schmelz, J. T., Kimble, J. A., Jenkins, B. S., et al. 2010, ApJ, 725, L34
+Schmelz, J. T., Pathak, S., Brooks, D. H., Christian, G. M., & Dhaliwal, R. S. 2014, ApJ, 795, 171
+Schmelz, J. T., Pathak, S., Jenkins, B. S., & Worley, B. T. 2013, ApJ, 764, 53
+Ugarte-Urra, I., & Warren, H. P. 2014, ApJ, 783, 12
+Van Doorsselaere, T., Kupriyanova, E. G., & Yuan, D. 2016, Solar Physics, 291, 3143
 Van Doorsselaere, T., Wardle, N., Del Zanna, G., et al. 2011, ApJ, 727, L32
-VanderPlas, J. T. 2018, ApJ, 236, 16
-Verwichte, E., Nakariakov, V. M., Ofman, L., & Deluca, E. E. 2004, Solar Physics, 223, 77
-Wang, T. 2011, Space Science Reviews, 158, 397–419
-Wang, T., Innes, D. E., & Qiu, J. 2007, ApJ, 656, 598
-Wang, T. J., & Solanki, S. K. 2004, A&A, 421, L33
-Wang, T. J., Solanki, S. K., Innes, D. E., Curdt, W., & Marsch, E. 2003, A&A, 402, L17
-Wang, T., & Ofman, L. 2019, ApJ, 886, 2
-Wang, T., Ofman, L., Sun, X., Provornikova, E., & Davila, J. M. 2015, ApJ, 811, L13
-Wang, T., Ofman, L., Yuan, D., et al. 2021, Space Science Reviews, 217
-Warren, H. P., Winebarger, A. R., & Brooks, D. H. 2012, ApJ, 759, 141
-Wills-Davey, M. J., & Thompson, B. J. 1999, Solar Physics, 190, 467
+VanderPlas, J. T. 2018, ApJ, 236, 16
+Verwichte, E., Nakariakov, V. M., Ofman, L., & Deluca, E. E. 2004, Solar Physics, 223, 77
+Wang, T. 2011, Space Science Reviews, 158, 397–419
+Wang, T., Innes, D. E., & Qiu, J. 2007, ApJ, 656, 598
+Wang, T. J., & Solanki, S. K. 2004, A&A, 421, L33
+Wang, T. J., Solanki, S. K., Innes, D. E., Curdt, W., & Marsch, E. 2003, A&A, 402, L17
+Wang, T., & Ofman, L. 2019, ApJ, 886, 2
+Wang, T., Ofman, L., Sun, X., Provornikova, E., & Davila, J. M. 2015, ApJ, 811, L13
+Wang, T., Ofman, L., Yuan, D., et al. 2021, Space Science Reviews, 217
+Warren, H. P., Winebarger, A. R., & Brooks, D. H. 2012, ApJ, 759, 141
+Wills-Davey, M. J., & Thompson, B. J. 1999, Solar Physics, 190, 467
\ No newline at end of file
diff --git a/read/results/pdfium/GeoTopo-book.txt b/read/results/pdfium/GeoTopo-book.txt
index 407fed2..62b1487 100644
--- a/read/results/pdfium/GeoTopo-book.txt
+++ b/read/results/pdfium/GeoTopo-book.txt
@@ -1,6443 +1,6123 @@
-Einführung in die
-Geometrie und Topologie
+Einführung in die
+Geometrie und Topologie
 0. Auflage, 31. Dezember 2016 Martin Thoma
-Vorwort
-Dieses Skript wurde im Wintersemester 2013/2014 von Martin Thoma geschrieben. Es beinhaltet
-die Mitschriften aus der Vorlesung von Prof. Dr. Herrlich sowie die Mitschriften einiger Übungen
-und Tutorien.
-Das Skript ist kostenlos über martin-thoma.com/geotopo verfügbar. Wer es gerne in A5 (Schwarz￾Weiß, Ringbindung) für 10 Euro hätte, kann mir eine E-Mail schicken (info@martin-thoma.de).
-Danksagungen
-An dieser Stelle möchte ich Herrn Prof. Dr. Herrlich für einige Korrekturvorschläge und einen
-gut strukturierten Tafelanschrieb danken, der als Vorlage für dieses Skript diente. Tatsächlich
-basiert die Struktur dieses Skripts auf der Vorlesung von Herrn Prof. Dr. Herrlich und ganze
-Abschnitte konnten direkt mit LATEX umgesetzt werden. Vielen Dank für die Erlaubnis, Ihre
-Inhalte in diesem Skript einbauen zu dürfen!
-Vielen Dank auch an Frau Lenz und Frau Randecker, die es mir erlaubt haben, ihre Übungsauf￾gaben und Lösungen zu benutzen.
-Jérôme Urhausen hat durch viele Verbesserungsvorschläge und Beweise zu einer erheblichen
-Qualitätssteigerung am Skript beigetragen und meine Tutorin Sarah hat mir viele Fragen per
-E-Mail und nach dem Tutorium beantwortet. Danke!
-Was ist Topologie?
-Die Kugeloberfläche S
-2
-lässt sich durch strecken, stauchen und umformen zur Würfeloberfläche
-oder der Oberfläche einer Pyramide verformen, aber nicht zum R
-2 oder zu einem Torus T
-2
-. Für
-den R
-2 müsste man die Oberfläche unendlich ausdehnen und für einen Torus müsste man ein
-Loch machen.
-Erforderliche Vorkenntnisse
-Es wird ein sicherer Umgang mit den Quantoren (∀, ∃), Mengenschreibweisen (∪, ∩, \, ∅, R,P(M))
-und ganz allgemein formaler Schreibweise vorausgesetzt. Auch die Beweisführung mittels Wider￾spruchsbeweisen sollte bekannt sein und der Umgang mit komplexen Zahlen C, deren Betrag,
-Folgen und Häufungspunkten nicht weiter schwer fallen. Diese Vorkenntnisse werden vor allem
-in „Analysis I“ vermittelt.
-Außerdem wird vorausgesetzt, dass (affine) Vektorräume, Faktorräume, lineare Unabhängigkeit,
-der Spektralsatz und der projektive Raum P(R) aus „Lineare Algebra I“ bekannt sind. In „Lineare
+Vorwort
+Dieses Skript wurde im Wintersemester 2013/2014 von Martin Thoma geschrieben. Es beinhaltet
+die Mitschriften aus der Vorlesung von Prof. Dr. Herrlich sowie die Mitschriften einiger Übungen
+und Tutorien.
+Das Skript ist kostenlos über martin-thoma.com/geotopo verfügbar. Wer es gerne in A5 (SchwarzWeiß, Ringbindung) für 10 Euro hätte, kann mir eine E-Mail schicken (info@martin-thoma.de).
+Danksagungen
+An dieser Stelle möchte ich Herrn Prof. Dr. Herrlich für einige Korrekturvorschläge und einen
+gut strukturierten Tafelanschrieb danken, der als Vorlage für dieses Skript diente. Tatsächlich
+basiert die Struktur dieses Skripts auf der Vorlesung von Herrn Prof. Dr. Herrlich und ganze
+Abschnitte konnten direkt mit LATEX umgesetzt werden. Vielen Dank für die Erlaubnis, Ihre
+Inhalte in diesem Skript einbauen zu dürfen!
+Vielen Dank auch an Frau Lenz und Frau Randecker, die es mir erlaubt haben, ihre Übungsaufgaben und Lösungen zu benutzen.
+Jérôme Urhausen hat durch viele Verbesserungsvorschläge und Beweise zu einer erheblichen
+Qualitätssteigerung am Skript beigetragen und meine Tutorin Sarah hat mir viele Fragen per
+E-Mail und nach dem Tutorium beantwortet. Danke!
+Was ist Topologie?
+Die Kugeloberfläche S
+2
+lässt sich durch strecken, stauchen und umformen zur Würfeloberfläche
+oder der Oberfläche einer Pyramide verformen, aber nicht zum R
+2 oder zu einem Torus T2
+. Für
+den R
+2 müsste man die Oberfläche unendlich ausdehnen und für einen Torus müsste man ein
+Loch machen.
+Erforderliche Vorkenntnisse
+Es wird ein sicherer Umgang mit den Quantoren (∀, ∃), Mengenschreibweisen (∪, ∩, \, ∅, R,P(M))
+und ganz allgemein formaler Schreibweise vorausgesetzt. Auch die Beweisführung mittels Widerspruchsbeweisen sollte bekannt sein und der Umgang mit komplexen Zahlen C, deren Betrag,
+Folgen und Häufungspunkten nicht weiter schwer fallen. Diese Vorkenntnisse werden vor allem
+in „Analysis I“ vermittelt.
+Außerdem wird vorausgesetzt, dass (affine) Vektorräume, Faktorräume, lineare Unabhängigkeit,
+der Spektralsatz und der projektive Raum P(R) aus „Lineare Algebra I“ bekannt sind. In „Lineare
 Algebra II“ wird der Begriff der Orthonormalbasis eingeführt.
-iii
-(a) S
-2
-(b) Würfel (c) Pyramide
-y
-x
-(d) R
-2
-(e) T
-2
-Abbildung 0.1: Beispiele für verschiedene Formen
-Obwohl es nicht vorausgesetzt wird, könnte es von Vorteil sein „Einführung in die Algebra und
+
+(a) S
+2
+(b) Würfel (c) Pyramide
+y
+x
+(d) R
+2
+(e) T
+2
+Abbildung 0.1: Beispiele für verschiedene Formen
+Obwohl es nicht vorausgesetzt wird, könnte es von Vorteil sein „Einführung in die Algebra und
 Zahlentheorie“ gehört zu haben.
-Inhaltsverzeichnis
-1 Topologische Grundbegriffe 2
-1.1 Topologische Räume . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 2
-1.2 Metrische Räume . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 6
-1.3 Stetigkeit . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 9
-1.4 Zusammenhang . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11
-1.5 Kompaktheit . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 14
-1.6 Wege und Knoten . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 17
-Übungsaufgaben . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 22
-2 Mannigfaltigkeiten und Simplizialkomplexe 24
-2.1 Topologische Mannigfaltigkeiten . . . . . . . . . . . . . . . . . . . . . . . . . . . . 24
-2.2 Differenzierbare Mannigfaltigkeiten . . . . . . . . . . . . . . . . . . . . . . . . . . 29
-2.3 Simplizialkomplex . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 34
-Übungsaufgaben . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 43
-3 Fundamentalgruppe und Überlagerungen 44
-3.1 Homotopie von Wegen . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 44
-3.2 Fundamentalgruppe . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 47
-3.3 Überlagerungen . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 51
-3.4 Gruppenoperationen . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 61
-4 Euklidische und nichteuklidische Geometrie 64
-4.1 Axiome für die euklidische Ebene . . . . . . . . . . . . . . . . . . . . . . . . . . . 64
-4.2 Weitere Eigenschaften einer euklidischen Ebene . . . . . . . . . . . . . . . . . . . 74
-4.2.1 Flächeninhalt . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 74
-4.3 Hyperbolische Geometrie . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 77
-Übungsaufgaben . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 86
-5 Krümmung 87
-5.1 Krümmung von Kurven . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 87
-5.2 Tangentialebene . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 89
-5.3 Gauß-Krümmung . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 91
-5.4 Erste und zweite Fundamentalform . . . . . . . . . . . . . . . . . . . . . . . . . . 94
-Lösungen der Übungsaufgaben 99
-Bildquellen 105
-Abkürzungsverzeichnis 106
-Ergänzende Definitionen und Sätze 107
+Inhaltsverzeichnis
+1 Topologische Grundbegriffe 2
+1.1 Topologische Räume . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 2
+1.2 Metrische Räume . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 6
+1.3 Stetigkeit . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 9
+1.4 Zusammenhang . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11
+1.5 Kompaktheit . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 14
+1.6 Wege und Knoten . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 17
+Übungsaufgaben . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 22
+2 Mannigfaltigkeiten und Simplizialkomplexe 24
+2.1 Topologische Mannigfaltigkeiten . . . . . . . . . . . . . . . . . . . . . . . . . . . . 24
+2.2 Differenzierbare Mannigfaltigkeiten . . . . . . . . . . . . . . . . . . . . . . . . . . 29
+2.3 Simplizialkomplex . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 34
+Übungsaufgaben . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 43
+3 Fundamentalgruppe und Überlagerungen 44
+3.1 Homotopie von Wegen . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 44
+3.2 Fundamentalgruppe . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 47
+3.3 Überlagerungen . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 51
+3.4 Gruppenoperationen . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 61
+4 Euklidische und nichteuklidische Geometrie 64
+4.1 Axiome für die euklidische Ebene . . . . . . . . . . . . . . . . . . . . . . . . . . . 64
+4.2 Weitere Eigenschaften einer euklidischen Ebene . . . . . . . . . . . . . . . . . . . 74
+4.2.1 Flächeninhalt . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 74
+4.3 Hyperbolische Geometrie . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 77
+Übungsaufgaben . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 86
+5 Krümmung 87
+5.1 Krümmung von Kurven . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 87
+5.2 Tangentialebene . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 89
+5.3 Gauß-Krümmung . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 91
+5.4 Erste und zweite Fundamentalform . . . . . . . . . . . . . . . . . . . . . . . . . . 94
+Lösungen der Übungsaufgaben 99
+Bildquellen 105
+Abkürzungsverzeichnis 106
+Ergänzende Definitionen und Sätze 107
 Symbolverzeichnis 108
-2 Inhaltsverzeichnis
+ Inhaltsverzeichnis
 Stichwortverzeichnis 111
-1 Topologische Grundbegriffe
-1.1 Topologische Räume
-Definition 1
-Ein topologischer Raum ist ein Paar (X, T) bestehend aus einer Menge X und T ⊆ P(X)
-mit folgenden Eigenschaften
-(i) ∅, X ∈ T
-(ii) Sind U1, U2 ∈ T, so ist U1 ∩ U2 ∈ T
-(iii) Ist I eine Menge und Ui ∈ T für jedes i ∈ I, so ist [
-i∈I
-Ui ∈ T
-Die Elemente von T heißen offene Teilmengen von X.
-A ⊆ X heißt abgeschlossen, wenn X \ A offen ist.
-Es gibt auch Mengen, die weder abgeschlossen, noch offen sind wie z. B. [0, 1). Auch gibt es
-Mengen, die sowohl abgeschlossen als auch offen sind.
-Bemerkung 1 (Mengen, die offen & abgeschlossen sind, ex.)
-Betrachte ∅ und X mit der trivialen Topologie Ttriv = { ∅, X }.
-Es gilt: X ∈ T und ∅ ∈ T, d. h. X und ∅ sind offen. Außerdem XC = X \ X = ∅ ∈ T und
-X \ ∅ = X ∈ T, d. h. X und ∅ sind als Komplement offener Mengen abgeschlossen. 
-Beispiel 1 (Topologien)
-1) X = R
-n mit der von der euklidischen Metrik erzeugten Topologie TEuklid:
-U ⊆ R
-n
-offen ⇔ für jedes x ∈ U gibt es r > 0,
-sodass Br(x) = { y ∈ R
-n
-| d(x, y) < r } ⊆ U
-Diese Topologie wird auch „Standardtopologie des R
-n
-“ genannt. Sie beinhaltet unter
-anderem alle offenen Kugeln, aber z. B. auch Schnitte zweier Kugeln mit unterschiedli￾chem Mittelpunkt (vgl. Definition 1.ii).
-2) Jeder metrische Raum (X, d) ist auch ein topologischer Raum.
-3) Für eine Menge X heißt TDiskret = P(X) diskrete Topologie.
-4) X := R, TZ := { U ⊆ R | R \ U endlich } ∪ { ∅ } heißt Zariski-Topologie
-Beobachtungen:
-• U ∈ TZ ⇔ ∃f ∈ R[X], sodass R \ U = V (f) = { x ∈ R | f(x) = 0 }
+1 Topologische Grundbegriffe
+1.1 Topologische Räume
+Definition 1
+Ein topologischer Raum ist ein Paar (X, T) bestehend aus einer Menge X und T ⊆ P(X)
+mit folgenden Eigenschaften
+(i) ∅, X ∈ T
+(ii) Sind U1, U2 ∈ T, so ist U1 ∩ U2 ∈ T
+(iii) Ist I eine Menge und Ui ∈ T für jedes i ∈ I, so ist [
+i∈I
+Ui ∈ T
+Die Elemente von T heißen offene Teilmengen von X.
+A ⊆ X heißt abgeschlossen, wenn X \ A offen ist.
+Es gibt auch Mengen, die weder abgeschlossen, noch offen sind wie z. B. [0, 1). Auch gibt es
+Mengen, die sowohl abgeschlossen als auch offen sind.
+Bemerkung 1 (Mengen, die offen & abgeschlossen sind, ex.)
+Betrachte ∅ und X mit der trivialen Topologie Ttriv = { ∅, X }.
+Es gilt: X ∈ T und ∅ ∈ T, d. h. X und ∅ sind offen. Außerdem XC = X \ X = ∅ ∈ T und
+X \ ∅ = X ∈ T, d. h. X und ∅ sind als Komplement offener Mengen abgeschlossen. 
+Beispiel 1 (Topologien)
+1) X = R
+n mit der von der euklidischen Metrik erzeugten Topologie TEuklid:
+U ⊆ R
+n
+offen ⇔ für jedes x ∈ U gibt es r > 0,
+sodass Br(x) = { y ∈ R
+n
+| d(x, y) < r } ⊆ U
+Diese Topologie wird auch „Standardtopologie des R
+n
+“ genannt. Sie beinhaltet unter
+anderem alle offenen Kugeln, aber z. B. auch Schnitte zweier Kugeln mit unterschiedlichem Mittelpunkt (vgl. Definition 1.ii).
+2) Jeder metrische Raum (X, d) ist auch ein topologischer Raum.
+3) Für eine Menge X heißt TDiskret = P(X) diskrete Topologie.
+4) X := R, TZ := { U ⊆ R | R \ U endlich } ∪ { ∅ } heißt Zariski-Topologie
+Beobachtungen:
+• U ∈ TZ ⇔ ∃f ∈ R[X], sodass R \ U = V (f) = { x ∈ R | f(x) = 0 }
 • Es gibt keine disjunkten offenen Mengen in TZ.
-4 1.1. TOPOLOGISCHE RÄUME
-5) X := R
-n
-, TZ = {U ⊆ R
-n
-|Es gibt Polynome f1, . . . , fr ∈ R[X1, . . . , Xn] sodass
-R
-n \ U = V (f1, . . . , fr)}
-6) X := { 0, 1 } , T = { ∅, { 0, 1 } , { 0 } } heißt Sierpińskiraum.
-∅, { 0, 1 } , { 1 } sind dort alle abgeschlossenen Mengen.
-Definition 2
-Sei (X, T) ein topologischer Raum und x ∈ X.
-Eine Teilmenge U ⊆ X heißt Umgebung von x, wenn es ein U0 ∈ T gibt mit x ∈ U0 und
-U0 ⊆ U.
-Gilt eine Eigenschaft in einer Umgebung, so sagt man, dass die Eigenschaft lokal gilt.
-Definition 3
-Sei (X, T) ein topologischer Raum und M ⊆ X eine Teilmenge.
-a) M◦
-:= { x ∈ M | M ist Umgebung von x } =
-[
-U⊆M
-U∈T
-U heißt Inneres oder offener
-Kern von M.
-b) M := \
-M⊆A
-A abgeschlossen
-A heißt abgeschlossene Hülle oder Abschluss von M.
-c) ∂M := M \ M◦ heißt Rand von M.
-d) M heißt dicht in X, wenn M = X ist.
-Beispiel 2
-1) Sei X = R mit euklidischer Topologie und M = Q. Dann gilt: M = R und M◦ = ∅
-2) Sei X = R und M = (a, b). Dann gilt: M = [a, b]
-3) Sei X = R, T = TZ und M = (a, b). Dann gilt: M = R
-Definition 4
-Sei (X, T) ein topologischer Raum.
-a) B ⊆ T heißt Basis der Topologie T, wenn jedes U ∈ T Vereinigung von Elementen
-aus B ist.
-b) S ⊆ T heißt Subbasis der Topologie T, wenn jedes U ∈ T Vereinigung von endlichen
-Durchschnitten von Elementen aus S ist.
-Beispiel 3 (Basis und Subbasis)
-1) Jede Basis ist auch eine Subbasis, z.B.
-S = { (a, b) | a, b ∈ R, a < b } ist für R mit der Standardtopologie sowohl Basis als
-auch Subbasis.
-2) Gegeben sei X = R
-n mit euklidischer Topologie T. Dann ist
-B = { Br(x) | r ∈ Q>0, x ∈ Q
-n
-}
-ist eine abzählbare Basis von T.
-3) Sei (X, T) ein topologischer Raum mit X = { 0, 1, 2 } und T = { ∅, { 0 } , { 0, 1 } , { 0, 2 } , X }.
+ 1.1. TOPOLOGISCHE RÄUME
+5) $X := \mathbb{R}^n$, $T_Z = \{ U \subseteq \mathbb{R}^n \mid \text{Es gibt Polynome } f_1, \dots, f_r \in \mathbb{R}[X_1, \dots, X_n] \text{ sodass } \mathbb{R}^n \setminus U = V(f_1, \dots, f_r) \}$
+6) X := { 0, 1 } , T = { ∅, { 0, 1 } , { 0 } } heißt Sierpińskiraum.
+∅, { 0, 1 } , { 1 } sind dort alle abgeschlossenen Mengen.
+Definition 2
+Sei (X, T) ein topologischer Raum und x ∈ X.
+Eine Teilmenge U ⊆ X heißt Umgebung von x, wenn es ein U0 ∈ T gibt mit x ∈ U0 und
+U0 ⊆ U.
+Gilt eine Eigenschaft in einer Umgebung, so sagt man, dass die Eigenschaft lokal gilt.
+Definition 3
+Sei (X, T) ein topologischer Raum und M ⊆ X eine Teilmenge.
+a) $M^\circ := \{ x \in M \mid M \text{ ist Umgebung von } x \} = \bigcup_{U \subseteq M,\ U \in T} U$ heißt Inneres oder offener
+Kern von M.
+b) $\overline{M} := \bigcap_{M \subseteq A,\ A \text{ abgeschlossen}} A$ heißt abgeschlossene Hülle oder Abschluss von M.
+c) $\partial M := \overline{M} \setminus M^\circ$ heißt Rand von M.
+d) M heißt dicht in X, wenn $\overline{M} = X$ ist.
+Beispiel 2
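+1) Sei $X = \mathbb{R}$ mit euklidischer Topologie und $M = \mathbb{Q}$. Dann gilt: $\overline{M} = \mathbb{R}$ und $M^\circ = \emptyset$
+2) Sei $X = \mathbb{R}$ und $M = (a, b)$. Dann gilt: $\overline{M} = [a, b]$
+3) Sei $X = \mathbb{R}$, $T = T_Z$ und $M = (a, b)$. Dann gilt: $\overline{M} = \mathbb{R}$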
+Definition 4
+Sei (X, T) ein topologischer Raum.
+a) B ⊆ T heißt Basis der Topologie T, wenn jedes U ∈ T Vereinigung von Elementen
+aus B ist.
+b) S ⊆ T heißt Subbasis der Topologie T, wenn jedes U ∈ T Vereinigung von endlichen
+Durchschnitten von Elementen aus S ist.
+Beispiel 3 (Basis und Subbasis)
+1) Jede Basis ist auch eine Subbasis, z.B.
+S = { (a, b) | a, b ∈ R, a < b } ist für R mit der Standardtopologie sowohl Basis als
+auch Subbasis.
+2) Gegeben sei $X = \mathbb{R}^n$ mit euklidischer Topologie T. Dann ist
+$B = \{ B_r(x) \mid r \in \mathbb{Q}_{>0},\ x \in \mathbb{Q}^n \}$
+eine abzählbare Basis von T.
+3) Sei (X, T) ein topologischer Raum mit X = { 0, 1, 2 } und T = { ∅, { 0 } , { 0, 1 } , { 0, 2 } , X }.
 Dann ist S = { ∅, { 0, 1 } , { 0, 2 } } eine Subbasis von T, da gilt:
-5 1.1. TOPOLOGISCHE RÄUME
-• S ⊆ T
-• ∅, { 0, 1 } und { 0, 2 } ∈ S
-• { 0 } = { 0, 1 } ∩ { 0, 2 }
-• X = { 0, 1 } ∪ { 0, 2 }
-Allerings ist S keine Basis von (X, T), da { 0 } nicht als Vereinigung von Elementen
-aus S erzeugt werden kann.
-Bemerkung 2
-Sei X eine Menge und S ⊆ P(X). Dann gibt es genau eine Topologie T auf X, für die S
-Subbasis ist.
-Definition 5
-Sei (X, T) ein topologischer Raum und Y ⊆ X.
-TY := { U ∩ Y | U ∈ T } ist eine Topologie auf Y .
-TY heißt Teilraumtopologie und (Y, TY ) heißt ein Teilraum von (X, T).
-Die Teilraumtopologie wird auch Spurtopologie oder Unterraumtopologie genannt.
-Definition 6
-Seien X1, X2 topologische Räume.
-U ⊆ X1 × X2 sei offen, wenn es zu jedem x = (x1, x2) ∈ U Umgebungen Ui um xi mit
-i = 1, 2 gibt, sodass U1 × U2 ⊆ U gilt.
-T = { U ⊆ X1 × X2 | U offen } ist eine Topologie auf X1×X2. Sie heißt Produkttopologie.
-B = { U1 × U2 | Ui offen in Xi
-, i = 1, 2 } ist eine Basis von T.
-U
-x
-x2
-x1
-U2
-U1
-X1
-X2
-Abbildung 1.1: Zu x = (x1, x2) gibt es Umgebungen U1, U2 mit U1 × U2 ⊆ U
-Beispiel 4 (Produkttopologien)
-1) X1 = X2 = R mit euklidischer Topologie.
-⇒ Die Produkttopologie auf R × R = R
-2
-stimmt mit der euklidischen Topologie auf
-R
-2 überein.
-2) X1 = X2 = R mit Zariski-Topologie. T Produkttopologie auf R
-2
-: U1 × U2
+ 1.1. TOPOLOGISCHE RÄUME
+• S ⊆ T
+• ∅, { 0, 1 } und { 0, 2 } ∈ S
+• { 0 } = { 0, 1 } ∩ { 0, 2 }
+• X = { 0, 1 } ∪ { 0, 2 }
+Allerdings ist S keine Basis von (X, T), da { 0 } nicht als Vereinigung von Elementen
+aus S erzeugt werden kann.
+Bemerkung 2
+Sei X eine Menge und S ⊆ P(X). Dann gibt es genau eine Topologie T auf X, für die S
+Subbasis ist.
+Definition 5
+Sei (X, T) ein topologischer Raum und Y ⊆ X.
+TY := { U ∩ Y | U ∈ T } ist eine Topologie auf Y .
+TY heißt Teilraumtopologie und (Y, TY ) heißt ein Teilraum von (X, T).
+Die Teilraumtopologie wird auch Spurtopologie oder Unterraumtopologie genannt.
+Definition 6
+Seien X1, X2 topologische Räume.
+U ⊆ X1 × X2 sei offen, wenn es zu jedem x = (x1, x2) ∈ U Umgebungen Ui um xi mit
+i = 1, 2 gibt, sodass U1 × U2 ⊆ U gilt.
+T = { U ⊆ X1 × X2 | U offen } ist eine Topologie auf X1×X2. Sie heißt Produkttopologie.
+B = { U1 × U2 | Ui offen in Xi, i = 1, 2 } ist eine Basis von T.
+U
+x
+x2
+x1
+U2
+U1
+X1
+X2
+Abbildung 1.1: Zu x = (x1, x2) gibt es Umgebungen U1, U2 mit U1 × U2 ⊆ U
+Beispiel 4 (Produkttopologien)
+1) X1 = X2 = R mit euklidischer Topologie.
+⇒ Die Produkttopologie auf $\mathbb{R} \times \mathbb{R} = \mathbb{R}^2$ stimmt mit der euklidischen Topologie auf
+$\mathbb{R}^2$ überein.
+2) $X_1 = X_2 = \mathbb{R}$ mit Zariski-Topologie. T Produkttopologie auf $\mathbb{R}^2$: $U_1 \times U_2$
 (Siehe Abbildung 1.2)
-6 1.1. TOPOLOGISCHE RÄUME
-U1 = R \ N
-U2 =
-R \ N
-Abbildung 1.2: Zariski-Topologie auf R
-2
-Definition 7
-Sei X ein topologischer Raum, ∼ eine Äquivalenzrelation auf X, X = X/∼ sei die Menge
-der Äquivalenzklassen, π : X → X, x 7→ [x]∼.
-TX
-:= 
-U ⊆ X
-
- π
-−1
-(U) ∈ TX
-	
-(X, TX
-) heißt Quotiententopologie.
-Beispiel 5
-X = R, a ∼ b :⇔ a − b ∈ Z
--1 0 1 2 3 4 5 R
-0
-a
-U
-π a
-−1
-(u)
-0 ∼ 1, d. h. [0] = [1]
-Beispiel 6
-Sei X = R
-2 und (x1, y1) ∼ (x2, y2) ⇔ x1 − x2 ∈ Z und y1 − y2 ∈ Z. Dann ist X/∼ ein Torus.
-Beispiel 7 (Projektiver Raum)
-X = R
-n+1 \ { 0 } , x ∼ y ⇔ ∃λ ∈ R
-× mit y = λx
-⇔ x und y liegen auf der gleichen
-Ursprungsgerade
-X = P
-n
+ 1.1. TOPOLOGISCHE RÄUME
+U1 = R \ N
+U2 =
+R \ N
+Abbildung 1.2: Zariski-Topologie auf R
+2
+Definition 7
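+Sei X ein topologischer Raum, ∼ eine Äquivalenzrelation auf X, $\overline{X} = X/{\sim}$ sei die Menge
+der Äquivalenzklassen, $\pi: X \to \overline{X}$, $x \mapsto [x]_\sim$.
+$T_{\overline{X}} := \{ U \subseteq \overline{X} \mid \pi^{-1}(U) \in T_X \}$
+$(\overline{X}, T_{\overline{X}})$ heißt Quotiententopologie.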
+Beispiel 5
+X = R, a ∼ b :⇔ a − b ∈ Z
+-1 0 1 2 3 4 5 R
+0
+a
+U
+π a
+−1
+(u)
+0 ∼ 1, d. h. [0] = [1]
+Beispiel 6
+Sei $X = \mathbb{R}^2$ und $(x_1, y_1) \sim (x_2, y_2) \Leftrightarrow x_1 - x_2 \in \mathbb{Z}$ und $y_1 - y_2 \in \mathbb{Z}$. Dann ist $X/{\sim}$ ein Torus.
+Beispiel 7 (Projektiver Raum)
+$X = \mathbb{R}^{n+1} \setminus \{ 0 \}$, $x \sim y \Leftrightarrow \exists \lambda \in \mathbb{R}^\times$ mit $y = \lambda x$
+⇔ x und y liegen auf der gleichen
+Ursprungsgerade
+X = P
+n
 (R)
-7 1.2. METRISCHE RÄUME
-Also für n = 1:
-−4 −2 2 4 6 8
-−4
-−2
-2
-4
-1.2 Metrische Räume
-Definition 8
-Sei X eine Menge. Eine Abbildung d : X × X → R
-+
-0
-heißt Metrik, wenn gilt:
-(i) Definitheit: d(x, y) = 0 ⇔ x = y ∀x, y ∈ X
-(ii) Symmetrie: d(x, y) = d(y, x) ∀x, y ∈ X
-(iii) Dreiecksungleichung: d(x, z) ≤ d(x, y) + d(y, z) ∀x, y, z ∈ X
-Das Paar (X, d) heißt ein metrischer Raum.
-Bemerkung 3
-Sei (X, d) ein metrischer Raum und
-Br(x) := { y ∈ X | d(x, y) < r } für x ∈ X, r ∈ R
-+
-B = { Br(x) ⊆ P(X) | x ∈ X, r ∈ R
-+ } ist Basis einer Topologie auf X.
-Definition 9
-Seien (X, dX) und (Y, dY ) metrische Räume und ϕ : X → Y eine Abbildung mit
-∀x1, x2 ∈ X : dX(x1, x2) = dY (ϕ(x1), ϕ(x2))
-Dann heißt ϕ eine Isometrie von X nach Y .
-Beispiel 8 (Skalarprodukt erzeugt Metrik)
-Sei V ein euklidischer oder hermitescher Vektorraum mit Skalarprodukt h·, ·i. Dann wird V
-durch d(x, y) := p
-hx − y, x − yi zum metrischen Raum.
-Beispiel 9 (diskrete Metrik)
-Sei X eine Menge. Dann heißt
-d(x, y) = (
-0 falls x = y
-1 falls x 6= y
+ 1.2. METRISCHE RÄUME
+Also für n = 1:
+−4 −2 2 4 6 8
+−4
+−2
+2
+4
+1.2 Metrische Räume
+Definition 8
+Sei X eine Menge. Eine Abbildung $d: X \times X \to \mathbb{R}_0^+$ heißt Metrik, wenn gilt:
+(i) Definitheit: d(x, y) = 0 ⇔ x = y ∀x, y ∈ X
+(ii) Symmetrie: d(x, y) = d(y, x) ∀x, y ∈ X
+(iii) Dreiecksungleichung: d(x, z) ≤ d(x, y) + d(y, z) ∀x, y, z ∈ X
+Das Paar (X, d) heißt ein metrischer Raum.
+Bemerkung 3
+Sei (X, d) ein metrischer Raum und
+$B_r(x) := \{ y \in X \mid d(x, y) < r \}$ für $x \in X$, $r \in \mathbb{R}^+$
+$B = \{ B_r(x) \subseteq \mathcal{P}(X) \mid x \in X,\ r \in \mathbb{R}^+ \}$ ist Basis einer Topologie auf X.
+Definition 9
+Seien (X, dX) und (Y, dY ) metrische Räume und ϕ : X → Y eine Abbildung mit
+∀x1, x2 ∈ X : dX(x1, x2) = dY (ϕ(x1), ϕ(x2))
+Dann heißt ϕ eine Isometrie von X nach Y .
+Beispiel 8 (Skalarprodukt erzeugt Metrik)
+Sei V ein euklidischer oder hermitescher Vektorraum mit Skalarprodukt $\langle \cdot, \cdot \rangle$. Dann wird V
+durch $d(x, y) := \sqrt{\langle x - y, x - y \rangle}$ zum metrischen Raum.
+Beispiel 9 (diskrete Metrik)
+Sei X eine Menge. Dann heißt
+$d(x, y) = \begin{cases} 0 & \text{falls } x = y \\ 1 & \text{falls } x \neq y \end{cases}$
 die diskrete Metrik. Die Metrik d induziert die diskrete Topologie.
-8 1.2. METRISCHE RÄUME
-Beispiel 10
-X = R
-2 und d ((x1, y1),(x2, y2)) := max(kx1 − x2k, ky1 − y2k) ist Metrik.
-Beobachtung: d erzeugt die euklidische Topologie.
-Br(0) =
-r r
-r
-r
-(a) Br(0) (b) Euklidische Topologie
+ 1.2. METRISCHE RÄUME
+Beispiel 10
+$X = \mathbb{R}^2$ und $d((x_1, y_1), (x_2, y_2)) := \max(\|x_1 - x_2\|, \|y_1 - y_2\|)$ ist Metrik.
+Beobachtung: d erzeugt die euklidische Topologie.
+Br(0) =
+r r
+r
+r
+(a) Br(0) (b) Euklidische Topologie
 Abbildung 1.3: Veranschaulichungen zur Metrik d aus Beispiel 10
-9 1.2. METRISCHE RÄUME
-Beispiel 11 (SNCF-Metrik1
-)
-X = R
-2
-−4 −2 2 4 6 8
-−4
-−2
-2
-4
-Definition 10
-Ein topologischer Raum X heißt hausdorffsch, wenn es für je zwei Punkte x =6 y in X
-Umgebungen Ux um x und Uy um y gibt, sodass Ux ∩ Uy = ∅.
-Bemerkung 4 (Trennungseigenschaft)
-Metrische Räume sind hausdorffsch, wegen
-d(x, y) > 0 ⇒ ∃ε > 0 : Bε(x) ∩ Bε(y) = ∅
-Beispiel 12 (Topologische Räume und Hausdorff-Räume)
-1) (R, TZ) ist ein topologischer Raum, der nicht hausdorffsch ist.
-2) (R, TEuklid) ist ein topologischer Hausdorff-Raum.
-Bemerkung 5 (Eigenschaften von Hausdorff-Räumen)
-Seien X, X1, X2 Hausdorff-Räume.
-a) Jeder Teilraum von X ist hausdorffsch.
-b) X1 × X2 ist hausdorffsch (vgl. Abbildung 1.4).
-Definition 11
-Sei X ein topologischer Raum und (x)n∈N eine Folge in X. x ∈ X heißt Grenzwert oder
-Limes von (xn), wenn es für jede Umgebung U von x ein n0 gibt, sodass xn ∈ U für alle
-n ≥ n0.
-Bemerkung 6
-Ist X hausdorffsch, so hat jede Folge in X höchstens einen Grenzwert.
-Beweis: Sei (xn) eine konvergierende Folge und x und y Grenzwerte der Folge.
-Da X hausdorffsch ist, gibt es Umgebungen Ux von x und Uy von y mit Ux ∩ Uy = ∅ falls
-x 6= y. Da (xn) gegen x und y konvergiert, existiert ein n0 mit xn ∈ Ux ∩ Uy für alle n ≥ n0
-⇒ x = y 
+ 1.2. METRISCHE RÄUME
+Beispiel 11 (SNCF-Metrik¹)
+$X = \mathbb{R}^2$
+−4 −2 2 4 6 8
+−4
+−2
+2
+4
+Definition 10
+Ein topologischer Raum X heißt hausdorffsch, wenn es für je zwei Punkte x ≠ y in X
+Umgebungen Ux um x und Uy um y gibt, sodass Ux ∩ Uy = ∅.
+Bemerkung 4 (Trennungseigenschaft)
+Metrische Räume sind hausdorffsch, wegen
+d(x, y) > 0 ⇒ ∃ε > 0 : Bε(x) ∩ Bε(y) = ∅
+Beispiel 12 (Topologische Räume und Hausdorff-Räume)
+1) (R, TZ) ist ein topologischer Raum, der nicht hausdorffsch ist.
+2) (R, TEuklid) ist ein topologischer Hausdorff-Raum.
+Bemerkung 5 (Eigenschaften von Hausdorff-Räumen)
+Seien X, X1, X2 Hausdorff-Räume.
+a) Jeder Teilraum von X ist hausdorffsch.
+b) X1 × X2 ist hausdorffsch (vgl. Abbildung 1.4).
+Definition 11
+Sei X ein topologischer Raum und $(x_n)_{n \in \mathbb{N}}$ eine Folge in X. x ∈ X heißt Grenzwert oder
+Limes von (xn), wenn es für jede Umgebung U von x ein n0 gibt, sodass xn ∈ U für alle
+n ≥ n0.
+Bemerkung 6
+Ist X hausdorffsch, so hat jede Folge in X höchstens einen Grenzwert.
+Beweis: Sei (xn) eine konvergierende Folge und x und y Grenzwerte der Folge.
+Da X hausdorffsch ist, gibt es Umgebungen Ux von x und Uy von y mit Ux ∩ Uy = ∅ falls
+x ≠ y. Da (xn) gegen x und y konvergiert, existiert ein n0 mit xn ∈ Ux ∩ Uy für alle n ≥ n0
+⇒ x = y 
 1Diese Metrik wird auch „französische Eisenbahnmetrik“ genannt.
-10 1.3. STETIGKEIT
-(x1, y1) (x2, y2)
-x1 x2
-U1 × X2 U2 × X2
-X1
-X2
-Abbildung 1.4: Wenn X1, X2 hausdorffsch sind, dann auch X1 × X2
-1.3 Stetigkeit
-Definition 12
-Seien (X, TX),(Y, TY ) topologische Räume und f : X → Y eine Abbildung.
-a) f heißt stetig :⇔ ∀U ∈ TY : f
-−1
-(U) ∈ TX.
-b) f heißt Homöomorphismus, wenn f stetig ist und es eine stetige Abbildung g :
-Y → X gibt, sodass g ◦ f = idX und f ◦ g = idY .
-Bemerkung 72
-Seien X, Y metrische Räume und f : X → Y eine Abbildung.
-Dann gilt: f ist stetig ⇔ zu jedem x ∈ X und jedem ε > 0 gibt es δ(x, ε) > 0, sodass für
-alle y ∈ X mit d(x, y) < δ gilt dY (f(x), f(y)) < ε.
-Beweis: „⇒“: Sei x ∈ X, ε > 0 gegeben und U := Bε(f(x)).
-Dann ist U offen in Y .
-Def. 12.a =====⇒ f
-−1
-(U) ist offen in X. Dann ist x ∈ f
-−1
-(U).
-⇒ ∃δ > 0, sodass Bδ(x) ⊆ f
-−1
-(U)
-⇒ f(Bδ(x)) ⊆ U
-⇒ { y ∈ X | dX(x, y) < δ } ⇒ Beh.
-„⇐“: Sei U ⊆ Y offen, X ∈ f
-−1
-(U).
-Dann gibt es ε > 0, sodass Bε(f(x)) ⊆ U
-Vor. ==⇒ Es gibt δ > 0, sodass f(Bδ(x)) ⊆ Bε(f(x)))
-⇒ Bδ(x) ⊆ f
-−1
-(Bε(f(x))) ⊆ f
-−1
-(U) 
-Bemerkung 8
-Seien X, Y topologische Räume und f : X → Y eine Abbildung. Dann gilt:
-f ist stetig
-⇔ für jede abgeschlossene Teilmenge A ⊆ Y gilt : f
-−1
-(A) ⊆ X ist abgeschlossen.
-Beispiel 13 (Stetige Abbildungen und Homöomorphismen)
-1) Für jeden topologischen Raum X gilt: idX : X → X ist Homöomorphismus.
+ 1.3. STETIGKEIT
+(x1, y1) (x2, y2)
+x1 x2
+U1 × X2 U2 × X2
+X1
+X2
+Abbildung 1.4: Wenn X1, X2 hausdorffsch sind, dann auch X1 × X2
+1.3 Stetigkeit
+Definition 12
+Seien (X, TX),(Y, TY ) topologische Räume und f : X → Y eine Abbildung.
+a) f heißt stetig :⇔ $\forall U \in T_Y: f^{-1}(U) \in T_X$.
+b) f heißt Homöomorphismus, wenn f stetig ist und es eine stetige Abbildung g :
+Y → X gibt, sodass g ◦ f = idX und f ◦ g = idY .
+Bemerkung 7²
+Seien X, Y metrische Räume und f : X → Y eine Abbildung.
+Dann gilt: f ist stetig ⇔ zu jedem x ∈ X und jedem ε > 0 gibt es δ(x, ε) > 0, sodass für
+alle y ∈ X mit d(x, y) < δ gilt dY (f(x), f(y)) < ε.
+Beweis: „⇒“: Sei x ∈ X, ε > 0 gegeben und U := Bε(f(x)).
+Dann ist U offen in Y .
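+$\overset{\text{Def. 12.a}}{\Longrightarrow}$ $f^{-1}(U)$ ist offen in X. Dann ist $x \in f^{-1}(U)$.
+⇒ $\exists \delta > 0$, sodass $B_\delta(x) \subseteq f^{-1}(U)$
+⇒ $f(B_\delta(x)) \subseteq U$
+⇒ $\{ y \in X \mid d_X(x, y) < \delta \}$ ⇒ Beh.
+„⇐“: Sei $U \subseteq Y$ offen, $x \in f^{-1}(U)$.
+Dann gibt es ε > 0, sodass $B_\varepsilon(f(x)) \subseteq U$
+$\overset{\text{Vor.}}{\Longrightarrow}$ Es gibt δ > 0, sodass $f(B_\delta(x)) \subseteq B_\varepsilon(f(x))$
+⇒ $B_\delta(x) \subseteq f^{-1}(B_\varepsilon(f(x))) \subseteq f^{-1}(U)$ $\blacksquare$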
+Bemerkung 8
+Seien X, Y topologische Räume und f : X → Y eine Abbildung. Dann gilt:
+f ist stetig
+⇔ für jede abgeschlossene Teilmenge $A \subseteq Y$ gilt: $f^{-1}(A) \subseteq X$ ist abgeschlossen.
+Beispiel 13 (Stetige Abbildungen und Homöomorphismen)
+1) Für jeden topologischen Raum X gilt: idX : X → X ist Homöomorphismus.
 2Es wird die Äquivalenz von Stetigkeit im Sinne der Analysis und Topologie auf metrischen Räumen gezeigt.
-11 1.3. STETIGKEIT
-2) Ist (Y, TY ) trivialer topologischer Raum, d. h. TY = Ttriv, so ist jede Abbildung
-f : X → Y stetig.
-3) Ist X diskreter topologischer Raum, so ist f : X → Y stetig für jeden topologischen
-Raum Y und jede Abbildung f.
-4) Sei X = [0, 1), Y = S
-1 = { z ∈ C | kzk = 1 } und f(t) = e
-2πit
-.
-0 1 R
-0
-f
-g
-Abbildung 1.5: Beispiel einer stetigen Funktion f, deren Umkehrabbildung g nicht stetig ist.
-Die Umkehrabbildung g ist nicht stetig, da g
-−1
-(U) nicht offen ist (vgl. Abbildung 1.5).
-Bemerkung 9 (Verkettungen stetiger Abbildungen sind stetig)
-Seien X, Y, Z topologische Räume, f : X → Y und g : Y → Z stetige Abbildungen.
-Dann ist g ◦ f : X → Z stetig.
-X
-f
-/
-g◦f
- 
-Y
-g
-
-Z
-Beweis: Sei U ⊆ Z offen ⇒ (g ◦ f)
-−1
-(U) = f
-−1
-(g
-−1
-(U)). g
-−1
-(U) ist offen in Y weil g stetig
-ist, f
-−1
-(g
-−1
-(U)) ist offen in X, weil f stetig ist. 
-Bemerkung 10
-a) Für jeden topologischen Raum X ist
-Homöo(X) := { f : X → X | f ist Homöomorphismus }
-eine Gruppe.
-b) Jede Isometrie f : X → Y zwischen metrischen Räumen ist ein Homöomorphismus.
-c) Iso(X) := { f : X → X | f ist Isometrie } ist eine Untergruppe von Homöo(X) für
-jeden metrischen Raum X.
-Bemerkung 11 (Projektionen sind stetig)
-Seien X, Y topologische Räume. πX : X × Y → X und πY : X × Y → Y die Projektionen
-πX : (x, y) 7→ x und πY : (x, y) 7→ y
-Wird X × Y mit der Produkttopologie versehen, so sind πX und πY stetig.
-Beweis: Sei U ⊆ X offen
-⇒ π
-−1
-X (U) = U × Y ist offen in X × Y . 
-Bemerkung 12
-Sei X ein topologischer Raum, ∼ eine Äquivalenzrelation auf X, X = X/∼ der Bahnenraum
-versehen mit der Quotiententopologie, π : X → X, x 7→ [x]∼.
+ 1.3. STETIGKEIT
+2) Ist (Y, TY ) trivialer topologischer Raum, d. h. TY = Ttriv, so ist jede Abbildung
+f : X → Y stetig.
+3) Ist X diskreter topologischer Raum, so ist f : X → Y stetig für jeden topologischen
+Raum Y und jede Abbildung f.
+4) Sei $X = [0, 1)$, $Y = S^1 = \{ z \in \mathbb{C} \mid \|z\| = 1 \}$ und $f(t) = e^{2 \pi i t}$.
+0 1 R
+0
+f
+g
+Abbildung 1.5: Beispiel einer stetigen Funktion f, deren Umkehrabbildung g nicht stetig ist.
+Die Umkehrabbildung g ist nicht stetig, da $g^{-1}(U)$ nicht offen ist (vgl. Abbildung 1.5).
+Bemerkung 9 (Verkettungen stetiger Abbildungen sind stetig)
+Seien X, Y, Z topologische Räume, f : X → Y und g : Y → Z stetige Abbildungen.
+Dann ist g ◦ f : X → Z stetig.
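+$\begin{array}{ccc} X & \xrightarrow{\ f\ } & Y \\ & {\scriptstyle g \circ f} \searrow & \downarrow {\scriptstyle g} \\ & & Z \end{array}$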
+Beweis: Sei $U \subseteq Z$ offen ⇒ $(g \circ f)^{-1}(U) = f^{-1}(g^{-1}(U))$. $g^{-1}(U)$ ist offen in Y, weil g stetig
+ist, $f^{-1}(g^{-1}(U))$ ist offen in X, weil f stetig ist. $\blacksquare$
+Bemerkung 10
+a) Für jeden topologischen Raum X ist
+Homöo(X) := { f : X → X | f ist Homöomorphismus }
+eine Gruppe.
+b) Jede Isometrie f : X → Y zwischen metrischen Räumen ist ein Homöomorphismus.
+c) Iso(X) := { f : X → X | f ist Isometrie } ist eine Untergruppe von Homöo(X) für
+jeden metrischen Raum X.
+Bemerkung 11 (Projektionen sind stetig)
+Seien X, Y topologische Räume. πX : X × Y → X und πY : X × Y → Y die Projektionen
+$\pi_X: (x, y) \mapsto x$ und $\pi_Y: (x, y) \mapsto y$
+Wird X × Y mit der Produkttopologie versehen, so sind πX und πY stetig.
+Beweis: Sei U ⊆ X offen
+⇒ $\pi_X^{-1}(U) = U \times Y$ ist offen in X × Y. $\blacksquare$
+Bemerkung 12
+Sei X ein topologischer Raum, ∼ eine Äquivalenzrelation auf X, $\overline{X} = X/{\sim}$ der Bahnenraum
+versehen mit der Quotiententopologie, $\pi: X \to \overline{X}$, $x \mapsto [x]_\sim$.
 Dann ist π stetig.
-12 1.4. ZUSAMMENHANG
-Beweis: Nach Definition ist U ⊆ X offen ⇔ π
-−1
-(U) ⊆ X offen. 
-Beobachtung: Die Quotiententopologie ist die feinste Topologie, sodass π stetig wird.
-Beispiel 14 (Stereographische Projektion)
-R
-n und S
-n \ { N } sind homöomorph für beliebiges N ∈ S
-n
-. Es gilt:
-S
-n =
-
-x ∈ R
-n+1 
- kxk = 1 	
-=
-(
-x ∈ R
-n+1
-
-
-
-
-
-nX
-+1
-i=1
-x
-2
-i = 1 )
-O. B. d. A. sei N =
-
-
-0
-.
-.
-.
-0
-1
-
-
-. Die Gerade durch N und P schneidet die Ebene H in genau
-einem Punkt Pˆ. P wird auf Pˆ abgebildet.
-f :S
-n
-\ { N } → R
-n
-P 7→
-genau ein Punkt
-z }| {
-LP ∩ H
-wobei R
-n = H =
-
-
-
-
-
-x1
-.
-.
-.
-xn+1
-
-
-∈ R
-n+1
-
-
-
-
-
-
-
-xn+1 = 0
-
-
-
-und LP die Gerade in R
-n+1 durch N
-und P ist.
-Sei P =
-
-
-x1
-.
-.
-.
-xn+1
-
-, so ist xn+1 < 1, also ist LP nicht parallel zu H. Also schneiden sich LP
-und H in genau einem Punkt Pˆ.
-Es gilt: f ist bijektiv und die Umkehrabbildung ist ebenfalls stetig.
-1.4 Zusammenhang
-Definition 13
-a) Ein Raum X heißt zusammenhängend, wenn es keine offenen, nichtleeren Teilmengen
-U1, U2 von X gibt mit U1 ∩ U2 = ∅ und U1 ∪ U2 = X.
-b) Eine Teilmenge Y ⊆ X heißt zusammenhängend, wenn Y als topologischer Raum mit
+ 1.4. ZUSAMMENHANG
+Beweis: Nach Definition ist $U \subseteq \overline{X}$ offen ⇔ $\pi^{-1}(U) \subseteq X$ offen. $\blacksquare$
+Beobachtung: Die Quotiententopologie ist die feinste Topologie, sodass π stetig wird.
+Beispiel 14 (Stereographische Projektion)
+$\mathbb{R}^n$ und $S^n \setminus \{ N \}$ sind homöomorph für beliebiges $N \in S^n$. Es gilt:
+$S^n = \left\{ x \in \mathbb{R}^{n+1} \;\middle|\; \|x\| = 1 \right\} = \left\{ x \in \mathbb{R}^{n+1} \;\middle|\; \sum_{i=1}^{n+1} x_i^2 = 1 \right\}$
+O. B. d. A. sei $N = \begin{pmatrix} 0 \\ \vdots \\ 0 \\ 1 \end{pmatrix}$. Die Gerade durch N und P schneidet die Ebene H in genau
+einem Punkt $\hat{P}$. P wird auf $\hat{P}$ abgebildet.
+$f: S^n \setminus \{ N \} \to \mathbb{R}^n$, $P \mapsto \overbrace{L_P \cap H}^{\text{genau ein Punkt}}$
+wobei $\mathbb{R}^n = H = \left\{ \begin{pmatrix} x_1 \\ \vdots \\ x_{n+1} \end{pmatrix} \in \mathbb{R}^{n+1} \;\middle|\; x_{n+1} = 0 \right\}$ und $L_P$ die Gerade in $\mathbb{R}^{n+1}$ durch N
+und P ist.
+Sei $P = \begin{pmatrix} x_1 \\ \vdots \\ x_{n+1} \end{pmatrix}$, so ist $x_{n+1} < 1$, also ist $L_P$ nicht parallel zu H. Also schneiden sich $L_P$
+und H in genau einem Punkt $\hat{P}$.
+Es gilt: f ist bijektiv und die Umkehrabbildung ist ebenfalls stetig.
+1.4 Zusammenhang
+Definition 13
+a) Ein Raum X heißt zusammenhängend, wenn es keine offenen, nichtleeren Teilmengen
+U1, U2 von X gibt mit U1 ∩ U2 = ∅ und U1 ∪ U2 = X.
+b) Eine Teilmenge Y ⊆ X heißt zusammenhängend, wenn Y als topologischer Raum mit
 der Teilraumtopologie zusammenhängend ist.
-13 1.4. ZUSAMMENHANG
-x
-y
-z
-N
-Pˆ
-0
-P
-Abbildung 1.6: Visualisierung der stereographischen Projektion
-Bemerkung 13
-X ist zusammenhängend ⇔ Es gibt keine abgeschlossenen, nichtleeren Teilmengen A1, A2
-mit A1 ∩ A2 = ∅ und A1 ∪ A2 = X.
-Beispiel 15 (Zusammenhang von Räumen)
-1) (R
-n
-, TEuklid) ist zusammenhängend, denn:
-Annahme: R
-n = U1 ∪˙ U2 mit ∅ 6= U1, U2 ∈ TEuklid existieren.
-Sei x ∈ U1, y ∈ U2 und [x, y] die Strecke zwischen x und y. Sei V = [x, y]. Nun
-betrachten wir V ( Rn als (metrischen) Teilraum mit der Teilraumtopologie TV .
-Somit gilt U1 ∩ [x, y] ∈ TV wegen der Definition der Teilraumtopologie.
-Dann gibt es z ∈ [x, y] mit z ∈ ∂(U1 ∩ [x, y]), aber z /∈ U1 ⇒ z ∈ U2. In jeder
-Umgebung von z liegt ein Punkt von U1 ⇒ Widerspruch zu U2 offen.
-2) R \ { 0 } ist nicht zusammenhängend, denn R \ { 0 } = R<0 ∪ R>0
-3) R
-2 \ { 0 } ist zusammenhängend.
-4) Q ( R ist nicht zusammenhängend, da (Q ∩ R<
-√
-2
-) ∪ (Q ∩ R>
-√
-2
-) = Q
-5) { x } ist zusammenhängend für jedes x ∈ X, wobei X ein topologischer Raum ist.
-6) R mit Zariski-Topologie ist zusammenhängend.
-Bemerkung 14
-Sei X ein topologischer Raum und A ⊆ X zusammenhängend. Dann ist auch A zusammen￾hängend.
-14 1.4. ZUSAMMENHANG
-Beweis: durch Widerspruch
-Annahme: A = A1 ∪ A2, Ai abgeschlossen, Ai 6= ∅, A1 ∩ A2 = ∅
-⇒ A = (A ∩ A1)
-| {z }
-abgeschlossen
-∪˙ (A ∩ A2)
-| {z }
-abgeschlossen
-| {z }
-disjunkt
-Wäre A ∩ A1 = ∅
-⇒ A ⊆ A = A1 ∪˙ A2
-⇒ A ⊆ A2 ⇒ A ⊆ A2
-⇒ A1 = ∅
-⇒ Widerspruch zu A1 6= ∅
-⇒ A ∩ A1 6= ∅ und analog A ∩ A2 6= ∅
-⇒ Widerspruch zu A ist zusammenhängend. 
-Bemerkung 15
-Sei X ein topologischer Raum und A, B ⊆ X zusammenhängend.
-Ist A ∩ B 6= ∅, dann ist A ∪ B zusammenhängend.
-Beweis: Sei A ∪ B = U1 ∪˙ U2, Ui 6= ∅ offen
-o. B. d. A. ======⇒ A = (A ∩ U1) ∪˙ (A ∩ U2) offen
-A zhgd.
-====⇒ A ∩ U1 = ∅
-A∩B6=∅
-====⇒ U1 ⊆ B
-B = (B ∩ U1)
-| {z }
-=U1
-∪ (B ∩ U2)
-| {z }
-=∅
-ist unerlaubte Zerlegung.
-
-Definition 14
-Sei X ein topologischer Raum.
-Für x ∈ X sei Z(x) ⊆ X definiert durch
-Z(x) := [
-A⊆Xzhgd.
-x∈A
-A
-Z(x) heißt Zusammenhangskomponente.
-Bemerkung 16 (Eigenschaften von Zusammenhangskomponenten)
-Sei X ein topologischer Raum. Dann gilt:
-a) Z(x) ist die größte zusammenhängende Teilmenge von X, die x enthält.
-b) Z(x) ist abgeschlossen.
-c) X ist disjunkte Vereinigung von Zusammenhangskomponenten.
+ 1.4. ZUSAMMENHANG
+x
+y
+z
+N
+Pˆ
+0
+P
+Abbildung 1.6: Visualisierung der stereographischen Projektion
+Bemerkung 13
+X ist zusammenhängend ⇔ Es gibt keine abgeschlossenen, nichtleeren Teilmengen A1, A2
+mit A1 ∩ A2 = ∅ und A1 ∪ A2 = X.
+Beispiel 15 (Zusammenhang von Räumen)
+1) $(\mathbb{R}^n, T_{\text{Euklid}})$ ist zusammenhängend, denn:
+Annahme: $\mathbb{R}^n = U_1 \mathbin{\dot{\cup}} U_2$ mit $\emptyset \neq U_1, U_2 \in T_{\text{Euklid}}$ existieren.
+Sei x ∈ U1, y ∈ U2 und [x, y] die Strecke zwischen x und y. Sei V = [x, y]. Nun
+betrachten wir $V \subsetneq \mathbb{R}^n$ als (metrischen) Teilraum mit der Teilraumtopologie $T_V$.
+Somit gilt U1 ∩ [x, y] ∈ TV wegen der Definition der Teilraumtopologie.
+Dann gibt es z ∈ [x, y] mit z ∈ ∂(U1 ∩ [x, y]), aber z /∈ U1 ⇒ z ∈ U2. In jeder
+Umgebung von z liegt ein Punkt von U1 ⇒ Widerspruch zu U2 offen.
+2) R \ { 0 } ist nicht zusammenhängend, denn R \ { 0 } = R<0 ∪ R>0
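+3) $\mathbb{R}^2 \setminus \{ 0 \}$ ist zusammenhängend.
+4) $\mathbb{Q} \subsetneq \mathbb{R}$ ist nicht zusammenhängend, da $(\mathbb{Q} \cap \mathbb{R}_{<\sqrt{2}}) \cup (\mathbb{Q} \cap \mathbb{R}_{>\sqrt{2}}) = \mathbb{Q}$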
+5) { x } ist zusammenhängend für jedes x ∈ X, wobei X ein topologischer Raum ist.
+6) R mit Zariski-Topologie ist zusammenhängend.
+Bemerkung 14
+Sei X ein topologischer Raum und A ⊆ X zusammenhängend. Dann ist auch $\overline{A}$ zusammenhängend.
+ 1.4. ZUSAMMENHANG
+Beweis: durch Widerspruch
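+Annahme: $\overline{A} = A_1 \cup A_2$, $A_i$ abgeschlossen, $A_i \neq \emptyset$, $A_1 \cap A_2 = \emptyset$
+⇒ $A = \underbrace{\underbrace{(A \cap A_1)}_{\text{abgeschlossen}} \mathbin{\dot{\cup}} \underbrace{(A \cap A_2)}_{\text{abgeschlossen}}}_{\text{disjunkt}}$
+Wäre $A \cap A_1 = \emptyset$
+⇒ $A \subseteq \overline{A} = A_1 \mathbin{\dot{\cup}} A_2$
+⇒ $A \subseteq A_2$ ⇒ $\overline{A} \subseteq A_2$
+⇒ $A_1 = \emptyset$
+⇒ Widerspruch zu $A_1 \neq \emptyset$
+⇒ $A \cap A_1 \neq \emptyset$ und analog $A \cap A_2 \neq \emptyset$
+⇒ Widerspruch zu A ist zusammenhängend. $\blacksquare$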
+Bemerkung 15
+Sei X ein topologischer Raum und A, B ⊆ X zusammenhängend.
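+Ist $A \cap B \neq \emptyset$, dann ist A ∪ B zusammenhängend.
+Beweis: Sei $A \cup B = U_1 \mathbin{\dot{\cup}} U_2$, $U_i \neq \emptyset$ offen
+$\overset{\text{o. B. d. A.}}{\Longrightarrow}$ $A = (A \cap U_1) \mathbin{\dot{\cup}} (A \cap U_2)$ offen
+$\overset{A \text{ zhgd.}}{\Longrightarrow}$ $A \cap U_1 = \emptyset$
+$\overset{A \cap B \neq \emptyset}{\Longrightarrow}$ $U_1 \subseteq B$
+$B = \underbrace{(B \cap U_1)}_{= U_1} \cup \underbrace{(B \cap U_2)}_{= \emptyset}$ ist unerlaubte Zerlegung.
+$\blacksquare$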
+Definition 14
+Sei X ein topologischer Raum.
+Für x ∈ X sei Z(x) ⊆ X definiert durch
+$Z(x) := \bigcup_{A \subseteq X \text{ zhgd.},\ x \in A} A$
+Z(x) heißt Zusammenhangskomponente.
+Bemerkung 16 (Eigenschaften von Zusammenhangskomponenten)
+Sei X ein topologischer Raum. Dann gilt:
+a) Z(x) ist die größte zusammenhängende Teilmenge von X, die x enthält.
+b) Z(x) ist abgeschlossen.
+c) X ist disjunkte Vereinigung von Zusammenhangskomponenten.
 Beweis:
-15 1.5. KOMPAKTHEIT
-a) Sei Z(x) = A1 ∪˙ A2 mit Ai 6= ∅ abgeschlossen.
-O. B. d. A. sei x ∈ A1 und y ∈ A2. y liegt in einer zusammehängenden Teilmenge A,
-die auch x enthält. ⇒ A = (A ∩ A1)
-| {z }
-3x
-∪ (A ∩ A2)
-| {z }
-3y
-ist unerlaubte Zerlegung.
-b) Nach Bemerkung 14 ist Z(x) zusammenhängend ⇒ Z(x) ⊆ Z(x) ⇒ Z(x) = Z(x)
-c) Ist Z(y) ∩ Z(x) 6= ∅
-Bem. 15 =====⇒ Z(y) ∪ Z(x) ist zusammenhängend.
-⇒ Z(x) ∪ Z(y) ⊆ Z(x) ⇒ Z(y) ⊆ Z(x)
-⊆ Z(y) ⇒ Z(x) ⊆ Z(y)
-
-Bemerkung 17
-Sei f : X → Y stetig. Ist A ⊆ X zusammenhängend, so ist f(A) ⊆ Y zusammenhängend.
-Beweis: Sei f(A) = U1 ∪ U2, Ui 6= ∅, offen, disjunkt.
-⇒ f
-−1
-(f(A)) = f
-−1
-(U1) ∪ f
-−1
-(U2)
-⇒ A = (A ∩ f
-−1
-(U1))
-| {z }
-6=∅
-∪ (A ∩ f
-−1
-(U2))
-| {z }
-6=∅
-
-1.5 Kompaktheit
-Definition 15
-Sei X eine Menge und U ⊆ P(X).
-U heißt eine Überdeckung von X, wenn gilt:
-∀x ∈ X : ∃M ∈ U : x ∈ M
-Definition 16
-Ein topologischer Raum X heißt kompakt, wenn jede offene Überdeckung von X
-U = { Ui }i∈I mit Ui offen in X
-eine endliche Teilüberdeckung
-[
-i∈J⊆I
-Ui = X mit |J| ∈ N
-besitzt.
-Bemerkung 18
-Das Einheitsintervall I := [0, 1] ist kompakt bezüglich der euklidischen Topologie.
-Beweis: Sei (Ui)i∈J eine offene Überdeckung von I.
-Es genügt zu zeigen, dass es ein δ > 0 gibt, sodass jedes Teilintervall der Länge δ von I in
+ 1.5. KOMPAKTHEIT
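+a) Sei $Z(x) = A_1 \mathbin{\dot{\cup}} A_2$ mit $A_i \neq \emptyset$ abgeschlossen.
+O. B. d. A. sei x ∈ A1 und y ∈ A2. y liegt in einer zusammenhängenden Teilmenge A,
+die auch x enthält. ⇒ $A = \underbrace{(A \cap A_1)}_{\ni x} \cup \underbrace{(A \cap A_2)}_{\ni y}$ ist unerlaubte Zerlegung.
+b) Nach Bemerkung 14 ist $\overline{Z(x)}$ zusammenhängend ⇒ $\overline{Z(x)} \subseteq Z(x)$ ⇒ $Z(x) = \overline{Z(x)}$
+c) Ist $Z(y) \cap Z(x) \neq \emptyset$ $\overset{\text{Bem. 15}}{\Longrightarrow}$ $Z(y) \cup Z(x)$ ist zusammenhängend.
+⇒ $Z(x) \cup Z(y) \subseteq Z(x)$ ⇒ $Z(y) \subseteq Z(x)$
+⇒ $Z(x) \cup Z(y) \subseteq Z(y)$ ⇒ $Z(x) \subseteq Z(y)$
+$\blacksquare$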
+Bemerkung 17
+Sei f : X → Y stetig. Ist A ⊆ X zusammenhängend, so ist f(A) ⊆ Y zusammenhängend.
+Beweis: Sei $f(A) = U_1 \cup U_2$, $U_i \neq \emptyset$, offen, disjunkt.
+⇒ $f^{-1}(f(A)) = f^{-1}(U_1) \cup f^{-1}(U_2)$
+⇒ $A = \underbrace{(A \cap f^{-1}(U_1))}_{\neq \emptyset} \cup \underbrace{(A \cap f^{-1}(U_2))}_{\neq \emptyset}$
+$\blacksquare$
+1.5 Kompaktheit
+Definition 15
+Sei X eine Menge und U ⊆ P(X).
+U heißt eine Überdeckung von X, wenn gilt:
+∀x ∈ X : ∃M ∈ U : x ∈ M
+Definition 16
+Ein topologischer Raum X heißt kompakt, wenn jede offene Überdeckung von X
+U = { Ui }i∈I mit Ui offen in X
+eine endliche Teilüberdeckung
+$\bigcup_{i \in J \subseteq I} U_i = X$ mit $|J| \in \mathbb{N}$
+besitzt.
+Bemerkung 18
+Das Einheitsintervall I := [0, 1] ist kompakt bezüglich der euklidischen Topologie.
+Beweis: Sei (Ui)i∈J eine offene Überdeckung von I.
+Es genügt zu zeigen, dass es ein δ > 0 gibt, sodass jedes Teilintervall der Länge δ von I in
 einem der Ui enthalten ist. Wenn es ein solches δ gibt, kann man I in endlich viele Intervalle
-16 1.5. KOMPAKTHEIT
-der Länge δ unterteilen und alle Ui
-in die endliche Überdeckung aufnehmen, die Teilintervalle
-enthalten.
-Angenommen, es gibt kein solches δ. Dann gibt es für jedes n ∈ N ein Intervall In ⊆ [0, 1]
-der Länge 1/n sodass In ( Ui für alle i ∈ J.
-Sei xn der Mittelpunkt von In. Die Folge (xn) hat einen Häufungspunkt x ∈ [0, 1]. Dann
-gibt es i ∈ J mit x ∈ Ui
-. Da Ui offen ist, gibt es ein ε > 0, sodass (x − ε, x + ε) ⊆ Ui
-.
-Dann gibt es n0, sodass gilt: 1/n0 < ε/2 und für unendlich viele3 n ≥ n0 : |x − xn| < ε/2, also
-In ⊆ (x − ε, x + ε) ⊆ Ui für mindestens ein n ∈ N.
-4
-⇒ Widerspruch
-Dann überdecke [0, 1] mit endlich vielen Intervallen I1, . . . , Id der Länge δ. Jedes Ij ist in
-Uij enthalten.
-⇒ Uj1
-, . . . , Ujd
-ist endliche Teilüberdeckung von U. 
-Beispiel 16 (Kompakte Räume)
-1) R ist nicht kompakt.
-2) (0, 1) ist nicht kompakt.
-Un = (1/n, 1 − 1/n) ⇒
-S
-n∈N Un = (0, 1)
-3) R mit der Zariski-Topologie ist kompakt und jede Teilmenge von R ist es auch.
-Bemerkung 19
-Sei X kompakter Raum, A ⊆ X abgeschlossen. Dann ist A kompakt.
-Beweis: Sei (Vi)i∈I offene Überdeckung von A.
-Dann gibt es für jedes i ∈ I eine offene Teilmenge Ui ⊆ X mit Vi = Ui ∩ A.
-⇒ A ⊆
-[
-i∈I
-Ui
-⇒ U = { Ui
-| i ∈ I } ∪ { X \ A } ist offene Überdeckung von X
-X kompakt
-=======⇒ es gibt i1, . . . , in ∈ I, sodass [n
-j=1
-Uij ∪ (X \ A) = X
-⇒
-
-
-[n
-j=1
-Uij ∪ (X \ A)
-
- ∩ A = A
-⇒
-[n
-j=1
-(Uij ∩ A)
-| {z }
-=Vij
-∪ ((X \ A) ∩ A)
-| {z }
-=∅
-= A
-⇒ Vi1
-, . . . , Vin überdecken A.
-
-Bemerkung 20
-Seien X, Y kompakte topologische Räume. Dann ist X × Y mit der Produkttopologie
-kompakt.
-Beweis: Sei (Wi)i∈I eine offene Überdeckung von X × Y . Für jedes (x, y) ∈ X × Y gibt es
-offene Teilmengen Ux,y von X und Vx,y von Y sowie ein i ∈ I, sodass Ux,y × Vx,y ⊆ Wi
-.
-3Dies gilt nicht für alle n ≥ n0, da ein Häufungspunkt nur eine konvergente Teilfolge impliziert.
+ 1.5. KOMPAKTHEIT
+der Länge δ unterteilen und alle $U_i$ in die endliche Überdeckung aufnehmen, die Teilintervalle
+enthalten.
+Angenommen, es gibt kein solches δ. Dann gibt es für jedes n ∈ N ein Intervall In ⊆ [0, 1]
+der Länge 1/n sodass In ( Ui für alle i ∈ J.
+Sei xn der Mittelpunkt von In. Die Folge (xn) hat einen Häufungspunkt x ∈ [0, 1]. Dann
+gibt es i ∈ J mit x ∈ Ui. Da Ui offen ist, gibt es ein ε > 0, sodass (x − ε, x + ε) ⊆ Ui.
+Dann gibt es n0, sodass gilt: 1/n0 < ε/2 und für unendlich viele³ n ≥ n0 : |x − xn| < ε/2, also
+In ⊆ (x − ε, x + ε) ⊆ Ui für mindestens ein n ∈ N.⁴
+⇒ Widerspruch
+Dann überdecke [0, 1] mit endlich vielen Intervallen I1, . . . , Id der Länge δ. Jedes Ij ist in
+Uij enthalten.
+⇒ $U_{j_1}, \dots, U_{j_d}$ ist endliche Teilüberdeckung von U. $\blacksquare$
+Beispiel 16 (Kompakte Räume)
+1) R ist nicht kompakt.
+2) (0, 1) ist nicht kompakt.
+$U_n = (1/n, 1 - 1/n)$ ⇒ $\bigcup_{n \in \mathbb{N}} U_n = (0, 1)$
+3) R mit der Zariski-Topologie ist kompakt und jede Teilmenge von R ist es auch.
+Bemerkung 19
+Sei X kompakter Raum, A ⊆ X abgeschlossen. Dann ist A kompakt.
+Beweis: Sei (Vi)i∈I offene Überdeckung von A.
+Dann gibt es für jedes i ∈ I eine offene Teilmenge Ui ⊆ X mit Vi = Ui ∩ A.
+\[
+\Rightarrow A \subseteq
+\bigcup_{i \in I} U_i
+\]
+$\Rightarrow \mathfrak{U} = \{ U_i \mid i \in I \} \cup \{ X \setminus A \}$ ist offene Überdeckung von X
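+\begin{align*}
+&\overset{X \text{ kompakt}}{\Longrightarrow}
+  \text{es gibt } i_1, \dots, i_n \in I \text{, sodass }
+  \bigcup_{j=1}^{n} U_{i_j} \cup (X \setminus A) = X \\
+&\Longrightarrow
+  \Biggl(
+  \bigcup_{j=1}^{n} U_{i_j}
+  \cup (X \setminus A)
+  \Biggr)
+  \cap A = A \\
+&\Longrightarrow
+  \bigcup_{j=1}^{n}
+  \underbrace{
+  (U_{i_j} \cap A)
+  }_{
+  = V_{i_j}
+  }
+  \cup
+  \underbrace{((X \setminus A) \cap A)}_{= \emptyset}
+  = A \\
+&\Longrightarrow
+  V_{i_1}, \dots, V_{i_n} \text{ überdecken } A.
+\end{align*}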
+
+Bemerkung 20
+Seien X, Y kompakte topologische Räume. Dann ist X × Y mit der Produkttopologie
+kompakt.
+Beweis: Sei (Wi)i∈I eine offene Überdeckung von X × Y . Für jedes (x, y) ∈ X × Y gibt es
+offene Teilmengen Ux,y von X und Vx,y von Y sowie ein i ∈ I, sodass Ux,y × Vx,y ⊆ Wi.
+3Dies gilt nicht für alle n ≥ n0, da ein Häufungspunkt nur eine konvergente Teilfolge impliziert.
 4Sogar für unendlich viele.
-17 1.5. KOMPAKTHEIT
-Wi
-x
-y
-x
-Vx,y
-Ux,y
-Y
-X
-Abbildung 1.7: Die blaue Umgebung ist Schnitt vieler Umgebungen
-Die offenen Mengen Ux0,y × Vx0,y für festes x0 und alle y ∈ Y überdecken { x0 } × y. Da Y
-kompakt ist, ist auch { x0 } × Y kompakt. Also gibt es y1, . . . , ym(x0) mit Sm(x0)
-i=1 Ux0,yi ×
-Vx0,yi ⊇ { x0 } × Y .
-Sei Ux0
-:= Tm(x)
-i=1 Ux0,yi
-. Da X kompakt ist, gibt es x1, . . . , xn ∈ X mit Sn
-j=1 Uxj = X
-⇒
-Sk
-j=1
-Sm(xj )
-i=1 ￾
-Uxj ,yi × Vxj ,yi
-
-| {z }
-Ein grün-oranges Kästchen
-⊇ X × Y
-⇒
-S
-j
-S
-i Wi(xj , yi) = X × Y 
-Bemerkung 21
-Sei X ein Hausdorffraum und K ⊆ X kompakt. Dann ist K abgeschlossen.
-Beweis: z. Z.: Komplement ist offen
-Ist X = K, so ist K abgeschlossen in X. Andernfalls sei y ∈ X \ K. Für jedes x ∈ K seien
-Ux bzw. Vy Umgebungen von x bzw. von y, sodass Ux ∩ Vy = ∅.
-Xi
-K
-x
-y
-Da K kompakt ist, gibt es endlich viele x1, . . . , xn ∈ K, sodass Sm
-i=1 Uxi ⊇ K.
-Sei V := \n
-i=1
-Vx 
-18 1.6. WEGE UND KNOTEN
-⇒ V ∩
- [n
-i=1
-Uxi
-!
-= ∅
-⇒ V ∩ K = ∅
-⇒ V ist Überdeckung von y, die ganz in X \ K enthalten ist.
-⇒ X \ K ist offen
-Damit ist K abgeschlossen. 
-Bemerkung 22
-Seien X, Y topologische Räume, f : X → Y stetig.
-Ist K ⊆ X kompakt, so ist f(K) ⊆ Y kompakt.
-Beweis: Sei (Vi)i∈I offene Überdeckung von f(K)
-f stetig
-====⇒ (f
-−1
-(Vi))i∈I ist offene Überdeckung von K
-Kompakt
-=====⇒ es gibt i1, . . . , in, sodass f
-−1
-(Vi1
-), . . . , f −1
-(Vin
-) Überdeckung von K ist.
-⇒ f(f
-−1
-(Vi1
-)), . . . , f(f
-−1
-(Vin
-)) überdecken f(K).
-Es gilt: f(f
-−1
-(V )) = V ∩ f(X) 
-Satz 1.1 (Heine-Borel)
-Eine Teilmenge von R
-n oder C
-n
-ist genau dann kompakt, wenn sie beschränkt und
-abgeschlossen ist.
-Beweis: „⇒“: Sei K ⊆ R
-n
-(oder C
-n
-) kompakt.
-Da R
-n und C
-n hausdorffsch sind, ist K nach Bemerkung 21 abgeschlossen. Nach Vorausset￾zung kann K mit endlich vielen offenen Kugeln von Radien 1 überdeckt werden ⇒ K ist
-beschränkt.
-„⇐“ Sei A ⊆ R
-n
-(oder C
-n
-) beschränkt und abgeschlossen.
-Dann gibt es einen Würfel W = [−N, N] × · · · × [−N, N]
-| {z }
-n mal
-mit A ⊆ W bzw. „Polyzylinder“
-Z = { (z1, . . . , zn) ∈ C
-n
-| zi ≤ N für i = 1, . . . , n }
-Nach Bemerkung 20 und Bemerkung 18 ist W kompakt, also ist A nach Bemerkung 19 auch
-kompakt. Genauso ist Z kompakt, weil
-{ z ∈ C k z| ≤ 1 }
-homöomorph zu
-
-(x, y) ∈ R
-2
-
- k(x, y)k ≤ 1
-	
-ist. 
-1.6 Wege und Knoten
-Definition 17
+ 1.5. KOMPAKTHEIT
+Wi
+x
+y
+x
+Vx,y
+Ux,y
+Y
+X
+Abbildung 1.7: Die blaue Umgebung ist Schnitt vieler Umgebungen
+Die offenen Mengen $U_{x_0,y} \times V_{x_0,y}$ für festes $x_0$ und alle $y \in Y$
+überdecken $\{ x_0 \} \times Y$. Da Y
+kompakt ist, ist auch $\{ x_0 \} \times Y$ kompakt. Also gibt es $y_1, \dots, y_{m(x_0)}$ mit
+$\bigcup_{i=1}^{m(x_0)} U_{x_0,y_i} \times V_{x_0,y_i} \supseteq \{ x_0 \} \times Y$.
+Sei
+$U_{x_0} := \bigcap_{i=1}^{m(x_0)} U_{x_0,y_i}$.
+Da X kompakt ist, gibt es $x_1, \dots, x_n \in X$ mit
+$\bigcup_{j=1}^{n} U_{x_j} = X$
+⇒
+Sk
+j=1
+Sm(xj )
+i=1
+Uxj ,yi × Vxj ,yi
+
+| {z }
+Ein grün-oranges Kästchen
+⊇ X × Y
+⇒
+S
+j
+S
+i Wi(xj , yi) = X × Y 
+Bemerkung 21
+Sei X ein Hausdorffraum und K ⊆ X kompakt. Dann ist K abgeschlossen.
+Beweis: z. Z.: Komplement ist offen
+Ist X = K, so ist K abgeschlossen in X. Andernfalls sei y ∈ X \ K. Für jedes x ∈ K seien
+Ux bzw. Vy Umgebungen von x bzw. von y, sodass Ux ∩ Vy = ∅.
+Xi
+K
+x
+y
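+Da K kompakt ist, gibt es endlich viele $x_1, \dots, x_n \in K$, sodass
+$\bigcup_{i=1}^{n} U_{x_i} \supseteq K$.
+Sei
+$V := \bigcap_{i=1}^{n} V_{x_i}$,
+wobei $V_{x_i}$ die zu $U_{x_i}$ disjunkte Umgebung von y bezeichnet.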
+ 1.6. WEGE UND KNOTEN
+\[
+\Rightarrow
+V \cap
+\left( \bigcup_{i=1}^{n} U_{x_i} \right)
+= \emptyset
+\]
+⇒ V ∩ K = ∅
+⇒ V ist Umgebung von y, die ganz in X \ K enthalten ist.
+⇒ X \ K ist offen
+Damit ist K abgeschlossen. 
+Bemerkung 22
+Seien X, Y topologische Räume, f : X → Y stetig.
+Ist K ⊆ X kompakt, so ist f(K) ⊆ Y kompakt.
+Beweis: Sei (Vi)i∈I offene Überdeckung von f(K)
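+\begin{align*}
+&\overset{f \text{ stetig}}{\Longrightarrow}
+  (f^{-1}(V_i))_{i \in I}
+  \text{ ist offene Überdeckung von } K \\
+&\overset{K \text{ kompakt}}{\Longrightarrow}
+  \text{es gibt } i_1, \dots, i_n \text{, sodass }
+  f^{-1}(V_{i_1}), \dots,
+  f^{-1}(V_{i_n})
+  \text{ Überdeckung von } K \text{ ist.} \\
+&\Longrightarrow
+  f(f^{-1}(V_{i_1})), \dots,
+  f(f^{-1}(V_{i_n}))
+  \text{ überdecken } f(K).
+\end{align*}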
+Es gilt: f(f
+−1
+(V )) = V ∩ f(X) 
+Satz 1.1 (Heine-Borel)
+Eine Teilmenge von
+$\mathbb{R}^n$ oder $\mathbb{C}^n$
+ist genau dann kompakt, wenn sie beschränkt und
+abgeschlossen ist.
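+Beweis: „⇒“: Sei $K \subseteq \mathbb{R}^n$ (oder $\mathbb{C}^n$) kompakt.
+Da $\mathbb{R}^n$ und $\mathbb{C}^n$ hausdorffsch sind,
+ist K nach Bemerkung 21 abgeschlossen.
+Die offenen Kugeln vom Radius 1 um die Punkte von K bilden eine
+offene Überdeckung von K. Da K kompakt ist, kann K
+mit endlich vielen offenen Kugeln vom Radius 1
+überdeckt werden
+⇒ K ist beschränkt.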
+„⇐“: Sei
+$A \subseteq \mathbb{R}^n$
+(oder $\mathbb{C}^n$)
+beschränkt und
+abgeschlossen.
+Dann gibt es einen Würfel
+$W = \underbrace{[-N, N] \times \dots \times [-N, N]}_{n \text{ mal}}$
+mit A ⊆ W
+bzw. einen „Polyzylinder“
+$Z = \{ (z_1, \dots, z_n) \in \mathbb{C}^n
+  \mid |z_i| \leq N
+  \text{ für } i = 1, \dots, n \}$
+Nach Bemerkung 20 und Bemerkung 18 ist W kompakt, also ist A nach Bemerkung 19 auch
+kompakt. Genauso ist Z kompakt, weil
+$\{ z \in \mathbb{C} \mid |z| \leq 1 \}$
+homöomorph zu
+$\bigl\{
+(x, y)
+\in \mathbb{R}^2
+\bigm|
+\|(x, y)\| \leq 1
+\bigr\}$
+ist. 
+1.6 Wege und Knoten
+Definition 17
+Sei X ein topologischer Raum.
+ 1.6. WEGE UND KNOTEN
+a) Ein Weg in X ist eine stetige Abbildung γ : [0, 1] → X.
+b) γ heißt geschlossen, wenn γ(1) = γ(0) gilt.
+c) γ heißt einfach, wenn γ|[0,1) injektiv ist.
+Beispiel 17
+Ist X diskret, so ist jeder Weg konstant, d. h. von der Form
+∀x ∈ [0, 1] : γ(x) = c, c ∈ X
+Denn γ([0, 1]) ist zusammenhängend für jeden Weg γ.
+Definition 18
+Ein topologischer Raum X heißt wegzusammenhängend, wenn es zu je zwei Punkten
+x, y ∈ X einen Weg γ : [0, 1] → X gibt mit γ(0) = x und γ(1) = y.
+Bemerkung 23
 Sei X ein topologischer Raum.
-19 1.6. WEGE UND KNOTEN
-a) Ein Weg in X ist eine stetige Abbildung γ : [0, 1] → X.
-b) γ heißt geschlossen, wenn γ(1) = γ(0) gilt.
-c) γ heißt einfach, wenn γ|[0,1) injektiv ist.
-Beispiel 17
-Ist X diskret, so ist jeder Weg konstant, d. h. von der Form
-∀x ∈ [0, 1] : γ(x) = c, c ∈ X
-Denn γ([0, 1]) ist zusammenhängend für jeden Weg γ.
-Definition 18
-Ein topologischer Raum X heißt wegzusammenhängend, wenn es zu je zwei Punkten
-x, y ∈ X einen Weg γ : [0, 1] → X gibt mit γ(0) = x und γ(1) = y.
-Bemerkung 23
-Sei X ein topologischer Raum.
-a) X ist wegzusammenhängend ⇒ X ist zusammenhängend
-b) X ist wegzusammenhängend 6⇐ X ist zusammenhängend
-Beweis:
-a) Sei X ein wegzusammenhängender topologischer Raum, A1, A2 nichtleere, disjunkte,
-abgeschlossene Teilmengen von X mit A1 ∪ A2 = X. Sei x ∈ A1, y ∈ A2, γ : [0, 1] → X
-ein Weg von x nach y.
-Dann ist C := γ([0, 1]) ⊆ X zusammenhängend, weil γ stetig ist.
-C = (C ∩ A1)
-| {z }
-3x
-∪ (C ∩ A2)
-| {z }
-3y
-ist Zerlegung in nichtleere, disjunkte, abgeschlossene Teilmengen ⇒ Widerspruch
-b) Sei X =
-n
-(x, y) ∈ R
-2
-
-
-
-x
-2 + y
-2 = 1 ∨ y = 1 + 2 · e
-− 1
-10 x
-o
-.
-Abbildung 1.8a veranschaulicht diesen Raum.
-Sei U1 ∪ U2 = X, U1 6= U2 = ∅, Ui offen. X = C ∪ S. Dann ist C ⊆ U1 oder C ⊆ U2,
-weil C und S zusammenhängend sind.
-Also ist C = U1 und S = U2 (oder umgekehrt).
-Sei y ∈ C = U1, ε > 0 und Bε(y) ⊆ U1 eine Umgebung von y, die in U1 enthalten ist.
-Aber: Bε(y) ∩ S 6= ∅ ⇒ Widerspruch ⇒ X ∪ S ist zusammenhängend, aber nicht
-wegzusammenhängend. 
-Beispiel 18 (Hilbert-Kurve)
-Es gibt stetige, surjektive Abbildungen [0, 1] → [0, 1] × [0, 1]. Ein Beispiel ist die in Abbil￾dung 1.9 dargestellte Hilbert-Kurve.
-Definition 19
-Sei X ein topologischer Raum. Eine Jordankurve in X ist ein Homöomorphismus γ :
-[0, 1] → C ⊆ X bzw. γ : S
+a) X ist wegzusammenhängend ⇒ X ist zusammenhängend
+b) X ist wegzusammenhängend ⇍ X ist zusammenhängend
+Beweis:
+a) Sei X ein wegzusammenhängender topologischer Raum, A1, A2 nichtleere, disjunkte,
+abgeschlossene Teilmengen von X mit A1 ∪ A2 = X. Sei x ∈ A1, y ∈ A2, γ : [0, 1] → X
+ein Weg von x nach y.
+Dann ist C := γ([0, 1]) ⊆ X zusammenhängend, weil γ stetig ist.
+\[
+C =
+\underbrace{(C \cap A_1)}_{\ni x}
+\cup
+\underbrace{(C \cap A_2)}_{\ni y}
+\]
+ist Zerlegung in nichtleere, disjunkte, abgeschlossene Teilmengen ⇒ Widerspruch
+b) Sei
+\[
+X =
+\Bigl\{
+(x, y) \in \mathbb{R}^2
+\Bigm|
+x^2 + y^2 = 1
+\lor
+y = 1 + 2 \cdot
+e^{-\frac{1}{10} x}
+\Bigr\}
+.
+\]
+Abbildung 1.8a veranschaulicht diesen Raum.
+Sei U1 ∪ U2 = X, U1 ∩ U2 = ∅, Ui ≠ ∅ offen. Es ist X = C ∪ S. Dann ist C ⊆ U1 oder C ⊆ U2 (und ebenso für S),
+weil C und S zusammenhängend sind.
+Also ist C = U1 und S = U2 (oder umgekehrt).
+Sei y ∈ C = U1, ε > 0 und Bε(y) ⊆ U1 eine Umgebung von y, die in U1 enthalten ist.
+Aber: Bε(y) ∩ S ≠ ∅ ⇒ Widerspruch ⇒ X = C ∪ S ist zusammenhängend, aber nicht
+wegzusammenhängend. 
+Beispiel 18 (Hilbert-Kurve)
+Es gibt stetige, surjektive Abbildungen [0, 1] → [0, 1] × [0, 1]. Ein Beispiel ist die in Abbildung 1.9 dargestellte Hilbert-Kurve.
+Definition 19
+Sei X ein topologischer Raum. Eine Jordankurve in X ist ein Homöomorphismus γ :
+[0, 1] → C ⊆ X bzw. γ : S
 1 → C ⊆ X, wobei C := Bild γ.
-20 1.6. WEGE UND KNOTEN
-(a) Spirale S mit Kreis C
-0.1 1
-−1
-0
-1
-X
-Y
-{(x,sin( 1
-x
-)) ∈ X × Y }
-(−1, 1) ⊆ Y
-(b) Sinus
-Abbildung 1.8: Beispiele für Räume, die zusammenhängend, aber nicht wegzusammenhängend
-sind.
-(a) n = 1 (b) n = 2 (c) n = 3 (d) n = 4 (e) n = 5
-Abbildung 1.9: Hilbert-Kurve
-Jede Jordankurve ist also ein einfacher Weg.
-Satz 1.2 (Jordanscher Kurvensatz)
-Ist C = γ([0, 1]) eine geschlossene Jordankurve in R
-2
-, so hat R
-2 \ C genau zwei
-Zusammenhangskomponenten, von denen eine beschränkt ist und eine unbeschränkt.
-außen
-innen
-Jordankurve
-Abbildung 1.10: Die unbeschränkte Zusammenhangskomponente wird häufig inneres, die be￾schränkte äußeres genannt.
-Beweis: ist technisch mühsam und wird hier nicht geführt. Er kann in „Algebraische Topologie:
-Eine Einführung“ von R. Stöcker und H. Zieschang auf S. 301f (ISBN 978-3519122265)
-nachgelesen werden.
+ 1.6. WEGE UND KNOTEN
+(a) Spirale S mit Kreis C
+0.1 1
+−1
+0
+1
+X
+Y
+{(x,sin( 1
+x
+)) ∈ X × Y }
+(−1, 1) ⊆ Y
+(b) Sinus
+Abbildung 1.8: Beispiele für Räume, die zusammenhängend, aber nicht wegzusammenhängend
+sind.
+(a) n = 1 (b) n = 2 (c) n = 3 (d) n = 4 (e) n = 5
+Abbildung 1.9: Hilbert-Kurve
+Jede Jordankurve ist also ein einfacher Weg.
+Satz 1.2 (Jordanscher Kurvensatz)
+Ist C = γ([0, 1]) eine geschlossene Jordankurve
+in $\mathbb{R}^2$,
+so hat $\mathbb{R}^2 \setminus C$
+genau zwei
+Zusammenhangskomponenten, von denen eine beschränkt ist und eine unbeschränkt.
+außen
+innen
+Jordankurve
+Abbildung 1.10: Die beschränkte Zusammenhangskomponente wird häufig Inneres, die unbeschränkte Äußeres genannt.
+Beweis: ist technisch mühsam und wird hier nicht geführt. Er kann in „Algebraische Topologie:
+Eine Einführung“ von R. Stöcker und H. Zieschang auf S. 301f (ISBN 978-3519122265)
+nachgelesen werden.
 Idee: Ersetze Weg C durch Polygonzug.
-21 1.6. WEGE UND KNOTEN
-Definition 20
-Eine geschlossene Jordankurve in R
-3 heißt Knoten.
-Beispiel 19 (Knoten)
-(a) Trivialer Knoten (b) Kleeblattknoten (c) Achterknoten (d) 62-Knoten
-Abbildung 1.11: Beispiele für verschiedene Knoten
-Definition 21
-Zwei Knoten γ1, γ2 : S
-1 → R
-3 heißen äquivalent, wenn es eine stetige Abbildung
-H : S
-1 × [0, 1] → R
-3
-gibt mit
-H(z, 0) = γ1(z) ∀z ∈ S
-1
-H(z, 1) = γ2(z) ∀z ∈ S
-1
-und für jedes feste t ∈ [0, 1] ist
-Hz : S
-1 → R
-3
-, z 7→ H(z, t)
-ein Knoten. Die Abbildung H heißt Isotopie zwischen γ1 und γ2.
-Definition 22
-Sei γ : [0, 1] → R
-3
-ein Knoten, E eine Ebene und π : R
-3 → E eine Projektion auf E.
-π heißt Knotendiagramm von γ, wenn gilt:
-
-π
-−1
-(x)
-
- ≤ 2 ∀x ∈ π(γ)
-Ist (π|γ([0,1]))
-−1
-(x) = { y1, y2 }, so liegt y1 über y2, wenn gilt:
-∃λ > 1 : (y1 − x) = λ(y2 − x)
-Satz 1.3 (Satz von Reidemeister)
-Zwei endliche Knotendiagramme gehören genau dann zu äquivalenten Knoten, wenn sie
+ 1.6. WEGE UND KNOTEN
+Definition 20
+Eine geschlossene Jordankurve in
+$\mathbb{R}^3$ heißt Knoten.
+Beispiel 19 (Knoten)
+(a) Trivialer Knoten (b) Kleeblattknoten (c) Achterknoten (d) 62-Knoten
+Abbildung 1.11: Beispiele für verschiedene Knoten
+Definition 21
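+Zwei Knoten $\gamma_1, \gamma_2 : S^1 \to \mathbb{R}^3$
+heißen äquivalent, wenn es eine stetige Abbildung
+\[
+H : S^1 \times [0, 1] \to \mathbb{R}^3
+\]
+gibt mit
+\begin{align*}
+H(z, 0) &= \gamma_1(z) \quad \forall z \in S^1 \\
+H(z, 1) &= \gamma_2(z) \quad \forall z \in S^1
+\end{align*}
+und für jedes feste t ∈ [0, 1] ist
+$H_t : S^1 \to \mathbb{R}^3$,
+$z \mapsto H(z, t)$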
+ein Knoten. Die Abbildung H heißt Isotopie zwischen γ1 und γ2.
+Definition 22
+Sei $\gamma : [0, 1] \to \mathbb{R}^3$
+ein Knoten, E eine Ebene und
+$\pi : \mathbb{R}^3 \to E$
+eine Projektion auf E.
+π heißt Knotendiagramm von γ, wenn gilt:
+\[
+\left| \pi^{-1}(x) \right| \leq 2
+\quad \forall x \in \pi(\gamma)
+\]
+Ist
+$(\pi|_{\gamma([0,1])})^{-1}(x) = \{ y_1, y_2 \}$,
+so liegt $y_1$ über $y_2$, wenn gilt:
+\[
+\exists \lambda > 1 : (y_1 - x) = \lambda (y_2 - x)
+\]
+Satz 1.3 (Satz von Reidemeister)
+Zwei endliche Knotendiagramme gehören genau dann zu äquivalenten Knoten, wenn sie
 durch endlich viele „Reidemeister-Züge“ ineinander überführt werden können.
-22 1.6. WEGE UND KNOTEN
-(a) Ω1 (b) Ω2
-(c) Ω3
-Abbildung 1.12: Reidemeister-Züge
-Beweis: Durch sorgfältige Fallunterscheidung.5
-Definition 23
-Ein Knotendiagramm heißt 3-färbbar, wenn jeder Bogen von D so mit einer Farbe gefärbt
-werden kann, dass an jeder Kreuzung eine oder 3 Farben auftreten und alle 3 Farben
-auftreten.
-Abbildung 1.13: Ein 3-gefärber Kleeblattknoten
+ 1.6. WEGE UND KNOTEN
+(a) Ω1 (b) Ω2
+(c) Ω3
+Abbildung 1.12: Reidemeister-Züge
+Beweis: Durch sorgfältige Fallunterscheidung.⁵
+Definition 23
+Ein Knotendiagramm D heißt 3-färbbar, wenn jeder Bogen von D so mit einer Farbe gefärbt
+werden kann, dass an jeder Kreuzung eine oder 3 Farben auftreten und insgesamt alle 3 Farben
+auftreten.
+Abbildung 1.13: Ein 3-gefärbter Kleeblattknoten
 5Siehe „Knot Theory and Its Applications“ von Kunio Murasugi. ISBN 978-0817638177.
-23 1.6. WEGE UND KNOTEN
-Übungsaufgaben
-Aufgabe 1 (Sierpińskiraum)
-Es sei X := { 0, 1 } und TX := { ∅, { 0 } , X }. Dies ist der sogenannte Sierpińskiraum.
-(a) Beweisen Sie, dass (X, TX) ein topologischer Raum ist.
-(b) Ist (X, TX) hausdorffsch?
-(c) Ist TX von einer Metrik erzeugt?
-Aufgabe 2
-Es sei Z mit der von den Mengen Ua,b := a + bZ(a ∈ Z, b ∈ Z \ { 0 }) erzeugten Topologie
-versehen.
-Zeigen Sie:
-(a) Jedes Ua,b und jede einelementige Teilmenge von Z ist abgeschlossen.
-(b) { −1, 1 } ist nicht offen.
-(c) Es gibt unendlich viele Primzahlen.
-Aufgabe 3 (Cantorsches Diskontinuum)
-Für jedes i ∈ N sei Pi
-:= { 0, 1 } mit der diskreten Topologie. Weiter Sei P := Q
-i∈N Pi
-.
-(a) Wie sehen die offenen Mengen von P aus?
-(b) Was können Sie über den Zusammenhang von P sagen?
-Aufgabe 4 (Kompaktheit)
-(a) Ist GLn(R) = { A ∈ R
-n×n
-| det(A) 6= 0 } kompakt?
-(b) Ist SLn(R) = { A ∈ R
-n×n
-| det(A) = 1 } kompakt?
-(c) Ist P(R) kompakt?
-Aufgabe 5 (Begriffe)
-Definieren Sie die Begriffe „Homomorphismus“ und „Homöomorphismus“.
-Geben Sie, falls möglich, ein Beispiel für folgende Fälle an. Falls es nicht möglich ist,
-begründen Sie warum.
-1) Ein Homomorphismus, der zugleich ein Homöomorphismus ist,
+ 1.6. WEGE UND KNOTEN
+Übungsaufgaben
+Aufgabe 1 (Sierpińskiraum)
+Es sei X := { 0, 1 } und TX := { ∅, { 0 } , X }. Dies ist der sogenannte Sierpińskiraum.
+(a) Beweisen Sie, dass (X, TX) ein topologischer Raum ist.
+(b) Ist (X, TX) hausdorffsch?
+(c) Ist TX von einer Metrik erzeugt?
+Aufgabe 2
+Es sei Z mit der von den Mengen Ua,b := a + bZ(a ∈ Z, b ∈ Z \ { 0 }) erzeugten Topologie
+versehen.
+Zeigen Sie:
+(a) Jedes Ua,b und jede einelementige Teilmenge von Z ist abgeschlossen.
+(b) { −1, 1 } ist nicht offen.
+(c) Es gibt unendlich viele Primzahlen.
+Aufgabe 3 (Cantorsches Diskontinuum)
+Für jedes i ∈ N sei $P_i := \{ 0, 1 \}$ mit der diskreten Topologie.
+Weiter sei
+$P := \prod_{i \in \mathbb{N}} P_i$.
+(a) Wie sehen die offenen Mengen von P aus?
+(b) Was können Sie über den Zusammenhang von P sagen?
+Aufgabe 4 (Kompaktheit)
+(a) Ist
+$\mathrm{GL}_n(\mathbb{R}) = \{ A \in \mathbb{R}^{n \times n} \mid \det(A) \neq 0 \}$
+kompakt?
+(b) Ist
+$\mathrm{SL}_n(\mathbb{R}) = \{ A \in \mathbb{R}^{n \times n} \mid \det(A) = 1 \}$
+kompakt?
+(c) Ist P(R) kompakt?
+Aufgabe 5 (Begriffe)
+Definieren Sie die Begriffe „Homomorphismus“ und „Homöomorphismus“.
+Geben Sie, falls möglich, ein Beispiel für folgende Fälle an. Falls es nicht möglich ist,
+begründen Sie warum.
+1) Ein Homomorphismus, der zugleich ein Homöomorphismus ist,
 2) ein Homomorphismus, der kein Homöomorphismus ist,
-24 1.6. WEGE UND KNOTEN
-3) ein Homöomorphismus, der kein Homomorphismus ist
-Aufgabe 6 (Begriffe)
+ 1.6. WEGE UND KNOTEN
+3) ein Homöomorphismus, der kein Homomorphismus ist
+Aufgabe 6 (Begriffe)
 Definieren Sie die Begriffe „Isomorphismus“, „Isotopie“ und „Isometrie“.
-2 Mannigfaltigkeiten und
-Simplizialkomplexe
-2.1 Topologische Mannigfaltigkeiten
-Definition 24
-Sei (X, T) ein topologischer Raum und n ∈ N.
-a) Eine n-dimensionale Karte auf X ist ein Paar (U, ϕ), wobei U ∈ T und ϕ : U → V
-Homöomorphismus von U auf eine offene Teilmenge V ⊆ R
-n
-.
-b) Ein n-dimensionaler Atlas A auf X ist eine Familie (Ui
-, ϕi)i∈I von Karten auf X,
-sodass S
-i∈I Ui = X.
-c) X heißt (topologische) n-dimensionale Mannigfaltigkeit, wenn X hausdorffsch ist,
-eine abzählbare Basis der Topologie hat und einen n-dimensionalen Atlas besitzt.
-Anschaulich ist also ein n-dimensionale Mannigfaltigkeit lokal dem R
-n ähnlich.
-Bemerkung 24 (Mächtigkeit von Mannigfaltigkeiten)
-Jede n-dimensionale Mannigfaltigkeit mit n ≥ 1 ist mindestens so mächtig wie R.
-Beweis: Sei (X, T) ein topologischer Raum und (U, ϕ) mit U ∈ T und ϕ : U → V ⊆ R
-n
-, wobei
-V offen und ϕ ein Homöomorphismus ist, eine Karte auf X.
-Da jede offene Teilmenge des R
-n genauso mächtig ist wie der R
-n
-, ϕ als Homöomorphismus
-insbesondere bijektiv ist und Mengen, zwischen denen eine Bijektion existiert, gleich mächtig
-sind, ist U genauso mächtig wie der R
-n
-. Da jede Mannigfaltigkeit mindestens eine Karte
-hat, muss jede Mannigfaltigkeit X mindestens so mächtig sein wie der R
-n
-. 
-Hinweis: Es gibt auch noch 0-dimensionale Mannigfaltigkeiten. Diese Mannigfaltigkeiten können
-beliebig viele Elemente haben.
-Bemerkung 25
-a) Es gibt surjektive, stetige Abbildungen [0, 1] → [0, 1] × [0, 1]
-b) Für n =6 m sind R
-n und R
-m nicht homöomorph. Zum Beweis benutzt man den „Satz
-von der Gebietstreue“ (Brouwer):
-Ist U ⊆ R
-n offen und f : U → R
-n
-stetig und injektiv, so ist f(U) offen.
-Ist n < m und R
-m homöomorph zu R
-n
-, so wäre
-f : R
-n → R
-m → R
-n
-, (x1, . . . , xn) 7→ (x1, x2, . . . , xn, 0, . . . , 0)
-eine stetige injektive Abbildung. Also müsste f(R
-n
+2 Mannigfaltigkeiten und
+Simplizialkomplexe
+2.1 Topologische Mannigfaltigkeiten
+Definition 24
+Sei (X, T) ein topologischer Raum und n ∈ N.
+a) Eine n-dimensionale Karte auf X ist ein Paar (U, ϕ), wobei U ∈ T und
+ϕ : U → V ein Homöomorphismus von U
+auf eine offene Teilmenge
+$V \subseteq \mathbb{R}^n$ ist.
+b) Ein n-dimensionaler Atlas A auf X ist eine Familie
+$(U_i, \varphi_i)_{i \in I}$ von Karten auf X,
+sodass $\bigcup_{i \in I} U_i = X$.
+c) X heißt (topologische) n-dimensionale Mannigfaltigkeit, wenn X hausdorffsch ist,
+eine abzählbare Basis der Topologie hat und einen n-dimensionalen Atlas besitzt.
+Anschaulich ist also eine n-dimensionale Mannigfaltigkeit lokal dem
+$\mathbb{R}^n$ ähnlich.
+Bemerkung 24 (Mächtigkeit von Mannigfaltigkeiten)
+Jede n-dimensionale Mannigfaltigkeit mit n ≥ 1 ist mindestens so mächtig wie R.
+Beweis: Sei (X, T) ein topologischer Raum und (U, ϕ) mit U ∈ T und
+$\varphi : U \to V \subseteq \mathbb{R}^n$,
+wobei
+V offen und ϕ ein Homöomorphismus ist, eine Karte auf X.
+Da jede offene Teilmenge des
+$\mathbb{R}^n$ genauso mächtig ist wie der $\mathbb{R}^n$,
+ϕ als Homöomorphismus
+insbesondere bijektiv ist und Mengen, zwischen denen eine Bijektion existiert, gleich mächtig
+sind, ist U genauso mächtig wie der
+$\mathbb{R}^n$.
+Da jede Mannigfaltigkeit mindestens eine Karte
+hat, muss jede Mannigfaltigkeit X mindestens so mächtig sein wie der
+$\mathbb{R}^n$
+. 
+Hinweis: Es gibt auch noch 0-dimensionale Mannigfaltigkeiten. Diese Mannigfaltigkeiten können
+beliebig viele Elemente haben.
+Bemerkung 25
+a) Es gibt surjektive, stetige Abbildungen [0, 1] → [0, 1] × [0, 1]
+b) Für n ≠ m sind
+$\mathbb{R}^n$ und $\mathbb{R}^m$ nicht homöomorph. Zum Beweis benutzt man den „Satz
+von der Gebietstreue“ (Brouwer):
+Ist $U \subseteq \mathbb{R}^n$ offen und
+$f : U \to \mathbb{R}^n$
+stetig und injektiv, so ist f(U) offen.
+Ist n < m und
+$\mathbb{R}^m$ homöomorph zu $\mathbb{R}^n$,
+so wäre
+\[
+f : \mathbb{R}^n \to \mathbb{R}^m \to \mathbb{R}^n, \quad (x_1, \dots, x_n) \mapsto (x_1, x_2, \dots, x_n, 0, \dots, 0)
+\]
+eine stetige injektive Abbildung. Also müsste f(R
+n
 ) offen sein ⇒ Widerspruch
-26 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN
-Beispiel 20 (Mannigfaltigkeiten)
-1) Jede offene Teilmenge U ⊆ R
-n
-ist eine n-dimensionale Mannigfaltigkeit mit einem
-Atlas aus einer Karte.
-2) C
-n
-ist eine 2n-dimensionale Mannigfaltigkeit mit einem Atlas aus einer Karte:
-(z1, . . . , zn) 7→ (<(z1), =(z1), . . . , <(zn), =(zn))
-3) P
-n
-(R) = (R
-n+1 \ { 0 })/∼ = S
-n/∼ und P
-n
-(C) sind Mannigfaltigkeiten der Dimension
-n bzw. 2n, da gilt:
-Sei Ui
-:= { (x0 : · · · : xn) ∈ Pn
-(R) | xi 6= 0 } ∀i ∈ 0, . . . , n. Dann ist P
-n
-(R) = Sn
-i=0 Ui
-und die Abbildung
-Ui → R
-n
-(x0 : · · · : xn) 7→
-
-x0
-xi
-, . . . , ✁
-✁
-✁
-xi
-xi
-, . . . ,
-xn
-xi
-
-(y1 : · · · : yi−1 : 1 : yi
-: · · · : yn) 7→(y1, . . . , yn)
-ist bijektiv.
-Die Ui mit i = 0, . . . , n bilden einen n-dimensionalen Atlas:
-x = (1 : 0 : 0) ∈ U0 → R
-2 x 7→ (0, 0)
-y = (0 : 1 : 1) ∈ U2 → R
-2
-y 7→ (0, 1)
-Umgebung: B1(0, 1) → { (1 : u : v) | k(u, v)k < 1 } = V1
-Umgebung: B1(0, 1) →
-
-(w : z : 1)
-
- w
-2 + z
-2 < 1
-	
-= V2
-V1 ∩ V2 = ∅?
-(a : b : c) ∈ V1 ∩ V2
-⇒ a 6= 0 und (
-b
-a
-)
-2 + ( c
-a
-)
-2 < 1 ⇒ c
-a < 1
-⇒ c 6= 0 und (
-a
-c
-)
-2 + ( b
-c
-)
-2 < 1 ⇒ a
-c < 1
-⇒ Widerspruch
-4) S
-n =
-
-x ∈ R
-n+1 
- kxk = 1 	
-ist n-dimensionale Mannigfaltigkeit.
-Karten:
-Di
-:= {(x1, . . . , xn+1) ∈ S
-n
-|xi > 0} → B1(0, . . . , 0
-| {z }
-∈Rn
-)
-Ci
-:= {(x1, . . . , xn+1) ∈ S
-n
-|xi < 0} → B1(0, . . . , 0)
-(x1, . . . , xn+1) 7→ (x1, . . . ,✚xi
-, . . . , xn+1)
-1
-(x1, . . . , xn) 7→ (x1, . . . , xi−1,
-q
-1 −
-Pn
-k=1 x
-2
-k
-, xi
-, . . . , xn), oder −
-q
-1 −
-Pn
-k=1 x
-2
-k
-für Ci
-S
-n =
-Sn+1
-i=1 (Ci ∪ Di)
-Als kompakte Mannigfaltigkeit wird S
-n auch „geschlossene Mannigfaltigkeit“ genannt.
-5) [0, 1] ist keine Mannigfaltigkeit, denn:
-Es gibt keine Umgebung von 0 in [0, 1], die homöomorph zu einem offenem Intervall
-ist.
+ 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN
+Beispiel 20 (Mannigfaltigkeiten)
+1) Jede offene Teilmenge
+$U \subseteq \mathbb{R}^n$
+ist eine n-dimensionale Mannigfaltigkeit mit einem
+Atlas aus einer Karte.
+2)
+$\mathbb{C}^n$
+ist eine 2n-dimensionale Mannigfaltigkeit mit einem Atlas aus einer Karte:
+$(z_1, \dots, z_n) \mapsto (\Re(z_1), \Im(z_1), \dots, \Re(z_n), \Im(z_n))$
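+3)
+$\mathcal{P}^n(\mathbb{R}) = (\mathbb{R}^{n+1} \setminus \{ 0 \}) / {\sim} = S^n / {\sim}$
+und
+$\mathcal{P}^n(\mathbb{C})$
+sind Mannigfaltigkeiten der Dimension
+n bzw. 2n, da gilt:
+Sei $U_i := \{ (x_0 : \dots : x_n) \in \mathcal{P}^n(\mathbb{R}) \mid x_i \neq 0 \}$
+für alle $i \in \{ 0, \dots, n \}$.
+Dann ist
+$\mathcal{P}^n(\mathbb{R}) = \bigcup_{i=0}^{n} U_i$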
+und die Abbildung
+Ui → R
+n
+(x0 : · · · : xn) 7→
+
+x0
+xi
+, . . . , ✁
+✁
+✁
+xi
+xi
+, . . . ,
+xn
+xi
+
+(y1 : · · · : yi−1 : 1 : yi: · · · : yn) 7→(y1, . . . , yn)
+ist bijektiv.
+Die Ui mit i = 0, . . . , n bilden einen n-dimensionalen Atlas:
+x = (1 : 0 : 0) ∈ U0 → R
+2 x 7→ (0, 0)
+y = (0 : 1 : 1) ∈ U2 → R
+2
+y 7→ (0, 1)
+Umgebung: B1(0, 1) → { (1 : u : v) | ‖(u, v)‖ < 1 } = V1
+Umgebung: B1(0, 1) →
+
+(w : z : 1)
+
+ w
+2 + z2 < 1
+
+= V2
+V1 ∩ V2 = ∅?
+(a : b : c) ∈ V1 ∩ V2
+⇒ a 6= 0 und (
+b
+a
+)
+2 + ( c
+a
+)
+2 < 1 ⇒ c
+a < 1
+⇒ c 6= 0 und (
+a
+c
+)
+2 + ( b
+c
+)
+2 < 1 ⇒ a
+c < 1
+⇒ Widerspruch
+4) S
+n =
+
+x ∈ R
+n+1
+ kxk = 1
+ist n-dimensionale Mannigfaltigkeit.
+Karten:
+Di:= {(x1, . . . , xn+1) ∈ S
+n
+|xi > 0} → B1(0, . . . , 0
+| {z }
+∈Rn
+)
+Ci:= {(x1, . . . , xn+1) ∈ S
+n
+|xi < 0} → B1(0, . . . , 0)
+(x1, . . . , xn+1) 7→ (x1, . . . ,✚xi, . . . , xn+1)
+1
+(x1, . . . , xn) 7→ (x1, . . . , xi−1,
+q
+1 −
+Pn
+k=1 x
+2
+k
+, xi, . . . , xn), oder −
+q
+1 −
+Pn
+k=1 x
+2
+k
+für Ci
+S
+n =
+Sn+1
+i=1 (Ci ∪ Di)
+Als kompakte Mannigfaltigkeit wird S
+n auch „geschlossene Mannigfaltigkeit“ genannt.
+5) [0, 1] ist keine Mannigfaltigkeit, denn:
+Es gibt keine Umgebung von 0 in [0, 1], die homöomorph zu einem offenen Intervall
+ist.
 1xi wird rausgenommen
-27 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN
-6) V1 =
-
-(x, y) ∈ R
-2
-
- x · y = 0 	
-ist keine Mannigfaltigkeit.
-Das Problem ist (0, 0). Wenn man diesen Punkt entfernt, zerfällt der Raum in 4
-Zusammenhangskomponenten. Jeder R
-n
-zerfällt jedoch in höchstens zwei Zusammen￾hangskomponenten, wenn man einen Punkt entfernt.
-7) V2 =
-
-(x, y) ∈ R
-2
-
- x
-3 = y
-2
-	
-ist eine Mannigfaltigkeit.
-8) X = (R \ { 0 }) ∪ (01, 02)
-U ⊆ X offen ⇔
-(
-U offen in R \ { 0 } , falls 01 ∈/ U, 02 ∈ U
-∃ε > 0 : (−ε, ε) ⊆ U falls 01 ∈ U, 02 ∈ U
-Insbesondere sind (R \ { 0 }) ∪ { 01 } und (R \ { 0 }) ∪ { 02 } offen und homöomorph
-zu R.
-Aber: X ist nicht hausdorffsch! Denn es gibt keine disjunkten Umgebungen von 01
-und 02.
-9) GLn(R) ist eine Mannigfaltigkeit der Dimension n
-2
-, weil offene Teilmengen von R
-n
-2
-eine Mannigfaltigkeit bilden.
-Definition 25
-Seien X, Y n-dimensionale Mannigfaltigkeiten, U ⊆ X und V ⊆ Y offen, Φ : U → V ein Ho￾möomorphismus Z = (X ∪˙ Y )/∼ mit der von u ∼ Φ(u) ∀u ∈ U erzeugten Äquivalenzrelation
-und der von ∼ induzierten Quotiententopologie.
-Z heißt Verklebung von X und Y längs U und V . Z besitzt einen Atlas aus n-dimensionalen
-Karten. Falls Z hausdorffsch ist, ist Z eine n-dimensionale Mannigfaltigkeit.
-Bemerkung 26
-Sind X, Y Mannigfaltigkeiten der Dimension n bzw. m, so ist X × Y eine Mannigfaltigkeit
-der Dimension n + m.
-Beweis: Produkte von Karten sind Karten. 
-Beispiel 21
-Mannigfaltigkeiten mit Dimension 1:
-1) Offene Intervalle, R, (0, 1) sind alle homöomorph
-2) S
-1
-Mannigfaltigkeiten mit Dimension 2:
-1) R
-2
-2) S
-2
-(0 Henkel)
-3) T
-2
-(1 Henkel)
-4) oder mehr Henkel, wie z.B. der Zweifachtorus in Abbildung 2.1
-Bemerkung 27
-Sei n ∈ N, F : R
-n → R stetig differenzierbar und X = V (F) := { x ∈ R
-n
-| F(x) = 0 } das
-„vanishing set“.
+ 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN
+6) V1 =
+
+(x, y) ∈ R
+2
+
+ x · y = 0
+ist keine Mannigfaltigkeit.
+Das Problem ist (0, 0). Wenn man diesen Punkt entfernt, zerfällt der Raum in 4
+Zusammenhangskomponenten. Jeder
+$\mathbb{R}^n$
+zerfällt jedoch in höchstens zwei Zusammenhangskomponenten, wenn man einen Punkt entfernt.
+7) V2 =
+
+(x, y) ∈ R
+2
+
+ x
+3 = y2
+
+ist eine Mannigfaltigkeit.
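+8) $X = (\mathbb{R} \setminus \{ 0 \}) \cup \{ 0_1, 0_2 \}$
+$U \subseteq X$ offen $\Leftrightarrow$
+$\begin{cases}
+U \text{ offen in } \mathbb{R} \setminus \{ 0 \}, & \text{falls } 0_1 \notin U,\ 0_2 \notin U \\
+\exists \varepsilon > 0 : (-\varepsilon, \varepsilon) \subseteq U, & \text{falls } 0_1 \in U,\ 0_2 \in U \end{cases}$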
+Insbesondere sind (R \ { 0 }) ∪ { 01 } und (R \ { 0 }) ∪ { 02 } offen und homöomorph
+zu R.
+Aber: X ist nicht hausdorffsch! Denn es gibt keine disjunkten Umgebungen von 01
+und 02.
+9) GLn(R) ist eine Mannigfaltigkeit der Dimension
+$n^2$,
+weil offene Teilmengen von
+$\mathbb{R}^{n^2}$
+eine Mannigfaltigkeit
+bilden.
+Definition 25
+Seien X, Y n-dimensionale Mannigfaltigkeiten, U ⊆ X und V ⊆ Y offen, Φ : U → V ein Homöomorphismus und Z = (X ∪̇ Y )/∼ mit der von u ∼ Φ(u) ∀u ∈ U erzeugten Äquivalenzrelation
+und der von ∼ induzierten Quotiententopologie.
+Z heißt Verklebung von X und Y längs U und V . Z besitzt einen Atlas aus n-dimensionalen
+Karten. Falls Z hausdorffsch ist, ist Z eine n-dimensionale Mannigfaltigkeit.
+Bemerkung 26
+Sind X, Y Mannigfaltigkeiten der Dimension n bzw. m, so ist X × Y eine Mannigfaltigkeit
+der Dimension n + m.
+Beweis: Produkte von Karten sind Karten. 
+Beispiel 21
+Mannigfaltigkeiten mit Dimension 1:
+1) Offene Intervalle, R, (0, 1) sind alle homöomorph
+2) S
+1
+Mannigfaltigkeiten mit Dimension 2:
+1) R
+2
+2) S
+2
+(0 Henkel)
+3) T
+2
+(1 Henkel)
+4) oder mehr Henkel, wie z.B. der Zweifachtorus in Abbildung 2.1
+Bemerkung 27
+Sei n ∈ N,
+$F : \mathbb{R}^n \to \mathbb{R}$ stetig differenzierbar und
+$X = V(F) := \{ x \in \mathbb{R}^n \mid F(x) = 0 \}$ das
+„vanishing set“.
 Dann gilt:
-28 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN
-Abbildung 2.1: Durch Verklebung zweier Tori entsteht ein Zweifachtorus.
-a) X ist abgeschlossen in R
-n
-b) Ist grad(F)(X) 6= 0 ∀x ∈ X, so ist X eine Mannigfaltigkeit der Dimension n − 1.
-Beweis:
-a) Sei y ∈ R
-n \ V (F). Weil F stetig ist, gibt es δ > 0, sodass F(Bδ(y)) ⊆ Bε(F(y)) mit
-ε =
-1
-2
-kF(y)k. Folgt Bδ(y) ∩ V (F) = ∅ ⇒ R
-n \ V (F) ist offen.
-b) Sei x ∈ X mit grad(F)(x) 6= 0, also o. B. d. A. ∂F
-∂X1
-(x) 6= 0, x = (x1, . . . , xn),
-x
-0
-:= (x2, . . . , xn) ∈ R
-n−1
-. Der Satz von der impliziten Funktion liefert nun: Es
-gibt Umgebungen U von x
-0 und differenzierbare Funktionen g : U → R, sodass
-G : U → R
-n
-, u 7→ (g(u), u) eine stetige Abbildung auf eine offene Umgebung V von x
-in X ist.
-
-Beispiel 22
-1) F : R
-3 → R, (x, y, z) 7→ x
-2+y
-2+z
-2−1, V (F) = S
-2
-, grad(F) = (2x, 2y, 2z)
-Bem. 27.b ======⇒
-S
-n
-ist n-dimensionale Mannigfaltigkeit in R
-n+1
-2) F : R
-2 → R, (x, y) 7→ y
-2 −x
-3 Es gilt: grad(F) = (−3x
-2
-, 2y). Also: grad(0, 0) = (0, 0).
-−5 −4 −3 −2 −1
-0
-1
-2
-3
-4
-5
-−4
-−2
-0
-2
-4
-−100
-0
-100
-x
-y
-z
-−100
-0
-100
-f(x, y)
-(a) F(x, y) = y
-2 − x
-3
-2 4 6 8 10 12
-−10
-−5
-5
-10
-x
-y
-a =
-1
-3
-a = 1
-a = 2
-(b) y
-2 − ax3 = 0
-Abbildung 2.2: Rechts ist die Neilsche Parabel für verschiedene Parameter a.
-Daher ist Bemerkung 27.b nicht anwendbar, aber V (F) ist trotzdem eine 1-dimensionale
+ 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN
+Abbildung 2.1: Durch Verklebung zweier Tori entsteht ein Zweifachtorus.
+a) X ist abgeschlossen in
+$\mathbb{R}^n$.
+b) Ist grad(F)(x) ≠ 0 ∀x ∈ X, so ist X eine Mannigfaltigkeit der Dimension n − 1.
+Beweis:
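+a) Sei $y \in \mathbb{R}^n \setminus V(F)$.
+Weil F stetig ist, gibt es δ > 0, sodass
+$F(B_\delta(y)) \subseteq B_\varepsilon(F(y))$ mit
+$\varepsilon = \frac{1}{2} \| F(y) \|$.
+Es folgt
+$B_\delta(y) \cap V(F) = \emptyset$
+$\Rightarrow \mathbb{R}^n \setminus V(F)$ ist offen.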
+b) Sei x ∈ X mit grad(F)(x) ≠ 0, also o. B. d. A.
+$\frac{\partial F}{\partial x_1}(x) \neq 0$,
+$x = (x_1, \dots, x_n)$,
+$x' := (x_2, \dots, x_n) \in \mathbb{R}^{n-1}$.
+Der Satz von der impliziten Funktion liefert nun:
+Es gibt Umgebungen U von $x'$
+und differenzierbare Funktionen
+$g : U \to \mathbb{R}$,
+sodass
+$G : U \to \mathbb{R}^n$,
+$u \mapsto (g(u), u)$
+eine stetige Abbildung
+auf eine offene Umgebung V von x
+in X ist.
+
+Beispiel 22
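+1) $F : \mathbb{R}^3 \to \mathbb{R}$,
+$(x, y, z) \mapsto x^2 + y^2 + z^2 - 1$,
+$V(F) = S^2$,
+$\operatorname{grad}(F) = (2x, 2y, 2z)$
+$\overset{\text{Bem. 27.b}}{\Longrightarrow}$
+$S^n$
+ist n-dimensionale Mannigfaltigkeit in
+$\mathbb{R}^{n+1}$
+2) $F : \mathbb{R}^2 \to \mathbb{R}$,
+$(x, y) \mapsto y^2 - x^3$. Es gilt: $\operatorname{grad}(F) = (-3x^2, 2y)$.
+Also: $\operatorname{grad}(F)(0, 0) = (0, 0)$.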
+−5 −4 −3 −2 −1
+0
+1
+2
+3
+4
+5
+−4
+−2
+0
+2
+4
+−100
+0
+100
+x
+y
+z
+−100
+0
+100
+f(x, y)
+(a) F(x, y) = y
+2 − x3
+2 4 6 8 10 12
+−10
+−5
+5
+10
+x
+y
+a =
+1
+3
+a = 1
+a = 2
+(b) y
+2 − ax3 = 0
+Abbildung 2.2: Rechts ist die Neilsche Parabel für verschiedene Parameter a.
+Daher ist Bemerkung 27.b nicht anwendbar, aber V (F) ist trotzdem eine 1-dimensionale
 topologische Mannigfaltigkeit.
-29 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN
-Definition 26
-Sei X ein Hausdorffraum mit abzählbarer Basis der Topologie. X heißt n-dimensionale
-Mannigfaltigkeit mit Rand, wenn es einen Atlas (Ui
-, ϕi) gibt, wobei Ui ⊆ Xi offen und
-ϕi ein Homöomorphismus auf eine offene Teilmenge von
-R
-n
-+,0
-:= { (x1, . . . , xn) ∈ R
-n
-| xn ≥ 0 }
-ist.
-R
-n
-+,0
-ist ein „Halbraum“.
-Hinweis: Mannigfaltigkeiten mit Rand sind keine Mannigfaltigkeiten.
-∼=
-(a) Halbraum
-∼
-=
-(b) Pair of pants
-∼=
-(c) Sphäre mit einem Loch
-Abbildung 2.3: Beispiele für Mannigfaltigkeiten mit Rand
-Definition 27
-Sei X eine n-dimensionale Mannigfaltigkeit mit Rand und Atlas A. Dann heißt
-∂X := [
-(U,ϕ)∈A
-{ x ∈ U | ϕ(x) = 0 }
-Rand von X.
-∂X ist eine Mannigfaltigkeit der Dimension n − 1.
-Definition 28
-Sei X eine n-dimensionale Mannigfaltigkeit mit Atlas (Ui
-, ϕi)i∈I
-Für i, j ∈ I mit Ui ∩ Uj 6= ∅ heißt
-ϕij := ϕj ◦ ϕ
-−1
-i
-ϕi(Ui ∩ Uj ) → ϕj (Ui ∩ Uj )
+ 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN
+Definition 26
+Sei $X$ ein Hausdorffraum mit abzählbarer Basis der Topologie. $X$ heißt $n$-dimensionale
+Mannigfaltigkeit mit Rand, wenn es einen Atlas $(U_i, \varphi_i)$ gibt, wobei $U_i \subseteq X$ offen und
+$\varphi_i$ ein Homöomorphismus auf eine offene Teilmenge von
+$$\mathbb{R}^n_{+,0} := \{ (x_1, \dots, x_n) \in \mathbb{R}^n \mid x_n \geq 0 \}$$
+ist.
+$\mathbb{R}^n_{+,0}$ ist ein „Halbraum“.
+Hinweis: Mannigfaltigkeiten mit Rand sind keine Mannigfaltigkeiten.
+∼=
+(a) Halbraum
+∼
+=
+(b) Pair of pants
+∼=
+(c) Sphäre mit einem Loch
+Abbildung 2.3: Beispiele für Mannigfaltigkeiten mit Rand
+Definition 27
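+Sei $X$ eine $n$-dimensionale Mannigfaltigkeit mit Rand und Atlas $\mathcal{A}$. Dann heißt
+$$\partial X := \bigcup_{(U, \varphi) \in \mathcal{A}} \{ x \in U \mid \varphi(x)_n = 0 \}$$
+Rand von $X$. Dabei bezeichnet $\varphi(x)_n$ die letzte Koordinate von $\varphi(x) \in \mathbb{R}^n_{+,0}$.
+$\partial X$ ist eine Mannigfaltigkeit der Dimension $n - 1$.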
+Definition 28
+Sei $X$ eine $n$-dimensionale Mannigfaltigkeit mit Atlas $(U_i, \varphi_i)_{i \in I}$.
+Für $i, j \in I$ mit $U_i \cap U_j \neq \emptyset$ heißt
+$$\varphi_{ij} := \varphi_j \circ \varphi_i^{-1} : \varphi_i(U_i \cap U_j) \to \varphi_j(U_i \cap U_j)$$
 Kartenwechsel oder Übergangsfunktion.
-30 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN
-R
-n R
-n
-Ui Uj
-Vi Vj
-X
-ϕi ϕj
-Abbildung 2.4: Kartenwechsel
-2.2 Differenzierbare Mannigfaltigkeiten
-Definition 29
-Sei X eine n-dimensionale Mannigfaltigkeit mit Atlas (Ui
-, ϕi)i∈I .
-a) X heißt differenzierbare Mannigfaltigkeit der Klasse C
-k
-, wenn jede Karten￾wechselabbildung ϕij , i, j ∈ I k-mal stetig differenzierbar ist.
-b) X heißt differenzierbare Mannigfaltigkeit, wenn X eine differenzierbare Mannig￾faltigkeit der Klasse C∞ ist.
-Differenzierbare Mannigfaltigkeiten der Klasse C∞ werden auch glatt genannt.
-Definition 30
-Sei X eine differenzierbare Mannigfaltigkeit der Klasse C
-k
-(k ∈ N ∪ { ∞ }) mit Atlas
-A = (Ui
-, ϕi)i∈I .
-a) Eine Karte (U, ϕ) auf X heißt verträglich mit A, wenn alle Kartenwechsel ϕ ◦ ϕ
-−1
-i
-und ϕi ◦ ϕ
-−1
-(i ∈ I mit Ui ∩ U 6= ∅) differenzierbar von Klasse C
-k
-sind.
-b) Die Menge aller mit A verträglichen Karten auf X bildet einen maximalen Atlas der
-Klasse C
-k
-. Er heißt C
-k
--Struktur auf X.
-Eine C∞-Struktur heißt auch differenzierbare Struktur auf X.
-Bemerkung 28
-Für n ≥ 4 gibt es auf S
-n mehrere verschiedene differenzierbare Strukturen, die sogenannten
-„exotische Sphären“.
-Definition 31
-Seien X, Y differenzierbare Mannigfaltigkeiten der Dimension n bzw. m, x ∈ X.
-a) Eine stetige Abbildung f : X → Y heißt differenzierbar in x (von Klasse C
-k
-), wenn
-es Karten (U, ϕ) von X mit x ∈ U und (V, ψ) von Y mit f(U) ⊆ V gibt, sodass
-ψ ◦ f ◦ ϕ
-−1
-stetig differenzierbar von Klasse C
-k
-in ϕ(x) ist.
-b) f heißt differenzierbar (von Klasse C
-k
-), wenn f in jedem x ∈ X differenzierbar ist.
-c) f heißt Diffeomorphismus, wenn f differenzierbar von Klasse C∞ ist und es eine
-differenzierbare Abbildung g : Y → X von Klasse C∞ gibt mit g ◦ f = idX und
+ 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN
+R
+n Rn
+Ui Uj
+Vi Vj
+X
+ϕi ϕj
+Abbildung 2.4: Kartenwechsel
+2.2 Differenzierbare Mannigfaltigkeiten
+Definition 29
+Sei $X$ eine $n$-dimensionale Mannigfaltigkeit mit Atlas $(U_i, \varphi_i)_{i \in I}$.
+a) $X$ heißt differenzierbare Mannigfaltigkeit der Klasse $C^k$, wenn jede Kartenwechselabbildung $\varphi_{ij}$, $i, j \in I$, $k$-mal stetig differenzierbar ist.
+b) $X$ heißt differenzierbare Mannigfaltigkeit, wenn $X$ eine differenzierbare Mannigfaltigkeit der Klasse $C^\infty$ ist.
+Differenzierbare Mannigfaltigkeiten der Klasse $C^\infty$ werden auch glatt genannt.
+Definition 30
+Sei $X$ eine differenzierbare Mannigfaltigkeit der Klasse $C^k$ ($k \in \mathbb{N} \cup \{ \infty \}$) mit Atlas
+$\mathcal{A} = (U_i, \varphi_i)_{i \in I}$.
+a) Eine Karte $(U, \varphi)$ auf $X$ heißt verträglich mit $\mathcal{A}$, wenn alle Kartenwechsel $\varphi \circ \varphi_i^{-1}$
+und $\varphi_i \circ \varphi^{-1}$ ($i \in I$ mit $U_i \cap U \neq \emptyset$) differenzierbar von Klasse $C^k$ sind.
+b) Die Menge aller mit $\mathcal{A}$ verträglichen Karten auf $X$ bildet einen maximalen Atlas der
+Klasse $C^k$. Er heißt $C^k$-Struktur auf $X$.
+Eine $C^\infty$-Struktur heißt auch differenzierbare Struktur auf $X$.
+Bemerkung 28
+Für n ≥ 4 gibt es auf S
+n mehrere verschiedene differenzierbare Strukturen, die sogenannten
+„exotische Sphären“.
+Definition 31
+Seien $X, Y$ differenzierbare Mannigfaltigkeiten der Dimension $n$ bzw. $m$, $x \in X$.
+a) Eine stetige Abbildung $f : X \to Y$ heißt differenzierbar in $x$ (von Klasse $C^k$), wenn
+es Karten $(U, \varphi)$ von $X$ mit $x \in U$ und $(V, \psi)$ von $Y$ mit $f(U) \subseteq V$ gibt, sodass
+$\psi \circ f \circ \varphi^{-1}$ stetig differenzierbar von Klasse $C^k$ in $\varphi(x)$ ist.
+b) $f$ heißt differenzierbar (von Klasse $C^k$), wenn $f$ in jedem $x \in X$ differenzierbar ist.
+c) $f$ heißt Diffeomorphismus, wenn $f$ differenzierbar von Klasse $C^\infty$ ist und es eine
+differenzierbare Abbildung $g : Y \to X$ von Klasse $C^\infty$ gibt mit $g \circ f = \mathrm{id}_X$ und
 f ◦ g = idY .
-31 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN
-Bemerkung 29
-Die Bedingung in Definition 31.a hängt nicht von den gewählten Karten ab.
-Beweis: Seien (U
-0
-, ϕ0
-) und (V
-0
-, ψ0
-) Karten von X bzw. Y um x bzw. f(x) mit f(U
-0
-) ⊆ V
-0
-.
-⇒ ψ
-0 ◦ f ◦ (ϕ
-0
-)
-−1
-= ψ
-0 ◦ (ψ
-−1 ◦ ψ) ◦ f ◦ (ϕ
-−1 ◦ ϕ) ◦ (ϕ
-0
-)
-−1
-ist genau dann differenzierbar, wenn ψ ◦ f ◦ ϕ
-−1 differenzierbar ist.
-Beispiel 23
-f : R → R, x 7→ x
-3
-ist kein Diffeomorphismus, aber Homöomorphismus, da mit g(x) := √3 x
-gilt: f ◦ g = idR, g ◦ f = idR
-Bemerkung 30
-Sei X eine glatte Mannigfaltigkeit. Dann ist
-Diffeo(X) := { f : X → X | f ist Diffeomorphismus }
-eine Untergruppe von Homöo(X).
-Definition 32
-S ⊆ R
-3 heißt reguläre Fläche :⇔ ∀s ∈ S ∃ Umgebung V (s) ⊆ R
-3 ∃U ⊆ R
-2 offen:
-∃ differenzierbare Abbildung F : U → V ∩ S: Rg(JF (u)) = 2 ∀u ∈ U.
-F heißt (lokale) reguläre Parametrisierung von S.
-F(u, v) = (x(u, v), y(u, v), z(u, v))
-JF (u, v) =
-
-
-∂x
-∂u(p)
-∂x
-∂v (p)
-∂y
-∂u(p)
-∂y
-∂v (p)
-∂z
-∂u(p)
-∂z
-∂v (p)
-
-
-Beispiel 24
-1) Rotationsflächen: Sei r : R → R>0 eine differenzierbare Funktion.
-F : R
-2 → R
-3
-(u, v) 7→ (r(u) cos(u), r(v) sin(u), v)
-JF (u, v) =
-
-
-−r(v) sin u r0
-(v) cos u
-r(v) cos u r0
-(v) sin u
-0 1
-
-
-hat Rang 2 für alle (u, v) ∈ R
-2
-.
-2) Kugelkoordinaten: F : R
-2 → R
-3
-,
-(u, v) 7→ (R cos v cos u, R cos v sin u, R sin v)
-Es gilt: F(u, v) ∈ S
-2
-R
-, denn
-R
-2
-cos2
-(v) cos2
-(u) + R
-2
-cos2
-(v) sin2
-(u) + R
-2
-sin2
-(v)
-=R
-2
-(cos2
-(v) cos2
-(u) + cos2
-(v) sin2
-(u) + sin2
-(v))
-=R
-2
-￾
-cos2
-(v)(cos2
-(u) + sin2
-(u)) + sin2
-(v)
-
-=R
-2
-￾
-cos2
-(v) + sin2
-(v)
-
-=R  
-32 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN
-N
-S
-v u
-(a) Kugelkoordinaten
-−1
-0
-1
-2−2
-−1
-0
-1
-2
-0.6
-0.8
-1
-(b) Rotationskörper
-π
-2
-π 3π
-2
-2π
-−1
-−0.5
-0.5
-1
-x
-y
-sin x
-cos x
+ 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN
+Bemerkung 29
+Die Bedingung in Definition 31.a hängt nicht von den gewählten Karten ab.
+Beweis: Seien $(U', \varphi')$ und $(V', \psi')$ Karten von $X$ bzw. $Y$ um $x$ bzw. $f(x)$ mit $f(U') \subseteq V'$.
+$$\Rightarrow \psi' \circ f \circ (\varphi')^{-1} = \psi' \circ (\psi^{-1} \circ \psi) \circ f \circ (\varphi^{-1} \circ \varphi) \circ (\varphi')^{-1}$$
+ist genau dann differenzierbar, wenn $\psi \circ f \circ \varphi^{-1}$ differenzierbar ist.
+Beispiel 23
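+$f : \mathbb{R} \to \mathbb{R}$, $x \mapsto x^3$
+ist kein Diffeomorphismus, aber Homöomorphismus, da mit $g(x) := \sqrt[3]{x}$
+gilt: $f \circ g = \mathrm{id}_{\mathbb{R}}$, $g \circ f = \mathrm{id}_{\mathbb{R}}$. Die Umkehrabbildung $g$ ist jedoch in $0$ nicht differenzierbar.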
+Bemerkung 30
+Sei X eine glatte Mannigfaltigkeit. Dann ist
+Diffeo(X) := { f : X → X | f ist Diffeomorphismus }
+eine Untergruppe von Homöo(X).
+Definition 32
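+$S \subseteq \mathbb{R}^3$ heißt reguläre Fläche $:\Leftrightarrow$ $\forall s \in S$ $\exists$ Umgebung $V(s) \subseteq \mathbb{R}^3$ $\exists U \subseteq \mathbb{R}^2$ offen:
+$\exists$ differenzierbare Abbildung $F : U \to V \cap S$: $\operatorname{Rg}(J_F(u)) = 2$ $\forall u \in U$.
+$F$ heißt (lokale) reguläre Parametrisierung von $S$.
+$$F(u, v) = (x(u, v), y(u, v), z(u, v))$$
+$$J_F(u, v) = \begin{pmatrix} \frac{\partial x}{\partial u}(p) & \frac{\partial x}{\partial v}(p) \\ \frac{\partial y}{\partial u}(p) & \frac{\partial y}{\partial v}(p) \\ \frac{\partial z}{\partial u}(p) & \frac{\partial z}{\partial v}(p) \end{pmatrix}$$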
+Beispiel 24
+1) Rotationsflächen: Sei r : R → R>0 eine differenzierbare Funktion.
+$F : \mathbb{R}^2 \to \mathbb{R}^3$
+$(u, v) \mapsto (r(v) \cos(u), r(v) \sin(u), v)$
+$$J_F(u, v) = \begin{pmatrix} -r(v) \sin u & r'(v) \cos u \\ r(v) \cos u & r'(v) \sin u \\ 0 & 1 \end{pmatrix}$$
+hat Rang 2 für alle $(u, v) \in \mathbb{R}^2$.
+2) Kugelkoordinaten: $F : \mathbb{R}^2 \to \mathbb{R}^3$,
+$(u, v) \mapsto (R \cos v \cos u, R \cos v \sin u, R \sin v)$
+Es gilt: $F(u, v) \in S^2_R$, denn
+$$\begin{aligned}
+& R^2 \cos^2(v) \cos^2(u) + R^2 \cos^2(v) \sin^2(u) + R^2 \sin^2(v) \\
+={} & R^2 \left( \cos^2(v) \cos^2(u) + \cos^2(v) \sin^2(u) + \sin^2(v) \right) \\
+={} & R^2 \left( \cos^2(v) (\cos^2(u) + \sin^2(u)) + \sin^2(v) \right) \\
+={} & R^2 \left( \cos^2(v) + \sin^2(v) \right) \\
+={} & R^2
+\end{aligned}$$
+ 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN
+N
+S
+v u
+(a) Kugelkoordinaten
+−1
+0
+1
+2−2
+−1
+0
+1
+2
+0.6
+0.8
+1
+(b) Rotationskörper
+π
+2
+π 3π
+2
+2π
+−1
+−0.5
+0.5
+1
+x
+y
+sin x
+cos x
 (c) Sinus und Kosinus haben keine gemeinsame Nullstelle
-33 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN
-Die Jacobi-Matrix
-JF (u, v) =
-
-
-−R cos v sin u −R sin v cos u
-R cos v cos u −R sin v sin u
-0 R cos v
-
-
-hat Rang 2 für cos v 6= 0. In N und S ist cos v = 0.
-Bemerkung 31
-Jede reguläre Fläche S ⊆ R
-3
-ist eine 2-dimensionale, differenzierbare Mannigfaltigkeit.
-Beweis:
-S ⊆ R
-3
-ist als reguläre Fläche eine 2-dimensionale Mannigfaltigkeit. Aus der Definition von
-regulären Flächen folgt direkt, dass Karten (Ui
-, Fi) und (Uj ⊆ R
-2
-, Fj : R
-2 → R
-3
-) von S mit
-Ui ∩ Uj 6= ∅ existieren, wobei Fi und Fj nach Definition differenzierbare Abbildungen sind.
-z.Z.: F
-−1
-j
-◦ Fi
-ist ein Diffeomorphismus.
-Ui Uj
-S
-s
-Fi Fj
-F
-−1
-j
-◦Fi
-Abbildung 2.5: Reguläre Fläche S zum Beweis von Bemerkung 31
-Idee: Finde differenzierbare Funktion Fg−1
-j
-in Umgebung W von s, sodass Fg−1
-j
-|S∩W = F
-−1
-j
-.
-Ausführung: Sei u0 ∈ Ui
-, v0 ∈ Uj mit Fi(u0) = s = Fj (v0).
-Da Rg(JFj
-(v0)) = 2 ist, ist o. B. d. A.
-det ∂x
-∂u
-∂x
-∂v
-∂y
-∂u
-∂y
-∂v
-
-(v0) 6= 0
-und Fj (u, v) = (x(u, v), y(u, v), z(u, v)).
-Definiere Ffj : Uj × R → R
-3 durch
-Ffj (u, v, t) := (x(u, v), y(u, v), z(u, v) + t)
-Offensichtlich: Ffj |Uj×{ 0 } = Fj
-JFfj
-=
-
-
-∂x
-∂u
-∂x
-∂v 0
-∂y
-∂u
-∂y
-∂v 0
-∂z
-∂u
-∂z
-∂v 1
-
- ⇒ det JFfj
-(v0, 0) 6= 0
-Analysis II
-======⇒ Es gibt Umgebungen W von Fj von Ffj (v0, 0) = Fj (v0) = s, sodass Ffj auf W eine
-differenzierbar Inverse F
-−1
-j
+ 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN
+Die Jacobi-Matrix
+$$J_F(u, v) = \begin{pmatrix} -R \cos v \sin u & -R \sin v \cos u \\ R \cos v \cos u & -R \sin v \sin u \\ 0 & R \cos v \end{pmatrix}$$
+hat Rang 2 für $\cos v \neq 0$. In $N$ und $S$ ist $\cos v = 0$.
+Bemerkung 31
+Jede reguläre Fläche $S \subseteq \mathbb{R}^3$ ist eine 2-dimensionale, differenzierbare Mannigfaltigkeit.
+Beweis:
+$S \subseteq \mathbb{R}^3$ ist als reguläre Fläche eine 2-dimensionale Mannigfaltigkeit. Aus der Definition von
+regulären Flächen folgt direkt, dass Karten $(U_i, F_i)$ und $(U_j \subseteq \mathbb{R}^2, F_j : \mathbb{R}^2 \to \mathbb{R}^3)$ von $S$ mit
+$U_i \cap U_j \neq \emptyset$ existieren, wobei $F_i$ und $F_j$ nach Definition differenzierbare Abbildungen sind.
+z.Z.: $F_j^{-1} \circ F_i$ ist ein Diffeomorphismus.
+Ui Uj
+S
+s
+Fi Fj
+F
+−1
+j
+◦Fi
+Abbildung 2.5: Reguläre Fläche S zum Beweis von Bemerkung 31
+Idee: Finde differenzierbare Funktion $\widetilde{F_j^{-1}}$ in Umgebung $W$ von $s$, sodass $\widetilde{F_j^{-1}}|_{S \cap W} = F_j^{-1}$.
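+Ausführung: Sei $u_0 \in U_i$, $v_0 \in U_j$ mit $F_i(u_0) = s = F_j(v_0)$.
+Da $\operatorname{Rg}(J_{F_j}(v_0)) = 2$ ist, ist o. B. d. A.
+$$\det \begin{pmatrix} \frac{\partial x}{\partial u} & \frac{\partial x}{\partial v} \\ \frac{\partial y}{\partial u} & \frac{\partial y}{\partial v} \end{pmatrix} (v_0) \neq 0$$
+und $F_j(u, v) = (x(u, v), y(u, v), z(u, v))$.
+Definiere $\widetilde{F_j} : U_j \times \mathbb{R} \to \mathbb{R}^3$ durch
+$$\widetilde{F_j}(u, v, t) := (x(u, v), y(u, v), z(u, v) + t)$$
+Offensichtlich: $\widetilde{F_j}|_{U_j \times \{ 0 \}} = F_j$
+$$J_{\widetilde{F_j}} = \begin{pmatrix} \frac{\partial x}{\partial u} & \frac{\partial x}{\partial v} & 0 \\ \frac{\partial y}{\partial u} & \frac{\partial y}{\partial v} & 0 \\ \frac{\partial z}{\partial u} & \frac{\partial z}{\partial v} & 1 \end{pmatrix} \Rightarrow \det J_{\widetilde{F_j}}(v_0, 0) \neq 0$$
+$\overset{\text{Analysis II}}{\Longrightarrow}$ Es gibt eine Umgebung $W$ von $\widetilde{F_j}(v_0, 0) = F_j(v_0) = s$, sodass $\widetilde{F_j}$ auf $W$ eine
+differenzierbare Inverse $\widetilde{F_j}^{-1}$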
 hat.
-34 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN
-Weiter gilt:
-Ffj
-−1
-|W∩S = F
-−1
-j
-|W∩S
-⇒ F
-−1
-j
-◦ Fi
-|F
-−1
-i
-(W∩S) = F
-−1
-j
-◦ Fi
-|F
-−1
-i
-(W∩S)
-ist differenzierbar.
-Definition 33
-Sei G eine Mannigfaltigkeit und (G, ◦) eine Gruppe.
-a) G heißt topologische Gruppe, wenn die Abbildungen ◦ : G×G → G und ι : G → G
-definiert durch
-g ◦ h := g · h und ι(g) := g
-−1
-stetig sind.
-b) Ist G eine differenzierbare Mannigfaltigkeit, so heißt G Lie-Gruppe, wenn (G, ◦) und
-(G, ι) differenzierbar sind.
-Beispiel 25 (Lie-Gruppen)
-1) Alle endlichen Gruppen sind 0-dimensionale Lie-Gruppen.
-2) GLn(R)
-3) (R
-×, ·)
-4) (R>0, ·)
-5) (R
-n
-, +), denn A · B(i, j) = Pn
-k=1 aikbkj ist nach allen Variablen differenzierbar
-(A−1
-)(i, j) = det(Aij )
-det A
-Aij =
-
-
-ai1 . . . ain
-.
-.
-.
-.
-.
-.
-.
-.
-.
-an1 . . . ann
-
-
-∈ R
-(n−1)×(n−1)
-ist differenzierbar.
-det Aij kann 0 werden, da:
-
-1 1
-−1 0
-6) SLn(R) = { A ∈ GLn(R) | det(A) = 1 }
-Bemerkung 32
-Ist G eine Lie-Gruppe und g ∈ G, so ist die Abbildung
-lg : G → G
-h 7→ g · h
+ 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN
+Weiter gilt:
+$$\widetilde{F_j}^{-1}|_{W \cap S} = F_j^{-1}|_{W \cap S}$$
+$$\Rightarrow F_j^{-1} \circ F_i|_{F_i^{-1}(W \cap S)} = \widetilde{F_j}^{-1} \circ F_i|_{F_i^{-1}(W \cap S)}$$
+ist differenzierbar.
+Definition 33
+Sei $G$ eine Mannigfaltigkeit und $(G, \circ)$ eine Gruppe.
+a) $G$ heißt topologische Gruppe, wenn die Abbildungen $\circ : G \times G \to G$ und $\iota : G \to G$
+definiert durch
+$$g \circ h := g \cdot h \quad \text{und} \quad \iota(g) := g^{-1}$$
+stetig sind.
+b) Ist $G$ eine differenzierbare Mannigfaltigkeit, so heißt $G$ Lie-Gruppe, wenn die Abbildungen $\circ$ und
+$\iota$ differenzierbar sind.
+Beispiel 25 (Lie-Gruppen)
+1) Alle endlichen Gruppen sind 0-dimensionale Lie-Gruppen.
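+2) $\mathrm{GL}_n(\mathbb{R})$
+3) $(\mathbb{R}^\times, \cdot)$
+4) $(\mathbb{R}_{>0}, \cdot)$
+5) $(\mathbb{R}^n, +)$, denn $(A \cdot B)(i, j) = \sum_{k=1}^{n} a_{ik} b_{kj}$ ist nach allen Variablen differenzierbar
+$$(A^{-1})(i, j) = \frac{\det(A_{ij})}{\det A}$$
+$$A_{ij} = \begin{pmatrix} a_{i1} & \dots & a_{in} \\ \vdots & \ddots & \vdots \\ a_{n1} & \dots & a_{nn} \end{pmatrix} \in \mathbb{R}^{(n-1) \times (n-1)}$$
+ist differenzierbar.
+$\det A_{ij}$ kann $0$ werden, da: $\begin{pmatrix} 1 & 1 \\ -1 & 0 \end{pmatrix}$
+6) $\mathrm{SL}_n(\mathbb{R}) = \{ A \in \mathrm{GL}_n(\mathbb{R}) \mid \det(A) = 1 \}$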
+Bemerkung 32
+Ist G eine Lie-Gruppe und g ∈ G, so ist die Abbildung
+$$l_g : G \to G, \quad h \mapsto g \cdot h$$
 ein Diffeomorphismus.
-35 2.3. SIMPLIZIALKOMPLEX
-2.3 Simplizialkomplex
-Definition 34
-Seien v0, . . . , vk ∈ R
-n Punkte.
-a) v0, . . . , vk sind in allgemeiner Lage
-⇔ es gibt keinen (k−1)-dimensionalen affinen Untervektorraum, der v0, . . . , vk enthält
-⇔ v1 − v0, . . . , vk − v0 sind linear unabhängig.
-b) conv(v0, . . . , vk) := n Pk
-i=0 λivi
-
-
-
-λi ≥ 0,
-Pk
-i=0 λi = 1 o
-heißt die konvexe Hülle von
-v0, . . . , vk.
-Definition 35
-a) Sei ∆n = conv(e0, . . . , en) ⊆ R
-n+1 die konvexe Hülle der Standard-Basisvektoren
-e0, . . . , en.
-Dann heißt ∆n Standard-Simplex und n die Dimension des Simplex.
-b) Für Punkte v0, . . . , vk im R
-n
-in allgemeiner Lage heißt ∆(v0, . . . , vk) = conv(v0, . . . , vk)
-ein k-Simplex in R
-n
-.
-c) Ist ∆(v0, . . . , vk) ein k-Simplex und I = { i0, . . . , ir } ⊆ { 0, . . . , k }, so ist si0,...,ir
-:=
-conv(vi0
-, . . . , vir
-) ein r-Simplex und heißt Teilsimplex oder Seite von ∆.
-(a) 0-Simplex ∆0
-1 2 3
-1
-2
-3
-e0
-e1
-(b) 1-Simplex ∆1
-1 2 3
-1
-2
-3
-e0
-e1
-e2
-(c) 2-Simplex ∆2
-e0 e1
-e2
-e3
-(d) 3-Simplex ∆3
-Abbildung 2.6: Beispiele für k-Simplexe
-Definition 36
-a) Eine endliche Menge K von Simplizes im R
-n heißt (endlicher) Simplizialkomplex,
-wenn gilt:
-(i) Für ∆ ∈ K und S ⊆ ∆ Teilsimplex ist S ∈ K.
-(ii) Für ∆1, ∆2 ∈ K ist ∆1 ∩ ∆2 leer oder ein Teilsimplex von ∆1 und von ∆2.
-b) |K| := S
-∆∈K ∆ (mit Teilraumtopologie) heißt geometrische Realisierung von K.
+ 2.3. SIMPLIZIALKOMPLEX
+2.3 Simplizialkomplex
+Definition 34
+Seien $v_0, \dots, v_k \in \mathbb{R}^n$ Punkte.
+a) $v_0, \dots, v_k$ sind in allgemeiner Lage
+$\Leftrightarrow$ es gibt keinen $(k-1)$-dimensionalen affinen Unterraum, der $v_0, \dots, v_k$ enthält
+$\Leftrightarrow$ $v_1 - v_0, \dots, v_k - v_0$ sind linear unabhängig.
+b) $\operatorname{conv}(v_0, \dots, v_k) := \left\{ \sum_{i=0}^{k} \lambda_i v_i \,\middle|\, \lambda_i \geq 0, \sum_{i=0}^{k} \lambda_i = 1 \right\}$
+heißt die konvexe Hülle von
+$v_0, \dots, v_k$.
+Definition 35
+a) Sei $\Delta^n = \operatorname{conv}(e_0, \dots, e_n) \subseteq \mathbb{R}^{n+1}$ die konvexe Hülle der Standard-Basisvektoren
+$e_0, \dots, e_n$.
+Dann heißt $\Delta^n$ Standard-Simplex und $n$ die Dimension des Simplex.
+b) Für Punkte $v_0, \dots, v_k$ im $\mathbb{R}^n$ in allgemeiner Lage heißt $\Delta(v_0, \dots, v_k) = \operatorname{conv}(v_0, \dots, v_k)$
+ein $k$-Simplex in $\mathbb{R}^n$.
+c) Ist $\Delta(v_0, \dots, v_k)$ ein $k$-Simplex und $I = \{ i_0, \dots, i_r \} \subseteq \{ 0, \dots, k \}$, so ist $s_{i_0, \dots, i_r} :=
+\operatorname{conv}(v_{i_0}, \dots, v_{i_r})$ ein $r$-Simplex und heißt Teilsimplex oder Seite von $\Delta$.
+(a) 0-Simplex ∆0
+1 2 3
+1
+2
+3
+e0
+e1
+(b) 1-Simplex ∆1
+1 2 3
+1
+2
+3
+e0
+e1
+e2
+(c) 2-Simplex ∆2
+e0 e1
+e2
+e3
+(d) 3-Simplex ∆3
+Abbildung 2.6: Beispiele für k-Simplexe
+Definition 36
+a) Eine endliche Menge $K$ von Simplizes im $\mathbb{R}^n$ heißt (endlicher) Simplizialkomplex,
+wenn gilt:
+(i) Für $\Delta \in K$ und $S \subseteq \Delta$ Teilsimplex ist $S \in K$.
+(ii) Für $\Delta_1, \Delta_2 \in K$ ist $\Delta_1 \cap \Delta_2$ leer oder ein Teilsimplex von $\Delta_1$ und von $\Delta_2$.
+b) $|K| := \bigcup_{\Delta \in K} \Delta$ (mit Teilraumtopologie) heißt geometrische Realisierung von $K$.
 c) Ist d = max { k ∈ N0 | K enthält k-Simplex }, so heißt d die Dimension von K.
-36 2.3. SIMPLIZIALKOMPLEX
-(a) 1D Simplizialkomplex (b) 2D Simplizialkomplex
-(ohne untere Fläche!)
-(c) 2D Simplizialkomplex
-(d) 1D Simplizialkomplex (e) 2D Simplizialkomplex
-P
-(f) P ist kein Teilsimplex, da Eigen￾schaft Punkt b.ii verletzt ist
-P
-(g) Simplizialkomplex
-Abbildung 2.7: Beispiele für Simplizialkomplexe
-Definition 37
-Seien K, L Simplizialkomplexe. Eine stetige Abbildung
-f : |K| → |L|
-heißt simplizial, wenn für jedes ∆ ∈ K gilt:
-a) f(∆) ∈ L
-b) f|∆ : ∆ → f(∆) ist eine affine Abbildung.
-Beispiel 26 (Simpliziale Abbildungen)
-1) ϕ(e1) := b1, ϕ(e2) := b2
+ 2.3. SIMPLIZIALKOMPLEX
+(a) 1D Simplizialkomplex (b) 2D Simplizialkomplex
+(ohne untere Fläche!)
+(c) 2D Simplizialkomplex
+(d) 1D Simplizialkomplex (e) 2D Simplizialkomplex
+P
+(f) P ist kein Teilsimplex, da Eigenschaft (ii) aus Definition 36.a verletzt ist
+P
+(g) Simplizialkomplex
+Abbildung 2.7: Beispiele für Simplizialkomplexe
+Definition 37
+Seien K, L Simplizialkomplexe. Eine stetige Abbildung
+f : |K| → |L|
+heißt simplizial, wenn für jedes ∆ ∈ K gilt:
+a) f(∆) ∈ L
+b) f|∆ : ∆ → f(∆) ist eine affine Abbildung.
+Beispiel 26 (Simpliziale Abbildungen)
+1) ϕ(e1) := b1, ϕ(e2) := b2
 ϕ ist eine eindeutig bestimmte lineare Abbildung
-37 2.3. SIMPLIZIALKOMPLEX
-0 e2
-e1
-0 b1
-b2
-ϕ
-2) Folgende Abbildung ϕ : ∆n → ∆n−1
-ist simplizial:
-ϕ
-3) Tori können simplizial auf Sphären abgebildet werden (vgl. Abbildung 2.8)
-M M
-a
-a
-a
-b
-b
-b
-c
-c
-c
-d
-d
-d
-M
-a
-b
-c
-d
-b b b
-b b b
-b b b
-b
-b
-b
-b
-b
-b
-b b
-b
-b b
-b b
-b b
-b
-b
-b
-b
-Abbildung 2.8: Abbildung eines Torus auf eine Sphäre
-Definition 38
-Sei K ein endlicher Simplizialkomplex. Für n ≥ 0 sei an(K) die Anzahl der n-Simplizes in
-K.
-Dann heißt
-χ(K) :=
-dim
-X
-K
-n=0
-(−1)n
-an(K)
-Eulerzahl (oder Euler-Charakteristik) von K.
-Beispiel 27
-1) χ(∆1
-) = 2 − 1 = 1
-χ(∆2
-) = 3 − 3 + 1 = 1
-χ(∆3
-) = 4 − 6 + 4 − 1 = 1
-2) χ(Oktaeder-Oberfläche) = 6 − 12 + 8 = 2
-χ(Rand des Tetraeders) = 2
-χ(Ikosaeder) = 12 − 30 + 20 = 2
-3) χ(Würfel) = 8 − 12 + 6 = 2
-χ(Würfel, unterteilt in Dreiecksflächen) = 8 − (12 + 6) + (6 · 2) = 2
-Bemerkung 33
-χ(∆n
-) = 1 für jedes n ∈ N0
-38 2.3. SIMPLIZIALKOMPLEX
-Beweis: ∆n
-ist die konvexe Hülle von (e0, . . . , en) in R
-n+1. Jede (k + 1)-elementige Teilmenge
-von { e0, . . . , en } definiert ein k-Simplex.
-⇒ ak(∆n
-) = ￾
-n+1
-k+1
-, k = 0, . . . , n
-⇒ χ(∆n
-) = Pn
-k=0(−1)k
-￾
-n+1
-k+1
-f(x) = (x + 1)n+1
-Binomischer
-Lehrsatz =
-Pn+1
-k=0 ￾
-n+1
-k
-
-x
-k
-⇒ 0 = Pn+1
-k=0 ￾
-n+1
-k
-
-(−1)k = χ(∆n
-) − 1
-⇒ χ(∆n
-) = 1 
-Definition 39
-a) Ein 1D-Simplizialkomplex heißt Graph.
-b) Ein Graph, der homöomorph zu S
-1
-ist, heißt Kreis.
-c) Ein zusammenhängender Graph heißt Baum, wenn er keinen Kreis enthält.
-(a) Dies wird häufig auch als
-Multigraph bezeichnet.
-(b) Planare Einbettung des Te￾traeders
-(c) K5 (d) K3,3
-Abbildung 2.9: Beispiele für Graphen
-Bemerkung 34
-Für jeden Baum T gilt χ(T) = 1.
-Beweis: Induktion über die Anzahl der Ecken.
-Bemerkung 35
-a) Jeder zusammenhängende Graph Γ enthält einen Teilbaum T, der alle Ecken von Γ
-enthält.2
-b) Ist n = a1(Γ) − a1(T), so ist χ(Γ) = 1 − n.
-Beweis:
-a) Siehe „Algorithmus von Kruskal“.
-2T wird „Spannbaum“ gena    
-39 2.3. SIMPLIZIALKOMPLEX
-b) χ(Γ) = a0(Γ) − a1(Γ)
-= a0(Γ) − (n + a1(T))
-= a0(T) − a1(T) − n
-= χ(T) − n
-= 1 − n
-Bemerkung 36
-Sei ∆ ein n-Simplex und x ∈ ∆◦ ⊆ R
-n
-. Sei K der Simplizialkomplex, der aus ∆ durch
-„Unterteilung“ in x entsteht. Dann ist χ(K) = χ(∆) = 1.
-(a) K (b) ∆, das aus K durch Unter￾teilung entsteht
-Abbildung 2.10: Beispiel für Bemerkung 36.
-Beweis: χ(K) = χ(∆) − (−1)n
-| {z }
-n-Simplex
-+
-Xn
-k=0
-(−1)k
-
-n + 1
-k
-
-| {z }
-(1+(−1))n+1
-= χ(∆) 
-Definition 40
-Sei X ein topologischer Raum, K ein Simplizialkomplex und
-h : |K| → X
-ein Homöomorphismus von der geometrischen Realisierung |K| auf X. Dann heißt h eine
-Triangulierung von X.
-Beispiel 28 (Triangulierung des Torus)
-Für eine Triangulierung des Torus werden mindestens 14 Dreiecke benötigt. Beispiele für
-fehlerhafte „Triangulierungen“ sind in Beispiel 28 zu sehen. Korrekte Triangulierungen sind
-in Beispiel 28.
-Satz 2.1 (Eulersche Polyederformel)
-Sei P ein konvexes Polyeder in R
-3
-, d. h. ∂P ist ein 2-dimensionaler Simplizialkomplex,
-sodass gilt:
-∀x, y ∈ ∂P : [x, y] ⊆ P
-Dann ist χ(∂P) = 2.
-Beweis:
-1) Die Aussage ist richtig für den Tetraeder.
-2) O. B. d. A. sei 0 ∈ P und P ⊆ B1(0). Projeziere ∂P von 0 aus auf ∂B1(0) = S
-2
-.
-Erhalte Triangulierung von S
-2
+ 2.3. SIMPLIZIALKOMPLEX
+0 e2
+e1
+0 b1
+b2
+ϕ
+2) Folgende Abbildung $\varphi : \Delta^n \to \Delta^{n-1}$ ist simplizial:
+ϕ
+3) Tori können simplizial auf Sphären abgebildet werden (vgl. Abbildung 2.8)
+M M
+a
+a
+a
+b
+b
+b
+c
+c
+c
+d
+d
+d
+M
+a
+b
+c
+d
+b b b
+b b b
+b b b
+b
+b
+b
+b
+b
+b
+b b
+b
+b b
+b b
+b b
+b
+b
+b
+b
+Abbildung 2.8: Abbildung eines Torus auf eine Sphäre
+Definition 38
+Sei $K$ ein endlicher Simplizialkomplex. Für $n \geq 0$ sei $a_n(K)$ die Anzahl der $n$-Simplizes in
+$K$.
+Dann heißt
+$$\chi(K) := \sum_{n=0}^{\dim K} (-1)^n a_n(K)$$
+Eulerzahl (oder Euler-Charakteristik) von $K$.
+Beispiel 27
+1) $\chi(\Delta^1) = 2 - 1 = 1$
+$\chi(\Delta^2) = 3 - 3 + 1 = 1$
+$\chi(\Delta^3) = 4 - 6 + 4 - 1 = 1$
+2) $\chi(\text{Oktaeder-Oberfläche}) = 6 - 12 + 8 = 2$
+$\chi(\text{Rand des Tetraeders}) = 2$
+$\chi(\text{Ikosaeder}) = 12 - 30 + 20 = 2$
+3) $\chi(\text{Würfel}) = 8 - 12 + 6 = 2$
+$\chi(\text{Würfel, unterteilt in Dreiecksflächen}) = 8 - (12 + 6) + (6 \cdot 2) = 2$
+Bemerkung 33
+$\chi(\Delta^n) = 1$ für jedes $n \in \mathbb{N}_0$
+ 2.3. SIMPLIZIALKOMPLEX
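+Beweis: $\Delta^n$ ist die konvexe Hülle von $(e_0, \dots, e_n)$ in $\mathbb{R}^{n+1}$. Jede $(k + 1)$-elementige Teilmenge
+von $\{ e_0, \dots, e_n \}$ definiert ein $k$-Simplex.
+$\Rightarrow a_k(\Delta^n) = \binom{n+1}{k+1}$, $k = 0, \dots, n$
+$\Rightarrow \chi(\Delta^n) = \sum_{k=0}^{n} (-1)^k \binom{n+1}{k+1}$
+$f(x) = (x + 1)^{n+1} \overset{\text{Binomischer Lehrsatz}}{=} \sum_{k=0}^{n+1} \binom{n+1}{k} x^k$
+$\Rightarrow 0 = f(-1) = \sum_{k=0}^{n+1} \binom{n+1}{k} (-1)^k = 1 - \chi(\Delta^n)$
+$\Rightarrow \chi(\Delta^n) = 1$ $\blacksquare$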
+Definition 39
+a) Ein 1D-Simplizialkomplex heißt Graph.
+b) Ein Graph, der homöomorph zu $S^1$ ist, heißt Kreis.
+c) Ein zusammenhängender Graph heißt Baum, wenn er keinen Kreis enthält.
+(a) Dies wird häufig auch als
+Multigraph bezeichnet.
+(b) Planare Einbettung des Tetraeders
+(c) K5 (d) K3,3
+Abbildung 2.9: Beispiele für Graphen
+Bemerkung 34
+Für jeden Baum T gilt χ(T) = 1.
+Beweis: Induktion über die Anzahl der Ecken.
+Bemerkung 35
+a) Jeder zusammenhängende Graph $\Gamma$ enthält einen Teilbaum $T$, der alle Ecken von $\Gamma$
+enthält.²
+b) Ist $n = a_1(\Gamma) - a_1(T)$, so ist $\chi(\Gamma) = 1 - n$.
+Beweis:
+a) Siehe „Algorithmus von Kruskal“.
+² $T$ wird „Spannbaum“ genannt.
+ 2.3. SIMPLIZIALKOMPLEX
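+b) $\chi(\Gamma) = a_0(\Gamma) - a_1(\Gamma)$
+$= a_0(\Gamma) - (n + a_1(T))$
+$= a_0(T) - a_1(T) - n$ (da $T$ alle Ecken von $\Gamma$ enthält, gilt $a_0(\Gamma) = a_0(T)$)
+$= \chi(T) - n$
+$= 1 - n$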
+Bemerkung 36
+Sei $\Delta$ ein $n$-Simplex und $x \in \Delta^\circ \subseteq \mathbb{R}^n$. Sei $K$ der Simplizialkomplex, der aus $\Delta$ durch
+„Unterteilung“ in $x$ entsteht. Dann ist $\chi(K) = \chi(\Delta) = 1$.
+(a) K (b) ∆, das aus K durch Unterteilung entsteht
+Abbildung 2.10: Beispiel für Bemerkung 36.
+Beweis: $$\chi(K) = \chi(\Delta) - \underbrace{(-1)^n}_{n\text{-Simplex}} + \underbrace{\sum_{k=0}^{n} (-1)^k \binom{n+1}{k}}_{= (1 + (-1))^{n+1} - (-1)^{n+1} = (-1)^n} = \chi(\Delta)$$ $\blacksquare$
+Definition 40
+Sei X ein topologischer Raum, K ein Simplizialkomplex und
+h : |K| → X
+ein Homöomorphismus von der geometrischen Realisierung |K| auf X. Dann heißt h eine
+Triangulierung von X.
+Beispiel 28 (Triangulierung des Torus)
+Für eine Triangulierung des Torus werden mindestens 14 Dreiecke benötigt. Beispiele für
+fehlerhafte „Triangulierungen“ sind in Abbildung 2.11 zu sehen. Korrekte Triangulierungen sind
+in Abbildung 2.12 zu sehen.
+Satz 2.1 (Eulersche Polyederformel)
+Sei $P$ ein konvexes Polyeder in $\mathbb{R}^3$, d. h. $\partial P$ ist ein 2-dimensionaler Simplizialkomplex,
+sodass gilt:
+$$\forall x, y \in \partial P : [x, y] \subseteq P$$
+Dann ist $\chi(\partial P) = 2$.
+Beweis:
+1) Die Aussage ist richtig für den Tetraeder.
+2) O. B. d. A. sei $0 \in P$ und $P \subseteq B_1(0)$. Projiziere $\partial P$ von $0$ aus auf $\partial B_1(0) = S^2$.
+Erhalte Triangulierung von $S^2$
 .
-40 2.3. SIMPLIZIALKOMPLEX
-(a) Die beiden markierten Dreiecke schneiden sich im
-Mittelpunkt und in einer Seite.
-(b) Die beiden markierten Dreiecke schneiden sich im
-Mittelpunkt und außen.
-Abbildung 2.11: Fehlerhafte Triangulierungen
-(a) Einfache Triangulierung (b) Minimale Triangulierung
+ 2.3. SIMPLIZIALKOMPLEX
+(a) Die beiden markierten Dreiecke schneiden sich im
+Mittelpunkt und in einer Seite.
+(b) Die beiden markierten Dreiecke schneiden sich im
+Mittelpunkt und außen.
+Abbildung 2.11: Fehlerhafte Triangulierungen
+(a) Einfache Triangulierung (b) Minimale Triangulierung
 Abbildung 2.12: Triangulierungen des Torus
-41 2.3. SIMPLIZIALKOMPLEX
-3) Sind P1 und P2 konvexe Polygone und T1, T2 die zugehörigen Triangulierungen von
-S
-2
-, so gibt es eine Triangulierung T, die sowohl um T1 als auch um T2 Verfeinerung
-ist (vgl. Abbildung 2.13).
-T1
-T2
-T
-Abbildung 2.13: T ist eine Triangulierung, die für T1 und T2 eine Verfeinerung ist.
-Nach Bemerkung 36 ist χ(∂P1) = χ(T1) = χ(T) = χ(T2) = χ(∂P2) = 2, weil o. B. d. A.
-P2 ein Tetraeder ist.
-Bemerkung 37 (Der Rand vom Rand ist 0)
-Sei K ein endlicher Simplizialkomplex mit Knotenmenge V und < eine Totalordnung auf V .
-Sei An die Menge der n-Simplizes in K, d. h.
-An(K) := { σ ∈ K | dim(σ) = n } für n = 0, . . . , d = dim(K)
-und Cn(K) der R-Vektorraum mit Basis An(K), d. h.
-Cn(K) =
-
-
-
-X
-σ∈An(K)
-cσ · σ
-
-
-
-
-
-
-cσ ∈ R
-
-
-
-Sei σ = ∆(x0, . . . , xn) ∈ An(K), sodass x0 < x1 < · · · < xn.
-Für i = 0, . . . , n sei ∂iσ := ∆(x0, . . . , xˆi
-P
-, . . . , xn) die i-te Seite von σ und dσ = dnσ :=
-i=0(−1)i∂iσ ∈ Cn−1(K) und dn : Cn(K) → Cn−1(K) die dadurch definierte lineare
-Abbildung.
-Dann gilt: dn−1 ◦ dn = 0
-a b
-c
-σ
-e3
-e2 e1
-Abbildung 2.14: Simplizialkomplex mit Totalordnung
-Beispiel 29
-Sei a < b < c. Dann gilt:
-d2σ = e1 − e2 + e3
+ 2.3. SIMPLIZIALKOMPLEX
+3) Sind $P_1$ und $P_2$ konvexe Polyeder und $T_1, T_2$ die zugehörigen Triangulierungen von
+$S^2$, so gibt es eine Triangulierung $T$, die sowohl von $T_1$ als auch von $T_2$ eine Verfeinerung
+ist (vgl. Abbildung 2.13).
+T1
+T2
+T
+Abbildung 2.13: T ist eine Triangulierung, die für T1 und T2 eine Verfeinerung ist.
+Nach Bemerkung 36 ist χ(∂P1) = χ(T1) = χ(T) = χ(T2) = χ(∂P2) = 2, weil o. B. d. A.
+P2 ein Tetraeder ist.
+Bemerkung 37 (Der Rand vom Rand ist 0)
+Sei $K$ ein endlicher Simplizialkomplex mit Knotenmenge $V$ und $<$ eine Totalordnung auf $V$.
+Sei $A_n$ die Menge der $n$-Simplizes in $K$, d. h.
+$$A_n(K) := \{ \sigma \in K \mid \dim(\sigma) = n \} \quad \text{für } n = 0, \dots, d = \dim(K)$$
+und $C_n(K)$ der $\mathbb{R}$-Vektorraum mit Basis $A_n(K)$, d. h.
+$$C_n(K) = \left\{ \sum_{\sigma \in A_n(K)} c_\sigma \cdot \sigma \,\middle|\, c_\sigma \in \mathbb{R} \right\}$$
+Sei $\sigma = \Delta(x_0, \dots, x_n) \in A_n(K)$, sodass $x_0 < x_1 < \dots < x_n$.
+Für $i = 0, \dots, n$ sei $\partial_i \sigma := \Delta(x_0, \dots, \hat{x}_i, \dots, x_n)$ die $i$-te Seite von $\sigma$ und $d\sigma = d_n \sigma :=
+\sum_{i=0}^{n} (-1)^i \partial_i \sigma \in C_{n-1}(K)$ und $d_n : C_n(K) \to C_{n-1}(K)$ die dadurch definierte lineare
+Abbildung.
+Dann gilt: $d_{n-1} \circ d_n = 0$
+a b
+c
+σ
+e3
+e2 e1
+Abbildung 2.14: Simplizialkomplex mit Totalordnung
+Beispiel 29
+Sei a < b < c. Dann gilt:
+d2σ = e1 − e2 + e3
 d1(e1 − e2 + e3) = (c − b) − (c − a) + (b − a)
-42 2.3. SIMPLIZIALKOMPLEX
-= 0
-Sei a < b < c < d. Dann gilt für Tetraeder:
-d3(∆(a, b, c, d)) = ∆(b, c, d) − ∆(a, c, d) + ∆(a, b, d) − ∆(a, b, c), wobei:
-d2( ∆(b, c, d)) = ∆(c, d)−∆(b, d) + ∆(b, c)
-d2(−∆(a, c, d)) = −∆(c, d) + ∆(a, d)−∆(a, c)
-d2( ∆(a, b, d)) = ∆(b, d)−∆(a, d) + ∆(a, b)
-d2(−∆(a, b, c)) = −∆(b, c) + ∆(a, c)−∆(a, b)
-⇒ d2(d3(∆(a, b, c, d))) = 0
-Beweis: Sei σ ∈ An. Dann gilt:
-dn−1(dnσ) = dn−1(
-Xn
-i=0
-(−1)i
-∂iσ)
-=
-Xn
-i=0
-(−1)i
-dn−1(∂iσ)
-=
-Xn
-i=0
-(−1)i
-nX−1
-j=0
-∂i(∂jσ)(−1)j
-=
-X
-0≤i≤j≤n−1
-(−1)i+j
-∂j (∂i(σ)) +X
-0≤j<i≤n
-(−1)i+j
-∂i−1(∂jσ)
-= 0
-weil jeder Summand aus der ersten Summe auch in der zweiten Summe vorkommt, aber mit
-umgekehrten Vorzeichen. 
-Definition 41
-Sei K ein Simplizialkomplex, Zn := Kern(dn) ⊆ Cn und Bn := Bild(dn+1) ⊆ Cn.
-a) Hn = Hn(K, R) := Zn/Bn heißt n-te Homologiegruppe von K.
-b) bn(K) := dimR Hn heißt n-te Betti-Zahl von K.
-Bemerkung 38
-Nach Bemerkung 37 ist Bn ⊆ Zn, denn dn+1(C) ∈ Kern(dn) für C ∈ Cn+1.
-Satz 2.2
-Für jeden endlichen Simplizialkomplex K der Dimension d gilt:
-X
-d
-k=0
-(−1)k
-bk(K) = X
-d
-k=0
-(−1)k
-ak(K) = χ(K)
-Bemerkung 39
+ 2.3. SIMPLIZIALKOMPLEX
+= 0
+Sei a < b < c < d. Dann gilt für Tetraeder:
+d3(∆(a, b, c, d)) = ∆(b, c, d) − ∆(a, c, d) + ∆(a, b, d) − ∆(a, b, c), wobei:
+d2( ∆(b, c, d)) = ∆(c, d)−∆(b, d) + ∆(b, c)
+d2(−∆(a, c, d)) = −∆(c, d) + ∆(a, d)−∆(a, c)
+d2( ∆(a, b, d)) = ∆(b, d)−∆(a, d) + ∆(a, b)
+d2(−∆(a, b, c)) = −∆(b, c) + ∆(a, c)−∆(a, b)
+⇒ d2(d3(∆(a, b, c, d))) = 0
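+Beweis: Sei $\sigma \in A_n$. Dann gilt:
+$$\begin{aligned}
+d_{n-1}(d_n \sigma) &= d_{n-1}\left( \sum_{i=0}^{n} (-1)^i \partial_i \sigma \right) \\
+&= \sum_{i=0}^{n} (-1)^i d_{n-1}(\partial_i \sigma) \\
+&= \sum_{i=0}^{n} (-1)^i \sum_{j=0}^{n-1} \partial_j(\partial_i \sigma) (-1)^j \\
+&= \sum_{0 \leq i \leq j \leq n-1} (-1)^{i+j} \partial_j(\partial_i(\sigma)) + \sum_{0 \leq j < i \leq n} (-1)^{i+j} \partial_{i-1}(\partial_j \sigma) \\
+&= 0
+\end{aligned}$$
+weil jeder Summand aus der ersten Summe auch in der zweiten Summe vorkommt, aber mit
+umgekehrtem Vorzeichen. $\blacksquare$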
+Definition 41
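+Sei $K$ ein Simplizialkomplex, $Z_n := \operatorname{Kern}(d_n) \subseteq C_n$ und $B_n := \operatorname{Bild}(d_{n+1}) \subseteq C_n$.
+a) $H_n = H_n(K, \mathbb{R}) := Z_n / B_n$ heißt $n$-te Homologiegruppe von $K$.
+b) $b_n(K) := \dim_{\mathbb{R}} H_n$ heißt $n$-te Betti-Zahl von $K$.
+Bemerkung 38
+Nach Bemerkung 37 ist $B_n \subseteq Z_n$, denn $d_{n+1}(C) \in \operatorname{Kern}(d_n)$ für $C \in C_{n+1}$.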
+Satz 2.2
+Für jeden endlichen Simplizialkomplex $K$ der Dimension $d$ gilt:
+$$\sum_{k=0}^{d} (-1)^k b_k(K) = \sum_{k=0}^{d} (-1)^k a_k(K) = \chi(K)$$
+Bemerkung 39
 Es gilt nicht ak = bk ∀k ∈ N0.
-43 2.3. SIMPLIZIALKOMPLEX
-Beweis:
-• Dimensionsformel für dn: an = dim Zn + dim Bn−1 für n ≥ 1
-• Dimensionsformel für Zn → Hn = Zn/Bn : dim Zn = bn + dim Bn
-• dim Zd = bd, da dim Zd = bd + dim Bd, wobei dim Bd = 0, da ad+1 = 0
-• a0 − dim B0 = b0, da a0 − dim B0 = a0 − dim Z0 + b0 und a0 = dim Z0, weil a−1 = 0
-⇒
-X
-d
-k=0
-(−1)k
-ak = a0 +
-X
-d
-k=1
-(−1)k
-(dim Zk + dim Bk−1)
-= a0 +
-X
-d
-k=1
-(−1)k dim Zk +
-X
-d−1
-k=0
-(−1)k+1 dim Bk
-= a0 +
-X
-d
-k=1
-(−1)k dim Zk −
-X
-d−1
-k=0
-(−1)k dim Bk
-= a0 +
-X
-d−1
-k=1
-(−1)k
-bk + (−1)d dim Zd
-| {z }
-=bd
-− dim B0
-= b0 +
-X
-d−1
-k=1
-(−1)k
-bk + (−1)d
-bd
-=
-X
-d
-k=0
-(−1)k
-bk
-44 2.3. SIMPLIZIALKOMPLEX
-Übungsaufgaben
-Aufgabe 7 (Zusammenhang)
-(a) Beweisen Sie, dass eine topologische Mannigfaltigkeit genau dann wegzusammenhän￾gend ist, wenn sie zusammenhängend ist
-(b) Betrachten Sie nun wie in Beispiel 20.8 den Raum X := (R\{ 0 })∪{ 01, 02 } versehen
+ 2.3. SIMPLIZIALKOMPLEX
+Beweis:
+• Dimensionsformel für $d_n$: $a_n = \dim Z_n + \dim B_{n-1}$ für $n \geq 1$
+• Dimensionsformel für $Z_n \to H_n = Z_n / B_n$: $\dim Z_n = b_n + \dim B_n$
+• $\dim Z_d = b_d$, da $\dim Z_d = b_d + \dim B_d$, wobei $\dim B_d = 0$, da $a_{d+1} = 0$
+• $a_0 - \dim B_0 = b_0$, da $a_0 - \dim B_0 = a_0 - \dim Z_0 + b_0$ und $a_0 = \dim Z_0$, weil $a_{-1} = 0$
+$$\begin{aligned}
+\Rightarrow \sum_{k=0}^{d} (-1)^k a_k &= a_0 + \sum_{k=1}^{d} (-1)^k (\dim Z_k + \dim B_{k-1}) \\
+&= a_0 + \sum_{k=1}^{d} (-1)^k \dim Z_k + \sum_{k=0}^{d-1} (-1)^{k+1} \dim B_k \\
+&= a_0 + \sum_{k=1}^{d} (-1)^k \dim Z_k - \sum_{k=0}^{d-1} (-1)^k \dim B_k \\
+&= a_0 + \sum_{k=1}^{d-1} (-1)^k b_k + (-1)^d \underbrace{\dim Z_d}_{= b_d} - \dim B_0 \\
+&= b_0 + \sum_{k=1}^{d-1} (-1)^k b_k + (-1)^d b_d \\
+&= \sum_{k=0}^{d} (-1)^k b_k
+\end{aligned}$$
+ 2.3. SIMPLIZIALKOMPLEX
+Übungsaufgaben
+Aufgabe 7 (Zusammenhang)
+(a) Beweisen Sie, dass eine topologische Mannigfaltigkeit genau dann wegzusammenhängend ist, wenn sie zusammenhängend ist.
+(b) Betrachten Sie nun wie in Beispiel 20.8 den Raum X := (R\{ 0 })∪{ 01, 02 } versehen
 mit der dort definierten Topologie. Ist X wegzusammenhängend?
-3 Fundamentalgruppe und Überlagerungen
-3.1 Homotopie von Wegen
-a b
-γ1
-γ2
-(a) γ1 und γ2 sind homotop,
-da man sie „zueinander ver￾schieben“ kann.
-a b
-γ1
-γ2
-(b) γ1 und γ2 sind wegen dem
-Hindernis nicht homotop.
-Abbildung 3.1: Beispiele für Wege γ1 und γ2
-Definition 42
-Sei X ein topologischer Raum, a, b ∈ X, γ1, γ2 : I → X Wege von a nach b, d. h. γ1(0) =
-γ2(0) = a, γ1(1) = γ2(1) = b
-γ1 und γ2 heißen homotop, wenn es eine stetige Abbildung H : I × I → X mit
-H(t, 0) = γ1(t) ∀t ∈ I
-H(t, 1) = γ2(t) ∀t ∈ I
-und H(0, s) = a und H(1, s) = b für alle s ∈ I gibt. Dann schreibt man: γ1 ∼ γ2
-H heißt Homotopie zwischen γ1 und γ2.
-Bemerkung 40
-Sei X ein topologischer Raum, a, b ∈ X, γ1, γ2 : I → X Wege von a nach b und H eine
-Homotopie zwischen γ1 und γ2.
-Dann gilt: Der Weg
-γs : I → X, γs(t) = H(t, s)
-ist Weg in X von a nach b für jedes s ∈ I.
-Beweis: H ist stetig, also ist H(t, s) insbesondere für jedes feste s stetig. Da H(0, s) = a und
-H(1, s) = b für alle s ∈ I und γs eine Abbildung von I auf X ist, ist γs ein Weg in X von a
-nach b für jedes s ∈ I. 
-Bemerkung 41
-Durch Homotopie wird eine Äquivalenzrelation auf der Menge aller Wege in X von a nach b
-definiert.
+3 Fundamentalgruppe und Überlagerungen
+3.1 Homotopie von Wegen
+a b
+γ1
+γ2
+(a) γ1 und γ2 sind homotop,
+da man sie „zueinander verschieben“ kann.
+a b
+γ1
+γ2
+(b) γ1 und γ2 sind wegen des
+Hindernisses nicht homotop.
+Abbildung 3.1: Beispiele für Wege γ1 und γ2
+Definition 42
+Sei X ein topologischer Raum, a, b ∈ X, γ1, γ2 : I → X Wege von a nach b, d. h. γ1(0) =
+γ2(0) = a, γ1(1) = γ2(1) = b
+γ1 und γ2 heißen homotop, wenn es eine stetige Abbildung H : I × I → X mit
+H(t, 0) = γ1(t) ∀t ∈ I
+H(t, 1) = γ2(t) ∀t ∈ I
+und H(0, s) = a und H(1, s) = b für alle s ∈ I gibt. Dann schreibt man: γ1 ∼ γ2
+H heißt Homotopie zwischen γ1 und γ2.
+Bemerkung 40
+Sei X ein topologischer Raum, a, b ∈ X, γ1, γ2 : I → X Wege von a nach b und H eine
+Homotopie zwischen γ1 und γ2.
+Dann gilt: Der Weg
+γs : I → X, γs(t) = H(t, s)
+ist Weg in X von a nach b für jedes s ∈ I.
+Beweis: H ist stetig, also ist H(t, s) insbesondere für jedes feste s stetig. Da H(0, s) = a und
+H(1, s) = b für alle s ∈ I und γs eine Abbildung von I nach X ist, ist γs ein Weg in X von a
+nach b für jedes s ∈ I. 
+Bemerkung 41
+Durch Homotopie wird eine Äquivalenzrelation auf der Menge aller Wege in X von a nach b
+definiert.
 Beweis:
-46 3.1. HOMOTOPIE VON WEGEN
-• reflexiv: H(t, s) = γ(t) für alle (t, s) ∈ I × I
-• symmetrisch: H0
-(t, s) = H(t, 1 − s) für alle (t, s) ∈ I × I
-• transitiv: Seien H0 bzw. H00 Homotopien von γ1 nach γ2 bzw. von γ2 nach γ3.
-Dann sei H(t, s) := (
-H0
-(t, 2s) falls 0 ≤ s ≤
-1
-2
-H00(t, 2s − 1) falls 1
-2 ≤ s ≤ 1
-⇒ H ist stetig und Homotopie von γ1 nach γ3.
-
-Beispiel 30
-1) Sei X = S
-1
-. γ1 und γ2 aus Abbildung 3.3a nicht homotop.
-2) Sei X = T
-2
-. γ1, γ2 und γ3 aus Abbildung 3.3b sind paarweise nicht homotop.
-3) Sei X = R
-2 und a = b = (0, 0).
-Je zwei Wege im R
-2 mit Anfangs- und Endpunkt (0, 0) sind homotop.
-Abbildung 3.2: Zwei Wege im R
-2 mit Anfangs- und Endpunkt (0, 0)
-Sei γ0 : I → R
-2 der konstante Weg γ0(t) = (0, 0) ∀t ∈ I. Sei γ(0) = γ(1) = (0, 0).
-H(t, s) := (1 − s)γ(t) ist stetig, H(t, 0) = γ(t) ∀t ∈ I und H(t, 1) = (0, 0) ∀t ∈ I.
-Bemerkung 42
-Sei X ein topologischer Raum, γ : I → X ein Weg und ϕ : I → I stetig mit ϕ(0) = 0,
-ϕ(1) = 1. Dann sind γ und γ ◦ ϕ homotop.
-Beweis: Sei H(t, s) = γ((1 − s)t + s · ϕ(t)).
-Dann ist H stetig, H(t, 0) = γ(t), H(t, 1) = γ(ϕ(t)), H(0, s) = γ(0) und H(1, s) =
-γ(1 − s + s) = γ(1)
+ 3.1. HOMOTOPIE VON WEGEN
+• reflexiv: H(t, s) = γ(t) für alle (t, s) ∈ I × I
+• symmetrisch: H′(t, s) = H(t, 1 − s) für alle (t, s) ∈ I × I
+• transitiv: Seien H′ bzw. H″ Homotopien von γ1 nach γ2 bzw. von γ2 nach γ3.
+Dann sei
+H(t, s) :=
+\begin{cases}
+H'(t, 2s) & \text{falls } 0 ≤ s ≤ \frac{1}{2} \\
+H''(t, 2s − 1) & \text{falls } \frac{1}{2} ≤ s ≤ 1
+\end{cases}
+⇒ H ist stetig und Homotopie von γ1 nach γ3.
+
+Beispiel 30
+1) Sei X = S
+1
+. γ1 und γ2 aus Abbildung 3.3a nicht homotop.
+2) Sei X = T
+2
+. γ1, γ2 und γ3 aus Abbildung 3.3b sind paarweise nicht homotop.
+3) Sei X = R
+2 und a = b = (0, 0).
+Je zwei Wege im R
+2 mit Anfangs- und Endpunkt (0, 0) sind homotop.
+Abbildung 3.2: Zwei Wege im R
+2 mit Anfangs- und Endpunkt (0, 0)
+Sei γ0 : I → R
+2 der konstante Weg γ0(t) = (0, 0) ∀t ∈ I. Sei γ(0) = γ(1) = (0, 0).
+H(t, s) := (1 − s)γ(t) ist stetig, H(t, 0) = γ(t) ∀t ∈ I und H(t, 1) = (0, 0) ∀t ∈ I.
+Bemerkung 42
+Sei X ein topologischer Raum, γ : I → X ein Weg und ϕ : I → I stetig mit ϕ(0) = 0,
+ϕ(1) = 1. Dann sind γ und γ ◦ ϕ homotop.
+Beweis: Sei H(t, s) = γ((1 − s)t + s · ϕ(t)).
+Dann ist H stetig, H(t, 0) = γ(t), H(t, 1) = γ(ϕ(t)), H(0, s) = γ(0) und H(1, s) =
+γ(1 − s + s) = γ(1)
 ⇒ H ist Homotopie. 
-47 3.1. HOMOTOPIE VON WEGEN
-a
-b
-γ2 γ1
-(a) Kreis mit zwei Wegen
-a
-b
-(b) Torus mit drei Wegen
-Abbildung 3.3: Beispiele für (nicht)-Homotopie von Wegen
-Definition 43
-Seien γ1, γ2 Wege in X mit γ1(1) = γ2(0). Dann ist
-γ(t) = (
-γ1(2t) falls 0 ≤ t < 1
-2
-γ2(2t − 1) falls 1
-2 ≤ t ≤ 1
-ein Weg in X. Er heißt zusammengesetzter Weg und man schreibt γ = γ1 ∗ γ2.
-Bemerkung 43
-Das Zusammensetzen von Wegen ist nur bis auf Homotopie assoziativ, d. h.:
-γ1 ∗ (γ2 ∗ γ3) 6= (γ1 ∗ γ2) ∗ γ3
-γ1 ∗ (γ2 ∗ γ3) ∼ (γ1 ∗ γ2) ∗ γ3
-mit γ1(1) = γ2(0) und γ2(1) = γ3(0).
-γ1 γ2 γ3
-0 1/2 3/4 1
-(a) γ1 ∗ (γ2 ∗ γ3)
-γ1 γ2 γ3
-0 1/4 1/2 1
-(b) (γ1 ∗ γ2) ∗ γ3
-Abbildung 3.4: Das Zusammensetzen von Wegen ist nicht assoziativ
-Beweis: Das Zusammensetzen von Wegen ist wegen Bemerkung 42 bis auf Homotopie assoziativ.
-Verwende dazu
-ϕ(t) =
-
-
-
-1
-2
-t falls 0 ≤ t < 1
-2
-t −
-1
-4
-falls 1
-2 ≤ t < 3
-4
-2t − 1 falls 3
-4 ≤ t ≤ 1
-Bemerkung 44
-Sei X ein topologischer Raum, a, b, c ∈ X, γ1, γ0
-1 Wege von a nach b und γ2, γ0
-2 Wege von b
-nach c.
-Sind γ1 ∼ γ
-0
-1 und γ2 ∼ γ
-0
-2
-, so ist γ1 ∗ γ2 ∼ γ
-0
-1
-∗ γ
-0
-2
+ 3.1. HOMOTOPIE VON WEGEN
+a
+b
+γ2 γ1
+(a) Kreis mit zwei Wegen
+a
+b
+(b) Torus mit drei Wegen
+Abbildung 3.3: Beispiele für (nicht)-Homotopie von Wegen
+Definition 43
+Seien γ1, γ2 Wege in X mit γ1(1) = γ2(0). Dann ist
+γ(t) =
+\begin{cases}
+γ1(2t) & \text{falls } 0 ≤ t < \frac{1}{2} \\
+γ2(2t − 1) & \text{falls } \frac{1}{2} ≤ t ≤ 1
+\end{cases}
+ein Weg in X. Er heißt zusammengesetzter Weg und man schreibt γ = γ1 ∗ γ2.
+Bemerkung 43
+Das Zusammensetzen von Wegen ist nur bis auf Homotopie assoziativ, d. h.:
+γ1 ∗ (γ2 ∗ γ3) ≠ (γ1 ∗ γ2) ∗ γ3
+γ1 ∗ (γ2 ∗ γ3) ∼ (γ1 ∗ γ2) ∗ γ3
+mit γ1(1) = γ2(0) und γ2(1) = γ3(0).
+γ1 γ2 γ3
+0 1/2 3/4 1
+(a) γ1 ∗ (γ2 ∗ γ3)
+γ1 γ2 γ3
+0 1/4 1/2 1
+(b) (γ1 ∗ γ2) ∗ γ3
+Abbildung 3.4: Das Zusammensetzen von Wegen ist nicht assoziativ
+Beweis: Das Zusammensetzen von Wegen ist wegen Bemerkung 42 bis auf Homotopie assoziativ.
+Verwende dazu
+ϕ(t) =
+
+
+
+1
+2
+t falls 0 ≤ t < 1
+2
+t −
+1
+4
+falls 1
+2 ≤ t < 3/4
+2t − 1 falls 3
+4 ≤ t ≤ 1
+Bemerkung 44
+Sei X ein topologischer Raum, a, b, c ∈ X, γ1, γ′1
+Wege von a nach b und γ2, γ′2 Wege von b
+nach c.
+Sind γ1 ∼ γ
+0
+1 und γ2 ∼ γ
+0
+2
+, so ist γ1 ∗ γ2 ∼ γ
+0
+1
+∗ γ
+0
+2
+.
+ 3.2. FUNDAMENTALGRUPPE
+γ γ1
+0
+1
+a
+b
+c
+γ
+0
+2
+γ2
+Abbildung 3.5: Situation aus Bemerkung 44
+.
+Beweis: Sei Hi eine Homotopie zwischen γi und γ
+0
+i
+, i = 1, 2.
+Dann ist
+H(t, s) := (
+H1(2t, s) falls 0 ≤ t ≤
+1
+2
+∀s ∈ I
+H2(2t − 1, s) falls 1
+2 ≤ t ≤ 1
+eine Homotopie zwischen γ1 ∗ γ2 und γ
+0
+1
+∗ γ
+0
+2
 .
-48 3.2. FUNDAMENTALGRUPPE
-γ γ1
-0
-1
-a
-b
-c
-γ
-0
-2
-γ2
-Abbildung 3.5: Situation aus Bemerkung 44
-.
-Beweis: Sei Hi eine Homotopie zwischen γi und γ
-0
-i
-, i = 1, 2.
-Dann ist
-H(t, s) := (
-H1(2t, s) falls 0 ≤ t ≤
-1
-2
-∀s ∈ I
-H2(2t − 1, s) falls 1
-2 ≤ t ≤ 1
-eine Homotopie zwischen γ1 ∗ γ2 und γ
-0
-1
-∗ γ
-0
-2
-.
-Eine spezielle Homotopieäquivalenz sind sog. Deformationsretraktionen:
-Definition 44
-Sei X ein topologischer Raum, A ⊆ X, r : X → A eine stetige Abbildung und ι = (idX)|A.
-a) ι : A → X mit ι(x) = x heißt die Inklusionsabbildung und man schreibt: ι : A ,→ X.
-b) r heißt Retraktion, wenn r|A = idA ist.
-c) A heißt Deformationsretrakt, wenn es eine Retraktion r auf A mit ι ◦ r ∼ idX gibt.
-Beispiel 31 (Zylinder auf Kreis)
-Sei X = S
-1 × R ein topologischer Raum und
-r : S
-1 × R → S
-1 × { 0 } ∼= S
-1
-mit
-r(x, y) := (x, 0)
-eine Abbildung. r ist eine Retraktion, da r|S1 ∼= idS1
-.
-ι ◦ r : S
-1 × R → S
-1 × R
-(x, y) 7→ (x, 0)
-H : (S
-1 × R) × I → S
-1 × R
-(x, y, t) 7→ (x, ty)
-3.2 Fundamentalgruppe
-Für einen Weg γ sei [γ] seine Homotopieklasse.
-Definition 45
-Sei X ein topologischer Raum und x ∈ X. Sei außerdem
+Eine spezielle Homotopieäquivalenz sind sog. Deformationsretraktionen:
+Definition 44
+Sei X ein topologischer Raum, A ⊆ X, r : X → A eine stetige Abbildung und ι = (idX)|A.
+a) ι : A → X mit ι(x) = x heißt die Inklusionsabbildung und man schreibt: ι : A ↪ X.
+b) r heißt Retraktion, wenn r|A = idA ist.
+c) A heißt Deformationsretrakt, wenn es eine Retraktion r auf A mit ι ◦ r ∼ idX gibt.
+Beispiel 31 (Zylinder auf Kreis)
+Sei X = S
+1 × R ein topologischer Raum und
+r : S
+1 × R → S1 × { 0 } ∼= S1
+mit
+r(x, y) := (x, 0)
+eine Abbildung. r ist eine Retraktion, da r|S1 ∼= idS1.
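+ι ◦ r : S^1 × R → S^1 × R,
+(x, y) ↦ (x, 0)
+ist homotop zur Identität auf S^1 × R vermöge der Homotopie
+H : (S^1 × R) × I → S^1 × R,
+(x, y, t) ↦ (x, ty),
+denn H(x, y, 0) = (x, 0) = (ι ◦ r)(x, y) und H(x, y, 1) = (x, y).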
+3.2 Fundamentalgruppe
+Für einen Weg γ sei [γ] seine Homotopieklasse.
+Definition 45
+Sei X ein topologischer Raum und x ∈ X. Sei außerdem
 π1(X, x) := { [γ] | γ ist Weg in X mit γ(0) = γ(1) = x }
-49 3.2. FUNDAMENTALGRUPPE
-Durch [γ1] ∗G [γ2] := [γ1 ∗ γ2] wird π1(X, x) zu einer Gruppe. Diese Gruppe heißt Funda￾mentalgruppe von X im Basispunkt x.
-Bemerkung 45
-Im R
-2 gibt es nur eine Homotopieklasse.
-Beweis: (Fundamentalgruppe ist eine Gruppe)
-a) Abgeschlossenheit folgt direkt aus der Definition von ∗G
-b) Assoziativität folgt aus Bemerkung 43
-c) Neutrales Element e = [γ0], γ0(t) = x ∀t ∈ I. e ∗ [γ] = [γ] = [γ] ∗ e, da γ0 ∗ γ ∼ γ
-d) Inverses Element [γ]
-−1 = [γ] = [γ(1 − t)], denn γ ∗ γ ∼ γ0 ∼ γ ∗ γ
-Beispiel 32
-1) S
-1 = { z ∈ C | |z| = 1 } =
-
-(cos ϕ,sin ϕ) ∈ R
-2
-
- 0 ≤ ϕ ≤ 2π
-	
-π1(S
-1
-, 1) = 
-[γ
-k
-]
-
- k ∈ Z
-	 ∼= Z. Dabei ist γ(t) = e
-2πit = cos(2πt) + i sin(2πt) und
-γ
-k
-:= γ ∗ · · · ∗ γ
-| {z }
-k mal
-[γ
-k
-] 7→ k ist ein Isomorphismus.
-2) π1(R
-2
-, 0) = π1(R
-2
-, x) = { e } für jedes x ∈ R
-2
-3) π1(R
-n
-, x) = { e } für jedes x ∈ R
-n
-4) G ⊆ R
-n heißt sternförmig bzgl. x ∈ G, wenn für jedes y ∈ G auch die Strecke
-[x, y] ⊆ G ist.
-Für jedes sternförmige G ⊆ R
-n
-ist π1(G, x) = { e }
-x
-Abbildung 3.6: Sternförmiges Gebiet
-.
-5) π1(S
-2
-, x0) = { e }, da im R
-2 alle Wege homotop zu { e } sind. Mithilfe der stereogra￾phischen Projektion kann von S
-2 auf den R
-2 abgebildet werden.
-Dieses Argument funktioniert nicht mehr bei flächenfüllenden Wegen, d. h. wenn
-γ : I → S
-2
-surjektiv ist.
-Bemerkung 46
-Sei X ein topologischer Raum, a, b ∈ X, δ : I → X ein Weg von a nach b.
-Dann ist die Abbildung
-α : π1(X, a) → π1(X, b) [γ] 7→ [δ ∗ γ ∗ δ]
+ 3.2. FUNDAMENTALGRUPPE
+Durch [γ1] ∗G [γ2] := [γ1 ∗ γ2] wird π1(X, x) zu einer Gruppe. Diese Gruppe heißt Fundamentalgruppe von X im Basispunkt x.
+Bemerkung 45
+Im R
+2 gibt es nur eine Homotopieklasse.
+Beweis: (Fundamentalgruppe ist eine Gruppe)
+a) Abgeschlossenheit folgt direkt aus der Definition von ∗G
+b) Assoziativität folgt aus Bemerkung 43
+c) Neutrales Element e = [γ0], γ0(t) = x ∀t ∈ I. e ∗ [γ] = [γ] = [γ] ∗ e, da γ0 ∗ γ ∼ γ
+d) Inverses Element: [γ]^{−1} = [γ̄] mit γ̄(t) := γ(1 − t),
+denn γ̄ ∗ γ ∼ γ0 ∼ γ ∗ γ̄
+Beispiel 32
+1) S
+1 = { z ∈ C | |z| = 1 } =
+
+(cos ϕ,sin ϕ) ∈ R
+2
+
+ 0 ≤ ϕ ≤ 2π
+
+π1(S
+1
+, 1) = [γ
+k
+]
+
+ k ∈ Z
+	 ∼= Z. Dabei ist γ(t) = e
+2πit = cos(2πt) + i sin(2πt) und
+γ
+k
+:= γ ∗ · · · ∗ γ
+| {z }
+k mal
+[γ
+k
+] 7→ k ist ein Isomorphismus.
+2) π1(R
+2
+, 0) = π1(R
+2
+, x) = { e } für jedes x ∈ R
+2
+3) π1(R
+n
+, x) = { e } für jedes x ∈ R
+n
+4) G ⊆ R
+n heißt sternförmig bzgl. x ∈ G, wenn für jedes y ∈ G auch die Strecke
+[x, y] ⊆ G ist.
+Für jedes sternförmige G ⊆ R
+n
+ist π1(G, x) = { e }
+x
+Abbildung 3.6: Sternförmiges Gebiet
+.
+5) π1(S
+2
+, x0) = { e }, da im R
+2 alle Wege homotop zu { e } sind. Mithilfe der stereographischen Projektion kann von S
+2 auf den R2 abgebildet werden.
+Dieses Argument funktioniert nicht mehr bei flächenfüllenden Wegen, d. h. wenn
+γ : I → S
+2
+surjektiv ist.
+Bemerkung 46
+Sei X ein topologischer Raum, a, b ∈ X, δ : I → X ein Weg von a nach b.
+Dann ist die Abbildung
+α : π1(X, a) → π1(X, b), [γ] ↦ [δ̄ ∗ γ ∗ δ] (mit δ̄(t) := δ(1 − t))
 ein Gruppenisomorphismus.
-50 3.2. FUNDAMENTALGRUPPE
-a b
-γ
-δ
-Abbildung 3.7: Situation aus Bemerkung 46
-.
-Beweis:
-α([γ1] ∗ [γ2]) = [δ ∗ (γ1 ∗ γ2) ∗ δ]
-= [δ ∗ γ1 ∗ δ ∗ δ ∗ γ2 ∗ δ]
-= [δ ∗ γ1 ∗ δ] ∗ [δ ∗ γ2 ∗ δ]
-= α([γ1]) ∗ α([γ2])
-Definition 46
-Ein wegzusammenhängender topologischer Raum X heißt einfach zusammenhängend,
-wenn π1(X, x) = { e } für ein x ∈ X.
-Wenn π1(X, x) = { e } für ein x ∈ X gilt, dann wegen Bemerkung 46 sogar für alle x ∈ X.
-Bemerkung 47
-Es seien X, Y topologische Räume, f : X → Y eine stetige Abbildung, x ∈ X, y := f(x) ∈ Y .
-a) Dann ist die Abbildung f∗ : π1(X, x) → π1(Y, y), [γ] → [f ◦ γ] ein Gruppenhomomor￾phismus.
-b) Ist Z ein weiterer topologischer Raum und g : Y → Z eine stetige Abbildung z := g(y).
-Dann ist (g ◦ f)∗ = g∗ ◦ f∗ : π1(X, x) → π1(Z, z)
-Beweis:
-a) f∗ ist wohldefiniert: Seien γ1, γ2 homotope Wege von x. z.Z.: f ◦ γ1 ∼ f ◦ γ2: Nach
-Voraussetzung gibt es stetige Abbildungen H : I × I → X mit
-H(t, 0) = γ1(t),
-H(t, 1) = γ2(t),
-H(0, s) = H(1, s) = x.
-Dann ist f ◦H : I ×I → Y stetig mit (f ◦H)(t, 0) = f(H(t, 0)) = f(γ1(t)) = (f ◦γ1)(t)
-etc. ⇒ f ◦ γ1 ∼ f ◦ γ2.
-f∗([γ1] ∗ [γ2]) = [f ◦ (γ1 ∗ γ2)] = [(f ◦ γ1)] ∗ [(f ◦ γ2)] = f∗([γ1]) ∗ f∗([γ2])
-b) (g ◦ f)∗([γ]) = [(g ◦ f) ◦ γ] = [g ◦ (f ◦ γ)] = g∗([f ◦ γ]) = g∗(f∗([γ])) = (g∗ ◦ f∗)([γ])
-Beispiel 33
-1) f : S
-1
-,→ R
-2
-ist injektiv, aber f∗ : π1(S
-1
-, 1) ∼= Z → π1(R
-2
-, 1) = { e } ist nicht injektiv.
-2) f : R → S
-1
-, t 7→ (cos 2πt,sin 2πt) ist surjektiv, aber f∗ : π1(R, 0) = { e } → π1(S
-1
-, 1) ∼=
+ 3.2. FUNDAMENTALGRUPPE
+a b
+γ
+δ
+Abbildung 3.7: Situation aus Bemerkung 46
+.
+Beweis:
+α([γ1] ∗ [γ2]) = [δ̄ ∗ (γ1 ∗ γ2) ∗ δ]
+= [δ̄ ∗ γ1 ∗ δ ∗ δ̄ ∗ γ2 ∗ δ] (da δ ∗ δ̄ homotop zum konstanten Weg ist)
+= [δ̄ ∗ γ1 ∗ δ] ∗ [δ̄ ∗ γ2 ∗ δ]
+= α([γ1]) ∗ α([γ2]), wobei δ̄(t) := δ(1 − t) der zu δ inverse Weg ist.
+Definition 46
+Ein wegzusammenhängender topologischer Raum X heißt einfach zusammenhängend,
+wenn π1(X, x) = { e } für ein x ∈ X.
+Wenn π1(X, x) = { e } für ein x ∈ X gilt, dann wegen Bemerkung 46 sogar für alle x ∈ X.
+Bemerkung 47
+Es seien X, Y topologische Räume, f : X → Y eine stetige Abbildung, x ∈ X, y := f(x) ∈ Y .
+a) Dann ist die Abbildung f∗ : π1(X, x) → π1(Y, y), [γ] ↦ [f ◦ γ] ein Gruppenhomomorphismus.
+b) Ist Z ein weiterer topologischer Raum und g : Y → Z eine stetige Abbildung z := g(y).
+Dann ist (g ◦ f)∗ = g∗ ◦ f∗ : π1(X, x) → π1(Z, z)
+Beweis:
+a) f∗ ist wohldefiniert: Seien γ1, γ2 homotope Wege von x. z.Z.: f ◦ γ1 ∼ f ◦ γ2: Nach
+Voraussetzung gibt es stetige Abbildungen H : I × I → X mit
+H(t, 0) = γ1(t),
+H(t, 1) = γ2(t),
+H(0, s) = H(1, s) = x.
+Dann ist f ◦H : I ×I → Y stetig mit (f ◦H)(t, 0) = f(H(t, 0)) = f(γ1(t)) = (f ◦γ1)(t)
+etc. ⇒ f ◦ γ1 ∼ f ◦ γ2.
+f∗([γ1] ∗ [γ2]) = [f ◦ (γ1 ∗ γ2)] = [(f ◦ γ1)] ∗ [(f ◦ γ2)] = f∗([γ1]) ∗ f∗([γ2])
+b) (g ◦ f)∗([γ]) = [(g ◦ f) ◦ γ] = [g ◦ (f ◦ γ)] = g∗([f ◦ γ]) = g∗(f∗([γ])) = (g∗ ◦ f∗)([γ])
+Beispiel 33
+1) f : S
+1
+,→ R
+2
+ist injektiv, aber f∗ : π1(S
+1
+, 1) ∼= Z → π1(R
+2
+, 1) = { e } ist nicht injektiv.
+2) f : R → S
+1
+, t 7→ (cos 2πt,sin 2πt) ist surjektiv, aber f∗ : π1(R, 0) = { e } → π1(S
+1
+, 1) ∼=
 Z ist nicht surjektiv.
-51 3.2. FUNDAMENTALGRUPPE
-Bemerkung 48
-Sei f : X → Y ein Homöomorphismus zwischen topologischen Räumen X, Y . Dann gilt:
-f∗ : π1(X, x) → π1(Y, f(x))
-ist ein Isomorphismus für jedes x ∈ X.
-Beweis: Sei g : Y → X die Umkehrabbildung, d. h. g ist stetig und f ◦ g = idY , g ◦ f = idX
-⇒ f∗ ◦ g∗ = (f ◦ g)∗ = (idY )∗ = idπ1(Y,f(X) und g∗ ◦ f∗ = idπ1(X,x)
-.
-Definition 47
-Seien X, Y topologische Räume, x0 ∈ X, y0 ∈ Y, f, g : X → Y stetig mit f(x0) = y0 = g(x0).
-f und g heißen homotop (f ∼ g), wenn es eine stetige Abbildung H : X × I → Y mit
-H(x, 0) = f(x) ∀x ∈ X
-H(x, 1) = g(x) ∀x ∈ X
-H(x0, s) = y0 ∀s ∈ I
-gibt.
-Bemerkung 49
-Sind f und g homotop, so ist f∗ = g∗ : π1(X, x0) → π1(Y, y0).
-Beweis: Sei γ ein geschlossener Weg in X um x0, d. h. [γ] ∈ π1(X, x0).
-Z. z.: f ◦ γ ∼ g ◦ γ
-Sei dazu Hγ : I × I → Y,(t, s) 7→ H(γ(t), s). Dann gilt:
-Hγ(t, 0) = H(γ(t), 0) = (f ◦ γ)(t) ∀t ∈ I
-Hγ(1, s) = H(γ(1), s) = H(x0, s) = y0 ∀s ∈ I
-Hγ(t, 1) = H(γ(t), 1) = g(γ(t)) ∀t ∈ I
-Beispiel 34
-f : X → Y, g : Y → X mit g ◦ f ∼ idX, f ◦ g ∼ idY
-⇒ f∗ ist Isomorphismus. Konkret: f : R
-2 → { 0 } , g : { 0 } → R
-2
-⇒ f ◦ g = id{ 0 }
-, g ◦ f : R
-2 → R
-2
-, x 7→ 0 für alle x.
-g ◦ f ∼ idR2 mit Homotopie: H : R
-2 × I → R
-2
-, H(x, s) = (1 − s)x (stetig!)
-⇒ H(x, 0) = x = idR2 (x), H(x, 1) = 0, H(0, s) = 0 ∀s ∈ I.
-Satz 3.1 (Satz von Seifert und van Kampen „light“)
-Sei X ein topologischer Raum, U, V ⊆ X offen mit U ∪ V = X und U ∩ V wegzusam￾menhängend.
-Dann wird π1(X, x) für x ∈ U ∩ V erzeugt von geschlossenen Wegen um x, die ganz in
+ 3.2. FUNDAMENTALGRUPPE
+Bemerkung 48
+Sei f : X → Y ein Homöomorphismus zwischen topologischen Räumen X, Y . Dann gilt:
+f∗ : π1(X, x) → π1(Y, f(x))
+ist ein Isomorphismus für jedes x ∈ X.
+Beweis: Sei g : Y → X die Umkehrabbildung, d. h. g ist stetig und f ◦ g = idY , g ◦ f = idX
+⇒ f∗ ◦ g∗ = (f ◦ g)∗ = (idY )∗ = id_{π1(Y, f(x))} und g∗ ◦ f∗ = id_{π1(X, x)}.
+Definition 47
+Seien X, Y topologische Räume, x0 ∈ X, y0 ∈ Y, f, g : X → Y stetig mit f(x0) = y0 = g(x0).
+f und g heißen homotop (f ∼ g), wenn es eine stetige Abbildung H : X × I → Y mit
+H(x, 0) = f(x) ∀x ∈ X
+H(x, 1) = g(x) ∀x ∈ X
+H(x0, s) = y0 ∀s ∈ I
+gibt.
+Bemerkung 49
+Sind f und g homotop, so ist f∗ = g∗ : π1(X, x0) → π1(Y, y0).
+Beweis: Sei γ ein geschlossener Weg in X um x0, d. h. [γ] ∈ π1(X, x0).
+Z. z.: f ◦ γ ∼ g ◦ γ
+Sei dazu Hγ : I × I → Y, (t, s) ↦ H(γ(t), s). Dann gilt:
+Hγ(t, 0) = H(γ(t), 0) = (f ◦ γ)(t) ∀t ∈ I
+Hγ(1, s) = H(γ(1), s) = H(x0, s) = y0 ∀s ∈ I
+Hγ(t, 1) = H(γ(t), 1) = g(γ(t)) ∀t ∈ I
+Beispiel 34
+f : X → Y, g : Y → X mit g ◦ f ∼ idX, f ◦ g ∼ idY
+⇒ f∗ ist Isomorphismus. Konkret: f : R
+2 → { 0 } , g : { 0 } → R2
+⇒ f ◦ g = id{ 0 }, g ◦ f : R
+2 → R2
+, x 7→ 0 für alle x.
+g ◦ f ∼ idR2 mit Homotopie: H : R
+2 × I → R2
+, H(x, s) = (1 − s)x (stetig!)
+⇒ H(x, 0) = x = idR2 (x), H(x, 1) = 0, H(0, s) = 0 ∀s ∈ I.
+Satz 3.1 (Satz von Seifert und van Kampen „light“)
+Sei X ein topologischer Raum, U, V ⊆ X offen mit U ∪ V = X und U ∩ V wegzusammenhängend.
+Dann wird π1(X, x) für x ∈ U ∩ V erzeugt von geschlossenen Wegen um x, die ganz in
 U oder ganz in V verlaufen.
-52 3.3. ÜBERLAGERUNGEN
-Beweis: Sei γ : I → X ein geschlossener Weg um x. Überdecke I mit endlich vielen offenen
-Intervallen I1, I2, . . . , In, die ganz in γ
-−1
-(U) oder ganz in γ
-−1
-(V ) liegen.
-O. B. d. A. sei γ(I1) ⊆ U, γ(I2) ⊆ V , etc.
-Wähle ti ∈ Ii ∩ Ii+1, also γ(ti) ∈ U ∩ V . Sei σi Weg in U ∩ V von x0 nach γ(ti) ⇒ γ ist
-homotop zu
-γ1 ∗ σ1
-| {z }
-in U
-∗ σ1 ∗ γ2 ∗ σ2
-| {z }
-in V
-∗ · · · ∗ σn−1 ∗ γ2 mit γi
-:= γ|Ii
-a b
-x
-Abbildung 3.8: Topologischer Raum X
-Beispiel 35 (Satz von Seifert und van Kampen)
-1) Sei X wie in Abbildung 3.8. π1(X, x) wird „frei“ erzeugt von a und b, weil π1(U, x) =
-hai ∼= Z, π1(V, x) = hbi ∼= Z, insbesondere ist a ∗ b nicht homotop zu b ∗ a.
-2) Torus: π1(T
-2
-, X) wird erzeugt von a und b.
-V
-U
-a
-b
-V
-a b
-Abbildung 3.9: a ∗ b = b ∗ a ⇔ a ∗ b ∗ a ∗ b ∼ e
-3.3 Überlagerungen
-Definition 48
-Es seien X, Y zusammenhängende topologische Räume und p : Y → X eine stetige Abbil￾dung.
-p heißt Überlagerung, wenn jedes x ∈ X eine offene Umgebung U = U(x) ⊆ X besitzt,
-sodass p
-−1
-(U) disjunkte Vereinigung von offenen Teilmengen Vj ⊆ Y ist (j ∈ I) und
-p|Vj
-: Vj → U ein Homöomorphismus ist.
-|I| heißt Grad der Überlagerung p und man schreibt:
+ 3.3. ÜBERLAGERUNGEN
+Beweis: Sei γ : I → X ein geschlossener Weg um x. Überdecke I mit endlich vielen offenen
+Intervallen I1, I2, . . . , In, die ganz in γ
+−1
+(U) oder ganz in γ
+−1
+(V ) liegen.
+O. B. d. A. sei γ(I1) ⊆ U, γ(I2) ⊆ V , etc.
+Wähle ti ∈ Ii ∩ Ii+1, also γ(ti) ∈ U ∩ V . Sei σi ein Weg in U ∩ V von x nach γ(ti)
+und σ̄i(t) := σi(1 − t) der zu σi inverse Weg. Mit γi := γ|Ii ist γ
+homotop zu
+\underbrace{γ1 ∗ σ̄1}_{\text{in } U}
+∗
+\underbrace{σ1 ∗ γ2 ∗ σ̄2}_{\text{in } V}
+∗
+· · ·
+∗ σn−1 ∗ γn
+a b
+x
+Abbildung 3.8: Topologischer Raum X
+Beispiel 35 (Satz von Seifert und van Kampen)
+1) Sei X wie in Abbildung 3.8. π1(X, x) wird „frei“ erzeugt von a und b, weil π1(U, x) =
+⟨a⟩ ≅ Z, π1(V, x) = ⟨b⟩ ≅ Z, insbesondere ist a ∗ b nicht homotop zu b ∗ a.
+2) Torus: π1(T
+2
+, X) wird erzeugt von a und b.
+V
+U
+a
+b
+V
+a b
+Abbildung 3.9: a ∗ b = b ∗ a ⇔ a ∗ b ∗ ā ∗ b̄ ∼ e (ā, b̄: inverse Wege)
+3.3 Überlagerungen
+Definition 48
+Es seien X, Y zusammenhängende topologische Räume und p : Y → X eine stetige Abbildung.
+p heißt Überlagerung, wenn jedes x ∈ X eine offene Umgebung U = U(x) ⊆ X besitzt,
+sodass p
+−1
+(U) disjunkte Vereinigung von offenen Teilmengen Vj ⊆ Y ist (j ∈ I) und
+p|Vj: Vj → U ein Homöomorphismus ist.
+|I| heißt Grad der Überlagerung p und man schreibt:
 deg p := |I|
-53 3.3. ÜBERLAGERUNGEN
-Abbildung 3.10: R → S
-1
-,
-t 7→ (cos 2πt,sin 2πt)
-Beispiel 36
-1) siehe Abbildung 3.10
-2) siehe Abbildung 3.11
-3) R
-n → T
-n = R
-n/Z
-n
-4) S
-n → Pn
-(R)
-5) S
-1 → S
-1
-, z 7→ z
-2
-, siehe Abbildung 3.12
-0 1 2 3 4 5 6
-0
-1
-2
-3
-4
-5
-6
-*
-*
-*
-*
-*
-*
-*
-*
-*
-*
-*
-*
-*
-*
-*
-*
-*
-*
-*
-*
-*
-*
-*
-*
-*
-*
-*
-*
-*
-*
-*
-*
-*
-*
-*
-*
-*
-−−−→
-Abbildung 3.11: R
-2 → T
-2 = R
-2/Z
-2
-Bemerkung 50
-Überlagerungen sind surjektiv.
-Beweis: Sei p : Y → X eine Überlagerung und x ∈ X beliebig. Dann existiert eine offene
-Umgebung U(x) ⊆ X und offene Teilmengen Vj ⊆ X mit p
-−1
-(U) = ˙
-S
-Vj und p|Vj
-: Vj → U
-ist Homöomorphismus.
-D. h. es existiert ein y ∈ Vj , so dass p|Vj
-(y) = x. Da x ∈ X beliebig war und ein y ∈ Y
+ 3.3. ÜBERLAGERUNGEN
+Abbildung 3.10: R → S
+1
+,
+t ↦ (cos 2πt, sin 2πt)
+Beispiel 36
+1) siehe Abbildung 3.10
+2) siehe Abbildung 3.11
+3) R
+n → Tn = Rn/Zn
+4) S
+n → Pn
+(R)
+5) S
+1 → S1
+, z 7→ z
+2
+, siehe Abbildung 3.12
+0 1 2 3 4 5 6
+0
+1
+2
+3
+4
+5
+6
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+−−−→
+Abbildung 3.11: R
+2 → T2 = R2/Z2
+Bemerkung 50
+Überlagerungen sind surjektiv.
+Beweis: Sei p : Y → X eine Überlagerung und x ∈ X beliebig. Dann existiert eine offene
+Umgebung U(x) ⊆ X und offene Teilmengen Vj ⊆ Y mit p
+−1
+(U) = ˙
+S
+Vj und p|Vj: Vj → U
+ist Homöomorphismus.
+D. h. es existiert ein y ∈ Vj , so dass p|Vj(y) = x. Da x ∈ X beliebig war und ein y ∈ Y
 existiert, mit p(y) = x, ist p surjektiv. 
-54 3.3. ÜBERLAGERUNGEN
-1
-i
-z
-z
-2
-ϕ
-ϕ
-z
-2
-Abbildung 3.12: t 7→ (cos 4πt,sin 4πt)
-Definition 49
-Seien (X, TX),(Y, TY ) topologische Räume und f : X → Y eine Abbildung.
-f heißt offen :⇔ ∀U ∈ TX : f(U) ∈ TY .
-Beispiel 37 (Offene und stetige Abbildungen)
-Sei X ein topologischer Raum und seien fi
-: R → R mit i ∈ { 1, 2, 3 } und g : R → S
-1 =
-{ z ∈ C | kzk = 1 } Abbildungen.
-1) f1 := idR ist eine offene und stetige Abbildung.
-2) g(x) := e
-2πix ist eine offene, aber keine stetige Abbildung (vgl. Abbildung 1.5).
-3) f2(x) := 42 ist eine stetige, aber keine offene Abbildung.
-4) f3(x) := (
-0 falls x ∈ Q
-42 falls x ∈ R \ Q
-ist weder stetig noch offen.
-Bemerkung 51
-Überlagerungen sind offene Abbildungen.
-Beweis: Sei y ∈ V und x ∈ p(V ), sodass x = p(y) gilt. Sei weiter U = Ux eine offene Umgebung
-von x wie in Definition 48 und Vj die Komponente von p
-−1
-(U), die y enthält.
-Dann ist V ∩ Vj offene Umgebung von y.
-⇒ p(V ∩ Vj ) ist offen in p(Vj ), also auch offen in X. Außerdem ist p(y) = x ∈ p(V ∩ Vj ) und
-p(V ∩ Vj ) ⊆ p(V ).
-⇒ p(V ) ist offen.
-Definition 50
-Sei X ein topologischer Raum und M ⊆ X.
-M heißt diskret in X, wenn M in X keinen Häufungspunkt hat.
-Bemerkung 52
-Sei p : Y → X Überlagerung, x ∈ X.
-a) X hausdorffsch ⇒ Y hausdorffsch
-b) p
-−1
-(x) ist diskret in Y für jedes x ∈ X.
-Beweis:
-a) Seien y1, y2 ∈ Y .
+ 3.3. ÜBERLAGERUNGEN
+1
+i
+z
+z
+2
+ϕ
+ϕ
+z
+2
+Abbildung 3.12: t ↦ (cos 4πt, sin 4πt)
+Definition 49
+Seien (X, TX),(Y, TY ) topologische Räume und f : X → Y eine Abbildung.
+f heißt offen :⇔ ∀U ∈ TX : f(U) ∈ TY .
+Beispiel 37 (Offene und stetige Abbildungen)
+Sei X ein topologischer Raum und seien fi: R → R mit i ∈ { 1, 2, 3 } und g : R → S
+1 =
+{ z ∈ C | ‖z‖ = 1 } Abbildungen.
+1) f1 := idR ist eine offene und stetige Abbildung.
+2) g(x) := e
+2πix ist eine offene, aber keine stetige Abbildung (vgl. Abbildung 1.5).
+3) f2(x) := 42 ist eine stetige, aber keine offene Abbildung.
+4) f3(x) := (
+0 falls x ∈ Q
+42 falls x ∈ R \ Q
+ist weder stetig noch offen.
+Bemerkung 51
+Überlagerungen sind offene Abbildungen.
+Beweis: Sei y ∈ V und x ∈ p(V ), sodass x = p(y) gilt. Sei weiter U = Ux eine offene Umgebung
+von x wie in Definition 48 und Vj die Komponente von p
+−1
+(U), die y enthält.
+Dann ist V ∩ Vj offene Umgebung von y.
+⇒ p(V ∩ Vj ) ist offen in p(Vj ), also auch offen in X. Außerdem ist p(y) = x ∈ p(V ∩ Vj ) und
+p(V ∩ Vj ) ⊆ p(V ).
+⇒ p(V ) ist offen.
+Definition 50
+Sei X ein topologischer Raum und M ⊆ X.
+M heißt diskret in X, wenn M in X keinen Häufungspunkt hat.
+Bemerkung 52
+Sei p : Y → X Überlagerung, x ∈ X.
+a) X hausdorffsch ⇒ Y hausdorffsch
+b) p
+−1
+(x) ist diskret in Y für jedes x ∈ X.
+Beweis:
+a) Seien y1, y2 ∈ Y .
 1. Fall: p(y1) = p(y2) = x.
-55 3.3. ÜBERLAGERUNGEN
-Sei U Umgebung von x wie in Definition 48, Vj1 bzw. Vj2 die Komponente von p
-−1
-(U),
-die y1 bzw. y2 enthält.
-Dann ist Vj1
-6= Vj2
-, weil beide ein Element aus p
-−1
-(x) enthalten.
-⇒ Vj1 ∩ Vj2 = ∅ nach Voraussetzung.
-2. Fall: p(y1) 6= p(y2).
-Dann seien U1 und U2 disjunkte Umgebungen von p(y1) und p(y2).
-⇒ p
-−1
-(U1) und p
-−1
-(U2) sind disjunkte Umgebungen von y1 und y2.
-b) Sei x ∈ X beliebig, aber fest.
-Zu zeigen: ∀yi ∈ p
-−1
-(x) : ∃Vi ∈ TY mit yi ∈ Vi
-, sodass gilt:i 6= j ⇒ Vi ∩ Vj = ∅.
-Die Vi existieren wegen der Definition einer Überlagerung: p heißt Überlagerung
-:⇔ ∀x ∈ X∃U = U(x) ∈ TX : p
-−1
-(U) = ˙
-S
-Vi∈TY
-Vi und p|Vi
-ist Homöomorphismus.
-⇒ (p|Vi
-)
-−1
-(x) = { yi }
-⇒ Alle yi
-liegen diskret in Y , da Häufungspunkte unendlich viele Elemente in jeder
-Umgebung benötigen. 
-Bemerkung 53 (Eindeutigkeit des Überlagerungsgrades)
-Sei p : Y → X Überlagerung. Dann gilt:
-∀x1, x2 ∈ X : |p
-−1
-(x1)| = |p
-−1
-(x2)|
-Hinweis: |p
-−1
-(x1)| = ∞ ist erlaubt!
-Beweis: Sei U Umgebung von x1 wie in Definition 48, x ∈ U. Dann enthält jedes Vj mit j ∈ I
-genau ein Element von p
-−1
-(x).
-⇒ |p
-−1
-(x)| ist konstant für x ∈ U
-X zhgd.
-====⇒ |p
-−1
-(x)| ist konstant für x ∈ X.
-Definition 51
-Es seien X, Y, Z topologische Räume, p : Y → X eine Überlagerung und f : Z → X stetig.
-Eine stetige Abbildung ˜f : Z → Y heißt Liftung von f, wenn p ◦
-˜f = f ist.
-Y
-X
-Z
-p
-˜f
-f
-Bemerkung 54 (Eindeutigkeit der Liftung)
-Sei Z zusammenhängend und f0, f1 : Z → Y Liftungen von f.
-∃z0 ∈ Z : f0(z0) = f1(z0) ⇒ f0 = f1
-Beweis: Sei T = { z ∈ Z | f0(z) = f1(z) }.
+ 3.3. ÜBERLAGERUNGEN
+Sei U Umgebung von x wie in Definition 48, Vj1 bzw. Vj2 die Komponente von p
+−1
+(U),
+die y1 bzw. y2 enthält.
+Dann ist Vj1 ≠ Vj2, weil beide ein Element aus p
+−1
+(x) enthalten.
+⇒ Vj1 ∩ Vj2 = ∅ nach Voraussetzung.
+2. Fall: p(y1) ≠ p(y2).
+Dann seien U1 und U2 disjunkte Umgebungen von p(y1) und p(y2).
+⇒ p
+−1
+(U1) und p
+−1
+(U2) sind disjunkte Umgebungen von y1 und y2.
+b) Sei x ∈ X beliebig, aber fest.
+Zu zeigen: ∀yi ∈ p
+−1
+(x) : ∃Vi ∈ TY mit yi ∈ Vi, sodass gilt: i ≠ j ⇒ Vi ∩ Vj = ∅.
+Die Vi existieren wegen der Definition einer Überlagerung: p heißt Überlagerung
+:⇔ ∀x ∈ X∃U = U(x) ∈ TX : p
+−1
+(U) = ˙
+S
+Vi∈TY
+Vi und p|Vi ist Homöomorphismus.
+⇒ (p|Vi)
+−1
+(x) = { yi }
+⇒ Alle yi liegen diskret in Y , da Häufungspunkte unendlich viele Elemente in jeder
+Umgebung benötigen. 
+Bemerkung 53 (Eindeutigkeit des Überlagerungsgrades)
+Sei p : Y → X Überlagerung. Dann gilt:
+∀x1, x2 ∈ X : |p
+−1
+(x1)| = |p
+−1
+(x2)|
+Hinweis: |p
+−1
+(x1)| = ∞ ist erlaubt!
+Beweis: Sei U Umgebung von x1 wie in Definition 48, x ∈ U. Dann enthält jedes Vj mit j ∈ I
+genau ein Element von p
+−1
+(x).
+⇒ |p
+−1
+(x)| ist konstant für x ∈ U
+X zhgd.
+====⇒ |p
+−1
+(x)| ist konstant für x ∈ X.
+Definition 51
+Es seien X, Y, Z topologische Räume, p : Y → X eine Überlagerung und f : Z → X stetig.
+Eine stetige Abbildung f̃ : Z → Y heißt Liftung von f, wenn p ◦
+f̃ = f ist.
+Y
+X
+Z
+p
+˜f
+f
+Bemerkung 54 (Eindeutigkeit der Liftung)
+Sei Z zusammenhängend und f0, f1 : Z → Y Liftungen von f.
+∃z0 ∈ Z : f0(z0) = f1(z0) ⇒ f0 = f1
+Beweis: Sei T = { z ∈ Z | f0(z) = f1(z) }.
 Z. z.: T ist offen und Z \ T ist auch offen.
-56 3.3. ÜBERLAGERUNGEN
-0 1 2 3 4 5 6
-0
-1
-2
-3
-4
-5
-6
-T
-Liften
-−−−→ R
-2/Z
-2
-Abbildung 3.13: Beim Liften eines Weges bleiben geschlossene Wege im allgemeinen nicht ge￾schlossen
-Sei z ∈ T, x = f(z), U Umgebung von x wie in Definition 48, V die Komponente von p
-−1
-(U),
-die y := f0(z) = f1(z) enthält.
-Sei q : U → V die Umkehrabbildung zu p|V .
-Sei W := f
-−1
-(U) ∩ f
-−1
-0
-(V ) ∩ f
-−1
-1
-(V ). W ist offene Umgebung in Z von z.
-Behauptung: W ⊆ T
-Denn für w ∈ W ist q(f(w)) = q((p ◦ f0))(w) = ((q ◦ p) ◦ f0)(w) = f0(w) = q(f(w)) = f1(w)
-⇒ T ist offen.
-Analog: Z \ T ist offen.
-Satz 3.2
-Sei p : Y → X Überlagerung, γ : I → X ein Weg, y ∈ Y mit p(y) = γ(0) =: x.
-Dann gibt es genau einen Weg γ˜ : I → Y mit γ˜(0) = y und p ◦ γ˜ = γ.
-p : Y → X Überlagerung, X, Y wegzusammenhängend. p stetig und surjektiv, zu x ∈ X∃
-Umgebung U, so dass p
-−1
-(U) = S
-Vj
-p|Vj
-: Vj → U Homöomorphismus.
-Bemerkung 55
-Wege in X lassen sich zu Wegen in Y liften.
-Zu jedem y ∈ p
-−1
+ 3.3. ÜBERLAGERUNGEN
+0 1 2 3 4 5 6
+0
+1
+2
+3
+4
+5
+6
+T
+Liften
+−−−→ R
+2/Z2
+Abbildung 3.13: Beim Liften eines Weges bleiben geschlossene Wege im allgemeinen nicht geschlossen
+Sei z ∈ T, x = f(z), U Umgebung von x wie in Definition 48, V die Komponente von p
+−1
+(U),
+die y := f0(z) = f1(z) enthält.
+Sei q : U → V die Umkehrabbildung zu p|V .
+Sei W := f
+−1
+(U) ∩ f
+−1
+0
+(V ) ∩ f
+−1
+1
+(V ). W ist offene Umgebung in Z von z.
+Behauptung: W ⊆ T
+Denn für w ∈ W ist f0(w) = ((q ◦ p) ◦ f0)(w) = q((p ◦ f0)(w)) = q(f(w)) = q((p ◦ f1)(w)) = ((q ◦ p) ◦ f1)(w) = f1(w)
+⇒ T ist offen.
+Analog: Z \ T ist offen.
+Satz 3.2
+Sei p : Y → X Überlagerung, γ : I → X ein Weg, y ∈ Y mit p(y) = γ(0) =: x.
+Dann gibt es genau einen Weg γ˜ : I → Y mit γ˜(0) = y und p ◦ γ˜ = γ.
+p : Y → X Überlagerung, X, Y wegzusammenhängend. p stetig und surjektiv, zu x ∈ X∃
+Umgebung U, so dass p
+−1
+(U) = ⋃ Vj ,
+p|Vj: Vj → U Homöomorphismus.
+Bemerkung 55
+Wege in X lassen sich zu Wegen in Y liften.
+Zu jedem y ∈ p
+−1
 (γ(0)) gibt es genau einen Lift von γ.
-57 3.3. ÜBERLAGERUNGEN
-Proposition 3.3
-Seien p : Y → X eine Überlagerung, a, b ∈ X, γ0, γ1 : I → X homotope Wege von a
-nach b, a˜ ∈ p
-−1
-(a), γ˜0, γ˜1 Liftungen von γ0 bzw. γ1 mit γ˜i(0) = ˜a.
-Dann ist γ˜0(1) = ˜γ1(1) und γ˜0 ∼ γ˜1.
-Beweis: Sei H : I × I → X Homotopie zwischen γ1 und γ2.
-Für s ∈ I sei γs : I → X, t 7→ H(t, s).
-Sei γ˜s Lift von γs mit γ˜s(0) = ˜a
-Sei H˜ : I × I → Y, H˜ (t, s) := ( ˜γs(t), s)
-Dann gilt:
-(i) H˜ ist stetig (Beweis wie für Bemerkung 54)
-(ii) H˜ (t, 0) = ˜γ0(t), H˜ (t, 1) = ˜γ1(t)
-(iii) H˜ (0, s) = ˜γs(0) = ˜a
-(iv) H˜ (1, s) ∈ p
-−1
-(b)
-Da p
-−1
-(b) diskrete Teilmenge von Y ist
-⇒ ˜bs = H˜ (1, s) = H˜ (1, 0) ∀s ∈ I
-⇒ ˜b0 = ˜b1 und H˜ ist Homotopie zwischen γ˜0 und γ˜1. 
-Folgerung 3.4
-Sei p : Y → X eine Überlagerung, x0 ∈ X, y0 ∈ p
-−1
-(x0)
-a) p∗ : π1(Y, y0) → π1(X, x0) ist injektiv
-b) [π1(X, x0) : p∗(π1(Y, y0))] = deg(p)
-Beweis:
-a) Sei γ˜ ein Weg in Y um y0 und p∗([˜γ]) = e, also p ◦ γ˜ ∼ γx0
-Nach Proposition 3.3 ist dann γ˜ homotop zum Lift des konstanten Wegs γx0 mit
-Anfangspunkt y0, also zu γy0 ⇒ [˜γ] = e
-b) Sei d = deg p und p
-−1
-(x0) = { y0, y1, . . . , yd−1 }. Für einen geschlossenen Weg γ in X
-um x0 sei γ˜ die Liftung mit γ˜(0) = y0.
-γ˜(1) ∈ { y0, . . . , yd−1 } hängt nur von [γ] ∈ π1(X, x0) ab.
-Für geschlossene Wege γ0, γ1 um x gilt:
-γ˜0(1) = ˜γ1(1)
-⇔[ ˜γ0 ∗ γ˜1
-−1
-] ∈ π1(Y, y0)
-⇔[γ0 ∗ γ
-−1
-1
-] ∈ p∗(π1(Y, y0))
+ 3.3. ÜBERLAGERUNGEN
+Proposition 3.3
+Seien p : Y → X eine Überlagerung, a, b ∈ X, γ0, γ1 : I → X homotope Wege von a
+nach b, a˜ ∈ p
+−1
+(a), γ˜0, γ˜1 Liftungen von γ0 bzw. γ1 mit γ˜i(0) = ˜a.
+Dann ist γ˜0(1) = ˜γ1(1) und γ˜0 ∼ γ˜1.
+Beweis: Sei H : I × I → X Homotopie zwischen γ0 und γ1.
+Für s ∈ I sei γs : I → X, t 7→ H(t, s).
+Sei γ˜s Lift von γs mit γ˜s(0) = ˜a
+Sei H˜ : I × I → Y, H˜(t, s) := γ˜s(t)
+Dann gilt:
+(i) H˜ ist stetig (Beweis wie für Bemerkung 54)
+(ii) H˜ (t, 0) = ˜γ0(t), H˜ (t, 1) = ˜γ1(t)
+(iii) H˜ (0, s) = ˜γs(0) = ˜a
+(iv) H˜ (1, s) ∈ p
+−1
+(b)
+Da p
+−1
+(b) diskrete Teilmenge von Y ist
+⇒ ˜bs = H˜ (1, s) = H˜ (1, 0) ∀s ∈ I
+⇒ ˜b0 = ˜b1 und H˜ ist Homotopie zwischen γ˜0 und γ˜1. 
+Folgerung 3.4
+Sei p : Y → X eine Überlagerung, x0 ∈ X, y0 ∈ p
+−1
+(x0)
+a) p∗ : π1(Y, y0) → π1(X, x0) ist injektiv
+b) [π1(X, x0) : p∗(π1(Y, y0))] = deg(p)
+Beweis:
+a) Sei γ˜ ein Weg in Y um y0 und p∗([˜γ]) = e, also p ◦ γ˜ ∼ γx0
+Nach Proposition 3.3 ist dann γ˜ homotop zum Lift des konstanten Wegs γx0 mit
+Anfangspunkt y0, also zu γy0 ⇒ [˜γ] = e
+b) Sei d = deg p und p
+−1
+(x0) = { y0, y1, . . . , yd−1 }. Für einen geschlossenen Weg γ in X
+um x0 sei γ˜ die Liftung mit γ˜(0) = y0.
+γ˜(1) ∈ { y0, . . . , yd−1 } hängt nur von [γ] ∈ π1(X, x0) ab.
+Für geschlossene Wege γ0, γ1 um x gilt:
+γ˜0(1) = ˜γ1(1)
+⇔[ ˜γ0 ∗ γ˜1
+−1
+] ∈ π1(Y, y0)
+⇔[γ0 ∗ γ
+−1
+1
+] ∈ p∗(π1(Y, y0))
 ⇔[γ0] und [γ1]liegen in der selben Nebenklasse bzgl. p∗(π1(Y, y0))
-58 3.3. ÜBERLAGERUNGEN
-Zu i ∈ { 0, . . . , d − 1 } gibt es Weg δi
-in Y mit δi(0) = y0 und δi(1) = yi
-⇒ p ∪ δi
-ist geschlossener Weg in X um x0.
-⇒ Jedes yi mit i = 0, . . . , d − 1 ist γ˜(1) für ein [γ] ∈ π1(X, x0).
-Bemerkung 56
-Sei p : Y → X Überlagerung und X einfach zusammenhängend.
-Dann ist p ein Homöomorphismus.
-Beweis: Wegen Bemerkung 55.a ist auch Y einfach zusammenhängend und wegen Bemer￾kung 55.b ist deg(p) = 1, p ist also bijektiv.
-Nach Bemerkung 51 ist p offen ⇒ p
-−1
-ist stetig. ⇒ p ist Homöomorphismus. 
-Definition 52
-Eine Überlagerung p : X˜ → X heißt universell, wenn X˜ einfach zusammenhängend ist.
-Beispiel 38 (Universelle Überlagerungen)
-R → S
-1
-, t 7→ (cos 2πt,sin 2πt)
-R
-2 → T
-2 = R
-2/Z
-2
-S
-n → Pn
-(R) für n ≥ 2
-Satz 3.5
-Sei p : X˜ → X eine universelle Überlagerung, q : Y → X weitere Überlagerung.
-Sei x0 ∈ X, x˜0 ∈ X, y ˜
-0 ∈ Y mit q(y0) = x0 = p( ˜x0).
-Dann gibt es genau eine Überlagerung p˜ : X˜ → Y mit p˜( ˜x0) = y0.
-Beweis: Sei z ∈ X, γ ˜
-z : I → X˜ ein Weg von x˜0 nach z.
-Sei δz die eindeutige Liftung von p ◦ γz nach Y mit δz(0) = y0.
-Setze p˜(z) = δz(1).
-Da X˜ einfach zusammenhängend ist, hängt p˜(z) nicht vom gewählten Weg γz ab.
-Offensichtlich ist q(˜p(z)) = p(z).
-Zu zeigen: p˜ ist stetig in z ∈ X˜:
-Sei W ⊆ Y offene Umgebung von p˜(z).
-q offen
-====⇒ q(W) ist offene Umgebung von p(z) · d(˜p(z)).
-Sei U ⊆ q(W) offen wie in Definition 48 und V ⊆ q
-−1
-(U) die Komponente, die p˜(z) enthält.
-O. B. d. A. sei V ⊆ W.
-Sei Z := p
-−1
-(U). Für u ∈ Z sei δ ein Weg in Z von z nach u.
-⇒ γz ∗ δ ist Weg von x0 nach u
-⇒ p˜(u) ∈ V
-⇒ Z ⊆ ˜p−1(W)
+ 3.3. ÜBERLAGERUNGEN
+Zu i ∈ { 0, . . . , d − 1 } gibt es einen Weg δi in Y mit δi(0) = y0 und δi(1) = yi
+⇒ p ◦ δi ist geschlossener Weg in X um x0.
+⇒ Jedes yi mit i = 0, . . . , d − 1 ist γ˜(1) für ein [γ] ∈ π1(X, x0).
+Bemerkung 56
+Sei p : Y → X Überlagerung und X einfach zusammenhängend.
+Dann ist p ein Homöomorphismus.
+Beweis: Wegen Bemerkung 55.a ist auch Y einfach zusammenhängend und wegen Bemerkung 55.b ist deg(p) = 1, p ist also bijektiv.
+Nach Bemerkung 51 ist p offen ⇒ p
+−1
+ist stetig. ⇒ p ist Homöomorphismus. 
+Definition 52
+Eine Überlagerung p : X˜ → X heißt universell, wenn X˜ einfach zusammenhängend ist.
+Beispiel 38 (Universelle Überlagerungen)
+R → S
+1
+, t 7→ (cos 2πt,sin 2πt)
+R
+2 → T2 = R2/Z2
+S
+n → Pn
+(R) für n ≥ 2
+Satz 3.5
+Sei p : X˜ → X eine universelle Überlagerung, q : Y → X weitere Überlagerung.
+Sei $x_0 \in X$, $\tilde{x}_0 \in \tilde{X}$, $y_0 \in Y$ mit $q(y_0) = x_0 = p(\tilde{x}_0)$.
+Dann gibt es genau eine Überlagerung p˜ : X˜ → Y mit p˜( ˜x0) = y0.
+Beweis: Sei $z \in \tilde{X}$, $\gamma_z : I \to \tilde{X}$ ein Weg von $\tilde{x}_0$ nach $z$.
+Sei δz die eindeutige Liftung von p ◦ γz nach Y mit δz(0) = y0.
+Setze p˜(z) = δz(1).
+Da X˜ einfach zusammenhängend ist, hängt p˜(z) nicht vom gewählten Weg γz ab.
+Offensichtlich ist q(˜p(z)) = p(z).
+Zu zeigen: p˜ ist stetig in z ∈ X˜:
+Sei W ⊆ Y offene Umgebung von p˜(z).
+$\overset{q \text{ offen}}{\Longrightarrow}$ $q(W)$ ist offene Umgebung von $p(z) = q(\tilde{p}(z))$.
+Sei U ⊆ q(W) offen wie in Definition 48 und V ⊆ q
+−1
+(U) die Komponente, die p˜(z) enthält.
+O. B. d. A. sei V ⊆ W.
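+Sei $Z := p^{-1}(U)$. Für $u \in Z$ sei $\delta$ ein Weg in $Z$ von $z$ nach $u$.
+$\Rightarrow \gamma_z * \delta$ ist Weg von $\tilde{x}_0$ nach $u$
+$\Rightarrow \tilde{p}(u) \in V$
+$\Rightarrow Z \subseteq \tilde{p}^{-1}(W)$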
 ⇒ p˜ ist stetig
-59 3.3. ÜBERLAGERUNGEN
-Folgerung 3.6
-Sind p : X˜ → X und q : Y˜ → X universelle Überlagerungen, so sind X˜ und Y˜ homöomorph.
-Beweis: Seien x0 ∈ X, x˜0 ∈ X˜ mit p( ˜x0) = x0 und y˜0 ∈ q
-−1
-(x0) ⊆ Y˜ .
-Nach Satz 3.5 gibt es genau eine Überlagerung
-f : X˜ → Y˜ mit f(x0) = ˜y0 und q ◦ f = p
-und genau eine Überlagerung
-g : Y˜ → X˜ mit g( ˜y0) = ˜x0 und p ◦ g = q
-Damit gilt: p ◦ q ◦ f = q ◦ f = p, q ◦ f ◦ g = p ◦ g = q. Also ist g ◦ f : X˜ → X˜ Lift von
-p : X˜ → X mit (g ◦ f)( ˜x0) = ˜x0.
-Da auch idx˜ diese Eigenschaft hat, folgt mit Bemerkung 53: g ◦ f = idX˜ .
-Analog gilt f ◦ g = idY˜ . 
-Die Frage, wann es eine universelle Überlagerung gibt, beantwortet der folgende Satz:
-Definition 53
-Sei (X, T) ein topologischer Raum und x ∈ X.
-U ⊆ T heißt eine Umgebungsbasis von x, wenn jede offene Umgebung von x eine Teilmenge
-von U enthält.
-Satz 3.7
-Es sei X ein wegzusammenhängender topologischer Raum in dem jeder Punkt eine
-Umgebungsbasis aus einfach zusammenhängenden Mengen hat.
-Dann gibt es eine universelle Überlagerung.
-Beweis: Sei x0 ∈ X und X˜ := { (x, [γ]) | x ∈ X, γ Weg von xo nach x } und p : X˜ → X,(x, [γ]) 7→
-x.
-Die Topologie auf X˜ ist folgende: Definiere eine Umgebungsbasis von (x, [γ]) wie folgt: Es
-sei U eine einfach zusammenhängende Umgebung von x und
-U˜ = U˜(x, [γ]) := { (y, [γ ∗ α]) | y ∈ U, α Weg in U von x nach y }
-p ist Überlagerung: p|U˜ : U˜ → U bijektiv. p ist stetig und damit p|U˜ ein Homöomorphismus.
-Sind γ1, γ2 Wege von x0 nach x und γ1 ∼ γ2, so ist U˜(x, [γ1]) ∩ U˜(x, [γ2]) = ∅, denn: Ist
-γ1 ∗ α ∼ γ2 ∗ α, so ist auch γ1 ∼ γ2. Also ist p eine Überlagerung.
-X˜ ist einfach zusammenhängend: Es sei x˜0 := (x0, e) und γ˜ : I → X˜ ein geschlossener Weg
-um x˜0.
-Sei γ := p(˜γ).
-Annahme: [˜γ] 6= e
-Mit Bemerkung 55.a folgt dann: [γ] 6= e.
-Dann ist der Lift von γ nach x˜ mit Anfangspunkt x˜0 ein Weg von x˜0 nach (x0, [γ]). Wider￾spruch.
-60 3.3. ÜBERLAGERUNGEN
-Definition 54
-Es sei p : Y → X eine Überlagerung und f : Y → Y ein Homöomorphismus.
-a) f heißt Decktransformation von p :⇔ p ◦ f = p.
-b) Die Decktransformationen von p : Y → X bilden mit der Verkettung eine Gruppe,
-die sog. Decktransformationsgruppe. Man schreibt: Deck(p), Deck(Y /X) oder
-Deck(Y → X).
-c) p heißt regulär, wenn | Deck(Y /X)| = deg p gilt.
-Bemerkung 57 (Eigenschaften der Decktransformation)
-a) (Deck Y /X, ◦) ist eine Gruppe
-b) Ist f ∈ Deck(Y /X) und f 6= id, dann hat f keinen Fixpunkt.
-c) | Deck(Y /X)| ≤ deg p
-d) Ist f eine reguläre Überlagerung, dann gilt: ∀x ∈ X : Deck(Y /X) operiert transitiv
-auf der Menge der Urbilder f
-−1
-(x).
-Beweis:
-a) Es gilt:
-• idY ∈ Deck Y /X,
-• f, g ∈ Deck Y /X ⇒ p ◦ (f ◦ g) = (p ◦ f) ◦ g = p ◦ g ⇒ f ◦ g ∈ Deck Y /X
-• f ∈ Deck Y /X ⇒ p ◦ f = p ⇒ p ◦ f
-−1 = (p ◦ f) ◦ f
-−1 = p ◦ (f ◦ f
-−1
-) = p ⇒
-f
-−1 ∈ Deck Y /X
-b) Die Menge
-Fix(f) = { y ∈ Y | f(y) = y }
-ist abgeschlossen als Urbild der Diagonale ∆ ⊆ Y × Y unter der stetigen Abbildung
-y 7→ (f(y), y). Außerdem ist Fix(f) offen, denn ist y ∈ Fix(f), so sei U eine Umgebung
-von p(y) ∈ X wie in Definition 48 und U ⊆ p
-−1
-(U) die Komponente, die y enthält;
-also p : V → U ein Homöomorphismus. Dann ist W := f
-−1
-(V ) ∩ V offene Umgebung
-von y.
-Für z ∈ W ist f(z) ∈ V und p(f(z)) = p(z). Da p injektiv auf V ist, folgt f(z) = z,
-d. h. Fix(f) 6= ∅.
-Da Y zusammenhängend ist, folgt aus Fix( ˜f) 6= ∅ schon Fix(f) = Y , also f = idY .
-c) Es sei x0 ∈ X, deg(p) = d und p
-−1
-(x0) = { y0, . . . , yd−1 }. Für f ∈ Deck(Y /X) ist
-f(y0) = { y0, . . . , yd−1 }.
-Zu i ∈ { 0, . . . , d − 1 } gibt es höchstens ein f ∈ Deck(Y /X) mit f(y0) = y1, denn ist
-f(y0) = g(y0), so ist (g
-−1 ◦ f)(y0) = y0, also nach Bemerkung 57.c g
-−1 ◦ f = idY .
-d) Wenn jemand den Beweis macht, bitte an info@martin-thoma.de schicken.
-Beispiel 39 (Decktransformationen)
-1) p : R → S
-1
-: Deck(R/S1
-) = { t 7→ t + n | n ∈ Z } ∼= Z
-2) p : R
-2 → T
-2
-: Deck(R
-2/T2
-) ∼= Z × Z = Z
-2
-3) p : S
-n → Pn
-(R) : Deck(S
-n/P
-n
+ 3.3. ÜBERLAGERUNGEN
+Folgerung 3.6
+Sind p : X˜ → X und q : Y˜ → X universelle Überlagerungen, so sind X˜ und Y˜ homöomorph.
+Beweis: Seien x0 ∈ X, x˜0 ∈ X˜ mit p( ˜x0) = x0 und y˜0 ∈ q
+−1
+(x0) ⊆ Y˜ .
+Nach Satz 3.5 gibt es genau eine Überlagerung
+f : X˜ → Y˜ mit f(x˜0) = y˜0 und q ◦ f = p
+und genau eine Überlagerung
+g : Y˜ → X˜ mit g( ˜y0) = ˜x0 und p ◦ g = q
+Damit gilt: p ◦ g ◦ f = q ◦ f = p, q ◦ f ◦ g = p ◦ g = q. Also ist g ◦ f : X˜ → X˜ Lift von
+p : X˜ → X mit (g ◦ f)(x˜0) = x˜0.
+Da auch idX˜ diese Eigenschaft hat, folgt mit Bemerkung 53: g ◦ f = idX˜ .
+Analog gilt f ◦ g = idY˜ . 
+Die Frage, wann es eine universelle Überlagerung gibt, beantwortet der folgende Satz:
+Definition 53
+Sei (X, T) ein topologischer Raum und x ∈ X.
+U ⊆ T heißt eine Umgebungsbasis von x, wenn jede offene Umgebung von x eine Teilmenge
+von U enthält.
+Satz 3.7
+Es sei X ein wegzusammenhängender topologischer Raum in dem jeder Punkt eine
+Umgebungsbasis aus einfach zusammenhängenden Mengen hat.
+Dann gibt es eine universelle Überlagerung.
+Beweis: Sei $x_0 \in X$ und $\tilde{X} := \{ (x, [\gamma]) \mid x \in X, \gamma \text{ Weg von } x_0 \text{ nach } x \}$ und $p : \tilde{X} \to X, (x, [\gamma]) \mapsto x$.
+Die Topologie auf X˜ ist folgende: Definiere eine Umgebungsbasis von (x, [γ]) wie folgt: Es
+sei U eine einfach zusammenhängende Umgebung von x und
+U˜ = U˜(x, [γ]) := { (y, [γ ∗ α]) | y ∈ U, α Weg in U von x nach y }
+p ist Überlagerung: p|U˜ : U˜ → U bijektiv. p ist stetig und damit p|U˜ ein Homöomorphismus.
+Sind γ1, γ2 Wege von x0 nach x und γ1 ≁ γ2, so ist U˜(x, [γ1]) ∩ U˜(x, [γ2]) = ∅, denn: Ist
+γ1 ∗ α ∼ γ2 ∗ α, so ist auch γ1 ∼ γ2. Also ist p eine Überlagerung.
+X˜ ist einfach zusammenhängend: Es sei x˜0 := (x0, e) und γ˜ : I → X˜ ein geschlossener Weg
+um x˜0.
+Sei γ := p(˜γ).
+Annahme: [γ˜] ≠ e
+Mit Bemerkung 55.a folgt dann: [γ] ≠ e.
+Dann ist der Lift von γ nach X˜ mit Anfangspunkt x˜0 ein Weg von x˜0 nach (x0, [γ]). Widerspruch.
+ 3.3. ÜBERLAGERUNGEN
+Definition 54
+Es sei p : Y → X eine Überlagerung und f : Y → Y ein Homöomorphismus.
+a) f heißt Decktransformation von p :⇔ p ◦ f = p.
+b) Die Decktransformationen von p : Y → X bilden mit der Verkettung eine Gruppe,
+die sog. Decktransformationsgruppe. Man schreibt: Deck(p), Deck(Y /X) oder
+Deck(Y → X).
+c) p heißt regulär, wenn | Deck(Y /X)| = deg p gilt.
+Bemerkung 57 (Eigenschaften der Decktransformation)
+a) (Deck Y /X, ◦) ist eine Gruppe
+b) Ist f ∈ Deck(Y /X) und f ≠ id, dann hat f keinen Fixpunkt.
+c) | Deck(Y /X)| ≤ deg p
+d) Ist $p$ eine reguläre Überlagerung, dann gilt: $\forall x \in X$ : $\mathrm{Deck}(Y/X)$ operiert transitiv
+auf der Menge der Urbilder $p^{-1}(x)$.
+Beweis:
+a) Es gilt:
+• idY ∈ Deck Y /X,
+• f, g ∈ Deck Y /X ⇒ p ◦ (f ◦ g) = (p ◦ f) ◦ g = p ◦ g ⇒ f ◦ g ∈ Deck Y /X
+• $f \in \mathrm{Deck}(Y/X) \Rightarrow p \circ f = p \Rightarrow p \circ f^{-1} = (p \circ f) \circ f^{-1} = p \circ (f \circ f^{-1}) = p \Rightarrow f^{-1} \in \mathrm{Deck}(Y/X)$
+b) Die Menge
+Fix(f) = { y ∈ Y | f(y) = y }
+ist abgeschlossen als Urbild der Diagonale ∆ ⊆ Y × Y unter der stetigen Abbildung
+$y \mapsto (f(y), y)$. Außerdem ist $\mathrm{Fix}(f)$ offen, denn ist $y \in \mathrm{Fix}(f)$, so sei $U$ eine Umgebung
+von $p(y) \in X$ wie in Definition 48 und $V \subseteq p^{-1}(U)$ die Komponente, die $y$ enthält;
+also $p|_V : V \to U$ ein Homöomorphismus. Dann ist $W := f^{-1}(V) \cap V$ offene Umgebung
+von $y$.
+Für $z \in W$ ist $f(z) \in V$ und $p(f(z)) = p(z)$. Da $p$ injektiv auf $V$ ist, folgt $f(z) = z$,
+d. h. $W \subseteq \mathrm{Fix}(f)$.
+Da $Y$ zusammenhängend ist, folgt aus $\mathrm{Fix}(f) \neq \emptyset$ schon $\mathrm{Fix}(f) = Y$, also $f = \mathrm{id}_Y$.
+c) Es sei $x_0 \in X$, $\deg(p) = d$ und $p^{-1}(x_0) = \{ y_0, \dots, y_{d-1} \}$. Für $f \in \mathrm{Deck}(Y/X)$ ist
+$f(y_0) \in \{ y_0, \dots, y_{d-1} \}$.
+Zu $i \in \{ 0, \dots, d-1 \}$ gibt es höchstens ein $f \in \mathrm{Deck}(Y/X)$ mit $f(y_0) = y_i$, denn ist
+$f(y_0) = g(y_0)$, so ist $(g^{-1} \circ f)(y_0) = y_0$, also nach Bemerkung 57.b $g^{-1} \circ f = \mathrm{id}_Y$.
+d) Wenn jemand den Beweis macht, bitte an info@martin-thoma.de schicken.
+Beispiel 39 (Decktransformationen)
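+1) $p : \mathbb{R} \to S^1$: $\mathrm{Deck}(\mathbb{R}/S^1) = \{ t \mapsto t + n \mid n \in \mathbb{Z} \} \cong \mathbb{Z}$
+2) $p : \mathbb{R}^2 \to T^2$: $\mathrm{Deck}(\mathbb{R}^2/T^2) \cong \mathbb{Z} \times \mathbb{Z} = \mathbb{Z}^2$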
+3) p : S
+n → Pn
+(R) : Deck(S
+n/Pn
 (R)) = { x 7→ ±x } ∼= Z/2Z
-61 3.3. ÜBERLAGERUNGEN
-Nun werden wir eine Verbindung zwischen der Decktransformationsgruppe und der Fundamen￾talgruppe herstellen:
-Satz 3.8
-Ist p : X˜ → X eine universelle Überlagerung, so gilt:
-Deck(X/X ˜ ) ∼= π1(X, x0) ∀x0 ∈ X
-Beweis: Wähle x˜0 ∈ p
-−1
-(x0). Es sei ρ : Deck(x/x ˜ ) → π1(X, x0) die Abbildung, die f auf [p(γf )]
-abbildet, wobei γf ein Weg von x˜0 nach f(x˜0) sei. Da x˜ einfach zusammenhängend ist, ist
-γf bis auf Homotopie eindeutig bestimmt und damit auch ρ wohldefiniert.
-• ρ ist Gruppenhomomorphismus: Seien f, g ∈ Deck(X/X ˜ ) ⇒ γg◦f = γg ∗ g(γf ) ⇒
-p(γg◦f ) = p(γg) ∗ (p ◦ g)
-| {z }
-=p
-(γf ) = ρ(g) 6= ρ(f)
-• ρ ist injektiv: ρ(f) = e ⇒ p(γf ) ∼ γx0
-Satz 3.2 ====⇒ γf ∼ γx˜0 ⇒ f(x0) = x˜0
-Bem. 57.c ======⇒ f =
-idx˜.
-• ρ ist surjektiv: Sei [γ] ∈ π1(X, x0), γ˜ Lift von γ nach x˜ mit Anfangspunkt x˜0. Der
-Endpunkt von γ˜ sei x˜1.
-p ist reguläre Überlagerung: Seien x˜0, x˜1 ∈ X˜ mit p(x˜0) = p(x˜1). Nach Satz 3.5 gibt
-es genau eine Überlagerung p˜ : X˜ → X mit p = p ◦ p˜ und p˜(x˜0) = x˜1. Somit ist p˜ eine
-Decktransformation und damit p eine reguläre Überlagerung.
-Da p reguläre Überlagerung ist, gibt es ein f ∈ Deck(X/X ˜ ) mit f( ˜x0) = ˜x1.
-Aus der Definition von ρ folgt: ρ(f) = p(γf ) = γ
-
-Beispiel 40 (Bestimmung von π1(S
-1
-))
-p : R → S
-1
-, t 7→ (cos 2πt,sin 2πt) ist universelle Überlagerung, da R zusammenhängend ist.
-Für n ∈ Z sei fn : R → R, t 7→ t + n die Translation um n.
-Es gilt: (p ◦ fn)(t) = p(fn(t)) = p(t) ∀t ∈ R, d. h. fn ist Decktransformation.
-Ist umgekehrt g irgendeine Decktransformation, so gilt insbesondere für t = 0:
-(cos(2πg(0)),sin(2πg(0))) = (p ◦ g)(0) = p(0) = (1, 0)
-Es existiert n ∈ Z mit g(0) = n. Da auch fn(0) = 0 + n = n gilt, folgt mit Bemerkung 57.c
-g = fn. Damit folgt:
-Deck(R/S1
-) = { fn | n ∈ Z } ∼= Z
-Nach Satz 3.8 also π1(S
-1
-) ∼= Deck(R/S1
-) ∼= Z
-62 3.4. GRUPPENOPERATIONEN
-3.4 Gruppenoperationen
-Definition 55
-Sei (G, ·) eine Gruppe und X eine Menge.
-Eine Gruppenoperation von G auf X ist eine Abbildung ◦ : G × X → X für die gilt:
-a) 1G ◦ x = x ∀x ∈ X
-b) (g · h) ◦ x = g ◦ (h ◦ x) ∀g, h ∈ G∀x ∈ X
-Beispiel 41
-1) G = (Z, +), X = R, n ◦ x = x + n
-2) G operiert auf X = G durch g ◦ h := g · h
-3) G operiert auf X = G durch g ◦ h := g · h · g
-−1
-, denn
-i) 1G ◦ h = 1G · h · 1
-−1
-G = h
-ii) (g1 · g2) ◦ h = (g1 · g2) · h · (g · g2)
-−1
-= g1 · (g2 · h · g
-−1
-2
-) · g
-−1
-1
-= g1 ◦ (g2 ◦ h)
-Definition 56
-Sei G eine Gruppe, X ein topologischer Raum und ◦ : G × X → X eine Gruppenoperation.
-a) G operiert durch Homöomorphismen, wenn für jedes g ∈ G die Abbildung
-mg : X → X, x 7→ g ◦ x
-ein Homöomorphismus ist.
-b) Ist G eine topologische Gruppe, so heißt die Gruppenoperation ◦ stetig, wenn
-∀g ∈ G : mg ist stetig
-gilt.
-Bemerkung 58
-Jede stetige Gruppenoperation ist eine Gruppenoperation durch Homöomorphismen.
-Beweis: Nach Voraussetzung ist mg := ◦|{ g }×X : X → X, x 7→ g ◦ x stetig.
-Die Umkehrabbildung zu mg ist mg−1 :
-(mg−1 ◦ mg)(x) = mg−1 (mg(x))
-= mg−1 (g ◦ x)
-= g
-−1
-◦ (g ◦ x)
-Def. 55.b = (g
-−1
-· g) ◦ x
-= 1G ◦ x
-Def. 55.a = x
-Beispiel 42
+ 3.3. ÜBERLAGERUNGEN
+Nun werden wir eine Verbindung zwischen der Decktransformationsgruppe und der Fundamentalgruppe herstellen:
+Satz 3.8
+Ist p : X˜ → X eine universelle Überlagerung, so gilt:
+$\mathrm{Deck}(\tilde{X}/X) \cong \pi_1(X, x_0) \quad \forall x_0 \in X$
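+Beweis: Wähle $\tilde{x}_0 \in p^{-1}(x_0)$. Es sei $\rho : \mathrm{Deck}(\tilde{X}/X) \to \pi_1(X, x_0)$ die Abbildung, die $f$ auf $[p(\gamma_f)]$
+abbildet, wobei $\gamma_f$ ein Weg von $\tilde{x}_0$ nach $f(\tilde{x}_0)$ sei. Da $\tilde{X}$ einfach zusammenhängend ist, ist
+$\gamma_f$ bis auf Homotopie eindeutig bestimmt und damit auch $\rho$ wohldefiniert.
+• $\rho$ ist Gruppenhomomorphismus: Seien $f, g \in \mathrm{Deck}(\tilde{X}/X) \Rightarrow \gamma_{g \circ f} = \gamma_g * g(\gamma_f) \Rightarrow$
+$p(\gamma_{g \circ f}) = p(\gamma_g) * \underbrace{(p \circ g)}_{= p}(\gamma_f) = \rho(g) \cdot \rho(f)$
+• $\rho$ ist injektiv: $\rho(f) = e \Rightarrow p(\gamma_f) \sim \gamma_{x_0} \overset{\text{Satz 3.2}}{\Longrightarrow} \gamma_f \sim \gamma_{\tilde{x}_0} \Rightarrow f(\tilde{x}_0) = \tilde{x}_0 \overset{\text{Bem. 57.b}}{\Longrightarrow} f = \mathrm{id}_{\tilde{X}}$.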
+• ρ ist surjektiv: Sei [γ] ∈ π1(X, x0), γ˜ Lift von γ nach x˜ mit Anfangspunkt x˜0. Der
+Endpunkt von γ˜ sei x˜1.
+p ist reguläre Überlagerung: Seien x˜0, x˜1 ∈ X˜ mit p(x˜0) = p(x˜1). Nach Satz 3.5 gibt
+es genau eine Überlagerung p˜ : X˜ → X˜ mit p = p ◦ p˜ und p˜(x˜0) = x˜1. Somit ist p˜ eine
+Decktransformation und damit p eine reguläre Überlagerung.
+Da p reguläre Überlagerung ist, gibt es ein f ∈ Deck(X˜/X) mit f(x˜0) = x˜1.
+Aus der Definition von ρ folgt: ρ(f) = [p(γf)] = [γ]
+
+Beispiel 40 (Bestimmung von $\pi_1(S^1)$)
+$p : \mathbb{R} \to S^1, t \mapsto (\cos 2\pi t, \sin 2\pi t)$ ist universelle Überlagerung, da $\mathbb{R}$ einfach zusammenhängend ist.
+Für n ∈ Z sei fn : R → R, t 7→ t + n die Translation um n.
+Es gilt: (p ◦ fn)(t) = p(fn(t)) = p(t) ∀t ∈ R, d. h. fn ist Decktransformation.
+Ist umgekehrt g irgendeine Decktransformation, so gilt insbesondere für t = 0:
+(cos(2πg(0)),sin(2πg(0))) = (p ◦ g)(0) = p(0) = (1, 0)
+Es existiert n ∈ Z mit g(0) = n. Da auch fn(0) = 0 + n = n gilt, folgt mit Bemerkung 57.c
+g = fn. Damit folgt:
+Deck(R/S1) = { fn | n ∈ Z } ∼= Z
+Nach Satz 3.8 also $\pi_1(S^1) \cong \mathrm{Deck}(\mathbb{R}/S^1) \cong \mathbb{Z}$
+ 3.4. GRUPPENOPERATIONEN
+3.4 Gruppenoperationen
+Definition 55
+Sei (G, ·) eine Gruppe und X eine Menge.
+Eine Gruppenoperation von G auf X ist eine Abbildung ◦ : G × X → X für die gilt:
+a) 1G ◦ x = x ∀x ∈ X
+b) (g · h) ◦ x = g ◦ (h ◦ x) ∀g, h ∈ G∀x ∈ X
+Beispiel 41
+1) G = (Z, +), X = R, n ◦ x = x + n
+2) G operiert auf X = G durch g ◦ h := g · h
+3) $G$ operiert auf $X = G$ durch $g \circ h := g \cdot h \cdot g^{-1}$, denn
+i) $1_G \circ h = 1_G \cdot h \cdot 1_G^{-1} = h$
+ii) $(g_1 \cdot g_2) \circ h = (g_1 \cdot g_2) \cdot h \cdot (g_1 \cdot g_2)^{-1}$
+$= g_1 \cdot (g_2 \cdot h \cdot g_2^{-1}) \cdot g_1^{-1}$
+$= g_1 \circ (g_2 \circ h)$
+Definition 56
+Sei G eine Gruppe, X ein topologischer Raum und ◦ : G × X → X eine Gruppenoperation.
+a) G operiert durch Homöomorphismen, wenn für jedes g ∈ G die Abbildung
+mg : X → X, x 7→ g ◦ x
+ein Homöomorphismus ist.
+b) Ist G eine topologische Gruppe, so heißt die Gruppenoperation ◦ stetig, wenn
+∀g ∈ G : mg ist stetig
+gilt.
+Bemerkung 58
+Jede stetige Gruppenoperation ist eine Gruppenoperation durch Homöomorphismen.
+Beweis: Nach Voraussetzung ist mg := ◦|{ g }×X : X → X, x 7→ g ◦ x stetig.
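+Die Umkehrabbildung zu $m_g$ ist $m_{g^{-1}}$:
+$(m_{g^{-1}} \circ m_g)(x) = m_{g^{-1}}(m_g(x))$
+$= m_{g^{-1}}(g \circ x)$
+$= g^{-1} \circ (g \circ x)$
+$\overset{\text{Def. 55.b}}{=} (g^{-1} \cdot g) \circ x$
+$= 1_G \circ x$
+$\overset{\text{Def. 55.a}}{=} x$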
+Beispiel 42
 In Beispiel 41.1 operiert Z durch Homöomorphismen.
-63 3.4. GRUPPENOPERATIONEN
-Bemerkung 59
-Sei G eine Gruppe und X eine Menge.
-a) Die Gruppenoperation von G auf X entsprechen bijektiv den Gruppenhomomorphismen
-% : G → Perm(X) = Sym(X) = { f : X → X | f ist bijektiv }
-b) Ist X ein topologischer Raum, so entsprechen dabei die Gruppenoperationen durch
-Homöomorphismus den Gruppenhomomorphismen G → Homöo(X)
-Beweis:
-Sei ◦ : G × X → X eine Gruppenoperation von G auf X. Dann sei % : G → Perm(X)
-definiert durch %(g)(X) = g · x ∀g ∈ G, x ∈ X, also %(g) = mg.
-% ist Homomorphismus: %(g1 · g2) = mg1·g2 = mg1
-◦ mg2 = %(g1) ◦ %(g2), denn für x ∈ X :
-%(g1 · g2)(x) = (g1 · g2) ◦ x = g1 ◦ (g2 ◦ x) = %(g1)(%(g2)(x)) = (%(g1) ◦ %(g2))(x)
-Umgekehrt: Sei % : G → Perm(X) Gruppenhomomorphismus. Definiere ◦ : G × X → X
-durch g ◦ x = %(g)(x).
-z. z. Definition 55.b:
-g1 ◦ (g2 ◦ x) = %(g1)(g2 ◦ x)
-= %(g1)(%(g2)(x))
-= (%(g1) ◦ %(g2))(x)
-% ist Hom.
-= %(g1 · g2)(x)
-= (g1 · g2) ◦ x
-z. z. Definition 55.a: 1G · x = %(1G)(x) = idX(x) = x, weil % ein Homomorphismus ist.
-Beispiel 43
-Sei X ein wegzusammenhängender topologischer Raum, p : X˜ → X eine universelle Überla￾gerung, x0 ∈ X, x˜0 ∈ X˜ mit p( ˜x0) = x0.
-Dann operiert π1(X, x0) auf X˜ durch Homöomorphismen wie folgt:
-Für [γ] ∈ π1(X, x0) und x˜ ∈ X˜ sei [γ] ◦ x˜ = γ ˜∗ %(1) wobei γ˜ ein Weg von x˜0 nach x˜ in X˜
-sei, % := p(
-˜δ) = p ◦ δ.
-Also: δ ist ein Weg in X von x0 nach x = p(x˜) und γ]∗ δ die Liftung von γ ∗ δ mit
-Anfangspunkt x˜0.
-[γ] · x˜ hängt nicht von der Wahl von γ˜ ab; ist γ˜
-0
-ein anderer Weg von x˜0 nach x˜, so sind ˜δ
-und ˜δ
-0 homotop, also auch γ]∗ δ und γ]∗ δ
-0 homotop.
-Gruppenoperation, denn:
-i) [e] ◦ x˜ = eg∗ δ = ˜x
-ii) γ1^∗ γ2 ∗ δ(1) = [γ1 ∗ γ2] ◦ x˜ = ([γ1] ∗ [γ2]) ◦ x˜
-γ1 ∗ γ2 ∗ δ(1) = [γ1] ◦ (
-˜ γ2 ∗ δ)(1) = [γ1] ◦ ([γ2] ◦ x˜)
-Erinnerung:Die Konstruktion aus Bemerkung 59 induziert zu der Gruppenoperation π1(X, x0)
-aus Beispiel 43 einen Gruppenhomomorphismus % : π1(X, x0) → Homöo(X). Nach Satz 3.8 ist
-%(π1(X, x0)) = Deck(X/X ˜ )
-=
-n
-f : X˜ → X˜ Homöomorphismus
-
-
- p ◦ f = p
+ 3.4. GRUPPENOPERATIONEN
+Bemerkung 59
+Sei G eine Gruppe und X eine Menge.
+a) Die Gruppenoperationen von $G$ auf $X$ entsprechen bijektiv den Gruppenhomomorphismen
+$\varrho : G \to \mathrm{Perm}(X) = \mathrm{Sym}(X) = \{ f : X \to X \mid f \text{ ist bijektiv} \}$
+b) Ist $X$ ein topologischer Raum, so entsprechen dabei die Gruppenoperationen durch
+Homöomorphismen den Gruppenhomomorphismen $G \to \text{Homöo}(X)$
+Beweis:
+Sei $\circ : G \times X \to X$ eine Gruppenoperation von $G$ auf $X$. Dann sei $\varrho : G \to \mathrm{Perm}(X)$
+definiert durch $\varrho(g)(x) = g \circ x \;\forall g \in G, x \in X$, also $\varrho(g) = m_g$.
+$\varrho$ ist Homomorphismus: $\varrho(g_1 \cdot g_2) = m_{g_1 \cdot g_2} = m_{g_1} \circ m_{g_2} = \varrho(g_1) \circ \varrho(g_2)$, denn für $x \in X$:
+$\varrho(g_1 \cdot g_2)(x) = (g_1 \cdot g_2) \circ x = g_1 \circ (g_2 \circ x) = \varrho(g_1)(\varrho(g_2)(x)) = (\varrho(g_1) \circ \varrho(g_2))(x)$
+Umgekehrt: Sei $\varrho : G \to \mathrm{Perm}(X)$ Gruppenhomomorphismus. Definiere $\circ : G \times X \to X$
+durch $g \circ x = \varrho(g)(x)$.
+z. z. Definition 55.b:
+$g_1 \circ (g_2 \circ x) = \varrho(g_1)(g_2 \circ x)$
+$= \varrho(g_1)(\varrho(g_2)(x))$
+$= (\varrho(g_1) \circ \varrho(g_2))(x)$
+$\overset{\varrho \text{ ist Hom.}}{=} \varrho(g_1 \cdot g_2)(x)$
+$= (g_1 \cdot g_2) \circ x$
+z. z. Definition 55.a: $1_G \circ x = \varrho(1_G)(x) = \mathrm{id}_X(x) = x$, weil $\varrho$ ein Homomorphismus ist.
+Beispiel 43
+Sei X ein wegzusammenhängender topologischer Raum, p : X˜ → X eine universelle Überlagerung, x0 ∈ X, x˜0 ∈ X˜ mit p( ˜x0) = x0.
+Dann operiert π1(X, x0) auf X˜ durch Homöomorphismen wie folgt:
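+Für $[\gamma] \in \pi_1(X, x_0)$ und $\tilde{x} \in \tilde{X}$ sei $[\gamma] \circ \tilde{x} = \widetilde{\gamma * \delta}(1)$, wobei $\tilde{\delta}$ ein Weg von $\tilde{x}_0$ nach $\tilde{x}$ in $\tilde{X}$
+sei, $\delta := p(\tilde{\delta}) = p \circ \tilde{\delta}$.
+Also: $\delta$ ist ein Weg in $X$ von $x_0$ nach $x = p(\tilde{x})$ und $\widetilde{\gamma * \delta}$ die Liftung von $\gamma * \delta$ mit
+Anfangspunkt $\tilde{x}_0$.
+$[\gamma] \circ \tilde{x}$ hängt nicht von der Wahl von $\tilde{\delta}$ ab; ist $\tilde{\delta}'$ ein anderer Weg von $\tilde{x}_0$ nach $\tilde{x}$, so sind $\tilde{\delta}$
+und $\tilde{\delta}'$ homotop, also auch $\widetilde{\gamma * \delta}$ und $\widetilde{\gamma * \delta'}$ homotop.
+Gruppenoperation, denn:
+i) $[e] \circ \tilde{x} = \widetilde{e * \delta}(1) = \tilde{x}$
+ii) $\widetilde{\gamma_1 * \gamma_2 * \delta}(1) = [\gamma_1 * \gamma_2] \circ \tilde{x} = ([\gamma_1] * [\gamma_2]) \circ \tilde{x}$
+$\widetilde{\gamma_1 * \gamma_2 * \delta}(1) = [\gamma_1] \circ (\widetilde{\gamma_2 * \delta})(1) = [\gamma_1] \circ ([\gamma_2] \circ \tilde{x})$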
+Erinnerung: Die Konstruktion aus Bemerkung 59 induziert zu der Gruppenoperation von $\pi_1(X, x_0)$
+aus Beispiel 43 einen Gruppenhomomorphismus $\varrho : \pi_1(X, x_0) \to \text{Homöo}(\tilde{X})$. Nach Satz 3.8 ist
+$\varrho(\pi_1(X, x_0)) = \mathrm{Deck}(\tilde{X}/X)$
+=
+n
+f : X˜ → X˜ Homöomorphismus
+
+
+ p ◦ f = p
 o
-64 3.4. GRUPPENOPERATIONEN
-Beispiel 44
-Sei X := S
-2 ⊆ R
-3 und τ die Drehung um die z-Achse um 180◦
-.
-g = hτ i = { id, τ } operiert auf S
-2 durch Homöomorphismen.
-Frage: Was ist S
-2/G? Ist S
-2/G eine Mannigfaltigkeit?
-4 Euklidische und nichteuklidische
-Geometrie
-Definition 57
-Das Tripel (X, d, G) heißt genau dann eine Geometrie, wenn (X, d) ein metrischer Raum
-und ∅ 6= G ⊆ P(X) gilt. Dann heißt G die Menge aller Geraden.
-4.1 Axiome für die euklidische Ebene
-Axiome bilden die Grundbausteine jeder mathematischen Theorie. Eine Sammlung aus Axiomen
-nennt man Axiomensystem. Da der Begriff des Axiomensystems so grundlegend ist, hat man
-auch ein paar sehr grundlegende Forderungen an ihn: Axiomensysteme sollen widerspruchsfrei
-sein, die Axiome sollen möglichst unabhängig sein und Vollständigkeit wäre auch toll. Mit
-Unabhängigkeit ist gemeint, dass kein Axiom sich aus einem anderem herleiten lässt. Dies scheint
-auf den ersten Blick eine einfache Eigenschaft zu sein. Auf den zweiten Blick muss man jedoch
-einsehen, dass das Parallelenproblem, also die Frage ob das Parallelenaxiom unabhängig von
-den restlichen Axiomen ist, über 2000 Jahre nicht gelöst wurde. Ein ganz anderes Kaliber ist
-die Frage nach der Vollständigkeit. Ein Axiomensystem gilt als Vollständig, wenn jede Aussage
-innerhalb des Systems verifizierbar oder falsifizierbar ist. Interessant ist hierbei der Gödelsche
-Unvollständigkeitssatz, der z. B. für die Arithmetik beweist, dass nicht alle Aussagen formal
-bewiesen oder widerlegt werden können.
-Kehren wir nun jedoch zurück zur Geometrie. Euklid hat in seiner Abhandlung „Die Elemente“
-ein Axiomensystem für die Geometrie aufgestellt.
-Euklids Axiome
-• Strecke zwischen je zwei Punkten
-• Jede Strecke bestimmt genau eine Gerade
-• Kreis (um jeden Punkt mit jedem Radius)
-• Je zwei rechte Winkel sind gleich (Isometrie, Bewegung)
-• Parallelenaxiom von Euklid:
-Wird eine Gerade so von zwei Geraden geschnitten, dass die Summe der Innenwinkel
-kleiner als zwei Rechte ist, dann schneiden sich diese Geraden auf der Seite dieser Winkel.
-Man mache sich klar, dass das nur dann nicht der Fall ist, wenn beide Geraden par￾allel sind und senkrecht auf die erste stehen.
-Definition 58
-Eine euklidische Ebene ist eine Geometrie (X, d, G), die Axiome §1 - §5 erfüllt:
+ 3.4. GRUPPENOPERATIONEN
+Beispiel 44
+Sei $X := S^2 \subseteq \mathbb{R}^3$ und $\tau$ die Drehung um die $z$-Achse um $180^\circ$.
+$G = \langle \tau \rangle = \{ \mathrm{id}, \tau \}$ operiert auf $S^2$ durch Homöomorphismen.
+Frage: Was ist $S^2/G$? Ist $S^2/G$ eine Mannigfaltigkeit?
+4 Euklidische und nichteuklidische
+Geometrie
+Definition 57
+Das Tripel (X, d, G) heißt genau dann eine Geometrie, wenn (X, d) ein metrischer Raum
+und ∅ ≠ G ⊆ P(X) gilt. Dann heißt G die Menge aller Geraden.
+4.1 Axiome für die euklidische Ebene
+Axiome bilden die Grundbausteine jeder mathematischen Theorie. Eine Sammlung aus Axiomen
+nennt man Axiomensystem. Da der Begriff des Axiomensystems so grundlegend ist, hat man
+auch ein paar sehr grundlegende Forderungen an ihn: Axiomensysteme sollen widerspruchsfrei
+sein, die Axiome sollen möglichst unabhängig sein und Vollständigkeit wäre auch toll. Mit
+Unabhängigkeit ist gemeint, dass kein Axiom sich aus einem anderen herleiten lässt. Dies scheint
+auf den ersten Blick eine einfache Eigenschaft zu sein. Auf den zweiten Blick muss man jedoch
+einsehen, dass das Parallelenproblem, also die Frage ob das Parallelenaxiom unabhängig von
+den restlichen Axiomen ist, über 2000 Jahre nicht gelöst wurde. Ein ganz anderes Kaliber ist
+die Frage nach der Vollständigkeit. Ein Axiomensystem gilt als vollständig, wenn jede Aussage
+innerhalb des Systems verifizierbar oder falsifizierbar ist. Interessant ist hierbei der Gödelsche
+Unvollständigkeitssatz, der z. B. für die Arithmetik beweist, dass nicht alle Aussagen formal
+bewiesen oder widerlegt werden können.
+Kehren wir nun jedoch zurück zur Geometrie. Euklid hat in seiner Abhandlung „Die Elemente“
+ein Axiomensystem für die Geometrie aufgestellt.
+Euklids Axiome
+• Strecke zwischen je zwei Punkten
+• Jede Strecke bestimmt genau eine Gerade
+• Kreis (um jeden Punkt mit jedem Radius)
+• Je zwei rechte Winkel sind gleich (Isometrie, Bewegung)
+• Parallelenaxiom von Euklid:
+Wird eine Gerade so von zwei Geraden geschnitten, dass die Summe der Innenwinkel
+kleiner als zwei Rechte ist, dann schneiden sich diese Geraden auf der Seite dieser Winkel.
+Man mache sich klar, dass das nur dann nicht der Fall ist, wenn beide Geraden parallel sind und senkrecht auf die erste stehen.
+Definition 58
+Eine euklidische Ebene ist eine Geometrie (X, d, G), die Axiome §1 - §5 erfüllt:
 §1) Inzidenzaxiome:
-66 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
-(i) Zu P 6= Q ∈ X gibt es genau ein g ∈ G mit { P, Q } ⊆ g.
-(ii) |g| ≥ 2 ∀g ∈ G
-(iii) X /∈ G
-§2) Abstandsaxiom: Zu P, Q, R ∈ X gibt es genau dann ein g ∈ G mit { P, Q, R } ⊆ g,
-wenn gilt:
-• d(P, R) = d(P, Q) + d(Q, R) oder
-• d(P, Q) = d(P, R) + d(R, Q) oder
-• d(Q, R) = d(Q, P) + d(P, R)
-Definition 59
-Sei (X, d, G) eine Geometrie und seien P, Q, R ∈ X.
-a) P, Q, R liegen kollinear, wenn es g ∈ G gibt mit { P, Q, R } ⊆ g.
-b) Q liegt zwischen P und R, wenn d(P, R) = d(P, Q) + d(Q, R)
-c) Strecke P R := { Q ∈ X | Q liegt zwischen P und R }
-d) Halbgeraden:
-P R+ := {Q ∈ X|Q liegt zwischen P und R oder
-R liegt zwischen P und Q}
-P R− := { Q ∈ X | P liegt zwischen Q und R }
-P R
-P R− P R
-P R+
-Abbildung 4.1: Halbgeraden
-Bemerkung 60
-a) P R+ ∪ P R− = P R
-b) P R+ ∩ P R− = { P }
-Beweis:
-a) „⊆“ folgt direkt aus der Definition von P R+ und P R−
-„⊇“: Sei Q ∈ P R ⇒ P, Q, R sind kollinear.
-2
-⇒
-
-
-
-Q liegt zwischen P und R ⇒ Q ∈ P R
-R liegt zwischen P und Q ⇒ Q ∈ P R
-P liegt zwischen Q und R ⇒ Q ∈ P R
-b) „⊇“ ist offensichtlich
-„⊆“: Sei P R+ ∩ P R−. Dann ist d(Q, R) = d(P, Q) + d(P, R) weil Q ∈ P R− und
-
-d(P, R) = d(P, Q) + d(Q, R) oder
-d(P, Q) = d(P, R) + d(R, Q)
+ 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
+(i) Zu P ≠ Q ∈ X gibt es genau ein g ∈ G mit { P, Q } ⊆ g.
+(ii) |g| ≥ 2 ∀g ∈ G
+(iii) X ∉ G
+§2) Abstandsaxiom: Zu P, Q, R ∈ X gibt es genau dann ein g ∈ G mit { P, Q, R } ⊆ g,
+wenn gilt:
+• d(P, R) = d(P, Q) + d(Q, R) oder
+• d(P, Q) = d(P, R) + d(R, Q) oder
+• d(Q, R) = d(Q, P) + d(P, R)
+Definition 59
+Sei (X, d, G) eine Geometrie und seien P, Q, R ∈ X.
+a) P, Q, R liegen kollinear, wenn es g ∈ G gibt mit { P, Q, R } ⊆ g.
+b) Q liegt zwischen P und R, wenn d(P, R) = d(P, Q) + d(Q, R)
+c) Strecke P R := { Q ∈ X | Q liegt zwischen P und R }
+d) Halbgeraden:
+P R+ := {Q ∈ X|Q liegt zwischen P und R oder
+R liegt zwischen P und Q}
+P R− := { Q ∈ X | P liegt zwischen Q und R }
+P R
+P R− P R
+P R+
+Abbildung 4.1: Halbgeraden
+Bemerkung 60
+a) P R+ ∪ P R− = P R
+b) P R+ ∩ P R− = { P }
+Beweis:
+a) „⊆“ folgt direkt aus der Definition von P R+ und P R−
+„⊇“: Sei Q ∈ P R ⇒ P, Q, R sind kollinear.
+2
+⇒
+
+
+
+Q liegt zwischen P und R ⇒ Q ∈ P R
+R liegt zwischen P und Q ⇒ Q ∈ P R
+P liegt zwischen Q und R ⇒ Q ∈ P R
+b) „⊇“ ist offensichtlich
+„⊆“: Sei Q ∈ P R+ ∩ P R−. Dann ist d(Q, R) = d(P, Q) + d(P, R) weil Q ∈ P R− und
+
+d(P, R) = d(P, Q) + d(Q, R) oder
+d(P, Q) = d(P, R) + d(R, Q)
 
-67 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
-⇒ d(Q, R) = 2d(P, Q) + d(Q, R)
-⇒ d(P, Q) = 0
-⇒ P = Q
-d(P, Q) = 2d(P, R) + d(P, Q)
-⇒ P = R
-⇒ Widerspruch
-Definition 60
-§3) Anordnungsaxiome
-(i) Zu jeder Halbgerade H mit Anfangspunkt P ∈ X und jedem r ∈ R≥0 gibt es
-genau ein Q ∈ H mit d(P, Q) = r.
-(ii) Jede Gerade zerlegt X \g = H1∪˙ H2 in zwei nichtleere Teilmengen H1, H2, sodass
-für alle A ∈ Hi
-, B ∈ Hj mit i, j ∈ { 1, 2 } gilt: AB ∩ g 6= ∅ ⇔ i 6= j.
-Diese Teilmengen Hi heißen Halbebenen bzgl. g.
-§4) Bewegungsaxiom: Zu P, Q, P0
-, Q0 ∈ X mit d(P, Q) = d(P
-0
-, Q0
-) gibt es mindestens
-2 Isometrien ϕ1, ϕ2 mit ϕi(P) = P
-0 und ϕi(Q) = Q0 mit i = 1, 2.
-1
-§5) Parallelenaxiom: Zu jeder Geraden g ∈ G und jedem Punkt P ∈ X \ g gibt es
-höchstens ein h ∈ G mit P ∈ h und h ∩ g = ∅. h heißt Parallele zu g durch P.
-Satz 4.1 (Satz von Pasch)
-Seien P, Q, R nicht kollinear, g ∈ G mit g ∩ { P, Q, R } = ∅ und g ∩ P Q 6= ∅.
-Dann ist entweder g ∩ P R 6= ∅ oder g ∩ QR 6= ∅.
-Dieser Satz besagt, dass Geraden, die eine Seite eines Dreiecks (also nicht nur eine Ecke)
-schneiden, auch eine weitere Seite schneiden.
-Beweis: g ∩ P Q 6= ∅
-3(ii) ⇒ P und Q liegen in verschiedenen Halbebenen bzgl. g
-⇒ o. B. d. A. R und P liegen in verschieden Halbebenen bzgl. g
-⇒ g ∩ RP 6= ∅
-Bemerkung 61
-Sei P, Q ∈ X mit P 6= Q sowie A, B ∈ X \ P Q mit A =6 B. Außerdem seien A und B in der
-selben Halbebene bzgl. P Q sowie Q und B in der selben Halbebene bzgl. P A.
-Dann gilt: P B+ ∩ AQ 6= ∅
-Auch Bemerkung 61 lässt sich umgangssprachlich sehr viel einfacher ausdrücken: Die Diagonalen
-eines konvexen Vierecks schneiden sich.
-Beweis: Sei P
-0 ∈ P Q−, P0 6= P
-Satz 4.1 ====⇒ P B schneidet AP0 ∪ AQ
-Sei C der Schnittpunkt. Dann gilt:
-1Die „Verschiebung“ von P
-0Q
-0 nach P Q und die Isometrie, die zusätzlich an der Gerade durch P und Q spiegelt.
-68 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
-P
-P
-0
-Q
-A B
-C
-Abbildung 4.2: Situation aus Bemerkung 61
-(i) C ∈ P B+, denn A und B liegen in derselben Halbebene bzgl. P Q = P
-0Q, also auch
-AP0 und AQ.
-(ii) C liegt in derselben Halbebene bzgl. P A wie B, weil das für Q gilt.
-AP0
-liegt in der anderen Halbebene bzgl. P A ⇒ C /∈ P0A ⇒ C ∈ AQ
-Da C ∈ P B+ und C ∈ AQ folgt nun direkt: ∅ 6= { C } ⊆ P B+ ∩ AQ 
-Bemerkung 62
-Seien P, Q ∈ X mit P 6= Q und A, B ∈ X \P Q in der selben Halbebene bzgl. P Q. Außerdem
-sei d(A, P) = d(B, P) und d(A, Q) = d(B, Q).
-Dann ist A = B.
-P
-Q
-A
-B
-Abbildung 4.3: Bemerkung 62: Die beiden roten und die beiden blauen Linien sind gleich lang.
-Intuitiv weiß man, dass daraus folgt, dass A = B gilt.
-Beweis: durch Widerspruch
-Annahme: A 6= B
-Dann ist B /∈ (P A ∪ QA) wegen §2.
-1. Fall: Q und B liegen in derselben Halbebene bzgl. P A
-Bem. 61 =====⇒ P B+ ∩ AQ 6= ∅.
-Sei C der Schnittpunkt vom P B und AQ.
-Dann gilt:
-(i) d(A, C) + d(C, Q) = d(A, Q)
+ 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
+⇒ d(Q, R) = 2d(P, Q) + d(Q, R)
+⇒ d(P, Q) = 0
+⇒ P = Q
+d(P, Q) = 2d(P, R) + d(P, Q)
+⇒ P = R
+⇒ Widerspruch
+Definition 60
+§3) Anordnungsaxiome
+(i) Zu jeder Halbgerade H mit Anfangspunkt P ∈ X und jedem r ∈ R≥0 gibt es
+genau ein Q ∈ H mit d(P, Q) = r.
+(ii) Jede Gerade zerlegt X \g = H1∪˙ H2 in zwei nichtleere Teilmengen H1, H2, sodass
+für alle A ∈ Hi, B ∈ Hj mit i, j ∈ { 1, 2 } gilt: AB ∩ g 6= ∅ ⇔ i 6= j.
+Diese Teilmengen Hi heißen Halbebenen bzgl. g.
+§4) Bewegungsaxiom: Zu P, Q, P0, Q0 ∈ X mit d(P, Q) = d(P
+0
+, Q0) gibt es mindestens
+2 Isometrien ϕ1, ϕ2 mit ϕi(P) = P
+0 und ϕi(Q) = Q0 mit i = 1, 2.1
+§5) Parallelenaxiom: Zu jeder Geraden g ∈ G und jedem Punkt P ∈ X \ g gibt es
+höchstens ein h ∈ G mit P ∈ h und h ∩ g = ∅. h heißt Parallele zu g durch P.
+Satz 4.1 (Satz von Pasch)
+Seien P, Q, R nicht kollinear, g ∈ G mit g ∩ { P, Q, R } = ∅ und g ∩ P Q 6= ∅.
+Dann ist entweder g ∩ P R 6= ∅ oder g ∩ QR 6= ∅.
+Dieser Satz besagt, dass Geraden, die eine Seite eines Dreiecks (also nicht nur eine Ecke)
+schneiden, auch eine weitere Seite schneiden.
+Beweis: g ∩ P Q 6= ∅
+3(ii) ⇒ P und Q liegen in verschiedenen Halbebenen bzgl. g
+⇒ o. B. d. A. R und P liegen in verschiedenen Halbebenen bzgl. g
+⇒ g ∩ RP 6= ∅
+Bemerkung 61
+Sei P, Q ∈ X mit P 6= Q sowie A, B ∈ X \ P Q mit A 6= B. Außerdem seien A und B in der
+selben Halbebene bzgl. P Q sowie Q und B in der selben Halbebene bzgl. P A.
+Dann gilt: P B+ ∩ AQ 6= ∅
+Auch Bemerkung 61 lässt sich umgangssprachlich sehr viel einfacher ausdrücken: Die Diagonalen
+eines konvexen Vierecks schneiden sich.
+Beweis: Sei P
+0 ∈ P Q−, P0 6= P
+Satz 4.1 ====⇒ P B schneidet AP0 ∪ AQ
+Sei C der Schnittpunkt. Dann gilt:
+1Die „Verschiebung“ von P0Q0 nach P Q und die Isometrie, die zusätzlich an der Gerade durch P und Q spiegelt.
+ 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
+P
+P
+0
+Q
+A B
+C
+Abbildung 4.2: Situation aus Bemerkung 61
+(i) C ∈ P B+, denn A und B liegen in derselben Halbebene bzgl. P Q = P
+0Q, also auch
+AP0 und AQ.
+(ii) C liegt in derselben Halbebene bzgl. P A wie B, weil das für Q gilt.
+AP0liegt in der anderen Halbebene bzgl. P A ⇒ C /∈ P0A ⇒ C ∈ AQ
+Da C ∈ P B+ und C ∈ AQ folgt nun direkt: ∅ 6= { C } ⊆ P B+ ∩ AQ 
+Bemerkung 62
+Seien P, Q ∈ X mit P 6= Q und A, B ∈ X \P Q in der selben Halbebene bzgl. P Q. Außerdem
+sei d(A, P) = d(B, P) und d(A, Q) = d(B, Q).
+Dann ist A = B.
+P
+Q
+A
+B
+Abbildung 4.3: Bemerkung 62: Die beiden roten und die beiden blauen Linien sind gleich lang.
+Intuitiv weiß man, dass daraus folgt, dass A = B gilt.
+Beweis: durch Widerspruch
+Annahme: A 6= B
+Dann ist B /∈ (P A ∪ QA) wegen §2.
+1. Fall: Q und B liegen in derselben Halbebene bzgl. P A
+Bem. 61 =====⇒ P B+ ∩ AQ 6= ∅.
+Sei C der Schnittpunkt von P B+ und AQ.
+Dann gilt:
+(i) d(A, C) + d(C, Q) = d(A, Q)
 Vor. = d(B, Q) < d(B, C) + d(C, Q) ⇒ d(A, C) < d(B, C)
-69 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
-P Q
-B
-C
-A
-(a) 1. Fall
-P
-Q
-B A
-(b) 2. Fall
-Abbildung 4.4: Fallunterscheidung aus Bemerkung 62
-(ii) a) B liegt zwischen P und C.
-d(P, A) + d(A, C) > d(P, C) = d(P, B) + d(B, C) = d(P, A) + d(B, C) ⇒
-d(A, C) > d(B, C) ⇒ Widerspruch zu Punkt (i)
-b) C liegt zwischen P und B
-d(P, C) + d(C, A) > d(P, A) = d(P, B) = d(P, C) + d(C, B)
-⇒ d(C, A) > d(C, B)
-⇒ Widerspruch zu Punkt (i)
-2. Fall: Q und B liegen auf verschieden Halbebenen bzgl. P A.
-Dann liegen A und Q in derselben Halbebene bzgl. P B.
-Tausche A und B ⇒ Fall 1 
-Bemerkung 63
-Sei (X, d, G) eine Geometrie, die §1 - §3 erfüllt, P, Q ∈ X mit P 6= Q und ϕ eine Isometrie
-mit ϕ(P) = P und ϕ(Q) = Q.
-Dann gilt ϕ(S) = S ∀S ∈ P Q.
-Beweis:
-O. B. d. A. sei S ∈ P Q 2
-⇔ d(P, Q) = d(P, S) + d(S, Q)
-ϕ∈Iso(X) ⇒ d(ϕ(P), ϕ(Q)) = d(ϕ(P), ϕ(S)) + d(ϕ(S), ϕ(Q))
-P,Q∈Fix(ϕ) ⇒ d(P, Q) = d(P, ϕ(S)) + d(ϕ(S), Q)
-⇒ ϕ(S) liegt zwischen P und Q
-⇒ d(P, S) = d(ϕ(P), ϕ(S)) = d(P, ϕ(S))
-3(i) ⇒ ϕ(S) = S
-
-Proposition 4.2
-In einer Geometrie, die §1 - §3 erfüllt, gibt es zu P, P0
-, Q, Q0 mit d(P, Q) = d(P
-0
-, Q0
-)
-höchstens zwei Isometrien mit ϕ(P) = P
+ 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
+P Q
+B
+C
+A
+(a) 1. Fall
+P
+Q
+B A
+(b) 2. Fall
+Abbildung 4.4: Fallunterscheidung aus Bemerkung 62
+(ii) a) B liegt zwischen P und C.
+d(P, A) + d(A, C) > d(P, C) = d(P, B) + d(B, C) = d(P, A) + d(B, C) ⇒
+d(A, C) > d(B, C) ⇒ Widerspruch zu Punkt (i)
+b) C liegt zwischen P und B
+d(P, C) + d(C, A) > d(P, A) = d(P, B) = d(P, C) + d(C, B)
+⇒ d(C, A) > d(C, B)
+⇒ Widerspruch zu Punkt (i)
+2. Fall: Q und B liegen in verschiedenen Halbebenen bzgl. P A.
+Dann liegen A und Q in derselben Halbebene bzgl. P B.
+Tausche A und B ⇒ Fall 1 
+Bemerkung 63
+Sei (X, d, G) eine Geometrie, die §1 - §3 erfüllt, P, Q ∈ X mit P 6= Q und ϕ eine Isometrie
+mit ϕ(P) = P und ϕ(Q) = Q.
+Dann gilt ϕ(S) = S ∀S ∈ P Q.
+Beweis:
+O. B. d. A. sei S ∈ P Q 2⇔ d(P, Q) = d(P, S) + d(S, Q)
+ϕ∈Iso(X) ⇒ d(ϕ(P), ϕ(Q)) = d(ϕ(P), ϕ(S)) + d(ϕ(S), ϕ(Q))
+P,Q∈Fix(ϕ) ⇒ d(P, Q) = d(P, ϕ(S)) + d(ϕ(S), Q)
+⇒ ϕ(S) liegt zwischen P und Q
+⇒ d(P, S) = d(ϕ(P), ϕ(S)) = d(P, ϕ(S))
+3(i) ⇒ ϕ(S) = S
+
+Proposition 4.2
+In einer Geometrie, die §1 - §3 erfüllt, gibt es zu P, P0, Q, Q0 mit d(P, Q) = d(P
+0
+, Q0)
+höchstens zwei Isometrien mit ϕ(P) = P
 0 und ϕ(Q) = Q0
-70 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
-Aus den Axiomen folgt, dass es in der Situation von §4 höchstens zwei Isometrien mit
-ϕi(P) = P
-0 und ϕi(Q) = Q0 gibt.
-Beweis: Seien ϕ1, ϕ2, ϕ3 Isometrien mit ϕi(P) = P
-0
-, ϕi(Q) = Q0 mit i = 1, 2, 3.
-Der Beweis von Proposition 4.2 erfolgt über zwei Teilaussagen:
-(Teil i) ∃R ∈ X \ P Q mit ϕ1(R) = ϕ2(R).
-(Teil ii) Hat ϕ 3 Fixpunkte, die nicht kollinear sind, so ist ϕ = idX.
-Aus (Teil i) und (Teil ii) folgt, dass ϕ
-−1
-2
-◦ ϕ1 = idX, also ϕ2 = ϕ1, da P, Q und R in diesem
-Fall Fixpunkte sind.
-Nun zu den Beweisen der Teilaussagen:
-(Teil i) Sei R ∈ X \ P Q. Von den drei Punkten ϕ1(R), ϕ2(R), ϕ3(R) liegen zwei in der selben
-Halbebene bzgl. P
-0Q0 = ϕi(P Q).
-O. B. d. A. seien ϕ1(R) und ϕ2(R) in der selben Halbebene.
-Es gilt: d(P
-0
-, ϕ1(R)) = d(ϕ1(P), ϕ1(R))
-= d(P, R)
-= d(ϕ2(P), ϕ2(R))
-= d(P
-0
-, ϕ2(R))
-und analog d(Q0
-, ϕ1(R)) = d(Q0
-, ϕ2(R))
-(Teil ii) Seien P, Q und R Fixpunkte von ϕ, R /∈ P Q und A /∈ P Q ∪ P R ∪ QR. Sei B ∈
-P Q \ { P, Q }. Dann ist ϕ(B) = B wegen Bemerkung 63.
-Ist R ∈ AB, so enthält AB 2 Fixpunkte von ϕ
-Bem. 63 =====⇒ ϕ(A) = A.
-P B Q
-C
-R
-A
-Abbildung 4.5: P, Q, R sind Fixpunkte, B ∈ P Q \ { P, Q }, A /∈ P Q ∪ P R ∪ QR
-Ist R /∈ AB, so ist AB ∩ P R 6= ∅ oder AB ∈ RQ 6= ∅ nach Satz 4.1. Der Schnittpunkt
-C ist dann Fixpunkt von ϕ
-0 nach Bemerkung 63 ⇒ ϕ(A) = A.
-Bemerkung 64 (SWS-Kongruenzsatz)
-Sei (X, d, G) eine Geometrie, die §1 - §4 erfüllt. Seien außerdem 4ABC und 4A0B0C
-0
-Dreiecke, für die gilt:
-(i) d(A, B) = d(A0
-, B0
-)
-(ii) ∠CAB ∼= ∠C
+ 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
+Aus den Axiomen folgt, dass es in der Situation von §4 höchstens zwei Isometrien mit
+ϕi(P) = P
+0 und ϕi(Q) = Q0 gibt.
+Beweis: Seien ϕ1, ϕ2, ϕ3 Isometrien mit ϕi(P) = P
+0
+, ϕi(Q) = Q0 mit i = 1, 2, 3.
+Der Beweis von Proposition 4.2 erfolgt über zwei Teilaussagen:
+(Teil i) ∃R ∈ X \ P Q mit ϕ1(R) = ϕ2(R).
+(Teil ii) Hat ϕ 3 Fixpunkte, die nicht kollinear sind, so ist ϕ = idX.
+Aus (Teil i) und (Teil ii) folgt, dass ϕ
+−1
+2
+◦ ϕ1 = idX, also ϕ2 = ϕ1, da P, Q und R in diesem
+Fall Fixpunkte sind.
+Nun zu den Beweisen der Teilaussagen:
+(Teil i) Sei R ∈ X \ P Q. Von den drei Punkten ϕ1(R), ϕ2(R), ϕ3(R) liegen zwei in der selben
+Halbebene bzgl. P
+0Q0 = ϕi(P Q).
+O. B. d. A. seien ϕ1(R) und ϕ2(R) in der selben Halbebene.
+Es gilt: d(P
+0
+, ϕ1(R)) = d(ϕ1(P), ϕ1(R))
+= d(P, R)
+= d(ϕ2(P), ϕ2(R))
+= d(P
+0
+, ϕ2(R))
+und analog d(Q0, ϕ1(R)) = d(Q0, ϕ2(R))
+(Teil ii) Seien P, Q und R Fixpunkte von ϕ, R /∈ P Q und A /∈ P Q ∪ P R ∪ QR. Sei B ∈
+P Q \ { P, Q }. Dann ist ϕ(B) = B wegen Bemerkung 63.
+Ist R ∈ AB, so enthält AB 2 Fixpunkte von ϕ
+Bem. 63 =====⇒ ϕ(A) = A.
+P B Q
+C
+R
+A
+Abbildung 4.5: P, Q, R sind Fixpunkte, B ∈ P Q \ { P, Q }, A /∈ P Q ∪ P R ∪ QR
+Ist R ∉ AB, so ist AB ∩ P R ≠ ∅ oder AB ∩ RQ ≠ ∅ nach Satz 4.1. Der Schnittpunkt
+C ist dann Fixpunkt von ϕ
+0 nach Bemerkung 63 ⇒ ϕ(A) = A.
+Bemerkung 64 (SWS-Kongruenzsatz)
+Sei (X, d, G) eine Geometrie, die §1 - §4 erfüllt. Seien außerdem 4ABC und 4A0B0C
+0
+Dreiecke, für die gilt:
+(i) d(A, B) = d(A0, B0)
+(ii) ∠CAB ∼= ∠C
+0A0B0
+ 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
+(iii) d(A, C) = d(A0, C0)
+Dann ist 4ABC kongruent zu 4A0B0C
+0
+.
+Beweis: Sei ϕ die Isometrie mit ϕ(A0
+) = A, ϕ(A0C
+0+) = AC+ und ϕ(A0B0+) = AB+. Diese
+Isometrie existiert wegen Punkt §4.
+⇒ C ∈ ϕ(A0C
+0+) und B ∈ ϕ(A0B0+).
+d(A0, C0) = d(ϕ(A0), ϕ(C
+0
+)) = d(A, ϕ(C
+0
+)) 3(i)==⇒ ϕ(C
+0
+) = C
+d(A0, B0) = d(ϕ(A0), ϕ(B0)) = d(A, ϕ(B0)) 3(i)==⇒ ϕ(B0) = B
+Also gilt insbesondere ϕ(4A0B0C
+0
+) = 4ABC. 
+Bemerkung 65 (WSW-Kongruenzsatz)
+Sei (X, d, G) eine Geometrie, die §1 - §4 erfüllt. Seien außerdem 4ABC und 4A0B0C
+0
+Dreiecke, für die gilt:
+(i) d(A, B) = d(A0, B0)
+(ii) ∠CAB ∼= ∠C
 0A0B0
-71 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
-(iii) d(A, C) = d(A0
-, C0
-)
-Dann ist 4ABC kongruent zu 4A0B0C
-0
-.
-Beweis: Sei ϕ die Isometrie mit ϕ(A0
-) = A, ϕ(A0C
-0+) = AC+ und ϕ(A0B0+) = AB+. Diese
-Isometrie existiert wegen Punkt §4.
-⇒ C ∈ ϕ(A0C
-0+) und B ∈ ϕ(A0B0+).
-d(A0
-, C0
-) = d(ϕ(A0
-), ϕ(C
-0
-)) = d(A, ϕ(C
-0
-)) 3(i)
-==⇒ ϕ(C
-0
-) = C
-d(A0
-, B0
-) = d(ϕ(A0
-), ϕ(B0
-)) = d(A, ϕ(B0
-)) 3(i)
-==⇒ ϕ(B0
-) = B
-Also gilt insbesondere ϕ(4A0B0C
-0
-) = 4ABC. 
-Bemerkung 65 (WSW-Kongruenzsatz)
-Sei (X, d, G) eine Geometrie, die §1 - §4 erfüllt. Seien außerdem 4ABC und 4A0B0C
-0
-Dreiecke, für die gilt:
-(i) d(A, B) = d(A0
-, B0
-)
-(ii) ∠CAB ∼= ∠C
-0A0B0
-(iii) ∠ABC ∼= ∠A0B0C
-0
-Dann ist 4ABC kongruent zu 4A0B0C
-0
-.
-Beweis: Sei ϕ die Isometrie mit ϕ(A0
-) = A, ϕ(B0
-) = B und ϕ(C
-0
-) liegt in der selben Halbebene
-bzgl. AB wie C. Diese Isometrie existiert wegen §4.
-Aus ∠CAB = ∠C
-0A0B0 = ∠ϕ(C
-0
-)ϕ(A0
-)ϕ(B0
-) = ∠ϕ(C
-0
-)AB folgt, dass ϕ(C
-0
-) ∈ AC+.
-Analog folgt aus ∠ABC = ∠A0B0C
-0 = ∠ϕ(A0
-)ϕ(B0
-)ϕ(C
-0
-) = ∠ABϕ(C
-0
-), dass ϕ(C
-0
-) ∈
-BC+.
-Dann gilt ϕ(C
-0
-) ∈ AC ∩ BC = { C } ⇒ ϕ(C
-0
-) = C.
-Es gilt also ϕ(4A0B0C
-0
-) = 4ABC. 
-Definition 61
-a) Ein Winkel ist ein Punkt P ∈ X zusammen mit 2 Halbgeraden mit Anfangspunkt P.
-Man schreibt: ∠R1P R2 bzw. ∠R2P R1
-2
-b) Zwei Winkel sind gleich, wenn es eine Isometrie gibt, die den einen Winkel auf den
-anderen abbildet.
-c) ∠R0
-1P
-0R0
-2 heißt kleiner als ∠R1P R2, wenn es eine Isometrie ϕ gibt, mit ϕ(P
-0
-) = P,
-ϕ(P
-0R
-0+
-1
-) = P R+
-1
-und ϕ(R0
-2
-) liegt in der gleichen Halbebene bzgl. P R1 wie R2 und in
-der gleichen Halbebene bzgl. P R2 wie R1
-d) Im Dreieck 4P QR gibt es Innenwinkel und Außenwinkel.
-Bemerkung 66
-In einem Dreieck ist jeder Innenwinkel kleiner als jeder nicht anliegende Außenwinkel.
-Beweis: Zeige ∠P RQ < ∠RQP0
-.
-Sei M der Mittelpunkt der Strecke QR und P
-0 ∈ P Q+ \ P Q. Sei A ∈ MP − mit d(P, M) =
-d(M, A).
-2Für dieses Skript gilt: ∠R1P R2 = ∠R2P R1. Also sind insbesondere alle Winkel ≤ 180◦
+(iii) ∠ABC ∼= ∠A0B0C
+0
+Dann ist 4ABC kongruent zu 4A0B0C
+0
 .
-72 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
-P R0
-1 R1
-R0
-2
-R2
-(a) ∠R
-0
-1P
-0R
-0
-2 ist kleiner als ∠R1P R2,
-vgl. Definition 61.c
-P
-Q R
-(b) Innenwinkel und Außenwin￾kel in 4P QR, vgl. Definiti￾on 61.d
-Abbildung 4.6: Situation aus Definition 61
-Q M
-A
-P
-R
-(a) Parallelogramm AQPR
-α
-β
-R
-Q P
-(b) Innen- und Außenwin￾kel von 4P QR
-Abbildung 4.7: Situation aus Bemerkung 66
-Es gilt: d(Q, M) = d(M, R) und d(P, M) = d(M, A) sowie ∠PMR = ∠AMQ ⇒ 4MRQ
-ist kongruent zu 4AMQ, denn eine der beiden Isometrien, die ∠PMR auf ∠AMQ abbildet,
-bildet R auf Q und P auf A ab.
-⇒ ∠MQA = ∠MRP = ∠QRP = ∠P RQ.
-Noch zu zeigen: ∠MQA < ∠RQP0
-, denn A liegt in der selben Halbebene bzgl. P Q wie M.
-Proposition 4.3 (Existenz der Parallelen)
-Sei (X, d, G) eine Geometrie mit den Axiomen §1 - §4.
-Dann gibt es zu jeder Geraden g ∈ G und jedem Punkt P ∈ X \ g mindestens eine
-Parallele h ∈ G mit P ∈ h und g ∩ h = ∅.
-Beweis: Seien P, Q ∈ f ∈ G und ϕ die Isometrie, die Q auf P und P auf P
-0 ∈ f mit
-d(P, P0
-) = d(P, Q) abbildet und die Halbebenen bzgl. f erhält.
-73 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
-Q
-h
-f
-g
-P
-Abbildung 4.8: Situation aus Proposition 4.3
-Annahme: ϕ(g) ∩ g 6= ∅
-⇒ Es gibt einen Schnittpunkt { R } = ϕ(g) ∩ g.
-Dann ist ∠RQP = ∠RQP0 < ∠RP P0 nach Bemerkung 66 und ∠RQP = ∠RP P0
-, weil
-ϕ(∠RQP) = ∠RP P0
-.
-⇒ Widerspruch
-⇒ ϕ(g) ∩ g = ∅ 
-Folgerung 4.4
-Die Summe zweier Innenwinkel in einem Dreieck ist kleiner als π.
-D. h. es gibt eine Isometrie ϕ mit ϕ(Q) = P und ϕ(QP +) = P R+, sodass ϕ(R) in der gleichen
-Halbebene bzgl. P Q liegt wie R.
-Beweis: Die Summe eines Innenwinkels mit den anliegenden Außenwinkeln ist π, d. h. die
-beiden Halbgeraden bilden eine Gerade.
-Abbildung 4.9: In der sphärischen Geometrie gibt es, im Gegensatz zur euklidischen Geometrie,
-Dreiecke mit drei 90◦
--Winkeln.
-Proposition 4.5
-In einer Geometrie mit den Axiomen §1 - §4 ist in jedem Dreieck die Summe der
+Beweis: Sei ϕ die Isometrie mit ϕ(A0) = A, ϕ(B0) = B und ϕ(C
+0
+) liegt in der selben Halbebene
+bzgl. AB wie C. Diese Isometrie existiert wegen §4.
+Aus ∠CAB = ∠C
+0A0B0 = ∠ϕ(C0
+)ϕ(A0)ϕ(B0) = ∠ϕ(C
+0
+)AB folgt, dass ϕ(C
+0
+) ∈ AC+.
+Analog folgt aus ∠ABC = ∠A0B0C
+0 = ∠ϕ(A0
+)ϕ(B0)ϕ(C
+0
+) = ∠ABϕ(C
+0
+), dass ϕ(C
+0
+) ∈
+BC+.
+Dann gilt ϕ(C
+0
+) ∈ AC ∩ BC = { C } ⇒ ϕ(C
+0
+) = C.
+Es gilt also ϕ(4A0B0C
+0
+) = 4ABC. 
+Definition 61
+a) Ein Winkel ist ein Punkt P ∈ X zusammen mit 2 Halbgeraden mit Anfangspunkt P.
+Man schreibt: ∠R1P R2 bzw. ∠R2P R1
+2
+b) Zwei Winkel sind gleich, wenn es eine Isometrie gibt, die den einen Winkel auf den
+anderen abbildet.
+c) ∠R0
+1P
+0R0
+2 heißt kleiner als ∠R1P R2, wenn es eine Isometrie ϕ gibt, mit ϕ(P
+0
+) = P,
+ϕ(P
+0R
+0+
+1
+) = P R+
+1
+und ϕ(R0
+2
+) liegt in der gleichen Halbebene bzgl. P R1 wie R2 und in
+der gleichen Halbebene bzgl. P R2 wie R1
+d) Im Dreieck 4P QR gibt es Innenwinkel und Außenwinkel.
+Bemerkung 66
+In einem Dreieck ist jeder Innenwinkel kleiner als jeder nicht anliegende Außenwinkel.
+Beweis: Zeige ∠P RQ < ∠RQP0.
+Sei M der Mittelpunkt der Strecke QR und P
+0 ∈ P Q+ \ P Q. Sei A ∈ MP − mit d(P, M) =
+d(M, A).
+2Für dieses Skript gilt: ∠R1P R2 = ∠R2P R1. Also sind insbesondere alle Winkel ≤ 180◦
+.
+ 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
+P R0
+1 R1
+R0
+2
+R2
+(a) ∠R
+0
+1P
+0R0
+2 ist kleiner als ∠R1P R2,
+vgl. Definition 61.c
+P
+Q R
+(b) Innenwinkel und Außenwinkel in 4P QR, vgl. Definition 61.d
+Abbildung 4.6: Situation aus Definition 61
+Q M
+A
+P
+R
+(a) Parallelogramm AQPR
+α
+β
+R
+Q P
+(b) Innen- und Außenwinkel von 4P QR
+Abbildung 4.7: Situation aus Bemerkung 66
+Es gilt: d(Q, M) = d(M, R) und d(P, M) = d(M, A) sowie ∠PMR = ∠AMQ ⇒ △PMR
+ist kongruent zu △AMQ, denn eine der beiden Isometrien, die ∠PMR auf ∠AMQ abbildet,
+bildet R auf Q und P auf A ab.
+⇒ ∠MQA = ∠MRP = ∠QRP = ∠P RQ.
+Noch zu zeigen: ∠MQA < ∠RQP0, denn A liegt in der selben Halbebene bzgl. P Q wie M.
+Proposition 4.3 (Existenz der Parallelen)
+Sei (X, d, G) eine Geometrie mit den Axiomen §1 - §4.
+Dann gibt es zu jeder Geraden g ∈ G und jedem Punkt P ∈ X \ g mindestens eine
+Parallele h ∈ G mit P ∈ h und g ∩ h = ∅.
+Beweis: Seien P, Q ∈ f ∈ G und ϕ die Isometrie, die Q auf P und P auf P
+0 ∈ f mit
+d(P, P0) = d(P, Q) abbildet und die Halbebenen bzgl. f erhält.
+ 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
+Q
+h
+f
+g
+P
+Abbildung 4.8: Situation aus Proposition 4.3
+Annahme: ϕ(g) ∩ g 6= ∅
+⇒ Es gibt einen Schnittpunkt { R } = ϕ(g) ∩ g.
+Dann ist ∠RQP = ∠RQP0 < ∠RP P0 nach Bemerkung 66 und ∠RQP = ∠RP P0, weil
+ϕ(∠RQP) = ∠RP P0.
+⇒ Widerspruch
+⇒ ϕ(g) ∩ g = ∅ 
+Folgerung 4.4
+Die Summe zweier Innenwinkel in einem Dreieck ist kleiner als π.
+D. h. es gibt eine Isometrie ϕ mit ϕ(Q) = P und ϕ(QP +) = P R+, sodass ϕ(R) in der gleichen
+Halbebene bzgl. P Q liegt wie R.
+Beweis: Die Summe eines Innenwinkels mit den anliegenden Außenwinkeln ist π, d. h. die
+beiden Halbgeraden bilden eine Gerade.
+Abbildung 4.9: In der sphärischen Geometrie gibt es, im Gegensatz zur euklidischen Geometrie,
+Dreiecke mit drei 90◦-Winkeln.
+Proposition 4.5
+In einer Geometrie mit den Axiomen §1 - §4 ist in jedem Dreieck die Summe der
 Innenwinkel ≤ π.
-74 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
-Sei im Folgenden „IWS“ die „Innenwinkelsumme“.
-Beweis: Sei 4 ein Dreieck mit IWS(4) = π + ε
-α
-β
-γ
-P
-(a) Summe der Winkel α, β und γ
-α1
-α2 β
-γ
-M
-A B
-C A0
-α
-(b) Situation aus Proposition 4.5
-Abbildung 4.10: Situation aus Proposition 4.5
-Sei α ein Innenwinkel von 4.
-Beh.: Es gibt ein Dreieck 40 mit IWS(40
-) = IWS(4) und einem Innenwinkel α
-0 ≤
-α
-2
-.
-Dann gibt es für jedes n ein 4n mit IWS(4n) = IWS(4) und Innenwinkel α
-0 ≤
-α
-2n . Für
-α
-2n < ε ist dann die Summe der beiden Innenwinkel um 4n größer als π ⇒ Widerspruch
-zu Folgerung 4.4.
-Beweis: Es seien A, B, C ∈ X und 4 das Dreieck mit den Eckpunkten A, B, C und α sei
-der Innenwinkel bei A, β der Innenwinkel bei B und γ der Innenwinkel bei C.
-Sei M der Mittelpunkt der Strecke BC. Sei außerdem α1 = ∠CAM und α2 = ∠BAM.
-Sei weiter A0 ∈ MA− mit d(A0
-, M) = d(A, M).
-Die Situation ist in Abbildung 4.10b skizziert.
-⇒ 4(MA0C) und 4(MAB) sind kongruent. ⇒ ∠ABM = ∠A0CM und ∠MA0C =
-∠MAB. ⇒ α+β +γ = IWS(4ABC) = IWS(4AA0C) und α1+α2 = α, also o. B. d. A.
-α1 ≤
-α
-2
-Bemerkung 67
-In einer euklidischen Ebene ist in jedem Dreieck die Innenwinkelsumme gleich π.
-α
-0
-α
-00
-α β
-β
-0
-γ
-A B
-C
-g
-Abbildung 4.11: Situation aus Bemerkung 67
-Beweis: Sei g eine Parallele von AB durch C.
-• Es gilt α
-0 = α wegen Proposition 4.3.
-• Es gilt β
-0 = β wegen Proposition 4.3.
-• Es gilt α
-00 = α
-0 wegen Aufgabe 8.
-75 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE
-⇒ IWS(4ABC) = γ + α
-00 + β
-0 = π
-Aus der Eigenschaft, dass die Innenwinkelsumme von Dreiecken in der euklidischen Ebene gleich
-π ist, folgen direkt die Kongruenzsätze SWW und WWS über den Kongruenzsatz WSW.
-4.2 Weitere Eigenschaften einer euklidischen Ebene
-Satz 4.6 (Strahlensatz)
-In ähnlichen Dreiecken sind Verhältnisse entsprechender Seiten gleich.
-x
-y
-−1 0 1 2 3 4
-0
-1
-2
-3
-z
-x
-λ
-2
-z
-λ
-2x
-Abbildung 4.12: Strahlensatz
-Der Beweis wird hier nicht geführt. Für Beweisvorschläge wäre ich dankbar.
-A B0
-C
-0
-B
-C
-c
-b a
-c
-0
-b
-0
-a
-0
-Abbildung 4.13: Die Dreiecke 4ABC und 4AB0C
-0
-sind ähnlich.
-4.2.1 Flächeninhalt
-Definition 62
-„Simplizialkomplexe“ in euklidischer Ebene (X, d) heißen flächengleich, wenn sie sich in
+ 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
+Sei im Folgenden „IWS“ die „Innenwinkelsumme“.
+Beweis: Sei 4 ein Dreieck mit IWS(4) = π + ε
+α
+β
+γ
+P
+(a) Summe der Winkel α, β und γ
+α1
+α2 β
+γ
+M
+A B
+C A0
+α
+(b) Situation aus Proposition 4.5
+Abbildung 4.10: Situation aus Proposition 4.5
+Sei α ein Innenwinkel von 4.
+Beh.: Es gibt ein Dreieck 40 mit IWS(40) = IWS(4) und einem Innenwinkel α
+0 ≤
+α
+2
+.
+Dann gibt es für jedes n ein 4n mit IWS(4n) = IWS(4) und Innenwinkel α
+0 ≤
+α
+2n . Für
+α
+2n < ε ist dann die Summe der beiden Innenwinkel um 4n größer als π ⇒ Widerspruch
+zu Folgerung 4.4.
+Beweis: Es seien A, B, C ∈ X und 4 das Dreieck mit den Eckpunkten A, B, C und α sei
+der Innenwinkel bei A, β der Innenwinkel bei B und γ der Innenwinkel bei C.
+Sei M der Mittelpunkt der Strecke BC. Sei außerdem α1 = ∠CAM und α2 = ∠BAM.
+Sei weiter A0 ∈ MA− mit d(A0, M) = d(A, M).
+Die Situation ist in Abbildung 4.10b skizziert.
+⇒ 4(MA0C) und 4(MAB) sind kongruent. ⇒ ∠ABM = ∠A0CM und ∠MA0C =
+∠MAB. ⇒ α+β +γ = IWS(4ABC) = IWS(4AA0C) und α1+α2 = α, also o. B. d. A.
+α1 ≤
+α
+2
+Bemerkung 67
+In einer euklidischen Ebene ist in jedem Dreieck die Innenwinkelsumme gleich π.
+α
+0
+α
+00
+α β
+β
+0
+γ
+A B
+C
+g
+Abbildung 4.11: Situation aus Bemerkung 67
+Beweis: Sei g eine Parallele von AB durch C.
+• Es gilt α
+0 = α wegen Proposition 4.3.
+• Es gilt β
+0 = β wegen Proposition 4.3.
+• Es gilt α
+00 = α0 wegen Aufgabe 8.
+ 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE
+⇒ IWS(4ABC) = γ + α
+00 + β0 = π
+Aus der Eigenschaft, dass die Innenwinkelsumme von Dreiecken in der euklidischen Ebene gleich
+π ist, folgen direkt die Kongruenzsätze SWW und WWS über den Kongruenzsatz WSW.
+4.2 Weitere Eigenschaften einer euklidischen Ebene
+Satz 4.6 (Strahlensatz)
+In ähnlichen Dreiecken sind Verhältnisse entsprechender Seiten gleich.
+x
+y
+−1 0 1 2 3 4
+0
+1
+2
+3
+z
+x
+λ
+2
+z
+λ
+2x
+Abbildung 4.12: Strahlensatz
+Der Beweis wird hier nicht geführt. Für Beweisvorschläge wäre ich dankbar.
+A B0
+C
+0
+B
+C
+c
+b a
+c
+0
+b
+0
+a
+0
+Abbildung 4.13: Die Dreiecke 4ABC und 4AB0C
+0
+sind ähnlich.
+4.2.1 Flächeninhalt
+Definition 62
+„Simplizialkomplexe“ in euklidischer Ebene (X, d) heißen flächengleich, wenn sie sich in
 kongruente Dreiecke zerlegen lassen.
-76 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE
-(a) Zwei kongruente Dreiecke (b) Zwei weitere kongruente Drei￾ecke
-Abbildung 4.14: Flächengleichheit
-Der Flächeninhalt eines Dreiecks ist 1/2 · Grundseite · Höhe.
-A B
-C
-LC
-hc
-c
-(a) 1/2 · |AB| · |hc|
-·
-A B
-C
-LA
-ha
-c
-(b) 1/2 · |BC| · |ha|
-Abbildung 4.15: Flächenberechnung im Dreieck
-Zu zeigen: Unabhängigkeit von der gewählten Grundseite.
-α
-α
-γ
-γ
-A B
-C
-LA
-LC
-Abbildung 4.16: 4ABLa und 4CLCB sind ähnlich, weil IWS = π
-Strahlensatz =======⇒ a
-hc
-=
-c
-ha → a · ha = c · hc
-Satz 4.7 (Satz des Pythagoras)
-Im rechtwinkligen Dreieck gilt a
-2 + b
-2 = c
-2
-, wobei c die Hypotenuse und a, b die beiden
-Katheten sind.
-Beweis: (a + b) · (a + b) = a
-2 + 2ab + b
-2 = c
-2 + 4 · (
-1
-2
+ 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE
+(a) Zwei kongruente Dreiecke (b) Zwei weitere kongruente Dreiecke
+Abbildung 4.14: Flächengleichheit
+Der Flächeninhalt eines Dreiecks ist 1/2 · Grundseite · Höhe.
+A B
+C
+LC
+hc
+c
+(a) 1/2 · |AB| · |hc|
+·
+A B
+C
+LA
+ha
+c
+(b) 1/2 · |BC| · |ha|
+Abbildung 4.15: Flächenberechnung im Dreieck
+Zu zeigen: Unabhängigkeit von der gewählten Grundseite.
+α
+α
+γ
+γ
+A B
+C
+LA
+LC
+Abbildung 4.16: 4ABLa und 4CLCB sind ähnlich, weil IWS = π
+Strahlensatz =======⇒ a
+hc
+=
+c
+ha → a · ha = c · hc
+Satz 4.7 (Satz des Pythagoras)
+Im rechtwinkligen Dreieck gilt
+a² + b² = c²,
+wobei c die Hypotenuse und a, b die beiden
+Katheten sind.
+Beweis: (a + b) · (a + b) = a
+2 + 2ab + b2 = c2 + 4 · (
+1
+2
 · a · b)
-77 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE
-c
-b a
-A B
-C
-·
-(a) a, b sind Katheten und c ist die Hypo￾tenuse
-b a
-b
-a
-a b
-b
-a
-·
-· ·
-·
-γ
-(b) Beweisskizze
-Abbildung 4.17: Satz des Pythagoras
-Satz 4.8
-Bis auf Isometrie gibt es genau eine euklidische Ebene (X, d, G), nämlich X = R
-2
-,
-d = euklidischer Abstand, G = Menge der üblichen Geraden.
-Beweis:
-(i) (R
-2
-, dEuklid) ist offensichtlich eine euklidische Ebene.
-(ii) Sei (X, d) eine euklidische Ebene und g1, g2 Geraden in X, die sich in einem Punkt 0
-im rechten Winkel schneiden.
-Sei P ∈ X \ (g1 ∪ g2) ein Punkt und PX der Fußpunkt des Lots von P auf g1 (vgl.
-Aufgabe 9 (c)) und PY der Fußpunkt des Lots von P auf g2.
-Sei xP := d(PX, 0) und yP := d(PY , 0).
-In Abbildung 4.19 wurde die Situation skizziert.
-Sei h : X → R
-2
-eine Abbildung mit h(P) := (xP , yP ) Dadurch wird h auf dem
-Quadranten definiert, in dem P liegt, d. h.
-∀Q ∈ X mit P Q ∩ g1 = ∅ = P Q ∩ g2
-Fortsetzung auf ganz X durch konsistente Vorzeichenwahl.
-Im Folgenden werden zwei Aussagen gezeigt:
-(i) h ist surjektiv
-(ii) h ist eine Isometrie
-Da jede Isometrie injektiv ist, folgt aus (i) und (ii), dass h bijektiv ist.
+ 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE
+c
+b a
+A B
+C
+·
+(a) a, b sind Katheten und c ist die Hypotenuse
+b a
+b
+a
+a b
+b
+a
+·
+· ·
+·
+γ
+(b) Beweisskizze
+Abbildung 4.17: Satz des Pythagoras
+Satz 4.8
+Bis auf Isometrie gibt es genau eine euklidische Ebene (X, d, G), nämlich X = R
+2
+,
+d = euklidischer Abstand, G = Menge der üblichen Geraden.
+Beweis:
+(i) (R
+2
+, dEuklid) ist offensichtlich eine euklidische Ebene.
+(ii) Sei (X, d) eine euklidische Ebene und g1, g2 Geraden in X, die sich in einem Punkt 0
+im rechten Winkel schneiden.
+Sei P ∈ X \ (g1 ∪ g2) ein Punkt und PX der Fußpunkt des Lots von P auf g1 (vgl.
+Aufgabe 9 (c)) und PY der Fußpunkt des Lots von P auf g2.
+Sei xP := d(PX, 0) und yP := d(PY , 0).
+In Abbildung 4.19 wurde die Situation skizziert.
+Sei h : X → R
+2
+eine Abbildung mit h(P) := (xP , yP ) Dadurch wird h auf dem
+Quadranten definiert, in dem P liegt, d. h.
+∀Q ∈ X mit P Q ∩ g1 = ∅ = P Q ∩ g2
+Fortsetzung auf ganz X durch konsistente Vorzeichenwahl.
+Im Folgenden werden zwei Aussagen gezeigt:
+(i) h ist surjektiv
+(ii) h ist eine Isometrie
+Da jede Isometrie injektiv ist, folgt aus (i) und (ii), dass h bijektiv ist.
 Nun zu den Beweisen der Teilaussagen:
-78 4.3. HYPERBOLISCHE GEOMETRIE
-·
-g1
-g2
-P
-X
-(a) Schritt 1
-·
-g1
-g2
-xP
-yP
-P
-0 PX
-PY
-X
-(b) Schritt 2
-Abbildung 4.18: Beweis zu Satz 4.8
-(i) Sei (x, y) ∈ R
-2
-, z. B. x ≥ 0, y ≥ 0. Sei P
-0 ∈ g1 mit d(0, P0
-) = x und P
-0 auf der
-gleichen Seite von g2 wie P.
-g1
-g2
-xP
-yP
-P
-Q
-0
-R
-X
-Abbildung 4.19: Beweis zu Satz 4.8
-(ii) Zu Zeigen: d(P, Q) = d(h(P), h(Q))
-d(P, Q)
-2 Pythagoras
-= d(P, R)
-2 + d(R, Q)
-2 = (yQ − yP )
-2 + (xQ − xP )
-2
-.
-h(Q) = (xQ, yQ)
-4.3 Hyperbolische Geometrie
-Definition 63
-Sei
-H := { z ∈ C | =(z) > 0 } =
-
-(x, y) ∈ R
-2
-
- y > 0
-	
-79 4.3. HYPERBOLISCHE GEOMETRIE
-die obere Halbebene bzw. Poincaré-Halbebene und G = G1 ∪ G2 mit
-G1 = { g1 ⊆ H | ∃m ∈ R, r ∈ R>0 : g1 = { z ∈ H : | z − m| = r } }
-G2 = { g2 ⊆ H | ∃x ∈ R : g2 = { z ∈ H : <(z) = x } }
-Die Elemente aus G heißen hyperbolische Geraden.
-Bemerkung 68 (Eigenschaften der hyperbolischen Geraden)
-Die hyperbolischen Geraden erfüllen. . .
-a) . . . die Inzidenzaxiome §1
-b) . . . das Anordnungsaxiom §3 (ii)
-c) . . . nicht das Parallelenaxiom §5
-Beweis:
-a) Offensichtlich sind §1 (iii) und §1 (ii) erfüllt. Für §1 (i) gilt:
-Gegeben z1, z2 ∈ H
-Existenz:
-Fall 1 <(z1) = <(z2)
-⇒ z1 und z2 liegen auf
-g = { z ∈ C | <(z) = <(z1) ∧ H }
-Siehe Abbildung 4.20a.
-Fall 2 <(z1) 6= <(z2)
-Betrachte nun z1 und z2 als Punkte in der euklidischen Ebene. Die Mittelsenkrech￾te zu diesen Punkten schneidet die x-Achse. Alle Punkte auf der Mittelsenkrechten
-zu z1 und z2 sind gleich weit von z1 und z2 entfernt. Daher ist der Schnittpunkt mit
-der x-Achse der Mittelpunkt eines Kreises durch z1 und z2 (vgl. Abbildung 4.20b)
-x
-y
-−1 0 1 2 3 4 5
-0
-1
-2
-3
-4
-Z1
-Z2
-<(Z1)
-(a) Fall 1
-x
-y
-−1 0 1 2 3 4 5
-0
-1
-2
-3
-4
-Z1
-Z2
-(b) Fall 2
-Abbildung 4.20: Zwei Punkte liegen in der hyperbolischen Geometrie immer auf genau einer
-Geraden
+ 4.3. HYPERBOLISCHE GEOMETRIE
+·
+g1
+g2
+P
+X
+(a) Schritt 1
+·
+g1
+g2
+xP
+yP
+P
+0 PX
+PY
+X
+(b) Schritt 2
+Abbildung 4.18: Beweis zu Satz 4.8
+(i) Sei (x, y) ∈ R
+2
+, z. B. x ≥ 0, y ≥ 0. Sei P
+0 ∈ g1 mit d(0, P0
+) = x und P
+0 auf der
+gleichen Seite von g2 wie P.
+g1
+g2
+xP
+yP
+P
+Q
+0
+R
+X
+Abbildung 4.19: Beweis zu Satz 4.8
+(ii) Zu Zeigen: d(P, Q) = d(h(P), h(Q))
+d(P, Q)
+2 Pythagoras
+= d(P, R)
+2 + d(R, Q)2 = (yQ − yP )2 + (xQ − xP )2
+.
+h(Q) = (xQ, yQ)
+4.3 Hyperbolische Geometrie
+Definition 63
+Sei
+H := { z ∈ C | ℑ(z) > 0 } =
+
+(x, y) ∈ R
+2
+
+ y > 0
+
+ 4.3. HYPERBOLISCHE GEOMETRIE
+die obere Halbebene bzw. Poincaré-Halbebene und G = G1 ∪ G2 mit
+G1 = { g1 ⊆ H | ∃m ∈ R, r ∈ R>0 : g1 = { z ∈ H : | z − m| = r } }
+G2 = { g2 ⊆ H | ∃x ∈ R : g2 = { z ∈ H : ℜ(z) = x } }
+Die Elemente aus G heißen hyperbolische Geraden.
+Bemerkung 68 (Eigenschaften der hyperbolischen Geraden)
+Die hyperbolischen Geraden erfüllen. . .
+a) . . . die Inzidenzaxiome §1
+b) . . . das Anordnungsaxiom §3 (ii)
+c) . . . nicht das Parallelenaxiom §5
+Beweis:
+a) Offensichtlich sind §1 (iii) und §1 (ii) erfüllt. Für §1 (i) gilt:
+Gegeben z1, z2 ∈ H
+Existenz:
+Fall 1 ℜ(z1) = ℜ(z2)
+⇒ z1 und z2 liegen auf
+g = { z ∈ H | ℜ(z) = ℜ(z1) }
+Siehe Abbildung 4.20a.
+Fall 2 ℜ(z1) ≠ ℜ(z2)
+Betrachte nun z1 und z2 als Punkte in der euklidischen Ebene. Die Mittelsenkrechte zu diesen Punkten schneidet die x-Achse. Alle Punkte auf der Mittelsenkrechten
+zu z1 und z2 sind gleich weit von z1 und z2 entfernt. Daher ist der Schnittpunkt mit
+der x-Achse der Mittelpunkt eines Kreises durch z1 und z2 (vgl. Abbildung 4.20b)
+x
+y
+−1 0 1 2 3 4 5
+0
+1
+2
+3
+4
+Z1
+Z2
+<(Z1)
+(a) Fall 1
+x
+y
+−1 0 1 2 3 4 5
+0
+1
+2
+3
+4
+Z1
+Z2
+(b) Fall 2
+Abbildung 4.20: Zwei Punkte liegen in der hyperbolischen Geometrie immer auf genau einer
+Geraden
 b) Sei g ∈ G1 ∪˙ G2 eine hyperbolische Gerade.
-80 4.3. HYPERBOLISCHE GEOMETRIE
-Es existieren disjunkte Zerlegungen von H \ g:
-Fall 1: g = { z ∈ H k z − m| = r } ∈ G1
-Dann gilt:
-H = { z ∈ H k z − m| < r }
-| {z }
-=:H1 (Kreisinneres)
-∪ { ˙ z ∈ H k z − m| > r }
-| {z }
-=:H2 (Kreisäußeres)
-Da r > 0 ist H1 nicht leer, da r ∈ R ist H2 nicht leer.
-Fall 2: g = { z ∈ H | <z = x } ∈ G2
-Die disjunkte Zerlegung ist:
-H = { z ∈ H | <(z) < x }
-| {z }
-=:H1 (Links)
-∪ { ˙ z ∈ H | <(z) > x }
-| {z }
-=:H2 (Rechts)
-Zu zeigen: ∀A ∈ Hi
-, B ∈ Hj mit i, j ∈ { 1, 2 } gilt: AB ∩ g 6= ∅ ⇔ i 6= j
-„⇐“: A ∈ H1, B ∈ H2 : AB ∩ g 6= ∅
-Da dH stetig ist, folgt diese Richtung direkt. Alle Punkte in H1 haben einen Abstand
-von m der kleiner ist als r und alle Punkte in H2 haben einen Abstand von m der
-größer ist als r. Da man jede Strecke von A nach B insbesondere auch als stetige
-Abbildung f : R → R>0 auffassen kann, greift der Zwischenwertsatz ⇒ AB ∩ g 6= ∅
-„⇒“: A ∈ Hi
-, B ∈ Hj mit i, j ∈ { 1, 2 } : AB ∩ g 6= ∅ ⇒ i 6= j
-Sei h die Gerade, die durch A und B geht.
-Da A, B /∈ g, aber A, B ∈ h gilt, haben g und h insbesondere mindestens einen
-unterschiedlichen Punkt. Aus §1 (i) folgt, dass sich g und h in höchstens einen Punkt
-schneiden. Sei C dieser Punkt.
-Aus A, B /∈ g folgt: C 6= A und C 6= B. Also liegt C zwischen A und B. Daraus folgt,
-dass A und B bzgl. g in verschiedenen Halbebenen liegen.
-c) Siehe Abbildung 4.21.
-x
-y
-−5 −4 −3 −2 −1 0 1 2 3 4 5 6
-0
-1
-2
-3
-4
-5
+ 4.3. HYPERBOLISCHE GEOMETRIE
+Es existieren disjunkte Zerlegungen von H \ g:
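+Fall 1: $g = \{ z \in \mathbb{H} : |z - m| = r \} \in G_1$
+Dann gilt:
+$\mathbb{H} \setminus g = \underbrace{\{ z \in \mathbb{H} : |z - m| < r \}}_{=: H_1 \text{ (Kreisinneres)}} \mathbin{\dot\cup} \underbrace{\{ z \in \mathbb{H} : |z - m| > r \}}_{=: H_2 \text{ (Kreisäußeres)}}$
+Da $r > 0$ ist $H_1$ nicht leer, da $r \in \mathbb{R}$ ist $H_2$ nicht leer.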
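+Fall 2: $g = \{ z \in \mathbb{H} \mid \Re(z) = x \} \in G_2$
+Die disjunkte Zerlegung ist:
+$\mathbb{H} \setminus g = \underbrace{\{ z \in \mathbb{H} \mid \Re(z) < x \}}_{=: H_1 \text{ (Links)}} \mathbin{\dot\cup} \underbrace{\{ z \in \mathbb{H} \mid \Re(z) > x \}}_{=: H_2 \text{ (Rechts)}}$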
+Zu zeigen: $\forall A \in H_i, B \in H_j$ mit $i, j \in \{1, 2\}$ gilt: $\overline{AB} \cap g \neq \emptyset \Leftrightarrow i \neq j$
+„⇐“: $A \in H_1, B \in H_2 : \overline{AB} \cap g \neq \emptyset$
+Da dH stetig ist, folgt diese Richtung direkt. Alle Punkte in H1 haben einen Abstand
+von m der kleiner ist als r und alle Punkte in H2 haben einen Abstand von m der
+größer ist als r. Da man jede Strecke von A nach B insbesondere auch als stetige
+Abbildung f : R → R>0 auffassen kann, greift der Zwischenwertsatz ⇒ AB ∩ g 6= ∅
+„⇒“: $A \in H_i, B \in H_j$ mit $i, j \in \{1, 2\} : \overline{AB} \cap g \neq \emptyset \Rightarrow i \neq j$
+Sei h die Gerade, die durch A und B geht.
+Da $A, B \notin g$, aber $A, B \in h$ gilt, haben $g$ und $h$ insbesondere mindestens einen
+unterschiedlichen Punkt. Aus §1 (i) folgt, dass sich $g$ und $h$ in höchstens einem Punkt
+schneiden. Sei $C$ dieser Punkt.
+Aus $A, B \notin g$ folgt: $C \neq A$ und $C \neq B$. Also liegt $C$ zwischen $A$ und $B$. Daraus folgt,
+dass $A$ und $B$ bzgl. $g$ in verschiedenen Halbebenen liegen.
+c) Siehe Abbildung 4.21.
+x
+y
+−5 −4 −3 −2 −1 0 1 2 3 4 5 6
+0
+1
+2
+3
+4
+5
 Abbildung 4.21: Hyperbolische Geraden erfüllen §5 nicht.
-81 4.3. HYPERBOLISCHE GEOMETRIE
-Definition 64
-Es seien a, b, c, d ∈ R mit ad − bc 6= 0 und σ : C → C eine Abbildung definiert durch
-σ(z) := az + b
-cz + d
-σ heißt Möbiustransformation.
-Proposition 4.9
-a) Die Gruppe SL2(R) operiert auf H durch die Möbiustransformation
-σ(z) := 
-a b
-c d
-◦ z :=
-az + b
-cz + d
-b) Die Gruppe PSL2(R) = SL2(R)/(±I) operiert durch σ auf H.
-c) PSL2(R) operiert auf R ∪ { ∞ }. Diese Gruppenoperation ist 3-fach transitiv, d. h.
-zu x0 < x1 < x∞ ∈ R gibt es genau ein σ ∈ PSL2(R) mit σ(x0) = 0, σ(x1) = 1,
-σ(x∞) = ∞.
-d) SL2(R) wird von den Matrizen
-
-λ 0
-0 λ
-−1
-
-| {z }
-=:Aλ
-,
-
-1 t
-0 1
-| {z }
-=:Bt
-und 
-0 1
-−1 0
-| {z }
-=:C
-mit t, λ ∈ R
-×
-erzeugt.
-e) PSL2(R) operiert auf G.
-Beweis:
-a) Sei z = x + iy ∈ H, d. h. y > 0 und σ =
-
-a b
-c d
-∈ SL2(R)
-⇒ σ(z) = a(x + iy) + b
-c(x + iy) + d
-=
-(ax + b) + iay
-(cx + d) + icy
-·
-(cx + d) − icy
-(cx + d) − icy
-=
-(ax + b)(cx + d) + aycy
-(cx + d)
-2 + (cy)
-2
-+ i
-ay(cx + d) − (ax + b)cy
-(cx + d)
-2 + (cy)
-2
-=
-axcx + axd + bcx + bd + aycy
-(cx + d)
-2 + (cy)
-2
-+ i
-(ad − bc)y
-(cx + d)
-2 + (cy)
-2
-SL2(R)
-=
-ac(x
-2 + y
-2
-) + adx + bcx + bd
-(cx + d)
-2 + (cy)
-2
-+ i
-y
-(cx + d)
-2 + (cy)
-2
-⇒ =(σ(z)) = y
-(cx+d)
-2+(cy)
-2 > 0
-Die Abbildung bildet also nach H ab. Außerdem gilt:
-
-1 0
-0 1
-◦ z =
-x + iy
-1
+ 4.3. HYPERBOLISCHE GEOMETRIE
+Definition 64
+Es seien $a, b, c, d \in \mathbb{R}$ mit $ad - bc \neq 0$ und $\sigma : \mathbb{C} \to \mathbb{C}$ eine Abbildung definiert durch
+$\sigma(z) := \frac{az + b}{cz + d}$
+σ heißt Möbiustransformation.
+Proposition 4.9
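+a) Die Gruppe $SL_2(\mathbb{R})$ operiert auf $\mathbb{H}$ durch die Möbiustransformation
+$\sigma(z) := \begin{pmatrix} a & b \\ c & d \end{pmatrix} \circ z := \frac{az + b}{cz + d}$
+b) Die Gruppe $PSL_2(\mathbb{R}) = SL_2(\mathbb{R}) / (\pm I)$ operiert durch $\sigma$ auf $\mathbb{H}$.
+c) $PSL_2(\mathbb{R})$ operiert auf $\mathbb{R} \cup \{\infty\}$. Diese Gruppenoperation ist 3-fach transitiv, d. h.
+zu $x_0 < x_1 < x_\infty \in \mathbb{R}$ gibt es genau ein $\sigma \in PSL_2(\mathbb{R})$ mit $\sigma(x_0) = 0$, $\sigma(x_1) = 1$,
+$\sigma(x_\infty) = \infty$.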
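+d) $SL_2(\mathbb{R})$ wird von den Matrizen
+$\underbrace{\begin{pmatrix} \lambda & 0 \\ 0 & \lambda^{-1} \end{pmatrix}}_{=: A_\lambda}, \quad \underbrace{\begin{pmatrix} 1 & t \\ 0 & 1 \end{pmatrix}}_{=: B_t} \quad \text{und} \quad \underbrace{\begin{pmatrix} 0 & 1 \\ -1 & 0 \end{pmatrix}}_{=: C}$
+mit $t, \lambda \in \mathbb{R}^\times$ erzeugt.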
+e) PSL2(R) operiert auf G.
+Beweis:
+a) Sei $z = x + iy \in \mathbb{H}$, d. h. $y > 0$ und $\sigma = \begin{pmatrix} a & b \\ c & d \end{pmatrix} \in SL_2(\mathbb{R})$
+$\Rightarrow \sigma(z) = \frac{a(x + iy) + b}{c(x + iy) + d}$
+$= \frac{(ax + b) + iay}{(cx + d) + icy} \cdot \frac{(cx + d) - icy}{(cx + d) - icy}$
+$= \frac{(ax + b)(cx + d) + aycy}{(cx + d)^2 + (cy)^2} + i \frac{ay(cx + d) - (ax + b)cy}{(cx + d)^2 + (cy)^2}$
+$= \frac{axcx + axd + bcx + bd + aycy}{(cx + d)^2 + (cy)^2} + i \frac{(ad - bc)y}{(cx + d)^2 + (cy)^2}$
+$\overset{SL_2(\mathbb{R})}{=} \frac{ac(x^2 + y^2) + adx + bcx + bd}{(cx + d)^2 + (cy)^2} + i \frac{y}{(cx + d)^2 + (cy)^2}$
+$\Rightarrow \Im(\sigma(z)) = \frac{y}{(cx + d)^2 + (cy)^2} > 0$
+Die Abbildung bildet also nach $\mathbb{H}$ ab. Außerdem gilt:
+$\begin{pmatrix} 1 & 0 \\ 0 & 1 \end{pmatrix} \circ z = \frac{x + iy}{1}$
 = x + iy = z
-82 4.3. HYPERBOLISCHE GEOMETRIE
-und
-
-a b
-c d
-◦
-a
-0
-b
-0
-c
-0 d
-0
-
-◦ z
-
-=
-
-a b
-c d
-◦
-a
-0
-z + b
-0
-c
-0z + d
-0
-=
-a
-a
-0z+b
-0
-c
-0z+d
-0 + b
-c
-a
-0z+b
-0
-c
-0z+d
-0 + d
-=
-a(a
-0z+b
-0
-)+b(c
-0z+d
-0
-)
-c
-0z+d
-0
-c(a
-0z+b
-0)+d(c
-0z+d
-0)
-c
-0z+d
-0
-=
-a(a
-0
-z + b
-0
-) + b(c
-0
-z + d
-0
-)
-c(a
-0z + b
-0) + d(c
-0z + d
-0)
-=
-(aa0 + bc0
-)z + ab0 + bd0
-(ca0 + db0)z + cb0 + dd0
-=
-
-aa0 + bc0 ab0 + bd0
-ca0 + db0
-cb0 + dd0
-
-◦ z
-=
-a b
-c d
-·
-
-a
-0
-b
-0
-c
-0 d
-0
- ◦ z
-b) Es gilt σ(z) = (−σ)(z) für alle σ ∈ SL2(R) und z ∈ H.
-c) Ansatz: σ =
-
-a b
-c d
-σ(x0) = ax0+b
-cx0+d
-!= 0 ⇒ ax0 + b = 0 ⇒ b = −ax0
-σ(x∞) = ∞ ⇒ cx∞ + d = 0 ⇒ d = −cx∞
-σ(x1) = 1 ⇒ ax1 + b = cx1 + d
-a(x1 − x0) = c(x1 − x∞) ⇒ c = a
-x1−x0
-x1−x∞
-⇒ −a
-2
-· x∞
-x1−x0
-x1−x∞
-+ a
-2x0
-x1−x0
-x1−x∞
-= 1
-⇒ a
-2 x1−x0
-x0−x∞
-(x0 − x∞) = 1 ⇒ a
-2 =
-x1−x∞
-(x1−x∞)(x1−x0)
-d) Es gilt:
-A
-−1
-λ = A1
-λ
-B
-−1
-t = B−t
-C
-−1 = C
-3
-Daher genügt es zu zeigen, dass man mit Aλ, Bt und C alle Matrizen aus SL2(R)
-erzeugen kann, genügt es also von einer beliebigen Matrix durch Multiplikation mit
-Matrizen der Form Aλ, Bt und C die Einheitsmatrix zu generieren.
-Sei also
-M =
-
-a b
-c d
-∈ SL2(R)
-beliebig.
-Fall 1: a = 0
-Da M ∈ SL2(R) ist, gilt det M = 1 = ad − bc = −bc. Daher ist insbesondere c 6= 0. Es
-folgt:
-
-0 1
-−1 0
-·
-
-a b
-c d
-=
-
-c d
-−a −b
+ 4.3. HYPERBOLISCHE GEOMETRIE
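+und
+$\begin{pmatrix} a & b \\ c & d \end{pmatrix} \circ \left( \begin{pmatrix} a' & b' \\ c' & d' \end{pmatrix} \circ z \right)$
+$= \begin{pmatrix} a & b \\ c & d \end{pmatrix} \circ \frac{a'z + b'}{c'z + d'}$
+$= \frac{a \frac{a'z + b'}{c'z + d'} + b}{c \frac{a'z + b'}{c'z + d'} + d}$
+$= \frac{\frac{a(a'z + b') + b(c'z + d')}{c'z + d'}}{\frac{c(a'z + b') + d(c'z + d')}{c'z + d'}}$
+$= \frac{a(a'z + b') + b(c'z + d')}{c(a'z + b') + d(c'z + d')}$
+$= \frac{(aa' + bc')z + ab' + bd'}{(ca' + dc')z + cb' + dd'}$
+$= \begin{pmatrix} aa' + bc' & ab' + bd' \\ ca' + dc' & cb' + dd' \end{pmatrix} \circ z$
+$= \left( \begin{pmatrix} a & b \\ c & d \end{pmatrix} \cdot \begin{pmatrix} a' & b' \\ c' & d' \end{pmatrix} \right) \circ z$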
+b) Es gilt σ(z) = (−σ)(z) für alle σ ∈ SL2(R) und z ∈ H.
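+c) Ansatz: $\sigma = \begin{pmatrix} a & b \\ c & d \end{pmatrix}$
+$\sigma(x_0) = \frac{a x_0 + b}{c x_0 + d} \overset{!}{=} 0 \Rightarrow a x_0 + b = 0 \Rightarrow b = -a x_0$
+$\sigma(x_\infty) = \infty \Rightarrow c x_\infty + d = 0 \Rightarrow d = -c x_\infty$
+$\sigma(x_1) = 1 \Rightarrow a x_1 + b = c x_1 + d$
+$a(x_1 - x_0) = c(x_1 - x_\infty) \Rightarrow c = a \frac{x_1 - x_0}{x_1 - x_\infty}$
+Aus $\det \sigma = ad - bc = 1$ folgt:
+$-a^2 \cdot x_\infty \frac{x_1 - x_0}{x_1 - x_\infty} + a^2 x_0 \frac{x_1 - x_0}{x_1 - x_\infty} = 1$
+$\Rightarrow a^2 \frac{x_1 - x_0}{x_1 - x_\infty} (x_0 - x_\infty) = 1 \Rightarrow a^2 = \frac{x_1 - x_\infty}{(x_1 - x_0)(x_0 - x_\infty)}$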
+d) Es gilt:
+$A_\lambda^{-1} = A_{\frac{1}{\lambda}}$
+$B_t^{-1} = B_{-t}$
+$C^{-1} = C^3$
+Um zu zeigen, dass man mit $A_\lambda$, $B_t$ und $C$ alle Matrizen aus $SL_2(\mathbb{R})$
+erzeugen kann, genügt es daher, von einer beliebigen Matrix durch Multiplikation mit
+Matrizen der Form $A_\lambda$, $B_t$ und $C$ die Einheitsmatrix zu generieren.
+Sei also
+$M = \begin{pmatrix} a & b \\ c & d \end{pmatrix} \in SL_2(\mathbb{R})$
+beliebig.
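+Fall 1: $a = 0$
+Da $M \in SL_2(\mathbb{R})$ ist, gilt $\det M = 1 = ad - bc = -bc$. Daher ist insbesondere $c \neq 0$. Es
+folgt:
+$\begin{pmatrix} 0 & 1 \\ -1 & 0 \end{pmatrix} \cdot \begin{pmatrix} a & b \\ c & d \end{pmatrix} = \begin{pmatrix} c & d \\ -a & -b \end{pmatrix}$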
 
-83 4.3. HYPERBOLISCHE GEOMETRIE
-Gehe zu Fall 2.
-Fall 2: a 6= 0
-Nun wird in M durch M · A1
-a
-an der Stelle von a eine 1 erzeugt:
-
-a b
-c d
-·
-1
-a
-0
-0 a
-
-=
-
-1 ab
-c
-a
-ad
-Gehe zu Fall 3.
-Fall 3: a = 1
-
-1 b
-c d
-·
-
-1 −b
-0 1 
-=
-
-1 0
-c d − bc
-Da wir det M = 1 = ad − bc = d − bc wissen, gilt sogar M2,2 = 1.
-Gehe zu Fall 4.
-Fall 4: a = 1, b = 0, d = 1
-A−1CBcC
-
-1 0
-c 1
-
-=
-
-1 0
-0 1
-Daher erzeugen Matrizen der Form Aλ, Bt und C die Gruppe SL2R. 
-e) Es genügt die Aussage für Matrizen aus Proposition 4.9 (d) zu zeigen.
-• σ =
-
-λ 0
-0 λ
-−1
-
-, also σ(z) = λ
-2
-z. Daraus ergeben sich die Situationen, die in
-Abbildung 4.22a und Abbildung 4.22b dargestellt sind.
-x
-y
-−1 0 1 2 3 4 5 6 7
-0
-1
-2
-3
-m λ
-2m
-m + ir
-λ
-2m + iλ2r
-m + 1
-(a) Fall 1
-x
-y
-−1 0 1 2 3 4
-0
-1
-2
-3
-z
-x
-λ
-2
-z
-λ
-2x
-(b) Fall 2 (Strahlensatz)
-Abbildung 4.22: Beweis von Proposition 4.9 (e) für eine Diagonalmatrix
-• Offensichtlich gilt die Aussage für σ =
-
-1 a
-0 1
-• Sei nun σ =
-
-0 1
-−1 0
-, also σ(z) = −
-1
-z
-Bemerkung 69
+ 4.3. HYPERBOLISCHE GEOMETRIE
+Gehe zu Fall 2.
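+Fall 2: $a \neq 0$
+Nun wird in $M$ durch $M \cdot A_{\frac{1}{a}}$ an der Stelle von $a$ eine 1 erzeugt:
+$\begin{pmatrix} a & b \\ c & d \end{pmatrix} \cdot \begin{pmatrix} \frac{1}{a} & 0 \\ 0 & a \end{pmatrix} = \begin{pmatrix} 1 & ab \\ \frac{c}{a} & ad \end{pmatrix}$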
+Gehe zu Fall 3.
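+Fall 3: $a = 1$
+$\begin{pmatrix} 1 & b \\ c & d \end{pmatrix} \cdot \begin{pmatrix} 1 & -b \\ 0 & 1 \end{pmatrix} = \begin{pmatrix} 1 & 0 \\ c & d - bc \end{pmatrix}$
+Da wir $\det M = 1 = ad - bc = d - bc$ wissen, gilt sogar $M_{2,2} = 1$.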
+Gehe zu Fall 4.
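+Fall 4: $a = 1, b = 0, d = 1$
+$A_{-1} C B_c C \begin{pmatrix} 1 & 0 \\ c & 1 \end{pmatrix} = \begin{pmatrix} 1 & 0 \\ 0 & 1 \end{pmatrix}$
+Daher erzeugen Matrizen der Form $A_\lambda$, $B_t$ und $C$ die Gruppe $SL_2(\mathbb{R})$. $\blacksquare$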
+e) Es genügt die Aussage für Matrizen aus Proposition 4.9 (d) zu zeigen.
+• $\sigma = \begin{pmatrix} \lambda & 0 \\ 0 & \lambda^{-1} \end{pmatrix}$, also $\sigma(z) = \lambda^2 z$. Daraus ergeben sich die Situationen, die in
+Abbildung 4.22a und Abbildung 4.22b dargestellt sind.
+x
+y
+−1 0 1 2 3 4 5 6 7
+0
+1
+2
+3
+m λ
+2m
+m + ir
+λ
+2m + iλ2r
+m + 1
+(a) Fall 1
+x
+y
+−1 0 1 2 3 4
+0
+1
+2
+3
+z
+x
+λ
+2
+z
+λ
+2x
+(b) Fall 2 (Strahlensatz)
+Abbildung 4.22: Beweis von Proposition 4.9 (e) für eine Diagonalmatrix
+• Offensichtlich gilt die Aussage für $\sigma = \begin{pmatrix} 1 & a \\ 0 & 1 \end{pmatrix}$
+• Sei nun $\sigma = \begin{pmatrix} 0 & 1 \\ -1 & 0 \end{pmatrix}$, also $\sigma(z) = -\frac{1}{z}$
+Bemerkung 69
 Zu hyperbolischen Geraden g1, g2 gibt es σ ∈ PSL2(R) mit σ(g1) = g2.
-84 4.3. HYPERBOLISCHE GEOMETRIE
-·
-x
-y
-−1 0 1
-0
-1
-z = r · e
-iϕ
-1
-z =
-1
-r
-· e
-iϕ
-Abbildung 4.23: Inversion am Kreis
-Beweis: Nach Proposition 4.9 (c) gibt es σ mit σ(a1) = b1 und σ(a2) = b2. Dann existiert
-σ(g1) := g2 wegen dem Inzidenzaxiom §1 und ist eindeutig bestimmt.
-Definition 65
-Seien z1, z2, z3, z4 ∈ C paarweise verschieden.
-Dann heißt
-DV(z1, z2, z3, z4) :=
-z1−z4
-z1−z2
-z3−z4
-z3−z2
-=
-(z1 − z4) · (z3 − z2)
-(z1 − z2) · (z3 − z4)
-Doppelverhältnis von z1, . . . , z4.
-Bemerkung 70 (Eigenschaften des Doppelverhältnisses)
-a) DV(z1, . . . , z4) ∈ C \ { 0, 1 }
-b) DV(z1, z4, z3, z2) = 1
-DV(z1,z2,z3,z4)
-c) DV(z3, z2, z1, z4) = 1
-DV(z1,z2,z3,z4)
-d) DV ist auch wohldefiniert, wenn eines der zi = ∞ oder wenn zwei der zi gleich sind.
-e) DV(0, 1, ∞, z4) = z4 (Der Fall z4 ∈ { 0, 1, ∞ } ist zugelassen).
-f) Für σ ∈ PSL2(C) und z1, . . . , z4 ∈ C ∪ { ∞ } ist
-DV(σ(z1), σ(z2), σ(z3), σ(z4)) = DV(z1, z2, z3, z4)
-und für σ(z) = 1
-z
-gilt
-DV(σ(z1), σ(z2), σ(z3), σ(z4)) = DV(z1, z2, z3, z4)
-g) DV(z1, z2, z3, z4) ∈ R ∪ { ∞ } ⇔ z1, . . . , z4 liegen auf einer hyperbolischen Geraden.
-Beweis:
-a) DV(z1, . . . , z4) 6= 0, da zi paarweise verschieden
-DV(z1, . . . , z4) 6= 1, da:
-Annahme: DV(z1, . . . , z4) = 1
+ 4.3. HYPERBOLISCHE GEOMETRIE
+·
+x
+y
+−1 0 1
+0
+1
+z = r · e
+iϕ
+1
+z =
+1
+r
+· e
+iϕ
+Abbildung 4.23: Inversion am Kreis
+Beweis: Nach Proposition 4.9 (c) gibt es σ mit σ(a1) = b1 und σ(a2) = b2. Dann existiert
+σ(g1) := g2 wegen dem Inzidenzaxiom §1 und ist eindeutig bestimmt.
+Definition 65
+Seien $z_1, z_2, z_3, z_4 \in \mathbb{C}$ paarweise verschieden.
+Dann heißt
+$DV(z_1, z_2, z_3, z_4) := \frac{\;\frac{z_1 - z_4}{z_1 - z_2}\;}{\;\frac{z_3 - z_4}{z_3 - z_2}\;} = \frac{(z_1 - z_4) \cdot (z_3 - z_2)}{(z_1 - z_2) \cdot (z_3 - z_4)}$
+Doppelverhältnis von $z_1, \dots, z_4$.
+Bemerkung 70 (Eigenschaften des Doppelverhältnisses)
+a) $DV(z_1, \dots, z_4) \in \mathbb{C} \setminus \{0, 1\}$
+b) $DV(z_1, z_4, z_3, z_2) = \frac{1}{DV(z_1, z_2, z_3, z_4)}$
+c) $DV(z_3, z_2, z_1, z_4) = \frac{1}{DV(z_1, z_2, z_3, z_4)}$
+d) DV ist auch wohldefiniert, wenn eines der zi = ∞ oder wenn zwei der zi gleich sind.
+e) DV(0, 1, ∞, z4) = z4 (Der Fall z4 ∈ { 0, 1, ∞ } ist zugelassen).
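+f) Für $\sigma \in PSL_2(\mathbb{C})$ und $z_1, \dots, z_4 \in \mathbb{C} \cup \{\infty\}$ ist
+$DV(\sigma(z_1), \sigma(z_2), \sigma(z_3), \sigma(z_4)) = DV(z_1, z_2, z_3, z_4)$
+und für $\sigma(z) = \frac{1}{\overline{z}}$ gilt
+$DV(\sigma(z_1), \sigma(z_2), \sigma(z_3), \sigma(z_4)) = \overline{DV(z_1, z_2, z_3, z_4)}$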
+g) DV(z1, z2, z3, z4) ∈ R ∪ { ∞ } ⇔ z1, . . . , z4 liegen auf einer hyperbolischen Geraden.
+Beweis:
+a) $DV(z_1, \dots, z_4) \neq 0$, da $z_i$ paarweise verschieden
+$DV(z_1, \dots, z_4) \neq 1$, da:
+Annahme: $DV(z_1, \dots, z_4) = 1$
 ⇔ (z1 − z2)(z3 − z4) = (z1 − z4)(z3 − z2)
-85 4.3. HYPERBOLISCHE GEOMETRIE
-⇔ z1z3 − z2z3 − z1z4 + z2z4 = z1z3 − z3z4 − z1z2 + z2z4
-⇔ z2z3 + z1z4 = z3z4 + z1z2
-⇔ z2z3 − z3z4 = z1z2 − z1z4
-⇔ z3(z2 − z4) = z1(z2 − z4)
-⇔ z3 = z1 oder z2 = z4
-Alle zi sind paarweise verschieden ⇒ Widerspruch 
-b) DV(z1, z4, z3, z2) = (z1−z2)·(z3−z4)
-(z1−z4)·(z3−z2) =
-1
-DV(z1,z2,z3,z4)
-c) DV(z3, z2, z1, z4) = (z3−z4)·(z1−z2)
-(z3−z2)·(z1−z4) =
-1
-DV(z1,z2,z3,z4)
-d) Zwei der zi dürfen gleich sein, da:
-Fall 1 z1 = z4 oder z3 = z2
-In diesem Fall ist DV(z1, . . . , z4) = 0
-Fall 2 z1 = z2 oder z3 = z4
-Mit der Regel von L’Hospital folgt, dass in diesem Fall DV(z1, . . . , z4) = ∞ gilt.
-Fall 3 z1 = z3 oder z2 = z4
-Durch Einsetzen ergibt sich DV(z1, . . . , z4) = 1.
-Im Fall, dass ein zi = ∞ ist, ist entweder DV(0, 1, ∞, z4) = 0 oder DV(0, 1, ∞, z4)±∞
-e) DV(0, 1, ∞, z4) = (0−z4)·(∞−1)
-(0−1)·(∞−z4) =
-z4·(∞−1)
-∞−z4
-= z4
-f) Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken.
-g) Sei σ ∈ PSL2(C) mit σ(z1) = 0, σ(z2) = 1, σ(z3) = ∞. Ein solches σ existiert, da man
-drei Parameter von σ wählen darf.
-Bem. 70.f
-⇒ DV(z1, . . . , z4) = DV(0, 1, ∞, σ(z4))
-⇒ DV(z1, . . . , z4) ∈ R ∪ { ∞ }
-⇔ σ(z4) ∈ R ∪ { ∞ }
-Behauptung folgt, weil σ
-−1
-(R ∪ ∞) ein Kreis oder eine Gerade in C ist.
-Definition 66
-Für z1, z2 ∈ H sei gz1,z2 die eindeutige hyperbolische Gerade durch z1 und z2 und a1, a2 die
-„Schnittpunkte“ von gz1,z2 mit R ∪ { ∞ }.
-Dann sei dH(z1, z2) := 1
-2
-| ln DV(a1, z1, a2, z2)| und heiße hyperbolische Metrik.
-Beh.: Für z1, z2 ∈ H sei gz1,z2 die eindeutige hyperbolische Gerade durch z1 und z2 und a1, a2
-die „Schnittpunkte“ von gz1,z2 mit R ∪ { ∞ }.
-Dann gilt:
-1
-2
-| ln DV(a1, z1, a2, z2)| =
-1
-2
-| ln DV(a2, z1, a1, z2)|
-Beweis: Wegen Bemerkung 70.c gilt:
-DV(a1, z1, a2, z2) = 1
-DV(a2, z1, a1, z2)
-Außerdem gilt:
-ln 1
-x
-= ln x
+ 4.3. HYPERBOLISCHE GEOMETRIE
+⇔ z1z3 − z2z3 − z1z4 + z2z4 = z1z3 − z3z4 − z1z2 + z2z4
+⇔ z2z3 + z1z4 = z3z4 + z1z2
+⇔ z2z3 − z3z4 = z1z2 − z1z4
+⇔ z3(z2 − z4) = z1(z2 − z4)
+⇔ z3 = z1 oder z2 = z4
+Alle zi sind paarweise verschieden ⇒ Widerspruch 
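+b) $DV(z_1, z_4, z_3, z_2) = \frac{(z_1 - z_2) \cdot (z_3 - z_4)}{(z_1 - z_4) \cdot (z_3 - z_2)} = \frac{1}{DV(z_1, z_2, z_3, z_4)}$
+c) $DV(z_3, z_2, z_1, z_4) = \frac{(z_3 - z_4) \cdot (z_1 - z_2)}{(z_3 - z_2) \cdot (z_1 - z_4)} = \frac{1}{DV(z_1, z_2, z_3, z_4)}$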
+d) Zwei der zi dürfen gleich sein, da:
+Fall 1 z1 = z4 oder z3 = z2
+In diesem Fall ist DV(z1, . . . , z4) = 0
+Fall 2 z1 = z2 oder z3 = z4
+Mit der Regel von L’Hospital folgt, dass in diesem Fall DV(z1, . . . , z4) = ∞ gilt.
+Fall 3 z1 = z3 oder z2 = z4
+Durch Einsetzen ergibt sich DV(z1, . . . , z4) = 1.
+Im Fall, dass ein $z_i = \infty$ ist, ist entweder $DV(0, 1, \infty, z_4) = 0$ oder $DV(0, 1, \infty, z_4) = \pm\infty$
+e) $DV(0, 1, \infty, z_4) = \frac{(0 - z_4) \cdot (\infty - 1)}{(0 - 1) \cdot (\infty - z_4)} = \frac{z_4 \cdot (\infty - 1)}{\infty - z_4} = z_4$
+f) Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken.
+g) Sei σ ∈ PSL2(C) mit σ(z1) = 0, σ(z2) = 1, σ(z3) = ∞. Ein solches σ existiert, da man
+drei Parameter von σ wählen darf.
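+$\overset{\text{Bem. 70.f}}{\Rightarrow} DV(z_1, \dots, z_4) = DV(0, 1, \infty, \sigma(z_4))$
+$\Rightarrow DV(z_1, \dots, z_4) \in \mathbb{R} \cup \{\infty\}$
+$\Leftrightarrow \sigma(z_4) \in \mathbb{R} \cup \{\infty\}$
+Behauptung folgt, weil $\sigma^{-1}(\mathbb{R} \cup \{\infty\})$ ein Kreis oder eine Gerade in $\mathbb{C}$ ist.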
+Definition 66
+Für $z_1, z_2 \in \mathbb{H}$ sei $g_{z_1,z_2}$ die eindeutige hyperbolische Gerade durch $z_1$ und $z_2$ und $a_1, a_2$ die
+„Schnittpunkte“ von $g_{z_1,z_2}$ mit $\mathbb{R} \cup \{\infty\}$.
+Dann sei $d_{\mathbb{H}}(z_1, z_2) := \frac{1}{2} |\ln DV(a_1, z_1, a_2, z_2)|$ und heiße hyperbolische Metrik.
+Beh.: Für z1, z2 ∈ H sei gz1,z2 die eindeutige hyperbolische Gerade durch z1 und z2 und a1, a2
+die „Schnittpunkte“ von gz1,z2 mit R ∪ { ∞ }.
+Dann gilt:
+$\frac{1}{2} |\ln DV(a_1, z_1, a_2, z_2)| = \frac{1}{2} |\ln DV(a_2, z_1, a_1, z_2)|$
+Beweis: Wegen Bemerkung 70.c gilt:
+$DV(a_1, z_1, a_2, z_2) = \frac{1}{DV(a_2, z_1, a_1, z_2)}$
+Außerdem gilt:
+ln 1
+x
+= ln x
 −1 = (−1) · ln x = − ln x
-86 4.3. HYPERBOLISCHE GEOMETRIE
-Da der ln im Betrag steht, folgt direkt:
-1
-2
-| ln DV(a1, z1, a2, z2)| =
-1
-2
-| ln DV(a2, z1, a1, z2)|
-Es ist also egal in welcher Reihenfolge die „Schnittpunkte“ mit der x-Achse im Doppelver￾hältnis genutzt werden. 
-Beh.: Die hyperbolische Metrik ist eine Metrik auf H.
-Beweis: Wegen Bemerkung 70.f ist
-d(z1, z2) := d(σ(z1), σ(z2)) mit σ(a1) = 0, σ(a2) = ∞
-d. h. σ(gz1,z2
-) = iR (imaginäre Achse).
-also gilt o. B. d. A. z1 = ia und z2 = ib mit a, b ∈ R und a < b.
-2d(ia, ib) =| ln DV(0, ia, ∞, ib) |
-=| ln (0 − ib)(∞ − ia)
-(0 − ia)(∞ − ib)
-|
-=| ln b
-a
-|
-= ln b − ln a
-Also: d(z1, z2) ≥ 0, d(z1, z2) = 0 ⇔ z1 = z2
-2d(z2, z1) =| ln DV(a2, z2, a1, z1) |
-=| ln DV(∞, ib, 0, ia) |
-Bem. 70.b = | ln DV(0, ib, ∞, ia) |
-= 2d(z1, z2)
-Liegen drei Punkte z1, z2, z3 ∈ C auf einer hyperbolischen Geraden, so gilt d(z1, z3) =
-d(z1, z2) + d(z2, z3) (wenn z2 zwischen z1 und z3 liegt).
-Dreiecksungleichung: Beweis ist umständlich und wird hier nicht geführt. Es sei auf die
-Vorlesung „Hyperbolische Geometrie“ verwiesen.
-Satz 4.10
-Die hyperbolische Ebene H mit der hyperbolischen Metrik d und den hyperbolischen
-Geraden bildet eine „nichteuklidische Geometrie“, d. h. die Axiome §1 - §4 sind erfüllt,
+ 4.3. HYPERBOLISCHE GEOMETRIE
+Da der ln im Betrag steht, folgt direkt:
+$\frac{1}{2} |\ln DV(a_1, z_1, a_2, z_2)| = \frac{1}{2} |\ln DV(a_2, z_1, a_1, z_2)|$
+Es ist also egal in welcher Reihenfolge die „Schnittpunkte“ mit der x-Achse im Doppelverhältnis genutzt werden. 
+Beh.: Die hyperbolische Metrik ist eine Metrik auf H.
+Beweis: Wegen Bemerkung 70.f ist
+d(z1, z2) := d(σ(z1), σ(z2)) mit σ(a1) = 0, σ(a2) = ∞
+d. h. σ(gz1,z2) = iR (imaginäre Achse).
+also gilt o. B. d. A. z1 = ia und z2 = ib mit a, b ∈ R und a < b.
+$2d(ia, ib) = |\ln DV(0, ia, \infty, ib)|$
+$= \left| \ln \frac{(0 - ib)(\infty - ia)}{(0 - ia)(\infty - ib)} \right|$
+$= \left| \ln \frac{b}{a} \right|$
+$= \ln b - \ln a$
+Also: d(z1, z2) ≥ 0, d(z1, z2) = 0 ⇔ z1 = z2
+2d(z2, z1) =| ln DV(a2, z2, a1, z1) |
+=| ln DV(∞, ib, 0, ia) |
+Bem. 70.b = | ln DV(0, ib, ∞, ia) |
+= 2d(z1, z2)
+Liegen drei Punkte z1, z2, z3 ∈ C auf einer hyperbolischen Geraden, so gilt d(z1, z3) =
+d(z1, z2) + d(z2, z3) (wenn z2 zwischen z1 und z3 liegt).
+Dreiecksungleichung: Beweis ist umständlich und wird hier nicht geführt. Es sei auf die
+Vorlesung „Hyperbolische Geometrie“ verwiesen.
+Satz 4.10
+Die hyperbolische Ebene H mit der hyperbolischen Metrik d und den hyperbolischen
+Geraden bildet eine „nichteuklidische Geometrie“, d. h. die Axiome §1 - §4 sind erfüllt,
 aber Axiom §5 ist verletzt.
-87 4.3. HYPERBOLISCHE GEOMETRIE
-Übungsaufgaben
-Aufgabe 8
-Seien (X, d) eine absolute Ebene und P, Q, R ∈ X Punkte. Der Scheitelwinkel des Winkels
-∠P QR ist der Winkel, der aus den Halbgeraden QP − und QR− gebildet wird. Die
-Nebenwinkel von ∠P QR sind die von QP + und QR− bzw. QP − und QR+ gebildeten
-Winkel.
-Zeigen Sie:
-(a) Die beiden Nebenwinkel von ∠P QR sind gleich.
-(b) Der Winkel ∠P QR ist gleich seinem Scheitelwinkel.
-Aufgabe 9
-Sei (X, d) eine absolute Ebene. Der Abstand eines Punktes P zu einer Menge Y ⊆ X von
-Punkten ist definiert durch d(P, Y ) := inf d(P, y)|y ∈ Y .
-Zeigen Sie:
-(a) Ist 4ABC ein Dreieck, in dem die Seiten AB und AC kongruent sind, so sind die
-Winkel ∠ABC und ∠BCA gleich.
-(b) Ist 4ABC ein beliebiges Dreieck, so liegt der längeren Seite der größere Winkel
-gegenüber und umgekehrt.
-(c) Sind g eine Gerade und P /∈ g ein Punkt, so gibt es eine eindeutige Gerade h mit
-P ∈ h und die g im rechten Winkel schneidet. Diese Grade heißt Lot von P auf g
-und der Schnittpunkt des Lots mit g heißt Lotfußpunkt.
-Aufgabe 10
-Seien f, g, h ∈ G und paarweise verschieden.
-Zeigen Sie: f k g ∧ g k h ⇒ f k h
-Aufgabe 11
+ 4.3. HYPERBOLISCHE GEOMETRIE
+Übungsaufgaben
+Aufgabe 8
+Seien $(X, d)$ eine absolute Ebene und $P, Q, R \in X$ Punkte. Der Scheitelwinkel des Winkels
+$\angle PQR$ ist der Winkel, der aus den Halbgeraden $QP^-$ und $QR^-$ gebildet wird. Die
+Nebenwinkel von $\angle PQR$ sind die von $QP^+$ und $QR^-$ bzw. $QP^-$ und $QR^+$ gebildeten
+Winkel.
+Zeigen Sie:
+(a) Die beiden Nebenwinkel von $\angle PQR$ sind gleich.
+(b) Der Winkel $\angle PQR$ ist gleich seinem Scheitelwinkel.
+Aufgabe 9
+Sei $(X, d)$ eine absolute Ebene. Der Abstand eines Punktes $P$ zu einer Menge $Y \subseteq X$ von
+Punkten ist definiert durch $d(P, Y) := \inf \{ d(P, y) \mid y \in Y \}$.
+Zeigen Sie:
+(a) Ist $\triangle ABC$ ein Dreieck, in dem die Seiten $\overline{AB}$ und $\overline{AC}$ kongruent sind, so sind die
+Winkel $\angle ABC$ und $\angle BCA$ gleich.
+(b) Ist $\triangle ABC$ ein beliebiges Dreieck, so liegt der längeren Seite der größere Winkel
+gegenüber und umgekehrt.
+(c) Sind $g$ eine Gerade und $P \notin g$ ein Punkt, so gibt es eine eindeutige Gerade $h$ mit
+$P \in h$, die $g$ im rechten Winkel schneidet. Diese Gerade heißt Lot von $P$ auf $g$
+und der Schnittpunkt des Lots mit $g$ heißt Lotfußpunkt.
+Aufgabe 10
+Seien f, g, h ∈ G und paarweise verschieden.
+Zeigen Sie: $f \parallel g \wedge g \parallel h \Rightarrow f \parallel h$
+Aufgabe 11
 Beweise den Kongruenzsatz SSS.
-5 Krümmung
-Definition 67
-Sei f : [a, b] → R
-n
-eine eine Funktion aus C∞. Dann heißt f Kurve.
-5.1 Krümmung von Kurven
-Definition 68
-Sei γ : I = [a, b] → R
-n
-eine Kurve.
-a) Die Kurve γ heißt durch Bogenlänge parametrisiert, wenn gilt:
-kγ
-0
-(t)k2 = 1 ∀t ∈ I
-Dabei ist γ
-0
-(t) = (γ
-0
-1
-(t), γ0
-2
-(t), . . . , γ0
-n
-(t)).
-b) l(γ) = R b
-a
-kγ
-0
-(t)kdt heißt Länge von γ.
-Bemerkung 71 (Eigenschaften von Kurven I)
-Sei γ : I = [a, b] → R
-n
-eine C∞-Funktion.
-a) Ist γ durch Bogenlänge parametrisiert, so ist l(γ) = b − a.
-b) Ist γ durch Bogenlänge parametrisiert, so ist γ
-0
-(t) orthogonal zu γ
-00(t) für alle t ∈ I.
-Beweis:
-a) l(γ) = R b
-a
-kγ
-0
-(t)kdt =
-R b
-a
-1dt = b − a.
-b) Im Folgenden wird die Aussage nur für γ : [a, b] → R
-2 bewiesen. Allerdings funktioniert
-der Beweis im R
-n analog. Es muss nur die Ableitung angepasst werden.
-1 = kγ
-0
-(t)k = kγ
-0
-(t)k
-2 = hγ
-0
-(t), γ0
-(t)i
-⇒ 0 =
-d
-dt
-hγ
-0
-(t), γ0
-(t)i
-=
-d
-dt
-(γ
-0
-1
-(t)γ
-0
-1
-(t) + γ
-0
-2
-(t)γ
-0
-2
-(t))
-= 2 · (γ
-00
-1
-(t) · γ
-0
-1
-(t) + γ
-00
-2
-(t) · γ
-0
-2
-(t))
-= 2 · hγ
-00(t), γ0
-(t)i
-Definition 69
-Sei γ : I → R
-2
-eine durch Bogenlänge parametrisierte Kurve.
-a) Für t ∈ I sei n(t) Normalenvektor an γ in t wenn gilt:
-hn(t), γ0
-(t)i = 0, kn(t)k = 1 und det((γ
-0
+5 Krümmung
+Definition 67
+Sei $f : [a, b] \to \mathbb{R}^n$ eine Funktion aus $C^\infty$. Dann heißt $f$ Kurve.
+5.1 Krümmung von Kurven
+Definition 68
+Sei $\gamma : I = [a, b] \to \mathbb{R}^n$ eine Kurve.
+a) Die Kurve $\gamma$ heißt durch Bogenlänge parametrisiert, wenn gilt:
+$\|\gamma'(t)\|_2 = 1 \quad \forall t \in I$
+Dabei ist $\gamma'(t) = (\gamma_1'(t), \gamma_2'(t), \dots, \gamma_n'(t))$.
+b) $l(\gamma) = \int_a^b \|\gamma'(t)\| \, dt$ heißt Länge von $\gamma$.
+Bemerkung 71 (Eigenschaften von Kurven I)
+Sei $\gamma : I = [a, b] \to \mathbb{R}^n$ eine $C^\infty$-Funktion.
+a) Ist $\gamma$ durch Bogenlänge parametrisiert, so ist $l(\gamma) = b - a$.
+b) Ist $\gamma$ durch Bogenlänge parametrisiert, so ist $\gamma'(t)$ orthogonal zu $\gamma''(t)$ für alle $t \in I$.
+Beweis:
+a) $l(\gamma) = \int_a^b \|\gamma'(t)\| \, dt = \int_a^b 1 \, dt = b - a$.
+b) Im Folgenden wird die Aussage nur für $\gamma : [a, b] \to \mathbb{R}^2$ bewiesen. Allerdings funktioniert
+der Beweis im $\mathbb{R}^n$ analog. Es muss nur die Ableitung angepasst werden.
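+$1 = \|\gamma'(t)\| = \|\gamma'(t)\|^2 = \langle \gamma'(t), \gamma'(t) \rangle$
+$\Rightarrow 0 = \frac{d}{dt} \langle \gamma'(t), \gamma'(t) \rangle$
+$= \frac{d}{dt} \left( \gamma_1'(t) \gamma_1'(t) + \gamma_2'(t) \gamma_2'(t) \right)$
+$= 2 \cdot \left( \gamma_1''(t) \cdot \gamma_1'(t) + \gamma_2''(t) \cdot \gamma_2'(t) \right)$
+$= 2 \cdot \langle \gamma''(t), \gamma'(t) \rangle$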
+Definition 69
+Sei $\gamma : I \to \mathbb{R}^2$ eine durch Bogenlänge parametrisierte Kurve.
+a) Für t ∈ I sei n(t) Normalenvektor an γ in t wenn gilt:
+hn(t), γ0(t)i = 0, kn(t)k = 1 und det((γ
+0
 (t), n(t))) = +1
-89 5.1. KRÜMMUNG VON KURVEN
-b) Seit κ : I → R so, dass gilt:
-γ
-00(t) = κ(t) · n(t)
-Dann heißt κ(t) Krümmung von γ in t.
-Da n(t) und γ
-00(t) nach Bemerkung 71.b linear abhängig sind, existiert κ(t).
-Beispiel 45
-Gegeben sei ein Kreis mit Radius r, d. h. mit Umfang 2πr. Es gilt:
-γ(t) = 
-r · cos
-t
-r
-, r · sin
-t
-r
-
-für t ∈ [0, 2πr]
-ist parametrisiert durch Bogenlänge, da gilt:
-γ
-0
-(t) = 
-(r ·
-1
-r
-)(− sin
-t
-r
-), r
-1
-r
-cos
-t
-r
-
-=
-
-− sin
-t
-r
-, cos
-t
-r
-
-Der Normalenvektor von γ in t ist
-n(t) = 
-− cos
-t
-r
-, − sin
-t
-r
-
-da gilt:
-hn(t), γ0
-(t)i =
-− cos t
-r
-− sin t
-r
-
-,
-
-− sin t
-r
-cos t
-r
-
-= (− cos
-t
-r
-) · (− sin
-t
-r
-) + (− sin
-t
-r
-) · (cos t
-r
-)
-= 0
-kn(t)k =
-
-
-
-
-(− cos
-t
-r
-, − sin
-t
-r
-)
-
-
-
-
-= (− cos
-t
-r
-)
-2 + (− sin
-t
-r
-)
-2
-= 1
-det(γ
-0
-1
-(t), n(t)) =
-
-
-
-
-
-− sin t
-r − cos t
-r
-cos t
-r − sin t
-r
-
-
-
-
-= (− sin
-t
-r
-)
-2 − (− cos
-t
-r
-) · cos
-t
-r
-= 1
-Die Krümmung ist für jedes t konstant 1
-r
-, da gilt:
-γ
-00(t) = 
-−
-1
-r
-cos
-t
-r
-, −
-1
-r
-sin
-t
-r
-
-=
-1
-r
-·
-
-− cos
-t
-r
-, − sin
-t
-r
-
-⇒ κ(t) = 1
+ 5.1. KRÜMMUNG VON KURVEN
+b) Sei $\kappa : I \to \mathbb{R}$ so, dass gilt:
+$\gamma''(t) = \kappa(t) \cdot n(t)$
+Dann heißt $\kappa(t)$ Krümmung von $\gamma$ in $t$.
+Da $n(t)$ und $\gamma''(t)$ nach Bemerkung 71.b linear abhängig sind, existiert $\kappa(t)$.
+Beispiel 45
+Gegeben sei ein Kreis mit Radius r, d. h. mit Umfang 2πr. Es gilt:
+$\gamma(t) = \left( r \cdot \cos\frac{t}{r}, \; r \cdot \sin\frac{t}{r} \right)$
+für $t \in [0, 2\pi r]$
+ist parametrisiert durch Bogenlänge, da gilt:
+$\gamma'(t) = \left( \left(r \cdot \frac{1}{r}\right)\left(-\sin\frac{t}{r}\right), \; r \frac{1}{r} \cos\frac{t}{r} \right) = \left( -\sin\frac{t}{r}, \; \cos\frac{t}{r} \right)$
+Der Normalenvektor von γ in t ist
+$n(t) = \left( -\cos\frac{t}{r}, \; -\sin\frac{t}{r} \right)$
+da gilt:
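+$\langle n(t), \gamma'(t) \rangle = \left\langle \begin{pmatrix} -\cos\frac{t}{r} \\ -\sin\frac{t}{r} \end{pmatrix}, \begin{pmatrix} -\sin\frac{t}{r} \\ \cos\frac{t}{r} \end{pmatrix} \right\rangle$
+$= \left(-\cos\frac{t}{r}\right) \cdot \left(-\sin\frac{t}{r}\right) + \left(-\sin\frac{t}{r}\right) \cdot \left(\cos\frac{t}{r}\right)$
+$= 0$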
+$\|n(t)\| = \left\| \left( -\cos\frac{t}{r}, \; -\sin\frac{t}{r} \right) \right\|$
+$= \sqrt{\left(-\cos\frac{t}{r}\right)^2 + \left(-\sin\frac{t}{r}\right)^2}$
+$= 1$
+$\det(\gamma'(t), n(t)) = \begin{vmatrix} -\sin\frac{t}{r} & -\cos\frac{t}{r} \\ \cos\frac{t}{r} & -\sin\frac{t}{r} \end{vmatrix}$
+$= \left(-\sin\frac{t}{r}\right)^2 - \left(-\cos\frac{t}{r}\right) \cdot \cos\frac{t}{r}$
+$= 1$
+Die Krümmung ist für jedes t konstant 1
+r
+, da gilt:
+γ
+00(t) = 
+−
+1
+r
+cos
+t
+r
+, −
+1
+r
+sin
+t
 r
-90 5.2. TANGENTIALEBENE
-Definition 70
-Sei γ : I → R
-3
-eine durch Bogenlänge parametrisierte Kurve.
-a) Für t ∈ I heißt κ(t) := kγ
-00(t)k die Krümmung von γ in t.
-b) Ist für t ∈ I die Ableitung γ
-00(t) 6= 0, so heißt γ
-00(t)
-kγ
-00(t)k Normalenvektor an γ in t.
-c) b(t) sei ein Vektor, der γ
-0
-(t), n(t) zu einer orientierten Orthonormalbasis von R
-3
-ergänzt.
-Also gilt:
-det(γ
-0
-(t), n(t), b(t)) = 1
-b(t) heißt Binormalenvektor, die Orthonormalbasis
-
-γ
-0
-(t), n(t), b(t)
-	
-heißt begleitendes Dreibein.
-Bemerkung 72 (Eigenschaften von Kurven II)
-Sei γ : I → R
-3 durch Bogenlänge parametrisierte Kurve.
-a) n(t) ist orthogonal zu γ
-0
-(t).
-b) b(t) aus Definition 70.c ist eindeutig.
-5.2 Tangentialebene
-Erinnerung Sie sich an Definition 32 „reguläre Fläche“.
-Äquivalent dazu ist: S ist lokal von der Form
-V (f) = 
-x ∈ R
-3
-
- f(x) = 0 	
-für eine C∞-Funktion f : R
-3 → R.
-Definition 71
-Sei S ⊆ R
-3
-eine reguläre Fläche, s ∈ S, F : U → V ∩ S eine lokale Parametrisierung um
-s ∈ V :
-(u, v) 7→ (x(u, v), y(u, v), z(u, v))
-Für p = F
-−1
-(s) ∈ U sei
-JF (p) =
-
-
-∂x
-∂u(p)
-∂x
-∂v (p)
-∂y
-∂u(p)
-∂y
-∂v (p)
-∂z
-∂u(p)
-∂z
-∂v (p)
-
-
-und DpF : R
-2 → R
-3 die durch JF (p) definierte lineare Abbildung.
-Dann heißt TsS := Bild(DpF) die Tangentialebene an s ∈ S.
-Bemerkung 73 (Eigenschaften der Tangentialebene)
-a) TsS ist 2-dimensionaler Untervektorraum von R
-3
-.
-b) TsS = hu, ˜ v˜i, wobei u, ˜ v˜ die Spaltenvektoren der Jacobi-Matrix JF (p) sind.
+
+=
+1
+r
+·
+
+− cos
+t
+r
+, − sin
+t
+r
+
+⇒ κ(t) = 1
+r
+ 5.2. TANGENTIALEBENE
+Definition 70
+Sei $\gamma : I \to \mathbb{R}^3$ eine durch Bogenlänge parametrisierte Kurve.
+a) Für $t \in I$ heißt $\kappa(t) := \|\gamma''(t)\|$ die Krümmung von $\gamma$ in $t$.
+b) Ist für $t \in I$ die Ableitung $\gamma''(t) \neq 0$, so heißt $n(t) := \frac{\gamma''(t)}{\|\gamma''(t)\|}$ Normalenvektor an $\gamma$ in $t$.
+c) $b(t)$ sei ein Vektor, der $\gamma'(t), n(t)$ zu einer orientierten Orthonormalbasis von $\mathbb{R}^3$
+ergänzt.
+Also gilt:
+$\det(\gamma'(t), n(t), b(t)) = 1$
+$b(t)$ heißt Binormalenvektor, die Orthonormalbasis
+$\{ \gamma'(t), n(t), b(t) \}$
+heißt begleitendes Dreibein.
+Bemerkung 72 (Eigenschaften von Kurven II)
+Sei $\gamma : I \to \mathbb{R}^3$ eine durch Bogenlänge parametrisierte Kurve.
+a) $n(t)$ ist orthogonal zu $\gamma'(t)$.
+b) b(t) aus Definition 70.c ist eindeutig.
+5.2 Tangentialebene
+Erinnern Sie sich an Definition 32 „reguläre Fläche“.
+Äquivalent dazu ist: $S$ ist lokal von der Form
+$V(f) = \{ x \in \mathbb{R}^3 \mid f(x) = 0 \}$
+für eine $C^\infty$-Funktion $f : \mathbb{R}^3 \to \mathbb{R}$.
+Definition 71
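+Sei $S \subseteq \mathbb{R}^3$ eine reguläre Fläche, $s \in S$, $F : U \to V \cap S$ eine lokale Parametrisierung um
+$s \in V$:
+$(u, v) \mapsto (x(u, v), y(u, v), z(u, v))$
+Für $p = F^{-1}(s) \in U$ sei
+$J_F(p) = \begin{pmatrix} \frac{\partial x}{\partial u}(p) & \frac{\partial x}{\partial v}(p) \\ \frac{\partial y}{\partial u}(p) & \frac{\partial y}{\partial v}(p) \\ \frac{\partial z}{\partial u}(p) & \frac{\partial z}{\partial v}(p) \end{pmatrix}$
+und $D_p F : \mathbb{R}^2 \to \mathbb{R}^3$ die durch $J_F(p)$ definierte lineare Abbildung.
+Dann heißt $T_s S := \mathrm{Bild}(D_p F)$ die Tangentialebene an $s \in S$.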
+Bemerkung 73 (Eigenschaften der Tangentialebene)
+a) $T_s S$ ist 2-dimensionaler Untervektorraum von $\mathbb{R}^3$.
+b) $T_s S = \langle \tilde{u}, \tilde{v} \rangle$, wobei $\tilde{u}, \tilde{v}$ die Spaltenvektoren der Jacobi-Matrix $J_F(p)$ sind.
 c) TsS hängt nicht von der gewählten Parametrisierung ab.
-91 5.2. TANGENTIALEBENE
-d) Sei S = V (f) eine reguläre Fläche in R
-3
-, also f : V → R eine C∞-Funktion, V ⊆ R
-3
-offen, grad(f)(x) 6= 0 für alle x ∈ S.
-Dann ist TsS = (grad(f)(s))⊥ für jedes s ∈ S.
-Beweis:
-a) JF ist eine 3 × 2-Matrix, die mit einem 2 × 1-Vektor multipliziert wird. Das ist
-eine lineare Abbildung und aus der linearen Algebra ist bekannt, das das Bild ein
-Vektorraum ist. Da Rg(JF ) = 2, ist auch dim(TsS) = 2.
-b) Hier kann man wie in Punkt a) argumentieren
-c) TsS = {x ∈ R
-3
-|∃parametrisierte Kurve γ : [−ε, +ε] → S für ein ε > 0 mit γ(0) =
-s und γ
-0
-(0) = x}
-Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken.
-d) Sei x ∈ TsS, γ : [−ε, +ε] → S eine parametrisierte Kurve mit ε > 0 und γ
-0
-(0) = s,
-sodass γ
-0
-(0) = x gilt. Da γ(t) ∈ S für alle t ∈ [−ε, ε], ist f ◦ γ = 0
-⇒ 0 = (f ◦ γ)
-0
-(0) = hgrad(f)(γ(0)), γ0
-(0)i
-⇒ TsS ⊆ grad(f)(s)
-⊥
-dim=2 ====⇒ TsS = (grad(f)(s))⊥
-Definition 72
-a) Ein Normalenfeld auf der regulären Fläche S ⊆ R
-3
-ist eine Abbildung n : S → S
-2 ⊆
-R
-3 mit n(s) ∈ TsS
-⊥ für jedes s ∈ S.
-b) S heißt orientierbar, wenn es ein stetiges Normalenfeld auf S gibt.
-Manchmal wird zwischen einem Normalenfeld und einem Einheitsnormalenfeld unterschieden.
-Im Folgenden werden diese Begriffe jedoch synonym benutzt.
-Bemerkung 74 (Eigenschaften von Normalenfeldern)
-a) Ein Normalenfeld auf S ist genau dann stetig, wenn es glatt ist (also C∞).
-b) Zu jedem s ∈ S gibt es eine Umgebung V ⊆ R
-3 von s und eine lokale Parametrisierung
-F : U → V von S um s, sodass auf F(U) = V ∩ S ein stetiges Normalenfeld existiert.
-c) S ist genau dann orientierbar, wenn es einen differenzierbaren Atlas von S aus lokalen
-Parametrisierungen Fi
-: Ui → Vi
-, i ∈ I gibt, sodass für alle i, j ∈ F und alle
-s ∈ Vi ∩ Vj ∩ S gilt:
-det(Ds
-Vi→Vj
-z }| {
-Fj ◦ F
-−1
-i
-| {z }
-∈R3×3
-) > 0
-Beweis: Wird hier nicht geführt.
-Beispiel 46 (Normalenfelder)
-1) S = S
-2
-, n1 = idS2 ist ein stetiges Normalenfeld.
-Auch n2 = −idS2 ist ein stetiges Normalenfeld.
-2) S = Möbiusband (vgl. Abbildung 5.1) ist nicht orientierbar. Es existiert ein Norma￾lenfeld, aber kein stetiges Normalenfeld.
-92 5.3. GAUSS-KRÜMMUNG
-Abbildung 5.1: Möbiusband
-5.3 Gauß-Krümmung
-Bemerkung 75
-Sei S eine reguläre Fläche, s ∈ S, n(s) ist ein Normalenvektor in s, x ∈ TsS, kxk = 1.
-Sei E der von x und n(s) aufgespannte 2-dimensionale Untervektorraum von R
-3
-.
-Dann gibt es eine Umgebung V ⊆ R
-3 von s, sodass
-C := (s + E) ∩ S ∩ V
-das Bild einer durch Bogenlänge parametrisierten Kurve γ : [−ε, ε] → S enthält mit γ(0) = s
-und γ
-0
-(0) = x.
-Beweis: „Satz über implizite Funktionen“1
-Definition 73
-In der Situation aus Bemerkung 75 heißt die Krümmung κγ(0) der Kurve γ in der Ebene
-(s + E) im Punkt s die Normalkrümmung von S in s in Richtung x = γ
-0
-(0).
-Man schreibt: κNor(s, x) := κγ(0)
-Hinweis: Die Krümmung ist nur bis auf das Vorzeichen bestimmt.
-Beispiel 47 (Gauß-Krümmung)
-1) S = S
-2 = V (X2 + Y
-2 + Z
-2 − 1) ist die Kugel um den Ursprung mit Radius 1, n = id,
-s = (0, 0, 1), x = (1, 0, 0)
-⇒ E = R · x + R · n(s) (x, z-Ebene)
-C = E ∩ S ist Kreislinie
-κNor(s, x) = 1
-r = 1
-2) S = V (X2 + Z
-2 − 1) ⊆ R
-3
-ist ein Zylinder (siehe Abbildung 5.2a). s = (1, 0, 0)
-x1 = (0, 1, 0) ⇒ E1 = R · e1 + R · e2 (x, y-Ebene)
-S ∩ E1 = V (X2 + Y
-2 − 1) ∩ E, Kreislinie in E
-⇒ κNor(s, x1) = ±1
-x2 = (0, 0, 1), E2 = R · e1 + R · e3 (x, z-Ebene)
+ 5.2. TANGENTIALEBENE
+d) Sei $S = V(f)$ eine reguläre Fläche in $\mathbb{R}^3$, also $f : V \to \mathbb{R}$ eine $C^\infty$-Funktion, $V \subseteq \mathbb{R}^3$ offen, $\operatorname{grad}(f)(x) \neq 0$ für alle $x \in S$.
+Dann ist $T_s S = (\operatorname{grad}(f)(s))^\perp$ für jedes $s \in S$.
+Beweis:
+a) JF ist eine 3 × 2-Matrix, die mit einem 2 × 1-Vektor multipliziert wird. Das ist
+eine lineare Abbildung und aus der linearen Algebra ist bekannt, dass das Bild ein
+Vektorraum ist. Da Rg(JF ) = 2, ist auch dim(TsS) = 2.
+b) Hier kann man wie in Punkt a) argumentieren
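+c) $T_s S = \{ x \in \mathbb{R}^3 \mid \exists \text{ parametrisierte Kurve } \gamma : [-\varepsilon, +\varepsilon] \to S \text{ für ein } \varepsilon > 0 \text{ mit } \gamma(0) = s \text{ und } \gamma'(0) = x \}$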
+Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken.
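+d) Sei $x \in T_s S$, $\gamma : [-\varepsilon, +\varepsilon] \to S$ eine parametrisierte Kurve mit $\varepsilon > 0$ und $\gamma(0) = s$,
+sodass $\gamma'(0) = x$ gilt. Da $\gamma(t) \in S$ für alle $t \in [-\varepsilon, \varepsilon]$, ist $f \circ \gamma = 0$
+$\Rightarrow 0 = (f \circ \gamma)'(0) = \langle \operatorname{grad}(f)(\gamma(0)), \gamma'(0) \rangle$
+$\Rightarrow T_s S \subseteq (\operatorname{grad}(f)(s))^\perp$
+$\xRightarrow{\dim = 2} T_s S = (\operatorname{grad}(f)(s))^\perp$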
+Definition 72
+a) Ein Normalenfeld auf der regulären Fläche $S \subseteq \mathbb{R}^3$ ist eine Abbildung $n : S \to S^2 \subseteq \mathbb{R}^3$ mit $n(s) \in (T_s S)^\perp$ für jedes $s \in S$.
+b) S heißt orientierbar, wenn es ein stetiges Normalenfeld auf S gibt.
+Manchmal wird zwischen einem Normalenfeld und einem Einheitsnormalenfeld unterschieden.
+Im Folgenden werden diese Begriffe jedoch synonym benutzt.
+Bemerkung 74 (Eigenschaften von Normalenfeldern)
+a) Ein Normalenfeld auf S ist genau dann stetig, wenn es glatt ist (also C∞).
+b) Zu jedem $s \in S$ gibt es eine Umgebung $V \subseteq \mathbb{R}^3$ von $s$ und eine lokale Parametrisierung
+$F : U \to V$ von $S$ um $s$, sodass auf $F(U) = V \cap S$ ein stetiges Normalenfeld existiert.
+c) $S$ ist genau dann orientierbar, wenn es einen differenzierbaren Atlas von $S$ aus lokalen
+Parametrisierungen $F_i : U_i \to V_i$, $i \in I$ gibt, sodass für alle $i, j \in I$ und alle
+$s \in V_i \cap V_j \cap S$ gilt:
+$\det(\underbrace{D_s \overbrace{F_j \circ F_i^{-1}}^{V_i \to V_j}}_{\in \mathbb{R}^{3 \times 3}}) > 0$
+Beweis: Wird hier nicht geführt.
+Beispiel 46 (Normalenfelder)
+1) $S = S^2$, $n_1 = \mathrm{id}_{S^2}$ ist ein stetiges Normalenfeld.
+Auch $n_2 = -\mathrm{id}_{S^2}$ ist ein stetiges Normalenfeld.
+2) S = Möbiusband (vgl. Abbildung 5.1) ist nicht orientierbar. Es existiert ein Normalenfeld, aber kein stetiges Normalenfeld.
+ 5.3. GAUSS-KRÜMMUNG
+Abbildung 5.1: Möbiusband
+5.3 Gauß-Krümmung
+Bemerkung 75
+Sei $S$ eine reguläre Fläche, $s \in S$, $n(s)$ ist ein Normalenvektor in $s$, $x \in T_s S$, $\|x\| = 1$.
+Sei $E$ der von $x$ und $n(s)$ aufgespannte 2-dimensionale Untervektorraum von $\mathbb{R}^3$.
+Dann gibt es eine Umgebung $V \subseteq \mathbb{R}^3$ von $s$, sodass
+$C := (s + E) \cap S \cap V$
+das Bild einer durch Bogenlänge parametrisierten Kurve $\gamma : [-\varepsilon, \varepsilon] \to S$ enthält mit $\gamma(0) = s$
+und $\gamma'(0) = x$.
+Beweis: „Satz über implizite Funktionen“1
+Definition 73
+In der Situation aus Bemerkung 75 heißt die Krümmung $\kappa_\gamma(0)$ der Kurve $\gamma$ in der Ebene
+$(s + E)$ im Punkt $s$ die Normalkrümmung von $S$ in $s$ in Richtung $x = \gamma'(0)$.
+Man schreibt: $\kappa_{\mathrm{Nor}}(s, x) := \kappa_\gamma(0)$
+Hinweis: Die Krümmung ist nur bis auf das Vorzeichen bestimmt.
+Beispiel 47 (Gauß-Krümmung)
+1) $S = S^2 = V(X^2 + Y^2 + Z^2 - 1)$ ist die Kugel um den Ursprung mit Radius 1, $n = \mathrm{id}$,
+$s = (0, 0, 1)$, $x = (1, 0, 0)$
+$\Rightarrow E = \mathbb{R} \cdot x + \mathbb{R} \cdot n(s)$ (x, z-Ebene)
+$C = E \cap S$ ist Kreislinie
+$\kappa_{\mathrm{Nor}}(s, x) = \frac{1}{r} = 1$
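+2) $S = V(X^2 + Z^2 - 1) \subseteq \mathbb{R}^3$ ist ein Zylinder (siehe Abbildung 5.2a). $s = (1, 0, 0)$
+$x_1 = (0, 1, 0) \Rightarrow E_1 = \mathbb{R} \cdot e_1 + \mathbb{R} \cdot e_2$ (x, y-Ebene)
+$S \cap E_1 = V(X^2 + Y^2 - 1) \cap E$, Kreislinie in $E$
+$\Rightarrow \kappa_{\mathrm{Nor}}(s, x_1) = \pm 1$
+$x_2 = (0, 0, 1)$, $E_2 = \mathbb{R} \cdot e_1 + \mathbb{R} \cdot e_3$ (x, z-Ebene)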
 1Siehe z. B. https://github.com/MartinThoma/LaTeX-examples/tree/master/documents/Analysis%20II
-93 5.3. GAUSS-KRÜMMUNG
-V ∩ E2 ∩ S =
-
-(1, 0, z) ∈ R
-3
-
- z ∈ R
-	
-ist eine Gerade
-⇒ κNor(s, x2) = 0
-3) S = V (X2 − Y
-2 − Z), s = (0, 0, 0) (Hyperbolisches Paraboloid, siehe Abbildung 5.2b)
-x1 = (1, 0, 0), n(s) = (0, 0, 1)
-x2 = (0, 1, 0)
-κNor(s, x1) = 2
-κNor(s, x2) = −2
-−1.5 −1 −0.5
-0
-0.5
-1
-1.5
-−1
-0
-1
-0
-1
-2
-3
-4
-5
-x
-y
-z
-(a) S = V (X
-2 + Z
-2 − 1)
-−2 −1.5 −1 −0.5
-0
-0.5
-1
-1.5
-2
-−2
-−1
-0
-1
-2
-−2
-0
-2
-x
-y
-z −4
-−2
-0
-2
-4
-f(x, y)
-(b) S = V (X
-2 − Y
-2 − Z)
-Abbildung 5.2: Beispiele für reguläre Flächen
-Definition 74
-Sei S ⊆ R
-3
-eine reguläre Fläche, s ∈ S und n ein stetiges Normalenfeld auf S.
-γ : [−ε, ε] → S eine nach Bogenlänge parametrisierte Kurve (ε > 0) mit γ(0) = s und
-γ
-00(0) 6= 0.
-Sei n(0) := γ
-00(0)
-kγ
-00(0)k
-. Zerlege
-n(0) = n(0)t + n(0)⊥ mit n(0)t ∈ TsS und n(0)⊥ ∈ (TsS)
-⊥
-Dann ist n(0)⊥ = hn(0), n(s)i · n(s)
-κNor(s, γ) := hγ
-00(0), n(s)i die Normalkrümmung.
-Bemerkung 76
-Sei γ(t) = γ(−t), t ∈ [−ε, ε]. Dann ist κNor(s, γ) = κNor(s, γ).
-Beweis: γ
-00(0) = γ
-00(0), da γ
-0
-(0) = −γ
-0
-(0).
-Es gilt: κNor(s, γ) hängt nur von |γ
-0
-(0)| ab und ist gleich κNor(s, γ0
-(0)).
-Bemerkung 77
-Sei S eine reguläre Fläche und n = n(s) ein Normalenvektor an S in s.
-Sei T
-1
-s S = { x ∈ TsS | kxk = 1 } ∼= S
-1
-. Dann ist
-κ
-n
-Nor(s) : T
-1
-s S → R, x 7→ κNor(s, x)
-eine glatte Funktion und Bild κ
-n
-Nor(s) ist ein abgeschlossenes Intervall.
-Definition 75
+ 5.3. GAUSS-KRÜMMUNG
+$V \cap E_2 \cap S = \{ (1, 0, z) \in \mathbb{R}^3 \mid z \in \mathbb{R} \}$ ist eine Gerade
+$\Rightarrow \kappa_{\mathrm{Nor}}(s, x_2) = 0$
+3) $S = V(X^2 - Y^2 - Z)$, $s = (0, 0, 0)$ (Hyperbolisches Paraboloid, siehe Abbildung 5.2b)
+$x_1 = (1, 0, 0)$, $n(s) = (0, 0, 1)$
+$x_2 = (0, 1, 0)$
+$\kappa_{\mathrm{Nor}}(s, x_1) = 2$
+$\kappa_{\mathrm{Nor}}(s, x_2) = -2$
+−1.5 −1 −0.5
+0
+0.5
+1
+1.5
+−1
+0
+1
+0
+1
+2
+3
+4
+5
+x
+y
+z
+(a) $S = V(X^2 + Z^2 - 1)$
+−2 −1.5 −1 −0.5
+0
+0.5
+1
+1.5
+2
+−2
+−1
+0
+1
+2
+−2
+0
+2
+x
+y
+z −4
+−2
+0
+2
+4
+f(x, y)
+(b) $S = V(X^2 - Y^2 - Z)$
+Abbildung 5.2: Beispiele für reguläre Flächen
+Definition 74
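+Sei $S \subseteq \mathbb{R}^3$ eine reguläre Fläche, $s \in S$ und $n$ ein stetiges Normalenfeld auf $S$.
+$\gamma : [-\varepsilon, \varepsilon] \to S$ eine nach Bogenlänge parametrisierte Kurve ($\varepsilon > 0$) mit $\gamma(0) = s$ und
+$\gamma''(0) \neq 0$.
+Sei $n(0) := \frac{\gamma''(0)}{\|\gamma''(0)\|}$. Zerlege
+$n(0) = n(0)^t + n(0)^\perp$ mit $n(0)^t \in T_s S$ und $n(0)^\perp \in (T_s S)^\perp$
+Dann ist $n(0)^\perp = \langle n(0), n(s) \rangle \cdot n(s)$
+$\kappa_{\mathrm{Nor}}(s, \gamma) := \langle \gamma''(0), n(s) \rangle$ die Normalkrümmung.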
+Bemerkung 76
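+Sei $\tilde{\gamma}(t) = \gamma(-t)$, $t \in [-\varepsilon, \varepsilon]$. Dann ist $\kappa_{\mathrm{Nor}}(s, \tilde{\gamma}) = \kappa_{\mathrm{Nor}}(s, \gamma)$.
+Beweis: $\tilde{\gamma}''(0) = \gamma''(0)$, da $\tilde{\gamma}'(0) = -\gamma'(0)$.
+Es gilt: $\kappa_{\mathrm{Nor}}(s, \gamma)$ hängt nur von $|\gamma'(0)|$ ab und ist gleich $\kappa_{\mathrm{Nor}}(s, \gamma'(0))$.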
+Bemerkung 77
 Sei S eine reguläre Fläche und n = n(s) ein Normalenvektor an S in s.
-94 5.3. GAUSS-KRÜMMUNG
-a) κ
-n
-1
-(s) : = min 
-κ
-n
-Nor(s, x)
-
- x ∈ T
-1
-s S
-	
-und
-κ
-n
-2
-(s) : = max 
-κ
-n
-Nor(s, x)
-
- x ∈ T
-1
-s S
-	
-heißen Hauptkrümmungen von S in s.
-b) K(s) := κ
-n
-1
-(s) · κ
-n
-2
-(s) heißt Gauß-Krümmung von S in s.
-Bemerkung 78
-Ersetzt man n durch −n, so gilt:
-κ
-−n
-Nor(s, x) = −κ
-n
-Nor(x) ∀x ∈ T
-1
-s S
-⇒ κ
-−n
-1
-(s) = −κ
-n
-2
-(s)
-κ
-−n
-2
-(s) = −κ
-n
-1
-(s)
-und K−n
-(s) = Kn
-(s) =: K(s)
-Beispiel 48
-1) S = S
-2
-. Dann ist κ1(s) = κ2(s) = ±1 ∀s ∈ S
-2
-⇒ K(s) = 1
-2) Zylinder:
-κ1(s) = 0, κ2(s) = 1 ⇒ K(s) = 0
-3) Sattelpunkt auf hyperbolischem Paraboloid:
-κ1(s) < 0, κ2(s) = 0 → K(s) < 0
-4) S = Torus. Siehe Abbildung 5.3
-s1
-s2
-s3
-Abbildung 5.3: K(s1) > 0, K(s2) = 0, K(s3) < 0
-Bemerkung 79
+Sei $T^1_s S = \{ x \in T_s S \mid \|x\| = 1 \} \cong S^1$. Dann ist
+$\kappa^n_{\mathrm{Nor}}(s) : T^1_s S \to \mathbb{R}, \quad x \mapsto \kappa_{\mathrm{Nor}}(s, x)$
+eine glatte Funktion und $\operatorname{Bild} \kappa^n_{\mathrm{Nor}}(s)$ ist ein abgeschlossenes Intervall.
+Definition 75
+Sei S eine reguläre Fläche und n = n(s) ein Normalenvektor an S in s.
+ 5.3. GAUSS-KRÜMMUNG
+a) $\kappa^n_1(s) := \min \{ \kappa^n_{\mathrm{Nor}}(s, x) \mid x \in T^1_s S \}$ und
+$\kappa^n_2(s) := \max \{ \kappa^n_{\mathrm{Nor}}(s, x) \mid x \in T^1_s S \}$
+heißen Hauptkrümmungen von $S$ in $s$.
+b) $K(s) := \kappa^n_1(s) \cdot \kappa^n_2(s)$ heißt Gauß-Krümmung von $S$ in $s$.
+Bemerkung 78
+Ersetzt man $n$ durch $-n$, so gilt:
+$\kappa^{-n}_{\mathrm{Nor}}(s, x) = -\kappa^{n}_{\mathrm{Nor}}(s, x) \quad \forall x \in T^1_s S$
+$\Rightarrow \kappa^{-n}_1(s) = -\kappa^{n}_2(s)$
+$\kappa^{-n}_2(s) = -\kappa^{n}_1(s)$
+und $K^{-n}(s) = K^{n}(s) =: K(s)$
+Beispiel 48
+1) $S = S^2$. Dann ist $\kappa_1(s) = \kappa_2(s) = \pm 1 \;\; \forall s \in S^2$
+$\Rightarrow K(s) = 1$
+2) Zylinder:
+$\kappa_1(s) = 0, \kappa_2(s) = 1 \Rightarrow K(s) = 0$
+3) Sattelpunkt auf hyperbolischem Paraboloid:
+$\kappa_1(s) < 0, \kappa_2(s) > 0 \Rightarrow K(s) < 0$
+4) S = Torus. Siehe Abbildung 5.3
+s1
+s2
+s3
+Abbildung 5.3: K(s1) > 0, K(s2) = 0, K(s3) < 0
+Bemerkung 79
 Sei S eine reguläre Fläche, s ∈ S ein Punkt.
-95 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM
-a) Ist K(s) > 0, so liegt S in einer Umgebung von s ganz auf einer Seite von TsS + s.
-b) Ist K(s) < 0, so schneidet jede Umgebung von s in S beide Seiten von TsS + s.
-5.4 Erste und zweite Fundamentalform
-Sei S ⊆ R
-3
-eine reguläre Fläche, s ∈ S, TsS die Tangentialebene an S in s und F : U → V eine
-lokale Parametrisierung von S um s. Weiter sei p := F
-−1
-(s).
-Definition 76
-Sei IS ∈ R
-2×2 definiert als
-IS : = 
-g1,1(s) g1,2(s)
-g1,2(s) g2,2(s)
-
-=
-
-E(s) F(s)
-F(s) G(s)
-
-mit gi,j = gs(DpF(ei), DpF(ej ))
-= h
-∂F
-∂ui
-(p),
-∂F
-∂uj
-(p)i i, j ∈ { 1, 2 }
-Die Matrix IS heißt erste Fundamentalform von S bzgl. der Parametrisierung F.
-Bemerkung 80
-a) Die Einschränkung des Standardskalarproduktes des R
-3 auf TsS macht TsS zu einem
-euklidischen Vektorraum.
-b) { DpF(e1), DpF(e2) } ist eine Basis von TsS.
-c) Bzgl. der Basis { DpF(e1), DpF(e2) } hat das Standardskalarprodukt aus Bemer￾kung 80.a die Darstellungsmatrix IS.
-d) gi,j (s) ist eine differenzierbare Funktion von s.
-Bemerkung 81
-det(IS) =
-
-
-
-
-∂F
-∂u1
-(p) ×
-∂F
-∂u2
-(p)
-
-
-
-
-2
-Beweis: Sei ∂F
-∂u1
-(p) =
-
-
-x1
-x2
-x3
-
- ,
-∂F
-∂u2
-(p) =
-
-
-y1
-y2
-y3
-
-
-Dann ist ∂F
-∂u1
-(p) ×
-∂F
-∂u2
-(p) =
-
-
-z1
-z2
-z3
-
- mit
-z1 = x2y3 − x3y2
-z2 = x3y1 − x1y3
-z3 = x1y2 − x2y1
-⇒ k ∂F
-∂u1
-(p) ×
-∂F
-∂u2
-(p)k = z
-2
-1 + z
-2
-2 + z
-2
+ 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM
+a) Ist K(s) > 0, so liegt S in einer Umgebung von s ganz auf einer Seite von TsS + s.
+b) Ist K(s) < 0, so schneidet jede Umgebung von s in S beide Seiten von TsS + s.
+5.4 Erste und zweite Fundamentalform
+Sei $S \subseteq \mathbb{R}^3$ eine reguläre Fläche, $s \in S$, $T_s S$ die Tangentialebene an $S$ in $s$ und $F : U \to V$ eine
+lokale Parametrisierung von $S$ um $s$. Weiter sei $p := F^{-1}(s)$.
+Definition 76
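+Sei $I_S \in \mathbb{R}^{2 \times 2}$ definiert als
+$I_S := \begin{pmatrix} g_{1,1}(s) & g_{1,2}(s) \\ g_{1,2}(s) & g_{2,2}(s) \end{pmatrix} = \begin{pmatrix} E(s) & F(s) \\ F(s) & G(s) \end{pmatrix}$
+mit $g_{i,j} = g_s(D_p F(e_i), D_p F(e_j)) = \left\langle \frac{\partial F}{\partial u_i}(p), \frac{\partial F}{\partial u_j}(p) \right\rangle, \quad i, j \in \{ 1, 2 \}$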
+Die Matrix IS heißt erste Fundamentalform von S bzgl. der Parametrisierung F.
+Bemerkung 80
+a) Die Einschränkung des Standardskalarproduktes des $\mathbb{R}^3$ auf $T_s S$ macht $T_s S$ zu einem
+euklidischen Vektorraum.
+b) $\{ D_p F(e_1), D_p F(e_2) \}$ ist eine Basis von $T_s S$.
+c) Bzgl. der Basis $\{ D_p F(e_1), D_p F(e_2) \}$ hat das Standardskalarprodukt aus Bemerkung 80.a die Darstellungsmatrix $I_S$.
+d) $g_{i,j}(s)$ ist eine differenzierbare Funktion von $s$.
+Bemerkung 81
+$\det(I_S) = \left\| \frac{\partial F}{\partial u_1}(p) \times \frac{\partial F}{\partial u_2}(p) \right\|^2$
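+Beweis: Sei $\frac{\partial F}{\partial u_1}(p) = \begin{pmatrix} x_1 \\ x_2 \\ x_3 \end{pmatrix}$, $\frac{\partial F}{\partial u_2}(p) = \begin{pmatrix} y_1 \\ y_2 \\ y_3 \end{pmatrix}$
+Dann ist $\frac{\partial F}{\partial u_1}(p) \times \frac{\partial F}{\partial u_2}(p) = \begin{pmatrix} z_1 \\ z_2 \\ z_3 \end{pmatrix}$ mit
+$z_1 = x_2 y_3 - x_3 y_2$
+$z_2 = x_3 y_1 - x_1 y_3$
+$z_3 = x_1 y_2 - x_2 y_1$
+$\Rightarrow \left\| \frac{\partial F}{\partial u_1}(p) \times \frac{\partial F}{\partial u_2}(p) \right\|^2 = z_1^2 + z_2^2 + z_3^2$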
+ 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM
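+$\det(I_S) = g_{1,1} g_{2,2} - g_{1,2}^2$
+$= \left\langle \begin{pmatrix} x_1 \\ x_2 \\ x_3 \end{pmatrix}, \begin{pmatrix} x_1 \\ x_2 \\ x_3 \end{pmatrix} \right\rangle \left\langle \begin{pmatrix} y_1 \\ y_2 \\ y_3 \end{pmatrix}, \begin{pmatrix} y_1 \\ y_2 \\ y_3 \end{pmatrix} \right\rangle - \left\langle \begin{pmatrix} x_1 \\ x_2 \\ x_3 \end{pmatrix}, \begin{pmatrix} y_1 \\ y_2 \\ y_3 \end{pmatrix} \right\rangle^2$
+$= (x_1^2 + x_2^2 + x_3^2)(y_1^2 + y_2^2 + y_3^2) - (x_1 y_1 + x_2 y_2 + x_3 y_3)^2$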
+Definition 77
+a) Das Differential $dA = \sqrt{\det(I)} \, du_1 \, du_2$ heißt Flächenelement von $S$ bzgl. der Parametrisierung $F$.
+b) Für eine Funktion $f : V \to \mathbb{R}$ heißt
+$\int_V f \, dA := \int_U f(\underbrace{F(u_1, u_2)}_{=: s}) \sqrt{\det I(s)} \, du_1 \, du_2$
+der Wert des Integrals von $f$ über $V$, falls das Integral rechts existiert.
+Bemerkung 82
+a) $\int_V f \, dA$ ist unabhängig von der gewählten Parametrisierung.
+b) Sei $f : S \to \mathbb{R}$ eine Funktion, die im Sinne von Definition 77.b lokal integrierbar ist.
+Dann ist $\int_S f \, dA$ wohldefiniert, falls (z. B.) $S$ kompakt ist.
+Etwa:
+$\int_S f \, dA = \sum_{i=1}^{n} \int_{V_i} f \, dA - \sum_{i \neq j} \int_{V_i \cap V_j} f \, dA + \sum_{i,j,k} \int_{V_i \cap V_j \cap V_k} f \, dA - \dots$
+Beweis:
+a) Mit Transformationsformel.
+b) Ist dem Leser überlassen.
+Proposition 5.1
+Sei S ⊆ R
 3
-96 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM
-det(IS) = g1,1g2,2 − g
-2
-1,2
-=
-*
-
-x1
-x2
-x3
-
- ,
-
-
-x1
-x2
-x3
-
-
-+ *
-
-y1
-y2
-y3
-
- ,
-
-
-y1
-y2
-y3
-
-
-+
-−
-*
-
-x1
-x2
-x3
-
- ,
-
-
-y1
-y2
-y3
-
-
-+
-2
-= (x
-2
-1 + x
-2
-2 + x
-2
-3
-)(y
-2
-1 + y
-2
-2 + y
-2
-3
-) − (x1y1 + x2y2 + x3y3)
-2
-Definition 77
-a) Das Differential dA =
-p
-det(I)du1du2 heißt Flächenelement von S bzgl. der Para￾metrisierung F.
-b) Für eine Funktion f : V → R heißt
-Z
-V
-fdA := Z
-U
-f(F(u1, u2)
-| {z }
-=:s
-)
-p
-det I(s)du1du2
-der Wert des Integrals von f über V , falls das Integral rechts existiert.
-Bemerkung 82
-a) R
-V
-fdA ist unabhängig von der gewählten Parametrisierung.
-b) Sei f : S → R eine Funktion, die im Sinne von Definition 77.b lokal integrierbar ist.
-Dann ist R
-S
-fdA wohldefiniert, falls (z. B.) S kompakt ist.
-Etwa:
-Z
-S
-fdA =
-Xn
-i=1
-Z
-Vi
-fdA
-−
-X
-i6=j
-Z
-Vi∩Vj
-fdA
-+
-X
-i,j,k
-Z
-Vi∩Vj∩Vk
-fdA
-− . . .
-Beweis:
-a) Mit Transformationsformel.
-b) Ist dem Leser überlassen.
-Proposition 5.1
-Sei S ⊆ R
-3
-eine reguläre, orientierbare Fläche mit glatten Normalenfeld n : S → S
-2
-.
-Dann gilt:
-a) n induziert für jedes s ∈ S eine lineare Abbildung dsn : TsS → Tn(s)S
-2 durch
-dsn(x) = d
-dt
-n(s„+“tx
-| {z }
-Soll auf Fläche S bleiben
-)
-
-
-
-t=0
+eine reguläre, orientierbare Fläche mit glattem Normalenfeld $n : S \to S^2$.
+Dann gilt:
+a) $n$ induziert für jedes $s \in S$ eine lineare Abbildung $d_s n : T_s S \to T_{n(s)} S^2$ durch
+$d_s n(x) = \left. \frac{d}{dt} \, n(\underbrace{s \text{ „+“ } t x}_{\text{Soll auf Fläche } S \text{ bleiben}}) \right|_{t=0}$
 Die Abbildung dsn heißt Weingarten-Abbildung
-97 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM
-b) Tn(s)S
-2 = TsS.
-c) dsn ist ein Endomorphismus von TsS.
-d) dsn ist selbstadjungiert bzgl. des Skalarproduktes IS.
+ 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM
+b) $T_{n(s)} S^2 = T_s S$.
+c) $d_s n$ ist ein Endomorphismus von $T_s S$.
+d) $d_s n$ ist selbstadjungiert bzgl. des Skalarproduktes $I_S$.
 Hinweis: Die Weingarten-Abbildung wird auch Formoperator genannt.
-98 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM
-Beweis:
-a) Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken.
-b) Tn(S)S
-2 = hn(s)i
-⊥ = TsS
-c) Wegen Proposition 5.1 (a) ist dsn ein Homomorphismus.
-d) Zu zeigen: ∀x, y ∈ IsS : hx, dsn(y)i = hdsn(x), yi
-Aufgrund der Bilinearität des Skalarproduktes genügt es diese Eigenschaft für die
-Basisvektoren zu zeigen.
-Sei xi = DpF(ei) = ∂F
-∂ui
-(p) i = 1, 2
-Beh.: hxi
-, dsn(xj )i = h
-∂
-2F
-∂ui∂uj
-(p), dsn(xi)i
-⇒ h ∂
-2F
-∂ui∂uj
-(p), dsn(xi)i = hxj , dsn(xi)i
-Bew.: 0 = h
-∂F
-∂u (p + tej ), n(p + tej )i
-⇒ 0 =
-d
-dt
-
-h
-∂F
-∂u (p + tej ), n(p + tej )i
-
-
-
-t=0
-= h
-d
-dt
-∂F
-∂ui
-(p + tej )
-| {z }
-∂2F
-∂uj∂ui
-(p)
-
-
-
-t=0
-, n(s)i + hxi
-, dsn DpF(ej )
-| {z }
-xj
-i
-Definition 78
-Die durch −dsn definierte symmetrische Bilinearform auf TsS heißt zweite Fundamental￾form von S in s bzgl. F.
-Man schreibt: IIs(x, y) = h−dsn(x), yi = Is(−dsn(x), y)
-Bemerkung 83
-Bezüglich der Basis { x1, x2 } von TsS hat IIs die Darstellungsmatrix
-(h
-(s)
-i,j )i,j=1,2 mit hi,j (s) = h
-∂
-2F
-∂ui∂uj
-(p), n(s)i
-Proposition 5.2
-Sei γ : [−ε, ε] → S eine nach Bogenlänge parametrisierte Kurve mit γ(0) = s. Dann gilt:
-κNor(s, γ) = IIs(γ
-0
-(0), γ0
-(0))
-Beweis: Nach Definition 74 ist κNor(s, γ) = hγ
-00(0), n(s)i. Nach Voraussetzung gilt
-n(γ(t)) ⊥ γ
-0
-(t) ⇔ hγ
-00(0), n(s)i = 0
-Die Ableitung nach t ergibt
-0 =
-d
-dt
-(hn(γ(t)), γ0
-(t))
-=
-
-d
-dt
-n(γ(t))
-
-
-
-t=0
-, γ0
-(0)
-+ hn(s), γ00(0)i
-99 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM
-= hdsn(γ
-0
-(0)), γ0
-(0)i + κNor(s, γ)
-= −IIs(γ
-0
-(0), γ0
-(0)) + κNor(s, γ)
-Folgerung 5.3
-Die beiden Definitionen von Normalkrümmung in Abschnitt 5.1 stimmen überein:
-κNor(s, γ) = κNor(s, γ0
-(0))
-Satz 5.4
-Sei S ⊆ R
-3
-eine reguläre, orientierbare Fläche und s ∈ S.
-a) Die Hauptkrümmungen κ1(s), κ2(s) sind die Eigenwerte von IIs.
-b) Für die Gauß-Krümmung gilt: K(s) = det(IIs)
-Beweis:
-a) IIs ist symmetrisch, IsS hat also eine Orthonormalbasis aus Eigenvektoren y1, y2 von
-IIs. Ist x ∈ TsS, kxk = 1, so gibt es ϕ ∈ [0, 2π) mit x = cos ϕ · y1 + sin ϕ · y2.
-Seien λ1, λ2 die Eigenwerte von IIs, also IIs(yi
-, yi) = λi
-. Dann gilt:
-IIs(x, x) = cos2 ϕλ1 + sin2 ϕλ2
-= (1 − sin2 ϕ)λ1 + sin2 ϕλ2
-= λ1 + sin2 ϕ(λ2 − λ1) ≥ λ1
-= cos2 ϕ + (1 − cos2 ϕ)λ2
-= λ2 − cos2 ϕ(λ2 − λ1) ≤ λ2
-Prop. 5.2
-=====⇒ λ1 = min 
-κNor(s, x)
-
- x ∈ T
-1
-s S
-	
-λ2 = max 
-κNor(s, x)
-
- x ∈ T
-1
-s S
-	
-Satz 5.5 (Satz von Gauß-Bonnet)
-Sei S ⊆ R
-3
-eine kompakte orientierbare reguläre Fläche. Dann gilt:
-Z
-S
-K(s)dA = 2πχ(S)
-Dabei ist χ(S) die Euler-Charakteristik von S.
-Beweis: Der Beweis wird hier nicht geführt. Er kann in „Elementare Differentialgeometrie“ von
+ 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM
+Beweis:
+a) Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken.
+b) $T_{n(s)} S^2 = \langle n(s) \rangle^\perp = T_s S$
+c) Wegen Proposition 5.1 (a) ist $d_s n$ ein Homomorphismus.
+d) Zu zeigen: $\forall x, y \in T_s S : \langle x, d_s n(y) \rangle = \langle d_s n(x), y \rangle$
+Aufgrund der Bilinearität des Skalarproduktes genügt es diese Eigenschaft für die
+Basisvektoren zu zeigen.
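+Sei $x_i = D_p F(e_i) = \frac{\partial F}{\partial u_i}(p)$, $i = 1, 2$
+Beh.: $\langle x_i, d_s n(x_j) \rangle = -\left\langle \frac{\partial^2 F}{\partial u_i \partial u_j}(p), n(s) \right\rangle$
+$\Rightarrow \langle x_i, d_s n(x_j) \rangle = \langle x_j, d_s n(x_i) \rangle$, da $\frac{\partial^2 F}{\partial u_i \partial u_j}(p) = \frac{\partial^2 F}{\partial u_j \partial u_i}(p)$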
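+Bew.: $0 = \left\langle \frac{\partial F}{\partial u_i}(p + t e_j), n(p + t e_j) \right\rangle$
+$\Rightarrow 0 = \left. \frac{d}{dt} \left( \left\langle \frac{\partial F}{\partial u_i}(p + t e_j), n(p + t e_j) \right\rangle \right) \right|_{t=0}$
+$= \Big\langle \underbrace{\left. \frac{d}{dt} \frac{\partial F}{\partial u_i}(p + t e_j) \right|_{t=0}}_{\frac{\partial^2 F}{\partial u_j \partial u_i}(p)}, n(s) \Big\rangle + \Big\langle x_i, d_s n \, \underbrace{D_p F(e_j)}_{x_j} \Big\rangle$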
+Definition 78
+Die durch −dsn definierte symmetrische Bilinearform auf TsS heißt zweite Fundamentalform von S in s bzgl. F.
+Man schreibt: $II_s(x, y) = \langle -d_s n(x), y \rangle = I_s(-d_s n(x), y)$
+Bemerkung 83
+Bezüglich der Basis $\{ x_1, x_2 \}$ von $T_s S$ hat $II_s$ die Darstellungsmatrix
+$(h^{(s)}_{i,j})_{i,j=1,2}$ mit $h_{i,j}(s) = \left\langle \frac{\partial^2 F}{\partial u_i \partial u_j}(p), n(s) \right\rangle$
+Proposition 5.2
+Sei γ : [−ε, ε] → S eine nach Bogenlänge parametrisierte Kurve mit γ(0) = s. Dann gilt:
+$\kappa_{\mathrm{Nor}}(s, \gamma) = II_s(\gamma'(0), \gamma'(0))$
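+Beweis: Nach Definition 74 ist $\kappa_{\mathrm{Nor}}(s, \gamma) = \langle \gamma''(0), n(s) \rangle$. Nach Voraussetzung gilt
+$n(\gamma(t)) \perp \gamma'(t) \Leftrightarrow \langle n(\gamma(t)), \gamma'(t) \rangle = 0$
+Die Ableitung nach $t$ ergibt
+$0 = \left. \frac{d}{dt} \langle n(\gamma(t)), \gamma'(t) \rangle \right|_{t=0}$
+$= \left\langle \left. \frac{d}{dt} n(\gamma(t)) \right|_{t=0}, \gamma'(0) \right\rangle + \langle n(s), \gamma''(0) \rangle$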
+ 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM
+$= \langle d_s n(\gamma'(0)), \gamma'(0) \rangle + \kappa_{\mathrm{Nor}}(s, \gamma)$
+$= -II_s(\gamma'(0), \gamma'(0)) + \kappa_{\mathrm{Nor}}(s, \gamma)$
+Folgerung 5.3
+Die beiden Definitionen von Normalkrümmung in Abschnitt 5.1 stimmen überein:
+$\kappa_{\mathrm{Nor}}(s, \gamma) = \kappa_{\mathrm{Nor}}(s, \gamma'(0))$
+Satz 5.4
+Sei $S \subseteq \mathbb{R}^3$ eine reguläre, orientierbare Fläche und $s \in S$.
+a) Die Hauptkrümmungen $\kappa_1(s), \kappa_2(s)$ sind die Eigenwerte von $II_s$.
+b) Für die Gauß-Krümmung gilt: $K(s) = \det(II_s)$
+Beweis:
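+a) $II_s$ ist symmetrisch, $T_s S$ hat also eine Orthonormalbasis aus Eigenvektoren $y_1, y_2$ von
+$II_s$. Ist $x \in T_s S$, $\|x\| = 1$, so gibt es $\varphi \in [0, 2\pi)$ mit $x = \cos\varphi \cdot y_1 + \sin\varphi \cdot y_2$.
+Seien $\lambda_1, \lambda_2$ die Eigenwerte von $II_s$, also $II_s(y_i, y_i) = \lambda_i$. Dann gilt:
+$II_s(x, x) = \cos^2\varphi \, \lambda_1 + \sin^2\varphi \, \lambda_2$
+$= (1 - \sin^2\varphi) \lambda_1 + \sin^2\varphi \, \lambda_2$
+$= \lambda_1 + \sin^2\varphi \, (\lambda_2 - \lambda_1) \geq \lambda_1$
+$= \cos^2\varphi \, \lambda_1 + (1 - \cos^2\varphi) \lambda_2$
+$= \lambda_2 - \cos^2\varphi \, (\lambda_2 - \lambda_1) \leq \lambda_2$
+$\xRightarrow{\text{Prop. 5.2}} \lambda_1 = \min \{ \kappa_{\mathrm{Nor}}(s, x) \mid x \in T^1_s S \}$
+$\lambda_2 = \max \{ \kappa_{\mathrm{Nor}}(s, x) \mid x \in T^1_s S \}$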
+Satz 5.5 (Satz von Gauß-Bonnet)
+Sei $S \subseteq \mathbb{R}^3$ eine kompakte orientierbare reguläre Fläche. Dann gilt:
+$\int_S K(s) \, dA = 2\pi \chi(S)$
+Dabei ist χ(S) die Euler-Charakteristik von S.
+Beweis: Der Beweis wird hier nicht geführt. Er kann in „Elementare Differentialgeometrie“ von
 Christian Bär (2. Auflage), ISBN 978-3-11-022458-0, ab Seite 281 nachgelesen werden.
-Lösungen der Übungsaufgaben
-Lösung zu Aufgabe 1
-Teilaufgabe a) Es gilt:
-(i) ∅, X ∈ TX.
-(ii) TX ist offensichtlich unter Durchschnitten abgeschlossen, d. h. es gilt für alle U1, U2 ∈
-TX : U1 ∩ U2 ∈ TX.
-(iii) Auch unter beliebigen Vereinigungen ist TX abgeschlossen, d. h. es gilt für eine
-beliebige Indexmenge I und alle Ui ∈ TX für alle i ∈ I :
-S
-i∈I Ui ∈ TX
-Also ist (X, TX) ein topologischer Raum.
-Teilaufgabe b) Wähle x = 1, y = 0. Dann gilt x 6= y und die einzige Umgebung von x
-ist X. Da y = 0 ∈ X können also x und y nicht durch offene Mengen getrennt werden.
-(X, TX) ist also nicht hausdorffsch.
-Teilaufgabe c) Nach Bemerkung 4 sind metrische Räume hausdorffsch. Da (X, TX) nach
-(b) nicht hausdorffsch ist, liefert die Kontraposition der Trennungseigenschaft, dass (X, TX)
-kein metrischer Raum sein kann.
-Lösung zu Aufgabe 2
-Teilaufgabe a)
-Beh.: ∀a ∈ Z : { a } ist abgeschlossen.
-Sei a ∈ Z beliebig. Dann gilt:
-Wenn jemand diese Aufgabe gemacht hat, bitte die Lösung an info@martin-thoma.de
-schicken.
-Teilaufgabe b)
-Beh.: { −1, 1 } ist nicht offen
-Bew.: durch Widerspruch
-Annahme: { −1, 1 } ist offen.
-Dann gibt es T ⊆ B, sodass S
-M∈T M = { −1, 1 }. Aber alle U ∈ B haben unendlich viele
-Elemente. Auch endlich viele Schnitte von Elementen in B haben unendlich viele Elemente
-⇒ keine endliche nicht-leere Menge kann in dieser Topologie offen sein ⇒ { −1, 1 } ist
-nicht offen. 
-Teilaufgabe c)
+Lösungen der Übungsaufgaben
+Lösung zu Aufgabe 1
+Teilaufgabe a) Es gilt:
+(i) $\emptyset, X \in T_X$.
+(ii) $T_X$ ist offensichtlich unter Durchschnitten abgeschlossen, d. h. es gilt für alle $U_1, U_2 \in T_X : U_1 \cap U_2 \in T_X$.
+(iii) Auch unter beliebigen Vereinigungen ist $T_X$ abgeschlossen, d. h. es gilt für eine
+beliebige Indexmenge $I$ und alle $U_i \in T_X$ für alle $i \in I$: $\bigcup_{i \in I} U_i \in T_X$
+Also ist (X, TX) ein topologischer Raum.
+Teilaufgabe b) Wähle $x = 1$, $y = 0$. Dann gilt $x \neq y$ und die einzige Umgebung von $x$
+ist X. Da y = 0 ∈ X können also x und y nicht durch offene Mengen getrennt werden.
+(X, TX) ist also nicht hausdorffsch.
+Teilaufgabe c) Nach Bemerkung 4 sind metrische Räume hausdorffsch. Da (X, TX) nach
+(b) nicht hausdorffsch ist, liefert die Kontraposition der Trennungseigenschaft, dass (X, TX)
+kein metrischer Raum sein kann.
+Lösung zu Aufgabe 2
+Teilaufgabe a)
+Beh.: ∀a ∈ Z : { a } ist abgeschlossen.
+Sei a ∈ Z beliebig. Dann gilt:
+Wenn jemand diese Aufgabe gemacht hat, bitte die Lösung an info@martin-thoma.de
+schicken.
+Teilaufgabe b)
+Beh.: { −1, 1 } ist nicht offen
+Bew.: durch Widerspruch
+Annahme: { −1, 1 } ist offen.
+Dann gibt es $T \subseteq B$, sodass $\bigcup_{M \in T} M = \{ -1, 1 \}$. Aber alle $U \in B$ haben unendlich viele
+Elemente. Auch endlich viele Schnitte von Elementen in B haben unendlich viele Elemente
+⇒ keine endliche nicht-leere Menge kann in dieser Topologie offen sein ⇒ { −1, 1 } ist
+nicht offen. 
+Teilaufgabe c)
 Beh.: Es gibt unendlich viele Primzahlen.
-101 Lösungen der Übungsaufgaben
-Bew.: durch Widerspruch
-Annahme: Es gibt nur endlich viele Primzahlen p ∈ P
-Dann ist
-Z \ { −1, +1 }
-FS d. Arithmetik =
-[
-p∈P
-U0,p
-endlich. Das ist ein Widerspruch zu |Z| ist unendlich und | { −1, 1 } | ist endlich. 
-Lösung zu Aufgabe 3
-(a) Beh.: Die offenen Mengen von P sind Vereinigungen von Mengen der Form
-Y
-j∈J
-Uj ×
-Y
-i∈N,i6=j
-Pi
-wobei J ⊆ N endlich und Uj ⊆ Pj offen ist.
-Beweis: Nach Definition der Produkttopologie bilden Mengen der Form
-Y
-i∈J
-Uj ×
-Y
-i∈N\J
-Pi
-wobei J ⊆ N endlich und Uj ⊆ Pj offen ∀j ∈ J eine Basis der Topologie.
-Damit sind die offenen Mengen von P Vereinigungen von Mengen der obigen
-Form. 
-(b) Beh.: Die Zusammenhangskomponenten von P sind alle einpunktig.
-Beweis: Es seinen x, y ∈ P und x sowie y liegen in der gleichen Zusammenhangs￾komponente Z ⊆ P. Da Z zusammenhängend ist und ∀i ∈ I : pi
-: P → Pi
-ist
-stetig, ist pi(Z) ⊆ Pi zusammenhängend für alle i ∈ N. Die zusammenhängenden
-Mengen von Pi sind genau { 0 } und { 1 }, d. h. für alle i ∈ N gilt entweder
-pi(Z) ⊆ { 0 } oder pi(Z) ⊆ { 1 }. Es sei zi ∈ { 0, 1 } so, dass pi(Z) ⊆ { zi } für
-alle i ∈ N. Dann gilt also:
-pi(x)
-| {z }
-=xi
-= zi = pi(y)
-| {z }
-=yi
-∀i ∈ N
-Somit folgt: x = y 
-Lösung zu Aufgabe 4
-(a) Beh.: GLn(R) ist nicht kompakt.
-Bew.: det : GLn(R) → R \ { 0 } ist stetig. Außerdem ist det(GLn(R)) = R \ { 0 }
-nicht kompakt. 22
-⇒ GLn(R) ist nicht kompakt. 
-(b) Beh.: SL1(R) ist nicht kompakt, für n > 1 ist SLn(R) kompakt.
-Bew.: Für SL1(R) gilt: SL1(R) = 
-A ∈ R
-1×1
-
- det A = 1 	
-=
-￾
-1
- ∼= { 1 }.
-22
-⇒ SL1(R)
-ist kompakt 
-102 Lösungen der Übungsaufgaben
-SLn(R) ⊆ GLn(R) lässt sich mit einer Teilmenge des R
-n
-2
-identifizieren. Nach Satz 1.1
-sind diese genau dann kompakt, wenn sie beschränkt und abgeschlossen sind. Definiere
-nun für für n ∈ N≥2, m ∈ N:
-Am = diagn
-(m,
-1
-m
-, . . . , 1)
-Dann gilt: det Am = 1, d. h. Am ∈ SLn(R), und Am ist unbeschränkt, da kAmk∞ =
-m −−−−→ m→∞
-∞. 
-(c) Beh.: P(R) ist kompakt.
-Bew.: P(R) ∼= S
-n/x∼−x. Per Definition der Quotiententopologie ist die Klassenabbil￾dung stetig. Da S
-n als abgeschlossene und beschränkte Teilmenge des R
-n+1 kompakt
-ist 22⇒ P(R) ist kompakt. 
-Lösung zu Aufgabe 5
-Die Definition von Homöomorphismus kann auf Seite 9 nachgelesen werden.
-Definition 79
-Seien (G, ∗) und (H, ◦) Gruppen und ϕ : G → H eine Abbildung.
-ϕ heißt Homomorphismus, wenn
-∀g1, g2 ∈ G : ϕ(g1 ∗ g2) = ϕ(g1) ◦ ϕ(g2)
-gilt.
-Es folgt direkt:
-1) Sei X = R mit der Standarttopologie und ϕ1 : idR und R = (R, +). Dann ist ϕ1 ein
-Gruppenhomomorphismus und ein Homöomorphismus.
-2) Sei G = (Z, +) und H = (Z/3Z, +). Dann ist ϕ2 : G → H, x 7→ x mod 3 ein
-Gruppenhomomorphismus. Jedoch ist ϕ2 nicht injektiv, also sicher kein Homöomor￾phismus.
-3) Sei X ein topologischer Raum. Dann ist idX ein Homöomorphismus. Da keine
-Verknüpfung auf X definiert wurde, ist X keine Gruppe und daher auch kein Grup￾penhomomorphismus.
-Also: Obwohl die Begriffe ähnlich klingen, werden sie in ganz unterschiedlichen Kontexten
-verwendet.
-Lösung zu Aufgabe 6
-Die Definition einer Isotopie kann auf Seite 20 nachgelesen werden, die einer Isometrie auf
-Seite 6.
-Definition 80
-Seien (G, ∗) und (H, ◦) Gruppen und ϕ : G → H eine Abbildung.
-ϕ heißt Isomorphismus, wenn ϕ ein bijektiver Homomorphismus ist.
-Eine Isotopie ist also für Knoten definiert, Isometrien machen nur in metrischen Räumen
+ Lösungen der Übungsaufgaben
+Bew.: durch Widerspruch
+Annahme: Es gibt nur endlich viele Primzahlen $p \in \mathbb{P}$
+Dann ist
+$\mathbb{Z} \setminus \{ -1, +1 \} \overset{\text{FS d. Arithmetik}}{=} \bigcup_{p \in \mathbb{P}} U_{0,p}$
+endlich. Das ist ein Widerspruch zu |Z| ist unendlich und | { −1, 1 } | ist endlich. 
+Lösung zu Aufgabe 3
+(a) Beh.: Die offenen Mengen von $P$ sind Vereinigungen von Mengen der Form
+$\prod_{j \in J} U_j \times \prod_{i \in \mathbb{N}, i \neq j} P_i$
+wobei $J \subseteq \mathbb{N}$ endlich und $U_j \subseteq P_j$ offen ist.
+Beweis: Nach Definition der Produkttopologie bilden Mengen der Form
+$\prod_{j \in J} U_j \times \prod_{i \in \mathbb{N} \setminus J} P_i$
+wobei $J \subseteq \mathbb{N}$ endlich und $U_j \subseteq P_j$ offen $\forall j \in J$ eine Basis der Topologie.
+Damit sind die offenen Mengen von $P$ Vereinigungen von Mengen der obigen
+Form. 
+(b) Beh.: Die Zusammenhangskomponenten von P sind alle einpunktig.
+Beweis: Es seien $x, y \in P$ und $x$ sowie $y$ liegen in der gleichen Zusammenhangskomponente $Z \subseteq P$. Da $Z$ zusammenhängend ist und $p_i : P \to P_i$ für alle $i \in \mathbb{N}$
+stetig ist, ist $p_i(Z) \subseteq P_i$ zusammenhängend für alle $i \in \mathbb{N}$. Die zusammenhängenden
+Mengen von $P_i$ sind genau $\{ 0 \}$ und $\{ 1 \}$, d. h. für alle $i \in \mathbb{N}$ gilt entweder
+$p_i(Z) \subseteq \{ 0 \}$ oder $p_i(Z) \subseteq \{ 1 \}$. Es sei $z_i \in \{ 0, 1 \}$ so, dass $p_i(Z) \subseteq \{ z_i \}$ für
+alle $i \in \mathbb{N}$. Dann gilt also:
+$\underbrace{p_i(x)}_{= x_i} = z_i = \underbrace{p_i(y)}_{= y_i} \quad \forall i \in \mathbb{N}$
+Somit folgt: x = y 
+Lösung zu Aufgabe 4
+(a) Beh.: GLn(R) ist nicht kompakt.
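+Bew.: $\det : GL_n(\mathbb{R}) \to \mathbb{R} \setminus \{ 0 \}$ ist stetig. Außerdem ist $\det(GL_n(\mathbb{R})) = \mathbb{R} \setminus \{ 0 \}$
+nicht kompakt. $\xRightarrow{22}$ $GL_n(\mathbb{R})$ ist nicht kompakt. $\blacksquare$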
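+(b) Beh.: $SL_1(\mathbb{R})$ ist kompakt, für $n > 1$ ist $SL_n(\mathbb{R})$ nicht kompakt.
+Bew.: Für $SL_1(\mathbb{R})$ gilt: $SL_1(\mathbb{R}) = \{ A \in \mathbb{R}^{1 \times 1} \mid \det A = 1 \} = \left( 1 \right) \cong \{ 1 \}$.
+$\xRightarrow{22}$ $SL_1(\mathbb{R})$ ist kompakt.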
+ Lösungen der Übungsaufgaben
+$SL_n(\mathbb{R}) \subseteq GL_n(\mathbb{R})$ lässt sich mit einer Teilmenge des $\mathbb{R}^{n^2}$ identifizieren. Nach Satz 1.1
+sind diese genau dann kompakt, wenn sie beschränkt und abgeschlossen sind. Definiere
+nun für $n \in \mathbb{N}_{\geq 2}$, $m \in \mathbb{N}$:
+$A_m = \operatorname{diag}_n(m, \frac{1}{m}, \dots, 1)$
+Dann gilt: $\det A_m = 1$, d. h. $A_m \in SL_n(\mathbb{R})$, und $A_m$ ist unbeschränkt, da $\|A_m\|_\infty = m \xrightarrow{m \to \infty} \infty$. $\blacksquare$
+(c) Beh.: $P(\mathbb{R})$ ist kompakt.
+Bew.: $P(\mathbb{R}) \cong S^n /_{x \sim -x}$. Per Definition der Quotiententopologie ist die Klassenabbildung stetig. Da $S^n$ als abgeschlossene und beschränkte Teilmenge des $\mathbb{R}^{n+1}$ kompakt
+ist $\xRightarrow{22}$ $P(\mathbb{R})$ ist kompakt. $\blacksquare$
+Lösung zu Aufgabe 5
+Die Definition von Homöomorphismus kann auf Seite 9 nachgelesen werden.
+Definition 79
+Seien (G, ∗) und (H, ◦) Gruppen und ϕ : G → H eine Abbildung.
+ϕ heißt Homomorphismus, wenn
+∀g1, g2 ∈ G : ϕ(g1 ∗ g2) = ϕ(g1) ◦ ϕ(g2)
+gilt.
+Es folgt direkt:
+1) Sei $X = \mathbb{R}$ mit der Standardtopologie und $\varphi_1 = \mathrm{id}_{\mathbb{R}}$ und $R = (\mathbb{R}, +)$. Dann ist $\varphi_1$ ein
+Gruppenhomomorphismus und ein Homöomorphismus.
+2) Sei $G = (\mathbb{Z}, +)$ und $H = (\mathbb{Z}/3\mathbb{Z}, +)$. Dann ist $\varphi_2 : G \to H, x \mapsto x \bmod 3$ ein
+Gruppenhomomorphismus. Jedoch ist ϕ2 nicht injektiv, also sicher kein Homöomorphismus.
+3) Sei $X$ ein topologischer Raum. Dann ist $\mathrm{id}_X$ ein Homöomorphismus. Da keine
+Verknüpfung auf $X$ definiert wurde, ist $X$ keine Gruppe und daher $\mathrm{id}_X$ auch kein Gruppenhomomorphismus.
+Also: Obwohl die Begriffe ähnlich klingen, werden sie in ganz unterschiedlichen Kontexten
+verwendet.
+Lösung zu Aufgabe 6
+Die Definition einer Isotopie kann auf Seite 20 nachgelesen werden, die einer Isometrie auf
+Seite 6.
+Definition 80
+Seien (G, ∗) und (H, ◦) Gruppen und ϕ : G → H eine Abbildung.
+ϕ heißt Isomorphismus, wenn ϕ ein bijektiver Homomorphismus ist.
+Eine Isotopie ist also für Knoten definiert, Isometrien machen nur in metrischen Räumen
 Sinn und ein Isomorphismus benötigt eine Gruppenstruktur.
-103 Lösungen der Übungsaufgaben
-Lösung zu Aufgabe 7
-(a) Vor.: Sei M eine topologische Mannigfaltigkeit.
-Beh.: M ist wegzusammehängend ⇔ M ist zusammenhängend
-Beweis: „⇒“: Da M insbesondere ein topologischer Raum ist folgt diese Richtung
-direkt aus Bemerkung 23.
-„⇐“: Seien x, y ∈ M und
-Z := { z ∈ M | ∃Weg von x nach z }
-Es gilt:
-(i) Z 6= ∅, da M lokal wegzusammenhängend ist
-(ii) Z ist offen, da M lokal wegzusammenhängend ist
-(iii) Z
-C := { z˜ ∈ M | @Weg von x nach z˜ } ist offen
-Da M eine Mannigfaltigkeit ist, existiert zu jedem z˜ ∈ Z
-C eine offene und
-wegzusammenhängende Umgebung Uz˜ ⊆ M.
-Es gilt sogar Uz˜ ⊆ Z
-C, denn gäbe es ein Uz˜ 3 z ∈ Z, so gäbe es Wege γ2 :
-[0, 1] → M, γ2(0) = z, γ2(1) = x und γ1 : [0, 1] → M, γ1(0) = z, γ ˜ 1(1) = z.
-Dann wäre aber
-γ : [0, 1] → M,
-γ(x) = (
-γ1(2x) falls 0 ≤ x ≤
-1
-2
-γ2(2x − 1) falls 1
-2 < x ≤ 1
-ein stetiger Weg von z˜ nach x ⇒ Widerspruch.
-Da M zusammenhängend ist und M = Z
-|{z}
-offen
-∪ Z
-C
-|{z}
-offen
-, sowie Z 6= ∅ folgt Z
-C = ∅.
-Also ist M = Z wegzusammenhängend. 
-(b) Beh.: X ist wegzusammenhängend.
-Beweis: X := (R \ { 0 }) ∪ { 01, 02 } und (R \ { 0 }) ∪ { 02 } sind homöomorph zu R.
-Also sind die einzigen kritischen Punkte, die man nicht verbinden können könnte
-01 und 02.
-Da (R\ { 0 })∪ { 01 } homöomorph zu R ist, exisitert ein Weg γ1 von 01 zu einem
-beliebigen Punkt a ∈ R \ { 0 }.
-Da (R \ { 0 }) ∪ { 02 } ebenfalls homöomorph zu R ist, existiert außerdem ein
-Weg γ2 von a nach 02. Damit existiert ein (nicht einfacher) Weg γ von 01 nach
-02. 
-Lösung zu Aufgabe 9
+ Lösungen der Übungsaufgaben
+Lösung zu Aufgabe 7
+(a) Vor.: Sei M eine topologische Mannigfaltigkeit.
+Beh.: M ist wegzusammenhängend ⇔ M ist zusammenhängend
+Beweis: „⇒“: Da M insbesondere ein topologischer Raum ist folgt diese Richtung
+direkt aus Bemerkung 23.
+„⇐“: Seien x, y ∈ M und
+Z := { z ∈ M | ∃Weg von x nach z }
+Es gilt:
+(i) Z ≠ ∅, da M lokal wegzusammenhängend ist
+(ii) Z ist offen, da M lokal wegzusammenhängend ist
+(iii) $Z^C := \{ \tilde{z} \in M \mid \nexists \text{ Weg von } x \text{ nach } \tilde{z} \}$ ist offen
+Da M eine Mannigfaltigkeit ist, existiert zu jedem $\tilde{z} \in Z^C$ eine offene und
+wegzusammenhängende Umgebung $U_{\tilde{z}} \subseteq M$.
+Es gilt sogar $U_{\tilde{z}} \subseteq Z^C$, denn gäbe es ein $U_{\tilde{z}} \ni z \in Z$, so gäbe es Wege
+$\gamma_2 : [0, 1] \to M$, $\gamma_2(0) = z$, $\gamma_2(1) = x$ und $\gamma_1 : [0, 1] \to M$, $\gamma_1(0) = \tilde{z}$, $\gamma_1(1) = z$.
+Dann wäre aber
+$\gamma : [0, 1] \to M, \quad \gamma(x) = \begin{cases} \gamma_1(2x) & \text{falls } 0 \leq x \leq \frac{1}{2} \\ \gamma_2(2x - 1) & \text{falls } \frac{1}{2} < x \leq 1 \end{cases}$
+ein stetiger Weg von z˜ nach x ⇒ Widerspruch.
+Da M zusammenhängend ist und $M = \underbrace{Z}_{\text{offen}} \cup \underbrace{Z^C}_{\text{offen}}$, sowie $Z \neq \emptyset$ folgt $Z^C = \emptyset$.
+Also ist M = Z wegzusammenhängend. 
+(b) Beh.: X ist wegzusammenhängend.
+Beweis: X := (R \ { 0 }) ∪ { 01, 02 } und (R \ { 0 }) ∪ { 02 } sind homöomorph zu R.
+Also sind die einzigen kritischen Punkte, die man nicht verbinden können könnte
+01 und 02.
+Da (R \ { 0 }) ∪ { 01 } homöomorph zu R ist, existiert ein Weg γ1 von 01 zu einem
+beliebigen Punkt a ∈ R \ { 0 }.
+Da (R \ { 0 }) ∪ { 02 } ebenfalls homöomorph zu R ist, existiert außerdem ein
+Weg γ2 von a nach 02. Damit existiert ein (nicht einfacher) Weg γ von 01 nach
+02. 
+Lösung zu Aufgabe 9
 Vor.: Sei (X, d) eine absolute Ebene, A, B, C ∈ X und △ABC ein Dreieck.
-104 Lösungen der Übungsaufgaben
-(a) Beh.: AB ∼= AC ⇒ ∠ABC ∼= ∠ACB
-Bew.: Sei AB ∼= AC.
-⇒ ∃ Isometrie ϕ mit ϕ(B) = C und ϕ(C) = B und ϕ(A) = A.
-⇒ ϕ(∠ABC) = ∠ACB
-⇒ ∠ABC ∼= ∠ACB 
-(b) Beh.: Der längeren Seite von 4ABC liegt der größere Winkel gegenüber und umge￾kehrt.
-Bew.: Sei d(A, C) > d(A, B). Nach §3 (i) gibt es C
-0 ∈ AC+ mit d(A, C0
-) = d(A, B)
-⇒ C
-0
-liegt zwischen A und C.
-Es gilt ]ABC0 < ]ABC und aus Aufgabe 9 (a) folgt: ]ABC0 = ]AC0B.
-∠BC0A ist ein nicht anliegender Außenwinkel zu ∠BCA Bem. 66 =====⇒ ]BC0A > ]BCA
-⇒ ]BCA < ]BC0A = ]ABC0 < ]ABC Sei umgekehrt ]ABC > ]BCA, kann
-wegen 1. Teil von Aufgabe 9 (b) nicht d(A, B) > d(A, C) gelten.
-Wegen Aufgabe 9 (a) kann nicht d(A, B) = d(A, C) gelten.
-⇒ d(A, B) < d(A, C) 
-(c) Vor.: Sei g eine Gerade, P ∈ X und P /∈ g
-Beh.: ∃! Lot
-Bew.: ÜB10 A4(a): Es gibt Geradenspiegelung ϕ an g. ϕ vertauscht die beiden
-Halbebenen bzgl. g.
-⇒ ϕ(P)P schneidet g in F.
-Es gibt eine Geradenspiegelung ϕ an g. ϕ vertauscht die beiden Halbebenen bzgl. g
-⇒ ϕ(P)P schneidet g in F.
-Sei A ∈ g \ { F }. Dann gilt ϕ(∠AF P) = ∠AF ϕ(P) = π ⇒ ∠AF P ist rechter Winkel.
-Gäbe es nun G ∈ g \ { F }, so dass P G weiteres Lot von P auf g ist, wäre 4P F G
-ein Dreieck mit zwei rechten Innenwinkeln (vgl. Abbildung 5.4).
-·
-·
-A
-G
-P
-F
-g
-Abbildung 5.4: Zwei Lote zu einer Geraden g durch einen Punkt P
-Nach Folgerung 4.4 ist die Summe von zwei Innenwinkeln immer < π
-⇒ G gibt es nicht. 
-Lösung zu Aufgabe 10
-Sei f k h und o. B. d. A. f k g.
-f ∦ h ⇒ f ∩ h =6 ∅, sei also x ∈ f ∩ h. Mit Axiom §5 folgt: Es gibt höchstens eine Parallele
+ Lösungen der Übungsaufgaben
+(a) Beh.: AB ∼= AC ⇒ ∠ABC ∼= ∠ACB
+Bew.: Sei AB ∼= AC.
+⇒ ∃ Isometrie ϕ mit ϕ(B) = C und ϕ(C) = B und ϕ(A) = A.
+⇒ ϕ(∠ABC) = ∠ACB
+⇒ ∠ABC ∼= ∠ACB 
+(b) Beh.: Der längeren Seite von △ABC liegt der größere Winkel gegenüber und umgekehrt.
+Bew.: Sei $d(A, C) > d(A, B)$. Nach §3 (i) gibt es $C' \in AC^+$ mit $d(A, C') = d(A, B)$
+⇒ $C'$ liegt zwischen A und C.
+Es gilt $\measuredangle ABC' < \measuredangle ABC$ und aus Aufgabe 9 (a) folgt: $\measuredangle ABC' = \measuredangle AC'B$.
+$\angle BC'A$ ist ein nicht anliegender Außenwinkel zu $\angle BCA$ $\xRightarrow{\text{Bem. 66}}$ $\measuredangle BC'A > \measuredangle BCA$
+⇒ $\measuredangle BCA < \measuredangle BC'A = \measuredangle ABC' < \measuredangle ABC$
+Sei umgekehrt $\measuredangle ABC > \measuredangle BCA$. Dann kann
+wegen 1. Teil von Aufgabe 9 (b) nicht $d(A, B) > d(A, C)$ gelten.
+Wegen Aufgabe 9 (a) kann nicht d(A, B) = d(A, C) gelten.
+⇒ d(A, B) < d(A, C) 
+(c) Vor.: Sei g eine Gerade, P ∈ X und P ∉ g
+Beh.: ∃! Lot
+Bew.: ÜB10 A4(a): Es gibt Geradenspiegelung ϕ an g. ϕ vertauscht die beiden
+Halbebenen bzgl. g.
+⇒ ϕ(P)P schneidet g in F.
+Es gibt eine Geradenspiegelung ϕ an g. ϕ vertauscht die beiden Halbebenen bzgl. g
+⇒ ϕ(P)P schneidet g in F.
+Sei A ∈ g \ { F }. Dann gilt ϕ(∠AFP) = ∠AFϕ(P) = π ⇒ ∠AFP ist rechter Winkel.
+Gäbe es nun G ∈ g \ { F }, so dass PG weiteres Lot von P auf g ist, wäre △PFG
+ein Dreieck mit zwei rechten Innenwinkeln (vgl. Abbildung 5.4).
+·
+·
+A
+G
+P
+F
+g
+Abbildung 5.4: Zwei Lote zu einer Geraden g durch einen Punkt P
+Nach Folgerung 4.4 ist die Summe von zwei Innenwinkeln immer < π
+⇒ G gibt es nicht. 
+Lösung zu Aufgabe 10
+Sei f ∥ h und o. B. d. A. f ∥ g.
+f ∦ h ⇒ f ∩ h ≠ ∅, sei also x ∈ f ∩ h. Mit Axiom §5 folgt: Es gibt höchstens eine Parallele
 zu g durch x, da x ∉ g. Diese ist f, da x ∈ f und f ∥ g. Da aber x ∈ h, kann h nicht
-105 Lösungen der Übungsaufgaben
-parallel zu g sein, denn ansonsten gäbe es zwei Parallelen zu g durch x (f 6= h). ⇒ g ∦ h 
-Lösung zu Aufgabe 11
-Sei (X, d, G) eine Geometrie, die §1-§4 erfüllt. Seien außerdem 4ABC und 4A0B0C
-0
-Dreiecke, für die gilt:
-d(A, B) = d(A
-0
-, B0
-)
-d(A, C) = d(A
-0
-, C0
-)
-d(B, C) = d(B
-0
-, C0
-)
-Sei ϕ die Isometrie mit ϕ(A) = A0
-, ϕ(B) = B0 und ϕ(C
-0
-) liegt in der selben Halbebene
-bzgl. AB wie C. Diese Isometrie existiert wegen §4.
-Es gilt d(A, C) = d(A0
-, C0
-) = d(ϕ(A0
-), ϕ(C
-0
-)) = d(A, ϕ(C
-0
-)) und d(B, C) = d(B0
-, C0
-) =
-d(ϕ(B0
-), ϕ(C
-0
-)) = d(B, ϕ(C
-0
-)).
-Bem. 62 =====⇒ C = ϕ(C).
-Es gilt also ϕ(4A0B0C
-0
+ Lösungen der Übungsaufgaben
+parallel zu g sein, denn ansonsten gäbe es zwei Parallelen zu g durch x (f ≠ h). ⇒ g ∦ h ∎
+Lösung zu Aufgabe 11
+Sei (X, d, G) eine Geometrie, die §1-§4 erfüllt. Seien außerdem △ABC und △A'B'C'
+Dreiecke, für die gilt:
+$d(A, B) = d(A', B')$
+$d(A, C) = d(A', C')$
+$d(B, C) = d(B', C')$
+Sei ϕ die Isometrie mit ϕ(A') = A, ϕ(B') = B und ϕ(C') liegt in der selben Halbebene
+bzgl. AB wie C. Diese Isometrie existiert wegen §4.
+Es gilt $d(A, C) = d(A', C') = d(\varphi(A'), \varphi(C')) = d(A, \varphi(C'))$ und
+$d(B, C) = d(B', C') = d(\varphi(B'), \varphi(C')) = d(B, \varphi(C'))$.
+$\xRightarrow{\text{Bem. 62}}$ $C = \varphi(C')$.
+Es gilt also $\varphi(\triangle A'B'C') = \triangle ABC$. ∎
-Bildquellen
-Alle Bilder, die hier nicht aufgeführt sind, wurden von Martin Thoma erstellt.
-Teilweise wurden die im folgenden aufgelisteten Bilder noch leicht modifiziert.
-Abb. 0.1a S
-2
-: Tom Bombadil, tex.stackexchange.com/a/42865
-Abb. 0.1b Würfel: Jan Hlavacek, tex.stackexchange.com/a/12069
-Abb. 0.1e T
-2
-: Jake, tex.stackexchange.com/a/70979/5645
-Abb. 1.6 Stereographische Projektion: texample.net/tikz/examples/map-projections
-Abb. 1.11 Knoten von Jim.belk aus der „Blue knots“-Serie:
-– Trivialer Knoten: commons.wikimedia.org/wiki/File:Blue_Unknot.png
-– Kleeblattknoten: commons.wikimedia.org/wiki/File:Blue_Trefoil_Knot.png
-– Achterknoten: commons.wikimedia.org/wiki/File:Blue_Figure-Eight_Knot.png
-– 62-Knoten: commons.wikimedia.org/wiki/File:Blue_6_2_Knot.png
-Abb. 1.12 Reidemeister-Züge: YAMASHITA Makoto (1, 2, 3)
-Abb. 1.13 Kleeblattknoten, 3-Färbung: Jim.belk, commons.wikimedia.org/wiki/File:Tricoloring.
-png
-Abb. 2.1 Doppeltorus: Oleg Alexandrov, commons.wikimedia.org/wiki/File:Double\_torus\_illustration.
-png
-Abb. 2.8 Faltungsdiagramm: Jérôme Urhausen, Email vom 11.02.2014.
-Abb. 3.3b 3 Pfade auf Torus: Charles Staats, tex.stackexchange.com/a/149991/5645
-Abb. 3.10 Überlagerung von S
-1 mit R: Alex, tex.stackexchange.com/a/149706/5645
-Abb. 4.7a Sphärisches Dreieck: Dominique Toussaint,
-commons.wikimedia.org/wiki/File:Spherical_triangle_3d_opti.png
-Abb. 5.1 Möbiusband: Jake, tex.stackexchange.com/a/118573/5645
+Bildquellen
+Alle Bilder, die hier nicht aufgeführt sind, wurden von Martin Thoma erstellt.
+Teilweise wurden die im Folgenden aufgelisteten Bilder noch leicht modifiziert.
+Abb. 0.1a $S^2$: Tom Bombadil, tex.stackexchange.com/a/42865
+Abb. 0.1b Würfel: Jan Hlavacek, tex.stackexchange.com/a/12069
+Abb. 0.1e $T^2$: Jake, tex.stackexchange.com/a/70979/5645
+Abb. 1.6 Stereographische Projektion: texample.net/tikz/examples/map-projections
+Abb. 1.11 Knoten von Jim.belk aus der „Blue knots“-Serie:
+– Trivialer Knoten: commons.wikimedia.org/wiki/File:Blue_Unknot.png
+– Kleeblattknoten: commons.wikimedia.org/wiki/File:Blue_Trefoil_Knot.png
+– Achterknoten: commons.wikimedia.org/wiki/File:Blue_Figure-Eight_Knot.png
+– $6_2$-Knoten: commons.wikimedia.org/wiki/File:Blue_6_2_Knot.png
+Abb. 1.12 Reidemeister-Züge: YAMASHITA Makoto (1, 2, 3)
+Abb. 1.13 Kleeblattknoten, 3-Färbung: Jim.belk, commons.wikimedia.org/wiki/File:Tricoloring.png
+Abb. 2.1 Doppeltorus: Oleg Alexandrov, commons.wikimedia.org/wiki/File:Double_torus_illustration.png
+Abb. 2.8 Faltungsdiagramm: Jérôme Urhausen, Email vom 11.02.2014.
+Abb. 3.3b 3 Pfade auf Torus: Charles Staats, tex.stackexchange.com/a/149991/5645
+Abb. 3.10 Überlagerung von $S^1$ mit $\mathbb{R}$: Alex, tex.stackexchange.com/a/149706/5645
+Abb. 4.7a Sphärisches Dreieck: Dominique Toussaint,
+commons.wikimedia.org/wiki/File:Spherical_triangle_3d_opti.png
+Abb. 5.1 Möbiusband: Jake, tex.stackexchange.com/a/118573/5645
 Abb. 5.3 Krümmung des Torus: Charles Staats, tex.stackexchange.com/a/149991/5645
-Abkürzungsverzeichnis
-Beh. Behauptung
-Bew. Beweis
-bzgl. bezüglich
-bzw. beziehungsweise
-ca. circa
-d. h. das heißt
-Def. Definition
-etc. et cetera
-ex. existieren
-Hom. Homomorphismus
-o. B. d. A. ohne Beschränkung der Allgemeinheit
-Prop. Proposition
-sog. sogenannte
-Vor. Voraussetzung
-vgl. vergleiche
-z. B. zum Beispiel
-zhgd. zusammenhängend
+Abkürzungsverzeichnis
+Beh. Behauptung
+Bew. Beweis
+bzgl. bezüglich
+bzw. beziehungsweise
+ca. circa
+d. h. das heißt
+Def. Definition
+etc. et cetera
+ex. existieren
+Hom. Homomorphismus
+o. B. d. A. ohne Beschränkung der Allgemeinheit
+Prop. Proposition
+sog. sogenannte
+Vor. Voraussetzung
+vgl. vergleiche
+z. B. zum Beispiel
+zhgd. zusammenhängend
 z. z. zu zeigen
-Ergänzende Definitionen und Sätze
-Da dieses Skript in die Geometrie und Topologie einführen soll, sollten soweit wie möglich alle
-benötigten Begriffe definiert und erklärt werden. Die folgenden Begriffe wurden zwar verwendet,
-aber nicht erklärt, da sie Bestandteil der Vorlesungen „Analysis I und II“ sowie „Lineare Algebra
-und analytische Geometrie I und II“ sind. Jedoch will ich zumindest die Definitionen bereitstellen.
-Definition 81
-Sei D ⊆ R und x0 ∈ R. x0 heißt ein Häufungspunkt von D :⇔ ∃ Folge xn in D \ { x0 }
-mit xn → x0.
-Folgende Definition wurde dem Skript von Herrn Prof. Dr. Leuzinger für Lineare Algebra
-entnommen:
-Definition 82
-Es seien V und W K-Vektorräume und A(V ) und A(W) die zugehörigen affinen Räume.
-Eine Abbildung f : V → W heißt affin, falls für alle a, b ∈ V und alle λ, µ ∈ K mit λ+µ = 1
-gilt:
-f(λa + µb) = λf(a) + µf(b)
-Definition 83
-Sei V ein Vektorraum und S ⊆ V eine Teilmenge.
-S heißt eine Orthonormalbasis von V , wenn gilt:
-(i) S ist eine Basis von V
-(ii) ∀v ∈ S : kvk = 1
-(iii) ∀v1, v2 ∈ S : v1 6= v2 ⇒ hv1, v2i = 0
-Satz (Zwischenwertsatz)
-Sei a < b und f ∈ C[a, b] := C([a, b]), weiter sei y0 ∈ R und f(a) < y0 < f(b) oder
-f(b) < y0 < f(a). Dann existiert ein x0 ∈ [a, b] mit f(x0) = y0.
-Definition 84
-Sei V ein Vektorraum über einem Körper K und f : V → V eine lineare Abbildung.
-v ∈ V \ { 0 } heißt Eigenvektor :⇔ ∃λ ∈ K : f(v) = λv.
-Wenn ein solches λ ∈ K existiert, heißt es Eigenwert von f.
-Satz (Binomischer Lehrsatz)
-Sei x, y ∈ R. Dann gilt:
-(x + y)
-n =
-Xn
-k=0
-
-n
-k
-
-x
-n−k
-y
-k ∀n ∈ N0
-Definition 85
-Seien a, b ∈ R
-3 Vektoren.
-a × b :=
-
-
-a1
-b3
-a3
-
- ×
-
-
-a1
-b3
-a3
-
- =
-
-
-a2b3 − a3b2
-a3b1 − a1b3
-a1b2 − a2b1
-
+Ergänzende Definitionen und Sätze
+Da dieses Skript in die Geometrie und Topologie einführen soll, sollten soweit wie möglich alle
+benötigten Begriffe definiert und erklärt werden. Die folgenden Begriffe wurden zwar verwendet,
+aber nicht erklärt, da sie Bestandteil der Vorlesungen „Analysis I und II“ sowie „Lineare Algebra
+und analytische Geometrie I und II“ sind. Jedoch will ich zumindest die Definitionen bereitstellen.
+Definition 81
+Sei D ⊆ R und x0 ∈ R. x0 heißt ein Häufungspunkt von D :⇔ ∃ Folge xn in D \ { x0 }
+mit xn → x0.
+Folgende Definition wurde dem Skript von Herrn Prof. Dr. Leuzinger für Lineare Algebra
+entnommen:
+Definition 82
+Es seien V und W K-Vektorräume und A(V ) und A(W) die zugehörigen affinen Räume.
+Eine Abbildung f : V → W heißt affin, falls für alle a, b ∈ V und alle λ, µ ∈ K mit λ+µ = 1
+gilt:
+f(λa + µb) = λf(a) + µf(b)
+Definition 83
+Sei V ein Vektorraum und S ⊆ V eine Teilmenge.
+S heißt eine Orthonormalbasis von V , wenn gilt:
+(i) S ist eine Basis von V
+(ii) ∀v ∈ S : ‖v‖ = 1
+(iii) ∀v1, v2 ∈ S : v1 ≠ v2 ⇒ ⟨v1, v2⟩ = 0
+Satz (Zwischenwertsatz)
+Sei a < b und f ∈ C[a, b] := C([a, b]), weiter sei y0 ∈ R und f(a) < y0 < f(b) oder
+f(b) < y0 < f(a). Dann existiert ein x0 ∈ [a, b] mit f(x0) = y0.
+Definition 84
+Sei V ein Vektorraum über einem Körper K und f : V → V eine lineare Abbildung.
+v ∈ V \ { 0 } heißt Eigenvektor :⇔ ∃λ ∈ K : f(v) = λv.
+Wenn ein solches λ ∈ K existiert, heißt es Eigenwert von f.
+Satz (Binomischer Lehrsatz)
+Sei x, y ∈ R. Dann gilt:
+$(x + y)^n = \sum_{k=0}^{n} \binom{n}{k} x^{n-k} y^k \quad \forall n \in \mathbb{N}_0$
+Definition 85
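+Seien $a, b \in \mathbb{R}^3$ Vektoren.
+$a \times b := \begin{pmatrix} a_1 \\ a_2 \\ a_3 \end{pmatrix} \times \begin{pmatrix} b_1 \\ b_2 \\ b_3 \end{pmatrix} = \begin{pmatrix} a_2 b_3 - a_3 b_2 \\ a_3 b_1 - a_1 b_3 \\ a_1 b_2 - a_2 b_1 \end{pmatrix}$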
-Symbolverzeichnis
-Mengenoperationen
-Seien A, B und M Mengen.
-AC Komplement von A
-P(M) Potenzmenge von M
-M Abschluss von M
-∂M Rand der Menge M
-M◦
-Inneres der Menge M
-A × B Kreuzprodukt
-A ⊆ B Teilmengenbeziehung
-A ( B echte Teilmengenbeziehung
-A \ B Differenzmenge
-A ∪ B Vereinigung
-A ∪˙ B Disjunkte Vereinigung
-A ∩ B Schnitt
-Geometrie
-AB Gerade durch die Punkte A und
-B
-AB Strecke mit Endpunkten A und B
-4ABC Dreieck mit Eckpunkten A, B, C
-AB ∼= CD Die Strecken AB und CD sind
-isometrisch
-|K| Geometrische Realisierung des
-Simplizialkomplexes K
-Gruppen
-Sei X ein topologischer Raum und K ein Kör￾per.
-Homöo(X) Homöomorphismengruppe
-Iso(X) Isometriengruppe
-GLn(K) Allgemeine lineare Gruppe (von
-General Linear Group)
-SLn(K) Spezielle lineare Gruppe
-PSLn(K) Projektive lineare Gruppe
-Perm(X) Permutationsgruppe
-Sym(X) Symmetrische Gruppe
-Wege
-Sei γ : I → X ein Weg.
-[γ] Homotopieklasse von γ
-γ1 ∗ γ2 Zusammenhängen von Wegen
-γ1 ∼ γ2 Homotopie von Wegen
-γ(x) Inverser Weg, also γ(x) := γ(1 − x)
-C Bild eines Weges γ, also C :=
-γ([0, 1])
-Weiteres
-B Basis einer Topologie
-Bδ(x) δ-Kugel um x
-S Subbasis einer Topologie
-T Topologie
-A Atlas
-P Projektiver Raum
-h·, ·i Skalarprodukt
-X/∼ X modulo ∼
-[x]∼ Äquivalenzklassen von x bzgl. ∼
-kxk Norm von x
-|x| Betrag von x
-hai Erzeugnis von a
-S
-n Sphäre
-T
-n Torus
-f ◦ g Verkettung von f und g
-πX Projektion auf X
-f|U f eingeschränkt auf U
-f
-−1
-(M) Urbild von M
-Rg(M) Rang von M
+Symbolverzeichnis
+Mengenoperationen
+Seien A, B und M Mengen.
+$A^C$ Komplement von A
+$\mathcal{P}(M)$ Potenzmenge von M
+$\overline{M}$ Abschluss von M
+∂M Rand der Menge M
+$M^\circ$ Inneres der Menge M
+A × B Kreuzprodukt
+A ⊆ B Teilmengenbeziehung
+A ⊊ B echte Teilmengenbeziehung
+A \ B Differenzmenge
+A ∪ B Vereinigung
+A ∪˙ B Disjunkte Vereinigung
+A ∩ B Schnitt
+Geometrie
+AB Gerade durch die Punkte A und B
+$\overline{AB}$ Strecke mit Endpunkten A und B
+△ABC Dreieck mit Eckpunkten A, B, C
+$\overline{AB} \cong \overline{CD}$ Die Strecken AB und CD sind isometrisch
+|K| Geometrische Realisierung des
+Simplizialkomplexes K
+Gruppen
+Sei X ein topologischer Raum und K ein Körper.
+Homöo(X) Homöomorphismengruppe
+Iso(X) Isometriengruppe
+GLn(K) Allgemeine lineare Gruppe (von
+General Linear Group)
+SLn(K) Spezielle lineare Gruppe
+PSLn(K) Projektive lineare Gruppe
+Perm(X) Permutationsgruppe
+Sym(X) Symmetrische Gruppe
+Wege
+Sei γ : I → X ein Weg.
+[γ] Homotopieklasse von γ
+γ1 ∗ γ2 Zusammenhängen von Wegen
+γ1 ∼ γ2 Homotopie von Wegen
+$\overline{\gamma}(x)$ Inverser Weg, also $\overline{\gamma}(x) := \gamma(1 - x)$
+C Bild eines Weges γ, also C :=
+γ([0, 1])
+Weiteres
+B Basis einer Topologie
+Bδ(x) δ-Kugel um x
+S Subbasis einer Topologie
+T Topologie
+A Atlas
+P Projektiver Raum
+⟨·, ·⟩ Skalarprodukt
+X/∼ X modulo ∼
+[x]∼ Äquivalenzklassen von x bzgl. ∼
+‖x‖ Norm von x
+|x| Betrag von x
+⟨a⟩ Erzeugnis von a
+$S^n$ Sphäre
+$T^n$ Torus
+f ◦ g Verkettung von f und g
+πX Projektion auf X
+f|U f eingeschränkt auf U
+$f^{-1}(M)$ Urbild von M
+Rg(M) Rang von M
 χ(K) Euler-Charakteristik von K
-110 Symbolverzeichnis
-∆k Standard-Simplex
-X#Y Verklebung von X und Y
-dn Lineare Abbildung aus Bemer￾kung 37
-A ∼= B A ist isometrisch zu B
-f∗ Abbildung zwischen Fundamental￾gruppen (vgl. Seite 49)
-111 Symbolverzeichnis
-Zahlenmengen
-N = { 1, 2, 3, . . . } Natürliche Zahlen
-Z = N ∪ { 0, −1, −2, . . . } Ganze Zahlen
-Q = Z ∪
- 1
-2
-,
-1
-3
-,
-2
-3
-	
-=
- z
-n mit z ∈ Z und n ∈ Z \ { 0 }
-	
-Rationale Zahlen
-R = Q ∪
- √
-2, −
-√3
-3, . . . 	
-Reele Zahlen
-R+ Echt positive reele Zahlen
-R
-n
-+,0
-:= { (x1, . . . , xn) ∈ R
-n
-| xn ≥ 0 } Halbraum
-R
-× = R \ { 0 } Einheitengruppe von R
-C = { a + ib | a, b ∈ R } Komplexe Zahlen
-P = { 2, 3, 5, 7, . . . } Primzahlen
-H = { z ∈ C | =z > 0 } obere Halbebene
-I = [0, 1] ( R Einheitsintervall
-f : S
-1
-,→ R
-2 Einbettung der Kreislinie in die Ebene
-π1(X, x) Fundamentalgruppe im topologischen Raum X um x ∈ X
-Fix(f) Menge der Fixpunkte der Abbildung f
-k · k2 2-Norm; Euklidische Norm
-κ Krümmung
-κNor Normalenkrümmung
-V (f) Nullstellenmenge von f
-2
-Krümmung
-DpF : R
-2 → R
-3 Lineare Abbildung mit Jacobi-Matrix in p (siehe Seite 89)
-TsS Tangentialebene an S ⊆ R
-3 durch s ∈ S
-dsn(x) Weingarten-Abbildung
-2
+ Symbolverzeichnis
+∆k Standard-Simplex
+X#Y Verklebung von X und Y
+dn Lineare Abbildung aus Bemerkung 37
+A ∼= B A ist isometrisch zu B
+f∗ Abbildung zwischen Fundamentalgruppen (vgl. Seite 49)
+ Symbolverzeichnis
+Zahlenmengen
+N = { 1, 2, 3, . . . } Natürliche Zahlen
+Z = N ∪ { 0, −1, −2, . . . } Ganze Zahlen
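+$\mathbb{Q} = \mathbb{Z} \cup \left\{ \tfrac{1}{2}, \tfrac{1}{3}, \tfrac{2}{3} \right\} = \left\{ \tfrac{z}{n} \text{ mit } z \in \mathbb{Z} \text{ und } n \in \mathbb{Z} \setminus \{ 0 \} \right\}$ Rationale Zahlen
+$\mathbb{R} = \mathbb{Q} \cup \left\{ \sqrt{2}, -\sqrt[3]{3}, \dots \right\}$ Reelle Zahlen
+R+ Echt positive reelle Zahlen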
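+$\mathbb{R}^n_{+,0} := \{ (x_1, \dots, x_n) \in \mathbb{R}^n \mid x_n \geq 0 \}$ Halbraum
+$\mathbb{R}^\times = \mathbb{R} \setminus \{ 0 \}$ Einheitengruppe von $\mathbb{R}$
+$\mathbb{C} = \{ a + ib \mid a, b \in \mathbb{R} \}$ Komplexe Zahlen
+$\mathbb{P} = \{ 2, 3, 5, 7, \dots \}$ Primzahlen
+$\mathbb{H} = \{ z \in \mathbb{C} \mid \Im z > 0 \}$ obere Halbebene
+$I = [0, 1] \subsetneq \mathbb{R}$ Einheitsintervall
+$f : S^1 \hookrightarrow \mathbb{R}^2$ Einbettung der Kreislinie in die Ebene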
+π1(X, x) Fundamentalgruppe im topologischen Raum X um x ∈ X
+Fix(f) Menge der Fixpunkte der Abbildung f
+‖·‖2 2-Norm; Euklidische Norm
+κ Krümmung
+κNor Normalenkrümmung
+V (f) Nullstellenmenge von f
+2
+Krümmung
+DpF : R
+2 → R3 Lineare Abbildung mit Jacobi-Matrix in p (siehe Seite 89)
+TsS Tangentialebene an S ⊆ R
+3 durch s ∈ S
+dsn(x) Weingarten-Abbildung
+2
 von Vanishing Set
-Stichwortverzeichnis
-Abbildung
-affine, 107
-differenzierbare, 29
-homotope, 50
-offene, 53
-simpliziale, 35
-stetige, 9
-Abschluss, 3
-Abstand, 86
-Abstandsaxiom, 65
-Achterknoten, 20
-Aktion, siehe Gruppenoperation
-Anordnungsaxiome, 66
-Atlas, 24
-Außenwinkel, 70
-Axiom, 64
-Axiomensystem, 64
-Basis, 3
-Baum, 37
-Betti-Zahl, 41
-Bewegungsaxiom, 66
-Binormalenvektor, 89
-Cantorsches Diskontinuum, 22
-C
-k
--Struktur, 29
-Decktransformation, 59
-Decktransformationsgruppe, 59
-Deformationsretrakt, 47
-dicht, 3
-Diffeomorphismus, 29
-Dimension, 34
-diskret, 53
-Doppelverhältnis, 83
-Dreibein
-begleitendes, 89
-Ebene
-euklidische, 64
-Eigenvektor, 107
-Eigenwert, 107
-einfach zusammenhängend, 49
-Einheitsnormalenfeld, 90
-Euler-Charakteristik, siehe Eulerzahl
-Eulersche Polyederformel, 38
-Eulerzahl, 36
-Färbbarkeit, 21
-Faser, siehe Urbild
-Fläche
-orientierbare, 90
-reguläre, 30
-Flächenelement, 95
-Formoperator, siehe Weingarten-Abbildung
-Fundamentalform
-erste, 94
-zweite, 97
-Fundamentalgruppe, 47
-Gauß-Krümmung, 92, 91–94
-Geometrie, 64
-Gerade, 64
-hyperbolische, 77
-Graph, 37
-Grenzwert, 8
-Gruppe
-allgemeine lineare, 22, 26
-spezielle lineare, 22
-topologische, 33
-Gruppe operiert durch Homöomorphismen,
-61
-Gruppenaktion, siehe Gruppenoperation
-Gruppenoperation, 60, 60–63
-stetige, 61
-Häufungspunkt, 107
-Hülle
-konvexe, 34
-Halbebene, 66
-Halbgerade, 65
-Halbraum, 28
-Hauptkrümmung, 92
+Stichwortverzeichnis
+Abbildung
+affine, 107
+differenzierbare, 29
+homotope, 50
+offene, 53
+simpliziale, 35
+stetige, 9
+Abschluss, 3
+Abstand, 86
+Abstandsaxiom, 65
+Achterknoten, 20
+Aktion, siehe Gruppenoperation
+Anordnungsaxiome, 66
+Atlas, 24
+Außenwinkel, 70
+Axiom, 64
+Axiomensystem, 64
+Basis, 3
+Baum, 37
+Betti-Zahl, 41
+Bewegungsaxiom, 66
+Binormalenvektor, 89
+Cantorsches Diskontinuum, 22
+$C^k$-Struktur, 29
+Decktransformation, 59
+Decktransformationsgruppe, 59
+Deformationsretrakt, 47
+dicht, 3
+Diffeomorphismus, 29
+Dimension, 34
+diskret, 53
+Doppelverhältnis, 83
+Dreibein
+begleitendes, 89
+Ebene
+euklidische, 64
+Eigenvektor, 107
+Eigenwert, 107
+einfach zusammenhängend, 49
+Einheitsnormalenfeld, 90
+Euler-Charakteristik, siehe Eulerzahl
+Eulersche Polyederformel, 38
+Eulerzahl, 36
+Färbbarkeit, 21
+Faser, siehe Urbild
+Fläche
+orientierbare, 90
+reguläre, 30
+Flächenelement, 95
+Formoperator, siehe Weingarten-Abbildung
+Fundamentalform
+erste, 94
+zweite, 97
+Fundamentalgruppe, 47
+Gauß-Krümmung, 92, 91–94
+Geometrie, 64
+Gerade, 64
+hyperbolische, 77
+Graph, 37
+Grenzwert, 8
+Gruppe
+allgemeine lineare, 22, 26
+spezielle lineare, 22
+topologische, 33
+Gruppe operiert durch Homöomorphismen,
+61
+Gruppenaktion, siehe Gruppenoperation
+Gruppenoperation, 60, 60–63
+stetige, 61
+Häufungspunkt, 107
+Hülle
+konvexe, 34
+Halbebene, 66
+Halbgerade, 65
+Halbraum, 28
+Hauptkrümmung, 92
 Hilbert-Kurve, 19, 19
-113 Stichwortverzeichnis
-Homöomorphismengruppe, 10
-Homöomorphismus, 9
-Homologiegruppe, 41
-Homomorphismus, 101
-Homotopie, 44
-Homotopieklasse, 47
-Inklusionsabbildung, 47
-Innenwinkel, 70
-Inneres, 3
-Inzidenzaxiome, 64
-Isometrie, 6, 10
-Isometriegruppe, 10
-Isomorphismus, 101
-Isotopie, 20
-Jordankurve, 19
-geschlossene, 19
-Karte, 24
-Kartenwechsel, 28
-Kern
-offener, 3
-Kleeblattknoten, 20
-Klumpentopologie, siehe triviale Topologie
-Knoten, 20, 17–21
-äquivalente, 20
-trivialer, 20
-Knotendiagramm, 20
-kollinear, 65
-kongruent, siehe isometrisch
-Kongruenz, siehe Isometrie
-Kongruenzsatz
-SSS, 104
-SWS, 69
-SWW, 74
-WSW, 70
-Krümmung, 88, 89
-Kreis, 37
-Kreuzprodukt, 107
-Kurve, 87
-Länge einer, 87
-Lage
-allgemeine, 34
-Lehrsatz
-Binomischer, 107
-Lie-Gruppe, 33
-liegt zwischen, 65
-Liftung, 54
-Limes, 8
-lokal, 3
-Lot, 86
-Lotfußpunkt, 86
-Möbiusband, 91
-Möbiustransformation, 80
-Mannigfaltigkeit, 24
-differenzierbare, 29
-geschlossene, 25
-glatte, 29
-mit Rand, 28
-Menge
-abgeschlossene, 2
-offene, 2
-zusammenhängende, 11
-Metrik, 6
-diskrete, 6
-hyperbolische, 84
-SNCF, 8
-Nebenwinkel, 86
-Neilsche Parabel, 27
-Normalenfeld, 90
-Normalenvektor, 87, 89
-Normalkrümmung, 91, 92, 98
-Oktaeder, 34
-Orthonormalbasis, 107
-Paraboloid
-hyperbolisches, 92
-Parallele, 66
-Parallelenaxiom, 64
-parametrisiert
-durch Bogenlänge, 87
-Parametrisierung
-reguläre, 30
-Polyzylinder, 17
-Produkttopologie, 4
-Projektion
-stereographische, 11
-Punkt, 34
-Quotiententopologie, 5, 10, 11
-Rand, 3, 28
-Raum
-hausdorffscher, 8
-kompakter, 14
-metrischer, 6
+ Stichwortverzeichnis
+Homöomorphismengruppe, 10
+Homöomorphismus, 9
+Homologiegruppe, 41
+Homomorphismus, 101
+Homotopie, 44
+Homotopieklasse, 47
+Inklusionsabbildung, 47
+Innenwinkel, 70
+Inneres, 3
+Inzidenzaxiome, 64
+Isometrie, 6, 10
+Isometriegruppe, 10
+Isomorphismus, 101
+Isotopie, 20
+Jordankurve, 19
+geschlossene, 19
+Karte, 24
+Kartenwechsel, 28
+Kern
+offener, 3
+Kleeblattknoten, 20
+Klumpentopologie, siehe triviale Topologie
+Knoten, 20, 17–21
+äquivalente, 20
+trivialer, 20
+Knotendiagramm, 20
+kollinear, 65
+kongruent, siehe isometrisch
+Kongruenz, siehe Isometrie
+Kongruenzsatz
+SSS, 104
+SWS, 69
+SWW, 74
+WSW, 70
+Krümmung, 88, 89
+Kreis, 37
+Kreuzprodukt, 107
+Kurve, 87
+Länge einer, 87
+Lage
+allgemeine, 34
+Lehrsatz
+Binomischer, 107
+Lie-Gruppe, 33
+liegt zwischen, 65
+Liftung, 54
+Limes, 8
+lokal, 3
+Lot, 86
+Lotfußpunkt, 86
+Möbiusband, 91
+Möbiustransformation, 80
+Mannigfaltigkeit, 24
+differenzierbare, 29
+geschlossene, 25
+glatte, 29
+mit Rand, 28
+Menge
+abgeschlossene, 2
+offene, 2
+zusammenhängende, 11
+Metrik, 6
+diskrete, 6
+hyperbolische, 84
+SNCF, 8
+Nebenwinkel, 86
+Neilsche Parabel, 27
+Normalenfeld, 90
+Normalenvektor, 87, 89
+Normalkrümmung, 91, 92, 98
+Oktaeder, 34
+Orthonormalbasis, 107
+Paraboloid
+hyperbolisches, 92
+Parallele, 66
+Parallelenaxiom, 64
+parametrisiert
+durch Bogenlänge, 87
+Parametrisierung
+reguläre, 30
+Polyzylinder, 17
+Produkttopologie, 4
+Projektion
+stereographische, 11
+Punkt, 34
+Quotiententopologie, 5, 10, 11
+Rand, 3, 28
+Raum
+hausdorffscher, 8
+kompakter, 14
+metrischer, 6
 projektiver, 5, 22, 25, 52
-114 Stichwortverzeichnis
-topologischer, 2
-zusammenhängender, 11
-Realisierung
-geometrische, 34
-Retraktion, 47
-Satz von
-Gauß-Bonnet, 98
-Scheitelwinkel, 86
-Seite, 34
-Sierpińskiraum, 3, 22
-Simplex, 34
-Simplizialkomplex, 34
-Simplizialkomplexe
-flächengleiche, 74
-Sphäre
-exotische, 29
-Standard-Simplex, 34
-Standardtopologie, 2
-sternförmig, 48
-Stetigkeit, 9–11
-Strecke, 65
-Struktur
-differenzierbare, 29
-Subbasis, 3
-Tangentialebene, 89, 89–90
-Teilraum, 4
-Teilraumtopologie, 4
-Teilsimplex, 34
-Topologie
-diskrete, 2, 6
-euklidische, 2
-feinste, 11
-triviale, 2
-Zariski, 2, 12, 15
-Torus, iii, 5, 38, 51, 93
-Total Unzusammenhängend, 100
-Triangulierung, 38
-Überdeckung, 14
-Übergangsfunktion, siehe Kartenwechsel
-Überlagerung, 51, 51–60
-reguläre, 59
-universelle, 57
-Umgebung, 3
-Umgebungsbasis, 58
-vanishing set, 26
-Vektorprodukt, siehe Kreuzprodukt
-Verklebung, 26
-verträglich, 29
-Würfel, 34
-Weg, 17
-einfacher, 17
-geschlossener, 17
-homotope, 44
-inverser, 48
-zusammengesetzter, 46
-Wegzusammenhang, 18
-Weingarten-Abbildung, 95
-Winkel, 70
-Zusammenhang, 11–14
-Zusammenhangskomponente, 13
-Zwischenwertsatz, 107
+ Stichwortverzeichnis
+topologischer, 2
+zusammenhängender, 11
+Realisierung
+geometrische, 34
+Retraktion, 47
+Satz von
+Gauß-Bonnet, 98
+Scheitelwinkel, 86
+Seite, 34
+Sierpińskiraum, 3, 22
+Simplex, 34
+Simplizialkomplex, 34
+Simplizialkomplexe
+flächengleiche, 74
+Sphäre
+exotische, 29
+Standard-Simplex, 34
+Standardtopologie, 2
+sternförmig, 48
+Stetigkeit, 9–11
+Strecke, 65
+Struktur
+differenzierbare, 29
+Subbasis, 3
+Tangentialebene, 89, 89–90
+Teilraum, 4
+Teilraumtopologie, 4
+Teilsimplex, 34
+Topologie
+diskrete, 2, 6
+euklidische, 2
+feinste, 11
+triviale, 2
+Zariski, 2, 12, 15
+Torus, iii, 5, 38, 51, 93
+Total Unzusammenhängend, 100
+Triangulierung, 38
+Überdeckung, 14
+Übergangsfunktion, siehe Kartenwechsel
+Überlagerung, 51, 51–60
+reguläre, 59
+universelle, 57
+Umgebung, 3
+Umgebungsbasis, 58
+vanishing set, 26
+Vektorprodukt, siehe Kreuzprodukt
+Verklebung, 26
+verträglich, 29
+Würfel, 34
+Weg, 17
+einfacher, 17
+geschlossener, 17
+homotope, 44
+inverser, 48
+zusammengesetzter, 46
+Wegzusammenhang, 18
+Weingarten-Abbildung, 95
+Winkel, 70
+Zusammenhang, 11–14
+Zusammenhangskomponente, 13
+Zwischenwertsatz, 107
\ No newline at end of file
diff --git a/read/results/pdfminer/1602.06541.txt b/read/results/pdfminer/1602.06541.txt
index 8ca229f..48f4936 100644
--- a/read/results/pdfminer/1602.06541.txt
+++ b/read/results/pdfminer/1602.06541.txt
@@ -1969,12 +1969,12 @@ J.
 
 13
 
-30,
-
 10,
 
 no.
 
+30,
+
 Analysis
 
 1699–1712,
@@ -2046,10 +2046,10 @@ Dec.
 
 2,
 
-no.
-
 pp.
 
+no.
+
 Jun.
 
 of
diff --git a/read/results/pdfminer/1707.09725.txt b/read/results/pdfminer/1707.09725.txt
index 5d9257e..8e21522 100644
--- a/read/results/pdfminer/1707.09725.txt
+++ b/read/results/pdfminer/1707.09725.txt
@@ -6211,18 +6211,18 @@ Softsign
 
 Softmax
 
-81.46 % σ = 5.08
-
 88.41 % σ = 0.36
 
+81.46 % σ = 5.08
+
 88.19 % σ = 0.31
 
 87.92 % σ = 0.40
 
-79.67 % σ = 4.85
-
 84.70 % σ = 0.15
 
+79.67 % σ = 4.85
+
 84.69 % σ = 0.08
 
 88.59 % 85.43 % 92 – 140
@@ -6248,10 +6248,10 @@ ELU
 
 84.46 % σ = 0.23
 
-88.61 % σ = 0.41
-
 88.00 % σ = 0.47
 
+88.61 % σ = 0.41
+
 Softplus
 
 ReLU
@@ -6324,16 +6324,16 @@ ELU
 
 75.5
 
-83.2
+80.1
 
 78.8
 
-80.1
-
-67.2
+83.2
 
 68.9
 
+67.2
+
 Table A.3.: Test accuracy of adjusted baseline models trained with diﬀerent activation functions on
 
 STL-10. For LReLU, α = 0.3 was chosen.
@@ -8596,11 +8596,11 @@ Springer, 2003, vol. 53. [Online]. Available: https://dx.doi.org/10.1007/978-3-
 S. E. Fahlman, “An empirical study of learning speed in back-propagation
 http://repository.cmu.edu/cgi/
 
+[Online]. Available:
+
 networks,”
 viewcontent.cgi?article=2799&context=compsci
 
-[Online]. Available:
-
 1988.
 
 L. Fei-Fei, R. Fergus, and P. Perona,
diff --git a/read/results/pdfminer/2201.00022.txt b/read/results/pdfminer/2201.00022.txt
index e4ebdb5..54b836f 100644
--- a/read/results/pdfminer/2201.00022.txt
+++ b/read/results/pdfminer/2201.00022.txt
@@ -1,15 +1,16 @@
-Draft version January 4, 2022
+Draft version July 7, 2022
 Typeset using LATEX twocolumn style in AASTeX631
 
-1
+2
 2
 0
 2
-c
-e
-D
-1
-3
+
+l
+u
+J
+
+6
 
 ]
 
@@ -26,7 +27,7 @@ s
 a
 [
 
-1
+2
 v
 2
 2
@@ -55,17 +56,22 @@ Sanaea C. Rose,1, 2 Smadar Naoz,1, 2 Re’em Sari,3 and Itai Linial3
 ABSTRACT
 
 Most stellar evolution models predict that black holes (BHs) should not exist above approximately
-50 − 70 M(cid:12). However, recent LIGO/Virgo detections indicate the existence of BHs with masses at and
-above this threshold. We suggest that massive BHs, including intermediate mass black holes (IMBHs),
-can form in galactic nuclei through collisions between stellar-mass black holes and the surrounding
-main-sequence stars. Considering dynamical processes such as collisions, mass segregation, and relax-
-ation, we ﬁnd that this channel can be quite eﬃcient, forming IMBHs as massive as 104 M(cid:12). Our
-results suggest that massive black holes and IMBHs may be ubiquitous in galactic centres. This for-
-mation channel also has implications for observations. Collisions between stars and BHs can produce
-electromagnetic signatures, for example, from x-ray binaries and tidal disruption events. Additionally,
-formed through this channel, both black holes in the mass gap and IMBHs can merge with the super-
-massive black hole at the center of a galactic nucleus through gravitational waves. These gravitational
-wave events are extreme and intermediate mass ratio inspirals (EMRIs and IMRIs, respectively).
+50 − 70 M(cid:12), the lower limit of the pair-instability mass gap. However, recent LIGO/Virgo detections
+indicate the existence of BHs with masses at and above this threshold. We suggest that massive
+BHs, including intermediate mass black holes (IMBHs), can form in galactic nuclei through collisions
+between stellar-mass black holes and the surrounding main-sequence stars. Considering dynamical
+processes such as collisions, mass segregation, and relaxation, we ﬁnd that this channel can be quite
+eﬃcient, forming IMBHs as massive as 104 M(cid:12). This upper limit assumes that (1) the BHs accrete a
+substantial fraction of the stellar mass captured during each collision and (2) that the rate at which
+new stars are introduced into the region near the SMBH is high enough to oﬀset depletion by stellar
+disruptions and star-star collisions. We discuss deviations from these key assumptions in the text. Our
+results suggest that BHs in the pair-instability mass gap and IMBHs may be ubiquitous in galactic
+centers. This formation channel has implications for observations. Collisions between stars and BHs
+can produce electromagnetic signatures, for example, from x-ray binaries and tidal disruption events.
+Additionally, formed through this channel, both black holes in the mass gap and IMBHs can merge
+with the supermassive black hole at the center of a galactic nucleus through gravitational waves.
+These gravitational wave events are extreme and intermediate mass ratio inspirals (EMRIs and IMRIs,
+respectively).
 
 1. INTRODUCTION
 
@@ -82,11 +88,6 @@ GW170104, and GW170814 fall within the mass gap
 form second generation BHs and, in some cases, inter-
 mediate mass BHs (IMBHs), these gravitational wave
 (GW) events can occur in globular clusters, young stel-
-lar clusters, or the ﬁeld (e.g., Rodriguez et al. 2018; Ro-
-driguez et al. 2019; Fishbach et al. 2020; Mapelli et al.
-2021b,a; Di Carlo et al. 2019, 2021; Dall’Amico et al.
-2021; Arca Sedda et al. 2021). However, IMBHs are
-not limited to these locations and may reside in galac-
 
 Corresponding author: Sanaea C. Rose
 srose@astro.ucla.edu
@@ -96,6 +97,11 @@ metallicity of the progenitor (e.g., Woosley 2017; Spera & Mapelli
 2017a; Limongi & Chieﬃ 2018a; Sakstein et al. 2020; Belczynski
 et al. 2020a; Renzo et al. 2020; Vink et al. 2021).
 
+lar clusters, or the ﬁeld (e.g., Rodriguez et al. 2018; Ro-
+driguez et al. 2019; Fishbach et al. 2020; Mapelli et al.
+2021b,a; Di Carlo et al. 2019, 2021; Dall’Amico et al.
+2021; Arca Sedda et al. 2021). However, IMBHs are
+not limited to these locations and may reside in galac-
 tic nuclei as well. Several studies propose that our
 own galactic center may host an IMBH in the inner pc
 (e.g., Hansen & Milosavljevi´c 2003; Maillard et al. 2004;
@@ -114,16 +120,6 @@ lated gas (e.g., Begelman et al. 2006; Yue et al. 2014;
 Ferrara et al. 2014; Choi et al. 2015; Shlosman et al.
 2016). These high redshift IMBHs would need to sur-
 vive galaxy evolution and mergers to present day (e.g.,
-Rashkov & Madau 2014), with signiﬁcant eﬀects on their
-stellar and even dark matter surroundings (e.g., Bertone
-et al. 2009; Chen & Liu 2013; Bringmann et al. 2012; Eda
-et al. 2013; Naoz & Silk 2014; Naoz et al. 2019). Another
-popular formation channel relies on the coalescence of
-many stellar-mass black holes. For example, IMBHs
-may form in the centers of globular clusters, where few-
-body interactions lead to the merger of stellar-mass BHs
-(e.g., O’Leary et al. 2006; G¨urkan et al. 2006; Blecha
-et al. 2006; Freitag et al. 2006; Umbreit et al. 2012; Ro-
 
  
  
@@ -135,12 +131,24 @@ et al. 2006; Freitag et al. 2006; Umbreit et al. 2012; Ro-
 
 Rose et al.
 
+Rashkov & Madau 2014), with signiﬁcant eﬀects on their
+stellar and even dark matter surroundings (e.g., Bertone
+et al. 2009; Chen & Liu 2013; Bringmann et al. 2012; Eda
+et al. 2013; Naoz & Silk 2014; Naoz et al. 2019). Another
+popular formation channel relies on the coalescence of
+many stellar-mass black holes, which may seed objects
+as massive as SMBHs (e.g., Kroupa et al. 2020). IMBHs
+may form in the centers of globular clusters, where few-
+body interactions lead to the merger of stellar-mass BHs
+(e.g., O’Leary et al. 2006; G¨urkan et al. 2006; Blecha
+et al. 2006; Freitag et al. 2006; Umbreit et al. 2012; Ro-
 driguez et al. 2018; Rodriguez et al. 2019; Fragione et al.
 2020b). Other formation mechanisms invoke successive
-collisions and mergers of massive stars (e.g., Portegies
-Zwart & McMillan 2002; Portegies Zwart et al. 2004;
-Freitag et al. 2006; Kremer et al. 2020; Gonz´alez et al.
-2021; Di Carlo et al. 2021).
+collisions and mergers of massive stars (e.g., Ebisuzaki
+et al. 2001; Portegies Zwart & McMillan 2002; Portegies
+Zwart et al. 2004; Freitag et al. 2006; Sakurai et al. 2017;
+Kremer et al. 2020; Gonz´alez et al. 2021; Di Carlo et al.
+2021; Das et al. 2021a,b; Escala 2021).
 
 The main obstacle to sequential BH mergers in clus-
 ters is that the merger recoil velocity kick often exceeds
@@ -158,17 +166,29 @@ tion timescale. Using this approach, they showed that
 103 − 104 M(cid:12) IMBHs can form eﬃciently over the life-
 time of a cluster.
 
-However, as discussed in Section 2.2, direct star-BH
+However, as discussed in Section 2.2, direct BH-star
 collisions are much more frequent than BH-BH collision
 in galactic nuclei, making the former a promising chan-
-nel for BH growth. We propose that IMBHs can form
-naturally within the central pc of a SMBH in a galactic
-center. Speciﬁcally, these IMBHs form through repeated
-collisions with main sequence stars, accreting some or
-all of the star’s mass depending on the details of the
-collision. We demonstrate that this channel can create
-IMBHs with masses as large as 104 M(cid:12), depending on
-the density proﬁle of the surrounding stars.
+nel for BH growth. In an N-body study of young star
+clusters, Rizzuto et al. (2022) ﬁnd that BH-star colli-
+sions are a main contributor to the formation of BHs
+in the mass gap and IMBHs. In a similar vein, Stone
+et al. (2017) demonstrate that massive BHs can form
+from repeated tidal encounters between stars and BHs.
+More generally, several studies have explored the role of
+collisions in a GN, with implications for the stellar and
+red giant populations (e.g., Dale & Davies 2006; Dale
+et al. 2009; Balberg et al. 2013; Mastrobuono-Battisti
+et al. 2021). We propose that IMBHs can form naturally
+within the central pc of a galactic center through re-
+peated collisions between BHs and main sequence stars.
+During a collision, the BH can accrete some portion of
+the star’s mass. Over many collisions, it can grow ap-
+preciably in size. We demonstrate that this channel can
+
+create IMBHs with masses as large as 104 M(cid:12), an upper
+limit that depends on the density proﬁle of the surround-
+ing stars and the eﬃciency of the accretion.
 
 The paper is structured as follows: we describe rele-
 vant physical processes and our approach in Section 2.
@@ -178,11 +198,11 @@ tion 2.3. Section 2.4 discusses our treatment of the
 mass growth with each collision and presents analytic
 solutions to our equations in two diﬀerent regimes, ef-
 ﬁcient collisions and ineﬃcient collisions We compare
-these solutions to our statistical results. Sections 2.5
-and 2.7 discuss implications for GW merger events be-
+these solutions to our statistical results. Sections 2.6
+and 2.8 discuss implications for GW merger events be-
 tween IMBHs and the SMBH. We then incorporate re-
 laxation processes and discuss the subsequent results in
-Section 2.8. Finally, we discuss and summarize our ﬁnd-
+Section 2.9. Finally, we discuss and summarize our ﬁnd-
 ings in Section 3.
 
 2. METHODOLOGY
@@ -191,7 +211,6 @@ We consider a population of stellar mass BHs embed-
 ded in a cluster of 1 M(cid:12) stars. When stars and BHs
 collide, the BHs can accrete mass. The growth rate de-
 pends on the physical processes outlined below. We use
-
 a statistical approach to estimate the stellar encounters
 and ﬁnal IMBH masses.
 
@@ -222,49 +241,44 @@ build a comprehensive physical picture of BH growth at
 all distances from the SMBH, including within 0.01 pc.
 Otherwise, the innermost region of the GN would be
 poorly represented in our sample. We consider other
-observationally motivated distributions in Section 2.8,
-but reserve a more detailed examination of the distribu-
-tion’s impact for future work.
 
-2.2. Direct Collisions
+IMBH Formation in Galactic Nuclei
 
-BHs in the GN can undergo direct collisions with other
-objects. The timescale for this process, tcoll, can be es-
-timated using a simple rate calculation: t−1
-coll = nσA,
-where n is the number density of objects, σ is the ve-
-locity dispersion, and A is the cross-section. We use the
-collision timescale from Rose et al. (2020):
+3
 
-t−1
-coll = πn(a•)σ(a•)
+in Figure 1.2 As this timescale depends on the density
+of surrounding stars, we adopt a density proﬁle of the
+form:
 
-(cid:18)
+ρ(r•) = ρ0
 
-×
+(cid:19)−α
 
-f1(e•)r2
+,
 
-c + f2(e•)rc
+(cid:18) r•
+r0
 
-2G(mBH + m(cid:63))
-σ(a•)2
+(2)
 
-(cid:19)
+where r• denotes the distance from the SMBH. We adopt
+a SMBH mass of 4 × 106 M(cid:12) such that our ﬁducial GN
+matches our own galactic center (e.g., Ghez et al. 2005;
+Genzel et al. 2003). In this case, the normalization in
+Eq. (2) is ρ0 = 1.35 × 106 M(cid:12)/pc3 at r0 = 0.25 pc (Gen-
+zel et al. 2010). Additionally, in Eq. (2), α gives the
+slope of the power law. We assume that a uniform pop-
+ulation of solar mass stars account for most of the mass
+in the GN, making the stellar number density:
 
-. (1)
+n(r•) =
 
-where G is the gravitational constant and rc is the sum
-of the radii of the interacting objects, a black hole with
-mass mBH and a star with mass m(cid:63). Detailed in Rose
-et al. (2020), f1(e•) and f2(e•) account for the eﬀect of
-the eccentricity of the BH’s orbit about the SMBH on
-the collision rate, while n and σ are simply evaluated
-at the semimajor axis of the orbit (see below). Note
+ρ(r•)
+1 M(cid:12)
 
-IMBH Formation in Galactic Nuclei
+.
 
-3
+(3)
 
 The collision timescale also depends on the velocity dis-
 persion, which we express as:
@@ -305,24 +319,9 @@ vant equations, see O’Leary et al. 2009; Gond´an et al.
 lisions will be the main driver of IMBH growth in the
 GN.
 
-2.3. Statistical Approach to Collisions
+2 We note that the eccentricity has a very minor eﬀect on the
 
-We simulate the mass growth of a population of BHs
-with initial conditions detailed in Section 2.1. Over an
-increment ∆t of 106 yr, we calculate the probability of
-a collision occurring, given by ∆t/tcoll. This choice of
-∆t is motivated by our galactic center’s star formation
-timescale (e.g., Lu et al. 2009), allowing for regular re-
-plenishment of the stellar population in the GN. We have
-checked that the results are not sensitive to this choice
-of ∆t, omitted here to avoid clutter. We draw a number
-between 0 and 1 using a random number generator. If
-that number is less than or equal to the probability, we
-increase the BH’s mass by ∆m, the mass that the BH is
-expected to accrete in a single collision (see Section 2.4
-for details). We recalculate the collision timescale using
-the updated BH mass and repeat this process until the
-time elapsed equals the simulation time of 10 Gyr3.
+collision timescale (Rose et al. 2020).
 
 Figure 1. We plot the relevant timescales, including col-
 lision (green), relaxation (gold), and BH-BH GW capture
@@ -334,61 +333,76 @@ density, so we adopt a range of density proﬁles, bounded by
 blue line represents the time for a 105 M(cid:12) BH to merge with
 the SMBH through GW emission.
 
-that this timescale equation includes the eﬀects of grav-
-itational focusing, which enhances the cross-section of
-interaction.
-
-Assuming a circular orbit for simplicity, we plot the
-timescale for a BH orbiting in the GN to collide with
-a 1 M(cid:12) star as a function of distance from the SMBH
-in Figure 1.2 As this timescale depends on the density
-of surrounding stars, we adopt a density proﬁle of the
-form:
-
-ρ(r•) = ρ0
+observationally motivated distributions in Section 2.9,
+but reserve a more detailed examination of the distribu-
+tion’s impact for future work.
 
-(cid:19)−α
+2.2. Direct Collisions
 
-,
+BHs in the GN can undergo direct collisions with other
+objects. The timescale for this process, tcoll, can be es-
+timated using a simple rate calculation: t−1
+coll = nσA,
+where n is the number density of objects, σ is the ve-
+locity dispersion, and A is the cross-section. We use the
+collision timescale from Rose et al. (2020):
 
-(cid:18) r•
-r0
+t−1
+coll = πn(a•)σ(a•)
 
-(2)
+(cid:18)
 
-where r• denotes the distance from the SMBH. We adopt
-a SMBH mass of 4 × 106 M(cid:12) such that our ﬁducial GN
-matches our own galactic center (e.g., Ghez et al. 2005;
-Genzel et al. 2003). In this case, the normalization in
-Eq. (2) is ρ0 = 1.35 × 106 M(cid:12)/pc3 at r0 = 0.25 pc (Gen-
-zel et al. 2010). Additionally, in Eq. (2), α gives the
-slope of the power law. We assume that a uniform pop-
-ulation of solar mass stars account for most of the mass
-in the GN, making the stellar number density:
+×
 
-n(r•) =
+f1(e•)r2
 
-ρ(r•)
-1 M(cid:12)
+c + f2(e•)rc
 
-.
+2G(mBH + m(cid:63))
+σ(a•)2
 
-(3)
+(cid:19)
 
-2 We note that the eccentricity has a very minor eﬀect on the
+. (1)
 
-collision timescale (Rose et al. 2020).
+where G is the gravitational constant and rc is the sum
+of the radii of the interacting objects, a black hole with
+mass mBH and a star with mass m(cid:63). Detailed in Rose
+et al. (2020), f1(e•) and f2(e•) account for the eﬀect of
+the eccentricity of the BH’s orbit about the SMBH on
+the collision rate, while n and σ are simply evaluated
+at the semimajor axis of the orbit (see below). Note
+that this timescale equation includes the eﬀects of grav-
+itational focusing, which enhances the cross-section of
+interaction.
 
-3 Closer to the SMBH, ∆t may exceed the collision timescale by
-a factor of a few for steep density proﬁles. We include a safe-
-guard in our code which takes the ratio tcoll/∆t and rounds it
-to the nearest integer. We take this integer to be the number of
-collisions and increase the BH mass accordingly.
+Assuming a circular orbit for simplicity, we plot the
+timescale for a BH orbiting in the GN to collide with
+a 1 M(cid:12) star as a function of distance from the SMBH
 
 4
 
 Rose et al.
 
+2.3. Statistical Approach to Collisions
+
+We simulate the mass growth of a population of BHs
+with initial conditions detailed in Section 2.1. Over an
+increment ∆t of 106 yr, we calculate the probability of
+a collision occurring, given by ∆t/tcoll. This choice of
+∆t is motivated by our galactic center’s star formation
+timescale (e.g., Lu et al. 2009), allowing for regular re-
+plenishment of the stellar population in the GN. We have
+checked that the results are not sensitive to this choice
+of ∆t, omitted here to avoid clutter. We draw a number
+between 0 and 1 using a random number generator. If
+that number is less than or equal to the probability, we
+increase the BH’s mass by ∆m, the mass that the BH is
+expected to accrete in a single collision (see Section 2.4
+for details). We recalculate the collision timescale using
+the updated BH mass and repeat this process until the
+time elapsed equals the simulation time of 10 Gyr3.
+
 2.4. Mass Growth
 
 When a BH collides with a star, it may accrete ma-
@@ -400,15 +414,19 @@ passing through the star’s center. We begin by con-
 sidering the escape velocity from the BH at the star’s
 outermost point, its surface, which corresponds to the
 maximum impact parameter 1 R(cid:12). Qualitatively, one
-might expect that the BH could accrete the entire star
+might expect that the BH could capture the entire star
 (i.e., ∆m ∼ 1 M(cid:12)) if the relative velocity is smaller than
 the escape velocity from the BH at this point. However,
 in the vicinity of the SMBH, the dispersion velocity of
 the stars may be much larger than the escape velocity
 from the BH at the star’s surface. In this case, the BH
-accretes a “tunnel” of material through the star. This
+captures a “tunnel” of material through the star. This
 tunnel has radius equal to the Bondi radius and length
-approximately 1 R(cid:12).
+approximately 1 R(cid:12). For the purposes of this study, we
+assume that the BH accretes all of the material that
+it captures. The details of the accretion are uncertain,
+however, and it may be much less eﬃcient than our re-
+sults imply. We discuss accretion in Section 2.5.
 
 To estimate ∆m, we begin with the Bondi-Hoyle ac-
 
@@ -429,6 +447,25 @@ s + σ2)3/2
 
 (5)
 
+3 Closer to the SMBH, ∆t may exceed the collision timescale by
+a factor of a few for steep density proﬁles. We include a safe-
+guard in our code which takes the ratio tcoll/∆t and rounds it
+to the nearest integer. We take this integer to be the number of
+collisions and increase the BH mass accordingly.
+
+Figure 2. We consider an example that highlights the mass
+growth as a function of distance from the SMBH. Grey dots
+represent the initial masses and distances from the SMBH
+of the BHs involved in the simulation. For simplicity, we set
+the inital mass equal to 10 M(cid:12) for all of the BHs. Assuming
+the density proﬁle of stars has α = 1, we consider two cases:
+BHs accrete all of the star’s mass during a collision (red) and
+only a portion of the star’s mass is accreted during a collision
+given by Eq. 6 (blue). The latter case results in less growth
+closer to the SMBH where the velocity dispersion becomes
+high. The shaded regions and dashed lines represent the
+analytical predictions detailed in Section 2.4.
+
 where cs is the speed of sound in the star and ρstar is its
 density (e.g., Bondi 1952; Bondi & Hoyle 1944; Shima
 et al. 1985; Edgar 2004, see latter for a review). We
@@ -441,13 +478,14 @@ a lower limit on ∆m. To ﬁnd ∆m, at each collision, we
 have:
 
 ∆m = min( ˙m × t(cid:63),cross, 1 M(cid:12)) ,
+
+(6)
+
 where t(cid:63),cross ∼ R(cid:12)/σ is the crossing time of the BH in
 the star. We take the minimum between ˙m × t(cid:63),cross and
 1 M(cid:12) because the BH cannot accrete more mass than
 one star at each collision.
 
-(6)
-
 Figure 2 juxtaposes the expected growth using Bondi-
 Hoyle-Lyttleton accretion (blue small points) with a
 much simpler model in which the BH accretes the star’s
@@ -462,21 +500,12 @@ sult is exponential growth (see discussion and details
 surrounding Eq. (8)). In Figure 2, however, the simula-
 tions assume α = 1 for the stellar density proﬁle, ensur-
 ing the collision timescale is long compared to the sim-
-ulation time, 10 Gyr. Therefore, the BHs grow slowly,
 
-Figure 2. We consider an example that highlights the mass
-growth as a function of distance from the SMBH. Grey dots
-represent the initial masses and distances from the SMBH
-of the BHs involved in the simulation. For simplicity, we set
-the inital mass equal to 10 M(cid:12) for all of the BHs. Assuming
-the density proﬁle of stars has α = 1, we consider two cases:
-BHs accrete all of the star’s mass during a collision (red) and
-only a portion of the star’s mass is accreted during a collision
-given by Eq. 6 (blue). The latter case results in less growth
-closer to the SMBH where the velocity dispersion becomes
-high. The shaded regions and dashed lines represent the
-analytical predictions detailed in Section 2.4.
+IMBH Formation in Galactic Nuclei
+
+5
 
+ulation time, 10 Gyr. Therefore, the BHs grow slowly,
 and their ﬁnal masses can be approximated using the
 following equation:
 
@@ -514,11 +543,6 @@ star’s mass.
 Eq. 7 does not apply for other values of α. When the
 collision timescale is shorter, corresponding to a larger
 index α in the density proﬁle (see Figure 1), the growth
-
-IMBH Formation in Galactic Nuclei
-
-5
-
 is very eﬃcient and ∆m quickly approaches 1 M(cid:12). Con-
 sequently, while we can now assume ∆m = 1 M(cid:12), we
 can no longer assume the collision timescale is constant.
@@ -533,7 +557,41 @@ where A = σ2Rstar/G and C = 2πGnstarRstar/σ. As an
 example, we plot this curve in purple for the α = 2 case,
 in Figure 3, which agrees with the simulated masses.
 
-2.5. GW Inspiral
+et al. 2003; Ohsuga et al. 2005; Yuan et al. 2012; Jiang
+et al. 2014; McKinney et al. 2014; Narayan et al. 2022).
+Heuristically, if a collision between a BH and a star re-
+sults in an accretion disk, the disk’s viscous timescale
+may be as low as days. The resultant luminosity can
+unbind most of the captured material, though details
+such as the amount accreted and peak luminosity re-
+main uncertain (e.g., Yuan et al. (2012); Jiang et al.
+(2014), see also the discussion in Stone et al. (2017),
+Rizzuto et al. (2022), and Kremer et al. (2022)). The
+question becomes whether or not a BH can still accu-
+mulate signiﬁcant amounts of mass over many collisions
+even if it accretes very little in a single one. We ex-
+plore the viability of our channel using a physically mo-
+tivated ineﬃcient accretion model. Several studies have
+invoked momentum-driven winds in BH accretion (e.g.,
+Murray et al. 2005; Ostriker et al. 2010; Brennan et al.
+2018). We thus estimate the fraction of captured mass
+accreted to be approximately vesc/(cη), where vesc is
+the escape velocity from the BH at 1 R(cid:12) and η is the
+accretion eﬃciency at the ISCO. We take η to be 0.1
+(e.g., Yu & Tremaine 2002). This expression for the
+fraction accreted is consistent with Kremer et al. (2022)
+equation 19 for s = 0.5, which is a reasonable value for
+s, a free parameter between 0.2 and 0.8. We discuss
+the results of the momentum-driven winds estimate in
+Section 3. We note that the accretion process may be
+more eﬃcient than this estimate implies if, for example,
+jets or other instabilities result in the beaming of radi-
+ation away from the captured material (e.g., Blandford
+& Znajek 1977; Begelman 1979; De Villiers et al. 2005;
+McKinney & Gammie 2004; McKinney 2006; Igumen-
+shchev 2008; Begelman 2012a,b; McKinney et al. 2014).
+
+2.6. GW Inspiral
 
 When a BH is close to the SMBH, GW emission can
 circularize and shrink its orbit. We implement the ef-
@@ -542,8 +600,20 @@ eccentricity following Peters & Mathews (1963a). The
 characteristic timescale to merge a BH with an SMBH
 is given by:
 
+2.5. Uncertainties in Accretion
+
 tGW ≈ 2.9 × 1012 yr
 
+We note that the ∆M calculated in this proof-of-
+concept study assumes that the BH accretes all of the
+material that it captures. Estimating the true fraction
+of the material accreted by the BH is very challeng-
+ing; this complex problem requires numerically solving
+the generalized GR ﬂuid equations with cooling, heat-
+ing, and radiative transfer, etc. and remains an active
+ﬁeld of research (e.g., Blandford & Begelman 1999; Park
+& Ostriker 2001; Narayan et al. 2003; Igumenshchev
+
 (cid:18) M•
 
 106 M(cid:12)
@@ -553,7 +623,7 @@ tGW ≈ 2.9 × 1012 yr
 106 M(cid:12)
 (cid:19)4
 
-10−4 pc
+10−2 pc
 
 (cid:19)−1
 
@@ -572,15 +642,24 @@ f (e•) is between 0.979 and 1.81 (Blaes et al. 2002). We
 plot this timescale for a 1 × 105 M(cid:12) BH in Figure 1 in
 blue.
 
-In our simulations, we assume a BH has merged with
-the SMBH when the condition tGW < telapsed is met.
-When this condition is satisﬁed, we terminate mass
-growth through collisions for that BH.4
+6
 
-2.6. IMBH growth
+Rose et al.
 
-As detailed above, BH-stellar collisions can increase
-the BH masses as a function of time. Here, we examine
+Figure 3. On the right, we plot ﬁnal masses of 500 BHs using diﬀerent values of α in the density proﬁle, shallow (α = 1) to
+cuspy (α = 2). For the latter case, the purple line shows the analytical result from Eq. 8, taking minitial to be the average mass
+of the population. Faded stars indicate BHs that merged with the SMBH through GWs. On the left, we plot the masses and
+merger times of these BHs.
+
+In our simulations, we assume a BH has merged with
+the SMBH when the condition tGW < telapsed is met.
+When this condition is satisﬁed, we terminate mass
+growth through collisions for that BH.4
+
+2.7. IMBH growth
+
+As detailed above, BH-stellar collisions can increase
+the BH masses as a function of time. Here, we examine
 the sensitivity of the BH growth to the density power
 law. From Eq. (1), it is clear that the growth rate de-
 pends on the stellar density proﬁle, governed by the in-
@@ -589,13 +668,6 @@ proﬁles, will result in more eﬃcient mass growth.
 In
 Figure 1, larger values of α lead to collision timescales
 in the GN’s inner region, inwards of 0.25 pc, that are
-
-4 For comparison, we also incrementally changed the semimajor
-axis and eccentricity from GW emission following the equations
-in Peters & Mathews (1963b). This method leads to a slight
-increase in the ﬁnal IMBH masses because it accounts for the
-collisions that take place while the orbit is gradually shrinking.
-
 much smaller that the 10 Gyr simulation time. Figure 3
 conﬁrms this expectation. It depicts the mass growth of
 a uniform distribution of BHs with initial conditions de-
@@ -603,26 +675,33 @@ tailed in Section 2.1 for ﬁve α values, spanning 1 (green)
 to 2 (purple). The most massive IMBHs form inwards
 of 0.25 pc for the α = 2 case.
 
-2.7. Gravitational Wave Mergers and Intermediate
+2.8. Gravitational Wave Mergers and Intermediate
 and Extreme Mass Ratio Inspiral Candidates
 
 Towards the SMBH, eﬃcient collisions can create BHs
 massive enough to merge with the SMBH through GWs.
-Following the method detailed in Section 2.5, when a
+Following the method detailed in Section 2.6, when a
 given BH meets the criterion tGW < telapsed, we mark
+
+4 For comparison, we also incrementally changed the semimajor
+axis and eccentricity from GW emission following the equations
+in Peters & Mathews (1963b). This method leads to a slight
+increase in the ﬁnal IMBH masses because it accounts for the
+collisions that take place while the orbit is gradually shrinking.
+
 it as merged with the SMBH. We assume that at this
 point the dynamics of the BH will be determined by GW
 emission, shrinking and circularizing the BHs orbit un-
 til it undergoes an extreme or intermediate mass ratio
 inspiral (EMRI and IMRI, respectively). The righthand
 plot in Figure 3 shows the BH masses versus time of
-merger.
 It is interesting to note that even in the ab-
+merger.
 sence of relaxation processes, which are often invoked
 to explain the formation of EMRIs, EMRIs and notably
 IMRIs can form in this region.
 
-2.8. Two Body Relaxation Processes
+2.9. Two Body Relaxation Processes
 
 A BH orbiting the SMBH experiences weak gravita-
 tional interactions with other objects in the GN. Over a
@@ -648,27 +727,31 @@ its orbital energy and angular momentum by order of
 themselves. The BH experiences diﬀusion in its angular
 momentum and energy as a function of time (depending
 on the eccentricity of the orbit, this process can be more
-eﬃcient Fragione & Sari 2018; Sari & Fragione 2019). In
-Figure 1, we plot the relaxation timescale in gold for a
-range of α. We note that the Bahcall & Wolf (1976) pro-
-ﬁle, α = 7/4, corresponds to zero net ﬂux and therefore
-does not preferentially migrate objects inward.
-
-Additionally, because they are more massive on
-average than the surrounding objects, BHs are ex-
-pected to segregate inwards in the GN (e.g., Shapiro
-& Marchant 1978; Cohn & Kulsrud 1978; Morris 1993;
-Miralda-Escud´e & Gould 2000; Baumgardt et al. 2004).
+eﬃcient Fragione & Sari 2018; Sari & Fragione 2019).
+Relaxation can cause the orbit of an object in a GN to
+reach high eccentricities. If the object is a BH, it can
+spiral into the SMBH and form an EMRI, while a star
 
-6
-
-Rose et al.
+IMBH Formation in Galactic Nuclei
 
-Figure 3. On the right, we plot ﬁnal masses of 500 BHs using diﬀerent values of α in the density proﬁle, shallow (α = 1) to
-cuspy (α = 2). For the latter case, the purple line shows the analytical result from Eq. 8, taking minitial to be the average mass
-of the population. Faded stars indicate BHs that merged with the SMBH through GWs. On the left, we plot the masses and
-merger times of these BHs.
+7
 
+can be tidally disrupted by the SMBH (e.g. Magorrian
+& Tremaine 1999; Wang & Merritt 2004; Hopman &
+Alexander 2005; Aharon & Perets 2016; Stone & Met-
+zger 2016; Amaro-Seoane 2018; Sari & Fragione 2019;
+Naoz et al. 2022). The relaxation process is therefore
+crucial to our study. In Figure 1, we plot the relaxation
+timescale in gold for a range of α. We note that the Bah-
+call & Wolf (1976) proﬁle, α = 7/4, corresponds to zero
+net ﬂux and therefore does not preferentially migrate
+objects inward.
+
+Additionally, because BHs are more massive on av-
+erage than the surrounding objects, they are expected
+to segregate inwards in the GN (e.g., Shapiro &
+Marchant 1978; Cohn & Kulsrud 1978; Morris 1993;
+Miralda-Escud´e & Gould 2000; Baumgardt et al. 2004).
 They sink toward the SMBH on the mass segregation
 timescale, tseg ≈ (cid:104)M∗(cid:105)/mBH × trelax (e.g., Spitzer 1987;
 Fregeau et al. 2002; Merritt 2006), which is typically an
@@ -686,8 +769,8 @@ of zero and a standard deviation of ∆vrlx/
 ∆vrlx = v•
 approach to changes in the angular momentum). The
 new orbital parameters can be calculated following Lu
-& Naoz (2019), and see Naoz et al. in prep for full set
-of equations.
+& Naoz (2019), and see Naoz et al. (2022) for the full
+set of equations.
 
 √
 
@@ -714,6 +797,7 @@ the BHs to begin diﬀusing over a relaxation timescale,
 their orbital parameters changing slowly through a ran-
 dom process. In this random process, some of the BHs
 may migrate closer to the SMBH. We terminate mass
+
 growth when the BH enters the inner 200 au of the GN,
 within which the density of stars is uncertain. This cut-
 oﬀ is based on the 120 au pericenter of S0-2, the closest
@@ -738,7 +822,7 @@ gion of the GN increases, allowing them to dominate the
 scattering. We reserve the inclusion of these interactions
 for future study.
 
-2.9. Eﬀect of Relaxation Processes
+2.10. Eﬀect of Relaxation Processes
 
 As depicted in Figure 4, two-body relaxation processes
 result in more EMRIs and IMRIs events. These pro-
@@ -746,20 +830,6 @@ cesses allow BHs that begin further from the SMBH
 to migrate inwards and grow more eﬃciently in mass.
 However, it also impedes the growth of BHs that are
 initially closer to the SMBH by allowing them to dif-
-
-IMBH Formation in Galactic Nuclei
-
-7
-
-Figure 4. Similar to Figure 3, we plot the initial masses versus initial distance (grey) and ﬁnal mass versus ﬁnal distance (red)
-for 500 BHs. This simulation includes relaxation processes, including mass segregation, diﬀusion, and dynamical friction. We
-assume α = 1.75 for the GN density proﬁle. Faded stars represent BHs that merged with the SMBH. As a result of inward
-migration, BHs merge more quickly with the SMBH, before they can become as massive as those in Figure 3. Additionally, more
-BHs become EMRIs and IMRIs. Additionally, in the third panel, we show a histogram of the simulated IMBH masses for two
-diﬀerent values of α, 1.5 (orange, solid), α, 1.75 (red, dashed), and 2 (purple, dash-dotted), accounting for relaxation processes.
-The dashed, faded lines represent the corresponding initial histograms. We assume α = 1.75 for the GN density proﬁle. Faded
-stars represent BHs that merged with the SMBH.
-
 fuse out of the inner region where collisions are eﬃcient.
 As can be seen in Figure 4, the net result is that more
 BHs grow, but the maximum mass is lower compared
@@ -767,8 +837,8 @@ to the scenario that ignores two-body relaxation. The
 histogram in Figure 4 presents the ﬁnal BH mass distri-
 butions for diﬀerent power law indices α. As expected,
 the two-body relaxation suppresses the α dependence
-In fact, using a KS test, we
 highlighted in Figure 3.
+In fact, using a KS test, we
 ﬁnd that we cannot reject the hypothesis that the two
 distributions were drawn from the same sample for the
 α = 1.75 and α = 2 results. Interestingly, a BH mass
@@ -784,8 +854,21 @@ BH and main-sequence stars. Taking both a statisti-
 cal and analytic approach, we show that this channel
 can produce IMBHs eﬃciently with masses as high as
 103−4 M(cid:12) and may result in many IMBH-SMBH merg-
-ers (intermediate-mass ratio inspiral, IMRIs) and EM-
-RIs.
+ers (intermediate-mass ratio inspirals, or IMRIs) and
+EMRIs.
+
+8
+
+Rose et al.
+
+Figure 4. Similar to Figure 3, we plot the initial masses versus initial distance (grey) and ﬁnal mass versus ﬁnal distance
+(red) for 500 BHs. This simulation includes relaxation processes, including mass segregation, diﬀusion, and dynamical friction.
+We assume α = 1.75 for the GN density proﬁle. Faded stars represent BHs that merge with the SMBH. As a result of inward
+migration, BHs merge more quickly with the SMBH, before they can become as massive as those in Figure 3. Additionally,
+more BHs become EMRIs and IMRIs. In the third panel, we show a histogram of the simulated IMBH masses
+for three diﬀerent values of α: 1.5 (orange, solid), 1.75 (red, dashed), and 2 (purple, dash-dotted), accounting for relaxation
+processes. We also show the results for a simulation with α = 1.75 that accounts for momentum-driven winds (black, dotted).
+Despite the substantially reduced accretion, BHs in the mass gap still form.
 
 As the stellar mass BH collides with a star, the BH
 will grow in mass. The increase may equal star’s en-
@@ -793,12 +876,11 @@ tire mass if the relative velocity is smaller than the es-
 cape velocity from the BH at 1 R(cid:12). However, near the
 SMBH, the velocity dispersion may be larger than the
 escape velocity from the BH at the star’s radius. In this
-limit, the BH accretes a “tunnel” of material through
+limit, the BH captures a “tunnel” of material through
 the star, estimated using Bondi-Hoyle-Lyttleton accre-
 tion. In our statistical analysis, we account for Bondi-
 Hoyle-Lyttleton accretion and ﬁnd that BHs outside of
-
-10−2 pc from the SMBH can accrete the entire star (see
+10−2 pc from the SMBH can capture the entire star (see
 Figure 2).
 
 The eﬃciency of collisions, and therefore IMBH,
@@ -816,6 +898,31 @@ mass, but their maximum mass is smaller (∼ 104 M(cid:12)).
 Additionally, the ﬁnal masses have no apparent depen-
 dence on distance from the SMBH (see Figure 4).
 
+Most simulations in our study assume that the BHs
+accrete all of the mass that they capture. The ﬁnal BH
+masses can be taken as an upper limit. We note that
+the accretion is a highly uncertain process and repre-
+sents an active ﬁeld of study (e.g., Blandford & Begel-
+man 1999; Park & Ostriker 2001; Narayan et al. 2003;
+Igumenshchev et al. 2003; Ohsuga et al. 2005; Yuan
+et al. 2012; Jiang et al. 2014; McKinney et al. 2014;
+Narayan et al. 2022). To assess the limits of our model,
+
+we also consider a physically motivated accretion model,
+momentum-driven winds (Section 2.5). We present the
+ﬁnal mass distribution for momentum-driven winds in
+Figure 4.
+Importantly, we ﬁnd that BHs within the
+mass gap still form naturally despite the substantially
+reduced accretion. About 5% of the BHs grow by 10
+to 100 M⊙. Furthermore, if we increase this ∆M esti-
+mate by a factor of 2 (i.e., use η = 0.05), the simula-
+tion produces a 3.5 × 10³ M⊙ IMBH for the same initial
+conditions. Our proof-of-concept demonstrates that col-
+lisions between BHs and stars are an important process
+that should be taken into account in dense places such
+as a GN.
+
 Mass growth through BH-main-sequence star colli-
 sions may act in concert with other IMBH formation
 channels, such as compact object binary mergers (e.g.,
@@ -834,21 +941,94 @@ Kozai Lidov mechanism, leaving behind a single star or
 a single compact object (e.g., Stephan et al. 2016, 2019;
 Hoang et al. 2018). Additionally, to be susceptible to
 evaporation, BH binaries must have a wider conﬁgura-
-tion. Otherwise, they will be more tightly bound that
+tion. Otherwise, they will be more tightly bound than
+the average kinetic energy of the surrounding objects
+and will only harden through weak gravitational inter-
 
-8
+IMBH Formation in Galactic Nuclei
 
-Rose et al.
+9
 
-the average kinetic energy of the surrounding objects,
-and will only harden through weak gravitational inter-
 actions with neighboring stars (see for example Figure
 6 in Rose et al. 2020).
 
-Not included in this study, collisions between the BH
-and other compact objects will increase the BH growth
-rate. BH-BH mergers (e.g., O’Leary et al. 2009; Fra-
-gione et al. 2021) and even neutron star BH mergers
+We note that we assume a steady-state and treat the
+stars as a reservoir in this model. Future work will take a
+more nuanced approach to the background stars, whose
+density as a function of time can be inﬂuenced by several
+factors. Firstly, the relaxation of the stellar population
+occurs on Gyr timescales. Some studies have suggested
+that in situ star formation can occur in the Galactic
+Center as close as 0.04 pc from the SMBH (e.g., Levin
+& Beloborodov 2003; Paumard et al. 2006), and star
+formation episodes can occur as often as every ∼ 5 Myr
+(e.g. Lu et al. 2009). Therefore, we expect that after
+the ﬁrst Gyr, stars within ≲ 0.01 pc will be replenished
+at intervals consistent with the star formation episodes;
+the infalling populations of stars are separated by ∼
+5–10 Myr, which is shorter than the collision timescale.
+However, star-star collisions may complicate this pic-
+ture within ∼ 0.01 pc. As discussed above, regular star
+formation ensures the BHs always have a stellar popula-
+tion to interact with outside of ∼ 0.01 pc.⁵ At 0.01 pc,
+however, the kinetic energy during a collision between
+two 1 M⊙ stars is larger than their binding energies.
+Collisions can therefore thin out the stellar populations
+during the time it takes them to diﬀuse to these small
+radii, ≲ 0.01 pc, and may reduce the BH growth in the
+innermost region. We reserve the inclusion of star-star
+collisions for future work. We also note that the disrup-
+tion of binary stars by the SMBH may help replenish
+the stellar population even as collisions work to deplete
+it (e.g., Balberg et al. 2013); when a binary is disrupted,
+one of the stars is captured on a tightly bound orbit
+about the SMBH.
+
+An IMBH may also aﬀect the stellar density proﬁle.
+As it spirals into the SMBH, it can perturb stellar orbits,
+and these interactions can lead to hypervelocity stars
+(e.g., Baumgardt et al. 2006a; Löckmann & Baumgardt
+2008). Löckmann & Baumgardt (2008) show that an
+IMBH can modify an initially steep stellar density pro-
+ﬁle to become consistent with the ﬂatter cusp observed
+in the Galactic Center. The stars may then be replen-
+ished on 100 Myr timescales (Baumgardt et al. 2006a).
+Therefore, after the formation of the ﬁrst few IMBHs,
+subsequent BH growth may occur in bursts, coinciding
+with replenishment of the stars.
+
+While there are many competing dynamical processes
+that shape the stellar density proﬁle, we stress that α
+
+5 In fact, the star-star collision timescale is greater than 10 Myr
+for the entire parameter space, save at 0.001 pc for larger values
+of α; the BH-star collision timescale plotted in Fig. 1 is the same
+order of magnitude as the star-star collision timescale.
+
+can simply be chosen to encapsulate all of the relevant
+physics. A value for α that is constrained by observa-
+tions must already reﬂect ongoing processes like star-
+star collisions and replenishment. Schödel et al. (2018)
+ﬁnd the observed stellar mass enclosed within 0.01 pc of
+the Milky Way’s Galactic Center to be approximately
+180 M⊙. This estimate is consistent to order of magni-
+tude with our α = 1.25 case. In a simulation like those
+depicted in Figure 4, which include relaxation, α = 1.25
+leads to a maximum IMBH mass of 140 M⊙. Further-
+more, while the stellar mass within 0.01 pc may be a
+few hundred M⊙, Do et al. (2019) and GRAVITY Col-
+laboration et al. (2020) set an upper limit on the mass
+enclosed within the orbit of S0-2 to be about a few thou-
+sand M⊙, or 0.1% of the central mass. This upper limit
+can include mass that was previously in stars but is now
+in BHs. In that case, the 180 M⊙ is what remains of the
+stars, while BHs and IMBHs make up the ∼ 1000 M⊙
+in the innermost region.
+
+Also not included in this study, collisions between the
+BH and other compact objects will increase the BH
+growth rate. BH-BH mergers (e.g., O’Leary et al. 2009;
+Fragione et al. 2021) and even neutron star BH mergers
 (e.g., Hoang et al. 2020) become more likely as the BHs
 increase in mass through stellar collisions. As a result,
 the BH-BH collision timescale, discussed in Section 2.2,
@@ -857,63 +1037,72 @@ BHs to grow through this channel in addition to stel-
 lar collisions. Additionally, this compact object mergers
 result in GW recoil, which may have a large impact on
 the dynamics (e.g., Baibhav et al. 2020; Fragione et al.
-2021)
+2021).
 
 The BH’s mass growth increases GW emission, which
-dissipates energy from the orbit. Along with relaxation
-processes, GW emission causes BHs to sink towards the
-SMBH and eventually undergo a merger. As a result,
-the GN environment is conducive to the formation of
-EMRIs and IMRIs. The GW emission from EMRIs and
-IMRIs is expected to be at mHz frequencies, making
-them promising candidates for LISA to observe. While
-the exact rate calculation is beyond the scope of this
-study, the mechanism outlined here seems very promis-
-ing.
-
-Our results also suggest that IMBHs are likely to ex-
-ists in many galactic nuclei, as well as within our own
-galactic center. This implication seems to be consis-
-tent with recent observational and theoretical studies
-(e.g., Hansen & Milosavljevi´c 2003; Maillard et al. 2004;
-G¨urkan & Rasio 2005; Gualandris & Merritt 2009; Chen
-& Liu 2013; Generozov & Madigan 2020; Fragione et al.
-2020a; Zheng et al. 2020; Naoz et al. 2020; GRAVITY
-Collaboration et al. 2020).
+dissipates energy from the orbit. Along with relaxation,
+GW emission causes BHs to sink towards the SMBH
+and eventually undergo a merger. As a result, the GN
+environment is conducive to the formation of EMRIs
+and IMRIs. The GW emission from EMRIs and IM-
+RIs is expected to be at mHz frequencies, making them
+promising candidates for LISA to observe. While the
+exact rate calculation is beyond the scope of this study,
+the mechanism outlined here seems very promising.
+
+Our results also suggest that BHs within the mass gap
+as well as IMBHs likely exist in many galactic nuclei, as
+well as within our own galactic center. This implication
+seems to be consistent with recent observational and
+theoretical studies (e.g., Hansen & Milosavljević 2003;
+Maillard et al. 2004; Gürkan & Rasio 2005; Gualandris
+& Merritt 2009; Chen & Liu 2013; Generozov & Madi-
+gan 2020; Fragione et al. 2020a; Zheng et al. 2020; Naoz
+et al. 2020; GRAVITY Collaboration et al. 2020).
+
+10
+
+Rose et al.
 
 Lastly, the collisions between stellar mass BHs and
 stars may contribute to the x-ray emission from our
-galactic centre (e.g., Muno et al. 2005, 2009; Hailey et al.
-2018; Zhu et al. 2018; Cheng et al. 2018)5. These inter-
-actions, in particular grazing collisions, may also result
-in tidal disruption events (e.g., Perets et al. 2016; Sam-
-sing et al. 2019; Kremer et al. 2021). Thus, the process
-outlined here may produce electromagnetic signatures
-in addition to GW mergers.
-
-SR thanks the Charles E Young fellowship, the Nina
+galactic centre (e.g., Muno et al. 2005, 2009; Hailey
+et al. 2018; Zhu et al. 2018; Cheng et al. 2018; see Kre-
+mer et al. 2022 for a discussion of electromagnetic sig-
+natures from BH-star collisions).⁶ These interactions,
+in particular grazing collisions, may also result in tidal
+disruption events (e.g., Baumgardt et al. 2006b; Perets
+et al. 2016; Stone et al. 2017; Samsing et al. 2019; Kre-
+mer et al. 2021). Thus, the process outlined here may
+produce electromagnetic signatures in addition to GW
+mergers.
+
+We thank the anonymous referee for useful comments.
+We also thank Jessica Lu, Fred Rasio, Kyle Kremer,
+Ryosuke Hirai, Ilya Mandel, and Erez Michaely for use-
+ful discussion.
+
+SR thanks the Charles E. Young Fellowship, the Nina
 Byers Fellowship, and the Michael A. Jura Memorial
 Graduate Award for support. SR and SN acknowledge
 the partial support from NASA ATP 80NSSC20K0505.
 SN thanks Howard and Astrid Preston for their gener-
 ous support. IL thanks support from the Adams Fellow-
 ship. SN and RS thank the Bhaumik Institute visitor
-program.
-
-REFERENCES
+program. This work was performed in part at the As-
+pen Center for Physics, which is supported by National
+Science Foundation grant PHY-1607611.
 
 Abbott, B. P., Abbott, R., Abbott, T. D., et al. 2016,
 
-Baibhav, V., Gerosa, D., Berti, E., et al. 2020, PhRvD, 102,
+Arca Sedda, M., Mapelli, M., Benacquista, M., & Spera, M.
 
-PhRvL, 116, 241102,
+REFERENCES
 
-043002, doi: 10.1103/PhysRevD.102.043002
+PhRvL, 116, 241102,
 
 doi: 10.1103/PhysRevLett.116.241102
 
-Baumgardt, H., Makino, J., & Ebisuzaki, T. 2004, ApJ,
-
 —. 2017a, PhRvL, 118, 221101,
 
 doi: 10.1103/PhysRevLett.118.221101
@@ -922,182 +1111,299 @@ doi: 10.1103/PhysRevLett.118.221101
 
 doi: 10.1103/PhysRevLett.119.141101
 
-613, 1143, doi: 10.1086/423299
+Aharon, D., & Perets, H. B. 2016, ApJL, 830, L1,
 
-Begelman, M. C., Volonteri, M., & Rees, M. J. 2006,
-
-MNRAS, 370, 289, doi: 10.1111/j.1365-2966.2006.10467.x
-
-Belczynski, K., Hirschi, R., Kaiser, E. A., et al. 2020a, ApJ,
+doi: 10.3847/2041-8205/830/1/L1
 
 Alexander, T. 1999, ApJ, 527, 835, doi: 10.1086/308129
 
-890, 113, doi: 10.3847/1538-4357/ab6d77
-
 Alexander, T., & Pfuhl, O. 2014, ApJ, 780, 148,
 
 doi: 10.1088/0004-637X/780/2/148
 
-Arca Sedda, M., Mapelli, M., Benacquista, M., & Spera, M.
+Amaro-Seoane, P. 2018, Living Reviews in Relativity, 21, 4,
 
-2021, arXiv e-prints, arXiv:2109.12119.
+doi: 10.1007/s41114-018-0013-8
+
+6 The connection between the observed X-ray sources at the Galac-
+tic Center and tidal capture has been suggested by Generozov
+et al. (2018), but see Zhu et al. (2018); Stephan et al. (2019) for
+alternative channels.
 
+2021, arXiv e-prints, arXiv:2109.12119.
 https://arxiv.org/abs/2109.12119
 
 Bahcall, J. N., & Wolf, R. A. 1976, ApJ, 209, 214,
 
 doi: 10.1086/154711
 
-—. 2020b, ApJ, 890, 113, doi: 10.3847/1538-4357/ab6d77
+Baibhav, V., Gerosa, D., Berti, E., et al. 2020, PhRvD, 102,
+
+043002, doi: 10.1103/PhysRevD.102.043002
+
+Balberg, S., Sari, R., & Loeb, A. 2013, MNRAS, 434, L26,
+
+doi: 10.1093/mnrasl/slt071
+
+Baumgardt, H., Gualandris, A., & Portegies Zwart, S.
+
+2006a, MNRAS, 372, 174,
+doi: 10.1111/j.1365-2966.2006.10818.x
+
+Baumgardt, H., Hopman, C., Portegies Zwart, S., &
+
+Makino, J. 2006b, MNRAS, 372, 467,
+doi: 10.1111/j.1365-2966.2006.10885.x
+
+Baumgardt, H., Makino, J., & Ebisuzaki, T. 2004, ApJ,
+
+613, 1143, doi: 10.1086/423299
+
+Begelman, M. C. 1979, MNRAS, 187, 237,
+
+doi: 10.1093/mnras/187.2.237
+
+—. 2012a, ApJL, 749, L3, doi: 10.1088/2041-8205/749/1/L3
+
+IMBH Formation in Galactic Nuclei
+
+11
+
+—. 2012b, MNRAS, 420, 2912,
 
+doi: 10.1111/j.1365-2966.2011.20071.x
+
+Begelman, M. C., Volonteri, M., & Rees, M. J. 2006,
+
+Dall’Amico, M., Mapelli, M., Di Carlo, U. N., et al. 2021,
+
+MNRAS, 508, 3045, doi: 10.1093/mnras/stab2783
+Das, A., Schleicher, D. R. G., Basu, S., & Boekholt, T.
+
+MNRAS, 370, 289, doi: 10.1111/j.1365-2966.2006.10467.x
+Belczynski, K., Hirschi, R., Kaiser, E. A., et al. 2020a, ApJ,
+
+C. N. 2021a, MNRAS, 505, 2186,
+doi: 10.1093/mnras/stab1428
+
+890, 113, doi: 10.3847/1538-4357/ab6d77
+
+Das, A., Schleicher, D. R. G., Leigh, N. W. C., & Boekholt,
+
+—. 2020b, ApJ, 890, 113, doi: 10.3847/1538-4357/ab6d77
 Bertone, G., Fornasa, M., Taoso, M., & Zentner, A. R.
 
+T. C. N. 2021b, MNRAS, 503, 1051,
+doi: 10.1093/mnras/stab402
+
 2009, New Journal of Physics, 11, 105016,
 doi: 10.1088/1367-2630/11/10/105016
 
 Binney, J., & Tremaine, S. 1987, Galactic dynamics
-
 —. 2008, Galactic Dynamics: Second Edition
-
 Blaes, O., Lee, M. H., & Socrates, A. 2002, ApJ, 578, 775,
 
 doi: 10.1086/342655
 
+De Villiers, J.-P., Hawley, J. F., Krolik, J. H., & Hirose, S.
+
+2005, ApJ, 620, 878, doi: 10.1086/427142
+
+Di Carlo, U. N., Giacobbo, N., Mapelli, M., et al. 2019,
+
+MNRAS, 487, 2947, doi: 10.1093/mnras/stz1453
+
+Di Carlo, U. N., Mapelli, M., Pasquato, M., et al. 2021,
+MNRAS, 507, 5132, doi: 10.1093/mnras/stab2390
+
+Blandford, R. D., & Begelman, M. C. 1999, MNRAS, 303,
+
+Do, T., Hees, A., Ghez, A., et al. 2019, Science, 365, 664,
+
+L1, doi: 10.1046/j.1365-8711.1999.02358.x
+
+doi: 10.1126/science.aav8137
+
+Blandford, R. D., & Znajek, R. L. 1977, MNRAS, 179, 433,
+
+Ebisuzaki, T., Makino, J., Tsuru, T. G., et al. 2001, ApJL,
+
+doi: 10.1093/mnras/179.3.433
+
+562, L19, doi: 10.1086/338118
+
 Blecha, L., Ivanova, N., Kalogera, V., et al. 2006, ApJ, 642,
 
-5 The connection between the observed X-ray sources at the Galac-
-tic Center and tidal capture has been suggested by Generozov
-et al. (2018), but see Zhu et al. (2018); Stephan et al. (2019) for
-alternative channels.
+Eda, K., Itoh, Y., Kuroyanagi, S., & Silk, J. 2013, PhRvL,
 
 427, doi: 10.1086/500727
 
 Bondi, H. 1952, MNRAS, 112, 195,
 doi: 10.1093/mnras/112.2.195
 
-IMBH Formation in Galactic Nuclei
+110, 221101, doi: 10.1103/PhysRevLett.110.221101
 
-9
+Edgar, R. 2004, NewAR, 48, 843,
+
+doi: 10.1016/j.newar.2004.06.001
 
 Bondi, H., & Hoyle, F. 1944, MNRAS, 104, 273,
 
-Freitag, M., Amaro-Seoane, P., & Kalogera, V. 2006, ApJ,
+Escala, A. 2021, ApJ, 908, 57,
 
 doi: 10.1093/mnras/104.5.273
 
-649, 91, doi: 10.1086/506193
+doi: 10.3847/1538-4357/abd93c
 
 Bradnick, B., Mandel, I., & Levin, Y. 2017, MNRAS, 469,
 
-Generozov, A., & Madigan, A.-M. 2020, ApJ, 896, 137,
+Ferrara, A., Salvadori, S., Yue, B., & Schleicher, D. 2014,
 
 2042, doi: 10.1093/mnras/stx1007
 
-doi: 10.3847/1538-4357/ab94bc
+Brennan, R., Choi, E., Somerville, R. S., et al. 2018, ApJ,
+
+Monthly Notices of the Royal Astronomical Society, 443,
+2410, doi: 10.1093/mnras/stu1280
+
+860, 14, doi: 10.3847/1538-4357/aac2c4
+
+Fishbach, M., Farr, W. M., & Holz, D. E. 2020, ApJL, 891,
 
 Bringmann, T., Huang, X., Ibarra, A., Vogl, S., & Weniger,
 
-Generozov, A., Stone, N. C., Metzger, B. D., & Ostriker,
+L31, doi: 10.3847/2041-8213/ab77c9
 
 C. 2012, JCAP, 2012, 054,
 doi: 10.1088/1475-7516/2012/07/054
 
-J. P. 2018, MNRAS, 478, 4030,
-doi: 10.1093/mnras/sty1262
-
 Centrella, J., Baker, J. G., Kelly, B. J., & van Meter, J. R.
 
-Genzel, R., Eisenhauer, F., & Gillessen, S. 2010, Reviews of
+Fragione, G., Kocsis, B., Rasio, F. A., & Silk, J. 2021,
+
+arXiv e-prints, arXiv:2107.04639.
+https://arxiv.org/abs/2107.04639
 
 2010, Reviews of Modern Physics, 82, 3069,
 doi: 10.1103/RevModPhys.82.3069
 
-Modern Physics, 82, 3121,
-doi: 10.1103/RevModPhys.82.3121
+Fragione, G., Loeb, A., Kremer, K., & Rasio, F. A. 2020a,
+
+ApJ, 897, 46, doi: 10.3847/1538-4357/ab94b2
 
 Chen, X., & Liu, F. K. 2013, ApJ, 762, 95,
 
-Genzel, R., Sch¨odel, R., Ott, T., et al. 2003, ApJ, 594, 812,
+Fragione, G., Loeb, A., & Rasio, F. A. 2020b, ApJL, 902,
 
 doi: 10.1088/0004-637X/762/2/95
 
-doi: 10.1086/377127
+L26, doi: 10.3847/2041-8213/abbc0a
 
 Cheng, Z., Li, Z., Xu, X., & Li, X. 2018, ApJ, 858, 33,
 
-Ghez, A. M., Salim, S., Hornstein, S. D., et al. 2005, ApJ,
+Fragione, G., & Sari, R. 2018, ApJ, 852, 51,
 
 doi: 10.3847/1538-4357/aaba16
 
-620, 744, doi: 10.1086/427175
+doi: 10.3847/1538-4357/aaa0d7
 
 Choi, J.-H., Shlosman, I., & Begelman, M. C. 2015,
 MNRAS, 450, 4411, doi: 10.1093/mnras/stv694
 
-Gond´an, L., Kocsis, B., Raﬀai, P., & Frei, Z. 2018, ApJ,
-
-860, 5, doi: 10.3847/1538-4357/aabfee
-
 Christensen-Dalsgaard, J., Dappen, W., Ajukov, S. V.,
 
-Gonz´alez, E., Kremer, K., Chatterjee, S., et al. 2021, ApJL,
-
 et al. 1996, Science, 272, 1286,
 doi: 10.1126/science.272.5266.1286
 
 Cohn, H., & Kulsrud, R. M. 1978, ApJ, 226, 1087,
 
+Fregeau, J. M., Cheung, P., Portegies Zwart, S. F., &
+
+Rasio, F. A. 2004, MNRAS, 352, 1,
+doi: 10.1111/j.1365-2966.2004.07914.x
+
+Fregeau, J. M., Joshi, K. J., Portegies Zwart, S. F., &
+
+Rasio, F. A. 2002, ApJ, 570, 171, doi: 10.1086/339576
+Freitag, M., Amaro-Seoane, P., & Kalogera, V. 2006, ApJ,
+
 doi: 10.1086/156685
 
-908, L29, doi: 10.3847/2041-8213/abdf5b
+649, 91, doi: 10.1086/506193
 
-GRAVITY Collaboration, Abuter, R., Amorim, A., et al.
-2020, A&A, 636, L5, doi: 10.1051/0004-6361/202037813
+Dale, J. E., & Davies, M. B. 2006, MNRAS, 366, 1424,
 
-Gualandris, A., & Merritt, D. 2009, ApJ, 705, 361,
+Generozov, A., & Madigan, A.-M. 2020, ApJ, 896, 137,
 
-Dall’Amico, M., Mapelli, M., Di Carlo, U. N., et al. 2021,
+doi: 10.1111/j.1365-2966.2005.09937.x
 
-doi: 10.1088/0004-637X/705/1/361
+doi: 10.3847/1538-4357/ab94bc
 
-MNRAS, 508, 3045, doi: 10.1093/mnras/stab2783
-Di Carlo, U. N., Giacobbo, N., Mapelli, M., et al. 2019,
+Dale, J. E., Davies, M. B., Church, R. P., & Freitag, M.
 
-G¨urkan, M. A., Fregeau, J. M., & Rasio, F. A. 2006, ApJL,
+Generozov, A., Stone, N. C., Metzger, B. D., & Ostriker,
 
-640, L39, doi: 10.1086/503295
+2009, MNRAS, 393, 1016,
+doi: 10.1111/j.1365-2966.2008.14254.x
 
-MNRAS, 487, 2947, doi: 10.1093/mnras/stz1453
+J. P. 2018, MNRAS, 478, 4030,
+doi: 10.1093/mnras/sty1262
 
-G¨urkan, M. A., & Rasio, F. A. 2005, ApJ, 628, 236,
+12
 
-Di Carlo, U. N., Mapelli, M., Pasquato, M., et al. 2021,
-MNRAS, 507, 5132, doi: 10.1093/mnras/stab2390
+Rose et al.
 
-doi: 10.1086/430694
+Genzel, R., Eisenhauer, F., & Gillessen, S. 2010, Reviews of
 
-Hailey, C. J., Mori, K., Bauer, F. E., et al. 2018, Nature,
+Modern Physics, 82, 3121,
+doi: 10.1103/RevModPhys.82.3121
 
-Eda, K., Itoh, Y., Kuroyanagi, S., & Silk, J. 2013, PhRvL,
+Kroupa, P., Subr, L., Jerabkova, T., & Wang, L. 2020,
+MNRAS, 498, 5652, doi: 10.1093/mnras/staa2276
+Levin, Y., & Beloborodov, A. M. 2003, ApJL, 590, L33,
 
-556, 70, doi: 10.1038/nature25029
+Genzel, R., Schödel, R., Ott, T., et al. 2003, ApJ, 594, 812,
 
-110, 221101, doi: 10.1103/PhysRevLett.110.221101
+doi: 10.1086/376675
 
-Hansen, B. M. S., & Milosavljevi´c, M. 2003, ApJL, 593,
+doi: 10.1086/377127
 
-Edgar, R. 2004, NewAR, 48, 843,
+Limongi, M., & Chieﬃ, A. 2018a, ApJS, 237, 13,
 
-doi: 10.1016/j.newar.2004.06.001
+Ghez, A. M., Salim, S., Hornstein, S. D., et al. 2005, ApJ,
 
-Ferrara, A., Salvadori, S., Yue, B., & Schleicher, D. 2014,
+doi: 10.3847/1538-4365/aacb24
 
-Monthly Notices of the Royal Astronomical Society, 443,
-2410, doi: 10.1093/mnras/stu1280
+620, 744, doi: 10.1086/427175
 
-Fishbach, M., Farr, W. M., & Holz, D. E. 2020, ApJL, 891,
+Gondán, L., Kocsis, B., Raﬀai, P., & Frei, Z. 2018, ApJ,
 
-L31, doi: 10.3847/2041-8213/ab77c9
+860, 5, doi: 10.3847/1538-4357/aabfee
+
+González, E., Kremer, K., Chatterjee, S., et al. 2021, ApJL,
+
+908, L29, doi: 10.3847/2041-8213/abdf5b
+
+GRAVITY Collaboration, Abuter, R., Amorim, A., et al.
+2020, A&A, 636, L5, doi: 10.1051/0004-6361/202037813
+
+Gualandris, A., & Merritt, D. 2009, ApJ, 705, 361,
+
+doi: 10.1088/0004-637X/705/1/361
+
+Gürkan, M. A., Fregeau, J. M., & Rasio, F. A. 2006, ApJL,
+
+640, L39, doi: 10.1086/503295
+
+Gürkan, M. A., & Rasio, F. A. 2005, ApJ, 628, 236,
+
+doi: 10.1086/430694
+
+Hailey, C. J., Mori, K., Bauer, F. E., et al. 2018, Nature,
+
+556, 70, doi: 10.1038/nature25029
+
+Hansen, B. M. S., & Milosavljević, M. 2003, ApJL, 593,
 
 L77, doi: 10.1086/378182
 
@@ -1111,84 +1417,76 @@ Hoang, B.-M., Naoz, S., Kocsis, B., Rasio, F. A., &
 Dosopoulou, F. 2018, ApJ, 856, 140,
 doi: 10.3847/1538-4357/aaafce
 
-Fragione, G., Kocsis, B., Rasio, F. A., & Silk, J. 2021,
-
 Hoang, B.-M., Naoz, S., & Kremer, K. 2020, ApJ, 903, 8,
 
-arXiv e-prints, arXiv:2107.04639.
-https://arxiv.org/abs/2107.04639
-
 doi: 10.3847/1538-4357/abb66a
 
-Johnson, J. L., & Bromm, V. 2007, Monthly Notices of the
-
-Fragione, G., Loeb, A., Kremer, K., & Rasio, F. A. 2020a,
-
-ApJ, 897, 46, doi: 10.3847/1538-4357/ab94b2
-
-Royal Astronomical Society, 374, 1557,
-doi: 10.1111/j.1365-2966.2006.11275.x
+Hopman, C., & Alexander, T. 2005, ApJ, 629, 362,
 
-Fragione, G., Loeb, A., & Rasio, F. A. 2020b, ApJL, 902,
+doi: 10.1086/431475
 
-Kremer, K., Lu, W., Piro, A. L., et al. 2021, ApJ, 911, 104,
+Igumenshchev, I. V. 2008, ApJ, 677, 317,
 
-L26, doi: 10.3847/2041-8213/abbc0a
+doi: 10.1086/529025
 
-doi: 10.3847/1538-4357/abeb14
+Igumenshchev, I. V., Narayan, R., & Abramowicz, M. A.
 
-Fragione, G., & Sari, R. 2018, ApJ, 852, 51,
+2003, ApJ, 592, 1042, doi: 10.1086/375769
 
-Kremer, K., Spera, M., Becker, D., et al. 2020, ApJ, 903,
+Jiang, Y.-F., Stone, J. M., & Davis, S. W. 2014, ApJ, 796,
 
-doi: 10.3847/1538-4357/aaa0d7
-
-45, doi: 10.3847/1538-4357/abb945
+106, doi: 10.1088/0004-637X/796/2/106
 
-Fregeau, J. M., Cheung, P., Portegies Zwart, S. F., &
-
-Limongi, M., & Chieﬃ, A. 2018a, ApJS, 237, 13,
+Johnson, J. L., & Bromm, V. 2007, Monthly Notices of the
 
-Rasio, F. A. 2004, MNRAS, 352, 1,
-doi: 10.1111/j.1365-2966.2004.07914.x
+Royal Astronomical Society, 374, 1557,
+doi: 10.1111/j.1365-2966.2006.11275.x
 
-Fregeau, J. M., Joshi, K. J., Portegies Zwart, S. F., &
+—. 2018b, ApJS, 237, 13, doi: 10.3847/1538-4365/aacb24
+Löckmann, U., & Baumgardt, H. 2008, MNRAS, 384, 323,
 
-doi: 10.3847/1538-4365/aacb24
+doi: 10.1111/j.1365-2966.2007.12699.x
 
-—. 2018b, ApJS, 237, 13, doi: 10.3847/1538-4365/aacb24
 Lu, C. X., & Naoz, S. 2019, MNRAS, 484, 1506,
 
-Rasio, F. A. 2002, ApJ, 570, 171, doi: 10.1086/339576
-
 doi: 10.1093/mnras/stz036
 
-10
-
-Rose et al.
-
 Lu, J. R., Ghez, A. M., Hornstein, S. D., et al. 2009, ApJ,
 
-Rodriguez, C. L., Amaro-Seoane, P., Chatterjee, S., &
-
 690, 1463, doi: 10.1088/0004-637X/690/2/1463
-
 Madau, P., & Rees, M. J. 2001, ApJL, 551, L27,
 
 doi: 10.1086/319848
 
+Magorrian, J., & Tremaine, S. 1999, MNRAS, 309, 447,
+
+doi: 10.1046/j.1365-8711.1999.02853.x
+
 Maillard, J. P., Paumard, T., Stolovy, S. R., & Rigaut, F.
 2004, A&A, 423, 155, doi: 10.1051/0004-6361:20034147
-
 Mapelli, M., Bouﬀanais, Y., Santoliquido, F., Arca Sedda,
 
 M., & Artale, M. C. 2021a, arXiv e-prints,
 arXiv:2109.06222. https://arxiv.org/abs/2109.06222
-
 Mapelli, M., Dall’Amico, M., Bouﬀanais, Y., et al. 2021b,
 
 MNRAS, 505, 339, doi: 10.1093/mnras/stab1334
 
+Mastrobuono-Battisti, A., Church, R. P., & Davies, M. B.
+2021, MNRAS, 505, 3314, doi: 10.1093/mnras/stab1409
+
+McKinney, J. C. 2006, MNRAS, 368, 1561,
+doi: 10.1111/j.1365-2966.2006.10256.x
+
+McKinney, J. C., & Gammie, C. F. 2004, ApJ, 611, 977,
+
+doi: 10.1086/422244
+
+McKinney, J. C., Tchekhovskoy, A., Sadowski, A., &
+
+Narayan, R. 2014, MNRAS, 441, 3177,
+doi: 10.1093/mnras/stu762
+
 Merritt, D. 2006, Reports on Progress in Physics, 69, 2513,
 
 doi: 10.1088/0034-4885/69/9/R01
@@ -1198,7 +1496,6 @@ Miralda-Escud´e, J., & Gould, A. 2000, ApJ, 545, 847,
 doi: 10.1086/317837
 
 Morris, M. 1993, ApJ, 408, 496, doi: 10.1086/172607
-
 Muno, M. P., Pfahl, E., Baganoﬀ, F. K., et al. 2005, ApJL,
 
 622, L113, doi: 10.1086/429721
@@ -1206,28 +1503,89 @@ Muno, M. P., Pfahl, E., Baganoﬀ, F. K., et al. 2005, ApJL,
 Muno, M. P., Bauer, F. E., Baganoﬀ, F. K., et al. 2009,
 ApJS, 181, 110, doi: 10.1088/0067-0049/181/1/110
 
+Murray, N., Quataert, E., & Thompson, T. A. 2005, ApJ,
+
+Kremer, K., Lombardi, James C., J., Lu, W., Piro, A. L., &
+
+618, 569, doi: 10.1086/426067
+
+Rasio, F. A. 2022, arXiv e-prints, arXiv:2201.12368.
+https://arxiv.org/abs/2201.12368
+
+Kremer, K., Lu, W., Piro, A. L., et al. 2021, ApJ, 911, 104,
+
+doi: 10.3847/1538-4357/abeb14
+
+Naoz, S., Rose, S. C., Michaely, E., et al. 2022, ApJL, 927,
+
+L18, doi: 10.3847/2041-8213/ac574b
 Naoz, S., & Silk, J. 2014, ApJ, 795, 102,
 doi: 10.1088/0004-637X/795/2/102
 
-Rasio, F. A. 2018, PhRvL, 120, 151101,
-doi: 10.1103/PhysRevLett.120.151101
+Kremer, K., Spera, M., Becker, D., et al. 2020, ApJ, 903,
 
-Rodriguez, C. L., Chatterjee, S., & Rasio, F. A. 2016,
+Naoz, S., Silk, J., & Schnittman, J. D. 2019, ApJL, 885,
 
-PhRvD, 93, 084029, doi: 10.1103/PhysRevD.93.084029
-Rodriguez, C. L., Zevin, M., Amaro-Seoane, P., et al. 2019,
+45, doi: 10.3847/1538-4357/abb945
 
-Phys. Rev. D, 100, 043027,
-doi: 10.1103/PhysRevD.100.043027
+L35, doi: 10.3847/2041-8213/ab4fed
 
-Rose, S. C., Naoz, S., Gautam, A. K., et al. 2020, ApJ, 904,
+IMBH Formation in Galactic Nuclei
 
-113, doi: 10.3847/1538-4357/abc557
+13
+
+Naoz, S., Will, C. M., Ramirez-Ruiz, E., et al. 2020, ApJL,
+
+888, L8, doi: 10.3847/2041-8213/ab5e3b
+
+Narayan, R., Chael, A., Chatterjee, K., Ricarte, A., &
+
+Curd, B. 2022, MNRAS, 511, 3795,
+doi: 10.1093/mnras/stac285
+
+Narayan, R., Igumenshchev, I. V., & Abramowicz, M. A.
+
+2003, PASJ, 55, L69, doi: 10.1093/pasj/55.6.L69
+
+Ohsuga, K., Mori, M., Nakamoto, T., & Mineshige, S. 2005,
+
+ApJ, 628, 368, doi: 10.1086/430728
+
+O’Leary, R. M., Kocsis, B., & Loeb, A. 2009, MNRAS, 395,
+
+2127, doi: 10.1111/j.1365-2966.2009.14653.x
+
+O’Leary, R. M., Rasio, F. A., Fregeau, J. M., Ivanova, N.,
+
+& O’Shaughnessy, R. 2006, ApJ, 637, 937,
+doi: 10.1086/498446
+
+Ostriker, J. P., Choi, E., Ciotti, L., Novak, G. S., & Proga,
+
+D. 2010, ApJ, 722, 642,
+doi: 10.1088/0004-637X/722/1/642
+
+Park, M.-G., & Ostriker, J. P. 2001, ApJ, 549, 100,
+
+doi: 10.1086/319042
+
+Paumard, T., Genzel, R., Martins, F., et al. 2006, ApJ, 643,
+
+1011, doi: 10.1086/503273
+
+Perets, H. B., Li, Z., Lombardi, James C., J., & Milcarek,
+
+Stephen R., J. 2016, ApJ, 823, 113,
+doi: 10.3847/0004-637X/823/2/113
 
 Sakstein, J., Croon, D., McDermott, S. D., Straight, M. C.,
 & Baxter, E. J. 2020, arXiv e-prints, arXiv:2009.01213.
 https://arxiv.org/abs/2009.01213
 
+Sakurai, Y., Yoshida, N., Fujii, M. S., & Hirano, S. 2017,
+
+MNRAS, 472, 1677, doi: 10.1093/mnras/stx2044
+
 Samsing, J., Venumadhav, T., Dai, L., et al. 2019, PhRvD,
 
 100, 043009, doi: 10.1103/PhysRevD.100.043009
@@ -1245,48 +1603,29 @@ Schnittman, J. D., & Buonanno, A. 2007, ApJL, 662, L63,
 
 doi: 10.1086/519309
 
-Shapiro, S. L., & Marchant, A. B. 1978, ApJ, 225, 603,
+Sch¨odel, R., Gallego-Cano, E., Dong, H., et al. 2018, A&A,
 
-Naoz, S., Silk, J., & Schnittman, J. D. 2019, ApJL, 885,
+609, A27, doi: 10.1051/0004-6361/201730452
 
-doi: 10.1086/156521
+Shapiro, S. L., & Marchant, A. B. 1978, ApJ, 225, 603,
 
-L35, doi: 10.3847/2041-8213/ab4fed
+doi: 10.1086/156521
 
 Shima, E., Matsuda, T., Takeda, H., & Sawada, K. 1985,
 
-Naoz, S., Will, C. M., Ramirez-Ruiz, E., et al. 2020, ApJL,
-
 MNRAS, 217, 367, doi: 10.1093/mnras/217.2.367
 
-888, L8, doi: 10.3847/2041-8213/ab5e3b
-
-O’Leary, R. M., Kocsis, B., & Loeb, A. 2009, MNRAS, 395,
-
-2127, doi: 10.1111/j.1365-2966.2009.14653.x
-
 Shlosman, I., Choi, J.-H., Begelman, M. C., & Nagamine,
 
 K. 2016, MNRAS, 456, 500, doi: 10.1093/mnras/stv2700
 
 Sigurdsson, S., & Phinney, E. S. 1993, ApJ, 415, 631,
 
-O’Leary, R. M., Rasio, F. A., Fregeau, J. M., Ivanova, N.,
+Peters, P. C., & Mathews, J. 1963a, Physical Review, 131,
 
 doi: 10.1086/173190
 
-& O’Shaughnessy, R. 2006, ApJ, 637, 937,
-doi: 10.1086/498446
-
-Perets, H. B., Li, Z., Lombardi, James C., J., & Milcarek,
-
-Stephen R., J. 2016, ApJ, 823, 113,
-doi: 10.3847/0004-637X/823/2/113
-
-Peters, P. C., & Mathews, J. 1963a, Physical Review, 131,
-
 435, doi: 10.1103/PhysRev.131.435
-
 —. 1963b, Physical Review, 131, 435,
 
 doi: 10.1103/PhysRev.131.435
@@ -1301,22 +1640,53 @@ Portegies Zwart, S. F., & McMillan, S. L. W. 2000, ApJL,
 528, L17, doi: 10.1086/312422
 
 —. 2002, ApJ, 576, 899, doi: 10.1086/341798
-
 Rashkov, V., & Madau, P. 2014, ApJ, 780, 187,
 
 doi: 10.1088/0004-637X/780/2/187
 
+Renzo, M., Farmer, R., Justham, S., et al. 2020, A&A, 640,
+
+A56, doi: 10.1051/0004-6361/202037710
+
+Rizzuto, F. P., Naab, T., Spurzem, R., et al. 2022,
+
+MNRAS, doi: 10.1093/mnras/stac231
+
+Rodriguez, C. L., Amaro-Seoane, P., Chatterjee, S., &
+
+Rasio, F. A. 2018, PhRvL, 120, 151101,
+doi: 10.1103/PhysRevLett.120.151101
+
+Rodriguez, C. L., Chatterjee, S., & Rasio, F. A. 2016,
+
+PhRvD, 93, 084029, doi: 10.1103/PhysRevD.93.084029
+Rodriguez, C. L., Zevin, M., Amaro-Seoane, P., et al. 2019,
+
+Phys. Rev. D, 100, 043027,
+doi: 10.1103/PhysRevD.100.043027
+
 Spera, M., & Mapelli, M. 2017a, MNRAS, 470, 4739,
 
 doi: 10.1093/mnras/stx1576
 
 —. 2017b, MNRAS, 470, 4739, doi: 10.1093/mnras/stx1576
+
 Spitzer, L. 1987, Dynamical evolution of globular clusters
+
 Stephan, A. P., Naoz, S., Ghez, A. M., et al. 2016, ArXiv
 
 e-prints. https://arxiv.org/abs/1603.02709
 
 —. 2019, ApJ, 878, 58, doi: 10.3847/1538-4357/ab1e4d
+
+Stone, N. C., K¨upper, A. H. W., & Ostriker, J. P. 2017,
+
+MNRAS, 467, 4180, doi: 10.1093/mnras/stx097
+
+Stone, N. C., & Metzger, B. D. 2016, MNRAS, 455, 859,
+
+doi: 10.1093/mnras/stv2281
+
 The LIGO Scientiﬁc Collaboration, the Virgo
 
 Collaboration, Abbott, R., et al. 2020a, arXiv e-prints,
@@ -1328,6 +1698,7 @@ https://arxiv.org/abs/2009.01190
 
 Umbreit, S., Fregeau, J. M., Chatterjee, S., & Rasio, F. A.
 2012, ApJ, 750, 31, doi: 10.1088/0004-637X/750/1/31
+
 Valiante, R., Schneider, R., Volonteri, M., & Omukai, K.
 
 2016, Monthly Notices of the Royal Astronomical
@@ -1335,36 +1706,50 @@ Society, 457, 3356, doi: 10.1093/mnras/stw225
 
 Vink, J. S., Higgins, E. R., Sander, A. A. C., & Sabhahit,
 
-Renzo, M., Farmer, R., Justham, S., et al. 2020, A&A, 640,
+Rose, S. C., Naoz, S., Gautam, A. K., et al. 2020, ApJ, 904,
 
-A56, doi: 10.1051/0004-6361/202037710
+113, doi: 10.3847/1538-4357/abc557
 
 G. N. 2021, MNRAS, 504, 146,
 doi: 10.1093/mnras/stab842
 
-IMBH Formation in Galactic Nuclei
+14
 
-11
+Rose et al.
 
 Wang, H., Stephan, A. P., Naoz, S., Hoang, B.-M., &
 
-Breivik, K. 2021, ApJ, 917, 76,
+Yue, B., Ferrara, A., Salvaterra, R., Xu, Y., & Chen, X.
 
+Breivik, K. 2021, ApJ, 917, 76,
 doi: 10.3847/1538-4357/ac088d
 
+Wang, J., & Merritt, D. 2004, ApJ, 600, 149,
+
+doi: 10.1086/379767
+
 Woosley, S. E. 2017, ApJ, 836, 244,
 
-Yue, B., Ferrara, A., Salvaterra, R., Xu, Y., & Chen, X.
+doi: 10.3847/1538-4357/836/2/244
+
+Yu, Q., & Tremaine, S. 2002, MNRAS, 335, 965,
+
+doi: 10.1046/j.1365-8711.2002.05532.x
+
+Yuan, F., Wu, M., & Bu, D. 2012, ApJ, 761, 129,
+
+doi: 10.1088/0004-637X/761/2/129
+
 2014, Monthly Notices of the Royal Astronomical
+
 Society, 440, 1263, doi: 10.1093/mnras/stu351
 
 Zheng, X., Lin, D. N. C., & Mao, S. 2020, arXiv e-prints,
+
 arXiv:2011.04653. https://arxiv.org/abs/2011.04653
 
 Zhu, Z., Li, Z., & Morris, M. R. 2018, ApJS, 235, 26,
 
-doi: 10.3847/1538-4357/836/2/244
-
 doi: 10.3847/1538-4365/aab14f
 
 
\ No newline at end of file
diff --git a/read/results/pdfminer/GeoTopo-book.txt b/read/results/pdfminer/GeoTopo-book.txt
index bb02786..05b93d1 100644
--- a/read/results/pdfminer/GeoTopo-book.txt
+++ b/read/results/pdfminer/GeoTopo-book.txt
@@ -51,19 +51,19 @@ in „Analysis I“ vermittelt.
 ), Mengenschreibweisen (
 
 ,
-∩
+∪
 
 ,
-∪
+∩
 
 P
 
 ∃
 
-∅
-
 \
 
+∅
+
 ,
 
 , R,
@@ -447,10 +447,10 @@ sind dort alle abgeschlossenen Mengen.
 
 {
 
-{
-
 }
 
+{
+
 ,
 
 0
@@ -648,10 +648,10 @@ und T =
 
 0, 1, 2
 
-}
-
 {
 
+}
+
 0
 
 ,
@@ -2183,10 +2183,10 @@ R
 ∩
 X, wobei X ein topologischer Raum ist.
 
-∩
-
 ∪
 
+∩
+
 6) R mit Zariski-Topologie ist zusammenhängend.
 
 Bemerkung 14
@@ -7537,10 +7537,10 @@ R)
 
 →
 
-(cid:55)→
-
 →
 
+(cid:55)→
+
 ×
 
 I
@@ -8097,12 +8097,12 @@ e
 
 →
 
-→
-
 (cid:55)→
 
 →
 
+→
+
 ist nicht injektiv.
 π1(S1, 1) ∼=
 
@@ -8855,10 +8855,10 @@ z
 
 Abbildungen.
 
-}
-
 (cid:107)
 
+}
+
 ∈
 1) f1 := idR ist eine oﬀene und stetige Abbildung.
 2) g(x) := e2πix ist eine oﬀene, aber keine stetige Abbildung (vgl. Abbildung 1.5).
@@ -10188,10 +10188,10 @@ von y.
 
 ⊆
 
-U ein Homöomorphismus. Dann ist W := f −1(V )
-
 Y
 
+U ein Homöomorphismus. Dann ist W := f −1(V )
+
 ×
 ∈
 
@@ -10797,10 +10797,10 @@ b) Ist X ein topologischer Raum, so entsprechen dabei die Gruppenoperationen dur
 
 →
 
-}
-
 {
 
+}
+
 |
 
 Homöomorphismus den Gruppenhomomorphismen G
@@ -11446,6 +11446,8 @@ Hi, B
 
 ∈ {
 
+1, 2
+
 ∈
 
 ∈
@@ -11457,8 +11459,6 @@ H2 in zwei nichtleere Teilmengen H1, H2, sodass
 
 = j.
 
-1, 2
-
 g
 
 i
@@ -13660,17 +13660,17 @@ Abbildung 4.21: Hyperbolische Geraden erfüllen §5 nicht.
 1
 −
 
-4
-
-0
+2
 
-3
+1
 
 5
 
-1
+0
 
-2
+4
+
+3
 
 x
 
@@ -17590,12 +17590,12 @@ zi
 
 }
 
-}
-
 0
 
 }
 
+}
+
 {
 
 ⊆ {
@@ -17773,10 +17773,10 @@ phismus.
 
 H, x
 
-(cid:55)→
-
 →
 
+(cid:55)→
+
 3) Sei X ein topologischer Raum. Dann ist idX ein Homöomorphismus. Da keine
 Verknüpfung auf X deﬁniert wurde, ist X keine Gruppe und daher auch kein Grup-
 penhomomorphismus.
@@ -18229,12 +18229,12 @@ g. Da aber x
 
 f und f
 
-∈
-
 ∩
 
 ∈
 
+∈
+
 (cid:107)
 
 ∈
diff --git a/read/results/pdfplumber/1601.03642.txt b/read/results/pdfplumber/1601.03642.txt
index cd8e8fd..87845f6 100644
--- a/read/results/pdfplumber/1601.03642.txt
+++ b/read/results/pdfplumber/1601.03642.txt
@@ -1,84 +1,172 @@
 1
-Creativity in Machine Learning w
-x 0
+Creativity in Machine Learning
+Martin Thoma
+E-Mail: info@martin-thoma.de
+Abstract—Recent machine learning techniques can be modified
+to produce creative results. Those results did not exist before; it
+is not a trivial combination of the data which was fed into the
+machine learning system. The obtained results come in multiple
+forms: As images, as text and as audio.
+This paper gives a high level overview of how they are created
+and gives some examples. It is meant to be a summary of the
+current work and give people who are new to machine learning
+some starting points.
+I. INTRODUCTION
+According to [Gad06] creativity is “the ability to use your
+imagination to produce new ideas, make things etc.” and
+imagination is “the ability to form pictures or ideas in your
+mind”.
+Recentadvancesinmachinelearningproduceresultswhichthe
+author would intuitively call creative. A high-level overview
+over several of those algorithms are described in the following.
+This paper is structured as follows: Section II introduces the
+reader on a very simple and superficial level to machine
+learning, Section III gives examples of creativity with images,
+Section IV gives examples of machines producing textual
+content, and Section V gives examples of machine learning
+and music. A discussion follows in Section VI.
+II. BASICSOFMACHINELEARNING
+The traditional approach of solving problems with software
+is to program machines to do so. The task is divided in as
+simple sub-tasks as possible, the subtasks are analyzed and the
+machineisinstructedtoprocesstheinputwithhuman-designed
+algorithms to produce the desired output. However, for some
+taskslikeobjectrecognitionthisapproachisnotfeasible.There
+are way to many different objects, different lighting situations,
+variations in rotation and the arrangement of a scene for a
+human to think of all of them and model them. But with the
+internet, cheap computers, cameras, crowd-sourcing platforms
+like Wikipedia and lots of Websites, services like Amazon
+Mechanical Turk and several other changes in the past decades
+alotofdatahasbecomeavailable.Theideaofmachinelearning
+is to make use of this data.
+A formal definition of the field of Machine Learning is given
+by Tom Mitchel [Mit97]:
+A computer program is said to learn from experi-
+ence E with respect to some class of tasks T and
+performance measure P, if its performance at tasks
+inT,asmeasuredbyP,improveswithexperienceE.
+Σ ϕ
+x
+0
+x
+1
+x 2
+x
+3
+x
+n
+w
 0
 w
-x 1
 1
 w
-Martin Thoma x 2 2 Σ ϕ
+2
 w
-E-Mail: info@martin-thoma.de x 3
-.3
-. . wn
-x
-n
-Abstract—Recent machine learning techniques can be modified (a) Exampleofanartificialneuronunit.(b) Avisualizationofasimplefeed-
-to produce creative results. Those results did not exist before; it xiaretheinputsignalsandwiare forwardneuralnetwork.The5in-
-is not a trivial combination of the data which was fed into the weightswhichhavetogetlearned. putnodesarered,the2biasnodes
-machine learning system. The obtained results come in multiple Each input signal gets multiplied are gray, the 3 hidden units are
-forms: As images, as text and as audio. with its weight, everything gets greenandthesingleoutputnode
-summedupandtheactivationfunc- isblue.
-This paper gives a high level overview of how they are created tionϕisapplied.
-and gives some examples. It is meant to be a summary of the
-current work and give people who are new to machine learning Fig. 1: Neural networks are based on simple units which get
-some starting points. combined to complex networks.
-I. INTRODUCTION This means that machine learning programs adjust internal 6102
+3
+wn
+.
+. .
+(a) Exampleofanartificialneuronunit.
+xiaretheinputsignalsandwiare
+weightswhichhavetogetlearned.
+Each input signal gets multiplied
+with its weight, everything gets
+summedupandtheactivationfunc-
+tionϕisapplied.
+(b) Avisualizationofasimplefeed-
+forwardneuralnetwork.The5in-
+putnodesarered,the2biasnodes
+are gray, the 3 hidden units are
+greenandthesingleoutputnode
+isblue.
+Fig. 1: Neural networks are based on simple units which get
+combined to complex networks.
+This means that machine learning programs adjust internal
 parameters to fit the data they are given. Those computer
-According to [Gad06] creativity is “the ability to use your programs are still developed by software developers, but the
-imagination to produce new ideas, make things etc.” and developer writes them in a way which makes it possible to
-imagination is “the ability to form pictures or ideas in your adjust them without having to re-program everything. Machine naJ
-mind”. learning programs should generally improve when they are fed
+programs are still developed by software developers, but the
+developer writes them in a way which makes it possible to
+adjust them without having to re-program everything. Machine
+learning programs should generally improve when they are fed
 with more data.
-Recentadvancesinmachinelearningproduceresultswhichthe 21
-author would intuitively call creative. A high-level overview The field of machine learning is related to statistics. Some
-over several of those algorithms are described in the following. algorithms directly try to find models which are based on well-
-]VC.sc[
+The field of machine learning is related to statistics. Some
+algorithms directly try to find models which are based on well-
 known distribution assumptions of the developer, others are
-This paper is structured as follows: Section II introduces the
 more general.
-reader on a very simple and superficial level to machine
-learning, Section III gives examples of creativity with images, A common misunderstanding of people who are not related
-Section IV gives examples of machines producing textual in this field is that the developers don’t understand what their
-content, and Section V gives examples of machine learning machine learning program is doing. It is understood very well
-and music. A discussion follows in Section VI. in the sense that the developer, given only a pen, lots of paper 1v24630.1061:viXra
+A common misunderstanding of people who are not related
+in this field is that the developers don’t understand what their
+machine learning program is doing. It is understood very well
+in the sense that the developer, given only a pen, lots of paper
 and a calculator could calculate the same result as the machine
 doeswhenhegetsthesamedata.Andlotsoftime,ofcourse.It
-II. BASICSOFMACHINELEARNING
 isnotunderstoodinthesensethatitishardtomakepredictions
 how the algorithm behaves without actually trying it. However,
-The traditional approach of solving problems with software
 this is similar to expecting from an electrical engineer to
-is to program machines to do so. The task is divided in as
 explain how a computer works. The electrical engineer could
-simple sub-tasks as possible, the subtasks are analyzed and the
 probably get the knowledge he needs to do so, but the amount
-machineisinstructedtoprocesstheinputwithhuman-designed
 of time required to understand such a complex system from
-algorithms to produce the desired output. However, for some
 basic building blocks is a time-intensive and difficult task.
-taskslikeobjectrecognitionthisapproachisnotfeasible.There
-are way to many different objects, different lighting situations, An important group of machine learning algorithms was
-variations in rotation and the arrangement of a scene for a inspired by biological neurons and are thus called artificial
-human to think of all of them and model them. But with the neural networks. Those networks are based on mathematical
-internet, cheap computers, cameras, crowd-sourcing platforms functions called artificial neurons which take n ∈ N num-
-like Wikipedia and lots of Websites, services like Amazon bers x ,...,x ∈ R as input, multiply them with weights
-1 n
-Mechanical Turk and several other changes in the past decades w ,...,w ∈ R, add them and apply a so called activation
-1 n
-alotofdatahasbecomeavailable.Theideaofmachinelearning function ϕ as visualized in Figure 1(a). One example of such
-is to make use of this data. an activation function is the sigmoid function ϕ(x)= 1 .
+An important group of machine learning algorithms was
+inspired by biological neurons and are thus called artificial
+neural networks. Those networks are based on mathematical
+functions called artificial neurons which take n ∈ N num-
+bers x
+1
+,...,x
+n
+∈ R as input, multiply them with weights
+w
+1
+,...,w
+n
+∈ R, add them and apply a so called activation
+function ϕ as visualized in Figure 1(a). One example of such
+an activation function is the sigmoid function ϕ(x)= 1
 1+e−x
+.
 Those functions act as building blocks for more complex
-A formal definition of the field of Machine Learning is given
 systems as they can be chained and grouped in layers as
-by Tom Mitchel [Mit97]:
 visualized in Figure 1(b). The interesting question is how
-A computer program is said to learn from experi- the parameters w are learned. This is usually done by an
+the parameters w
 i
-ence E with respect to some class of tasks T and optimization technique called gradient descent. The gradient
-performance measure P, if its performance at tasks descent algorithm takes a function which has to be derivable,
-inT,asmeasuredbyP,improveswithexperienceE. starts at any point of the surface of this error function and
+are learned. This is usually done by an
+optimization technique called gradient descent. The gradient
+descent algorithm takes a function which has to be derivable,
+starts at any point of the surface of this error function and
+a
+r
+X
+i
+v
+:
+1
+6
+0
+1
+.
+0
+3
+6
+4
+2
+v
+1
+[
+c
+s
+.
+C
+V
+]
+1
+2
+J
+a
+n
+2
+0
+1
+6
 2
 makes a step in the direction which goes downwards. Hence
 it tries to find a minimum of this high-dimensional function.
@@ -92,7 +180,7 @@ One would take one neuron per pixel and channel. This means
 for 500px×500px RGB images one would get 750,000 input
 signals. To approach this problem, so called Convolutional
 Neural Networks (CNNs) were introduced. Instead of learning
-the full connection between the input layer and the first Fig. 2: Aurelia aurita
+the full connection between the input layer and the first
 hidden layer, those networks make use of convolution layers.
 Convolution layers learn a convolution; this means they learn
 the weights of an image filter. An additional advantage is that
@@ -106,35 +194,78 @@ effect it has on the recognition system is difficult to estimate.
 [MOT15] proposes a technique to analyze the weights learned
 by such a network. A similar idea was applied by [VKMT13].
 For example, consider a neural network which was trained to
-recognize various images like bananas. This technique turns Fig. 3: DeepDream impression of Aurelia aurita
+recognize various images like bananas. This technique turns
 the network upside down and starts with random noise. To
 analyze what the network considers bananas to look like, the
-random noise image is gradually tweaked so that it generates Ithasbecomefamousintheinternet[Red].Usually,theimages
-theoutput“banana”.Additionally,thechangescanberestricted are generated in iterations and in each iteration it is zoomed
-inawaythatthestatisticsoftheinputimagehavetobesimilar into the image.
-to natural images. One example of this is that neighboring Images and videos published by the Google engineers can be
-pixels are correlated. seenat[goo15].Figure2showstheoriginalimagefromwhich
-Figure 3 was created with the deep dream algorithm.
+random noise image is gradually tweaked so that it generates
+theoutput“banana”.Additionally,thechangescanberestricted
+inawaythatthestatisticsoftheinputimagehavetobesimilar
+to natural images. One example of this is that neighboring
+pixels are correlated.
 Another technique is to amplify the output of layers. This was
 described in [MOT15]:
 Weaskthenetwork:“Whateveryouseethere,Iwant
-B. Artistic Style Imitation
 more of it!” This creates a feedback loop: if a cloud
 looks a little bit like a bird, the network will make
-it look more like a bird. This in turn will make the A key idea of neural networks is that they learn different
-network recognize the bird even more strongly on representations of the data in each layer. In the case of
-the next pass and so forth, until a highly detailed CNNs, this can easily be visualized as it was done in various
-bird appears, seemingly out of nowhere. papers [ZF14]. Usually, one finds that the network learned
-to build edge detectors in the first layer and more complex
+it look more like a bird. This in turn will make the
+network recognize the bird even more strongly on
+the next pass and so forth, until a highly detailed
+bird appears, seemingly out of nowhere.
 The name “Inceptionism” in the title of [MOT15] comes from
-structures in the upper layers.
 the science-fiction movie “Inception” (2010). One reason it
-might be chosen is because neural networks are structured Gatys,EckerandBethgeshowedin[GEB15]thatwithaclever
-in layers. Recent publications tend to have more and more choice of features it is possible to separate the general style of
-layers [HZRS15]. The used jargon is to say they get “deeper”. an image in terms of local image appearance from the content
-As this technique as published by Google engineers, the of an image. They support their claim by applying the style of
-technique is called Google DeepDream. different artists to an arbitrary image of their choice.
+might be chosen is because neural networks are structured
+in layers. Recent publications tend to have more and more
+layers [HZRS15]. The used jargon is to say they get “deeper”.
+As this technique as published by Google engineers, the
+technique is called Google DeepDream.
+Fig. 2: Aurelia aurita
+Fig. 3: DeepDream impression of Aurelia aurita
+Ithasbecomefamousintheinternet[Red].Usually,theimages
+are generated in iterations and in each iteration it is zoomed
+into the image.
+Images and videos published by the Google engineers can be
+seenat[goo15].Figure2showstheoriginalimagefromwhich
+Figure 3 was created with the deep dream algorithm.
+B. Artistic Style Imitation
+A key idea of neural networks is that they learn different
+representations of the data in each layer. In the case of
+CNNs, this can easily be visualized as it was done in various
+papers [ZF14]. Usually, one finds that the network learned
+to build edge detectors in the first layer and more complex
+structures in the upper layers.
+Gatys,EckerandBethgeshowedin[GEB15]thatwithaclever
+choice of features it is possible to separate the general style of
+an image in terms of local image appearance from the content
+of an image. They support their claim by applying the style of
+different artists to an arbitrary image of their choice.
 3
+(a) OriginalImage (b) Styleimage
+(c) TheartisticstyleofVanGogh’s“StarryNight”appliedtothephotograph
+ofaScottishHighlandCattle.
+Fig. 4: The algorithm takes both, the original image and the
+style image to produce the result.
+This artistic style imitation can be seen itself as creative work.
+An example is given by Figure 4. The code which created this
+example is available under [Joh16].
+Something similar was done by [SPB+14], where the style of
+a portrait photograph was transferred to another photograph.
+A demo can be seen on [Shi14].
+C. Drawing Robots
+PatrickTressetandFrdricFolLeymariecreatedasystemcalled
+AIKON (Automatic IKONic drawing) which can automatically
+generated sketches for portraits [TL05]. AIKON takes a digital
+photograph, detects faces on them and sketches them with a
+pen-plotter.
+Tresset and Leymaire use k-means clustering [KMN+02] to
+segment regions of the photograph with similar color which,
+in turn, will get a similar shading.
+Such a drawing robot could apply machine learning techniques
+known from computer vision for detecting the human. It
+could apply self-learning techniques to draw results most
+similar to the artists impression of the image. However, the
+system described in [TL05] seems not to be a machine
+learning computer program according to the definition by Tom
+Mitchell [Mit97].
 IV. TEXTDATA
 Digital text is the first form of natural communication which
 involved computers. It is used in the form of chats, websites,
@@ -142,7 +273,7 @@ on collaborative projects like Wikipedia, in scientific literature.
 Of course, it was used in pre-digital times, too: In newspaper,
 in novels, in dramas, in religious texts like the bible, in books
 for education, in notes from conversations.
-(a) OriginalImage (b) Styleimage This list could be continued and most of these kinds of texts
+This list could be continued and most of these kinds of texts
 are now available in digital form. This digital form can be
 used to teach machines to generate similar texts.
 The most simple language model which is of use is an n-gram
@@ -158,208 +289,253 @@ Networks (RNNs). Those character predictors take a sequence
 of characters as input and predict the next character. In that
 sense they are similar to the n-gram model, but operate on
 a lower level. Using such a predictor, one can generate texts
-(c) TheartisticstyleofVanGogh’s“StarryNight”appliedtothephotograph
-ofaScottishHighlandCattle. character by character. If the model is good, the text can have
+character by character. If the model is good, the text can have
 the correct punctuation. This would not be possible with a
-Fig. 4: The algorithm takes both, the original image and the word predictor.
-style image to produce the result.
+word predictor.
 Character predictors can be implemented with RNNs. In con-
 trast to standard feed-forward neural networks like multilayer
-This artistic style imitation can be seen itself as creative work. Perceptrons (MLPs) which was shown in Figure 1(b), those
-An example is given by Figure 4. The code which created this networksaretrainedtotaketheiroutputatsomepointaswellas
-example is available under [Joh16]. the normal input. This means they can keep some information
+Perceptrons (MLPs) which was shown in Figure 1(b), those
+networksaretrainedtotaketheiroutputatsomepointaswellas
+the normal input. This means they can keep some information
 over time. One of the most common variant to implement
-Something similar was done by [SPB+14], where the style of RNNs is by using so called Long short-term memory (LSTM)
-a portrait photograph was transferred to another photograph. cells [HS97].
-A demo can be seen on [Shi14].
+RNNs is by using so called Long short-term memory (LSTM)
+cells [HS97].
 Recurrentnetworksapplytwomainideasinordertolearn:The
 first is called unrolling and means that an recurrent network
 is imagined to be an infinite network over time. At each time
-C. Drawing Robots
 step the recurrent neurons get duplicated. The second idea is
 weight sharing which means that those unrolled neurons share
-PatrickTressetandFrdricFolLeymariecreatedasystemcalled
 the same weight.
-AIKON (Automatic IKONic drawing) which can automatically
-generated sketches for portraits [TL05]. AIKON takes a digital
-photograph, detects faces on them and sketches them with a
 A. Similar Texts Generation
-pen-plotter.
-Tresset and Leymaire use k-means clustering [KMN+02] to KarpathytrainedmultiplecharacterRNNsondifferentdatasets
-segment regions of the photograph with similar color which, and gave an excellent introduction [Kar15b]. He trained it on
-in turn, will get a similar shading. Paul Graham’s essays, all the works of Shakespeare, the Hutter
+KarpathytrainedmultiplecharacterRNNsondifferentdatasets
+and gave an excellent introduction [Kar15b]. He trained it on
+Paul Graham’s essays, all the works of Shakespeare, the Hutter
 Prize [hut] 100MB dataset of raw Wikipedia articles, the raw
-Such a drawing robot could apply machine learning techniques
 LATEXsourcefileofabookaboutalgebraicstacksandgeometry
-known from computer vision for detecting the human. It
 and Linux C code.
-could apply self-learning techniques to draw results most
-similar to the artists impression of the image. However, the With that training data, the models can generate similar texts.
-system described in [TL05] seems not to be a machine New works which look like Shakespeare plays, new Wikipedia
-learning computer program according to the definition by Tom articles, new Linux code and new papers about algebraic
-Mitchell [Mit97]. geometry can thus automatically be generated. At a first
+With that training data, the models can generate similar texts.
+New works which look like Shakespeare plays, new Wikipedia
+articles, new Linux code and new papers about algebraic
+geometry can thus automatically be generated. At a first
 4
-glance, they do look authentic. The syntax was mostly used we will now investigate the work which was done in audio
-correctly, the formatting looks as expected, the sentences are synthesization.
+glance, they do look authentic. The syntax was mostly used
+correctly, the formatting looks as expected, the sentences are
 grammaticallycorrect.However,whenonelooksatthebroader
 context it is easy to recognize that the algorithm has no insight
-A. Emily Howell
 in what it is doing. It does match patterns really well, but it
 fails to follow a central theme. In the context of C code this
-David Cope created a project called “Experiments in Musical
 means that new variables are introduced, but not used. At the
-Intelligence” (short: EMI or Emmy) in 1984 [Cop87]. He
 same time, variables which were not declared are used. In
-introduces the idea of seeing music as a language which
 the context of Shakespear plays this means that a lot of new
-can be analyzed with natural language processing (NLP)
 characters are introduced, but they don’t speak with each other
-methods. Cope mentions that EMI was more useful to him,
 or about each other.
-when he used the system to “create small phrase-size textures
-The code used to generate these examples is available and as next possibilities using its syntactic dictionary and rule
-ready to use through [Kar15a]. A couple of examples are base” [Cop87].
+The code used to generate these examples is available and
+ready to use through [Kar15a]. A couple of examples are
 in Section A.
-In 2003, Cope started a new project which was based on EMI:
-Emily Howell [Cop13]. This program is able to “creat[e] both
-highly authentic replications and novel music compositions”.
 B. Chatbots
-Thereadermightwanttolistento[Cop12]togetanimpression
-of the beauty of the created music.
 Chatbots are computer programs which participate in chat
-rooms as autonomous agents. This means they have similar According to Cope, an essential part of music is “a set of
-permissions and possibilities as usual human users have, but instructions for creating different, but highly related self-
-users can trigger a set of commands to make the bot give them replications”. Emmy was programmed to find this set of
-valuable information or features. instructions. It tries to find the “signature” of a composer,
-which Cope describes as “contiguous patterns that recur in two
+rooms as autonomous agents. This means they have similar
+permissions and possibilities as usual human users have, but
+users can trigger a set of commands to make the bot give them
+valuable information or features.
 A special category of chatbots are such bots which actively
-or more works of the composer”.
 participate in the conversation, which is usually not the case.
-One of the earliest programs in this category is ELIZA, a bot The new feature of Emily Howell compared to Emmy is that
-created by Joseph Weizenbaum in the 1960s [Wei76]. This Emily Howell does not necessarily remain in a single, already
-program had a set of patterns implemented to which it would known style.
+One of the earliest programs in this category is ELIZA, a bot
+created by Joseph Weizenbaum in the 1960s [Wei76]. This
+program had a set of patterns implemented to which it would
 reply in a seemingly smart way in a psychologists fashion.
-Emily Howell makes use of association network. Cope empha-
 This means quite often the program would simply repeat the
-sizes that this is not a form of a neural network. However, it
 last sentence and add something meaningless like “How do
-is not clear from [Cop13] how exactly an association network
 you feel about it?”. According to [Cur14], Weizenbaum once
-is trained. Cope mentions that Emily Howell is explained in
 found his secretary — who was aware of the fact that this is
-detail in [Cop05].
 a computer program — chatting with the machine. When he
 looked over her shoulder, she asked him “would you mind
 leaving the room”.
-B. GRUV
 Today, much more sophisticated chatbots exist. They make use
-Recurrent neural networks — LSTM networks, to be exact
 of the vast amount of data which is available by millions of
-— are used in [NV15] together with Gated Recurrent Units
 Wikipediaarticles,chatprotocols,websites,helpdeskprotocols,
-(GRU) to build a network which can be trained to generate
 subtitles of movies as well as the astonishing increase in
-music. Instead of taking notes directly or MIDI files, Nayebi
 computing power to train RNNs and language models similar
-and Vitelli took raw audio waveforms as input. Those audio
 to the ones described before.
+Interesting results like the following were obtained by [VL15]:
+Human: what is the purpose of life ?
+Machine: to serve the greater good .
+Human: what is the purpose of living ?
+Machine: to live forever .
+V. AUDIODATA
+Common machine learning tasks which involve audio data
+are speech recognition, speaker identification, identification of
+songs. This leads to some less-common, but interesting topics:
+The composition of music, the synthesizing of audio as art.
+While the composition might be considered in Section IV,
+we will now investigate the work which was done in audio
+synthesization.
+A. Emily Howell
+David Cope created a project called “Experiments in Musical
+Intelligence” (short: EMI or Emmy) in 1984 [Cop87]. He
+introduces the idea of seeing music as a language which
+can be analyzed with natural language processing (NLP)
+methods. Cope mentions that EMI was more useful to him,
+when he used the system to “create small phrase-size textures
+as next possibilities using its syntactic dictionary and rule
+base” [Cop87].
+In 2003, Cope started a new project which was based on EMI:
+Emily Howell [Cop13]. This program is able to “creat[e] both
+highly authentic replications and novel music compositions”.
+Thereadermightwanttolistento[Cop12]togetanimpression
+of the beauty of the created music.
+According to Cope, an essential part of music is “a set of
+instructions for creating different, but highly related self-
+replications”. Emmy was programmed to find this set of
+instructions. It tries to find the “signature” of a composer,
+which Cope describes as “contiguous patterns that recur in two
+or more works of the composer”.
+The new feature of Emily Howell compared to Emmy is that
+Emily Howell does not necessarily remain in a single, already
+known style.
+Emily Howell makes use of association network. Cope empha-
+sizes that this is not a form of a neural network. However, it
+is not clear from [Cop13] how exactly an association network
+is trained. Cope mentions that Emily Howell is explained in
+detail in [Cop05].
+B. GRUV
+Recurrent neural networks — LSTM networks, to be exact
+— are used in [NV15] together with Gated Recurrent Units
+(GRU) to build a network which can be trained to generate
+music. Instead of taking notes directly or MIDI files, Nayebi
+and Vitelli took raw audio waveforms as input. Those audio
 waveformsarefeaturevectorsgivenfortimesteps0,1,...,t−
-Interesting results like the following were obtained by [VL15]: 1,t. The network is given those feature vectors X ,...,X
-1 t
-and has to predict the following feature vector X . This
-Human: what is the purpose of life ? t+1
+1,t. The network is given those feature vectors X
+1
+,...,X
+t
+and has to predict the following feature vector X
+t+1
+. This
 means it continues the music. As the input is continuous, the
-Machine: to serve the greater good .
 problem was modeled as a regression task. Discrete Fourier
-Human: what is the purpose of living ?
 Transformation (DFT) was used on chunks of length N of the
-Machine: to live forever .
 music to obtain features in the frequency domain.
 Animplementationcanbefoundat[VN15]andademonstration
-V. AUDIODATA
 can be found at [Vit15].
-Common machine learning tasks which involve audio data
-are speech recognition, speaker identification, identification of C. Audio Synthesization
-songs. This leads to some less-common, but interesting topics:
-The composition of music, the synthesizing of audio as art. Audio synthesization is generating new audio files. This can
-While the composition might be considered in Section IV, eitherbemusicorspeech.Withthetechniquesdescribedbefore,
+C. Audio Synthesization
+Audio synthesization is generating new audio files. This can
+eitherbemusicorspeech.Withthetechniquesdescribedbefore,
 5
-neural networks can be trained to generate music note by note. [Joh15a] D. Johnson, “Biaxial recurrent neural network for music
-However, it is desirable to allow multiple notes being played composition,” GitHub, Aug. 2015. [Online]. Available: https:
-//github.com/hexahedria/biaxial-rnn-music-composition
+neural networks can be trained to generate music note by note.
+However, it is desirable to allow multiple notes being played
 at the same time.
+ThisideaandsomeotherswereappliedbyDanielJohnson.He
+wrote a very good introduction into neural networks for music
+composition which explains those ideas [Joh15b]. Example
+compositionsareavailablethere,too.Healsomadethecodefor
+hisBiaxialRecurrentNeuralNetworkavailableunder[Joh15a].
+VI. DISCUSSION
+What does these examples mean for our understanding of
+creativity? Does it influence how much we value art? Could
+wedefineartandcreativitybetterafterhavingthoseandsimilar
+results?
+I think we might readjust our understanding of creativity just
+like we adjusted our understanding of algorithmically hard
+problems after Deep Blue won against the reigning world
+chess champion Garry Kasparov in 1997.
+However,bynowitisobviousthatmachinelearningalgorithms
+cannot compete with human artists. Today’s state of the art
+algorithms which are purely based on machine learning don’t
+follow a central theme. They lack the ability to plan. Although
+clever algorithms were implemented for composing music, it
+seems as if there is still a lot of supervision involved.
+REFERENCES
+[Cop87] D. Cope, “Experiments in music intelligence (emi),” 1987.
+[Online].Available:http://hdl.handle.net/2027/spo.bbp2372.1987.
+025
+[Cop05] ——, Computer models of musical creativity. MIT Press
+Cambridge,2005.
+[Cop12] ——, “Emily howell fugue,” YouTube, Oct. 2012. [Online].
+Available:https://www.youtube.com/watch?v=jLR- c uCwI
+[Cop13] ——,“Thewell-programmedclavier:Styleincomputermusic
+composition,” XRDS: Crossroads, The ACM Magazine for
+Students, vol. 19, no. 4, pp. 16–20, 2013. [Online]. Available:
+http://dl.acm.org/citation.cfm?id=2460444
+[Cur14] A. Curtis, “Now then,” BBC, Jul. 2014. [On-
+line].Available:http://www.bbc.co.uk/blogs/adamcurtis/entries/
+78691781-c9b7-30a0-9a0a-3ff76e8bfe58
+[Gad06] A.Gadsby,Ed.,DictionaryofContemporaryEnglish. Pearson
+EducationLimited,2006.
+[GEB15] L.A.Gatys,A.S.Ecker,andM.Bethge,“Aneuralalgorithmof
+artisticstyle,”arXivpreprintarXiv:1508.06576,2015.[Online].
+Available:http://arxiv.org/abs/1508.06576
+[goo15] “Inceptionism: Going deeper into neural networks,” Google
+Photos,Jun.2015.[Online].Available:https://goo.gl/Bydofw
+[HS97] S.HochreiterandJ.Schmidhuber,“Longshort-termmemory,”
+Neural computation, vol. 9, no. 8, pp. 1735–1780, 1997.
+[Online].Available:http://ieeexplore.ieee.org/xpl/freeabs all.jsp?
+arnumber=6795963
+[hut] “50’000europrizeforcompressinghumanknowledge.”[Online].
+Available:http://prize.hutter1.net/
+[HZRS15] K.He,X.Zhang,S.Ren,andJ.Sun,“Deepresiduallearning
+forimagerecognition,”arXivpreprintarXiv:1512.03385,2015.
+[Online].Available:http://arxiv.org/abs/1512.03385
+[Joh15a] D. Johnson, “Biaxial recurrent neural network for music
+composition,” GitHub, Aug. 2015. [Online]. Available: https:
+//github.com/hexahedria/biaxial-rnn-music-composition
 [Joh15b] ——, “Composing music with recurrent neu-
-ThisideaandsomeotherswereappliedbyDanielJohnson.He ral networks,” Personal Blog, Aug. 2015. [On-
-wrote a very good introduction into neural networks for music line]. Available: http://www.hexahedria.com/2015/08/03/
+ral networks,” Personal Blog, Aug. 2015. [On-
+line]. Available: http://www.hexahedria.com/2015/08/03/
 composing-music-with-recurrent-neural-networks/
-composition which explains those ideas [Joh15b]. Example
-compositionsareavailablethere,too.Healsomadethecodefor [Joh16] J.Johnson,“neural-style,”GitHub,Jan.2016.[Online].Available:
+[Joh16] J.Johnson,“neural-style,”GitHub,Jan.2016.[Online].Available:
 https://github.com/jcjohnson/neural-style
-hisBiaxialRecurrentNeuralNetworkavailableunder[Joh15a].
 [Kar15a] A.Karpathy,“char-rnn,”GitHub,Nov.2015.[Online].Available:
 https://github.com/karpathy/char-rnn
-VI. DISCUSSION
 [Kar15b] ——, “The unreasonable effectiveness of recurrent neural
 networks,” Personal Blog, May 2015. [Online]. Available:
-What does these examples mean for our understanding of http://karpathy.github.io/2015/05/21/rnn-effectiveness/
-creativity? Does it influence how much we value art? Could [KMN+02] T.Kanungo,D.Mount,N.Netanyahu,C.Piatko,R.Silverman,
-wedefineartandcreativitybetterafterhavingthoseandsimilar andA.Wu,“Anefficientk-meansclusteringalgorithm:analysis
+http://karpathy.github.io/2015/05/21/rnn-effectiveness/
+[KMN+02] T.Kanungo,D.Mount,N.Netanyahu,C.Piatko,R.Silverman,
+andA.Wu,“Anefficientk-meansclusteringalgorithm:analysis
 andimplementation,”PatternAnalysisandMachineIntelligence,
-results?
 IEEETransactionson,vol.24,no.7,pp.881–892,Jul2002.
-I think we might readjust our understanding of creativity just [Mit97] T. M. Mitchell, Machine learning, ser. McGraw Hill series in
-like we adjusted our understanding of algorithmically hard computerscience. McGraw-Hill,1997.
-problems after Deep Blue won against the reigning world [MOT15] A. Mordvintsev, C. Olah, and M. Tyka, “Inceptionism: Going
-chess champion Garry Kasparov in 1997. deeper into neural networks,” googleresearch.blogspot.co.uk,
+[Mit97] T. M. Mitchell, Machine learning, ser. McGraw Hill series in
+computerscience. McGraw-Hill,1997.
+[MOT15] A. Mordvintsev, C. Olah, and M. Tyka, “Inceptionism: Going
+deeper into neural networks,” googleresearch.blogspot.co.uk,
 Jun.2015.[Online].Available:http://googleresearch.blogspot.de/
-However,bynowitisobviousthatmachinelearningalgorithms 2015/06/inceptionism-going-deeper-into-neural.html
-cannot compete with human artists. Today’s state of the art [Nie15] M. A. Nielsen, Neural Networks and Deep Learn-
-algorithms which are purely based on machine learning don’t ing. Determination Press, 2015. [Online]. Avail-
+2015/06/inceptionism-going-deeper-into-neural.html
+[Nie15] M. A. Nielsen, Neural Networks and Deep Learn-
+ing. Determination Press, 2015. [Online]. Avail-
 able: http://neuralnetworksanddeeplearning.com/chap6.html#
-follow a central theme. They lack the ability to plan. Although
 introducing convolutional networks
-clever algorithms were implemented for composing music, it
 [NV15] A.NayebiandM.Vitelli,“GRUV:Algorithmicmusicgeneration
-seems as if there is still a lot of supervision involved.
 using recurrent neural networks,” 2015. [Online]. Available:
 http://cs224d.stanford.edu/reports/NayebiAran.pdf
-REFERENCES [Red] “Deepdream,” Reddit. [Online]. Available: https://www.reddit.
+[Red] “Deepdream,” Reddit. [Online]. Available: https://www.reddit.
 com/r/deepdream/
-[Cop87] D. Cope, “Experiments in music intelligence (emi),” 1987. [Shi14] Y. Shih, “Style transfer for headshot portraits,” YouTube, Jun.
-[Online].Available:http://hdl.handle.net/2027/spo.bbp2372.1987. 2014. [Online]. Available: https://www.youtube.com/watch?v=
-025 Hj5lGFzlubU
-[Cop05] ——, Computer models of musical creativity. MIT Press [SPB+14] Y. Shih, S. Paris, C. Barnes, W. T. Freeman, and F. Durand,
-Cambridge,2005. “Style transfer for headshot portraits,” ACM Transactions on
+[Shi14] Y. Shih, “Style transfer for headshot portraits,” YouTube, Jun.
+2014. [Online]. Available: https://www.youtube.com/watch?v=
+Hj5lGFzlubU
+[SPB+14] Y. Shih, S. Paris, C. Barnes, W. T. Freeman, and F. Durand,
+“Style transfer for headshot portraits,” ACM Transactions on
 Graphics(TOG),vol.33,no.4,p.148,2014.[Online].Available:
-[Cop12] ——, “Emily howell fugue,” YouTube, Oct. 2012. [Online]. http://dl.acm.org/citation.cfm?id=2601137
-Available:https://www.youtube.com/watch?v=jLR- c uCwI
+http://dl.acm.org/citation.cfm?id=2601137
 [TL05] P.TressetandF.F.Leymarie,“Generativeportraitsketching,”in
-[Cop13] ——,“Thewell-programmedclavier:Styleincomputermusic ProceedingsofVSMM,2005,pp.739–748.
-composition,” XRDS: Crossroads, The ACM Magazine for
-Students, vol. 19, no. 4, pp. 16–20, 2013. [Online]. Available: [Vit15] M. Vitelli, “Algorithmic music generation with recurrent
-http://dl.acm.org/citation.cfm?id=2460444 neural networks,” YouTube, Jun. 2015. [Online]. Available:
+ProceedingsofVSMM,2005,pp.739–748.
+[Vit15] M. Vitelli, “Algorithmic music generation with recurrent
+neural networks,” YouTube, Jun. 2015. [Online]. Available:
 https://youtu.be/0VTI1BBLydE
-[Cur14] A. Curtis, “Now then,” BBC, Jul. 2014. [On-
-line].Available:http://www.bbc.co.uk/blogs/adamcurtis/entries/ [VKMT13] C. Vondrick, A. Khosla, T. Malisiewicz, and A. Torralba,
-78691781-c9b7-30a0-9a0a-3ff76e8bfe58 “Hoggles: Visualizing object detection features,” in Computer
+[VKMT13] C. Vondrick, A. Khosla, T. Malisiewicz, and A. Torralba,
+“Hoggles: Visualizing object detection features,” in Computer
 Vision(ICCV),2013IEEEInternationalConferenceon. IEEE,
-[Gad06] A.Gadsby,Ed.,DictionaryofContemporaryEnglish. Pearson 2013, pp. 1–8. [Online]. Available: http://ieeexplore.ieee.org/
-EducationLimited,2006. xpls/abs all.jsp?arnumber=6751109
-[GEB15] L.A.Gatys,A.S.Ecker,andM.Bethge,“Aneuralalgorithmof [VL15] O. Vinyals and Q. Le, “A neural conversational model,”
-artisticstyle,”arXivpreprintarXiv:1508.06576,2015.[Online]. arXivpreprintarXiv:1506.05869,Jul.2015.[Online].Available:
-Available:http://arxiv.org/abs/1508.06576 http://arxiv.org/abs/1506.05869v2
-[goo15] “Inceptionism: Going deeper into neural networks,” Google [VN15] M. Vitelli and A. Nayebi, “GRUV,” Aug. 2015. [Online].
-Photos,Jun.2015.[Online].Available:https://goo.gl/Bydofw Available:https://github.com/MattVitelli/GRUV
-[HS97] S.HochreiterandJ.Schmidhuber,“Longshort-termmemory,” [Wei76] J. Weizenbaum, Computer Power and Human Reason: From
-Neural computation, vol. 9, no. 8, pp. 1735–1780, 1997. JudgementtoCalculation. W.H.Freeman&CoLtd,1976.
-[Online].Available:http://ieeexplore.ieee.org/xpl/freeabs all.jsp?
-arnumber=6795963 [ZF14] M.D.ZeilerandR.Fergus,“Visualizingandunderstandingcon-
+2013, pp. 1–8. [Online]. Available: http://ieeexplore.ieee.org/
+xpls/abs all.jsp?arnumber=6751109
+[VL15] O. Vinyals and Q. Le, “A neural conversational model,”
+arXivpreprintarXiv:1506.05869,Jul.2015.[Online].Available:
+http://arxiv.org/abs/1506.05869v2
+[VN15] M. Vitelli and A. Nayebi, “GRUV,” Aug. 2015. [Online].
+Available:https://github.com/MattVitelli/GRUV
+[Wei76] J. Weizenbaum, Computer Power and Human Reason: From
+JudgementtoCalculation. W.H.Freeman&CoLtd,1976.
+[ZF14] M.D.ZeilerandR.Fergus,“Visualizingandunderstandingcon-
 volutionalnetworks,”inComputerVision–ECCV2014. Springer,
-[hut] “50’000europrizeforcompressinghumanknowledge.”[Online]. 2014,pp.818–833.
-Available:http://prize.hutter1.net/
-[HZRS15] K.He,X.Zhang,S.Ren,andJ.Sun,“Deepresiduallearning
-forimagerecognition,”arXivpreprintarXiv:1512.03385,2015.
-[Online].Available:http://arxiv.org/abs/1512.03385
+2014,pp.818–833.
 6
 APPENDIXA
 AUTOMATICALLYGENERATEDTEXTS
diff --git a/read/results/pdfplumber/1602.06541.txt b/read/results/pdfplumber/1602.06541.txt
index 984d57f..47fcea8 100644
--- a/read/results/pdfplumber/1602.06541.txt
+++ b/read/results/pdfplumber/1602.06541.txt
@@ -1,65 +1,132 @@
 1
-A Survey of Semantic Segmentation II. TAXONOMYOFSEGMENTATIONALGORITHMS
-Martin Thoma The computer vision community has published a
-info@martin-thoma.de wide range of segmentation algorithms so far. Those
-algorithms can be grouped by the kind of data they
-operate on and the kind of segmentation they are able
+A Survey of Semantic Segmentation
+Martin Thoma
+info@martin-thoma.de
 Abstract—Thissurveygivesanoverviewoverdifferent
-to produce.
 techniques used for pixel-level semantic segmentation.
-Metrics and datasets for the evaluation of segmenta- The following subsections will give four different
-tion algorithms and traditional approaches for segmen- criteria by which segmentation algorithms can be
+Metrics and datasets for the evaluation of segmenta-
+tion algorithms and traditional approaches for segmen-
 tation such as unsupervised methods, Decision Forests
-classified.
 and SVMs are described and pointers to the relevant
-papers are given. Recently published approaches with This survey describes fixed-class (see Section II-A),
-convolutionalneuralnetworksarementionedandtypical single-class affiliation (see Section II-B) algorithms
-problematic situations for segmentation algorithms are whichworkongrayscaleorcoloredsinglepixelimages 6102
+papers are given. Recently published approaches with
+convolutionalneuralnetworksarementionedandtypical
+problematic situations for segmentation algorithms are
 examined. A taxonomy of segmentation algorithms is
-(see Section II-C) in a completely automated, passive
 given.
-fashion (see Section II-D).
-yaM
 I. INTRODUCTION
-Semantic segmentation is the task of clustering A. Allowed classes
+Semantic segmentation is the task of clustering
 parts of images together which belong to the same
-Semantic segmentation is a classification task. As object class. This type of algorithm has several use- 11
-cases such as detecting road signs [MBLAGJ+07], such, the classes on which the algorithm is trained is a
-central design decision.
+object class. This type of algorithm has several use-
+cases such as detecting road signs [MBLAGJ+07],
 detecting tumors [MBVLG02], detecting medical in-
-]VC.sc[
-strumentsinoperations[WAH97],coloncryptssegmen- Most algorithms work with a fixed set of classes;
-tation [CRSS14], land use and land cover classifica- some even only work on binary classes like fore-
-tion [HDT02]. In contrast, non-semantic segmentation ground vs background [RM07], [CS10] or street vs
-onlyclusterspixelstogetherbasedongeneralcharacter- no street [BKTT15].
-istics of single objects. Hence the task of non-semantic However, there are also unsupervised segmentation
-segmentation is not well-defined, as many different algorithms which do not distinguish classes at all (see
-2v14560.2061:viXra
-segmentations might be acceptable. Section V-B) as well as segmentation algorithms which
-Several applications of segmentation in medicine are are able to recognize when they don’t know a class.
-listed in [PXP00]. For example, in [GRC+08] a void class was added
-Object detection, in comparison to semantic seg- for classes which were not in the training set. Such
-mentation, has to distinguish different instances of the a void class was also used in the MSRCv2 dataset
-same object. While having a semantic segmentation (see Section III-B2) to make it possible to make more
-is certainly a big advantage when trying to get object coarse segmentations and thus having to spend less
-instances, there are a couple of problems: neighboring time annotating the image.
+strumentsinoperations[WAH97],coloncryptssegmen-
+tation [CRSS14], land use and land cover classifica-
+tion [HDT02]. In contrast, non-semantic segmentation
+onlyclusterspixelstogetherbasedongeneralcharacter-
+istics of single objects. Hence the task of non-semantic
+segmentation is not well-defined, as many different
+segmentations might be acceptable.
+Several applications of segmentation in medicine are
+listed in [PXP00].
+Object detection, in comparison to semantic seg-
+mentation, has to distinguish different instances of the
+same object. While having a semantic segmentation
+is certainly a big advantage when trying to get object
+instances, there are a couple of problems: neighboring
 pixelsofthesameclassmightbelongtodifferentobject
 instances and regions which are not connected my
 belong to the same object instance. For example, a
-B. Class affiliation of pixels
 tree in front of a car which visually divides the car into
-two parts. Humans do an incredible job when looking at the
-Thispaperisorganizedasfollows:Itbeginsbygiving world. For example, when we see a glass of water
-a taxonomy of segmentation algorithms in Section II. standing on a table we can automatically say that there
-A summary of quality measures and datasets which are istheglassandbehinditthetable,evenifweonlyhada
-used for semantic segmentation follows in Section III. singleimageandwerenotallowedtomove.Thismeans
-A summary of traditional segmentation algorithms and we simultaneously two labels to the coordinates of the
-their characteristics follows in Section V, as well as a glass: Glass and table. Although there is much more
-brief, non-exhaustive summary of recently published work being done on single class affiliation segmenta-
-semantic segmentation algorithms which are based on tion algorithms, there is a publication about multiple
-neural networks in Section VI. Finally, Section VII class affiliation segmentation [LRAL08]. Similarly,
-informs the reader about typical problematic cases for recent publications in pixel-level object segmentation
-segmentation algorithms. used layered models [YHRF12].
+two parts.
+Thispaperisorganizedasfollows:Itbeginsbygiving
+a taxonomy of segmentation algorithms in Section II.
+A summary of quality measures and datasets which are
+used for semantic segmentation follows in Section III.
+A summary of traditional segmentation algorithms and
+their characteristics follows in Section V, as well as a
+brief, non-exhaustive summary of recently published
+semantic segmentation algorithms which are based on
+neural networks in Section VI. Finally, Section VII
+informs the reader about typical problematic cases for
+segmentation algorithms.
+II. TAXONOMYOFSEGMENTATIONALGORITHMS
+The computer vision community has published a
+wide range of segmentation algorithms so far. Those
+algorithms can be grouped by the kind of data they
+operate on and the kind of segmentation they are able
+to produce.
+The following subsections will give four different
+criteria by which segmentation algorithms can be
+classified.
+This survey describes fixed-class (see Section II-A),
+single-class affiliation (see Section II-B) algorithms
+whichworkongrayscaleorcoloredsinglepixelimages
+(see Section II-C) in a completely automated, passive
+fashion (see Section II-D).
+A. Allowed classes
+Semantic segmentation is a classification task. As
+such, the classes on which the algorithm is trained is a
+central design decision.
+Most algorithms work with a fixed set of classes;
+some even only work on binary classes like fore-
+ground vs background [RM07], [CS10] or street vs
+no street [BKTT15].
+However, there are also unsupervised segmentation
+algorithms which do not distinguish classes at all (see
+Section V-B) as well as segmentation algorithms which
+are able to recognize when they don’t know a class.
+For example, in [GRC+08] a void class was added
+for classes which were not in the training set. Such
+a void class was also used in the MSRCv2 dataset
+(see Section III-B2) to make it possible to make more
+coarse segmentations and thus having to spend less
+time annotating the image.
+B. Class affiliation of pixels
+Humans do an incredible job when looking at the
+world. For example, when we see a glass of water
+standing on a table we can automatically say that there
+istheglassandbehinditthetable,evenifweonlyhada
+singleimageandwerenotallowedtomove.Thismeans
+we simultaneously two labels to the coordinates of the
+glass: Glass and table. Although there is much more
+work being done on single class affiliation segmenta-
+tion algorithms, there is a publication about multiple
+class affiliation segmentation [LRAL08]. Similarly,
+recent publications in pixel-level object segmentation
+used layered models [YHRF12].
+a
+r
+X
+i
+v
+:
+1
+6
+0
+2
+.
+0
+6
+5
+4
+1
+v
+2
+[
+c s
+.
+C
+V
+]
+1
+1
+M
+a
+y
+2
+0
+1
+6
 2
 C. Input Data
 The available data which can be used for the
@@ -67,563 +134,1024 @@ inference of a segmentation varies by application.
 • Grayscale vs colored: Grayscale images are
 commonly used in medical imaging such as
 magnetic resonance (MR) imaging or ultrasonog-
-(a) ExampleScene (b) Visualizationofafoundseg-
-raphy whereas colored photographs are obviously mentation
+raphy whereas colored photographs are obviously
 widespread.
-Figure 1: An example of a scene and a possible visu-
 • Excluding or including depth data: RGB-D,
-sometimes also called range [HJBJ+96] is avail- alization of a found segmentation.
+sometimes also called range [HJBJ+96] is avail-
 able in robotics, autonomous cars and recently
 also in consumer electronics such as Microsoft
-III. EVALUATIONANDDATASETS
 Kinect [Zha12].
-• Single image vs stereo images vs co- A. Quality measures for evaluation
+• Single image vs stereo images vs co-
 segmentation: Single image segmentation is the
-A performance measure is a crucial part of any
 most wide-spread kind of segmentation, but using
-machine learning system. As users of a semantic
 stereoimageswasalreadytriedin[BVZ01].Itcan
-segmentationsystemexpectcorrectresults,theaccuracy
 be seen as a more natural way of segmentation as
-is the most commonly used performance measure, but
 most mammals have two eyes. It can also be seen
-there are other measures of quality which matter when
 as being related to having depth data.
-segmentation algorithms are compared. This section
 Co-segmentation as in [RMBK06], [CXGS12] is
-gives an overview of those quality measures.
 the problem of finding a consistent segmentation
-1) Accuracy: Showingthecorrectnessofthesegmen-
 for multiple images. This problem can be seen
-tation hypotheses is done in most publications about
 in two ways: One the one hand, it can be seen
-semantic segmentation. However, there are a couple
 as the problem of finding common objects in at
-of different ways how this accuracy can be displayed.
 least two images. On the other hand, every image
-One way to give readers a first qualitative impression
 after the first can be used as an additional source
-of the obtained segmentations is by showing examples
 of information to find a meaningful segmentation.
-such as Figure 1.
 This idea can be extended to time series such as
-However, this can only support the explanation of
 videos.
-particular problems or showcase special situation. For
-• 2D vs 3D: Segmenting images is a 2D segmenta- meaningfulinformationabouttheoverallaccuracy,there
+• 2D vs 3D: Segmenting images is a 2D segmenta-
 tion task where the smallest unit is called a pixel.
-are a couple of metrics how accuracy can be defined.
-In 3D data, such as volumetric X-ray CT images For this section, let k ∈N be the number of classes,
-as they were used in [HHR01], the smallest unit n ∈N with i,j ∈1,...,k be the number of pixels
-ij 0
+In 3D data, such as volumetric X-ray CT images
+as they were used in [HHR01], the smallest unit
 is called a voxel.
+D. Operation state
+The operation state of the classifying machine can
+eitherbeactiveasin[SUM+11],[SSA12]whererobots
+can move objects to find a segmentation or passive,
+where the received image cannot be influenced. Among
+the passive algorithms, some segment in a completely
+automaticfashion,othersworkinaninteractivemode.
+One example would be a system where the user clicks
+on the background or marks a coarse segmentation and
+thealgorithmfindsafine-grainedsegmentation.[BJ00],
+[RKB04], [PS07] describe systems which work in an
+interactive mode.
+(a) ExampleScene (b) Visualizationofafoundseg-
+mentation
+Figure 1: An example of a scene and a possible visu-
+alization of a found segmentation.
+III. EVALUATIONANDDATASETS
+A. Quality measures for evaluation
+A performance measure is a crucial part of any
+machine learning system. As users of a semantic
+segmentationsystemexpectcorrectresults,theaccuracy
+is the most commonly used performance measure, but
+there are other measures of quality which matter when
+segmentation algorithms are compared. This section
+gives an overview of those quality measures.
+1) Accuracy: Showingthecorrectnessofthesegmen-
+tation hypotheses is done in most publications about
+semantic segmentation. However, there are a couple
+of different ways how this accuracy can be displayed.
+One way to give readers a first qualitative impression
+of the obtained segmentations is by showing examples
+such as Figure 1.
+However, this can only support the explanation of
+particular problems or showcase special situation. For
+meaningfulinformationabouttheoverallaccuracy,there
+are a couple of metrics how accuracy can be defined.
+For this section, let $k \in \mathbb{N}$ be the number of classes,
+$n_{ij} \in \mathbb{N}_0$ with $i,j \in \{1,\dots,k\}$ be the number of pixels
 which belong to class i and were labeled as class j.
-=(cid:80)k
-(n ) is called a confusion matrix. Let t n
-ij i j=1 ij
+$(n_{ij})$ is called a confusion matrix. Let $t_i = \sum_{j=1}^{k} n_{ij}$
 be the total number of pixels of class i.
 One way to compare segmentation algorithms is by
-D. Operation state
 the pixel-wise accuracy of the predicted segmentation
 as done in many publications [SWRC06], [CP08],
-The operation state of the classifying machine can
 [LSD14]. This is also called per-pixel rate and de-
-eitherbeactiveasin[SUM+11],[SSA12]whererobots (cid:80)k i=1nii.
-fined as Taking the pixel-wise classification
-can move objects to find a segmentation or passive, h(cid:80) ak is=1twti
-accuracy o major drawbacks:
-where the received image cannot be influenced. Among
-the passive algorithms, some segment in a completely P1 Taskslikesegmentingimagesforautonomouscars
-automaticfashion,othersworkinaninteractivemode. have large regions which have one class. This
-One example would be a system where the user clicks makes achieving classification accuracies of more
-on the background or marks a coarse segmentation and than 30% with a priori knowledge only possible.
-thealgorithmfindsafine-grainedsegmentation.[BJ00], For example, a system might learn that a certain
-[RKB04], [PS07] describe systems which work in an position of the image is most of the time “sky”
-interactive mode. while another position is most of the time “road”.
+fined as $\frac{\sum_{i=1}^{k} n_{ii}}{\sum_{i=1}^{k} t_i}$. Taking the pixel-wise classification
+accuracy has two major drawbacks:
+P1 Tasks like segmenting images for autonomous cars
+have large regions which have one class. This
+makes it possible to achieve classification accuracies of more
+than 30% with a priori knowledge alone.
+For example, a system might learn that a certain
+position of the image is most of the time “sky”
+while another position is most of the time “road”.
 3
-P2 The manually labeled images could have a more segmentation,everyimageneedstobeprocessedwithin
-coarse labeling. For example, a human classifier 20ms [BKTT15]. This time is called latency.
-could have labeled a region as “car” and the Most papers do not give exact values for the time
-algorithm could have split that region into the theirapplicationneeds.Onereasonmightbethatthisis
-general “car” and the more specific “wheel of a very hardware, implementation and in some cases even
-car” data specific. For example, [HJBJ+96] notes that their
-Three accuracy metrics which do not suffer from algorithm needs 10s on a Sun SparcStation 20. The
-problem P1 are used in [LSD14]: fastestCPUeverproducedforthissystemhad200MHz.
-• mean accuracy: k1 ·(cid:80)k i=1 n ti ii ∈[0,1] C tao inm edpa ur si in ng gt ah nis Ind ti er le ic 7tl -y 48w 20it Kh wre is thul 3ts .9w Gh Hic zh ww oe ur le dno ob t-
+P2 The manually labeled images could have a more
+coarse labeling. For example, a human classifier
+could have labeled a region as “car” and the
+algorithm could have split that region into the
+general “car” and the more specific “wheel of a
+car”
+Three accuracy metrics which do not suffer from
+problem P1 are used in [LSD14]:
+• mean accuracy: $\frac{1}{k} \cdot \sum_{i=1}^{k} \frac{n_{ii}}{t_i} \in [0,1]$
 • mean intersection over union:
-1 ·(cid:80)k nii ∈[0,1] be meaningful.
-k i=1 ti−nii+(cid:80)k j=1nji However, it does still make sense to mention the
+$\frac{1}{k} \cdot \sum_{i=1}^{k} \frac{n_{ii}}{t_i - n_{ii} + \sum_{j=1}^{k} n_{ji}} \in [0,1]$
 • frequency weighted intersection over union:
-execution time as well as the hardware in individual
-((cid:80)k i=1t i)−1(cid:80)k i=1t i· ti−nii+n (cid:80)ii k j=1nji ∈[0,1] papers.Thisgivestheinterestedreaderthepossibilityto
-Another problem might be pixels which cannot be estimatehowdifficultitmightbetoadjustthealgorithm
-assigned to one of the known classes. For this reason, to work in the required time-constraints.
-[SWRC06] makes use of a void class. This class gets Besides the latency, the throughput is another
-completely ignored for all quality measures. Hence the relevant characteristic of algorithms and implementa-
-totalnumberofpixelsisassumedtobewidth·height− tions for semantic segmentation. For example, for the
-number of void pixels. automatic description of images in order to enable text
-One way to deal with problem P1 and problem P2 search the throughput is of much higher importance
-is giving the confusion matrix as done in [SWRC06]. than latency.
-However, this approach is not feasible if many classes 3) Stability: A reasonable requirement on semantic
-are given. segmentation algorithms is the stability of a segmen-
-The F-measure is useful for binary classifica- tation over slight changes in the input image. When
-tion task such as the KITTI road segmentation the image data is sightly blurred by smoke such as
-benchmark [FKG13] or crypt segmentation as done in Figure 4(c), the segmentation should not change.
-by [CRSS14]. It is calculated as “the harmonic mean Also, two images which show a slight change in
-of the precision and recall” [PH05]: perspective should also only result in slight changes in
-the segmentation [PH05].
+$\left(\sum_{i=1}^{k} t_i\right)^{-1} \sum_{i=1}^{k} t_i \cdot \frac{n_{ii}}{t_i - n_{ii} + \sum_{j=1}^{k} n_{ji}} \in [0,1]$
+Another problem might be pixels which cannot be
+assigned to one of the known classes. For this reason,
+[SWRC06] makes use of a void class. This class gets
+completely ignored for all quality measures. Hence the
+totalnumberofpixelsisassumedtobewidth·height−
+number of void pixels.
+One way to deal with problem P1 and problem P2
+is giving the confusion matrix as done in [SWRC06].
+However, this approach is not feasible if many classes
+are given.
+The F-measure is useful for binary classifica-
+tion task such as the KITTI road segmentation
+benchmark [FKG13] or crypt segmentation as done
+by [CRSS14]. It is calculated as “the harmonic mean
+of the precision and recall” [PH05]:
+F β =(1+β)2
 tp
-F β =(1+β)2 (1+β2)·tp+β2·fn+fp 4) Memory usage: Peak memory usage matters
-when segmentation algorithms are used in devices like
-where β = 1 is chosen in most cases and tp means smartphones or cameras, or when the algorithms have
-true positive, fn means false negative and fp means to finish in a given time frame, run on the graphics
+(1+β2)·tp+β2·fn+fp
+where β = 1 is chosen in most cases and tp means
+true positive, fn means false negative and fp means
 false positive.
-processing unit (GPU) and consume so much memory
-Finally,itshouldbenotedthatalotofothermeasures for single image segmentation that only the latest
-for the accuracy of segmentations were proposed for graphic cards can be used. However, no publication
-non-semantic segmentation. One of those accuracy were available mentioning the peak memory usage.
+Finally,itshouldbenotedthatalotofothermeasures
+for the accuracy of segmentations were proposed for
+non-semantic segmentation. One of those accuracy
 measures is Normalized Probabilistic Rand (NPR)
 index which was introduced in [UPH05] and eval-
-B. Datasets
 uated in [CSI+09] on dermoscopy images. Other
-non-semantic segmentation measures were introduced The computer vision community produced a couple
-in[MFTM01],butthereasonforcreatingthemseemsto of different datasets which are publicly available. In
-betodealwiththeunder-definedtaskdescriptionofnon- the following, only the most widely used ones as well
-semantic segmentation. These accuracy measures try to as three medical databases are described. An overview
-dealwithdifferentlevelsofcoarsityofthesegmentation. over the quantity and the kind of data is given by
-Thisismuchlessofaprobleminsemanticsegmentation Table I.
-and thus those measures are not explained here. 1) PASCAL VOC: The PASCAL1 VOC2 challenge
-2) Speed: Amaximumupperboundontheexecution was organized eight times with different datasets:
-time for the inference on a single image is a hard Once every year from 2005 to 2012 [EVGW+b].
+non-semantic segmentation measures were introduced
+in [MFTM01], but the reason for creating them seems to
+be to deal with the under-defined task description of non-
+semantic segmentation. These accuracy measures try to
+deal with different levels of coarseness of the segmentation.
+This is much less of a problem in semantic segmentation
+and thus those measures are not explained here.
+2) Speed: Amaximumupperboundontheexecution
+time for the inference on a single image is a hard
 requirement for some applications. For example, in the
-1patternanalysis,statisticalmodellingandcomputationallearning,
 case of autonomous cars an algorithm which classifies
+pixel as street or no-street and thus makes a semantic
+segmentation,everyimageneedstobeprocessedwithin
+20ms [BKTT15]. This time is called latency.
+Most papers do not give exact values for the time
+theirapplicationneeds.Onereasonmightbethatthisis
+very hardware, implementation and in some cases even
+data specific. For example, [HJBJ+96] notes that their
+algorithm needs 10s on a Sun SparcStation 20. The
+fastestCPUeverproducedforthissystemhad200MHz.
+Comparing this directly with results which were obtained using an Intel i7-4820K with 3.9GHz would not
+be meaningful.
+However, it does still make sense to mention the
+execution time as well as the hardware in individual
+papers.Thisgivestheinterestedreaderthepossibilityto
+estimatehowdifficultitmightbetoadjustthealgorithm
+to work in the required time-constraints.
+Besides the latency, the throughput is another
+relevant characteristic of algorithms and implementa-
+tions for semantic segmentation. For example, for the
+automatic description of images in order to enable text
+search the throughput is of much higher importance
+than latency.
+3) Stability: A reasonable requirement on semantic
+segmentation algorithms is the stability of a segmen-
+tation over slight changes in the input image. When
+the image data is slightly blurred by smoke such as
+in Figure 4(c), the segmentation should not change.
+Also, two images which show a slight change in
+perspective should also only result in slight changes in
+the segmentation [PH05].
+4) Memory usage: Peak memory usage matters
+when segmentation algorithms are used in devices like
+smartphones or cameras, or when the algorithms have
+to finish in a given time frame, run on the graphics
+processing unit (GPU) and consume so much memory
+for single image segmentation that only the latest
+graphics cards can be used. However, no publications
+were available mentioning the peak memory usage.
+B. Datasets
+The computer vision community produced a couple
+of different datasets which are publicly available. In
+the following, only the most widely used ones as well
+as three medical databases are described. An overview
+over the quantity and the kind of data is given by
+Table I.
+1) PASCAL VOC: The PASCAL1 VOC2 challenge
+was organized eight times with different datasets:
+Once every year from 2005 to 2012 [EVGW+b].
+1patternanalysis,statisticalmodellingandcomputationallearning,
 anEUnetworkofexcellence
-pixel as street or no-street and thus makes a semantic 2VisualObjectClasses
+2VisualObjectClasses
 4
-Beginning with 2007, a segmentation challenge was Training
-added [EVGW+a]. Prediction
+Beginning with 2007, a segmentation challenge was
+added [EVGW+a].
 The dataset consists of annotated photographs from
-Preprocessing
-www.flicker.com, a photo sharing website. There are Data
-multiple challenges for PASCAL VOC. The 2012 Feature extraction augmentation
+www.flicker.com, a photo sharing website. There are
+multiple challenges for PASCAL VOC. The 2012
 competition had five challenges of which one is a
 segmentation challenge where a single class label was
 givenforeachpixel.Theclassesare:aeroplane,bicycle,
-Window Window-wise Post-
-bird, boat, bottle, bus, car, cat, chair, cow, dining table, extraction Classification processing
+bird, boat, bottle, bus, car, cat, chair, cow, dining table,
 dog,horse,motorbike,person,pottedplant,sheep,sofa,
-train, tv/monitor. Figure 2: A typical segmentation pipeline gets raw
-Although no new competitions will be held, new pixel data, applies preprocessing techniques
-algorithms can be evaluated on the 2010, 2011 and like scaling and feature extraction like HOG
-2012 data via http://host.robots.ox.ac.uk:8080/ features. For training, data augmentation
-The PASCAL VOC segmentation challenges use the techniques such as image rotation can be
-segmentation over union criterion (see Section III-A). applied. For every single image, patches of
-2) MSRCv2: Microsoft Research has published a the image called windows are extracted and
-databaseof591photographswithpixel-levelannotation those windows are classified. The resulting
-of 21 classes: aeroplane, bike, bird, boat, body, book, semantic segmentation can be refined by
-building, car, cat, chair, cow, dog, face, flower, grass, simple morphologic operations or by more
-road, sheep, sign, sky, tree, water. Additionally, there complexapproachessuchasMarkovRandom
-is a void label for pixels which do not belong to Fields (MRFs).
+train, tv/monitor.
+Although no new competitions will be held, new
+algorithms can be evaluated on the 2010, 2011 and
+2012 data via http://host.robots.ox.ac.uk:8080/
+The PASCAL VOC segmentation challenges use the
+segmentation over union criterion (see Section III-A).
+2) MSRCv2: Microsoft Research has published a
+databaseof591photographswithpixel-levelannotation
+of 21 classes: aeroplane, bike, bird, boat, body, book,
+building, car, cat, chair, cow, dog, face, flower, grass,
+road, sheep, sign, sky, tree, water. Additionally, there
+is a void label for pixels which do not belong to
 any of the 21 classes or which are close to the
 segmentationboundary.Thisallowsa“roughandquick
 hand-segmentation which does not align exactly with
-the object boundaries” [SWRC06]. IV. SEGMENTATIONPIPELINE
+the object boundaries” [SWRC06].
 3) Medical Databases: The Warwick-QU Dataset
 consists of 165 images with pixel-level annotation of
-5 classes: “healthy, adenomatous, moderately differen- Typically, semantic segmentation is done with a
-tiated, moderately-to-poorly differentiated, and poorly classifier which operates on fixed-size feature inputs
-differentiated” [CSM09]. This dataset is part of the and a sliding-window approach [DT05], [YBCK10],
-Gland Segmentation (GlaS) challenge. [SCZ08]. This means a classifier is trained on images
-The DIARETDB1 [KKV+14] is a dataset of 89 im- of a fixed size. The trained classifier is then fed with
-ages fundus images. Those images show the interior rectangular regions of the image which are called win-
-surfaceoftheeye.Fundusimagescanbeusedtodetect dows.Althoughtheclassifiergetsanimagepatchofe.g.
-diabetic retinopathy. The images have four classes of 51px×51pxoftheenvironment,itmightonlyclassify
-coarseannotations:hardandsoftexudates,hemorrhages the center pixel or a subset of the complete window.
-and red small dots. This segmentation pipeline is visualized in Figure 2.
+5 classes: “healthy, adenomatous, moderately differen-
+tiated, moderately-to-poorly differentiated, and poorly
+differentiated” [CSM09]. This dataset is part of the
+Gland Segmentation (GlaS) challenge.
+The DIARETDB1 [KKV+14] is a dataset of 89
+fundus images. Those images show the interior
+surfaceoftheeye.Fundusimagescanbeusedtodetect
+diabetic retinopathy. The images have four classes of
+coarseannotations:hardandsoftexudates,hemorrhages
+and red small dots.
 20 test and additionally 20 training retinal fun-
-This approach was taken by [BKTT15] and a major-
-dus images are available through the DRIVE data ity of the VOC2007 participants [EVGW+a]. As this
+dus images are available through the DRIVE data
 set [SAN+04]. The vessels were annotated. Addition-
-approach has to apply the patch classifier 512·512=
 ally, [AP11] added vascular features.
-262144timesforimagesofsize512px×512px,there
 The Open-CAS Endoscopic Datasets [MHMK+14]
-are techniques for speeding it up such as applying a
 are 60 images taken from laparoscopic adrenalectomies
-stride and interpolating the results.
 and 60 images taken from laparoscopic pancreatic
-Neuralnetworksareabletoapplytheslidingwindow
 resections. Those are from 3 surgical procedures each.
-approach in a very efficient way by handling a trained
 Half of the data was annotated by a medical expert for
-network as a convolution and applying the convolution
 “medial instrument” and “no medical instrument”. All
-on the complete image.
 images were labeled by anonymous untrained workers
-to which they refer to as knowledge workers (KWs). However, there are alternatives. Namely MRFs and
-One crowd annotation was obtained for each image by Conditional Random Fields (CRFs) which take the
-a majority vote on a pixel basis of 10 segmentations information of the complete image and segment it in
-given by 10 different KWs. an holistic approach.
+to which they refer to as knowledge workers (KWs).
+One crowd annotation was obtained for each image by
+a majority vote on a pixel basis of 10 segmentations
+given by 10 different KWs.
+Training
+Prediction
+Post-
+processing
+Window-wise
+Classification
+Window
+extraction
+Data
+augmentation Feature extraction
+Preprocessing
+Figure 2: A typical segmentation pipeline gets raw
+pixel data, applies preprocessing techniques
+like scaling and feature extraction like HOG
+features. For training, data augmentation
+techniques such as image rotation can be
+applied. For every single image, patches of
+the image called windows are extracted and
+those windows are classified. The resulting
+semantic segmentation can be refined by
+simple morphologic operations or by more
+complexapproachessuchasMarkovRandom
+Fields (MRFs).
+IV. SEGMENTATIONPIPELINE
+Typically, semantic segmentation is done with a
+classifier which operates on fixed-size feature inputs
+and a sliding-window approach [DT05], [YBCK10],
+[SCZ08]. This means a classifier is trained on images
+of a fixed size. The trained classifier is then fed with
+rectangular regions of the image which are called win-
+dows.Althoughtheclassifiergetsanimagepatchofe.g.
+51px×51pxoftheenvironment,itmightonlyclassify
+the center pixel or a subset of the complete window.
+This segmentation pipeline is visualized in Figure 2.
+This approach was taken by [BKTT15] and a major-
+ity of the VOC2007 participants [EVGW+a]. As this
+approach has to apply the patch classifier 512·512=
+262144timesforimagesofsize512px×512px,there
+are techniques for speeding it up such as applying a
+stride and interpolating the results.
+Neuralnetworksareabletoapplytheslidingwindow
+approach in a very efficient way by handling a trained
+network as a convolution and applying the convolution
+on the complete image.
+However, there are alternatives, namely MRFs and
+Conditional Random Fields (CRFs), which take the
+information of the complete image and segment it in
+a holistic approach.
 5
-V. TRADITIONALAPPROACHES thedirectionsiscalculatedforeachpatch.HOGfeatures
-were proposed in [DT05] and are used in [BMBM10],
+V. TRADITIONALAPPROACHES
 Image segmentation algorithms which use traditional
-[FGMR10] for segmentation tasks.
 approaches, hence don’t apply neural networks and
-3) SIFT: Scale-invariant feature transform (SIFT)
 make heavy use of domain knowledge, are wide-spread
-feature descriptors describe keypoints in an image. The
 in the computer vision community. Features which can
-image patch of the size 16×16 around the keypoint
 be used for segmentation are described in Section V-A,
-is taken. This patch is divided in 16 distinct parts of
 a very brief overview of unsupervised, non-semantic
-the size 4×4. For each of those parts a histogram of
 segmentationisgiveninSectionV-B,RandomDecision
-8 orientations is calculated similar as for HOG features.
 Forests are described in Section V-C, Markov Random
-This results in a 128-dimensional feature vector for
 Fields in Section V-E and Support Vector Machines
-each keypoint.
 (SVMs) in Section V-D. Postprocessing is covered in
-ItshouldbeemphasizedthatSIFTisaglobalfeature
 Section V-G.
-for a complete image.
 It should be noted that algorithms can use combina-
-SIFT is described in detail in [Low04] and are used
 tionof methods.For example, [TNL14]makesuse ofa
-in [PTN09].
 combinationofaSVMandaMRF.Also,auto-encoders
-4) BOV: Bag-of-visual-words (BOV), also called
 can be used to learn features which in turn can be used
-bag of keypoints, is based on vector quantization.
 by any classifier.
+A. Features and Preprocessing methods
+Thechoiceoffeaturesisveryimportantintraditional
+approaches. The most commonly used local and global
+featuresareexplainedinthefollowingaswellasfeature
+dimensionality reduction algorithms.
+1) PixelColor: Pixelcolorindifferentimagespaces
+(e.g. 3 features for RGB, 3 features for HSV, 1 feature
+forthegray-value)arethemostwidelyusedfeatures.A
+typical image is in the RGB color space, but depending
+on the classifier and the problem another color space
+mightresultinbettersegmentations.RGB,YcBcr,HSL,
+Lab and YIQ are some examples used by [CRSS14].
+No single color space has been proven to be superior
+to all others in all contexts [CJSW01]. However, the
+most common choices seem to be RGB and HSI.
+Reasons for choosing RGB are simplicity and the support
+by programming languages, whereas the choice of
+the HSI color space might make it simpler for the
+classifier to become invariant to illumination. One
+reason for choosing CIE-L*a*b* color space is that it
+approximates human perception of brightness [KP92].
+It follows that choosing the L*a*b color space helps
+algorithms to detect structures which are seen by
+humans.Anotherwayofimprovingthestructurewithin
+an image is histogram equalization, which can be
+applied to improve contrast [PAA+87], [RM07].
+2) Histogram of oriented Gradients: Histogram of
+oriented gradients (HOG) features interpret the image
+as a discrete function I : N2 → {0,...,255} which
+mapstheposition(x,y)toacolor.Foreachpixel,there
+are two gradients: The partial derivative of x and y.
+Now the original image is transformed to two feature
+maps of equal size which represent the gradient. These
+feature maps are split into patches and a histogram of
+the directions is calculated for each patch. HOG features
+were proposed in [DT05] and are used in [BMBM10],
+[FGMR10] for segmentation tasks.
+3) SIFT: Scale-invariant feature transform (SIFT)
+feature descriptors describe keypoints in an image. The
+image patch of the size 16×16 around the keypoint
+is taken. This patch is divided in 16 distinct parts of
+the size 4×4. For each of those parts a histogram of
+8 orientations is calculated similar as for HOG features.
+This results in a 128-dimensional feature vector for
+each keypoint.
+It should be emphasized that SIFT is a global feature
+for a complete image.
+SIFT is described in detail in [Low04] and is used
+in [PTN09].
+4) BOV: Bag-of-visual-words (BOV), also called
+bag of keypoints, is based on vector quantization.
 Similar to HOG features, BOV features are histograms
 which count the number of occurrences of certain
-A. Features and Preprocessing methods
 patternswithinapatchoftheimage.BOVaredescribed
-Thechoiceoffeaturesisveryimportantintraditional in [CDF+04] and used in combination with SIFT
-approaches. The most commonly used local and global feature descriptors in [CP08].
-featuresareexplainedinthefollowingaswellasfeature 5) Poselets: Poselets rely on manually added extra
-dimensionality reduction algorithms. keypoints such as “right shoulder”, “left shoulder”,
-1) PixelColor: Pixelcolorindifferentimagespaces “right knee” and “left knee”. They were originally
-(e.g. 3 features for RGB, 3 features for HSV, 1 feature used for human pose estimation. Finding those extra
-forthegray-value)arethemostwidelyusedfeatures.A keypoints is easily possible for well-known image
-typical image is in the RGB color space, but depending classes like humans. However, it is difficult for classes
-on the classifier and the problem another color space like airplanes, ships, organs or cells where the human
-mightresultinbettersegmentations.RGB,YcBcr,HSL, annotators do not know the keypoints. Additionally, the
-Lab and YIQ are some examples used by [CRSS14]. keypointshavetobechosenforeverysingleclass.There
-No single color space has been proven to be superior arestrategiestodealwiththoseproblemslikeviewpoint-
-to all others in all contexts [CJSW01]. However, the dependentkeypoints.Poseletswereusedin[BMBM10]
-most common choices seem to be RGB and HSI. to detect people and in [BBMM11] for general object
-ReasonsforchoosingRGBissimplicityandthesupport detection of the PASCAL VOC dataset.
-by programming languages, whereas the choice of 6) Textons: A texton is the minimal building block
-the HSI color space might make it simpler for the ofvision.Thecomputervisionliteraturedoesnotgivea
-classifier to become invariant to illumination. One strict definition for textons, but edge detectors could be
-reason for choosing CIE-L*a*b* color space is that it one example. One might argue that deep learning tech-
-approximates human perception of brightness [KP92]. niques with Convolution Neuronal Networks (CNNs)
-It follows that choosing the L*a*b color space helps learn textons in the first filters.
-algorithms to detect structures which are seen by An excellent explanation of textons can be found
-humans.Anotherwayofimprovingthestructurewithin in [ZGWX05].
-an image is histogram equalization, which can be 7) Dimensionality Reduction: High-resolution im-
-applied to improve contrast [PAA+87], [RM07]. ageshavealotofpixels.Havingoneormorefeatureper
-2) Histogram of oriented Gradients: Histogram of pixelresultsinwelloveramillionfeatures.Thismakes
-oriented gradients (HOG) features interpret the image training difficult while the higher resolution might not
-as a discrete function I : N2 → {0,...,255} which contain much more information. A simple approach
-mapstheposition(x,y)toacolor.Foreachpixel,there to deal with this is downsampling the high-resolution
-are two gradients: The partial derivative of x and y. image to a low-resolution variant. Another way of
-Now the original image is transformed to two feature doing dimensionality reduction is principal component
-mapsofequalsizewhichrepresentsthegradient.These analysis (PCA), which is applied by [COWR11]. The
-featuremapsaresplittedintopatchesandahistogramof idea behind PCA is to find a hyperplane on which all
+in [CDF+04] and used in combination with SIFT
+feature descriptors in [CP08].
+5) Poselets: Poselets rely on manually added extra
+keypoints such as “right shoulder”, “left shoulder”,
+“right knee” and “left knee”. They were originally
+used for human pose estimation. Finding those extra
+keypoints is easily possible for well-known image
+classes like humans. However, it is difficult for classes
+like airplanes, ships, organs or cells where the human
+annotators do not know the keypoints. Additionally, the
+keypointshavetobechosenforeverysingleclass.There
+arestrategiestodealwiththoseproblemslikeviewpoint-
+dependentkeypoints.Poseletswereusedin[BMBM10]
+to detect people and in [BBMM11] for general object
+detection of the PASCAL VOC dataset.
+6) Textons: A texton is the minimal building block
+of vision. The computer vision literature does not give a
+strict definition for textons, but edge detectors could be
+one example. One might argue that deep learning tech-
+niques with Convolutional Neural Networks (CNNs)
+learn textons in the first filters.
+An excellent explanation of textons can be found
+in [ZGWX05].
+7) Dimensionality Reduction: High-resolution im-
+ages have a lot of pixels. Having one or more features per
+pixel results in well over a million features. This makes
+training difficult while the higher resolution might not
+contain much more information. A simple approach
+to deal with this is downsampling the high-resolution
+image to a low-resolution variant. Another way of
+doing dimensionality reduction is principal component
+analysis (PCA), which is applied by [COWR11]. The
+idea behind PCA is to find a hyperplane on which all
 6
-feature vectors can be projected with a minimal loss The 4-neighborhood (north, east, south west) or an 8-
-of information. A detailed description of PCA is given neighborhood (north, north-east, east, south-east, south,
-by [Smi02]. south-west, west, north-west) are plausible choices.
-One problem of PCA is the fact that it does not One way to cut the edges is by building a minimum
-distinguish different classes. This means it can happen spanning tree and removing edges above a threshold.
-that a perfectly linearly separable set of feature vectors This threshold can either be constant, adapted to the
-becomes not separable at all after applying PCA. graph or adjusted by the user. After the edge-cutting
-There are many other techniques for dimensionality step, the connected components are the segments.
-reduction. An overview and a comparison over some A graph-based method which ranked 2nd in the
-of them is given by [vdMPvdH09]. Pascal VOC 2010 challenge [EVGW+10] is described
+feature vectors can be projected with a minimal loss
+of information. A detailed description of PCA is given
+by [Smi02].
+One problem of PCA is the fact that it does not
+distinguish different classes. This means it can happen
+that a perfectly linearly separable set of feature vectors
+becomes not separable at all after applying PCA.
+There are many other techniques for dimensionality
+reduction. An overview and a comparison over some
+of them is given by [vdMPvdH09].
+B. Unsupervised Segmentation
+Unsupervised segmentation algorithms can be used
+in supervised segmentation as another source of infor-
+mation or to refine a segmentation. While unsupervised
+segmentation algorithms can never be semantic, they are
+well-studied and deserve at least a very brief overview.
+Semantic segmentation algorithms store information
+about the classes they were trained to segment while
+non-semantic segmentation algorithms try to detect
+consistent regions or region boundaries.
+1) Clustering Algorithms: Clustering algorithms can
+directly be applied on the pixels, when one gives a
+feature vector per pixel. Two clustering algorithms are
+k-means and the mean-shift algorithm.
+The k-means algorithm is a general-purpose cluster-
+ing algorithm which requires the number of clusters to
+be given beforehand. Initially, it places the k centroids
+randomly in the feature space. Then it assigns each
+data point to the nearest centroid, moves the centroid
+to the center of the cluster and continues the process
+until a stopping criterion is reached. A faster variant is
+described in [Har75].
+k-means was applied by [CLP98] for medical image
+segmentation.
+Another clustering algorithm is the mean-shift algo-
+rithm which was introduced by [CM02] for segmen-
+tation tasks. The algorithm finds the cluster centers
+by initializing centroids at random seed points and
+iteratively shifting them to the mean coordinate within
+a certain range. Instead of taking a hard range constraint,
+the mean can also be calculated by using any kernel.
+This effectively applies a weight to the coordinates
+of the points. The mean shift algorithm finds cluster
+centers at positions with a highest local density of
+points.
+2) Graph Based Image Segmentation: Graph-based
+image segmentation algorithms typically interpret pixels
+as vertices and an edge weight is a measure of
+dissimilarity such as the difference in color [FH04],
+[Fel]. There are several different candidates for edges.
+The 4-neighborhood (north, east, south, west) or an 8-
+neighborhood (north, north-east, east, south-east, south,
+south-west, west, north-west) are plausible choices.
+One way to cut the edges is by building a minimum
+spanning tree and removing edges above a threshold.
+This threshold can either be constant, adapted to the
+graph or adjusted by the user. After the edge-cutting
+step, the connected components are the segments.
+A graph-based method which ranked 2nd in the
+Pascal VOC 2010 challenge [EVGW+10] is described
 in [CS10]. The system makes heavy use of the multi-
 cue contour detector globalPb [MAFM08] and needs
-B. Unsupervised Segmentation about 10GB of main memory [CS11].
-Unsupervised segmentation algorithms can be used 3) Random Walks: Random walks belong to the
-in supervised segmentation as another source of infor- graph-based image segmentation algorithms. Random
-mation or to refine a segmentation. While unsupervised walk image segmentation usually works as follows:
-segmentationalgorithmscanneverbesemantic,theyare Seed points are placed on the image for the different
-well-studied and deserve at least a very brief overview. objects in the image. From every single pixel, the
-Semantic segmentation algorithms store information probability to reach the different seed points by a
-about the classes they were trained to segment while random walk is calculated. This is done by taking
-non-semantic segmentation algorithms try to detect image gradients as described in Section V-A for HOG
-consistent regions or region boundaries. features. The class of the pixel is the class of which a
-1) Clustering Algorithms: Clustering algorithms can seed point will be reached with highest probability. At
-directly be applied on the pixels, when one gives a first, this is an interactive segmentation method, but it
-feature vector per pixel. Two clustering algorithms are can be extended to be non-interactive by using another
-k-means and the mean-shift algorithm. segmentation methods output as seed points.
-The k-means algorithm is a general-purpose cluster- 4) Active Contour Models: Active contour models
-ing algorithm which requires the number of clusters to (ACMs) are algorithms which segment images roughly
-be given beforehand. Initially, it places the k centroids along edges, but also try to find a border which is
-randomly in the feature space. Then it assigns each smooth. This is done by defining a so called energy
-data point to the nearest centroid, moves the centroid function which will be minimized. They were initially
-to the center of the cluster and continues the process described in [KWT88]. ACMs can be used to segment
-until a stopping criterion is reached. A faster variant is an image or to refine segmentation as it was done
-described in [Har75]. in [AM98] for brain MR images.
-k-means was applied by [CLP98] for medical image 5) Watershed Segmentation: The watershed algo-
-segmentation. rithm takes a grayscale image and interprets it as a
-Another clustering algorithm is the mean-shift algo- height map. Low values are catchment basins and
-rithm which was introduced by [CM02] for segmen- the higher values between two neighboring catchment
-tation tasks. The algorithm finds the cluster centers basins is the watershed. The catchment basins should
-by initializing centroids at random seed points and contain what the developer wants to capture. This
-iteratively shifting them to the mean coordinate within implies that those areas must be dark on grayscale
-acertainrange.Insteadoftakingahardrangeconstraint, images. The algorithm starts to fill the basins from
-the mean can also be calculated by using any kernel. the lowest point. When two basins are connected, a
-This effectively applies a weight to the coordinates watershed is found. The algorithm stops when the
-of the points. The mean shift algorithm finds cluster highest point is reached.
-centers at positions with a highest local density of A detaileddescription ofthe watershed segmentation
-points. algorithm is given in [RM00].
-2) Graph Based Image Segmentation: Graph-based The watershed segmentation was used in [JLD03] to
-imagesegmentationalgorithmstypicallyinterpretpixels segment white blood cells. As the authors describe,
-as vertices and an edge weight is a measure of the segmentation by watershed transform has two
-dissimilarity such as the difference in color [FH04], flaws:Over-segmentationduetolocalminimaandthick
-[Fel]. There are several different candidates for edges. watersheds due to plateaus.
+about 10GB of main memory [CS11].
+3) Random Walks: Random walks belong to the
+graph-based image segmentation algorithms. Random
+walk image segmentation usually works as follows:
+Seed points are placed on the image for the different
+objects in the image. From every single pixel, the
+probability to reach the different seed points by a
+random walk is calculated. This is done by taking
+image gradients as described in Section V-A for HOG
+features. The class of the pixel is the class of which a
+seed point will be reached with highest probability. At
+first, this is an interactive segmentation method, but it
+can be extended to be non-interactive by using another
+segmentation method's output as seed points.
+4) Active Contour Models: Active contour models
+(ACMs) are algorithms which segment images roughly
+along edges, but also try to find a border which is
+smooth. This is done by defining a so called energy
+function which will be minimized. They were initially
+described in [KWT88]. ACMs can be used to segment
+an image or to refine segmentation as it was done
+in [AM98] for brain MR images.
+5) Watershed Segmentation: The watershed algo-
+rithm takes a grayscale image and interprets it as a
+height map. Low values are catchment basins and
+the higher values between two neighboring catchment
+basins is the watershed. The catchment basins should
+contain what the developer wants to capture. This
+implies that those areas must be dark on grayscale
+images. The algorithm starts to fill the basins from
+the lowest point. When two basins are connected, a
+watershed is found. The algorithm stops when the
+highest point is reached.
+A detailed description of the watershed segmentation
+algorithm is given in [RM00].
+The watershed segmentation was used in [JLD03] to
+segment white blood cells. As the authors describe,
+the segmentation by watershed transform has two
+flaws: Over-segmentation due to local minima and thick
+watersheds due to plateaus.
 7
-C. Random Decision Forests 1) If data is linearly separable, it can be separated
-by a hyperplane. There is one hyperplane which
+C. Random Decision Forests
 Random Decision Forests were first proposed
-maximizes the distance to the next datapoints
 in [Ho95]. This type of classifier applies techniques
-(supportvectors).Thishyperplaneshouldbetaken:
 called ensemble learning, where multiple classifiers
 are trained and a combination of their hypotheses is
-1
-used. One ensemble learning technique is the random minimize (cid:107)w(cid:107)2
-subspaces method where each classifier is trained w,b 2
-s.t. ∀m y ·((cid:104)w,x (cid:105)+b)≥1
-on a random subspace of the feature space. Another i=1 i i
-(cid:124) (cid:123)(cid:122) (cid:125)
-ensemble learning technique is bagging, which is sgn appliedtothisgivestheclassification
+used. One ensemble learning technique is the random
+subspaces method where each classifier is trained
+on a random subspace of the feature space. Another
+ensemble learning technique is bagging, which is
 training the trees on random subsets of the training set.
-2) Eveniftheunderlyingprocesswhichgeneratesthe
 In the case of Random Decision Forests, the classifiers
-features for the two classes is linearly separable,
 are decision trees. A decision tree is a tree where each
-noise can make the data not separable. The intro-
 innernodeusesoneormorefeaturestodecideinwhich
-duction ofslackvariables to relaxthe requirement
 branch to descend. Each leaf is a class.
-of linear separability solves this problem. The
 One strength of Random Decision Forests compared
-trade-off between accepting some errors and a
 tomanyotherclassifierslikeSVMsandneuralnetworks
-more complex model is weighted by a parameter
 is that the scale of measure of the features (nominal,
-C ∈ R+. The bigger C, the more errors are
-ordinal, interval, ratio) can be arbitrary. Another advan- 0
-accepted. The new optimization problem is:
+ordinal, interval, ratio) can be arbitrary. Another advan-
 tage of Random Decision Forests compared to SVMs,
-for example, is the speed of training and classification. 1 (cid:88)m
-Decision trees were extensively studied in the past min wimize 2(cid:107)w(cid:107)2+C· ξ i
-20 years and a multitude of training algorithms have i=1
-been proposed (e.g. ID3 in [Qui86], C4.5 in [Qui93]). s.t. ∀m i=1y i·((cid:104)w,x i(cid:105)+b)≥1−ξ i
+for example, is the speed of training and classification.
+Decision trees were extensively studied in the past
+20 years and a multitude of training algorithms have
+been proposed (e.g. ID3 in [Qui86], C4.5 in [Qui93]).
 Possible training hyperparameters are the measure to
-Note that 0 ≤ ξ ≤ 1 means that the data point
-i
 evaluatethe“goodnessofsplit”[Min89],thenumberof
-is within the margin, whereas ξ ≥1 means it is
-i
 decision trees being used, and if the depth of the trees
-misclassified. An SVM with C >0 is also called
 is restricted. Typically in the context of classification,
-a soft-margin SVM.
 decision trees are trained by adding new nodes until
-3) The primal problem is to find the normal vector
 eachleafcontainsonlynodesofasingleclassoruntilit
-w and the bias b. The dual problem is to express
 is not possible to split further. This is called a stopping
-w as a linear combination of the training data x :
-i
 criterion.
-There are two typical training modes: Central axis (cid:88)m
-w= α y x
-projection and perceptron training. In training, for i i i
-each node a hyperplane is searched which is optimal i=1
-according to an error function. where y ∈ {−1,1} represents the class of the
+There are two typical training modes: Central axis
+projection and perceptron training. In training, for
+each node a hyperplane is searched which is optimal
+according to an error function.
+Random Decision Forests with texton features (see
+Section V-A6) are applied in [SJC08] for segmentation.
+In the [MSC] dataset, they report a per-pixel accuracy
+rate of 66.9% for their best system. This system
+requires 415 ms for the segmentation of 320 px × 213 px
+images on a single 2.7GHz core. On the Pascal
+VOC 2007 dataset, they report an average per-pixel
+accuracy for their best segmentation system of 42%.
+An excellent introduction to Random Decision
+Forests for semantic segmentation is given by [SCZ08].
+D. SVMs
+SVMs are well-studied binary classifiers which can
+be described by five central ideas. For those ideas, the
+training data is represented as $(x_i, y_i)$ where $x_i$ is the
+feature vector and $y_i \in \{-1, 1\}$ the binary label for
+training example $i \in \{1, \dots, m\}$.
+1) If data is linearly separable, it can be separated
+by a hyperplane. There is one hyperplane which
+maximizes the distance to the next datapoints
+(support vectors). This hyperplane should be taken:
+$$\begin{aligned}
+\underset{w,b}{\text{minimize}} \quad & \frac{1}{2}\|w\|^2 \\
+\text{s.t.} \quad & \forall_{i=1}^{m}\; y_i \cdot (\underbrace{\langle w, x_i \rangle + b}_{\text{sgn applied to this gives the classification}}) \geq 1
+\end{aligned}$$
+2) Even if the underlying process which generates the
+features for the two classes is linearly separable,
+noise can make the data not separable. The intro-
+duction of slack variables to relax the requirement
+of linear separability solves this problem. The
+trade-off between accepting some errors and a
+more complex model is weighted by a parameter
+$C \in \mathbb{R}_0^+$. The bigger C, the more errors are
+accepted. The new optimization problem is:
+$$\begin{aligned}
+\underset{w}{\text{minimize}} \quad & \frac{1}{2}\|w\|^2 + C \cdot \sum_{i=1}^{m} \xi_i \\
+\text{s.t.} \quad & \forall_{i=1}^{m}\; y_i \cdot (\langle w, x_i \rangle + b) \geq 1 - \xi_i
+\end{aligned}$$
+Note that $0 \leq \xi_i \leq 1$ means that the data point
+is within the margin, whereas $\xi_i \geq 1$ means it is
+misclassified. An SVM with $C > 0$ is also called
+a soft-margin SVM.
+3) The primal problem is to find the normal vector
+w and the bias b. The dual problem is to express
+w as a linear combination of the training data $x_i$:
+$$w = \sum_{i=1}^{m} \alpha_i y_i x_i$$
+where $y_i \in \{-1, 1\}$ represents the class of the
+training example and $\alpha_i$ are Lagrange multipliers.
+The usage of Lagrange multipliers is explained
+with some examples in [Smi04]. The usage of the
+Lagrange multipliers $\alpha_i$ changes the optimization
+problem to depend on the $\alpha_i$ which are weights for
+the feature vectors. It turns out that most $\alpha_i$ will
+be zero. The non-zero weighted vectors are called
+support vectors.
+The optimization problem is now, according
+to [Bur98]:
+maximize
+αi
+m
+(cid:88)
+i=1
+α
 i
-Random Decision Forests with texton features (see training example and α are Lagrange multipliers.
+−
+1
+2
+m
+(cid:88)
+i=1
+m
+(cid:88)
+j=1
+α
 i
-Section V-A6) are applied in [SJC08] for segmentation. The usage of Lagrange multipliers is explained
-In the [MSC] dataset, they report a per-pixel accuracy with some examples in [Smi04]. The usage of the
-rate of 66.9% for their best system. This system Lagrange multipliers α changes the optimization
+α
+j
+y
 i
-requires415msforthesegmentationof320px×213px problem depend on the α which are weights for
+y
+j
+(cid:104)x
 i
-images on a single 2.7GHz core. On the Pascal the feature vectors. It turns out that most α will
+,x
+j
+(cid:105)
+s.t. ∀m
+i=1
+0≤α
 i
-VOC 2007 dataset, they report an average per-pixel be zero. The non-zero weighted vectors are called
-accuracy for their best segmentation system of 42%. support vectors.
-An excellent introduction to Random Decision The optimization problem is now, according
-Forestsforsemanticsegmentationisgivenby[SCZ08]. to [Bur98]:
-m m m
-(cid:88) 1(cid:88)(cid:88)
-D. SVMs maximize α − α α y y (cid:104)x ,x (cid:105)
-αi i 2 i j i j i j
-SVMs are well-studied binary classifiers which can i=1 i=1j=1
-be described by five central ideas. For those ideas, the s.t. ∀m 0≤α ≤C
-i=1 i
-training data is represented as (x i,y i) where x i is the (cid:88)m
-feature vector and y i ∈ {−1,1} the binary label for s.t. α iy i =0
-training example i∈{1,...,m}. i=1
+≤C
+s.t.
+m (cid:88)
+i=1
+α i y i =0
 8
-4) Not every dataset is linearly separable. This prob- yy 77 yy 88 yy 99
+4) Not every dataset is linearly separable. This prob-
 lem is approached by transforming the feature
-xx xx xx
-77 88 99
-vectors x with a non-linear mapping Φ into yy 44 yy 55 yy 66
+vectors x with a non-linear mapping Φ into
 a higher dimensional (probably ∞-dimensional)
-xx xx xx
-44 55 66
-space. As the feature vectors x are only used yy 11 yy 22 yy 33
-within scalar product (cid:104)x ,x (cid:105), it is not necessary
-i j xx xx xx
-11 22 33
+space. As the feature vectors x are only used
+within scalar product $\langle x_i, x_j \rangle$, it is not necessary
 to do the transformation. It is enough to do the
-calculation Figure 3: CRF with 4-neighborhood. Each node x i
-representsapixelandeachnodey represents
+calculation
+K(x
+i
+,x
+j
+)=(cid:104)x
 i
-K(x ,x )=(cid:104)x ,x (cid:105)
-i j i j a label.
+,x
+j
+(cid:105)
 This function K is called a kernel. The idea of
-never explicitly transforming the vectors x to the
+never explicitly transforming the vectors x
 i
-getslabeledasshowninFigure3.Forexample,aMRF
+to the
 higher dimensional space is called the kernel trick.
-whichistrainedonimagesofthesize224px×224pixel
 Common kernels include the polynomial kernel
-and gets the raw RGB values as features has
-K (x ,x )=((cid:104)x ,x (cid:105)+r)p
-P i j i j
-224·224·3+224·224=200704
-of degree p and coefficient r, the Gaussian radial (cid:124) (cid:123)(cid:122) (cid:125) (cid:124) (cid:123)(cid:122) (cid:125)
-input output
+$$K_P(x_i, x_j) = (\langle x_i, x_j \rangle + r)^p$$
+of degree p and coefficient r, the Gaussian radial
 basis function (RBF) kernel
+$$K_{\text{Gauss}}(x_i, x_j) = e^{\frac{-\gamma \|x_i - x_j\|^2}{2\sigma^2}}$$
+and the sigmoid kernel
+$$K_{\tanh}(x_i, x_j) = \tanh(\gamma \langle x_i, x_j \rangle - r)$$
+where the parameter γ determines how much
+influence single training examples have.
+5) The described SVMs can only distinguish between
+two classes. Common strategies to expand those
+binary classifiers to multi-class classification are
+the one-vs-all and the one-vs-one strategy. In the
+one-vs-all strategy n classifiers have to be trained
+which can distinguish one of the n classes against
+all other classes. In the one-vs-one strategy $\frac{n^2-n}{2}$
+classifiers are trained; one classifier for each pair
+of classes.
+A detailed description of SVMs can be found
+in [Bur98].
+SVMs are used by [YHRF12] on the 2009 and 2010
+PASCAL segmentation challenge [EVGW+10]. They
+did not hand their classifier in to the challenge itself,
+but calculated an average rank of 7 among the different
+categories.
+[FGMR10] also used an SVM based method with
+HOG features and achieved the 7th rank in the 2010
+PASCAL segmentation challenge by mean accuracy. It
+needs about 2s on a 2.8GHz 8-core Intel processor.
+E. Markov Random Fields
+MRFs are undirected probabilistic graphical models
+which are a widespread model in computer vision. The
+overall idea of MRFs is to assign a random variable for
+each feature and a random variable for each pixel which
+x
+1
+x
+2
+x
+3
+x
+4
+x
+5
+x
+6
+x
+7
+x
+8
+x
+9
+y 1 y 2 y 3
+y 4 y 5 y 6
+y 7 y 8 y 9
+x
+1
+x
+2
+x
+3
+x
+4
+x
+5
+x
+6
+x
+7
+x
+8
+x
+9
+y 1 y 2 y 3
+y 4 y 5 y 6
+y 7 y 8 y 9
+Figure 3: CRF with 4-neighborhood. Each node $x_i$
+represents a pixel and each node $y_i$ represents
+a label.
+gets labeled as shown in Figure 3. For example, a MRF
+which is trained on images of the size 224 px × 224 px
+and gets the raw RGB values as features has
+$$\underbrace{224 \cdot 224 \cdot 3}_{\text{input}} + \underbrace{224 \cdot 224}_{\text{output}} = 200704$$
 random variables. Those random variables are condi-
-K Gauss(x i,x j)=e−γ(cid:107)x 2i σ− 2xj(cid:107)2 tionally independent, given their local neighborhood.
+tionally independent, given their local neighborhood.
 These (in)dependencies can be expressed with a graph.
-and the sigmoid kernel Let G=(V,E) be the associated undirected graph
+Let G=(V,E) be the associated undirected graph
 of an MRF and C be the set of all maximal cliques in
-K (x ,x )=tanh(γ(cid:104)x ,x (cid:105)−r)
-tanh i j i j
 that graph. Nodes represent random variables x,y and
-where the parameter γ determines how much edges represent conditional dependencies. Just like in
-influence single training examples have. he 4-neighborhood [SWRC06] and the 8-neighborhood
-5) ThedescribedSVMscanonlydistinguishbetween are reasonable choices for constructing the graph.
-two classes. Common strategies to expand those Typically,randomvariablesyrepresenttheclassofa
-binary classifiers to multi-class classification is singlepixel,randomvariablesxrepresentapixelvalues
-the one-vs-all and the one-vs-one strategy. In the and edges represent pixel neighborhood in computer
-one-vs-all strategy n classifiers have to be trained vision problems segmentation problems where MRFs
-which can distinguish one of the n classes against are used. Accordingly, the random variables y live
-n2−n
-all other classes. In the one-vs-one strategy on 1,...,nr of classes and the random variables x
-2
-classifiers are trained; one classifier for each pair typically live on 0,...,255 or [0,1].
-of classes. The probability of x,y can be expressed as
-A detailed description of SVMs can be found
+edges represent conditional dependencies. Just like in
+he 4-neighborhood [SWRC06] and the 8-neighborhood
+are reasonable choices for constructing the graph.
+Typically, random variables y represent the class of a
+single pixel, random variables x represent pixel values
+and edges represent pixel neighborhood in computer
+vision segmentation problems where MRFs
+are used. Accordingly, the random variables y live
+on $\{1, \dots, \text{nr of classes}\}$ and the random variables x
+typically live on $\{0, \dots, 255\}$ or $[0, 1]$.
+The probability of x,y can be expressed as
+P(x,y)=
 1
-in [Bur98]. P(x,y)= e−E(x,y)
 Z
-SVMs are used by [YHRF12] on the 2009 and 2010
-PASCAL segmentation challenge [EVGW+10]. They where Z = (cid:80) e−E(x,y) is a normalization term
+e−E(x,y)
+where Z = (cid:80)
 x,y
-did not hand their classifier in to the challenge itself, called the partition function and E is called the energy
-but calculated an average rank of 7 among the different function. A common choice for the energy function is
-categories. (cid:88)
-E(x,y)= ψ (x,y)
-[FGMR10] also used an SVM based method with c
-HOG features and achieved the 7th rank in the 2010 c∈C
-PASCAL segmentation challenge by mean accuracy. It where ψ is called a clique potential. One choice for
-needs about 2s on a 2.8GHz 8-core Intel processor. cliques of size two x,y=(x ,x ) is [KP06]
-1 2
+e−E(x,y) is a normalization term
+called the partition function and E is called the energy
+function. A common choice for the energy function is
+$$E(x, y) = \sum_{c \in C} \psi_c(x, y)$$
+where ψ is called a clique potential. One choice for
+cliques of size two $x, y = (x_1, x_2)$ is [KP06]
+ψ
+c
+(x
+1
+,x
+2
+)=wδ(x
+1
+,x
+2
+)=
 (cid:40)
-+w if x (cid:54)=x
-E. Markov Random Fields ψ (x ,x )=wδ(x ,x )= 1 2
-c 1 2 1 2
-−w if x =x
-1 2
-MRFs are undirected probabilistic graphical models
-which are wide-spread model in computer vision. The According to [Mur12], the most common way of
-overall idea of MRFs is to assign a random variable for inference over the posterior MRF in computer vision
-eachfeatureandarandomvariableforeachpixelwhich problems is Maximum A Posteriori (MAP) estimation.
++w if x
+1
+(cid:54)=x
+2
+−w if x
+1
+=x
+2
+According to [Mur12], the most common way of
+inference over the posterior MRF in computer vision
+problems is Maximum A Posteriori (MAP) estimation.
 9
-Detailed introductions to MRFs are given by VI. NEURALNETWORKSFORSEMANTIC
-[BKR11], [Mur12]. MRFs are used by [ZBS01] and SEGMENTATION
-[MSB12] for image segmentation. Artificial neural networks are classifiers which are
+Detailed introductions to MRFs are given by
+[BKR11], [Mur12]. MRFs are used by [ZBS01] and
+[MSB12] for image segmentation.
+F. Conditional Random Fields
+CRFs are MRFs where all clique potentials are
+conditioned on input features [Mur12]. This means,
+instead of learning the distribution P(y,x), the task
+is reformulated to learn the distribution P(y|x). One
+consequence of this reformulation is that CRFs need
+much less parameters as the distribution of x does
+not have to be estimated. Another advantage of CRFs
+compared to MRFs is that no distribution assumption
+about x has to be made.
+A CRF has the partition function Z:
+$$Z(x) = \sum_{y} P(x, y)$$
+and joint probability distribution
+$$P(y|x) = \frac{1}{Z(x)} \prod_{c \in C} \psi_c(y_c | x)$$
+The simplest way to define the clique potentials ψ is
+the count of the class $y_c$ given x added with a positive
+smoothing constant to prevent the complete term from
+getting zero.
+CRFs as described in [LRKT09] have reached top
+performance in PASCAL VOC 2010 [VOC10] and
+are also used in [HZCP04], [SWRC06] for semantic
+segmentation.
+A method similar to CRFs was proposed
+in [GBVdW+10]. The system of Gonfaus et al.
+ranked 1st by mean accuracy in the segmentation task
+of the PASCAL VOC 2010 challenge [EVGW+10].
+An introduction to CRFs is given by [SM11].
+G. Post-processing methods
+Post-processing methods refine a found segmentation and
+remove obvious errors. For example, the morphological
+operations opening and closing can remove noise. The
+opening operation is a dilation followed by an erosion.
+This removes tiny segments. The closing operation is an
+erosion followed by a dilation. This removes tiny gaps
+in otherwise filled regions. They were used in [CLP98]
+for biomedical image segmentation.
+Another way of refinement of the found segmentation
+is by adjusting the segmentation to match close edges.
+This was used in [BBMM11] with an ultra-metric
+contour map [AMFM09].
+Active contour models are another example of a
+post-processing method [KWT88].
+VI. NEURAL NETWORKS FOR SEMANTIC
+SEGMENTATION
+Artificial neural networks are classifiers which are
 inspired by biologic neurons. Every single artificial
-F. Conditional Random Fields neuron has some inputs which are weighted and sumed
+neuron has some inputs which are weighted and summed
 up. Then, the neuron applies a so called activation
-CRFs are MRFs where all clique potentials are
 functiontotheweightedsumandgivesanoutput.Those
-conditioned on input features [Mur12]. This means,
 neurons can take either a feature vector as input or the
-instead of learning the distribution P(y,x), the task
 output of other neurons. In this way, they build up
-is reformulated to learn the distribution P(y|x). One
 feature hierarchies.
-consequence of this reformulation is that CRFs need
 The parameters they learn are the weights w ∈ R.
-much less parameters as the distribution of x does
 Theyarelearnedbygradientdescent.Todoso,anerror
-not have to be estimated. Another advantage of CRFs
 function—usuallycross-entropyormeansquarederror
-compared to MRFs is that no distribution assumption
 — is necessary. For the gradient descent algorithm, one
-about x has to be made.
 sees the labeled training data as given, the weights
-A CRF has the partition function Z:
 as variables and the error function as a surface in
-(cid:88)
-Z(x)= P(x,y) this weight-space. Minimizing the error function in the
-y weight space adapts the neural network to the problem.
+this weight-space. Minimizing the error function in the
+weight space adapts the neural network to the problem.
 There are lots of ideas around neural networks like
-and joint probability distribution
 regularization, better optimization algorithms, automat-
-1 (cid:89) ically building up architectures, design choices for
-P(y|x)= ψ (y |x)
-Z(x) c c activationfunctions.Thisisnotexplainedindetailhere,
-c∈C
+ically building up architectures, design choices for
+activationfunctions.Thisisnotexplainedindetailhere,
 but some of the mayor breakthroughs are outlined.
-The simplest way to define the clique potentials ψ is CNNs are neural networks which learn image filters.
-the count of the class y c given x added with a positive Theydrasticallyreducethenumberofparameterswhich
-smoothing constant to prevent the complete term from have to be learned while being still general enough for
-getting zero. theproblemdomainofimages.ThiswasshownbyAlex
-CRFs as described in [LRKT09] have reached top Krizhevsky et al. in [KSH12]. One major idea was a
-performance in PASCAL VOC 2010 [VOC10] and clever regularization called dropout training, which set
-are also used in [HZCP04], [SWRC06] for semantic the output of neurons while training randomly to zero.
-segmentation. Another contribution was the usage of an activation
-A method similar to CRFs was proposed function called rectified linear unit:
-in [GBVdW+10]. The system of Gonfaus et.al.
-ranked 1st by mean accuracy in the segmentation task ϕ ReLU(x)=max(0,x)
-of the PASCAL VOC 2010 challenge [EVGW+10]. Those are much faster to train than the commonly used
-An introduction to CRFs is given by [SM11]. sigmoid activation functions
+CNNs are neural networks which learn image filters.
+They drastically reduce the number of parameters which
+have to be learned while being still general enough for
+the problem domain of images. This was shown by Alex
+Krizhevsky et al. in [KSH12]. One major idea was a
+clever regularization called dropout training, which set
+the output of neurons while training randomly to zero.
+Another contribution was the usage of an activation
+function called rectified linear unit:
+$$\varphi_{\text{ReLU}}(x) = \max(0, x)$$
+Those are much faster to train than the commonly used
+sigmoid activation functions
+ϕ
+Sigmoid
+(x)=
 1
-ϕ (x)=
-G. Post-processing methods Sigmoid e−x+1
-Post-processing refine a found segmentation and Krizhevsky et al. implemented those ideas and partici-
-remove obvious errors. For example, the morphological pated in the ImageNet Large-Scale Visual Recognition
-operations opening and closing can remove noise. The Challenge (ILSVRC). The best other system, which
-opening operation is a dilation followed by a erosion. used SIFT features and Fisher Vectors, had a perfor-
-This removes tiny segments. The closing operation is a mance of about 25.7% while the network by Alex
-erosion followed by a dilation. This removes tiny gaps Krizhevsky et al. got 17.0% error rate on the ILSVRC-
-in otherwise filled regions. They were used in [CLP98] 2010 dataset. As a preprocessing step, they downsam-
-for biomedical image segmentation. pledallimagestoafixedsizeof256px×256pxbefore
-Anotherwayofrefinementofthefoundsegmentation they fed the features into their network. This network
-is by adjusting the segmentation to match close edges. is commonly known as AlexNet.
-This was used in [BBMM11] with an ultra-metric Since AlexNet was developed, a lot of different
-contour map [AMFM09]. neural networks have been proposed. One interesting
-Active contour models are another example of a exampleis[PC13],wherearecurrentCNNforsemantic
-post-processing method [KWT88]. segmentation is presented.
+e−x+1
+Krizhevsky et al. implemented those ideas and partici-
+pated in the ImageNet Large-Scale Visual Recognition
+Challenge (ILSVRC). The best other system, which
+used SIFT features and Fisher Vectors, had a perfor-
+mance of about 25.7% while the network by Alex
+Krizhevsky et al. got 17.0% error rate on the ILSVRC-
+2010 dataset. As a preprocessing step, they downsam-
+pled all images to a fixed size of 256 px × 256 px before
+they fed the features into their network. This network
+is commonly known as AlexNet.
+Since AlexNet was developed, a lot of different
+neural networks have been proposed. One interesting
+example is [PC13], where a recurrent CNN for semantic
+segmentation is presented.
 10
 Another notable paper is [LSD14]. The algorithm
 presentedtheremakesuseofaclassifyingnetworksuch
@@ -632,93 +1160,129 @@ image filter. This way, each pixel gets a probability
 distribution for each of the trained classes. By taking
 the most likely class, a semantic segmentation can be
 done with arbitrary image sizes.
-A very recent publication by Dai et al. [DHS15] (a) LensFlare (b) Vignetting
-showed that segmentation with much deeper networks Imageby[Hus07] Imageby[Man12]
+A very recent publication by Dai et al. [DHS15]
+showed that segmentation with much deeper networks
 is possible and achieves better results.
 More detailed explanations to neural networks for
 visual recognition is given by [LKJ15].
 VII. POSSIBLEPROBLEMSINTHEDATAFOR
 SEGMENTATIONALGORITHMS
 Different segmentation workflows have different
-problems. However, there are a couple of special cases (c) Smokebycauterization (d) Camouflage
-Imageby[GVSY13] Imageby[Kaf07]
+problems. However, there are a couple of special cases
 which should be tested. Those cases might not occur
 often in the training data, but it could still happen in
 the productive system.
 I am not aware of any systematic work which exam-
 ined the influence of problems such as the following.
 A. Lens Flare
-Lens flare is the effect of light getting scattered in (e) Transparency (f) Viewpoint
+Lens flare is the effect of light getting scattered in
 the lens system of the camera. The testing data set of
-the KITTI road evaluation benchmark [FKG13] has a Figure 4: Examples of images which might cause
-couple of photos with this problem. Figure 4(a) shows semantic segmentation systems to fail.
+the KITTI road evaluation benchmark [FKG13] has a
+couple of photos with this problem. Figure 4(a) shows
 an extreme example of lens flare.
-B. Vignetting 2) Camouflage: Some objects, like animals in the
-wild,activelytrytohide(seeFigure4(d)asanexample).
+B. Vignetting
 Vignettingistheeffectofaphotographgettingdarker
-In other cases it might just be bad luck that objects
 inthecorners.Thiscanhavemanyreasons,forexample
-are hard for humans to detect. This problem has two
 filters on the camera blocking light at the corners.
+C. Blurred images
+Images can be blurred for a couple of reasons. A
+problem with the lenses mechanics, focusing on the
+wrongpoint,tooquickmovement,smokeorfoam.One
+example of a blurred image is Figure 4(c), which was
+takenduringaninvivoporcineprocedureofdiaphragm
+dissection. The smoke was caused by cauterization.
+D. Other Problems
+If the following effects can occur at all and if they
+are problems depends heavily on the problem domain
+and the used model.
+1) Partial Occlusions: Segmentation systems which
+employ a model of the objects which should be
+segmented might suffer from partial occlusions.
+(a) LensFlare
+Imageby[Hus07]
+(b) Vignetting
+Imageby[Man12]
+(c) Smokebycauterization
+Imageby[GVSY13]
+(d) Camouflage
+Imageby[Kaf07]
+(e) Transparency (f) Viewpoint
+Figure 4: Examples of images which might cause
+semantic segmentation systems to fail.
+2) Camouflage: Some objects, like animals in the
+wild,activelytrytohide(seeFigure4(d)asanexample).
+In other cases it might just be bad luck that objects
+are hard for humans to detect. This problem has two
 interesting aspects: On the one hand, the segmenting
 systemmightsufferfromthesameproblemsashumans
-C. Blurred images
 do. On the other hand, the segmenting system might be
-Images can be blurred for a couple of reasons. A better than humans are, but it is forced to learn from
-problem with the lenses mechanics, focusing on the images labeled by humans. If the labels are wrong, the
-wrongpoint,tooquickmovement,smokeorfoam.One system is forced to learn something wrong.
-example of a blurred image is Figure 4(c), which was
+better than humans are, but it is forced to learn from
+images labeled by humans. If the labels are wrong, the
+system is forced to learn something wrong.
 3) Semi-transparent Occlusion: Some objects like
-takenduringaninvivoporcineprocedureofdiaphragm
 drinkingglassescanbevisibleandstillleavetheobject
-dissection. The smoke was caused by cauterization.
 behind them visible as shown in Figure 4(e). This is
 mainly a definition problem: Is the seen pixel the glass
-D. Other Problems label or the smartphone label?
-If the following effects can occur at all and if they 4) Viewpoints: Changes in viewpoints can be a
-are problems depends heavily on the problem domain problem, if they don’t occur in the training data. For
-and the used model. example,animagecaptioningsystemwhichwastrained
-1) Partial Occlusions: Segmentation systems which on photographs of professional photographers might
-employ a model of the objects which should be not have photos from the point of view of a child. This
-segmented might suffer from partial occlusions. is visualized in Figure 4(f).
+label or the smartphone label?
+4) Viewpoints: Changes in viewpoints can be a
+problem, if they don’t occur in the training data. For
+example,animagecaptioningsystemwhichwastrained
+on photographs of professional photographers might
+not have photos from the point of view of a child. This
+is visualized in Figure 4(f).
 11
-VIII. DISCUSSION REFERENCES
-Ohta et al. wrote [OKS78] 38 years ago. It is one [AM98] M. S. Atkins and B. T. Mackiewich, “Fully
-of the first papers mentioning semantic segmentation. automatic segmentation of the brain in
-mri,” Medical Imaging, IEEE Transactions
+VIII. DISCUSSION
+Ohta et al. wrote [OKS78] 38 years ago. It is one
+of the first papers mentioning semantic segmentation.
 In this time, a lot of work was done and many
+different directions have been explored. Different kinds
+of semantic segmentation have emerged.
+This paper presents a taxonomy of those kinds
+of semantic segmentation and a brief overview of
+completely automatic, passive, semantic segmentation
+algorithms.
+Future work includes a comparative study of
+those algorithms on publicly available dataset such
+as the ones presented in Table I. Another open
+question is the influence of the problems described
+inSectionVII.Thiscouldbedoneusingasubsetofthe
+thousands of images of Wikipedia Commons, such as
+https://commons.wikimedia.org/wiki/Category:Blurring
+for blurred images.
+A combination of different classifiers in an ensemble
+would be an interesting option to explore in order to
+improve accuracy. Another direction which is currently
+studiediscombiningclassifierssuchasneuralnetworks
+with CRFs [ZJRP+15].
+REFERENCES
+[AM98] M. S. Atkins and B. T. Mackiewich, “Fully
+automatic segmentation of the brain in
+mri,” Medical Imaging, IEEE Transactions
 on, vol. 17, no. 1, pp. 98–107, Feb. 1998.
-different directions have been explored. Different kinds [Online].Available:http://ieeexplore.ieee.org/xpls/
-of semantic segmentation have emerged. abs_all.jsp?arnumber=668699
+[Online].Available:http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=668699
 [AMFM09] P. Arbelaez, M. Maire, C. Fowlkes, and
-This paper presents a taxonomy of those kinds
 J. Malik, “From contours to regions: An
-of semantic segmentation and a brief overview of empirical evaluation,” in Computer Vision and
-completely automatic, passive, semantic segmentation Pattern Recognition, 2009. CVPR 2009. IEEE
+empirical evaluation,” in Computer Vision and
+Pattern Recognition, 2009. CVPR 2009. IEEE
 Conferenceon. IEEE,Jun.2009,pp.2294–2301.
-algorithms.
 [Online].Available:http://ieeexplore.ieee.org/xpls/
-Future work includes a comparative study of abs_all.jsp?arnumber=5206707
-those algorithms on publicly available dataset such [AP11] G. Azzopardi and N. Petkov, “Detection of
+abs_all.jsp?arnumber=5206707
+[AP11] G. Azzopardi and N. Petkov, “Detection of
 retinal vascular bifurcations by trainable v4-like
-as the ones presented in Table I. Another open
 filters,” in Computer Analysis of Images and
-question is the influence of the problems described Patterns. Springer,2011,pp.451–459.[Online].
-inSectionVII.Thiscouldbedoneusingasubsetofthe Available:http://www.cs.rug.nl/~imaging/databases/
+Patterns. Springer,2011,pp.451–459.[Online].
+Available:http://www.cs.rug.nl/~imaging/databases/
 retina_database/retinalfeatures_database.html
-thousands of images of Wikipedia Commons, such as
 [BBMM11] T. Brox, L. Bourdev, S. Maji, and J. Malik,
-https://commons.wikimedia.org/wiki/Category:Blurring
 “Object segmentation by alignment of poselet
-for blurred images. activationstoimagecontours,”inComputerVision
+activationstoimagecontours,”inComputerVision
 and Pattern Recognition (CVPR), 2011 IEEE
-A combination of different classifiers in an ensemble
 Conferenceon. IEEE,Jun.2011,pp.2225–2232.
-would be an interesting option to explore in order to [Online].Available:http://ieeexplore.ieee.org/xpls/
-improve accuracy. Another direction which is currently abs_all.jsp?arnumber=5995659
-studiediscombiningclassifierssuchasneuralnetworks [BJ00] Y. Boykov and M.-P. Jolly, “Interactive organ
+[Online].Available:http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=5995659
+[BJ00] Y. Boykov and M.-P. Jolly, “Interactive organ
 segmentationusinggraphcuts,”inMedicalImage
-with CRFs [ZJRP+15].
 Computing and Computer-Assisted Intervention–
 MICCAI 2000. Springer, 2000, pp. 276–
 286.[Online].Available:http://link.springer.com/
@@ -760,279 +1324,453 @@ knowledge-based morphological operations with
 biomedicalapplications,”ImageProcessing,IEEE
 Transactionson,vol.7,no.12,pp.1673–1683,Dec.
 12
-1998.[Online].Available:http://ieeexplore.ieee.org/ vol. 1, June 2005, pp. 886–893 vol. 1.
-xpls/abs_all.jsp?arnumber=730379 [Online].Available:http://ieeexplore.ieee.org/xpls/
-[CM02] D. Comaniciu and P. Meer, “Mean shift: A abs_all.jsp?arnumber=1467360
-robust approach toward feature space analysis,” [EVGW+a] M. Everingham, L. Van Gool, C. K. I.
-Pattern Analysis and Machine Intelligence, IEEE Williams, J. Winn, and A. Zisserman, “The
-Transactionson,vol.24,no.5,pp.603–619,2002. PASCAL Visual Object Classes Challenge
-[Online]. Available: http://ieeexplore.ieee.org/xpl/ 2007 (VOC2007) Results,” http://www.pascal-
-login.jsp?tp=&arnumber=1000236 network.org/challenges/VOC/voc2007/workshop/index.html.
-[COWR11] C. Chen, J. Ozolek, W. Wang, and G. K. Rohde, [Online]. Available: http://host.robots.ox.ac.uk:
-“A pixel classification system for segmenting 8080/pascal/VOC/voc2007/index.html
-biomedicalimagesusingintensityneighborhoods [EVGW+b] ——,“ThePASCALVisualObjectClassesChal-
-anddimensionreduction,”inBiomedicalImaging: lenge2012(VOC2012)Results,”http://www.pascal-
-From Nano to Macro, 2011 IEEE International network.org/challenges/VOC/voc2012/workshop/index.html.
-Symposium on. IEEE, 2011, pp. 1649–1652. [Online]. Available: http://host.robots.ox.ac.uk:
-[Online].Available:https://www.andrew.cmu.edu/ 8080/pascal/VOC/voc2012/index.html
-user/gustavor/chen_isbi_11.pdf [EVGW+10] M. Everingham, L. Van Gool, C. K. Williams,
-[CP08] G. Csurka and F. Perronnin, “A simple high J.Winn,andA.Zisserman,“Thepascalvisualobject
-performanceapproachtosemanticsegmentation.” classes (voc) challenge,” International journal of
-in BMVC, 2008, pp. 1–10. [Online]. Avail- computervision,vol.88,no.2,pp.303–338,2010.
-able: http://www.xrce.xerox.com/layout/set/print/ [EVGW+12] M. Everingham, L. Van Gool, C. K. I. Williams,
+1998.[Online].Available:http://ieeexplore.ieee.org/
+xpls/abs_all.jsp?arnumber=730379
+[CM02] D. Comaniciu and P. Meer, “Mean shift: A
+robust approach toward feature space analysis,”
+Pattern Analysis and Machine Intelligence, IEEE
+Transactionson,vol.24,no.5,pp.603–619,2002.
+[Online]. Available: http://ieeexplore.ieee.org/xpl/
+login.jsp?tp=&arnumber=1000236
+[COWR11] C. Chen, J. Ozolek, W. Wang, and G. K. Rohde,
+“A pixel classification system for segmenting
+biomedicalimagesusingintensityneighborhoods
+anddimensionreduction,”inBiomedicalImaging:
+From Nano to Macro, 2011 IEEE International
+Symposium on. IEEE, 2011, pp. 1649–1652.
+[Online].Available:https://www.andrew.cmu.edu/
+user/gustavor/chen_isbi_11.pdf
+[CP08] G. Csurka and F. Perronnin, “A simple high
+performanceapproachtosemanticsegmentation.”
+in BMVC, 2008, pp. 1–10. [Online]. Avail-
+able: http://www.xrce.xerox.com/layout/set/print/
 content/download/16654/118653/file/2008-023.pdf
-J. Winn, and A. Zisserman, “Visual object
-[CRSS] A. Cohen, E. Rivlin, I. Shimshoni, and classeschallenge2012(voc2012),”2012.[Online].
-E.Sabo,“Coloncryptsegmentationwebsite.”[On- Available:http://host.robots.ox.ac.uk:8080/pascal/
-line].Available:http://mis.haifa.ac.il/~ishimshoni/ VOC/voc2012/index.html
+[CRSS] A. Cohen, E. Rivlin, I. Shimshoni, and
+E.Sabo,“Coloncryptsegmentationwebsite.”[On-
+line].Available:http://mis.haifa.ac.il/~ishimshoni/
 SegmentCrypt/Download.htm
-[Fel] P. F. Felzenszwalb, “Graph based im-
-[CRSS14] ——, “Memory based active contour algorithm age segmentation.” [Online]. Available: http:
-usingpixel-levelclassifiedimagesforcoloncrypt //cs.brown.edu/~pff/segment/
+[CRSS14] ——, “Memory based active contour algorithm
+usingpixel-levelclassifiedimagesforcoloncrypt
 segmentation,” Computerized Medical Imaging
-[FGMR10] P.F.Felzenszwalb,R.B.Girshick,D.McAllester,
 and Graphics, Nov. 2014. [Online]. Available:
-andD.Ramanan,“Objectdetectionwithdiscrimina-
 http://mis.haifa.ac.il/~ishimshoni/SegmentCrypt/
-tivelytrainedpart-basedmodels,”PatternAnalysis
 Active%20contour%20based%20on%20pixel-
-and Machine Intelligence, IEEE Transactions on,
 level%20classified%20image%20for%20colon%
-vol.32,no.9,pp.1627–1645,2010.
 20crypts%20segmentation.pdf
-[FH04] P. F. Felzenszwalb and D. P. Huttenlocher,
 [CS10] J. Carreira and C. Sminchisescu, “Constrained
-“Efficient graph-based image segmentation,”
 parametricmin-cutsforautomaticobjectsegmenta-
-International Journal of Computer Vision,
 tion,”inComputerVisionandPatternRecognition
-vol. 59, no. 2, pp. 167–181, 2004. [Online].
 (CVPR),2010IEEEConferenceon. IEEE,2010,
-Available:http://link.springer.com/article/10.1023/
 pp.3241–3248.
-B:VISI.0000022288.19776.77
 [CS11] ——,“Cpmc:Constrainedparametricmin-cutsfor
-[FKG13] J. Fritsch, T. Kuehnl, and A. Geiger, “A
 automaticobjectsegmentation,”Feb.2011.[Online].
-new performance measure and evaluation
 Available: http://www.maths.lth.se/matematiklth/
-benchmark for road detection algorithms,” in
 personal/sminchis/code/cpmc/
-InternationalConferenceonIntelligentTransporta-
 [CSI+09] M. E. Celebi, G. Schaefer, H. Iyatomi, W. V.
-tion Systems (ITSC), 2013. [Online]. Available:
 Stoecker,J.M.Malters,andJ.M.Grichnik,“An
-http://www.cvlibs.net/datasets/kitti/eval_road.php
 improvedobjectiveevaluationmeasureforborder
-[GBVdW+10] J. M. Gonfaus, X. Boix, J. Van de Weijer, A. D.
 detection in dermoscopy images,” Skin Research
-Bagdanov,J.Serrat,andJ.Gonzalez,“Harmonypo-
 andTechnology,vol.15,no.4,pp.444–450,2009.
-tentialsforjointclassificationandsegmentation,”in
 [Online].Available:http://arxiv.org/abs/1009.1020
-ComputerVisionandPatternRecognition(CVPR),
 [CSM09] L.P.Coelho,A.Shariff,andR.F.Murphy,“Nuclear
-2010IEEEConferenceon. IEEE,2010,pp.3280–
 segmentation in microscope cell images: a hand-
-3287.
 segmenteddatasetandcomparisonofalgorithms,”
-in Biomedical Imaging: From Nano to Macro, [GRC+08] S. Gould, J. Rodgers, D. Cohen, G. Elidan, and
-2009.ISBI’09.IEEEInternationalSymposiumon. D.Koller,“Multi-classsegmentationwithrelative
-locationprior,”InternationalJournalofComputer
+in Biomedical Imaging: From Nano to Macro,
+2009.ISBI’09.IEEEInternationalSymposiumon.
 IEEE, 2009, pp. 518–521. [Online]. Available:
-http://murphylab.web.cmu.edu/data Vision,vol.80,no.3,pp.300–316,Apr.2008.
-[CXGS12] M. D. Collins, J. Xu, L. Grady, and V. Singh, [GVSY13] S. Giannarou, M. Visentini-Scarzanella, and G.-
-“Random walks based multi-image segmentation: Z.Yang,“Probabilistictrackingofaffine-invariant
-Quasiconvexity results and gpu-based solutions,” anisotropicregions,”PatternAnalysisandMachine
-in Computer Vision and Pattern Recognition Intelligence,IEEETransactionson,vol.35,no.1,
-(CVPR), 2012 IEEE Conference on. IEEE, pp.130–143,2013.
-2012, pp. 1656–1663. [Online]. Available: http: [Har75] J.A.Hartigan,Clusteringalgorithms. JohnWiley
-//pages.cs.wisc.edu/~jiaxu/pub/rwcoseg.pdf &Sons,Inc.,1975.
-[DHS15] J.Dai,K.He,andJ.Sun,“Instance-awareseman- [HDT02] C. Huang, L. Davis, and J. Townshend, “An
-ticsegmentationviamulti-tasknetworkcascades,” assessment of support vector machines for land
-arXivpreprintarXiv:1512.04412,2015. coverclassification,”InternationalJournalofremote
-[DT05] N. Dalal and B. Triggs, “Histograms of oriented sensing,vol.23,no.4,pp.725–749,2002.
-gradients for human detection,” in Computer [HHR01] S.Hu,E.Hoffman,andJ.Reinhardt,“Automatic
-Vision and Pattern Recognition, 2005. CVPR lung segmentation for accurate quantitation of
-2005. IEEE Computer Society Conference on, volumetricx-rayctimages,”MedicalImaging,IEEE
+http://murphylab.web.cmu.edu/data
+[CXGS12] M. D. Collins, J. Xu, L. Grady, and V. Singh,
+“Random walks based multi-image segmentation:
+Quasiconvexity results and gpu-based solutions,”
+in Computer Vision and Pattern Recognition
+(CVPR), 2012 IEEE Conference on. IEEE,
+2012, pp. 1656–1663. [Online]. Available: http:
+//pages.cs.wisc.edu/~jiaxu/pub/rwcoseg.pdf
+[DHS15] J.Dai,K.He,andJ.Sun,“Instance-awareseman-
+ticsegmentationviamulti-tasknetworkcascades,”
+arXivpreprintarXiv:1512.04412,2015.
+[DT05] N. Dalal and B. Triggs, “Histograms of oriented
+gradients for human detection,” in Computer
+Vision and Pattern Recognition, 2005. CVPR
+2005. IEEE Computer Society Conference on,
+vol. 1, June 2005, pp. 886–893 vol. 1.
+[Online].Available:http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=1467360
+[EVGW+a] M. Everingham, L. Van Gool, C. K. I.
+Williams, J. Winn, and A. Zisserman, “The
+PASCAL Visual Object Classes Challenge
+2007 (VOC2007) Results,” http://www.pascal-
+network.org/challenges/VOC/voc2007/workshop/index.html.
+[Online]. Available: http://host.robots.ox.ac.uk:
+8080/pascal/VOC/voc2007/index.html
+[EVGW+b] ——,“ThePASCALVisualObjectClassesChal-
+lenge2012(VOC2012)Results,”http://www.pascal-
+network.org/challenges/VOC/voc2012/workshop/index.html.
+[Online]. Available: http://host.robots.ox.ac.uk:
+8080/pascal/VOC/voc2012/index.html
+[EVGW+10] M. Everingham, L. Van Gool, C. K. Williams,
+J.Winn,andA.Zisserman,“Thepascalvisualobject
+classes (voc) challenge,” International journal of
+computervision,vol.88,no.2,pp.303–338,2010.
+[EVGW+12] M. Everingham, L. Van Gool, C. K. I. Williams,
+J. Winn, and A. Zisserman, “Visual object
+classeschallenge2012(voc2012),”2012.[Online].
+Available:http://host.robots.ox.ac.uk:8080/pascal/
+VOC/voc2012/index.html
+[Fel] P. F. Felzenszwalb, “Graph based im-
+age segmentation.” [Online]. Available: http:
+//cs.brown.edu/~pff/segment/
+[FGMR10] P.F.Felzenszwalb,R.B.Girshick,D.McAllester,
+andD.Ramanan,“Objectdetectionwithdiscrimina-
+tivelytrainedpart-basedmodels,”PatternAnalysis
+and Machine Intelligence, IEEE Transactions on,
+vol.32,no.9,pp.1627–1645,2010.
+[FH04] P. F. Felzenszwalb and D. P. Huttenlocher,
+“Efficient graph-based image segmentation,”
+International Journal of Computer Vision,
+vol. 59, no. 2, pp. 167–181, 2004. [Online].
+Available:http://link.springer.com/article/10.1023/
+B:VISI.0000022288.19776.77
+[FKG13] J. Fritsch, T. Kuehnl, and A. Geiger, “A
+new performance measure and evaluation
+benchmark for road detection algorithms,” in
+InternationalConferenceonIntelligentTransporta-
+tion Systems (ITSC), 2013. [Online]. Available:
+http://www.cvlibs.net/datasets/kitti/eval_road.php
+[GBVdW+10] J. M. Gonfaus, X. Boix, J. Van de Weijer, A. D.
+Bagdanov,J.Serrat,andJ.Gonzalez,“Harmonypo-
+tentialsforjointclassificationandsegmentation,”in
+ComputerVisionandPatternRecognition(CVPR),
+2010IEEEConferenceon. IEEE,2010,pp.3280–
+3287.
+[GRC+08] S. Gould, J. Rodgers, D. Cohen, G. Elidan, and
+D.Koller,“Multi-classsegmentationwithrelative
+locationprior,”InternationalJournalofComputer
+Vision,vol.80,no.3,pp.300–316,Apr.2008.
+[GVSY13] S. Giannarou, M. Visentini-Scarzanella, and G.-
+Z.Yang,“Probabilistictrackingofaffine-invariant
+anisotropicregions,”PatternAnalysisandMachine
+Intelligence,IEEETransactionson,vol.35,no.1,
+pp.130–143,2013.
+[Har75] J.A.Hartigan,Clusteringalgorithms. JohnWiley
+&Sons,Inc.,1975.
+[HDT02] C. Huang, L. Davis, and J. Townshend, “An
+assessment of support vector machines for land
+coverclassification,”InternationalJournalofremote
+sensing,vol.23,no.4,pp.725–749,2002.
+[HHR01] S.Hu,E.Hoffman,andJ.Reinhardt,“Automatic
+lung segmentation for accurate quantitation of
+volumetricx-rayctimages,”MedicalImaging,IEEE
 13
-Transactionson,vol.20,no.6,pp.490–498,Jun. invariant keypoints,” International Journal of
-2001. ComputerVision,vol.60,no.2,pp.91–110,2004.
-[HJBJ+96] A. Hoover, G. Jean-Baptiste, X. Jiang, P. J. [Online]. Available: http://dx.doi.org/10.1023/B%
-Flynn, H. Bunke, D. B. Goldgof, K. Bowyer, 3AVISI.0000029664.99615.94
-D. W. Eggert, A. Fitzgibbon, and R. B. [LRAL08] A. Levin, A. Rav-Acha, and D. Lischinski,
-Fisher, “An experimental comparison of range “Spectral matting,” Pattern Analysis and
-imagesegmentationalgorithms,”PatternAnalysis Machine Intelligence, IEEE Transactions on,
-and Machine Intelligence, IEEE Transactions vol. 30, no. 10, pp. 1699–1712, 2008.
-on, vol. 18, no. 7, pp. 673–689, Jul. 1996. [Online].Available:http://ieeexplore.ieee.org/xpls/
-[Online].Available:http://ieeexplore.ieee.org/xpls/ abs_all.jsp?arnumber=4547428
-abs_all.jsp?arnumber=506791 [LRKT09] L. Ladický, C. Russell, P. Kohli, and P. Torr,
-[Ho95] T. K. Ho, “Random decision forests,” in “Associativehierarchicalcrfsforobjectclassimage
-Document Analysis and Recognition, 1995., segmentation,”inComputerVision,2009IEEE12th
-ProceedingsoftheThirdInternationalConference International Conference on, 2009, pp. 739–746.
-on, vol. 1. IEEE, 1995, pp. 278–282. [Online].Available:http://ieeexplore.ieee.org/xpls/
-[Online]. Available: http://ect.bell-labs.com/who/ abs_all.jsp?arnumber=5459248
-tkh/publications/papers/odt.pdf [LSD14] J. Long, E. Shelhamer, and T. Darrell, “Fully
-[Hus07] Hustvedt, “File:cctv lens flare.jpg,” Wikipedia convolutionalnetworksforsemanticsegmentation,”
-Commons, Nov. 2007. [Online]. Avail- arXiv preprint arXiv:1411.4038, 2014. [Online].
-able: https://commons.wikimedia.org/wiki/File: Available:http://arxiv.org/abs/1411.4038
-CCTV_Lens_flare.jpg [MAFM08] M. Maire, P. Arbelaez, C. Fowlkes, and
-[HZCP04] X. He, R. Zemel, and M. Carreira-Perpindn, J. Malik, “Using contours to detect and localize
-“Multiscale conditional random fields for image junctions in natural images,” in Computer Vision
-labeling,” in Computer Vision and Pattern and Pattern Recognition, 2008. CVPR 2008.
-Recognition, 2004. CVPR 2004. Proceedings IEEE Conference on, June 2008, pp. 1–8.
-of the 2004 IEEE Computer Society Conference [Online].Available:http://ieeexplore.ieee.org/xpls/
-on, vol. 2, Jun. 2004, pp. II–695–II–702 Vol.2. abs_all.jsp?arnumber=4587420
-[Online]. Available: http://ieeexplore.ieee.org/xpl/ [Man12] M. Manske, “File:randabschattung mikroskop
-login.jsp?tp=&arnumber=1315232 kamera 6.jpg,” Wikipedia Com-
-[JLD03] K.Jiang,Q.-M.Liao,andS.-Y.Dai,“Anovelwhite mons, Dec. 2012. [Online]. Avail-
-bloodcellsegmentationschemeusingscale-space able: https://commons.wikimedia.org/wiki/File:
-filtering and watershed clustering,” in Machine Randabschattung_Mikroskop_Kamera_6.JPG
-Learning and Cybernetics, 2003 International [MBLAGJ+07] S.Maldonado-Bascon,S.Lafuente-Arroyo,P.Gil-
-Conferenceon,vol.5,Nov2003,pp.2820–2825 Jimenez, H. Gomez-Moreno, and F. Lopez-
-Vol.5.[Online].Available:http://ieeexplore.ieee.org/ Ferreras, “Road-sign detection and recognition
-xpl/login.jsp?tp=&arnumber=1260033 based on support vector machines,” Intelligent
-[Kaf07] L.Kaffer,“File:greatmaleleopardinsouthafrika- Transportation Systems, IEEE Transactions on,
-jd.jpg,”WikipediaCommons,Jul.2007.[Online]. vol. 8, no. 2, pp. 264–278, Jun. 2007.
-Available:https://commons.wikimedia.org/wiki/File: [Online].Available:http://ieeexplore.ieee.org/xpls/
-Great_male_Leopard_in_South_Afrika-JD.JPG abs_all.jsp?arnumber=4220659
-[KKV+14] V.Kalesnykiene,J.-k.Kamarainen,R.Voutilainen, [MBVLG02] N.Moon,E.Bullitt,K.VanLeemput,andG.Gerig,
-J. Pietilä, H. Kälviäinen, and H. Uusitalo, “Automaticbrainandtumorsegmentation,”inMed-
-“Diaretdb1 diabetic retinopathy database and icalImageComputingandComputer-AssistedIn-
-evaluation protocol,” 2014. [Online]. Available: tervention—MICCAI 2002. Springer, 2002, pp.
-http://www2.it.lut.fi/project/imageret/diaretdb1/ 372–379.
-[KP92] J. M. Kasson and W. Plouffe, “An analysis of [MFTM01] D. Martin, C. Fowlkes, D. Tal, and J. Malik,
-selectedcomputerinterchangecolorspaces,”ACM “A database of human segmented natural
-TransactionsonGraphics(TOG),vol.11,no.4,pp. images and its application to evaluating
-373–405,1992. segmentationalgorithmsandmeasuringecological
-[KP06] Z. Kato and T.-C. Pong, “A markov random statistics,” in Computer Vision, 2001. ICCV
-field image segmentation model for color 2001. Proceedings. Eighth IEEE International
-textured images,” Image and Vision Computing, Conferenceon,vol.2. IEEE,2001,pp.416–423.
-vol. 24, no. 10, pp. 1103–1114, 2006. [Online]. [Online].Available:http://ieeexplore.ieee.org/xpls/
-Available: http://www.sciencedirect.com/science/ abs_all.jsp?arnumber=937655
-article/pii/S0262885606001223 [MHMK+14] L. Maier-Hein, S. Mersmann, D. Kondermann,
-[KSH12] A. Krizhevsky, I. Sutskever, and G. E. Hinton, S. Bodenstedt, A. Sanchez, C. Stock, H. G.
-“Imagenet classification with deep convolutional Kenngott, M. Eisenmann, and S. Speidel, “Can
-neuralnetworks,”inAdvancesinneuralinformation masses of non-experts train highly accurate
-processingsystems,2012,pp.1097–1105. image classifiers?” in Medical Image Computing
-[KWT88] M. Kass, A. Witkin, and D. Terzopoulos, andComputer-AssistedIntervention–MICCAI2014.
-“Snakes: Active contour models,” International Springer,2014,pp.438–445.[Online].Available:
-journal of computer vision, vol. 1, no. 4, pp. http://opencas.webarchiv.kit.edu/?q=node/26
-321–331, Jan. 1988. [Online]. Available: http: [Min89] J.Mingers,“Anempiricalcomparisonofselection
-//link.springer.com/article/10.1007/BF00133570 measures for decision-tree induction,” Machine
-[LKJ15] F.-F. Li, A. Karpathy, and J. Johnson, Learning, vol. 3, no. 4, pp. 319–342, 1989.
-“CS231n: Convolutional neural networks for [Online].Available:http://dx.doi.org/10.1023/A%
-visual recognition,” 2015. [Online]. Available: 3A1022645801436
-http://cs231n.stanford.edu/ [MSB12] G.Moser,S.B.Serpico,andJ.A.Benediktsson,
-[Low04] D. Lowe, “Distinctive image features from scale- “Markovrandomfieldmodelsforsupervisedland
+Transactionson,vol.20,no.6,pp.490–498,Jun.
+2001.
+[HJBJ+96] A. Hoover, G. Jean-Baptiste, X. Jiang, P. J.
+Flynn, H. Bunke, D. B. Goldgof, K. Bowyer,
+D. W. Eggert, A. Fitzgibbon, and R. B.
+Fisher, “An experimental comparison of range
+imagesegmentationalgorithms,”PatternAnalysis
+and Machine Intelligence, IEEE Transactions
+on, vol. 18, no. 7, pp. 673–689, Jul. 1996.
+[Online].Available:http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=506791
+[Ho95] T. K. Ho, “Random decision forests,” in
+Document Analysis and Recognition, 1995.,
+ProceedingsoftheThirdInternationalConference
+on, vol. 1. IEEE, 1995, pp. 278–282.
+[Online]. Available: http://ect.bell-labs.com/who/
+tkh/publications/papers/odt.pdf
+[Hus07] Hustvedt, “File:cctv lens flare.jpg,” Wikipedia
+Commons, Nov. 2007. [Online]. Avail-
+able: https://commons.wikimedia.org/wiki/File:
+CCTV_Lens_flare.jpg
+[HZCP04] X. He, R. Zemel, and M. Carreira-Perpindn,
+“Multiscale conditional random fields for image
+labeling,” in Computer Vision and Pattern
+Recognition, 2004. CVPR 2004. Proceedings
+of the 2004 IEEE Computer Society Conference
+on, vol. 2, Jun. 2004, pp. II–695–II–702 Vol.2.
+[Online]. Available: http://ieeexplore.ieee.org/xpl/
+login.jsp?tp=&arnumber=1315232
+[JLD03] K.Jiang,Q.-M.Liao,andS.-Y.Dai,“Anovelwhite
+bloodcellsegmentationschemeusingscale-space
+filtering and watershed clustering,” in Machine
+Learning and Cybernetics, 2003 International
+Conferenceon,vol.5,Nov2003,pp.2820–2825
+Vol.5.[Online].Available:http://ieeexplore.ieee.org/
+xpl/login.jsp?tp=&arnumber=1260033
+[Kaf07] L.Kaffer,“File:greatmaleleopardinsouthafrika-
+jd.jpg,”WikipediaCommons,Jul.2007.[Online].
+Available:https://commons.wikimedia.org/wiki/File:
+Great_male_Leopard_in_South_Afrika-JD.JPG
+[KKV+14] V.Kalesnykiene,J.-k.Kamarainen,R.Voutilainen,
+J. Pietilä, H. Kälviäinen, and H. Uusitalo,
+“Diaretdb1 diabetic retinopathy database and
+evaluation protocol,” 2014. [Online]. Available:
+http://www2.it.lut.fi/project/imageret/diaretdb1/
+[KP92] J. M. Kasson and W. Plouffe, “An analysis of
+selectedcomputerinterchangecolorspaces,”ACM
+TransactionsonGraphics(TOG),vol.11,no.4,pp.
+373–405,1992.
+[KP06] Z. Kato and T.-C. Pong, “A markov random
+field image segmentation model for color
+textured images,” Image and Vision Computing,
+vol. 24, no. 10, pp. 1103–1114, 2006. [Online].
+Available: http://www.sciencedirect.com/science/
+article/pii/S0262885606001223
+[KSH12] A. Krizhevsky, I. Sutskever, and G. E. Hinton,
+“Imagenet classification with deep convolutional
+neuralnetworks,”inAdvancesinneuralinformation
+processingsystems,2012,pp.1097–1105.
+[KWT88] M. Kass, A. Witkin, and D. Terzopoulos,
+“Snakes: Active contour models,” International
+journal of computer vision, vol. 1, no. 4, pp.
+321–331, Jan. 1988. [Online]. Available: http:
+//link.springer.com/article/10.1007/BF00133570
+[LKJ15] F.-F. Li, A. Karpathy, and J. Johnson,
+“CS231n: Convolutional neural networks for
+visual recognition,” 2015. [Online]. Available:
+http://cs231n.stanford.edu/
+[Low04] D. Lowe, “Distinctive image features from scale-
+invariant keypoints,” International Journal of
+ComputerVision,vol.60,no.2,pp.91–110,2004.
+[Online]. Available: http://dx.doi.org/10.1023/B%
+3AVISI.0000029664.99615.94
+[LRAL08] A. Levin, A. Rav-Acha, and D. Lischinski,
+“Spectral matting,” Pattern Analysis and
+Machine Intelligence, IEEE Transactions on,
+vol. 30, no. 10, pp. 1699–1712, 2008.
+[Online].Available:http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=4547428
+[LRKT09] L. Ladický, C. Russell, P. Kohli, and P. Torr,
+“Associativehierarchicalcrfsforobjectclassimage
+segmentation,”inComputerVision,2009IEEE12th
+International Conference on, 2009, pp. 739–746.
+[Online].Available:http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=5459248
+[LSD14] J. Long, E. Shelhamer, and T. Darrell, “Fully
+convolutionalnetworksforsemanticsegmentation,”
+arXiv preprint arXiv:1411.4038, 2014. [Online].
+Available:http://arxiv.org/abs/1411.4038
+[MAFM08] M. Maire, P. Arbelaez, C. Fowlkes, and
+J. Malik, “Using contours to detect and localize
+junctions in natural images,” in Computer Vision
+and Pattern Recognition, 2008. CVPR 2008.
+IEEE Conference on, June 2008, pp. 1–8.
+[Online].Available:http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=4587420
+[Man12] M. Manske, “File:randabschattung mikroskop
+kamera 6.jpg,” Wikipedia Com-
+mons, Dec. 2012. [Online]. Avail-
+able: https://commons.wikimedia.org/wiki/File:
+Randabschattung_Mikroskop_Kamera_6.JPG
+[MBLAGJ+07] S.Maldonado-Bascon,S.Lafuente-Arroyo,P.Gil-
+Jimenez, H. Gomez-Moreno, and F. Lopez-
+Ferreras, “Road-sign detection and recognition
+based on support vector machines,” Intelligent
+Transportation Systems, IEEE Transactions on,
+vol. 8, no. 2, pp. 264–278, Jun. 2007.
+[Online].Available:http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=4220659
+[MBVLG02] N.Moon,E.Bullitt,K.VanLeemput,andG.Gerig,
+“Automaticbrainandtumorsegmentation,”inMed-
+icalImageComputingandComputer-AssistedIn-
+tervention—MICCAI 2002. Springer, 2002, pp.
+372–379.
+[MFTM01] D. Martin, C. Fowlkes, D. Tal, and J. Malik,
+“A database of human segmented natural
+images and its application to evaluating
+segmentation algorithms and measuring ecological
+statistics,” in Computer Vision, 2001. ICCV
+2001. Proceedings. Eighth IEEE International
+Conference on, vol. 2. IEEE, 2001, pp. 416–423.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=937655
+[MHMK+14] L. Maier-Hein, S. Mersmann, D. Kondermann,
+S. Bodenstedt, A. Sanchez, C. Stock, H. G.
+Kenngott, M. Eisenmann, and S. Speidel, “Can
+masses of non-experts train highly accurate
+image classifiers?” in Medical Image Computing
+and Computer-Assisted Intervention–MICCAI 2014.
+Springer, 2014, pp. 438–445. [Online]. Available:
+http://opencas.webarchiv.kit.edu/?q=node/26
+[Min89] J. Mingers, “An empirical comparison of selection
+measures for decision-tree induction,” Machine
+Learning, vol. 3, no. 4, pp. 319–342, 1989.
+[Online]. Available: http://dx.doi.org/10.1023/A%
+3A1022645801436
+[MSB12] G. Moser, S. B. Serpico, and J. A. Benediktsson,
+“Markov random field models for supervised land
 14
-cover classification from very high resolution tionstrategies,”Fundam.Inform.,vol.41,no.1-2,
-multispectralremotesensingimages,”inAdvances pp.187–228,2000.
-in Radar and Remote Sensing (TyWRRS), 2012 [RM07] J. Reynolds and K. Murphy, “Figure-ground
-Tyrrhenian Workshop on. IEEE, 2012, pp. 235– segmentation using a hierarchical conditional
-242.[Online].Available:http://ieeexplore.ieee.org/ random field,” in Computer and Robot
-xpl/login.jsp?tp=&arnumber=6381135 Vision, 2007. CRV ’07. Fourth Canadian
-[MSC] “Object class recognition image database.” Conference on, May 2007, pp. 175–182.
-[Online].Available:http://research.microsoft.com/ [Online].Available:http://ieeexplore.ieee.org/xpls/
-vision/cambridge/recognition/ abs_all.jsp?arnumber=4228537
-[MSR] “Image understanding - research data,” [RMBK06] C.Rother,T.Minka,A.Blake,andV.Kolmogorov,
-Microsoft Research. [Online]. Avail- “Cosegmentation of image pairs by histogram
-able:http://research.microsoft.com/en-us/projects/ matching - incorporating a global constraint
-objectclassrecognition/ into mrfs,” in Computer Vision and Pattern
-[Mur12] K. P. Murphy, Machine learning: a probabilistic Recognition, 2006 IEEE Computer Society
-perspective. MITpress,2012. Conference on, vol. 1, June 2006, pp. 993–
-[OKS78] Y.-i.Ohta,T.Kanade,andT.Sakai,“Ananalysis 1000.[Online].Available:http://ieeexplore.ieee.org/
-systemforscenescontainingobjectswithsubstruc- xpls/abs_all.jsp?arnumber=1640859
-tures,”inProceedingsoftheFourthInternational [SAN+04] J. Staal, M. D. Abràmoff, M. Niemeijer,
-JointConferenceonPatternRecognitions,1978,pp. M.Viergever,B.VanGinnekenetal.,“Ridge-based
-752–754. vesselsegmentationincolorimagesoftheretina,”
-[PAA+87] S. M. Pizer, E. P. Amburn, J. D. Austin, Medical Imaging, IEEE Transactions on, vol. 23,
-R. Cromartie, A. Geselowitz, T. Greer, B. ter no. 4, pp. 501–509, 2004. [Online]. Available:
-HaarRomeny,J.B.Zimmerman,andK.Zuiderveld, http://www.isi.uu.nl/Research/Databases/DRIVE/
-“Adaptivehistogramequalizationanditsvariations,” [SCZ08] F. Schroff, A. Criminisi, and A. Zisserman,
-Computervision,graphics,andimageprocessing, “Object class segmentation using random
-vol. 39, no. 3, pp. 355–368, 1987. [Online]. forests.” in BMVC, 2008, pp. 1–10. [On-
-Available: http://www.sciencedirect.com/science/ line].Available:http://research.microsoft.com/pubs/
-article/pii/S0734189X8780186X 72423/Criminisi_bmvc2008.pdf
-[PC13] P. H. Pinheiro and R. Collobert, “Recurrent [SJC08] J. Shotton, M. Johnson, and R. Cipolla,
-convolutional neural networks for scene parsing,” “Semantictextonforestsforimagecategorization
-arXiv preprint arXiv:1306.2795, 2013. [Online]. and segmentation,” in Computer vision and
-Available:http://arxiv.org/abs/1306.2795v1 pattern recognition, 2008. CVPR 2008. IEEE
-[PH05] C. Pantofaru and M. Hebert, “A Conference on. IEEE, Jun. 2008, pp. 1–8.
-comparison of image segmentation algorithms,” [Online].Available:http://ieeexplore.ieee.org/xpls/
-Robotics Institute, p. 336, 2005. [Online]. abs_all.jsp?arnumber=4587503
-Available: http://riweb-backend.ri.cmu.edu/ [SM11] C. Sutton and A. McCallum, “An introduction
-pub_files/pub4/pantofaru_caroline_2005_1/ to conditional random fields,” Machine Learning,
-pantofaru_caroline_2005_1.pdf vol. 4, no. 4, pp. 267–373, 2011. [Online].
-[PS07] A. Protiere and G. Sapiro, “Interactive Available: http://homepages.inf.ed.ac.uk/csutton/
-image segmentation via adaptive weighted publications/crftutv2.pdf
-distances,” Image Processing, IEEE Transactions [Smi02] L. I. Smith, “A tutorial on principal components
-on, vol. 16, no. 4, pp. 1046–1057, 2007. analysis,”CornellUniversity,USA,vol.51,p.52,
-[Online].Available:http://ieeexplore.ieee.org/xpls/ 2002.
-abs_all.jsp?arnumber=4130436 [Smi04] B.T.Smith,“Lagrangemultiplierstutorialinthe
-[PTN09] N.Plath,M.Toussaint,andS.Nakajima,“Multi- contextofsupportvectormachines,”MemorialUni-
-classimagesegmentationusingconditionalrandom versityofNewfoundlandSt.John’s,Newfoundland,
-fields and global classification,” in Proceedings Canada,Jun.2004.
-of the 26th Annual International Conference on [SSA12] D.Schiebener,J.Schill,andT.Asfour,“Discovery,
-MachineLearning. ACM,2009,pp.817–824. segmentation and reactive grasping of unknown
-[PXP00] D. L. Pham, C. Xu, and J. L. Prince, “A objects.” in Humanoids, 2012, pp. 71–77. [On-
-survey of current methods in medical image line]. Available: http://h2t.anthropomatik.kit.edu/
-segmentation,” Annual Review of Biomedical pdf/Schiebener2012.pdf
-Engineering, vol. 2, no. 1, pp. 315–337, 2000, [SUM+11] D. Schiebener, A. Ude, J. Morimotot,
-pMID: 11701515. [Online]. Available: http:// T. Asfour, and R. Dillmann, “Segmentation
-dx.doi.org/10.1146/annurev.bioeng.2.1.315 andlearningofunknownobjectsthroughphysical
-[Qui86] J. R. Quinlan, “Induction of decision trees,” interaction,” in Humanoid Robots (Humanoids),
-Machine learning, vol. 1, no. 1, pp. 81–106, 2011 11th IEEE-RAS International Conference
-Aug. 1986. [Online]. Available: http://dx.doi.org/ on. IEEE, 2011, pp. 500–506. [Online].
-10.1023/A%3A1022643204877 Available:http://ieeexplore.ieee.org/ielx5/6086637/
-[Qui93] ——,C4.5:ProgramsforMachineLearning,P.Lan- 6100798/06100843.pdf
-gley,Ed. MorganKaufmannPublishers,Inc.,1993. [SWRC06] J. Shotton, J. Winn, C. Rother, and A. Criminisi,
-[RKB04] C.Rother,V.Kolmogorov,andA.Blake,“Grabcut: “Textonboost:Jointappearance,shapeandcontext
-Interactive foreground extraction using iterated modeling for multi-class object recognition and
-graph cuts,” ACM Transactions on Graphics segmentation,” in Computer Vision–ECCV 2006.
-(TOG),vol.23,no.3,pp.309–314,2004.[Online]. Springer,2006,pp.1–15.[Online].Available:http:
-Available:http://delivery.acm.org/10.1145/1020000/ //link.springer.com/chapter/10.1007/11744023_1
-1015720/p309-rother.pdf [TNL14] J. Tighe, M. Niethammer, and S. Lazebnik,
-[RM00] J. B. Roerdink and A. Meijster, “The watershed “Scene parsing with object instances and
-transform:Definitions,algorithmsandparalleliza- occlusion ordering,” in Computer Vision and
+cover classification from very high resolution
+multispectral remote sensing images,” in Advances
+in Radar and Remote Sensing (TyWRRS), 2012
+Tyrrhenian Workshop on. IEEE, 2012, pp. 235–
+242. [Online]. Available: http://ieeexplore.ieee.org/
+xpl/login.jsp?tp=&arnumber=6381135
+[MSC] “Object class recognition image database.”
+[Online].Available:http://research.microsoft.com/
+vision/cambridge/recognition/
+[MSR] “Image understanding - research data,”
+Microsoft Research. [Online]. Avail-
+able:http://research.microsoft.com/en-us/projects/
+objectclassrecognition/
+[Mur12] K. P. Murphy, Machine learning: a probabilistic
+perspective. MIT press, 2012.
+[OKS78] Y.-i. Ohta, T. Kanade, and T. Sakai, “An analysis
+system for scenes containing objects with substruc-
+tures,” in Proceedings of the Fourth International
+Joint Conference on Pattern Recognitions, 1978, pp.
+752–754.
+[PAA+87] S. M. Pizer, E. P. Amburn, J. D. Austin,
+R. Cromartie, A. Geselowitz, T. Greer, B. ter
+Haar Romeny, J. B. Zimmerman, and K. Zuiderveld,
+“Adaptive histogram equalization and its variations,”
+Computer vision, graphics, and image processing,
+vol. 39, no. 3, pp. 355–368, 1987. [Online].
+Available: http://www.sciencedirect.com/science/
+article/pii/S0734189X8780186X
+[PC13] P. H. Pinheiro and R. Collobert, “Recurrent
+convolutional neural networks for scene parsing,”
+arXiv preprint arXiv:1306.2795, 2013. [Online].
+Available:http://arxiv.org/abs/1306.2795v1
+[PH05] C. Pantofaru and M. Hebert, “A
+comparison of image segmentation algorithms,”
+Robotics Institute, p. 336, 2005. [Online].
+Available: http://riweb-backend.ri.cmu.edu/
+pub_files/pub4/pantofaru_caroline_2005_1/
+pantofaru_caroline_2005_1.pdf
+[PS07] A. Protiere and G. Sapiro, “Interactive
+image segmentation via adaptive weighted
+distances,” Image Processing, IEEE Transactions
+on, vol. 16, no. 4, pp. 1046–1057, 2007.
+[Online].Available:http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=4130436
+[PTN09] N. Plath, M. Toussaint, and S. Nakajima, “Multi-
+class image segmentation using conditional random
+fields and global classification,” in Proceedings
+of the 26th Annual International Conference on
+Machine Learning. ACM, 2009, pp. 817–824.
+[PXP00] D. L. Pham, C. Xu, and J. L. Prince, “A
+survey of current methods in medical image
+segmentation,” Annual Review of Biomedical
+Engineering, vol. 2, no. 1, pp. 315–337, 2000,
+pMID: 11701515. [Online]. Available: http://
+dx.doi.org/10.1146/annurev.bioeng.2.1.315
+[Qui86] J. R. Quinlan, “Induction of decision trees,”
+Machine learning, vol. 1, no. 1, pp. 81–106,
+Aug. 1986. [Online]. Available: http://dx.doi.org/
+10.1023/A%3A1022643204877
+[Qui93] ——, C4.5: Programs for Machine Learning, P. Lan-
+gley, Ed. Morgan Kaufmann Publishers, Inc., 1993.
+[RKB04] C. Rother, V. Kolmogorov, and A. Blake, “Grabcut:
+Interactive foreground extraction using iterated
+graph cuts,” ACM Transactions on Graphics
+(TOG), vol. 23, no. 3, pp. 309–314, 2004. [Online].
+Available: http://delivery.acm.org/10.1145/1020000/
+1015720/p309-rother.pdf
+[RM00] J. B. Roerdink and A. Meijster, “The watershed
+transform: Definitions, algorithms and paralleliza-
+tion strategies,” Fundam. Inform., vol. 41, no. 1-2,
+pp. 187–228, 2000.
+[RM07] J. Reynolds and K. Murphy, “Figure-ground
+segmentation using a hierarchical conditional
+random field,” in Computer and Robot
+Vision, 2007. CRV ’07. Fourth Canadian
+Conference on, May 2007, pp. 175–182.
+[Online].Available:http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=4228537
+[RMBK06] C. Rother, T. Minka, A. Blake, and V. Kolmogorov,
+“Cosegmentation of image pairs by histogram
+matching - incorporating a global constraint
+into mrfs,” in Computer Vision and Pattern
+Recognition, 2006 IEEE Computer Society
+Conference on, vol. 1, June 2006, pp. 993–
+1000. [Online]. Available: http://ieeexplore.ieee.org/
+xpls/abs_all.jsp?arnumber=1640859
+[SAN+04] J. Staal, M. D. Abràmoff, M. Niemeijer,
+M. Viergever, B. Van Ginneken et al., “Ridge-based
+vessel segmentation in color images of the retina,”
+Medical Imaging, IEEE Transactions on, vol. 23,
+no. 4, pp. 501–509, 2004. [Online]. Available:
+http://www.isi.uu.nl/Research/Databases/DRIVE/
+[SCZ08] F. Schroff, A. Criminisi, and A. Zisserman,
+“Object class segmentation using random
+forests.” in BMVC, 2008, pp. 1–10. [On-
+line].Available:http://research.microsoft.com/pubs/
+72423/Criminisi_bmvc2008.pdf
+[SJC08] J. Shotton, M. Johnson, and R. Cipolla,
+“Semantic texton forests for image categorization
+and segmentation,” in Computer vision and
+pattern recognition, 2008. CVPR 2008. IEEE
+Conference on. IEEE, Jun. 2008, pp. 1–8.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=4587503
+[SM11] C. Sutton and A. McCallum, “An introduction
+to conditional random fields,” Machine Learning,
+vol. 4, no. 4, pp. 267–373, 2011. [Online].
+Available: http://homepages.inf.ed.ac.uk/csutton/
+publications/crftutv2.pdf
+[Smi02] L. I. Smith, “A tutorial on principal components
+analysis,” Cornell University, USA, vol. 51, p. 52,
+2002.
+[Smi04] B. T. Smith, “Lagrange multipliers tutorial in the
+context of support vector machines,” Memorial Uni-
+versity of Newfoundland St. John’s, Newfoundland,
+Canada, Jun. 2004.
+[SSA12] D. Schiebener, J. Schill, and T. Asfour, “Discovery,
+segmentation and reactive grasping of unknown
+objects.” in Humanoids, 2012, pp. 71–77. [On-
+line]. Available: http://h2t.anthropomatik.kit.edu/
+pdf/Schiebener2012.pdf
+[SUM+11] D. Schiebener, A. Ude, J. Morimoto,
+T. Asfour, and R. Dillmann, “Segmentation
+and learning of unknown objects through physical
+interaction,” in Humanoid Robots (Humanoids),
+2011 11th IEEE-RAS International Conference
+on. IEEE, 2011, pp. 500–506. [Online].
+Available: http://ieeexplore.ieee.org/ielx5/6086637/
+6100798/06100843.pdf
+[SWRC06] J. Shotton, J. Winn, C. Rother, and A. Criminisi,
+“Textonboost: Joint appearance, shape and context
+modeling for multi-class object recognition and
+segmentation,” in Computer Vision–ECCV 2006.
+Springer, 2006, pp. 1–15. [Online]. Available: http:
+//link.springer.com/chapter/10.1007/11744023_1
+[TNL14] J. Tighe, M. Niethammer, and S. Lazebnik,
+“Scene parsing with object instances and
+occlusion ordering,” in Computer Vision and
 15
-Pattern Recognition (CVPR), 2014 IEEE GLOSSARY
+Pattern Recognition (CVPR), 2014 IEEE
 Conference on. IEEE, 2014, pp. 3748–3755.
-ACM active contour model. 6
 [Online].Available:http://ieeexplore.ieee.org/xpls/
 abs_all.jsp?arnumber=6909874
-[UPH05] R. Unnikrishnan, C. Pantofaru, and M. Hebert, BOV bag-of-visual-words. 5
+[UPH05] R. Unnikrishnan, C. Pantofaru, and M. Hebert,
 “A measure for objective evaluation of
 image segmentation algorithms,” in Computer
-CNN Convolution Neuronal Network. 5, 9
 Vision and Pattern Recognition-Workshops, 2005.
-CVPR Workshops. IEEE Computer Society CRF Conditional Random Field. 4, 8, 9, 11
+CVPR Workshops. IEEE Computer Society
 Conference on. IEEE, 2005, pp. 34–34.
-[Online].Available:http://repository.cmu.edu/cgi/ GPU graphics processing unit. 3
+[Online].Available:http://repository.cmu.edu/cgi/
 viewcontent.cgi?article=1365&context=robotics
 [vdMPvdH09] L. J. van der Maaten, E. O. Postma, and H. J.
-HOG histogram of oriented gradients. 5, 6, 8
 vandenHerik,“Dimensionalityreduction:Acom-
 parative review,” Journal of Machine Learning
-Research,vol.10,no.1-41,pp.66–71,2009. ILSVRC ImageNet Large-Scale Visual Recognition
+Research,vol.10,no.1-41,pp.66–71,2009.
 [VOC10] “Voc2010 preliminary results,” 2010. [Online].
-Challenge. 9
 Available:http://host.robots.ox.ac.uk/pascal/VOC/
 voc2010/results/index.html
-[WAH97] G.-Q.Wei,K.Arbter,andG.Hirzinger,“Automatic MAP Maximum A Posteriori. 8
-tracking of laparoscopic instruments by color MR magnetic resonance. 2, 6
+[WAH97] G.-Q.Wei,K.Arbter,andG.Hirzinger,“Automatic
+tracking of laparoscopic instruments by color
 coding,” in CVRMed-MRCAS’97, ser. Lecture
-MRF Markov Random Field. 4, 8
 NotesinComputerScience,J.Troccaz,E.Grimson,
 andR.Mösges,Eds. SpringerBerlinHeidelberg,
-1997,vol.1205,pp.357–366.[Online].Available: PCA principal component analysis. 5
+1997,vol.1205,pp.357–366.[Online].Available:
 http://dx.doi.org/10.1007/BFb0029257
 [YBCK10] Z. Yin, R. Bise, M. Chen, and T. Kanade, “Cell
-RBF radial basis function. 8
 segmentation in microscopy imagery using a
 bag of local bayesian classifiers,” in Biomedical
-Imaging: From Nano to Macro, 2010 IEEE SIFT scale-invariant feature transform. 5
-InternationalSymposiumon,Apr.2010,pp.125– SVM Support Vector Machine. 4, 6–8
+Imaging: From Nano to Macro, 2010 IEEE
+InternationalSymposiumon,Apr.2010,pp.125–
 128.[Online].Available:http://ieeexplore.ieee.org/
 xpls/abs_all.jsp?arnumber=5490399
 [YHRF12] Y. Yang, S. Hallman, D. Ramanan, and
@@ -1063,12 +1801,33 @@ of the IEEE International Conference on
 Computer Vision, 2015, pp. 1529–1537. [Online].
 Available: http://www.robots.ox.ac.uk/~szheng/
 papers/CRFasRNN.pdf
+GLOSSARY
+ACM active contour model. 6
+BOV bag-of-visual-words. 5
+CNN Convolutional Neural Network. 5, 9
+CRF Conditional Random Field. 4, 8, 9, 11
+GPU graphics processing unit. 3
+HOG histogram of oriented gradients. 5, 6, 8
+ILSVRC ImageNet Large-Scale Visual Recognition
+Challenge. 9
+MAP Maximum A Posteriori. 8
+MR magnetic resonance. 2, 6
+MRF Markov Random Field. 4, 8
+PCA principal component analysis. 5
+RBF radial basis function. 8
+SIFT scale-invariant feature transform. 5
+SVM Support Vector Machine. 4, 6–8
 16
 APPENDIXA
 TABLES
-Number Number
-Database ImageResolution(width×height) of of Channels Datasource
-Images Classes
+Database Image Resolution (width×height) Number of Images Number of Classes Channels Data source
 ColonCryptDB (302px−1116px)×(349px−875px) 389 2 3 [CRSS]
 DIARETDB1 1500px×1500px 89 4 3 [KKV+14]
 KITTIRoad (1226px−1242px)×(370px−376px) 289 2 3 [FKG13]
diff --git a/read/results/pdfplumber/1707.09725.txt b/read/results/pdfplumber/1707.09725.txt
index 4846f19..a8aac29 100644
--- a/read/results/pdfplumber/1707.09725.txt
+++ b/read/results/pdfplumber/1707.09725.txt
@@ -1,13 +1,8 @@
 Analysis and Optimization of
 Convolutional Neural Network
 Architectures
-7102
-luJ
-13
 Master Thesis of
-]VC.sc[
 Martin Thoma
-1v52790.7071:viXra
 Department of Computer Science
 Institute for Anthropomatics
 and
@@ -17,6 +12,40 @@ Second reviewer: Prof. Dr.–Ing. J. M. Zöllner
 Advisor: Dipl.–Inform. Michael Weber
 Research Period: 03. May 2017 – 03. August 2017
 KIT – University of the State of Baden-Wuerttemberg and National Research Center of the Helmholtz Association www.kit.edu
+arXiv:1707.09725v1 [cs.CV] 31 Jul 2017
 
 Analysis and Optimization of Convolutional Neural
 Network Architectures
@@ -236,28 +265,64 @@ This chapter introduces linear image filters in Section 2.1, then standard layer
 CNNs are explained in Section 2.2. The layer block pattern is described in Section 2.3,
 transition layers in Section 2.4 and nine ways to analyze CNNs are described in Section 2.5.
 2.1. Linear Image Filters
-A linear image filter (also called a filter bank or a kernel) is an element F ∈ Rkw×k h×d,
-where k represents the filter’s width, k the filter’s height and d the number of input
-w h
+A linear image filter (also called a filter bank or a kernel) is an element $F \in \mathbb{R}^{k_w \times k_h \times d}$,
+where $k_w$ represents the filter’s width, $k_h$ the filter’s height and $d$ the number of input
 channels. The filter $F$ is convolved with the image $I \in \mathbb{R}^{w \times h \times d}$ to produce a new image $I'$.
 The output image $I'$ has only one channel. Each pixel $I'(x,y)$ of the output image gets
 calculated by point-wise multiplication of one filter element with one element of the original
 image I:
-(cid:98) (cid:88)k 2w(cid:99) (cid:98) (cid:88)k 2h(cid:99) (cid:88)d
-I(cid:48)(x,y) = I(x+i ,y+i ,i )·F(i ,i ,i )
-x y c x y c
-ix=1−(cid:100)k 2w(cid:101)iy=1−(cid:100)k 2h(cid:101)ic=1
+$$I'(x,y) = \sum_{i_x = 1-\lceil \frac{k_w}{2} \rceil}^{\lfloor \frac{k_w}{2} \rfloor} \; \sum_{i_y = 1-\lceil \frac{k_h}{2} \rceil}^{\lfloor \frac{k_h}{2} \rfloor} \; \sum_{i_c = 1}^{d} I(x+i_x, y+i_y, i_c) \cdot F(i_x, i_y, i_c)$$
 This procedure is explained by Figure 2.1. It is essentially a discrete convolution.
-I ∈ R7×7 I(cid:48) ∈ R7×7
-47 187
-47 -642
-58 -849
-1 1 1 1 1 10 0 1 0 1 14 9 6 1 4 1 19 4 4 91 07 7 7 96 4 1 1 1 1 9 91 1 1 0 9 76 4 1 9 1 1 1 19 9 61 1 0 17 7 12 6 9 6 1 1 19 4 40 1 17 6 95 5 6 1 1 14 9 41 1 16 9 60 6 2 1 1 14 9 40 0 05 7 80 1 4 - F9 26 ilter- -53 8 kerne- 3 01 Re- s9 2 9 u3 8 4 l6 2 to- -5 f3 74 p3 953 2 oin- 2 t1 09 -0 w19 - - - - -5 3 7 5 4-4 2 4 1 6 00 0 8 7 - 5 4 5-2 4 55 2 95 5 39 9 64 - - -2 3 -4 1 574 59 0 550 08 5 - 2 1 1 3 26 81 8 7 6 26 01 5 3 8 32 - 83 2 51 2 18 5 73 6 18 1 18 - -2 2 91 1 51 6 78 2 45 8 60 8 - -5 1 68 6 20 5 66 5 43 6 01 5
-0
-6 l ise 8
-118 879
-63 F ∈ R3×3 multiplication 647
-112 302
+I ∈ R7×7
+Filterkernel
+F ∈ R3×3
+Resultofpoint-wise
+multiplication
+I(cid:48) ∈ R7×7
+104 116 116 112
+58
+47
+47
+109 97 114 116 105 110 45 116 104 111 109 97 46 100 101 47 109 97 115 116 101 114 47 99 97 116 99 97 116 99 97 116 46 112 104
+112
+63
+118
+61 49 46 48 9 -3 -1 -6 5 3 2 -8 0 936 -333 -109 -282 545 291 94 -792 0 -4 -254 -498 -662
+-849
+-642
+187
+-520 45 240 211 388 215 -861 -340 559 -105 185 -138 -180 503 -718 429 350 173 251 268 -655 -567 -53 -75 80 571 -128 24 -408 596 -550 368 26 976 156
+302
+647
+879
+223 811 54 660
 Figure 2.1.: Visualization of the application of a linear k×k×1 image filter. For each pixel of the
 output image, $k^2$ multiplications and $k^2$ additions of the products have to be calculated.
 3
@@ -265,7 +330,9 @@ outputimage,k2 multiplicationsandk2 additionsoftheproductshavetobecalculated.
 One important detail is how boundaries are treated. There are four common ways of
 boundary treatment:
 • don’t compute: The image $I'$ will be smaller than the original image. $I' \in$
-R(w−kw+1)×(h−k h+1)×d3, to be exact.
+R(w−kw+1)×(h−k
+h
++1)×d3 , to be exact.
 • zero padding: The image I is padded by zeros where the filter would access elements
 which do not exist. This will result in edges being detected at the border if the border
 pixels are not black, but doesn’t need any computation.
@@ -309,27 +376,50 @@ Traditional CNNs have three important building tools:
 Convolutional layers take several feature maps as input and produce n feature maps1 as
 output, where n is the number of filters in the convolution layer. The filter weights of
 the linear convolutions are the parameters which are adapted to the training data. The
-number n of filters as well as the filter’s size k ×k are hyperparameters of convolutional
-w h
-layers. Sometimes, it is denoted as n@k ×k . Although the filter depth is usually omitted
-w h
-in the notation, the filters are of dimension k ×k ×d(i−1), where d(i−1) is the number of
-w h
+number $n$ of filters as well as the filter’s size $k_w \times k_h$ are hyperparameters of convolutional
+layers. Sometimes, it is denoted as $n@k_w \times k_h$. Although the filter depth is usually omitted
+in the notation, the filters are of dimension $k_w \times k_h \times d^{(i-1)}$, where $d^{(i-1)}$ is the number of
 feature maps of the input layer (i−1).
-Another hyperparameter of convolution layers is the stride s ∈ N and the padding.
+Another hyperparameter of convolution layers is the stride $s \in \mathbb{N}_{\geq 1}$ and the padding.
 Padding (usually zero-padding [SCL12, SEZ+13, HZRS15a]) is used to make sure that the
 size of the feature maps doesn’t change.
 The hyperparameters of convolutional layers are
-• the number of filters n ∈ N ,
+• the number of filters $n \in \mathbb{N}_{\geq 1}$,
+• $k_w, k_h \in \mathbb{N}_{\geq 1}$
-• k ,k ∈ N of the filter size k ×k ×d(i−1),
-w h ≥1 w h
+of the filter size $k_w \times k_h \times d^{(i-1)}$,
 • the activation function of the layer (see Table B.3) and
 • the stride $s \in \mathbb{N}_{\geq 1}$
-Typical choices are n ∈ {32,64,128}, k = k = k ∈ {1,3,5,11} such as in [KSH12,
-w h
+Typical choices are $n \in \{32,64,128\}$, $k_w = k_h = k \in \{1,3,5,11\}$ such as in [KSH12,
 SZ14, SLJ+15], rectified linear unit (ReLU) activation and s = 1.
 The concept of weight sharing is crucial for CNNs. This concept was introduced in [WHH+89].
 With weight sharing, the filters can be learned with stochastic gradient descent (SGD) just
@@ -339,15 +429,41 @@ if only the flattened output is compared.
 5
 2. Convolutional Neural Networks
 This is easier to see when the filtering operation is denoted formally:
+o(i)(x) = b+
 k
 (cid:88)
-o(i)(x) = b+ w ·x with i ∈ {1,...,w}×{1,...,h}×{1,...,d} [2.1]
-ij j
 j=1
-(cid:98) (cid:88)k 2w(cid:99) (cid:98) (cid:88)k 2h(cid:99) (cid:88)d
-o(x,y,z)(I) = b+ F (i ,i ,i )·I(x+i ,y+i ,i ) [2.2]
-z x y c x y c
-ix=1−(cid:100)k 2w(cid:101)iy=1−(cid:100)k 2h(cid:101)ic=1
+$w_{ij} \cdot x_j$ with $i \in \{1,\dots,w\} \times \{1,\dots,h\} \times \{1,\dots,d\}$ [2.1]
+$$o^{(x,y,z)}(I) = b + \sum_{i_x = 1-\lceil \frac{k_w}{2} \rceil}^{\lfloor \frac{k_w}{2} \rfloor} \; \sum_{i_y = 1-\lceil \frac{k_h}{2} \rceil}^{\lfloor \frac{k_h}{2} \rfloor} \; \sum_{i_c = 1}^{d} F_z(i_x, i_y, i_c) \cdot I(x+i_x, y+i_y, i_c)$$ [2.2]
 with a bias b ∈ R, x ∈ {1,...,w}, y ∈ {1,...,h} and z ∈ {1,...,d}
 One can see that most weights of the equivalent MLP are zero and many weights are
 equivalent. Hence the advantage of CNNs compared to MLPs is the reduction of parameters.
@@ -356,36 +472,63 @@ estimations for those. This means a MLP which is able to compute the same functi
 CNN will likely have worse results on the same dataset, if a CNN architecture is suitable
 for the dataset.
 See Figure 2.2 for a visualization of the application of a convolutional layer.
+3 feature maps
+(e.g. RGB)
+n feature maps
 n filters of
 size k×k×3
-...
+wi
+d t
+h
+w
+wi
+d t
+h
+w
+h
+e i
+g
+h t
+h
+h
+e i
+g
+h t
+h
 neural
-...
 network
-...
+data
 apply
 ...
-h h
-w w
-data h ... h thgieh thgieh
-t t
-d d
-wi wi
 ...
-3 feature maps
-n feature maps
-(e.g. RGB)
+...
+...
+...
+...
 Figure 2.2.: Application of a single convolutional layer with n filters of size k×k×3 with stride
 s=1 to input data of size width×height with three channels.
 6
 2.2. CNN Layer Types
-A convolutional layer with n filters of size k ×k and SAME padding after d(i−1) feature
-w h
-maps of size s ×s has n·d(i−1)·(k ·k ) parameters if no bias is used. In contrast, a fully
-x y w h
+A convolutional layer with $n$ filters of size $k_w \times k_h$ and SAME padding after $d^{(i-1)}$ feature
+maps of size $s_x \times s_y$ has $n \cdot d^{(i-1)} \cdot (k_w \cdot k_h)$ parameters if no bias is used. In contrast, a fully
 connected layer which produces the same output size and does not use a bias would have
-n·d(i−1) ·(s ×s )2 parameters. This means a convolutional layer has drastically fewer
-x y
+$n \cdot d^{(i-1)} \cdot (s_x \times s_y)^2$ parameters. This means a convolutional layer has drastically fewer
 parameters. On the one hand, this means it can learn less complex decision boundaries. On
 the other hand, it means fewer parameters have to be learned and hence the optimization
 procedure needs fewer examples and the optimization objective is simpler.
@@ -396,8 +539,11 @@ Another insight recently got important: Every fully connected layer has an equiv
 convolutional layer which has the same weights.2 This way, one can use the complete
 classification network as a very complex non-linear image filter which can be used for
 semantic segmentation.
-A fully connected layer with d ∈ N inputs and n ∈ N nodes can be interpreted as a
-≥1 ≥1
+A fully connected layer with $d \in \mathbb{N}_{\geq 1}$ inputs and $n \in \mathbb{N}_{\geq 1}$ nodes can be interpreted as a
 convolutional layer with an input of shape 1×1×d and n filters of size 1×1. This will
 produce an output shape 1×1×n. Every single output is connected to all of the inputs.
 When a convolutional layer is followed by a fully connected layer, it is necessary to vectorize
@@ -407,8 +553,9 @@ omitted if a convolution layer without padding and a filter size equal to the fe
 size is applied. This was used by [LSD15].
 2.2.2. Pooling Layers
 Pooling summarizes a p×p area of the input feature map. Just like convolutional layers,
-pooling can be used with a stride of s ∈ N . As s ≥ 2 is the usual choice, pooling layers
+pooling can be used with a stride of $s \in \mathbb{N}_{>1}$. As $s \geq 2$ is the usual choice, pooling layers
 are sometimes also called subsampling layers. Typically, p ∈ {2,3,4,5} and s = 2 such as
 for AlexNet [KSH12] and VGG-16 [SZ14].
 The type of summary for the set of activations A varies between the functions listed
@@ -419,47 +566,94 @@ functions as introduced in [LGT16].
 2. Convolutional Neural Networks
 Name Definition Used by
 Max pooling max{a ∈ A} [BPL10, KSH12]
-Average / mean pooling 1 (cid:80) a LeNet-5 [LBBH98] and [KSlB+10]
-|A| a∈A
-(cid:96) pooling (cid:112)(cid:80) a2 [Le13]
-2 a∈A
+Average / mean pooling $\frac{1}{|A|} \sum_{a \in A} a$ LeNet-5 [LBBH98] and [KSlB+10]
+$\ell_2$ pooling $\sqrt{\sum_{a \in A} a^2}$ [Le13]
 Stochastic pooling * [ZF13]
 Table 2.1.: Pooling types for a set A of activations a∈R.
-(*)Forstochasticpooling,eachofthep×pactivationvaluesa inthepoolingregiongets
+(*) For stochastic pooling, each of the $p \times p$ activation values $a_i$ in the pooling region gets
+picked with probability $p_i$
-pickedwithprobabilityp i = (cid:80) aja ∈i Aaj. Thisassumestheactivationsa i arenon-negative.
+$= \frac{a_i}{\sum_{a_j \in A} a_j}$. This assumes the activations $a_i$ are non-negative.
 Pooling is applied for three reasons: To get local translational invariance, to get invariance
-against minor local changes and, most important, for data reduction to 1 th of the data by
+against minor local changes and, most important, for data reduction to $\frac{1}{s^2}$th of the data by
 using strides of s > 1.
 See Figure 2.3 for a visualization of max pooling.
-2
+7 9 3 5 9 4
+0 7 0 0 9 0
+5 0 9 3 7 5
 9 2 9 6 4 3
+2×2 max pooling
+9 5 9
+9 9 7
+2
 2
-5 0 9 3 7 5 2×2 max pooling 9 9 7
-0 7 0 0 9 0 9 5 9
-7 9 3 5 9 4
 Figure 2.3.: 2×2 max pooling applied to a feature map of size 6×4 with stride s=2 and padding.
 Average pooling of p×p areas with stride s can be replaced by a convolutional layer. If
 the input of the pooling layer are d(i−1) feature maps, the convolutional layer has to have
 d(i−1) filters of size p×p and stride s. The ith filter has the values
- 
-1 ... 1
-p2 p2
- . . . 
-. . .
- . . . 
- 
-1 ... 1
-p2 p2
+$$\begin{pmatrix}
+\frac{1}{p^2} & \dots & \frac{1}{p^2} \\
+\vdots & \ddots & \vdots \\
+\frac{1}{p^2} & \dots & \frac{1}{p^2}
+\end{pmatrix}$$
 for the dimension i and the zero matrix
- 
+
+
+
+
 0 ... 0
-. . .
-. . .
-. . .
- 
+.
+.
+.
+.
+.
+.
+.
+.
+.
 0 ... 0
+
+
+
+
 for all other dimensions i = 1,...,d(i−1).
 8
 2.2. CNN Layer Types
@@ -468,20 +662,27 @@ Dropout is a technique used to prevent overfitting and co-adaptations of neurons
 the output of any neuron to zero with probability p. It was introduced in [HSK+12] and is
 well-described in [SHK+14].
 A Dropout layer can be implemented as follows: For an input in of any shape s, a tensor of
-the same shape D ∈ {0,1}s is sampled, where each element d is sampled independently
+the same shape $D \in \{0,1\}^s$ is sampled, where each element $d_i$ is sampled independently
 from a Bernoulli distribution. The results are element-wise multiplied to calculate the
 output out of the Dropout layer:
-out = D(cid:12)in with d ∼ B(1,p)
+$\mathrm{out} = D \odot \mathrm{in}$ with $d_i \sim B(1,p)$
 where $\odot$ is the Hadamard product
-(A(cid:12)B) := (A) (B)
-i,j i,j i,j
+$(A \odot B)_{i,j} := (A)_{i,j} (B)_{i,j}$
 Hence every value of the input gets set to zero with a dropout probability of p. Typically,
 Dropout is used with p = 0.5. Layers closer to the input usually have a lower dropout prob-
 ability than later layers. In order to keep the expected output at the same value, the
-output of a dropout layer is multiplied with 1 when dropout is enabled [Las17, tf-16b].
+output of a dropout layer is multiplied with $\frac{1}{1-p}$ when dropout is enabled [Las17, tf-16b].
 At inference time, dropout is disabled.
 Dropout is usually only applied after fully connected layers, but not after convolutional
 layers as it usually increases the test error as pointed out in [GG16].
@@ -504,25 +705,39 @@ input features might drastically change over time.
 One way to approach this problem is by normalizing mini-batches as described in [IS15]. A
 Batch Normalization layer with d-dimensional input x = (x(1),...,x(d)) is first normalized
 point-wise to
-x(k)−x¯(k)
-xˆ(k) =
+$$\hat{x}^{(k)} = \frac{x^{(k)} - \bar{x}^{(k)}}{\sqrt{s'[x^{(k)}]^2 + \varepsilon}}$$
-with x¯(k) = 1 (cid:80)m x(k) being the sample mean and s(cid:48)[x(k)]2 = 1 (cid:80)m (x(k) −x¯(k)) the
-m i=1 i m i=1 i
-sample variance where m ∈ N is the number of training samples per mini-batch, ε > 0
+with $\bar{x}^{(k)} = \frac{1}{m} \sum_{i=1}^{m} x_i^{(k)}$ being the sample mean and $s'[x^{(k)}]^2 = \frac{1}{m} \sum_{i=1}^{m} (x_i^{(k)} - \bar{x}^{(k)})^2$ the
+sample variance where $m \in \mathbb{N}_{\geq 1}$
-being a small constant to prevent division by zero and x(k) is the activation of neuron k for
+is the number of training samples per mini-batch, $\varepsilon > 0$
+being a small constant to prevent division by zero and $x_i^{(k)}$ is the activation of neuron $k$ for
 training sample i.
 Additionally, for each activation x(k) two parameters γ(k),β(k) are introduced which scale
 and shift the feature:
-y(k) = γ(k)·xˆ(k)+β(k)
+$y^{(k)} = \gamma^{(k)} \cdot \hat{x}^{(k)} + \beta^{(k)}$
 In the case of fully connected layers, this is applied to the activation, before the non-linearity
 is applied. If it is applied after the activation, it harms the training in early stages. For
 convolution, only one γ and one β is learned per feature map.
+One important special case is γ(k) =
 (cid:112)
-One important special case is γ(k) = s(cid:48)[x(k)]2+ε and β(k) = x¯(k), which would make the
+$s'[x^{(k)}]^2 + \varepsilon$ and $\beta^{(k)} = \bar{x}^{(k)}$, which would make the
 Batch Normalization layer an identity layer.
 During evaluation time,3 the expected value and the variance are calculated once for the
 complete dataset. An unbiased estimate of the empirical variance is used.
@@ -537,8 +752,9 @@ The authors of [IS15] suggest to use Batch Normalization before the activation f
 as in Items 1 and 4. Batch Normalization after the activation led to better results in
 https://github.com/ducha-aiki/caffenet-benchmark/blob/master/batchnorm.md
 Another normalization layer is Local Response Normalization as described in [KSH12],
-which includes (cid:96) normalization as described in [WWQ13]. Those two normalization layers,
+which includes (cid:96)
 2
+normalization as described in [WWQ13]. Those two normalization layers,
 however, are superseded by Batch Normalization.
 3also called inference time
 10
@@ -554,19 +770,35 @@ as introduced in [HZRS15a] is to add an identity connection which skips two laye
 identity connection adds the feature maps onto the other feature maps and thus requires
 the output of the input layer of the residual block to be of the same dimension as last layer
 of the residual block.
-Formally, it can be described as follows. If x are the feature maps after layer i and x is
-i 0
+Formally, it can be described as follows. If x
+i
+are the feature maps after layer i and x
+0
+is
 the input image, H is a non-linear transformation of feature maps, then
 y = H(x)
 describes a traditional CNN. Note that this could be multiple layers. A residual block as
 visualized in Figure 2.4 is described by
 y = H(x)+x
 In [HZRS15a], they only used residual skip connections to skip two layers. Hence, if
-conv (x ) describes the application of the convolutional layer i to the input x without the
-i i i
+conv
+i
+(x
+i
+) describes the application of the convolutional layer i to the input x
+i
+without the
 nonlinearity, then such a residual block is
-x = conv (ReLU(conv (x )))+x
-i+2 i+1 i i i
+x
+i+2
+= conv
+i+1
+(ReLU(conv
+i
+(x
+i
+)))+x
+i
 Figure 2.4.: ResNet module
 Image source: [HZRS15a]
 [HM16] provides some insights why deep residual networks are successful.
@@ -576,18 +808,24 @@ Image source: [HZRS15a]
 Two common ways to add more parameters to neural networks are increasing their depth
 by adding more layers or increasing their width by adding more neurons / filters. Inception
 blocks [AM15] implicitly started a new idea which was explicitly described in [XGD+16] as
-“ResNeXt block”: Increasing the cardinality C ∈ N . By cardinality, the authors describe
+“ResNeXt block”: Increasing the cardinality C ∈ N
 ≥1
+. By cardinality, the authors describe
 the concept of having C small convolutional networks with the same topology but different
 weights. This concept is visualized in Figure 2.5. Please note that Figure 2.5 does not
 combine aggregation blocks with residual blocks as the authors did.
 256-d in
-4 @ 1×1×256 4 @ 1×1×256 total 32 4 @ 1×1×256
+concatenate
+total 32
 groups
 ...
-4 @ 3×3×4 4 @ 3×3×4 4 @ 3×3×4
-concatenate
 128-d out
+4 @ 1×1×256
+4 @ 3×3×4
+4 @ 1×1×256
+4 @ 3×3×4
+4 @ 1×1×256
+4 @ 3×3×4
 Figure 2.5.: Aggregation block with a cardinality of C = 32. Each of the 32 groups is a 2-layer
 convolutional network. The first layer receives 256 feature maps and applies four 1×1
 filters to it. The second layer applies four 3×3 filters. Although every group has
@@ -595,8 +833,9 @@ the same topology, the learned weights are different. The outputs of the groups
 concatenated.
 The hyperparameters of an aggregation block are:
 • The topology of the group members.
-• The cardinality C ∈ N . Note that a cardinality of C = 1 is equivalent in every
+• The cardinality C ∈ N
 ≥1
+. Note that a cardinality of C = 1 is equivalent in every
 aspect to using the group network without an aggregation block.
 12
 2.3. CNN Blocks
@@ -604,28 +843,36 @@ aspect to using the group network without an aggregation block.
 Dense blocks are collections of convolutional layers which are introduced in [HLW16]. The
 idea is to connect each convolutional layer directly to subsequent convolutional layers.
 Traditional CNNs with L layers and one input layer have L connections between layers,
-but dense blocks have L(L+1) connections between layers. The input feature maps are
+but dense blocks have L(L+1)
 2
+connections between layers. The input feature maps are
 concatenated in depth. According to the authors, this prevents features from being re-
 learned and allows much fewer filters per convolutional layer. Where AlexNet and VGG-16
 have several hundred filters per convolutional layer (see Tables D.2 and D.3), the authors
 used only on the order of 12 feature maps per layer.
 A dense block is visualized in Figure 2.6.
 256-d in
-256-d
 k @ 3×3
-k-d
 concatenate
-(256+k)-d
 k @ 3×3
-k-d
 concatenate
+256-d
+k-d
+(256+k)-d
+k-d
 (256+L·k)-d out
 Figure 2.6.: Dense block with L=2 layers and a growth factor of k.
 Dense blocks have five hyperparameters:
 • The activation function being used. The authors use ReLU.
-• The size k ×k of filters. The authors use k = k = 3.
-w h w h
+• The size k
+w
+×k
+h
+of filters. The authors use k
+w
+= k
+h
+= 3.
 • The number of layers L, where L = 2 is a simple convolutional layer.
 • The number k of filters added per layer (called growth rate in the paper)
 It might be necessary to use 1×1 convolutions to reduce the number of L·k feature maps.
@@ -635,10 +882,14 @@ It might be necessary use 1×1 convolutions to reduce the number of L·k feature
 Transition layers are used to overcome constraints imposed by resource limitations or
 architectural design choices. One constraint is the number of feature maps (see Appendix C.3
 for details). In order to reduce the number of feature maps while still keeping as much
-relevant information as possible in the network, a convolutional layer i with k filters of
+relevant information as possible in the network, a convolutional layer i with k
+i
+filters of
+the shape 1×1×k
+i−1
+is added. The number of filters k
 i
-the shape 1×1×k is added. The number of filters k directly controls the number of
-i−1 i
+directly controls the number of
 generated feature maps.
 In order to reduce the dimensionality (width and height) of the feature maps, one typically
 applies pooling.
@@ -680,45 +931,81 @@ are not covered by the training set and thus indicate the need to collect more d
 15
 2. Convolutional Neural Networks
 2.5.2. Confusion Matrices
-A confusion matrix is a matrix (c) ∈ NK×K, where K ∈ N is the number of classes,
-ij ≥0 ≥2
-which contains all correct and wrong classifications. The item c is the number of times
+A confusion matrix is a matrix (c)
+ij
+∈ NK×K
+≥0
+, where K ∈ N
+≥2
+is the number of classes,
+which contains all correct and wrong classifications. The item c
 ij
+is the number of times
 items of class i were classified as class j. This means the correct classification is on the
-diagonal c and all wrong classifications are of the diagonal. The sum (cid:80)K (cid:80)K c is the
-ii i=1 j=1 ij
-total number of samples which were evaluated and (cid:80) i=1cii is the accuracy.
-(cid:80)K i=1(cid:80)K j=1cij
-The sums r(i) = (cid:80)K c of each class i are worth being investigated as they show if the
-j=1 ij
+diagonal c
+ii
+and all wrong classifications are of the diagonal. The sum (cid:80)K
+i=1
+(cid:80)K
+j=1
+c
+ij
+is the
+total number of samples which were evaluated and (cid:80) i=1 cii
+(cid:80)K
+i=1
+(cid:80)K
+j=1
+cij
+is the accuracy.
+The sums r(i) = (cid:80)K
+j=1
+c
+ij
+of each class i are worth being investigated as they show if the
 classes are skewed. If the number of samples of one class dominates the data set, then the
 classifier can get a high accuracy by simply always predicting the most common class. If
 the accuracy of the classifier is close to the a priori probability of the most common class,
 techniques to deal with skewed classes might help.
 An automatic criterion to check for this problem is
+accuracy ≤
 max({r(i) | i = 1,...,k})
-accuracy ≤ +ε
 (cid:80)k
-r(i)
 i=1
+r(i)
++ε
 where ε is a small value to compensate the fact that some examples might be correct just
 by chance.
 Other values which should be checked are the class-wise sensitivities:
-# correctly identified instances of class k c
+s(k) =
+# correctly identified instances of class k
+# instances of class k
+=
+c
 kk
-s(k) = = ∈ [0,1]
-# instances of class k r(k)
+r(k)
+∈ [0,1]
 If s(i) is much lower than s(j), it is an indicator that more or cleaner training data is
 necessary for s(i).
 The class-wise confusion
+f
+confusability
+(k
+1
+,k
+2
+) =
 c
 k1k2
-f (k ,k ) =
-confusability 1 2 (cid:80)K
+(cid:80)K
+j=1
 c
-j=1 k1j
-indicates if class k gets often classified as class k . The highest values here can indicate
-1 2
+k1j
+indicates if class k
+1
+gets often classified as class k
+2
+. The highest values here can indicate
 if two classes should be merged or a specialized model for separating those classes could
 improve the overall system.
 2.5.3. Validation Curves: Accuracy, loss and other metrics
@@ -731,15 +1018,16 @@ validation curves give an indicator if training longer improves the model’s pe
 2.5. Analysis Techniques
 plotting the error on the training set as well as the error on a validation set, one can also
 estimate if overfitting might become a problem. See Figure 2.7 for an example.
+10 20 30 40 50 60 70 80 90 100
+0.2
+0.4
+0.6
+0.8
+overfitting
+Epochs
 Error
 Training set
 Validation set
-0.8
-0.6
-0.4
-0.2 overfitting
-Epochs
-10 20 30 40 50 60 70 80 90 100
 Figure 2.7.: A typical validation curve: In this case, the hyperparameter is the number of epochs
 and the quality metric is the error (1−accuracy). The longer the network is trained,
 the better it gets on the training set. At some point the network is fit too well to the
@@ -754,22 +1042,60 @@ Loss functions
 The loss function (also called error function or cost function) is a function which assigns a
 real value to a complex event like the predicted class of a feature vector. It is used to define
 the objective function. For classification problems the loss function is typically cross-entropy
-with (cid:96) or (cid:96) regularization, as it was described in [NH92]:
-1 2
-(cid:96)1 (cid:96)2
-K (cid:122) (cid:125)(cid:124) (cid:123) (cid:122) (cid:125)(cid:124) (cid:123)
-(cid:88)(cid:88) (cid:88) (cid:88)
-E (W) = − [txlog(ox)+(1−tx)log(1−ox)]+λ · |w|+λ · w2
-CE k k k k 1 2
-x∈Xk=1 w∈W w∈W
-(cid:124) (cid:123)(cid:122) (cid:125) (cid:124) (cid:123)(cid:122) (cid:125)
-cross-entropydataloss modelcomplexityloss
-where W are the weights, X is the training data set, K ∈ N 0 is the number of classes and
+with (cid:96)
+1
+or (cid:96)
+2
+regularization, as it was described in [NH92]:
+E
+CE
+(W) = −
+(cid:88)
+x∈X
+K
+(cid:88)
+k=1
+[tx
+k
+log(ox
+k
+)+(1−tx
+k
+)log(1−ox
+k
+)]
+(cid:124) (cid:123)(cid:122) (cid:125)
+cross-entropydataloss
++λ
+1
+·
+(cid:96)1
+(cid:122) (cid:125)(cid:124) (cid:123)
+(cid:88)
+w∈W
+|w|+λ
+2
+·
+(cid:96)2
+(cid:122) (cid:125)(cid:124) (cid:123)
+(cid:88)
+w∈W
+w2
+(cid:124) (cid:123)(cid:122) (cid:125)
+modelcomplexityloss
+where W are the weights, X is the training data set, K ∈ N
 ≥
-tx indicates if the training example x is of class k. ox is the output of the classification
-k k
-algorithm which depends on the weights. λ ,λ ∈ [0,∞) weights the regularization and is
-1 2
+0 is the number of classes and
+tx
+k
+indicates if the training example x is of class k. ox
+k
+is the output of the classification
+algorithm which depends on the weights. λ
+1
+,λ
+2
+∈ [0,∞) weights the regularization and is
 typically smaller than 0.1.
 17
 2. Convolutional Neural Networks
@@ -797,29 +1123,45 @@ tion might be bad.
 2.5. Analysis Techniques
 Quality criteria
 There are several quality criteria for classification models. Most quality criteria are based on
-the confusion matrix c which denotes at c the number of times the real class was i and j
+the confusion matrix c which denotes at c
 ij
+the number of times the real class was i and j
 was predicted. This means the diagonal contains the number of correct predictions. For
-the following, let t = (cid:80)k c be the number of training samples for class i. The most
-i j=1 ij
+the following, let t
+i
+= (cid:80)k
+j=1
+c
+ij
+be the number of training samples for class i. The most
 common quality criterion is accuracy:
+accuracy(c) =
 (cid:80)k
+i=1
 c
-accuracy(c) = i=1 ii ∈ [0,1]
+ii
 (cid:80)k
+i=1
 t
-i=1 i
+i
+∈ [0,1]
 One problem of accuracy as a quality criterion are skewed classes. If one class is by far
 more common than all other classes, then the simplest way to achieve a high score is to
 always classify everything as the most common class.
 In order to fix this problem, one can use the mean accuracy:
+mean-accuracy(c) = 1
 k
-mean-accuracy(c) = 1 ·(cid:88) c ii ∈ [0,1]
-k t
-i
+·
+k
+(cid:88)
 i=1
-For two-class problems there are many other metrics like precision, recall and F -score.
+c ii
+t
+i
+∈ [0,1]
+For two-class problems there are many other metrics like precision, recall and F
 β
+-score.
 Quality criteria for semantic segmentation are explained in [Tho16].
 Besides the quality of the classification result, several other quality criteria are important
 in practice:
@@ -850,16 +1192,17 @@ not help. Instead, the model or the training algorithm need to be adjusted.
 If the training set’s learning curve is significantly higher than the validation set’s learning
 curve, then removing features (e.g., by decreasing the images resolution), more training
 samples or more regularization will help.
-Error
-Validation set
-Training set
+10 20 30 40 50 60 70 80 90 100
+0.2
+0.4
 0.6
-0.4 variance
 avoidable bias
+variance
 human-level error
-0.2
 Training samples
-10 20 30 40 50 60 70 80 90 100
+Error
+Validation set
+Training set
 Figure 2.9.: A typical learning curve: The more data is used for training, the more errors a given
 architecture will make to fit the given training data. At the same time, it is expected
 that the training data gets more similar to the true distribution of the data which
@@ -895,20 +1238,32 @@ image by something. This could be a gray square as in [ZF14] or a black superpix
 in [RSG16]. Then the classifier is run on the image again. This is done for each region (e.g.,
 superpixel or position of the square) and the regions are then colored to generate either a
 correct class heatmap or the most-likely class image. It is important to note that the color
-at region r denotes the result if r is occluded.
-i i
+at region r
+i
+denotes the result if r
+i
+is occluded.
 Both visualizations are shown in Figure 2.10. One can see that the network makes sensible
 predictions for this image of the class “Pomeranian”. However, the image of the class “Afghan
 Hound” gets confused with “Ice lolly”, which is a sign that this needs further investigation.
 Gradient-based approaches
 In [SVZ13], a gradient-based approach was used to generate image-specific class saliency
 maps. The authors describe the problem as a ranking problem, where each pixel of the
-image I is assigned a score S (I ) for a class c of interest. CNNs are non-linear functions,
-0 c 0
-but they can be approximated by the first order Taylor expansion S (I) ≈ wTI +b where
+image I
+0
+is assigned a score S
 c
-w is the derivative of S at I .
-c 0
+(I
+0
+) for a class c of interest. CNNs are non-linear functions,
+but they can be approximated by the first order Taylor expansion S
+c
+(I) ≈ wTI +b where
+w is the derivative of S
+c
+at I
+0
+.
 21
 2. Convolutional Neural Networks
 2.5.6. Argmax Method
@@ -950,31 +1305,58 @@ If the set of learned filters changes with initialization, this might be an indi
 little capacity of that layer. Hence adding more filters to that layer could improve the
 performance.
 Filters can be compared with the k-translation correlation as introduced in [ZCZL16]:
-(cid:104)W ,T(W ,x,y)(cid:105)
-i j f
-ρ (W ,W ) = max ∈ [−1,1],
-k i j
-(x,y)∈{−k,...,k}2\(0,0) (cid:107)W i(cid:107) 2(cid:107)W j(cid:107)
-2
+ρ
+k
+(W
+i
+,W
+j
+) = max
+(x,y)∈{−k,...,k}2\(0,0)
+(cid:104)W
+i
+,T(W
+j
+,x,y)(cid:105)
+f
+(cid:107)W i (cid:107) 2 (cid:107)W j (cid:107) 2
+∈ [−1,1],
 where T(·,x,y) denotes the translation of the first operand by (x,y), with zero padding at
-the borders to keep the shape. (cid:104)·,·(cid:105) denotes the flattened inner product, where the two
+the borders to keep the shape. (cid:104)·,·(cid:105)
 f
+denotes the flattened inner product, where the two
 operands are flattened into column vectors before applying the standard inner product. The
 closer the absolute value of the k-translation correlation to one, the more similar two filters
-W ,W are. According to [ZCZL16], standard CNNs like AlexNet (see Appendix D.2) and
-i j
+W
+i
+,W
+j
+are. According to [ZCZL16], standard CNNs like AlexNet (see Appendix D.2) and
 VGG-16 (see Appendix D.3) have many filters which are highly correlated. They found
 this by comparing the averaged maximum k-translational correlation of the networks with
 Gaussian-distributed initialized filters. The averaged maximum k-translational correlation
 is defined as
+¯ ρ
+k
+(W) =
+1
+N
 N
-1 (cid:88) N
-ρ¯ (W) = max ρ (W ,W )
-k k i j
-N j=1,j(cid:54)=i
+(cid:88)
 i=1
-where N is the number of filters in the layer W and W denotes the ith filter.
+N
+max
+j=1,j(cid:54)=i
+ρ
+k
+(W
 i
+,W
+j
+)
+where N is the number of filters in the layer W and W
+i
+denotes the ith filter.
 2.5.9. Weight update tracking
 Andrej Karpathy proposed in the 5th lecture of CS231n to track weight updates to check if
 the learning rate is well-chosen. He suggests that the weight update should be in the order
@@ -1046,17 +1428,30 @@ connected to the output nodes.
 3. Topology Learning
 4. Correlation Maximization: Train the weights of the candidates by maximizing S,
 the correlation between the candidate's output value V and the network's residual error:
+S = (cid:88)
+o∈O
+(cid:12)
+(cid:12)
 (cid:12) (cid:12)
+(cid:12)
+(cid:12)
+(cid:88)
+p∈T
+(cid:0) V p − ¯ V (cid:1) (E p,o − ¯ E o )
+(cid:12)
+(cid:12)
 (cid:12) (cid:12)
-S = (cid:88)(cid:12) (cid:12)(cid:88)(cid:0) V p−V¯(cid:1) (E p,o−E¯ o)(cid:12) (cid:12)
-(cid:12) (cid:12)
-o∈O(cid:12)p∈T (cid:12)
-where O is the set of output nodes, T is the training set, V is the candidate neurons
+(cid:12)
+(cid:12)
+where O is the set of output nodes, T is the training set, V
 p
-activation for a training pattern p. E is the residual output error at node o for
+is the candidate neurons
+activation for a training pattern p. E
 p,o
-pattern p. V¯ and E¯ are averaged values over all elements of T. This step is finished
+is the residual output error at node o for
+pattern p. ¯ V and ¯ E
 o
+are averaged values over all elements of T. This step is finished
 when the correlation no longer increases.
 5. Candidate selection: Keep the candidate node with the highest correlation, freeze
 its incoming weights and add connections to the output nodes.
@@ -1069,35 +1464,65 @@ right corner. The black squares represent frozen weights which are found by corr
 maximization whereas the white squares are trainable weights.
 3.1.2. Meiosis Networks
 Meiosis Networks are introduced in [Han89]. In contrast to most MLPs and CNNs, where
-weights are deterministic and fixed at prediction time, each weight w in Meiosis networks
+weights are deterministic and fixed at prediction time, each weight w
 ij
+in Meiosis networks
 follows a normal distribution:
-w ∼ N(µ ,σ2)
-ij ij ij
+w
+ij
+∼ N(µ
+ij
+,σ2
+ij
+)
 28
 3.2. Pruning approaches
-Hence every connection has two learned parameters: µ and σ2.
-ij ij
+Hence every connection has two learned parameters: µ
+ij
+and σ2
+ij
+.
 The key idea of Meiosis networks is to allow neurons to perform Meiosis, which is cell
 division. A node j is split when the random part dominates the value of the sampled
 weights:
-(cid:80) (cid:80)
-σ σ
-i ij > 1 and k jk > 1
-(cid:80) (cid:80)
-µ µ
-i ij k jk
+(cid:80)
+i
+σ
+ij
+(cid:80)
+i
+µ
+ij
+> 1 and
+(cid:80)
+k
+σ
+jk
+(cid:80)
+k
+µ
+jk
+> 1
 The mean of the new nodes is sampled around the old mean, half the variance is assigned
 to the new connections.
 Hence Meiosis networks only change the number of neurons per layer. They do not add
 layers or add skip connections.
 3.1.3. Automatic Structure Optimization
 Automatic Structure Optimization (ASO) was introduced in [BM93] for the task of on-
-line handwriting recognition. It makes use of the confusion matrix C = (c ) ∈ Nk×k
-ij ≥0
+line handwriting recognition. It makes use of the confusion matrix C = (c
+ij
+) ∈ Nk×k
+≥0
 (see Section 2.5.2) to guide the topology learning. They define a confusion-symmetry matrix
-S with s j = s i = c ·c . The maximum of S defines where the ASO algorithm adds
-i j ij ji
+S with s
+i
+j = s
+j
+i = c
+ij
+·c
+ji
+. The maximum of S defines where the ASO algorithm adds
 more parameters. The details how the resources are added are not transferable to CNNs.
 3.2. Pruning approaches
 Pruning approaches start with a network which is bigger than necessary and prune it. The
@@ -1115,18 +1540,21 @@ Damage (OBD) as introduced in [LDS+89]. For every single parameter k, OBD calcul
 the effect on the objective function of deleting k. The authors call the effect of the deletion
 29
 3. Topology Learning
-of parameter k the saliency s . The parameters with the lowest saliency are deleted, which
+of parameter k the saliency s
 k
+. The parameters with the lowest saliency are deleted, which
 means they are set to 0 and are not updated anymore.
 A follow-up method called Optimal Brain Surgeon [HSW93] claims to choose the weights
 in a much better way. This requires, however, to calculate the inverse Hessian matrix
 $H^{-1} \in \mathbb{R}^{n \times n}$ where $n \in \mathbb{N}$ is typically $n > 10^6$.
 A much simpler and computationally cheaper pruning criterion is the weight magnitude.
 [HPTD15] prunes all weights w which are below a threshold θ:
-
-w if w ≥ θ
 w ←
-0 otherwise
+
+
+
+w if w ≥ θ
+0 otherwise
 3.3. Genetic approaches
 The general idea of genetic algorithms (GAs) is to encode the solution space as genes, which
 can recombine themselves via crossover and inversion. An introduction to such algorithms
@@ -1186,15 +1614,18 @@ One idea to approach this problem is by building a hierarchy of classifiers. The
 classifier distinguishes clusters of classes, whereas the leaf classifiers distinguish single
 classes. Figure 4.1 gives an example for an hierarchy of classifiers.
 Figure 4.1.: Example for a hierarchy of classifiers. Each classifier is visualized by a rounded rectangle.
-The root classifier C has to distinguish six coarse classes (pedestrian, four+-wheelers,
+The root classifier C
 0
-traffic signs, two-wheelers, street, other) or 17 fine-grained classes. If C predicts a
+has to distinguish six coarse classes (pedestrian, four+-wheelers,
+traffic signs, two-wheelers, street, other) or 17 fine-grained classes. If C
 0
+predicts a
 pedestrian, another classifier has to predict if it is an adult or a child. Similarly, if C
 0
 predicts traffic sign, then another classifier has to predict if it is a speed limit, a
-sign indicating danger or something else. If C , however, predicts road, then no other
+sign indicating danger or something else. If C
 0
+, however, predicts road, then no other
 classifier will become active.
 In this example, the problem has 17 classes. The hierarchical approach introduces
 7 clusters of classes and thus uses 8 classifiers.
@@ -1210,8 +1641,9 @@ children. Siblings are not affected. In the example from Figure 4.1, the classif
 which distinguishes traffic signs can be changed while the classification as pedestrian,
 four+-wheelers, traffic sign, street, other will not be affected. Also, the
 classification between speed limits, danger signs and other signs will not change.
-• Faster training: Except for the root classifier C , each other classifier will have
+• Faster training: Except for the root classifier C
 0
+, each other classifier will have
 less than the total amount of training data. Depending on the combined classes, the
 models could also be simpler. Hence the training time is reduced.
 • Weighting of errors: In practice, some errors are more severe than others. For
@@ -1234,8 +1666,9 @@ can be directly with standard clustering algorithms such as k-means, DBSCAN [EKS
 OPTICS [ABKS99], CLARANS [NH02], DIANA [KR09], AHC (see [HPK11]) or spectral
 clustering as in [XZY+14]. Those clusterings, however, are hard to interpret and most of
 them do not allow a human to improve the found clustering manually.
-The confusion matrix (c) ∈ Nk×k states how often class i was present and class j was
+The confusion matrix (c)
 ij
+∈ Nk×k states how often class i was present and class j was
 34
 4.2. Clustering classes
 predicted. The more often this confusion happens, the more similar those two classes are to
@@ -1252,11 +1685,16 @@ have to be swapped to in order to keep the same confusion matrix.
 • If two classes are confused often, then they are similar to the classifier.
 Hence the order of the classes is permuted in such a way that the highest errors are close
 to the diagonal. One possible objective function to be minimized is
-n n
-(cid:88)(cid:88)
-f(C) = C ·|i−j| [4.1]
+f(C) =
+n
+(cid:88)
+i=1
+n
+(cid:88)
+j=1
+C
 ij
-i=1 j=1
+·|i−j| [4.1]
 which punishes errors linearly with the distance to the diagonal. This method is called CMO
 in the following.
 As pointed out by Tobias Ribizel (personal communication), this optimization problem
@@ -1286,8 +1724,11 @@ Those will be moved to the corners of the confusion matrix by optimizing Equatio
 Once a permutation of the classes is found which has a low score Equation (4.1), the clusters
 can either be made by hand by deciding why classes should not be in one cluster. With
 such a permutation, only n−1 binary decisions have to be made and hence only the list of
-classes has to be read. Alternatively, one can calculate the confusions C(cid:48) +C(cid:48) for
-i,i+1 i+1,i
+classes has to be read. Alternatively, one can calculate the confusions C(cid:48)
+i,i+1
++C(cid:48)
+i+1,i
+for
 each pair of classes which are neighbors in the confusion matrix. The higher this value, the
 more similar are the classes according to the classifier. Hence a threshold θ can be applied.
 θ can either be set automatically (e.g., such that 10% of all pairs are above the threshold)
@@ -1369,8 +1810,9 @@ Conv-Block(2) is added at the input. For MNIST, the images are bilinearly upsamp
 32px×32px.
 38
 5.1. Baseline Model and Training setup
-# Type Filters @ Parameters FLOPs Output size
+# Type Filters @
 Patch size / stride
+Parameters FLOPs Output size
 Input 0 0 3@32× 32
 1 Convolution 32@3×3×3 /1 896 1736704 32@32× 32
 2 BN + ELU 64 163904 32@32× 32
@@ -1394,19 +1836,43 @@ Dropout 0.5 0 0 512@ 1× 1
 15 Convolution k @1×1×512/1 k·(512+1) 1024·k k @ 1× 1
 Global avg Pooling 1×1 0 k k @ 1× 1
 16 BN + Softmax 2k 7k k @ 1× 1
-(cid:80) 515k 1032k 103424+2k
-+892512 +55729664
+(cid:80) 515k
++892512
+1032k
++55729664
+103424+2k
 Table 5.1.: Baseline architecture with 3 input channels of size 32×32. All convolutional layers
 use SAME padding, except for layer 11 which used VALID padding in order to decrease
 the feature map size to 1×1. If the input feature map is bigger than 32×32, for
 each power of two there are two Convolution + BN + ELU blocks and one Max pooling
 block added. This is the framed part in the table.
-Input maxpooling2×2/2 maxpooling2×2/2 maxpooling2×2/2 C512@1×1/1
-32×32 16×16 8×8 4×4 1×1
-C32@3×3/1 C64@3×3/1 C64@3×3/1 C512@4×4/1(V) BN+ELU
-BN+ELU BN+ELU BN+ELU BN+ELU Dropout,p=0.5
-C32@3×3/1 C64@3×3/1 Dropout,p=0.5 Ck@1×1/1
-BN+ELU BN+ELU GlobalAVGpooling
+32×32
+Input
+C32@3×3/1
+BN+ELU
+C32@3×3/1
+BN+ELU
+16×16
+maxpooling2×2/2
+C64@3×3/1
+BN+ELU
+C64@3×3/1
+BN+ELU
+8×8
+maxpooling2×2/2
+C64@3×3/1
+BN+ELU
+4×4
+maxpooling2×2/2
+C512@4×4/1(V)
+BN+ELU
+Dropout,p=0.5
+1×1
+C512@1×1/1
+BN+ELU
+Dropout,p=0.5
+Ck@1×1/1
+GlobalAVGpooling
 BN+Softmax
 Figure 5.1.: Architecture of the baseline model. C 32@3×3/1 is a convolutional layer with 32 filters
 of kernel size 3×3 with stride 1.
@@ -1415,8 +1881,8 @@ of kernel size 3×3 with stride 1.
 5.1.1. Baseline Evaluation
 The results for the baseline model evaluated on eight datasets are given in Table 5.2. The
 speed for inference for different GPUs is given in Table 5.3.
-Single Model Accuracy Ensemble of 10
 Dataset
+Single Model Accuracy Ensemble of 10
 Training Set Test Set Training Set Test Set
 Asirra 94.22% σ = 3.49 94.37% σ = 3.47 97.07% 97.37%
 CIFAR-10 91.23% σ = 1.10 85.84% σ = 0.87 92.36% 86.75%
@@ -1431,8 +1897,8 @@ used in the ensemble. The empirical standard deviation σ of the accuracy is als
 CIFAR-10, CIFAR-100 and STL-10 models use test-time transformations. None of the
 models uses unlabeled data or data from other datasets. For HASYv2 no test time
 transformations are used.
-Inference per Training
 Network GPU Tensorflow
+Inference per Training
 1 Image 128 images time / epoch
 Baseline Default Intel i7-4930K 3ms 244ms 231.0s
 Baseline Optimized Intel i7-4930K 2ms 143ms 149.0s
@@ -1516,31 +1982,82 @@ training. The image might lead to the wrong conclusion that models which are bet
 the start are also better at the end. In order to check this hypothesis, the relative order of
 validation accuracies for the 10 CIFAR-100 models was examined. If the relative ordering
 stays approximately the same, then it can be considered to run the first few epochs many
-times and only train the best models to the end. For 10 models, there can be 102−10 = 45
+times and only train the best models to the end. For 10 models, there can be 102−10
 2
+= 45
 pair-wise changes in the ordering at maximum if the relative order of validation accuracies
 is reversed. For the baseline model, 21.8 changes in the relative order of accuracies occurred
 on average for each pair of epochs (i,i+1). This means if one knows only the relative order
 of the validation accuracy of two models m and m′ in epoch i, it is doubtful if one can
 make any statement about the ordering of m and m′ in epoch i+1.
+0
+1
+0
+2
+0
+3
+0
+4
+0
+5
+0
+6
+0
+7
+0
+8
+0
+9
+0
+1
+0
+0
+1
+1
+0
+1
+2
+0
+1
+3
+0
+1
+4
+0
+0.2
+0.3
+0.4
+0.5
+0.6
 0.7
+epoch
+v
+ali
+d a
+ti
+o
+n
+a c
+c
+u
+r
+a
+c y
+maximum validation accuracy
+minimum validation accuracy
 1.5
-0.6 2
-2.5 ycarucca
-0.5
-3 ssol
-noitadilav
-0.4
+2
+2.5
+3
 3.5
-0.3 maximum validation accuracy 4
-mmainxiimmuummvvaalliiddaattiioonnaaccccuurraaccyy
-minimummvaelaidnaltoiossn accuracy
+4
 4.5
-0.2
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-1 2 3 4 5 6 7 8 9 0 1 2 3 4
-1 1 1 1 1
-epoch
+l
+o s
+s
+maximum validation accuracy
+minimum validation accuracy
+mean loss
 Figure 5.7.: Minimum and maximum validation accuracy of the 10 trained models by epoch. The
 differences do not exceed 1% and do not increase by training epoch. Four models
 stopped the first training stage at epoch 133 which causes the shift in the loss and the
@@ -1598,8 +2115,8 @@ to Equation (4.1). The diagonal elements are set to 0 in order to make other ele
 easier to see. The symbols next to the label on the vertical axis indicate the shape
 and the color of the signs.
 The second image shows the same, but with baseline model.
-49
 Best viewed in electronic form.
+49
 Figure 5.13.: The first 50 entries of the confusion matrix of the HASYv2 dataset. The diagonal
 elements are set to 0 in order to make other elements easier to see. The top image
 shows arbitrary class ordering, the bottom image shows the optimized ordering.
@@ -1624,17 +2141,28 @@ based on CMO as described in Section 4.2.
 51
 5. Experimental Evaluation
 Cluster Spectral clustering Errors CMO Errors
-fish aquarium fish, orchid + flatfish 5 aquarium fish, orchid + flatfish 4
-+ ray, shark + trout, lion + ray + shark, trout
-flowers orchid, aquarium fish + sun- 5 orchid, aquarium fish + sun- 2
-flower + poppy, tulip + rose, flower, poppy, tulip, rose
+fish aquarium fish, orchid + flatfish
++ ray, shark + trout, lion
+5 aquarium fish, orchid + flatfish
++ ray + shark, trout
+4
+flowers orchid, aquarium fish + sun-
+flower + poppy, tulip + rose,
 train
+5 orchid, aquarium fish + sun-
+flower, poppy, tulip, rose
+2
 people baby, boy, man + girl + woman 2 baby, boy, girl, woman, man 0
-reptiles crocodile, plain, road, table, 9 crocodile, lizard, lobster, cater- 6
-wardrobe + dinosaur + lizard pillar+dinosaur+snake+tur-
-+ snake, worm + turtle tle, crab
-trees maple,oak,pine+willow,forest 3 palm, willow, pine, maple, oak 0
+reptiles crocodile, plain, road, table,
+wardrobe + dinosaur + lizard
++ snake, worm + turtle
+9 crocodile, lizard, lobster, cater-
+pillar+dinosaur+snake+tur-
+tle, crab
+6
+trees maple,oak,pine+willow,forest
 + palm
+3 palm, willow, pine, maple, oak 0
 Total 24 12
 Table 5.4.: Differences in spectral clustering and CMO. Classes in a cluster are separated by ,
 whereas clusters are separated by +.
@@ -1650,8 +2178,12 @@ K K, κ 0 K, κ 0
 L L, (cid:98) and L, L 1 L, (cid:98) and L, L 1
 M M and M and M 2 M and µ, M and M 3
 N N and N, N and N 2 N and N, N and N, ℵ 3
-O O, O, 0, ◦, °, and o 1 O, O, 0, ◦, ° and and o 2
-P P, P and p, ρ(cid:35)and P and ℘ 3 P and P, P, ℘ an(cid:35)d p, ρ 2
+O O, O, 0, ◦, °,
+(cid:35)
+and o 1 O, O, 0, ◦, ° and
+(cid:35)
+and o 2
+P P, P and p, ρ and P and ℘ 3 P and P, P, ℘ and p, ρ 2
 Q Q, Q, Q, ι, (cid:116), (cid:38), (cid:96), (cid:61), Æ, 1 7 Q and Q, Q 1
 R R, R and R, R, k and (cid:60) 3 R and (cid:60), R, R, R 1
 S S, s, S 0 S, s, S 0
@@ -1683,8 +2215,9 @@ The leaf classifiers use the same topology as the root classifier. By initializi
 the root classifiers weights their performance can be pushed at about the inner accuracy.
 They are, however, only useful if their accuracy is well above the inner accuracy of the root
 classifier. Hence, for CIFAR-100, building hierarchies of classifiers is not useful.
+Cluster Classes
 accuracy
-Cluster Classes root classifier leaf classifier
+root classifier leaf classifier
 cluster identified class identified | cluster class identified | cluster
 1 3 69.67% 84.27% 72.98%
 2 5 46.60% 58.54% 43.47%
@@ -1709,56 +2242,79 @@ consider data points where the root classifier correctly identified the cluster.
 5. Experimental Evaluation
 5.5. Increased width for faster learning
 More filters in one layer could simplify the optimization problem as each filter needs smaller
-updates. Hence a CNN N with n filters in layer i is expected to take more epochs than a
+updates. Hence a CNN N with n
 i
-CNN N(cid:48) with 2·n filters in layer i to achieve the same validation accuracy.
+filters in layer i is expected to take more epochs than a
+CNN N(cid:48) with 2·n
 i
+filters in layer i to achieve the same validation accuracy.
 This hypothesis can be falsified by training a CNN N and a CNN N(cid:48) and comparing the
 trained number of epochs. As more filters can lead to different results depending on the
 layer where they are added, five models are trained. The details about those models are
 given in Table 5.7
-Filter count Total
 Name Layer
+Filter count Total
 Baseline New parameters
-m 9 64 638 5978566
+m
 9
-m(cid:48) 9 64 974 8925622
+9 64 638 5978566
+m(cid:48)
 9
-m 11 512 3786 5982698
+9 64 974 8925622
+m
 11
-m(cid:48) 11 512 1024 1731980
+11 512 3786 5982698
+m(cid:48)
 11
-m 13 512 8704 5982092
+11 512 1024 1731980
+m
 13
+13 512 8704 5982092
 Table 5.7.: Models which are identical to the baseline, except that the number of filters of one layer
 was increased.
 The detailed results are given in Table 5.8. As expected, the number of training epochs of
 the models with increased numbers of parameters is lower. The wall-clock time, however, is
 higher due to the increase in computation per forward- and backward-pass.
-For m , m and m , the filter weight range of the layer with increased capacity decreases
-9 11 13
+For m
+9
+, m
+11
+and m
+13
+, the filter weight range of the layer with increased capacity decreases
 compared to Figure 5.6, the filter weights of the layer with increased capacity are more
-concentrated around zero compared to Figure 5.2. For model m , the distribution of
+concentrated around zero compared to Figure 5.2. For model m
 13
+, the distribution of
 weight of the output layer changed to a more bell-shaped distribution. Except for this, the
 distribution of filter weights in other layers did not change for all three models compared to
 the baseline.
+Model Parameters
 Accuracy Training
-Model Parameters Single Model Ensemble Mean Epochs Mean Time
+Single Model Ensemble Mean Epochs Mean Time
 Mean std
 baseline 944012 63.38% 0.55 64.70% 154.7 3856s
-m 5978566 65.53% 0.37 66.72% 105.7 4472s
+m
 9
-m(cid:48) 8925622 65.10% 1.09 66.54% 95.6 5261s
+5978566 65.53% 0.37 66.72% 105.7 4472s
+m(cid:48)
 9
-m 5982698 65.73% 0.77 67.38% 149.2 5450s
+8925622 65.10% 1.09 66.54% 95.6 5261s
+m
 11
-m(cid:48) 1731980 62.12% 0.48 62.89% 143.6 3665s
+5982698 65.73% 0.77 67.38% 149.2 5450s
+m(cid:48)
+11
+1731980 62.12% 0.48 62.89% 143.6 3665s
+m
+13
+5982092 62.39% 0.66 63.77% 147.8 4485s
+Table 5.8.: Training time in epochs and wall-clock time for the baseline and models m
+9
+, m
 11
-m 5982092 62.39% 0.66 63.77% 147.8 4485s
+, m
 13
-Table 5.8.: Training time in epochs and wall-clock time for the baseline and models m , m , m
-9 11 13
 as well as their accuracies.
 54
 5.6. Weight updates
@@ -1789,14 +2345,20 @@ SAME padding and each layer can have an arbitrary number of filters. A convoluti
 with more filters is called wider [ZK16], a convolutional layer with fewer filters is thus called
 narrower and the number of filters in a convolutional layer is the layers width.
 If the number of parameters which may be used for the feature map scale is fixed and high
-enough, there are still many combinations. If n with i = 0,...,k is the number of output
+enough, there are still many combinations. If n
 i
+with i = 0,...,k is the number of output
 feature maps of layer i where i = 0 is the input layer and all filters are 3×3 filters without
 a bias, then the number of parameters is
+Parameters =
 k
-Parameters = (cid:88)(cid:0) (n ·32+1)·n (cid:1)
-i−1 i
+(cid:88)
 i=1
+(cid:0) (n
+i−1
+·32+1)·n
+i
+(cid:1)
 Hence the width of one layer does not only influence the parameters in this layer, but also
 in the next layer.
 The number of possible subsequent layers of one feature map size is enormous, even if
@@ -1865,17 +2427,21 @@ Hence the effect of removing Batch Normalization from the baseline is investigat
 experiment.
 As before, 10 models are trained on CIFAR-100. The training setup and the model m
 no-bn
-are identical to the baseline model m, except that in m the Batch Normalization layers
+are identical to the baseline model m, except that in m
 no-bn
+the Batch Normalization layers
 are removed.
 One notable difference is the training time: While m needs 21ms per epoch in average on
-a GTX 980, m only needs 21ms per epoch. The number of epochs used for training,
+a GTX 980, m
 no-bn
+only needs 21ms per epoch. The number of epochs used for training,
 however, also increased noticeably from 149 epochs to 178 epochs in average. The standard
-deviation of trained epochs is 17.3 epochs for the baseline model and 23.4 epochs for m .
+deviation of trained epochs is 17.3 epochs for the baseline model and 23.4 epochs for m
 no-bn
-The mean accuracy of m is 62.86% and hence 0.52 percentage points worse. The
+.
+The mean accuracy of m
 no-bn
+is 62.86% and hence 0.52 percentage points worse. The
 standard deviation between models increased from 0.55 to 0.61. This is likely a result of the
 early stopping policy and the differences in training epochs. This can potentially be fixed
 by retraining the models which stopped earlier than the model which was trained for the
@@ -1886,12 +2452,14 @@ Figure5.2,butthedistributionofbiasweightschangednoticeably: Whilethebiasweightso
 thebaselinearespreadoutinthefirstlayerandmuchmoreconcentratedinsubsequentlayers
 (see Figure 5.3), the model without Batch Normalization has rather concentrated weights
 in the first layers and only the bias weights of the last layer is spread out (see Figure A.2).
-Another model m(cid:48) which has one more filter in the convolutional layer 1, 3, 5, and 7 to
+Another model m(cid:48)
 no-bn
+which has one more filter in the convolutional layer 1, 3, 5, and 7 to
 compensate for the loss of parameters in Batch Normalization. The mean test accuracy of
 10 such models is 62.87% which is 0.51 percentage points worse than the baseline. The
-ensemble of m(cid:48) achieves 64.33% which is 0.37 percentage points worse than the baseline.
+ensemble of m(cid:48)
 no-bn
+achieves 64.33% which is 0.37 percentage points worse than the baseline.
 The mean training time was 14s per epoch and 157.4 epochs with a standard deviation of
 20.7 epochs.
 Hence it is not advisable to remove Batch Normalization for the final model. It could,
@@ -1901,8 +2469,9 @@ Batch Normalization.
 58
 5.9. Batch size
 5.9. Batch size
-The mini-batch size m ∈ N influences
+The mini-batch size m ∈ N
 ≥1
+influences
 • Epochs until convergence: The smaller m, the more often the model is updated
 in one epoch. Those updates, however, are based on fewer samples of the dataset.
 Hence the gradients of different mini-batches can noticeably differ. In the literature,
@@ -1914,29 +2483,38 @@ accuracy of the classifier when training is finished. [KMN+16] supports the view
 smaller m result in less sharp minima. Hence smaller m lead to better generalization.
 Empiric evaluation results can be found in Table 5.9. Those results confirm the claim
 of [KMN+16] that lower batch sizes generalize better.
-Training Mean total Single model Ensemble
-m Epochs
+m
+Training
+Epochs
+Mean total Single model Ensemble
 time training time Accuracy std Accuracy
-8 118 s 81 – 153 14131s 61.93% σ = 1.03 65.68%
+8 118 s
 epoch
-16 62 s 103 – 173 8349s 64.16% σ = 0.81 66.98%
+81 – 153 14131s 61.93% σ = 1.03 65.68%
+16 62 s
 epoch
-32 35 s 119 – 179 5171s 64.11% σ = 0.75 65.89%
+103 – 173 8349s 64.16% σ = 0.81 66.98%
+32 35 s
 epoch
-64 25 s 133 – 195 2892s 63.38% σ = 0.55 64.70%
+119 – 179 5171s 64.11% σ = 0.75 65.89%
+64 25 s
 epoch
-128 18 s 145 – 239 3126s 62.23% σ = 0.73 63.55%
+133 – 195 2892s 63.38% σ = 0.55 64.70%
+128 18 s
 epoch
+145 – 239 3126s 62.23% σ = 0.73 63.55%
 Table 5.9.: Trainingtimeperepochandsinglemodeltestsetaccuracy(meanandstandarddeviation)
 of baseline models trained with different mini-batch sizes m on GTX 970 GPUs on
 CIFAR-100.
 5.10. Bias
 Figure 5.3 suggests that the bias is not important for the layers 11, 13 and 15. Hence a
-model m is created which is identical to the baseline model m, except that the bias of
+model m
 no-bias
+is created which is identical to the baseline model m, except that the bias of
 layers 11, 13 and 15 is removed.
-The mean test accuracy of 10 trained m is 63.74% which is an improvement of
+The mean test accuracy of 10 trained m
 no-bias
+is 63.74% which is an improvement of
 0.36 percentage points over the baseline. The ensemble achieves a test accuracy of 65.13%
 which is 0.43 percentage points better than the baseline. Hence the bias can safely be
 removed.
@@ -2003,9 +2581,10 @@ tanh and softplus performed worse than the identity and it is unclear why the pu
 network performed so much better than the logistic function. One hypothesis why the
 logistic function performs so bad is that it cannot produce negative outputs. Hence the
 logistic− function was developed:
+logistic−(x) =
 1
-logistic−(x) = −0.5
 1+e−x
+−0.5
 The logistic− function has the same derivative as the logistic function and hence still suffers
 from the vanishing gradient problem. The network with the logistic− function achieves an
 accuracy which is 11.30% better than the network with the logistic function, but is still
@@ -2021,16 +2600,27 @@ This contradicts [GBB11, SMGS14].
 A key difference between the logistic− function and ELU is that ELU does neither suffers
 from the vanishing gradient problem nor is its range of values bound. For this reason, the
 S2ReLU activation function, defined as
+S2ReLU(x) = ReLU(
+x
+2
++1)−ReLU(−
+x
+2
++1) =
 
-−x +1 if x ≤ −2
-  2
-x x 
-S2ReLU(x) = ReLU( +1)−ReLU(− +1) = x if −2 ≤ x ≤ 2
-2 2
 
+ 
+
 
- x +1 if x > −2
+
+ 
+−x
+2
++1 if x ≤ −2
+x if −2 ≤ x ≤ 2
+x
 2
++1 if x > −2
 ThisfunctionissimilartoSReLUsasintroducedin[JXF+16]. ThedifferenceisthatS2ReLU
 does not introduce learnable parameters. The S2ReLU was designed to be symmetric, be
 the identity close to zero and have a smaller absolute value than the identity farther away.
@@ -2056,8 +2646,8 @@ Table 5.10.: Properties of activation functions.
 1The dying ReLU problem is similar to the vanishing gradient problem.
 62
 5.13. Activation Functions
-Single model Ensemble of 10
 Function
+Single model Ensemble of 10
 Training set Test set Training set Test set
 Identity 66.25% σ = 0.77 56.74% σ = 0.51 68.77% 58.78%
 Logistic 51.87% σ = 3.64 46.54% σ = 3.22 61.19% 54.58%
@@ -2074,41 +2664,55 @@ PReLU 80.01% σ = 2.03 62.16% σ = 0.73 83.50% 64.79%
 ELU 76.64% σ = 1.48 63.38% σ = 0.55 78.30% 64.70%
 Table 5.11.: Training and test accuracy of adjusted baseline models trained with different activation
 functions on CIFAR-100. For LReLU, α=0.3 was chosen.
-Inference per Training Mean total
-Function Epochs
+Function
+Inference per Training
+Epochs
+Mean total
 1 Image 128 time training time
-Identity 8ms 42ms 31 s 108 – 148 3629s
+Identity 8ms 42ms 31 s
 epoch
-Logistic 6ms 31ms 24 s 101 – 167 2234s
+108 – 148 3629s
+Logistic 6ms 31ms 24 s
 epoch
-Logistic− 6ms 31ms 22 s 133 – 255 3421s
+101 – 167 2234s
+Logistic− 6ms 31ms 22 s
 epoch
-Softmax 7ms 37ms 33 s 127 – 248 5250s
+133 – 255 3421s
+Softmax 7ms 37ms 33 s
 epoch
-Tanh 6ms 31ms 23 s 125 – 211 3141s
+127 – 248 5250s
+Tanh 6ms 31ms 23 s
 epoch
-Softsign 6ms 31ms 23 s 122 – 205 3505s
+125 – 211 3141s
+Softsign 6ms 31ms 23 s
 epoch
-ReLU 6ms 31ms 23 s 118 – 192 3449s
+122 – 205 3505s
+ReLU 6ms 31ms 23 s
 epoch
-Softplus 6ms 31ms 24 s 101 – 165 2718s
+118 – 192 3449s
+Softplus 6ms 31ms 24 s
 epoch
-S2ReLU 5ms 32ms 26 s 108 – 209 3231s
+101 – 165 2718s
+S2ReLU 5ms 32ms 26 s
 epoch
-LReLU 7ms 34ms 25 s 109 – 198 3388s
+108 – 209 3231s
+LReLU 7ms 34ms 25 s
 epoch
-PReLU 7ms 34ms 28 s 131 – 215 3970s
+109 – 198 3388s
+PReLU 7ms 34ms 28 s
 epoch
-ELU 6ms 31ms 23 s 146 – 232 3692s
+131 – 215 3970s
+ELU 6ms 31ms 23 s
 epoch
+146 – 232 3692s
 Table 5.12.: Training time and inference time of adjusted baseline models trained with different
 activation functions on GTX 970 GPUs on CIFAR-100. It was expected that the
 identity is the fastest function. This result is likely an implementation specific problem
 of Keras 2.0.4 or Tensorflow 1.1.0.
 63
 5. Experimental Evaluation
-Single model Ensemble Epochs
 Function
+Single model Ensemble Epochs
 Accuracy std Accuracy Range Mean
 Identity 99.45% σ = 0.09 99.63% 55 – 77 62.2
 Logistic 97.27% σ = 2.10 99.48% 37 – 76 54.5
@@ -2129,10 +2733,12 @@ such as self-driving cars is that they increase the computation by a factor of n
 why they improve the test accuracy is by reducing the variance.
 The idea of label smoothing is to use the ensemble prediction of the training data as labels
 for another classifier. For every element x of the training set, the one-hot encoded target
-t(x) is smoothed by the ensemble prediction y (x)
+t(x) is smoothed by the ensemble prediction y
 E
-t(cid:48)(x) = α·t(x)+(1−α)y (x)
+(x)
+t(cid:48)(x) = α·t(x)+(1−α)y
 E
+(x)
 where α ∈ [0,1] is the smoothing factor.
 There are three reasons why label smoothing could be beneficial:
 • Training speed: The ensemble prediction contains more information about the
@@ -2150,12 +2756,14 @@ that the classifier gets into bad local minima.
 be clear which label is the correct one. Also, labeling errors can be present in training
 datasets. Those errors severely harm the training. By smoothing the labels errors
 could be relaxed.
-10 models m are trained with the α = 0.5 smoothed labels from the prediction
+10 models m
 smooth
+are trained with the α = 0.5 smoothed labels from the prediction
 of an ensemble of 10 baseline models. The mean accuracy of the models trained on the
 smoothedtrainingsetlabelswas63.61%(+0.23%)andthestandarddeviationwasσ = 0.72
-(+0.17%). Theensembleof10m modelsachieved64.79%accuracy(+0.09%). Hence
+(+0.17%). Theensembleof10m
 smooth
+modelsachieved64.79%accuracy(+0.09%). Hence
 the effect of this kind of label smoothing on the final accuracy is questionable.
 The training speed didn’t noticeably change either: The number of trained epochs ranged
 from 144 to 205, the mean number of epochs was 177. The baseline training ranged from
@@ -2175,8 +2783,9 @@ map, the bias is removed
 • More filters in the first layers
 ThedetailedarchitectureisgiveninTable5.14andvisualizedinFigure5.16. Theevaluation
 is given in Table 5.15 and the timing comparison is given in Table 5.16.
-# Type Filters @ Parameters FLOPs Output size
+# Type Filters @
 Patch size / stride
+Parameters FLOPs Output size
 Input 0 0 3@32× 32
 1 Convolution 69@3×3×3 /1 1932 3744768 69@32× 32
 2 BN + ELU 138 353418 69@32× 32
@@ -2200,8 +2809,11 @@ Dropout 0.5 0 0 512@ 1× 1
 15 Convolution k @1×1×512/1 512·k 512·k k @ 1× 1
 Global avg Pooling 1×1 0 k k @ 1× 1
 16 BN + Softmax 2k 7k k @ 1× 1
-(cid:80) 514k 520k 179200+2k
-+947654 +87870996
+(cid:80) 514k
++947654
+520k
++87870996
+179200+2k
 Table 5.14.: Optimized architecture with 3 input channels of size 32×32. All convolutional layers
 use SAME padding, except for layer 11 which used VALID padding in order to decrease
 the feature map size to 1×1. If the input feature map is bigger than 32×32, for each
@@ -2209,17 +2821,38 @@ power of two there are two Convolution + BN + ELU blocks and one Max pooling
 block added. This is the framed part in the table.
 66
 5.15. Optimized Classifier
-Input maxpooling3×3/2 maxpooling3×3/2 maxpooling3×3/2 C*512@1×1/1
-32×32 16×16 8×8 4×4 1×1
-C69@3×3/1 C64@3×3/1 C64@3×3/1 C*512@4×4/1(V) BN+ELU
-BN+ELU BN+ELU BN+ELU BN+ELU Dropout,p=0.5
-C69@3×3/1 C64@3×3/1 Dropout,p=0.5 C*k@1×1/1
-BN+ELU BN+ELU GlobalAVGpooling
+32×32
+Input
+C69@3×3/1
+BN+ELU
+C69@3×3/1
+BN+ELU
+16×16
+maxpooling3×3/2
+C64@3×3/1
+BN+ELU
+C64@3×3/1
+BN+ELU
+8×8
+maxpooling3×3/2
+C64@3×3/1
+BN+ELU
+4×4
+maxpooling3×3/2
+C*512@4×4/1(V)
+BN+ELU
+Dropout,p=0.5
+1×1
+C*512@1×1/1
+BN+ELU
+Dropout,p=0.5
+C*k@1×1/1
+GlobalAVGpooling
 BN+Softmax
 Figure 5.16.: Architecture of the optimized model. C 32@3×3/1 is a convolutional layer with
 32 filters of kernel size 3×3 with stride 1. The * indicates that no bias is used.
-Single Model Accuracy Ensemble of 10
 Dataset
+Single Model Accuracy Ensemble of 10
 Training Set Test Set Training Set Test Set
 Asirra 95.83% σ = 4.70 90.75% σ = 4.73 98.78% 93.09%
 CIFAR-10 94.58% σ = 0.70 87.92% σ = 0.46 96.47% 89.86%
@@ -2234,8 +2867,8 @@ used in the ensemble. The empirical standard deviation σ of the accuracy is als
 CIFAR-10, CIFAR-100 and STL-10 models use test-time transformations. None of the
 models uses unlabeled data or data from other datasets. For MNIST, GTSRB, SVHN
 and HASY, no test time transformations are used.
-Inference per Training
 Network GPU Tensorflow
+Inference per Training
 1 Image 128 images time / epoch
 Optimized Default Intel i7-4930K 5ms 432ms 386s
 Optimized Optimized Intel i7-4930K 4ms 307ms 315s
@@ -2281,8 +2914,9 @@ improve the results when the number of epochs is fixed, but notably improved the
 when the training loss was used as the early stopping criterion.
 5.17. Regularization
 Stronger regularization might even improve the results when using the training loss as an
-early stopping criterion. (cid:96) regularization with a weighting factor of λ = 0.0001 is used in
+early stopping criterion. (cid:96)
 2
+regularization with a weighting factor of λ = 0.0001 is used in
 all other experiments. While the accuracy as shown in Table 5.19 does not show a clear
 pattern, the number of epochs increases with lower model regularization (see Table 5.20).
 2Except data augmentation and test time transformations.
@@ -2290,8 +2924,8 @@ pattern, the number of epochs increases with lower model regularization (see Tab
 4Only 3 models are in this ensemble due to the long training time of more than 8 hours per model.
 68
 5.17. Regularization
-Early Stopping Fixed epochs
 Dataset
+Early Stopping Fixed epochs
 val. acc train loss
 Asirra 93.09% 96.01%3 96.01%
 CIFAR-10 89.86% 91.75% 88.88%
@@ -2304,21 +2938,23 @@ compared training setups without a validation set and thus more training data. T
 second column uses the training loss as a stopping criterion, the third column uses a
 fixed number of epochs which is equal to the mean number of training epochs of the
 models with early stopping on the validation set accuracy.
-Single Model Accuracy Ensemble of 10
 λ
+Single Model Accuracy Ensemble of 10
 Training Set Test Set Training Set Test Set
 λ = 0.01 73.83% σ = 1.78 58.94% σ = 1.33 87.78% 69.98%
 λ = 0.001 82.86% σ = 0.89 63.03% σ = 0.67 91.86% 71.02%
 λ = 0.0001 77.96% σ = 2.18 64.42% σ = 0.73 81.44% 67.03%
-Table 5.19.: Different choices of (cid:96) model regularization applied to the optimized model.
+Table 5.19.: Different choices of (cid:96)
 2
+model regularization applied to the optimized model.
 λ min max mean std
 λ = 0.01 457 503 404.6 37.2
 λ = 0.001 516 649 588.4 41.6
 λ = 0.0001 579 833 696.1 79.1
 Table 5.20.: Training time in epochs of models with early stopping on training loss by different
-choices of (cid:96) model regularization applied to the optimized model.
+choices of (cid:96)
 2
+model regularization applied to the optimized model.
 69
 5. Experimental Evaluation
 70
@@ -2408,8 +3044,8 @@ A. Figures, Tables and Algorithms
 (a) Original image (b) Smoothing filter (c) Laplace edge detection filter
 (d) Sobel edge detection filter (e) Prewitt edge detection filter (f) Canny filter
 Figure A.1.: Examples of image filters. Best viewed in electronic form.
-99-percentile interval
 Layer
+99-percentile interval
 filter bias
 1 [-0.50, 0.48] [-0.06, 0.07]
 3 [-0.21, 0.19] [-0.07, 0.07]
@@ -2437,8 +3073,9 @@ j ← randomInteger(1,...,n)\{i}
 p ← randomUniform(0,1)
 C(cid:48) ← swap(C,i,j)
 s ← accuracy(C(cid:48))
-if p < exp(s−bestScore) then
+if p < exp(s−bestScore
 T
+) then
 C ← C(cid:48)
 if s > bestScore then
 bestScore ← s
@@ -2453,8 +3090,8 @@ return bestM
 76
 Figure A.3.: Maximum weight updates between epochs by layer. The model is the baseline model,
 but with layer 5 reduced to 3 filters.
-Single model Ensemble of 10 Epochs
 Function
+Single model Ensemble of 10 Epochs
 Training set Test set Train Test Range Mean
 Identity 87.92% σ = 0.40 84.69% σ = 0.08 88.59% 85.43% 92 – 140 114.5
 Logistic 81.46% σ = 5.08 79.67% σ = 4.85 86.38% 84.60% 58 – 91 77.3
@@ -2471,8 +3108,8 @@ HASYv2. For LReLU, α=0.3 was chosen.
 77
 Figure A.4.: Sum of weight updates between epochs by layer. The model is the baseline model, but
 with layer 5 reduced to 3 filters.
-Single model Ensemble of 10 Epochs
 Function
+Single model Ensemble of 10 Epochs
 Training set Test set Train Test Range Mean
 Identity 87.49% σ = 2.50 69.86% σ = 1.41 89.78% 71.90% 51 – 65 53.4
 Logistic 45.32% σ = 14.88 40.85% σ = 12.56 51.06% 45.49% 38 – 93 74.6
@@ -2551,49 +3188,82 @@ Table B.2 shows six commonly used weight initialization schemes. Several schemes
 same idea, that unit-variance is desired for each layer as the training converges faster [IS15].
 Name α β γ Reference
 Constant α = 0 β = 0 γ ≥ 0 used by [ZF14]
+Xavier/Glorot uniform α =
 (cid:113)
-Xavier/Glorot uniform α = 6 β = 0 γ = 0 [GB10]
+6
 nin+nout
-(cid:16) (cid:17)2
-Xavier/Glorot normal α = 0 β = 2 γ = 0 [GB10]
+β = 0 γ = 0 [GB10]
+Xavier/Glorot normal α = 0 β =
+(cid:16)
+2
 (nin+nout)
-He α = 0 β = 2 γ = 0 [HZRS15b]
+(cid:17)2
+γ = 0 [GB10]
+He α = 0 β = 2
 nin
+γ = 0 [HZRS15b]
 Orthogonal — — γ = 0 [SMG13]
 LSUV — — γ = 0 [MM15]
 Table B.2.: Weight initialization schemes of the form w ∼α·U[−1,1]+β·N(0,1)+γ.
-n ,n are the number of units in the previous layer and the next layer. Typically,
-in out
+n
+in
+,n
+out
+are the number of units in the previous layer and the next layer. Typically,
 biasesareinitializedwithconstant0andweightsbyoneoftheotherschemesto prevent
 unit-coadaptation. However, dropout makes it possible to use constant initialization for
 all parameters.
 LSUV and Orthogonal initialization cannot be described with this simple pattern.
 B.4. Objective function
 For classification tasks, the cross-entropy
+E
+CE
+(W) = −
+(cid:88)
+x∈X
 K
-(cid:88)(cid:88)
-E (W) = − [txlog(ox)+(1−tx)log(1−ox)]
-CE k k k k
-x∈Xk=1
+(cid:88)
+k=1
+[tx
+k
+log(ox
+k
+)+(1−tx
+k
+)log(1−ox
+k
+)]
 is by far the most commonly used objective function (e.g., used by [ZF14]). In this equation,
-X is the set of training examples, K is the number of classes, tx ∈ {0,1} indicates if the
+X is the set of training examples, K is the number of classes, tx
 k
-training example x is of class k, ox is the output of the classifier for the training example x
+∈ {0,1} indicates if the
+training example x is of class k, ox
 k
+is the output of the classifier for the training example x
 and class k.
 However, regularization terms weighted with a constant λ ∈ (0,+∞) are sometimes added:
-• LASSO: (cid:96) (e.g., used in [HPTD15])
+• LASSO: (cid:96)
 1
-• Weight decay: (cid:96) (e.g., λ = 0.0005 as in [MSM16])
+(e.g., used in [HPTD15])
+• Weight decay: (cid:96)
 2
+(e.g., λ = 0.0005 as in [MSM16])
 • Orthogonality regularization (|(WT ·W −I)|, see [VTKP17])
 81
 B.5. Optimization Techniques
 Most relevant optimization techniques for CNNs are based on SGD, which updates the
 weights according to the rule
+w
+ji
+← w
+ji
++∆w
+ji
+with ∆w
+ji
+= −η
 ∂E
-w ← w +∆w with ∆w = −η x
-ji ji ji ji
+x
 ∂w
 ji
 where η ∈ (0,1), typically 0.01 (e.g., [MSM16]), is called the learning rate.
@@ -2602,32 +3272,53 @@ mini-batch sizes are |B| ∈ {32,64,128,256,512}, e.g. [ZF14]). Larger mini-batc
 lead to sharp minima and thus poor generalization [KMN+16]. Smaller mini-batch sizes
 lead to longer training times due to computational overhead and to more training steps due
 to gradient noise.
+w
+ji
+← w
+ji
++∆w
+ji
+with ∆w
+ji
+= −η
 ∂E
-w ← w +∆w with ∆w = −η B
-ji ji ji ji
+B
 ∂w
 ji
 Nine variations which adjust the learning rate during training are:
 • Momentum:
+w (t+1)
+ji
+← w (t)
+ji
++∆w (t+1)
+ji
+with ∆w (t+1)
+ji
+= −η
 ∂E
-w(t+1) ← w(t) +∆w(t+1) with ∆w(t+1) = −η B +α∆w(t)
-ji ji ji ji ∂w ji
+B
+∂w
+ji
++α∆w (t)
 ji
 with α ∈ [0,1], typically 0.9 (e.g., [ZF14, MSM16])
 • Adagrad [DHS11]
 • RProp and the mini-batch version RMSProp [TH12]
 • Adadelta [Zei12]
-• Power Scheduling [Xu11]: η(t) = η(0)(1+a·t)−c, where t ∈ N is the training step,
+• Power Scheduling [Xu11]: η(t) = η(0)(1+a·t)−c, where t ∈ N
 0
+is the training step,
 a,c are constants.
 • Performance Scheduling [SHY+13]: Measure the error on the cross validation set and
 decrease the learning rate when the algorithms improvement is below a threshold.
-• Exponential Decay Learning Rate [SHY+13]: η(t) = η(0)·10− kt where t ∈ N is the
-0
-training step, η(0) is the initial learning rate, k ∈ N is the number of training steps
+• Exponential Decay Learning Rate [SHY+13]: η(t) = η(0)·10−t k where t ∈ N 0 is the
+training step, η(0) is the initial learning rate, k ∈ N
 ≥1
-until the learning rate is decreased by 1 th.
+is the number of training steps
+until the learning rate is decreased by 1
 10
+th.
 • NewbobScheduling[new00]: StartwithPerformanceScheduling,thenuseExponential
 Decay Scheduling.
 • Adam and AdaMax [KB14]
@@ -2657,41 +3348,85 @@ CNNs have the following hyperparameters:
 – Activation Functions as shown in Table B.3
 – For more, see Sections 2.2 and 2.3.
 Name Function ϕ(x) Range of Values ϕ(cid:48)(x) Used by
+Sign function†
 
-+1 if x ≥ 0
-Sign function† {−1,1} 0 [KS02]
-−1 if x < 0
-
-+1 if x > 0
-Heaviside {0,1} 0 [MP43]
-0 if x < 0
+
+
++1 if x ≥ 0
+−1 if x < 0
+{−1,1} 0 [KS02]
+Heaviside
 step function†
-Logistic function 1 [0,1] ex [DJ99]
-1+e−x (ex+1)2
-Tanh ex−e−x = tanh(x) [−1,1] sech2(x) [LBBH98, Tho14a]
-ex+e−x
 
-1 if x > 0
-ReLU† max(0,x) [0,+∞) [KSH12]
-0 if x < 0
+
+
++1 if x > 0
+0 if x < 0
+{0,1} 0 [MP43]
+Logistic function 1
+1+e−x
+[0,1] ex
+(ex+1)2
+[DJ99]
+Tanh ex−e−x
+ex+e−x
+= tanh(x) [−1,1] sech2(x) [LBBH98, Tho14a]
+ReLU† max(0,x) [0,+∞)
 
-1 if x > 0
-LReLU†2 ϕ(x) = max(αx,x) (−∞,+∞) [MHN13, HZRS15b]
-α if x < 0
+
+
+1 if x > 0
+0 if x < 0
+[KSH12]
+LReLU†2
 (PReLU)
-Softplus log(ex+1) (0,+∞) ex [DBB+01, GBB11]
+ϕ(x) = max(αx,x) (−∞,+∞)
+
+
+
+1 if x > 0
+α if x < 0
+[MHN13, HZRS15b]
+Softplus log(ex+1) (0,+∞) ex
 ex+1
- 
-x if x > 0 1 if x > 0
-ELU (−∞,+∞) [CUH15]
-α(ex−1) if x ≤ 0 αex otherwise
-Softmax‡ o(x) = exj [0,1]K o(x) · (cid:80)K k=1exk−exj [KSH12, Tho14a]
-j (cid:80)K exk j (cid:80)K exk
-k=1  k=1
-1 if x = maxx
-Maxout‡ o(x) = max x (−∞,+∞) i [GWFM+13]
+[DBB+01, GBB11]
+ELU
+
+
+
+x if x > 0
+α(ex−1) if x ≤ 0
+(−∞,+∞)
+
+
+
+1 if x > 0
+αex otherwise
+[CUH15]
+Softmax‡ o(x)
+j
+= exj
+(cid:80)K
+k=1
+exk
+[0,1]K o(x)
+j
+· (cid:80)K k=1 exk−exj
+(cid:80)K
+k=1
+exk
+[KSH12, Tho14a]
+Maxout‡ o(x) = max
 x∈x
-0 otherwise
+x (−∞,+∞)
+
+
+
+1 if x
+i
+= maxx
+0 otherwise
+[GWFM+13]
 Table B.3.: Overview of activation functions. Functions marked with † are not differentiable at 0
 and functions marked with ‡ operate on all elements of a layer simultaneously. The
 hyperparameters α ∈ (0,1) of Leaky ReLU and ELU are typically α = 0.01. Other
@@ -2703,30 +3438,42 @@ Softmax is the standard activation function for the last layer of a classificati
 as it produces a probability distribution. See Figure B.1 for a plot of some of them.
 2α is a hyperparameter in leaky ReLU, but a learnable parameter in the parametric ReLU function.
 84
-2.0 y
-ϕ (x) = 1
-1 1+e−x
-ϕ (x) = tanh(x) 1.5
+−2.0 −1.5 −1.0 −0.5 0.5 1.0 1.5 2.0
+−1.0
+−0.5
+0.5
+1.0
+1.5
+2.0
+x
+y
+ϕ
+1
+(x) = 1
+1+e−x
+ϕ
 2
-ϕ (x) = max(0,x)
+(x) = tanh(x)
+ϕ
 3
-ϕ (x) = log(ex+1)
-4 1.0
-ϕ (x) = max(x,ex−1)
+(x) = max(0,x)
+ϕ
+4
+(x) = log(ex+1)
+ϕ
 5
-0.5
-x
-−2.0 −1.5 −1.0 −0.5 0.5 1.0 1.5 2.0
-−0.5
-−1.0
+(x) = max(x,ex−1)
 Figure B.1.: Activation functions plotted in [−2,+2]. tanh and ELU are able to produce negative
 numbers. The image of ELU, ReLU and Softplus is not bound on the positive side,
 whereas tanh and the logistic function are always below 1.
 B.7. Regularization
 Regularization techniques aim to make the fitted function smoother and reduce overfitting.
 Regularization techniques are:
-• (cid:96) , (cid:96) , and Orthogonality regularization: See Appendix B.4
-1 2
+• (cid:96)
+1
+, (cid:96)
+2
+, and Orthogonality regularization: See Appendix B.4
 • Max-norm regularization (e.g. used ins [SHK+14])
 • Dropout (introduced in [SHK+14]), DropConnect (see [WZZ+13]), Stochastic Depth
 (see [HSL+16])
@@ -2741,52 +3488,89 @@ C. Calculating Network Characteristics
 C.1. Parameter Numbers
 • A fully connected layer with n nodes, k inputs has n·(k+1) parameters. The +1 is
 due to the bias.
-• A convolutional layer i with k filters of size n×m being applied to k feature maps
-i i−1
-has k ·k (n·m+1) parameters. The +1 is due to the bias.
-i i−1
-• A fully connected layer with n nodes after k feature maps of size m × m has
-1 2
-n·(k·m ·m +1) parameters.
-1 2
+• A convolutional layer i with k
+i
+filters of size n×m being applied to k
+i−1
+feature maps
+has k
+i
+·k
+i−1
+(n·m+1) parameters. The +1 is due to the bias.
+• A fully connected layer with n nodes after k feature maps of size m
+1
+× m
+2
+has
+n·(k·m
+1
+·m
+2
++1) parameters.
 • A dense block with a depth of L, a growth rate of n and 3×3 filters has L+n·32+
-32·n2(cid:80)L (L−i) = L+9n+9n2L2−L parameters.
-i=0 2
+32·n2(cid:80)L
+i=0
+(L−i) = L+9n+9n2L2−L
+2
+parameters.
 According to [HPTD15], AlexNet has 60 million parameters which is roughly the number
 calculated in Table D.2.
 C.2. FLOPs
 The FLOPs of a layer depend on the implementation, the compiler and the hardware. Hence
 the following number are only giving rough estimates.
-In the following, n denotes the number of FLOPs to compute the non-linearity ϕ. For
+In the following, n
 ϕ
-simplicity, n = 5 was chosen.
+denotes the number of FLOPs to compute the non-linearity ϕ. For
+simplicity, n
 ϕ
+= 5 was chosen.
 • A fully connected layer with n nodes and k inputs has to calculate ϕ(W ·x+b) with
 W ∈ Rn×k, x ∈ Rk×1, b ∈ Rn×1. It hence needs about n·(k+(k−1)+1) = 2nk
 additions / multiplications before the non-linearity ϕ is calculated. The total number
-of FLOPs is 2·n·k+n·n .
+of FLOPs is 2·n·k+n·n
 ϕ
-• In the following, biases are ignored. A convolutional layer with k filters of size n×m
+.
+• In the following, biases are ignored. A convolutional layer with k
+i
+filters of size n×m
+being applied to k
+i−1
+filter maps of size w×h results in k
+i
+filter maps of size w×h if
+padding is applied. For each element of each filter map, n·m·k
+i−1
+multiplications and
+(n·m·k
+i−1
+−1) additions have to be made. This results in (2nmk
+i−1
+−1)·(k
 i
-being applied to k filter maps of size w×h results in k filter maps of size w×h if
-i−1 i
-padding is applied. For each element of each filter map, n·m·k multiplications and
+·w·h)
+operations. The total number of FLOPs is (2·n·m·k
 i−1
-(n·m·k −1) additions have to be made. This results in (2nmk −1)·(k ·w·h)
-i−1 i−1 i
-operations. The total number of FLOPs is (2·n·m·k −1)·(k ·w·h)+k ·w·h·n .
-i−1 i i ϕ
+−1)·(k
+i
+·w·h)+k
+i
+·w·h·n
+ϕ
+.
 This is, of course, a naive way of calculating a convolution. There are other ways of
 calculating convolutions [LG16].
 87
 • Afullyconnectedlayerwithnnodesafterk featuremapsofsizew×hneeds2n(k·w·h)
-FLOPs. The total number of FLOPs is 2n·(k·w·h)+n·n .
+FLOPs. The total number of FLOPs is 2n·(k·w·h)+n·n
 ϕ
+.
 • As Dropout is only calculated during training, the number of FLOPs was set to 0.
 • The number of FLOPs for max pooling is dominated by the number of positions to
 which the pooling kernel is applied. For a feature map of size w×h a max pooling
-filter with stride s gets applied w·h. The number of FLOPs per application depends
+filter with stride s gets applied w·h
 s2
+. The number of FLOPs per application depends
 on the kernel size. A 2×2 kernel is assumed to need 5 FLOPs.
 • The number of FLOPs for Batch Normalization is the same as the number of its
 parameters.
@@ -2834,8 +3618,9 @@ non-linear combination of the features of the feature maps.
 Its exact architecture is shown in Figure D.1 and described in Table D.1. It reaches a test
 error rate of 0.8% on MNIST.
 Figure D.1.: Architecture of LeNet-5 as shown in [LBBH98].
-# Type Filters @ Parameters FLOPs Output size
+# Type Filters @
 Patch size / stride
+Parameters FLOPs Output size
 Input 0 0 1@32×32
 1 Convolution 6@5×5×1/1 156 307800 6@28×28
 2 Scaled average pooling 2×2 /2 2 336 6@14×14
@@ -2853,14 +3638,16 @@ than fully connected layers.
 D.2. AlexNet
 ThefirstCNNwhichachievedmajorimprovementsontheImageNetdatasetwasAlexNet[KSH12].
 ItsarchitectureisshowninFigureD.2anddescribedinTableD.2. Ithasabout60·106param-
-eters. A trained AlexNet can be downloaded at www.cs.toronto.edu/g˜uerzhoy/tf_alexnet.
-Note that the uncompressed size is at least 60965224floats·32 bit ≈ 244MB.
+eters. A trained AlexNet can be downloaded at www.cs.toronto.edu/˜ guerzhoy/tf_alexnet.
+Note that the uncompressed size is at least 60965224floats·32 bit
 float
+≈ 244MB.
 Figure D.2.: Architecture of AlexNet as shown in [KSH12]: Convolutional Layers are followed
 by pooling layers multiple times. At the end, a fully connected network is applied.
 Conceptually, it is identical to the architecture of LeNet-5 (see Figure D.1).
-# Type Filters @ Parameters FLOPs Output size
+# Type Filters @
 Patch size / stride
+Parameters FLOPs Output size
 Input 3 @ 224×224
 1 Convolution 96 @ 11×11×3 / 4 34944 211M 96@ 55× 55
 LCN 12M 96@ 55× 55
@@ -2893,22 +3680,47 @@ learn parameters. A major difference compared to AlexNet is that VGG-16 uses onl
 filters and is much deeper. A visualization of the architecture is shown in Figure D.3 and a
 detailed textual description is given in Table D.3.
 AtrainedVGG-16DforTensorflowcanbedownloadedathttps://github.com/machrisaa/
-tensorflow-vgg. Note that the uncompressed size is at least 138357544floats·32 bit ≈
+tensorflow-vgg. Note that the uncompressed size is at least 138357544floats·32 bit
 float
+≈
 520MB. The downloaded Numpy binary file npz needs 553MB without compression and
 514MB with compression.
-Input maxpooling2×2/1 maxpooling2×2/1 maxpooling2×2/1 maxpooling2×2/1 maxpooling2×2/1
-224×224 112×112 56×56 28×28 14×14 7×7
-C64@3×3/1 C128@3×3/1 C256@3×3/1 C512@3×3/1 C512@3×3/1 FullyConnected4096
-C64@3×3/1 C128@3×3/1 C256@3×3/1 C512@3×3/1 C512@3×3/1 Dropout,p=0.5
-C256@3×3/1 C512@3×3/1 C512@3×3/1 FullyConnected4096
+224×224
+Input
+C64@3×3/1
+C64@3×3/1
+112×112
+maxpooling2×2/1
+C128@3×3/1
+C128@3×3/1
+56×56
+maxpooling2×2/1
+C256@3×3/1
+C256@3×3/1
+C256@3×3/1
+28×28
+maxpooling2×2/1
+C512@3×3/1
+C512@3×3/1
+C512@3×3/1
+14×14
+maxpooling2×2/1
+C512@3×3/1
+C512@3×3/1
+C512@3×3/1
+7×7
+maxpooling2×2/1
+FullyConnected4096
+Dropout,p=0.5
+FullyConnected4096
 Dropout,p=0.5
 FullyConnected1000
 Figure D.3.: Architecture of VGG-16 D. C 512@3×3/1 is a convolutional layer with 512 filters of
 kernel size 3×3 with stride 1. All convolutional layers use SAME padding.
 92
-# Type Filters @ Parameters FLOPs Output size
+# Type Filters @
 Patch size / stride
+Parameters FLOPs Output size
 Input 3 @ 224×224
 1 Convolution 64 @ 3×3× 3 / 1 1792 186M 64@ 224×224
 2 Convolution 64 @ 3×3× 64 / 1 36928 3712M 64@ 224×224
@@ -2988,39 +3800,51 @@ Well-known benchmark datasets for classification problems in computer vision are
 in Table E.1. The best results known to me are given in Table E.2. However, every semantic
 segmentation dataset (e.g., PASCAL VOC) can also be used to benchmark image classifiers
 using Algorithm 2.
-NumberNumber
+Database
 Image Resolution
-Database of of Channels Data source
 (width × height)
-Images Classes
+Number
+of
+Images
+Number
+of
+Classes
+Channels Data source
 MNIST 28px×28px 70000 10 1 [YL98, LBBH98]
 HASYv2 32px×32px 168233 369 1 [Tho17a]
-[NWC+11b],
 SVHN 32px×32px 630420 10 3
+[NWC+11b],
 [NWC+11a]
 CIFAR-10 32px×32px 60000 10 3 [Kri, KH09]
 CIFAR-100 32px×32px 60000 100 3 [Kri, KH09]
 STL-10 96px×96px 13000 10 3 [CLN11, CLN10]
+Caltech-101
 (80px−3481px)
-Caltech-101 9144 102 3 [FFP03, FFFP06]
 ×(92px−3999px)
+9144 102 3 [FFP03, FFFP06]
+Caltech-256
 (75px−7913px)
-Caltech-256 30607 257 3 [Gri06, GG07]
 ×(75px−7913px)
+30607 257 3 [Gri06, GG07]
+ILSVRC 20121
 (8px−9331px)
-ILSVRC 20121 1.2·106 1000 3 [Ima12, RDS+14]
 ×(10px−6530px)
+1.2·106 1000 3 [Ima12, RDS+14]
+Places3652
 (290px−3158px)
-Places3652 1.8·106 365 3 [Zho16, ZKL+16]
 ×(225px−2630px)
+1.8·106 365 3 [Zho16, ZKL+16]
+GTSRB
 (25px−266px)
-GTSRB 51839 43 3 [SSSI, SSSI12]
 ×(25px−232px)
+51839 43 3 [SSSI, SSSI12]
+Asirra3
 (4px−500px)
-Asirra3 25000 2 3 [Asi17, EDHS07]
 ×(4px−500px)
+25000 2 3 [Asi17, EDHS07]
+Graz-02
 480px×640px
-Graz-02 and 640px×480px 1096 3 3 [Mar08, MS07]
+and 640px×480px 1096 3 3 [Mar08, MS07]
 Table E.1.: An overview over publicly available image databases for classification. The number
 of images row gives the sum of the training and the test images. Some datasets, like
 SVHN, have additional unlabeled data which is not given in this table.
@@ -3028,8 +3852,8 @@ SVHN, have additional unlabeled data which is not given in this table.
 2The dimensions are only calculated for the validation set.
 3Asirra is a CAPTCHA created by Microsoft and was used in the “Cats vs Dogs” competition on Kaggle
 97
-Achieved /
 Dataset Model type / name Result Score
+Achieved /
 Claimed by
 MNIST — 0.21% error [WZZ+13]
 HASYv2 TF-CNN 81.00% accuracy [Tho17a]
@@ -3045,27 +3869,37 @@ Asirra SVM 82.7% accuracy [Gol08]
 Graz-02 Optimal NBNN 78.98% accuracy [BMDP10]
 Table E.2.: An overview over state of the art results achieved in computer vision datasets.
 Algorithm 2 Create a classification dataset from a semantic segmentation dataset
-Require: Semantic segmentation dataset (D )
+Require: Semantic segmentation dataset (D
 S
-procedure CreateDataset(Annotated dataset D )
+)
+procedure CreateDataset(Annotated dataset D
 S
-D ← List
+)
+D
 C
+← List
 w ← desired image width
 h ← desired image height
-for Image and associated label (x,y) in D do
+for Image and associated label (x,y) in D
 S
+do
 i ← randint(0,L.width−w)
 j ← randint(0,L.height−h)
-c ← crop(y,(i,j),(i+w,j +h))
+c
 L
+← crop(y,(i,j),(i+w,j +h))
 if at least 50% of s are of one class then
-c ← crop(x,(i,j),(i+w,j +h))
+c
+I
+← crop(x,(i,j),(i+w,j +h))
+D.append((c
 I
-D.append((c ,c ))
-I L
-return (D )
+,c
+L
+))
+return (D
 C
+)
 98
 F. List of Tables
 2.1 Pooling types . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 8
diff --git a/read/results/pdfplumber/2201.00021.txt b/read/results/pdfplumber/2201.00021.txt
index fa83b99..0393d19 100644
--- a/read/results/pdfplumber/2201.00021.txt
+++ b/read/results/pdfplumber/2201.00021.txt
@@ -9,189 +9,360 @@ e-mail:yyan@mpifr-bonn.mpg.de
 2 AstronomyDepartment,FacultyofScience,KingAbdulazizUniversity,P.O.Box80203,Jeddah21589,SaudiArabia
 3 XinjiangAstronomicalObservatory,ChineseAcademyofSciences,830011Urumqi,PRChina
 4 NationalRadioAstronomyObservatory,520EdgemontRoad,Charlottesville,VA22903-2475,USA
-2202
 5 CenterforAstrophysics,GuangzhouUniversity,510006Guangzhou,People’sRepublicofChina
 6 SchoolofAstronomyandSpaceScience,NanjingUniversity,163XianlinAvenue,Nanjing210023,People’sRepublicofChina
 7 KeyLaboratoryofModernAstronomyandAstrophysics(NanjingUniversity),MinistryofEducation,Nanjing210023,People’s
 RepublicofChina
-rpA
 Received13December2021/Accepted30December2021
-ABSTRACT 9
+ABSTRACT
 Context. Molecularmaserlinesaresignpostsofhigh-massstarformation,probingtheexcitationandkinematicsofverycompact
-]AG.hp-ortsa[
 regionsinthecloseenvironmentofyoungstellarobjectsandprovidingusefultargetsfortrigonometricparallaxmeasurements.
-Aims.OnlyafewNH (9,6)masersareknownsofar,andtheiroriginisstillpoorlyunderstood.HereweaimtofindnewNH (9,6)
-3 3
-maserstoprovideabetterobservationalbasisforstudyingtheirroleinhigh-massstar-formingregions.
-Methods.WecarriedoutNH (9,6)observationstowardCepheusAandG34.26+0.15withtheEffelsberg100-metertelescope(beam
+Aims.OnlyafewNH
+3
+(9,6)masersareknownsofar,andtheiroriginisstillpoorlyunderstood.HereweaimtofindnewNH
 3
-size49(cid:48)(cid:48))andtheKarlG.JanskyVeryLargeArray(JVLA;beamsizeabout1(cid:48).(cid:48)2).
-Results.WediscoverednewNH (9,6)masersinCepAandG34.26+0.15,whichincreasesthenumberofknownhigh-massstar-
+(9,6)
+maserstoprovideabetterobservationalbasisforstudyingtheirroleinhigh-massstar-formingregions.
+Methods.WecarriedoutNH
 3
-formingregionshostingNH (9,6)masersfromfivetoseven.Long-termmonitoring(20months)atEffelsbergshowsthattheintensity
+(9,6)observationstowardCepheusAandG34.26+0.15withtheEffelsberg100-metertelescope(beam
+size49(cid:48)(cid:48))andtheKarlG.JanskyVeryLargeArray(JVLA;beamsizeabout1(cid:48)(cid:48) .2).
+Results.WediscoverednewNH
 3
+(9,6)masersinCepAandG34.26+0.15,whichincreasesthenumberofknownhigh-massstar-
+formingregionshostingNH 3 (9,6)masersfromfivetoseven.Long-termmonitoring(20months)atEffelsbergshowsthattheintensity
 ofthe(9,6)maserinG34.26+0.15isdecreasing,whiletheCepAmaserremainsstable.ComparedtotheEffelsbergdataandassuming
-linearvariationsbetweentheepochsofobservation,theJVLAdataindicatenomissingflux.ThissuggeststhattheNH (9,6)emission
+linearvariationsbetweentheepochsofobservation,theJVLAdataindicatenomissingflux.ThissuggeststhattheNH
 3
+(9,6)emission
 arisesfromsinglecompactemissionregionsthatarenotresolvedbytheinterferometricmeasurements.AsJVLAimagingshows,the
-NH (9,6)emissioninCepAoriginatesfromasub-arcsecond-sizedregion,slightlytothewest(0(cid:48).(cid:48)28±0(cid:48).(cid:48)10)ofthepeakposition
+NH
+3
+(9,6)emissioninCepAoriginatesfromasub-arcsecond-sizedregion,slightlytothewest(0(cid:48)(cid:48) .28±0(cid:48)(cid:48) .10)ofthepeakposition
+ofthe1.36cmcontinuumobject,HW2.InG34.26+0.15,threeNH
 3
-ofthe1.36cmcontinuumobject,HW2.InG34.26+0.15,threeNH (9,6)maserspotsareobserved:oneisclosetotheheadofthe
-3 cometaryultracompactHiiregionC,andtheothertwoareemittedfromacompactregiontothewestofthehypercompactHiiregion 3v12000.1022:viXra
+(9,6)maserspotsareobserved:oneisclosetotheheadofthe
+cometaryultracompactHiiregionC,andtheothertwoareemittedfromacompactregiontothewestofthehypercompactHiiregion
 A.
 Conclusions.Thenewlyfound(9,6)masersappeartoberelatedtooutflows.ThehigherangularresolutionofJVLAandverylong
 baselineinterferometryobservationsareneededtoprovidemoreaccuratepositionsandconstraintsforpumpingscenarios.
 Keywords. Masers–ISM:clouds–ISM:individualobjects:CepA,G34.26+0.15–ISM:Hiiregions–Radiolines:ISM
-1. Introduction et al. 2007), NH (7,7), NH (9,9), and NH (12,12) (Henkel
-3 3 3
-etal.2013).Thesehaveledtothediscoveryofmetastablemaser
-Since its discovery more than five decades ago (Cheung et al. lines in 22 different regions (Mauersberger et al. 1986, 1987;
-1968), ammonia (NH ) has been a most valuable molecule for
-3 Wilson & Henkel 1988; Wilson et al. 1990; Pratap et al. 1991;
+1. Introduction
+Since its discovery more than five decades ago (Cheung et al.
+1968), ammonia (NH 3 ) has been a most valuable molecule for
 investigating the physical properties of molecular clouds (e.g.,
-Cesaronietal.1992;Wilson&Schilke1993;Mangum&Woot-
 Ho & Townes 1983). While thermally excited transitions in
-ten1994;Kraemer&Jackson1995;Zhang&Ho1995;Zhang
 thecentimeter-wavelengthinversiontransitionsofammoniaare
-etal.1999;Walshetal.2007;Hunteretal.2008;Galván-Madrid
 regarded as a reliable thermometer of molecular clouds (e.g.,
-et al. 2009; Brogan et al. 2011; Urquhart et al. 2011; Walsh
-Walmsley & Ungerechts 1983; Danby et al. 1988), ammonia et al. 2011; Wang et al. 2012; Henkel et al. 2013; Hoffman &
+Walmsley & Ungerechts 1983; Danby et al. 1988), ammonia
 masershaveattractedattentionsincethefirstdetectionofmaser
-Joyce2014;McEwenetal.2016;Millsetal.2018;Hoggeetal.
 action in the (J,K) = (3,3) metastable (J = K) line toward the
-2019;Meietal.2020;Towneretal.2021).Comparedwiththe
 massive star-forming region W33 (Wilson et al. 1982). Subse-
-metastable ammonia masers, detected non-metastable (J > K)
 quent observations have led to the detection of new metastable
+ammonia masers, including 15NH
+3
+(3,3) (Mauersberger et al.
+1986), NH 3 (1,1) (Gaume et al. 1996), NH 3 (2,2) (Mills et al.
+2018), NH
+3
+(5,5) (Cesaroni et al. 1992), NH
+3
+(6,6) (Beuther
+(cid:63) Member of the International Max Planck Research School (IM-
+PRS)forAstronomyandAstrophysicsattheuniversitiesofBonnand
+Cologne.
+et al. 2007), NH
+3
+(7,7), NH
+3
+(9,9), and NH
+3
+(12,12) (Henkel
+etal.2013).Thesehaveledtothediscoveryofmetastablemaser
+lines in 22 different regions (Mauersberger et al. 1986, 1987;
+Wilson & Henkel 1988; Wilson et al. 1990; Pratap et al. 1991;
+Cesaronietal.1992;Wilson&Schilke1993;Mangum&Woot-
+ten1994;Kraemer&Jackson1995;Zhang&Ho1995;Zhang
+etal.1999;Walshetal.2007;Hunteretal.2008;Galván-Madrid
+et al. 2009; Brogan et al. 2011; Urquhart et al. 2011; Walsh
+et al. 2011; Wang et al. 2012; Henkel et al. 2013; Hoffman &
+Joyce2014;McEwenetal.2016;Millsetal.2018;Hoggeetal.
+2019;Meietal.2020;Towneretal.2021).Comparedwiththe
+metastable ammonia masers, detected non-metastable (J > K)
 ammoniamasertransitionsaremorenumerous.Thefirsthighly
-ammonia masers, including 15NH (3,3) (Mauersberger et al.
-3 excited non-metastable ammonia maser was detected by Mad-
-1986), NH 3 (1,1) (Gaume et al. 1996), NH 3 (2,2) (Mills et al. denetal.(1986)inthe(J,K)=(9,6)and(6,3)lines.Thereafter,
-2018), NH (5,5) (Cesaroni et al. 1992), NH (6,6) (Beuther
-3 3 many other NH non-metastable inversion transition lines have
-3
-(cid:63) Member of the International Max Planck Research School (IM- beenidentifiedasmasers,includingthe(5,3),(5,4),(6,1),(6,2),
-PRS)forAstronomyandAstrophysicsattheuniversitiesofBonnand (6,4),(6,5),(7,3),(7,4),(7,5)(7,6),(8,3),(8,4),(8,5),(8,6),(9,3),
-Cologne. (9,4),(9,5),(9,7),(9,8),(10,7),(10,8),(10,9),and(11,9)transi-
+excited non-metastable ammonia maser was detected by Mad-
+denetal.(1986)inthe(J,K)=(9,6)and(6,3)lines.Thereafter,
+many other NH
+3
+non-metastable inversion transition lines have
+beenidentifiedasmasers,includingthe(5,3),(5,4),(6,1),(6,2),
+(6,4),(6,5),(7,3),(7,4),(7,5)(7,6),(8,3),(8,4),(8,5),(8,6),(9,3),
+(9,4),(9,5),(9,7),(9,8),(10,7),(10,8),(10,9),and(11,9)transi-
 Articlenumber,page1of10
+a
+r
+X
+i
+v
+:
+2
+2
+0
+1
+.
+0
+0
+0
+2
+1
+v
+3
+[
+a
+s
+t
+r
+o
+-
+p
+h
+.
+G
+A
+]
+9
+A
+p
+r
+2
+0
+2
+2
 A&Aproofs:manuscriptno.mainArxiv
-tions (e.g., Mauersberger et al. 1987, 1988; Walsh et al. 2007; away from the source. For observations made before 2021 Au-
-Henkel et al. 2013; Mei et al. 2020). Except for the NH (3,3) gust,weusedaspectrometerthatcovered2GHzwidebackends
+tions (e.g., Mauersberger et al. 1987, 1988; Walsh et al. 2007;
+Henkel et al. 2013; Mei et al. 2020). Except for the NH
 3
-masersproposedtobeassociatedwithfoursupernovaremnants withachannelwidthof38.1kHz,correspondingto∼0.62kms−1
-(McEwenetal.2016),almostalltheotherammoniamasersare at the line’s rest frequency, 18.49939 GHz (Poynter & Kakar
-detected in high-mass star-forming regions (HMSFRs). How- 1975).Ahighspectralresolutionbackendwith65536channels
-ever, while many HMSFRs host water (H O), hydroxyl (OH), and a bandwidth of 300 MHz was employed in 2021 August,
+(3,3)
+masersproposedtobeassociatedwithfoursupernovaremnants
+(McEwenetal.2016),almostalltheotherammoniamasersare
+detected in high-mass star-forming regions (HMSFRs). How-
+ever, while many HMSFRs host water (H
 2
-or methanol (CH OH) masers, ammonia masers are quite rare providing a channel width of 0.07 km s−1 at 18.5 GHz. Point-
+O), hydroxyl (OH),
+or methanol (CH
 3
-in these sources, and the role that the environment of a young ing was checked every 2 hours using 3C 286 or NGC 7027.
-high-mass star plays in their excitation remains unclear. There- Focus calibrations were done at the beginning of the observa-
-fore, dedicated searches for ammonia masers in HMSFRs are tionsandduringsunsetandsunrisetowardtheabovementioned
-indispensable in regard to their overall incidence and associa- pointingsources.Thesystemtemperatureswere100–130Kon
-tion with different environments, which can provide additional amain-beambrightnesstemperature,T ,scale.Thisfluxden-
-MB
-constraintsonthepumpingmechanismofammoniamasers. sitywascalibratedassumingaT /S ratioof1.95K/Jy,derived
-MB
-So far, a total of 32 NH inversion transitions (∆K = 0 fromcontinuumcrossscansofNGC7027(thefluxdensitywas
+OH) masers, ammonia masers are quite rare
+in these sources, and the role that the environment of a young
+high-mass star plays in their excitation remains unclear. There-
+fore, dedicated searches for ammonia masers in HMSFRs are
+indispensable in regard to their overall incidence and associa-
+tion with different environments, which can provide additional
+constraintsonthepumpingmechanismofammoniamasers.
+So far, a total of 32 NH
 3
-and ∆J = 0) have been identified as masers. Among these, and adoptedfromOttetal.1994).Calibrationuncertaintiesareesti-
-despite arising from energy levels as high as 1090 K above matedtobe∼10%.
-the ground state, the NH (9,6) maser stands out as being the We used the GILDAS/CLASS2 package (Pety 2005) to re-
+inversion transitions (∆K = 0
+and ∆J = 0) have been identified as masers. Among these, and
+despite arising from energy levels as high as 1090 K above
+the ground state, the NH
 3
-strongestandmostvariableoneinW51-IRS2(e.g.,Henkeletal. duce the spectral line data. A first-order polynomial was sub-
-2013).Maseremissioninthislinehasonlybeendetectedinfive tractedfromeachspectrumforbaselineremoval.
+(9,6) maser stands out as being the
+strongestandmostvariableoneinW51-IRS2(e.g.,Henkeletal.
+2013).Maseremissioninthislinehasonlybeendetectedinfive
 HMSFRs, W51, NGC7538, W49, DR21 (OH) (Madden et al.
-1986), and Sgr B2(N) (Mei et al. 2020). The NH (3,3) masers
+1986), and Sgr B2(N) (Mei et al. 2020). The NH
 3
-2.2. JVLAobservationsanddatareduction
+(3,3) masers
 arethoughttobecollisionallyexcited(e.g.,Floweretal.1990;
 Mangum & Wootten 1994); in contrast, the pumping mecha-
-Observations of the NH (9,6) line toward Cep A and
-3
-nismofNH 3(9,6)masersislesswellconstrained(Maddenetal. G34.26+0.15 were obtained on 2021 July 13 with the JVLA
-1986).Brown&Cragg(1991)havestudiedortho-ammoniaand of the National Radio Astronomy Observatory3 (NRAO) in the
+nismofNH 3 (9,6)masersislesswellconstrained(Maddenetal.
+1986).Brown&Cragg(1991)havestudiedortho-ammoniaand
 found that it could possibly pump the (6,3) inversion line, but
-C configuration (project ID: 21A-157, PI: Yaoting Yan). We
 theydidnotextendtheirmodeltothe(9,6)transitionduetothe
-employed 27 antennas for the observations. The primary beam
-factthatcollisionratesareonlyknownforinversionlevelsupto of the JVLA antennas is 150(cid:48)(cid:48) (FWHM) at 18.5 GHz. A mix-
+factthatcollisionratesareonlyknownforinversionlevelsupto
 J = 6(e.g.,Danbyetal.1988).
-tureofmixedthree-bitandeight-bitsamplerswereusedtoper-
-NH (9,6)masersarefoundtobestronglyvariable,similarto
-3 form the observations. For the NH (9,6) line observations, we
+NH
 3
-H Omasers(Maddenetal.1986;Pratapetal.1991;Henkeletal.
-2 used one subband with the eight-bit sampler covering a band-
+(9,6)masersarefoundtobestronglyvariable,similarto
+H
+2
+Omasers(Maddenetal.1986;Pratapetal.1991;Henkeletal.
 2013). In W51-IRS2, Henkel et al. (2013) found that the (9,6)
-widthof16MHzwithfullpolarization,eightrecirculations,and
 lineshowedsignificantvariationinlineshapewithinatimein-
-four baseline board pairs (BIBPs) to provide a velocity range
-tervalofonlytwodays.Mappingofthe(9,6)masertowardW51 of 260 km s−1 with a channel spacing of 0.13 km s−1. Two
+tervalofonlytwodays.Mappingofthe(9,6)masertowardW51
 withverylongbaselineinterferometry(VLBI)suggeststhatthe
+masers are closer to the H
+2
+O masers than to the OH masers or
+to ultracompact (UC) Hii regions (Pratap et al. 1991). While
+Henkeletal.(2013)andGoddietal.(2015)showedthattheSiO
+andNH
+3
+masersinW51-IRS2areveryclosetoeachother,their
+positions,differingby0(cid:48)(cid:48) .065(∼0.015pc),donotfullycoincide.
+In this paper we report the discovery of NH 3 (9,6) masers
+in two HMSFRs, Cepheus A and G34.26+0.15. This increases
+the number of (9,6) maser detections in our Galaxy from five
+to seven. In Sect. 2 observations with the Effelsberg 100-meter
+telescopeandtheKarlG.JanskyVeryLargeArray(JVLA)are
+described. Results are presented in Sect. 3. The morphology of
+CepAandG34.26+0.15aswellasacomparisonoftheemission
+distributions of different tracers with the NH 3 (9,6) masers are
+presentedinSect.4.OurmainresultsaresummarizedinSect.5.
+2. Observationsanddatareduction
+2.1. Effelsbergobservationsanddatareduction
+The NH
+3
+(9,6) line was observed toward Cep A and
+G34.26+0.15 with the 100-meter Effelsberg telescope1 in 2020
+Januaryand2021February,July,andAugust.TheS14mmdou-
+blebeamsecondaryfocusreceiverwasemployed.Thefullwidth
+at half maximum (FWHM) beam size is 49(cid:48)(cid:48) at 18.5 GHz, the
+frequencyofthetargetline.Theobservationswereperformedin
+positionswitchingmode,andtheoffpositionwas10(cid:48)inazimuth
+1 Based on observations with the 100-meter telescope of the MPIfR
+(Max-Planck-InstitutfürRadioastronomie)atEffelsberg.
+away from the source. For observations made before 2021 Au-
+gust,weusedaspectrometerthatcovered2GHzwidebackends
+withachannelwidthof38.1kHz,correspondingto∼0.62kms−1
+at the line’s rest frequency, 18.49939 GHz (Poynter & Kakar
+1975).Ahighspectralresolutionbackendwith65536channels
+and a bandwidth of 300 MHz was employed in 2021 August,
+providing a channel width of 0.07 km s−1 at 18.5 GHz. Point-
+ing was checked every 2 hours using 3C 286 or NGC 7027.
+Focus calibrations were done at the beginning of the observa-
+tionsandduringsunsetandsunrisetowardtheabovementioned
+pointingsources.Thesystemtemperatureswere100–130Kon
+amain-beambrightnesstemperature,T
+MB
+,scale.Thisfluxden-
+sitywascalibratedassumingaT
+MB
+/S ratioof1.95K/Jy,derived
+fromcontinuumcrossscansofNGC7027(thefluxdensitywas
+adoptedfromOttetal.1994).Calibrationuncertaintiesareesti-
+matedtobe∼10%.
+We used the GILDAS/CLASS2 package (Pety 2005) to re-
+duce the spectral line data. A first-order polynomial was sub-
+tractedfromeachspectrumforbaselineremoval.
+2.2. JVLAobservationsanddatareduction
+Observations of the NH
+3
+(9,6) line toward Cep A and
+G34.26+0.15 were obtained on 2021 July 13 with the JVLA
+of the National Radio Astronomy Observatory3 (NRAO) in the
+C configuration (project ID: 21A-157, PI: Yaoting Yan). We
+employed 27 antennas for the observations. The primary beam
+of the JVLA antennas is 150(cid:48)(cid:48) (FWHM) at 18.5 GHz. A mix-
+tureofmixedthree-bitandeight-bitsamplerswereusedtoper-
+form the observations. For the NH
+3
+(9,6) line observations, we
+used one subband with the eight-bit sampler covering a band-
+widthof16MHzwithfullpolarization,eightrecirculations,and
+four baseline board pairs (BIBPs) to provide a velocity range
+of 260 km s−1 with a channel spacing of 0.13 km s−1. Two
 additional subbands of bandwidth 16 MHz were used to cover
-masers are closer to the H O masers than to the OH masers or
-2 the NH (8,5) and (10,7) lines. The three-bit sampler with 32
-to ultracompact (UC) Hii regions (Pratap et al. 1991). While 3
+the NH
+3
+(8,5) and (10,7) lines. The three-bit sampler with 32
 subbands, each with a bandwidth of 128 MHz to cover a to-
-Henkeletal.(2013)andGoddietal.(2015)showedthattheSiO
 tal range of 4 GHz between 20–24 GHz, was used to mea-
-andNH masersinW51-IRS2areveryclosetoeachother,their
-3 sure the continuum emission. 3C 286 with a flux density of
-positions,differingby0(cid:48).(cid:48)065(∼0.015pc),donotfullycoincide.
+sure the continuum emission. 3C 286 with a flux density of
 2.89 Jy at 18.5 GHz (Perley & Butler 2013) was used as a
-In this paper we report the discovery of NH 3 (9,6) masers calibratorforpointing,fluxdensity,bandpass,andpolarization.
-in two HMSFRs, Cepheus A and G34.26+0.15. This increases J2230+6946andJ1851+0035servedasgaincalibratorsforCep
-the number of (9,6) maser detections in our Galaxy from five A and G34.26+0.15, respectively. The on-source times were
-to seven. In Sect. 2 observations with the Effelsberg 100-meter 4m30sand4m50stowardCepAandG34.26+0.15,respectively.
-telescopeandtheKarlG.JanskyVeryLargeArray(JVLA)are
+calibratorforpointing,fluxdensity,bandpass,andpolarization.
+J2230+6946andJ1851+0035servedasgaincalibratorsforCep
+A and G34.26+0.15, respectively. The on-source times were
+4m30sand4m50stowardCepAandG34.26+0.15,respectively.
 Data from two antennas were lost due to technical is-
-described. Results are presented in Sect. 3. The morphology of
 sues. The data from the remaining 25 antennas were reduced
-CepAandG34.26+0.15aswellasacomparisonoftheemission
 through the Common Astronomy Software Applications pack-
-distributions of different tracers with the NH 3 (9,6) masers are age(CASA4;McMullinetal.2007).Wecalibratedthedatawith
-presentedinSect.4.OurmainresultsaresummarizedinSect.5.
+age(CASA4;McMullinetal.2007).Wecalibratedthedatawith
 the JVLA CASA calibration pipeline using CASA 6.1.2. The
 results were obtained after flagging data that contain artifacts.
 We inspected the phase, amplitude, and bandpass variations of
-2. Observationsanddatareduction
 thecalibratedvisibilitydatatosearchforadditionalartifactsbe-
 fore imaging. Then, the uvcontsub task in CASA was used to
-2.1. Effelsbergobservationsanddatareduction
 separatethecalibratedvisibilitiesintotwoparts,onewithline-
-The NH (9,6) line was observed toward Cep A and onlydataandtheotherwiththecontinuumdata.Thetcleantask
-3
-G34.26+0.15 with the 100-meter Effelsberg telescope1 in 2020 withacellsizeof0(cid:48).(cid:48)2andBriggsweightingwithrobust=0was
-Januaryand2021February,July,andAugust.TheS14mmdou- usedtoproducetheimagesofspectrallineandcontinuumemis-
-blebeamsecondaryfocusreceiverwasemployed.Thefullwidth sion. The synthesized beams for NH (9,6) are 1(cid:48).(cid:48)47×0(cid:48).(cid:48)99 at
+onlydataandtheotherwiththecontinuumdata.Thetcleantask
+withacellsizeof0(cid:48)(cid:48) .2andBriggsweightingwithrobust=0was
+usedtoproducetheimagesofspectrallineandcontinuumemis-
+sion. The synthesized beams for NH
 3
-at half maximum (FWHM) beam size is 49(cid:48)(cid:48) at 18.5 GHz, the
-frequencyofthetargetline.Theobservationswereperformedin 2 https://www.iram.fr/IRAMFR/GILDAS/
-positionswitchingmode,andtheoffpositionwas10(cid:48)inazimuth 3 TheNationalRadioAstronomyObservatoryisafacilityoftheNa-
+(9,6) are 1(cid:48)(cid:48) .47×0(cid:48)(cid:48) .99 at
+2 https://www.iram.fr/IRAMFR/GILDAS/
+3 TheNationalRadioAstronomyObservatoryisafacilityoftheNa-
 tionalScienceFoundationoperatedundercooperativeagreementbyAs-
-1 Based on observations with the 100-meter telescope of the MPIfR sociatedUniversities,Inc.
-(Max-Planck-InstitutfürRadioastronomie)atEffelsberg. 4 https://casa.nrao.edu/
+sociatedUniversities,Inc.
+4 https://casa.nrao.edu/
 Articlenumber,page2of10
 Y.T.Yan(闫耀庭) etal.:Discoveryofammonia(9,6)masersintwohigh-massstar-formingregions
-P.A. = 58◦.79 and 1(cid:48).(cid:48)33 × 1(cid:48).(cid:48)06 at P.A. = 5◦.36 toward Cep A
+P.A. = 58◦.79 and 1(cid:48)(cid:48) .33 × 1(cid:48)(cid:48) .06 at P.A. = 5◦.36 toward Cep A
 and G34.26+0.15, respectively. For the 1.36cm (20–24 GHz)
-continuumemission,thesynthesizedbeamsare1(cid:48).(cid:48)08×0(cid:48).(cid:48)67at
-P.A.=60◦.64and0(cid:48).(cid:48)95×0(cid:48).(cid:48)71atP.A.=5◦.91towardCepAand
+continuumemission,thesynthesizedbeamsare1(cid:48)(cid:48) .08×0(cid:48)(cid:48) .67at
+P.A.=60◦.64and0(cid:48)(cid:48) .95×0(cid:48)(cid:48) .71atP.A.=5◦.91towardCepAand
 G34.26+0.15. The typical absolute astrometric accuracy of the
 JVLAis∼10%ofthesynthesizedbeam5.Thefluxdensityscale
 calibrationaccuracyisestimatedtobewithin15%.
-Fig. 2. NH (9,6) line profiles emphasizing, in contrast to the spectra
+Fig. 1. Spectra from NH
+3
+(9,6) transition lines. Left: Top to bottom:
+TimesequenceofNH
 3
+(9,6)profilesobservedtowardCepAwiththe
+Effelsberg 100-meter telescope (after subtracting a first-order polyno-
+mialbaseline).AJVLAspectrumisinterspersed.Thesystemicveloc-
+ity from CO and HCO+ lines is indicated by a dashed blue line. The
+two dashed red lines at LSR velocities, V
+LSR
+, of −0.90 km s−1 and
+−0.28 km s−1 indicate the central velocities of the two major compo-
+nents.Right:NH 3 (9,6)spectrafromG34.26+0.15.Thesystemicve-
+locityfromC17Oisindicatedbyadashedblueline.Thethreedashed
+redlinesatV
+LSR
+=54.1kms−1,55.8kms−1,and62.5kms−1showthe
+centralvelocitiesofthemainammoniaemissioncomponents.
+3. Results
+The spectra from different epochs are shown in Figs. 1 and 2.
+TowardCepA,theNH
+3
+(9,6)lineprofilefromtheJVLAisex-
+tractedfromanEffelsberg-beam-sizedregion(FWHM,49(cid:48)(cid:48)).In
+the case of G34.26+0.15, the NH
+3
+spectrum is below the noise
+level if a similarly large beam size is used. Therefore, we de-
+rivedtheJVLANH
+3
+(9,6)spectrumfromasmallerregion,with
+radius3(cid:48)(cid:48) .5,thatcontainsallthedetectedNH
+3
+(9,6)emission.In
+Table A.1, the observed NH
+3
+(9,6) line parameters obtained by
+Gaussianfitsarelisted.NH
+3
+(8,5)and(10,7)emissionisnotde-
+tected by our JVLA observations. The 3σ upper limits for the
+NH
+3
+(8,5) and (10,7) lines toward Cep A are 23.2 mJy beam−1
+5 https://science.nrao.edu/facilities/vla/docs/manuals/oss/performance-
+/positional-accuracy
+Fig. 2. NH
+3
+(9,6) line profiles emphasizing, in contrast to the spectra
 in Fig. 1, weaker features. Cep A spectra are presented on the left,
 G34.26+0.15spectraontheright.Thetwodashedredlinesintheleft
-panelsindicateV =1.48kms−1and2.89kms−1.Intherightpanels,
+panelsindicateV
 LSR
+=1.48kms−1and2.89kms−1.Intherightpanels,
 thetwodashedredlinesreferto54.1kms−1and55.8kms−1.
 and 27.2 mJy beam−1, respectively. In G34.26+0.15, the corre-
-sponding3σupperlimitsfortheNH (8,5)and(10,7)linesare
+sponding3σupperlimitsfortheNH
 3
+(8,5)and(10,7)linesare
 22.1mJybeam−1 and30.4mJybeam−1.Forbothsources,sen-
 sitivity levels refer to emission from a single channel of width
 0.13kms−1.Takingthelargermeasuredlinewidthsofthe(9,6)
@@ -203,336 +374,500 @@ toward Cep A is presented in Fig. 3. Six published compact
 sources,HW2,HW3a,HW3b,HW3c,HW3d,andHW9,arede-
 tected in our observations. Figure 4 shows the 1.36cm contin-
 uuminG34.26+0.15.Threemaincontinuumobjects,A,B,and
-Fig. 1. Spectra from NH (9,6) transition lines. Left: Top to bottom:
-3 C,aredetected.Byusingtheimfit taskinCASA,wemeasured
-TimesequenceofNH (9,6)profilesobservedtowardCepAwiththe
-3
-Effelsberg 100-meter telescope (after subtracting a first-order polyno- thecontinuumfluxat1.36cmtowardindividualcompactsource
+C,aredetected.Byusingtheimfit taskinCASA,wemeasured
+thecontinuumfluxat1.36cmtowardindividualcompactsource
 componentsinCepAandG34.26+0.15.DetailsaregiveninTa-
-mialbaseline).AJVLAspectrumisinterspersed.Thesystemicveloc-
-ity from CO and HCO+ lines is indicated by a dashed blue line. The bleA.2.
-two dashed red lines at LSR velocities, V , of −0.90 km s−1 and
-LSR
-−0.28 km s−1 indicate the central velocities of the two major compo-
-nents.Right:NH 3 (9,6)spectrafromG34.26+0.15.Thesystemicve- 3.2. NH 3 (9,6)emissioninCepA
-locityfromC17Oisindicatedbyadashedblueline.Thethreedashed
-redlinesatV =54.1kms−1,55.8kms−1,and62.5kms−1showthe In2020January,NH 3 (9,6)emissionwithapeakfluxdensityof
-centralvelociL tS ieR 0.67±0.07JywasfirstdetectedwiththeEffelsberg100-meter
-softhemainammoniaemissioncomponents.
+bleA.2.
+3.2. NH 3 (9,6)emissioninCepA
+In2020January,NH 3 (9,6)emissionwithapeakfluxdensityof
+0.67±0.07JywasfirstdetectedwiththeEffelsberg100-meter
 telescopeinCepA.Emissionwithsimilarstrengthwasalsode-
 tected in 2021 February and August with the same telescope.
 Higher velocity resolution data, which were obtained in 2021
 August, again with the Effelsberg 100-meter telescope, show
-3. Results
 thatthe(9,6)emissioncontainstwomainvelocitycomponents.
-The spectra from different epochs are shown in Figs. 1 and 2. Overall, the flux densities of the NH 3 (9,6) emission line mea-
+Overall, the flux densities of the NH 3 (9,6) emission line mea-
 suredwiththeEffelsberg100-metertelescopeare,withinthecal-
-TowardCepA,theNH (9,6)lineprofilefromtheJVLAisex-
+ibrationuncertainties,unchanged.Thisisvalidforthetimeinter-
+valbetween2020JanuaryandAugust2021,whenwesmoothed
+the obtained spectra to the same velocity resolution. We also
+seeanothertwoweakercomponents.Figure2emphasizesthese
+weakcomponentswithanexpandedfluxdensityscale.
+Higher angular resolution data from the JVLA pinpoint the
+position of the NH
 3
-tractedfromanEffelsberg-beam-sizedregion(FWHM,49(cid:48)(cid:48)).In ibrationuncertainties,unchanged.Thisisvalidforthetimeinter-
-the case of G34.26+0.15, the NH spectrum is below the noise valbetween2020JanuaryandAugust2021,whenwesmoothed
+(9,6) emission with an offset of (−0(cid:48)(cid:48) .28,
+0(cid:48)(cid:48) .02) relative to the 1.36cm continuum peak of Cep A HW2
+(Fig.3).ThedeconvolvedNH
 3
-level if a similarly large beam size is used. Therefore, we de- the obtained spectra to the same velocity resolution. We also
-rivedtheJVLANH (9,6)spectrumfromasmallerregion,with seeanothertwoweakercomponents.Figure2emphasizesthese
-3
-radius3(cid:48).(cid:48)5,thatcontainsallthedetectedNH (9,6)emission.In weakcomponentswithanexpandedfluxdensityscale.
-3
-Table A.1, the observed NH (9,6) line parameters obtained by Higher angular resolution data from the JVLA pinpoint the
-3
-Gaussianfitsarelisted.NH (8,5)and(10,7)emissionisnotde- position of the NH (9,6) emission with an offset of (−0(cid:48).(cid:48)28,
-3 3
-tected by our JVLA observations. The 3σ upper limits for the 0(cid:48).(cid:48)02) relative to the 1.36cm continuum peak of Cep A HW2
-NH (8,5) and (10,7) lines toward Cep A are 23.2 mJy beam−1 (Fig.3).ThedeconvolvedNH (9,6)componentsizeis(0(cid:48).(cid:48)29±
-3 3
-0(cid:48).(cid:48)15)×(0(cid:48).(cid:48)19±0(cid:48).(cid:48)14)atP.A.=174◦,derivedwiththeimfittask
-5 https://science.nrao.edu/facilities/vla/docs/manuals/oss/performance- inCASA,andcanthusbeconsidered,accountingfortheuncer-
-/positional-accuracy tainties,asunresolved.
+(9,6)componentsizeis(0(cid:48)(cid:48) .29±
+0(cid:48)(cid:48) .15)×(0(cid:48)(cid:48) .19±0(cid:48)(cid:48) .14)atP.A.=174◦,derivedwiththeimfittask
+inCASA,andcanthusbeconsidered,accountingfortheuncer-
+tainties,asunresolved.
 Articlenumber,page3of10
 A&Aproofs:manuscriptno.mainArxiv
 Fig. 3. Cepheus A. White contours mark the 1.36cm JVLA continuum map of Cep A; levels are −5, 5, 10, 20, 30, 40, 50, 70, 90,
 and 110 × 0.125 mJy beam−1. The background image is the Spitzer 4.5µm emission, taken from the Galactic Legacy Infrared Mid-Plane
-Survey Extraordinaire (GLIMPSE; Benjamin et al. 2003; Churchwell et al. 2009). The reference position is α = 22h56m17s.972, and
+Survey Extraordinaire (GLIMPSE; Benjamin et al. 2003; Churchwell et al. 2009). The reference position is α
 J2000
-δ = 62◦01(cid:48)49(cid:48).(cid:48)587, the peak position of the continuum map, is marked with a black cross. Slightly to the west of the cross is the black
+= 22h56m17s.972, and
+δ
 J2000
-ellipsedenotingthepositionoftheNH (9,6)emissionwithapurplestaratitscenter.OH(Bartkiewiczetal.2005),H O(Sobolevetal.2018),
-3 2
-andCH OH(Sannaetal.2017)masersarepresentedasdiamonds,circles,andsquares,respectively.Thecolorbarontheright-handsideindicates
+= 62◦01(cid:48)49(cid:48)(cid:48) .587, the peak position of the continuum map, is marked with a black cross. Slightly to the west of the cross is the black
+ellipsedenotingthepositionoftheNH
 3
+(9,6)emissionwithapurplestaratitscenter.OH(Bartkiewiczetal.2005),H
+2
+O(Sobolevetal.2018),
+andCH
+3
+OH(Sannaetal.2017)masersarepresentedasdiamonds,circles,andsquares,respectively.Thecolorbarontheright-handsideindicates
 theLSRvelocityrangeofthemaserspots.
 Fig. 4. 1.36cm JVLA continuum map of G34.26+0.15 presented as white contours with levels of −5, 5, 10, 20, 30, 40, 50, 70, 90, 110, 130,
 150,180,and200×5.0mJybeam−1.ThebackgroundimageistheSpitzer 4.5µmemission,takenfromGLIMPSE.Thereferencepositionis
-α =18h53m18s.560,andδ =01◦14(cid:48)58(cid:48).(cid:48)201,thepeakposition,ismarkedbyablackcross.TheblackellipsesshowthepositionsofNH
-J2000 J2000 3
-(9,6)emissionswithstarsattheircenter(i.e.,M1,M2,andM3).OH(Zhengetal.2000),H O(Imaietal.2011),andCH OH(Bartkiewiczetal.
-2 3
-2016)masersarepresentedasdiamonds,circles,andsquares,respectively.Thecolorbarindicatesthevelocityrange(V )ofmaserspots.
+α
+J2000
+=18h53m18s.560,andδ
+J2000
+=01◦14(cid:48)58(cid:48)(cid:48) .201,thepeakposition,ismarkedbyablackcross.TheblackellipsesshowthepositionsofNH
+3
+(9,6)emissionswithstarsattheircenter(i.e.,M1,M2,andM3).OH(Zhengetal.2000),H
+2
+O(Imaietal.2011),andCH
+3
+OH(Bartkiewiczetal.
+2016)masersarepresentedasdiamonds,circles,andsquares,respectively.Thecolorbarindicatesthevelocityrange(V
 LSR
-InviewoftheconstancyofthefluxdensitiesobtainedatEf- velocity resolution data from 2021 August show the NH (9,6)
+)ofmaserspots.
+InviewoftheconstancyofthefluxdensitiesobtainedatEf-
+felsberg and the similar JVLA flux density, measured in 2021
+July,thereisnomissinginterferometricfluxdensityintheJVLA
+data.
+3.3. NH 3 (9,6)emissioninG34.26+0.15
+TheNH
 3
-felsberg and the similar JVLA flux density, measured in 2021 emissiontobecomposedoftwodifferentcomponents.Thespec-
-July,thereisnomissinginterferometricfluxdensityintheJVLA traofweakcomponentsonasmallerfluxdensityscalearepre-
-data. sentedinFig.2.
-3.3. NH 3 (9,6)emissioninG34.26+0.15 Three different locations showing NH 3 (9,6) emission are
-foundtowardG34.26+0.15(Fig.4).ThedeconvolvedNH (9,6)
+(9,6)emissionwasfirstdetectedtowardG34.26+0.15
+in2020JanuarywiththeEffelsberg100-metertelescope.Higher
+velocity resolution data from 2021 August show the NH
 3
-TheNH (9,6)emissionwasfirstdetectedtowardG34.26+0.15 componentsizesare(1(cid:48).(cid:48)42±0(cid:48).(cid:48)43)×(0(cid:48).(cid:48)54±0(cid:48).(cid:48)62)atP.A.=97◦
+(9,6)
+emissiontobecomposedoftwodifferentcomponents.Thespec-
+traofweakcomponentsonasmallerfluxdensityscalearepre-
+sentedinFig.2.
+Three different locations showing NH 3 (9,6) emission are
+foundtowardG34.26+0.15(Fig.4).ThedeconvolvedNH
 3
-in2020JanuarywiththeEffelsberg100-metertelescope.Higher (M1),(0(cid:48).(cid:48)42±0(cid:48).(cid:48)27)×(0(cid:48).(cid:48)15±0(cid:48).(cid:48)27)atP.A.=150◦ (M2),and
+(9,6)
+componentsizesare(1(cid:48)(cid:48) .42±0(cid:48)(cid:48) .43)×(0(cid:48)(cid:48) .54±0(cid:48)(cid:48) .62)atP.A.=97◦
+(M1),(0(cid:48)(cid:48) .42±0(cid:48)(cid:48) .27)×(0(cid:48)(cid:48) .15±0(cid:48)(cid:48) .27)atP.A.=150◦ (M2),and
 Articlenumber,page4of10
 Y.T.Yan(闫耀庭) etal.:Discoveryofammonia(9,6)masersintwohigh-massstar-formingregions
-(1(cid:48).(cid:48)17±0(cid:48).(cid:48)34)×(0(cid:48).(cid:48)27±0(cid:48).(cid:48)46)atP.A.=53◦ (M3)andarethus the NH (9,6) emission in Cep A is due to maser action. Be-
+(1(cid:48)(cid:48) .17±0(cid:48)(cid:48) .34)×(0(cid:48)(cid:48) .27±0(cid:48)(cid:48) .46)atP.A.=53◦ (M3)andarethus
+comparabletoorsmallerthanthebeamsize.
+Overall, the NH
+3
+(9,6) line from G34.26+0.15 weakened
+during the time interval from 2020 January to 2021 August by
+about70%.AcomparisonbetweentheJVLAspectrumandthe
+Effelsbergdata,assumingalineardecreaseintheintegratedin-
+tensity as a function of time between different epochs of the
+100-meterobservations,suggeststhereisnomissingfluxinthe
+JVLAdata.ThisissimilartothesituationinCepA.
+4. Discussion
+4.1. MorphologyofCepAandG34.26+0.15
+Cep A, at a trigonometric parallax distance of 0.70±0.04 kpc
+(Moscadellietal.2009;Dzibetal.2011),isthesecondclosest
+HMSFR (after Orion) and by far the closest NH
+3
+(9,6) maser
+known.About16compact(∼1(cid:48)(cid:48))radiosources(e.g.,Hughes&
+Wouterloot 1984; Hughes 1991; Garay et al. 1996) have been
+identified in Cep A. Hughes & Wouterloot (1984) discovered
+thesetargetsatradiowavelengths,whichareUCandhypercom-
+pact(HC)Hiiregionsand/orstellarwindsources,subsequently
+namedasHWsources.TheHW2objectisoneofthebestknown
+examplesofaprotostellarjetordisksystemdrivingapowerful
+outflow(e.g.,Rodriguezetal.1980;Güstenetal.1984;Torrelles
+et al. 1986; Curiel et al. 2006; Carrasco-González et al. 2021).
+TheobservedNH 3 (9,6)emissionisslightlyoffset(−0(cid:48)(cid:48) .28,0(cid:48)(cid:48) .02)
+fromthecenterofHW2(seeFig.3).
+G34.26+0.15isanHMSFRlocatedatadistanceof3.3kpc
+(Kuchar & Bania 1994). It hosts four radio continuum compo-
+nents named A, B, C, and D. Component C is a prototypical
+cometaryUCHiiregioncontainingacompactheadandadiffuse
+tailthatextendsfromeasttowest(e.g.,Reid&Ho1985;Garay
+etal.1986;Sewiloetal.2004;Sewiłoetal.2011).Components
+A and B are HC Hii regions, located to the east of component
+C.Anextendedring-likeHiiregion,calledcomponentD,islo-
+cated southeast of components A-C. One of the three observed
+NH 3 (9,6)emissionlinesources,M1,isclosetotheheadofcom-
+ponentC,whereasM2andM3originatefromanothercompact
+regioninthewestoftheHCHiicomponentA(seeFig.4).
+4.2. NH
+3
+(9,6)emissionpossiblycausedbymaseraction
+As shown in Fig. 1, the NH
+3
+(9,6) profiles in Cep A and
+G34.26+0.15 are narrow (∆V
+1/2
+≤2.0 km s−1), much narrower
+than the expected line widths ((cid:38)4 km s−1) of thermal lines ob-
+servedatasimilarangularresolution(e.g.,Torrellesetal.1985,
+1986,1993,1999;Henkeletal.1987;Comitoetal.2007;Mook-
+erjeaetal.2007;Wyrowskietal.2012;Beutheretal.2018).Ve-
+locity shifts with respect to the systemic velocities of the two
+sourcesarebothobserved,thatis,V ∼10kms−1 inCepAand
+V ∼4kms−1 inG34.26+0.15(seedetailsinSect.4.3).Further-
+more, time variability is observed in the case of G34.26+0.15,
+whichisalsoacharacteristicfeatureofmaseremission.
+Additionalevidenceoftheirmasernatureisthehighbright-
+nesstemperaturesofthe(9,6)emissionspotstowardCepAand
+G34.26+0.15. The spectral parameters are listed in Table A.3.
+Because at least a significant part of the NH
 3
-comparabletoorsmallerthanthebeamsize. causeG34.26+0.15islocatedataboutfivetimesthedistanceto
-Overall, the NH (9,6) line from G34.26+0.15 weakened CepA,beamdilutioneffectsreducethelowermainbeambright-
+(9,6) emission
+is not resolved by our JVLA observations, the derived bright-
+nesstemperaturesareonlylowerlimits.Nevertheless,thelower
+limits on the brightness temperature are >800 K in Cep A (see
+Table A.3), which is much higher than the expected thermal
+gas temperature of ∼250 K (e.g., Patel et al. 2005; Comito
+et al. 2007; Beuther et al. 2018). This strongly suggests that
+the NH
 3
-during the time interval from 2020 January to 2021 August by ness temperature limit to 400 K in G34.26+0.15 (M2) (see Ta-
-about70%.AcomparisonbetweentheJVLAspectrumandthe bleA.3).WealsonotethattheluminosityoftheNH (9,6)emis-
+(9,6) emission in Cep A is due to maser action. Be-
+causeG34.26+0.15islocatedataboutfivetimesthedistanceto
+CepA,beamdilutioneffectsreducethelowermainbeambright-
+ness temperature limit to 400 K in G34.26+0.15 (M2) (see Ta-
+bleA.3).WealsonotethattheluminosityoftheNH
 3
-Effelsbergdata,assumingalineardecreaseintheintegratedin- sioninG34.26+0.15ishigherthanorcomparabletothatinCep
-tensity as a function of time between different epochs of the A,dependingontheepochofourobservations.
-100-meterobservations,suggeststhereisnomissingfluxinthe Finally,thenon-detectionsofthe(8,5)and(10,7)linesalso
-JVLAdata.ThisissimilartothesituationinCepA. indicate that the (9,6) line is special. This allows us to derive
+(9,6)emis-
+sioninG34.26+0.15ishigherthanorcomparabletothatinCep
+A,dependingontheepochofourobservations.
+Finally,thenon-detectionsofthe(8,5)and(10,7)linesalso
+indicate that the (9,6) line is special. This allows us to derive
 lower 3σ limits of the (9,6)/(8,5) and (9,6)/(10,7) line intensity
-ratios.The(9,6)linearisesfromortho-NH (K = 3n),whereas
+ratios.The(9,6)linearisesfromortho-NH
 3
-4. Discussion the NH (8,5) and (10,7) lines are para-NH (K (cid:44) 3n) lines.
-3 3
-4.1. MorphologyofCepAandG34.26+0.15 Theminimumortho-to-pararatiosareintherange12–42and1–
+(K = 3n),whereas
+the NH
+3
+(8,5) and (10,7) lines are para-NH
+3
+(K (cid:44) 3n) lines.
+Theminimumortho-to-pararatiosareintherange12–42and1–
 8 toward Cep A and G34.26+0.15, respectively. The statistical
-Cep A, at a trigonometric parallax distance of 0.70±0.04 kpc weights for the ortho states are twice as large as those for the
-(Moscadellietal.2009;Dzibetal.2011),isthesecondclosest parastates(e.g.,Umemotoetal.1999;Goddietal.2011;Henkel
-HMSFR (after Orion) and by far the closest NH (9,6) maser etal.2013).InCepA,thelineintensityratiosarefarhigherthan
-3
-known.About16compact(∼1(cid:48)(cid:48))radiosources(e.g.,Hughes& thisfactoroftwo.Thus,atleastinCepAthehighermainbeam
-Wouterloot 1984; Hughes 1991; Garay et al. 1996) have been brightness peak temperature of the (9,6) emission is caused by
-identified in Cep A. Hughes & Wouterloot (1984) discovered maser action, perhaps involving exponential amplification, and
-thesetargetsatradiowavelengths,whichareUCandhypercom- thecaseofG34.26+0.15islikelysimilar.
-pact(HC)Hiiregionsand/orstellarwindsources,subsequently
-namedasHWsources.TheHW2objectisoneofthebestknown
-4.3. ComparisonofNH (9,6)maserswithpreviously
-examplesofaprotostellarjetordisksystemdrivingapowerful 3
-published(quasi-)thermalNH emission
-outflow(e.g.,Rodriguezetal.1980;Güstenetal.1984;Torrelles 3
-et al. 1986; Curiel et al. 2006; Carrasco-González et al. 2021). The metastable (1,1), (2,2), (3,3), and (4,4) ammonia lines
-TheobservedNH 3(9,6)emissionisslightlyoffset(−0(cid:48).(cid:48)28,0(cid:48).(cid:48)02) show thermal emission toward Cep A over a velocity range of
-fromthecenterofHW2(seeFig.3). −13 km s−1 ≤ V ≤ −4 km s−1 (Brown et al. 1981; Güsten
+weights for the ortho states are twice as large as those for the
+parastates(e.g.,Umemotoetal.1999;Goddietal.2011;Henkel
+etal.2013).InCepA,thelineintensityratiosarefarhigherthan
+thisfactoroftwo.Thus,atleastinCepAthehighermainbeam
+brightness peak temperature of the (9,6) emission is caused by
+maser action, perhaps involving exponential amplification, and
+thecaseofG34.26+0.15islikelysimilar.
+4.3. ComparisonofNH
+3
+(9,6)maserswithpreviously
+published(quasi-)thermalNH
+3
+emission
+The metastable (1,1), (2,2), (3,3), and (4,4) ammonia lines
+show thermal emission toward Cep A over a velocity range of
+−13 km s−1 ≤ V
 LSR
-G34.26+0.15isanHMSFRlocatedatadistanceof3.3kpc
+≤ −4 km s−1 (Brown et al. 1981; Güsten
 etal.1984;Torrellesetal.1985,1986,1993,1999).Anaverage
-(Kuchar & Bania 1994). It hosts four radio continuum compo- NH columndensityof∼5×1015cm−2wasestimatedforaregion
+NH
 3
-nents named A, B, C, and D. Component C is a prototypical of3(cid:48)(cid:48)aroundHW2(Torrellesetal.1999).ThishighNH abun-
+columndensityof∼5×1015cm−2wasestimatedforaregion
+of3(cid:48)(cid:48)aroundHW2(Torrellesetal.1999).ThishighNH
 3
-cometaryUCHiiregioncontainingacompactheadandadiffuse
+abun-
 dance could provide a suitable environment for maser species.
-tailthatextendsfromeasttowest(e.g.,Reid&Ho1985;Garay Largelinewidths(∆V (cid:39)7.0kms−1)withV ∼ −10kms−1
-1/2 LSR
-etal.1986;Sewiloetal.2004;Sewiłoetal.2011).Components inboth(1,1)and(2,2)lineswerefoundtowardHW2(Torrelles
-A and B are HC Hii regions, located to the east of component et al. 1993). The velocity is similar to the cloud’s systemic lo-
-C.Anextendedring-likeHiiregion,calledcomponentD,islo- cal standard of rest (LSR) velocity of −11.2 km s−1, which
-cated southeast of components A-C. One of the three observed is based on CO (Narayanan & Walker 1996) and HCO+ ob-
-NH 3(9,6)emissionlinesources,M1,isclosetotheheadofcom- servations (Gómez et al. 1999). Our (9,6) maser is redshifted
-ponentC,whereasM2andM3originatefromanothercompact (−0.9 km s−1 ≤ V ≤2.9 km s−1) and shares positions with
+Largelinewidths(∆V
+1/2
+(cid:39)7.0kms−1)withV
+LSR
+∼ −10kms−1
+inboth(1,1)and(2,2)lineswerefoundtowardHW2(Torrelles
+et al. 1993). The velocity is similar to the cloud’s systemic lo-
+cal standard of rest (LSR) velocity of −11.2 km s−1, which
+is based on CO (Narayanan & Walker 1996) and HCO+ ob-
+servations (Gómez et al. 1999). Our (9,6) maser is redshifted
+(−0.9 km s−1 ≤ V
 LSR
-regioninthewestoftheHCHiicomponentA(seeFig.4). the outflowing gas seen in CO and HCO+ with similarly red-
+≤2.9 km s−1) and shares positions with
+the outflowing gas seen in CO and HCO+ with similarly red-
 shiftedvelocities.Therefore,wearguethatthe(9,6)masersare
 relatedtooutflowinggas.
-4.2. NH (9,6)emissionpossiblycausedbymaseraction
-3 In G34.26+0.15, a large NH column density,
+In G34.26+0.15, a large NH
 3
-As shown in Fig. 1, the NH (9,6) profiles in Cep A and 1018.5±0.2 cm−2, and a kinetic temperature of 225±75 K
+column density,
+1018.5±0.2 cm−2, and a kinetic temperature of 225±75 K
+were derived by Henkel et al. (1987) based on measurements
+of 15 NH
 3
-G34.26+0.15 are narrow (∆V ≤2.0 km s−1), much narrower were derived by Henkel et al. (1987) based on measurements
-1/2
-than the expected line widths ((cid:38)4 km s−1) of thermal lines ob- of 15 NH inversion transitions in the frequency range of
-3
-servedatasimilarangularresolution(e.g.,Torrellesetal.1985, 22.0–26.0 GHz. These did not include the (9,6) transition.
-1986,1993,1999;Henkeletal.1987;Comitoetal.2007;Mook- While these lines were measured with a beam size of about
-erjeaetal.2007;Wyrowskietal.2012;Beutheretal.2018).Ve- 40(cid:48)(cid:48), a comparison of the peak intensities of the optically thick
-locity shifts with respect to the systemic velocities of the two lines with the kinetic temperature reveals the size of the hot,
-sourcesarebothobserved,thatis,V ∼10kms−1 inCepAand ammonia-emitting core to be only ∼2.5(cid:48)(cid:48). All those measured
-V ∼4kms−1 inG34.26+0.15(seedetailsinSect.4.3).Further- NH lines were quasi-thermal and had LSR velocities of
-3
-more, time variability is observed in the case of G34.26+0.15, ∼ 58.5 km s−1, close to the systemic velocity of ∼ 58.1 km s−1
-whichisalsoacharacteristicfeatureofmaseremission. obtained from C17O observations (Wyrowski et al. 2012).
-Additionalevidenceoftheirmasernatureisthehighbright- Their line widths (∆V ≥3.6 km s−1) are larger than what
+inversion transitions in the frequency range of
+22.0–26.0 GHz. These did not include the (9,6) transition.
+While these lines were measured with a beam size of about
+40(cid:48)(cid:48), a comparison of the peak intensities of the optically thick
+lines with the kinetic temperature reveals the size of the hot,
+ammonia-emitting core to be only ∼2.5(cid:48)(cid:48). All those measured
+NH
+3
+lines were quasi-thermal and had LSR velocities of
+∼ 58.5 km s−1, close to the systemic velocity of ∼ 58.1 km s−1
+obtained from C17O observations (Wyrowski et al. 2012).
+Their line widths (∆V
 1/2
-nesstemperaturesofthe(9,6)emissionspotstowardCepAand we find (0.35 km s−1 ≤ ∆V ≤ 0.94 km s−1) for each (9,6)
+≥3.6 km s−1) are larger than what
+we find (0.35 km s−1 ≤ ∆V
 1/2
-G34.26+0.15. The spectral parameters are listed in Table A.3. maser component (see details in Table A.3). In all, we may
-Because at least a significant part of the NH (9,6) emission have observed four different (9,6) velocity features. Three
-3
-is not resolved by our JVLA observations, the derived bright- are blueshifted at V ∼ 53.8 km s−1, 55.8 km s−1, and
+≤ 0.94 km s−1) for each (9,6)
+maser component (see details in Table A.3). In all, we may
+have observed four different (9,6) velocity features. Three
+are blueshifted at V
 LSR
-nesstemperaturesareonlylowerlimits.Nevertheless,thelower 56.8kms−1,andafourth,tentativelydetected,at62.5 kms−1.
-limits on the brightness temperature are >800 K in Cep A (see This tentative redshifted feature was only potentially detected
-Table A.3), which is much higher than the expected thermal with Effelsberg in 2020 January. The velocity is similar to that
-gas temperature of ∼250 K (e.g., Patel et al. 2005; Comito of the JVLA measurements on the NH (1,1) absorption line
+∼ 53.8 km s−1, 55.8 km s−1, and
+56.8kms−1,andafourth,tentativelydetected,at62.5 kms−1.
+This tentative redshifted feature was only potentially detected
+with Effelsberg in 2020 January. The velocity is similar to that
+of the JVLA measurements on the NH
 3
-et al. 2007; Beuther et al. 2018). This strongly suggests that againstcontinuumsourceC(∼ 7(cid:48)(cid:48) resolution;Ketoetal.1987)
+(1,1) absorption line
+againstcontinuumsourceC(∼ 7(cid:48)(cid:48) resolution;Ketoetal.1987)
 Articlenumber,page5of10
 A&Aproofs:manuscriptno.mainArxiv
-andtheNH (3,3)emissionsurroundingcontinuumsourceBas etal.2013).BothCepAandG34.26+0.15havesimilarkinetic
+andtheNH
+3
+(3,3)emissionsurroundingcontinuumsourceBas
+wellastheheadofC(1(cid:48)(cid:48) .4×1(cid:48)(cid:48) .2resolution;Heatonetal.1989).
+However, we did not find this redshifted component in our
+JVLAobservations.Therefore,itspositionwithinG34.26+0.15
+cannot be determined. The blueshifted (9,6) masers with a
+velocity range of 53.8–56.8 km s−1 (M1, M2, and M3) show
+velocities compatible with those of the NH
+3
+(3,3) emission at
+the proper positions (Heaton et al. 1989), which might be a
+suitableenvironmentformaserspecies.
+4.4. ComparisonofNH 3 (9,6)maserswithothermaserlines
+To characterize the environment of NH
+3
+(9,6) masers, we can
+compare their positions with respect to those of other maser
+species (i.e., OH, H
+2
+O, and CH
+3
+OH). Toward Cep A HW2,
+manyCH
+3
+OH(e.g.,Menten1991;Sugiyamaetal.2008;Sanna
+et al. 2017) and H
+2
+O maser spots (e.g., Torrelles et al. 1998,
+2011;Sobolevetal.2018)aredetectedandareassociatedwith
+its disk. Sobolev et al. (2018) also found that most of the H
+2
+O
+maserfluxisassociatedwiththecompactHiiregionHW3d.OH
+maser features close to the Hii regions are also seen in HW2
+(e.g., Cohen & Brebner 1985; Bartkiewicz et al. 2005). These
+three kinds of masers in Cep A have a large velocity range of
+−25 km s−1 ≤ V
+LSR
+≤ −2 km s−1 and are widespread around
+HW2 and HW3, while NH
+3
+(9,6) emission is only detected at
+−0.9 km s−1 ≤ V
+LSR
+≤2.9 km s−1 toward a sub-arcsecond-
+sizedregiontothewestofthepeakcontinuumpositionofHW2
+(see Fig. 3). This suggests that the NH
+3
+(9,6) maser in Cep A
+isuniqueandnotrelatedtomaserspotsseeninothermolecular
+species.
+In G34.26+0.15, OH (Zheng et al. 2000), H
+2
+O (Imai et al.
+2011),andCH 3 OH(Bartkiewiczetal.2016)masershavebeen
+detected east of source C (Fig. 4), and none of them coincides
+with the head of C. The NH
+3
+(9,6) maser M1 is also found
+slightly off the head of source C. This could suggest that M1
+ispoweredbycontinuumsourceCorbyanoutflow.Nearcom-
+ponent B, there are some OH and CH
+3
+OH masers but no H
+2
+O
+or NH 3 masers. A group of H 2 O masers, well-known tracers
+of outflows, with a large velocity distribution of 43 km s−1 ≤
+V
+LSR
+≤54 km s−1, was found to the west of the centimeter-
+continuum source A and close to the peak of the millimeter-
+continuumemission(seedetailsinourFig.A.2andalsoinFig.5
+ofImaietal.2011).TheclosenessofNH
 3
-wellastheheadofC(1(cid:48).(cid:48)4×1(cid:48).(cid:48)2resolution;Heatonetal.1989). temperatures of (cid:38)200 K (Henkel et al. 1987; Patel et al. 2005;
-However, we did not find this redshifted component in our Comito et al. 2007; Beuther et al. 2018). This suggests that
-JVLAobservations.Therefore,itspositionwithinG34.26+0.15 highkinetictemperaturesareneededtoexciteNH (9,6)masers.
+(9,6)maserspotsM2
+andM3tothisgroupofwatermasersandtheirsimilarvelocities
+again suggest an association of NH
 3
-cannot be determined. The blueshifted (9,6) masers with a However,itshouldbenotedthatthesilicatedustabsorptionfea-
-velocity range of 53.8–56.8 km s−1 (M1, M2, and M3) show turemightdominateat10µm(seethespectralenergydistribu-
-velocities compatible with those of the NH (3,3) emission at tion of Cep A in De Buizer et al. 2017). Additionally, there is
+(9,6) masers with outflow
+activity.
+4.5. Constraintsonpumpingscenarios
+Our observations have resulted in the detection of NH 3 (9,6)
+masers in Cep A and G34.26+0.15. The new detections could
+provide additional constraints on the maser line’s pumping
+mechanism. As mentioned in Sect. 1, the pumping mechanism
+of the (9,6) maser is unclear (Madden et al. 1986; Brown &
+Cragg1991).Previousstudieshavesuggestedthattherearethree
+main pumping scenarios to explain the observed NH
 3
-the proper positions (Heaton et al. 1989), which might be a nobrightinfraredemissionaroundthetwo(9,6)masers,M2and
-suitableenvironmentformaserspecies. M3, in G34.26+0.15 (see Fig. 4; see also Fig. 11 in De Buizer
+maser
+lines (Madden et al. 1986; Henkel et al. 2013): (1) infrared ra-
+diationfromthedustcontinuumemission,(2)lineoverlap,and
+(3)collisionalpumping.
+For the first mechanism, infrared photons near 10 µm are
+needed for vibrational excitation. The high dust temperature
+(∼300 K) of W51-IRS2 can provide substantial infrared pho-
+tons near 10 µm, which is used for radiative pumping (Henkel
+etal.2013).BothCepAandG34.26+0.15havesimilarkinetic
+temperatures of (cid:38)200 K (Henkel et al. 1987; Patel et al. 2005;
+Comito et al. 2007; Beuther et al. 2018). This suggests that
+highkinetictemperaturesareneededtoexciteNH
+3
+(9,6)masers.
+However,itshouldbenotedthatthesilicatedustabsorptionfea-
+turemightdominateat10µm(seethespectralenergydistribu-
+tion of Cep A in De Buizer et al. 2017). Additionally, there is
+nobrightinfraredemissionaroundthetwo(9,6)masers,M2and
+M3, in G34.26+0.15 (see Fig. 4; see also Fig. 11 in De Buizer
 etal.2003fora10.5µmmap).Thisindicatesthatthepumping
 mechanism via infrared photons near 10 µm may not be viable
-4.4. ComparisonofNH 3 (9,6)maserswithothermaserlines toexplainthe(9,6)masersinCepAandG34.26+0.15.Further-
+toexplainthe(9,6)masersinCepAandG34.26+0.15.Further-
 more,Wilson&Schilke(1993)arguedthatradiativepumpingby
-To characterize the environment of NH (9,6) masers, we can
-3 dustemissiontendstoexcitemultipleadjacentammoniamaser
-compare their positions with respect to those of other maser
+dustemissiontendstoexcitemultipleadjacentammoniamaser
 transitions,whichappearstocontradictourfailuretodetectthe
-species (i.e., OH, H O, and CH OH). Toward Cep A HW2,
-2 3 adjacent(8,5)and(10,7)lines(withrespecttoquantumnumbers
-manyCH OH(e.g.,Menten1991;Sugiyamaetal.2008;Sanna
-3 andfrequency)andtoonlymeasurethe(9,6)transitionsinCep
-et al. 2017) and H O maser spots (e.g., Torrelles et al. 1998,
-2 A and G34.26+0.15. Therefore, we suggest that infrared radia-
-2011;Sobolevetal.2018)aredetectedandareassociatedwith
+adjacent(8,5)and(10,7)lines(withrespecttoquantumnumbers
+andfrequency)andtoonlymeasurethe(9,6)transitionsinCep
+A and G34.26+0.15. Therefore, we suggest that infrared radia-
 tionfromdustisnotthemainpumpingsource.
-its disk. Sobolev et al. (2018) also found that most of the H O
-maserfluxisassociatedwiththecompactHiiregionHW3d.O2 Madden et al. (1986) suggested that there might be some
-H
-maser features close to the Hii regions are also seen in HW2 line overlaps between the rotational NH 3 transitions in the far-
+Madden et al. (1986) suggested that there might be some
+line overlaps between the rotational NH 3 transitions in the far-
 infraredband.However,thiswouldbeunlikelytoaffectonlythe
-(e.g., Cohen & Brebner 1985; Bartkiewicz et al. 2005). These
 (9,6) line. Nevertheless, far-infrared spectral observations will
-three kinds of masers in Cep A have a large velocity range of
 beneededtoclarifythisscenario.
-−25 km s−1 ≤ V ≤ −2 km s−1 and are widespread around
-LSR Based on our observations, the (9,6) maser spots are close
-HW2 and HW3, while NH (9,6) emission is only detected at
-3 to, but not coincident with, the peaks of the radio continuum
-−0.9 km s−1 ≤ V ≤2.9 km s−1 toward a sub-arcsecond-
-LSR emission in Cep A and G34.26+0.15. Furthermore, the (9,6)
-sizedregiontothewestofthepeakcontinuumpositionofHW2
+Based on our observations, the (9,6) maser spots are close
+to, but not coincident with, the peaks of the radio continuum
+emission in Cep A and G34.26+0.15. Furthermore, the (9,6)
 masers show velocity offsets with respect to their systemic ve-
-(see Fig. 3). This suggests that the NH (9,6) maser in Cep A
-3 locities. This indicates that the (9,6) masers are located at the
-isuniqueandnotrelatedtomaserspotsseeninothermolecular
-base of outflows, similar to the H O masers. This is supported
-species. 2
+locities. This indicates that the (9,6) masers are located at the
+base of outflows, similar to the H
+2
+O masers. This is supported
 by VLBI observations that show that (9,6) masers tend to be
-In G34.26+0.15, OH (Zheng et al. 2000), H O (Imai et al.
-2 closelyassociatedwithH Omasers(Pratapetal.1991).Theob-
-2011),andCH 3OH(Bartkiewiczetal.2016)masershavebeen servedtimevariabilityin2 G34.26+0.15andW51-IRS2canalso
-detected east of source C (Fig. 4), and none of them coincides
+closelyassociatedwithH
+2
+Omasers(Pratapetal.1991).Theob-
+servedtimevariabilityinG34.26+0.15andW51-IRS2canalso
 beattributedtoepisodicmolecularoutflows.Thisindicatesthat
-with the head of C. The NH (9,6) maser M1 is also found
-3 collisional pumping could be the driver of the (9,6) maser. On
-slightly off the head of source C. This could suggest that M1
+collisional pumping could be the driver of the (9,6) maser. On
 the other hand, collisional pumping has been successfully used
-ispoweredbycontinuumsourceCorbyanoutflow.Nearcom-
-toexplaintheNH (3,3)maser(Walmsley&Ungerechts1983;
-ponent B, there are some OH and CH OH masers but no H O 3
-3 2 Floweretal.1990;Mangum&Wootten1994).Collisionstendto
-or NH 3 masers. A group of H 2O masers, well-known tracers pumpfromtheK=0leveltotheK=3levelwithparitychanges,
-of outflows, with a large velocity distribution of 43 km s−1 ≤
+toexplaintheNH
+3
+(3,3)maser(Walmsley&Ungerechts1983;
+Floweretal.1990;Mangum&Wootten1994).Collisionstendto
+pumpfromtheK=0leveltotheK=3levelwithparitychanges,
 thatis,theupperlevelofthe(3,3)metastabletransitionwillbe
-V ≤54 km s−1, was found to the west of the centimeter-
-LSR overpopulated.NH (9,6)arisesfromtheorthospecies,soasim-
-continuum source A and close to the peak of the millimeter- 3
+overpopulated.NH
+3
+(9,6)arisesfromtheorthospecies,soasim-
 ilarmechanismmightalsooccurinthecaseofthe(9,6)transi-
-continuumemission(seedetailsinourFig.A.2andalsoinFig.5
 tion.Furthermeasurementsofcollisionalratesofammoniawill
-ofImaietal.2011).TheclosenessofNH (9,6)maserspotsM2
-3 allowustotestthisscenario.
-andM3tothisgroupofwatermasersandtheirsimilarvelocities
-again suggest an association of NH (9,6) masers with outflow
-3
-activity. 5. Summary
-We report the discovery of NH (9,6) masers in two HMSFRs,
+allowustotestthisscenario.
+5. Summary
+We report the discovery of NH
 3
-4.5. Constraintsonpumpingscenarios Cep A and G34.26+0.15. The narrow line width of the emis-
-sionfeatures(∆V ≤2.0kms−1)andtheirhighbrightnesstem-
+(9,6) masers in two HMSFRs,
+Cep A and G34.26+0.15. The narrow line width of the emis-
+sionfeatures(∆V
 1/2
-Our observations have resulted in the detection of NH 3 (9,6) peratures (> 400 K) indicate the maser nature of the lines.
-masers in Cep A and G34.26+0.15. The new detections could The intensity of the (9,6) maser in G34.26+0.15 is decreasing
-provide additional constraints on the maser line’s pumping withtime, whiletoward CepA themaser isstable basedon 20
-mechanism. As mentioned in Sect. 1, the pumping mechanism months of monitoring at Effelsberg. Linearly interpolating the
-of the (9,6) maser is unclear (Madden et al. 1986; Brown & integratedintensitiesobtainedatEffelsbergasafunctionoftime,
-Cragg1991).Previousstudieshavesuggestedthattherearethree theJVLAmeasurementsshowthatthereisnomissingfluxden-
-main pumping scenarios to explain the observed NH maser sityonscalesontheorderof1.2arcsec(4×10−3and2×10−2pc)
-3
-lines (Madden et al. 1986; Henkel et al. 2013): (1) infrared ra- to the total single-dish flux. The JVLA-detected emission in-
-diationfromthedustcontinuumemission,(2)lineoverlap,and dicates that the NH (9,6) maser in Cep A originates from a
-3
-(3)collisionalpumping. sub-arcsecond-sized region slightly (0(cid:48).(cid:48)28 ± 0(cid:48).(cid:48)10) to the west
-For the first mechanism, infrared photons near 10 µm are of the peak position of the 1.36cm continuum object, HW2. In
-needed for vibrational excitation. The high dust temperature G34.26+0.15,threeNH (9,6)maserspotsareobserved:oneis
-3
-(∼300 K) of W51-IRS2 can provide substantial infrared pho- closetotheheadofthecometaryUCHiiregionC,andtheother
-tons near 10 µm, which is used for radiative pumping (Henkel twoareemittedfromacompactregiontothewestoftheHCHii
+≤2.0kms−1)andtheirhighbrightnesstem-
+peratures (> 400 K) indicate the maser nature of the lines.
+The intensity of the (9,6) maser in G34.26+0.15 is decreasing
+withtime, whiletoward CepA themaser isstable basedon 20
+months of monitoring at Effelsberg. Linearly interpolating the
+integratedintensitiesobtainedatEffelsbergasafunctionoftime,
+theJVLAmeasurementsshowthatthereisnomissingfluxden-
+sityonscalesontheorderof1.2arcsec(4×10−3and2×10−2pc)
+to the total single-dish flux. The JVLA-detected emission in-
+dicates that the NH
+3
+(9,6) maser in Cep A originates from a
+sub-arcsecond-sized region slightly (0(cid:48)(cid:48) .28 ± 0(cid:48)(cid:48) .10) to the west
+of the peak position of the 1.36cm continuum object, HW2. In
+G34.26+0.15,threeNH
+3
+(9,6)maserspotsareobserved:oneis
+closetotheheadofthecometaryUCHiiregionC,andtheother
+twoareemittedfromacompactregiontothewestoftheHCHii
 Articlenumber,page6of10
 Y.T.Yan(闫耀庭) etal.:Discoveryofammonia(9,6)masersintwohigh-massstar-formingregions
-regionA.Wesuggestthatthe(9,6)masersmaybeconnectedto Mauersberger,R.,Wilson,T.L.,&Henkel,C.1988,A&A,201,123
-outflowinggas.HigherangularresolutionJVLAandVLBIob- McEwen,B.C.,Pihlström,Y.M.,&Sjouwerman,L.O.2016,ApJ,826,189
-servations are planned to provide more accurate positions and McMullin,J.P.,Waters,B.,Schiebel,D.,Young,W.,&Golap,K.2007,inAs-
-tronomicalSocietyofthePacificConferenceSeries,Vol.376,Astronomical
+regionA.Wesuggestthatthe(9,6)masersmaybeconnectedto
+outflowinggas.HigherangularresolutionJVLAandVLBIob-
+servations are planned to provide more accurate positions and
 constraintsonpumpingscenarios.
-DataAnalysisSoftwareandSystemsXVI,ed.R.A.Shaw,F.Hill,&D.J.
-Acknowledgements. Wewouldliketothanktheanonymousrefereefortheuse- Bell,127
-fulcommentsthatimprovethemanuscript.Y.T.Y.isamemberoftheInterna- Mei,Y.,Chen,X.,Shen,Z.-Q.,&Li,B.2020,ApJ,898,157
-tionalMaxPlanckResearchSchool(IMPRS)forAstronomyandAstrophysics Menten,K.M.1991,ApJ,380,L75
-attheUniversitiesofBonnandCologne.Y.T.Y.wouldliketothanktheChina Mills,E.A.C.,Ginsburg,A.,Clements,A.R.,etal.2018,ApJ,869,L14
-ScholarshipCouncil(CSC)foritssupport.Wewouldliketothankthestaffat Mookerjea,B.,Casper,E.,Mundy,L.G.,&Looney,L.W.2007,ApJ,659,447
-theEffelsbergfortheirhelpprovidedduringtheobservations.Wethankthestaff Moscadelli,L.,Reid,M.J.,Menten,K.M.,etal.2009,ApJ,693,406
-oftheJVLA,especiallyTonyPerreaultandEdwardStarr,fortheirassistance Narayanan,G.&Walker,C.K.1996,ApJ,466,844
-with the observations and data reduction. This research has made use of the Ott,M.,Witzel,A.,Quirrenbach,A.,etal.1994,A&A,284,331
-NASA/IPACInfraredScienceArchive,whichisfundedbytheNationalAero- Patel,N.A.,Curiel,S.,Sridharan,T.K.,etal.2005,Nature,437,109
-nautics and Space Administration and operated by the California Institute of Perley,R.A.&Butler,B.J.2013,ApJS,204,19
-Technology. Pety,J.2005,inSF2A-2005:Semainedel’AstrophysiqueFrancaise,ed.F.Ca-
-soli,T.Contini,J.M.Hameury,&L.Pagani,721
-Poynter,R.L.&Kakar,R.K.1975,ApJS,29,87
-Pratap,P.,Menten,K.M.,Reid,M.J.,Moran,J.M.,&Walmsley,C.M.1991,
-References ApJ,373,L13
-Reid,M.J.&Ho,P.T.P.1985,ApJ,288,L17
-Bartkiewicz,A.,Szymczak,M.,Cohen,R.J.,&Richards,A.M.S.2005,MN- Rodriguez,L.F.,Ho,P.T.P.,&Moran,J.M.1980,ApJ,240,L149
-RAS,361,623 Sanna,A.,Moscadelli,L.,Surcis,G.,etal.2017,A&A,603,A94
-Bartkiewicz,A.,Szymczak,M.,&vanLangevelde,H.J.2016,A&A,587,A104 Sewilo,M.,Churchwell,E.,Kurtz,S.,Goss,W.M.,&Hofner,P.2004,ApJ,
-Benjamin,R.A.,Churchwell,E.,Babler,B.L.,etal.2003,PASP,115,953 605,285
-Beuther,H.,Mottram,J.C.,Ahmadi,A.,etal.2018,A&A,617,A100 Sewiło,M.,Churchwell,E.,Kurtz,S.,Goss,W.M.,&Hofner,P.2011,ApJS,
-Beuther,H.,Walsh,A.J.,Thorwirth,S.,etal.2007,A&A,466,989 194,44
-Brogan,C.L.,Hunter,T.R.,Cyganowski,C.J.,etal.2011,ApJ,739,L16 Sobolev,A.M.,Moran,J.M.,Gray,M.D.,etal.2018,ApJ,856,60
-Brown,A.T.,Little,L.T.,MacDonald,G.H.,Riley,P.W.,&Matheson,D.N. Sugiyama,K.,Fujisawa,K.,Doi,A.,etal.2008,PASJ,60,1001
-1981,MNRAS,195,607 Torrelles,J.M.,Gómez,J.F.,Garay,G.,etal.1998,ApJ,509,262
-Brown,R.D.&Cragg,D.M.1991,ApJ,378,445 Torrelles,J.M.,Gómez,J.F.,Garay,G.,etal.1999,MNRAS,307,58
-Carrasco-González,C.,Sanna,A.,Rodríguez-Kamenetzky,A.,etal.2021,ApJ, Torrelles,J.M.,Ho,P.T.P.,Rodriguez,L.F.,&Canto,J.1985,ApJ,288,595
-914,L1 Torrelles,J.M.,Ho,P.T.P.,Rodriguez,L.F.,&Canto,J.1986,ApJ,305,721
-Cesaroni,R.,Walmsley,C.M.,&Churchwell,E.1992,A&A,256,618 Torrelles,J.M.,Patel,N.A.,Curiel,S.,etal.2011,MNRAS,410,627
-Cheung,A.C.,Rank,D.M.,Townes,C.H.,Thornton,D.D.,&Welch,W.J. Torrelles,J.M.,Verdes-Montenegro,L.,Ho,P.T.P.,Rodriguez,L.F.,&Canto,
-1968,Phys.Rev.Lett.,21,1701 J.1993,ApJ,410,202
-Churchwell,E.,Babler,B.L.,Meade,M.R.,etal.2009,PASP,121,213 Towner,A.P.M.,Brogan,C.L.,Hunter,T.R.,&Cyganowski,C.J.2021,ApJ,
-Cohen,R.J.&Brebner,G.C.1985,MNRAS,216,51P 923,263
-Comito,C.,Schilke,P.,Endesfelder,U.,Jiménez-Serra,I.,&Martín-Pintado,J. Umemoto,T.,Mikami,H.,Yamamoto,S.,&Hirano,N.1999,ApJ,525,L105
-2007,A&A,469,207 Urquhart,J.S.,Morgan,L.K.,Figura,C.C.,etal.2011,MNRAS,418,1689
-Curiel,S.,Ho,P.T.P.,Patel,N.A.,etal.2006,ApJ,638,878 Walmsley,C.M.&Ungerechts,H.1983,A&A,122,164
-Danby, G., Flower, D. R., Valiron, P., Schilke, P., & Walmsley, C. M. 1988, Walsh,A.J.,Breen,S.L.,Britton,T.,etal.2011,MNRAS,416,1764
-MNRAS,235,229 Walsh,A.J.,Longmore,S.N.,Thorwirth,S.,Urquhart,J.S.,&Purcell,C.R.
-DeBuizer,J.M.,Liu,M.,Tan,J.C.,etal.2017,ApJ,843,33 2007,MNRAS,382,L35
-DeBuizer,J.M.,Radomski,J.T.,Telesco,C.M.,&Piña,R.K.2003,ApJ,598, Wang,K.,Zhang,Q.,Wu,Y.,Li,H.-b.,&Zhang,H.2012,ApJ,745,L30
-1127 Wilson,T.L.,Batrla,W.,&Pauls,T.A.1982,A&A,110,L20
-Dzib,S.,Loinard,L.,Rodríguez,L.F.,Mioduszewski,A.J.,&Torres,R.M. Wilson,T.L.&Henkel,C.1988,A&A,206,L26
-2011,ApJ,733,71 Wilson,T.L.,Johnston,K.J.,&Henkel,C.1990,A&A,229,L1
+Acknowledgements. Wewouldliketothanktheanonymousrefereefortheuse-
+fulcommentsthatimprovethemanuscript.Y.T.Y.isamemberoftheInterna-
+tionalMaxPlanckResearchSchool(IMPRS)forAstronomyandAstrophysics
+attheUniversitiesofBonnandCologne.Y.T.Y.wouldliketothanktheChina
+ScholarshipCouncil(CSC)foritssupport.Wewouldliketothankthestaffat
+theEffelsbergfortheirhelpprovidedduringtheobservations.Wethankthestaff
+oftheJVLA,especiallyTonyPerreaultandEdwardStarr,fortheirassistance
+with the observations and data reduction. This research has made use of the
+NASA/IPACInfraredScienceArchive,whichisfundedbytheNationalAero-
+nautics and Space Administration and operated by the California Institute of
+Technology.
+References
+Bartkiewicz,A.,Szymczak,M.,Cohen,R.J.,&Richards,A.M.S.2005,MN-
+RAS,361,623
+Bartkiewicz,A.,Szymczak,M.,&vanLangevelde,H.J.2016,A&A,587,A104
+Benjamin,R.A.,Churchwell,E.,Babler,B.L.,etal.2003,PASP,115,953
+Beuther,H.,Mottram,J.C.,Ahmadi,A.,etal.2018,A&A,617,A100
+Beuther,H.,Walsh,A.J.,Thorwirth,S.,etal.2007,A&A,466,989
+Brogan,C.L.,Hunter,T.R.,Cyganowski,C.J.,etal.2011,ApJ,739,L16
+Brown,A.T.,Little,L.T.,MacDonald,G.H.,Riley,P.W.,&Matheson,D.N.
+1981,MNRAS,195,607
+Brown,R.D.&Cragg,D.M.1991,ApJ,378,445
+Carrasco-González,C.,Sanna,A.,Rodríguez-Kamenetzky,A.,etal.2021,ApJ,
+914,L1
+Cesaroni,R.,Walmsley,C.M.,&Churchwell,E.1992,A&A,256,618
+Cheung,A.C.,Rank,D.M.,Townes,C.H.,Thornton,D.D.,&Welch,W.J.
+1968,Phys.Rev.Lett.,21,1701
+Churchwell,E.,Babler,B.L.,Meade,M.R.,etal.2009,PASP,121,213
+Cohen,R.J.&Brebner,G.C.1985,MNRAS,216,51P
+Comito,C.,Schilke,P.,Endesfelder,U.,Jiménez-Serra,I.,&Martín-Pintado,J.
+2007,A&A,469,207
+Curiel,S.,Ho,P.T.P.,Patel,N.A.,etal.2006,ApJ,638,878
+Danby, G., Flower, D. R., Valiron, P., Schilke, P., & Walmsley, C. M. 1988,
+MNRAS,235,229
+DeBuizer,J.M.,Liu,M.,Tan,J.C.,etal.2017,ApJ,843,33
+DeBuizer,J.M.,Radomski,J.T.,Telesco,C.M.,&Piña,R.K.2003,ApJ,598,
+1127
+Dzib,S.,Loinard,L.,Rodríguez,L.F.,Mioduszewski,A.J.,&Torres,R.M.
+2011,ApJ,733,71
 Flower,D.R.,Offer,A.,&Schilke,P.1990,MNRAS,244,4P
-Wilson,T.L.&Schilke,P.1993,inLectureNotesinPhysics,Astrophysical
-Galván-Madrid,R.,Keto,E.,Zhang,Q.,etal.2009,ApJ,706,1036 Masers,ed.A.W.Clegg&G.E.Nedoluha,Vol.412,123–126
-Garay,G.,Ramirez,S.,Rodriguez,L.F.,Curiel,S.,&Torrelles,J.M.1996,ApJ, Wyrowski,F.,Güsten,R.,Menten,K.M.,Wiesemeyer,H.,&Klein,B.2012,
-459,193 A&A,542,L15
-Garay,G.,Rodriguez,L.F.,&vanGorkom,J.H.1986,ApJ,309,553 Zhang,Q.&Ho,P.T.P.1995,ApJ,450,L63
-Gaume,R.A.,Wilson,T.L.,&Johnston,K.J.1996,ApJ,457,L47 Zhang,Q.,Hunter,T.R.,Sridharan,T.K.,&Cesaroni,R.1999,ApJ,527,L117
-Goddi,C.,Greenhill,L.J.,Humphreys,E.M.L.,Chandler,C.J.,&Matthews, Zheng,X.W.,Moran,J.M.,&Reid,M.J.2000,MNRAS,317,192
+Galván-Madrid,R.,Keto,E.,Zhang,Q.,etal.2009,ApJ,706,1036
+Garay,G.,Ramirez,S.,Rodriguez,L.F.,Curiel,S.,&Torrelles,J.M.1996,ApJ,
+459,193
+Garay,G.,Rodriguez,L.F.,&vanGorkom,J.H.1986,ApJ,309,553
+Gaume,R.A.,Wilson,T.L.,&Johnston,K.J.1996,ApJ,457,L47
+Goddi,C.,Greenhill,L.J.,Humphreys,E.M.L.,Chandler,C.J.,&Matthews,
 L.D.2011,ApJ,739,L13
 Goddi,C.,Henkel,C.,Zhang,Q.,Zapata,L.,&Wilson,T.L.2015,A&A,573,
 A109
@@ -557,20 +892,83 @@ Madden,S.C.,Irvine,W.M.,Matthews,H.E.,Brown,R.D.,&Godfrey,P.D.
 Mangum,J.G.&Wootten,A.1994,ApJ,428,L33
 Mauersberger,R.,Henkel,C.,&Wilson,T.L.1987,A&A,173,352
 Mauersberger,R.,Wilson,T.L.,&Henkel,C.1986,A&A,160,L13
+Mauersberger,R.,Wilson,T.L.,&Henkel,C.1988,A&A,201,123
+McEwen,B.C.,Pihlström,Y.M.,&Sjouwerman,L.O.2016,ApJ,826,189
+McMullin,J.P.,Waters,B.,Schiebel,D.,Young,W.,&Golap,K.2007,inAs-
+tronomicalSocietyofthePacificConferenceSeries,Vol.376,Astronomical
+DataAnalysisSoftwareandSystemsXVI,ed.R.A.Shaw,F.Hill,&D.J.
+Bell,127
+Mei,Y.,Chen,X.,Shen,Z.-Q.,&Li,B.2020,ApJ,898,157
+Menten,K.M.1991,ApJ,380,L75
+Mills,E.A.C.,Ginsburg,A.,Clements,A.R.,etal.2018,ApJ,869,L14
+Mookerjea,B.,Casper,E.,Mundy,L.G.,&Looney,L.W.2007,ApJ,659,447
+Moscadelli,L.,Reid,M.J.,Menten,K.M.,etal.2009,ApJ,693,406
+Narayanan,G.&Walker,C.K.1996,ApJ,466,844
+Ott,M.,Witzel,A.,Quirrenbach,A.,etal.1994,A&A,284,331
+Patel,N.A.,Curiel,S.,Sridharan,T.K.,etal.2005,Nature,437,109
+Perley,R.A.&Butler,B.J.2013,ApJS,204,19
+Pety,J.2005,inSF2A-2005:Semainedel’AstrophysiqueFrancaise,ed.F.Ca-
+soli,T.Contini,J.M.Hameury,&L.Pagani,721
+Poynter,R.L.&Kakar,R.K.1975,ApJS,29,87
+Pratap,P.,Menten,K.M.,Reid,M.J.,Moran,J.M.,&Walmsley,C.M.1991,
+ApJ,373,L13
+Reid,M.J.&Ho,P.T.P.1985,ApJ,288,L17
+Rodriguez,L.F.,Ho,P.T.P.,&Moran,J.M.1980,ApJ,240,L149
+Sanna,A.,Moscadelli,L.,Surcis,G.,etal.2017,A&A,603,A94
+Sewilo,M.,Churchwell,E.,Kurtz,S.,Goss,W.M.,&Hofner,P.2004,ApJ,
+605,285
+Sewiło,M.,Churchwell,E.,Kurtz,S.,Goss,W.M.,&Hofner,P.2011,ApJS,
+194,44
+Sobolev,A.M.,Moran,J.M.,Gray,M.D.,etal.2018,ApJ,856,60
+Sugiyama,K.,Fujisawa,K.,Doi,A.,etal.2008,PASJ,60,1001
+Torrelles,J.M.,Gómez,J.F.,Garay,G.,etal.1998,ApJ,509,262
+Torrelles,J.M.,Gómez,J.F.,Garay,G.,etal.1999,MNRAS,307,58
+Torrelles,J.M.,Ho,P.T.P.,Rodriguez,L.F.,&Canto,J.1985,ApJ,288,595
+Torrelles,J.M.,Ho,P.T.P.,Rodriguez,L.F.,&Canto,J.1986,ApJ,305,721
+Torrelles,J.M.,Patel,N.A.,Curiel,S.,etal.2011,MNRAS,410,627
+Torrelles,J.M.,Verdes-Montenegro,L.,Ho,P.T.P.,Rodriguez,L.F.,&Canto,
+J.1993,ApJ,410,202
+Towner,A.P.M.,Brogan,C.L.,Hunter,T.R.,&Cyganowski,C.J.2021,ApJ,
+923,263
+Umemoto,T.,Mikami,H.,Yamamoto,S.,&Hirano,N.1999,ApJ,525,L105
+Urquhart,J.S.,Morgan,L.K.,Figura,C.C.,etal.2011,MNRAS,418,1689
+Walmsley,C.M.&Ungerechts,H.1983,A&A,122,164
+Walsh,A.J.,Breen,S.L.,Britton,T.,etal.2011,MNRAS,416,1764
+Walsh,A.J.,Longmore,S.N.,Thorwirth,S.,Urquhart,J.S.,&Purcell,C.R.
+2007,MNRAS,382,L35
+Wang,K.,Zhang,Q.,Wu,Y.,Li,H.-b.,&Zhang,H.2012,ApJ,745,L30
+Wilson,T.L.,Batrla,W.,&Pauls,T.A.1982,A&A,110,L20
+Wilson,T.L.&Henkel,C.1988,A&A,206,L26
+Wilson,T.L.,Johnston,K.J.,&Henkel,C.1990,A&A,229,L1
+Wilson,T.L.&Schilke,P.1993,inLectureNotesinPhysics,Astrophysical
+Masers,ed.A.W.Clegg&G.E.Nedoluha,Vol.412,123–126
+Wyrowski,F.,Güsten,R.,Menten,K.M.,Wiesemeyer,H.,&Klein,B.2012,
+A&A,542,L15
+Zhang,Q.&Ho,P.T.P.1995,ApJ,450,L63
+Zhang,Q.,Hunter,T.R.,Sridharan,T.K.,&Cesaroni,R.1999,ApJ,527,L117
+Zheng,X.W.,Moran,J.M.,&Reid,M.J.2000,MNRAS,317,192
 Articlenumber,page7of10
 A&Aproofs:manuscriptno.mainArxiv
 AppendixA:
-TableA.1.SummaryofNH (9,6)maserobservations.
+TableA.1.SummaryofNH
 3
+(9,6)maserobservations.
+Source Telescope Beam Epoch Channel S
+ν
+rms
 (cid:82)
-Source Telescope Beam Epoch Channel S rms S dv V ∆V
-ν ν LSR 1/2
+S
+ν
+dv V
+LSR
+∆V
+1/2
 size spacing
 (kms−1) (Jy) (mJy) (Jykms−1) (kms−1)
 CepA Effelsberg 49(cid:48)(cid:48) 2020,Jan.04 0.62 0.67 3.41 1.19±0.02 -1.11±0.02 1.67±0.04
 Effelsberg 49(cid:48)(cid:48) 2021,Feb.11 0.62 0.59 5.97 1.08±0.02 -0.74±0.02 1.70±0.04
 Effelsberg 49(cid:48)(cid:48) 2021,Feb.15 0.62 0.65 10.98 1.11±0.03 -0.75±0.02 1.60±0.05
-JVLAa 1(cid:48).(cid:48)47 × 0(cid:48).(cid:48)99 2021,Jul.13 0.13 1.13 144 0.89±0.09 -0.86±0.03 0.74±0.12
+JVLAa 1(cid:48)(cid:48) .47 × 0(cid:48)(cid:48) .99 2021,Jul.13 0.13 1.13 144 0.89±0.09 -0.86±0.03 0.74±0.12
 Effelsberg 49(cid:48)(cid:48) 2021,Aug.11 0.07 0.98 13.36 0.49±0.02 -0.90±0.01 0.47±0.01
 0.35 0.26±0.02 -0.28±0.02 0.69±0.05
 Effelsberg 49(cid:48)(cid:48) 2021,Aug.12 0.07 0.98 13.35 0.50±0.01 -0.89±0.07 0.48±0.07
@@ -581,7 +979,7 @@ Effelsberg 49(cid:48)(cid:48) 2021,Aug.12 0.07 0.98 13.35 0.50±0.01 -0.89±0.07
 G34.26+0.15 Effelsberg 49(cid:48)(cid:48) 2020,Jan.03 0.62 0.30 1.26 0.65±0.03 62.50±0.05 2.05±0.13
 Effelsberg 49(cid:48)(cid:48) 2021,Feb.11 0.62 0.24 2.42 0.40±0.02 55.76±0.04 1.60±0.12
 Effelsberg 49(cid:48)(cid:48) 2021,Feb.15 0.62 0.20 4.86 0.38±0.02 55.71±0.05 1.80±0.14
-JVLAb 1(cid:48).(cid:48)33 × 1(cid:48).(cid:48)06 2021,Jul.13 0.13 0.23 37.1 0.09±0.02 54.41±0.03 0.38±0.09
+JVLAb 1(cid:48)(cid:48) .33 × 1(cid:48)(cid:48) .06 2021,Jul.13 0.13 0.23 37.1 0.09±0.02 54.41±0.03 0.38±0.09
 0.22 0.22±0.02 55.82±0.05 0.95±0.12
 0.15 0.06±0.01 57.21±0.04 0.35±0.08
 Effelsberg 49(cid:48)(cid:48) 2021,Aug.11 0.07 0.08 13.92 0.06±0.007 54.10±0.05 0.68±0.12
@@ -589,10 +987,12 @@ Effelsberg 49(cid:48)(cid:48) 2021,Aug.11 0.07 0.08 13.92 0.06±0.007 54.10±0.0
 0.12 0.10±0.006 55.85±0.02 0.75±0.06
 Effelsberg 49(cid:48)(cid:48) 2021,Aug.12 0.07 0.16 27.40 0.09±0.008 55.83±0.02 0.56±0.05
 Notes.ThespectralparametersareobtainedfromGaussianfitting.(a)TheJVLAspectrumtowardCepAisextractedfromtheEffelsberg-beam-
-sizedregion(FWHM49(cid:48)(cid:48)).(b)ForG34.26+0.15,theJVLAbeamsamplestheNH (9,6)spectrumoveraregionofradius3(cid:48).(cid:48)5,whichcontainsall
+sizedregion(FWHM49(cid:48)(cid:48)).(b)ForG34.26+0.15,theJVLAbeamsamplestheNH
 3
-detectedNH (9,6)emissions.
+(9,6)spectrumoveraregionofradius3(cid:48)(cid:48) .5,whichcontainsall
+detectedNH
 3
+(9,6)emissions.
 TableA.2.1.36cmJVLAfluxdensitiesofindividualcontinuumsources.
 Source R.A. Dec. Size P.A. S
 ν
@@ -608,10 +1008,17 @@ B 185318.649±0.005 +011500.071±0.180 (2.31±0.49)×(0.85±0.21) 17.4 597±110
 C 185318.560±0.004 +011458.201±0.112 (2.03±0.30)×(1.34±0.20) 178.0 5070±660
 Articlenumber,page8of10
 Y.T.Yan(闫耀庭) etal.:Discoveryofammonia(9,6)masersintwohigh-massstar-formingregions
-TableA.3.NH (9,6)maserpositionsderivedfromtheJVLAobservations.
+TableA.3.NH
 3
-Source R.A. Dec. S T V ∆V
-ν MB LSR 1/2
+(9,6)maserpositionsderivedfromtheJVLAobservations.
+Source R.A. Dec. S
+ν
+T
+MB
+V
+LSR
+∆V
+1/2
 (h m s) (◦ (cid:48) (cid:48)(cid:48)) (mJybeam−1) (K) (kms−1)
 CepA M 225617.933±0.002 +620149.608±0.011 985.2 2464.8 -0.88±0.01 0.51±0.02
 343.2 829.5 -0.24±0.03 0.63±0.05
@@ -621,28 +1028,42 @@ M2 185318.696±0.002 +011455.807±0.034 48.4 122.4 53.77±0.05 0.35±0.08
 180.8 457.6 55.83±0.01 0.59±0.03
 M3 185318.667±0.005 +011455.348±0.066 78.1 197.2 54.22±0.04 0.94±0.08
 73.7 186.3 55.78±0.04 0.79±0.08
-Fig.A.1.CepheusA.Thegreyshadedareasmarkthe1.36cmJVLAcontinuummapofCepA.Thereferencepositionisα =22h56m17s.972,
+Fig.A.1.CepheusA.Thegreyshadedareasmarkthe1.36cmJVLAcontinuummapofCepA.Thereferencepositionisα
 J2000
-andδ =62◦01(cid:48)49(cid:48).(cid:48)587,thepeakpositionofthecontinuummap,ismarkedbyaredcross.Slightlytothewestofthecrossisthewhiteellipse
+=22h56m17s.972,
+andδ
 J2000
-denotingthepositionoftheNH (9,6)emissionwithapurplestaratitscenter.TheredcontoursshowtheNOrthernExtendedMillimeterArray
+=62◦01(cid:48)49(cid:48)(cid:48) .587,thepeakpositionofthecontinuummap,ismarkedbyaredcross.Slightlytothewestofthecrossisthewhiteellipse
+denotingthepositionoftheNH
 3
+(9,6)emissionwithapurplestaratitscenter.TheredcontoursshowtheNOrthernExtendedMillimeterArray
 (NOEMA)1.37mmcontinuum,takenfromBeutheretal.(2018).Contourlevelsare-5,5,10,20,40,80,100,150,and200×2.43mJybeam−1.
-OH(Bartkiewiczetal.2005),H O(Sobolevetal.2018),andCH OH(Sannaetal.2017)masersarepresentedasdiamonds,circles,andsquares,
-2 3
-respectively.Thecolorbarontheright-handsideindicatesthevelocityrange(V )ofmaserspots.
+OH(Bartkiewiczetal.2005),H
+2
+O(Sobolevetal.2018),andCH
+3
+OH(Sannaetal.2017)masersarepresentedasdiamonds,circles,andsquares,
+respectively.Thecolorbarontheright-handsideindicatesthevelocityrange(V
 LSR
+)ofmaserspots.
 Articlenumber,page9of10
 A&Aproofs:manuscriptno.mainArxiv
-Fig.A.2.1.36cmJVLAcontinuummapofG34.26+0.15presentedasgrayshadedareas.Thereferencepositionisα =18h53m18s.560,and
+Fig.A.2.1.36cmJVLAcontinuummapofG34.26+0.15presentedasgrayshadedareas.Thereferencepositionisα
+J2000
+=18h53m18s.560,and
+δ
 J2000
-δ =01◦14(cid:48)58(cid:48).(cid:48)201,thepeakposition,ismarkedbyaredcross.TheredellipsesshowthepositionsofNH (9,6)emissionwithstarsattheir
-J2000 3
+=01◦14(cid:48)58(cid:48)(cid:48) .201,thepeakposition,ismarkedbyaredcross.TheredellipsesshowthepositionsofNH
+3
+(9,6)emissionwithstarsattheir
 center(i.e.,M1,M2,andM3).ThebluecontoursshowtheBerkeley-Illinois-MarylandAssociation(BIMA)array2.8mmcontinuum,takenfrom
-Mookerjeaetal.(2007).Contourlevelsare-3,3,10,20,30,40,50,70,90,100,120,and140×20mJybeam−1.OH(Zhengetal.2000),H O(Imai
+Mookerjeaetal.(2007).Contourlevelsare-3,3,10,20,30,40,50,70,90,100,120,and140×20mJybeam−1.OH(Zhengetal.2000),H
 2
-etal.2011),andCH OH(Bartkiewiczetal.2016)masersarepresentedasdiamonds,circles,andsquares,respectively.Thecolorbarindicates
+O(Imai
+etal.2011),andCH
 3
-thevelocityrange(V )ofmaserspots.
+OH(Bartkiewiczetal.2016)masersarepresentedasdiamonds,circles,andsquares,respectively.Thecolorbarindicates
+thevelocityrange(V
 LSR
+)ofmaserspots.
 Articlenumber,page10of10
diff --git a/read/results/pdfplumber/2201.00022.txt b/read/results/pdfplumber/2201.00022.txt
index a77c95c..9878b03 100644
--- a/read/results/pdfplumber/2201.00022.txt
+++ b/read/results/pdfplumber/2201.00022.txt
@@ -1,246 +1,411 @@
-Draft version January 4, 2022
+Draft version July 7, 2022
 TypesetusingLATEXtwocolumnstyleinAASTeX631
 The Formation of Intermediate Mass Black Holes in Galactic Nuclei
 Sanaea C. Rose,1,2 Smadar Naoz,1,2 Re’em Sari,3 and Itai Linial3
 1Department of Physics and Astronomy, University of California, Los Angeles, CA 90095, USA
 2Mani L. Bhaumik Institute for Theoretical Physics, University of California, Los Angeles, CA 90095, USA
 3Racah Institute for Physics, The Hebrew University, Jerusalem 91904, Israel
-1202
 ABSTRACT
 Most stellar evolution models predict that black holes (BHs) should not exist above approximately
-50−70M . However,recentLIGO/VirgodetectionsindicatetheexistenceofBHswithmassesatand
-(cid:12) ceD
-abovethisthreshold. WesuggestthatmassiveBHs,includingintermediatemassblackholes(IMBHs),
-can form in galactic nuclei through collisions between stellar-mass black holes and the surrounding
-main-sequence stars. Considering dynamical processes such as collisions, mass segregation, and relax-
-13
-ation, we find that this channel can be quite efficient, forming IMBHs as massive as 104 M . Our
-(cid:12)
-results suggest that massive black holes and IMBHs may be ubiquitous in galactic centres. This for-
-mation channel also has implications for observations. Collisions between stars and BHs can produce ]AG.hp-ortsa[
-electromagnetic signatures, for example, fromx-raybinaries and tidaldisruption events. Additionally,
-formed through this channel, both black holes in the mass gap and IMBHs can merge with the super-
-massiveblackholeatthecenterofagalacticnucleusthroughgravitationalwaves. Thesegravitational
-wave events are extreme and intermediate mass ratio inspirals (EMRIs and IMRIs, respectively).
-1. INTRODUCTION tic nuclei as well. Several studies propose that our
-own galactic center may host an IMBH in the inner pc
+50−70 M
+(cid:12)
+, the lower limit of the pair-instability mass gap. However, recent LIGO/Virgo detections
+indicate the existence of BHs with masses at and above this threshold. We suggest that massive
+BHs, including intermediate mass black holes (IMBHs), can form in galactic nuclei through collisions
+between stellar-mass black holes and the surrounding main-sequence stars. Considering dynamical
+processes such as collisions, mass segregation, and relaxation, we find that this channel can be quite
+efficient, forming IMBHs as massive as 104 M
+(cid:12)
+. This upper limit assumes that (1) the BHs accrete a
+substantial fraction of the stellar mass captured during each collision and (2) that the rate at which
+new stars are introduced into the region near the SMBH is high enough to offset depletion by stellar
+disruptionsandstar-starcollisions. Wediscussdeviationsfromthesekeyassumptionsinthetext. Our
+results suggest that BHs in the pair-instability mass gap and IMBHs may be ubiquitous in galactic
+centers. This formation channel has implications for observations. Collisions between stars and BHs
+can produce electromagnetic signatures, for example, from x-ray binaries and tidal disruption events.
+Additionally, formed through this channel, both black holes in the mass gap and IMBHs can merge
+with the supermassive black hole at the center of a galactic nucleus through gravitational waves.
+Thesegravitationalwaveeventsareextremeandintermediatemassratioinspirals(EMRIsandIMRIs,
+respectively).
+1. INTRODUCTION
 The recently detected gravitational wave source
-(e.g.,Hansen&Milosavljevi´c2003;Maillardetal.2004;
 GW190521 (The LIGO Scientific Collaboration et al.
-Gu¨rkan&Rasio2005;Gualandris&Merritt2009;Chen
-2020a,b) produced an intermediate mass black hole of 1v22000.1022:viXra
-&Liu2013;Generozov&Madigan2020;Fragioneetal.
-approximately142M . Thiseventmayhavealsohada
+2020a,b) produced an intermediate mass black hole of
+approximately142M
 (cid:12)
-2020a; Zheng et al. 2020; Naoz et al. 2020; GRAVITY
-85M progenitor,whichfallswithinthepair-instability
+. Thiseventmayhavealsohada
+85M
 (cid:12)
-Collaboration et al. 2020).
+progenitor,whichfallswithinthepair-instability
 mass gap that limits stellar black holes (BHs) to no
-more than < 50 M (e.g., Heger et al. 2003; Woosley SeveralIMBHformationchannelshavebeensuggested
-∼ (cid:12)
-2017)1. Similarly, the merger products of GW150914, in the literature. For example, IMBHs may have a cos-
-mological origin, forming in the early universe either
+more than
+∼
+< 50 M
+(cid:12)
+(e.g., Heger et al. 2003; Woosley
+2017)1. Similarly, the merger products of GW150914,
 GW170104, and GW170814 fall within the mass gap
-as a result of the very first stars (e.g., Madau & Rees
 (e.g., Abbott et al. 2016, 2017a,b). BH mergers that
-2001; Schneider et al. 2002; Johnson & Bromm 2007;
 form second generation BHs and, in some cases, inter-
-Valiante et al. 2016) or from direct collapse of accumu-
 mediate mass BHs (IMBHs), these gravitational wave
-lated gas (e.g., Begelman et al. 2006; Yue et al. 2014;
 (GW) events can occur in globular clusters, young stel-
-Ferrara et al. 2014; Choi et al. 2015; Shlosman et al.
+Correspondingauthor: SanaeaC.Rose
+srose@astro.ucla.edu
+1Note that the exact lower and upper limits may be sensitive to
+metallicityoftheprogenitor(e.g.,Woosley2017;Spera&Mapelli
+2017a;Limongi&Chieffi2018a;Saksteinetal.2020;Belczynski
+etal.2020a;Renzoetal.2020;Vinketal.2021).
 lar clusters, or the field (e.g., Rodriguez et al. 2018; Ro-
-2016). These high redshift IMBHs would need to sur-
 driguez et al. 2019; Fishbach et al. 2020; Mapelli et al.
-vive galaxy evolution and mergers to present day (e.g.,
 2021b,a; Di Carlo et al. 2019, 2021; Dall’Amico et al.
-Rashkov&Madau2014),withsignificanteffectsontheir
 2021; Arca Sedda et al. 2021). However, IMBHs are
-stellarandevendarkmattersurroundings(e.g.,Bertone
 not limited to these locations and may reside in galac-
+tic nuclei as well. Several studies propose that our
+own galactic center may host an IMBH in the inner pc
+(e.g.,Hansen&Milosavljevi´c2003;Maillardetal.2004;
+G¨ urkan&Rasio2005;Gualandris&Merritt2009;Chen
+&Liu2013;Generozov&Madigan2020;Fragioneetal.
+2020a; Zheng et al. 2020; Naoz et al. 2020; GRAVITY
+Collaboration et al. 2020).
+SeveralIMBHformationchannelshavebeensuggested
+in the literature. For example, IMBHs may have a cos-
+mological origin, forming in the early universe either
+as a result of the very first stars (e.g., Madau & Rees
+2001; Schneider et al. 2002; Johnson & Bromm 2007;
+Valiante et al. 2016) or from direct collapse of accumu-
+lated gas (e.g., Begelman et al. 2006; Yue et al. 2014;
+Ferrara et al. 2014; Choi et al. 2015; Shlosman et al.
+2016). These high redshift IMBHs would need to sur-
+vive galaxy evolution and mergers to present day (e.g.,
+a
+r
+X
+i
+v
+:
+2
+2
+0
+1
+.
+0
+0
+0
+2
+2
+v
+2
+[
+a
+s
+t
+r
+o
+-
+p
+h
+.
+G
+A
+]
+6
+J
+u
+l
+2
+0
+2
+2
+2 Rose et al.
+Rashkov&Madau2014),withsignificanteffectsontheir
+stellarandevendarkmattersurroundings(e.g.,Bertone
 etal.2009;Chen&Liu2013;Bringmannetal.2012;Eda
 etal.2013;Naoz&Silk2014;Naozetal.2019). Another
-Correspondingauthor: SanaeaC.Rose popular formation channel relies on the coalescence of
-srose@astro.ucla.edu many stellar-mass black holes. For example, IMBHs
-1Note that the exact lower and upper limits may be sensitive to may form in the centers of globular clusters, where few-
-metallicityoftheprogenitor(e.g.,Woosley2017;Spera&Mapelli bodyinteractionsleadtothemergerofstellar-massBHs
-2017a;Limongi&Chieffi2018a;Saksteinetal.2020;Belczynski (e.g., O’Leary et al. 2006; Gu¨rkan et al. 2006; Blecha
-etal.2020a;Renzoetal.2020;Vinketal.2021).
+popular formation channel relies on the coalescence of
+many stellar-mass black holes, which may seed objects
+asmassiveasSMBHs(e.g.,Kroupaetal.2020). IMBHs
+may form in the centers of globular clusters, where few-
+bodyinteractionsleadtothemergerofstellar-massBHs
+(e.g., O’Leary et al. 2006; G¨ urkan et al. 2006; Blecha
 et al. 2006; Freitag et al. 2006; Umbreit et al. 2012; Ro-
-2 Rose et al.
-driguezetal.2018;Rodriguezetal.2019;Fragioneetal. a statistical approach to estimate the stellar encounters
-2020b). Other formation mechanisms invoke successive and final IMBH masses.
-collisions and mergers of massive stars (e.g., Portegies
-Zwart & McMillan 2002; Portegies Zwart et al. 2004; 2.1. Physical Picture
-Freitag et al. 2006; Kremer et al. 2020; Gonz´alez et al.
+driguezetal.2018;Rodriguezetal.2019;Fragioneetal.
+2020b). Other formation mechanisms invoke successive
+collisions and mergers of massive stars (e.g., Ebisuzaki
+etal.2001;PortegiesZwart&McMillan2002;Portegies
+Zwartetal.2004;Freitagetal.2006;Sakuraietal.2017;
+Kremer et al. 2020; Gonz´alez et al. 2021; Di Carlo et al.
+2021; Das et al. 2021a,b; Escala 2021).
+The main obstacle to sequential BH mergers in clus-
+ters is that the merger recoil velocity kick often exceeds
+the escape velocity from the cluster (e.g., Schnittman
+& Buonanno 2007; Centrella et al. 2010; O’Leary et al.
+2006; Baibhav et al. 2020, Rom & Sari, in prep.). How-
+ever, nuclear star clusters at the centers of galaxies do
+notencounterthisproblem. Forexample,Fragioneetal.
+(2021) explore repeated BH-BH mergers in nuclear star
+clusters without a SMBH. They considered BH binary-
+single interactions, binary BH GW merger, and GW
+mergerrecoilkicks. Thepost-kickmergerproductsinks
+back towards the cluster center over a dynamical fric-
+tion timescale. Using this approach, they showed that
+103−104 M
+(cid:12)
+IMBHs can form efficiently over the life-
+time of a cluster.
+However, as discussed in Section 2.2, direct BH-star
+collisions are much more frequent than BH-BH collision
+in galactic nuclei, making the former a promising chan-
+nel for BH growth. In an N-body study of young star
+clusters, Rizzuto et al. (2022) find that BH-star colli-
+sions are a main contributor to the formation of BHs
+in the mass gap and IMBHs. In a similar vein, Stone
+et al. (2017) demonstrate that massive BHs can form
+from repeated tidal encounters between stars and BHs.
+Moregenerally, several studieshaveexplored therole of
+collisions in a GN, with implications for the stellar and
+red giant populations (e.g., Dale & Davies 2006; Dale
+et al. 2009; Balberg et al. 2013; Mastrobuono-Battisti
+etal.2021). WeproposethatIMBHscanformnaturally
+within the central pc of a galactic center through re-
+peatedcollisionsbetweenBHsandmain sequence stars.
+During a collision, the BH can accrete some portion of
+the star’s mass. Over many collisions, it can grow ap-
+preciablyinsize. Wedemonstratethatthischannelcan
+createIMBHswithmassesaslargeas104 M
+(cid:12)
+,anupper
+limitthatdependsonthedensityprofileofthesurround-
+ing stars and the efficiency of the accretion.
+The paper is structured as follows: we describe rele-
+vant physical processes and our approach in Section 2.
+In particular, we provide an overview of collisions in
+Section 2.2 and present our statistical approach in Sec-
+tion 2.3. Section 2.4 discusses our treatment of the
+mass growth with each collision and presents analytic
+solutions to our equations in two different regimes, ef-
+ficient collisions and inefficient collisions We compare
+these solutions to our statistical results. Sections 2.6
+and 2.8 discuss implications for GW merger events be-
+tween IMBHs and the SMBH. We then incorporate re-
+laxation processes and discuss the subsequent results in
+Section 2.9. Finally, we discuss and summarize our find-
+ings in Section 3.
+2. METHODOLOGY
+We consider a population of stellar mass BHs embed-
+ded in a cluster of $1\,M_\odot$ stars. When stars and BHs
+collide, the BHs can accrete mass. The growth rate de-
+pends on the physical processes outlined below. We use
+a statistical approach to estimate the stellar encounters
+and final IMBH masses.
+2.1. Physical Picture
 WeconsiderapopulationofBHswithintheinnerfew
-2021; Di Carlo et al. 2021).
 parsecsoftheSMBHinagalacticnucleus(GN).Weas-
-The main obstacle to sequential BH mergers in clus-
 sume that the BH mass distribution follows that of the
-ters is that the merger recoil velocity kick often exceeds
 stars from which they originate, a Kroupa initial mass
-the escape velocity from the cluster (e.g., Schnittman functiondN/dm∝m−2.35. Whilethischoicerepresents
-& Buonanno 2007; Centrella et al. 2010; O’Leary et al.
+functiondN/dm∝m−2.35. Whilethischoicerepresents
 a gross oversimplification, it has very little bearing on
-2006; Baibhav et al. 2020, Rom & Sari, in prep.). How-
 our final results. Future work may address the particu-
-ever, nuclear star clusters at the centers of galaxies do
 lars of the BH mass distribution, but we do not expect
-notencounterthisproblem. Forexample,Fragioneetal.
 that it will significantly alter the outcome. The upper
-(2021) explore repeated BH-BH mergers in nuclear star
 and lower limits of the BH mass distribution are 5 and
-clusters without a SMBH. They considered BH binary-
-50M , respectively. We select the upper limit to en-
+50M
 (cid:12)
-single interactions, binary BH GW merger, and GW
+, respectively. We select the upper limit to en-
 compass the range of upper bounds predicted by stellar
-mergerrecoilkicks. Thepost-kickmergerproductsinks
 evolution models, which vary between 40 and 125M
 (cid:12)
-back towards the cluster center over a dynamical fric-
 dependingonthemetallicity(Hegeretal.2003;Woosley
-tion timescale. Using this approach, they showed that
 2017; Spera & Mapelli 2017b; Limongi & Chieffi 2018b;
-103−104 M IMBHs can form efficiently over the life-
-(cid:12) Belczynski et al. 2020b; Renzo et al. 2020). We assume
-time of a cluster.
+Belczynski et al. 2020b; Renzo et al. 2020). We assume
 that the orbits of the BHs follow a thermal eccentricity
-However, as discussed in Section 2.2, direct star-BH
-distribution. We draw their semimajor axes, a , from a
+distribution. We draw their semimajor axes, a
 •
-collisions are much more frequent than BH-BH collision
+, from a
 uniform distribution in log distance, dN/d(logr) being
-in galactic nuclei, making the former a promising chan-
 constant. While this distribution is not necessarily rep-
-nel for BH growth. We propose that IMBHs can form
 resentative of actual conditions in the GN, we use it to
-naturally within the central pc of a SMBH in a galactic
 build a comprehensive physical picture of BH growth at
-center. Specifically,theseIMBHsformthroughrepeated
 all distances from the SMBH, including within 0.01 pc.
-collisions with main sequence stars, accreting some or
 Otherwise, the innermost region of the GN would be
-all of the star’s mass depending on the details of the
 poorly represented in our sample. We consider other
-collision. We demonstrate that this channel can create
-observationally motivated distributions in Section 2.8,
-IMBHs with masses as large as 104 M , depending on
-(cid:12) butreserveamoredetailedexaminationofthedistribu-
-the density profile of the surrounding stars.
+IMBH Formation in Galactic Nuclei 3
+Figure 1. We plot the relevant timescales, including col-
+lision (green), relaxation (gold), and BH-BH GW capture
+(purple), for a single BH in the GN as a function of distance
+from the SMBH. For the collision timescale, we assume the
+BH is on a circular orbit. The timescales depend on the
+density, so we adopt a range of density profiles, bounded by
+α = 1 (dashed curve) to α = 2 (dark, solid curve). The dark
+blue line represents the time for a $10^5\,M_\odot$ BH to merge with
+the SMBH through GW emission.
+observationally motivated distributions in Section 2.9,
+but reserve a more detailed examination of the distribu-
 tion’s impact for future work.
-The paper is structured as follows: we describe rele-
-vant physical processes and our approach in Section 2.
 2.2. Direct Collisions
-In particular, we provide an overview of collisions in
 BHsintheGNcanundergodirectcollisionswithother
-Section 2.2 and present our statistical approach in Sec-
-objects. The timescale for this process, t , can be es-
-tion 2.3. Section 2.4 discusses our treatment of the coll
-timated using a simple rate calculation: t−1 = nσA,
-mass growth with each collision and presents analytic coll
+objects. The timescale for this process, $t_{\rm coll}$, can be es-
+timated using a simple rate calculation: $t_{\rm coll}^{-1} = n\sigma A$,
 where n is the number density of objects, σ is the ve-
-solutions to our equations in two different regimes, ef-
 locitydispersion, andAisthecross-section. Weusethe
-ficient collisions and inefficient collisions We compare
 collision timescale from Rose et al. (2020):
-these solutions to our statistical results. Sections 2.5
-and 2.7 discuss implications for GW merger events be-
-t−1 =πn(a )σ(a )
-tween IMBHs and the SMBH. We then incorporate re- coll • •
-(cid:18) (cid:19)
-2G(m +m )
-laxation processes and discuss the subsequent results in × f (e )r2+f (e )r BH (cid:63) . (1)
-Section2.8. Finally,wediscussandsummarizeourfind- 1 • c 2 • c σ(a •)2
-ings in Section 3.
-where G is the gravitational constant and r is the sum
+t_{\rm coll}^{-1} = \pi n(a_\bullet)\,\sigma(a_\bullet)
+\times \left( f_1(e_\bullet)\, r_c^2 + f_2(e_\bullet)\, r_c\, \frac{2G(m_{\rm BH}+m_\star)}{\sigma(a_\bullet)^2} \right) . (1)
+where G is the gravitational constant and r
 c
+is the sum
 of the radii of the interacting objects, a black hole with
-2. METHODOLOGY
-mass m and a star with mass m . Detailed in Rose
-BH (cid:63)
-We consider a population of stellar mass BHs embed- et al. (2020), f (e ) and f (e ) account for the effect of
-1 • 2 •
-ded in a cluster of 1 M stars. When stars and BHs the eccentricity of the BH’s orbit about the SMBH on
-(cid:12)
-collide, the BHs can accrete mass. The growth rate de- the collision rate, while n and σ are simply evaluated
-pends on the physical processes outlined below. We use at the semimajor axis of the orbit (see below). Note
-IMBH Formation in Galactic Nuclei 3
+mass $m_{\rm BH}$ and a star with mass $m_\star$. Detailed in Rose
+et al. (2020), $f_1(e_\bullet)$ and $f_2(e_\bullet)$ account for the effect of
+the eccentricity of the BH’s orbit about the SMBH on
+the collision rate, while n and σ are simply evaluated
+at the semimajor axis of the orbit (see below). Note
+that this timescale equation includes the effects of grav-
+itational focusing, which enhances the cross-section of
+interaction.
+Assuming a circular orbit for simplicity, we plot the
+timescale for a BH orbiting in the GN to collide with
+a $1\,M_\odot$ star as a function of distance from the SMBH
+in Figure 1.$^2$ As this timescale depends on the density
+of surrounding stars, we adopt a density profile of the
+form:
+\rho(r_\bullet) = \rho_0 \left( \frac{r_\bullet}{r_0} \right)^{-\alpha} , (2)
+where $r_\bullet$ denotes the distance from the SMBH. We adopt
+a SMBH mass of $4\times10^6\,M_\odot$ such that our fiducial GN
+matches our own galactic center (e.g., Ghez et al. 2005;
+Genzel et al. 2003). In this case, the normalization in
+Eq. (2) is $\rho_0 = 1.35\times10^6\,M_\odot/{\rm pc}^3$ at $r_0 = 0.25$ pc (Gen-
+zel et al. 2010). Additionally, in Eq. (2), α gives the
+slope of the power law. We assume that a uniform pop-
+ulation of solar mass stars accounts for most of the mass
+in the GN, making the stellar number density:
+n(r_\bullet) = \frac{\rho(r_\bullet)}{1\,M_\odot} . (3)
 The collision timescale also depends on the velocity dis-
 persion, which we express as:
+σ(r • )=
 (cid:115)
 GM
-σ(r )= • , (4)
-• r (1+α)
+• r
 •
-where α is the slope of the density profile and M de-
+(1+α) , (4)
+where α is the slope of the density profile and M
 •
+de-
 notes the mass of the SMBH (Alexander 1999; Alexan-
 der&Pfuhl2014). Asmentionedabove,Eq.(1)depends
-on the sum of the radii of the colliding objects, r . We
-c
-take r =1R because these interactions involve a BH
-c (cid:12)
+on the sum of the radii of the colliding objects, $r_c$. We
+take $r_c = 1\,R_\odot$ because these interactions involve a BH
 and a star, and the former has a much smaller physi-
 calcross-section. Forexample,theSchwarzschildradius
-of a 10M BH is only 30 km, or 4.31×10−5R . For
-(cid:12) (cid:12)
-Figure 1. We plot the relevant timescales, including col-
+of a $10\,M_\odot$ BH is only 30 km, or $4.31\times10^{-5}\,R_\odot$. For
 this reason, direct collisions between compact objects
-lision (green), relaxation (gold), and BH-BH GW capture
 are very rare and not included in our model.
-(purple),forasingleBHintheGNasafunctionofdistance
 We note that direct collisions between BHs, via GW
-from the SMBH. For the collision timescale, we assume the
-BH is on a circular orbit. The timescales depend on the emission, wereshowntobeefficientinnuclearstarclus-
-density, so we adopt a range of density profiles, bounded by ters without SMBHs (e.g., Portegies Zwart & McMil-
-α=1(dashedcurve)toα=2(dark,solidcurve). Thedark lan 2000; O’Leary et al. 2006; Rodriguez et al. 2016).
-bluelinerepresentsthetimefora105 M (cid:12) BHtomergewith However, in the GN, star-BH collisions are much more
-the SMBH through GW emission.
+emission, were shown to be efficient in nuclear star clus-
+ters without SMBHs (e.g., Portegies Zwart & McMil-
+lan 2000; O’Leary et al. 2006; Rodriguez et al. 2016).
+However, in the GN, star-BH collisions are much more
 frequent than direct BH-BH collisions. As depicted in
 Figure 1, the star-BH collision timescale for a range
-thatthistimescaleequationincludestheeffectsofgrav- of density profiles is many orders of magnitude shorter
-itational focusing, which enhances the cross-section of than the BH-BH GW collision timescale (for the rele-
-interaction. vant equations, see O’Leary et al. 2009; Gond´an et al.
-Assuming a circular orbit for simplicity, we plot the 2018, for example). Thus, we expect that star-BH col-
-timescale for a BH orbiting in the GN to collide with lisions will be the main driver of IMBH growth in the
-a 1M (cid:12) star as a function of distance from the SMBH GN.
-in Figure 1.2 As this timescale depends on the density
-of surrounding stars, we adopt a density profile of the 2.3. Statistical Approach to Collisions
-form: We simulate the mass growth of a population of BHs
+of density profiles is many orders of magnitude shorter
+than the BH-BH GW collision timescale (for the rele-
+vant equations, see O’Leary et al. 2009; Gondán et al.
+2018, for example). Thus, we expect that star-BH col-
+lisions will be the main driver of IMBH growth in the
+GN.
+2We note that the eccentricity has a very minor effect on the
+collision timescale (Rose et al. 2020).
+4 Rose et al.
+2.3. Statistical Approach to Collisions
+We simulate the mass growth of a population of BHs
 with initial conditions detailed in Section 2.1. Over an
-(cid:18) (cid:19)−α increment ∆t of 106 yr, we calculate the probability of
-r
-ρ(r )=ρ • , (2) a collision occurring, given by ∆t/t . This choice of
-• 0 r coll
-0 ∆t is motivated by our galactic center’s star formation
+increment ∆t of $10^6$ yr, we calculate the probability of
+a collision occurring, given by $\Delta t/t_{\rm coll}$. This choice of
+∆t is motivated by our galactic center’s star formation
 timescale (e.g., Lu et al. 2009), allowing for regular re-
-wherer denotesthedistancefromtheSMBH.Weadopt plenishmentofthestellarpopulationintheGN.Wehave
-•
-a SMBH mass of 4×106 M such that our fiducial GN checked that the results are not sensitive to this choice
-(cid:12)
-matches our own galactic center (e.g., Ghez et al. 2005; of∆t,omittedheretoavoidclutter. Wedrawanumber
-Genzel et al. 2003). In this case, the normalization in between 0 and 1 using a random number generator. If
-Eq.(2)isρ =1.35×106M /pc3 atr =0.25pc(Gen- that number is less than or equal to the probability, we
-0 (cid:12) 0
-zel et al. 2010). Additionally, in Eq. (2), α gives the increasetheBH’smassby∆m,themassthattheBHis
-slope of the power law. We assume that a uniform pop- expected to accrete in a single collision (see Section 2.4
-ulation of solar mass stars account for most of the mass for details). We recalculate the collision timescale using
-in the GN, making the stellar number density: the updated BH mass and repeat this process until the
+plenishment of the stellar population in the GN. We have
+checked that the results are not sensitive to this choice
+of ∆t, omitted here to avoid clutter. We draw a number
+between 0 and 1 using a random number generator. If
+that number is less than or equal to the probability, we
+increase the BH’s mass by ∆m, the mass that the BH is
+expected to accrete in a single collision (see Section 2.4
+for details). We recalculate the collision timescale using
+the updated BH mass and repeat this process until the
 time elapsed equals the simulation time of 10 Gyr3.
-ρ(r )
-n(r )= • . (3)
-• 1M 3Closer to the SMBH, ∆t may exceed the collision timescale by
-(cid:12)
-a factor of a few for steep density profiles. We include a safe-
-guard in our code which takes the ratio t /∆t and rounds it
-coll
-2We note that the eccentricity has a very minor effect on the tothenearestinteger. Wetakethisintegertobethenumberof
-collisiontimescale(Roseetal.2020). collisionsandincreasetheBHmassaccordingly.
-4 Rose et al.
 2.4. Mass Growth
 When a BH collides with a star, it may accrete ma-
 terial and grow in mass. The details of the accretion
@@ -250,453 +415,1073 @@ two objects experience a head on collision, with the BH
 passing through the star’s center. We begin by con-
 sidering the escape velocity from the BH at the star’s
 outermost point, its surface, which corresponds to the
-maximum impact parameter 1 R . Qualitatively, one
-(cid:12)
-might expect that the BH could accrete the entire star
-(i.e.,∆m∼1M )iftherelativevelocityissmallerthan
-(cid:12)
+maximum impact parameter $1\,R_\odot$. Qualitatively, one
+might expect that the BH could capture the entire star
+(i.e., $\Delta m \sim 1\,M_\odot$) if the relative velocity is smaller than
 theescapevelocityfromtheBHatthispoint. However,
-Figure 2. Weconsideranexamplethathighlightsthemass
 in the vicinity of the SMBH, the dispersion velocity of
-growthasafunctionofdistancefromtheSMBH.Greydots
 the stars may be much larger than the escape velocity
-represent the initial masses and distances from the SMBH
 from the BH at the star’s surface. In this case, the BH
-oftheBHsinvolvedinthesimulation. Forsimplicity,weset
-accretes a “tunnel” of material through the star. This
-theinitalmassequalto10M foralloftheBHs. Assuming
+captures a “tunnel” of material through the star. This
+tunnel has radius equal to the Bondi radius and length
+approximately 1R
 (cid:12)
-tunnel has radius equal to the Bondi radius and length thedensityprofileofstarshasα=1,weconsidertwocases:
-approximately 1R . BHsaccreteallofthestar’smassduringacollision(red)and
+. For the purposes of this study, we
+assume that the BH accretes all of the material that
+it captures. The details of the accretion are uncertain,
+however, and it may be much less efficient than our re-
+sults imply. We discuss accretion in Section 2.5.
+To estimate ∆m, we begin with the Bondi-Hoyle ac-
+cretion rate, $\dot{m}$, given by:
+\dot{m} = \frac{4\pi G^2 m_{\rm BH}^2\, \rho_{\rm star}}{(c_s^2+\sigma^2)^{3/2}} , (5)
+3Closer to the SMBH, ∆t may exceed the collision timescale by
+a factor of a few for steep density profiles. We include a safe-
+guard in our code which takes the ratio $t_{\rm coll}/\Delta t$ and rounds it
+to the nearest integer. We take this integer to be the number of
+collisions and increase the BH mass accordingly.
+Figure 2. We consider an example that highlights the mass
+growth as a function of distance from the SMBH. Grey dots
+represent the initial masses and distances from the SMBH
+of the BHs involved in the simulation. For simplicity, we set
+the initial mass equal to 10M
 (cid:12)
-To estimate ∆m, we begin with the Bondi-Hoyle ac- onlyaportionofthestar’smassisaccretedduringacollision
-cretion rate, m˙, given by: given by Eq. 6 (blue). The latter case results in less growth
+for all of the BHs. Assuming
+the density profile of stars has α = 1, we consider two cases:
+BHs accrete all of the star’s mass during a collision (red) and
+only a portion of the star’s mass is accreted during a collision
+given by Eq. 6 (blue). The latter case results in less growth
 closer to the SMBH where the velocity dispersion becomes
-m˙ = 4πG2m2 BHρ star , (5) high. The shaded regions and dashed lines represent the
-(c2+σ2)3/2 analytical predictions detailed in Section 2.4.
-s
-wherec isthespeedofsoundinthestarandρ isits
-s star
-and their final masses can be approximated using the
+high. The shaded regions and dashed lines represent the
+analytical predictions detailed in Section 2.4.
+where $c_s$ is the speed of sound in the star and $\rho_{\rm star}$ is its
 density (e.g., Bondi 1952; Bondi & Hoyle 1944; Shima
-following equation:
 et al. 1985; Edgar 2004, see latter for a review). We
-approximate the density as 1M /(4πR3/3) and take T
-(cid:12) (cid:12) m (t →const.)=m +∆m , (7)
-the conservative value of c = 500 km s−1, which is final coll initial t
-s coll
-consistent with the sound speed inside a 1 M star
-(cid:12) in which T represents the simulation time and ∆m and
+approximate the density as $1\,M_\odot/(4\pi R_\odot^3/3)$ and take
+the conservative value of $c_s = 500$ km s$^{-1}$, which is
+consistent with the sound speed inside a $1\,M_\odot$ star
 (Christensen-Dalsgaardetal.1996)andallowsustoset
-t remain constant, approximated as their initial val-
-coll
 a lower limit on ∆m. To find ∆m, at each collision, we
-ues.
 have:
-This equation is plotted in Figure 2 for both cases,
-∆m=min(m˙ ×t ,1 M ) , (6)
-(cid:63),cross (cid:12) ∆m=1M (red) and ∆m from Bondi-Hoyle-Lyttleton
+\Delta m = \min\left(\dot{m} \times t_{\star,{\rm cross}},\; 1\,M_\odot\right) , (6)
+where t
+(cid:63),cross
+∼R
 (cid:12)
-where t ∼R /σ is the crossing time of the BH in accretion (blue), and the curves coincide with the cor-
-(cid:63),cross (cid:12)
-thestar. Wetaketheminimumbetweenm˙ ×t and responding simulated results. The shaded regions rep-
+/σ is the crossing time of the BH in
+thestar. Wetaketheminimumbetween ˙ m×t
 (cid:63),cross
-1 M because the BH cannot accrete more mass than resent one standard deviation from Eq. (7), calculated
+and
+1 M
 (cid:12)
-one star at each collision. usingthesquarerootofthenumberofcollisions,T/t .
-coll
-Figure2juxtaposestheexpectedgrowthusingBondi- As indicated by the results in red, in the absence of
-Hoyle-Lyttleton accretion (blue small points) with a Bondi-Hoyle-Lyttletonaccretion, theBHsclosesttothe
-muchsimplermodelinwhichtheBHaccretesthestar’s SMBH experience the most growth because they have
-entire mass, 1M (red large points). Both examples shorter collision timescales. However, Bondi-Hoyle-
+because the BH cannot accrete more mass than
+one star at each collision.
+Figure2juxtaposestheexpectedgrowthusingBondi-
+Hoyle-Lyttleton accretion (blue small points) with a
+muchsimplermodelinwhichtheBHaccretesthestar’s
+entire mass, 1M
 (cid:12)
-start with identical populations of 10M BHs (grey) Lyttleton accretion becomes important closer to the
+(red large points). Both examples
+start with identical populations of 10M
 (cid:12)
-and simulate growth through collisions using a statisti- SMBH, where the velocity dispersion is large compared
-cal approach. As the BHs grow, the collision timescale, with the stars’ escape velocity, and curtails the mass
-which depends on m , decreases. Simultaneously, growthforBHsinthisregion. Outsideof10−2 pc,aBH
+BHs (grey)
+and simulate growth through collisions using a statisti-
+cal approach. As the BHs grow, the collision timescale,
+which depends on m
 BH
-∆m, which also depends on m , increases. The re- consumes the star’s entire mass: the accretion-limited
+, decreases. Simultaneously,
+∆m, which also depends on m
 BH
-sult is exponential growth (see discussion and details ∆m governed by Eq. (7) is greater than or equal to the
-surrounding Eq. (8)). In Figure 2, however, the simula- star’s mass.
-tions assume α=1 for the stellar density profile, ensur- Eq. 7 does not apply for other values of α. When the
-ing the collision timescale is long compared to the sim- collision timescale is shorter, corresponding to a larger
-ulation time, 10 Gyr. Therefore, the BHs grow slowly, index α in the density profile (see Figure 1), the growth
+, increases. The re-
+sult is exponential growth (see discussion and details
+surrounding Eq. (8)). In Figure 2, however, the simula-
+tions assume α=1 for the stellar density profile, ensur-
+ing the collision timescale is long compared to the sim-
 IMBH Formation in Galactic Nuclei 5
-isveryefficientand∆mquicklyapproaches1M . Con- muchsmallerthatthe10Gyrsimulationtime. Figure3
+ulation time, 10 Gyr. Therefore, the BHs grow slowly,
+and their final masses can be approximated using the
+following equation:
+m_{\rm final}(t_{\rm coll} \to {\rm const.}) = m_{\rm initial} + \Delta m\, \frac{T}{t_{\rm coll}} , (7)
+in which T represents the simulation time and ∆m and
+t
+coll
+remain constant, approximated as their initial val-
+ues.
+This equation is plotted in Figure 2 for both cases,
+∆m=1M
+(cid:12)
+(red) and ∆m from Bondi-Hoyle-Lyttleton
+accretion (blue), and the curves coincide with the cor-
+responding simulated results. The shaded regions rep-
+resent one standard deviation from Eq. (7), calculated
+using the square root of the number of collisions, $T/t_{\rm coll}$.
+As indicated by the results in red, in the absence of
+Bondi-Hoyle-Lyttletonaccretion, theBHsclosesttothe
+SMBH experience the most growth because they have
+shorter collision timescales. However, Bondi-Hoyle-
+Lyttleton accretion becomes important closer to the
+SMBH, where the velocity dispersion is large compared
+with the stars’ escape velocity, and curtails the mass
+growth for BHs in this region. Outside of $10^{-2}$ pc, a BH
+consumes the star’s entire mass: the accretion-limited
+∆m governed by Eq. (6) is greater than or equal to the
+star’s mass.
+Eq. 7 does not apply for other values of α. When the
+collision timescale is shorter, corresponding to a larger
+index α in the density profile (see Figure 1), the growth
+isveryefficientand∆mquicklyapproaches1M
 (cid:12)
-sequently, while we can now assume ∆m = 1M , we confirmsthisexpectation. Itdepictsthemassgrowthof
+. Con-
+sequently, while we can now assume ∆m = 1M
 (cid:12)
-cannolongerassumethecollisiontimescaleisconstant. auniformdistributionofBHswithinitialconditionsde-
-The final mass grows exponentially as a result. For tailedinSection2.1forfiveαvalues,spanning1(green)
-∆m = 1M , the general solution is reached by solving to 2 (purple). The most massive IMBHs form inwards
+, we
+can no longer assume the collision timescale is constant.
+The final mass grows exponentially as a result. For
+∆m = 1M
 (cid:12)
-the differential equation dm/dt=1M /t (m), which of 0.25 pc for the α=2 case.
-(cid:12) coll
+, the general solution is reached by solving
+the differential equation dm/dt=1M
+(cid:12)
+/t
+coll
+(m), which
 gives:
-2.7. Gravitational Wave Mergers and Intermediate
-m (∆m→1M )=−A+(m +A)eCT (8) and Extreme Mass Ratio Inspiral Candidates
-final (cid:12) initial
-where A=σ2R /G and C =2πGn R /σ. As an TowardstheSMBH,efficientcollisionscancreateBHs
-star star star
-massiveenoughtomergewiththeSMBHthroughGWs.
+m_{\rm final}(\Delta m \to 1\,M_\odot) = -A + (m_{\rm initial} + A)\, e^{CT} (8)
+where $A = \sigma^2 R_{\rm star}/G$ and $C = 2\pi G\, n_{\rm star} R_{\rm star}/\sigma$. As an
 example,weplotthiscurveinpurplefortheα=2case,
-Following the method detailed in Section 2.5, when a
 in Figure 3, which agrees with the simulated masses.
-given BH meets the criterion t < t , we mark
-GW elapsed
-2.5. GW Inspiral it as merged with the SMBH. We assume that at this
-pointthedynamicsoftheBHwillbedeterminedbyGW
+2.5. Uncertainties in Accretion
+We note that the ∆M calculated in this proof-of-
+concept study assumes that the BH accretes all of the
+material that it captures. Estimating the true fraction
+of the material accreted by the BH is very challeng-
+ing; this complex problem requires numerically solving
+the generalized GR fluid equations with cooling, heat-
+ing, and radiative transfer, etc. and remains an active
+field of research (e.g., Blandford & Begelman 1999; Park
+& Ostriker 2001; Narayan et al. 2003; Igumenshchev
+et al. 2003; Ohsuga et al. 2005; Yuan et al. 2012; Jiang
+et al. 2014; McKinney et al. 2014; Narayan et al. 2022).
+Heuristically, if a collision between a BH and a star re-
+sults in an accretion disk, the disk’s viscous timescale
+may be as low as days. The resultant luminosity can
+unbind most of the captured material, though details
+such as the amount accreted and peak luminosity re-
+main uncertain (e.g., Yuan et al. (2012); Jiang et al.
+(2014), see also the discussion in Stone et al. (2017),
+Rizzuto et al. (2022), and Kremer et al. (2022)). The
+question becomes whether or not a BH can still accu-
+mulate significant amounts of mass over many collisions
+even if it accretes very little in a single one. We ex-
+plore the viability of our channel using a physically mo-
+tivated inefficient accretion model. Several studies have
+invoked momentum-driven winds in BH accretion (e.g.,
+Murray et al. 2005; Ostriker et al. 2010; Brennan et al.
+2018). We thus estimate the fraction of captured mass
+accreted to be approximately $v_{\rm esc}/(c\eta)$, where $v_{\rm esc}$ is
+the escape velocity from the BH at $1\,R_\odot$ and η is the
+accretion efficiency at the ISCO. We take η to be 0.1
+(e.g., Yu & Tremaine 2002). This expression for the
+fraction accreted is consistent with Kremer et al. (2022)
+equation 19 for s = 0.5, which is a reasonable value for
+s, a free parameter between 0.2 and 0.8. We discuss
+the results of the momentum-driven winds estimate in
+Section 3. We note that the accretion process may be
+more efficient than this estimate implies if, for example,
+jets or other instabilities result in the beaming of radi-
+ation away from the captured material (e.g., Blandford
+& Znajek 1977; Begelman 1979; De Villiers et al. 2005;
+McKinney & Gammie 2004; McKinney 2006; Igumen-
+shchev 2008; Begelman 2012a,b; McKinney et al. 2014).
+2.6. GW Inspiral
 When a BH is close to the SMBH, GW emission can
-emission, shrinking and circularizing the BHs orbit un-
 circularize and shrink its orbit. We implement the ef-
-til it undergoes an extreme or intermediate mass ratio
 fects of GW emission on the BH’s semimajor axis and
-inspiral(EMRIandIMRI,respectively). Therighthand
 eccentricity following Peters & Mathews (1963a). The
-plot in Figure 3 shows the BH masses versus time of
 characteristic timescale to merge a BH with an SMBH
-merger. It is interesting to note that even in the ab-
 is given by:
-sence of relaxation processes, which are often invoked
-(cid:18) M (cid:19)−1(cid:18) m (cid:19)−1 toexplaintheformationofEMRIs, EMRIsandnotably
-t ≈2.9×1012 yr • BH
-GW 106 M 106 M IMRIs can form in this region.
-(cid:12) (cid:12)
-(cid:18) (cid:19)−1(cid:18) (cid:19)4
-M +m a
-× • BH • 2.8. Two Body Relaxation Processes
-2×106 M 10−4 pc
-(cid:12) A BH orbiting the SMBH experiences weak gravita-
-×f(e •)(1−e2 •)7/2 , (9) tionalinteractionswithotherobjectsintheGN.Overa
-relaxation time, these interactions alter its orbit about
-where f(e ) is a function of e . For all values of e ,
-• • • the SMBH. The two-body relaxation timescale for a
-f(e ) is between 0.979 and 1.81 (Blaes et al. 2002). We
-• single-mass system is:
-plot this timescale for a 1×105M BH in Figure 1 in
-(cid:12)
-blue. σ3
-t =0.34 , (10)
-In our simulations, we assume a BH has merged with relax G2ρ(cid:104)M (cid:105)lnΛ
-∗ rlx
-the SMBH when the condition t < t is met.
-GW elapsed
-When this condition is satisfied, we terminate mass where lnΛ rlx is the Coulomb logarithm and (cid:104)M ∗(cid:105) is the
-growth through collisions for that BH.4 average mass of the surrounding objects, here assumed
-to be 1M (Spitzer 1987; Binney & Tremaine 2008,
-(cid:12)
-2.6. IMBH growth Eq. (7.106)). This equation represents the approximate
-As detailed above, BH-stellar collisions can increase timescale for a BH on a semi-circular orbit to change
-the BH masses as a function of time. Here, we examine its orbital energy and angular momentum by order of
-the sensitivity of the BH growth to the density power themselves. The BH experiences diffusion in its angular
-law. From Eq. (1), it is clear that the growth rate de- momentumandenergyasafunctionoftime(depending
-pends on the stellar density profile, governed by the in- ontheeccentricityoftheorbit,thisprocesscanbemore
-dex α. We expect that higher values of α, or steeper efficientFragione&Sari2018;Sari&Fragione2019). In
-profiles, will result in more efficient mass growth. In Figure 1, we plot the relaxation timescale in gold for a
-Figure 1, larger values of α lead to collision timescales rangeofα. WenotethattheBahcall&Wolf(1976)pro-
-in the GN’s inner region, inwards of 0.25 pc, that are file, α=7/4, corresponds to zero net flux and therefore
-does not preferentially migrate objects inward.
-Additionally, because they are more massive on
-4For comparison, we also incrementally changed the semimajor average than the surrounding objects, BHs are ex-
-axis and eccentricity from GW emission following the equations
-pected to segregate inwards in the GN (e.g., Shapiro
-in Peters & Mathews (1963b). This method leads to a slight
-increase in the final IMBH masses because it accounts for the & Marchant 1978; Cohn & Kulsrud 1978; Morris 1993;
-collisionsthattakeplacewhiletheorbitisgraduallyshrinking. Miralda-Escud´e & Gould 2000; Baumgardt et al. 2004).
+t_{\rm GW} \approx 2.9\times10^{12}\,{\rm yr} \left( \frac{M_\bullet}{10^6\,M_\odot} \right)^{-1} \left( \frac{m_{\rm BH}}{10^6\,M_\odot} \right)^{-1}
+\times \left( \frac{M_\bullet + m_{\rm BH}}{2\times10^6\,M_\odot} \right)^{-1} \left( \frac{a_\bullet}{10^{-2}\,{\rm pc}} \right)^{4}
+\times f(e_\bullet)\,(1-e_\bullet^2)^{7/2} , (9)
+where $f(e_\bullet)$ is a function of $e_\bullet$. For all values of $e_\bullet$,
+$f(e_\bullet)$ is between 0.979 and 1.81 (Blaes et al. 2002). We
+plot this timescale for a $1\times10^5\,M_\odot$ BH in Figure 1 in
+blue.
 6 Rose et al.
 Figure 3. On the right, we plot final masses of 500 BHs using different values of α in the density profile, shallow (α = 1) to
-cuspy(α=2). Forthelattercase,thepurplelineshowstheanalyticalresultfromEq.8,takingm tobetheaveragemass
+cuspy(α=2). Forthelattercase,thepurplelineshowstheanalyticalresultfromEq.8,takingm
 initial
+tobetheaveragemass
 of the population. Faded stars indicate BHs that merged with the SMBH through GWs. On the left, we plot the masses and
 merger times of these BHs.
-They sink toward the SMBH on the mass segregation Therefore, after the initial mass segregation, we allow
-timescale, t ≈ (cid:104)M (cid:105)/m ×t (e.g., Spitzer 1987; the BHs to begin diffusing over a relaxation timescale,
-seg ∗ BH relax
-Fregeau et al. 2002; Merritt 2006), which is typically an their orbital parameters changing slowly through a ran-
-orderofmagnitudesmallerthantherelaxationtimescale dom process. In this random process, some of the BHs
-plotted in Figure 1. may migrate closer to the SMBH. We terminate mass
-We incorporate relaxation processes by introducing a growthwhentheBHenterstheinner200auoftheGN,
-small change in the BH’s energy and angular momen- withinwhichthedensityofstarsisuncertain. Thiscut-
-tum each time it orbits the SMBH. We apply a small off is based on the 120 au pericenter of S0-2, the closest
-instantaneous velocity kick to the BH, denoted as ∆v. known star to the SMBH (e.g., Ghez et al. 2005).
-We draw ∆v from a Guassian distribution with average Another physical process that causes inward migra-
+In our simulations, we assume a BH has merged with
+the SMBH when the condition $t_{\rm GW} < t_{\rm elapsed}$ is met.
+When this condition is satisfied, we terminate mass
+growth through collisions for that BH.4
+2.7. IMBH growth
+As detailed above, BH-stellar collisions can increase
+the BH masses as a function of time. Here, we examine
+the sensitivity of the BH growth to the density power
+law. From Eq. (1), it is clear that the growth rate de-
+pends on the stellar density profile, governed by the in-
+dex α. We expect that higher values of α, or steeper
+profiles, will result in more efficient mass growth. In
+Figure 1, larger values of α lead to collision timescales
+in the GN’s inner region, inwards of 0.25 pc, that are
+much smaller than the 10 Gyr simulation time. Figure 3
+confirms this expectation. It depicts the mass growth of
+a uniform distribution of BHs with initial conditions de-
+tailed in Section 2.1 for five α values, spanning 1 (green)
+to 2 (purple). The most massive IMBHs form inwards
+of 0.25 pc for the α=2 case.
+2.8. Gravitational Wave Mergers and Intermediate
+and Extreme Mass Ratio Inspiral Candidates
+Towards the SMBH, efficient collisions can create BHs
+massive enough to merge with the SMBH through GWs.
+Following the method detailed
+in Section 2.6, when a
+given BH meets
+the criterion
+$t_{\rm GW} < t_{\rm elapsed}$,
+we mark
+4For comparison, we also incrementally changed the semimajor
+axis and eccentricity from GW emission following the equations
+in Peters & Mathews (1963b). This method leads to a slight
+increase in the final IMBH masses because it accounts for the
+collisions that take place while the orbit is gradually shrinking.
+it as merged with the SMBH. We assume that at this
+point the dynamics of the BH will be determined by GW
+emission, shrinking and circularizing the BH’s orbit un-
+til it undergoes an extreme or intermediate mass ratio
+inspiral (EMRI and IMRI, respectively). The right-hand
+plot in Figure 3 shows the BH masses versus time of
+merger. It is interesting to note that even in the ab-
+sence of relaxation processes, which are often invoked
+to explain the formation of EMRIs, EMRIs and notably
+IMRIs can form in this region.
+2.9. Two Body Relaxation Processes
+A BH orbiting the SMBH experiences weak gravita-
+tional interactions with other objects in the GN. Over a
+relaxation time, these interactions alter its orbit about
+the SMBH. The two-body relaxation timescale for a
+single-mass system is:
+$$
+t_{\rm relax}
+= 0.34\,
+\frac{\sigma^{3}}
+{G^{2}\,\rho\,
+\langle M_\ast \rangle\,
+\ln\Lambda_{\rm rlx}}
+\, , \tag{10}
+$$
+where
+$\ln\Lambda_{\rm rlx}$
+is the Coulomb logarithm and
+$\langle M_\ast \rangle$
+is the
+average mass of the surrounding objects, here assumed
+to be
+$1\,M_\odot$
+(Spitzer 1987; Binney & Tremaine 2008,
+Eq. (7.106)). This equation represents the approximate
+timescale for a BH on a semi-circular orbit to change
+its orbital energy and angular momentum by order of
+themselves. The BH experiences diffusion in its angular
+momentum and energy as a function of time (depending
+on the eccentricity of the orbit, this process can be more
+efficient; Fragione & Sari 2018; Sari & Fragione 2019).
+Relaxation can cause the orbit of an object in a GN to
+reach high eccentricities. If the object is a BH, it can
+spiral into the SMBH and form an EMRI, while a star
+IMBH Formation in Galactic Nuclei 7
+can be tidally disrupted by the SMBH (e.g. Magorrian
+& Tremaine 1999; Wang & Merritt 2004; Hopman &
+Alexander 2005; Aharon & Perets 2016; Stone & Met-
+zger 2016; Amaro-Seoane 2018; Sari & Fragione 2019;
+Naoz et al. 2022). The relaxation process is therefore
+crucial to our study. In Figure 1, we plot the relaxation
+timescale in gold for a range of α. We note that the Bah-
+call & Wolf (1976) profile, α=7/4, corresponds to zero
+net flux and therefore does not preferentially migrate
+objects inward.
+Additionally, because BHs are more massive on av-
+erage than the surrounding objects, they are expected
+to segregate inwards in the GN (e.g., Shapiro &
+Marchant 1978; Cohn & Kulsrud 1978; Morris 1993;
+Miralda-Escudé & Gould 2000; Baumgardt et al. 2004).
+They sink toward the SMBH on the mass segregation
+timescale,
+$t_{\rm seg}
+\approx
+\langle M_\ast \rangle
+/ m_{\rm BH}
+\times
+t_{\rm relax}$
+(e.g.,
+Spitzer 1987;
+Fregeau et al. 2002; Merritt 2006), which is typically an
+order of magnitude smaller than the relaxation timescale
+plotted in Figure 1.
+We incorporate relaxation processes by introducing a
+small change in the BH’s energy and angular momen-
+tum each time it orbits the SMBH. We apply a small
+instantaneous velocity kick to the BH, denoted as ∆v.
+We draw ∆v from a Gaussian distribution with average
+of zero and a standard deviation of ∆v
+rlx
+/
 √
-of zero and a standard deviation of ∆v / 3, where tion is dynamical friction. A cursory derivation based
+3, where
+∆v
 rlx
+= v
+•
 (cid:112)
-∆v = v P /t (see Bradnick et al. 2017, for an onthedynamicalfrictionequationsdescribedinBinney
-rlx • • rlx
-approach to changes in the angular momentum). The & Tremaine (2008) reveals the process to have a simi-
-new orbital parameters can be calculated following Lu lar timescale to mass segregation. If a BH diffuses to
-& Naoz (2019), and see Naoz et al. in prep for full set a distance greater than 2 pc from the SMBH, exiting
-of equations. the sphere of influence, we have it sink inwards, back
-We account for the effects of relaxation processes, towards the center, over a dynamical friction timescale.
-including mass-segregation, using a multi-faceted ap- After one dynamical friction timescale has passed, we
-proach. We begin by migrating each BH towards the restart diffusion.
-centeroveritsmass-segregationtimescale,shiftingitin- Wenotethatourprescriptionignoresself-interactions
-crementally inward such that its orbital energy changes betweentheBHs. Asmentionedabove, astheBHssink
-by order of itself within the segregation timescale. towards the SMBH, their concentration in the inner re-
-As the BHs segregate down the potential well, their gionoftheGNincreases,allowingthemtodominatethe
-abundancewithrespecttostarsincreases,untilatsome scattering. Wereservetheinclusionoftheseinteractions
-turnover radius, BHs become the dominant source of for future study.
+P
+•
+/t
+rlx
+(see Bradnick et al. 2017, for an
+approach to changes in the angular momentum). The
+new orbital parameters can be calculated following Lu
+& Naoz (2019), and see Naoz et al. (2022) for the full
+set of equations.
+We account for the effects of relaxation processes,
+including mass-segregation, using a multi-faceted ap-
+proach. We begin by migrating each BH towards the
+center over its mass-segregation timescale, shifting it in-
+crementally inward such that its orbital energy changes
+by order of itself within the segregation timescale.
+As the BHs segregate down the potential well, their
+abundance with respect to stars increases, until at some
+turnover radius, BHs become the dominant source of
 scatteringforbothblackholesandstars. Withinthisra-
 dius, BH self-interaction dominates over two-body scat-
-2.9. Effect of Relaxation Processes
 terings with the now rarer main-sequence stars. The
-AsdepictedinFigure4,two-bodyrelaxationprocesses
 BHs will then settle onto a Bahcall-Wolf profile, while
-result in more EMRIs and IMRIs events. These pro-
 the stars may follow a shallower profile, with approx-
-cesses allow BHs that begin further from the SMBH
-imately n ∝ r−1.5, inwards of the transition radius
+imately n
 (cid:63)
-to migrate inwards and grow more efficiently in mass.
+∝ r−1.5, inwards of the transition radius
 (Linial & Sari in prep.).
+Therefore, after the initial mass segregation, we allow
+the BHs to begin diffusing over a relaxation timescale,
+their orbital parameters changing slowly through a ran-
+dom process. In this random process, some of the BHs
+may migrate closer to the SMBH. We terminate mass
+growth when the BH enters the inner 200 au of the GN,
+within which the density of stars is uncertain. This cut-
+off is based on the 120 au pericenter of S0-2, the closest
+known star to the SMBH (e.g., Ghez et al. 2005).
+Another physical process that causes inward migra-
+tion is dynamical friction. A cursory derivation based
+on the dynamical friction equations described in Binney
+& Tremaine (2008) reveals the process to have a simi-
+lar timescale to mass segregation. If a BH diffuses to
+a distance greater than 2 pc from the SMBH, exiting
+the sphere of influence, we have it sink inwards, back
+towards the center, over a dynamical friction timescale.
+After one dynamical friction timescale has passed, we
+restart diffusion.
+We note that our prescription ignores self-interactions
+between the BHs. As mentioned above, as the BHs sink
+towards the SMBH, their concentration in the inner re-
+gion of the GN increases, allowing them to dominate the
+scattering. We reserve the inclusion of these interactions
+for future study.
+2.10. Effect of Relaxation Processes
+As depicted in Figure 4, two-body relaxation processes
+result in more EMRI and IMRI events. These pro-
+cesses allow BHs that begin further from the SMBH
+to migrate inwards and grow more efficiently in mass.
 However, it also impedes the growth of BHs that are
 initially closer to the SMBH by allowing them to dif-
-IMBH Formation in Galactic Nuclei 7
-Figure 4. SimilartoFigure3,weplottheinitialmassesversusinitialdistance(grey)andfinalmassversusfinaldistance(red)
-for 500 BHs. This simulation includes relaxation processes, including mass segregation, diffusion, and dynamical friction. We
-assume α = 1.75 for the GN density profile. Faded stars represent BHs that merged with the SMBH. As a result of inward
-migration,BHsmergemorequicklywiththeSMBH,beforetheycanbecomeasmassiveasthoseinFigure3. Additionally,more
-BHs become EMRIs and IMRIs. Additionally, in the third panel, we show a histogram of the simulated IMBH masses for two
-differentvaluesofα,1.5(orange,solid),α,1.75(red,dashed),and2(purple,dash-dotted),accountingforrelaxationprocesses.
-The dashed, faded lines represent the corresponding initial histograms. We assume α=1.75 for the GN density profile. Faded
-stars represent BHs that merged with the SMBH.
-fuseoutoftheinnerregionwherecollisionsareefficient. 10−2 pcfromtheSMBHcanaccretetheentirestar(see
-As can be seen in Figure 4, the net result is that more Figure 2).
-BHs grow, but the maximum mass is lower compared The efficiency of collisions, and therefore IMBH,
-to the scenario that ignores two-body relaxation. The EMRI, and IMRI formation as well, are sensitive to
-histograminFigure4presentsthefinalBHmassdistri- the underlying stellar density. As shown in Figure 3, a
-butions for different power law indices α. As expected, steeperdensityprofileresultsinlargerIMBHs. Thisbe-
-the two-body relaxation suppresses the α dependence havior can be understood from the collision timescale’s
-highlighted in Figure 3. In fact, using a KS test, we dependenceonthestellardensityprofile. Asteeperpro-
-find that we cannot reject the hypothesis that the two file yields shorter collision timescales near the SMBH.
-distributions were drawn from the same sample for the However, the inclusion of relaxation processes in the
-α = 1.75 and α = 2 results. Interestingly, a BH mass simulations dampens the influence of the stellar density
-IMF with an average of 10 M leads to a final distri- profile by allowing BHs to diffuse into regions of more
-(cid:12)
-bution with an average of ∼ 200 M and a median of or less efficient growth. As a result, more BHs grow in
-(cid:12)
-∼45 M , which lies within the mass gap. mass, but their maximum mass is smaller (∼ 104 M ).
-(cid:12) (cid:12)
+fuse out of the inner region where collisions are efficient.
+As can be seen in Figure 4, the net result is that more
+BHs grow, but the maximum mass is lower compared
+to the scenario that ignores two-body relaxation. The
+histogram in Figure 4 presents the final BH mass distri-
+butions for different power law indices α. As expected,
+the two-body relaxation suppresses the α dependence
+highlighted in Figure 3. In fact, using a KS test, we
+find that we cannot reject the hypothesis that the two
+distributions were drawn from the same sample for the
+α = 1.75 and α = 2 results. Interestingly, a BH mass
+IMF with an average of
+$10\,M_\odot$
+leads to a final distri-
+bution with an average of
+$\sim 200\,M_\odot$
+and a median of
+$\sim 45\,M_\odot$,
+which lies
+within the mass gap.
+3. DISCUSSION AND PREDICTIONS
+We explore the feasibility of forming IMBHs in a
+GN through successive collisions between a stellar-mass
+BH and main-sequence stars. Taking both a statisti-
+cal and analytic approach, we show that this channel
+can produce IMBHs efficiently with masses as high as
+$10^{3-4}\,M_\odot$
+and may result in many
+IMBH-SMBH merg-
+ers (intermediate-mass ratio inspirals, or IMRIs) and
+EMRIs.
+8 Rose et al.
+Figure 4. Similar to Figure 3, we plot the initial masses versus initial distance (grey) and final mass versus final distance
+(red) for 500 BHs. This simulation includes relaxation processes, including mass segregation, diffusion, and dynamical friction.
+We assume α=1.75 for the GN density profile. Faded stars represent BHs that merge with the SMBH. As a result of inward
+migration, BHs merge more quickly with the SMBH, before they can become as massive as those in Figure 3. Additionally,
+more BHs become EMRIs and IMRIs. In the third panel, we show a histogram of the simulated IMBH masses
+for three different values of α: 1.5 (orange, solid), 1.75 (red, dashed), and 2 (purple, dash-dotted), accounting for relaxation
+processes. We also show the results for a simulation with α=1.75 that accounts for momentum-driven winds (black, dotted).
+Despite the substantially reduced accretion, BHs in the mass gap still form.
+As the stellar mass BH collides with a star, the BH
+will grow in mass. The increase may equal the star’s en-
+tire mass if the relative velocity is smaller than the es-
+cape velocity from the BH at
+$1\,R_\odot$.
+However, near the
+SMBH, the velocity dispersion may be larger than the
+escape velocity from the BH at the star’s radius. In this
+limit, the BH captures a “tunnel” of material through
+the star, estimated using Bondi-Hoyle-Lyttleton accre-
+tion. In our statistical analysis, we account for Bondi-
+Hoyle-Lyttleton accretion and find that BHs outside of
+$10^{-2}$ pc from the SMBH can capture the entire star (see
+Figure 2).
+The efficiency of collisions, and therefore IMBH,
+EMRI, and IMRI formation as well, are sensitive to
+the underlying stellar density. As shown in Figure 3, a
+steeper density profile results in larger IMBHs. This be-
+havior can be understood from the collision timescale’s
+dependence on the stellar density profile. A steeper pro-
+file yields shorter collision timescales near the SMBH.
+However, the inclusion of relaxation processes in the
+simulations dampens the influence of the stellar density
+profile by allowing BHs to diffuse into regions of more
+or less efficient growth. As a result, more BHs grow in
+mass, but their maximum mass
+is smaller
+($\sim 10^{4}\,M_\odot$).
 Additionally, the final masses have no apparent depen-
 dence on distance from the SMBH (see Figure 4).
-3. DISCUSSION AND PREDICTIONS Mass growth through BH-main-sequence star colli-
-We explore the feasibility of forming IMBHs in a sions may act in concert with other IMBH formation
-GN through successive collisions between a stellar-mass channels, such as compact object binary mergers (e.g.,
-BH and main-sequence stars. Taking both a statisti- Hoang et al. 2018; Stephan et al. 2019; Fragione et al.
-cal and analytic approach, we show that this channel 2021; Wang et al. 2021). While in some cases colli-
-can produce IMBHs efficiently with masses as high as sions can unbind a binary (e.g., Sigurdsson & Phinney
-103−4 M and may result in many IMBH-SMBH merg- 1993; Fregeau et al. 2004), BH binaries can be tightly
-(cid:12)
-ers (intermediate-mass ratio inspiral, IMRIs) and EM- bound enough to withstand the collisions. Wide bina-
-RIs. ries may also become unbound due to interactions with
-As the stellar mass BH collides with a star, the BH theneighboringstarsandcompactobjects(e.g.,Binney
-will grow in mass. The increase may equal star’s en- & Tremaine 1987; Rose et al. 2020, see latter study for
-tire mass if the relative velocity is smaller than the es- the timescale for an arbitrary eccentricity). However,
-cape velocity from the BH at 1 R . However, near the as highlighted in previous studies, a substantial frac-
-(cid:12)
-SMBH, the velocity dispersion may be larger than the tion of these binaries may merge due to the Eccentric
-escapevelocityfromtheBHatthestar’sradius. Inthis Kozai Lidov mechanism, leaving behind a single star or
-limit, the BH accretes a “tunnel” of material through a single compact object (e.g., Stephan et al. 2016, 2019;
-the star, estimated using Bondi-Hoyle-Lyttleton accre- Hoang et al. 2018). Additionally, to be susceptible to
-tion. In our statistical analysis, we account for Bondi- evaporation, BH binaries must have a wider configura-
-Hoyle-Lyttleton accretion and find that BHs outside of tion. Otherwise, they will be more tightly bound that
-8 Rose et al.
-the average kinetic energy of the surrounding objects, Our results also suggest that IMBHs are likely to ex-
-and will only harden through weak gravitational inter- ists in many galactic nuclei, as well as within our own
-actions with neighboring stars (see for example Figure galactic center. This implication seems to be consis-
-6 in Rose et al. 2020). tent with recent observational and theoretical studies
-Not included in this study, collisions between the BH (e.g.,Hansen&Milosavljevi´c2003;Maillardetal.2004;
-and other compact objects will increase the BH growth Gu¨rkan&Rasio2005;Gualandris&Merritt2009;Chen
-rate. BH-BH mergers (e.g., O’Leary et al. 2009; Fra- &Liu2013;Generozov&Madigan2020;Fragioneetal.
-gione et al. 2021) and even neutron star BH mergers 2020a; Zheng et al. 2020; Naoz et al. 2020; GRAVITY
-(e.g., Hoang et al. 2020) become more likely as the BHs Collaboration et al. 2020).
-increase in mass through stellar collisions. As a result, Lastly, the collisions between stellar mass BHs and
-the BH-BH collision timescale, discussed in Section 2.2, stars may contribute to the x-ray emission from our
-will become relevant to our simulations, allowing the galacticcentre(e.g.,Munoetal.2005,2009;Haileyetal.
-BHs to grow through this channel in addition to stel- 2018; Zhu et al. 2018; Cheng et al. 2018)5. These inter-
-larcollisions. Additionally,thiscompactobjectmergers actions, in particular grazing collisions, may also result
-result in GW recoil, which may have a large impact on in tidal disruption events (e.g., Perets et al. 2016; Sam-
-the dynamics (e.g., Baibhav et al. 2020; Fragione et al. sing et al. 2019; Kremer et al. 2021). Thus, the process
-2021) outlined here may produce electromagnetic signatures
-The BH’s mass growth increases GW emission, which in addition to GW mergers.
-dissipates energy from the orbit. Along with relaxation
-processes, GW emission causes BHs to sink towards the SR thanks the Charles E Young fellowship, the Nina
-SMBH and eventually undergo a merger. As a result, Byers Fellowship, and the Michael A. Jura Memorial
-the GN environment is conducive to the formation of Graduate Award for support. SR and SN acknowledge
-EMRIsandIMRIs. TheGWemissionfromEMRIsand the partial support from NASA ATP 80NSSC20K0505.
-IMRIs is expected to be at mHz frequencies, making SN thanks Howard and Astrid Preston for their gener-
-them promising candidates for LISA to observe. While oussupport. ILthankssupportfromtheAdamsFellow-
-the exact rate calculation is beyond the scope of this ship. SN and RS thank the Bhaumik Institute visitor
-study, the mechanism outlined here seems very promis- program.
-ing.
+Most simulations in our study assume that the BHs
+accrete all of the mass that they capture. The final BH
+masses can be taken as an upper limit. We note that
+the accretion is a highly uncertain process and repre-
+sents an active field of study (e.g., Blandford & Begel-
+man 1999; Park & Ostriker 2001; Narayan et al. 2003;
+Igumenshchev et al. 2003; Ohsuga et al. 2005; Yuan
+et al. 2012; Jiang et al. 2014; McKinney et al. 2014;
+Narayan et al. 2022). To assess the limits of our model,
+we also consider a physically motivated accretion model,
+momentum-driven winds (Section 2.5). We present the
+final mass distribution for momentum-driven winds in
+Figure 4. Importantly, we find that BHs within the
+mass gap still form naturally despite the substantially
+reduced accretion. About 5% of the BHs grow by
+10 to
+$100\,M_\odot$.
+Furthermore, if we increase this ∆M esti-
+mate by a factor of 2 (i.e., use η = 0.05), the simula-
+tion produces a
+$3.5\times10^{3}\,M_\odot$
+IMBH for the same initial
+conditions. Our proof-of-concept demonstrates that col-
+lisions between BHs and stars are an important process
+that should be taken into account in dense places such
+as a GN.
+Mass growth through BH-main-sequence star colli-
+sions may act in concert with other IMBH formation
+channels, such as compact object binary mergers (e.g.,
+Hoang et al. 2018; Stephan et al. 2019; Fragione et al.
+2021; Wang et al. 2021). While in some cases colli-
+sions can unbind a binary (e.g., Sigurdsson & Phinney
+1993; Fregeau et al. 2004), BH binaries can be tightly
+bound enough to withstand the collisions. Wide bina-
+ries may also become unbound due to interactions with
+the neighboring stars and compact objects (e.g., Binney
+& Tremaine 1987; Rose et al. 2020, see latter study for
+the timescale for an arbitrary eccentricity). However,
+as highlighted in previous studies, a substantial frac-
+tion of these binaries may merge due to the Eccentric
+Kozai Lidov mechanism, leaving behind a single star or
+a single compact object (e.g., Stephan et al. 2016, 2019;
+Hoang et al. 2018). Additionally, to be susceptible to
+evaporation, BH binaries must have a wider configura-
+tion. Otherwise, they will be more tightly bound than
+the average kinetic energy of the surrounding objects
+and will only harden through weak gravitational inter-
+IMBH Formation in Galactic Nuclei 9
+actions with neighboring stars (see for example Figure
+6 in Rose et al. 2020).
+We note that we assume a steady-state and treat the
+stars as a reservoir in this model. Future work will take a
+more nuanced approach to the background stars, whose
+density as a function of time can be influenced by several
+factors. Firstly, the relaxation of the stellar population
+occurs on Gyr timescales. Some studies have suggested
+that in situ star formation can occur in the Galactic
+Center as close as 0.04 pc from the SMBH (e.g., Levin
+& Beloborodov 2003; Paumard et al. 2006), and star
+formation episodes can occur as often as every ∼5 Myr
+(e.g. Lu et al. 2009). Therefore, we expect that after
+the first Gyr, stars within ≲0.01 pc will be replenished
+at intervals consistent with the star formation episodes;
+the infalling populations of stars are separated by ∼
+5−10 Myr, which is shorter than the collision timescale.
+However, star-star collisions may complicate this pic-
+ture within ∼0.01 pc. As discussed above, regular star
+formation ensures the BHs always have a stellar popula-
+tion to interact with outside of ∼0.01 pc.$^{5}$ At 0.01 pc,
+however, the kinetic energy during a collision between
+two
+$1\,M_\odot$
+stars is larger than their binding energies.
+Collisions can therefore thin out the stellar populations
+during the time it takes them to diffuse to these small
+radii, ≲0.01 pc, and may reduce the BH growth in the
+innermost region. We reserve the inclusion of star-star
+collisions for future work. We also note that the disrup-
+tion of binary stars by the SMBH may help replenish
+the stellar population even as collisions work to deplete
+it (e.g., Balberg et al. 2013); when a binary is disrupted,
+one of the stars is captured on a tightly bound orbit
+about the SMBH.
+An IMBH may also affect the stellar density profile.
+As it spirals into the SMBH, it can perturb stellar orbits,
+and these interactions can lead to hypervelocity stars
+(e.g., Baumgardt et al. 2006a; Löckmann & Baumgardt
+2008). Löckmann & Baumgardt (2008) show that an
+IMBH can modify an initially steep stellar density pro-
+file to become consistent with the flatter cusp observed
+in the Galactic Center. The stars may then be replen-
+ished on 100 Myr timescales (Baumgardt et al. 2006a).
+Therefore, after the formation of the first few IMBHs,
+subsequent BH growth may occur in bursts, coinciding
+with replenishment of the stars.
+While there are many competing dynamical processes
+that shape the stellar density profile, we stress that α
+$^{5}$ In fact, the star-star collision timescale is greater than 10 Myr
+for the entire parameter space, save at 0.001 pc for larger values
+of α; the BH-star collision timescale plotted in Fig. 1 is the same
+order of magnitude as the star-star collision timescale.
+can simply be chosen to encapsulate all of the relevant
+physics. A value for α that is constrained by observa-
+tions must already reflect ongoing processes like star-
+star collisions and replenishment. Schödel et al. (2018)
+find the observed stellar mass enclosed within 0.01 pc of
+the Milky Way’s Galactic Center to be approximately
+$180\,M_\odot$.
+This estimate
+is consistent to order of magni-
+tude with our α = 1.25 case. In a simulation like those
+depicted in Figure 4, which include relaxation, α = 1.25
+leads to a maximum IMBH mass of
+$140\,M_\odot$.
+Further-
+more, while the stellar mass within 0.01 pc may be a
+few hundred
+$M_\odot$,
+Do et al. (2019) and GRAVITY Col-
+laboration et al. (2020) set an upper limit on the mass
+enclosed within the orbit of S0-2 to be about a few thou-
+sand
+$M_\odot$,
+or 0.1% of the central mass. This upper limit
+can include mass that was previously in stars but is now
+in BHs. In that case, the
+$180\,M_\odot$
+is what remains of the
+stars, while BHs and IMBHs make up the
+$\sim 1000\,M_\odot$
+in the innermost region.
+Also not included in this study, collisions between the
+BH and other compact objects will increase the BH
+growth rate. BH-BH mergers (e.g., O’Leary et al. 2009;
+Fragione et al. 2021) and even neutron star BH mergers
+(e.g., Hoang et al. 2020) become more likely as the BHs
+increase in mass through stellar collisions. As a result,
+the BH-BH collision timescale, discussed in Section 2.2,
+will become relevant to our simulations, allowing the
+BHs to grow through this channel in addition to stel-
+lar collisions. Additionally, these compact object mergers
+result in GW recoil, which may have a large impact on
+the dynamics (e.g., Baibhav et al. 2020; Fragione et al.
+2021).
+The BH’s mass growth increases GW emission, which
+dissipates energy from the orbit. Along with relaxation,
+GW emission causes BHs to sink towards the SMBH
+and eventually undergo a merger. As a result, the GN
+environment is conducive to the formation of EMRIs
+and IMRIs. The GW emission from EMRIs and IM-
+RIs is expected to be at mHz frequencies, making them
+promising candidates for LISA to observe. While the
+exact rate calculation is beyond the scope of this study,
+the mechanism outlined here seems very promising.
+Our results also suggest that BHs within the mass gap
+as well as IMBHs likely exist in many galactic nuclei, as
+well as within our own galactic center. This implication
+seems to be consistent with recent observational and
+theoretical studies (e.g., Hansen & Milosavljević 2003;
+Maillard et al. 2004; Gürkan & Rasio 2005; Gualandris
+& Merritt 2009; Chen & Liu 2013; Generozov & Madi-
+gan 2020; Fragione et al. 2020a; Zheng et al. 2020; Naoz
+et al. 2020; GRAVITY Collaboration et al. 2020).
+10 Rose et al.
+Lastly, the collisions between stellar mass BHs and
+stars may contribute to the x-ray emission from our
+galactic centre (e.g., Muno et al. 2005, 2009; Hailey
+et al. 2018; Zhu et al. 2018; Cheng et al. 2018, see Kre-
+mer et al. (2022) for a discussion of electromagnetic sig-
+natures from BH-star collisions).$^{6}$ These interactions,
+in particular grazing collisions, may also result in tidal
+disruption events (e.g., Baumgardt et al. 2006b; Perets
+et al. 2016; Stone et al. 2017; Samsing et al. 2019; Kre-
+mer et al. 2021). Thus, the process outlined here may
+produce electromagnetic signatures in addition to GW
+mergers.
+We thank the anonymous referee for useful comments.
+We also thank Jessica Lu, Fred Rasio, Kyle Kremer,
+Ryosuke Hirai, Ilya Mandel, and Erez Michaely for use-
+ful discussion.
+SR thanks the Charles E. Young Fellowship, the Nina
+Byers Fellowship, and the Michael A. Jura Memorial
+Graduate Award for support. SR and SN acknowledge
+the partial support from NASA ATP 80NSSC20K0505.
+SN thanks Howard and Astrid Preston for their gener-
+ous support. IL thanks support from the Adams Fellow-
+ship. SN and RS thank the Bhaumik Institute visitor
+program. This work was performed in part at the As-
+pen Center for Physics, which is supported by National
+Science Foundation grant PHY-1607611.
 REFERENCES
-Abbott, B. P., Abbott, R., Abbott, T. D., et al. 2016, Baibhav,V.,Gerosa,D.,Berti,E.,etal.2020,PhRvD,102,
-PhRvL, 116, 241102, 043002, doi: 10.1103/PhysRevD.102.043002
-doi: 10.1103/PhysRevLett.116.241102 Baumgardt, H., Makino, J., & Ebisuzaki, T. 2004, ApJ,
-—. 2017a, PhRvL, 118, 221101, 613, 1143, doi: 10.1086/423299
-doi: 10.1103/PhysRevLett.118.221101 Begelman, M. C., Volonteri, M., & Rees, M. J. 2006,
-—. 2017b, PhRvL, 119, 141101, MNRAS, 370, 289, doi: 10.1111/j.1365-2966.2006.10467.x
-doi: 10.1103/PhysRevLett.119.141101 Belczynski,K., Hirschi,R., Kaiser,E.A., etal.2020a, ApJ,
-Alexander, T. 1999, ApJ, 527, 835, doi: 10.1086/308129 890, 113, doi: 10.3847/1538-4357/ab6d77
-Alexander, T., & Pfuhl, O. 2014, ApJ, 780, 148, —. 2020b, ApJ, 890, 113, doi: 10.3847/1538-4357/ab6d77
-doi: 10.1088/0004-637X/780/2/148 Bertone, G., Fornasa, M., Taoso, M., & Zentner, A. R.
-2009, New Journal of Physics, 11, 105016,
+Abbott, B. P., Abbott, R., Abbott, T. D., et al. 2016,
+PhRvL, 116, 241102,
+doi: 10.1103/PhysRevLett.116.241102
+—. 2017a, PhRvL, 118, 221101,
+doi: 10.1103/PhysRevLett.118.221101
+—. 2017b, PhRvL, 119, 141101,
+doi: 10.1103/PhysRevLett.119.141101
+Aharon, D., & Perets, H. B. 2016, ApJL, 830, L1,
+doi: 10.3847/2041-8205/830/1/L1
+Alexander, T. 1999, ApJ, 527, 835, doi: 10.1086/308129
+Alexander, T., & Pfuhl, O. 2014, ApJ, 780, 148,
+doi: 10.1088/0004-637X/780/2/148
+Amaro-Seoane, P. 2018, Living Reviews in Relativity, 21, 4,
+doi: 10.1007/s41114-018-0013-8
+$^{6}$ The connection between the observed X-ray sources at the Galac-
+tic Center and tidal capture has been suggested by Generozov
+et al. (2018), but see Zhu et al. (2018); Stephan et al. (2019) for
+alternative channels.
 Arca Sedda, M., Mapelli, M., Benacquista, M., & Spera, M.
-doi: 10.1088/1367-2630/11/10/105016
 2021, arXiv e-prints, arXiv:2109.12119.
-Binney, J., & Tremaine, S. 1987, Galactic dynamics
 https://arxiv.org/abs/2109.12119
-—. 2008, Galactic Dynamics: Second Edition
 Bahcall, J. N., & Wolf, R. A. 1976, ApJ, 209, 214,
-Blaes, O., Lee, M. H., & Socrates, A. 2002, ApJ, 578, 775,
 doi: 10.1086/154711
+Baibhav, V., Gerosa, D., Berti, E., et al. 2020, PhRvD, 102,
+043002, doi: 10.1103/PhysRevD.102.043002
+Balberg, S., Sari, R., & Loeb, A. 2013, MNRAS, 434, L26,
+doi: 10.1093/mnrasl/slt071
+Baumgardt, H., Gualandris, A., & Portegies Zwart, S.
+2006a, MNRAS, 372, 174,
+doi: 10.1111/j.1365-2966.2006.10818.x
+Baumgardt, H., Hopman, C., Portegies Zwart, S., &
+Makino, J. 2006b, MNRAS, 372, 467,
+doi: 10.1111/j.1365-2966.2006.10885.x
+Baumgardt, H., Makino, J., & Ebisuzaki, T. 2004, ApJ,
+613, 1143, doi: 10.1086/423299
+Begelman, M. C. 1979, MNRAS, 187, 237,
+doi: 10.1093/mnras/187.2.237
+—. 2012a, ApJL, 749, L3, doi: 10.1088/2041-8205/749/1/L3
+IMBH Formation in Galactic Nuclei 11
+—. 2012b, MNRAS, 420, 2912,
+doi: 10.1111/j.1365-2966.2011.20071.x
+Begelman, M. C., Volonteri, M., & Rees, M. J. 2006,
+MNRAS, 370, 289, doi: 10.1111/j.1365-2966.2006.10467.x
+Belczynski, K., Hirschi, R., Kaiser, E. A., et al. 2020a, ApJ,
+890, 113, doi: 10.3847/1538-4357/ab6d77
+—. 2020b, ApJ, 890, 113, doi: 10.3847/1538-4357/ab6d77
+Bertone, G., Fornasa, M., Taoso, M., & Zentner, A. R.
+2009, New Journal of Physics, 11, 105016,
+doi: 10.1088/1367-2630/11/10/105016
+Binney, J., & Tremaine, S. 1987, Galactic dynamics
+—. 2008, Galactic Dynamics: Second Edition
+Blaes, O., Lee, M. H., & Socrates, A. 2002, ApJ, 578, 775,
 doi: 10.1086/342655
+Blandford, R. D., & Begelman, M. C. 1999, MNRAS, 303,
+L1, doi: 10.1046/j.1365-8711.1999.02358.x
+Blandford, R. D., & Znajek, R. L. 1977, MNRAS, 179, 433,
+doi: 10.1093/mnras/179.3.433
 Blecha, L., Ivanova, N., Kalogera, V., et al. 2006, ApJ, 642,
-5TheconnectionbetweentheobservedX-raysourcesattheGalac- 427, doi: 10.1086/500727
-tic Center and tidal capture has been suggested by Generozov
+427, doi: 10.1086/500727
 Bondi, H. 1952, MNRAS, 112, 195,
-etal.(2018),butseeZhuetal.(2018);Stephanetal.(2019)for
-alternativechannels. doi: 10.1093/mnras/112.2.195
-IMBH Formation in Galactic Nuclei 9
-Bondi, H., & Hoyle, F. 1944, MNRAS, 104, 273, Freitag, M., Amaro-Seoane, P., & Kalogera, V. 2006, ApJ,
-doi: 10.1093/mnras/104.5.273 649, 91, doi: 10.1086/506193
-Bradnick, B., Mandel, I., & Levin, Y. 2017, MNRAS, 469, Generozov, A., & Madigan, A.-M. 2020, ApJ, 896, 137,
-2042, doi: 10.1093/mnras/stx1007 doi: 10.3847/1538-4357/ab94bc
-Bringmann, T., Huang, X., Ibarra, A., Vogl, S., & Weniger, Generozov, A., Stone, N. C., Metzger, B. D., & Ostriker,
-C. 2012, JCAP, 2012, 054, J. P. 2018, MNRAS, 478, 4030,
-doi: 10.1088/1475-7516/2012/07/054 doi: 10.1093/mnras/sty1262
-Centrella, J., Baker, J. G., Kelly, B. J., & van Meter, J. R. Genzel, R., Eisenhauer, F., & Gillessen, S. 2010, Reviews of
-2010, Reviews of Modern Physics, 82, 3069, Modern Physics, 82, 3121,
-doi: 10.1103/RevModPhys.82.3069 doi: 10.1103/RevModPhys.82.3121
-Chen, X., & Liu, F. K. 2013, ApJ, 762, 95, Genzel, R., Sch¨odel, R., Ott, T., et al. 2003, ApJ, 594, 812,
-doi: 10.1088/0004-637X/762/2/95 doi: 10.1086/377127
-Cheng, Z., Li, Z., Xu, X., & Li, X. 2018, ApJ, 858, 33, Ghez, A. M., Salim, S., Hornstein, S. D., et al. 2005, ApJ,
-doi: 10.3847/1538-4357/aaba16 620, 744, doi: 10.1086/427175
-Choi, J.-H., Shlosman, I., & Begelman, M. C. 2015, Gonda´n, L., Kocsis, B., Raffai, P., & Frei, Z. 2018, ApJ,
-MNRAS, 450, 4411, doi: 10.1093/mnras/stv694 860, 5, doi: 10.3847/1538-4357/aabfee
-Christensen-Dalsgaard, J., Dappen, W., Ajukov, S. V., Gonza´lez, E., Kremer, K., Chatterjee, S., et al. 2021, ApJL,
-et al. 1996, Science, 272, 1286, 908, L29, doi: 10.3847/2041-8213/abdf5b
-doi: 10.1126/science.272.5266.1286 GRAVITY Collaboration, Abuter, R., Amorim, A., et al.
-Cohn, H., & Kulsrud, R. M. 1978, ApJ, 226, 1087, 2020, A&A, 636, L5, doi: 10.1051/0004-6361/202037813
-doi: 10.1086/156685 Gualandris, A., & Merritt, D. 2009, ApJ, 705, 361,
-Dall’Amico, M., Mapelli, M., Di Carlo, U. N., et al. 2021, doi: 10.1088/0004-637X/705/1/361
-MNRAS, 508, 3045, doi: 10.1093/mnras/stab2783 Gu¨rkan, M. A., Fregeau, J. M., & Rasio, F. A. 2006, ApJL,
-Di Carlo, U. N., Giacobbo, N., Mapelli, M., et al. 2019, 640, L39, doi: 10.1086/503295
-MNRAS, 487, 2947, doi: 10.1093/mnras/stz1453 Gu¨rkan, M. A., & Rasio, F. A. 2005, ApJ, 628, 236,
-Di Carlo, U. N., Mapelli, M., Pasquato, M., et al. 2021, doi: 10.1086/430694
-MNRAS, 507, 5132, doi: 10.1093/mnras/stab2390 Hailey, C. J., Mori, K., Bauer, F. E., et al. 2018, Nature,
-Eda, K., Itoh, Y., Kuroyanagi, S., & Silk, J. 2013, PhRvL, 556, 70, doi: 10.1038/nature25029
-110, 221101, doi: 10.1103/PhysRevLett.110.221101 Hansen, B. M. S., & Milosavljevi´c, M. 2003, ApJL, 593,
-Edgar, R. 2004, NewAR, 48, 843, L77, doi: 10.1086/378182
-doi: 10.1016/j.newar.2004.06.001 Heger, A., Fryer, C. L., Woosley, S. E., Langer, N., &
-Ferrara, A., Salvadori, S., Yue, B., & Schleicher, D. 2014, Hartmann, D. H. 2003, ApJ, 591, 288,
-Monthly Notices of the Royal Astronomical Society, 443, doi: 10.1086/375341
-2410, doi: 10.1093/mnras/stu1280 Hoang, B.-M., Naoz, S., Kocsis, B., Rasio, F. A., &
-Fishbach, M., Farr, W. M., & Holz, D. E. 2020, ApJL, 891, Dosopoulou, F. 2018, ApJ, 856, 140,
-L31, doi: 10.3847/2041-8213/ab77c9 doi: 10.3847/1538-4357/aaafce
-Fragione, G., Kocsis, B., Rasio, F. A., & Silk, J. 2021, Hoang, B.-M., Naoz, S., & Kremer, K. 2020, ApJ, 903, 8,
-arXiv e-prints, arXiv:2107.04639. doi: 10.3847/1538-4357/abb66a
-https://arxiv.org/abs/2107.04639 Johnson, J. L., & Bromm, V. 2007, Monthly Notices of the
-Fragione, G., Loeb, A., Kremer, K., & Rasio, F. A. 2020a, Royal Astronomical Society, 374, 1557,
-ApJ, 897, 46, doi: 10.3847/1538-4357/ab94b2 doi: 10.1111/j.1365-2966.2006.11275.x
-Fragione, G., Loeb, A., & Rasio, F. A. 2020b, ApJL, 902, Kremer, K., Lu, W., Piro, A. L., et al. 2021, ApJ, 911, 104,
-L26, doi: 10.3847/2041-8213/abbc0a doi: 10.3847/1538-4357/abeb14
-Fragione, G., & Sari, R. 2018, ApJ, 852, 51, Kremer, K., Spera, M., Becker, D., et al. 2020, ApJ, 903,
-doi: 10.3847/1538-4357/aaa0d7 45, doi: 10.3847/1538-4357/abb945
-Fregeau, J. M., Cheung, P., Portegies Zwart, S. F., & Limongi, M., & Chieffi, A. 2018a, ApJS, 237, 13,
-Rasio, F. A. 2004, MNRAS, 352, 1, doi: 10.3847/1538-4365/aacb24
-doi: 10.1111/j.1365-2966.2004.07914.x —. 2018b, ApJS, 237, 13, doi: 10.3847/1538-4365/aacb24
-Fregeau, J. M., Joshi, K. J., Portegies Zwart, S. F., & Lu, C. X., & Naoz, S. 2019, MNRAS, 484, 1506,
-Rasio, F. A. 2002, ApJ, 570, 171, doi: 10.1086/339576 doi: 10.1093/mnras/stz036
-10 Rose et al.
-Lu, J. R., Ghez, A. M., Hornstein, S. D., et al. 2009, ApJ, Rodriguez, C. L., Amaro-Seoane, P., Chatterjee, S., &
-690, 1463, doi: 10.1088/0004-637X/690/2/1463 Rasio, F. A. 2018, PhRvL, 120, 151101,
-Madau, P., & Rees, M. J. 2001, ApJL, 551, L27, doi: 10.1103/PhysRevLett.120.151101
-doi: 10.1086/319848 Rodriguez, C. L., Chatterjee, S., & Rasio, F. A. 2016,
-Maillard, J. P., Paumard, T., Stolovy, S. R., & Rigaut, F. PhRvD, 93, 084029, doi: 10.1103/PhysRevD.93.084029
-2004, A&A, 423, 155, doi: 10.1051/0004-6361:20034147 Rodriguez, C. L., Zevin, M., Amaro-Seoane, P., et al. 2019,
-Phys. Rev. D, 100, 043027,
+doi: 10.1093/mnras/112.2.195
+Bondi, H., & Hoyle, F. 1944, MNRAS, 104, 273,
+doi: 10.1093/mnras/104.5.273
+Bradnick, B., Mandel, I., & Levin, Y. 2017, MNRAS, 469,
+2042, doi: 10.1093/mnras/stx1007
+Brennan, R., Choi, E., Somerville, R. S., et al. 2018, ApJ,
+860, 14, doi: 10.3847/1538-4357/aac2c4
+Bringmann, T., Huang, X., Ibarra, A., Vogl, S., & Weniger,
+C. 2012, JCAP, 2012, 054,
+doi: 10.1088/1475-7516/2012/07/054
+Centrella, J., Baker, J. G., Kelly, B. J., & van Meter, J. R.
+2010, Reviews of Modern Physics, 82, 3069,
+doi: 10.1103/RevModPhys.82.3069
+Chen, X., & Liu, F. K. 2013, ApJ, 762, 95,
+doi: 10.1088/0004-637X/762/2/95
+Cheng, Z., Li, Z., Xu, X., & Li, X. 2018, ApJ, 858, 33,
+doi: 10.3847/1538-4357/aaba16
+Choi, J.-H., Shlosman, I., & Begelman, M. C. 2015,
+MNRAS, 450, 4411, doi: 10.1093/mnras/stv694
+Christensen-Dalsgaard, J., Dappen, W., Ajukov, S. V.,
+et al. 1996, Science, 272, 1286,
+doi: 10.1126/science.272.5266.1286
+Cohn, H., & Kulsrud, R. M. 1978, ApJ, 226, 1087,
+doi: 10.1086/156685
+Dale, J. E., & Davies, M. B. 2006, MNRAS, 366, 1424,
+doi: 10.1111/j.1365-2966.2005.09937.x
+Dale, J. E., Davies, M. B., Church, R. P., & Freitag, M.
+2009, MNRAS, 393, 1016,
+doi: 10.1111/j.1365-2966.2008.14254.x
+Dall’Amico, M., Mapelli, M., Di Carlo, U. N., et al. 2021,
+MNRAS, 508, 3045, doi: 10.1093/mnras/stab2783
+Das, A., Schleicher, D. R. G., Basu, S., & Boekholt, T.
+C. N. 2021a, MNRAS, 505, 2186,
+doi: 10.1093/mnras/stab1428
+Das, A., Schleicher, D. R. G., Leigh, N. W. C., & Boekholt,
+T. C. N. 2021b, MNRAS, 503, 1051,
+doi: 10.1093/mnras/stab402
+De Villiers, J.-P., Hawley, J. F., Krolik, J. H., & Hirose, S.
+2005, ApJ, 620, 878, doi: 10.1086/427142
+Di Carlo, U. N., Giacobbo, N., Mapelli, M., et al. 2019,
+MNRAS, 487, 2947, doi: 10.1093/mnras/stz1453
+Di Carlo, U. N., Mapelli, M., Pasquato, M., et al. 2021,
+MNRAS, 507, 5132, doi: 10.1093/mnras/stab2390
+Do, T., Hees, A., Ghez, A., et al. 2019, Science, 365, 664,
+doi: 10.1126/science.aav8137
+Ebisuzaki, T., Makino, J., Tsuru, T. G., et al. 2001, ApJL,
+562, L19, doi: 10.1086/338118
+Eda, K., Itoh, Y., Kuroyanagi, S., & Silk, J. 2013, PhRvL,
+110, 221101, doi: 10.1103/PhysRevLett.110.221101
+Edgar, R. 2004, NewAR, 48, 843,
+doi: 10.1016/j.newar.2004.06.001
+Escala, A. 2021, ApJ, 908, 57,
+doi: 10.3847/1538-4357/abd93c
+Ferrara, A., Salvadori, S., Yue, B., & Schleicher, D. 2014,
+Monthly Notices of the Royal Astronomical Society, 443,
+2410, doi: 10.1093/mnras/stu1280
+Fishbach, M., Farr, W. M., & Holz, D. E. 2020, ApJL, 891,
+L31, doi: 10.3847/2041-8213/ab77c9
+Fragione, G., Kocsis, B., Rasio, F. A., & Silk, J. 2021,
+arXiv e-prints, arXiv:2107.04639.
+https://arxiv.org/abs/2107.04639
+Fragione, G., Loeb, A., Kremer, K., & Rasio, F. A. 2020a,
+ApJ, 897, 46, doi: 10.3847/1538-4357/ab94b2
+Fragione, G., Loeb, A., & Rasio, F. A. 2020b, ApJL, 902,
+L26, doi: 10.3847/2041-8213/abbc0a
+Fragione, G., & Sari, R. 2018, ApJ, 852, 51,
+doi: 10.3847/1538-4357/aaa0d7
+Fregeau, J. M., Cheung, P., Portegies Zwart, S. F., &
+Rasio, F. A. 2004, MNRAS, 352, 1,
+doi: 10.1111/j.1365-2966.2004.07914.x
+Fregeau, J. M., Joshi, K. J., Portegies Zwart, S. F., &
+Rasio, F. A. 2002, ApJ, 570, 171, doi: 10.1086/339576
+Freitag, M., Amaro-Seoane, P., & Kalogera, V. 2006, ApJ,
+649, 91, doi: 10.1086/506193
+Generozov, A., & Madigan, A.-M. 2020, ApJ, 896, 137,
+doi: 10.3847/1538-4357/ab94bc
+Generozov, A., Stone, N. C., Metzger, B. D., & Ostriker,
+J. P. 2018, MNRAS, 478, 4030,
+doi: 10.1093/mnras/sty1262
+12 Rose et al.
+Genzel, R., Eisenhauer, F., & Gillessen, S. 2010, Reviews of
+Modern Physics, 82, 3121,
+doi: 10.1103/RevModPhys.82.3121
+Genzel, R., Sch¨odel, R., Ott, T., et al. 2003, ApJ, 594, 812,
+doi: 10.1086/377127
+Ghez, A. M., Salim, S., Hornstein, S. D., et al. 2005, ApJ,
+620, 744, doi: 10.1086/427175
+Gond´ an, L., Kocsis, B., Raffai, P., & Frei, Z. 2018, ApJ,
+860, 5, doi: 10.3847/1538-4357/aabfee
+Gonz´ alez, E., Kremer, K., Chatterjee, S., et al. 2021, ApJL,
+908, L29, doi: 10.3847/2041-8213/abdf5b
+GRAVITY Collaboration, Abuter, R., Amorim, A., et al.
+2020, A&A, 636, L5, doi: 10.1051/0004-6361/202037813
+Gualandris, A., & Merritt, D. 2009, ApJ, 705, 361,
+doi: 10.1088/0004-637X/705/1/361
+G¨ urkan, M. A., Fregeau, J. M., & Rasio, F. A. 2006, ApJL,
+640, L39, doi: 10.1086/503295
+G¨ urkan, M. A., & Rasio, F. A. 2005, ApJ, 628, 236,
+doi: 10.1086/430694
+Hailey, C. J., Mori, K., Bauer, F. E., et al. 2018, Nature,
+556, 70, doi: 10.1038/nature25029
+Hansen, B. M. S., & Milosavljevi´c, M. 2003, ApJL, 593,
+L77, doi: 10.1086/378182
+Heger, A., Fryer, C. L., Woosley, S. E., Langer, N., &
+Hartmann, D. H. 2003, ApJ, 591, 288,
+doi: 10.1086/375341
+Hoang, B.-M., Naoz, S., Kocsis, B., Rasio, F. A., &
+Dosopoulou, F. 2018, ApJ, 856, 140,
+doi: 10.3847/1538-4357/aaafce
+Hoang, B.-M., Naoz, S., & Kremer, K. 2020, ApJ, 903, 8,
+doi: 10.3847/1538-4357/abb66a
+Hopman, C., & Alexander, T. 2005, ApJ, 629, 362,
+doi: 10.1086/431475
+Igumenshchev, I. V. 2008, ApJ, 677, 317,
+doi: 10.1086/529025
+Igumenshchev, I. V., Narayan, R., & Abramowicz, M. A.
+2003, ApJ, 592, 1042, doi: 10.1086/375769
+Jiang, Y.-F., Stone, J. M., & Davis, S. W. 2014, ApJ, 796,
+106, doi: 10.1088/0004-637X/796/2/106
+Johnson, J. L., & Bromm, V. 2007, Monthly Notices of the
+Royal Astronomical Society, 374, 1557,
+doi: 10.1111/j.1365-2966.2006.11275.x
+Kremer, K., Lombardi, James C., J., Lu, W., Piro, A. L., &
+Rasio, F. A. 2022, arXiv e-prints, arXiv:2201.12368.
+https://arxiv.org/abs/2201.12368
+Kremer, K., Lu, W., Piro, A. L., et al. 2021, ApJ, 911, 104,
+doi: 10.3847/1538-4357/abeb14
+Kremer, K., Spera, M., Becker, D., et al. 2020, ApJ, 903,
+45, doi: 10.3847/1538-4357/abb945
+Kroupa, P., Subr, L., Jerabkova, T., & Wang, L. 2020,
+MNRAS, 498, 5652, doi: 10.1093/mnras/staa2276
+Levin, Y., & Beloborodov, A. M. 2003, ApJL, 590, L33,
+doi: 10.1086/376675
+Limongi, M., & Chieffi, A. 2018a, ApJS, 237, 13,
+doi: 10.3847/1538-4365/aacb24
+—. 2018b, ApJS, 237, 13, doi: 10.3847/1538-4365/aacb24
+L¨ ockmann, U., & Baumgardt, H. 2008, MNRAS, 384, 323,
+doi: 10.1111/j.1365-2966.2007.12699.x
+Lu, C. X., & Naoz, S. 2019, MNRAS, 484, 1506,
+doi: 10.1093/mnras/stz036
+Lu, J. R., Ghez, A. M., Hornstein, S. D., et al. 2009, ApJ,
+690, 1463, doi: 10.1088/0004-637X/690/2/1463
+Madau, P., & Rees, M. J. 2001, ApJL, 551, L27,
+doi: 10.1086/319848
+Magorrian, J., & Tremaine, S. 1999, MNRAS, 309, 447,
+doi: 10.1046/j.1365-8711.1999.02853.x
+Maillard, J. P., Paumard, T., Stolovy, S. R., & Rigaut, F.
+2004, A&A, 423, 155, doi: 10.1051/0004-6361:20034147
 Mapelli, M., Bouffanais, Y., Santoliquido, F., Arca Sedda,
-doi: 10.1103/PhysRevD.100.043027
 M., & Artale, M. C. 2021a, arXiv e-prints,
-Rose,S.C.,Naoz,S.,Gautam,A.K.,etal. 2020,ApJ,904,
 arXiv:2109.06222. https://arxiv.org/abs/2109.06222
-113, doi: 10.3847/1538-4357/abc557
 Mapelli, M., Dall’Amico, M., Bouffanais, Y., et al. 2021b,
-Sakstein, J., Croon, D., McDermott, S. D., Straight, M. C.,
 MNRAS, 505, 339, doi: 10.1093/mnras/stab1334
-& Baxter, E. J. 2020, arXiv e-prints, arXiv:2009.01213.
+Mastrobuono-Battisti, A., Church, R. P., & Davies, M. B.
+2021, MNRAS, 505, 3314, doi: 10.1093/mnras/stab1409
+McKinney, J. C. 2006, MNRAS, 368, 1561,
+doi: 10.1111/j.1365-2966.2006.10256.x
+McKinney, J. C., & Gammie, C. F. 2004, ApJ, 611, 977,
+doi: 10.1086/422244
+McKinney, J. C., Tchekhovskoy, A., Sadowski, A., &
+Narayan, R. 2014, MNRAS, 441, 3177,
+doi: 10.1093/mnras/stu762
 Merritt, D. 2006, Reports on Progress in Physics, 69, 2513,
-https://arxiv.org/abs/2009.01213
 doi: 10.1088/0034-4885/69/9/R01
-Samsing, J., Venumadhav, T., Dai, L., et al. 2019, PhRvD,
 Miralda-Escud´e, J., & Gould, A. 2000, ApJ, 545, 847,
-100, 043009, doi: 10.1103/PhysRevD.100.043009
 doi: 10.1086/317837
-Sari, R., & Fragione, G. 2019, ApJ, 885, 24,
 Morris, M. 1993, ApJ, 408, 496, doi: 10.1086/172607
-doi: 10.3847/1538-4357/ab43df
 Muno, M. P., Pfahl, E., Baganoff, F. K., et al. 2005, ApJL,
-Schneider, R., Ferrara, A., Natarajan, P., & Omukai, K.
 622, L113, doi: 10.1086/429721
+Muno, M. P., Bauer, F. E., Baganoff, F. K., et al. 2009,
+ApJS, 181, 110, doi: 10.1088/0067-0049/181/1/110
+Murray, N., Quataert, E., & Thompson, T. A. 2005, ApJ,
+618, 569, doi: 10.1086/426067
+Naoz, S., Rose, S. C., Michaely, E., et al. 2022, ApJL, 927,
+L18, doi: 10.3847/2041-8213/ac574b
+Naoz, S., & Silk, J. 2014, ApJ, 795, 102,
+doi: 10.1088/0004-637X/795/2/102
+Naoz, S., Silk, J., & Schnittman, J. D. 2019, ApJL, 885,
+L35, doi: 10.3847/2041-8213/ab4fed
+IMBH Formation in Galactic Nuclei 13
+Naoz, S., Will, C. M., Ramirez-Ruiz, E., et al. 2020, ApJL,
+888, L8, doi: 10.3847/2041-8213/ab5e3b
+Narayan, R., Chael, A., Chatterjee, K., Ricarte, A., &
+Curd, B. 2022, MNRAS, 511, 3795,
+doi: 10.1093/mnras/stac285
+Narayan, R., Igumenshchev, I. V., & Abramowicz, M. A.
+2003, PASJ, 55, L69, doi: 10.1093/pasj/55.6.L69
+Ohsuga,K., Mori,M.,Nakamoto,T., &Mineshige,S.2005,
+ApJ, 628, 368, doi: 10.1086/430728
+O’Leary,R.M.,Kocsis,B.,&Loeb,A.2009,MNRAS,395,
+2127, doi: 10.1111/j.1365-2966.2009.14653.x
+O’Leary, R. M., Rasio, F. A., Fregeau, J. M., Ivanova, N.,
+& O’Shaughnessy, R. 2006, ApJ, 637, 937,
+doi: 10.1086/498446
+Ostriker, J. P., Choi, E., Ciotti, L., Novak, G. S., & Proga,
+D. 2010, ApJ, 722, 642,
+doi: 10.1088/0004-637X/722/1/642
+Park, M.-G., & Ostriker, J. P. 2001, ApJ, 549, 100,
+doi: 10.1086/319042
+Paumard,T.,Genzel,R.,Martins,F.,etal.2006,ApJ,643,
+1011, doi: 10.1086/503273
+Perets, H. B., Li, Z., Lombardi, James C., J., & Milcarek,
+Stephen R., J. 2016, ApJ, 823, 113,
+doi: 10.3847/0004-637X/823/2/113
+Peters, P. C., & Mathews, J. 1963a, Physical Review, 131,
+435, doi: 10.1103/PhysRev.131.435
+—. 1963b, Physical Review, 131, 435,
+doi: 10.1103/PhysRev.131.435
+Portegies Zwart, S. F., Baumgardt, H., Hut, P., Makino, J.,
+& McMillan, S. L. W. 2004, Nature, 428, 724,
+doi: 10.1038/nature02448
+Portegies Zwart, S. F., & McMillan, S. L. W. 2000, ApJL,
+528, L17, doi: 10.1086/312422
+—. 2002, ApJ, 576, 899, doi: 10.1086/341798
+Rashkov, V., & Madau, P. 2014, ApJ, 780, 187,
+doi: 10.1088/0004-637X/780/2/187
+Renzo, M., Farmer, R., Justham, S., et al. 2020, A&A, 640,
+A56, doi: 10.1051/0004-6361/202037710
+Rizzuto, F. P., Naab, T., Spurzem, R., et al. 2022,
+MNRAS, doi: 10.1093/mnras/stac231
+Rodriguez, C. L., Amaro-Seoane, P., Chatterjee, S., &
+Rasio, F. A. 2018, PhRvL, 120, 151101,
+doi: 10.1103/PhysRevLett.120.151101
+Rodriguez, C. L., Chatterjee, S., & Rasio, F. A. 2016,
+PhRvD, 93, 084029, doi: 10.1103/PhysRevD.93.084029
+Rodriguez, C. L., Zevin, M., Amaro-Seoane, P., et al. 2019,
+Phys. Rev. D, 100, 043027,
+doi: 10.1103/PhysRevD.100.043027
+Rose,S.C.,Naoz,S.,Gautam,A.K.,etal. 2020,ApJ,904,
+113, doi: 10.3847/1538-4357/abc557
+Sakstein, J., Croon, D., McDermott, S. D., Straight, M. C.,
+& Baxter, E. J. 2020, arXiv e-prints, arXiv:2009.01213.
+https://arxiv.org/abs/2009.01213
+Sakurai, Y., Yoshida, N., Fujii, M. S., & Hirano, S. 2017,
+MNRAS, 472, 1677, doi: 10.1093/mnras/stx2044
+Samsing, J., Venumadhav, T., Dai, L., et al. 2019, PhRvD,
+100, 043009, doi: 10.1103/PhysRevD.100.043009
+Sari, R., & Fragione, G. 2019, ApJ, 885, 24,
+doi: 10.3847/1538-4357/ab43df
+Schneider, R., Ferrara, A., Natarajan, P., & Omukai, K.
 2002, The Astrophysical Journal, 571, 30,
-Muno, M. P., Bauer, F. E., Baganoff, F. K., et al. 2009, doi: 10.1086/339917
-ApJS, 181, 110, doi: 10.1088/0067-0049/181/1/110 Schnittman, J. D., & Buonanno, A. 2007, ApJL, 662, L63,
-Naoz, S., & Silk, J. 2014, ApJ, 795, 102, doi: 10.1086/519309
-doi: 10.1088/0004-637X/795/2/102 Shapiro, S. L., & Marchant, A. B. 1978, ApJ, 225, 603,
-Naoz, S., Silk, J., & Schnittman, J. D. 2019, ApJL, 885, doi: 10.1086/156521
-L35, doi: 10.3847/2041-8213/ab4fed Shima, E., Matsuda, T., Takeda, H., & Sawada, K. 1985,
-Naoz, S., Will, C. M., Ramirez-Ruiz, E., et al. 2020, ApJL, MNRAS, 217, 367, doi: 10.1093/mnras/217.2.367
-888, L8, doi: 10.3847/2041-8213/ab5e3b Shlosman, I., Choi, J.-H., Begelman, M. C., & Nagamine,
-O’Leary,R.M.,Kocsis,B.,&Loeb,A.2009,MNRAS,395, K. 2016, MNRAS, 456, 500, doi: 10.1093/mnras/stv2700
-2127, doi: 10.1111/j.1365-2966.2009.14653.x Sigurdsson, S., & Phinney, E. S. 1993, ApJ, 415, 631,
-O’Leary, R. M., Rasio, F. A., Fregeau, J. M., Ivanova, N., doi: 10.1086/173190
+doi: 10.1086/339917
+Schnittman, J. D., & Buonanno, A. 2007, ApJL, 662, L63,
+doi: 10.1086/519309
+Sch¨odel, R., Gallego-Cano, E., Dong, H., et al. 2018, A&A,
+609, A27, doi: 10.1051/0004-6361/201730452
+Shapiro, S. L., & Marchant, A. B. 1978, ApJ, 225, 603,
+doi: 10.1086/156521
+Shima, E., Matsuda, T., Takeda, H., & Sawada, K. 1985,
+MNRAS, 217, 367, doi: 10.1093/mnras/217.2.367
+Shlosman, I., Choi, J.-H., Begelman, M. C., & Nagamine,
+K. 2016, MNRAS, 456, 500, doi: 10.1093/mnras/stv2700
+Sigurdsson, S., & Phinney, E. S. 1993, ApJ, 415, 631,
+doi: 10.1086/173190
 Spera, M., & Mapelli, M. 2017a, MNRAS, 470, 4739,
-& O’Shaughnessy, R. 2006, ApJ, 637, 937,
 doi: 10.1093/mnras/stx1576
-doi: 10.1086/498446
 —. 2017b, MNRAS, 470, 4739, doi: 10.1093/mnras/stx1576
-Perets, H. B., Li, Z., Lombardi, James C., J., & Milcarek,
 Spitzer, L. 1987, Dynamical evolution of globular clusters
-Stephen R., J. 2016, ApJ, 823, 113,
 Stephan, A. P., Naoz, S., Ghez, A. M., et al. 2016, ArXiv
-doi: 10.3847/0004-637X/823/2/113
 e-prints. https://arxiv.org/abs/1603.02709
-Peters, P. C., & Mathews, J. 1963a, Physical Review, 131,
 —. 2019, ApJ, 878, 58, doi: 10.3847/1538-4357/ab1e4d
-435, doi: 10.1103/PhysRev.131.435
+Stone, N. C., K¨ upper, A. H. W., & Ostriker, J. P. 2017,
+MNRAS, 467, 4180, doi: 10.1093/mnras/stx097
+Stone, N. C., & Metzger, B. D. 2016, MNRAS, 455, 859,
+doi: 10.1093/mnras/stv2281
 The LIGO Scientific Collaboration, the Virgo
-—. 1963b, Physical Review, 131, 435,
 Collaboration, Abbott, R., et al. 2020a, arXiv e-prints,
-doi: 10.1103/PhysRev.131.435
 arXiv:2009.01075. https://arxiv.org/abs/2009.01075
-Portegies Zwart, S. F., Baumgardt, H., Hut, P., Makino, J.,
 —. 2020b, arXiv e-prints, arXiv:2009.01190.
-& McMillan, S. L. W. 2004, Nature, 428, 724,
 https://arxiv.org/abs/2009.01190
-doi: 10.1038/nature02448
 Umbreit, S., Fregeau, J. M., Chatterjee, S., & Rasio, F. A.
-Portegies Zwart, S. F., & McMillan, S. L. W. 2000, ApJL,
 2012, ApJ, 750, 31, doi: 10.1088/0004-637X/750/1/31
-528, L17, doi: 10.1086/312422
 Valiante, R., Schneider, R., Volonteri, M., & Omukai, K.
-—. 2002, ApJ, 576, 899, doi: 10.1086/341798 2016, Monthly Notices of the Royal Astronomical
-Rashkov, V., & Madau, P. 2014, ApJ, 780, 187, Society, 457, 3356, doi: 10.1093/mnras/stw225
-doi: 10.1088/0004-637X/780/2/187 Vink, J. S., Higgins, E. R., Sander, A. A. C., & Sabhahit,
-Renzo, M., Farmer, R., Justham, S., et al. 2020, A&A, 640, G. N. 2021, MNRAS, 504, 146,
-A56, doi: 10.1051/0004-6361/202037710 doi: 10.1093/mnras/stab842
-IMBH Formation in Galactic Nuclei 11
-Wang, H., Stephan, A. P., Naoz, S., Hoang, B.-M., & Yue, B., Ferrara, A., Salvaterra, R., Xu, Y., & Chen, X.
-2014, Monthly Notices of the Royal Astronomical
+2016, Monthly Notices of the Royal Astronomical
+Society, 457, 3356, doi: 10.1093/mnras/stw225
+Vink, J. S., Higgins, E. R., Sander, A. A. C., & Sabhahit,
+G. N. 2021, MNRAS, 504, 146,
+doi: 10.1093/mnras/stab842
+14 Rose et al.
+Wang, H., Stephan, A. P., Naoz, S., Hoang, B.-M., &
 Breivik, K. 2021, ApJ, 917, 76,
+doi: 10.3847/1538-4357/ac088d
+Wang, J., & Merritt, D. 2004, ApJ, 600, 149,
+doi: 10.1086/379767
+Woosley, S. E. 2017, ApJ, 836, 244,
+doi: 10.3847/1538-4357/836/2/244
+Yu, Q., & Tremaine, S. 2002, MNRAS, 335, 965,
+doi: 10.1046/j.1365-8711.2002.05532.x
+Yuan, F., Wu, M., & Bu, D. 2012, ApJ, 761, 129,
+doi: 10.1088/0004-637X/761/2/129
+Yue, B., Ferrara, A., Salvaterra, R., Xu, Y., & Chen, X.
+2014, Monthly Notices of the Royal Astronomical
 Society, 440, 1263, doi: 10.1093/mnras/stu351
-doi: 10.3847/1538-4357/ac088d Zheng, X., Lin, D. N. C., & Mao, S. 2020, arXiv e-prints,
+Zheng, X., Lin, D. N. C., & Mao, S. 2020, arXiv e-prints,
 arXiv:2011.04653. https://arxiv.org/abs/2011.04653
-Woosley, S. E. 2017, ApJ, 836, 244,
 Zhu, Z., Li, Z., & Morris, M. R. 2018, ApJS, 235, 26,
-doi: 10.3847/1538-4357/836/2/244 doi: 10.3847/1538-4365/aab14f
+doi: 10.3847/1538-4365/aab14f
diff --git a/read/results/pdfplumber/2201.00029.txt b/read/results/pdfplumber/2201.00029.txt
index 2700519..462334c 100644
--- a/read/results/pdfplumber/2201.00029.txt
+++ b/read/results/pdfplumber/2201.00029.txt
@@ -51,8 +51,9 @@ The DBV white dwarf KIC 8626021 has an atmosphere rich in helium. Building upon
 previous studies, this research investigated novel techniques of analyzing variability in white
 dwarfs. The dwarf KIC 8626021 was chosen due to the large amount of preexisting research on
 the star, allowing for the validation of results using our methods. KIC 8626021 has an effective
-temperature of 29,700 K, log g = 7.890, and mass of 0.56 M (Córsico, 2020). Other research
+temperature of 29,700 K, log g = 7.890, and mass of 0.56 M
 ☉
+(Córsico, 2020). Other research
 has found that this white dwarf is the DBV with the highest known temperature, and its helium
 layer is the thinnest (Bischoff-Kim et al., 2015). Despite the long-cadence light curve being too
 noisy to draw many conclusions, other FTs of short-cadence data have been performed to find
@@ -67,10 +68,14 @@ analyzed. All computations were made in Wolfram Mathematica and Microsoft Excel,
 were performed in Mathematica. The re-binning process consisted of summing adjacent light
 curve data points in each quarter, therefore doubling the sampling interval from 0.5 hour to one
 hour, and then repeating this process on the data sample for a total of three times. In addition, a
-significant detection was defined as being 3 above the mean of the relative flux, and 0 on the
-graphs below represents this 3 cutoff. (Koch, D. G., 2010), (Wolfram Research, Inc., 2021). To
-find the SNR, we converted to decibels. Usi𝝈ng these SNRs, we were able to easily identify
-improvement in signal strength𝝈.
+significant detection was defined as being 3
+𝝈
+above the mean of the relative flux, and 0 on the
+graphs below represents this 3
+𝝈
+cutoff. (Koch, D. G., 2010), (Wolfram Research, Inc., 2021). To
+find the SNR, we converted to decibels. Using these SNRs, we were able to easily identify
+improvement in signal strength.
 Results
 Figure 1 presents the lightcurves constructed for quarters seven (Q7) and thirteen (Q13),
 with corrected flux magnitude (ppm) plotted versus time (Julian days). Figure 2 presents the FTs
@@ -95,30 +100,49 @@ binning process.
 6
 FIG. 3: The graphs show the initial FT of Q13, and then the FTs of the three successive re-bins
 of the light curve data. The significant frequencies of 5.784 µHz and 5.787 µHz are circled. In
-addition, in the third re-bin, the frequencies 11.641 µHz and 16.823 µHz rise above 3 and are
+addition, in the third re-bin, the frequencies 11.641 µHz and 16.823 µHz rise above 3
+𝝈
+and are
 nearly perfect integer multiples of 5.787 µHz. These harmonics are potentially indications of a
-starspot (Santos et al., 2017). 𝝈
+starspot (Santos et al., 2017).
 7
-Q7 Significant Light Corrected Flux Period (days) Signal-to-Noise
-Data Points Variability Magnitude (dB)
-Frequency (ppm)
+Q7 Significant
+Data Points
+Light
+Variability
+Frequency
 (µHz)
-Q7 First 5.886 -1.198 1.966 9.9
+Corrected Flux
+Magnitude
+(ppm)
+Period (days) Signal-to-Noise
+(dB)
+Q7 First
 Iteration
+5.886 -1.198 1.966 9.9
 Q7 Re-bin 1 5.886 -1.477 1.966 12.8
 Q7 Re-bin 2 5.889 0.597 1.965 19.2
 TABLE I: The table displays the various frequencies collected from Q7 and the information
 found through calculations to find period and SNR. The frequency of 5.464 µHz is not included,
 and therefore was not used in any calculations determining the average period of rotation. The
-values under corrected flux magnitude are relative to our significant frequency cutoff of 3 , thus
-negative numbers are under the cutoff.
+values under corrected flux magnitude are relative to our significant frequency cutoff of 3
 𝝈
-Q13 Significant Light Corrected Flux Period (days) Signal-to-Noise
-Data Points Variability Magnitude (dB)
-Frequency (ppm)
+, thus
+negative numbers are under the cutoff.
+Q13 Significant
+Data Points
+Light
+Variability
+Frequency
 (µHz)
-Q13 First 5.784 1.555 2.001 15.6
+Corrected Flux
+Magnitude
+(ppm)
+Period (days) Signal-to-Noise
+(dB)
+Q13 First
 Iteration
+5.784 1.555 2.001 15.6
 Q13 Re-bin 1 5.784 2.873 2.001 17.7
 Q13 Re-bin 2 5.787 4.938 2.000 22.6
 Q13 Re-bin 3 5.787 6.909 2.000 26.3
@@ -128,8 +152,9 @@ TABLE II: The table displays the various frequencies collected from Q13 and the
 found through calculations to find period and SNR. The last two significant frequencies (11.641
 µHz and 16.823 µHz) for Q13 Re-bin 3 represent potential harmonics, which are discussed in
 further detail in the Conclusions section of this paper. The values under corrected flux magnitude
-are relative to our significant frequency cutoff of 3 , thus negative numbers are under the cutoff.
+are relative to our significant frequency cutoff of 3
 𝝈
+, thus negative numbers are under the cutoff.
 8
 First Iteration (µHz) First Re-bin (µHz) Second Re-bin (µHz) Third Re-bin (µHz)
 0.933 0.933 0.215 0.216
@@ -160,9 +185,10 @@ First Iteration (µHz) First Re-bin (µHz) Second Re-bin (µHz) Third Re-bin (µ
 16.463
 16.894
 TABLE III: The table displays all frequencies of Q7 that had a corrected flux magnitude (ppm)
-above the cutoff of 3 The minor shifting of significant frequencies between re-bins is a by-
-product of the method, and we calculated for such errors when finding our average.
+above the cutoff of 3
 𝝈.
+The minor shifting of significant frequencies between re-bins is a by-
+product of the method, and we calculated for such errors when finding our average.
 9
 First Iteration (µHz) First Re-bin (µHz) Second Re-bin (µHz) Third Re-bin (µHz)
 3.094 2.018 2.019 1.951
@@ -185,17 +211,20 @@ First Iteration (µHz) First Re-bin (µHz) Second Re-bin (µHz) Third Re-bin (µ
 15.881
 16.823
 TABLE IV: The table displays all frequencies of Q13 that had a corrected flux magnitude (ppm)
-above the cutoff of 3 . The minor shifting of significant frequencies between re-bins is a by-
-product of the method, and we calculated for such errors when finding our average.
+above the cutoff of 3
 𝝈
+. The minor shifting of significant frequencies between re-bins is a by-
+product of the method, and we calculated for such errors when finding our average.
 Conclusions
 As our research used the long-cadence data from Kepler, much of the high-frequency
 variability due to gravitational wave pulsations is lost. However, this presents an opportunity to
 verify our results with the work of research groups that analyzed short-cadence data.With the
 data analyzed, the lower frequencies between 5-6 µHz emerged. After finding the average of the
-periods and accounting for a 1 margin of error, our research hypothesizes that the rotation
+periods and accounting for a 1
+𝝈
+margin of error, our research hypothesizes that the rotation
 period of KIC 8626021 is 1.99 ± 0.02 days. Other short-cadence research has found the rotation
-period to be 1.8 ± 0.4 days, by𝝈 analyzing the structures of independent modes (Bischoff-Kim et
+period to be 1.8 ± 0.4 days, by analyzing the structures of independent modes (Bischoff-Kim et
 al., 2015). Other calculated periods of rotation have been ≈ 1.7 days (Østensen et al., 2011), and
 these periods indicate that the more precise significant period identified through our re-binning
 relates to the rotation of the white dwarf.
@@ -207,9 +236,11 @@ processes. The frequency 5.464 µHz rises as another significant frequency; howe
 that this new frequency is simply an artifact of the re-binning process. In Q13, we saw SNR
 improvement ranging from 1.1 dB to 1.3 dB.
 Through the re-binning process, more lines, or significant frequencies, appeared above
-the 3 cutoff, particularly at lower frequencies. These findings suggest that as an alternative to
+the 3
+𝝈
+cutoff, particularly at lower frequencies. These findings suggest that as an alternative to
 short-cadence analysis, the re-binning process of long-cadence data can be used to identify
-signif𝝈icant lower frequencies in white dwarfs. The methods we used are also simple and
+significant lower frequencies in white dwarfs. The methods we used are also simple and
 replicable, which allows even those with less experience to quickly analyze the large amounts of
 data being collected by orbiting telescopes, such as the currently active TESS (Transiting
 Exoplanet Survey Satellite) telescope.
diff --git a/read/results/pdfplumber/2201.00037.txt b/read/results/pdfplumber/2201.00037.txt
index 99ce833..8709bfc 100644
--- a/read/results/pdfplumber/2201.00037.txt
+++ b/read/results/pdfplumber/2201.00037.txt
@@ -3,19 +3,55 @@ The influence of a fluid core and a solid inner core on the
 Cassini sate of Mercury
 Mathieu Dumberry 1
 1DepartmentofPhysics,UniversityofAlberta,Edmonton,Alberta,Canada.
-1202
 Key Points:
 • The Cassini state obliquity of Mercury’s mantle spin axis deviates from that of a rigid
 planet by no more than 0.01 arcmin.
-ceD
 • For a core magnetic field above 0.3 mT, electromagnetic coupling locks the fluid and solid
 cores into a common precession motion.
-• The larger the inner core is, the more the obliquity of the polar moment of inertia ap- 13
+• The larger the inner core is, the more the obliquity of the polar moment of inertia ap-
 proaches that expected for a rigid planet.
-]PE.hp-ortsa[
-1v73000.1022:viXra
 Corresponding author: Mathieu Dumberry, dumberry@ualberta.ca
 –1–
+a
+r
+X
+i
+v
+:
+2
+2
+0
+1
+.
+0
+0
+0
+3
+7
+v
+1
+[
+a
+s
+t
+r
+o
+-
+p
+h
+.
+E
+P
+]
+3
+1
+D
+e
+c
+2
+0
+2
+1
 Confidential manuscript submitted to JGR-Planets
 Abstract
 We present a model of the Cassini state of Mercury that comprises an inner core, a fluid core
@@ -54,8 +90,9 @@ its present-day orientation can be reconstructed from ephemerides data [Yseboodt
 2006; Baland et al., 2017]. Likewise, the rate of precession is also not observed directly, but is
 reconstructed by ephemerides data. The latest estimate is a retrograde precession period of 325,513
 yr with an inclination angle of I =8.5330◦ between the orbit and Laplace plane normals [Ba-
-land et al., 2017]. Measurements of the obliquity ε , defined as the angle of misalignment be-
+land et al., 2017]. Measurements of the obliquity ε
 m
+, defined as the angle of misalignment be-
 tween the spin-symmetry axis and the orbit normal, have been obtained by different techniques,
 including ground based radar observations [Margot et al., 2007, 2012], and stereo digital ter-
 rain images [Stark et al., 2015a] and radio tracking data [Mazarico et al., 2014; Verma and Mar-
@@ -65,37 +102,65 @@ all techniques yield an obliquity which is coplanar with the orbit and Laplace p
 and consistent with a Cassini state. Furthermore, the observed obliquity angle (2.042±0.08
 –2–
 Confidential manuscript submitted to JGR-Planets
-êL êI
-3 3
-I êp
+I
+descending
+node of orbit
+Ω
+p
+ê
+3
+I
+I
+ê
+3
+L
 ε
-m 3 êL
-Ω 3
+m
+I ê
+3
 p
-êI
+ascending
+node of orbit
+descending
+node of equator
+equatorial
+plane
+orbital
+direction
+S
+ê
 3
-ple aq nu eatorial I a ns oc de en od fi n og
-rbit
+I ê
+3
+L
 M
+ε
+m
 orbital
-o dr irb ei cta til
 plane
-on
-ε
-descending m
-node of equator S
-I
-descending
-node of orbit
 Figure 1. The orbit of Mercury (M) around Sun (S) with respect to the Laplace plane (grey shaded
-rectangle) and the Cassini state of Mercury. The normal to the orbital plane (eˆI) is offset from the nor-
+rectangle) and the Cassini state of Mercury. The normal to the orbital plane (ˆ eI
 3
-mal to the Laplace plane (eˆL) by an angle I = 8.5330◦. The symmetry axis of the mantle eˆp is offset
-3 3
-from eˆI by ε ≈ 2 arcmin. eˆI and eˆp are coplanar with, and precess about, eˆL in a retrograde direction
-3 m 3 3 3
-at frequency Ω = 2π/325,513 yr−1. The blue (orange) shaded region indicates the portion of the orbit
+) is offset from the nor-
+mal to the Laplace plane (ˆ eL
+3
+) by an angle I = 8.5330◦. The symmetry axis of the mantle ˆ ep
+3
+is offset
+from ˆ eI
+3
+by ε
+m
+≈ 2 arcmin. ˆ eI
+3
+and ˆ ep
+3
+are coplanar with, and precess about, ˆ eL
+3
+in a retrograde direction
+at frequency Ω
 p
+= 2π/325,513 yr−1. The blue (orange) shaded region indicates the portion of the orbit
 when Mercury is above (below) the Laplace plane. Angles are not drawn to scale.
 arcmin [Margot et al., 2012], 2.029±0.085 arcmin [Stark et al., 2015a] and 1.968±0.027 [Gen-
 ova et al., 2019] to list a few) matches that expected if Mercury occupies Cassini state 1.
@@ -115,15 +180,17 @@ approximate limit of 800 km on the inner core radius [Grott et al., 2011]. Howev
 core could be larger if a significant fraction of its growth occurred earlier in Mercury’s history.
 –3–
 Confidential manuscript submitted to JGR-Planets
-With a fluid core, and possibly a solid inner core, the observed obliquity ε reflects the
+With a fluid core, and possibly a solid inner core, the observed obliquity ε
 m
+reflects the
 orientation of the spin-symmetry axis of the precessing mantle and crust alone. Neglecting dis-
 sipation, and at equilibrium in the Cassini state, the spin axis of the fluid core and the spin-
 symmetry axis of the inner core should both also precess about the normal to the Laplace plane
 in a retrograde direction with a period of 325,513 yr. Both of these axes should also lie in the
 plane that defines the equilibrium Cassini state [e.g. Dumberry and Wieczorek, 2016], although
-their obliquity angles may be different than ε . Whether the spin axis of the fluid core is brought
+their obliquity angles may be different than ε
 m
+. Whether the spin axis of the fluid core is brought
 into an alignment with the mantle obliquity depends primarily on the pressure torque (also re-
 ferred to as the inertial torque) exerted by the centrifugal force of the rotating fluid core on the
 misaligned elliptical shape of the core-mantle boundary (CMB) [Poincar´e, 1910]. The more flat-
@@ -139,8 +206,9 @@ thermore, viscous and electromagnetic (EM) coupling at the CMB can further restr
 alignment between the mantle and core [Peale et al., 2014].
 If an inner core is present, its obliquity angle is determined by the sum of the torques act-
 ing on it. This includes the gravitational torque from the Sun acting on its tilted figure, anal-
-ogous to the torque applied on the tilted mantle that sets the obliquity ε . In addition, the
+ogous to the torque applied on the tilted mantle that sets the obliquity ε
 m
+. In addition, the
 tilt of the inner core also depends on the gravitational torque imposed by the mantle and the
 pressure torque at the inner core boundary (ICB) imposed by the fluid core. If the mantle grav-
 itational torque dominates, the inner core tilt is expected to remain closely aligned with the
@@ -181,29 +249,59 @@ symmetry axis of the mantle and gravity field may differ.
 2.1 The interior structure of Mercury
 Our model of Mercury consists of four layers of uniform density: a solid inner core, a fluid
 outer core, a solid mantle, and a thin crust. The outer radii of each of these layers, are denoted
-by r , r , r , and R, and their densities by ρ , ρ , ρ , and ρ , respectively. The inner core ra-
-s f m s f m c
-dius r corresponds to the ICB radius, the fluid core radius r to the CMB radius, and R=
-s f
+by $r_s$, $r_f$, $r_m$, and $R$, and their densities by $\rho_s$, $\rho_f$, $\rho_m$, and $\rho_c$, respectively. The inner core ra-
+dius $r_s$ corresponds to the ICB radius, the fluid core radius $r_f$ to the CMB radius, and $R=$
 2439.36 km to the planetary radius of Mercury. Compressibility effects from increasing pres-
 sure with depth are not negligible in the core of Mercury. However adopting uniform densities
 simplifies the analytical expressions of the model while still capturing the first order rotational
 dynamics. Uniform densities were also adopted by Peale et al. [2016] and following the same
 strategy facilitates comparisons between our results.
-We build our interior model as detailed in Peale et al. [2016]. We first specify r , ρ (or
-s s
-a density contrast at the ICB), the crustal density ρ and crustal thickness h=R−r . The
-c m
-three unknowns r , ρ and ρ are then solved such that the interior model is consistent with
-f f m
+We build our interior model as detailed in Peale et al. [2016]. We first specify r
+s
+, ρ
+s
+(or
+a density contrast at the ICB), the crustal density ρ
+c
+and crustal thickness h=R−r
+m
+. The
+three unknowns r
+f
+, ρ
+f
+and ρ
+m
+are then solved such that the interior model is consistent with
 the known mass M and chosen values of the moments of inertia of the whole planet C and that
-of the mantle and crust C .
+of the mantle and crust C
 m
+.
 Each layer is triaxial in shape. We denote the polar flattening (or geometrical ellipticity)
-by (cid:15) , defined as the difference between the mean equatorial and polar radii, divided by the mean
+by (cid:15)
 i
-spherical radius. Likewise, we denote the equatorial flattening by the variable ξ , defined as the
+, defined as the difference between the mean equatorial and polar radii, divided by the mean
+spherical radius. Likewise, we denote the equatorial flattening by the variable ξ
 i
+, defined as the
 difference between the maximum and minimum equatorial radii, divided by the mean spher-
 ical radius. As above, we use the subscript i = s, f, m and r, to denote the polar or equa-
 torial flattenings at the ICB, CMB, crust-mantle boundary (CrMB), and surface.
@@ -211,119 +309,342 @@ The measured polar and equatorial flattenings are taken from Perry et al. [2015]
 numerical values are given in Table 1. We then assume that the ICB and CMB are both at hy-
 drostatic equilibrium with the imposed gravitational potential induced by the flattenings at the
 CrMB and surface. The flattenings at all interior boundaries are specified such that they are
-consistent with the observed degree 2 spherical harmonic coefficients of gravity J and C ; their
-2 22
-numerical values are given in Table 1. Specifically, J and C are connected to the principal
-2 22
+consistent with the observed degree 2 spherical harmonic coefficients of gravity J
+2
+and C
+22
+; their
+numerical values are given in Table 1. Specifically, J
+2
+and C
+22
+are connected to the principal
 moments of inertia of Mercury (C >B >A) and to the polar and equatorial flattenings by
-J = C−A¯ = 8π 1 (cid:2) (ρ −ρ )r5(cid:15) +(ρ −ρ )r5(cid:15) +(ρ −ρ )r5 (cid:15) +ρ R5(cid:15) (cid:3) , (1a)
-2 MR2 15MR2 s f s s f m f f m c m m c r
-C = B−A = 8π 1 (cid:2) (ρ −ρ )r5ξ +(ρ −ρ )r5ξ +(ρ −ρ )r5 ξ +ρ R5ξ (cid:3) . (1b)
-22 4MR2 154MR2 s f s s f m f f m c m m c r
-where A¯ is the mean equatorial moment of inertia defined below. The same procedure was used
+$$J_2 = \frac{C-\bar{A}}{MR^2} = \frac{8\pi}{15}\,\frac{1}{MR^2}\left[(\rho_s-\rho_f)\,r_s^5\,\epsilon_s+(\rho_f-\rho_m)\,r_f^5\,\epsilon_f+(\rho_m-\rho_c)\,r_m^5\,\epsilon_m+\rho_c R^5\,\epsilon_r\right], \tag{1a}$$
+$$C_{22} = \frac{B-A}{4MR^2} = \frac{8\pi}{15}\,\frac{1}{4MR^2}\left[(\rho_s-\rho_f)\,r_s^5\,\xi_s+(\rho_f-\rho_m)\,r_f^5\,\xi_f+(\rho_m-\rho_c)\,r_m^5\,\xi_m+\rho_c R^5\,\xi_r\right]. \tag{1b}$$
+where ¯ A is the mean equatorial moment of inertia defined below. The same procedure was used
 in Peale et al. [2016] and the mathematical details are given in Equations (18-20) of Dumberry
 –5–
 Confidential manuscript submitted to JGR-Planets
 Mercury Parameter Numerical value Reference
 mean motion, n 2π/87.96935 day−1 Stark et al. [2015b]
-rotation rate, Ω =1.5n 2π/58.64623 day−1 Stark et al. [2015b]
+rotation rate, Ω
 o
-orbit precession rate, Ω 2π/325,513 yr−1 Baland et al. [2017]
+=1.5n 2π/58.64623 day−1 Stark et al. [2015b]
+orbit precession rate, Ω
+p
+2π/325,513 yr−1 Baland et al. [2017]
+Poincar´e number, δω =Ω
 p
-Poincar´e number, δω =Ω /Ω 4.9327×10−7
-p o
-orbital eccentricity, e 0.20563 Baland et al. [2017]
+/Ω
+o
+4.9327×10−7
+orbital eccentricity, e
 c
+0.20563 Baland et al. [2017]
 orbital inclination, I 8.5330◦ Baland et al. [2017]
 mean planetary radius, R 2439.360 km Perry et al. [2015]
 mass, M 3.3012×1023 kg Genova et al. [2019]
-mean density, ρ¯ 5429.5 kg m−3
-J 5.0291×10−5 Genova et al. [2019]
+mean density, ¯ ρ 5429.5 kg m−3
+J
 2
-C 8.0415×10−6 Genova et al. [2019]
+5.0291×10−5 Genova et al. [2019]
+C
 22
-polar surface flattening, (cid:15) 6.7436×10−4 Perry et al. [2015]
+8.0415×10−6 Genova et al. [2019]
+polar surface flattening, (cid:15)
 r
-equatorial surface flattening, ξ 5.1243×10−4 Perry et al. [2015]
+6.7436×10−4 Perry et al. [2015]
+equatorial surface flattening, ξ
 r
+5.1243×10−4 Perry et al. [2015]
 Table 1. Reference parameters for Mercury. The mass M is computed from GM = 22031.8636×109
-m3/s2 taken from Genova et al. [2019]. The mean density is calculated from 4πρ¯R3 =M. The numerical
+m3/s2 taken from Genova et al. [2019]. The mean density is calculated from 4π
 3
-values of (cid:15) and ξ are calculated from (cid:15) =(a¯−c)/R and ξ =(a−b)/R, where a¯= 1(a+b) and where
-r r r r 2
+¯ ρR3 =M. The numerical
+values of (cid:15)
+r
+and ξ
+r
+are calculated from (cid:15)
+r
+=(¯ a−c)/R and ξ
+r
+=(a−b)/R, where ¯ a= 1
+2
+(a+b) and where
 a = 2440.53 km, b = 2439.28 km and c = 2438.26 km are the semimajor, intermediate and semiminor
-axes of the trixial ellipsoidal shape of Mercury taken from Table 2 of Perry et al. [2015]. J and C are
-2 22
+axes of the triaxial ellipsoidal shape of Mercury taken from Table 2 of Perry et al. [2015]. $J_2$ and $C_{22}$ are
 computed from Equation (4) in the Supporting Information of Genova et al. [2019].
 and Wieczorek [2016] who adopted the same strategy in their interior modelling of the Moon.
 Note that we neglect the misalignment between the triaxial shape of Mercury’s surface topog-
 raphy and the axes of the principal moments of inertia, which amount to a polar offset of ∼2◦
 and an equatorial offset of ∼15◦ [Perry et al., 2015].
 Once the densities and flattenings of all interior regions are known, we can specify the mo-
-ments of inertia of the fluid core (C > B > A ) and solid inner core (C > B > A )
-f f f s s s
+ments of inertia of the fluid core (C
+f
+> B
+f
+> A
+f
+) and solid inner core (C
+s
+> B
+s
+> A
+s
+)
 along with the mean equatorial moments of inertia
-1 1 1
-A¯= (A+B), A¯ = (A +B ), A¯ = (A +B ). (2)
-2 f 2 f f s 2 s s
-From these, we define the polar (e, e , e ) and equatorial (γ, γ ) dynamical ellipticities of the
-f s s
+$$\bar{A}=\frac{1}{2}(A+B),\quad \bar{A}_f=\frac{1}{2}(A_f+B_f),\quad \bar{A}_s=\frac{1}{2}(A_s+B_s). \tag{2}$$
+From these, we define the polar (e, e
+f
+, e
+s
+) and equatorial (γ, γ
+s
+) dynamical ellipticities of the
 whole planet (no subscript), fluid core (subscript f) and solid inner core (subscript s), which
 enter our rotational model,
-C−A¯ C −A¯ C −A¯
-e= e = f f e = s s , (3a)
-A¯ f A¯ s A¯
-f s
-B−A B −A
-γ = γ = s s . (3b)
-A¯ s A¯
-s
-We further note that e and γ are connected to J and C by
-2 22
-MR2 4MR2
-e= J , γ = C . (4)
-A¯ 2 A¯ 22
+$$e=\frac{C-\bar{A}}{\bar{A}},\quad e_f=\frac{C_f-\bar{A}_f}{\bar{A}_f},\quad e_s=\frac{C_s-\bar{A}_s}{\bar{A}_s}, \tag{3a}$$
+$$\gamma=\frac{B-A}{\bar{A}},\quad \gamma_s=\frac{B_s-A_s}{\bar{A}_s}. \tag{3b}$$
+We further note that $e$ and $\gamma$ are connected to $J_2$ and $C_{22}$ by
+$$e=\frac{MR^2}{\bar{A}}\,J_2,\quad \gamma=\frac{4MR^2}{\bar{A}}\,C_{22}. \tag{4}$$
 –6–
 Confidential manuscript submitted to JGR-Planets
-êp
-a) 3 b)
-êI Ω
-3 ê 3s ê 3p
 θ
-ε m Ω
-êL m θ n s
-3 I ê 3I Cassini
-θ s Ω ê 3L plane
+m
+θ n
+θ s
 θ f
+Ω
+Ω
+s
+Ω f
+ê
+3
 p
-θ f I ε
-m êp
-2
+ê 3 s
+ê
+3
+I
+I
+ε
+m
+θ
+p
+ê
+3
+L
 ê
 1
-ωΩt
+p
+ê
+2
+p
+Cassini plane
+ωΩ
 o
-êp
-1 êp
+t
+ê 3 I
+I ε
+m
+ê 3 p
+ê
+1
+ê
 2
+p
+ê 3 L
+a) b)
 Figure 2. Geometry of the Cassini state model of Mercury viewed (a) in the Cassini plane and (b)
-in a frame attached to the rotating mantle. The orbit normal (eˆI) is tilted by an angle I = 8.533◦ from
+in a frame attached to the rotating mantle. The orbit normal (ˆ eI
+3
+) is tilted by an angle I = 8.533◦ from
+the Laplace normal (ˆ eL
+3
+) and the symmetry axis of Mercury’s mantle (ˆ ep
+3
+) is tilted by an obliquity ε
+m
+with respect to ˆ eI
+3
+. Shown in (a) are the orientations of the symmetry axis of the inner core (ˆ es
 3
-the Laplace normal (eˆL) and the symmetry axis of Mercury’s mantle (eˆp) is tilted by an obliquity ε
-3 3 m
-with respect to eˆI. Shown in (a) are the orientations of the symmetry axis of the inner core (eˆs), the
-3 3
-rotation rate vectors of the mantle (Ω), fluid core (Ω ) and inner core (Ω ) and angles θ , θ , θ , θ
-f f p n m f
-and θ in their Cassini state equilibrium. All vectors and angles are in a common plane which we refer
+), the
+rotation rate vectors of the mantle (Ω), fluid core (Ω
+f
+) and inner core (Ω
+f
+) and angles θ
+p
+, θ
+n
+, θ
+m
+, θ
+f
+and θ
 s
+in their Cassini state equilibrium. All vectors and angles are in a common plane which we refer
 to as the Cassini plane. The light grey, white, and dark grey ellipsoid represent a polar cross-section of
 the mantle, fluid core and inner core, respectively; blue shaded parts show an equatorial cross section.
 The black curved arrow in the equatorial plane in (a) indicates the direction of rotation of the equatorial
-mantle axes eˆp and eˆp with respect to the Cassini plane. Viewed in the frame attached to the rotating
-1 2
-mantle (b), the Cassini plane is rotating at frequency ωΩ = −Ω − Ω cosI in the longitudinal direc-
-o o p
+mantle axes ˆ ep
+1
+and ˆ ep
+2
+with respect to the Cassini plane. Viewed in the frame attached to the rotating
+mantle (b), the Cassini plane is rotating at frequency ωΩ
+o
+= −Ω
+o
+− Ω
+p
+cosI in the longitudinal direc-
 tion. The oblateness of all three regions and the amplitude of all angles are exaggerated for purpose of
 illustration.
 –7–
@@ -332,81 +653,167 @@ Confidential manuscript submitted to JGR-Planets
 Mercury’s rotation is characterized by a 3:2 spin-orbit resonance in which it completes
 3 rotations around itself for every 2 orbital revolutions around the Sun. The orbital period is
 87.96935 day and the sidereal rotation period is 58.64623 day [Stark et al., 2015b]. These de-
-fine the mean motion n = 2π/87.96935 day−1 and the sidereal frequency Ω = 2π/58.64623
+fine the mean motion n = 2π/87.96935 day−1 and the sidereal frequency Ω
 o
-day−1, with Ω =1.5n. Mercury’s rotational state is also characterized by a Cassini state whereby
+= 2π/58.64623
+day−1, with Ω
 o
-the orientations of the orbit normal (eˆI) and of the mantle symmetry axis (eˆp) are both copla-
-3 3
-nar with, and precess about, the normal to the Laplace plane (eˆL). The orientation of the Laplace
+=1.5n. Mercury’s rotational state is also characterized by a Cassini state whereby
+the orientations of the orbit normal (ˆ eI
+3
+) and of the mantle symmetry axis (ˆ ep
 3
+) are both copla-
+nar with, and precess about, the normal to the Laplace plane (ˆ eL
+3
+). The orientation of the Laplace
 plane varies on long timescales, but it can be taken as invariable in inertial space for our present
-purpose. The Cassini state of Mercury is illustrated in Figure 1. The angle between eˆL and eˆI
-3 3
-is the orbital inclination I =8.5330◦ [Baland et al., 2017], the angle between eˆI and eˆp is the
-3 3
-obliquity ε and the angle between eˆL and eˆp is θ = I +ε . The precession of eˆI and eˆp
-m 3 3 p m 3 3
-about the Laplace pole is retrograde with frequency Ω =2π/325,513 yr−1 [Baland et al., 2017].
+purpose. The Cassini state of Mercury is illustrated in Figure 1. The angle between ˆ eL
+3
+and ˆ eI
+3
+is the orbital inclination I =8.5330◦ [Baland et al., 2017], the angle between ˆ eI
+3
+and ˆ ep
+3
+is the
+obliquity ε
+m
+and the angle between ˆ eL
+3
+and ˆ ep
+3
+is θ
+p
+= I +ε
+m
+. The precession of ˆ eI
+3
+and ˆ ep
+3
+about the Laplace pole is retrograde with frequency Ω
 p
+=2π/325,513 yr−1 [Baland et al., 2017].
 The mantle and crust are welded together and form a single rotating region which we re-
 fer to as the ‘mantle’ in the context of our rotational model. The rotation and symmetry axes
 of the mantle are expected to remain in close alignment, but they do not coincide exactly. We
-define the rotation rate vector of the mantle by Ω, and its misalignment from eˆp by an angle
+define the rotation rate vector of the mantle by Ω, and its misalignment from ˆ ep
 3
-θ . Note that θ (cid:28) ε and it is often the spin axis of Mercury which is used to define the
-m m m
-obliquity ε [e.g. Baland et al., 2017]. If Mercury were an entirely rigid planet, eˆp and Ω would
-m 3
+by an angle
+θ
+m
+. Note that θ
+m
+(cid:28) ε
+m
+and it is often the spin axis of Mercury which is used to define the
+obliquity ε
+m
+[e.g. Baland et al., 2017]. If Mercury were an entirely rigid planet, ˆ ep
+3
+and Ω would
 characterize the symmetry and rotation axes of the whole of Mercury, not just its mantle, and
-the angles I, ε and θ would completely describe the Cassini state. The presence of a fluid
-m m
+the angles I, ε
+m
+and θ
+m
+would completely describe the Cassini state. The presence of a fluid
 outer core and solid inner core require three additional orientation vectors and angles. The sym-
-metry axis of the inner core is defined by unit vector eˆs and its misalignment from eˆp by an
-3 3
-angle θ . The rotation vectors of the fluid core and inner core are defined as Ω and Ω , re-
-n f s
+metry axis of the inner core is defined by unit vector ˆ es
+3
+and its misalignment from ˆ ep
+3
+by an
+angle θ
+n
+. The rotation vectors of the fluid core and inner core are defined as Ω
+f
+and Ω
+s
+, re-
 spectively, and their misalignment from the rotation vector of the mantle Ω are defined by an-
-gles θ and θ (see Figure 2a). The rotation and symmetry axes of the inner core remain in close
-f s
-alignment, so θ ≈θ . To be formal in our definition of the different angles of misalignment,
-n s
+gles θ
+f
+and θ
+s
+(see Figure 2a). The rotation and symmetry axes of the inner core remain in close
+alignment, so θ
+n
+≈θ
+s
+. To be formal in our definition of the different angles of misalignment,
 for I defined positive as depicted on Figure 2a, all angles are defined positive in the clockwise
 direction.
-At equilibrium in the Cassini state, the three orientation vectors (eˆI, eˆp, eˆs) and three
-3 3 3
-rotation vectors (Ω, Ω , Ω ) are forced to precess about eˆL at the same frequency. If we ne-
-f s 3
+At equilibrium in the Cassini state, the three orientation vectors (ˆ eI
+3
+, ˆ ep
+3
+, ˆ es
+3
+) and three
+rotation vectors (Ω, Ω
+f
+, Ω
+s
+) are forced to precess about ˆ eL
+3
+at the same frequency. If we ne-
 glect dissipation, all vectors lie on the same plane, which we refer to as the Cassini plane. Viewed
-in inertial space, the Cassini plane is rotating in a retrograde direction at frequency Ω . Viewed
+in inertial space, the Cassini plane is rotating in a retrograde direction at frequency Ω
 p
-in the frame attached to the mantle rotating at sidereal frequency Ω , the Cassini plane is ro-
+. Viewed
+in the frame attached to the mantle rotating at sidereal frequency Ω
 o
-tating in a retrograde direction at frequency ωΩ (see Figure 2b), where ω, expressed in cycles
+, the Cassini plane is ro-
+tating in a retrograde direction at frequency ωΩ
 o
+(see Figure 2b), where ω, expressed in cycles
 per Mercury day, is equal to
-ω =−1−δωcos(θ ). (5)
+ω =−1−δωcos(θ
 p
-The factor δω = Ω /Ω = 4.933×10−7 is the Poincar´e number, expressing the ratio of the
-p o
+). (5)
+The factor δω = Ω
+p
+/Ω
+o
+= 4.933×10−7 is the Poincar´e number, expressing the ratio of the
 forced precession to sidereal rotation frequencies. The invariance of the Laplace plane normal
 as seen in the mantle frame is expressed as
 d
-eˆL+Ω×eˆL =0, (6)
-dt 3 3
+dt
+ˆ eL
+3
++Ω×ˆ eL
+3
+=0, (6)
 or equivalently, by Equation (19e) of Stys and Dumberry [2018],
-ωsin(θ )+sin(θ +θ )=0. (7)
-p m p
+$$\omega\sin(\theta_p)+\sin(\theta_m+\theta_p)=0. \tag{7}$$
 –8–
 Confidential manuscript submitted to JGR-Planets
-This expresses a formal connection between θ and θ which is independent of the interior struc-
-p m
-ture of Mercury. Using Equation (5) and cos(θ )→1, this connection can be rewritten as
-m
-sin(θ )=δω sin(θ ). (8)
-m p
-and thus the relative amplitudes of θ and θ depend of the Poincar´e number δω.
-m p
+This expresses a formal connection between θ
+p
+and θ
+m
+which is independent of the interior struc-
+ture of Mercury. Using Equation (5) and cos(θ
+m
+)→1, this connection can be rewritten as
+$$\sin(\theta_m)=\delta\omega\,\sin(\theta_p), \tag{8}$$
+and thus the relative amplitudes of $\theta_m$ and $\theta_p$ depend on the Poincaré number $\delta\omega$.
 To investigate Mercury’s response to the gravitational torque from the Sun, we take ad-
 vantage of the framework developed in Mathews et al. [1991] to model the forced nutations of
 Earth [see also Mathews et al., 2002; Dehant and Mathews, 2015]. This model takes into ac-
@@ -423,49 +830,111 @@ ods of Mercury, the gravitational solar torque that is relevant to the Cassini s
 torque averaged over one orbit. This mean torque is perpendicular to the Cassini plane, point-
 ing in the same direction as the vector connecting the Sun to the descending node of Mercury’s
 orbit in Figure 1. Hence, viewed from the mantle frame, the orientation of this mean torque
-is periodic, rotating at frequency ωΩ . Setting the equatorial directions eˆp and eˆp to correspond
-o 1 2
+is periodic, rotating at frequency ωΩ
+o
+. Setting the equatorial directions ˆ ep
+1
+and ˆ ep
+2
+to correspond
 to the real and imaginary axes of the complex plane, respectively, we can write the equatorial
 components of this periodic applied torque in a compact form as
-Γ (t)+iΓ (t)=−iΓ˜(ω) exp[iωΩ t], (9)
-1 2 o
-where Γ˜(ω) represents the amplitude of the torque at frequency ωΩ . In response to this torque,
+Γ
+1
+(t)+iΓ
+2
+(t)=−i˜ Γ(ω) exp[iωΩ
 o
-the axes defining all angles (θ , ε , θ , θ , θ , θ ) as viewed in the mantle frame are also ro-
-p m m f s n
-tating at frequency ωΩ (see Figure 2). The longitudinal direction of each of these angles at
+t], (9)
+where ˜ Γ(ω) represents the amplitude of the torque at frequency ωΩ
 o
-a specific time t can then also be written in the equatorial complex plane and is proportional
-to exp[iωΩ t]. For instance, the two equatorial time-dependent components θ and θ of the
-o m1 m2
-angle θ , as seen in the mantle frame, can be written as
+. In response to this torque,
+the axes defining all angles (θ
+p
+, ε
 m
-θ (t)+iθ (t)=m˜ exp[iωΩ t], (10a)
-m1 m2 o
-where
-m˜ ≡m˜(ω)=Re[m˜]+iIm[m˜], (10b)
-is the amplitude at frequency ωΩ . Equivalent definitions apply for all other angles, with the
-o
-connection as follows:
-θ ⇔m˜ , θ ⇔m˜ , θ ⇔m˜ , θ ⇔n˜ , θ ⇔p˜, ε ⇔ε˜ . (11)
-m f f s s n s p m m
-The notation m˜, m˜ , m˜ , n˜ follows that introduced in the original model of Mathews et al. [1991].
-f s s
+, θ
+m
+, θ
+f
+, θ
+s
+, θ
+n
+) as viewed in the mantle frame are also ro-
+tating at frequency ωΩ
+o
+(see Figure 2). The longitudinal direction of each of these angles at
+a specific time t can then also be written in the equatorial complex plane and is proportional
+to exp[iωΩ
+o
+t]. For instance, the two equatorial time-dependent components θ
+m1
+and θ
+m2
+of the
+angle θ
+m
+, as seen in the mantle frame, can be written as
+$$\theta_{m1}(t)+i\,\theta_{m2}(t)=\tilde{m}\,\exp[i\omega\Omega_o t], \tag{10a}$$
+where
+$$\tilde{m}\equiv\tilde{m}(\omega)=\mathrm{Re}[\tilde{m}]+i\,\mathrm{Im}[\tilde{m}], \tag{10b}$$
+is the amplitude at frequency ωΩ
+o
+. Equivalent definitions apply for all other angles, with the
+connection as follows:
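+$$\theta_m\Leftrightarrow\tilde{m},\quad \theta_f\Leftrightarrow\tilde{m}_f,\quad \theta_s\Leftrightarrow\tilde{m}_s,\quad \theta_n\Leftrightarrow\tilde{n}_s,\quad \theta_p\Leftrightarrow\tilde{p},\quad \varepsilon_m\Leftrightarrow\tilde{\varepsilon}_m. \tag{11}$$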
+The notation ˜ m, ˜ m
+f
+, ˜ m
+s
+, ˜ n
+s
+follows that introduced in the original model of Mathews et al. [1991].
 Note that all tilded amplitudes are complex: their imaginary part reflects the out-of-phase re-
 sponse to the applied torque as a result of dissipation, for instance from viscous or EM coupling
 –9–
 Confidential manuscript submitted to JGR-Planets
 at the boundaries of the fluid core. In the absence of dissipation, all tilded variables are purely
 real. We concentrate our analysis in this work on the real part of the solutions, which corre-
-sponds to the mutual alignment of these five rotation angles in the Cassini plane. As such, ε˜
+sponds to the mutual alignment of these five rotation angles in the Cassini plane. As such, ˜ ε
 m
-corresponds to the observed obliquity of the mantle symmetry axis. It is thus equivalent to ε ,
+corresponds to the observed obliquity of the mantle symmetry axis. It is thus equivalent to ε
 m
+,
 though we keep the tilde notation in the presentation of our results to emphasize that it rep-
-resents the real part of the solution from our system. Furthermore, since m˜ (cid:28) ε˜ , we often
+resents the real part of the solution from our system. Furthermore, since ˜ m (cid:28) ˜ ε
 m
-refer to ε˜ as the orientation of spin axis of the mantle, since the Cassini state of Mercury is
+, we often
+refer to ˜ ε
 m
+as the orientation of spin axis of the mantle, since the Cassini state of Mercury is
 more customarily described in terms of the latter in the literature.
 The model of Mathews et al. [1991] is developed under the assumption of small angles as
 appropriate for the nutations on Earth. The details on how the equations of the model are de-
@@ -473,119 +942,434 @@ rived can found in Mathews et al. [1991] and in Dumberry and Wieczorek [2016]. T
 tions describe, respectively, the time rate of change of the angular momenta of the whole of Mer-
 cury, the fluid core, and the inner core in the reference frame of the rotating mantle. These three
 equations are
-(cid:34) (cid:35)
-A¯ A¯ A¯ 1 (cid:16) (cid:17)
-(ω−e)m˜ +(1+ω) fm˜ + sm˜ +α e sn˜ = Γ˜ , (12a)
-A¯ f A¯ s 3 s A¯ s iΩ2A¯ sun
+$$(\omega-e)\,\tilde{m}+(1+\omega)\left[\frac{\bar{A}_f}{\bar{A}}\,\tilde{m}_f+\frac{\bar{A}_s}{\bar{A}}\,\tilde{m}_s+\alpha_3 e_s\frac{\bar{A}_s}{\bar{A}}\,\tilde{n}_s\right]=\frac{1}{i\Omega_o^2\bar{A}}\left(\tilde{\Gamma}_{\mathrm{sun}}\right), \tag{12a}$$
+$$\omega\,\tilde{m}+(1+\omega+e_f)\,\tilde{m}_f-\omega\alpha_1 e_s\frac{\bar{A}_s}{\bar{A}_f}\,\tilde{n}_s=\frac{1}{i\Omega_o^2\bar{A}_f}\left(-\tilde{\Gamma}_{\mathrm{cmb}}-\tilde{\Gamma}_{\mathrm{icb}}\right), \tag{12b}$$
+$$(\omega-\alpha_3 e_s)\,\tilde{m}+\alpha_1 e_s\,\tilde{m}_f+(1+\omega)\,\tilde{m}_s+(1+\omega-\alpha_2)\,e_s\,\tilde{n}_s=\frac{1}{i\Omega_o^2\bar{A}_s}\left(\tilde{\Gamma}^{s}_{\mathrm{sun}}+\tilde{\Gamma}_{\mathrm{icb}}\right), \tag{12c}$$
 and a fourth equation consists of a kinematic relation that expresses the change in the orien-
 tation of the inner core figure as a result of its own rotation,
-m˜ +ωn˜ =0. (12d)
-s s
-In these equations, the parameters α , α and α involve the density contrast at the ICB
-1 2 3
+˜ m
+s
++ω˜ n
+s
+=0. (12d)
+In these equations, the parameters α
+1
+, α
+2
+and α
+3
+involve the density contrast at the ICB
 and are given by
+α
+1
+=
+ρ
+f
 ρ
-α = f , α =1−α , α =α −α α , (13a)
-1 ρ 3 1 2 1 3 g
 s
-where the parameter α is a measure of the ratio of the gravitational to inertial torque applied
+, α
+3
+=1−α
+1
+, α
+2
+=α
+1
+−α
+3
+α
+g
+, (13a)
+where the parameter α
 g
+is a measure of the ratio of the gravitational to inertial torque applied
 on the inner core,
+α
+g
+=
 8πG
-α = [ρ ((cid:15) −(cid:15) )+ρ ((cid:15) −(cid:15) )+ρ (cid:15) ] , (13b)
-g 5Ω2 c r m m m f f f
+5Ω2
 o
+[ρ
+c
+((cid:15)
+r
+−(cid:15)
+m
+)+ρ
+m
+((cid:15)
+m
+−(cid:15)
+f
+)+ρ
+f
+(cid:15)
+f
+] , (13b)
 where G is the gravitational constant.
-Γ˜ is the amplitude of the gravitational torque by the Sun on the whole of Mercury. For
+˜ Γ
+sun
+is the amplitude of the gravitational torque by the Sun on the whole of Mercury. For
+a small mantle obliquity ˜ ε
+m
+and a small inner core tilt ˜ n
+s
+, it is given by
+˜ Γ
 sun
-a small mantle obliquity ε˜ and a small inner core tilt n˜ , it is given by
-m s
-(cid:18) A¯ (cid:19)
-Γ˜ =−iΩ2A¯ φ ε˜ + sα φ n˜ , (14)
-sun o m m A¯ 3 s s
+=−iΩ2
+o
+¯ A
+(cid:18)
+φ
+m
+˜ ε
+m
++
+¯ A
+s
+¯ A
+α
+3
+φ
+s
+˜ n
+s
+(cid:19)
+, (14)
 where
 –10–
 Confidential manuscript submitted to JGR-Planets
-3n2 (cid:20) 1 (cid:21)
-φ = G e+ G γ , (15a)
-m 2Ω2 210 2 201
+φ
+m
+=
+3
+2
+n2
+Ω2
 o
-3n2 (cid:20) 1 (cid:21)
-φ = G e + G γ , (15b)
-s 2Ω2 210 s 2 201 s
+(cid:20)
+G
+210
+e+
+1
+2
+G
+201
+γ
+(cid:21)
+, (15a)
+φ
+s
+=
+3
+2
+n2
+Ω2
 o
-and where G and G are functions of the orbital eccentricity e ,
-210 201 c
+(cid:20)
+G
+210
+e
+s
++
 1
-G = , (16a)
-210 (1−e2)3/2
+2
+G
+201
+γ
+s
+(cid:21)
+, (15b)
+and where G
+210
+and G
+201
+are functions of the orbital eccentricity e
 c
-7 123 489
-G = e − e3+ e5. (16b)
-201 2 c 16 c 128 c
-The gravitational torque by the Sun acting on the inner core alone, Γ˜s , is
+,
+G
+210
+=
+1
+(1−e2
+c
+)3/2
+, (16a)
+G
+201
+=
+7
+2
+e
+c
+−
+123
+16
+e3
+c
++
+489
+128
+e5
+c
+. (16b)
+The gravitational torque by the Sun acting on the inner core alone, ˜ Γs
 sun
-Γ˜s =−iΩ2A¯ α φ (ε˜ +n˜ ). (17)
-sun o s 3 s m s
-Γ˜ and Γ˜ are the torques from tangential stresses by the fluid core on the mantle at the
-cmb icb
+, is
+˜ Γs
+sun
+=−iΩ2
+o
+¯ A
+s
+α
+3
+φ
+s
+(˜ ε
+m
++˜ n
+s
+). (17)
+˜ Γ
+cmb
+and ˜ Γ
+icb
+are the torques from tangential stresses by the fluid core on the mantle at the
 CMB and on the inner core at the ICB, respectively. These torques can be parameterized in
-terms of dimensionless complex coupling constants K and K and the differential angu-
-icb cmb
+terms of dimensionless complex coupling constants K
+icb
+and K
+cmb
+and the differential angu-
 lar velocities at each boundary [e.g Buffett, 1992; Buffett et al., 2002],
-Γ˜ =iΩ2A¯ K (m˜ −m˜ ), (18a)
-icb o s icb f s
-Γ˜ =iΩ2A¯ K m˜ . (18b)
-cmb o f cmb f
-Specific expressions for K and K are delayed to sections 4 and 5 when we consider the
-icb cmb
+$$\tilde{\Gamma}_{\mathrm{icb}}=i\Omega_o^2\bar{A}_s K_{\mathrm{icb}}\,(\tilde{m}_f-\tilde{m}_s), \tag{18a}$$
+$$\tilde{\Gamma}_{\mathrm{cmb}}=i\Omega_o^2\bar{A}_f K_{\mathrm{cmb}}\,\tilde{m}_f. \tag{18b}$$
+Specific expressions for K
+icb
+and K
+cmb
+are delayed to sections 4 and 5 when we consider the
 effects of viscous and EM coupling, respectively.
 A fifth equation is required to connect this interior model to the obliquity of the mantle,
-and this is provided by Equation (7). For small angles θ and θ , this gives [e.g. Mathews et al.,
-m p
+and this is provided by Equation (7). For small angles θ
+m
+and θ
+p
+, this gives [e.g. Mathews et al.,
 1991; Dumberry and Wieczorek, 2016; Baland et al., 2019]
-m˜ +(1+ω)p˜=0. (19)
-For Mercury, it is more convenient to connect the internal model with ε˜ instead of p˜. This
+$$\tilde{m}+(1+\omega)\,\tilde{p}=0. \tag{19}$$
+For Mercury, it is more convenient to connect the internal model with ˜ ε
 m
-is because θ ≈ 8.567◦ whereas ε˜ ≈ 2 arcmin and thus the latter obeys more strictly the
-p m
+instead of ˜ p. This
+is because θ
+p
+≈ 8.567◦ whereas ˜ ε
+m
+≈ 2 arcmin and thus the latter obeys more strictly the
 condition of small angles assumed in our framework. Furthermore, the external torques act-
-ing on the whole planet (Equation 14) and inner core (Equation 17) depend linearly on ε˜ . Writ-
+ing on the whole planet (Equation 14) and inner core (Equation 17) depend linearly on ˜ ε
+m
+. Writ-
+ten in terms of ˜ ε
 m
-ten in terms of ε˜ , and with the approximation of ε˜ (cid:28)1 and m˜ (cid:28)1, Equation (7) becomes
-m m
-m˜ +(1+ω)ε˜ =−(1+ω)tanI. (20)
+, and with the approximation of ˜ ε
 m
+(cid:28)1 and ˜ m(cid:28)1, Equation (7) becomes
+˜ m+(1+ω)˜ ε
+m
+=−(1+ω)tanI. (20)
 Likewise, the frequency ω from Equation (5) can be written simply in terms of I,
 ω =−1−δωcosI. (21)
 The set of four Equations (12) with the addition of Equation (20) form a linear system
-of equations for the five rotational variables m˜, m˜ , m˜ , n˜ and ε˜ . It captures the response
-f s s m
+of equations for the five rotational variables ˜ m, ˜ m
+f
+, ˜ m
+s
+, ˜ n
+s
+and ˜ ε
+m
+. It captures the response
 of Mercury, in the frequency domain, when subject to a periodic solar torque applied at fre-
 quency ω. The system can be written in a matrix form as
 –11–
 Confidential manuscript submitted to JGR-Planets
 M·x =y, (22a)
 where the solution (x) and forcing (y) vectors are
-xT =[m˜,m˜ ,m˜ ,n˜ ,ε˜ ] , (22b)
-f s s m
+xT =[˜ m, ˜ m
+f
+, ˜ m
+s
+,˜ n
+s
+,˜ ε
+m
+] , (22b)
 yT =[0,0,0,0,−(1+ω)tanI] , (22c)
 and the elements of matrix M are
- ω−e (1+ω)A¯ A¯f (1+ω)A A¯ ¯s A A¯ ¯sα 3(cid:0) (1+ω)e s+φ s(cid:1) φ m 
-M=  ω−ω 1+ω+e αf 1e+ sK −c Kmb ic+ AA ¯¯ fsK icb 1+− AA ω¯¯ fs +K Kicb (1+ω− −ωe αs 2α )1 sAA ¯¯ +fs 30  
- α 3e s b icb e α 3φ s α φ s   .
- 
- 0 0 1 ω 0 
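+$$M=\begin{pmatrix}
+\omega-e & (1+\omega)\dfrac{\bar{A}_f}{\bar{A}} & (1+\omega)\dfrac{\bar{A}_s}{\bar{A}} & \dfrac{\bar{A}_s}{\bar{A}}\,\alpha_3\left((1+\omega)e_s+\phi_s\right) & \phi_m \\
+\omega & 1+\omega+e_f+K_{\mathrm{cmb}}+\dfrac{\bar{A}_s}{\bar{A}_f}K_{\mathrm{icb}} & -\dfrac{\bar{A}_s}{\bar{A}_f}K_{\mathrm{icb}} & -\omega e_s\alpha_1\dfrac{\bar{A}_s}{\bar{A}_f} & 0 \\
+\omega-\alpha_3 e_s & \alpha_1 e_s-K_{\mathrm{icb}} & 1+\omega+K_{\mathrm{icb}} & (1+\omega-\alpha_2)\,e_s+\alpha_3\phi_s & \alpha_3\phi_s \\
+0 & 0 & 1 & \omega & 0 \\
+1 & 0 & 0 & 0 & (1+\omega)
+\end{pmatrix}.$$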
 (22d)
 Solutions of the homogeneous system (i.e. y=0) represent free modes of precession. Three
 modes have periods which, when seen in inertial space, are typically in the range of a few hun-
@@ -625,216 +1409,585 @@ Confidential manuscript submitted to JGR-Planets
 2.3.1 The Cassini state of a single-body, rigid Mercury
 For a rigid planet with no fluid and solid cores, our system of equations reduces to Equa-
 tions (12a) and (20),
-(ω−e)m˜ +φ ε˜ =0, (23a)
-m m
-m˜ +(1+ω)ε˜ =−(1+ω)tanI. (23b)
+(ω−e)˜ m+φ
+m
+˜ ε
 m
-Using Equation (21), δω (cid:28)1, and the approximation A¯(1+e+δωcosI)=C+A¯δωcosI ≈
+=0, (23a)
+$$\tilde{m} + (1+\omega)\,\tilde{\varepsilon}_m = -(1+\omega)\tan I. \tag{23b}$$
+Using Equation (21), $\delta\omega \ll 1$, and the approximation $\bar{A}(1+e+\delta\omega\cos I) = C + \bar{A}\,\delta\omega\cos I \approx$
 C, these can be written as
-Cm˜ =A¯φ ε˜ , (24a)
-m m
-(cid:0) (cid:1)
-m˜ =δω sinI+cosIε˜ . (24b)
+C˜ m= ¯ Aφ
+m
+˜ ε
 m
-Equation (24b) gives a direct relationship between m˜ and ε˜ . For I = 8.5330◦, δω =
+, (24a)
+˜ m=δω
+(cid:0)
+sinI+cosI ˜ ε
 m
-4.9327×10−7 and taking ε˜ =2.04 arcmin, this gives m˜ =2.52×10−4 arcmin, much smaller
+(cid:1)
+. (24b)
+Equation (24b) gives a direct relationship between ˜ m and ˜ ε
 m
-than ε˜ : the offset of the rotation axis of the mantle with respect to its symmetry axis is very
+. For I = 8.5330◦, δω =
+4.9327×10−7 and taking ˜ ε
 m
+=2.04 arcmin, this gives ˜ m=2.52×10−4 arcmin, much smaller
+than ˜ ε
+m
+: the offset of the rotation axis of the mantle with respect to its symmetry axis is very
 small. Substituting Equation (24b) in Equation (24a) gives
-CΩ (cid:0) sinI+cosIε˜ (cid:1) =A¯Ω φ ε˜ , (25)
-p m o m m
-and isolating for ε˜ ,
-m
-CΩ sinI
-ε˜ = p . (26)
-m −CΩ cosI+A¯Ω φ
-p o m
-Upon using Equations (4), (15a), and Ω = 3n, we can write
-o 2
-CΩ sinI
-ε˜ = p . (27)
-m −CΩ cosI+nMR2(G J +2G C )
-p 210 2 201 22
+$$C\,\Omega_p\left(\sin I + \cos I\,\tilde{\varepsilon}_m\right) = \bar{A}\,\Omega_o\,\phi_m\,\tilde{\varepsilon}_m, \tag{25}$$
+and isolating for $\tilde{\varepsilon}_m$,
+$$\tilde{\varepsilon}_m = \frac{C\,\Omega_p\sin I}{-C\,\Omega_p\cos I + \bar{A}\,\Omega_o\,\phi_m}. \tag{26}$$
+Upon using Equations (4), (15a), and $\Omega_o = \tfrac{3}{2}n$, we can write
+$$\tilde{\varepsilon}_m = \frac{C\,\Omega_p\sin I}{-C\,\Omega_p\cos I + n M R^2\left(G_{210}J_2 + 2G_{201}C_{22}\right)}. \tag{27}$$
 This is the standard prediction for the obliquity of a rigid Mercury occupying Cassini state 1
-[see for instance Equation (1) of Baland et al., 2017, where their definition of Ω˙ is equal to −Ω ].
+[see for instance Equation (1) of Baland et al., 2017, where their definition of ˙ Ω is equal to −Ω
 p
+].
 Hence, in the absence of a fluid core and inner core, our system retrieves the Cassini state of
 Mercury correctly. Equation (27) can be manipulated to solve instead for the normalized mo-
-ment of inertia Cˆ,
-C n G J +2G C
-Cˆ = = 210 2 201 22 . (28)
-MR2 Ω cosI+sinI/ε˜
-p m
+ment of inertia ˆ C,
+$$\hat{C} = \frac{C}{MR^2} = \frac{n}{\Omega_p}\,\frac{G_{210}J_2 + 2G_{201}C_{22}}{\cos I + \sin I/\tilde{\varepsilon}_m}. \tag{28}$$
 which is equivalent to Equation (89) of Van Hoolst [2015]. It is based on the latter equation
-that a measurement of the obliquity gives a constraint on Cˆ.
+that a measurement of the obliquity gives a constraint on ˆ C.
 Two free modes of precession are found by setting y=0 in Equation (23). One mode cor-
 responds to the Eulerian wobble, or Chandler wobble, and represents the prograde precession
 of the rotation axis about the symmetry axis. The second mode is the free retrograde axial pre-
 cession of Mercury. As seen in the inertial frame, its frequency is given by
 –13–
 Confidential manuscript submitted to JGR-Planets
-MR2(cid:16) (cid:17)
-ω =n G J +2G C , (29)
-fp C 210 2 201 22
+$$\omega_{fp} = n\,\frac{MR^2}{C}\left(G_{210}J_2 + 2G_{201}C_{22}\right), \tag{29}$$
 which is equivalent to the prediction by Peale [2005] when neglecting its small elliptical com-
 ponent. Note that in Peale [2005] it was assumed that only the mantle was involved in the solid-
-body precession and hence C was replaced by C . Using C = 0.346 · MR2 [Margot et al.,
-m
-2012] and the numerical values for n, J , C and e given in Table 1, we obtain a free preces-
-2 22 c
-sion period of T =2π/ω =1298 yr. If we use C instead of C in Equation (29), and take
-fp fp m
-C =0.431·C =0.431·0.346·MR2 [Margot et al., 2012], we obtain T =2π/ω =560 yr.
-m fp fp
+body precession and hence C was replaced by C
+m
+. Using C = 0.346 · MR2 [Margot et al.,
+2012] and the numerical values for n, J
+2
+, C
+22
+and e
+c
+given in Table 1, we obtain a free preces-
+sion period of T
+fp
+=2π/ω
+fp
+=1298 yr. If we use C
+m
+instead of C in Equation (29), and take
+C
+m
+=0.431·C =0.431·0.346·MR2 [Margot et al., 2012], we obtain T
+fp
+=2π/ω
+fp
+=560 yr.
 These estimates are similar to those obtained by Peale [2005]. Because the CMB is elliptical,
 the pressure torque exerted on the fluid core by the mantle leads to an entrainment of the fluid
 core, the degree of which depends on the amplitude of the pole-to-equator CMB flattening. The
 true free precession period lies somewhere between 560 and 1298 yr. Regardless of its exact value,
 the free precession period is much shorter than the forcing period of 325 kyr. Using Equation
 (29), Equation (27) can be written as [e.g. Baland et al., 2017]
-Ω sinI
-ε˜ = p . (30)
-m −Ω cosI+ω
-p fp
-The obliquity of Mercury is thus determined by how the forcing frequency Ω compares with
-p
-the free precession frequency ω . Because ω >Ω , Mercury occupies Cassini state 1 [Peale,
-fp fp p
+$$\tilde{\varepsilon}_m = \frac{\Omega_p\sin I}{-\Omega_p\cos I + \omega_{fp}}. \tag{30}$$
+The obliquity of Mercury is thus determined by how the forcing frequency $\Omega_p$ compares with
+the free precession frequency $\omega_{fp}$. Because $\omega_{fp} > \Omega_p$, Mercury occupies Cassini state 1 [Peale,
 1974]. Furthermore, Equation (30) shows that a large obliquity can be generated by resonant
-amplification if Ω ≈ ω . Since ω (cid:29) Ω , resonant amplification is minimal and the re-
-p fp fp p
-sulting obliquity, ε˜ ≈2 arcmin, is much smaller than the inclination angle I ≈8.5◦.
+amplification if $\Omega_p \approx \omega_{fp}$. Since $\omega_{fp} \gg \Omega_p$, resonant amplification is minimal and the re-
+sulting obliquity, ˜ ε
 m
+≈2 arcmin, is much smaller than the inclination angle I ≈8.5◦.
 2.3.2 The misalignment of the fluid and solid cores
-With ω =−1−δωcosI and δω (cid:28)1, Equation (12d) gives n˜ ≈m˜ ; as for the mantle,
-s s
+With ω =−1−δωcosI and δω (cid:28)1, Equation (12d) gives ˜ n
+s
+≈ ˜ m
+s
+; as for the mantle,
 the rotation and symmetry axes of the inner core remain closely aligned in the Cassini state.
-The relationship between m˜ and ε˜ of Equation (24b) is independent of the interior structure,
+The relationship between ˜ m and ˜ ε
 m
+of Equation (24b) is independent of the interior structure,
 so it remains unchanged when a fluid and a solid cores are present. Substituting it in Equa-
-tion (12a), and setting n˜ =m˜ , the angular momentum equation of the whole planet becomes
-s s
-CΩ (cid:0) sinI+cosIε˜ (cid:1) +(A¯ cosIΩ )m˜ +A¯ (cosIΩ −Ω α φ )n˜ =A¯Ω φ ε˜ . (31)
-p m f p f s p o 3 s s o m m
+tion (12a), and setting ˜ n
+s
+= ˜ m
+s
+, the angular momentum equation of the whole planet becomes
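+$$C\,\Omega_p\left(\sin I + \cos I\,\tilde{\varepsilon}_m\right) + \left(\bar{A}_f\cos I\,\Omega_p\right)\tilde{m}_f + \bar{A}_s\left(\cos I\,\Omega_p - \Omega_o\,\alpha_3\,\phi_s\right)\tilde{n}_s = \bar{A}\,\Omega_o\,\phi_m\,\tilde{\varepsilon}_m. \tag{31}$$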
 This latter equation shows how the misaligned inner core and fluid core can lead to a modifi-
-cation of the mantle obliquity ε˜ . Approximate analytical solutions of n˜ and m˜ are given by
-m s f
-(cid:18) (cid:19)
-n˜ ≈ Ω p 1+ Ω o(K icb−α 1e s) (cid:0) sinI+cosIε˜ (cid:1) − Ω oα 3φ sε˜ , (32a)
-s κλ λ m κλ m
-s f s
-m˜ ≈ Ω p(cid:0) sinI+cosIε˜ (cid:1) + Ω o A¯ s(cid:0) K −α e (cid:1) n˜ , (32b)
-f λ m λ A¯ icb 1 s s
-f f f
+cation of the mantle obliquity ˜ ε
+m
+. Approximate analytical solutions of ˜ n
+s
+and ˜ m
+f
+are given by
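+$$\tilde{n}_s \approx \frac{\Omega_p}{\kappa\lambda_s}\left(1 + \frac{\Omega_o\left(K_{icb} - \alpha_1 e_s\right)}{\lambda_f}\right)\left(\sin I + \cos I\,\tilde{\varepsilon}_m\right) - \frac{\Omega_o\,\alpha_3\,\phi_s}{\kappa\lambda_s}\,\tilde{\varepsilon}_m, \tag{32a}$$
+$$\tilde{m}_f \approx \frac{\Omega_p}{\lambda_f}\left(\sin I + \cos I\,\tilde{\varepsilon}_m\right) + \frac{\Omega_o}{\lambda_f}\,\frac{\bar{A}_s}{\bar{A}_f}\left(K_{icb} - \alpha_1 e_s\right)\tilde{n}_s, \tag{32b}$$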
 where
-A¯ Ω2(cid:0) K −α e (cid:1)2
-κ=1− s o icb 1 s , (33a)
-A¯ λ λ
-f s f
-λ =σ¯ −Ω cosI, (33b)
-f f p
-λ =σ¯ −Ω cosI, (33c)
-s s p
+$$\kappa = 1 - \frac{\bar{A}_s}{\bar{A}_f}\,\frac{\Omega_o^2\left(K_{icb} - \alpha_1 e_s\right)^2}{\lambda_s\lambda_f}, \tag{33a}$$
+$$\lambda_f = \bar{\sigma}_f - \Omega_p\cos I, \tag{33b}$$
+$$\lambda_s = \bar{\sigma}_s - \Omega_p\cos I, \tag{33c}$$
 –14–
 Confidential manuscript submitted to JGR-Planets
 and where we have introduced the frequencies
-(cid:18) A¯ (cid:19)
-σ¯ =Ω e +K + sK , (33d)
-f o f cmb A¯ icb
+¯ σ
 f
-(cid:16) (cid:17)
-σ¯ =Ω e α α −e α +α φ +K . (33e)
-s o s 3 g s 1 3 s icb
+=Ω
+o
+(cid:18)
+e
+f
++K
+cmb
++
+¯ A
+s
+¯ A
+f
+K
+icb
+(cid:19)
+, (33d)
+$$\bar{\sigma}_s = \Omega_o\left(e_s\alpha_3\alpha_g - e_s\alpha_1 + \alpha_3\phi_s + K_{icb}\right). \tag{33e}$$
 These solutions are good approximations for all the results that we present in section 3. For
-an observed mantle obliquity ε˜ and for a chosen set of interior model parameters, they pro-
+an observed mantle obliquity ˜ ε
 m
-vide useful predictions of n˜ and m˜ .
-s f
-In the limit of a very strong coupling between the fluid core, solid core and mantle, σ¯ (cid:29)
+and for a chosen set of interior model parameters, they pro-
+vide useful predictions of ˜ n
+s
+and ˜ m
+f
+.
+In the limit of a very strong coupling between the fluid core, solid core and mantle, ¯ σ
+s
+(cid:29)
+Ω
+p
+and ¯ σ
+f
+(cid:29) Ω
+p
+, so that ˜ n
 s
-Ω and σ¯ (cid:29) Ω , so that n˜ → 0, m˜ → 0 and Equation (31) reverts back to Equation (25)
-p f p s f
+→ 0, ˜ m
+f
+→ 0 and Equation (31) reverts back to Equation (25)
 for a rigid planet. In the opposite limit of no coupling between the fluid core, solid core and
-mantle (i.e. for spherical internal boundaries, e = e = γ = 0 and no viscous or EM cou-
-f s s
-pling, K =K =0), then
-cmb icb
-φ =0, κ=1, λ =λ =−Ω cosI, m˜ =n˜ =−(tanI+ε˜ ). (34)
-s f s p f s m
-Inserting these in Equation (31), and with the moment of inertia of the mantle equal to C =
-m
-C−A¯ −A¯ , we obtain
-f s
-C Ω (cid:0) sinI+cosIε˜ (cid:1) =A¯Ω φ ε˜ . (35)
-m p m o m m
+mantle (i.e. for spherical internal boundaries, $e_f = e_s = \gamma_s = 0$ and no viscous or EM cou-
+pling, $K_{cmb} = K_{icb} = 0$), then
+$$\phi_s = 0, \quad \kappa = 1, \quad \lambda_f = \lambda_s = -\Omega_p\cos I, \quad \tilde{m}_f = \tilde{n}_s = -(\tan I + \tilde{\varepsilon}_m). \tag{34}$$
+Inserting these in Equation (31), and with the moment of inertia of the mantle equal to $C_m = C - \bar{A}_f - \bar{A}_s$, we obtain
+$$C_m\,\Omega_p\left(\sin I + \cos I\,\tilde{\varepsilon}_m\right) = \bar{A}\,\Omega_o\,\phi_m\,\tilde{\varepsilon}_m. \tag{35}$$
 which describes, as expected, a forced precession of the mantle alone. If this was the case for
-Mercury, taking C /C =0.431, the obliquity should be ε˜ ≈0.88 arcmin, substantially smaller
-m m
-than the observed obliquity of ε˜ ≈2 arcmin.
-m
-If σ¯ ≈ Ω (and thus λ → 0) and/or σ¯ ≈ Ω (and thus λ → 0) resonant amplifica-
-f p f s p s
-tion leads to large amplitudes for m˜ , n˜ and the mantle obliquity ε˜ . The frequencies σ¯ and
-f s m f
-σ¯ are closely related to the FCN and FICN frequencies ω and ω , respectively. Hence,
-s fcn ficn
+Mercury, taking C
+m
+/C =0.431, the obliquity should be ˜ ε
+m
+≈0.88 arcmin, substantially smaller
+than the observed obliquity of ˜ ε
+m
+≈2 arcmin.
+If $\bar{\sigma}_f \approx \Omega_p$ (and thus $\lambda_f \to 0$) and/or $\bar{\sigma}_s \approx \Omega_p$ (and thus $\lambda_s \to 0$) resonant amplifica-
+tion leads to large amplitudes for $\tilde{m}_f$, $\tilde{n}_s$ and the mantle obliquity $\tilde{\varepsilon}_m$. The frequencies $\bar{\sigma}_f$ and
+$\bar{\sigma}_s$ are closely related to the FCN and FICN frequencies $\omega_{fcn}$ and $\omega_{ficn}$, respectively. Hence,
 just as a large mantle obliquity can result from resonant amplification when the forcing frequency
 approaches the free precession frequency, a large mantle obliquity can likewise result from res-
 onant amplification when the forcing frequency approaches the FCN or FICN frequencies. These
 frequencies depend on the interior density structure and are not known. However, we will show
 that for reasonable interior models of Mercury, the FCN and FICN periods are in the range of
 a few hundred yr. This is sufficiently far from the forcing period (325 kyr) that we do not ex-
-pect an important amplification effect. Furthermore, since ω ,ω (cid:29) Ω , then σ¯ (cid:29) Ω
-fcn ficn p f p
-and σ¯ (cid:29)Ω , and we are in the strong coupling limit. The mantle obliquity should be close
-s p
-to that expected for a rigid planet, as observations suggest. Therefore, we expect that m˜ and
-f
-n˜ should be of the order of ε˜ or smaller. This further justifies the assumption of small an-
-s m
+pect an important amplification effect. Furthermore, since $\omega_{fcn}, \omega_{ficn} \gg \Omega_p$, then $\bar{\sigma}_f \gg \Omega_p$
+and $\bar{\sigma}_s \gg \Omega_p$, and we are in the strong coupling limit. The mantle obliquity should be close
+to that expected for a rigid planet, as observations suggest. Therefore, we expect that $\tilde{m}_f$ and
+$\tilde{n}_s$ should be of the order of $\tilde{\varepsilon}_m$ or smaller. This further justifies the assumption of small an-
 gles that we have adopted.
 3 Results
 3.1 Geodetic constraints and interior density structure
 All our interior models are constrained to match the mass M of Mercury and specific choices
-of Cˆ = C/MR2 and C /C. The choice of Cˆ is determined from Equation (28). For the pa-
+of ˆ C = C/MR2 and C
 m
-rameters listed in Table 1, and an observed obliquity of ε =2.04 arcmin [Margot et al., 2012],
+/C. The choice of ˆ C is determined from Equation (28). For the pa-
+rameters listed in Table 1, and an observed obliquity of ε
 m
-this gives Cˆ = C/MR2 = 0.3455 and all our interior models are consistent with this choice.
+=2.04 arcmin [Margot et al., 2012],
+this gives ˆ C = C/MR2 = 0.3455 and all our interior models are consistent with this choice.
 Obviously, this reflects a Cassini state equilibrium in which the fluid core and inner core are
 –15–
 Confidential manuscript submitted to JGR-Planets
 perfectly aligned with the mantle, which is not strictly correct. Hence, we make an error in es-
-timating Cˆ from Equation (28), or conversely in predicting ε based on a given choice for Cˆ.
+timating ˆ C from Equation (28), or conversely in predicting ε
 m
-Part of the objective of our study is to estimate how large this error is. The ratio C /C is ob-
+based on a given choice for ˆ C.
+Part of the objective of our study is to estimate how large this error is. The ratio C
 m
-tained from the amplitude of the 88-day longitudinal mantle libration φ , which is given by
+/C is ob-
+tained from the amplitude of the 88-day longitudinal mantle libration φ
+o
+, which is given by
+φ
 o
-MR2 C 1
-φ =6·f(e )C , (36)
-o c 22 C C 1+ζ
+=6·f(e
+c
+)C
+22
+MR2
+C
+C
+C
 m
+1
+1+ζ
+, (36)
 where
+f(e
+c
+)=1−11e2
+c
++
 959
-f(e )=1−11e2+ e4, (37)
-c c 48 c
+48
+e4
+c
+, (37)
 and where ζ is a correction that takes into account the entrainment of the inner core in the li-
 bration [Van Hoolst et al., 2012; Dumberry et al., 2013; Dumberry and Rivoldini, 2015]; this cor-
 rection is small and, to simplify, we neglect it here. Taking the observed libration amplitude
-to be 38.5 arcsec [Margot et al., 2012], Cˆ = C/MR2 = 0.3455 and C and e from Table 1,
-22 c
-this corresponds to a ratio C /C =0.4269, or equivalently Cˆ =C /MR2 =0.1475.
-m m m
-For all results presented in our study, the crustal density is set at ρ =2974 kg m−3 [Sori,
+to be 38.5 arcsec [Margot et al., 2012], ˆ C = C/MR2 = 0.3455 and C
+22
+and e
 c
+from Table 1,
+this corresponds to a ratio C
+m
+/C =0.4269, or equivalently ˆ C
+m
+=C
+m
+/MR2 =0.1475.
+For all results presented in our study, the crustal density is set at ρ
+c
+=2974 kg m−3 [Sori,
 2018]. Our standard choice for the crustal thickness is h = 26 km [Sori, 2018], although in
 section 3.2 we also present some results with other choices of h. We have considered two pos-
 sible prescriptions connected to the density of the inner core. First, for all the results presented
-in sections 3.2, 3.3 and 3.4, we have used a fixed inner core density of ρ =8800 kg m−3 ap-
+in sections 3.2, 3.3 and 3.4, we have used a fixed inner core density of ρ
 s
+=8800 kg m−3 ap-
 proximately that obtained in Dumberry and Rivoldini [2015] under the assumption of a pure
 Fe composition in face-centered cubic phase. This captures an end-member scenario where the
 core composition is an Fe-S alloy; at Mercury’s core conditions, crystallization of Fe is relatively
@@ -845,304 +1998,721 @@ ICB is expected to be small, although since density increases with depth, the co
 the mean densities of the fluid and solid cores is larger. It is these mean densities that enter
 our Mercury model with uniform density layers. To capture this other end-member core com-
 position scenario, in section 3.5 we present results where we instead prescribe a fixed density
-contrast between the fluid and solid core; specifically, we set the numerical value of α .
-3
-For a given choice of inner core radius r , the densities of the mantle (ρ ) and fluid core
-s m
-(ρ ) and the radius of the CMB (r ) are determined such that the interior model matches M,
-f f
-Cˆ = 0.3455 and Cˆ = 0.1475. Figure 3a shows how ρ , ρ and r vary as a function of in-
-m m f f
-ner core radius r for each of the two inner core density scenarios: a fixed ρ , or a fixed α . When
-s s 3
+contrast between the fluid and solid core; specifically, we set the numerical value of $\alpha_3$.
+For a given choice of inner core radius $r_s$, the densities of the mantle ($\rho_m$) and fluid core
+($\rho_f$) and the radius of the CMB ($r_f$) are determined such that the interior model matches $M$,
+$\hat{C} = 0.3455$ and $\hat{C}_m = 0.1475$. Figure 3a shows how $\rho_m$, $\rho_f$ and $r_f$ vary as a function of in-
+ner core radius $r_s$ for each of the two inner core density scenarios: a fixed $\rho_s$, or a fixed $\alpha_3$. When
 the inner core is small, its presence has a limited influence on the resulting density structure,
-and we find ρ = 3197 kg m−3, ρ = 7263 kg m−3 and r = 2000 km in each of the two
-m f f
-scenarios. When ρ is fixed to 8800 kg m−3, as the inner core reaches 1500 km in size, r in-
-s f
-creases to above 2100 km, ρ approaches 4000 kg m−3 and ρ is reduced to below 5000 kg m−3.
-m f
-Figure 3a illustrates that when adopting a fixed ρ , there is a limit in the possible inner core
+and we find ρ
+m
+= 3197 kg m−3, ρ
+f
+= 7263 kg m−3 and r
+f
+= 2000 km in each of the two
+scenarios. When ρ
 s
-size, as otherwise ρ gets unreasonably large and ρ gets inappropriately small (as it would
-m f
+is fixed to 8800 kg m−3, as the inner core reaches 1500 km in size, r
+f
+in-
+creases to above 2100 km, ρ
+m
+approaches 4000 kg m−3 and ρ
+f
+is reduced to below 5000 kg m−3.
+Figure 3a illustrates that when adopting a fixed ρ
+s
+, there is a limit in the possible inner core
+size, as otherwise ρ
+m
+gets unreasonably large and ρ
+f
+gets inappropriately small (as it would
 require an excessively large concentration of light elements). When adopting instead a fixed den-
-sity contrast, with α =0.1, the changes in r , ρ and ρ with inner core radius are more mod-
-3 f m f
-est, allowing larger possible inner core sizes. Different assumptions on ρ and h would alter the
+sity contrast, with α
+3
+=0.1, the changes in r
+f
+, ρ
+m
+and ρ
+f
+with inner core radius are more mod-
+est, allowing larger possible inner core sizes. Different assumptions on ρ
 c
-numerical values shown on Figure 3a but not their trends with r .
+and h would alter the
+numerical values shown on Figure 3a but not their trends with r
 s
-Figure 3b shows how the FCN and FICN periods vary with r for each of the two inner
+.
+Figure 3b shows how the FCN and FICN periods vary with r
 s
-core density scenarios and in the absence of viscous and EM coupling (i.e. K = K =
-cmb icb
+for each of the two inner
+core density scenarios and in the absence of viscous and EM coupling (i.e. K
+cmb
+= K
+icb
+=
 –16–
 Confidential manuscript submitted to JGR-Planets
-8000
-a b
-2100 1400
-7000 fluid core density
+0
+200
+400
+600
+800 1000
 1200
-2080 FCNint )mk(
-)3m/gk(
-6000 1000 suidar )ry(
-2060
-800 doirep
-5000 radius ytisned eroc
+1400
+p
+eri o d ( yr)
+0 200 400 600 800 1000 1200 1400
+Inner core radius (km)
+3000
+4000
+5000 6000
+7000
+8000
+d e n
+sit y ( k g/ m 3)
+0 200 400 600 800 1000 1200 1400
+Inner core radius (km)
+2000
+2020
 2040
-B 600 M diulF
-C FCN
-4000 2020 400
-mantle density 200 FICN
-3000 2000
-0
-0 200 400 600 800 1000 1200 1400 0 200 400 600 800 1000 1200 1400
-Inner core radius (km) Inner core radius (km)
+2060
+2080
+2100
+Fl
+ui d c
+or e r a di u s ( k
+m)
+fluid core density
+C M B ra
+di us
+FICN
+FCNint
+mantle density
+a b
+FCN
 Figure 3. a) Fluid core density (red), mantle density (blue), fluid core radius (orange, right-hand
 side scale) and b) FICN (blue) and FCN (red) periods as a function of inner core radius. The FCN
 period when the external torque is set to zero (FCNint) is shown in orange. Solid lines correspond to
 a scenario where the density of the inner core is set to 8800 kg m−3; thin dashed lines correspond to a
-scenario where the density contrast between the fluid and solid cores is set to α =0.1.
+scenario where the density contrast between the fluid and solid cores is set to α
 3
+=0.1.
 0). Both of these free modes are retrograde. The FCN period is close to 400 yr for a small in-
-ner core, increasing to approximately 600 yr at the largest r . The FICN period is shorter, close
+ner core, increasing to approximately 600 yr at the largest r
 s
+. The FICN period is shorter, close
 to 100 yr (160 yr) for a small inner core and decreasing to approximately 40 yr (120 yr) at the
-largest r under the fixed ρ (fixed α ) scenario. This confirms that the FCN and FICN peri-
-s s 3
+largest r
+s
+under the fixed ρ
+s
+(fixed α
+3
+) scenario. This confirms that the FCN and FICN peri-
 ods are both much shorter than the forcing precession period of 325 kyr and sufficiently far away
-from it that we do not expect large m˜ and n˜ from resonant amplification.
-f s
+from it that we do not expect large ˜ m
+f
+and ˜ n
+s
+from resonant amplification.
 The FCN and FICN periods that we have computed include the influence of the exter-
 nal torque. As shown by Baland et al. [2019], the external torque allows solid regions to have
 a free motion in inertial space thereby affecting the free rotational modes. To a good approx-
-imation, the FCN and FICN frequencies (as seen in an inertial frame) for K = K = 0
-cmb icb
+imation, the FCN and FICN frequencies (as seen in an inertial frame) for K
+cmb
+= K
+icb
+= 0
 are given by
-(cid:18) A¯ (cid:19)(cid:16) (cid:17) e φ
-ω ≈−Ω e +φ +Ω f m , (38a)
-fcn o A¯ +A¯ f m o(e +φ )
-m s f m
-(cid:18) A¯+A¯ (cid:19)(cid:16) (cid:17)
-ω ≈Ω s e α −e α α −α φ . (38b)
-ficn o A¯−A¯ s 1 s 3 g 3 s
-s
-The expression of the FICN frequency involves the inertial torque (term e α ) and the grav-
-s 1
-itational torque from the rest of Mercury (e α α ) and the Sun (α φ ) acting on the inner core.
-s 3 g 3 s
-For both of our inner core density scenarios (and our choices of ρ =8800 kg m−3 and α =
-s 3
-0.1), the internal gravitational torque dominates that from the Sun. Furthermore, α α (cid:29)α ;
-3 g 1
+$$\omega_{fcn} \approx -\Omega_o\left(\frac{\bar{A}}{\bar{A}_m + \bar{A}_s}\right)\left(e_f + \phi_m\right) + \Omega_o\,\frac{e_f\,\phi_m}{(e_f + \phi_m)}, \tag{38a}$$
+$$\omega_{ficn} \approx \Omega_o\left(\frac{\bar{A} + \bar{A}_s}{\bar{A} - \bar{A}_s}\right)\left(e_s\alpha_1 - e_s\alpha_3\alpha_g - \alpha_3\phi_s\right). \tag{38b}$$
+The expression of the FICN frequency involves the inertial torque (term $e_s\alpha_1$) and the grav-
+itational torque from the rest of Mercury ($e_s\alpha_3\alpha_g$) and the Sun ($\alpha_3\phi_s$) acting on the inner core.
+For both of our inner core density scenarios (and our choices of $\rho_s = 8800$ kg m$^{-3}$ and $\alpha_3 =
+0.1$), the internal gravitational torque dominates that from the Sun. Furthermore, $\alpha_3\alpha_g \gg \alpha_1$;
 the gravitational torque dominates the inertial torque, in large part because of the slow rota-
 tion rate of Mercury. As a result the FICN frequency is negative (i.e. the precession motion
 is retrograde). This is also the case for the Moon [e.g. Dumberry and Wieczorek, 2016; Stys and
-Dumberry, 2018], but it is different for Earth, where α >α α because of its faster rotation
-1 3 g
+Dumberry, 2018], but it is different for Earth, where α
+1
+>α
+3
+α
+g
+because of its faster rotation
 and the FICN mode is prograde [Mathews et al., 1991]. Note also that our approximate expres-
 –17–
 Confidential manuscript submitted to JGR-Planets
-sion for the FICN differs by a factor (A¯+A¯ )/(A¯−A¯ ) compared to that given in Dumberry
-s s
+sion for the FICN differs by a factor (¯ A+ ¯ A
+s
+)/(¯ A− ¯ A
+s
+) compared to that given in Dumberry
 and Wieczorek [2016] and Stys and Dumberry [2018] for the Moon.
 The expression for FCN frequency differs from the usual expression for Earth. First, it
-involves the external torque from the Sun captured by the parameter φ . If we set φ = 0,
-m m
+involves the external torque from the Sun captured by the parameter φ
+m
+. If we set φ
+m
+= 0,
 we obtain the FCN frequency for a decoupled model in which only interior torques contribute,
-(cid:18) A¯ (cid:19)
-ω ≈−Ω e . (38c)
-fcn,int o A¯ +A¯ f
-m s
-This frequency is slightly different from the usual expression for Earth, involving the ratio A¯/(A¯ +
+ω
+fcn,int
+≈−Ω
+o
+(cid:18) ¯ A
+¯ A
+m
++ ¯ A
+s
+(cid:19)
+e
+f
+. (38c)
+This frequency is slightly different from the usual expression for Earth, involving the ratio ¯ A/(¯ A
+m
++
+¯ A
+s
+) rather than ¯ A/¯ A
 m
-A¯ ) rather than A¯/A¯ . This is because of the relatively thin mantle of Mercury; for the largest
-s m
-r considered, the moment of inertia of the inner core can get close to 40% of that of the man-
+. This is because of the relatively thin mantle of Mercury; for the largest
+r
 s
+considered, the moment of inertia of the inner core can get close to 40% of that of the man-
 tle and is not negligible. The period of the FCN when only interior torques contribute is shown
 in Figure 3b. It is close to 1100 yr for a small inner core, increasing to approximately 1500 yr
-at the largest r . Hence, the influence of the solar torque reduces the FCN period by a factor
+at the largest r
 s
+. Hence, the influence of the solar torque reduces the FCN period by a factor
 of approximately 3. We note that the FICN period, in contrast, is not altered substantially when
 the external torque is set to zero.
 3.2 Gravitational and inertial coupling
 Let us now investigate the obliquities of the mantle, fluid core and inner core in their equi-
-librium Cassini state. We assume a fixed inner core density scenario in this section, with ρ =
+librium Cassini state. We assume a fixed inner core density scenario in this section, with ρ
 s
+=
 8800 kg m−3. Viscous and EM coupling are set to zero in order to isolate the influence of grav-
-itational and inertial coupling. Figure 4 shows how ε˜ , m˜ and n˜ vary as functions of inner
-m f s
+itational and inertial coupling. Figure 4 shows how ˜ ε
+m
+, ˜ m
+f
+and ˜ n
+s
+vary as functions of inner
 core radius. We show calculations for three different choices of crustal thickness, but let us con-
-centrate first on the case for h=26 km. For small r , we retrieve an obliquity of ε˜ =2.0494
-s m
-arcmin (Figure 4a). ε˜ decreases with r , but not substantially; at the largest r (1500 km),
-m s s
-ε˜ = 2.0460 arcmin, a decrease of 0.0034 arcmin. The maximum difference from ε˜ = 2.04
-m m
-arcmin, the obliquity that we used in setting the constraint for Cˆ – and hence the prediction
+centrate first on the case for h=26 km. For small r
+s
+, we retrieve an obliquity of ˜ ε
+m
+=2.0494
+arcmin (Figure 4a). ˜ ε
+m
+decreases with r
+s
+, but not substantially; at the largest r
+s
+(1500 km),
+˜ ε
+m
+= 2.0460 arcmin, a decrease of 0.0034 arcmin. The maximum difference from ˜ ε
+m
+= 2.04
+arcmin, the obliquity that we used in setting the constraint for ˆ C – and hence the prediction
 we should recover for a rigid planet – is an overestimate of approximately 0.01 arcmin which
 occurs for small inner cores.
-The deviation of ε˜ from that of a rigid planet is due to the misalignments of the fluid
+The deviation of ˜ ε
 m
-core (m˜ ) and solid inner core (n˜ ) with respect to the mantle (Figure 4b). The misalignment
-f s
-of the fluid core spin axis from the mantle is significant: m˜ is approximately 4.02 arcmin for
+from that of a rigid planet is due to the misalignments of the fluid
+core (˜ m
+f
+) and solid inner core (˜ n
+s
+) with respect to the mantle (Figure 4b). The misalignment
+of the fluid core spin axis from the mantle is significant: ˜ m
 f
+is approximately 4.02 arcmin for
 a small inner core and does not vary substantially with inner core size; it drops to 3.97 arcmin
-at the largest r . Recall that m˜ is measured with respect to the mantle rotation axis (which
-s f
+at the largest r
+s
+. Recall that ˜ m
+f
+is measured with respect to the mantle rotation axis (which
 coincides closely with the symmetry axis), so the obliquity of the spin axis of the fluid core with
-respect to the orbit normal is ε˜ +m˜ ≈6 arcmin. The reason why the obliquity of the spin
-m f
+respect to the orbit normal is ˜ ε
+m
++˜ m
+f
+≈6 arcmin. The reason why the obliquity of the spin
 axis of the fluid core is larger than that of the mantle can be understood from Equation (32b),
-which shows that m˜ is determined by the resonant amplification of the FCN mode at the forc-
+which shows that ˜ m
 f
+is determined by the resonant amplification of the FCN mode at the forc-
 ing frequency. When the FCN frequency is much larger than the forcing frequency, as is the
-case for Mercury, the resonant amplification is very weak but remains present and m˜ is larger
+case for Mercury, the resonant amplification is very weak but remains present and ˜ m
 f
+is larger
 than zero.
-In contrast to m˜ , the misalignment of the inner core with respect to the mantle is much
+In contrast to ˜ m
 f
-smaller; n˜ is approximately between 0.023-0.025 arcmin, a factor 80 times smaller than ε˜ .
-s m
+, the misalignment of the inner core with respect to the mantle is much
+smaller; ˜ n
+s
+is approximately between 0.023-0.025 arcmin, a factor 80 times smaller than ˜ ε
+m
+.
 Physically, this is because the gravitational torque acting on the inner core when it is tilted from
 the mantle is much stronger than the inertial torque acting at the ICB. As a result, the inner
 core must remain in close alignment with the mantle. Presented differently, since the FICN pe-
 riod is more than 3000 times shorter than the forced precession period, the inner core can eas-
 –18–
 Confidential manuscript submitted to JGR-Planets
-a 4.5 b
-2.050
-4.0
-2.048 )nimcra( )nimcra(
-2.046 crustal th 1i 6c k kn mess ε εm 3.5 crustal th 1i 6c k kn mess m f
-26 km g
-36 km 26 km elgna elgna
-2.044 3.0 36 km
-n (x100)
-s ytiuqilbO ytiuqilbO
-2.042
-2.5
-ε
-m for a rigid planet
-2.040
-2.0
 2.038
+2.040
+2.042
+2.044
+2.046
+2.048
+2.050
+O
+bli q
+uit
+y
+a n gl e (
+ar c mi
+n)
+0 200 400 600 800 1000 1200 1400
+Inner core radius (km)
 1.5
-0 200 400 600 800 1000 1200 1400 0 200 400 600 800 1000 1200 1400
-Inner core radius (km) Inner core radius (km)
-Figure 4. a) Obliquity of the mantle (ε˜ , solid lines) and of the principal moment of inertia (ε˜ ,
-m g
-dashed line) b) m˜ (solid lines) and n˜ (dashed lines, x100) as a function of inner core radius and for
-f s
+2.0
+2.5
+3.0
+3.5
+4.0
+4.5
+O
+bli q
+uit
+y
+a n gl e (
+ar c mi
+n)
+0 200 400 600 800 1000 1200 1400
+Inner core radius (km)
+crustal thickness 16 km
+36 km 26 km
+crustal thickness 16 km
+36 km 26 km
+ε m ε
+g
+for a rigid planet ε m
+m f
+n
+s
+(x100)
+a b
+Figure 4. a) Obliquity of the mantle (˜ ε
+m
+, solid lines) and of the principal moment of inertia (˜ ε
+g
+,
+dashed line) b) ˜ m
+f
+(solid lines) and ˜ n
+s
+(dashed lines, x100) as a function of inner core radius and for
 different choices of crustal thickness.
-ily follow the forced precession of the mantle and remains gravitationally locked to it. n˜ does
+ily follow the forced precession of the mantle and remains gravitationally locked to it. ˜ n
 s
+does
 not change substantially as the inner core increases in size.
-When K =K =0, a good approximation of ε˜ is given by
-icb cmb m
-C(cid:48)Ω sinI
-ε˜ = p , (39)
-m −C(cid:48)Ω cosI+A¯Ω φ
-p o m
+When $K_{icb} = K_{cmb} = 0$, a good approximation of $\tilde{\varepsilon}_m$ is given by
+$$\tilde{\varepsilon}_m = \frac{C' \,\Omega_p \sin I}{-C' \,\Omega_p \cos I + \bar{A}\,\Omega_o\,\phi_m} , \qquad (39)$$
 which is identical to the prediction of Equation (26) for a rigid Mercury, except C is replaced
 by C(cid:48). The latter represents an effective moment of inertia that accounts for the coupling of
 the core to the mantle,
-C(cid:48) =C+A¯ χ, (40)
+$$C' = C + \bar{A}_c\,\chi , \qquad (40)$$
+where $\bar{A}_c = \bar{A}_f + \bar{A}_s$ and
+χ=
+Ω
+p
+cosI
+¯ A
+c
+(cid:18) ¯ A
+f
+(¯ σ
+f
+−Ω
+p
+cosI)
++
+¯ A
+s
+(¯ σ
+s
+−Ω
+p
+cosI)
+(cid:19)
+−
+¯ A
+s
+¯ A
 c
-where A¯ =A¯ +A¯ and
-c f s
-Ω cosI (cid:18) A¯ A¯ (cid:19) A¯ Ω α φ
-χ= p f + s − s o 3 s . (41)
-A¯ (σ¯ −Ω cosI) (σ¯ −Ω cosI) A¯ (σ¯ −Ω cosI)
-c f p s p c s p
-The frequencies σ¯ and σ¯ are given in Equations (33d-33e) and closely approximate the FCN
-f s
+Ω
+o
+α
+3
+φ
+s
+(¯ σ
+s
+−Ω
+p
+cosI)
+. (41)
+The frequencies ¯ σ
+f
+and ¯ σ
+s
+are given in Equations (33d-33e) and closely approximate the FCN
 and FICN frequencies of Equations (38c) and (38b), respectively. The factor χ captures then
 how the core is entrained to precess with the mantle, with the coupling between the two ex-
 pressed in terms of the resonant amplification of the FCN and FICN frequencies. In the limit
-of σ¯ ,σ¯ → 0, then χ = −1, C(cid:48) = C , the core is fully decoupled from the mantle and we
-f s m
-retrieve Equation (35). If instead σ¯ ,σ¯ → ∞, then χ = 0, C(cid:48) = C and we retrieve the pre-
-f s
-diction for a rigid planet. When both the FCN and FICN frequencies are much larger than Ω ,
+of ¯ σ
+f
+,¯ σ
+s
+→ 0, then χ = −1, C(cid:48) = C
+m
+, the core is fully decoupled from the mantle and we
+retrieve Equation (35). If instead ¯ σ
+f
+,¯ σ
+s
+→ ∞, then χ = 0, C(cid:48) = C and we retrieve the pre-
+diction for a rigid planet. When both the FCN and FICN frequencies are much larger than Ω
 p
+,
 as is the case here, resonant amplification is weak, χ is small and positive, C(cid:48) > C and this
-leads to a slightly larger ε˜ compared to a rigid planet. Because the inner core core is grav-
+leads to a slightly larger ˜ ε
 m
+compared to a rigid planet. Because the inner core is grav-
 itationally locked to the mantle, deviations from a rigid planet are dominantly caused by the
-misalignment of the fluid core. In Equation (41), σ¯ (cid:29)σ¯ , so to a good approximation
-s f
+misalignment of the fluid core. In Equation (41), ¯ σ
+s
+(cid:29) ¯ σ
+f
+, so to a good approximation
 –19–
 Confidential manuscript submitted to JGR-Planets
-A¯ Ω cosI
-χ≈ f o . (42)
-A¯ (σ¯ −Ω cosI)
-c f p
-For a small inner core, χ≈7.55×10−3. As the inner core grows, A¯ decreases, and the com-
+$$\chi \approx \frac{\bar{A}_f}{\bar{A}_c}\,\frac{\Omega_o \cos I}{\left(\bar{\sigma}_f - \Omega_p \cos I\right)} . \qquad (42)$$
+For a small inner core, χ≈7.55×10−3. As the inner core grows, ¯ A
 f
-bination A¯ χ also decreases. This implies that C(cid:48) decreases with inner core size and, consequently,
+decreases, and the com-
+bination ¯ A
 c
-ε˜ also decreases with inner core size, as seen in Figure 4a, though it remains larger than the
+χ also decreases. This implies that C(cid:48) decreases with inner core size and, consequently,
+˜ ε
 m
+also decreases with inner core size, as seen in Figure 4a, though it remains larger than the
 prediction for a rigid planet.
-The specific predictions of ε˜ , m˜ and n˜ on Figure 4 depend sensitively on the assumed
-m f s
-interior density model and on the dynamical ellipticities of the inner core (e ) and fluid core
-s
-(e ). Hence, it depends on the choices we have made for the inner core density ρ , the crustal
-f s
-density ρ and its thickness h. Changing ρ , ρ and/or h requires a different combination of ρ ,
-c s c f
-ρ and r in order to match M, Cˆ and Cˆ . In turn, this leads to different ellipticities at in-
-m f m
-terior boundary in order to match J and C , and thus different predictions for ε˜ , m˜ and
-2 22 m f
-n˜ . To illustrate this, we show on Figure 4 two additional predictions computed with crustal
-s
-thicknesses changed to h=16 and 36 km. The change in ε˜ remains modest, ∼0.025%, but
-m
-the changes in m˜ and n˜ are more substantial, ∼5% and ∼10%, respectively.
-f s
+The specific predictions of ˜ ε
+m
+, ˜ m
+f
+and ˜ n
+s
+on Figure 4 depend sensitively on the assumed
+interior density model and on the dynamical ellipticities of the inner core (e
+s
+) and fluid core
+(e
+f
+). Hence, it depends on the choices we have made for the inner core density ρ
+s
+, the crustal
+density ρ
+c
+and its thickness h. Changing ρ
+s
+, ρ
+c
+and/or h requires a different combination of ρ
+f
+,
+ρ
+m
+and r
+f
+in order to match M, ˆ C and ˆ C
+m
+. In turn, this leads to different ellipticities at in-
+terior boundaries in order to match J
+2
+and C
+22
+, and thus different predictions for ˜ ε
+m
+, ˜ m
+f
+and
+˜ n
+s
+. To illustrate this, we show on Figure 4 two additional predictions computed with crustal
+thicknesses changed to h=16 and 36 km. The change in ˜ ε
+m
+remains modest, ∼0.025%, but
+the changes in ˜ m
+f
+and ˜ n
+s
+are more substantial, ∼5% and ∼10%, respectively.
 We also show on Figure 4a (only for h=26 km) the obliquity of the principal moment
-of inertia of the whole planet, which we denote by ε˜ . A difference between ε˜ and ε˜ occurs
-g g m
+of inertia of the whole planet, which we denote by ˜ ε
+g
+. A difference between ˜ ε
+g
+and ˜ ε
+m
+occurs
 if the inner core is misaligned with the mantle. As seen in the mantle frame, a tilted inner core
-(with n˜ assumed small) leads to an off-diagonal component of the moment of inertia tensor
+(with ˜ n
+s
+assumed small) leads to an off-diagonal component of the moment of inertia tensor
+of (C
+s
+−¯ A
+s
+)α
+3
+˜ n
+s
+= ¯ A
+s
+e
+s
+α
+3
+˜ n
+s
+. The angle by which the mantle frame must be rotated so that
+the moment of inertia of the whole planet is purely diagonal is (¯ A
+s
+e
+s
+α
+3
+˜ n
 s
-of (C −A¯ )α n˜ =A¯ e α n˜ . The angle by which the mantle frame must be rotated so that
-s s 3 s s s 3 s
-the moment of inertia of the whole planet is purely diagonal is (A¯ e α n˜ )/(A¯e), and hence a
-s s 3 s
-good approximation of ε˜ is
+)/(¯ Ae), and hence a
+good approximation of ˜ ε
+g
+is
+˜ ε
 g
-A¯ e
-ε˜ =ε˜ + s sα n˜ . (43)
-g m A¯e 3 s
+= ˜ ε
+m
++
+¯ A
+s
+e
+s
+¯ Ae
+α
+3
+˜ n
+s
+. (43)
 Since the inner core is gravitationally forced into a close alignment with the mantle, the dif-
-ference between ε˜ and ε˜ remains very small. For the largest inner core radius that we have
-g m
-considered, ε˜ differs from ε˜ only by approximately 0.001 arcmin.
-g m
+ference between ˜ ε
+g
+and ˜ ε
+m
+remains very small. For the largest inner core radius that we have
+considered, ˜ ε
+g
+differs from ˜ ε
+m
+only by approximately 0.001 arcmin.
 3.3 Viscous coupling
 We now investigate how viscous coupling at the CMB and ICB affects the equilibrium Cassini
 state. Peale et al. [2014] present two different parameterizations of viscous coupling based on
 the timescale of attenuation of the differential rotation between the fluid core and mantle. More
 complete analytical solutions for the flow resulting from a differentially precessing shell have
 been derived [e.g. Stewartson and Roberts, 1963; Busse, 1968; Rochester, 1976] and we exploit
-these solutions here. The parametrization of the viscous coupling constants K and K based
-cmb icb
+these solutions here. The parametrization of the viscous coupling constants K
+cmb
+and K
+icb
+based
 on them are given in Mathews and Guo [2005],
-πρ fr f4(cid:114) ν (cid:16) 0.195−1.976i(cid:17)
-K = , (44a)
-cmb A¯ 2Ω
-f o
-πρ r4(cid:114) ν (cid:16) (cid:17)
-K = f s 0.195−1.976i , (44b)
-icb A¯ 2Ω
-s o
+$$K_{cmb} = \frac{\pi \rho_f r_f^4}{\bar{A}_f} \sqrt{\frac{\nu}{2\Omega_o}} \left( 0.195 - 1.976\,i \right) , \qquad (44a)$$
+$$K_{icb} = \frac{\pi \rho_f r_s^4}{\bar{A}_s} \sqrt{\frac{\nu}{2\Omega_o}} \left( 0.195 - 1.976\,i \right) , \qquad (44b)$$
 where ν is the kinematic viscosity. The appropriate numerical value for ν in planetary inte-
 rior is not well known but based on theoretical and experimental studies it is expected to be
 of the order of 10−6 m2 s−1 [e.g. Gans, 1972; de Wijs et al., 1998; Alf`e et al., 2000; Rutter et al.,
@@ -1151,25 +2721,52 @@ of the order of 10−6 m2 s−1 [e.g. Gans, 1972; de Wijs et al., 1998; Alf`e et
 Confidential manuscript submitted to JGR-Planets
 The above parameterizations are valid only under the assumption that the flow in the bound-
 ary layer remains laminar. Whether this is reasonable can be assessed by evaluating the Reynolds
-number Re=r ∆u /ν, associated with the differential velocity ∆u =r Ω m˜ at the CMB.
-f f f f o f
-For r = 2000 km, and taking m˜ = 4 arcmin ≈ 0.001 rad from the results in the previous
-f f
-section, we get ∆u ∼ 2 mm/s and Re ∼ 6×109. Such a large Reynolds number indicates
+number Re=r
+f
+∆u
+f
+/ν, associated with the differential velocity ∆u
+f
+=r
+f
+Ω
+o
+˜ m
+f
+at the CMB.
+For r
+f
+= 2000 km, and taking ˜ m
 f
+= 4 arcmin ≈ 0.001 rad from the results in the previous
+section, we get ∆u
+f
+∼ 2 mm/s and Re ∼ 6×109. Such a large Reynolds number indicates
 that the viscous friction between the fluid core and mantle should induce turbulent flows, as
 is the case for the Cassini state of the Moon [Yoder, 1981; Williams et al., 2001; C´ebron et al.,
 2019]. For a boundary layer that involves turbulent flows, the viscous torque should be inde-
 pendent of the fluid viscosity and proportional to the square of the differential velocity. The
-coupling constant K should be in the form
+coupling constant K
 cmb
-(cid:12) (cid:12)(cid:16) (cid:17)
-K cmb =f cmb(cid:12)m˜ f(cid:12) 0.195−1.976i , (45)
-where f is a numerical factor that depends among other things on surface roughness. In-
+should be in the form
+$$K_{cmb} = f_{cmb} \left| \tilde{m}_f \right| \left( 0.195 - 1.976\,i \right) , \qquad (45)$$
+where f
 cmb
+is a numerical factor that depends among other things on surface roughness. In-
 corporating a viscous coupling of this form in our rotational model is more challenging not only
-because f is not known but also because the viscous torque is no longer linear in m˜ . One
-cmb f
+because f
+cmb
+is not known but also because the viscous torque is no longer linear in ˜ m
+f
+. One
 strategy is to find solutions through an iterative process. The simpler alternative strategy that
 we adopt is to use the laminar formulas of Equation (44) but with the understanding that ν
 represents an effective turbulent viscosity.
@@ -1177,20 +2774,48 @@ To give an estimate of an appropriate turbulent value for ν, we turn to the Cas
 of the Moon. A measure of the viscous dissipation at the CMB of the Moon has been obtained
 by fitting a rotation model to the librations of the Moon observed by Lunar Laser Ranging (LLR)
 [Williams et al., 2001, 2014; Williams and Boggs, 2015]. Viscous dissipation is reported in terms
-of a coupling parameter K and a recent estimate is K/C =(1.41±0.34)×10−8 day−1 [Williams
+of a coupling parameter K and a recent estimate is K/C
+L
+=(1.41±0.34)×10−8 day−1 [Williams
+and Boggs, 2015], where C
+L
+is the lunar polar moment of inertia. The connection between K
+and K
+cmb
+is
+(cid:12)
+(cid:12)
+(cid:12)
+Im[K
+cmb
+]
+(cid:12)
+(cid:12)
+(cid:12)
+=
+K
+C
+L
+C
 L
-and Boggs, 2015], where C is the lunar polar moment of inertia. The connection between K
+C
+fL
+1
+Ω
+L
+, (46)
+where C
+fL
+is the moment of inertia of the lunar core and Ω
 L
-and K is
+= 2.66 × 10−6 s−1 the lunar
+rotation rate. With C
+fL
+/C
+L
+∼7×10−4 [e.g. Williams et al., 2014], this gives |Im[K
 cmb
-(cid:12) (cid:12) K C 1
-(cid:12)Im[K ](cid:12)= L , (46)
-(cid:12) cmb (cid:12) C C Ω
-L fL L
-where C is the moment of inertia of the lunar core and Ω = 2.66 × 10−6 s−1 the lunar
-fL L
-rotation rate. With C /C ∼7×10−4 [e.g. Williams et al., 2014], this gives |Im[K ]|∼
-fL L cmb
+]|∼
 9×10−5. In order to match this amplitude in Equation (44a), with lunar parameters and as-
 suming a lunar core radius of 400 km, the required turbulent viscosity is ν ≈ 5 × 10−4 m2
 s−1, about 500 times larger than the laminar viscosity. Note that the differential velocity at the
@@ -1200,80 +2825,136 @@ cmb
 is proportional to the differential velocity, the effective turbulent viscosity appropriate for Mer-
 cury should be smaller. Thus, ν ≈5×10−4 m2 s−1 gives a conservative upper bound for the
 possible effective turbulent viscosity that can be expected for Mercury.
-Figure 5 shows how ε˜ , m˜ and n˜ vary as functions of inner core radius for different choices
-m f s
-of effective viscosities. For ν = 10−5 m2 s−1, viscous coupling is too weak to affect ε˜ and
+Figure 5 shows how ˜ ε
+m
+, ˜ m
+f
+and ˜ n
+s
+vary as functions of inner core radius for different choices
+of effective viscosities. For ν = 10−5 m2 s−1, viscous coupling is too weak to affect ˜ ε
 m
-m˜ and they are essentially unchanged from the solutions shown in Figure 4. With increasing
+and
+˜ m
 f
+and they are essentially unchanged from the solutions shown in Figure 4. With increasing
 ν, the stronger viscous coupling between the core and the mantle reduces their differential ve-
-locity, and m˜ is reduced. With the reduced differential velocity at the CMB, the prediction
+locity, and ˜ m
 f
-of ε˜ gets closer to 2.04 arcmin, the obliquity expected for a rigid planet. Although our CMB
+is reduced. With the reduced differential velocity at the CMB, the prediction
+of ˜ ε
 m
-viscous coupling model is different than the one used by Peale et al. [2014], our results for ε˜
+gets closer to 2.04 arcmin, the obliquity expected for a rigid planet. Although our CMB
+viscous coupling model is different than the one used by Peale et al. [2014], our results for ˜ ε
 m
-and m˜ are qualitatively similar: viscous coupling at the CMB acts to reduce the offset of the
+and ˜ m
 f
-fluid spin axis from the mantle symmetry axis. Considering the upper bound in turbulent vis-
-cosity that we have identified above (i.e ν ≈ 5×10−4 m2 s−1), the influence of viscous cou-
-–21–
-Confidential manuscript submitted to JGR-Planets
-kinematic viscosity: 0.01 m2 s-1 0.001 m2 s-1 0.0005 m2 s-1 0.0001 m2 s-1 0.00001 m2 s-1
-4.5
-a b
-2.050
-4.0
-2.048 3.5 )nimcra( )nimcra(
-2.046 ε g 3.0
-ε m 2.5 elgna elgna
-2.044
-m
-2.0 f ytiuqilbO ytiuqilbO
+are qualitatively similar: viscous coupling at the CMB acts to reduce the offset of the
+fluid spin axis from the mantle symmetry axis. Considering the upper bound in turbulent vis-
+cosity that we have identified above (i.e. $\nu \approx 5\times10^{-4}$ m$^2$ s$^{-1}$), the influence of viscous cou-
+–21–
+Confidential manuscript submitted to JGR-Planets
+ε m
+ε g
+m f
 n
-2.042 1.5 s
-ε
-m for a rigid planet
-2.040 1.0
-0.5
+s
 2.038
+2.040
+2.042
+2.044
+2.046
+2.048
+2.050
+O
+bli q
+uit
+y a n
+gl e
+(
+ar c
+mi
+n)
+0 200 400 600 800 1000 1200 1400
+Inner core radius (km)
 0.0
-0 200 400 600 800 1000 1200 1400 0 200 400 600 800 1000 1200 1400
-Inner core radius (km) Inner core radius (km)
-Figure 5. a) Obliquity of the mantle (ε˜ , solid lines) and gravity field (ε˜ , dashed lines) b) m˜
-m g f
-(solid lines) and n˜ (dashed lines) as a function of inner core radius and for different choices of kinematic
+0.5
+1.0
+1.5
+2.0
+2.5
+3.0
+3.5
+4.0
+4.5
+O
+bli q
+uit
+y a n
+gl e
+(
+ar c
+mi
+n)
+0 200 400 600 800 1000 1200 1400
+Inner core radius (km)
+kinematic viscosity: 0.01 m2 s-1 0.00001 m2 s-1 0.0001 m2 s-1 0.0005 m2 s-1 0.001 m2 s-1
+a b
+for a rigid planet
+ε
+m
+Figure 5. a) Obliquity of the mantle (˜ ε
+m
+, solid lines) and gravity field (˜ ε
+g
+, dashed lines) b) ˜ m
+f
+(solid lines) and ˜ n
 s
+(dashed lines) as a function of inner core radius and for different choices of kinematic
 viscosity (color in legend).
-pling on ε˜ remains modest, reducing its amplitude by a maximum of approximately 0.0015
+pling on ˜ ε
 m
+remains modest, reducing its amplitude by a maximum of approximately 0.0015
 arcmin.
 The inclusion of viscous coupling at the ICB can lead to a substantial change in inner core
 tilt. A larger viscosity leads to stronger viscous coupling and to a closer alignment of the in-
 ner core with the fluid core spin axis. The viscous coupling strength is inversely proportional
-to r , so a larger viscosity results in a larger inner core radius at which viscous coupling is of
+to r
 s
+, so a larger viscosity results in a larger inner core radius at which viscous coupling is of
 a similar magnitude to gravitational coupling. Taking again an upper bound of ν =5×10−4
-m2 s−1, Figure 5 indicates that n˜ may be 1 arcmin or larger only if the inner core radius is
+m2 s−1, Figure 5 indicates that ˜ n
 s
+may be 1 arcmin or larger only if the inner core radius is
 smaller than approximately 100 km. For an inner core of a few hundred km in radius, gravi-
 tational coupling is much larger than viscous coupling, and the inner core tilt is limited to a
 fraction of 1 arcmin.
 The larger inner core tilt observed with increasing effective viscosity results in a larger
-offset between the obliquity of the principal moment of inertia ε˜ and that of the mantle ε˜ ,
-g m
-though it remains limited. For the upper bound of ν = 5×10−4 m2 s−1, and for r = 1500
+offset between the obliquity of the principal moment of inertia ˜ ε
+g
+and that of the mantle ˜ ε
+m
+,
+though it remains limited. For the upper bound of ν = 5×10−4 m2 s−1, and for r
 s
-km, the difference between ε˜ and ε˜ is limited to 0.0013 arcmin.
-g m
+= 1500
+km, the difference between ˜ ε
+g
+and ˜ ε
+m
+is limited to 0.0013 arcmin.
 The conclusion that emerges from Figure 5 is that the larger the inner core is, the smaller
 the misalignments of both the fluid core and inner core are with respect to the mantle. This
 implies that the larger the inner core is, the more we approach a planet precessing as a rigid
 body, although the misalignment of the spin axis of the fluid core remains important, approx-
-imately 3-4 arcmin away from the mantle symmetry axis. The specific way in which ε˜ , m˜
-m f
-and n˜ change with inner core size would certainly be different for a turbulent model of viscous
+imately 3-4 arcmin away from the mantle symmetry axis. The specific way in which ˜ ε
+m
+, ˜ m
+f
+and ˜ n
 s
+change with inner core size would certainly be different for a turbulent model of viscous
 coupling. But the general conclusion remains that the addition of viscous coupling at the CMB
 and ICB does not significantly modify the Cassini state equilibrium angle of the mantle.
 –22–
@@ -1286,68 +2967,152 @@ electrically conducting regions stretches existing magnetic field lines that thr
 This induces a secondary magnetic field (or equivalently, an electrical current) and an associ-
 ated tangential EM stress resisting the differential motion. EM coupling at the CMB and ICB
 acts then in a similar way to viscous coupling, and this ’magnetic friction’ depends on the strength
-of the radial magnetic field B and the electrical conductivity σ on either side of the bound-
+of the radial magnetic field B
 r
+and the electrical conductivity σ on either side of the bound-
 ary [Rochester, 1960, 1962, 1968].
-The parametrization of EM coupling in terms of the coupling constants K and K
-cmb icb
+The parametrization of EM coupling in terms of the coupling constants K
+cmb
+and K
+icb
 has been developed in a few studies [e.g. Buffett, 1992; Buffett et al., 2002; Dumberry and Koot,
 2012]. Assuming a dominating axial dipole field, with a radial component at the CMB given
+by B
+r
+=
 √
-3(cid:10) Bd(cid:11) (cid:10) Bd(cid:11)
-by B = cosθ, where is the r.m.s. strength of the field, the coupling constant
-r r r
-K can be written is the form
+3 (cid:10) Bd
+r
+(cid:11) cosθ, where (cid:10) Bd
+r
+(cid:11) is the r.m.s. strength of the field, the coupling constant
+K
+cmb
+can be written in the form
+K
 cmb
-(cid:10) Bd(cid:11)2
-K =3(1−i)F , (47)
-cmb cmb r
+=3(1−i)F
+cmb
+(cid:10) Bd
+r
+(cid:11)2 , (47)
 where
-(cid:18) (cid:19)−1
-1 1 1
-F = + , (48)
-cmb Ω ρ r σ δ σ δ
-o f f m m f f
-(cid:112) (cid:112)
-and where σ , δ = 2/(σ µΩ ) and σ , δ = 2/(σ µΩ ) are the electrical conductivi-
-m m m o f f f o
+$$F_{cmb} = \frac{1}{\Omega_o \rho_f r_f} \left( \frac{1}{\sigma_m \delta_m} + \frac{1}{\sigma_f \delta_f} \right)^{-1} , \qquad (48)$$
+and where $\sigma_m$, $\delta_m = \sqrt{2/(\sigma_m \mu \Omega_o)}$ and $\sigma_f$, $\delta_f = \sqrt{2/(\sigma_f \mu \Omega_o)}$ are the electrical conductivi-
 ties and magnetic skin depths in the mantle and fluid core, respectively, with µ=4π×10−7
-N A−2 the magnetic permeability of free space. The r.m.s. field strength (cid:10) Bd(cid:11) is connected to
+N A−2 the magnetic permeability of free space. The r.m.s. field strength (cid:10) Bd
 r
-the Gauss coefficient g0 of the surface magnetic field by
+(cid:11) is connected to
+the Gauss coefficient g0
 1
-(cid:18) R(cid:19)3
-(cid:10) Bd(cid:11) √2 (cid:12) (cid:12)g0(cid:12)
-= (cid:12) . (49)
-r 3 r f 1
-We can readily build an estimate of the amplitude of K . The electrical conductivity
+of the surface magnetic field by
+$$\left\langle B_r^d \right\rangle = \frac{2}{\sqrt{3}} \left( \frac{R}{r_f} \right)^3 \left| g_1^0 \right| . \qquad (49)$$
+We can readily build an estimate of the amplitude of K
 cmb
+. The electrical conductivity
 of common mantle minerals in Earth’s mantle at the pressure and temperature corresponding
-to the CMB of Mercury is in the range of σ ∼ 0.01 − 1 S m−1 [Constable, 2015]. In con-
+to the CMB of Mercury is in the range of σ
 m
-trast, the electrical conductivity of Fe in planetary cores is expected to be close σ ∼ 106 S
+∼ 0.01 − 1 S m−1 [Constable, 2015]. In con-
+trast, the electrical conductivity of Fe in planetary cores is expected to be close to σ
 f
-m−1 [Pozzo et al., 2012; de Koker et al., 2012]. This implies that (σ δ )−1 (cid:29)(σ δ )−1. Tak-
-m m f f
-(cid:12) (cid:12)
-ing σ = 1 S m−1, (cid:12)g 10(cid:12) = 190 nT for Mercury’s dipole field [Anderson et al., 2012], r =
-m f
-2000 km, ρ =7000 kg m−3, this gives K ≈(3.1×10−11)·(1−i). To put this amplitude
-f cmb
+∼ 106 S
+m−1 [Pozzo et al., 2012; de Koker et al., 2012]. This implies that (σ
+m
+δ
+m
+)−1 (cid:29)(σ
+f
+δ
+f
+)−1. Tak-
+ing σ m = 1 S m−1,
+(cid:12)
+(cid:12)g0 1
+(cid:12)
+(cid:12) = 190 nT for Mercury’s dipole field [Anderson et al., 2012], r f =
+2000 km, ρ
+f
+=7000 kg m−3, this gives K
+cmb
+≈(3.1×10−11)·(1−i). To put this amplitude
 in perspective, taking a molecular viscosity of ν =10−6 m2 s−1 in Equation (44a) gives a vis-
-cous coupling constant of K ≈(6.0×10−7)·(0.195−1.976i). Hence, EM coupling at the
+cous coupling constant of K
 cmb
+≈(6.0×10−7)·(0.195−1.976i). Hence, EM coupling at the
 CMB is much weaker than viscous coupling, even if we include other spherical harmonic com-
 ponents of the radial magnetic field.
 EM coupling can be enhanced if strongly stratified pockets of core fluid are trapped by
-CMB cavities [Buffett, 2010; Glane and Buffett, 2018], in which case the effective σ could be
+CMB cavities [Buffett, 2010; Glane and Buffett, 2018], in which case the effective σ
+m
+could be
+closer to σ
+f
+. Likewise, σ
 m
-closer to σ . Likewise, σ can be increased if a more electrically conducting layer has formed
-f m
+can be increased if a more electrically conducting layer has formed
 at the bottom of Mercury’s mantle, for instance by the upward sedimentation and compaction
 of solid FeS crystals precipitating out of the fluid core [e.g. Hauck et al., 2013]. However, even
-in the extreme case of σ = σ = 106 S m−1, K ≈ (1.6×10−8)·(1−i), which remains
-m f cmb
+in the extreme case of σ
+m
+= σ
+f
+= 106 S m−1, K
+cmb
+≈ (1.6×10−8)·(1−i), which remains
 –23–
 Confidential manuscript submitted to JGR-Planets
 smaller by a factor ∼60 than the smallest possible viscous coupling constant. Viscous forces
@@ -1357,59 +3122,113 @@ and fluid core to be similar, and because the radial magnetic field is likely mu
 coupling can be much larger and dominate viscous coupling. We assume that the magnetic field
 morphology at the ICB is dominantly comprised of small spatial scales for example as predicted
 by the dynamo model of Christensen [2006]. EM coupling in this case can be parametrized in
-terms of an equivalent uniform radial magnetic field (cid:104)B (cid:105) capturing its r.m.s. strength [Buf-
+terms of an equivalent uniform radial magnetic field (cid:104)B
 r
+(cid:105) capturing its r.m.s. strength [Buf-
 fett et al., 2002; Dumberry and Koot, 2012]. Assuming an electrical conductivity σ equal in the
-fluid and solid core, the coupling constant K can be written in the form
+fluid and solid core, the coupling constant K
+icb
+can be written in the form
+K
 icb
+=
 5
-K = (1−i)F (cid:104)B (cid:105)2 , (50)
-icb 4 icb r
+4
+(1−i)F
+icb
+(cid:104)B
+r
+(cid:105)2 , (50)
 where
+F
+icb
+=
 σδ
-F = , (51)
-icb Ω ρ r
-o s s
+Ω
+o
+ρ
+s
+r
+s
+, (51)
+and where δ =
 (cid:112)
-and where δ = 2/(σµΩ ) is the magnetic skin depth. As F is inversely proportional to
-o icb
-r , K is inversely proportional to inner core size. Note that computing the EM coupling based
-s icb
-on the r.m.s. strength (cid:104)B (cid:105) rather than a true field morphology tends to overestimate the strength
+2/(σµΩ
+o
+) is the magnetic skin depth. As F
+icb
+is inversely proportional to
+r
+s
+, K
+icb
+is inversely proportional to inner core size. Note that computing the EM coupling based
+on the r.m.s. strength (cid:104)B
 r
+(cid:105) rather than a true field morphology tends to overestimate the strength
 of the coupling [Koot and Dumberry, 2013]. However, since the strength of the radial magnetic
 field at the ICB of Mercury is largely unknown, imperfections of the EM coupling model are
-absorbed in the range of possible (cid:104)B (cid:105) values.
+absorbed in the range of possible (cid:104)B
 r
+(cid:105) values.
 The parametrization of Equation (50) is only valid in a ’weak field’ regime [Buffett et al.,
 2002], when the feedback from the Lorentz force on the flow in the fluid core can be neglected.
-When (cid:104)B (cid:105) is sufficiently large, this is no longer the case. EM coupling then enters a ’strong
+When (cid:104)B
 r
+(cid:105) is sufficiently large, this is no longer the case. EM coupling then enters a ’strong
 field’ regime [Buffett et al., 2002; Dumberry and Koot, 2012; Koot and Dumberry, 2013] in which
-K increases linearly with (cid:104)B (cid:105) instead of quadratically. A good approximation of K cal-
-icb r icb
+K
+icb
+increases linearly with (cid:104)B
+r
+(cid:105) instead of quadratically. A good approximation of K
+icb
+cal-
 culated for Earth can be extracted from Figure 6a of Dumberry and Koot [2012],
-KE =(0.175−i0.138)(cid:104)B (cid:105) , (52)
-icb r
-where (cid:104)B (cid:105) is in units of Tesla. The superscript E emphasizes that the numerical factors are
+KE
+icb
+=(0.175−i0.138)(cid:104)B
 r
+(cid:105) , (52)
+where (cid:104)B
+r
+(cid:105) is in units of Tesla. The superscript E emphasizes that the numerical factors are
 appropriate for the parameter values adopted for Earth in the computation of Dumberry and
 Koot [2012]. To adapt these numerical factors to Mercury, we write,
+K
+icb
+=(0.175−i0.138)
 F
-K =(0.175−i0.138) icb (cid:104)B (cid:105) , (53)
-icb FE r
 icb
-where FE is defined as in Equation (51) but using the parameters for Earth as defined in Dumb-
+FE
 icb
-erry and Koot [2012]. These are Ω = 7.292 × 10−5 s−1, ρ = 12846 kg m−3, r = 1221.5
-o s s
-km, σ =5×105 S m−1, which gives FE =90.36 T−2.
+(cid:104)B
+r
+(cid:105) , (53)
+where FE
+icb
+is defined as in Equation (51) but using the parameters for Earth as defined in Dumb-
+erry and Koot [2012]. These are Ω
+o
+= 7.292 × 10−5 s−1, ρ
+s
+= 12846 kg m−3, r
+s
+= 1221.5
+km, σ =5×105 S m−1, which gives FE
 icb
-To compute F , we assume an electrical conductivity of σ =106 S m−1 in the core of
+=90.36 T−2.
+To compute F
 icb
+, we assume an electrical conductivity of σ =106 S m−1 in the core of
 Mercury [e.g. de Koker et al., 2012; Deng et al., 2013]. The transition between the weak and
-strong field regime occurs when (cid:104)B (cid:105) ≈ 1.53 mT for the real part of K . (cid:104)B (cid:105) at the ICB
-r icb r
+strong field regime occurs when (cid:104)B
+r
+(cid:105) ≈ 1.53 mT for the real part of K
+icb
+. (cid:104)B
+r
+(cid:105) at the ICB
 of Mercury is unknown. The dynamo model of Christensen [2006] showed that the field geom-
 etry inside the core could be dominated by small length scales, yet only the weaker lower har-
 monics of the field would penetrate through a thermally stratified layer in the upper region of
@@ -1417,120 +3236,272 @@ monics of the field would penetrate through a thermally stratified layer in the
 Confidential manuscript submitted to JGR-Planets
 the fluid core and reach the surface. If so, the field strength inside the core can exceed the sur-
 face field strength by a factor 1000. Taking a surface field strength equal to ∼300 nT [e.g An-
-derson et al., 2012], (cid:104)B (cid:105) at the ICB could be as large as 0.3 mT, corresponding to approxi-
+derson et al., 2012], (cid:104)B
 r
+(cid:105) at the ICB could be as large as 0.3 mT, corresponding to approxi-
 mately 10% of the field strength within Earth’s core. Given that it is perhaps unlikely that Mer-
 cury’s field can be as high as that in Earth’s core, in all likelihood EM coupling at the ICB of
 Mercury remains in the weak field regime.
-Figure 6 shows how ε˜ , m˜ and n˜ vary as functions of inner core radius for different choices
-m f s
-of (cid:104)B (cid:105). The larger (cid:104)B (cid:105) is, the stronger is the EM coupling at the ICB, and the smaller is the
-r r
+Figure 6 shows how ˜ ε
+m
+, ˜ m
+f
+and ˜ n
+s
+vary as functions of inner core radius for different choices
+of (cid:104)B
+r
+(cid:105). The larger (cid:104)B
+r
+(cid:105) is, the stronger is the EM coupling at the ICB, and the smaller is the
 differential rotation between the fluid core and inner core. The inner core and fluid core are vir-
-tually locked into a common precession motion when (cid:104)B (cid:105)>0.3 mT. Further increasing (cid:104)B (cid:105)
-r r
+tually locked into a common precession motion when (cid:104)B
+r
+(cid:105)>0.3 mT. Further increasing (cid:104)B
+r
+(cid:105)
 above 1 mT does not change the solution as EM coupling already dominates all other torques
 on the inner core. This is the case even when EM coupling transitions into the strong field regime.
-(cid:12) (cid:12)
-EM coupling at the CMB is included in these calculations, with σ = 1 S m−1 and (cid:12)g 10(cid:12) =
-m
+EM coupling at the CMB is included in these calculations, with σ m = 1 S m−1 and
+(cid:12)
+(cid:12)g0 1
+(cid:12)
+(cid:12) =
 190 nT, but remains much weaker than the inertial torque at the CMB, so for a small inner core
-we retrieved the solutions of ε˜ and m˜ shown in Figure 4.
-m f
-As the inner core radius is increased, both ε˜ and m˜ get smaller, as it was the case with
-m f
+we retrieved the solutions of ˜ ε
+m
+and ˜ m
+f
+shown in Figure 4.
+As the inner core radius is increased, both ˜ ε
+m
+and ˜ m
+f
+get smaller, as it was the case with
 viscous coupling alone, although the addition of EM coupling lead to more substantial changes.
 The inner core needs to be larger than approximately 500 km for changes in the Cassini state
-equilibrium to be noticeable. It is important to point out that m˜ is reduced not because of
+equilibrium to be noticeable. It is important to point out that ˜ m
 f
+is reduced not because of
 EM coupling at the CMB, but rather from the combination of EM coupling at the ICB, which
 pulls the fluid core towards an alignment with the inner core, and gravitational coupling on the
 inner core, which pulls the latter to align with the mantle. The larger the EM coupling is, the
-greater is the reduction in ε˜ and m˜ .
-m f
+greater is the reduction in ˜ ε
+m
+and ˜ m
+f
+.
 When the EM coupling at the ICB is sufficiently strong that the fluid and solid cores are
-locked into a common precession motion, a good approximation of ε˜ is given by the same pre-
+locked into a common precession motion, a good approximation of ˜ ε
 m
+is given by the same pre-
 diction as Equations (39-40) involving the effective moment of inertia C(cid:48), except χ is now given
 by
-A¯ Ω cosI−A¯ Ω α φ
-χ= c p s o 3 s . (54)
-A¯ Ω (e +K )+A¯ Ω e α α −A¯ Ω cosI
-f o f cmb s o s 3 g c p
-For a small inner core, A¯ Ω cosI >A¯ Ω α φ and χ is positive. Because A¯ Ω α φ increases
-c p s o 3 s s o 3 s
-with inner core size, χ gets smaller, and so do C(cid:48) and ε˜ . The mantle obliquity drops from 2.049
+χ=
+¯ A
+c
+Ω
+p
+cosI− ¯ A
+s
+Ω
+o
+α
+3
+φ
+s
+¯ A
+f
+Ω
+o
+(e
+f
++K
+cmb
+)+ ¯ A
+s
+Ω
+o
+e
+s
+α
+3
+α
+g
+− ¯ A
+c
+Ω
+p
+cosI
+. (54)
+For a small inner core, ¯ A
+c
+Ω
+p
+cosI > ¯ A
+s
+Ω
+o
+α
+3
+φ
+s
+and χ is positive. Because ¯ A
+s
+Ω
+o
+α
+3
+φ
+s
+increases
+with inner core size, χ gets smaller, and so do C(cid:48) and ˜ ε
 m
+. The mantle obliquity drops from 2.049
 arcmin for a small inner core to 2.034 arcmin for an inner core of 1500 km, a reduction of 0.015
-arcmin. For an inner core larger than ≈1000 km, A¯ Ω cosI <A¯ Ω α φ , so χ becomes neg-
-c p s o 3 s
-ative, C(cid:48) becomes smaller than the moment of inertia of a rigid Mercury C, and ε˜ becomes
+arcmin. For an inner core larger than ≈1000 km, ¯ A
+c
+Ω
+p
+cosI < ¯ A
+s
+Ω
+o
+α
+3
+φ
+s
+, so χ becomes neg-
+ative, C(cid:48) becomes smaller than the moment of inertia of a rigid Mercury C, and ˜ ε
 m
+becomes
 smaller than the prediction based on a rigid planet.
 The larger the inner core is, the smaller are the misalignments of the fluid and solid cores
 with respect to the mantle. Hence, the general conclusion we reached for viscous coupling alone
 is not altered with the addition of EM coupling but further strengthened; the larger the inner
 core is, the closer we approach a planet precessing as a rigid body. This is best revealed by the
-obliquity of the gravity field ε˜ which, for a large inner core, asymptotically approaches the obliq-
+obliquity of the gravity field ˜ ε
 g
+which, for a large inner core, asymptotically approaches the obliq-
 uity expected for a rigid planet. Note that with strong EM coupling at the ICB, the offset be-
-tween ε˜ and ε˜ can be as large as 0.008 arcmin for a large inner core.
-m g
+tween ˜ ε
+m
+and ˜ ε
+g
+can be as large as 0.008 arcmin for a large inner core.
 3.5 Fixed inner core density versus fixed ICB density contrast
 Coupling models when viscous and EM stresses are both present have been presented in
 Mathews and Guo [2005] and Deleplace and Cardin [2006]. However, in the light of our results,
 –25–
 Confidential manuscript submitted to JGR-Planets
-Br at ICB: 1 mT 0.3 mT 0.1 mT 0.03 mT 0.01 mT
-a 4.5 b
+2.032
+2.034
+2.036
+2.038
+2.040
+2.042
+2.044
+2.046
+2.048
 2.050
+O
+bli q
+uit
+y a n
+gl e
+(
+ar
+c
+mi
+n)
+0 200 400 600 800 1000 1200 1400
+Inner core radius (km)
+0.0
+0.5
+1.0
+1.5
+2.0
+2.5
+3.0
+3.5
 4.0
-2.048 ε
+4.5
+O
+bli q
+uit
+y a n
+gl e
+(
+ar
+c
+mi
+n)
+0 200 400 600 800 1000 1200 1400
+Inner core radius (km)
+Br at ICB: 1 mT 0.01 mT 0.03 mT 0.1 mT 0.3 mT
+ε
 m
-ε 3.5 )nimcra( )nimcra(
-2.046 g
-3.0
-2.044
-2.5 elgna elgna
-2.042
 ε
-2.040 m for a rigid planet 2.0 m ytiuqilbO ytiuqilbO
-f
-2.038 1.5 n s
-2.036 1.0
-2.034 0.5
-2.032 0.0
-0 200 400 600 800 1000 1200 1400 0 200 400 600 800 1000 1200 1400
-Inner core radius (km) Inner core radius (km)
-Figure 6. a) Obliquity of the mantle (ε˜ , solid lines) and gravity field (ε˜ , dashed lines) b) m˜
-m g f
-(solid lines) and n˜ (dashed lines) as a function of inner core radius and for different choices of B
-s r
+g
+m
+f
+n s
+a b
+for a rigid planet ε m
+Figure 6. a) Obliquity of the mantle (˜ ε
+m
+, solid lines) and gravity field (˜ ε
+g
+, dashed lines) b) ˜ m
+f
+(solid lines) and ˜ n
+s
+(dashed lines) as a function of inner core radius and for different choices of B
+r
 (colour in legend).
 for the Cassini state equilibrium of Mercury, the tangential stress at the CMB is dominated by
 viscous forces, and that at the ICB should be dominated by EM forces. To simplify, we con-
-sider a model where K is purely from viscous coupling and K purely from EM coupling.
-cmb icb
+sider a model where K
+cmb
+is purely from viscous coupling and K
+icb
+purely from EM coupling.
 We choose an effective viscosity at the CMB of ν = 10−4 m2 s−1, which we believe to be a
 representative value given the comparison with the Moon (see section 3.3). We take a radial
-field strength at the ICB of (cid:104)B (cid:105)=0.3 mT, approximately the field strength expected under
+field strength at the ICB of (cid:104)B
 r
+(cid:105)=0.3 mT, approximately the field strength expected under
 the dynamo scenario of Christensen [2006]. We adopt these values as those of a ‘representa-
-tive’ coupling model, although the uncertainty on ν and (cid:104)B (cid:105) obviously remains high.
+tive’ coupling model, although the uncertainty on ν and (cid:104)B
 r
-Figure 7 shows how ε˜ , m˜ and n˜ vary with inner core radius for the ’representative’
-m f s
+(cid:105) obviously remains high.
+Figure 7 shows how ˜ ε
+m
+, ˜ m
+f
+and ˜ n
+s
+vary with inner core radius for the ’representative’
 coupling model (black lines) under the fixed inner core density scenario that we have used in
 sections 3.2, 3.3 and 3.4. Figure 7 also shows how the results change when, for the same rep-
 resentative coupling model, we adopt instead a fixed density contrast between the fluid and solid
-cores and for different choices of α (coloured lines). For a relatively high density contrast (α =
-3 3
+cores and for different choices of α
+3
+(coloured lines). For a relatively high density contrast (α
+3
+=
 0.2), the results are qualitatively similar to the fixed inner core density scenario. For a smaller
-α , the point at which the orientation of the co-precessing fluid and inner cores begins to be
+α
 3
+, the point at which the orientation of the co-precessing fluid and inner cores begins to be
 pulled into an alignment with the mantle is pushed to a larger inner core radius. However, the
-general behaviour of ε˜ , m˜ and n˜ as functions of inner core radius is unchanged. Hence, all
-m f s
+general behaviour of ˜ ε
+m
+, ˜ m
+f
+and ˜ n
+s
+as functions of inner core radius is unchanged. Hence, all
 our results in the previous three sections would be qualitatively similar under a fixed density
 contrast scenario. A smaller density contrast at the ICB only implies that a larger inner core
 is required in order to produce an equivalent change in the Cassini state equilibrium.
@@ -1541,47 +3512,118 @@ model included the tangential viscous stress at the ICB and CMB, but not the EM
 Table 1 gives the obliquities of the mantle, fluid core and inner core, denoted respectively as
 –26–
 Confidential manuscript submitted to JGR-Planets
-ρ s = 8800 kg m-3 α 3: 0.20 0.15 0.10 0.05 0.01
-4.5
-a b
+2.032
+2.034
+2.036
+2.038
+2.040
+2.042
+2.044
+2.046
+2.048
 2.050
-4.0
-2.048 ε
-m 3.5 )nimcra( )nimcra(
-2.046 ε
-g
+O
+bli q
+uit y a
+n
+gl e
+(
+ar
+c
+mi
+n)
+0 200 400 600 800 1000 1200 1400
+Inner core radius (km)
+0.0
+0.5
+1.0
+1.5
+2.0
+2.5
 3.0
-2.044
-2.5 elgna elgna
-2.042
+3.5
+4.0
+4.5
+O
+bli q
+uit y a
+n
+gl e
+(
+ar
+c
+mi
+n)
+0 200 400 600 800 1000 1200 1400
+Inner core radius (km)
+for a rigid planet
 ε
-2.040 m for a rigid planet 2.0 m ytiuqilbO ytiuqilbO
+m
+a b
+α 3: 0.20 0.01 0.05 0.10 0.15 ρ s = 8800 kg m-3
+m
 f
-2.038 1.5 n
+n
 s
-2.036 1.0
-2.034 0.5
-2.032 0.0
-0 200 400 600 800 1000 1200 1400 0 200 400 600 800 1000 1200 1400
-Inner core radius (km) Inner core radius (km)
-Figure 7. a) Obliquity of the mantle (ε˜ , solid lines) and gravity field (ε˜ , dashed lines) b) m˜
-m g f
-(solid lines) and n˜ (dashed lines) as a function of inner core radius, for a fixed inner core density of
+ε
+m
+ε g
+Figure 7. a) Obliquity of the mantle (˜ ε
+m
+, solid lines) and gravity field (˜ ε
+g
+, dashed lines) b) ˜ m
+f
+(solid lines) and ˜ n
 s
-8800 kg m−3 (black lines) and for different choices of α (coloured lines).
+(dashed lines) as a function of inner core radius, for a fixed inner core density of
+8800 kg m−3 (black lines) and for different choices of α
 3
-i(cid:48) , i(cid:48) and i(cid:48); these represent the obliquities with respect to the orbital plane and are connected
-m f s
-to our variables by: i(cid:48) =ε˜ , i(cid:48) =ε˜ +m˜+m˜ ≈ε˜ +m˜ and i(cid:48) =ε˜ +n˜ . To summarize
-m m f m f m f s m s
-their results, i(cid:48) and i(cid:48) vary substantially for different inner core sizes, are always of compara-
-f s
-ble amplitude, and i(cid:48) is always larger than i(cid:48). Furthermore, they find that as the inner core
-s f
-size is increased, the mantle obliquity i(cid:48) gets progressively larger and is displaced further away
+(coloured lines).
+i(cid:48)
+m
+, i(cid:48)
+f
+and i(cid:48)
+s
+; these represent the obliquities with respect to the orbital plane and are connected
+to our variables by: i(cid:48)
+m
+= ˜ ε
+m
+, i(cid:48)
+f
+= ˜ ε
+m
++˜ m+˜ m
+f
+≈ ˜ ε
+m
++˜ m
+f
+and i(cid:48)
+s
+= ˜ ε
+m
++˜ n
+s
+. To summarize
+their results, i(cid:48)
+f
+and i(cid:48)
+s
+vary substantially for different inner core sizes, are always of compara-
+ble amplitude, and i(cid:48)
+s
+is always larger than i(cid:48)
+f
+. Furthermore, they find that as the inner core
+size is increased, the mantle obliquity i(cid:48)
 m
-from its expected orientation based of a rigid planet (see their Figure 6). The change in i(cid:48) they
+gets progressively larger and is displaced further away
+from its expected orientation based of a rigid planet (see their Figure 6). The change in i(cid:48)
 m
+they
 obtain between a case with no inner core and an inner core radius equal to 0.6 times the plan-
 etary radius (≈1463 km, close to the maximum inner core size of 1500 km we have considered),
 is approximately an increase of 5×10−5 rad = 0.17 arcmin. This also corresponds approxi-
@@ -1622,15 +3664,26 @@ amplitude of the decrease can be as large as 0.015 arcmin, 3 times larger than f
 pling alone; this remains a factor 10 smaller than the changes suggested in Peale et al. [2016],
 and again, importantly, in the reverse direction.
 Our results suggest then that the presence and size of an inner core leads to only mod-
-est changes of the mantle obliquity ε compared to the obliquity predicted on the basis of an
+est changes of the mantle obliquity ε
+m
+compared to the obliquity predicted on the basis of an
+entirely rigid planet (εr
+m
+). Let us denote this difference as ∆ε
+m
+=ε
+m
+−εr
 m
-entirely rigid planet (εr ). Let us denote this difference as ∆ε =ε −εr . The largest ∆ε
-m m m m m
-occurs for a small or no inner core, and is ∆ε ≈ 0.01 arcmin. This difference is decreased
+. The largest ∆ε
 m
+occurs for a small or no inner core, and is ∆ε
+m
+≈ 0.01 arcmin. This difference is decreased
 as the inner core size is increased. For a sufficiently large inner core, in the case of a strong EM
-coupling and large density contrast at the ICB, ∆ε can be negative, but its absolute value
+coupling and large density contrast at the ICB, ∆ε
 m
+can be negative, but its absolute value
 remains smaller than 0.01 arcmin.
 To put these results in perspective, the uncertainty in the measurement of the mantle obliq-
 uity reported by Margot et al. [2012] and Stark et al. [2015a] is of the order of 0.08 arcmin, much
@@ -1652,34 +3705,50 @@ necessary in order to properly tie Mercury’s obliquity to its interior structu
 the possibility of further constraining the interior structure of Mercury on the basis of its obliq-
 uity.
 Obliquity measurements based on tracking topographic features reflect the orientation of
-the spin-symmetry axis of the mantle (ε ). Measurements based on tracking the gravity field
+the spin-symmetry axis of the mantle (ε
 m
-of Mercury reflect instead the orientation of the principal moment of the whole planet (ε ). These
+). Measurements based on tracking the gravity field
+of Mercury reflect instead the orientation of the principal moment of the whole planet (ε
 g
+). These
 two orientations do not coincide when an inner core is present and is misaligned from the man-
 tle. Since gravitational coupling prevents a large inner core tilt with respect to the mantle, we
 –28–
 Confidential manuscript submitted to JGR-Planets
-find that the misalignment ∆ε = ε − ε is limited. The maximum offset that we obtain
-g g m
-is approximately ∆ε ≈ 0.007 arcmin. This limited magnitude of offset is important in the
+find that the misalignment ∆ε
+g
+= ε
+g
+− ε
+m
+is limited. The maximum offset that we obtain
+is approximately ∆ε
 g
-light of the recent obliquity of the gravity field estimated in Genova et al. [2019], ε =1.968±
+≈ 0.007 arcmin. This limited magnitude of offset is important in the
+light of the recent obliquity of the gravity field estimated in Genova et al. [2019], ε
 g
+=1.968±
 0.027 arcmin. This is substantially smaller than the two mesurements of the obliquity of the
-spin-symmetry axis of the mantle: ε = 2.04±0.08 arcmin [Margot et al., 2012] and ε =
-m m
+spin-symmetry axis of the mantle: ε
+m
+= 2.04±0.08 arcmin [Margot et al., 2012] and ε
+m
+=
 2.029±0.085 arcmin [Stark et al., 2015a], although all three measurements remain consistent
 with one another within their error estimates. In their interpretation, Genova et al. [2019] sug-
 gest that the different central value of the obliquity that they obtain (smaller by ∼ 0.07 ar-
-cmin) is perhaps explained by an offset ∆ε due to the presence of a (possibly large) solid in-
+cmin) is perhaps explained by an offset ∆ε
 g
+due to the presence of a (possibly large) solid in-
 ner core. However, this is one order of magnitude larger than the maximum magnitude of ∆ε
 g
 that we predict. Moreover, we predict that the obliquity of the gravity field should be larger
 than that of the mantle spin axis, not smaller. Hence, at the present-day level of the precision
-of the measurements, ε and ε should coincide, and their difference cannot be interpreted as
-g m
+of the measurements, ε
+g
+and ε
+m
+should coincide, and their difference cannot be interpreted as
 reflecting the misalignment between the polar moment of inertia of the whole planet and the
 mantle spin axis.
 Lastly, we have concentrated our efforts on the mutual orientations of the different spin
diff --git a/read/results/pdfplumber/2201.00069.txt b/read/results/pdfplumber/2201.00069.txt
index 6282a88..f38f17d 100644
--- a/read/results/pdfplumber/2201.00069.txt
+++ b/read/results/pdfplumber/2201.00069.txt
@@ -4,22 +4,19 @@ and transient emission associated with three localised FRBs
 J. O. Chibueze,1,2★ M. Caleb,3,4† L. Spitler,5 H. Ashkar,6,17 F. Schüssler,6 B. W. Stappers,4
 C. Venter,1 I. Heywood,7,8,9 A. M. S. Richards,3 D. R. A. Williams,3 M. Kramer,3,5
 R. Beswick,3 M. C. Bezuidenhout,3 R. P. Breton,3 L. N. Driessen,3 F. Jankowski,3
-E. F. Keane,10 M. Malenta,3 M. Mickaliger,3 V. Morello3, H. Qiu,11 K. Rajwade,3 1202
+E. F. Keane,10 M. Malenta,3 M. Mickaliger,3 V. Morello3, H. Qiu,11 K. Rajwade,3
 S. Sanidas,3 M. Surnis,3 T. W. Scragg,3 C. R. H. Walker,5 and N. Wrigley,3
-ceD
 H.E.S.S. Collaboration: F. Aharonian,12,13,14 F. Ait Benkhali,15 E.O. Angüner,16 M. Backes,18,1
 V. Baghmanyan,19 V. Barbosa Martins,20 R. Batzofin,21 Y. Becherini,22,23 D. Berge,20
-13
 M. Böttcher,1 C. Boisson,24 J. Bolmont,25 M. de Bony de Lavergne,26 M. Breuhaus,13
 R. Brose,12 F. Brun,6 T. Bulik,27 F. Cangemi,25 S. Caroff,25 S. Casanova,19
-]EH.hp-ortsa[
 J. Catalano,28 M. Cerruti,22 T. Chand,1 A. Chen,21 O.U. Chibueze,1
 G. Cotter,29 P. Cristofari,24 J. Damascene Mbarubucyeye,20 J. Devin,30 A. Djannati-Ataï,22
 A. Dmytriiev,1 K. Egberts,31 J.-P. Ernenwein,16 A. Fiasson,26 G. Fichet de Clairfontaine,24
 G. Fontaine,17 S. Funk,28 S. Gabici,22 S. Ghafourizadeh,15 G. Giavitto,20
 D. Glawion,28 M.-H. Grondin,30 M. Hörbe,29 C. Hoischen,31 T. L. Holch,20
 Zhiqiu Huang,13 M. Jamrozy,32 F. Jankowsky,15 I. Jung-Richardt,28 E. Kasai,18
-K. Katarzyński,33 U. Katz,28 B. Khélifi,22 W. Kluźniak,34 Nu. Komin,21 1v96000.1022:viXra
+K. Katarzyński,33 U. Katz,28 B. Khélifi,22 W. Kluźniak,34 Nu. Komin,21
 K. Kosack,6 D. Kostunin,20 A. Lemière,22 J.-P. Lenain,25 F. Leuschner,35
 T. Lohse,36 A. Luashvili,24 I. Lypova,15 J. Mackey,12 D. Malyshev,35
 V. Marandon,13 P. Marchegiani,21 A. Marcowith,37 G. Martí-Devesa,38 R. Marx,15
@@ -36,8 +33,47 @@ C. Thorpe-Morgan,35 N. Tsuji,45 C. van Eldik,28 J. Veh,28
 J. Vink,40 S.J. Wagner,15 A. Wierzcholska,19 Yu Wun Wong,28 A. Yusafzai,28
 M. Zacharias,24,1 D. Zargaryan,12,14 A.A. Zdziarski,34 A. Zech,24 S.J. Zhu,20
 S. Zouari,22 N. Żywucka,1
-MNRAS000,1–15(2021)
 AcceptedXXX.ReceivedYYY;inoriginalformZZZ
+MNRAS000,1–15(2021)
+a
+r
+X i
+v
+:
+2
+2
+0
+1
+.
+0
+0
+0
+6
+9
+v
+1
+[
+a
+s
+t
+r
+o
+-
+p
+h
+.
+H
+E
+]
+3
+1
+D
+e
+c
+2
+0
+2
+1
 MNRAS000,1–15(2021) Preprint4January2022 CompiledusingMNRASLATEXstylefilev3.0
 ABSTRACT
 Wereportonasearchforpersistentradioemissionfromtheone-offFastRadioBurst(FRB)
@@ -57,365 +93,478 @@ whoseageliesbetweenthatofFRB20121102AandFRB20180916A.Aparallelsearchfor
 repeatburstsfromtheseFRBsrevealednonewdetectionsdowntoafluenceof0.08Jymsfor
 a1msdurationburst.
 Keywords: fastradiobursts–radiocontinuum:galaxies–radiationmechanisms:non-thermal
-1 INTRODUCTION Thepersistentemissionispoweredbythenebulaofrelativisticelec-
-tronsandmagneticfieldsinflatedbythemagnetarflares(Margalit
+1 INTRODUCTION
 Fast radio bursts (FRBs) are luminous transients that last for mi-
-et al. 2019). The existence of persistent emission associated with
 croseconds to milliseconds and occur at extragalactic to cosmo-
-FRBscouldprovidevitalcluestotheirorigin.Moreover,potential
 logical distances (e.g. Lorimer et al. 2007; Thornton et al. 2013;
-candidates and models for FRB progenitors predict counterparts
 Macquartetal.2020).Theestimatedhighradioluminositiesand
-in the X-ray an TeV bands. For example, a model by Lyubarsky
 associatedbrightnesstemperaturesrequiredtoproducetheseshort-
-(2014)predictsmillisecondoutburstsofTeVemissionaccompany-
 timescaleenergeticeventsatsuchdistancesarewhatmakesthem
-ingFRBsfrommagnetars.In2020,FRB20200428wasdiscovered
 intriguing(Petroffetal.2021;Caleb&Keane2021).Theyhavebeen
-forthefirsttimefromagalacticmagnetar,SGR1935+2154.Fur-
 observedtoemitfrom∼110MHz−8GHz,thoughnotyetacross
-thermore,anX-raycounterparttothisFRBwasdetecedforthefirst
 a wide and continuous frequency band due to the variable band-
-timebyseveralinstruments(Tavanietal.2021;Ridnaiaetal.2021;
 limitedspectraofthesinglepulses.Over600FRBshavebeendis-
-Mereghettietal.2020;Insight-HXMT2020).
 covered1ofwhich∼20havebeenseentorepeat,anditispresently
-Of the 19 FRBs that have been associated with host galax-
-uncertainwhethertheyalldo(Calebetal.2019;Jamesetal.2020). ies2, only the sub-arcsecond localisation of the repeating FRB
+uncertainwhethertheyalldo(Calebetal.2019;Jamesetal.2020).
 Theextraordinaryobservedcharacteristicsoftherepeatingandnon-
-20121102Atoahostgalaxyataredshiftof𝑧 =0.19273±0.0008
 repeatingFRBshaveledtovariousprogenitormodelswiththebulk
-(Tendulkaretal.2017;Bassaetal.2017)showedthatitisphysi-
 ofthemfavouringneutronstars.Progenitortheoriesincludebinary
-callyassociatedwithacompact(≤0.7pc),persistentradiosource
-neutronstarmergersandcollisions(Totani2013;Yamasakietal. of luminosity 𝜈𝐿 𝜈 ∼ 1039 erg s−1 at a few GHz (Marcote et al.
+neutronstarmergersandcollisions(Totani2013;Yamasakietal.
 2018),giantpulsesfromextragalacticpulsars(Cordes&Wasser-
-2017).Thissourceisdetectablefrom300MHz–26GHz(Resmi
 man 2016; Popov & Pshirkov 2016), hyperflares and giant flares
-etal.2020;Chatterjeeetal.2017)andisseentoexhibit∼10%vari-
 frommagnetars(Popov&Postnov2013;Popovetal.2018),binary
-abilityondaytimescales.Incontrast,asimilarsub-milliarcsecond
 whitedwarfmergers(Kashiyamaetal.2013),neutronstar“comb-
-localisationofanotherrepeatingFRB20180916Btoanearbymas-
 ing" (Zhang 2018) and interactions of neutron stars with active
-sive spiral galaxy at 𝑧 = 0.0337±0.0002 (Marcote et al. 2020)
 galacticnuclei(Vieyroetal.2017)(seePlattsetal.(2019)foralist
+ofpotentialprogenitors).Someofthesemodelspredictradioafter-
+glowsaccompanyinganFRBwithtimescalesofdaystoyears.Liu
+etal.(2016)proposethatthemergerofaKerr-Newmanblackhole
+binary is one of the plausible central engines for FRBs and their
+afterglows. Dai et al. (2017), however, suggest that the persistent
+emissionisduetoanultra-relativisticpulsarwindnebulasweeping
+upitsambientmediumwithFRBsrepeatedlyproducedthroughone
+ofseveralpotentialmechanisms.InthemagnetarmodelbyMargalit
+et al. (2019), FRBs produced by binary neutron star mergers and
+accretioninducedcollapseareexpectedtobeaccompaniedbyper-
+sistentradiocontinuumemissionontimescalesofmonthstoyears.
+★ james.chibueze@nwu.ac.za
+† manisha.caleb@manchester.ac.uk
+1 https://www.wis-tns.org/
+Thepersistentemissionispoweredbythenebulaofrelativisticelec-
+tronsandmagneticfieldsinflatedbythemagnetarflares(Margalit
+et al. 2019). The existence of persistent emission associated with
+FRBscouldprovidevitalcluestotheirorigin.Moreover,potential
+candidates and models for FRB progenitors predict counterparts
+in the X-ray and TeV bands. For example, a model by Lyubarsky
+(2014)predictsmillisecondoutburstsofTeVemissionaccompany-
+ingFRBsfrommagnetars.In2020,FRB20200428wasdiscovered
+forthefirsttimefromagalacticmagnetar,SGR1935+2154.Fur-
+thermore, an X-ray counterpart to this FRB was detected for the first
+timebyseveralinstruments(Tavanietal.2021;Ridnaiaetal.2021;
+Mereghettietal.2020;Insight-HXMT2020).
+Of the 19 FRBs that have been associated with host galax-
+ies2, only the sub-arcsecond localisation of the repeating FRB
+20121102Atoahostgalaxyataredshiftof𝑧 =0.19273±0.0008
+(Tendulkaretal.2017;Bassaetal.2017)showedthatitisphysi-
+callyassociatedwithacompact(≤0.7pc),persistentradiosource
+of luminosity $\nu L_{\nu} \sim 10^{39}$ erg s$^{-1}$ at a few GHz (Marcote et al.
+2017).Thissourceisdetectablefrom300MHz–26GHz(Resmi
+etal.2020;Chatterjeeetal.2017)andisseentoexhibit∼10%vari-
+abilityondaytimescales.Incontrast,asimilarsub-milliarcsecond
+localisationofanotherrepeatingFRB20180916Btoanearbymas-
+sive spiral galaxy at 𝑧 = 0.0337±0.0002 (Marcote et al. 2020)
 showednoassociatedpersistentradioemission.Thisplacesastrong
-ofpotentialprogenitors).Someofthesemodelspredictradioafter- upperlimitonthepersistentsourceluminosityof𝜈𝐿 (cid:46)7.6×1035
+upperlimitonthepersistentsourceluminosityof𝜈𝐿
 𝜈
-glowsaccompanyinganFRBwithtimescalesofdaystoyears.Liu ergs−1 at1.6GHz,whichisthreeordersofmagnitudelowerthan
-etal.(2016)proposethatthemergerofaKerr-Newmanblackhole
+$\lesssim 7.6\times10^{35}$
+erg s$^{-1}$ at 1.6 GHz, which is three orders of magnitude lower than
 thatofFRB20121102A.Recently,theCHIME/FRBcollaboration
-binary is one of the plausible central engines for FRBs and their
 announced heightened activity in the repeating FRB 20201124A
-afterglows. Dai et al. (2017), however, suggest that the persistent
 (Chime/FRB Collaboration 2021), which was localised to a host
-emissionisduetoanultra-relativisticpulsarwindnebulasweeping
 galaxy at a redshift of 𝑧 = 0.0979±0.0001 (Fong et al. 2021).
-upitsambientmediumwithFRBsrepeatedlyproducedthroughone
 PersistentradioemissionwasdetectedbytheupgradedGiantMe-
-ofseveralpotentialmechanisms.InthemagnetarmodelbyMargalit
 trewaveRadioTelescope(uGMRT)(Whartonetal.2021)andthe
-et al. (2019), FRBs produced by binary neutron star mergers and
 Karl G. Jansky Very Large Array (JVLA) (Ricci et al. 2021) on
-accretioninducedcollapseareexpectedtobeaccompaniedbyper-
 angular scales of a few arcseconds, but resolved out to scales of
-sistentradiocontinuumemissionontimescalesofmonthstoyears.
 ∼0.1arcsecondswiththeEuropeanVLBINetwork(Marcoteetal.
 2021).
 Localisations of four one-off FRBs through imaging of
-★ james.chibueze@nwu.ac.za
-† manisha.caleb@manchester.ac.uk
-1 https://www.wis-tns.org/ 2 https://frbhosts.org/
+2 https://frbhosts.org/
 ©2021TheAuthors
 MeerKAT,e-MERLIN, SwiftandH.E.S.S.,observationsofthreelocalisedFRBs 3
-buffered raw voltage data at 1.4GHz (Bannister et al. 2019; ucts, including reduced and calibrated visibility data (including
-Prochaska et al. 2019; Macquart et al. 2020) by the Australian self-calibration), continuum (including sub-band) images as well
-SKAPathfinder(ASKAP)telescopedidnotyieldpersistentradio as diagnostic plots, are provided by the pipeline. The customary
-continuumemissionfromthehostgalaxies(Bhandarietal.2020). configuration of the 𝑜𝑥𝑘𝑎𝑡 pipeline incorporates flagging, cross-
-AustralianTelescopeCompactArray(ATCA)observationsofFRBs calibrationandself-calibrationprocesses.Intheflaggingprocess,
-20180924B, 20181112A, 20190102C and 20190608B were con- thelow-gainbandpassedges(856MHzto880MHzand1658MHz
-ductedatacentrefrequencyof6.5GHz.Nopersistentemissionas to1800MHz)areflaggedonallbaselines,alongwiththelocationof
-luminousastheoneassociatedwithFRB20121102Awasdetected theGalacticneutralhydrogenlineat1419.8MHzto1421.3MHz.
-fortheASKAPFRBs(Bhandarietal.2020).Whilethetrueageof Severalotherradiofrequencyinterference(RFI)proneregionsof
-FRB 121102A is unknown, models based on polarization studies the spectrum are then flagged on baselines shorter than 600 m.
-predicttheagetobe∼6−17years(Hilmarssonetal.2021).Itis Then, other possible RFI affected data are flagged out using the
-possiblethatyounger,moreactiveFRBslikeFRB20121102Aare CASA routines rflag and tfcrop for the calibrators, and using the
-associatedwithpersistentradioemissionwhiletheemissionmight tricolourpackageforthetargetfields.
-havefadedovertimefortheolderones.Thepossibilityofrepeating Thecross-calibrationstepsusing𝑜𝑥𝑘𝑎𝑡werestandard,includ-
-FRBsnotbeingsouncommonafterall(Ravi2019)alongwiththe ingsettingthefluxscaleandderivingcorrectionsforresidualdelay
-increasingarcsecondlocalisationssuggeststhatweareenteringan calibration, bandpass and time-varying gain. The 𝑜𝑥𝑘𝑎𝑡 pipeline
-erawherewecanbegintolookforevidenceofmultipleclassesby uses the customary tasks from the CASA (McMullin et al. 2007)
-studyingFRBhostgalaxiesandmulti-wavelengthcounterparts. suiteforcross-calibration.Afterapplyingallthecorrectionstothe
-Inthispaper,wereportonthesearchforpersistentradioemis- targetfield,wechannel-averagedthedatasetbyafactoroffivechan-
-sion in the host galaxies of one apparent one-off source (FRB nelsbeforesplittingoutthesciencetarget.Thisisconsistentwithour
-20190714A) and two repeating sources (FRBs 20171019A and sciencegoals,sincetherelicsourceswetargetareinthecentralparts
-20190711A) (Kumar et al. 2019, 2021) using MeerKAT. In case ofourfields,reducingtheeffectofsmearingthroughthechannel
-ofthelatter,wealsoconductedsimultaneousobservationswiththe averaging.Todeconvolveandimagethetargetdata,theWSClean
-HighEnergyStereoscopicSystem(H.E.S.S.)inveryhighenergy imager (Offringa et al. 2014) was used, with the multiscale and
-gammarays.Inaddition,wesearchedforsignalsintheultraviolet, widebanddeconvolutionalgorithmsenabledtobetterallowimag-
-optical,andX-raybands.Thepaperisstructuredasfollows.InSec- ingofthediffuseemissionpresentintheourfields.Deconvolution
-tion2,wediscussourobservationsanddatareduction;inSection3, wasperformedintensub-bandimagesofeach82MHzwide-band.
-wediscussthesingleradiocontinuumdetectionandderivedmulti- WSClean generates the multi-frequency synthesis (MFS) map, in
-wavelengthupperlimits.Ourdiscussionandconclusionsfollowin joined-channel deconvolution mode, with a central frequency of
-Section4and5. 1283MHz.Inotherwords,theMFSmapisafullbandwidthmap.
-InWSClean,eachofthesub-bandsisdeconvolvedseparatelywith
-aninitiallyhighmaskof20𝜎 rms (usingtheautomaskingfunction
-2 OBSERVATIONSANDDATAREDUCTION provided by WSClean), to generate an artefact-free model of the
-targetfieldfortheself-calibrationprocess.Thismaskingthreshold
-2.1 MeerKATobservations was iteratively reduced to a value of 3𝜎 rms in the final iteration
-ofimaging.The𝑜𝑥𝑘𝑎𝑡 pipelineusesthecustomarytasksfromthe
+buffered raw voltage data at 1.4GHz (Bannister et al. 2019;
+Prochaska et al. 2019; Macquart et al. 2020) by the Australian
+SKAPathfinder(ASKAP)telescopedidnotyieldpersistentradio
+continuumemissionfromthehostgalaxies(Bhandarietal.2020).
+AustralianTelescopeCompactArray(ATCA)observationsofFRBs
+20180924B, 20181112A, 20190102C and 20190608B were con-
+ductedatacentrefrequencyof6.5GHz.Nopersistentemissionas
+luminousastheoneassociatedwithFRB20121102Awasdetected
+fortheASKAPFRBs(Bhandarietal.2020).Whilethetrueageof
+FRB 121102A is unknown, models based on polarization studies
+predicttheagetobe∼6−17years(Hilmarssonetal.2021).Itis
+possiblethatyounger,moreactiveFRBslikeFRB20121102Aare
+associatedwithpersistentradioemissionwhiletheemissionmight
+havefadedovertimefortheolderones.Thepossibilityofrepeating
+FRBsnotbeingsouncommonafterall(Ravi2019)alongwiththe
+increasingarcsecondlocalisationssuggeststhatweareenteringan
+erawherewecanbegintolookforevidenceofmultipleclassesby
+studyingFRBhostgalaxiesandmulti-wavelengthcounterparts.
+Inthispaper,wereportonthesearchforpersistentradioemis-
+sion in the host galaxies of one apparent one-off source (FRB
+20190714A) and two repeating sources (FRBs 20171019A and
+20190711A) (Kumar et al. 2019, 2021) using MeerKAT. In case
+ofthelatter,wealsoconductedsimultaneousobservationswiththe
+HighEnergyStereoscopicSystem(H.E.S.S.)inveryhighenergy
+gammarays.Inaddition,wesearchedforsignalsintheultraviolet,
+optical,andX-raybands.Thepaperisstructuredasfollows.InSec-
+tion2,wediscussourobservationsanddatareduction;inSection3,
+wediscussthesingleradiocontinuumdetectionandderivedmulti-
+wavelengthupperlimits.Ourdiscussionandconclusionsfollowin
+Section4and5.
+2 OBSERVATIONSANDDATAREDUCTION
+2.1 MeerKATobservations
 TheMeerKAT64-parabolic-disharray(Jonas&MeerKATTeam
-Cubicalsoftware(Kenyonetal.2018)forself-calibration.
 2016;Mauchetal.2020)islocatedintheNorthernKaroodesert
 near Carnarvon, South Africa. Each “offset Gregorian" parabolic
 dishantennahasaneffectivediameterof13.5m.Theinnercoreof
-2.1.2 Singlepulsesearches
 thearraycontains48ofthe64dishesina1kmradius,whilethe
-remaining16dishesarespreadoutwardupto8km.Theshortestand Inadditiontoobtainingcorrelateddata,theoutputdatastreamof
-longestbaselinesoftheMeerKATarrayare29mand8km,respec- theF-enginearecaptured,delaycorrected,phasedandchannelised
+remaining16dishesarespreadoutwardupto8km.Theshortestand
+longestbaselinesoftheMeerKATarrayare29mand8km,respec-
 tively,providingangularscalesof5(cid:48)(cid:48)to27(cid:48)atthecentralfrequency,
+of the L-band receiver used here, of 1283MHz. Multi-epoch ob-
+servations of the FRB fields were conducted with the MeerKAT
+array (Project ID: SCI-20190418-VC-01) at L-band (856MHz to
+1712MHz). Details of the MeerKAT observations are presented
+in Table 1. Only Stokes I (total intensity) of the MeerKAT ob-
+servations are considered in this paper. The data correlation was
+donewiththeSKARABcorrelator(Hickishetal.2016)in4kmode
+whichgives4096channelsacrossthe856MHzbandwidthresulting
+inafrequencyresolutionof∼209kHz.Thedatawerereducedus-
+ingthesemi-automatedMeerKATdataanalysispipelines-𝑜𝑥𝑘𝑎𝑡3
+(Heywood2020).
+2.1.1 Imaginganalysis
+The 𝑜𝑥𝑘𝑎𝑡 pipelineemploysacollectionofpubliclyavailablera-
+dio interferometry data reduction software. The final data prod-
+3 https://ascl.net/code/v/2627
+ucts, including reduced and calibrated visibility data (including
+self-calibration), continuum (including sub-band) images as well
+as diagnostic plots, are provided by the pipeline. The customary
+configuration of the 𝑜𝑥𝑘𝑎𝑡 pipeline incorporates flagging, cross-
+calibrationandself-calibrationprocesses.Intheflaggingprocess,
+thelow-gainbandpassedges(856MHzto880MHzand1658MHz
+to1800MHz)areflaggedonallbaselines,alongwiththelocationof
+theGalacticneutralhydrogenlineat1419.8MHzto1421.3MHz.
+Severalotherradiofrequencyinterference(RFI)proneregionsof
+the spectrum are then flagged on baselines shorter than 600 m.
+Then, other possible RFI affected data are flagged out using the
+CASA routines rflag and tfcrop for the calibrators, and using the
+tricolourpackageforthetargetfields.
+Thecross-calibrationstepsusing𝑜𝑥𝑘𝑎𝑡werestandard,includ-
+ingsettingthefluxscaleandderivingcorrectionsforresidualdelay
+calibration, bandpass and time-varying gain. The 𝑜𝑥𝑘𝑎𝑡 pipeline
+uses the customary tasks from the CASA (McMullin et al. 2007)
+suiteforcross-calibration.Afterapplyingallthecorrectionstothe
+targetfield,wechannel-averagedthedatasetbyafactoroffivechan-
+nelsbeforesplittingoutthesciencetarget.Thisisconsistentwithour
+sciencegoals,sincetherelicsourceswetargetareinthecentralparts
+ofourfields,reducingtheeffectofsmearingthroughthechannel
+averaging.Todeconvolveandimagethetargetdata,theWSClean
+imager (Offringa et al. 2014) was used, with the multiscale and
+widebanddeconvolutionalgorithmsenabledtobetterallowimag-
+ing of the diffuse emission present in our fields. Deconvolution
+wasperformedintensub-bandimagesofeach82MHzwide-band.
+WSClean generates the multi-frequency synthesis (MFS) map, in
+joined-channel deconvolution mode, with a central frequency of
+1283MHz.Inotherwords,theMFSmapisafullbandwidthmap.
+InWSClean,eachofthesub-bandsisdeconvolvedseparatelywith
+aninitiallyhighmaskof20𝜎 rms (usingtheautomaskingfunction
+provided by WSClean), to generate an artefact-free model of the
+targetfieldfortheself-calibrationprocess.Thismaskingthreshold
+was iteratively reduced to a value of 3𝜎 rms in the final iteration
+ofimaging.The𝑜𝑥𝑘𝑎𝑡 pipelineusesthecustomarytasksfromthe
+Cubicalsoftware(Kenyonetal.2018)forself-calibration.
+2.1.2 Singlepulsesearches
+Inadditiontoobtainingcorrelateddata,theoutputdatastreamof
+theF-enginearecaptured,delaycorrected,phasedandchannelised
 beforebeingsentoverthecentralbeamformingnetworktothebeam-
-of the L-band receiver used here, of 1283MHz. Multi-epoch ob- formingUserSuppliedEquipment(FBFUSE)thatwasdesignedand
-servations of the FRB fields were conducted with the MeerKAT developedattheMaxPlanckInstituteforRadioAstronomyinBonn.
-array (Project ID: SCI-20190418-VC-01) at L-band (856MHz to Forthisproject,FBFUSEcombinedthedatainto764total-intensity
-1712MHz). Details of the MeerKAT observations are presented tied-arraybeamswhichwereusedtopopulatetheprimarybeamof
-in Table 1. Only Stokes I (total intensity) of the MeerKAT ob- ∼1deg2ofthearray.Thedataarethencapturedat306.24μstime
-servations are considered in this paper. The data correlation was resolution by the Transient User Supplied Equipment (TUSE), a
-donewiththeSKARABcorrelator(Hickishetal.2016)in4kmode real-timetransientdetectionbackendinstrumentdevelopedbythe
-whichgives4096channelsacrossthe856MHzbandwidthresulting MeerTRAP4teamattheUniversityofManchester.Moredetailson
-inafrequencyresolutionof∼209kHz.Thedatawerereducedus- TUSEwillbepresentedinanupcomingpaper(Stappersetal.in
-ingthesemi-automatedMeerKATdataanalysispipelines-𝑜𝑥𝑘𝑎𝑡3 prep).TheGPU-basedsinglepulsesearchpipelineAstroAcceler-
-(Heywood2020). ate5(Dimoudi&Armour2015;Adámek&Armour2016;Adámek
+formingUserSuppliedEquipment(FBFUSE)thatwasdesignedand
+developedattheMaxPlanckInstituteforRadioAstronomyinBonn.
+Forthisproject,FBFUSEcombinedthedatainto764total-intensity
+tied-arraybeamswhichwereusedtopopulatetheprimarybeamof
+$\sim1$ deg$^{2}$ of the array. The data are then captured at 306.24 μs time
+resolution by the Transient User Supplied Equipment (TUSE), a
+real-timetransientdetectionbackendinstrumentdevelopedbythe
+MeerTRAP4teamattheUniversityofManchester.Moredetailson
+TUSEwillbepresentedinanupcomingpaper(Stappersetal.in
+prep).TheGPU-basedsinglepulsesearchpipelineAstroAcceler-
+ate5(Dimoudi&Armour2015;Adámek&Armour2016;Adámek
 etal.2017;Dimoudietal.2018;Adámek&Armour2019)wasused
 tosearchforburstsinreal-timeafterincoherentlyde-dispersingthe
-2.1.1 Imaginganalysis dataintheDMrange0–5118.4pccm−3(seeCalebetal.2020,for
-The 𝑜𝑥𝑘𝑎𝑡 pipelineemploysacollectionofpubliclyavailablera- moredetails).
-dio interferometry data reduction software. The final data prod-
+dataintheDMrange0–5118.4pccm−3(seeCalebetal.2020,for
+moredetails).
 4 https://www.meertrap.org/
-3 https://ascl.net/code/v/2627 5 https://github.com/AstroAccelerateOrg/astro-accelerate
+5 https://github.com/AstroAccelerateOrg/astro-accelerate
 MNRAS000,1–15(2021)
 4 Chibuezeetal.
-2.2 e-MERLINObservations CT5telescope(Bolmontetal.2014).Astandarddataqualityselec-
-tionwasappliedtothedata(Aharonianetal.2006).Theeventshave
+2.2 e-MERLINObservations
 To constrain the position of the persistent continuum emission
-thenbeenselectedandtheirdirectionandenergyreconstructedus-
 associated with FRB20190714A, we conducted L-band (centre
-ingalog-likelihoodminimizationcomparingtherecordedshower
 frequency of 1.51GHz) observations of the target with the en-
-imagesofalltriggeredtelescopes(requiringatleasttwotelescopes
 hancedMulti-ElementRemote-LinkedInterferometerNetwork,e-
-toseethesamegamma-rayevent)toasemi-analyticalmodelofair
 MERLIN array in the United Kingdom (project code: CY10003)
-showers(deNaurois&Rolland2009).
 on 13 January, 2021 (see Section 3.1.2). Six antennas were used
-Wedefineacircularregion-of-interestcenteredontheposition
-i tn recl wud ai sn Rg .Ath .e =75 1- 2m 1L 5o 𝑚v 5e 5ll .t 1e 2le ,s Dco ep ce .=an −d 1t 3h ◦e 01ta (cid:48)r 1g 5e (cid:48).t (cid:48)7p .o 1in 4t 0in 7g +2c 8e 2n 7- ofFRB20171019Awitharadiusof0.12◦,optimalforapoint-like
-ℎ 𝑠
-sourceofemissionasexpectedfromFRB20171019A.Theback-
+including the 75-m Lovell telescope and the target pointing cen-
+tre was R.A. = $12^{\rm h}15^{\rm m}55^{\rm s}.12$, Dec. = $-13^{\circ}01'15''.7$. 1407+2827
 was used as the bandpass calibrator, 1331+3030 as the flux cal-
-groundlevelinthisONregionwasdeterminedusingthestandard
 ibrator and 1216−1033 as the phase calibrator. The angular sep-
-“ringbackground”technique(Bergeetal.2007)basedonaradially
 aration between the target and the phase calibrator is 2.47◦. The
-symmetricringaroundthesourceposition.Thistechniqueallowsus
 data reduction was done following standard e-MERLIN calibra-
-toderivethebackgroundlevelfromthesamefieldofviewandas-
 tion procedures6 with additional flagging of bad visibilities fol-
-suresthatthegamma-raysignalandbackgroundareestimatedwith
 lowed by imaging. We found two confusing sources in the field,
-at R.A. = 12ℎ 15𝑚 44𝑠 .669, Dec. = −12◦57(cid:48)59(cid:48).(cid:48)56 and R.A. = thesameacceptanceandunderthesameobservationconditions.
-12ℎ 15𝑚 37𝑠 .216,Dec.= −13◦09(cid:48)33(cid:48).(cid:48)44at4.1(cid:48) and9.4(cid:48) fromthe
+at R.A. = $12^{\rm h}15^{\rm m}44^{\rm s}.669$, Dec. = $-12^{\circ}57'59''.56$ and R.A. =
+$12^{\rm h}15^{\rm m}37^{\rm s}.216$, Dec. = $-13^{\circ}09'33''.44$ at $4.1'$ and $9.4'$ from the
 pointingcentre,respectively.Theyhadapparentfluxdensitiesof4
-and 1.3mJy without primary beam correction. We used these for 3 RESULTS
+and 1.3mJy without primary beam correction. We used these for
 self-calibration of the field and then subtracted them before final
-3.1 MeerKAT
-imaging.Thefinalimagesynthesizedbeamis0(cid:48).(cid:48)65×0(cid:48).(cid:48)15,posi-
-tionangle15◦elongatedintheDeclinationdirectionduetothelow ThetheoreticalthermalnoiseoftheMeerKATcanbecalculatedas
+imaging. The final image synthesized beam is $0''.65 \times 0''.15$, posi-
+tionangle15◦elongatedintheDeclinationdirectionduetothelow
 targetelevationfromtheUK.
-1 SEFD
-𝑆 rms= . (1)
-𝜂 √︃
-𝑐 𝑛 ×𝑁(𝑁−1)×Δ𝜈×𝑡
-2.3 TheSwiftsatellite:UVOTandXRTobservations pol int
-The system equivalent flux density (SEFD) of MeerKAT at the
+2.3 TheSwiftsatellite:UVOTandXRTobservations
 NeilGehrelsSwiftObservatory(Swift)isamulti-wavelengthNASA
+space mission operating in soft-X-rays and optical/UV. Here we
+use data from the X-ray Telescope (XRT) (Burrows et al. 2005)
+whichoperatesinthesoftX-raydomainof0.3−10keVaswellas
+data taken by the UV/Optical Telescope (UVOT) (Roming et al.
+2005) operating in the UV to optical domain (170 − 600 nm).
+DuringtheFRB20171019Amulti-wavelength(MWL)observing
+campaign,two2kstarget-of-opportunity(ToO)observationswere
+performed with Swift from 2019-09-28 18:37:02 to 2019-09-28
+21:52:54and2019-10-1818:03:00to2019-10-1820:03:00onthe
+FRB 20171019A localisation region. Simultaneously with Swift-
+XRT,fiveUVOTimagesweretakenwiththeUVM2filter(central
+wavelength = 2246 Å) over the 2 epochs with a total exposure of 4 ks.
+Theimagesareaspect-correctedandsummedwiththeuvotimsum
+tool (HEASOFT 6.26). Observations were performed with Swift-
+XRTinthestandardPhotonCountingobservingmode(PC).The
+XRTPCdataareprocessedwithxrtpipeline(HEASOFT 6.26).
+Asummedimageisextractedwithxselect.
+2.4 Very-highenergygamma-rayobservationswithH.E.S.S.
+ObservationsofFRB20171019Awerealsoobtainedinthevery-
+highenergygamma-raydomainwiththeH.E.S.S.imagingatmo-
+sphericCherenkovtelescopearray,sensitiveintherangebetweena
+fewtensofGeVsand100TeV.H.E.S.S.islocatedontheKhomas
+Highland plateau of Namibia ($23^{\circ}16'18''$ South, $16^{\circ}30'00''$ East),
+atanelevationof∼1800mabovesealevel.Observationstookplace
+contemporaneouslytothefirstepochofMeerKATobservationsof
+FRB20171019Adescribedabove.Thedatasetwasobtainedwith
+theH.E.S.S.phaseIIarray,includingtheupgraded12m-diameter
+CT1-4telescopes(Ashtonetal.2020)andthelarge28m-diameter
+6 https://github.com/e-merlin/eMERLIN_CASA_pipeline
+CT5telescope(Bolmontetal.2014).Astandarddataqualityselec-
+tionwasappliedtothedata(Aharonianetal.2006).Theeventshave
+thenbeenselectedandtheirdirectionandenergyreconstructedus-
+ingalog-likelihoodminimizationcomparingtherecordedshower
+imagesofalltriggeredtelescopes(requiringatleasttwotelescopes
+toseethesamegamma-rayevent)toasemi-analyticalmodelofair
+showers(deNaurois&Rolland2009).
+Wedefineacircularregion-of-interestcenteredontheposition
+ofFRB20171019Awitharadiusof0.12◦,optimalforapoint-like
+sourceofemissionasexpectedfromFRB20171019A.Theback-
+groundlevelinthisONregionwasdeterminedusingthestandard
+“ringbackground”technique(Bergeetal.2007)basedonaradially
+symmetricringaroundthesourceposition.Thistechniqueallowsus
+toderivethebackgroundlevelfromthesamefieldofviewandas-
+suresthatthegamma-raysignalandbackgroundareestimatedwith
+thesameacceptanceandunderthesameobservationconditions.
+3 RESULTS
+3.1 MeerKAT
+ThetheoreticalthermalnoiseoftheMeerKATcanbecalculatedas
+$$S_{\mathrm{rms}} = \frac{1}{\eta_{c}}\,\frac{\mathrm{SEFD}}{\sqrt{n_{\mathrm{pol}} \times N(N-1) \times \Delta\nu \times t_{\mathrm{int}}}}. \qquad (1)$$
+The system equivalent flux density (SEFD) of MeerKAT at the
 1.28GHzis443Jyand𝜂 𝑐isthecorrelatorefficiency.Weused𝑛
-space mission operating in soft-X-rays and optical/UV. Here we pol
+pol
 =2polarisationproducts(XXandYY),N=64telescopes,Δ𝜈 =
-use data from the X-ray Telescope (XRT) (Burrows et al. 2005)
-whichoperatesinthesoftX-raydomainof0.3−10keVaswellas 856MHz bandwidth and 𝑡 int = 21600 sec observing time for one
+856MHz bandwidth and 𝑡 int = 21600 sec observing time for one
 epoch.Thisgivesthetheoreticalrmsof∼2𝜇Jybeam−1.Thetypical
-data taken by the UV/Optical Telescope (UVOT) (Roming et al.
 imagermsobtainedfromourresidualimagesis∼5𝜇Jybeam−1,
-2005) operating in the UV to optical domain (170 − 600 nm).
 whichis2.5timestheexpectedtheoreticalrms.ThewidebandMFS
-DuringtheFRB20171019Amulti-wavelength(MWL)observing
 imagedoesnotallowprimarybeamcorrectionprocedureasthiscan
-campaign,two2kstarget-of-opportunity(ToO)observationswere
 onlybedoneonthesub-bandimageswithlimitedrmsfordetection
-performed with Swift from 2019-09-28 18:37:02 to 2019-09-28
 ofthesources.However,oursourcesarethephasecentresofour
-21:52:54and2019-10-1818:03:00to2019-10-1820:03:00onthe
 fieldsandthusunaffectedbytheeffectoftheprimarybeam.
-FRB 20171019A localisation region. Simultaneously with Swift-
 Due to the lack of MeerKAT primary beam correction, we
-XRT,fiveUVOTimagesweretakenwiththeUVM2filter(central
 did not compare the flux densities of the discrete sources with
-wavelengh=2246Å)overthe2epochswithatotalexposureof4ks.
 theirNRAO(NationalRadioAstronomyObservatory)VLA(Very
-Theimagesareaspect-correctedandsummedwiththeuvotimsum
 LargeArray)SkySurvey(NVSS)counterparts.However,Chibueze
-tool (HEASOFT 6.26). Observations were performed with Swift-
 et al. (2021, submitted) confirmed that the overall flux densities
-XRTinthestandardPhotonCountingobservingmode(PC).The
 obtained with MeerKAT and NVSS are in good agreement with
-XRTPCdataareprocessedwithxrtpipeline(HEASOFT 6.26).
 eachotherwithinerrorsof∼5%.Wecomparedtheastrometryof
-Asummedimageisextractedwithxselect.
 the discrete radio sources obtained with MeerKAT and NVSS in
 Figure 1. The position uncertainty of the MeerKAT ranges from
-0(cid:48).(cid:48)2(closetothecentreoftheprimarybeam)toafewarcseconds
-2.4 Very-highenergygamma-rayobservationswithH.E.S.S.
+$0''.2$ (close to the centre of the primary beam) to a few arcseconds
 towards the edge of the primary beam. The scatter observed in
-ObservationsofFRB20171019Awerealsoobtainedinthevery- Figure1ismostlyduetotheprobabilityofthecentroidsofemission
-highenergygamma-raydomainwiththeH.E.S.S.imagingatmo- inthe∼45(cid:48)(cid:48)NVSSresolutionbeingdifferentfromthecentroidsat
-sphericCherenkovtelescopearray,sensitiveintherangebetweena MeerKAT’sresolutionandpartlyduetohigherpositionuncertainty
-fewtensofGeVsand100TeV.H.E.S.S.islocatedontheKhomas ofthefaintersources.Therefore,weconcludethatourMeerKAT
-HighlandplateauofNamibia(23◦16(cid:48)18(cid:48)(cid:48)South,16◦30(cid:48)00(cid:48)(cid:48)East), dataarewellcalibratedandthefluxdensityandastrometryareas
-atanelevationof∼1800mabovesealevel.Observationstookplace accurateastheerrorsindicate.
-contemporaneouslytothefirstepochofMeerKATobservationsof
-FRB20171019Adescribedabove.Thedatasetwasobtainedwith
+Figure1ismostlyduetotheprobabilityofthecentroidsofemission
+in the $\sim45''$ NVSS resolution being different from the centroids at
+MeerKAT’sresolutionandpartlyduetohigherpositionuncertainty
+ofthefaintersources.Therefore,weconcludethatourMeerKAT
+dataarewellcalibratedandthefluxdensityandastrometryareas
+accurateastheerrorsindicate.
 3.1.1 Lookingforpersistentcontinuumemissionassociatedwith
-theH.E.S.S.phaseIIarray,includingtheupgraded12m-diameter
 theFRBfields
-CT1-4telescopes(Ashtonetal.2020)andthelarge28m-diameter
 ConsideringtheresultsoftheastrometriccomparisonwithNVSS
 (see Figure 1), we considered potential associations of contin-
-6 https://github.com/e-merlin/eMERLIN_CASA_pipeline uum sources in the MeerKAT observations with the FRB loca-
+uum sources in the MeerKAT observations with the FRB loca-
 MNRAS000,1–15(2021)
 MeerKAT,e-MERLIN, SwiftandH.E.S.S.,observationsofthreelocalisedFRBs 5
-tiontosourceswithin5(cid:48)(cid:48).Usingthisspatialcoincidencecriterion, 3.2 Swift
+tion to sources within $5''$. Using this spatial coincidence criterion,
 we identified a persistent 1283MHz continuum source near FRB
-The UVOT summed image is presented in Figure 4. The UVOT
 20190714A, detected in both the 14 September 2019 and the 28
-fieldofviewcorrespondsroughlytotheuncertainty7ofthelocali-
 September2019epoch.ThepeakoftheMeerKATradioemission
-sationregionofFRB20171019A(RA=7.5(cid:48)andDEC=7(cid:48)).Using
 isoffsetby∼2(cid:48)(cid:48).1fromthepeakofthe𝑖-bandmagnitudeoftheop-
-uvotdetect,wefind30sourcesabovethe5𝜎levelandwithinthe
 ticalgalaxyidentifiedinthePanoramicSurveyTelescopeandRapid
-FRB 20171019A uncertainty region. Using a 3 arcsec maximum
 ResponseSystem(PanSTARRS,locatedatHaleakalaObservatory)
-separation,whichisslightlylargerthantheUVOTPSF(Breeveld
 image(shownascontoursinFigures2and3).TheMeerKATra-
-dio source is offset by 1(cid:48).(cid:48)68 from the localisation region of FRB etal.2010),thesesourcesarecross-matchedwithknowncatalogue
-sources.Wefindthatoutofthe30sourcesdetectedbyUVOT,28
+dio source is offset by $1''.68$ from the localisation region of FRB
 20190714(cyancircleinFigures2and3).
-arespatiallycoincidentwithstarscataloguedintheSDSScatalogue
-(DR12;Alametal.2015),andonesourceiscoincidentwithagalaxy
-(AGNbroadlineSDSSID:1237652599570890948at𝑧 ∼ 0.156).
-3.1.2 e-MERLINdetectionofcompactemissiontowards ThisgalaxyisalsodetectedbytheMeerKATradioobservations.We
-FRB20190714 usetheNASA/IPACExtragalacticDatabase(NED)8 tosearchfor
-knowngalaxiesintheFRB20171019Auncertaintyregions.Wefind
+3.1.2 e-MERLINdetectionofcompactemissiontowards
+FRB20190714
 Compact persistent emission was detected in the 1.51GHz e-
-MERLINimageatR.A.=12ℎ 15𝑚 55𝑠 .116,Dec.=−13◦01(cid:48)14(cid:48).(cid:48)48 m cou nl cti lp ul se iog na sla ox nie ts hew hit oh su tn gk an lao xw yn frr oe mdsh oi uf rts o, bth se er re vf ao tr ioe nw se .Uca sn inn got ad 5ra 0w
-(cid:48)(cid:48)
+MERLIN image at R.A. = $12^{\rm h}15^{\rm m}55^{\rm s}.116$, Dec. = $-13^{\circ}01'14''.48$
 at 86𝜇Jybeam−1 by e-MERLIN. The stochastic position uncer-
-circularONregioncentredonthepositionofFRB20171019Aand
-tainty is (0.04, 0.15) arcsec and the uncertainty (due to the sepa- a50(cid:48)(cid:48) OFFregionthatdoesnotcontainanyofthedetectedsources,
+tainty is (0.04, 0.15) arcsec and the uncertainty (due to the sepa-
 ration between phase-calibrator and target, and antenna position
-weruntheuvotsourcetoolwitha5𝜎backgroundthresholdand
-uncertainty) is (0.013, 0.056) arcsec, giving a total astrometric obtainafluxupperlimitof1.4×10−16 ergcm−2s−1Å−1 without
+uncertainty) is (0.013, 0.056) arcsec, giving a total astrometric
 uncertainty of (0.04, 0.16) arcsec in R.A. and Dec., respectively.
-applyingaCalacticextinctioncorrection.
 The offset from the FRB position is negligible in R.A. and 1.2
-The XRT summed image is shown in Figure 5. At the edge
 arcsec in Dec. The rms in this region (of full primary beam sen-
-of the field-of-view, we detect a source spatially coincident with
-sitivity) is 20𝜇Jybeam−1, making this a 4.3𝜎 rms detection. It is the Wolf 1561 star. As we consider this source unrelated to the
+sitivity) is 20𝜇Jybeam−1, making this a 4.3𝜎 rms detection. It is
 ∼1.5𝜎 rmshigherthanthatoftheMeerKATdetection.Althoughthe
-FRB,weusetheonlineSwift-XRTdataproductsgenerator(Evans
 e-MERLINfluxscalenominaluncertaintyis∼5%,inthesedatait
-et al. 2007) (Evans et al. 2009) to derive upper limits in the 0.3-
-ispossiblyhigherduetothelowdeclinationofthephase-reference 10 keV range on the count rate of 0.001885 counts.s−1. Using
+ispossiblyhigherduetothelowdeclinationofthephase-reference
 source and to the strong RFI which were removed from the data
-but may have affected the linearity of the receiver response. The W 10e 2b 0PI cM mM −S 29( fv ro4 m.11 ta h) ean dd ira es cs tiu om nin og fa thw eei sg oh ut re cd ea ev se tr ia mg ae t𝑁 edH f= ro5 m.1 t2 h×
-e
-peakofthee-MERLINradioemissionisoffsetby∼1(cid:48).(cid:48)4fromthe
-NASA’s HEASARC 10 online tools (HI4PI Collaboration et al.
+but may have affected the linearity of the receiver response. The
+peak of the e-MERLIN radio emission is offset by $\sim1.4''$ from the
 peakofthePanSTARRS𝑖-bandemissioninFigures2and3.The
-2016)andapowerlawmodelwithaphotonindex=2,thisupper
-e-MERLINradiosource(shownbythecyancrossinFigures2and limittranslatestoanenergyfluxof6.6×10−14ergcm−2s−1(8.3×
-3)isoffsetby0(cid:48).(cid:48)53fromthelocalisedpositionofFRB20190714.
-10−14ergcm−2s−1unabsorbed).
+e-MERLINradiosource(shownbythecyancrossinFigures2and
+3) is offset by $0.53''$ from the localised position of FRB 20190714.
 Weestimatetheprobabilityofachancealignmentofaback-
 ground persistent radio source and the host galaxy, following the
 procedureofEftekharietal.(2018).InsteadofusingtheFRBlo-
-3.3 H.E.S.S.
 calisationregion,weusetheareaofthegalaxy,whichistakenas
 $2''\times2''$, twice the half-light radius from Heintz et al. (2020). Given
-No significant gamma-ray excess above the expected background
 the source has a flux density of ∼ 90𝜇Jy we estimate the chance
-isdetectedfromthedirectionofFRB20171019A,with52gamma
 alignmentprobabilityof0.0008,whichcorrespondsto3.4𝜎.The
-candidateeventsfromthesourceregionand524backgroundevent.
 fluxdensitythreshold,assuming3𝜎,foranunresolvedradiosource
-Asecondanalysisusinganindependenteventcalibrationandrecon-
 is∼15𝜇Jy.Ifinsteadweconsidertheprobabilityofdetectingany
-struction(Parsons&Hinton2014)confirmsthisresult.Asearchfor
 radiosourceaboveourfluxdensitythresholdof15𝜇Jy,theprobabil-
-variableemissionontimescalesrangingfrommillisecondstosev-
 ityofachancealignmentis,therefore,approximately0.8%,making
-eralminuteswithtoolsprovidedin(Brunetal.2020)doesnotreveal
 thestatisticalsignificanceofourdetection2.6𝜎.Thisrepresentsthe
-anyvariabilityabove2.2𝜎.Forthetotaldatasetof1.8h,95%confi-
 firstdetectionofradiocontinuumemissionassociatedwiththehost
-dencelevel(C.L.)upperlimitsonthephotonfluxarederivedusing
 (galaxy)ofFRB20190714A(seeFigure2and3).
+3.1.3 MeerKATnon-detections
+NocontinuumemissionwasdetectednearFRBs20171019Aand
+20190711A. As each of the images of these sources has an rms
+of ∼5𝜇Jy beam−1, the 3𝜎 intensity upper limit of any emission
+associatedwithFRBs20171019Aand20190711Awillbe∼15𝜇Jy
+beam−1(seeTable1).
+Candidatepulsesaboveasignal-to-noise(S/N)of10fromthe
+singlepulsesearchwithMeerTRAPwerevisuallyinspectedoffline.
+NonewFRBsorrepeatburstsfromtheknownFRBsweredetected
+aboveafluencethresholdof0.08Jymsassuminga1msduration
+burst.
+3.2 Swift
+The UVOT summed image is presented in Figure 4. The UVOT
+fieldofviewcorrespondsroughlytotheuncertainty7ofthelocali-
+sation region of FRB 20171019A (RA = $7.5'$ and DEC = $7'$). Using
+uvotdetect,wefind30sourcesabovethe5𝜎levelandwithinthe
+FRB 20171019A uncertainty region. Using a 3 arcsec maximum
+separation,whichisslightlylargerthantheUVOTPSF(Breeveld
+etal.2010),thesesourcesarecross-matchedwithknowncatalogue
+sources.Wefindthatoutofthe30sourcesdetectedbyUVOT,28
+arespatiallycoincidentwithstarscataloguedintheSDSScatalogue
+(DR12;Alametal.2015),andonesourceiscoincidentwithagalaxy
+(AGNbroadlineSDSSID:1237652599570890948at𝑧 ∼ 0.156).
+ThisgalaxyisalsodetectedbytheMeerKATradioobservations.We
+usetheNASA/IPACExtragalacticDatabase(NED)8 tosearchfor
+knowngalaxiesintheFRB20171019Auncertaintyregions.Wefind
+multiple galaxies with unknown redshifts, therefore we cannot draw conclusions on the host galaxy from our observations. Using a $50''$
+circular ON region centred on the position of FRB 20171019A and
+a $50''$ OFF region that does not contain any of the detected sources,
+we run the uvotsource tool with a $5\sigma$ background threshold and
+obtain a flux upper limit of $1.4\times10^{-16}$ erg cm$^{-2}$ s$^{-1}$ Å$^{-1}$ without
+applying a Galactic extinction correction.
+The XRT summed image is shown in Figure 5. At the edge
+of the field-of-view, we detect a source spatially coincident with
+the Wolf 1561 star. As we consider this source unrelated to the
+FRB,weusetheonlineSwift-XRTdataproductsgenerator(Evans
+et al. 2007, 2009) to derive upper limits in the 0.3-
+10 keV range on the count rate of 0.001885 counts s$^{-1}$. Using
+WebPIMMS$^{9}$ (v4.11a) and assuming a weighted average
+$N_{\mathrm{H}} = 5.12\times10^{20}$ cm$^{-2}$ from the direction of the source estimated from the
+NASA’s HEASARC$^{10}$ online tools (HI4PI Collaboration et al.
+2016) and a power law model with a photon index = 2, this upper
+limit translates to an energy flux of $6.6\times10^{-14}$ erg cm$^{-2}$ s$^{-1}$
+($8.3\times10^{-14}$ erg cm$^{-2}$ s$^{-1}$ unabsorbed).
+3.3 H.E.S.S.
+No significant gamma-ray excess above the expected background
+isdetectedfromthedirectionofFRB20171019A,with52gamma
+candidate events from the source region and 524 background events.
+Asecondanalysisusinganindependenteventcalibrationandrecon-
+struction(Parsons&Hinton2014)confirmsthisresult.Asearchfor
+variableemissionontimescalesrangingfrommillisecondstosev-
+eralminuteswithtoolsprovidedin(Brunetal.2020)doesnotreveal
+anyvariabilityabove2.2𝜎.Forthetotaldatasetof1.8h,95%confi-
+dencelevel(C.L.)upperlimitsonthephotonfluxarederivedusing
 themethoddescribedbyRolkeetal.(2005).Theenergythreshold
 ofthedataishighlydependentonthezenithangleoftheobserva-
 tions.Fortheseobservations,thezenithanglesrangefrom15to25
-3.1.3 MeerKATnon-detections deg,whichleadstoanenergythresholdforthestackeddatasetof
-𝐸 = 120GeV.TheupperlimitontheVeryHighEnergy(VHE)
+deg,whichleadstoanenergythresholdforthestackeddatasetof
+𝐸
 th
-NocontinuumemissionwasdetectednearFRBs20171019Aand
-20190711A. As each of the images of these sources has an rms
-of ∼5𝜇Jy beam−1, the 3𝜎 intensity upper limit of any emission
-associatedwithFRBs20171019Aand20190711Awillbe∼15𝜇Jy 7 https://www.wis-tns.org/object/20171019a
-beam−1(seeTable1). 8 https://ned.ipac.caltech.edu; NED is funded by the National
+= 120GeV.TheupperlimitontheVeryHighEnergy(VHE)
+7 https://www.wis-tns.org/object/20171019a
+8 https://ned.ipac.caltech.edu; NED is funded by the National
 AeronauticsandSpaceAdministrationandoperatedbytheCaliforniaInsti-
-Candidatepulsesaboveasignal-to-noise(S/N)of10fromthe
 tuteofTechnology
-singlepulsesearchwithMeerTRAPwerevisuallyinspectedoffline. 9 https://heasarc.gsfc.nasa.gov/cgi-bin/Tools/w3pimms/
-NonewFRBsorrepeatburstsfromtheknownFRBsweredetected w3pimms.pl
-aboveafluencethresholdof0.08Jymsassuminga1msduration 10 https://heasarc.gsfc.nasa.gov/cgi-bin/Tools/w3nh/w3nh.
-burst. pl
+9 https://heasarc.gsfc.nasa.gov/cgi-bin/Tools/w3pimms/
+w3pimms.pl
+10 https://heasarc.gsfc.nasa.gov/cgi-bin/Tools/w3nh/w3nh.
+pl
 MNRAS000,1–15(2021)
 6 Chibuezeetal.
 Figure1. AstrometriccomparisonbetweenMeerKATandNVSSdiscretecompactsources.Theopencirclesrepresentthedifferenceinpositionbetweenthe
 MeerKATandNVSSsources.
-gamma-rayfluxabovethatthresholdandassuminganenergydepen- thatofthepersistentsourceassociatedwithFRBs20121102A,one
-dencefollowing𝐸−2isΦ(𝐸 >120GeV) <2.10×10−12cm−2s−1 ofthemostprolificrepeaters,locatedat𝑧=0.19273(8).Persistent
-or Φ(𝐸 > 120GeV) < 1.7×10−12ergcm−2s−1. A variation of radioemissionfromFRB20201124AwasdetectedbytheuGMRT
-±0.5oftheassumedspectralindexleadstoavariationintheupper (Whartonetal.2021)andtheJVLA(Riccietal.2021)onangular
-limitoflessthan±19%.Amapofenergyfluxupperlimitscovering scalesofafewarcseconds.However,itisresolvedoutatscalesof
-the full region accessible within the H.E.S.S. field of view above ∼0.1arcsecondswiththeEuropeanVLBINetwork(Marcoteetal.
-120GeVisgiveninFigure6. 2021)suggestingthatitisnotacompactsourcedirectlyassociated
+gamma-rayfluxabovethatthresholdandassuminganenergydepen-
+dence following $E^{-2}$ is $\Phi(E > 120\,\mathrm{GeV}) < 2.10\times10^{-12}$ cm$^{-2}$ s$^{-1}$
+or $\Phi(E > 120\,\mathrm{GeV}) < 1.7\times10^{-12}$ erg cm$^{-2}$ s$^{-1}$. A variation of
+±0.5oftheassumedspectralindexleadstoavariationintheupper
+limitoflessthan±19%.Amapofenergyfluxupperlimitscovering
+the full region accessible within the H.E.S.S. field of view above
+120GeVisgiveninFigure6.
+4 DISCUSSION
+Of the targeted FRB fields reported here, only FRB 20190714A
+isobservedtobespatiallycoincidentwithapersistentradiocon-
+tinuumsource.Weobtainanupperlimitof∼ 15𝜇Jybeam−1 for
+FRBs20190711Aand20171019A,respectively,andapeakinten-
+sity of ∼ 53𝜇Jy beam−1 for the emission coincident with FRB
+20190714A. This source is detected at both epochs with similar
+intensitieswithinthemeasuredrmsoftheimages(seeTables1and
+2 for details). The values in the Table 2 are derived by carrying
+out 2D Gaussian fit using similar ellipses enclosing the detected
+persistentemission.Theaveragefluxdensityis∼3timeslessthan
+thatofthepersistentsourceassociatedwithFRBs20121102A,one
+ofthemostprolificrepeaters,locatedat𝑧=0.19273(8).Persistent
+radioemissionfromFRB20201124AwasdetectedbytheuGMRT
+(Whartonetal.2021)andtheJVLA(Riccietal.2021)onangular
+scalesofafewarcseconds.However,itisresolvedoutatscalesof
+∼0.1arcsecondswiththeEuropeanVLBINetwork(Marcoteetal.
+2021)suggestingthatitisnotacompactsourcedirectlyassociated
 with the FRB. In contrast, the other localised, prolific repeating
 FRB20180916Ahasnopersistentradiocounterpart.
-4 DISCUSSION
 IntheimageinFigure3onecanseethatthepersistentradio
-Of the targeted FRB fields reported here, only FRB 20190714A source lies at the edge of the optical extent of the host galaxy
-isobservedtobespatiallycoincidentwithapersistentradiocon- as seen in PanSTARRS observations (Heintz et al. 2020). Our
-tinuumsource.Weobtainanupperlimitof∼ 15𝜇Jybeam−1 for derived 1283MHz peak position with MeerKAT places it just
-FRBs20190711Aand20171019A,respectively,andapeakinten- 1(cid:48).(cid:48)68awayfromthepositionofFRB20190714A(𝛼 𝐽2000,𝛿 𝐽2000
-sity of ∼ 53𝜇Jy beam−1 for the emission coincident with FRB = 12ℎ 15𝑚 55𝑠 .12, -13◦01(cid:48)15(cid:48).(cid:48)70; Heintz et al. 2020). The posi-
-20190714A. This source is detected at both epochs with similar tionaluncertaintyontheFRBpositionis0(cid:48).(cid:48)283.Similarly,thepeak
-intensitieswithinthemeasuredrmsoftheimages(seeTables1and 1.51GHze-MERLINpositionofthepersistentradiosourceissepa-
-2 for details). The values in the Table 2 are derived by carrying ratedfromthepositionofFRB20190714Aby0(cid:48).(cid:48)53.Thepersistent
-out 2D Gaussian fit using similar ellipses enclosing the detected sourcenearFRB20190714Ahasafluxbroadlyconsistentwiththe
-persistentemission.Theaveragefluxdensityis∼3timeslessthan MeerKATfluxandisunresolvedonthee-MERLINbaselines.The
+source lies at the edge of the optical extent of the host galaxy
+as seen in PanSTARRS observations (Heintz et al. 2020). Our
+derived 1283MHz peak position with MeerKAT places it just
+$1.68''$ away from the position of FRB 20190714A ($\alpha_{\mathrm{J2000}}$, $\delta_{\mathrm{J2000}}$
+= $12^{\mathrm{h}}15^{\mathrm{m}}55.12^{\mathrm{s}}$, $-13^{\circ}01'15.70''$; Heintz et al. 2020). The posi-
+tional uncertainty on the FRB position is $0.283''$. Similarly, the peak
+1.51 GHz e-MERLIN position of the persistent radio source is sepa-
+rated from the position of FRB 20190714A by $0.53''$. The persistent
+sourcenearFRB20190714Ahasafluxbroadlyconsistentwiththe
+MeerKATfluxandisunresolvedonthee-MERLINbaselines.The
 MNRAS000,1–15(2021)
 MeerKAT,e-MERLIN, SwiftandH.E.S.S.,observationsofthreelocalisedFRBs 7
 Figure2. FRB20190714AMeerKATepochIimage(top)andazoom-in(bottom)aroundthepositionoftheFRBindicatedbythecyancircle.Whitecontours
@@ -437,213 +586,330 @@ etal.(2019).
 Table1.DetailsoftheFRBfieldsobservedwithMeerKAT.
 Fieldname Observationdate Synthesizedbeam rms(𝜇Jybeam−1) Detected?
 FRB20171019A 28September2019 – No(calibrationfailure)
-FRB20171019A 18October2019 6(cid:48).(cid:48)8×5(cid:48).(cid:48)0 5.2 <15𝜇Jybeam−1
-FRB20190711A 23August2019 11(cid:48).(cid:48)7×4(cid:48).(cid:48)9 4.9 <15𝜇Jybeam−1
-FRB20190711A 09September2019 12(cid:48).(cid:48)5×4(cid:48).(cid:48)9 4.6 <15𝜇Jybeam−1
-FRB20190714A 14September2019 7(cid:48).(cid:48)1×6(cid:48).(cid:48)2 4.2 54.4𝜇Jybeam−1
-FRB20190714A 28September2019 6(cid:48).(cid:48)5×5(cid:48).(cid:48)1 5.8 52.0𝜇Jybeam−1
+FRB 20171019A 18 October 2019 $6.8''\times5.0''$ 5.2 <15 $\mu$Jy beam$^{-1}$
+FRB 20190711A 23 August 2019 $11.7''\times4.9''$ 4.9 <15 $\mu$Jy beam$^{-1}$
+FRB 20190711A 09 September 2019 $12.5''\times4.9''$ 4.6 <15 $\mu$Jy beam$^{-1}$
+FRB 20190714A 14 September 2019 $7.1''\times6.2''$ 4.2 54.4 $\mu$Jy beam$^{-1}$
+FRB 20190714A 28 September 2019 $6.5''\times5.1''$ 5.8 52.0 $\mu$Jy beam$^{-1}$
 Table2.DetailsoftheradiocontinuumsourceassociatedwithFRB20190714A.
 Fieldname Observationdate Telescope 𝜈 centre(GHz) 𝛼 J2000 𝛿 J2000 Maj.×min.axis Pos.angle Int.fluxdensity
-FRB20190714A 28September2019 MeerKAT 1.283 12ℎ15𝑚55𝑠.154 -13◦01(cid:48)17(cid:48).(cid:48)30 9(cid:48).(cid:48)6×7(cid:48).(cid:48)4 88.7◦ 87.4𝜇Jy
-FRB20190714A 18October2019 MeerKAT 1.283 12ℎ15𝑚55𝑠.193 −13◦01(cid:48)17(cid:48).(cid:48)18 8(cid:48).(cid:48)2×6(cid:48).(cid:48)4 12.2◦ 80.7𝜇Jy
-FRB20190714A 13January2021 e-MERLIN 1.510 12ℎ15𝑚55𝑠.116 −13◦01(cid:48)14(cid:48).(cid:48)51 0(cid:48).(cid:48)15×0(cid:48).(cid:48)65 17.6◦ 107.5𝜇Jy
-largeoffsetfromthecentreofthegalaxymakesthepersistentsource sufficientsensitivityinthesub-bandimages,thus,weareunableto
-unlikelytobeanAGN.SofarthisFRBhasnotbeenseentorepeat. derivethespectralindexoftheemissionofthehostgalaxy.
+FRB 20190714A 28 September 2019 MeerKAT 1.283 $12^{\mathrm{h}}15^{\mathrm{m}}55.154^{\mathrm{s}}$ $-13^{\circ}01'17.30''$ $9.6''\times7.4''$ $88.7^{\circ}$ 87.4 $\mu$Jy
+FRB 20190714A 18 October 2019 MeerKAT 1.283 $12^{\mathrm{h}}15^{\mathrm{m}}55.193^{\mathrm{s}}$ $-13^{\circ}01'17.18''$ $8.2''\times6.4''$ $12.2^{\circ}$ 80.7 $\mu$Jy
+FRB 20190714A 13 January 2021 e-MERLIN 1.510 $12^{\mathrm{h}}15^{\mathrm{m}}55.116^{\mathrm{s}}$ $-13^{\circ}01'14.51''$ $0.15''\times0.65''$ $17.6^{\circ}$ 107.5 $\mu$Jy
+largeoffsetfromthecentreofthegalaxymakesthepersistentsource
+unlikelytobeanAGN.SofarthisFRBhasnotbeenseentorepeat.
 Higherresolutionimagingwillberequiredtobecertainofadirect
-Our e-MERLIN observations probe a different spatial
 associationofthepersistentsourcewiththeFRB.Wedidnothave
+sufficientsensitivityinthesub-bandimages,thus,weareunableto
+derivethespectralindexoftheemissionofthehostgalaxy.
+Our e-MERLIN observations probe a different spatial
 scale than the size of the persistent radio source associated
 with FRB 20121102A. At the angular diameter distance of
 MNRAS000,1–15(2021)
 10 Chibuezeetal.
 Figure5.XRTsummedimageofFRB20171019AregiontakenduringtheMWLobservationcampaigninSeptember-October2019.Thepositionofthe
 Wolf1561starisshownincyanandislabelled.ThegreenboxindicatesFRB20171019A90%localisationregionasreportedinKumaretal.(2019).
-FRB 20190714A (780 Mpc), an unresolved source with an an- tentradioluminosity.Thesevaluesareexpectedtodecreaseona
-gularsizeof0(cid:48).(cid:48)6correspondstoaphysicalextentof(cid:46)2.3kpc.The timescaleofafewdecadestocenturies.Giventheassociationofa
-uGMRTreportedthedetectionofanunresolvedradioemissionat comparativelyfainterpersistentsource,FRB20190714Amaypo-
-650MHzwithafluxdensityof700±100𝜇Jy(Whartonetal.2021), tentiallybearepeatingFRBwhoseageliesbetweenthatofFRB
-whiletheJVLAdetectedpersistentemissionwithafluxdensityof 20121102AandFRB20180916A.Millisecondmagnetarsformed
-340±30𝜇Jyat3GHz(Riccietal.2021).Assumingtheestimated throughstandardastrophysicalchannelssuchashydrogenpoorsu-
-spectralindexbetweenthesefrequencies(∼−0.5,Riccietal.2021), perluminous supernovae and long duration gamma-ray bursts are
-the 1.3GHz flux density would be ∼ 500𝜇Jy (similar to the 3-𝜎 consistentwiththeprogenitorsofFRBsexpectedinlow-metallicity
-upperlimitonobservationsfrom1−2GHz;Lawetal.2021).The dwarf galaxies with high specific star-formation rate such as for
-fluxdensitywemeasuredforFRB20190714Aisafactorof∼10 FRB20121102A.However,Margalitetal.(2019)notethatitisalso
-lowerthanFRB20201124A,butFRB20190714Aisalsoafactor possibletoformsuchsourcesthroughavarietyofchannels,includ-
-2.6moredistant.Therefore,thefluxdensitieswouldbecomparable ingbinaryneutronstarmergersandaccretioninducedcollapseof
-iftheywereatsimilardistances. whitedwarfsinenvironmentsandhostgalaxydemographicsdiffer-
-enttoFRB20121102A.Suchsuggestionsareconsistentwithrecent
+FRB 20190714A (780 Mpc), an unresolved source with an an-
+gular size of $0.6''$ corresponds to a physical extent of $\lesssim 2.3$ kpc. The
+uGMRTreportedthedetectionofanunresolvedradioemissionat
+650MHzwithafluxdensityof700±100𝜇Jy(Whartonetal.2021),
+whiletheJVLAdetectedpersistentemissionwithafluxdensityof
+340±30𝜇Jyat3GHz(Riccietal.2021).Assumingtheestimated
+spectralindexbetweenthesefrequencies(∼−0.5,Riccietal.2021),
+the 1.3GHz flux density would be ∼ 500𝜇Jy (similar to the 3-𝜎
+upperlimitonobservationsfrom1−2GHz;Lawetal.2021).The
+fluxdensitywemeasuredforFRB20190714Aisafactorof∼10
+lowerthanFRB20201124A,butFRB20190714Aisalsoafactor
+2.6moredistant.Therefore,thefluxdensitieswouldbecomparable
+iftheywereatsimilardistances.
 Given the resolution of MeerKAT we are unable to defini-
-localisations(e.g.Heintzetal.2020).
 tively state whether the persistent emission is associated with a
-star-formingregionortheFRBitself.However,theincreasedreso- The X-ray and VHE observations with Swift and H.E.S.S.
-lutionwiththee-MERLINbaselineswouldtendtofavouracompact allows us to probe non-thermal persistent emission associated to
-sourcesimilartotheoneobservedinFRB20121102A.Oneofthe the FRB host galaxy or its source. Recently, H.E.S.S. observed
-leading models to explain the bursts from, and radio counterpart SGR1935+2154 (H.E.S.S. collaboration 2021) that is a Galactic
-to FRB 20121102A, is a young nebula powered flaring magnetar magnetarlinkedtoarepeatingFRBanditsfirstX-raycounterpart.
-embedded in a 20–50 year-old supernova remnant (Beloborodov MagnetarX-rayflarescouldinfactbenon-thermalinnature(Lietal.
-2017; Metzger et al. 2019). The lack of a bright persistent radio 2021)indicatingthepresenceofparticleaccelerationthatcouldpo-
-sourceassociatedwiththerepeaterFRB20180916Asuggeststhat tentiallyreachtheVHEdomain.TheinverseComptonprocessisa
-it is comparatively older at (cid:38) 200−500 years and the persistent primarycandidatefortheproductionofVHEnon-thermalemission.
-radiosourcemayhavefaded.InthemodelbyMetzgeretal.(2019), H.E.S.S.observationsdidnotleadtoadetectionofapersistentora
-the nebula is suggested to contribute significantly to the rotation transientsourceassociatedtoFRB20171019A.WefoundnoX-ray
-measure and dispersion measure (DM), as well as to the persis- counterparts and thus derived the upper limits to constrain these
+star-formingregionortheFRBitself.However,theincreasedreso-
+lutionwiththee-MERLINbaselineswouldtendtofavouracompact
+sourcesimilartotheoneobservedinFRB20121102A.Oneofthe
+leading models to explain the bursts from, and radio counterpart
+to FRB 20121102A, is a young nebula powered flaring magnetar
+embedded in a 20–50 year-old supernova remnant (Beloborodov
+2017; Metzger et al. 2019). The lack of a bright persistent radio
+sourceassociatedwiththerepeaterFRB20180916Asuggeststhat
+it is comparatively older at $\gtrsim 200$–500 years and the persistent
+radiosourcemayhavefaded.InthemodelbyMetzgeretal.(2019),
+the nebula is suggested to contribute significantly to the rotation
+measure and dispersion measure (DM), as well as to the persis-
+tentradioluminosity.Thesevaluesareexpectedtodecreaseona
+timescaleofafewdecadestocenturies.Giventheassociationofa
+comparativelyfainterpersistentsource,FRB20190714Amaypo-
+tentiallybearepeatingFRBwhoseageliesbetweenthatofFRB
+20121102AandFRB20180916A.Millisecondmagnetarsformed
+throughstandardastrophysicalchannelssuchashydrogenpoorsu-
+perluminous supernovae and long duration gamma-ray bursts are
+consistentwiththeprogenitorsofFRBsexpectedinlow-metallicity
+dwarf galaxies with high specific star-formation rate such as for
+FRB20121102A.However,Margalitetal.(2019)notethatitisalso
+possibletoformsuchsourcesthroughavarietyofchannels,includ-
+ingbinaryneutronstarmergersandaccretioninducedcollapseof
+whitedwarfsinenvironmentsandhostgalaxydemographicsdiffer-
+enttoFRB20121102A.Suchsuggestionsareconsistentwithrecent
+localisations(e.g.Heintzetal.2020).
+The X-ray and VHE observations with Swift and H.E.S.S.
+allow us to probe non-thermal persistent emission associated with
+the FRB host galaxy or its source. Recently, H.E.S.S. observed
+SGR1935+2154 (H.E.S.S. collaboration 2021) that is a Galactic
+magnetarlinkedtoarepeatingFRBanditsfirstX-raycounterpart.
+MagnetarX-rayflarescouldinfactbenon-thermalinnature(Lietal.
+2021)indicatingthepresenceofparticleaccelerationthatcouldpo-
+tentiallyreachtheVHEdomain.TheinverseComptonprocessisa
+primarycandidatefortheproductionofVHEnon-thermalemission.
+H.E.S.S.observationsdidnotleadtoadetectionofapersistentora
+transientsourceassociatedtoFRB20171019A.WefoundnoX-ray
+counterparts and thus derived the upper limits to constrain these
 MNRAS000,1–15(2021)
 MeerKAT,e-MERLIN, SwiftandH.E.S.S.,observationsofthreelocalisedFRBs 11
 Figure6.MapofupperlimitsontheVHEgamma-rayenergyfluxderivedfromtheH.E.S.S.observations.Thelimitsarevalidabove120GeVandassume
 a photon flux distribution following an $E^{-2}$ dependence. The green box indicates the FRB 20171019A 90% localisation region as reported in Kumar et al.
 (2019).Theoversamplingradiusis0.1◦.
-emissions.InthecaseofexistenceofX-raynon-thermaloutbursts, SwiftandH.E.S.S.instrumentsandobtainedupperlimitsinthethree
-thelackofVHEdetectioncouldindicatethatinverseComptonis domainsconstrainingtheMWLemissionsfromFRB20171019A.
-weakinthevicinityofthemagnetarsorthattheVHEgamma-ray The search for FRB MWL counterparts is ongoing within the
-emissionisquenched.Thislatterscenariocouldbeexplainedbythe H.E.S.S. collaboration and more results will be published in fu-
-factthatinverseComptonistakingplacetooclosetothemagne- tureworks.
-tar’ssurface,wherepairproductionandphotonsplittingcouldbe Given the association of a comparatively fainter persistent
-responsibleforsignificantenergylosses(Huetal.2019),preventing source,FRB20190714AmaypotentiallybearepeatingFRBwhose
-energeticparticlesandphotonstoreachthenebula. ageliesbetweenthatofFRB20121102AandFRB20180916A.
+emissions.InthecaseofexistenceofX-raynon-thermaloutbursts,
+thelackofVHEdetectioncouldindicatethatinverseComptonis
+weakinthevicinityofthemagnetarsorthattheVHEgamma-ray
+emissionisquenched.Thislatterscenariocouldbeexplainedbythe
+factthatinverseComptonistakingplacetooclosetothemagne-
+tar’ssurface,wherepairproductionandphotonsplittingcouldbe
+responsible for significant energy losses (Hu et al. 2019), preventing
+energetic particles and photons from reaching the nebula.
 No persistent emissions were detected towards FRB
 20190711A and FRB 20171019A in our MeerKAT observations
-(seeFigures7,8,and9),thereforenofollowupobservationswere ACKNOWLEDGEMENTS
+(seeFigures7,8,and9),thereforenofollowupobservationswere
 conductedtowardsthoseFRBs.
+5 CONCLUSIONS
+SeveralFRBmodelsenvisionpersistentemissiontobeassociated
+withthesesources.Inthispaper,weconductedradioobservations
+of three FRBs (FRB 20190714A, 20190711A and 20171019A),
+and also a multi-wavelength campaign on one of these (FRB
+20171019A).
+Wedetectedpersistentcompactradioemissionassociatedwith
+FRB 20190714A (at 𝑧 = 0.2365) using the MeerKAT and e-
+MERLINradiotelescope.Thisrepresentsthefirstdetectionofthe
+radiocontinuumemissionassociatedwiththehost(galaxy)ofFRB
+20190714AandisonlythethirdknownFRBtohavesuchanas-
+sociation.Wefurthermoreobtainedaradioupperlimitof∼15𝜇Jy
+beam−1fortherepeatingFRBs20190711Aand20171019A.
+WealsoperformedUV,X-rayandVHEobservationswiththe
+SwiftandH.E.S.S.instrumentsandobtainedupperlimitsinthethree
+domainsconstrainingtheMWLemissionsfromFRB20171019A.
+The search for FRB MWL counterparts is ongoing within the
+H.E.S.S. collaboration and more results will be published in fu-
+tureworks.
+Given the association of a comparatively fainter persistent
+source,FRB20190714AmaypotentiallybearepeatingFRBwhose
+ageliesbetweenthatofFRB20121102AandFRB20180916A.
+ACKNOWLEDGEMENTS
 This paper makes use of the MeerKAT data (Project ID: SCI-
 20190418-VC-01). The MeerKAT telescope is operated by the
 South African Radio Astronomy Observatory, which is a facility
-5 CONCLUSIONS
 of the National Research Foundation, an agency of the Depart-
-SeveralFRBmodelsenvisionpersistentemissiontobeassociated mentofScienceandInnovation(DSI).Thisworkmadeuseofthe
-withthesesources.Inthispaper,weconductedradioobservations Inter-UniversityInstituteforDataIntensiveAstronomy(IDIA)vi-
-of three FRBs (FRB 20190714A, 20190711A and 20171019A), sualizationlabhttps://vislab.idia.ac.za.IDIAisapartnershipofthe
-and also a multi-wavelength campaign on one of these (FRB UniversityofCapeTown,theUniversityofPretoria,theUniversity
-20171019A). oftheWesternCapeandtheSouthAfricanRadioastronomyObser-
-Wedetectedpersistentcompactradioemissionassociatedwith vatory.e-MERLINisaNationalFacilityoperatedbytheUniversity
-FRB 20190714A (at 𝑧 = 0.2365) using the MeerKAT and e- ofManchesteratJodrellBankObservatoryonbehalfofSTFC.
-MERLINradiotelescope.Thisrepresentsthefirstdetectionofthe TheauthorsacknowledgefundingfromtheEuropeanResearch
-radiocontinuumemissionassociatedwiththehost(galaxy)ofFRB Council(ERC)undertheEuropeanUnion’sHorizon2020research
-20190714AandisonlythethirdknownFRBtohavesuchanas- andinnovationprogramme(grantagreementNo694745).Thesup-
-sociation.Wefurthermoreobtainedaradioupperlimitof∼15𝜇Jy portoftheNamibianauthoritiesandoftheUniversityofNamibia
-beam−1fortherepeatingFRBs20190711Aand20171019A. infacilitatingtheconstructionandoperationofH.E.S.S.isgrate-
-WealsoperformedUV,X-rayandVHEobservationswiththe fullyacknowledged,asisthesupportbytheGermanMinistryfor
+mentofScienceandInnovation(DSI).Thisworkmadeuseofthe
+Inter-UniversityInstituteforDataIntensiveAstronomy(IDIA)vi-
+sualizationlabhttps://vislab.idia.ac.za.IDIAisapartnershipofthe
+UniversityofCapeTown,theUniversityofPretoria,theUniversity
+oftheWesternCapeandtheSouthAfricanRadioastronomyObser-
+vatory.e-MERLINisaNationalFacilityoperatedbytheUniversity
+ofManchesteratJodrellBankObservatoryonbehalfofSTFC.
+TheauthorsacknowledgefundingfromtheEuropeanResearch
+Council(ERC)undertheEuropeanUnion’sHorizon2020research
+andinnovationprogramme(grantagreementNo694745).Thesup-
+portoftheNamibianauthoritiesandoftheUniversityofNamibia
+infacilitatingtheconstructionandoperationofH.E.S.S.isgrate-
+fullyacknowledged,asisthesupportbytheGermanMinistryfor
 MNRAS000,1–15(2021)
 12 Chibuezeetal.
 Figure7. FRB20171019AMeerKATimageandazoom-in(insert)aroundthepositionoftheFRB.Thewhiteellipseonthebottomleftcorneroftheinsert
 representthebeamsizeofMeerKAT.
-Education and Research (BMBF), the Max Planck Society, the REFERENCES
+Education and Research (BMBF), the Max Planck Society, the
 GermanResearchFoundation(DFG),theHelmholtzAssociation,
-AdámekK.,ArmourW.,2016,arXive-prints,p.arXiv:1611.09704
 the Alexander von Humboldt Foundation, the French Ministry of
-AdámekK.,ArmourW.,2019,AGPUImplementationoftheHarmonic
 Higher Education, Research and Innovation, the Centre National
+de la Recherche Scientifique (CNRS/IN2P3 and CNRS/INSU),
+the Commissariat à l’énergie atomique et aux énergies alterna-
+tives (CEA), the U.K. Science and Technology Facilities Council
+(STFC),theKnutandAliceWallenbergFoundation,theNational
+ScienceCentre,Polandgrantno.2016/22/M/ST9/00382,theSouth
+AfricanDepartmentofScienceandTechnologyandNationalRe-
+searchFoundation,theUniversityofNamibia,theNationalCom-
+missiononResearch,Science&TechnologyofNamibia(NCRST),
+theAustrianFederalMinistryofEducation,ScienceandResearch
+and the Austrian Science Fund (FWF), the Australian Research
+Council (ARC), the Japan Society for the Promotion of Science
+andbytheUniversityofAmsterdam.Weappreciatetheexcellent
+workofthetechnicalsupportstaffinBerlin,Zeuthen,Heidelberg,
+Palaiseau,Paris,Saclay,TübingenandinNamibiaintheconstruc-
+tion and operation of the equipment. This work benefited from
+servicesprovidedbytheH.E.S.S.VirtualOrganisation,supported
+bythenationalresourceprovidersoftheEGIFederation.
+DATAAVAILABILITY
+Thedataunderlyingthisarticlewillbesharedonreasonablerequest
+tothecorrespondingauthors.
+REFERENCES
+AdámekK.,ArmourW.,2016,arXive-prints,p.arXiv:1611.09704
+AdámekK.,ArmourW.,2019,AGPUImplementationoftheHarmonic
 SumAlgorithm.p.489
-de la Recherche Scientifique (CNRS/IN2P3 and CNRS/INSU), Adámek K., Dimoudi S., Giles M., Armour W., 2017, arXiv e-prints, p.
-the Commissariat à l’énergie atomique et aux énergies alterna- arXiv:1711.10855
-tives (CEA), the U.K. Science and Technology Facilities Council AharonianF.,etal.,2006,A&A,457,899
-(STFC),theKnutandAliceWallenbergFoundation,theNational AlamS.,etal.,2015,TheAstrophysicalJournalSupplementSeries,219,12
-ScienceCentre,Polandgrantno.2016/22/M/ST9/00382,theSouth AshtonT.,etal.,2020,arXive-prints,p.arXiv:2001.04510
-AfricanDepartmentofScienceandTechnologyandNationalRe- BannisterK.W.,etal.,2019,Science,365,565
+Adámek K., Dimoudi S., Giles M., Armour W., 2017, arXiv e-prints, p.
+arXiv:1711.10855
+AharonianF.,etal.,2006,A&A,457,899
+AlamS.,etal.,2015,TheAstrophysicalJournalSupplementSeries,219,12
+AshtonT.,etal.,2020,arXive-prints,p.arXiv:2001.04510
+BannisterK.W.,etal.,2019,Science,365,565
 BassaC.G.,etal.,2017,ApJ,843,L8
-searchFoundation,theUniversityofNamibia,theNationalCom-
 BeloborodovA.M.,2017,ApJ,843,L26
-missiononResearch,Science&TechnologyofNamibia(NCRST),
 BergeD.,FunkS.,HintonJ.,2007,A&A,466,1219
-theAustrianFederalMinistryofEducation,ScienceandResearch
 BhandariS.,etal.,2020,ApJ,895,L37
-and the Austrian Science Fund (FWF), the Australian Research
 BolmontJ.,etal.,2014,NuclearInstrumentsandMethodsinPhysicsRe-
-Council (ARC), the Japan Society for the Promotion of Science searchSectionA:Accelerators,Spectrometers,DetectorsandAssoci-
-andbytheUniversityofAmsterdam.Weappreciatetheexcellent atedEquipment,761,46–57
-workofthetechnicalsupportstaffinBerlin,Zeuthen,Heidelberg, BreeveldA.A.,etal.,2010,MonthlyNoticesoftheRoyalAstronomical
-Palaiseau,Paris,Saclay,TübingenandinNamibiaintheconstruc- Society,406,1687
-tion and operation of the equipment. This work benefited from BrunF.,PielQ.,deNauroisM.,BernhardS.,2020,Astropart.Phys.,118,
-servicesprovidedbytheH.E.S.S.VirtualOrganisation,supported 102429
+searchSectionA:Accelerators,Spectrometers,DetectorsandAssoci-
+atedEquipment,761,46–57
+BreeveldA.A.,etal.,2010,MonthlyNoticesoftheRoyalAstronomical
+Society,406,1687
+BrunF.,PielQ.,deNauroisM.,BernhardS.,2020,Astropart.Phys.,118,
+102429
 BurrowsD.N.,etal.,2005,SpaceSci.Rev.,120,165
-bythenationalresourceprovidersoftheEGIFederation.
 CalebM.,KeaneE.,2021,Universe,7,453
 CalebM.,StappersB.W.,RajwadeK.,FlynnC.,2019,MNRAS,484,5500
 CalebM.,etal.,2020,MNRAS,496,4565
 ChatterjeeS.,etal.,2017,Nature,541,58
 Chime/FRBCollaboration2021,TheAstronomer’sTelegram,14497,1
 CordesJ.M.,WassermanI.,2016,MNRAS,457,232
-DATAAVAILABILITY DaiZ.G.,WangJ.S.,YuY.W.,2017,ApJ,838,L7
+DaiZ.G.,WangJ.S.,YuY.W.,2017,ApJ,838,L7
 DimoudiS.,ArmourW.,2015,arXive-prints,p.arXiv:1511.07343
-Thedataunderlyingthisarticlewillbesharedonreasonablerequest Dimoudi S., Adamek K., Thiagaraj P., Ransom S. M., Karastergiou A.,
-tothecorrespondingauthors. ArmourW.,2018,ApJS,239,28
+Dimoudi S., Adamek K., Thiagaraj P., Ransom S. M., Karastergiou A.,
+ArmourW.,2018,ApJS,239,28
 MNRAS000,1–15(2021)
 MeerKAT,e-MERLIN, SwiftandH.E.S.S.,observationsofthreelocalisedFRBs 13
 Figure8. FRB20190711AMeerKATepochIimageandazoom-in(insert)aroundthepositionoftheFRB.Thewhiteellipseonthebottomleftcornerofthe
 insertrepresentthebeamsizeofMeerKAT.
-EftekhariT.,BergerE.,WilliamsP.K.G.,BlanchardP.K.,2018,ApJ,860, MarcoteB.,etal.,2017,ApJ,834,L8
-73 MarcoteB.,etal.,2020,Nature,577,190
-EvansP.A.,etal.,2007,A&A,469,379 MarcoteB.,etal.,2021,TheAstronomer’sTelegram,14603,1
-EvansP.A.,etal.,2009,MNRAS,397,1177 MargalitB.,BergerE.,MetzgerB.D.,2019,ApJ,886,110
-FongW.-f.,etal.,2021,ApJ,919,L23 MauchT.,etal.,2020,ApJ,888,61
-H.E.S.S.collaboration2021,ApJ,919,106 McMullin J. P., Waters B., Schiebel D., Young W., Golap K., 2007, in
-HI4PICollaborationetal.,2016,A&A,594,A116 ShawR.A.,HillF.,BellD.J.,eds,AstronomicalSocietyofthePacific
-HeintzK.E.,etal.,2020,ApJ,903,152 ConferenceSeriesVol.376,AstronomicalDataAnalysisSoftwareand
-HeywoodI.,2020,oxkat:Semi-automatedimagingofMeerKATobserva- SystemsXVI.p.127
-tions(ascl:2009.003) MereghettiS.,etal.,2020,ApJ,898,L29
-HickishJ.,etal.,2016,JournalofAstronomicalInstrumentation,5,1641001 MetzgerB.D.,MargalitB.,SironiL.,2019,MNRAS,485,4091
-HilmarssonG.H.,etal.,2021,ApJ,908,L10 OffringaA.R.,etal.,2014,MNRAS,444,606
-HuK.,BaringM.G.,WadiasinghZ.,HardingA.K.,2019,MNRAS,486, ParsonsR.D.,HintonJ.A.,2014,AstroparticlePhysics,56,26
-3327–3349 Petroff E., Hessels J. W. T., Lorimer D. R., 2021, arXiv e-prints, p.
-Insight-HXMT2020,SGRJ1935+2154burstlist,http://hxmten.ihep. arXiv:2107.10113
-ac.cn/bfy/331.jhtml PlattsE.,WeltmanA.,WaltersA.,TendulkarS.P.,GordinJ.E.B.,Kandhai
-JamesC.W.,etal.,2020,MNRAS,495,2416 S.,2019,Phys.Rep.,821,1
-JonasJ.,MeerKATTeam2016,inMeerKATScience:OnthePathwayto PopovS.B.,PostnovK.A.,2013,arXive-prints,p.arXiv:1307.4924
-theSKA.p.1 PopovS.B.,PshirkovM.S.,2016,MNRAS,462,L16
-KashiyamaK.,IokaK.,MészárosP.,2013,ApJ,776,L39 PopovS.,PostnovK.,PshirkovM.,2018,InternationalJournalofModern
-KenyonJ.S.,SmirnovO.M.,GroblerT.L.,PerkinsS.J.,2018,MNRAS, PhysicsD,27,1844016
-478,2399 ProchaskaJ.X.,etal.,2019,Science,366,231
-KumarP.,etal.,2019,ApJ,887,L30 RaviV.,2019,NatureAstronomy,3,928
-KumarP.,etal.,2021,MNRAS,500,2525 Resmi L., Vink J., Ishwara-Chandra C. H., 2020, arXiv e-prints, p.
-LawC.,TendulkarS.,ClarkeT.,AggarwalK.,BethapudyS.,2021,The arXiv:2010.14334
-Astronomer’sTelegram,14526,1 RicciR.,PiroL.,PanessaF.,O’ConnorB.,LottiS.,BruniG.,ZhangB.,
-LiC.K.,etal.,2021,NatureAstronomy, 2021,TheAstronomer’sTelegram,14549,1
-LiuT.,RomeroG.E.,LiuM.-L.,LiA.,2016,ApJ,826,82 RidnaiaA.,etal.,2021,NatureAstronomy,inpress
-LorimerD.R.,BailesM.,McLaughlinM.A.,NarkevicD.J.,CrawfordF., RolkeW.A.,LópezA.M.,ConradJ.,2005,NuclearInstrumentsandMeth-
-2007,Science,318,777 odsinPhysicsResearchA,551,493
-LyubarskyY.,2014,MNRAS:Letters,442,L9 RomingP.W.A.,etal.,2005,SpaceScienceReviews,120,95–142
-MacquartJ.P.,etal.,2020,Nature,581,391 TavaniM.,etal.,2021,NatureAstronomy,5,401–407
+EftekhariT.,BergerE.,WilliamsP.K.G.,BlanchardP.K.,2018,ApJ,860,
+73
+EvansP.A.,etal.,2007,A&A,469,379
+EvansP.A.,etal.,2009,MNRAS,397,1177
+FongW.-f.,etal.,2021,ApJ,919,L23
+H.E.S.S.collaboration2021,ApJ,919,106
+HI4PICollaborationetal.,2016,A&A,594,A116
+HeintzK.E.,etal.,2020,ApJ,903,152
+HeywoodI.,2020,oxkat:Semi-automatedimagingofMeerKATobserva-
+tions(ascl:2009.003)
+HickishJ.,etal.,2016,JournalofAstronomicalInstrumentation,5,1641001
+HilmarssonG.H.,etal.,2021,ApJ,908,L10
+HuK.,BaringM.G.,WadiasinghZ.,HardingA.K.,2019,MNRAS,486,
+3327–3349
+Insight-HXMT2020,SGRJ1935+2154burstlist,http://hxmten.ihep.
+ac.cn/bfy/331.jhtml
+JamesC.W.,etal.,2020,MNRAS,495,2416
+JonasJ.,MeerKATTeam2016,inMeerKATScience:OnthePathwayto
+theSKA.p.1
+KashiyamaK.,IokaK.,MészárosP.,2013,ApJ,776,L39
+KenyonJ.S.,SmirnovO.M.,GroblerT.L.,PerkinsS.J.,2018,MNRAS,
+478,2399
+KumarP.,etal.,2019,ApJ,887,L30
+KumarP.,etal.,2021,MNRAS,500,2525
+LawC.,TendulkarS.,ClarkeT.,AggarwalK.,BethapudyS.,2021,The
+Astronomer’sTelegram,14526,1
+LiC.K.,etal.,2021,NatureAstronomy,
+LiuT.,RomeroG.E.,LiuM.-L.,LiA.,2016,ApJ,826,82
+LorimerD.R.,BailesM.,McLaughlinM.A.,NarkevicD.J.,CrawfordF.,
+2007,Science,318,777
+LyubarskyY.,2014,MNRAS:Letters,442,L9
+MacquartJ.P.,etal.,2020,Nature,581,391
+MarcoteB.,etal.,2017,ApJ,834,L8
+MarcoteB.,etal.,2020,Nature,577,190
+MarcoteB.,etal.,2021,TheAstronomer’sTelegram,14603,1
+MargalitB.,BergerE.,MetzgerB.D.,2019,ApJ,886,110
+MauchT.,etal.,2020,ApJ,888,61
+McMullin J. P., Waters B., Schiebel D., Young W., Golap K., 2007, in
+ShawR.A.,HillF.,BellD.J.,eds,AstronomicalSocietyofthePacific
+ConferenceSeriesVol.376,AstronomicalDataAnalysisSoftwareand
+SystemsXVI.p.127
+MereghettiS.,etal.,2020,ApJ,898,L29
+MetzgerB.D.,MargalitB.,SironiL.,2019,MNRAS,485,4091
+OffringaA.R.,etal.,2014,MNRAS,444,606
+ParsonsR.D.,HintonJ.A.,2014,AstroparticlePhysics,56,26
+Petroff E., Hessels J. W. T., Lorimer D. R., 2021, arXiv e-prints, p.
+arXiv:2107.10113
+PlattsE.,WeltmanA.,WaltersA.,TendulkarS.P.,GordinJ.E.B.,Kandhai
+S.,2019,Phys.Rep.,821,1
+PopovS.B.,PostnovK.A.,2013,arXive-prints,p.arXiv:1307.4924
+PopovS.B.,PshirkovM.S.,2016,MNRAS,462,L16
+PopovS.,PostnovK.,PshirkovM.,2018,InternationalJournalofModern
+PhysicsD,27,1844016
+ProchaskaJ.X.,etal.,2019,Science,366,231
+RaviV.,2019,NatureAstronomy,3,928
+Resmi L., Vink J., Ishwara-Chandra C. H., 2020, arXiv e-prints, p.
+arXiv:2010.14334
+RicciR.,PiroL.,PanessaF.,O’ConnorB.,LottiS.,BruniG.,ZhangB.,
+2021,TheAstronomer’sTelegram,14549,1
+RidnaiaA.,etal.,2021,NatureAstronomy,inpress
+RolkeW.A.,LópezA.M.,ConradJ.,2005,NuclearInstrumentsandMeth-
+odsinPhysicsResearchA,551,493
+RomingP.W.A.,etal.,2005,SpaceScienceReviews,120,95–142
+TavaniM.,etal.,2021,NatureAstronomy,5,401–407
 MNRAS000,1–15(2021)
 14 Chibuezeetal.
 Figure9. FRB20190711AMeerKATepochIIimageandazoom-in(insert)aroundthepositionoftheFRB.Thewhiteellipseonthebottomleftcornerof
 theinsertrepresentthebeamsizeofMeerKAT.
-TendulkarS.P.,etal.,2017,ApJ,834,L7 Road,OxfordOX13RH,UK
-ThorntonD.,etal.,2013,Science,341,53 10NationalUniversityofIrelandGalway,UniversityRoad,Galway,
-TotaniT.,2013,PASJ,65,L12 H91TK33,Ireland
-VieyroF.L.,RomeroG.E.,Bosch-RamonV.,MarcoteB.,delValleM.V., 11SKA Observatory, Jodrell Bank Observatory, Macclesfield,
+TendulkarS.P.,etal.,2017,ApJ,834,L7
+ThorntonD.,etal.,2013,Science,341,53
+TotaniT.,2013,PASJ,65,L12
+VieyroF.L.,RomeroG.E.,Bosch-RamonV.,MarcoteB.,delValleM.V.,
 2017,A&A,602,A64
-CheshireSK119DL,UK
-WhartonR.,etal.,2021,TheAstronomer’sTelegram,14529,1 12Dublin Institute for Advanced Studies, 31 Fitzwilliam Place,
+WhartonR.,etal.,2021,TheAstronomer’sTelegram,14529,1
 YamasakiS.,TotaniT.,KiuchiK.,2018,PASJ,70,39
-Dublin2,Ireland
 ZhangB.,2018,ApJ,854,L21
-13Max-Planck-InstitutfürKernphysik,P.O.Box103980,D69029
 deNauroisM.,RollandL.,2009,AstroparticlePhysics,32,231
+APPENDIXA: AUTHORAFFILIATIONS
+1CentreforSpaceResearch,North-WestUniversity,Potchefstroom
+2531,SouthAfrica
+2Department of Physics and Astronomy, Faculty of Physical Sci-
+ences,UniversityofNigeria,CarverBuilding,1UniversityRoad,
+Nsukka410001,Nigeria
+3JodrellBankCentreforAstrophysics,DepartmentofPhysicsand
+Astronomy,UniversityofManchester,ManchesterM139PL,UK
+4SydneyInstituteforAstronomy,SchoolofPhysics,TheUniversity
+ofSydney,NSW2006,Australia
+5Max-Planck-InstitutfürRadioastronomie,AufdemHügel69,D-
+53121Bonn,Germany
+6IRFU, CEA, Université Paris-Saclay, F-91191 Gif-sur-Yvette,
+France
+7Department of Physics and Electronics, Rhodes University, PO
+Box94,Grahamstown6140,SouthAfrica
+8SouthAfricanRadioAstronomyObservatory,BlackRiverPark,2
+FirStreet,Observatory,CapeTown7925,SouthAfrica
+9Astrophysics,DepartmentofPhysics,UniversityofOxford,Keble
+Road,OxfordOX13RH,UK
+10NationalUniversityofIrelandGalway,UniversityRoad,Galway,
+H91TK33,Ireland
+11SKA Observatory, Jodrell Bank Observatory, Macclesfield,
+CheshireSK119DL,UK
+12Dublin Institute for Advanced Studies, 31 Fitzwilliam Place,
+Dublin2,Ireland
+13Max-Planck-InstitutfürKernphysik,P.O.Box103980,D69029
 Heidelberg,Germany
 14HighEnergyAstrophysicsLaboratory,RAU,123HovsepEmin
 StYerevan0051,Armenia
 15Landessternwarte,UniversitätHeidelberg,Königstuhl,D69117
-APPENDIXA: AUTHORAFFILIATIONS
 Heidelberg,Germany
-1CentreforSpaceResearch,North-WestUniversity,Potchefstroom 16Aix Marseille Université, CNRS/IN2P3, CPPM, Marseille,
-2531,SouthAfrica France
-2Department of Physics and Astronomy, Faculty of Physical Sci- 17LaboratoireLeprince-Ringuet,ÉcolePolytechnique,CNRS,In-
-ences,UniversityofNigeria,CarverBuilding,1UniversityRoad, stitutPolytechniquedeParis,F-91128Palaiseau,France
-Nsukka410001,Nigeria 18University of Namibia, Department of Physics, Private Bag
-3JodrellBankCentreforAstrophysics,DepartmentofPhysicsand
+16Aix Marseille Université, CNRS/IN2P3, CPPM, Marseille,
+France
+17LaboratoireLeprince-Ringuet,ÉcolePolytechnique,CNRS,In-
+stitutPolytechniquedeParis,F-91128Palaiseau,France
+18University of Namibia, Department of Physics, Private Bag
 13301,Windhoek10005,Namibia
-Astronomy,UniversityofManchester,ManchesterM139PL,UK 19InstytutFizyki Ja¸drowejPAN, ul.Radzikowskiego 152,31-342
-4SydneyInstituteforAstronomy,SchoolofPhysics,TheUniversity
+19InstytutFizyki Ja¸drowejPAN, ul.Radzikowskiego 152,31-342
 Kraków,Poland
-ofSydney,NSW2006,Australia 20DESY,D-15738Zeuthen,Germany
-5Max-Planck-InstitutfürRadioastronomie,AufdemHügel69,D- 21SchoolofPhysics,UniversityoftheWitwatersrand,1JanSmuts
-53121Bonn,Germany Avenue,Braamfontein,Johannesburg,2050SouthAfrica
-6IRFU, CEA, Université Paris-Saclay, F-91191 Gif-sur-Yvette, 22Université de Paris, CNRS, Astroparticule et Cosmologie, F-
-France 75013Paris,France
-7Department of Physics and Electronics, Rhodes University, PO 23DepartmentofPhysicsandElectricalEngineering,LinnaeusUni-
-Box94,Grahamstown6140,SouthAfrica versity,35195Växjö,Sweden
-8SouthAfricanRadioAstronomyObservatory,BlackRiverPark,2 24LaboratoireUniversetThéories,ObservatoiredeParis,Univer-
-FirStreet,Observatory,CapeTown7925,SouthAfrica sitéPSL,CNRS,UniversitédeParis,92190Meudon,France
-9Astrophysics,DepartmentofPhysics,UniversityofOxford,Keble
+20DESY,D-15738Zeuthen,Germany
+21SchoolofPhysics,UniversityoftheWitwatersrand,1JanSmuts
+Avenue,Braamfontein,Johannesburg,2050SouthAfrica
+22Université de Paris, CNRS, Astroparticule et Cosmologie, F-
+75013Paris,France
+23DepartmentofPhysicsandElectricalEngineering,LinnaeusUni-
+versity,35195Växjö,Sweden
+24LaboratoireUniversetThéories,ObservatoiredeParis,Univer-
+sitéPSL,CNRS,UniversitédeParis,92190Meudon,France
 MNRAS000,1–15(2021)
 MeerKAT,e-MERLIN, SwiftandH.E.S.S.,observationsofthreelocalisedFRBs 15
 25Sorbonne Université, Université Paris Diderot, Sorbonne Paris
diff --git a/read/results/pdfplumber/2201.00151.txt b/read/results/pdfplumber/2201.00151.txt
index 0031559..8caed3a 100644
--- a/read/results/pdfplumber/2201.00151.txt
+++ b/read/results/pdfplumber/2201.00151.txt
@@ -1,3 +1,41 @@
+a
+r
+X
+i
+v
+:
+2
+2
+0
+1 .
+0
+0
+1
+5
+1
+v
+1
+[
+a
+s
+t
+r
+o
+-
+p
+h
+.
+G
+A
+]
+1
+J
+a
+n
+2
+0
+2
+2
 Astronomy&Astrophysicsmanuscriptno.Populations4 ©ESO2022
 January4,2022
 Multiple stellar populations in Schwarzschild modeling
@@ -6,860 +44,1633 @@ KlaudiaKowalczykandEwaL.Łokas
 NicolausCopernicusAstronomicalCenter,PolishAcademyofSciences,Bartycka18,00-716Warsaw,Poland
 e-mail:klaudia.kowalczyk@gmail.com, lokas@camk.edu.pl
 January4,2022
-2202
 ABSTRACT
 Dwarfspheroidal(dSph)galaxiesarebelievedtobestronglydarkmatterdominatedandthusareconsideredperfectobjectstostudy
-darkmatterdistributionandtesttheoriesof structureformation. Theypossessresolved, multiplestellarpopulations thatoffer new naJ
+darkmatterdistributionandtesttheoriesof structureformation. Theypossessresolved, multiplestellarpopulations thatoffer new
 possibilitiesfor modeling. A promising tool for the dynamical modeling of these objects isthe Schwarzschild orbit superposition
 method.Inthisworkweextendourpreviousimplementationoftheschemetoincludemorethanonepopulationofstarsandamore
 generalformofthemass-to-lightratiofunction.Wetestedtheimprovedapproachonanearlyspherical,gas-freegalaxyformedin
-1
 thecosmologicalcontextfromtheIllustrissimulation.Wemodeledthebinnedvelocitymomentsforstarssplitintotwopopulations
 bymetallicityanddemonstratethatinspiteoflargersamplingerrorstheincreasednumberofconstraintsleadstosignificantlytighter
-]AG.hp-ortsa[
 confidenceregionsontherecovereddensityandvelocityanisotropyprofiles.WethenappliedthemethodtotheFornaxdSphgalaxy
 withstarssimilarlydividedintotwopopulations.Incomparisonwithourearlierwork,wefindtheanisotropyparametertobeslightly
 increasing, rather thandecreasing, withradiusand morestrongly constrained. Wearealsoabletoinferanisotropy for eachstellar
 populationseparatelyandfindthemtobesignificantlydifferent.
 Keywords. galaxies:kinematicsanddynamics–galaxies:structure–galaxies:fundamentalparameters–galaxies:dwarf–galaxies:
 starclusters:individual:Fornax
-1. Introduction momentsreliably and some assumption on the functionalform
+1. Introduction
+Dwarf spheroidal (dSph) galaxies of the Local Group (Mateo
+1998;Tolstoyetal. 2009)are consideredto bea perfecttoolto
+test our current theories of structure formation involving dark
+matter in the context of near-field cosmology. The objects are
+believedtobestronglydarkmatterdominatedwithmass-to-light
+ratiosevenontheorderofafewhundredsolarunits.Duetotheir
+proximitytheyarealsotheonlyextragalacticsystemswherein-
+dividualstars canberesolvedandtheirvelocitiesmeasuredof-
+fering the possibility to create interesting dynamical modeling
+techniques.
+The first estimates of dark matter content in dSph galaxies
+werebasedonasinglemeasurementoftheline-of-sightvelocity
+dispersionofthestarsandtheapplicationofthevirialtheorem.
+Asthesamplesofthestarswithkinematicmeasurementsgrew,
+itbecamepossibletoestimatetheprofileofthevelocitydisper-
+sionandmodelitusingtheJeansequation(Binney&Tremaine
+2008). Since the stars in the galaxy can move on a variety
+of orbits, from circular to radial, the degeneracy between the
+anisotropyof the orbitsandthe mass distributionis inherentin
+this type of modeling. The reason for this lies in the fact that
+differentcombinationsofthesequantitiescanreproducetheve-
+locitydispersionprofileequallywell.
+Awaytoovercomethisissue,atleastpartially,istoresortto
+higher orderline-of-sightvelocity moments,such as the kurto-
+sis,andusethecorrespondingJeansequations.Sincethekurto-
+sisismoresensitivetothevelocityanisotropythantothemass
+distribution,usefulconstraintscanbeobtainedonboth.Still,the
+methodrequireslargekinematicsamplestoestimatethevelocity
+momentsreliably and some assumption on the functionalform
 oftheanisotropy(Łokas2002;Łokasetal.2005).
-Dwarf spheroidal (dSph) galaxies of the Local Group (Mateo The Schwarzschild modeling technique (Schwarzschild
-1v15100.1022:viXra
-1998;Tolstoyetal. 2009)are consideredto bea perfecttoolto 1979) offers a different approach to estimate the properties of
-test our current theories of structure formation involving dark dSph galaxies without prior assumptions on the type of orbits.
-matter in the context of near-field cosmology. The objects are It relies on building a galaxy model out of a set of best-fitting
-believedtobestronglydarkmatterdominatedwithmass-to-light orbits probed in the range of energy and angular momenta. In
-ratiosevenontheorderofafewhundredsolarunits.Duetotheir this method, the anisotropy of the stellar orbits comes out as a
-proximitytheyarealsotheonlyextragalacticsystemswherein- resultofthemodelinginthesamewayasthedensityprofile.Al-
-dividualstars canberesolvedandtheirvelocitiesmeasuredof- thoughithasbeenoriginallydevelopedforlargeellipticalgalax-
-fering the possibility to create interesting dynamical modeling ies(vanderMareletal.1998;Vallurietal.2004;Gebhardtetal.
-techniques. 2015), it has recently been adopted for use on discrete data
+The Schwarzschild modeling technique (Schwarzschild
+1979) offers a different approach to estimate the properties of
+dSph galaxies without prior assumptions on the type of orbits.
+It relies on building a galaxy model out of a set of best-fitting
+orbits probed in the range of energy and angular momenta. In
+this method, the anisotropy of the stellar orbits comes out as a
+resultofthemodelinginthesamewayasthedensityprofile.Al-
+thoughithasbeenoriginallydevelopedforlargeellipticalgalax-
+ies(vanderMareletal.1998;Vallurietal.2004;Gebhardtetal.
+2015), it has recently been adopted for use on discrete data
 characteristic of dSph galaxies and applied to a number of
-The first estimates of dark matter content in dSph galaxies
 dwarfs,includingCarina,Draco,Fornax,Sculptor,andSextans
-werebasedonasinglemeasurementoftheline-of-sightvelocity
 (Jardel&Gebhardt 2008;Jardeletal.2013;Breddels&Helmi
-dispersionofthestarsandtheapplicationofthevirialtheorem.
 2013;Breddelsetal.2013;Kowalczyketal.2019).
-Asthesamplesofthestarswithkinematicmeasurementsgrew,
 ManydSphgalaxiesshowsignsofthepresenceofmultiple
-itbecamepossibletoestimatetheprofileofthevelocitydisper-
 stellarpopulationsresultingfromafewstarformationepisodes
-sionandmodelitusingtheJeansequation(Binney&Tremaine
 (Bellazzinietal.2001;delPinoetal.2015;Fabrizioetal.2016;
-2008). Since the stars in the galaxy can move on a variety
 Paceetal. 2020). This observationoffers a way to improvethe
-of orbits, from circular to radial, the degeneracy between the
 modeling methods since, assuming dynamical equilibrium, all
-anisotropyof the orbitsandthe mass distributionis inherentin
 populations are supposed to be influenced by the same under-
-this type of modeling. The reason for this lies in the fact that
 lying gravitational potential of the galaxy, but they have dif-
-differentcombinationsofthesequantitiescanreproducetheve-
 ferent distributions so more constraints can be imposed during
-locitydispersionprofileequallywell.
 the modeling. This approach was first used by Battagliaetal.
-Awaytoovercomethisissue,atleastpartially,istoresortto (2008) to model the mass distribution in the Sculptor dSph
-higher orderline-of-sightvelocity moments,such as the kurto- galaxy. A few attempts have also been made to constrain the
-sis,andusethecorrespondingJeansequations.Sincethekurto- inner slope of the dark matter profile in dSph galaxies using
-sisismoresensitivetothevelocityanisotropythantothemass thistechnique(Walker&Peñarrubia 2011;Amorisco&Evans
-distribution,usefulconstraintscanbeobtainedonboth.Still,the 2012;Hayashietal.2018)inordertoresolvetheso-calledcusp-
-methodrequireslargekinematicsamplestoestimatethevelocity core problem. It has been shown to be difficult, however, due
+(2008) to model the mass distribution in the Sculptor dSph
+galaxy. A few attempts have also been made to constrain the
+inner slope of the dark matter profile in dSph galaxies using
+thistechnique(Walker&Peñarrubia 2011;Amorisco&Evans
+2012;Hayashietal.2018)inordertoresolvetheso-calledcusp-
+core problem. It has been shown to be difficult, however, due
 Articlenumber,page1of12
 A&Aproofs:manuscriptno.Populations4
 Table1.PropertiesoftheIllustrisgalaxyusedtocreatemockdata.
-Property Value 16
+Property Value
 SubhaloID 16960
-]
-1-
-Numberofstellarparticles(N ⋆) 70446 12 ry
-Numberofdarkmatterparticles(N ) 78448
-DM ⊙
-Stellarmass(M ⋆) 5.74×1010M⊙ M[
-8
-Darkmattermass(M DM) 4.91×1011M⊙
-RFS
+Numberofstellarparticles(N ⋆ ) 70446
+Numberofdarkmatterparticles(N
+DM
+) 78448
+Stellarmass(M ⋆ ) 5.74×1010M⊙
+Darkmattermass(M DM ) 4.91×1011M⊙
 Meanmassofstellarparticles 815808M⊙
-Stellarhalf-massradius 9.99kpc 4
-Stellarhalf-numberradius(r ) 9.6kpc
+Stellarhalf-massradius 9.99kpc
+Stellarhalf-numberradius(r
 1/2
-Axisratioc/awithinr 0.907
-1/2 0
-Axisratiob/awithinr 0.949
+) 9.6kpc
+Axisratioc/awithinr
 1/2
-0 2 4 6 8 10 12
+0.907
+Axisratiob/awithinr
+1/2
+0.949
 Triaxiality 0.56
-t [Gyr]
-tothenonsphericityofthedwarfsthatintroducesbiasesinsuch Fig.1. StarformationrateasafunctionoftheageoftheUniversein
-measurements(Kowalczyketal.2013;Geninaetal.2018). thesimulatedgalaxyfromtheIllustrisprojectusedtocreatemockdata.
-Inourrecentpapers(Kowalczyketal.2017,2018,2019)we Theblackandgrayverticalarrowsindicatethelastmergerswhichthe
-developedtheSchwarzschildtechniqueintheformapplicableto galaxyunderwent,wetanddry,respectively.
+tothenonsphericityofthedwarfsthatintroducesbiasesinsuch
+measurements(Kowalczyketal.2013;Geninaetal.2018).
+Inourrecentpapers(Kowalczyketal.2017,2018,2019)we
+developedtheSchwarzschildtechniqueintheformapplicableto
 binnedvelocitymomentsofasingletracerandverifieditsabil-
 itytoreproducethemassdistributionandvelocityanisotropyof
-10 6
 simulated galaxies. We have also studied biases resulting from
 the nonsphericityof themodeledobjects.Later,we appliedthe
-8
 methodtomodelthekinematicsoftheFornaxdSphgalaxyesti-
-matingitsmassandanisotropyprofileswithunprecedentedpre- 4
-cision. 6 ]ryG[ 2 ]
-01[
+matingitsmassandanisotropyprofileswithunprecedentedpre-
+cision.
 In this paper we extend our Schwarzschild modeling tech-
-nique to include multiple stellar populations with the aim to 4 N
-constrain the properties of dSph galaxies even more strongly. 2 t
+nique to include multiple stellar populations with the aim to
+constrain the properties of dSph galaxies even more strongly.
 We test our approachon a realistic simulated galaxyformedin
-2
 the cosmological context, originating from the Illustris project
 (Vogelsbergeretal. 2014a). Although no precise analogues of
-0 0
 dSphgalaxiesareavailableinthissimulationbecauseoftheres-
-olution,we use a moremassive galaxybutwith propertiesoth- 0 1 2 3 4 5
-erwisesimilartodSphs.Thereliabilityofthemodelingdoesnot Z [Z ]
-⊙
+olution,we use a moremassive galaxybutwith propertiesoth-
+erwisesimilartodSphs.Thereliabilityofthemodelingdoesnot
 dependon the particularvalue of the mass so we believethese
-teststobeviable.Wedonotattempttoconstraintheinnerdark Fig. 2. Number of stars as a function of their metallicity and time of
-matterdensityprofile(whichispoorlyresolvedanyway)buttry formation(theageoftheUniverse)inthesimulatedgalaxy.Thevertical
-toputtighterlimitsontheestimatesofthemassandanisotropy lineindicatestheappliedsplitintostellarpopulations.
+teststobeviable.Wedonotattempttoconstraintheinnerdark
+matterdensityprofile(whichispoorlyresolvedanyway)buttry
+toputtighterlimitsontheestimatesofthemassandanisotropy
 profiles.Finally,weapplytheimprovedmethodtotheavailable
 kinematicdataforthedistinctstellar populationsoftheFornax
-dSph. magnetic fields, and the feedback from black holes. Although
-Thispaperis organizedasfollows. InSection 2 we present dwarfgalaxiesthatareofourinterestherearenotresolvedinthe
-the data for the simulated galaxy as well as their splitting into suite,thiscanbeeasilyovercomewiththeappropriatechoiceof
-stellarpopulationsandmockobservationsalongthemainaxes. theobjectandthetreatmentofdata.
-Section3containsanoverviewofourmodelingmethod,theap- As the key properties of dSph galaxy equivalents we iden-
-plicationof the methodto allstars andto two populations,and tified: the lack of gas, the lack of a black hole, a low spin,
-acomparisonoftheresultsobtainedwiththesetwoapproaches. the stellar mass much smaller than the dark matter mass and a
-TheresultsoftheapplicationofthemethodtotheFornaxdSph nearlysphericalshape.Thelastconditionwasadoptedinanat-
-galaxyare presentedin Section 4. We discuss our findingsand tempttoavoidanystrongbiasintroducedbythesphericalmod-
-summarizethepaperinSection5. elingofanonsphericalobject.Moreover,werequiredthegalaxy
+dSph.
+Thispaperis organizedasfollows. InSection 2 we present
+the data for the simulated galaxy as well as their splitting into
+stellarpopulationsandmockobservationsalongthemainaxes.
+Section3containsanoverviewofourmodelingmethod,theap-
+plicationof the methodto allstars andto two populations,and
+acomparisonoftheresultsobtainedwiththesetwoapproaches.
+TheresultsoftheapplicationofthemethodtotheFornaxdSph
+galaxyare presentedin Section 4. We discuss our findingsand
+summarizethepaperinSection5.
+2. Mockdata
+2.1.Selectionofthesimulatedgalaxy
+In order to test our modeling method on realistic simulated
+data, we decided to use a galaxy from the Illustris project
+(Vogelsbergeretal. 2014a,b; Geneletal. 2014; Nelsonetal.
+2015),namelytheIllustris-1cosmologicalsimulation.Thissim-
+ulationfollowstheformationandevolutionofgalaxiesfromthe
+early Universe to the present by solving gravity and hydrody-
+namics, as well as modeling of star formation, galactic winds,
+S
+F
+R
+[
+M
+⊙
+y
+r -
+1
+]
+t [Gyr]
+0
+4
+8
+12
+16
+0 2 4 6 8 10 12
+Fig.1. StarformationrateasafunctionoftheageoftheUniversein
+thesimulatedgalaxyfromtheIllustrisprojectusedtocreatemockdata.
+Theblackandgrayverticalarrowsindicatethelastmergerswhichthe
+galaxyunderwent,wetanddry,respectively.
+t
+[
+G y
+r ]
+Z [Z
+⊙
+]
+0
+2
+4
+6
+8
+10
+0 1 2 3 4 5
+0
+2
+4
+6
+N [
+1 0
+2 ]
+Fig. 2. Number of stars as a function of their metallicity and time of
+formation(theageoftheUniverse)inthesimulatedgalaxy.Thevertical
+lineindicatestheappliedsplitintostellarpopulations.
+magnetic fields, and the feedback from black holes. Although
+dwarfgalaxiesthatareofourinterestherearenotresolvedinthe
+suite,thiscanbeeasilyovercomewiththeappropriatechoiceof
+theobjectandthetreatmentofdata.
+As the key properties of dSph galaxy equivalents we iden-
+tified: the lack of gas, the lack of a black hole, a low spin,
+the stellar mass much smaller than the dark matter mass and a
+nearlysphericalshape.Thelastconditionwasadoptedinanat-
+tempttoavoidanystrongbiasintroducedbythesphericalmod-
+elingofanonsphericalobject.Moreover,werequiredthegalaxy
 to possess a significant number of both stellar and dark mat-
 ter particles (over 105), and a well resolved center. Due to the
-2. Mockdata largesofteningscale for darkmatter particlesin the simulation
-(ǫ = 1.42kpc), we looked for an object in which even the
-2.1.Selectionofthesimulatedgalaxy DM
+largesofteningscale for darkmatter particlesin the simulation
+(ǫ
+DM
+= 1.42kpc), we looked for an object in which even the
 moreconcentratedstellarpopulation(seeSection2.2)extended
-In order to test our modeling method on realistic simulated over43kpcsothattheregionaffectedbythenumericalartifacts
-data, we decided to use a galaxy from the Illustris project wasenclosedwithin2-3innermostdatabins(weused20linearly
-(Vogelsbergeretal. 2014a,b; Geneletal. 2014; Nelsonetal. spacedspatialbins,seeSection3.1).
-2015),namelytheIllustris-1cosmologicalsimulation.Thissim- Out of 27345 galaxies listed in the catalog of stellar circu-
-ulationfollowstheformationandevolutionofgalaxiesfromthe larities,angularmomenta,andaxisratiospublishedbytheIllus-
-early Universe to the present by solving gravity and hydrody- tristeam(Geneletal.2015)containingsubhaloswiththestellar
-namics, as well as modeling of star formation, galactic winds, masslargerthan109M⊙,onlyafewmetourrestrictiverequire-
+over43kpcsothattheregionaffectedbythenumericalartifacts
+wasenclosedwithin2-3innermostdatabins(weused20linearly
+spacedspatialbins,seeSection3.1).
+Out of 27345 galaxies listed in the catalog of stellar circu-
+larities,angularmomenta,andaxisratiospublishedbytheIllus-
+tristeam(Geneletal.2015)containingsubhaloswiththestellar
+masslargerthan109M⊙,onlyafewmetourrestrictiverequire-
 Articlenumber,page2of12
 K.Kowalczyk&E.L.Łokas:MultiplestellarpopulationsinSchwarzschildmodeling
-PPPOOOPPPUUULLLAAATTTIIIOOONNN III PPPPPPPPPPPPPPPOOOOOOOOOOOOOOOPPPPPPPPPPPPPPPUUUUUUUUUUUUUUULLLLLLLLLLLLLLLAAAAAAAAAAAAAAATTTTTTTTTTTTTTTIIIIIIIIIIIIIIIOOOOOOOOOOOOOOONNNNNNNNNNNNNNN IIIIIIIIIIIIIIIIIIIIIIIIIIIIII
-major intermediate minor major intermediate minor
-80 80
-7.7 7.7
-40 7.1 40 7.1 ]2cpk/⊙M[ ]2cpk/⊙M[
-6.5 6.5 ]cpk[ ]cpk[
-0 0
-5.9 5.9 )Σ(gol )Σ(gol
--40 -40
-5.3 5.3
--80 160 -80 160
-40 80 40 80
-]s/mk[ ]s/mk[
-]cpk[ ]cpk[
-0 0 0 0
-V V
--40 -80 -40 -80
--80 -160 -80 -160
-40 90 40 90
-]s/mk[ ]s/mk[
-]cpk[ ]cpk[
-0 60 0 60
-σ σ
--40 30 -40 30
--80 0 -80 0
--80 -40 0 40 -80 -40 0 40 -80 -40 0 40 80 -80 -40 0 40 -80 -40 0 40 -80 -40 0 40 80
-[kpc] [kpc] [kpc] [kpc] [kpc] [kpc]
+-80
+-40
+0
+40
+80
+POPULATION I
+[kpc]
+major
+POPULATION I
+intermediate
+POPULATION I
+minor
+5.3
+5.9
+6.5
+7.1
+7.7
+log(
+Σ)
+[
+M
+⊙
+/kpc
+2]
+-80
+-40
+0
+40
+POPULATION II
+[kpc]
+POPULATION II POPULATION II
+-160
+-80
+0
+80
+160
+V
+[k
+m/s]
+-80
+-40
+0
+40
+-80 -40 0 40
+POPULATION II
+[kpc]
+[kpc]
+-80 -40 0 40
+POPULATION II
+[kpc]
+-80 -40 0 40 80
+POPULATION II
+[kpc]
+0
+30
+60
+90
+σ
+[k
+m/s]
+-80
+-40
+0
+40
+80
+POPULATION II
+[kpc]
+major
+POPULATION II
+intermediate
+POPULATION II
+minor
+5.3
+5.9
+6.5
+7.1
+7.7
+log(
+Σ)
+[
+M
+⊙
+/kpc
+2]
+-80
+-40
+0
+40
+POPULATION II
+[kpc]
+POPULATION II POPULATION II
+-160
+-80
+0
+80
+160
+V
+[k
+m/s]
+-80
+-40
+0
+40
+-80 -40 0 40
+POPULATION II
+[kpc]
+[kpc]
+-80 -40 0 40
+POPULATION II
+[kpc]
+-80 -40 0 40 80
+POPULATION II
+[kpc]
+0
+30
+60
+90
+σ
+[k
+m/s]
 Fig.3.Mapsoftheprojectedstellardensity,meanstellarvelocity,andstellarvelocitydispersion(inrows)fortwostellarpopulations:themetal-
 richpopulationI(left-handsidepanels)andthemetal-poorpopulationII(right-handside),andobservationsalongtheprincipalaxesdetermined
 forallstars(incolumns,alongthemajor,theintermediate,andtheminoraxis,respectively).
-1 120 120
-0.5 100 100
-)r(β )r(σ )r(σ
-0 80 80
-r t
--0.5 60 60
--1 40 40
-1 10 100 1 10 100 1 10 100
-r [kpc] r [kpc] r [kpc]
-1 120 120
-0.5 100 100
-)r(β )r(σ )r(σ
-0 80 80
-all stars r t
+-1
+-0.5
+0
+0.5
+1
+1 10 100
+β ( r )
+r [kpc]
+-1
+-0.5
+0
+0.5
+1
+0 10 20 30 40 50
+β ( r )
+r [kpc]
+all stars
 pop I
--0.5 60 60
 pop II
--1 40 40
-0 10 20 30 40 50 0 10 20 30 40 50 0 10 20 30 40 50
-r [kpc] r [kpc] r [kpc]
+40
+60
+80
+100
+120
+1 10 100
+σr ( r )
+r [kpc]
+40
+60
+80
+100
+120
+0 10 20 30 40 50
+σr ( r )
+r [kpc]
+40
+60
+80
+100
+120
+1 10 100
+σt ( r )
+r [kpc]
+40
+60
+80
+100
+120
+0 10 20 30 40 50
+σt ( r )
+r [kpc]
 Fig.4.Profilesofthevelocityanisotropyparameter,radialvelocitydispersion,andtangentialvelocitydispersion(inconsecutivecolumns)calcu-
 latedfromallstars(inred),includingonlypopulationI(inorange),andonlypopulationII(inblue).Theupperrowshowstheprofilesusingthe
 logarithmicdistancescaleandreachingtheoutskirtsofthegalaxywhereasthebottomrowpresentsinthelinearscaleonlytheradialrangeused
 inthemodeling.
-ments. We decided to use a galaxy labeled as subhalo 16960. forfurthercalculationsinthispaper.Thedifferencebetweenthe
-All the relevant properties of the galaxy are given in Table1, twocomesfromasmallgradientinthestellarmass-to-lightratio
-includingnumbersofparticlesandtotalmassesforbothcompo- withthedistancefromthegalacticcenter.Sinceinourapproach
-nents,anddetailsontheshapeofthestellarcomponent:theaxis we treat stars as equal-massparticles and refer to numberden-
-ratios minor to major (shortest to longest) c/a, intermediate to sities (multiplied by the mean mass of a stellar particle when
-majorb/a,andthetriaxialityparameterT =(a2−b2)/(a2−c2). needed),theapplicationofthehalf-numberradiusismoreself-
-Wedistinguishbetweenthehalf-massradiusprovidedintheIl- consistent.
-lustris database and the half-numberradius r , which we use
+ments. We decided to use a galaxy labeled as subhalo 16960.
+All the relevant properties of the galaxy are given in Table1,
+includingnumbersofparticlesandtotalmassesforbothcompo-
+nents,anddetailsontheshapeofthestellarcomponent:theaxis
+ratios minor to major (shortest to longest) c/a, intermediate to
+majorb/a,andthetriaxialityparameterT =(a2−b2)/(a2−c2).
+Wedistinguishbetweenthehalf-massradiusprovidedintheIl-
+lustris database and the half-numberradius r
 1/2
+, which we use
+forfurthercalculationsinthispaper.Thedifferencebetweenthe
+twocomesfromasmallgradientinthestellarmass-to-lightratio
+withthedistancefromthegalacticcenter.Sinceinourapproach
+we treat stars as equal-massparticles and refer to numberden-
+sities (multiplied by the mean mass of a stellar particle when
+needed),theapplicationofthehalf-numberradiusismoreself-
+consistent.
 Articlenumber,page3of12
 A&Aproofs:manuscriptno.Populations4
-major intermediate minor
-3
 10
+-3
+10 -1
+10 1
+10
+3
+10 100
+n
+⋆ (
+R
+)
+[
+k
+p
+c - 2
 ]
-1 2-
-10 cpk[
-)R(⋆n
--1
-10 all stars
+R [kpc]
+major
+10 100
+R [kpc]
+intermediate
+10 100
+R [kpc]
+minor
+all stars
 pop I
 pop II
--3
-10
-10 100 10 100 10 100
-R [kpc] R [kpc] R [kpc]
 Fig.5.Surfacenumberdensityprofilesofthestellardatasamplesforthesimulatedgalaxyobservedalongdifferentlinesofsight(fromtheleftto
 theright).Differentlinesshowprofilesforallavailablestars(inred),themetal-richpopulationI(inorange),andthemetal-poorpopulationII(in
-blue).Thinverticallinesindicater (seetext)andtheouterboundaryofthespectroscopicdata.
+blue).Thinverticallinesindicater
 0
-2.2.Splittingthestarsintopopulations Fig.4.Throughoutthepaperweusered,orange,andbluecolors
-to indicate values calculated or recoveredfor all stars, popula-
-Our chosen galaxy shows a complex formation history under- tionI,andpopulationII,respectively.Thetworowsofthefigure
-goingmultiplemergerswhichresultinextendedstar formation showthebehavioroftheparametersatdifferentscales.Thetop
-with a few star formation bursts. The last wet merger,that is a row plots the profiles with the distance from the center of the
-merger with an object containinggas, happensat 6.9Gyr from galaxyinthelogarithmicscaleandshowsthedropofanisotropy
-thebeginningofthesimulation,whereasthelastdrymerger(no attheouteredgesoftheobject.Thebottomrowusesthelinear
-gastransfer)at12.1Gyr,givingthegalaxyenoughtimetoregain distancescaleandfocusesonthemainbodyofthegalaxy.
+(seetext)andtheouterboundaryofthespectroscopicdata.
+2.2.Splittingthestarsintopopulations
+Our chosen galaxy shows a complex formation history under-
+goingmultiplemergerswhichresultinextendedstar formation
+with a few star formation bursts. The last wet merger,that is a
+merger with an object containinggas, happensat 6.9Gyr from
+thebeginningofthesimulation,whereasthelastdrymerger(no
+gastransfer)at12.1Gyr,givingthegalaxyenoughtimetoregain
 dynamicalequilibrium.Wepresentthestarformationrate(SFR)
-Figure 5 shows the surface number density profiles of the
-as a functionof time (the age of the Universe)in Fig.1, where starsas measuredin differentdirections.We can see thatwhile
-theselastmergersareindicatedwithblackandgrayverticalar- thedifferentsubsampleshavequitedistinguishableprofiles,the
-rows.InFig.2weshowthedistributionofstarsasafunctionof differencebetweenthelinesofsightissmallbecausethegalaxy
+as a functionof time (the age of the Universe)in Fig.1, where
+theselastmergersareindicatedwithblackandgrayverticalar-
+rows.InFig.2weshowthedistributionofstarsasafunctionof
 theirmetallicity(insolarunits)andthetimeofformation.Inor-
-isclosetospherical.
 dertodividethestellarsampleintotwopopulationswecutitin
 halfbasedonthemetallicityindexofeachstellarparticle.This
-splitisindicatedinFig.2withtheverticalline.Withsatisfying 2.3.Observables
+splitisindicatedinFig.2withtheverticalline.Withsatisfying
 accuracyitseparatesthestarsbornbeforeandafter4Gyrsince
-the start of the simulation, which correspondsto the formation We generatedninesetsofmockdatabyobservingallstarsand
-time beforeand afterthe endof thesecondmajorstar burst, as each populationseparatelyalongthe principalaxesdetermined
-showninFig.1.Werefertothemetal-richstarsaspopulationI fromallstars.Fortheobservablestobeusedinthemodelingwe
-andtothemetal-pooraspopulationII,followingthecommonly divided the stars into 20 bins spaced linearly in distance from
-usednomenclatureinastronomy. the center of the galaxy up to 50kpc, measuring the fraction
-of the total number of stars and the 2nd, 3rd, and 4th proper
+the start of the simulation, which correspondsto the formation
+time beforeand afterthe endof thesecondmajorstar burst, as
+showninFig.1.Werefertothemetal-richstarsaspopulationI
+andtothemetal-pooraspopulationII,followingthecommonly
+usednomenclatureinastronomy.
 InFig.3wepresentmapsoftheprojectedstellarmassden-
-moments of the line-of-sight velocity defined in Eq.8 and 9
 sity, line-of-sightvelocity, and line-of-sightvelocity dispersion
-of Kowalczyketal. (2018). The profiles of these quantities are
 forbothpopulationsobtainedbyprojectingthegalaxyalongits
-showninconsecutiverowsinFig.6.Columnscorrespondtodif-
 principal axes. The orientation was determined from the iner-
-ferentlinesof sight, fromthe leftto the right:alongthe major,
 tiatensorcalculatedfromallstarswithinthehalf-numberradius
-intermediate,andminoraxisofthegalaxy.Forclarityofthefig-
-r and therefore is the same in both panels. The two popula-
-1/2 ure,ineachpanelweindicateonlytheerrorbarsforoneofthe
+r
+1/2
+and therefore is the same in both panels. The two popula-
 tionsdiffersignificantlyinthespatialdistributionandkinemat-
-datasets. However,as the numberof starsin a sample remains
 ics with the metal-rich(consideredto be younger)populationI
-roughly constant between the lines of sight, the error bars are
 being more concentratedbut having lower centralvelocity dis-
-verysimilaramongthepanelsinagivenrow.
 persion. Both populationsshow a weak rotation signal at large
-Although in our previous studies of the reliability of
 distancesfromthecenter.
-the Schwarzschild modeling and its applications to real data
-The velocity anisotropy parameter β(r) = 1 − (σ2 +
-θ (Kowalczyketal. 2017,2018, 2019) weapproximatedtheden-
-σ2 φ)/(2σ2 r),whereσ iarevelocitydispersionsinsphericalcoordi- sityprofileofthetracerwiththeSérsicformula,wefoundthatit
-nates(Binney&Tremaine2008),describestheorbitalstructure doesnotprovideagoodapproximationofthedataforthesimu-
-ofgalaxies.Itisoneofthemostimportantdynamicalproperties latedgalaxyconsideredhere.Wethereforefittheprojectedden-
-of bound systems which cannot be inferred directly from ob- sityprofilewiththeKingformula(King1962)
+The velocity anisotropy parameter $\beta(r) = 1 - (\sigma_\theta^2 + \sigma_\phi^2)/(2\sigma_r^2)$,
+where $\sigma_i$ are velocity dispersions in spherical coordi-
+nates(Binney&Tremaine2008),describestheorbitalstructure
+ofgalaxies.Itisoneofthemostimportantdynamicalproperties
+of bound systems which cannot be inferred directly from ob-
 servationsandhastoberecoveredbydynamicalmodeling.The
 profiles of the anisotropy parameter β as well as the radial σ
-r 2
-andtangentialσ =[(σ2+σ2)/2]1/2velocitydispersionsforour 1 1
-t θ φ I(R)=I − , (1)
-simulated galaxy are presented in the consecutive columns of 0  1+(R/R c)2 1+(R t/R c)2
-p 
-Articlenumber,page4of12 p
+r
+and tangential $\sigma_t = [(\sigma_\theta^2 + \sigma_\phi^2)/2]^{1/2}$ velocity dispersions for our
+simulated galaxy are presented in the consecutive columns of
+Fig.4.Throughoutthepaperweusered,orange,andbluecolors
+to indicate values calculated or recoveredfor all stars, popula-
+tionI,andpopulationII,respectively.Thetworowsofthefigure
+showthebehavioroftheparametersatdifferentscales.Thetop
+row plots the profiles with the distance from the center of the
+galaxyinthelogarithmicscaleandshowsthedropofanisotropy
+attheouteredgesoftheobject.Thebottomrowusesthelinear
+distancescaleandfocusesonthemainbodyofthegalaxy.
+Figure 5 shows the surface number density profiles of the
+starsas measuredin differentdirections.We can see thatwhile
+thedifferentsubsampleshavequitedistinguishableprofiles,the
+differencebetweenthelinesofsightissmallbecausethegalaxy
+isclosetospherical.
+2.3.Observables
+We generatedninesetsofmockdatabyobservingallstarsand
+each populationseparatelyalongthe principalaxesdetermined
+fromallstars.Fortheobservablestobeusedinthemodelingwe
+divided the stars into 20 bins spaced linearly in distance from
+the center of the galaxy up to 50kpc, measuring the fraction
+of the total number of stars and the 2nd, 3rd, and 4th proper
+moments of the line-of-sight velocity defined in Eq.8 and 9
+of Kowalczyketal. (2018). The profiles of these quantities are
+showninconsecutiverowsinFig.6.Columnscorrespondtodif-
+ferentlinesof sight, fromthe leftto the right:alongthe major,
+intermediate,andminoraxisofthegalaxy.Forclarityofthefig-
+ure,ineachpanelweindicateonlytheerrorbarsforoneofthe
+datasets. However,as the numberof starsin a sample remains
+roughly constant between the lines of sight, the error bars are
+verysimilaramongthepanelsinagivenrow.
+Although in our previous studies of the reliability of
+the Schwarzschild modeling and its applications to real data
+(Kowalczyketal. 2017,2018, 2019) weapproximatedtheden-
+sityprofileofthetracerwiththeSérsicformula,wefoundthatit
+doesnotprovideagoodapproximationofthedataforthesimu-
+latedgalaxyconsideredhere.Wethereforefittheprojectedden-
+sityprofilewiththeKingformula(King1962)
+$$I(R) = I_0 \left[ \frac{1}{\sqrt{1 + (R/R_c)^2}} - \frac{1}{\sqrt{1 + (R_t/R_c)^2}} \right]^2 , \tag{1}$$
+Articlenumber,page4of12
 K.Kowalczyk&E.L.Łokas:MultiplestellarpopulationsinSchwarzschildmodeling
-major intermediate minor
-0
 10
--1
+-3
 10
-)R(M
 -2
 10
--3
+-1
 10
-0 10 20 30 40 0 10 20 30 40 0 10 20 30 40 50
-R [kpc] R [kpc] R [kpc]
-12 ]2
-)1-
-9 s
-mk(3
-6
-01[)R(
+0
+0 10 20 30 40
+M(
+R)
+R [kpc]
+major
+0 10 20 30 40
+R [kpc]
+intermediate
+0 10 20 30 40 50
+R [kpc]
+minor
 3
-2
+6
+9
+12
+0 10 20 30 40
 m
-0 10 20 30 40 0 10 20 30 40 0 10 20 30 40 50
-R [kpc] R [kpc] R [kpc]
-10 31- ]
-)
-5 s
-mk(
-0 4
-01[)R(
+2 (
+R)[
+1
+0
+3(
+k
+m
+s -
+1)
+2]
+R [kpc]
+0 10 20 30 40
+R [kpc]
+0 10 20 30 40 50
+R [kpc]
+-10
 -5
--10 3m
-0 10 20 30 40 0 10 20 30 40 0 10 20 30 40 50
-R [kpc] R [kpc] R [kpc]
-4 ]4
-all stars )1-
-3 pop I s
-mk(8
-pop II
-2
-01[)R(
+0
+5
+10
+0 10 20 30 40
+$m_3(R)$ [$10^4$ (km s$^{-1}$)$^3$]
+R [kpc]
+0 10 20 30 40
+R [kpc]
+0 10 20 30 40 50
+R [kpc]
+0
 1
+2
+3
 4
-0 m
-0 10 20 30 40 0 10 20 30 40 0 10 20 30 40 50
-R [kpc] R [kpc] R [kpc]
+0 10 20 30 40
+$m_4(R)$ [$10^8$ (km s$^{-1}$)$^4$]
+R [kpc]
+0 10 20 30 40
+R [kpc]
+0 10 20 30 40 50
+R [kpc]
+all stars
+pop I
+pop II
 Fig.6.ObservablesusedinourSchwarzschildmodelingschemeofthesimulatedgalaxy.Inrows:thefractionofthetotalnumberofstars,2nd,
 3rd,and4thvelocitymoment.Incolumns:mockdatafromthesimulatedgalaxyalongthemajor,intermediate,andminoraxis.Inredwepresent
 thevaluesobtainedforallstarswhereasinorangeandbluethoseforpopulationsIandII,respectively.Forclarityofthefigure,ineachpanelwe
 indicateonlytheerrorbarsforoneofthedatasets.
-where I , R , and R are the modelparameters.The profile can 3. Schwarzschildmodeling
-0 c t
+where $I_0$, $R_c$, and $R_t$ are the model parameters. The profile can
 beanalyticallydeprojectedtoobtainthe3Ddensity
+$$\rho(r) = \frac{\rho_0}{z^2} \left[ \frac{1}{z} \arccos(z) - \sqrt{1 - z^2} \right] , \tag{2}$$
+where
+$$\rho_0 = \frac{I_0}{\pi R_c \left[ 1 + (R_t/R_c)^2 \right]^{3/2}} \tag{3}$$
+and
+$$z = \sqrt{\frac{r^2 + R_c^2}{R_c^2 + R_t^2}} . \tag{4}$$
+3. Schwarzschildmodeling
 In this section we briefly present our modeling method and its
 applicationtothedatasetsderivedforallstarsandthetwopop-
-ρ 1
-ρ(r)= 0 arccos(z)− 1−z2 , (2) ulations of the simulated galaxy separately. In both cases our
-z2 z aimwastorecovertheprofilesofthetotalmassandthevelocity
-" #
-p anisotropy.
-where
+ulations of the simulated galaxy separately. In both cases our
+aimwastorecovertheprofilesofthetotalmassandthevelocity
+anisotropy.
 3.1.Overviewofthemethod
-I
-ρ = 0 (3)
-0 πR [1+(R/R )2]3/2 We followthe approachintroducedin Kowalczyketal. (2018),
-c t c
+We followthe approachintroducedin Kowalczyketal. (2018),
 namely we model the total mass profile with the mass-to-light
-and ratioΥvaryingwithradius:
-z= r2+R2 c . (4) logΥ(r)= log(Υ 0) r≤r 0 (5)
-sR2 +R2 a(logr−logr 0)c+log(Υ 0) r>r
-c t ( 0
+ratioΥvaryingwithradius:
+$$\log\Upsilon(r) = \begin{cases} \log(\Upsilon_0) & r \le r_0 \\ a(\log r - \log r_0)^c + \log(\Upsilon_0) & r > r_0 \end{cases} \tag{5}$$
 Articlenumber,page5of12
 A&Aproofs:manuscriptno.Populations4
-ALL POPULATIONS
-3 3
+1
+2
+3
+0
+0.5
+1
+1
+2
+3
+ALL
+Υ
+0
+a
+c
+1
+2
+3
+0
+0.5
+1
+1
+2
+3
+POPULATIONS
+Υ
+0
+a
+c
+10
 100
-c 2 c 2
-2χ
-1 1
-1 3 1 3
-2 2
-a 0.5 a 0.5 10
-Υ Υ
-1 0 1 0
-0 0
-POP I POP II
-3 3
+χ
+2
+1
+2
+3
+0
+0.5
+1
+1
+2
+3
+POP I
+Υ
+0
+a
+c
+1
+2
+3
+0
+0.5
+1
+1
+2
+3
+POP II
+Υ
+0
+a
+c
+10
 100
-c 2 c 2
-2χ
-1 1
-1 3 1 3
-2 2
-a 0.5 a 0.5 10
-Υ Υ
-1 0 1 0
-0 0
+χ
+2
 Fig.7.Absolutevaluesofχ2obtainedfromthefitsofthreedatasets:allstars(topleftpanel),populationI(bottomleft),andpopulationII(bottom
 right)fortheobservationsalongthemajoraxisofthesimulatedgalaxy.Theresultsforthemodelingoftwopopulations(topright)wereobtained
-asanalgebraicsumofvaluesforpopulationsIandII.Toavoidlargenumbersinthefigure,Υ wasdividedbythemeanmassofastellarparticle.
+asanalgebraicsumofvaluesforpopulationsIandII.Toavoidlargenumbersinthefigure,Υ
 0
-where r is the distance from the center of the galaxy, r is a (∼ a4c4Υ4)thatwerefurtherusedto determinetheglobalmin-
-0 0
-constant,while Υ , a,andc are the parametersof a model.We imums(identifiedas the best-fitting models)and 1, 2, 3σ con-
+wasdividedbythemeanmassofastellarparticle.
+where r is the distance from the center of the galaxy, r
 0
-haveassumedlogr =0.33whichcorrespondstothreesoftening fidence levels which for three parameterscorrespondto ∆χ2 =
+is a
+constant,while Υ
 0
-scalesforstellarparticlesintheIllustrissimulation. 3.53, 8.02, 14.2(Pressetal.1992).
+, a,andc are the parametersof a model.We
+have assumed $\log r_0 = 0.33$ which corresponds to three softening
+scalesforstellarparticlesintheIllustrissimulation.
 Weprobedtheparametera∈[0:1.3]withastep∆a=0.04
 andc ∈ [1.1 : 2.9]witha step∆c = 0.2,imposingtherequire-
-3.2.Applicationtomockdata
 mentonthetotaldensityprofiletobemonotonicallydecreasing
-withradius.Foreachsetofparametersandforeachlineofsight In the following we present the direct and inferred results of
-wegenerated1200orbitsusing100valuesofenergy(expressed the Schwarzschild modeling of the data sets described in Sec-
-with the radius of a circular orbit) spaced logarithmically and tion2.3.
-12valuesoftherelativeangularmomentumspacedlinearly.The First, Fig.7showsthe distributionof theabsolutevaluesof
-outerradiusoftheorbitlibrary,thatistheapocenterofthemost theχ2 asafunctionofthreeparametersofthemass-to-lightra-
-extendedorbit,wasset tor out = 165kpcinordertocoverover tio. In order to avoid unnecessary repetitions, we include only
-0.999 of the total stellar mass based on the fitted King profile the plot for the mock data obtained by observing the Illustris
-parameters. galaxyalongitsmajoraxisastheothersarequalitativelysimilar.
-Wefitthekinematicsweightedwiththefractionofmasswith Thefourpanelsrefertofitsforallstars(topleft),themetal-rich
-the constrained least squares algorithm where different values populationI(bottomleft),themetal-poorpopulationII(bottom
-of Υ were obtainedwith a simple transformationof velocities right),andtheonenamed"populations"(topright)whichisthe
-0
-given by Eq.12, 13, and 15 in Kowalczyketal. (2018). In or- algebraicsumofvaluesforbothpopulations.
-dertosmoothoutthenumericalartifacts,thethree-dimensional Asourparametrizationofthemass-to-lightratioisnotintu-
-χ2 spaces were then interpolated with 12-order polynomials itivewepresentitsprofilesexplicitlyinthefirstrowsoftheleft-
+withradius.Foreachsetofparametersandforeachlineofsight
+wegenerated1200orbitsusing100valuesofenergy(expressed
+with the radius of a circular orbit) spaced logarithmically and
+12valuesoftherelativeangularmomentumspacedlinearly.The
+outerradiusoftheorbitlibrary,thatistheapocenterofthemost
+extendedorbit,wasset tor out = 165kpcinordertocoverover
+0.999 of the total stellar mass based on the fitted King profile
+parameters.
+Wefitthekinematicsweightedwiththefractionofmasswith
+the constrained least squares algorithm where different values
+of $\Upsilon_0$ were obtained with a simple transformation of velocities
+given by Eq.12, 13, and 15 in Kowalczyketal. (2018). In or-
+dertosmoothoutthenumericalartifacts,thethree-dimensional
+$\chi^2$ spaces were then interpolated with 12-order polynomials
+($\sim a^4 c^4 \Upsilon_0^4$) that were further used to determine the global min-
+imums (identified as the best-fitting models) and 1, 2, 3$\sigma$ con-
+fidence levels which for three parameters correspond to $\Delta\chi^2 =$
+3.53, 8.02, 14.2 (Press et al. 1992).
+3.2.Applicationtomockdata
+In the following we present the direct and inferred results of
+the Schwarzschild modeling of the data sets described in Sec-
+tion2.3.
+First, Fig.7showsthe distributionof theabsolutevaluesof
+theχ2 asafunctionofthreeparametersofthemass-to-lightra-
+tio. In order to avoid unnecessary repetitions, we include only
+the plot for the mock data obtained by observing the Illustris
+galaxyalongitsmajoraxisastheothersarequalitativelysimilar.
+Thefourpanelsrefertofitsforallstars(topleft),themetal-rich
+populationI(bottomleft),themetal-poorpopulationII(bottom
+right),andtheonenamed"populations"(topright)whichisthe
+algebraicsumofvaluesforbothpopulations.
+Asourparametrizationofthemass-to-lightratioisnotintu-
+itivewepresentitsprofilesexplicitlyinthefirstrowsoftheleft-
 Articlenumber,page6of12
 K.Kowalczyk&E.L.Łokas:MultiplestellarpopulationsinSchwarzschildmodeling
-AAAAAAAAAAAALLLLLLLLLLLLLLLLLLLLLLLL PPPPPPPPPPPPOOOOOOOOOOOOPPPPPPPPPPPPUUUUUUUUUUUULLLLLLLLLLLLAAAAAAAAAAAATTTTTTTTTTTTIIIIIIIIIIIIOOOOOOOOOOOONNNNNNNNNNNNSSSSSSSSSSSS
-major intermediate minor major intermediate minor
-1010 1010
-3σ 3σ 109 2 1σ 109 2 1σ ]⊙L/⊙M[ ]⊙L/⊙M[
-σ σ
-108 best model 108 best model
-data data
-107 107 )r(Υ )r(Υ
-106 106
-10 100 10 100 10 100 10 100 10 100 10 100
-r [kpc] r [kpc] r [kpc] r [kpc] r [kpc] r [kpc]
-]3-cpk ]3-cpk
-108 108
-106 106 ⊙M[ ⊙M[
-)r( )r(
-104 104
-totν totν
-10 100 10 100 10 100 10 100 10 100 10 100
-r [kpc] r [kpc] r [kpc] r [kpc] r [kpc] r [kpc]
-1012 1012 ]⊙M[ ]⊙M[
-1011 1011 )r( )r(
-totM totM
-1010 1010
-10 100 10 100 10 100 10 100 10 100 10 100
-r [kpc] r [kpc] r [kpc] r [kpc] r [kpc] r [kpc]
-1 1
-0 0
-)r(β )r(β
--1 -1
--2 -2
-0 10 20 30 40 0 10 20 30 40 0 10 20 30 40 50 0 10 20 30 40 0 10 20 30 40 0 10 20 30 40 50
-r [kpc] r [kpc] r [kpc] r [kpc] r [kpc] r [kpc]
+106
+107
+108
+109
+1010
+10 100
+ALL
+$\Upsilon(r)$ [$M_\odot/L_\odot$]
+r [kpc]
+major
+10 100
+ALL
+r [kpc]
+intermediate
+10 100
+ALL
+r [kpc]
+minor
+3σ 2σ
+1σ
+best model
+data
+104
+106
+108
+10 100
+ALL
+$\nu_{\rm tot}(r)$ [$M_\odot$ kpc$^{-3}$]
+r [kpc]
+10 100
+ALL
+r [kpc]
+10 100
+ALL
+r [kpc]
+1010
+1011
+1012
+10 100
+ALL
+$M_{\rm tot}(r)$ [$M_\odot$]
+r [kpc]
+10 100
+ALL
+r [kpc]
+10 100
+ALL
+r [kpc]
+-2
+-1
+0
+1
+0 10 20 30 40
+ALL
+β(r)
+r [kpc]
+0 10 20 30 40
+ALL
+r [kpc]
+0 10 20 30 40 50
+ALL
+r [kpc]
+106
+107
+108
+109
+1010
+10 100
+POPULATIONS
+$\Upsilon(r)$ [$M_\odot/L_\odot$]
+r [kpc]
+major
+10 100
+POPULATIONS
+r [kpc]
+intermediate
+10 100
+POPULATIONS
+r [kpc]
+minor
+3σ 2σ
+1σ
+best model
+data
+104
+106
+108
+10 100
+POPULATIONS
+$\nu_{\rm tot}(r)$ [$M_\odot$ kpc$^{-3}$]
+r [kpc]
+10 100
+POPULATIONS
+r [kpc]
+10 100
+POPULATIONS
+r [kpc]
+1010
+1011
+1012
+10 100
+POPULATIONS
+$M_{\rm tot}(r)$ [$M_\odot$]
+r [kpc]
+10 100
+POPULATIONS
+r [kpc]
+10 100
+POPULATIONS
+r [kpc]
+-2
+-1
+0
+1
+0 10 20 30 40
+POPULATIONS
+β(r)
+r [kpc]
+0 10 20 30 40
+POPULATIONS
+r [kpc]
+0 10 20 30 40 50
+POPULATIONS
+r [kpc]
 Fig.8.Left-handside:resultsofSchwarzschildmodelingofthreemockdatasetsobtainedbyobservingthesimulatedgalaxyalongtheprincipal
 axes.Inrows:derivedmass-to-lightratio,totaldensity,totalmass,andanisotropyparameter.Incolumns:observationsalongthemajor,interme-
 diate,andminoraxis,respectively.Greenlinesindicatevaluesforthebest-fitmodelswhereasthecoloredareasofdecreasingintensityshowthe
-1,2,and3σconfidencelevels.Thetruevaluesarepresentedasblacklines.Thinverticallinesmarkthevaluesofr andtheouterrangeofthe
+1,2,and3σconfidencelevels.Thetruevaluesarepresentedasblacklines.Thinverticallinesmarkthevaluesofr
 0
+andtheouterrangeofthe
 datasets,fromlefttoright.Right-handside:sameasleftbutforthefitoftwostellarpopulations.
-and right-handside panelsof Fig.8 for the results obtainedfor 3.3.Comparisonoffittingresults
+and right-handside panelsof Fig.8 for the results obtainedfor
 all stars and the populations,respectively. We further calculate
 thetotaldensity(secondrows)andthetotalmasscontent(third
-rows).Weincludetheobtainedorbitanisotropywithinthemod- The main strength of the two populations method comes from
-eledrangeinthebottomrows.Theconsecutivecolumnspresent tracingtheunderlyinggravitationalpotentialatdifferentscales.
-the results for the observations along the major, intermediate, AscanbeseeninthebottompanelsofFig.7,populationI,which
-andminoraxis.Greenlinesindicatevaluesforthebest-fitmod- is more concentrated, is also more sensitive to Υ , but gives
-0
-elswhereasthecoloredareasofdecreasingintensitycorrespond weaker constraints on a or c. On the other hand, population II
-to1,2,and3σconfidenceregionsobtainedasextremevaluesal- attemptsto reproducethe totalmass contentatlargerdistances
-lowedbythemodelswithχ2withinagivenregion.Ineachpanel aswell,thereforeshowingstrongercouplingbetweentheparam-
-thetruevaluesfromthesimulationarepresentedwithblacklines eters.
-whilethinverticallinesmarkthevaluesofr andtheouterrange
+rows).Weincludetheobtainedorbitanisotropywithinthemod-
+eledrangeinthebottomrows.Theconsecutivecolumnspresent
+the results for the observations along the major, intermediate,
+andminoraxis.Greenlinesindicatevaluesforthebest-fitmod-
+elswhereasthecoloredareasofdecreasingintensitycorrespond
+to1,2,and3σconfidenceregionsobtainedasextremevaluesal-
+lowedbythemodelswithχ2withinagivenregion.Ineachpanel
+thetruevaluesfromthesimulationarepresentedwithblacklines
+whilethinverticallinesmarkthevaluesofr
 0
+andtheouterrange
 ofthedatasetsbeyondwhichthereliabilityofresultsdropssig-
-The global minimums of the χ2 distributions for both ap-
 nificantly. The true mass-to-light ratio profile was obtained by
-proaches, that is modeling one and two populations, which we
 dividingthetotalmassbythefittedKingprofiles,thereforethe
-identifyasthebest-fittingmodels,closelycoincideshowingthat
 drop at 100kpc is the numerical artifact occurring at the very
-thereisnointernalbiasintheimprovedmethod.However,sig-
 outskirtsofthegalaxy.
+Whereasintheright-handsidepanelsofFig.8theresulting
+anisotropyisobtainedfromthefitofallstarsandusesonlythe
+locationofglobalminimumandconfidencelevelsfromtwopop-
+ulations(asinthetoprightpanelofFig.7),inFig.9wepresent
+anothermethodofcalculatingtheanisotropy.Inthesecondand
+third row we show the derived profiles for population I and II
+separately and combine them as stellar mass weighted average
+inthetoprow.Asinpreviousfigures,threecolumnsrefertothe
+differentlinesofsightwhereasthenarrowfourthoneshowsthe
+behaviorofthetrueprofilesoutsidethemodeledrangewhich,as
+we noticed in our previousstudies, in a limited way influences
+the results. Such an impact is understandablesince the stars at
+largerdistancesfromthecenterarestillincludedintheline-of-
+sightmeasurements.
+3.3.Comparisonoffittingresults
+The main strength of the two populations method comes from
+tracingtheunderlyinggravitationalpotentialatdifferentscales.
+AscanbeseeninthebottompanelsofFig.7,populationI,which
+is more concentrated, is also more sensitive to $\Upsilon_0$, but gives
+weaker constraints on a or c. On the other hand, population II
+attemptsto reproducethe totalmass contentatlargerdistances
+aswell,thereforeshowingstrongercouplingbetweentheparam-
+eters.
+The global minimums of the χ2 distributions for both ap-
+proaches, that is modeling one and two populations, which we
+identifyasthebest-fittingmodels,closelycoincideshowingthat
+thereisnointernalbiasintheimprovedmethod.However,sig-
 nificantdifferencescanbeobservedwhencomparingtheconfi-
 dence levels, mainly at 1 and 3σ. Namely, we find that using
 two populations, the constraints we obtain on the density and
-Whereasintheright-handsidepanelsofFig.8theresulting
 anisotropyprofilearemuchstronger.
-anisotropyisobtainedfromthefitofallstarsandusesonlythe
-locationofglobalminimumandconfidencelevelsfromtwopop-
-ulations(asinthetoprightpanelofFig.7),inFig.9wepresent Additionally, the more accurate method allows us to study
-anothermethodofcalculatingtheanisotropy.Inthesecondand other effects and biases, for example the consequences of the
-third row we show the derived profiles for population I and II nonsphericity of the modeled object. Whereas for the fit of all
-separately and combine them as stellar mass weighted average starsthetruevaluesofthedensity,mass,andanisotropyprofiles
-inthetoprow.Asinpreviousfigures,threecolumnsrefertothe are contained within 1σ confidenceregions, the results for the
-differentlinesofsightwhereasthenarrowfourthoneshowsthe populationsaremoreorlessbiaseddependingontheaxis.They
-behaviorofthetrueprofilesoutsidethemodeledrangewhich,as are well reproducedfor the observation along the intermediate
-we noticed in our previousstudies, in a limited way influences axis, for which the effects of nonsphericityseem to cancelout,
-the results. Such an impact is understandablesince the stars at and more biased for the remaining lines of sight. We notice a
-largerdistancesfromthecenterarestillincludedintheline-of- trendfromunder-tooverestimationoftheanisotropywhengo-
-sightmeasurements. ingfromthemajortotheminoraxis.
+Additionally, the more accurate method allows us to study
+other effects and biases, for example the consequences of the
+nonsphericity of the modeled object. Whereas for the fit of all
+starsthetruevaluesofthedensity,mass,andanisotropyprofiles
+are contained within 1σ confidenceregions, the results for the
+populationsaremoreorlessbiaseddependingontheaxis.They
+are well reproducedfor the observation along the intermediate
+axis, for which the effects of nonsphericityseem to cancelout,
+and more biased for the remaining lines of sight. We notice a
+trendfromunder-tooverestimationoftheanisotropywhengo-
+ingfromthemajortotheminoraxis.
 Articlenumber,page7of12
 A&Aproofs:manuscriptno.Populations4
-major intermediate minor
-1 II
-POP
-0 )r(β
+-1
+0
+1
+0 10 20 30 40
+P
+O
+P
+I
 +
+P
+O
+P
+I
 I
-POP
+β
+(
+r )
+r [kpc]
+major
+0 10 20 30 40
+r [kpc]
+intermediate
+0 10 20 30 40
+r [kpc]
+minor
+50 60 70 80
 -1
-0 10 20 30 40 0 10 20 30 40 0 10 20 30 40 50 60 70 80
-r [kpc] r [kpc] r [kpc]
+0
 1
+0 10 20 30 40
+P
+O
+P
 I
-0 POP )r(β
+β (
+r )
+r [kpc]
+0 10 20 30 40
+r [kpc]
+0 10 20 30 40
+r [kpc]
+50 60 70 80
 -1
-0 10 20 30 40 0 10 20 30 40 0 10 20 30 40 50 60 70 80
-r [kpc] r [kpc] r [kpc]
+0
 1
-II
-0 POP )r(β
--1
-0 10 20 30 40 0 10 20 30 40 0 10 20 30 40 50 60 70 80
-r [kpc] r [kpc] r [kpc]
-data 1σ
-best model 2σ
+0 10 20 30 40
+P
+O
+P I
+I
+β (
+r )
+r [kpc]
+0 10 20 30 40
+r [kpc]
+0 10 20 30 40
+r [kpc]
+50 60 70 80
+data
+best model
+1σ
+2σ
 3σ
 Fig.9.ProfilesoftheanisotropyparameterobtainedwiththeSchwarzschildmodelingoftwostellarpopulationsofthesimulatedgalaxy.Inrows:
 results for all stars(calculated asthe superposition of two populations), population I, and population II. Colors follow theconvention used in
 previousfigures.Incolumns:observationsalongthemajor,intermediate,andminoraxis.Thelastnarrowercolumnshowsthedata(blacklines)
 outsidethemodeledradialrange.Colorlinesindicatevaluesforthebest-fitmodelswhereasthecoloredareasofdecreasingintensityshowthe1,
 2,and3σconfidenceregions.
-4. ModelingFornaxdSph sample of all stars since only stars with reliable measurements
-ofmetallicitycouldbeincluded.
+4. ModelingFornaxdSph
 In this section we presentthe applicationof our Schwarzschild
 modelingschemetotheobservationaldatafortheFornaxdSph
 galaxy obtained by delPinoetal. (2015) and delPinoetal.
-100
-(2017).Thisstudyisafollow-upoftheworkofKowalczyketal. pop I
-(2019) and can be directly compared to the results presented pop II
-80
+(2017).Thisstudyisafollow-upoftheworkofKowalczyketal.
+(2019) and can be directly compared to the results presented
 there. Moreover,we refer the reader to these previouspublica-
 tions for details on the origin of data and our procedures used
-60
-forcleaningthespectroscopicsample. N
-Similarly to the approachintroducedin Section 2.2, we di- 40
+forcleaningthespectroscopicsample.
+Similarly to the approachintroducedin Section 2.2, we di-
 videdallavailablestarsintotwoequal-sizepopulationsbasedon
-their metallicity and then cross-correlatedthe sampleswith the 20
+their metallicity and then cross-correlatedthe sampleswith the
 datausedinKowalczyketal.(2019).Themetallicityhistogram
-ofthefinalspectroscopicsampleisshowninFig.10.Addition- 0
-ally, we color-coded each bin with the population it has been -2.5 -2 -1.5 -1 -0.5 0
+ofthefinalspectroscopicsampleisshowninFig.10.Addition-
+ally, we color-coded each bin with the population it has been
 assignedto,namelyorangeorblueforpopulationIorII.Inter-
-[Fe/H]
 estingly, the case of Fornax is similar to our simulated galaxy
 as the split at [Fe/H]= −1 also captures an important feature
-Fig.10.Metallicityhistogramofthefinalspectroscopicsampleusedin
 of the object’sstar formationhistory,separating stars into sub-
-themodelingoftwostellarpopulationsintheFornaxdSph.Eachbinis
-samples older and younger than 6 Gyr, as shown in Fig. 12 of color-codedaccordingtothepopulationithasbeenassignedto,orange
-delPinoetal. (2015) and Fig. 8 of delPinoetal. (2017). The orblueforpopulationIandII,respectively.
+samples older and younger than 6 Gyr, as shown in Fig. 12 of
+delPinoetal. (2015) and Fig. 8 of delPinoetal. (2017). The
 numbers of stars contained in the samples of all stars, popula-
 tionI,andpopulationIIaregivenin Table2, wheretheindices
-"phot" and "spec" refer to the photometricand kinematic sam- Aswehaveshowninourearlierwork,thelightprofileofthe
-ples. The sum of stars in the populations is lower than in the Fornax dSph can be well reproduced with the three-parameter
+"phot" and "spec" refer to the photometricand kinematic sam-
+ples. The sum of stars in the populations is lower than in the
+sample of all stars since only stars with reliable measurements
+ofmetallicitycouldbeincluded.
+N
+[Fe/H]
+pop I
+pop II
+0
+20
+40
+60
+80
+100
+-2.5 -2 -1.5 -1 -0.5 0
+Fig.10.Metallicityhistogramofthefinalspectroscopicsampleusedin
+themodelingoftwostellarpopulationsintheFornaxdSph.Eachbinis
+color-codedaccordingtothepopulationithasbeenassignedto,orange
+orblueforpopulationIandII,respectively.
+Aswehaveshowninourearlierwork,thelightprofileofthe
+Fornax dSph can be well reproduced with the three-parameter
 Articlenumber,page8of12
 K.Kowalczyk&E.L.Łokas:MultiplestellarpopulationsinSchwarzschildmodeling
-Table2.PropertiesofthedatasamplesfortheFornaxdSph. 0.25
-all stars
-0.2
+Table2.PropertiesofthedatasamplesfortheFornaxdSph.
 Property ALL POPI POPII
-pop I
-Numberofstars(N phot) 65797 14882 49205 0.15 )R(M
-Numberofstars(N ) 3286 1136 1151 pop II
-spec
-0.1
+Number of stars ($N_{\rm phot}$) 65797 14882 49205
+Number of stars ($N_{\rm spec}$) 3286 1136 1151
 Starswithin1.8kpc 3268 1134 1130
-Fittednormalization(N 0)[×104] 6.95 1.81 5.45 0.05
-Sérsicradius(R )[kpc] 0.454 0.429 0.420
-S 0
+Fitted normalization ($N_0$) [$\times 10^4$] 6.95 1.81 5.45
+Sérsic radius ($R_S$) [kpc] 0.454 0.429 0.420
 Sérsicparameter(m) 0.808 0.807 0.898
-0 0.4 0.8 1.2 1.6
-R [kpc]
-5
 10
-200 ]2
-104 160 2- ] )1-
-cpk[ s
-120 mk([)R(
-80 )R(
-3 all stars
-10 40 ⋆ 2m
-popI n
-0
-popII
-0 0.4 0.8 1.2 1.6
 2
-10 R [kpc]
-0.1 0.2 0.5 1 2
+10
+3
+10 4
+10
+5
+0.2 0.5 2 0.1 1
+$n_\star(R)$ [kpc$^{-2}$]
 R [kpc]
-]3
-16 )1-
-Fig.11.Surfacenumberdensityprofilesofthephotometricdatasam- s
-8 mk(2
+all stars
+popI
+popII
+Fig.11.Surfacenumberdensityprofilesofthephotometricdatasam-
 plesfortheFornaxdSph:allavailablestars(inred),themetal-richpop-
-ulationI (inorange), andthemetal-poor population II(inblue). Thin 0
-verticallinesindicater (seetext)andtheouterboundaryofthespec- 01[)R(
+ulationI (inorange), andthemetal-poor population II(inblue). Thin
+verticallinesindicater
 0
-troscopicdata. -8
--16
-3m
-0 0.4 0.8 1.2 1.6
+(seetext)andtheouterboundaryofthespec-
+troscopicdata.
 Sérsicformula(Sérsic1968).Theprofilesofnumberdensityfor
-R [kpc]
 allstarsandbothpopulationstogetherwiththebest-fittingSérsic
 profilesare presentedin Fig.11. The colorsfollow the conven-
-tionintroducedinprevioussections.Thinverticallinesindicate 16 ]4
-the innermost data point for the light profile for all stars and )1-
-the outerboundaryof the kinematic sample. The former,set at 12 s
-mk(4
+tionintroducedinprevioussections.Thinverticallinesindicate
+the innermost data point for the light profile for all stars and
+the outerboundaryof the kinematic sample. The former,set at
 logr = −0.16,isalsousedastheminimumofthemass-to-light
-ratio profile (r in Eq.5). The fitted parameters of the profiles, 8
-0 01[)R(
-thatisthenormalizationN ,theSérsicradiusR ,andtheSérsic
-0 S
-parameterm,areincludedinthesecondpartofTable2. 4
-Figure12presentstheprofilesoftheobservablesusedinthe 4
-0 m
+ratio profile (r 0 in Eq.5). The fitted parameters of the profiles,
+thatisthenormalizationN
+0
+,theSérsicradiusR
+S
+,andtheSérsic
+parameterm,areincludedinthesecondpartofTable2.
+Figure12presentstheprofilesoftheobservablesusedinthe
 Schwarzschildmodeling:thefractionofstarsandthe 2nd,3rd,
-0 0.4 0.8 1.2 1.6
 and4thvelocitymoments(toptobottom)forthethreedatasam-
-ples:allstars,populationI,andpopulationII(inred,orange,and R [kpc]
+ples:allstars,populationI,andpopulationII(inred,orange,and
 blue,respectively).Theerrorbarsindicate1σsamplingerrors.
-The parameter space for Υ(r) has been probed as follows: Fig. 12. Observables of the Fornax dSph used in our Schwarzschild
-a ∈ [0 : 1.85]with a step ∆a = 0.05 and c ∈ [1.2 : 6] with a modelingscheme.Inrows:thefractionofthetotalnumberofstars,the
-step∆c = 0.2.We pointoutthatinKowalczyketal.(2019)the 2nd,3rd,and4thvelocitymoment.Inredwepresentthevaluesobtained
-parameter c was fixed at c = 3 and now we fit it as a free pa- forallstarswhereasinorangeandbluethoseforpopulationsIandII,
-rameter.AsforthemockdatainSection3.2,differentvaluesof respectively.
-Υ were obtainedwith the transformationof velocity moments
+The parameter space for Υ(r) has been probed as follows:
+a ∈ [0 : 1.85]with a step ∆a = 0.05 and c ∈ [1.2 : 6] with a
+step∆c = 0.2.We pointoutthatinKowalczyketal.(2019)the
+parameter c was fixed at c = 3 and now we fit it as a free pa-
+rameter.AsforthemockdatainSection3.2,differentvaluesof
+Υ
 0
+were obtainedwith the transformationof velocity moments
 withintheχ2 fittingroutine.Thevaluesof∆χ2 forallstarsand
 thepopulationsareshowninthetwopanelsofFig.13(leftand
-right-handside,respectively).Duetothedensecoverageofthe theconfidencelevelsonΥfromthefitoftwopopulations.Green
-grid,wedecidedtoincludeonlythevalueswithin3σfromthe linesindicatethevaluesforthebest-fittingmodelswhereasthe
-fittedminimums(seeSection3.1). coloredareasofdecreasingintensityshowthe1,2,and3σcon-
-The profiles of the mass-to-light ratio, total density, total fidenceregions.Additionally,withblackdashedlinesweinclude
-mass,andvelocityanisotropyresultingfromtheχ2distributions theresultsfromKowalczyketal.(2019)forcomparison.
-arepresentedintheconsecutiverowsofFig.14.Theanisotropy As a result of freeing the steepness of the mass-to-light
-profileforthepopulationsisbasedonthefitofallstarsbutusing ratio profile (parameter c) with respect to the previous study
+right-handside,respectively).Duetothedensecoverageofthe
+grid,wedecidedtoincludeonlythevalueswithin3σfromthe
+fittedminimums(seeSection3.1).
+The profiles of the mass-to-light ratio, total density, total
+mass,andvelocityanisotropyresultingfromtheχ2distributions
+arepresentedintheconsecutiverowsofFig.14.Theanisotropy
+profileforthepopulationsisbasedonthefitofallstarsbutusing
+0
+0.05
+0.1 0.15
+0.2
+0.25
+0 0.4 0.8 1.2 1.6
+M ( R )
+R [kpc]
+all stars
+pop I
+pop II
+0
+40
+80
+120
+160
+200
+0 0.4 0.8 1.2 1.6
+m
+2 (
+R ) [
+(
+k
+m
+s - 1 )
+2
+]
+R [kpc]
+-16
+-8
+0
+8
+16
+0 0.4 0.8 1.2 1.6
+m
+3
+(
+R
+)
+[
+1
+0
+2
+(
+k
+m
+s -
+1
+)
+3
+]
+R [kpc]
+0
+4
+8
+12
+16
+0 0.4 0.8 1.2 1.6
+m
+4 (
+R ) [
+1
+0 4 (
+k
+m
+s -
+1 )
+4
+]
+R [kpc]
+Fig. 12. Observables of the Fornax dSph used in our Schwarzschild
+modelingscheme.Inrows:thefractionofthetotalnumberofstars,the
+2nd,3rd,and4thvelocitymoment.Inredwepresentthevaluesobtained
+forallstarswhereasinorangeandbluethoseforpopulationsIandII,
+respectively.
+theconfidencelevelsonΥfromthefitoftwopopulations.Green
+linesindicatethevaluesforthebest-fittingmodelswhereasthe
+coloredareasofdecreasingintensityshowthe1,2,and3σcon-
+fidenceregions.Additionally,withblackdashedlinesweinclude
+theresultsfromKowalczyketal.(2019)forcomparison.
+As a result of freeing the steepness of the mass-to-light
+ratio profile (parameter c) with respect to the previous study
 Articlenumber,page9of12
 A&Aproofs:manuscriptno.Populations4
-ALL POPULATIONS
-6 6
+0
+0.5
+1
+1.5
+0
+0.5 1
+1.5
+2
+3
+4
+5
+6
+ALL
+Υ 0
+a
+c
+0
+0.5
+1
+1.5
+0
+0.5 1
+1.5
+2
+3
+4
+5
+6
+POPULATIONS
+Υ 0
+a
+c
+0
+3
+6
+9
 12
-5 5
-4 4
-c c 9
-nim
-3 3
-2χ-2χ
-2 2 6
-1.5 1.5
-1 0 1 0 3
-Υ 0 0.5 1 0.5 Υ 0 0.5 1 0.5
-1.5 a 1.5 a
-0 0 0
+χ
+2-
+χ
+2
+mi
+n
 Fig.13.Valuesofχ2relativetothefittedminimumwithintherangeof3σconfidencelevelforallstars(leftpanel)andforthepopulations(right
 panel)fortheFornaxdSph.
-(Kowalczyketal.2019),weobtainedhigherestimatesoftheen- of the real growing profile by observing an object along the
-closed total mass at larger radii. In particular, for the mass en- minor and major axis, respectively. However, for the bias to
-closedwithin 1.8kpcwe get M (< 1.8kpc) = 3.87+1.48×108 occur in two populations presented here, their inner orienta-
-all −1.56
-M⊙ fromthefitforallstarsand M pops(< 1.8 kpc)= 4.71+ −0 1. .8 17 3× tionswouldneedtobeopposite.Sincesuchmorphologicalfea-
-108 M⊙ from the fit of populations, while previously we had tures are not supported by the photometric studies of Fornax
-M old(<1.8 kpc)=3.7+ −1 1. .4 3×108M⊙. ( sd pe al tiP alin ao lie gt na ml. e2 n0 t1 b5 e; twW ea en ng te ht ea sl. te2 l0 la1 r9 p) ow ph ui lc ah tiora nt sh ,e wrfi en cd oa ncg lo uo dd
-e
+(Kowalczyketal.2019),weobtainedhigherestimatesoftheen-
+closed total mass at larger radii. In particular, for the mass en-
+closedwithin 1.8kpcwe get M
+all
+(< 1.8kpc) = 3.87+1.48
+−1.56
+×108
+M⊙ fromthefitforallstarsand M
+pops
+(< 1.8 kpc)= 4.71+0.87
+−1.13
+×
+108 M⊙ from the fit of populations, while previously we had
+M
+old
+(<1.8 kpc)=3.7+1.4
+−1.3
+×108M⊙.
 Interestingly, despite the significant shift of the position of
-that the anisotropy profiles of the two populations modeled in
-χ2 (to c = 4.2 for all stars and 3.6 for populations), the ob-
-min thisworkareindeedsignificantlydistinct.
+χ2
+min
+(to c = 4.2 for all stars and 3.6 for populations), the ob-
 tainedprofileoftheanisotropyparameterremainsdecreasingor
-Finally,itisworthnoticingthattheso-calledmass-follows-
 flat for all stars but changesto increasing from0 to 0.5 for the
-lightmodel,thatis the one followingfromthe assumptionthat
 populations. Nevertheless, even in the latter case the previous
-thetotaldensitytracesthestellar distribution,isnolongersup-
 resultagreeswiththenewfindingwithin1σ.
-ported by the fit of the populations. With our parametrization,
-The detailed analysis of the anisotropy is shown in Fig.15 themass-follows-lightmodelcorrespondstoa = 0andwhereas
+The detailed analysis of the anisotropy is shown in Fig.15
 where the middle and bottom panels present the profiles ob-
-it is enclosed within 3σ for the fit of all stars, as was the case
 tainedforeachpopulationseparately.Wenoticethattheprofile
-inKowalczyketal.(2019),theallowedvaluesfortheimproved
 for populationI is decreasing or has a local minimumwhereas
-method are much larger, as demonstratedby the right panel of
 for population II is increasing (from −0.25 to 0.5 for the best-
-Fig.13.
 fittingmodel).SincepopulationIismoreconcentrated,thelast
 bins contain very few stars, which limits their credibility. The
-top panel of Fig.15 presents the anisotropy of all stars calcu- 5. Summaryanddiscussion
+top panel of Fig.15 presents the anisotropy of all stars calcu-
 latedasaweightedsuperpositionoftwopopulations.Withsuch
-Building on the previously created implementation of the
 approachwestillobtaintheincreasingprofile(from0to0.5)but
-Schwarzschildorbitsuperpositionmethodfocusedonmodeling
 thepreviousresultagreeswithitonlywithin2σ.
+Since Fornax dSph is significantly elongated with the pro-
+jected ellipticity of ǫ = 0.30 ± 0.01 (Irwin&Hatzidimitriou
+1995), we anticipate some bias in the obtained results caused
+bythesphericallysymmetricmodeling.Kowalczyketal.(2018)
+studied such bias in an axisymmetric simulated object qualita-
+tivelysimilartoFornaxandidentifieddifferencesinthesystem-
+aticerrorsdependingonwhetherthegalaxywasobservedalong
+itsmajororminoraxis.AssumingthatFornaxisobservedalong
+the lineof sightin betweenthese extremes,we expectthetotal
+massprofiletobeslightlyoverestimatedandtheanisotropytobe
+underestimated,further strengtheningthe likelihoodof the real
+anisotropytoberadialanditsprofiletobegrowingwithradius
+withrespecttotheresultsofKowalczyketal.(2019).
+Bothconstant(likeforourpopulationI)andgrowing(pop-
+ulation II) anisotropy profiles can arise from biased modeling
+of the real growing profile by observing an object along the
+minor and major axis, respectively. However, for the bias to
+occur in two populations presented here, their inner orienta-
+tionswouldneedtobeopposite.Sincesuchmorphologicalfea-
+tures are not supported by the photometric studies of Fornax
+(delPinoetal.2015;Wangetal.2019)whichratherfindagood
+spatial alignmentbetween the stellar populations, we conclude
+that the anisotropy profiles of the two populations modeled in
+thisworkareindeedsignificantlydistinct.
+Finally,itisworthnoticingthattheso-calledmass-follows-
+lightmodel,thatis the one followingfromthe assumptionthat
+thetotaldensitytracesthestellar distribution,isnolongersup-
+ported by the fit of the populations. With our parametrization,
+themass-follows-lightmodelcorrespondstoa = 0andwhereas
+it is enclosed within 3σ for the fit of all stars, as was the case
+inKowalczyketal.(2019),theallowedvaluesfortheimproved
+method are much larger, as demonstratedby the right panel of
+Fig.13.
+5. Summaryanddiscussion
+Building on the previously created implementation of the
+Schwarzschildorbitsuperpositionmethodfocusedonmodeling
 dSphgalaxiesoftheLocalGroup(Kowalczyketal.2017,2018,
-Since Fornax dSph is significantly elongated with the pro- 2019),weimprovedourtoolbyintroducingmultiplestellarpop-
-jected ellipticity of ǫ = 0.30 ± 0.01 (Irwin&Hatzidimitriou ulations. Such an improvement is desirable and justified since
-1995), we anticipate some bias in the obtained results caused manyofthedwarfsshowsignsofmultiplestarformationbursts
-bythesphericallysymmetricmodeling.Kowalczyketal.(2018) orextendedstarformationepisodes.Asthedifferentpopulations
-studied such bias in an axisymmetric simulated object qualita- trace the common underlying gravitational potential, one may
-tivelysimilartoFornaxandidentifieddifferencesinthesystem- expectasignificantimprovementintheestimatesofnotonlythe
-aticerrorsdependingonwhetherthegalaxywasobservedalong totalmasscontentbutalsotheorbitanisotropysincethisrobust
-itsmajororminoraxis.AssumingthatFornaxisobservedalong modeling technique reproduces the anisotropy as a by-product
-the lineof sightin betweenthese extremes,we expectthetotal ofthemodelingratherthantakingitasanassumption.
-massprofiletobeslightlyoverestimatedandtheanisotropytobe Wehavetestedourhypothesisbymodelingmockdatagener-
-underestimated,further strengtheningthe likelihoodof the real atedfromagalaxyformedintheIllustrissimulation.Duetothe
-anisotropytoberadialanditsprofiletobegrowingwithradius limitationsoftheresolution,wechoseagalaxyofmassafewor-
-withrespecttotheresultsofKowalczyketal.(2019). dersof magnitudelargerthanthe estimatedmasses ofclassical
-Bothconstant(likeforourpopulationI)andgrowing(pop- dwarfs.Still, the galaxypossessed appropriatequalitativechar-
-ulation II) anisotropy profiles can arise from biased modeling acteristics,suchasthelackofgasandanalmostsphericalshape,
+2019),weimprovedourtoolbyintroducingmultiplestellarpop-
+ulations. Such an improvement is desirable and justified since
+manyofthedwarfsshowsignsofmultiplestarformationbursts
+orextendedstarformationepisodes.Asthedifferentpopulations
+trace the common underlying gravitational potential, one may
+expectasignificantimprovementintheestimatesofnotonlythe
+totalmasscontentbutalsotheorbitanisotropysincethisrobust
+modeling technique reproduces the anisotropy as a by-product
+ofthemodelingratherthantakingitasanassumption.
+Wehavetestedourhypothesisbymodelingmockdatagener-
+atedfromagalaxyformedintheIllustrissimulation.Duetothe
+limitationsoftheresolution,wechoseagalaxyofmassafewor-
+dersof magnitudelargerthanthe estimatedmasses ofclassical
+dwarfs.Still, the galaxypossessed appropriatequalitativechar-
+acteristics,suchasthelackofgasandanalmostsphericalshape,
 Articlenumber,page10of12
 K.Kowalczyk&E.L.Łokas:MultiplestellarpopulationsinSchwarzschildmodeling
-ALL POPULATIONS
-1
+101
+103
 105
-II
+0.1 1
+Υ(r) [ M
+⊙ / L ⊙ ]
+r [kpc]
+ALL
+0.1 1
+r [kpc]
+POPULATIONS
 3σ
-2σ 0 ]⊙L/⊙M[ POP
-1σ
-103 )r(β
-best model +
-K19 -1 I
-)r(Υ POP
-101
+2σ 1σ best model
+K19
+104
+106
+108
+0.1 1
+νt
+ot
+(r)
+[
+M
+⊙
+k
+p c -
+3]
+r [kpc]
+0.1 1
+r [kpc]
+105
+107
+109
+0.1 1
+Mt ot (r)
+[
+M
+⊙ ]
+r [kpc]
+0.1 1
+r [kpc]
+-3
 -2
-0 0.4 0.8 1.2 1.6
-0.1 1 0.1 1
-r [kpc] r [kpc] r [kpc]
+-1
+0
 1
-]3-cpk
-108
+0 0.4 0.8 1.2 1.6
+β(r)
+r [kpc]
+0 0.4 0.8 1.2 1.6
+r [kpc]
+Fig. 14. Results of Schwarzschild modeling of the Fornax dSph.
+In rows: derived mass-to-light ratio, total density, total mass, and
+anisotropy parameter.Incolumns: resultsforallstarsandthepopula-
+tions,respectively.Greenlinesindicatethevaluesforthebest-fitmodels
+whereasthecoloredareasofdecreasingintensityshowthe1,2,and3σ
+confidenceregions.Thebest-fittingvaluesobtainedbyKowalczyketal.
+(2019)areshownwithblackdashedlines.
+that made it a good test bed for modeling techniques applica-
+ble to dSph galaxies. We applied our approach to all data and
+totwostellarpopulationsseparately,comparingtheaccuracyof
+theobtainedresults. Althoughtheadditionofthe secondtracer
+seeminglyincreasesthenumberofconstraintstwice, theincre-
+mentissomewhatcompromisedbythesamplingerrorssincethe
+numberofstarsineachsampleisthenreduced.Still,wefound
+strong improvements in the accuracy of the method when us-
+ing two populations.The resultsof the modelingshow that the
+density and velocity anisotropyprofilesare more strongly con-
+strained, most importantly at the 3σ level, that is the range of
+allowedvaluesismuchnarrower.
+SimilarlytotheconclusionsofKowalczyketal.(2018)who
+explored the effects of nonsphericity using large and small
+data samples, the comparison of results presented in the left-
+and right-handside panelsof Fig.8 suggeststhat the improved
+methodusingtwostellarpopulationsgivesmoreprecisebutless
+accurate outcome. However, in both studies the apparent dete-
+rioration of the reliability is a consequence of modeling of a
+nonspherical object. In both cases, a simpler approach (much
+smaller data samples or using one stellar population) resulted
+-2
+-1
 0
+1
+0 0.4 0.8 1.2 1.6
+P
+O
+P I
++ P O P
 I
-⊙M[ POP )r(β
-106
--1
-)r(
-totν
-104
--2
-0.1 1 0.1 1
-r [kpc] r [kpc] 0 0.4 0.8 1.2 1.6
+I
+β
+( r )
 r [kpc]
-1
-109
-]⊙M[
-0 II
-107 )r( totM POP )r(β
+-2
 -1
-105
-0.1 1 0.1 1
-r [kpc] r [kpc] -2
-0 0.4 0.8 1.2 1.6
+0
 1
+0 0.4 0.8 1.2 1.6
+P
+O
+P
+I
+β (
+r )
 r [kpc]
+-2
+-1
 0
--1 best model 3σ )r(β
-1σ K19
--2 2σ
--3
-0 0.4 0.8 1.2 1.6 0 0.4 0.8 1.2 1.6 Fig. 15. Profiles of the anisotropy parameter obtained with the
-r [kpc] r [kpc] SchwarzschildmodelingoftwostellarpopulationsfortheFornaxdSph.
+1
+0 0.4 0.8 1.2 1.6
+P
+O P I
+I
+β ( r )
+r [kpc]
+best model
+1σ
+2σ
+3σ
+K19
+Fig. 15. Profiles of the anisotropy parameter obtained with the
+SchwarzschildmodelingoftwostellarpopulationsfortheFornaxdSph.
 Inrows:resultsforallstars(calculatedasthesuperpositionoftwopop-
-Fig. 14. Results of Schwarzschild modeling of the Fornax dSph. ulations), population I, and population II. Color lines indicate values
-In rows: derived mass-to-light ratio, total density, total mass, and forthebest-fitmodelswhereasthecoloredareasofdecreasingintensity
-anisotropy parameter.Incolumns: resultsforallstarsandthepopula- showthe1,2,and3σconfidenceregions.Thedashedblacklineshows
-tions,respectively.Greenlinesindicatethevaluesforthebest-fitmodels theresultfromKowalczyketal.(2019)forcomparison.
-whereasthecoloredareasofdecreasingintensityshowthe1,2,and3σ
-confidenceregions.Thebest-fittingvaluesobtainedbyKowalczyketal.
-(2019)areshownwithblackdashedlines.
+ulations), population I, and population II. Color lines indicate values
+forthebest-fitmodelswhereasthecoloredareasofdecreasingintensity
+showthe1,2,and3σconfidenceregions.Thedashedblacklineshows
+theresultfromKowalczyketal.(2019)forcomparison.
 in larger final uncertainties, usually containing the true values
 within 1σ confidenceregion. On the other hand, the improved
 methodsexhibitsubstantiallyreduceduncertainties,highlighting
-that made it a good test bed for modeling techniques applica-
 theunderlyingbias.
-ble to dSph galaxies. We applied our approach to all data and
-totwostellarpopulationsseparately,comparingtheaccuracyof Our method parametrizes the total mass content with the
-theobtainedresults. Althoughtheadditionofthe secondtracer mass-to-lightratiovaryingwithradiusasapower-lawinthelog-
-seeminglyincreasesthenumberofconstraintstwice, theincre- logscale.Wemadetwomainchangeswithrespecttoourprevi-
-mentissomewhatcompromisedbythesamplingerrorssincethe ouswork:weaddedathirdparameterccontrollingthesteepness
-numberofstarsineachsampleisthenreduced.Still,wefound ofthemass-to-lightratioprofile(previouslyfixedatthevalueof
-strong improvements in the accuracy of the method when us- 3) and allowed for different stellar density profiles (previously
-ing two populations.The resultsof the modelingshow that the only Sérsic, now also King). These changesare of course cou-
-density and velocity anisotropyprofilesare more strongly con- pledsincedifferentdensityprofilesrequiredifferentexponentsto
-strained, most importantly at the 3σ level, that is the range of reproducethesamemassprofile.Itisvisiblealsoinourresults
-allowedvaluesismuchnarrower. since the King profile applied in the simulated galaxy gave us
-SimilarlytotheconclusionsofKowalczyketal.(2018)who valuesofclowerthan3.Nevertheless,wedecidedtousediffer-
-explored the effects of nonsphericity using large and small entdensityprofilestomakeourmethodmoregeneralandappli-
-data samples, the comparison of results presented in the left- cabletoobjects,suchasourIllustrisgalaxy,forwhichtheSérsic
-and right-handside panelsof Fig.8 suggeststhat the improved formula does not provide a good approximationof the density
-methodusingtwostellarpopulationsgivesmoreprecisebutless distribution.
-accurate outcome. However, in both studies the apparent dete- Finally,weappliedtheimprovedmethodtothedataforthe
-rioration of the reliability is a consequence of modeling of a FornaxdSphgalaxy.Duetotheadditionofanotherfreeparam-
-nonspherical object. In both cases, a simpler approach (much eter in our functional form for the mass-to-light ratio, our re-
-smaller data samples or using one stellar population) resulted sults for modeling all stars are slightly different from the ones
+Our method parametrizes the total mass content with the
+mass-to-lightratiovaryingwithradiusasapower-lawinthelog-
+logscale.Wemadetwomainchangeswithrespecttoourprevi-
+ouswork:weaddedathirdparameterccontrollingthesteepness
+ofthemass-to-lightratioprofile(previouslyfixedatthevalueof
+3) and allowed for different stellar density profiles (previously
+only Sérsic, now also King). These changesare of course cou-
+pledsincedifferentdensityprofilesrequiredifferentexponentsto
+reproducethesamemassprofile.Itisvisiblealsoinourresults
+since the King profile applied in the simulated galaxy gave us
+valuesofclowerthan3.Nevertheless,wedecidedtousediffer-
+entdensityprofilestomakeourmethodmoregeneralandappli-
+cabletoobjects,suchasourIllustrisgalaxy,forwhichtheSérsic
+formula does not provide a good approximationof the density
+distribution.
+Finally,weappliedtheimprovedmethodtothedataforthe
+FornaxdSphgalaxy.Duetotheadditionofanotherfreeparam-
+eter in our functional form for the mass-to-light ratio, our re-
+sults for modeling all stars are slightly different from the ones
 Articlenumber,page11of12
 A&Aproofs:manuscriptno.Populations4
-obtained in Kowalczyketal. (2019). However, in terms of the delPino,A.,Hidalgo,S.L.,Aparicio,A.,etal.2013,MNRAS,433,1505
-total density and mass distribution the estimates obtained here delPino,A.,Aparicio,A.,&Hidalgo,S.L.2015,MNRAS,454,3996
-agree very well with those earlier results in the range covered delPino,A.,Aparicio,A.,Hidalgo,S.L.,&Łokas,E.L.2017,MNRAS,465,
-3708
+obtained in Kowalczyketal. (2019). However, in terms of the
+total density and mass distribution the estimates obtained here
+agree very well with those earlier results in the range covered
 bythedata.Therefore,thedetailedcomparisonwithotheresti-
-Fabrizio,M.,Bono,G.,Nonino,M.,etal.2016,ApJ,830,126
-matesfromtheliteraturepresentedinKowalczyketal.(2019)is Gebhardt,K.,Richstone,D.,Tremaine,S.,etal.2003,ApJ,583,92
-stillvalidandwedonotrepeatithere. Genel,S.,Fall,S.M.,Hernquist,L.,etal.2015,ApJ,804,L40
-Amoresignificantdifferencewithrespecttotheseprevious Genel,S.,Vogelsberger,M.,Springel,V.,etal.2014,MNRAS,445,175
-Genina,A.,Benitez-Llambay,A.,Frenk,C.S.,etal.2018,MNRAS,474,1398
+matesfromtheliteraturepresentedinKowalczyketal.(2019)is
+stillvalidandwedonotrepeatithere.
+Amoresignificantdifferencewithrespecttotheseprevious
 estimates is seen in the results of modeling two populationsin
-Hayashi,K.,Fabrizio,M.,Łokas,E.L.,etal.2018,MNRAS,481,250
 Fornax.Inthiscasewefindtheanisotropytobeslightlyincreas-
-Irwin,M.,&Hatzidimitriou,D.1995,MNRAS,277,1354
-ingratherthandecreasingwithradiusand,mostimportantly,the Jardel,J.R.,&Gebhardt,K.2012,ApJ,746,89
-confidence regions for this parameter, as well as for the den- Jardel,J.R.,Gebhardt,K.,Fabricius,M.H.,Drory,N.,&Williams,M.J.2013,
-sity,aremuchnarrower.Wewerethusabletoobtaintightercon- ApJ,763,91
-King,I.1962,AJ,67,471
+ingratherthandecreasingwithradiusand,mostimportantly,the
+confidence regions for this parameter, as well as for the den-
+sity,aremuchnarrower.Wewerethusabletoobtaintightercon-
 straints on the properties of Fornax, which means that the im-
-Kowalczyk,K.,Łokas,E.L.,Kazantzidis,S.,&Mayer,L.2013,MNRAS,431,
-provedmethodissuccessful.Forthefirsttime,wewerealsoable 2796
-todeducethevelocityanisotropyprofilesforeachofthepopula- Kowalczyk,K.,Łokas,E.L.,&Valluri,M.2017,MNRAS,470,3959
-tionsseparately.Wefoundthatthemoreconcentrated,metal-rich Kowalczyk,K.,Łokas,E.L.,&Valluri,M.2018,MNRAS,476,2918
-Kowalczyk,K.,delPino,A.,Łokas,E.L.,&Valluri,M.2019,MNRAS,482,
+provedmethodissuccessful.Forthefirsttime,wewerealsoable
+todeducethevelocityanisotropyprofilesforeachofthepopula-
+tionsseparately.Wefoundthatthemoreconcentrated,metal-rich
 populationIhasadecreasinganisotropyprofilewhilethemore
-5241
 extended,metal-poorpopulationIIhastheanisotropyincreasing
-Łokas,E.L.,2002,MNRAS,333,697
-with radius.Thisfindingmaypartiallyexplainthe largespread Łokas,E.L.,Mamon,G.A.,&Prada,F.2005,MNRAS,363,918
-of the anisotropy values obtained in the literature and summa- Massari,D.,Helmi,A.,Mucciarelli,A.etal.2020,A&A,633,A36
-rized in Table 2 and 3 of Kowalczyketal. (2019), which were Mateo,M.1998,ARA&A,36,435
-Nelson,D.,Pillepich,A.,Genel,S.,etal.2015,AstronomyandComputing,13,
+with radius.Thisfindingmaypartiallyexplainthe largespread
+of the anisotropy values obtained in the literature and summa-
+rized in Table 2 and 3 of Kowalczyketal. (2019), which were
 often based on modeling subsamples of our spectroscopic data
-12
-set. Pace,A.B.,Kaplinghat,M.,Kirby,E.,etal.2020,MNRAS,495,3022
-For both studied objectswe split the stars into two popula- Press,W.H.,Teukolsky,S.A.,Vetterling,W.T.,&Flannery,B.P.1992,Numer-
-tions by dividing them in half based on their metallicity, Z (in icalRecipesinC,2ndedn.(CambridgeUniversityPress,Cambridge)
-solarunits),fortheIllustrisgalaxyand[Fe/H]forFornax.Such Schwarzschild,M.1979,ApJ,232,236
-Sérsic,J.L.1968,AtlasdeGalaxiasAustrales(ObservatorioAstronomico,Cor-
+set.
+For both studied objectswe split the stars into two popula-
+tions by dividing them in half based on their metallicity, Z (in
+solarunits),fortheIllustrisgalaxyand[Fe/H]forFornax.Such
 amethodisapproximatebutjustified.Bothgalaxieshavecom-
-doba,Argentina)
-plexstarformationhistorywithmultiplestarformationbursts,as Strigari,L.E.,Bullock,J.S.,&Kaplinghat,M.2007,ApJ,657,L1
-demonstratedbyFig.1inthisworkandFig.7indelPinoetal. Tolstoy,E.,Hill,V.,&Tosi,M.2009,ARA&A,47,371
-(2013), producingmultiplestellar populationswhichcannotbe Valluri,M.,Merritt,D.,&Emsellem,E.2004,ApJ,602,66
-vanderMarel,R.P.,Cretton,N.,deZeeuw,P.T.,&Rix,H.-W.1998,ApJ,493,
+plexstarformationhistorywithmultiplestarformationbursts,as
+demonstratedbyFig.1inthisworkandFig.7indelPinoetal.
+(2013), producingmultiplestellar populationswhichcannotbe
 easilytrackedasthemetallicityisagoodbutnotperfectproxy
-613
-forthestellarage.Moreover,themetallicityhistogramsforboth Vogelsberger,M.,Genel,S.,Springel,V.,etal.2014a,Nature,509,177
-objects are approximately unimodal not allowing for a conve- Vogelsberger,M.,Genel,S.,Springel,V.,etal.2014b,MNRAS,444,1518
-nient separation. More refined methods of division have been Walker,M.G.,&Peñarrubia,J.2011,ApJ,742,20
-Wang,M.Y.,deBoer,T.,Pieres,A.,etal.2019,ApJ,881,118
+forthestellarage.Moreover,themetallicityhistogramsforboth
+objects are approximately unimodal not allowing for a conve-
+nient separation. More refined methods of division have been
 suggestedintheliterature,forexampleintheformofthelikeli-
 hoodfunctionbasedontheposition,velocity,andmetallicityin-
 dex(Walker&Peñarrubia 2011).However,thelikelihoodfunc-
@@ -889,4 +1700,46 @@ versityPress,Princeton)
 Breddels,M.A.,&Helmi,A.2013,A&A,558,A35
 Breddels,M.A.,Helmi,A.,vandenBosch,R.C.E.,vandeVen,G.,&Battaglia,
 G.2013,MNRAS,433,3173
+delPino,A.,Hidalgo,S.L.,Aparicio,A.,etal.2013,MNRAS,433,1505
+delPino,A.,Aparicio,A.,&Hidalgo,S.L.2015,MNRAS,454,3996
+delPino,A.,Aparicio,A.,Hidalgo,S.L.,&Łokas,E.L.2017,MNRAS,465,
+3708
+Fabrizio,M.,Bono,G.,Nonino,M.,etal.2016,ApJ,830,126
+Gebhardt,K.,Richstone,D.,Tremaine,S.,etal.2003,ApJ,583,92
+Genel,S.,Fall,S.M.,Hernquist,L.,etal.2015,ApJ,804,L40
+Genel,S.,Vogelsberger,M.,Springel,V.,etal.2014,MNRAS,445,175
+Genina,A.,Benitez-Llambay,A.,Frenk,C.S.,etal.2018,MNRAS,474,1398
+Hayashi,K.,Fabrizio,M.,Łokas,E.L.,etal.2018,MNRAS,481,250
+Irwin,M.,&Hatzidimitriou,D.1995,MNRAS,277,1354
+Jardel,J.R.,&Gebhardt,K.2012,ApJ,746,89
+Jardel,J.R.,Gebhardt,K.,Fabricius,M.H.,Drory,N.,&Williams,M.J.2013,
+ApJ,763,91
+King,I.1962,AJ,67,471
+Kowalczyk,K.,Łokas,E.L.,Kazantzidis,S.,&Mayer,L.2013,MNRAS,431,
+2796
+Kowalczyk,K.,Łokas,E.L.,&Valluri,M.2017,MNRAS,470,3959
+Kowalczyk,K.,Łokas,E.L.,&Valluri,M.2018,MNRAS,476,2918
+Kowalczyk,K.,delPino,A.,Łokas,E.L.,&Valluri,M.2019,MNRAS,482,
+5241
+Łokas,E.L.,2002,MNRAS,333,697
+Łokas,E.L.,Mamon,G.A.,&Prada,F.2005,MNRAS,363,918
+Massari,D.,Helmi,A.,Mucciarelli,A.etal.2020,A&A,633,A36
+Mateo,M.1998,ARA&A,36,435
+Nelson,D.,Pillepich,A.,Genel,S.,etal.2015,AstronomyandComputing,13,
+12
+Pace,A.B.,Kaplinghat,M.,Kirby,E.,etal.2020,MNRAS,495,3022
+Press,W.H.,Teukolsky,S.A.,Vetterling,W.T.,&Flannery,B.P.1992,Numer-
+icalRecipesinC,2ndedn.(CambridgeUniversityPress,Cambridge)
+Schwarzschild,M.1979,ApJ,232,236
+Sérsic,J.L.1968,AtlasdeGalaxiasAustrales(ObservatorioAstronomico,Cor-
+doba,Argentina)
+Strigari,L.E.,Bullock,J.S.,&Kaplinghat,M.2007,ApJ,657,L1
+Tolstoy,E.,Hill,V.,&Tosi,M.2009,ARA&A,47,371
+Valluri,M.,Merritt,D.,&Emsellem,E.2004,ApJ,602,66
+vanderMarel,R.P.,Cretton,N.,deZeeuw,P.T.,&Rix,H.-W.1998,ApJ,493,
+613
+Vogelsberger,M.,Genel,S.,Springel,V.,etal.2014a,Nature,509,177
+Vogelsberger,M.,Genel,S.,Springel,V.,etal.2014b,MNRAS,444,1518
+Walker,M.G.,&Peñarrubia,J.2011,ApJ,742,20
+Wang,M.Y.,deBoer,T.,Pieres,A.,etal.2019,ApJ,881,118
 Articlenumber,page12of12
diff --git a/read/results/pdfplumber/2201.00178.txt b/read/results/pdfplumber/2201.00178.txt
index 3d72e5f..8340a8f 100644
--- a/read/results/pdfplumber/2201.00178.txt
+++ b/read/results/pdfplumber/2201.00178.txt
@@ -5,23 +5,20 @@ Prasad Mani ,1 Chris S. Hanson ,2 and Shravan Hanasoge 1,2
 1Department of Astronomy and Astrophysics, Tata Institute of Fundamental Research, Mumbai, India
 2Center for Space Science, NYUAD Institute, New York University Abu Dhabi, Abu Dhabi, UAE
 ABSTRACT
-2202
 The technique of normal-mode coupling is a powerful tool with which to seismically image non-
 axisymmetricphenomenaintheSun. HereweapplymodecouplingintheCartesianapproximationto
 probe steady, near-surface flows in the Sun. Using Doppler cubes obtained from the Helioseismic and
-Magnetic Imager onboard the Solar Dynamics Observatory, we perform inversions on mode-coupling naJ
+Magnetic Imager onboard the Solar Dynamics Observatory, we perform inversions on mode-coupling
 measurements to show that the resulting divergence and radial vorticity maps at supergranular length
 scales (∼30 Mm) near the surface compare extremely well with those obtained using the Local Corre-
-lation Tracking method. We find that the Pearson correlation coefficient is ≥ 0.9 for divergence flows, 1
+lation Tracking method. We find that the Pearson correlation coefficient is ≥ 0.9 for divergence flows,
 while ≥ 0.8 is obtained for the radial vorticity.
-]RS.hp-ortsa[
 Keywords: Helioseismology (709); Solar physics (1476); Supergranulation (1662)
 1. INTRODUCTION
 Helioseismology is the study of the Sun’s internal structure and its properties, by means of interpreting its effect
 on solar oscillations (see Christensen-Dalsgaard 2002, for a review). These are resonant normal modes of the Sun,
 behaving as standing waves in a cavity bounded by the solar surface and a depth that depends on the wavenumber
 of the oscillation. As these waves penetrate the interior, they register information of the properties and dynamics of
-1v87100.1022:viXra
 the solar interior and return to the surface, where they are observed. The internal structure of the Sun can then be
 retrieved through meticulous inversions of these seismic measurements.
 Several important flow systems on the Sun have been inferred using various global and local helioseismic methods.
@@ -44,6 +41,44 @@ et al. 2021). Local mode-coupling analysis in the Cartesian approximation, formu
 validated by Hanson et al. (2021) (hereafter H21) by examining the power-spectrum of supergranular waves and
 comparing with previous time-distance studies (Langfellner et al. 2018).
 prasad.subramanian@tifr.res.in
+a
+r
+X
+i
+v
+:
+2
+2
+0
+1
+.
+0
+0
+1
+7
+8
+v
+1
+[
+a
+s
+t
+r
+o
+-
+p
+h .
+S
+R
+]
+1
+J
+a
+n
+2
+0
+2
+2
 2 Mani et al.
 Normal-modecouplingreferstotheconceptofexpressingsolar-oscillationeigenfunctionsasalinearweightedcombi-
 nationofmodel-eigenfunctions(e.g.,ModelSChristensen-Dalsgaard2021). Themodeleigenfunctionsformacomplete
@@ -58,27 +93,47 @@ In this study, we extend the spectral analysis of H21 and develop the method to
 at supergranulation length scales. A part of the formalism that was used to derive the forward model in H21 is
 reworked, primarily to image steady flows. Measurements are then constructed, and inversions to infer divergence flow
 and radial vorticity are described. We also demonstrate signal associated with supergranular flow in a radial-order
-coupling (p -p ), which was not shown in H21. This helps in localizing the measurement sensitivity to the surface.
-2 2
+coupling ($p_2$-$p_2$), which was not shown in H21. This helps in localizing the measurement sensitivity to the surface.
 We compare our results with flows obtained using the Local Correlation Tracking method on solar granules.
 1.1. Forward problem
 In favor of algebraic brevity, we only show crucial steps here and refer the interested reader to Appendix A for a
 complete derivation of the forward problem. Working in the plane-parallel atmosphere (see also Woodard 2006), we
-denote the horizontal unit vectors e and e in our local Cartesian domain as pointing towards west and north on the
-x y
-solar surface, respectively, and e points outwards. This approximation is valid when observing patches of the surface
+denote the horizontal unit vectors $\mathbf{e}_x$ and $\mathbf{e}_y$ in our local Cartesian domain as pointing towards west and north on the
+solar surface, respectively, and $\mathbf{e}_z$ points outwards. This approximation is valid when observing patches of the surface
 that are small when compared to the solar radius. When imaging steady, near-surface flows in the neighbourhood
 of the supergranular scale (∼ 30 Mm), we expect the measured spectral cross-correlation signal to peak around the
-horizontal wavenumber qR ≈ 120 (Rincon & Rieutord 2018), where q = |q| = |(q ,q )| is the vector horizontal
-(cid:12) x y
+horizontal wavenumber $qR_\odot \approx 120$ (Rincon & Rieutord 2018), where $q = |\mathbf{q}| = |(q_x, q_y)|$ is the vector horizontal
 wavenumber of the flow. Accordingly, the goal is to relate measurements (linearly, to facilitate inversion) to the flow
 perturbation described in a horizontal Fourier domain. Supergranular velocities are subsonic (300-400 m/s, see Rincon
-&Rieutord2018), permittingustomodeltheflowvectoruuu=(u ,u ,u )intheCartesiandomainlikeso(Unnoetal.
-x y z
+& Rieutord 2018), permitting us to model the flow vector $\mathbf{u} = (u_x, u_y, u_z)$ in the Cartesian domain like so (Unno et al.
 1989; Woodard 2006)
-uσ =∇×[∇×(P e )]+∇×(T e ), (1)
-z z
+$$\mathbf{u}^{\sigma} = \nabla\times[\nabla\times(P\,\mathbf{e}_z)] + \nabla\times(T\,\mathbf{e}_z), \tag{1}$$
 where P = Pσ(x) and T = Tσ(x) are poloidal and toroidal scalar functions, varying with position x and temporal
 frequency σ. ∇ is the 3D gradient operator. While mode-coupling can easily be extended to study time-varying
 perturbations (see Woodard 2016; Mani & Hanasoge 2020; Hanasoge et al. 2020; Mandal & Hanasoge 2020, for
@@ -86,87 +141,228 @@ example), here we only consider the frequency bin σ = 0, denoting the temporall
 of analysis. We therefore suppress σ from all terms this point forward, remembering that temporal dynamics of
 perturbations may also be studied using the same model outlined in the following paragraphs. Simplifying eq 1 using
 vector calculus results in
-u=−∇2Pe +∇(∂ P)+∇ T×e , (2)
-z z h z
-where ∇ refers to derivatives only in the horizontal direction. Mode-coupling helioseismology is performed in the
+$$\mathbf{u} = -\nabla^2 P\,\mathbf{e}_z + \nabla(\partial_z P) + \nabla_h T \times \mathbf{e}_z, \tag{2}$$
+where $\nabla_h$ refers to derivatives only in the horizontal direction. Mode-coupling helioseismology is performed in the
 Fourier domain, and since we wish to image horizontal flows on a small patch of the surface, we describe the flow as a
-functionofhorizontalwavenumberq anddepthze . HencethepoloidalandtoroidalflowsaredescribedbyP (z)and
-z q
-T (z), respectively. Furthermore, we parametrize the flow along e using basis functions f(z) (Chebyshev, B-spline,
-q z
+function of horizontal wavenumber $\mathbf{q}$ and depth $z\,\mathbf{e}_z$. Hence the poloidal and toroidal flows are described by $P_q(z)$ and
+$T_q(z)$, respectively. Furthermore, we parametrize the flow along $\mathbf{e}_z$ using basis functions $f(z)$ (Chebyshev, B-spline,
 etc). This is expressed as
-(cid:88) (cid:88)
-P ≡P (z)= f (z)P , T ≡T (z)= f (z)T . (3)
-q j qj q j qj
-j j
-The flow coefficients P and T , represented by the discrete indices q and j, become ideal candidates for inversions,
-qj qj
+$$P \equiv P_q(z) = \sum_j f_j(z)\,P_{qj}, \qquad T \equiv T_q(z) = \sum_j f_j(z)\,T_{qj}. \tag{3}$$
+The flow coefficients $P_{qj}$ and $T_{qj}$, represented by the discrete indices $q$ and $j$, become ideal candidates for inversions,
 where the flow for each wavenumber q can be inverted for independently; parallelization in computation can thus be
-exploited to expedite inversions. Note that P = P∗ and T = T∗ for the flow field to be real in the spatio-
-qj −qj qj −qj
+exploited to expedite inversions. Note that $P_{qj} = P^*_{-qj}$ and $T_{qj} = T^*_{-qj}$ for the flow field to be real in the spatio-
 temporal domain.
 To infer flows from wavefields φ scattered by a perturbation of length scale q, cross-correlate them in the manner
 Imaging near-surface flows using mode-coupling analysis 3
-φω∗φω , wherek istheoscillationmodewavenumber(k ,k )andω isthetemporalfrequency. Relateφω∗φω thus
-k k+q x y k k+q
-to the flow coefficients P and T (see eq A7)
-qj qj
+$\phi^{\omega*}_{k}\phi^{\omega}_{k+q}$, where $\mathbf{k}$ is the oscillation mode wavenumber $(k_x, k_y)$ and $\omega$ is the temporal frequency. Relate $\phi^{\omega*}_{k}\phi^{\omega}_{k+q}$ thus
+to the flow coefficients $P_{qj}$ and $T_{qj}$ (see eq A7)
-(cid:104)φω∗φω (cid:105)=Hω C P +D T . (4)
-k k+q kk(cid:48)nn(cid:48) qj,k qj qj,k qj
+$$\langle \phi^{\omega*}_{k}\phi^{\omega}_{k+q} \rangle = H^{\omega}_{kk'nn'} \sum_j \left[ C_{qj,k}\,P_{qj} + D_{qj,k}\,T_{qj} \right]. \tag{4}$$
 The weight factor $H^{\omega}$ (see eq A8) is a function of frequency, capturing information about the extent of coupling between
 the two modes $[n,k]$ and $[n',k']$, where $n$ and $n'$ are the radial orders of the modes, and $k = |\mathbf{k}|$ and $k' = |\mathbf{k}'| = |\mathbf{k}+\mathbf{q}|$.
 The spectral profile of the mode (see eq A9) is approximated using a Lorentzian (Anderson et al. 1990). The more the
-Lorentzians of the two modes overlap, the stronger the coupling. Finally, the real terms C and D are poloidal
-qj,k qj,k
+Lorentzians of the two modes overlap, the stronger the coupling. Finally, the real terms $C_{qj,k}$ and $D_{qj,k}$ are poloidal
 and toroidal flow sensitivity kernels respectively, that allow us to relate the flows in question to the measurements
-and are derived from the solar model see Appendix A. They possess the symmetry relation: C = C and
-qj,k −qj,−k
-D =D (see eq A6). The kernels, as flows, are expressed on the basis f (z).
-qj,k −qj,−k j
+and are derived from the solar model (see Appendix A). They possess the symmetry relation: $C_{qj,k} = C_{-qj,-k}$ and
+$D_{qj,k} = D_{-qj,-k}$ (see eq A6). The kernels, as flows, are expressed on the basis $f_j(z)$.
 1.2. Least-squares of cross-correlation
-Even though φω∗φω isolates the effect of flow perturbations at individual wavenumbers q, a more compact mea-
-k k+q
+Even though $\phi^{\omega*}_{k}\phi^{\omega}_{k+q}$ isolates the effect of flow perturbations at individual wavenumbers $q$, a more compact mea-
 surement, known in mode-coupling literature as 'B-coefficients', is much better designed for inversion as it reduces the
-dimensionoftheproblem. Aleast-squaresfittothecross-correlationφω∗φω (seeWoodard2006,2014,2016)results
-k k+q
-in the B-coefficients B , according to
-(cid:80) Hω∗ φω∗φω
-kk(cid:48)nn(cid:48) k k+q
-B = ω . (5)
-k,q (cid:80) |Hω |2
-Multiplying eq 4 on both sides by Hω∗ and substituting by eq 5 on the left-hand-side results in a concisely defined
+dimension of the problem. A least-squares fit to the cross-correlation $\phi^{\omega*}_{k}\phi^{\omega}_{k+q}$ (see Woodard 2006, 2014, 2016) results
+in the B-coefficients $B_{k,q}$, according to
+$$B_{k,q} = \frac{\sum_{\omega} H^{\omega*}_{kk'nn'}\,\phi^{\omega*}_{k}\phi^{\omega}_{k+q}}{\sum_{\omega} |H^{\omega}_{kk'nn'}|^2}. \tag{5}$$
+Multiplying eq 4 on both sides by $H^{\omega*}_{kk'nn'}$ and substituting by eq 5 on the left-hand-side results in a concisely defined
 forward problem (compare with eq 4)
-B = C P +D T . (6)
-k,q qj,k qj qj,k qj
+$$B_{k,q} = \sum_j \left[ C_{qj,k}\,P_{qj} + D_{qj,k}\,T_{qj} \right]. \tag{6}$$
 In eq 5, Woodard (2007) and H21 thus far only considered positive-frequency components in the summation over ω.
-Here, we sum over both ±ω within a few mode linewidths Γ. Denoting the resonant frequency of a mode using ω ,
-(cid:16) (cid:17)
-|ω|∈ ω −(cid:15)Γ /2,ω +(cid:15)Γ /2 or
-nk nk nk nk
-(cid:16) (cid:17)
-|ω|∈ ω −(cid:15)Γ /2,ω +(cid:15)Γ /2 . (7)
-n(cid:48)k(cid:48) n(cid:48)k(cid:48) n(cid:48)k(cid:48) n(cid:48)k(cid:48)
-Summing over ±ω guarantees that the parity B = B∗ (see Appendix A for derivation) is obeyed, thereby
-k,q −k,−q
+Here, we sum over both $\pm\omega$ within a few mode linewidths $\Gamma$. Denoting the resonant frequency of a mode using $\omega_{nk}$,
+$$|\omega| \in \left( \omega_{nk} - \epsilon\Gamma_{nk}/2,\; \omega_{nk} + \epsilon\Gamma_{nk}/2 \right) \quad \text{or} \quad |\omega| \in \left( \omega_{n'k'} - \epsilon\Gamma_{n'k'}/2,\; \omega_{n'k'} + \epsilon\Gamma_{n'k'}/2 \right). \tag{7}$$
+Summing over $\pm\omega$ guarantees that the parity $B_{k,q} = B^*_{-k,-q}$ (see Appendix A for derivation) is obeyed, thereby
 ensuring that the flow field on the right-hand-side of eq 6 is a real physical quantity in the spatio-temporal domain.
 Taking the complex conjugate on both sides of eq 6 and considering the negative wavenumber components −q and
 −k,
-B∗ = C P∗ +D T∗ . (8)
-−k,−q −qj,−k −qj −qj,−k −qj
-Substituting parity and symmetry relations for all terms in the above results in eq 6. As B is constructed by a
+$$B^*_{-k,-q} = \sum_j \left[ C_{-qj,-k}\,P^*_{-qj} + D_{-qj,-k}\,T^*_{-qj} \right]. \tag{8}$$
+Substituting parity and symmetry relations for all terms in the above results in eq 6. As $B_{k,q}$ is constructed by a
 least-squares fitting, it is noteworthy that summing over −ω will also lead to improvement in its signal-to-noise as a
 by-product.
 1.3. Noise model
@@ -178,19 +374,34 @@ Everyindependentrealizationofamodecanbeunderstoodastheoutputofadampedharmonicosc
 random forcing function (see Duvall & Harvey 1986). Modes are thus generated with random phases and amplitudes
 and with finite lifetimes. This stochasticity leads to realization noise in repeated measurements of mode parameters
 4 Mani et al.
-Figure 1. Dispersion relation for the radial orders used in this analysis; f (blue), p (orange) and p (green). The shaded
-1 2
+Figure 1. Dispersion relation for the radial orders used in this analysis; $f$ (blue), $p_1$ (orange) and $p_2$ (green). The shaded
 regions of the same colours indicate 1-linewidth Γ about the mode frequency. The yellow shaded region indicates the range of
-kR andω/2π towhichwehaverestrictedourselvesinthisanalysis. BeyondkR of2000,itisseenthatthetheoreticalfitting
-(cid:12) (cid:12)
+$kR_\odot$ and $\omega/2\pi$ to which we have restricted ourselves in this analysis. Beyond $kR_\odot$ of 2000, it is seen that the theoretical fitting
 of mode frequencies start deviating from the observed dispersion relation for the f-mode.
-such as its amplitude, frequency and linewidth, and consequently in B in our case. We use the same noise model
+such as its amplitude, frequency and linewidth, and consequently in $B_{k,q}$ in our case. We use the same noise model
 as in H21, which was motivated by the above discussion,
-G ≡(cid:104)|B |2(cid:105), (9)
-k,q k,q
-where, unlike H21, we again sum over ±ω. G is real, with the symmetry relation G =G (see Appendix A
-k,q k,q −k,−q
+$$G_{k,q} \equiv \langle |B_{k,q}|^2 \rangle, \tag{9}$$
+where, unlike H21, we again sum over $\pm\omega$. $G_{k,q}$ is real, with the symmetry relation $G_{k,q} = G_{-k,-q}$ (see Appendix A
 for explanation).
 2. DATA ANALYSIS
 In order to examine near surface flows we build a time-series cube of Doppler images that are obtained from the
@@ -199,8 +410,13 @@ is Postel projected, with a spatial resolution of approximately 0.48Mm, sperated
 at the (Snodgrass 1984) rotation rate. Here, we select a patch that is 194.4×194.4 Mm2 in size, tracked for 24 hours
 and crosses the disk-center in the middle of observation time on the 14 November 2017 (Carrington rotation number
 2197, Carrington longitude 90°). This Doppler cube is considered as the physical wavefield $\phi(x,y;t)$. The Fourier-space
-wavefieldφω (andsubsequently,thecross-correlationφω∗φω )isobtainedbycomputingthe3Dspatialandtemporal
-k k k+q
+wavefield $\phi^{\omega}_{k}$ (and subsequently, the cross-correlation $\phi^{\omega*}_{k}\phi^{\omega}_{k+q}$) is obtained by computing the 3D spatial and temporal
 Fourier transform of the Dopplercube.
 The duration of the observed region is long enough to provide sufficient frequency bins with which to sum over in
 Eq 6, while short enough that supergranules do not substantially evolve (lifetime is purported to be 1.6 days; Rincon
@@ -208,217 +424,417 @@ Eq 6, while short enough that supergranules do not substantially evolve (lifetim
 from center-to-limb systematics (Zhao et al. 2012; Langfellner et al. 2015).
 Maximum signal can be extracted from the weighted summation of the cross correlations (eq 5) when the spectral
 profiles of the two modes $[n,k]$ and $[n',k']$ closely align in $\omega$ space. This implies that their mode frequencies should be
-sufficiently close (|ω −ω | ≤ δ, the separation parameter). Since Lorentzians decay rapidly, the summation over
-nk n(cid:48)k(cid:48)
+sufficiently close ($|\omega_{nk} - \omega_{n'k'}| \le \delta$, the separation parameter). Since Lorentzians decay rapidly, the summation over
 $\pm\omega$ is significant only over a few linewidths ($\epsilon$, the summation parameter; see eq 7). We have empirically found and
-tabulated δ in Table 1 for the radial order couplings n-n(cid:48) ∈ f-f, p -p , and p -p (the signal strength depends only
-1 1 2 2
+tabulated $\delta$ in Table 1 for the radial order couplings $n$-$n' \in$ $f$-$f$, $p_1$-$p_1$, and $p_2$-$p_2$ (the signal strength depends only
 weakly on $\epsilon$; we set it to 3 line widths).
-Figure 1 shows that for any two adjacent ridges (adjacent n and n(cid:48)), mode frequencies ω and ω become spaced
-nk n(cid:48)k
-farther apart with increasing wavenumber kR . It is also known that mode linewidth Γ grows with radial orders for
+Figure 1 shows that for any two adjacent ridges (adjacent n and n(cid:48)), mode frequencies ω
+nk
+and ω
+n(cid:48)k
+become spaced
+farther apart with increasing wavenumber kR
 (cid:12)
-a given kR . Moreover, holding the spatial and temporal sampling rates constant, the spatial size and duration of
+. It is also known that mode linewidth Γ grows with radial orders for
+a given kR
 (cid:12)
-observation set the total number of modes within a range of kR (and ω/2π) that can be clearly observed, thereby
+. Moreover, holding the spatial and temporal sampling rates constant, the spatial size and duration of
+observation set the total number of modes within a range of kR
 (cid:12)
+(and ω/2π) that can be clearly observed, thereby
 affecting the quality of the seismic measurements. Owing to these factors, to maximize signal-to-noise (by visually
-inspectingthepower-spectrum),theparametersdescribingtheextentofcouplingoverdifferentrangesofkR atfixed
+inspectingthepower-spectrum),theparametersdescribingtheextentofcouplingoverdifferentrangesofkR
 (cid:12)
-radial order are different. In wavenumber, we restrict our analysis to within 200≤kR ≤2000 and qR ≤300. Our
-(cid:12) (cid:12)
+atfixed
+radial order are different. In wavenumber, we restrict our analysis to within 200≤kR
+(cid:12)
+≤2000 and qR
+(cid:12)
+≤300. Our
 frequency range is confined to span the range over which acoustic modes are observed (2≤ω/2π ≤5 in mHz).
 Imaging near-surface flows using mode-coupling analysis 5
-Coupling kR range # of δ
+Coupling kR
 (cid:12)
+range # of δ
 modes
 f-f [400,1000] 5240 4
 [1000,1500] 7784 1.1
 [1500,2000] 10940 0.4
-p -p [400,1000] 5240 4.5
-1 1
+p
+1
+-p
+1
+[400,1000] 5240 4.5
 [1000,1750] 12852 2
-p -p [200,1000] 5886 3
-2 2
+p
+2
+-p
+2
+[200,1000] 5886 3
 [1000,1300] 4280 3
 Table 1. Total number of modes, and separation parameter (in number of linewidths) for different couplings, for different
-ranges of kR .
+ranges of kR
 (cid:12)
+.
 3. INVERSION
 The final step to producing near-surface flow maps in Cartesian mode-coupling is to invert the measurements
-B from the linear relation in eq 6. We describe inversion using regularized-least-squares (RLS) method here and
+B
 k,q
+from the linear relation in eq 6. We describe inversion using regularized-least-squares (RLS) method here and
 leave Subtractive Optimally Localized Averages (SOLA, Pijpers & Thompson 1994) for Appendix B. The methods
 complement each other (see Sekii 1997), where RLS tries to minimize the misfit between data and model, whereas
 SOLA gives better localization. For total number of modes M, RLS scales as MxJ where J is the number of basis
-functions f (z) (J (cid:28) M; see eq 3 and section 3.1), whereas SOLA scales as M2 (see Appendix B). For M > 5000,
+functions f
 j
+(z) (J (cid:28) M; see eq 3 and section 3.1), whereas SOLA scales as M2 (see Appendix B). For M > 5000,
 computation starts to quickly become expensive for SOLA.
 Mode eigenfunctions peak near the surface, with higher radial orders possessing smaller peaks in the interior. While
 f-f coupling alone has enough sensitivity to probe perturbations at supergranular scales close to surface, signal is
-presenteveninp -p , andp -p (seeFigure3), andpossiblyotherhigherorderself-andcross-couplings. Sinceweare
-1 1 2 2
+presenteveninp
+1
+-p
+1
+, andp
+2
+-p
+2
+(seeFigure3), andpossiblyotherhigherorderself-andcross-couplings. Sinceweare
 interested in only surface flows, we leave higher order coupling to future work.
-It bears mentioning that the slopes of the ridges in the kR -ν spectrum (Figure 1) increase with radial order. This
+It bears mentioning that the slopes of the ridges in the kR
 (cid:12)
-limitsustolow-to-intermediatekR (<1000)forthesehigherradialordersifwearetoremainundertheacousticcut-
+-ν spectrum (Figure 1) increase with radial order. This
+limitsustolow-to-intermediatekR
 (cid:12)
+(<1000)forthesehigherradialordersifwearetoremainundertheacousticcut-
 offfrequencyof5.3mHz. Italsobecomesimperativetouseaspatiallylargerobservationpatchtogainaccesstosignals
-from low kR - too large an observation region could possibly render invalid the Cartesian geometry approximation.
+from low kR
 (cid:12)
+- too large an observation region could possibly render invalid the Cartesian geometry approximation.
 Regardless, in addition to performing inversions using all the couplings stacked together, we also demonstrate inversions
 separately for the three couplings (see Table 2) in order to account for the full gamut of mode-coupling as a signal-rich
 helioseismic technique.
 3.1. RLS
 For given q, the forward problem may be stated as
 KU=B, (10)
-with the aim to minimize the misfit ||KU−B|| , with |||| denoting the L norm. Here, K is the matrix formed
-2 2 2
-by the sensitivity kernels: {C ,D }. U is a vector composed of the flow coefficients: {P ,T } and B is a vector
-qj,k qj,k qj qj
-composed of computed B-coefficients: {B }. The least-squares problem is solved simultaneously for poloidal and
-toroidal flow. We use B-spline basis functions as our f (z), comprising 11 knots spaced uniformly in acoustic radius,
+with the aim to minimize the misfit $\sum_k \|KU - B\|_2$, with $\|\cdot\|_2$ denoting the $L_2$ norm. Here, $K$ is the matrix formed
+by the sensitivity kernels: $\{C_{qj,k}, D_{qj,k}\}$. $U$ is a vector composed of the flow coefficients: $\{P_{qj}, T_{qj}\}$ and $B$ is a vector
+composed of computed B-coefficients: $\{B_{k,q}\}$. The least-squares problem is solved simultaneously for poloidal and
+toroidal flow. We use B-spline basis functions as our $f_j(z)$, comprising 11 knots spaced uniformly in acoustic radius,
 for both poloidal and toroidal coefficients. Hence, for M modes (total number of k for a given q is M) and 11 basis
 functions for each poloidal and toroidal, the dimensions of $K$, $U$ and $B$ are thus $M \times 22$, $22 \times 1$, and $M \times 1$ respectively.
-Normalizingbothsidesofeq10bythenoisecovarianceΛ(adiagonalmatrixwiththeentriesG ;seeeq9;dimension
+Normalizing both sides of eq 10 by the noise covariance $\Lambda$ (a diagonal matrix with the entries $G_{k,q}$; see eq 9; dimension
 M ×M) and pre-multiplying by K(cid:124),
-(K(cid:124) Λ−1K)U=(K(cid:124) Λ−1)B, (11)
-U=(K(cid:124) Λ−1K)−1K(cid:124) Λ−1B. (12)
+$$(K^{\intercal} \Lambda^{-1} K)\,U = (K^{\intercal} \Lambda^{-1})\,B, \tag{11}$$
+$$U = (K^{\intercal} \Lambda^{-1} K)^{-1} K^{\intercal} \Lambda^{-1} B. \tag{12}$$
 6 Mani et al.
-Figure 2. Left: Averagingkernelforpoloidalflow(seesectionB.2,eqB17,andleftpanelofFigure8)forqR =[−112,−45],
+Figure 2. Left: Averagingkernelforpoloidalflow(seesectionB.2,eqB17,andleftpanelofFigure8)forqR
 (cid:12)
-at the depth z = −0.41 Mm. Right: L-curve for the mode qR = [−112,−45]; the knee (λ = 2.48) is marked by a blue
-o (cid:12)
+=[−112,−45],
+at the depth z
+o
+= −0.41 Mm. Right: L-curve for the mode qR
+(cid:12)
+= [−112,−45]; the knee (λ = 2.48) is marked by a blue
 diamond.
-Since the least-squares problem is typically ill-posed, we restate the minimization as ||KU−B|| +λ||U|| with
-2 2
+Since the least-squares problem is typically ill-posed, we restate the minimization as $\sum_k \|KU - B\|_2 + \lambda\|U\|_2$ with
 the regularization parameter $\lambda$, which results in a trade-off between misfit reduction (first term) and solution
 norm minimization (second term). Under-regularizing can lead to a solution U that is dominated by errors in the
 data and on the other hand, over-regularizing may smooth or damp the solution more than necessary. Including this
 regularization makes the problem better conditioned and is now defined as
-U=(K(cid:124) Λ−1K+λI)−1K(cid:124) Λ−1B, (13)
-where I is the identity matrix for L regularization. The knee-point of the L-curve (Hansen 1992), a curve formed
-by plotting ||U|| vs ||KU − B|| for different values of λ (see right panel of Figure 2), is usually chosen as the
-2 2
+$$U = (K^{\intercal} \Lambda^{-1} K + \lambda I)^{-1} K^{\intercal} \Lambda^{-1} B, \tag{13}$$
+where $I$ is the identity matrix for $L_1$ regularization. The knee-point of the L-curve (Hansen 1992), a curve formed
+by plotting $\|U\|_2$ vs $\|KU - B\|_2$ for different values of $\lambda$ (see right panel of Figure 2), is usually chosen as the
 regularization parameter. After successfully inverting for U, we reconstruct the flow using eq 3. Results for poloidal
-flow P are shown in Figure 3.
+flow $P_q$ are shown in Figure 3.
 4. LCT
 To improve confidence in the imaged near-surface flows through mode-coupling, we compare them with flows obtained
 from Local Correlation Tracking method (LCT; November & Simon 1988). LCT provides surface-flow maps by
-examining the advection of convective granules (1.2 Mm, qR ≈ 3500; Hathaway et al. 2015) by underlying larger-
+examining the advection of convective granules (1.2 Mm, $qR_\odot \approx 3500$; Hathaway et al. 2015) by underlying larger-
 scale flow systems. Since granules are used as tracers, which are much smaller in size than supergranules (≈35 Mm),
 LCT is an effective method (see Rieutord et al. 2001) to produce surface horizontal flow maps of supergranulation.
 Time series of intensity images from HMI, with the same properties of the Dopplercubes described in section 2
 (tracking rate, date, location, size and duration of observed patch, spatial and temporal sampling rate), are ob-
 tained and Postel projected. The horizontal flows are deduced by tracking the proper motions of granules between
-consecutive intensity images, which we denote as I ,I . The LCT method selects a patch in two images each
-1 2
-(I = I 1e(x−xij)2/2sigma2,I = I 2e(x−xij)2/2sigma2) that observe the same granule at the grid point x = (x i,y j).
-1 2 ij
+consecutive intensity images, which we denote as $I_1, I_2$. The LCT method selects a patch in two images each
+($I_1 = I_1\, e^{(\mathbf{x}-\mathbf{x}_{ij})^2/2\,\mathrm{sigma}^2}$, $I_2 = I_2\, e^{(\mathbf{x}-\mathbf{x}_{ij})^2/2\,\mathrm{sigma}^2}$) that observe the same granule at the grid point $\mathbf{x}_{ij} = (x_i, y_j)$.
 A Gaussian of width sigma allows us to isolate a small region surrounding the grid point of interest, as the distance
 moved by granules is usually in the sub-pixel regime. The convention for the direction of x is the same as described in
-section 1.1. The two patches I ,I are then cross correlated for different values of position shifts ∆x,
-1 2
+section 1.1. The two patches $I_1, I_2$ are then cross correlated for different values of position shifts $\Delta\mathbf{x}$,
-C (∆x,∆y)= dxI∗(−x)I (∆x−x). (14)
-ij 1 2
-The shift ∆x = (∆x,∆y) that maximizes the cross-correlation C is taken to be the proper motion of the granule.
+$$C_{ij}(\Delta x, \Delta y) = \int d\mathbf{x}\; I_1^*(-\mathbf{x})\, I_2(\Delta\mathbf{x} - \mathbf{x}). \tag{14}$$
+The shift $\Delta\mathbf{x} = (\Delta x, \Delta y)$ that maximizes the cross-correlation $C_{ij}$ is taken to be the proper motion of the granule.
 Provided that the time difference ∆t, here 45 seconds, between the images is less than the lifetime of granules (< 10
-min), the velocities are given by v = ∆x/∆t and v = ∆y/∆t. This exercise is repeated for all grid points in the
-x y
-images I ,I and for each consecutive pair of images in the cube.
-1 2
-In practice, we use the Fourier LCT algorithm (FLCT, Fisher & Welsch 2008) for computing v and v . FLCT
-x y
+min), the velocities are given by $v_x = \Delta x/\Delta t$ and $v_y = \Delta y/\Delta t$. This exercise is repeated for all grid points in the
+images $I_1, I_2$ and for each consecutive pair of images in the cube.
+In practice, we use the Fourier LCT algorithm (FLCT, Fisher & Welsch 2008) for computing $v_x$ and $v_y$. FLCT
 requires the input sigma, which we set to 4 pix, that captures the extent of localization desired, and depends on the
 Imaging near-surface flows using mode-coupling analysis 7
-Figure 3. Top: Inverted poloidal flow power-spectrum for the three couplings f-f, p -p , and p -p as a function of q R and
-1 1 2 2 x (cid:12)
-q R . Bottom: Corresponding power-spectrum averaged over the azimuthal angle. Shaded region shows ±1σ error around the
-y (cid:12)
+Figure 3. Top: Inverted poloidal flow power-spectrum for the three couplings $f$-$f$, $p_1$-$p_1$, and $p_2$-$p_2$ as a function of $q_x R_\odot$ and
+$q_y R_\odot$. Bottom: Corresponding power-spectrum averaged over the azimuthal angle. Shaded region shows $\pm 1\sigma$ error around the
 mean. Total power appears to increase through the radial orders. Power is in units of m2/s4.
 dominant length scale of the velocity field in the images. The Postel-projected intensity images are fed as input to the
-FLCT code. v and v are then computed for consecutive pairs of images and are averaged over the entire day.
-x y
+FLCT code. $v_x$ and $v_y$ are then computed for consecutive pairs of images and are averaged over the entire day.
 5. MAPS OF HORIZONTAL DIVERGENCE AND RADIAL VORTICITY
 For mode-coupling, horizontal divergence (hereafter div) and radial vorticity (hereafter curl) are computed by
 substituting P and T from eq 3 into eq 2 as below -
-uuu(q,z)=−∇2Pe +∇(∂ P)+∇ T×e ,
-z z h z
-=−(0, 0, ∂2P +∂2P +∂2P)+(∂ ∂ P, ∂ ∂ P, ∂2P)+(∂ T, −∂ T, 0). (15)
-x y z x z y z z y x
-Setting ∂2+∂2 =q2, div is given by,
-x y
-∇ ·uuu(q,z)=q2∂ P, (16)
-h z
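+$$\mathbf{u}(\mathbf{q},z) = -\nabla^2 P\,\mathbf{e}_z + \nabla(\partial_z P) + \nabla_h T \times \mathbf{e}_z,$$
+$$= -(0,\; 0,\; \partial_x^2 P + \partial_y^2 P + \partial_z^2 P) + (\partial_x \partial_z P,\; \partial_y \partial_z P,\; \partial_z^2 P) + (\partial_y T,\; -\partial_x T,\; 0). \tag{15}$$
+Setting $\partial_x^2 + \partial_y^2 = q^2$, div is given by,
+$$\nabla_h \cdot \mathbf{u}(\mathbf{q},z) = q^2\, \partial_z P, \tag{16}$$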
 and curl is given by,
-(cid:104) (cid:105)
-∇×uuu(q,z) =q2T. (17)
+$$\left[ \nabla \times \mathbf{u}(\mathbf{q},z) \right]_z = q^2\, T. \tag{17}$$
 We follow similar steps to those taken in Langfellner et al. (2015) for comparison of flow maps with LCT. The
-essential step for comparison at different length scales is to bandpass filter the Fourier-space flow around the qR of
+essential step for comparison at different length scales is to bandpass filter the Fourier-space flow around the $qR_\odot$ of
 interest (see Figure 4), and subsequently convert it to real space.
-We seek to show comparisons (see Figures 5, 6, and 7) for qR = 100, 150, 200 and 250. To sufficiently delineate
+We seek to show comparisons (see Figures 5, 6, and 7) for $qR_\odot = 100$, 150, 200 and 250. To sufficiently delineate
 flows at these length scales, we apply a Gaussian filter (see Figure 4) to flows obtained from eqns 16 and 17. The
 Gaussian is centered at the desired wavenumber with a half-width of 25. We then perform a 2D Fourier transform to
 obtain a real-space steady-flow map.
 8 Mani et al.
 Figure 4. Left: Divergence-flow power spectrum |div|2, from eqn 16, obtained from inversion using all the couplings. The
-power-spectrum is then filtered with a bandpass centered around qR =150 (middle panel). The resulting spectra is shown in
+power-spectrum is then filtered with a bandpass centered around qR
 (cid:12)
+=150 (middle panel). The resulting spectra is shown in
 the right panel. The units of |div|2 are in s−2. For illustration, we show the action of the filter on the power-spectrum |div|2
 since it is a real quantity, but recall that it is the Fourier-space flow div (a complex quantity) on which we apply the filter.
-For LCT, we first apply a Gaussian smoothing to v and v to average over small-scale features; the extent of
-x y
-smoothing depends on the length scale qR to be compared with mode-coupling. div and curl are then simply
+For LCT, we first apply a Gaussian smoothing to $v_x$ and $v_y$ to average over small-scale features; the extent of
+smoothing depends on the length scale $qR_\odot$ to be compared with mode-coupling. div and curl are then simply
 computed by
-div =∂ v +∂ v , (18)
-x x y y
-curl=∂ v −∂ v . (19)
-x y y x
+$$\mathrm{div} = \partial_x v_x + \partial_y v_y, \tag{18}$$
+$$\mathrm{curl} = \partial_x v_y - \partial_y v_x. \tag{19}$$
 We then perform a 2D Fourier transform on eqns 18 and 19, apply the same Gaussian filters as for mode-coupling,
 and transform back to real space.
 Condensing all of the above, the following sequence of operations to compare flows at desired length scales are
 performed for mode-coupling (M-C) and for LCT -
-M-C: φ(x,y;t)=3 =D =F =F =T ⇒φω,B =i =nv =e =rs =io =n ⇒P,T =∇ ==h⇒· eqns 16, 17==F =ilt =e =r, ⇒div,curl
-k k,q
-∇× 2DFFT
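-FLCT smooth, 2DFFT, 2DFFT
-LCT: I 1,I ====⇒v x,v ======⇒eqns 18, 19======⇒Fourier-space =====⇒div,curl
-2 y
-∇h· ∇× Filter
+M-C: $\phi(x,y;t) \xRightarrow{\text{3DFFT}} \phi^{\omega}_{k},\, B_{k,q} \xRightarrow{\text{inversion}} P,\, T \xRightarrow[\nabla\times]{\nabla_h\cdot} \text{eqns 16, 17} \xRightarrow[\text{2DFFT}]{\text{Filter,}} \mathrm{div},\, \mathrm{curl}$
+LCT: $I_1,\, I_2 \xRightarrow{\text{FLCT}} v_x,\, v_y \xRightarrow[\nabla_h\cdot\;\nabla\times]{\text{smooth,}} \text{eqns 18, 19} \xRightarrow[\text{Filter}]{\text{2DFFT,}} \text{Filtered Fourier-space flows} \xRightarrow{\text{2DFFT}} \mathrm{div},\, \mathrm{curl}$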
 6. RESULTS
 Table 2 summarizes the results of the comparison between flows obtained from mode-coupling and LCT. Figure 5,
 where we have used all the couplings to perform inversions, shows a 97% correlation between divergence flows from
-the two methods near supergranular scale (qR ≈ 100). Near-surface flows are imaged most faithfully when all the
+the two methods near supergranular scale ($qR_\odot \approx 100$). Near-surface flows are imaged most faithfully when all the
 couplings are used. Since vortical flows are imaged at a region near the equator, it is possible that the source of
 vorticity is something other than Coriolis force. Nevertheless, there is also a very good agreement (87%) between
 the vortical flows as inferred from the two methods, despite being an order of magnitude weaker than the divergence
 flows (this is consistent with the results of Hathaway et al. 2015; Langfellner et al. 2015; Rincon et al. 2017). Due to
-insufficientmodesforthep -p case(seeTable1),weareunabletoinfervorticalflowswithconvictionotherthannear
-2 2
+insufficient modes for the $p_2$-$p_2$ case (see Table 1), we are unable to infer vortical flows with conviction other than near
 the supergranular scale, as can be seen from Table 2. Figure 6 also aligns with what we believe can be accomplished
-throughmode-couplinghelioseismology-usingf-forp -p alonetoseismicallyinfernear-surfacedivergenceandvortical
-1 1
-flows at different scales (qR = 100,150) can yield extremely good agreement with LCT. As the length scale of the
+through mode-coupling helioseismology - using $f$-$f$ or $p_1$-$p_1$ alone to seismically infer near-surface divergence and vortical
+flows at different scales ($qR_\odot = 100, 150$) can yield extremely good agreement with LCT. As the length scale of the
 inferred flow moves further away from that of supergranules (Figure 7), the demand on signal-to-noise also increases.
 An adequate number of modes (and coupling strength between higher radial-orders) thus becomes a necessity to
 comment substantively on the flows at these scales.
@@ -427,15 +843,19 @@ Imaging near-surface flows using mode-coupling analysis 9
 (a)qR(cid:12)=100,f-f+p1-p1 +p2-p2
 Figure5. Real-spacedivergenceflows(leftcolumn,inunitsof10−5s−1)andradialvorticity(rightcolumn,inunitsof10−6s−1)
 for LCT (top row), and mode-coupling inversions through RLS using all the couplings (middle row), bandpass filtered around
-qR =100 (see Figure 4). Corresponding scatter plots and correlation coefficients are shown in the bottom row. We cut edges
+qR
 (cid:12)
+=100 (see Figure 4). Corresponding scatter plots and correlation coefficients are shown in the bottom row. We cut edges
 outfromtheflowmapsandcompareacircularregionofdiameter≈175Mm. Theslopesofthebest-fitlinethroughthescatter
 plots are 0.51 for divergence and 0.01 for vorticity. The vorticity flow maps are saturated to show only 40% of the maximum
 values.
 For both LCT and mode-coupling divergence and vorticity maps, numerous factors, arising from the associated
 numerous data processing steps, can influence the final inference of flow amplitudes, making it difficult to put forward
-a precise statement on them. H21 reported a 60% greater amplitude for p -p over f-f coupling (Figure 3 reflects a
-1 1
+a precise statement on them. H21 reported a 60% greater amplitude for p
+1
+-p
+1
+over f-f coupling (Figure 3 reflects a
 similar conclusion), another element to consider when combining different radial orders. The choice of regularization
 (see right panel of Figure 2) has the potential to affect the amplitudes of the inverted flows to some degree. Flow
 amplitudes also vary with depth, implying that different radial orders and LCT will measure different flow averages.
@@ -447,51 +867,76 @@ Thus, theamplitudesofthemode-couplingflows(andthecorrelationcoefficient)dependup
 • Smoothing applied to LCT flows (indirectly; see below paragraph),
 • The depth at which flows are inferred.
 Here, we report in Table 2 only the maximum correlation found from among the points in the radial grid close
-to the surface (within ±0.5 Mm from z=0). For a desired comparison length scale qR , we first fix the coupling(s)
+to the surface (within ±0.5 Mm from z=0). For a desired comparison length scale qR
 (cid:12)
+, we first fix the coupling(s)
 and the regularization parameter to be used in the inversion. We then separately compute filtered divergence and
 10 Mani et al.
 (a)qR(cid:12)=100,f-f (b)qR(cid:12)=150,p1-p1
 Figure6. Real-spacedivergenceflows(leftcolumn,inunitsof10−5s−1)andradialvorticity(rightcolumn,inunitsof10−6s−1)
 for LCT (top row), and mode-coupling inversion through RLS using (a) f-f coupling (bottom row), bandpass filtered around
-qR =100, and using (b) p -p coupling (bottom row), bandpass filtered around qR =150. We cut edges out from the flow
-(cid:12) 1 1 (cid:12)
+qR
+(cid:12)
+=100, and using (b) p
+1
+-p
+1
+coupling (bottom row), bandpass filtered around qR
+(cid:12)
+=150. We cut edges out from the flow
 maps and compare a circular region of diameter ≈175 Mm.
 (a)qR(cid:12)=200,f-f+p1-p1 +p2-p2 (b)qR(cid:12)=250,f-f+p1-p1 +p2-p2
 Figure7. Real-spacedivergenceflows(leftcolumn,inunitsof10−5s−1)andradialvorticity(rightcolumn,inunitsof10−6s−1)
 for LCT (top row), and mode-coupling inversion through RLS using all the couplings (bottom row), bandpass filtered around
-(a)qR =200,and(b)qR =250. Wecutedgesoutfromtheflowmapsandcompareacircularregionofdiameter≈175Mm.
-(cid:12) (cid:12)
+(a)qR
+(cid:12)
+=200,and(b)qR
+(cid:12)
+=250. Wecutedgesoutfromtheflowmapsandcompareacircularregionofdiameter≈175Mm.
 vorticity maps for LCT for different values of smoothing. These flow maps are then compared with those obtained
 from inversions at all depths in the radial grid that are within 0.5 Mm from the surface. The highest correlation
 (corresponding to the above depths and smoothing) is noted and comparison flow maps are plotted for the desired
-qR .
+qR
 (cid:12)
+.
 Ithasbeenshown(seeDeRosa&Toomre2004;Langfellneretal.2015)thatline-of-sightvelocityfromDopplergrams
 and LCT agree closely in amplitudes. But, to recapitulate, a host of factors described above can skew the amplitudes
 fordivergenceflowsowingtothemulti-stepprocessinvolvedinobtainingthem. Forexample, therehasbeenahistory
 (see, e.g., De Rosa et al. 2000; Sekii et al. 2007; Zhao et al. 2007; Langfellner et al. 2018; B¨oning et al. 2020; Korda
-& Sˇvanda 2021) of using travel-time difference as only a proxy for horizontal divergence. However, Langfellner et al.
+& ˇ Svanda 2021) of using travel-time difference as only a proxy for horizontal divergence. However, Langfellner et al.
 Imaging near-surface flows using mode-coupling analysis 11
-Coupling qR div curl
+Coupling qR
 (cid:12)
+div curl
 f-f 100 0.97 0.87
-+ p -p 150 0.95 0.76
-1 1
-+ p -p 200 0.92 0.76
-2 2
++ p
+1
+-p
+1
+150 0.95 0.76
++ p
+2
+-p
+2
+200 0.92 0.76
 250 0.85 0.65
 f-f 100 0.96 0.85
 150 0.93 0.76
 200 0.89 0.69
 250 0.77 0.58
-p -p 100 0.95 0.83
-1 1
+p
+1
+-p
+1
+100 0.95 0.83
 150 0.95 0.75
 200 0.92 0.75
 250 0.85 0.61
-p -p 100 0.94 0.7
-2 2
+p
+2
+-p
+2
+100 0.94 0.7
 150 0.91 0.39
 200 0.79 0.3
 250 0.55 0.3
@@ -510,105 +955,338 @@ applications to investigate other depth- and time-varying features such as giant
 Hanson et al. 2020), emerging active regions, meridional flows and Rossby waves.
 APPENDIX
 A. DERIVATION OF THE FORWARD MODEL
-As described in section 1.1, we seek to describe the flow u as a function of q along e . To that end, substituting
+As described in section 1.1, we seek to describe the flow u as a function of q along e
 z
+. To that end, substituting
 eq 3 into eq 2,
-uσ(z)=(cid:88)(cid:8) q2f e +iqf(cid:48)(cid:9) Pσ +iq×e f Tσ. (A1)
-q j z j jq z j jq
+uσ
+q
+(z)= (cid:88)
 j
+(cid:8) q2f
+j
+e
+z
++iqf(cid:48)
+j
+(cid:9) Pσ
+jq
++iq×e
+z
+f
+j
+Tσ
+jq
+. (A1)
 For flows in the anelastic limit (u (cid:28) speed of sound), we can denote the flow perturbation operator as δLσ =
 −2iωρuσ·∇ (see Hanasoge et al. 2017). Substituting Eq. A1 into the operator, we get,
-δLσ = −2iωρ(iuσ·k+uσ·e ∂ ), (A2)
-q q q z z
-=−2iωρ(cid:80)(cid:8) −k·qf(cid:48)Pσ −k·(q×e )f Tσ +q2f Pσ ∂ (cid:9) . (A3)
-j jq z j jq j jq z
+δLσ
+q
+= −2iωρ(iuσ
+q
+·k+uσ
+q
+·e
+z
+∂
+z
+), (A2)
+=−2iωρ (cid:80)
+j
+(cid:8) −k·qf(cid:48)
+j
+Pσ
+jq
+−k·(q×e
+z
+)f
+j
+Tσ
+jq
++q2f
 j
+Pσ
+jq
+∂
+z
+(cid:9) . (A3)
 12 Mani et al.
 Express the mode eigenfunction describing oscillations in the Cartesian domain by (see Woodard 2006)
-ξ ≡ξ (z)=ikˆH (z)e +zˆV (z), (A4)
-k nk nk z nk
+ξ
+k
+≡ξ
+nk
+(z)=iˆ kH
+nk
+(z)e
+z
++ˆ zV
+nk
+(z), (A4)
 where H and V are real-valued functions; n and n(cid:48) are dropped for compactness of notation. Then the coupling of
-two modes ξ and ξ (k(cid:48) = k+q), by the flow perturbation operator δLσ, denoted by coupling integral Λk (σ), is
-k k(cid:48) q k(cid:48)
+two modes ξ
+k
+and ξ
+k(cid:48)
+(k(cid:48) = k+q), by the flow perturbation operator δLσ
+q
+, denoted by coupling integral Λk
+k(cid:48)
+(σ), is
 given by
+Λk
+k(cid:48)
+(σ)≡ (cid:90) dx(δLσ
+q
+ξ
+k
+)·ξ∗
+k(cid:48)
+= (cid:90) dx
 (cid:34)
-Λk (σ)≡(cid:90) dx(δLσξ )·ξ∗ =(cid:90) dx −2iωρ(cid:88)(cid:110) q2f Pσ (kˆ·kˆ(cid:48) H(cid:48)H∗ +V(cid:48)V∗)
-k(cid:48) q k k(cid:48) j jq k k(cid:48) k k(cid:48)
+−2iωρ (cid:88)
+j
+(cid:110) q2f
 j
+Pσ
+jq
+(ˆ k·ˆ k (cid:48) H(cid:48)
+k
+H∗
+k(cid:48)
++V(cid:48)
+k
+V∗
+k(cid:48)
+)
+− (cid:2) k·qf(cid:48)
+j
+Pσ
+jq
++k·(q×e
+z
+)f
+j
+Tσ
+jq
+(cid:3) (ˆ k·ˆ k (cid:48) H
+k
+H∗
+k(cid:48)
++V
+k
+V∗
+k(cid:48)
+) (cid:111)
 (cid:35)
-−(cid:2) k·qf(cid:48)Pσ +k·(q×e )f Tσ(cid:3) (kˆ·kˆ(cid:48) H H∗ +V V∗)(cid:111) (A5)
-j jq z j jq k k(cid:48) k k(cid:48)
+(A5)
 We desire to linearly relate the coupling integral in the above equation to the flows P and T, through poloidal and
-toroidal sensitivity kernels, C and D respectively. Hence, they are given by
-qj,k qj,k
-C =(cid:90) dzρ(cid:104) q2f (kˆ·kˆ(cid:48) H(cid:48)H∗ +V(cid:48)V∗)
-qj,k j k k(cid:48) k k(cid:48)
-−k·qf(cid:48)(kˆ·kˆ(cid:48) H H∗ +V V∗)(cid:105) ,
-j k k(cid:48) k k(cid:48)
+toroidal sensitivity kernels, C
+qj,k
+and D
+qj,k
+respectively. Hence, they are given by
+C
+qj,k
+= (cid:90) dzρ (cid:104) q2f
+j
+(ˆ k·ˆ k (cid:48) H(cid:48)
+k
+H∗
+k(cid:48)
++V(cid:48)
+k
+V∗
+k(cid:48)
+)
+−k·qf(cid:48)
+j
+(ˆ k·ˆ k (cid:48) H
+k
+H∗
+k(cid:48)
++V
+k
+V∗
+k(cid:48)
+) (cid:105) ,
+D
+qj,k
+=k·(q×e
+z
+)
 (cid:90)
-D =k·(q×e ) dzρf (kˆ·kˆ(cid:48) H H∗ +V V∗). (A6)
-qj,k z j k k(cid:48) k k(cid:48)
-Note the symmetry C = C and D = D . This coupling integral contributes to the cross-spectral
-qj,k −qj,−k qj,k −qj,−k
+dzρf
+j
+(ˆ k·ˆ k (cid:48) H
+k
+H∗
+k(cid:48)
++V
+k
+V∗
+k(cid:48)
+). (A6)
+Note the symmetry C
+qj,k
+= C
+−qj,−k
+and D
+qj,k
+= D
+−qj,−k
+. This coupling integral contributes to the cross-spectral
 measurement between modes k and k+q From eq 8 of Woodard (2014), we write the first-order effect of flow on
 wavefield cross-correlation as
-(cid:104)φω∗φω+σ(cid:105)=Hω Λk (σ), (A7)
-k k+q kk(cid:48)σ k(cid:48)
+(cid:104)φω∗
+k
+φω+σ
+k+q
+(cid:105)=Hω
+kk(cid:48)σ
+Λk
+k(cid:48)
+(σ), (A7)
 where the function H is given by
-Hω =−2iω(N |Rω|2Rω+σ+N |Rω+σ|2Rω∗). (A8)
-kk(cid:48)σ k k k(cid:48) k(cid:48) k(cid:48) k
+Hω
+kk(cid:48)σ
+=−2iω(N
+k
+|Rω
+k
+|2Rω+σ
+k(cid:48)
++N
+k(cid:48)
+|Rω+σ
+k(cid:48)
+|2Rω∗
+k
+). (A8)
 We absorb the factor −2iω into the definition of H. Substitute eq A6 in right-hand-side of eq A7 to obtain eq 4.
 The mode spectral profile R is a Lorentzian, given by
+Rω
+k
+=
 1
-Rω = , (A9)
-k ω2 −ω2−iωγ /2
-nk nk
-whereω istheresonantfrequencyofthemode,andγ isthemodelinewidth. EqA9canbederivedbyintroducing
-nk nk
+ω2
+nk
+−ω2−iωγ
+nk
+/2
+, (A9)
+whereω
+nk
+istheresonantfrequencyofthemode,andγ
+nk
+isthemodelinewidth. EqA9canbederivedbyintroducing
 mode damping −iωγρ as an operator in the differential equation that governs undamped, driven oscillations (see eq
 5 of Hanasoge et al. 2017), and then deriving the effects of first-order perturbations to the wavefield cross-correlation.
-Also, the parity Hω =H−ω∗ and Rω =R−ω∗ are established. Mode normalization N is given by
-kk(cid:48)σ kk(cid:48)−σ k k
-(cid:80) |φω k|2
-(cid:88)Q
-1
-N = ω , (A10)
-k Q (cid:80) Rω
-k k
+Also, the parity Hω
+kk(cid:48)σ
+=H−ω∗
+kk(cid:48)−σ
+and Rω
+k
+=R−ω∗
+k
+are established. Mode normalization N is given by
+N
+k
+= 1
+Q
+Q (cid:88)
+k
+(cid:80)
 ω
+|φω
+k
+|2
+(cid:80)
+ω
+Rω
+k
+, (A10)
+where the 1
+Q
 Q
-where the 1 (cid:80) on the right-hand-side implies average over all [k ,k ] (Q terms in all) such that k = |k| is constant.
-Q x y
+(cid:80)
 k
-This forces N to be isotropic, i.e., to only depend on k, and not k. The sum over ω is within five linewidths of ω .
+on the right-hand-side implies average over all [k
+x
+,k
+y
+] (Q terms in all) such that k = |k| is constant.
+This forces N to be isotropic, i.e., to only depend on k, and not k. The sum over ω is within five linewidths of ω
 nk
+.
 Note that Eq. A8 through A10 are modified from H21 to ensure parity and that flow maps are real.
 The three equations A8 through A10, along with the symmetry relation for kernels, and summation over ±ω, serve
-to establish the parity Bσ = B∗−σ . This allows for obtaining Pσ = P∗−σ, and subsequently, purely real flow in
-k,q −k,−q q −q
+to establish the parity Bσ
+k,q
+= B∗−σ
+−k,−q
+. This allows for obtaining Pσ
+q
+= P∗−σ
+−q
+, and subsequently, purely real flow in
 the real domain. Setting σ = 0 gives us the linear, invertible equation eq 6. Substituting eqns A8 through A10 into
-the noise model obtained in H21 and summing over ±ω establishes the symmetry Gσ =G−σ .
-k,q −k,−q
+the noise model obtained in H21 and summing over ±ω establishes the symmetry Gσ
+k,q
+=G−σ
+−k,−q
+.
 Imaging near-surface flows using mode-coupling analysis 13
 B. SOLA INVERSIONS
 SubtractiveOptimallyLocalizedAverages(SOLA,Pijpers&Thompson1994)aimstoobtainasetofweightfactors
-for the mode q and depth z , which we will call α . A linear weighted sum of the measurements B in the fashion
-o k,zo k,q
+for the mode q and depth z
+o
+, which we will call α
+k,zo
+. A linear weighted sum of the measurements B
+k,q
+in the fashion
 (cid:80)
-α B allows for an average value of the flow P (z) to be estimated at the depth z . To obtain the coefficients
-k,zo k,q q o
 k
-α , it is assumed that a set of sensitivity kernels K (z) for the mode q can be summed up coherently to give an
-k,zo k,q
-’averaging kernel’ thatis localized atthe depth z . Conventionally, a Gaussiancenteredat z and awidth ∆is chosen
-o o
+α
+k,zo
+B
+k,q
+allows for an average value of the flow P
+q
+(z) to be estimated at the depth z
+o
+. To obtain the coefficients
+α
+k,zo
+, it is assumed that a set of sensitivity kernels K
+k,q
+(z) for the mode q can be summed up coherently to give an
+’averaging kernel’ thatis localized atthe depth z
+o
+. Conventionally, a Gaussiancenteredat z
+o
+and awidth ∆is chosen
 which the averaging kernel should resemble after performing inversion.
 B.1. Kernels in the integral form
-Since the kernels in eq A6 are manifest as coefficients on a basis f (z), we first derive kernels that can be expressed
+Since the kernels in eq A6 are manifest as coefficients on a basis f
 j
+(z), we first derive kernels that can be expressed
 as a function of depth z (see Figure 8). It is convenient to derive in matrix form. Thus, with the following definitions:
-P ≡ P (z), p ≡ P , F ≡ f (z), B ≡ B C ≡ C and K ≡ K (z), we write (assume only poloidal flow for
-q qj j k,q qj,k k,q
+P ≡ P
+q
+(z), p ≡ P
+qj
+, F ≡ f
+j
+(z), B ≡ B
+k,q
+C ≡ C
+qj,k
+and K ≡ K
+k,q
+(z), we write (assume only poloidal flow for
 simplicity, the same derivations hold true for toroidal flow as well)
 P =Fp (B11)
 The size of P is thus the same as the length of the radial grid z.
@@ -621,171 +1299,297 @@ B =Cp
 =KP (B13)
 where
 K =(FTF)−1FTC,
-(cid:88)(cid:104)(cid:90) (cid:105)−1
-i.e., K (z)= dzf (z)f (z) f (z)C (B14)
-k,q j j(cid:48) j(cid:48) qj(cid:48),k
+i.e., K
+k,q
+(z)=
+(cid:88)
 j,j(cid:48)
+(cid:104)(cid:90)
+dzf
+j
+(z)f
+j(cid:48)
+(z)
+(cid:105)−1
+f
+j(cid:48)
+(z)C
+qj(cid:48),k
+(B14)
 B.2. Obtaining the coefficients α
 Now, demand that the averaging kernel should resemble a unimodulus target Gaussian centered at z
 o
-1 (cid:16)z−z (cid:17)
-T(z,z )= √ exp o . (B15)
-o 2π∆2 2∆2
+T(z,z
+o
+)=
+1
+√
+2π∆2
+exp
+(cid:16)z−z
+o
+2∆2
+(cid:17)
+. (B15)
 This can be achieved by solving the optimization problem
-(cid:90) (cid:104) (cid:105)2
-minimizeX = dz T(z,z )−Θ (z,z ) , (B16)
-o q o
+minimizeX =
+(cid:90)
+dz
+(cid:104)
+T(z,z
+o
+)−Θ
+q
+(z,z
+o
+)
+(cid:105)2
+, (B16)
 where we introduce the averaging kernel for mode q thus
+Θ
+q
+(z,z
+o
+)=
 (cid:88)
-Θ (z,z )= α K (z). (B17)
-q o k,zo k,q
 k
+α
+k,zo
+K
+k,q
+(z). (B17)
 As an aside, we note that averaging kernels can similarly be constructed for RLS (see section 3.1) using eqns 13
 and B14.
 14 Mani et al.
-Figure 8. Left: Kernel K (z) (eq B14) shown vs depth z for the three radial order couplings f-f, p -p , and p -p . qR =
-k,q 1 1 2 2 (cid:12)
-[−112,−45] and kR = [−853,−157] is chosen for all the radial order couplings for comparison. Right: Averaging kernel
+Figure 8. Left: Kernel K
+k,q
+(z) (eq B14) shown vs depth z for the three radial order couplings f-f, p
+1
+-p
+1
+, and p
+2
+-p
+2
+. qR
 (cid:12)
-(eq B17) using SOLA, for qR = [−112,−45] at depth z = −0.48 Mm, and the corresponding target Gaussian (eq B15).
-(cid:12) 0
+=
+[−112,−45] and kR
+(cid:12)
+= [−853,−157] is chosen for all the radial order couplings for comparison. Right: Averaging kernel
+(eq B17) using SOLA, for qR
+(cid:12)
+= [−112,−45] at depth z
+0
+= −0.48 Mm, and the corresponding target Gaussian (eq B15).
 Integral of the averaging kernel over z is 0.89.
-Setting ∂X →0 gives us the matrix problem to be solved
+Setting ∂X
 ∂α
+→0 gives us the matrix problem to be solved
 A{α}=v,
-(cid:104) (cid:105)−1
-{α}= A+µI v, (B18)
-where the square matrix A=(cid:82) dzK (z)K (z) and v =(cid:82) dzK (z)T(z,z ). Here, k(cid:48) is just a dummy index for
-k,q k(cid:48),q k,q o
+{α}=
+(cid:104)
+A+µI
+(cid:105)−1
+v, (B18)
+where the square matrix A= (cid:82) dzK
+k,q
+(z)K
+k(cid:48),q
+(z) and v = (cid:82) dzK
+k,q
+(z)T(z,z
+o
+). Here, k(cid:48) is just a dummy index for
 denotingelementsinthematrixA,(k(cid:48) (cid:54)=k+q). InthelastlineofeqB18,weintroduceregularizationusinganIdentity
 matrix I, with the regularization parameter µ - purpose being the same as that described in section 3.1. Obtaining
 α thus becomes a highly expensive computationally for very large number of modes (see section 3). Substitute α
+obtained from eq B18 into last line of eq B13, and
 (cid:80)
-obtained from eq B18 into last line of eq B13, and on both sides
 k
+on both sides
+(cid:88)
+k
+α
+k,zo
+Bσ
+k,q
+=
+(cid:88)
+k
+α
+k,zo
 (cid:90)
-(cid:88) (cid:88)
-α Bσ = α dzK (z)Pσ(z),
-k,zo k,q k,zo k,q q
-k k
+dzK
+k,q
+(z)Pσ
+q
+(z),
+=
 (cid:90)
-= dzΘ (z,z )Pσ(z),
-q o q
-≈(cid:104)Pσ(z )(cid:105) (B19)
-q o
+dzΘ
+q
+(z,z
+o
+)Pσ
+q
+(z),
+≈(cid:104)Pσ
+q
+(z
+o
+)(cid:105) (B19)
 Inversions can similarly be performed for multiple depths by choosing suitable widths for the target Gaussians. Di-
 vergence flow can then be obtained from eq 16. Results are shown in Figures 9 and 10.
 REFERENCES
-Anderson, E. R., Duvall, Thomas L., J., & Jefferies, S. M. Bo¨ning, V. G. A., Birch, A. C., Gizon, L., Duvall, T. L., &
-1990, ApJ, 364, 699, doi: 10.1086/169452 Schou, J. 2020, A&A, 635, A181,
-Bahcall, J. N., & Pinsonneault, M. H. 1992, Reviews of doi: 10.1051/0004-6361/201937331
+Anderson, E. R., Duvall, Thomas L., J., & Jefferies, S. M.
+1990, ApJ, 364, 699, doi: 10.1086/169452
+Bahcall, J. N., & Pinsonneault, M. H. 1992, Reviews of
 Modern Physics, 64, 885,
-Braun, D. C., & Lindsey, C. 2001, ApJL, 560, L189,
 doi: 10.1103/RevModPhys.64.885
-doi: 10.1086/324323
 Birch, A. C., Schunker, H., Braun, D. C., et al. 2016,
-Christensen-Dalsgaard, J. 2002, Reviews of Modern
 Science Advances, 2, e1600557,
-Physics, 74, 1073, doi: 10.1103/RevModPhys.74.1073
 doi: 10.1126/sciadv.1600557
-Birch, A. C., Schunker, H., Braun, D. C., & Gizon, L. 2019, —. 2021, Living Reviews in Solar Physics, 18, 2,
-A&A, 628, A37, doi: 10.1051/0004-6361/201935591 doi: 10.1007/s41116-020-00028-3
+Birch, A. C., Schunker, H., Braun, D. C., & Gizon, L. 2019,
+A&A, 628, A37, doi: 10.1051/0004-6361/201935591
+B¨ oning, V. G. A., Birch, A. C., Gizon, L., Duvall, T. L., &
+Schou, J. 2020, A&A, 635, A181,
+doi: 10.1051/0004-6361/201937331
+Braun, D. C., & Lindsey, C. 2001, ApJL, 560, L189,
+doi: 10.1086/324323
+Christensen-Dalsgaard, J. 2002, Reviews of Modern
+Physics, 74, 1073, doi: 10.1103/RevModPhys.74.1073
+—. 2021, Living Reviews in Solar Physics, 18, 2,
+doi: 10.1007/s41116-020-00028-3
 Imaging near-surface flows using mode-coupling analysis 15
-Figure 9. Left: Poloidal flow power-spectrum for f-f as a function of q R and q R . Right: Corresponding power-spectrum
-x (cid:12) y (cid:12)
+Figure 9. Left: Poloidal flow power-spectrum for f-f as a function of q
+x
+R
+(cid:12)
+and q
+y
+R
+(cid:12)
+. Right: Corresponding power-spectrum
 averaged over the azimuthal angle. Shaded region shows ±1−σ error around the mean. Power is in units of m2/s4.
 Figure 10. Real-space divergence flows (in units of 10−5s−1) for mode-coupling inversion through SOLA using f-f coupling,
-andLCT,bandpassfilteredaroundqR =100. Wecutedgesoutfromtheflowmapsandcompareacircularregionofdiameter
+andLCT,bandpassfilteredaroundqR
 (cid:12)
+=100. Wecutedgesoutfromtheflowmapsandcompareacircularregionofdiameter
 ≈175 Mm. The scatter plot shows the agreement between the maps. The slopes of the best-fit line through the scatter plot is
 1.05. For demonstration, we show inversions only for poloidal flow using SOLA.
-De Rosa, M., Duvall, T. L., J., & Toomre, J. 2000, SoPh, Gizon, L., Cameron, R. H., Bekki, Y., et al. 2021, A&A,
-192, 351, doi: 10.1023/A:1005269001739 652, L6, doi: 10.1051/0004-6361/202141462
-De Rosa, M. L., & Toomre, J. 2004, ApJ, 616, 1242, Greer, B. J., Hindman, B. W., & Toomre, J. 2016, ApJ,
-doi: 10.1086/424920 824, 128, doi: 10.3847/0004-637X/824/2/128
-Duvall,T.L.,J.,&Harvey,J.W.1986,inNATOAdvanced Hanasoge, S., & Mandal, K. 2019, ApJL, 871, L32,
+De Rosa, M., Duvall, T. L., J., & Toomre, J. 2000, SoPh,
+192, 351, doi: 10.1023/A:1005269001739
+De Rosa, M. L., & Toomre, J. 2004, ApJ, 616, 1242,
+doi: 10.1086/424920
+Duvall,T.L.,J.,&Harvey,J.W.1986,inNATOAdvanced
 Study Institute (ASI) Series C, Vol. 169, Seismology of
-doi: 10.3847/2041-8213/aaff60
 the Sun and the Distant Stars, ed. D. O. Gough, 105–116
-Hanasoge, S. M., Hotta, H., & Sreenivasan, K. R. 2020,
 Duvall, T. L., J., Jefferies, S. M., Harvey, J. W., &
-Science Advances, 6, eaba9639,
 Pomerantz, M. A. 1993, Nature, 362, 430,
-doi: 10.1126/sciadv.aba9639
 doi: 10.1038/362430a0
-Hanasoge, S. M., Woodard, M., Antia, H. M., Gizon, L., &
 Fisher, G. H., & Welsch, B. T. 2008, in Astronomical
-Sreenivasan, K. R. 2017, MNRAS, 470, 1404,
 Society of the Pacific Conference Series, Vol. 383,
-doi: 10.1093/mnras/stx1298
 Subsurface and Atmospheric Influences on Solar Activity,
-Hansen, P. C. 1992, SIAM review, 34, 561
 ed. R. Howe, R. W. Komm, K. S. Balasubramaniam, &
-Hanson, C. S., Duvall, T. L., Birch, A. C., Gizon, L., &
 G. J. D. Petrie, 373. https://arxiv.org/abs/0712.4289
-Sreenivasan, K. R. 2020, A&A, 644, A103,
 Giles, P. M., Duvall, T. L., Scherrer, P. H., & Bogart, R. S.
-doi: 10.1051/0004-6361/202039108
 1997, Nature, 390, 52, doi: 10.1038/36294
-Gizon, L., & Birch, A. C. 2004, ApJ, 614, 472, Hanson, C. S., Hanasoge, S., & Sreenivasan, K. R. 2021,
-doi: 10.1086/423367 ApJ, 910, 156, doi: 10.3847/1538-4357/abe770
-Gizon, L., Cameron, R. H., Pourabdian, M., et al. 2020, Hathaway, D. H., Teil, T., Norton, A. A., & Kitiashvili, I.
-Science, 368, 1469, doi: 10.1126/science.aaz7119 2015, ApJ, 811, 105, doi: 10.1088/0004-637X/811/2/105
+Gizon, L., & Birch, A. C. 2004, ApJ, 614, 472,
+doi: 10.1086/423367
+Gizon, L., Cameron, R. H., Pourabdian, M., et al. 2020,
+Science, 368, 1469, doi: 10.1126/science.aaz7119
+Gizon, L., Cameron, R. H., Bekki, Y., et al. 2021, A&A,
+652, L6, doi: 10.1051/0004-6361/202141462
+Greer, B. J., Hindman, B. W., & Toomre, J. 2016, ApJ,
+824, 128, doi: 10.3847/0004-637X/824/2/128
+Hanasoge, S., & Mandal, K. 2019, ApJL, 871, L32,
+doi: 10.3847/2041-8213/aaff60
+Hanasoge, S. M., Hotta, H., & Sreenivasan, K. R. 2020,
+Science Advances, 6, eaba9639,
+doi: 10.1126/sciadv.aba9639
+Hanasoge, S. M., Woodard, M., Antia, H. M., Gizon, L., &
+Sreenivasan, K. R. 2017, MNRAS, 470, 1404,
+doi: 10.1093/mnras/stx1298
+Hansen, P. C. 1992, SIAM review, 34, 561
+Hanson, C. S., Duvall, T. L., Birch, A. C., Gizon, L., &
+Sreenivasan, K. R. 2020, A&A, 644, A103,
+doi: 10.1051/0004-6361/202039108
+Hanson, C. S., Hanasoge, S., & Sreenivasan, K. R. 2021,
+ApJ, 910, 156, doi: 10.3847/1538-4357/abe770
+Hathaway, D. H., Teil, T., Norton, A. A., & Kitiashvili, I.
+2015, ApJ, 811, 105, doi: 10.1088/0004-637X/811/2/105
 16 Mani et al.
-Hathaway,D.H.,Upton,L.,&Colegrove,O.2013,Science, Rincon, F., & Rieutord, M. 2018, Living Reviews in Solar
-342, 1217, doi: 10.1126/science.1244682 Physics, 15, 6, doi: 10.1007/s41116-018-0013-5
-Hill, F. 1988, ApJ, 333, 996, doi: 10.1086/166807 Rincon, F., Roudier, T., Schekochihin, A. A., & Rieutord,
-M. 2017, A&A, 599, A69,
+Hathaway,D.H.,Upton,L.,&Colegrove,O.2013,Science,
+342, 1217, doi: 10.1126/science.1244682
+Hill, F. 1988, ApJ, 333, 996, doi: 10.1086/166807
 Kashyap, S. G., Das, S. B., Hanasoge, S. M., Woodard,
-doi: 10.1051/0004-6361/201629747
 M. F., & Tromp, J. 2021, ApJS, 253, 47,
-Schad, A., & Roth, M. 2020, ApJ, 890, 32,
 doi: 10.3847/1538-4365/abdf5e
+Korda, D., & ˇ Svanda, M. 2021, A&A, 646, A184,
+doi: 10.1051/0004-6361/202039928
+Langfellner, J., Birch, A. C., & Gizon, L. 2018, A&A, 617,
+A97, doi: 10.1051/0004-6361/201732471
+Langfellner, J., Gizon, L., & Birch, A. C. 2015, A&A, 581,
+A67, doi: 10.1051/0004-6361/201526024
+Lavely, E. M., & Ritzwoller, M. H. 1992, Philosophical
+Transactions of the Royal Society of London Series A,
+339, 431, doi: 10.1098/rsta.1992.0048
+Lindsey, C., & Braun, D. C. 2000, SoPh, 192, 261,
+doi: 10.1023/A:1005227200911
+L¨ optien, B., Birch, A. C., Duvall, T. L., Gizon, L., &
+Schou, J. 2016, A&A, 587, A9,
+doi: 10.1051/0004-6361/201526805
+L¨ optien, B., Gizon, L., Birch, A. C., et al. 2018, Nature
+Astronomy, 2, 568, doi: 10.1038/s41550-018-0460-x
+Mandal, K., & Hanasoge, S. 2020, ApJ, 891, 125,
+doi: 10.3847/1538-4357/ab7227
+Mandal, K., Hanasoge, S. M., & Gizon, L. 2021, A&A, 652,
+A96, doi: 10.1051/0004-6361/202141044
+Mani, P., & Hanasoge, S. 2020, ApJ, 901, 139,
+doi: 10.3847/1538-4357/abb133
+—. 2021, ApJ, 920, 36, doi: 10.3847/1538-4357/ac1ad6
+November, L. J., & Simon, G. W. 1988, ApJ, 333, 427,
+doi: 10.1086/166758
+Pijpers, F. P., & Thompson, M. J. 1994, A&A, 281, 231
+Rieutord, M., Roudier, T., Ludwig, H. G., Nordlund, ˚A., &
+Stein, R. 2001, A&A, 377, L14,
+doi: 10.1051/0004-6361:20011160
+Rincon, F., & Rieutord, M. 2018, Living Reviews in Solar
+Physics, 15, 6, doi: 10.1007/s41116-018-0013-5
+Rincon, F., Roudier, T., Schekochihin, A. A., & Rieutord,
+M. 2017, A&A, 599, A69,
+doi: 10.1051/0004-6361/201629747
+Schad, A., & Roth, M. 2020, ApJ, 890, 32,
 doi: 10.3847/1538-4357/ab65ec
-Korda, D., & Sˇvanda, M. 2021, A&A, 646, A184,
 Scherrer, P. H., Schou, J., Bush, R. I., et al. 2012, SoPh,
-doi: 10.1051/0004-6361/202039928
 275, 207, doi: 10.1007/s11207-011-9834-2
-Langfellner, J., Birch, A. C., & Gizon, L. 2018, A&A, 617, Schou, J., Antia, H. M., Basu, S., et al. 1998, ApJ, 505,
-A97, doi: 10.1051/0004-6361/201732471 390, doi: 10.1086/306146
-Langfellner, J., Gizon, L., & Birch, A. C. 2015, A&A, 581, Sekii, T. 1997, in Sounding Solar and Stellar Interiors, ed.
-A67, doi: 10.1051/0004-6361/201526024 J. Provost & F.-X. Schmider, Vol. 181, ISBN0792348389
+Schou, J., Antia, H. M., Basu, S., et al. 1998, ApJ, 505,
+390, doi: 10.1086/306146
+Sekii, T. 1997, in Sounding Solar and Stellar Interiors, ed.
+J. Provost & F.-X. Schmider, Vol. 181, ISBN0792348389
 Sekii, T., Kosovichev, A. G., Zhao, J., et al. 2007, PASJ,
-Lavely, E. M., & Ritzwoller, M. H. 1992, Philosophical
 59, S637, doi: 10.1093/pasj/59.sp3.S637
-Transactions of the Royal Society of London Series A,
 Snodgrass, H. B. 1984, SoPh, 94, 13,
-339, 431, doi: 10.1098/rsta.1992.0048
 doi: 10.1007/BF00154804
-Lindsey, C., & Braun, D. C. 2000, SoPh, 192, 261,
 Thompson, M. J., Toomre, J., Anderson, E. R., et al. 1996,
-doi: 10.1023/A:1005227200911
 Science, 272, 1300, doi: 10.1126/science.272.5266.1300
-Lo¨ptien, B., Birch, A. C., Duvall, T. L., Gizon, L., &
 Unno, W., Osaki, Y., Ando, H., Saio, H., & Shibahashi, H.
-Schou, J. 2016, A&A, 587, A9,
 1989, Nonradial oscillations of stars
-doi: 10.1051/0004-6361/201526805 Verma, M., Steffen, M., & Denker, C. 2013, A&A, 555,
-Lo¨ptien, B., Gizon, L., Birch, A. C., et al. 2018, Nature A136, doi: 10.1051/0004-6361/201321628
-Astronomy, 2, 568, doi: 10.1038/s41550-018-0460-x Vorontsov, S. V. 2011, MNRAS, 418, 1146,
-Mandal, K., & Hanasoge, S. 2020, ApJ, 891, 125, doi: 10.1111/j.1365-2966.2011.19564.x
-doi: 10.3847/1538-4357/ab7227 Woodard, M. 2014, SoPh, 289, 1085,
+Verma, M., Steffen, M., & Denker, C. 2013, A&A, 555,
+A136, doi: 10.1051/0004-6361/201321628
+Vorontsov, S. V. 2011, MNRAS, 418, 1146,
+doi: 10.1111/j.1365-2966.2011.19564.x
+Woodard, M. 2014, SoPh, 289, 1085,
 doi: 10.1007/s11207-013-0386-5
-Mandal, K., Hanasoge, S. M., & Gizon, L. 2021, A&A, 652,
 Woodard, M., Schou, J., Birch, A. C., & Larson, T. P.
-A96, doi: 10.1051/0004-6361/202141044
 2013, SoPh, 287, 129, doi: 10.1007/s11207-012-0075-9
-Mani, P., & Hanasoge, S. 2020, ApJ, 901, 139,
 Woodard, M. F. 1989, ApJ, 347, 1176, doi: 10.1086/168206
-doi: 10.3847/1538-4357/abb133
 —. 2006, ApJ, 649, 1140, doi: 10.1086/506927
-—. 2021, ApJ, 920, 36, doi: 10.3847/1538-4357/ac1ad6
 —. 2007, ApJ, 668, 1189, doi: 10.1086/521391
-November, L. J., & Simon, G. W. 1988, ApJ, 333, 427, —. 2016, MNRAS, 460, 3292, doi: 10.1093/mnras/stw1223
-doi: 10.1086/166758 Zhao, J., Georgobiani, D., Kosovichev, A. G., et al. 2007,
-Pijpers, F. P., & Thompson, M. J. 1994, A&A, 281, 231 ApJ, 659, 848, doi: 10.1086/512009
-Rieutord, M., Roudier, T., Ludwig, H. G., Nordlund, ˚A., & Zhao, J., Nagashima, K., Bogart, R. S., Kosovichev, A. G.,
+—. 2016, MNRAS, 460, 3292, doi: 10.1093/mnras/stw1223
+Zhao, J., Georgobiani, D., Kosovichev, A. G., et al. 2007,
+ApJ, 659, 848, doi: 10.1086/512009
+Zhao, J., Nagashima, K., Bogart, R. S., Kosovichev, A. G.,
 & Duvall, T. L., J. 2012, ApJL, 749, L5,
-Stein, R. 2001, A&A, 377, L14,
 doi: 10.1088/2041-8205/749/1/L5
-doi: 10.1051/0004-6361:20011160
diff --git a/read/results/pdfplumber/2201.00200.txt b/read/results/pdfplumber/2201.00200.txt
index 9782285..33b7ed3 100644
--- a/read/results/pdfplumber/2201.00200.txt
+++ b/read/results/pdfplumber/2201.00200.txt
@@ -4,92 +4,134 @@ Local heating due to convective overshooting and the solar
 modelling problem
 I.Baraffe1,2,T.Constantino1,J.Clarke1,A.LeSaux1,2,T.Goffrey4,T.Guillet1,J.Pratt3,D.G.Vlaykov1
 1 UniversityofExeter,PhysicsandAstronomy,EX44QLExeter,UK(e-mail:i.baraffe@ex.ac.uk)
-2 E´coleNormaleSupe´rieure,Lyon,CRAL(UMRCNRS5574),Universite´deLyon,France
+2 ´ EcoleNormaleSup´ erieure,Lyon,CRAL(UMRCNRS5574),Universit´ edeLyon,France
 3 DepartmentofPhysicsandAstronomy,GeorgiaStateUniversity,AtlantaGA30303,USA
 4 CentreforFusion,SpaceandAstrophysics,DepartmentofPhysics,UniversityofWarwick,Coventry,CV47AL,UK
 ABSTRACT
-2202
 Recent hydrodynamical simulations of convection in a solar-like model suggest that penetrative convective flows at the boundary
 oftheconvectiveenvelopemodifythethermalbackgroundintheovershootinglayer.Basedontheseresults,weimplementinone-
 dimensional stellar evolution codes a simple prescription to modify the temperature gradient below the convective boundary of a
 solar model. This simple prescription qualitatively reproduces the behaviour found in the hydrodynamical simulations, namely a
-naJ
 localheatingandsmoothingofthetemperaturegradientbelowtheconvectiveboundary.Weshowthatintroducinglocalheatingin
 theovershootinglayercanreducethesound-speeddiscrepancyusuallyreportedbetweensolarmodelsandthestructureoftheSun
 inferred from helioseismology. It also affects key quantities in the convective envelope, such as the density, the entropy, and the
-speedofsound.Theseeffectscouldhelpreducethediscrepanciesbetweensolarmodelsandobservedconstraintsbasedonseismic 1
+speedofsound.Theseeffectscouldhelpreducethediscrepanciesbetweensolarmodelsandobservedconstraintsbasedonseismic
 inversions of the Ledoux discriminant. Since mixing due to overshooting and local heating are the result of the same convective
-]RS.hp-ortsa[
 penetrationprocess,thegoalofthisworkistoinvitesolarmodellerstoconsiderbothprocessesforamoreconsistentapproach.
 Keywords.Convection–Hydrodynamics–Stars:evolution–Sun:evolution-helioseismology-interior
-1. Introduction baseoftheconvectivezone,Christensen-Dalsgaardetal.(2011)
-found that models that better fit the helioseismic data have a
+1. Introduction
 Modelling the internal structure of the Sun is still a challenge.
-weakly sub-adiabatic temperature gradient in the lower part of
 A recent review by Christensen-Dalsgaard (2021) describes in
-theconvectivezoneandasmoothtransitiontotheradiativegra-
 detailthelong-standingeffortstoimprovesolarmodels.Theso-
-dientintheovershootinglayer.ButChristensen-Dalsgaardetal.
-lar modelling problem refers to the discrepancy between helio- (2011)notedthattherequiredtemperaturestratificationisdiffi-
-seismology and solar interior models that adopt low metallici- culttoreconcilewithexistingovershootingmodelsandnumer- 1v00200.1022:viXra
+lar modelling problem refers to the discrepancy between helio-
+seismology and solar interior models that adopt low metallici-
 tiespredictedbythethree-dimensional(3D)atmospheremodels
-ical simulations. They concluded that only non-local turbulent
 of,forexample,Asplundetal.(2009)andCaffauetal.(2011),
-convectionmodelscouldproducethedesireddegreeofsmooth-
 in contrast to the high metallicities based on previous litera-
-nessinthetransition(seeforexampleZhang&Li2012;Zhang
 ture compilations by, for example, Anders & Grevesse (1989)
-et al. 2012). But these non-local models remain uncertain, and
 and Grevesse & Noels (1993). Asplund et al. (2021) have re-
-their description of overshooting under the conditions found at
 cently confirmed with state-of-the-art 3D simulations the rela-
-the base of the solar convective zone is yet to be validated.
 tively low metal abundances for the Sun. Asplund et al. (2021)
-Zhang et al. (2019) explored the impact of overshooting by
 consider that their study yields the most reliable solar abun-
-introducing a parametrised turbulent kinetic energy flux based
 dancesavailabletoday,suggestingthatthesolarmodellingprob-
-on a model with parameters that are adjusted to improve the
 lemisnolongeraproblemofabundancesbutratheraproblem
-helioseismic properties. They suggest that amelioration can be
 ofstellarphysics.Thetreatmentofmixingbelowtheconvective
-obtained specifically below the convective envelope. However,
 zoneisoneofthekeyprocessesthatcouldimprovesolarmod-
-Zhangetal.(2019)findthatthismodelcannotsolvethewhole
 els.Severalstudiesindeedrevealthattheprocessofconvective
-solarproblembecausesuchafluxworsensthesound-speedpro-
 penetration, also called overshooting, at the bottom of the con-
-fileinthedeepradiativeinterioroftheirsolarmodel.Giventhe
 vectiveenvelopecouldplayanimportantroleinimprovingthe
-uncertaintiesregardingthetemperaturestratificationoftheover-
-agreement between solar models and helioseismic constraints shootingregion,solarmodellershaveconsideredtheseeffectsas
-(seeforexampleChristensen-Dalsgaardetal.2011;Zhangetal. secondaryandhavefocusedtheireffortsonexploringtheimpact
+agreement between solar models and helioseismic constraints
+(seeforexampleChristensen-Dalsgaardetal.2011;Zhangetal.
 2012; Buldgen et al. 2019b). Overshooting in solar models has
-ofsolarabundances,microphysics(opacities,equationsofstate,
 mostoftenbeentreatedusingdiffusiveorinstantaneouschemi-
-nuclear reaction rates), and chemical mixing and diffusion (see
 calmixing.Atemperaturegradientthatsharplytransitionsfrom
-details and references in the review of Buldgen et al. 2019a).
-a nearly adiabatic form to a radiative form is usually assumed, Additional, more exotic effects such as early disk accretion or
+a nearly adiabatic form to a radiative form is usually assumed,
 as suggested by the theoretical work of Zahn (1991). Models
-solar-wind mass loss (Zhang et al. 2019; Kunitomo & Guillot
 with a smoother transition have also been investigated. Based
-2021)arealsoattractingincreasingattention.
 on the analysis of models with different stratifications near the
+Sendoffprintrequeststo:I.Baraffe
+baseoftheconvectivezone,Christensen-Dalsgaardetal.(2011)
+found that models that better fit the helioseismic data have a
+weakly sub-adiabatic temperature gradient in the lower part of
+theconvectivezoneandasmoothtransitiontotheradiativegra-
+dientintheovershootinglayer.ButChristensen-Dalsgaardetal.
+(2011)notedthattherequiredtemperaturestratificationisdiffi-
+culttoreconcilewithexistingovershootingmodelsandnumer-
+ical simulations. They concluded that only non-local turbulent
+convectionmodelscouldproducethedesireddegreeofsmooth-
+nessinthetransition(seeforexampleZhang&Li2012;Zhang
+et al. 2012). But these non-local models remain uncertain, and
+their description of overshooting under the conditions found at
+the base of the solar convective zone is yet to be validated.
+Zhang et al. (2019) explored the impact of overshooting by
+introducing a parametrised turbulent kinetic energy flux based
+on a model with parameters that are adjusted to improve the
+helioseismic properties. They suggest that amelioration can be
+obtained specifically below the convective envelope. However,
+Zhangetal.(2019)findthatthismodelcannotsolvethewhole
+solarproblembecausesuchafluxworsensthesound-speedpro-
+fileinthedeepradiativeinterioroftheirsolarmodel.Giventhe
+uncertaintiesregardingthetemperaturestratificationoftheover-
+shootingregion,solarmodellershaveconsideredtheseeffectsas
+secondaryandhavefocusedtheireffortsonexploringtheimpact
+ofsolarabundances,microphysics(opacities,equationsofstate,
+nuclear reaction rates), and chemical mixing and diffusion (see
+details and references in the review of Buldgen et al. 2019a).
+Additional, more exotic effects such as early disk accretion or
+solar-wind mass loss (Zhang et al. 2019; Kunitomo & Guillot
+2021)arealsoattractingincreasingattention.
 To reinvigorate the debate, Buldgen et al. (2019b) recently
-Sendoffprintrequeststo:I.Baraffe highlightedonceagainhowthetransitionofthetemperaturegra-
+highlightedonceagainhowthetransitionofthetemperaturegra-
+1
+a
+r
+X i
+v
+:
+2
+2
+0
 1
+.
+0
+0
+2
+0
+0
+v
+1
+[
+a
+s
+t
+r
+o
+-
+p
+h
+.
+S
+R
+]
+1
+J
+a
+n
+2
+0
+2
+2
 Baraffeetal.:Localheatingduetoconvectiveovershootingandthesolarmodellingproblem
-dientjustbelowtheconvectiveenvelopecansignificantlyimpact tionprocesstothelocalheatingandtotheradiativebumpinthe
-the disagreement between solar models and helioseismic con- overshootinglayer.Thesolar-likestarsimulatedinB21isbased
-straints. Their results, based on a method that combines multi- on a model that is not thermally relaxed. It is reasonable to as-
-ple structural inversions, suggest that the transition in temper- sumethatthelocalheatingseeninB21ispresentinstarsbecause
-ature gradient is improperly reproduced by adopting either an thenegativeheatfluxintheovershootinglayerandthebumpin
-adiabatic or a radiative temperature gradient in the overshoot- theradiativefluxthatcompensatesforthisfeaturearepersistent.
-ing layer. The solution should be somewhere in between these Thesetwofeaturesarealsocommonlyobservedinotherhydro-
-twoextremes.Christensen-Dalsgaardetal.(2018)alsonotethat dynamical simulations, as mentioned above. An exploration of
-an increase in the temperature at the transition would remove theimpactofthisheatingonstellarevolutionmodelsmayreveal
-aremainingsmallsharpdipinthespeedofsoundimmediately thatheatingisanecessaryaspectofmodelsforovershooting.
+dientjustbelowtheconvectiveenvelopecansignificantlyimpact
+the disagreement between solar models and helioseismic con-
+straints. Their results, based on a method that combines multi-
+ple structural inversions, suggest that the transition in temper-
+ature gradient is improperly reproduced by adopting either an
+adiabatic or a radiative temperature gradient in the overshoot-
+ing layer. The solution should be somewhere in between these
+twoextremes.Christensen-Dalsgaardetal.(2018)alsonotethat
+an increase in the temperature at the transition would remove
+aremainingsmallsharpdipinthespeedofsoundimmediately
 beneaththeconvectivezoneofthemodel.Amajordifficultyis
 todisentangletheeffectsofovershootfromtheeffectsofopaci-
 ties,whichcanalsoalterthetemperaturegradientintheselayers.
@@ -118,142 +160,285 @@ implicitsimulationsofconvectionandconvectivepenetrationin
 a solar-like model with the MUlti-dimensional Stellar Implicit
 Code MUSIC (Viallet et al. 2011, 2016; Goffrey et al. 2017).
 The main motivation was to explore the impact of an artificial
-increaseinthestellarluminosityonthepropertiesofconvection Fig.1. Radial profile of the temperature departure ∆T/T 0 from
-and convective penetration. This procedure is a common tactic theinitialprofileT 0andofthesub-adiabaticity(∇−∇ ad)closeto
-adopted in hydrodynamical simulations of convection (Rogers theconvectiveboundarypredictedby2Dhydrodynamicalsimu-
-etal.2006;Meakin&Arnett2007;Brunetal.2011;Hotta2017; lations(B21)ofsolar-likemodels.Thelowerpanelcorresponds
-Edelmannetal.2019).TheexperimentsofB21highlighttheim- to the model with a realistic stellar luminosity and the upper
-pactofpenetrativedownflowsonthelocalthermalbackground panel to a model with luminosity enhanced by a factor of ten.
-intheovershootinglayer.Theyillustratehowconvectivedown- The dash-dotted red lines show ∆T/T 0 (in %), the relative dif-
-flows,whenpenetratingtheregionbelowtheconvectivebound- ferencebetweenthetimeandspaceaveragesofthetemperature,
-aryoftheenvelope,caninducealocalheatingandamodification T,andtheinitialtemperature,T 0.Thesolidbluelinesshowthe
-ofthetemperaturegradientasaresultofcompressionandshear time and space averages of the sub-adiabaticity (∇−∇ ad). The
-in the overshooting layer. This modification of the local back- dashedblacklinesshowtheinitialprofileofthesub-adiabaticity,
-ground is connected to a local increase in the radiative flux to (∇−∇ ad) init.Theconvectiveboundaryisindicatedbythevertical
-counterbalancethenegativeenthalpyflux(orheatflux)produced solidline(seedetailsinB21)
+increaseinthestellarluminosityonthepropertiesofconvection
+and convective penetration. This procedure is a common tactic
+adopted in hydrodynamical simulations of convection (Rogers
+etal.2006;Meakin&Arnett2007;Brunetal.2011;Hotta2017;
+Edelmannetal.2019).TheexperimentsofB21highlighttheim-
+pactofpenetrativedownflowsonthelocalthermalbackground
+intheovershootinglayer.Theyillustratehowconvectivedown-
+flows,whenpenetratingtheregionbelowtheconvectivebound-
+aryoftheenvelope,caninducealocalheatingandamodification
+ofthetemperaturegradientasaresultofcompressionandshear
+in the overshooting layer. This modification of the local back-
+ground is connected to a local increase in the radiative flux to
+counterbalancethenegativeenthalpyflux(orheatflux)produced
 by penetrating flows. The negative peak of the enthalpy flux
 andthepositivebumpoftheradiativefluxbelowtheconvective
-boundary are well-known features described in many numeri- The behaviour of the thermal profile below the convective
-calworks(Hurlburtetal.1986;Muthsametal.1995;Brummell boundary found in the simulations of B21 is illustrated in Fig.
-et al. 2002; Brun et al. 2011; Hotta 2017; Ka¨pyla¨ 2019; Cai 1.Itisdisplayedforthemodelwitharealisticstellarluminosity
-2020).Afewworks(Rogersetal.2006;Vialletetal.2013;Korre (lowerpanel).Wealsoshowtheresultsforamodelwithanartifi-
-et al. 2019; Higl et al. 2021) have also reported a modification cialenhancementintheluminositybyafactoroftenbecausethe
-ofthelocalthermalbackgroundintheovershootingregion,but featuresareintensifiedinthese‘boosted’models(upperpanel).
-withoutprovidingadetaileddescription.ThesimulationsofB21 Thefigureshowsthelocalheatingintheovershootinglayerand
-provideaphysicalexplanationthatlinkstheconvectivepenetra- itsimpactonthesub-adiabaticity(∇−∇ ),with∇ = dlogT the
-ad dlogP
+boundary are well-known features described in many numeri-
+calworks(Hurlburtetal.1986;Muthsametal.1995;Brummell
+et al. 2002; Brun et al. 2011; Hotta 2017; K¨ apyl¨ a 2019; Cai
+2020).Afewworks(Rogersetal.2006;Vialletetal.2013;Korre
+et al. 2019; Higl et al. 2021) have also reported a modification
+ofthelocalthermalbackgroundintheovershootingregion,but
+withoutprovidingadetaileddescription.ThesimulationsofB21
+provideaphysicalexplanationthatlinkstheconvectivepenetra-
+tionprocesstothelocalheatingandtotheradiativebumpinthe
+overshootinglayer.Thesolar-likestarsimulatedinB21isbased
+on a model that is not thermally relaxed. It is reasonable to as-
+sumethatthelocalheatingseeninB21ispresentinstarsbecause
+thenegativeheatfluxintheovershootinglayerandthebumpin
+theradiativefluxthatcompensatesforthisfeaturearepersistent.
+Thesetwofeaturesarealsocommonlyobservedinotherhydro-
+dynamical simulations, as mentioned above. An exploration of
+theimpactofthisheatingonstellarevolutionmodelsmayreveal
+thatheatingisanecessaryaspectofmodelsforovershooting.
+Fig.1. Radial profile of the temperature departure ∆T/T 0 from
+theinitialprofileT 0 andofthesub-adiabaticity(∇−∇ ad )closeto
+theconvectiveboundarypredictedby2Dhydrodynamicalsimu-
+lations(B21)ofsolar-likemodels.Thelowerpanelcorresponds
+to the model with a realistic stellar luminosity and the upper
+panel to a model with luminosity enhanced by a factor of ten.
+The dash-dotted red lines show ∆T/T 0 (in %), the relative dif-
+ferencebetweenthetimeandspaceaveragesofthetemperature,
+T,andtheinitialtemperature,T 0 .Thesolidbluelinesshowthe
+time and space averages of the sub-adiabaticity (∇−∇ ad ). The
+dashedblacklinesshowtheinitialprofileofthesub-adiabaticity,
+(∇−∇ ad ) init .Theconvectiveboundaryisindicatedbythevertical
+solidline(seedetailsinB21)
+The behaviour of the thermal profile below the convective
+boundary found in the simulations of B21 is illustrated in Fig.
+1.Itisdisplayedforthemodelwitharealisticstellarluminosity
+(lowerpanel).Wealsoshowtheresultsforamodelwithanartifi-
+cialenhancementintheluminositybyafactoroftenbecausethe
+featuresareintensifiedinthese‘boosted’models(upperpanel).
+Thefigureshowsthelocalheatingintheovershootinglayerand
+itsimpactonthesub-adiabaticity(∇−∇
+ad
+),with∇ = dlogT
+dlogP
+the
 2
 Baraffeetal.:Localheatingduetoconvectiveovershootingandthesolarmodellingproblem
-temperature gradient and ∇ = dlogT| the adiabatic gradient. entropydiscrepancy(S −S )/S hastwopositivepeaksin
-ad dlogP S Sun ref ref
-Theinitialstratificationbelowtheconvectiveboundary(located theradiativezone,onejustbelowtheovershootingregionanda
-at r = 0.6734 × R for this specific stellar model) is set by larger peak deeper at ∼ 40% of the stellar radius. This discrep-
+temperature gradient and ∇
+ad
+= dlogT
+dlogP
+|
+S
+the adiabatic gradient.
+Theinitialstratificationbelowtheconvectiveboundary(located
+at r = 0.6734 × R
 star
-thestableradiativegradient,∇ (seethedashedblacklinebe- ancyisnegativeintheconvectivezone.Thecorrectionsapplied
+for this specific stellar model) is set by
+thestableradiativegradient,∇
 rad
-lowtheconvectiveboundaryinFig.1).B21showthat,asare- toAhelpreducetheseentropydiscrepanciesinbothregions.
-sultofthelocalheatingbelowtheconvectiveboundarycharac- The fourth concerns the density. The quantity (ρ Sun −
-terisedbythebumpintemperaturedifference∆T/T 0 displayed ρ ref)/ρ ref has a negative peak in the radiative region, at ∼ 35%
-in Fig. 1, the temperature gradient becomes less sub-adiabatic ofthestellarradius,andispositiveintheconvectivezone.
-immediately below the convective boundary1. The net result is Importantly,Buldgenetal.(2020)mentionthattheirrecon-
-a smoother transition just below the convective boundary with struction procedure gives similar Ledoux discriminant profiles
-a temperature gradient that has an intermediate value between for a wide range of initial reference models. We used these re-
-the radiative temperature gradient and the adiabatic one. In the sults to gauge whether the modifications of the thermal profile
-next section we analyse the impact of this local heating on 1D predicted by B21 can help in qualitatively improving all the
-solar structures by adopting a simple prescription that mimics structuralquantitiesusedbyBuldgenetal.(2020).
+(seethedashedblacklinebe-
+lowtheconvectiveboundaryinFig.1).B21showthat,asare-
+sultofthelocalheatingbelowtheconvectiveboundarycharac-
+terisedbythebumpintemperaturedifference∆T/T 0 displayed
+in Fig. 1, the temperature gradient becomes less sub-adiabatic
+immediately below the convective boundary1. The net result is
+a smoother transition just below the convective boundary with
+a temperature gradient that has an intermediate value between
+the radiative temperature gradient and the adiabatic one. In the
+next section we analyse the impact of this local heating on 1D
+solar structures by adopting a simple prescription that mimics
 the behaviour of the temperature gradient suggested by hydro-
 dynamicalsimulations.
+3. Impactonone-dimensionalsolarstructure
+models
+3.1. Helioseismicconstraints
+Ourprimarygoalinthisshortpaperistoillustratethepotential,
+qualitative impact of the local heating produced by overshoot-
+ing. We adopted a strategy inspired by the analysis of Buldgen
+et al. (2020), who constructed a static structure of the Sun in
+agreement with seismic inversions of the Ledoux discriminant
+definedby
+A=
+1
+Γ
+1
+dlnP
+dlnr
+−
+dlnρ
+dlnr
+, (1)
+with Γ
+1
+= (∂lnP/∂lnρ)
+ad
+. Starting from a reference evolu-
+tionary model, Buldgen et al. (2020) used an inversion pro-
+cedure to iteratively reconstruct a solar model. Successive in-
+versions of the Ledoux discriminant allowed them to obtain a
+model-independentprofileforthisquantity.Theirreconstruction
+method also gives solar structures that are in excellent agree-
+mentwithotherstructuralinversions,namelytheentropy,S,the
+squareofthespeedofsound,c2
+s
+,andthedensity,ρ.Toillustrate
+the convergence of their reconstruction procedure, they show
+(rightpanelsoftheirFigs.3-6)thesuccessiveiterationsthatcon-
+verge to an excellent level of agreement for the four structural
+inversions(A,S,c2
+s
+,ρ)startingfromtheinitialreferencemodel
+adoptedintheirwork.Thedifferencesfoundbetweentherecon-
+structedmodelandthereferencemodelareusefulastheyindi-
+catethemodificationsofthereferencemodelthatarerequiredto
+convergetowardsasolarmodelinagreementwithhelioseismic
+data. We recall here the major trends found by Buldgen et al.
+(2020)forthefourstructuralquantities,whichareusedforour
+analysisinSect.3.2.
+The first concerns the Ledoux discriminant. The major dis-
+crepancy between the Sun and the reference model occurs just
+below the convective boundary, with a large positive bump for
+thequantity(A
+Sun
+-A
+ref
+).
+Thesecondconcernsthespeedofsound.Thesamepositive
+bumpatthesamelocationasfortheLedouxdiscriminant,A,is
+observed for the quantity (c2
+s,Sun
+− c2
+s,ref
+)/c2
+s,ref
+. The corrections
+appliedtoAduringthereconstructionprocedurealsoreducethe
+discrepancyinthespeedofsoundintheradiativeregion.
+Thethirdconcernstheentropy.Largediscrepanciesareob-
+servedinboththeradiativeregionandtheconvectivezone.The
+1 Lesssub-adiabaticmeansthat|∇−∇
+ad
+|decreasescomparedtothe
+initialprofile.
+entropydiscrepancy(S
+Sun
+−S
+ref
+)/S
+ref
+hastwopositivepeaksin
+theradiativezone,onejustbelowtheovershootingregionanda
+larger peak deeper at ∼ 40% of the stellar radius. This discrep-
+ancyisnegativeintheconvectivezone.Thecorrectionsapplied
+toAhelpreducetheseentropydiscrepanciesinbothregions.
+The fourth concerns the density. The quantity (ρ Sun −
+ρ ref )/ρ ref has a negative peak in the radiative region, at ∼ 35%
+ofthestellarradius,andispositiveintheconvectivezone.
+Importantly,Buldgenetal.(2020)mentionthattheirrecon-
+struction procedure gives similar Ledoux discriminant profiles
+for a wide range of initial reference models. We used these re-
+sults to gauge whether the modifications of the thermal profile
+predicted by B21 can help in qualitatively improving all the
+structuralquantitiesusedbyBuldgenetal.(2020).
 3.2. Testingone-dimensionalsolarmodels
 Ourmainmotivationistoshowthepotentialimpactofthelocal
-3. Impactonone-dimensionalsolarstructure
 heating described in Sect. 2 on stellar models. We are not aim-
-models inginthisshortworkatconstructingthebestsolarmodeltofit
+inginthisshortworkatconstructingthebestsolarmodeltofit
 helioseismicconstraints.Usingstellarevolutioncodes,wehave
-3.1. Helioseismicconstraints
 adopted two different methods that can be found in the litera-
-Ourprimarygoalinthisshortpaperistoillustratethepotential, turetoconstructsolarmodels(e.g.Zhangetal.2012;Vinyoles
-qualitative impact of the local heating produced by overshoot- et al. 2017). Our first method relies on the thermal relaxation
-ing. We adopted a strategy inspired by the analysis of Buldgen of a reference model with solar radius and luminosity that is
-et al. (2020), who constructed a static structure of the Sun in modifiedtoreproducethetemperaturegradientintheovershoot-
-agreement with seismic inversions of the Ledoux discriminant inglayersuggestedbyhydrodynamicalsimulations.Inthiscase,
-definedby thechemicalabundancesarenotmodifiedbynuclearreactions,
+turetoconstructsolarmodels(e.g.Zhangetal.2012;Vinyoles
+et al. 2017). Our first method relies on the thermal relaxation
+of a reference model with solar radius and luminosity that is
+modifiedtoreproducethetemperaturegradientintheovershoot-
+inglayersuggestedbyhydrodynamicalsimulations.Inthiscase,
+thechemicalabundancesarenotmodifiedbynuclearreactions,
 mixing, or microscopic diffusion during the relaxation process.
-1 dlnP dlnρ
-A= − , (1) For these tests, we used the 1D Lyon stellar evolution code
-Γ dlnr dlnr (Baraffeetal.1998).Werepeatedthisexperimentbasedonther-
-1
-with Γ = (∂lnP/∂lnρ) . Starting from a reference evolu- mal relaxation with the stellar evolution code MONSTAR (e.g.
-1 ad
+For these tests, we used the 1D Lyon stellar evolution code
+(Baraffeetal.1998).Werepeatedthisexperimentbasedonther-
+mal relaxation with the stellar evolution code MONSTAR (e.g.
 Constantino et al. 2014) and obtained the same qualitative re-
-tionary model, Buldgen et al. (2020) used an inversion pro-
 sults.
-cedure to iteratively reconstruct a solar model. Successive in-
 The second method considers models that account for the
-versions of the Ledoux discriminant allowed them to obtain a
 modification of the temperature gradient in the overshooting
-model-independentprofileforthisquantity.Theirreconstruction
 layer from the zero age main sequence (ZAMS). The models
-method also gives solar structures that are in excellent agree-
 arethenevolveduntiltheyreachthesolarradiusandluminosity.
-mentwithotherstructuralinversions,namelytheentropy,S,the
-squareofthespeedofsound,c2,andthedensity,ρ.Toillustrate With this approach, changes in the chemical abundances from
-s nuclearreactions,microscopicdiffusion,andovershootingmix-
-the convergence of their reconstruction procedure, they show
+With this approach, changes in the chemical abundances from
+nuclearreactions,microscopicdiffusion,andovershootingmix-
 ing are also consistent with any modification of the structure
-(rightpanelsoftheirFigs.3-6)thesuccessiveiterationsthatcon-
 induced by the forced local heating in the overshooting layer.
-verge to an excellent level of agreement for the four structural
-inversions(A,S,c2,ρ)startingfromtheinitialreferencemodel These tests were performed with MONSTAR as it includes the
-adoptedintheirwos treatmentofmicroscopicdiffusion.
-rk.Thedifferencesfoundbetweentherecon-
+These tests were performed with MONSTAR as it includes the
+treatmentofmicroscopicdiffusion.
 The first method allows the impact of local heating in
-structedmodelandthereferencemodelareusefulastheyindi-
 the overshooting layer after thermal relaxation to be isolated.
-catethemodificationsofthereferencemodelthatarerequiredto
 The second method provides evolutionary models that are self-
-convergetowardsasolarmodelinagreementwithhelioseismic
 consistentsincetheeffectofthemodificationofthetemperature
-data. We recall here the major trends found by Buldgen et al.
 gradientisaccountedforduringtheirevolutiononthemainse-
-(2020)forthefourstructuralquantities,whichareusedforour
 quence.
-analysisinSect.3.2.
 Inthefollowing,weadoptamodificationofthelocaltemper-
-The first concerns the Ledoux discriminant. The major dis-
 aturegradientintheovershootinglayerthatqualitativelyrepro-
-crepancy between the Sun and the reference model occurs just
 ducesthebehaviourdisplayedinFig.1.Wedefineanovershoot-
-below the convective boundary, with a large positive bump for
-inglengthd = α H ,with H thepressurescaleheight
-thequantity(A -A ). ov ov P,CB P,CB
-Sun ref
-attheconvectiveboundaryandα afreeparameter.Wealsode-
-Thesecondconcernsthespeedofsound.Thesamepositive ov
-finetworadiallocations,r =r −d andr =r −d /2,
-bumpatthesamelocationasfortheLedouxdiscriminant,A,is ov CB ov mid CB ov
-observed for the quantity (c2 − c2 )/c2 . The corrections withr CBtheradiallocationoftheconvectiveboundary.Thetem-
-s,Sun s,ref s,ref peraturegradientismodifiedasfollows.Forr ≤ r < r ,we
-appliedtoAduringthereconstructionprocedurealsoreducethe mid CB
+inglengthd
+ov
+= α
+ov
+H
+P,CB
+,with H
+P,CB
+thepressurescaleheight
+attheconvectiveboundaryandα
+ov
+afreeparameter.Wealsode-
+finetworadiallocations,r
+ov
+=r
+CB
+−d
+ov
+andr
+mid
+=r
+CB
+−d
+ov
+/2,
+withr CB theradiallocationoftheconvectiveboundary.Thetem-
+peraturegradientismodifiedasfollows.Forr
+mid
+≤ r < r
+CB
+,we
 use
-discrepancyinthespeedofsoundintheradiativeregion.
-Thethirdconcernstheentropy.Largediscrepanciesareob- ∇=g(r)∇ +(1−g(r))∇ , (2)
-ad rad
-servedinboththeradiativeregionandtheconvectivezone.The
-with
-1 Lesssub-adiabaticmeansthat|∇−∇ |decreasescomparedtothe
+∇=g(r)∇
 ad
-initialprofile. g(r)= sin{[(r−r )/(r −r )]a×π/2}. (3)
-mid CB mid
++(1−g(r))∇
+rad
+, (2)
+with
+g(r)= sin{[(r−r
+mid
+)/(r
+CB
+−r
+mid
+)]a×π/2}. (3)
 3
 Baraffeetal.:Localheatingduetoconvectiveovershootingandthesolarmodellingproblem
-Forr ≤r<r ,weuse byEqs.(2)-(5)yieldssimilarqualitativechangesinthetemper-
-ov mid
-ature and the sub-adiabaticity close to the convective boundary
-∇=∇ −h(r)∇ , (4)
-rad ad thatwasfoundinthehydrodynamicalsimulationsofB21.
+Forr
+ov
+≤r<r
+mid
+,weuse
+∇=∇
+rad
+−h(r)∇
+ad
+, (4)
 with
-h(r)=b×sin{[(r −r)/(r −r )]×π}. (5)
-mid mid ov
+h(r)=b×sin{[(r
+mid
+−r)/(r
+mid
+−r
+ov
+)]×π}. (5)
 Sine functions are used in Eqs. (3) and (5) to reproduce the
 smoothvariationsinthetemperaturegradientbelowtheconvec-
 tiveboundaryproducedbythehydrodynamicalsimulations.We
@@ -268,20 +453,28 @@ results,butwenotethattheresultsareinsensitivetothevalueof
 b.
 3.2.1. Thermalequilibriummodels
 Thedetailsoftheprocedureforthefirstmethodarethefollow-
-ing.Wecalculatetheevolutionofa1 M modelwithaninitial
+ing.Wecalculatetheevolutionofa1 M
 (cid:12)
+modelwithaninitial
 helium mass fraction of 0.28, metallicity Z = 0.02, and a mix-
-ing length l = 1.9H . We use a reference model that is in
-mix P
+ing length l
+mix
+= 1.9H
+P
+. We use a reference model that is in
 thermal equilibrium2 and has the luminosity and radius of the
 current Sun. Starting from this reference model, the tempera-
 ture gradient is modified over a prescribed depth to mimic the
 impact of overshooting according to the hydrodynamical sim-
 ulations described in Sect. 2. We adopt the prescription given
-byEqs.(2)-(5)overadistanced belowtheconvectivebound-
+byEqs.(2)-(5)overadistanced
+ov
+belowtheconvectivebound-
+ary.WeshowtheresultsinFig.2forα
+ov
+=0.15andα
 ov
-ary.WeshowtheresultsinFig.2forα =0.15andα =0.20.
-ov ov
+=0.20.
 Theseovershootingwidthsareingoodagreementwiththemaxi-
 maldepthreachedbydownflowsbelowtheconvectiveboundary
 predicted by the hydrodynamical simulations for the solar-like
@@ -289,50 +482,86 @@ model investigated in B21. We note that the stellar model used
 inB21isslightlyunder-luminouscomparedtotheSun(seeB21
 fordetails).B21alsomentionthatoneshouldbecautiouswhen
 directlyapplyingtheovershootingdepthspredictedbytheirsim-
-Fig.2. Radial profile of the temperature difference and of the
 ulationstorealstarssincethefinalrelaxedstateforthesesimula-
-tions may have different properties from non-thermally relaxed sub-adiabaticityofa1Dsolar-likestructurewithamodifiedtem-
-perature gradient in the overshooting layer according to Eqs.
-states. We varied α between 0.15 and 0.35 and find that the
-ov (2)-(5). The temperature gradient is modified over a distance
+tions may have different properties from non-thermally relaxed
+states. We varied α
+ov
+between 0.15 and 0.35 and find that the
 results do not change qualitatively. However, the amplitude of
-d = α H ,withα =0.15inthelowerpanelandα =0.20
-the variations in the model properties depends on d (see be- ov ov P,CB ov ov
-ov intheupperpanel.Thedash-dottedredlinesshowthepercent-
+the variations in the model properties depends on d
+ov
+(see be-
 low).Asshownbelow,thissimpleprescriptionimplementedin
-agerelativetemperaturedifference,∆T/T ,with∆T =T−T .
-a stellar evolution code yields a local increase in the tempera- ref ref
-Thesolidbluelinescorrespondtothesub-adiabaticity(∇−∇ ).
-ture below the convective boundary, similar to that observed in ad
-The dashed black lines show the sub-adiabaticity of the refer-
+a stellar evolution code yields a local increase in the tempera-
+ture below the convective boundary, similar to that observed in
 thehydrodynamicalsimulations.WestressthatEqs.(2)-(5)have
-encemodel.Theconvectiveboundaryisindicatedbythevertical
 beenchosenforsimplicity.Theyareonlyaroughapproximation
-solidline.Theverticaldashedlineineachpanelislocatedata
 thatcanmimicthethermalprofilebehavioursuggestedinthe2D
-distanced belowtheconvectiveboundary.
-simulations. ov
+simulations.
 Themodelwithamodifiedtemperaturegradientisthenther-
 mally relaxed, that is to say, it is evolved over many thermal
-Theimpactonthewholestellarstructurewasquantifiedby
 timescaleswithoutanymodificationoftheabundancesfromnu-
-comparingthefourstructuralquantities(A,S,c2,ρ)betweenthe
-clearreactionsuntilthermalequilibriumisreached.Thetemper- s
-modified and the reference model. The results are displayed in
+clearreactionsuntilthermalequilibriumisreached.Thetemper-
 ature gradient is modified in the overshooting layer during the
-Fig.3,with∆Xdefinedas(X−X )foranystructuralquantityX.
-wholerelaxationprocess,andthisisreferredtoasa‘forcedlocal ref
-Theforcedlocalheatingintheovershootinglayerproducessim-
+wholerelaxationprocess,andthisisreferredtoasa‘forcedlocal
 heating’.Thisprocedureensuresthatthemodelwithamodified
-ilarpositivepeaksfor∆A,∆S,and∆c2,asfoundforthetemper-
-temperaturegradientcanbeconsistentlycomparedtotherefer- s
-ature.Themodificationthusprovidesthecorrectionrequiredto
+temperaturegradientcanbeconsistentlycomparedtotherefer-
 ence model. As shown in Fig. 2, the simple prescription given
-improvethediscrepancyfortheLedouxdiscriminantdescribed
-2 Thermalequilibriummeansthatthetotalnuclearenergyproduced in the first of the trends outlined in Sect. 3.1. Unsurprisingly,
-inthecentralregionsbalancestheradiativelossesatthesurface,i.e.the such a modification of the temperature gradient is expected to
-totalnuclearluminosity,L ,equalsthetotalstellarluminosity,L. improve the agreement with helioseismic constraints and help
+2 Thermalequilibriummeansthatthetotalnuclearenergyproduced
+inthecentralregionsbalancestheradiativelossesatthesurface,i.e.the
+totalnuclearluminosity,L
 nuc
+,equalsthetotalstellarluminosity,L.
+byEqs.(2)-(5)yieldssimilarqualitativechangesinthetemper-
+ature and the sub-adiabaticity close to the convective boundary
+thatwasfoundinthehydrodynamicalsimulationsofB21.
+Fig.2. Radial profile of the temperature difference and of the
+sub-adiabaticityofa1Dsolar-likestructurewithamodifiedtem-
+perature gradient in the overshooting layer according to Eqs.
+(2)-(5). The temperature gradient is modified over a distance
+d
+ov
+= α
+ov
+H
+P,CB
+,withα
+ov
+=0.15inthelowerpanelandα
+ov
+=0.20
+intheupperpanel.Thedash-dottedredlinesshowthepercent-
+agerelativetemperaturedifference,∆T/T
+ref
+,with∆T =T−T
+ref
+.
+Thesolidbluelinescorrespondtothesub-adiabaticity(∇−∇
+ad
+).
+The dashed black lines show the sub-adiabaticity of the refer-
+encemodel.Theconvectiveboundaryisindicatedbythevertical
+solidline.Theverticaldashedlineineachpanelislocatedata
+distanced
+ov
+belowtheconvectiveboundary.
+Theimpactonthewholestellarstructurewasquantifiedby
+comparingthefourstructuralquantities(A,S,c2
+s
+,ρ)betweenthe
+modified and the reference model. The results are displayed in
+Fig.3,with∆Xdefinedas(X−X
+ref
+)foranystructuralquantityX.
+Theforcedlocalheatingintheovershootinglayerproducessim-
+ilarpositivepeaksfor∆A,∆S,and∆c2
+s
+,asfoundforthetemper-
+ature.Themodificationthusprovidesthecorrectionrequiredto
+improvethediscrepancyfortheLedouxdiscriminantdescribed
+in the first of the trends outlined in Sect. 3.1. Unsurprisingly,
+such a modification of the temperature gradient is expected to
+improve the agreement with helioseismic constraints and help
 4
 Baraffeetal.:Localheatingduetoconvectiveovershootingandthesolarmodellingproblem
 remove the sound speed anomaly below the convective bound-
@@ -356,64 +585,132 @@ Thesetrendsareinsensitivetothedepthoverwhichthetem-
 peraturegradientismodified.Increasingthedepthincreasesthe
 magnitudeofthedifferencesbuthasnoimpactontheirsign. We
 findthatthemaximumvariationinthemodelproperties,suchas
-thespeedofsound,∆c2/c2 ,roughlyscaleswithd2 .Thisscal-
-s s,ref ov
+thespeedofsound,∆c2
+s
+/c2
+s,ref
+,roughlyscaleswithd2
+ov
+.Thisscal-
 ingislinkedtotheintegratedareabetweenthemodifiedtemper-
 aturegradientcurveandtheoneforthereference(non-modified)
 temperature gradient, which roughly decreases linearly with r.
 Thisareaisproportionaltothesquareoftheovershootingdepth,
 and consequently, the maximum variation in the model proper-
-tiesisalsoproportionaltod2 .Thequalitativetrendsalsoremain
+tiesisalsoproportionaltod2
 ov
+.Thequalitativetrendsalsoremain
 the same whether overshooting mixing in the reference model
 isignoredorincludedusingastepfunction(withinstantaneous
 mixing)oranexponentialdecayforthediffusioncoefficient(e.g.
 Freytagetal.1996).
 3.2.2. Self-consistentevolutionarymodels
 For the tests based on the second method, we ran different sets
-ofmodelswithdifferentcombinationsofassumptions,including Fig.3. Difference of various structural quantities between a
-or not microscopic diffusion and with or without overshooting model with a modified temperature gradient in the overshoot-
-inglayerandareferencemodelcalculatedwiththeLyonstellar
+ofmodelswithdifferentcombinationsofassumptions,including
+or not microscopic diffusion and with or without overshooting
 mixing. When overshooting mixing was included in the over-
-evolutioncode.Thetemperaturegradientinthemodifiedmodel
 shooting layer, it was based either on a step function or on an
-exponentialdecayforthediffusioncoefficient.Microscopicdif- is changed over a distance d ov = α ovH P,CB below the convec-
-tive boundary (indicated by the vertical solid line). The lower
+exponentialdecayforthediffusioncoefficient.Microscopicdif-
 fusionforHandHewasimplementedaccordingtoThouletal.
-panel shows the results for α = 0.15 and the upper panel for
-(1994). For these tests, the temperature gradient was modified ov
-α =0.20.
-accordingto Eqs.(2)-(5).AllmodelsstartfromtheZAMSand ov
+(1994). For these tests, the temperature gradient was modified
+accordingto Eqs.(2)-(5).AllmodelsstartfromtheZAMSand
 areevolveduntiltheyreachthesolarradiusandluminosityatthe
 sameage.Thiswasachievedbymakingsmalladjustmentstothe
-totheassumptionsregardingwhetherovershootingmixingisin-
-mixinglength,l .Themodelswithtemperaturegradientmod-
-mix cludedornot.Butatleastwefindsolutionsthatarecompatible
+mixinglength,l
+mix
+.Themodelswithtemperaturegradientmod-
 ificationswerecomparedtotherelevantreferencemodel,which
-withthefourtrendsfoundbyBuldgenetal.(2020)forthefour
 has no modification of the temperature gradient but everything
-structural quantities. This is illustrated in Fig. 4 with a model
 else is the same (i.e. the same treatment of microscopic diffu-
-that accounts for step function overshooting mixing over a dis-
 sionandofovershootingmixing).Theevolutionarymodelswith
-tanced =0.15H (lowerpanel)andd =0.20H (upper
-temperaturegradientmodificationsarethusself-consistent.The ov P,CB ov P,CB
-panel).
+temperaturegradientmodificationsarethusself-consistent.The
 maindifferencebetweenthisapproachandtheoneintheprevi-
 oussectionisthatthesemodelsaccumulatesmalldifferencesin,
 for example, central H abundance when compared to their ref-
-4. Conclusion
 erence model. These tests produce the same trends in the over-
-shooting layer as found for the tests based on the first method The tests performed in Sect. 3 are based on different methods
-(Sect. 3.2.1), independently of the treatment of overshooting (relaxedmodelsversusconsistentevolution)thatcanbeusedto
-mixing and whether microscopic diffusion is included or not. construct solar models. Independently of the method used, the
-Intheconvectivezone,allmodelsgiveapositivedifferencefor tests show that a local increase in the temperature in the over-
-thedensitybetweenthemodelwithamodifiedtemperaturegra- shootingregionduetoconvectivepenetrationprovidesthequali-
-dient and the relevant reference model. For the other quantities tativeeffectsrequiredtoimprovethespeedofsounddiscrepancy
-(S,c2),thedifferencesintheconvectivezoneareverysensitive belowtheconvectiveboundary.Thisdiscrepancyispersistentin
+shooting layer as found for the tests based on the first method
+(Sect. 3.2.1), independently of the treatment of overshooting
+mixing and whether microscopic diffusion is included or not.
+Intheconvectivezone,allmodelsgiveapositivedifferencefor
+thedensitybetweenthemodelwithamodifiedtemperaturegra-
+dient and the relevant reference model. For the other quantities
+(S,c2
 s
+),thedifferencesintheconvectivezoneareverysensitive
+Fig.3. Difference of various structural quantities between a
+model with a modified temperature gradient in the overshoot-
+inglayerandareferencemodelcalculatedwiththeLyonstellar
+evolutioncode.Thetemperaturegradientinthemodifiedmodel
+is changed over a distance d ov = α ov H P,CB below the convec-
+tive boundary (indicated by the vertical solid line). The lower
+panel shows the results for α
+ov
+= 0.15 and the upper panel for
+α
+ov
+=0.20.
+totheassumptionsregardingwhetherovershootingmixingisin-
+cludedornot.Butatleastwefindsolutionsthatarecompatible
+withthefourtrendsfoundbyBuldgenetal.(2020)forthefour
+structural quantities. This is illustrated in Fig. 4 with a model
+that accounts for step function overshooting mixing over a dis-
+tanced
+ov
+=0.15H
+P,CB
+(lowerpanel)andd
+ov
+=0.20H
+P,CB
+(upper
+panel).
+4. Conclusion
+The tests performed in Sect. 3 are based on different methods
+(relaxedmodelsversusconsistentevolution)thatcanbeusedto
+construct solar models. Independently of the method used, the
+tests show that a local increase in the temperature in the over-
+shootingregionduetoconvectivepenetrationprovidesthequali-
+tativeeffectsrequiredtoimprovethespeedofsounddiscrepancy
+belowtheconvectiveboundary.Thisdiscrepancyispersistentin
 5
 Baraffeetal.:Localheatingduetoconvectiveovershootingandthesolarmodellingproblem
+Fig.4. Difference of various structural quantities between a
+modified model and a reference model calculated with the
+MONSTAR stellar evolution code. The reference model is
+evolved from the ZAMS with microscopic diffusion and step
+functionovershootingmixingoveradistanced
+ov
+=α
+ov
+H
+P,CB
+be-
+lowtheconvectiveboundary.Thelowerpanelshowstheresults
+forα
+ov
+= 0.15andtheupperpanelforα
+ov
+= 0.20.Themodels
+with a modified temperature gradient in the overshooting layer
+(samemicroscopicdiffusionandovershootingmixingtreatment
+as the reference model) are evolved similarly from the ZAMS.
+Theconvectiveboundaryisindicatedbytheverticalsolidline.
+solar models that use low solar metal abundances. This is not
+surprising because an increase in the temperature in this spe-
+cificregionhaspreviouslybeeninvokedintheliteraturetosolve
+this problem, as mentioned in Sect. 1. However, the details of
+thephysicalprocessresponsibleforthislocalheatinghavebeen
+lacking, whereas we can now suggest an explanation based on
+the B21 results. The trends that we find for the four structural
+quantities (A, S, c2
+s
+, ρ) are robust below the convective bound-
+aryandinalargefractionoftheradiativecore,independentlyof
+thetreatmentofmixinganddiffusionandofthemethodforcon-
+structingthemodelsinSects.3.2.1and3.2.2.Ourexperiments
+additionally show that such a local change in the temperature,
+despitebeingmadeoveraverylimitedregionbelowtheconvec-
+tive boundary, can also affect the density, the entropy, and the
 speed of sound in the convective envelope after thermal relax-
 ation or evolution on the main sequence. How these quantities
 areaffectedintheconvectiveenvelopecomparedtoareference
@@ -451,40 +748,30 @@ models.Butaconsistentapproachshouldalsorequireaccount-
 ingforalocalchangeinthetemperaturegradient.Theimpactof
 thislocalheatinggoesintherightdirectiontoimprovenotonly
 the discrepancies of solar models below the convective bound-
-Fig.4. Difference of various structural quantities between a ary,butalsointheconvectiveenvelope.Thiseffectoffersanin-
-modified model and a reference model calculated with the
+ary,butalsointheconvectiveenvelope.Thiseffectoffersanin-
 teresting step forward for solving the solar modelling problem.
-MONSTAR stellar evolution code. The reference model is
 Inthisexploratorywork,weadoptasimpleprescriptionforthe
-evolved from the ZAMS with microscopic diffusion and step
 local heating in the overshooting layer since the main goal is
-functionovershootingmixingoveradistanced =α H be-
-ov ov P,CB to highlight its qualitative impact on stellar models. However,
-lowtheconvectiveboundary.Thelowerpanelshowstheresults thiseffectshouldnotbeconsideredasanotherfreeparameterin
-forα = 0.15andtheupperpanelforα = 0.20.Themodels
-ov ov the solar modelling problem. Future multi-dimensional hydro-
-with a modified temperature gradient in the overshooting layer
+to highlight its qualitative impact on stellar models. However,
+thiseffectshouldnotbeconsideredasanotherfreeparameterin
+the solar modelling problem. Future multi-dimensional hydro-
 dynamicalsimulationswillenablethisprocess,anditstreatment
-(samemicroscopicdiffusionandovershootingmixingtreatment
 in1Dstellarevolutioncodes,tobebetterconstrained.
-as the reference model) are evolved similarly from the ZAMS.
-Theconvectiveboundaryisindicatedbytheverticalsolidline.
 5. Acknowledgements
-solar models that use low solar metal abundances. This is not Wethankouranonymousrefereeforvaluablecommentswhich
-surprising because an increase in the temperature in this spe- helpedimprovingthemanuscript.Thisworkissupportedbythe
-cificregionhaspreviouslybeeninvokedintheliteraturetosolve ERC grant No. 787361-COBOM and the consolidated STFC
-this problem, as mentioned in Sect. 1. However, the details of grant ST/R000395/1. IB thanks the Max Planck Institut fu¨r
-thephysicalprocessresponsibleforthislocalheatinghavebeen Astrophysics(Garching)forwarmhospitalityduringcompletion
-lacking, whereas we can now suggest an explanation based on ofpartofthiswork.Theauthorswouldliketoacknowledgethe
-the B21 results. The trends that we find for the four structural use of the University of Exeter High-Performance Computing
-quantities (A, S, c2, ρ) are robust below the convective bound- (HPC) facility ISCA and of the DiRAC Data Intensive service
-s
-aryandinalargefractionoftheradiativecore,independentlyof atLeicester,operatedbytheUniversityofLeicesterITServices,
-thetreatmentofmixinganddiffusionandofthemethodforcon- whichformspartoftheSTFCDiRACHPCFacility.Theequip-
-structingthemodelsinSects.3.2.1and3.2.2.Ourexperiments ment was funded by BEIS capital funding via STFC capital
-additionally show that such a local change in the temperature, grants ST/K000373/1 and ST/R002363/1 and STFC DiRAC
-despitebeingmadeoveraverylimitedregionbelowtheconvec- OperationsgrantST/R001014/1.DiRACispartoftheNational
-tive boundary, can also affect the density, the entropy, and the e-Infrastructure.
+Wethankouranonymousrefereeforvaluablecommentswhich
+helpedimprovingthemanuscript.Thisworkissupportedbythe
+ERC grant No. 787361-COBOM and the consolidated STFC
+grant ST/R000395/1. IB thanks the Max Planck Institut f¨ ur
+Astrophysics(Garching)forwarmhospitalityduringcompletion
+ofpartofthiswork.Theauthorswouldliketoacknowledgethe
+use of the University of Exeter High-Performance Computing
+(HPC) facility ISCA and of the DiRAC Data Intensive service
+atLeicester,operatedbytheUniversityofLeicesterITServices,
+whichformspartoftheSTFCDiRACHPCFacility.Theequip-
+ment was funded by BEIS capital funding via STFC capital
+grants ST/K000373/1 and ST/R002363/1 and STFC DiRAC
+OperationsgrantST/R001014/1.DiRACispartoftheNational
+e-Infrastructure.
 6
 Baraffeetal.:Localheatingduetoconvectiveovershootingandthesolarmodellingproblem
 References
@@ -516,10 +803,10 @@ Freytag,B.,Ludwig,H.G.,&Steffen,M.1996,A&A,313,497
 Goffrey,T.,Pratt,J.,Viallet,M.,etal.2017,A&A,600,A7
 Grevesse,N.&Noels,A.1993,inOriginandEvolutionoftheElements,ed.
 N.Prantzos,E.Vangioni-Flam,&M.Casse,15–25
-Higl,J.,Mu¨ller,E.,&Weiss,A.2021,A&A,646,A133
+Higl,J.,M¨ uller,E.,&Weiss,A.2021,A&A,646,A133
 Hotta,H.2017,ApJ,843,52
 Hurlburt,N.E.,Toomre,J.,&Massaguer,J.M.1986,ApJ,311,563
-Ka¨pyla¨,P.J.2019,A&A,631,A122
+K¨ apyl¨ a,P.J.2019,A&A,631,A122
 Korre,L.,Brummell,N.,Garaud,P.,&Guervilly,C.2021,MNRAS,503,362
 Korre,L.,Garaud,P.,&Brummell,N.H.2019,MNRAS,484,1220
 Kunitomo,M.&Guillot,T.2021,arXive-prints,arXiv:2109.06492
@@ -530,7 +817,7 @@ Rogers,T.M.,Glatzmaier,G.A.,&Jones,C.A.2006,ApJ,653,765
 Thoul,A.A.,Bahcall,J.N.,&Loeb,A.1994,ApJ,421,828
 Viallet,M.,Baraffe,I.,&Walder,R.2011,A&A,531,A86
 Viallet,M.,Goffrey,T.,Baraffe,I.,etal.2016,A&A,586,A153
-Viallet,M.,Meakin,C.,Arnett,D.,&Moca´k,M.2013,ApJ,769,1
+Viallet,M.,Meakin,C.,Arnett,D.,&Moc´ ak,M.2013,ApJ,769,1
 Vinyoles,N.,Serenelli,A.M.,Villante,F.L.,etal.2017,ApJ,835,202
 Zahn,J.P.1991,A&A,252,179
 Zhang,C.,Deng,L.,Xiong,D.,&Christensen-Dalsgaard,J.2012,ApJ,759,
diff --git a/read/results/pdfplumber/2201.00201.txt b/read/results/pdfplumber/2201.00201.txt
index 1b98faf..d842263 100644
--- a/read/results/pdfplumber/2201.00201.txt
+++ b/read/results/pdfplumber/2201.00201.txt
@@ -8,260 +8,390 @@ December2021
 ABSTRACT
 Context. Pieces of empirical evidence suggest the existence of a period-age relation for long-period variables (LPVs). Yet, this
 propertyhashardlybeenstudiedontheoreticalgroundsthusfar.
-2202
 Aims.Weaimtoexaminetheperiod-agerelationusingtheresultsfromrecentnonlinearpulsationcalculations.
 Methods.WecombinedisochronemodelswiththeoreticalperiodstosimulatethedistributionoffundamentalmodeLPVpulsators,
 whichincludeMiras,intheperiod-ageplane,andwecompareditwithobservationsofLPVsinGalacticandMagellanicClouds’
 clusters.
-Results.Inagreementwithobservations,modelspredictthatthefundamentalmodeperioddecreaseswithincreasingagebecauseof naJ
+Results.Inagreementwithobservations,modelspredictthatthefundamentalmodeperioddecreaseswithincreasingagebecauseof
 thedominantroleofmassinshapingstellarstructureandevolution.Atagivenage,theperioddistributionshowsanon-negligible
 width and is skewed toward short periods, except for young C-rich stars. As a result, the period-age relations of O-rich and C-
-richmodelsarepredictedtohavedifferentslopes.Wederivedbest-fitrelationsdescribingageandinitialmassasafunctionofthe 71
+richmodelsarepredictedtohavedifferentslopes.Wederivedbest-fitrelationsdescribingageandinitialmassasafunctionofthe
 fundamentalmodeperiodforbothO-andC-richmodels.
 Conclusions.Thestudyconfirmsthepoweroftheperiod-agerelationstostudypopulationsofLPVsofspecifictypes,eitherO-rich
-orC-rich,onstatisticalgrounds.Indoingso,itisrecommendednottolimitastudytoMiras,whichwouldmakeitpronetoselection ]RS.hp-ortsa[
+orC-rich,onstatisticalgrounds.Indoingso,itisrecommendednottolimitastudytoMiras,whichwouldmakeitpronetoselection
 biases,butrathertoincludesemi-regularvariablesthatpulsatepredominantlyinthefundamentalmode.Theuseoftherelationsto
 studyindividualLPVs,ontheotherhand,requiresmorecaregiventhescatterintheperioddistributionpredictedatanygivenage.
 Key words. stars: AGB and post-AGB – stars: evolution – stars: variables: general – Galaxy: stellar content – Galaxy: globular
 clusters:general–MagellanicClouds
-1. Introduction the first time the radial velocity of LPVs in the southern hemi-
-sphere, by Feast (1963). In this seminal paper, Feast realized
+1. Introduction
 Low-tointermediate-massstarsapproachtheendoftheirlives
-thatLPVswithshorterperiodsmustbemembersofolderstellar
 throughtheasymptoticgiantbranch(AGB)evolutionaryphase,
+duringwhichtheyexhibitpulsationswithtimescalesuptosev-
+eralhundredsofdays,andtheyarehenceknownaslong-period
+variables (LPVs). If their V-band amplitude exceeds 2.5 mag,
+theyareclassifiedasMiras,whichhavearatherregularperiodic-
+ityandtheyarebelievedtopulsateonlyintheradialfundamen-
+tal mode (FM). If their photometric amplitude is smaller, they
+areknownassemi-regularvariables(SRVs),whicharethought
+tobetheprogenitorsofMiras.Thenamestemsfromthelesser
+degree of regularity of their light curves, likely due to the fact
+thattheycanpulsateinmultiplemodessimultaneously.
+ThenotionthatyoungerLPVstendtodisplaylongerperiods
+comparedtoolderones,oftenreferredtoastheperiod-age(PA)
+relation,isrootedintheempiricalevidencefromstellarkinemat-
+ics in the solar neighborhood. The first such piece of evidence
+isprobablyduetoMerrill(1923),whopointedoutthatM-type
+LPVsincreasinglylagbehindthelocalstandardofrest(i.e.,pos-
+sess a higher asymmetric drift) as their period decreases. Later
+studies(assummarizedbyWyatt&Cahn1983)confirmedthis
+behavior (also using proper motion data, e.g., Wilson & Mer-
+rill 1942), and showed that the shorter periods are also accom-
+panied by a higher velocity dispersion. Furthermore, groups of
+LPVswithrelativelyshortperiodsarecharacterizedbyagreater
+scaleheightabovetheGalacticplane.Thiswasshown,usingfor
+(cid:63) Corresponding author: M. Trabucchi
+(michele.trabucchi@unige.ch)
+the first time the radial velocity of LPVs in the southern hemi-
+sphere, by Feast (1963). In this seminal paper, Feast realized
+thatLPVswithshorterperiodsmustbemembersofolderstellar
 populationsandemphasizedtheirhighlypromisingapplications
-duringwhichtheyexhibitpulsationswithtimescalesuptosev- 2v10200.1022:viXra
 for both Galactic and extra-galactic studies over a wide range
-eralhundredsofdays,andtheyarehenceknownaslong-period
 of stellar ages. It should be noted that the PA relation is con-
-variables (LPVs). If their V-band amplitude exceeds 2.5 mag,
 nectedwiththeexistenceofaperiod-metallicityrelation(Lloyd
-theyareclassifiedasMiras,whichhavearatherregularperiodic-
 Evans&Menzies1973;LloydEvans1983b;Feast1981;Feast
-ityandtheyarebelievedtopulsateonlyintheradialfundamen-
 &Whitelock2000a,andreferencestherein).
-tal mode (FM). If their photometric amplitude is smaller, they
 A number of subsequent works have corroborated the PA
-areknownassemi-regularvariables(SRVs),whicharethought
 relation on empirical grounds, or have exploited it to interpret
-tobetheprogenitorsofMiras.Thenamestemsfromthelesser
 observationalresults.RelevantexamplesarestudiesofLPVsin
-degree of regularity of their light curves, likely due to the fact
 globular clusters (e.g., Feast 1966; Lloyd Evans 1983b; White-
-thattheycanpulsateinmultiplemodessimultaneously.
 lock 1986), toward the galactic center and bulge (Lloyd Evans
-ThenotionthatyoungerLPVstendtodisplaylongerperiods
 1976;Feastetal.1980;Whitelocketal.1991)orathighgalactic
-comparedtoolderones,oftenreferredtoastheperiod-age(PA)
 latitude(Jura&Kleinmann1992;Whitelocketal.1994).Ofpar-
-relation,isrootedintheempiricalevidencefromstellarkinemat-
 ticularinterestistherecentefforttoextendtheanalysisofLPVs
-ics in the solar neighborhood. The first such piece of evidence
 todwarfgalaxiesintheLocalGroup(Menziesetal.2002,2008;
-isprobablyduetoMerrill(1923),whopointedoutthatM-type
 Whitelocketal.2009;Menziesetal.2010,2011;Sakamotoetal.
-LPVsincreasinglylagbehindthelocalstandardofrest(i.e.,pos-
 2012; Battinelli & Demers 2012, 2013; Whitelock et al. 2013;
-sess a higher asymmetric drift) as their period decreases. Later
 Menziesetal.2015).
-studies(assummarizedbyWyatt&Cahn1983)confirmedthis
-behavior (also using proper motion data, e.g., Wilson & Mer- TheHipparcosmissionprovidedthemeanstorefinethere-
-rill 1942), and showed that the shorter periods are also accom- sults on the period-kinematics connection. This was done by
-panied by a higher velocity dispersion. Furthermore, groups of Feast&Whitelock(2000b),whofoundevidencesupportingthe
-LPVswithrelativelyshortperiodsarecharacterizedbyagreater existence of a bar-like structure in the Bulge from the orbits of
-scaleheightabovetheGalacticplane.Thiswasshown,usingfor localLPVs.AsimilarstudydedicatedtoC-richLPVswasper-
+TheHipparcosmissionprovidedthemeanstorefinethere-
+sults on the period-kinematics connection. This was done by
+Feast&Whitelock(2000b),whofoundevidencesupportingthe
+existence of a bar-like structure in the Bulge from the orbits of
+localLPVs.AsimilarstudydedicatedtoC-richLPVswasper-
 formed by Feast et al. (2006), who provided quantitative age
-(cid:63) Corresponding author: M. Trabucchi estimates for these stars. A summary of the main results and
-(michele.trabucchi@unige.ch) prospectsemergingfromtheseHipparcos-erastudiesisgivenby
+estimates for these stars. A summary of the main results and
+prospectsemergingfromtheseHipparcos-erastudiesisgivenby
 Articlenumber,page1of9
+a
+r
+X
+i
+v
+:
+2
+2
+0
+1
+.
+0
+0
+2
+0
+1
+v
+2
+[
+a
+s
+t
+r
+o
+-
+p
+h
+.
+S
+R
+]
+1
+7
+J
+a
+n
+2
+0
+2
+2
 A&Aproofs:manuscriptno.trabucchi_etal_2022_period_age_relation_of_lpvs
-Feast(2007).Morerecently,thestudyoftheGalaxywithLPVs 2.2. Data
+Feast(2007).Morerecently,thestudyoftheGalaxywithLPVs
 hasbeenstimulatedbythewealthofdataacquiredbylarge-scale
-surveys (e.g., Catchpole et al. 2016; Urago et al. 2020), espe- Asafirstsetofdata,weconsideredthecluster-LPVpairsused
-ciallytheGaiamission(Gradyetal.2019,2020). byGradyetal.(2019,seetheirtables1and 2).Theseconsistof
-ItseemsrelevantthatjustafewyearsafterthestudyofFeast 19clustersintheLargeMagellanicCloud,hostingatotalof20
-(1963), Kippenhahn & Smith (1969) predicted the PA relation potentialLPVmembers,andeightGalacticclusterseachhosting
-ofclassicalCepheidsfromstellarevolutionandpulsationmod- apotentialLPVmember.
-els. The theoretical modeling of Cepheids and of their period- WeexpandedthislistwithdataforLPVsinafewpopulous
-luminosity (PL) and PA relations is now an active field of re- clusters,namelytheGalacticclustersNGC362,NGC2808,47
-search(e.g.,Bonoetal.2005;Andersonetal.2016;DeSomma Tuc(NGC104),andωCen(NGC5139);theLMCclustersNGC
-etal.2020).Incontrast,whenitcomestotheoreticalassessments 1978andNGC1846;andtheclusterNGC419intheSmallMag-
-oftheLPVPArelation,theliteratureissurprisinglyscarce(espe- ellanicCloud(SMC).ThesourcelistsweretakenfromLebzel-
-ciallyincomparisonwiththesignificanteffortputintoempirical ter&Wood(2005,2007,2011,2016)andKamathetal.(2010),
-studies).Infact,wewereabletoidentifyonlytworelevantstud- whosenotationforthesourcesnamesisadoptedhere.Afterex-
-ies addressing this subject (Wyatt & Cahn 1983; Eggen 1998). cludingthestarLW3inNGC1846andthestarV129inωCen,
-Thediscrepancyinperiodpredictionsbetweenlinearandnonlin- whichareunlikelyclustermembers(cf.Lebzelter&Wood2007,
-ear pulsation models (e.g., Ya’Ari & Tuchman 1996; Lebzelter 2016),wereachedatotalof203sources.
+surveys (e.g., Catchpole et al. 2016; Urago et al. 2020), espe-
+ciallytheGaiamission(Gradyetal.2019,2020).
+ItseemsrelevantthatjustafewyearsafterthestudyofFeast
+(1963), Kippenhahn & Smith (1969) predicted the PA relation
+ofclassicalCepheidsfromstellarevolutionandpulsationmod-
+els. The theoretical modeling of Cepheids and of their period-
+luminosity (PL) and PA relations is now an active field of re-
+search(e.g.,Bonoetal.2005;Andersonetal.2016;DeSomma
+etal.2020).Incontrast,whenitcomestotheoreticalassessments
+oftheLPVPArelation,theliteratureissurprisinglyscarce(espe-
+ciallyincomparisonwiththesignificanteffortputintoempirical
+studies).Infact,wewereabletoidentifyonlytworelevantstud-
+ies addressing this subject (Wyatt & Cahn 1983; Eggen 1998).
+Thediscrepancyinperiodpredictionsbetweenlinearandnonlin-
+ear pulsation models (e.g., Ya’Ari & Tuchman 1996; Lebzelter
 &Wood2005;Trabucchietal.2021b),andmoregenerallythe
-The aforementioned studies also provide a lot of informa-
 difficultyinmodelingthestructureofevolvedredgiants,likely
-tion,possiblyincluding JHK photometry,oneormoreperiods,
 playedaroleinhamperingthetheoreticalinvestigationofthePA
-and a spectral type. In order to expand on the available data,
 relationofLPVs.
-wecrossmatchedtheselectedsamplewiththeTwoMicronAll-
 MotivatedbythereleaseofupdatedAGBevolutionarymod-
-Sky Survey (2MASS, Skrutskie et al. 2006), the all-sky data
 els(Pastorellietal.2019,2020)andtheavailabilityofnew,ac-
-release of the Wide-field Infrared Survey Explorer (AllWISE,
 curate model predictions for the FM period of AGB stars (Tra-
-Cutri et al. 2013), the catalog of variable stars from the All-
 bucchietal.2019,2021b),wedecidedtoinvestigatethenature
-Sky Automated Survey for SuperNovae (ASAS-SN Jayasinghe
 ofthePArelationofLPVsontheoreticalgrounds.Theadopted
-etal.2020),thecatalogsofLPVsintheMagellanicCloudsfrom
 models and observed data are described in Sect. 2, while in
-thethirdphaseoftheOpticalGravitationalLensingExperiment
 Sect. 3 we present the results, which are discussed in Sect. 4.
-(OGLE-III,Soszyn´skietal.2009,2011),theearlythirddatare-
 WesummarizeourconclusionsinSect.5.
+2. Methods
+2.1. Models
+We employed PARSEC-COLIBRI isochrones (Marigo et al.
+2017) with stellar evolutionary models from Pastorelli et al.
+(2019, 2020) for the thermally pulsing asymptotic giant branch
+(TP-AGB) phase, and from PARSEC (Bressan et al. 2012, ver-
+sion 1.2S) for the preceding evolution. The adopted set of
+isochrones covers the range 0.001 to 0.016 in initial metal-
+licity (Z
+i
+), with a 0.001 step, while it spans the age interval
+8.00 ≤ log(τ/yr) ≤ 10.45 with a step of 0.05. Since the AGB
+phaseisshort-lived,itonlyspansasmallrangeofinitialmasses
+foreachgivenisochrone,oforderof10−2M
+(cid:12)
+atmost.
+Theadoptedisochronesincludelinearpulsationperiodsfrom
+Trabucchietal.(2019)forovertonemodesandnonlinearperiods
+computed with the period-mass-radius relation from Trabucchi
+etal.(2021b)fortheFM1.Pulsationpropertieswerecomputed
+alongboththeearly-AGBandtheTP-AGB.Wedidnotextend
+ouranalysistoredsupergiantstarsasthepulsationprescription
+weemployedarestrictlyvalidonlybelow7M
+(cid:12)
+.
+Werecallthat,withtheadoptednonlinearrelation,theperiod
+increaseswithradius(R)asabrokenpowerlaw,whoseexponent
+decreasesassoonasthe“bendingradius”R
+b
+isexceeded,itand
+becomes zero when the “saturation radius” R
+s
+> R
+b
+is reached
+(i.e.,theperiodbecomesindependentofradius).Theexactval-
+ues of R b and R s , as well as of the exponents, depend on the current mass (M). We assume that the FM is dominant if the
+stellar radius is larger than the critical value R
+dom,0
+, which we
+computedfromthecurrentstellarmassusingEq.4ofTrabucchi
+etal.(2021b).
+1 Hereinafter, whenever we discuss periods, it should be understood
+thatwerefertoFMperiodsonwhichthisworkisfocused.
+2.2. Data
+As a first set of data, we considered the cluster-LPV pairs used
+by Grady et al. (2019, see their tables 1 and 2). These consist of
+19 clusters in the Large Magellanic Cloud, hosting a total of 20
+potential LPV members, and eight Galactic clusters each hosting
+a potential LPV member.
+We expanded this list with data for LPVs in a few populous
+clusters, namely the Galactic clusters NGC 362, NGC 2808, 47
+Tuc (NGC 104), and ω Cen (NGC 5139); the LMC clusters NGC
+1978 and NGC 1846; and the cluster NGC 419 in the Small Mag-
+ellanic Cloud (SMC). The source lists were taken from Lebzel-
+ter & Wood (2005, 2007, 2011, 2016) and Kamath et al. (2010),
+whose notation for the source names is adopted here. After ex-
+cluding the star LW3 in NGC 1846 and the star V129 in ω Cen,
+which are unlikely cluster members (cf. Lebzelter & Wood 2007,
+2016), we reached a total of 203 sources.
+The aforementioned studies also provide a lot of informa-
+tion, possibly including JHK photometry, one or more periods,
+and a spectral type. In order to expand on the available data,
+we cross-matched the selected sample with the Two Micron All-
+Sky Survey (2MASS, Skrutskie et al. 2006), the all-sky data
+release of the Wide-field Infrared Survey Explorer (AllWISE,
+Cutri et al. 2013), the catalog of variable stars from the All-
+Sky Automated Survey for SuperNovae (ASAS-SN, Jayasinghe
+et al. 2020), the catalogs of LPVs in the Magellanic Clouds from
+the third phase of the Optical Gravitational Lensing Experiment
+(OGLE-III, Soszyński et al. 2009, 2011), the early third data re-
 lease from the Gaia mission (Gaia EDR3, Gaia Collaboration
 etal.2021),andthecatalogofLPVcandidatesfromGaiaDR2
 (Mowlavietal.2018).
-2. Methods
 Following Grady et al. (2019), we took ages from
-2.1. Models Kharchenkoetal.(2016)andBaumgardtetal.(2013)forclusters
+Kharchenko et al. (2016) and Baumgardt et al. (2013) for clusters
 intheGalaxyandLMC,respectively,therebyensuringthatages
-We employed PARSEC-COLIBRI isochrones (Marigo et al.
 would be homogeneously derived for clusters in both galaxies.
-2017) with stellar evolutionary models from Pastorelli et al.
 Age uncertainties from Baumgardt et al. (2013), provided for
-(2019, 2020) for the thermally pulsing asymptotic giant branch each cluster, are generally around σ (cid:39) 0.05. Kharchenko
+each cluster, are generally around σ
 log(τ)
-(TP-AGB) phase, and from PARSEC (Bressan et al. 2012, ver-
+≃ 0.05. Kharchenko
 et al. (2016) do not provide age uncertainties, but a reasonable
-sion 1.2S) for the preceding evolution. The adopted set of upper limit for their method should be σ = 0.2 based on
+upper limit for their method should be σ
 log(τ)
-isochrones covers the range 0.001 to 0.016 in initial metal-
+= 0.2 based on
 the analysis of Kharchenko et al. (2005) (the same value was
-licity (Z), with a 0.001 step, while it spans the age interval
-i adoptedbyGradyetal.2019,intheirFig.7).
-8.00 ≤ log(τ/yr) ≤ 10.45 with a step of 0.05. Since the AGB
+adopted by Grady et al. 2019, in their Fig. 7).
 As discussed by Kamath et al. (2010), the age of the SMC
-phaseisshort-lived,itonlyspansasmallrangeofinitialmasses
-foreachgivenisochrone,oforderof10−2M atmost. cluster NGC 419 is believed to be around 1.4-1.6 Gyr. This is
-(cid:12) consistentwiththevalueτ = 1.45±0.05GyrfromGoudfrooij
-Theadoptedisochronesincludelinearpulsationperiodsfrom
+cluster NGC 419 is believed to be around 1.4-1.6 Gyr. This is
+consistent with the value τ = 1.45 ± 0.05 Gyr from Goudfrooij
 et al. (2014), while it is as young as τ (cid:39) 0.89±0.015 Gyr ac-
-Trabucchietal.(2019)forovertonemodesandnonlinearperiods
 cordingtoPerrenetal.(2017).Sinceanaccurateestimateisnot
-computed with the period-mass-radius relation from Trabucchi
 necessaryforourexploratoryanalysis,wetookaroughaverage
-etal.(2021b)fortheFM1.Pulsationpropertieswerecomputed
 and assumed log(τ/yr) = 9.1±0.1. NGC 419 and NGC 1846
-alongboththeearly-AGBandtheTP-AGB.Wedidnotextend
 likely exhibit TP-AGB boosting (Girardi et al. 2013). We note
-ouranalysistoredsupergiantstarsasthepulsationprescription
 thatsomeclustersshowmultiplestellarpopulations,whoseage
-weemployedarestrictlyvalidonlybelow7M .
-(cid:12) spreadhasbeenestimatedinsomecases(e.g.,Mackey&Broby
-Werecallthat,withtheadoptednonlinearrelation,theperiod
+spread has been estimated in some cases (e.g., Mackey & Broby
 Nielsen2007;Joo&Lee2013;Villanovaetal.2014)andiscon-
-increaseswithradius(R)asabrokenpowerlaw,whoseexponent
 sistentwiththeageuncertaintiesweadopted.
-decreasesassoonasthe“bendingradius”R isexceeded,itand
-b
-becomes zero when the “saturation radius” R > R is reached Distances of Galactic clusters were also taken from
-s b
-(i.e.,theperiodbecomesindependentofradius).Theexactval- Kharchenko et al. (2016), while for the Magellanic Clouds and
-cu ue rs reo nf tR mb aa sn sd (MR s ), .a Ws ew ae sl sl ua ms eof tht ah te thex ep Fo Mnen it ss, dod mep ie nn ad nto in t th he 0th .0ei 9r mclu agste ar ns dw µe SMad Cop =ted 18t .h 9e 6d ±ist 0a .n 0c 2e mm ao gdu frl oi mµ LM deC G= ri1 js8. e4 t9 a±
-f e l.
-stellar radius is larger than the critical value R , which we (2017).Wesearchedfordataoninterstellarextinctionfromsev-
-dom,0
-computedfromthecurrentstellarmassusingEq.4ofTrabucchi eralliteratureworks(e.g.,Nayaketal.2016;Kharchenkoetal.
-etal.(2021b). 2016; Perren et al. 2017), all of which suggest that extinction
-in the K filter is smaller than ∼ 0.1 mag for most of the clus-
+Distances of Galactic clusters were also taken from
+Kharchenko et al. (2016), while for the Magellanic Clouds and
+their clusters we adopted the distance moduli $\mu_\mathrm{LMC} = 18.49 \pm 0.09$ mag and $\mu_\mathrm{SMC} = 18.96 \pm 0.02$ mag from de Grijs et al.
+(2017). We searched for data on interstellar extinction from sev-
+eral literature works (e.g., Nayak et al. 2016; Kharchenko et al.
+2016; Perren et al. 2017), all of which suggest that extinction
+in the K
 s
-1 Hereinafter, whenever we discuss periods, it should be understood tersweconsidered,andatmostaslargeas∼ 0.3mag,whichis
-thatwerefertoFMperiodsonwhichthisworkisfocused. negligibleforourpurposes.
+filter is smaller than ∼ 0.1 mag for most of the clus-
+ters we considered, and at most as large as ∼ 0.3 mag, which is
+negligible for our purposes.
 Articlenumber,page2of9
 Trabucchietal.:Theperiod-agerelationofLPVs
-A detailed membership verification is beyond the scope of On the basis of the average age of these two groups of clus-
-this work, and we relied on the checks performed by authors ters and the associated uncertainty, and taking the discrete age
-whose source lists we adopted. It should be kept in mind that samplingoftheisochronesintoaccount,weconsideredtheage
-somesourcesmaynotberealclustermembers. rangeslog(τ/yr)=9.15±0.10andlog(τ/yr)=10.10±0.20.Pe-
-For sources without a spectral type, we used the Gaia- rioddistributionsatthoseagesaredisplayedinpanels(b)and(c)
-2MASS diagram (Lebzelter et al. 2018, 2019) to determine ofFig.1,respectively,showinggoodagreementbetweenmodel
-whethertheyareO-orC-rich.Weusedthenear-infraredperiod- predictionsandobservations.Wenotethatinbothcases,thedis-
-luminosity diagram to identify the most likely pulsation mode tributionisskewedtowardshortperiods,whichseemstobetrue
-associated with each period of each observed source. We se- atallagesforO-richstars.Thiscanbeseeninpanel(a)ofFig.2,
-lectedonlyFMperiodsandrejectedlongsecondaryperiodsand whichisaversionofthePAplanelimitedtoanO-richcompo-
-periods attributed to overtone mode pulsation. The details of sition2. Indeed, although at τ (cid:46) 5 Gyr the observed sample is
-these classification steps are provided in Appendix A. Out of veryscarce,itappearstobeconsistentwithmodelspredictinga
-203 sources from the initial list, we identified 95 LPVs pulsat- more densely populated region in the shorter-period half of the
-ing in the FM, consisting of 40 C-rich and 55 O-rich sources. PAdistribution.
-Theyconsistof29Miras,33semi-regularvariables,and33other The case of C-stars, shown in panel (b) of Fig. 2, is differ-
-sources(mostlikelyLPVs)whosevariabilitytypehasnotbeen ent. They only form over a restricted range of initial masses
-determined.We notethat,withtheexceptionof GaiaDR2,the andages,sotheiroccurrenceinagivenstellarpopulationisan
-sourcesofvariabilitydataconsideredheredonotreporttheun- age indicator on its own. Toward the low-mass (old age) side
-certaintyassociatedwithobservedperiods.However,sinceperi- of the C-star regime, the behavior is similar to the O-rich case
-odswerederivedinmostcasesfromwell-sampled,high-quality withaconcentrationaroundrelativelyshortperiods.C-richmod-
-variability observations, relative period uncertainties are most els tend to have a lower surface temperature and larger radii,
-likelynegligiblecomparedwiththoseassociatedwithage. at a given mass, compared to O-rich models, and thus they at-
-tainlongerperiodsmoreeasily.Thisoccursinparticulartoward
-highermasses,sothatyoungerC-richmodelsaremoreconcen-
-3. Results trated at longer periods, leading to a steeper PA relation com-
-pared with the O-rich case. These predictions agree with ob-
-Panel (a) of Fig. 1 shows a comparison between model predic- servations on the old side of the period distribution, while the
-tions and observations in the P FM–log(τ/yr) plane. The former scarcityofCstarsatτ(cid:39)0.6Gyrpreventsusfromperforminga
-are displayed by a density map showing the expected number comparisonatyoungerages.
-N FM of LPVs pulsating in the FM in each period-age bin, nor- InappendixB,weprovideanalyticPArelationsbyfittingthe
-malizedtomaximum.Modelpredictionsareingoodagreement high-densitypartsoftheO-andC-richmodels’distribution.We
-with data derived from observations (i.e., individual LPVs in emphasizethat,becauseofthelargescatteroftherelation,ages
-clusters, represented by symbols), and they show that the pe- estimatedinthiswayforindividualLPVsareboundtobehighly
-riodofLPVspulsatingintheFMdecreaseswithincreasingage. uncertain.Asawaytoassesstheerrorinagedetermination,we
-Crosses mark the average properties of the three groups of C- also provide analytic best-fit relations to the boundaries of the
-rich LPVs from Feast et al. (2006, their table 4), which fit the PA distribution of the models in the appendix. These relations
-generalpatternwiththeexceptionoftheirgroup3,estimatedto aredisplayedinFig.2.
+A detailed membership verification is beyond the scope of
+this work, and we relied on the checks performed by authors
+whose source lists we adopted. It should be kept in mind that
+some sources may not be real cluster members.
+For sources without a spectral type, we used the Gaia-
+2MASS diagram (Lebzelter et al. 2018, 2019) to determine
+whether they are O- or C-rich. We used the near-infrared period-
+luminosity diagram to identify the most likely pulsation mode
+associated with each period of each observed source. We se-
+lected only FM periods and rejected long secondary periods and
+periods attributed to overtone mode pulsation. The details of
+these classification steps are provided in Appendix A. Out of
+203 sources from the initial list, we identified 95 LPVs pulsat-
+ing in the FM, consisting of 40 C-rich and 55 O-rich sources.
+They consist of 29 Miras, 33 semi-regular variables, and 33 other
+sources (most likely LPVs) whose variability type has not been
+determined. We note that, with the exception of Gaia DR2, the
+sources of variability data considered here do not report the un-
+certainty associated with observed periods. However, since peri-
+ods were derived in most cases from well-sampled, high-quality
+variability observations, relative period uncertainties are most
+likely negligible compared with those associated with age.
+3. Results
+Panel (a) of Fig. 1 shows a comparison between model predic-
+tions and observations in the
+$P_\mathrm{FM}$–log(τ/yr)
+plane. The former
+are displayed by a density map showing the expected number
+$N_\mathrm{FM}$ of LPVs pulsating in the FM in each period-age bin, nor-
+malized to maximum. Model predictions are in good agreement
+with data derived from observations (i.e., individual LPVs in
+clusters, represented by symbols), and they show that the pe-
+riod of LPVs pulsating in the FM decreases with increasing age.
+Crosses mark the average properties of the three groups of C-
+rich LPVs from Feast et al. (2006, their table 4), which fit the
+general pattern with the exception of their group 3, estimated to
 beolderthanwhatourmodelspredictatP(cid:39)650.
 We also show a linear best-fit to the models distribution
-(weighted by N ), which shows a fairly good agreement with 4. Discussion
+(weighted by N
 FM
+), which shows a fairly good agreement with
 the best-fit to observations by Grady et al. (2019, also shown).
-In general agreement with observations, models confirm that
 However, the best-fit line does not fully capture the properties
-LPVs pulsating predominantly in the FM follow a PA relation,
 ofthepredictions,noroftheobservedtrend.Indeed,modelsare
-whichexhibitsanon-negligibledispersion.Thankstothenewly
 indicativeofasubstantialdispersionaroundtherelation.Forin-
-availablenonlinearperiodpredictions,wewereabletobetterex-
 stance,at1Gyr,theFMperiodrangesfrom∼200daysto∼550
-aminethenatureofthisrelationandtheoriginofitsscatter.
 days.Conversely,LPVspulsatingintheFMwithaperiodof350
-ThePArelationisintimatelyconnectedwiththePLrelation,
 daysarepredictedtobeatleast∼200Myrold,buttheycanbeas
-bothpatternsemergingbecauseoftheprominentroleofmassin
 old as ∼3 Gyr. Observed data are consistent with the predicted
-shaping stellar structure and evolution. Indeed, stellar mass de-
 spread,althoughtheagreementcannotbeconsideredastheob-
-terminesthelifetimesofthemainevolutionarystages,andthus
 servedsampleadoptedisnotcomplete.
-the age of stars in the AGB phase. Pulsation models (Trabuc-
 Nonetheless, it is relevant that some clusters host multiple
-chi et al. 2021b) show that the radius R (and correspond-
-LPVs, which are thus almost coeval, and they do span a wide dom,0
-ingluminosity)attheonsetofdominantFMpulsation(DFMP)
+LPVs, which are thus almost coeval, and they do span a wide
 periodrange.Someoftheseclustershostmultiplestellarpopu-
-increases with mass, so that the most massive FM-dominated
 lationsthatarebelievedtohaveformedoveratimecomparable
-LPVs are brighter. They also have longer periods, as this in-
 with the age uncertainties we adopted. This means that longer-
-creases with radius. In other words, the period, luminosity, and
 period(moremassive)LPVsintheseclustersprobablyleanto-
-age near the tip of the AGB are all functions of initial stellar
 wardtheloweragelimitassumedfortheirhostcluster,andthe
-mass(atleasttoagoodapproximation).
 opposite is true at shorter periods. This tends to strengthen the
-WenotethatthiswouldnotbethecaseiftheFMweredom-
 agreementbetweenmodelsandobservations.
-inantalongtheentireAGB,asthelargechangeinradiusduring
 Our data set samples the intermediate-age range (NGC 419
-thisphasewouldresultinawiderangeofperiodsatagivenage.
 and NGC 1846) relatively well as well as old ages (ω Cen, 47
-ItistheveryfactthatDFMPoccursonlyduringthefinalportion
 Tuc, NGC 362, and NGC 2808). This provides us with the op-
-portunity to study the period distribution at these ages, and for 2 AfurtherversionofthePAplanehighlightingbothchemicaltypes
-a more detailed comparison between models and observations. canbefoundinFig.A.2ofappendixA.1.
+portunity to study the period distribution at these ages, and for
+a more detailed comparison between models and observations.
+On the basis of the average age of these two groups of clus-
+ters and the associated uncertainty, and taking the discrete age
+sampling of the isochrones into account, we considered the age
+ranges log(τ/yr) = 9.15 ± 0.10 and log(τ/yr) = 10.10 ± 0.20. Pe-
+riod distributions at those ages are displayed in panels (b) and (c)
+of Fig. 1, respectively, showing good agreement between model
+predictions and observations. We note that in both cases, the dis-
+tribution is skewed toward short periods, which seems to be true
+at all ages for O-rich stars. This can be seen in panel (a) of Fig. 2,
+which is a version of the PA plane limited to an O-rich compo-
+sition². Indeed, although at τ ≲ 5 Gyr the observed sample is
+very scarce, it appears to be consistent with models predicting a
+more densely populated region in the shorter-period half of the
+PA distribution.
+The case of C-stars, shown in panel (b) of Fig. 2, is differ-
+ent. They only form over a restricted range of initial masses
+and ages, so their occurrence in a given stellar population is an
+age indicator on its own. Toward the low-mass (old age) side
+of the C-star regime, the behavior is similar to the O-rich case
+with a concentration around relatively short periods. C-rich mod-
+els tend to have a lower surface temperature and larger radii,
+at a given mass, compared to O-rich models, and thus they at-
+tain longer periods more easily. This occurs in particular toward
+higher masses, so that younger C-rich models are more concen-
+trated at longer periods, leading to a steeper PA relation com-
+pared with the O-rich case. These predictions agree with ob-
+servations on the old side of the period distribution, while the
+scarcity of C stars at τ ≃ 0.6 Gyr prevents us from performing a
+comparison at younger ages.
+In Appendix B, we provide analytic PA relations by fitting the
+high-density parts of the O- and C-rich models’ distribution. We
+emphasize that, because of the large scatter of the relation, ages
+estimated in this way for individual LPVs are bound to be highly
+uncertain. As a way to assess the error in age determination, we
+also provide analytic best-fit relations to the boundaries of the
+PA distribution of the models in the appendix. These relations
+are displayed in Fig. 2.
+4. Discussion
+In general agreement with observations, models confirm that
+LPVs pulsating predominantly in the FM follow a PA relation,
+which exhibits a non-negligible dispersion. Thanks to the newly
+available nonlinear period predictions, we were able to better ex-
+amine the nature of this relation and the origin of its scatter.
+The PA relation is intimately connected with the PL relation,
+both patterns emerging because of the prominent role of mass in
+shaping stellar structure and evolution. Indeed, stellar mass de-
+termines the lifetimes of the main evolutionary stages, and thus
+the age of stars in the AGB phase. Pulsation models (Trabuc-
+chi et al. 2021b) show that the radius
+$R_\mathrm{dom,0}$
+(and correspond-
+ing luminosity) at the onset of dominant FM pulsation (DFMP)
+increases with mass, so that the most massive FM-dominated
+LPVs are brighter. They also have longer periods, as this in-
+creases with radius. In other words, the period, luminosity, and
+age near the tip of the AGB are all functions of initial stellar
+mass (at least to a good approximation).
+We note that this would not be the case if the FM were dom-
+inant along the entire AGB, as the large change in radius during
+this phase would result in a wide range of periods at a given age.
+It is the very fact that DFMP occurs only during the final portion
+2 A further version of the PA plane highlighting both chemical types
+can be found in Fig. A.2 of Appendix A.1.
 Articlenumber,page3of9
 A&Aproofs:manuscriptno.trabucchi_etal_2022_period_age_relation_of_lpvs
 Fig.1.Period-agediagram.Panel(a)showsthepredictedperiod-agedistribution(darkertonesindicateahigherexpectednumberofLPVson
@@ -273,220 +403,367 @@ inpanel(a)bytheblueandredshadedareas(atlog(τ/yr) ∼ 9.15and∼ 10.10,respectiv
 suppressedinpanel(a).
 Fig.2.SimilartoFig.1,butlimitedtoO-rich(leftpanel)andC-rich(rightpanel)LPVs.Thesolidlinemarksthebestfittothemodels,while
 dashedlinesarebestfitstotheedgesofthemodeldistribution(seethetextformoredetails).
-oftheAGBthatlimitstherangeofperiodsaFM-pulsatingLPV Thisfeatureisstrengthenedwhenasetofisochronesiscon-
-canhaveatagivenage.Yet,theDFMPpartoftheAGBislong sidered which spans a range of initial metallicities because the
-enoughforsignificantvariationsinradiustooccur,whichresult adopted criterion for the onset of DFMP does not depend on
-inthedispersionofthePArelationseeninFig.1. metallicity, but the FM period does as metal-poor LPVs are
+oftheAGBthatlimitstherangeofperiodsaFM-pulsatingLPV
+canhaveatagivenage.Yet,theDFMPpartoftheAGBislong
+enoughforsignificantvariationsinradiustooccur,whichresult
+inthedispersionofthePArelationseeninFig.1.
+At a given initial metallicity $Z_i$, the shape of the period dis-
+tribution primarily results from the fact that, throughout the TP-
+AGB (the stage during which the FM is normally excited), the
+envelope expansion accelerates, while the period becomes pro-
+gressively less sensitive to changes in radius (see Appendix C).
+In particular, the slope of the period-radius relation decreases
+sharply at $P_b = P(R_b)$. The FM period distribution is roughly
+symmetric around that value, but at its short-period side, the FM
+is not dominant. Therefore, when only FM-dominated LPVs are
+considered, as is done here, the observed period distribution ap-
+pears skewed toward short periods.
+Thisfeatureisstrengthenedwhenasetofisochronesiscon-
+sidered which spans a range of initial metallicities because the
+adopted criterion for the onset of DFMP does not depend on
+metallicity, but the FM period does as metal-poor LPVs are
 warmer and have smaller radii compared with metal-rich ones.
-AtagiveninitialmetallicityZ i,theshapeoftheperioddis- As a consequence, the bulk of the period distribution of metal-
-tributionprimarilyresultsfromthefactthat,throughouttheTP- poorLPVsisatperiodsshorterthanP ,sotheyonlycontribute
+As a consequence, the bulk of the period distribution of metal-
+poorLPVsisatperiodsshorterthanP
 b
-AGB (the stage during which the FM is normally excited), the to the global distribution (i.e., at all Z at a given age) over a
+,sotheyonlycontribute
+to the global distribution (i.e., at all Z
 i
-envelope expansion accelerates, while the period becomes pro- smallperiodrangeatP(cid:38) P .Incontrast,metal-richLPVshave
+at a given age) over a
+small period range at P ≳ P
 b
-gressivelylesssensitivetochangesinradius(seeAppendixC). periodswellbeyondP ,sotheycontributebothatthatvalueand
+.Incontrast,metal-richLPVshave
+periodswellbeyondP
 b
-In particular, the slope of the period-radius relation decreases atlongerperiods.TheresultisanexcessofFM-dominatedLPVs
-sharply at P b = P(R b). The FM period distribution is roughly nearP b,thatistosayontheshortsideoftheoverallperioddis-
-symmetricaroundthatvalue,butatitsshort-periodside,theFM tribution.
-isnotdominant.Therefore,whenonlyFM-dominatedLPVsare
-considered,asisdonehere,theobservedperioddistributionap- We note that, in contrast with the prescription we adopted,
-pearsskewedtowardshortperiods. the onset of DFMP in reality is probably sensitive to metallic-
+,sotheycontributebothatthatvalueand
+at longer periods. The result is an excess of FM-dominated LPVs
+near $P_b$, that is to say on the short side of the overall period dis-
+tribution.
+We note that, in contrast with the prescription we adopted,
+the onset of DFMP in reality is probably sensitive to metallic-
 Articlenumber,page4of9
 Trabucchietal.:Theperiod-agerelationofLPVs
-ity.Whilethegooddegreeofagreementwithobservationssug- the DFMP regime, and they display a smaller range of periods
-gests that the dependence is weak at most, it is possible for at a given age (cf. Feast & Whitelock 2000b). In other words,
-any discrepancy to be smeared out by the fact that our set of theyshouldexhibitarelativelynarrowPArelation(eventhough,
-isochrone implicitly assumes a flat star-formation rate with no basedontheobservationaldatasetweadopted,thereisnocon-
-age-metallicityrelation,soitisnotanaccuraterepresentationof clusiveevidencethatconsideringonlyMirasreducesthescatter
-anyrealisticstellarenvironment.Inthissense,thePArelationis ofthePArelation).
-environment-dependent,anditisnotnecessarilyuniversal. Nonetheless,wecautionagainstthisapproachasitisprone
-A further point of uncertainty stems from the fact that the tointroducinguncontrolledbiases,asthetraditionaldistinction
-prescription we adopted assumes that the FM period only de- betweenSRVsandMirasisarbitrary(seeTrabucchietal.2021a,
-pends upon the mass and radius, and that it is affected by a andreferencestherein).Assuch,itdisregardsthephysicalpro-
-changeincompositiononlythroughtheeffectthatsuchavaria- cesses at the origin of the range of amplitudes characterizing
-tionhasontheradius.Whilethisistruetoagoodapproximation, LPVs. In particular, photometric amplitudes are largely deter-
-linearmodelsshowasmalldependenceofperiodsonmetallic- minedbytheformationanddissociationofmoleculesinthestel-
-ityatafixedmassandradius,butthequantitativeimpactinthe laratmosphere,andtheyarelikelytobemetallicity-dependent.
-nonlinearcaseisunknown.Wecanonlyestimate,basedonthe It is therefore reasonable to assume that metal-poor (old) Mira
-resultsofTrabucchietal.(2019),anuncertaintyof±10%atmost analogs might be classified as SRVs, thereby undermining the
-withrespecttotheprescriptionsadoptedhere. potential application of the PA relation if restricted to Miras.
-Qualitatively, a realistic age-metallicity relation and the ThisseemstobesupportedbythefactthatthebulkofoldLPVs
-metallicitydependenceoftheperiodandoftheonsetofDFMP inoursampleareclassifiedasSRVs.Therefore,studiesinvolv-
-are all expected to result in a steeper PA relation than the one ing PA relations of LPVs would advantageously include both
-wepredict,butitisdifficulttoassesstherelativeimportanceof MirasandFM-pulsatingSRVs.
-theseeffects.Inthissense,thecompositionprobablyaffectsthe ThechallengeassociatedwithSRVsstemsfromthefactthat
-shape of the PA relation more than its dispersion. The latter is they are often multiperiodic (even when predominantly pulsat-
-likelyaffectedbythecompositionindirectlythroughmassloss, ingintheFM),apropertythatcomplicatesthelightcurveanal-
-the analysis of which is beyond the scope of this study. How- ysis and period extraction. At the same time, this feature could
-ever,wepointoutthatmasslossrepresentsasourceofscatterin potentially improve age determinations as overtone modes are
-combinationwiththeoccurrenceofthermalpulses,becauseitre- expectedtodisplayaPArelationaswell.
+ity.Whilethegooddegreeofagreementwithobservationssug-
+gests that the dependence is weak at most, it is possible for
+any discrepancy to be smeared out by the fact that our set of
+isochrone implicitly assumes a flat star-formation rate with no
+age-metallicityrelation,soitisnotanaccuraterepresentationof
+anyrealisticstellarenvironment.Inthissense,thePArelationis
+environment-dependent,anditisnotnecessarilyuniversal.
+A further point of uncertainty stems from the fact that the
+prescription we adopted assumes that the FM period only de-
+pends upon the mass and radius, and that it is affected by a
+changeincompositiononlythroughtheeffectthatsuchavaria-
+tionhasontheradius.Whilethisistruetoagoodapproximation,
+linearmodelsshowasmalldependenceofperiodsonmetallic-
+ityatafixedmassandradius,butthequantitativeimpactinthe
+nonlinearcaseisunknown.Wecanonlyestimate,basedonthe
+resultsofTrabucchietal.(2019),anuncertaintyof±10%atmost
+withrespecttotheprescriptionsadoptedhere.
+Qualitatively, a realistic age-metallicity relation and the
+metallicitydependenceoftheperiodandoftheonsetofDFMP
+are all expected to result in a steeper PA relation than the one
+wepredict,butitisdifficulttoassesstherelativeimportanceof
+theseeffects.Inthissense,thecompositionprobablyaffectsthe
+shape of the PA relation more than its dispersion. The latter is
+likelyaffectedbythecompositionindirectlythroughmassloss,
+the analysis of which is beyond the scope of this study. How-
+ever,wepointoutthatmasslossrepresentsasourceofscatterin
+combinationwiththeoccurrenceofthermalpulses,becauseitre-
 ducestheminimumradiusfortheonsetofDFMP.Thus,during
 the luminosity dips associated with thermal pulses, a LPV can
-haveaperiodshorterthantheoneithadwhenitfirstenteredthe 5. Conclusions
+haveaperiodshorterthantheoneithadwhenitfirstenteredthe
 DFMPregime(seeAppendixC).Anadditionalsourceofuncer-
-tainty,whichwedisregarded,isrotation(orotherprocessesthat Weusedtheresultsfromrecentnonlinearpulsationcalculations
-induce extra mixing in the core) which causes a spread in ages andcombinedthemwithstate-of-the-artisochronemodelstoin-
-atagiveninitialmass(cf.Andersonetal.2016,forthecaseof vestigate the PArelation of FM-dominated LPVs, finding good
-classicalCepheids). agreement with the distribution of observed LPVs in star clus-
-Thefairlygoodagreementbetweenmodelsandobservations ters. The theoretical PA relation displays a non-negligible scat-
-encouragestheuseofLPVsasageindicators,butthescatterof ter,whoseoriginweidentifiedduetothefactthat,despitebeing
-thePArelationhampersthisapplication.Weattemptedtoreduce very brief, the portion of AGB evolution during which the FM
-thescatterthroughcorrectionsinvolvingphotometricproperties, becomes dominant shows a relatively large range in mass and
-as is customarily done for classical Cepheids with a color term radiusatagivenage.
-(e.g.,Bonoetal.2005),butwithunsatisfactoryresults.Acorrec- The theoretical distribution of FM periods is roughly sym-
-tiondependentonthephotometricamplitudeofvariabilityrep- metric,buttheFMisnotdominantattheshortestperiods.Asa
-resents a promising alternative, but it cannot be pursued at the result,modelspredictthatthedistributionofdominantFMperi-
-moment.Indeed,forcomputationalefficiency,currentpulsation odsatagivenageisskewedtowardshortperiods,inagreement
-modelsincludeonlyacrudetreatmentoftheatmosphericlayers withobservations.Dependingonstellarpopulations,metallicity
-as they do not affect pulsation periods. On the other hand, the mayenhancethisfeatureasmetal-poorLPVs,whichtendtobe
-atmosphere is crucial in determining the spectral energy distri- warmerandmorecompact,onlycontributenearshortperiods.
-butionanditsvariationthroughoutthepulsationcycle,andhence Weprovidethebest-fitPArelationseparatelyforO-richand
-theamplitudeofvariability.Atthesametime,theobservational C-richFM-pulsatingLPVs.ThelatterLPVsshowasteeperPA
-sample adopted here is too heterogeneous for a self-consistent relationbecauseoftheirlowersurfacetemperatures,whichallow
-investigationofamplitude,butthiskindofstudycouldbemade themtoreachlongerperiodsmoreeasily.
-possible by the upcoming data release 3 of the Gaia mission Our analysis concerns all LPVs predominantly pulsating in
-(Gaia Collaboration et al. 2021) and the future Legacy Survey the FM, regardless of whether they are classified as Miras or
-ofSpaceandTime(LSST,Ivezic´ etal.2019)oftheVeraRubin SRVs.Wediscouragesuchadistinctioninthatitisarbitraryand
-Observatory. pronetoselectionbiasesthatriskcompromisingtheuseofLPVs
-It is worth noting that our analysis applies to Miras as well asageindicators.
-as SRVs, provided that they predominantly pulsate in the FM. ThemainlimitationintheuseofthePArelationforagede-
-The limitation of PA relation studies to Miras, as has mainly terminations of individual LPVs stems from its relatively large
-been done in literature so far, undoubtedly has some advan- scatter. We suggest that corrective terms, involving the ampli-
-tages: to begin with, the fact that Miras are typically easier to tude of variability, might help to reduce this scatter and antici-
-detect than SRVs, and their light curves are easier to process patethatupcomingdatafromongoingandfuturesurveysdedi-
-as they tend to be more regular. Moreover, Miras represent the catedtotime-domainastronomywillbehighlyvaluabletoprobe
-end-pointofAGBevolution,soinprincipletheycorrespondtoa thispossibility.Astudyoftheimpactofmetallicityonnonlinear
-smallerrangeofstellarparameterscomparedtothefullextentof pulsationishighlydesirabletopursuethislineofinvestigation,
+tainty,whichwedisregarded,isrotation(orotherprocessesthat
+induce extra mixing in the core) which causes a spread in ages
+atagiveninitialmass(cf.Andersonetal.2016,forthecaseof
+classicalCepheids).
+Thefairlygoodagreementbetweenmodelsandobservations
+encouragestheuseofLPVsasageindicators,butthescatterof
+thePArelationhampersthisapplication.Weattemptedtoreduce
+thescatterthroughcorrectionsinvolvingphotometricproperties,
+as is customarily done for classical Cepheids with a color term
+(e.g.,Bonoetal.2005),butwithunsatisfactoryresults.Acorrec-
+tiondependentonthephotometricamplitudeofvariabilityrep-
+resents a promising alternative, but it cannot be pursued at the
+moment.Indeed,forcomputationalefficiency,currentpulsation
+modelsincludeonlyacrudetreatmentoftheatmosphericlayers
+as they do not affect pulsation periods. On the other hand, the
+atmosphere is crucial in determining the spectral energy distri-
+butionanditsvariationthroughoutthepulsationcycle,andhence
+theamplitudeofvariability.Atthesametime,theobservational
+sample adopted here is too heterogeneous for a self-consistent
+investigationofamplitude,butthiskindofstudycouldbemade
+possible by the upcoming data release 3 of the Gaia mission
+(Gaia Collaboration et al. 2021) and the future Legacy Survey
+of Space and Time (LSST, Ivezić et al. 2019) of the Vera Rubin
+Observatory.
+It is worth noting that our analysis applies to Miras as well
+as SRVs, provided that they predominantly pulsate in the FM.
+The limitation of PA relation studies to Miras, as has mainly
+been done in literature so far, undoubtedly has some advan-
+tages: to begin with, the fact that Miras are typically easier to
+detect than SRVs, and their light curves are easier to process
+as they tend to be more regular. Moreover, Miras represent the
+end-pointofAGBevolution,soinprincipletheycorrespondtoa
+smallerrangeofstellarparameterscomparedtothefullextentof
+the DFMP regime, and they display a smaller range of periods
+at a given age (cf. Feast & Whitelock 2000b). In other words,
+theyshouldexhibitarelativelynarrowPArelation(eventhough,
+basedontheobservationaldatasetweadopted,thereisnocon-
+clusiveevidencethatconsideringonlyMirasreducesthescatter
+ofthePArelation).
+Nonetheless,wecautionagainstthisapproachasitisprone
+tointroducinguncontrolledbiases,asthetraditionaldistinction
+betweenSRVsandMirasisarbitrary(seeTrabucchietal.2021a,
+andreferencestherein).Assuch,itdisregardsthephysicalpro-
+cesses at the origin of the range of amplitudes characterizing
+LPVs. In particular, photometric amplitudes are largely deter-
+minedbytheformationanddissociationofmoleculesinthestel-
+laratmosphere,andtheyarelikelytobemetallicity-dependent.
+It is therefore reasonable to assume that metal-poor (old) Mira
+analogs might be classified as SRVs, thereby undermining the
+potential application of the PA relation if restricted to Miras.
+ThisseemstobesupportedbythefactthatthebulkofoldLPVs
+inoursampleareclassifiedasSRVs.Therefore,studiesinvolv-
+ing PA relations of LPVs would advantageously include both
+MirasandFM-pulsatingSRVs.
+ThechallengeassociatedwithSRVsstemsfromthefactthat
+they are often multiperiodic (even when predominantly pulsat-
+ingintheFM),apropertythatcomplicatesthelightcurveanal-
+ysis and period extraction. At the same time, this feature could
+potentially improve age determinations as overtone modes are
+expectedtodisplayaPArelationaswell.
+5. Conclusions
+We used the results from recent nonlinear pulsation calculations
+and combined them with state-of-the-art isochrone models to in-
+vestigate the PA relation of FM-dominated LPVs, finding good
+agreement with the distribution of observed LPVs in star clus-
+ters. The theoretical PA relation displays a non-negligible scat-
+ter, whose origin we identified due to the fact that, despite being
+very brief, the portion of AGB evolution during which the FM
+becomes dominant shows a relatively large range in mass and
+radius at a given age.
+The theoretical distribution of FM periods is roughly sym-
+metric,buttheFMisnotdominantattheshortestperiods.Asa
+result,modelspredictthatthedistributionofdominantFMperi-
+odsatagivenageisskewedtowardshortperiods,inagreement
+withobservations.Dependingonstellarpopulations,metallicity
+mayenhancethisfeatureasmetal-poorLPVs,whichtendtobe
+warmerandmorecompact,onlycontributenearshortperiods.
+Weprovidethebest-fitPArelationseparatelyforO-richand
+C-richFM-pulsatingLPVs.ThelatterLPVsshowasteeperPA
+relationbecauseoftheirlowersurfacetemperatures,whichallow
+themtoreachlongerperiodsmoreeasily.
+Our analysis concerns all LPVs predominantly pulsating in
+the FM, regardless of whether they are classified as Miras or
+SRVs.Wediscouragesuchadistinctioninthatitisarbitraryand
+pronetoselectionbiasesthatriskcompromisingtheuseofLPVs
+asageindicators.
+ThemainlimitationintheuseofthePArelationforagede-
+terminations of individual LPVs stems from its relatively large
+scatter. We suggest that corrective terms, involving the ampli-
+tude of variability, might help to reduce this scatter and antici-
+patethatupcomingdatafromongoingandfuturesurveysdedi-
+catedtotime-domainastronomywillbehighlyvaluabletoprobe
+thispossibility.Astudyoftheimpactofmetallicityonnonlinear
+pulsationishighlydesirabletopursuethislineofinvestigation,
 Articlenumber,page5of9
 A&Aproofs:manuscriptno.trabucchi_etal_2022_period_age_relation_of_lpvs
-aswouldbeatheoreticalinvestigationofthedependenceofpho- Joo,S.-J.&Lee,Y.-W.2013,ApJ,762,36
-tometricamplitudesuponglobalstellarparameters. Jura,M.&Kleinmann,S.G.1992,ApJS,79,105
-Kamath,D.,Wood,P.R.,Soszyn´ski,I.,&Lebzelter,T.2010,MNRAS,408,522
-Acknowledgements. M.T.andN.M.acknowledgethesupportprovidedbythe Kharchenko,N.V.,Piskunov,A.E.,Röser,S.,Schilbach,E.,&Scholz,R.D.
-SwissNationalScienceFoundationthroughgrantNr.188697.Wearegrateful 2005,A&A,438,1163
-totheanonymousrefereefortheconstructivecommentsthathelpedimproving Kharchenko,N.V.,Piskunov,A.E.,Schilbach,E.,Röser,S.,&Scholz,R.D.
-thispaper,andtoLéoGirardiforhelpingwiththecomputationandinterpre- 2016,A&A,585,A101
-tationofisochrones.Thisresearchhasmadeuseof:datafromtheOGLE-III Kippenhahn,R.&Smith,L.1969,A&A,1,142
-Catalog of Variable Stars; data products from the Two Micron All Sky Sur- Kluyver,T.,Ragan-Kelley,B.,Pérez,F.,etal.2016,inPositioningandPower
-vey, which is a joint project of the University of Massachusetts and the In- in Academic Publishing: Players, Agents and Agendas, ed. F. Loizides &
+aswouldbeatheoreticalinvestigationofthedependenceofpho-
+tometricamplitudesuponglobalstellarparameters.
+Acknowledgements. M.T.andN.M.acknowledgethesupportprovidedbythe
+SwissNationalScienceFoundationthroughgrantNr.188697.Wearegrateful
+totheanonymousrefereefortheconstructivecommentsthathelpedimproving
+thispaper,andtoLéoGirardiforhelpingwiththecomputationandinterpre-
+tationofisochrones.Thisresearchhasmadeuseof:datafromtheOGLE-III
+Catalog of Variable Stars; data products from the Two Micron All Sky Sur-
+vey, which is a joint project of the University of Massachusetts and the In-
 fraredProcessingandAnalysisCenter/CaliforniaInstituteofTechnology,funded
-B.Scmidt(Netherlands:IOSPress),87–90
-by the National Aeronautics and Space Administration and the National Sci- Lebzelter,T.,Mowlavi,N.,Marigo,P.,etal.2018,A&A,616,L13
-enceFoundation;datafromtheEuropeanSpaceAgency(ESA)missionGaia Lebzelter,T.,Trabucchi,M.,Mowlavi,N.,etal.2019,A&A,631,A24
-(https://www.cosmos.esa.int/gaia),processedbytheGaiaDataProcess- Lebzelter,T.&Wood,P.R.2005,A&A,441,1117
-ingandAnalysisConsortium(DPAC,https://www.cosmos.esa.int/web/ Lebzelter,T.&Wood,P.R.2007,A&A,475,643
-gaia/dpac/consortium). Funding for the DPAC has been provided by na- Lebzelter,T.&Wood,P.R.2011,A&A,529,A137
-tionalinstitutions,inparticulartheinstitutionsparticipatingintheGaiaMulti- Lebzelter,T.&Wood,P.R.2016,A&A,585,A111
+by the National Aeronautics and Space Administration and the National Sci-
+enceFoundation;datafromtheEuropeanSpaceAgency(ESA)missionGaia
+(https://www.cosmos.esa.int/gaia),processedbytheGaiaDataProcess-
+ingandAnalysisConsortium(DPAC,https://www.cosmos.esa.int/web/
+gaia/dpac/consortium). Funding for the DPAC has been provided by na-
+tionalinstitutions,inparticulartheinstitutionsparticipatingintheGaiaMulti-
 lateralAgreement.Thisresearchhasmadeuseofthefollowingfree/opensource
-LloydEvans,T.1976,MNRAS,174,169
 softwareand/orlibraries:theStarlinkTablesInfrastructureLibrary(STILTSand
-LloydEvans,T.1983a,MNRAS,204,985
-Topcat, Taylor 2006); IPython (Pérez & Granger 2007) and Jupyter (Kluyver LloydEvans,T.1983b,MNRAS,204,961
+Topcat, Taylor 2006); IPython (Pérez & Granger 2007) and Jupyter (Kluyver
 etal.2016)notebooks;thePythonlibrariesNumPy(Harrisetal.2020),SciPy
-LloydEvans,T.&Menzies,J.W.1973,inAstrophysicsandSpaceScienceLi-
 (Virtanenetal.2020),matplotlib(aPythonlibraryforpublicationqualitygraph-
-brary,Vol.36,IAUColloq.21:VariableStarsinGlobularClustersandin
 ics,Hunter2007),andAstropy(acommunity-developedcorePythonpackage
-RelatedSystems,ed.J.D.Fernie,151
-forAstronomy,AstropyCollaborationetal.2018).Thisresearchhasmadeuseof Mackey,A.D.&BrobyNielsen,P.2007,MNRAS,379,151
-NASA’sAstrophysicsDataSystemBibliographicServices,andofthefollowing Marigo,P.,Girardi,L.,Bressan,A.,etal.2017,ApJ,835,77
+forAstronomy,AstropyCollaborationetal.2018).Thisresearchhasmadeuseof
+NASA’sAstrophysicsDataSystemBibliographicServices,andofthefollowing
 servicesprovidedbyCDS,Strasbourg:theSIMBADdatabase,VizieRcatalogue
-Menzies,J.,Feast,M.,Tanabé,T.,Whitelock,P.,&Nakada,Y.2002,MNRAS,
 accesstool(DOI:10.26093/cds/vizier,Ochsenbeinetal.2000),the“Aladinsky
-335,923
 atlas” (Bonnarel et al. 2000), and the cross-match service (Boch et al. 2012;
-Menzies,J.,Feast,M.,Whitelock,P.,etal.2008,MNRAS,385,1045
 Pineauetal.2020).
-Menzies,J.W.,Feast,M.W.,Whitelock,P.A.,&Matsunaga,N.2011,MNRAS,
-414,3492
-Menzies,J.W.,Whitelock,P.A.,&Feast,M.W.2015,MNRAS,452,910
-Menzies,J.W.,Whitelock,P.A.,Feast,M.W.,&Matsunaga,N.2010,MNRAS,
-References 406,86
-Merrill,P.W.1923,ApJ,58,215
+References
 Anderson,R.I.,Saio,H.,Ekström,S.,Georgy,C.,&Meynet,G.2016,A&A,
-Mowlavi,N.,Lecoeur-Taïbi,I.,Lebzelter,T.,etal.2018,A&A,618,A58
 591,A8
-Nayak, P. K., Subramaniam, A., Choudhury, S., Indu, G., & Sagar, R. 2016,
-AstropyCollaboration,Price-Whelan,A.M.,Sipo˝cz,B.M.,etal.2018,AJ,156,
-MNRAS,463,1446
+Astropy Collaboration, Price-Whelan, A. M., Sipőcz, B. M., et al. 2018, AJ, 156,
 123
-Ochsenbein,F.,Bauer,P.,&Marcout,J.2000,A&AS,143,23
 Battinelli,P.&Demers,S.2012,A&A,544,A10
-Pastorelli,G.,Marigo,P.,Girardi,L.,etal.2020,MNRAS,498,3283
 Battinelli,P.&Demers,S.2013,A&A,553,A93
-Pastorelli,G.,Marigo,P.,Girardi,L.,etal.2019,MNRAS,485,5666
 Baumgardt,H.,Parmentier,G.,Anders,P.,&Grebel,E.K.2013,MNRAS,430,
-Pérez,F.&Granger,B.E.2007,ComputinginScienceandEngineering,9,21
 676
-Perren,G.I.,Piatti,A.E.,&Vázquez,R.A.2017,A&A,602,A89
-Boch,T.,Pineau,F.,&Derriere,S.2012,inAstronomicalSocietyofthePa- Pineau,F.-X.,Boch,T.,Derrière,S.,&Schaaff,A.2020,inAstronomicalSo-
+Boch,T.,Pineau,F.,&Derriere,S.2012,inAstronomicalSocietyofthePa-
 cificConferenceSeries,Vol.461,AstronomicalDataAnalysisSoftwareand
-cietyofthePacificConferenceSeries,Vol.522,AstronomicalDataAnalysis
 SystemsXXI,ed.P.Ballester,D.Egret,&N.P.F.Lorente,291
-SoftwareandSystemsXXVII,ed.P.Ballester,J.Ibsen,M.Solar,&K.Short-
 Bonnarel,F.,Fernique,P.,Bienaymé,O.,etal.2000,A&AS,143,33
-ridge,125
 Bono,G.,Marconi,M.,Cassisi,S.,etal.2005,ApJ,621,966
-Sakamoto,T.,Matsunaga,N.,Hasegawa,T.,&Nakada,Y.2012,ApJ,761,L10
 Bressan,A.,Marigo,P.,Girardi,L.,etal.2012,MNRAS,427,127
-Skrutskie,M.F.,Cutri,R.M.,Stiening,R.,etal.2006,AJ,131,1163
 Catchpole,R.M.,Whitelock,P.A.,Feast,M.W.,etal.2016,MNRAS,455,
-Soszyn´ski,I.,Olechowska,A.,Ratajczak,M.,etal.2021,ApJ,911,L22
 2216
-Soszyn´ski,I.,Udalski,A.,Szyman´ski,M.K.,etal.2009,ActaAstron.,59,239
 Cutri,R.M.,Wright,E.L.,Conrow,T.,etal.2013,ExplanatorySupplement
-Soszyn´ski,I.,Udalski,A.,Szyman´ski,M.K.,etal.2011,ActaAstron.,61,217
 totheAllWISEDataReleaseProducts,ExplanatorySupplementtotheAll-
-Taylor, M. B. 2006, in Astronomical Society of the Pacific Conference Se-
 WISEDataReleaseProducts
-ries,Vol.351,AstronomicalDataAnalysisSoftwareandSystemsXV,ed.
 deGrijs,R.,Courbin,F.,Martínez-Vázquez,C.E.,etal.2017,SpaceSci.Rev.,
-C.Gabriel,C.Arviset,D.Ponz,&S.Enrique,666
 212,1743
-Trabucchi,M.,Mowlavi,N.,&Lebzelter,T.2021a,A&A,656,A66
 DeSomma,G.,Marconi,M.,Cassisi,S.,etal.2020,MNRAS,496,5039
-Trabucchi,M.,Wood,P.R.,Montalbán,J.,etal.2017,ApJ,847,139
 Eggen,O.J.1998,AJ,115,2435
-Trabucchi,M.,Wood,P.R.,Montalbán,J.,etal.2019,MNRAS,482,929
 Feast,M.2007,inAstronomicalSocietyofthePacificConferenceSeries,Vol.
-Trabucchi,M.,Wood,P.R.,Mowlavi,N.,etal.2021b,MNRAS,500,1575
 378,WhyGalaxiesCareAboutAGBStars:TheirImportanceasActorsand
-Urago,R.,Omodaka,T.,Nagayama,T.,etal.2020,ApJ,891,50
 Probes,ed.F.Kerschbaum,C.Charbonnel,&R.F.Wing,479
-Villanova,S.,Geisler,D.,Gratton,R.G.,&Cassisi,S.2014,ApJ,791,107
 Feast,M.&Whitelock,P.2000a,inAstrophysicsandSpaceScienceLibrary,
-Virtanen,P.,Gommers,R.,Oliphant,T.E.,etal.2020,NatureMethods,17,261
 Vol.255,AstrophysicsandSpaceScienceLibrary,ed.F.Matteucci&F.Gio-
-Wenger,M.,Ochsenbein,F.,Egret,D.,etal.2000,A&AS,143,9
 vannelli,229
-Whitelock,P.,Feast,M.,&Catchpole,R.1991,MNRAS,248,276
 Feast,M.W.1963,MNRAS,125,367
-Whitelock,P.,Menzies,J.,Feast,M.,etal.1994,MNRAS,267,711
 Feast,M.W.1966,TheObservatory,86,120
-Whitelock,P.A.1986,MNRAS,219,525
 Feast,M.W.1981,inAstrophysicsandSpaceScienceLibrary,Vol.88,Physical
-Whitelock,P.A.,Menzies,J.W.,Feast,M.W.,etal.2009,MNRAS,394,795
 ProcessesinRedGiants,ed.J.Iben,I.&A.Renzini,193–204
-Whitelock,P.A.,Menzies,J.W.,Feast,M.W.,Nsengiyumva,F.,&Matsunaga,
 Feast,M.W.,Robertson,B.S.C.,&Black,C.1980,MNRAS,190,227
-N.2013,MNRAS,428,2216
 Feast,M.W.&Whitelock,P.A.2000b,MNRAS,317,460
-Wilson,R.E.&Merrill,P.W.1942,ApJ,95,248
 Feast,M.W.,Whitelock,P.A.,&Menzies,J.W.2006,MNRAS,369,791
-Wyatt,S.P.&Cahn,J.H.1983,ApJ,275,225
 GaiaCollaboration,Brown,A.G.A.,Vallenari,A.,etal.2021,A&A,649,A1
-Girardi,L.,Marigo,P.,Bressan,A.,&Rosenfield,P.2013,ApJ,777,142 Ya’Ari,A.&Tuchman,Y.1996,ApJ,456,350
+Girardi,L.,Marigo,P.,Bressan,A.,&Rosenfield,P.2013,ApJ,777,142
 Goudfrooij,P.,Girardi,L.,Kozhurina-Platais,V.,etal.2014,ApJ,797,35
 Grady,J.,Belokurov,V.,&Evans,N.W.2019,MNRAS,483,3022
 Grady,J.,Belokurov,V.,&Evans,N.W.2020,MNRAS,492,3128
 Harris,C.R.,Millman,K.J.,vanderWalt,S.J.,etal.2020,Nature,585,357
 Hunter,J.D.2007,ComputinginScience&Engineering,9,90
-Ivezic´,Ž.,Kahn,S.M.,Tyson,J.A.,etal.2019,ApJ,873,111
+Ivezić, Ž., Kahn, S. M., Tyson, J. A., et al. 2019, ApJ, 873, 111
 Jayasinghe,T.,Stanek,K.Z.,Kochanek,C.S.,etal.2020,MNRAS,491,13
+Joo,S.-J.&Lee,Y.-W.2013,ApJ,762,36
+Jura,M.&Kleinmann,S.G.1992,ApJS,79,105
+Kamath, D., Wood, P. R., Soszyński, I., & Lebzelter, T. 2010, MNRAS, 408, 522
+Kharchenko,N.V.,Piskunov,A.E.,Röser,S.,Schilbach,E.,&Scholz,R.D.
+2005,A&A,438,1163
+Kharchenko,N.V.,Piskunov,A.E.,Schilbach,E.,Röser,S.,&Scholz,R.D.
+2016,A&A,585,A101
+Kippenhahn,R.&Smith,L.1969,A&A,1,142
+Kluyver, T., Ragan-Kelley, B., Pérez, F., et al. 2016, in Positioning and Power
+in Academic Publishing: Players, Agents and Agendas, ed. F. Loizides &
+B. Schmidt (Netherlands: IOS Press), 87–90
+Lebzelter,T.,Mowlavi,N.,Marigo,P.,etal.2018,A&A,616,L13
+Lebzelter,T.,Trabucchi,M.,Mowlavi,N.,etal.2019,A&A,631,A24
+Lebzelter,T.&Wood,P.R.2005,A&A,441,1117
+Lebzelter,T.&Wood,P.R.2007,A&A,475,643
+Lebzelter,T.&Wood,P.R.2011,A&A,529,A137
+Lebzelter,T.&Wood,P.R.2016,A&A,585,A111
+LloydEvans,T.1976,MNRAS,174,169
+LloydEvans,T.1983a,MNRAS,204,985
+LloydEvans,T.1983b,MNRAS,204,961
+LloydEvans,T.&Menzies,J.W.1973,inAstrophysicsandSpaceScienceLi-
+brary,Vol.36,IAUColloq.21:VariableStarsinGlobularClustersandin
+RelatedSystems,ed.J.D.Fernie,151
+Mackey,A.D.&BrobyNielsen,P.2007,MNRAS,379,151
+Marigo,P.,Girardi,L.,Bressan,A.,etal.2017,ApJ,835,77
+Menzies,J.,Feast,M.,Tanabé,T.,Whitelock,P.,&Nakada,Y.2002,MNRAS,
+335,923
+Menzies,J.,Feast,M.,Whitelock,P.,etal.2008,MNRAS,385,1045
+Menzies,J.W.,Feast,M.W.,Whitelock,P.A.,&Matsunaga,N.2011,MNRAS,
+414,3492
+Menzies,J.W.,Whitelock,P.A.,&Feast,M.W.2015,MNRAS,452,910
+Menzies,J.W.,Whitelock,P.A.,Feast,M.W.,&Matsunaga,N.2010,MNRAS,
+406,86
+Merrill,P.W.1923,ApJ,58,215
+Mowlavi,N.,Lecoeur-Taïbi,I.,Lebzelter,T.,etal.2018,A&A,618,A58
+Nayak, P. K., Subramaniam, A., Choudhury, S., Indu, G., & Sagar, R. 2016,
+MNRAS,463,1446
+Ochsenbein,F.,Bauer,P.,&Marcout,J.2000,A&AS,143,23
+Pastorelli,G.,Marigo,P.,Girardi,L.,etal.2020,MNRAS,498,3283
+Pastorelli,G.,Marigo,P.,Girardi,L.,etal.2019,MNRAS,485,5666
+Pérez,F.&Granger,B.E.2007,ComputinginScienceandEngineering,9,21
+Perren,G.I.,Piatti,A.E.,&Vázquez,R.A.2017,A&A,602,A89
+Pineau,F.-X.,Boch,T.,Derrière,S.,&Schaaff,A.2020,inAstronomicalSo-
+cietyofthePacificConferenceSeries,Vol.522,AstronomicalDataAnalysis
+SoftwareandSystemsXXVII,ed.P.Ballester,J.Ibsen,M.Solar,&K.Short-
+ridge,125
+Sakamoto,T.,Matsunaga,N.,Hasegawa,T.,&Nakada,Y.2012,ApJ,761,L10
+Skrutskie,M.F.,Cutri,R.M.,Stiening,R.,etal.2006,AJ,131,1163
+Soszyński, I., Olechowska, A., Ratajczak, M., et al. 2021, ApJ, 911, L22
+Soszyński, I., Udalski, A., Szymański, M. K., et al. 2009, Acta Astron., 59, 239
+Soszyński, I., Udalski, A., Szymański, M. K., et al. 2011, Acta Astron., 61, 217
+Taylor, M. B. 2006, in Astronomical Society of the Pacific Conference Se-
+ries,Vol.351,AstronomicalDataAnalysisSoftwareandSystemsXV,ed.
+C.Gabriel,C.Arviset,D.Ponz,&S.Enrique,666
+Trabucchi,M.,Mowlavi,N.,&Lebzelter,T.2021a,A&A,656,A66
+Trabucchi,M.,Wood,P.R.,Montalbán,J.,etal.2017,ApJ,847,139
+Trabucchi,M.,Wood,P.R.,Montalbán,J.,etal.2019,MNRAS,482,929
+Trabucchi,M.,Wood,P.R.,Mowlavi,N.,etal.2021b,MNRAS,500,1575
+Urago,R.,Omodaka,T.,Nagayama,T.,etal.2020,ApJ,891,50
+Villanova,S.,Geisler,D.,Gratton,R.G.,&Cassisi,S.2014,ApJ,791,107
+Virtanen,P.,Gommers,R.,Oliphant,T.E.,etal.2020,NatureMethods,17,261
+Wenger,M.,Ochsenbein,F.,Egret,D.,etal.2000,A&AS,143,9
+Whitelock,P.,Feast,M.,&Catchpole,R.1991,MNRAS,248,276
+Whitelock,P.,Menzies,J.,Feast,M.,etal.1994,MNRAS,267,711
+Whitelock,P.A.1986,MNRAS,219,525
+Whitelock,P.A.,Menzies,J.W.,Feast,M.W.,etal.2009,MNRAS,394,795
+Whitelock,P.A.,Menzies,J.W.,Feast,M.W.,Nsengiyumva,F.,&Matsunaga,
+N.2013,MNRAS,428,2216
+Wilson,R.E.&Merrill,P.W.1942,ApJ,95,248
+Wyatt,S.P.&Cahn,J.H.1983,ApJ,275,225
+Ya’Ari,A.&Tuchman,Y.1996,ApJ,456,350
 Articlenumber,page6of9
 Trabucchietal.:Theperiod-agerelationofLPVs
+Fig. A.1. Absolute-$K_s$ Gaia-2MASS diagram
+for the stars with or with-
+out a spectral type (left and right panels, respectively) in the selected
+sample. Symbol colors and shapes indicate the spectral type and host
+cluster described in the legend, respectively, which also reports the num-
+ber of sources displayed (i.e., having both optical and NIR photometry).
+The dashed line marks the separation between O- and C-rich sources
+according to Lebzelter et al. (2018).
+An arrow marks the source MSX LMC 124
+in NGC 1830 that, having
+$W_{\mathrm{BP,RP}} - W_{J,K_s} = 9.73$ mag,
+lies out-
+side the plot area.
+Background dots are LPVs in the LMC
+from OGLE-
+III (light gray) and Mowlavi et al. (2018) (darker gray).
+AppendixA: ClassificationofobservedLPVs
+AppendixA.1: Spectraltype
+We adopted the spectral types provided by Lebzelter & Wood
+(2007)andKamathetal.(2010)for52oftheLPVstheystudied
+in NGC 1846, NGC 1978, and NGC 419. The only exception
+isthestar5-3inNGC419,forwhichweadoptedtheS-typeas
+reportedbyLloydEvans(1983a).
+We also searched the SIMBAD astronomical database
+(Wenger et al. 2000) for spectral type information, which we
+foundfor26morestars.WeusedtheGaia-2MASSdiagramof
+Lebzelteretal.(2018)toconfirmthechemicaltypeclassification
+takenfromliteratureandtocharacterizethesurfacechemistryof
+sourcesofanunknownspectraltype(seeFig.A.1).Amongthe
+latter,weidentified13C-richstarsand106O-richsources.
+Three of the sources without a spectral type lack Gaia pho-
+tometry, so they cannot be classified with the Gaia-2MASS. Two
+of them (LW5 and LW22 in 47 Tuc) have no match in Gaia
+EDR3, but they have NIR data and are probably O-rich
+based on their position in the
+$J - K_s$
+versus
+$K_s$
+color-magnitude diagram.
+The third source is one of the two stars in NGC 1903 from the
+listofGradyetal.(2019),whichweidentifiedwiththe2MASS
+source J05171633-6920298. It is likely C-rich according to the
+NIRcolor-magnitudediagram.
+Finally, the sources V138 in ω Cen, LW15 in NGC 2808,
+andLW4inNGC362lackNIRdata.Theycannotbeplacedin
+the NIR PL diagram, upon which we relied to assign pulsation
+modes to periods, so we excluded them from the sample. The
 distributionofO-andC-richsourcesintheperiod-agediagram
 isshowninFig.A.2.
 AppendixA.2: Variability
@@ -501,7 +778,7 @@ overtonemodeisassociatedwithsequenceA,thefirstovertone
 modewithsequencesBandC(cid:48),andthefundamentalmodewith
 sequenceC(e.g.,Trabucchietal.2017).Weexcludedlongsec-
 ondaryperiodsonsequenceDastheyarenotduetostellarpul-
-sation (Soszyn´ski et al. 2021, and references therein), and we
+sation (Soszy´ nski et al. 2021, and references therein), and we
 used the pattern of PL sequences in the LMC as a reference to
 guidethemodeidentification(cf.Trabucchietal.2021a).
 Weperformedthisclassificationseparatelyforperiodscom-
@@ -509,128 +786,157 @@ ingfromeachdistinctdataset.Iftwoormoreperiodsfromdif-
 ferent data sets were assigned to the same pulsation mode, we
 retained only one of those periods, with priority to the values
 from Lebzelter & Wood and Kamath et al. (2010). If the latter
-Fig.A.1.Absolute-K Gaia-2MASSdiagramforthestarswithorwith- authors do not provide this information, we adopted the period
-s
-out a spectral type (left and right panels, respectively) in the selected from OGLE-III if available, and otherwise from ASAS-SN or
-sample. Symbol colors and shapes indicate the spectral type and host fromGaiaDR2.
-clusterdescribedinthelegend,respectively,whichalsoreportsthenum- For some sources, the periods reported in different catalogs
-berofsourcesdisplayed(i.e.,havingbothopticalandNIRphotometry).
+authors do not provide this information, we adopted the period
+from OGLE-III if available, and otherwise from ASAS-SN or
+fromGaiaDR2.
+For some sources, the periods reported in different catalogs
 wereassignedtothesamemodethroughthisprocedure.Inmost
-The dashed line marks the separation between O- and C-rich sources
 cases, these periods are reasonably similar to each other. Only
-accordingtoLebzelteretal.(2018).AnarrowmarksthesourceMSX
-LMC124inNGC1830that,havingW −W =9.73mag,liesout- in a few caseswere they significantly different, but this didnot
-sidetheplotarea.BackgrounddotsareBP L,R PP VsinJ,K ts heLMCfromOGLE- alterourconclusions.
-III(lightgray)andMowlavietal.(2018)(darkergray). Whenavailable,thevariabilitytypewastakenfromOGLE-
+in a few caseswere they significantly different, but this didnot
+alterourconclusions.
+Whenavailable,thevariabilitytypewastakenfromOGLE-
 IIIorASAS-SN.Wenotethatweareonlyinterestedinwhether
 a star is classified as a Mira or semi-regular variable. In many
-AppendixA: ClassificationofobservedLPVs cases,thistypeisnotgivenorthestarissimplyconsidered,for
+cases,thistypeisnotgivenorthestarissimplyconsidered,for
 instance,asanLPVorAGBinSIMBAD,inwhichcasewecon-
-AppendixA.1: Spectraltype sideredthevariabilitytypeasundetermined.
-We adopted the spectral types provided by Lebzelter & Wood
-(2007)andKamathetal.(2010)for52oftheLPVstheystudied
+sideredthevariabilitytypeasundetermined.
 AppendixB: Fittingrelations
-in NGC 1846, NGC 1978, and NGC 419. The only exception
-isthestar5-3inNGC419,forwhichweadoptedtheS-typeas WeobtainedanalyticexpressionsforthePArelationsseparately
-reportedbyLloydEvans(1983a). for O- and C-rich stars, proceeding as follows. For each bin of
-We also searched the SIMBAD astronomical database log(τ/yr), we modeled the period distribution with a Gaussian
-(Wenger et al. 2000) for spectral type information, which we kernel density estimator (KDE) and identified the peak of the
-foundfor26morestars.WeusedtheGaia-2MASSdiagramof distribution. To describe the boundaries of the PA relation, we
-Lebzelteretal.(2018)toconfirmthechemicaltypeclassification adopted, at each age, the values of the period at which the dis-
-takenfromliteratureandtocharacterizethesurfacechemistryof tributionequals25%ofitsmaximum.Weselectedthisarbitrary
-sourcesofanunknownspectraltype(seeFig.A.1).Amongthe value upon visual inspection of the PA plane. We modeled the
-latter,weidentified13C-richstarsand106O-richsources. central trend of the PA relation, as well as its short- and long-
-ThreeofthesourceswithoutaspectraltypelackGaiapho- periodedges,withlinearorquadraticfunctionsintheform
-tometry,sotheycannotbeclassifiedwiththeGaia-2MASS.Two
-log(τ/yr)=a +a (P/P˜)+a (P/P˜)2, (B.1)
-of them (LW5 and LW22 in 47 Tuc) have no match in Gaia 0 1 2
-EDR3,buttheyhaveNIRdataandareprobablyO-richbasedon
-(where P˜ = 350 days) and employed a Lenvenberg-Marquardt
-theirpositioninthe J−K versusK color-magnitudediagram.
-s s nonlinearregressionalgorithm3toderivethebest-fitcoefficients,
-The third source is one of the two stars in NGC 1903 from the
+WeobtainedanalyticexpressionsforthePArelationsseparately
+for O- and C-rich stars, proceeding as follows. For each bin of
+log(τ/yr), we modeled the period distribution with a Gaussian
+kernel density estimator (KDE) and identified the peak of the
+distribution. To describe the boundaries of the PA relation, we
+adopted, at each age, the values of the period at which the dis-
+tributionequals25%ofitsmaximum.Weselectedthisarbitrary
+value upon visual inspection of the PA plane. We modeled the
+central trend of the PA relation, as well as its short- and long-
+periodedges,withlinearorquadraticfunctionsintheform
+log(τ/yr)=a
+0
++a
+1
+(P/˜ P)+a
+2
+(P/˜ P)2, (B.1)
+(where ˜ P = 350 days) and employed a Lenvenberg-Marquardt
+nonlinearregressionalgorithm3toderivethebest-fitcoefficients,
 which are listed in Table B.1. We remark that these best-fit ex-
-listofGradyetal.(2019),whichweidentifiedwiththe2MASS
 pressionsareonlyvalidintheintervals8.0 ≤ log(τ/yr) ≤ 10.3
-source J05171633-6920298. It is likely C-rich according to the
 and 20 < P/days < 700 for O-rich composition, and within
-NIRcolor-magnitudediagram.
-Finally, the sources V138 in ω Cen, LW15 in NGC 2808, 3 WemadeuseofthePythonlibrarySciPytoperformGaussianKDE
-andLW4inNGC362lackNIRdata.Theycannotbeplacedin modeling and best-fit, respectively, by means of the gaussian_kde
-the NIR PL diagram, upon which we relied to assign pulsation tool from the stats module and the curve_fit function from the
-modes to periods, so we excluded them from the sample. The optimizemodule.
+3 WemadeuseofthePythonlibrarySciPytoperformGaussianKDE
+modeling and best-fit, respectively, by means of the gaussian_kde
+tool from the stats module and the curve_fit function from the
+optimizemodule.
 Articlenumber,page7of9
 A&Aproofs:manuscriptno.trabucchi_etal_2022_period_age_relation_of_lpvs
 Fig.A.2.SimilartoFig.1,excepteachsourceiscolor-codedaccordingtowhetherithasbeenclassifiedasO-rich(blue)orC-rich(red).
-TableB.1.Best-fitcoefficientsforthePArelationanditsboundariesin TableB.2.Best-fitcoefficientsfortheperiod-initialmassrelationand
-theformgiveninEq.B.1. itsboundariesintheformgiveninEq.B.2.
-Sp.type relation a a a Sp.type relation b b b
-0 1 2 0 1 2
-center 10.78 -2.660 0.5953 center -0.2790 0.8958 -0.1828
-O-rich loweredge 10.46 -2.818 0.6578 O-rich loweredge -0.1772 0.9975 -0.2203
-upperedge 10.54 -0.8187 -0.2335 upperedge -0.1740 0.2783 0.8247
-center 9.755 -0.7532 center -0.0304 0.2885
-C-rich loweredge 9.982 -1.698 C-rich loweredge -0.0131 0.5752
-upperedge 8.498 -1.827 -0.9959 upperedge -0.2245 -0.2720 0.2343
-8.6 ≤ log(τ/yr) ≤ 9.3 and 140 < P/days < 620 in the C-rich DFMP are indicated by solid lines. Panel (b) shows the period
-case. distributionsforafewdifferentcases.
-Becauseoftheconnectionbetweenageandinitialmass,the Itisinstructive,tobeginwith,toignoretheeffectofthermal
-PArelationcanbetranslatedintoaperiod-initialmassrelation, pulsesandconsideronlythequiescentevolution(greenlinesin
-whichwederivedusingthesameapproachdescribedabove,and Fig.C.1).Thesmallestinitialmasscorrespondstoastarthatjust
-assumingtheform enteredtheTP-AGB,whentheFMhasaperiodof∼ 240days
+TableB.1.Best-fitcoefficientsforthePArelationanditsboundariesin
+theformgiveninEq.B.1.
+Sp.type relation a
+0
+a
+1
+a
+2
+O-rich
+center 10.78 -2.660 0.5953
+loweredge 10.46 -2.818 0.6578
+upperedge 10.54 -0.8187 -0.2335
+C-rich
+center 9.755 -0.7532
+loweredge 9.982 -1.698
+upperedge 8.498 -1.827 -0.9959
+8.6 ≤ log(τ/yr) ≤ 9.3 and 140 < P/days < 620 in the C-rich
+case.
+Becauseoftheconnectionbetweenageandinitialmass,the
+PArelationcanbetranslatedintoaperiod-initialmassrelation,
+whichwederivedusingthesameapproachdescribedabove,and
+assumingtheform
+log(M i /M (cid:12) )=b 0 +b 1 (P/˜ P)+b 2 (P/˜ P)2. (B.2)
+Theresultingbest-fitlinesaredisplayedinFig.B.1,andtheco-
+efficientsaregiveninTableB.2.
+WeremarkthatboththePAandtheperiod-initialmassrela-
+tionsdependonmodelassumptions,inparticularmasslossand
+mixing,aswellasonthepropertiesofthepopulationofLPVs,
+namelythestar-formationhistoryandage-metallicityrelation.
+AppendixC: Theshapeoftheperioddistribution
+Asanexamplecase,weconsideranisochroneofagelog(τ/yr)=
+8.3andinitialmetallicityZ
+i
+=0.006.StarsontheTP-AGBhave
+initial masses M
+i
+(cid:39) 3.85M
+(cid:12)
+over a small range of ∼ 10−3M
+(cid:12)
+.
+The relation between period and initial mass is displayed in
+panel (a) of Fig. C.1, where isochrone portions undergoing
+TableB.2.Best-fitcoefficientsfortheperiod-initialmassrelationand
+itsboundariesintheformgiveninEq.B.2.
+Sp.type relation b
+0
+b
+1
+b
+2
+O-rich
+center -0.2790 0.8958 -0.1828
+loweredge -0.1772 0.9975 -0.2203
+upperedge -0.1740 0.2783 0.8247
+C-rich
+center -0.0304 0.2885
+loweredge -0.0131 0.5752
+upperedge -0.2245 -0.2720 0.2343
+DFMP are indicated by solid lines. Panel (b) shows the period
+distributionsforafewdifferentcases.
+Itisinstructive,tobeginwith,toignoretheeffectofthermal
+pulsesandconsideronlythequiescentevolution(greenlinesin
+Fig.C.1).Thesmallestinitialmasscorrespondstoastarthatjust
+enteredtheTP-AGB,whentheFMhasaperiodof∼ 240days
 butisnotdominant.Itonlybecomesdominantaboveathreshold
-log(M i/M (cid:12))=b 0+b 1(P/P˜)+b 2(P/P˜)2. (B.2) radiusR dom,0,thatisforperiodslongerthana(mass-dependent)
-criticalperiod P (thesolidgraylineinFig.C.1).Theleast
+radiusR dom,0 ,thatisforperiodslongerthana(mass-dependent)
+criticalperiod P
 dom,0
-Theresultingbest-fitlinesaredisplayedinFig.B.1,andtheco-
-evolved (quiescent) model with dominant FM has P (cid:39) 360
-efficientsaregiveninTableB.2. FM
+(thesolidgraylineinFig.C.1).Theleast
+evolved (quiescent) model with dominant FM has P
+FM
+(cid:39) 360
 days(greencircleandhorizontalline),correspondingtoasharp
-WeremarkthatboththePAandtheperiod-initialmassrela-
 cutintheperioddistributionshowninpanel(b)ofFig.C.1.
-tionsdependonmodelassumptions,inparticularmasslossand
 AsastarevolvesalongtheAGBitexpands,anditsperiodbe-
-mixing,aswellasonthepropertiesofthepopulationofLPVs,
 comeslongerinresponsetotheincreaseinradius.Modelswith
-namelythestar-formationhistoryandage-metallicityrelation.
 ahigherinitialmassaremoreevolved,hencetheyhavealarger
 radiusandalongerperiod.Therateatwhichaperiodincreases
 withradiusisnotfixed,butratherdecreaseswithevolution.Ac-
-AppendixC: Theshapeoftheperioddistribution
 cordingtotheprescriptionofTrabucchietal.(2021b),aperiod
-Asanexamplecase,weconsideranisochroneofagelog(τ/yr)= growswithradiusasabrokenpower-lawwithexponentα(cid:39)1.8
-8.3andinitialmetallicityZ =0.006.StarsontheTP-AGBhave ifR<R ,andwithα(cid:39)1.25atlargerradii.
-i b
-initial masses M (cid:39) 3.85M over a small range of ∼ 10−3M . This is equivalent to saying that the period grows more
-i (cid:12) (cid:12)
-The relation between period and initial mass is displayed in slowly after it exceeds a critical value P = P(R ), marked by
-b b
-panel (a) of Fig. C.1, where isochrone portions undergoing the gray dotted line in Fig. C.1. The isochrone reaches it at
+growswithradiusasabrokenpower-lawwithexponentα(cid:39)1.8
+ifR<R
+b
+,andwithα(cid:39)1.25atlargerradii.
+This is equivalent to saying that the period grows more
+slowly after it exceeds a critical value P
+b
+= P(R
+b
+), marked by
+the gray dotted line in Fig. C.1. The isochrone reaches it at
 Articlenumber,page8of9
 Trabucchietal.:Theperiod-agerelationofLPVs
-Fig.B.1.SimilartoFig.2,butshowinginitialmassM inplaceofage.Thebest-fitlinestothemostpopulatedbandandedgesofthetheoretical
+Fig.B.1.SimilartoFig.2,butshowinginitialmassM
 i
-P –M relationareshown.
-FM i
-tion causes the period to decrease, and the cut at ∼ 360 days
-becomes less sharp. Because of mass loss, the threshold period
-P is lowered, so that the shortest period associated with
-dom,0
-DFMP does not correspond to the least evolved model (green
-circle),butrathertotheluminositydipofathermalpulse(blue
-circle).
-Tobeprecise,theearliestoccurrenceofDFMPisontheleft-
-mostluminosityspike(redcircle),whosedurationissoshortthat
-itisunlikelytobeobserved.Indeed,theinclusionofluminosity
-spikes alters the period distribution at long periods very little.
-Luminosity spikes are relevant only for relatively massive and
-youngTP-AGBstars,andtheygiverisetothepoorlypopulated
-portionofthePArelationatthelongestperiods,asseeninpanel
-(a)ofFig.2.
+inplaceofage.Thebest-fitlinestothemostpopulatedbandandedgesofthetheoretical
+P
+FM
+–M
+i
+relationareshown.
 Fig.C.1.Perioddistributionatfixedageandmetallicity.Panel(a)shows
 periodasafunctionofinitialmass(currentmassonthetopaxis)onthe
-TP-AGB for a ∼ 200 Myr old isochrone with Z = 0.006. Red lines
+TP-AGB for a ∼ 200 Myr old isochrone with Z
 i
+= 0.006. Red lines
 showfullthermalpulses,whilebluelinesignoreluminosityspikesand
 green lines show only the quiescent evolution. The same color code
 isusedfortheperioddistributions(normalizedtotheirmaximum)on
@@ -641,8 +947,13 @@ GraylinesmarkthecriticalvaluesofperiodsatwhichtheFMbecomes
 dominant(solidline),lesssensitivetoradius(dottedline,whichoccurs
 attheverticallineforthisspecificisochrone),andindependentofradius
 (dashedline).
-M (cid:39) 3.8524M (verticalgrayline),when P (cid:39) 420days.In
-i (cid:12) FM
+M
+i
+(cid:39) 3.8524M
+(cid:12)
+(verticalgrayline),when P
+FM
+(cid:39) 420days.In
 modelswithasmallerinitialmass,theperiodisstillincreasing
 atarelativelylargerateastheenvelopeexpands,whileinmore
 massivemodelstheperiodhasalreadybecomelesssensitiveto
@@ -654,4 +965,20 @@ this maximum, while limiting the selection to DFMP, produces
 adistributionskewedtowardshortperiods,asfoundinSect.3.
 If the luminosity dips following thermal pulses are taken
 into account (blue lines), the corresponding envelope contrac-
+tion causes the period to decrease, and the cut at ∼ 360 days
+becomes less sharp. Because of mass loss, the threshold period
+P
+dom,0
+is lowered, so that the shortest period associated with
+DFMP does not correspond to the least evolved model (green
+circle),butrathertotheluminositydipofathermalpulse(blue
+circle).
+Tobeprecise,theearliestoccurrenceofDFMPisontheleft-
+mostluminosityspike(redcircle),whosedurationissoshortthat
+itisunlikelytobeobserved.Indeed,theinclusionofluminosity
+spikes alters the period distribution at long periods very little.
+Luminosity spikes are relevant only for relatively massive and
+youngTP-AGBstars,andtheygiverisetothepoorlypopulated
+portionofthePArelationatthelongestperiods,asseeninpanel
+(a)ofFig.2.
 Articlenumber,page9of9
diff --git a/read/results/pdfplumber/2201.00214.txt b/read/results/pdfplumber/2201.00214.txt
index e1c3f14..3cf7cfd 100644
--- a/read/results/pdfplumber/2201.00214.txt
+++ b/read/results/pdfplumber/2201.00214.txt
@@ -1,16 +1,59 @@
+a
+r
+X
+i
+v
+:
+2
+2
+0
+1
+.
+0
+0
+2
+1
+4
+v
+1
+[
+a
+s
+t
+r
+o
+-
+p
+h
+.
+S
+R
+]
+1
+J
+a
+n
+2
+0
+2
+2
 Temperature Analysis of Flaring
 (AR11283) and non-Flaring (AR12194)
 Coronal Loops
-2202
-Fathalian1, Hosseini Rad2, Alipour2, Safari2
-N. S. S. N. H.
+N. F
+athalian1,
+S. S. H
+osseini
+R
+ad2,
+N. A
+lipour2,
+H. S
+afari2
 1Department of Physics, Payame Noor University (PNU), 19395-3697, Tehran, Iran.
-naJ
 2Department of Physics, Faculty of Science, University of Zanjan, 45195-313, Zanjan, Iran.
 e-mail: narges_fathalian@alum.sharif.edu
-1
 January 4, 2022
-]RS.hp-ortsa[
 Abstract
 Here, westudy the temperaturestructureof flaringand non-flaringcoronal loops, usingextracted
 loops from images taken in six extreme ultraviolet (EUV) channels recorded by Atmospheric Imaging
@@ -19,7 +62,6 @@ region (AR11283) during 22:10UT till 23:00UT, on 2011, September6; and non-flari
 (AR12194) during 08:00:00UT till 09:00:00UT on 2014, October 26. By using spatially-synthesized
 Gaussian DEM forward-fitting method, we calculate the peak temperatures for each strip of the loops.
 WeapplytheLomb-Scarglemethodtocomputetheoscillationsperiodsforthetemperatureseriesofeach
-1v41200.1022:viXra
 strip. The periods of the temperature oscillations for the flaring loops are ranged from 7 min to 28.4
 min. Thesetemperatureoscillationsshow veryclosebehaviortotheslow-modeoscillation. Weobserve
 that the temperature oscillations in the flaring loops are started at least around 10 minutes before the
@@ -33,8 +75,8 @@ flaringloops’periodsshowmorediversityandtheirtemperatureshavewiderrangesofvari
 non-flaringones. Moreaccuratecommentaryinthisrespectrequiresmoreextensivestatisticalresearch
 andbroaderobservations.
 CoronalLoops,TemperatureAnalysis,TemperatureOscillations,Flaringandnon-FlaringActiveRegions
-ntroduction
 I. I
+ntroduction
 Analyzing the thermal structure of coronal loops is of considerable interest, especially as these
 magnetic loops have an essential role in heating the solar chromosphere and corona. Such anal-
 ysis can help to describe how the process of solar flaring is correlated with the loop’s thermal
@@ -113,8 +155,8 @@ the data employed and the time and properties of the flare, occurred in the acti
 section III, we explain the method we use to analyzethe time-seriesof temperaturesin different
 strips of the loops. Section IV is specified to our results, obtained related to flaring and non-
 flaringregions. InsectionVwebrieflystateasummaryofthiswork.
-ata
 II. D
+ata
 Weinvestigatethethermalstructureandtreatmentofloopsinaflaringregiontoseeifitfollows
 thetransverseoscillationsoftheloops,andweexaminethethermalfluctuationsattheflaretime.
 Forthispurpose,weselectahighenergyflarex2.1whichthetransverseoscillationsoftwoloops
@@ -151,10 +193,13 @@ indicated by the white box is featured in a zoom-in view in Figure 1.b (right) a
 selected parts of the center of the three chosen loops are shown by red lines (the movie of
 the region is available in this link). As it is clear in the movie, these three loops oscillate
 togetherandtheiroscillationsdecaysimultaneously. Thecenteroffigure1.aiscoordinated
-at (230, 165) arcsec and its width and height are 450′′ ×456′′ /750×775 pixels. The flare
+at (230, 165) arcsec and its width and height are 450 ′′ ×456 ′′ /750×775 pixels. The flare
 occurringinthisactiveregionisanX2.1classflarelocatedclosetothediskcenteratlatitude
-◦ ◦
-14 northand longitude 18 west(269.9arcsec,129.9arcsec). Thisflareinitiatesat22:12UT,
+14
+◦
+northand longitude 18
+◦
+west(269.9arcsec,129.9arcsec). Thisflareinitiatesat22:12UT,
 ends about 22:24UT with the peak at 22:20UT,and associates with a coronal mass ejection
 (CME)whichoccursfrom2011September6,21:36:05Tto2011September7,02:24:05T,with
 theradialvelocityof469km/s,angularwidthof252deg,andpositionangleof275deg(for
@@ -162,7 +207,7 @@ moredetailslookatLASCOCMEcatalogue.) 1
 – Threeloops of non-flaring activeregion 12194: Asa blind test, we selectthree loops of the
 non-flaring(nonfhereafter)activeregion12194inthesmoothtimeperiodof08:00:00UTtill
 09:00:00UT of 2014 October 26. The center of figure 2.a is coordinated at (0, -264) arcsec
-and its width and height are 615′′ ×615′′ /1025×1025 pixels. We consider the images of
+and its width and height are 615 ′′ ×615 ′′ /1025×1025 pixels. We consider the images of
 the selected area with the cadence of 12 sec in the same six wavelengths mentioned above.
 These loops are relatively motionless and do not show any transversal oscillation (see the
 region’s movie in the link). We select the loops in such a way that they do not have any
@@ -177,8 +222,12 @@ We alsoused drot_map.pro subroutine tocorrectthe differentialrotationeffect. Acc
 movie made by pre-processed images, the most obvious loops (marked in the abovementioned
 figures)areselectedineachregion(withobvioustransversaloscillationsinthecaseoftheflaring
 activeregion).
-emperature nalysis ethod
-III. T A M
+III. T
+emperature
+A
+nalysis
+M
+ethod
 We extract the selected loop segment pixels, for each loop, and calculate the normal vectors
 to each point of the loop’s direction. Then by using these data, we straighten each loop in a
 considered box with the thickness of 15 to 40 pixels (macro-pixels, depending on the available
@@ -199,29 +248,60 @@ the background from observed data is fitting a single-Gaussian cospatial functio
 function on the flux profile. The DEM for each strip is considered to be single-Gaussian DEM
 in terms of the logarithm of the temperature, which has three free parameters (Aschwanden &
 Boerner,2011):
-dEM exp(−[log(T)−log(T p,i)
-DEM = i = EM ). (1)
-i dT p,i 2σ2
+DEM
+i
+= dEM i
+dT
+= EM
+p,i
+exp(− [log(T)−log(T p,i )
+2σ2
+T,i
+). (1)
+In which, T
+p,i
+is the DEM peak temperature, EM
+p,i
+is the peak EM function, and σ
 T,i
-In which, T is the DEM peak temperature, EM is the peak EM function, and σ is the
-p,i p,i T,i
+is the
 logarithmicwidthofthetemperatureforthatstrip. Tocalculatethebackground-subtractedfluxes
 (foreachstrip)weuseEq.6ofAschwanden&Boerner(2011)(inbelow):
+F
+0λ
+=
+Z
 dEM(T)
-F = R (T)dT =∑EM(T )R (T ). (2)
-0λ Z dT λ k λ k
+dT
+R
+λ
+(T)dT =∑
+k
+EM(T
+k
+)R
+λ
+(T
 k
-Here,R (T)istheinstrumentaltemperatureresponsefunctionofeachwavelengthfilterλ,which
+). (2)
+Here,R
 λ
+(T)istheinstrumentaltemperatureresponsefunctionofeachwavelengthfilterλ,which
 is obtained by the code aia_get_response.pro in the SSW package. As time has passed, the AIA
 response functions calibration has partly changed. Here, we use the updated calibration of the
 temperature response functions, for each of the AIA temperature filters, according to the CHI-
 ANTIVersion2019codeavailableintheSolarSoftWare(SSW).Afterforward-fittingtheGaussian
 DEMtothebackground-subtractedobservedfluxesinmultiplewavelengths,thethree-fittingpa-
-rameters,temperaturewidth(σ ),peakoftemperature(T ),andpeakemissionmeasure(EM )
-T,i p,i p,i
-arefoundbyminimizing χ2.
+rameters,temperaturewidth(σ
+T,i
+),peakoftemperature(T
+p,i
+),andpeakemissionmeasure(EM
+p,i
+)
+arefoundbyminimizing χ2
 i
+.
 Our data sample is uneven because of omitting some damaged images in between. There-
 fore to analyze the temperature oscillations, we use the Lomb-Scargle method. This method is
 developed to use the technique periodogram, in the case where the observation times are un-
@@ -238,8 +318,8 @@ the periods, we computed the probability values (p-values). In the Lomb-Scargle
 significance returned here is the false alarm probability of the null hypothesis, i.e., as the data
 is composed of independent Gaussian random variables. Accordingly, low probability values
 (p-valuelessthan0.05)indicateahighdegreeofsignificanceintheassociatedperiodicsignal.
-esults
 IV. R
+esults
 i. Temperature Analysis of Flaring Active Region Loops
 Thenceforth the temperature time-series of different strips of the selected loops are calculated
 using the method described in section 3. In the following figures, the vertical axis shows the
@@ -389,8 +469,8 @@ withthenon-flaringones. Andfigure9showsthattheincreasinganddecreasingoftemperatu
 range,orthedifferencebetweenmaximumandminimumofthetemperaturevalue(max(log(T))-
 min(log(T))),ismuchhigheronaveragefortheloops’stripsoftheflaringARincomparisonwith
 theloops’stripsofthenon-flaring one.
-ummery
 V. S
+ummery
 We reported the temperature oscillations of coronal loops of a flaring active region. We selected
 theflaringactiveregion11283toinvestigatethethermalstructureandtreatmentofitsloops. This
 regionincludesahighenergyflarex2.1andthetransverseoscillationsoftwoloopsofithavebeen
@@ -473,159 +553,212 @@ oscillatory manner. Compared with these non-flaring loops, the flaring loops sho
 peraturesonaverageandhigher oscillation periodswith higher peaksanddeepervalleys. More
 accurate commentary in this respect requiresmore extensive statistical researchand broader ob-
 servations.
+arcsec
+arcsec
+79 154 229 304 379 454
+−68
+25
+118
+211
+304
 397
 a
-304
-211
-cescra
-311 b
-118
+arcsec
+arcsec
+114.6 171.2 227.8 284.4 341
+171.4
+206.3
+241.2
 276.1
-Loop C2
-25 241.2 Loop C1 Loop B2 cescra
+311
 Loop B1 Loop A
-206.3
-−68
-79 154 229 304 379 454 171.4
-arcsec 114.6 171.2 a2 rc2 s7 e.8 c 284.4 341
+Loop C2
+Loop C1
+b
+Loop B2
 Figure1: (a)AIAimageoftheAR11283on2011September6,22:10UTasseeninthe171 filter. (b)Zoom-inview
 oftheareamarkedbyaboxintheleft. Theselectedloopsaredistinguishedinred. TheloopsAandBare
 thesameloopsstudiedbyJainetal.(2015)(seeFig.3ainJainetal.(2015)).
+arcsec
+arcsec
+−154 0 154 308
+−572
+−418
+−264
+−110
 44
 a
-−110
+arcsec
+arcsec
+−202 −134 −66 2 70
+−396
+−338
+−280
+−221
 −162
+nonf−LoopA
+nonf−LoopB
+nonf−LoopC
 b
-nonf−LoopB cescra
-−264 −221
-cescra
-−280
-−418
-−338 nonf−LoopC nonf−LoopA
-−572
-−154 0 154 308 −396
-arcsec −202 −134 a− r6 c6 sec 2 70
 Figure2: (a)TheNOAAAR12194on2014October26,at08:00:00UTin171 recordedbyAIA/SDO.(b)Zoom-in
 viewofthearea,markedbyaboxintheleft,theloopsaredistinguishedinred.
-F−LoopA
-6.8
-6.6
-6.4 TgoL
-6.2
-6
 5.8
-6.8
-6.6
-6.4 TgoL
-6.2
 6
-5.8
-6.8
-6.6
-6.4 TgoL
 6.2
-6
-5.8
-22:10 22:20 22:30 22:40 22:50 23:00
-time
-F−LoopB1
-6.8
+6.4
 6.6
-6.4 TgoL
-6.2
-6
-5.8
 6.8
-6.6
-6.4 TgoL
+L o
+g T
+F−LoopA
+5.8
+6
 6.2
+6.4
+6.6
+6.8
+L o
+g T
+22:10 22:20 22:30 22:40 22:50 23:00
+5.8
 6
+6.2
+6.4
+6.6
+6.8
+time
+L o
+g T
 5.8
+6
+6.2
+6.4
+6.6
+6.8
+LogT
+F−LoopB1
 22:10 22:20 22:30 22:40 22:50 23:00
+5.8
+6
+6.2
+6.4
+6.6
+6.8
 time
+LogT
 Figure3: Fromuptodown: Thetime-seriesofthetemperatureoscillationsforthefirst3stripsofLoopA(strip1to
 3fromtoptodown),andthefirst2stripsofLoopB1. Horizontalaxisisthetimeandtheverticalaxisisthe
 logarithmofthetemperature. Theredlinesmarktheinitialandfinaltimeoftheflarex2.1.
-F−loopA
-42
-6.8
-32 6.6
-)mM(htgneL
-6.4
+22:10 22:20 22:30 22:40 22:50 23:00
+0
+11
 21
-6.2 pooL
-11 6
-5.8
-20
-2 : 10 22:20 22:30 22:40 22:50 23:00
+32
+42
+F−loopA
 Time
-F−loopB1
-20 6.5
-6.45
+Loop
+Length( M
+m)
+5.8
+6
+6.2
 6.4
-15
-6.35
-)mM(htgneL
-6.3
-10 6.25
-6.2 pooL
-6.15
+6.6
+6.8
+22:10 22:20 22:30 22:40 22:50 23:00
+0
 5
-6.1
-6.05
-20 6
-2 : 10 22:20 22:30 22:40 22:50 23:00
+10
+15
+20
+F−loopB1
 Time
-F−loopB2
-16
-6.8
-12 6.6
-)mM(htgneL
+Loop
+Length( M
+m)
+6
+6.05
+6.1
+6.15
+6.2
+6.25
+6.3
+6.35
 6.4
+6.45
+6.5
+22:10 22:20 22:30 22:40 22:50 23:00
+0
+4
 8
-6.2 pooL
-4 6
-5.8
-20
-2 : 10 22:20 22:30 22:40 22:50 23:00
+12
+16
+F−loopB2
 Time
-F−loopC1
-22
-6.8
-17 6.6
-)mM(htgneL
-6.4
-11
-6.2
-pooL
+Loop
+Length( M
+m)
+5.8
 6
+6.2
+6.4
+6.6
+6.8
+22:10 22:20 22:30 22:40 22:50 23:00
+0
 6
-5.8
-20 5.6
-2 : 10 22:20 22:30 22:40 22:50 23:00
-Time
-F−loopC2
 11
-6.8
-8 6.6
-)mM(htgneL
+17
+22
+F−loopC1
+Time
+Loop
+Length(
+M
+m)
+5.6
+5.8
+6
+6.2
 6.4
+6.6
+6.8
+22:10 22:20 22:30 22:40 22:50 23:00
+0
+3
 6
-6.2 pooL
-3 6
-5.8
-20
-2 : 10 22:20 22:30 22:40 22:50 23:00
+8
+11
+F−loopC2
 Time
+Loop
+Length( M
+m)
+5.8
+6
+6.2
+6.4
+6.6
+6.8
 Figure4: TemperaturemapoftheflaringloopsA,B1,B2,C1,andC2(fromtoptodown)asatimeseries. Thevertical
 axisisthedistancealongtheloopinMm,andthehorizontalaxisisthetime. Thecolorbarintheleftshows
 thecolorsconsideredforthetemperaturerange.
 Table1: ThepropertiesobservedfortheloopsegmentsoftheflaringAR.
-Thehighest Thehighest
-FLoopA Max(log(T))- FLoopB2 Max(log(T))-
-Temp.’speriod Temp.’speriod
-(StripNumber) Min(log(T)) (StripNumber) Min(log(T))
-observed observed
+FLoopA
+(StripNumber)
+Thehighest
+Temp.’speriod
+observed
+Max(log(T))-
+Min(log(T))
+FLoopB2
+(StripNumber)
+Thehighest
+Temp.’speriod
+observed
+Max(log(T))-
+Min(log(T))
 1 9.94 1.09 1 18.07 0.68
 2 16.57 0.79 2 24.85 0.83
 3 8.46 0.65 3 24.85 0.85
@@ -664,11 +797,13 @@ FLoopB1 - - 4 16.57 0.93
 10 18.07 1.6
 11 18.07 1.6
 Table2: ThepropertiesobservedfortheloopsegmentsofthenonflaringAR.
+Nonf-LoopA
+(StripNumber)
 Thehighest
-Nonf-LoopA Max(log(T))-
 Temp.’speriod
-(StripNumber) Min(log(T))
 observed
+Max(log(T))-
+Min(log(T))
 1 24 0.61
 2 30 0.95
 3 30 0.81
@@ -680,171 +815,205 @@ observed
 9 30 0.72
 10 30 0.77
 11 30 0.61
+Nonf-LoopB
+(StripNumber)
 Thehighest
-Nonf-LoopB Max(log(T))-
 Temp.’speriod
-(StripNumber) Min(log(T))
 observed
+Max(log(T))-
+Min(log(T))
 1 26.66 0.36
 2 26.66 0.64
 3 10.43 0.45
 4 12 0.62
 5 30 0.98
 6 8.57 0.67
+Nonf-LoopC
+(StripNumber)
 Thehighest
-Nonf-LoopC Max(log(T))-
 Temp.’speriod
-(StripNumber) Min(log(T))
 observed
+Max(log(T))-
+Min(log(T))
 1 26.66 0.76
 2 26.66 0.75
 3 26.66 0.26
 4 30 0.27
 5 30 0.8
-Int−Fe−LoopA
-43 0.2
-0.18
-0.16
-32
-0.14
-)mM(htgneL
-0.12
-22 0.1
-0.08 pooL
-0.06
+22:10 22:20 22:30 22:40 22:50 23:00
+0
 11
-0.04
-0.02
-20 0
-2 : 10 22:20 22:30 22:40 22:50 23:00
-Time
+22
+32
+43
 Int−Fe−LoopA
-1
-0.9
-0.8
-IIIVX
-0.7
-eF
-0.6 ytisnetnI
-0.5
-0.4 dezilamroN
-0.3
-0.2
-0.1
+Time
+Loop
+Length(
+M
+m)
 0
+0.02
+0.04
+0.06
+0.08
+0.1
+0.12
+0.14
+0.16
+0.18
+0.2
 22:10 22:20 22:30 22:40 22:50 23:00
+0
+0.1
+0.2
+0.3
+0.4
+0.5
+0.6
+0.7
+0.8
+0.9
+1
+Int−Fe−LoopA
 Time
+Nor
+malized
+Intensity
+Fe
+XVIII
 Figure5: Normalized intensity map of the flaring loop A for the wavelength Fe XVIII, and mean intensity of Fe
 XVIII (from top to down). The vertical axis is the distance along the loop in Mm for the first plot, and
 normalizedintensityforthesecond. Thehorizontalaxisisthetime. Thecolorbarintheleftshowsthecolors
 consideredfortheIntensityrange.
-acknowledgements
 VI.
+acknowledgements
 The author Narges Fathalian wishes to also express her thanks for the technical support and
 comments which has received from Dr.Farhad Daii and Dr.Mohsen Javaherian regarding to this
 work.
-NonF−LoopA
-6.8
-6.6
-6.4 TgoL
-6.2
-6
 5.8
-6.8
-6.6
-6.4 TgoL
-6.2
 6
-5.8
-8:00 8:10 8:20 8:30 8:40 8:50 9:00
-time
-NonF−LoopB
-6.8
-6.6
-6.4 TgoL
 6.2
-6
-5.8
-6.8
+6.4
 6.6
-6.4 TgoL
-6.2
+6.8
+LogT
+NonF−LoopA
+8:00 8:10 8:20 8:30 8:40 8:50 9:00
+5.8
 6
+6.2
+6.4
+6.6
+6.8
+time
+LogT
 5.8
+6
+6.2
+6.4
+6.6
+6.8
+LogT
+NonF−LoopB
 8:00 8:10 8:20 8:30 8:40 8:50 9:00
+5.8
+6
+6.2
+6.4
+6.6
+6.8
 time
+LogT
 Figure6: from top to down: The time-series of the temperature for the first 2 strips (from top to down) of the non-
 flaringLoopsAandB.Horizontalaxisisthetimeandtheverticalaxisisthelogarithmofthetemperature.
-NonF−loopA
-20
-6.8
-15 6.6
-)mM(htgneL
-6.4
-10
-6.2 pooL
-5 6
-5.8
-0
 8:10 8:20 8:30 8:40 8:50 9:00
-Time
-NonF−loopB
-18
-6.8
-14 6.6
-)mM(htgneL
-6.4
-9
-6.2 pooL
-5 6
-5.8
 0
-8:10 8:20 8:30 8:40 8:50 9:00
-Time
-NonF−loopC
+5
 10
-6.8
-8 6.6
-)mM(htgneL
+15
+20
+NonF−loopA
+Time
+Loop
+Length( M
+m)
+5.8
+6
+6.2
 6.4
+6.6
+6.8
+8:10 8:20 8:30 8:40 8:50 9:00
+0
 5
-6.2 pooL
-3 6
+9
+14
+18
+NonF−loopB
+Time
+Loop
+Length( M
+m)
 5.8
-0
+6
+6.2
+6.4
+6.6
+6.8
 8:10 8:20 8:30 8:40 8:50 9:00
+0
+3
+5
+8
+10
+NonF−loopC
 Time
+Loop
+Length( M
+m)
+5.8
+6
+6.2
+6.4
+6.6
+6.8
 Figure7: fromtoptodown: Temperaturemapofthenon-flaringloopsA,BandCasatime-series. Theverticalaxis
 isthedistancealongtheloopinMm,andthehorizontalaxisisthetime. Thecolor-barintheleftshowsthe
 colorsconsideredforthetemperaturerange.
-0.4
-0.35
-0.3 sdoireP
-0.25 .pmeT
-0.2 fo
-egatnecreP
-0.15
-0.1
-0.05
-0
 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
+0
+0.05
+0.1
+0.15
+0.2
+0.25
+0.3
+0.35
+0.4
 Temp. Period (min)
+Percentage
+of
+Te
+mp.
+Periods
 Figure8: Hisogram of the temperature periods percentages for the loops’ strips of the flaring (blue bars) and non-
 flaring(redbars)ARs. Thehorizontalaxisshowsthetemperatureperiodsinminute.
-12
-10
-8
-rebmuN
-6
-4
-2
-0
 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1 1.1 1.2 1.3 1.4 1.5 1.6 1.7
+0
+2
+4
+6
+8
+10
+12
 max(log(T))−min(log(T))
+Nu
+mber
 Figure9: Hisogramoftheparameterof(max(log(T))-min(log(T)))foreachstripoftheloopsoftheflaring(bluebars)
 andnon-flaring(redbars)ARs.
-eferences
 R
+eferences
 Abedini,A.,Safari,H.,&Nasiri,S.2012,SolarPhysics, 280
 Anfinogentov, S., Nakariakov, V. M., Mathioudakis, M., Van Doorsselaere, T., & Kowalski, A. F.
 2013,ApJ,773,156
diff --git a/read/results/pdfplumber/GeoTopo-book.txt b/read/results/pdfplumber/GeoTopo-book.txt
index e3ab1bf..5c1deb9 100644
--- a/read/results/pdfplumber/GeoTopo-book.txt
+++ b/read/results/pdfplumber/GeoTopo-book.txt
@@ -24,15 +24,29 @@ oder der Oberfläche einer Pyramide verformen, aber nicht zum R2 oder zu einem T
 den R2 müsste man die Oberfläche unendlich ausdehnen und für einen Torus müsste man ein
 Loch machen.
 Erforderliche Vorkenntnisse
-EswirdeinsichererUmgangmitdenQuantoren( , ),Mengenschreibweisen( , , , ,R, (M))
-∀ ∃ ∪ ∩ \ ∅ P
+EswirdeinsichererUmgangmitdenQuantoren(
+∀
+,
+∃
+),Mengenschreibweisen(
+∪
+,
+∩
+,
+\
+,
+∅
+,R,
+P
+(M))
 und ganz allgemein formaler Schreibweise vorausgesetzt. Auch die Beweisführung mittels Wider-
 spruchsbeweisen sollte bekannt sein und der Umgang mit komplexen Zahlen C, deren Betrag,
 Folgen und Häufungspunkten nicht weiter schwer fallen. Diese Vorkenntnisse werden vor allem
 in „Analysis I“ vermittelt.
 Außerdem wird vorausgesetzt, dass (affine) Vektorräume, Faktorräume, lineare Unabhängigkeit,
-derSpektralsatzundderprojektiveRaum (R)aus„LineareAlgebraI“ bekanntsind.In„Lineare
+derSpektralsatzundderprojektiveRaum
 P
+(R)aus„LineareAlgebraI“ bekanntsind.In„Lineare
 Algebra II“ wird der Begriff der Orthonormalbasis eingeführt.
 iii
 (a) S2 (b) Würfel (c) Pyramide
@@ -82,658 +96,1642 @@ Stichwortverzeichnis 111
 1 Topologische Grundbegriffe
 1.1 Topologische Räume
 Definition 1
-Ein topologischer Raum ist ein Paar (X,T) bestehend aus einer Menge X und T (X)
+Ein topologischer Raum ist ein Paar (X,T) bestehend aus einer Menge X und T
 ⊆ P
+(X)
 mit folgenden Eigenschaften
-(i) ,X T
-∅ ∈
-(ii) Sind U ,U T, so ist U U T
-1 2 1 2
-∈ ∩ ∈
+(i)
+∅
+,X
+∈
+T
+(ii) Sind U
+1
+,U
+2
+∈
+T, so ist U
+1
+∩
+U
+2
+∈
+T
+(iii) Ist I eine Menge und U
+i
+∈
+T für jedes i
+∈
+I, so ist
 (cid:91)
-(iii) Ist I eine Menge und U T für jedes i I, so ist U T
-i i
-∈ ∈ ∈
 i∈I
+U
+i
+∈
+T
 Die Elemente von T heißen offene Teilmengen von X.
-A X heißt abgeschlossen, wenn X A offen ist.
-⊆ \
+A
+⊆
+X heißt abgeschlossen, wenn X
+\
+A offen ist.
 Es gibt auch Mengen, die weder abgeschlossen, noch offen sind wie z. B. [0,1). Auch gibt es
 Mengen, die sowohl abgeschlossen als auch offen sind.
 Bemerkung 1 (Mengen, die offen & abgeschlossen sind, ex.)
-Betrachte und X mit der trivialen Topologie T = ,X .
+Betrachte
+∅
+und X mit der trivialen Topologie T
 triv
-∅ {∅ }
-Es gilt: X T und T, d. h. X und sind offen. Außerdem XC = X X = T und
-∈ ∅ ∈ ∅ \ ∅ ∈
-X = X T, d. h. X und sind als Komplement offener Mengen abgeschlossen. (cid:4)
-\∅ ∈ ∅
+=
+{∅
+,X
+}
+.
+Es gilt: X
+∈
+T und
+∅ ∈
+T, d. h. X und
+∅
+sind offen. Außerdem XC = X
+\
+X =
+∅ ∈
+T und
+X
+\∅
+= X
+∈
+T, d. h. X und
+∅
+sind als Komplement offener Mengen abgeschlossen. (cid:4)
 Beispiel 1 (Topologien)
-1) X = Rn mit der von der euklidischen Metrik erzeugten Topologie T :
+1) X = Rn mit der von der euklidischen Metrik erzeugten Topologie T
 Euklid
-U Rn offen für jedes x U gibt es r > 0,
-⊆ ⇔ ∈
-sodass B (x) = y Rn d(x,y) < r U
+:
+U
+⊆
+Rn offen
+⇔
+für jedes x
+∈
+U gibt es r > 0,
+sodass B
 r
-{ ∈ | } ⊆
+(x) =
+{
+y
+∈
+Rn
+|
+d(x,y) < r
+} ⊆
+U
 Diese Topologie wird auch „Standardtopologie des Rn“ genannt. Sie beinhaltet unter
 anderem alle offenen Kugeln, aber z. B. auch Schnitte zweier Kugeln mit unterschiedli-
 chem Mittelpunkt (vgl. Definition 1.ii).
 2) Jeder metrische Raum (X,d) ist auch ein topologischer Raum.
-3) Für eine Menge X heißt T = (X) diskrete Topologie.
+3) Für eine Menge X heißt T
 Diskret
+=
 P
-4) X := R,T := U R R U endlich heißt Zariski-Topologie
+(X) diskrete Topologie.
+4) X := R,T
 Z
-{ ⊆ | \ }∪{∅}
+:=
+{
+U
+⊆
+R
+|
+R
+\
+U endlich
+}∪{∅}
+heißt Zariski-Topologie
 Beobachtungen:
-U T f R[X], sodass R U = V(f) = x R f(x) = 0
-Z
-• ∈ ⇔ ∃ ∈ \ { ∈ | }
-Es gibt keine disjunkten offenen Mengen in T .
+•
+U
+∈
+T
 Z
+⇔ ∃
+f
+∈
+R[X], sodass R
+\
+U = V(f) =
+{
+x
+∈
+R
+|
+f(x) = 0
+}
 •
+Es gibt keine disjunkten offenen Mengen in T
+Z
+.
 4 1.1.TOPOLOGISCHERÄUME
-5) X := Rn,T = U Rn Es gibt Polynome f ,...,f R[X ,...,X ] sodass
-Z 1 r 1 n
-{ ⊆ | ∈
-Rn U = V(f ,...,f )
-1 r
-\ }
-6) X := 0,1 ,T = , 0,1 , 0 heißt Sierpińskiraum.
-{ } {∅ { } { }}
-, 0,1 , 1 sind dort alle abgeschlossenen Mengen.
-∅ { } { }
+5) X := Rn,T
+Z
+=
+{
+U
+⊆
+Rn
+|
+Es gibt Polynome f
+1
+,...,f
+r
+∈
+R[X
+1
+,...,X
+n
+] sodass
+Rn
+\
+U = V(f
+1
+,...,f
+r
+)
+}
+6) X :=
+{
+0,1
+}
+,T =
+{∅
+,
+{
+0,1
+}
+,
+{
+0
+}}
+heißt Sierpińskiraum.
+∅
+,
+{
+0,1
+}
+,
+{
+1
+}
+sind dort alle abgeschlossenen Mengen.
 Definition 2
-Sei (X,T) ein topologischer Raum und x X.
+Sei (X,T) ein topologischer Raum und x
+∈
+X.
+Eine Teilmenge U
+⊆
+X heißt Umgebung von x, wenn es ein U
+0
+∈
+T gibt mit x
 ∈
-Eine Teilmenge U X heißt Umgebung von x, wenn es ein U T gibt mit x U und
-0 0
-⊆ ∈ ∈
-U U.
+U
+0
+und
+U
 0
 ⊆
+U.
 Gilt eine Eigenschaft in einer Umgebung, so sagt man, dass die Eigenschaft lokal gilt.
 Definition 3
-Sei (X,T) ein topologischer Raum und M X eine Teilmenge.
+Sei (X,T) ein topologischer Raum und M
 ⊆
+X eine Teilmenge.
+a) M◦ :=
+{
+x
+∈
+M
+|
+M ist Umgebung von x
+}
+=
 (cid:91)
-a) M◦ := x M M ist Umgebung von x = U heißt Inneres oder offener
-{ ∈ | }
 U⊆M
 U∈T
+U heißt Inneres oder offener
 Kern von M.
+b) M :=
 (cid:92)
-b) M := A heißt abgeschlossene Hülle oder Abschluss von M.
 M⊆A
 Aabgeschlossen
-c) ∂M := M M◦ heißt Rand von M.
+A heißt abgeschlossene Hülle oder Abschluss von M.
+c) ∂M := M
 \
+M◦ heißt Rand von M.
 d) M heißt dicht in X, wenn M = X ist.
 Beispiel 2
 1) Sei X = R mit euklidischer Topologie und M = Q. Dann gilt: M = R und M◦ =
 ∅
 2) Sei X = R und M = (a,b). Dann gilt: M = [a,b]
-3) Sei X = R,T = T und M = (a,b). Dann gilt: M = R
+3) Sei X = R,T = T
 Z
+und M = (a,b). Dann gilt: M = R
 Definition 4
 Sei (X,T) ein topologischer Raum.
-a) B T heißt Basis der Topologie T, wenn jedes U T Vereinigung von Elementen
-⊆ ∈
+a) B
+⊆
+T heißt Basis der Topologie T, wenn jedes U
+∈
+T Vereinigung von Elementen
 aus B ist.
-b) T heißt Subbasis der Topologie T, wenn jedes U T Vereinigung von endlichen
-S ⊆ ∈
-Durchschnitten von Elementen aus ist.
+b)
+S ⊆
+T heißt Subbasis der Topologie T, wenn jedes U
+∈
+T Vereinigung von endlichen
+Durchschnitten von Elementen aus
 S
+ist.
 Beispiel 3 (Basis und Subbasis)
 1) Jede Basis ist auch eine Subbasis, z.B.
-S = (a,b) a,b R,a < b ist für R mit der Standardtopologie sowohl Basis als
-{ | ∈ }
+S =
+{
+(a,b)
+|
+a,b
+∈
+R,a < b
+}
+ist für R mit der Standardtopologie sowohl Basis als
 auch Subbasis.
 2) Gegeben sei X = Rn mit euklidischer Topologie T. Dann ist
-B = B (x) r Q ,x Qn
-r >0
-{ | ∈ ∈ }
+B =
+{
+B
+r
+(x)
+|
+r
+∈
+Q
+>0
+,x
+∈
+Qn
+}
 ist eine abzählbare Basis von T.
-3) Sei(X,T)eintopologischerRaummitX = 0,1,2 undT = , 0 , 0,1 , 0,2 ,X .
-{ } {∅ { } { } { } }
-Dann ist = , 0,1 , 0,2 eine Subbasis von T, da gilt:
-S {∅ { } { }}
+3) Sei(X,T)eintopologischerRaummitX =
+{
+0,1,2
+}
+undT =
+{∅
+,
+{
+0
+}
+,
+{
+0,1
+}
+,
+{
+0,2
+}
+,X
+}
+.
+Dann ist
+S
+=
+{∅
+,
+{
+0,1
+}
+,
+{
+0,2
+}}
+eine Subbasis von T, da gilt:
 5 1.1.TOPOLOGISCHERÄUME
-T
 • S ⊆
-, 0,1 und 0,2
-• ∅ { } { } ∈ S
-0 = 0,1 0,2
-• { } { }∩{ }
-X = 0,1 0,2
-• { }∪{ }
-Allerings ist keine Basis von (X,T), da 0 nicht als Vereinigung von Elementen
-S { }
-aus erzeugt werden kann.
+T
+• ∅
+,
+{
+0,1
+}
+und
+{
+0,2
+} ∈ S
+• {
+0
+}
+=
+{
+0,1
+}∩{
+0,2
+}
+•
+X =
+{
+0,1
+}∪{
+0,2
+}
+Allerings ist
 S
+keine Basis von (X,T), da
+{
+0
+}
+nicht als Vereinigung von Elementen
+aus
+S
+erzeugt werden kann.
 Bemerkung 2
-Sei X eine Menge und (X). Dann gibt es genau eine Topologie T auf X, für die
-S ⊆ P S
+Sei X eine Menge und
+S ⊆ P
+(X). Dann gibt es genau eine Topologie T auf X, für die
+S
 Subbasis ist.
 Definition 5
-Sei (X,T) ein topologischer Raum und Y X.
+Sei (X,T) ein topologischer Raum und Y
 ⊆
-T := U Y U T ist eine Topologie auf Y.
+X.
+T
+Y
+:=
+{
+U
+∩
+Y
+|
+U
+∈
+T
+}
+ist eine Topologie auf Y.
+T
+Y
+heißt Teilraumtopologie und (Y,T
 Y
-{ ∩ | ∈ }
-T heißt Teilraumtopologie und (Y,T ) heißt ein Teilraum von (X,T).
-Y Y
+) heißt ein Teilraum von (X,T).
 Die Teilraumtopologie wird auch Spurtopologie oder Unterraumtopologie genannt.
 Definition 6
-Seien X ,X topologische Räume.
-1 2
-U X X sei offen, wenn es zu jedem x = (x ,x ) U Umgebungen U um x mit
-1 2 1 2 i i
-⊆ × ∈
-i = 1,2 gibt, sodass U U U gilt.
-1 2
-× ⊆
-T = U X X U offen isteineTopologieaufX X .SieheißtProdukttopologie.
-1 2 1 2
-{ ⊆ × | } ×
-B = U U U offen in X ,i = 1,2 ist eine Basis von T.
-1 2 i i
-{ × | }
+Seien X
+1
+,X
+2
+topologische Räume.
+U
+⊆
+X
+1
+×
 X
 2
+sei offen, wenn es zu jedem x = (x
+1
+,x
+2
+)
+∈
+U Umgebungen U
+i
+um x
+i
+mit
+i = 1,2 gibt, sodass U
+1
+×
+U
+2
+⊆
+U gilt.
+T =
+{
 U
-x
-U x
-2 2
+⊆
+X
+1
+×
+X
+2
+|
+U offen
+}
+isteineTopologieaufX
+1
+×
 X
+2
+.SieheißtProdukttopologie.
+B =
+{
+U
 1
+×
+U
+2
+|
+U
+i
+offen in X
+i
+,i = 1,2
+}
+ist eine Basis von T.
+U
+x
+x
+2
 x
 1
 U
+2
+U
+1
+X
+1
+X
+2
+Abbildung 1.1: Zu x = (x
+1
+,x
+2
+) gibt es Umgebungen U
+1
+,U
+2
+mit U
 1
-Abbildung 1.1: Zu x = (x ,x ) gibt es Umgebungen U ,U mit U U U
-1 2 1 2 1 2
-× ⊆
+×
+U
+2
+⊆
+U
 Beispiel 4 (Produkttopologien)
-1) X = X = R mit euklidischer Topologie.
-1 2
-Die Produkttopologie auf R R = R2 stimmt mit der euklidischen Topologie auf
-⇒ ×
+1) X
+1
+= X
+2
+= R mit euklidischer Topologie.
+⇒
+Die Produkttopologie auf R
+×
+R = R2 stimmt mit der euklidischen Topologie auf
 R2 überein.
-2) X = X = R mit Zariski-Topologie. T Produkttopologie auf R2: U U
-1 2 1 2
+2) X
+1
+= X
+2
+= R mit Zariski-Topologie. T Produkttopologie auf R2: U
+1
 ×
+U
+2
 (Siehe Abbildung 1.2)
 6 1.1.TOPOLOGISCHERÄUME
-U2
+U
+1
+= R
+\
+N
+U
+2
 =
 R
 \
 N
-U = R N
-1
-\
 Abbildung 1.2: Zariski-Topologie auf R2
 Definition 7
-Sei X ein topologischer Raum, eine Äquivalenzrelation auf X, X = X/ sei die Menge
+Sei X ein topologischer Raum,
 ∼
+eine Äquivalenzrelation auf X, X = X/
 ∼
-der Äquivalenzklassen, π : X X, x [x] .
+sei die Menge
+der Äquivalenzklassen, π : X
+→
+X, x
+(cid:55)→
+[x]
 ∼
-→ (cid:55)→
+.
 T X := (cid:8) U ⊆ X (cid:12) (cid:12) π−1(U) ∈ T X (cid:9)
-(X,T ) heißt Quotiententopologie.
+(X,T
 X
+) heißt Quotiententopologie.
 Beispiel 5
-X = R,a b : a b Z
-∼ ⇔ − ∈
-π−1(u)
+X = R,a
+∼
+b :
+⇔
 a
+−
+b
+∈
+Z
 R
 -1 0 1 2 3 4 5
+0
 a
 U
+a
+π−1(u)
 0
-0 1, d. h. [0] = [1]
 ∼
+1, d. h. [0] = [1]
 Beispiel 6
-Sei X = R2 und (x ,y ) (x ,y ) x x Z und y y Z. Dann ist X/ ein Torus.
-1 1 2 2 1 2 1 2 ∼
-∼ ⇔ − ∈ − ∈
-Beispiel 7 (Projektiver Raum)
-X = Rn+1 0 , x y λ R× mit y = λx
-\{ } ∼ ⇔ ∃ ∈
-x und y liegen auf der gleichen
-⇔
-Ursprungsgerade
-X = n(R)
+Sei X = R2 und (x
+1
+,y
+1
+)
+∼
+(x
+2
+,y
+2
+)
+⇔
+x
+1
+−
+x
+2
+∈
+Z und y
+1
+−
+y
+2
+∈
+Z. Dann ist X/
+∼
+ein Torus.
+Beispiel 7 (Projektiver Raum)
+X = Rn+1
+\{
+0
+}
+, x
+∼
+y
+⇔ ∃
+λ
+∈
+R× mit y = λx
+⇔
+x und y liegen auf der gleichen
+Ursprungsgerade
+X =
 P
+n(R)
 7 1.2.METRISCHERÄUME
 Also für n = 1:
+−
 4
-2
-4 2 2 4 6 8
-− −
-2
+−
+2 2 4 6 8
 −
 4
 −
+2
+2
+4
 1.2 Metrische Räume
 Definition 8
-Sei X eine Menge. Eine Abbildung d : X X R+ heißt Metrik, wenn gilt:
-× → 0
-(i) Definitheit: d(x,y) = 0 x = y x,y X
-⇔ ∀ ∈
-(ii) Symmetrie: d(x,y) = d(y,x) x,y X
-∀ ∈
-(iii) Dreiecksungleichung: d(x,z) d(x,y)+d(y,z) x,y,z X
-≤ ∀ ∈
+Sei X eine Menge. Eine Abbildung d : X
+×
+X
+→
+R+
+0
+heißt Metrik, wenn gilt:
+(i) Definitheit: d(x,y) = 0
+⇔
+x = y
+∀
+x,y
+∈
+X
+(ii) Symmetrie: d(x,y) = d(y,x)
+∀
+x,y
+∈
+X
+(iii) Dreiecksungleichung: d(x,z)
+≤
+d(x,y)+d(y,z)
+∀
+x,y,z
+∈
+X
 Das Paar (X,d) heißt ein metrischer Raum.
 Bemerkung 3
 Sei (X,d) ein metrischer Raum und
-B (x) := y X d(x,y) < r für x X,r R+
+B
 r
-{ ∈ | } ∈ ∈
-B = B (x) (X) x X,r R+ ist Basis einer Topologie auf X.
+(x) :=
+{
+y
+∈
+X
+|
+d(x,y) < r
+}
+für x
+∈
+X,r
+∈
+R+
+B =
+{
+B
 r
-{ ⊆ P | ∈ ∈ }
+(x)
+⊆ P
+(X)
+|
+x
+∈
+X,r
+∈
+R+
+}
+ist Basis einer Topologie auf X.
 Definition 9
-Seien (X,d ) und (Y,d ) metrische Räume und ϕ : X Y eine Abbildung mit
-X Y
+Seien (X,d
+X
+) und (Y,d
+Y
+) metrische Räume und ϕ : X
 →
-x ,x X : d (x ,x ) = d (ϕ(x ),ϕ(x ))
-1 2 X 1 2 Y 1 2
-∀ ∈
+Y eine Abbildung mit
+∀
+x
+1
+,x
+2
+∈
+X : d
+X
+(x
+1
+,x
+2
+) = d
+Y
+(ϕ(x
+1
+),ϕ(x
+2
+))
 Dann heißt ϕ eine Isometrie von X nach Y.
 Beispiel 8 (Skalarprodukt erzeugt Metrik)
-Sei V ein euklidischer oder hermitescher Vektorraum mit Skalarprodukt , . Dann wird V
-(cid:112) (cid:104)· ·(cid:105)
-durch d(x,y) := x y,x y zum metrischen Raum.
-(cid:104) − − (cid:105)
+Sei V ein euklidischer oder hermitescher Vektorraum mit Skalarprodukt
+(cid:104)·
+,
+·(cid:105)
+. Dann wird V
+durch d(x,y) :=
+(cid:112)
+(cid:104)
+x
+−
+y,x
+−
+y
+(cid:105)
+zum metrischen Raum.
 Beispiel 9 (diskrete Metrik)
 Sei X eine Menge. Dann heißt
+d(x,y) =
 (cid:40)
 0 falls x = y
-d(x,y) =
-1 falls x = y
+1 falls x
 (cid:54)
+= y
 die diskrete Metrik. Die Metrik d induziert die diskrete Topologie.
 8 1.2.METRISCHERÄUME
 Beispiel 10
-X = R2 und d((x ,y ),(x ,y )) := max( x x , y y ) ist Metrik.
-1 1 2 2 1 2 1 2
-(cid:107) − (cid:107) (cid:107) − (cid:107)
+X = R2 und d((x
+1
+,y
+1
+),(x
+2
+,y
+2
+)) := max(
+(cid:107)
+x
+1
+−
+x
+2
+(cid:107)
+,
+(cid:107)
+y
+1
+−
+y
+2
+(cid:107)
+) ist Metrik.
 Beobachtung: d erzeugt die euklidische Topologie.
+B
+r
+(0) =
 r r
-B (0) = r
 r
 r
-(a) B (0) (b) Euklidische Topologie
+(a) B
 r
+(0) (b) Euklidische Topologie
 Abbildung 1.3: Veranschaulichungen zur Metrik d aus Beispiel 10
 9 1.2.METRISCHERÄUME
 Beispiel 11 (SNCF-Metrik1)
 X = R2
+−
 4
-2
-4 2 2 4 6 8
-− −
-2
+−
+2 2 4 6 8
 −
 4
 −
+2
+2
+4
 Definition 10
-Ein topologischer Raum X heißt hausdorffsch, wenn es für je zwei Punkte x = y in X
+Ein topologischer Raum X heißt hausdorffsch, wenn es für je zwei Punkte x
 (cid:54)
-Umgebungen U um x und U um y gibt, sodass U U = .
-x y x y
-∩ ∅
+= y in X
+Umgebungen U
+x
+um x und U
+y
+um y gibt, sodass U
+x
+∩
+U
+y
+=
+∅
+.
 Bemerkung 4 (Trennungseigenschaft)
 Metrische Räume sind hausdorffsch, wegen
-d(x,y) > 0 ε > 0 : B (x) B (y) =
-ε ε
-⇒ ∃ ∩ ∅
+d(x,y) > 0
+⇒ ∃
+ε > 0 : B
+ε
+(x)
+∩
+B
+ε
+(y) =
+∅
 Beispiel 12 (Topologische Räume und Hausdorff-Räume)
-1) (R,T ) ist ein topologischer Raum, der nicht hausdorffsch ist.
+1) (R,T
 Z
-2) (R,T ) ist ein topologischer Hausdorff-Raum.
+) ist ein topologischer Raum, der nicht hausdorffsch ist.
+2) (R,T
 Euklid
+) ist ein topologischer Hausdorff-Raum.
 Bemerkung 5 (Eigenschaften von Hausdorff-Räumen)
-Seien X,X ,X Hausdorff-Räume.
-1 2
+Seien X,X
+1
+,X
+2
+Hausdorff-Räume.
 a) Jeder Teilraum von X ist hausdorffsch.
-b) X X ist hausdorffsch (vgl. Abbildung 1.4).
-1 2
+b) X
+1
 ×
+X
+2
+ist hausdorffsch (vgl. Abbildung 1.4).
 Definition 11
-Sei X ein topologischer Raum und (x) n∈N eine Folge in X. x X heißt Grenzwert oder
+Sei X ein topologischer Raum und (x) n∈N eine Folge in X. x
 ∈
-Limes von (x ), wenn es für jede Umgebung U von x ein n gibt, sodass x U für alle
-n 0 n
-∈
-n n .
+X heißt Grenzwert oder
+Limes von (x
+n
+), wenn es für jede Umgebung U von x ein n
 0
+gibt, sodass x
+n
+∈
+U für alle
+n
 ≥
+n
+0
+.
 Bemerkung 6
 Ist X hausdorffsch, so hat jede Folge in X höchstens einen Grenzwert.
-Beweis: Sei (x ) eine konvergierende Folge und x und y Grenzwerte der Folge.
-n
-Da X hausdorffsch ist, gibt es Umgebungen U von x und U von y mit U U = falls
-x y x y
-∩ ∅
-x = y. Da (x ) gegen x und y konvergiert, existiert ein n mit x U U für alle n n
-n 0 n x y 0
-(cid:54) ∈ ∩ ≥
-x = y (cid:4)
+Beweis: Sei (x
+n
+) eine konvergierende Folge und x und y Grenzwerte der Folge.
+Da X hausdorffsch ist, gibt es Umgebungen U
+x
+von x und U
+y
+von y mit U
+x
+∩
+U
+y
+=
+∅
+falls
+x
+(cid:54)
+= y. Da (x
+n
+) gegen x und y konvergiert, existiert ein n
+0
+mit x
+n
+∈
+U
+x
+∩
+U
+y
+für alle n
+≥
+n
+0
 ⇒
+x = y (cid:4)
 1Diese Metrik wird auch „französische Eisenbahnmetrik“ genannt.
 10 1.3.STETIGKEIT
+(x
+1
+,y
+1
+) (x
+2
+,y
+2
+)
+x
+1
+x
+2
+U
+1
+×
+X
+2
+U
+2
+×
 X
 2
-(x ,y ) (x ,y )
-1 1 2 2
 X
 1
-x x
-1 2
-U X U X
-1 2 2 2
-× ×
-Abbildung 1.4: Wenn X ,X hausdorffsch sind, dann auch X X
-1 2 1 2
+X
+2
+Abbildung 1.4: Wenn X
+1
+,X
+2
+hausdorffsch sind, dann auch X
+1
 ×
+X
+2
 1.3 Stetigkeit
 Definition 12
-Seien (X,T ),(Y,T ) topologische Räume und f : X Y eine Abbildung.
-X Y
+Seien (X,T
+X
+),(Y,T
+Y
+) topologische Räume und f : X
 →
-a) f heißt stetig : U T : f−1(U) T .
-Y X
-⇔ ∀ ∈ ∈
+Y eine Abbildung.
+a) f heißt stetig :
+⇔ ∀
+U
+∈
+T
+Y
+: f−1(U)
+∈
+T
+X
+.
 b) f heißt Homöomorphismus, wenn f stetig ist und es eine stetige Abbildung g :
-Y X gibt, sodass g f = id und f g = id .
-X Y
-→ ◦ ◦
+Y
+→
+X gibt, sodass g
+◦
+f = id
+X
+und f
+◦
+g = id
+Y
+.
 Bemerkung 72
-Seien X,Y metrische Räume und f: X Y eine Abbildung.
+Seien X,Y metrische Räume und f: X
 →
-Dann gilt: f ist stetig zu jedem x X und jedem ε > 0 gibt es δ(x,ε) > 0, sodass für
-⇔ ∈
-alle y X mit d(x,y) < δ gilt d (f(x),f(y)) < ε.
+Y eine Abbildung.
+Dann gilt: f ist stetig
+⇔
+zu jedem x
+∈
+X und jedem ε > 0 gibt es δ(x,ε) > 0, sodass für
+alle y
+∈
+X mit d(x,y) < δ gilt d
 Y
+(f(x),f(y)) < ε.
+Beweis: „
+⇒
+“: Sei x
 ∈
-Beweis: „ “: Sei x X,ε > 0 gegeben und U := B (f(x)).
+X,ε > 0 gegeben und U := B
 ε
-⇒ ∈
+(f(x)).
 Dann ist U offen in Y.
-=D =e =f. =1 =2.a f−1(U) ist offen in X. Dann ist x f−1(U).
-⇒ ∈
-δ > 0, sodass B (x) f−1(U)
+Def. 12.a =====
+⇒
+f−1(U) ist offen in X. Dann ist x
+∈
+f−1(U).
+⇒ ∃
+δ > 0, sodass B
 δ
-⇒ ∃ ⊆
-f(B (x)) U
+(x)
+⊆
+f−1(U)
+⇒
+f(B
 δ
-⇒ ⊆
-y X d (x,y) < δ Beh.
+(x))
+⊆
+U
+⇒ {
+y
+∈
+X
+|
+d
 X
-⇒ { ∈ | } ⇒
-„ “: Sei U Y offen, X f−1(U).
-⇐ ⊆ ∈
-Dann gibt es ε > 0, sodass B (f(x)) U
+(x,y) < δ
+} ⇒
+Beh.
+„
+⇐
+“: Sei U
+⊆
+Y offen, X
+∈
+f−1(U).
+Dann gibt es ε > 0, sodass B
 ε
+(f(x))
 ⊆
+U
 Vor.
-== Es gibt δ > 0, sodass f(B (x)) B (f(x)))
-δ ε
-⇒ ⊆
-B (x) f−1(B (f(x))) f−1(U) (cid:4)
-δ ε
-⇒ ⊆ ⊆
+==
+⇒
+Es gibt δ > 0, sodass f(B
+δ
+(x))
+⊆
+B
+ε
+(f(x)))
+⇒
+B
+δ
+(x)
+⊆
+f−1(B
+ε
+(f(x)))
+⊆
+f−1(U) (cid:4)
 Bemerkung 8
-Seien X,Y topologische Räume und f : X Y eine Abbildung. Dann gilt:
+Seien X,Y topologische Räume und f : X
 →
+Y eine Abbildung. Dann gilt:
 f ist stetig
-für jede abgeschlossene Teilmenge A Y gilt : f−1(A) X ist abgeschlossen.
-⇔ ⊆ ⊆
+⇔
+für jede abgeschlossene Teilmenge A
+⊆
+Y gilt : f−1(A)
+⊆
+X ist abgeschlossen.
 Beispiel 13 (Stetige Abbildungen und Homöomorphismen)
-1) Für jeden topologischen Raum X gilt: id : X X ist Homöomorphismus.
+1) Für jeden topologischen Raum X gilt: id
 X
+: X
 →
+X ist Homöomorphismus.
 2Es wird die Äquivalenz von Stetigkeit im Sinne der Analysis und Topologie auf metrischen Räumen gezeigt.
 11 1.3.STETIGKEIT
-2) Ist (Y,T ) trivialer topologischer Raum, d. h. T = T , so ist jede Abbildung
-Y Y triv
-f : X Y stetig.
+2) Ist (Y,T
+Y
+) trivialer topologischer Raum, d. h. T
+Y
+= T
+triv
+, so ist jede Abbildung
+f : X
 →
-3) Ist X diskreter topologischer Raum, so ist f : X Y stetig für jeden topologischen
+Y stetig.
+3) Ist X diskreter topologischer Raum, so ist f : X
 →
+Y stetig für jeden topologischen
 Raum Y und jede Abbildung f.
-4) Sei X = [0,1),Y = S1 = z C z = 1 und f(t) = e2πit.
-{ ∈ | (cid:107) (cid:107) }
-f
-0
+4) Sei X = [0,1),Y = S1 =
+{
+z
+∈
+C
+| (cid:107)
+z
+(cid:107)
+= 1
+}
+und f(t) = e2πit.
 R
 0 1
+0
+f
 g
 Abbildung 1.5: Beispiel einer stetigen Funktion f, deren Umkehrabbildung g nicht stetig ist.
 Die Umkehrabbildung g ist nicht stetig, da g−1(U) nicht offen ist (vgl. Abbildung 1.5).
 Bemerkung 9 (Verkettungen stetiger Abbildungen sind stetig)
-Seien X,Y,Z topologische Räume, f : X Y und g : Y Z stetige Abbildungen.
-→ →
-Dann ist g f : X Z stetig.
-◦ →
+Seien X,Y,Z topologische Räume, f : X
+→
+Y und g : Y
+→
+Z stetige Abbildungen.
+Dann ist g
+◦
+f : X
+→
+Z stetig.
+X
 f (cid:47)(cid:47)
-X Y
-g◦f (cid:32)(cid:32) (cid:127)(cid:127) g
+g◦f (cid:32)(cid:32)
+Y
+g (cid:127)(cid:127)
 Z
-Beweis: Sei U Z offen (g f)−1(U) = f−1(g−1(U)). g−1(U) ist offen in Y weil g stetig
-⊆ ⇒ ◦
-ist, f−1(g−1(U)) ist offen in X, weil f stetig ist. (cid:4)
-Bemerkung 10
+Beweis: Sei U
+⊆
+Z offen
+⇒
+(g
+◦
+f)−1(U) = f−1(g−1(U)). g−1(U) ist offen in Y weil g stetig
+ist, f−1(g−1(U)) ist offen in X, weil f stetig ist. (cid:4)
+Bemerkung 10
 a) Für jeden topologischen Raum X ist
-Homöo(X) := f : X X f ist Homöomorphismus
-{ → | }
+Homöo(X) :=
+{
+f : X
+→
+X
+|
+f ist Homöomorphismus
+}
 eine Gruppe.
-b) Jede Isometrie f : X Y zwischen metrischen Räumen ist ein Homöomorphismus.
+b) Jede Isometrie f : X
+→
+Y zwischen metrischen Räumen ist ein Homöomorphismus.
+c) Iso(X) :=
+{
+f : X
 →
-c) Iso(X) := f : X X f ist Isometrie ist eine Untergruppe von Homöo(X) für
-{ → | }
+X
+|
+f ist Isometrie
+}
+ist eine Untergruppe von Homöo(X) für
 jeden metrischen Raum X.
 Bemerkung 11 (Projektionen sind stetig)
-Seien X,Y topologische Räume. π : X Y X und π : X Y Y die Projektionen
-X Y
-× → × →
-π : (x,y) x und π : (x,y) y
-X Y
-(cid:55)→ (cid:55)→
-Wird X Y mit der Produkttopologie versehen, so sind π und π stetig.
-X Y
+Seien X,Y topologische Räume. π
+X
+: X
+×
+Y
+→
+X und π
+Y
+: X
+×
+Y
+→
+Y die Projektionen
+π
+X
+: (x,y)
+(cid:55)→
+x und π
+Y
+: (x,y)
+(cid:55)→
+y
+Wird X
+×
+Y mit der Produkttopologie versehen, so sind π
+X
+und π
+Y
+stetig.
+Beweis: Sei U
+⊆
+X offen
+⇒
+π−1
+X
+(U) = U
 ×
-Beweis: Sei U X offen
-π−1(U) =⊆ U Y ist offen in X Y. (cid:4)
-⇒ X × ×
+Y ist offen in X
+×
+Y. (cid:4)
 Bemerkung 12
-Sei X ein topologischer Raum, eine Äquivalenzrelation auf X, X = X/ der Bahnenraum
+Sei X ein topologischer Raum,
 ∼
+eine Äquivalenzrelation auf X, X = X/
 ∼
-versehen mit der Quotiententopologie, π : X X, x [x] .
+der Bahnenraum
+versehen mit der Quotiententopologie, π : X
+→
+X, x
+(cid:55)→
+[x]
 ∼
-→ (cid:55)→
+.
 Dann ist π stetig.
 12 1.4.ZUSAMMENHANG
-Beweis: Nach Definition ist U X offen π−1(U) X offen. (cid:4)
-⊆ ⇔ ⊆
+Beweis: Nach Definition ist U
+⊆
+X offen
+⇔
+π−1(U)
+⊆
+X offen. (cid:4)
 Beobachtung: Die Quotiententopologie ist die feinste Topologie, sodass π stetig wird.
 Beispiel 14 (Stereographische Projektion)
-Rn und Sn N sind homöomorph für beliebiges N Sn. Es gilt:
-\{ } ∈
-Sn = (cid:8) x Rn+1 (cid:12) (cid:12) x = 1(cid:9)
-∈ (cid:107) (cid:107)
-(cid:40) (cid:12) (cid:12) n (cid:88)+1 (cid:41)
-= x Rn+1 (cid:12) x2 = 1
-(cid:12) i
+Rn und Sn
+\{
+N
+}
+sind homöomorph für beliebiges N
+∈
+Sn. Es gilt:
+Sn = (cid:8) x
+∈
+Rn+1 (cid:12) (cid:12)
+(cid:107)
+x
+(cid:107)
+= 1 (cid:9)
+=
+(cid:40)
+x
 ∈
+Rn+1
+(cid:12)
 (cid:12)
+(cid:12)
+(cid:12)
+(cid:12)
+n+1
+(cid:88)
 i=1
- 
+x2
+i
+= 1
+(cid:41)
+O. B. d. A. sei N =
+
+
+
+
+
 0
 .
-.
-..
-O. B. d. A. sei N = Die Gerade durch N und P schneidet die Ebene H in genau
- 
-0
-1
-einem Punkt Pˆ. P wird auf Pˆ abgebildet.
-f :Sn N Rn
-\{ } →
+.
+.
+0
+1
+
+
+
+
+
+. Die Gerade durch N und P schneidet die Ebene H in genau
+einem Punkt ˆ P. P wird auf ˆ P abgebildet.
+f :Sn
+\{
+N
+} →
+Rn
+P
+(cid:55)→
 genaueinPunkt
 (cid:122) (cid:125)(cid:124) (cid:123)
-P L H
+L
 P
-(cid:55)→ ∩
-  (cid:12) 
- x 1 (cid:12) 
- . (cid:12) 
-wobei Rn = H =   . .   ∈ Rn+1 (cid:12) (cid:12) x n+1 = 0 und L P die Gerade in Rn+1 durch N
- (cid:12) 
- x n+1 (cid:12) 
+∩
+H
+wobei Rn = H =
+
+
+
+
+
+
+ 
+x 1
+.
+. .
+x n+1
+
+  ∈ Rn+1
+(cid:12)
+(cid:12)
+(cid:12)
+(cid:12) (cid:12)
+(cid:12)
+(cid:12)
+x n+1 = 0
+
+
+
+
+
+und L P die Gerade in Rn+1 durch N
 und P ist.
- 
+Sei P =
+
+ 
 x
 1
 .
-Sei P =   . .  , so ist x n+1 < 1, also ist L P nicht parallel zu H. Also schneiden sich L P
+. .
 x
 n+1
-und H in genau einem Punkt Pˆ.
+
+  , so ist x n+1 < 1, also ist L P nicht parallel zu H. Also schneiden sich L P
+und H in genau einem Punkt ˆ P.
 Es gilt: f ist bijektiv und die Umkehrabbildung ist ebenfalls stetig.
 1.4 Zusammenhang
 Definition 13
 a) EinRaumX heißtzusammenhängend,wenneskeineoffenen,nichtleerenTeilmengen
-U ,U von X gibt mit U U = und U U = X.
-1 2 1 2 1 2
-∩ ∅ ∪
-b) Eine Teilmenge Y X heißt zusammenhängend, wenn Y als topologischer Raum mit
+U
+1
+,U
+2
+von X gibt mit U
+1
+∩
+U
+2
+=
+∅
+und U
+1
+∪
+U
+2
+= X.
+b) Eine Teilmenge Y
 ⊆
+X heißt zusammenhängend, wenn Y als topologischer Raum mit
 der Teilraumtopologie zusammenhängend ist.
 13 1.4.ZUSAMMENHANG
+x
+y
 z
 N
-P
+ˆ P
 0
-y
-Pˆ
-x
+P
 Abbildung 1.6: Visualisierung der stereographischen Projektion
 Bemerkung 13
-X ist zusammenhängend Es gibt keine abgeschlossenen, nichtleeren Teilmengen A ,A
-1 2
+X ist zusammenhängend
 ⇔
-mit A A = und A A = X.
-1 2 1 2
-∩ ∅ ∪
+Es gibt keine abgeschlossenen, nichtleeren Teilmengen A
+1
+,A
+2
+mit A
+1
+∩
+A
+2
+=
+∅
+und A
+1
+∪
+A
+2
+= X.
 Beispiel 15 (Zusammenhang von Räumen)
-1) (Rn,T ) ist zusammenhängend, denn:
+1) (Rn,T
 Euklid
-Annahme: Rn = U ˙ U mit = U ,U T existieren.
-1 2 1 2 Euklid
-∪ ∅ (cid:54) ∈
-Sei x U ,y U und [x,y] die Strecke zwischen x und y. Sei V = [x,y]. Nun
-1 2
-∈ ∈
-betrachten wir V (cid:40) Rn als (metrischen) Teilraum mit der Teilraumtopologie T .
+) ist zusammenhängend, denn:
+Annahme: Rn = U
+1
+˙
+∪
+U
+2
+mit
+∅ (cid:54)
+= U
+1
+,U
+2
+∈
+T
+Euklid
+existieren.
+Sei x
+∈
+U
+1
+,y
+∈
+U
+2
+und [x,y] die Strecke zwischen x und y. Sei V = [x,y]. Nun
+betrachten wir V (cid:40) Rn als (metrischen) Teilraum mit der Teilraumtopologie T
 V
-Somit gilt U [x,y] T wegen der Definition der Teilraumtopologie.
-1 V
-∩ ∈
-Dann gibt es z [x,y] mit z ∂(U [x,y]), aber z / U z U . In jeder
-1 1 2
-∈ ∈ ∩ ∈ ⇒ ∈
-Umgebung von z liegt ein Punkt von U Widerspruch zu U offen.
-1 2
+.
+Somit gilt U
+1
+∩
+[x,y]
+∈
+T
+V
+wegen der Definition der Teilraumtopologie.
+Dann gibt es z
+∈
+[x,y] mit z
+∈
+∂(U
+1
+∩
+[x,y]), aber z /
+∈
+U
+1
+⇒
+z
+∈
+U
+2
+. In jeder
+Umgebung von z liegt ein Punkt von U
+1
 ⇒
-2) R 0 ist nicht zusammenhängend, denn R 0 = R R
-<0 >0
-\{ } \{ } ∪
-3) R2 0 ist zusammenhängend.
-\{ }
-4) Q (cid:40) R ist nicht zusammenhängend, da (Q R √ ) (Q R √ ) = Q
-∩ < 2 ∪ ∩ > 2
-5) x ist zusammenhängend für jedes x X, wobei X ein topologischer Raum ist.
-{ } ∈
+Widerspruch zu U
+2
+offen.
+2) R
+\{
+0
+}
+ist nicht zusammenhängend, denn R
+\{
+0
+}
+= R
+<0
+∪
+R
+>0
+3) R2
+\{
+0
+}
+ist zusammenhängend.
+4) Q (cid:40) R ist nicht zusammenhängend, da (Q
+∩
+R
+<
+√
+2
+)
+∪
+(Q
+∩
+R
+>
+√
+2
+) = Q
+5)
+{
+x
+}
+ist zusammenhängend für jedes x
+∈
+X, wobei X ein topologischer Raum ist.
 6) R mit Zariski-Topologie ist zusammenhängend.
 Bemerkung 14
-Sei X ein topologischer Raum und A X zusammenhängend. Dann ist auch A zusammen-
+Sei X ein topologischer Raum und A
 ⊆
+X zusammenhängend. Dann ist auch A zusammen-
 hängend.
 14 1.4.ZUSAMMENHANG
 Beweis: durch Widerspruch
-Annahme: A = A A , A abgeschlossen, A = , A A =
-1 2 i i 1 2
-∪ (cid:54) ∅ ∩ ∅
-A = (A A ) ˙ (A A )
-1 2
-⇒ ∩ ∪ ∩
-(cid:124) (cid:123)(cid:122) (cid:125) (cid:124) (cid:123)(cid:122) (cid:125)
-abgeschlossen abgeschlossen
+Annahme: A = A
+1
+∪
+A
+2
+, A
+i
+abgeschlossen, A
+i
+(cid:54)
+=
+∅
+, A
+1
+∩
+A
+2
+=
+∅
+⇒
+A = (A
+∩
+A
+1
+)
+(cid:124) (cid:123)(cid:122) (cid:125)
+abgeschlossen
+˙
+∪
+(A
+∩
+A
+2
+)
+(cid:124) (cid:123)(cid:122) (cid:125)
+abgeschlossen
 (cid:124) (cid:123)(cid:122) (cid:125)
 disjunkt
-Wäre A A =
+Wäre A
+∩
+A
 1
-∩ ∅
-A A = A ˙ A
-1 2
-⇒ ⊆ ∪
-A A A A
-2 2
-⇒ ⊆ ⇒ ⊆
-A =
+=
+∅
+⇒
+A
+⊆
+A = A
+1
+˙
+∪
+A
+2
+⇒
+A
+⊆
+A
+2
+⇒
+A
+⊆
+A
+2
+⇒
+A
 1
-⇒ ∅
-Widerspruch zu A =
+=
+∅
+⇒
+Widerspruch zu A
 1
-⇒ (cid:54) ∅
-A A = und analog A A =
-1 2
-⇒ ∩ (cid:54) ∅ ∩ (cid:54) ∅
-Widerspruch zu A ist zusammenhängend. (cid:4)
+(cid:54)
+=
+∅
+⇒
+A
+∩
+A
+1
+(cid:54)
+=
+∅
+und analog A
+∩
+A
+2
+(cid:54)
+=
+∅
 ⇒
+Widerspruch zu A ist zusammenhängend. (cid:4)
 Bemerkung 15
-Sei X ein topologischer Raum und A,B X zusammenhängend.
+Sei X ein topologischer Raum und A,B
 ⊆
-Ist A B = , dann ist A B zusammenhängend.
-∩ (cid:54) ∅ ∪
-Beweis: Sei A B = U ˙ U ,U = offen
-1 2 i
-∪ ∪ (cid:54) ∅
-o.B.d.A.
-====== A = (A U ) ˙ (A U ) offen
-1 2
-⇒ ∩ ∪ ∩
-Azhgd.
-==== A U =
-1
-⇒ ∩ ∅
-A∩B(cid:54)=∅
-==== U B
+X zusammenhängend.
+Ist A
+∩
+B
+(cid:54)
+=
+∅
+, dann ist A
+∪
+B zusammenhängend.
+Beweis: Sei A
+∪
+B = U
 1
-⇒ ⊆
-B = (B U ) (B U ) ist unerlaubte Zerlegung.
-1 2
-∩ ∪ ∩
-(cid:124) (cid:123)(cid:122) (cid:125) (cid:124) (cid:123)(cid:122) (cid:125)
-=U1 =∅
-(cid:4)
-Definition 14
-Sei X ein topologischer Raum.
-Für x X sei Z(x) X definiert durch
-∈ ⊆
-(cid:91)
-Z(x) := A
-A⊆Xzhgd.
-x∈A
+˙
+∪
+U
+2
+,U
+i
+(cid:54)
+=
+∅
+offen
+o.B.d.A.
+======
+⇒
+A = (A
+∩
+U
+1
+) ˙
+∪
+(A
+∩
+U
+2
+) offen
+Azhgd.
+====
+⇒
+A
+∩
+U
+1
+=
+∅
+A∩B(cid:54)=∅
+====
+⇒
+U
+1
+⊆
+B
+B = (B
+∩
+U
+1
+)
+(cid:124) (cid:123)(cid:122) (cid:125)
+=U1
+∪
+(B
+∩
+U
+2
+)
+(cid:124) (cid:123)(cid:122) (cid:125)
+=∅
+ist unerlaubte Zerlegung.
+(cid:4)
+Definition 14
+Sei X ein topologischer Raum.
+Für x
+∈
+X sei Z(x)
+⊆
+X definiert durch
+Z(x) :=
+(cid:91)
+A⊆Xzhgd.
+x∈A
+A
 Z(x) heißt Zusammenhangskomponente.
 Bemerkung 16 (Eigenschaften von Zusammenhangskomponenten)
 Sei X ein topologischer Raum. Dann gilt:
@@ -742,355 +1740,886 @@ b) Z(x) ist abgeschlossen.
 c) X ist disjunkte Vereinigung von Zusammenhangskomponenten.
 Beweis:
 15 1.5.KOMPAKTHEIT
-a) Sei Z(x) = A ˙ A mit A = abgeschlossen.
-1 2 i
-∪ (cid:54) ∅
-O. B. d. A. sei x A und y A . y liegt in einer zusammehängenden Teilmenge A,
-1 2
-∈ ∈
-die auch x enthält. A = (A A ) (A A ) ist unerlaubte Zerlegung.
-1 2
-⇒ ∩ ∪ ∩
-(cid:124) (cid:123)(cid:122) (cid:125) (cid:124) (cid:123)(cid:122) (cid:125)
-(cid:51)x (cid:51)y
-b) Nach Bemerkung 14 ist Z(x) zusammenhängend Z(x) Z(x) Z(x) = Z(x)
-⇒ ⊆ ⇒
+a) Sei Z(x) = A
+1
+˙
+∪
+A
+2
+mit A
+i
+(cid:54)
+=
+∅
+abgeschlossen.
+O. B. d. A. sei x
+∈
+A
+1
+und y
+∈
+A
+2
+. y liegt in einer zusammehängenden Teilmenge A,
+die auch x enthält.
+⇒
+A = (A
+∩
+A
+1
+)
+(cid:124) (cid:123)(cid:122) (cid:125)
+(cid:51)x
+∪
+(A
+∩
+A
+2
+)
+(cid:124) (cid:123)(cid:122) (cid:125)
+(cid:51)y
+ist unerlaubte Zerlegung.
+b) Nach Bemerkung 14 ist Z(x) zusammenhängend
+⇒
+Z(x)
+⊆
+Z(x)
+⇒
+Z(x) = Z(x)
+c) Ist Z(y)
+∩
+Z(x)
+(cid:54)
+=
+∅
 Bem. 15
-c) Ist Z(y) Z(x) = ===== Z(y) Z(x) ist zusammenhängend.
-∩ (cid:54) ∅ ⇒ ∪
-Z(x) Z(y) Z(x) Z(y) Z(x)
-⇒ ∪ ⊆ ⇒ ⊆
-Z(y) Z(x) Z(y)
-⊆ ⇒ ⊆
+=====
+⇒
+Z(y)
+∪
+Z(x) ist zusammenhängend.
+⇒
+Z(x)
+∪
+Z(y)
+⊆
+Z(x)
+⇒
+Z(y)
+⊆
+Z(x)
+⊆
+Z(y)
+⇒
+Z(x)
+⊆
+Z(y)
 (cid:4)
 Bemerkung 17
-Sei f : X Y stetig. Ist A X zusammenhängend, so ist f(A) Y zusammenhängend.
-→ ⊆ ⊆
-Beweis: Sei f(A) = U U ,U = , offen, disjunkt.
-1 2 i
-∪ (cid:54) ∅
-f−1(f(A)) = f−1(U ) f−1(U )
-1 2
-⇒ ∪
-A = (A f−1(U )) (A f−1(U )) (cid:4)
-1 2
-⇒ ∩ ∪ ∩
-(cid:124) (cid:123)(cid:122) (cid:125) (cid:124) (cid:123)(cid:122) (cid:125)
-(cid:54)=∅ (cid:54)=∅
+Sei f : X
+→
+Y stetig. Ist A
+⊆
+X zusammenhängend, so ist f(A)
+⊆
+Y zusammenhängend.
+Beweis: Sei f(A) = U
+1
+∪
+U
+2
+,U
+i
+(cid:54)
+=
+∅
+, offen, disjunkt.
+⇒
+f−1(f(A)) = f−1(U
+1
+)
+∪
+f−1(U
+2
+)
+⇒
+A = (A
+∩
+f−1(U
+1
+))
+(cid:124) (cid:123)(cid:122) (cid:125)
+(cid:54)=∅
+∪
+(A
+∩
+f−1(U
+2
+))
+(cid:124) (cid:123)(cid:122) (cid:125)
+(cid:54)=∅
+(cid:4)
 1.5 Kompaktheit
 Definition 15
-Sei X eine Menge und U (X).
+Sei X eine Menge und U
 ⊆ P
+(X).
 U heißt eine Überdeckung von X, wenn gilt:
-x X : M U : x M
-∀ ∈ ∃ ∈ ∈
+∀
+x
+∈
+X :
+∃
+M
+∈
+U : x
+∈
+M
 Definition 16
 Ein topologischer Raum X heißt kompakt, wenn jede offene Überdeckung von X
-U = U mit U offen in X
-{ i }i∈I i
+U =
+{
+U
+i }i∈I
+mit U
+i
+offen in X
 eine endliche Teilüberdeckung
 (cid:91)
-U = X mit J N
-i
-| | ∈
 i∈J⊆I
+U
+i
+= X mit
+|
+J
+| ∈
+N
 besitzt.
 Bemerkung 18
 Das Einheitsintervall I := [0,1] ist kompakt bezüglich der euklidischen Topologie.
-Beweis: Sei (U ) eine offene Überdeckung von I.
-i i∈J
+Beweis: Sei (U
+i
+)
+i∈J
+eine offene Überdeckung von I.
 Es genügt zu zeigen, dass es ein δ > 0 gibt, sodass jedes Teilintervall der Länge δ von I in
-einem der U enthalten ist. Wenn es ein solches δ gibt, kann man I in endlich viele Intervalle
+einem der U
 i
+enthalten ist. Wenn es ein solches δ gibt, kann man I in endlich viele Intervalle
 16 1.5.KOMPAKTHEIT
-der Länge δ unterteilen und alle U in die endliche Überdeckung aufnehmen, die Teilintervalle
+der Länge δ unterteilen und alle U
 i
+in die endliche Überdeckung aufnehmen, die Teilintervalle
 enthalten.
-Angenommen, es gibt kein solches δ. Dann gibt es für jedes n N ein Intervall I [0,1]
-n
-∈ ⊆
-der Länge 1/n sodass I (cid:40) U für alle i J.
-n i
-∈
-Sei x der Mittelpunkt von I . Die Folge (x ) hat einen Häufungspunkt x [0,1]. Dann
-n n n
-∈
-gibt es i J mit x U . Da U offen ist, gibt es ein ε > 0, sodass (x ε,x+ε) U .
-i i i
-∈ ∈ − ⊆
-Dann gibt es n 0, sodass gilt: 1/n0 < ε/2 und für unendlich viele3 n n : x x < ε/2, also
-0 n
-≥ | − |
-I (x ε,x+ε) U für mindestens ein n N.4
-n i
-⊆ − ⊆ ∈
-Widerspruch
+Angenommen, es gibt kein solches δ. Dann gibt es für jedes n
+∈
+N ein Intervall I
+n
+⊆
+[0,1]
+der Länge 1/n sodass I
+n
+(cid:40) U
+i
+für alle i
+∈
+J.
+Sei x
+n
+der Mittelpunkt von I
+n
+. Die Folge (x
+n
+) hat einen Häufungspunkt x
+∈
+[0,1]. Dann
+gibt es i
+∈
+J mit x
+∈
+U
+i
+. Da U
+i
+offen ist, gibt es ein ε > 0, sodass (x
+−
+ε,x+ε)
+⊆
+U
+i
+.
+Dann gibt es n
+0
+, sodass gilt: 1/n0 < ε/2 und für unendlich viele3 n
+≥
+n
+0
+:
+|
+x
+−
+x
+n
+|
+< ε/2, also
+I
+n
+⊆
+(x
+−
+ε,x+ε)
+⊆
+U
+i
+für mindestens ein n
+∈
+N.4
 ⇒
-Dann überdecke [0,1] mit endlich vielen Intervallen I ,...,I der Länge δ. Jedes I ist in
-1 d j
-U enthalten.
+Widerspruch
+Dann überdecke [0,1] mit endlich vielen Intervallen I
+1
+,...,I
+d
+der Länge δ. Jedes I
+j
+ist in
+U
 ij
-U ,...,U ist endliche Teilüberdeckung von U. (cid:4)
-j1 j
-⇒ d
+enthalten.
+⇒
+U
+j1
+,...,U
+j
+d
+ist endliche Teilüberdeckung von U. (cid:4)
 Beispiel 16 (Kompakte Räume)
 1) R ist nicht kompakt.
 2) (0,1) ist nicht kompakt.
-(cid:83)
-U = (1/n,1 −1/n) n∈NU = (0,1)
-n n
+U
+n
+= (1/n,1
+−
+1/n)
 ⇒
+(cid:83)
+n∈N
+U
+n
+= (0,1)
 3) R mit der Zariski-Topologie ist kompakt und jede Teilmenge von R ist es auch.
 Bemerkung 19
-Sei X kompakter Raum, A X abgeschlossen. Dann ist A kompakt.
+Sei X kompakter Raum, A
 ⊆
-Beweis: Sei (V ) offene Überdeckung von A.
-i i∈I
-Dann gibt es für jedes i I eine offene Teilmenge U X mit V = U A.
-i i i
-∈ ⊆ ∩
-(cid:91)
-A U
+X abgeschlossen. Dann ist A kompakt.
+Beweis: Sei (V
+i
+)
+i∈I
+offene Überdeckung von A.
+Dann gibt es für jedes i
+∈
+I eine offene Teilmenge U
 i
-⇒ ⊆
+⊆
+X mit V
+i
+= U
+i
+∩
+A.
+⇒
+A
+⊆
+(cid:91)
 i∈I
-U = U i I X A ist offene Überdeckung von X
+U
+i
+⇒
+U =
+{
+U
 i
-⇒ { | ∈ }∪{ \ }
+|
+i
+∈
+I
+}∪{
+X
+\
+A
+}
+ist offene Überdeckung von X
+X kompakt
+=======
+⇒
+es gibt i
+1
+,...,i
 n
-X kompakt (cid:91)
-======= es gibt i ,...,i I, sodass U (X A) = X
-1 n ij
-⇒ ∈ ∪ \
-j=1
- 
+∈
+I, sodass
 n
 (cid:91)
- U ij ∪(X \A) ∩A = A
+j=1
+U
+ij
+∪
+(X
+\
+A) = X
 ⇒
+
+
+n
+(cid:91)
 j=1
+U ij
+∪
+(X
+\
+A)
+
+
+∩
+A = A
+⇒
 n
 (cid:91)
-(U A) ((X A) A) = A
+j=1
+(U
 ij
-⇒ ∩ ∪ \ ∩
-j=1(cid:124) (cid:123)(cid:122) (cid:125) (cid:124) (cid:123)(cid:122) (cid:125)
-=Vij =∅
-V ,...,V überdecken A.
-i1 in
+∩
+A)
+(cid:124) (cid:123)(cid:122) (cid:125)
+=Vij
+∪
+((X
+\
+A)
+∩
+A)
+(cid:124) (cid:123)(cid:122) (cid:125)
+=∅
+= A
 ⇒
+V
+i1
+,...,V
+in
+überdecken A.
 (cid:4)
 Bemerkung 20
-Seien X,Y kompakte topologische Räume. Dann ist X Y mit der Produkttopologie
+Seien X,Y kompakte topologische Räume. Dann ist X
 ×
+Y mit der Produkttopologie
 kompakt.
-Beweis: Sei (W ) eine offene Überdeckung von X Y. Für jedes (x,y) X Y gibt es
-i i∈I
-× ∈ ×
-offene Teilmengen U von X und V von Y sowie ein i I, sodass U V W .
-x,y x,y x,y x,y i
-∈ × ⊆
-3Dies gilt nicht für alle n≥n , da ein Häufungspunkt nur eine konvergente Teilfolge impliziert.
+Beweis: Sei (W
+i
+)
+i∈I
+eine offene Überdeckung von X
+×
+Y. Für jedes (x,y)
+∈
+X
+×
+Y gibt es
+offene Teilmengen U
+x,y
+von X und V
+x,y
+von Y sowie ein i
+∈
+I, sodass U
+x,y
+×
+V
+x,y
+⊆
+W
+i
+.
+3Dies gilt nicht für alle n≥n
 0
+, da ein Häufungspunkt nur eine konvergente Teilfolge impliziert.
 4Sogar für unendlich viele.
 17 1.5.KOMPAKTHEIT
-X
 W
 i
 x
-V y
-x,y
-Y
+y
 x
-U
+V
+x,y
+U
 x,y
+Y
+X
 Abbildung 1.7: Die blaue Umgebung ist Schnitt vieler Umgebungen
-Die offenen Mengen U V für festes x und alle y Y überdecken x y. Da Y
-x0,y x0,y 0 0
-× ∈ { }×
+Die offenen Mengen U
+x0,y
+×
+V
+x0,y
+für festes x
+0
+und alle y
+∈
+Y überdecken
+{
+x
+0
+}×
+y. Da Y
+kompakt ist, ist auch
+{
+x
+0 }×
+Y kompakt. Also gibt es y
+1
+,...,y
+m(x0)
+mit
 (cid:83)m(x0)
-kompakt ist, ist auch x Y kompakt. Also gibt es y ,...,y mit U
-{ 0 }× 1 m(x0) i=1 x0,yi ×
-V x Y.
-x0,yi 0
-⊇ { }×
-(cid:84)m(x) (cid:83)n
-Sei U := U . Da X kompakt ist, gibt es x ,...,x X mit U = X
-x0 i=1 x0,yi 1 n ∈ j=1 xj
-(cid:83)k (cid:83)m(xj)(cid:0) (cid:1)
-U V X Y
-⇒ j=1 i=1 xj,yi × xj,yi ⊇ ×
+i=1
+U
+x0,yi ×
+V
+x0,yi
+⊇ {
+x
+0
+}×
+Y.
+Sei U
+x0
+:=
+(cid:84)m(x)
+i=1
+U
+x0,yi
+. Da X kompakt ist, gibt es x
+1
+,...,x
+n ∈
+X mit
+(cid:83)n
+j=1
+U
+xj
+= X
+⇒
+(cid:83)k
+j=1
+(cid:83)m(xj)
+i=1
+(cid:0)
+U
+xj,yi ×
+V
+xj,yi
+(cid:1)
 (cid:124) (cid:123)(cid:122) (cid:125)
 Eingrün-orangesKästchen
-(cid:83) (cid:83) W (x ,y ) = X Y (cid:4)
-⇒ j i i j i ×
+⊇
+X
+×
+Y
+⇒
+(cid:83)
+j
+(cid:83)
+i
+W
+i
+(x
+j
+,y
+i
+) = X
+×
+Y (cid:4)
 Bemerkung 21
-Sei X ein Hausdorffraum und K X kompakt. Dann ist K abgeschlossen.
+Sei X ein Hausdorffraum und K
 ⊆
+X kompakt. Dann ist K abgeschlossen.
 Beweis: z. Z.: Komplement ist offen
-Ist X = K, so ist K abgeschlossen in X. Andernfalls sei y X K. Für jedes x K seien
-∈ \ ∈
-U bzw. V Umgebungen von x bzw. von y, sodass U V = .
-x y x y
-∩ ∅
+Ist X = K, so ist K abgeschlossen in X. Andernfalls sei y
+∈
 X
-i
+\
+K. Für jedes x
+∈
+K seien
+U
+x
+bzw. V
+y
+Umgebungen von x bzw. von y, sodass U
 x
+∩
+V
+y
+=
+∅
+.
+X
+i
 K
+x
 y
+Da K kompakt ist, gibt es endlich viele x
+1
+,...,x
+n ∈
+K, sodass
 (cid:83)m
-Da K kompakt ist, gibt es endlich viele x ,...,x K, sodass U K.
-1 n ∈ i=1 xi ⊇
+i=1
+U
+xi ⊇
+K.
+Sei V :=
 n
 (cid:92)
-Sei V := V
-xi
 i=1
+V
+xi
 18 1.6.WEGEUNDKNOTEN
-(cid:32) n (cid:33)
+⇒
+V
+∩
+(cid:32) n
 (cid:91)
-V U =
-xi
-⇒ ∩ ∅
 i=1
-V K =
-⇒ ∩ ∅
-V ist Überdeckung von y, die ganz in X K enthalten ist.
-⇒ \
-X K ist offen
-⇒ \
+U
+xi
+(cid:33)
+=
+∅
+⇒
+V
+∩
+K =
+∅
+⇒
+V ist Überdeckung von y, die ganz in X
+\
+K enthalten ist.
+⇒
+X
+\
+K ist offen
 Damit ist K abgeschlossen. (cid:4)
 Bemerkung 22
-Seien X,Y topologische Räume, f : X Y stetig.
+Seien X,Y topologische Räume, f : X
 →
-Ist K X kompakt, so ist f(K) Y kompakt.
-⊆ ⊆
-Beweis: Sei (V ) offene Überdeckung von f(K)
-i i∈I
-=f =s =te =tig (f−1(V )) ist offene Überdeckung von K
-i i∈I
+Y stetig.
+Ist K
+⊆
+X kompakt, so ist f(K)
+⊆
+Y kompakt.
+Beweis: Sei (V
+i
+)
+i∈I
+offene Überdeckung von f(K)
+f stetig ====
 ⇒
-=K =o =m =p =akt es gibt i ,...,i , sodass f−1(V ),...,f−1(V ) Überdeckung von K ist.
-1 n i1 in
+(f−1(V
+i
+))
+i∈I
+ist offene Überdeckung von K
+Kompakt =====
 ⇒
-f(f−1(V )),...,f(f−1(V )) überdecken f(K).
-i1 in
+es gibt i
+1
+,...,i
+n
+, sodass f−1(V
+i1
+),...,f−1(V
+in
+) Überdeckung von K ist.
 ⇒
-Es gilt: f(f−1(V)) = V f(X) (cid:4)
+f(f−1(V
+i1
+)),...,f(f−1(V
+in
+)) überdecken f(K).
+Es gilt: f(f−1(V)) = V
 ∩
+f(X) (cid:4)
 Satz 1.1 (Heine-Borel)
 Eine Teilmenge von Rn oder Cn ist genau dann kompakt, wenn sie beschränkt und
 abgeschlossen ist.
-Beweis: „ “: Sei K Rn (oder Cn) kompakt.
-⇒ ⊆
+Beweis: „
+⇒
+“: Sei K
+⊆
+Rn (oder Cn) kompakt.
 Da Rn und Cn hausdorffsch sind, ist K nach Bemerkung 21 abgeschlossen. Nach Vorausset-
-zung kann K mit endlich vielen offenen Kugeln von Radien 1 überdeckt werden K ist
+zung kann K mit endlich vielen offenen Kugeln von Radien 1 überdeckt werden
 ⇒
+K ist
 beschränkt.
-„ “ Sei A Rn (oder Cn) beschränkt und abgeschlossen.
-⇐ ⊆
-Dann gibt es einen Würfel W = [ N,N] [ N,N] mit A W bzw. „Polyzylinder“
-− ×···× − ⊆
+„
+⇐
+“ Sei A
+⊆
+Rn (oder Cn) beschränkt und abgeschlossen.
+Dann gibt es einen Würfel W = [
+−
+N,N]
+×···×
+[
+−
+N,N]
 (cid:124) (cid:123)(cid:122) (cid:125)
 nmal
-Z = (z ,...,z ) Cn z N für i = 1,...,n
-1 n i
-{ ∈ | ≤ }
+mit A
+⊆
+W bzw. „Polyzylinder“
+Z =
+{
+(z
+1
+,...,z
+n
+)
+∈
+Cn
+|
+z
+i
+≤
+N für i = 1,...,n
+}
 Nach Bemerkung 20 und Bemerkung 18 ist W kompakt, also ist A nach Bemerkung 19 auch
 kompakt. Genauso ist Z kompakt, weil
-z C z 1
-{ ∈ (cid:107) | ≤ }
+{
+z
+∈
+C
+(cid:107)
+z
+| ≤
+1
+}
 homöomorph zu
-(cid:8) (x,y) R2 (cid:12) (cid:12) (x,y) 1(cid:9)
-∈ (cid:107) (cid:107) ≤
+(cid:8) (x,y)
+∈
+R2 (cid:12) (cid:12)
+(cid:107)
+(x,y)
+(cid:107) ≤
+1 (cid:9)
 ist. (cid:4)
 1.6 Wege und Knoten
 Definition 17
 Sei X ein topologischer Raum.
 19 1.6.WEGEUNDKNOTEN
-a) Ein Weg in X ist eine stetige Abbildung γ : [0,1] X.
+a) Ein Weg in X ist eine stetige Abbildung γ : [0,1]
 →
+X.
 b) γ heißt geschlossen, wenn γ(1) = γ(0) gilt.
-c) γ heißt einfach, wenn γ injektiv ist.
-[0,1)
+c) γ heißt einfach, wenn γ
 |
+[0,1)
+injektiv ist.
 Beispiel 17
 Ist X diskret, so ist jeder Weg konstant, d. h. von der Form
-x [0,1] : γ(x) = c, c X
-∀ ∈ ∈
+∀
+x
+∈
+[0,1] : γ(x) = c, c
+∈
+X
 Denn γ([0,1]) ist zusammenhängend für jeden Weg γ.
 Definition 18
 Ein topologischer Raum X heißt wegzusammenhängend, wenn es zu je zwei Punkten
-x,y X einen Weg γ : [0,1] X gibt mit γ(0) = x und γ(1) = y.
-∈ →
+x,y
+∈
+X einen Weg γ : [0,1]
+→
+X gibt mit γ(0) = x und γ(1) = y.
 Bemerkung 23
 Sei X ein topologischer Raum.
-a) X ist wegzusammenhängend X ist zusammenhängend
+a) X ist wegzusammenhängend
 ⇒
-b) X ist wegzusammenhängend X ist zusammenhängend
+X ist zusammenhängend
+b) X ist wegzusammenhängend
 (cid:54)⇐
+X ist zusammenhängend
 Beweis:
-a) Sei X ein wegzusammenhängender topologischer Raum, A ,A nichtleere, disjunkte,
-1 2
-abgeschlossene Teilmengen von X mit A A = X. Sei x A ,y A ,γ : [0,1] X
-1 2 1 2
-∪ ∈ ∈ →
+a) Sei X ein wegzusammenhängender topologischer Raum, A
+1
+,A
+2
+nichtleere, disjunkte,
+abgeschlossene Teilmengen von X mit A
+1
+∪
+A
+2
+= X. Sei x
+∈
+A
+1
+,y
+∈
+A
+2
+,γ : [0,1]
+→
+X
 ein Weg von x nach y.
-Dann ist C := γ([0,1]) X zusammenhängend, weil γ stetig ist.
+Dann ist C := γ([0,1])
 ⊆
-C = (C A ) (C A )
-1 2
-∩ ∪ ∩
-(cid:124) (cid:123)(cid:122) (cid:125) (cid:124) (cid:123)(cid:122) (cid:125)
-(cid:51)x (cid:51)y
-ist Zerlegung in nichtleere, disjunkte, abgeschlossene Teilmengen Widerspruch
+X zusammenhängend, weil γ stetig ist.
+C = (C
+∩
+A
+1
+)
+(cid:124) (cid:123)(cid:122) (cid:125)
+(cid:51)x
+∪
+(C
+∩
+A
+2
+)
+(cid:124) (cid:123)(cid:122) (cid:125)
+(cid:51)y
+ist Zerlegung in nichtleere, disjunkte, abgeschlossene Teilmengen
 ⇒
-(cid:110) (cid:12) (cid:111)
-b) Sei X = (x,y) R2 (cid:12) x2+y2 = 1 y = 1+2 e− 11 0x .
+Widerspruch
+b) Sei X =
+(cid:110)
+(x,y)
+∈
+R2
 (cid:12)
-∈ ∨ ·
+(cid:12) (cid:12) x2+y2 = 1
+∨
+y = 1+2
+·
+e− 1 10 x
+(cid:111)
+.
 Abbildung 1.8a veranschaulicht diesen Raum.
-Sei U U = X,U = U = ,U offen. X = C S. Dann ist C U oder C U ,
-1 2 1 2 i 1 2
-∪ (cid:54) ∅ ∪ ⊆ ⊆
+Sei U
+1
+∪
+U
+2
+= X,U
+1
+(cid:54)
+= U
+2
+=
+∅
+,U
+i
+offen. X = C
+∪
+S. Dann ist C
+⊆
+U
+1
+oder C
+⊆
+U
+2
+,
 weil C und S zusammenhängend sind.
-Also ist C = U und S = U (oder umgekehrt).
-1 2
-Sei y C = U ,ε > 0 und B (y) U eine Umgebung von y, die in U enthalten ist.
-1 ε 1 1
-∈ ⊆
-Aber: B (y) S = Widerspruch X S ist zusammenhängend, aber nicht
+Also ist C = U
+1
+und S = U
+2
+(oder umgekehrt).
+Sei y
+∈
+C = U
+1
+,ε > 0 und B
+ε
+(y)
+⊆
+U
+1
+eine Umgebung von y, die in U
+1
+enthalten ist.
+Aber: B
 ε
-∩ (cid:54) ∅ ⇒ ⇒ ∪
+(y)
+∩
+S
+(cid:54)
+=
+∅ ⇒
+Widerspruch
+⇒
+X
+∪
+S ist zusammenhängend, aber nicht
 wegzusammenhängend. (cid:4)
 Beispiel 18 (Hilbert-Kurve)
-Es gibt stetige, surjektive Abbildungen [0,1] [0,1] [0,1]. Ein Beispiel ist die in Abbil-
-→ ×
+Es gibt stetige, surjektive Abbildungen [0,1]
+→
+[0,1]
+×
+[0,1]. Ein Beispiel ist die in Abbil-
 dung 1.9 dargestellte Hilbert-Kurve.
 Definition 19
 Sei X ein topologischer Raum. Eine Jordankurve in X ist ein Homöomorphismus γ :
-[0,1] C X bzw. γ : S1 C X, wobei C := Bildγ.
-→ ⊆ → ⊆
+[0,1]
+→
+C
+⊆
+X bzw. γ : S1
+→
+C
+⊆
+X, wobei C := Bildγ.
 20 1.6.WEGEUNDKNOTEN
-1
-(x,sin(1)) X Y
-{ x ∈ × }
-( 1,1) Y
-− ⊆
-X
-0 Y
+(a) Spirale S mit Kreis C
 0.1 1
+−
+1
+0
 1
+X
+Y
+{
+(x,sin(1
+x
+))
+∈
+X
+×
+Y
+}
+(
 −
-(a) Spirale S mit Kreis C (b) Sinus
+1,1)
+⊆
+Y
+(b) Sinus
 Abbildung 1.8: Beispiele für Räume, die zusammenhängend, aber nicht wegzusammenhängend
 sind.
 (a) n=1 (b) n=2 (c) n=3 (d) n=4 (e) n=5
 Abbildung 1.9: Hilbert-Kurve
 Jede Jordankurve ist also ein einfacher Weg.
 Satz 1.2 (Jordanscher Kurvensatz)
-Ist C = γ([0,1]) eine geschlossene Jordankurve in R2, so hat R2 C genau zwei
+Ist C = γ([0,1]) eine geschlossene Jordankurve in R2, so hat R2
 \
+C genau zwei
 Zusammenhangskomponenten, von denen eine beschränkt ist und eine unbeschränkt.
 außen
 innen
@@ -1105,48 +2634,98 @@ Idee: Ersetze Weg C durch Polygonzug.
 Definition 20
 Eine geschlossene Jordankurve in R3 heißt Knoten.
 Beispiel 19 (Knoten)
-(a) Trivialer Knoten (b) Kleeblattknoten (c) Achterknoten (d) 6 -Knoten
+(a) Trivialer Knoten (b) Kleeblattknoten (c) Achterknoten (d) 6
 2
+-Knoten
 Abbildung 1.11: Beispiele für verschiedene Knoten
 Definition 21
-Zwei Knoten γ ,γ : S1 R3 heißen äquivalent, wenn es eine stetige Abbildung
-1 2
+Zwei Knoten γ
+1
+,γ
+2
+: S1
 →
-H : S1 [0,1] R3
-× →
+R3 heißen äquivalent, wenn es eine stetige Abbildung
+H : S1
+×
+[0,1]
+→
+R3
 gibt mit
-H(z,0) = γ (z) z S1
+H(z,0) = γ
 1
-∀ ∈
-H(z,1) = γ (z) z S1
+(z)
+∀
+z
+∈
+S1
+H(z,1) = γ
 2
-∀ ∈
-und für jedes feste t [0,1] ist
+(z)
+∀
+z
+∈
+S1
+und für jedes feste t
 ∈
-H : S1 R3,z H(z,t)
+[0,1] ist
+H
 z
-→ (cid:55)→
-ein Knoten. Die Abbildung H heißt Isotopie zwischen γ und γ .
-1 2
+: S1
+→
+R3,z
+(cid:55)→
+H(z,t)
+ein Knoten. Die Abbildung H heißt Isotopie zwischen γ
+1
+und γ
+2
+.
 Definition 22
-Sei γ : [0,1] R3 ein Knoten, E eine Ebene und π : R3 E eine Projektion auf E.
-→ →
+Sei γ : [0,1]
+→
+R3 ein Knoten, E eine Ebene und π : R3
+→
+E eine Projektion auf E.
 π heißt Knotendiagramm von γ, wenn gilt:
-(cid:12) (cid:12)π−1(x)(cid:12)
-(cid:12) 2 x π(γ)
-≤ ∀ ∈
-Ist (π )−1(x) = y ,y , so liegt y über y , wenn gilt:
-γ([0,1]) 1 2 1 2
-| { }
-λ > 1 : (y x) = λ(y x)
-1 2
-∃ − −
+(cid:12) (cid:12)π−1(x) (cid:12) (cid:12)
+≤
+2
+∀
+x
+∈
+π(γ)
+Ist (π
+|
+γ([0,1])
+)−1(x) =
+{
+y
+1
+,y
+2
+}
+, so liegt y
+1
+über y
+2
+, wenn gilt:
+∃
+λ > 1 : (y
+1
+−
+x) = λ(y
+2
+−
+x)
 Satz 1.3 (Satz von Reidemeister)
 Zwei endliche Knotendiagramme gehören genau dann zu äquivalenten Knoten, wenn sie
 durch endlich viele „Reidemeister-Züge“ ineinander überführt werden können.
 22 1.6.WEGEUNDKNOTEN
-(a) Ω (b) Ω
-1 2
+(a) Ω
+1
+(b) Ω
+2
 (c) Ω
 3
 Abbildung 1.12: Reidemeister-Züge
@@ -1160,41 +2739,97 @@ Abbildung 1.13: Ein 3-gefärber Kleeblattknoten
 23 1.6.WEGEUNDKNOTEN
 Übungsaufgaben
 Aufgabe 1 (Sierpińskiraum)
-Es sei X := 0,1 und T := , 0 ,X . Dies ist der sogenannte Sierpińskiraum.
+Es sei X :=
+{
+0,1
+}
+und T
 X
-{ } {∅ { } }
-(a) Beweisen Sie, dass (X,T ) ein topologischer Raum ist.
+:=
+{∅
+,
+{
+0
+}
+,X
+}
+. Dies ist der sogenannte Sierpińskiraum.
+(a) Beweisen Sie, dass (X,T
 X
-(b) Ist (X,T ) hausdorffsch?
+) ein topologischer Raum ist.
+(b) Ist (X,T
 X
-(c) Ist T von einer Metrik erzeugt?
+) hausdorffsch?
+(c) Ist T
 X
+von einer Metrik erzeugt?
 Aufgabe 2
-Es sei Z mit der von den Mengen U := a+bZ(a Z,b Z 0 ) erzeugten Topologie
+Es sei Z mit der von den Mengen U
 a,b
-∈ ∈ \{ }
+:= a+bZ(a
+∈
+Z,b
+∈
+Z
+\{
+0
+}
+) erzeugten Topologie
 versehen.
 Zeigen Sie:
-(a) Jedes U und jede einelementige Teilmenge von Z ist abgeschlossen.
+(a) Jedes U
 a,b
-(b) 1,1 ist nicht offen.
-{− }
+und jede einelementige Teilmenge von Z ist abgeschlossen.
+(b)
+{−
+1,1
+}
+ist nicht offen.
 (c) Es gibt unendlich viele Primzahlen.
 Aufgabe 3 (Cantorsches Diskontinuum)
-Für jedes i N sei P := 0,1 mit der diskreten Topologie. Weiter Sei P := (cid:81) P .
-i i∈N i
-∈ { }
+Für jedes i
+∈
+N sei P
+i
+:=
+{
+0,1
+}
+mit der diskreten Topologie. Weiter Sei P := (cid:81)
+i∈N
+P
+i
+.
 (a) Wie sehen die offenen Mengen von P aus?
 (b) Was können Sie über den Zusammenhang von P sagen?
 Aufgabe 4 (Kompaktheit)
-(a) Ist GL (R) = A Rn×n det(A) = 0 kompakt?
+(a) Ist GL
 n
-{ ∈ | (cid:54) }
-(b) Ist SL (R) = A Rn×n det(A) = 1 kompakt?
+(R) =
+{
+A
+∈
+Rn×n
+|
+det(A)
+(cid:54)
+= 0
+}
+kompakt?
+(b) Ist SL
 n
-{ ∈ | }
-(c) Ist (R) kompakt?
+(R) =
+{
+A
+∈
+Rn×n
+|
+det(A) = 1
+}
+kompakt?
+(c) Ist
 P
+(R) kompakt?
 Aufgabe 5 (Begriffe)
 Definieren Sie die Begriffe „Homomorphismus“ und „Homöomorphismus“.
 Geben Sie, falls möglich, ein Beispiel für folgende Fälle an. Falls es nicht möglich ist,
@@ -1209,25 +2844,46 @@ Definieren Sie die Begriffe „Isomorphismus“, „Isotopie“ und „Isometrie
 Simplizialkomplexe
 2.1 Topologische Mannigfaltigkeiten
 Definition 24
-Sei (X,T) ein topologischer Raum und n N.
+Sei (X,T) ein topologischer Raum und n
 ∈
-a) Eine n-dimensionale Karte auf X ist ein Paar (U,ϕ), wobei U T und ϕ : U V
-∈ →
-Homöomorphismus von U auf eine offene Teilmenge V Rn.
+N.
+a) Eine n-dimensionale Karte auf X ist ein Paar (U,ϕ), wobei U
+∈
+T und ϕ : U
+→
+V
+Homöomorphismus von U auf eine offene Teilmenge V
 ⊆
-b) Ein n-dimensionaler Atlas auf X ist eine Familie (U ,ϕ ) von Karten auf X,
-i i i∈I
-(cid:83) A
-sodass U = X.
-i∈I i
+Rn.
+b) Ein n-dimensionaler Atlas
+A
+auf X ist eine Familie (U
+i
+,ϕ
+i
+)
+i∈I
+von Karten auf X,
+sodass
+(cid:83)
+i∈I
+U
+i
+= X.
 c) X heißt (topologische) n-dimensionale Mannigfaltigkeit, wenn X hausdorffsch ist,
 eine abzählbare Basis der Topologie hat und einen n-dimensionalen Atlas besitzt.
 Anschaulich ist also ein n-dimensionale Mannigfaltigkeit lokal dem Rn ähnlich.
 Bemerkung 24 (Mächtigkeit von Mannigfaltigkeiten)
-Jede n-dimensionale Mannigfaltigkeit mit n 1 ist mindestens so mächtig wie R.
+Jede n-dimensionale Mannigfaltigkeit mit n
 ≥
-Beweis: Sei (X,T) ein topologischer Raum und (U,ϕ) mit U T und ϕ : U V Rn, wobei
-∈ → ⊆
+1 ist mindestens so mächtig wie R.
+Beweis: Sei (X,T) ein topologischer Raum und (U,ϕ) mit U
+∈
+T und ϕ : U
+→
+V
+⊆
+Rn, wobei
 V offen und ϕ ein Homöomorphismus ist, eine Karte auf X.
 Da jede offene Teilmenge des Rn genauso mächtig ist wie der Rn, ϕ als Homöomorphismus
 insbesondere bijektiv ist und Mengen, zwischen denen eine Bijektion existiert, gleich mächtig
@@ -1236,137 +2892,452 @@ hat, muss jede Mannigfaltigkeit X mindestens so mächtig sein wie der Rn. (cid:4
 Hinweis: Es gibt auch noch 0-dimensionale Mannigfaltigkeiten. Diese Mannigfaltigkeiten können
 beliebig viele Elemente haben.
 Bemerkung 25
-a) Es gibt surjektive, stetige Abbildungen [0,1] [0,1] [0,1]
-→ ×
-b) Für n = m sind Rn und Rm nicht homöomorph. Zum Beweis benutzt man den „Satz
+a) Es gibt surjektive, stetige Abbildungen [0,1]
+→
+[0,1]
+×
+[0,1]
+b) Für n
 (cid:54)
+= m sind Rn und Rm nicht homöomorph. Zum Beweis benutzt man den „Satz
 von der Gebietstreue“ (Brouwer):
-Ist U Rn offen und f : U Rn stetig und injektiv, so ist f(U) offen.
-⊆ →
+Ist U
+⊆
+Rn offen und f : U
+→
+Rn stetig und injektiv, so ist f(U) offen.
 Ist n < m und Rm homöomorph zu Rn, so wäre
-f : Rn Rm Rn, (x ,...,x ) (x ,x ,...,x ,0,...,0)
-1 n 1 2 n
-→ → (cid:55)→
-eine stetige injektive Abbildung. Also müsste f(Rn) offen sein Widerspruch
+f : Rn
+→
+Rm
+→
+Rn, (x
+1
+,...,x
+n
+)
+(cid:55)→
+(x
+1
+,x
+2
+,...,x
+n
+,0,...,0)
+eine stetige injektive Abbildung. Also müsste f(Rn) offen sein
 ⇒
+Widerspruch
 26 2.1.TOPOLOGISCHEMANNIGFALTIGKEITEN
 Beispiel 20 (Mannigfaltigkeiten)
-1) Jede offene Teilmenge U Rn ist eine n-dimensionale Mannigfaltigkeit mit einem
+1) Jede offene Teilmenge U
 ⊆
+Rn ist eine n-dimensionale Mannigfaltigkeit mit einem
 Atlas aus einer Karte.
 2) Cn ist eine 2n-dimensionale Mannigfaltigkeit mit einem Atlas aus einer Karte:
-(z ,...,z ) ( (z ), (z ),..., (z ), (z ))
-1 n 1 1 n n
-(cid:55)→ (cid:60) (cid:61) (cid:60) (cid:61)
-3) n(R) = (Rn+1 0 )/ = Sn/ und n(C) sind Mannigfaltigkeiten der Dimension
-∼ ∼
-P \{ } P
+(z
+1
+,...,z
+n
+)
+(cid:55)→
+(
+(cid:60)
+(z
+1
+),
+(cid:61)
+(z
+1
+),...,
+(cid:60)
+(z
+n
+),
+(cid:61)
+(z
+n
+))
+3)
+P
+n(R) = (Rn+1
+\{
+0
+}
+)/
+∼
+= Sn/
+∼
+und
+P
+n(C) sind Mannigfaltigkeiten der Dimension
 n bzw. 2n, da gilt:
-Sei U := (x : : x ) n(R) x = 0 i 0,...,n. Dann ist n(R) = (cid:83)n U
-i { 0 ··· n ∈ P | i (cid:54) } ∀ ∈ P i=0 i
+Sei U
+i
+:=
+{
+(x
+0
+:
+···
+: x
+n
+)
+∈ P
+n(R)
+|
+x
+i (cid:54)
+= 0
+} ∀
+i
+∈
+0,...,n. Dann ist
+P
+n(R) = (cid:83)n
+i=0
+U
+i
 und die Abbildung
-U Rn
-i
-→
-(cid:18) (cid:19)
-x 0 x (cid:1)(cid:1)i x n
-(x : : x ) ,..., ,...,
-0 n
-··· (cid:55)→ x (cid:1)x x
-i i i
-(y : : y : 1 : y : : y ) (y ,...,y )
-1 i−1 i n 1 n
-··· ··· → (cid:55)
+U
+i
+→
+Rn
+(x
+0
+:
+···
+: x
+n
+)
+(cid:55)→
+(cid:18)
+x 0
+x i
+,...,
+(cid:1)
+(cid:1) (cid:1) x i
+x i
+,..., x n
+x i
+(cid:19)
+(y
+1
+:
+···
+: y
+i−1
+: 1 : y
+i
+:
+···
+: y
+n
+)
+(cid:55) →
+(y
+1
+,...,y
+n
+)
 ist bijektiv.
-Die U mit i = 0,...,n bilden einen n-dimensionalen Atlas:
+Die U
 i
-x = (1 : 0 : 0) U R2 x (0,0)
+mit i = 0,...,n bilden einen n-dimensionalen Atlas:
+x = (1 : 0 : 0)
+∈
+U
 0
-∈ → (cid:55)→
-y = (0 : 1 : 1) U R2 y (0,1)
+→
+R2 x
+(cid:55)→
+(0,0)
+y = (0 : 1 : 1)
+∈
+U
 2
-∈ → (cid:55)→
-Umgebung: B (0,1) (1 : u : v) (u,v) < 1 = V
-1 1
-→ (cid:8){ |(cid:12)(cid:107) (cid:107) }(cid:9)
-Umgebung: B 1(0,1) (w : z : 1) (cid:12) w2+z2 < 1 = V 2
 →
-V V = ?
-1 2
-∩ ∅
-(a : b : c) V V
-1 2
-∈ ∩
-a = 0 und (b)2+(c)2 < 1 c < 1
-⇒ (cid:54) a a ⇒ a
-c = 0 und (a)2+(b)2 < 1 a < 1
-⇒ (cid:54) c c ⇒ c
-Widerspruch
+R2 y
+(cid:55)→
+(0,1)
+Umgebung: B
+1
+(0,1)
+→ {
+(1 : u : v)
+| (cid:107)
+(u,v)
+(cid:107)
+< 1
+}
+= V
+1
+Umgebung: B 1 (0,1)
+→
+(cid:8)
+(w : z : 1)
+(cid:12)
+(cid:12) w2+z2 < 1
+(cid:9)
+= V 2
+V
+1
+∩
+V
+2
+=
+∅
+?
+(a : b : c)
+∈
+V
+1
+∩
+V
+2
+⇒
+a
+(cid:54)
+= 0 und (b
+a
+)2+(c
+a
+)2 < 1
+⇒
+c
+a
+< 1
+⇒
+c
+(cid:54)
+= 0 und (a
+c
+)2+(b
+c
+)2 < 1
+⇒
+a
+c
+< 1
 ⇒
-4) Sn = (cid:8) x Rn+1 (cid:12) (cid:12) x = 1(cid:9) ist n-dimensionale Mannigfaltigkeit.
-∈ (cid:107) (cid:107)
+Widerspruch
+4) Sn = (cid:8) x
+∈
+Rn+1 (cid:12) (cid:12)
+(cid:107)
+x
+(cid:107)
+= 1 (cid:9) ist n-dimensionale Mannigfaltigkeit.
 Karten:
-D := (x ,...,x ) Sn x > 0 B (0,...,0)
-i 1 n+1 i 1
-{ ∈ | } → (cid:124) (cid:123)(cid:122) (cid:125)
+D
+i
+:=
+{
+(x
+1
+,...,x
+n+1
+)
+∈
+Sn
+|
+x
+i
+> 0
+} →
+B
+1
+(0,...,0
+(cid:124) (cid:123)(cid:122) (cid:125)
 ∈Rn
-C := (x ,...,x ) Sn x < 0 B (0,...,0)
-i 1 n+1 i 1
-{ ∈ | } →
-(x 1,...,x n+1) (x 1,...,(cid:26)x(cid:26) i,...,x n+1)1
-(cid:55)→ (cid:113) (cid:113)
-(x ,...,x ) (x ,...,x , 1 (cid:80)n x2,x ,...,x ), oder 1 (cid:80)n x2 für C
-1 n (cid:55)→ 1 i−1 − k=1 k i n − − k=1 k i
-Sn = (cid:83)n+1(C D )
-i=1 i ∪ i
+)
+C
+i
+:=
+{
+(x
+1
+,...,x
+n+1
+)
+∈
+Sn
+|
+x
+i
+< 0
+} →
+B
+1
+(0,...,0)
+(x
+1
+,...,x
+n+1
+)
+(cid:55)→
+(x
+1
+,...,(cid:26)(cid:26) x
+i
+,...,x
+n+1
+)1
+(x
+1
+,...,x
+n
+)
+(cid:55)→
+(x
+1
+,...,x
+i−1
+,
+(cid:113)
+1
+−
+(cid:80)n
+k=1
+x2
+k
+,x
+i
+,...,x
+n
+), oder
+−
+(cid:113)
+1
+−
+(cid:80)n
+k=1
+x2
+k
+für C
+i
+Sn = (cid:83)n+1
+i=1
+(C
+i ∪
+D
+i
+)
 Als kompakte Mannigfaltigkeit wird Sn auch „geschlossene Mannigfaltigkeit“ genannt.
 5) [0,1] ist keine Mannigfaltigkeit, denn:
 Es gibt keine Umgebung von 0 in [0,1], die homöomorph zu einem offenem Intervall
 ist.
-1x wird rausgenommen
+1x
 i
+wird rausgenommen
 27 2.1.TOPOLOGISCHEMANNIGFALTIGKEITEN
-6) V 1 = (cid:8) (x,y) R2 (cid:12) (cid:12) x y = 0(cid:9) ist keine Mannigfaltigkeit.
-∈ ·
+6) V 1 = (cid:8) (x,y)
+∈
+R2 (cid:12) (cid:12) x
+·
+y = 0 (cid:9) ist keine Mannigfaltigkeit.
 Das Problem ist (0,0). Wenn man diesen Punkt entfernt, zerfällt der Raum in 4
 Zusammenhangskomponenten. Jeder Rn zerfällt jedoch in höchstens zwei Zusammen-
 hangskomponenten, wenn man einen Punkt entfernt.
-7) V 2 = (cid:8) (x,y) R2 (cid:12) (cid:12) x3 = y2(cid:9) ist eine Mannigfaltigkeit.
+7) V 2 = (cid:8) (x,y)
 ∈
-8) X = (R 0 ) (0 ,0 )
-1 2
-\{ } ∪
+R2 (cid:12) (cid:12) x3 = y2 (cid:9) ist eine Mannigfaltigkeit.
+8) X = (R
+\{
+0
+}
+)
+∪
+(0
+1
+,0
+2
+)
+U
+⊆
+X offen
+⇔
 (cid:40)
-U offen in R 0 , falls 0 / U,0 U
-1 2
-U X offen \{ } ∈ ∈
-⊆ ⇔ ε > 0 : ( ε,ε) U falls 0 U,0 U
-1 2
-∃ − ⊆ ∈ ∈
-Insbesondere sind (R 0 ) 0 und (R 0 ) 0 offen und homöomorph
-1 2
-\{ } ∪{ } \{ } ∪{ }
+U offen in R
+\{
+0
+}
+, falls 0
+1
+/
+∈
+U,0
+2
+∈
+U
+∃
+ε > 0 : (
+−
+ε,ε)
+⊆
+U falls 0
+1
+∈
+U,0
+2
+∈
+U
+Insbesondere sind (R
+\{
+0
+}
+)
+∪{
+0
+1
+}
+und (R
+\{
+0
+}
+)
+∪{
+0
+2
+}
+offen und homöomorph
 zu R.
 Aber: X ist nicht hausdorffsch! Denn es gibt keine disjunkten Umgebungen von 0
 1
-und 0 .
+und 0
 2
-9) GL (R) ist eine Mannigfaltigkeit der Dimension n2, weil offene Teilmengen von Rn2
+.
+9) GL
 n
+(R) ist eine Mannigfaltigkeit der Dimension n2, weil offene Teilmengen von Rn2
 eine Mannigfaltigkeit bilden.
 Definition 25
-Seien X,Y n-dimensionale Mannigfaltigkeiten, U X und V Y offen, Φ : U V ein Ho-
-⊆ ⊆ →
-möomorphismus Z = (X ˙ Y)/ mit der von u Φ(u) u U erzeugten Äquivalenzrelation
+Seien X,Y n-dimensionale Mannigfaltigkeiten, U
+⊆
+X und V
+⊆
+Y offen, Φ : U
+→
+V ein Ho-
+möomorphismus Z = (X ˙
+∪
+Y)/
+∼
+mit der von u
 ∼
-∪ ∼ ∀ ∈
-und der von induzierten Quotiententopologie.
+Φ(u)
+∀
+u
+∈
+U erzeugten Äquivalenzrelation
+und der von
 ∼
+induzierten Quotiententopologie.
 Z heißtVerklebungvonX undY längsU undV.Z besitzteinenAtlasausn-dimensionalen
 Karten. Falls Z hausdorffsch ist, ist Z eine n-dimensionale Mannigfaltigkeit.
 Bemerkung 26
-Sind X,Y Mannigfaltigkeiten der Dimension n bzw. m, so ist X Y eine Mannigfaltigkeit
+Sind X,Y Mannigfaltigkeiten der Dimension n bzw. m, so ist X
 ×
+Y eine Mannigfaltigkeit
 der Dimension n+m.
 Beweis: Produkte von Karten sind Karten. (cid:4)
 Beispiel 21
@@ -1379,521 +3350,1158 @@ Mannigfaltigkeiten mit Dimension 2:
 3) T2 (1 Henkel)
 4) oder mehr Henkel, wie z.B. der Zweifachtorus in Abbildung 2.1
 Bemerkung 27
-Sei n N,F : Rn R stetig differenzierbar und X = V(F) := x Rn F(x) = 0 das
-∈ → { ∈ | }
+Sei n
+∈
+N,F : Rn
+→
+R stetig differenzierbar und X = V(F) :=
+{
+x
+∈
+Rn
+|
+F(x) = 0
+}
+das
 „vanishing set“.
 Dann gilt:
 28 2.1.TOPOLOGISCHEMANNIGFALTIGKEITEN
 Abbildung 2.1: Durch Verklebung zweier Tori entsteht ein Zweifachtorus.
 a) X ist abgeschlossen in Rn
-b) Ist grad(F)(X) = 0 x X, so ist X eine Mannigfaltigkeit der Dimension n 1.
-(cid:54) ∀ ∈ −
+b) Ist grad(F)(X)
+(cid:54)
+= 0
+∀
+x
+∈
+X, so ist X eine Mannigfaltigkeit der Dimension n
+−
+1.
 Beweis:
-a) Sei y Rn V(F). Weil F stetig ist, gibt es δ > 0, sodass F(B (y)) B (F(y)) mit
-δ ε
-∈ \ ⊆
-ε = 1 F(y) . Folgt B (y) V(F) = Rn V(F) ist offen.
-2(cid:107) (cid:107) δ ∩ ∅ ⇒ \
-b) Sei x X mit grad(F)(x) = 0, also o. B. d. A. ∂F (x) = 0, x = (x ,...,x ),
-∈ (cid:54) ∂X1 (cid:54) 1 n
-x(cid:48) := (x ,...,x ) Rn−1. Der Satz von der impliziten Funktion liefert nun: Es
-2 n
-∈
-gibt Umgebungen U von x(cid:48) und differenzierbare Funktionen g : U R, sodass
-→
-G : U Rn, u (g(u),u) eine stetige Abbildung auf eine offene Umgebung V von x
-→ (cid:55)→
+a) Sei y
+∈
+Rn
+\
+V(F). Weil F stetig ist, gibt es δ > 0, sodass F(B
+δ
+(y))
+⊆
+B
+ε
+(F(y)) mit
+ε = 1
+2(cid:107)
+F(y)
+(cid:107)
+. Folgt B
+δ
+(y)
+∩
+V(F) =
+∅ ⇒
+Rn
+\
+V(F) ist offen.
+b) Sei x
+∈
+X mit grad(F)(x)
+(cid:54)
+= 0, also o. B. d. A. ∂F
+∂X1
+(x)
+(cid:54)
+= 0, x = (x
+1
+,...,x
+n
+),
+x(cid:48) := (x
+2
+,...,x
+n
+)
+∈
+Rn−1. Der Satz von der impliziten Funktion liefert nun: Es
+gibt Umgebungen U von x(cid:48) und differenzierbare Funktionen g : U
+→
+R, sodass
+G : U
+→
+Rn, u
+(cid:55)→
+(g(u),u) eine stetige Abbildung auf eine offene Umgebung V von x
 in X ist.
 (cid:4)
 Beispiel 22
-1) F : R3 R, (x,y,z) x2+y2+z2 1,V(F) = S2,grad(F) = (2x,2y,2z) =B =e =m =. =2 =7.b
-→ (cid:55)→ − ⇒
+1) F : R3
+→
+R, (x,y,z)
+(cid:55)→
+x2+y2+z2
+−
+1,V(F) = S2,grad(F) = (2x,2y,2z) Bem. 27.b ======
+⇒
 Sn ist n-dimensionale Mannigfaltigkeit in Rn+1
-2) F : R2 R, (x,y) y2 x3 Es gilt: grad(F) = ( 3x2,2y). Also: grad(0,0) = (0,0).
-→ (cid:55)→ − −
-y
-10
-100 5
-z
+2) F : R2
+→
+R, (x,y)
+(cid:55)→
+y2
+−
+x3 Es gilt: grad(F) = (
+−
+3x2,2y). Also: grad(0,0) = (0,0).
+− 5 − 4 − 3 − 2 − 1 0 1 2 3 4 5
+− 4
+− 2
+0 2 4
+− 100
 0
-x
-−100 2 4 6 8 10 12
-f(x,y) −4 5
-−2 − a= 31
 100
-−0 100 y 0 2 4 5 4 3 2 1 0 x −1 −2 −3 −4 −5 −10 a a= =1 2
-(a) F(x,y)=y2−x3 (b) y2−ax3 =0
+x y
+z
+− 100 0
+100
+f(x,y)
+(a) F(x,y)=y2−x3
+2 4 6 8 10 12
+− 10
+−
+5
+5
+10
+x
+y
+a= 1 3
+a=1 a=2
+(b) y2−ax3 =0
 Abbildung 2.2: Rechts ist die Neilsche Parabel für verschiedene Parameter a.
 DaheristBemerkung27.bnichtanwendbar,aberV(F)isttrotzdemeine1-dimensionale
 topologische Mannigfaltigkeit.
 29 2.1.TOPOLOGISCHEMANNIGFALTIGKEITEN
 Definition 26
 Sei X ein Hausdorffraum mit abzählbarer Basis der Topologie. X heißt n-dimensionale
-Mannigfaltigkeit mit Rand, wenn es einen Atlas (U ,ϕ ) gibt, wobei U X offen und
-i i i i
+Mannigfaltigkeit mit Rand, wenn es einen Atlas (U
+i
+,ϕ
+i
+) gibt, wobei U
+i
 ⊆
-ϕ ein Homöomorphismus auf eine offene Teilmenge von
+X
+i
+offen und
+ϕ
 i
-Rn := (x ,...,x ) Rn x 0
-+,0 1 n n
-{ ∈ | ≥ }
+ein Homöomorphismus auf eine offene Teilmenge von
+Rn
++,0
+:=
+{
+(x
+1
+,...,x
+n
+)
+∈
+Rn
+|
+x
+n
+≥
+0
+}
 ist.
-Rn ist ein „Halbraum“.
+Rn
 +,0
+ist ein „Halbraum“.
 Hinweis: Mannigfaltigkeiten mit Rand sind keine Mannigfaltigkeiten.
 ∼
 =
 (a) Halbraum
 ∼
 =
+(b) Pair of pants
 ∼
 =
-(b) Pair of pants (c) Sphäre mit einem Loch
+(c) Sphäre mit einem Loch
 Abbildung 2.3: Beispiele für Mannigfaltigkeiten mit Rand
 Definition 27
-Sei X eine n-dimensionale Mannigfaltigkeit mit Rand und Atlas . Dann heißt
+Sei X eine n-dimensionale Mannigfaltigkeit mit Rand und Atlas
 A
+. Dann heißt
+∂X :=
 (cid:91)
-∂X := x U ϕ(x) = 0
-{ ∈ | }
 (U,ϕ)∈A
+{
+x
+∈
+U
+|
+ϕ(x) = 0
+}
 Rand von X.
-∂X ist eine Mannigfaltigkeit der Dimension n 1.
+∂X ist eine Mannigfaltigkeit der Dimension n
 −
+1.
 Definition 28
-Sei X eine n-dimensionale Mannigfaltigkeit mit Atlas (U ,ϕ )
-i i i∈I
-Für i,j I mit U U = heißt
-i j
-∈ ∩ (cid:54) ∅
-ϕ := ϕ ϕ−1
-ij j ◦ i
-ϕ (U U ) ϕ (U U )
-i i j j i j
-∩ → ∩
+Sei X eine n-dimensionale Mannigfaltigkeit mit Atlas (U
+i
+,ϕ
+i
+)
+i∈I
+Für i,j
+∈
+I mit U
+i
+∩
+U
+j
+(cid:54)
+=
+∅
+heißt
+ϕ
+ij
+:= ϕ
+j ◦
+ϕ−1
+i
+ϕ
+i
+(U
+i
+∩
+U
+j
+)
+→
+ϕ
+j
+(U
+i
+∩
+U
+j
+)
 Kartenwechsel oder Übergangsfunktion.
 30 2.2.DIFFERENZIERBAREMANNIGFALTIGKEITEN
-X
+Rn Rn
 U i U j
-ϕ ϕ
-i j
 V i V j
-Rn Rn
+X
+ϕ
+i
+ϕ
+j
 Abbildung 2.4: Kartenwechsel
 2.2 Differenzierbare Mannigfaltigkeiten
 Definition 29
-Sei X eine n-dimensionale Mannigfaltigkeit mit Atlas (U ,ϕ ) .
-i i i∈I
+Sei X eine n-dimensionale Mannigfaltigkeit mit Atlas (U
+i
+,ϕ
+i
+)
+i∈I
+.
 a) X heißt differenzierbare Mannigfaltigkeit der Klasse Ck, wenn jede Karten-
-wechselabbildung ϕ , i,j I k-mal stetig differenzierbar ist.
+wechselabbildung ϕ
 ij
+, i,j
 ∈
+I k-mal stetig differenzierbar ist.
 b) X heißt differenzierbare Mannigfaltigkeit, wenn X eine differenzierbare Mannig-
 faltigkeit der Klasse C∞ ist.
 Differenzierbare Mannigfaltigkeiten der Klasse C∞ werden auch glatt genannt.
 Definition 30
-Sei X eine differenzierbare Mannigfaltigkeit der Klasse Ck (k N ) mit Atlas
-∈ ∪ {∞}
-= (U ,ϕ ) .
-i i i∈I
+Sei X eine differenzierbare Mannigfaltigkeit der Klasse Ck (k
+∈
+N
+∪ {∞}
+) mit Atlas
+A
+= (U
+i
+,ϕ
+i
+)
+i∈I
+.
+a) Eine Karte (U,ϕ) auf X heißt verträglich mit
 A
-a) Eine Karte (U,ϕ) auf X heißt verträglich mit , wenn alle Kartenwechsel ϕ ϕ−1
-A ◦ i
-und ϕ ϕ−1 (i I mit U U = ) differenzierbar von Klasse Ck sind.
-i i
-◦ ∈ ∩ (cid:54) ∅
-b) Die Menge aller mit verträglichen Karten auf X bildet einen maximalen Atlas der
+, wenn alle Kartenwechsel ϕ
+◦
+ϕ−1
+i
+und ϕ
+i
+◦
+ϕ−1 (i
+∈
+I mit U
+i
+∩
+U
+(cid:54)
+=
+∅
+) differenzierbar von Klasse Ck sind.
+b) Die Menge aller mit
 A
+verträglichen Karten auf X bildet einen maximalen Atlas der
 Klasse Ck. Er heißt Ck-Struktur auf X.
 Eine C∞-Struktur heißt auch differenzierbare Struktur auf X.
 Bemerkung 28
-Für n 4 gibt es auf Sn mehrere verschiedene differenzierbare Strukturen, die sogenannten
+Für n
 ≥
+4 gibt es auf Sn mehrere verschiedene differenzierbare Strukturen, die sogenannten
 „exotische Sphären“.
 Definition 31
-Seien X,Y differenzierbare Mannigfaltigkeiten der Dimension n bzw. m, x X.
+Seien X,Y differenzierbare Mannigfaltigkeiten der Dimension n bzw. m, x
 ∈
-a) Eine stetige Abbildung f : X Y heißt differenzierbar in x (von Klasse Ck), wenn
+X.
+a) Eine stetige Abbildung f : X
 →
-es Karten (U,ϕ) von X mit x U und (V,ψ) von Y mit f(U) V gibt, sodass
-∈ ⊆
-ψ f ϕ−1 stetig differenzierbar von Klasse Ck in ϕ(x) ist.
-◦ ◦
-b) f heißt differenzierbar (von Klasse Ck), wenn f in jedem x X differenzierbar ist.
+Y heißt differenzierbar in x (von Klasse Ck), wenn
+es Karten (U,ϕ) von X mit x
+∈
+U und (V,ψ) von Y mit f(U)
+⊆
+V gibt, sodass
+ψ
+◦
+f
+◦
+ϕ−1 stetig differenzierbar von Klasse Ck in ϕ(x) ist.
+b) f heißt differenzierbar (von Klasse Ck), wenn f in jedem x
 ∈
+X differenzierbar ist.
 c) f heißt Diffeomorphismus, wenn f differenzierbar von Klasse C∞ ist und es eine
-differenzierbare Abbildung g : Y X von Klasse C∞ gibt mit g f = id und
+differenzierbare Abbildung g : Y
+→
+X von Klasse C∞ gibt mit g
+◦
+f = id
 X
-→ ◦
-f g = id .
-Y
+und
+f
 ◦
+g = id
+Y
+.
 31 2.2.DIFFERENZIERBAREMANNIGFALTIGKEITEN
 Bemerkung 29
 Die Bedingung in Definition 31.a hängt nicht von den gewählten Karten ab.
-Beweis: Seien (U(cid:48),ϕ(cid:48)) und (V(cid:48),ψ(cid:48)) Karten von X bzw. Y um x bzw. f(x) mit f(U(cid:48)) V(cid:48).
-⊆
-ψ(cid:48) f (ϕ(cid:48))−1
-⇒ ◦ ◦
-= ψ(cid:48) (ψ−1 ψ) f (ϕ−1 ϕ) (ϕ(cid:48))−1
-◦ ◦ ◦ ◦ ◦ ◦
-ist genau dann differenzierbar, wenn ψ f ϕ−1 differenzierbar ist.
-◦ ◦
+Beweis: Seien (U(cid:48),ϕ(cid:48)) und (V(cid:48),ψ(cid:48)) Karten von X bzw. Y um x bzw. f(x) mit f(U(cid:48))
+⊆
+V(cid:48).
+⇒
+ψ(cid:48)
+◦
+f
+◦
+(ϕ(cid:48))−1
+= ψ(cid:48)
+◦
+(ψ−1
+◦
+ψ)
+◦
+f
+◦
+(ϕ−1
+◦
+ϕ)
+◦
+(ϕ(cid:48))−1
+ist genau dann differenzierbar, wenn ψ
+◦
+f
+◦
+ϕ−1 differenzierbar ist.
 Beispiel 23
-f : R R, x x3 istkeinDiffeomorphismus,aberHomöomorphismus,damitg(x) := √3 x
-→ (cid:55)→
-gilt: f g = idR, g f = idR
-◦ ◦
+f : R
+→
+R, x
+(cid:55)→
+x3 istkeinDiffeomorphismus,aberHomöomorphismus,damitg(x) := 3 √x
+gilt: f
+◦
+g = idR, g
+◦
+f = idR
 Bemerkung 30
 Sei X eine glatte Mannigfaltigkeit. Dann ist
-Diffeo(X) := f : X X f ist Diffeomorphismus
-{ → | }
+Diffeo(X) :=
+{
+f : X
+→
+X
+|
+f ist Diffeomorphismus
+}
 eine Untergruppe von Homöo(X).
 Definition 32
-S R3 heißt reguläre Fläche : s S Umgebung V(s) R3 U R2 offen:
-⊆ ⇔ ∀ ∈ ∃ ⊆ ∃ ⊆
-differenzierbare Abbildung F : U V S: Rg(J (u)) = 2 u U.
+S
+⊆
+R3 heißt reguläre Fläche :
+⇔ ∀
+s
+∈
+S
+∃
+Umgebung V(s)
+⊆
+R3
+∃
+U
+⊆
+R2 offen:
+∃
+differenzierbare Abbildung F : U
+→
+V
+∩
+S: Rg(J
 F
-∃ → ∩ ∀ ∈
+(u)) = 2
+∀
+u
+∈
+U.
 F heißt (lokale) reguläre Parametrisierung von S.
 F(u,v) = (x(u,v),y(u,v),z(u,v))
-∂x(p) ∂x(p)
-∂u ∂v
-J F(u,v) =  ∂∂ uy (p) ∂ ∂y v(p)
-∂z(p) ∂z(p)
-∂u ∂v
+J F (u,v) =
+
+
+∂x
+∂u
+(p) ∂x
+∂v
+(p)
+∂y ∂u (p) ∂y ∂v (p)
+∂z
+∂u
+(p) ∂z
+∂v
+(p)
+
+
 Beispiel 24
-1) Rotationsflächen: Sei r : R R eine differenzierbare Funktion.
+1) Rotationsflächen: Sei r : R
+→
+R
 >0
+eine differenzierbare Funktion.
+F : R2
 →
-F : R2 R3 (u,v) (r(u)cos(u),r(v)sin(u),v)
-→ (cid:55)→
- r(cid:48)(v)cosu
-r(v)sinu
+R3 (u,v)
+(cid:55)→
+(r(u)cos(u),r(v)sin(u),v)
+J F (u,v) =
+
+
 −
-J F(u,v) =  r(v)cosu r(cid:48)(v)sinu
+r(v)sinu r(cid:48)(v)cosu
+r(v)cosu r(cid:48)(v)sinu
 0 1
-hat Rang 2 für alle (u,v) R2.
+
+
+hat Rang 2 für alle (u,v)
 ∈
-2) Kugelkoordinaten: F : R2 R3,
+R2.
+2) Kugelkoordinaten: F : R2
 →
-(u,v) (Rcosvcosu,Rcosvsinu,Rsinv)
+R3,
+(u,v)
 (cid:55)→
-Es gilt: F(u,v) S2, denn
-∈ R
+(Rcosvcosu,Rcosvsinu,Rsinv)
+Es gilt: F(u,v)
+∈
+S2
+R
+, denn
 R2cos2(v)cos2(u)+R2cos2(v)sin2(u)+R2sin2(v)
 =R2(cos2(v)cos2(u)+cos2(v)sin2(u)+sin2(v))
-=R2(cid:0) cos2(v)(cos2(u)+sin2(u))+sin2(v)(cid:1)
-=R2(cid:0) cos2(v)+sin2(v)(cid:1)
+=R2(cid:0) cos2(v)(cos2(u)+sin2(u))+sin2(v) (cid:1)
+=R2(cid:0) cos2(v)+sin2(v) (cid:1)
 =R2
 32 2.2.DIFFERENZIERBAREMANNIGFALTIGKEITEN
 N
+S
+v u
+(a) Kugelkoordinaten
+−
 1
-0.8
-u v
-0.6
+0
 1
-− 0 1 2
-1 0
+2
+−
+2 −
+1
+0
+1 2
+0.6
+0.8
 1
-S 2 2 −
+(b) Rotationskörper
+π
+2
+π 3π
+2
+2π
 −
-(a) Kugelkoordinaten (b) Rotationskörper
-y
 1
+−
+0.5
 0.5
+1
 x
-π π 3π 2π
-2 2
+y
 sinx
-0.5
 cosx
-−
-1
-−
 (c) Sinus und Kosinus haben keine gemeinsame Nullstelle
 33 2.2.DIFFERENZIERBAREMANNIGFALTIGKEITEN
 Die Jacobi-Matrix
- 
-Rcosvsinu Rsinvcosu
-− −
-J F(u,v) =  Rcosvcosu Rsinvsinu
+J F (u,v) =
+
+
+−
+Rcosvsinu
+−
+Rsinvcosu
+Rcosvcosu
 −
+Rsinvsinu
 0 Rcosv
-hat Rang 2 für cosv = 0. In N und S ist cosv = 0.
+
+
+hat Rang 2 für cosv
 (cid:54)
+= 0. In N und S ist cosv = 0.
 Bemerkung 31
-Jede reguläre Fläche S R3 ist eine 2-dimensionale, differenzierbare Mannigfaltigkeit.
+Jede reguläre Fläche S
 ⊆
+R3 ist eine 2-dimensionale, differenzierbare Mannigfaltigkeit.
 Beweis:
-S R3 ist als reguläre Fläche eine 2-dimensionale Mannigfaltigkeit. Aus der Definition von
-⊆
-regulären Flächen folgt direkt, dass Karten (U ,F ) und (U R2,F : R2 R3) von S mit
-i i j j
-⊆ →
-U U = existieren, wobei F und F nach Definition differenzierbare Abbildungen sind.
-i j i j
-∩ (cid:54) ∅
-z.Z.: F−1 F ist ein Diffeomorphismus.
-j ◦ i
 S
-s
-F F
-i j
+⊆
+R3 ist als reguläre Fläche eine 2-dimensionale Mannigfaltigkeit. Aus der Definition von
+regulären Flächen folgt direkt, dass Karten (U
+i
+,F
+i
+) und (U
+j
+⊆
+R2,F
+j
+: R2
+→
+R3) von S mit
+U
+i
+∩
+U
+j
+(cid:54)
+=
+∅
+existieren, wobei F
+i
+und F
+j
+nach Definition differenzierbare Abbildungen sind.
+z.Z.: F−1
+j ◦
+F
+i
+ist ein Diffeomorphismus.
 U i U j
-F j−1◦Fi
+S
+s
+F
+i
+F
+j
+F−1
+j
+◦Fi
 Abbildung 2.5: Reguläre Fläche S zum Beweis von Bemerkung 31
-Idee: Finde differenzierbare Funktion F(cid:103)−1 in Umgebung W von s, sodass F(cid:103)−1 = F−1.
-j j |S∩W j
-Ausführung: Sei u U , v U mit F (u ) = s = F (v ).
-0 i 0 j i 0 j 0
-∈ ∈
-Da Rg(J (v )) = 2 ist, ist o. B. d. A.
-Fj 0
-(cid:18)∂x ∂x(cid:19)
-det ∂u ∂v (v ) = 0
-∂y ∂y 0
-(cid:54)
-∂u ∂v
-und F (u,v) = (x(u,v),y(u,v),z(u,v)).
+Idee: Finde differenzierbare Funktion (cid:103) F−1
 j
-Definiere F(cid:102)j : U R R3 durch
+in Umgebung W von s, sodass (cid:103) F−1
+j | S∩W
+= F−1
 j
-× →
-F(cid:102)j(u,v,t) := (x(u,v),y(u,v),z(u,v)+t)
-Offensichtlich: F(cid:102)j = F
-|Uj×{0} j
-∂x ∂x 0
-∂u ∂v
-J =  ∂∂ uy ∂ ∂y 0 detJ F(cid:102)j(v 0,0) (cid:54)= 0
-F(cid:102)j v ⇒
-∂z ∂z 1
-∂u ∂v
-AnalysisII
-====== Es gibt Umgebungen W von F von F(cid:102)j(v 0,0) = F j(v 0) = s, sodass F(cid:102)j auf W eine
+.
+Ausführung: Sei u
+0
+∈
+U
+i
+, v
+0
+∈
+U
+j
+mit F
+i
+(u
+0
+) = s = F
 j
-differen⇒ zierbar Inverse F−1 hat.
+(v
+0
+).
+Da Rg(J
+Fj
+(v
+0
+)) = 2 ist, ist o. B. d. A.
+det
+(cid:18)∂x
+∂u
+∂x
+∂v
+∂y
+∂u
+∂y
+∂v
+(cid:19)
+(v
+0
+)
+(cid:54)
+= 0
+und F
+j
+(u,v) = (x(u,v),y(u,v),z(u,v)).
+Definiere (cid:102) F j : U j
+×
+R
+→
+R3 durch
+(cid:102) F j (u,v,t) := (x(u,v),y(u,v),z(u,v)+t)
+Offensichtlich: (cid:102) F j
+|
+Uj×{0} = F j
+J (cid:102) Fj =
+
+
+∂x
+∂u
+∂x
+∂v
+0
+∂y ∂u ∂y ∂v 0
+∂z
+∂u
+∂z
+∂v
+1
+
+ ⇒ detJ (cid:102) Fj (v 0 ,0) (cid:54) = 0
+AnalysisII
+======
+⇒
+Es gibt Umgebungen W von F j von (cid:102) F j (v 0 ,0) = F j (v 0 ) = s, sodass (cid:102) F j auf W eine
+differenzierbar Inverse F−1
 j
+hat.
 34 2.2.DIFFERENZIERBAREMANNIGFALTIGKEITEN
 Weiter gilt:
-F(cid:102)j−1
-= F j−1
-|W∩S |W∩S
-F−1 F = F−1 F
-⇒ j ◦ i |F i−1(W∩S) j ◦ i |F i−1(W∩S)
+(cid:102) F j
+−1
+| W∩S = F−1 j | W∩S
+⇒
+F−1
+j ◦
+F
+i |F−1 i (W∩S)
+= F−1
+j ◦
+F
+i |F−1 i (W∩S)
 ist differenzierbar.
 Definition 33
-Sei G eine Mannigfaltigkeit und (G, ) eine Gruppe.
+Sei G eine Mannigfaltigkeit und (G,
+◦
+) eine Gruppe.
+a) G heißt topologische Gruppe, wenn die Abbildungen
 ◦
-a) G heißt topologische Gruppe, wenn die Abbildungen : G G G und ι : G G
-◦ × → →
+: G
+×
+G
+→
+G und ι : G
+→
+G
 definiert durch
-g h := g h und ι(g) := g−1
-◦ ·
+g
+◦
+h := g
+·
+h und ι(g) := g−1
 stetig sind.
-b) Ist G eine differenzierbare Mannigfaltigkeit, so heißt G Lie-Gruppe, wenn (G, ) und
+b) Ist G eine differenzierbare Mannigfaltigkeit, so heißt G Lie-Gruppe, wenn (G,
 ◦
+) und
 (G,ι) differenzierbar sind.
 Beispiel 25 (Lie-Gruppen)
 1) Alle endlichen Gruppen sind 0-dimensionale Lie-Gruppen.
-2) GL (R)
+2) GL
 n
-3) (R×, )
+(R)
+3) (R×,
 ·
-4) (R , )
+)
+4) (R
 >0
+,
 ·
-5) (Rn,+), denn A B(i,j) = (cid:80)n a b ist nach allen Variablen differenzierbar
-· k=1 ik kj
+)
+5) (Rn,+), denn A
+·
+B(i,j) = (cid:80)n
+k=1
+a
+ik
+b
+kj
+ist nach allen Variablen differenzierbar
 (A−1)(i,j) = det(Aij)
 detA
- 
-a ... a
-i1 in
-A ij =   . . . ... . . .   ∈ R(n−1)×(n−1)
-a ... a
-n1 nn
+A ij =
+
+ 
+a
+i1
+... a
+in
+. . . ... . . .
+a
+n1
+... a
+nn
+
+  ∈ R(n−1)×(n−1)
 ist differenzierbar.
-detA kann 0 werden, da:
+detA
 ij
-(cid:18) (cid:19)
+kann 0 werden, da:
+(cid:18)
 1 1
-1 0
 −
-6) SL (R) = A GL (R) det(A) = 1
-n n
-{ ∈ | }
-Bemerkung 32
-Ist G eine Lie-Gruppe und g G, so ist die Abbildung
+1 0
+(cid:19)
+6) SL
+n
+(R) =
+{
+A
 ∈
-l : G G
-g
-→
-h g h
-(cid:55)→ ·
+GL
+n
+(R)
+|
+det(A) = 1
+}
+Bemerkung 32
+Ist G eine Lie-Gruppe und g
+∈
+G, so ist die Abbildung
+l
+g
+: G
+→
+G
+h
+(cid:55)→
+g
+·
+h
 ein Diffeomorphismus.
 35 2.3.SIMPLIZIALKOMPLEX
 2.3 Simplizialkomplex
 Definition 34
-Seien v ,...,v Rn Punkte.
-0 k
-∈
-a) v ,...,v sind in allgemeiner Lage
-0 k
-esgibtkeinen(k 1)-dimensionalenaffinenUntervektorraum,derv ,...,v enthält
-0 k
-⇔ −
-v v ,...,v v sind linear unabhängig.
-1 0 k 0
-⇔ − −
-(cid:110) (cid:12) (cid:111)
-b) conv(v ,...,v ) := (cid:80)k λ v (cid:12) λ 0,(cid:80)k λ = 1 heißt die konvexe Hülle von
-0 k i=0 i i (cid:12) i ≥ i=0 i
-v ,...,v .
-0 k
+Seien v
+0
+,...,v
+k
+∈
+Rn Punkte.
+a) v
+0
+,...,v
+k
+sind in allgemeiner Lage
+⇔
+esgibtkeinen(k
+−
+1)-dimensionalenaffinenUntervektorraum,derv
+0
+,...,v
+k
+enthält
+⇔
+v
+1
+−
+v
+0
+,...,v
+k
+−
+v
+0
+sind linear unabhängig.
+b) conv(v
+0
+,...,v
+k
+) :=
+(cid:110)
+(cid:80)k
+i=0
+λ
+i
+v
+i
+(cid:12)
+(cid:12)
+(cid:12)
+λ
+i ≥
+0, (cid:80)k
+i=0
+λ
+i
+= 1
+(cid:111)
+heißt die konvexe Hülle von
+v
+0
+,...,v
+k
+.
 Definition 35
-a) Sei ∆n = conv(e ,...,e ) Rn+1 die konvexe Hülle der Standard-Basisvektoren
-0 n
+a) Sei ∆n = conv(e
+0
+,...,e
+n
+)
 ⊆
-e ,...,e .
-0 n
+Rn+1 die konvexe Hülle der Standard-Basisvektoren
+e
+0
+,...,e
+n
+.
 Dann heißt ∆n Standard-Simplex und n die Dimension des Simplex.
-b) FürPunktev ,...,v imRn inallgemeinerLageheißt∆(v ,...,v ) = conv(v ,...,v )
-0 k 0 k 0 k
+b) FürPunktev
+0
+,...,v
+k
+imRn inallgemeinerLageheißt∆(v
+0
+,...,v
+k
+) = conv(v
+0
+,...,v
+k
+)
 ein k-Simplex in Rn.
-c) Ist ∆(v ,...,v ) ein k-Simplex und I = i ,...,i 0,...,k , so ist s :=
-0 k 0 r i0,...,ir
-{ } ⊆ { }
-conv(v ,...,v ) ein r-Simplex und heißt Teilsimplex oder Seite von ∆.
-i0 ir
+c) Ist ∆(v
+0
+,...,v
+k
+) ein k-Simplex und I =
+{
+i
+0
+,...,i
+r
+} ⊆ {
+0,...,k
+}
+, so ist s
+i0,...,ir
+:=
+conv(v
+i0
+,...,v
+ir
+) ein r-Simplex und heißt Teilsimplex oder Seite von ∆.
 (a) 0-Simplex ∆0
-3 3
+1 2 3
+1
+2
+3
+e 0
 e
 1
+(b) 1-Simplex ∆1
+1 2 3
+1
+2
+3
+e 0
 e
 1
-2 2 e 2
+e 2
+(c) 2-Simplex ∆2
+e 0 e 1
 e
 2
-1 1
-e 0 e 0 e 3
-1 2 3 1 2 3 e 0 e 1
-(b) 1-Simplex ∆1 (c) 2-Simplex ∆2 (d) 3-Simplex ∆3
+e 3
+(d) 3-Simplex ∆3
 Abbildung 2.6: Beispiele für k-Simplexe
 Definition 36
 a) Eine endliche Menge K von Simplizes im Rn heißt (endlicher) Simplizialkomplex,
 wenn gilt:
-(i) Für ∆ K und S ∆ Teilsimplex ist S K.
-∈ ⊆ ∈
-(ii) Für ∆ ,∆ K ist ∆ ∆ leer oder ein Teilsimplex von ∆ und von ∆ .
-1 2 1 2 1 2
-∈ ∩
+(i) Für ∆
+∈
+K und S
+⊆
+∆ Teilsimplex ist S
+∈
+K.
+(ii) Für ∆
+1
+,∆
+2
+∈
+K ist ∆
+1
+∩
+∆
+2
+leer oder ein Teilsimplex von ∆
+1
+und von ∆
+2
+.
+b)
+|
+K
+|
+:=
 (cid:83)
-b) K := ∆ (mit Teilraumtopologie) heißt geometrische Realisierung von K.
-| | ∆∈K
-c) Ist d = max k N K enthält k-Simplex , so heißt d die Dimension von K.
+∆∈K
+∆ (mit Teilraumtopologie) heißt geometrische Realisierung von K.
+c) Ist d = max
+{
+k
+∈
+N
 0
-{ ∈ | }
+|
+K enthält k-Simplex
+}
+, so heißt d die Dimension von K.
 36 2.3.SIMPLIZIALKOMPLEX
-(a) 1D Simplizialkomplex(b) 2D Simplizialkomplex (c) 2D Simplizialkomplex
+(a) 1D Simplizialkomplex(b) 2D Simplizialkomplex
 (ohne untere Fläche!)
+(c) 2D Simplizialkomplex
 (d) 1D Simplizialkomplex (e) 2D Simplizialkomplex
-P P
-(f) P ist kein Teilsimplex, da Eigen- (g) Simplizialkomplex
+P
+(f) P ist kein Teilsimplex, da Eigen-
 schaft Punkt b.ii verletzt ist
+P
+(g) Simplizialkomplex
 Abbildung 2.7: Beispiele für Simplizialkomplexe
 Definition 37
 Seien K,L Simplizialkomplexe. Eine stetige Abbildung
-f : K L
-| | → | |
-heißt simplizial, wenn für jedes ∆ K gilt:
+f :
+|
+K
+| → |
+L
+|
+heißt simplizial, wenn für jedes ∆
 ∈
-a) f(∆) L
+K gilt:
+a) f(∆)
 ∈
-b) f : ∆ f(∆) ist eine affine Abbildung.
+L
+b) f
+|
 ∆
-| →
+: ∆
+→
+f(∆) ist eine affine Abbildung.
 Beispiel 26 (Simpliziale Abbildungen)
-1) ϕ(e ) := b , ϕ(e ) := b
-1 1 2 2
+1) ϕ(e
+1
+) := b
+1
+, ϕ(e
+2
+) := b
+2
 ϕ ist eine eindeutig bestimmte lineare Abbildung
 37 2.3.SIMPLIZIALKOMPLEX
-b
-2
+0 e 2
 e
 1
+0 b 1
+b
+2
 ϕ
-0 e 2 0 b 1
-2) Folgende Abbildung ϕ : ∆n ∆n−1 ist simplizial:
+2) Folgende Abbildung ϕ : ∆n
 →
+∆n−1 ist simplizial:
 ϕ
 3) Tori können simplizial auf Sphären abgebildet werden (vgl. Abbildung 2.8)
-b a a
-b b b b b a b c
-b bc bc b
-d M M M
-b b b b b b b b
-c d d b d
-b b b b
+M M
+a
+a
+a
+b
+b
+b
+c
+c
+c
+d
+d
+d
+M
 a
-b b b b b b b b
+b
+c
+d
+b b b
+b b b
+b b b
+b
+b
+b
+b
+b
+b
+b b
+b
+b b
+b b
+b b
+b
+b
+b
+b
 Abbildung 2.8: Abbildung eines Torus auf eine Sphäre
 Definition 38
-Sei K ein endlicher Simplizialkomplex. Für n 0 sei a (K) die Anzahl der n-Simplizes in
-n
+Sei K ein endlicher Simplizialkomplex. Für n
 ≥
+0 sei a
+n
+(K) die Anzahl der n-Simplizes in
 K.
 Dann heißt
+χ(K) :=
 dimK
 (cid:88)
-χ(K) := ( 1)na (K)
-n
-−
 n=0
+(
+−
+1)na
+n
+(K)
 Eulerzahl (oder Euler-Charakteristik) von K.
 Beispiel 27
-1) χ(∆1) = 2 1 = 1
+1) χ(∆1) = 2
+−
+1 = 1
+χ(∆2) = 3
+−
+3+1 = 1
+χ(∆3) = 4
 −
-χ(∆2) = 3 3+1 = 1
+6+4
 −
-χ(∆3) = 4 6+4 1 = 1
-− −
-2) χ(Oktaeder-Oberfläche) = 6 12+8 = 2
+1 = 1
+2) χ(Oktaeder-Oberfläche) = 6
 −
+12+8 = 2
 χ(Rand des Tetraeders) = 2
-χ(Ikosaeder) = 12 30+20 = 2
+χ(Ikosaeder) = 12
 −
-3) χ(Würfel) = 8 12+6 = 2
+30+20 = 2
+3) χ(Würfel) = 8
 −
-χ(Würfel, unterteilt in Dreiecksflächen) = 8 (12+6)+(6 2) = 2
-− ·
+12+6 = 2
+χ(Würfel, unterteilt in Dreiecksflächen) = 8
+−
+(12+6)+(6
+·
+2) = 2
 Bemerkung 33
-χ(∆n) = 1 für jedes n N
-0
+χ(∆n) = 1 für jedes n
 ∈
+N
+0
 38 2.3.SIMPLIZIALKOMPLEX
-Beweis: ∆n ist die konvexe Hülle von (e ,...,e ) in Rn+1. Jede (k+1)-elementige Teilmenge
-0 n
-von e ,...,e definiert ein k-Simplex.
-0 n
-a{ (∆n) = (cid:0)n+} 1(cid:1) , k = 0,...,n
-⇒ χk (∆n) = (cid:80)k n+1 ( 1)k(cid:0)n+1(cid:1)
-⇒ k=0 − k+1
+Beweis: ∆n ist die konvexe Hülle von (e
+0
+,...,e
+n
+) in Rn+1. Jede (k+1)-elementige Teilmenge
+von
+{
+e
+0
+,...,e
+n
+}
+definiert ein k-Simplex.
+⇒
+a
+k
+(∆n) = (cid:0)n+1
+k+1
+(cid:1) , k = 0,...,n
+⇒
+χ(∆n) = (cid:80)n
+k=0
+(
+−
+1)k(cid:0)n+1
+k+1
+(cid:1)
+f(x) = (x+1)n+1
 Binomischer
-f(x) = (x+1)n+1 Leh=rsatz (cid:80)n+1(cid:0)n+1(cid:1) xk
-k=0 k
-0 = (cid:80)n+1(cid:0)n+1(cid:1) ( 1)k = χ(∆n) 1
-⇒ k=0 k − −
-χ(∆n) = 1 (cid:4)
+Lehrsatz = (cid:80)n+1
+k=0
+(cid:0)n+1
+k
+(cid:1) xk
 ⇒
+0 = (cid:80)n+1
+k=0
+(cid:0)n+1
+k
+(cid:1) (
+−
+1)k = χ(∆n)
+−
+1
+⇒
+χ(∆n) = 1 (cid:4)
 Definition 39
 a) Ein 1D-Simplizialkomplex heißt Graph.
 b) Ein Graph, der homöomorph zu S1 ist, heißt Kreis.
 c) Ein zusammenhängender Graph heißt Baum, wenn er keinen Kreis enthält.
-(a) Dies wird häufig auch als(b) PlanareEinbettungdesTe-
-Multigraph bezeichnet. traeders
-(c) K (d) K
-5 3,3
+(a) Dies wird häufig auch als
+Multigraph bezeichnet.
+(b) PlanareEinbettungdesTe-
+traeders
+(c) K
+5
+(d) K
+3,3
 Abbildung 2.9: Beispiele für Graphen
 Bemerkung 34
 Für jeden Baum T gilt χ(T) = 1.
@@ -1901,48 +4509,91 @@ Beweis: Induktion über die Anzahl der Ecken.
 Bemerkung 35
 a) Jeder zusammenhängende Graph Γ enthält einen Teilbaum T, der alle Ecken von Γ
 enthält.2
-b) Ist n = a (Γ) a (T), so ist χ(Γ) = 1 n.
-1 1
-− −
+b) Ist n = a
+1
+(Γ)
+−
+a
+1
+(T), so ist χ(Γ) = 1
+−
+n.
 Beweis:
 a) Siehe „Algorithmus von Kruskal“.
 2T wird „Spannbaum“ genannt.
 39 2.3.SIMPLIZIALKOMPLEX
-b) χ(Γ) = a (Γ) a (Γ)
-0 1
+b) χ(Γ) = a
+0
+(Γ)
 −
-= a (Γ) (n+a (T))
-0 1
+a
+1
+(Γ)
+= a
+0
+(Γ)
 −
-= a (T) a (T) n
-0 1
-− −
-= χ(T) n
+(n+a
+1
+(T))
+= a
+0
+(T)
+−
+a
+1
+(T)
+−
+n
+= χ(T)
 −
-= 1 n
+n
+= 1
 −
+n
 Bemerkung 36
-Sei ∆ ein n-Simplex und x ∆◦ Rn. Sei K der Simplizialkomplex, der aus ∆ durch
-∈ ⊆
+Sei ∆ ein n-Simplex und x
+∈
+∆◦
+⊆
+Rn. Sei K der Simplizialkomplex, der aus ∆ durch
 „Unterteilung“ in x entsteht. Dann ist χ(K) = χ(∆) = 1.
 (a) K (b) ∆, das aus K durch Unter-
 teilung entsteht
 Abbildung 2.10: Beispiel für Bemerkung 36.
-n (cid:18) (cid:19)
-(cid:88) n+1
-Beweis: χ(K) = χ(∆) ( 1)n + ( 1)k = χ(∆) (cid:4)
-− − − k
+Beweis: χ(K) = χ(∆)
+−
+(
+−
+1)n
 (cid:124) (cid:123)(cid:122) (cid:125)
-k=0
 n-Simplex
++
+n
+(cid:88)
+k=0
+(
+−
+1)k
+(cid:18)
+n+1
+k
+(cid:19)
 (cid:124) (cid:123)(cid:122) (cid:125)
 (1+(−1))n+1
+= χ(∆) (cid:4)
 Definition 40
 Sei X ein topologischer Raum, K ein Simplizialkomplex und
-h : K X
-| | →
-ein Homöomorphismus von der geometrischen Realisierung K auf X. Dann heißt h eine
-| |
+h :
+|
+K
+| →
+X
+ein Homöomorphismus von der geometrischen Realisierung
+|
+K
+|
+auf X. Dann heißt h eine
 Triangulierung von X.
 Beispiel 28 (Triangulierung des Torus)
 Für eine Triangulierung des Torus werden mindestens 14 Dreiecke benötigt. Beispiele für
@@ -1951,2601 +4602,6973 @@ in Beispiel 28.
 Satz 2.1 (Eulersche Polyederformel)
 Sei P ein konvexes Polyeder in R3, d. h. ∂P ist ein 2-dimensionaler Simplizialkomplex,
 sodass gilt:
-x,y ∂P : [x,y] P
-∀ ∈ ⊆
+∀
+x,y
+∈
+∂P : [x,y]
+⊆
+P
 Dann ist χ(∂P) = 2.
 Beweis:
 1) Die Aussage ist richtig für den Tetraeder.
-2) O. B. d. A. sei 0 P und P B (0). Projeziere ∂P von 0 aus auf ∂B (0) = S2.
-1 1
-∈ ⊆
+2) O. B. d. A. sei 0
+∈
+P und P
+⊆
+B
+1
+(0). Projeziere ∂P von 0 aus auf ∂B
+1
+(0) = S2.
 Erhalte Triangulierung von S2.
 40 2.3.SIMPLIZIALKOMPLEX
-(a) DiebeidenmarkiertenDreieckeschneidensichim(b) DiebeidenmarkiertenDreieckeschneidensichim
-Mittelpunkt und in einer Seite. Mittelpunkt und außen.
+(a) DiebeidenmarkiertenDreieckeschneidensichim
+Mittelpunkt und in einer Seite.
+(b) DiebeidenmarkiertenDreieckeschneidensichim
+Mittelpunkt und außen.
 Abbildung 2.11: Fehlerhafte Triangulierungen
 (a) Einfache Triangulierung (b) Minimale Triangulierung
 Abbildung 2.12: Triangulierungen des Torus
 41 2.3.SIMPLIZIALKOMPLEX
-3) Sind P und P konvexe Polygone und T ,T die zugehörigen Triangulierungen von
-1 2 1 2
-S2, so gibt es eine Triangulierung T, die sowohl um T als auch um T Verfeinerung
-1 2
+3) Sind P
+1
+und P
+2
+konvexe Polygone und T
+1
+,T
+2
+die zugehörigen Triangulierungen von
+S2, so gibt es eine Triangulierung T, die sowohl um T
+1
+als auch um T
+2
+Verfeinerung
 ist (vgl. Abbildung 2.13).
 T
 1
 T
 2
 T
-Abbildung 2.13: T ist eine Triangulierung, die für T und T eine Verfeinerung ist.
-1 2
-NachBemerkung 36istχ(∂P ) = χ(T ) = χ(T) = χ(T ) = χ(∂P ) = 2,weilo.B.d.A.
-1 1 2 2
-P ein Tetraeder ist.
+Abbildung 2.13: T ist eine Triangulierung, die für T
+1
+und T
+2
+eine Verfeinerung ist.
+NachBemerkung 36istχ(∂P
+1
+) = χ(T
+1
+) = χ(T) = χ(T
+2
+) = χ(∂P
+2
+) = 2,weilo.B.d.A.
+P
 2
+ein Tetraeder ist.
 Bemerkung 37 (Der Rand vom Rand ist 0)
 Sei K ein endlicher Simplizialkomplex mit Knotenmenge V und < eine Totalordnung auf V.
-Sei A die Menge der n-Simplizes in K, d. h.
+Sei A
 n
-A (K) := σ K dim(σ) = n für n = 0,...,d = dim(K)
+die Menge der n-Simplizes in K, d. h.
+A
 n
-{ ∈ | }
-und C (K) der R-Vektorraum mit Basis A (K), d. h.
-n n
- (cid:12) 
-(cid:12)
- (cid:88) (cid:12) R
-C n(K) = c σ σ (cid:12) c σ
-· (cid:12) ∈
- 
-σ∈An(K) (cid:12)
-Sei σ = ∆(x ,...,x ) A (K), sodass x < x < < x .
-0 n n 0 1 n
-∈ ···
-Für i = 0,...,n sei ∂ σ := ∆(x ,...,xˆ,...,x ) die i-te Seite von σ und d = d σ :=
-i 0 i n σ n
-(cid:80) ( 1)i∂ σ C (K) und d : C (K) C (K) die dadurch definierte lineare
-i=0 − i ∈ n−1 n n → n−1
-Abbildung.
-Dann gilt: d d = 0
-n−1 n
-◦
-c
-e e
-2 1
+(K) :=
+{
 σ
-a
-e b
-3
-Abbildung 2.14: Simplizialkomplex mit Totalordnung
-Beispiel 29
-Sei a < b < c. Dann gilt:
-d σ = e e +e
-2 1 2 3
-−
-d (e e +e ) = (c b) (c a)+(b a)
-1 1 2 3
-− − − − −
-42 2.3.SIMPLIZIALKOMPLEX
+∈
+K
+|
+dim(σ) = n
+}
+für n = 0,...,d = dim(K)
+und C
+n
+(K) der R-Vektorraum mit Basis A
+n
+(K), d. h.
+C n (K) =
+
+
+
+(cid:88)
+σ∈An(K)
+c σ
+·
+σ
+(cid:12)
+(cid:12)
+(cid:12) (cid:12)
+(cid:12)
+(cid:12)
+c σ
+∈
+R
+
+
+
+Sei σ = ∆(x
+0
+,...,x
+n
+)
+∈
+A
+n
+(K), sodass x
+0
+< x
+1
+<
+···
+< x
+n
+.
+Für i = 0,...,n sei ∂
+i
+σ := ∆(x
+0
+,..., ˆ x
+i
+,...,x
+n
+) die i-te Seite von σ und d
+σ
+= d
+n
+σ :=
+(cid:80)
+i=0
+(
+−
+1)i∂
+i
+σ
+∈
+C
+n−1
+(K) und d
+n
+: C
+n
+(K)
+→
+C
+n−1
+(K) die dadurch definierte lineare
+Abbildung.
+Dann gilt: d
+n−1
+◦
+d
+n
+= 0
+a
+b
+c
+σ
+e
+3
+e
+1
+e
+2
+Abbildung 2.14: Simplizialkomplex mit Totalordnung
+Beispiel 29
+Sei a < b < c. Dann gilt:
+d
+2
+σ = e
+1
+−
+e
+2
++e
+3
+d
+1
+(e
+1
+−
+e
+2
++e
+3
+) = (c
+−
+b)
+−
+(c
+−
+a)+(b
+−
+a)
+42 2.3.SIMPLIZIALKOMPLEX
 = 0
 Sei a < b < c < d. Dann gilt für Tetraeder:
-d (∆(a,b,c,d)) = ∆(b,c,d) ∆(a,c,d)+∆(a,b,d) ∆(a,b,c),wobei:
+d
 3
-− −
-d ( ∆(b,c,d)) = ∆(c,d) ∆(b,d)+∆(b,c)
+(∆(a,b,c,d)) = ∆(b,c,d)
+−
+∆(a,c,d)+∆(a,b,d)
+−
+∆(a,b,c),wobei:
+d
 2
+( ∆(b,c,d)) = ∆(c,d)
 −
-d ( ∆(a,c,d)) = ∆(c,d)+∆(a,d) ∆(a,c)
+∆(b,d)+∆(b,c)
+d
 2
-− − −
-d ( ∆(a,b,d)) = ∆(b,d) ∆(a,d)+∆(a,b)
+(
+−
+∆(a,c,d)) =
+−
+∆(c,d)+∆(a,d)
+−
+∆(a,c)
+d
 2
+( ∆(a,b,d)) = ∆(b,d)
 −
-d ( ∆(a,b,c)) = ∆(b,c)+∆(a,c) ∆(a,b)
+∆(a,d)+∆(a,b)
+d
 2
-− − −
-d (d (∆(a,b,c,d))) = 0
-2 3
+(
+−
+∆(a,b,c)) =
+−
+∆(b,c)+∆(a,c)
+−
+∆(a,b)
 ⇒
-Beweis: Sei σ A . Dann gilt:
-n
+d
+2
+(d
+3
+(∆(a,b,c,d))) = 0
+Beweis: Sei σ
 ∈
+A
+n
+. Dann gilt:
+d
+n−1
+(d
+n
+σ) = d
+n−1
+(
 n
 (cid:88)
-d (d σ) = d ( ( 1)i∂ σ)
-n−1 n n−1 i
-−
 i=0
+(
+−
+1)i∂
+i
+σ)
+=
 n
 (cid:88)
-= ( 1)id (∂ σ)
-n−1 i
+i=0
+(
 −
+1)id
+n−1
+(∂
+i
+σ)
+=
+n
+(cid:88)
 i=0
-n n−1
-(cid:88) (cid:88)
-= ( 1)i ∂ (∂ σ)( 1)j
-i j
-− −
-i=0 j=0
-(cid:88) (cid:88)
-= ( 1)i+j∂ (∂ (σ))+ ( 1)i+j∂ (∂ σ)
-j i i−1 j
-− −
-0≤i≤j≤n−1 0≤j<i≤n
+(
+−
+1)i
+n−1
+(cid:88)
+j=0
+∂
+i
+(∂
+j
+σ)(
+−
+1)j
+=
+(cid:88)
+0≤i≤j≤n−1
+(
+−
+1)i+j∂
+j
+(∂
+i
+(σ))+
+(cid:88)
+0≤j<i≤n
+(
+−
+1)i+j∂
+i−1
+(∂
+j
+σ)
 = 0
 weil jeder Summand aus der ersten Summe auch in der zweiten Summe vorkommt, aber mit
 umgekehrten Vorzeichen. (cid:4)
 Definition 41
-Sei K ein Simplizialkomplex, Z := Kern(d ) C und B := Bild(d ) C .
-n n n n n+1 n
-⊆ ⊆
-a) H = H (K,R) := Z /B heißt n-te Homologiegruppe von K.
-n n n n
-b) b n(K) := dimRH heißt n-te Betti-Zahl von K.
+Sei K ein Simplizialkomplex, Z
+n
+:= Kern(d
+n
+)
+⊆
+C
+n
+und B
+n
+:= Bild(d
+n+1
+)
+⊆
+C
+n
+.
+a) H
+n
+= H
+n
+(K,R) := Z
+n
+/B
+n
+heißt n-te Homologiegruppe von K.
+b) b
 n
+(K) := dimRH
+n
+heißt n-te Betti-Zahl von K.
 Bemerkung 38
-Nach Bemerkung 37 ist B Z , denn d (C) Kern(d ) für C C .
-n n n+1 n n+1
-⊆ ∈ ∈
+Nach Bemerkung 37 ist B
+n
+⊆
+Z
+n
+, denn d
+n+1
+(C)
+∈
+Kern(d
+n
+) für C
+∈
+C
+n+1
+.
 Satz 2.2
 Für jeden endlichen Simplizialkomplex K der Dimension d gilt:
-d d
-(cid:88) (cid:88)
-( 1)kb (K) = ( 1)ka (K) = χ(K)
-k k
-− −
-k=0 k=0
+d
+(cid:88)
+k=0
+(
+−
+1)kb
+k
+(K) =
+d
+(cid:88)
+k=0
+(
+−
+1)ka
+k
+(K) = χ(K)
 Bemerkung 39
-Es gilt nicht a = b k N .
-k k 0
-∀ ∈
+Es gilt nicht a
+k
+= b
+k
+∀
+k
+∈
+N
+0
+.
 43 2.3.SIMPLIZIALKOMPLEX
 Beweis:
-Dimensionsformel für d : a = dimZ +dimB für n 1
-n n n n−1
-• ≥
-Dimensionsformel für Z H = Z /B : dimZ = b +dimB
-n n n n n n n
-• →
-dimZ = b , da dimZ = b +dimB , wobei dimB = 0, da a = 0
-d d d d d d d+1
 •
-a dimB = b , da a dimB = a dimZ +b und a = dimZ , weil a 1 = 0
-0 0 0 0 0 0 0 0 0 0
-• − − − −
-d d
-(cid:88) (cid:88)
-( 1)ka = a + ( 1)k(dimZ +dimB )
-k 0 k k−1
-⇒ − −
-k=0 k=1
-d d−1
-(cid:88) (cid:88)
-= a + ( 1)kdimZ + ( 1)k+1dimB
-0 k k
-− −
-k=1 k=0
-d d−1
-(cid:88) (cid:88)
-= a + ( 1)kdimZ ( 1)kdimB
-0 k k
-− − −
-k=1 k=0
+Dimensionsformel für d
+n
+: a
+n
+= dimZ
+n
++dimB
+n−1
+für n
+≥
+1
+•
+Dimensionsformel für Z
+n
+→
+H
+n
+= Z
+n
+/B
+n
+: dimZ
+n
+= b
+n
++dimB
+n
+•
+dimZ
+d
+= b
+d
+, da dimZ
+d
+= b
+d
++dimB
+d
+, wobei dimB
+d
+= 0, da a
+d+1
+= 0
+•
+a
+0
+−
+dimB
+0
+= b
+0
+, da a
+0
+−
+dimB
+0
+= a
+0
+−
+dimZ
+0
++b
+0
+und a
+0
+= dimZ
+0
+, weil a
+−
+1 = 0
+⇒
+d
+(cid:88)
+k=0
+(
+−
+1)ka
+k
+= a
+0
++
+d
+(cid:88)
+k=1
+(
+−
+1)k(dimZ
+k
++dimB
+k−1
+)
+= a
+0
++
+d
+(cid:88)
+k=1
+(
+−
+1)kdimZ
+k
++
 d−1
 (cid:88)
-= a + ( 1)kb +( 1)ddimZ dimB
-0 k d 0
-− − (cid:124) (cid:123)(cid:122) (cid:125)−
-k=1 =b
+k=0
+(
+−
+1)k+1dimB
+k
+= a
+0
++
 d
+(cid:88)
+k=1
+(
+−
+1)kdimZ
+k
+−
+d−1
+(cid:88)
+k=0
+(
+−
+1)kdimB
+k
+= a
+0
++
 d−1
 (cid:88)
-= b + ( 1)kb +( 1)db
-0 k d
-− −
 k=1
+(
+−
+1)kb
+k
++(
+−
+1)ddimZ
+d
+(cid:124) (cid:123)(cid:122) (cid:125)
+=b
 d
+−
+dimB
+0
+= b
+0
++
+d−1
 (cid:88)
-= ( 1)kb
+k=1
+(
+−
+1)kb
 k
++(
 −
-k=0
-44 2.3.SIMPLIZIALKOMPLEX
+1)db
+d
+=
+d
+(cid:88)
+k=0
+(
+−
+1)kb
+k
+44 2.3.SIMPLIZIALKOMPLEX
 Übungsaufgaben
 Aufgabe 7 (Zusammenhang)
 (a) Beweisen Sie, dass eine topologische Mannigfaltigkeit genau dann wegzusammenhän-
 gend ist, wenn sie zusammenhängend ist
-(b) BetrachtenSienunwieinBeispiel20.8denRaumX := (R 0 ) 0 ,0 versehen
-1 2
-\{ } ∪{ }
+(b) BetrachtenSienunwieinBeispiel20.8denRaumX := (R
+\{
+0
+}
+)
+∪{
+0
+1
+,0
+2
+}
+versehen
 mit der dort definierten Topologie. Ist X wegzusammenhängend?
 3 Fundamentalgruppe und Überlagerungen
 3.1 Homotopie von Wegen
-γ γ
-1 1
-a b a b
-γ γ
-2 2
-(a) γ und γ sind homotop, (b) γ und γ sind wegen dem
-1 2 1 2
-da man sie „zueinander ver- Hindernis nicht homotop.
+a b
+γ
+1
+γ
+2
+(a) γ
+1
+und γ
+2
+sind homotop,
+da man sie „zueinander ver-
 schieben“ kann.
-Abbildung 3.1: Beispiele für Wege γ und γ
-1 2
+a b
+γ
+1
+γ
+2
+(b) γ
+1
+und γ
+2
+sind wegen dem
+Hindernis nicht homotop.
+Abbildung 3.1: Beispiele für Wege γ
+1
+und γ
+2
 Definition 42
-Sei X ein topologischer Raum, a,b X, γ ,γ : I X Wege von a nach b, d. h. γ (0) =
-1 2 1
-∈ →
-γ (0) = a, γ (1) = γ (1) = b
-2 1 2
-γ und γ heißen homotop, wenn es eine stetige Abbildung H : I I X mit
-1 2
-× →
-H(t,0) = γ (t) t I
+Sei X ein topologischer Raum, a,b
+∈
+X, γ
 1
-∀ ∈
-H(t,1) = γ (t) t I
+,γ
 2
-∀ ∈
-und H(0,s) = a und H(1,s) = b für alle s I gibt. Dann schreibt man: γ γ
-1 2
-∈ ∼
-H heißt Homotopie zwischen γ und γ .
-1 2
+: I
+→
+X Wege von a nach b, d. h. γ
+1
+(0) =
+γ
+2
+(0) = a, γ
+1
+(1) = γ
+2
+(1) = b
+γ
+1
+und γ
+2
+heißen homotop, wenn es eine stetige Abbildung H : I
+×
+I
+→
+X mit
+H(t,0) = γ
+1
+(t)
+∀
+t
+∈
+I
+H(t,1) = γ
+2
+(t)
+∀
+t
+∈
+I
+und H(0,s) = a und H(1,s) = b für alle s
+∈
+I gibt. Dann schreibt man: γ
+1
+∼
+γ
+2
+H heißt Homotopie zwischen γ
+1
+und γ
+2
+.
 Bemerkung 40
-Sei X ein topologischer Raum, a,b X, γ ,γ : I X Wege von a nach b und H eine
-1 2
-∈ →
-Homotopie zwischen γ und γ .
-1 2
+Sei X ein topologischer Raum, a,b
+∈
+X, γ
+1
+,γ
+2
+: I
+→
+X Wege von a nach b und H eine
+Homotopie zwischen γ
+1
+und γ
+2
+.
 Dann gilt: Der Weg
-γ : I X, γ (t) = H(t,s)
-s s
+γ
+s
+: I
 →
-ist Weg in X von a nach b für jedes s I.
+X, γ
+s
+(t) = H(t,s)
+ist Weg in X von a nach b für jedes s
 ∈
+I.
 Beweis: H ist stetig, also ist H(t,s) insbesondere für jedes feste s stetig. Da H(0,s) = a und
-H(1,s) = b für alle s I und γ eine Abbildung von I auf X ist, ist γ ein Weg in X von a
-s s
+H(1,s) = b für alle s
 ∈
-nach b für jedes s I. (cid:4)
+I und γ
+s
+eine Abbildung von I auf X ist, ist γ
+s
+ein Weg in X von a
+nach b für jedes s
 ∈
+I. (cid:4)
 Bemerkung 41
 Durch Homotopie wird eine Äquivalenzrelation auf der Menge aller Wege in X von a nach b
 definiert.
 Beweis:
 46 3.1.HOMOTOPIEVONWEGEN
-reflexiv: H(t,s) = γ(t) für alle (t,s) I I
-• ∈ ×
-symmetrisch: H(cid:48)(t,s) = H(t,1 s) für alle (t,s) I I
-• − ∈ ×
-transitiv: Seien H(cid:48) bzw. H(cid:48)(cid:48) Homotopien von γ nach γ bzw. von γ nach γ .
-1 2 2 3
 •
+reflexiv: H(t,s) = γ(t) für alle (t,s)
+∈
+I
+×
+I
+•
+symmetrisch: H(cid:48)(t,s) = H(t,1
+−
+s) für alle (t,s)
+∈
+I
+×
+I
+•
+transitiv: Seien H(cid:48) bzw. H(cid:48)(cid:48) Homotopien von γ
+1
+nach γ
+2
+bzw. von γ
+2
+nach γ
+3
+.
+Dann sei H(t,s) :=
 (cid:40)
-H(cid:48)(t,2s) falls 0 s 1
-Dann sei H(t,s) := ≤ ≤ 2
-H(cid:48)(cid:48)(t,2s 1) falls 1 s 1
-− 2 ≤ ≤
-H ist stetig und Homotopie von γ nach γ .
-1 3
+H(cid:48)(t,2s) falls 0
+≤
+s
+≤
+1
+2
+H(cid:48)(cid:48)(t,2s
+−
+1) falls 1
+2 ≤
+s
+≤
+1
 ⇒
+H ist stetig und Homotopie von γ
+1
+nach γ
+3
+.
 (cid:4)
 Beispiel 30
-1) Sei X = S1. γ und γ aus Abbildung 3.3a nicht homotop.
-1 2
-2) Sei X = T2. γ ,γ und γ aus Abbildung 3.3b sind paarweise nicht homotop.
-1 2 3
+1) Sei X = S1. γ
+1
+und γ
+2
+aus Abbildung 3.3a nicht homotop.
+2) Sei X = T2. γ
+1
+,γ
+2
+und γ
+3
+aus Abbildung 3.3b sind paarweise nicht homotop.
 3) Sei X = R2 und a = b = (0,0).
 Je zwei Wege im R2 mit Anfangs- und Endpunkt (0,0) sind homotop.
 Abbildung 3.2: Zwei Wege im R2 mit Anfangs- und Endpunkt (0,0)
-Sei γ : I R2 der konstante Weg γ (t) = (0,0) t I. Sei γ(0) = γ(1) = (0,0).
-0 0
-→ ∀ ∈
-H(t,s) := (1 s)γ(t) ist stetig, H(t,0) = γ(t) t I und H(t,1) = (0,0) t I.
-− ∀ ∈ ∀ ∈
+Sei γ
+0
+: I
+→
+R2 der konstante Weg γ
+0
+(t) = (0,0)
+∀
+t
+∈
+I. Sei γ(0) = γ(1) = (0,0).
+H(t,s) := (1
+−
+s)γ(t) ist stetig, H(t,0) = γ(t)
+∀
+t
+∈
+I und H(t,1) = (0,0)
+∀
+t
+∈
+I.
 Bemerkung 42
-Sei X ein topologischer Raum, γ : I X ein Weg und ϕ : I I stetig mit ϕ(0) = 0,
-→ →
-ϕ(1) = 1. Dann sind γ und γ ϕ homotop.
+Sei X ein topologischer Raum, γ : I
+→
+X ein Weg und ϕ : I
+→
+I stetig mit ϕ(0) = 0,
+ϕ(1) = 1. Dann sind γ und γ
 ◦
-Beweis: Sei H(t,s) = γ((1 s)t+s ϕ(t)).
-− ·
+ϕ homotop.
+Beweis: Sei H(t,s) = γ((1
+−
+s)t+s
+·
+ϕ(t)).
 Dann ist H stetig, H(t,0) = γ(t), H(t,1) = γ(ϕ(t)), H(0,s) = γ(0) und H(1,s) =
-γ(1 s+s) = γ(1)
+γ(1
 −
-H ist Homotopie. (cid:4)
+s+s) = γ(1)
 ⇒
+H ist Homotopie. (cid:4)
 47 3.1.HOMOTOPIEVONWEGEN
+a
 b
-b a
-γ γ
-2 1
+γ
+1
+γ
+2
+(a) Kreis mit zwei Wegen
 a
-(a) Kreis mit zwei Wegen (b) Torus mit drei Wegen
+b
+(b) Torus mit drei Wegen
 Abbildung 3.3: Beispiele für (nicht)-Homotopie von Wegen
 Definition 43
-Seien γ ,γ Wege in X mit γ (1) = γ (0). Dann ist
-1 2 1 2
+Seien γ
+1
+,γ
+2
+Wege in X mit γ
+1
+(1) = γ
+2
+(0). Dann ist
+γ(t) =
 (cid:40)
-γ (2t) falls 0 t < 1
-γ(t) = 1 ≤ 2
-γ (2t 1) falls 1 t 1
-2 − 2 ≤ ≤
-ein Weg in X. Er heißt zusammengesetzter Weg und man schreibt γ = γ γ .
-1 2
+γ
+1
+(2t) falls 0
+≤
+t < 1
+2
+γ
+2
+(2t
+−
+1) falls 1
+2 ≤
+t
+≤
+1
+ein Weg in X. Er heißt zusammengesetzter Weg und man schreibt γ = γ
+1
 ∗
+γ
+2
+.
 Bemerkung 43
 Das Zusammensetzen von Wegen ist nur bis auf Homotopie assoziativ, d. h.:
-γ (γ γ ) = (γ γ ) γ
-1 2 3 1 2 3
-∗ ∗ (cid:54) ∗ ∗
-γ (γ γ ) (γ γ ) γ
-1 2 3 1 2 3
-∗ ∗ ∼ ∗ ∗
-mit γ (1) = γ (0) und γ (1) = γ (0).
-1 2 2 3
-γ γ γ
-1 2 3
+γ
+1
+∗
+(γ
+2
+∗
+γ
+3
+)
+(cid:54)
+= (γ
+1
+∗
+γ
+2
+)
+∗
+γ
+3
+γ
+1
+∗
+(γ
+2
+∗
+γ
+3
+)
+∼
+(γ
+1
+∗
+γ
+2
+)
+∗
+γ
+3
+mit γ
+1
+(1) = γ
+2
+(0) und γ
+2
+(1) = γ
+3
+(0).
+γ
+1
+γ
+2
+γ
+3
 0 1/2 3/4 1
-(a) γ ∗(γ ∗γ )
-1 2 3
-γ γ γ
-1 2 3
+(a) γ
+1
+∗(γ
+2
+∗γ
+3
+)
+γ
+1
+γ
+2
+γ
+3
 0 1/4 1/2 1
-(b) (γ ∗γ )∗γ
-1 2 3
+(b) (γ
+1
+∗γ
+2
+)∗γ
+3
 Abbildung 3.4: Das Zusammensetzen von Wegen ist nicht assoziativ
 Beweis: DasZusammensetzenvonWegenistwegenBemerkung 42bisaufHomotopieassoziativ.
 Verwende dazu
+ϕ(t) =
 
-1t falls 0 t < 1
- 2 ≤ 2
-ϕ(t) = t 1 falls 1 t < 3
- − 4 2 ≤ 4
-2t 1 falls 3 t 1
-− 4 ≤ ≤
+ 
+
+
+1
+2
+t falls 0
+≤
+t < 1
+2
+t
+−
+1
+4
+falls 1
+2 ≤
+t < 3
+4
+2t
+−
+1 falls 3
+4 ≤
+t
+≤
+1
 Bemerkung 44
-Sei X ein topologischer Raum, a,b,c X, γ ,γ(cid:48) Wege von a nach b und γ ,γ(cid:48) Wege von b
-∈ 1 1 2 2
+Sei X ein topologischer Raum, a,b,c
+∈
+X, γ
+1
+,γ(cid:48)
+1
+Wege von a nach b und γ
+2
+,γ(cid:48)
+2
+Wege von b
 nach c.
-Sind γ γ(cid:48) und γ γ(cid:48), so ist γ γ γ(cid:48) γ(cid:48).
-1 ∼ 1 2 ∼ 2 1 ∗ 2 ∼ 1∗ 2
-48 3.2.FUNDAMENTALGRUPPE
-γ 1(cid:48) γ 1 γ
+Sind γ
+1 ∼
+γ(cid:48)
+1
+und γ
+2 ∼
+γ(cid:48)
+2
+, so ist γ
+1 ∗
+γ
+2 ∼
+γ(cid:48)
+1∗
+γ(cid:48)
 2
-a c
+.
+48 3.2.FUNDAMENTALGRUPPE
+γ
+1
+γ(cid:48)
+1
+a
 b
+c
 γ(cid:48)
 2
+γ 2
 Abbildung 3.5: Situation aus Bemerkung 44
 .
-Beweis: Sei H eine Homotopie zwischen γ und γ(cid:48), i = 1,2.
-i i i
+Beweis: Sei H
+i
+eine Homotopie zwischen γ
+i
+und γ(cid:48)
+i
+, i = 1,2.
 Dann ist
+H(t,s) :=
 (cid:40)
-H (2t,s) falls 0 t 1 s I
-H(t,s) := 1 ≤ ≤ 2 ∀ ∈
-H (2t 1,s) falls 1 t 1
-2 − 2 ≤ ≤
-eine Homotopie zwischen γ γ und γ(cid:48) γ(cid:48).
-1 ∗ 2 1∗ 2
+H
+1
+(2t,s) falls 0
+≤
+t
+≤
+1
+2 ∀
+s
+∈
+I
+H
+2
+(2t
+−
+1,s) falls 1
+2 ≤
+t
+≤
+1
+eine Homotopie zwischen γ
+1 ∗
+γ
+2
+und γ(cid:48)
+1∗
+γ(cid:48)
+2
+.
 Eine spezielle Homotopieäquivalenz sind sog. Deformationsretraktionen:
 Definition 44
-Sei X ein topologischer Raum, A X, r : X A eine stetige Abbildung und ι = (id ) .
-X A
-⊆ → |
-a) ι : A X mitι(x) = xheißtdieInklusionsabbildung undmanschreibt:ι : A (cid:44) X.
-→ →
-b) r heißt Retraktion, wenn r = id ist.
-A A
-|
-c) A heißt Deformationsretrakt, wenn es eine Retraktion r auf A mit ι r id gibt.
+Sei X ein topologischer Raum, A
+⊆
+X, r : X
+→
+A eine stetige Abbildung und ι = (id
 X
-◦ ∼
-Beispiel 31 (Zylinder auf Kreis)
-Sei X = S1 R ein topologischer Raum und
-×
-r : S1 R S1 0 = S1
-∼
-× → ×{ }
+)
+|
+A
+.
+a) ι : A
+→
+X mitι(x) = xheißtdieInklusionsabbildung undmanschreibt:ι : A (cid:44)
+→
+X.
+b) r heißt Retraktion, wenn r
+|
+A
+= id
+A
+ist.
+c) A heißt Deformationsretrakt, wenn es eine Retraktion r auf A mit ι
+◦
+r
+∼
+id
+X
+gibt.
+Beispiel 31 (Zylinder auf Kreis)
+Sei X = S1
+×
+R ein topologischer Raum und
+r : S1
+×
+R
+→
+S1
+×{
+0
+}
+∼
+= S1
 mit
 r(x,y) := (x,0)
-eine Abbildung. r ist eine Retraktion, da r |S1 ∼= id S1.
-ι r : S1 R S1 R
-◦ × → ×
-(x,y) (x,0)
+eine Abbildung. r ist eine Retraktion, da r
+|
+S1 ∼ = id S1 .
+ι
+◦
+r : S1
+×
+R
+→
+S1
+×
+R
+(x,y)
 (cid:55)→
-H : (S1 R) I S1 R
-× × → ×
-(x,y,t) (x,ty)
+(x,0)
+H : (S1
+×
+R)
+×
+I
+→
+S1
+×
+R
+(x,y,t)
 (cid:55)→
+(x,ty)
 3.2 Fundamentalgruppe
 Für einen Weg γ sei [γ] seine Homotopieklasse.
 Definition 45
-Sei X ein topologischer Raum und x X. Sei außerdem
+Sei X ein topologischer Raum und x
 ∈
-π (X,x) := [γ] γ ist Weg in X mit γ(0) = γ(1) = x
+X. Sei außerdem
+π
 1
-{ | }
+(X,x) :=
+{
+[γ]
+|
+γ ist Weg in X mit γ(0) = γ(1) = x
+}
 49 3.2.FUNDAMENTALGRUPPE
-Durch [γ ] [γ ] := [γ γ ] wird π (X,x) zu einer Gruppe. Diese Gruppe heißt Funda-
-1 G 2 1 2 1
-∗ ∗
+Durch [γ
+1
+]
+∗
+G
+[γ
+2
+] := [γ
+1
+∗
+γ
+2
+] wird π
+1
+(X,x) zu einer Gruppe. Diese Gruppe heißt Funda-
 mentalgruppe von X im Basispunkt x.
 Bemerkung 45
 Im R2 gibt es nur eine Homotopieklasse.
 Beweis: (Fundamentalgruppe ist eine Gruppe)
 a) Abgeschlossenheit folgt direkt aus der Definition von
-G
 ∗
+G
 b) Assoziativität folgt aus Bemerkung 43
-c) Neutrales Element e = [γ ],γ (t) = x t I. e [γ] = [γ] = [γ] e, da γ γ γ
-0 0 0
-∀ ∈ ∗ ∗ ∗ ∼
-d) Inverses Element [γ]−1 = [γ] = [γ(1 t)], denn γ γ γ γ γ
+c) Neutrales Element e = [γ
+0
+],γ
 0
-− ∗ ∼ ∼ ∗
+(t) = x
+∀
+t
+∈
+I. e
+∗
+[γ] = [γ] = [γ]
+∗
+e, da γ
+0
+∗
+γ
+∼
+γ
+d) Inverses Element [γ]−1 = [γ] = [γ(1
+−
+t)], denn γ
+∗
+γ
+∼
+γ
+0
+∼
+γ
+∗
+γ
 Beispiel 32
-1) S1 = z C z = 1 = (cid:8) (cosϕ,sinϕ) R2 (cid:12) (cid:12) 0 ϕ 2π(cid:9)
-{ ∈ | | | } ∈ ≤ ≤
-π 1(S1,1) = (cid:8) [γk] (cid:12) (cid:12) k Z(cid:9) ∼= Z. Dabei ist γ(t) = e2πit = cos(2πt)+isin(2πt) und
+1) S1 =
+{
+z
+∈
+C
+| |
+z
+|
+= 1
+}
+= (cid:8) (cosϕ,sinϕ)
+∈
+R2 (cid:12) (cid:12) 0
+≤
+ϕ
+≤
+2π (cid:9)
+π 1 (S1,1) = (cid:8) [γk] (cid:12) (cid:12) k
 ∈
-γk := γ γ
-(cid:124) ∗·(cid:123)·(cid:122)·∗ (cid:125)
+Z(cid:9) ∼ = Z. Dabei ist γ(t) = e2πit = cos(2πt)+isin(2πt) und
+γk := γ
+∗···∗
+γ
+(cid:124) (cid:123)(cid:122) (cid:125)
 k mal
-[γk] k ist ein Isomorphismus.
+[γk]
 (cid:55)→
-2) π (R2,0) = π (R2,x) = e für jedes x R2
-1 1
-{ } ∈
-3) π (Rn,x) = e für jedes x Rn
+k ist ein Isomorphismus.
+2) π
+1
+(R2,0) = π
+1
+(R2,x) =
+{
+e
+}
+für jedes x
+∈
+R2
+3) π
 1
-{ } ∈
-4) G Rn heißt sternförmig bzgl. x G, wenn für jedes y G auch die Strecke
-⊆ ∈ ∈
-[x,y] G ist.
+(Rn,x) =
+{
+e
+}
+für jedes x
+∈
+Rn
+4) G
+⊆
+Rn heißt sternförmig bzgl. x
+∈
+G, wenn für jedes y
+∈
+G auch die Strecke
+[x,y]
 ⊆
-Für jedes sternförmige G Rn ist π (G,x) = e
+G ist.
+Für jedes sternförmige G
+⊆
+Rn ist π
 1
-⊆ { }
+(G,x) =
+{
+e
+}
 x
 Abbildung 3.6: Sternförmiges Gebiet
 .
-5) π (S2,x ) = e , da im R2 alle Wege homotop zu e sind. Mithilfe der stereogra-
-1 0
-{ } { }
+5) π
+1
+(S2,x
+0
+) =
+{
+e
+}
+, da im R2 alle Wege homotop zu
+{
+e
+}
+sind. Mithilfe der stereogra-
 phischen Projektion kann von S2 auf den R2 abgebildet werden.
 Dieses Argument funktioniert nicht mehr bei flächenfüllenden Wegen, d. h. wenn
-γ : I S2 surjektiv ist.
+γ : I
 →
+S2 surjektiv ist.
 Bemerkung 46
-Sei X ein topologischer Raum, a,b X, δ : I X ein Weg von a nach b.
-∈ →
+Sei X ein topologischer Raum, a,b
+∈
+X, δ : I
+→
+X ein Weg von a nach b.
 Dann ist die Abbildung
-α : π (X,a) π (X,b) [γ] [δ γ δ]
-1 1
-→ (cid:55)→ ∗ ∗
+α : π
+1
+(X,a)
+→
+π
+1
+(X,b) [γ]
+(cid:55)→
+[δ
+∗
+γ
+∗
+δ]
 ein Gruppenisomorphismus.
 50 3.2.FUNDAMENTALGRUPPE
-γ
 a b
+γ
 δ
 Abbildung 3.7: Situation aus Bemerkung 46
 .
 Beweis:
-α([γ ] [γ ]) = [δ (γ γ ) δ]
-1 2 1 2
-∗ ∗ ∗ ∗
-= [δ γ δ δ γ δ]
-1 2
-∗ ∗ ∗ ∗ ∗
-= [δ γ δ] [δ γ δ]
-1 2
-∗ ∗ ∗ ∗ ∗
-= α([γ ]) α([γ ])
-1 2
+α([γ
+1
+]
+∗
+[γ
+2
+]) = [δ
+∗
+(γ
+1
+∗
+γ
+2
+)
+∗
+δ]
+= [δ
+∗
+γ
+1
+∗
+δ
+∗
+δ
+∗
+γ
+2
+∗
+δ]
+= [δ
+∗
+γ
+1
+∗
+δ]
+∗
+[δ
+∗
+γ
+2
+∗
+δ]
+= α([γ
+1
+])
 ∗
+α([γ
+2
+])
 Definition 46
 Ein wegzusammenhängender topologischer Raum X heißt einfach zusammenhängend,
-wenn π (X,x) = e für ein x X.
+wenn π
 1
-{ } ∈
-Wenn π (X,x) = e für ein x X gilt, dann wegen Bemerkung 46 sogar für alle x X.
+(X,x) =
+{
+e
+}
+für ein x
+∈
+X.
+Wenn π
 1
-{ } ∈ ∈
+(X,x) =
+{
+e
+}
+für ein x
+∈
+X gilt, dann wegen Bemerkung 46 sogar für alle x
+∈
+X.
 Bemerkung 47
-Es seien X,Y topologische Räume, f : X Y eine stetige Abbildung, x X,y := f(x) Y.
-→ ∈ ∈
-a) Dann ist die Abbildung f : π (X,x) π (Y,y),[γ] [f γ] ein Gruppenhomomor-
-∗ 1 1
-→ → ◦
-phismus.
-b) Ist Z ein weiterer topologischer Raum und g : Y Z eine stetige Abbildung z := g(y).
-→
-Dann ist (g f) = g f : π (X,x) π (Z,z)
-∗ ∗ ∗ 1 1
-◦ ◦ →
-Beweis:
-a) f ist wohldefiniert: Seien γ ,γ homotope Wege von x. z.Z.: f γ f γ : Nach
-∗ 1 2 1 2
-◦ ∼ ◦
-Voraussetzung gibt es stetige Abbildungen H : I I X mit
-× →
-H(t,0) = γ (t),
-1
-H(t,1) = γ (t),
-2
-H(0,s) = H(1,s) = x.
-Dann ist f H : I I Y stetig mit (f H)(t,0) = f(H(t,0)) = f(γ (t)) = (f γ )(t)
-1 1
-◦ × → ◦ ◦
-etc. f γ f γ .
-1 2
-⇒ ◦ ∼ ◦
-f ([γ ] [γ ]) = [f (γ γ )] = [(f γ )] [(f γ )] = f ([γ ]) f ([γ ])
-∗ 1 2 1 2 1 2 ∗ 1 ∗ 2
-∗ ◦ ∗ ◦ ∗ ◦ ∗
-b) (g f) ([γ]) = [(g f) γ] = [g (f γ)] = g ([f γ]) = g (f ([γ])) = (g f )([γ])
-∗ ∗ ∗ ∗ ∗ ∗
-◦ ◦ ◦ ◦ ◦ ◦ ◦
-Beispiel 33
-1) f : S1 (cid:44) R2 ist injektiv, aber f ∗ : π 1(S1,1) ∼= Z π 1(R2,1) = e ist nicht injektiv.
-→ → { }
-2) f : R S1,t (cos2πt,sin2πt)istsurjektiv,aberf ∗ : π 1(R,0) = e π 1(S1,1) ∼=
-→ (cid:55)→ { } →
-Z ist nicht surjektiv.
-51 3.2.FUNDAMENTALGRUPPE
-Bemerkung 48
-Sei f : X Y ein Homöomorphismus zwischen topologischen Räumen X,Y. Dann gilt:
-→
-f : π (X,x) π (Y,f(x))
-∗ 1 1
+Es seien X,Y topologische Räume, f : X
 →
-ist ein Isomorphismus für jedes x X.
+Y eine stetige Abbildung, x
 ∈
-Beweis: Sei g : Y X die Umkehrabbildung, d. h. g ist stetig und f g = id , g f = id
-Y X
-→ ◦ ◦
-f g = (f g) = (id ) = id und g f = id .
-∗ ∗ ∗ Y ∗ π1(Y,f(X) ∗ ∗ π1(X,x)
-⇒ ◦ ◦ ◦
-Definition 47
-Seien X,Y topologische Räume, x X,y Y,f,g : X Y stetig mit f(x ) = y = g(x ).
-0 0 0 0 0
-∈ ∈ →
-f und g heißen homotop (f g), wenn es eine stetige Abbildung H : X I Y mit
-∼ × →
-H(x,0) = f(x) x X
-∀ ∈
-H(x,1) = g(x) x X
-∀ ∈
-H(x ,s) = y s I
-0 0
-∀ ∈
-gibt.
-Bemerkung 49
-Sind f und g homotop, so ist f = g : π (X,x ) π (Y,y ).
-∗ ∗ 1 0 1 0
-→
-Beweis: Sei γ ein geschlossener Weg in X um x , d. h. [γ] π (X,x ).
-0 1 0
+X,y := f(x)
 ∈
-Z. z.: f γ g γ
-◦ ∼ ◦
-Sei dazu H : I I Y,(t,s) H(γ(t),s). Dann gilt:
-γ
-× → (cid:55)→
-H (t,0) = H(γ(t),0) = (f γ)(t) t I
-γ
-◦ ∀ ∈
-H (1,s) = H(γ(1),s) = H(x ,s) = y s I
-γ 0 0
-∀ ∈
-H (t,1) = H(γ(t),1) = g(γ(t)) t I
-γ
-∀ ∈
-Beispiel 34
-f : X Y,g : Y X mit g f id , f g id
-X Y
-→ → ◦ ∼ ◦ ∼
-f ist Isomorphismus. Konkret: f : R2 0 , g : 0 R2
+Y.
+a) Dann ist die Abbildung f
 ∗
-⇒ → { } { } →
-f g = id , g f : R2 R2, x 0 für alle x.
-{0}
-⇒ ◦ ◦ → (cid:55)→
-g f id R2 mit Homotopie: H : R2 I R2,H(x,s) = (1 s)x (stetig!)
-◦ ∼ × → −
-H(x,0) = x = id R2(x), H(x,1) = 0, H(0,s) = 0 s I.
-⇒ ∀ ∈
-Satz 3.1 (Satz von Seifert und van Kampen „light“)
-Sei X ein topologischer Raum, U,V X offen mit U V = X und U V wegzusam-
-⊆ ∪ ∩
-menhängend.
-Dann wird π (X,x) für x U V erzeugt von geschlossenen Wegen um x, die ganz in
+: π
 1
-∈ ∩
-U oder ganz in V verlaufen.
-52 3.3.ÜBERLAGERUNGEN
-Beweis: Sei γ : I X ein geschlossener Weg um x. Überdecke I mit endlich vielen offenen
+(X,x)
 →
-Intervallen I ,I ,...,I , die ganz in γ−1(U) oder ganz in γ−1(V) liegen.
-1 2 n
-O. B. d. A. sei γ(I ) U,γ(I ) V, etc.
-1 2
-⊆ ⊆
-Wähle t I I , also γ(t ) U V. Sei σ Weg in U V von x nach γ(t ) γ ist
-i i i+1 i i 0 i
-∈ ∩ ∈ ∩ ∩ ⇒
-homotop zu
-γ σ σ γ σ σ γ mit γ := γ
-(cid:124)1 (cid:125)1 ∗(cid:124)1 ∗(cid:123)(cid:122)2 (cid:125)2 n−1 2 i |Ii
-(cid:123)∗(cid:122) ∗ ∗···∗ ∗
-inU inV
-a b
-x
-Abbildung 3.8: Topologischer Raum X
-Beispiel 35 (Satz von Seifert und van Kampen)
-1) Sei X wie in Abbildung 3.8. π (X,x) wird „frei“ erzeugt von a und b, weil π (U,x) =
-1 1
-a = Z,π (V,x) = b = Z, insbesondere ist a b nicht homotop zu b a.
-∼ 1 ∼
-(cid:104) (cid:105) (cid:104) (cid:105) ∗ ∗
-2) Torus: π (T2,X) wird erzeugt von a und b.
+π
 1
-a
-U
-b a b
-V
-V
-Abbildung 3.9: a b = b a a b a b e
-∗ ∗ ⇔ ∗ ∗ ∗ ∼
-3.3 Überlagerungen
-Definition 48
-Es seien X,Y zusammenhängende topologische Räume und p : Y X eine stetige Abbil-
-→
-dung.
-p heißt Überlagerung, wenn jedes x X eine offene Umgebung U = U(x) X besitzt,
-∈ ⊆
-sodass p−1(U) disjunkte Vereinigung von offenen Teilmengen V Y ist (j I) und
-j
-⊆ ∈
-p : V U ein Homöomorphismus ist.
-|Vj j
+(Y,y),[γ]
 →
-I heißt Grad der Überlagerung p und man schreibt:
-| |
-degp := I
-| |
-53 3.3.ÜBERLAGERUNGEN
-Abbildung 3.10: R S1,
+[f
+◦
+γ] ein Gruppenhomomor-
+phismus.
+b) Ist Z ein weiterer topologischer Raum und g : Y
 →
-t (cos2πt,sin2πt)
-(cid:55)→
-Beispiel 36
-1) siehe Abbildung 3.10
-2) siehe Abbildung 3.11
-3) Rn Tn = Rn/Zn
+Z eine stetige Abbildung z := g(y).
+Dann ist (g
+◦
+f)
+∗
+= g
+∗
+◦
+f
+∗
+: π
+1
+(X,x)
 →
-4) Sn n(R)
-→ P
-5) S1 S1, z z2, siehe Abbildung 3.12
-→ (cid:55)→
-6
-* * * * * *
-5
-* * * * * *
-4
-* * * * * *
-3
-* * * * * * *
-−−−→
+π
+1
+(Z,z)
+Beweis:
+a) f
+∗
+ist wohldefiniert: Seien γ
+1
+,γ
 2
-* * * * * *
+homotope Wege von x. z.Z.: f
+◦
+γ
 1
-* * * * * *
-0
-0 1 2 3 4 5 6
-Abbildung 3.11: R2 T2 = R2/Z2
+∼
+f
+◦
+γ
+2
+: Nach
+Voraussetzung gibt es stetige Abbildungen H : I
+×
+I
 →
-Bemerkung 50
-Überlagerungen sind surjektiv.
-Beweis: Sei p : Y X eine Überlagerung und x X beliebig. Dann existiert eine offene
-Umgebung U(x)→ X und offene Teilmengen V ∈ X mit p−1(U) = (cid:83)˙ V und p : V U
-j j |Vj j
-⊆ ⊆ →
-ist Homöomorphismus.
-D. h. es existiert ein y V , so dass p (y) = x. Da x X beliebig war und ein y Y
-j |Vj
-∈ ∈ ∈
-existiert, mit p(y) = x, ist p surjektiv. (cid:4)
-54 3.3.ÜBERLAGERUNGEN
-z2
-i
-z
-ϕ
-ϕ
-z2
+X mit
+H(t,0) = γ
 1
-Abbildung 3.12: t (cos4πt,sin4πt)
-(cid:55)→
-Definition 49
-Seien (X,T ),(Y,T ) topologische Räume und f : X Y eine Abbildung.
-X Y
+(t),
+H(t,1) = γ
+2
+(t),
+H(0,s) = H(1,s) = x.
+Dann ist f
+◦
+H : I
+×
+I
 →
-f heißt offen : U T : f(U) T .
-X Y
-⇔ ∀ ∈ ∈
-Beispiel 37 (Offene und stetige Abbildungen)
-Sei X ein topologischer Raum und seien f : R R mit i 1,2,3 und g : R S1 =
-i
-→ ∈ { } →
-z C z = 1 Abbildungen.
-{ ∈ | (cid:107) (cid:107) }
-1) f := idR ist eine offene und stetige Abbildung.
+Y stetig mit (f
+◦
+H)(t,0) = f(H(t,0)) = f(γ
 1
-2) g(x) := e2πix ist eine offene, aber keine stetige Abbildung (vgl. Abbildung 1.5).
-3) f (x) := 42 ist eine stetige, aber keine offene Abbildung.
-2
-(cid:40)
-0 falls x Q
-4) f (x) := ∈
-3 42 falls x R Q
-∈ \
-ist weder stetig noch offen.
-Bemerkung 51
-Überlagerungen sind offene Abbildungen.
-Beweis: Sei y V und x p(V), sodass x = p(y) gilt. Sei weiter U = U eine offene Umgebung
-x
-∈ ∈
-von x wie in Definition 48 und V die Komponente von p−1(U), die y enthält.
-j
-Dann ist V V offene Umgebung von y.
-j
-∩
-p(V V ) ist offen in p(V ), also auch offen in X. Außerdem ist p(y) = x p(V V ) und
-j j j
-⇒ ∩ ∈ ∩
-p(V V ) p(V).
-j
-∩ ⊆
-p(V) ist offen.
-⇒
-Definition 50
-Sei X ein topologischer Raum und M X.
-⊆
-M heißt diskret in X, wenn M in X keinen Häufungspunkt hat.
-Bemerkung 52
-Sei p : Y X Überlagerung, x X.
-→ ∈
-a) X hausdorffsch Y hausdorffsch
+(t)) = (f
+◦
+γ
+1
+)(t)
+etc.
 ⇒
-b) p−1(x) ist diskret in Y für jedes x X.
-∈
-Beweis:
-a) Seien y ,y Y.
-1 2
+f
+◦
+γ
+1
+∼
+f
+◦
+γ
+2
+.
+f
+∗
+([γ
+1
+]
+∗
+[γ
+2
+]) = [f
+◦
+(γ
+1
+∗
+γ
+2
+)] = [(f
+◦
+γ
+1
+)]
+∗
+[(f
+◦
+γ
+2
+)] = f
+∗
+([γ
+1
+])
+∗
+f
+∗
+([γ
+2
+])
+b) (g
+◦
+f)
+∗
+([γ]) = [(g
+◦
+f)
+◦
+γ] = [g
+◦
+(f
+◦
+γ)] = g
+∗
+([f
+◦
+γ]) = g
+∗
+(f
+∗
+([γ])) = (g
+∗
+◦
+f
+∗
+)([γ])
+Beispiel 33
+1) f : S1 (cid:44)
+→
+R2 ist injektiv, aber f ∗ : π 1 (S1,1) ∼ = Z
+→
+π 1 (R2,1) =
+{
+e
+}
+ist nicht injektiv.
+2) f : R
+→
+S1,t
+(cid:55)→
+(cos2πt,sin2πt)istsurjektiv,aberf ∗ : π 1 (R,0) =
+{
+e
+} →
+π 1 (S1,1) ∼ =
+Z ist nicht surjektiv.
+51 3.2.FUNDAMENTALGRUPPE
+Bemerkung 48
+Sei f : X
+→
+Y ein Homöomorphismus zwischen topologischen Räumen X,Y. Dann gilt:
+f
+∗
+: π
+1
+(X,x)
+→
+π
+1
+(Y,f(x))
+ist ein Isomorphismus für jedes x
 ∈
-1. Fall: p(y ) = p(y ) = x.
-1 2
-55 3.3.ÜBERLAGERUNGEN
-Sei U Umgebung von x wie in Definition 48, V bzw. V die Komponente von p−1(U),
-j1 j2
-die y bzw. y enthält.
-1 2
-Dann ist V = V , weil beide ein Element aus p−1(x) enthalten.
-j1 j2
-(cid:54)
-V V = nach Voraussetzung.
-j1 j2
-⇒ ∩ ∅
-2. Fall: p(y ) = p(y ).
-1 2
-(cid:54)
-Dann seien U und U disjunkte Umgebungen von p(y ) und p(y ).
-1 2 1 2
-p−1(U ) und p−1(U ) sind disjunkte Umgebungen von y und y .
-1 2 1 2
+X.
+Beweis: Sei g : Y
+→
+X die Umkehrabbildung, d. h. g ist stetig und f
+◦
+g = id
+Y
+, g
+◦
+f = id
+X
 ⇒
-b) Sei x X beliebig, aber fest.
+f
+∗
+◦
+g
+∗
+= (f
+◦
+g)
+∗
+= (id
+Y
+)
+∗
+= id
+π1(Y,f(X)
+und g
+∗
+◦
+f
+∗
+= id
+π1(X,x)
+.
+Definition 47
+Seien X,Y topologische Räume, x
+0
 ∈
-Zu zeigen: y p−1(x) : V T mit y V , sodass gilt:i = j V V = .
-i i Y i i i j
-∀ ∈ ∃ ∈ ∈ (cid:54) ⇒ ∩ ∅
-Die V existieren wegen der Definition einer Überlagerung: p heißt Überlagerung
-i
-: x X U = U(x) T : p−1(U) = (cid:83)˙ V und p ist Homöomorphismus.
-⇔ ∀ ∈ ∃ ∈ X Vi∈TY i |Vi
-(p )−1(x) = y
-|Vi i
-⇒ { }
-Alle y liegen diskret in Y, da Häufungspunkte unendlich viele Elemente in jeder
-i
-⇒
-Umgebung benötigen. (cid:4)
-Bemerkung 53 (Eindeutigkeit des Überlagerungsgrades)
-Sei p : Y X Überlagerung. Dann gilt:
+X,y
+0
+∈
+Y,f,g : X
 →
-x ,x X : p−1(x ) = p−1(x )
-1 2 1 2
-∀ ∈ | | | |
-Hinweis: p−1(x ) = ist erlaubt!
-1
-| | ∞
-Beweis: Sei U Umgebung von x wie in Definition 48, x U. Dann enthält jedes V mit j I
-1 j
-∈ ∈
-genau ein Element von p−1(x).
-p−1(x) ist konstant für x U
-⇒ | | ∈
-=X ==zh =gd. p−1(x) ist konstant für x X.
-⇒ | | ∈
-Definition 51
-Es seien X,Y,Z topologische Räume, p : Y X eine Überlagerung und f : Z X stetig.
-→ →
-Eine stetige Abbildung f˜: Z Y heißt Liftung von f, wenn p f˜= f ist.
-→ ◦
-f˜
-Y Z
-p
-f
+Y stetig mit f(x
+0
+) = y
+0
+= g(x
+0
+).
+f und g heißen homotop (f
+∼
+g), wenn es eine stetige Abbildung H : X
+×
+I
+→
+Y mit
+H(x,0) = f(x)
+∀
+x
+∈
 X
-Bemerkung 54 (Eindeutigkeit der Liftung)
-Sei Z zusammenhängend und f ,f : Z Y Liftungen von f.
-0 1
+H(x,1) = g(x)
+∀
+x
+∈
+X
+H(x
+0
+,s) = y
+0
+∀
+s
+∈
+I
+gibt.
+Bemerkung 49
+Sind f und g homotop, so ist f
+∗
+= g
+∗
+: π
+1
+(X,x
+0
+)
 →
-z Z : f (z ) = f (z ) f = f
-0 0 0 1 0 0 1
-∃ ∈ ⇒
-Beweis: Sei T = z Z f (z) = f (z) .
-0 1
-{ ∈ | }
-Z. z.: T ist offen und Z T ist auch offen.
-\
-56 3.3.ÜBERLAGERUNGEN
-6
-5
-4
-3
-2
+π
 1
+(Y,y
 0
-0 1 2 3 4 5 6
-T Liften R2/Z2
-−−−→
-Abbildung 3.13: Beim Liften eines Weges bleiben geschlossene Wege im allgemeinen nicht ge-
-schlossen
-Sei z T,x = f(z),U Umgebung von x wie in Definition 48, V die Komponente von p−1(U),
+).
+Beweis: Sei γ ein geschlossener Weg in X um x
+0
+, d. h. [γ]
 ∈
-die y := f (z) = f (z) enthält.
-0 1
-Sei q : U V die Umkehrabbildung zu p .
-V
-→ |
-Sei W := f−1(U) f−1(V) f−1(V). W ist offene Umgebung in Z von z.
-∩ 0 ∩ 1
-Behauptung: W T
-⊆
-Denn für w W ist q(f(w)) = q((p f ))(w) = ((q p) f )(w) = f (w) = q(f(w)) = f (w)
-0 0 0 1
-∈ ◦ ◦ ◦
-T ist offen.
-⇒
-Analog: Z T ist offen.
-\
-Satz 3.2
-Sei p : Y X Überlagerung, γ : I X ein Weg, y Y mit p(y) = γ(0) =: x.
-→ → ∈
-Dann gibt es genau einen Weg γ˜ : I Y mit γ˜(0) = y und p γ˜ = γ.
-→ ◦
-p : Y X Überlagerung, X,Y wegzusammenhängend. p stetig und surjektiv, zu x X
-Umgeb→ ung U, so dass p−1(U) = (cid:83) V ∈ ∃
-j
-p : V U Homöomorphismus.
-|Vj j
+π
+1
+(X,x
+0
+).
+Z. z.: f
+◦
+γ
+∼
+g
+◦
+γ
+Sei dazu H
+γ
+: I
+×
+I
 →
-Bemerkung 55
-Wege in X lassen sich zu Wegen in Y liften.
-Zu jedem y p−1(γ(0)) gibt es genau einen Lift von γ.
-∈
-57 3.3.ÜBERLAGERUNGEN
-Proposition 3.3
-Seien p : Y X eine Überlagerung, a,b X, γ ,γ : I X homotope Wege von a
-0 1
-→ ∈ →
-nach b, a˜ p−1(a),γ˜,γ˜ Liftungen von γ bzw. γ mit γ˜(0) = a˜.
-0 1 0 1 i
+Y,(t,s)
+(cid:55)→
+H(γ(t),s). Dann gilt:
+H
+γ
+(t,0) = H(γ(t),0) = (f
+◦
+γ)(t)
+∀
+t
 ∈
-Dann ist γ˜(1) = γ˜(1) und γ˜ γ˜.
-0 1 0 1
-∼
-Beweis: Sei H : I I X Homotopie zwischen γ und γ .
-1 2
-× →
-Für s I sei γ : I X, t H(t,s).
-s
-∈ → (cid:55)→
-Sei γ˜ Lift von γ mit γ˜(0) = a˜
-s s s
-Sei H˜ : I I Y, H˜(t,s) := (γ˜(t),s)
-s
-× →
-Dann gilt:
-(i) H˜ ist stetig (Beweis wie für Bemerkung 54)
-(ii) H˜(t,0) = γ˜(t), H˜(t,1) = γ˜(t)
-0 1
-(iii) H˜(0,s) = γ˜(0) = a˜
+I
+H
+γ
+(1,s) = H(γ(1),s) = H(x
+0
+,s) = y
+0
+∀
 s
-(iv) H˜(1,s) p−1(b)
 ∈
-Da p−1(b) diskrete Teilmenge von Y ist
-b˜ = H˜(1,s) = H˜(1,0) s I
-s
-⇒ b˜ = b˜ und H˜ ist Hom∀ oto∈ pie zwischen γ˜ und γ˜. (cid:4)
-0 1 0 1
-⇒
-Folgerung 3.4
-Sei p : Y X eine Überlagerung, x X,y p−1(x )
-0 0 0
-→ ∈ ∈
-a) p : π (Y,y ) π (X,x ) ist injektiv
-∗ 1 0 1 0
-→
-b) [π (X,x ) : p (π (Y,y ))] = deg(p)
-1 0 ∗ 1 0
-Beweis:
-a) Sei γ˜ ein Weg in Y um y und p ([γ˜]) = e, also p γ˜ γ
-0 ∗ x0
-◦ ∼
-Nach Proposition 3.3 ist dann γ˜ homotop zum Lift des konstanten Wegs γ mit
-x0
-Anfangspunkt y , also zu γ [γ˜] = e
-0 y0
-⇒
-b) Sei d = degp und p−1(x ) = y ,y ,...,y . Für einen geschlossenen Weg γ in X
-0 0 1 d−1
-{ }
-um x sei γ˜ die Liftung mit γ˜(0) = y .
-0 0
-γ˜(1) y ,...,y hängt nur von [γ] π (X,x ) ab.
-0 d−1 1 0
-∈ { } ∈
-Für geschlossene Wege γ ,γ um x gilt:
-0 1
-γ˜(1) = γ˜(1)
-0 1
-[γ˜ γ˜−1] π (Y,y )
-0 1 1 0
-⇔ ∗ ∈
-[γ γ−1] p (π (Y,y ))
-⇔ 0 ∗ 1 ∈ ∗ 1 0
-[γ ] und [γ ]liegen in der selben Nebenklasse bzgl. p (π (Y,y ))
-0 1 ∗ 1 0
-⇔
-58 3.3.ÜBERLAGERUNGEN
-Zu i 0,...,d 1 gibt es Weg δ in Y mit δ (0) = y und δ (1) = y
-i i 0 i i
-∈ { − }
-p δ ist geschlossener Weg in X um x .
-i 0
-⇒ ∪
-Jedes y mit i = 0,...,d 1 ist γ˜(1) für ein [γ] π (X,x ).
-i 1 0
-⇒ − ∈
-Bemerkung 56
-Sei p : Y X Überlagerung und X einfach zusammenhängend.
-→
-Dann ist p ein Homöomorphismus.
-Beweis: Wegen Bemerkung 55.a ist auch Y einfach zusammenhängend und wegen Bemer-
-kung 55.b ist deg(p) = 1, p ist also bijektiv.
-Nach Bemerkung 51 ist p offen p−1 ist stetig. p ist Homöomorphismus. (cid:4)
-⇒ ⇒
-Definition 52
-Eine Überlagerung p : X˜ X heißt universell, wenn X˜ einfach zusammenhängend ist.
+I
+H
+γ
+(t,1) = H(γ(t),1) = g(γ(t))
+∀
+t
+∈
+I
+Beispiel 34
+f : X
 →
-Beispiel 38 (Universelle Überlagerungen)
-R S1, t (cos2πt,sin2πt)
-→ (cid:55)→
-R2 T2 = R2/Z2
+Y,g : Y
 →
-Sn n(R) für n 2
-→ P ≥
-Satz 3.5
-Sei p : X˜ X eine universelle Überlagerung, q : Y X weitere Überlagerung.
-→ →
-Sei x X,x˜ X˜,y Y mit q(y ) = x = p(x˜).
-0 0 0 0 0 0
-∈ ∈ ∈
-Dann gibt es genau eine Überlagerung p˜: X˜ Y mit p˜(x˜) = y .
-0 0
-→
-Beweis: Sei z X˜,γ : I X˜ ein Weg von x˜ nach z.
-z 0
-∈ →
-Sei δ die eindeutige Liftung von p γ nach Y mit δ (0) = y .
-z z z 0
-◦
-Setze p˜(z) = δ (1).
-z
-Da X˜ einfach zusammenhängend ist, hängt p˜(z) nicht vom gewählten Weg γ ab.
-z
-Offensichtlich ist q(p˜(z)) = p(z).
-Zu zeigen: p˜ist stetig in z X˜:
-∈
-Sei W Y offene Umgebung von p˜(z).
-⊆
-q offen
-==== q(W) ist offene Umgebung von p(z) d(p˜(z)).
-⇒ ·
-Sei U q(W) offen wie in Definition 48 und V q−1(U) die Komponente, die p˜(z) enthält.
-⊆ ⊆
-O. B. d. A. sei V W.
-⊆
-Sei Z := p−1(U). Für u Z sei δ ein Weg in Z von z nach u.
-∈
-γ δ ist Weg von x nach u
-z 0
-⇒ ∗
-p˜(u) V
-⇒ ∈
-p−˜1(W)
-Z
-⇒ ⊆
-p˜ist stetig
+X mit g
+◦
+f
+∼
+id
+X
+, f
+◦
+g
+∼
+id
+Y
 ⇒
-59 3.3.ÜBERLAGERUNGEN
-Folgerung 3.6
-Sind p : X˜ X und q : Y˜ X universelle Überlagerungen, so sind X˜ und Y˜ homöomorph.
-→ →
-Beweis: Seien x X,x˜ X˜ mit p(x˜) = x und y˜ q−1(x ) Y˜.
-0 0 0 0 0 0
-∈ ∈ ∈ ⊆
-Nach Satz 3.5 gibt es genau eine Überlagerung
-f : X˜ Y˜ mit f(x ) = y˜ und q f = p
-0 0
-→ ◦
-und genau eine Überlagerung
-g : Y˜ X˜ mit g(y˜) = x˜ und p g = q
-0 0
-→ ◦
-Damit gilt: p q f = q f = p, q f g = p g = q. Also ist g f : X˜ X˜ Lift von
-◦ ◦ ◦ ◦ ◦ ◦ ◦ →
-p : X˜ X mit (g f)(x˜) = x˜.
-0 0
-→ ◦
-Da auch id diese Eigenschaft hat, folgt mit Bemerkung 53: g f = id .
-x˜ X˜
-◦
-Analog gilt f g = id . (cid:4)
-Y˜
+f
+∗
+ist Isomorphismus. Konkret: f : R2
+→ {
+0
+}
+, g :
+{
+0
+} →
+R2
+⇒
+f
 ◦
-Die Frage, wann es eine universelle Überlagerung gibt, beantwortet der folgende Satz:
-Definition 53
-Sei (X,T) ein topologischer Raum und x X.
+g = id
+{0}
+, g
+◦
+f : R2
+→
+R2, x
+(cid:55)→
+0 für alle x.
+g
+◦
+f
+∼
+id R2 mit Homotopie: H : R2
+×
+I
+→
+R2,H(x,s) = (1
+−
+s)x (stetig!)
+⇒
+H(x,0) = x = id R2 (x), H(x,1) = 0, H(0,s) = 0
+∀
+s
 ∈
-U TheißteineUmgebungsbasisvonx,wennjedeoffeneUmgebungvonxeineTeilmenge
+I.
+Satz 3.1 (Satz von Seifert und van Kampen „light“)
+Sei X ein topologischer Raum, U,V
 ⊆
-von U enthält.
-Satz 3.7
-Es sei X ein wegzusammenhängender topologischer Raum in dem jeder Punkt eine
-Umgebungsbasis aus einfach zusammenhängenden Mengen hat.
-Dann gibt es eine universelle Überlagerung.
-Beweis: Seix X undX˜ := (x,[γ]) x X,γ Weg von x nach x undp : X˜ X,(x,[γ])
-0 o
-∈ { | ∈ } → (cid:55)→
-x.
-Die Topologie auf X˜ ist folgende: Definiere eine Umgebungsbasis von (x,[γ]) wie folgt: Es
-sei U eine einfach zusammenhängende Umgebung von x und
-U˜ = U˜(x,[γ]) := (y,[γ α]) y U,α Weg in U von x nach y
-{ ∗ | ∈ }
-p ist Überlagerung: p : U˜ U bijektiv. p ist stetig und damit p ein Homöomorphismus.
-|U˜ |U˜
-→
-Sind γ ,γ Wege von x nach x und γ γ , so ist U˜(x,[γ ]) U˜(x,[γ ]) = , denn: Ist
-1 2 0 1 2 1 2
-∼ ∩ ∅
-γ α γ α, so ist auch γ γ . Also ist p eine Überlagerung.
-1 2 1 2
-∗ ∼ ∗ ∼
-X˜ ist einfach zusammenhängend: Es sei x˜ := (x ,e) und γ˜ : I X˜ ein geschlossener Weg
-0 0
-→
-um x˜.
-0
-Sei γ := p(γ˜).
-Annahme: [γ˜] = e
-(cid:54)
-Mit Bemerkung 55.a folgt dann: [γ] = e.
-(cid:54)
-Dann ist der Lift von γ nach x˜ mit Anfangspunkt x˜ ein Weg von x˜ nach (x ,[γ]). Wider-
-0 0 0
-spruch.
-60 3.3.ÜBERLAGERUNGEN
-Definition 54
-Es sei p : Y X eine Überlagerung und f : Y Y ein Homöomorphismus.
-→ →
-a) f heißt Decktransformation von p : p f = p.
-⇔ ◦
-b) Die Decktransformationen von p : Y X bilden mit der Verkettung eine Gruppe,
-→
-die sog. Decktransformationsgruppe. Man schreibt: Deck(p), Deck(Y/X) oder
-Deck(Y X).
+X offen mit U
+∪
+V = X und U
+∩
+V wegzusam-
+menhängend.
+Dann wird π
+1
+(X,x) für x
+∈
+U
+∩
+V erzeugt von geschlossenen Wegen um x, die ganz in
+U oder ganz in V verlaufen.
+52 3.3.ÜBERLAGERUNGEN
+Beweis: Sei γ : I
 →
-c) p heißt regulär, wenn Deck(Y/X) = degp gilt.
-| |
-Bemerkung 57 (Eigenschaften der Decktransformation)
-a) (DeckY/X, ) ist eine Gruppe
-◦
-b) Ist f Deck(Y/X) und f = id, dann hat f keinen Fixpunkt.
-∈ (cid:54)
-c) Deck(Y/X) degp
-| | ≤
-d) Ist f eine reguläre Überlagerung, dann gilt: x X : Deck(Y/X) operiert transitiv
-∀ ∈
-auf der Menge der Urbilder f−1(x).
-Beweis:
-a) Es gilt:
-id DeckY/X,
-Y
-• ∈
-f,g DeckY/X p (f g) = (p f) g = p g f g DeckY/X
-• ∈ ⇒ ◦ ◦ ◦ ◦ ◦ ⇒ ◦ ∈
-f DeckY/X p f = p p f−1 = (p f) f−1 = p (f f−1) = p
-• ∈ ⇒ ◦ ⇒ ◦ ◦ ◦ ◦ ◦ ⇒
-f−1 DeckY/X
+X ein geschlossener Weg um x. Überdecke I mit endlich vielen offenen
+Intervallen I
+1
+,I
+2
+,...,I
+n
+, die ganz in γ−1(U) oder ganz in γ−1(V) liegen.
+O. B. d. A. sei γ(I
+1
+)
+⊆
+U,γ(I
+2
+)
+⊆
+V, etc.
+Wähle t
+i
 ∈
-b) Die Menge
-Fix(f) = y Y f(y) = y
-{ ∈ | }
-ist abgeschlossen als Urbild der Diagonale ∆ Y Y unter der stetigen Abbildung
-⊆ ×
-y (f(y),y). Außerdem ist Fix(f) offen, denn ist y Fix(f), so sei U eine Umgebung
-(cid:55)→ ∈
-von p(y) X wie in Definition 48 und U p−1(U) die Komponente, die y enthält;
-∈ ⊆
-also p : V U ein Homöomorphismus. Dann ist W := f−1(V) V offene Umgebung
-→ ∩
-von y.
-Für z W ist f(z) V und p(f(z)) = p(z). Da p injektiv auf V ist, folgt f(z) = z,
-∈ ∈
-d. h. Fix(f) = .
-(cid:54) ∅
-Da Y zusammenhängend ist, folgt aus Fix(f˜) = schon Fix(f) = Y, also f = id .
-Y
-(cid:54) ∅
-c) Es sei x X, deg(p) = d und p−1(x ) = y ,...,y . Für f Deck(Y/X) ist
-0 0 0 d−1
-∈ { } ∈
-f(y ) = y ,...,y .
-0 0 d−1
-{ }
-Zu i 0,...,d 1 gibt es höchstens ein f Deck(Y/X) mit f(y ) = y , denn ist
-0 1
-∈ { − } ∈
-f(y ) = g(y ), so ist (g−1 f)(y ) = y , also nach Bemerkung 57.c g−1 f = id .
-0 0 0 0 Y
-◦ ◦
-d) Wenn jemand den Beweis macht, bitte an info@martin-thoma.de schicken.
-Beispiel 39 (Decktransformationen)
-1) p : R S1 : Deck(R/S1) = t t+n n Z = Z
-∼
-→ { (cid:55)→ | ∈ }
-2) p : R2 T2 : Deck(R2/T2) = Z Z = Z2
+I
+i
+∩
+I
+i+1
+, also γ(t
+i
+)
+∈
+U
+∩
+V. Sei σ
+i
+Weg in U
+∩
+V von x
+0
+nach γ(t
+i
+)
+⇒
+γ ist
+homotop zu
+γ
+1
+∗
+σ
+1
+(cid:124) (cid:123)(cid:122) (cid:125)
+inU
+∗
+σ
+1
+∗
+γ
+2
+∗
+σ
+2
+(cid:124) (cid:123)(cid:122) (cid:125)
+inV
+∗···∗
+σ
+n−1
+∗
+γ
+2
+mit γ
+i
+:= γ
+|
+Ii
+a b
+x
+Abbildung 3.8: Topologischer Raum X
+Beispiel 35 (Satz von Seifert und van Kampen)
+1) Sei X wie in Abbildung 3.8. π
+1
+(X,x) wird „frei“ erzeugt von a und b, weil π
+1
+(U,x) =
+(cid:104)
+a
+(cid:105)
 ∼
-→ ×
-3) p : Sn n(R) : Deck(Sn/ n(R)) = x x = Z/2Z
+= Z,π
+1
+(V,x) =
+(cid:104)
+b
+(cid:105)
 ∼
-→ P P { (cid:55)→ ± }
-61 3.3.ÜBERLAGERUNGEN
-Nun werden wir eine Verbindung zwischen der Decktransformationsgruppe und der Fundamen-
-talgruppe herstellen:
-Satz 3.8
-Ist p : X˜ X eine universelle Überlagerung, so gilt:
-→
-Deck(X˜/X) = π (X,x ) x X
-∼ 1 0 0
-∀ ∈
-Beweis: Wählex˜ p−1(x ).Esseiρ : Deck(x˜/x) π (X,x )dieAbbildung,dief auf[p(γ )]
-0 0 1 0 f
-∈ →
-abbildet, wobei γ ein Weg von x˜ nach f(x˜) sei. Da x˜ einfach zusammenhängend ist, ist
-f 0 0
-γ bis auf Homotopie eindeutig bestimmt und damit auch ρ wohldefiniert.
-f
-ρ ist Gruppenhomomorphismus: Seien f,g Deck(X˜/X) γ = γ g(γ )
-g◦f g f
-• ∈ ⇒ ∗ ⇒
-p(γ ) = p(γ ) (p g)(γ ) = ρ(g) = ρ(f)
-g◦f g f
-∗ ◦ (cid:54)
-(cid:124) (cid:123)(cid:122) (cid:125)
-=p
-Satz 3.2 Bem. 57.c
-ρ ist injektiv: ρ(f) = e p(γ ) γ ==== γ γ f(x ) = x˜ ====== f =
-f x0 f x˜0 0 0
-• ⇒ ∼ ⇒ ∼ ⇒ ⇒
-id .
-x˜
-ρ ist surjektiv: Sei [γ] π (X,x ), γ˜ Lift von γ nach x˜ mit Anfangspunkt x˜. Der
-1 0 0
-• ∈
-Endpunkt von γ˜ sei x˜.
-1
-p ist reguläre Überlagerung: Seien x˜,x˜ X˜ mit p(x˜) = p(x˜). Nach Satz 3.5 gibt
-0 1 0 1
-∈
-es genau eine Überlagerung p˜: X˜ X mit p = p p˜und p˜(x˜) = x˜. Somit ist p˜eine
-0 1
-→ ◦
-Decktransformation und damit p eine reguläre Überlagerung.
-Da p reguläre Überlagerung ist, gibt es ein f Deck(X˜/X) mit f(x˜) = x˜.
-0 1
-∈
-Aus der Definition von ρ folgt: ρ(f) = p(γ ) = γ
-f
-(cid:4)
-Beispiel 40 (Bestimmung von π (S1))
+= Z, insbesondere ist a
+∗
+b nicht homotop zu b
+∗
+a.
+2) Torus: π
 1
-p : R S1, t (cos2πt,sin2πt) ist universelle Überlagerung, da R zusammenhängend ist.
-→ (cid:55)→
-Für n Z sei f : R R,t t+n die Translation um n.
-n
-∈ → (cid:55)→
-Es gilt: (p f )(t) = p(f (t)) = p(t) t R, d. h. f ist Decktransformation.
-n n n
-◦ ∀ ∈
-Ist umgekehrt g irgendeine Decktransformation, so gilt insbesondere für t = 0:
-(cos(2πg(0)),sin(2πg(0))) = (p g)(0) = p(0) = (1,0)
-◦
-Es existiert n Z mit g(0) = n. Da auch f (0) = 0+n = n gilt, folgt mit Bemerkung 57.c
-n
+(T2,X) wird erzeugt von a und b.
+V
+U
+a
+b
+V
+a b
+Abbildung 3.9: a
+∗
+b = b
+∗
+a
+⇔
+a
+∗
+b
+∗
+a
+∗
+b
+∼
+e
+3.3 Überlagerungen
+Definition 48
+Es seien X,Y zusammenhängende topologische Räume und p : Y
+→
+X eine stetige Abbil-
+dung.
+p heißt Überlagerung, wenn jedes x
 ∈
-g = f . Damit folgt:
-n
-Deck(R/S1) = f n n Z ∼= Z
-{ | ∈ }
-Nach Satz 3.8 also π (S1) = Deck(R/S1) = Z
-1 ∼ ∼
-62 3.4.GRUPPENOPERATIONEN
-3.4 Gruppenoperationen
-Definition 55
-Sei (G, ) eine Gruppe und X eine Menge.
-·
-Eine Gruppenoperation von G auf X ist eine Abbildung : G X X für die gilt:
-◦ × →
-a) 1 x = x x X
-G
-◦ ∀ ∈
-b) (g h) x = g (h x) g,h G x X
-· ◦ ◦ ◦ ∀ ∈ ∀ ∈
-Beispiel 41
-1) G = (Z,+),X = R,n x = x+n
-◦
-2) G operiert auf X = G durch g h := g h
-◦ ·
-3) G operiert auf X = G durch g h := g h g−1, denn
-◦ · ·
-i) 1 h = 1 h 1−1 = h
-G ◦ G · · G
-ii) (g g ) h = (g g ) h (g g )−1
-1 2 1 2 2
-· ◦ · · · ·
-= g (g h g−1) g−1
-1 · 2 · · 2 · 1
-= g (g h)
-1 2
-◦ ◦
-Definition 56
-Sei G eine Gruppe, X ein topologischer Raum und : G X X eine Gruppenoperation.
-◦ × →
-a) G operiert durch Homöomorphismen, wenn für jedes g G die Abbildung
+X eine offene Umgebung U = U(x)
+⊆
+X besitzt,
+sodass p−1(U) disjunkte Vereinigung von offenen Teilmengen V
+j
+⊆
+Y ist (j
 ∈
-m : X X,x g x
-g
-→ (cid:55)→ ◦
-ein Homöomorphismus ist.
-b) Ist G eine topologische Gruppe, so heißt die Gruppenoperation stetig, wenn
-◦
-g G : m ist stetig
-g
-∀ ∈
-gilt.
-Bemerkung 58
-Jede stetige Gruppenoperation ist eine Gruppenoperation durch Homöomorphismen.
-Beweis: Nach Voraussetzung ist m := : X X,x g x stetig.
-g {g}×X
-◦| → (cid:55)→ ◦
-Die Umkehrabbildung zu m ist m :
-g g−1
-(m m )(x) = m (m (x))
-g−1 g g−1 g
-◦
-= m (g x)
-g−1
-◦
-= g−1 (g x)
-◦ ◦
-Def. =55 (. gb−1
-g) x
-· ◦
-= 1 x
-G
-◦
-Def. 55.a
-= x
-Beispiel 42
-In Beispiel 41.1 operiert Z durch Homöomorphismen.
-63 3.4.GRUPPENOPERATIONEN
-Bemerkung 59
-Sei G eine Gruppe und X eine Menge.
-a) DieGruppenoperationvonGaufX entsprechenbijektivdenGruppenhomomorphismen
-(cid:37) : G Perm(X) = Sym(X) = f : X X f ist bijektiv
-→ { → | }
-b) Ist X ein topologischer Raum, so entsprechen dabei die Gruppenoperationen durch
-Homöomorphismus den Gruppenhomomorphismen G Homöo(X)
+I) und
+p
+|
+Vj
+: V
+j
 →
-Beweis:
-Sei : G X X eine Gruppenoperation von G auf X. Dann sei (cid:37) : G Perm(X)
-◦ × → →
-definiert durch (cid:37)(g)(X) = g x g G,x X, also (cid:37)(g) = m .
-g
-· ∀ ∈ ∈
-(cid:37) ist Homomorphismus: (cid:37)(g g ) = m = m m = (cid:37)(g ) (cid:37)(g ), denn für x X :
-1 2 g1·g2 g1 g2 1 2
-· ◦ ◦ ∈
-(cid:37)(g g )(x) = (g g ) x = g (g x) = (cid:37)(g )((cid:37)(g )(x)) = ((cid:37)(g ) (cid:37)(g ))(x)
-1 2 1 2 1 2 1 2 1 2
-· · ◦ ◦ ◦ ◦
-Umgekehrt: Sei (cid:37) : G Perm(X) Gruppenhomomorphismus. Definiere : G X X
-→ ◦ × →
-durch g x = (cid:37)(g)(x).
-◦
-z. z. Definition 55.b:
-g (g x) = (cid:37)(g )(g x)
-1 2 1 2
-◦ ◦ ◦
-= (cid:37)(g )((cid:37)(g )(x))
-1 2
-= ((cid:37)(g ) (cid:37)(g ))(x)
-1 2
-◦
-(cid:37)istHom.
-= (cid:37)(g g )(x)
-1 2
-·
-= (g g ) x
-1 2
-· ◦
-z. z. Definition 55.a: 1 x = (cid:37)(1 )(x) = id (x) = x, weil (cid:37) ein Homomorphismus ist.
-G G X
-·
-Beispiel 43
-Sei X ein wegzusammenhängender topologischer Raum, p : X˜ X eine universelle Überla-
+U ein Homöomorphismus ist.
+|
+I
+|
+heißt Grad der Überlagerung p und man schreibt:
+degp :=
+|
+I
+|
+53 3.3.ÜBERLAGERUNGEN
+Abbildung 3.10: R
 →
-gerung, x X, x˜ X˜ mit p(x˜) = x .
-0 0 0 0
-∈ ∈
-Dann operiert π (X,x ) auf X˜ durch Homöomorphismen wie folgt:
-1 0
-Für [γ] π (X,x ) und x˜ X˜ sei [γ] x˜ = γ˜(cid:37)(1) wobei γ˜ ein Weg von x˜ nach x˜ in X˜
-1 0 0
-sei, (cid:37) :=∈ p(δ˜) = p δ. ∈ ◦ ∗
-◦
-(cid:93)
-Also: δ ist ein Weg in X von x nach x = p(x˜) und γ δ die Liftung von γ δ mit
-0
-∗ ∗
-Anfangspunkt x˜.
-0
-[γ] x˜ hängt nicht von der Wahl von γ˜ ab; ist γ˜(cid:48) ein anderer Weg von x˜ nach x˜, so sind δ˜
+S1,
+t
+(cid:55)→
+(cos2πt,sin2πt)
+Beispiel 36
+1) siehe Abbildung 3.10
+2) siehe Abbildung 3.11
+3) Rn
+→
+Tn = Rn/Zn
+4) Sn
+→ P
+n(R)
+5) S1
+→
+S1, z
+(cid:55)→
+z2, siehe Abbildung 3.12
+0 1 2 3 4 5 6
 0
-und· δ˜(cid:48) homotop, also auch γ(cid:93) δ und γ(cid:93) δ(cid:48) homotop.
-∗ ∗
-Gruppenoperation, denn:
-i) [e] x˜ = e(cid:103)δ = x˜
-◦ ∗
-(cid:94)
-ii) γ γ δ(1) = [γ γ ] x˜ = ([γ ] [γ ]) x˜
-1 2 1 2 1 2
-∗ ∗ ]∗ ˜◦ [γ∗ ([γ◦
-γ γ δ(1) = [γ (γ δ)(1) = ] ] x˜)
-1 2 1 2 1 2
-∗ ∗ ◦ ∗ ◦ ◦
-Erinnerung:Die Konstruktion aus Bemerkung 59 induziert zu der Gruppenoperation π (X,x )
-1 0
-aus Beispiel 43 einen Gruppenhomomorphismus (cid:37) : π (X,x ) Homöo(X). Nach Satz 3.8 ist
-1 0
+1
+2
+3
+4
+5
+6
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+−−−→
+Abbildung 3.11: R2
 →
-(cid:37)(π (X,x )) = Deck(X˜/X)
-1 0
-(cid:110) (cid:12) (cid:111)
-= f : X˜ X˜ Homöomorphismus (cid:12) p f = p
-(cid:12)
-→ ◦
-64 3.4.GRUPPENOPERATIONEN
-Beispiel 44
-Sei X := S2 R3 und τ die Drehung um die z-Achse um 180◦.
+T2 = R2/Z2
+Bemerkung 50
+Überlagerungen sind surjektiv.
+Beweis: Sei p : Y
+→
+X eine Überlagerung und x
+∈
+X beliebig. Dann existiert eine offene
+Umgebung U(x)
 ⊆
-g = τ = id,τ operiert auf S2 durch Homöomorphismen.
-(cid:104) (cid:105) { }
-Frage: Was ist S2/G? Ist S2/G eine Mannigfaltigkeit?
-4 Euklidische und nichteuklidische
-Geometrie
-Definition 57
-Das Tripel (X,d,G) heißt genau dann eine Geometrie, wenn (X,d) ein metrischer Raum
-und = G (X) gilt. Dann heißt G die Menge aller Geraden.
-∅ (cid:54) ⊆ P
-4.1 Axiome für die euklidische Ebene
-Axiome bilden die Grundbausteine jeder mathematischen Theorie. Eine Sammlung aus Axiomen
-nennt man Axiomensystem. Da der Begriff des Axiomensystems so grundlegend ist, hat man
-auch ein paar sehr grundlegende Forderungen an ihn: Axiomensysteme sollen widerspruchsfrei
-sein, die Axiome sollen möglichst unabhängig sein und Vollständigkeit wäre auch toll. Mit
-Unabhängigkeit ist gemeint, dass kein Axiom sich aus einem anderem herleiten lässt. Dies scheint
-auf den ersten Blick eine einfache Eigenschaft zu sein. Auf den zweiten Blick muss man jedoch
-einsehen, dass das Parallelenproblem, also die Frage ob das Parallelenaxiom unabhängig von
-den restlichen Axiomen ist, über 2000 Jahre nicht gelöst wurde. Ein ganz anderes Kaliber ist
-die Frage nach der Vollständigkeit. Ein Axiomensystem gilt als Vollständig, wenn jede Aussage
-innerhalb des Systems verifizierbar oder falsifizierbar ist. Interessant ist hierbei der Gödelsche
-Unvollständigkeitssatz, der z. B. für die Arithmetik beweist, dass nicht alle Aussagen formal
-bewiesen oder widerlegt werden können.
-Kehren wir nun jedoch zurück zur Geometrie. Euklid hat in seiner Abhandlung „Die Elemente“
-ein Axiomensystem für die Geometrie aufgestellt.
-Euklids Axiome
-Strecke zwischen je zwei Punkten
-•
-Jede Strecke bestimmt genau eine Gerade
-•
-Kreis (um jeden Punkt mit jedem Radius)
-•
-Je zwei rechte Winkel sind gleich (Isometrie, Bewegung)
-•
-Parallelenaxiom von Euklid:
-•
-Wird eine Gerade so von zwei Geraden geschnitten, dass die Summe der Innenwinkel
-kleiner als zwei Rechte ist, dann schneiden sich diese Geraden auf der Seite dieser Winkel.
-Man mache sich klar, dass das nur dann nicht der Fall ist, wenn beide Geraden par-
-allel sind und senkrecht auf die erste stehen.
-Definition 58
-Eine euklidische Ebene ist eine Geometrie (X,d,G), die Axiome §1 - §5 erfüllt:
-§1) Inzidenzaxiome:
-66 4.1.AXIOMEFÜRDIEEUKLIDISCHEEBENE
-(i) Zu P = Q X gibt es genau ein g G mit P,Q g.
-(cid:54) ∈ ∈ { } ⊆
-(ii) g 2 g G
-| | ≥ ∀ ∈
-(iii) X / G
-∈
-§2) Abstandsaxiom: Zu P,Q,R X gibt es genau dann ein g G mit P,Q,R g,
-∈ ∈ { } ⊆
-wenn gilt:
-d(P,R) = d(P,Q)+d(Q,R) oder
-•
-d(P,Q) = d(P,R)+d(R,Q) oder
-•
-d(Q,R) = d(Q,P)+d(P,R)
-•
-Definition 59
-Sei (X,d,G) eine Geometrie und seien P,Q,R X.
+X und offene Teilmengen V
+j
+⊆
+X mit p−1(U) = ˙ (cid:83) V
+j
+und p
+|
+Vj
+: V
+j
+→
+U
+ist Homöomorphismus.
+D. h. es existiert ein y
 ∈
-a) P,Q,R liegen kollinear, wenn es g G gibt mit P,Q,R g.
-∈ { } ⊆
-b) Q liegt zwischen P und R, wenn d(P,R) = d(P,Q)+d(Q,R)
-c) Strecke PR := Q X Q liegt zwischen P und R
-{ ∈ | }
-d) Halbgeraden:
-PR+ := Q X Q liegt zwischen P und R oder
-{ ∈ |
-R liegt zwischen P und Q
+V
+j
+, so dass p
+|
+Vj
+(y) = x. Da x
+∈
+X beliebig war und ein y
+∈
+Y
+existiert, mit p(y) = x, ist p surjektiv. (cid:4)
+54 3.3.ÜBERLAGERUNGEN
+1
+i
+z
+z2
+ϕ
+ϕ
+z2
+Abbildung 3.12: t
+(cid:55)→
+(cos4πt,sin4πt)
+Definition 49
+Seien (X,T
+X
+),(Y,T
+Y
+) topologische Räume und f : X
+→
+Y eine Abbildung.
+f heißt offen :
+⇔ ∀
+U
+∈
+T
+X
+: f(U)
+∈
+T
+Y
+.
+Beispiel 37 (Offene und stetige Abbildungen)
+Sei X ein topologischer Raum und seien f
+i
+: R
+→
+R mit i
+∈ {
+1,2,3
 }
-PR− := Q X P liegt zwischen Q und R
-{ ∈ | }
-P R
-PR−
-PR
-PR+
-Abbildung 4.1: Halbgeraden
-Bemerkung 60
-a) PR+ PR− = PR
-∪
-b) PR+ PR− = P
-∩ { }
-Beweis:
-a) „ “ folgt direkt aus der Definition von PR+ und PR−
-⊆
-„ “: Sei Q PR P,Q,R sind kollinear.
-⊇ ∈ ⇒
-Q liegt zwischen P und R Q PR
-
- ⇒ ∈
-2
-R liegt zwischen P und Q Q PR
-⇒  ⇒ ∈
-P liegt zwischen Q und R Q PR
-⇒ ∈
-b) „ “ ist offensichtlich
-⊇
-„ “: Sei PR+ PR−. Dann ist d(Q,R) = d(P,Q)+d(P,R) weil Q PR− und
-⊆ ∩ ∈
-(cid:26) (cid:27)
-d(P,R) = d(P,Q)+d(Q,R) oder
-d(P,Q) = d(P,R)+d(R,Q)
-67 4.1.AXIOMEFÜRDIEEUKLIDISCHEEBENE
-d(Q,R) = 2d(P,Q)+d(Q,R)
-⇒
-d(P,Q) = 0
-⇒
-P = Q
-⇒
-d(P,Q) = 2d(P,R)+d(P,Q)
-P = R
-⇒
-Widerspruch
-⇒
-Definition 60
-§3) Anordnungsaxiome
-(i) Zu jeder Halbgerade H mit Anfangspunkt P X und jedem r R gibt es
-≥0
-∈ ∈
-genau ein Q H mit d(P,Q) = r.
-∈
-(ii) Jede Gerade zerlegt X g = H ˙ H in zwei nichtleere Teilmengen H ,H , sodass
-1 2 1 2
-\ ∪
-für alle A H , B H mit i,j 1,2 gilt: AB g = i = j.
-i j
-∈ ∈ ∈ { } ∩ (cid:54) ∅ ⇔ (cid:54)
-Diese Teilmengen H heißen Halbebenen bzgl. g.
-i
-§4) Bewegungsaxiom: Zu P,Q,P(cid:48),Q(cid:48) X mit d(P,Q) = d(P(cid:48),Q(cid:48)) gibt es mindestens
-∈
-2 Isometrien ϕ ,ϕ mit ϕ (P) = P(cid:48) und ϕ (Q) = Q(cid:48) mit i = 1,2.1
-1 2 i i
-§5) Parallelenaxiom: Zu jeder Geraden g G und jedem Punkt P X g gibt es
-∈ ∈ \
-höchstens ein h G mit P h und h g = . h heißt Parallele zu g durch P.
-∈ ∈ ∩ ∅
-Satz 4.1 (Satz von Pasch)
-Seien P, Q, R nicht kollinear, g G mit g P,Q,R = und g PQ = .
-∈ ∩{ } ∅ ∩ (cid:54) ∅
-Dann ist entweder g PR = oder g QR = .
-∩ (cid:54) ∅ ∩ (cid:54) ∅
-Dieser Satz besagt, dass Geraden, die eine Seite eines Dreiecks (also nicht nur eine Ecke)
-schneiden, auch eine weitere Seite schneiden.
-Beweis: g PQ =
-∩ (cid:54) ∅
-3(ii)
-P und Q liegen in verschiedenen Halbebenen bzgl. g
-⇒
-o. B. d. A. R und P liegen in verschieden Halbebenen bzgl. g
-⇒
-g RP =
-⇒ ∩ (cid:54) ∅
-Bemerkung 61
-Sei P,Q X mit P = Q sowie A,B X PQ mit A = B. Außerdem seien A und B in der
-∈ (cid:54) ∈ \ (cid:54)
-selben Halbebene bzgl. PQ sowie Q und B in der selben Halbebene bzgl. PA.
-Dann gilt: PB+ AQ =
-∩ (cid:54) ∅
-Auch Bemerkung 61 lässt sich umgangssprachlich sehr viel einfacher ausdrücken: Die Diagonalen
-eines konvexen Vierecks schneiden sich.
-Beweis: Sei P(cid:48) PQ−,P(cid:48) = P =S =at =z =4.1 PB schneidet AP(cid:48) AQ
-∈ (cid:54) ⇒ ∪
-Sei C der Schnittpunkt. Dann gilt:
-1Die„Verschiebung“ vonP(cid:48)Q(cid:48) nachPQunddieIsometrie,diezusätzlichanderGeradedurchP undQspiegelt.
-68 4.1.AXIOMEFÜRDIEEUKLIDISCHEEBENE
-A B
+und g : R
+→
+S1 =
+{
+z
+∈
 C
+| (cid:107)
+z
+(cid:107)
+= 1
+}
+Abbildungen.
+1) f
+1
+:= idR ist eine offene und stetige Abbildung.
+2) g(x) := e2πix ist eine offene, aber keine stetige Abbildung (vgl. Abbildung 1.5).
+3) f
+2
+(x) := 42 ist eine stetige, aber keine offene Abbildung.
+4) f
+3
+(x) :=
+(cid:40)
+0 falls x
+∈
 Q
-P(cid:48)
-P
-Abbildung 4.2: Situation aus Bemerkung 61
-(i) C PB+, denn A und B liegen in derselben Halbebene bzgl. PQ = P(cid:48)Q, also auch
+42 falls x
 ∈
-AP(cid:48) und AQ.
-(ii) C liegt in derselben Halbebene bzgl. PA wie B, weil das für Q gilt.
-AP(cid:48) liegt in der anderen Halbebene bzgl. PA C / P(cid:48)A C AQ
-⇒ ∈ ⇒ ∈
-Da C PB+ und C AQ folgt nun direkt: = C PB+ AQ (cid:4)
-∈ ∈ ∅ (cid:54) { } ⊆ ∩
-Bemerkung 62
-SeienP,Q X mitP = QundA,B X PQinderselbenHalbebenebzgl.PQ.Außerdem
-∈ (cid:54) ∈ \
-sei d(A,P) = d(B,P) und d(A,Q) = d(B,Q).
-Dann ist A = B.
-B
-A
+R
+\
 Q
-P
-Abbildung 4.3: Bemerkung 62: Die beiden roten und die beiden blauen Linien sind gleich lang.
-Intuitiv weiß man, dass daraus folgt, dass A = B gilt.
-Beweis: durch Widerspruch
-Annahme: A = B
-(cid:54)
-Dann ist B / (PA QA) wegen §2.
-∈ ∪
-1. Fall: Q und B liegen in derselben Halbebene bzgl. PA
-=B =e =m =. =61 PB+ AQ = .
-⇒ ∩ (cid:54) ∅
-Sei C der Schnittpunkt vom PB und AQ.
-Dann gilt:
-Vor.
-(i) d(A,C)+d(C,Q) = d(A,Q) = d(B,Q) < d(B,C)+d(C,Q) d(A,C) < d(B,C)
-⇒
-69 4.1.AXIOMEFÜRDIEEUKLIDISCHEEBENE
-B A
-A
-C Q
-B
-P
-P Q
-(a) 1. Fall (b) 2. Fall
-Abbildung 4.4: Fallunterscheidung aus Bemerkung 62
-(ii) a) B liegt zwischen P und C.
-d(P,A) + d(A,C) > d(P,C) = d(P,B) + d(B,C) = d(P,A) + d(B,C)
-⇒
-d(A,C) > d(B,C) Widerspruch zu Punkt (i)
-⇒
-b) C liegt zwischen P und B
-d(P,C)+d(C,A) > d(P,A) = d(P,B) = d(P,C)+d(C,B)
-d(C,A) > d(C,B)
+ist weder stetig noch offen.
+Bemerkung 51
+Überlagerungen sind offene Abbildungen.
+Beweis: Sei y
+∈
+V und x
+∈
+p(V), sodass x = p(y) gilt. Sei weiter U = U
+x
+eine offene Umgebung
+von x wie in Definition 48 und V
+j
+die Komponente von p−1(U), die y enthält.
+Dann ist V
+∩
+V
+j
+offene Umgebung von y.
 ⇒
-Widerspruch zu Punkt (i)
+p(V
+∩
+V
+j
+) ist offen in p(V
+j
+), also auch offen in X. Außerdem ist p(y) = x
+∈
+p(V
+∩
+V
+j
+) und
+p(V
+∩
+V
+j
+)
+⊆
+p(V).
 ⇒
-2. Fall: Q und B liegen auf verschieden Halbebenen bzgl. PA.
-Dann liegen A und Q in derselben Halbebene bzgl. PB.
-Tausche A und B Fall 1 (cid:4)
+p(V) ist offen.
+Definition 50
+Sei X ein topologischer Raum und M
+⊆
+X.
+M heißt diskret in X, wenn M in X keinen Häufungspunkt hat.
+Bemerkung 52
+Sei p : Y
+→
+X Überlagerung, x
+∈
+X.
+a) X hausdorffsch
 ⇒
-Bemerkung 63
-Sei (X,d,G) eine Geometrie, die §1 - §3 erfüllt, P,Q X mit P = Q und ϕ eine Isometrie
-∈ (cid:54)
-mit ϕ(P) = P und ϕ(Q) = Q.
-Dann gilt ϕ(S) = S S PQ.
-∀ ∈
+Y hausdorffsch
+b) p−1(x) ist diskret in Y für jedes x
+∈
+X.
 Beweis:
+a) Seien y
+1
+,y
 2
-O. B. d. A. sei S PQ d(P,Q) = d(P,S)+d(S,Q)
-∈ ⇔
-ϕ∈Iso(X)
-d(ϕ(P),ϕ(Q)) = d(ϕ(P),ϕ(S))+d(ϕ(S),ϕ(Q))
-⇒
-P,Q∈Fix(ϕ)
-d(P,Q) = d(P,ϕ(S))+d(ϕ(S),Q)
-⇒
-ϕ(S) liegt zwischen P und Q
-⇒
-d(P,S) = d(ϕ(P),ϕ(S)) = d(P,ϕ(S))
-⇒
-3(i)
-ϕ(S) = S
-⇒
-(cid:4)
-Proposition 4.2
-In einer Geometrie, die §1 - §3 erfüllt, gibt es zu P,P(cid:48),Q,Q(cid:48) mit d(P,Q) = d(P(cid:48),Q(cid:48))
-höchstens zwei Isometrien mit ϕ(P) = P(cid:48) und ϕ(Q) = Q(cid:48)
-70 4.1.AXIOMEFÜRDIEEUKLIDISCHEEBENE
-Aus den Axiomen folgt, dass es in der Situation von §4 höchstens zwei Isometrien mit
-ϕ (P) = P(cid:48) und ϕ (Q) = Q(cid:48) gibt.
-i i
-Beweis: Seien ϕ ,ϕ ,ϕ Isometrien mit ϕ (P) = P(cid:48), ϕ (Q) = Q(cid:48) mit i = 1,2,3.
-1 2 3 i i
-Der Beweis von Proposition 4.2 erfolgt über zwei Teilaussagen:
-(Teil i) R X PQ mit ϕ (R) = ϕ (R).
-1 2
-∃ ∈ \
-(Teil ii) Hat ϕ 3 Fixpunkte, die nicht kollinear sind, so ist ϕ = id .
-X
-Aus (Teil i) und (Teil ii) folgt, dass ϕ−1 ϕ = id , also ϕ = ϕ , da P, Q und R in diesem
-2 ◦ 1 X 2 1
-Fall Fixpunkte sind.
-Nun zu den Beweisen der Teilaussagen:
-(Teil i) Sei R X PQ. Von den drei Punkten ϕ (R),ϕ (R),ϕ (R) liegen zwei in der selben
-1 2 3
-∈ \
-Halbebene bzgl. P(cid:48)Q(cid:48) = ϕ (PQ).
-i
-O. B. d. A. seien ϕ (R) und ϕ (R) in der selben Halbebene.
-1 2
-Es gilt: d(P(cid:48),ϕ (R)) = d(ϕ (P),ϕ (R))
-1 1 1
-= d(P,R)
-= d(ϕ (P),ϕ (R))
-2 2
-= d(P(cid:48),ϕ (R))
+∈
+Y.
+1. Fall: p(y
+1
+) = p(y
 2
-und analog d(Q(cid:48),ϕ (R)) = d(Q(cid:48),ϕ (R))
-1 2
-(Teil ii) Seien P, Q und R Fixpunkte von ϕ, R / PQ und A / PQ PR QR. Sei B
-∈ ∈ ∪ ∪ ∈
-PQ P,Q . Dann ist ϕ(B) = B wegen Bemerkung 63.
-\{ }
-Bem. 63
-Ist R AB, so enthält AB 2 Fixpunkte von ϕ ===== ϕ(A) = A.
-∈ ⇒
-A
-R
-C
-P B Q
-Abbildung 4.5: P,Q,R sind Fixpunkte, B PQ P,Q , A / PQ PR QR
-∈ \{ } ∈ ∪ ∪
-Ist R / AB, so ist AB PR = oder AB RQ = nach Satz 4.1. Der Schnittpunkt
-∈ ∩ (cid:54) ∅ ∈ (cid:54) ∅
-C ist dann Fixpunkt von ϕ(cid:48) nach Bemerkung 63 ϕ(A) = A.
-⇒
-Bemerkung 64 (SWS-Kongruenzsatz)
-Sei (X,d,G) eine Geometrie, die §1 - §4 erfüllt. Seien außerdem ABC und A(cid:48)B(cid:48)C(cid:48)
-(cid:52) (cid:52)
-Dreiecke, für die gilt:
-(i) d(A,B) = d(A(cid:48),B(cid:48))
-(ii) ∠CAB = ∠C(cid:48)A(cid:48)B(cid:48)
-∼
-71 4.1.AXIOMEFÜRDIEEUKLIDISCHEEBENE
-(iii) d(A,C) = d(A(cid:48),C(cid:48))
-Dann ist ABC kongruent zu A(cid:48)B(cid:48)C(cid:48) .
-(cid:52) (cid:52)
-Beweis: Sei ϕ die Isometrie mit ϕ(A(cid:48)) = A, ϕ(A(cid:48)C(cid:48)+) = AC+ und ϕ(A(cid:48)B(cid:48)+) = AB+. Diese
-Isometrie existiert wegen Punkt §4.
-C ϕ(A(cid:48)C(cid:48)+) und B ϕ(A(cid:48)B(cid:48)+).
-⇒ ∈ ∈
-d(A(cid:48),C(cid:48)) = d(ϕ(A(cid:48)),ϕ(C(cid:48))) = d(A,ϕ(C(cid:48))) =3 =(i) ϕ(C(cid:48)) = C
+) = x.
+55 3.3.ÜBERLAGERUNGEN
+Sei U Umgebung von x wie in Definition 48, V
+j1
+bzw. V
+j2
+die Komponente von p−1(U),
+die y
+1
+bzw. y
+2
+enthält.
+Dann ist V
+j1
+(cid:54)
+= V
+j2
+, weil beide ein Element aus p−1(x) enthalten.
 ⇒
-d(A(cid:48),B(cid:48)) = d(ϕ(A(cid:48)),ϕ(B(cid:48))) = d(A,ϕ(B(cid:48))) =3 =(i) ϕ(B(cid:48)) = B
+V
+j1
+∩
+V
+j2
+=
+∅
+nach Voraussetzung.
+2. Fall: p(y
+1
+)
+(cid:54)
+= p(y
+2
+).
+Dann seien U
+1
+und U
+2
+disjunkte Umgebungen von p(y
+1
+) und p(y
+2
+).
 ⇒
-Also gilt insbesondere ϕ( A(cid:48)B(cid:48)C(cid:48)) = ABC. (cid:4)
-(cid:52) (cid:52)
-Bemerkung 65 (WSW-Kongruenzsatz)
-Sei (X,d,G) eine Geometrie, die §1 - §4 erfüllt. Seien außerdem ABC und A(cid:48)B(cid:48)C(cid:48)
-(cid:52) (cid:52)
-Dreiecke, für die gilt:
-(i) d(A,B) = d(A(cid:48),B(cid:48))
-(ii) ∠CAB = ∠C(cid:48)A(cid:48)B(cid:48)
-∼
-(iii) ∠ABC = ∠A(cid:48)B(cid:48)C(cid:48)
-∼
-Dann ist ABC kongruent zu A(cid:48)B(cid:48)C(cid:48) .
-(cid:52) (cid:52)
-Beweis: Sei ϕ die Isometrie mit ϕ(A(cid:48)) = A, ϕ(B(cid:48)) = B und ϕ(C(cid:48)) liegt in der selben Halbebene
-bzgl. AB wie C. Diese Isometrie existiert wegen §4.
-Aus ∠CAB = ∠C(cid:48)A(cid:48)B(cid:48) = ∠ϕ(C(cid:48))ϕ(A(cid:48))ϕ(B(cid:48)) = ∠ϕ(C(cid:48))AB folgt, dass ϕ(C(cid:48)) AC+.
-∈
-Analog folgt aus ∠ABC = ∠A(cid:48)B(cid:48)C(cid:48) = ∠ϕ(A(cid:48))ϕ(B(cid:48))ϕ(C(cid:48)) = ∠ABϕ(C(cid:48)), dass ϕ(C(cid:48))
+p−1(U
+1
+) und p−1(U
+2
+) sind disjunkte Umgebungen von y
+1
+und y
+2
+.
+b) Sei x
 ∈
-BC+.
-Dann gilt ϕ(C(cid:48)) AC BC = C ϕ(C(cid:48)) = C.
-∈ ∩ { } ⇒
-Es gilt also ϕ( A(cid:48)B(cid:48)C(cid:48)) = ABC. (cid:4)
-(cid:52) (cid:52)
-Definition 61
-a) Ein Winkel ist ein Punkt P X zusammen mit 2 Halbgeraden mit Anfangspunkt P.
+X beliebig, aber fest.
+Zu zeigen:
+∀
+y
+i
 ∈
-Man schreibt: ∠R PR bzw. ∠R PR 2
-1 2 2 1
-b) Zwei Winkel sind gleich, wenn es eine Isometrie gibt, die den einen Winkel auf den
+p−1(x) :
+∃
+V
+i
+∈
+T
+Y
+mit y
+i
+∈
+V
+i
+, sodass gilt:i
+(cid:54)
+= j
+⇒
+V
+i
+∩
+V
+j
+=
+∅
+.
+Die V
+i
+existieren wegen der Definition einer Überlagerung: p heißt Überlagerung
+:
+⇔ ∀
+x
+∈
+X
+∃
+U = U(x)
+∈
+T
+X
+: p−1(U) = ˙ (cid:83)
+Vi∈TY
+V
+i
+und p
+| Vi
+ist Homöomorphismus.
+⇒
+(p
+|
+Vi
+)−1(x) =
+{
+y
+i
+}
+⇒
+Alle y
+i
+liegen diskret in Y, da Häufungspunkte unendlich viele Elemente in jeder
+Umgebung benötigen. (cid:4)
+Bemerkung 53 (Eindeutigkeit des Überlagerungsgrades)
+Sei p : Y
+→
+X Überlagerung. Dann gilt:
+∀
+x
+1
+,x
+2
+∈
+X :
+|
+p−1(x
+1
+)
+|
+=
+|
+p−1(x
+2
+)
+|
+Hinweis:
+|
+p−1(x
+1
+)
+|
+=
+∞
+ist erlaubt!
+Beweis: Sei U Umgebung von x
+1
+wie in Definition 48, x
+∈
+U. Dann enthält jedes V
+j
+mit j
+∈
+I
+genau ein Element von p−1(x).
+⇒ |
+p−1(x)
+|
+ist konstant für x
+∈
+U
+X zhgd. ====
+⇒ |
+p−1(x)
+|
+ist konstant für x
+∈
+X.
+Definition 51
+Es seien X,Y,Z topologische Räume, p : Y
+→
+X eine Überlagerung und f : Z
+→
+X stetig.
+Eine stetige Abbildung ˜ f : Z
+→
+Y heißt Liftung von f, wenn p
+◦
+˜ f = f ist.
+Y
+X
+Z
+p
+˜ f
+f
+Bemerkung 54 (Eindeutigkeit der Liftung)
+Sei Z zusammenhängend und f
+0
+,f
+1
+: Z
+→
+Y Liftungen von f.
+∃
+z
+0
+∈
+Z : f
+0
+(z
+0
+) = f
+1
+(z
+0
+)
+⇒
+f
+0
+= f
+1
+Beweis: Sei T =
+{
+z
+∈
+Z
+|
+f
+0
+(z) = f
+1
+(z)
+}
+.
+Z. z.: T ist offen und Z
+\
+T ist auch offen.
+56 3.3.ÜBERLAGERUNGEN
+0 1 2 3 4 5 6
+0
+1
+2
+3
+4
+5
+6
+T Liften
+−−−→
+R2/Z2
+Abbildung 3.13: Beim Liften eines Weges bleiben geschlossene Wege im allgemeinen nicht ge-
+schlossen
+Sei z
+∈
+T,x = f(z),U Umgebung von x wie in Definition 48, V die Komponente von p−1(U),
+die y := f
+0
+(z) = f
+1
+(z) enthält.
+Sei q : U
+→
+V die Umkehrabbildung zu p
+|
+V
+.
+Sei W := f−1(U)
+∩
+f−1
+0
+(V)
+∩
+f−1
+1
+(V). W ist offene Umgebung in Z von z.
+Behauptung: W
+⊆
+T
+Denn für w
+∈
+W ist q(f(w)) = q((p
+◦
+f
+0
+))(w) = ((q
+◦
+p)
+◦
+f
+0
+)(w) = f
+0
+(w) = q(f(w)) = f
+1
+(w)
+⇒
+T ist offen.
+Analog: Z
+\
+T ist offen.
+Satz 3.2
+Sei p : Y
+→
+X Überlagerung, γ : I
+→
+X ein Weg, y
+∈
+Y mit p(y) = γ(0) =: x.
+Dann gibt es genau einen Weg ˜ γ : I
+→
+Y mit ˜ γ(0) = y und p
+◦
+˜ γ = γ.
+p : Y
+→
+X Überlagerung, X,Y wegzusammenhängend. p stetig und surjektiv, zu x
+∈
+X
+∃ Umgebung U, so dass p−1(U) = (cid:83) V
+j
+p
+|
+Vj
+: V
+j
+→
+U Homöomorphismus.
+Bemerkung 55
+Wege in X lassen sich zu Wegen in Y liften.
+Zu jedem y
+∈
+p−1(γ(0)) gibt es genau einen Lift von γ.
+57 3.3.ÜBERLAGERUNGEN
+Proposition 3.3
+Seien p : Y
+→
+X eine Überlagerung, a,b
+∈
+X, γ
+0
+,γ
+1
+: I
+→
+X homotope Wege von a
+nach b, ˜ a
+∈
+p−1(a), ˜ γ
+0
+, ˜ γ
+1
+Liftungen von γ
+0
+bzw. γ
+1
+mit ˜ γ
+i
+(0) = ˜ a.
+Dann ist ˜ γ
+0
+(1) = ˜ γ
+1
+(1) und ˜ γ
+0
+∼
+˜ γ
+1
+.
+Beweis: Sei $H : I \times I \to X$ Homotopie zwischen $\gamma_0$ und $\gamma_1$.
+Für s
+∈
+I sei γ
+s
+: I
+→
+X, t
+(cid:55)→
+H(t,s).
+Sei ˜ γ
+s
+Lift von γ
+s
+mit ˜ γ
+s
+(0) = ˜ a
+Sei ˜ H : I
+×
+I
+→
+Y, ˜ H(t,s) := (˜ γ
+s
+(t),s)
+Dann gilt:
+(i) ˜ H ist stetig (Beweis wie für Bemerkung 54)
+(ii) ˜ H(t,0) = ˜ γ
+0
+(t), ˜ H(t,1) = ˜ γ
+1
+(t)
+(iii) ˜ H(0,s) = ˜ γ
+s
+(0) = ˜ a
+(iv) ˜ H(1,s)
+∈
+p−1(b)
+Da p−1(b) diskrete Teilmenge von Y ist
+⇒
+˜ b
+s
+= ˜ H(1,s) = ˜ H(1,0)
+∀
+s
+∈
+I
+⇒
+˜ b
+0
+= ˜ b
+1
+und ˜ H ist Homotopie zwischen ˜ γ
+0
+und ˜ γ
+1
+. (cid:4)
+Folgerung 3.4
+Sei p : Y
+→
+X eine Überlagerung, x
+0
+∈
+X,y
+0
+∈
+p−1(x
+0
+)
+a) p
+∗
+: π
+1
+(Y,y
+0
+)
+→
+π
+1
+(X,x
+0
+) ist injektiv
+b) [π
+1
+(X,x
+0
+) : p
+∗
+(π
+1
+(Y,y
+0
+))] = deg(p)
+Beweis:
+a) Sei ˜ γ ein Weg in Y um y
+0
+und p
+∗
+([˜ γ]) = e, also p
+◦
+˜ γ
+∼
+γ
+x0
+Nach Proposition 3.3 ist dann ˜ γ homotop zum Lift des konstanten Wegs γ
+x0
+mit
+Anfangspunkt y
+0
+, also zu γ
+y0
+⇒
+[˜ γ] = e
+b) Sei d = degp und p−1(x
+0
+) =
+{
+y
+0
+,y
+1
+,...,y
+d−1
+}
+. Für einen geschlossenen Weg γ in X
+um x
+0
+sei ˜ γ die Liftung mit ˜ γ(0) = y
+0
+.
+˜ γ(1)
+∈ {
+y
+0
+,...,y
+d−1
+}
+hängt nur von [γ]
+∈
+π
+1
+(X,x
+0
+) ab.
+Für geschlossene Wege γ
+0
+,γ
+1
+um x gilt:
+˜ γ
+0
+(1) = ˜ γ
+1
+(1)
+⇔
+[˜ γ
+0
+∗
+˜ γ
+1
+−1]
+∈
+π
+1
+(Y,y
+0
+)
+⇔
+[γ
+0 ∗
+γ−1
+1
+]
+∈
+p
+∗
+(π
+1
+(Y,y
+0
+))
+⇔
+[γ
+0
+] und [γ
+1
+]liegen in der selben Nebenklasse bzgl. p
+∗
+(π
+1
+(Y,y
+0
+))
+58 3.3.ÜBERLAGERUNGEN
+Zu i
+∈ {
+0,...,d
+−
+1
+}
+gibt es Weg δ
+i
+in Y mit δ
+i
+(0) = y
+0
+und δ
+i
+(1) = y
+i
+$\Rightarrow p \circ \delta_i$ ist geschlossener Weg in $X$ um $x_0$.
+⇒
+Jedes y
+i
+mit i = 0,...,d
+−
+1 ist ˜ γ(1) für ein [γ]
+∈
+π
+1
+(X,x
+0
+).
+Bemerkung 56
+Sei p : Y
+→
+X Überlagerung und X einfach zusammenhängend.
+Dann ist p ein Homöomorphismus.
+Beweis: Wegen Folgerung 3.4.a ist auch Y einfach zusammenhängend und wegen Folge-
+rung 3.4.b ist deg(p) = 1, p ist also bijektiv.
+Nach Bemerkung 51 ist p offen
+⇒
+p−1 ist stetig.
+⇒
+p ist Homöomorphismus. (cid:4)
+Definition 52
+Eine Überlagerung p : ˜ X
+→
+X heißt universell, wenn ˜ X einfach zusammenhängend ist.
+Beispiel 38 (Universelle Überlagerungen)
+R
+→
+S1, t
+(cid:55)→
+(cos2πt,sin2πt)
+R2
+→
+T2 = R2/Z2
+Sn
+→ P
+n(R) für n
+≥
+2
+Satz 3.5
+Sei p : ˜ X
+→
+X eine universelle Überlagerung, q : Y
+→
+X weitere Überlagerung.
+Sei x
+0
+∈
+X, ˜ x
+0
+∈
+˜ X,y
+0
+∈
+Y mit q(y
+0
+) = x
+0
+= p(˜ x
+0
+).
+Dann gibt es genau eine Überlagerung ˜ p : ˜ X
+→
+Y mit ˜ p(˜ x
+0
+) = y
+0
+.
+Beweis: Sei z
+∈
+˜ X,γ
+z
+: I
+→
+˜ X ein Weg von ˜ x
+0
+nach z.
+Sei δ
+z
+die eindeutige Liftung von p
+◦
+γ
+z
+nach Y mit δ
+z
+(0) = y
+0
+.
+Setze ˜ p(z) = δ
+z
+(1).
+Da ˜ X einfach zusammenhängend ist, hängt ˜ p(z) nicht vom gewählten Weg γ
+z
+ab.
+Offensichtlich ist q(˜ p(z)) = p(z).
+Zu zeigen: ˜ p ist stetig in z
+∈
+˜ X:
+Sei W
+⊆
+Y offene Umgebung von ˜ p(z).
+$\overset{q \text{ offen}}{\Longrightarrow} q(W)$ ist offene Umgebung von $p(z) = q(\tilde{p}(z))$.
+Sei U
+⊆
+q(W) offen wie in Definition 48 und V
+⊆
+q−1(U) die Komponente, die ˜ p(z) enthält.
+O. B. d. A. sei V
+⊆
+W.
+Sei Z := p−1(U). Für u
+∈
+Z sei δ ein Weg in Z von z nach u.
+⇒
+γ
+z
+∗
+δ ist Weg von x
+0
+nach u
+⇒
+˜ p(u)
+∈
+V
+⇒
+Z
+⊆
+˜ p−1(W)
+⇒
+˜ p ist stetig
+59 3.3.ÜBERLAGERUNGEN
+Folgerung 3.6
+Sind p : ˜ X
+→
+X und q : ˜ Y
+→
+X universelle Überlagerungen, so sind ˜ X und ˜ Y homöomorph.
+Beweis: Seien x
+0
+∈
+X, ˜ x
+0
+∈
+˜ X mit p(˜ x
+0
+) = x
+0
+und ˜ y
+0
+∈
+q−1(x
+0
+)
+⊆
+˜ Y.
+Nach Satz 3.5 gibt es genau eine Überlagerung
+f : ˜ X
+→
+˜ Y mit f(x
+0
+) = ˜ y
+0
+und q
+◦
+f = p
+und genau eine Überlagerung
+g : ˜ Y
+→
+˜ X mit g(˜ y
+0
+) = ˜ x
+0
+und p
+◦
+g = q
+Damit gilt: $p \circ g \circ f = q \circ f = p$, $q \circ f \circ g = p \circ g = q$. Also ist $g \circ f : \tilde{X} \to \tilde{X}$ Lift von
+$p : \tilde{X} \to X$ mit $(g \circ f)(\tilde{x}_0) = \tilde{x}_0$.
+Da auch $\mathrm{id}_{\tilde{X}}$ diese Eigenschaft hat, folgt mit Bemerkung 54: $g \circ f = \mathrm{id}_{\tilde{X}}$.
+Analog gilt f
+◦
+g = id
+˜ Y
+. (cid:4)
+Die Frage, wann es eine universelle Überlagerung gibt, beantwortet der folgende Satz:
+Definition 53
+Sei (X,T) ein topologischer Raum und x
+∈
+X.
+$U \subseteq T$ heißt eine Umgebungsbasis von $x$, wenn jede offene Umgebung von $x$ eine Teilmenge
+von $U$ enthält.
+Satz 3.7
+Es sei X ein wegzusammenhängender topologischer Raum in dem jeder Punkt eine
+Umgebungsbasis aus einfach zusammenhängenden Mengen hat.
+Dann gibt es eine universelle Überlagerung.
+Beweis: Sei $x_0 \in X$ und $\tilde{X} := \{ (x,[\gamma]) \mid x \in X, \gamma \text{ Weg von } x_0 \text{ nach } x \}$ und $p : \tilde{X} \to X, (x,[\gamma]) \mapsto x$.
+Die Topologie auf ˜ X ist folgende: Definiere eine Umgebungsbasis von (x,[γ]) wie folgt: Es
+sei U eine einfach zusammenhängende Umgebung von x und
+˜ U = ˜ U(x,[γ]) :=
+{
+(y,[γ
+∗
+α])
+|
+y
+∈
+U,α Weg in U von x nach y
+}
+p ist Überlagerung: p
+| ˜ U
+: ˜ U
+→
+U bijektiv. p ist stetig und damit p
+| ˜ U
+ein Homöomorphismus.
+Sind $\gamma_1, \gamma_2$ Wege von $x_0$ nach $x$ und $\gamma_1 \not\sim \gamma_2$, so ist $\tilde{U}(x,[\gamma_1]) \cap \tilde{U}(x,[\gamma_2]) = \emptyset$, denn: Ist
+$\gamma_1 * \alpha \sim \gamma_2 * \alpha$, so ist auch $\gamma_1 \sim \gamma_2$. Also ist $p$ eine Überlagerung.
+˜ X ist einfach zusammenhängend: Es sei ˜ x
+0
+:= (x
+0
+,e) und ˜ γ : I
+→
+˜ X ein geschlossener Weg
+um ˜ x
+0
+.
+Sei γ := p(˜ γ).
+Annahme: [˜ γ]
+(cid:54)
+= e
+Mit Folgerung 3.4.a folgt dann: $[\gamma] \neq e$.
+Dann ist der Lift von γ nach ˜ x mit Anfangspunkt ˜ x
+0
+ein Weg von ˜ x
+0
+nach (x
+0
+,[γ]). Wider-
+spruch.
+60 3.3.ÜBERLAGERUNGEN
+Definition 54
+Es sei p : Y
+→
+X eine Überlagerung und f : Y
+→
+Y ein Homöomorphismus.
+a) f heißt Decktransformation von p :
+⇔
+p
+◦
+f = p.
+b) Die Decktransformationen von p : Y
+→
+X bilden mit der Verkettung eine Gruppe,
+die sog. Decktransformationsgruppe. Man schreibt: Deck(p), Deck(Y/X) oder
+Deck(Y
+→
+X).
+c) p heißt regulär, wenn
+|
+Deck(Y/X)
+|
+= degp gilt.
+Bemerkung 57 (Eigenschaften der Decktransformation)
+a) (DeckY/X,
+◦
+) ist eine Gruppe
+b) Ist f
+∈
+Deck(Y/X) und f
+(cid:54)
+= id, dann hat f keinen Fixpunkt.
+c)
+|
+Deck(Y/X)
+| ≤
+degp
+d) Ist $p$ eine reguläre Überlagerung, dann gilt: $\forall x \in X$: $\mathrm{Deck}(Y/X)$ operiert transitiv
+auf der Menge der Urbilder $p^{-1}(x)$.
+Beweis:
+a) Es gilt:
+•
+id
+Y
+∈
+DeckY/X,
+•
+f,g
+∈
+DeckY/X
+⇒
+p
+◦
+(f
+◦
+g) = (p
+◦
+f)
+◦
+g = p
+◦
+g
+⇒
+f
+◦
+g
+∈
+DeckY/X
+•
+f
+∈
+DeckY/X
+⇒
+p
+◦
+f = p
+⇒
+p
+◦
+f−1 = (p
+◦
+f)
+◦
+f−1 = p
+◦
+(f
+◦
+f−1) = p
+⇒
+f−1
+∈
+DeckY/X
+b) Die Menge
+Fix(f) =
+{
+y
+∈
+Y
+|
+f(y) = y
+}
+ist abgeschlossen als Urbild der Diagonale ∆
+⊆
+Y
+×
+Y unter der stetigen Abbildung
+y
+(cid:55)→
+(f(y),y). Außerdem ist Fix(f) offen, denn ist y
+∈
+Fix(f), so sei U eine Umgebung
+von $p(y) \in X$ wie in Definition 48 und $V \subseteq p^{-1}(U)$ die Komponente, die $y$ enthält;
+also $p|_V : V \to U$ ein Homöomorphismus. Dann ist $W := f^{-1}(V) \cap V$ offene Umgebung
+von $y$.
+Für z
+∈
+W ist f(z)
+∈
+V und p(f(z)) = p(z). Da p injektiv auf V ist, folgt f(z) = z,
+d. h. Fix(f)
+(cid:54)
+=
+∅
+.
+Da Y zusammenhängend ist, folgt aus Fix(˜ f)
+(cid:54)
+=
+∅
+schon Fix(f) = Y, also f = id
+Y
+.
+c) Es sei $x_0 \in X$, $\deg(p) = d$ und $p^{-1}(x_0) = \{ y_0, \dots, y_{d-1} \}$. Für $f \in \mathrm{Deck}(Y/X)$ ist
+$f(y_0) \in \{ y_0, \dots, y_{d-1} \}$.
+Zu $i \in \{ 0, \dots, d-1 \}$ gibt es höchstens ein $f \in \mathrm{Deck}(Y/X)$ mit $f(y_0) = y_i$, denn ist
+$f(y_0) = g(y_0)$, so ist $(g^{-1} \circ f)(y_0) = y_0$, also nach Bemerkung 57.b $g^{-1} \circ f = \mathrm{id}_Y$.
+d) Wenn jemand den Beweis macht, bitte an info@martin-thoma.de schicken.
+Beispiel 39 (Decktransformationen)
+1) p : R
+→
+S1 : Deck(R/S1) =
+{
+t
+(cid:55)→
+t+n
+|
+n
+∈
+Z
+}
+∼
+= Z
+2) p : R2
+→
+T2 : Deck(R2/T2)
+∼
+= Z
+×
+Z = Z2
+3) p : Sn
+→ P
+n(R) : Deck(Sn/
+P
+n(R)) =
+{
+x
+(cid:55)→ ±
+x
+}
+∼
+= Z/2Z
+61 3.3.ÜBERLAGERUNGEN
+Nun werden wir eine Verbindung zwischen der Decktransformationsgruppe und der Fundamen-
+talgruppe herstellen:
+Satz 3.8
+Ist p : ˜ X
+→
+X eine universelle Überlagerung, so gilt:
+Deck( ˜ X/X)
+∼
+= π
+1
+(X,x
+0
+)
+∀
+x
+0
+∈
+X
+Beweis: Wähle $\tilde{x}_0 \in p^{-1}(x_0)$. Es sei $\rho : \mathrm{Deck}(\tilde{X}/X) \to \pi_1(X,x_0)$ die Abbildung, die $f$ auf $[p(\gamma_f)]$
+abbildet, wobei $\gamma_f$ ein Weg von $\tilde{x}_0$ nach $f(\tilde{x}_0)$ sei. Da $\tilde{X}$ einfach zusammenhängend ist, ist
+$\gamma_f$ bis auf Homotopie eindeutig bestimmt und damit auch $\rho$ wohldefiniert.
+•
+$\rho$ ist Gruppenhomomorphismus: Seien $f,g \in \mathrm{Deck}(\tilde{X}/X) \Rightarrow \gamma_{g \circ f} = \gamma_g * g(\gamma_f)$
+$\Rightarrow p(\gamma_{g \circ f}) = p(\gamma_g) * \underbrace{(p \circ g)}_{=p}(\gamma_f) = \rho(g) \cdot \rho(f)$
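+•
+$\rho$ ist injektiv: $\rho(f) = e \Rightarrow p(\gamma_f) \sim \gamma_{x_0} \overset{\text{Satz 3.2}}{\Longrightarrow} \gamma_f \sim \gamma_{\tilde{x}_0} \Rightarrow f(\tilde{x}_0) = \tilde{x}_0 \overset{\text{Bem. 57.b}}{\Longrightarrow} f = \mathrm{id}_{\tilde{X}}$.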
+•
+ρ ist surjektiv: Sei [γ]
+∈
+π
+1
+(X,x
+0
+), ˜ γ Lift von γ nach ˜ x mit Anfangspunkt ˜ x
+0
+. Der
+Endpunkt von ˜ γ sei ˜ x
+1
+.
+p ist reguläre Überlagerung: Seien $\tilde{x}_0, \tilde{x}_1 \in \tilde{X}$ mit $p(\tilde{x}_0) = p(\tilde{x}_1)$. Nach Satz 3.5 gibt
+es genau eine Überlagerung $\tilde{p} : \tilde{X} \to \tilde{X}$ mit $p = p \circ \tilde{p}$ und $\tilde{p}(\tilde{x}_0) = \tilde{x}_1$. Somit ist $\tilde{p}$ eine
+Decktransformation und damit p eine reguläre Überlagerung.
+Da p reguläre Überlagerung ist, gibt es ein f
+∈
+Deck( ˜ X/X) mit f(˜ x
+0
+) = ˜ x
+1
+.
+Aus der Definition von ρ folgt: ρ(f) = p(γ
+f
+) = γ
+(cid:4)
+Beispiel 40 (Bestimmung von π
+1
+(S1))
+p : R
+→
+S1, t
+(cid:55)→
+(cos2πt,sin2πt) ist universelle Überlagerung, da R einfach zusammenhängend ist.
+Für n
+∈
+Z sei f
+n
+: R
+→
+R,t
+(cid:55)→
+t+n die Translation um n.
+Es gilt: (p
+◦
+f
+n
+)(t) = p(f
+n
+(t)) = p(t)
+∀
+t
+∈
+R, d. h. f
+n
+ist Decktransformation.
+Ist umgekehrt g irgendeine Decktransformation, so gilt insbesondere für t = 0:
+(cos(2πg(0)),sin(2πg(0))) = (p
+◦
+g)(0) = p(0) = (1,0)
+Es existiert n
+∈
+Z mit g(0) = n. Da auch f
+n
+(0) = 0+n = n gilt, folgt mit Bemerkung 57.c
+g = f
+n
+. Damit folgt:
+Deck(R/S1) =
+{
+f n
+|
+n
+∈
+Z
+}
+∼ = Z
+Nach Satz 3.8 also π
+1
+(S1)
+∼
+= Deck(R/S1)
+∼
+= Z
+62 3.4.GRUPPENOPERATIONEN
+3.4 Gruppenoperationen
+Definition 55
+Sei (G,
+·
+) eine Gruppe und X eine Menge.
+Eine Gruppenoperation von G auf X ist eine Abbildung
+◦
+: G
+×
+X
+→
+X für die gilt:
+a) 1
+G
+◦
+x = x
+∀
+x
+∈
+X
+b) (g
+·
+h)
+◦
+x = g
+◦
+(h
+◦
+x)
+∀
+g,h
+∈
+G
+∀
+x
+∈
+X
+Beispiel 41
+1) G = (Z,+),X = R,n
+◦
+x = x+n
+2) G operiert auf X = G durch g
+◦
+h := g
+·
+h
+3) G operiert auf X = G durch g
+◦
+h := g
+·
+h
+·
+g−1, denn
+i) 1
+G ◦
+h = 1
+G ·
+h
+·
+1−1
+G
+= h
+ii) $(g_1 \cdot g_2) \circ h = (g_1 \cdot g_2) \cdot h \cdot (g_1 \cdot g_2)^{-1}$
+$= g_1 \cdot (g_2 \cdot h \cdot g_2^{-1}) \cdot g_1^{-1}$
+$= g_1 \circ (g_2 \circ h)$
+Definition 56
+Sei G eine Gruppe, X ein topologischer Raum und
+◦
+: G
+×
+X
+→
+X eine Gruppenoperation.
+a) G operiert durch Homöomorphismen, wenn für jedes g
+∈
+G die Abbildung
+m
+g
+: X
+→
+X,x
+(cid:55)→
+g
+◦
+x
+ein Homöomorphismus ist.
+b) Ist G eine topologische Gruppe, so heißt die Gruppenoperation
+◦
+stetig, wenn
+∀
+g
+∈
+G : m
+g
+ist stetig
+gilt.
+Bemerkung 58
+Jede stetige Gruppenoperation ist eine Gruppenoperation durch Homöomorphismen.
+Beweis: Nach Voraussetzung ist m
+g
+:=
+◦|
+{g}×X
+: X
+→
+X,x
+(cid:55)→
+g
+◦
+x stetig.
+Die Umkehrabbildung zu m
+g
+ist m
+g−1
+:
+(m
+g−1
+◦
+m
+g
+)(x) = m
+g−1
+(m
+g
+(x))
+= m
+g−1
+(g
+◦
+x)
+= g−1
+◦
+(g
+◦
+x)
+Def. 55.b = (g−1
+·
+g)
+◦
+x
+= 1
+G
+◦
+x
+Def. 55.a
+= x
+Beispiel 42
+In Beispiel 41.1 operiert Z durch Homöomorphismen.
+63 3.4.GRUPPENOPERATIONEN
+Bemerkung 59
+Sei G eine Gruppe und X eine Menge.
+a) Die Gruppenoperationen von G auf X entsprechen bijektiv den Gruppenhomomorphismen
+(cid:37) : G
+→
+Perm(X) = Sym(X) =
+{
+f : X
+→
+X
+|
+f ist bijektiv
+}
+b) Ist X ein topologischer Raum, so entsprechen dabei die Gruppenoperationen durch
+Homöomorphismus den Gruppenhomomorphismen G
+→
+Homöo(X)
+Beweis:
+Sei $\circ : G \times X \to X$ eine Gruppenoperation von $G$ auf $X$. Dann sei $\varrho : G \to \mathrm{Perm}(X)$
+definiert durch $\varrho(g)(x) = g \circ x \;\forall g \in G, x \in X$, also $\varrho(g) = m_g$.
+(cid:37) ist Homomorphismus: (cid:37)(g
+1
+·
+g
+2
+) = m
+g1·g2
+= m
+g1
+◦
+m
+g2
+= (cid:37)(g
+1
+)
+◦
+(cid:37)(g
+2
+), denn für x
+∈
+X :
+(cid:37)(g
+1
+·
+g
+2
+)(x) = (g
+1
+·
+g
+2
+)
+◦
+x = g
+1
+◦
+(g
+2
+◦
+x) = (cid:37)(g
+1
+)((cid:37)(g
+2
+)(x)) = ((cid:37)(g
+1
+)
+◦
+(cid:37)(g
+2
+))(x)
+Umgekehrt: Sei (cid:37) : G
+→
+Perm(X) Gruppenhomomorphismus. Definiere
+◦
+: G
+×
+X
+→
+X
+durch g
+◦
+x = (cid:37)(g)(x).
+z. z. Definition 55.b:
+g
+1
+◦
+(g
+2
+◦
+x) = (cid:37)(g
+1
+)(g
+2
+◦
+x)
+= (cid:37)(g
+1
+)((cid:37)(g
+2
+)(x))
+= ((cid:37)(g
+1
+)
+◦
+(cid:37)(g
+2
+))(x)
+(cid:37)istHom.
+= (cid:37)(g
+1
+·
+g
+2
+)(x)
+= (g
+1
+·
+g
+2
+)
+◦
+x
+z. z. Definition 55.a: 1
+G
+·
+x = (cid:37)(1
+G
+)(x) = id
+X
+(x) = x, weil (cid:37) ein Homomorphismus ist.
+Beispiel 43
+Sei X ein wegzusammenhängender topologischer Raum, p : ˜ X
+→
+X eine universelle Überla-
+gerung, x
+0
+∈
+X, ˜ x
+0
+∈
+˜ X mit p(˜ x
+0
+) = x
+0
+.
+Dann operiert π
+1
+(X,x
+0
+) auf ˜ X durch Homöomorphismen wie folgt:
+Für [γ]
+∈
+π
+1
+(X,x
+0
+) und ˜ x
+∈
+˜ X sei [γ]
+◦
+˜ x = ˜ γ
+∗
+(cid:37)(1) wobei ˜ γ ein Weg von ˜ x
+0
+nach ˜ x in ˜ X
+sei, (cid:37) := p(˜ δ) = p
+◦
+δ.
+Also: δ ist ein Weg in X von x
+0
+nach x = p(˜ x) und
+(cid:93)
+γ
+∗
+δ die Liftung von γ
+∗
+δ mit
+Anfangspunkt ˜ x
+0
+.
+[γ]
+·
+˜ x hängt nicht von der Wahl von ˜ γ ab; ist ˜ γ(cid:48) ein anderer Weg von ˜ x
+0
+nach ˜ x, so sind ˜ δ
+und ˜ δ(cid:48) homotop, also auch (cid:93) γ
+∗
+δ und (cid:93) γ
+∗
+δ(cid:48) homotop.
+Gruppenoperation, denn:
+i) [e]
+◦
+˜ x = (cid:103) e
+∗
+δ = ˜ x
+ii)
+(cid:94)
+γ
+1
+∗
+γ
+2
+∗
+δ(1) = [γ
+1
+∗
+γ
+2
+]
+◦
+˜ x = ([γ
+1
+]
+∗
+[γ
+2
+])
+◦
+˜ x
+γ
+1
+∗
+γ
+2
+∗
+δ(1) = [γ
+1
+]
+◦
+( ˜ γ
+2
+∗
+δ)(1) = [γ
+1
+]
+◦
+([γ
+2
+]
+◦
+˜ x)
+Erinnerung:Die Konstruktion aus Bemerkung 59 induziert zu der Gruppenoperation π
+1
+(X,x
+0
+)
+aus Beispiel 43 einen Gruppenhomomorphismus (cid:37) : π
+1
+(X,x
+0
+)
+→
+Homöo(X). Nach Satz 3.8 ist
+(cid:37)(π
+1
+(X,x
+0
+)) = Deck( ˜ X/X)
+=
+(cid:110)
+f : ˜ X
+→
+˜ X Homöomorphismus
+(cid:12)
+(cid:12)
+(cid:12)
+p
+◦
+f = p
+(cid:111)
+64 3.4.GRUPPENOPERATIONEN
+Beispiel 44
+Sei X := S2
+⊆
+R3 und τ die Drehung um die z-Achse um 180◦.
+$G = \langle \tau \rangle = \{ \mathrm{id}, \tau \}$ operiert auf $S^2$ durch Homöomorphismen.
+Frage: Was ist S2/G? Ist S2/G eine Mannigfaltigkeit?
+4 Euklidische und nichteuklidische
+Geometrie
+Definition 57
+Das Tripel (X,d,G) heißt genau dann eine Geometrie, wenn (X,d) ein metrischer Raum
+und
+∅ (cid:54)
+= G
+⊆ P
+(X) gilt. Dann heißt G die Menge aller Geraden.
+4.1 Axiome für die euklidische Ebene
+Axiome bilden die Grundbausteine jeder mathematischen Theorie. Eine Sammlung aus Axiomen
+nennt man Axiomensystem. Da der Begriff des Axiomensystems so grundlegend ist, hat man
+auch ein paar sehr grundlegende Forderungen an ihn: Axiomensysteme sollen widerspruchsfrei
+sein, die Axiome sollen möglichst unabhängig sein und Vollständigkeit wäre auch toll. Mit
+Unabhängigkeit ist gemeint, dass sich kein Axiom aus den anderen herleiten lässt. Dies scheint
+auf den ersten Blick eine einfache Eigenschaft zu sein. Auf den zweiten Blick muss man jedoch
+einsehen, dass das Parallelenproblem, also die Frage, ob das Parallelenaxiom unabhängig von
+den restlichen Axiomen ist, über 2000 Jahre nicht gelöst wurde. Ein ganz anderes Kaliber ist
+die Frage nach der Vollständigkeit. Ein Axiomensystem gilt als vollständig, wenn jede Aussage
+innerhalb des Systems verifizierbar oder falsifizierbar ist. Interessant ist hierbei der Gödelsche
+Unvollständigkeitssatz, der z. B. für die Arithmetik beweist, dass nicht alle Aussagen formal
+bewiesen oder widerlegt werden können.
+Kehren wir nun jedoch zurück zur Geometrie. Euklid hat in seiner Abhandlung „Die Elemente“
+ein Axiomensystem für die Geometrie aufgestellt.
+Euklids Axiome
+•
+Strecke zwischen je zwei Punkten
+•
+Jede Strecke bestimmt genau eine Gerade
+•
+Kreis (um jeden Punkt mit jedem Radius)
+•
+Je zwei rechte Winkel sind gleich (Isometrie, Bewegung)
+•
+Parallelenaxiom von Euklid:
+Wird eine Gerade so von zwei Geraden geschnitten, dass die Summe der Innenwinkel
+kleiner als zwei Rechte ist, dann schneiden sich diese Geraden auf der Seite dieser Winkel.
+Man mache sich klar, dass das nur dann nicht der Fall ist, wenn beide Geraden parallel sind und senkrecht auf der ersten stehen.
+Definition 58
+Eine euklidische Ebene ist eine Geometrie (X,d,G), die Axiome §1 - §5 erfüllt:
+§1) Inzidenzaxiome:
+66 4.1.AXIOMEFÜRDIEEUKLIDISCHEEBENE
+(i) Zu $P \neq Q \in X$ gibt es genau ein $g \in G$ mit $\{P, Q\} \subseteq g$.
+(ii) $|g| \geq 2 \quad \forall g \in G$
+(iii) $X \notin G$
+§2) Abstandsaxiom: Zu $P, Q, R \in X$ gibt es genau dann ein $g \in G$ mit $\{P, Q, R\} \subseteq g$, wenn gilt:
+• $d(P, R) = d(P, Q) + d(Q, R)$ oder
+• $d(P, Q) = d(P, R) + d(R, Q)$ oder
+• $d(Q, R) = d(Q, P) + d(P, R)$
+Definition 59
+Sei (X,d,G) eine Geometrie und seien P,Q,R
+∈
+X.
+a) P,Q,R liegen kollinear, wenn es g
+∈
+G gibt mit
+{
+P,Q,R
+} ⊆
+g.
+b) Q liegt zwischen P und R, wenn d(P,R) = d(P,Q)+d(Q,R)
+c) Strecke $\overline{PR} := \{ Q \in X \mid Q \text{ liegt zwischen } P \text{ und } R \}$
+d) Halbgeraden:
+$PR^+ := \{ Q \in X \mid Q \text{ liegt zwischen } P \text{ und } R \text{ oder } R \text{ liegt zwischen } P \text{ und } Q \}$
+$PR^- := \{ Q \in X \mid P \text{ liegt zwischen } Q \text{ und } R \}$
+P R
+PR−
+PR
+PR+
+Abbildung 4.1: Halbgeraden
+Bemerkung 60
+a) $PR^+ \cup PR^- = PR$ (die Gerade durch $P$ und $R$)
+b) $PR^+ \cap PR^- = \{P\}$
+Beweis:
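+a) „$\subseteq$“ folgt direkt aus der Definition von $PR^+$ und $PR^-$.
+„$\supseteq$“: Sei $Q \in PR$ $\Rightarrow$ $P, Q, R$ sind kollinear.
+$\overset{\S 2}{\Rightarrow}$ Es gilt einer der folgenden Fälle:
+• $Q$ liegt zwischen $P$ und $R$ $\Rightarrow Q \in PR^+$
+• $R$ liegt zwischen $P$ und $Q$ $\Rightarrow Q \in PR^+$
+• $P$ liegt zwischen $Q$ und $R$ $\Rightarrow Q \in PR^-$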
+b) „$\supseteq$“ ist offensichtlich.
+„$\subseteq$“: Sei $Q \in PR^+ \cap PR^-$. Dann ist $d(Q, R) = d(P, Q) + d(P, R)$, weil $Q \in PR^-$, und wegen $Q \in PR^+$ gilt
+$$d(P, R) = d(P, Q) + d(Q, R) \quad\text{oder}\quad d(P, Q) = d(P, R) + d(R, Q).$$
+67 4.1.AXIOMEFÜRDIEEUKLIDISCHEEBENE
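+Im ersten Fall folgt $d(Q, R) = 2 d(P, Q) + d(Q, R) \Rightarrow d(P, Q) = 0 \Rightarrow P = Q$.
+Im zweiten Fall folgt $d(P, Q) = 2 d(P, R) + d(P, Q) \Rightarrow P = R \Rightarrow$ Widerspruch (da $P \neq R$).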
+Definition 60
+§3) Anordnungsaxiome
+(i) Zu jeder Halbgerade $H$ mit Anfangspunkt $P \in X$ und jedem $r \in \mathbb{R}_{\geq 0}$ gibt es genau ein $Q \in H$ mit $d(P, Q) = r$.
+(ii) Jede Gerade $g$ zerlegt $X \setminus g = H_1 \mathbin{\dot{\cup}} H_2$ in zwei nichtleere Teilmengen $H_1, H_2$, sodass für alle $A \in H_i$, $B \in H_j$ mit $i, j \in \{1, 2\}$ gilt: $\overline{AB} \cap g \neq \emptyset \Leftrightarrow i \neq j$.
+Diese Teilmengen $H_i$ heißen Halbebenen bzgl. $g$.
+§4) Bewegungsaxiom: Zu $P, Q, P', Q' \in X$ mit $d(P, Q) = d(P', Q')$ gibt es mindestens 2 Isometrien $\varphi_1, \varphi_2$ mit $\varphi_i(P) = P'$ und $\varphi_i(Q) = Q'$ für $i = 1, 2$.¹
+§5) Parallelenaxiom: Zu jeder Geraden $g \in G$ und jedem Punkt $P \in X \setminus g$ gibt es höchstens ein $h \in G$ mit $P \in h$ und $h \cap g = \emptyset$. $h$ heißt Parallele zu $g$ durch $P$.
+Satz 4.1 (Satz von Pasch)
+Seien $P, Q, R$ nicht kollinear, $g \in G$ mit $g \cap \{P, Q, R\} = \emptyset$ und $g \cap \overline{PQ} \neq \emptyset$.
+Dann ist entweder $g \cap \overline{PR} \neq \emptyset$ oder $g \cap \overline{QR} \neq \emptyset$.
+Dieser Satz besagt, dass Geraden, die eine Seite eines Dreiecks (also nicht nur eine Ecke)
+schneiden, auch eine weitere Seite schneiden.
+Beweis: $g \cap \overline{PQ} \neq \emptyset$
+$\overset{\S 3 (ii)}{\Rightarrow}$ $P$ und $Q$ liegen in verschiedenen Halbebenen bzgl. $g$
+$\Rightarrow$ o. B. d. A. $R$ und $P$ liegen in verschiedenen Halbebenen bzgl. $g$
+$\Rightarrow$ $g \cap \overline{RP} \neq \emptyset$
+Bemerkung 61
+Seien $P, Q \in X$ mit $P \neq Q$ sowie $A, B \in X \setminus PQ$ mit $A \neq B$. Außerdem seien $A$ und $B$ in derselben Halbebene bzgl. $PQ$ sowie $Q$ und $B$ in derselben Halbebene bzgl. $PA$.
+Dann gilt: $PB^+ \cap \overline{AQ} \neq \emptyset$
+Auch Bemerkung 61 lässt sich umgangssprachlich sehr viel einfacher ausdrücken: Die Diagonalen
+eines konvexen Vierecks schneiden sich.
+Beweis: Sei $P' \in PQ^-$, $P' \neq P$ $\overset{\text{Satz 4.1}}{\Longrightarrow}$ $PB$ schneidet $\overline{AP'} \cup \overline{AQ}$.
+Sei C der Schnittpunkt. Dann gilt:
+¹ Die „Verschiebung“ von $P'Q'$ nach $PQ$ und die Isometrie, die zusätzlich an der Geraden durch $P$ und $Q$ spiegelt.
+68 4.1.AXIOMEFÜRDIEEUKLIDISCHEEBENE
+P
+P(cid:48)
+Q
+A B
+C
+Abbildung 4.2: Situation aus Bemerkung 61
+(i) C
+∈
+PB+, denn A und B liegen in derselben Halbebene bzgl. PQ = P(cid:48)Q, also auch
+AP(cid:48) und AQ.
+(ii) C liegt in derselben Halbebene bzgl. PA wie B, weil das für Q gilt.
+AP(cid:48) liegt in der anderen Halbebene bzgl. PA
+⇒
+C /
+∈
+P(cid:48)A
+⇒
+C
+∈
+AQ
+Da C
+∈
+PB+ und C
+∈
+AQ folgt nun direkt:
+∅ (cid:54)
+=
+{
+C
+} ⊆
+PB+
+∩
+AQ (cid:4)
+Bemerkung 62
+Seien $P, Q \in X$ mit $P \neq Q$ und $A, B \in X \setminus PQ$ in derselben Halbebene bzgl. $PQ$. Außerdem
+sei d(A,P) = d(B,P) und d(A,Q) = d(B,Q).
+Dann ist A = B.
+P
+Q
+A
+B
+Abbildung 4.3: Bemerkung 62: Die beiden roten und die beiden blauen Linien sind gleich lang.
+Intuitiv weiß man, dass daraus folgt, dass A = B gilt.
+Beweis: durch Widerspruch
+Annahme: A
+(cid:54)
+= B
+Dann ist B /
+∈
+(PA
+∪
+QA) wegen §2.
+1. Fall: Q und B liegen in derselben Halbebene bzgl. PA
+Bem. 61 =====
+⇒
+PB+
+∩
+AQ
+(cid:54)
+=
+∅
+.
+Sei C der Schnittpunkt von PB und AQ.
+Dann gilt:
+(i) d(A,C)+d(C,Q) = d(A,Q)
+Vor.
+= d(B,Q) < d(B,C)+d(C,Q)
+⇒
+d(A,C) < d(B,C)
+69 4.1.AXIOMEFÜRDIEEUKLIDISCHEEBENE
+P Q
+B
+C
+A
+(a) 1. Fall
+P
+Q
+A B
+(b) 2. Fall
+Abbildung 4.4: Fallunterscheidung aus Bemerkung 62
+(ii) a) B liegt zwischen P und C.
+d(P,A) + d(A,C) > d(P,C) = d(P,B) + d(B,C) = d(P,A) + d(B,C)
+⇒
+d(A,C) > d(B,C)
+⇒
+Widerspruch zu Punkt (i)
+b) C liegt zwischen P und B
+d(P,C)+d(C,A) > d(P,A) = d(P,B) = d(P,C)+d(C,B)
+⇒
+d(C,A) > d(C,B)
+⇒
+Widerspruch zu Punkt (i)
+2. Fall: Q und B liegen in verschiedenen Halbebenen bzgl. PA.
+Dann liegen A und Q in derselben Halbebene bzgl. PB.
+Tausche A und B
+⇒
+Fall 1 (cid:4)
+Bemerkung 63
+Sei (X,d,G) eine Geometrie, die §1 - §3 erfüllt, P,Q
+∈
+X mit P
+(cid:54)
+= Q und ϕ eine Isometrie
+mit ϕ(P) = P und ϕ(Q) = Q.
+Dann gilt ϕ(S) = S
+∀
+S
+∈
+PQ.
+Beweis:
+O. B. d. A. sei S
+∈
+PQ
+2
+⇔
+d(P,Q) = d(P,S)+d(S,Q)
+ϕ∈Iso(X)
+⇒
+d(ϕ(P),ϕ(Q)) = d(ϕ(P),ϕ(S))+d(ϕ(S),ϕ(Q))
+P,Q∈Fix(ϕ)
+⇒
+d(P,Q) = d(P,ϕ(S))+d(ϕ(S),Q)
+⇒
+ϕ(S) liegt zwischen P und Q
+⇒
+d(P,S) = d(ϕ(P),ϕ(S)) = d(P,ϕ(S))
+3(i)
+⇒
+ϕ(S) = S
+(cid:4)
+Proposition 4.2
+In einer Geometrie, die §1 - §3 erfüllt, gibt es zu P,P(cid:48),Q,Q(cid:48) mit d(P,Q) = d(P(cid:48),Q(cid:48))
+höchstens zwei Isometrien mit ϕ(P) = P(cid:48) und ϕ(Q) = Q(cid:48)
+70 4.1.AXIOMEFÜRDIEEUKLIDISCHEEBENE
+Aus den Axiomen folgt, dass es in der Situation von §4 höchstens zwei Isometrien mit
+ϕ
+i
+(P) = P(cid:48) und ϕ
+i
+(Q) = Q(cid:48) gibt.
+Beweis: Seien ϕ
+1
+,ϕ
+2
+,ϕ
+3
+Isometrien mit ϕ
+i
+(P) = P(cid:48), ϕ
+i
+(Q) = Q(cid:48) mit i = 1,2,3.
+Der Beweis von Proposition 4.2 erfolgt über zwei Teilaussagen:
+(Teil i)
+∃
+R
+∈
+X
+\
+PQ mit ϕ
+1
+(R) = ϕ
+2
+(R).
+(Teil ii) Hat ϕ 3 Fixpunkte, die nicht kollinear sind, so ist ϕ = id
+X
+.
+Aus (Teil i) und (Teil ii) folgt, dass ϕ−1
+2 ◦
+ϕ
+1
+= id
+X
+, also ϕ
+2
+= ϕ
+1
+, da P, Q und R in diesem
+Fall Fixpunkte sind.
+Nun zu den Beweisen der Teilaussagen:
+(Teil i) Sei R
+∈
+X
+\
+PQ. Von den drei Punkten ϕ
+1
+(R),ϕ
+2
+(R),ϕ
+3
+(R) liegen zwei in der selben
+Halbebene bzgl. P(cid:48)Q(cid:48) = ϕ
+i
+(PQ).
+O. B. d. A. seien ϕ
+1
+(R) und ϕ
+2
+(R) in der selben Halbebene.
+Es gilt: d(P(cid:48),ϕ
+1
+(R)) = d(ϕ
+1
+(P),ϕ
+1
+(R))
+= d(P,R)
+= d(ϕ
+2
+(P),ϕ
+2
+(R))
+= d(P(cid:48),ϕ
+2
+(R))
+und analog d(Q(cid:48),ϕ
+1
+(R)) = d(Q(cid:48),ϕ
+2
+(R))
+(Teil ii) Seien P, Q und R Fixpunkte von ϕ, R /
+∈
+PQ und A /
+∈
+PQ
+∪
+PR
+∪
+QR. Sei B
+∈
+PQ
+\{
+P,Q
+}
+. Dann ist ϕ(B) = B wegen Bemerkung 63.
+Ist R
+∈
+AB, so enthält AB 2 Fixpunkte von ϕ
+Bem. 63
+=====
+⇒
+ϕ(A) = A.
+P B Q
+C
+R
+A
+Abbildung 4.5: P,Q,R sind Fixpunkte, B
+∈
+PQ
+\{
+P,Q
+}
+, A /
+∈
+PQ
+∪
+PR
+∪
+QR
+Ist $R \notin AB$, so ist $AB \cap \overline{PR} \neq \emptyset$ oder $AB \cap \overline{RQ} \neq \emptyset$ nach Satz 4.1. Der Schnittpunkt $C$ ist dann Fixpunkt von $\varphi$ nach Bemerkung 63 $\Rightarrow$ $\varphi(A) = A$.
+Bemerkung 64 (SWS-Kongruenzsatz)
+Sei (X,d,G) eine Geometrie, die §1 - §4 erfüllt. Seien außerdem
+(cid:52)
+ABC und
+(cid:52)
+A(cid:48)B(cid:48)C(cid:48)
+Dreiecke, für die gilt:
+(i) d(A,B) = d(A(cid:48),B(cid:48))
+(ii) ∠CAB
+∼
+= ∠C(cid:48)A(cid:48)B(cid:48)
+71 4.1.AXIOMEFÜRDIEEUKLIDISCHEEBENE
+(iii) d(A,C) = d(A(cid:48),C(cid:48))
+Dann ist
+(cid:52)
+ABC kongruent zu
+(cid:52)
+A(cid:48)B(cid:48)C(cid:48) .
+Beweis: Sei ϕ die Isometrie mit ϕ(A(cid:48)) = A, ϕ(A(cid:48)C(cid:48)+) = AC+ und ϕ(A(cid:48)B(cid:48)+) = AB+. Diese
+Isometrie existiert wegen Punkt §4.
+⇒
+C
+∈
+ϕ(A(cid:48)C(cid:48)+) und B
+∈
+ϕ(A(cid:48)B(cid:48)+).
+d(A(cid:48),C(cid:48)) = d(ϕ(A(cid:48)),ϕ(C(cid:48))) = d(A,ϕ(C(cid:48)))
+3(i)
+==
+⇒
+ϕ(C(cid:48)) = C
+d(A(cid:48),B(cid:48)) = d(ϕ(A(cid:48)),ϕ(B(cid:48))) = d(A,ϕ(B(cid:48)))
+3(i)
+==
+⇒
+ϕ(B(cid:48)) = B
+Also gilt insbesondere ϕ(
+(cid:52)
+A(cid:48)B(cid:48)C(cid:48)) =
+(cid:52)
+ABC. (cid:4)
+Bemerkung 65 (WSW-Kongruenzsatz)
+Sei (X,d,G) eine Geometrie, die §1 - §4 erfüllt. Seien außerdem
+(cid:52)
+ABC und
+(cid:52)
+A(cid:48)B(cid:48)C(cid:48)
+Dreiecke, für die gilt:
+(i) d(A,B) = d(A(cid:48),B(cid:48))
+(ii) ∠CAB
+∼
+= ∠C(cid:48)A(cid:48)B(cid:48)
+(iii) ∠ABC
+∼
+= ∠A(cid:48)B(cid:48)C(cid:48)
+Dann ist
+(cid:52)
+ABC kongruent zu
+(cid:52)
+A(cid:48)B(cid:48)C(cid:48) .
+Beweis: Sei ϕ die Isometrie mit ϕ(A(cid:48)) = A, ϕ(B(cid:48)) = B und ϕ(C(cid:48)) liegt in der selben Halbebene
+bzgl. AB wie C. Diese Isometrie existiert wegen §4.
+Aus ∠CAB = ∠C(cid:48)A(cid:48)B(cid:48) = ∠ϕ(C(cid:48))ϕ(A(cid:48))ϕ(B(cid:48)) = ∠ϕ(C(cid:48))AB folgt, dass ϕ(C(cid:48))
+∈
+AC+.
+Analog folgt aus ∠ABC = ∠A(cid:48)B(cid:48)C(cid:48) = ∠ϕ(A(cid:48))ϕ(B(cid:48))ϕ(C(cid:48)) = ∠ABϕ(C(cid:48)), dass ϕ(C(cid:48))
+∈
+BC+.
+Dann gilt ϕ(C(cid:48))
+∈
+AC
+∩
+BC =
+{
+C
+} ⇒
+ϕ(C(cid:48)) = C.
+Es gilt also ϕ(
+(cid:52)
+A(cid:48)B(cid:48)C(cid:48)) =
+(cid:52)
+ABC. (cid:4)
+Definition 61
+a) Ein Winkel ist ein Punkt P
+∈
+X zusammen mit 2 Halbgeraden mit Anfangspunkt P.
+Man schreibt: ∠R
+1
+PR
+2
+bzw. ∠R
+2
+PR
+1
+2
+b) Zwei Winkel sind gleich, wenn es eine Isometrie gibt, die den einen Winkel auf den
 anderen abbildet.
-c) ∠R(cid:48)P(cid:48)R(cid:48) heißt kleiner als ∠R PR , wenn es eine Isometrie ϕ gibt, mit ϕ(P(cid:48)) = P,
-1 2 1 2
-ϕ(P(cid:48)R(cid:48)+) = PR+ und ϕ(R(cid:48)) liegt in der gleichen Halbebene bzgl. PR wie R und in
-1 1 2 1 2
-der gleichen Halbebene bzgl. PR wie R
-2 1
-d) Im Dreieck PQR gibt es Innenwinkel und Außenwinkel.
+c) $\angle R_1' P' R_2'$ heißt kleiner als $\angle R_1 P R_2$, wenn es eine Isometrie $\varphi$ gibt, mit $\varphi(P') = P$, $\varphi((P'R_1')^+) = PR_1^+$ und $\varphi(R_2')$ liegt in der gleichen Halbebene bzgl. $PR_1$ wie $R_2$ und in der gleichen Halbebene bzgl. $PR_2$ wie $R_1$.
+d) Im Dreieck
 (cid:52)
+PQR gibt es Innenwinkel und Außenwinkel.
 Bemerkung 66
 In einem Dreieck ist jeder Innenwinkel kleiner als jeder nicht anliegende Außenwinkel.
 Beweis: Zeige ∠PRQ < ∠RQP(cid:48).
-Sei M der Mittelpunkt der Strecke QR und P(cid:48) PQ+ PQ. Sei A MP− mit d(P,M) =
-∈ \ ∈
+Sei M der Mittelpunkt der Strecke QR und P(cid:48)
+∈
+PQ+
+\
+PQ. Sei A
+∈
+MP− mit d(P,M) =
 d(M,A).
-2Für dieses Skript gilt: ∠R PR =∠R PR . Also sind insbesondere alle Winkel ≤180◦.
-1 2 2 1
+² Für dieses Skript gilt: $\angle R_1 P R_2 = \angle R_2 P R_1$. Also sind insbesondere alle Winkel $\leq 180^\circ$.
 72 4.1.AXIOMEFÜRDIEEUKLIDISCHEEBENE
+P R(cid:48)
+1
+R
+1
+R(cid:48)
+2
+R
+2
+(a) ∠R(cid:48)
+1
+P(cid:48)R(cid:48)
+2
+istkleinerals∠R
+1
+PR
+2
+,
+vgl. Definition 61.c
+P
+Q R
+(b) Innenwinkel und Außenwin-
+kel in (cid:52)PQR, vgl. Definiti-
+on 61.d
+Abbildung 4.6: Situation aus Definition 61
+Q M
+A
+P
+R
+(a) Parallelogramm AQPR
+α
+β
+R
+Q P
+(b) Innen- und Außenwin-
+kel von (cid:52)PQR
+Abbildung 4.7: Situation aus Bemerkung 66
+Es gilt: $d(Q, M) = d(M, R)$ und $d(P, M) = d(M, A)$ sowie $\angle PMR = \angle AMQ$ $\Rightarrow$ $\triangle MRP$ ist kongruent zu $\triangle MQA$, denn eine der beiden Isometrien, die $\angle PMR$ auf $\angle AMQ$ abbildet, bildet $R$ auf $Q$ und $P$ auf $A$ ab.
+$\Rightarrow$ $\angle MQA = \angle MRP = \angle QRP = \angle PRQ$.
+Noch zu zeigen: ∠MQA < ∠RQP(cid:48), denn A liegt in der selben Halbebene bzgl. PQ wie M.
+Proposition 4.3 (Existenz der Parallelen)
+Sei (X,d,G) eine Geometrie mit den Axiomen §1 - §4.
+Dann gibt es zu jeder Geraden g
+∈
+G und jedem Punkt P
+∈
+X
+\
+g mindestens eine
+Parallele h
+∈
+G mit P
+∈
+h und g
+∩
+h =
+∅
+.
+Beweis: Seien P,Q
+∈
+f
+∈
+G und ϕ die Isometrie, die Q auf P und P auf P(cid:48)
+∈
+f mit
+d(P,P(cid:48)) = d(P,Q) abbildet und die Halbebenen bzgl. f erhält.
+73 4.1.AXIOMEFÜRDIEEUKLIDISCHEEBENE
+Q
+h
+f
+g
+P
+Abbildung 4.8: Situation aus Proposition 4.3
+Annahme: ϕ(g)
+∩
+g
+(cid:54)
+=
+∅
+⇒
+Es gibt einen Schnittpunkt
+{
+R
+}
+= ϕ(g)
+∩
+g.
+Dann ist ∠RQP = ∠RQP(cid:48) < ∠RPP(cid:48) nach Bemerkung 66 und ∠RQP = ∠RPP(cid:48), weil
+ϕ(∠RQP) = ∠RPP(cid:48).
+⇒
+Widerspruch
+⇒
+ϕ(g)
+∩
+g =
+∅
+(cid:4)
+Folgerung 4.4
+Die Summe zweier Innenwinkel in einem Dreieck ist kleiner als π.
+D. h. es gibt eine Isometrie ϕ mit ϕ(Q) = P und ϕ(QP+) = PR+, sodass ϕ(R) in der gleichen
+Halbebene bzgl. PQ liegt wie R.
+Beweis: Die Summe eines Innenwinkels mit den anliegenden Außenwinkeln ist π, d. h. die
+beiden Halbgeraden bilden eine Gerade.
+Abbildung 4.9: In der sphärischen Geometrie gibt es, im Gegensatz zur euklidischen Geometrie,
+Dreiecke mit drei 90◦-Winkeln.
+Proposition 4.5
+In einer Geometrie mit den Axiomen §1 - §4 ist in jedem Dreieck die Summe der
+Innenwinkel
+≤
+π.
+74 4.1.AXIOMEFÜRDIEEUKLIDISCHEEBENE
+Sei im Folgenden „IWS“ die „Innenwinkelsumme“.
+Beweis: Sei
+(cid:52)
+ein Dreieck mit IWS(
+(cid:52)
+) = π+ε
+α
+β
+γ
+P
+(a) Summe der Winkel α, β und γ
+α
+1
+α 2 β
+γ
+M
+A B
+C A(cid:48)
+α
+(b) Situation aus Proposition 4.5
+Abbildung 4.10: Situation aus Proposition 4.5
+Sei α ein Innenwinkel von
+(cid:52)
+.
+Beh.: Es gibt ein Dreieck $\triangle'$ mit $\text{IWS}(\triangle') = \text{IWS}(\triangle)$ und einem Innenwinkel $\alpha' \leq \frac{\alpha}{2}$.
+Dann gibt es für jedes $n$ ein $\triangle_n$ mit $\text{IWS}(\triangle_n) = \text{IWS}(\triangle)$ und Innenwinkel $\alpha' \leq \frac{\alpha}{2^n}$. Für $\frac{\alpha}{2^n} < \varepsilon$ ist dann die Summe der beiden anderen Innenwinkel von $\triangle_n$ größer als $\pi$ $\Rightarrow$ Widerspruch zu Folgerung 4.4.
+Beweis: Es seien A,B,C
+∈
+X und
+(cid:52)
+das Dreieck mit den Eckpunkten A,B,C und α sei
+der Innenwinkel bei A, β der Innenwinkel bei B und γ der Innenwinkel bei C.
+Sei M der Mittelpunkt der Strecke BC. Sei außerdem α
+1
+= ∠CAM und α
+2
+= ∠BAM.
+Sei weiter A(cid:48)
+∈
+MA− mit d(A(cid:48),M) = d(A,M).
+Die Situation ist in Abbildung 4.10b skizziert.
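+$\Rightarrow$ $\triangle(MA'C)$ und $\triangle(MAB)$ sind kongruent. $\Rightarrow$ $\angle ABM = \angle A'CM$ und $\angle MA'C = \angle MAB$. $\Rightarrow$ $\alpha + \beta + \gamma = \text{IWS}(\triangle ABC) = \text{IWS}(\triangle AA'C)$ und $\alpha_1 + \alpha_2 = \alpha$, also o. B. d. A. $\alpha_1 \leq \alpha_2$ und damit $\alpha_1 \leq \frac{\alpha}{2}$.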
+Bemerkung 67
+In einer euklidischen Ebene ist in jedem Dreieck die Innenwinkelsumme gleich π.
+α(cid:48)
+α(cid:48)(cid:48)
+α β
+β(cid:48)
+γ
+A B
+C
+g
+Abbildung 4.11: Situation aus Bemerkung 67
+Beweis: Sei g eine Parallele von AB durch C.
+•
+Es gilt α(cid:48) = α wegen Proposition 4.3.
+•
+Es gilt β(cid:48) = β wegen Proposition 4.3.
+•
+Es gilt α(cid:48)(cid:48) = α(cid:48) wegen Aufgabe 8.
+75 4.2.WEITEREEIGENSCHAFTENEINEREUKLIDISCHENEBENE
+⇒
+IWS(
+(cid:52)
+ABC) = γ +α(cid:48)(cid:48)+β(cid:48) = π
+Aus der Eigenschaft, dass die Innenwinkelsumme von Dreiecken in der euklidischen Ebene gleich
+π ist, folgen direkt die Kongruenzsätze SWW und WWS über den Kongruenzsatz WSW.
+4.2 Weitere Eigenschaften einer euklidischen Ebene
+Satz 4.6 (Strahlensatz)
+In ähnlichen Dreiecken sind Verhältnisse entsprechender Seiten gleich.
+x
+y
+−
+1 0 1 2 3 4
+0
+1
+2
+3
+z
+x
+λ2z
+λ2x
+Abbildung 4.12: Strahlensatz
+Der Beweis wird hier nicht geführt. Für Beweisvorschläge wäre ich dankbar.
+A B(cid:48)
+C(cid:48)
+B
+C
+c
+b a
+c(cid:48)
+b(cid:48)
+a(cid:48)
+Abbildung 4.13: Die Dreiecke
+(cid:52)
+ABC und
+(cid:52)
+AB(cid:48)C(cid:48) sind ähnlich.
+4.2.1 Flächeninhalt
+Definition 62
+„Simplizialkomplexe“ in euklidischer Ebene (X,d) heißen flächengleich, wenn sie sich in
+kongruente Dreiecke zerlegen lassen.
+76 4.2.WEITEREEIGENSCHAFTENEINEREUKLIDISCHENEBENE
+(a) Zwei kongruente Dreiecke (b) ZweiweiterekongruenteDrei-
+ecke
+Abbildung 4.14: Flächengleichheit
+Der Flächeninhalt eines Dreiecks ist 1/2
+·
+Grundseite
+·
+Höhe.
+A B
+C
+L C
+h
+c
+c
+(a) 1/2·|AB|·|h
+c
+|
+·
+A B
+C
+L
+A
+h
+a
+c
+(b) 1/2·|BC|·|h
+a
+|
+Abbildung 4.15: Flächenberechnung im Dreieck
+Zu zeigen: Unabhängigkeit von der gewählten Grundseite.
+α
+α
+γ
+γ
+A B
+C
+L A
+L
+C
+Abbildung 4.16: $\triangle ABL_a$ und $\triangle CL_C B$ sind ähnlich, weil IWS $= \pi$
+$\overset{\text{Strahlensatz}}{\Longrightarrow} \frac{a}{h_c} = \frac{c}{h_a} \Rightarrow a \cdot h_a = c \cdot h_c$
+Satz 4.7 (Satz des Pythagoras)
+Im rechtwinkligen Dreieck gilt a2+b2 = c2, wobei c die Hypotenuse und a,b die beiden
+Katheten sind.
+Beweis: $(a + b) \cdot (a + b) = a^2 + 2ab + b^2 = c^2 + 4 \cdot \left(\frac{1}{2} \cdot a \cdot b\right)$
+77 4.2.WEITEREEIGENSCHAFTENEINEREUKLIDISCHENEBENE
+c
+b a
+A B
+C
+·
+(a) a,b sind Katheten und c ist die Hypo-
+tenuse
+b a
+b
+a
+b a
+b
+a
+·
+· ·
+·
+γ
+(b) Beweisskizze
+Abbildung 4.17: Satz des Pythagoras
+Satz 4.8
+Bis auf Isometrie gibt es genau eine euklidische Ebene (X,d,G), nämlich X = R2,
+d = euklidischer Abstand, G = Menge der üblichen Geraden.
+Beweis:
+(i) (R2,d
+Euklid
+) ist offensichtlich eine euklidische Ebene.
+(ii) Sei (X,d) eine euklidische Ebene und g
+1
+,g
+2
+Geraden in X, die sich in einem Punkt 0
+im rechten Winkel schneiden.
+Sei P
+∈
+X
+\
+(g
+1
+∪
+g
+2
+) ein Punkt und P
+X
+der Fußpunkt des Lots von P auf g
+1
+(vgl.
+Aufgabe 9 (c)) und P
+Y
+der Fußpunkt des Lots von P auf g
+2
+.
+Sei x
+P
+:= d(P
+X
+,0) und y
+P
+:= d(P
+Y
+,0).
+In Abbildung 4.19 wurde die Situation skizziert.
+Sei h : X
+→
+R2 eine Abbildung mit h(P) := (x
+P
+,y
+P
+) Dadurch wird h auf dem
+Quadranten definiert, in dem P liegt, d. h.
+∀
+Q
+∈
+X mit PQ
+∩
+g
+1
+=
+∅
+= PQ
+∩
+g
+2
+Fortsetzung auf ganz X durch konsistente Vorzeichenwahl.
+Im Folgenden werden zwei Aussagen gezeigt:
+(i) h ist surjektiv
+(ii) h ist eine Isometrie
+Da jede Isometrie injektiv ist, folgt aus (i) und (ii), dass h bijektiv ist.
+Nun zu den Beweisen der Teilaussagen:
+78 4.3.HYPERBOLISCHEGEOMETRIE
+· g 1
+g
+2
+P
+X
+(a) Schritt 1
+· g 1
+g
+2
+x P
+y
+P
+P
+0 P X
+P
+Y
+X
+(b) Schritt 2
+Abbildung 4.18: Beweis zu Satz 4.8
+(i) Sei (x,y)
+∈
+R2, z. B. x
+≥
+0,y
+≥
+0. Sei P(cid:48)
+∈
+g
+1
+mit d(0,P(cid:48)) = x und P(cid:48) auf der
+gleichen Seite von g
+2
+wie P.
+g 1
+g
+2
+x P
+y
+P
+P
+Q
+0
+R
+X
+Abbildung 4.19: Beweis zu Satz 4.8
+(ii) Zu Zeigen: d(P,Q) = d(h(P),h(Q))
+d(P,Q)2
+Pythagoras
+= d(P,R)2+d(R,Q)2 = (y
+Q
+−
+y
+P
+)2+(x
+Q
+−
+x
 P
+)2.
+h(Q) = (x
+Q
+,y
+Q
+)
+4.3 Hyperbolische Geometrie
+Definition 63
+Sei
+$$\mathbb{H} := \{ z \in \mathbb{C} \mid \Im(z) > 0 \} = \{ (x, y) \in \mathbb{R}^2 \mid y > 0 \}$$
+79 4.3.HYPERBOLISCHEGEOMETRIE
+die obere Halbebene bzw. Poincaré-Halbebene und G = G
+1
+∪
+G
+2
+mit
+G
+1
+=
+{
+g
+1
+⊆
+H
+| ∃
+m
+∈
+R,r
+∈
 R
+>0
+: g
+1
+=
+{
+z
+∈
+H :
+|
+z
+−
+m
+|
+= r
+}}
+G
+2
+=
+{
+g
+2
+⊆
+H
+| ∃
+x
+∈
+R : g
+2
+=
+{
+z
+∈
+H :
+(cid:60)
+(z) = x
+}}
+Die Elemente aus G heißen hyperbolische Geraden.
+Bemerkung 68 (Eigenschaften der hyperbolischen Geraden)
+Die hyperbolischen Geraden erfüllen...
+a) ...die Inzidenzaxiome §1
+b) ...das Anordnungsaxiom §3 (ii)
+c) ...nicht das Parallelenaxiom §5
+Beweis:
+a) Offensichtlich sind §1 (iii) und §1 (ii) erfüllt. Für §1 (i) gilt:
+Gegeben z
+1
+,z
+2
+∈
+H
+Existenz:
+Fall 1
+(cid:60)
+(z
+1
+) =
+(cid:60)
+(z
+2
+)
+⇒
+z
+1
+und z
+2
+liegen auf
+$g = \{ z \in \mathbb{C} \mid \Re(z) = \Re(z_1) \} \cap \mathbb{H}$
+Siehe Abbildung 4.20a.
+Fall 2
+(cid:60)
+(z
+1
+)
+(cid:54)
+=
+(cid:60)
+(z
+2
+)
+Betrachte nun $z_1$ und $z_2$ als Punkte in der euklidischen Ebene. Die Mittelsenkrechte zu diesen Punkten schneidet die $x$-Achse. Alle Punkte auf der Mittelsenkrechten zu $z_1$ und $z_2$ sind gleich weit von $z_1$ und $z_2$ entfernt. Daher ist der Schnittpunkt mit der $x$-Achse der Mittelpunkt eines Kreises durch $z_1$ und $z_2$ (vgl. Abbildung 4.20b)
+x
+y
+−
+1 0 1 2 3 4 5 0
+1
+2
+3
+4
+Z
+1
+Z
+2
+(cid:60)
+(Z
+1
+)
+(a) Fall 1
+x
+y
+−
+1 0 1 2 3 4 5 0
+1
+2
+3
+4
+Z 1
+Z
+2
+(b) Fall 2
+Abbildung 4.20: Zwei Punkte liegen in der hyperbolischen Geometrie immer auf genau einer
+Geraden
+b) Sei g
+∈
+G
+1
+˙
+∪
+G
+2
+eine hyperbolische Gerade.
+80 4.3.HYPERBOLISCHEGEOMETRIE
+Es existieren disjunkte Zerlegungen von H
+\
+g:
+Fall 1: $g = \{ z \in \mathbb{H} \mid |z - m| = r \} \in G_1$
+Dann gilt:
+$$\mathbb{H} \setminus g = \underbrace{\{ z \in \mathbb{H} \mid |z - m| < r \}}_{=: H_1 \text{ (Kreisinneres)}} \mathbin{\dot{\cup}} \underbrace{\{ z \in \mathbb{H} \mid |z - m| > r \}}_{=: H_2 \text{ (Kreisäußeres)}}$$
+Da r > 0 ist H
+1
+nicht leer, da r
+∈
+R ist H
 2
-R(cid:48)
+nicht leer.
+Fall 2: g =
+{
+z
+∈
+H
+| (cid:60)
+z = x
+} ∈
+G
 2
-Q R
-P R 1(cid:48) R 1
-(a) ∠R(cid:48)P(cid:48)R(cid:48) istkleinerals∠R PR ,(b) Innenwinkel und Außenwin-
-1 2 1 2
-vgl. Definition 61.c kel in (cid:52)PQR, vgl. Definiti-
-on 61.d
-Abbildung 4.6: Situation aus Definition 61
-P
-R
-R
-Q M
-β
+Die disjunkte Zerlegung ist:
+$$\mathbb{H} \setminus g = \underbrace{\{ z \in \mathbb{H} \mid \Re(z) < x \}}_{=: H_1 \text{ (Links)}} \mathbin{\dot{\cup}} \underbrace{\{ z \in \mathbb{H} \mid \Re(z) > x \}}_{=: H_2 \text{ (Rechts)}}$$
+Zu zeigen:
+∀
 A
-Qα P
-(a) Parallelogramm AQPR(b) Innen- und Außenwin-
-kel von (cid:52)PQR
-Abbildung 4.7: Situation aus Bemerkung 66
-Es gilt: d(Q,M) = d(M,R) und d(P,M) = d(M,A) sowie ∠PMR = ∠AMQ MRQ
-⇒ (cid:52)
-ist kongruent zu AMQ, denn eine der beiden Isometrien, die ∠PMR auf ∠AMQ abbildet,
-(cid:52)
-bildet R auf Q und P auf A ab.
-∠MQA = ∠MRP = ∠QRP = ∠PRQ.
+∈
+H
+i
+, B
+∈
+H
+j
+mit i,j
+∈ {
+1,2
+}
+gilt: AB
+∩
+g
+(cid:54)
+=
+∅ ⇔
+i
+(cid:54)
+= j
+„
+⇐
+“: A
+∈
+H
+1
+,B
+∈
+H
+2
+: AB
+∩
+g
+(cid:54)
+=
+∅
+Da dH stetig ist, folgt diese Richtung direkt. Alle Punkte in H
+1
+haben einen Abstand
+von m der kleiner ist als r und alle Punkte in H
+2
+haben einen Abstand von m der
+größer ist als r. Da man jede Strecke von A nach B insbesondere auch als stetige
+Abbildung f : R
+→
+R
+>0
+auffassen kann, greift der Zwischenwertsatz
 ⇒
-Noch zu zeigen: ∠MQA < ∠RQP(cid:48), denn A liegt in der selben Halbebene bzgl. PQ wie M.
-Proposition 4.3 (Existenz der Parallelen)
-Sei (X,d,G) eine Geometrie mit den Axiomen §1 - §4.
-Dann gibt es zu jeder Geraden g G und jedem Punkt P X g mindestens eine
-∈ ∈ \
-Parallele h G mit P h und g h = .
-∈ ∈ ∩ ∅
-Beweis: Seien P,Q f G und ϕ die Isometrie, die Q auf P und P auf P(cid:48) f mit
-∈ ∈ ∈
-d(P,P(cid:48)) = d(P,Q) abbildet und die Halbebenen bzgl. f erhält.
-73 4.1.AXIOMEFÜRDIEEUKLIDISCHEEBENE
-f
-h
-P
+AB
+∩
 g
-Q
-Abbildung 4.8: Situation aus Proposition 4.3
-Annahme: ϕ(g) g =
-∩ (cid:54) ∅
-Es gibt einen Schnittpunkt R = ϕ(g) g.
-⇒ { } ∩
-Dann ist ∠RQP = ∠RQP(cid:48) < ∠RPP(cid:48) nach Bemerkung 66 und ∠RQP = ∠RPP(cid:48), weil
-ϕ(∠RQP) = ∠RPP(cid:48).
-Widerspruch
+(cid:54)
+=
+∅
+„
 ⇒
-ϕ(g) g = (cid:4)
-⇒ ∩ ∅
-Folgerung 4.4
-Die Summe zweier Innenwinkel in einem Dreieck ist kleiner als π.
-D. h. es gibt eine Isometrie ϕ mit ϕ(Q) = P und ϕ(QP+) = PR+, sodass ϕ(R) in der gleichen
-Halbebene bzgl. PQ liegt wie R.
-Beweis: Die Summe eines Innenwinkels mit den anliegenden Außenwinkeln ist π, d. h. die
-beiden Halbgeraden bilden eine Gerade.
-Abbildung 4.9: In der sphärischen Geometrie gibt es, im Gegensatz zur euklidischen Geometrie,
-Dreiecke mit drei 90◦-Winkeln.
-Proposition 4.5
-In einer Geometrie mit den Axiomen §1 - §4 ist in jedem Dreieck die Summe der
-Innenwinkel π.
-≤
-74 4.1.AXIOMEFÜRDIEEUKLIDISCHEEBENE
-Sei im Folgenden „IWS“ die „Innenwinkelsumme“.
-Beweis: Sei ein Dreieck mit IWS( ) = π+ε
-(cid:52) (cid:52)
-C A(cid:48)
-γ
-β
-M
-γ α
-α
+“: A
+∈
+H
+i
+,B
+∈
+H
+j
+mit i,j
+∈ {
+1,2
+}
+: AB
+∩
+g
+(cid:54)
+=
+∅ ⇒
+i
+(cid:54)
+= j
+Sei h die Gerade, die durch A und B geht.
+Da A,B /
+∈
+g, aber A,B
+∈
+h gilt, haben g und h insbesondere mindestens einen
+unterschiedlichen Punkt. Aus §1 (i) folgt, dass sich g und h in höchstens einem Punkt
+schneiden. Sei C dieser Punkt.
+Aus A,B /
+∈
+g folgt: C
+(cid:54)
+= A und C
+(cid:54)
+= B. Also liegt C zwischen A und B. Daraus folgt,
+dass A und B bzgl. g in verschiedenen Halbebenen liegen.
+c) Siehe Abbildung 4.21.
+x
+y
+−
+5
+−
+4
+−
+3
+−
+2
+−
+1 0 1 2 3 4 5 6
+0
+1
+2
+3
+4
+5
+Abbildung 4.21: Hyperbolische Geraden erfüllen §5 nicht.
+81 4.3.HYPERBOLISCHEGEOMETRIE
+Definition 64
+Es seien $a, b, c, d \in \mathbb{R}$ mit $ad - bc \neq 0$ und $\sigma : \mathbb{C} \to \mathbb{C}$ eine Abbildung definiert durch
+$$\sigma(z) := \frac{az + b}{cz + d}$$
+$\sigma$ heißt Möbiustransformation.
+Proposition 4.9
+a) Die Gruppe $\text{SL}_2(\mathbb{R})$ operiert auf $\mathbb{H}$ durch die Möbiustransformation
+$$\sigma(z) := \begin{pmatrix} a & b \\ c & d \end{pmatrix} \circ z := \frac{az + b}{cz + d}$$
+b) Die Gruppe $\text{PSL}_2(\mathbb{R}) = \text{SL}_2(\mathbb{R}) / (\pm I)$ operiert durch $\sigma$ auf $\mathbb{H}$.
+c) $\text{PSL}_2(\mathbb{R})$ operiert auf $\mathbb{R} \cup \{\infty\}$. Diese Gruppenoperation ist 3-fach transitiv, d. h. zu $x_0 < x_1 < x_\infty \in \mathbb{R}$ gibt es genau ein $\sigma \in \text{PSL}_2(\mathbb{R})$ mit $\sigma(x_0) = 0$, $\sigma(x_1) = 1$, $\sigma(x_\infty) = \infty$.
+d) $\text{SL}_2(\mathbb{R})$ wird von den Matrizen
+$$\underbrace{\begin{pmatrix} \lambda & 0 \\ 0 & \lambda^{-1} \end{pmatrix}}_{=: A_\lambda}, \quad \underbrace{\begin{pmatrix} 1 & t \\ 0 & 1 \end{pmatrix}}_{=: B_t} \quad\text{und}\quad \underbrace{\begin{pmatrix} 0 & 1 \\ -1 & 0 \end{pmatrix}}_{=: C} \quad\text{mit } t, \lambda \in \mathbb{R}^\times$$
+erzeugt.
+e) PSL
+2
+(R) operiert auf G.
+Beweis:
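+a) Sei $z = x + iy \in \mathbb{H}$, d. h. $y > 0$ und $\sigma = \begin{pmatrix} a & b \\ c & d \end{pmatrix} \in \text{SL}_2(\mathbb{R})$
+$$\begin{aligned}
+\Rightarrow \sigma(z) &= \frac{a(x + iy) + b}{c(x + iy) + d} \\
+&= \frac{(ax + b) + iay}{(cx + d) + icy} \cdot \frac{(cx + d) - icy}{(cx + d) - icy} \\
+&= \frac{(ax + b)(cx + d) + aycy}{(cx + d)^2 + (cy)^2} + i \frac{ay(cx + d) - (ax + b)cy}{(cx + d)^2 + (cy)^2} \\
+&= \frac{axcx + axd + bcx + bd + aycy}{(cx + d)^2 + (cy)^2} + i \frac{(ad - bc)y}{(cx + d)^2 + (cy)^2} \\
+&\overset{\text{SL}_2(\mathbb{R})}{=} \frac{ac(x^2 + y^2) + adx + bcx + bd}{(cx + d)^2 + (cy)^2} + i \frac{y}{(cx + d)^2 + (cy)^2}
+\end{aligned}$$
+$$\Rightarrow \Im(\sigma(z)) = \frac{y}{(cx + d)^2 + (cy)^2} > 0$$
+Die Abbildung bildet also nach $\mathbb{H}$ ab. Außerdem gilt:
+$$\begin{pmatrix} 1 & 0 \\ 0 & 1 \end{pmatrix} \circ z = \frac{x + iy}{1} = x + iy = z$$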
+82 4.3.HYPERBOLISCHEGEOMETRIE
+und
+$$\begin{aligned}
+\begin{pmatrix} a & b \\ c & d \end{pmatrix} \circ \left( \begin{pmatrix} a' & b' \\ c' & d' \end{pmatrix} \circ z \right) &= \begin{pmatrix} a & b \\ c & d \end{pmatrix} \circ \frac{a'z + b'}{c'z + d'} \\
+&= \frac{a \frac{a'z + b'}{c'z + d'} + b}{c \frac{a'z + b'}{c'z + d'} + d} \\
+&= \frac{\frac{a(a'z + b') + b(c'z + d')}{c'z + d'}}{\frac{c(a'z + b') + d(c'z + d')}{c'z + d'}} \\
+&= \frac{a(a'z + b') + b(c'z + d')}{c(a'z + b') + d(c'z + d')} \\
+&= \frac{(aa' + bc')z + ab' + bd'}{(ca' + dc')z + cb' + dd'} \\
+&= \begin{pmatrix} aa' + bc' & ab' + bd' \\ ca' + dc' & cb' + dd' \end{pmatrix} \circ z \\
+&= \left( \begin{pmatrix} a & b \\ c & d \end{pmatrix} \cdot \begin{pmatrix} a' & b' \\ c' & d' \end{pmatrix} \right) \circ z
+\end{aligned}$$
+b) Es gilt $\sigma(z) = (-\sigma)(z)$ für alle $\sigma \in \text{SL}_2(\mathbb{R})$ und $z \in \mathbb{H}$.
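+c) Ansatz: $\sigma = \begin{pmatrix} a & b \\ c & d \end{pmatrix}$
+$\sigma(x_0) = \frac{a x_0 + b}{c x_0 + d} \overset{!}{=} 0 \Rightarrow a x_0 + b = 0 \Rightarrow b = -a x_0$
+$\sigma(x_\infty) = \infty \Rightarrow c x_\infty + d = 0 \Rightarrow d = -c x_\infty$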
+σ(x
+1
+) = 1
+⇒
+ax
 1
-P α α 2 β
-A B
-(a) Summe der Winkel α, β und γ (b) Situation aus Proposition 4.5
-Abbildung 4.10: Situation aus Proposition 4.5
-Sei α ein Innenwinkel von .
-(cid:52)
-Beh.: Es gibt ein Dreieck (cid:48) mit IWS( (cid:48)) = IWS( ) und einem Innenwinkel α(cid:48) α.
-(cid:52) (cid:52) (cid:52) ≤ 2
-Dann gibt es für jedes n ein mit IWS( ) = IWS( ) und Innenwinkel α(cid:48) α . Für
-(cid:52)n (cid:52)n (cid:52) ≤ 2n
-α < ε ist dann die Summe der beiden Innenwinkel um größer als π Widerspruch
-2n (cid:52)n ⇒
-zu Folgerung 4.4.
-Beweis: Es seien A,B,C X und das Dreieck mit den Eckpunkten A,B,C und α sei
-∈ (cid:52)
-der Innenwinkel bei A, β der Innenwinkel bei B und γ der Innenwinkel bei C.
-Sei M der Mittelpunkt der Strecke BC. Sei außerdem α = ∠CAM und α = ∠BAM.
-1 2
-Sei weiter A(cid:48) MA− mit d(A(cid:48),M) = d(A,M).
-∈
-Die Situation ist in Abbildung 4.10b skizziert.
-(MA(cid:48)C) und (MAB) sind kongruent. ∠ABM = ∠A(cid:48)CM und ∠MA(cid:48)C =
-⇒ (cid:52) (cid:52) ⇒
-∠MAB. α+β+γ = IWS( ABC) = IWS( AA(cid:48)C)undα +α = α,alsoo.B.d.A.
-1 2
-⇒ (cid:52) (cid:52)
-α α
-1 ≤ 2
-Bemerkung 67
-In einer euklidischen Ebene ist in jedem Dreieck die Innenwinkelsumme gleich π.
-β(cid:48)C α(cid:48)
-α(cid:48)(cid:48) g
-γ
-α β
-A B
-Abbildung 4.11: Situation aus Bemerkung 67
-Beweis: Sei g eine Parallele von AB durch C.
-Es gilt α(cid:48) = α wegen Proposition 4.3.
-•
-Es gilt β(cid:48) = β wegen Proposition 4.3.
-•
-Es gilt α(cid:48)(cid:48) = α(cid:48) wegen Aufgabe 8.
-•
-75 4.2.WEITEREEIGENSCHAFTENEINEREUKLIDISCHENEBENE
-IWS( ABC) = γ +α(cid:48)(cid:48)+β(cid:48) = π
-⇒ (cid:52)
-Aus der Eigenschaft, dass die Innenwinkelsumme von Dreiecken in der euklidischen Ebene gleich
-π ist, folgen direkt die Kongruenzsätze SWW und WWS über den Kongruenzsatz WSW.
-4.2 Weitere Eigenschaften einer euklidischen Ebene
-Satz 4.6 (Strahlensatz)
-In ähnlichen Dreiecken sind Verhältnisse entsprechender Seiten gleich.
-y
-3 λ2z
-2 z
++b = cx
 1
-x λ2x
++d
+a(x
+1 −
+x
 0
+) = c(x
+1 −
 x
-1 0 1 2 3 4
+∞
+)
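+$\Rightarrow c = a \cdot \frac{x_1 - x_0}{x_1 - x_\infty}$
+Aus $\det \sigma = ad - bc = 1$ folgt damit:
+$-a^2 \cdot x_\infty \cdot \frac{x_1 - x_0}{x_1 - x_\infty} + a^2 x_0 \cdot \frac{x_1 - x_0}{x_1 - x_\infty} = 1$
+$\Rightarrow a^2 \cdot \frac{x_1 - x_0}{x_1 - x_\infty} \cdot (x_0 - x_\infty) = 1$
+$\Rightarrow a^2 = \frac{x_1 - x_\infty}{(x_0 - x_\infty)(x_1 - x_0)}$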
+d) Es gilt:
+$$A_\lambda^{-1} = A_{\frac{1}{\lambda}}, \qquad B_t^{-1} = B_{-t}, \qquad C^{-1} = C^3$$
+Um zu zeigen, dass man mit $A_\lambda$, $B_t$ und $C$ alle Matrizen aus $\text{SL}_2(\mathbb{R})$ erzeugen kann, genügt es daher, von einer beliebigen Matrix durch Multiplikation mit Matrizen der Form $A_\lambda$, $B_t$ und $C$ die Einheitsmatrix zu generieren.
+Sei also
+$$M = \begin{pmatrix} a & b \\ c & d \end{pmatrix} \in \text{SL}_2(\mathbb{R})$$
+beliebig.
+Fall 1: a = 0
+Da M
+∈
+SL
+2
+(R) ist, gilt detM = 1 = ad
 −
-Abbildung 4.12: Strahlensatz
-Der Beweis wird hier nicht geführt. Für Beweisvorschläge wäre ich dankbar.
-C(cid:48)
-b(cid:48)
-C
-a(cid:48)
-b a
-A c B c(cid:48) B(cid:48)
-Abbildung 4.13: Die Dreiecke ABC und AB(cid:48)C(cid:48) sind ähnlich.
-(cid:52) (cid:52)
-4.2.1 Flächeninhalt
-Definition 62
-„Simplizialkomplexe“ in euklidischer Ebene (X,d) heißen flächengleich, wenn sie sich in
-kongruente Dreiecke zerlegen lassen.
-76 4.2.WEITEREEIGENSCHAFTENEINEREUKLIDISCHENEBENE
-(a) Zwei kongruente Dreiecke (b) ZweiweiterekongruenteDrei-
-ecke
-Abbildung 4.14: Flächengleichheit
-Der Flächeninhalt eines Dreiecks ist 1/2 Grundseite Höhe.
-· ·
-C
-C
-c
-L
-h A
-c
+bc =
+−
+bc. Daher ist insbesondere c
+(cid:54)
+= 0. Es
+folgt:
+(cid:18)
+0 1
+−
+1 0
+(cid:19)
 ·
-h
+(cid:18)
+a b
+c d
+(cid:19)
+=
+(cid:18)
+c d
+−
 a
-c
-L C A B A B
-(a) 1/2·|AB|·|h c| (b) 1/2·|BC|·|h a|
-Abbildung 4.15: Flächenberechnung im Dreieck
-Zu zeigen: Unabhängigkeit von der gewählten Grundseite.
-C
-α
-γ L A
-γ
-α
-A L B
-C
-Abbildung 4.16: ABL und CL B sind ähnlich, weil IWS = π
-a C
-(cid:52) (cid:52)
-=S =tr =a =hl =en =s =atz a = c a h = c h
-⇒ hc ha → · a · c
-Satz 4.7 (Satz des Pythagoras)
-Im rechtwinkligen Dreieck gilt a2+b2 = c2, wobei c die Hypotenuse und a,b die beiden
-Katheten sind.
-Beweis: (a+b) (a+b) = a2+2ab+b2 = c2+4 (1 a b)
-· · 2 · ·
-77 4.2.WEITEREEIGENSCHAFTENEINEREUKLIDISCHENEBENE
+−
+b
+(cid:19)
+83 4.3.HYPERBOLISCHEGEOMETRIE
+Gehe zu Fall 2.
+Fall 2: a
+(cid:54)
+= 0
+Nun wird in M durch M
+·
+A
+1
+a
+an der Stelle von a eine 1 erzeugt:
+(cid:18)
 a b
-b · ·
-γ
-C
+c d
+(cid:19)
+·
+(cid:18)1
+a
+0
+0 a
+(cid:19)
+=
+(cid:18)
+1 ab
+c
 a
+ad
+(cid:19)
+Gehe zu Fall 3.
+Fall 3: a = 1
+(cid:18)
+1 b
+c d
+(cid:19)
 ·
-b a a
+(cid:18)
+1
+−
 b
-· ·
-A c B b a
-(a) a,b sind Katheten und c ist die Hypo- (b) Beweisskizze
-tenuse
-Abbildung 4.17: Satz des Pythagoras
-Satz 4.8
-Bis auf Isometrie gibt es genau eine euklidische Ebene (X,d,G), nämlich X = R2,
-d = euklidischer Abstand, G = Menge der üblichen Geraden.
-Beweis:
-(i) (R2,d ) ist offensichtlich eine euklidische Ebene.
-Euklid
-(ii) Sei (X,d) eine euklidische Ebene und g ,g Geraden in X, die sich in einem Punkt 0
-1 2
-im rechten Winkel schneiden.
-Sei P X (g g ) ein Punkt und P der Fußpunkt des Lots von P auf g (vgl.
-1 2 X 1
-∈ \ ∪
-Aufgabe 9 (c)) und P der Fußpunkt des Lots von P auf g .
-Y 2
-Sei x := d(P ,0) und y := d(P ,0).
-P X P Y
-In Abbildung 4.19 wurde die Situation skizziert.
-Sei h : X R2 eine Abbildung mit h(P) := (x ,y ) Dadurch wird h auf dem
-P P
-→
-Quadranten definiert, in dem P liegt, d. h.
-Q X mit PQ g = = PQ g
-1 2
-∀ ∈ ∩ ∅ ∩
-Fortsetzung auf ganz X durch konsistente Vorzeichenwahl.
-Im Folgenden werden zwei Aussagen gezeigt:
-(i) h ist surjektiv
-(ii) h ist eine Isometrie
-Da jede Isometrie injektiv ist, folgt aus (i) und (ii), dass h bijektiv ist.
-Nun zu den Beweisen der Teilaussagen:
-78 4.3.HYPERBOLISCHEGEOMETRIE
-g g
-2 2
-X X
-P
-P
-Y
-P
+0 1
+(cid:19)
+=
+(cid:18)
+1 0
+c d
+−
+bc
+(cid:19)
+Da wir detM = 1 = ad
+−
+bc = d
+−
+bc wissen, gilt sogar M
+2,2
+= 1.
+Gehe zu Fall 4.
+Fall 4: a = 1, b = 0, d = 1
+A
+−1
+CB
+c
+C
+(cid:18)
+1 0
+c 1
+(cid:19)
+=
+(cid:18)
+1 0
+0 1
+(cid:19)
+Daher erzeugen Matrizen der Form A
+λ
+, B
+t
+und C die Gruppe SL
+2
+R. (cid:4)
+e) Es genügt die Aussage für Matrizen aus Proposition 4.9 (d) zu zeigen.
+•
+σ =
+(cid:18)
+λ 0
+0 λ−1
+(cid:19)
+, also σ(z) = λ2z. Daraus ergeben sich die Situationen, die in
+Abbildung 4.22a und Abbildung 4.22b dargestellt sind.
+x
 y
-P
-· g 1 0 · x P P X g 1
-(a) Schritt 1 (b) Schritt 2
-Abbildung 4.18: Beweis zu Satz 4.8
-(i) Sei (x,y) R2, z. B. x 0,y 0. Sei P(cid:48) g mit d(0,P(cid:48)) = x und P(cid:48) auf der
+−
+1 0 1 2 3 4 5 6 7
+0
+1
+2
+3
+m λ2m
+m+ir
+λ2m+iλ2r
+m+1
+(a) Fall 1
+x
+y
+−
+1 0 1 2 3 4
+0
+1
+2
+3
+z
+x
+λ2z
+λ2x
+(b) Fall 2 (Strahlensatz)
+Abbildung 4.22: Beweis von Proposition 4.9 (e) für eine Diagonalmatrix
+•
+Offensichtlich gilt die Aussage für σ =
+(cid:18)
+1 a
+0 1
+(cid:19)
+•
+Sei nun σ =
+(cid:18)
+0 1
+−
+1 0
+(cid:19)
+, also σ(z) =
+−
+1
+z
+Bemerkung 69
+Zu hyperbolischen Geraden g
+1
+,g
+2
+gibt es σ
+∈
+PSL
+2
+(R) mit σ(g
+1
+) = g
+2
+.
+84 4.3.HYPERBOLISCHEGEOMETRIE
+·
+x
+y
+−
+1 0 1
+0
+1
+z = r
+·
+eiϕ
+1
+z
+= 1
+r ·
+eiϕ
+Abbildung 4.23: Inversion am Kreis
+Beweis: Nach Proposition 4.9 (c) gibt es σ mit σ(a
+1
+) = b
+1
+und σ(a
+2
+) = b
+2
+. Dann existiert
+σ(g
+1
+) := g
+2
+wegen dem Inzidenzaxiom §1 und ist eindeutig bestimmt.
+Definition 65
+Seien z
+1
+,z
+2
+,z
+3
+,z
+4
+∈
+C paarweise verschieden.
+Dann heißt
+DV(z
+1
+,z
+2
+,z
+3
+,z
+4
+) :=
+z1−z4
+z1−z2
+z3−z4
+z3−z2
+=
+(z
+1
+−
+z
+4
+)
+·
+(z
+3
+−
+z
+2
+)
+(z
+1 −
+z
+2
+)
+·
+(z
+3 −
+z
+4
+)
+Doppelverhältnis von z
+1
+,...,z
+4
+.
+Bemerkung 70 (Eigenschaften des Doppelverhältnisses)
+a) DV(z
+1
+,...,z
+4
+)
+∈
+C
+\{
+0,1
+}
+b) DV(z
 1
-∈ ≥ ≥ ∈
-gleichen Seite von g wie P.
+,z
+4
+,z
+3
+,z
 2
-R Q
-g
+) = 1
+DV(z1,z2,z3,z4)
+c) DV(z
+3
+,z
 2
-X
-P
-y
-P
-0 x P g 1
-Abbildung 4.19: Beweis zu Satz 4.8
-(ii) Zu Zeigen: d(P,Q) = d(h(P),h(Q))
-Pythagoras
-d(P,Q)2 = d(P,R)2+d(R,Q)2 = (y y )2+(x x )2.
-Q P Q P
-− −
-h(Q) = (x ,y )
-Q Q
-4.3 Hyperbolische Geometrie
-Definition 63
-Sei
-H := z C (z) > 0 = (cid:8) (x,y) R2 (cid:12) (cid:12) y > 0(cid:9)
-{ ∈ | (cid:61) } ∈
-79 4.3.HYPERBOLISCHEGEOMETRIE
-die obere Halbebene bzw. Poincaré-Halbebene und G = G G mit
-1 2
-∪
-G = g H m R,r R : g = z H : z m = r
-1 1 >0 1
-{ ⊆ | ∃ ∈ ∈ { ∈ | − | }}
-G = g H x R : g = z H : (z) = x
-2 2 2
-{ ⊆ | ∃ ∈ { ∈ (cid:60) }}
-Die Elemente aus G heißen hyperbolische Geraden.
-Bemerkung 68 (Eigenschaften der hyperbolischen Geraden)
-Die hyperbolischen Geraden erfüllen...
-a) ...die Inzidenzaxiome §1
-b) ...das Anordnungsaxiom §3 (ii)
-c) ...nicht das Parallelenaxiom §5
-Beweis:
-a) Offensichtlich sind §1 (iii) und §1 (ii) erfüllt. Für §1 (i) gilt:
-Gegeben z ,z H
-1 2
+,z
+1
+,z
+4
+) = 1
+DV(z1,z2,z3,z4)
+d) DV ist auch wohldefiniert, wenn eines der z
+i
+=
+∞
+oder wenn zwei der z
+i
+gleich sind.
+e) DV(0,1,
+∞
+,z
+4
+) = z
+4
+(Der Fall z
+4
+∈ {
+0,1,
+∞}
+ist zugelassen).
+f) Für σ
 ∈
-Existenz:
-Fall 1 (z ) = (z )
-1 2
-(cid:60) (cid:60)
-z und z liegen auf
-1 2
-⇒
-g = z C (z) = (z ) H
+PSL
+2
+(C) und z
 1
-{ ∈ | (cid:60) (cid:60) ∧ }
-Siehe Abbildung 4.20a.
-Fall 2 (z ) = (z )
-1 2
-(cid:60) (cid:54) (cid:60)
-Betrachtenunz undz alsPunkteindereuklidischenEbene.DieMittelsenkrech-
-1 2
-tezudiesenPunktenschneidetdiex-Achse.AllePunkteaufderMittelsenkrechten
-zuz undz sindgleichweitvonz undz entfernt.DaheristderSchnittpunktmit
-1 2 1 2
-der x-Achse der Mittelpunkt eines Kreises durch z und z (vgl. Abbildung 4.20b)
-1 2
-y y
-4 4
-3 Z 3
-2
-2 Z 2
-1 Z
-2
-1 1 Z 1
-(Z )
-1 0 0 1 2(cid:60) 1 3 4 5 x 1 0 0 1 2 3 4 5 x
-− −
-(a) Fall 1 (b) Fall 2
-Abbildung 4.20: Zwei Punkte liegen in der hyperbolischen Geometrie immer auf genau einer
-Geraden
-b) Sei g G ˙ G eine hyperbolische Gerade.
-1 2
-∈ ∪
-80 4.3.HYPERBOLISCHEGEOMETRIE
-Es existieren disjunkte Zerlegungen von H g:
-\
-Fall 1: g = z H z m = r G
+,...,z
+4
+∈
+C
+∪{∞}
+ist
+DV(σ(z
 1
-{ ∈ (cid:107) − | } ∈
-Dann gilt:
-H = z H z m < r ˙ z H z m > r
-{ ∈ (cid:107) − | }∪{ ∈ (cid:107) − | }
-(cid:124) (cid:123)(cid:122) (cid:125) (cid:124) (cid:123)(cid:122) (cid:125)
-=:H1 (Kreisinneres) =:H2 (Kreisäußeres)
-Da r > 0 ist H nicht leer, da r R ist H nicht leer.
-1 2
+),σ(z
+2
+),σ(z
+3
+),σ(z
+4
+)) = DV(z
+1
+,z
+2
+,z
+3
+,z
+4
+)
+und für σ(z) = 1
+z
+gilt
+DV(σ(z
+1
+),σ(z
+2
+),σ(z
+3
+),σ(z
+4
+)) = DV(z
+1
+,z
+2
+,z
+3
+,z
+4
+)
+g) DV(z
+1
+,z
+2
+,z
+3
+,z
+4
+)
 ∈
-Fall 2: g = z H z = x G
+R
+∪{∞} ⇔
+z
+1
+,...,z
+4
+liegen auf einer hyperbolischen Geraden.
+Beweis:
+a) DV(z
+1
+,...,z
+4
+)
+(cid:54)
+= 0, da z
+i
+paarweise verschieden
+DV(z
+1
+,...,z
+4
+)
+(cid:54)
+= 1, da:
+Annahme: DV(z
+1
+,...,z
+4
+) = 1
+⇔
+(z
+1
+−
+z
 2
-{ ∈ | (cid:60) } ∈
-Die disjunkte Zerlegung ist:
-H = z H (z) < x ˙ z H (z) > x
-{ ∈ | (cid:60) }∪{ ∈ | (cid:60) }
-(cid:124) (cid:123)(cid:122) (cid:125) (cid:124) (cid:123)(cid:122) (cid:125)
-=:H1 (Links) =:H2 (Rechts)
-Zu zeigen: A H , B H mit i,j 1,2 gilt: AB g = i = j
-i j
-∀ ∈ ∈ ∈ { } ∩ (cid:54) ∅ ⇔ (cid:54)
-„ “: A H ,B H : AB g =
-1 2
-⇐ ∈ ∈ ∩ (cid:54) ∅
-Da dH stetig ist, folgt diese Richtung direkt. Alle Punkte in H haben einen Abstand
+)(z
+3
+−
+z
+4
+) = (z
 1
-von m der kleiner ist als r und alle Punkte in H haben einen Abstand von m der
+−
+z
+4
+)(z
+3
+−
+z
 2
-größer ist als r. Da man jede Strecke von A nach B insbesondere auch als stetige
-Abbildung f : R R auffassen kann, greift der Zwischenwertsatz AB g =
->0
-→ ⇒ ∩ (cid:54) ∅
-„ “: A H ,B H mit i,j 1,2 : AB g = i = j
-i j
-⇒ ∈ ∈ ∈ { } ∩ (cid:54) ∅ ⇒ (cid:54)
-Sei h die Gerade, die durch A und B geht.
-Da A,B / g, aber A,B h gilt, haben g und h insbesondere mindestens einen
-∈ ∈
-unterschiedlichen Punkt. Aus §1 (i) folgt, dass sich g und h in höchstens einen Punkt
-schneiden. Sei C dieser Punkt.
-Aus A,B / g folgt: C = A und C = B. Also liegt C zwischen A und B. Daraus folgt,
-∈ (cid:54) (cid:54)
-dass A und B bzgl. g in verschiedenen Halbebenen liegen.
-c) Siehe Abbildung 4.21.
-y
-5
+)
+85 4.3.HYPERBOLISCHEGEOMETRIE
+⇔
+z
+1
+z
+3
+−
+z
+2
+z
+3
+−
+z
+1
+z
+4
++z
+2
+z
 4
+= z
+1
+z
 3
+−
+z
+3
+z
+4
+−
+z
+1
+z
+2
++z
+2
+z
+4
+⇔
+z
 2
+z
+3
++z
 1
-0
-x
-5 4 3 2 1 0 1 2 3 4 5 6
-− − − − −
-Abbildung 4.21: Hyperbolische Geraden erfüllen §5 nicht.
-81 4.3.HYPERBOLISCHEGEOMETRIE
-Definition 64
-Es seien a,b,c,d R mit ad bc = 0 und σ : C C eine Abbildung definiert durch
-∈ − (cid:54) →
-az+b
-σ(z) :=
-cz+d
-σ heißt Möbiustransformation.
-Proposition 4.9
-a) Die Gruppe SL (R) operiert auf H durch die Möbiustransformation
+z
+4
+= z
+3
+z
+4
++z
+1
+z
 2
-(cid:18) (cid:19)
-a b az+b
-σ(z) := z :=
-c d ◦ cz+d
-b) Die Gruppe PSL (R) = SL (R)/ operiert durch σ auf H.
-2 2 (±I)
-c) PSL (R) operiert auf R . Diese Gruppenoperation ist 3-fach transitiv, d. h.
+⇔
+z
 2
-∪{∞}
-zu x < x < x R gibt es genau ein σ PSL (R) mit σ(x ) = 0, σ(x ) = 1,
-0 1 ∞ 2 0 1
-∈ ∈
-σ(x ) = .
-∞
-∞
-d) SL (R) wird von den Matrizen
+z
+3
+−
+z
+3
+z
+4
+= z
+1
+z
 2
-(cid:18) (cid:19) (cid:18) (cid:19) (cid:18) (cid:19)
-λ 0 1 t 0 1
-, und mit t,λ R×
-0 λ−1 0 1 1 0 ∈
 −
-(cid:124) (cid:123)(cid:122) (cid:125) (cid:124) (cid:123)(cid:122) (cid:125) (cid:124) (cid:123)(cid:122) (cid:125)
-=:A =:Bt =:C
-λ
-erzeugt.
-e) PSL (R) operiert auf G.
+z
+1
+z
+4
+⇔
+z
+3
+(z
 2
-Beweis:
-(cid:18) (cid:19)
-a b
-a) Sei z = x+iy H, d. h. y > 0 und σ = SL (R)
-∈ c d ∈ 2
-a(x+iy)+b
-σ(z) =
-⇒ c(x+iy)+d
-(ax+b)+iay (cx+d) icy
-= −
-(cx+d)+icy · (cx+d) icy
-−
-(ax+b)(cx+d)+aycy ay(cx+d) (ax+b)cy
-= +i −
-(cx+d)2+(cy)2 (cx+d)2+(cy)2
-axcx+axd+bcx+bd+aycy (ad bc)y
-= +i −
-(cx+d)2+(cy)2 (cx+d)2+(cy)2
-SL =2(R) ac(x2+y2)+adx+bcx+bd y
-+i
-(cx+d)2+(cy)2 (cx+d)2+(cy)2
-y
-(σ(z)) = > 0
-⇒ (cid:61) (cx+d)2+(cy)2
-Die Abbildung bildet also nach H ab. Außerdem gilt:
-(cid:18) (cid:19)
-1 0 x+iy
-z = = x+iy = z
-0 1 ◦ 1
-82 4.3.HYPERBOLISCHEGEOMETRIE
-und
-(cid:18) a b(cid:19) (cid:18)(cid:18) a(cid:48) b(cid:48)(cid:19) (cid:19) (cid:18) a b(cid:19) a(cid:48)z+b(cid:48)
-z =
-c d ◦ c(cid:48) d(cid:48) ◦ c d ◦ c(cid:48)z+d(cid:48)
-aa(cid:48)z+b(cid:48)
-+b
-c(cid:48)z+d(cid:48)
-=
-ca(cid:48)z+b(cid:48)
-+d
-c(cid:48)z+d(cid:48)
-a(a(cid:48)z+b(cid:48))+b(c(cid:48)z+d(cid:48))
-c(cid:48)z+d(cid:48)
-=
-c(a(cid:48)z+b(cid:48))+d(c(cid:48)z+d(cid:48))
-c(cid:48)z+d(cid:48)
-a(a(cid:48)z+b(cid:48))+b(c(cid:48)z+d(cid:48))
-=
-c(a(cid:48)z+b(cid:48))+d(c(cid:48)z+d(cid:48))
-(aa(cid:48)+bc(cid:48))z+ab(cid:48)+bd(cid:48)
-=
-(ca(cid:48)+db(cid:48))z+cb(cid:48)+dd(cid:48)
-(cid:18) aa(cid:48)+bc(cid:48) ab(cid:48)+bd(cid:48)(cid:19)
+−
+z
+4
+) = z
+1
+(z
+2
+−
+z
+4
+)
+⇔
+z
+3
+= z
+1
+oder z
+2
+= z
+4
+Alle z
+i
+sind paarweise verschieden
+⇒
+Widerspruch (cid:4)
+b) DV(z
+1
+,z
+4
+,z
+3
+,z
+2
+) = (z1−z2)·(z3−z4)
+(z1−z4)·(z3−z2)
+= 1
+DV(z1,z2,z3,z4)
+c) DV(z
+3
+,z
+2
+,z
+1
+,z
+4
+) = (z3−z4)·(z1−z2)
+(z3−z2)·(z1−z4)
+= 1
+DV(z1,z2,z3,z4)
+d) Zwei der z
+i
+dürfen gleich sein, da:
+Fall 1 z
+1
 = z
-ca(cid:48)+db(cid:48) cb(cid:48)+dd(cid:48)
-◦
-(cid:18)(cid:18) a b(cid:19) (cid:18) a(cid:48) b(cid:48)(cid:19)(cid:19)
+4
+oder z
+3
 = z
-c d · c(cid:48) d(cid:48) ◦
-b) Es gilt σ(z) = ( σ)(z) für alle σ SL (R) und z H.
-2
-− ∈ ∈
-(cid:18) (cid:19)
-c) Ansatz: σ = a b σ(x ) = ax0+b =! 0 ax +b = 0 b = ax
-c d 0 cx0+d ⇒ 0 ⇒ − 0
-σ(x ) = cx +d = 0 d = cx
-∞ ∞ ∞
-∞ ⇒ ⇒ −
-σ(x ) = 1 ax +b = cx +d
-1 1 1
-⇒
-a(x x ) = c(x x ) c = a x1−x0
-1 − 0 1 − ∞ ⇒ x1−x∞
-a2 x x1−x0 +a2x x1−x0 = 1
-∞x1−x∞ 0x1−x∞
-⇒ − ·
-a2 x1−x0 (x x ) = 1 a2 = x1−x∞
-⇒ x0−x∞ 0 − ∞ ⇒ (x1−x∞)(x1−x0)
-d) Es gilt:
-A−1 = A
-λ 1
-λ
-B−1 = B
-t −t
-C−1 = C3
-Daher genügt es zu zeigen, dass man mit A , B und C alle Matrizen aus SL (R)
-λ t 2
-erzeugen kann, genügt es also von einer beliebigen Matrix durch Multiplikation mit
-Matrizen der Form A , B und C die Einheitsmatrix zu generieren.
-λ t
-Sei also
-(cid:18) (cid:19)
-a b
-M = SL (R)
-c d ∈ 2
-beliebig.
-Fall 1: a = 0
-Da M SL (R) ist, gilt detM = 1 = ad bc = bc. Daher ist insbesondere c = 0. Es
 2
-∈ − − (cid:54)
-folgt:
-(cid:18) (cid:19) (cid:18) (cid:19) (cid:18) (cid:19)
-0 1 a b c d
-=
-1 0 · c d a b
-− − −
-83 4.3.HYPERBOLISCHEGEOMETRIE
-Gehe zu Fall 2.
-Fall 2: a = 0
-(cid:54)
-Nun wird in M durch M A an der Stelle von a eine 1 erzeugt:
+In diesem Fall ist DV(z
 1
-· a
-(cid:18) b(cid:19) (cid:18)1 0(cid:19) (cid:18) ab(cid:19)
-a 1
-a =
-c d · 0 a c ad
-a
-Gehe zu Fall 3.
-Fall 3: a = 1
-(cid:18) (cid:19) (cid:18) (cid:19) (cid:18) (cid:19)
-1 b 1 b 1 0
-− =
-c d · 0 1 c d bc
-−
-Da wir detM = 1 = ad bc = d bc wissen, gilt sogar M = 1.
-2,2
-− −
-Gehe zu Fall 4.
-Fall 4: a = 1, b = 0, d = 1
-(cid:18) (cid:19) (cid:18) (cid:19)
-1 0 1 0
-A CB C =
-−1 c
-c 1 0 1
-Daher erzeugen Matrizen der Form A , B und C die Gruppe SL R. (cid:4)
-λ t 2
-e) Es genügt die Aussage für Matrizen aus Proposition 4.9 (d) zu zeigen.
-(cid:18) (cid:19)
-λ 0
-σ = , also σ(z) = λ2z. Daraus ergeben sich die Situationen, die in
-• 0 λ−1
-Abbildung 4.22a und Abbildung 4.22b dargestellt sind.
-y
-3 λ2z
-y
-2 z
+,...,z
+4
+) = 0
+Fall 2 z
+1
+= z
+2
+oder z
 3
-λ2m+iλ2r
-2 1
-m+ir
-1 x λ2x
-0
-m λ2m x
-0 1 0 1 2 3 4
-1 0 1 2 3m+1 4 5 6 7 x −
-−
-(a) Fall 1 (b) Fall 2 (Strahlensatz)
-Abbildung 4.22: Beweis von Proposition 4.9 (e) für eine Diagonalmatrix
-(cid:18) (cid:19)
-1 a
-Offensichtlich gilt die Aussage für σ =
-• 0 1
-(cid:18) (cid:19)
-0 1
-Sei nun σ = , also σ(z) = 1
-• 1 0 −z
-−
-Bemerkung 69
-Zu hyperbolischen Geraden g ,g gibt es σ PSL (R) mit σ(g ) = g .
-1 2 2 1 2
-∈
-84 4.3.HYPERBOLISCHEGEOMETRIE
-z = r eiϕ
-y ·
+= z
+4
+Mit der Regel von L’Hospital folgt, dass in diesem Fall DV(z
 1
-·
-1 = 1 eiϕ
-0 z r ·
-x
-1 0 1
-−
-Abbildung 4.23: Inversion am Kreis
-Beweis: Nach Proposition 4.9 (c) gibt es σ mit σ(a ) = b und σ(a ) = b . Dann existiert
-1 1 2 2
-σ(g ) := g wegen dem Inzidenzaxiom §1 und ist eindeutig bestimmt.
-1 2
-Definition 65
-Seien z ,z ,z ,z C paarweise verschieden.
-1 2 3 4
-∈
-Dann heißt
-z1−z4
-(z z ) (z z )
-z1−z2 1 4 3 2
-DV(z ,z ,z ,z ) := = − · −
-1 2 3 4 z3−z4 (z z ) (z z )
-z3−z2 1 − 2 · 3 − 4
-Doppelverhältnis von z ,...,z .
-1 4
-Bemerkung 70 (Eigenschaften des Doppelverhältnisses)
-a) DV(z ,...,z ) C 0,1
-1 4
-∈ \{ }
-b) DV(z ,z ,z ,z ) = 1
-1 4 3 2 DV(z1,z2,z3,z4)
-c) DV(z ,z ,z ,z ) = 1
-3 2 1 4 DV(z1,z2,z3,z4)
-d) DV ist auch wohldefiniert, wenn eines der z = oder wenn zwei der z gleich sind.
-i i
+,...,z
+4
+) =
 ∞
-e) DV(0,1, ,z ) = z (Der Fall z 0,1, ist zugelassen).
-4 4 4
-∞ ∈ { ∞}
-f) Für σ PSL (C) und z ,...,z C ist
-2 1 4
-∈ ∈ ∪{∞}
-DV(σ(z ),σ(z ),σ(z ),σ(z )) = DV(z ,z ,z ,z )
-1 2 3 4 1 2 3 4
-und für σ(z) = 1 gilt
-z
-DV(σ(z ),σ(z ),σ(z ),σ(z )) = DV(z ,z ,z ,z )
-1 2 3 4 1 2 3 4
-g) DV(z ,z ,z ,z ) R z ,...,z liegen auf einer hyperbolischen Geraden.
-1 2 3 4 1 4
-∈ ∪{∞} ⇔
-Beweis:
-a) DV(z ,...,z ) = 0, da z paarweise verschieden
-1 4 i
-(cid:54)
-DV(z ,...,z ) = 1, da:
-1 4
-(cid:54)
-Annahme: DV(z ,...,z ) = 1
-1 4
-(z z )(z z ) = (z z )(z z )
-1 2 3 4 1 4 3 2
-⇔ − − − −
-85 4.3.HYPERBOLISCHEGEOMETRIE
-z z z z z z +z z = z z z z z z +z z
-1 3 2 3 1 4 2 4 1 3 3 4 1 2 2 4
-⇔ − − − −
-z z +z z = z z +z z
-2 3 1 4 3 4 1 2
-⇔
-z z z z = z z z z
-2 3 3 4 1 2 1 4
-⇔ − −
-z (z z ) = z (z z )
-3 2 4 1 2 4
-⇔ − −
-z = z oder z = z
-3 1 2 4
-⇔
-Alle z sind paarweise verschieden Widerspruch (cid:4)
-i
-⇒
-b) DV(z ,z ,z ,z ) = (z1−z2)·(z3−z4) = 1
-1 4 3 2 (z1−z4)·(z3−z2) DV(z1,z2,z3,z4)
-c) DV(z ,z ,z ,z ) = (z3−z4)·(z1−z2) = 1
-3 2 1 4 (z3−z2)·(z1−z4) DV(z1,z2,z3,z4)
-d) Zwei der z dürfen gleich sein, da:
-i
-Fall 1 z = z oder z = z
-1 4 3 2
-In diesem Fall ist DV(z ,...,z ) = 0
-1 4
-Fall 2 z = z oder z = z
-1 2 3 4
-Mit der Regel von L’Hospital folgt, dass in diesem Fall DV(z ,...,z ) = gilt.
-1 4
+gilt.
+Fall 3 z
+1
+= z
+3
+oder z
+2
+= z
+4
+Durch Einsetzen ergibt sich DV(z
+1
+,...,z
+4
+) = 1.
+Im Fall, dass ein z
+i
+=
+∞
+ist, ist entweder DV(0,1,
+∞
+,z
+4
+) = 0 oder DV(0,1,
+∞
+,z
+4
+)
+±∞
+e) DV(0,1,
 ∞
-Fall 3 z = z oder z = z
-1 3 2 4
-Durch Einsetzen ergibt sich DV(z ,...,z ) = 1.
-1 4
-Im Fall, dass ein z = ist, ist entweder DV(0,1, ,z ) = 0 oder DV(0,1, ,z )
-i 4 4
-∞ ∞ ∞ ±∞
-(0−z4)·(∞−1) z4·(∞−1)
-e) DV(0,1, ,z ) = = = z
-∞ 4 (0−1)·(∞−z4) ∞−z4 4
+,z
+4
+) =
+(0−z4)·(∞−1)
+(0−1)·(∞−z4)
+=
+z4·(∞−1)
+∞−z4
+= z
+4
 f) Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken.
-g) Sei σ PSL (C) mit σ(z ) = 0, σ(z ) = 1, σ(z ) = . Ein solches σ existiert, da man
-2 1 2 3
-∈ ∞
+g) Sei σ
+∈
+PSL
+2
+(C) mit σ(z
+1
+) = 0, σ(z
+2
+) = 1, σ(z
+3
+) =
+∞
+. Ein solches σ existiert, da man
 drei Parameter von σ wählen darf.
 Bem. 70.f
-DV(z ,...,z ) = DV(0,1, ,σ(z ))
-1 4 4
-⇒ ∞
-DV(z ,...,z ) R
-1 4
-⇒ ∈ ∪{∞}
-σ(z ) R
+⇒
+DV(z
+1
+,...,z
+4
+) = DV(0,1,
+∞
+,σ(z
+4
+))
+⇒
+DV(z
+1
+,...,z
+4
+)
+∈
+R
+∪{∞}
+⇔
+σ(z
 4
-⇔ ∈ ∪{∞}
-Behauptung folgt, weil σ−1(R ) ein Kreis oder eine Gerade in C ist.
+)
+∈
+R
+∪{∞}
+Behauptung folgt, weil σ−1(R
 ∪∞
+) ein Kreis oder eine Gerade in C ist.
 Definition 66
-Für z ,z H sei g die eindeutige hyperbolische Gerade durch z und z und a ,a die
-1 2 z1,z2 1 2 1 2
+Für z
+1
+,z
+2
 ∈
-„Schnittpunkte“ von g mit R .
+H sei g
+z1,z2
+die eindeutige hyperbolische Gerade durch z
+1
+und z
+2
+und a
+1
+,a
+2
+die
+„Schnittpunkte“ von g
 z1,z2
+mit R
 ∪{∞}
-Dann sei dH(z 1,z 2) := 21 |lnDV(a 1,z 1,a 2,z 2) und heiße hyperbolische Metrik.
+.
+Dann sei dH(z
+1
+,z
+2
+) := 1
+2|
+lnDV(a
+1
+,z
+1
+,a
+2
+,z
+2
+)
 |
-Beh.: Für z ,z H sei g die eindeutige hyperbolische Gerade durch z und z und a ,a
-1 2 z1,z2 1 2 1 2
+und heiße hyperbolische Metrik.
+Beh.: Für z
+1
+,z
+2
 ∈
-die „Schnittpunkte“ von g mit R .
+H sei g
+z1,z2
+die eindeutige hyperbolische Gerade durch z
+1
+und z
+2
+und a
+1
+,a
+2
+die „Schnittpunkte“ von g
 z1,z2
+mit R
 ∪{∞}
+.
 Dann gilt:
-1 1
-lnDV(a ,z ,a ,z ) = lnDV(a ,z ,a ,z )
-1 1 2 2 2 1 1 2
-2| | 2| |
+1
+2|
+lnDV(a
+1
+,z
+1
+,a
+2
+,z
+2
+)
+|
+=
+1
+2|
+lnDV(a
+2
+,z
+1
+,a
+1
+,z
+2
+)
+|
 Beweis: Wegen Bemerkung 70.c gilt:
+DV(a
 1
-DV(a ,z ,a ,z ) =
-1 1 2 2
-DV(a ,z ,a ,z )
-2 1 1 2
+,z
+1
+,a
+2
+,z
+2
+) =
+1
+DV(a
+2
+,z
+1
+,a
+1
+,z
+2
+)
 Außerdem gilt:
+ln
 1
-ln = lnx−1 = ( 1) lnx = lnx
-x − · −
+x
+= lnx−1 = (
+−
+1)
+·
+lnx =
+−
+lnx
 86 4.3.HYPERBOLISCHEGEOMETRIE
 Da der ln im Betrag steht, folgt direkt:
-1 1
-lnDV(a ,z ,a ,z ) = lnDV(a ,z ,a ,z )
-1 1 2 2 2 1 1 2
-2| | 2| |
+1
+2|
+lnDV(a
+1
+,z
+1
+,a
+2
+,z
+2
+)
+|
+=
+1
+2|
+lnDV(a
+2
+,z
+1
+,a
+1
+,z
+2
+)
+|
 Es ist also egal in welcher Reihenfolge die „Schnittpunkte“ mit der x-Achse im Doppelver-
 hältnis genutzt werden. (cid:4)
 Beh.: Die hyperbolische Metrik ist eine Metrik auf H.
 Beweis: Wegen Bemerkung 70.f ist
-d(z ,z ) := d(σ(z ),σ(z )) mit σ(a ) = 0, σ(a ) =
-1 2 1 2 1 2
+d(z
+1
+,z
+2
+) := d(σ(z
+1
+),σ(z
+2
+)) mit σ(a
+1
+) = 0, σ(a
+2
+) =
 ∞
-d. h. σ(g ) = iR (imaginäre Achse).
+d. h. σ(g
 z1,z2
-also gilt o. B. d. A. z = ia und z = ib mit a,b R und a < b.
-1 2
+) = iR (imaginäre Achse).
+also gilt o. B. d. A. z
+1
+= ia und z
+2
+= ib mit a,b
 ∈
-2d(ia,ib) = lnDV(0,ia, ,ib)
-| ∞ |
-(0 ib)( ia)
-= ln − ∞−
-| (0 ia)( ib) |
-− ∞−
+R und a < b.
+2d(ia,ib) =
+|
+lnDV(0,ia,
+∞
+,ib)
+|
+=
+|
+ln
+(0
+−
+ib)(
+∞−
+ia)
+(0
+−
+ia)(
+∞−
+ib) |
+=
+|
+ln
 b
-= ln
-| a |
-= lnb lna
-−
-Also: d(z ,z ) 0, d(z ,z ) = 0 z = z
-1 2 1 2 1 2
-≥ ⇔
-2d(z ,z ) = lnDV(a ,z ,a ,z )
-2 1 2 2 1 1
-| |
-= lnDV( ,ib,0,ia)
-| ∞ |
+a |
+= lnb
+−
+lna
+Also: d(z
+1
+,z
+2
+)
+≥
+0, d(z
+1
+,z
+2
+) = 0
+⇔
+z
+1
+= z
+2
+2d(z
+2
+,z
+1
+) =
+|
+lnDV(a
+2
+,z
+2
+,a
+1
+,z
+1
+)
+|
+=
+|
+lnDV(
+∞
+,ib,0,ia)
+|
 Bem. 70.b
-= lnDV(0,ib, ,ia)
-| ∞ |
-= 2d(z ,z )
-1 2
-Liegen drei Punkte z ,z ,z C auf einer hyperbolischen Geraden, so gilt d(z ,z ) =
-1 2 3 1 3
+=
+|
+lnDV(0,ib,
+∞
+,ia)
+|
+= 2d(z
+1
+,z
+2
+)
+Liegen drei Punkte z
+1
+,z
+2
+,z
+3
 ∈
-d(z ,z )+d(z ,z ) (wenn z zwischen z und z liegt).
-1 2 2 3 2 1 3
+C auf einer hyperbolischen Geraden, so gilt d(z
+1
+,z
+3
+) =
+d(z
+1
+,z
+2
+)+d(z
+2
+,z
+3
+) (wenn z
+2
+zwischen z
+1
+und z
+3
+liegt).
 Dreiecksungleichung: Beweis ist umständlich und wird hier nicht geführt. Es sei auf die
 Vorlesung „Hyperbolische Geometrie“ verwiesen.
 Satz 4.10
@@ -4555,8 +11578,9 @@ aber Axiom §5 ist verletzt.
 87 4.3.HYPERBOLISCHEGEOMETRIE
 Übungsaufgaben
 Aufgabe 8
-Seien (X,d) eine absolute Ebene und P,Q,R X Punkte. Der Scheitelwinkel des Winkels
+Seien (X,d) eine absolute Ebene und P,Q,R
 ∈
+X Punkte. Der Scheitelwinkel des Winkels
 ∠PQR ist der Winkel, der aus den Halbgeraden QP− und QR− gebildet wird. Die
 Nebenwinkel von ∠PQR sind die von QP+ und QR− bzw. QP− und QR+ gebildeten
 Winkel.
@@ -4564,874 +11588,2248 @@ Zeigen Sie:
 (a) Die beiden Nebenwinkel von ∠PQR sind gleich.
 (b) Der Winkel ∠PQR ist gleich seinem Scheitelwinkel.
 Aufgabe 9
-Sei (X,d) eine absolute Ebene. Der Abstand eines Punktes P zu einer Menge Y X von
+Sei (X,d) eine absolute Ebene. Der Abstand eines Punktes P zu einer Menge Y
 ⊆
-Punkten ist definiert durch d(P,Y) := infd(P,y) y Y.
-| ∈
+X von
+Punkten ist definiert durch d(P,Y) := infd(P,y)
+|
+y
+∈
+Y.
 Zeigen Sie:
-(a) Ist ABC ein Dreieck, in dem die Seiten AB und AC kongruent sind, so sind die
+(a) Ist
 (cid:52)
+ABC ein Dreieck, in dem die Seiten AB und AC kongruent sind, so sind die
 Winkel ∠ABC und ∠BCA gleich.
-(b) Ist ABC ein beliebiges Dreieck, so liegt der längeren Seite der größere Winkel
+(b) Ist
 (cid:52)
+ABC ein beliebiges Dreieck, so liegt der längeren Seite der größere Winkel
 gegenüber und umgekehrt.
-(c) Sind g eine Gerade und P / g ein Punkt, so gibt es eine eindeutige Gerade h mit
+(c) Sind g eine Gerade und P /
 ∈
-P h und die g im rechten Winkel schneidet. Diese Grade heißt Lot von P auf g
+g ein Punkt, so gibt es eine eindeutige Gerade h mit
+P
 ∈
+h und die g im rechten Winkel schneidet. Diese Grade heißt Lot von P auf g
 und der Schnittpunkt des Lots mit g heißt Lotfußpunkt.
 Aufgabe 10
-Seien f,g,h G und paarweise verschieden.
+Seien f,g,h
 ∈
-Zeigen Sie: f g g h f h
-(cid:107) ∧ (cid:107) ⇒ (cid:107)
+G und paarweise verschieden.
+Zeigen Sie: f
+(cid:107)
+g
+∧
+g
+(cid:107)
+h
+⇒
+f
+(cid:107)
+h
 Aufgabe 11
 Beweise den Kongruenzsatz SSS.
 5 Krümmung
 Definition 67
-Sei f : [a,b] Rn eine eine Funktion aus C∞. Dann heißt f Kurve.
+Sei f : [a,b]
 →
+Rn eine eine Funktion aus C∞. Dann heißt f Kurve.
 5.1 Krümmung von Kurven
 Definition 68
-Sei γ : I = [a,b] Rn eine Kurve.
+Sei γ : I = [a,b]
 →
+Rn eine Kurve.
 a) Die Kurve γ heißt durch Bogenlänge parametrisiert, wenn gilt:
-γ(cid:48)(t) = 1 t I
+(cid:107)
+γ(cid:48)(t)
+(cid:107)
+2
+= 1
+∀
+t
+∈
+I
+Dabei ist γ(cid:48)(t) = (γ(cid:48)
+1
+(t),γ(cid:48)
 2
-(cid:107) (cid:107) ∀ ∈
-Dabei ist γ(cid:48)(t) = (γ(cid:48)(t),γ(cid:48)(t),...,γ(cid:48)(t)).
-1 2 n
-b) l(γ) = (cid:82)b γ(cid:48)(t) dt heißt Länge von γ.
-a (cid:107) (cid:107)
+(t),...,γ(cid:48)
+n
+(t)).
+b) l(γ) = (cid:82)b
+a (cid:107)
+γ(cid:48)(t)
+(cid:107)
+dt heißt Länge von γ.
 Bemerkung 71 (Eigenschaften von Kurven I)
-Sei γ : I = [a,b] Rn eine C∞-Funktion.
+Sei γ : I = [a,b]
 →
-a) Ist γ durch Bogenlänge parametrisiert, so ist l(γ) = b a.
+Rn eine C∞-Funktion.
+a) Ist γ durch Bogenlänge parametrisiert, so ist l(γ) = b
 −
-b) Ist γ durch Bogenlänge parametrisiert, so ist γ(cid:48)(t) orthogonal zu γ(cid:48)(cid:48)(t) für alle t I.
+a.
+b) Ist γ durch Bogenlänge parametrisiert, so ist γ(cid:48)(t) orthogonal zu γ(cid:48)(cid:48)(t) für alle t
 ∈
+I.
 Beweis:
-a) l(γ) = (cid:82)b γ(cid:48)(t) dt = (cid:82)b 1dt = b a.
-a (cid:107) (cid:107) a −
-b) ImFolgendenwirddieAussagenurfürγ : [a,b] R2 bewiesen.Allerdingsfunktioniert
+a) l(γ) = (cid:82)b
+a (cid:107)
+γ(cid:48)(t)
+(cid:107)
+dt = (cid:82)b
+a
+1dt = b
+−
+a.
+b) ImFolgendenwirddieAussagenurfürγ : [a,b]
 →
+R2 bewiesen.Allerdingsfunktioniert
 der Beweis im Rn analog. Es muss nur die Ableitung angepasst werden.
-1 = γ(cid:48)(t) = γ(cid:48)(t) 2 = γ(cid:48)(t),γ(cid:48)(t)
-(cid:107) (cid:107) (cid:107) (cid:107) (cid:104) (cid:105)
+1 =
+(cid:107)
+γ(cid:48)(t)
+(cid:107)
+=
+(cid:107)
+γ(cid:48)(t)
+(cid:107)
+2 =
+(cid:104)
+γ(cid:48)(t),γ(cid:48)(t)
+(cid:105)
+⇒
+0 =
 d
-0 = γ(cid:48)(t),γ(cid:48)(t)
-⇒ dt(cid:104) (cid:105)
+dt(cid:104)
+γ(cid:48)(t),γ(cid:48)(t)
+(cid:105)
+=
 d
-= (γ(cid:48)(t)γ(cid:48)(t)+γ(cid:48)(t)γ(cid:48)(t))
-dt 1 1 2 2
-= 2 (γ(cid:48)(cid:48)(t) γ(cid:48)(t)+γ(cid:48)(cid:48)(t) γ(cid:48)(t))
-· 1 · 1 2 · 2
-= 2 γ(cid:48)(cid:48)(t),γ(cid:48)(t)
-·(cid:104) (cid:105)
+dt
+(γ(cid:48)
+1
+(t)γ(cid:48)
+1
+(t)+γ(cid:48)
+2
+(t)γ(cid:48)
+2
+(t))
+= 2
+·
+(γ(cid:48)(cid:48)
+1
+(t)
+·
+γ(cid:48)
+1
+(t)+γ(cid:48)(cid:48)
+2
+(t)
+·
+γ(cid:48)
+2
+(t))
+= 2
+·(cid:104)
+γ(cid:48)(cid:48)(t),γ(cid:48)(t)
+(cid:105)
 Definition 69
-Sei γ : I R2 eine durch Bogenlänge parametrisierte Kurve.
+Sei γ : I
 →
-a) Für t I sei n(t) Normalenvektor an γ in t wenn gilt:
+R2 eine durch Bogenlänge parametrisierte Kurve.
+a) Für t
 ∈
-n(t),γ(cid:48)(t) = 0, n(t) = 1 und det((γ(cid:48)(t),n(t))) = +1
-(cid:104) (cid:105) (cid:107) (cid:107)
+I sei n(t) Normalenvektor an γ in t wenn gilt:
+(cid:104)
+n(t),γ(cid:48)(t)
+(cid:105)
+= 0,
+(cid:107)
+n(t)
+(cid:107)
+= 1 und det((γ(cid:48)(t),n(t))) = +1
 89 5.1.KRÜMMUNGVONKURVEN
-b) Seit κ : I R so, dass gilt:
+b) Seit κ : I
 →
-γ(cid:48)(cid:48)(t) = κ(t) n(t)
+R so, dass gilt:
+γ(cid:48)(cid:48)(t) = κ(t)
 ·
+n(t)
 Dann heißt κ(t) Krümmung von γ in t.
 Da n(t) und γ(cid:48)(cid:48)(t) nach Bemerkung 71.b linear abhängig sind, existiert κ(t).
 Beispiel 45
 Gegeben sei ein Kreis mit Radius r, d. h. mit Umfang 2πr. Es gilt:
-(cid:18) (cid:19)
-t t
-γ(t) = r cos ,r sin für t [0,2πr]
-· r · r ∈
+γ(t) =
+(cid:18)
+r
+·
+cos
+t
+r
+,r
+·
+sin
+t
+r
+(cid:19)
+für t
+∈
+[0,2πr]
 ist parametrisiert durch Bogenlänge, da gilt:
-(cid:18) (cid:19)
-1 t 1 t
-γ(cid:48)(t) = (r )( sin ),r cos
-· r − r r r
-(cid:18) (cid:19)
-t t
-= sin ,cos
-− r r
+γ(cid:48)(t) =
+(cid:18)
+(r
+·
+1
+r
+)(
+−
+sin
+t
+r
+),r
+1
+r
+cos
+t
+r
+(cid:19)
+=
+(cid:18)
+−
+sin
+t
+r
+,cos
+t
+r
+(cid:19)
 Der Normalenvektor von γ in t ist
-(cid:18) (cid:19)
-t t
-n(t) = cos , sin
-− r − r
+n(t) =
+(cid:18)
+−
+cos
+t
+r
+,
+−
+sin
+t
+r
+(cid:19)
 da gilt:
-(cid:28)(cid:18) t(cid:19) (cid:18) t(cid:19)(cid:29)
-cos sin
-n(t),γ(cid:48)(t) = − r , − r
-(cid:104) (cid:105) sin t cos t
-− r r
-t t t t
-= ( cos ) ( sin )+( sin ) (cos )
-− r · − r − r · r
+(cid:104)
+n(t),γ(cid:48)(t)
+(cid:105)
+=
+(cid:28)(cid:18)
+−
+cos t
+r
+−
+sin t
+r
+(cid:19)
+,
+(cid:18)
+−
+sin t
+r
+cos t
+r
+(cid:19)(cid:29)
+= (
+−
+cos
+t
+r
+)
+·
+(
+−
+sin
+t
+r
+)+(
+−
+sin
+t
+r
+)
+·
+(cos
+t
+r
+)
 = 0
-(cid:13) (cid:13)
-(cid:13) t t (cid:13)
-n(t) = (cid:13)( cos , sin )(cid:13)
-(cid:107) (cid:107) (cid:13) − r − r (cid:13)
-t t
-= ( cos )2+( sin )2
-− r − r
+(cid:107)
+n(t)
+(cid:107)
+=
+(cid:13)
+(cid:13)
+(cid:13)
+(cid:13)
+(
+−
+cos
+t
+r
+,
+−
+sin
+t
+r
+)
+(cid:13)
+(cid:13)
+(cid:13)
+(cid:13)
+= (
+−
+cos
+t
+r
+)2+(
+−
+sin
+t
+r
+)2
 = 1
-det(γ(cid:48)(t),n(t)) = (cid:13) (cid:13) (cid:13)(cid:18) −sin rt −cos rt(cid:19)(cid:13) (cid:13) (cid:13)
-1 (cid:13) cos t sin t (cid:13)
-r − r
-t t t
-= ( sin )2 ( cos ) cos
-− r − − r · r
+det(γ(cid:48)
+1
+(t),n(t)) = (cid:13) (cid:13) (cid:13)
+(cid:13)
+(cid:18) − sin t r − cos t r
+cos t
+r −
+sin t
+r
+(cid:19)(cid:13) (cid:13) (cid:13)
+(cid:13)
+= (
+−
+sin
+t
+r
+)2
+−
+(
+−
+cos
+t
+r
+)
+·
+cos
+t
+r
 = 1
-Die Krümmung ist für jedes t konstant 1, da gilt:
+Die Krümmung ist für jedes t konstant 1
+r
+, da gilt:
+γ(cid:48)(cid:48)(t) =
+(cid:18)
+−
+1
+r
+cos
+t
+r
+,
+−
+1
+r
+sin
+t
 r
-(cid:18) (cid:19)
-1 t 1 t
-γ(cid:48)(cid:48)(t) = cos , sin
-−r r −r r
-(cid:18) (cid:19)
-1 t t
-= cos , sin
-r · − r − r
+(cid:19)
+=
 1
+r ·
+(cid:18)
+−
+cos
+t
+r
+,
+−
+sin
+t
+r
+(cid:19)
+⇒
 κ(t) =
-⇒ r
+1
+r
 90 5.2.TANGENTIALEBENE
 Definition 70
-Sei γ : I R3 eine durch Bogenlänge parametrisierte Kurve.
+Sei γ : I
 →
-a) Für t I heißt κ(t) := γ(cid:48)(cid:48)(t) die Krümmung von γ in t.
-∈ (cid:107) (cid:107)
-b) Ist für t I die Ableitung γ(cid:48)(cid:48)(t) = 0, so heißt γ(cid:48)(cid:48)(t) Normalenvektor an γ in t.
-∈ (cid:54) (cid:107)γ(cid:48)(cid:48)(t)(cid:107)
+R3 eine durch Bogenlänge parametrisierte Kurve.
+a) Für t
+∈
+I heißt κ(t) :=
+(cid:107)
+γ(cid:48)(cid:48)(t)
+(cid:107)
+die Krümmung von γ in t.
+b) Ist für t
+∈
+I die Ableitung γ(cid:48)(cid:48)(t)
+(cid:54)
+= 0, so heißt γ(cid:48)(cid:48)(t)
+(cid:107)γ(cid:48)(cid:48)(t)(cid:107)
+Normalenvektor an γ in t.
 c) b(t)seieinVektor,derγ(cid:48)(t),n(t)zueinerorientiertenOrthonormalbasisvonR3 ergänzt.
 Also gilt:
 det(γ(cid:48)(t),n(t),b(t)) = 1
 b(t) heißt Binormalenvektor, die Orthonormalbasis
-(cid:8) γ(cid:48)(t),n(t),b(t)(cid:9)
+(cid:8) γ(cid:48)(t),n(t),b(t) (cid:9)
 heißt begleitendes Dreibein.
 Bemerkung 72 (Eigenschaften von Kurven II)
-Sei γ : I R3 durch Bogenlänge parametrisierte Kurve.
+Sei γ : I
 →
+R3 durch Bogenlänge parametrisierte Kurve.
 a) n(t) ist orthogonal zu γ(cid:48)(t).
 b) b(t) aus Definition 70.c ist eindeutig.
 5.2 Tangentialebene
 Erinnerung Sie sich an Definition 32 „reguläre Fläche“.
 Äquivalent dazu ist: S ist lokal von der Form
-V(f) = (cid:8) x R3 (cid:12) (cid:12) f(x) = 0(cid:9)
+V(f) = (cid:8) x
 ∈
-für eine C∞-Funktion f : R3 R.
+R3 (cid:12) (cid:12) f(x) = 0 (cid:9)
+für eine C∞-Funktion f : R3
 →
+R.
 Definition 71
-Sei S R3 eine reguläre Fläche, s S, F : U V S eine lokale Parametrisierung um
-⊆ ∈ → ∩
-s V:
+Sei S
+⊆
+R3 eine reguläre Fläche, s
+∈
+S, F : U
+→
+V
+∩
+S eine lokale Parametrisierung um
+s
 ∈
-(u,v) (x(u,v),y(u,v),z(u,v))
+V:
+(u,v)
 (cid:55)→
-Für p = F−1(s) U sei
-∈ ∂x(p) ∂x(p)
-∂u ∂v
-J F(p) =  ∂∂ uy (p) ∂ ∂y v(p)
-∂z(p) ∂z(p)
-∂u ∂v
-und D F : R2 R3 die durch J (p) definierte lineare Abbildung.
-p F
-→
-Dann heißt T S := Bild(D F) die Tangentialebene an s S.
-s p
+(x(u,v),y(u,v),z(u,v))
+Für p = F−1(s)
+∈
+U sei
+J F (p) =
+
+
+∂x
+∂u
+(p) ∂x
+∂v
+(p)
+∂y ∂u (p) ∂y ∂v (p)
+∂z
+∂u
+(p) ∂z
+∂v
+(p)
+
+
+und D
+p
+F : R2
+→
+R3 die durch J
+F
+(p) definierte lineare Abbildung.
+Dann heißt T
+s
+S := Bild(D
+p
+F) die Tangentialebene an s
 ∈
+S.
 Bemerkung 73 (Eigenschaften der Tangentialebene)
-a) T S ist 2-dimensionaler Untervektorraum von R3.
+a) T
+s
+S ist 2-dimensionaler Untervektorraum von R3.
+b) T
 s
-b) T S = u˜,v˜ , wobei u˜,v˜ die Spaltenvektoren der Jacobi-Matrix J (p) sind.
-s F
-(cid:104) (cid:105)
-c) T S hängt nicht von der gewählten Parametrisierung ab.
+S =
+(cid:104)
+˜ u,˜ v
+(cid:105)
+, wobei ˜ u,˜ v die Spaltenvektoren der Jacobi-Matrix J
+F
+(p) sind.
+c) T
 s
+S hängt nicht von der gewählten Parametrisierung ab.
 91 5.2.TANGENTIALEBENE
-d) Sei S = V(f) eine reguläre Fläche in R3, also f : V R eine C∞-Funktion, V R3
-→ ⊆
-offen, grad(f)(x) = 0 für alle x S.
-(cid:54) ∈
-Dann ist T S = (grad(f)(s))⊥ für jedes s S.
+d) Sei S = V(f) eine reguläre Fläche in R3, also f : V
+→
+R eine C∞-Funktion, V
+⊆
+R3
+offen, grad(f)(x)
+(cid:54)
+= 0 für alle x
+∈
+S.
+Dann ist T
 s
+S = (grad(f)(s))⊥ für jedes s
 ∈
+S.
 Beweis:
-a) J ist eine 3 2-Matrix, die mit einem 2 1-Vektor multipliziert wird. Das ist
+a) J
 F
-× ×
+ist eine 3
+×
+2-Matrix, die mit einem 2
+×
+1-Vektor multipliziert wird. Das ist
 eine lineare Abbildung und aus der linearen Algebra ist bekannt, das das Bild ein
-Vektorraum ist. Da Rg(J ) = 2, ist auch dim(T S) = 2.
-F s
+Vektorraum ist. Da Rg(J
+F
+) = 2, ist auch dim(T
+s
+S) = 2.
 b) Hier kann man wie in Punkt a) argumentieren
-c) T S = x R3 parametrisierte Kurve γ : [ ε,+ε] S für ein ε > 0 mit γ(0) =
+c) T
 s
-{ ∈ |∃ − →
+S =
+{
+x
+∈
+R3
+|∃
+parametrisierte Kurve γ : [
+−
+ε,+ε]
+→
+S für ein ε > 0 mit γ(0) =
 s und γ(cid:48)(0) = x
 }
 Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken.
-d) Sei x T S,γ : [ ε,+ε] S eine parametrisierte Kurve mit ε > 0 und γ(cid:48)(0) = s,
-s
-∈ − →
-sodass γ(cid:48)(0) = x gilt. Da γ(t) S für alle t [ ε,ε], ist f γ = 0
-∈ ∈ − ◦
-0 = (f γ)(cid:48)(0) = grad(f)(γ(0)),γ(cid:48)(0)
-⇒ ◦ (cid:104) (cid:105)
-T S grad(f)(s)⊥
+d) Sei x
+∈
+T
 s
-⇒ ⊆
-=d =i =m ==2 T S = (grad(f)(s))⊥
+S,γ : [
+−
+ε,+ε]
+→
+S eine parametrisierte Kurve mit ε > 0 und γ(cid:48)(0) = s,
+sodass γ(cid:48)(0) = x gilt. Da γ(t)
+∈
+S für alle t
+∈
+[
+−
+ε,ε], ist f
+◦
+γ = 0
+⇒
+0 = (f
+◦
+γ)(cid:48)(0) =
+(cid:104)
+grad(f)(γ(0)),γ(cid:48)(0)
+(cid:105)
+⇒
+T
 s
+S
+⊆
+grad(f)(s)⊥
+dim=2 ====
 ⇒
+T
+s
+S = (grad(f)(s))⊥
 Definition 72
-a) Ein Normalenfeld auf der regulären Fläche S R3 ist eine Abbildung n : S S2
-⊆ → ⊆
-R3 mit n(s) T S⊥ für jedes s S.
+a) Ein Normalenfeld auf der regulären Fläche S
+⊆
+R3 ist eine Abbildung n : S
+→
+S2
+⊆
+R3 mit n(s)
+∈
+T
 s
-∈ ∈
+S⊥ für jedes s
+∈
+S.
 b) S heißt orientierbar, wenn es ein stetiges Normalenfeld auf S gibt.
 Manchmal wird zwischen einem Normalenfeld und einem Einheitsnormalenfeld unterschieden.
 Im Folgenden werden diese Begriffe jedoch synonym benutzt.
 Bemerkung 74 (Eigenschaften von Normalenfeldern)
 a) Ein Normalenfeld auf S ist genau dann stetig, wenn es glatt ist (also C∞).
-b) Zu jedem s S gibt es eine Umgebung V R3 von s und eine lokale Parametrisierung
-∈ ⊆
-F : U V von S um s, sodass auf F(U) = V S ein stetiges Normalenfeld existiert.
-→ ∩
+b) Zu jedem s
+∈
+S gibt es eine Umgebung V
+⊆
+R3 von s und eine lokale Parametrisierung
+F : U
+→
+V von S um s, sodass auf F(U) = V
+∩
+S ein stetiges Normalenfeld existiert.
 c) S ist genau dann orientierbar, wenn es einen differenzierbaren Atlas von S aus lokalen
-Parametrisierungen F : U V , i I gibt, sodass für alle i,j F und alle
-i i i
-→ ∈ ∈
-s V V S gilt:
-i j
-∈ ∩ ∩
+Parametrisierungen F
+i
+: U
+i
+→
+V
+i
+, i
+∈
+I gibt, sodass für alle i,j
+∈
+F und alle
+s
+∈
+V
+i
+∩
+V
+j
+∩
+S gilt:
+det(D
+s
 Vi→Vj
 (cid:122) (cid:125)(cid:124) (cid:123)
-det(D F F−1) > 0
-s j ◦ i
+F
+j ◦
+F−1
+i
 (cid:124) (cid:123)(cid:122) (cid:125)
 ∈R3×3
+) > 0
 Beweis: Wird hier nicht geführt.
 Beispiel 46 (Normalenfelder)
-1) S = S2, n = id ist ein stetiges Normalenfeld.
-1 S2
-Auch n = id ist ein stetiges Normalenfeld.
-2 S2
+1) S = S2, n
+1
+= id
+S2
+ist ein stetiges Normalenfeld.
+Auch n
+2
+=
 −
+id
+S2
+ist ein stetiges Normalenfeld.
 2) S = Möbiusband (vgl. Abbildung 5.1) ist nicht orientierbar. Es existiert ein Norma-
 lenfeld, aber kein stetiges Normalenfeld.
 92 5.3.GAUSS-KRÜMMUNG
 Abbildung 5.1: Möbiusband
 5.3 Gauß-Krümmung
 Bemerkung 75
-Sei S eine reguläre Fläche, s S, n(s) ist ein Normalenvektor in s, x T S, x = 1.
+Sei S eine reguläre Fläche, s
+∈
+S, n(s) ist ein Normalenvektor in s, x
+∈
+T
 s
-∈ ∈ (cid:107) (cid:107)
+S,
+(cid:107)
+x
+(cid:107)
+= 1.
 Sei E der von x und n(s) aufgespannte 2-dimensionale Untervektorraum von R3.
-Dann gibt es eine Umgebung V R3 von s, sodass
+Dann gibt es eine Umgebung V
 ⊆
-C := (s+E) S V
-∩ ∩
-das Bild einer durch Bogenlänge parametrisierten Kurve γ : [ ε,ε] S enthält mit γ(0) = s
-− →
+R3 von s, sodass
+C := (s+E)
+∩
+S
+∩
+V
+das Bild einer durch Bogenlänge parametrisierten Kurve γ : [
+−
+ε,ε]
+→
+S enthält mit γ(0) = s
 und γ(cid:48)(0) = x.
 Beweis: „Satz über implizite Funktionen“1
 Definition 73
-In der Situation aus Bemerkung 75 heißt die Krümmung κ (0) der Kurve γ in der Ebene
+In der Situation aus Bemerkung 75 heißt die Krümmung κ
 γ
+(0) der Kurve γ in der Ebene
 (s+E) im Punkt s die Normalkrümmung von S in s in Richtung x = γ(cid:48)(0).
-Man schreibt: κ (s,x) := κ (0)
-Nor γ
+Man schreibt: κ
+Nor
+(s,x) := κ
+γ
+(0)
 Hinweis: Die Krümmung ist nur bis auf das Vorzeichen bestimmt.
 Beispiel 47 (Gauß-Krümmung)
-1) S = S2 = V(X2+Y2+Z2 1) ist die Kugel um den Ursprung mit Radius 1, n = id,
+1) S = S2 = V(X2+Y2+Z2
 −
+1) ist die Kugel um den Ursprung mit Radius 1, n = id,
 s = (0,0,1), x = (1,0,0)
-E = R x+R n(s) (x,z-Ebene)
-⇒ · ·
-C = E S ist Kreislinie
+⇒
+E = R
+·
+x+R
+·
+n(s) (x,z-Ebene)
+C = E
 ∩
-κ (s,x) = 1 = 1
-Nor r
-2) S = V(X2+Z2 1) R3 ist ein Zylinder (siehe Abbildung 5.2a). s = (1,0,0)
-− ⊆
-x = (0,1,0) E = R e +R e (x,y-Ebene)
-1 1 1 2
-⇒ · ·
-S E = V(X2+Y2 1) E, Kreislinie in E
-1
-∩ − ∩
-κ (s,x ) = 1
-Nor 1
-⇒ ±
-x = (0,0,1),E = R e +R e (x,z-Ebene)
-2 2 1 3
-· ·
+S ist Kreislinie
+κ
+Nor
+(s,x) = 1
+r
+= 1
+2) S = V(X2+Z2
+−
+1)
+⊆
+R3 ist ein Zylinder (siehe Abbildung 5.2a). s = (1,0,0)
+x
+1
+= (0,1,0)
+⇒
+E
+1
+= R
+·
+e
+1
++R
+·
+e
+2
+(x,y-Ebene)
+S
+∩
+E
+1
+= V(X2+Y2
+−
+1)
+∩
+E, Kreislinie in E
+⇒
+κ
+Nor
+(s,x
+1
+) =
+±
+1
+x
+2
+= (0,0,1),E
+2
+= R
+·
+e
+1
++R
+·
+e
+3
+(x,z-Ebene)
 1Siehe z. B. https://github.com/MartinThoma/LaTeX-examples/tree/master/documents/Analysis%20II
 93 5.3.GAUSS-KRÜMMUNG
-V E 2 S = (cid:8) (1,0,z) R3 (cid:12) (cid:12) z R(cid:9) ist eine Gerade
-∩ ∩ ∈ ∈
-κ (s,x ) = 0
-Nor 2
+V
+∩
+E 2
+∩
+S = (cid:8) (1,0,z)
+∈
+R3 (cid:12) (cid:12) z
+∈
+R(cid:9) ist eine Gerade
 ⇒
-3) S = V(X2 Y2 Z), s = (0,0,0) (Hyperbolisches Paraboloid, siehe Abbildung 5.2b)
-− −
-x = (1,0,0), n(s) = (0,0,1)
+κ
+Nor
+(s,x
+2
+) = 0
+3) S = V(X2
+−
+Y2
+−
+Z), s = (0,0,0) (Hyperbolisches Paraboloid, siehe Abbildung 5.2b)
+x
+1
+= (1,0,0), n(s) = (0,0,1)
+x
+2
+= (0,1,0)
+κ
+Nor
+(s,x
 1
-x = (0,1,0)
+) = 2
+κ
+Nor
+(s,x
 2
-κ (s,x ) = 2
-Nor 1
-κ (s,x ) = 2
-Nor 2
+) =
 −
-5
-4
 2
-3
-0 z z
+− 1.5 − 1 − 0.5 0 0.5 1 1.5 − 1
+0
+1
+0
+1
 2
-1 −2
-0 f(x,y)
-42
+3
+4
+5
+x y
+z
+(a) S =V(X2+Z2−1)
+− 2 − 1.5 − 1 − 0.5 0 0.5 1 1.5 2 − 2 − 1
+0
 1
-2 1
-0 0 0
-y −1 −1.5 −1 −0.5 0 x 0.5 1 1.5 −− 42 y −1 −2−2 −1.5 −1 −0.5 0 x 0.5 1 1.5 2
-(a) S =V(X2+Z2−1) (b) S =V(X2−Y2−Z)
+2
+− 2
+0
+2
+x y
+z
+− 4 − 2
+0
+2
+4
+f(x,y)
+(b) S =V(X2−Y2−Z)
 Abbildung 5.2: Beispiele für reguläre Flächen
 Definition 74
-Sei S R3 eine reguläre Fläche, s S und n ein stetiges Normalenfeld auf S.
-⊆ ∈
-γ : [ ε,ε] S eine nach Bogenlänge parametrisierte Kurve (ε > 0) mit γ(0) = s und
-− →
-γ(cid:48)(cid:48)(0) = 0.
+Sei S
+⊆
+R3 eine reguläre Fläche, s
+∈
+S und n ein stetiges Normalenfeld auf S.
+γ : [
+−
+ε,ε]
+→
+S eine nach Bogenlänge parametrisierte Kurve (ε > 0) mit γ(0) = s und
+γ(cid:48)(cid:48)(0)
 (cid:54)
+= 0.
+Sei n(0) :=
 γ(cid:48)(cid:48)(0)
-Sei n(0) := . Zerlege
 (cid:107)γ(cid:48)(cid:48)(0)(cid:107)
-n(0) = n(0)t+n(0)⊥ mit n(0)t T S und n(0)⊥ (T S)⊥
-s s
-∈ ∈
-Dann ist n(0)⊥ = n(0),n(s) n(s)
-(cid:104) (cid:105)·
-κ (s,γ) := γ(cid:48)(cid:48)(0),n(s) die Normalkrümmung.
+. Zerlege
+n(0) = n(0)t+n(0)⊥ mit n(0)t
+∈
+T
+s
+S und n(0)⊥
+∈
+(T
+s
+S)⊥
+Dann ist n(0)⊥ =
+(cid:104)
+n(0),n(s)
+(cid:105)·
+n(s)
+κ
 Nor
-(cid:104) (cid:105)
+(s,γ) :=
+(cid:104)
+γ(cid:48)(cid:48)(0),n(s)
+(cid:105)
+die Normalkrümmung.
 Bemerkung 76
-Sei γ(t) = γ( t), t [ ε,ε]. Dann ist κ (s,γ) = κ (s,γ).
-Nor Nor
-− ∈ −
-Beweis: γ(cid:48)(cid:48)(0) = γ(cid:48)(cid:48)(0), da γ(cid:48)(0) = γ(cid:48)(0).
+Sei γ(t) = γ(
 −
-Es gilt: κ (s,γ) hängt nur von γ(cid:48)(0) ab und ist gleich κ (s,γ(cid:48)(0)).
-Nor Nor
-| |
+t), t
+∈
+[
+−
+ε,ε]. Dann ist κ
+Nor
+(s,γ) = κ
+Nor
+(s,γ).
+Beweis: γ(cid:48)(cid:48)(0) = γ(cid:48)(cid:48)(0), da γ(cid:48)(0) =
+−
+γ(cid:48)(0).
+Es gilt: κ
+Nor
+(s,γ) hängt nur von
+|
+γ(cid:48)(0)
+|
+ab und ist gleich κ
+Nor
+(s,γ(cid:48)(0)).
 Bemerkung 77
 Sei S eine reguläre Fläche und n = n(s) ein Normalenvektor an S in s.
-Sei T s1S = x T sS x = 1 ∼= S1. Dann ist
-{ ∈ | (cid:107) (cid:107) }
-κn (s) : T1S R, x κ (s,x)
-Nor s Nor
-→ (cid:55)→
-eine glatte Funktion und Bildκn (s) ist ein abgeschlossenes Intervall.
+Sei T1 s S =
+{
+x
+∈
+T s S
+| (cid:107)
+x
+(cid:107)
+= 1
+}
+∼ = S1. Dann ist
+κn
+Nor
+(s) : T1
+s
+S
+→
+R, x
+(cid:55)→
+κ
 Nor
+(s,x)
+eine glatte Funktion und Bildκn
+Nor
+(s) ist ein abgeschlossenes Intervall.
 Definition 75
 Sei S eine reguläre Fläche und n = n(s) ein Normalenvektor an S in s.
 94 5.3.GAUSS-KRÜMMUNG
-a) κn(s) : = min(cid:8) κn (s,x) (cid:12) (cid:12) x T1S (cid:9) und heißen Hauptkrümmungen von S in s.
-1 Nor ∈ s
-κn(s) : = max(cid:8) κn (s,x) (cid:12) (cid:12) x T1S (cid:9)
-2 Nor ∈ s
-b) K(s) := κn(s) κn(s) heißt Gauß-Krümmung von S in s.
-1 · 2
+a) κn
+1
+(s) : = min (cid:8) κn
+Nor
+(s,x) (cid:12) (cid:12) x
+∈
+T1
+s
+S (cid:9) und
+κn
+2
+(s) : = max (cid:8) κn
+Nor
+(s,x) (cid:12) (cid:12) x
+∈
+T1
+s
+S (cid:9)
+heißen Hauptkrümmungen von S in s.
+b) K(s) := κn
+1
+(s)
+·
+κn
+2
+(s) heißt Gauß-Krümmung von S in s.
 Bemerkung 78
-Ersetzt man n durch n, so gilt:
-−
-κ−n(s,x) = κn (x) x T1S
-Nor − Nor ∀ ∈ s
-κ−n(s) = κn(s)
-⇒ 1 − 2
-κ−n(s) = κn(s)
-2 − 1
+Ersetzt man n durch
+−
+n, so gilt:
+κ−n
+Nor
+(s,x) =
+−
+κn
+Nor
+(x)
+∀
+x
+∈
+T1
+s
+S
+⇒
+κ−n
+1
+(s) =
+−
+κn
+2
+(s)
+κ−n
+2
+(s) =
+−
+κn
+1
+(s)
 und K−n(s) = Kn(s) =: K(s)
 Beispiel 48
-1) S = S2. Dann ist κ (s) = κ (s) = 1 s S2
-1 2
-± ∀ ∈
-K(s) = 1
+1) S = S2. Dann ist κ
+1
+(s) = κ
+2
+(s) =
+±
+1
+∀
+s
+∈
+S2
 ⇒
+K(s) = 1
 2) Zylinder:
-κ (s) = 0,κ (s) = 1 K(s) = 0
-1 2
+κ
+1
+(s) = 0,κ
+2
+(s) = 1
 ⇒
+K(s) = 0
 3) Sattelpunkt auf hyperbolischem Paraboloid:
-κ (s) < 0,κ (s) = 0 K(s) < 0
-1 2
+κ
+1
+(s) < 0,κ
+2
+(s) = 0
 →
+K(s) < 0
 4) S = Torus. Siehe Abbildung 5.3
 s
+1
+s
 2
-s s
-3 1
-Abbildung 5.3: K(s ) > 0, K(s ) = 0, K(s ) < 0
-1 2 3
+s
+3
+Abbildung 5.3: K(s
+1
+) > 0, K(s
+2
+) = 0, K(s
+3
+) < 0
 Bemerkung 79
-Sei S eine reguläre Fläche, s S ein Punkt.
+Sei S eine reguläre Fläche, s
 ∈
+S ein Punkt.
 95 5.4.ERSTEUNDZWEITEFUNDAMENTALFORM
-a) Ist K(s) > 0, so liegt S in einer Umgebung von s ganz auf einer Seite von T S +s.
+a) Ist K(s) > 0, so liegt S in einer Umgebung von s ganz auf einer Seite von T
 s
-b) Ist K(s) < 0, so schneidet jede Umgebung von s in S beide Seiten von T S +s.
+S +s.
+b) Ist K(s) < 0, so schneidet jede Umgebung von s in S beide Seiten von T
 s
+S +s.
 5.4 Erste und zweite Fundamentalform
-Sei S R3 eine reguläre Fläche, s S, T S die Tangentialebene an S in s und F : U V eine
+Sei S
+⊆
+R3 eine reguläre Fläche, s
+∈
+S, T
 s
-⊆ ∈ →
+S die Tangentialebene an S in s und F : U
+→
+V eine
 lokale Parametrisierung von S um s. Weiter sei p := F−1(s).
 Definition 76
-Sei I R2×2 definiert als
+Sei I
 S
 ∈
-(cid:18) (cid:19) (cid:18) (cid:19)
-g (s) g (s) E(s) F(s)
-1,1 1,2
-I : = =
+R2×2 definiert als
+I
 S
-g (s) g (s) F(s) G(s)
-1,2 2,2
-mit g = g (D F(e ),D F(e ))
-i,j s p i p j
-∂F ∂F
-= (p), (p) i,j 1,2
-(cid:104)∂u ∂u (cid:105) ∈ { }
-i j
-Die Matrix I heißt erste Fundamentalform von S bzgl. der Parametrisierung F.
+: =
+(cid:18)
+g
+1,1
+(s) g
+1,2
+(s)
+g
+1,2
+(s) g
+2,2
+(s)
+(cid:19)
+=
+(cid:18)
+E(s) F(s)
+F(s) G(s)
+(cid:19)
+mit g
+i,j
+= g
+s
+(D
+p
+F(e
+i
+),D
+p
+F(e
+j
+))
+=
+(cid:104)
+∂F
+∂u
+i
+(p),
+∂F
+∂u
+j
+(p)
+(cid:105)
+i,j
+∈ {
+1,2
+}
+Die Matrix I
 S
+heißt erste Fundamentalform von S bzgl. der Parametrisierung F.
 Bemerkung 80
-a) Die Einschränkung des Standardskalarproduktes des R3 auf T S macht T S zu einem
-s s
+a) Die Einschränkung des Standardskalarproduktes des R3 auf T
+s
+S macht T
+s
+S zu einem
 euklidischen Vektorraum.
-b) D F(e ),D F(e ) ist eine Basis von T S.
-p 1 p 2 s
-{ }
-c) Bzgl. der Basis D F(e ),D F(e ) hat das Standardskalarprodukt aus Bemer-
-p 1 p 2
-{ }
-kung 80.a die Darstellungsmatrix I .
+b)
+{
+D
+p
+F(e
+1
+),D
+p
+F(e
+2
+)
+}
+ist eine Basis von T
+s
+S.
+c) Bzgl. der Basis
+{
+D
+p
+F(e
+1
+),D
+p
+F(e
+2
+)
+}
+hat das Standardskalarprodukt aus Bemer-
+kung 80.a die Darstellungsmatrix I
 S
-d) g (s) ist eine differenzierbare Funktion von s.
+.
+d) g
 i,j
+(s) ist eine differenzierbare Funktion von s.
 Bemerkung 81
-(cid:13) (cid:13)2
-(cid:13)∂F ∂F (cid:13)
-det(I S) = (cid:13) (p) (p)(cid:13)
-(cid:13)∂u × ∂u (cid:13)
-1 2
-   
-x y
-1 1
-Beweis: Sei ∂∂ uF 1(p) = x 2, ∂∂ uF 2(p) = y 2
-x y3
+det(I S ) =
+(cid:13)
+(cid:13)
+(cid:13)
+(cid:13)
+∂F
+∂u
+1
+(p)
+×
+∂F
+∂u
+2
+(p)
+(cid:13)
+(cid:13)
+(cid:13)
+(cid:13)
+2
+Beweis: Sei ∂F ∂u1 (p) =
+
+
+x
+1
+x 2
+x
 3
- 
+
+, ∂F ∂u2 (p) =
+
+
+y
+1
+y 2
+y3
+
+
+Dann ist ∂F ∂u1 (p) × ∂F ∂u2 (p) =
+
+
 z
 1
-Dann ist ∂∂ uF 1(p) ∂∂ uF 2(p) = z 2 mit
-×
+z 2
+z
+3
+
+ mit
 z
+1
+= x
+2
+y
 3
-z = x y x y
-1 2 3 3 2
 −
-z = x y x y
-2 3 1 1 3
+x
+3
+y
+2
+z
+2
+= x
+3
+y
+1
 −
-z = x y x y
-3 1 2 2 1
+x
+1
+y
+3
+z
+3
+= x
+1
+y
+2
 −
-∂F ∂F
-(p) (p) = z2+z2+z2
-⇒ (cid:107)∂u × ∂u (cid:107) 1 2 3
-1 2
+x
+2
+y
+1
+⇒ (cid:107)
+∂F
+∂u
+1
+(p)
+×
+∂F
+∂u
+2
+(p)
+(cid:107)
+= z2
+1
++z2
+2
++z2
+3
 96 5.4.ERSTEUNDZWEITEFUNDAMENTALFORM
-det(I ) = g g g2
-S 1,1 2,2 1,2
+det(I
+S
+) = g
+1,1
+g
+2,2
+−
+g2
+1,2
+=
+(cid:42) 
+
+x
+1
+x 2
+x
+3
+
+,
+
+
+x
+1
+x 2
+x
+3
+
+
+(cid:43)(cid:42) 
+
+y
+1
+y 2
+y
+3
+
+,
+
+
+y
+1
+y 2
+y
+3
+
+
+(cid:43)
 −
-(cid:42) x   x  (cid:43)(cid:42) y   y  (cid:43) (cid:42) x   y  (cid:43)2
-1 1 1 1 1 1
-= x 2,x 2 y 2,y 2 x 2,y 2
+(cid:42) 
+
+x
+1
+x 2
+x
+3
+
+,
+
+
+y
+1
+y 2
+y
+3
+
+
+(cid:43)2
+= (x2
+1
++x2
+2
++x2
+3
+)(y2
+1
++y2
+2
++y2
+3
+)
 −
-x x y y x y
-3 3 3 3 3 3
-= (x2+x2+x2)(y2+y2+y2) (x y +x y +x y )2
-1 2 3 1 2 3 − 1 1 2 2 3 3
+(x
+1
+y
+1
++x
+2
+y
+2
++x
+3
+y
+3
+)2
 Definition 77
+a) Das Differential dA =
 (cid:112)
-a) Das Differential dA = det(I)du du heißt Flächenelement von S bzgl. der Para-
-1 2
+det(I)du
+1
+du
+2
+heißt Flächenelement von S bzgl. der Para-
 metrisierung F.
-b) Für eine Funktion f : V R heißt
+b) Für eine Funktion f : V
 →
-(cid:90) (cid:90)
-(cid:112)
-fdA := f(F(u ,u )) detI(s)du du
-1 2 1 2
-V U (cid:124) (cid:123)(cid:122) (cid:125)
+R heißt
+(cid:90)
+V
+fdA :=
+(cid:90)
+U
+f(F(u
+1
+,u
+2
+)
+(cid:124) (cid:123)(cid:122) (cid:125)
 =:s
+)
+(cid:112)
+detI(s)du
+1
+du
+2
 der Wert des Integrals von f über V, falls das Integral rechts existiert.
 Bemerkung 82
+a)
 (cid:82)
-a) fdA ist unabhängig von der gewählten Parametrisierung.
 V
-b) Sei f : S R eine Funktion, die im Sinne von Definition 77.b lokal integrierbar ist.
+fdA ist unabhängig von der gewählten Parametrisierung.
+b) Sei f : S
 →
+R eine Funktion, die im Sinne von Definition 77.b lokal integrierbar ist.
+Dann ist
 (cid:82)
-Dann ist fdA wohldefiniert, falls (z. B.) S kompakt ist.
 S
+fdA wohldefiniert, falls (z. B.) S kompakt ist.
 Etwa:
-(cid:90) n (cid:90)
+(cid:90)
+S
+fdA =
+n
 (cid:88)
-fdA = fdA
-S Vi
 i=1
 (cid:90)
-(cid:88)
+Vi
 fdA
 −
-Vi∩Vj
+(cid:88)
 i(cid:54)=j
 (cid:90)
+Vi∩Vj
+fdA
++
 (cid:88)
-+ fdA
+i,j,k
+(cid:90)
 Vi∩Vj∩V
-i,j,k k
-...
+k
+fdA
 −
+...
 Beweis:
 a) Mit Transformationsformel.
 b) Ist dem Leser überlassen.
 Proposition 5.1
-Sei S R3 eine reguläre, orientierbare Fläche mit glatten Normalenfeld n : S S2.
-⊆ →
+Sei S
+⊆
+R3 eine reguläre, orientierbare Fläche mit glatten Normalenfeld n : S
+→
+S2.
 Dann gilt:
-a) n induziert für jedes s S eine lineare Abbildung d n : T S T S2 durch
-s s n(s)
-∈ →
-d (cid:12)
-d n(x) = n(s„+“tx)(cid:12)
-s (cid:12)
-dt (cid:124) (cid:123)(cid:122) (cid:125) t=0
+a) n induziert für jedes s
+∈
+S eine lineare Abbildung d
+s
+n : T
+s
+S
+→
+T
+n(s)
+S2 durch
+d
+s
+n(x) =
+d
+dt
+n(s„+“tx
+(cid:124) (cid:123)(cid:122) (cid:125)
 SollaufFlächeS bleiben
-Die Abbildung d n heißt Weingarten-Abbildung
+)
+(cid:12)
+(cid:12)
+(cid:12)
+t=0
+Die Abbildung d
 s
+n heißt Weingarten-Abbildung
 97 5.4.ERSTEUNDZWEITEFUNDAMENTALFORM
-b) T S2 = T S.
-n(s) s
-c) d n ist ein Endomorphismus von T S.
-s s
-d) d n ist selbstadjungiert bzgl. des Skalarproduktes I .
-s S
+b) T
+n(s)
+S2 = T
+s
+S.
+c) d
+s
+n ist ein Endomorphismus von T
+s
+S.
+d) d
+s
+n ist selbstadjungiert bzgl. des Skalarproduktes I
+S
+.
 Hinweis: Die Weingarten-Abbildung wird auch Formoperator genannt.
 98 5.4.ERSTEUNDZWEITEFUNDAMENTALFORM
 Beweis:
 a) Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken.
-b) T S2 = n(s) ⊥ = T S
-n(S) s
-(cid:104) (cid:105)
-c) Wegen Proposition 5.1 (a) ist d n ein Homomorphismus.
+b) T
+n(S)
+S2 =
+(cid:104)
+n(s)
+(cid:105)
+⊥ = T
+s
+S
+c) Wegen Proposition 5.1 (a) ist d
+s
+n ein Homomorphismus.
+d) Zu zeigen:
+∀
+x,y
+∈
+I
+s
+S :
+(cid:104)
+x,d
+s
+n(y)
+(cid:105)
+=
+(cid:104)
+d
 s
-d) Zu zeigen: x,y I S : x,d n(y) = d n(x),y
-s s s
-∀ ∈ (cid:104) (cid:105) (cid:104) (cid:105)
+n(x),y
+(cid:105)
 Aufgrund der Bilinearität des Skalarproduktes genügt es diese Eigenschaft für die
 Basisvektoren zu zeigen.
-Sei x = D F(e ) = ∂F (p) i = 1,2
-i p i ∂ui
+Sei x
+i
+= D
+p
+F(e
+i
+) = ∂F
+∂ui
+(p) i = 1,2
+Beh.:
+(cid:104)
+x
+i
+,d
+s
+n(x
+j
+)
+(cid:105)
+=
+(cid:104)
 ∂2F
-Beh.: x ,d n(x ) = (p),d n(x )
-(cid:104) i s j (cid:105) (cid:104)∂ui∂uj s i (cid:105)
+∂ui∂uj
+(p),d
+s
+n(x
+i
+)
+(cid:105)
+⇒ (cid:104)
 ∂2F
-(p),d n(x ) = x ,d n(x )
-⇒ (cid:104)∂ui∂uj s i (cid:105) (cid:104) j s i (cid:105)
+∂ui∂uj
+(p),d
+s
+n(x
+i
+)
+(cid:105)
+=
+(cid:104)
+x
+j
+,d
+s
+n(x
+i
+)
+(cid:105)
+Bew.: 0 =
+(cid:104)
+∂F
+∂u
+(p+te
+j
+),n(p+te
+j
+)
+(cid:105)
+⇒
+0 =
+d
+dt
+(cid:18)
+(cid:104)
+∂F
+∂u
+(p+te
+j
+),n(p+te
+j
+)
+(cid:105)
+(cid:19)(cid:12)
+(cid:12)
+(cid:12)
+t=0
+=
+(cid:104)
+d
+dt
 ∂F
-Bew.: 0 = (p+te ),n(p+te )
-j j
-(cid:104)∂u (cid:105)
-(cid:18) (cid:19)(cid:12)
-d ∂F
-0 = (p+te ),n(p+te ) (cid:12)
-j j (cid:12)
-⇒ dt (cid:104)∂u (cid:105) t=0
-d ∂F (cid:12)
-= (p+te )(cid:12) ,n(s) + x ,d nD F(e )
-j (cid:12) i s p j
-(cid:104)dt∂u i t=0 (cid:105) (cid:104) (cid:124) (cid:123)(cid:122) (cid:125)(cid:105)
-(cid:124) (cid:123)(cid:122) (cid:125) xj
+∂u i
+(p+te
+j
+)
+(cid:124) (cid:123)(cid:122) (cid:125)
 ∂2F
-(p)
 ∂uj∂ui
+(p)
+(cid:12)
+(cid:12)
+(cid:12)
+t=0
+,n(s)
+(cid:105)
++
+(cid:104)
+x
+i
+,d
+s
+nD
+p
+F(e
+j
+)
+(cid:124) (cid:123)(cid:122) (cid:125)
+xj
+(cid:105)
 Definition 78
-Die durch d n definierte symmetrische Bilinearform auf T S heißt zweite Fundamental-
-s s
+Die durch
 −
+d
+s
+n definierte symmetrische Bilinearform auf T
+s
+S heißt zweite Fundamental-
 form von S in s bzgl. F.
-Man schreibt: II (x,y) = d n(x),y = I ( d n(x),y)
-s s s s
-(cid:104)− (cid:105) −
+Man schreibt: II
+s
+(x,y) =
+(cid:104)−
+d
+s
+n(x),y
+(cid:105)
+= I
+s
+(
+−
+d
+s
+n(x),y)
 Bemerkung 83
-Bezüglich der Basis x ,x von T S hat II die Darstellungsmatrix
-1 2 s s
-{ }
+Bezüglich der Basis
+{
+x
+1
+,x
+2
+}
+von T
+s
+S hat II
+s
+die Darstellungsmatrix
+(h
+(s)
+i,j
+)
+i,j=1,2
+mit h
+i,j
+(s) =
+(cid:104)
 ∂2F
-(h(s)
-) mit h (s) = (p),n(s)
-i,j i,j=1,2 i,j (cid:104)∂u ∂u (cid:105)
-i j
+∂u
+i
+∂u
+j
+(p),n(s)
+(cid:105)
 Proposition 5.2
-Sei γ : [ ε,ε] S eine nach Bogenlänge parametrisierte Kurve mit γ(0) = s. Dann gilt:
-− →
-κ (s,γ) = II (γ(cid:48)(0),γ(cid:48)(0))
-Nor s
-Beweis: Nach Definition 74 ist κ (s,γ) = γ(cid:48)(cid:48)(0),n(s) . Nach Voraussetzung gilt
+Sei γ : [
+−
+ε,ε]
+→
+S eine nach Bogenlänge parametrisierte Kurve mit γ(0) = s. Dann gilt:
+κ
+Nor
+(s,γ) = II
+s
+(γ(cid:48)(0),γ(cid:48)(0))
+Beweis: Nach Definition 74 ist κ
 Nor
-(cid:104) (cid:105)
-n(γ(t)) γ(cid:48)(t) γ(cid:48)(cid:48)(0),n(s) = 0
-⊥ ⇔ (cid:104) (cid:105)
+(s,γ) =
+(cid:104)
+γ(cid:48)(cid:48)(0),n(s)
+(cid:105)
+. Nach Voraussetzung gilt
+n(γ(t))
+⊥
+γ(cid:48)(t)
+⇔ (cid:104)
+γ(cid:48)(cid:48)(0),n(s)
+(cid:105)
+= 0
 Die Ableitung nach t ergibt
+0 =
 d
-0 = ( n(γ(t)),γ(cid:48)(t))
-dt (cid:104)
-(cid:28) d (cid:12) (cid:29)
-= n(γ(t))(cid:12) ,γ(cid:48)(0) + n(s),γ(cid:48)(cid:48)(0)
+dt
+(
+(cid:104)
+n(γ(t)),γ(cid:48)(t))
+=
+(cid:28) d
+dt
+n(γ(t))
+(cid:12)
+(cid:12)
 (cid:12)
-dt t=0 (cid:104) (cid:105)
+t=0
+,γ(cid:48)(0)
+(cid:29)
++
+(cid:104)
+n(s),γ(cid:48)(cid:48)(0)
+(cid:105)
 99 5.4.ERSTEUNDZWEITEFUNDAMENTALFORM
-= d n(γ(cid:48)(0)),γ(cid:48)(0) +κ (s,γ)
-s Nor
-(cid:104) (cid:105)
-= II (γ(cid:48)(0),γ(cid:48)(0))+κ (s,γ)
-s Nor
+=
+(cid:104)
+d
+s
+n(γ(cid:48)(0)),γ(cid:48)(0)
+(cid:105)
++κ
+Nor
+(s,γ)
+=
 −
+II
+s
+(γ(cid:48)(0),γ(cid:48)(0))+κ
+Nor
+(s,γ)
 Folgerung 5.3
 Die beiden Definitionen von Normalkrümmung in Abschnitt 5.1 stimmen überein:
-κ (s,γ) = κ (s,γ(cid:48)(0))
-Nor Nor
+κ
+Nor
+(s,γ) = κ
+Nor
+(s,γ(cid:48)(0))
 Satz 5.4
-Sei S R3 eine reguläre, orientierbare Fläche und s S.
-⊆ ∈
-a) Die Hauptkrümmungen κ (s),κ (s) sind die Eigenwerte von II .
-1 2 s
-b) Für die Gauß-Krümmung gilt: K(s) = det(II )
+Sei S
+⊆
+R3 eine reguläre, orientierbare Fläche und s
+∈
+S.
+a) Die Hauptkrümmungen κ
+1
+(s),κ
+2
+(s) sind die Eigenwerte von II
+s
+.
+b) Für die Gauß-Krümmung gilt: K(s) = det(II
 s
+)
 Beweis:
-a) II ist symmetrisch, I S hat also eine Orthonormalbasis aus Eigenvektoren y ,y von
-s s 1 2
-II . Ist x T S, x = 1, so gibt es ϕ [0,2π) mit x = cosϕ y +sinϕ y .
-s s 1 2
-∈ (cid:107) (cid:107) ∈ · ·
-Seien λ ,λ die Eigenwerte von II , also II (y ,y ) = λ . Dann gilt:
-1 2 s s i i i
-II (x,x) = cos2ϕλ +sin2ϕλ
-s 1 2
-= (1 sin2ϕ)λ +sin2ϕλ
-1 2
+a) II
+s
+ist symmetrisch, I
+s
+S hat also eine Orthonormalbasis aus Eigenvektoren y
+1
+,y
+2
+von
+II
+s
+. Ist x
+∈
+T
+s
+S,
+(cid:107)
+x
+(cid:107)
+= 1, so gibt es ϕ
+∈
+[0,2π) mit x = cosϕ
+·
+y
+1
++sinϕ
+·
+y
+2
+.
+Seien λ
+1
+,λ
+2
+die Eigenwerte von II
+s
+, also II
+s
+(y
+i
+,y
+i
+) = λ
+i
+. Dann gilt:
+II
+s
+(x,x) = cos2ϕλ
+1
++sin2ϕλ
+2
+= (1
+−
+sin2ϕ)λ
+1
++sin2ϕλ
+2
+= λ
+1
++sin2ϕ(λ
+2
+−
+λ
+1
+)
+≥
+λ
+1
+= cos2ϕ+(1
+−
+cos2ϕ)λ
+2
+= λ
+2
 −
-= λ +sin2ϕ(λ λ ) λ
-1 2 1 1
-− ≥
-= cos2ϕ+(1 cos2ϕ)λ
+cos2ϕ(λ
 2
 −
-= λ cos2ϕ(λ λ ) λ
-2 2 1 2
-− − ≤
-=P =r =op =. =5.2 λ 1 = min(cid:8) κ Nor(s,x) (cid:12) (cid:12) x T s1S (cid:9)
-⇒ ∈
-λ 2 = max(cid:8) κ Nor(s,x) (cid:12) (cid:12) x T s1S (cid:9)
+λ
+1
+)
+≤
+λ
+2
+Prop. 5.2 =====
+⇒
+λ 1 = min (cid:8) κ Nor (s,x) (cid:12) (cid:12) x
 ∈
+T1 s S (cid:9)
+λ 2 = max (cid:8) κ Nor (s,x) (cid:12) (cid:12) x
+∈
+T1 s S (cid:9)
 Satz 5.5 (Satz von Gauß-Bonnet)
-Sei S R3 eine kompakte orientierbare reguläre Fläche. Dann gilt:
+Sei S
 ⊆
+R3 eine kompakte orientierbare reguläre Fläche. Dann gilt:
 (cid:90)
-K(s)dA = 2πχ(S)
 S
+K(s)dA = 2πχ(S)
 Dabei ist χ(S) die Euler-Charakteristik von S.
 Beweis: Der Beweis wird hier nicht geführt. Er kann in „Elementare Differentialgeometrie“ von
 Christian Bär (2. Auflage), ISBN 978-3-11-022458-0, ab Seite 281 nachgelesen werden.
 Lösungen der Übungsaufgaben
 Lösung zu Aufgabe 1
 Teilaufgabe a) Es gilt:
-(i) ,X T .
+(i)
+∅
+,X
+∈
+T
 X
-∅ ∈
-(ii) T ist offensichtlich unter Durchschnitten abgeschlossen, d. h. es gilt für alle U ,U
-X 1 2
+.
+(ii) T
+X
+ist offensichtlich unter Durchschnitten abgeschlossen, d. h. es gilt für alle U
+1
+,U
+2
+∈
+T
+X
+: U
+1
+∩
+U
+2
 ∈
-T : U U T .
-X 1 2 X
-∩ ∈
-(iii) Auch unter beliebigen Vereinigungen ist T abgeschlossen, d. h. es gilt für eine
+T
+X
+.
+(iii) Auch unter beliebigen Vereinigungen ist T
+X
+abgeschlossen, d. h. es gilt für eine
+beliebige Indexmenge I und alle U
+i ∈
+T
 X
+für alle i
+∈
+I :
 (cid:83)
-beliebige Indexmenge I und alle U T für alle i I : U T
-i ∈ X ∈ i∈I i ∈ X
-Also ist (X,T ) ein topologischer Raum.
+i∈I
+U
+i ∈
+T
+X
+Also ist (X,T
 X
-Teilaufgabe b) Wähle x = 1,y = 0. Dann gilt x = y und die einzige Umgebung von x
+) ein topologischer Raum.
+Teilaufgabe b) Wähle x = 1,y = 0. Dann gilt x
 (cid:54)
-ist X. Da y = 0 X können also x und y nicht durch offene Mengen getrennt werden.
+= y und die einzige Umgebung von x
+ist X. Da y = 0
 ∈
-(X,T ) ist also nicht hausdorffsch.
+X können also x und y nicht durch offene Mengen getrennt werden.
+(X,T
 X
-Teilaufgabe c) Nach Bemerkung 4 sind metrische Räume hausdorffsch. Da (X,T ) nach
+) ist also nicht hausdorffsch.
+Teilaufgabe c) Nach Bemerkung 4 sind metrische Räume hausdorffsch. Da (X,T
 X
-(b)nichthausdorffschist, liefertdieKontrapositionder Trennungseigenschaft, dass(X,T )
+) nach
+(b)nichthausdorffschist, liefertdieKontrapositionder Trennungseigenschaft, dass(X,T
 X
+)
 kein metrischer Raum sein kann.
 Lösung zu Aufgabe 2
 Teilaufgabe a)
-Beh.: a Z : a ist abgeschlossen.
-∀ ∈ { }
-Sei a Z beliebig. Dann gilt:
+Beh.:
+∀
+a
+∈
+Z :
+{
+a
+}
+ist abgeschlossen.
+Sei a
 ∈
+Z beliebig. Dann gilt:
 Wenn jemand diese Aufgabe gemacht hat, bitte die Lösung an info@martin-thoma.de
 schicken.
 Teilaufgabe b)
-Beh.: 1,1 ist nicht offen
-{− }
+Beh.:
+{−
+1,1
+}
+ist nicht offen
 Bew.: durch Widerspruch
-Annahme: 1,1 ist offen.
-{− }
+Annahme:
+{−
+1,1
+}
+ist offen.
+Dann gibt es T
+⊆
+B, sodass
 (cid:83)
-Dann gibt es T B, sodass M = 1,1 . Aber alle U B haben unendlich viele
-⊆ M∈T {− } ∈
+M∈T
+M =
+{−
+1,1
+}
+. Aber alle U
+∈
+B haben unendlich viele
 Elemente. Auch endlich viele Schnitte von Elementen in B haben unendlich viele Elemente
-keine endliche nicht-leere Menge kann in dieser Topologie offen sein 1,1 ist
-⇒ ⇒ {− }
+⇒
+keine endliche nicht-leere Menge kann in dieser Topologie offen sein
+⇒ {−
+1,1
+}
+ist
 nicht offen. (cid:4)
 Teilaufgabe c)
 Beh.: Es gibt unendlich viele Primzahlen.
 101 LösungenderÜbungsaufgaben
 Bew.: durch Widerspruch
-Annahme: Es gibt nur endlich viele Primzahlen p P
+Annahme: Es gibt nur endlich viele Primzahlen p
 ∈
+P
 Dann ist
-Z 1,+1 FSd.A =rithmetik (cid:91) U
-0,p
-\{− }
+Z
+\{−
+1,+1
+}
+FSd.Arithmetik = (cid:91)
 p∈P
-endlich. Das ist ein Widerspruch zu Z ist unendlich und 1,1 ist endlich. (cid:4)
-| | |{− }|
+U
+0,p
+endlich. Das ist ein Widerspruch zu
+|
+Z
+|
+ist unendlich und
+|{−
+1,1
+}|
+ist endlich. (cid:4)
 Lösung zu Aufgabe 3
 (a) Beh.: Die offenen Mengen von P sind Vereinigungen von Mengen der Form
-(cid:89) (cid:89)
-U P
-j i
+(cid:89)
+j∈J
+U
+j
 ×
-j∈J i∈N,i(cid:54)=j
-wobei J N endlich und U P offen ist.
-j j
-⊆ ⊆
+(cid:89)
+i∈N,i(cid:54)=j
+P
+i
+wobei J
+⊆
+N endlich und U
+j
+⊆
+P
+j
+offen ist.
 Beweis: Nach Definition der Produkttopologie bilden Mengen der Form
-(cid:89) (cid:89)
-U P
-j i
+(cid:89)
+i∈J
+U
+j
 ×
-i∈J i∈N\J
-wobei J N endlich und U P offen j J eine Basis der Topologie.
-j j
-⊆ ⊆ ∀ ∈
+(cid:89)
+i∈N\J
+P
+i
+wobei J
+⊆
+N endlich und U
+j
+⊆
+P
+j
+offen
+∀
+j
+∈
+J eine Basis der Topologie.
 Damit sind die offenen Mengen von P Vereinigungen von Mengen der obigen
 Form. (cid:4)
 (b) Beh.: Die Zusammenhangskomponenten von P sind alle einpunktig.
-Beweis: Es seinen x,y P und x sowie y liegen in der gleichen Zusammenhangs-
-∈
-komponente Z P. Da Z zusammenhängend ist und i I : p : P P ist
-i i
-⊆ ∀ ∈ →
-stetig, ist p (Z) P zusammenhängend für alle i N. Die zusammenhängenden
-i i
-⊆ ∈
-Mengen von P sind genau 0 und 1 , d. h. für alle i N gilt entweder
-i
-{ } { } ∈
-p (Z) 0 oder p (Z) 1 . Es sei z 0,1 so, dass p (Z) z für
-i i i i i
-⊆ { } ⊆ { } ∈ { } ⊆ { }
-alle i N. Dann gilt also:
-∈
-p (x) = z = p (y) i N
-i i i
-∀ ∈
-(cid:124)(cid:123)(cid:122)(cid:125) (cid:124)(cid:123)(cid:122)(cid:125)
-=xi =yi
+Beweis: Es seinen x,y
+∈
+P und x sowie y liegen in der gleichen Zusammenhangs-
+komponente Z
+⊆
+P. Da Z zusammenhängend ist und
+∀
+i
+∈
+I : p
+i
+: P
+→
+P
+i
+ist
+stetig, ist p
+i
+(Z)
+⊆
+P
+i
+zusammenhängend für alle i
+∈
+N. Die zusammenhängenden
+Mengen von P
+i
+sind genau
+{
+0
+}
+und
+{
+1
+}
+, d. h. für alle i
+∈
+N gilt entweder
+p
+i
+(Z)
+⊆ {
+0
+}
+oder p
+i
+(Z)
+⊆ {
+1
+}
+. Es sei z
+i
+∈ {
+0,1
+}
+so, dass p
+i
+(Z)
+⊆ {
+z
+i
+}
+für
+alle i
+∈
+N. Dann gilt also:
+p
+i
+(x)
+(cid:124)(cid:123)(cid:122)(cid:125)
+=xi
+= z
+i
+= p
+i
+(y)
+(cid:124)(cid:123)(cid:122)(cid:125)
+=yi
+∀
+i
+∈
+N
 Somit folgt: x = y (cid:4)
 Lösung zu Aufgabe 4
-(a) Beh.: GL (R) ist nicht kompakt.
+(a) Beh.: GL
+n
+(R) ist nicht kompakt.
+Bew.: det : GL
+n
+(R)
+→
+R
+\{
+0
+}
+ist stetig. Außerdem ist det(GL
+n
+(R)) = R
+\{
+0
+}
+nicht kompakt. 22
+⇒
+GL
 n
-Bew.: det : GL (R) R 0 ist stetig. Außerdem ist det(GL (R)) = R 0
-n n
-→ \{ } \{ }
-nicht kompakt. 22 GL (R) ist nicht kompakt. (cid:4)
+(R) ist nicht kompakt. (cid:4)
+(b) Beh.: SL
+1
+(R) ist nicht kompakt, für n > 1 ist SL
 n
+(R) kompakt.
+Bew.: Für SL 1 (R) gilt: SL 1 (R) = (cid:8) A
+∈
+R1×1 (cid:12) (cid:12) detA = 1 (cid:9) = (cid:0) 1 (cid:1) ∼ =
+{
+1
+}
+. 22
 ⇒
-(b) Beh.: SL (R) ist nicht kompakt, für n > 1 ist SL (R) kompakt.
-1 n
-Bew.: Für SL 1(R) gilt: SL 1(R) = (cid:8) A R1×1 (cid:12) (cid:12) detA = 1(cid:9) = (cid:0) 1(cid:1) ∼= 1 . 22 SL 1(R)
-∈ { } ⇒
+SL 1 (R)
 ist kompakt.
 102 LösungenderÜbungsaufgaben
-SL (R) GL (R) lässt sich mit einer Teilmenge des Rn2 identifizieren. Nach Satz 1.1
-n n
+SL
+n
+(R)
 ⊆
+GL
+n
+(R) lässt sich mit einer Teilmenge des Rn2 identifizieren. Nach Satz 1.1
 sinddiesegenaudannkompakt,wennsiebeschränktundabgeschlossensind.Definiere
-nun für für n N ,m N:
+nun für für n
+∈
+N
 ≥2
-∈ ∈
+,m
+∈
+N:
+A
+m
+= diag
+n
+(m,
 1
-A = diag (m, ,...,1)
-m n
 m
-Dann gilt: detA = 1, d. h. A SL (R), und A ist unbeschränkt, da A =
-m m n m m ∞
-∈ (cid:107) (cid:107)
-m . (cid:4)
-−m−−→−∞→ ∞
-(c) Beh.: (R) ist kompakt.
+,...,1)
+Dann gilt: detA
+m
+= 1, d. h. A
+m
+∈
+SL
+n
+(R), und A
+m
+ist unbeschränkt, da
+(cid:107)
+A
+m
+(cid:107)
+∞
+=
+m
+−−−−→ m→∞ ∞
+. (cid:4)
+(c) Beh.:
 P
-Bew.: (R) ∼= Sn/ x∼−x. Per Definition der Quotiententopologie ist die Klassenabbil-
+(R) ist kompakt.
+Bew.:
 P
+(R) ∼ = Sn/ x∼−x . Per Definition der Quotiententopologie ist die Klassenabbil-
 dung stetig. Da Sn als abgeschlossene und beschränkte Teilmenge des Rn+1 kompakt
-ist 22 (R) ist kompakt. (cid:4)
+ist 22
 ⇒ P
+(R) ist kompakt. (cid:4)
 Lösung zu Aufgabe 5
 Die Definition von Homöomorphismus kann auf Seite 9 nachgelesen werden.
 Definition 79
-Seien (G, ) und (H, ) Gruppen und ϕ : G H eine Abbildung.
-∗ ◦ →
+Seien (G,
+∗
+) und (H,
+◦
+) Gruppen und ϕ : G
+→
+H eine Abbildung.
 ϕ heißt Homomorphismus, wenn
-g ,g G : ϕ(g g ) = ϕ(g ) ϕ(g )
-1 2 1 2 1 2
-∀ ∈ ∗ ◦
+∀
+g
+1
+,g
+2
+∈
+G : ϕ(g
+1
+∗
+g
+2
+) = ϕ(g
+1
+)
+◦
+ϕ(g
+2
+)
 gilt.
 Es folgt direkt:
-1) Sei X = R mit der Standarttopologie und ϕ : idR und R = (R,+). Dann ist ϕ ein
-1 1
+1) Sei X = R mit der Standarttopologie und ϕ
+1
+: idR und R = (R,+). Dann ist ϕ
+1
+ein
 Gruppenhomomorphismus und ein Homöomorphismus.
-2) Sei G = (Z,+) und H = (Z/3Z,+). Dann ist ϕ : G H,x x mod 3 ein
+2) Sei G = (Z,+) und H = (Z/3Z,+). Dann ist ϕ
 2
-→ (cid:55)→
-Gruppenhomomorphismus. Jedoch ist ϕ nicht injektiv, also sicher kein Homöomor-
+: G
+→
+H,x
+(cid:55)→
+x mod 3 ein
+Gruppenhomomorphismus. Jedoch ist ϕ
 2
+nicht injektiv, also sicher kein Homöomor-
 phismus.
-3) Sei X ein topologischer Raum. Dann ist id ein Homöomorphismus. Da keine
+3) Sei X ein topologischer Raum. Dann ist id
 X
+ein Homöomorphismus. Da keine
 Verknüpfung auf X definiert wurde, ist X keine Gruppe und daher auch kein Grup-
 penhomomorphismus.
 Also: Obwohl die Begriffe ähnlich klingen, werden sie in ganz unterschiedlichen Kontexten
@@ -5440,146 +13838,328 @@ Lösung zu Aufgabe 6
 Die Definition einer Isotopie kann auf Seite 20 nachgelesen werden, die einer Isometrie auf
 Seite 6.
 Definition 80
-Seien (G, ) und (H, ) Gruppen und ϕ : G H eine Abbildung.
-∗ ◦ →
+Seien (G,
+∗
+) und (H,
+◦
+) Gruppen und ϕ : G
+→
+H eine Abbildung.
 ϕ heißt Isomorphismus, wenn ϕ ein bijektiver Homomorphismus ist.
 Eine Isotopie ist also für Knoten definiert, Isometrien machen nur in metrischen Räumen
 Sinn und ein Isomorphismus benötigt eine Gruppenstruktur.
 103 LösungenderÜbungsaufgaben
 Lösung zu Aufgabe 7
 (a) Vor.: Sei M eine topologische Mannigfaltigkeit.
-Beh.: M ist wegzusammehängend M ist zusammenhängend
+Beh.: M ist wegzusammehängend
 ⇔
-Beweis: „ “: Da M insbesondere ein topologischer Raum ist folgt diese Richtung
+M ist zusammenhängend
+Beweis: „
 ⇒
+“: Da M insbesondere ein topologischer Raum ist folgt diese Richtung
 direkt aus Bemerkung 23.
-„ “: Seien x,y M und
-⇐ ∈
-Z := z M Weg von x nach z
-{ ∈ | ∃ }
+„
+⇐
+“: Seien x,y
+∈
+M und
+Z :=
+{
+z
+∈
+M
+| ∃
+Weg von x nach z
+}
 Es gilt:
-(i) Z = , da M lokal wegzusammenhängend ist
-(cid:54) ∅
+(i) Z
+(cid:54)
+=
+∅
+, da M lokal wegzusammenhängend ist
 (ii) Z ist offen, da M lokal wegzusammenhängend ist
-(iii) ZC := z˜ M (cid:64)Weg von x nach z˜ ist offen
-{ ∈ | }
-Da M eine Mannigfaltigkeit ist, existiert zu jedem z˜ ZC eine offene und
-∈
-wegzusammenhängende Umgebung U M.
-z˜
-⊆
-Es gilt sogar U ZC, denn gäbe es ein U z Z, so gäbe es Wege γ :
-z˜ z˜ 2
-⊆ (cid:51) ∈
-[0,1] M,γ (0) = z,γ (1) = x und γ : [0,1] M,γ (0) = z˜,γ (1) = z.
-2 2 1 1 1
-→ →
+(iii) ZC :=
+{
+˜ z
+∈
+M
+|
+(cid:64)Weg von x nach ˜ z
+}
+ist offen
+Da M eine Mannigfaltigkeit ist, existiert zu jedem ˜ z
+∈
+ZC eine offene und
+wegzusammenhängende Umgebung U
+˜ z
+⊆
+M.
+Es gilt sogar U
+˜ z
+⊆
+ZC, denn gäbe es ein U
+˜ z
+(cid:51)
+z
+∈
+Z, so gäbe es Wege γ
+2
+:
+[0,1]
+→
+M,γ
+2
+(0) = z,γ
+2
+(1) = x und γ
+1
+: [0,1]
+→
+M,γ
+1
+(0) = ˜ z,γ
+1
+(1) = z.
 Dann wäre aber
-γ : [0,1] M,
+γ : [0,1]
 →
+M,
+γ(x) =
 (cid:40)
-γ (2x) falls 0 x 1
-γ(x) = 1 ≤ ≤ 2
-γ (2x 1) falls 1 < x 1
-2 − 2 ≤
-ein stetiger Weg von z˜ nach x Widerspruch.
-⇒
-DaM zusammenhängendistundM = Z ZC ,sowieZ = folgtZC = .
-(cid:124)(cid:123)(cid:122)(cid:125)∪(cid:124)(cid:123)(cid:122)(cid:125) (cid:54) ∅ ∅
-offen offen
+γ
+1
+(2x) falls 0
+≤
+x
+≤
+1
+2
+γ
+2
+(2x
+−
+1) falls 1
+2
+< x
+≤
+1
+ein stetiger Weg von ˜ z nach x
+⇒
+Widerspruch.
+DaM zusammenhängendistundM = Z
+(cid:124)(cid:123)(cid:122)(cid:125)
+offen
+∪
+ZC
+(cid:124)(cid:123)(cid:122)(cid:125)
+offen
+,sowieZ
+(cid:54)
+=
+∅
+folgtZC =
+∅
+.
 Also ist M = Z wegzusammenhängend. (cid:4)
 (b) Beh.: X ist wegzusammenhängend.
-Beweis: X := (R 0 ) 0 ,0 und (R 0 ) 0 sind homöomorph zu R.
-1 2 2
-\{ } ∪{ } \{ } ∪{ }
+Beweis: X := (R
+\{
+0
+}
+)
+∪{
+0
+1
+,0
+2
+}
+und (R
+\{
+0
+}
+)
+∪{
+0
+2
+}
+sind homöomorph zu R.
 Also sind die einzigen kritischen Punkte, die man nicht verbinden können könnte
-0 und 0 .
-1 2
-Da (R 0 ) 0 homöomorph zu R ist, exisitert ein Weg γ von 0 zu einem
-1 1 1
-\{ } ∪{ }
-beliebigen Punkt a R 0 .
-∈ \{ }
-Da (R 0 ) 0 ebenfalls homöomorph zu R ist, existiert außerdem ein
-2
-\{ } ∪{ }
-Weg γ von a nach 0 . Damit existiert ein (nicht einfacher) Weg γ von 0 nach
-2 2 1
-0 . (cid:4)
+0
+1
+und 0
+2
+.
+Da (R
+\{
+0
+}
+)
+∪{
+0
+1
+}
+homöomorph zu R ist, exisitert ein Weg γ
+1
+von 0
+1
+zu einem
+beliebigen Punkt a
+∈
+R
+\{
+0
+}
+.
+Da (R
+\{
+0
+}
+)
+∪{
+0
 2
+}
+ebenfalls homöomorph zu R ist, existiert außerdem ein
+Weg γ
+2
+von a nach 0
+2
+. Damit existiert ein (nicht einfacher) Weg γ von 0
+1
+nach
+0
+2
+. (cid:4)
 Lösung zu Aufgabe 9
-Vor.: Sei (X,d) eine absolute Ebene, A,B,C X und ABC ein Dreieck.
-∈ (cid:52)
+Vor.: Sei (X,d) eine absolute Ebene, A,B,C
+∈
+X und
+(cid:52)
+ABC ein Dreieck.
 104 LösungenderÜbungsaufgaben
-(a) Beh.: AB = AC ∠ABC = ∠ACB
-∼ ∼
+(a) Beh.: AB
+∼
+= AC
 ⇒
-Bew.: Sei AB = AC.
+∠ABC
 ∼
-Isometrie ϕ mit ϕ(B) = C und ϕ(C) = B und ϕ(A) = A.
+= ∠ACB
+Bew.: Sei AB
+∼
+= AC.
 ⇒ ∃
+Isometrie ϕ mit ϕ(B) = C und ϕ(C) = B und ϕ(A) = A.
+⇒
 ϕ(∠ABC) = ∠ACB
 ⇒
-∠ABC ∠ACB (cid:4)
-=
+∠ABC
 ∼
-⇒
-(b) Beh.: Der längeren Seite von ABC liegt der größere Winkel gegenüber und umge-
+=
+∠ACB (cid:4)
+(b) Beh.: Der längeren Seite von
 (cid:52)
+ABC liegt der größere Winkel gegenüber und umge-
 kehrt.
-Bew.: Sei d(A,C) > d(A,B). Nach §3 (i) gibt es C(cid:48) AC+ mit d(A,C(cid:48)) = d(A,B)
+Bew.: Sei d(A,C) > d(A,B). Nach §3 (i) gibt es C(cid:48)
 ∈
-C(cid:48) liegt zwischen A und C.
+AC+ mit d(A,C(cid:48)) = d(A,B)
 ⇒
+C(cid:48) liegt zwischen A und C.
 Es gilt (cid:93)ABC(cid:48) < (cid:93)ABC und aus Aufgabe 9 (a) folgt: (cid:93)ABC(cid:48) = (cid:93)AC(cid:48)B.
-∠BC(cid:48)A ist ein nicht anliegender Außenwinkel zu ∠BCA =B =e =m =. =66 (cid:93)BC(cid:48)A > (cid:93)BCA
+∠BC(cid:48)A ist ein nicht anliegender Außenwinkel zu ∠BCA Bem. 66 =====
 ⇒
-(cid:93)BCA < (cid:93)BC(cid:48)A = (cid:93)ABC(cid:48) < (cid:93)ABC Sei umgekehrt (cid:93)ABC > (cid:93)BCA, kann
+(cid:93)BC(cid:48)A > (cid:93)BCA
 ⇒
+(cid:93)BCA < (cid:93)BC(cid:48)A = (cid:93)ABC(cid:48) < (cid:93)ABC Sei umgekehrt (cid:93)ABC > (cid:93)BCA, kann
 wegen 1. Teil von Aufgabe 9 (b) nicht d(A,B) > d(A,C) gelten.
 Wegen Aufgabe 9 (a) kann nicht d(A,B) = d(A,C) gelten.
-d(A,B) < d(A,C) (cid:4)
 ⇒
-(c) Vor.: Sei g eine Gerade, P X und P / g
-∈ ∈
-Beh.: ! Lot
+d(A,B) < d(A,C) (cid:4)
+(c) Vor.: Sei g eine Gerade, P
+∈
+X und P /
+∈
+g
+Beh.:
 ∃
+! Lot
 Bew.: ÜB10 A4(a): Es gibt Geradenspiegelung ϕ an g. ϕ vertauscht die beiden
 Halbebenen bzgl. g.
-ϕ(P)P schneidet g in F.
 ⇒
+ϕ(P)P schneidet g in F.
 Es gibt eine Geradenspiegelung ϕ an g. ϕ vertauscht die beiden Halbebenen bzgl. g
+⇒
 ϕ(P)P schneidet g in F.
+SeiA
+∈
+g
+\{
+F
+}
+. Danngilt ϕ(∠AFP) = ∠AFϕ(P) = π
 ⇒
-SeiA g F . Danngilt ϕ(∠AFP) = ∠AFϕ(P) = π ∠AFP istrechterWinkel.
-∈ \{ } ⇒
-Gäbe es nun G g F , so dass PG weiteres Lot von P auf g ist, wäre PFG
-∈ \{ } (cid:52)
-ein Dreieck mit zwei rechten Innenwinkeln (vgl. Abbildung 5.4).
-P
-A
+∠AFP istrechterWinkel.
+Gäbe es nun G
+∈
+g
+\{
 F
+}
+, so dass PG weiteres Lot von P auf g ist, wäre
+(cid:52)
+PFG
+ein Dreieck mit zwei rechten Innenwinkeln (vgl. Abbildung 5.4).
 ·
 ·
+A
 G
+P
+F
 g
 Abbildung 5.4: Zwei Lote zu einer Geraden g durch einen Punkt P
 Nach Folgerung 4.4 ist die Summe von zwei Innenwinkeln immer < π
-G gibt es nicht. (cid:4)
 ⇒
+G gibt es nicht. (cid:4)
 Lösung zu Aufgabe 10
-Sei f h und o. B. d. A. f g.
-(cid:107) (cid:107)
-f ∦ h f h = , sei also x f h. Mit Axiom §5 folgt: Es gibt höchstens eine Parallele
-⇒ ∩ (cid:54) ∅ ∈ ∩
-zu g durch x, da x / g. Diese ist f, da x f und f g. Da aber x h, kann h nicht
-∈ ∈ (cid:107) ∈
+Sei f
+(cid:107)
+h und o. B. d. A. f
+(cid:107)
+g.
+f ∦ h
+⇒
+f
+∩
+h
+(cid:54)
+=
+∅
+, sei also x
+∈
+f
+∩
+h. Mit Axiom §5 folgt: Es gibt höchstens eine Parallele
+zu g durch x, da x /
+∈
+g. Diese ist f, da x
+∈
+f und f
+(cid:107)
+g. Da aber x
+∈
+h, kann h nicht
 105 LösungenderÜbungsaufgaben
-parallel zu g sein, denn ansonsten gäbe es zwei Parallelen zu g durch x (f = h). g ∦ h (cid:4)
-(cid:54) ⇒
+parallel zu g sein, denn ansonsten gäbe es zwei Parallelen zu g durch x (f
+(cid:54)
+= h).
+⇒
+g ∦ h (cid:4)
 Lösung zu Aufgabe 11
-Sei (X,d,G) eine Geometrie, die §1-§4 erfüllt. Seien außerdem ABC und A(cid:48)B(cid:48)C(cid:48)
-(cid:52) (cid:52)
+Sei (X,d,G) eine Geometrie, die §1-§4 erfüllt. Seien außerdem
+(cid:52)
+ABC und
+(cid:52)
+A(cid:48)B(cid:48)C(cid:48)
 Dreiecke, für die gilt:
 d(A,B) = d(A(cid:48),B(cid:48))
 d(A,C) = d(A(cid:48),C(cid:48))
@@ -5589,10 +14169,14 @@ bzgl. AB wie C. Diese Isometrie existiert wegen §4.
 Es gilt d(A,C) = d(A(cid:48),C(cid:48)) = d(ϕ(A(cid:48)),ϕ(C(cid:48))) = d(A,ϕ(C(cid:48))) und d(B,C) = d(B(cid:48),C(cid:48)) =
 d(ϕ(B(cid:48)),ϕ(C(cid:48))) = d(B,ϕ(C(cid:48))).
 Bem. 62
-===== C = ϕ(C).
+=====
 ⇒
-Es gilt also ϕ( A(cid:48)B(cid:48)C(cid:48)) = ABC. (cid:4)
-(cid:52) (cid:52)
+C = ϕ(C).
+Es gilt also ϕ(
+(cid:52)
+A(cid:48)B(cid:48)C(cid:48)) =
+(cid:52)
+ABC. (cid:4)
 Bildquellen
 Alle Bilder, die hier nicht aufgeführt sind, wurden von Martin Thoma erstellt.
 Teilweise wurden die im folgenden aufgelisteten Bilder noch leicht modifiziert.
@@ -5604,8 +14188,9 @@ Abb. 1.11 Knoten von Jim.belk aus der „Blue knots“-Serie:
 – Trivialer Knoten: commons.wikimedia.org/wiki/File:Blue_Unknot.png
 – Kleeblattknoten: commons.wikimedia.org/wiki/File:Blue_Trefoil_Knot.png
 – Achterknoten: commons.wikimedia.org/wiki/File:Blue_Figure-Eight_Knot.png
-– 6 -Knoten: commons.wikimedia.org/wiki/File:Blue_6_2_Knot.png
+– 6
 2
+-Knoten: commons.wikimedia.org/wiki/File:Blue_6_2_Knot.png
 Abb. 1.12 Reidemeister-Züge: YAMASHITA Makoto (1, 2, 3)
 Abb. 1.13 Kleeblattknoten,3-Färbung:Jim.belk,commons.wikimedia.org/wiki/File:Tricoloring.
 png
@@ -5643,352 +14228,635 @@ benötigten Begriffe definiert und erklärt werden. Die folgenden Begriffe wurde
 aber nicht erklärt, da sie Bestandteil der Vorlesungen „Analysis I und II“ sowie „Lineare Algebra
 und analytische Geometrie I und II“ sind. Jedoch will ich zumindest die Definitionen bereitstellen.
 Definition 81
-Sei D R und x R. x heißt ein Häufungspunkt von D : Folge x in D x
-0 0 n 0
-⊆ ∈ ⇔ ∃ \{ }
-mit x x .
-n 0
+Sei D
+⊆
+R und x
+0
+∈
+R. x
+0
+heißt ein Häufungspunkt von D :
+⇔ ∃
+Folge x
+n
+in D
+\{
+x
+0
+}
+mit x
+n
 →
+x
+0
+.
 Folgende Definition wurde dem Skript von Herrn Prof. Dr. Leuzinger für Lineare Algebra
 entnommen:
 Definition 82
 Es seien V und W K-Vektorräume und A(V) und A(W) die zugehörigen affinen Räume.
-Eine Abbildung f : V W heißt affin, falls für alle a,b V und alle λ,µ K mit λ+µ = 1
-→ ∈ ∈
+Eine Abbildung f : V
+→
+W heißt affin, falls für alle a,b
+∈
+V und alle λ,µ
+∈
+K mit λ+µ = 1
 gilt:
 f(λa+µb) = λf(a)+µf(b)
 Definition 83
-Sei V ein Vektorraum und S V eine Teilmenge.
+Sei V ein Vektorraum und S
 ⊆
+V eine Teilmenge.
 S heißt eine Orthonormalbasis von V, wenn gilt:
 (i) S ist eine Basis von V
-(ii) v S : v = 1
-∀ ∈ (cid:107) (cid:107)
-(iii) v ,v S : v = v v ,v = 0
-1 2 1 2 1 2
-∀ ∈ (cid:54) ⇒ (cid:104) (cid:105)
+(ii)
+∀
+v
+∈
+S :
+(cid:107)
+v
+(cid:107)
+= 1
+(iii)
+∀
+v
+1
+,v
+2
+∈
+S : v
+1
+(cid:54)
+= v
+2
+⇒ (cid:104)
+v
+1
+,v
+2
+(cid:105)
+= 0
 Satz (Zwischenwertsatz)
-Sei a < b und f C[a,b] := C([a,b]), weiter sei y R und f(a) < y < f(b) oder
-0 0
-∈ ∈
-f(b) < y < f(a). Dann existiert ein x [a,b] mit f(x ) = y .
-0 0 0 0
+Sei a < b und f
+∈
+C[a,b] := C([a,b]), weiter sei y
+0
+∈
+R und f(a) < y
+0
+< f(b) oder
+f(b) < y
+0
+< f(a). Dann existiert ein x
+0
 ∈
+[a,b] mit f(x
+0
+) = y
+0
+.
 Definition 84
-Sei V ein Vektorraum über einem Körper K und f : V V eine lineare Abbildung.
+Sei V ein Vektorraum über einem Körper K und f : V
 →
-v V 0 heißt Eigenvektor : λ K : f(v) = λv.
-∈ \{ } ⇔ ∃ ∈
-Wenn ein solches λ K existiert, heißt es Eigenwert von f.
+V eine lineare Abbildung.
+v
+∈
+V
+\{
+0
+}
+heißt Eigenvektor :
+⇔ ∃
+λ
 ∈
+K : f(v) = λv.
+Wenn ein solches λ
+∈
+K existiert, heißt es Eigenwert von f.
 Satz (Binomischer Lehrsatz)
-Sei x,y R. Dann gilt:
+Sei x,y
 ∈
-n (cid:18) (cid:19)
-(cid:88) n
-(x+y)n = xn−kyk n N
-0
-k ∀ ∈
+R. Dann gilt:
+(x+y)n =
+n
+(cid:88)
 k=0
+(cid:18)
+n
+k
+(cid:19)
+xn−kyk
+∀
+n
+∈
+N
+0
 Definition 85
-Seien a,b R3 Vektoren.
+Seien a,b
 ∈
-     
-a a a b a b
-1 1 2 3 3 2
+R3 Vektoren.
+a
+×
+b :=
+
+
+a
+1
+b 3
+a
+3
+
+
+×
+
+
+a
+1
+b 3
+a
+3
+
+ =
+
+
+a
+2
+b
+3
+−
+a
+3
+b
+2
+a 3 b 1
 −
-a b := b 3 b 3 = a 3b 1 a 1b 3
-× × −
-a a a b a b
-3 3 1 2 2 1
+a 1 b 3
+a
+1
+b
+2
 −
+a
+2
+b
+1
+
+
 Symbolverzeichnis
-Mengenoperationen Perm(X) Permutationsgruppe
-Sym(X) Symmetrische Gruppe
+Mengenoperationen
 Seien A,B und M Mengen.
-AC Komplement von A Wege
-(M) Potenzmenge von M
+AC Komplement von A
 P
-M Abschluss von M Sei γ : I X ein Weg.
-∂M Rand der Menge M →
-M◦ Inneres der Menge M [γ] Homotopieklasse von γ
-γ γ Zusammenhängen von Wegen
-A B Kreuzprodukt 1 2
+(M) Potenzmenge von M
+M Abschluss von M
+∂M Rand der Menge M
+M◦ Inneres der Menge M
+A
+×
+B Kreuzprodukt
+A
+⊆
+B Teilmengenbeziehung
+A (cid:40) B echte Teilmengenbeziehung
+A
+\
+B Differenzmenge
+A
+∪
+B Vereinigung
+A ˙
+∪
+B Disjunkte Vereinigung
+A
+∩
+B Schnitt
+Geometrie
+AB Gerade durch die Punkte A und
+B
+AB Strecke mit Endpunkten A und B
+(cid:52)
+ABC Dreieck mit Eckpunkten A,B,C
+AB ∼ = CD Die Strecken AB und CD sind
+isometrisch
+|
+K
+|
+Geometrische Realisierung des
+Simplizialkomplexes K
+Gruppen
+Sei X ein topologischer Raum und K ein Kör-
+per.
+Homöo(X) Homöomorphismengruppe
+Iso(X) Isometriengruppe
+GL
+n
+(K) Allgemeine lineare Gruppe (von
+General Linear Group)
+SL
+n
+(K) Spezielle lineare Gruppe
+PSL
+n
+(K) Projektive lineare Gruppe
+Perm(X) Permutationsgruppe
+Sym(X) Symmetrische Gruppe
+Wege
+Sei γ : I
+→
+X ein Weg.
+[γ] Homotopieklasse von γ
+γ
+1
 ∗
-× γ γ Homotopie von Wegen
-A B Teilmengenbeziehung 1 2
+γ
+2
+Zusammenhängen von Wegen
+γ
+1
 ∼
-A ⊆ (cid:40) B echte Teilmengenbeziehung γ(x) Inverser Weg, also γ(x) := γ(1 x)
+γ
+2
+Homotopie von Wegen
+γ(x) Inverser Weg, also γ(x) := γ(1
 −
+x)
 C Bild eines Weges γ, also C :=
-A B Differenzmenge
-\ γ([0,1])
-A B Vereinigung
-∪
-A ˙ B Disjunkte Vereinigung
-∪
-A B Schnitt
+γ([0,1])
 Weiteres
-∩
-Geometrie B Basis einer Topologie
-B (x) δ-Kugel um x
+B Basis einer Topologie
+B
 δ
-Subbasis einer Topologie
-AB Gerade durch die Punkte A und
+(x) δ-Kugel um x
 S
+Subbasis einer Topologie
 T Topologie
-B
-AB Strecke mit Endpunkten A und B Atlas
-ABC Dreieck mit Eckpunkten A,B,C A Projektiver Raum
-(cid:52)
-AB ∼= CD Die Strecken AB und CD sind P , Skalarprodukt
-isometrisch (cid:104)· ·(cid:105)
-X/ X modulo
+A
+Atlas
+P
+Projektiver Raum
+(cid:104)·
+,
+·(cid:105)
+Skalarprodukt
+X/
 ∼
-K Geometrische Realisierung des [x] Äquivalenzk∼ lassen von x bzgl.
-| | ∼
-Simplizialkomplexes K x Norm von x ∼
-(cid:107) (cid:107)
-x Betrag von x
-| |
-a Erzeugnis von a
-Gruppen
-(cid:104) (cid:105)
+X modulo
+∼ [x]
+∼
+Äquivalenzklassen von x bzgl.
+∼
+(cid:107)
+x
+(cid:107)
+Norm von x
+|
+x
+|
+Betrag von x
+(cid:104)
+a
+(cid:105)
+Erzeugnis von a
 Sn Sphäre
 Tn Torus
-Sei X ein topologischer Raum und K ein Kör-
-per.
-f g Verkettung von f und g
-Homöo(X) Homöomorphismengruppe ◦
-π Projektion auf X
+f
+◦
+g Verkettung von f und g
+π
 X
-Iso(X) Isometriengruppe
-f f eingeschränkt auf U
+Projektion auf X
+f
+|
 U
-GL (K) Allgemeine lineare Gruppe (von |
-n f−1(M) Urbild von M
-General Linear Group)
+f eingeschränkt auf U
+f−1(M) Urbild von M
 Rg(M) Rang von M
-SL (K) Spezielle lineare Gruppe
-n χ(K) Euler-Charakteristik von K
-PSL (K) Projektive lineare Gruppe
-n
+χ(K) Euler-Charakteristik von K
 110 Symbolverzeichnis
 ∆k Standard-Simplex
 X#Y Verklebung von X und Y
-d Lineare Abbildung aus Bemer-
+d
 n
+Lineare Abbildung aus Bemer-
 kung 37
-A = B A ist isometrisch zu B
+A
 ∼
-f Abbildung zwischen Fundamental-
+= B A ist isometrisch zu B
+f
 ∗
+Abbildung zwischen Fundamental-
 gruppen (vgl. Seite 49)
 111 Symbolverzeichnis
 Zahlenmengen
-N = 1,2,3,... Natürliche Zahlen
-{ }
-Z = N 0, 1, 2,... Ganze Zahlen
-Q = Z∪{(cid:8) 1,− 1, 2−(cid:9) = (cid:8)} z mit z Z und n Z 0 (cid:9) Rationale Zahlen
-R = Q∪ (cid:8) √2 23 , 3 √3 3,...n (cid:9) Reel∈ e Zahlen ∈ \{ }
-∪ −
-R Echt positive reele Zahlen
+N =
+{
+1,2,3,...
+}
+Natürliche Zahlen
+Z = N
+∪{
+0,
+−
+1,
+−
+2,...
+}
+Ganze Zahlen
+Q = Z
+∪
+(cid:8) 1
+2
+, 1
+3
+, 2
+3
+(cid:9) = (cid:8) z
+n
+mit z
+∈
+Z und n
+∈
+Z
+\{
+0
+}
+(cid:9) Rationale Zahlen
+R = Q
+∪
+(cid:8) √2,
+−
+3 √3,... (cid:9) Reele Zahlen
+R
 +
-Rn := (x ,...,x ) Rn x 0 Halbraum
-+,0 1 n n
-{ ∈ | ≥ }
-R× = R 0 Einheitengruppe von R
-\{ }
-C = a+ib a,b R Komplexe Zahlen
-{ | ∈ }
-P = 2,3,5,7,... Primzahlen
-{ }
-H = z C z > 0 obere Halbebene
-{ ∈ | (cid:61) }
+Echt positive reele Zahlen
+Rn
++,0
+:=
+{
+(x
+1
+,...,x
+n
+)
+∈
+Rn
+|
+x
+n
+≥
+0
+}
+Halbraum
+R× = R
+\{
+0
+}
+Einheitengruppe von R
+C =
+{
+a+ib
+|
+a,b
+∈
+R
+}
+Komplexe Zahlen
+P =
+{
+2,3,5,7,...
+}
+Primzahlen
+H =
+{
+z
+∈
+C
+| (cid:61)
+z > 0
+}
+obere Halbebene
 I = [0,1] (cid:40) R Einheitsintervall
-f : S1 (cid:44) R2 Einbettung der Kreislinie in die Ebene
+f : S1 (cid:44)
 →
-π (X,x) Fundamentalgruppe im topologischen Raum X um x X
+R2 Einbettung der Kreislinie in die Ebene
+π
 1
+(X,x) Fundamentalgruppe im topologischen Raum X um x
 ∈
+X
 Fix(f) Menge der Fixpunkte der Abbildung f
-2-Norm; Euklidische Norm
-2
 (cid:107)·(cid:107)
+2
+2-Norm; Euklidische Norm
 κ Krümmung
-κ Normalenkrümmung
+κ
 Nor
+Normalenkrümmung
 V(f) Nullstellenmenge von f2
 Krümmung
-D F : R2 R3 Lineare Abbildung mit Jacobi-Matrix in p (siehe Seite 89)
+D
 p
+F : R2
 →
-T S Tangentialebene an S R3 durch s S
+R3 Lineare Abbildung mit Jacobi-Matrix in p (siehe Seite 89)
+T
 s
-⊆ ∈
-d n(x) Weingarten-Abbildung
+S Tangentialebene an S
+⊆
+R3 durch s
+∈
+S
+d
 s
+n(x) Weingarten-Abbildung
 2von Vanishing Set
 Stichwortverzeichnis
-Abbildung einfach zusammenhängend, 49
-affine, 107 Einheitsnormalenfeld, 90
-differenzierbare, 29 Euler-Charakteristik, siehe Eulerzahl
-homotope, 50 Eulersche Polyederformel, 38
-offene, 53 Eulerzahl, 36
+Abbildung
+affine, 107
+differenzierbare, 29
+homotope, 50
+offene, 53
 simpliziale, 35
-Färbbarkeit, 21
 stetige, 9
-Faser, siehe Urbild
 Abschluss, 3
-Fläche
 Abstand, 86
-orientierbare, 90
 Abstandsaxiom, 65
-reguläre, 30
 Achterknoten, 20
-Flächenelement, 95
 Aktion, siehe Gruppenoperation
-Formoperator, siehe Weingarten-Abbildung
 Anordnungsaxiome, 66
-Fundamentalform
 Atlas, 24
-erste, 94
 Außenwinkel, 70
-zweite, 97
 Axiom, 64
-Fundamentalgruppe, 47
 Axiomensystem, 64
-Gauß-Krümmung, 92, 91–94
 Basis, 3
-Geometrie, 64
 Baum, 37
-Gerade, 64
 Betti-Zahl, 41
-hyperbolische, 77
 Bewegungsaxiom, 66
-Graph, 37
 Binormalenvektor, 89
+Cantorsches Diskontinuum, 22
+Ck-Struktur, 29
+Decktransformation, 59
+Decktransformationsgruppe, 59
+Deformationsretrakt, 47
+dicht, 3
+Diffeomorphismus, 29
+Dimension, 34
+diskret, 53
+Doppelverhältnis, 83
+Dreibein
+begleitendes, 89
+Ebene
+euklidische, 64
+Eigenvektor, 107
+Eigenwert, 107
+einfach zusammenhängend, 49
+Einheitsnormalenfeld, 90
+Euler-Charakteristik, siehe Eulerzahl
+Eulersche Polyederformel, 38
+Eulerzahl, 36
+Färbbarkeit, 21
+Faser, siehe Urbild
+Fläche
+orientierbare, 90
+reguläre, 30
+Flächenelement, 95
+Formoperator, siehe Weingarten-Abbildung
+Fundamentalform
+erste, 94
+zweite, 97
+Fundamentalgruppe, 47
+Gauß-Krümmung, 92, 91–94
+Geometrie, 64
+Gerade, 64
+hyperbolische, 77
+Graph, 37
 Grenzwert, 8
-Cantorsches Diskontinuum, 22 Gruppe
-Ck-Struktur, 29 allgemeine lineare, 22, 26
+Gruppe
+allgemeine lineare, 22, 26
 spezielle lineare, 22
-Decktransformation, 59 topologische, 33
-Decktransformationsgruppe, 59 Gruppe operiert durch Homöomorphismen,
-Deformationsretrakt, 47 61
-dicht, 3 Gruppenaktion, siehe Gruppenoperation
-Diffeomorphismus, 29 Gruppenoperation, 60, 60–63
-Dimension, 34 stetige, 61
-diskret, 53
-Doppelverhältnis, 83 Häufungspunkt, 107
-Dreibein Hülle
-begleitendes, 89 konvexe, 34
+topologische, 33
+Gruppe operiert durch Homöomorphismen,
+61
+Gruppenaktion, siehe Gruppenoperation
+Gruppenoperation, 60, 60–63
+stetige, 61
+Häufungspunkt, 107
+Hülle
+konvexe, 34
 Halbebene, 66
-Ebene Halbgerade, 65
-euklidische, 64 Halbraum, 28
-Eigenvektor, 107 Hauptkrümmung, 92
-Eigenwert, 107 Hilbert-Kurve, 19, 19
+Halbgerade, 65
+Halbraum, 28
+Hauptkrümmung, 92
+Hilbert-Kurve, 19, 19
 113 Stichwortverzeichnis
-Homöomorphismengruppe, 10 lokal, 3
-Homöomorphismus, 9 Lot, 86
-Homologiegruppe, 41 Lotfußpunkt, 86
+Homöomorphismengruppe, 10
+Homöomorphismus, 9
+Homologiegruppe, 41
 Homomorphismus, 101
-Möbiusband, 91
 Homotopie, 44
-Möbiustransformation, 80
 Homotopieklasse, 47
-Mannigfaltigkeit, 24
-Inklusionsabbildung, 47 differenzierbare, 29
-Innenwinkel, 70 geschlossene, 25
-Inneres, 3 glatte, 29
-Inzidenzaxiome, 64 mit Rand, 28
-Isometrie, 6, 10 Menge
-Isometriegruppe, 10 abgeschlossene, 2
-Isomorphismus, 101 offene, 2
-Isotopie, 20 zusammenhängende, 11
-Metrik, 6
+Inklusionsabbildung, 47
+Innenwinkel, 70
+Inneres, 3
+Inzidenzaxiome, 64
+Isometrie, 6, 10
+Isometriegruppe, 10
+Isomorphismus, 101
+Isotopie, 20
 Jordankurve, 19
-diskrete, 6
 geschlossene, 19
-hyperbolische, 84
-SNCF, 8
 Karte, 24
 Kartenwechsel, 28
-Nebenwinkel, 86
 Kern
-Neilsche Parabel, 27
 offener, 3
-Normalenfeld, 90
 Kleeblattknoten, 20
-Normalenvektor, 87, 89
 Klumpentopologie, siehe triviale Topologie
-Normalkrümmung, 91, 92, 98
 Knoten, 20, 17–21
-äquivalente, 20 Oktaeder, 34
-trivialer, 20 Orthonormalbasis, 107
+äquivalente, 20
+trivialer, 20
 Knotendiagramm, 20
-Paraboloid
 kollinear, 65
-hyperbolisches, 92
 kongruent, siehe isometrisch
-Parallele, 66
 Kongruenz, siehe Isometrie
-Parallelenaxiom, 64
 Kongruenzsatz
-parametrisiert
 SSS, 104
-durch Bogenlänge, 87
 SWS, 69
-Parametrisierung
 SWW, 74
-reguläre, 30
 WSW, 70
-Polyzylinder, 17
 Krümmung, 88, 89
-Produkttopologie, 4
 Kreis, 37
-Projektion
 Kreuzprodukt, 107
-stereographische, 11
 Kurve, 87
-Punkt, 34
 Länge einer, 87
-Quotiententopologie, 5, 10, 11
 Lage
 allgemeine, 34
-Rand, 3, 28
 Lehrsatz
-Raum
 Binomischer, 107
-hausdorffscher, 8
 Lie-Gruppe, 33
-kompakter, 14
 liegt zwischen, 65
-metrischer, 6
 Liftung, 54
-projektiver, 5, 22, 25, 52
 Limes, 8
+lokal, 3
+Lot, 86
+Lotfußpunkt, 86
+Möbiusband, 91
+Möbiustransformation, 80
+Mannigfaltigkeit, 24
+differenzierbare, 29
+geschlossene, 25
+glatte, 29
+mit Rand, 28
+Menge
+abgeschlossene, 2
+offene, 2
+zusammenhängende, 11
+Metrik, 6
+diskrete, 6
+hyperbolische, 84
+SNCF, 8
+Nebenwinkel, 86
+Neilsche Parabel, 27
+Normalenfeld, 90
+Normalenvektor, 87, 89
+Normalkrümmung, 91, 92, 98
+Oktaeder, 34
+Orthonormalbasis, 107
+Paraboloid
+hyperbolisches, 92
+Parallele, 66
+Parallelenaxiom, 64
+parametrisiert
+durch Bogenlänge, 87
+Parametrisierung
+reguläre, 30
+Polyzylinder, 17
+Produkttopologie, 4
+Projektion
+stereographische, 11
+Punkt, 34
+Quotiententopologie, 5, 10, 11
+Rand, 3, 28
+Raum
+hausdorffscher, 8
+kompakter, 14
+metrischer, 6
+projektiver, 5, 22, 25, 52
 114 Stichwortverzeichnis
-topologischer, 2 verträglich, 29
+topologischer, 2
 zusammenhängender, 11
-Würfel, 34
 Realisierung
-Weg, 17
 geometrische, 34
-einfacher, 17
 Retraktion, 47
-geschlossener, 17
-Satz von homotope, 44
-Gauß-Bonnet, 98 inverser, 48
-Scheitelwinkel, 86 zusammengesetzter, 46
-Seite, 34 Wegzusammenhang, 18
-Sierpińskiraum, 3, 22 Weingarten-Abbildung, 95
-Simplex, 34 Winkel, 70
+Satz von
+Gauß-Bonnet, 98
+Scheitelwinkel, 86
+Seite, 34
+Sierpińskiraum, 3, 22
+Simplex, 34
 Simplizialkomplex, 34
-Zusammenhang, 11–14
 Simplizialkomplexe
-Zusammenhangskomponente, 13
 flächengleiche, 74
-Zwischenwertsatz, 107
 Sphäre
 exotische, 29
 Standard-Simplex, 34
@@ -6022,3 +14890,17 @@ Umgebungsbasis, 58
 vanishing set, 26
 Vektorprodukt, siehe Kreuzprodukt
 Verklebung, 26
+verträglich, 29
+Würfel, 34
+Weg, 17
+einfacher, 17
+geschlossener, 17
+homotope, 44
+inverser, 48
+zusammengesetzter, 46
+Wegzusammenhang, 18
+Weingarten-Abbildung, 95
+Winkel, 70
+Zusammenhang, 11–14
+Zusammenhangskomponente, 13
+Zwischenwertsatz, 107
diff --git a/read/results/pdftotext/1601.03642.txt b/read/results/pdftotext/1601.03642.txt
index b6b8e05..85909a9 100644
--- a/read/results/pdftotext/1601.03642.txt
+++ b/read/results/pdftotext/1601.03642.txt
@@ -39,17 +39,14 @@ w3
 
 wn
 
-(a) Example of an artificial neuron unit.(b)
-xi are the input signals and wi are
-weights which have to get learned.
+(a) Example of an artificial neuron unit.(b) A visualization of a simple feedxi are the input signals and wi are
+forward neural network. The 5 inweights which have to get learned. put nodes are red, the 2 bias nodes
 Each input signal gets multiplied
-with its weight, everything gets
-summed up and the activation function ϕ is applied.
-
-A visualization of a simple feedforward neural network. The 5 input nodes are red, the 2 bias nodes
 are gray, the 3 hidden units are
+with its weight, everything gets
 green and the single output node
-is blue.
+summed up and the activation func- is blue.
+tion ϕ is applied.
 
 Fig. 1: Neural networks are based on simple units which get
 combined to complex networks.
@@ -694,35 +691,21 @@ along with this program; if not, write to the Free Software Foundation,
 *
 * Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
-#include
-#include
-#include
-#include
-#include
-
-<linux/kexec.h>
-<linux/errno.h>
-<linux/io.h>
-<linux/platform_device.h>
-<linux/multi.h>
+#include <linux/kexec.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/multi.h>
 
 8
 
 #include <linux/ckevent.h>
-#include
-#include
-#include
-#include
-#include
-#include
-
-<asm/io.h>
-<asm/prom.h>
-<asm/e820.h>
-<asm/system_info.h>
-<asm/setew.h>
-<asm/pgproto.h>
-
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/e820.h>
+#include <asm/system_info.h>
+#include <asm/setew.h>
+#include <asm/pgproto.h>
 #define REG_PG
 vesa_slot_addr_pack
 #define PFM_NOCOMP AFSR(0, load)
diff --git a/read/results/pdftotext/1602.06541.txt b/read/results/pdftotext/1602.06541.txt
index a643151..fad0f1c 100644
--- a/read/results/pdftotext/1602.06541.txt
+++ b/read/results/pdftotext/1602.06541.txt
@@ -100,21 +100,16 @@ used layered models [YHRF12].
 C. Input Data
 The available data which can be used for the
 inference of a segmentation varies by application.
-•
-
-•
-
-•
-
 Grayscale vs colored: Grayscale images are
 commonly used in medical imaging such as
 magnetic resonance (MR) imaging or ultrasonography whereas colored photographs are obviously
 widespread.
-Excluding or including depth data: RGB-D,
+• Excluding or including depth data: RGB-D,
 sometimes also called range [HJBJ+ 96] is available in robotics, autonomous cars and recently
 also in consumer electronics such as Microsoft
 Kinect [Zha12].
-Single image vs stereo images vs cosegmentation: Single image segmentation is the
+• Single
+image vs stereo images vs cosegmentation: Single image segmentation is the
 most wide-spread kind of segmentation, but using
 stereo images was already tried in [BVZ01]. It can
 be seen as a more natural way of segmentation as
@@ -130,10 +125,11 @@ after the first can be used as an additional source
 of information to find a meaningful segmentation.
 This idea can be extended to time series such as
 videos.
-2D vs 3D: Segmenting images is a 2D segmentation task where the smallest unit is called a pixel.
+• 2D vs 3D: Segmenting images is a 2D segmentation task where the smallest unit is called a pixel.
 In 3D data, such as volumetric X-ray CT images
 as they were used in [HHR01], the smallest unit
 is called a voxel.
+•
 
 (a) Example Scene
 
@@ -159,7 +155,6 @@ of the obtained segmentations is by showing examples
 such as Figure 1.
 However, this can only support the explanation of
 particular problems or showcase special situation. For
-•
 meaningful information about the overall accuracy, there
 are a couple of metrics how accuracy can be defined.
 For this section, let k ∈ N be the number of classes,
@@ -1851,6 +1846,9 @@ Classes
 5
 
 Channels
+
+Data source
+
 3
 3
 3
@@ -1860,7 +1858,6 @@ Channels
 3
 3
 
-Data source
 [CRSS]
 [KKV+ 14]
 [FKG13]
diff --git a/read/results/pdftotext/1707.09725.txt b/read/results/pdftotext/1707.09725.txt
index 938a193..718ee7a 100644
--- a/read/results/pdftotext/1707.09725.txt
+++ b/read/results/pdftotext/1707.09725.txt
@@ -381,30 +381,26 @@ channels. The filter F is convolved with the image I ∈ Rw×h×d to produce a n
 The output image I 0 has only one channel. Each pixel I 0 (x, y) of the output image gets
 calculated by point-wise multiplication of one filter element with one element of the original
 image I:
-b k2w c
 
 I 0 (x, y) =
 
-ix =1−d k2w
+k
 
-kh
+b k2w c
 
-c
-2
-X
+b 2h c
 
-b
+X
 
 X
-e
 
 d
 X
 
 I(x + ix , y + iy , ic ) · F (ix , iy , ic )
 
-k
-iy =1−d 2h e ic =1
+ix =1−d k2w e iy =1−d kh e ic =1
+2
 
 This procedure is explained by Figure 2.1. It is essentially a discrete convolution.
 
@@ -656,7 +652,6 @@ wij · xj
 [2.1]
 
 j=1
-b k2w c
 
 o
 
@@ -664,18 +659,15 @@ o
 
 (I) = b +
 
-b
+k
 
-ix =1−d k2w
+b k2w c
 
-kh
+b 2h c
 
-c
-2
 X
 
 X
-e
 
 d
 X
@@ -684,8 +676,8 @@ Fz (ix , iy , ic ) · I(x + ix , y + iy , ic )
 
 [2.2]
 
-k
-iy =1−d 2h e ic =1
+ix =1−d k2w e iy =1−d kh e ic =1
+2
 
 with a bias b ∈ R, x ∈ { 1, . . . , w } , y ∈ { 1, . . . , h } and z ∈ { 1, . . . , d }
 
@@ -816,14 +808,7 @@ picked with probability pi = P ai aj . This assumes the activations ai are non-n
 aj ∈A
 
 Pooling is applied for three reasons: To get local translational invariance, to get invariance
-against minor local changes and, most important, for data reduction to
-
-1
-th
-s2
-
-of the data by
-
+against minor local changes and, most important, for data reduction to s12 th of the data by
 using strides of s > 1.
 See Figure 2.3 for a visualization of max pooling.
 2
@@ -896,9 +881,7 @@ Figure 2.3.: 2 × 2 max pooling applied to a feature map of size 6 × 4 with str
 Average pooling of p × p areas with stride s can be replaced by a convolutional layer. If
 the input of the pooling layer are d(i−1) feature maps, the convolutional layer has to have
 d(i−1) filters of size p × p and stride s. The ith filter has the values
-
-
-1
+1
 p2
 
 .
@@ -967,11 +950,8 @@ B)i,j := (A)i,j (B)i,j
 
 Hence every value of the input gets set to zero with a dropout probability of p. Typically,
 Dropout is used with p = 0.5. Layers closer to the input usually have a lower dropout probability than later layers. In order to keep the expected output at the same value, the
-output of a dropout layer is multiplied with
-
 1
-1−p
-
+output of a dropout layer is multiplied with 1−p
 when dropout is enabled [Las17, tf-16b].
 
 At inference time, dropout is disabled.
@@ -1001,28 +981,19 @@ point-wise to
 x(k) − x̄(k)
 x̂(k) = p
 s0 [x(k) ]2 + ε
-with x̄(k) =
-
 1
-m
+with x̄(k) = m
 
 (k)
-i=1 xi
-
-Pm
-
-being the sample mean and s0 [x(k) ]2 =
-
-1
-m
-
 (k)
-i=1 (xi
+1 Pm
+(k)
+being the sample mean and s0 [x(k) ]2 = m
+i=1 xi
+i=1 (xi − x̄ ) the
 
 Pm
 
-− x̄(k) ) the
-
 sample variance where m ∈ N≥1 is the number of training samples per mini-batch, ε > 0
 (k)
 
@@ -1144,13 +1115,9 @@ aspect to using the group network without an aggregation block.
 Dense blocks are collections of convolutional layers which are introduced in [HLW16]. The
 idea is to connect each convolutional layer directly to subsequent convolutional layers.
 Traditional CNNs with L layers and one input layer have L connections between layers,
-but dense blocks have
-
-L(L+1)
-2
-
+but dense blocks have L(L+1)
 connections between layers. The input feature maps are
-
+2
 concatenated in depth. According to the authors, this prevents features from being relearned and allows much fewer filters per convolutional layer. Where AlexNet and VGG-16
 have several hundred filters per convolutional layer (see Tables D.2 and D.3), the authors
 used only on the order of 12 feature maps per layer.
@@ -1239,25 +1206,25 @@ P PK
 diagonal cii and all wrong classifications are of the diagonal. The sum K
 i=1
 j=1 cij is the
-total number of samples which were evaluated and
-
 P
-cii
-PK
-PK i=1
-i=1
 
-The sums r(i) =
+c
 
-PK
+i=1
 
 j=1 cij
 
-j=1 cij
+ii
+PK
+total number of samples which were evaluated and PK i=1
+
+The sums r(i) =
 
 is the accuracy.
 
-of each class i are worth being investigated as they show if the
+PK
+
+j=1 cij of each class i are worth being investigated as they show if the
 
 classes are skewed. If the number of samples of one class dominates the data set, then the
 classifier can get a high accuracy by simply always prediction the most common class. If
@@ -1450,9 +1417,10 @@ One problem of accuracy as a quality criterion are skewed classes. If one class
 more common than all other classes, then the simplest way to achieve a high score is to
 always classify everything as the most common class.
 In order to fix this problem, one can use the mean accuracy:
+k
+
 mean-accuracy(c) =
 
-k
 1 X cii
 ·
 ∈ [0, 1]
@@ -1646,9 +1614,11 @@ VGG-16 (see Appendix D.3) have many filters which are highly correlated. They fo
 this by comparing the averaged maximum k-translational correlation of the networks with
 Gaussian-distributed initialized filters. The averaged maximum k-translational correlation
 is defined as
-ρ̄k (W) =
 
 N
+
+ρ̄k (W) =
+
 1 X N
 max ρk (Wi , Wj )
 j=1,j6=i
@@ -2132,17 +2102,6 @@ Conv-Block(2) is added at the input. For MNIST, the images are bilinearly upsamp
 15
 16
 
-Input
-Convolution
-BN + ELU
-Convolution
-BN + ELU
-Max pooling
-
-Filters @
-Patch size / stride
-32 @ 3 × 3 × 3
-
 Parameters
 
 FLOPs
@@ -2163,80 +2122,12 @@ Output size
 163 904
 40 960
 
-3 @ 32 ×
-32 @ 32 ×
-32 @ 32 ×
-32 @ 32 ×
-32 @ 32 ×
-32 @ 16 ×
-
-32
-32
-32
-32
-32
-16
-
-9 420 800
-82 048
-18 857 984
-82 048
-20 480
-4 714 496
-20 608
-5 120
-1 048 064
-3 584
-0
-523 776
-3 584
-0
-1024 · k
-k
-7k
-
-64 @ 16 ×
-64 @ 16 ×
-64 @ 16 ×
-64 @ 16 ×
-64 @ 8 ×
-64 @ 8 ×
-64 @ 8 ×
-64 @ 4 ×
-512 @ 1 ×
-512 @ 1 ×
-512 @ 1 ×
-512 @ 1 ×
-512 @ 1 ×
-512 @ 1 ×
-k @ 1×
-k @ 1×
-k @ 1×
-
-16
-16
-16
-16
-8
-8
-8
-4
-1
-1
-1
-1
-1
-1
-1
-1
-1
-
-/1
-
-32 @ 3 × 3 × 32 / 1
-2×2
-
-/2
+3 @ 32 × 32
+32 @ 32 × 32
+32 @ 32 × 32
+32 @ 32 × 32
+32 @ 32 × 32
+32 @ 16 × 16
 
 Convolution
 64 @ 3 × 3 × 32 / 1
@@ -2280,16 +2171,71 @@ Global avg Pooling
 0
 BN + Softmax
 2k
+
+9 420 800
+82 048
+18 857 984
+82 048
+20 480
+4 714 496
+20 608
+5 120
+1 048 064
+3 584
+0
+523 776
+3 584
+0
+1024 · k
+k
+7k
+
+64 @ 16 × 16
+64 @ 16 × 16
+64 @ 16 × 16
+64 @ 16 × 16
+64 @ 8 × 8
+64 @ 8 × 8
+64 @ 8 × 8
+64 @ 4 × 4
+512 @ 1 × 1
+512 @ 1 × 1
+512 @ 1 × 1
+512 @ 1 × 1
+512 @ 1 × 1
+512 @ 1 × 1
+k @ 1× 1
+k @ 1× 1
+k @ 1× 1
+
 515k
 +892 512
 
-P
-
 1032k
 +55 729 664
 
 103 424+2k
 
+Input
+Convolution
+BN + ELU
+Convolution
+BN + ELU
+Max pooling
+
+Filters @
+Patch size / stride
+32 @ 3 × 3 × 3
+
+/1
+
+32 @ 3 × 3 × 32 / 1
+2×2
+
+/2
+
+P
+
 Table 5.1.: Baseline architecture with 3 input channels of size 32 × 32. All convolutional layers
 use SAME padding, except for layer 11 which used VALID padding in order to decrease
 the feature map size to 1 × 1. If the input feature map is bigger than 32 × 32, for
@@ -2303,9 +2249,11 @@ max pooling 2 × 2/2
 16 × 16
 
 max pooling 2 × 2/2
-8×8
 
 max pooling 2 × 2/2
+
+8×8
+
 4×4
 
 C 512@1 × 1/1
@@ -2378,23 +2326,14 @@ Test Set
 94.12 %
 99.02 %
 
-σ
-σ
-σ
-σ
-σ
-σ
-σ
-σ
-
-= 3.49
-= 1.10
-= 1.48
-= 0.00
-= 0.42
-= 0.07
-= 0.87
-= 0.07
+σ = 3.49
+σ = 1.10
+σ = 1.48
+σ = 0.00
+σ = 0.42
+σ = 0.07
+σ = 0.87
+σ = 0.07
 
 94.37 %
 85.84 %
@@ -2405,23 +2344,14 @@ Test Set
 75.67 %
 96.28 %
 
-σ
-σ
-σ
-σ
-σ
-σ
-σ
-σ
-
-= 3.47
-= 0.87
-= 0.55
-= 0.11
-= 0.10
-= 0.06
-= 0.34
-= 0.10
+σ = 3.47
+σ = 0.87
+σ = 0.55
+σ = 0.11
+σ = 0.10
+σ = 0.06
+σ = 0.34
+σ = 0.10
 
 Ensemble of 10
 Training Set Test Set
@@ -2612,13 +2542,9 @@ training. The image might lead to the wrong conclusion that models which are bet
 the start are also better at the end. In order to check this hypothesis, the relative order of
 validation accuracies for the 10 CIFAR-100 models was examined. If the relative ordering
 stays approximately the same, then it can be considered to run the first few epochs many
-times and only train the best models to the end. For 10 models, there can be
-
-102 −10
 2
 
-= 45
-
+times and only train the best models to the end. For 10 models, there can be 10 2−10 = 45
 pair-wise changes in the ordering at maximum if the relative order of validation accuracies
 is reversed. For the baseline model, 21.8 changes in the relative order of accuracies occurred
 in average for each pair of epochs (i, i + 1). This means if one knows only the relative order
@@ -3196,11 +3122,8 @@ Layer
 Filter count
 Baseline New
 
-9
-9
-11
-11
-13
+Total
+parameters
 
 64
 64
@@ -3208,20 +3131,24 @@ Baseline New
 512
 512
 
-638
-974
-3786
-1024
-8704
-
-Total
-parameters
 5 978 566
 8 925 622
 5 982 698
 1 731 980
 5 982 092
 
+9
+9
+11
+11
+13
+
+638
+974
+3786
+1024
+8704
+
 Table 5.7.: Models which are identical to the baseline, except that the number of filters of one layer
 was increased.
 
@@ -3240,6 +3167,9 @@ Model
 Parameters
 
 Single Model
+Mean
+
+std
 
 Training
 
@@ -3249,9 +3179,7 @@ Mean Epochs
 
 Mean Time
 
-Mean
-
-std
+baseline
 
 944 012
 
@@ -3335,8 +3263,6 @@ m13
 
 4485 s
 
-baseline
-
 Table 5.8.: Training time in epochs and wall-clock time for the baseline and models m9 , m11 , m13
 as well as their accuracies.
 54
@@ -3518,13 +3444,14 @@ Mean total
 
 Single model
 
+Ensemble
+
 training time
 
 Accuracy
 
 std
 
-Ensemble
 Accuracy
 
 8
@@ -3544,7 +3471,14 @@ s
 
 16
 
-62
+s
+62 epoch
+s
+35 epoch
+s
+25 epoch
+s
+18 epoch
 
 103 – 173
 
@@ -3556,10 +3490,6 @@ s
 
 66.98 %
 
-32
-
-35
-
 119 – 179
 
 5171 s
@@ -3570,10 +3500,6 @@ s
 
 65.89 %
 
-64
-
-25
-
 133 – 195
 
 2892 s
@@ -3584,19 +3510,6 @@ s
 
 64.70 %
 
-128
-
-18
-
-s
-epoch
-s
-epoch
-s
-epoch
-s
-epoch
-
 145 – 239
 
 3126 s
@@ -3607,6 +3520,10 @@ epoch
 
 63.55 %
 
+32
+64
+128
+
 Table 5.9.: Training time per epoch and single model test set accuracy (mean and standard deviation)
 of baseline models trained with different mini-batch sizes m on GTX 970 GPUs on
 CIFAR-100.
@@ -3792,6 +3709,8 @@ Yes
 
 Yes
 
+ReLU
+
 Yes1
 
 No
@@ -3830,8 +3749,6 @@ Yes
 
 No
 
-ReLU
-
 Table 5.10.: Properties of activation functions.
 
 1
@@ -4483,19 +4400,12 @@ Output size
 353 418
 40 960
 
-3 @ 32 ×
-69 @ 32 ×
-69 @ 32 ×
-69 @ 32 ×
-69 @ 32 ×
-32 @ 16 ×
-
-32
-32
-32
-32
-32
-16
+3 @ 32 × 32
+69 @ 32 × 32
+69 @ 32 × 32
+69 @ 32 × 32
+69 @ 32 × 32
+32 @ 16 × 16
 
 39 808
 128
@@ -4530,41 +4440,23 @@ Output size
 k
 7k
 
-64 @ 16 ×
-64 @ 16 ×
-64 @ 16 ×
-64 @ 16 ×
-64 @ 8 ×
-64 @ 8 ×
-64 @ 8 ×
-64 @ 4 ×
-512 @ 1 ×
-512 @ 1 ×
-512 @ 1 ×
-512 @ 1 ×
-512 @ 1 ×
-512 @ 1 ×
-k @ 1×
-k @ 1×
-k @ 1×
-
-16
-16
-16
-16
-8
-8
-8
-4
-1
-1
-1
-1
-1
-1
-1
-1
-1
+64 @ 16 × 16
+64 @ 16 × 16
+64 @ 16 × 16
+64 @ 16 × 16
+64 @ 8 × 8
+64 @ 8 × 8
+64 @ 8 × 8
+64 @ 4 × 4
+512 @ 1 × 1
+512 @ 1 × 1
+512 @ 1 × 1
+512 @ 1 × 1
+512 @ 1 × 1
+512 @ 1 × 1
+k @ 1× 1
+k @ 1× 1
+k @ 1× 1
 
 514k
 +947 654
@@ -4572,11 +4464,11 @@ k @ 1×
 520k
 +87 870 996
 
+179 200+2k
+
 36 928
 128
 
-179 200+2k
-
 Table 5.14.: Optimized architecture with 3 input channels of size 32 × 32. All convolutional layers
 use SAME padding, except for layer 11 which used VALID padding in order to decrease
 the feature map size to 1 × 1. If the input feature map is bigger than 32 × 32, for each
@@ -4663,23 +4555,14 @@ Test Set
 95.43 %
 99.08 %
 
-σ
-σ
-σ
-σ
-σ
-σ
-σ
-σ
-
-= 4.70
-= 0.70
-= 2.18
-= 0.00
-= 0.45
-= 0.10
-= 3.57
-= 0.07
+σ = 4.70
+σ = 0.70
+σ = 2.18
+σ = 0.00
+σ = 0.45
+σ = 0.10
+σ = 3.57
+σ = 0.07
 
 90.75 %
 87.92 %
@@ -4690,23 +4573,14 @@ Test Set
 75.09 %
 96.37 %
 
-σ
-σ
-σ
-σ
-σ
-σ
-σ
-σ
-
-= 4.73
-= 0.46
-= 0.73
-= 0.10
-= 0.15
-= 0.13
-= 2.39
-= 0.12
+σ = 4.73
+σ = 0.46
+σ = 0.73
+σ = 0.10
+σ = 0.15
+σ = 0.13
+σ = 2.39
+σ = 0.12
 
 Ensemble of 10
 Training Set Test Set
@@ -4905,6 +4779,8 @@ Dataset
 Early Stopping
 val. acc train loss
 
+Fixed epochs
+
 Asirra
 CIFAR-10
 CIFAR-100
@@ -4919,8 +4795,6 @@ STL-10
 99.67 %
 78.66 %
 
-Fixed epochs
-
 96.01 %3
 91.75 %
 71.01 %
@@ -5137,41 +5011,23 @@ Layer
 99-percentile interval
 filter
 bias
-[-0.50,
-[-0.21,
-[-0.20,
-[-0.15,
-[-0.14,
-[-0.08,
-[-0.08,
-[-0.10,
-
-0.48]
-0.19]
-0.17]
-0.14]
-0.15]
-0.08]
-0.08]
-0.11]
-
-[-0.06,
-[-0.07,
-[-0.07,
-[-0.05,
-[-0.04,
-[-0.00,
-[-0.00,
-[-0.01,
-
-0.07]
-0.07]
-0.05]
-0.06]
-0.03]
-0.00]
-0.00]
-0.01]
+[-0.50, 0.48]
+[-0.21, 0.19]
+[-0.20, 0.17]
+[-0.15, 0.14]
+[-0.14, 0.15]
+[-0.08, 0.08]
+[-0.08, 0.08]
+[-0.10, 0.11]
+
+[-0.06, 0.07]
+[-0.07, 0.07]
+[-0.07, 0.05]
+[-0.05, 0.06]
+[-0.04, 0.03]
+[-0.00, 0.00]
+[-0.00, 0.00]
+[-0.01, 0.01]
 
 Table A.1.: 99-percentile intervals for filter weights and bias weights by layer of a baseline model
 trained on CIFAR-100.
@@ -5657,8 +5513,6 @@ He
 
 α=0
 
-β=
-
 γ=0
 
 [HZRS15b]
@@ -5667,6 +5521,8 @@ Orthogonal
 
 —
 
+β = n2in
+
 —
 
 γ=0
@@ -5683,9 +5539,6 @@ LSUV
 
 [MM15]
 
-2
-nin
-
 Table B.2.: Weight initialization schemes of the form w ∼ α · U[−1, 1] + β · N (0, 1) + γ.
 nin , nout are the number of units in the previous layer and the next layer. Typically,
 biases are initialized with constant 0 and weights by one of the other schemes to prevent
@@ -5769,10 +5622,9 @@ t
 
 • Exponential Decay Learning Rate [SHY+ 13]: η(t) = η(0) · 10− k where t ∈ N0 is the
 training step, η(0) is the initial learning rate, k ∈ N≥1 is the number of training steps
-until the learning rate is decreased by
-
 1
-10 th.
+th.
+until the learning rate is decreased by 10
 
 • Newbob Scheduling [new00]: Start with Performance Scheduling, then use Exponential
 Decay Scheduling.
@@ -5808,9 +5660,8 @@ CNNs have the following hyperparameters:
 Name
 Sign function†
 Heaviside
-function†
+step function†
 
-step
 Logistic function
 
 Function ϕ(x)
@@ -5854,8 +5705,11 @@ Tanh
 1+e−x
 x
 e −e−x
+= tanh(x)
 ex +e−x
 
+[−1, 1]
+
 ReLU†
 
 max(0, x)
@@ -5879,32 +5733,28 @@ log(e
 Softplus
 ELU
 
-= tanh(x)
-
 if x > 0
 
 α(ex − 1) if x ≤ 0
+xj
+
+(−∞, +∞)
 
 Softmax‡
 
-o(x)j =
+o(x)j = PKe
 
-xj
-PKe
-k=1
+[0, 1]K
 
 Maxout‡
 
-exk
-
 o(x) = maxx∈x x
 
-[−1, 1]
-
-(−∞, +∞)
-[0, 1]K
 (−∞, +∞)
 
+xk
+k=1 e
+
 sech
  (x)
 1 if x > 0
@@ -5959,18 +5809,17 @@ as it produces a probability distribution. See Figure B.1 for a plot of some of
 α is a hyperparameter in leaky ReLU, but a learnable parameter in the parametric ReLU function.
 
 2.0
-1
-1+e−x
-
-ϕ1 (x) =
+ϕ1 (x) = 1+e1−x
 ϕ2 (x) = tanh(x)
-ϕ3 (x) = max(0, x)
-ϕ4 (x) = log(ex + 1)
-ϕ5 (x) = max(x, ex − 1)
 
 y
 
 1.5
+
+ϕ3 (x) = max(0, x)
+ϕ4 (x) = log(ex + 1)
+ϕ5 (x) = max(x, ex − 1)
+
 1.0
 0.5
 x
@@ -6055,14 +5904,9 @@ FLOPs. The total number of FLOPs is 2n · (k · w · h) + n · nϕ .
 • As Dropout is only calculated during training, the number of FLOPs was set to 0.
 • The number of FLOPs for max pooling is dominated by the number of positions to
 which the pooling kernel is applied. For a feature map of size w × h a max pooling
-filter with stride s gets applied
-
-w·h
-.
+filter with stride s gets applied w·h
+. The number of FLOPs per application depends
 s2
-
-The number of FLOPs per application depends
-
 on the kernel size. A 2 × 2 kernel is assumed to need 5 FLOPs.
 • The number of FLOPs for Batch Normalization is the same as the number of its
 parameters.
@@ -6153,6 +5997,8 @@ Parameters
 
 FLOPs
 
+Output size
+
 0
 156
 2
@@ -6171,11 +6017,6 @@ FLOPs
 20 580
 1 730
 
-61 710
-
-15 144 446
-
-Output size
 1 @ 32 × 32
 6 @ 28 × 28
 6 @ 14 × 14
@@ -6184,6 +6025,11 @@ Output size
 120
 84
 10
+
+61 710
+
+15 144 446
+
 9118
 
 Table D.1.: LeNet-5 architecture: After layers 1, 3, 5 and 6 the tanh activation function is applied.
@@ -6243,30 +6089,6 @@ Parameters
 FLOPs
 
 Output size
-
-0
-885 120
-663 936
-442 624
-0
-37 752 832
-16 781 312
-4 097 000
-
-211 M
-12 M
-301 k
-448 M
-3M
-50 k
-299 M
-224 M
-150 M
-50 k
-75 M
-34 M
-8M
-
 3 @ 224 × 224
 96 @ 55 × 55
 96 @ 55 × 55
@@ -6281,11 +6103,6 @@ Output size
 4096
 4096
 1000
-
-60 965 224
-
-3300 M
-
 1 122 568
 
 96 @ 11 × 11 × 3 / 4
@@ -6300,19 +6117,42 @@ Output size
 307 456
 
 3×3
-384 @ 3 × 3 × 256
-384 @ 3 × 3 × 192
-256 @ 3 × 3 × 192
+/2
+384 @ 3 × 3 × 256 / 1
+384 @ 3 × 3 × 192 / 1
+256 @ 3 × 3 × 192 / 1
 3×3
+/2
 4096 neurons
 4096 neurons
 1000 neurons
 
-/2
-/1
-/1
-/1
-/2
+0
+885 120
+663 936
+442 624
+0
+37 752 832
+16 781 312
+4 097 000
+
+211 M
+12 M
+301 k
+448 M
+3M
+50 k
+299 M
+224 M
+150 M
+50 k
+75 M
+34 M
+8M
+
+60 965 224
+
+3300 M
 
 Table D.2.: AlexNet architecture: One special case of AlexNet is grouping of convolutions due to
 computational restrictions at the time of its development. This also reduces the number
@@ -6343,15 +6183,19 @@ Input
 224 × 224
 
 max pooling 2 × 2/1
-112 × 112
 
 max pooling 2 × 2/1
-56 × 56
 
 max pooling 2 × 2/1
-28 × 28
 
 max pooling 2 × 2/1
+
+112 × 112
+
+56 × 56
+
+28 × 28
+
 14 × 14
 
 max pooling 2 × 2/1
@@ -6444,46 +6288,6 @@ FC
 
 Filters @
 Patch size / stride
-64 @ 3 × 3 × 3
-64 @ 3 × 3 × 64
-2×2
-128 @ 3 × 3 × 64
-128 @ 3 × 3 × 128
-2×2
-256 @ 3 × 3 × 128
-256 @ 3 × 3 × 256
-256 @ 3 × 3 × 256
-2×2
-512 @ 3 × 3 × 256
-512 @ 3 × 3 × 512
-512 @ 3 × 3 × 512
-2×2
-512 @ 3 × 3 × 512
-512 @ 3 × 3 × 512
-512 @ 3 × 3 × 512
-2×2
-4096 neurons
-4096 neurons
-1000 neurons
-
-/1
-/1
-/2
-/1
-/1
-/2
-/1
-/1
-/1
-/2
-/1
-/1
-/1
-/2
-/1
-/1
-/1
-/2
 
 Parameters
 
@@ -6491,6 +6295,31 @@ FLOPs
 
 Output size
 
+64 @ 3 × 3 × 3 / 1
+64 @ 3 × 3 × 64 / 1
+2×2
+/2
+128 @ 3 × 3 × 64 / 1
+128 @ 3 × 3 × 128 / 1
+2×2
+/2
+256 @ 3 × 3 × 128 / 1
+256 @ 3 × 3 × 256 / 1
+256 @ 3 × 3 × 256 / 1
+2×2
+/2
+512 @ 3 × 3 × 256 / 1
+512 @ 3 × 3 × 512 / 1
+512 @ 3 × 3 × 512 / 1
+2×2
+/2
+512 @ 3 × 3 × 512 / 1
+512 @ 3 × 3 × 512 / 1
+512 @ 3 × 3 × 512 / 1
+2×2
+/2
+4096 neurons
+
 1 792
 36 928
 0
@@ -6570,6 +6399,9 @@ Output size
 
 15 245 800
 
+4096 neurons
+1000 neurons
+
 Table D.3.: VGG-16 D architecture: The authors chose to give only layers a number which have
 learnable parameters. All convolutions are zero padded to prevent size changes and
 use ReLU activation functions. The channels mean is subtracted from each pixel as
@@ -6626,9 +6458,6 @@ P
 3×
 
 Type
-
-Parameters
-
 Input
 Stem
 Inception A
@@ -6640,6 +6469,10 @@ Global Average Pooling
 Dropout (p=0.8)
 Softmax
 
+Parameters
+
+Output size
+
 605 728
 317 632
 2 306 112
@@ -6650,36 +6483,15 @@ Softmax
 0
 1 537 000
 
-Output size
-3
-384
-384
-1024
-1024
-1536
-1536
-1536
-1536
-
-@
-@
-@
-@
-@
-@
-@
-@
-@
-
-299 × 299
-35 × 35
-35 × 35
-17 × 17
-17 × 17
-8× 8
-8× 8
-1× 1
-1× 1
+3 @ 299 × 299
+384 @ 35 × 35
+384 @ 35 × 35
+1024 @ 17 × 17
+1024 @ 17 × 17
+1536 @ 8 × 8
+1536 @ 8 × 8
+1536 @ 1 × 1
+1536 @ 1 × 1
 1000
 
 42 679 816
@@ -6849,6 +6661,13 @@ GTSRB
 
 [SSSI, SSSI12]
 
+Asirra3
+
+(4 px − 500 px)
+×(4 px − 500 px)
+480 px × 640 px
+and 640 px × 480 px
+
 25 000
 
 2
@@ -6865,14 +6684,8 @@ GTSRB
 
 [Mar08, MS07]
 
-Asirra3
 Graz-02
 
-(4 px − 500 px)
-×(4 px − 500 px)
-480 px × 640 px
-and 640 px × 480 px
-
 Table E.1.: An overview over publicly available image databases for classification. The number
 of images row gives the sum of the training and the test images. Some datasets, like
 SVHN, have additional unlabeled data which is not given in this table.
@@ -6886,55 +6699,58 @@ Asirra is a CAPTCHA created by Microsoft and was used in the “Cats vs Dogs”
 
 97
 
-Dataset
-
-Model type / name
-
-MNIST
-
-—
+Result
 
-HASYv2
+Score
 
-TF-CNN
+Achieved /
 
-SVHN
+Dataset
 
-DenseNet (k = 24)
+Model type / name
 
-CIFAR-10
+MNIST
 
-DenseNet-BC (k = 40)
+—
 
-CIFAR-100
+0.21 %
 
-Result
+error
 
-Score
+[WZZ+ 13]
 
-Achieved /
-Claimed by
+HASYv2
 
-error
+TF-CNN
 
-[WZZ+ 13]
+81.00 %
 
 accuracy
 
 [Tho17a]
 
+SVHN
+
+DenseNet (k = 24)
+
 1.59 %
 
 error
 
 [HLW16]
 
+CIFAR-10
+
+DenseNet-BC (k = 40)
+
 3.46 %
 
 error
 
 [HLW16]
 
+CIFAR-100
+
 WRN-28-10
 
 16.21 %
@@ -7013,8 +6829,7 @@ accuracy
 
 [BMDP10]
 
-0.21 %
-81.00 %
+Claimed by
 
 Table E.2.: An overview over state of the art results achieved in computer vision datasets.
 
@@ -7537,13 +7352,14 @@ Available: https://arxiv.org/abs/1202.2745v1
 
 D.-A. Clevert, T. Unterthiner, and S. Hochreiter, “Fast and accurate
 deep network learning by exponential linear units (ELUs),”
-preprint arXiv:1511.07289,
+
+arXiv
 
 Nov. 2015. [Online]. Available:
 
-arXiv
 https:
 
+preprint arXiv:1511.07289,
 //arxiv.org/abs/1511.07289
 [CWV+ 14]
 
diff --git a/read/results/pdftotext/2201.00021.txt b/read/results/pdftotext/2201.00021.txt
index ac44757..ad6241a 100644
--- a/read/results/pdftotext/2201.00021.txt
+++ b/read/results/pdftotext/2201.00021.txt
@@ -7,27 +7,26 @@ Discovery of ammonia (9,6) masers in two high-mass star-forming
 regions
 Y. T. Yan (闫耀庭)1,? , C. Henkel1, 2, 3 , K. M. Menten1 , Y. Gong (龚龑)1 , J. Ott4 , T. L. Wilson1 , A. Wootten4 , A.
 Brunthaler1 , J. S. Zhang (张江水)5 , J. L. Chen (陈家梁)5 , and K. Yang (杨楷)6, 7
-1
-2
-3
 
 arXiv:2201.00021v3 [astro-ph.GA] 9 Apr 2022
 
-4
-5
-6
-7
+1
 
 Max-Planck-Institut für Radioastronomie, Auf dem Hügel 69, 53121 Bonn, Germany
 e-mail: yyan@mpifr-bonn.mpg.de
+2
 Astronomy Department, Faculty of Science, King Abdulaziz University, P. O. Box 80203, Jeddah 21589, Saudi Arabia
+3
 Xinjiang Astronomical Observatory, Chinese Academy of Sciences, 830011 Urumqi, PR China
+4
 National Radio Astronomy Observatory, 520 Edgemont Road, Charlottesville, VA 22903-2475, USA
+5
 Center for Astrophysics, Guangzhou University, 510006 Guangzhou, People’s Republic of China
+6
 School of Astronomy and Space Science, Nanjing University, 163 Xianlin Avenue, Nanjing 210023, People’s Republic of China
+7
 Key Laboratory of Modern Astronomy and Astrophysics (Nanjing University), Ministry of Education, Nanjing 210023, People’s
 Republic of China
-
 Received 13 December 2021 / Accepted 30 December 2021
 ABSTRACT
 Context. Molecular maser lines are signposts of high-mass star formation, probing the excitation and kinematics of very compact
diff --git a/read/results/pdftotext/2201.00022.txt b/read/results/pdftotext/2201.00022.txt
index 078cc41..4c7eda3 100644
--- a/read/results/pdftotext/2201.00022.txt
+++ b/read/results/pdftotext/2201.00022.txt
@@ -1,28 +1,33 @@
-Draft version January 4, 2022
+Draft version July 7, 2022
 Typeset using LATEX twocolumn style in AASTeX631
 
 The Formation of Intermediate Mass Black Holes in Galactic Nuclei
 Sanaea C. Rose,1, 2 Smadar Naoz,1, 2 Re’em Sari,3 and Itai Linial3
-1 Department
+1 Department of Physics and Astronomy, University of California, Los Angeles, CA 90095, USA
+2 Mani L. Bhaumik Institute for Theoretical Physics, University of California, Los Angeles, CA 90095, USA
 
-arXiv:2201.00022v1 [astro-ph.GA] 31 Dec 2021
+arXiv:2201.00022v2 [astro-ph.GA] 6 Jul 2022
 
-2 Mani
-
-of Physics and Astronomy, University of California, Los Angeles, CA 90095, USA
-L. Bhaumik Institute for Theoretical Physics, University of California, Los Angeles, CA 90095, USA
 3 Racah Institute for Physics, The Hebrew University, Jerusalem 91904, Israel
 
 ABSTRACT
 Most stellar evolution models predict that black holes (BHs) should not exist above approximately
-50 − 70 M . However, recent LIGO/Virgo detections indicate the existence of BHs with masses at and
-above this threshold. We suggest that massive BHs, including intermediate mass black holes (IMBHs),
-can form in galactic nuclei through collisions between stellar-mass black holes and the surrounding
-main-sequence stars. Considering dynamical processes such as collisions, mass segregation, and relaxation, we find that this channel can be quite efficient, forming IMBHs as massive as 104 M . Our
-results suggest that massive black holes and IMBHs may be ubiquitous in galactic centres. This formation channel also has implications for observations. Collisions between stars and BHs can produce
-electromagnetic signatures, for example, from x-ray binaries and tidal disruption events. Additionally,
-formed through this channel, both black holes in the mass gap and IMBHs can merge with the supermassive black hole at the center of a galactic nucleus through gravitational waves. These gravitational
-wave events are extreme and intermediate mass ratio inspirals (EMRIs and IMRIs, respectively).
+50 − 70 M , the lower limit of the pair-instability mass gap. However, recent LIGO/Virgo detections
+indicate the existence of BHs with masses at and above this threshold. We suggest that massive
+BHs, including intermediate mass black holes (IMBHs), can form in galactic nuclei through collisions
+between stellar-mass black holes and the surrounding main-sequence stars. Considering dynamical
+processes such as collisions, mass segregation, and relaxation, we find that this channel can be quite
+efficient, forming IMBHs as massive as 104 M . This upper limit assumes that (1) the BHs accrete a
+substantial fraction of the stellar mass captured during each collision and (2) that the rate at which
+new stars are introduced into the region near the SMBH is high enough to offset depletion by stellar
+disruptions and star-star collisions. We discuss deviations from these key assumptions in the text. Our
+results suggest that BHs in the pair-instability mass gap and IMBHs may be ubiquitous in galactic
+centers. This formation channel has implications for observations. Collisions between stars and BHs
+can produce electromagnetic signatures, for example, from x-ray binaries and tidal disruption events.
+Additionally, formed through this channel, both black holes in the mass gap and IMBHs can merge
+with the supermassive black hole at the center of a galactic nucleus through gravitational waves.
+These gravitational wave events are extreme and intermediate mass ratio inspirals (EMRIs and IMRIs,
+respectively).
 1. INTRODUCTION
 
 The recently detected gravitational wave source
@@ -37,19 +42,18 @@ more than <
 GW170104, and GW170814 fall within the mass gap
 (e.g., Abbott et al. 2016, 2017a,b). BH mergers that
 form second generation BHs and, in some cases, intermediate mass BHs (IMBHs), these gravitational wave
-(GW) events can occur in globular clusters, young stellar clusters, or the field (e.g., Rodriguez et al. 2018; Rodriguez et al. 2019; Fishbach et al. 2020; Mapelli et al.
-2021b,a; Di Carlo et al. 2019, 2021; Dall’Amico et al.
-2021; Arca Sedda et al. 2021). However, IMBHs are
-not limited to these locations and may reside in galacCorresponding author: Sanaea C. Rose
+(GW) events can occur in globular clusters, young stelCorresponding author: Sanaea C. Rose
 srose@astro.ucla.edu
-1
+1 Note that the exact lower and upper limits may be sensitive to
 
-Note that the exact lower and upper limits may be sensitive to
 metallicity of the progenitor (e.g., Woosley 2017; Spera & Mapelli
 2017a; Limongi & Chieffi 2018a; Sakstein et al. 2020; Belczynski
 et al. 2020a; Renzo et al. 2020; Vink et al. 2021).
 
-tic nuclei as well. Several studies propose that our
+lar clusters, or the field (e.g., Rodriguez et al. 2018; Rodriguez et al. 2019; Fishbach et al. 2020; Mapelli et al.
+2021b,a; Di Carlo et al. 2019, 2021; Dall’Amico et al.
+2021; Arca Sedda et al. 2021). However, IMBHs are
+not limited to these locations and may reside in galactic nuclei as well. Several studies propose that our
 own galactic center may host an IMBH in the inner pc
 (e.g., Hansen & Milosavljević 2003; Maillard et al. 2004;
 Gürkan & Rasio 2005; Gualandris & Merritt 2009; Chen
@@ -63,26 +67,27 @@ as a result of the very first stars (e.g., Madau & Rees
 Valiante et al. 2016) or from direct collapse of accumulated gas (e.g., Begelman et al. 2006; Yue et al. 2014;
 Ferrara et al. 2014; Choi et al. 2015; Shlosman et al.
 2016). These high redshift IMBHs would need to survive galaxy evolution and mergers to present day (e.g.,
+
+2
+
+Rose et al.
+
 Rashkov & Madau 2014), with significant effects on their
 stellar and even dark matter surroundings (e.g., Bertone
 et al. 2009; Chen & Liu 2013; Bringmann et al. 2012; Eda
 et al. 2013; Naoz & Silk 2014; Naoz et al. 2019). Another
 popular formation channel relies on the coalescence of
-many stellar-mass black holes. For example, IMBHs
+many stellar-mass black holes, which may seed objects
+as massive as SMBHs (e.g., Kroupa et al. 2020). IMBHs
 may form in the centers of globular clusters, where fewbody interactions lead to the merger of stellar-mass BHs
 (e.g., O’Leary et al. 2006; Gürkan et al. 2006; Blecha
-et al. 2006; Freitag et al. 2006; Umbreit et al. 2012; Ro-
-
-2
-
-Rose et al.
-
-driguez et al. 2018; Rodriguez et al. 2019; Fragione et al.
+et al. 2006; Freitag et al. 2006; Umbreit et al. 2012; Rodriguez et al. 2018; Rodriguez et al. 2019; Fragione et al.
 2020b). Other formation mechanisms invoke successive
-collisions and mergers of massive stars (e.g., Portegies
-Zwart & McMillan 2002; Portegies Zwart et al. 2004;
-Freitag et al. 2006; Kremer et al. 2020; González et al.
-2021; Di Carlo et al. 2021).
+collisions and mergers of massive stars (e.g., Ebisuzaki
+et al. 2001; Portegies Zwart & McMillan 2002; Portegies
+Zwart et al. 2004; Freitag et al. 2006; Sakurai et al. 2017;
+Kremer et al. 2020; González et al. 2021; Di Carlo et al.
+2021; Das et al. 2021a,b; Escala 2021).
 The main obstacle to sequential BH mergers in clusters is that the merger recoil velocity kick often exceeds
 the escape velocity from the cluster (e.g., Schnittman
 & Buonanno 2007; Centrella et al. 2010; O’Leary et al.
@@ -93,29 +98,36 @@ clusters without a SMBH. They considered BH binarysingle interactions, binary BH
 merger recoil kicks. The post-kick merger product sinks
 back towards the cluster center over a dynamical friction timescale. Using this approach, they showed that
 103 − 104 M IMBHs can form efficiently over the lifetime of a cluster.
-However, as discussed in Section 2.2, direct star-BH
+However, as discussed in Section 2.2, direct BH-star
 collisions are much more frequent than BH-BH collision
-in galactic nuclei, making the former a promising channel for BH growth. We propose that IMBHs can form
-naturally within the central pc of a SMBH in a galactic
-center. Specifically, these IMBHs form through repeated
-collisions with main sequence stars, accreting some or
-all of the star’s mass depending on the details of the
-collision. We demonstrate that this channel can create
-IMBHs with masses as large as 104 M , depending on
-the density profile of the surrounding stars.
+in galactic nuclei, making the former a promising channel for BH growth. In an N-body study of young star
+clusters, Rizzuto et al. (2022) find that BH-star collisions are a main contributor to the formation of BHs
+in the mass gap and IMBHs. In a similar vein, Stone
+et al. (2017) demonstrate that massive BHs can form
+from repeated tidal encounters between stars and BHs.
+More generally, several studies have explored the role of
+collisions in a GN, with implications for the stellar and
+red giant populations (e.g., Dale & Davies 2006; Dale
+et al. 2009; Balberg et al. 2013; Mastrobuono-Battisti
+et al. 2021). We propose that IMBHs can form naturally
+within the central pc of a galactic center through repeated collisions between BHs and main sequence stars.
+During a collision, the BH can accrete some portion of
+the star’s mass. Over many collisions, it can grow appreciably in size. We demonstrate that this channel can
+
+create IMBHs with masses as large as 104 M , an upper
+limit that depends on the density profile of the surrounding stars and the efficiency of the accretion.
 The paper is structured as follows: we describe relevant physical processes and our approach in Section 2.
 In particular, we provide an overview of collisions in
 Section 2.2 and present our statistical approach in Section 2.3. Section 2.4 discusses our treatment of the
 mass growth with each collision and presents analytic
 solutions to our equations in two different regimes, efficient collisions and inefficient collisions We compare
-these solutions to our statistical results. Sections 2.5
-and 2.7 discuss implications for GW merger events between IMBHs and the SMBH. We then incorporate relaxation processes and discuss the subsequent results in
-Section 2.8. Finally, we discuss and summarize our findings in Section 3.
+these solutions to our statistical results. Sections 2.6
+and 2.8 discuss implications for GW merger events between IMBHs and the SMBH. We then incorporate relaxation processes and discuss the subsequent results in
+Section 2.9. Finally, we discuss and summarize our findings in Section 3.
 2. METHODOLOGY
 
 We consider a population of stellar mass BHs embedded in a cluster of 1 M stars. When stars and BHs
 collide, the BHs can accrete mass. The growth rate depends on the physical processes outlined below. We use
-
 a statistical approach to estimate the stellar encounters
 and final IMBH masses.
 2.1. Physical Picture
@@ -140,9 +152,26 @@ build a comprehensive physical picture of BH growth at
 all distances from the SMBH, including within 0.01 pc.
 Otherwise, the innermost region of the GN would be
 poorly represented in our sample. We consider other
-observationally motivated distributions in Section 2.8,
-but reserve a more detailed examination of the distribution’s impact for future work.
-2.2. Direct Collisions
+
+3
+
+IMBH Formation in Galactic Nuclei
+
+in Figure 1.2 As this timescale depends on the density
+of surrounding stars, we adopt a density profile of the
+form:
+
+ρ(r• ) = ρ0
+
+Figure 1. We plot the relevant timescales, including collision (green), relaxation (gold), and BH-BH GW capture
+(purple), for a single BH in the GN as a function of distance
+from the SMBH. For the collision timescale, we assume the
+BH is on a circular orbit. The timescales depend on the
+density, so we adopt a range of density profiles, bounded by
+α = 1 (dashed curve) to α = 2 (dark, solid curve). The dark
+blue line represents the time for a 105 M BH to merge with
+the SMBH through GW emission.
+
 BHs in the GN can undergo direct collisions with other
 objects. The timescale for this process, tcoll , can be estimated using a simple rate calculation: t−1
 coll = nσA,
@@ -153,8 +182,7 @@ coll = πn(a• )σ(a• )
 
 
 2G(mBH + m? )
-2
-× f1 (e• )rc + f2 (e• )rc
+× f1 (e• )rc2 + f2 (e• )rc
 . (1)
 σ(a• )2
 where G is the gravitational constant and rc is the sum
@@ -164,41 +192,11 @@ et al. (2020), f1 (e• ) and f2 (e• ) account for the effect of
 the eccentricity of the BH’s orbit about the SMBH on
 the collision rate, while n and σ are simply evaluated
 at the semimajor axis of the orbit (see below). Note
-
-IMBH Formation in Galactic Nuclei
-
-3
-
-The collision timescale also depends on the velocity dispersion, which we express as:
-s
-GM•
-σ(r• ) =
-,
-(4)
-r• (1 + α)
-
-Figure 1. We plot the relevant timescales, including collision (green), relaxation (gold), and BH-BH GW capture
-(purple), for a single BH in the GN as a function of distance
-from the SMBH. For the collision timescale, we assume the
-BH is on a circular orbit. The timescales depend on the
-density, so we adopt a range of density profiles, bounded by
-α = 1 (dashed curve) to α = 2 (dark, solid curve). The dark
-blue line represents the time for a 105 M BH to merge with
-the SMBH through GW emission.
-
 that this timescale equation includes the effects of gravitational focusing, which enhances the cross-section of
 interaction.
 Assuming a circular orbit for simplicity, we plot the
 timescale for a BH orbiting in the GN to collide with
 a 1 M star as a function of distance from the SMBH
-in Figure 1.2 As this timescale depends on the density
-of surrounding stars, we adopt a density profile of the
-form:
-
-ρ(r• ) = ρ0
-
-r•
-r0
 
 −α
 ,
@@ -212,9 +210,7 @@ Genzel et al. 2003). In this case, the normalization in
 Eq. (2) is ρ0 = 1.35 × 106 M /pc3 at r0 = 0.25 pc (Genzel et al. 2010). Additionally, in Eq. (2), α gives the
 slope of the power law. We assume that a uniform population of solar mass stars account for most of the mass
 in the GN, making the stellar number density:
-
 n(r• ) =
-2
 
 ρ(r• )
 .
@@ -222,8 +218,23 @@ n(r• ) =
 
 (3)
 
-We note that the eccentricity has a very minor effect on the
-collision timescale (Rose et al. 2020).
+The collision timescale also depends on the velocity dispersion, which we express as:
+s
+
+observationally motivated distributions in Section 2.9,
+but reserve a more detailed examination of the distribution’s impact for future work.
+2.2. Direct Collisions
+
+r•
+r0
+
+σ(r• ) =
+
+GM•
+,
+r• (1 + α)
+
+(4)
 
 where α is the slope of the density profile and M• denotes the mass of the SMBH (Alexander 1999; Alexander & Pfuhl 2014). As mentioned above, Eq. (1) depends
 on the sum of the radii of the colliding objects, rc . We
@@ -241,7 +252,16 @@ of density profiles is many orders of magnitude shorter
 than the BH-BH GW collision timescale (for the relevant equations, see O’Leary et al. 2009; Gondán et al.
 2018, for example). Thus, we expect that star-BH collisions will be the main driver of IMBH growth in the
 GN.
+2 We
+
+note that the eccentricity has a very minor effect on the
+collision timescale (Rose et al. 2020).
+
+4
+
+Rose et al.
 2.3. Statistical Approach to Collisions
+
 We simulate the mass growth of a population of BHs
 with initial conditions detailed in Section 2.1. Over an
 increment ∆t of 10^6 yr, we calculate the probability of
@@ -257,18 +277,7 @@ expected to accrete in a single collision (see Section 2.4
 for details). We recalculate the collision timescale using
 the updated BH mass and repeat this process until the
 time elapsed equals the simulation time of 10 Gyr3 .
-3
-
-Closer to the SMBH, ∆t may exceed the collision timescale by
-a factor of a few for steep density profiles. We include a safeguard in our code which takes the ratio tcoll /∆t and rounds it
-to the nearest integer. We take this integer to be the number of
-collisions and increase the BH mass accordingly.
-
-4
-
-Rose et al.
 2.4. Mass Growth
-
 When a BH collides with a star, it may accrete material and grow in mass. The details of the accretion
 depend on the relative velocity between the BH and
 star. For simplicity, this calculation assumes that the
@@ -276,15 +285,18 @@ two objects experience a head on collision, with the BH
 passing through the star’s center. We begin by considering the escape velocity from the BH at the star’s
 outermost point, its surface, which corresponds to the
 maximum impact parameter 1 R . Qualitatively, one
-might expect that the BH could accrete the entire star
+might expect that the BH could capture the entire star
 (i.e., ∆m ∼ 1 M ) if the relative velocity is smaller than
 the escape velocity from the BH at this point. However,
 in the vicinity of the SMBH, the dispersion velocity of
 the stars may be much larger than the escape velocity
 from the BH at the star’s surface. In this case, the BH
-accretes a “tunnel” of material through the star. This
+captures a “tunnel” of material through the star. This
 tunnel has radius equal to the Bondi radius and length
-approximately 1 R .
+approximately 1 R . For the purposes of this study, we
+assume that the BH accretes all of the material that
+it captures. The details of the accretion are uncertain,
+however, and it may be much less efficient than our results imply. We discuss accretion in Section 2.5.
 To estimate ∆m, we begin with the Bondi-Hoyle accretion rate, ṁ, given by:
 ṁ =
 
@@ -297,6 +309,25 @@ ṁ =
 
 (5)
 
+3 Closer to the SMBH, ∆t may exceed the collision timescale by
+
+a factor of a few for steep density profiles. We include a safeguard in our code which takes the ratio tcoll /∆t and rounds it
+to the nearest integer. We take this integer to be the number of
+collisions and increase the BH mass accordingly.
+
+Figure 2. We consider an example that highlights the mass
+growth as a function of distance from the SMBH. Grey dots
+represent the initial masses and distances from the SMBH
+of the BHs involved in the simulation. For simplicity, we set
+the initial mass equal to 10 M⊙ for all of the BHs. Assuming
+the density profile of stars has α = 1, we consider two cases:
+BHs accrete all of the star’s mass during a collision (red) and
+only a portion of the star’s mass is accreted during a collision
+given by Eq. 6 (blue). The latter case results in less growth
+closer to the SMBH where the velocity dispersion becomes
+high. The shaded regions and dashed lines represent the
+analytical predictions detailed in Section 2.4.
+
 where cs is the speed of sound in the star and ρstar is its
 density (e.g., Bondi 1952; Bondi & Hoyle 1944; Shima
 et al. 1985; Edgar 2004, see latter for a review). We
@@ -319,21 +350,12 @@ start with identical populations of 10 M BHs (grey)
 and simulate growth through collisions using a statistical approach. As the BHs grow, the collision timescale,
 which depends on mBH , decreases. Simultaneously,
 ∆m, which also depends on mBH , increases. The result is exponential growth (see discussion and details
-surrounding Eq. (8)). In Figure 2, however, the simulations assume α = 1 for the stellar density profile, ensuring the collision timescale is long compared to the simulation time, 10 Gyr. Therefore, the BHs grow slowly,
+surrounding Eq. (8)). In Figure 2, however, the simulations assume α = 1 for the stellar density profile, ensuring the collision timescale is long compared to the sim-
 
-Figure 2. We consider an example that highlights the mass
-growth as a function of distance from the SMBH. Grey dots
-represent the initial masses and distances from the SMBH
-of the BHs involved in the simulation. For simplicity, we set
-the initial mass equal to 10 M⊙ for all of the BHs. Assuming
-the density profile of stars has α = 1, we consider two cases:
-BHs accrete all of the star’s mass during a collision (red) and
-only a portion of the star’s mass is accreted during a collision
-given by Eq. 6 (blue). The latter case results in less growth
-closer to the SMBH where the velocity dispersion becomes
-high. The shaded regions and dashed lines represent the
-analytical predictions detailed in Section 2.4.
+5
 
+IMBH Formation in Galactic Nuclei
+ulation time, 10 Gyr. Therefore, the BHs grow slowly,
 and their final masses can be approximated using the
 following equation:
 mfinal (tcoll → const.) = minitial + ∆m
@@ -363,10 +385,6 @@ star’s mass.
 Eq. 7 does not apply for other values of α. When the
 collision timescale is shorter, corresponding to a larger
 index α in the density profile (see Figure 1), the growth
-
-5
-
-IMBH Formation in Galactic Nuclei
 is very efficient and ∆m quickly approaches 1 M . Consequently, while we can now assume ∆m = 1 M , we
 can no longer assume the collision timescale is constant.
 The final mass grows exponentially as a result. For
@@ -377,42 +395,97 @@ mfinal (∆m → 1 M ) = −A + (minitial + A) eCT (8)
 where A = σ^2 R_star/G and C = 2πG n_star R_star/σ. As an
 example, we plot this curve in purple for the α = 2 case,
 in Figure 3, which agrees with the simulated masses.
-2.5. GW Inspiral
+2.5. Uncertainties in Accretion
+We note that the ∆M calculated in this proof-of-concept study assumes that the BH accretes all of the
+material that it captures. Estimating the true fraction
+of the material accreted by the BH is very challenging; this complex problem requires numerically solving
+the generalized GR fluid equations with cooling, heating, and radiative transfer, etc. and remains an active
+field of research (e.g., Blandford & Begelman 1999; Park
+& Ostriker 2001; Narayan et al. 2003; Igumenshchev
+
+et al. 2003; Ohsuga et al. 2005; Yuan et al. 2012; Jiang
+et al. 2014; McKinney et al. 2014; Narayan et al. 2022).
+Heuristically, if a collision between a BH and a star results in an accretion disk, the disk’s viscous timescale
+may be as low as days. The resultant luminosity can
+unbind most of the captured material, though details
+such as the amount accreted and peak luminosity remain uncertain (e.g., Yuan et al. (2012); Jiang et al.
+(2014), see also the discussion in Stone et al. (2017),
+Rizzuto et al. (2022), and Kremer et al. (2022)). The
+question becomes whether or not a BH can still accumulate significant amounts of mass over many collisions
+even if it accretes very little in a single one. We explore the viability of our channel using a physically motivated inefficient accretion model. Several studies have
+invoked momentum-driven winds in BH accretion (e.g.,
+Murray et al. 2005; Ostriker et al. 2010; Brennan et al.
+2018). We thus estimate the fraction of captured mass
+accreted to be approximately vesc /(cη), where vesc is
+the escape velocity from the BH at 1 R and η is the
+accretion efficiency at the ISCO. We take η to be 0.1
+(e.g., Yu & Tremaine 2002). This expression for the
+fraction accreted is consistent with Kremer et al. (2022)
+equation 19 for s = 0.5, which is a reasonable value for
+s, a free parameter between 0.2 and 0.8. We discuss
+the results of the momentum-driven winds estimate in
+Section 3. We note that the accretion process may be
+more efficient than this estimate implies if, for example,
+jets or other instabilities result in the beaming of radiation away from the captured material (e.g., Blandford
+& Znajek 1977; Begelman 1979; De Villiers et al. 2005;
+McKinney & Gammie 2004; McKinney 2006; Igumenshchev 2008; Begelman 2012a,b; McKinney et al. 2014).
+2.6. GW Inspiral
 When a BH is close to the SMBH, GW emission can
 circularize and shrink its orbit. We implement the effects of GW emission on the BH’s semimajor axis and
 eccentricity following Peters & Mathews (1963a). The
 characteristic timescale to merge a BH with an SMBH
 is given by:
+tGW ≈ 2.9 × 10
 
-−1 
-−1
-M•
-mBH
+×
+
 12
-tGW ≈ 2.9 × 10 yr
-106 M
-106 M
+
 
+
+M•
+yr
+106 M
 −1 
-4
+
 M• + mBH
-a•
-×
 2 × 106 M
-10−4 pc
+
 × f (e• )(1 − e2• )7/2 ,
 
+−1 
+
+a•
+−2
+10 pc
+
+mBH
+106 M
+4
+
+−1
+
 (9)
 
 where f (e• ) is a function of e• . For all values of e• ,
 f (e• ) is between 0.979 and 1.81 (Blaes et al. 2002). We
 plot this timescale for a 1 × 10^5 M⊙ BH in Figure 1 in
 blue.
+
+6
+
+Rose et al.
+
+Figure 3. On the right, we plot final masses of 500 BHs using different values of α in the density profile, shallow (α = 1) to
+cuspy (α = 2). For the latter case, the purple line shows the analytical result from Eq. 8, taking minitial to be the average mass
+of the population. Faded stars indicate BHs that merged with the SMBH through GWs. On the left, we plot the masses and
+merger times of these BHs.
+
 In our simulations, we assume a BH has merged with
 the SMBH when the condition tGW < telapsed is met.
 When this condition is satisfied, we terminate mass
 growth through collisions for that BH.4
-2.6. IMBH growth
+2.7. IMBH growth
 As detailed above, BH-stellar collisions can increase
 the BH masses as a function of time. Here, we examine
 the sensitivity of the BH growth to the density power
@@ -420,25 +493,24 @@ law. From Eq. (1), it is clear that the growth rate depends on the stellar densi
 profiles, will result in more efficient mass growth. In
 Figure 1, larger values of α lead to collision timescales
 in the GN’s inner region, inwards of 0.25 pc, that are
-4
-
-For comparison, we also incrementally changed the semimajor
-axis and eccentricity from GW emission following the equations
-in Peters & Mathews (1963b). This method leads to a slight
-increase in the final IMBH masses because it accounts for the
-collisions that take place while the orbit is gradually shrinking.
-
 much smaller than the 10 Gyr simulation time. Figure 3
 confirms this expectation. It depicts the mass growth of
 a uniform distribution of BHs with initial conditions detailed in Section 2.1 for five α values, spanning 1 (green)
 to 2 (purple). The most massive IMBHs form inwards
 of 0.25 pc for the α = 2 case.
-2.7. Gravitational Wave Mergers and Intermediate
+2.8. Gravitational Wave Mergers and Intermediate
 and Extreme Mass Ratio Inspiral Candidates
 Towards the SMBH, efficient collisions can create BHs
 massive enough to merge with the SMBH through GWs.
-Following the method detailed in Section 2.5, when a
+Following the method detailed in Section 2.6, when a
 given BH meets the criterion tGW < telapsed , we mark
+4 For comparison, we also incrementally changed the semimajor
+
+axis and eccentricity from GW emission following the equations
+in Peters & Mathews (1963b). This method leads to a slight
+increase in the final IMBH masses because it accounts for the
+collisions that take place while the orbit is gradually shrinking.
+
 it as merged with the SMBH. We assume that at this
 point the dynamics of the BH will be determined by GW
 emission, shrinking and circularizing the BH’s orbit until it undergoes an extreme or intermediate mass ratio
@@ -447,7 +519,7 @@ plot in Figure 3 shows the BH masses versus time of
 merger. It is interesting to note that even in the absence of relaxation processes, which are often invoked
 to explain the formation of EMRIs, EMRIs and notably
 IMRIs can form in this region.
-2.8. Two Body Relaxation Processes
+2.9. Two Body Relaxation Processes
 A BH orbiting the SMBH experiences weak gravitational interactions with other objects in the GN. Over a
 relaxation time, these interactions alter its orbit about
 the SMBH. The two-body relaxation timescale for a
@@ -455,9 +527,10 @@ single-mass system is:
 trelax = 0.34
 
 σ3
-,
 G2 ρhM∗ i ln Λrlx
 
+,
+
 (10)
 
 where ln Λrlx is the Coulomb logarithm and ⟨M∗⟩ is the
@@ -469,24 +542,24 @@ its orbital energy and angular momentum by order of
 themselves. The BH experiences diffusion in its angular
 momentum and energy as a function of time (depending
 on the eccentricity of the orbit, this process can be more
-efficient Fragione & Sari 2018; Sari & Fragione 2019). In
-Figure 1, we plot the relaxation timescale in gold for a
-range of α. We note that the Bahcall & Wolf (1976) profile, α = 7/4, corresponds to zero net flux and therefore
-does not preferentially migrate objects inward.
-Additionally, because they are more massive on
-average than the surrounding objects, BHs are expected to segregate inwards in the GN (e.g., Shapiro
-& Marchant 1978; Cohn & Kulsrud 1978; Morris 1993;
-Miralda-Escudé & Gould 2000; Baumgardt et al. 2004).
-
-6
-
-Rose et al.
-
-Figure 3. On the right, we plot final masses of 500 BHs using different values of α in the density profile, shallow (α = 1) to
-cuspy (α = 2). For the latter case, the purple line shows the analytical result from Eq. 8, taking minitial to be the average mass
-of the population. Faded stars indicate BHs that merged with the SMBH through GWs. On the left, we plot the masses and
-merger times of these BHs.
+efficient Fragione & Sari 2018; Sari & Fragione 2019).
+Relaxation can cause the orbit of an object in a GN to
+reach high eccentricities. If the object is a BH, it can
+spiral into the SMBH and form an EMRI, while a star
 
+IMBH Formation in Galactic Nuclei
+can be tidally disrupted by the SMBH (e.g. Magorrian
+& Tremaine 1999; Wang & Merritt 2004; Hopman &
+Alexander 2005; Aharon & Perets 2016; Stone & Metzger 2016; Amaro-Seoane 2018; Sari & Fragione 2019;
+Naoz et al. 2022). The relaxation process is therefore
+crucial to our study. In Figure 1, we plot the relaxation
+timescale in gold for a range of α. We note that the Bahcall & Wolf (1976) profile, α = 7/4, corresponds to zero
+net flux and therefore does not preferentially migrate
+objects inward.
+Additionally, because BHs are more massive on average than the surrounding objects, they are expected
+to segregate inwards in the GN (e.g., Shapiro &
+Marchant 1978; Cohn & Kulsrud 1978; Morris 1993;
+Miralda-Escudé & Gould 2000; Baumgardt et al. 2004).
 They sink toward the SMBH on the mass segregation
 timescale, tseg ≈ ⟨M∗⟩/mBH × trelax (e.g., Spitzer 1987;
 Fregeau et al. 2002; Merritt 2006), which is typically an
@@ -501,8 +574,8 @@ of zero andpa standard deviation of ∆vrlx / 3, where
 ∆vrlx = v• P• /trlx (see Bradnick et al. 2017, for an
 approach to changes in the angular momentum). The
 new orbital parameters can be calculated following Lu
-& Naoz (2019), and see Naoz et al. in prep for full set
-of equations.
+& Naoz (2019), and see Naoz et al. (2022) for the full
+set of equations.
 We account for the effects of relaxation processes,
 including mass-segregation, using a multi-faceted approach. We begin by migrating each BH towards the
 center over its mass-segregation timescale, shifting it incrementally inward such that its orbital energy changes
@@ -514,11 +587,13 @@ scattering for both black holes and stars. Within this radius, BH self-interacti
 BHs will then settle onto a Bahcall-Wolf profile, while
 the stars may follow a shallower profile, with approximately n⋆ ∝ r^−1.5, inwards of the transition radius
 (Linial & Sari in prep.).
-
 Therefore, after the initial mass segregation, we allow
 the BHs to begin diffusing over a relaxation timescale,
 their orbital parameters changing slowly through a random process. In this random process, some of the BHs
 may migrate closer to the SMBH. We terminate mass
+
+7
+
 growth when the BH enters the inner 200 au of the GN,
 within which the density of stars is uncertain. This cutoff is based on the 120 au pericenter of S0-2, the closest
 known star to the SMBH (e.g., Ghez et al. 2005).
@@ -535,27 +610,12 @@ between the BHs. As mentioned above, as the BHs sink
 towards the SMBH, their concentration in the inner region of the GN increases, allowing them to dominate the
 scattering. We reserve the inclusion of these interactions
 for future study.
-2.9. Effect of Relaxation Processes
+2.10. Effect of Relaxation Processes
 As depicted in Figure 4, two-body relaxation processes
 result in more EMRI and IMRI events. These processes allow BHs that begin further from the SMBH
 to migrate inwards and grow more efficiently in mass.
 However, it also impedes the growth of BHs that are
-initially closer to the SMBH by allowing them to dif-
-
-IMBH Formation in Galactic Nuclei
-
-7
-
-Figure 4. Similar to Figure 3, we plot the initial masses versus initial distance (grey) and final mass versus final distance (red)
-for 500 BHs. This simulation includes relaxation processes, including mass segregation, diffusion, and dynamical friction. We
-assume α = 1.75 for the GN density profile. Faded stars represent BHs that merged with the SMBH. As a result of inward
-migration, BHs merge more quickly with the SMBH, before they can become as massive as those in Figure 3. Additionally, more
-BHs become EMRIs and IMRIs. Additionally, in the third panel, we show a histogram of the simulated IMBH masses for two
-different values of α, 1.5 (orange, solid), α, 1.75 (red, dashed), and 2 (purple, dash-dotted), accounting for relaxation processes.
-The dashed, faded lines represent the corresponding initial histograms. We assume α = 1.75 for the GN density profile. Faded
-stars represent BHs that merged with the SMBH.
-
-fuse out of the inner region where collisions are efficient.
+initially closer to the SMBH by allowing them to diffuse out of the inner region where collisions are efficient.
 As can be seen in Figure 4, the net result is that more
 BHs grow, but the maximum mass is lower compared
 to the scenario that ignores two-body relaxation. The
@@ -573,15 +633,29 @@ We explore the feasibility of forming IMBHs in a
 GN through successive collisions between a stellar-mass
 BH and main-sequence stars. Taking both a statistical and analytic approach, we show that this channel
 can produce IMBHs efficiently with masses as high as
-103−4 M and may result in many IMBH-SMBH mergers (intermediate-mass ratio inspiral, IMRIs) and EMRIs.
+10^{3−4} M⊙ and may result in many IMBH-SMBH mergers (intermediate-mass ratio inspirals, or IMRIs) and
+EMRIs.
+
+8
+
+Rose et al.
+
+Figure 4. Similar to Figure 3, we plot the initial masses versus initial distance (grey) and final mass versus final distance
+(red) for 500 BHs. This simulation includes relaxation processes, including mass segregation, diffusion, and dynamical friction.
+We assume α = 1.75 for the GN density profile. Faded stars represent BHs that merge with the SMBH. As a result of inward
+migration, BHs merge more quickly with the SMBH, before they can become as massive as those in Figure 3. Additionally,
+more BHs become EMRIs and IMRIs. Additionally, in the third panel, we show a histogram of the simulated IMBH masses
+for three different values of α, 1.5 (orange, solid), 1.75 (red, dashed), and 2 (purple, dash-dotted), accounting for relaxation
+processes. We also show the results for a simulation with α = 1.75 that accounts for momentum-driven winds (black, dotted).
+Despite the substantially reduced accretion, BHs in the mass gap still form.
+
 As the stellar mass BH collides with a star, the BH
 will grow in mass. The increase may equal the star’s entire mass if the relative velocity is smaller than the escape velocity from the BH at 1 R⊙. However, near the
 SMBH, the velocity dispersion may be larger than the
 escape velocity from the BH at the star’s radius. In this
-limit, the BH accretes a “tunnel” of material through
+limit, the BH captures a “tunnel” of material through
 the star, estimated using Bondi-Hoyle-Lyttleton accretion. In our statistical analysis, we account for Bondi-Hoyle-Lyttleton accretion and find that BHs outside of
-
-10−2 pc from the SMBH can accrete the entire star (see
+10−2 pc from the SMBH can capture the entire star (see
 Figure 2).
 The efficiency of collisions, and therefore IMBH,
 EMRI, and IMRI formation as well, are sensitive to
@@ -594,6 +668,24 @@ profile by allowing BHs to diffuse into regions of more
 or less efficient growth. As a result, more BHs grow in
 mass, but their maximum mass is smaller (∼ 10^4 M⊙).
 Additionally, the final masses have no apparent dependence on distance from the SMBH (see Figure 4).
+Most simulations in our study assume that the BHs
+accrete all of the mass that they capture. The final BH
+masses can be taken as an upper limit. We note that
+the accretion is a highly uncertain process and represents an active field of study (e.g., Blandford & Begelman 1999; Park & Ostriker 2001; Narayan et al. 2003;
+Igumenshchev et al. 2003; Ohsuga et al. 2005; Yuan
+et al. 2012; Jiang et al. 2014; McKinney et al. 2014;
+Narayan et al. 2022). To assess the limits of our model,
+
+we also consider a physically motivated accretion model,
+momentum-driven winds (Section 2.5). We present the
+final mass distribution for momentum-driven winds in
+Figure 4. Importantly, we find that BHs within the
+mass gap still form naturally despite the substantially
+reduced accretion. About 5% of the BHs grow by 10
+to 100 M . Furthermore, if we increase this ∆M estimate by a factor of 2 (i.e., use η = 0.05), the simulation produces a 3.5 × 103 M IMBH for the same initial
+conditions. Our proof-of-concept demonstrates that collisions between BH and stars are an important process
+that should be taken into account in dense places such
+as a GN.
 Mass growth through BH-main-sequence star collisions may act in concert with other IMBH formation
 channels, such as compact object binary mergers (e.g.,
 Hoang et al. 2018; Stephan et al. 2019; Fragione et al.
@@ -607,18 +699,78 @@ as highlighted in previous studies, a substantial fraction of these binaries may
 Kozai Lidov mechanism, leaving behind a single star or
 a single compact object (e.g., Stephan et al. 2016, 2019;
 Hoang et al. 2018). Additionally, to be susceptible to
-evaporation, BH binaries must have a wider configuration. Otherwise, they will be more tightly bound that
+evaporation, BH binaries must have a wider configuration. Otherwise, they will be more tightly bound than
+the average kinetic energy of the surrounding objects
+and will only harden through weak gravitational inter-
 
-8
+IMBH Formation in Galactic Nuclei
+actions with neighboring stars (see for example Figure
+6 in Rose et al. 2020).
+We note that we assume a steady-state and treat the
+stars as a reservoir in this model. Future work will take a
+more nuanced approach to the background stars, whose
+density as a function of time can be influenced by several
+factors. Firstly, the relaxation of the stellar population
+occurs on Gyr timescales. Some studies have suggested
+that in situ star formation can occur in the Galactic
+Center as close as 0.04 pc from the SMBH (e.g., Levin
+& Beloborodov 2003; Paumard et al. 2006), and star
+formation episodes can occur as often as every ∼ 5 Myr
+(e.g. Lu et al. 2009). Therefore, we expect that after
+the first Gyr, stars within ≲ 0.01 pc will be replenished
+at intervals consistent with the star formation episodes;
+the infalling populations of stars are separated by ∼
+5 −10 Myr, which is shorter than the collision timescale.
+However, star-star collisions may complicate this picture within ∼ 0.01 pc. As discussed above, regular star
+formation ensures the BHs always have a stellar population to interact with outside of ∼ 0.01 pc.5 At 0.01 pc,
+however, the kinetic energy during a collision between
+two 1 M stars is larger than their binding energies.
+Collisions can therefore thin out the stellar populations
+during the time it takes them to diffuse to these small
+radii, ≲ 0.01 pc, and may reduce the BH growth in the
+innermost region. We reserve the inclusion of star-star
+collisions for future work. We also note that the disruption of binary stars by the SMBH may help replenish
+the stellar population even as collisions work to deplete
+it (e.g., Balberg et al. 2013); when a binary is disrupted,
+one of the stars is captured on a tightly bound orbit
+about the SMBH.
+An IMBH may also affect the stellar density profile.
+As it spirals into the SMBH, it can perturb stellar orbits,
+and these interactions can lead to hypervelocity stars
+(e.g., Baumgardt et al. 2006a; Löckmann & Baumgardt
+2008). Löckmann & Baumgardt (2008) show that an
+IMBH can modify an initially steep stellar density profile to become consistent with the flatter cusp observed
+in the Galactic Center. The stars may then be replenished on 100 Myr timescales (Baumgardt et al. 2006a).
+Therefore, after the formation of the first few IMBHs,
+subsequent BH growth may occur in bursts, coinciding
+with replenishment of the stars.
+While there are many competing dynamical processes
+that shape the stellar density profile, we stress that α
+5 In fact, the star-star collision timescale is greater than 10 Myr
+
+for the entire parameter space, save at 0.001 pc for larger values
+of α; the BH-star collision timescale plotted in Fig. 1 is the same
+order of magnitude as the star-star collision timescale.
 
-Rose et al.
+9
 
-the average kinetic energy of the surrounding objects,
-and will only harden through weak gravitational interactions with neighboring stars (see for example Figure
-6 in Rose et al. 2020).
-Not included in this study, collisions between the BH
-and other compact objects will increase the BH growth
-rate. BH-BH mergers (e.g., O’Leary et al. 2009; Fragione et al. 2021) and even neutron star BH mergers
+can simply be chosen to encapsulate all of the relevant
+physics. A value for α that is constrained by observations must already reflect ongoing processes like star-star collisions and replenishment. Schödel et al. (2018)
+find the observed stellar mass enclosed within 0.01 pc of
+the Milky Way’s Galactic Center to be approximately
+180 M . This estimate is consistent to order of magnitude with our α = 1.25 case. In a simulation like those
+depicted in Figure 4, which include relaxation, α = 1.25
+leads to a maximum IMBH mass of 140 M . Furthermore, while the stellar mass within 0.01 pc may be a
+few hundred M , Do et al. (2019) and GRAVITY Collaboration et al. (2020) set an upper limit on the mass
+enclosed within the orbit of S0-2 to be about a few thousand M , or 0.1% of the central mass. This upper limit
+can include mass that was previously in stars but is now
+in BHs. In that case, the 180 M is what remains of the
+stars, while BHs and IMBHs make up the ∼ 1000 M
+in the innermost region.
+Also not included in this study, collisions between the
+BH and other compact objects will increase the BH
+growth rate. BH-BH mergers (e.g., O’Leary et al. 2009;
+Fragione et al. 2021) and even neutron star BH mergers
 (e.g., Hoang et al. 2020) become more likely as the BHs
 increase in mass through stellar collisions. As a result,
 the BH-BH collision timescale, discussed in Section 2.2,
@@ -626,38 +778,48 @@ will become relevant to our simulations, allowing the
 BHs to grow through this channel in addition to stellar collisions. Additionally, these compact object mergers
 result in GW recoil, which may have a large impact on
 the dynamics (e.g., Baibhav et al. 2020; Fragione et al.
-2021)
+2021).
 The BH’s mass growth increases GW emission, which
-dissipates energy from the orbit. Along with relaxation
-processes, GW emission causes BHs to sink towards the
-SMBH and eventually undergo a merger. As a result,
-the GN environment is conducive to the formation of
-EMRIs and IMRIs. The GW emission from EMRIs and
-IMRIs is expected to be at mHz frequencies, making
-them promising candidates for LISA to observe. While
-the exact rate calculation is beyond the scope of this
-study, the mechanism outlined here seems very promising.
-
-Our results also suggest that IMBHs are likely to exists in many galactic nuclei, as well as within our own
-galactic center. This implication seems to be consistent with recent observational and theoretical studies
-(e.g., Hansen & Milosavljević 2003; Maillard et al. 2004;
-Gürkan & Rasio 2005; Gualandris & Merritt 2009; Chen
-& Liu 2013; Generozov & Madigan 2020; Fragione et al.
-2020a; Zheng et al. 2020; Naoz et al. 2020; GRAVITY
-Collaboration et al. 2020).
+dissipates energy from the orbit. Along with relaxation,
+GW emission causes BHs to sink towards the SMBH
+and eventually undergo a merger. As a result, the GN
+environment is conducive to the formation of EMRIs
+and IMRIs. The GW emission from EMRIs and IMRIs is expected to be at mHz frequencies, making them
+promising candidates for LISA to observe. While the
+exact rate calculation is beyond the scope of this study,
+the mechanism outlined here seems very promising.
+Our results also suggest that BHs within the mass gap
+as well as IMBHs likely exist in many galactic nuclei, as
+well as within our own galactic center. This implication
+seems to be consistent with recent observational and
+theoretical studies (e.g., Hansen & Milosavljević 2003;
+Maillard et al. 2004; Gürkan & Rasio 2005; Gualandris
+& Merritt 2009; Chen & Liu 2013; Generozov & Madigan 2020; Fragione et al. 2020a; Zheng et al. 2020; Naoz
+et al. 2020; GRAVITY Collaboration et al. 2020).
+
+10
+
+Rose et al.
+
 Lastly, the collisions between stellar mass BHs and
 stars may contribute to the x-ray emission from our
-galactic centre (e.g., Muno et al. 2005, 2009; Hailey et al.
-2018; Zhu et al. 2018; Cheng et al. 2018)5 . These interactions, in particular grazing collisions, may also result
-in tidal disruption events (e.g., Perets et al. 2016; Samsing et al. 2019; Kremer et al. 2021). Thus, the process
-outlined here may produce electromagnetic signatures
-in addition to GW mergers.
-SR thanks the Charles E Young fellowship, the Nina
+galactic centre (e.g., Muno et al. 2005, 2009; Hailey
+et al. 2018; Zhu et al. 2018; Cheng et al. 2018, see Kremer et al. (2022) for a discussion of electromagnetic signatures from BH-star collisions)6 . These interactions,
+in particular grazing collisions, may also result in tidal
+disruption events (e.g., Baumgardt et al. 2006b; Perets
+et al. 2016; Stone et al. 2017; Samsing et al. 2019; Kremer et al. 2021). Thus, the process outlined here may
+produce electromagnetic signatures in addition to GW
+mergers.
+We thank the anonymous referee for useful comments.
+We also thank Jessica Lu, Fred Rasio, Kyle Kremer,
+Ryosuke Hirai, Ilya Mandel, and Erez Michaely for useful discussion.
+SR thanks the Charles E. Young Fellowship, the Nina
 Byers Fellowship, and the Michael A. Jura Memorial
 Graduate Award for support. SR and SN acknowledge
 the partial support from NASA ATP 80NSSC20K0505.
 SN thanks Howard and Astrid Preston for their generous support. IL thanks support from the Adams Fellowship. SN and RS thank the Bhaumik Institute visitor
-program.
+program. This work was performed in part at the Aspen Center for Physics, which is supported by National
+Science Foundation grant PHY-1607611.
 
 REFERENCES
 Abbott, B. P., Abbott, R., Abbott, T. D., et al. 2016,
@@ -666,60 +828,72 @@ doi: 10.1103/PhysRevLett.116.241102
 —. 2017a, PhRvL, 118, 221101,
 doi: 10.1103/PhysRevLett.118.221101
 —. 2017b, PhRvL, 119, 141101,
+doi: 10.1103/PhysRevLett.119.141101
+Aharon, D., & Perets, H. B. 2016, ApJL, 830, L1,
+doi: 10.3847/2041-8205/830/1/L1
+Alexander, T. 1999, ApJ, 527, 835, doi: 10.1086/308129
+Alexander, T., & Pfuhl, O. 2014, ApJ, 780, 148,
+doi: 10.1088/0004-637X/780/2/148
+Amaro-Seoane, P. 2018, Living Reviews in Relativity, 21, 4,
+doi: 10.1007/s41114-018-0013-8
 
+Arca Sedda, M., Mapelli, M., Benacquista, M., & Spera, M.
+2021, arXiv e-prints, arXiv:2109.12119.
+https://arxiv.org/abs/2109.12119
+Bahcall, J. N., & Wolf, R. A. 1976, ApJ, 209, 214,
+doi: 10.1086/154711
 Baibhav, V., Gerosa, D., Berti, E., et al. 2020, PhRvD, 102,
 043002, doi: 10.1103/PhysRevD.102.043002
+Balberg, S., Sari, R., & Loeb, A. 2013, MNRAS, 434, L26,
+doi: 10.1093/mnrasl/slt071
+Baumgardt, H., Gualandris, A., & Portegies Zwart, S.
+2006a, MNRAS, 372, 174,
+doi: 10.1111/j.1365-2966.2006.10818.x
+Baumgardt, H., Hopman, C., Portegies Zwart, S., &
+Makino, J. 2006b, MNRAS, 372, 467,
+doi: 10.1111/j.1365-2966.2006.10885.x
 Baumgardt, H., Makino, J., & Ebisuzaki, T. 2004, ApJ,
 613, 1143, doi: 10.1086/423299
-Begelman, M. C., Volonteri, M., & Rees, M. J. 2006,
-MNRAS, 370, 289, doi: 10.1111/j.1365-2966.2006.10467.x
 
-Alexander, T. 1999, ApJ, 527, 835, doi: 10.1086/308129
+6 The connection between the observed X-ray sources at the Galac-
 
-Belczynski, K., Hirschi, R., Kaiser, E. A., et al. 2020a, ApJ,
-890, 113, doi: 10.3847/1538-4357/ab6d77
+tic Center and tidal capture has been suggested by Generozov
+et al. (2018), but see Zhu et al. (2018); Stephan et al. (2019) for
+alternative channels.
 
-Alexander, T., & Pfuhl, O. 2014, ApJ, 780, 148,
+Begelman, M. C. 1979, MNRAS, 187, 237,
+doi: 10.1093/mnras/187.2.237
+—. 2012a, ApJL, 749, L3, doi: 10.1088/2041-8205/749/1/L3
 
+IMBH Formation in Galactic Nuclei
+—. 2012b, MNRAS, 420, 2912,
+doi: 10.1111/j.1365-2966.2011.20071.x
+Begelman, M. C., Volonteri, M., & Rees, M. J. 2006,
+MNRAS, 370, 289, doi: 10.1111/j.1365-2966.2006.10467.x
+Belczynski, K., Hirschi, R., Kaiser, E. A., et al. 2020a, ApJ,
+890, 113, doi: 10.3847/1538-4357/ab6d77
 —. 2020b, ApJ, 890, 113, doi: 10.3847/1538-4357/ab6d77
-
-doi: 10.1103/PhysRevLett.119.141101
-
-2021, arXiv e-prints, arXiv:2109.12119.
-
 Bertone, G., Fornasa, M., Taoso, M., & Zentner, A. R.
 2009, New Journal of Physics, 11, 105016,
 doi: 10.1088/1367-2630/11/10/105016
-
-https://arxiv.org/abs/2109.12119
-
 Binney, J., & Tremaine, S. 1987, Galactic dynamics
-
-doi: 10.1088/0004-637X/780/2/148
-Arca Sedda, M., Mapelli, M., Benacquista, M., & Spera, M.
-
-Bahcall, J. N., & Wolf, R. A. 1976, ApJ, 209, 214,
-doi: 10.1086/154711
-
-5
-
-The connection between the observed X-ray sources at the Galactic Center and tidal capture has been suggested by Generozov
-et al. (2018), but see Zhu et al. (2018); Stephan et al. (2019) for
-alternative channels.
-
 —. 2008, Galactic Dynamics: Second Edition
 Blaes, O., Lee, M. H., & Socrates, A. 2002, ApJ, 578, 775,
 doi: 10.1086/342655
+Blandford, R. D., & Begelman, M. C. 1999, MNRAS, 303,
+L1, doi: 10.1046/j.1365-8711.1999.02358.x
+Blandford, R. D., & Znajek, R. L. 1977, MNRAS, 179, 433,
+doi: 10.1093/mnras/179.3.433
 Blecha, L., Ivanova, N., Kalogera, V., et al. 2006, ApJ, 642,
 427, doi: 10.1086/500727
 Bondi, H. 1952, MNRAS, 112, 195,
 doi: 10.1093/mnras/112.2.195
-
-IMBH Formation in Galactic Nuclei
 Bondi, H., & Hoyle, F. 1944, MNRAS, 104, 273,
 doi: 10.1093/mnras/104.5.273
 Bradnick, B., Mandel, I., & Levin, Y. 2017, MNRAS, 469,
 2042, doi: 10.1093/mnras/stx1007
+Brennan, R., Choi, E., Somerville, R. S., et al. 2018, ApJ,
+860, 14, doi: 10.3847/1538-4357/aac2c4
 Bringmann, T., Huang, X., Ibarra, A., Vogl, S., & Weniger,
 C. 2012, JCAP, 2012, 054,
 doi: 10.1088/1475-7516/2012/07/054
@@ -737,16 +911,38 @@ et al. 1996, Science, 272, 1286,
 doi: 10.1126/science.272.5266.1286
 Cohn, H., & Kulsrud, R. M. 1978, ApJ, 226, 1087,
 doi: 10.1086/156685
+Dale, J. E., & Davies, M. B. 2006, MNRAS, 366, 1424,
+doi: 10.1111/j.1365-2966.2005.09937.x
+Dale, J. E., Davies, M. B., Church, R. P., & Freitag, M.
+2009, MNRAS, 393, 1016,
+doi: 10.1111/j.1365-2966.2008.14254.x
+
+11
+
 Dall’Amico, M., Mapelli, M., Di Carlo, U. N., et al. 2021,
 MNRAS, 508, 3045, doi: 10.1093/mnras/stab2783
+Das, A., Schleicher, D. R. G., Basu, S., & Boekholt, T.
+C. N. 2021a, MNRAS, 505, 2186,
+doi: 10.1093/mnras/stab1428
+Das, A., Schleicher, D. R. G., Leigh, N. W. C., & Boekholt,
+T. C. N. 2021b, MNRAS, 503, 1051,
+doi: 10.1093/mnras/stab402
+De Villiers, J.-P., Hawley, J. F., Krolik, J. H., & Hirose, S.
+2005, ApJ, 620, 878, doi: 10.1086/427142
 Di Carlo, U. N., Giacobbo, N., Mapelli, M., et al. 2019,
 MNRAS, 487, 2947, doi: 10.1093/mnras/stz1453
 Di Carlo, U. N., Mapelli, M., Pasquato, M., et al. 2021,
 MNRAS, 507, 5132, doi: 10.1093/mnras/stab2390
+Do, T., Hees, A., Ghez, A., et al. 2019, Science, 365, 664,
+doi: 10.1126/science.aav8137
+Ebisuzaki, T., Makino, J., Tsuru, T. G., et al. 2001, ApJL,
+562, L19, doi: 10.1086/338118
 Eda, K., Itoh, Y., Kuroyanagi, S., & Silk, J. 2013, PhRvL,
 110, 221101, doi: 10.1103/PhysRevLett.110.221101
 Edgar, R. 2004, NewAR, 48, 843,
 doi: 10.1016/j.newar.2004.06.001
+Escala, A. 2021, ApJ, 908, 57,
+doi: 10.3847/1538-4357/abd93c
 Ferrara, A., Salvadori, S., Yue, B., & Schleicher, D. 2014,
 Monthly Notices of the Royal Astronomical Society, 443,
 2410, doi: 10.1093/mnras/stu1280
@@ -766,9 +962,6 @@ Rasio, F. A. 2004, MNRAS, 352, 1,
 doi: 10.1111/j.1365-2966.2004.07914.x
 Fregeau, J. M., Joshi, K. J., Portegies Zwart, S. F., &
 Rasio, F. A. 2002, ApJ, 570, 171, doi: 10.1086/339576
-
-9
-
 Freitag, M., Amaro-Seoane, P., & Kalogera, V. 2006, ApJ,
 649, 91, doi: 10.1086/506193
 Generozov, A., & Madigan, A.-M. 2020, ApJ, 896, 137,
@@ -776,6 +969,11 @@ doi: 10.3847/1538-4357/ab94bc
 Generozov, A., Stone, N. C., Metzger, B. D., & Ostriker,
 J. P. 2018, MNRAS, 478, 4030,
 doi: 10.1093/mnras/sty1262
+
+12
+
+Rose et al.
+
 Genzel, R., Eisenhauer, F., & Gillessen, S. 2010, Reviews of
 Modern Physics, 82, 3121,
 doi: 10.1103/RevModPhys.82.3121
@@ -807,27 +1005,42 @@ Dosopoulou, F. 2018, ApJ, 856, 140,
 doi: 10.3847/1538-4357/aaafce
 Hoang, B.-M., Naoz, S., & Kremer, K. 2020, ApJ, 903, 8,
 doi: 10.3847/1538-4357/abb66a
+Hopman, C., & Alexander, T. 2005, ApJ, 629, 362,
+doi: 10.1086/431475
+Igumenshchev, I. V. 2008, ApJ, 677, 317,
+doi: 10.1086/529025
+Igumenshchev, I. V., Narayan, R., & Abramowicz, M. A.
+2003, ApJ, 592, 1042, doi: 10.1086/375769
+Jiang, Y.-F., Stone, J. M., & Davis, S. W. 2014, ApJ, 796,
+106, doi: 10.1088/0004-637X/796/2/106
 Johnson, J. L., & Bromm, V. 2007, Monthly Notices of the
 Royal Astronomical Society, 374, 1557,
 doi: 10.1111/j.1365-2966.2006.11275.x
+Kremer, K., Lombardi, James C., J., Lu, W., Piro, A. L., &
+Rasio, F. A. 2022, arXiv e-prints, arXiv:2201.12368.
+https://arxiv.org/abs/2201.12368
 Kremer, K., Lu, W., Piro, A. L., et al. 2021, ApJ, 911, 104,
 doi: 10.3847/1538-4357/abeb14
 Kremer, K., Spera, M., Becker, D., et al. 2020, ApJ, 903,
 45, doi: 10.3847/1538-4357/abb945
+
+Kroupa, P., Subr, L., Jerabkova, T., & Wang, L. 2020,
+MNRAS, 498, 5652, doi: 10.1093/mnras/staa2276
+Levin, Y., & Beloborodov, A. M. 2003, ApJL, 590, L33,
+doi: 10.1086/376675
 Limongi, M., & Chieffi, A. 2018a, ApJS, 237, 13,
 doi: 10.3847/1538-4365/aacb24
 —. 2018b, ApJS, 237, 13, doi: 10.3847/1538-4365/aacb24
+Löckmann, U., & Baumgardt, H. 2008, MNRAS, 384, 323,
+doi: 10.1111/j.1365-2966.2007.12699.x
 Lu, C. X., & Naoz, S. 2019, MNRAS, 484, 1506,
 doi: 10.1093/mnras/stz036
-
-10
-
-Rose et al.
-
 Lu, J. R., Ghez, A. M., Hornstein, S. D., et al. 2009, ApJ,
 690, 1463, doi: 10.1088/0004-637X/690/2/1463
 Madau, P., & Rees, M. J. 2001, ApJL, 551, L27,
 doi: 10.1086/319848
+Magorrian, J., & Tremaine, S. 1999, MNRAS, 309, 447,
+doi: 10.1046/j.1365-8711.1999.02853.x
 Maillard, J. P., Paumard, T., Stolovy, S. R., & Rigaut, F.
 2004, A&A, 423, 155, doi: 10.1051/0004-6361:20034147
 Mapelli, M., Bouffanais, Y., Santoliquido, F., Arca Sedda,
@@ -835,6 +1048,15 @@ M., & Artale, M. C. 2021a, arXiv e-prints,
 arXiv:2109.06222. https://arxiv.org/abs/2109.06222
 Mapelli, M., Dall’Amico, M., Bouffanais, Y., et al. 2021b,
 MNRAS, 505, 339, doi: 10.1093/mnras/stab1334
+Mastrobuono-Battisti, A., Church, R. P., & Davies, M. B.
+2021, MNRAS, 505, 3314, doi: 10.1093/mnras/stab1409
+McKinney, J. C. 2006, MNRAS, 368, 1561,
+doi: 10.1111/j.1365-2966.2006.10256.x
+McKinney, J. C., & Gammie, C. F. 2004, ApJ, 611, 977,
+doi: 10.1086/422244
+McKinney, J. C., Tchekhovskoy, A., Sadowski, A., &
+Narayan, R. 2014, MNRAS, 441, 3177,
+doi: 10.1093/mnras/stu762
 Merritt, D. 2006, Reports on Progress in Physics, 69, 2513,
 doi: 10.1088/0034-4885/69/9/R01
 Miralda-Escudé, J., & Gould, A. 2000, ApJ, 545, 847,
@@ -844,17 +1066,37 @@ Muno, M. P., Pfahl, E., Baganoff, F. K., et al. 2005, ApJL,
 622, L113, doi: 10.1086/429721
 Muno, M. P., Bauer, F. E., Baganoff, F. K., et al. 2009,
 ApJS, 181, 110, doi: 10.1088/0067-0049/181/1/110
+Murray, N., Quataert, E., & Thompson, T. A. 2005, ApJ,
+618, 569, doi: 10.1086/426067
+Naoz, S., Rose, S. C., Michaely, E., et al. 2022, ApJL, 927,
+L18, doi: 10.3847/2041-8213/ac574b
 Naoz, S., & Silk, J. 2014, ApJ, 795, 102,
 doi: 10.1088/0004-637X/795/2/102
 Naoz, S., Silk, J., & Schnittman, J. D. 2019, ApJL, 885,
 L35, doi: 10.3847/2041-8213/ab4fed
+
+IMBH Formation in Galactic Nuclei
 Naoz, S., Will, C. M., Ramirez-Ruiz, E., et al. 2020, ApJL,
 888, L8, doi: 10.3847/2041-8213/ab5e3b
+Narayan, R., Chael, A., Chatterjee, K., Ricarte, A., &
+Curd, B. 2022, MNRAS, 511, 3795,
+doi: 10.1093/mnras/stac285
+Narayan, R., Igumenshchev, I. V., & Abramowicz, M. A.
+2003, PASJ, 55, L69, doi: 10.1093/pasj/55.6.L69
+Ohsuga, K., Mori, M., Nakamoto, T., & Mineshige, S. 2005,
+ApJ, 628, 368, doi: 10.1086/430728
 O’Leary, R. M., Kocsis, B., & Loeb, A. 2009, MNRAS, 395,
 2127, doi: 10.1111/j.1365-2966.2009.14653.x
 O’Leary, R. M., Rasio, F. A., Fregeau, J. M., Ivanova, N.,
 & O’Shaughnessy, R. 2006, ApJ, 637, 937,
 doi: 10.1086/498446
+Ostriker, J. P., Choi, E., Ciotti, L., Novak, G. S., & Proga,
+D. 2010, ApJ, 722, 642,
+doi: 10.1088/0004-637X/722/1/642
+Park, M.-G., & Ostriker, J. P. 2001, ApJ, 549, 100,
+doi: 10.1086/319042
+Paumard, T., Genzel, R., Martins, F., et al. 2006, ApJ, 643,
+1011, doi: 10.1086/503273
 Perets, H. B., Li, Z., Lombardi, James C., J., & Milcarek,
 Stephen R., J. 2016, ApJ, 823, 113,
 doi: 10.3847/0004-637X/823/2/113
@@ -872,7 +1114,8 @@ Rashkov, V., & Madau, P. 2014, ApJ, 780, 187,
 doi: 10.1088/0004-637X/780/2/187
 Renzo, M., Farmer, R., Justham, S., et al. 2020, A&A, 640,
 A56, doi: 10.1051/0004-6361/202037710
-
+Rizzuto, F. P., Naab, T., Spurzem, R., et al. 2022,
+MNRAS, doi: 10.1093/mnras/stac231
 Rodriguez, C. L., Amaro-Seoane, P., Chatterjee, S., &
 Rasio, F. A. 2018, PhRvL, 120, 151101,
 doi: 10.1103/PhysRevLett.120.151101
@@ -883,9 +1126,14 @@ Phys. Rev. D, 100, 043027,
 doi: 10.1103/PhysRevD.100.043027
 Rose, S. C., Naoz, S., Gautam, A. K., et al. 2020, ApJ, 904,
 113, doi: 10.3847/1538-4357/abc557
+
+13
+
 Sakstein, J., Croon, D., McDermott, S. D., Straight, M. C.,
 & Baxter, E. J. 2020, arXiv e-prints, arXiv:2009.01213.
 https://arxiv.org/abs/2009.01213
+Sakurai, Y., Yoshida, N., Fujii, M. S., & Hirano, S. 2017,
+MNRAS, 472, 1677, doi: 10.1093/mnras/stx2044
 Samsing, J., Venumadhav, T., Dai, L., et al. 2019, PhRvD,
 100, 043009, doi: 10.1103/PhysRevD.100.043009
 Sari, R., & Fragione, G. 2019, ApJ, 885, 24,
@@ -895,6 +1143,8 @@ Schneider, R., Ferrara, A., Natarajan, P., & Omukai, K.
 doi: 10.1086/339917
 Schnittman, J. D., & Buonanno, A. 2007, ApJL, 662, L63,
 doi: 10.1086/519309
+Schödel, R., Gallego-Cano, E., Dong, H., et al. 2018, A&A,
+609, A27, doi: 10.1051/0004-6361/201730452
 Shapiro, S. L., & Marchant, A. B. 1978, ApJ, 225, 603,
 doi: 10.1086/156521
 Shima, E., Matsuda, T., Takeda, H., & Sawada, K. 1985,
@@ -910,6 +1160,10 @@ Spitzer, L. 1987, Dynamical evolution of globular clusters
 Stephan, A. P., Naoz, S., Ghez, A. M., et al. 2016, ArXiv
 e-prints. https://arxiv.org/abs/1603.02709
 —. 2019, ApJ, 878, 58, doi: 10.3847/1538-4357/ab1e4d
+Stone, N. C., Küpper, A. H. W., & Ostriker, J. P. 2017,
+MNRAS, 467, 4180, doi: 10.1093/mnras/stx097
+Stone, N. C., & Metzger, B. D. 2016, MNRAS, 455, 859,
+doi: 10.1093/mnras/stv2281
 The LIGO Scientific Collaboration, the Virgo
 Collaboration, Abbott, R., et al. 2020a, arXiv e-prints,
 arXiv:2009.01075. https://arxiv.org/abs/2009.01075
@@ -924,15 +1178,20 @@ Vink, J. S., Higgins, E. R., Sander, A. A. C., & Sabhahit,
 G. N. 2021, MNRAS, 504, 146,
 doi: 10.1093/mnras/stab842
 
-IMBH Formation in Galactic Nuclei
+14
 Wang, H., Stephan, A. P., Naoz, S., Hoang, B.-M., &
 Breivik, K. 2021, ApJ, 917, 76,
 doi: 10.3847/1538-4357/ac088d
+Wang, J., & Merritt, D. 2004, ApJ, 600, 149,
+doi: 10.1086/379767
 Woosley, S. E. 2017, ApJ, 836, 244,
 doi: 10.3847/1538-4357/836/2/244
+Yu, Q., & Tremaine, S. 2002, MNRAS, 335, 965,
+doi: 10.1046/j.1365-8711.2002.05532.x
+Yuan, F., Wu, M., & Bu, D. 2012, ApJ, 761, 129,
+doi: 10.1088/0004-637X/761/2/129
 
-11
-
+Rose et al.
 Yue, B., Ferrara, A., Salvaterra, R., Xu, Y., & Chen, X.
 2014, Monthly Notices of the Royal Astronomical
 Society, 440, 1263, doi: 10.1093/mnras/stu351
diff --git a/read/results/pdftotext/2201.00037.txt b/read/results/pdftotext/2201.00037.txt
index ddf92ba..3c351c1 100644
--- a/read/results/pdftotext/2201.00037.txt
+++ b/read/results/pdftotext/2201.00037.txt
@@ -2,28 +2,19 @@ Confidential manuscript submitted to JGR-Planets
 
 The influence of a fluid core and a solid inner core on the
 Cassini sate of Mercury
-Mathieu Dumberry
+Mathieu Dumberry 1
 
 arXiv:2201.00037v1 [astro-ph.EP] 31 Dec 2021
 
-1 Department
-
-1
-
-of Physics, University of Alberta, Edmonton, Alberta, Canada.
+1 Department of Physics, University of Alberta, Edmonton, Alberta, Canada.
 
 Key Points:
-•
-
-•
-
-•
-
 The Cassini state obliquity of Mercury’s mantle spin axis deviates from that of a rigid
 planet by no more than 0.01 arcmin.
-For a core magnetic field above 0.3 mT, electromagnetic coupling locks the fluid and solid
+• For a core magnetic field above 0.3 mT, electromagnetic coupling locks the fluid and solid
 cores into a common precession motion.
-The larger the inner core is, the more the obliquity of the polar moment of inertia approaches that expected for a rigid planet.
+• The larger the inner core is, the more the obliquity of the polar moment of inertia approaches that expected for a rigid planet.
+•
 
 Corresponding author: Mathieu Dumberry, dumberry@ualberta.ca
 
@@ -281,7 +272,6 @@ Stark et al. [2015b]
 Baland et al. [2017]
 
 Table 1.
-3
 
 Baland et al. [2017]
 Baland et al. [2017]
@@ -294,15 +284,12 @@ Perry et al. [2015]
 
 Reference parameters for Mercury. The mass M is computed from GM = 22031.8636 × 109
 
-2
-
-m /s taken from Genova et al. [2019]. The mean density is calculated from
-
-4π
-ρ̄R3
+ρ̄R3 = M . The numerical
+m /s taken from Genova et al. [2019]. The mean density is calculated from 4π
+3
 3
 
-= M . The numerical
+2
 
 values of r and ξr are calculated from r = (ā − c)/R and ξr = (a − b)/R, where ā = 12 (a + b) and where
 a = 2440.53 km, b = 2439.28 km and c = 2438.26 km are the semimajor, intermediate and semiminor
@@ -350,17 +337,17 @@ ef =
 Cf − Āf
 Āf
 
-B−A
-Ā
-
-γs =
-
 es =
 
 Cs − Ās
 ,
 Ās
 
+B−A
+Ā
+
+γs =
+
 B s − As
 .
 Ās
@@ -626,7 +613,8 @@ Ā
 Ā
 Ā
 iΩo Ā
-ω m̃ + (1 + ω + ef ) m̃f − ωα1 es
+
+(12a)
 
 
 1 
@@ -636,6 +624,10 @@ ñs = 2
 Āf
 iΩo Āf
 
+(12b)
+
+ω m̃ + (1 + ω + ef ) m̃f − ωα1 es
+
 (ω − α3 es )m̃ + α1 es m̃f + (1 + ω) m̃s + (1 + ω − α2 ) es ñs =
 
 1
@@ -647,10 +639,6 @@ iΩ2o Ās
 
 Γ̃ssun + Γ̃icb ,
 
-(12a)
-
-(12b)
-
 (12c)
 
 and a fourth equation consists of a kinematic relation that expresses the change in the orientation of the inner core figure as a result of its own rotation,
@@ -732,18 +720,14 @@ and where G210 and G201 are functions of the orbital eccentricity ec ,
 (1 − e2c )3/2
 7
 123 3 489 5
-= ec −
+G201 = ec −
 e +
 e .
 2
 16 c 128 c
-
 G210 =
 
 (16a)
-
-G201
-
 (16b)
 
 The gravitational torque by the Sun acting on the inner core alone, Γ̃ssun , is
@@ -755,13 +739,11 @@ The gravitational torque by the Sun acting on the inner core alone, Γ̃ssun , i
 CMB and on the inner core at the ICB, respectively. These torques can be parameterized in
 terms of dimensionless complex coupling constants Kicb and Kcmb and the differential angular velocities at each boundary [e.g Buffett, 1992; Buffett et al., 2002],
 Γ̃icb = iΩ2o Ās Kicb (m̃f − m̃s ) ,
-Γ̃cmb =
 
-iΩ2o Āf Kcmb
+(18a)
 
-m̃f .
+Γ̃cmb = iΩ2o Āf Kcmb m̃f .
 
-(18a)
 (18b)
 
 Specific expressions for Kicb and Kcmb are delayed to sections 4 and 5 when we consider the
@@ -827,20 +809,21 @@ M = ω − α3 es
 Ā
 
 (1 + ω) Āf
-1 + ω + ef + Kcmb +
-α1 es − Kicb
-0
-0
-
 Ās
 Kicb
-Āf
+1 + ω + ef + Kcmb + Ā
+f
 
 (1 + ω) ĀĀs
 Ās
 − Ā
 Kicb
 f
+
+α1 es − Kicb
+0
+0
+
 1 + ω + Kicb
 1
 0
@@ -1301,16 +1284,13 @@ are given by
 
 
 
-ωf cn
-ωf icn
-
 
 
 Ā
 e f φm
 e f + φm + Ω o
 ,
-≈ −Ωo
+ωf cn ≈ −Ωo
 (ef + φm )
 Ām + Ās
 
@@ -1318,7 +1298,7 @@ Ām + Ās
 
 Ā + Ās
 es α1 − es α3 αg − α3 φs .
-≈ Ωo
+ωf icn ≈ Ωo
 Ā − Ās
 
 (38a)
@@ -1586,6 +1566,8 @@ been derived [e.g. Stewartson and Roberts, 1963; Busse, 1968; Rochester , 1976]
 these solutions here. The parametrization of the viscous coupling constants Kcmb and Kicb based
 on them are given in Mathews and Guo [2005],
 
+πρf rf4
+
 r
 
 
@@ -1597,15 +1579,12 @@ Āf
 4r
 πρf rs
 ν 
-=
+Kicb =
 0.195 − 1.976i ,
 2Ωo
 Ās
 
 Kcmb =
-Kicb
-
-πρf rf4
 
 (44a)
 (44b)
diff --git a/read/results/pdftotext/2201.00069.txt b/read/results/pdftotext/2201.00069.txt
index 773e5c6..8f003d7 100644
--- a/read/results/pdftotext/2201.00069.txt
+++ b/read/results/pdftotext/2201.00069.txt
@@ -126,9 +126,8 @@ angular scales of a few arcseconds, but resolved out to scales of
 2021).
 Localisations of four one-off FRBs through imaging of
 
-★
+★ james.chibueze@nwu.ac.za
 
-james.chibueze@nwu.ac.za
 † manisha.caleb@manchester.ac.uk
 1 https://www.wis-tns.org/
 
@@ -349,15 +348,16 @@ CT1-4 telescopes (Ashton et al. 2020) and the large 28 m-diameter
 https://github.com/e-merlin/eMERLIN_CASA_pipeline
 
 1
+√︃
+𝜂𝑐 𝑛
+
 SEFD
 .
-√︃
-𝜂 𝑐 𝑛 × 𝑁 (𝑁 − 1) × Δ𝜈 × 𝑡
-pol
-int
 
 (1)
 
+pol × 𝑁 (𝑁 − 1) × Δ𝜈 × 𝑡 int
+
 The system equivalent flux density (SEFD) of MeerKAT at the
 1.28 GHz is 443 Jy and 𝜂 𝑐 is the correlator efficiency. We used 𝑛pol
 = 2 polarisation products (XX and YY), N = 64 telescopes, Δ𝜈 =
@@ -592,6 +592,8 @@ limits while the magenta region indicates the background region used. The green
 et al. (2019).
 
 Table 1. Details of the FRB fields observed with MeerKAT.
+Field name
+
 Observation date
 
 Synthesized beam
@@ -644,13 +646,8 @@ FRB 20190714A
 54.4 𝜇Jy beam−1
 52.0 𝜇Jy beam−1
 
-Field name
-
 Table 2. Details of the radio continuum source associated with FRB 20190714A.
 Field name
-FRB 20190714A
-FRB 20190714A
-FRB 20190714A
 
 Observation date
 
@@ -668,6 +665,10 @@ Pos. angle
 
 Int. flux density
 
+FRB 20190714A
+FRB 20190714A
+FRB 20190714A
+
 28 September 2019
 18 October 2019
 13 January 2021
@@ -978,9 +979,8 @@ Zhang B., 2018, ApJ, 854, L21
 de Naurois M., Rolland L., 2009, Astroparticle Physics, 32, 231
 
 APPENDIX A: AUTHOR AFFILIATIONS
-1 Centre
+1 Centre for Space Research, North-West University, Potchefstroom
 
-for Space Research, North-West University, Potchefstroom
 2531, South Africa
 2 Department of Physics and Astronomy, Faculty of Physical Sciences, University of Nigeria, Carver Building, 1 University Road,
 Nsukka 410001, Nigeria
@@ -1027,9 +1027,8 @@ Avenue, Braamfontein, Johannesburg, 2050 South Africa
 MNRAS 000, 1–15 (2021)
 
 MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs
-25 Sorbonne
+25 Sorbonne Université, Université Paris Diderot, Sorbonne Paris
 
-Université, Université Paris Diderot, Sorbonne Paris
 Cité, CNRS/IN2P3, Laboratoire de Physique Nucléaire et de Hautes
 Energies,
 LPNHE, 4 Place Jussieu, F-75252 Paris, France
diff --git a/read/results/pdftotext/2201.00151.txt b/read/results/pdftotext/2201.00151.txt
index 23b6b9c..bd76df4 100644
--- a/read/results/pdftotext/2201.00151.txt
+++ b/read/results/pdftotext/2201.00151.txt
@@ -272,6 +272,10 @@ intermediate
 
 80
 
+log(Σ) [M⊙/kpc2]
+
+major
+
 6.5
 
 0
@@ -280,7 +284,6 @@ intermediate
 -40
 
 5.3
-160
 
 40
 
@@ -298,24 +301,14 @@ intermediate
 
 0
 
-V [km/s]
+-40
 
 -80
 
-[kpc]
-
-160
-
-V [km/s]
+-40
 
 -80
 
-[kpc]
-
-log(Σ) [M⊙/kpc2]
-
-major
-
 -80
 
 -160
@@ -352,7 +345,6 @@ major
 
 -80
 
--80
 -80
 
 -40
@@ -387,19 +379,29 @@ major
 
 σ [km/s]
 
--80
+[kpc]
+
+σ [km/s]
 
 [kpc]
 
--40
+-80
 
-σ [km/s]
+V [km/s]
+
+160
+
+[kpc]
 
 -80
 
+V [km/s]
+
+160
+
 [kpc]
 
--40
+-80
 
 -40
 
@@ -890,26 +892,25 @@ aim was to recover the profiles of the total mass and the velocity
 anisotropy.
 
 where
-ρ0 =
+3.1. Overview of the method
 
 I0
 πRc [1 + (Rt /Rc )2 ]3/2
 
-3.1. Overview of the method
-
 (3)
 
+s
+
+(4)
+
+ρ0 =
 and
 z=
 
-s
-
 r2 + R2c
 .
 R2c + R2t
 
-(4)
-
 We follow the approach introduced in Kowalczyk et al. (2018),
 namely we model the total mass profile with the mass-to-light
 ratio Υ varying with radius:
diff --git a/read/results/pdftotext/2201.00178.txt b/read/results/pdftotext/2201.00178.txt
index 794e2ab..c4a1ed3 100644
--- a/read/results/pdftotext/2201.00178.txt
+++ b/read/results/pdftotext/2201.00178.txt
@@ -10,11 +10,8 @@ Prasad Mani
 
 1, 2
 
-1 Department
-2 Center
-
-of Astronomy and Astrophysics, Tata Institute of Fundamental Research, Mumbai, India
-for Space Science, NYUAD Institute, New York University Abu Dhabi, Abu Dhabi, UAE
+1 Department of Astronomy and Astrophysics, Tata Institute of Fundamental Research, Mumbai, India
+2 Center for Space Science, NYUAD Institute, New York University Abu Dhabi, Abu Dhabi, UAE
 
 arXiv:2201.00178v1 [astro-ph.SR] 1 Jan 2022
 
@@ -149,14 +146,11 @@ and toroidal flow sensitivity kernels respectively, that allow us to relate the
 and are derived from the solar model see Appendix A. They possess the symmetry relation: Cqj,k = C−qj,−k and
 Dqj,k = D−qj,−k (see eq A6). The kernels, as flows, are expressed on the basis fj (z).
 1.2. Least-squares of cross-correlation
-φω∗
-k
-
-φω
-k+q
+ω
+Even though φω∗
+k φk+q isolates the effect of flow perturbations at individual wavenumbers q, a more compact mea-
 
-Even though
-isolates the effect of flow perturbations at individual wavenumbers q, a more compact measurement, known in mode-coupling literature as ’B-coefficients’, is much better designed for inversion as it reduces the
+surement, known in mode-coupling literature as ’B-coefficients’, is much better designed for inversion as it reduces the
 ω
 dimension of the problem. A least-squares fit to the cross-correlation φω∗
 k φk+q (see Woodard 2006, 2014, 2016) results
@@ -174,14 +168,10 @@ Bk,q =
 ω
 
 ω∗
-Hkk
-0 nn0
+Multiplying eq 4 on both sides by Hkk
+0 nn0 and substituting by eq 5 on the left-hand-side results in a concisely defined
 
-Multiplying eq 4 on both sides by
 forward problem (compare with eq 4)
-
-and substituting by eq 5 on the left-hand-side results in a concisely defined
-
 Bk,q =
 
 X
@@ -915,10 +905,8 @@ Nk =
 Q
 k
 
-where the
-
 1
-Q
+where the Q
 
 Q
 P
@@ -1050,12 +1038,8 @@ Figure 8. Left: Kernel Kk,q (z) (eq B14) shown vs depth z for the three radial o
 (eq B17) using SOLA, for qR = [−112, −45] at depth z0 = −0.48 Mm, and the corresponding target Gaussian (eq B15).
 Integral of the averaging kernel over z is 0.89.
 
-Setting
-
-∂X
-∂α
-
-→ 0 gives us the matrix problem to be solved
+Setting ∂X
+∂α → 0 gives us the matrix problem to be solved
 A{α} = v,
 h
 i−1
diff --git a/read/results/pdftotext/2201.00200.txt b/read/results/pdftotext/2201.00200.txt
index 9c1ccdf..252161b 100644
--- a/read/results/pdftotext/2201.00200.txt
+++ b/read/results/pdftotext/2201.00200.txt
@@ -7,14 +7,14 @@ Local heating due to convective overshooting and the solar
 modelling problem
 I. Baraffe1,2 , T. Constantino1 , J. Clarke1 , A. Le Saux1,2 , T. Goffrey4 , T. Guillet1 , J. Pratt3 , D. G. Vlaykov1
 1
-2
-3
-4
 
 University of Exeter, Physics and Astronomy, EX4 4QL Exeter, UK (e-mail: i.baraffe@ex.ac.uk)
 École Normale Supérieure, Lyon, CRAL (UMR CNRS 5574), Université de Lyon, France
+3
 Department of Physics and Astronomy, Georgia State University, Atlanta GA 30303, USA
+4
 Centre for Fusion, Space and Astrophysics, Department of Physics, University of Warwick, Coventry, CV4 7AL, UK
+2
 
 arXiv:2201.00200v1 [astro-ph.SR] 1 Jan 2022
 
diff --git a/read/results/pdftotext/2201.00214.txt b/read/results/pdftotext/2201.00214.txt
index 798aa9e..a60c98d 100644
--- a/read/results/pdftotext/2201.00214.txt
+++ b/read/results/pdftotext/2201.00214.txt
@@ -175,9 +175,7 @@ to each point of the loop’s direction. Then by using these data, we straighten
 considered box with the thickness of 15 to 40 pixels (macro-pixels, depending on the available
 empty area around each loop and the distance to the neighbor loop). The area around the
 loop is needed for calculations of background subtraction. The selected loop segment is cut in
-1 Based
-
-on data on these WebSites: https://solarflare.njit.edu/webapp.html, and https://www.swpc.noaa.gov/
+1 Based on data on these WebSites: https://solarflare.njit.edu/webapp.html, and https://www.swpc.noaa.gov/
 
 all wavelengths and at the same considered box from the images set. These loop images are
 necessary entrances for our thermal analysis process. Then the loop is divided into different
diff --git a/read/results/pdftotext/GeoTopo-book.txt b/read/results/pdftotext/GeoTopo-book.txt
index 6dfe023..94f64fb 100644
--- a/read/results/pdftotext/GeoTopo-book.txt
+++ b/read/results/pdftotext/GeoTopo-book.txt
@@ -60,205 +60,13 @@ Zahlentheorie“ gehört zu haben.
 
 Inhaltsverzeichnis
 1 Topologische Grundbegriffe
-1.1 Topologische Räume . .
-1.2 Metrische Räume . . . .
-1.3 Stetigkeit . . . . . . . .
-1.4 Zusammenhang . . . . .
-1.5 Kompaktheit . . . . . .
-1.6 Wege und Knoten . . . .
-Übungsaufgaben . . . . . . .
-
-.
-.
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-.
-.
+1.1 Topologische Räume . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+1.2 Metrische Räume . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+1.3 Stetigkeit . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+1.4 Zusammenhang . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+1.5 Kompaktheit . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+1.6 Wege und Knoten . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+Übungsaufgaben . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
 
 2
 2
@@ -270,125 +78,10 @@ Zahlentheorie“ gehört zu haben.
 22
 
 2 Mannigfaltigkeiten und Simplizialkomplexe
-2.1 Topologische Mannigfaltigkeiten . . . . .
-2.2 Differenzierbare Mannigfaltigkeiten . . .
-2.3 Simplizialkomplex . . . . . . . . . . . .
-Übungsaufgaben . . . . . . . . . . . . . . . .
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
+2.1 Topologische Mannigfaltigkeiten . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+2.2 Differenzierbare Mannigfaltigkeiten . . . . . . . . . . . . . . . . . . . . . . . . . .
+2.3 Simplizialkomplex . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+Übungsaufgaben . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
 
 24
 24
@@ -396,410 +89,37 @@ Zahlentheorie“ gehört zu haben.
 34
 43
 
-.
-.
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-.
-.
-
-3 Fundamentalgruppe und Überlagerungen
-3.1 Homotopie von Wegen . . . . . . . . .
-3.2 Fundamentalgruppe . . . . . . . . . .
-3.3 Überlagerungen . . . . . . . . . . . . .
-3.4 Gruppenoperationen . . . . . . . . . .
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-44
-44
-47
-51
-61
-
-4 Euklidische und nichteuklidische Geometrie
-4.1 Axiome für die euklidische Ebene . . . . . . . .
-4.2 Weitere Eigenschaften einer euklidischen Ebene
-4.2.1 Flächeninhalt . . . . . . . . . . . . . . .
-4.3 Hyperbolische Geometrie . . . . . . . . . . . . .
-Übungsaufgaben . . . . . . . . . . . . . . . . . . . .
-
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-
-.
-.
-.
-.
-.
-
-64
-64
-74
-74
-77
-86
-
-5 Krümmung
-5.1 Krümmung von Kurven . . . . . .
-5.2 Tangentialebene . . . . . . . . . . .
-5.3 Gauß-Krümmung . . . . . . . . . .
-5.4 Erste und zweite Fundamentalform
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
+3 Fundamentalgruppe und Überlagerungen
+3.1 Homotopie von Wegen . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+3.2 Fundamentalgruppe . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+3.3 Überlagerungen . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+3.4 Gruppenoperationen . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
 
-.
-.
-.
-.
+44
+44
+47
+51
+61
 
-.
-.
-.
-.
+4 Euklidische und nichteuklidische Geometrie
+4.1 Axiome für die euklidische Ebene . . . . . . . . . . . . . . . . . . . . . . . . . . .
+4.2 Weitere Eigenschaften einer euklidischen Ebene . . . . . . . . . . . . . . . . . . .
+4.2.1 Flächeninhalt . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+4.3 Hyperbolische Geometrie . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+Übungsaufgaben . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
 
-.
-.
-.
-.
+64
+64
+74
+74
+77
+86
 
-.
-.
-.
-.
+5 Krümmung
+5.1 Krümmung von Kurven . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+5.2 Tangentialebene . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+5.3 Gauß-Krümmung . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+5.4 Erste und zweite Fundamentalform . . . . . . . . . . . . . . . . . . . . . . . . . .
 
 87
 87
@@ -809,61 +129,6 @@ Zahlentheorie“ gehört zu haben.
 
 Lösungen der Übungsaufgaben
 
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
-.
-.
-.
-.
-
 99
 
 Bildquellen
@@ -1712,9 +977,7 @@ V xi
 
 Sm
 
-i=1 Uxi
-
-⊇ K.
+i=1 Uxi ⊇ K.
 
 18
 
@@ -2050,9 +1313,6 @@ n bzw. 2n, da gilt:
 S
 Sei Ui := { (x0 : · · · : xn ) ∈ P n (R) | xi 6= 0 } ∀i ∈ 0, . . . , n. Dann ist P n (R) = ni=0 Ui
 und die Abbildung
-
-(y1 : · · · : yi−1
-
 Ui → Rn
 
 
@@ -2064,16 +1324,15 @@ x0
 xi
 xi
 xi
-: 1 : yi : · · · : yn ) →7 (y1 , . . . , yn )
-
+$(y_1 : \dots : y_{i-1} : 1 : y_i : \dots : y_n) \mapsto (y_1, \dots, y_n)$
 ist bijektiv.
 Die Ui mit i = 0, . . . , n bilden einen n-dimensionalen Atlas:
 x = (1 : 0 : 0) ∈ U0 → R2
 
-x 7→ (0, 0)
-
 y = (0 : 1 : 1) ∈ U2 → R2
 
+x 7→ (0, 0)
+
 y 7→ (0, 1)
 
 Umgebung: B1 (0, 1) → 
@@ -2089,26 +1348,22 @@ V1 ∩ V2 = ∅?
 Karten:
 Di := {(x1 , . . . , xn+1 ) ∈ S n |xi > 0} → B1 (0, . . . , 0)
 | {z }
-S n |xi
 
 ∈Rn
-
-Ci := {(x1 , . . . , xn+1 ) ∈
-< 0} → B1 (0, . . . , 0)
-1
+$C_i := \{ (x_1, \dots, x_{n+1}) \in S^n \mid x_i < 0 \}$
+$\to B_1(0, \dots, 0)$
 (x1 , . . . , xn+1 ) 7→ (x1 , . . . , 
 x
-,
-.
-.
-i q. , xn+1 )
+. . . , xn+1 )1
+i , q
 q
-Pn
 P
-(x1 , . . . , xn ) 7→ (x1 , . . . , xi−1 , 1 − k=1 x2k , xi , . . . , xn ), oder − 1 − nk=1 x2k für Ci
+P
+(x1 , . . . , xn ) 7→ (x1 , . . . , xi−1 , 1 − nk=1 x2k , xi , . . . , xn ), oder − 1 − nk=1 x2k für Ci
 S
 S n = n+1
 i=1 (Ci ∪ Di )
+
 Als kompakte Mannigfaltigkeit wird S n auch „geschlossene Mannigfaltigkeit“ genannt.
 5) [0, 1] ist keine Mannigfaltigkeit, denn:
 Es gibt keine Umgebung von 0 in [0, 1], die homöomorph zu einem offenem Intervall
@@ -2558,11 +1813,9 @@ det
 
 ∂x 
 ∂v
-∂y (v0 )
+∂y (v0 ) 6= 0
 ∂v
 
-6= 0
-
 und Fj (u, v) = (x(u, v), y(u, v), z(u, v)).
 fj : Uj × R → R3 durch
 Definiere F
@@ -2572,11 +1825,10 @@ fj |U ×{ 0 } = Fj
 Offensichtlich: F
 j
  ∂x
-JFfj =
 
 ∂u
- ∂y
-∂u
+∂y
+JFfj =  ∂u
 ∂z
 ∂u
 
@@ -2632,9 +1884,7 @@ Beispiel 25 (Lie-Gruppen)
 
 Pn
 
-k=1 aik bkj
-
-ist nach allen Variablen differenzierbar
+k=1 aik bkj ist nach allen Variablen differenzierbar
 
 det(Aij )
 det A
@@ -2760,10 +2010,10 @@ c) Ist d = max { k ∈ N0 | K enthält k-Simplex }, so heißt d die Dimension vo
 (a) 1D Simplizialkomplex (b) 2D Simplizialkomplex
 (ohne untere Fläche!)
 
-(d) 1D Simplizialkomplex
-
 (c) 2D Simplizialkomplex
 
+(d) 1D Simplizialkomplex
+
 (e) 2D Simplizialkomplex
 
 P
@@ -4612,13 +3862,9 @@ O. B. d. A. seien ϕ1 (R) und ϕ2 (R) in der selben Halbebene.
 Es gilt: d(P 0 , ϕ1 (R)) = d(ϕ1 (P ), ϕ1 (R))
 = d(P, R)
 = d(ϕ2 (P ), ϕ2 (R))
-und analog
-
 = d(P 0 , ϕ2 (R))
-= d(Q0 , ϕ2 (R))
-
-d(Q0 , ϕ1 (R))
-
+und analog
+$d(Q', \varphi_1(R)) = d(Q', \varphi_2(R))$
 (Teil ii) Seien P , Q und R Fixpunkte von ϕ, R ∈
 / P Q und A ∈
 / P Q ∪ P R ∪ QR. Sei B ∈
@@ -4955,29 +4201,22 @@ hc
 
 ha
 
-c
-
 LC
 
+c
 A
 
 A
 
 B
 
-(a)
-
-1/2
-
 B
 
-· |AB| · |hc |
+(a) 1/2 · |AB| · |hc |
 
-(b)
+(b) 1/2 · |BC| · |h
 
-1/2
-
-· |BC| · |ha |
+a|
 
 Abbildung 4.15: Flächenberechnung im Dreieck
 Zu zeigen: Unabhängigkeit von der gewählten Grundseite.
@@ -4998,18 +4237,7 @@ B
 Abbildung 4.16: 4ABLa und 4CLC B sind ähnlich, weil IWS = π
 Strahlensatz
 
-=======⇒
-
-a
-hc
-
-=
-
-c
-ha
-
-→ a · ha = c · hc
-
+$\Longrightarrow \frac{a}{h_c} = \frac{c}{h_a} \Rightarrow a \cdot h_a = c \cdot h_c$
 Satz 4.7 (Satz des Pythagoras)
 Im rechtwinkligen Dreieck gilt a2 + b2 = c2 , wobei c die Hypotenuse und a, b die beiden
 Katheten sind.
@@ -5429,13 +4657,7 @@ SL2 (R) ac(x + y ) + adx + bcx + bd
 
 ⇒ σ(z) =
 
-⇒ =(σ(z)) =
-
-y
-(cx+d)2 +(cy)2
-
->0
-
+$\Rightarrow \Im(\sigma(z)) = \frac{y}{(cx+d)^2 + (cy)^2} > 0$
 Die Abbildung bildet also nach H ab. Außerdem gilt:
 
 
@@ -5468,7 +4690,6 @@ c0 d0
 c d
 c0 z + d0
 =
-=
 
 0
 
@@ -5484,14 +4705,16 @@ a ac0 z+d
 z+b
 c ac0 z+d
 0 + d
+
 a(a0 z+b0 )+b(c0 z+d0 )
+0 z+d0
+= c(a0 z+bc0 )+d(c
+0 z+d0 )
 c0 z+d0
-c(a0 z+b0 )+d(c0 z+d0 )
-c0 z+d0
-0
-a(a z + b0 ) + b(c0 z
+a(a0 z + b0 ) + b(c0 z + d0 )
+
+=
 
-+ d0 )
 c(a0 z + b0 ) + d(c0 z + d0 )
 (aa0 + bc0 )z + ab0 + bd0
 =
@@ -5511,8 +4734,6 @@ a b
 ◦z
 c d
 c d0
-=
-
 b) Es gilt σ(z) = (−σ)(z) für alle σ ∈ SL2 (R) und z ∈ H.
 
 
@@ -5546,12 +4767,12 @@ A−1
 
 λ
 
-Bt−1
+Bt−1 = B−t
 −1
+3
 C
 
-= B−t
-= C3
+=C
 
 Daher genügt es zu zeigen, dass man mit Aλ , Bt und C alle Matrizen aus SL2 (R)
 erzeugen kann, genügt es also von einer beliebigen Matrix durch Multiplikation mit
@@ -5672,14 +4893,16 @@ m + ir
 
 0
 
-m
 0
 
 1
 
-2
+m
 
 λ2 m
+
+2
+
 3m + 14
 
 (a) Fall 1
@@ -5741,14 +4964,9 @@ y
 −1
 
 1
-z
-
-=
-
 1
-r
-
-· eiϕ
+iϕ
+z = r ·e
 
 0
 
@@ -5762,11 +4980,11 @@ Beweis: Nach Proposition 4.9 (c) gibt es σ mit σ(a1 ) = b1 und σ(a2 ) = b2 .
 Definition 65
 Seien z1 , z2 , z3 , z4 ∈ C paarweise verschieden.
 Dann heißt
-DV(z1 , z2 , z3 , z4 ) :=
 
 z1 −z4
-z1 −z2
-z3 −z4
+2
+DV(z1 , z2 , z3 , z4 ) := zz13 −z
+−z4
 z3 −z2
 
 =
@@ -5777,28 +4995,14 @@ z3 −z2
 Doppelverhältnis von z1 , . . . , z4 .
 Bemerkung 70 (Eigenschaften des Doppelverhältnisses)
 a) DV(z1 , . . . , z4 ) ∈ C \ { 0, 1 }
-b) DV(z1 , z4 , z3 , z2 ) =
-
-1
-DV(z1 ,z2 ,z3 ,z4 )
-
-c) DV(z3 , z2 , z1 , z4 ) =
-
-1
-DV(z1 ,z2 ,z3 ,z4 )
-
+b) $\mathrm{DV}(z_1, z_4, z_3, z_2) = \frac{1}{\mathrm{DV}(z_1, z_2, z_3, z_4)}$
+c) $\mathrm{DV}(z_3, z_2, z_1, z_4) = \frac{1}{\mathrm{DV}(z_1, z_2, z_3, z_4)}$
 d) DV ist auch wohldefiniert, wenn eines der zi = ∞ oder wenn zwei der zi gleich sind.
 e) DV(0, 1, ∞, z4 ) = z4 (Der Fall z4 ∈ { 0, 1, ∞ } ist zugelassen).
 f) Für σ ∈ PSL2 (C) und z1 , . . . , z4 ∈ C ∪ { ∞ } ist
 DV(σ(z1 ), σ(z2 ), σ(z3 ), σ(z4 )) = DV(z1 , z2 , z3 , z4 )
-und für σ(z) =
-
-1
-z
-
-gilt
+und für $\sigma(z) = \frac{1}{z}$ gilt
 DV(σ(z1 ), σ(z2 ), σ(z3 ), σ(z4 )) = DV(z1 , z2 , z3 , z4 )
-
 g) DV(z1 , z2 , z3 , z4 ) ∈ R ∪ { ∞ } ⇔ z1 , . . . , z4 liegen auf einer hyperbolischen Geraden.
 Beweis:
 a) DV(z1 , . . . , z4 ) 6= 0, da zi paarweise verschieden
@@ -5819,27 +5023,17 @@ Annahme: DV(z1 , . . . , z4 ) = 1
 ⇔ z3 = z1 oder z2 = z4
 
 Alle zi sind paarweise verschieden ⇒ Widerspruch
-b) DV(z1 , z4 , z3 , z2 ) =
-
-(z1 −z2 )·(z3 −z4 )
-(z1 −z4 )·(z3 −z2 )
 
-=
+
 
 1
-DV(z1 ,z2 ,z3 ,z4 )
-
-c) DV(z3 , z2 , z1 , z4 ) =
-
+b) $\mathrm{DV}(z_1, z_4, z_3, z_2)$
+$= \frac{(z_1 - z_2) \cdot (z_3 - z_4)}{(z_1 - z_4) \cdot (z_3 - z_2)}$
+$= \frac{1}{\mathrm{DV}(z_1, z_2, z_3, z_4)}$
 (z3 −z4 )·(z1 −z2 )
-(z3 −z2 )·(z1 −z4 )
-
-=
-
-1
-DV(z1 ,z2 ,z3 ,z4 )
-
-
+c) $\mathrm{DV}(z_3, z_2, z_1, z_4)$
+$= \frac{(z_3 - z_4) \cdot (z_1 - z_2)}{(z_3 - z_2) \cdot (z_1 - z_4)}$
+$= \frac{1}{\mathrm{DV}(z_1, z_2, z_3, z_4)}$
 
 d) Zwei der zi dürfen gleich sein, da:
 Fall 1 z1 = z4 oder z3 = z2
@@ -5849,17 +5043,13 @@ Mit der Regel von L’Hospital folgt, dass in diesem Fall DV(z1 , . . . , z4 ) =
 Fall 3 z1 = z3 oder z2 = z4
 Durch Einsetzen ergibt sich DV(z1 , . . . , z4 ) = 1.
 Im Fall, dass ein zi = ∞ ist, ist entweder DV(0, 1, ∞, z4 ) = 0 oder DV(0, 1, ∞, z4 ) ± ∞
-e) DV(0, 1, ∞, z4 ) =
-
+·(∞−1)
 (0−z4 )·(∞−1)
-(0−1)·(∞−z4 )
-
-=
-
-z4 ·(∞−1)
-∞−z4
-
+= z4∞−z
+e) DV(0, 1, ∞, z4 ) = (0−1)·(∞−z
 = z4
+4
+4)
 
 f) Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken.
 g) Sei σ ∈ PSL2 (C) mit σ(z1 ) = 0, σ(z2 ) = 1, σ(z3 ) = ∞. Ein solches σ existiert, da man
@@ -6002,14 +5192,13 @@ Beweis:
 a) l(γ) =
 
 Rb
-a
 
-kγ 0 (t)kdt =
+0
+a kγ (t)kdt =
 
 Rb
-a
 
-1dt = b − a.
+a 1dt = b − a.
 
 b) Im Folgenden wird die Aussage nur für γ : [a, b] → R2 bewiesen. Allerdings funktioniert
 der Beweis im Rn analog. Es muss nur die Ableitung angepasst werden.
@@ -6157,11 +5346,10 @@ r
 Definition 70
 Sei γ : I → R3 eine durch Bogenlänge parametrisierte Kurve.
 a) Für t ∈ I heißt κ(t) := kγ 00 (t)k die Krümmung von γ in t.
-b) Ist für t ∈ I die Ableitung γ 00 (t) 6= 0, so heißt
-
-γ 00 (t)
-kγ 00 (t)k
+00
 
+b) Ist für $t \in I$ die Ableitung $\gamma''(t) \neq 0$, so heißt
+$\frac{\gamma''(t)}{\|\gamma''(t)\|}$
 Normalenvektor an γ in t.
 
 c) b(t) sei ein Vektor, der γ 0 (t), n(t) zu einer orientierten Orthonormalbasis von R3 ergänzt.
@@ -6190,11 +5378,12 @@ s∈V:
 Für p = F −1 (s) ∈ U sei
 
  ∂x
-JF (p) =
 
 ∂u (p)
- ∂y (p)
-∂u
+∂y
+
+JF (p) = ∂u
+(p)
 ∂z
 ∂u (p)
 
@@ -6402,13 +5591,10 @@ Definition 74
 Sei S ⊆ R3 eine reguläre Fläche, s ∈ S und n ein stetiges Normalenfeld auf S.
 γ : [−ε, ε] → S eine nach Bogenlänge parametrisierte Kurve (ε > 0) mit γ(0) = s und
 γ 00 (0) 6= 0.
-Sei n(0) :=
-
-γ 00 (0)
-kγ 00 (0)k .
-
-Zerlege
+00
 
+Sei $n(0) := \frac{\gamma''(0)}{\|\gamma''(0)\|}$.
+Zerlege
 n(0) = n(0)t + n(0)⊥ mit n(0)t ∈ Ts S und n(0)⊥ ∈ (Ts S)⊥
 Dann ist n(0)⊥ = hn(0), n(s)i · n(s)
 κNor (s, γ) := hγ 00 (0), n(s)i die Normalkrümmung.
@@ -6516,7 +5702,7 @@ b) { Dp F (e1 ), Dp F (e2 ) } ist eine Basis von Ts S.
 c) Bzgl. der Basis { Dp F (e1 ), Dp F (e2 ) } hat das Standardskalarprodukt aus Bemerkung 80.a die Darstellungsmatrix IS .
 d) gi,j (s) ist eine differenzierbare Funktion von s.
 Bemerkung 81
-
+2
 ∂F
 ∂F
 (p) ×
@@ -6550,8 +5736,6 @@ Dann ist ∂u
 1
 z3
 
-2
-
 det(IS ) =
 
 z1 = x2 y3 − x3 y2
@@ -6658,9 +5842,8 @@ ds n(x) =
 
 d
 n(s„+“tx)
-dt | {z }
+dt | {z } t=0
 
-t=0
 Soll auf Fläche S bleiben
 
 Die Abbildung ds n heißt Weingarten-Abbildung
@@ -6686,13 +5869,10 @@ c) Wegen Proposition 5.1 (a) ist ds n ein Homomorphismus.
 d) Zu zeigen: ∀x, y ∈ Is S : hx, ds n(y)i = hds n(x), yi
 Aufgrund der Bilinearität des Skalarproduktes genügt es diese Eigenschaft für die
 Basisvektoren zu zeigen.
-Sei xi = Dp F (ei ) =
-
 ∂F
-∂ui (p)
-
-i = 1, 2
-
+Sei xi = Dp F (ei ) = ∂u
+(p) i = 1, 2
+i
 2
 
 F
@@ -6943,12 +6123,11 @@ nun für für n ∈ N≥2 , m ∈ N:
 1
 , . . . , 1)
 m
-∈ SLn (R), und Am ist unbeschränkt, da kAm k∞ =
+Dann gilt: det Am = 1, d. h. Am ∈ SLn (R), und Am ist unbeschränkt, da kAm k∞ =
+m −−−−→ ∞.
 
-
 Am = diagn (m,
-Dann gilt: det Am = 1, d. h. Am
-m −−−−→ ∞.
+
 m→∞
 
 (c) Beh.: P(R) ist kompakt.
@@ -7041,8 +6220,8 @@ Vor.: Sei (X, d) eine absolute Ebene, A, B, C ∈ X und 4ABC ein Dreieck.
 
 Lösungen der Übungsaufgaben
 
-∼ AC ⇒ ∠ABC ∼
-(a) Beh.: AB =
+(a) Beh.: AB ∼
+= AC ⇒ ∠ABC ∼
 = ∠ACB
 ∼
 Bew.: Sei AB = AC.
@@ -7230,30 +6409,21 @@ Symmetrische Gruppe
 
 Seien A, B und M Mengen.
 AC
-P(M )
-M
-∂M
-M◦
-A×B
-A⊆B
-A(B
-A\B
-A∪B
-A ∪˙ B
-A∩B
-
 Komplement von A
-Potenzmenge von M
+P(M ) Potenzmenge von M
+M
 Abschluss von M
+∂M
 Rand der Menge M
+M◦
 Inneres der Menge M
-Kreuzprodukt
-Teilmengenbeziehung
-echte Teilmengenbeziehung
-Differenzmenge
-Vereinigung
-Disjunkte Vereinigung
-Schnitt
+A × B Kreuzprodukt
+A ⊆ B Teilmengenbeziehung
+A ( B echte Teilmengenbeziehung
+A \ B Differenzmenge
+A ∪ B Vereinigung
+A ∪˙ B Disjunkte Vereinigung
+A ∩ B Schnitt
 
 Geometrie
 AB
@@ -7286,62 +6456,48 @@ PSLn (K) Projektive lineare Gruppe
 Wege
 Sei γ : I → X ein Weg.
 [γ]
-γ1 ∗ γ2
-γ1 ∼ γ2
-γ(x)
-C
-
 Homotopieklasse von γ
-Zusammenhängen von Wegen
-Homotopie von Wegen
+γ1 ∗ γ2 Zusammenhängen von Wegen
+γ1 ∼ γ2 Homotopie von Wegen
+γ(x)
 Inverser Weg, also γ(x) := γ(1 − x)
 Bild eines Weges γ, also C :=
+C
 γ([0, 1])
 
 Weiteres
 B
-Bδ (x)
+Basis einer Topologie
+Bδ (x) δ-Kugel um x
 S
+Subbasis einer Topologie
 T
+Topologie
 A
+Atlas
 P
-h·, ·i
-X/∼
-[x]∼
-kxk
-|x|
-hai
+Projektiver Raum
+h·, ·i Skalarprodukt
+X/∼ X modulo ∼
+[x]∼ Äquivalenzklassen von x bzgl. ∼
+kxk Norm von x
+|x| Betrag von x
+hai Erzeugnis von a
 Sn
 Tn
 
-Basis einer Topologie
-δ-Kugel um x
-Subbasis einer Topologie
-Topologie
-
-Atlas
-Projektiver Raum
-Skalarprodukt
-X modulo ∼
-Äquivalenzklassen von x bzgl. ∼
-Norm von x
-Betrag von x
-Erzeugnis von a
 Sphäre
 Torus
 
 f ◦g
-πX
-f |U f
-f −1 (M )
-Rg(M )
-χ(K)
-
 Verkettung von f und g
+πX
 Projektion auf X
+f |U f
 eingeschränkt auf U
-Urbild von M
-Rang von M
+f −1 (M ) Urbild von M
+Rg(M ) Rang von M
+χ(K)
 Euler-Charakteristik von K
 
 110
@@ -7381,22 +6537,21 @@ C = { a + ib | a, b ∈ R } Komplexe Zahlen
 P = { 2, 3, 5, 7, . . . } Primzahlen
 H = { z ∈ C | =z > 0 } obere Halbebene
 I = [0, 1] ( R Einheitsintervall
-f : S 1 ,→ R2
-π1 (X, x)
-Fix(f )
-k · k2
-κ
-κNor
-V (f )
 
 Rationale Zahlen
 
-Einbettung der Kreislinie in die Ebene
+f : S 1 ,→ R2 Einbettung der Kreislinie in die Ebene
+π1 (X, x)
 Fundamentalgruppe im topologischen Raum X um x ∈ X
+Fix(f )
 Menge der Fixpunkte der Abbildung f
+k · k2
 2-Norm; Euklidische Norm
+κ
 Krümmung
+κNor
 Normalenkrümmung
+V (f )
 Nullstellenmenge von f 2
 
 Krümmung
diff --git a/read/results/playa/1601.03642.txt b/read/results/playa/1601.03642.txt
new file mode 100644
index 0000000..0b87099
--- /dev/null
+++ b/read/results/playa/1601.03642.txt
@@ -0,0 +1,629 @@
+
+Creativity in Machine Learning
+Martin Thoma
+E-Mail: info@martin-thoma.de
+Abstract—Recent machine learning techniques can be modified
+to produce creative results. Those results did not exist before; it
+is not a trivial combination of the data which was fed into the
+machine learning system. The obtained results come in multiple
+forms: As images, as text and as audio.
+This paper gives a high level overview of how they are created
+and gives some examples. It is meant to be a summary of the
+current work and give people who are new to machine learning
+some starting points.
+ I. INTRODUCTION
+According to [Gad06] creativity is “the ability to use your
+imagination to produce new ideas, make things etc.” and
+imagination is “the ability to form pictures or ideas in your
+mind”.
+Recent advances in machine learning produce results which the
+author would intuitively call creative. A high-level overview
+of several of those algorithms is given in the following.
+This paper is structured as follows: Section II introduces the
+reader on a very simple and superficial level to machine
+learning, Section III gives examples of creativity with images,
+Section IV gives examples of machines producing textual
+content, and Section V gives examples of machine learning
+and music. A discussion follows in Section VI.
+II. BASICS OF MACHINE LEARNING
+The traditional approach of solving problems with software
+is to program machines to do so. The task is divided in as
+simple sub-tasks as possible, the subtasks are analyzed and the
+machine is instructed to process the input with human-designed
+algorithms to produce the desired output. However, for some
+tasks like object recognition this approach is not feasible. There
+are way too many different objects, different lighting situations,
+variations in rotation and the arrangement of a scene for a
+human to think of all of them and model them. But with the
+internet, cheap computers, cameras, crowd-sourcing platforms
+like Wikipedia and lots of Websites, services like Amazon
+Mechanical Turk and several other changes in the past decades
+a lot of data has become available. The idea of machine learning
+is to make use of this data.
+A formal definition of the field of Machine Learning is given
+by Tom Mitchell [Mit97]:
+A computer program is said to learn from experience
+ E with respect to some class of tasks T and
+performance measure P , if its performance at tasks
+in T , as measured by P , improves with experience E . Σ ϕx
+0
+x
+1
+x
+2
+x
+3
+x
+n w
+0
+w
+1
+w
+2
+w
+3
+wn.
+.
+.
+(a) Example of an artificial neuron unit.
+$x_i$ are
+the input signals and
+$w_i$ are
+weights which have to get learned.
+Each input signal gets multiplied
+with its weight, everything gets
+summed up and the activation function
+ ϕ is applied. (b) A visualization of a simple feedforward
+ neural network. The 5 input
+ nodes are red, the 2 bias nodes
+are gray, the 3 hidden units are
+green and the single output node
+is blue.
+Fig. 1: Neural networks are based on simple units which get
+combined to complex networks.
+This means that machine learning programs adjust internal
+parameters to fit the data they are given. Those computer
+programs are still developed by software developers, but the
+developer writes them in a way which makes it possible to
+adjust them without having to re-program everything. Machine
+learning programs should generally improve when they are fed
+with more data.
+The field of machine learning is related to statistics. Some
+algorithms directly try to find models which are based on well-known
+distribution assumptions of the developer, others are
+more general.
+A common misunderstanding of people who are not involved
+in this field is that the developers don’t understand what their
+machine learning program is doing. It is understood very well
+in the sense that the developer, given only a pen, lots of paper
+and a calculator could calculate the same result as the machine
+does when he gets the same data. And lots of time, of course. It
+is not understood in the sense that it is hard to make predictions
+how the algorithm behaves without actually trying it. However,
+this is similar to expecting from an electrical engineer to
+explain how a computer works. The electrical engineer could
+probably get the knowledge he needs to do so, but the amount
+of time required to understand such a complex system from
+basic building blocks is a time-intensive and difficult task.
+An important group of machine learning algorithms was
+inspired by biological neurons and are thus called artificial
+neural networks. Those networks are based on mathematical
+functions called artificial neurons which take
+$n \in \mathbb{N}$ numbers
+$x_1, \dots, x_n \in \mathbb{R}$
+as input, multiply them with weights
+$w_1, \dots, w_n \in \mathbb{R}$,
+add them and apply a so-called activation
+function $\varphi$ as visualized in Figure 1(a).
+One example of such an activation function
+is the sigmoid function
+$\varphi(x) = \frac{1}{1+e^{-x}}$.
+Those functions act as building blocks for more complex
+systems as they can be chained and grouped in layers as
+visualized in Figure 1(b). The interesting question is how
+the parameters $w_i$ are learned.
+This is usually done by an
+optimization technique called gradient descent. The gradient
+descent algorithm takes a function which has to be differentiable,
+starts at any point of the surface of this error function and
+arXiv:1601.03642v1  [cs.CV]  12 Jan 2016
+makes a step in the direction which goes downwards. Hence
+it tries to find a minimum of this high-dimensional function.
+There is, of course, a lot more to say about machine learning.
+The interested reader might want to read the introduction given
+by Mitchell [Mit97].
+ III. IMAGE DATA
+Applying a simple neural network on image data directly can
+work, but the number of parameters gets extraordinarily large.
+One would take one neuron per pixel and channel. This means
+for 500 px × 500 px RGB images one would get 750,000 input
+signals. To approach this problem, so called Convolutional
+Neural Networks (CNNs) were introduced. Instead of learning
+the full connection between the input layer and the first
+hidden layer, those networks make use of convolution layers.
+Convolution layers learn a convolution; this means they learn
+the weights of an image filter. An additional advantage is that
+CNNs make use of spatial relationships of the pixels instead
+of flattening the image to a stream of single numbers.
+An excellent introduction into CNNs is given by [Nie15].
+A. Google DeepDream
+The gradient descent algorithm which optimizes most of the
+parameters in neural networks is well-understood. However, the
+effect it has on the recognition system is difficult to estimate.
+[MOT15] proposes a technique to analyze the weights learned
+by such a network. A similar idea was applied by [VKMT13].
+For example, consider a neural network which was trained to
+recognize various images like bananas. This technique turns
+the network upside down and starts with random noise. To
+analyze what the network considers bananas to look like, the
+random noise image is gradually tweaked so that it generates
+the output “banana”. Additionally, the changes can be restricted
+in a way that the statistics of the input image have to be similar
+to natural images. One example of this is that neighboring
+pixels are correlated.
+Another technique is to amplify the output of layers. This was
+described in [MOT15]:
+We ask the network: “Whatever you see there, I want
+more of it!” This creates a feedback loop: if a cloud
+looks a little bit like a bird, the network will make
+it look more like a bird. This in turn will make the
+network recognize the bird even more strongly on
+the next pass and so forth, until a highly detailed
+bird appears, seemingly out of nowhere.
+The name “Inceptionism” in the title of [MOT15] comes from
+the science-fiction movie “Inception” (2010). One reason it
+might be chosen is because neural networks are structured
+in layers. Recent publications tend to have more and more
+layers [HZRS15]. The used jargon is to say they get “deeper”.
+As this technique was published by Google engineers, the
+technique is called Google DeepDream. Fig. 2: Aurelia aurita
+Fig. 3: DeepDream impression of Aurelia aurita
+It has become famous in the internet [Red]. Usually, the images
+are generated in iterations and in each iteration it is zoomed
+into the image.
+Images and videos published by the Google engineers can be
+seen at [goo15]. Figure 2 shows the original image from which
+Figure 3 was created with the deep dream algorithm.
+B. Artistic Style Imitation
+A key idea of neural networks is that they learn different
+representations of the data in each layer. In the case of
+CNNs, this can easily be visualized as it was done in various
+papers [ZF14]. Usually, one finds that the network learned
+to build edge detectors in the first layer and more complex
+structures in the upper layers.
+Gatys, Ecker and Bethge showed in [GEB15] that with a clever
+choice of features it is possible to separate the general style of
+an image in terms of local image appearance from the content
+of an image. They support their claim by applying the style of
+different artists to an arbitrary image of their choice.
+
+(a) Original Image (b) Style image
+(c) The artistic style of Van Gogh’s “Starry Night” applied to the photograph
+of a Scottish Highland Cattle.
+Fig. 4: The algorithm takes both, the original image and the
+style image to produce the result.
+This artistic style imitation can be seen itself as creative work.
+An example is given by Figure 4. The code which created this
+example is available under [Joh16].
+Something similar was done by [SPB+14],
+where the style of
+a portrait photograph was transferred to another photograph.
+A demo can be seen on [Shi14].
+C. Drawing Robots
+Patrick Tresset and Frédéric Fol Leymarie created a system called
+AIKON (Automatic IKONic drawing) which can automatically
+generate sketches for portraits [TL05]. AIKON takes a digital
+photograph, detects faces on them and sketches them with a
+pen-plotter.
+Tresset and Leymarie use k-means clustering
+[KMN+02] to
+segment regions of the photograph with similar color which,
+in turn, will get a similar shading.
+Such a drawing robot could apply machine learning techniques
+known from computer vision for detecting the human. It
+could apply self-learning techniques to draw results most
+similar to the artist’s impression of the image. However, the
+system described in [TL05] seems not to be a machine
+learning computer program according to the definition by Tom
+Mitchell [Mit97].  IV. TEXT DATA
+Digital text is the first form of natural communication which
+involved computers. It is used in the form of chats, websites,
+on collaborative projects like Wikipedia, in scientific literature.
+Of course, it was used in pre-digital times, too: In newspaper,
+in novels, in dramas, in religious texts like the bible, in books
+for education, in notes from conversations.
+This list could be continued and most of these kinds of texts
+are now available in digital form. This digital form can be
+used to teach machines to generate similar texts.
+The most simple language model which is of use is an n-gram
+model. This model makes use of sequences of the length n to
+model language. It can be used to get the probability of a third
+word, given the previous two words. This way, a complete text
+can be generated word by word. Refinements and extensions
+to this model are discussed in the field of Natural Language
+Processing (NLP).
+However, there are much more sophisticated models. One
+of those are character predictors based on Recurrent Neural
+Networks (RNNs). Those character predictors take a sequence
+of characters as input and predict the next character. In that
+sense they are similar to the n-gram model, but operate on
+a lower level. Using such a predictor, one can generate texts
+character by character. If the model is good, the text can have
+the correct punctuation. This would not be possible with a
+word predictor.
+Character predictors can be implemented with RNNs. In contrast
+ to standard feed-forward neural networks like multilayer
+Perceptrons (MLPs) which was shown in Figure 1(b), those
+networks are trained to take their output at some point as well as
+the normal input. This means they can keep some information
+over time. One of the most common variants to implement
+RNNs is by using so called Long short-term memory (LSTM)
+cells [HS97].
+Recurrent networks apply two main ideas in order to learn: The
+first is called unrolling and means that a recurrent network
+is imagined to be an infinite network over time. At each time
+step the recurrent neurons get duplicated. The second idea is
+weight sharing which means that those unrolled neurons share
+the same weight.
+A. Similar Texts Generation
+Karpathy trained multiple character RNNs on different datasets
+and gave an excellent introduction [Kar15b]. He trained it on
+Paul Graham’s essays, all the works of Shakespeare, the Hutter
+Prize [hut] 100 MB dataset of
+raw Wikipedia articles,
+the raw LaTeX source file of
+a book about algebraic stacks and geometry
+and Linux C code.
+With that training data, the models can generate similar texts.
+New works which look like Shakespeare plays, new Wikipedia
+articles, new Linux code and new papers about algebraic
+geometry can thus automatically be generated. At a first
+
+glance, they do look authentic. The syntax was mostly used
+correctly, the formatting looks as expected, the sentences are
+grammatically correct. However, when one looks at the broader
+context it is easy to recognize that the algorithm has no insight
+in what it is doing. It does match patterns really well, but it
+fails to follow a central theme. In the context of C code this
+means that new variables are introduced, but not used. At the
+same time, variables which were not declared are used. In
+the context of Shakespeare plays this means that a lot of new
+characters are introduced, but they don’t speak with each other
+or about each other.
+The code used to generate these examples is available and
+ready to use through [Kar15a]. A couple of examples are
+in Section A.
+B. Chatbots
+Chatbots are computer programs which participate in chat
+rooms as autonomous agents. This means they have similar
+permissions and possibilities as usual human users have, but
+users can trigger a set of commands to make the bot give them
+valuable information or features.
+A special category of chatbots are such bots which actively
+participate in the conversation, which is usually not the case.
+One of the earliest programs in this category is ELIZA, a bot
+created by Joseph Weizenbaum in the 1960s [Wei76]. This
+program had a set of patterns implemented to which it would
+reply in a seemingly smart way in a psychologist’s fashion.
+This means quite often the program would simply repeat the
+last sentence and add something meaningless like “How do
+you feel about it?”. According to [Cur14], Weizenbaum once
+found his secretary — who was aware of the fact that this is
+a computer program — chatting with the machine. When he
+looked over her shoulder, she asked him “would you mind
+leaving the room”.
+Today, much more sophisticated chatbots exist. They make use
+of the vast amount of data which is available by millions of
+Wikipedia articles, chat protocols, websites, help desk protocols,
+subtitles of movies as well as the astonishing increase in
+computing power to train RNNs and language models similar
+to the ones described before.
+Interesting results like the following were obtained by [VL15]:
+Human: what is the purpose of life ?
+Machine: to serve the greater good .
+Human: what is the purpose of living ?
+Machine: to live forever .
+V. AUDIO DATA
+Common machine learning tasks which involve audio data
+are speech recognition, speaker identification, identification of
+songs. This leads to some less-common, but interesting topics:
+The composition of music, the synthesizing of audio as art.
+While the composition might be considered in Section IV, we will now investigate the work which was done in audio
+synthesization.
+A. Emily Howell
+David Cope created a project called “Experiments in Musical
+Intelligence” (short: EMI or Emmy) in 1984 [Cop87]. He
+introduces the idea of seeing music as a language which
+can be analyzed with natural language processing (NLP)
+methods. Cope mentions that EMI was more useful to him,
+when he used the system to “create small phrase-size textures
+as next possibilities using its syntactic dictionary and rule
+base” [Cop87].
+In 2003, Cope started a new project which was based on EMI:
+Emily Howell [Cop13]. This program is able to “creat[e] both
+highly authentic replications and novel music compositions”.
+The reader might want to listen to [Cop12] to get an impression
+of the beauty of the created music.
+According to Cope, an essential part of music is “a set of
+instructions for creating different, but highly related self-replications”.
+Emmy was programmed to find this set of
+instructions. It tries to find the “signature” of a composer,
+which Cope describes as “contiguous patterns that recur in two
+or more works of the composer”.
+The new feature of Emily Howell compared to Emmy is that
+Emily Howell does not necessarily remain in a single, already
+known style.
+Emily Howell makes use of an association network. Cope emphasizes
+that this is not a form of a neural network. However, it
+is not clear from [Cop13] how exactly an association network
+is trained. Cope mentions that Emily Howell is explained in
+detail in [Cop05].
+B. GRUV
+Recurrent neural networks — LSTM networks, to be exact
+— are used in [NV15] together with Gated Recurrent Units
+(GRU) to build a network which can be trained to generate
+music. Instead of taking notes directly or MIDI files, Nayebi
+and Vitelli took raw audio waveforms as input. Those audio
+waveforms are feature vectors given for time steps
+$0, 1, \dots, t-1, t$.
+The network is given those feature vectors
+$X_1, \dots, X_t$
+and has to predict the following feature vector
+$X_{t+1}$. This
+means it continues the music. As the input is continuous, the
+problem was modeled as a regression task. Discrete Fourier
+Transformation (DFT) was used on chunks of length N of the
+music to obtain features in the frequency domain.
+An implementation can be found at [VN15] and a demonstration
+can be found at [Vit15].
+C. Audio Synthesization
+Audio synthesization is generating new audio files. This can
+either be music or speech. With the techniques described before,
+
+neural networks can be trained to generate music note by note.
+However, it is desirable to allow multiple notes being played
+at the same time.
+This idea and some others were applied by Daniel Johnson. He
+wrote a very good introduction into neural networks for music
+composition which explains those ideas [Joh15b]. Example
+compositions are available there, too. He also made the code for
+his Biaxial Recurrent Neural Network available under [Joh15a].
+VI. DISCUSSION
+What do these examples mean for our understanding of
+creativity? Does it influence how much we value art? Could
+we define art and creativity better after having those and similar
+results?
+I think we might readjust our understanding of creativity just
+like we adjusted our understanding of algorithmically hard
+problems after Deep Blue won against the reigning world
+chess champion Garry Kasparov in 1997.
+However, by now it is obvious that machine learning algorithms
+cannot compete with human artists. Today’s state of the art
+algorithms which are purely based on machine learning don’t
+follow a central theme. They lack the ability to plan. Although
+clever algorithms were implemented for composing music, it
+seems as if there is still a lot of supervision involved.
+REFERENCES
+[Cop87] D. Cope, “Experiments in music intelligence (emi),” 1987.
+[Online]. Available: http://hdl.handle.net/2027/spo.bbp2372.1987.
+025
+[Cop05] ——, Computer models of musical creativity. MIT Press
+Cambridge, 2005.
+[Cop12] ——, “Emily howell fugue,” YouTube, Oct. 2012. [Online].
+Available: https://www.youtube.com/watch?v=jLR-_c_uCwI
+[Cop13] ——, “The well-programmed clavier: Style in computer music
+composition,” XRDS: Crossroads, The ACM Magazine for
+Students, vol. 19, no. 4, pp. 16–20, 2013. [Online]. Available:
+http://dl.acm.org/citation.cfm?id=2460444
+[Cur14] A. Curtis, “Now then,” BBC, Jul. 2014. [Online].
+ Available: http://www.bbc.co.uk/blogs/adamcurtis/entries/
+78691781-c9b7-30a0-9a0a-3ff76e8bfe58
+[Gad06] A. Gadsby, Ed., Dictionary of Contemporary English. Pearson
+Education Limited, 2006.
+[GEB15] L. A. Gatys, A. S. Ecker, and M. Bethge, “A neural algorithm of
+artistic style,” arXiv preprint arXiv:1508.06576, 2015. [Online].
+Available: http://arxiv.org/abs/1508.06576
+[goo15] “Inceptionism: Going deeper into neural networks,” Google
+Photos, Jun. 2015. [Online]. Available: https://goo.gl/Bydofw
+[HS97] S. Hochreiter and J. Schmidhuber, “Long short-term memory,”
+Neural computation, vol. 9, no. 8, pp. 1735–1780, 1997.
+[Online]. Available: http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?
+arnumber=6795963
+[hut] “50’000 euro prize for compressing human knowledge.” [Online].
+Available: http://prize.hutter1.net/
+[HZRS15] K. He, X. Zhang, S. Ren, and J. Sun, “Deep residual learning
+for image recognition,” arXiv preprint arXiv:1512.03385, 2015.
+[Online]. Available: http://arxiv.org/abs/1512.03385
+[Joh15a] D. Johnson, “Biaxial recurrent neural network for music
+composition,” GitHub, Aug. 2015. [Online]. Available: https:
+//github.com/hexahedria/biaxial-rnn-music-composition
+[Joh15b] ——, “Composing music with recurrent neural
+ networks,” Personal Blog, Aug. 2015. [Online].
+ Available: http://www.hexahedria.com/2015/08/03/
+composing-music-with-recurrent-neural-networks/
+[Joh16]
+ J. Johnson, “neural-style,” GitHub, Jan. 2016. [Online]. Available:
+https://github.com/jcjohnson/neural-style
+[Kar15a] A. Karpathy, “char-rnn,” GitHub, Nov. 2015. [Online]. Available:
+https://github.com/karpathy/char-rnn
+[Kar15b]
+ ——, “The unreasonable effectiveness of recurrent neural
+networks,” Personal Blog, May 2015. [Online]. Available:
+http://karpathy.github.io/2015/05/21/rnn-effectiveness/
+[KMN+
+02] T. Kanungo, D. Mount, N. Netanyahu, C. Piatko, R. Silverman,
+and A. Wu, “An efficient k-means clustering algorithm: analysis
+and implementation,” Pattern Analysis and Machine Intelligence,
+IEEE Transactions on, vol. 24, no. 7, pp. 881–892, Jul 2002.
+[Mit97] T. M. Mitchell, Machine learning, ser. McGraw Hill series in
+computer science. McGraw-Hill, 1997.
+[MOT15]
+ A. Mordvintsev, C. Olah, and M. Tyka, “Inceptionism: Going
+deeper into neural networks,” googleresearch.blogspot.co.uk,
+Jun. 2015. [Online]. Available: http://googleresearch.blogspot.de/
+2015/06/inceptionism-going-deeper-into-neural.html
+[Nie15] M. A. Nielsen, Neural Networks and Deep Learning.
+ Determination Press, 2015. [Online]. Available:
+ http://neuralnetworksanddeeplearning.com/chap6.html#
+introducing_convolutional_networks
+[NV15] A. Nayebi and M. Vitelli, “GRUV: Algorithmic music generation
+using recurrent neural networks,” 2015. [Online]. Available:
+http://cs224d.stanford.edu/reports/NayebiAran.pdf
+[Red] “Deepdream,” Reddit. [Online]. Available: https://www.reddit.
+com/r/deepdream/
+[Shi14] Y. Shih, “Style transfer for headshot portraits,” YouTube, Jun.
+2014. [Online]. Available: https://www.youtube.com/watch?v=
+Hj5lGFzlubU
+[SPB+
+14] Y. Shih, S. Paris, C. Barnes, W. T. Freeman, and F. Durand,
+“Style transfer for headshot portraits,” ACM Transactions on
+Graphics (TOG), vol. 33, no. 4, p. 148, 2014. [Online]. Available:
+http://dl.acm.org/citation.cfm?id=2601137
+[TL05] P. Tresset and F. F. Leymarie, “Generative portrait sketching,” in
+Proceedings of VSMM, 2005, pp. 739–748.
+[Vit15] M. Vitelli, “Algorithmic music generation with recurrent
+neural networks,” YouTube, Jun. 2015. [Online]. Available:
+https://youtu.be/0VTI1BBLydE
+[VKMT13] C. Vondrick, A. Khosla, T. Malisiewicz, and A. Torralba,
+“Hoggles: Visualizing object detection features,” in Computer
+Vision (ICCV), 2013 IEEE International Conference on. IEEE,
+2013, pp. 1–8. [Online]. Available: http://ieeexplore.ieee.org/
+xpls/abs_all.jsp?arnumber=6751109
+[VL15] O. Vinyals and Q. Le, “A neural conversational model,”
+arXiv preprint arXiv:1506.05869, Jul. 2015. [Online]. Available:
+http://arxiv.org/abs/1506.05869v2
+[VN15] M. Vitelli and A. Nayebi, “GRUV,” Aug. 2015. [Online].
+Available: https://github.com/MattVitelli/GRUV
+[Wei76] J. Weizenbaum, Computer Power and Human Reason: From
+Judgement to Calculation. W.H.Freeman & Co Ltd, 1976.
+[ZF14] M. D. Zeiler and R. Fergus, “Visualizing and understanding convolutional
+ networks,” in Computer Vision–ECCV 2014. Springer,
+2014, pp. 818–833.
+
+APPENDIX A
+AUTOMATICALLY GENERATED TEXTS
+The following texts were generated by [Kar15a] and published by Karpathy on [Kar15b]. This is meant to be a copy for
+convenience and the case that the website is not reachable.
+A. Shakespeare
+PANDARUS:
+Alas, I think he shall be come approached and the day
+When little srain would be attain’d into being never fed,
+And who is but a chain and subjects of his death,
+I should not sleep.
+Second Senator:
+They are away this miseries, produced upon my soul,
+Breaking and strongly should be buried, when I perish
+The earth and thoughts of many states.
+DUKE VINCENTIO:
+Well, your wit is in the care of side and that.
+Second Lord:
+They would be ruled after this chamber, and
+my fair nues begun out of the fact, to be conveyed,
+Whose noble souls I’ll have the heart of the wars.
+Clown:
+Come, sir, I will make did behold your worship.
+VIOLA:
+I’ll drink it.
+B. Wikipedia
+Naturalism and decision for the majority of Arab countries’ capitalide was grounded
+by the Irish language by [[John Clair]], [[An Imperial Japanese Revolt]], associated
+with Guangzham’s sovereignty. His generals were the powerful ruler of the Portugal
+in the [[Protestant Immineners]], which could be said to be directly in Cantonese
+Communication, which followed a ceremony and set inspired prison, training. The
+emperor travelled back to [[Antioch, Perth, October 25|21]] to note, the Kingdom
+of Costa Rica, unsuccessful fashioned the [[Thrales]], [[Cynth’s Dajoard]], known
+in western [[Scotland]], near Italy to the conquest of India with the conflict.
+Copyright was the succession of independence in the slop of Syrian influence that
+was a famous German movement based on a more popular servicious, non-doctrinal
+and sexual power post. Many governments recognize the military housing of the
+[[Civil Liberalization and Infantry Resolution 265 National Party in Hungary]],
+that is sympathetic to be to the [[Punjab Resolution]]
+(PJS)[http://www.humah.yahoo.com/guardian.
+cfm/7754800786d17551963s89.htm Official economics Adjoint for the Nazism, Montgomery
+was swear to advance to the resources for those Socialism’s rule,
+was starting to signing a major tripad of aid exile.]]
+C. Linux Code, 1
+/
+*
+
+* Increment the size file of the new incorrect UI_FILTER group information
+* of the size generatively.
+*/
+static int indicate_policy(void)
+{
+ int error;
+if (fd == MARN_EPT) {
+/
+*
+* The kernel blank will coeld it to userspace.
+*/
+if (ss->segment < mem_total)
+unblock_graph_and_set_blocked();
+else
+ret = 1;
+goto bail;
+}
+segaddr = in_SB(in.addr);
+selector = seg / 16;
+setup_works = true;
+for (i = 0; i < blocks; i++) {
+seq = buf[i++];
+bpf = bd->bd.next + i
+ * search;
+if (fd) {
+current = blocked;
+}
+}
+rw->name = "Getjbbregs";
+bprm_self_clearl(&iv->version);
+regs->new = blocks[(BPF_STATS << info->historidac)] | PFMR_CLOBATHINC_SECONDS << 12;
+return segtable;
+}
+D. Linux Code, 2
+/
+*
+* Copyright (c) 2006-2010, Intel Mobile Communications. All rights reserved.
+*
+* This program is free software; you can redistribute it and/or modify it
+* under the terms of the GNU General Public License version 2 as published by
+* the Free Software Foundation.
+*
+*  This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+*
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software Foundation,
+* Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+#include <linux/kexec.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/multi.h>
+
+#include <linux/ckevent.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/e820.h>
+#include <asm/system_info.h>
+#include <asm/setew.h>
+#include <asm/pgproto.h>
+#define REG_PG vesa_slot_addr_pack
+#define PFM_NOCOMP AFSR(0, load)
+#define STACK_DDR(type) (func)
+#define SWAP_ALLOCATE(nr) (e)
+#define emulate_sigs() arch_get_unaligned_child()
+#define access_rw(TST) asm volatile("movd %%esp, %0, %3" : : "r" (0)); \
+if (__type & DO_READ)
+static void stat_PC_SEC __read_mostly offsetof(struct seq_argsqueue, \
+pC>[1]);
+static void
+os_prefix(unsigned long sys)
+{
+#ifdef CONFIG_PREEMPT
+PUT_PARAM_RAID(2, sel) = get_state_state();
+set_pid_sum((unsigned long)state, current_state_str(),
+(unsigned long)-1->lr_full; low;
+}
\ No newline at end of file
diff --git a/read/results/playa/1602.06541.txt b/read/results/playa/1602.06541.txt
new file mode 100644
index 0000000..ab4a3ed
--- /dev/null
+++ b/read/results/playa/1602.06541.txt
@@ -0,0 +1,1785 @@
+
+A Survey of Semantic Segmentation
+Martin Thoma
+info@martin-thoma.de
+Abstract—This survey gives an overview over different
+techniques used for pixel-level semantic segmentation.
+Metrics and datasets for the evaluation of segmentation
+ algorithms and traditional approaches for segmentation
+ such as unsupervised methods, Decision Forests
+and SVMs are described and pointers to the relevant
+papers are given. Recently published approaches with
+convolutional neural networks are mentioned and typical
+problematic situations for segmentation algorithms are
+examined. A taxonomy of segmentation algorithms is
+given.
+ I. INTRODUCTION
+Semantic segmentation is the task of clustering
+parts of images together which belong to the same
+object class. This type of algorithm has several use cases
+ such as detecting road signs [MBLAGJ+
+07],
+detecting tumors [MBVLG02], detecting medical instruments
+ in operations [WAH97], colon crypts segmentation
+ [CRSS14], land use and land cover classification
+ [HDT02]. In contrast, non-semantic segmentation
+only clusters pixels together based on general characteristics
+ of single objects. Hence the task of non-semantic
+segmentation is not well-defined, as many different
+segmentations might be acceptable.
+Several applications of segmentation in medicine are
+listed in [PXP00].
+Object detection, in comparison to semantic segmentation,
+ has to distinguish different instances of the
+same object. While having a semantic segmentation
+is certainly a big advantage when trying to get object
+instances, there are a couple of problems: neighboring
+pixels of the same class might belong to different object
+instances and regions which are not connected may
+belong to the same object instance. For example, a
+tree in front of a car which visually divides the car into
+two parts.
+This paper is organized as follows: It begins by giving
+a taxonomy of segmentation algorithms in Section II.
+A summary of quality measures and datasets which are
+used for semantic segmentation follows in Section III.
+A summary of traditional segmentation algorithms and
+their characteristics follows in Section V, as well as a
+brief, non-exhaustive summary of recently published
+semantic segmentation algorithms which are based on
+neural networks in Section VI. Finally, Section VII
+informs the reader about typical problematic cases for
+segmentation algorithms.
+II. TAXONOMY OF SEGMENTATION ALGORITHMS
+The computer vision community has published a
+wide range of segmentation algorithms so far. Those
+algorithms can be grouped by the kind of data they
+operate on and the kind of segmentation they are able
+to produce.
+The following subsections will give four different
+criteria by which segmentation algorithms can be
+classified.
+This survey describes fixed-class (see Section II-A),
+single-class affiliation (see Section II-B) algorithms
+which work on grayscale or colored single pixel images
+(see Section II-C) in a completely automated, passive
+fashion (see Section II-D).
+A. Allowed classes
+Semantic segmentation is a classification task. As
+such, the classes on which the algorithm is trained is a
+central design decision.
+Most algorithms work with a fixed set of classes;
+some even only work on binary classes like foreground
+ vs background [RM07], [CS10] or street vs
+no street [BKTT15].
+However, there are also unsupervised segmentation
+algorithms which do not distinguish classes at all (see
+Section V-B) as well as segmentation algorithms which
+are able to recognize when they don’t know a class.
+For example, in [GRC+
+08] a void class was added
+for classes which were not in the training set. Such
+a void class was also used in the MSRCv2 dataset
+(see Section III-B2) to make it possible to make more
+coarse segmentations and thus having to spend less
+time annotating the image.
+B. Class affiliation of pixels
+Humans do an incredible job when looking at the
+world. For example, when we see a glass of water
+standing on a table we can automatically say that there
+is the glass and behind it the table, even if we only had a
+single image and were not allowed to move. This means
+we simultaneously assign two labels to the coordinates of the
+glass: Glass and table. Although there is much more
+work being done on single class affiliation segmentation
+ algorithms, there is a publication about multiple
+class affiliation segmentation [LRAL08]. Similarly,
+recent publications in pixel-level object segmentation
+used layered models [YHRF12].
+arXiv:1602.06541v2  [cs.CV]  11 May 2016
+
+C. Input Data
+The available data which can be used for the
+inference of a segmentation varies by application.
+• Grayscale vs colored: Grayscale images are
+commonly used in medical imaging such as
+magnetic resonance (MR) imaging or ultrasonography
+ whereas colored photographs are obviously
+widespread.
+• Excluding or including depth data: RGB-D,
+sometimes also called range [HJBJ+
+96] is available
+ in robotics, autonomous cars and recently
+also in consumer electronics such as Microsoft
+Kinect [Zha12].
+• Single image vs stereo images vs cosegmentation:
+ Single image segmentation is the
+most wide-spread kind of segmentation, but using
+stereo images was already tried in [BVZ01]. It can
+be seen as a more natural way of segmentation as
+most mammals have two eyes. It can also be seen
+as being related to having depth data.
+Co-segmentation as in [RMBK06], [CXGS12] is
+the problem of finding a consistent segmentation
+for multiple images. This problem can be seen
+in two ways: On the one hand, it can be seen
+as the problem of finding common objects in at
+least two images. On the other hand, every image
+after the first can be used as an additional source
+of information to find a meaningful segmentation.
+This idea can be extended to time series such as
+videos.
+• 2D vs 3D: Segmenting images is a 2D segmentation
+ task where the smallest unit is called a pixel.
+In 3D data, such as volumetric X-ray CT images
+as they were used in [HHR01], the smallest unit
+is called a voxel.
+D. Operation state
+The operation state of the classifying machine can
+either be active as in [SUM+
+11], [SSA12] where robots
+can move objects to find a segmentation or passive,
+where the received image cannot be influenced. Among
+the passive algorithms, some segment in a completely
+automatic fashion, others work in an interactive mode.
+One example would be a system where the user clicks
+on the background or marks a coarse segmentation and
+the algorithm finds a fine-grained segmentation. [BJ00],
+[RKB04], [PS07] describe systems which work in an
+interactive mode.
+(a) Example Scene (b) Visualization of a found segmentation
+
+Figure 1: An example of a scene and a possible visualization
+ of a found segmentation.
+III. EVALUATION AND DATASETS
+A. Quality measures for evaluation
+A performance measure is a crucial part of any
+machine learning system. As users of a semantic
+segmentation system expect correct results, the accuracy
+is the most commonly used performance measure, but
+there are other measures of quality which matter when
+segmentation algorithms are compared. This section
+gives an overview of those quality measures.
+1) Accuracy: Showing the correctness of the segmentation
+ hypotheses is done in most publications about
+semantic segmentation. However, there are a couple
+of different ways how this accuracy can be displayed.
+One way to give readers a first qualitative impression
+of the obtained segmentations is by showing examples
+such as Figure 1.
+However, this can only support the explanation of
+particular problems or showcase special situations. For
+meaningful information about the overall accuracy, there
+are a couple of metrics how accuracy can be defined.
+For this section, let $k \in \mathbb{N}$ be the number of classes,
+$n_{ij} \in \mathbb{N}_0$ with $i, j \in 1, \dots, k$ be the number of pixels
+which belong to class $i$ and were labeled as class $j$.
+$(n_{ij})$ is called a confusion matrix. Let $t_i = \sum_{j=1}^{k} n_{ij}$
+be the total number of pixels of class $i$.
+One way to compare segmentation algorithms is by
+the pixel-wise accuracy of the predicted segmentation
+as done in many publications [SWRC06], [CP08],
+[LSD14]. This is also called per-pixel rate and defined
+as $\frac{\sum_{i=1}^{k} n_{ii}}{\sum_{i=1}^{k} t_i}$. Taking the pixel-wise classification
+accuracy has two major drawbacks:
+P1 Tasks like segmenting images for autonomous cars
+have large regions which have one class. This
+makes achieving classification accuracies of more
+than 30 % with a priori knowledge only possible.
+For example, a system might learn that a certain
+position of the image is most of the time “sky”
+while another position is most of the time “road”.
+
+P2 The manually labeled images could have a more
+coarse labeling. For example, a human classifier
+could have labeled a region as “car” and the
+algorithm could have split that region into the
+general “car” and the more specific “wheel of a
+car”
+Three accuracy metrics which do not suffer from
+problem P1 are used in [LSD14]:
+• mean accuracy: $\frac{1}{k} \cdot \sum_{i=1}^{k} \frac{n_{ii}}{t_i} \in [0, 1]$
+• mean intersection over union:
+$\frac{1}{k} \cdot \sum_{i=1}^{k} \frac{n_{ii}}{t_i - n_{ii} + \sum_{j=1}^{k} n_{ji}} \in [0, 1]$
+• frequency weighted intersection over union:
+$\left(\sum_{i=1}^{k} t_i\right)^{-1} \sum_{i=1}^{k} t_i \cdot \frac{n_{ii}}{t_i - n_{ii} + \sum_{j=1}^{k} n_{ji}} \in [0, 1]$
+Another problem might be pixels which cannot be
+assigned to one of the known classes. For this reason,
+[SWRC06] makes use of a void class. This class gets
+completely ignored for all quality measures. Hence the
+total number of pixels is assumed to be width · height −
+number of void pixels.
+One way to deal with problem P1 and problem P2
+is giving the confusion matrix as done in [SWRC06].
+However, this approach is not feasible if many classes
+are given.
+The F -measure is useful for binary classification
+ task such as the KITTI road segmentation
+benchmark [FKG13] or crypt segmentation as done
+by [CRSS14]. It is calculated as “the harmonic mean
+of the precision and recall” [PH05]:
+$F_\beta = (1 + \beta^2) \cdot \frac{tp}{(1 + \beta^2) \cdot tp + \beta^2 \cdot fn + fp}$
+where β = 1 is chosen in most cases and tp means
+true positive, fn means false negative and fp means
+false positive.
+Finally, it should be noted that a lot of other measures
+for the accuracy of segmentations were proposed for
+non-semantic segmentation. One of those accuracy
+measures is Normalized Probabilistic Rand (NPR)
+index which was introduced in [UPH05] and evaluated
+ in [CSI+
+09] on dermoscopy images. Other
+non-semantic segmentation measures were introduced
+in [MFTM01], but the reason for creating them seems to
+be to deal with the under-defined task description of non-semantic
+ segmentation. These accuracy measures try to
+deal with different levels of coarsity of the segmentation.
+This is much less of a problem in semantic segmentation
+and thus those measures are not explained here.
+2) Speed: A maximum upper bound on the execution
+time for the inference on a single image is a hard
+requirement for some applications. For example, in the
+case of autonomous cars, where an algorithm classifies
+pixels as street or no-street and thus makes a semantic segmentation, every image needs to be processed within
+20 ms [BKTT15]. This time is called latency.
+Most papers do not give exact values for the time
+their application needs. One reason might be that this is
+very hardware, implementation and in some cases even
+data specific. For example, [HJBJ+
+96] notes that their
+algorithm needs 10 s on a Sun SparcStation 20. The
+fastest CPU ever produced for this system had 200 MHz.
+Comparing this directly with results which were obtained
+ using an Intel i7-4820K with 3.9 GHz would not
+be meaningful.
+However, it does still make sense to mention the
+execution time as well as the hardware in individual
+papers. This gives the interested reader the possibility to
+estimate how difficult it might be to adjust the algorithm
+to work in the required time-constraints.
+Besides the latency, the throughput is another
+relevant characteristic of algorithms and implementations
+ for semantic segmentation. For example, for the
+automatic description of images in order to enable text
+search the throughput is of much higher importance
+than latency.
+3) Stability: A reasonable requirement on semantic
+segmentation algorithms is the stability of a segmentation
+ over slight changes in the input image. When
+the image data is slightly blurred by smoke such as
+in Figure 4(c), the segmentation should not change.
+Also, two images which show a slight change in
+perspective should also only result in slight changes in
+the segmentation [PH05].
+4) Memory usage: Peak memory usage matters
+when segmentation algorithms are used in devices like
+smartphones or cameras, or when the algorithms have
+to finish in a given time frame, run on the graphics
+processing unit (GPU) and consume so much memory
+for single image segmentation that only the latest
+graphic cards can be used. However, no publications
+were available mentioning the peak memory usage.
+B. Datasets
+The computer vision community produced a couple
+of different datasets which are publicly available. In
+the following, only the most widely used ones as well
+as three medical databases are described. An overview
+over the quantity and the kind of data is given by
+Table I.
+1) PASCAL VOC: The PASCAL1
+ VOC2
+ challenge
+was organized eight times with different datasets:
+Once every year from 2005 to 2012 [EVGW+
+b].
+1
+pattern analysis, statistical modelling and computational learning,
+an EU network of excellence
+2
+Visual Object Classes
+
+Beginning with 2007, a segmentation challenge was
+added [EVGW+
+a].
+The dataset consists of annotated photographs from
+www.flickr.com, a photo sharing website. There are
+multiple challenges for PASCAL VOC. The 2012
+competition had five challenges of which one is a
+segmentation challenge where a single class label was
+given for each pixel. The classes are: aeroplane, bicycle,
+bird, boat, bottle, bus, car, cat, chair, cow, dining table,
+dog, horse, motorbike, person, potted plant, sheep, sofa,
+train, tv/monitor.
+Although no new competitions will be held, new
+algorithms can be evaluated on the 2010, 2011 and
+2012 data via http://host.robots.ox.ac.uk:8080/
+The PASCAL VOC segmentation challenges use the
+intersection over union criterion (see Section III-A).
+2) MSRCv2: Microsoft Research has published a
+database of 591 photographs with pixel-level annotation
+of 21 classes: aeroplane, bike, bird, boat, body, book,
+building, car, cat, chair, cow, dog, face, flower, grass,
+road, sheep, sign, sky, tree, water. Additionally, there
+is a void label for pixels which do not belong to
+any of the 21 classes or which are close to the
+segmentation boundary. This allows a “rough and quick
+hand-segmentation which does not align exactly with
+the object boundaries” [SWRC06].
+3) Medical Databases: The Warwick-QU Dataset
+consists of 165 images with pixel-level annotation of
+5 classes: “healthy, adenomatous, moderately differentiated,
+ moderately-to-poorly differentiated, and poorly
+differentiated” [CSM09]. This dataset is part of the
+Gland Segmentation (GlaS) challenge.
+The DIARETDB1 [KKV+
+14] is a dataset of 89
+ fundus images. Those images show the interior
+surface of the eye. Fundus images can be used to detect
+diabetic retinopathy. The images have four classes of
+coarse annotations: hard and soft exudates, hemorrhages
+and red small dots.
+20 test and additionally 20 training retinal fundus
+ images are available through the DRIVE data
+set [SAN+
+04]. The vessels were annotated. Additionally,
+ [AP11] added vascular features.
+The Open-CAS Endoscopic Datasets [MHMK+
+14]
+are 60 images taken from laparoscopic adrenalectomies
+and 60 images taken from laparoscopic pancreatic
+resections. Those are from 3 surgical procedures each.
+Half of the data was annotated by a medical expert for
+“medical instrument” and “no medical instrument”. All
+images were labeled by anonymous untrained workers
+which they refer to as knowledge workers (KWs).
+One crowd annotation was obtained for each image by
+a majority vote on a pixel basis of 10 segmentations
+given by 10 different KWs.
+Training
+Prediction
+ PostprocessingWindow-wise
+
+ClassificationWindow
+extraction Data
+augmentation
+Feature extractionPreprocessing
+Figure 2: A typical segmentation pipeline gets raw
+pixel data, applies preprocessing techniques
+like scaling and feature extraction like HOG
+features. For training, data augmentation
+techniques such as image rotation can be
+applied. For every single image, patches of
+the image called windows are extracted and
+those windows are classified. The resulting
+semantic segmentation can be refined by
+simple morphologic operations or by more
+complex approaches such as Markov Random
+Fields (MRFs).
+IV. SEGMENTATION PIPELINE
+Typically, semantic segmentation is done with a
+classifier which operates on fixed-size feature inputs
+and a sliding-window approach [DT05], [YBCK10],
+[SCZ08]. This means a classifier is trained on images
+of a fixed size. The trained classifier is then fed with
+rectangular regions of the image which are called windows.
+ Although the classifier gets an image patch of e.g.
+51 px × 51 px of the environment, it might only classify
+the center pixel or a subset of the complete window.
+This segmentation pipeline is visualized in Figure 2.
+This approach was taken by [BKTT15] and a majority
+ of the VOC2007 participants [EVGW+
+a]. As this
+approach has to apply the patch classifier 512 · 512 =
+262 144 times for images of size 512 px × 512 px, there
+are techniques for speeding it up such as applying a
+stride and interpolating the results.
+Neural networks are able to apply the sliding window
+approach in a very efficient way by handling a trained
+network as a convolution and applying the convolution
+on the complete image.
+However, there are alternatives. Namely MRFs and
+Conditional Random Fields (CRFs) which take the
+information of the complete image and segment it in
+a holistic approach.
+
+V. TRADITIONAL APPROACHES
+Image segmentation algorithms which use traditional
+approaches, hence don’t apply neural networks and
+make heavy use of domain knowledge, are wide-spread
+in the computer vision community. Features which can
+be used for segmentation are described in Section V-A,
+a very brief overview of unsupervised, non-semantic
+segmentation is given in Section V-B, Random Decision
+Forests are described in Section V-C, Markov Random
+Fields in Section V-E and Support Vector Machines
+(SVMs) in Section V-D. Postprocessing is covered in
+Section V-G.
+It should be noted that algorithms can use a combination
+ of methods. For example, [TNL14] makes use of a
+combination of a SVM and a MRF. Also, auto-encoders
+can be used to learn features which in turn can be used
+by any classifier.
+A. Features and Preprocessing methods
+The choice of features is very important in traditional
+approaches. The most commonly used local and global
+features are explained in the following as well as feature
+dimensionality reduction algorithms.
+1) Pixel Color: Pixel color in different image spaces
+(e.g. 3 features for RGB, 3 features for HSV, 1 feature
+for the gray-value) are the most widely used features. A
+typical image is in the RGB color space, but depending
+on the classifier and the problem another color space
+might result in better segmentations. RGB, YcBcr, HSL,
+Lab and YIQ are some examples used by [CRSS14].
+No single color space has been proven to be superior
+to all others in all contexts [CJSW01]. However, the
+most common choices seem to be RGB and HSI.
+Reasons for choosing RGB is simplicity and the support
+by programming languages, whereas the choice of
+the HSI color space might make it simpler for the
+classifier to become invariant to illumination. One
+reason for choosing CIE-L*a*b* color space is that it
+approximates human perception of brightness [KP92].
+It follows that choosing the L*a*b color space helps
+algorithms to detect structures which are seen by
+humans. Another way of improving the structure within
+an image is histogram equalization, which can be
+applied to improve contrast [PAA+
+87], [RM07].
+2) Histogram of oriented Gradients: Histogram of
+oriented gradients (HOG) features interpret the image
+as a discrete function $I : \mathbb{N}^2 \to \{0, \dots, 255\}$ which
+maps the position (x, y) to a color. For each pixel, there
+are two gradients: The partial derivatives with respect to x and y.
+Now the original image is transformed to two feature
+maps of equal size which represent the gradient. These
+feature maps are split into patches and a histogram of the directions is calculated for each patch. HOG features
+were proposed in [DT05] and are used in [BMBM10],
+[FGMR10] for segmentation tasks.
+3) SIFT: Scale-invariant feature transform (SIFT)
+feature descriptors describe keypoints in an image. The
+image patch of the size 16 × 16 around the keypoint
+is taken. This patch is divided in 16 distinct parts of
+the size 4 × 4. For each of those parts a histogram of
+8 orientations is calculated similar as for HOG features.
+This results in a 128-dimensional feature vector for
+each keypoint.
+It should be emphasized that SIFT is a global feature
+for a complete image.
+SIFT is described in detail in [Low04] and is used
+in [PTN09].
+4) BOV: Bag-of-visual-words (BOV), also called
+bag of keypoints, is based on vector quantization.
+Similar to HOG features, BOV features are histograms
+which count the number of occurrences of certain
+patterns within a patch of the image. BOV are described
+in [CDF+
+04] and used in combination with SIFT
+feature descriptors in [CP08].
+5) Poselets: Poselets rely on manually added extra
+keypoints such as “right shoulder”, “left shoulder”,
+“right knee” and “left knee”. They were originally
+used for human pose estimation. Finding those extra
+keypoints is easily possible for well-known image
+classes like humans. However, it is difficult for classes
+like airplanes, ships, organs or cells where the human
+annotators do not know the keypoints. Additionally, the
+keypoints have to be chosen for every single class. There
+are strategies to deal with those problems like viewpoint-dependent
+keypoints. Poselets were used in [BMBM10]
+to detect people and in [BBMM11] for general object
+detection of the PASCAL VOC dataset.
+6) Textons: A texton is the minimal building block
+of vision. The computer vision literature does not give a
+strict definition for textons, but edge detectors could be
+one example. One might argue that deep learning techniques
+with Convolutional Neural Networks (CNNs)
+learn textons in the first filters.
+An excellent explanation of textons can be found
+in [ZGWX05].
+7) Dimensionality Reduction: High-resolution images
+have a lot of pixels. Having one or more features per
+pixel results in well over a million features. This makes
+training difficult while the higher resolution might not
+contain much more information. A simple approach
+to deal with this is downsampling the high-resolution
+image to a low-resolution variant. Another way of
+doing dimensionality reduction is principal component
+analysis (PCA), which is applied by [COWR11]. The
+idea behind PCA is to find a hyperplane on which all
+
+feature vectors can be projected with a minimal loss
+of information. A detailed description of PCA is given
+by [Smi02].
+One problem of PCA is the fact that it does not
+distinguish different classes. This means it can happen
+that a perfectly linearly separable set of feature vectors
+becomes not separable at all after applying PCA.
+There are many other techniques for dimensionality
+reduction. An overview and a comparison of some
+of them is given by [vdMPvdH09].
+B. Unsupervised Segmentation
+Unsupervised segmentation algorithms can be used
+in supervised segmentation as another source of information
+ or to refine a segmentation. While unsupervised
+segmentation algorithms can never be semantic, they are
+well-studied and deserve at least a very brief overview.
+Semantic segmentation algorithms store information
+about the classes they were trained to segment while
+non-semantic segmentation algorithms try to detect
+consistent regions or region boundaries.
+1) Clustering Algorithms: Clustering algorithms can
+directly be applied on the pixels, when one gives a
+feature vector per pixel. Two clustering algorithms are
+k-means and the mean-shift algorithm.
+The k-means algorithm is a general-purpose clustering
+ algorithm which requires the number of clusters to
+be given beforehand. Initially, it places the k centroids
+randomly in the feature space. Then it assigns each
+data point to the nearest centroid, moves the centroid
+to the center of the cluster and continues the process
+until a stopping criterion is reached. A faster variant is
+described in [Har75].
+k-means was applied by [CLP98] for medical image
+segmentation.
+Another clustering algorithm is the mean-shift algorithm
+ which was introduced by [CM02] for segmentation
+ tasks. The algorithm finds the cluster centers
+by initializing centroids at random seed points and
+iteratively shifting them to the mean coordinate within
+a certain range. Instead of taking a hard range constraint,
+the mean can also be calculated by using any kernel.
+This effectively applies a weight to the coordinates
+of the points. The mean-shift algorithm finds cluster
+centers at positions with the highest local density of
+points.
+2) Graph Based Image Segmentation: Graph-based
+image segmentation algorithms typically interpret pixels
+as vertices and an edge weight is a measure of
+dissimilarity such as the difference in color [FH04],
+[Fel]. There are several different candidates for edges. The 4-neighborhood (north, east, south, west) or an 8-neighborhood
+(north, north-east, east, south-east, south,
+south-west, west, north-west) are plausible choices.
+One way to cut the edges is by building a minimum
+spanning tree and removing edges above a threshold.
+This threshold can either be constant, adapted to the
+graph or adjusted by the user. After the edge-cutting
+step, the connected components are the segments.
+A graph-based method which ranked 2nd in the
+Pascal VOC 2010 challenge [EVGW+10] is described
+in [CS10]. The system makes heavy use of the multi-cue
+contour detector globalPb [MAFM08] and needs
+about 10 GB of main memory [CS11].
+3) Random Walks: Random walks belong to the
+graph-based image segmentation algorithms. Random
+walk image segmentation usually works as follows:
+Seed points are placed on the image for the different
+objects in the image. From every single pixel, the
+probability to reach the different seed points by a
+random walk is calculated. This is done by taking
+image gradients as described in Section V-A for HOG
+features. The class of the pixel is the class of which a
+seed point will be reached with highest probability. At
+first, this is an interactive segmentation method, but it
+can be extended to be non-interactive by using another
+segmentation method's output as seed points.
+4) Active Contour Models: Active contour models
+(ACMs) are algorithms which segment images roughly
+along edges, but also try to find a border which is
+smooth. This is done by defining a so-called energy
+function which will be minimized. They were initially
+described in [KWT88]. ACMs can be used to segment
+an image or to refine segmentation as it was done
+in [AM98] for brain MR images.
+5) Watershed Segmentation: The watershed algorithm
+ takes a grayscale image and interprets it as a
+height map. Low values are catchment basins and
+the higher values between two neighboring catchment
+basins form the watershed. The catchment basins should
+contain what the developer wants to capture. This
+implies that those areas must be dark on grayscale
+images. The algorithm starts to fill the basins from
+the lowest point. When two basins are connected, a
+watershed is found. The algorithm stops when the
+highest point is reached.
+A detailed description of the watershed segmentation
+algorithm is given in [RM00].
+The watershed segmentation was used in [JLD03] to
+segment white blood cells. As the authors describe,
+the segmentation by watershed transform has two
+flaws: Over-segmentation due to local minima and thick
+watersheds due to plateaus.
+
+C. Random Decision Forests
+Random Decision Forests were first proposed
+in [Ho95]. This type of classifier applies techniques
+called ensemble learning, where multiple classifiers
+are trained and a combination of their hypotheses is
+used. One ensemble learning technique is the random
+subspaces method where each classifier is trained
+on a random subspace of the feature space. Another
+ensemble learning technique is bagging, which is
+training the trees on random subsets of the training set.
+In the case of Random Decision Forests, the classifiers
+are decision trees. A decision tree is a tree where each
+inner node uses one or more features to decide in which
+branch to descend. Each leaf is a class.
+One strength of Random Decision Forests compared
+to many other classifiers like SVMs and neural networks
+is that the scale of measure of the features (nominal,
+ordinal, interval, ratio) can be arbitrary. Another advantage
+ of Random Decision Forests compared to SVMs,
+for example, is the speed of training and classification.
+Decision trees were extensively studied in the past
+20 years and a multitude of training algorithms have
+been proposed (e.g. ID3 in [Qui86], C4.5 in [Qui93]).
+Possible training hyperparameters are the measure to
+evaluate the “goodness of split” [Min89], the number of
+decision trees being used, and if the depth of the trees
+is restricted. Typically in the context of classification,
+decision trees are trained by adding new nodes until
+each leaf contains only nodes of a single class or until it
+is not possible to split further. This is called a stopping
+criterion.
+There are two typical training modes: Central axis
+projection and perceptron training. In training, for
+each node a hyperplane is searched which is optimal
+according to an error function.
+Random Decision Forests with texton features (see
+Section V-A6) are applied in [SJC08] for segmentation.
+In the [MSC] dataset, they report a per-pixel accuracy
+rate of 66.9 % for their best system. This system
+requires 415 ms for the segmentation of 320 px × 213 px
+images on a single 2.7 GHz core. On the Pascal
+VOC 2007 dataset, they report an average per-pixel
+accuracy for their best segmentation system of 42 %.
+An excellent introduction to Random Decision
+Forests for semantic segmentation is given by [SCZ08].
+D. SVMs
+SVMs are well-studied binary classifiers which can
+be described by five central ideas. For those ideas, the
+training data is represented as $(x_i, y_i)$ where $x_i$ is the
+feature vector and $y_i \in \{-1, 1\}$ the binary label for
+training example $i \in \{1, \dots, m\}$.
+1) If data is linearly separable, it can be separated
+by a hyperplane. There is one hyperplane which
+maximizes the distance to the next datapoints
+(support vectors). This hyperplane should be taken:
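+$$\underset{w,b}{\text{minimize}} \quad \frac{1}{2} \|w\|^2$$
+$$\text{s.t.} \quad \forall_{i=1}^{m}:\; y_i \cdot \underbrace{(\langle w, x_i \rangle + b)}_{\text{sgn applied to this gives the classification}} \geq 1$$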
+2) Even if the underlying process which generates the
+features for the two classes is linearly separable,
+noise can make the data not separable. The introduction
+ of slack variables to relax the requirement
+of linear separability solves this problem. The
+trade-off between accepting some errors and a
+more complex model is weighted by a parameter
+$C \in \mathbb{R}_0^+$. The bigger C, the more heavily errors are
+penalized (i.e. the fewer errors are accepted). The new optimization problem is:
+$$\underset{w}{\text{minimize}} \quad \frac{1}{2} \|w\|^2 + C \cdot \sum_{i=1}^{m} \xi_i$$
+$$\text{s.t.} \quad \forall_{i=1}^{m}:\; y_i \cdot (\langle w, x_i \rangle + b) \geq 1 - \xi_i$$
+Note that $0 \leq \xi_i \leq 1$ means that the data point
+is within the margin, whereas $\xi_i \geq 1$ means it is
+misclassified. An SVM with C > 0 is also called
+a soft-margin SVM.
+3) The primal problem is to find the normal vector
+w and the bias b. The dual problem is to express
+w as a linear combination of the training data $x_i$:
+$$w = \sum_{i=1}^{m} \alpha_i y_i x_i$$
+where $y_i \in \{-1, 1\}$ represents the class of the
+training example and $\alpha_i$ are Lagrange multipliers.
+The usage of Lagrange multipliers is explained
+with some examples in [Smi04]. The usage of the
+Lagrange multipliers $\alpha_i$ changes the optimization
+problem to depend on the $\alpha_i$, which are weights for
+the feature vectors. It turns out that most $\alpha_i$ will
+be zero. The non-zero weighted vectors are called
+support vectors.
+The optimization problem is now, according
+to [Bur98]:
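+$$\underset{\alpha_i}{\text{maximize}} \quad \sum_{i=1}^{m} \alpha_i - \frac{1}{2} \sum_{i=1}^{m} \sum_{j=1}^{m} \alpha_i \alpha_j y_i y_j \langle x_i, x_j \rangle$$
+$$\text{s.t.} \quad \forall_{i=1}^{m}:\; 0 \leq \alpha_i \leq C$$
+$$\text{s.t.} \quad \sum_{i=1}^{m} \alpha_i y_i = 0$$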
+
+4) Not every dataset is linearly separable. This problem
+ is approached by transforming the feature
+vectors x with a non-linear mapping Φ into
+a higher dimensional (probably ∞-dimensional)
+space. As the feature vectors x are only used
+within the scalar product $\langle x_i, x_j \rangle$, it is not necessary
+to do the transformation. It is enough to do the
+calculation
+$$K(x_i, x_j) = \langle x_i, x_j \rangle$$
+This function K is called a kernel. The idea of
+never explicitly transforming the vectors $x_i$ to the
+higher dimensional space is called the kernel trick.
+Common kernels include the polynomial kernel
+$$K_P(x_i, x_j) = (\langle x_i, x_j \rangle + r)^p$$
+of degree p and coefficient r, the Gaussian radial
+basis function (RBF) kernel
+$$K_{\text{Gauss}}(x_i, x_j) = e^{\frac{-\gamma \|x_i - x_j\|^2}{2\sigma^2}}$$
+and the sigmoid kernel
+$$K_{\text{tanh}}(x_i, x_j) = \tanh(\gamma \langle x_i, x_j \rangle - r)$$
+where the parameter γ determines how much
+influence single training examples have.
+5) The described SVMs can only distinguish between
+two classes. Common strategies to expand those
+binary classifiers to multi-class classification are
+the one-vs-all and the one-vs-one strategy. In the
+one-vs-all strategy n classifiers have to be trained
+which can distinguish one of the n classes against
+all other classes. In the one-vs-one strategy $\frac{n^2 - n}{2}$
+classifiers are trained; one classifier for each pair
+of classes.
+A detailed description of SVMs can be found
+in [Bur98].
+SVMs are used by [YHRF12] on the 2009 and 2010
+PASCAL segmentation challenge [EVGW+10]. They
+did not hand their classifier in to the challenge itself,
+but calculated an average rank of 7 among the different
+categories.
+[FGMR10] also used an SVM-based method with
+HOG features and achieved the 7th rank in the 2010
+PASCAL segmentation challenge by mean accuracy. It
+needs about 2 s on a 2.8 GHz 8-core Intel processor.
+E. Markov Random Fields
+MRFs are undirected probabilistic graphical models
+which are a widespread model in computer vision. The
+overall idea of MRFs is to assign a random variable for
+each feature and a random variable for each pixel which x
+1 x
+2 x
+3x
+4 x
+5 x
+6x
+7 x
+8 x
+9
+y
+1 y
+2 y
+3y
+4 y
+5 y
+6y
+7 y
+8 y
+9
+x
+1 x
+2 x
+3x
+4 x
+5 x
+6x
+7 x
+8 x
+9
+y
+1 y
+2 y
+3y
+4 y
+5 y
+6y
+7 y
+8 y
+9
+Figure 3: CRF with 4-neighborhood. Each node $x_i$
+represents a pixel and each node $y_i$ represents
+a label.
+gets labeled as shown in Figure 3. For example, an MRF
+which is trained on images of the size 224 px × 224 px
+and gets the raw RGB values as features has
+$$\underbrace{224 \cdot 224 \cdot 3}_{\text{input}} + \underbrace{224 \cdot 224}_{\text{output}} = 200\,704$$
+random variables. Those random variables are conditionally
+ independent, given their local neighborhood.
+These (in)dependencies can be expressed with a graph.
+Let G = (V , E ) be the associated undirected graph
+of an MRF and C be the set of all maximal cliques in
+that graph. Nodes represent random variables x, y and
+edges represent conditional dependencies. Just like in
+graph-based image segmentation (Section V-B2), the 4-neighborhood [SWRC06] and the 8-neighborhood
+are reasonable choices for constructing the graph.
+Typically, random variables y represent the class of a
+single pixel, random variables x represent pixel values
+and edges represent pixel neighborhood in computer
+vision segmentation problems where MRFs
+are used. Accordingly, the random variables y live
+on $\{1, \dots, \text{nr of classes}\}$ and the random variables x
+typically live on $\{0, \dots, 255\}$ or $[0, 1]$.
+The probability of x, y can be expressed as
+$$P(x, y) = \frac{1}{Z} e^{-E(x, y)}$$
+where $Z = \sum_{x, y} e^{-E(x, y)}$ is a normalization term
+called the partition function and E is called the energy
+function. A common choice for the energy function is
+$$E(x, y) = \sum_{c \in C} \psi_c(x, y)$$
+where ψ is called a clique potential. One choice for
+cliques of size two $x, y = (x_1, x_2)$ is [KP06]
+$$\psi_c(x_1, x_2) = w \delta(x_1, x_2) = \begin{cases} +w & \text{if } x_1 \neq x_2 \\ -w & \text{if } x_1 = x_2 \end{cases}$$
+According to [Mur12], the most common way of
+inference over the posterior MRF in computer vision
+problems is Maximum A Posteriori (MAP) estimation.
+
+Detailed introductions to MRFs are given by
+[BKR11], [Mur12]. MRFs are used by [ZBS01] and
+[MSB12] for image segmentation.
+F. Conditional Random Fields
+CRFs are MRFs where all clique potentials are
+conditioned on input features [Mur12]. This means,
+instead of learning the distribution P (y, x), the task
+is reformulated to learn the distribution P (y|x). One
+consequence of this reformulation is that CRFs need
+far fewer parameters as the distribution of x does
+not have to be estimated. Another advantage of CRFs
+compared to MRFs is that no distribution assumption
+about x has to be made.
+A CRF has the partition function Z:
+$$Z(x) = \sum_{y} P(x, y)$$
+and joint probability distribution
+$$P(y|x) = \frac{1}{Z(x)} \prod_{c \in C} \psi_c(y_c|x)$$
+The simplest way to define the clique potentials ψ is
+the count of the class $y_c$ given x added with a positive
+smoothing constant to prevent the complete term from
+getting zero.
+CRFs as described in [LRKT09] have reached top
+performance in PASCAL VOC 2010 [VOC10] and
+are also used in [HZCP04], [SWRC06] for semantic
+segmentation.
+A method similar to CRFs was proposed
+in [GBVdW+10]. The system of Gonfaus et al.
+ranked 1st by mean accuracy in the segmentation task
+of the PASCAL VOC 2010 challenge [EVGW+10].
+An introduction to CRFs is given by [SM11].
+G. Post-processing methods
+Post-processing methods refine a found segmentation and
+remove obvious errors. For example, the morphological
+operations opening and closing can remove noise. The
+opening operation is an erosion followed by a dilation.
+This removes tiny segments. The closing operation is a
+dilation followed by an erosion. This removes tiny gaps
+in otherwise filled regions. They were used in [CLP98]
+for biomedical image segmentation.
+Another way of refinement of the found segmentation
+is by adjusting the segmentation to match close edges.
+This was used in [BBMM11] with an ultra-metric
+contour map [AMFM09].
+Active contour models are another example of a
+post-processing method [KWT88].
+VI. NEURAL NETWORKS FOR SEMANTIC SEGMENTATION
+Artificial neural networks are classifiers which are
+inspired by biological neurons. Every single artificial
+neuron has some inputs which are weighted and summed
+up. Then, the neuron applies a so-called activation
+function to the weighted sum and gives an output. Those
+neurons can take either a feature vector as input or the
+output of other neurons. In this way, they build up
+feature hierarchies.
+The parameters they learn are the weights w ∈ R.
+They are learned by gradient descent. To do so, an error
+function — usually cross-entropy or mean squared error
+— is necessary. For the gradient descent algorithm, one
+sees the labeled training data as given, the weights
+as variables and the error function as a surface in
+this weight-space. Minimizing the error function in the
+weight space adapts the neural network to the problem.
+There are lots of ideas around neural networks like
+regularization, better optimization algorithms, automatically
+ building up architectures, design choices for
+activation functions. This is not explained in detail here,
+but some of the major breakthroughs are outlined.
+CNNs are neural networks which learn image filters.
+They drastically reduce the number of parameters which
+have to be learned while being still general enough for
+the problem domain of images. This was shown by Alex
+Krizhevsky et al. in [KSH12]. One major idea was a
+clever regularization called dropout training, which randomly
+sets the output of neurons to zero during training.
+Another contribution was the usage of an activation
+function called rectified linear unit:
+$$\varphi_{\text{ReLU}}(x) = \max(0, x)$$
+Those are much faster to train than the commonly used
+sigmoid activation functions
+$$\varphi_{\text{Sigmoid}}(x) = \frac{1}{e^{-x} + 1}$$
+Krizhevsky et al. implemented those ideas and participated
+ in the ImageNet Large-Scale Visual Recognition
+Challenge (ILSVRC). The best other system, which
+used SIFT features and Fisher Vectors, had an error rate
+of about 25.7 % while the network by Alex
+Krizhevsky et al. achieved a 17.0 % error rate on the ILSVRC-2010
+dataset. As a preprocessing step, they downsampled
+all images to a fixed size of 256 px × 256 px before
+they fed the features into their network. This network
+is commonly known as AlexNet.
+Since AlexNet was developed, a lot of different
+neural networks have been proposed. One interesting
+example is [PC13], where a recurrent CNN for semantic
+segmentation is presented.
+
+Another notable paper is [LSD14]. The algorithm
+presented there makes use of a classifying network such
+as AlexNet, but applies the complete network as an
+image filter. This way, each pixel gets a probability
+distribution for each of the trained classes. By taking
+the most likely class, a semantic segmentation can be
+done with arbitrary image sizes.
+A very recent publication by Dai et al. [DHS15]
+showed that segmentation with much deeper networks
+is possible and achieves better results.
+A more detailed explanation of neural networks for
+visual recognition is given by [LKJ15].
+VII. POSSIBLE PROBLEMS IN THE DATA FOR
+SEGMENTATION ALGORITHMS
+Different segmentation workflows have different
+problems. However, there are a couple of special cases
+which should be tested. Those cases might not occur
+often in the training data, but it could still happen in
+the productive system.
+I am not aware of any systematic work which examined
+ the influence of problems such as the following.
+A. Lens Flare
+Lens flare is the effect of light getting scattered in
+the lens system of the camera. The testing data set of
+the KITTI road evaluation benchmark [FKG13] has a
+couple of photos with this problem. Figure 4(a) shows
+an extreme example of lens flare.
+B. Vignetting
+Vignetting is the effect of a photograph getting darker
+in the corners. This can have many reasons, for example
+filters on the camera blocking light at the corners.
+C. Blurred images
+Images can be blurred for a couple of reasons: a
+problem with the lens mechanics, focusing on the
+wrong point, too quick movement, smoke or foam. One
+example of a blurred image is Figure 4(c), which was
+taken during an in vivo porcine procedure of diaphragm
+dissection. The smoke was caused by cauterization.
+D. Other Problems
+Whether the following effects can occur at all and whether they
+are problems depends heavily on the problem domain
+and the model used.
+1) Partial Occlusions: Segmentation systems which
+employ a model of the objects which should be
+segmented might suffer from partial occlusions.
+(a) Lens Flare. Image by [Hus07]
+(b) Vignetting. Image by [Man12]
+(c) Smoke by cauterization. Image by [GVSY13]
+(d) Camouflage. Image by [Kaf07]
+(e) Transparency
+(f) Viewpoint
+Figure 4: Examples of images which might cause
+semantic segmentation systems to fail.
+2) Camouflage: Some objects, like animals in the
+wild, actively try to hide (see Figure 4(d) as an example).
+In other cases it might just be bad luck that objects
+are hard for humans to detect. This problem has two
+interesting aspects: On the one hand, the segmenting
+system might suffer from the same problems as humans
+do. On the other hand, the segmenting system might be
+better than humans are, but it is forced to learn from
+images labeled by humans. If the labels are wrong, the
+system is forced to learn something wrong.
+3) Semi-transparent Occlusion: Some objects like
+drinking glasses can be visible and still leave the object
+behind them visible as shown in Figure 4(e). This is
+mainly a definition problem: Is the seen pixel the glass
+label or the smartphone label?
+4) Viewpoints: Changes in viewpoints can be a
+problem, if they don’t occur in the training data. For
+example, an image captioning system which was trained
+on photographs of professional photographers might
+not have photos from the point of view of a child. This
+is visualized in Figure 4(f).
+
+VIII. DISCUSSION
+Ohta et al. wrote [OKS78] 38 years ago. It is one
+of the first papers mentioning semantic segmentation.
+In this time, a lot of work was done and many
+different directions have been explored. Different kinds
+of semantic segmentation have emerged.
+This paper presents a taxonomy of those kinds
+of semantic segmentation and a brief overview of
+completely automatic, passive, semantic segmentation
+algorithms.
+Future work includes a comparative study of
+those algorithms on publicly available datasets such
+as the ones presented in Table I. Another open
+question is the influence of the problems described
+in Section VII. This could be done using a subset of the
+thousands of images of Wikipedia Commons, such as
+https://commons.wikimedia.org/wiki/Category:Blurring
+for blurred images.
+A combination of different classifiers in an ensemble
+would be an interesting option to explore in order to
+improve accuracy. Another direction which is currently
+studied is combining classifiers such as neural networks
+with CRFs [ZJRP+15].
+REFERENCES
+[AM98] M. S. Atkins and B. T. Mackiewich, “Fully
+automatic segmentation of the brain in
+mri,” Medical Imaging, IEEE Transactions
+on, vol. 17, no. 1, pp. 98–107, Feb. 1998.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=668699
+[AMFM09] P. Arbelaez, M. Maire, C. Fowlkes, and
+J. Malik, “From contours to regions: An
+empirical evaluation,” in Computer Vision and
+Pattern Recognition, 2009. CVPR 2009. IEEE
+Conference on. IEEE, Jun. 2009, pp. 2294–2301.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=5206707
+[AP11] G. Azzopardi and N. Petkov, “Detection of
+retinal vascular bifurcations by trainable v4-like
+filters,” in Computer Analysis of Images and
+Patterns. Springer, 2011, pp. 451–459. [Online].
+Available: http://www.cs.rug.nl/~imaging/databases/
+retina_database/retinalfeatures_database.html
+[BBMM11] T. Brox, L. Bourdev, S. Maji, and J. Malik,
+“Object segmentation by alignment of poselet
+activations to image contours,” in Computer Vision
+and Pattern Recognition (CVPR), 2011 IEEE
+Conference on. IEEE, Jun. 2011, pp. 2225–2232.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=5995659
+[BJ00] Y. Boykov and M.-P. Jolly, “Interactive organ
+segmentation using graph cuts,” in Medical Image
+Computing and Computer-Assisted Intervention–MICCAI
+2000. Springer, 2000, pp. 276–286.
+[Online]. Available: http://link.springer.com/chapter/10.1007/978-3-540-40899-4_28
+[BKR11] A. Blake, P. Kohli, and C. Rother, Markov random
+fields for vision and image processing. Mit Press,
+2011.
+[BKTT15] S. Bittel, V. Kaiser, M. Teichmann, and M. Thoma,
+“Pixel-wise segmentation of street with neural
+networks,” arXiv preprint arXiv:1511.00513, 2015.
+[Online]. Available: http://arxiv.org/abs/1511.00513
+[BMBM10] L. Bourdev, S. Maji, T. Brox, and J. Malik,
+“Detecting people using mutually consistent
+poselet activations,” in Computer Vision–ECCV
+2010. Springer, 2010, pp. 168–181. [Online].
+Available: http://link.springer.com/chapter/10.1007/978-3-642-15567-3_13#page-1
+[Bur98] C. J. Burges, “A tutorial on support vector machines
+for pattern recognition,” Data mining and knowledge
+discovery, vol. 2, no. 2, pp. 121–167, 1998.
+[BVZ01] Y. Boykov, O. Veksler, and R. Zabih, “Fast
+approximate energy minimization via graph cuts,”
+Pattern Analysis and Machine Intelligence, IEEE
+Transactions on, vol. 23, no. 11, pp. 1222–1239,
+2001. [Online]. Available: http://ieeexplore.ieee.org/
+xpls/abs_all.jsp?arnumber=969114
+[CDF+04] G. Csurka, C. Dance, L. Fan, J. Willamowski,
+and C. Bray, “Visual categorization with bags of
+keypoints,” in Workshop on statistical learning in
+computer vision, ECCV, vol. 1, no. 1-22. Prague,
+2004, pp. 1–2.
+[CJSW01] H.-D. Cheng, X. Jiang, Y. Sun, and J. Wang,
+“Color image segmentation: advances and prospects,”
+Pattern recognition, vol. 34, no. 12, pp. 2259–2281,
+2001.
+[CLP98] C. W. Chen, J. Luo, and K. J. Parker, “Image
+segmentation via adaptive k-mean clustering and
+knowledge-based morphological operations with
+biomedical applications,” Image Processing, IEEE
+Transactions on, vol. 7, no. 12, pp. 1673–1683, Dec.
+
+1998. [Online]. Available: http://ieeexplore.ieee.org/
+xpls/abs_all.jsp?arnumber=730379
+[CM02] D. Comaniciu and P. Meer, “Mean shift: A
+robust approach toward feature space analysis,”
+Pattern Analysis and Machine Intelligence, IEEE
+Transactions on, vol. 24, no. 5, pp. 603–619, 2002.
+[Online]. Available: http://ieeexplore.ieee.org/xpl/
+login.jsp?tp=&arnumber=1000236
+[COWR11] C. Chen, J. Ozolek, W. Wang, and G. K. Rohde,
+“A pixel classification system for segmenting
+biomedical images using intensity neighborhoods
+and dimension reduction,” in Biomedical Imaging:
+From Nano to Macro, 2011 IEEE International
+Symposium on. IEEE, 2011, pp. 1649–1652.
+[Online]. Available: https://www.andrew.cmu.edu/
+user/gustavor/chen_isbi_11.pdf
+[CP08] G. Csurka and F. Perronnin, “A simple high
+performance approach to semantic segmentation.”
+in BMVC, 2008, pp. 1–10. [Online]. Available:
+http://www.xrce.xerox.com/layout/set/print/content/download/16654/118653/file/2008-023.pdf
+[CRSS] A. Cohen, E. Rivlin, I. Shimshoni, and
+E. Sabo, “Colon crypt segmentation website.” [Online].
+Available: http://mis.haifa.ac.il/~ishimshoni/SegmentCrypt/Download.htm
+[CRSS14] ——, “Memory based active contour algorithm
+using pixel-level classified images for colon crypt
+segmentation,” Computerized Medical Imaging
+and Graphics, Nov. 2014. [Online]. Available:
+http://mis.haifa.ac.il/~ishimshoni/SegmentCrypt/Active%20contour%20based%20on%20pixel-level%20classified%20image%20for%20colon%20crypts%20segmentation.pdf
+[CS10] J. Carreira and C. Sminchisescu, “Constrained
+parametric min-cuts for automatic object segmentation,”
+ in Computer Vision and Pattern Recognition
+(CVPR), 2010 IEEE Conference on. IEEE, 2010,
+pp. 3241–3248.
+[CS11] ——, “Cpmc: Constrained parametric min-cuts for
+automatic object segmentation,” Feb. 2011. [Online].
+Available: http://www.maths.lth.se/matematiklth/
+personal/sminchis/code/cpmc/
+[CSI+09] M. E. Celebi, G. Schaefer, H. Iyatomi, W. V.
+Stoecker, J. M. Malters, and J. M. Grichnik, “An
+improved objective evaluation measure for border
+detection in dermoscopy images,” Skin Research
+and Technology, vol. 15, no. 4, pp. 444–450, 2009.
+[Online]. Available: http://arxiv.org/abs/1009.1020
+[CSM09] L. P. Coelho, A. Shariff, and R. F. Murphy, “Nuclear
+segmentation in microscope cell images: a hand-segmented
+dataset and comparison of algorithms,”
+in Biomedical Imaging: From Nano to Macro,
+2009. ISBI’09. IEEE International Symposium on.
+IEEE, 2009, pp. 518–521. [Online]. Available:
+http://murphylab.web.cmu.edu/data
+[CXGS12] M. D. Collins, J. Xu, L. Grady, and V. Singh,
+“Random walks based multi-image segmentation:
+Quasiconvexity results and gpu-based solutions,”
+in Computer Vision and Pattern Recognition
+(CVPR), 2012 IEEE Conference on. IEEE,
+2012, pp. 1656–1663. [Online]. Available:
+http://pages.cs.wisc.edu/~jiaxu/pub/rwcoseg.pdf
+[DHS15] J. Dai, K. He, and J. Sun, “Instance-aware semantic
+ segmentation via multi-task network cascades,”
+arXiv preprint arXiv:1512.04412, 2015.
+[DT05] N. Dalal and B. Triggs, “Histograms of oriented
+gradients for human detection,” in Computer
+Vision and Pattern Recognition, 2005. CVPR
+2005. IEEE Computer Society Conference on, vol. 1, June 2005, pp. 886–893 vol. 1.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=1467360
+[EVGW+a] M. Everingham, L. Van Gool, C. K. I.
+Williams, J. Winn, and A. Zisserman, “The
+PASCAL Visual Object Classes Challenge
+2007 (VOC2007) Results,” http://www.pascal-network.org/challenges/VOC/voc2007/workshop/index.html.
+[Online]. Available: http://host.robots.ox.ac.uk:8080/pascal/VOC/voc2007/index.html
+[EVGW+b] ——, “The PASCAL Visual Object Classes Challenge
+2012 (VOC2012) Results,” http://www.pascal-network.org/challenges/VOC/voc2012/workshop/index.html.
+[Online]. Available: http://host.robots.ox.ac.uk:8080/pascal/VOC/voc2012/index.html
+[EVGW+10] M. Everingham, L. Van Gool, C. K. Williams,
+J. Winn, and A. Zisserman, “The pascal visual object
+classes (voc) challenge,” International journal of
+computer vision, vol. 88, no. 2, pp. 303–338, 2010.
+[EVGW+12] M. Everingham, L. Van Gool, C. K. I. Williams,
+J. Winn, and A. Zisserman, “Visual object
+classes challenge 2012 (voc2012),” 2012. [Online].
+Available: http://host.robots.ox.ac.uk:8080/pascal/VOC/voc2012/index.html
+[Fel] P. F. Felzenszwalb, “Graph based image
+segmentation.” [Online]. Available:
+http://cs.brown.edu/~pff/segment/
+[FGMR10] P. F. Felzenszwalb, R. B. Girshick, D. McAllester,
+and D. Ramanan, “Object detection with discriminatively
+ trained part-based models,” Pattern Analysis
+and Machine Intelligence, IEEE Transactions on,
+vol. 32, no. 9, pp. 1627–1645, 2010.
+[FH04] P. F. Felzenszwalb and D. P. Huttenlocher,
+“Efficient graph-based image segmentation,”
+International Journal of Computer Vision,
+vol. 59, no. 2, pp. 167–181, 2004. [Online].
+Available: http://link.springer.com/article/10.1023/
+B:VISI.0000022288.19776.77
+[FKG13] J. Fritsch, T. Kuehnl, and A. Geiger, “A
+new performance measure and evaluation
+benchmark for road detection algorithms,” in
+International Conference on Intelligent Transportation
+ Systems (ITSC), 2013. [Online]. Available:
+http://www.cvlibs.net/datasets/kitti/eval_road.php
+[GBVdW+
+10] J. M. Gonfaus, X. Boix, J. Van de Weijer, A. D.
+Bagdanov, J. Serrat, and J. Gonzalez, “Harmony potentials
+ for joint classification and segmentation,” in
+Computer Vision and Pattern Recognition (CVPR),
+2010 IEEE Conference on. IEEE, 2010, pp. 3280–
+3287.
+[GRC+
+08] S. Gould, J. Rodgers, D. Cohen, G. Elidan, and
+D. Koller, “Multi-class segmentation with relative
+location prior,” International Journal of Computer
+Vision, vol. 80, no. 3, pp. 300–316, Apr. 2008.
+[GVSY13] S. Giannarou, M. Visentini-Scarzanella, and G.-Z.
+Yang, “Probabilistic tracking of affine-invariant
+anisotropic regions,” Pattern Analysis and Machine
+Intelligence, IEEE Transactions on, vol. 35, no. 1,
+pp. 130–143, 2013.
+[Har75] J. A. Hartigan, Clustering algorithms. John Wiley
+& Sons, Inc., 1975.
+[HDT02] C. Huang, L. Davis, and J. Townshend, “An
+assessment of support vector machines for land
+cover classification,” International Journal of remote
+sensing, vol. 23, no. 4, pp. 725–749, 2002.
+[HHR01] S. Hu, E. Hoffman, and J. Reinhardt, “Automatic
+lung segmentation for accurate quantitation of
+volumetric x-ray ct images,” Medical Imaging, IEEE
+
+Transactions on, vol. 20, no. 6, pp. 490–498, Jun.
+2001.
+[HJBJ+
+96] A. Hoover, G. Jean-Baptiste, X. Jiang, P. J.
+Flynn, H. Bunke, D. B. Goldgof, K. Bowyer,
+D. W. Eggert, A. Fitzgibbon, and R. B.
+Fisher, “An experimental comparison of range
+image segmentation algorithms,” Pattern Analysis
+and Machine Intelligence, IEEE Transactions
+on, vol. 18, no. 7, pp. 673–689, Jul. 1996.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=506791
+[Ho95] T. K. Ho, “Random decision forests,” in
+Document Analysis and Recognition, 1995.,
+Proceedings of the Third International Conference
+on, vol. 1. IEEE, 1995, pp. 278–282.
+[Online]. Available: http://ect.bell-labs.com/who/
+tkh/publications/papers/odt.pdf
+[Hus07] Hustvedt, “File:cctv lens flare.jpg,” Wikipedia
+Commons, Nov. 2007. [Online]. Available:
+ https://commons.wikimedia.org/wiki/File:
+CCTV_Lens_flare.jpg
+[HZCP04] X. He, R. Zemel, and M. Carreira-Perpiñán,
+“Multiscale conditional random fields for image
+labeling,” in Computer Vision and Pattern
+Recognition, 2004. CVPR 2004. Proceedings
+of the 2004 IEEE Computer Society Conference
+on, vol. 2, Jun. 2004, pp. II–695–II–702 Vol.2.
+[Online]. Available: http://ieeexplore.ieee.org/xpl/
+login.jsp?tp=&arnumber=1315232
+[JLD03] K. Jiang, Q.-M. Liao, and S.-Y. Dai, “A novel white
+blood cell segmentation scheme using scale-space
+filtering and watershed clustering,” in Machine
+Learning and Cybernetics, 2003 International
+Conference on, vol. 5, Nov 2003, pp. 2820–2825
+Vol.5. [Online]. Available: http://ieeexplore.ieee.org/
+xpl/login.jsp?tp=&arnumber=1260033
+[Kaf07] L. Kaffer, “File:great male leopard in south afrikajd.jpg,”
+ Wikipedia Commons, Jul. 2007. [Online].
+Available: https://commons.wikimedia.org/wiki/File:
+Great_male_Leopard_in_South_Afrika-JD.JPG
+[KKV+
+14] V. Kalesnykiene, J.-k. Kamarainen, R. Voutilainen,
+J. Pietilä, H. Kälviäinen, and H. Uusitalo,
+“Diaretdb1 diabetic retinopathy database and
+evaluation protocol,” 2014. [Online]. Available:
+http://www2.it.lut.fi/project/imageret/diaretdb1/
+[KP92] J. M. Kasson and W. Plouffe, “An analysis of
+selected computer interchange color spaces,” ACM
+Transactions on Graphics (TOG), vol. 11, no. 4, pp.
+373–405, 1992.
+[KP06] Z. Kato and T.-C. Pong, “A markov random
+field image segmentation model for color
+textured images,” Image and Vision Computing,
+vol. 24, no. 10, pp. 1103–1114, 2006. [Online].
+Available: http://www.sciencedirect.com/science/
+article/pii/S0262885606001223
+[KSH12] A. Krizhevsky, I. Sutskever, and G. E. Hinton,
+“Imagenet classification with deep convolutional
+neural networks,” in Advances in neural information
+processing systems, 2012, pp. 1097–1105.
+[KWT88] M. Kass, A. Witkin, and D. Terzopoulos,
+“Snakes: Active contour models,” International
+journal of computer vision, vol. 1, no. 4, pp.
+321–331, Jan. 1988. [Online]. Available: http:
+//link.springer.com/article/10.1007/BF00133570
+[LKJ15] F.-F. Li, A. Karpathy, and J. Johnson,
+“CS231n: Convolutional neural networks for
+visual recognition,” 2015. [Online]. Available:
+http://cs231n.stanford.edu/
+[Low04] D. Lowe, “Distinctive image features from scale-invariant keypoints,” International Journal of
+Computer Vision, vol. 60, no. 2, pp. 91–110, 2004.
+[Online]. Available: http://dx.doi.org/10.1023/B%
+3AVISI.0000029664.99615.94
+[LRAL08] A. Levin, A. Rav-Acha, and D. Lischinski,
+“Spectral matting,” Pattern Analysis and
+Machine Intelligence, IEEE Transactions on,
+vol. 30, no. 10, pp. 1699–1712, 2008.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=4547428
+[LRKT09] L. Ladický, C. Russell, P. Kohli, and P. Torr,
+“Associative hierarchical crfs for object class image
+segmentation,” in Computer Vision, 2009 IEEE 12th
+International Conference on, 2009, pp. 739–746.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=5459248
+[LSD14]
+ J. Long, E. Shelhamer, and T. Darrell, “Fully
+convolutional networks for semantic segmentation,”
+arXiv preprint arXiv:1411.4038, 2014. [Online].
+Available: http://arxiv.org/abs/1411.4038
+[MAFM08] M. Maire, P. Arbelaez, C. Fowlkes, and
+J. Malik, “Using contours to detect and localize
+junctions in natural images,” in Computer Vision
+and Pattern Recognition, 2008. CVPR 2008.
+IEEE Conference on, June 2008, pp. 1–8.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=4587420
+[Man12] M. Manske, “File:randabschattung mikroskop
+kamera 6.jpg,” Wikipedia Commons,
+ Dec. 2012. [Online]. Available:
+ https://commons.wikimedia.org/wiki/File:
+Randabschattung_Mikroskop_Kamera_6.JPG
+[MBLAGJ+
+07] S. Maldonado-Bascon, S. Lafuente-Arroyo, P. Gil-Jimenez,
+H. Gomez-Moreno, and F. Lopez-Ferreras,
+ “Road-sign detection and recognition
+based on support vector machines,” Intelligent
+Transportation Systems, IEEE Transactions on,
+vol. 8, no. 2, pp. 264–278, Jun. 2007.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=4220659
+[MBVLG02] N. Moon, E. Bullitt, K. Van Leemput, and G. Gerig,
+“Automatic brain and tumor segmentation,” in Medical
+ Image Computing and Computer-Assisted Intervention—MICCAI
+ 2002. Springer, 2002, pp.
+372–379.
+[MFTM01] D. Martin, C. Fowlkes, D. Tal, and J. Malik,
+“A database of human segmented natural
+images and its application to evaluating
+segmentation algorithms and measuring ecological
+statistics,” in Computer Vision, 2001. ICCV
+2001. Proceedings. Eighth IEEE International
+Conference on, vol. 2. IEEE, 2001, pp. 416–423.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=937655
+[MHMK+
+14] L. Maier-Hein, S. Mersmann, D. Kondermann,
+S. Bodenstedt, A. Sanchez, C. Stock, H. G.
+Kenngott, M. Eisenmann, and S. Speidel, “Can
+masses of non-experts train highly accurate
+image classifiers?” in Medical Image Computing
+and Computer-Assisted Intervention–MICCAI 2014.
+Springer, 2014, pp. 438–445. [Online]. Available:
+http://opencas.webarchiv.kit.edu/?q=node/26
+[Min89] J. Mingers, “An empirical comparison of selection
+measures for decision-tree induction,” Machine
+Learning, vol. 3, no. 4, pp. 319–342, 1989.
+[Online]. Available: http://dx.doi.org/10.1023/A%
+3A1022645801436
+[MSB12] G. Moser, S. B. Serpico, and J. A. Benediktsson,
+“Markov random field models for supervised land
+
+cover classification from very high resolution
+multispectral remote sensing images,” in Advances
+in Radar and Remote Sensing (TyWRRS), 2012
+Tyrrhenian Workshop on. IEEE, 2012, pp. 235–
+242. [Online]. Available: http://ieeexplore.ieee.org/
+xpl/login.jsp?tp=&arnumber=6381135
+[MSC] “Object class recognition image database.”
+[Online]. Available: http://research.microsoft.com/
+vision/cambridge/recognition/
+[MSR] “Image understanding - research data,”
+Microsoft Research. [Online]. Available:
+ http://research.microsoft.com/en-us/projects/
+objectclassrecognition/
+[Mur12] K. P. Murphy, Machine learning: a probabilistic
+perspective. MIT press, 2012.
+[OKS78] Y.-i. Ohta, T. Kanade, and T. Sakai, “An analysis
+system for scenes containing objects with substructures,”
+ in Proceedings of the Fourth International
+Joint Conference on Pattern Recognitions, 1978, pp.
+752–754.
+[PAA+
+87] S. M. Pizer, E. P. Amburn, J. D. Austin,
+R. Cromartie, A. Geselowitz, T. Greer, B. ter
+Haar Romeny, J. B. Zimmerman, and K. Zuiderveld,
+“Adaptive histogram equalization and its variations,”
+Computer vision, graphics, and image processing,
+vol. 39, no. 3, pp. 355–368, 1987. [Online].
+Available: http://www.sciencedirect.com/science/
+article/pii/S0734189X8780186X
+[PC13] P. H. Pinheiro and R. Collobert, “Recurrent
+convolutional neural networks for scene parsing,”
+arXiv preprint arXiv:1306.2795, 2013. [Online].
+Available: http://arxiv.org/abs/1306.2795v1
+[PH05] C. Pantofaru and M. Hebert, “A
+comparison of image segmentation algorithms,”
+Robotics Institute, p. 336, 2005. [Online].
+Available: http://riweb-backend.ri.cmu.edu/
+pub_files/pub4/pantofaru_caroline_2005_1/
+pantofaru_caroline_2005_1.pdf
+[PS07] A. Protiere and G. Sapiro, “Interactive
+image segmentation via adaptive weighted
+distances,” Image Processing, IEEE Transactions
+on, vol. 16, no. 4, pp. 1046–1057, 2007.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=4130436
+[PTN09] N. Plath, M. Toussaint, and S. Nakajima, “Multiclass
+ image segmentation using conditional random
+fields and global classification,” in Proceedings
+of the 26th Annual International Conference on
+Machine Learning. ACM, 2009, pp. 817–824.
+[PXP00] D. L. Pham, C. Xu, and J. L. Prince, “A
+survey of current methods in medical image
+segmentation,” Annual Review of Biomedical
+Engineering, vol. 2, no. 1, pp. 315–337, 2000,
+pMID: 11701515. [Online]. Available: http://
+dx.doi.org/10.1146/annurev.bioeng.2.1.315
+[Qui86] J. R. Quinlan, “Induction of decision trees,”
+Machine learning, vol. 1, no. 1, pp. 81–106,
+Aug. 1986. [Online]. Available: http://dx.doi.org/
+10.1023/A%3A1022643204877
+[Qui93] ——, C4.5: Programs for Machine Learning, P. Langley,
+ Ed. Morgan Kaufmann Publishers, Inc., 1993.
+[RKB04] C. Rother, V. Kolmogorov, and A. Blake, “Grabcut:
+Interactive foreground extraction using iterated
+graph cuts,” ACM Transactions on Graphics
+(TOG), vol. 23, no. 3, pp. 309–314, 2004. [Online].
+Available: http://delivery.acm.org/10.1145/1020000/
+1015720/p309-rother.pdf
+[RM00] J. B. Roerdink and A. Meijster, “The watershed
+transform: Definitions, algorithms and parallelization strategies,” Fundam. Inform., vol. 41, no. 1-2,
+pp. 187–228, 2000.
+[RM07] J. Reynolds and K. Murphy, “Figure-ground
+segmentation using a hierarchical conditional
+random field,” in Computer and Robot
+Vision, 2007. CRV ’07. Fourth Canadian
+Conference on, May 2007, pp. 175–182.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=4228537
+[RMBK06]
+ C. Rother, T. Minka, A. Blake, and V. Kolmogorov,
+“Cosegmentation of image pairs by histogram
+matching - incorporating a global constraint
+into mrfs,” in Computer Vision and Pattern
+Recognition, 2006 IEEE Computer Society
+Conference on, vol. 1, June 2006, pp. 993–
+1000. [Online]. Available: http://ieeexplore.ieee.org/
+xpls/abs_all.jsp?arnumber=1640859
+[SAN+
+04] J. Staal, M. D. Abràmoff, M. Niemeijer,
+M. Viergever, B. Van Ginneken et al., “Ridge-based
+vessel segmentation in color images of the retina,”
+Medical Imaging, IEEE Transactions on, vol. 23,
+no. 4, pp. 501–509, 2004. [Online]. Available:
+http://www.isi.uu.nl/Research/Databases/DRIVE/
+[SCZ08] F. Schroff, A. Criminisi, and A. Zisserman,
+“Object class segmentation using random
+forests.” in BMVC, 2008, pp. 1–10. [Online].
+ Available: http://research.microsoft.com/pubs/
+72423/Criminisi_bmvc2008.pdf
+[SJC08]
+ J. Shotton, M. Johnson, and R. Cipolla,
+“Semantic texton forests for image categorization
+and segmentation,” in Computer vision and
+pattern recognition, 2008. CVPR 2008. IEEE
+Conference on. IEEE, Jun. 2008, pp. 1–8.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=4587503
+[SM11] C. Sutton and A. McCallum, “An introduction
+to conditional random fields,” Machine Learning,
+vol. 4, no. 4, pp. 267–373, 2011. [Online].
+Available: http://homepages.inf.ed.ac.uk/csutton/
+publications/crftutv2.pdf
+[Smi02] L. I. Smith, “A tutorial on principal components
+analysis,” Cornell University, USA, vol. 51, p. 52,
+2002.
+[Smi04] B. T. Smith, “Lagrange multipliers tutorial in the
+context of support vector machines,” Memorial University
+ of Newfoundland St. John’s, Newfoundland,
+Canada, Jun. 2004.
+[SSA12] D. Schiebener, J. Schill, and T. Asfour, “Discovery,
+segmentation and reactive grasping of unknown
+objects.” in Humanoids, 2012, pp. 71–77. [Online].
+ Available: http://h2t.anthropomatik.kit.edu/
+pdf/Schiebener2012.pdf
+[SUM+
+11] D. Schiebener, A. Ude, J. Morimoto,
+T. Asfour, and R. Dillmann, “Segmentation
+and learning of unknown objects through physical
+interaction,” in Humanoid Robots (Humanoids),
+2011 11th IEEE-RAS International Conference
+on. IEEE, 2011, pp. 500–506. [Online].
+Available: http://ieeexplore.ieee.org/ielx5/6086637/
+6100798/06100843.pdf
+[SWRC06] J. Shotton, J. Winn, C. Rother, and A. Criminisi,
+“Textonboost: Joint appearance, shape and context
+modeling for multi-class object recognition and
+segmentation,” in Computer Vision–ECCV 2006.
+Springer, 2006, pp. 1–15. [Online]. Available: http:
+//link.springer.com/chapter/10.1007/11744023_1
+[TNL14] J. Tighe, M. Niethammer, and S. Lazebnik,
+“Scene parsing with object instances and
+occlusion ordering,” in Computer Vision and
+
+Pattern Recognition (CVPR), 2014 IEEE
+Conference on. IEEE, 2014, pp. 3748–3755.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=6909874
+[UPH05] R. Unnikrishnan, C. Pantofaru, and M. Hebert,
+“A measure for objective evaluation of
+image segmentation algorithms,” in Computer
+Vision and Pattern Recognition-Workshops, 2005.
+CVPR Workshops. IEEE Computer Society
+Conference on. IEEE, 2005, pp. 34–34.
+[Online]. Available: http://repository.cmu.edu/cgi/
+viewcontent.cgi?article=1365&context=robotics
+[vdMPvdH09] L. J. van der Maaten, E. O. Postma, and H. J.
+van den Herik, “Dimensionality reduction: A comparative
+ review,” Journal of Machine Learning
+Research, vol. 10, no. 1-41, pp. 66–71, 2009.
+[VOC10] “Voc2010 preliminary results,” 2010. [Online].
+Available: http://host.robots.ox.ac.uk/pascal/VOC/
+voc2010/results/index.html
+[WAH97] G.-Q. Wei, K. Arbter, and G. Hirzinger, “Automatic
+tracking of laparoscopic instruments by color
+coding,” in CVRMed-MRCAS’97, ser. Lecture
+Notes in Computer Science, J. Troccaz, E. Grimson,
+and R. Mösges, Eds. Springer Berlin Heidelberg,
+1997, vol. 1205, pp. 357–366. [Online]. Available:
+http://dx.doi.org/10.1007/BFb0029257
+[YBCK10] Z. Yin, R. Bise, M. Chen, and T. Kanade, “Cell
+segmentation in microscopy imagery using a
+bag of local bayesian classifiers,” in Biomedical
+Imaging: From Nano to Macro, 2010 IEEE
+International Symposium on, Apr. 2010, pp. 125–
+128. [Online]. Available: http://ieeexplore.ieee.org/
+xpls/abs_all.jsp?arnumber=5490399
+[YHRF12] Y. Yang, S. Hallman, D. Ramanan, and
+C. C. Fowlkes, “Layered object models for
+image segmentation,” Pattern Analysis and
+Machine Intelligence, IEEE Transactions on,
+vol. 34, no. 9, pp. 1731–1743, Sep. 2012.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=6042883
+[ZBS01] Y. Zhang, M. Brady, and S. Smith, “Segmentation
+of brain MR images through a hidden Markov
+random field model and the expectation-maximization
+algorithm,” Medical Imaging, IEEE
+Transactions on, vol. 20, no. 1, pp. 45–57, 2001.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=906424
+[ZGWX05]
+ S.-C. Zhu, C.-E. Guo, Y. Wang, and Z. Xu, “What
+are textons?” International Journal of Computer
+Vision, vol. 62, no. 1-2, pp. 121–143, 2005.
+[Zha12] Z. Zhang, “Microsoft kinect sensor and its effect,”
+MultiMedia, IEEE, vol. 19, no. 2, pp. 4–10, Feb.
+2012.
+[ZJRP+
+15] S. Zheng, S. Jayasumana, B. Romera-Paredes,
+V. Vineet, Z. Su, D. Du, C. Huang, and
+P. H. Torr, “Conditional random fields as
+recurrent neural networks,” in Proceedings
+of the IEEE International Conference on
+Computer Vision, 2015, pp. 1529–1537. [Online].
+Available: http://www.robots.ox.ac.uk/~szheng/papers/CRFasRNN.pdf
+GLOSSARY
+ACM active contour model. 6
+BOV bag-of-visual-words. 5
+CNN Convolutional Neural Network. 5, 9
+CRF Conditional Random Field. 4, 8, 9, 11
+GPU graphics processing unit. 3
+HOG histogram of oriented gradients. 5, 6, 8
+ILSVRC ImageNet Large-Scale Visual Recognition
+Challenge. 9
+MAP Maximum A Posteriori. 8
+MR magnetic resonance. 2, 6
+MRF Markov Random Field. 4, 8
+PCA principal component analysis. 5
+RBF radial basis function. 8
+SIFT scale-invariant feature transform. 5
+SVM Support Vector Machine. 4, 6–8
+
+APPENDIX A
+TABLES
+Database Image Resolution (width × height) Number
+of
+Images Number
+of
+Classes Channels Data source
+Colon Crypt DB (302 px − 1116 px) × (349 px − 875 px) 389 2 3 [CRSS]
+DIARETDB1  1500 px × 1500 px 89 4 3 [KKV+
+14]
+KITTI Road (1226 px − 1242 px) × (370 px − 376 px) 289 2 3 [FKG13]
+MSRCv1  (213 px − 320 px) × (213 px − 320 px) 240 9 3 [MSR]
+MSRCv2  (213 px − 320 px) × (162 px − 320 px) 591 23 3 [MSR]
+Open-CAS Endoscopic Datasets 640 px × 480 px 120 2 3 [MHMK+
+14]
+PASCAL VOC 2012 (142 px − 500 px) × ( 71 px − 500 px) 2913 20 3 [EVGW+
+12]
+Warwick-QU  (567 px − 775 px) × (430 px − 522 px) 165 5 3 [CSM09]
+Table I: An overview of publicly available image databases with a semantic segmentation ground truth.
\ No newline at end of file
diff --git a/read/results/playa/1707.09725.txt b/read/results/playa/1707.09725.txt
new file mode 100644
index 0000000..d1597f2
--- /dev/null
+++ b/read/results/playa/1707.09725.txt
@@ -0,0 +1,4286 @@
+Analysis and Optimization of
+Convolutional Neural Network
+Architectures
+Master Thesis of
+Martin Thoma
+Department of Computer Science
+Institute for Anthropomatics
+and
+FZI Research Center for Information Technology
+Reviewer: Prof. Dr.–Ing. R. Dillmann
+Second reviewer: Prof. Dr.–Ing. J. M. Zöllner
+Advisor: Dipl.–Inform. Michael Weber
+Research Period: 03. May 2017 – 03. August 2017
+KIT – University of the State of Baden-Wuerttemberg and National Research Center of the Helmholtz Association
+ www.kit.edu arXiv:1707.09725v1  [cs.CV]  31 Jul 2017
+
+Analysis and Optimization of Convolutional Neural
+Network Architectures
+by
+Martin Thoma
+Master Thesis
+August 2017
+Master Thesis, FZI
+Department of Computer Science, 2017
+Gutachter: Prof. Dr.–Ing. R. Dillmann, Prof. Dr.–Ing. J. M. Zöllner
+Abteilung Technisch Kognitive Assistenzsysteme
+FZI Research Center for Information Technology
+Affirmation
+Ich versichere wahrheitsgemäß, die Arbeit selbstständig angefertigt, alle benutzten Hilfsmittel
+ vollständig und genau angegeben und alles kenntlich gemacht zu haben, was aus
+Arbeiten anderer unverändert oder mit Abänderungen entnommen wurde.
+Karlsruhe,  Martin Thoma
+August 2017
+ 
+
+Abstract
+Convolutional Neural Networks (CNNs) dominate various computer vision tasks since
+Alex Krizhevsky showed that they can be trained effectively and reduced the top-5 error
+from 26.2 % to 15.3 % on the ImageNet large scale visual recognition challenge. Many
+aspects of CNNs are examined in various publications, but literature about the analysis
+and construction of neural network architectures is rare. This work is one step to close this
+gap. A comprehensive overview over existing techniques for CNN analysis and topology
+construction is provided. A novel way to visualize classification errors with confusion
+matrices was developed. Based on this method, hierarchical classifiers are described and
+evaluated. Additionally, some results are confirmed and quantified for CIFAR-100. For
+example, the positive impact of smaller batch sizes, averaging ensembles, data augmentation
+and test-time transformations on the accuracy. Other results, such as the positive impact of
+learned color transformation on the test accuracy could not be confirmed. A model which
+has only one million learned parameters for an input size of 32 × 32 × 3 and 100 classes and
+which beats the state of the art on the benchmark datasets Asirra, GTSRB, HASYv2 and
+STL-10 was developed.
+ 
+Zusammenfassung
+Modelle welche auf Convolutional Neural Networks (CNNs) basieren sind in verschiedenen
+Aufgaben der Computer Vision dominant seit Alex Krizhevsky gezeigt hat dass diese
+effektiv trainiert werden können und er den Top-5 Fehler in dem ImageNet large scale visual
+recognition challenge Benchmark von 26.2 % auf 15.3 % drücken konnte. Viele Aspekte
+von CNNs wurden in verschiedenen Publikationen untersucht, aber es wurden vergleichsweise
+ wenige Arbeiten über die Analyse und die Konstruktion von Neuronalen Netzen
+geschrieben. Diese Masterarbeit stellt einen Schritt dar um diese Lücke zu schließen. Ein
+umfassender Überblick über Analyseverfahren und Topologielernverfahren wird gegeben. Ein
+neues Verfahren zur Visualisierung der Klassifikationsfehler mit Konfusionsmatrizen wurde
+entwickelt. Basierend auf diesem Verfahren wurden hierarchische Klassifizierer eingeführt
+und evaluiert. Zusätzlich wurden einige bereits in der Literatur beschriebene Beobachtungen
+ wie z.B. der positive Einfluss von kleinen Batch-Größen, Ensembles, Erhöhung der
+Trainingsdatenmenge durch künstliche Transformationen (Data Augmentation) und die Invarianzbildung
+ durch künstliche Transformationen zur Test-Zeit (Test-time transformations)
+experimentell bestätigt. Andere Beobachtungen, wie beispielsweise der positive Einfluss
+gelernter Farbraumtransformationen konnten nicht bestätigt werden. Ein Modell welches
+weniger als eine Million Parameter nutzt und auf den Benchmark-Datensätzen Asirra,
+GTSRB, HASYv2 und STL-10 den Stand der Technik neu definiert wurde entwickelt.
+Acknowledgment
+I would like to thank Stephan Gocht and Marvin Teichmann for the many inspiring
+conversations we had about various topics, including machine learning.
+I also want to thank my father for the support he gave me. He made it possible for me to
+study without having to worry about anything besides my studies. Thank you!
+Finally, I want to thank Timothy Gebhard, Daniel Schütz and Yang Zhang for proof-reading
+my master’s thesis and Stephan Gocht for giving me access to a GTX 1070.
+ 
+This work can be cited the following way:
+@MastersThesis{Thoma:2017,
+Title = {Analysis and Optimization of Convolutional Neural Network
+Architectures},
+Author = {Martin Thoma},
+School = {Karlsruhe Institute of Technology},
+Year = {2017},
+Address = {Karlsruhe, Germany},
+Month = jun,
+Type = {Master’s Thesis},
+Keywords = {machine learning; artificial neural networks;
+classification; supervised learning; CNNs},
+Url = {https://martin-thoma.com/msthesis/}
+}
+A DVD with a digital version of this master thesis and the source code as well as the used
+data is part of this work.
+Contents
+1 Introduction 1
+2 Convolutional Neural Networks 3
+2.1 Linear Image Filters . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 3
+2.2 CNN Layer Types . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 4
+2.2.1 Convolutional Layers . . . . . . . . . . . . . . . . . . . . . . . . . . . 5
+2.2.2 Pooling Layers . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 7
+2.2.3 Dropout . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 9
+2.2.4 Normalization Layers . . . . . . . . . . . . . . . . . . . . . . . . . . 9
+2.3 CNN Blocks . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11
+2.3.1 Residual Blocks . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11
+2.3.2 Aggregation Blocks . . . . . . . . . . . . . . . . . . . . . . . . . . . 12
+2.3.3 Dense Blocks . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 13
+2.4 Transition Layers . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 14
+2.5 Analysis Techniques . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 15
+2.5.1 Qualitative Analysis by Example . . . . . . . . . . . . . . . . . . . . 15
+2.5.2 Confusion Matrices . . . . . . . . . . . . . . . . . . . . . . . . . . . 16
+2.5.3 Validation Curves: Accuracy, loss and other metrics . . . . . . . . . 16
+2.5.4 Learning Curves . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 20
+2.5.5 Input-feature based model explanations . . . . . . . . . . . . . . . . 21
+2.5.6 Argmax Method . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 22
+2.5.7 Feature Map Reconstructions . . . . . . . . . . . . . . . . . . . . . . 22
+2.5.8 Filter comparison . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 23
+2.5.9 Weight update tracking . . . . . . . . . . . . . . . . . . . . . . . . . 23
+2.6 Accuracy boosting techniques . . . . . . . . . . . . . . . . . . . . . . . . . . 24
+3 Topology Learning 27
+3.1 Growing approaches . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 27
+3.1.1 Cascade-Correlation . . . . . . . . . . . . . . . . . . . . . . . . . . . 27
+3.1.2 Meiosis Networks . . . . . . . . . . . . . . . . . . . . . . . . . . . . 28
+3.1.3 Automatic Structure Optimization . . . . . . . . . . . . . . . . . . . . 29
+3.2 Pruning approaches . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 29
+3.3 Genetic approaches . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 30
+3.4 Reinforcement Learning . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 30
+
+3.5 Convolutional Neural Fabrics . . . . . . . . . . . . . . . . . . . . . . . . . . 31
+4 Hierarchical Classification 33
+4.1 Advantages of classifier hierarchies . . . . . . . . . . . . . . . . . . . . . . 34
+4.2 Clustering classes . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 34
+5 Experimental Evaluation 37
+5.1 Baseline Model and Training setup . . . . . . . . . . . . . . . . . . . . . . . 38
+5.1.1 Baseline Evaluation . . . . . . . . . . . . . . . . . . . . . . . . . . . 40
+5.1.2 Weight distribution . . . . . . . . . . . . . . . . . . . . . . . . . . . . 41
+5.1.3 Training behavior . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 45
+5.2 Confusion Matrix Ordering . . . . . . . . . . . . . . . . . . . . . . . . . . . . 48
+5.3 Spectral Clustering vs CMO . . . . . . . . . . . . . . . . . . . . . . . . . . . 51
+5.4 Hierarchy of Classifiers . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 53
+5.5 Increased width for faster learning . . . . . . . . . . . . . . . . . . . . . . . 54
+5.6 Weight updates . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 55
+5.7 Multiple narrow layers vs One wide layer . . . . . . . . . . . . . . . . . . . . 56
+5.8 Batch Normalization . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 57
+5.9 Batch size . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 59
+5.10 Bias . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 59
+5.11 Learned Color Space Transformation . . . . . . . . . . . . . . . . . . . . . . 60
+5.12 Pooling . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 60
+5.13 Activation Functions . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 60
+5.14 Label smoothing . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 64
+5.15 Optimized Classifier . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 66
+5.16 Early Stopping vs More Data . . . . . . . . . . . . . . . . . . . . . . . . . . 68
+5.17 Regularization . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 68
+6 Conclusion and Outlook 71
+A Figures, Tables and Algorithms 75
+B Hyperparameters 79
+B.1 Preprocessing . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 79
+B.2 Data augmentation . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 80
+B.3 Initialization . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 81
+B.4 Objective function . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 81
+B.5 Optimization Techniques . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 82
+B.6 Network Design . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 84
+B.7 Regularization . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 85
+C Calculating Network Characteristics 87
+C.1 Parameter Numbers . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 87
+C.2 FLOPs . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 87
+C.3 Memory Footprint . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 88
+D Common Architectures 89
+D.1 LeNet-5 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 90
+D.2 AlexNet . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 91
+D.3 VGG-16 D . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 92
+D.4 GoogleNet, Inception v2 and v3 . . . . . . . . . . . . . . . . . . . . . . . . . 94
+D.5 Inception-v4 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 95
+E Datasets 97
+F List of Tables 99
+G List of Figures 101
+H Bibliography 103
+I Glossary 119
+
+1. Introduction
+Computer vision is the academic field which aims to gain a high-level understanding of the
+low-level information given by raw pixels from digital images.
+Robots, search engines, self-driving cars, surveillance agencies and many others have
+applications which include one of the following six problems in computer vision as subproblems:
+
+• Classification:¹
+ The algorithm is given an image and k possible classes. The task is
+to decide which of the k classes the image belongs to. For example, an image from
+a self-driving car’s on-board camera contains either paved road, unpaved road or
+no road: Which of those given three classes is in the image?
+• Localization: The algorithm is given an image and one class k. The task is to find
+bounding boxes for all instances of k.
+• Detection: Given an image and k classes, find bounding boxes for all instances of
+those classes.
+• Semantic Segmentation: Given an image and k classes, classify each pixel.
+• Instance segmentation: Given an image and k classes, classify each pixel as one of
+the k classes, but distinguish different instances of the classes.
+• Content-based Image Retrieval: Given an image x and n images in a database,
+find the top u images which are most similar to x.
+There are many techniques to approach those problems, but since AlexNet [KSH12] was
+published, all of those problems have high-quality solutions which make use of Convolutional
+Neural Networks (CNNs)
+[HZRS15a, LAE+16, RFB15, DHS16, SKP15].
+Today, most neural networks are constructed by rules of thumb and gut feeling. The
+architectures evolved and got deeper, more hyperparameters were added. Although there
+are methods for analyzing CNNs, those methods are not enough to determine all steps in
+the development of network architectures without gut feeling. A detailed introduction to
+CNNs as well as nine methods for analysis of CNNs is given in Chapter 2.
+1
+Classification is also called identification if the classes are humans. Another name is object recognition,
+although the classes can be humans and animals as well.
+ 
+1. Introduction
+Despite the fact that most researchers and developers do not use topology learning, a couple
+of algorithms have been proposed for this task. Five classes of topology learning algorithms
+are introduced in Chapter 3.
+When datasets and the number of classes are large, evaluating a single idea how to improve
+the network can take several weeks just for the training. Hence the idea of building a
+hierarchy of classifiers which allows to split the classification task into various sub-tasks
+that can easily be combined is evaluated in Chapter 4.
+Confusion Matrix Ordering (CMO), the hierarchical classifier, 9 types of hyperparameters
+and label smoothing are evaluated in Chapter 5.
+This work focuses on classification problems to keep the presented ideas as pure and
+simple as possible. The described techniques are relevant to all six described computer
+vision problems due to the fact that Encoder-Decoder architectures are one component of
+state-of-the-art algorithms for all six of them.
+
+2. Convolutional Neural Networks
+In the following, it is assumed that the reader knows what a multilayer perceptron (MLP)
+is and how they are designed for classification problems, what activation functions are and
+how gradient descent works. In case the reader needs a refresher on any of those topics, I
+recommend chapter 4.3 and 4.4 of [Tho14a] as well as [LBH15].
+This chapter introduces linear image filters in Section 2.1, then standard layer types of
+CNNs are explained in Section 2.2. The layer block pattern is described in Section 2.3,
+transition layers in Section 2.4 and nine ways to analyze CNNs are described in Section 2.5.
+2.1. Linear Image Filters
+A linear image filter (also called a filter bank or a kernel) is an element $F \in \mathbb{R}^{k_w \times k_h \times d}$,
+where $k_w$ represents the filter’s width, $k_h$ the filter’s height and $d$ the number of input
+channels. The filter $F$ is convolved with the image $I \in \mathbb{R}^{w \times h \times d}$ to produce a new image $I'$.
+The output image $I'$ has only one channel. Each pixel $I'(x, y)$ of the output image gets
+calculated by point-wise multiplication of one filter element with one element of the original
+image $I$:
+$$I'(x, y) = \sum_{i_x = 1 - \lceil k_w / 2 \rceil}^{\lfloor k_w / 2 \rfloor} \; \sum_{i_y = 1 - \lceil k_h / 2 \rceil}^{\lfloor k_h / 2 \rfloor} \; \sum_{i_c = 1}^{d} I(x + i_x, y + i_y, i_c) \cdot F(i_x, i_y, i_c)$$
+This procedure is explained by Figure 2.1. It is essentially a discrete convolution.
+$I \in \mathbb{R}^{7 \times 7}$:
+104 116 116 112 58 47 47
+109 97 114 116 105 110 45
+116 104 111 109 97 46 100
+101 47 109 97 115 116 101
+114 47 99 97 116 99 97
+116 99 97 116 46 112 104
+112 63 118 61 49 46 48
+Filter kernel $F \in \mathbb{R}^{3 \times 3}$:
+9 -3 -1
+-6 5 3
+2 -8 0
+Result of point-wise multiplication:
+936 -333 -109
+-282 545 291
+94 -792 0
+$I' \in \mathbb{R}^{7 \times 7}$:
+-4 -254 -498 -662 -849 -642 187
+-520 45 240 211 388 215 -861
+-340 559 -105 185 -138 -180 503
+-718 429 350 173 251 268 -655
+-567 -53 -75 80 571 -128 24
+-408 596 -550 368 26 976 156
+302 647 879 223 811 54 660
+Figure 2.1.: Visualization of the application of a linear k × k × 1 image filter. For each pixel of the
+output image, $k^2$ multiplications and $k^2$ additions of the products have to be calculated.
+
+2. Convolutional Neural Networks
+One important detail is how boundaries are treated. There are four common ways of
+boundary treatment:
+• don’t compute: The image I
+ will be smaller than the original image. I
+ ∈
+R(w−k
+w +1)×(h−k
+h+1)×d
+3
+ , to be exact.
+• zero padding: The image I is padded by zeros where the filter would access elements
+which do not exist. This will result in edges being detected at the border if the border
+pixels are not black, but doesn’t need any computation.
+• nearest: Repeat the pixel which is closest to the boundary.
+• reflect: Reflect the image at the boundaries.
+Common tasks that can be done with linear filters include edge detection, corner detection,
+smoothing, sharpening, median filtering, box filtering. See Figure A.1 for five examples.
+Please note that the result of a filtering operation is again an image. This means filters
+can be applied successively. While each pixel after one filtering operation with a 3 × 3
+filter got influenced by 3 · 3 = 9 pixels of the original image, two successively applied 3 × 3
+filters increase the area of the original image which influenced the output. The output is
+then influenced by 25 pixels. This area is called the receptive field. The kind of pattern which is
+detected by a filter is called a feature. The bigger the receptive field is, the more complex
+features can get, as they are able to consider more of the original image. Instead of taking
+one 5 × 5 filter with 25 parameters, one might consider taking two successive 3 × 3 filters
+with 2 · (3 · 3) = 18 parameters. The 5 × 5 filter is a strict superset of possible filtering
+operations compared to the two 3 × 3 filters, but the relevance of this technique will become
+clear in Section 2.2.
+2.2. CNN Layer Types
+While the idea behind deep MLPs is that feature hierarchies capture the important parts
+of the input more easily, CNNs are inspired by the idea of translational invariance : Many
+features in an image are translationally invariant. For example, if a car detector is developed, one
+could try to detect it by its parts [FGMR10]. But then there are many positions at which
+the wheels could be. Combining those, it is desirable to capture low-level, translationally
+invariant features at lower layers of an artificial neural network (ANN) and in higher layers
+high-level features which are combinations of the low-level features.
+Also, models should utilize the fact that the pixels of images are ordered. One way to use
+this is by learning image filters in so called convolutional layers.
+While MLPs vectorize the input, the input of a layer in a CNN is a set of feature maps. A feature
+map is a matrix $m \in \mathbb{R}^{w \times h}$, but typically the width equals the height (w = h). For an RGB
+
+2.2. CNN Layer Types
+input image, the number of feature maps is d = 3. Each color channel is a feature map.
+Since AlexNet [KSH12] almost halved the error in the ImageNet challenge, CNNs are
+state-of-the-art in various computer vision tasks.
+Traditional CNNs have three important building tools:
+• Convolutional layers with a non-linear activation function as described in Section 2.2.1,
+• pooling layers as described in Section 2.2.2 and
+• normalization layers as described in Section 2.2.4.
+2.2.1. Convolutional Layers
+Convolutional layers take several feature maps as input and produce n feature maps¹ as
+output, where n is the number of filters in the convolution layer. The filter weights of
+the linear convolutions are the parameters which are adapted to the training data. The
+number n of filters as well as the filter’s size $k_w \times k_h$ are hyperparameters of convolutional
+layers. Sometimes, it is denoted as $n@k_w \times k_h$. Although the filter depth is usually omitted
+in the notation, the filters are of dimension $k_w \times k_h \times d^{(i-1)}$, where $d^{(i-1)}$ is the number of
+feature maps of the input layer $(i - 1)$.
+Further hyperparameters of convolution layers are the stride $s \in \mathbb{N}_{\geq 1}$ and the padding.
+Padding (usually zero-padding [SCL12, SEZ+13, HZRS15a]) is used to make sure that the
+size of the feature maps doesn’t change.
+The hyperparameters of convolutional layers are
+• the number of filters $n \in \mathbb{N}_{\geq 1}$,
+• $k_w, k_h \in \mathbb{N}_{\geq 1}$ of the filter size $k_w \times k_h \times d^{(i-1)}$,
+• the activation function of the layer (see Table B.3) and
+• the stride $s \in \mathbb{N}_{\geq 1}$
+Typical choices are $n \in \{ 32, 64, 128 \}$, $k_w = k_h = k \in \{ 1, 3, 5, 11 \}$ such as in [KSH12,
+SZ14, SLJ+15], rectified linear unit (ReLU) activation and s = 1.
+The concept of weight sharing is crucial for CNNs. This concept was introduced in [WHH+89].
+With weight sharing, the filters can be learned with stochastic gradient descent (SGD) just
+like MLPs. In fact, every CNN has an equivalent MLP which computes the same function
+if only the flattened output is compared.
+1
+also called activation maps or channels
+ 
+2. Convolutional Neural Networks
+This is easier to see when the filtering operation is denoted formally:
+$$o^{(i)}(x) = b + \sum_{j=1}^{k} w_{ij} \cdot x_j \quad \text{with } i \in \{ 1, \dots, w \} \times \{ 1, \dots, h \} \times \{ 1, \dots, d \} \tag{2.1}$$
+$$o^{(x,y,z)}(I) = b + \sum_{i_x = 1 - \lceil k_w / 2 \rceil}^{\lfloor k_w / 2 \rfloor} \; \sum_{i_y = 1 - \lceil k_h / 2 \rceil}^{\lfloor k_h / 2 \rfloor} \; \sum_{i_c = 1}^{d} F_z(i_x, i_y, i_c) \cdot I(x + i_x, y + i_y, i_c) \tag{2.2}$$
+with a bias $b \in \mathbb{R}$, $x \in \{ 1, \dots, w \}$, $y \in \{ 1, \dots, h \}$ and $z \in \{ 1, \dots, d \}$
+One can see that most weights of the equivalent MLP are zero and many weights are
+equivalent. Hence the advantage of CNNs compared to MLPs is the reduction of parameters.
+The effect of fewer parameters is that less training data is necessary to get suitable
+estimations for those. This means a MLP which is able to compute the same functions as a
+CNN will likely have worse results on the same dataset, if a CNN architecture is suitable
+for the dataset.
+See Figure 2.2 for a visualization of the application of a convolutional layer.
+3 feature maps
+(e.g. RGB) n feature mapsn filters of
+size k × k × 3
+width w
+ width w
+heighth
+ heighthneural
+network
+data  apply
+ . . .
+. . .
+. . .. . .
+. . .
+. . .
+Figure 2.2.: Application of a single convolutional layer with n filters of size k × k × 3 with stride
+s = 1 to input data of size width × height with three channels.
+
+2.2. CNN Layer Types
+A convolutional layer with n filters of size $k_w \times k_h$ and SAME padding after $d^{(i-1)}$ feature
+maps of size $s_x \times s_y$ has $n \cdot d^{(i-1)} \cdot (k_w \cdot k_h)$ parameters if no bias is used. In contrast, a fully
+connected layer which produces the same output size and does not use a bias would have
+$n \cdot d^{(i-1)} \cdot (s_x \cdot s_y)^2$ parameters. This means a convolutional layer has drastically fewer
+parameters. On the one hand, this means it can learn less complex decision boundaries. On
+the other hand, it means fewer parameters have to be learned and hence the optimization
+procedure needs fewer examples and the optimization objective is simpler.
+It is particularly interesting to notice that even a convolutional layer of 1 × 1 filters does
+learn a linear combination of the d input feature maps. This can be used for dimensionality
+reduction, if there are fewer 1 × 1 filters in a convolutional layer than input feature maps.
+Another insight recently got important: Every fully connected layer has an equivalent
+convolutional layer which has the same weights.2
+ This way, one can use the complete
+classification network as a very complex non-linear image filter which can be used for
+semantic segmentation.
+A fully connected layer with $d \in \mathbb{N}_{\geq 1}$ inputs and $n \in \mathbb{N}_{\geq 1}$ nodes can be interpreted as a
+convolutional layer with an input of shape 1 × 1 × d and n filters of size 1 × 1. This will
+produce an output shape 1 × 1 × n. Every single output is connected to all of the inputs.
+When a convolutional layer is followed by a fully connected layer, it is necessary to vectorize
+the feature maps. If the 1 × 1 convolutional filter layer is applied to the vectorized output,
+it is completely equivalent to a fully connected layer. However, the vectorization can be
+omitted if a convolution layer without padding and a filter size equal to the feature maps
+size is applied. This was used by [LSD15].
+2.2.2. Pooling Layers
+Pooling summarizes a p × p area of the input feature map. Just like convolutional layers,
+pooling can be used with a stride of $s \in \mathbb{N}_{>1}$. As s ≥ 2 is the usual choice, pooling layers
+are sometimes also called subsampling layers. Typically, p ∈ { 2, 3, 4, 5 } and s = 2 such as
+for AlexNet [KSH12] and VGG-16 [SZ14].
+The type of summary for the set of activations A varies between the functions listed
+in Table 2.1, spatial pyramid pooling as introduced in [HZRS14] and generalizing pooling
+functions as introduced in [LGT16].
+2
+But convolutional layers only have equivalent fully connected layers if the output feature map is 1 × 1
+
+2. Convolutional Neural Networks
+Name | Definition | Used by
+Max pooling | $\max \{ a \in A \}$ | [BPL10, KSH12]
+Average / mean pooling | $\frac{1}{\lvert A \rvert} \sum_{a \in A} a$ | LeNet-5 [LBBH98] and [KSlB+10]
+$\ell_2$ pooling | $\sqrt{\sum_{a \in A} a^2}$ | [Le13]
+Stochastic pooling | * | [ZF13]
+Table 2.1.: Pooling types for a set A of activations $a \in \mathbb{R}$.
+(*) For stochastic pooling, each of the p × p activation values $a_i$ in the pooling region gets
+picked with probability $p_i = \frac{a_i}{\sum_{a_j \in A} a_j}$. This assumes the activations $a_i$ are non-negative.
+Pooling is applied for three reasons: To get local translational invariance, to get invariance
+against minor local changes and, most important, for data reduction to $\frac{1}{s^2}$-th of the data by
+using strides of s > 1.
+See Figure 2.3 for a visualization of max pooling.
+7 9 3 5 9 40 7 0 0 9 05 0 9 3 7 59 2 9 6 4 3
+ 2 × 2 max pooling
+ 9 5 99 9 72 2
+Figure 2.3.: 2 × 2 max pooling applied to a feature map of size 6 × 4 with stride s = 2 and padding.
+Average pooling of p × p areas with stride s can be replaced by a convolutional layer. If
+the input of the pooling layer are $d^{(i-1)}$ feature maps, the convolutional layer has to have
+$d^{(i-1)}$ filters of size p × p and stride s. The ith filter has the values
+$$\begin{pmatrix} \frac{1}{p^2} & \cdots & \frac{1}{p^2} \\ \vdots & \ddots & \vdots \\ \frac{1}{p^2} & \cdots & \frac{1}{p^2} \end{pmatrix}$$
+for the dimension i and the zero matrix
+$$\begin{pmatrix} 0 & \cdots & 0 \\ \vdots & \ddots & \vdots \\ 0 & \cdots & 0 \end{pmatrix}$$
+for all other dimensions $j \in \{ 1, \dots, d^{(i-1)} \}$, $j \neq i$.
+
+2.2. CNN Layer Types
+2.2.3. Dropout
+Dropout is a technique used to prevent overfitting and co-adaptations of neurons by setting
+the output of any neuron to zero with probability p. It was introduced in [HSK+12] and is
+well-described in [SHK+14].
+A Dropout layer can be implemented as follows: For an input in of any shape s, a tensor of
+the same shape $D \in \{ 0, 1 \}^s$ is sampled, where each element $d_i$ is sampled independently
+from a Bernoulli distribution. The results are element-wise multiplied to calculate the
+output out of the Dropout layer:
+$$\text{out} = D \odot \text{in} \quad \text{with } d_i \sim B(1, p)$$
+where $\odot$ is the Hadamard product
+$$(A \odot B)_{i,j} := (A)_{i,j} (B)_{i,j}$$
+Hence every value of the input gets set to zero with a dropout probability of p. Typically,
+Dropout is used with p = 0.5. Layers closer to the input usually have a lower dropout probability
+than later layers. In order to keep the expected output at the same value, the
+output of a dropout layer is multiplied with $\frac{1}{1-p}$ when dropout is enabled [Las17, tf-16b].
+At inference time, dropout is disabled.
+Dropout is usually only applied after fully connected layers, but not after convolutional
+layers as it usually increases the test error as pointed out in [GG16].
+Models which use Dropout can be interpreted as an ensemble of models with different
+numbers of neurons in each layer, but also with weight sharing.
+Conceptually similar are DropConnect and networks with stochastic depth. DropConnect
+[WZZ+13] is a generalization of Dropout, which sets weights to zero in contrast to
+setting the output of a neuron to zero. Networks with stochastic depth as introduced
+in [HSL+16] drop out only complete layers. This can be done by having Residual networks
+which have one identity connection and one residual feature connection. Hence the residual
+features can be dropped out and the identity connection remains.
+2.2.4. Normalization Layers
+One problem when training deep neural networks is internal covariate shift : While the
+parameters of layers close to the output are adapted to some input produced by lower layers,
+those lower layers’ parameters are also adapted. This leads to the parameters in the upper
+layers being worse. A very low learning rate has to be chosen to adjust for the fact that the
+input features might drastically change over time.
+ 
+2. Convolutional Neural Networks
+One way to approach this problem is by normalizing mini-batches as described in [IS15]. In a
+Batch Normalization layer, the d-dimensional input $x = (x^{(1)}, \dots, x^{(d)})$ is first normalized
+point-wise to
+$$\hat{x}^{(k)} = \frac{x^{(k)} - \bar{x}^{(k)}}{\sqrt{s[x^{(k)}]^2 + \varepsilon}}$$
+with $\bar{x}^{(k)} = \frac{1}{m} \sum_{i=1}^{m} x_i^{(k)}$ being the sample mean and $s[x^{(k)}]^2 = \frac{1}{m} \sum_{i=1}^{m} (x_i^{(k)} - \bar{x}^{(k)})^2$ the
+sample variance where $m \in \mathbb{N}_{\geq 1}$ is the number of training samples per mini-batch, $\varepsilon > 0$
+being a small constant to prevent division by zero and $x_i^{(k)}$ is the activation of neuron k for
+training sample i.
+Additionally, for each activation $x^{(k)}$ two parameters $\gamma^{(k)}, \beta^{(k)}$ are introduced which scale
+and shift the feature:
+$$y^{(k)} = \gamma^{(k)} \cdot \hat{x}^{(k)} + \beta^{(k)}$$
+In the case of fully connected layers, this is applied to the activation, before the non-linearity
+is applied. If it is applied after the activation, it harms the training in early stages. For
+convolution, only one γ and one β is learned per feature map.
+One important special case is $\gamma^{(k)} = \sqrt{s[x^{(k)}]^2 + \varepsilon}$ and $\beta^{(k)} = \bar{x}^{(k)}$, which would make the
+Batch Normalization layer an identity layer.
+During evaluation time,3
+ the expected value and the variance are calculated once for the
+complete dataset. An unbiased estimate of the empirical variance is used.
+The question where Batch Normalization layers (BN) should be applied and for which
+reasons is still open. For Dropout, it doesn’t matter if it is applied before or after the
+activation function. Considering this, the possible options for the order are:
+1. CONV / FC → BN → activation function → Dropout → . . .
+2. CONV / FC → activation function → BN → Dropout → . . .
+3. CONV / FC → activation function → Dropout → BN → . . .
+4. CONV / FC → Dropout → BN → activation function → . . .
+The authors of [IS15] suggest to use Batch Normalization before the activation function
+as in Items 1 and 4. Batch Normalization after the activation led to better results in
+https://github.com/ducha-aiki/caffenet-benchmark/blob/master/batchnorm.md
+Another normalization layer is Local Response Normalization as described in [KSH12],
+which includes $\ell_2$ normalization as described in [WWQ13]. Those two normalization layers,
+however, are superseded by Batch Normalization.
+3
+also called inference time
+
+2.3. CNN Blocks
+2.3. CNN Blocks
+This section describes more complex building blocks than simple layers. CNN blocks act
+similar to a layer, but they are themselves composed of layers.
+2.3.1. Residual Blocks
+Residual blocks as introduced in [HZRS15a] are a milestone in computer vision. They
+enabled the computer vision community to go from about 16 layers as in VGG 16-D (see
+Appendix D.3) to several hundred layers. The key idea of deep residual networks (ResNets)
+as introduced in [HZRS15a] is to add an identity connection which skips two layers. This
+identity connection adds the feature maps onto the other feature maps and thus requires
+the output of the input layer of the residual block to be of the same dimension as the last layer
+of the residual block.
+Formally, it can be described as follows. If $x_i$ are the feature maps after layer i and $x_0$ is
+the input image, H is a non-linear transformation of feature maps, then
+$$y = H(x)$$
+describes a traditional CNN. Note that this could be multiple layers. A residual block as
+visualized in Figure 2.4 is described by
+$$y = H(x) + x$$
+In [HZRS15a], they only used residual skip connections to skip two layers. Hence, if
+$\text{conv}_i(x_i)$ describes the application of the convolutional layer i to the input $x_i$ without the
+nonlinearity, then such a residual block is
+$$x_{i+2} = \text{conv}_{i+1}(\text{ReLU}(\text{conv}_i(x_i))) + x_i$$
+Figure 2.4.: ResNet module
+Image source: [HZRS15a]
+[HM16] provides some insights why deep residual networks are successful.
+ 
+2. Convolutional Neural Networks
+2.3.2. Aggregation Blocks
+Two common ways to add more parameters to neural networks are increasing their depth
+by adding more layers or increasing their width by adding more neurons / filters. Inception
+blocks [AM15] implicitly started a new idea which was explicitly described in [XGD+16] as
+“ResNeXt block”: Increasing the cardinality $C \in \mathbb{N}_{\geq 1}$. By cardinality, the authors describe
+the concept of having C small convolutional networks with the same topology but different
+weights. This concept is visualized in Figure 2.5. Please note that Figure 2.5 does not
+combine aggregation blocks with residual blocks as the authors did.
+256-d in
+concatenate total 32
+groups
+. . .
+128-d out4 @ 1 × 1 × 256
+4 @ 3 × 3 × 4 4 @ 1 × 1 × 256
+4 @ 3 × 3 × 4 4 @ 1 × 1 × 256
+4 @ 3 × 3 × 4
+Figure 2.5.: Aggregation block with a cardinality of C = 32. Each of the 32 groups is a 2-layer
+convolutional network. The first layer receives 256 feature maps and applies four 1 × 1
+filters to it. The second layer applies four 3 × 3 filters. Although every group has
+the same topology, the learned weights are different. The outputs of the groups are
+concatenated.
+The hyperparameters of an aggregation block are:
+• The topology of the group members.
+• The cardinality $C \in \mathbb{N}_{\geq 1}$. Note that a cardinality of C = 1 is equivalent in every
+aspect to using the group network without an aggregation block.
+
+2.3. CNN Blocks
+2.3.3. Dense Blocks
+Dense blocks are collections of convolutional layers which are introduced in [HLW16]. The
+idea is to connect each convolutional layer directly to subsequent convolutional layers.
+Traditional CNNs with L layers and one input layer have L connections between layers,
+but dense blocks have $\frac{L(L+1)}{2}$ connections between layers. The input feature maps are
+concatenated in depth. According to the authors, this prevents features from being relearned
+and allows much fewer filters per convolutional layer. Where AlexNet and VGG-16
+have several hundred filters per convolutional layer (see Tables D.2 and D.3), the authors
+used only on the order of 12 feature maps per layer.
+A dense block is visualized in Figure 2.6.
+ 256-d in
+k @ 3 × 3
+concatenate
+k @ 3 × 3
+concatenate256-d
+k-d
+(256 + k)-d
+k-d
+(256 + L · k)-d out
+Figure 2.6.: Dense block with L = 2 layers and a growth factor of k.
+Dense blocks have five hyperparameters:
+• The activation function being used. The authors use ReLU.
+• The size $k_w \times k_h$ of filters. The authors use $k_w = k_h = 3$.
+• The number of layers L, where L = 2 is a simple convolutional layer.
+• The number k of filters added per layer (called growth rate in the paper)
+It might be necessary to use 1 × 1 convolutions to reduce the number of L · k feature maps.
+
+2. Convolutional Neural Networks
+2.4. Transition Layers
+Transition layers are used to overcome constraints imposed by resource limitations or
+architectural design choices. One constraint is the number of feature maps (see Appendix C.3
+for details). In order to reduce the number of feature maps while still keeping as much
+relevant information as possible in the network, a convolutional layer i with $k_i$ filters of
+the shape $1 \times 1 \times k_{i-1}$ is added. The number of filters $k_i$ directly controls the number of
+generated feature maps.
+In order to reduce the dimensionality (width and height) of the feature maps, one typically
+applies pooling.
+Global pooling is another type of transition layer. It applies pooling over the complete
+feature map size to shrink the input to a constant 1 × 1 feature map and hence allows one
+network to have different input sizes.
+
+2.5. Analysis Techniques
+2.5. Analysis Techniques
+CNNs have dozens of hyperparameters and ways to tune them. Although there are
+automatic methods like random search [BB12], grid search [LBOM98], gradient-based
+hyperparameter optimization [MDA15] and Hyperband [LJD+16], some actions need a
+manual investigation to improve the model’s quality. For this reason, analysis techniques
+which guide developers and researchers to the important hyperparameters are necessary. In
+the following, nine diagnostic techniques are explained.
+A machine learning developer has the following choices to improve the model’s quality:
+(I1) Change the problem definition (e.g., the classes which are to be distinguished)
+(I2) Get more training data
+(I3) Clean the training data
+(I4) Change the preprocessing (see Appendix B.1)
+(I5) Augment the training data set (see Appendix B.2)
+(I6) Change the training setup (see Appendices B.3 to B.5)
+(I7) Change the model (see Appendices B.6 and B.7)
+The preprocessing is usually not changed in modern architectures. However, this still leaves
+six very different ways to improve the classifier. Changing the training setup and the model
+each have too many possible choices to explore them completely. Thus, techniques are
+necessary to guide the developer to changes which are most promising to improve the model.
+For all of the following methods, it is important to use only the training set and the
+validation set.
+2.5.1. Qualitative Analysis by Example
+The most basic analysis technique which should always be used is looking at examples
+which the network correctly predicted with a high certainty and what the classifier got
+wrong with a high certainty. Those examples can be arranged by applying t-SNE [MH08].
+On the one hand, this might reveal errors in the training data. Most of the time, training
+data is manually labeled by humans who make mistakes. If a model is fit to those errors,
+its quality decreases.
+On the other hand, this can show differences in the distribution of validation data which
+are not covered by the training set and thus indicate the need to collect more data.
+ 
+2. Convolutional Neural Networks
+2.5.2. Confusion Matrices
+A confusion matrix is a matrix $(c)_{ij} \in \mathbb{N}_{\geq 0}^{K \times K}$, where $K \in \mathbb{N}_{\geq 2}$ is the number of classes,
+which contains all correct and wrong classifications. The item $c_{ij}$ is the number of times
+items of class i were classified as class j. This means the correct classification is on the
+diagonal $c_{ii}$ and all wrong classifications are off the diagonal. The sum $\sum_{i=1}^{K} \sum_{j=1}^{K} c_{ij}$ is the
+total number of samples which were evaluated and $\frac{\sum_{i=1}^{K} c_{ii}}{\sum_{i=1}^{K} \sum_{j=1}^{K} c_{ij}}$ is the accuracy.
+The sums $r(i) = \sum_{j=1}^{K} c_{ij}$ of each class i are worth being investigated as they show if the
+classes are skewed. If the number of samples of one class dominates the data set, then the
+classifier can get a high accuracy by simply always predicting the most common class. If
+the accuracy of the classifier is close to the a priori probability of the most common class,
+techniques to deal with skewed classes might help.
+An automatic criterion to check for this problem is
+$$\text{accuracy} \leq \frac{\max(\{ r(i) \mid i = 1, \dots, k \})}{\sum_{i=1}^{k} r(i)} + \varepsilon$$
+where ε is a small value to compensate the fact that some examples might be correct just
+by chance.
+Other values which should be checked are the class-wise sensitivities:
+$$s(k) = \frac{\#\text{ correctly identified instances of class } k}{\#\text{ instances of class } k} = \frac{c_{kk}}{r(k)} \in [0, 1]$$
+If s(i) is much lower than s(j), it is an indicator that more or cleaner training data is
+necessary for class i.
+The class-wise confusion
+$$f_{\text{confusability}}(k_1, k_2) = \frac{c_{k_1 k_2}}{\sum_{j=1}^{K} c_{k_1 j}}$$
+indicates if class $k_1$ gets often classified as class $k_2$. The highest values here can indicate
+if two classes should be merged or a specialized model for separating those classes could
+improve the overall system.
+2.5.3. Validation Curves: Accuracy, loss and other metrics
+Validation curves display a hyperparameter (e.g., the training epoch) on the horizontal
+axis and a quality metric on the vertical axis. Accuracy, error = (1 − accuracy) or loss are
+typical quality metrics. Other quality metrics can be found in [OHIL16].
+In case that the number of training epochs is used as the examined hyperparameter,
+validation curves give an indicator if training longer improves the model’s performance. By
+
+2.5. Analysis Techniques
+plotting the error on the training set as well as the error on a validation set, one can also
+estimate if overfitting might become a problem. See Figure 2.7 for an example.
+10 20 30 40 50 60 70 80 90 1000.20.40.60.8
+ overfitting
+ EpochsError
+ Training set
+Validation set
+Figure 2.7.: A typical validation curve: In this case, the hyperparameter is the number of epochs
+and the quality metric is the error (1 − accuracy). The longer the network is trained,
+the better it gets on the training set. At some point the network is fit too well to the
+training data and loses its capability to generalize. At this point the quality curve of
+the training set and the validation set diverge. While the classifier is still improving on
+the training set, it gets worse on the validation and the test set.
+When the epoch-loss validation curve has plateaus as in Figure 2.8, this means the optimization
+ process did not improve for several epochs. Three possible ways to reduce the
+problem of plateaus are (i) to change weight initialization if the plateau was at the beginning,
+(ii) regularizing the model or (iii) changing the optimization algorithm.
+Loss functions
+The loss function (also called error function or cost function ) is a function which assigns a
+real value to a complex event like the predicted class of a feature vector. It is used to define
+the objective function. For classification problems the loss function is typically cross-entropy
+with $\ell_1$ or $\ell_2$ regularization, as it was described in [NH92]:
+$$E_{CE}(W) = \underbrace{-\sum_{x \in X} \sum_{k=1}^{K} \left[ t_k^x \log(o_k^x) + (1 - t_k^x) \log(1 - o_k^x) \right]}_{\text{cross-entropy data loss}} + \underbrace{\lambda_1 \cdot \underbrace{\sum_{w \in W} |w|}_{\ell_1} + \lambda_2 \cdot \underbrace{\sum_{w \in W} w^2}_{\ell_2}}_{\text{model complexity loss}}$$
+where W are the weights, X is the training data set, $K \in \mathbb{N}_{\geq 0}$ is the number of classes and
+$t_k^x$ indicates if the training example x is of class k. $o_k^x$ is the output of the classification
+algorithm which depends on the weights. $\lambda_1, \lambda_2 \in [0, \infty)$ weight the regularization and are
+typically smaller than 0.1.
+ 
+2. Convolutional Neural Networks
+Figure 2.8.: Example for a validation curve (plotted loss function) with plateaus. The dark orange
+curve is smoothed, but the non-smoothed curve is also plotted in light orange.
+The data loss is positive whenever the classification is not correct, whereas the model
+complexity loss is higher for more complex models. The model complexity loss exists due
+to the intuition of Occam’s razor : If two models explain the same data with an accuracy of
+100 %, the simpler model is to be preferred.
+A reason to show the loss for the validation curve technique instead of other quality metrics
+is that it contains more information about the quality of the model. A reason against the
+loss is that it has no upper bound like the accuracy and can be hard to interpret. The
+loss only shows relative learning progress whereas the accuracy shows absolute progress to
+human readers.
+There are three observations in the loss validation curve which can help to improve the
+network:
+• If the loss does not decrease for several epochs, the learning rate might be too low.
+The optimization process might also be stuck in a local minimum.
+• Loss being NAN might be due to too high learning rates. Another reason is division
+by zero or taking the logarithm of zero. In both cases, adding a small constant like
+$10^{-7}$ fixes the problem.
+• If the loss-epoch validation curve has a plateau at the beginning, the weight initialization
+ might be bad.
+
+2.5. Analysis Techniques
+Quality criteria
+There are several quality criteria for classification models. Most quality criteria are based
+on the confusion matrix c which denotes at $c_{ij}$ the number of times the real class was i and j
+was predicted. This means the diagonal contains the number of correct predictions. For
+the following, let $t_i = \sum_{j=1}^{k} c_{ij}$ be the number of training samples for class i. The most
+common quality criterion is accuracy:
+$$\text{accuracy}(c) = \frac{\sum_{i=1}^{k} c_{ii}}{\sum_{i=1}^{k} t_i} \in [0, 1]$$
+One problem of accuracy as a quality criterion are skewed classes. If one class is by far
+more common than all other classes, then the simplest way to achieve a high score is to
+always classify everything as the most common class.
+In order to fix this problem, one can use the mean accuracy:
+$$\text{mean-accuracy}(c) = \frac{1}{k} \cdot \sum_{i=1}^{k} \frac{c_{ii}}{t_i} \in [0, 1]$$
+For two-class problems there are many other metrics like precision, recall and $F_\beta$-score.
+Quality criteria for semantic segmentation are explained in [Tho16].
+Besides the quality of the classification result, several other quality criteria are important
+in practice:
+• Speed of evaluation for new images,
+• latency,
+• power consumption,
+• robustness against (non)random perturbations in the training data (see [SZS+13,
+PMW+15]),
+• robustness against (non)random perturbations in the training labels (see [NDRT13,
+XXE12]),
+• model size
+As reducing the floating point accuracy allows processing more data on a given device [Har15],
+analysis under this aspect is also highly relevant in some scenarios.
+However, the following focuses on the quality of the classification result.
+ 
+2. Convolutional Neural Networks
+2.5.4. Learning Curves
+A learning curve is a plot where the horizontal axis displays the number of training samples
+given to the network and the vertical axis displays the error. Two curves are plotted: The
+error on the training set (of which the size is given by the horizontal axis) and the error on
+the test set (which is of fixed size). See Figure 2.9 for an example. The learning curve for the
+validation set is an indicator if more training data without any other changes will improve
+the network’s performance. Having the training set’s learning curve, it is possible to estimate
+if the capacity of the model to fit the data is high enough for the desired classification error.
+The error on the validation set should never be expected to be significantly lower than the
+error on the training set. If the error on the training set is too high, then more data will
+not help. Instead, the model or the training algorithm need to be adjusted.
+If the training set’s learning curve is significantly higher than the validation set’s learning
+curve, then removing features (e.g., by decreasing the image resolution), more training
+samples or more regularization will help.
+10 20 30 40 50 60 70 80 90 100
+0.2 0.4 0.6
+avoidable bias
+variance
+human-level error
+Training samples
+Error
+Validation set
+Training set
+Figure 2.9.: A typical learning curve: The more data is used for training, the more errors a given
+architecture will make to fit the given training data. At the same time, it is expected
+that the training data gets more similar to the true distribution of the data which
+should be captured by the test data. At some point, the error on the training and
+test set should be about the same. The term “avoidable bias” was coined by Andrew
+Ng [Ng16]. In some cases it is not possible to classify data correctly by the given
+features. If humans can classify the data given the features correctly, however, then
+the bias is avoidable by building a better classifier.
+The major drawback of this analysis technique is its computational intensity. In order to
+get one point on the training curve and one point on the testing curve, a complete training
+has to be executed. On the full data set, this can be several days on high-end computers.
+
+2.5. Analysis Techniques
+2.5.5. Input-feature based model explanations
+Understanding which clues the model took to come to its prediction is crucial to check if
+the model actually learns what the developer thinks it learns. For example, a model which
+has to distinguish sled dogs from Chihuahuas might simply look at the background and
+check if there is snow. Depending on the training and test data, this works exceptionally
+well. However, it is not the desired solution.
+For classification problems in computer vision, there are two types of visualizations which
+help to diagnose such problems. Both color superpixels of the original image to convey
+information how the model used those superpixels:
+• Correct class heatmap: The probability of the correct class is encoded to give a
+heat map which superpixels are important for the correct class. This can also be done
+by setting the opacity accordingly.
+• Most-likely class image: Each of the most likely classes for all superpixels is
+represented by a color. The colored image thus gives clues why different predictions
+were assigned a high probability.
+Two methods to generate such images are explained in the following.
+Occlusion Sensitivity Analysis
+Occlusion sensitivity analysis is described in [ZF14]. The idea is to occlude a part of the
+image by something. This could be a gray square as in [ZF14] or a black superpixel as
+in [RSG16]. Then the classifier is run on the image again. This is done for each region (e.g.,
+superpixel or position of the square) and the regions are then colored to generate either a
+correct class heatmap or the most-likely class image. It is important to note that the color
+at region $r_i$ denotes the result if $r_i$ is occluded.
+Both visualizations are shown in Figure 2.10. One can see that the network makes sensible
+predictions for this image of the class “Pomeranian”. However, the image of the class “Afghan
+Hound” gets confused with “Ice lolly”, which is a sign that this needs further investigation.
+Gradient-based approaches
+In [SVZ13], a gradient-based approach was used to generate image-specific class saliency
+maps. The authors describe the problem as a ranking problem, where each pixel of the
+image $I_0$ is assigned a score $S_c(I_0)$ for a class $c$ of interest. CNNs are non-linear functions,
+but they can be approximated by the first order Taylor expansion $S_c(I) \approx w^T I + b$ where
+$w$ is the derivative of $S_c$ at $I_0$.
+ 
+2. Convolutional Neural Networks
+2.5.6. Argmax Method
+The argmax method has two variants:
+• Fixed class argmax: Propagate all elements of a given class through the network
+and analyze which neurons are activated most often / have the highest activation.
+• Fixed neuron argmax: Propagate the data through the network and find the n
+data elements which cause the highest activation for a given neuron.
+Note that a “neuron” is a filter in a CNN. The amount of activation of a filter F by an
+image I is calculated by applying F to I and calculating the element-wise sum of the result.
+Fixed-neuron argmax was applied in [ZF14]. However, they did not stop with that. Besides
+showing the 9 images which caused the highest activation, they also trained a deconvolutional
+neural network to project the activation of the filter back into pixel space.
+The fixed neuron argmax can be used qualitatively to get an impression of the kind of
+features which are learned. This is useful to diagnose problems, for example in [AM15] it is
+described that the network recognized the class “dumbbell” only if a hand was present, too.
+Fixed neuron argmax can also be used quantitatively to estimate the amount of parameters
+being shared between classes or how many parameters are mainly assigned to which classes.
+Going one step further from the fixed neuron argmax method is using an optimization
+algorithm to change an initial image minimally in such a way that any desired class gets
+predicted. This is called caricaturization in [MV16].
+2.5.7. Feature Map Reconstructions
+Feature map visualizations such as the ones made in [ZF14] (see Figure 2.11) give insights
+into the learned features. This shows what the network emphasizes. However, it is not
+necessarily the case that the feature maps allow direct and easy conclusions about the
+learned features. This technique is called inversion in [MV16].
+A key idea of feature map visualizations is to reconstruct a layer’s input, given its activation.
+This makes it possible to find which inputs would cause neurons to activate with extremely
+high or low values.
+More recent work like [NYC16] tries to make the reconstructions’ appearance look more
+natural.
+
+2.5. Analysis Techniques
+2.5.8. Filter comparison
+One question which might lead to some insight is how robust the features are which
+are learned. If the same network is trained with the same data, but different weight
+initializations, the learned weights should still be comparable.
+If the set of learned filters changes with initialization, this might be an indicator for too
+little capacity of that layer. Hence adding more filters to that layer could improve the
+performance.
+Filters can be compared with the k-translation correlation as introduced in [ZCZL16]:
+$$\rho_k(W_i, W_j) = \max_{(x,y) \in \{-k,\dots,k\}^2 \setminus (0,0)} \frac{\langle W_i, T(W_j, x, y) \rangle_f}{\|W_i\|_2 \, \|W_j\|_2} \in [-1, 1],$$
+where T (·, x, y) denotes the translation of the first operand by (x, y), with zero padding at
+the borders to keep the shape. $\langle \cdot, \cdot \rangle_f$ denotes the flattened inner product, where the two
+operands are flattened into column vectors before applying the standard inner product. The
+closer the absolute value of the k-translation correlation to one, the more similar two filters
+$W_i, W_j$ are. According to [ZCZL16], standard CNNs like AlexNet (see Appendix D.2) and
+VGG-16 (see Appendix D.3) have many filters which are highly correlated. They found
+this by comparing the averaged maximum k-translational correlation of the networks with
+Gaussian-distributed initialized filters. The averaged maximum k-translational correlation
+is defined as
+$$\bar{\rho}_k(W) = \frac{1}{N} \sum_{i=1}^{N} \max_{j=1, j \neq i}^{N} \rho_k(W_i, W_j)$$
+where $N$ is the number of filters in the layer $W$ and $W_i$ denotes the $i$th filter.
+2.5.9. Weight update tracking
+Andrej Karpathy proposed in the 5th lecture of CS231n to track weight updates to check if
+the learning rate is well-chosen. He suggests that the weight update should be in the order
+of $10^{-3}$. If the weight update is too high, then the learning rate has to be decreased. If the
+weight update is too low, then the learning rate has to be increased.
+The order of the weight updates as well as possible implications highly depend on the model
+and the training algorithm. See Appendix B.5 for a short overview of training algorithms
+for neural networks.
+ 
+2. Convolutional Neural Networks
+2.6. Accuracy boosting techniques
+There are techniques which can almost always be applied to improve accuracy of CNN
+classifiers:
+• Ensembles [CMS12]
+• Training-time augmentation (see Appendix B.2)
+• Test-time transformations [DDFK16, How13, HZRS15b]
+• Pre-training and fine-tuning [ZDGD14, GDDM14]
+One of the most simple ensemble techniques which was introduced in [CMS12] is averaging
+the prediction of n classifiers. This improves the accuracy even if the classifiers use exactly
+the same training setup by reducing variance.
+Data augmentation techniques give the optimizer the possibility to take invariances like
+rotation into account by generating artificial training samples from real training samples.
+Data augmentation hence reduces bias and variance with no cost at inference time.
+Data augmentation at inference time reduces the variance of the classifier. Similar to using
+an ensemble, it increases the computational cost of inference.
+Pre-training the classifier on another dataset to start from a good position or fine-tuning
+a model which was originally created for another task is also a common technique.
+
+2.6. Accuracy boosting techniques
+Figure 2.10.: Occlusion sensitivity analysis by [ZF14]: The left column shows three example images,
+where a gray square occluded a part of the image. This gray square’s center (x, y) was
+moved over the complete image and the classifier was run on each of the occluded
+images. The probability of the correct class, depending on the gray square’s position,
+is shown in the middle column. One can see that the predicted probability of the
+correct class “Pomeranian” drops if the face of the dog is occluded. The last image
+gives the class with the highest predicted probability. In the case of the Pomeranian,
+it always predicts the correct class if the head is visible. However, if the head of the
+dog is occluded, it predicts other classes.
+ 
+2. Convolutional Neural Networks
+Figure 2.11.: Filter visualization from [ZF14]: The filters themselves as well as the input feature
+maps which caused the highest activation are displayed.
+
+3. Topology Learning
+The topology of a neural network is crucial for the number of parameters, the number
+of floating point operations (FLOPs), the required memory, as well as the features being
+learned. The choice of the topology, however, is still mainly done by trial-and-error.
+This chapter introduces four general approaches to automatic topology learning: Growing a
+network from a minimal network in Section 3.1, pruning in Section 3.2, genetic approaches
+in Section 3.3 and reinforcement learning approaches in Section 3.4.
+3.1. Growing approaches
+Growing approaches for topology learning start with a minimal network, which only has
+the necessary number of input nodes and the number of output nodes which are determined
+by the application and the features of the input. They then apply a criterion to insert new
+layers / neurons into the network.
+In the following, Cascade-Correlation, Meiosis Networks and Automatic Structure Optimization
+ are introduced.
+3.1.1. Cascade-Correlation
+Cascade-Correlation was introduced in [FL89]. It generates a cascading architecture which
+is similar to the dense block described in Section 2.3.3.
+Cascade-Correlation works as follows:
+1. Initialization: The number of input nodes and the number of output nodes are
+defined by the problem. Create a minimal, fully connected network for those.
+2. Training: Train the network until the error no longer decreases.
+3. Candidate Generation: Generate candidate nodes. Each candidate node is connected
+ to all inputs. They are not connected to other candidate nodes and not
+connected to the output nodes.
+ 
+3. Topology Learning
+4. Correlation Maximization: Train the weights of the candidates by maximizing $S$,
+the correlation between the candidate’s output value $V$ and the network’s residual error:
+$$S = \sum_{o \in O} \left| \sum_{p \in T} (V_p - \bar{V}) (E_{p,o} - \bar{E}_o) \right|$$
+where $O$ is the set of output nodes, $T$ is the training set, $V_p$ is the candidate neuron’s
+activation for a training pattern $p$. $E_{p,o}$ is the residual output error at node $o$ for
+pattern $p$. $\bar{V}$ and $\bar{E}_o$ are averaged values over all elements of $T$. This step is finished
+when the correlation no longer increases.
+5. Candidate selection: Keep the candidate node with the highest correlation, freeze
+its incoming weights and add connections to the output nodes.
+6. Continue: If the error is higher than desired, continue with step 2.
+One network with three hidden nodes trained by Cascade-Correlation is shown in Figure 3.1.
+1
+Figure 3.1.: A Cascade-Correlation network with three input nodes (red) and one bias node (gray)
+to the left, three hidden nodes (green) in the middle and two output nodes in the upper
+right corner. The black squares represent frozen weights which are found by correlation
+maximization whereas the white squares are trainable weights.
+3.1.2. Meiosis Networks
+Meiosis Networks are introduced in [Han89]. In contrast to most MLPs and CNNs, where
+weights are deterministic and fixed at prediction time, each weight $w_{ij}$ in Meiosis networks
+follows a normal distribution:
+$$w_{ij} \sim \mathcal{N}(\mu_{ij}, \sigma_{ij}^2)$$
+
+3.2. Pruning approaches
+Hence every connection has two learned parameters: $\mu_{ij}$ and $\sigma_{ij}^2$.
+The key idea of Meiosis networks is to allow neurons to perform Meiosis, which is cell
+division. A node $j$ is split when the random part dominates the value of the sampled
+weights:
+$$\frac{\sum_i \sigma_{ij}}{\sum_i \mu_{ij}} > 1 \quad \text{and} \quad \frac{\sum_k \sigma_{jk}}{\sum_k \mu_{jk}} > 1$$
+The mean of the new nodes is sampled around the old mean, half the variance is assigned
+to the new connections.
+Hence Meiosis networks only change the number of neurons per layer. They do not add
+layers or add skip connections.
+3.1.3. Automatic Structure Optimization
+Automatic Structure Optimization (ASO) was introduced in [BM93] for the task of online
+handwriting recognition. It makes use of the confusion matrix $C = (c_{ij}) \in \mathbb{N}_{\geq 0}^{k \times k}$
+(see Section 2.5.2) to guide the topology learning. They define a confusion-symmetry matrix
+$S$ with $s_{ij} = s_{ji} = c_{ij} \cdot c_{ji}$. The maximum of $S$ defines where the ASO algorithm adds
+more parameters. The details of how the resources are added are not transferable to CNNs.
+3.2. Pruning approaches
+Pruning approaches start with a network which is bigger than necessary and prune it. The
+motivation to prune a network which has the desired accuracy is to save storage for easier
+model sharing, memory for easier deployment and FLOPs to reduce inference time and
+energy consumption. Especially for embedded systems, deployment is a challenge and low
+energy consumption is important.
+Pruning generally works as follows:
+1. Train a given network until a reasonable solution is obtained,
+2. prune weights according to a pruning criterion and
+3. retrain the pruned network.
+This procedure can be repeated.
+One family of pruning criteria uses the Hessian matrix. An example is Optimal Brain
+Damage (OBD) as introduced in [LDS+89]. For every single parameter $k$, OBD calculates
+the effect on the objective function of deleting $k$. The authors call the effect of the deletion
+
+3. Topology Learning
+of parameter $k$ the saliency $s_k$. The parameters with the lowest saliency are deleted, which
+means they are set to 0 and are not updated anymore.
+A follow-up method called Optimal Brain Surgeon [HSW93] claims to choose the weights
+in a much better way. This requires, however, to calculate the inverse Hessian matrix
+$H^{-1} \in \mathbb{R}^{n \times n}$ where $n \in \mathbb{N}$ is typically $n > 10^6$.
+A much simpler and computationally cheaper pruning criterion is the weight magnitude.
+[HPTD15] prunes all weights w which are below a threshold θ:
+$$w \leftarrow \begin{cases} w & \text{if } w \geq \theta \\ 0 & \text{otherwise} \end{cases}$$
+3.3. Genetic approaches
+The general idea of genetic algorithms (GAs) is to encode the solution space as genes, which
+can recombine themselves via crossover and inversion. An introduction to such algorithms
+is given in [ES03].
+Commonly used techniques to generate neural networks by GAs are NEAT [SM02] and its
+successors HyperNEAT [SDG09] and ES-HyperNEAT [RLS10].
+The results, however, are of unacceptable quality: On MNIST (see Appendix E), where
+random chance gives 10 % accuracy, even simple topologies trained with SGD achieve
+about 92 % accuracy [TF-16a] and state of the art is 99.79 % [WZZ+13], the HyperNEAT
+algorithm achieves only 23.9 % accuracy [VH13].
+Kocmánek shows in [Koc15] that HyperNEAT approaches can achieve 96.47 % accuracy
+on MNIST. Kocmánek mentions that HyperNEAT becomes slower with each hidden layer
+so that not more than three hidden layers could be trained. At the same time, VGG-19
+[SZ14] already has 19 hidden layers and ResNets are successfully trained with 1202 layers
+in [HZRS15a].
+[LX17] shows that Genetic algorithms can achieve competitive results on MNIST and
+SVHN, but the best results on CIFAR-10 were 7.10 % error whereas the state of the art is
+at 3.74 % [HLW16]. Similarly, the Genetic algorithm achieves 29.03 % error on CIFAR-100,
+but the state of the art is 17.18 % [HLW16].
+3.4. Reinforcement Learning
+Reinforcement learning is a sub-field of machine learning, which focuses on the question
+how to choose actions that lead to high rewards.
+
+3.5. Convolutional Neural Fabrics
+One can think of the search for good neural network topologies as a reinforcement learning
+problem. The agent is a recurrent neural network which can generate bitstrings. Those
+variable-length bitstrings encode neural network topologies.
+In 2016, this approach was applied to construct neural networks for computer vision.
+In [BGNR16], Q-learning with an ε-greedy exploration was applied.
+In [ZL16], the REINFORCE algorithm from [Wil92] was used to train state of the art models
+for CIFAR-10 and the Penn Treebank dataset. A drawback of this method is that enormous
+amounts of computational resources were used to obtain those results.
+3.5. Convolutional Neural Fabrics
+Convolutional Neural Fabrics are introduced in [SV16]. They side-step hard decisions
+about topologies by learning an ensemble of different CNN architectures. The idea is to
+define a single architecture as a trellis through a 3D grid of nodes. Each node represents a
+convolutional layer. One dimension is the index of the layer, the other two dimensions are
+the amount of filters and the feature size. Each node is connected to nine other nodes and
+thus represents nine possible choices of convolutional layers:
+• Resolution: (i) convolution with stride=1 or (ii) convolution with stride=2 or
+(iii) deconvolution (doubling the resolution)
+• Channels: (i) half the number of filters of the layer before, (ii) the same number
+of filters as the layer before, or (iii) double the number of filters of the layer before
+They always use ReLU as an activation function and they always use filters of size 3 × 3.
+They don’t use pooling at all.
+ 
+3. Topology Learning
+
+4. Hierarchical Classification
+Designing a classifier for a new dataset is hard for two main reasons: Many design choices are
+not clearly superior to others and evaluating one design choice takes much time. Especially
+CNNs are known to take several days [KSH12, SLJ+15] or even weeks [SZ14] to train.
+Additionally, some methods for analyzing a dataset become harder to use with more classes
+and more training samples. Examples are t-SNE, the manual inspection of errors and
+confusion matrices, and the argmax method.
+One idea to approach this problem is by building a hierarchy of classifiers. The root
+classifier distinguishes clusters of classes, whereas the leaf classifiers distinguish single
+classes. Figure 4.1 gives an example for a hierarchy of classifiers.
+Figure 4.1.: Example for a hierarchy of classifiers. Each classifier is visualized by a rounded rectangle.
+The root classifier $C_0$ has to distinguish six coarse classes (pedestrian, four+-wheelers,
+traffic signs, two-wheelers, street, other) or 17 fine-grained classes. If $C_0$ predicts a
+pedestrian, another classifier has to predict if it is an adult or a child. Similarly, if $C_0$
+predicts traffic sign, then another classifier has to predict if it is a speed limit, a
+sign indicating danger or something else. If $C_0$, however, predicts road, then no other
+classifier will become active.
+In this example, the problem has 17 classes. The hierarchical approach introduces
+7 clusters of classes and thus uses 8 classifiers.
+Such a hierarchy of classifiers needs clusters of classes.
+ 
+4. Hierarchical Classification
+4.1. Advantages of classifier hierarchies
+Having a classifier hierarchy has five advantages:
+• Division of labor: Different teams can work together. Instead of having a monolithic
+task, the solutions can be combined.
+• Guarantees: Changing a classifier will only change the prediction of itself and its
+children. Siblings are not affected. In the example from Figure 4.1, the classifier
+which distinguishes traffic signs can be changed while the classification as pedestrian,
+four+-wheelers, traffic sign, street, other will not be affected. Also, the
+classification between speed limits, danger signs and other signs will not change.
+• Faster training: Except for the root classifier $C_0$, each other classifier will have
+less than the total amount of training data. Depending on the combined classes, the
+models could also be simpler. Hence the training time is reduced.
+• Weighting of errors: In practice, some errors are more severe than others. For
+example, it could be acceptable if the two-wheelers classifier has an error rate of
+40 %. But it is not acceptable if the speed limit classifier has such a high error rate.
+• Post-hoc explanations: The simpler a model is, the easier it is to explain why a
+classification is made the way it is made.
+4.2. Clustering classes
+There are two ways to cluster classes: By similarity or by semantics. While semantic
+clustering needs either additional information or manual work, the similarity can be
+automatically inferred from the data. As pointed out in [XZY+14], semantically similar
+classes are often also visually similar. For example, in the ImageNet dataset most dogs
+are semantically and visually more similar to each other than to non-dogs. An example
+where this is obviously not the case are symbols: The summation symbol \sum is identical
+in appearance to the Greek letter \Sigma, but semantically much closer to the addition
+operator +.
+One approach to cluster classes by similarity is to train a classifier and examine its
+predictions. Each class is represented in the confusion matrix by one row. Those rows
+can be directly clustered with standard clustering algorithms such as k-means, DBSCAN [EKS+96],
+OPTICS [ABKS99], CLARANS [NH02], DIANA [KR09], AHC (see [HPK11]) or spectral
+clustering as in [XZY+14]. Those clusterings, however, are hard to interpret and most of
+them do not allow a human to improve the found clustering manually.
+The confusion matrix $(c)_{ij} \in \mathbb{N}^{k \times k}$ states how often class $i$ was present and class $j$ was
+
+4.2. Clustering classes
+predicted. The more often this confusion happens, the more similar those two classes are to
+the classifier. Based on the confusion matrix, the classes can be clustered as explained in
+the following.
+[HAE16] indicates that more classes make it easier to generalize, but the accuracy gains
+diminish after a critical point of classes is reached. Hence a binary tree might not be a
+good choice. As an alternative, an approach which allows building arbitrarily many clusters
+is proposed.
+The proposed algorithm has two main ideas:
+• The order of columns and rows in the confusion matrix is arbitrary. This means one
+can swap rows and columns. If row i and j are swapped, then the columns i and j
+have to be swapped too in order to keep the same confusion matrix.
+• If two classes are confused often, then they are similar to the classifier.
+Hence the order of the classes is permutated in such a way that the highest errors are close
+to the diagonal. One possible objective function to be minimized is
+$$f(C) = \sum_{i=1}^{n} \sum_{j=1}^{n} C_{ij} \cdot |i - j| \tag{4.1}$$
+which punishes errors linearly with the distance to the diagonal. This method is called CMO
+in the following.
+As pointed out by Tobias Ribizel (personal communication), this optimization problem
+is a weighted version of the Optimal Linear Arrangement problem. That problem is NP-complete
+[GJ02, GJS76]. Simulated Annealing as described in Algorithm 1, however,
+produces reasonable clusterings as well as visually appealing confusion matrices. The
+algorithm works as follows: First, decide with probability 0.5 if only two random rows are
+swapped or a block is swapped. If two rows are swapped, choose both of them randomly.
+If a block is swapped, then choose the start randomly and the end of the block randomly
+after the start. The insert position has to be a valid position considering the block length,
+but besides that it is also chosen uniformly random.
+Simple row-swapping can exploit local improvements. For example, in the context of
+ImageNet, it can swap the dog-class Silky Terrier to the dog-class Yorkshire terrier
+and both dog classes Dalmatian and Greyhound next to each other. The two clusters
+of dog breeds could be separated by car and bus due to random chance. Moving any single
+class increases the score, but moving either one of the dog breed clusters or the vehicle
+cluster decreases the score. Hence it is beneficial to implement block moving.
+One advantage of permutating the classes in order to minimize Equation (4.1) in comparison
+to spectral clustering as used in [XZY+14] is that the adjusted confusion matrix can be
+
+4. Hierarchical Classification
+split into many much smaller matrices along the diagonal. In the case of many classes (e.g.,
+1000 classes of ImageNet or 369 classes of HASYv2) this permutation makes it possible to
+visualize the types of errors made. If the errors are systematic due to visual similarity, many
+confusions are not made and thus many elements of the confusion matrix are close to 0.
+Those will be moved to the corners of the confusion matrix by optimizing Equation (4.1).
+Once a permutation of the classes is found which has a low score for Equation (4.1), the clusters
+can either be made by hand by deciding why classes should not be in one cluster. With
+such a permutation, only n − 1 binary decisions have to be made and hence only the list of
+classes has to be read. Alternatively, one can calculate the confusions $C_{i,i+1} + C_{i+1,i}$ for
+each pair of classes which are neighbors in the confusion matrix. The higher this value, the
+more similar are the classes according to the classifier. Hence a threshold θ can be applied.
+θ can either be set automatically (e.g., such that 10 % of all pairs are above the threshold)
+or semi-automatically by asking the user for information if two classes belong to the same
+cluster. Such an approach only needs log(n) binary decisions from the user where n is the
+number of classes.
+Please note that CMO only works if the classifier is neither too bad nor too good. A classifier
+which does not solve the task at all might just give almost uniform predictions whereas the
+confusion matrix of an extremely good classifier is almost diagonal and thus contains no
+information about the similarity of classes. One possible solution to this problem is to take
+the prediction of the class in contrast to using only the argmax in order to find a useful
+permutation.
+
+5. Experimental Evaluation
+All experiments are implemented using Keras 2.0 [Cho15] with Tensorflow 1.0 [AAB+16]
+and cuDNN 5.1 [CWV+14] as the backend. The experiments were run on different machines
+with different Nvidia graphics processing units (GPUs), including the Titan Black, GeForce
+GTX 970 and GeForce 940MX.
+The GTSRB [SSSI12], SVHN [NWC+11b], CIFAR-10 and CIFAR-100 [Kri], MNIST [YL98],
+HASYv2 [Tho17a] and STL-10 [CLN10] datasets are used for the evaluation. Those datasets are
+used as their size is small enough to be trained within a day. Other classification datasets
+which were considered are listed in Appendix E.
+CIFAR-10 (Canadian Institute for Advanced Research 10) is a 10-class dataset of color
+images of the size 32 px × 32 px. Its ten classes are airplane, automobile, bird, cat, deer,
+dog, frog, horse, ship, truck. The state of the art achieves an accuracy of 96.54 % [HLW16].
+According to [Kar11], human accuracy is at about 94 %.
+CIFAR-100 is a 100-class dataset of color images of the size 32 px × 32 px. Its 100 classes
+are grouped to 20 superclasses. It includes animals, people, plants, outdoor scenes, vehicles
+and other items. CIFAR-100 is not a superset of CIFAR-10, as CIFAR-100 does not contain
+the class airplane. The state of the art achieves an accuracy of 82.82 % [HLW16].
+GTSRB (German Traffic Sign Recognition Benchmark) is a 43-class dataset of traffic signs.
+The 51 839 images are in color and of a minimum size of 25 px × 25 px up to 266 px × 232 px.
+The state of the art achieves 99.46 % accuracy with an ensemble of 25 CNNs [SL11].
+According to [SSSI], human performance is at 98.84 %.
+HASYv2 (Handwritten Symbols version 2) is a 369 class dataset of black-and-white images
+of the size 32 px × 32 px. The 369 classes contain the Latin and Greek letters, arrows,
+mathematical symbols. The state of the art achieves an accuracy of 82.00 % [Tho17a].
+STL-10 (self-taught learning 10) is a 10-class dataset of color images of the size 96 px × 96 px.
+Its ten classes are airplane, bird, car, cat, deer, dog, horse, monkey, ship, truck. The state
+of the art achieves an accuracy of 74.80 % [ZMGL15]. It contains 100 000 unlabeled images
+for unsupervised training and 500 images per class for supervised training.
+SVHN (Street View House Numbers) exists in two formats. For the following experiments,
+the cropped digit format was used. It contains the 10 digits cropped from photos of Google
+Street View. The images are in color and of size 32 px × 32 px. The state of the art
+
+5. Experimental Evaluation
+achieves an accuracy of 98.41 % [HLW16]. According to [NWC+11a], human performance
+is at 98.0 %.
+As a preprocessing step, the pixel-features were divided by 255 to obtain values in [0, 1].
+For GTSRB, the training and test data was scaled to 32 px × 32 px.
+5.1. Baseline Model and Training setup
+The baseline model is trained with Adam [KB14], an initial learning rate of $10^{-4}$, a batch
+size of 64 for at most 1000 epochs with data augmentation. The kind of data augmentation
+depends on the dataset:
+• CIFAR-10, CIFAR-100 and STL-10: Random width and height shift by at most
+±3 pixels in either direction; Random horizontal flip.
+• GTSRB, MNIST: Random width and height shift by at most ±5 pixels in either
+direction; random rotation by at most ±15 degrees; random channel shift; random
+zoom in [0.5, 1.5]; random shear by at most 6 degrees.
+• HASYv2: Random width and height shift by at most ±5 pixels in either direction;
+random rotation by at most ±5 degree.
+• SVHN: No data augmentation.
+If the dataset does not define a training/test set, a stratified 67 % / 33 % split is applied. If
+the dataset does not define a validation set, the training set is split in a stratified manner
+into 90 % training set / 10 % validation set.
+Early stopping [Pre98] with the validation accuracy as a stopping criterion and a patience of
+10 epochs is applied. After this, the model is trained without data augmentation for at most
+1000 epochs with early stopping and the validation accuracy as a stopping criterion and a
+patience of 10 epochs. Kernel weights are initialized according to the uniform initialization
+scheme of He [HZRS15b] (see Appendix B.3).
+The architecture of the baseline model uses a pattern of
+$\text{Conv-Block}(n) = (\text{Convolution} - \text{Batch Normalization} - \text{Activation})^n - \text{Pooling}$
+The activation function is the Exponential Linear Unit (ELU) (see Table B.3), except for
+the last layer where softmax is used. Before each of the last two convolutional layers, a dropout
+layer with dropout probability 0.5 is applied. The architecture is given in detail in Table 5.1.
+Please note that the number of input- and output channels of the network depends on
+the dataset. If the input image is larger than 32 px × 32 px, for each power of two a
+Conv-Block(2) is added at the input. For MNIST, the images are bilinearly upsampled to
+32 px × 32 px.
+
+5.1. Baseline Model and Training setup
+# Type Filters @
+Patch size / stride Parameters FLOPs Output size
+Input 0 0 3 @ 32 × 32
+1 Convolution 32 @ 3 × 3 × 3 / 1 896 1 736 704 32 @ 32 × 32
+2 BN + ELU 64 163 904 32 @ 32 × 32
+3 Convolution 32 @ 3 × 3 × 32 / 1 9 248 18 841 600 32 @ 32 × 32
+4 BN + ELU 64 163 904 32 @ 32 × 32
+Max pooling 2 × 2 / 2 0 40 960 32 @ 16 × 16
+5 Convolution 64 @ 3 × 3 × 32 / 1 18 496 9 420 800 64 @ 16 × 16
+6 BN + ELU 128 82 048 64 @ 16 × 16
+7 Convolution 64 @ 3 × 3 × 64 / 1 36 928 18 857 984 64 @ 16 × 16
+8 BN + ELU 128 82 048 64 @ 16 × 16
+Max pooling 2 × 2 / 2 20 480 64 @ 8 × 8
+9 Convolution 64 @ 3 × 3 × 64 / 1 36 928 4 714 496 64 @ 8 × 8
+10 BN + ELU 128 20 608 64 @ 8 × 8
+Max pooling 2 × 2 / 2 5 120 64 @ 4 × 4
+11 Convolution (v) 512 @ 4 × 4 × 64 / 1 524 800 1 048 064 512 @ 1 × 1
+12 BN + ELU 1 024 3 584 512 @ 1 × 1
+Dropout 0.5 0 0 512 @ 1 × 1
+13 Convolution 512 @ 1 × 1 × 512 / 1 262 656 523 776 512 @ 1 × 1
+14 BN + ELU 1 024 3 584 512 @ 1 × 1
+Dropout 0.5 0 0 512 @ 1 × 1
+15 Convolution k @ 1 × 1 × 512 / 1 k · (512 + 1) 1024 · k k @ 1 × 1
+Global avg Pooling 1 × 1 0 k k @ 1 × 1
+16 BN + Softmax  2k 7k k @ 1 × 1
+
+Σ 515k + 892 512 1032k + 55 729 664 103 424 + 2k
+Table 5.1.: Baseline architecture with 3 input channels of size 32 × 32. All convolutional layers
+use SAME padding, except for layer 11 which used VALID padding in order to decrease
+the feature map size to 1 × 1. If the input feature map is bigger than 32 × 32, for
+each power of two there are two Convolution + BN + ELU blocks and one Max pooling
+block added. This is the framed part in the table.
+32 × 32Input
+C 32@3 × 3/1
+BN + ELU
+C 32@3 × 3/1
+BN + ELU 16 × 16max pooling 2 × 2/2
+C 64@3 × 3/1
+BN + ELU
+C 64@3 × 3/1
+BN + ELU 8 × 8max pooling 2 × 2/2
+C 64@3 × 3/1
+BN + ELU 4 × 4max pooling 2 × 2/2
+C 512@4 × 4/1 (V)
+BN + ELU
+Dropout, p = 0.5 1 × 1C 512@1 × 1/1
+BN + ELU
+Dropout, p = 0.5
+C k@1 × 1/1
+Global AVG pooling
+BN + Softmax
+Figure 5.1.: Architecture of the baseline model. C 32@3 × 3/1 is a convolutional layer with 32 filters
+of kernel size 3 × 3 with stride 1.
+ 
+5. Experimental Evaluation
+5.1.1. Baseline Evaluation
+The results for the baseline model evaluated on eight datasets are given in Table 5.2. The
+speed for inference for different GPUs is given in Table 5.3.
+Dataset Single Model Accuracy Ensemble of 10
+Training Set Test Set Training Set Test Set
+Asirra 94.22 % σ = 3.49 94.37 % σ = 3.47 97.07 % 97.37 %
+CIFAR-10 91.23 % σ = 1.10 85.84 % σ = 0.87 92.36 % 86.75 %
+CIFAR-100 76.64 % σ = 1.48 63.38 % σ = 0.55 78.30 % 64.70 %
+GTSRB 100.00 % σ = 0.00 99.18 % σ = 0.11 100.00 % 99.46 %
+HASYv2 89.49 % σ = 0.42 85.35 % σ = 0.10 89.94 % 86.03 %
+MNIST 99.93 % σ = 0.07 99.53 % σ = 0.06 99.99 % 99.58 %
+STL-10 94.12 % σ = 0.87 75.67 % σ = 0.34 96.35 % 77.62 %
+SVHN 99.02 % σ = 0.07 96.28 % σ = 0.10 99.42 % 97.20 %
+Table 5.2.: Baseline model accuracy on eight datasets. The single model accuracy is the mean accuracy of the 10 models
+used in the ensemble. The empirical standard deviation σ of the accuracy is also given.
+CIFAR-10, CIFAR-100 and STL-10 models use test-time transformations. None of the
+models uses unlabeled data or data from other datasets. For HASYv2 no test time
+transformations are used.
+Network GPU Tensorflow Inference per Training
+1 Image 128 images time / epoch
+Baseline Default Intel i7-4930K 3 ms 244 ms 231.0 s
+Baseline Optimized Intel i7-4930K 2 ms 143 ms 149.0 s
+Baseline Default GeForce 940MX 4 ms 120 ms 145.6 s
+Baseline Default GTX 970 6 ms 32 ms 25.0 s-26.3 s
+Baseline Default GTX 980 3 ms 24 ms 20.5 s-21.1 s
+Baseline Default GTX 980 Ti 5 ms 27 ms 22.0 s-22.1 s
+Baseline Default GTX 1070 2 ms 15 ms 14.4 s-14.5 s
+Baseline Default Titan Black 4 ms 25 ms 28.1 s-28.1 s
+Baseline Optimized Titan Black 3 ms 22 ms 24.4 s-24.4 s
+DenseNet-40-12 Default GeForce 940MX 27 ms 2403 ms —
+Table 5.3.: Speed comparison of the baseline model on CIFAR-10. The baseline model is evaluated on
+six Nvidia GPUs and one CPU. The weights for DenseNet-40-12 are taken from [Maj17].
+Weights of the baseline model can be found at [Tho17b]. The optimized Tensorflow build
+makes use of SSE4.X, AVX, AVX2 and FMA instructions.
+
+5.1. Baseline Model and Training setup
+5.1.2. Weight distribution
+The distribution of filter weights by layer is visualized in Figure 5.2 and the distribution
+of bias weights by layer is shown in Figure 5.3. Although both figures only show the
+distribution for one specific model trained on CIFAR-100, the following observed patterns
+are consistent for 70 models (7 datasets and 10 models per dataset):
+• The empirical [0.5-percentile, 99.5-percentile] interval which contains 99 % of the
+filter weights is almost symmetric around zero. The same is true for the bias weights.
+• The farther away a layer is from the input, the smaller the 99-percentile interval is,
+except for the last layer (see Table A.1).
+• The 99-percentile interval of the first layer's filter weights is about [−0.5, +0.5], except
+for MNIST and HASYv2 where it is in [−0.8, 0.8].
+• The 99-percentile interval of the first layer's bias weights is always in [−0.2, 0.2].
+• The distribution of filter weights of the last convolutional layer is not symmetric. In
+some cases the distribution is also not unimodal.
+• The bias weights of the last three layers are very close to zero. The absolute value of
+most of them is smaller than $10^{-2}$.
+Similarly, Figure 5.4 and Figure 5.5 show the distribution of the γ and the β parameter of
+Batch Normalization. It is expected that γ is close to 1 and β is close to 0. In those cases,
+the Batch Normalization layer equals the identity and thus is only relevant for the training.
+While γ and β do not show as clear patterns as the filter and bias weights of convolutional
+layers, some observations are also consistent through all models even for different datasets:
+• γ of the last layer (layer 16) is bigger than 1.3.
+• The 99-percentile interval for β of the last layer is longer than the other 99-percentile
+intervals.
+• The 99-percentile interval for β of the fourth-last (layer 14 for STL-10, layer 10 for
+all other models) is more negative than all other layers.
+Finally, the distribution of filter weight ranges is plotted in Figure 5.6 for each convolutional
+layer. The ranges are calculated for each channel and filter separately. The smaller the
+values are, the less information is lost if the filters are replaced by smaller filters.
+ 
+5. Experimental Evaluation
+Figure 5.2.: Violin plots of the distribution of filter weights of a baseline model trained on CIFAR-100.
+The weights of the first layer are relatively evenly spread in the interval [−0.4, +0.4].
+With every layer the interval which contains 95 % of the weights and is centered around
+the mean becomes smaller, especially with layer 11 where the feature maps are of
+size 1 × 1. In contrast to the other layers, the last convolutional layer has a bimodal
+distribution.
+This plot indicates that the network might benefit from bigger filters in the first layer,
+whereas the filters in layers 7 – 11 could potentially be smaller.
+Figure 5.3.: Violin plots of the distribution of bias weights of a baseline model trained on CIFAR-100.
+While the first layer's biases are in [−0.1, +0.1], after each max-pooling layer the interval
+which contains 95 % of the weights and is centered around the mean becomes smaller.
+In the last three convolutional layers, most bias weights are in [−0.005, +0.005].
+
+5.1. Baseline Model and Training setup
+Figure 5.4.: Violin plots of the distribution of the γ parameter of Batch Normalization layers of a
+baseline model trained on CIFAR-100.
+Figure 5.5.: The distribution of the β parameter of Batch Normalization layers of a baseline model
+trained on CIFAR-100.
+ 
+5. Experimental Evaluation
+Figure 5.6.: The distribution of the range of values (max - min) of filters by channel and layer. For
+each filter, the range of values is recorded by channel. The smaller this range is, the
+less information is lost if a n × n filter is replaced by a (n − 1) × (n − 1) filter.
+
+5.1. Baseline Model and Training setup
+5.1.3. Training behavior
+Due to early stopping, the number of epochs for which a model was trained differs. The number
+of epochs trained with augmentation ranged from 133 epochs to 182 epochs with a standard
+deviation of 17.3 epochs for CIFAR-100.
+Figure 5.7 shows the worst and the best validation accuracy during the training with
+augmented data. Different initializations lead to very similar validation accuracies during
+training. The image might lead to the wrong conclusion that models which are better at
+the start are also better at the end. In order to check this hypothesis, the relative order of
+validation accuracies for the 10 CIFAR-100 models was examined. If the relative ordering
+stays approximately the same, then it can be considered to run the first few epochs many
+times and only train the best models to the end. For 10 models, there can be $\frac{10^2 - 10}{2} = 45$
+pair-wise changes in the ordering at maximum if the relative order of validation accuracies
+is reversed. For the baseline model, 21.8 changes in the relative order of accuracies occurred
+on average for each pair of epochs $(i, i + 1)$. This means if one knows only the relative order
+of the validation accuracy of two models $m$ and $m'$ in epoch $i$, it is doubtful if one can
+make any statement about the ordering of $m$ and $m'$ in epoch $i + 1$.
+0
+ 10 20
+ 30 40 50 60 70 80 90
+ 100 110 120
+ 130 1400.20.30.40.50.60.7
+ epochvalidationaccuracy
+ maximum validation accuracy
+minimum validation accuracy 1.5
+2
+2.5
+3
+3.5
+4
+4.5 loss
+maximum validation accuracy
+minimum validation accuracy
+mean loss
+Figure 5.7.: Minimum and maximum validation accuracy of the 10 trained models by epoch. The
+differences do not exceed 1 % and do not increase by training epoch. Four models
+stopped the first training stage at epoch 133 which causes the shift in the loss and the
+maximum validation accuracy.
+Figures 5.8 to 5.10 show how the weights changed while training on CIFAR-100. It was
+expected that the absolute value of weight updates during epochs (sum, max, and mean)
+decrease in later training stages. The intuition was that weights need to be adjusted in a
+coarse way first. After that, the intuition was that only slight modifications are applied by
+
+5. Experimental Evaluation
+the SGD based training algorithm (ADAM). The mean, max and sum of weight updates as
+displayed in Figures 5.8 to 5.10, however, do not show such a clear pattern. The biggest
+change happens as expected in the first epoch after the weights are initialized. The change
+from augmented training to non-augmented training was at epoch 156 to epoch 157.
+It can be observed that layers which receive more input feature maps get larger weight
+updates in mean. As layers which are closer to the output take more input feature maps,
+their weight updates are larger. This pattern does not occur when SGD is used as the
+optimizer.
+Figure 5.8.: Mean weight updates of the baseline model between epochs by layer.
+
+5.1. Baseline Model and Training setup
+Figure 5.9.: Maximum weight updates of the baseline model between epochs by layer.
+Figure 5.10.: Sum of weight updates of the baseline model between epochs by layer.
+ 
+5. Experimental Evaluation
+5.2. Confusion Matrix Ordering
+The visualization of the confusion matrix can give valuable information about which part
+of the task is hard. For more than about 10 classes, however, it becomes hard to visualize
+and read.
+For CIFAR-10, the proposed method groups the four object classes and the six animal
+classes together (see Figure 5.11a).
+(a) CIFAR-10 Test set (b) Random
+Figure 5.11.: Figure 5.11a shows an ordered confusion matrix of the CIFAR-10 dataset. The diagonal
+elements are set to 0 in order to make other elements easier to see.
+Figure 5.11b shows a confusion matrix with random mistakes.
+The first image of Figure 5.12 shows one example of a classifier with only 97.13 % test
+accuracy where a good permutation was found. Please note that this is not the best classifier.
+The confusion matrix which resulted from a baseline classifier with 99.32 % test accuracy is
+displayed as the second image.
+Those results suggest that the ordering of classes is a valuable tool to make patterns easier
+to see. Humans, however, are good at finding patterns even if they come from random noise.
+Hence, for comparison, a confusion matrix of a classifier with 30 classes, 60 % accuracy
+and 40 % uniformly random errors of a balanced dataset is created, optimized according to
+Equation (4.1) and shown in Figure 5.11b. It clearly looks different than Figure 5.11a.
+On the HASYv2 dataset the class-ordering is necessary to see anything as most possible
+confusions do not happen. See Figure 5.13 for comparison of the first 50 classes of the
+unsorted confusion matrix and the sorted confusion matrix. If confusion matrices of a
+maximum size of 50 × 50 are displayed, the ordered method can show only 8 matrices
+because the off-diagonal matrices are almost 0. Without sorting, 64 matrices have to be
+displayed.
+
+5.2. Confusion Matrix Ordering
+Figure 5.12.: The first image shows the confusion matrix for the test set of GTSRB after optimization
+to Equation (4.1). The diagonal elements are set to 0 in order to make other elements
+easier to see. The symbols next to the label on the vertical axis indicate the shape
+and the color of the signs.
+The second image shows the same, but with baseline model.
+Best viewed in electronic form.  
+Figure 5.13.: The first 50 entries of the confusion matrix of the HASYv2 dataset. The diagonal
+elements are set to 0 in order to make other elements easier to see. The top image
+shows arbitrary class ordering, the bottom image shows the optimized ordering.
+5.3. Spectral Clustering vs CMO
+5.3. Spectral Clustering vs CMO
+This section evaluates the clustering quality of CMO in comparison to the clustering quality
+of spectral clustering.
+The evaluated model achieves 70.50 % training accuracy and 53.16 % test accuracy on
+CIFAR-100. Figure 5.14 shows the sorted confusion matrix.
+Figure 5.14.: The first 50 entries of the ordered confusion matrix of the CIFAR-100 dataset. The
+diagonal elements are set to 0 in order to make other elements easier to see. Best
+viewed in electronic form.
+CIFAR-100 has pre-defined coarse classes. Those are used as a ground truth for the clusters
+which are to be found. The number of errors is determined by (i) Join all $n$ clusters which
+contain the classes of the coarse class $C$ to a set $M$. The error is $n - 1$. (ii) Within $M$, find the
+set of classes $M^-$ which do not belong to $C$. (iii) The final error is $(n - 1) + |M^-|$. As can be
+seen in Table 5.4, both clustering methods find reasonable clusters. CMO, however, has
+only half the error of spectral clustering.
+The results for the HASYv2 dataset are qualitatively similar (see Table 5.5). It should be
+noted that the number of clusters was determined by using the semi-automatic method
+based on CMO as described in Section 4.2.
+ 
+5. Experimental Evaluation
+Cluster Spectral clustering Errors CMO Errors
+fish aquarium fish, orchid + flatfish
++ ray, shark + trout, lion 5 aquarium fish, orchid + flatfish
++ ray + shark, trout 4
+flowers orchid, aquarium fish + sunflower
+ + poppy, tulip + rose,
+train  5
+ orchid, aquarium fish + sunflower,
+ poppy, tulip, rose 2
+people baby, boy, man + girl + woman 2 baby, boy, girl, woman, man 0
+reptiles crocodile, plain, road, table,
+wardrobe + dinosaur + lizard
++ snake, worm + turtle 9 crocodile, lizard, lobster, caterpillar
+ + dinosaur + snake + turtle,
+ crab 6
+trees maple, oak, pine + willow, forest
++ palm  3 palm, willow, pine, maple, oak 0
+Total 24 12
+Table 5.4.: Differences in spectral clustering and CMO. Classes in a cluster are separated by ,
+whereas clusters are separated by +.
+Cluster Spectral clustering Errors CMO Errors
+A A, A, A 0 A, A, A , Å 1
+B B , B 0 B, B 0
+C C , c, ⊂ and C , ξ, E and C 4 C , c, ⊂, C and C 1
+D D, D, D ,  1 D, D, D 0
+E E and E , ε 2 E and E , ε, , ∈ 4
+F F and F , F 1 F and F , F 1
+H H and H , κ and H 3 H and H, H 1
+K K , κ 0 K , κ 0
+L L,  and L, L 1 L,  and L, L 1
+M M and M and M 2 M and µ, M and M 3
+N N and N, N and N 2 N and N, N and N , ℵ 3
+O O, O, 0, ◦, °,  and o 1 O, O, 0, ◦, ° and  and o 2
+P P , P and p, ρ and P and ℘ 3 P and P , P , ℘ and p, ρ 2
+Q Q, Q, Q, ι, , , , , Æ, 1 7 Q and Q, Q 1
+R R, R and R, R, k and  3 R and , R, R, R 1
+S S , s, S 0 S , s, S 0
+T T ,  and T , τ 1 T ,  and T , τ 1
+U U , ∪ and u, U , A 1 U , u, U , A and ∪ 2
+V V , v, ∨ 0 V , v, ∨ 0
+W W , w, ω 0 W , w and ω 1
+X X , x, X , χ, × 0 X , x, X , χ, × 0
+Y Y and y 1 Y , y 0
+Z Z , z, Z and Z, Z 1 Z , z, Z, Z , Z 0
+Total 34 25
+Table 5.5.: Differences in spectral clustering and CMO.
+
+5.4. Hierarchy of Classifiers
+5.4. Hierarchy of Classifiers
+In a first step, a classifier is trained on the 100 classes of CIFAR-100. The fine-grained root
+classifier achieves an accuracy of 65.29 % with test-time transformations. The accuracy on
+the found sub-classes are listed in Table 5.6. The fact that the root classifier achieves better
+results within a cluster than the specialized leaf classifiers in 13 of 14 cases could either
+be due to limited training data, overfitting or the small size of 32 px × 32 px of the data.
+The experiment also shows that most of the errors are due to not identifying the correct
+cluster. Hence, in this case, more work in improving the root classifier is necessary rather
+than improving the discrimination of classes within a cluster.
+Although the classes within a cluster capture most of the classifications, many misclassifications
+ happen outside of the clusters. For example, in cluster 3, a perfect leaf classifier would
+push the accuracy in the full column only to 63.50 % due to errors of the root classifier
+where the root classifier does not predict the correct cluster.
+The leaf classifiers use the same topology as the root classifier. By initializing them with
+the root classifier's weights their performance can be pushed to about the inner accuracy.
+They are, however, only useful if their accuracy is well above the inner accuracy of the root
+classifier. Hence, for CIFAR-100, building hierarchies of classifiers is not useful.
+Cluster Classes  accuracy
+root classifier leaf classifier
+cluster identified class identified | cluster class identified | cluster
+1 3 69.67 % 84.27 % 72.98 %
+2 5 46.60 % 58.54 % 43.47 %
+3 2 58.50 % 92.13 % 83.46 %
+4 2 50.50 % 87.83 % 81.74 %
+5 3 44.67 % 79.29 % 71.01 %
+6 2 29.50 % 78.67 % 72.00 %
+7 2 52.50 % 92.11 % 87.72 %
+8 2 59.50 % 86.23 % 81.88 %
+9 2 59.00 % 90.08 % 87.79 %
+10 2 62.00 % 85.52 % 73.10 %
+11 2 67.00 % 87.01 % 75.32 %
+12 2 72.50 % 94.77 % 76.77 %
+13 2 64.00 % 82.58 % 86.27 %
+14 2 79.67 % 89.85 % 89.10 %
+Table 5.6.: Accuracies of the root classifier trained on the full set of 100 classes evaluated on
+14 clusters of classes. Each class has 100 elements to test. The column cluster identified
+gives the percentage that the root classifiers argmax prediction is within the correct
+cluster, but not necessarily the correct class. The columns class identified | cluster only
+consider data points where the root classifier correctly identified the cluster.
+ 
+5. Experimental Evaluation
+5.5. Increased width for faster learning
+More filters in one layer could simplify the optimization problem as each filter needs smaller
+updates. Hence a CNN $N$ with $n_i$ filters in layer $i$ is expected to take more epochs than a
+CNN $N'$ with $2 \cdot n_i$ filters in layer $i$ to achieve the same validation accuracy.
+This hypothesis can be falsified by training a CNN $N$ and a CNN $N'$ and comparing the
+trained number of epochs. As more filters can lead to different results depending on the
+layer where they are added, five models are trained. The details about those models are
+given in Table 5.7.
+ Name Layer Filter count Total
+Baseline New parameters
+$m_9$ 9 64 638 5 978 566
+$m_9'$ 9 64 974 8 925 622
+$m_{11}$ 11 512 3786 5 982 698
+$m_{11}'$ 11 512 1024 1 731 980
+$m_{13}$ 13 512 8704 5 982 092
+Table 5.7.: Models which are identical to the baseline, except that the number of filters of one layer
+was increased.
+The detailed results are given in Table 5.8. As expected, the number of training epochs of
+the models with increased numbers of parameters is lower. The wall-clock time, however, is
+higher due to the increase in computation per forward- and backward-pass.
+For $m_9$, $m_{11}$ and $m_{13}$, the filter weight range of the layer with increased capacity decreases
+compared to Figure 5.6 and the filter weights of the layer with increased capacity are more
+concentrated around zero compared to Figure 5.2. For model $m_{13}$, the distribution of
+weights of the output layer changed to a more bell-shaped distribution. Except for this, the
+distribution of filter weights in other layers did not change for all three models compared to
+the baseline.
+Model Parameters Accuracy Training
+Single Model Ensemble Mean Epochs Mean Time
+Mean std
+baseline 944 012 63.38 % 0.55 64.70 % 154.7 3856 s
+$m_9$ 5 978 566 65.53 % 0.37 66.72 % 105.7 4472 s
+$m_9'$ 8 925 622 65.10 % 1.09 66.54 % 95.6 5261 s
+$m_{11}$ 5 982 698 65.73 % 0.77 67.38 % 149.2 5450 s
+$m_{11}'$ 1 731 980 62.12 % 0.48 62.89 % 143.6 3665 s
+$m_{13}$ 5 982 092 62.39 % 0.66 63.77 % 147.8 4485 s
+Table 5.8.: Training time in epochs and wall-clock time for the baseline and models $m_9$, $m_{11}$, $m_{13}$
+as well as their accuracies.
+
+5.6. Weight updates
+5.6. Weight updates
+Section 5.5 shows that wider networks learn faster. One hypothesis why this happens is
+that every single weight update can be smaller to learn the same function. Thus the loss
+function is smoother and thus gradient descent based optimization algorithms lead to more
+consistent weight updates.
+Consequently, it is expected that layers with fewer filters have more erratic updates. If
+there are many filters, the weights of a filter which does not contribute much to the end
+results or is even harmful can gradually be set to zero, essentially removing one path
+in the network.
+In order to test the hypothesis, the baseline model was adjusted. The number of filters in
+layer 5 was reduced from 64 filters to 3 filters. As one can see in Figure 5.15, the mean
+weight update of the layers 1, 3, 5, 7 and 9 have a far bigger range than the layers 11, 13 and
+15 after epoch 50. Compared to the baseline model's mean updates (Figure 5.8, Page 46),
+the mean weight updates of layers 1 and 3 are higher, the range of the mean weight update
+from epoch 50 is higher for layer 5 and the range of mean updates of layer 7 is higher.
+For the maximum and the sum, no similar pattern could be observed (see Figures A.3
+and A.4).
+Figure 5.15.: Mean weight updates between epochs by layer. The model is the baseline model, but
+with layer 5 reduced to 3 filters.
+ 
+5. Experimental Evaluation
+5.7. Multiple narrow layers vs One wide layer
+On a given feature map size one can have an arbitrary number of convolutional layers with
+SAME padding and each layer can have an arbitrary number of filters. A convolutional layer
+with more filters is called wider [ZK16], a convolutional layer with fewer filters is thus called
+narrower and the number of filters in a convolutional layer is the layer's width.
+If the number of parameters which may be used for the feature map scale is fixed and high
+enough, there are still many combinations. If $n_i$ with $i = 0, \dots, k$ is the number of output
+feature maps of layer $i$ where $i = 0$ is the input layer and all filters are $3 \times 3$ filters with
+a bias, then the number of parameters is
+$$\text{Parameters} = \sum_{i=1}^{k} (n_{i-1} \cdot 3^2 + 1) \cdot n_i$$
+Hence the width of one layer does not only influence the parameters in this layer, but also
+in the next layer.
+The number of possible subsequent layers of one feature map size is enormous, even if
+constraints are placed on the number of parameters. For example, the first convolutional
+layer of the baseline model has 896 parameters. If one assumes that less than 3 filters per
+layer are not desirable, one keeps all layers having a bias and all layers only use 3 × 3 filters,
+then the maximum depth is 10. If one furthermore assumes that at least 800 parameters
+should be used, there are still 120 possible layer combinations. As experimentally evaluating
+one layer combination takes about 10 hours on a GTX 970 for CIFAR-100 it is not possible
+to evaluate all layer combinations. In the following, a couple of changes to the network
+width / depth will be evaluated.
+Each layer expands the perceptive field. Hence deeper layers can use more of the input for
+every single output value. But deeper networks need more time for inference as the output
+of layer i has to be computed before the output of i + 1 can be computed. Hence there is
+less potential to parallelize computations. Each filter can be seen as a concept which can
+be learned. The deeper the filter is in the network, the higher is the abstraction level of the
+concept. In most cases, both are necessary: Many different concepts (width) and high-level
+concepts (depth).
+Reducing the two first convolutional layers of the baseline model (see Page 39) to one
+convolutional layer of 48 filters (944 396 parameters in total, whereas the baseline model
+has 944 012 parameters) resulted in a mean accuracy of 61.64 % (-1.74 %) and a standard
+deviation of σ = 1.12 (+0.57). The ensemble achieved 63.18 % (-1.52 %). As expected,
+the training time per epoch was reduced. For the GTX 980, it was reduced from 22.0 s of
+the baseline model to 15 s of the model with one less convolutional layer, one less Batch
+Normalization and one less activation layer. The inference time was also reduced from 6 ms
+
+5.8. Batch Normalization
+to 4 ms for 1 image and from 32 ms to 23 ms for 128 images. Due to the loss in accuracy of
+more than one percentage point of the mean model and the increased standard deviation of
+the models' performance, at least two convolutional layers on the 32 px × 32 px feature
+map scale are recommendable for CIFAR-100.
+Changing the baseline to have fewer filters but more layers is another option. This was tried
+for the first block at the 32 px × 32 px feature map scale. The two convolutional layers
+(layers 1 – 4 on Page 39) were replaced by two convolutional layers with 27 filters and one
+convolutional layer with 26 filters in the convolution - BN - ELU pattern. The model
+has 944 132 parameters. Compared to the baseline model, the time for inference was the
+same. This is unexpected, because the inference time changed when a layer was removed at
+this scale. The mean test accuracy was 63.66 % (+0.28) and the standard deviation was
+σ = 1.03 (+0.48). The ensemble achieved 64.91 % test accuracy (+0.21).
+Having two nonlinearities at each feature map scale could be important to learn nonlinear
+transformations at that scale. As the baseline model does only have one nonlinearity at the
+8 × 8 feature maps scale, another convolutional layer with 64 filters, Batch Normalization
+and ELU was added. To keep the number of parameters constant, layer 11 of the baseline
+model was reduced from 512 filters to 488 filters. The new model achieves a mean accuracy
+of 63.09 % (-0.29) with a standard deviation of σ = 0.70 (+0.15). The ensemble achieves
+an accuracy of 64.39 % (-0.31). This could indicate that having two convolutional layers
+is more important for layers close to the input than for intermediate layers. Alternatively, the
+parameters could be more important in layer 11 than having a new convolutional layer after
+layer 9.
+In order to check the hypothesis that having two convolutional layers is less important in
+the middle of a network, the second convolutional layer at the 16 × 16 feature map scale is
+removed. The first convolutional layer was increased from 32 filters to 59 filters, the second
+convolutional layer was increased from 32 filters to 58 filters in order to keep the amount of
+parameters of the model constant. The adjusted model achieved 62.72 % (-0.66) mean test
+accuracy with a standard deviation of σ = 0.84 (+0.29). The ensemble achieved 63.88 %
+test accuracy (-0.82).
+Even more extreme, if both convolutional layers are removed from the 16 × 16 feature map
+scale, the mean test accuracy drops to 61.21 % (-2.17) with a standard deviation of σ = 0.51
+(-0.04). The ensemble achieves a test accuracy of 63.07 % (-1.63). Thus it is very important
+to have at least one convolutional layer at this feature map scale.
+5.8. Batch Normalization
+In [CUH15], the authors write that Batch Normalization does not improve ELU networks.
+Hence the effect of removing Batch Normalization from the baseline is investigated in this
+
+5. Experimental Evaluation
+experiment.
+As before, 10 models are trained on CIFAR-100. The training setup and the model $m_{\text{no-bn}}$
+are identical to the baseline model $m$, except that in $m_{\text{no-bn}}$ the Batch Normalization layers
+are removed.
+One notable difference is the training time: While $m$ needs 21 ms per epoch on average on
+a GTX 980, $m_{\text{no-bn}}$ only needs 21 ms per epoch. The number of epochs used for training,
+however, also increased noticeably from 149 epochs to 178 epochs on average. The standard
+deviation of trained epochs is 17.3 epochs for the baseline model and 23.4 epochs for $m_{\text{no-bn}}$.
+The mean accuracy of m
+no-bn is 62.86 % and hence 0.52 percentage points worse. The
+standard deviation between models increased from 0.55 to 0.61. This is likely a result of the
+early stopping policy and the differences in training epochs. This can potentially be fixed
+by retraining the models which stopped earlier than the model which was trained for the
+biggest amount of epochs. The ensemble test accuracy is 63.88 % and hence 0.82 percentage
+points worse than the baseline.
+The filter weight range and distribution is approximately the same as Figure 5.6 and
+Figure 5.2, but the distribution of bias weights changed noticeably: While the bias weights of
+the baseline are spread out in the first layer and much more concentrated in subsequent layers
+(see Figure 5.3), the model without Batch Normalization has rather concentrated weights
+in the first layers and only the bias weights of the last layer are spread out (see Figure A.2).
+Another variant of $m_{\text{no-bn}}$ was trained which has one more filter in each of the convolutional layers 1, 3, 5, and 7 to
+compensate for the loss of parameters in Batch Normalization. The mean test accuracy of
+10 such models is 62.87 % which is 0.51 percentage points worse than the baseline. The
+ensemble of these $m_{\text{no-bn}}$ variants achieves 64.33 % which is 0.37 percentage points worse than the baseline.
+The mean training time was 14 s per epoch and 157.4 epochs with a standard deviation of
+20.7 epochs.
+Hence it is not advisable to remove Batch Normalization for the final model. It could,
+however, be possible to remove Batch Normalization for the experiments to iterate quicker
+through different ideas if the relative performance changes behave the same with or without
+Batch Normalization.
+
+5.9. Batch size
+5.9. Batch size
+The mini-batch size $m \in \mathbb{N}_{\geq 1}$ influences
+• Epochs until convergence: The smaller m, the more often the model is updated
+in one epoch. Those updates, however, are based on fewer samples of the dataset.
+Hence the gradients of different mini-batches can noticeably differ. In the literature,
+this is referred to as gradient noise [KMN+16].
+• Training time per epoch: The smaller the batch size, the higher the training time
+per epoch as the hardware is not optimally utilized.
+• Resulting model quality: The choice of the hyperparameter m influences the
+accuracy of the classifier when training is finished. [KMN+16] supports the view that
+smaller m result in less sharp minima. Hence smaller m lead to better generalization.
+Empirical evaluation results can be found in Table 5.9. Those results confirm the claim
+of [KMN+16] that lower batch sizes generalize better.
+| m | Training time | Epochs | Mean total training time | Single model accuracy | Single model std | Ensemble accuracy |
+| ---: | ---: | :---: | ---: | ---: | :--- | ---: |
+| 8 | 118 s/epoch | 81 – 153 | 14 131 s | 61.93 % | σ = 1.03 | 65.68 % |
+| 16 | 62 s/epoch | 103 – 173 | 8349 s | 64.16 % | σ = 0.81 | 66.98 % |
+| 32 | 35 s/epoch | 119 – 179 | 5171 s | 64.11 % | σ = 0.75 | 65.89 % |
+| 64 | 25 s/epoch | 133 – 195 | 2892 s | 63.38 % | σ = 0.55 | 64.70 % |
+| 128 | 18 s/epoch | 145 – 239 | 3126 s | 62.23 % | σ = 0.73 | 63.55 % |
+Table 5.9.: Training time per epoch and single model test set accuracy (mean and standard deviation)
+of baseline models trained with different mini-batch sizes m on GTX 970 GPUs on
+CIFAR-100.
+5.10. Bias
+Figure 5.3 suggests that the bias is not important for the layers 11, 13 and 15. Hence a
+model $m_{\text{no-bias}}$ is created which is identical to the baseline model $m$, except that the bias of
+layers 11, 13 and 15 is removed.
+The mean test accuracy of 10 trained $m_{\text{no-bias}}$ is 63.74 % which is an improvement of
+0.36 percentage points over the baseline. The ensemble achieves a test accuracy of 65.13 %
+which is 0.43 percentage points better than the baseline. Hence the bias can safely be
+removed.
+Removing the biases did not have a noticeable effect on the filter weight range, the filter
+weight distribution or the distribution of the remaining biases. Also, the γ and β parameters
+of the Batch Normalization layers did not noticeably change.
+ 
+5. Experimental Evaluation
+5.11. Learned Color Space Transformation
+In [MSM16] it is described that placing one convolutional layer with 10 filters of size 1 × 1
+directly after the input and then another convolutional layer with 3 filters of size 1 × 1 acts
+as a learned transformation in another color space and boosts the accuracy.
+This approach was evaluated on CIFAR-100 by adding a convolutional layer with ELU activation
+ and 10 filters followed by another convolutional layer with ELU activation and
+3 filters. The mean accuracy of 10 models was 63.31 % with a standard deviation of 1.37.
+The standard deviation is noticeably higher than the standard deviation of the baseline
+model (0.55) and the accuracy also decreased by 0.07 percentage points. The accuracy of
+the ensemble is at 64.77 % and hence 0.07 percentage points higher than the accuracy of
+the baseline models.
+The inference time for 1 image and for 128 images did not change compared to the baseline.
+The training time per epoch increased from 26 s to 30 s on the GTX 970.
+Hence it is not advisable to use the learned color space transformation.
+5.12. Pooling
+An alternative to max pooling with stride 2 with a 2 × 2 kernel is using a 3 × 3 kernel with
+stride 2.
+This approach was evaluated on CIFAR-100 by replacing all max pooling layers with the
+3 × 3 kernel max pooling (and SAME padding). The mean accuracy of 10 models was 63.32 %
+(−0.06) and the standard deviation was 0.57 (+0.02). The ensemble achieved 65.15 % test
+accuracy (+0.45).
+The training time per epoch decreased from 20.5 s-21.1 s to 18.6 s (mean of 10 training runs)
+on the Nvidia GTX 970. The time for inference increased from 25 ms to 26 ms for a batch
+of 128 images.
+5.13. Activation Functions
+Nonlinear, differentiable activation functions are important for neural networks to allow them
+to learn nonlinear decision boundaries. One of the simplest and most widely used activation
+functions for CNNs is ReLU [KSH12], but others such as ELU [CUH15], parametrized
+rectified linear unit (PReLU) [HZRS15b], softplus [ZYL+15] and softsign [BDLB09] have
+been proposed. The baseline uses ELU.
+
+5.13. Activation Functions
+Activation functions differ in the range of values and the derivative. The definitions and
+other comparisons of eleven activation functions are given in Table B.3.
+Theoretical explanations why one activation function is preferable to another in some
+scenarios are the following:
+• Vanishing Gradient: Activation functions like tanh and the logistic function saturate
+ outside of the interval [−5, 5]. This means weight updates are very small for
+preceding neurons, which is especially a problem for very deep or recurrent networks as
+described in [BSF94]. Even if the neurons learn eventually, learning is slower [KSH12].
+• Dying ReLU: The dying ReLU problem is similar to the vanishing gradient problem.
+The gradient of the ReLU function is 0 for all non-positive values. This means if all
+elements of the training set lead to a negative input for one neuron at any point in the
+training process, this neuron does not get any update and hence does not participate
+in the training process. This problem is addressed in [MHN13].
+• Mean unit activation: Some publications like [CUH15, IS15] claim that mean
+unit activations close to 0 are desirable. They claim that this speeds up learning
+by reducing the bias shift effect. The speedup of learning is supported by many
+experiments. Hence the possibility of negative activations is desirable.
+Those considerations are listed in Table 5.10 for 11 activation functions. Besides the
+theoretical properties, empiric results are provided in Tables 5.11 and 5.12. The baseline
+network was adjusted so that every activation function except the one of the output layer
+was replaced by one of the 11 activation functions.
+As expected, PReLU and ELU performed best. Unexpected was that the logistic function,
+tanh and softplus performed worse than the identity and it is unclear why the pure-softmax
+network performed so much better than the logistic function. One hypothesis why the
+logistic function performs so badly is that it cannot produce negative outputs. Hence the
+$\text{logistic}^{-}$ function was developed:
+$$\text{logistic}^{-}(x) = \frac{1}{1 + e^{-x}} - 0.5$$
+The $\text{logistic}^{-}$ function has the same derivative as the logistic function and hence still suffers
+from the vanishing gradient problem. The network with the $\text{logistic}^{-}$ function achieves an
+accuracy which is 11.30 % better than the network with the logistic function, but is still
+5.54 % worse than the ELU.
+Similarly, ReLU was adjusted to have a negative output:
+$$\text{ReLU}^{-}(x) = \max(-1, x) = \text{ReLU}(x + 1) - 1$$
+The results of $\text{ReLU}^{-}$ are much worse on the training set, but perform similarly on the test
+
+5. Experimental Evaluation
+set. The result indicates that the possibility of hard zero and thus a sparse representation
+is either not important or similarly important as the possibility to produce negative outputs.
+This contradicts [GBB11, SMGS14].
+A key difference between the $\text{logistic}^{-}$ function and ELU is that ELU neither suffers
+from the vanishing gradient problem nor is its range of values bound. For this reason, the
+S2ReLU activation function was developed. It is defined as
+$$\text{S2ReLU}(x) = \text{ReLU}\left(\frac{x}{2} + 1\right) - \text{ReLU}\left(-\frac{x}{2} + 1\right) = \begin{cases} \frac{x}{2} - 1 & \text{if } x \leq -2 \\ x & \text{if } -2 \leq x \leq 2 \\ \frac{x}{2} + 1 & \text{if } x > 2 \end{cases}$$
+This function is similar to SReLUs as introduced in [JXF+16]. The difference is that S2ReLU
+does not introduce learnable parameters. The S2ReLU was designed to be symmetric, be
+the identity close to zero and have a smaller absolute value than the identity farther away.
+It is easy to compute and easy to implement.
+Those results — not only the absolute values, but also the relative comparison — might
+depend on the network architecture, the training algorithm, the initialization and the
+dataset. Results for MNIST can be found in Table 5.13 and for HASYv2 in Table A.2. For
+both datasets, the logistic function has a much shorter training time and a noticeably lower
+test accuracy.
+| Function | Vanishing Gradient | Negative Activation possible | Bound activation |
+| :--- | :--- | :--- | :--- |
+| Identity | No | Yes | No |
+| Logistic | Yes | No | Yes |
+| Logistic− | Yes | Yes | Yes |
+| Softmax | Yes | Yes | Yes |
+| tanh | Yes | Yes | Yes |
+| Softsign | Yes | Yes | Yes |
+| ReLU | Yes¹ | No | Half-sided |
+| Softplus | No | No | Half-sided |
+| S2ReLU | No | Yes | No |
+| LReLU/PReLU | No | Yes | No |
+| ELU | No | Yes | No |
+Table 5.10.: Properties of activation functions.
+¹ The dying ReLU problem is similar to the vanishing gradient problem.
+
+5.13. Activation Functions
+Function Single model Ensemble of 10
+Training set Test set Training set Test set
+Identity 66.25 % σ = 0.77 56.74 % σ = 0.51 68.77 % 58.78 %
+Logistic 51.87 % σ = 3.64 46.54 % σ = 3.22 61.19 % 54.58 %
+Logistic−
+ 66.49 % σ = 1.99 57.84 % σ = 1.15 69.04 % 60.10 %
+Softmax 75.22 % σ = 2.41 59.49 % σ = 1.25 78.87 % 63.06 %
+Tanh 67.27 % σ = 2.38 55.70 % σ = 1.44 70.21 % 58.10 %
+Softsign 66.43 % σ = 1.74 55.75 % σ = 0.93 69.78 % 58.40 %
+ReLU 78.62 % σ = 2.15 62.18 % σ = 0.99 81.81 % 64.57 %
+ReLU−
+ 76.01 % σ = 2.31 62.87 % σ = 1.08 78.18 % 64.81 %
+Softplus 66.75 % σ = 2.45 56.68 % σ = 1.32 71.27 % 60.26 %
+S2ReLU 63.32 % σ = 1.69 56.99 % σ = 1.14 65.80 % 59.20 %
+LReLU 74.92 % σ = 2.49 61.86 % σ = 1.23 77.67 % 64.01 %
+PReLU 80.01 % σ = 2.03 62.16 % σ = 0.73 83.50 % 64.79 %
+ELU 76.64 % σ = 1.48 63.38 % σ = 0.55 78.30 % 64.70 %
+Table 5.11.: Training and test accuracy of adjusted baseline models trained with different activation
+functions on CIFAR-100. For LReLU, α = 0.3 was chosen.
+| Function | Inference per 1 image | Inference per 128 images | Training time | Epochs | Mean total training time |
+| :--- | ---: | ---: | ---: | :---: | ---: |
+| Identity | 8 ms | 42 ms | 31 s/epoch | 108 – 148 | 3629 s |
+| Logistic | 6 ms | 31 ms | 24 s/epoch | 101 – 167 | 2234 s |
+| Logistic− | 6 ms | 31 ms | 22 s/epoch | 133 – 255 | 3421 s |
+| Softmax | 7 ms | 37 ms | 33 s/epoch | 127 – 248 | 5250 s |
+| Tanh | 6 ms | 31 ms | 23 s/epoch | 125 – 211 | 3141 s |
+| Softsign | 6 ms | 31 ms | 23 s/epoch | 122 – 205 | 3505 s |
+| ReLU | 6 ms | 31 ms | 23 s/epoch | 118 – 192 | 3449 s |
+| Softplus | 6 ms | 31 ms | 24 s/epoch | 101 – 165 | 2718 s |
+| S2ReLU | 5 ms | 32 ms | 26 s/epoch | 108 – 209 | 3231 s |
+| LReLU | 7 ms | 34 ms | 25 s/epoch | 109 – 198 | 3388 s |
+| PReLU | 7 ms | 34 ms | 28 s/epoch | 131 – 215 | 3970 s |
+| ELU | 6 ms | 31 ms | 23 s/epoch | 146 – 232 | 3692 s |
+Table 5.12.: Training time and inference time of adjusted baseline models trained with different
+activation functions on GTX 970 GPUs on CIFAR-100. It was expected that the
+identity is the fastest function. This result is likely an implementation specific problem
+of Keras 2.0.4 or Tensorflow 1.1.0.
+ 
+5. Experimental Evaluation
+Function Single model Ensemble Epochs
+Accuracy std Accuracy Range Mean
+Identity 99.45 % σ = 0.09 99.63 % 55 – 77 62.2
+Logistic 97.27 % σ = 2.10 99.48 % 37 – 76 54.5
+Softmax 99.60 % σ = 0.03 99.63 % 44 – 73 55.6
+Tanh 99.40 % σ = 0.09 99.57 % 56 – 80 67.6
+Softsign 99.40 % σ = 0.08 99.57 % 72 – 101 84.0
+ReLU 99.62 % σ = 0.04 99.73 % 51 – 94 71.7
+Softplus 99.52 % σ = 0.05 99.62 % 62 – 70 68.9
+PReLU 99.57 % σ = 0.07 99.73 % 44 – 89 71.2
+ELU 99.53 % σ = 0.06 99.58 % 45 – 111 72.5
+Table 5.13.: Test accuracy of adjusted baseline models trained with different activation functions
+on MNIST.
+5.14. Label smoothing
+Ensembles consisting of n models trained by the same procedure on the same data but
+initialized with different weights and trained with a different order of the training data
+perform consistently better than single models. One drawback of ensembles in applications
+such as self-driving cars is that they increase the computation by a factor of n. One idea
+why they improve the test accuracy is by reducing the variance.
+The idea of label smoothing is to use the ensemble prediction of the training data as labels
+for another classifier. For every element $x$ of the training set, the one-hot encoded target
+$t(x)$ is smoothed by the ensemble prediction $y_E(x)$:
+$$t'(x) = \alpha \cdot t(x) + (1 - \alpha) \cdot y_E(x)$$
+where $\alpha \in [0, 1]$ is the smoothing factor.
+There are three reasons why label smoothing could be beneficial:
+• Training speed: The ensemble prediction contains more information about the
+image than binary class decisions. Classifiers in computer vision predict how similar
+the input looks to other input of the classes they are trained on. By smoothing the
+labels, the information that one image could also belong to another class is passed to
+the optimizer. In early stages of the optimization this could lead to a lower loss on
+the non-smoothed validation set.
+• Higher accuracy: Using smoothed labels for the optimization could lead to a higher
+accuracy of the base-classifier due to a smoothed error surface. It might be less likely
+
+5.14. Label smoothing
+that the classifier gets into bad local minima.
+• Label noise: Depending on the way how the labels are obtained, it might not always
+be clear which label is the correct one. Also, labeling errors can be present in training
+datasets. Those errors severely harm the training. By smoothing the labels, errors
+could be relaxed.
+10 models $m_{\text{smooth}}$ are trained with the α = 0.5 smoothed labels from the prediction
+of an ensemble of 10 baseline models. The mean accuracy of the models trained on the
+smoothed training set labels was 63.61 % (+0.23 %) and the standard deviation was σ = 0.72
+(+0.17 %). The ensemble of 10 $m_{\text{smooth}}$ models achieved 64.79 % accuracy (+0.09 %). Hence
+the effect of this kind of label smoothing on the final accuracy is questionable.
+The training speed didn’t noticeably change either: The number of trained epochs ranged
+from 144 to 205, the mean number of epochs was 177. The baseline training ranged from
+146 to 232 epochs with a mean of 174 epochs. After 10, 30 and 80 epochs the accuracy of both
+training methods differed by less than one percentage point. Hence it is unlikely that label
+smoothing has a positive effect on the training speed.
+Hinton et al. called this method distillation in [HVD15]. Hinton et al. used smooth and
+hard labels for training, whereas this work only used smoothed labels.
+ 
+5. Experimental Evaluation
+5.15. Optimized Classifier
+In comparison to the baseline classifier, the following changes are applied to the optimized
+classifier:
+• Remove the bias for the last layers: For all layers which output a 1 × 1 feature
+map, the bias is removed
+• Increase the max pooling kernel to 3 × 3
+• More filters in the first layers
+The detailed architecture is given in Table 5.14 and visualized in Figure 5.16. The evaluation
+is given in Table 5.15 and the timing comparison is given in Table 5.16.
+# Type Filters @
+Patch size / stride Parameters FLOPs Output size
+Input 0 0 3 @ 32 × 32
+1 Convolution 69 @ 3 × 3 × 3 / 1 1 932 3 744 768 69 @ 32 × 32
+2 BN + ELU 138 353 418 69 @ 32 × 32
+3 Convolution 69 @ 3 × 3 × 32 / 1 42 918 37 684 096 69 @ 32 × 32
+4 BN + ELU 138 353 418 69 @ 32 × 32
+Max pooling 2 × 2 / 2 0 40 960 32 @ 16 × 16
+5 Convolution 64 @ 3 × 3 × 32 / 1 39 808 20 332 544 64 @ 16 × 16
+6 BN + ELU 128 82 048 64 @ 16 × 16
+7 Convolution 64 @ 3 × 3 × 64 / 1 36 928 18 857 984 64 @ 16 × 16
+8 BN + ELU 128 82 048 64 @ 16 × 16
+Max pooling 2 × 2 / 2 20 480 64 @ 8 × 8
+9 Convolution 64 @ 3 × 3 × 64 / 1 36 928 4 714 496 64 @ 8 × 8
+10 BN + ELU 128 20 608 64 @ 8 × 8
+Max pooling 2 × 2 / 2 5 120 64 @ 4 × 4
+11 Convolution (v) 512 @ 4 × 4 × 64 / 1 524 288 1 048 064 512 @ 1 × 1
+12 BN + ELU 1 024 3 584 512 @ 1 × 1
+Dropout 0.5 0 0 512 @ 1 × 1
+13 Convolution 512 @ 1 × 1 × 512 / 1 262 144 523 776 512 @ 1 × 1
+14 BN + ELU 1 024 3 584 512 @ 1 × 1
+Dropout 0.5 0 0 512 @ 1 × 1
+15 Convolution k @ 1 × 1 × 512 / 1 512 · k 512 · k k @ 1 × 1
+Global avg Pooling 1 × 1 0 k k @ 1 × 1
+16 BN + Softmax  2k 7k k @ 1 × 1
+
+Σ 514k + 947 654 (Parameters) 520k + 87 870 996 (FLOPs) 179 200 + 2k (Output size)
+Table 5.14.: Optimized architecture with 3 input channels of size 32 × 32. All convolutional layers
+use SAME padding, except for layer 11 which used VALID padding in order to decrease
+the feature map size to 1 × 1. If the input feature map is bigger than 32 × 32, for each
+power of two there are two Convolution + BN + ELU blocks and one Max pooling
+block added. This is the framed part in the table.
+
+5.15. Optimized Classifier
+32 × 32Input
+C 69@3 × 3/1
+BN + ELU
+C 69@3 × 3/1
+BN + ELU 16 × 16max pooling 3 × 3/2
+C 64@3 × 3/1
+BN + ELU
+C 64@3 × 3/1
+BN + ELU 8 × 8max pooling 3 × 3/2
+C 64@3 × 3/1
+BN + ELU 4 × 4max pooling 3 × 3/2
+C* 512@4 × 4/1 (V)
+BN + ELU
+Dropout, p = 0.5 1 × 1C* 512@1 × 1/1
+BN + ELU
+Dropout, p = 0.5
+C* k@1 × 1/1
+Global AVG pooling
+BN + Softmax
+Figure 5.16.: Architecture of the optimized model. C 32@3 × 3/1 is a convolutional layer with
+32 filters of kernel size 3 × 3 with stride 1. The * indicates that no bias is used.
+Dataset Single Model Accuracy Ensemble of 10
+Training Set Test Set Training Set Test Set
+Asirra 95.83 % σ = 4.70 90.75 % σ = 4.73 98.78 % 93.09 %
+CIFAR-10 94.58 % σ = 0.70 87.92 % σ = 0.46 96.47 % 89.86 %
+CIFAR-100 77.96 % σ = 2.18 64.42 % σ = 0.73 81.44 % 67.03 %
+GTSRB 100.00 % σ = 0.00 99.28 % σ = 0.10 100.00 % 99.51 %
+HASYv2 88.79 % σ = 0.45 85.36 % σ = 0.15 89.36 % 85.92 %
+MNIST 99.88 % σ = 0.10 99.48 % σ = 0.13 99.99 % 99.67 %
+STL-10 95.43 % σ = 3.57 75.09 % σ = 2.39 98.54 % 78.66 %
+SVHN 99.08 % σ = 0.07 96.37 % σ = 0.12 99.50 % 97.47 %
+Table 5.15.: Optimized model accuracy on eight datasets. The single model accuracy is that of the 10 models
+used in the ensemble. The empirical standard deviation σ of the accuracy is also given.
+CIFAR-10, CIFAR-100 and STL-10 models use test-time transformations. None of the
+models uses unlabeled data or data from other datasets. For MNIST, GTSRB, SVHN
+and HASY, no test time transformations are used.
+Network GPU Tensorflow Inference per Training
+1 Image 128 images time / epoch
+Optimized Default Intel i7-4930K 5 ms 432 ms 386 s
+Optimized Optimized Intel i7-4930K 4 ms 307 ms 315 s
+Optimized Default GeForce 940MX 4 ms 205 ms 192 s
+Optimized Default GTX 970 6 ms 41 ms 35 s
+Optimized Default GTX 980 3 ms 35 ms 27 s
+Optimized Default GTX 980 Ti 6 ms 36 ms 26 s
+Optimized Default GTX 1070 2 ms 24 ms 21 s
+Optimized Default Titan Black 4 ms 46 ms 43 s
+Table 5.16.: Speed comparison of the optimized model on CIFAR-10. The baseline model is
+evaluated on six Nvidia GPUs and one CPU. The weights for DenseNet-40-12 are taken
+from [Maj17]. Weights of the baseline model can be found at [Tho17b]. The optimized
+Tensorflow build makes use of SSE4.X, AVX, AVX2 and FMA instructions.
+ 
+5. Experimental Evaluation
+5.16. Early Stopping vs More Data
+A separate validation set is necessary for two reasons: (1) Early stopping and (2) preventing
+overfitting due to many experiments. To prevent overfitting, a different dataset can be used.
+For example, all decisions about hyperparameters in this thesis are based on CIFAR-100,
+but the network is finally trained and evaluated with the same hyperparameters on all
+datasets.2
+ The validation set can hence be removed if early stopping is removed. Instead,
+the validation data is used in a first run to determine the number of epochs necessary for
+training. In a second training run the validation data is added to the training set. The
+number of used epochs for the second run is given in Table 5.17.
+Dataset Mean epochs Train data classes average data / class
+Asirra 60 15 075 2 7538
+MNIST 41 54 000 10 5400
+SVHN 45 543 949 10 54 395
+CIFAR-10 84 45 000 10 4500
+HASYv2 92 136 116 369 369
+GTSRB 97 35 288 43 821
+STL-10 116 4500 10 450
+CIFAR-100 155 45 000 100 450
+Table 5.17.: Mean number of training epochs for the optimized model. For comparison, the total
+amount of used training data, the number of classes of the dataset and the average
+amount of data per class is given.
+Alternatively, the model can be trained with early stopping (ES) purely on the training
+loss. All three methods – early stopping on the validation set accuracy, early stopping on
+the training loss and training a fixed number of epochs are evaluated. While having more
+data helped with Asirra and CIFAR-100, the results as shown in Table 5.18 on the other
+datasets are only marginally different. For CIFAR-10, training with more data did not
+improve the results when the number of epochs is fixed, but notably improved the results
+when the training loss was used as the early stopping criterion.
+5.17. Regularization
+Stronger regularization might even improve the results when using the training loss as an
+early stopping criterion.
+$\ell_2$ regularization with a weighting factor of λ = 0.0001 is used in
+all other experiments. While the accuracy as shown in Table 5.19 does not show a clear
+pattern, the number of epochs increases with lower model regularization (see Table 5.20).
+2
+Except data augmentation and test time transformations.
+3
+Only 1 model is trained due to the long training time of 581 epochs and 12 hours for this model.
+4
+Only 3 models are in this ensemble due to the long training time of more than 8 hours per model.
+
+5.17. Regularization
+Dataset Early Stopping Fixed epochs
+val. acc train loss
+Asirra 93.09 % 96.01 %3
+ 96.01 %
+CIFAR-10 89.86 % 91.75 % 88.88 %
+CIFAR-100 67.03 % 71.01 % 69.08 %
+HASYv2 85.92 % 82.89 %4
+ 85.05 %
+MNIST 99.67 % 99.64 % 99.57 %
+STL-10 78.66 % 83.25 % 78.64 %
+Table 5.18.: Comparisons of trained optimized models with early stopping on the validation accuracy
+compared to training setups without a validation set and thus more training data. The
+second column uses the training loss as a stopping criterion, the third column uses a
+fixed number of epochs which is equal to the mean number of training epochs of the
+models with early stopping on the validation set accuracy.
+λ Single Model Accuracy Ensemble of 10
+Training Set Test Set Training Set Test Set
+λ = 0.01 73.83 % σ = 1.78 58.94 % σ = 1.33 87.78 % 69.98 %
+λ = 0.001 82.86 % σ = 0.89 63.03 % σ = 0.67 91.86 % 71.02 %
+λ = 0.0001 77.96 % σ = 2.18 64.42 % σ = 0.73 81.44 % 67.03 %
+Table 5.19.: Different choices of $\ell_2$ model regularization applied to the optimized model.
+λ min max mean std
+λ = 0.01 457 503 404.6 37.2
+λ = 0.001 516 649 588.4 41.6
+λ = 0.0001 579 833 696.1 79.1
+Table 5.20.: Training time in epochs of models with early stopping on training loss by different
+choices of $\ell_2$ model regularization applied to the optimized model.
+ 
+5. Experimental Evaluation
+
+6. Conclusion and Outlook
+This master thesis gave an extensive overview over the design patterns of CNNs in Chapter 2,
+the methods how CNNs can be analyzed and the principal directions of topology learning
+algorithms in Chapter 3.
+Confusion Matrix Ordering (CMO), originally developed as a method to make visualizations
+of confusion matrices easier to read (see Figure 5.13), was introduced as a class clustering
+algorithm in Chapter 4 and evaluated in Sections 4.2 and 5.4. The important insights are:
+• Ordering the classes in the confusion matrix allows to display the relevant parts even
+for several hundred classes.
+• A hierarchy of classifiers based on the classes does not improve the results on CIFAR-100.
+There are three possible reasons for this:
+– 32 px × 32 px is too low dimensional
+– 100 classes are not enough for this approach
+– More classes are always easier to distinguish if each new class comes with more
+data. One reason why this might be the case is that distinguishing the object
+from background has similar properties even for different classes.
+• Label smoothing had only a minor effect on the accuracy and no effect on the training
+time when a single base classifier was used to train with the smoothed labels by an
+ensemble of base classifiers.
+A baseline model was defined and evaluated on eight publicly available datasets. The
+baseline's topology and training setup are described in detail as well as its behavior during
+training and properties of the weights of the trained model.
+The influence of various hyperparameters is examined in Sections 5.5 to 5.12 for CIFAR-100.
+The insights of those experiments are:
+• Averaging ensembles of 10 base classifiers of the same architecture and trained with the
+same setup consistently improve the accuracy. The amount of improvement depends
+on the base classifiers, but the ensemble tends to improve the test accuracy by about
+one percentage point.
+• Wider networks learn in fewer epochs. This, however, does not mean that the
+
+6. Conclusion and Outlook
+wall-clock time is lower due to increased computation in forward- and backward
+passes.
+• Batch Normalization increases the training time noticeably. For the described ELU
+baseline model it also increases accuracy, which contradicts [CUH15].
+• The lower the batch size, the longer the time for each epoch of training and the fewer
+epochs need to be trained. Higher accuracy by lower batch sizes was empirically
+confirmed. The batch size, however, can also be too low.
+• An analysis of the weights of the baseline indicated that the bias of layers close to
+the output layer can be removed. This was experimentally confirmed.
+• It could not be confirmed that learned color space transformation, as described
+in [MSM16], improves the network. Neither with ELU nor with leaky rectified linear
+unit (LReLU) and α = 0.3.
+• It could be confirmed that ELU networks give better results than any other activation
+function on CIFAR-100. For the character datasets MNIST and HASYv2, however,
+ReLU, LReLU, PReLU, Softplus and ELU all performed similarly.
+• Changing the activation functions to the identity had very little impact on the HASYv2
+and MNIST classifiers. Note that those networks are still able to learn nonlinear
+decision boundaries due to max-pooling and SAME padding. For CIFAR-100, however,
+the accuracy drops by 6.64 % when ELU is replaced by the identity.
+Based on the results of those experiments, an optimized classifier was developed and
+evaluated on all eight datasets.
+The state of the art of STL-10 was improved from 74.80 % [ZMGL15] to 78.66 % without
+using the unlabeled part of the dataset. The state of the art of HASYv2 was improved
+from 81.00 % [Tho17a] to 85.92 %, for GTSRB the state of the art was improved from
+99.46 % [SL11] to 99.51 %, for Asirra it was improved from 82.7 % [Gol08] to 93.09 %.1
+This was mainly achieved by the combination of ELU, Dropout, ensembles, training data
+augmentation and test-time transformations. The removal of the bias of layers close to the
+output and re-usage of those parameters in layers close to the input as well as using 3 × 3
+pooling instead of 2 × 2 pooling improved the baseline.
+While writing this master's thesis, several related questions could not be answered:
+• Deeper CNNs have generally higher accuracy, if trained long enough and if overfitting
+is not a problem. But at which subsampling-level does having more layers have the
+biggest effect? Can this question be answered before a deeper network is trained?
+• Is label smoothing helpful for noisy labels?
+1
+The baseline is better than the optimized model on Asirra and on HASYv2.
+
+• How does the choice of activation functions influence residual architectures? Could the
+results be the same for different activation functions in architectures with hundreds
+of layers?
+• The results for the pooling kernel were inconclusive. Larger pooling kernels might be
+advantageous as well as fractional max pooling [Gra15].
+• Why is the mean weight update (see Figure 5.8) not decreasing? Is this an effect that
+can and should be fixed?
+• Why is softmax so much better than the logistic function? Can the reason be used to
+further improve ELU?
+Besides those questions, the influence of optimizers on time per epoch, epochs until
+convergence, total training time, memory consumption, accuracy of the models and standard
+deviation of the models was not evaluated. This, and the stopping criterion for training
+might be crucial for the model's quality.
+ 
+
+A. Figures, Tables and Algorithms
+(a) Original image (b) Smoothing filter (c) Laplace edge detection filter
+(d) Sobel edge detection filter (e) Prewitt edge detection filter (f) Canny filter
+Figure A.1.: Examples of image filters. Best viewed in electronic form.
+Layer 99-percentile interval
+filter bias
+1 [-0.50, 0.48] [-0.06, 0.07]
+3 [-0.21, 0.19] [-0.07, 0.07]
+5 [-0.20, 0.17] [-0.07, 0.05]
+7 [-0.15, 0.14] [-0.05, 0.06]
+9 [-0.14, 0.15] [-0.04, 0.03]
+11 [-0.08, 0.08] [-0.00, 0.00]
+13 [-0.08, 0.08] [-0.00, 0.00]
+15 [-0.10, 0.11] [-0.01, 0.01]
+Table A.1.: 99-percentile intervals for filter weights and bias weights by layer of a baseline model
+trained on CIFAR-100.
+ 
+Figure A.2.: The distribution of bias weights of a model without batch normalization trained on
+CIFAR-100.
+Algorithm 1 Simulated Annealing for minimizing Equation (4.1).
+Require: $C \in \mathbb{N}^{n \times n}$, $\text{steps} \in \mathbb{N}$, $T \in \mathbb{R}^{+}$, $c \in (0, 1)$
+procedure SimulatedAnnealing(C , steps, T , c)
+bestScore ← accuracy(C )
+bestC ← C
+for i = 0; i < steps; i ← i + 1 do
+p ← randomFloat(0, 1)
+if p < 0.5 then   Swap rows
+i ← randomInteger(1, . . . , n)
+j ← randomInteger(1, . . . , n) \ { i }
+p ← randomUniform(0, 1)
+C′ ← swap(C, i, j)
+s ← accuracy(C′)
+if p < exp((s − bestScore) / T) then
+C ← C′
+if s > bestScore then
+bestScore ← s
+bestC ← C
+T ← T · c
+else   Move Block
+s ← randomInteger(1, . . . , n)  Block start
+e ← randomInteger(s, . . . , n)  Block end
+i ← randomInteger(1, . . . , n − (e − s))  Block insert position
+Move Block (s, . . . , e) to position i
+return bestC
+
+Figure A.3.: Maximum weight updates between epochs by layer. The model is the baseline model,
+but with layer 5 reduced to 3 filters.
+Function Single model Ensemble of 10 Epochs
+Training set Test set Train Test Range Mean
+Identity 87.92 % σ = 0.40 84.69 % σ = 0.08 88.59 % 85.43 % 92 – 140 114.5
+Logistic 81.46 % σ = 5.08 79.67 % σ = 4.85 86.38 % 84.60 % 58 – 91 77.3
+Softmax 88.19 % σ = 0.31 84.70 % σ = 0.15 88.69 % 85.43 % 124 – 171 145.8
+Tanh 88.41 % σ = 0.36 84.46 % σ = 0.27 89.24 % 85.45 % 89 – 123 108.7
+Softsign 88.00 % σ = 0.47 84.46 % σ = 0.23 88.77 % 85.33 % 77 – 119 104.1
+ReLU 88.93 % σ = 0.46 85.35 % σ = 0.21 89.35 % 85.95 % 96 – 132 102.8
+Softplus 88.42 % σ = 0.29 85.16 % σ = 0.15 88.90 % 85.73 % 108 – 143 121.0
+LReLU 88.61 % σ = 0.41 85.21 % σ = 0.05 89.07 % 85.83 % 87 – 117 104.5
+PReLU 89.62 % σ = 0.41 85.35 % σ = 0.17 90.10 % 86.01 % 85 – 111 100.5
+ELU 89.49 % σ = 0.42 85.35 % σ = 0.10 89.94 % 86.03 % 73 – 113 92.4
+Table A.2.: Test accuracy of adjusted baseline models trained with different activation functions on
+HASYv2. For LReLU, α = 0.3 was chosen.
+ 
+Figure A.4.: Sum of weight updates between epochs by layer. The model is the baseline model, but
+with layer 5 reduced to 3 filters.
+Function Single model Ensemble of 10 Epochs
+Training set Test set Train Test Range Mean
+Identity 87.49 % σ = 2.50 69.86 % σ = 1.41 89.78 % 71.90 % 51 – 65 53.4
+Logistic 45.32 % σ = 14.88 40.85 % σ = 12.56 51.06 % 45.49 % 38 – 93 74.6
+Softmax 87.90 % σ = 3.58 67.91 % σ = 2.32 91.51 % 70.96 % 108 – 150 127.5
+Tanh 85.38 % σ = 4.04 67.65 % σ = 2.01 90.47 % 71.29 % 48 – 92 65.2
+Softsign 88.57 % σ = 4.00 69.32 % σ = 1.68 93.04 % 72.40 % 55 – 117 83.2
+ReLU 94.35 % σ = 3.38 71.01 % σ = 1.63 98.20 % 74.85 % 52 – 98 75.5
+Softplus 83.03 % σ = 2.07 68.28 % σ = 1.74 93.04 % 75.99 % 56 – 89 68.9
+LReLU 93.83 % σ = 3.89 74.66 % σ = 2.11 97.56 % 78.08 % 52 – 120 80.1
+PReLU 95.53 % σ = 1.92 71.69 % σ = 1.37 98.17 % 74.69 % 59 – 101 78.8
+ELU 95.42 % σ = 3.57 75.09 % σ = 2.39 98.54 % 78.66 % 66 – 72 67.2
+Table A.3.: Test accuracy of adjusted baseline models trained with different activation functions on
+STL-10. For LReLU, α = 0.3 was chosen.
+
+B. Hyperparameters
+Hyperparameters are parameters of models which are not optimized automatically (e.g., by
+gradient descent), but by methods like random search [BB12], grid search [LBOM98] or
+manual search.
+B.1. Preprocessing
+Preprocessing used to be of major importance in machine learning. However, with the
+availability of data sets with hundreds of examples per class and the possibility of CNNs to
+learn features themselves, most models today rely on raw pixel values. The only common
+preprocessing is size normalization. In order to get a fixed input-size for a CNN, the
+following procedure can be used:
+• Take one or multiple crops of the image which have the desired aspect ratio.
+• Scale the crop(s) to the desired size.
+• In training, all crops can be used independently. In testing, all crops can be passed
+through the network and the output probability distributions can be fused, for
+example by averaging.
+Other preprocessing methods are:
+• Color space transformations (RGB, HSV, etc.)
+• Mean subtraction
+• Standardization of pixel-values to [0, 1] by dividing by 255 (used by [HLW16])
+• Dimensionality reduction
+– Principal component analysis (PCA): An unsupervised linear transformation
+which can be learned in the first hidden layer. It is hence doubtful if PCA
+improves the network.
+– Linear discriminant analysis (LDA)
+• Zero Components Analysis (ZCA) whitening (used by [KH09])
+ 
+B.2. Data augmentation
+Data augmentation techniques aim at making artificially more data from real data items by
+applying invariances. For computer vision, they include:
+Name Augmentation Factor Used by
+Horizontal flip 2 [KSH12, WYS+15]
+Vertical flip 2 [DWD15]^1
+Rotation ∼ 40 (δ = 20) [DSRB14]
+Scaling ∼ 14 (δ ∈ [0.7, 1.4]) [DSRB14]
+Crops 32^2 = 1024 [KSH12, WYS+15]
+Shearing [Gra15]
+GANs [BCW+17]
+Brightness ∼ 20 (δ ∈ [0.5, 1.5]) [How13]
+Hue 51 (δ = 0.1) [MRM15, DSRB14]
+Saturation ∼ 20 (δ = 0.5) [DSRB14]
+Contrast ∼ 20 (δ ∈ [0.5, 1.5]) [How13]
+Channel shift [KSH12]
+Table B.1.: Overview of data augmentation techniques. The augmentation factor is calculated for
+typical situations. For example, the augmentation factor for random crops is calculated
+for 256 px × 256 px images which are cropped to 224 px × 224 px.
+Taking several scales if the original is of higher resolution than desired is another technique.
+Combinations of the techniques above can also be applied. Please note that the order of
+operations does matter in many cases and hence the order is another augmentation factor.
+Less common, but also reasonable are:
+• Adding noise
+• Elastic deformations
+• Color casting (used by [WYS+15])
+• Vignetting (used by [WYS+15])
+• Lens distortion (used by [WYS+15])
+^1 Vertical flipping combined with 180° rotation is equivalent to horizontal flipping.
+
+B.3. Initialization
+Weight initializations are usually chosen to be small and centered around zero. One way to
+characterize many initialization schemes is by
+w ∼ α · U [−1, 1] + β · N (0, 1) + γ with α, β , γ ≥ 0
+Table B.2 shows six commonly used weight initialization schemes. Several schemes use the
+same idea, that unit-variance is desired for each layer as the training converges faster [IS15].
+Name α β γ Reference
+Constant α = 0 β = 0 γ ≥ 0 used by [ZF14]
+Xavier/Glorot uniform α = $\frac{6}{n_{in}+n_{out}}$ β = 0 γ = 0 [GB10]
+Xavier/Glorot normal α = 0 β = $\frac{2}{(n_{in}+n_{out})^2}$ γ = 0 [GB10]
+He α = 0 β = $\frac{2}{n_{in}}$ γ = 0 [HZRS15b]
+Orthogonal — — γ = 0 [SMG13]
+LSUV — — γ = 0 [MM15]
+Table B.2.: Weight initialization schemes of the form w ∼ α · U [−1, 1] + β · N (0, 1) + γ.
+$n_{in}$, $n_{out}$ are the number of units in the previous layer and the next layer. Typically,
+biases are initialized with constant 0 and weights by one of the other schemes to prevent
+unit-coadaptation. However, dropout makes it possible to use constant initialization for
+all parameters.
+LSUV and Orthogonal initialization cannot be described with this simple pattern.
+B.4. Objective function
+For classification tasks, the cross-entropy
+$E_{CE}(W) = -\sum_{x \in X} \sum_{k=1}^{K} \left[ t_k^x \log(o_k^x) + (1 - t_k^x) \log(1 - o_k^x) \right]$
+is by far the most commonly used objective function (e.g., used by [ZF14]). In this equation,
+X is the set of training examples, K is the number of classes, $t_k^x \in \{0, 1\}$ indicates if the
+training example x is of class k, $o_k^x$ is the output of the classifier for the training example x
+and class k.
+However, regularization terms weighted with a constant λ ∈ (0, +∞) are sometimes added:
+• LASSO: $\ell_1$ (e.g., used in [HPTD15])
+• Weight decay: $\ell_2$ (e.g., λ = 0.0005 as in [MSM16])
+• Orthogonality regularization ($|(W^T \cdot W - I)|$, see [VTKP17])
+ 
+B.5. Optimization Techniques
+Most relevant optimization techniques for CNNs are based on SGD, which updates the
+weights according to the rule
+$w_{ji} \leftarrow w_{ji} + \Delta w_{ji}$ with $\Delta w_{ji} = -\eta \frac{\partial E_x}{\partial w_{ji}}$
+where η ∈ (0, 1), typically 0.01 (e.g., [MSM16]), is called the learning rate.
+A slight variation of SGD is mini-batch gradient descent with the mini-batch B (typically
+mini-batch sizes are |B | ∈ { 32, 64, 128, 256, 512 }, e.g. [ZF14]). Larger mini-batch sizes
+lead to sharp minima and thus poor generalization [KMN+16]. Smaller mini-batch sizes
+lead to longer training times due to computational overhead and to more training steps due
+to gradient noise.
+$w_{ji} \leftarrow w_{ji} + \Delta w_{ji}$ with $\Delta w_{ji} = -\eta \frac{\partial E_B}{\partial w_{ji}}$
+Nine variations which adjust the learning rate during training are:
+• Momentum:
+$w_{ji}^{(t+1)} \leftarrow w_{ji}^{(t)} + \Delta w_{ji}^{(t+1)}$ with $\Delta w_{ji}^{(t+1)} = -\eta \frac{\partial E_B}{\partial w_{ji}} + \alpha \Delta w_{ji}^{(t)}$
+with α ∈ [0, 1], typically 0.9 (e.g., [ZF14, MSM16])
+• Adagrad [DHS11]
+• RProp and the mini-batch version RMSProp [TH12]
+• Adadelta [Zei12]
+• Power Scheduling [Xu11]: $\eta(t) = \eta(0) (1 + a \cdot t)^{-c}$, where $t \in \mathbb{N}_0$ is the training step,
+a, c are constants.
+• Performance Scheduling [SHY+13]: Measure the error on the cross validation set and
+decrease the learning rate when the algorithm's improvement is below a threshold.
+• Exponential Decay Learning Rate [SHY+13]: $\eta(t) = \eta(0) \cdot 10^{-\frac{t}{k}}$ where $t \in \mathbb{N}_0$ is the
+training step, η(0) is the initial learning rate, $k \in \mathbb{N}_{\geq 1}$ is the number of training steps
+until the learning rate is decreased to $\frac{1}{10}$ of its value.
+• Newbob Scheduling [new00]: Start with Performance Scheduling, then use Exponential
+Decay Scheduling.
+• Adam and AdaMax [KB14]
+
+• Nadam [Doz15]
+Some of those are explained in [Rud16].
+Other first-order gradient optimization methods are:
+• Quickprop [Fah88]
+• Nesterov Accelerated Momentum (NAG) [Nes83]
+• Conjugate Gradient method [Cha92]: Combines a line search for the step size with
+the gradient's direction.
+Higher-order gradient methods like Newton's method or quasi-Newton methods like BFGS
+and L-BFGS need the inverse of the Hessian matrix which is intractable for today’s CNNs.
+However, there are alternatives which do not use gradient information:
+• Genetic algorithms such as NeuroEvolution of Augmenting Topologies (NEAT) [SM02]
+• Simulated Annealing [vLA87]
+• Twiddle: A local hill-climbing algorithm explained by Sebastian Thrun and described
+on [Tho14b]
+There are also approaches which learn the optimization algorithm [ADG+16, LM16].
+ 
+B.6. Network Design
+CNNs have the following hyperparameters:
+• Depth: The number of layers
+• Width: The number of filters per layer
+• Layer and block connectivity graph
+• Layer and block hyperparameters:
+– Activation Functions as shown in Table B.3
+– For more, see Sections 2.2 and 2.3.
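+Name Function ϕ(x) Range of Values ϕ′(x) Used by
+Sign function† $\begin{cases} +1 & \text{if } x \geq 0 \\ -1 & \text{if } x < 0 \end{cases}$ { −1, 1 } 0 [KS02]
+Heaviside step function† $\begin{cases} +1 & \text{if } x > 0 \\ 0 & \text{if } x < 0 \end{cases}$ { 0, 1 } 0 [MP43]
+Logistic function $\frac{1}{1+e^{-x}}$ [0, 1] $\frac{e^x}{(e^x+1)^2}$ [DJ99]
+Tanh $\frac{e^x - e^{-x}}{e^x + e^{-x}} = \tanh(x)$ [−1, 1] $\operatorname{sech}^2(x)$ [LBBH98, Tho14a]
+ReLU† $\max(0, x)$ [0, +∞) $\begin{cases} 1 & \text{if } x > 0 \\ 0 & \text{if } x < 0 \end{cases}$ [KSH12]
+LReLU†^2 (PReLU) $\varphi(x) = \max(\alpha x, x)$ (−∞, +∞) $\begin{cases} 1 & \text{if } x > 0 \\ \alpha & \text{if } x < 0 \end{cases}$ [MHN13, HZRS15b]
+Softplus $\log(e^x + 1)$ (0, +∞) $\frac{e^x}{e^x+1}$ [DBB+01, GBB11]
+ELU $\begin{cases} x & \text{if } x > 0 \\ \alpha (e^x - 1) & \text{if } x \leq 0 \end{cases}$ (−∞, +∞) $\begin{cases} 1 & \text{if } x > 0 \\ \alpha e^x & \text{otherwise} \end{cases}$ [CUH15]
+Softmax‡ $o(\mathbf{x})_j = \frac{e^{x_j}}{\sum_{k=1}^{K} e^{x_k}}$ $[0, 1]^K$ $o(\mathbf{x})_j \cdot \frac{\sum_{k=1}^{K} e^{x_k} - e^{x_j}}{\sum_{k=1}^{K} e^{x_k}}$ [KSH12, Tho14a]
+Maxout‡ $o(\mathbf{x}) = \max_{x \in \mathbf{x}} x$ (−∞, +∞) $\begin{cases} 1 & \text{if } x_i = \max \mathbf{x} \\ 0 & \text{otherwise} \end{cases}$ [GWFM+13]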
+Table B.3.: Overview of activation functions. Functions marked with † are not differentiable at 0
+and functions marked with ‡ operate on all elements of a layer simultaneously. The
+hyperparameters α ∈ (0, 1) of Leaky ReLU and ELU are typically α = 0.01. Other
+activation functions like randomized leaky ReLUs exist [XWCL15], but are far less
+commonly used.
+Some functions are smoothed versions of others, like the logistic function for the
+Heaviside step function, tanh for the sign function, softplus for ReLU.
+Softmax is the standard activation function for the last layer of a classification network
+as it produces a probability distribution. See Figure B.1 for a plot of some of them.
+^2 α is a hyperparameter in leaky ReLU, but a learnable parameter in the parametric ReLU function.
+
+−2.0 −1.5 −1.0 −0.5 0.5 1.0 1.5 2.0
+−1.0 −0.5 0.5 1.0 1.5 2.0
+x y
+$\varphi_1(x) = \frac{1}{1+e^{-x}}$
+$\varphi_2(x) = \tanh(x)$
+$\varphi_3(x) = \max(0, x)$
+$\varphi_4(x) = \log(e^x + 1)$
+$\varphi_5(x) = \max(x, e^x - 1)$
+Figure B.1.: Activation functions plotted in [−2, +2]. tanh and ELU are able to produce negative
+numbers. The image of ELU, ReLU and Softplus is not bound on the positive side,
+whereas tanh and the logistic function are always below 1.
+B.7. Regularization
+Regularization techniques aim to make the fitted function smoother and reduce overfitting.
+Regularization techniques are:
+• $\ell_1$, $\ell_2$, and Orthogonality regularization: See Appendix B.4
+• Max-norm regularization (e.g. used in [SHK+14])
+• Dropout (introduced in [SHK+14]), DropConnect (see [WZZ+13]), Stochastic Depth
+(see [HSL+16])
+• Feature scale clipping (see [ZF14])
+• Data augmentation (according to [ZBH+16])
+• Global average pooling (according to [ZKL+15])
+• Dense-Sparse-Dense training (see [HPN+16])
+• Soft targets (see [HVD15])
+ 
+
+C. Calculating Network Characteristics
+C.1. Parameter Numbers
+• A fully connected layer with n nodes, k inputs has n · (k + 1) parameters. The +1 is
+due to the bias.
+• A convolutional layer i with $k_i$ filters of size n × m being applied to $k_{i-1}$ feature maps
+has $k_i \cdot k_{i-1} (n \cdot m + 1)$ parameters. The +1 is due to the bias.
+• A fully connected layer with n nodes after k feature maps of size $m_1 \times m_2$ has
+$n \cdot (k \cdot m_1 \cdot m_2 + 1)$ parameters.
+• A dense block with a depth of L, a growth rate of n and 3 × 3 filters has
+$L + n \cdot 3^2 + 3^2 \cdot n^2 \sum_{i=0}^{L} (L - i) = L + 9n + 9n^2 \frac{L^2 - L}{2}$ parameters.
+According to [HPTD15], AlexNet has 60 million parameters which is roughly the number
+calculated in Table D.2.
+C.2. FLOPs
+The FLOPs of a layer depend on the implementation, the compiler and the hardware. Hence
+the following numbers are only rough estimates.
+In the following, $n_\varphi$ denotes the number of FLOPs to compute the non-linearity ϕ. For
+simplicity, $n_\varphi = 5$ was chosen.
+• A fully connected layer with n nodes and k inputs has to calculate ϕ(W · x + b) with
+$W \in \mathbb{R}^{n \times k}$, $x \in \mathbb{R}^{k \times 1}$, $b \in \mathbb{R}^{n \times 1}$. It hence needs about n · (k + (k − 1) + 1) = 2nk
+additions / multiplications before the non-linearity ϕ is calculated. The total number
+of FLOPs is $2 \cdot n \cdot k + n \cdot n_\varphi$.
+• In the following, biases are ignored. A convolutional layer with $k_i$ filters of size n × m
+being applied to $k_{i-1}$ filter maps of size w × h results in $k_i$ filter maps of size w × h if
+padding is applied. For each element of each filter map, $n \cdot m \cdot k_{i-1}$ multiplications and
+$(n \cdot m \cdot k_{i-1} - 1)$ additions have to be made. This results in $(2nmk_{i-1} - 1) \cdot (k_i \cdot w \cdot h)$
+operations. The total number of FLOPs is $(2 \cdot n \cdot m \cdot k_{i-1} - 1) \cdot (k_i \cdot w \cdot h) + k_i \cdot w \cdot h \cdot n_\varphi$.
+This is, of course, a naive way of calculating a convolution. There are other ways of
+calculating convolutions [LG16].
+ 
+• A fully connected layer with n nodes after k feature maps of size w × h needs 2n(k · w · h)
+FLOPs. The total number of FLOPs is $2n \cdot (k \cdot w \cdot h) + n \cdot n_\varphi$.
+• As Dropout is only calculated during training, the number of FLOPs was set to 0.
+• The number of FLOPs for max pooling is dominated by the number of positions to
+which the pooling kernel is applied. For a feature map of size w × h a max pooling
+filter with stride s gets applied $\frac{w \cdot h}{s^2}$ times. The number of FLOPs per application depends
+on the kernel size. A 2 × 2 kernel is assumed to need 5 FLOPs.
+• The number of FLOPs for Batch Normalization is the same as the number of its
+parameters.
+Here are some references which give information for the FLOPs:
+• AlexNet
+– 1.5B in total [HPTD15].
+– 725M in total [KPY+15].
+– 3300M in total in Table D.2
+• VGG-16:
+– 15484M in total [HPTD15].
+– 31000M in total in Table D.3.
+• GoogleNet: 1566M in total [HPTD15].
+One can see that the numbers are by a factor of 2 up to a factor of 4 different for the same
+network.
+C.3. Memory Footprint
+The memory footprint of CNNs determines when networks can be used at all and if they
+can be trained efficiently. In order to be able to train CNNs efficiently, one weight update
+step has to fit in the memory of the GPU. This includes the following:
+• Activations: All activations of one mini-batch in order to calculate the gradients
+in the backward pass. This is the number of floats in the feature maps of all weight
+layers combined.
+• Weights
+• Optimization algorithm: The optimization algorithm introduces some overhead.
+For example, Adam stores two parameters per weight.
+At inference time, every two consecutive layers have to fit into memory. When the forward
+pass of layer A to layer B is calculated, the memory can be freed if no skip connections are
+used.
+
+D. Common Architectures
+In the following, some of the most important CNN architectures are explained. Understanding
+the development of these architectures helps in understanding critical insights the machine
+learning community gained in the past years for convolutional networks for image recognition.
+It starts with LeNet-5 from 1998, continues with AlexNet from 2012, VGG-16 D from
+2014, the Inception modules v1 to v3 as well as ResNets in 2015. The recently developed
+Inception-v4 is also covered.
+The summation row gives the sum of all floats for the output size column. This allows
+conclusions about the maximum mini-batch size which can be in memory for training.
+ 
+D.1. LeNet-5
+One of the first CNNs used was LeNet-5 [LBBH98]. LeNet-5 uses two times the common
+pattern of a single convolutional layer with tanh as a non-linear activation function followed
+by a pooling layer and three fully connected layers. One fully connected layer is used to
+get the right output dimension, another one is necessary to allow the network to learn a
+non-linear combination of the features of the feature maps.
+Its exact architecture is shown in Figure D.1 and described in Table D.1. It reaches a test
+error rate of 0.8 % on MNIST.
+Figure D.1.: Architecture of LeNet-5 as shown in [LBBH98].
+# Type Filters @
+Patch size / stride Parameters FLOPs Output size
+Input 0 0 1 @ 32 × 32
+1 Convolution 6 @ 5 × 5 × 1 / 1 156 307 800 6 @ 28 × 28
+2 Scaled average pooling 2 × 2 / 2 2 336 6 @ 14 × 14
+3 Convolution 16 @ 5 × 5 × 6 / 1 2 416 942 400 16 @ 10 × 10
+4 Scaled average pooling 2 × 2 / 2 2 1 600 16 @ 5 × 5
+5 Fully Connected 120 neurons 48 120 240 000 120
+6 Fully Connected 84 neurons 10 164 20 580 84
+7 Fully Connected (output) 10 neurons 850 1 730 10
+
+ 61 710 15 144 446 9118
+Table D.1.: LeNet-5 architecture: After layers 1, 3, 5 and 6 the tanh activation function is applied.
+After layer 7, the softmax function is applied. One can see that convolutional layers
+need much fewer parameters, but an order of magnitude more FLOPs per parameter
+than fully connected layers.
+
+D.2. AlexNet
+The first CNN which achieved major improvements on the ImageNet dataset was AlexNet [KSH12].
+Its architecture is shown in Figure D.2 and described in Table D.2. It has about $60 \cdot 10^6$ parameters.
+A trained AlexNet can be downloaded at www.cs.toronto.edu/~guerzhoy/tf_alexnet.
+Note that the uncompressed size is at least $60\,965\,224\ \text{floats} \cdot 32\ \frac{\text{bit}}{\text{float}} \approx 244\ \text{MB}$.
+Figure D.2.: Architecture of AlexNet as shown in [KSH12]: Convolutional Layers are followed
+by pooling layers multiple times. At the end, a fully connected network is applied.
+Conceptually, it is identical to the architecture of LeNet-5 (see Figure D.1).
+# Type Filters @
+Patch size / stride Parameters FLOPs Output size
+Input 3 @ 224 × 224
+1 Convolution 96 @ 11 × 11 × 3 / 4 34 944 211 M 96 @ 55 × 55
+LCN  12 M 96 @ 55 × 55
+2 Max pooling 3 × 3 / 2 0 301 k 96 @ 27 × 27
+3 Convolution 256 @ 5 × 5 × 48 / 1 307 456 448 M 256 @ 13 × 13
+LCN  3 M 256 @ 13 × 13
+4 Max pooling 3 × 3 / 2 0 50 k 256 @ 13 × 13
+5 Convolution 384 @ 3 × 3 × 256 / 1 885 120 299 M 384 @ 13 × 13
+7 Convolution 384 @ 3 × 3 × 192 / 1 663 936 224 M 384 @ 13 × 13
+9 Convolution 256 @ 3 × 3 × 192 / 1 442 624 150 M 256 @ 13 × 13
+10 Max pooling 3 × 3 / 2 0 50 k 256 @ 6 × 6
+11 FC 4096 neurons 37 752 832 75 M 4096
+12 FC 4096 neurons 16 781 312 34 M 4096
+13 FC 1000 neurons 4 097 000 8 M 1000
+
+ 60 965 224 3300 M 1 122 568
+Table D.2.: AlexNet architecture: One special case of AlexNet is grouping of convolutions due to
+computational restrictions at the time of its development. This also reduces the number
+of parameters and allows parallel computation on separate GPUs. However, to make
+the architecture easier to compare, this grouping was ignored for the parameter count.
+The FLOPs are taken from [HPTD15] and combined with rough estimates for Local
+Contrast Normalization and max pooling.
+The calculated number of parameters was checked against the downloaded version. It
+also has 60 965 224 parameters.
+ 
+D.3. VGG-16 D
+Another widespread architecture is the VGG-16 (D) [SZ14]. VGG comes from the Visual
+Geometry Group in Oxford which developed this architecture. It has 16 layers which can
+learn parameters. A major difference compared to AlexNet is that VGG-16 uses only 3 × 3
+filters and is much deeper. A visualization of the architecture is shown in Figure D.3 and a
+detailed textual description is given in Table D.3.
+A trained VGG-16 D for Tensorflow can be downloaded at https://github.com/machrisaa/tensorflow-vgg.
+Note that the uncompressed size is at least $138\,357\,544\ \text{floats} \cdot 32\ \frac{\text{bit}}{\text{float}} \approx 520\ \text{MB}$.
+The downloaded Numpy binary file npz needs 553 MB without compression and
+514 MB with compression.
+224 × 224Input
+C 64@3 × 3/1
+C 64@3 × 3/1 112 × 112max pooling 2 × 2/1
+C 128@3 × 3/1
+C 128@3 × 3/1 56 × 56max pooling 2 × 2/1
+C 256@3 × 3/1
+C 256@3 × 3/1
+C 256@3 × 3/1 28 × 28max pooling 2 × 2/1
+C 512@3 × 3/1
+C 512@3 × 3/1
+C 512@3 × 3/1 14 × 14max pooling 2 × 2/1
+C 512@3 × 3/1
+C 512@3 × 3/1
+C 512@3 × 3/1 7 × 7max pooling 2 × 2/1
+Fully Connected 4096
+Dropout, p = 0.5
+Fully Connected 4096
+Dropout, p = 0.5
+Fully Connected 1000
+Figure D.3.: Architecture of VGG-16 D. C 512@3 × 3/1 is a convolutional layer with 512 filters of
+kernel size 3 × 3 with stride 1. All convolutional layers use SAME padding.
+
+# Type Filters @
+Patch size / stride Parameters FLOPs Output size
+Input 3 @ 224 × 224
+1 Convolution 64 @ 3 × 3 × 3 / 1 1 792 186 M 64 @ 224 × 224
+2 Convolution 64 @ 3 × 3 × 64 / 1 36 928 3712 M 64 @ 224 × 224
+Max pooling 2 × 2 / 2 0 2 M 64 @ 112 × 112
+3 Convolution 128 @ 3 × 3 × 64 / 1 73 856 1856 M 128 @ 112 × 112
+4 Convolution 128 @ 3 × 3 × 128 / 1 147 584 3705 M 128 @ 112 × 112
+Max pooling 2 × 2 / 2 0 1 M 128 @ 56 × 56
+5 Convolution 256 @ 3 × 3 × 128 / 1 295 168 1853 M 256 @ 56 × 56
+6 Convolution 256 @ 3 × 3 × 256 / 1 590 080 3703 M 256 @ 56 × 56
+7 Convolution 256 @ 3 × 3 × 256 / 1 590 080 3703 M 256 @ 56 × 56
+Max pooling 2 × 2 / 2 0 <1 M 256 @ 28 × 28
+8 Convolution 512 @ 3 × 3 × 256 / 1 1 180 160 1851 M 512 @ 28 × 28
+9 Convolution 512 @ 3 × 3 × 512 / 1 2 359 808 3701 M 512 @ 28 × 28
+10 Convolution 512 @ 3 × 3 × 512 / 1 2 359 808 3701 M 512 @ 28 × 28
+Max pooling 2 × 2 / 2 0 <1 M 512 @ 14 × 14
+11 Convolution 512 @ 3 × 3 × 512 / 1 2 359 808 925 M 512 @ 14 × 14
+12 Convolution 512 @ 3 × 3 × 512 / 1 2 359 808 925 M 512 @ 14 × 14
+13 Convolution 512 @ 3 × 3 × 512 / 1 2 359 808 925 M 512 @ 14 × 14
+Max pooling 2 × 2 / 2 0 <1 M 512 @ 7 × 7
+14 FC 4096 neurons 102 764 544 206 M 4096
+Dropout  0 0 4096
+15 FC 4096 neurons 16 781 312 34 M 4096
+Dropout  0 0 4096
+16 FC 1000 neurons 4 097 000 8 M 1000
+
+ 138 357 544 31 000 M 15 245 800
+Table D.3.: VGG-16 D architecture: The authors chose to give only layers a number which have
+learnable parameters. All convolutions are zero padded to prevent size changes and
+use ReLU activation functions. The channels mean is subtracted from each pixel as
+a preprocessing step (−103.939, −116.779, −123.68). As Dropout is only calculated
+during training time, the number of FLOPs is 0. The dropout probability is 0.5.
+The calculated number of parameters was checked against the downloaded version. It
+also has 138 357 544 parameters.
+ 
+D.4. GoogleNet, Inception v2 and v3
+The large number of parameters and operations is a problem when such models are to be
+applied in practice to thousands of images. In order to reduce the computational cost while
+maintaining the classification quality, GoogleNet [SLJ+15] and the Inception module were
+developed. The Inception module essentially only computes 1 × 1 filters, 3 × 3 filters and
+5 × 5 filters in parallel, but applies bottleneck 1 × 1 filters beforehand to reduce the number of
+parameters. It is shown in Figure D.4.
+Figure D.4.: Inception module
+Image source: [SLJ+15]
+Compared to GoogleNet, Inception v2 [SVI+15] removed the 5 × 5 filters and replaced
+them by two successive layers of 3 × 3 filters. A visualization of an Inception v2 module
+is given in Figure D.5. Additionally, Inception v2 applies successive asymmetric filters to
+approximate symmetric filters with fewer parameters. The authors call this approach filter
+factorization.
+Inception v3 introduced Batch Normalization to the network [SVI+15].
+Figure D.5.: Inception v2 module
+Image source: [SVI+15]
+
+D.5. Inception-v4
+Inception-v4 as described in [SIV16] consists of four main building blocks: The stem,
+Inception A, Inception B and Inception C. To quote the authors: Inception-v4 is a deeper,
+wider and more uniform simplified architecture than Inception-v3. The stem, Reduction A
+and Reduction B use max-pooling, whereas Inception A, Inception B and Inception C use
+average pooling. The stem, module B and module C use separable convolutions.
+# × Type Parameters Output size
+Input 3 @ 299 × 299
+1 Stem 605 728 384 @ 35 × 35
+2 4× Inception A 317 632 384 @ 35 × 35
+3 Reduction A 2 306 112 1024 @ 17 × 17
+4 7× Inception B 2 936 256 1024 @ 17 × 17
+5 Reduction B 2 747 392 1536 @ 8 × 8
+6 3× Inception C 4 553 088 1536 @ 8 × 8
+Global Average Pooling 0 1536 @ 1 × 1
+Dropout (p=0.8) 0 1536 @ 1 × 1
+7 Softmax 1 537 000 1000
+
+ 42 679 816
+Table D.4.: Inception-v4 network.
+ 
+
+E. Datasets
+Well-known benchmark datasets for classification problems in computer vision are listed
+in Table E.1. The best results known to me are given in Table E.2. However, every semantic
+segmentation dataset (e.g., PASCAL VOC) can also be used to benchmark image classifiers
+using Algorithm 2.
+Database Image Resolution (width × height) Number of Images Number of Classes Channels Data source
+MNIST 28 px × 28 px 70 000 10 1 [YL98, LBBH98]
+HASYv2 32 px × 32 px 168 233 369 1 [Tho17a]
+SVHN 32 px × 32 px 630 420 10 3 [NWC+11b], [NWC+11a]
+CIFAR-10 32 px × 32 px 60 000 10 3 [Kri, KH09]
+CIFAR-100 32 px × 32 px 60 000 100 3 [Kri, KH09]
+STL-10 96 px × 96 px 13 000 10 3 [CLN11, CLN10]
+Caltech-101 (80 px − 3481 px) × (92 px − 3999 px) 9144 102 3 [FFP03, FFFP06]
+Caltech-256 (75 px − 7913 px) × (75 px − 7913 px) 30 607 257 3 [Gri06, GG07]
+ILSVRC 2012^1 (8 px − 9331 px) × (10 px − 6530 px) 1.2 · 10^6 1000 3 [Ima12, RDS+14]
+Places365^2 (290 px − 3158 px) × (225 px − 2630 px) 1.8 · 10^6 365 3 [Zho16, ZKL+16]
+GTSRB (25 px − 266 px) × (25 px − 232 px) 51 839 43 3 [SSSI, SSSI12]
+Asirra^3 (4 px − 500 px) × (4 px − 500 px) 25 000 2 3 [Asi17, EDHS07]
+Graz-02 480 px × 640 px and 640 px × 480 px 1096 3 3 [Mar08, MS07]
+Table E.1.: An overview of publicly available image databases for classification. The number
+of images column gives the sum of the training and the test images. Some datasets, like
+SVHN, have additional unlabeled data which is not given in this table.
+^1 ImageNet Large Scale Visual Recognition Competition
+^2 The dimensions are only calculated for the validation set.
+^3 Asirra is a CAPTCHA created by Microsoft and was used in the “Cats vs Dogs” competition on Kaggle
+
+Dataset Model type / name Result Score Achieved / Claimed by
+MNIST — 0.21 % error [WZZ+13]
+HASYv2 TF-CNN 81.00 % accuracy [Tho17a]
+SVHN DenseNet (k = 24) 1.59 % error [HLW16]
+CIFAR-10 DenseNet-BC (k = 40) 3.46 % error [HLW16]
+CIFAR-100 WRN-28-10 16.21 % error [LH16]
+STL-10 SWWAE-4layer 74.80 % accuracy [ZMGL15]
+Caltech-101 SPP-net (pretrained) 93.42 %±0.5 % accuracy [HZRS14]
+Caltech-256 ZF-Net (pretrained) 74.2 %±0.3 % accuracy [ZF14]
+ImageNet 2012 ResNet ensemble 3.57 % Top-5 error [HZRS15a]
+GTSRB MCDNN 99.46 % accuracy [SL11]
+Asirra SVM  82.7 % accuracy [Gol08]
+Graz-02 Optimal NBNN 78.98 % accuracy [BMDP10]
+Table E.2.: An overview over state of the art results achieved in computer vision datasets.
+Algorithm 2 Create a classification dataset from a semantic segmentation dataset
+Require: Semantic segmentation dataset (D_S)
+procedure CreateDataset(Annotated dataset D_S)
+D_C ← List
+w ← desired image width
+h ← desired image height
+for Image and associated label (x, y) in D_S do
+i ← randint(0, L.width − w)
+j ← randint(0, L.height − h)
+c_L ← crop(y, (i, j), (i + w, j + h))
+if at least 50% of s are of one class then
+c_I ← crop(x, (i, j), (i + w, j + h))
+D.append((c_I, c_L))
+return (D_C)
+
+F. List of Tables
+2.1 Pooling types . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 8
+5.1 Baseline architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 39
+5.2 Baseline model evaluation . . . . . . . . . . . . . . . . . . . . . . . . . . . . 40
+5.3 Baseline model speed comparison . . . . . . . . . . . . . . . . . . . . . . . . 40
+5.4 Clustering errors for spectral clustering and CMO on CIFAR-100 . . . . . . 52
+5.5 Differences in spectral clustering and CMO. . . . . . . . . . . . . . . . . . . 52
+5.6 Accuracies for hierarchy of classifiers on CIFAR-100 . . . . . . . . . . . . . . 53
+5.7 Parameters of models with increased capacity . . . . . . . . . . . . . . . . . 54
+5.8 Training time for models with increased capacity . . . . . . . . . . . . . . . 54
+5.9 Baseline model training time . . . . . . . . . . . . . . . . . . . . . . . . . . 59
+5.10 Activation function properties . . . . . . . . . . . . . . . . . . . . . . . . . . 62
+5.11 Activation function evaluation results on CIFAR-100 . . . . . . . . . . . . . 63
+5.12 Activation function timing results on CIFAR-100 . . . . . . . . . . . . . . . 63
+5.13 Activation function evaluation results on MNIST . . . . . . . . . . . . . . . 64
+5.14 Optimized architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 66
+5.15 Optimized model evaluation results . . . . . . . . . . . . . . . . . . . . . . . 67
+5.16 Optimized model speed comparison . . . . . . . . . . . . . . . . . . . . . . . 67
+5.17 Optimized model mean training epochs . . . . . . . . . . . . . . . . . . . . . 68
+5.18 Optimized model trained with early stopping vs training with more data . . 69
+5.19 Model regularization with early stopping on training loss . . . . . . . . . . . 69
+5.20 Model regularization with early stopping on training loss - Training time . . 69
+A.1 99-percentile intervals for filter weights on CIFAR-100 . . . . . . . . . . . . 75
+A.2 Activation function evaluation results on HASYv2 . . . . . . . . . . . . . . . 77
+A.3 Activation function evaluation results on STL-10 . . . . . . . . . . . . . . . 78
+B.1 Data augmentation techniques . . . . . . . . . . . . . . . . . . . . . . . . . . 80
+B.2 Weight initialization schemes . . . . . . . . . . . . . . . . . . . . . . . . . . 81
+B.3 Activation functions . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 84
+D.1 LeNet-5 architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 90
+D.2 AlexNet architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 91
+D.3 VGG-16 D architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 93
+D.4 Inception-v4 network . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 95
+
+E.1 Image Benchmark datasets . . . . . . . . . . . . . . . . . . . . . . . . . . . . 97
+E.2 State of the Art results . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 98
+
+G. List of Figures
+2.1 Application of a single image filter (Convolution) . . . . . . . . . . . . . . . 3
+2.2 Application of a convolutional layer . . . . . . . . . . . . . . . . . . . . . . . 6
+2.3 Max pooling . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 8
+2.4 ResNet module . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11
+2.5 Aggregation block . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 12
+2.6 Dense block . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 13
+2.7 Validation curve . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 17
+2.8 Validation curve with plateaus . . . . . . . . . . . . . . . . . . . . . . . . . 18
+2.9 Learning curve . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 20
+2.10 Occlusion analysis . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 25
+2.11 Filter visualization . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 26
+3.1 Cascade-correlation network . . . . . . . . . . . . . . . . . . . . . . . . . . . 28
+4.1 Class Tree . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 33
+5.1 Baseline architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 39
+5.2 Baseline model filter weight distribution . . . . . . . . . . . . . . . . . . . . 42
+5.3 Baseline model bias weight distribution . . . . . . . . . . . . . . . . . . . . . 42
+5.4 Baseline model γ distribution . . . . . . . . . . . . . . . . . . . . . . . . . . 43
+5.5 Baseline model β distribution . . . . . . . . . . . . . . . . . . . . . . . . . . 43
+5.6 Baseline model filter weight range distribution . . . . . . . . . . . . . . . . . 44
+5.7 Baseline model CIFAR-100 validation accuracy . . . . . . . . . . . . . . . . 45
+5.8 Baseline Weight updates (mean) . . . . . . . . . . . . . . . . . . . . . . . . 46
+5.9 Baseline Weight updates (maximum) . . . . . . . . . . . . . . . . . . . . . . 47
+5.10 Baseline Weight updates (sum) . . . . . . . . . . . . . . . . . . . . . . . . . 47
+5.11 Confusion matrices for CIFAR-10 . . . . . . . . . . . . . . . . . . . . . . . . 48
+5.12 Confusion matrices for GTSRB . . . . . . . . . . . . . . . . . . . . . . . . . 49
+5.13 Confusion matrices for HASYv2 . . . . . . . . . . . . . . . . . . . . . . . . . 50
+5.14 Confusion matrix of CIFAR-100 . . . . . . . . . . . . . . . . . . . . . . . . . 51
+5.15 Mean weight updates of model with bottleneck . . . . . . . . . . . . . . . . 55
+5.16 Optimized architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 67
+A.1 Image Filters . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 75
+A.2 Bias weight distribution without BN . . . . . . . . . . . . . . . . . . . . . . 76
+
+A.3 Maximum weight updates of baseline with bottleneck . . . . . . . . . . . . . 77
+A.4 Sum of weight updates of baseline with bottleneck . . . . . . . . . . . . . . 78
+B.1 Activation functions . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 85
+D.1 LeNet-5 architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 90
+D.2 AlexNet architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 91
+D.3 VGG-16 D architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 92
+D.4 Inception module . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 94
+D.5 Inception v2 module . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 94
+
+H. Bibliography
+[AAB+
+16] M. Abadi, A. Agarwal et al., “Tensorflow: Large-scale machine learning on
+heterogeneous distributed systems,” arXiv preprint arXiv:1603.04467, Mar.
+2016. [Online]. Available: https://arxiv.org/abs/1603.04467
+[ABKS99] M. Ankerst, M. M. Breunig et al., “ OPTICS: Ordering points to identify the
+clustering structure,” in ACM Sigmod record, vol. 28, no. 2. ACM, 1999, pp.
+49–60.
+[ADG+
+16] M. Andrychowicz, M. Denil et al., “Learning to learn by gradient descent by
+gradient descent,” in Advances in Neural Information Processing Systems 29
+(NIPS), D. D. Lee, M. Sugiyama et al., Eds. Curran Associates, Inc., Mar.
+2016, pp. 3981–3989. [Online]. Available: http://papers.nips.cc/paper/6461-learning-to-
+learn-by-gradient-descent-by-gradient-descent.pdf
+[AM15] M. T. Alexander Mordvintsev, Christopher Olah, “Inceptionism:
+Going deeper into neural networks,” Jun. 2015. [Online]. Available:
+https://research.googleblog.com/2015/06/inceptionism-going-deeper-into-
+neural.html
+[Asi17] “Kaggle cats and dogs dataset,” Oct. 2017. [Online]. Available: https:
+//www.microsoft.com/en-us/download/details.aspx?id=54765
+[BB12] J. Bergstra and Y. Bengio, “Random search for hyper-parameter optimization,”
+Journal of Machine Learning Research, vol. 13, no. Feb, pp. 281–305,
+Feb. 2012. [Online]. Available: http://jmlr.csail.mit.edu/papers/volume13/
+bergstra12a/bergstra12a.pdf
+[BCW+
+17] J. Bao, D. Chen et al., “ CVAE-GAN: Fine-grained image generation through
+asymmetric training,” arXiv preprint arXiv:1703.10155, Mar. 2017. [Online].
+Available: https://arxiv.org/abs/1703.10155
+[BDLB09] J. Bergstra, G. Desjardins et al., “Quadratic polynomials learn better image
+ features,” Département d’Informatique et de Recherche Opérationnelle,
+Université de Montréal, Tech. Rep. 1337, 2009.
+[BGNR16] B. Baker, O. Gupta et al., “Designing neural network architectures using
+reinforcement learning,” arXiv preprint arXiv:1611.02167, Nov. 2016. [Online].
+Available: https://arxiv.org/abs/1611.02167
+ 
+[BM93] U. Bodenhausen and S. Manke, Automatically Structured Neural
+Networks For Handwritten Character And Word Recognition. London:
+Springer London, Sep. 1993, pp. 956–961. [Online]. Available: http:
+//dx.doi.org/10.1007/978-1-4471-2063-6_283
+[BMDP10]
+ R. Behmo, P. Marcombes et al., “Towards optimal naive Bayes nearest
+neighbor,” in European Conference on Computer Vision (ECCV). Springer,
+2010, pp. 171–184.
+[BPL10] Y.-L. Boureau, J. Ponce, and Y. LeCun, “A theoretical analysis of
+feature pooling in visual recognition,” in International Conference on
+Machine Learning (ICML), no. 27, 2010, pp. 111–118. [Online]. Available:
+http://yann.lecun.com/exdb/publis/pdf/boureau-icml-10.pdf
+[BSF94] Y. Bengio, P. Simard, and P. Frasconi, “Learning long-term dependencies
+with gradient descent is difficult,” IEEE transactions on neural networks,
+vol. 5, no. 2, pp. 157–166, 1994.
+[Cha92] C. Charalambous, “Conjugate gradient algorithm for efficient training
+of artificial neural networks,” IEEE Proceedings G-Circuits, Devices
+and Systems, vol. 139, no. 3, pp. 301–310, 1992. [Online]. Available:
+http://ieeexplore.ieee.org/document/143326/
+[Cho15] F. Chollet, “Keras,” https://github.com/fchollet/keras, 2015.
+[CLN10] A. Coates, H. Lee, and A. Y. Ng, “An analysis of single-layer networks
+in unsupervised feature learning,” Ann Arbor, vol. 1001, no. 48109,
+p. 2, 2010. [Online]. Available: http://cs.stanford.edu/~acoates/papers/
+coatesleeng_aistats_2011.pdf
+[CLN11] A. Coates, H. Lee, and A. Y. Ng, “ STL-10 dataset,” 2011. [Online]. Available:
+http://cs.stanford.edu/~acoates/stl10
+[CMS12] D. Ciregan, U. Meier, and J. Schmidhuber, “Multi-column deep neural
+networks for image classification,” in Conference on Computer Vision and
+Pattern Recognition (CVPR). IEEE, Feb. 2012, pp. 3642–3649. [Online].
+Available: https://arxiv.org/abs/1202.2745v1
+[CUH15] D.-A. Clevert, T. Unterthiner, and S. Hochreiter, “Fast and accurate
+deep network learning by exponential linear units (ELUs),” arXiv
+preprint arXiv:1511.07289, Nov. 2015. [Online]. Available: https:
+//arxiv.org/abs/1511.07289
+[CWV+
+14] S. Chetlur, C. Woolley et al., “ cuDNN: Efficient primitives for deep
+learning,” arXiv preprint arXiv:1410.0759, Oct. 2014. [Online]. Available:
+https://arxiv.org/abs/1410.0759
+
+[DBB+
+01] C. Dugas, Y. Bengio et al., “Incorporating second-order functional
+knowledge for better option pricing,” in Advances in Neural Information
+ Processing Systems 13 (NIPS), T. K. Leen, T. G. Dietterich,
+and V. Tresp, Eds. MIT Press, 2001, pp. 472–478. [Online].
+Available: http://papers.nips.cc/paper/1920-incorporating-second-order-functional-knowledge-
+for-better-option-pricing.pdf
+[DDFK16] S. Dieleman, J. De Fauw, and K. Kavukcuoglu, “Exploiting cyclic symmetry
+in convolutional neural networks,” arXiv preprint arXiv:1602.02660, Feb.
+2016. [Online]. Available: https://arxiv.org/abs/1602.02660
+[DHS11] J. Duchi, E. Hazan, and Y. Singer, “Adaptive subgradient methods for
+online learning and stochastic optimization,” Journal of Machine Learning
+Research, vol. 12, no. Jul, pp. 2121–2159, 2011. [Online]. Available:
+http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf
+[DHS16]
+ J. Dai, K. He, and J. Sun, “Instance-aware semantic segmentation via
+multi-task network cascades,” in Conference on Computer Vision and Pattern
+Recognition (CVPR). IEEE, 2016, pp. 3150–3158. [Online]. Available:
+https://arxiv.org/abs/1512.04412
+[DJ99]
+ W. Duch and N. Jankowski, “Survey of neural transfer functions,” Neural
+Computing Surveys, vol. 2, no. 1, pp. 163–212, 1999. [Online]. Available:
+ftp://ftp.icsi.berkeley.edu/pub/ai/jagota/vol2_6.pdf
+[Doz15] T. Dozat, “Incorporating Nesterov momentum into Adam,” Stanford
+University, Tech. Rep., 2015. [Online]. Available: http://cs229.stanford.edu/
+proj2015/054_report.pdf
+[DSRB14] A. Dosovitskiy, J. T. Springenberg et al., “Discriminative unsupervised
+feature learning with convolutional neural networks,” in Advances in Neural
+Information Processing Systems 27 (NIPS), Z. Ghahramani, M. Welling
+et al., Eds. Curran Associates, Inc., 2014, pp. 766–774. [Online].
+Available: http://papers.nips.cc/paper/5548-discriminative-unsupervised-feature-learning-
+with-convolutional-neural-networks.pdf
+[DWD15]
+ S. Dieleman, K. W. Willett, and J. Dambre, “Rotation-invariant convolutional
+neural networks for galaxy morphology prediction,” Monthly notices of the
+royal astronomical society, vol. 450, no. 2, pp. 1441–1459, 2015.
+[EDHS07] J. Elson, J. J. Douceur et al., “Asirra: A CAPTCHA that
+exploits interest-aligned manual image categorization,” in ACM Conference
+ on Computer and Communications Security (CCS), no. 14.
+Association for Computing Machinery, Inc., Oct. 2007. [Online].
+
+Available: https://www.microsoft.com/en-us/research/publication/asirra-a-captcha-
+that-exploits-interest-aligned-manual-image-categorization/
+[EKS+
+96] M. Ester, H.-P. Kriegel et al., “A density-based algorithm for discovering
+clusters in large spatial databases with noise.” in Kdd, vol. 96, no. 34, 1996,
+pp. 226–231.
+[ES03] A. E. Eiben and J. E. Smith, Introduction to evolutionary computing.
+Springer, 2003, vol. 53. [Online]. Available: https://dx.doi.org/10.1007/978-3-662-
+44874-8
+[Fah88] S. E. Fahlman, “An empirical study of learning speed in back-propagation
+networks,” 1988. [Online]. Available: http://repository.cmu.edu/cgi/
+viewcontent.cgi?article=2799&context=compsci
+[FFFP06] L. Fei-Fei, R. Fergus, and P. Perona, “One-shot learning of object
+categories,” IEEE transactions on pattern analysis and machine intelligence,
+vol. 28, no. 4, pp. 594–611, Apr. 2006. [Online]. Available: http:
+//vision.stanford.edu/documents/Fei-FeiFergusPerona2006.pdf
+[FFP03] R. F. Fei-Fei and P. Perona, “Caltech 101,” 2003. [Online]. Available: http:
+//www.vision.caltech.edu/Image_Datasets/Caltech101/Caltech101.html
+[FGMR10] P. F. Felzenszwalb, R. B. Girshick et al., “Object detection with discriminatively
+trained part-based models,” IEEE transactions on pattern analysis and
+machine intelligence, vol. 32, no. 9, pp. 1627–1645, 2010.
+[FL89] S. E. Fahlman and C. Lebiere, “The cascade-correlation learning architecture,”
+1989. [Online]. Available: http://repository.cmu.edu/compsci/1938/
+[GB10] X. Glorot and Y. Bengio, “Understanding the difficulty of training deep
+feedforward neural networks.” in Aistats, vol. 9, 2010, pp. 249–256. [Online].
+Available: http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf
+[GBB11] X. Glorot, A. Bordes, and Y. Bengio, “Deep sparse rectifier neural
+networks.” in Aistats, vol. 15, no. 106, 2011, p. 275. [Online]. Available:
+http://www.jmlr.org/proceedings/papers/v15/glorot11a/glorot11a.pdf
+[GDDM14] R. Girshick, J. Donahue et al., “Rich feature hierarchies for accurate object
+detection and semantic segmentation,” in Conference on Computer Vision
+and Pattern Recognition (CVPR). IEEE, 2014, pp. 580–587. [Online].
+Available: https://arxiv.org/abs/1311.2524
+[GG07] P. P. Greg Griffin, Alex Holub, “Caltech-256 object category dataset,” Apr.
+2007. [Online]. Available: http://authors.library.caltech.edu/7694/
+
+[GG16] Y. Gal and Z. Ghahramani, “Bayesian convolutional neural networks with
+Bernoulli approximate variational inference,” arXiv preprint arXiv:1506.02158,
+Jan. 2016. [Online]. Available: https://arxiv.org/abs/1506.02158v6
+[GJ02] M. R. Garey and D. S. Johnson, Computers and intractability. wh freeman
+New York, 2002, vol. 29.
+[GJS76] M. R. Garey, D. S. Johnson, and L. Stockmeyer, “Some simplified NP-complete
+graph problems,” Theoretical computer science, vol. 1, no. 3, pp. 237–267,
+1976.
+[Gol08] P. Golle, “Machine learning attacks against the Asirra CAPTCHA,” in ACM
+conference on Computer and communications security (CCS), no. 15. ACM,
+2008, pp. 535–542.
+[Gra15]
+ B. Graham, “Fractional max-pooling,” arXiv preprint arXiv:1412.6071, May
+2015. [Online]. Available: https://arxiv.org/abs/1412.6071
+[Gri06] A. P. Griffin, G. Holub, “Caltech 256,” 2006. [Online]. Available:
+http://www.vision.caltech.edu/Image_Datasets/Caltech256/
+[GWFM+
+13] I. J. Goodfellow, D. Warde-Farley et al., “Maxout networks.” ICML,
+vol. 28, no. 3, pp. 1319–1327, 2013. [Online]. Available: http:
+//www.jmlr.org/proceedings/papers/v28/goodfellow13.pdf
+[HAE16] M. Huh, P. Agrawal, and A. A. Efros, “What makes ImageNet good for
+transfer learning?” arXiv preprint arXiv:1608.08614, Aug. 2016. [Online].
+Available: https://arxiv.org/abs/1608.08614
+[Han89] S. J. Hanson, “Meiosis networks.” in NIPS, 1989, pp. 533–541. [Online].
+Available: http://papers.nips.cc/paper/227-meiosis-networks.pdf
+[Har15] M. Harris, “New features in CUDA 7.5,” Jul. 2015. [Online]. Available:
+https://devblogs.nvidia.com/parallelforall/new-features-cuda-7-5/
+[HLW16] G. Huang, Z. Liu, and K. Q. Weinberger, “Densely connected convolutional
+networks,” arXiv preprint arXiv:1608.06993, Aug. 2016. [Online]. Available:
+https://arxiv.org/abs/1608.06993v1
+[HM16] M. Hardt and T. Ma, “Identity matters in deep learning,” arXiv
+preprint arXiv:1611.04231, Nov. 2016. [Online]. Available: https:
+//arxiv.org/abs/1611.04231
+[How13] A. G. Howard, “Some improvements on deep convolutional neural network
+based image classification,” arXiv preprint arXiv:1312.5402, Dec. 2013.
+[Online]. Available: https://arxiv.org/abs/1312.5402
+ 
+[HPK11] J. Han, J. Pei, and M. Kamber, Data mining: concepts and techniques.
+Elsevier, 2011.
+[HPN+
+16]
+ S. Han, J. Pool et al., “ DSD: Regularizing deep neural networks with
+dense-sparse-dense training flow,” arXiv preprint arXiv:1607.04381, Jul. 2016.
+[Online]. Available: https://arxiv.org/abs/1607.04381
+[HPTD15] S. Han, J. Pool et al., “Learning both weights and connections for efficient
+neural network,” in Advances in Neural Information Processing Systems 28
+(NIPS), C. Cortes, N. D. Lawrence et al., Eds. Curran Associates, Inc., Jun.
+2015, pp. 1135–1143. [Online]. Available: http://papers.nips.cc/paper/5784-learning-both-weights-
+and-connections-for-efficient-neural-network.pdf
+[HSK+
+12] G. E. Hinton, N. Srivastava et al., “Improving neural networks by preventing
+co-adaptation of feature detectors,” arXiv preprint arXiv:1207.0580, Jul.
+2012. [Online]. Available: https://arxiv.org/abs/1207.0580
+[HSL+
+16] G. Huang, Y. Sun et al., “Deep networks with stochastic depth,”
+arXiv preprint arXiv:1603.09382, Mar. 2016. [Online]. Available: https:
+//arxiv.org/abs/1603.09382
+[HSW93] B. Hassibi, D. G. Stork, and G. J. Wolff, “Optimal brain surgeon
+and general network pruning,” in International Conference on Neural
+Networks. IEEE, 1993, pp. 293–299. [Online]. Available: http:
+//ee.caltech.edu/Babak/pubs/conferences/00298572.pdf
+[HVD15]
+ G. Hinton, O. Vinyals, and J. Dean, “Distilling the knowledge in a neural
+network,” arXiv preprint arXiv:1503.02531, Mar. 2015. [Online]. Available:
+https://arxiv.org/abs/1503.02531
+[HZRS14] K. He, X. Zhang et al., “Spatial pyramid pooling in deep convolutional
+networks for visual recognition,” in European Conference on Computer
+Vision (ECCV). Springer, 2014, pp. 346–361. [Online]. Available:
+https://arxiv.org/abs/1406.4729
+[HZRS15a] K. He, X. Zhang et al., “Deep residual learning for image recognition,”
+arXiv preprint arXiv:1512.03385, Dec. 2015. [Online]. Available: https:
+//arxiv.org/abs/1512.03385v1
+[HZRS15b] K. He, X. Zhang et al., “Delving deep into rectifiers: Surpassing human-level
+performance on imagenet classification,” in International Conference on
+Computer Vision (ICCV), Feb. 2015, pp. 1026–1034. [Online]. Available:
+https://arxiv.org/abs/1502.01852
+[Ima12] “Imagenet large scale visual recognition challenge 2012 (ILSVRC2012),”
+
+2012. [Online]. Available: http://www.image-net.org/challenges/LSVRC/
+2012/nonpub-downloads
+[IS15] S. Ioffe and C. Szegedy, “Batch normalization: Accelerating deep network
+training by reducing internal covariate shift,” arXiv preprint arXiv:1502.03167,
+Feb. 2015. [Online]. Available: https://arxiv.org/abs/1502.03167
+[JXF+
+16] X. Jin, C. Xu et al., “Deep learning with s-shaped rectified linear activation
+units,” in Thirtieth AAAI Conference on Artificial Intelligence, Dec. 2016.
+[Online]. Available: https://arxiv.org/abs/1512.07030
+[Kar11] A. Karpathy, “Lessons learned from manually classifying CIFAR-10,” Apr.
+2011. [Online]. Available: http://karpathy.github.io/2011/04/27/manually-classifying-cifar10/
+
+[KB14] D. Kingma and J. Ba, “Adam: A method for stochastic optimization,”
+arXiv preprint arXiv:1412.6980, Dec. 2014. [Online]. Available: https:
+//arxiv.org/abs/1412.6980
+[KH09] A. Krizhevsky and G. Hinton, “Learning multiple layers of features from tiny
+images,” Apr. 2009. [Online]. Available: https://www.cs.toronto.edu/~kriz/
+learning-features-2009-TR.pdf
+[KMN+
+16] N. S. Keskar, D. Mudigere et al., “On large-batch training for deep learning:
+Generalization gap and sharp minima,” arXiv preprint arXiv:1609.04836,
+Sep. 2016. [Online]. Available: https://arxiv.org/abs/1609.04836
+[Koc15] T. Kocmánek, “ HyperNEAT and novelty search for image recognition,” Ph.D.
+dissertation, Master’s thesis, Czech Technical University in Prague, 2015.
+[Online]. Available: http://kocmi.tk/photos/DiplomaThesis.pdf
+[KPY+
+15] Y.-D. Kim, E. Park et al., “Compression of deep convolutional neural networks
+for fast and low power mobile applications,” arXiv preprint arXiv:1511.06530,
+Nov. 2015. [Online]. Available: https://arxiv.org/abs/1511.06530
+[KR09] L. Kaufman and P. J. Rousseeuw, Finding groups in data: an introduction to
+cluster analysis. John Wiley & Sons, 2009, vol. 344.
+[Kri] A. Krizhevsky, “The CIFAR-10 dataset.” [Online]. Available: https:
+//www.cs.toronto.edu/~kriz/cifar.html
+[KS02] V. Kurkova and M. Sanguineti, “Comparison of worst case errors in linear
+and neural network approximation,” IEEE Transactions on Information
+Theory, vol. 48, no. 1, pp. 264–275, Jan. 2002. [Online]. Available:
+http://ieeexplore.ieee.org/abstract/document/971754/
+ 
+[KSH12] A. Krizhevsky, I. Sutskever, and G. E. Hinton, “Imagenet classification
+with deep convolutional neural networks,” in Advances in Neural
+Information Processing Systems 25 (NIPS), F. Pereira, C. J. C. Burges
+et al., Eds. Curran Associates, Inc., 2012, pp. 1097–1105. [Online].
+Available: http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-
+networks.pdf
+[KSlB+
+10] K. Kavukcuoglu, P. Sermanet et al., “Learning convolutional feature
+hierarchies for visual recognition,” in Advances in Neural Information
+Processing Systems 23 (NIPS), J. D. Lafferty, C. K. I. Williams
+et al., Eds. Curran Associates, Inc., 2010, pp. 1090–1098. [Online].
+Available: http://papers.nips.cc/paper/4133-learning-convolutional-feature-hierarchies-
+for-visual-recognition.pdf
+[LAE+
+16] W. Liu, D. Anguelov et al., “ SSD: Single shot multibox detector,” in
+European Conference on Computer Vision (ECCV). Springer, 2016, pp.
+21–37. [Online]. Available: https://arxiv.org/abs/1512.02325
+[Las17] “Noise layers,” Jan. 2017. [Online]. Available: http://lasagne.readthedocs.io/
+en/latest/modules/layers/noise.html#lasagne.layers.DropoutLayer
+[LBBH98]
+ Y. LeCun, L. Bottou et al., “Gradient-based learning applied to document
+recognition,” Proceedings of the IEEE, vol. 86, no. 11, pp. 2278–2324, Nov.
+1998. [Online]. Available: http://yann.lecun.com/exdb/publis/pdf/lecun01a.pdf
+
+[LBH15] Y. LeCun, Y. Bengio, and G. Hinton, “Deep learning,” Nature,
+vol. 521, no. 7553, pp. 436–444, May 2015. [Online]. Available:
+http://www.nature.com/nature/journal/v521/n7553/abs/nature14539.html
+[LBOM98] Y. A. LeCun, L. Bottou et al., Efficient BackProp, ser. Lecture Notes in
+Computer Science. Berlin, Heidelberg: Springer Berlin Heidelberg, 1998, vol.
+1524, pp. 9–50. [Online]. Available: http://dx.doi.org/10.1007/3-540-49430-8
+[LDS+
+89] Y. LeCun, J. S. Denker et al., “Optimal brain damage.” in NIPs, vol. 2, 1989,
+pp. 598–605. [Online]. Available: http://yann.lecun.com/exdb/publis/pdf/
+lecun-90b.pdf
+[Le13] Q. V. Le, “Building high-level features using large scale unsupervised
+learning,” in International conference on acoustics, speech and signal
+processing. IEEE, 2013, pp. 8595–8598. [Online]. Available: http:
+//ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=6639343
+[LG16] A. Lavin and S. Gray, “Fast algorithms for convolutional neural networks,” in
+
+Conference on Computer Vision and Pattern Recognition (CVPR). IEEE, Sep.
+2016, pp. 4013–4021. [Online]. Available: https://arxiv.org/abs/1509.09308
+[LGT16]
+ C.-Y. Lee, P. W. Gallagher, and Z. Tu, “Generalizing pooling functions in
+convolutional neural networks: Mixed, gated, and tree,” in International
+Conference on Artificial Intelligence and Statistics, 2016. [Online]. Available:
+https://arxiv.org/abs/1509.08985v2
+[LH16]
+ I. Loshchilov and F. Hutter, “ SGDR: stochastic gradient descent
+with warm restarts,” Learning, Aug. 2016. [Online]. Available: https:
+//arxiv.org/abs/1608.03983
+[LJD+
+16] L. Li, K. Jamieson et al., “Hyperband: A novel bandit-based approach to
+hyperparameter optimization,” arXiv preprint arXiv:1603.06560, Mar. 2016.
+[Online]. Available: https://arxiv.org/abs/1603.06560
+[LM16] K. Li and J. Malik, “Learning to optimize,” arXiv preprint arXiv:1606.01885,
+Jun. 2016. [Online]. Available: https://arxiv.org/abs/1606.01885
+[LSD15] J. Long, E. Shelhamer, and T. Darrell, “Fully convolutional networks for
+semantic segmentation,” in Conference on Computer Vision and Pattern
+Recognition (CVPR). IEEE, Mar. 2015, pp. 3431–3440. [Online]. Available:
+https://arxiv.org/abs/1411.4038v2
+[LX17] A. Y. Lingxi Xie, “Genetic CNN,” arXiv preprint arXiv:1703.01513, Mar.
+2017. [Online]. Available: https://arxiv.org/abs/1703.01513
+[Maj17] S. Majumdar, “Densenet,” GitHub, Feb. 2017. [Online]. Available:
+https://github.com/titu1994/DenseNet
+[Mar08] M. Marszałek, “ INRIA annotations for Graz-02 (IG02),” Oct. 2008. [Online].
+Available: http://lear.inrialpes.fr/people/marszalek/data/ig02/
+[MDA15] D. Maclaurin, D. Duvenaud, and R. Adams, “Gradient-based hyperparameter
+optimization through reversible learning,” in International Conference on
+Machine Learning (ICML), 2015, pp. 2113–2122.
+[MH08] L. v. d. Maaten and G. Hinton, “Visualizing data using t-SNE,” Journal of
+Machine Learning Research, vol. 9, no. Nov, pp. 2579–2605, 2008.
+[MHN13]
+ A. L. Maas, A. Y. Hannun, and A. Y. Ng, “Rectifier nonlinearities
+improve neural network acoustic models,” in Proc. ICML, vol. 30,
+no. 1, 2013. [Online]. Available: https://web.stanford.edu/~awni/papers/
+relu_hybrid_icml2013_final.pdf
+[MM15] D. Mishkin and J. Matas, “All you need is a good init,” arXiv
+
+preprint arXiv:1511.06422, Nov. 2015. [Online]. Available: https:
+//arxiv.org/abs/1511.06422
+[MP43]
+ W. S. McCulloch and W. Pitts, “A logical calculus of the ideas immanent in
+nervous activity,” The bulletin of mathematical biophysics, vol. 5, no. 4, pp.
+115–133, 1943.
+[MRM15] N. McLaughlin, J. M. D. Rincon, and P. Miller, “Data-augmentation for
+reducing dataset bias in person re-identification,” in International Conference
+on Advanced Video and Signal Based Surveillance (AVSS), no. 12, Aug. 2015,
+pp. 1–6. [Online]. Available: http://ieeexplore.ieee.org/abstract/document/
+7301739/
+[MS07] M. Marszalek and C. Schmid, “Accurate object localization with
+shape masks,” in Conference on Computer Vision and Pattern
+Recognition (CVPR). IEEE, 2007, pp. 1–8. [Online]. Available: http:
+//ieeexplore.ieee.org/document/4270110/
+[MSM16] D. Mishkin, N. Sergievskiy, and J. Matas, “Systematic evaluation of CNN
+advances on the ImageNet,” arXiv preprint arXiv:1606.02228, Jun. 2016.
+[Online]. Available: https://arxiv.org/abs/1606.02228
+[MV16]
+ A. Mahendran and A. Vedaldi, “Visualizing deep convolutional neural
+networks using natural pre-images,” International Journal of Computer Vision,
+pp. 1–23, Apr. 2016. [Online]. Available: https://arxiv.org/abs/1512.02017
+[NDRT13] N. Natarajan, I. S. Dhillon et al., “Learning with noisy labels,” in Advances
+in Neural Information Processing Systems 26 (NIPS), C. J. C. Burges,
+L. Bottou et al., Eds. Curran Associates, Inc., 2013, pp. 1196–1204. [Online].
+Available: http://papers.nips.cc/paper/5073-learning-with-noisy-labels.pdf
+[Nes83] Y. Nesterov, “A method of solving a convex programming problem with
+convergence rate $O(1/k^2)$,” in Soviet Mathematics Doklady, vol. 27, no. 2,
+1983, pp. 372–376.
+[new00] “The training performed by qnstrn,” Aug. 2000. [Online]. Available:
+http://www1.icsi.berkeley.edu/Speech/faq/nn-train.html
+[Ng16] A. Ng, “Nuts and bolts of building ai applications using deep learning,” NIPS
+Talk, Dec. 2016.
+[NH92] S. J. Nowlan and G. E. Hinton, “Simplifying neural networks by soft
+weight-sharing,” Neural computation, vol. 4, no. 4, pp. 473–493, 1992.
+[Online]. Available: https://www.cs.toronto.edu/~hinton/absps/sunspots.pdf
+[NH02] R. T. Ng and J. Han, “CLARANS: A method for clustering objects for spatial
+
+data mining,” IEEE transactions on knowledge and data engineering, vol. 14,
+no. 5, pp. 1003–1016, 2002.
+[NWC+
+11a]
+ Y. Netzer, T. Wang et al., “Reading digits in natural images with
+unsupervised feature learning,” in NIPS workshop on deep learning and
+unsupervised feature learning, vol. 2011, no. 2, 2011, p. 5. [Online]. Available:
+http://ufldl.stanford.edu/housenumbers/nips2011_housenumbers.pdf
+[NWC+
+11b] Y. Netzer, T. Wang et al., “The street view house numbers (SVHN) dataset,”
+2011. [Online]. Available: http://ufldl.stanford.edu/housenumbers/
+[NYC16] A. Nguyen, J. Yosinski, and J. Clune, “Multifaceted feature visualization:
+Uncovering the different types of features learned by each neuron in deep
+neural networks,” arXiv preprint arXiv:1602.03616, May 2016. [Online].
+Available: https://arxiv.org/abs/1602.03616
+[OHIL16] J. Ortigosa-Hernández, I. Inza, and J. A. Lozano, “Towards competitive
+classifiers for unbalanced classification problems: A study on the performance
+scores,” arXiv preprint arXiv:1608.08984, Aug. 2016. [Online]. Available:
+https://arxiv.org/abs/1608.08984
+[PMW+
+15] N. Papernot, P. McDaniel et al., “Distillation as a defense to adversarial
+perturbations against deep neural networks,” arXiv preprint arXiv:1511.04508,
+Nov. 2015. [Online]. Available: https://arxiv.org/abs/1511.04508
+[Pre98] L. Prechelt, Early Stopping - But When? Berlin, Heidelberg: Springer
+Berlin Heidelberg, 1998, pp. 55–69. [Online]. Available: http://dx.doi.org/
+10.1007/3-540-49430-8_3
+[RDS+
+14] O. Russakovsky, J. Deng et al., “Imagenet large scale visual recognition
+challenge,” arXiv preprint arXiv:1409.0575, vol. 115, no. 3, pp. 211–252, Sep.
+2014. [Online]. Available: https://arxiv.org/abs/1409.0575
+[RFB15] O. Ronneberger, P. Fischer, and T. Brox, “U-net: Convolutional networks
+for biomedical image segmentation,” in International Conference on Medical
+Image Computing and Computer-Assisted Intervention. Springer, 2015, pp.
+234–241. [Online]. Available: https://arxiv.org/abs/1505.04597
+[RLS10] S. Risi, J. Lehman, and K. O. Stanley, “Evolving the placement and density
+ of neurons in the hyperneat substrate,” in Conference on Genetic and
+evolutionary computation, no. 12. ACM, 2010, pp. 563–570.
+[RSG16] M. T. Ribeiro, S. Singh, and C. Guestrin, “"why should i trust you?":
+Explaining the predictions of any classifier,” arXiv preprint arXiv:1602.04938,
+Feb. 2016. [Online]. Available: https://arxiv.org/abs/1602.04938
+ 
+[Rud16] S. Ruder, “An overview of gradient descent optimization algorithms,”
+arXiv preprint arXiv:1609.04747, Sep. 2016. [Online]. Available: https:
+//arxiv.org/abs/1609.04747
+[SCL12] P. Sermanet, S. Chintala, and Y. LeCun, “Convolutional neural networks
+applied to house numbers digit classification,” in International Conference
+on Pattern Recognition (ICPR), no. 21. IEEE, Apr. 2012, pp. 3288–3291.
+[Online]. Available: https://arxiv.org/abs/1204.3968
+[SDG09] K. O. Stanley, D. B. D’Ambrosio, and J. Gauci, “A hypercube-based encoding
+for evolving large-scale neural networks,” Artificial life, vol. 15, no. 2, pp. 185–
+212, 2009. [Online]. Available: http://ieeexplore.ieee.org/document/6792316/
+[SEZ+
+13] P. Sermanet, D. Eigen et al., “Overfeat: Integrated recognition, localization
+and detection using convolutional networks,” arXiv preprint arXiv:1312.6229,
+Feb. 2013. [Online]. Available: https://arxiv.org/abs/1312.6229v4
+[SHK+
+14] N. Srivastava, G. E. Hinton et al., “Dropout: a simple way to
+prevent neural networks from overfitting.” Journal of Machine Learning
+Research, vol. 15, no. 1, pp. 1929–1958, 2014. [Online]. Available:
+https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf
+[SHY+
+13] A. Senior, G. Heigold et al., “An empirical study of learning rates in deep
+neural networks for speech recognition,” in International Conference on
+Acoustics, Speech and Signal Processing. IEEE, 2013, pp. 6724–6728. [Online].
+Available: http://ieeexplore.ieee.org/document/6638963/?arnumber=6638963
+[SIV16] C. Szegedy, S. Ioffe, and V. Vanhoucke, “Inception-v4, inception-resnet and the
+impact of residual connections on learning,” arXiv preprint arXiv:1602.07261,
+Feb. 2016. [Online]. Available: https://arxiv.org/abs/1602.07261
+[SKP15] F. Schroff, D. Kalenichenko, and J. Philbin, “Facenet: A unified embedding
+for face recognition and clustering,” in Conference on Computer Vision
+and Pattern Recognition (CVPR). IEEE, Mar. 2015, pp. 815–823. [Online].
+Available: https://arxiv.org/abs/1503.03832
+[SL11] P. Sermanet and Y. LeCun, “Traffic sign recognition with multi-scale
+convolutional networks,” in International Joint Conference on Neural
+Networks (IJCNN), Jul. 2011, pp. 2809–2813. [Online]. Available:
+http://ieeexplore.ieee.org/document/6033589/
+[SLJ+
+15] C. Szegedy, W. Liu et al., “Going deeper with convolutions,” in Conference
+on Computer Vision and Pattern Recognition (CVPR). IEEE, Sep. 2015, pp.
+1–9. [Online]. Available: https://arxiv.org/abs/1409.4842
+[SM02] K. O. Stanley and R. Miikkulainen, “Evolving neural networks through
+
+augmenting topologies,” Evolutionary computation, vol. 10, no. 2, pp. 99–127,
+2002. [Online]. Available: http://www.mitpressjournals.org/doi/abs/10.1162/
+106365602320169811
+[SMG13] A. M. Saxe, J. L. McClelland, and S. Ganguli, “Exact solutions to
+the nonlinear dynamics of learning in deep linear neural networks,”
+arXiv preprint arXiv:1312.6120, Dec. 2013. [Online]. Available: https:
+//arxiv.org/abs/1312.6120
+[SMGS14] R. K. Srivastava, J. Masci et al., “Understanding locally competitive
+networks,” arXiv preprint arXiv:1410.1165, Oct. 2014. [Online]. Available:
+https://arxiv.org/abs/1410.1165
+[SSSI] J. Stallkamp, M. Schlipsing et al., “The german traffic sign recognition
+benchmark.” [Online]. Available: http://benchmark.ini.rub.de/?section=
+gtsrb&subsection=news
+[SSSI12] J. Stallkamp, M. Schlipsing et al., “Man vs. computer: Benchmarking
+machine learning algorithms for traffic sign recognition,” Neural Networks,
+no. 0, pp. –, 2012. [Online]. Available: http://www.sciencedirect.com/science/
+article/pii/S0893608012000457
+[SV16] S. Saxena and J. Verbeek, “Convolutional neural fabrics,” arXiv preprint
+arXiv:1606.02492, 2016. [Online]. Available: https://arxiv.org/abs/1606.02492
+[SVI+15] C. Szegedy, V. Vanhoucke et al., “Rethinking the inception
+architecture for computer vision,” arXiv preprint arXiv:1512.00567,
+Dec. 2015. [Online].
+Available: https://arxiv.org/abs/1512.00567v3
+[SVZ13] K. Simonyan, A. Vedaldi, and A. Zisserman, “Deep inside convolutional
+networks: Visualising image classification models and saliency maps,”
+arXiv preprint arXiv:1312.6034, Dec. 2013. [Online]. Available: https:
+//arxiv.org/abs/1312.6034
+[SZ14] K. Simonyan and A. Zisserman, “Very deep convolutional networks for
+large-scale image recognition,” arXiv preprint arXiv:1409.1556, Sep. 2014.
+[Online]. Available: https://arxiv.org/abs/1409.1556
+[SZS+13] C. Szegedy, W. Zaremba et al., “Intriguing properties of neural
+networks,” arXiv preprint arXiv:1312.6199, Dec. 2013.
+[Online]. Available:
+https://arxiv.org/abs/1312.6199v4
+[TF-16a] “MNIST for ML beginners,” Dec. 2016.
+[Online]. Available:
+https://www.tensorflow.org/tutorials/mnist/beginners/
+[tf-16b] “tf.nn.dropout,” Dec. 2016. [Online]. Available: https://www.tensorflow.org/
+api_docs/python/nn/activation_functions_#dropout
+[TH12] T. Tieleman and G. Hinton, “Lecture 6.5-rmsprop: Divide the gradient
+by a running average of its recent magnitude,” COURSERA: Neural
+Networks for Machine Learning, vol. 4, no. 2, 2012. [Online]. Available:
+http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf
+[Tho14a] M. Thoma, “On-line recognition of handwritten mathematical symbols,”
+Karlsruhe, Germany, Nov. 2014. [Online]. Available: http://martin-thoma.com/write-math
+
+[Tho14b] M. Thoma, “The Twiddle algorithm,” Sep. 2014. [Online]. Available:
+https://martin-thoma.com/twiddle/
+[Tho16] M. Thoma, “A survey of semantic segmentation,” arXiv preprint
+arXiv:1602.06541, Feb. 2016. [Online]. Available: https://arxiv.org/abs/
+1602.06541
+[Tho17a] M. Thoma, “The HASYv2 dataset,” arXiv preprint arXiv:1701.08380, Jan.
+2017. [Online]. Available: https://arxiv.org/abs/1701.08380
+[Tho17b] M. Thoma, “Master thesis (blog post),” Apr. 2017. [Online]. Available:
+https://martin-thoma.com/msthesis
+[VH13] P. Verbancsics and J. Harguess, “Generative neuroevolution for deep
+learning,” arXiv preprint arXiv:1312.5355, Dec. 2013. [Online]. Available:
+https://arxiv.org/abs/1312.5355
+[vLA87] P. J. M. van Laarhoven and E. H. L. Aarts, Simulated annealing.
+Dordrecht: Springer Netherlands, 1987, pp. 7–15. [Online]. Available:
+http://dx.doi.org/10.1007/978-94-015-7744-1_2
+[VTKP17] E. Vorontsov, C. Trabelsi et al., “On orthogonality and learning recurrent
+networks with long term dependencies,” arXiv preprint arXiv:1702.00071,
+Jan. 2017. [Online]. Available: https://arxiv.org/abs/1702.00071
+[WHH+89] A. Waibel, T. Hanazawa et al., “Phoneme recognition using
+time-delay neural networks,” IEEE transactions on acoustics, speech,
+and signal processing, vol. 37, no. 3, pp. 328–339, Aug. 1989.
+[Online]. Available:
+http://ieeexplore.ieee.org/document/21701/
+[Wil92] R. J. Williams, “Simple statistical gradient-following algorithms for connectionist
+ reinforcement learning,” Machine learning, vol. 8, no. 3-4, pp. 229–256,
+1992.
+
+[WWQ13] X. Wang, L. Wang, and Y. Qiao, A Comparative Study of Encoding, Pooling
+and Normalization Methods for Action Recognition. Berlin, Heidelberg:
+Springer Berlin Heidelberg, Nov. 2013, no. 11, pp. 572–585. [Online].
+Available: http://dx.doi.org/10.1007/978-3-642-37431-9_44
+[WYS+15] R. Wu, S. Yan et al., “Deep image: Scaling up image recognition,”
+arXiv preprint arXiv:1501.02876, vol. 7, no. 8, Jul. 2015.
+[Online].
+Available:
+https://arxiv.org/abs/1501.02876v4
+[WZZ+13] L. Wan, M. Zeiler et al., “Regularization of neural networks using
+dropconnect,” in International Conference on Machine Learning (ICML),
+no. 30, 2013, pp. 1058–1066.
+[Online]. Available:
+http://www.matthewzeiler.com/pubs/icml2013/icml2013.pdf
+[XGD+16] S. Xie, R. Girshick et al., “Aggregated residual transformations
+for deep neural networks,” arXiv preprint arXiv:1611.05431, Nov. 2016.
+[Online].
+Available: https://arxiv.org/abs/1611.05431v1
+[Xu11] W. Xu, “Towards optimal one pass large scale learning with averaged
+stochastic gradient descent,” arXiv preprint arXiv:1107.2490, Jul. 2011.
+[Online]. Available: https://arxiv.org/abs/1107.2490
+[XWCL15] B. Xu, N. Wang et al., “Empirical evaluation of rectified activations in
+convolutional network,” arXiv preprint arXiv:1505.00853, May 2015. [Online].
+Available: https://arxiv.org/abs/1505.00853
+[XXE12] H. Xiao, H. Xiao, and C. Eckert, “Adversarial label flips attack on
+support vector machines.” in ECAI, 2012, pp. 870–875. [Online]. Available:
+https://www.sec.in.tum.de/assets/Uploads/ecai2.pdf
+[XZY+14] T. Xiao, J. Zhang et al., “Error-driven incremental learning in
+deep convolutional neural network for large-scale image classification,”
+in International Conference on Multimedia, no. 22.
+ACM, 2014, pp. 177–186.
+[YL98] C. J. B. Yann LeCun, Corinna Cortes, “The MNIST database of handwritten
+digits,” 1998. [Online]. Available: http://yann.lecun.com/exdb/mnist/
+[ZBH+16] C. Zhang, S. Bengio et al., “Understanding deep learning requires
+rethinking generalization,” arXiv preprint arXiv:1611.03530, Nov. 2016.
+[Online].
+Available: https://arxiv.org/abs/1611.03530
+[ZCZL16] S. Zhai, Y. Cheng et al., “Doubly convolutional neural networks,” in
+Advances in Neural Information Processing Systems 29 (NIPS), D. D. Lee,
+M. Sugiyama et al., Eds. Curran Associates, Inc., Oct. 2016, pp. 1082–1090.
+[Online]. Available: http://papers.nips.cc/paper/6340-doubly-convolutional-neural-networks.pdf
+
+ 
+[ZDGD14] N. Zhang, J. Donahue et al., “Part-based R-CNNs for fine-grained category
+detection,” in European Conference on Computer Vision (ECCV). Springer,
+Jul. 2014, pp. 834–849. [Online]. Available: https://arxiv.org/abs/1407.3867
+[Zei12] M. D. Zeiler, “Adadelta: an adaptive learning rate method,” arXiv preprint
+arXiv:1212.5701, Dec. 2012. [Online]. Available: https://arxiv.org/abs/
+1212.5701v1
+[ZF13] M. D. Zeiler and R. Fergus, “Stochastic pooling for regularization of deep
+convolutional neural networks,” arXiv preprint arXiv:1301.3557, Jan. 2013.
+[Online]. Available: https://arxiv.org/abs/1301.3557v1
+[ZF14] M. D. Zeiler and R. Fergus, “Visualizing and understanding convolutional
+networks,” in European Conference on Computer Vision (ECCV). Springer,
+Nov. 2014, pp. 818–833. [Online]. Available: https://arxiv.org/abs/1311.2901
+[Zho16] B. Zhou, “Places2 download,” 2016. [Online]. Available: http://
+places2.csail.mit.edu/download.html
+[ZK16] S. Zagoruyko and N. Komodakis, “Wide residual networks,” arXiv
+preprint arXiv:1605.07146, May 2016. [Online]. Available: https:
+//arxiv.org/abs/1605.07146
+[ZKL+15] B. Zhou, A. Khosla et al., “Learning deep features for
+discriminative localization,” arXiv preprint arXiv:1512.04150, Dec. 2015.
+[Online]. Available:
+https://arxiv.org/abs/1512.04150
+[ZKL+16] B. Zhou, A. Khosla et al., “Places: An image database for deep
+scene understanding,” arXiv preprint arXiv:1610.02055, Oct. 2016.
+[Online].
+Available: https://arxiv.org/abs/1610.02055
+[ZL16] B. Zoph and Q. V. Le, “Neural architecture search with reinforcement
+learning,” arXiv preprint arXiv:1611.01578, Nov. 2016. [Online]. Available:
+https://arxiv.org/abs/1611.01578
+[ZMGL15] J. Zhao, M. Mathieu et al., “Stacked what-where auto-encoders,”
+arXiv preprint arXiv:1506.02351, Jun. 2015. [Online]. Available: https:
+//arxiv.org/abs/1506.02351v1
+[ZYL+15] H. Zheng, Z. Yang et al., “Improving deep neural networks using
+softplus units,” in International Joint Conference on Neural Networks
+(IJCNN),
+Jul. 2015, pp. 1–4.
+
+I. Glossary
+ANN artificial neural network. 4
+ASO Automatic Structure Optimization. 29
+CMO Confusion Matrix Ordering. 2, 35, 36, 51, 52, 71
+CNN Convolutional Neural Network. 1, 3–6, 11, 13, 15, 21–23, 28, 29, 31, 33, 37, 54, 60,
+71, 72, 79, 82–84, 88–91
+ELU Exponential Linear Unit. 38, 57, 60–64, 72, 73, 77, 78, 84
+ES early stopping. 68
+FC Fully Connected. 91, 93
+FLOP floating point operation. 27, 29, 87, 88, 90, 91, 93
+GA genetic algorithm. 30
+GAN Generative Adversarial Network. 80
+GPU graphics processing unit. 37, 40, 59, 63, 67, 88, 91
+HSV hue, saturation, value. 79
+LCN Local Contrast Normalization. 91
+LDA linear discriminant analysis. 79
+LReLU leaky rectified linear unit. 63, 72, 77, 78, 84
+MLP multilayer perceptron. 3–6, 28
+NAG Nesterov Accelerated Momentum. 83
+NEAT NeuroEvolution of Augmenting Topologies. 83
+OBD Optimal Brain Damage. 29
+ 
+PCA principal component analysis. 79
+PReLU parametrized rectified linear unit. 60, 61, 63, 64, 72, 77, 78, 84
+ReLU rectified linear unit. 5, 13, 60, 61, 63, 64, 72, 77, 78, 84
+SGD stochastic gradient descent. 5, 30, 45, 46, 82
+ZCA Zero Components Analysis. 79
diff --git a/read/results/playa/2201.00021.txt b/read/results/playa/2201.00021.txt
new file mode 100644
index 0000000..ef562e7
--- /dev/null
+++ b/read/results/playa/2201.00021.txt
@@ -0,0 +1,1102 @@
+Astronomy & Astrophysics manuscript no. mainArxiv  ©ESO 2022
+April 12, 2022
+Discovery of ammonia (9,6) masers in two high-mass star-forming
+regions
+Y. T. Yan (闫耀庭)1,
+, C. Henkel1, 2, 3
+, K. M. Menten1
+, Y. Gong (龚龑)1
+, J. Ott4
+, T. L. Wilson1
+, A. Wootten4
+, A.
+Brunthaler1
+, J. S. Zhang (张江水)5
+, J. L. Chen (陈家梁)5
+, and K. Yang (杨楷)6, 7
+1
+ Max-Planck-Institut für Radioastronomie, Auf dem Hügel 69, 53121 Bonn, Germany
+e-mail: yyan@mpifr-bonn.mpg.de
+2
+ Astronomy Department, Faculty of Science, King Abdulaziz University, P. O. Box 80203, Jeddah 21589, Saudi Arabia
+3
+ Xinjiang Astronomical Observatory, Chinese Academy of Sciences, 830011 Urumqi, PR China
+4
+ National Radio Astronomy Observatory, 520 Edgemont Road, Charlottesville, VA 22903-2475, USA
+5
+ Center for Astrophysics, Guangzhou University, 510006 Guangzhou, People’s Republic of China
+6
+ School of Astronomy and Space Science, Nanjing University, 163 Xianlin Avenue, Nanjing 210023, People’s Republic of China
+7
+ Key Laboratory of Modern Astronomy and Astrophysics (Nanjing University), Ministry of Education, Nanjing 210023, People’s
+Republic of China
+Received 13 December 2021 / Accepted 30 December 2021
+ABSTRACT
+Context. Molecular maser lines are signposts of high-mass star formation, probing the excitation and kinematics of very compact
+regions in the close environment of young stellar objects and providing useful targets for trigonometric parallax measurements.
+Aims. Only a few NH
+3 (9,6) masers are known so far, and their origin is still poorly understood. Here we aim to find new NH
+3 (9,6)
+masers to provide a better observational basis for studying their role in high-mass star-forming regions.
+Methods. We carried out NH$_3$ (9,6) observations toward Cepheus A and G34.26+0.15
+with the Effelsberg 100-meter telescope
+(beam size $49''$)
+and the Karl G. Jansky Very Large Array
+(JVLA; beam size about $1''.2$).
+Results. We discovered new NH
+3 (9,6) masers in Cep A and G34.26+0.15, which increases the number of known high-mass star-forming
+ regions hosting NH
+3 (9,6) masers from five to seven. Long-term monitoring (20 months) at Effelsberg shows that the intensity
+of the (9,6) maser in G34.26+0.15 is decreasing, while the Cep A maser remains stable. Compared to the Effelsberg data and assuming
+linear variations between the epochs of observation, the JVLA data indicate no missing flux. This suggests that the NH
+3 (9,6) emission
+arises from single compact emission regions that are not resolved by the interferometric measurements. As JVLA imaging shows, the
+NH$_3$ (9,6) emission in Cep A originates from a
+sub-arcsecond-sized region,
+slightly to the west
+($0''.28 \pm 0''.10$)
+of the peak position
+3 (9,6) maser spots are observed: one is close to the head of the
+cometary ultracompact H ii region C, and the other two are emitted from a compact region to the west of the hypercompact H ii region
+A.
+Conclusions. The newly found (9,6) masers appear to be related to outflows. The higher angular resolution of JVLA and very long
+baseline interferometry observations are needed to provide more accurate positions and constraints for pumping scenarios.
+Key words. Masers – ISM: clouds – ISM: individual objects: Cep A, G34.26+0.15 – ISM: H ii regions – Radio lines: ISM
+1. Introduction
+Since its discovery more than five decades ago (Cheung et al.
+1968), ammonia (NH
+3) has been a most valuable molecule for
+investigating the physical properties of molecular clouds (e.g.,
+Ho & Townes 1983). While thermally excited transitions in
+the centimeter-wavelength inversion transitions of ammonia are
+regarded as a reliable thermometer of molecular clouds (e.g.,
+Walmsley & Ungerechts 1983; Danby et al. 1988), ammonia
+masers have attracted attention since the first detection of maser
+action in the ( J, K ) = (3,3) metastable (J = K ) line toward the
+massive star-forming region W33 (Wilson et al. 1982). Subsequent
+ observations have led to the detection of new metastable
+ammonia masers, including 15
+NH
+3 (3,3) (Mauersberger et al.
+1986), NH
+3 (1,1) (Gaume et al. 1996), NH
+3 (2,2) (Mills et al.
+2018), NH
+3 (5,5) (Cesaroni et al. 1992), NH
+3 (6,6) (Beuther
+
+ Member of the International Max Planck Research School (IMPRS)
+ for Astronomy and Astrophysics at the universities of Bonn and
+Cologne.  et al. 2007), NH
+3 (7,7), NH
+3 (9,9), and NH
+3 (12,12) (Henkel
+et al. 2013). These have led to the discovery of metastable maser
+lines in 22 different regions (Mauersberger et al. 1986, 1987;
+Wilson & Henkel 1988; Wilson et al. 1990; Pratap et al. 1991;
+Cesaroni et al. 1992; Wilson & Schilke 1993; Mangum & Wootten
+ 1994; Kraemer & Jackson 1995; Zhang & Ho 1995; Zhang
+et al. 1999; Walsh et al. 2007; Hunter et al. 2008; Galván-Madrid
+et al. 2009; Brogan et al. 2011; Urquhart et al. 2011; Walsh
+et al. 2011; Wang et al. 2012; Henkel et al. 2013; Hoffman &
+Joyce 2014; McEwen et al. 2016; Mills et al. 2018; Hogge et al.
+2019; Mei et al. 2020; Towner et al. 2021). Compared with the
+metastable ammonia masers, detected non-metastable ( J > K )
+ammonia maser transitions are more numerous. The first highly
+excited non-metastable ammonia maser was detected by Madden
+ et al. (1986) in the ( J, K ) = (9,6) and (6,3) lines. Thereafter,
+many other NH
+3 non-metastable inversion transition lines have
+been identified as masers, including the (5,3), (5,4), (6,1), (6,2),
+(6,4), (6,5), (7,3), (7,4), (7,5) (7,6), (8,3), (8,4), (8,5), (8,6), (9,3),
+(9,4), (9,5), (9,7), (9,8), (10,7), (10,8), (10,9), and (11,9) transitions
+Article number, page 1 of 10 arXiv:2201.00021v3  [astro-ph.GA]  9 Apr 2022
+A&A proofs: manuscript no. mainArxiv
+(e.g., Mauersberger et al. 1987, 1988; Walsh et al. 2007;
+Henkel et al. 2013; Mei et al. 2020). Except for the NH
+3 (3,3)
+masers proposed to be associated with four supernova remnants
+(McEwen et al. 2016), almost all the other ammonia masers are
+detected in high-mass star-forming regions (HMSFRs). However,
+ while many HMSFRs host water (H
+2O), hydroxyl (OH),
+or methanol (CH
+3OH) masers, ammonia masers are quite rare
+in these sources, and the role that the environment of a young
+high-mass star plays in their excitation remains unclear. Therefore,
+ dedicated searches for ammonia masers in HMSFRs are
+indispensable in regard to their overall incidence and association
+ with different environments, which can provide additional
+constraints on the pumping mechanism of ammonia masers.
+So far, a total of 32 NH
+3 inversion transitions (∆K = 0
+and ∆J = 0) have been identified as masers. Among these, and
+despite arising from energy levels as high as 1090 K above
+the ground state, the NH
+3 (9,6) maser stands out as being the
+strongest and most variable one in W51-IRS2 (e.g., Henkel et al.
+2013). Maser emission in this line has only been detected in five
+HMSFRs, W51, NGC7538, W49, DR21 (OH) (Madden et al.
+1986), and Sgr B2(N) (Mei et al. 2020). The NH
+3 (3,3) masers
+are thought to be collisionally excited (e.g., Flower et al. 1990;
+Mangum & Wootten 1994); in contrast, the pumping mechanism
+ of NH
+3 (9,6) masers is less well constrained (Madden et al.
+1986). Brown & Cragg (1991) have studied ortho-ammonia and
+found that it could possibly pump the (6,3) inversion line, but
+they did not extend their model to the (9,6) transition due to the
+fact that collision rates are only known for inversion levels up to
+J = 6 (e.g., Danby et al. 1988).
+NH
+3 (9,6) masers are found to be strongly variable, similar to
+H
+2O masers (Madden et al. 1986; Pratap et al. 1991; Henkel et al.
+2013). In W51-IRS2, Henkel et al. (2013) found that the (9,6)
+line showed significant variation in line shape within a time interval
+ of only two days. Mapping of the (9,6) maser toward W51
+with very long baseline interferometry (VLBI) suggests that the
+masers are closer to the H
+2O masers than to the OH masers or
+to ultracompact (UC) H ii regions (Pratap et al. 1991). While
+Henkel et al. (2013) and Goddi et al. (2015) showed that the SiO
+and NH
+3 masers in W51-IRS2 are very close to each other, their
+positions, differing by 0
+. 065 (∼0.015 pc), do not fully coincide.
+In this paper we report the discovery of NH
+3 (9,6) masers
+in two HMSFRs, Cepheus A and G34.26+0.15. This increases
+the number of (9,6) maser detections in our Galaxy from five
+to seven. In Sect. 2 observations with the Effelsberg 100-meter
+telescope and the Karl G. Jansky Very Large Array (JVLA) are
+described. Results are presented in Sect. 3. The morphology of
+Cep A and G34.26+0.15 as well as a comparison of the emission
+distributions of different tracers with the NH
+3 (9,6) masers are
+presented in Sect. 4. Our main results are summarized in Sect. 5.
+2. Observations and data reduction
+2.1. Effelsberg observations and data reduction
+The NH
+3 (9,6) line was observed toward Cep A and
+G34.26+0.15 with the 100-meter Effelsberg telescope1
+ in 2020
+January and 2021 February, July, and August. The S14mm double
+ beam secondary focus receiver was employed. The full width
+at half maximum (FWHM) beam size is 49
+ at 18.5 GHz, the
+frequency of the target line. The observations were performed in
+position switching mode, and the off position was 10
+ in azimuth
+1
+ Based on observations with the 100-meter telescope of the MPIfR
+(Max-Planck-Institut für Radioastronomie) at Effelsberg. away from the source. For observations made before 2021 August,
+ we used a spectrometer that covered 2 GHz wide backends
+with a channel width of 38.1 kHz, corresponding to ∼0.62 km s−1
+at the line’s rest frequency, 18.49939 GHz (Poynter & Kakar
+1975). A high spectral resolution backend with 65536 channels
+and a bandwidth of 300 MHz was employed in 2021 August,
+providing a channel width of 0.07 km s−1
+ at 18.5 GHz. Pointing
+ was checked every 2 hours using 3C 286 or NGC 7027.
+Focus calibrations were done at the beginning of the observations
+ and during sunset and sunrise toward the abovementioned
+pointing sources. The system temperatures were 100–130 K on
+a main-beam brightness temperature, T
+MB, scale. This flux density
+ was calibrated assuming a T
+MB/S ratio of 1.95 K/Jy, derived
+from continuum cross scans of NGC 7027 (the flux density was
+adopted from Ott et al. 1994). Calibration uncertainties are estimated
+ to be ∼ 10%.
+We used the GILDAS/CLASS2
+ package (Pety 2005) to reduce
+ the spectral line data. A first-order polynomial was subtracted
+ from each spectrum for baseline removal.
+2.2. JVLA observations and data reduction
+Observations of the NH
+3 (9,6) line toward Cep A and
+G34.26+0.15 were obtained on 2021 July 13 with the JVLA
+of the National Radio Astronomy Observatory3
+ (NRAO) in the
+C configuration (project ID: 21A-157, PI: Yaoting Yan). We
+employed 27 antennas for the observations. The primary beam
+of the JVLA antennas is 150
+ (FWHM) at 18.5 GHz. A mixture
+ of three-bit and eight-bit samplers was used to perform
+ the observations. For the NH
+3 (9,6) line observations, we
+used one subband with the eight-bit sampler covering a bandwidth
+ of 16 MHz with full polarization, eight recirculations, and
+four baseline board pairs (BIBPs) to provide a velocity range
+of 260 km s−1
+ with a channel spacing of 0.13 km s−1
+. Two
+additional subbands of bandwidth 16 MHz were used to cover
+the NH
+3 (8,5) and (10,7) lines. The three-bit sampler with 32
+subbands, each with a bandwidth of 128 MHz to cover a total
+ range of 4 GHz between 20–24 GHz, was used to measure
+ the continuum emission. 3C 286 with a flux density of
+2.89 Jy at 18.5 GHz (Perley & Butler 2013) was used as a
+calibrator for pointing, flux density, bandpass, and polarization.
+J2230+6946 and J1851+0035 served as gain calibrators for Cep
+A and G34.26+0.15, respectively. The on-source times were
+4m
+30s
+ and 4m
+50s
+ toward Cep A and G34.26+0.15, respectively.
+Data from two antennas were lost due to technical issues.
+ The data from the remaining 25 antennas were reduced
+through the Common Astronomy Software Applications package
+ (CASA4
+; McMullin et al. 2007). We calibrated the data with
+the JVLA CASA calibration pipeline using CASA 6.1.2. The
+results were obtained after flagging data that contain artifacts.
+We inspected the phase, amplitude, and bandpass variations of
+the calibrated visibility data to search for additional artifacts before
+ imaging. Then, the uvcontsub task in CASA was used to
+separate the calibrated visibilities into two parts, one with line-only
+ data and the other with the continuum data. The tclean task
+with a cell size of 0
+. 2 and Briggs weighting with robust=0 was
+used to produce the images of spectral line and continuum emission.
+ The synthesized beams for NH
+3 (9,6) are 1
+. 47 × 0
+. 99 at
+2
+ https://www.iram.fr/IRAMFR/GILDAS/
+3
+ The National Radio Astronomy Observatory is a facility of the National
+ Science Foundation operated under cooperative agreement by Associated
+ Universities, Inc.
+4
+ https://casa.nrao.edu/
+Article number, page 2 of 10
+Y. T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions
+P.A. = 58◦
+.79 and 1
+. 33 × 1
+. 06 at P.A. = 5◦
+.36 toward Cep A
+and G34.26+0.15, respectively. For the 1.36 cm (20–24 GHz)
+continuum emission, the synthesized beams are 1
+. 08 × 0
+. 67 at
+P.A. = 60◦
+.64 and 0
+. 95 × 0
+. 71 at P.A. = 5◦
+.91 toward Cep A and
+G34.26+0.15. The typical absolute astrometric accuracy of the
+JVLA is ∼10% of the synthesized beam5
+. The flux density scale
+calibration accuracy is estimated to be within 15%.
+Fig. 1. Spectra from NH
+3 (9,6) transition lines. Left: Top to bottom:
+Time sequence of NH
+3 (9,6) profiles observed toward Cep A with the
+Effelsberg 100-meter telescope (after subtracting a first-order polynomial
+ baseline). A JVLA spectrum is interspersed. The systemic velocity
+ from CO and HCO+
+ lines is indicated by a dashed blue line. The
+two dashed red lines at LSR velocities, V
+LSR, of −0.90 km s−1
+ and
+−0.28 km s−1
+ indicate the central velocities of the two major components.
+ Right: NH
+3 (9,6) spectra from G34.26+0.15. The systemic velocity
+ from C17
+O is indicated by a dashed blue line. The three dashed
+red lines at V
+LSR = 54.1 km s−1
+, 55.8 km s−1
+, and 62.5 km s−1
+ show the
+central velocities of the main ammonia emission components.
+3. Results
+The spectra from different epochs are shown in Figs. 1 and 2.
+Toward Cep A, the NH
+3 (9,6) line profile from the JVLA is extracted
+ from an Effelsberg-beam-sized region (FWHM, 49
+). In
+the case of G34.26+0.15, the NH
+3 spectrum is below the noise
+level if a similarly large beam size is used. Therefore, we derived
+ the JVLA NH
+3 (9,6) spectrum from a smaller region, with
+radius 3
+. 5, that contains all the detected NH
+3 (9,6) emission. In
+Table A.1, the observed NH
+3 (9,6) line parameters obtained by
+Gaussian fits are listed. NH
+3 (8,5) and (10,7) emission is not detected
+ by our JVLA observations. The 3σ upper limits for the
+NH
+3 (8,5) and (10,7) lines toward Cep A are 23.2 mJy beam−1
+5
+ https://science.nrao.edu/facilities/vla/docs/manuals/oss/performance/positional-accuracy
+  Fig. 2. NH
+3 (9,6) line profiles emphasizing, in contrast to the spectra
+in Fig. 1, weaker features. Cep A spectra are presented on the left,
+G34.26+0.15 spectra on the right. The two dashed red lines in the left
+panels indicate V
+LSR = 1.48 km s−1
+ and 2.89 km s−1
+. In the right panels,
+the two dashed red lines refer to 54.1 km s−1
+ and 55.8 km s−1
+.
+and 27.2 mJy beam−1
+, respectively. In G34.26+0.15, the corresponding
+ 3σ upper limits for the NH
+3 (8,5) and (10,7) lines are
+22.1 mJy beam−1
+ and 30.4 mJy beam−1
+. For both sources, sensitivity
+ levels refer to emission from a single channel of width
+0.13 km s−1
+. Taking the larger measured line widths of the (9,6)
+maser features (see Table A.1), these limits could be further lowered
+ by factors of two to four.
+3.1. Centimeter-continuum emission
+The 1.36 cm continuum, derived from our JVLA observations,
+toward Cep A is presented in Fig. 3. Six published compact
+sources, HW2, HW3a, HW3b, HW3c, HW3d, and HW9, are detected
+ in our observations. Figure 4 shows the 1.36 cm continuum
+ in G34.26+0.15. Three main continuum objects, A, B, and
+C, are detected. By using the imfit task in CASA, we measured
+the continuum flux at 1.36 cm toward individual compact source
+components in Cep A and G34.26+0.15. Details are given in Table
+ A.2.
+3.2. NH
+3 (9,6) emission in Cep A
+In 2020 January, NH
+3 (9,6) emission with a peak flux density of
+0.67 ± 0.07 Jy was first detected with the Effelsberg 100-meter
+telescope in Cep A. Emission with similar strength was also detected
+ in 2021 February and August with the same telescope.
+Higher velocity resolution data, which were obtained in 2021
+August, again with the Effelsberg 100-meter telescope, show
+that the (9,6) emission contains two main velocity components.
+Overall, the flux densities of the NH
+3 (9,6) emission line measured
+ with the Effelsberg 100-meter telescope are, within the calibration
+ uncertainties, unchanged. This is valid for the time interval
+ between 2020 January and August 2021, when we smoothed
+the obtained spectra to the same velocity resolution. We also
+see another two weaker components. Figure 2 emphasizes these
+weak components with an expanded flux density scale.
+Higher angular resolution data from the JVLA pinpoint the
+position of the NH
+3 (9,6) emission with an offset of (−0
+. 28,
+0
+. 02) relative to the 1.36 cm continuum peak of Cep A HW2
+(Fig. 3). The deconvolved NH
+3 (9,6) component size is (0
+. 29 ±
+0
+. 15) × (0
+. 19 ± 0
+. 14) at P.A. = 174◦
+, derived with the imfit task
+in CASA, and can thus be considered, accounting for the uncertainties,
+ as unresolved.
+ Article number, page 3 of 10
+A&A proofs: manuscript no. mainArxiv
+Fig. 3. Cepheus A. White contours mark the 1.36 cm JVLA continuum map of Cep A; levels are −5, 5, 10, 20, 30, 40, 50, 70, 90,
+and 110 × 0.125 mJy beam−1
+. The background image is the Spitzer 4.5 µm emission, taken from the Galactic Legacy Infrared Mid-Plane
+Survey Extraordinaire (GLIMPSE; Benjamin et al. 2003; Churchwell et al. 2009).
+The reference position is
+$\alpha_{\rm J2000} = 22^{\rm h}56^{\rm m}17^{\rm s}.972$,
+and
+$\delta_{\rm J2000} = 62^{\circ}01'49''.587$,
+the peak position of the continuum map,
+is marked with a black cross.
+Slightly to the west
+of the cross
+is the black
+ellipse denoting the position of the NH
+3 (9,6) emission with a purple star at its center. OH (Bartkiewicz et al. 2005), H
+2O (Sobolev et al. 2018),
+and CH
+3OH (Sanna et al. 2017) masers are presented as diamonds, circles, and squares, respectively. The color bar on the right-hand side indicates
+the LSR velocity range of the maser spots.
+Fig. 4. 1.36 cm JVLA continuum map of G34.26+0.15 presented as white contours with levels of −5, 5, 10, 20, 30, 40, 50, 70, 90, 110, 130,
+150, 180, and 200 × 5.0 mJy beam−1
+. The background image is the Spitzer 4.5 µm emission, taken from GLIMPSE. The reference position is
+α
+J2000 = 18h
+53m
+18s
+.560, and δ
+J2000 = 01◦
+14
+58
+. 201, the peak position, is marked by a black cross. The black ellipses show the positions of NH
+3
+(9,6) emissions with stars at their center (i.e., M1, M2, and M3). OH (Zheng et al. 2000), H
+2O (Imai et al. 2011), and CH
+3OH (Bartkiewicz et al.
+2016) masers are presented as diamonds, circles, and squares, respectively. The color bar indicates the velocity range (V
+LSR) of maser spots.
+In view of the constancy of the flux densities obtained at Effelsberg
+ and the similar JVLA flux density, measured in 2021
+July, there is no missing interferometric flux density in the JVLA
+data.
+3.3. NH
+3 (9,6) emission in G34.26+0.15
+The NH
+3 (9,6) emission was first detected toward G34.26+0.15
+in 2020 January with the Effelsberg 100-meter telescope. Higher velocity resolution data from 2021 August show the NH
+3 (9,6)
+emission to be composed of two different components. The spectra
+ of weak components on a smaller flux density scale are presented
+ in Fig. 2.
+Three different locations showing NH
+3 (9,6) emission are
+found toward G34.26+0.15 (Fig. 4). The deconvolved NH
+3 (9,6)
+component sizes are (1
+. 42 ± 0
+. 43) × (0
+. 54 ± 0
+. 62) at P.A. = 97◦
+(M1), (0
+. 42 ± 0
+. 27) × (0
+. 15 ± 0
+. 27) at P.A. = 150◦
+ (M2), and
+Article number, page 4 of 10
+Y. T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions
+(1
+. 17 ± 0
+. 34) × (0
+. 27 ± 0
+. 46) at P.A. = 53◦
+ (M3) and are thus
+comparable to or smaller than the beam size.
+Overall, the NH
+3 (9,6) line from G34.26+0.15 weakened
+during the time interval from 2020 January to 2021 August by
+about 70%. A comparison between the JVLA spectrum and the
+Effelsberg data, assuming a linear decrease in the integrated intensity
+ as a function of time between different epochs of the
+100-meter observations, suggests there is no missing flux in the
+JVLA data. This is similar to the situation in Cep A.
+4. Discussion
+4.1. Morphology of Cep A and G34.26+0.15
+Cep A, at a trigonometric parallax distance of 0.70±0.04 kpc
+(Moscadelli et al. 2009; Dzib et al. 2011), is the second closest
+HMSFR (after Orion) and by far the closest NH
+3 (9,6) maser
+known. About 16 compact (∼1
+) radio sources (e.g., Hughes &
+Wouterloot 1984; Hughes 1991; Garay et al. 1996) have been
+identified in Cep A. Hughes & Wouterloot (1984) discovered
+these targets at radio wavelengths, which are UC and hypercompact
+ (HC) H ii regions and/or stellar wind sources, subsequently
+named as HW sources. The HW2 object is one of the best known
+examples of a protostellar jet or disk system driving a powerful
+outflow (e.g., Rodriguez et al. 1980; Güsten et al. 1984; Torrelles
+et al. 1986; Curiel et al. 2006; Carrasco-González et al. 2021).
+The observed NH
+3 (9,6) emission is slightly offset (−0
+. 28, 0
+. 02)
+from the center of HW2 (see Fig. 3).
+G34.26+0.15 is an HMSFR located at a distance of 3.3 kpc
+(Kuchar & Bania 1994). It hosts four radio continuum components
+ named A, B, C, and D. Component C is a prototypical
+cometary UC H ii region containing a compact head and a diffuse
+tail that extends from east to west (e.g., Reid & Ho 1985; Garay
+et al. 1986; Sewilo et al. 2004; Sewiło et al. 2011). Components
+A and B are HC H ii regions, located to the east of component
+C. An extended ring-like H ii region, called component D, is located
+ southeast of components A-C. One of the three observed
+NH
+3 (9,6) emission line sources, M1, is close to the head of component
+ C, whereas M2 and M3 originate from another compact
+region in the west of the HC H ii component A (see Fig. 4).
+4.2. NH
+3 (9,6) emission possibly caused by maser action
+As shown in Fig. 1, the
+NH$_3$ (9,6) profiles in Cep A and
+G34.26+0.15 are narrow
+($\Delta V_{1/2} \leq 2.0$ km s$^{-1}$),
+much narrower
+than the expected line widths (4 km s$^{-1}$)
+of thermal lines observed
+ at a similar angular resolution (e.g., Torrelles et al. 1985,
+1986, 1993, 1999; Henkel et al. 1987; Comito et al. 2007; Mookerjea
+ et al. 2007; Wyrowski et al. 2012; Beuther et al. 2018). Velocity
+ shifts with respect to the systemic velocities of the two
+sources are both observed, that is, $V \sim 10$ km s$^{-1}$
+in Cep A and
+$V \sim 4$ km s$^{-1}$
+in G34.26+0.15 (see details in Sect. 4.3). Furthermore,
+ time variability is observed in the case of G34.26+0.15,
+which is also a characteristic feature of maser emission.
+Additional evidence of their maser nature is the high brightness
+ temperatures of the (9,6) emission spots toward Cep A and
+G34.26+0.15. The spectral parameters are listed in Table A.3.
+Because at least a significant part of the
+NH$_3$ (9,6) emission
+is not resolved by our JVLA observations, the derived brightness
+ temperatures are only lower limits. Nevertheless, the lower
+limits on the brightness temperature are >800 K in Cep A (see
+Table A.3), which is much higher than the expected thermal
+gas temperature of ∼250 K (e.g., Patel et al. 2005; Comito
+et al. 2007; Beuther et al. 2018). This strongly suggests that the
+NH$_3$ (9,6) emission in Cep A is due to maser action. Because
+ G34.26+0.15 is located at about five times the distance to
+Cep A, beam dilution effects reduce the lower main beam brightness
+ temperature limit to 400 K in G34.26+0.15 (M2) (see Table
+ A.3). We also note that the luminosity of the
+NH$_3$ (9,6) emission
+ in G34.26+0.15 is higher than or comparable to that in Cep
+A, depending on the epoch of our observations.
+Finally, the non-detections of the (8,5) and (10,7) lines also
+indicate that the (9,6) line is special. This allows us to derive
+lower 3σ limits of the (9,6)/(8,5) and (9,6)/(10,7) line intensity
+ratios. The (9,6) line arises from ortho-NH$_3$
+($K = 3n$), whereas
+the NH$_3$ (8,5) and (10,7) lines
+are para-NH$_3$
+($K \neq 3n$) lines.
+The minimum ortho-to-para ratios are in the range 12–42 and 1–
+8 toward Cep A and G34.26+0.15, respectively. The statistical
+weights for the ortho states are twice as large as those for the
+para states (e.g., Umemoto et al. 1999; Goddi et al. 2011; Henkel
+et al. 2013). In Cep A, the line intensity ratios are far higher than
+this factor of two. Thus, at least in Cep A the higher main beam
+brightness peak temperature of the (9,6) emission is caused by
+maser action, perhaps involving exponential amplification, and
+the case of G34.26+0.15 is likely similar.
+4.3. Comparison of
+NH$_3$ (9,6) masers with previously
+published (quasi-)thermal
+NH$_3$ emission
+The metastable (1,1), (2,2), (3,3), and (4,4) ammonia lines
+show thermal emission toward Cep A over a velocity range of
+$-13$ km s$^{-1}$
+$\leq V_{\rm LSR} \leq$
+$-4$ km s$^{-1}$
+(Brown et al. 1981; Güsten
+et al. 1984; Torrelles et al. 1985, 1986, 1993, 1999). An average
+NH$_3$ column density of
+$\sim5\times10^{15}$ cm$^{-2}$
+was estimated
+for a region
+of $3''$
+around HW2 (Torrelles et al. 1999). This high
+NH$_3$ abundance
+ could provide a suitable environment for maser species.
+Large line widths (∆V
+1/2 7.0 km s−1
+) with V
+LSR ∼ −10 km s−1
+in both (1,1) and (2,2) lines were found toward HW2 (Torrelles
+et al. 1993). The velocity is similar to the cloud’s systemic local
+ standard of rest (LSR) velocity of $-11.2$ km s$^{-1}$,
+which
+is based on CO (Narayanan & Walker 1996) and HCO$^+$
+observations
+(Gómez et al. 1999). Our (9,6) maser is redshifted
+($-0.9$ km s$^{-1}$
+$\leq V_{\rm LSR} \leq$
+$2.9$ km s$^{-1}$)
+and shares positions with
+the outflowing gas seen in CO and HCO$^+$
+with similarly redshifted
+ velocities. Therefore, we argue that the (9,6) masers are
+related to outflowing gas.
+In G34.26+0.15, a large
+NH$_3$ column density,
+$10^{18.5\pm0.2}$ cm$^{-2}$,
+and a kinetic temperature
+of $225\pm75$ K
+were derived by Henkel et al. (1987) based on measurements
+of 15 NH$_3$
+inversion transitions in the frequency range of
+22.0–26.0 GHz. These did not include the (9,6) transition.
+While these lines were measured with a beam size of about
+$40''$,
+a comparison of the peak intensities of the optically thick
+lines with the kinetic temperature reveals the size of the hot,
+ammonia-emitting core to be only $\sim2\farcs5$.
+All those measured
+NH$_3$
+lines were quasi-thermal and had LSR velocities of
+$\sim58.5$ km s$^{-1}$,
+close to the systemic velocity of $\sim58.1$ km s$^{-1}$
+obtained from
+C$^{17}$O observations (Wyrowski et al. 2012).
+Their line widths
+($\Delta V_{1/2} \geq 3.6$ km s$^{-1}$)
+are larger than what
+we find
+(0.35 km s$^{-1}$ $\leq \Delta V_{1/2} \leq$ 0.94 km s$^{-1}$)
+for each
+(9,6)
+maser component (see details in Table A.3). In all, we may
+have observed four different (9,6) velocity features. Three
+are blueshifted at
+$V_{\rm LSR} \sim 53.8$ km s$^{-1}$,
+55.8 km s$^{-1}$,
+and
+56.8 km s$^{-1}$,
+and a fourth, tentatively detected, at
+62.5 km s$^{-1}$.
+This tentative redshifted feature was only potentially detected
+with Effelsberg in 2020 January. The velocity is similar to that
+of the JVLA measurements on the
+NH$_3$ (1,1) absorption line
+against continuum source C
+($\sim7''$ resolution; Keto et al. 1987)
+Article number, page 5 of 10
+A&A proofs: manuscript no. mainArxiv
+and the
+NH$_3$ (3,3) emission surrounding continuum source B as
+well as the head of C
+($1\farcs4 \times 1\farcs2$ resolution;
+Heaton et al. 1989).
+However, we did not find this redshifted component in our
+JVLA observations. Therefore, its position within G34.26+0.15
+cannot be determined. The blueshifted (9,6) masers with a
+velocity range of 53.8–56.8 km s$^{-1}$
+(M1, M2, and M3) show
+velocities compatible with those of the
+NH$_3$ (3,3) emission at
+the proper positions (Heaton et al. 1989), which might be a
+suitable environment for maser species.
+4.4. Comparison of
+NH$_3$ (9,6) masers with other maser lines
+To characterize the environment of
+NH$_3$ (9,6) masers, we can
+compare their positions with respect to those of other maser
+species (i.e., OH,
+H$_2$O, and
+CH$_3$OH). Toward Cep A HW2,
+many
+CH$_3$OH (e.g., Menten 1991; Sugiyama et al. 2008; Sanna
+et al. 2017) and
+H$_2$O maser spots (e.g., Torrelles et al. 1998,
+2011; Sobolev et al. 2018) are detected and are associated with
+its disk. Sobolev et al. (2018) also found that most of the
+H$_2$O
+maser flux is associated with the compact H ii region HW3d. OH
+maser features close to the H ii regions are also seen in HW2
+(e.g., Cohen & Brebner 1985; Bartkiewicz et al. 2005). These
+three kinds of masers in Cep A have a large velocity range of
+$-25$ km s$^{-1}$
+$\leq V_{\rm LSR} \leq$
+$-2$ km s$^{-1}$
+and are widespread around
+HW2 and HW3, while
+NH$_3$ (9,6) emission is only detected at
+$-0.9$ km s$^{-1}$
+$\leq V_{\rm LSR} \leq$
+$2.9$ km s$^{-1}$
+toward a sub-arcsecond-sized
+ region to the west of the peak continuum position of HW2
+(see Fig. 3). This suggests that the
+NH$_3$ (9,6) maser in Cep A
+is unique and not related to maser spots seen in other molecular
+species.
+In G34.26+0.15, OH (Zheng et al. 2000),
+H$_2$O (Imai et al.
+2011), and
+CH$_3$OH (Bartkiewicz et al. 2016) masers have been
+detected east of source C (Fig. 4), and none of them coincides
+with the head of C. The
+NH$_3$ (9,6) maser M1 is also found
+slightly off the head of source C. This could suggest that M1
+is powered by continuum source C or by an outflow. Near component
+ B, there are some OH and
+CH$_3$OH masers but no
+H$_2$O
+or
+NH$_3$ masers. A group of
+H$_2$O masers, well-known tracers
+of outflows, with a large velocity distribution of
+43 km s$^{-1}$
+$\leq V_{\rm LSR} \leq$
+54 km s$^{-1}$,
+was found to the west of the centimeter-continuum
+ source A and close to the peak of the millimeter-continuum
+ emission (see details in our Fig. A.2 and also in Fig. 5
+of Imai et al. 2011). The closeness of
+NH$_3$ (9,6) maser spots M2
+and M3 to this group of water masers and their similar velocities
+again suggest an association of
+NH$_3$ (9,6) masers with outflow
+activity.
+4.5. Constraints on pumping scenarios
+Our observations have resulted in the detection of
+NH$_3$ (9,6)
+masers in Cep A and G34.26+0.15. The new detections could
+provide additional constraints on the maser line’s pumping
+mechanism. As mentioned in Sect. 1, the pumping mechanism
+of the (9,6) maser is unclear (Madden et al. 1986; Brown &
+Cragg 1991). Previous studies have suggested that there are three
+main pumping scenarios to explain the observed
+NH$_3$ maser
+lines (Madden et al. 1986; Henkel et al. 2013): (1) infrared radiation
+ from the dust continuum emission, (2) line overlap, and
+(3) collisional pumping.
+For the first mechanism, infrared photons near 10 µm are
+needed for vibrational excitation. The high dust temperature
+(∼300 K) of W51-IRS2 can provide substantial infrared photons
+ near 10 µm, which is used for radiative pumping (Henkel et al. 2013). Both Cep A and G34.26+0.15 have similar kinetic
+temperatures of 200 K (Henkel et al. 1987; Patel et al. 2005;
+Comito et al. 2007; Beuther et al. 2018). This suggests that
+high kinetic temperatures are needed to excite
+NH$_3$ (9,6) masers.
+However, it should be noted that the silicate dust absorption feature
+ might dominate at 10 µm (see the spectral energy distribution
+ of Cep A in De Buizer et al. 2017). Additionally, there is
+no bright infrared emission around the two (9,6) masers, M2 and
+M3, in G34.26+0.15 (see Fig. 4; see also Fig. 11 in De Buizer
+et al. 2003 for a 10.5 µm map). This indicates that the pumping
+mechanism via infrared photons near 10 µm may not be viable
+to explain the (9,6) masers in Cep A and G34.26+0.15. Furthermore,
+ Wilson & Schilke (1993) argued that radiative pumping by
+dust emission tends to excite multiple adjacent ammonia maser
+transitions, which appears to contradict our failure to detect the
+adjacent (8,5) and (10,7) lines (with respect to quantum numbers
+and frequency) and to only measure the (9,6) transitions in Cep
+A and G34.26+0.15. Therefore, we suggest that infrared radiation
+ from dust is not the main pumping source.
+Madden et al. (1986) suggested that there might be some
+line overlaps between the rotational
+NH$_3$ transitions in the far-infrared
+ band. However, this would be unlikely to affect only the
+(9,6) line. Nevertheless, far-infrared spectral observations will
+be needed to clarify this scenario.
+Based on our observations, the (9,6) maser spots are close
+to, but not coincident with, the peaks of the radio continuum
+emission in Cep A and G34.26+0.15. Furthermore, the (9,6)
+masers show velocity offsets with respect to their systemic velocities.
+ This indicates that the (9,6) masers are located at the
+base of outflows, similar to the
+H$_2$O masers. This is supported
+by VLBI observations that show that (9,6) masers tend to be
+closely associated with
+H$_2$O masers (Pratap et al. 1991). The observed
+ time variability in G34.26+0.15 and W51-IRS2 can also
+be attributed to episodic molecular outflows. This indicates that
+collisional pumping could be the driver of the (9,6) maser. On
+the other hand, collisional pumping has been successfully used
+to explain the
+NH$_3$ (3,3) maser (Walmsley & Ungerechts 1983;
+Flower et al. 1990; Mangum & Wootten 1994). Collisions tend to
+pump from the $K = 0$ level to the $K = 3$ level with parity changes,
+that is, the upper level of the (3,3) metastable transition will be
+overpopulated.
+NH$_3$ (9,6) arises from the ortho species, so a similar
+ mechanism might also occur in the case of the (9,6) transition.
+ Further measurements of collisional rates of ammonia will
+allow us to test this scenario.
+5. Summary
+We report the discovery of
+NH$_3$ (9,6) masers in two HMSFRs,
+Cep A and G34.26+0.15. The narrow line width of the emission
+ features
+($\Delta V_{1/2} \leq 2.0$ km s$^{-1}$)
+and their high brightness temperatures
+ (> 400 K) indicate the maser nature of the lines.
+The intensity of the (9,6) maser in G34.26+0.15 is decreasing
+with time, while toward Cep A the maser is stable based on 20
+months of monitoring at Effelsberg. Linearly interpolating the
+integrated intensities obtained at Effelsberg as a function of time,
+the JVLA measurements show that there is no missing flux density
+ on scales on the order of 1.2 arcsec
+($4\times10^{-3}$ and $2\times10^{-2}$
+pc)
+to the total single-dish flux. The JVLA-detected emission indicates
+ that the
+NH$_3$ (9,6) maser in Cep A originates from a
+sub-arcsecond-sized region slightly
+($0\farcs28 \pm 0\farcs10$)
+to the west
+of the peak position of the 1.36 cm continuum object, HW2. In
+G34.26+0.15, three
+NH$_3$ (9,6) maser spots are observed: one is
+close to the head of the cometary UC H ii region C, and the other
+two are emitted from a compact region to the west of the HC H ii
+Article number, page 6 of 10
+Y. T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions
+region A. We suggest that the (9,6) masers may be connected to
+outflowing gas. Higher angular resolution JVLA and VLBI observations
+ are planned to provide more accurate positions and
+constraints on pumping scenarios.
+Acknowledgements. We would like to thank the anonymous referee for the useful
+ comments that improved the manuscript. Y.T.Y. is a member of the International
+ Max Planck Research School (IMPRS) for Astronomy and Astrophysics
+at the Universities of Bonn and Cologne. Y.T.Y. would like to thank the China
+Scholarship Council (CSC) for its support. We would like to thank the staff at
+the Effelsberg for their help provided during the observations. We thank the staff
+of the JVLA, especially Tony Perreault and Edward Starr, for their assistance
+with the observations and data reduction. This research has made use of the
+NASA/IPAC Infrared Science Archive, which is funded by the National Aeronautics
+ and Space Administration and operated by the California Institute of
+Technology.
+References
+Bartkiewicz, A., Szymczak, M., Cohen, R. J., & Richards, A. M. S. 2005, MNRAS,
+ 361, 623
+Bartkiewicz, A., Szymczak, M., & van Langevelde, H. J. 2016, A&A, 587, A104
+Benjamin, R. A., Churchwell, E., Babler, B. L., et al. 2003, PASP, 115, 953
+Beuther, H., Mottram, J. C., Ahmadi, A., et al. 2018, A&A, 617, A100
+Beuther, H., Walsh, A. J., Thorwirth, S., et al. 2007, A&A, 466, 989
+Brogan, C. L., Hunter, T. R., Cyganowski, C. J., et al. 2011, ApJ, 739, L16
+Brown, A. T., Little, L. T., MacDonald, G. H., Riley, P. W., & Matheson, D. N.
+1981, MNRAS, 195, 607
+Brown, R. D. & Cragg, D. M. 1991, ApJ, 378, 445
+Carrasco-González, C., Sanna, A., Rodríguez-Kamenetzky, A., et al. 2021, ApJ,
+914, L1
+Cesaroni, R., Walmsley, C. M., & Churchwell, E. 1992, A&A, 256, 618
+Cheung, A. C., Rank, D. M., Townes, C. H., Thornton, D. D., & Welch, W. J.
+1968, Phys. Rev. Lett., 21, 1701
+Churchwell, E., Babler, B. L., Meade, M. R., et al. 2009, PASP, 121, 213
+Cohen, R. J. & Brebner, G. C. 1985, MNRAS, 216, 51P
+Comito, C., Schilke, P., Endesfelder, U., Jiménez-Serra, I., & Martín-Pintado, J.
+2007, A&A, 469, 207
+Curiel, S., Ho, P. T. P., Patel, N. A., et al. 2006, ApJ, 638, 878
+Danby, G., Flower, D. R., Valiron, P., Schilke, P., & Walmsley, C. M. 1988,
+MNRAS, 235, 229
+De Buizer, J. M., Liu, M., Tan, J. C., et al. 2017, ApJ, 843, 33
+De Buizer, J. M., Radomski, J. T., Telesco, C. M., & Piña, R. K. 2003, ApJ, 598,
+1127
+Dzib, S., Loinard, L., Rodríguez, L. F., Mioduszewski, A. J., & Torres, R. M.
+2011, ApJ, 733, 71
+Flower, D. R., Offer, A., & Schilke, P. 1990, MNRAS, 244, 4P
+Galván-Madrid, R., Keto, E., Zhang, Q., et al. 2009, ApJ, 706, 1036
+Garay, G., Ramirez, S., Rodriguez, L. F., Curiel, S., & Torrelles, J. M. 1996, ApJ,
+459, 193
+Garay, G., Rodriguez, L. F., & van Gorkom, J. H. 1986, ApJ, 309, 553
+Gaume, R. A., Wilson, T. L., & Johnston, K. J. 1996, ApJ, 457, L47
+Goddi, C., Greenhill, L. J., Humphreys, E. M. L., Chandler, C. J., & Matthews,
+L. D. 2011, ApJ, 739, L13
+Goddi, C., Henkel, C., Zhang, Q., Zapata, L., & Wilson, T. L. 2015, A&A, 573,
+A109
+Gómez, J. F., Sargent, A. I., Torrelles, J. M., et al. 1999, ApJ, 514, 287
+Güsten, R., Chini, R., & Neckel, T. 1984, A&A, 138, 205
+Heaton, B. D., Little, L. T., & Bishop, I. S. 1989, A&A, 213, 148
+Henkel, C., Wilson, T. L., Asiri, H., & Mauersberger, R. 2013, A&A, 549, A90
+Henkel, C., Wilson, T. L., & Mauersberger, R. 1987, A&A, 182, 137
+Ho, P. T. P. & Townes, C. H. 1983, ARA&A, 21, 239
+Hoffman, I. M. & Joyce, S. A. 2014, ApJ, 782, 83
+Hogge, T. G., Jackson, J. M., Allingham, D., et al. 2019, ApJ, 887, 79
+Hughes, V. A. 1991, ApJ, 383, 280
+Hughes, V. A. & Wouterloot, J. G. A. 1984, ApJ, 276, 204
+Hunter, T. R., Brogan, C. L., Indebetouw, R., & Cyganowski, C. J. 2008, ApJ,
+680, 1271
+Imai, H., Omi, R., Kurayama, T., et al. 2011, PASJ, 63, 1293
+Keto, E. R., Ho, P. T. P., & Reid, M. J. 1987, ApJ, 323, L117
+Kraemer, K. E. & Jackson, J. M. 1995, ApJ, 439, L9
+Kuchar, T. A. & Bania, T. M. 1994, ApJ, 436, 117
+Madden, S. C., Irvine, W. M., Matthews, H. E., Brown, R. D., & Godfrey, P. D.
+1986, ApJ, 300, L79
+Mangum, J. G. & Wootten, A. 1994, ApJ, 428, L33
+Mauersberger, R., Henkel, C., & Wilson, T. L. 1987, A&A, 173, 352
+Mauersberger, R., Wilson, T. L., & Henkel, C. 1986, A&A, 160, L13  Mauersberger, R., Wilson, T. L., & Henkel, C. 1988, A&A, 201, 123
+McEwen, B. C., Pihlström, Y. M., & Sjouwerman, L. O. 2016, ApJ, 826, 189
+McMullin, J. P., Waters, B., Schiebel, D., Young, W., & Golap, K. 2007, in Astronomical
+ Society of the Pacific Conference Series, Vol. 376, Astronomical
+Data Analysis Software and Systems XVI, ed. R. A. Shaw, F. Hill, & D. J.
+Bell, 127
+Mei, Y., Chen, X., Shen, Z.-Q., & Li, B. 2020, ApJ, 898, 157
+Menten, K. M. 1991, ApJ, 380, L75
+Mills, E. A. C., Ginsburg, A., Clements, A. R., et al. 2018, ApJ, 869, L14
+Mookerjea, B., Casper, E., Mundy, L. G., & Looney, L. W. 2007, ApJ, 659, 447
+Moscadelli, L., Reid, M. J., Menten, K. M., et al. 2009, ApJ, 693, 406
+Narayanan, G. & Walker, C. K. 1996, ApJ, 466, 844
+Ott, M., Witzel, A., Quirrenbach, A., et al. 1994, A&A, 284, 331
+Patel, N. A., Curiel, S., Sridharan, T. K., et al. 2005, Nature, 437, 109
+Perley, R. A. & Butler, B. J. 2013, ApJS, 204, 19
+Pety, J. 2005, in SF2A-2005: Semaine de l’Astrophysique Francaise, ed. F. Casoli,
+ T. Contini, J. M. Hameury, & L. Pagani, 721
+Poynter, R. L. & Kakar, R. K. 1975, ApJS, 29, 87
+Pratap, P., Menten, K. M., Reid, M. J., Moran, J. M., & Walmsley, C. M. 1991,
+ApJ, 373, L13
+Reid, M. J. & Ho, P. T. P. 1985, ApJ, 288, L17
+Rodriguez, L. F., Ho, P. T. P., & Moran, J. M. 1980, ApJ, 240, L149
+Sanna, A., Moscadelli, L., Surcis, G., et al. 2017, A&A, 603, A94
+Sewilo, M., Churchwell, E., Kurtz, S., Goss, W. M., & Hofner, P. 2004, ApJ,
+605, 285
+Sewiło, M., Churchwell, E., Kurtz, S., Goss, W. M., & Hofner, P. 2011, ApJS,
+194, 44
+Sobolev, A. M., Moran, J. M., Gray, M. D., et al. 2018, ApJ, 856, 60
+Sugiyama, K., Fujisawa, K., Doi, A., et al. 2008, PASJ, 60, 1001
+Torrelles, J. M., Gómez, J. F., Garay, G., et al. 1998, ApJ, 509, 262
+Torrelles, J. M., Gómez, J. F., Garay, G., et al. 1999, MNRAS, 307, 58
+Torrelles, J. M., Ho, P. T. P., Rodriguez, L. F., & Canto, J. 1985, ApJ, 288, 595
+Torrelles, J. M., Ho, P. T. P., Rodriguez, L. F., & Canto, J. 1986, ApJ, 305, 721
+Torrelles, J. M., Patel, N. A., Curiel, S., et al. 2011, MNRAS, 410, 627
+Torrelles, J. M., Verdes-Montenegro, L., Ho, P. T. P., Rodriguez, L. F., & Canto,
+J. 1993, ApJ, 410, 202
+Towner, A. P. M., Brogan, C. L., Hunter, T. R., & Cyganowski, C. J. 2021, ApJ,
+923, 263
+Umemoto, T., Mikami, H., Yamamoto, S., & Hirano, N. 1999, ApJ, 525, L105
+Urquhart, J. S., Morgan, L. K., Figura, C. C., et al. 2011, MNRAS, 418, 1689
+Walmsley, C. M. & Ungerechts, H. 1983, A&A, 122, 164
+Walsh, A. J., Breen, S. L., Britton, T., et al. 2011, MNRAS, 416, 1764
+Walsh, A. J., Longmore, S. N., Thorwirth, S., Urquhart, J. S., & Purcell, C. R.
+2007, MNRAS, 382, L35
+Wang, K., Zhang, Q., Wu, Y., Li, H.-b., & Zhang, H. 2012, ApJ, 745, L30
+Wilson, T. L., Batrla, W., & Pauls, T. A. 1982, A&A, 110, L20
+Wilson, T. L. & Henkel, C. 1988, A&A, 206, L26
+Wilson, T. L., Johnston, K. J., & Henkel, C. 1990, A&A, 229, L1
+Wilson, T. L. & Schilke, P. 1993, in Lecture Notes in Physics, Astrophysical
+Masers, ed. A. W. Clegg & G. E. Nedoluha, Vol. 412, 123–126
+Wyrowski, F., Güsten, R., Menten, K. M., Wiesemeyer, H., & Klein, B. 2012,
+A&A, 542, L15
+Zhang, Q. & Ho, P. T. P. 1995, ApJ, 450, L63
+Zhang, Q., Hunter, T. R., Sridharan, T. K., & Cesaroni, R. 1999, ApJ, 527, L117
+Zheng, X. W., Moran, J. M., & Reid, M. J. 2000, MNRAS, 317, 192
+Article number, page 7 of 10
+A&A proofs: manuscript no. mainArxiv
+Appendix A:
+Table A.1. Summary of
+NH$_3$ (9,6) maser observations.
+Source Telescope Beam Epoch Channel
+$S_\nu$
+rms
+$\int S_\nu\,{\rm d}v$
+$V_{\rm LSR}$
+$\Delta V_{1/2}$
+size spacing
+(km s$^{-1}$)
+(Jy) (mJy)
+(Jy km s$^{-1}$)
+(km s$^{-1}$)
+Cep A Effelsberg $49''$
+ 2020, Jan. 04 0.62 0.67 3.41 1.19 ± 0.02 -1.11 ± 0.02 1.67 ± 0.04
+Effelsberg $49''$
+ 2021, Feb. 11 0.62 0.59 5.97 1.08 ± 0.02 -0.74 ± 0.02 1.70 ± 0.04
+Effelsberg $49''$
+ 2021, Feb. 15 0.62 0.65 10.98 1.11 ± 0.03 -0.75 ± 0.02 1.60 ± 0.05
+JVLA$^a$
+$1\farcs47 \times 0\farcs99$
+2021, Jul. 13
+0.13 1.13 144 0.89 ± 0.09 -0.86 ± 0.03 0.74 ± 0.12
+Effelsberg $49''$
+ 2021, Aug. 11 0.07 0.98 13.36 0.49 ± 0.02 -0.90 ± 0.01 0.47 ± 0.01
+0.35 0.26 ± 0.02 -0.28 ± 0.02 0.69 ± 0.05
+Effelsberg $49''$
+ 2021, Aug. 12 0.07 0.98 13.35 0.50 ± 0.01 -0.89 ± 0.07 0.48 ± 0.07
+0.35 0.20 ± 0.01 -0.29 ± 0.07 0.54 ± 0.07
+0.06 0.07 ± 0.01 0.51 ± 0.07 1.09 ± 0.07
+0.02 0.02 ± 0.01 2.15 ± 0.07 0.80 ± 0.07
+0.07 0.06 ± 0.01 2.89 ± 0.07 0.92 ± 0.07
+G34.26+0.15 Effelsberg $49''$
+ 2020, Jan. 03 0.62 0.30 1.26 0.65 ± 0.03 62.50 ± 0.05 2.05 ± 0.13
+Effelsberg $49''$
+ 2021, Feb. 11 0.62 0.24 2.42 0.40 ± 0.02 55.76 ± 0.04 1.60 ± 0.12
+Effelsberg $49''$
+ 2021, Feb. 15 0.62 0.20 4.86 0.38 ± 0.02 55.71 ± 0.05 1.80 ± 0.14
+JVLA$^b$
+$1\farcs33 \times 1\farcs06$
+2021, Jul. 13
+0.13 0.23 37.1 0.09 ± 0.02 54.41 ± 0.03 0.38 ± 0.09
+0.22 0.22 ± 0.02 55.82 ± 0.05 0.95 ± 0.12
+0.15 0.06 ± 0.01 57.21 ± 0.04 0.35 ± 0.08
+Effelsberg $49''$
+ 2021, Aug. 11 0.07 0.08 13.92 0.06 ± 0.007 54.10 ± 0.05 0.68 ± 0.12
+0.07 0.02 ± 0.006 54.82 ± 0.03 0.31 ± 0.09
+0.12 0.10 ± 0.006 55.85 ± 0.02 0.75 ± 0.06
+Effelsberg $49''$
+ 2021, Aug. 12 0.07 0.16 27.40 0.09 ± 0.008 55.83 ± 0.02 0.56 ± 0.05
+Notes. The spectral parameters are obtained from Gaussian fitting.
+$^{(a)}$ The JVLA spectrum toward Cep A is extracted from the Effelsberg-beam-sized
+ region (FWHM $49''$).
+$^{(b)}$ For G34.26+0.15, the JVLA beam samples the
+NH$_3$ (9,6) spectrum over a region
+of radius $3\farcs5$,
+which contains all
+detected
+NH$_3$ (9,6) emissions.
+Table A.2. 1.36 cm JVLA flux densities of individual continuum sources.
+Source R.A. Dec. Size P.A.
+$S_\nu$
+(h m s)
+($^\circ$ $'$ $''$) (arcsec) (deg) (mJy)
+Cep A HW2 22 56 17.972 ± 0.003 +62 01 49.587 ± 0.015 (0.45 ± 0.19) × (0.22 ± 0.10) 50.0 20.2 ± 1.4
+HW3a 22 56 17.420 ± 0.022 +62 01 44.576 ± 0.076 (2.35 ± 0.45) × (0.55 ± 0.14) 66.6 4.75 ± 0.74
+HW3b 22 56 17.578 ± 0.009 +62 01 45.041 ± 0.043 (1.43 ± 0.24) × (0.45 ± 0.10) 59.9 3.19 ± 0.36
+HW3c 22 56 17.956 ± 0.016 +62 01 46.224 ± 0.038 (1.44 ± 0.37) × (0.36 ± 0.19) 86.0 9.90 ± 1.7
+HW3d 22 56 18.195 ± 0.005 +62 01 46.325 ± 0.014 (1.26 ± 0.12) × (0.30 ± 0.19) 102.5 13.75 ± 0.92
+HW9 22 56 18.626 ± 0.014 +62 01 47.851 ± 0.137 (1.53 ± 0.51) × (0.29 ± 0.30) 28.0 3.26 ± 0.78
+G34.26+0.15 A 18 53 18.774 ± 0.005 +01 14 56.208 ± 0.125 (0.66 ± 0.49) × (0.50 ± 0.33) 10.0 94 ± 33
+B 18 53 18.649 ± 0.005 +01 15 00.071 ± 0.180 (2.31 ± 0.49) × (0.85 ± 0.21) 17.4 597 ± 110
+C 18 53 18.560 ± 0.004 +01 14 58.201 ± 0.112 (2.03 ± 0.30) × (1.34 ± 0.20) 178.0 5070 ± 660
+Article number, page 8 of 10
+Y. T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions
+Table A.3.
+NH$_3$ (9,6) maser positions derived from the JVLA observations.
+Source R.A. Dec.
+$S_\nu$
+$T_{\rm MB}$
+$V_{\rm LSR}$
+$\Delta V_{1/2}$
+(h m s)
+($^\circ$ $'$ $''$)
+(mJy beam$^{-1}$) (K)
+(km s$^{-1}$)
+Cep A M 22 56 17.933 ± 0.002 +62 01 49.608 ± 0.011 985.2 2464.8 -0.88 ± 0.01 0.51 ± 0.02
+343.2 829.5 -0.24 ± 0.03 0.63 ± 0.05
+G34.26+0.15 M1 18 53 18.569 ± 0.007 +01 14 57.997 ± 0.056 37.1 94.5 56.82 ± 0.06 0.68 ± 0.14
+M2 18 53 18.696 ± 0.002 +01 14 55.807 ± 0.034 48.4 122.4 53.77 ± 0.05 0.35 ± 0.08
+57.8 146.2 54.35 ± 0.07 0.83 ± 0.14
+180.8 457.6 55.83 ± 0.01 0.59 ± 0.03
+M3 18 53 18.667 ± 0.005 +01 14 55.348 ± 0.066 78.1 197.2 54.22 ± 0.04 0.94 ± 0.08
+73.7 186.3 55.78 ± 0.04 0.79 ± 0.08
+Fig. A.1. Cepheus A. The grey shaded areas mark the 1.36 cm JVLA continuum map of Cep A. The reference position is
+$\alpha_{\rm J2000} = 22^{\rm h}56^{\rm m}17\fs972$,
+and
+$\delta_{\rm J2000} = 62^\circ01'49\farcs587$,
+the peak position of the continuum map,
+is marked by a red cross.
+Slightly to the west of the cross
+is the white ellipse
+denoting the position of the
+NH$_3$ (9,6) emission with a purple star at its center.
+The red contours show the NOrthern Extended Millimeter Array
+(NOEMA) 1.37 mm continuum, taken from Beuther et al. (2018). Contour levels are -5, 5, 10, 20, 40, 80, 100, 150, and 200 × 2.43 mJy beam$^{-1}$.
+OH (Bartkiewicz et al. 2005),
+H$_2$O (Sobolev et al. 2018), and
+CH$_3$OH (Sanna et al. 2017) masers are presented as diamonds, circles, and squares,
+respectively. The color bar on the right-hand side indicates
+the velocity range
+($V_{\rm LSR}$)
+of maser spots.
+ Article number, page 9 of 10
+A&A proofs: manuscript no. mainArxiv
+Fig. A.2. 1.36 cm JVLA continuum map of G34.26+0.15 presented as gray shaded areas. The reference position is
+$\alpha_{\rm J2000} = 18^{\rm h}53^{\rm m}18\fs560$,
+and
+$\delta_{\rm J2000} = 01^\circ14'58\farcs201$,
+the peak position,
+is marked by a red cross.
+The red ellipses show
+the positions of
+NH$_3$ (9,6) emission
+with stars at their
+center (i.e., M1, M2, and M3). The blue contours show the Berkeley-Illinois-Maryland Association (BIMA) array 2.8 mm continuum, taken from
+Mookerjea et al. (2007). Contour levels are -3, 3, 10, 20, 30, 40, 50, 70, 90, 100, 120, and 140 × 20 mJy beam$^{-1}$.
+OH (Zheng et al. 2000),
+H$_2$O (Imai
+et al. 2011), and
+CH$_3$OH (Bartkiewicz et al. 2016) masers are presented as diamonds, circles, and squares, respectively. The color bar indicates
+the velocity range
+($V_{\rm LSR}$)
+of maser spots.
+Article number, page 10 of 10
\ No newline at end of file
diff --git a/read/results/playa/2201.00022.txt b/read/results/playa/2201.00022.txt
new file mode 100644
index 0000000..66ac88e
--- /dev/null
+++ b/read/results/playa/2201.00022.txt
@@ -0,0 +1,1383 @@
+Draft version July 7, 2022
+Typeset using LaTeX twocolumn style in AASTeX631
+The Formation of Intermediate Mass Black Holes in Galactic Nuclei
+Sanaea C. Rose,$^{1,2}$ Smadar Naoz,$^{1,2}$ Re’em Sari,$^{3}$ and Itai Linial$^{3}$
+$^{1}$Department of Physics and Astronomy, University of California, Los Angeles, CA 90095, USA
+$^{2}$Mani L. Bhaumik Institute for Theoretical Physics, University of California, Los Angeles, CA 90095, USA
+$^{3}$Racah Institute for Physics, The Hebrew University, Jerusalem 91904, Israel
+ABSTRACT
+Most stellar evolution models predict that black holes (BHs) should not exist above approximately
+$50-70~M_\odot$, the lower limit of the pair-instability mass gap. However, recent LIGO/Virgo detections
+indicate the existence of BHs with masses at and above this threshold. We suggest that massive
+BHs, including intermediate mass black holes (IMBHs), can form in galactic nuclei through collisions
+between stellar-mass black holes and the surrounding main-sequence stars. Considering dynamical
+processes such as collisions, mass segregation, and relaxation, we find that this channel can be quite
+efficient, forming IMBHs as massive as $10^4~M_\odot$. This upper limit assumes that (1) the BHs accrete a
+substantial fraction of the stellar mass captured during each collision and (2) that the rate at which
+new stars are introduced into the region near the SMBH is high enough to offset depletion by stellar
+disruptions and star-star collisions. We discuss deviations from these key assumptions in the text. Our
+results suggest that BHs in the pair-instability mass gap and IMBHs may be ubiquitous in galactic
+centers. This formation channel has implications for observations. Collisions between stars and BHs
+can produce electromagnetic signatures, for example, from x-ray binaries and tidal disruption events.
+Additionally, formed through this channel, both black holes in the mass gap and IMBHs can merge
+with the supermassive black hole at the center of a galactic nucleus through gravitational waves.
+These gravitational wave events are extreme and intermediate mass ratio inspirals (EMRIs and IMRIs,
+respectively).
+1. INTRODUCTION
+The recently detected gravitational wave source
+GW190521 (The LIGO Scientific Collaboration et al.
+2020a,b) produced an intermediate mass black hole of
+approximately $142~M_\odot$. This event may have also had an
+$85~M_\odot$ progenitor, which falls within the pair-instability
+mass gap that limits stellar black holes (BHs) to no
+more than $\lesssim 50~M_\odot$ (e.g., Heger et al. 2003; Woosley
+2017).$^{1}$ Similarly, the merger products of GW150914,
+GW170104, and GW170814 fall within the mass gap
+(e.g., Abbott et al. 2016, 2017a,b). BH mergers that
+form second generation BHs and, in some cases, intermediate
+ mass BHs (IMBHs), these gravitational wave
+(GW) events can occur in globular clusters, young stelCorresponding
+ author: Sanaea C. Rose
+srose@astro.ucla.edu
+1
+ Note that the exact lower and upper limits may be sensitive to
+metallicity of the progenitor (e.g., Woosley 2017; Spera & Mapelli
+2017a; Limongi & Chieffi 2018a; Sakstein et al. 2020; Belczynski
+et al. 2020a; Renzo et al. 2020; Vink et al. 2021). lar clusters, or the field (e.g., Rodriguez et al. 2018; Rodriguez
+ et al. 2019; Fishbach et al. 2020; Mapelli et al.
+2021b,a; Di Carlo et al. 2019, 2021; Dall’Amico et al.
+2021; Arca Sedda et al. 2021). However, IMBHs are
+not limited to these locations and may reside in galactic
+ nuclei as well. Several studies propose that our
+own galactic center may host an IMBH in the inner pc
+(e.g., Hansen & Milosavljević 2003; Maillard et al. 2004;
+Gürkan & Rasio 2005; Gualandris & Merritt 2009; Chen
+& Liu 2013; Generozov & Madigan 2020; Fragione et al.
+2020a; Zheng et al. 2020; Naoz et al. 2020; GRAVITY
+Collaboration et al. 2020).
+Several IMBH formation channels have been suggested
+in the literature. For example, IMBHs may have a cosmological
+ origin, forming in the early universe either
+as a result of the very first stars (e.g., Madau & Rees
+2001; Schneider et al. 2002; Johnson & Bromm 2007;
+Valiante et al. 2016) or from direct collapse of accumulated
+ gas (e.g., Begelman et al. 2006; Yue et al. 2014;
+Ferrara et al. 2014; Choi et al. 2015; Shlosman et al.
+2016). These high redshift IMBHs would need to survive
+ galaxy evolution and mergers to present day (e.g.,arXiv:2201.00022v2  [astro-ph.GA]  6 Jul 2022
+  Rose et al.
+Rashkov & Madau 2014), with significant effects on their
+stellar and even dark matter surroundings (e.g., Bertone
+et al. 2009; Chen & Liu 2013; Bringmann et al. 2012; Eda
+et al. 2013; Naoz & Silk 2014; Naoz et al. 2019). Another
+popular formation channel relies on the coalescence of
+many stellar-mass black holes, which may seed objects
+as massive as SMBHs (e.g., Kroupa et al. 2020). IMBHs
+may form in the centers of globular clusters, where few-body
+interactions lead to the merger of stellar-mass BHs
+(e.g., O’Leary et al. 2006; Gürkan et al. 2006; Blecha
+et al. 2006; Freitag et al. 2006; Umbreit et al. 2012; Rodriguez
+ et al. 2018; Rodriguez et al. 2019; Fragione et al.
+2020b). Other formation mechanisms invoke successive
+collisions and mergers of massive stars (e.g., Ebisuzaki
+et al. 2001; Portegies Zwart & McMillan 2002; Portegies
+Zwart et al. 2004; Freitag et al. 2006; Sakurai et al. 2017;
+Kremer et al. 2020; González et al. 2021; Di Carlo et al.
+2021; Das et al. 2021a,b; Escala 2021).
+The main obstacle to sequential BH mergers in clusters
+ is that the merger recoil velocity kick often exceeds
+the escape velocity from the cluster (e.g., Schnittman
+& Buonanno 2007; Centrella et al. 2010; O’Leary et al.
+2006; Baibhav et al. 2020, Rom & Sari, in prep.). However,
+ nuclear star clusters at the centers of galaxies do
+not encounter this problem. For example, Fragione et al.
+(2021) explore repeated BH-BH mergers in nuclear star
+clusters without a SMBH. They considered BH binary-single
+interactions, binary BH GW merger, and GW
+merger recoil kicks. The post-kick merger product sinks
+back towards the cluster center over a dynamical friction
+ timescale. Using this approach, they showed that
+$10^3-10^4~M_\odot$ IMBHs can form efficiently over the lifetime
+of a cluster.
+However, as discussed in Section 2.2, direct BH-star
+collisions are much more frequent than BH-BH collisions
+in galactic nuclei, making the former a promising channel
+ for BH growth. In an N-body study of young star
+clusters, Rizzuto et al. (2022) find that BH-star collisions
+ are a main contributor to the formation of BHs
+in the mass gap and IMBHs. In a similar vein, Stone
+et al. (2017) demonstrate that massive BHs can form
+from repeated tidal encounters between stars and BHs.
+More generally, several studies have explored the role of
+collisions in a GN, with implications for the stellar and
+red giant populations (e.g., Dale & Davies 2006; Dale
+et al. 2009; Balberg et al. 2013; Mastrobuono-Battisti
+et al. 2021). We propose that IMBHs can form naturally
+within the central pc of a galactic center through repeated
+ collisions between BHs and main sequence stars.
+During a collision, the BH can accrete some portion of
+the star’s mass. Over many collisions, it can grow appreciably
+ in size. We demonstrate that this channel can create IMBHs with masses as large as $10^4~M_\odot$, an upper
+limit that depends on the density profile of the surrounding
+ stars and the efficiency of the accretion.
+The paper is structured as follows: we describe relevant
+ physical processes and our approach in Section 2.
+In particular, we provide an overview of collisions in
+Section 2.2 and present our statistical approach in Section
+ 2.3. Section 2.4 discusses our treatment of the
+mass growth with each collision and presents analytic
+solutions to our equations in two different regimes, efficient
+ collisions and inefficient collisions. We compare
+these solutions to our statistical results. Sections 2.6
+and 2.8 discuss implications for GW merger events between
+ IMBHs and the SMBH. We then incorporate relaxation
+ processes and discuss the subsequent results in
+Section 2.9. Finally, we discuss and summarize our findings
+ in Section 3.
+2. METHODOLOGY
+We consider a population of stellar mass BHs embedded
+ in a cluster of $1~M_\odot$ stars. When stars and BHs
+collide, the BHs can accrete mass. The growth rate depends
+ on the physical processes outlined below. We use
+a statistical approach to estimate the stellar encounters
+and final IMBH masses.
+2.1. Physical Picture
+We consider a population of BHs within the inner few
+parsecs of the SMBH in a galactic nucleus (GN). We assume
+ that the BH mass distribution follows that of the
+stars from which they originate, a Kroupa initial mass
+function $dN/dm \propto m^{-2.35}$. While this choice represents
+a gross oversimplification, it has very little bearing on
+our final results. Future work may address the particulars
+ of the BH mass distribution, but we do not expect
+that it will significantly alter the outcome. The lower
+and upper limits of the BH mass distribution are 5 and
+$50~M_\odot$, respectively. We select the upper limit to encompass
+the range of upper bounds predicted by stellar
+evolution models, which vary between 40 and $125~M_\odot$
+depending on the metallicity (Heger et al. 2003; Woosley
+2017; Spera & Mapelli 2017b; Limongi & Chieffi 2018b;
+Belczynski et al. 2020b; Renzo et al. 2020). We assume
+that the orbits of the BHs follow a thermal eccentricity
+distribution. We draw their semimajor axes, $a_\bullet$, from a
+uniform distribution in log distance, dN/d(log r) being
+constant. While this distribution is not necessarily representative
+ of actual conditions in the GN, we use it to
+build a comprehensive physical picture of BH growth at
+all distances from the SMBH, including within 0.01 pc.
+Otherwise, the innermost region of the GN would be
+poorly represented in our sample. We consider other
+IMBH Formation in Galactic Nuclei 3
+Figure 1. We plot the relevant timescales, including collision
+ (green), relaxation (gold), and BH-BH GW capture
+(purple), for a single BH in the GN as a function of distance
+from the SMBH. For the collision timescale, we assume the
+BH is on a circular orbit. The timescales depend on the
+density, so we adopt a range of density profiles, bounded by
+α = 1 (dashed curve) to α = 2 (dark, solid curve). The dark
+blue line represents the time for a $10^5~M_\odot$ BH to merge with
+the SMBH through GW emission.
+observationally motivated distributions in Section 2.9,
+but reserve a more detailed examination of the distribution’s
+ impact for future work.
+2.2. Direct Collisions
+BHs in the GN can undergo direct collisions with other
+objects. The timescale for this process, $t_{\rm coll}$, can be estimated
+using a simple rate calculation: $t_{\rm coll}^{-1} = n \sigma A$,
+where $n$ is the number density of objects, $\sigma$ is the velocity
+dispersion, and $A$ is the cross-section. We use the
+collision timescale from Rose et al. (2020):
+$$t_{\rm coll}^{-1} = \pi n(a_\bullet) \sigma(a_\bullet) \times \left( f_1(e_\bullet) r_c^2 + f_2(e_\bullet) r_c \frac{2 G (m_{\rm BH} + m_\star)}{\sigma(a_\bullet)^2} \right) , \tag{1}$$
+where $G$ is the gravitational constant and $r_c$ is the sum
+of the radii of the interacting objects, a black hole with
+mass $m_{\rm BH}$ and a star with mass $m_\star$. Detailed in Rose
+et al. (2020), $f_1(e_\bullet)$ and $f_2(e_\bullet)$ account for the effect of
+the eccentricity of the BH’s orbit about the SMBH on
+the collision rate, while $n$ and $\sigma$ are simply evaluated
+at the semimajor axis of the orbit (see below). Note
+that this timescale equation includes the effects of gravitational
+ focusing, which enhances the cross-section of
+interaction.
+Assuming a circular orbit for simplicity, we plot the
+timescale for a BH orbiting in the GN to collide with
+a $1~M_\odot$ star as a function of distance from the SMBH in Figure 1.$^{2}$
+ As this timescale depends on the density
+of surrounding stars, we adopt a density profile of the
+form:
+$$\rho(r_\bullet) = \rho_0 \left( \frac{r_\bullet}{r_0} \right)^{-\alpha} , \tag{2}$$
+where $r_\bullet$ denotes the distance from the SMBH. We adopt
+a SMBH mass of $4 \times 10^6~M_\odot$ such that our fiducial GN
+matches our own galactic center (e.g., Ghez et al. 2005;
+Genzel et al. 2003). In this case, the normalization in
+Eq. (2) is $\rho_0 = 1.35 \times 10^6~M_\odot/{\rm pc}^3$ at $r_0 = 0.25$ pc (Genzel
+ et al. 2010). Additionally, in Eq. (2), α gives the
+slope of the power law. We assume that a uniform population
+ of solar mass stars account for most of the mass
+in the GN, making the stellar number density:
+$$n(r_\bullet) = \frac{\rho(r_\bullet)}{1~M_\odot} . \tag{3}$$
+The collision timescale also depends on the velocity dispersion,
+ which we express as:
+σ(r
+•) =
+ GM
+•
+r
+•(1 + α) , (4)
+where α is the slope of the density profile and M
+• denotes
+ the mass of the SMBH (Alexander 1999; Alexander
+ & Pfuhl 2014). As mentioned above, Eq. (1) depends
+on the sum of the radii of the colliding objects, $r_c$. We
+take $r_c = 1~R_\odot$ because these interactions involve a BH
+and a star, and the former has a much smaller physical
+cross-section. For example, the Schwarzschild radius
+of a $10~M_\odot$ BH is only 30 km, or $4.31 \times 10^{-5}~R_\odot$. For
+this reason, direct collisions between compact objects
+are very rare and not included in our model.
+We note that direct collisions between BHs, via GW
+emission, were shown to be efficient in nuclear star clusters
+ without SMBHs (e.g., Portegies Zwart & McMillan
+ 2000; O’Leary et al. 2006; Rodriguez et al. 2016).
+However, in the GN, star-BH collisions are much more
+frequent than direct BH-BH collisions. As depicted in
+Figure 1, the star-BH collision timescale for a range
+of density profiles is many orders of magnitude shorter
+than the BH-BH GW collision timescale (for the relevant
+ equations, see O’Leary et al. 2009; Gondán et al.
+2018, for example). Thus, we expect that star-BH collisions
+ will be the main driver of IMBH growth in the
+GN.
+2
+ We note that the eccentricity has a very minor effect on the
+collision timescale (Rose et al. 2020).
+  Rose et al.
+2.3. Statistical Approach to Collisions
+We simulate the mass growth of a population of BHs
+with initial conditions detailed in Section 2.1. Over an
+increment $\Delta t$ of $10^6$ yr, we calculate the probability of
+a collision occurring, given by $\Delta t/t_{\rm coll}$. This choice of
+∆t is motivated by our galactic center’s star formation
+timescale (e.g., Lu et al. 2009), allowing for regular replenishment
+ of the stellar population in the GN. We have
+checked that the results are not sensitive to this choice
+of ∆t, omitted here to avoid clutter. We draw a number
+between 0 and 1 using a random number generator. If
+that number is less than or equal to the probability, we
+increase the BH’s mass by ∆m, the mass that the BH is
+expected to accrete in a single collision (see Section 2.4
+for details). We recalculate the collision timescale using
+the updated BH mass and repeat this process until the
+time elapsed equals the simulation time of 10 Gyr.$^{3}$
+2.4. Mass Growth
+When a BH collides with a star, it may accrete material
+ and grow in mass. The details of the accretion
+depend on the relative velocity between the BH and
+star. For simplicity, this calculation assumes that the
+two objects experience a head-on collision, with the BH
+passing through the star’s center. We begin by considering
+ the escape velocity from the BH at the star’s
+outermost point, its surface, which corresponds to the
+maximum impact parameter $1~R_\odot$. Qualitatively, one
+might expect that the BH could capture the entire star
+(i.e., $\Delta m \sim 1~M_\odot$) if the relative velocity is smaller than
+the escape velocity from the BH at this point. However,
+in the vicinity of the SMBH, the dispersion velocity of
+the stars may be much larger than the escape velocity
+from the BH at the star’s surface. In this case, the BH
+captures a “tunnel” of material through the star. This
+tunnel has radius equal to the Bondi radius and length
+approximately $1~R_\odot$. For the purposes of this study, we
+assume that the BH accretes all of the material that
+it captures. The details of the accretion are uncertain,
+however, and it may be much less efficient than our results
+ imply. We discuss accretion in Section 2.5.
+To estimate $\Delta m$, we begin with the Bondi-Hoyle accretion
+rate, $\dot{m}$, given by:
+$$\dot{m} = \frac{4 \pi G^2 m_{\rm BH}^2 \rho_{\rm star}}{(c_s^2 + \sigma^2)^{3/2}} , \tag{5}$$
+$^{3}$ Closer to the SMBH, $\Delta t$ may exceed the collision timescale by
+a factor of a few for steep density profiles. We include a safeguard
+in our code which takes the ratio $\Delta t/t_{\rm coll}$ and rounds it
+to the nearest integer. We take this integer to be the number of
+collisions and increase the BH mass accordingly.
+Figure 2. We consider an example that highlights the mass
+growth as a function of distance from the SMBH. Grey dots
+represent the initial masses and distances from the SMBH
+of the BHs involved in the simulation. For simplicity, we set
+the initial mass equal to $10~M_\odot$ for all of the BHs. Assuming
+the density profile of stars has α = 1, we consider two cases:
+BHs accrete all of the star’s mass during a collision (red) and
+only a portion of the star’s mass is accreted during a collision
+given by Eq. 6 (blue). The latter case results in less growth
+closer to the SMBH where the velocity dispersion becomes
+high. The shaded regions and dashed lines represent the
+analytical predictions detailed in Section 2.4.
+where $c_s$ is the speed of sound in the star and $\rho_{\rm star}$ is its
+density (e.g., Bondi 1952; Bondi & Hoyle 1944; Shima
+et al. 1985; Edgar 2004, see latter for a review). We
+approximate the density as $1~M_\odot/(4 \pi R_\odot^3/3)$ and take
+the conservative value of $c_s = 500$ km s$^{-1}$, which is
+consistent with the sound speed inside a $1~M_\odot$ star
+(Christensen-Dalsgaard et al. 1996) and allows us to set
+a lower limit on $\Delta m$. To find $\Delta m$, at each collision, we
+have:
+$$\Delta m = \min( \dot{m} \times t_{\star,{\rm cross}},\ 1~M_\odot ) , \tag{6}$$
+where $t_{\star,{\rm cross}} \sim R_\odot/\sigma$ is the crossing time of the BH in
+the star. We take the minimum between $\dot{m} \times t_{\star,{\rm cross}}$ and
+$1~M_\odot$ because the BH cannot accrete more mass than
+one star at each collision.
+Figure 2 juxtaposes the expected growth using Bondi-Hoyle-Lyttleton
+accretion (blue small points) with a
+much simpler model in which the BH accretes the star’s
+entire mass, $1~M_\odot$ (red large points). Both examples
+start with identical populations of $10~M_\odot$ BHs (grey)
+and simulate growth through collisions using a statistical
+ approach. As the BHs grow, the collision timescale,
+which depends on $m_{\rm BH}$, decreases. Simultaneously,
+$\Delta m$, which also depends on $m_{\rm BH}$, increases. The result
+ is exponential growth (see discussion and details
+surrounding Eq. (8)). In Figure 2, however, the simulations
+ assume α = 1 for the stellar density profile, ensuring
+ the collision timescale is long compared to the sim-
+IMBH Formation in Galactic Nuclei 5
+ulation time, 10 Gyr. Therefore, the BHs grow slowly,
+and their final masses can be approximated using the
+following equation:
+$$m_{\rm final}(t_{\rm coll} \to {\rm const.}) = m_{\rm initial} + \Delta m \frac{T}{t_{\rm coll}} , \tag{7}$$
+in which $T$ represents the simulation time and $\Delta m$ and
+$t_{\rm coll}$ remain constant, approximated as their initial values.
+
+This equation is plotted in Figure 2 for both cases,
+$\Delta m = 1~M_\odot$ (red) and $\Delta m$ from Bondi-Hoyle-Lyttleton
+accretion (blue), and the curves coincide with the corresponding
+ simulated results. The shaded regions represent
+ one standard deviation from Eq. (7), calculated
+using the square root of the number of collisions, $T/t_{\rm coll}$.
+As indicated by the results in red, in the absence of
+Bondi-Hoyle-Lyttleton accretion, the BHs closest to the
+SMBH experience the most growth because they have
+shorter collision timescales. However, Bondi-Hoyle-Lyttleton
+accretion becomes important closer to the
+SMBH, where the velocity dispersion is large compared
+with the stars’ escape velocity, and curtails the mass
+growth for BHs in this region. Outside of $10^{-2}$ pc, a BH
+consumes the star’s entire mass: the accretion-limited
+$\Delta m$ governed by Eq. (6) is greater than or equal to the
+star’s mass.
+Eq. 7 does not apply for other values of α. When the
+collision timescale is shorter, corresponding to a larger
+index α in the density profile (see Figure 1), the growth
+is very efficient and $\Delta m$ quickly approaches $1~M_\odot$. Consequently,
+while we can now assume $\Delta m = 1~M_\odot$, we
+can no longer assume the collision timescale is constant.
+The final mass grows exponentially as a result. For
+$\Delta m = 1~M_\odot$, the general solution is reached by solving
+the differential equation $dm/dt = 1~M_\odot/t_{\rm coll}(m)$, which
+gives:
+$$m_{\rm final}(\Delta m \to 1~M_\odot) = -A + (m_{\rm initial} + A)\, e^{CT} , \tag{8}$$
+where $A = \sigma^2 R_{\rm star}/G$ and $C = 2 \pi G n_{\rm star} R_{\rm star}/\sigma$. As an
+example, we plot this curve in purple for the α = 2 case,
+in Figure 3, which agrees with the simulated masses.
+2.5. Uncertainties in Accretion
+We note that the ∆M calculated in this proof-of-concept
+study assumes that the BH accretes all of the
+material that it captures. Estimating the true fraction
+of the material accreted by the BH is very challenging;
+ this complex problem requires numerically solving
+the generalized GR fluid equations with cooling, heating,
+ and radiative transfer, etc. and remains an active
+field of research (e.g., Blandford & Begelman 1999; Park
+& Ostriker 2001; Narayan et al. 2003; Igumenshchev et al. 2003; Ohsuga et al. 2005; Yuan et al. 2012; Jiang
+et al. 2014; McKinney et al. 2014; Narayan et al. 2022).
+Heuristically, if a collision between a BH and a star results
+ in an accretion disk, the disk’s viscous timescale
+may be as low as days. The resultant luminosity can
+unbind most of the captured material, though details
+such as the amount accreted and peak luminosity remain
+ uncertain (e.g., Yuan et al. (2012); Jiang et al.
+(2014), see also the discussion in Stone et al. (2017),
+Rizzuto et al. (2022), and Kremer et al. (2022)). The
+question becomes whether or not a BH can still accumulate
+ significant amounts of mass over many collisions
+even if it accretes very little in a single one. We explore
+ the viability of our channel using a physically motivated
+ inefficient accretion model. Several studies have
+invoked momentum-driven winds in BH accretion (e.g.,
+Murray et al. 2005; Ostriker et al. 2010; Brennan et al.
+2018). We thus estimate the fraction of captured mass
+accreted to be approximately $v_{\rm esc}/(c\eta)$, where $v_{\rm esc}$ is
+the escape velocity from the BH at $1~R_\odot$ and $\eta$ is the
+accretion efficiency at the ISCO. We take η to be 0.1
+(e.g., Yu & Tremaine 2002). This expression for the
+fraction accreted is consistent with Kremer et al. (2022)
+equation 19 for s = 0.5, which is a reasonable value for
+s, a free parameter between 0.2 and 0.8. We discuss
+the results of the momentum-driven winds estimate in
+Section 3. We note that the accretion process may be
+more efficient than this estimate implies if, for example,
+jets or other instabilities result in the beaming of radiation
+ away from the captured material (e.g., Blandford
+& Znajek 1977; Begelman 1979; De Villiers et al. 2005;
+McKinney & Gammie 2004; McKinney 2006; Igumenshchev
+ 2008; Begelman 2012a,b; McKinney et al. 2014).
+2.6. GW Inspiral
+When a BH is close to the SMBH, GW emission can
+circularize and shrink its orbit. We implement the effects
+ of GW emission on the BH’s semimajor axis and
+eccentricity following Peters & Mathews (1963a). The
+characteristic timescale to merge a BH with an SMBH
+is given by:
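+$$t_{\rm GW} \approx 2.9 \times 10^{12}~{\rm yr} \left( \frac{M_\bullet}{10^6~M_\odot} \right)^{-1} \left( \frac{m_{\rm BH}}{10^6~M_\odot} \right)^{-1} \left( \frac{M_\bullet + m_{\rm BH}}{2 \times 10^6~M_\odot} \right)^{-1} \left( \frac{a_\bullet}{10^{-2}~{\rm pc}} \right)^{4} \times f(e_\bullet) (1 - e_\bullet^2)^{7/2} , \tag{9}$$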
+where $f(e_\bullet)$ is a function of $e_\bullet$. For all values of $e_\bullet$,
+$f(e_\bullet)$ is between 0.979 and 1.81 (Blaes et al. 2002). We
+plot this timescale for a $1 \times 10^5~M_\odot$ BH in Figure 1 in
+blue.
+  Rose et al.
+Figure 3. On the right, we plot final masses of 500 BHs using different values of α in the density profile, shallow (α = 1) to
+cuspy (α = 2). For the latter case, the purple line shows the analytical result from Eq. 8, taking $m_{\rm initial}$ to be the average mass
+of the population. Faded stars indicate BHs that merged with the SMBH through GWs. On the left, we plot the masses and
+merger times of these BHs.
+In our simulations, we assume a BH has merged with
+the SMBH when the condition $t_{\rm GW} < t_{\rm elapsed}$ is met.
+When this condition is satisfied, we terminate mass
+growth through collisions for that BH.$^{4}$
+2.7. IMBH growth
+As detailed above, BH-stellar collisions can increase
+the BH masses as a function of time. Here, we examine
+the sensitivity of the BH growth to the density power
+law. From Eq. (1), it is clear that the growth rate depends
+ on the stellar density profile, governed by the index
+ α. We expect that higher values of α, or steeper
+profiles, will result in more efficient mass growth. In
+Figure 1, larger values of α lead to collision timescales
+in the GN’s inner region, inwards of 0.25 pc, that are
+much smaller than the 10 Gyr simulation time. Figure 3
+confirms this expectation. It depicts the mass growth of
+a uniform distribution of BHs with initial conditions detailed
+ in Section 2.1 for five α values, spanning 1 (green)
+to 2 (purple). The most massive IMBHs form inwards
+of 0.25 pc for the α = 2 case.
+2.8. Gravitational Wave Mergers and Intermediate
+and Extreme Mass Ratio Inspiral Candidates
+Towards the SMBH, efficient collisions can create BHs
+massive enough to merge with the SMBH through GWs.
+Following the method detailed in Section 2.6, when a
+given BH meets the criterion $t_{\rm GW} < t_{\rm elapsed}$, we mark
+$^{4}$ For comparison, we also incrementally changed the semimajor
+axis and eccentricity from GW emission following the equations
+in Peters & Mathews (1963b). This method leads to a slight
+increase in the final IMBH masses because it accounts for the
+collisions that take place while the orbit is gradually shrinking.
+it as merged with the SMBH. We assume that at this
+point the dynamics of the BH will be determined by GW
+emission, shrinking and circularizing the BH’s orbit until
+it undergoes an extreme or intermediate mass ratio
+inspiral (EMRI and IMRI, respectively). The right-hand
+plot in Figure 3 shows the BH masses versus time of
+merger. It is interesting to note that even in the absence
+ of relaxation processes, which are often invoked
+to explain the formation of EMRIs, EMRIs and notably
+IMRIs can form in this region.
+2.9. Two Body Relaxation Processes
+A BH orbiting the SMBH experiences weak gravitational
+ interactions with other objects in the GN. Over a
+relaxation time, these interactions alter its orbit about
+the SMBH. The two-body relaxation timescale for a
+single-mass system is:
+$$t_{\rm relax} = 0.34 \frac{\sigma^3}{G^2 \rho M_* \ln \Lambda_{\rm rlx}} , \tag{10}$$
+where $\ln \Lambda_{\rm rlx}$ is the Coulomb logarithm and $M_*$ is the
+average mass of the surrounding objects, here assumed
+to be $1~M_\odot$ (Spitzer 1987; Binney & Tremaine 2008,
+Eq. (7.106)). This equation represents the approximate
+timescale for a BH on a semi-circular orbit to change
+its orbital energy and angular momentum by order of
+themselves. The BH experiences diffusion in its angular
+momentum and energy as a function of time (depending
+on the eccentricity of the orbit, this process can be more
+efficient Fragione & Sari 2018; Sari & Fragione 2019).
+Relaxation can cause the orbit of an object in a GN to
+reach high eccentricities. If the object is a BH, it can
+spiral into the SMBH and form an EMRI, while a star
+IMBH Formation in Galactic Nuclei 7
+can be tidally disrupted by the SMBH (e.g. Magorrian
+& Tremaine 1999; Wang & Merritt 2004; Hopman &
+Alexander 2005; Aharon & Perets 2016; Stone & Metzger
+ 2016; Amaro-Seoane 2018; Sari & Fragione 2019;
+Naoz et al. 2022). The relaxation process is therefore
+crucial to our study. In Figure 1, we plot the relaxation
+timescale in gold for a range of α. We note that the Bahcall
+ & Wolf (1976) profile, α = 7/4, corresponds to zero
+net flux and therefore does not preferentially migrate
+objects inward.
+Additionally, because BHs are more massive on average
+than the surrounding objects, they are expected
+to segregate inwards in the GN (e.g., Shapiro &
+Marchant 1978; Cohn & Kulsrud 1978; Morris 1993;
+Miralda-Escudé & Gould 2000; Baumgardt et al. 2004).
+They sink toward the SMBH on the mass segregation
+timescale, $t_{\rm seg} \approx M_*/m_{\rm BH} \times t_{\rm relax}$ (e.g., Spitzer 1987;
+Fregeau et al. 2002; Merritt 2006), which is typically an
+order of magnitude smaller than the relaxation timescale
+plotted in Figure 1.
+We incorporate relaxation processes by introducing a
+small change in the BH’s energy and angular momentum
+ each time it orbits the SMBH. We apply a small
+instantaneous velocity kick to the BH, denoted as $\Delta v$.
+We draw $\Delta v$ from a Gaussian distribution with average
+of zero and a standard deviation of $\Delta v_{\rm rlx}/\sqrt{3}$, where
+$\Delta v_{\rm rlx} = v_\bullet \sqrt{P_\bullet/t_{\rm rlx}}$ (see Bradnick et al. 2017, for an
+approach to changes in the angular momentum). The
+new orbital parameters can be calculated following Lu
+& Naoz (2019), and see Naoz et al. (2022) for the full
+set of equations.
+We account for the effects of relaxation processes,
+including mass-segregation, using a multi-faceted approach.
+ We begin by migrating each BH towards the
+center over its mass-segregation timescale, shifting it incrementally
+ inward such that its orbital energy changes
+by order of itself within the segregation timescale.
+As the BHs segregate down the potential well, their
+abundance with respect to stars increases, until at some
+turnover radius, BHs become the dominant source of
+scattering for both black holes and stars. Within this radius,
+ BH self-interaction dominates over two-body scatterings
+ with the now rarer main-sequence stars. The
+BHs will then settle onto a Bahcall-Wolf profile, while
+the stars may follow a shallower profile, with approximately
+$n \propto r^{-1.5}$, inwards of the transition radius
+(Linial & Sari in prep.).
+Therefore, after the initial mass segregation, we allow
+the BHs to begin diffusing over a relaxation timescale,
+their orbital parameters changing slowly through a random
+ process. In this random process, some of the BHs
+may migrate closer to the SMBH. We terminate mass growth when the BH enters the inner 200 au of the GN,
+within which the density of stars is uncertain. This cutoff
+ is based on the 120 au pericenter of S0-2, the closest
+known star to the SMBH (e.g., Ghez et al. 2005).
+Another physical process that causes inward migration
+ is dynamical friction. A cursory derivation based
+on the dynamical friction equations described in Binney
+& Tremaine (2008) reveals the process to have a similar
+ timescale to mass segregation. If a BH diffuses to
+a distance greater than 2 pc from the SMBH, exiting
+the sphere of influence, we have it sink inwards, back
+towards the center, over a dynamical friction timescale.
+After one dynamical friction timescale has passed, we
+restart diffusion.
+We note that our prescription ignores self-interactions
+between the BHs. As mentioned above, as the BHs sink
+towards the SMBH, their concentration in the inner region
+ of the GN increases, allowing them to dominate the
+scattering. We reserve the inclusion of these interactions
+for future study.
+2.10. Effect of Relaxation Processes
+As depicted in Figure 4, two-body relaxation processes
+result in more EMRI and IMRI events. These processes
+allow BHs that begin further from the SMBH
+to migrate inwards and grow more efficiently in mass.
+However, they also impede the growth of BHs that are
+initially closer to the SMBH by allowing them to diffuse
+ out of the inner region where collisions are efficient.
+As can be seen in Figure 4, the net result is that more
+BHs grow, but the maximum mass is lower compared
+to the scenario that ignores two-body relaxation. The
+histogram in Figure 4 presents the final BH mass distributions
+ for different power law indices α. As expected,
+the two-body relaxation suppresses the α dependence
+highlighted in Figure 3. In fact, using a KS test, we
+find that we cannot reject the hypothesis that the two
+distributions were drawn from the same sample for the
+α = 1.75 and α = 2 results. Interestingly, a BH mass
+IMF with an average of 10 $M_\odot$
+ leads to a final distribution
+ with an average of ∼ 200 $M_\odot$
+ and a median of
+∼ 45 $M_\odot$
+, which lies within the mass gap.
+3. DISCUSSION AND PREDICTIONS
+We explore the feasibility of forming IMBHs in a
+GN through successive collisions between a stellar-mass
+BH and main-sequence stars. Taking both a statistical
+ and analytic approach, we show that this channel
+can produce IMBHs efficiently with masses as high as
+$10^{3-4}$
+ $M_\odot$
+ and may result in many IMBH-SMBH mergers
+ (intermediate-mass ratio inspirals, or IMRIs) and
+EMRIs.
+  Rose et al.
+Figure 4. Similar to Figure 3, we plot the initial masses versus initial distance (grey) and final mass versus final distance
+(red) for 500 BHs. This simulation includes relaxation processes, including mass segregation, diffusion, and dynamical friction.
+We assume α = 1.75 for the GN density profile. Faded stars represent BHs that merge with the SMBH. As a result of inward
+migration, BHs merge more quickly with the SMBH, before they can become as massive as those in Figure 3. Additionally,
+more BHs become EMRIs and IMRIs. Additionally, in the third panel, we show a histogram of the simulated IMBH masses
+for three different values of α: 1.5 (orange, solid), 1.75 (red, dashed), and 2 (purple, dash-dotted), accounting for relaxation
+processes. We also show the results for a simulation with α = 1.75 that accounts for momentum-driven winds (black, dotted).
+Despite the substantially reduced accretion, BHs in the mass gap still form.
+As the stellar mass BH collides with a star, the BH
+will grow in mass. The increase may equal the star’s entire
+ mass if the relative velocity is smaller than the escape
+ velocity from the BH at 1 $R_\odot$
+. However, near the
+SMBH, the velocity dispersion may be larger than the
+escape velocity from the BH at the star’s radius. In this
+limit, the BH captures a “tunnel” of material through
+the star, estimated using Bondi-Hoyle-Lyttleton accretion.
+ In our statistical analysis, we account for Bondi-Hoyle-Lyttleton
+ accretion and find that BHs outside of
+$10^{-2}$
+ pc from the SMBH can capture the entire star (see
+Figure 2).
+The efficiency of collisions, and therefore IMBH,
+EMRI, and IMRI formation as well, are sensitive to
+the underlying stellar density. As shown in Figure 3, a
+steeper density profile results in larger IMBHs. This behavior
+ can be understood from the collision timescale’s
+dependence on the stellar density profile. A steeper profile
+ yields shorter collision timescales near the SMBH.
+However, the inclusion of relaxation processes in the
+simulations dampens the influence of the stellar density
+profile by allowing BHs to diffuse into regions of more
+or less efficient growth. As a result, more BHs grow in
+mass, but their maximum mass is smaller (∼ $10^4$
+ $M_\odot$
+).
+Additionally, the final masses have no apparent dependence
+ on distance from the SMBH (see Figure 4).
+Most simulations in our study assume that the BHs
+accrete all of the mass that they capture. The final BH
+masses can be taken as an upper limit. We note that
+the accretion is a highly uncertain process and represents
+ an active field of study (e.g., Blandford & Begelman
+ 1999; Park & Ostriker 2001; Narayan et al. 2003;
+Igumenshchev et al. 2003; Ohsuga et al. 2005; Yuan
+et al. 2012; Jiang et al. 2014; McKinney et al. 2014;
+Narayan et al. 2022). To assess the limits of our model, we also consider a physically motivated accretion model,
+momentum-driven winds (Section 2.5). We present the
+final mass distribution for momentum-driven winds in
+Figure 4. Importantly, we find that BHs within the
+mass gap still form naturally despite the substantially
+reduced accretion. About 5% of the BHs grow by 10
+to 100 $M_\odot$
+. Furthermore, if we increase this ∆M estimate
+ by a factor of 2 (i.e., use η = 0.05), the simulation
+ produces a 3.5 × $10^3$
+ $M_\odot$
+ IMBH for the same initial
+conditions. Our proof-of-concept demonstrates that collisions
+ between BHs and stars are an important process
+that should be taken into account in dense places such
+as a GN.
+Mass growth through BH-main-sequence star collisions
+ may act in concert with other IMBH formation
+channels, such as compact object binary mergers (e.g.,
+Hoang et al. 2018; Stephan et al. 2019; Fragione et al.
+2021; Wang et al. 2021). While in some cases collisions
+ can unbind a binary (e.g., Sigurdsson & Phinney
+1993; Fregeau et al. 2004), BH binaries can be tightly
+bound enough to withstand the collisions. Wide binaries
+ may also become unbound due to interactions with
+the neighboring stars and compact objects (e.g., Binney
+& Tremaine 1987; Rose et al. 2020, see latter study for
+the timescale for an arbitrary eccentricity). However,
+as highlighted in previous studies, a substantial fraction
+ of these binaries may merge due to the Eccentric
+Kozai Lidov mechanism, leaving behind a single star or
+a single compact object (e.g., Stephan et al. 2016, 2019;
+Hoang et al. 2018). Additionally, to be susceptible to
+evaporation, BH binaries must have a wider configuration.
+ Otherwise, they will be more tightly bound than
+the average kinetic energy of the surrounding objects
+and will only harden through weak gravitational inter-
+IMBH Formation in Galactic Nuclei 9
+actions with neighboring stars (see for example Figure
+6 in Rose et al. 2020).
+We note that we assume a steady-state and treat the
+stars as a reservoir in this model. Future work will take a
+more nuanced approach to the background stars, whose
+density as a function of time can be influenced by several
+factors. Firstly, the relaxation of the stellar population
+occurs on Gyr timescales. Some studies have suggested
+that in situ star formation can occur in the Galactic
+Center as close as 0.04 pc from the SMBH (e.g., Levin
+& Beloborodov 2003; Paumard et al. 2006), and star
+formation episodes can occur as often as every ∼ 5 Myr
+(e.g. Lu et al. 2009). Therefore, we expect that after
+the first Gyr, stars within ≳ 0.01 pc will be replenished
+at intervals consistent with the star formation episodes;
+the infalling populations of stars are separated by ∼
+5 − 10 Myr, which is shorter than the collision timescale.
+However, star-star collisions may complicate this picture
+ within ∼ 0.01 pc. As discussed above, regular star
+formation ensures the BHs always have a stellar population
+ to interact with outside of ∼ 0.01 pc.$^5$
+ At 0.01 pc,
+however, the kinetic energy during a collision between
+two 1 $M_\odot$
+ stars is larger than their binding energies.
+Collisions can therefore thin out the stellar populations
+during the time it takes them to diffuse to these small
+radii, ≲ 0.01 pc, and may reduce the BH growth in the
+innermost region. We reserve the inclusion of star-star
+collisions for future work. We also note that the disruption
+ of binary stars by the SMBH may help replenish
+the stellar population even as collisions work to deplete
+it (e.g., Balberg et al. 2013); when a binary is disrupted,
+one of the stars is captured on a tightly bound orbit
+about the SMBH.
+An IMBH may also affect the stellar density profile.
+As it spirals into the SMBH, it can perturb stellar orbits,
+and these interactions can lead to hypervelocity stars
+(e.g., Baumgardt et al. 2006a; Löckmann & Baumgardt
+2008). Löckmann & Baumgardt (2008) show that an
+IMBH can modify an initially steep stellar density profile
+ to become consistent with the flatter cusp observed
+in the Galactic Center. The stars may then be replenished
+ on 100 Myr timescales (Baumgardt et al. 2006a).
+Therefore, after the formation of the first few IMBHs,
+subsequent BH growth may occur in bursts, coinciding
+with replenishment of the stars.
+While there are many competing dynamical processes
+that shape the stellar density profile, we stress that α
+5
+ In fact, the star-star collision timescale is greater than 10 Myr
+for the entire parameter space, save at 0.001 pc for larger values
+of α; the BH-star collision timescale plotted in Fig. 1 is the same
+order of magnitude as the star-star collision timescale. can simply be chosen to encapsulate all of the relevant
+physics. A value for α that is constrained by observations
+ must already reflect ongoing processes like star-star
+ collisions and replenishment. Schödel et al. (2018)
+find the observed stellar mass enclosed within 0.01 pc of
+the Milky Way’s Galactic Center to be approximately
+180 $M_\odot$
+. This estimate is consistent to order of magnitude
+ with our α = 1.25 case. In a simulation like those
+depicted in Figure 4, which include relaxation, α = 1.25
+leads to a maximum IMBH mass of 140 $M_\odot$
+. Furthermore,
+ while the stellar mass within 0.01 pc may be a
+few hundred $M_\odot$
+, Do et al. (2019) and GRAVITY Collaboration
+ et al. (2020) set an upper limit on the mass
+enclosed within the orbit of S0-2 to be about a few thousand
+ $M_\odot$
+, or 0.1% of the central mass. This upper limit
+can include mass that was previously in stars but is now
+in BHs. In that case, the 180 $M_\odot$
+ is what remains of the
+stars, while BHs and IMBHs make up the ∼ 1000 $M_\odot$
+
+in the innermost region.
+Also not included in this study, collisions between the
+BH and other compact objects will increase the BH
+growth rate. BH-BH mergers (e.g., O’Leary et al. 2009;
+Fragione et al. 2021) and even neutron star BH mergers
+(e.g., Hoang et al. 2020) become more likely as the BHs
+increase in mass through stellar collisions. As a result,
+the BH-BH collision timescale, discussed in Section 2.2,
+will become relevant to our simulations, allowing the
+BHs to grow through this channel in addition to stellar
+ collisions. Additionally, these compact object mergers
+result in GW recoil, which may have a large impact on
+the dynamics (e.g., Baibhav et al. 2020; Fragione et al.
+2021).
+The BH’s mass growth increases GW emission, which
+dissipates energy from the orbit. Along with relaxation,
+GW emission causes BHs to sink towards the SMBH
+and eventually undergo a merger. As a result, the GN
+environment is conducive to the formation of EMRIs
+and IMRIs. The GW emission from EMRIs and IMRIs
+ is expected to be at mHz frequencies, making them
+promising candidates for LISA to observe. While the
+exact rate calculation is beyond the scope of this study,
+the mechanism outlined here seems very promising.
+Our results also suggest that BHs within the mass gap
+as well as IMBHs likely exist in many galactic nuclei, as
+well as within our own galactic center. This implication
+seems to be consistent with recent observational and
+theoretical studies (e.g., Hansen & Milosavljević 2003;
+Maillard et al. 2004; Gürkan & Rasio 2005; Gualandris
+& Merritt 2009; Chen & Liu 2013; Generozov & Madigan
+ 2020; Fragione et al. 2020a; Zheng et al. 2020; Naoz
+et al. 2020; GRAVITY Collaboration et al. 2020).
+  Rose et al.
+Lastly, the collisions between stellar mass BHs and
+stars may contribute to the x-ray emission from our
+galactic centre (e.g., Muno et al. 2005, 2009; Hailey
+et al. 2018; Zhu et al. 2018; Cheng et al. 2018, see Kremer
+ et al. (2022) for a discussion of electromagnetic signatures
+ from BH-star collisions)6
+. These interactions,
+in particular grazing collisions, may also result in tidal
+disruption events (e.g., Baumgardt et al. 2006b; Perets
+et al. 2016; Stone et al. 2017; Samsing et al. 2019; Kremer
+ et al. 2021). Thus, the process outlined here may
+produce electromagnetic signatures in addition to GW
+mergers.
+We thank the anonymous referee for useful comments.
+We also thank Jessica Lu, Fred Rasio, Kyle Kremer,
+Ryosuke Hirai, Ilya Mandel, and Erez Michaely for useful
+ discussion.
+SR thanks the Charles E. Young Fellowship, the Nina
+Byers Fellowship, and the Michael A. Jura Memorial
+Graduate Award for support. SR and SN acknowledge
+the partial support from NASA ATP 80NSSC20K0505.
+SN thanks Howard and Astrid Preston for their generous
+ support. IL thanks support from the Adams Fellowship.
+ SN and RS thank the Bhaumik Institute visitor
+program. This work was performed in part at the Aspen
+ Center for Physics, which is supported by National
+Science Foundation grant PHY-1607611.
+ REFERENCES
+Abbott, B. P., Abbott, R., Abbott, T. D., et al. 2016,
+PhRvL, 116, 241102,
+doi: 10.1103/PhysRevLett.116.241102
+—. 2017a, PhRvL, 118, 221101,
+doi: 10.1103/PhysRevLett.118.221101
+—. 2017b, PhRvL, 119, 141101,
+doi: 10.1103/PhysRevLett.119.141101
+Aharon, D., & Perets, H. B. 2016, ApJL, 830, L1,
+doi: 10.3847/2041-8205/830/1/L1
+Alexander, T. 1999, ApJ, 527, 835, doi: 10.1086/308129
+Alexander, T., & Pfuhl, O. 2014, ApJ, 780, 148,
+doi: 10.1088/0004-637X/780/2/148
+Amaro-Seoane, P. 2018, Living Reviews in Relativity, 21, 4,
+doi: 10.1007/s41114-018-0013-8
+6
+ The connection between the observed X-ray sources at the Galactic
+ Center and tidal capture has been suggested by Generozov
+et al. (2018), but see Zhu et al. (2018); Stephan et al. (2019) for
+alternative channels.  Arca Sedda, M., Mapelli, M., Benacquista, M., & Spera, M.
+2021, arXiv e-prints, arXiv:2109.12119.
+https://arxiv.org/abs/2109.12119
+Bahcall, J. N., & Wolf, R. A. 1976, ApJ, 209, 214,
+doi: 10.1086/154711
+Baibhav, V., Gerosa, D., Berti, E., et al. 2020, PhRvD, 102,
+043002, doi: 10.1103/PhysRevD.102.043002
+Balberg, S., Sari, R., & Loeb, A. 2013, MNRAS, 434, L26,
+doi: 10.1093/mnrasl/slt071
+Baumgardt, H., Gualandris, A., & Portegies Zwart, S.
+2006a, MNRAS, 372, 174,
+doi: 10.1111/j.1365- 2966.2006.10818.x
+Baumgardt, H., Hopman, C., Portegies Zwart, S., &
+Makino, J. 2006b, MNRAS, 372, 467,
+doi: 10.1111/j.1365- 2966.2006.10885.x
+Baumgardt, H., Makino, J., & Ebisuzaki, T. 2004, ApJ,
+613, 1143, doi: 10.1086/423299
+Begelman, M. C. 1979, MNRAS, 187, 237,
+doi: 10.1093/mnras/187.2.237
+—. 2012a, ApJL, 749, L3, doi: 10.1088/2041- 8205/749/1/L3
+IMBH Formation in Galactic Nuclei 11
+—. 2012b, MNRAS, 420, 2912,
+doi: 10.1111/j.1365- 2966.2011.20071.x
+Begelman, M. C., Volonteri, M., & Rees, M. J. 2006,
+MNRAS, 370, 289, doi: 10.1111/j.1365-2966.2006.10467.x
+Belczynski, K., Hirschi, R., Kaiser, E. A., et al. 2020a, ApJ,
+890, 113, doi: 10.3847/1538- 4357/ab6d77
+—. 2020b, ApJ, 890, 113, doi: 10.3847/1538- 4357/ab6d77
+Bertone, G., Fornasa, M., Taoso, M., & Zentner, A. R.
+2009, New Journal of Physics, 11, 105016,
+doi: 10.1088/1367- 2630/11/10/105016
+Binney, J., & Tremaine, S. 1987, Galactic dynamics
+—. 2008, Galactic Dynamics: Second Edition
+Blaes, O., Lee, M. H., & Socrates, A. 2002, ApJ, 578, 775,
+doi: 10.1086/342655
+Blandford, R. D., & Begelman, M. C. 1999, MNRAS, 303,
+L1, doi: 10.1046/j.1365-8711.1999.02358.x
+Blandford, R. D., & Znajek, R. L. 1977, MNRAS, 179, 433,
+doi: 10.1093/mnras/179.3.433
+Blecha, L., Ivanova, N., Kalogera, V., et al. 2006, ApJ, 642,
+427, doi: 10.1086/500727
+Bondi, H. 1952, MNRAS, 112, 195,
+doi: 10.1093/mnras/112.2.195
+Bondi, H., & Hoyle, F. 1944, MNRAS, 104, 273,
+doi: 10.1093/mnras/104.5.273
+Bradnick, B., Mandel, I., & Levin, Y. 2017, MNRAS, 469,
+2042, doi: 10.1093/mnras/stx1007
+Brennan, R., Choi, E., Somerville, R. S., et al. 2018, ApJ,
+860, 14, doi: 10.3847/1538- 4357/aac2c4
+Bringmann, T., Huang, X., Ibarra, A., Vogl, S., & Weniger,
+C. 2012, JCAP, 2012, 054,
+doi: 10.1088/1475- 7516/2012/07/054
+Centrella, J., Baker, J. G., Kelly, B. J., & van Meter, J. R.
+2010, Reviews of Modern Physics, 82, 3069,
+doi: 10.1103/RevModPhys.82.3069
+Chen, X., & Liu, F. K. 2013, ApJ, 762, 95,
+doi: 10.1088/0004- 637X/762/2/95
+Cheng, Z., Li, Z., Xu, X., & Li, X. 2018, ApJ, 858, 33,
+doi: 10.3847/1538- 4357/aaba16
+Choi, J.-H., Shlosman, I., & Begelman, M. C. 2015,
+MNRAS, 450, 4411, doi: 10.1093/mnras/stv694
+Christensen-Dalsgaard, J., Dappen, W., Ajukov, S. V.,
+et al. 1996, Science, 272, 1286,
+doi: 10.1126/science.272.5266.1286
+Cohn, H., & Kulsrud, R. M. 1978, ApJ, 226, 1087,
+doi: 10.1086/156685
+Dale, J. E., & Davies, M. B. 2006, MNRAS, 366, 1424,
+doi: 10.1111/j.1365- 2966.2005.09937.x
+Dale, J. E., Davies, M. B., Church, R. P., & Freitag, M.
+2009, MNRAS, 393, 1016,
+doi: 10.1111/j.1365- 2966.2008.14254.x  Dall’Amico, M., Mapelli, M., Di Carlo, U. N., et al. 2021,
+MNRAS, 508, 3045, doi: 10.1093/mnras/stab2783
+Das, A., Schleicher, D. R. G., Basu, S., & Boekholt, T.
+C. N. 2021a, MNRAS, 505, 2186,
+doi: 10.1093/mnras/stab1428
+Das, A., Schleicher, D. R. G., Leigh, N. W. C., & Boekholt,
+T. C. N. 2021b, MNRAS, 503, 1051,
+doi: 10.1093/mnras/stab402
+De Villiers, J.-P., Hawley, J. F., Krolik, J. H., & Hirose, S.
+2005, ApJ, 620, 878, doi: 10.1086/427142
+Di Carlo, U. N., Giacobbo, N., Mapelli, M., et al. 2019,
+MNRAS, 487, 2947, doi: 10.1093/mnras/stz1453
+Di Carlo, U. N., Mapelli, M., Pasquato, M., et al. 2021,
+MNRAS, 507, 5132, doi: 10.1093/mnras/stab2390
+Do, T., Hees, A., Ghez, A., et al. 2019, Science, 365, 664,
+doi: 10.1126/science.aav8137
+Ebisuzaki, T., Makino, J., Tsuru, T. G., et al. 2001, ApJL,
+562, L19, doi: 10.1086/338118
+Eda, K., Itoh, Y., Kuroyanagi, S., & Silk, J. 2013, PhRvL,
+110, 221101, doi: 10.1103/PhysRevLett.110.221101
+Edgar, R. 2004, NewAR, 48, 843,
+doi: 10.1016/j.newar.2004.06.001
+Escala, A. 2021, ApJ, 908, 57,
+doi: 10.3847/1538- 4357/abd93c
+Ferrara, A., Salvadori, S., Yue, B., & Schleicher, D. 2014,
+Monthly Notices of the Royal Astronomical Society, 443,
+2410, doi: 10.1093/mnras/stu1280
+Fishbach, M., Farr, W. M., & Holz, D. E. 2020, ApJL, 891,
+L31, doi: 10.3847/2041-8213/ab77c9
+Fragione, G., Kocsis, B., Rasio, F. A., & Silk, J. 2021,
+arXiv e-prints, arXiv:2107.04639.
+https://arxiv.org/abs/2107.04639
+Fragione, G., Loeb, A., Kremer, K., & Rasio, F. A. 2020a,
+ApJ, 897, 46, doi: 10.3847/1538-4357/ab94b2
+Fragione, G., Loeb, A., & Rasio, F. A. 2020b, ApJL, 902,
+L26, doi: 10.3847/2041-8213/abbc0a
+Fragione, G., & Sari, R. 2018, ApJ, 852, 51,
+doi: 10.3847/1538- 4357/aaa0d7
+Fregeau, J. M., Cheung, P., Portegies Zwart, S. F., &
+Rasio, F. A. 2004, MNRAS, 352, 1,
+doi: 10.1111/j.1365- 2966.2004.07914.x
+Fregeau, J. M., Joshi, K. J., Portegies Zwart, S. F., &
+Rasio, F. A. 2002, ApJ, 570, 171, doi: 10.1086/339576
+Freitag, M., Amaro-Seoane, P., & Kalogera, V. 2006, ApJ,
+649, 91, doi: 10.1086/506193
+Generozov, A., & Madigan, A.-M. 2020, ApJ, 896, 137,
+doi: 10.3847/1538- 4357/ab94bc
+Generozov, A., Stone, N. C., Metzger, B. D., & Ostriker,
+J. P. 2018, MNRAS, 478, 4030,
+doi: 10.1093/mnras/sty1262
+  Rose et al.
+Genzel, R., Eisenhauer, F., & Gillessen, S. 2010, Reviews of
+Modern Physics, 82, 3121,
+doi: 10.1103/RevModPhys.82.3121
+Genzel, R., Schödel, R., Ott, T., et al. 2003, ApJ, 594, 812,
+doi: 10.1086/377127
+Ghez, A. M., Salim, S., Hornstein, S. D., et al. 2005, ApJ,
+620, 744, doi: 10.1086/427175
+Gondán, L., Kocsis, B., Raffai, P., & Frei, Z. 2018, ApJ,
+860, 5, doi: 10.3847/1538-4357/aabfee
+González, E., Kremer, K., Chatterjee, S., et al. 2021, ApJL,
+908, L29, doi: 10.3847/2041-8213/abdf5b
+GRAVITY Collaboration, Abuter, R., Amorim, A., et al.
+2020, A&A, 636, L5, doi: 10.1051/0004- 6361/202037813
+Gualandris, A., & Merritt, D. 2009, ApJ, 705, 361,
+doi: 10.1088/0004- 637X/705/1/361
+Gürkan, M. A., Fregeau, J. M., & Rasio, F. A. 2006, ApJL,
+640, L39, doi: 10.1086/503295
+Gürkan, M. A., & Rasio, F. A. 2005, ApJ, 628, 236,
+doi: 10.1086/430694
+Hailey, C. J., Mori, K., Bauer, F. E., et al. 2018, Nature,
+556, 70, doi: 10.1038/nature25029
+Hansen, B. M. S., & Milosavljević, M. 2003, ApJL, 593,
+L77, doi: 10.1086/378182
+Heger, A., Fryer, C. L., Woosley, S. E., Langer, N., &
+Hartmann, D. H. 2003, ApJ, 591, 288,
+doi: 10.1086/375341
+Hoang, B.-M., Naoz, S., Kocsis, B., Rasio, F. A., &
+Dosopoulou, F. 2018, ApJ, 856, 140,
+doi: 10.3847/1538- 4357/aaafce
+Hoang, B.-M., Naoz, S., & Kremer, K. 2020, ApJ, 903, 8,
+doi: 10.3847/1538- 4357/abb66a
+Hopman, C., & Alexander, T. 2005, ApJ, 629, 362,
+doi: 10.1086/431475
+Igumenshchev, I. V. 2008, ApJ, 677, 317,
+doi: 10.1086/529025
+Igumenshchev, I. V., Narayan, R., & Abramowicz, M. A.
+2003, ApJ, 592, 1042, doi: 10.1086/375769
+Jiang, Y.-F., Stone, J. M., & Davis, S. W. 2014, ApJ, 796,
+106, doi: 10.1088/0004-637X/796/2/106
+Johnson, J. L., & Bromm, V. 2007, Monthly Notices of the
+Royal Astronomical Society, 374, 1557,
+doi: 10.1111/j.1365- 2966.2006.11275.x
+Kremer, K., Lombardi, James C., J., Lu, W., Piro, A. L., &
+Rasio, F. A. 2022, arXiv e-prints, arXiv:2201.12368.
+https://arxiv.org/abs/2201.12368
+Kremer, K., Lu, W., Piro, A. L., et al. 2021, ApJ, 911, 104,
+doi: 10.3847/1538- 4357/abeb14
+Kremer, K., Spera, M., Becker, D., et al. 2020, ApJ, 903,
+45, doi: 10.3847/1538-4357/abb945  Kroupa, P., Subr, L., Jerabkova, T., & Wang, L. 2020,
+MNRAS, 498, 5652, doi: 10.1093/mnras/staa2276
+Levin, Y., & Beloborodov, A. M. 2003, ApJL, 590, L33,
+doi: 10.1086/376675
+Limongi, M., & Chieffi, A. 2018a, ApJS, 237, 13,
+doi: 10.3847/1538- 4365/aacb24
+—. 2018b, ApJS, 237, 13, doi: 10.3847/1538- 4365/aacb24
+Löckmann, U., & Baumgardt, H. 2008, MNRAS, 384, 323,
+doi: 10.1111/j.1365-2966.2007.12699.x
+Lu, C. X., & Naoz, S. 2019, MNRAS, 484, 1506,
+doi: 10.1093/mnras/stz036
+Lu, J. R., Ghez, A. M., Hornstein, S. D., et al. 2009, ApJ,
+690, 1463, doi: 10.1088/0004- 637X/690/2/1463
+Madau, P., & Rees, M. J. 2001, ApJL, 551, L27,
+doi: 10.1086/319848
+Magorrian, J., & Tremaine, S. 1999, MNRAS, 309, 447,
+doi: 10.1046/j.1365- 8711.1999.02853.x
+Maillard, J. P., Paumard, T., Stolovy, S. R., & Rigaut, F.
+2004, A&A, 423, 155, doi: 10.1051/0004- 6361:20034147
+Mapelli, M., Bouffanais, Y., Santoliquido, F., Arca Sedda,
+M., & Artale, M. C. 2021a, arXiv e-prints,
+arXiv:2109.06222. https://arxiv.org/abs/2109.06222
+Mapelli, M., Dall’Amico, M., Bouffanais, Y., et al. 2021b,
+MNRAS, 505, 339, doi: 10.1093/mnras/stab1334
+Mastrobuono-Battisti, A., Church, R. P., & Davies, M. B.
+2021, MNRAS, 505, 3314, doi: 10.1093/mnras/stab1409
+McKinney, J. C. 2006, MNRAS, 368, 1561,
+doi: 10.1111/j.1365- 2966.2006.10256.x
+McKinney, J. C., & Gammie, C. F. 2004, ApJ, 611, 977,
+doi: 10.1086/422244
+McKinney, J. C., Tchekhovskoy, A., Sadowski, A., &
+Narayan, R. 2014, MNRAS, 441, 3177,
+doi: 10.1093/mnras/stu762
+Merritt, D. 2006, Reports on Progress in Physics, 69, 2513,
+doi: 10.1088/0034- 4885/69/9/R01
+Miralda-Escudé, J., & Gould, A. 2000, ApJ, 545, 847,
+doi: 10.1086/317837
+Morris, M. 1993, ApJ, 408, 496, doi: 10.1086/172607
+Muno, M. P., Pfahl, E., Baganoff, F. K., et al. 2005, ApJL,
+622, L113, doi: 10.1086/429721
+Muno, M. P., Bauer, F. E., Baganoff, F. K., et al. 2009,
+ApJS, 181, 110, doi: 10.1088/0067-0049/181/1/110
+Murray, N., Quataert, E., & Thompson, T. A. 2005, ApJ,
+618, 569, doi: 10.1086/426067
+Naoz, S., Rose, S. C., Michaely, E., et al. 2022, ApJL, 927,
+L18, doi: 10.3847/2041-8213/ac574b
+Naoz, S., & Silk, J. 2014, ApJ, 795, 102,
+doi: 10.1088/0004- 637X/795/2/102
+Naoz, S., Silk, J., & Schnittman, J. D. 2019, ApJL, 885,
+L35, doi: 10.3847/2041-8213/ab4fed
+IMBH Formation in Galactic Nuclei 13
+Naoz, S., Will, C. M., Ramirez-Ruiz, E., et al. 2020, ApJL,
+888, L8, doi: 10.3847/2041- 8213/ab5e3b
+Narayan, R., Chael, A., Chatterjee, K., Ricarte, A., &
+Curd, B. 2022, MNRAS, 511, 3795,
+doi: 10.1093/mnras/stac285
+Narayan, R., Igumenshchev, I. V., & Abramowicz, M. A.
+2003, PASJ, 55, L69, doi: 10.1093/pasj/55.6.L69
+Ohsuga, K., Mori, M., Nakamoto, T., & Mineshige, S. 2005,
+ApJ, 628, 368, doi: 10.1086/430728
+O’Leary, R. M., Kocsis, B., & Loeb, A. 2009, MNRAS, 395,
+2127, doi: 10.1111/j.1365-2966.2009.14653.x
+O’Leary, R. M., Rasio, F. A., Fregeau, J. M., Ivanova, N.,
+& O’Shaughnessy, R. 2006, ApJ, 637, 937,
+doi: 10.1086/498446
+Ostriker, J. P., Choi, E., Ciotti, L., Novak, G. S., & Proga,
+D. 2010, ApJ, 722, 642,
+doi: 10.1088/0004- 637X/722/1/642
+Park, M.-G., & Ostriker, J. P. 2001, ApJ, 549, 100,
+doi: 10.1086/319042
+Paumard, T., Genzel, R., Martins, F., et al. 2006, ApJ, 643,
+1011, doi: 10.1086/503273
+Perets, H. B., Li, Z., Lombardi, James C., J., & Milcarek,
+Stephen R., J. 2016, ApJ, 823, 113,
+doi: 10.3847/0004- 637X/823/2/113
+Peters, P. C., & Mathews, J. 1963a, Physical Review, 131,
+435, doi: 10.1103/PhysRev.131.435
+—. 1963b, Physical Review, 131, 435,
+doi: 10.1103/PhysRev.131.435
+Portegies Zwart, S. F., Baumgardt, H., Hut, P., Makino, J.,
+& McMillan, S. L. W. 2004, Nature, 428, 724,
+doi: 10.1038/nature02448
+Portegies Zwart, S. F., & McMillan, S. L. W. 2000, ApJL,
+528, L17, doi: 10.1086/312422
+—. 2002, ApJ, 576, 899, doi: 10.1086/341798
+Rashkov, V., & Madau, P. 2014, ApJ, 780, 187,
+doi: 10.1088/0004- 637X/780/2/187
+Renzo, M., Farmer, R., Justham, S., et al. 2020, A&A, 640,
+A56, doi: 10.1051/0004-6361/202037710
+Rizzuto, F. P., Naab, T., Spurzem, R., et al. 2022,
+MNRAS, doi: 10.1093/mnras/stac231
+Rodriguez, C. L., Amaro-Seoane, P., Chatterjee, S., &
+Rasio, F. A. 2018, PhRvL, 120, 151101,
+doi: 10.1103/PhysRevLett.120.151101
+Rodriguez, C. L., Chatterjee, S., & Rasio, F. A. 2016,
+PhRvD, 93, 084029, doi: 10.1103/PhysRevD.93.084029
+Rodriguez, C. L., Zevin, M., Amaro-Seoane, P., et al. 2019,
+Phys. Rev. D, 100, 043027,
+doi: 10.1103/PhysRevD.100.043027
+Rose, S. C., Naoz, S., Gautam, A. K., et al. 2020, ApJ, 904,
+113, doi: 10.3847/1538-4357/abc557  Sakstein, J., Croon, D., McDermott, S. D., Straight, M. C.,
+& Baxter, E. J. 2020, arXiv e-prints, arXiv:2009.01213.
+https://arxiv.org/abs/2009.01213
+Sakurai, Y., Yoshida, N., Fujii, M. S., & Hirano, S. 2017,
+MNRAS, 472, 1677, doi: 10.1093/mnras/stx2044
+Samsing, J., Venumadhav, T., Dai, L., et al. 2019, PhRvD,
+100, 043009, doi: 10.1103/PhysRevD.100.043009
+Sari, R., & Fragione, G. 2019, ApJ, 885, 24,
+doi: 10.3847/1538- 4357/ab43df
+Schneider, R., Ferrara, A., Natarajan, P., & Omukai, K.
+2002, The Astrophysical Journal, 571, 30,
+doi: 10.1086/339917
+Schnittman, J. D., & Buonanno, A. 2007, ApJL, 662, L63,
+doi: 10.1086/519309
+Schödel, R., Gallego-Cano, E., Dong, H., et al. 2018, A&A,
+609, A27, doi: 10.1051/0004-6361/201730452
+Shapiro, S. L., & Marchant, A. B. 1978, ApJ, 225, 603,
+doi: 10.1086/156521
+Shima, E., Matsuda, T., Takeda, H., & Sawada, K. 1985,
+MNRAS, 217, 367, doi: 10.1093/mnras/217.2.367
+Shlosman, I., Choi, J.-H., Begelman, M. C., & Nagamine,
+K. 2016, MNRAS, 456, 500, doi: 10.1093/mnras/stv2700
+Sigurdsson, S., & Phinney, E. S. 1993, ApJ, 415, 631,
+doi: 10.1086/173190
+Spera, M., & Mapelli, M. 2017a, MNRAS, 470, 4739,
+doi: 10.1093/mnras/stx1576
+—. 2017b, MNRAS, 470, 4739, doi: 10.1093/mnras/stx1576
+Spitzer, L. 1987, Dynamical evolution of globular clusters
+Stephan, A. P., Naoz, S., Ghez, A. M., et al. 2016, ArXiv
+e-prints. https://arxiv.org/abs/1603.02709
+—. 2019, ApJ, 878, 58, doi: 10.3847/1538- 4357/ab1e4d
+Stone, N. C., Küpper, A. H. W., & Ostriker, J. P. 2017,
+MNRAS, 467, 4180, doi: 10.1093/mnras/stx097
+Stone, N. C., & Metzger, B. D. 2016, MNRAS, 455, 859,
+doi: 10.1093/mnras/stv2281
+The LIGO Scientific Collaboration, the Virgo
+Collaboration, Abbott, R., et al. 2020a, arXiv e-prints,
+arXiv:2009.01075. https://arxiv.org/abs/2009.01075
+—. 2020b, arXiv e-prints, arXiv:2009.01190.
+https://arxiv.org/abs/2009.01190
+Umbreit, S., Fregeau, J. M., Chatterjee, S., & Rasio, F. A.
+2012, ApJ, 750, 31, doi: 10.1088/0004- 637X/750/1/31
+Valiante, R., Schneider, R., Volonteri, M., & Omukai, K.
+2016, Monthly Notices of the Royal Astronomical
+Society, 457, 3356, doi: 10.1093/mnras/stw225
+Vink, J. S., Higgins, E. R., Sander, A. A. C., & Sabhahit,
+G. N. 2021, MNRAS, 504, 146,
+doi: 10.1093/mnras/stab842
+  Rose et al.
+Wang, H., Stephan, A. P., Naoz, S., Hoang, B.-M., &
+Breivik, K. 2021, ApJ, 917, 76,
+doi: 10.3847/1538- 4357/ac088d
+Wang, J., & Merritt, D. 2004, ApJ, 600, 149,
+doi: 10.1086/379767
+Woosley, S. E. 2017, ApJ, 836, 244,
+doi: 10.3847/1538- 4357/836/2/244
+Yu, Q., & Tremaine, S. 2002, MNRAS, 335, 965,
+doi: 10.1046/j.1365- 8711.2002.05532.x
+Yuan, F., Wu, M., & Bu, D. 2012, ApJ, 761, 129,
+doi: 10.1088/0004- 637X/761/2/129  Yue, B., Ferrara, A., Salvaterra, R., Xu, Y., & Chen, X.
+2014, Monthly Notices of the Royal Astronomical
+Society, 440, 1263, doi: 10.1093/mnras/stu351
+Zheng, X., Lin, D. N. C., & Mao, S. 2020, arXiv e-prints,
+arXiv:2011.04653. https://arxiv.org/abs/2011.04653
+Zhu, Z., Li, Z., & Morris, M. R. 2018, ApJS, 235, 26,
+doi: 10.3847/1538- 4365/aab14f
\ No newline at end of file
diff --git a/read/results/playa/2201.00029.txt b/read/results/playa/2201.00029.txt
new file mode 100644
index 0000000..31a0f47
--- /dev/null
+++ b/read/results/playa/2201.00029.txt
@@ -0,0 +1,362 @@
+
+
+
+
+
+
+
+Exploring new techniques for analyzing variability in white dwarf KIC 8626021
+Thomas Huckans, Peter Stine
+Department of Physics and Engineering, Bloomsburg University of Pennsylvania, 400 E 2nd
+ St.,
+Bloomsburg, PA 17815
+
+
+Abstract
+
+ As  is  common  with  the  collection  of  astronomical  data,  signals  are  frequently  dominated
+by noise. However, when performing FTs of light curves, re-binning data can improve the signal-to-noise
+ ratio (SNR) at lower frequencies. Using data collected from the Kepler space telescope,
+we sequentially re-binned data three times to investigate the SNR improvement of lower frequency
+(< 17 µHz) variability in white dwarf KIC 8626021. We found that the SNR at approximately 5.8
+µHz  greatly  improved  through  this  process,  and  we  postulate  that  this  frequency  is  linked  to  the
+rotation of KIC 8626021.
+
+
+ Introduction
+
+First  detected  in  1862,  white  dwarfs  long  posed  a  mystery  for  early  observers. When  the
+companion to Sirius was detected, apparent contradictions concerning the mass, luminosities, and
+densities  baffled  astronomers.  Lacking  full  understanding  of  atomic  structures  and  the  energy
+states of electrons, these early researchers believed white dwarfs too dense to exist. However, new
+discoveries at the turn of the 20th
+ century explained the existence of these stars, and between the
+world wars white dwarfs were increasingly studied and modeled (Holberg, 2009).
+As  stars  age,  those  that  lack  the  mass  to  become  neutron  stars  and  black  holes  become
+white dwarf stars, representing 98% of the stars in our galaxy (Winget & Kepler, 2008).  They are
+composed  of  a  core  of  carbon  and  oxygen  ions  that  slowly  cools  over  billions  of  years,  and  the
+light  emanating  from  these  stars  is  a  result  of  thermal  energy.  White  dwarf  stars  are  no  longer
+supported against the force of gravity by fusion, so the stars collapse into an electron-degenerate
+state where the electrons in the carbon and oxygen atoms occupy the lowest energy levels. As two
+electrons  cannot  occupy  the  same  quantum  state,  Pauli  repulsion  keeps  white  dwarfs  from
+collapsing entirely.
+For many years, accurate detection of light variability in white dwarfs was difficult due to
+a lack of adequate instruments. However, the launch of the Kepler space telescope in 2009 made
+capturing  the  light  of  distant  stars  much  more  efficient  and  effective  (Basri  et  al.,  2010).  Kepler
+was  initially  developed  with  the  intention  of  surveying  our  region  of  the  Milky  Way  galaxy  in
+order to find potentially habitable planets. The purpose of the mission was to identify key traits for
+such planets by determining the number of planets in habitable zones, the sizes and shapes of orbits,
+and  the  characteristics  of  the  stars  being  orbited.  Over  the  lifespan  of  its  first  mission,  Kepler
+observed  approximately  1.5  ×  10^5
+  stars  (Johnson,  2018),  affording  scientists  excellent
+opportunities  to  research  stellar  variability.  Due  to  the  loss  of  a  second  reaction  wheel  in  2013,
+NASA  developed  the  K2  mission,  a  way  to  prolong  Kepler’s  assistance  to  astronomy  and
+astrophysics.
+Utilizing  Kepler’s  ability  to  maintain  three-dimensional  control,  NASA  proceeded  to  use
+the telescope to collect photometry data of certain sections of our galaxy, although the number of
+targets  was  significantly  reduced.  In  addition,  the  K2  mission  was  designed  to  be  community-oriented,
+ with the scientific community having an influence on the fields observed and serving as
+the analysts of the vast amounts of data being received (Howell et al., 2014). Although Kepler was
+deactivated in 2018, the data used in this paper came from observations during 2010 and 2012 of
+white  dwarf  KIC  8626021  and  was  obtained  from  the  Kepler Asteroseismic  Science  Operations
+Center (KASOC).
+
+The DBV white dwarf KIC 8626021 has an atmosphere rich in helium. Building upon
+previous studies, this research investigated novel techniques of analyzing variability in white
+dwarfs. The dwarf KIC 8626021 was chosen due to the large amount of preexisting research on
+the star, allowing for the validation of results using our methods. KIC 8626021 has an effective
+temperature of 29,700 K, log g = 7.890, and mass of 0.56 M☉
+(Córsico, 2020). Other research
+has found that this white dwarf is the DBV with the highest known temperature, and its helium
+layer is the thinnest (Bischoff-Kim et al., 2015). Despite the long-cadence light curve being too
+noisy to draw many conclusions, other FTs of short-cadence data have been performed to find
+variability in the dwarf. Analyses at high frequencies of KIC 8626021 yielded pulsations with
+frequencies of 4309.89 µHz, 5073.26 µHz, 3681.87 µHz, 3294.22 µHz and 2658.85 µHz
+(Østensen et al., 2011). These findings confirm the classification of the white dwarf as a V777
+Herculis, although our research focuses on low frequencies using long-cadence data.
+
+
+
+ Methods
+
+All data were downloaded from the KASOC database, and the long-cadence (data
+sampled approximately every thirty minutes) measurements of Corrected Flux (ppm) were
+analyzed. All computations were made in Wolfram Mathematica and Microsoft Excel, and FTs
+were performed in Mathematica. The re-binning process consisted of summing adjacent light
+curve data points in each quarter, therefore doubling the sampling interval from 0.5 hour to one
+hour, and then repeating this process on the data sample for a total of three times. In addition, a
+significant detection was defined as being 3𝝈 above the mean of the relative flux, and 0 on the
+graphs below represents this 3𝝈 cutoff. (Koch, D. G., 2010), (Wolfram Research, Inc., 2021). To
+find the SNR, we converted to decibels. Using these SNRs, we were able to easily identify
+improvement in signal strength.
+
+
+ Results
+
+  Figure 1 presents the lightcurves constructed for quarters seven (Q7) and thirteen (Q13),
+with corrected flux magnitude (ppm) plotted versus time (Julian days). Figure 2 presents the FTs
+of the first iteration and three successive re-bins for Q7, while Figure 3 presents the FTs of the
+same for Q13.
+  Tables 1 and 2 both show the hypothesized frequency corresponding to the rotation of
+KIC 8626021 that is found in the FTs of the first iteration and subsequent re-bins for Q7 and
+Q13. Tables 3 and 4 show all data values < 17 µHz found in the first iterations and re-bins of Q7
+and Q13.
+
+
+
+
+
+
+
+
+
+
+FIG. 1: Pictured top is the light curve constructed for Q7, below is the light curve for Q13. Q7
+lasted from September 24 – December 13, 2010, and Q13 was from March 29 – June 23, 2012.
+Both graphs were constructed by plotting corrected flux magnitude (flux corrected for
+instrumental artifacts) versus time in Excel, and gaps in the data were filled in by interpolating
+between points. Q7 had forty-three interpolated points, and Q13 had sixty-six.
+
+
+
+
+
+
+
+
+
+
+
+
+
+FIG. 2: The graphs show the initial FTs of Q7, and then the FTs of the three successive re-bins of
+the  light  curve  data.  The  significant  frequencies  of  5.886  µHz  and  5.889  µHz  are  circled.  The
+disappearance  of  the  frequency  in  the  last  FT  is  most  likely  a  byproduct  of  the  method,  and  the
+spurious  frequency  of  5.464  µHz  in  the  last  FT  most  probably  represents  an  artifact  of  the  re-binning
+ process.
+
+
+
+
+
+
+
+
+
+
+
+
+
+FIG. 3: The graphs show the initial FT of Q13, and then the FTs of the three successive re-bins
+of the light curve data. The significant frequencies of 5.784 µHz and 5.787 µHz are circled. In
+addition, in the third re-bin, the frequencies 11.641 µHz and 16.823 µHz rise above 3𝝈 and are
+nearly perfect integer multiples of 5.787 µHz. These harmonics are potentially indications of a
+starspot (Santos et al., 2017).
+
+
+
+
+
+
+
+
+
+
+
+
+
+
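+| Q7 Significant Data Points | Light Variability Frequency (µHz) | Corrected Flux Magnitude (ppm) | Period (days) | Signal-to-Noise (dB) |
+| :------------------------- | --------------------------------: | -----------------------------: | ------------: | -------------------: |
+| Q7 First Iteration         |                             5.886 |                         -1.198 |         1.966 |                  9.9 |
+| Q7 Re-bin 1                |                             5.886 |                         -1.477 |         1.966 |                 12.8 |
+| Q7 Re-bin 2                |                             5.889 |                          0.597 |         1.965 |                 19.2 |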
+TABLE I: The table displays the various frequencies collected from Q7 and the information
+found through calculations to find period and SNR. The frequency of 5.464 µHz is not included,
+and therefore was not used in any calculations determining the average period of rotation. The
+values under corrected flux magnitude are relative to our significant frequency cutoff of 3𝝈, thus
+negative numbers are under the cutoff.
+
+
+
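+| Q13 Significant Data Points | Light Variability Frequency (µHz) | Corrected Flux Magnitude (ppm) | Period (days) | Signal-to-Noise (dB) |
+| :-------------------------- | --------------------------------: | -----------------------------: | ------------: | -------------------: |
+| Q13 First Iteration         |                             5.784 |                          1.555 |         2.001 |                 15.6 |
+| Q13 Re-bin 1                |                             5.784 |                          2.873 |         2.001 |                 17.7 |
+| Q13 Re-bin 2                |                             5.787 |                          4.938 |         2.000 |                 22.6 |
+| Q13 Re-bin 3                |                             5.787 |                          6.909 |         2.000 |                 26.3 |
+| Q13 Re-bin 3                |                            11.641 |                          7.073 |         0.994 |                 26.4 |
+| Q13 Re-bin 3                |                            16.823 |                          2.299 |         0.688 |                 24.1 |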
+TABLE II: The table displays the various frequencies collected from Q13 and the information
+found through calculations to find period and SNR. The last two significant frequencies (11.641
+µHz and 16.823 µHz) for Q13 Re-bin 3 represent potential harmonics, which are discussed in
+further detail in the Conclusions section of this paper. The values under corrected flux magnitude
+are relative to our significant frequency cutoff of 3𝝈, thus negative numbers are under the cutoff.
+
+
+
+
+
+
+
+
+
+
+First Iteration (µHz)  First Re-bin (µHz)  Second Re-bin (µHz)  Third Re-bin (µHz)
+0.933  0.933  0.215  0.216
+1.148  1.148  0.575  0.575
+1.364  1.364  0.934  0.935
+1.507  1.507  1.005  1.006
+12.561  12.561  1.149  1.150
+16.581  16.581  1.221  1.222
+     1.364  1.366
+     1.508  1.509
+     1.580  1.582
+     1.724  1.725
+     1.795  1.797
+     5.889  2.085
+     6.822  5.392
+     9.192  5.464
+     9.479  7.476
+     11.203  9.489
+     12.568  11.215
+     14.291  12.581
+     16.230  13.084
+     16.589  13.443
+       13.659
+       14.018
+       14.809
+       15.097
+       16.031
+       16.463
+       16.894
+TABLE III: The table displays all frequencies of Q7 that had a corrected flux magnitude (ppm)
+above the cutoff of 3𝝈. The minor shifting of significant frequencies between re-bins is a byproduct
+ of the method, and we calculated for such errors when finding our average.
+
+
+
+
+
+First Iteration (µHz)  First Re-bin (µHz)  Second Re-bin (µHz)  Third Re-bin (µHz)
+3.094  2.018  2.019  1.951
+5.784  3.094  3.095  2.019
+9.080  5.784  5.787  2.442
+13.519  7.667  7.671  2.759
+15.671  9.080  9.084  3.095
+16.209  11.165  11.641  3.634
+16.411  13.519  13.526  4.374
+   15.469  15.477  4.778
+   15.671  15.679  4.912
+   16.209  15.881  5.047
+   16.411  16.419  5.787
+       8.479
+       9.084
+       10.565
+       11.641
+       13.526
+       15.544
+       15.881
+       16.823
+TABLE IV: The table displays all frequencies of Q13 that had a corrected flux magnitude (ppm)
+above the cutoff of 3𝝈. The minor shifting of significant frequencies between re-bins is a byproduct
+ of the method, and we calculated for such errors when finding our average.
+
+
+ Conclusions
+
+As our research used the long-cadence data from Kepler, much of the high-frequency
+variability due to gravity-mode (g-mode) pulsations is lost. However, this presents an opportunity to
+verify our results with the work of research groups that analyzed short-cadence data. With the
+data analyzed, the lower frequencies between 5-6 µHz emerged. After finding the average of the
+periods and accounting for a 1𝝈 margin of error, our research hypothesizes that the rotation
+period of KIC 8626021 is 1.99 ± 0.02 days. Other short-cadence research has found the rotation
+period to be 1.8 ± 0.4 days, by analyzing the structures of independent modes (Bischoff-Kim et
+al., 2015). Other calculated periods of rotation have been ≈ 1.7 days (Østensen et al., 2011), and
+these periods indicate that the more precise significant period identified through our re-binning
+relates to the rotation of the white dwarf.
+Through the re-binning process, the SNR clearly improves for both quarters, and for Q7 it
+improves by approximately 1.3 dB, except for the last data re-bin. In the last re-bin, the previous
+
+significant frequency disappears, which becomes increasingly likely after successive re-binning
+processes. The frequency 5.464 µHz rises as another significant frequency; however, we believe
+that this new frequency is simply an artifact of the re-binning process. In Q13, we saw SNR
+improvement ranging from 1.1 dB to 1.3 dB.
+Through the re-binning process, more lines, or significant frequencies, appeared above
+the 3𝝈 cutoff, particularly at lower frequencies. These findings suggest that as an alternative to
+short-cadence analysis, the re-binning process of long-cadence data can be used to identify
+significant lower frequencies in white dwarfs. The methods we used are also simple and
+replicable, which allows even those with less experience to quickly analyze the large amounts of
+data being collected by orbiting telescopes, such as the currently active TESS (Transiting
+Exoplanet Survey Satellite) telescope.
+The presence of possible harmonics in the third re-bin of Q13 also indicates the possible
+presence of a previously unseen starspot in KIC 8626021 caused by magnetic activity. These
+spots are darker, cooler, and modulate stellar light curves, and with confirmation of its existence,
+the harmonic frequencies can be used to calculate the spot’s rotation rate, size, latitude, and
+contrast (Santos et al., 2017). Using the process of re-binning, a starspot signal, previously
+dominated by noise, may have been discovered.
+
+Acknowledgments
+
+We wish to thank Bloomsburg University of Pennsylvania for its continued support of our
+research.
+This paper includes data collected by the Kepler mission and obtained from the MAST
+data archive at the Space Telescope Science Institute (STScI). Funding for the Kepler mission is
+provided by the NASA Science Mission Directorate. STScI is operated by the Association of
+Universities for Research in Astronomy, Inc., under NASA contract NAS 5–26555.
+
+
+ References
+
+ Basri, G., Walkowicz, L. M., Batalha, N., Gilliland, R. L., Jenkins, J., Borucki, W. J., Koch, D.,
+Caldwell, D., Dupree, A. K., Latham, D. W., Meibom, S., Howell, S., & Brown, T. (2010).
+PHOTOMETRIC  VARIABILITY  IN  KEPLER  TARGET  stars:  THE  SUN  AMONG
+stars—a  FIRST  LOOK.  The  Astrophysical  Journal,  713(2),  L155-L159.
+https://doi.org/10.1088/2041-8205/713/2/L155
+Bischoff-Kim,  A.,  Østensen,  R.  H.,  Hermes,  J.  J.,  &  Provencal,  J.  L.  (2015).  Seven-Period
+asteroseismic  fit  of  KIC  8626021.  EPJ  Web  of  Conferences,  101,  06009.
+https://doi.org/10.1051/epjconf/201510106009
+Córsico, A.  H.  (2020). White-Dwarf  asteroseismology  with  the  kepler  space  telescope.  Frontiers
+in Astronomy and Space Sciences, 7. https://doi.org/10.3389/fspas.2020.00047
+Holberg, J. B. (2009). The discovery of the existence of white dwarf stars: 1862 to 1930. Journal
+for  the  History  of  Astronomy,  40(2),  137-154.
+https://doi.org/10.1177%2F002182860904000201
+Howell, S. B., Sobeck, C., Haas, M., Still, M., Barclay, T., Mullally, F., Troeltzsch, J., Aigrain, S.,
+Bryson,  S.  T.,  Caldwell,  D.,  Chaplin,  W.  J.,  Cochran,  W.  D.,  Huber,  D.,  Marcy,  G.  W.,
+Miglio, A., Najita, J. R., Smith, M., Twicken, J. D., & Fortney, J. J. (2014). The k2 mission:
+Characterization and early results. Publications of the Astronomical Society of the Pacific,
+126(938), 398-408. https://doi.org/10.1086/676406
+Johnson,  M.  (Ed.).  (2018,  October  30).  Mission  overview.  National  Aeronautics  and  Space
+Administration.  Retrieved  September  2,  2021,  from
+https://www.nasa.gov/mission_pages/kepler/overview/index.html
+Koch, D. G., Borucki, W. J., Basri, G., Batalha, N. M., Brown, T. M., Caldwell, D., Christensen-Dalsgaard,
+ J., Cochran, W. D., Devore, E., Dunham, E. W., Gautier, T. N., Geary, J. C.,
+Gilliland, R. L., Gould, A., Jenkins, J., Kondo, Y., Latham, D. W., Lissauer, J. J., Marcy,
+G., . . . Morrison, D. (2010). KEPLER MISSION design, REALIZED PHOTOMETRIC
+performance, AND EARLY SCIENCE. The Astrophysical Journal, 713(2), L79-L86.
+https://dx.doi.org/10.1088/2041-8205/713/2/L79
+Østensen, R. H., Bloemen, S., Vučković, M., Aerts, C., Oreiro, R., Kinemuchi, K., Still, M., &
+Koester, D. (2011). AT last—a v777 HER PULSATOR IN THE KEPLER FIELD. The
+Astrophysical Journal, 736(2), L39. https://doi.org/10.1088/2041-8205/736/2/L39
+Santos,  A.  R.  G.,  Cunha,  M.  S.,  Avelino,  P.  P.,  García,  R.  A.,  &  Mathur,  S.  (2017).  Starspot
+signature  on  the  light  curve. Astronomy  &  Astrophysics, 599,  A1.
+https://doi.org/10.1051/0004-6361/201629923
+
+Winget, D. E., & Kepler, S. O. (2008). Pulsating white dwarf stars and precision asteroseismology.
+Annual  Review  of  Astronomy  and  Astrophysics,  46(1),  157-199.
+https://doi.org/10.1146/annurev.astro.46.060407.145250
+Wolfram Research, Inc., Mathematica, Version 12.3.1, Champaign, IL (2021).
\ No newline at end of file
diff --git a/read/results/playa/2201.00037.txt b/read/results/playa/2201.00037.txt
new file mode 100644
index 0000000..224811d
--- /dev/null
+++ b/read/results/playa/2201.00037.txt
@@ -0,0 +1,3009 @@
+Confidential manuscript submitted to JGR-Planets
+The influence of a fluid core and a solid inner core on the
+Cassini state of Mercury
+Mathieu Dumberry 1
+1
+Department of Physics, University of Alberta, Edmonton, Alberta, Canada.
+Key Points:
+•
+ The Cassini state obliquity of Mercury’s mantle spin axis deviates from that of a rigid
+planet by no more than 0.01 arcmin.
+•
+ For a core magnetic field above 0.3 mT, electromagnetic coupling locks the fluid and solid
+cores into a common precession motion.
+•
+ The larger the inner core is, the more the obliquity of the polar moment of inertia approaches
+ that expected for a rigid planet.
+Corresponding author: Mathieu Dumberry, dumberry@ualberta.ca
+–1– arXiv:2201.00037v1  [astro-ph.EP]  31 Dec 2021
+Confidential manuscript submitted to JGR-Planets
+Abstract
+We present a model of the Cassini state of Mercury that comprises an inner core, a fluid core
+and a mantle. Our model includes inertial and gravitational torques between interior regions,
+and viscous and electromagnetic (EM) coupling at the boundaries of the fluid core. We show
+that the coupling between Mercury’s interior regions is sufficiently strong that the obliquity of
+the mantle spin axis deviates from that of a rigid planet by no more than 0.01 arcmin. The mantle
+ obliquity decreases with increasing inner core size, but the change between a large and no
+inner core is limited to 0.015 arcmin. EM coupling is stronger than viscous coupling at the inner
+ core boundary and, if the core magnetic field strength is above 0.3 mT, locks the fluid and
+solid cores into a common precession motion. Because of the strong gravitational coupling between
+ the mantle and inner core, the larger the inner core is, the more this co-precessing core
+is brought into an alignment with the mantle, and the more the obliquity of the polar moment
+of inertia approaches that expected for a rigid planet. The misalignment between the polar moment
+ of inertia and mantle spin axis increases with inner core size, but is limited to 0.007 arcmin.
+ Our results imply that the measured obliquities of the mantle spin axis and polar moment
+ of inertia should coincide at the present-day level of measurement errors, and cannot be
+distinguished from the obliquity of a rigid planet.
+Plain language summary: The plane of Mercury’s orbit around the Sun is slowly precessing
+ about an axis fixed in space. This entrains a precession of the spin axis of Mercury at the
+same rate, an equilibrium known as a Cassini state. The angle between the spin axis and the
+normal to the orbital plane is known as the obliquity and remains fixed. Observations have confirmed
+ that Mercury’s obliquity matches, within measurement errors, the theoretical prediction
+ based on an entirely rigid planet. However, we know that Mercury has a large metallic core
+which is liquid, although the central part may be solid. In this work, we investigate how the
+presence of a fluid and solid core affect the Cassini state of Mercury. We show that the internal
+ coupling between the solid core, fluid core and the mantle is sufficiently strong that the obliquity
+ of the mantle does not depart from that of a rigid planet by more than 0.01 arcmin, an
+offset smaller than the present-day error in measurements. We also show that the larger the
+solid inner core is, the more the planet behaves as if it were precessing as an entirely rigid body.
+1 Introduction
+Mercury is expected to be in a Cassini state (Figure 1) whereby its orbit normal and spin-symmetry
+ axis are both coplanar with, and precess about, the normal to the Laplace plane [Colombo,
+1966; Peale, 1969, 2006]. The orientation of the Laplace plane varies on long timescales, but
+its present-day orientation can be reconstructed from ephemerides data [Yseboodt and Margot ,
+2006; Baland et al., 2017]. Likewise, the rate of precession is also not observed directly, but is
+reconstructed by ephemerides data. The latest estimate is a retrograde precession period of 325,513
+yr with an inclination angle of $I = 8.5330^\circ$
+ between the orbit and Laplace plane normals [Baland
+ et al., 2017]. Measurements of the obliquity $\varepsilon_m$,
+defined as the angle of misalignment between
+ the spin-symmetry axis and the orbit normal, have been obtained by different techniques,
+including ground based radar observations [Margot et al., 2007, 2012], and stereo digital terrain
+ images [Stark et al., 2015a] and radio tracking data [Mazarico et al., 2014; Verma and Margot
+ , 2016; Genova et al., 2019; Konopliv et al., 2020] from the MErcury Surface Space ENvironment
+ GEochemistry and Ranging (MESSENGER) spacecraft. Within measurement errors,
+all techniques yield an obliquity which is coplanar with the orbit and Laplace plane normals
+and consistent with a Cassini state. Furthermore, the observed obliquity angle (2.042 ± 0.08
+–2–
+Confidential manuscript submitted to JGR-Planets
+I
+descending
+node of orbitΩ
+p
+ ê
+3I
+I ê
+3Lε
+mI
+ ê
+3p
+ ascending
+node of orbit
+descending
+node of equatorequatorial
+plane
+ orbital
+direction
+Sê
+3I
+ê
+3L
+M
+ ε
+morbital plane
+Figure 1. The orbit of Mercury (M) around Sun (S) with respect to the Laplace plane (grey shaded
+rectangle) and the Cassini state of Mercury. The normal to the orbital plane ($\hat{e}_3^I$) is offset from the
+normal to the Laplace plane ($\hat{e}_3^L$) by an angle $I = 8.5330^\circ$. The symmetry axis of the mantle $\hat{e}_3^p$ is offset
+from $\hat{e}_3^I$ by $\varepsilon_m \approx 2$ arcmin. $\hat{e}_3^I$ and $\hat{e}_3^p$ are coplanar with, and precess about, $\hat{e}_3^L$ in a retrograde direction
+at frequency $\Omega_p = 2\pi/325{,}513$ yr$^{-1}$. The blue (orange) shaded region indicates the portion of the orbit
+when Mercury is above (below) the Laplace plane. Angles are not drawn to scale.
+arcmin [Margot et al., 2012], 2.029±0.085 arcmin [Stark et al., 2015a] and 1.968±0.027 [Genova
+ et al., 2019] to list a few) matches that expected if Mercury occupies Cassini state 1.
+The prediction of Mercury’s obliquity is based on the assumption that the whole planet
+precesses as a single body. However, we know that Mercury has a fluid core from two main lines
+of evidence. First, Mercury’s large scale magnetic field is intrinsic, and must be maintained by
+dynamo action [Anderson et al., 2011, 2012; Johnson et al., 2012]. This requires fluid motion
+in its metallic core, and hence that Mercury’s core is at least partially liquid. Second, the observed
+ amplitude of the 88-day longitudinal libration is approximately twice as large as that
+expected if Mercury were librating as a rigid body [Margot et al., 2007, 2012; Stark et al., 2015a].
+This indicates that it is only the mantle that librates, and that the outer part of the core is fluid.
+These evidences do not necessarily imply that the whole of Mercury’s core is fluid, but only that
+its outermost part must be. A solid inner core may have nucleated at the centre although its
+size is not well constrained. Inner core growth leads to planetary contraction, and the inferred
+radial contraction of ∼ 7 km since the late heavy bombardment [Byrne et al., 2014] places an
+approximate limit of 800 km on the inner core radius [Grott et al., 2011]. However, the inner
+core could be larger if a significant fraction of its growth occurred earlier in Mercury’s history.
+–3–
+Confidential manuscript submitted to JGR-Planets
+With a fluid core, and possibly a solid inner core, the observed obliquity
+$\varepsilon_m$ reflects the
+orientation of the spin-symmetry axis of the precessing mantle and crust alone. Neglecting dissipation,
+ and at equilibrium in the Cassini state, the spin axis of the fluid core and the spin-symmetry
+ axis of the inner core should both also precess about the normal to the Laplace plane
+in a retrograde direction with a period of 325,513 yr. Both of these axes should also lie in the
+plane that defines the equilibrium Cassini state [e.g. Dumberry and Wieczorek , 2016], although
+their obliquity angles may be different than
+$\varepsilon_m$. Whether the spin axis of the fluid core is brought
+into an alignment with the mantle obliquity depends primarily on the pressure torque (also referred
+ to as the inertial torque) exerted by the centrifugal force of the rotating fluid core on the
+misaligned elliptical shape of the core-mantle boundary (CMB) [Poincaré, 1910]. The more flattened
+ the CMB is, the stronger the pressure torque is, and the more the fluid core is entrained
+into a co-precession at a similar obliquity to that of the mantle. The flattening of Mercury’s
+CMB is not known. But if one assumes that the topography of the CMB coincides with an equipotential
+ surface at hydrostatic equilibrium with the imposed frozen-in mass anomalies in the upper
+ mantle and crust, then the pressure torque at the CMB is sufficient to bring the fluid core
+into a close alignment with the mantle [Peale et al., 2014]. The spin axis of the fluid core is not
+expected to be exactly aligned with the spin-symmetry axis of the mantle, but sufficiently close
+that the resulting mantle obliquity does not differ much from that of a single body planet. Furthermore,
+ viscous and electromagnetic (EM) coupling at the CMB can further restrict the misalignment
+ between the mantle and core [Peale et al., 2014].
+If an inner core is present, its obliquity angle is determined by the sum of the torques acting
+ on it. This includes the gravitational torque from the Sun acting on its tilted figure, analogous
+ to the torque applied on the tilted mantle that sets the obliquity
+$\varepsilon_m$. In addition, the
+tilt of the inner core also depends on the gravitational torque imposed by the mantle and the
+pressure torque at the inner core boundary (ICB) imposed by the fluid core. If the mantle gravitational
+ torque dominates, the inner core tilt is expected to remain closely aligned with the
+mantle. Conversely, if the pressure torque at the ICB is the largest, the inner core should instead
+ be closely aligned with the spin axis of the fluid core. A strong viscous and/or EM coupling
+ at the ICB should also enforce a closer alignment between the rotation vectors of the inner
+ core and fluid core.
+It is on the basis of the observed mantle obliquity that the polar moment of inertia of Mercury
+ is inferred [e.g. Peale, 1976; Margot et al., 2018]. Inherent in this calculation is the built-in
+ assumption that the mantle obliquity does not deviate from that of a rigid planet by a substantial
+ amount. However, the recent study by Peale et al. [2016] suggests that the inner core
+can be misaligned from the mantle by a few arcmin and that a large inner core can perturb the
+orientation of the spin vector of the mantle by as much as 0.1 arcmin. This challenges the assumption
+ that the observed obliquity reflects the orientation of the whole planet.
+Furthermore, if a large inner core is misaligned with the mantle, then the mantle spin axis
+does not coincide with the orientation of the polar moment of inertia of the whole planet. This
+can introduce a systematic offset between different types of obliquity measurements. Those based
+on tracking topographic features [Margot et al., 2007, 2012; Stark et al., 2015a] capture the obliquity
+ of the mantle spin axis. While those based on the orientation of the gravity field [Mazarico
+et al., 2014; Verma and Margot , 2016; Genova et al., 2019; Konopliv et al., 2020] are instead
+tied to the orientation of the principal moment of inertia of the whole planet. An offset of the
+obliquity of the mantle spin axis with respect to the gravity field could be used to constrain the
+size of the inner core, even though this is difficult to do at present because the different estimates
+ of the obliquity of the gravity field do not match well with one another.
+–4–
+Confidential manuscript submitted to JGR-Planets
+There is thus a significant interest in properly assessing how the presence of a solid inner
+ core at the centre of Mercury may affect its Cassini state equilibrium. Here, we present a
+model of Mercury’s Cassini state that comprises a fluid core and solid inner core. The model
+is an adaptation of a similar model developed to study the Cassini state of the Moon [Dumberry
+ and Wieczorek , 2016; Stys and Dumberry , 2018; Organowski and Dumberry , 2020]. The
+specific questions that motivate our study are the following. First, we want to determine how
+large the misaligned obliquities of the fluid core and solid inner core can be and how they depend
+ on model parameters. Second, we want to assess by how much the mantle obliquity may
+differ from that of an entirely rigid Mercury, and third, by how much the obliquities of the spin-symmetry
+ axis of the mantle and gravity field may differ.
+2 Theory
+2.1 The interior structure of Mercury
+Our model of Mercury consists of four layers of uniform density: a solid inner core, a fluid
+outer core, a solid mantle, and a thin crust. The outer radii of each of these layers are denoted
+by $r_s$, $r_f$, $r_m$, and $R$, and their densities by $\rho_s$, $\rho_f$, $\rho_m$, and $\rho_c$, respectively. The inner core
+radius $r_s$ corresponds to the ICB radius, the fluid core radius $r_f$ to the CMB radius, and $R =
+2439.36$ km to the planetary radius of Mercury. Compressibility effects from increasing pressure
+ with depth are not negligible in the core of Mercury. However adopting uniform densities
+simplifies the analytical expressions of the model while still capturing the first order rotational
+dynamics. Uniform densities were also adopted by Peale et al. [2016] and following the same
+strategy facilitates comparisons between our results.
+We build our interior model as detailed in Peale et al. [2016]. We first specify $r_s$, $\rho_s$ (or
+a density contrast at the ICB), the crustal density $\rho_c$ and crustal thickness $h = R - r_m$. The
+three unknowns $r_f$, $\rho_f$ and $\rho_m$ are then solved such that the interior model is consistent with
+the known mass $M$ and chosen values of the moments of inertia of the whole planet $C$ and that
+of the mantle and crust $C_m$.
+Each layer is triaxial in shape. We denote the polar flattening (or geometrical ellipticity)
+by $\epsilon_i$, defined as the difference between the mean equatorial and polar radii, divided by the mean
+spherical radius. Likewise, we denote the equatorial flattening by the variable $\xi_i$, defined as the
+difference between the maximum and minimum equatorial radii, divided by the mean spherical
+ radius. As above, we use the subscript $i = s, f, m$ and $r$, to denote the polar or equatorial
+ flattenings at the ICB, CMB, crust-mantle boundary (CrMB), and surface.
+The measured polar and equatorial flattenings are taken from Perry et al. [2015] and their
+numerical values are given in Table 1. We then assume that the ICB and CMB are both at hydrostatic
+ equilibrium with the imposed gravitational potential induced by the flattenings at the
+CrMB and surface. The flattenings at all interior boundaries are specified such that they are
+consistent with the observed degree 2 spherical harmonic coefficients of gravity $J_2$ and $C_{22}$; their
+numerical values are given in Table 1. Specifically, $J_2$ and $C_{22}$ are connected to the principal
+moments of inertia of Mercury ($C > B > A$) and to the polar and equatorial flattenings by
+$$J_2 = \frac{C - \bar{A}}{M R^2} = \frac{8\pi}{15} \frac{1}{M R^2} \left[ (\rho_s - \rho_f) r_s^5 \epsilon_s + (\rho_f - \rho_m) r_f^5 \epsilon_f + (\rho_m - \rho_c) r_m^5 \epsilon_m + \rho_c R^5 \epsilon_r \right] , \tag{1a}$$
+$$C_{22} = \frac{B - A}{4 M R^2} = \frac{8\pi}{15} \frac{1}{4 M R^2} \left[ (\rho_s - \rho_f) r_s^5 \xi_s + (\rho_f - \rho_m) r_f^5 \xi_f + (\rho_m - \rho_c) r_m^5 \xi_m + \rho_c R^5 \xi_r \right] , \tag{1b}$$
+where $\bar{A}$ is the mean equatorial moment of inertia defined below. The same procedure was used
+in Peale et al. [2016] and the mathematical details are given in Equations (18-20) of Dumberry
+–5–
+Confidential manuscript submitted to JGR-Planets
+Mercury Parameter | Numerical value | Reference
+mean motion, $n$ | $2\pi/87.96935$ day$^{-1}$ | Stark et al. [2015b]
+rotation rate, $\Omega_o = 1.5\,n$ | $2\pi/58.64623$ day$^{-1}$ | Stark et al. [2015b]
+orbit precession rate, $\Omega_p$ | $2\pi/325{,}513$ yr$^{-1}$ | Baland et al. [2017]
+Poincaré number, $\delta\omega = \Omega_p/\Omega_o$ | $4.9327 \times 10^{-7}$ |
+orbital eccentricity, $e_c$ | 0.20563 | Baland et al. [2017]
+orbital inclination, $I$ | $8.5330^\circ$ | Baland et al. [2017]
+mean planetary radius, $R$ | 2439.360 km | Perry et al. [2015]
+mass, $M$ | $3.3012 \times 10^{23}$ kg | Genova et al. [2019]
+mean density, $\bar{\rho}$ | 5429.5 kg m$^{-3}$ |
+$J_2$ | $5.0291 \times 10^{-5}$ | Genova et al. [2019]
+$C_{22}$ | $8.0415 \times 10^{-6}$ | Genova et al. [2019]
+polar surface flattening, $\epsilon_r$ | $6.7436 \times 10^{-4}$ | Perry et al. [2015]
+equatorial surface flattening, $\xi_r$ | $5.1243 \times 10^{-4}$ | Perry et al. [2015]
+Table 1. Reference parameters for Mercury. The mass $M$ is computed from $GM = 22031.8636 \times 10^9$
+m$^3$/s$^2$ taken from Genova et al. [2019]. The mean density is calculated from $\frac{4\pi}{3} \bar{\rho} R^3 = M$. The numerical
+values of $\epsilon_r$ and $\xi_r$ are calculated from $\epsilon_r = (\bar{a} - c)/R$ and $\xi_r = (a - b)/R$, where $\bar{a} = \frac{1}{2}(a + b)$ and where
+$a = 2440.53$ km, $b = 2439.28$ km and $c = 2438.26$ km are the semimajor, intermediate and semiminor
+axes of the triaxial ellipsoidal shape of Mercury taken from Table 2 of Perry et al. [2015]. $J_2$ and $C_{22}$ are
+computed from Equation (4) in the Supporting Information of Genova et al. [2019].
+and Wieczorek [2016] who adopted the same strategy in their interior modelling of the Moon.
+Note that we neglect the misalignment between the triaxial shape of Mercury’s surface topography
+ and the axes of the principal moments of inertia, which amounts to a polar offset of $\sim 2^\circ$
+and an equatorial offset of $\sim 15^\circ$ [Perry et al., 2015].
+Once the densities and flattenings of all interior regions are known, we can specify the moments
+ of inertia of the fluid core ($C_f > B_f > A_f$) and solid inner core ($C_s > B_s > A_s$)
+along with the mean equatorial moments of inertia
+$$\bar{A} = \frac{1}{2}(A + B) , \quad \bar{A}_f = \frac{1}{2}(A_f + B_f) , \quad \bar{A}_s = \frac{1}{2}(A_s + B_s) . \tag{2}$$
+From these, we define the polar ($e$, $e_f$, $e_s$) and equatorial ($\gamma$, $\gamma_s$) dynamical ellipticities of the
+whole planet (no subscript), fluid core (subscript $f$) and solid inner core (subscript $s$), which
+enter our rotational model,
+$$e = \frac{C - \bar{A}}{\bar{A}} , \quad e_f = \frac{C_f - \bar{A}_f}{\bar{A}_f} , \quad e_s = \frac{C_s - \bar{A}_s}{\bar{A}_s} , \tag{3a}$$
+$$\gamma = \frac{B - A}{\bar{A}} , \quad \gamma_s = \frac{B_s - A_s}{\bar{A}_s} . \tag{3b}$$
+We further note that $e$ and $\gamma$ are connected to $J_2$ and $C_{22}$ by
+$$e = \frac{M R^2}{\bar{A}} J_2 , \quad \gamma = \frac{4 M R^2}{\bar{A}} C_{22} . \tag{4}$$
+–6–
+Confidential manuscript submitted to JGR-Planets
+θ
+m
+ θ
+n
+θ
+s
+ θ
+fΩ
+ Ω
+s
+ Ω
+fê
+3p
+ ê
+3sê
+3I
+I ε
+m
+θ
+pê
+3L
+ ê
+1p
+ ê
+2p Cassini plane
+ ωΩ
+otê
+3I
+I
+ ε
+m ê
+3p
+ ê
+1ê
+2pê
+3La)  b)
+Figure 2. Geometry of the Cassini state model of Mercury viewed (a) in the Cassini plane and (b)
+in a frame attached to the rotating mantle. The orbit normal ($\hat{e}_3^I$) is tilted by an angle $I = 8.533^\circ$ from
+the Laplace normal ($\hat{e}_3^L$) and the symmetry axis of Mercury’s mantle ($\hat{e}_3^p$) is tilted by an obliquity $\varepsilon_m$
+with respect to $\hat{e}_3^I$. Shown in (a) are the orientations of the symmetry axis of the inner core ($\hat{e}_3^s$), the
+rotation rate vectors of the mantle ($\boldsymbol{\Omega}$), fluid core ($\boldsymbol{\Omega}_f$) and inner core ($\boldsymbol{\Omega}_s$) and angles $\theta_p$, $\theta_n$, $\theta_m$, $\theta_f$
+and $\theta_s$ in their Cassini state equilibrium. All vectors and angles are in a common plane which we refer
+to as the Cassini plane. The light grey, white, and dark grey ellipsoid represent a polar cross-section of
+the mantle, fluid core and inner core, respectively; blue shaded parts show an equatorial cross section.
+The black curved arrow in the equatorial plane in (a) indicates the direction of rotation of the equatorial
+mantle axes $\hat{e}_1^p$ and $\hat{e}_2^p$ with respect to the Cassini plane. Viewed in the frame attached to the rotating
+mantle (b), the Cassini plane is rotating at frequency $\omega\Omega_o = -\Omega_o - \Omega_p \cos I$ in the longitudinal direction.
+ The oblateness of all three regions and the amplitude of all angles are exaggerated for purpose of
+illustration.
+ –7–
+Confidential manuscript submitted to JGR-Planets
+2.2 The rotational model
+Mercury’s rotation is characterized by a 3:2 spin-orbit resonance in which it completes
+3 rotations around itself for every 2 orbital revolutions around the Sun. The orbital period is
+87.96935 day and the sidereal rotation period is 58.64623 day [Stark et al., 2015b]. These define
+the mean motion $n = 2\pi/87.96935$ day$^{-1}$ and the sidereal frequency $\Omega_o = 2\pi/58.64623$
+day$^{-1}$, with $\Omega_o = 1.5\,n$. Mercury’s rotational state is also characterized by a Cassini state whereby
+the orientations of the orbit normal ($\hat{e}_3^I$) and of the mantle symmetry axis ($\hat{e}_3^p$) are both coplanar
+with, and precess about, the normal to the Laplace plane ($\hat{e}_3^L$). The orientation of the Laplace
+plane varies on long timescales, but it can be taken as invariable in inertial space for our present
+purpose. The Cassini state of Mercury is illustrated in Figure 1. The angle between $\hat{e}_3^L$ and $\hat{e}_3^I$
+is the orbital inclination $I = 8.5330^\circ$ [Baland et al., 2017], the angle between $\hat{e}_3^I$ and $\hat{e}_3^p$ is the
+obliquity $\varepsilon_m$ and the angle between $\hat{e}_3^L$ and $\hat{e}_3^p$ is $\theta_p = I + \varepsilon_m$. The precession of $\hat{e}_3^I$ and $\hat{e}_3^p$
+about the Laplace pole is retrograde with frequency $\Omega_p = 2\pi/325{,}513$ yr$^{-1}$ [Baland et al., 2017].
+The mantle and crust are welded together and form a single rotating region which we refer
+ to as the ‘mantle’ in the context of our rotational model. The rotation and symmetry axes
+of the mantle are expected to remain in close alignment, but they do not coincide exactly. We
+define the rotation rate vector of the mantle by $\boldsymbol{\Omega}$, and its misalignment from $\hat{e}_3^p$ by an angle
+$\theta_m$. Note that $\theta_m \ll \varepsilon_m$ and it is often the spin axis of Mercury which is used to define the
+obliquity $\varepsilon_m$ [e.g. Baland et al., 2017]. If Mercury were an entirely rigid planet, $\hat{e}_3^p$ and $\boldsymbol{\Omega}$ would
+characterize the symmetry and rotation axes of the whole of Mercury, not just its mantle, and
+the angles $I$, $\varepsilon_m$ and $\theta_m$ would completely describe the Cassini state. The presence of a fluid
+outer core and solid inner core requires three additional orientation vectors and angles. The symmetry
+axis of the inner core is defined by unit vector $\hat{e}_3^s$ and its misalignment from $\hat{e}_3^p$ by an
+angle $\theta_n$. The rotation vectors of the fluid core and inner core are defined as $\boldsymbol{\Omega}_f$ and $\boldsymbol{\Omega}_s$, respectively,
+and their misalignment from the rotation vector of the mantle $\boldsymbol{\Omega}$ are defined by angles
+$\theta_f$ and $\theta_s$ (see Figure 2a). The rotation and symmetry axes of the inner core remain in close
+alignment, so $\theta_n \approx \theta_s$. To be formal in our definition of the different angles of misalignment,
+for I defined positive as depicted on Figure 2a, all angles are defined positive in the clockwise
+direction.
+At equilibrium in the Cassini state, the three orientation vectors ($\hat{e}_3^I$, $\hat{e}_3^p$, $\hat{e}_3^s$) and three
+rotation vectors ($\boldsymbol{\Omega}$, $\boldsymbol{\Omega}_f$, $\boldsymbol{\Omega}_s$) are forced to precess about $\hat{e}_3^L$ at the same frequency. If we neglect
+dissipation, all vectors lie on the same plane, which we refer to as the Cassini plane. Viewed
+in inertial space, the Cassini plane is rotating in a retrograde direction at frequency $\Omega_p$. Viewed
+in the frame attached to the mantle rotating at sidereal frequency $\Omega_o$, the Cassini plane is rotating
+in a retrograde direction at frequency $\omega\Omega_o$ (see Figure 2b), where $\omega$, expressed in cycles
+per Mercury day, is equal to
+$$\omega = -1 - \delta\omega \cos(\theta_p) . \tag{5}$$
+The factor $\delta\omega = \Omega_p/\Omega_o = 4.933 \times 10^{-7}$ is the Poincaré number, expressing the ratio of the
+forced precession to sidereal rotation frequencies. The invariance of the Laplace plane normal
+as seen in the mantle frame is expressed as
+$$\frac{d}{dt} \hat{e}_3^L + \boldsymbol{\Omega} \times \hat{e}_3^L = 0 , \tag{6}$$
+or equivalently, by Equation (19e) of Stys and Dumberry [2018],
+$$\omega \sin(\theta_p) + \sin(\theta_m + \theta_p) = 0 . \tag{7}$$
+–8–
+Confidential manuscript submitted to JGR-Planets
+This expresses a formal connection between $\theta_p$ and $\theta_m$ which is independent of the interior structure
+of Mercury. Using Equation (5) and $\cos(\theta_m) \to 1$, this connection can be rewritten as
+$$\sin(\theta_m) = \delta\omega \sin(\theta_p) , \tag{8}$$
+and thus the relative amplitudes of $\theta_m$ and $\theta_p$ depend on the Poincaré number $\delta\omega$.
+To investigate Mercury’s response to the gravitational torque from the Sun, we take advantage
+ of the framework developed in Mathews et al. [1991] to model the forced nutations of
+Earth [see also Mathews et al., 2002; Dehant and Mathews , 2015]. This model takes into account
+ the pressure torque (also referred to as the inertial torque) that results when the spin axis
+of the fluid core is misaligned from the symmetry axes of the elliptical surfaces of the CMB and
+ICB. It also includes the gravitational torque exerted on the inner core when it is misaligned
+with the mantle. Electromagnetic and viscous torques at both the CMB and ICB have been
+incorporated into the framework [e.g. Buffett, 1992; Buffett et al., 2002; Mathews and Guo, 2005;
+Deleplace and Cardin, 2006]. The framework was adapted to model the Cassini state of the Moon
+in Dumberry and Wieczorek [2016] and further developed in Stys and Dumberry [2018] and Organowski
+and Dumberry [2020]. We adapt it here to capture the Cassini state of Mercury.
+Because the forced precession period is much longer than the rotation and orbital periods
+ of Mercury, the gravitational solar torque that is relevant to the Cassini state is the mean
+torque averaged over one orbit. This mean torque is perpendicular to the Cassini plane, pointing
+ in the same direction as the vector connecting the Sun to the descending node of Mercury’s
+orbit in Figure 1. Hence, viewed from the mantle frame, the orientation of this mean torque
+is periodic, rotating at frequency $\omega\Omega_o$. Setting the equatorial directions $\hat{e}_1^p$ and $\hat{e}_2^p$ to correspond
+to the real and imaginary axes of the complex plane, respectively, we can write the equatorial
+components of this periodic applied torque in a compact form as
+$$\Gamma_1(t) + i\Gamma_2(t) = -i \tilde{\Gamma}(\omega) \exp[i\omega\Omega_o t] , \tag{9}$$
+where $\tilde{\Gamma}(\omega)$ represents the amplitude of the torque at frequency $\omega\Omega_o$. In response to this torque,
+the axes defining all angles ($\theta_p$, $\varepsilon_m$, $\theta_m$, $\theta_f$, $\theta_s$, $\theta_n$) as viewed in the mantle frame are also rotating
+at frequency $\omega\Omega_o$ (see Figure 2). The longitudinal direction of each of these angles at
+a specific time $t$ can then also be written in the equatorial complex plane and is proportional
+to $\exp[i\omega\Omega_o t]$. For instance, the two equatorial time-dependent components $\theta_{m1}$ and $\theta_{m2}$ of the
+angle $\theta_m$, as seen in the mantle frame, can be written as
+$$\theta_{m1}(t) + i\theta_{m2}(t) = \tilde{m} \exp[i\omega\Omega_o t] , \tag{10a}$$
+where
+$$\tilde{m} \equiv \tilde{m}(\omega) = \mathrm{Re}[\tilde{m}] + i\,\mathrm{Im}[\tilde{m}] , \tag{10b}$$
+is the amplitude at frequency $\omega\Omega_o$. Equivalent definitions apply for all other angles, with the
+connection as follows:
+$$\theta_m \Leftrightarrow \tilde{m} , \quad \theta_f \Leftrightarrow \tilde{m}_f , \quad \theta_s \Leftrightarrow \tilde{m}_s , \quad \theta_n \Leftrightarrow \tilde{n}_s , \quad \theta_p \Leftrightarrow \tilde{p} , \quad \varepsilon_m \Leftrightarrow \tilde{\varepsilon}_m . \tag{11}$$
+The notation $\tilde{m}$, $\tilde{m}_f$, $\tilde{m}_s$, $\tilde{n}_s$ follows that introduced in the original model of Mathews et al. [1991].
+Note that all tilded amplitudes are complex: their imaginary part reflects the out-of-phase response
+ to the applied torque as a result of dissipation, for instance from viscous or EM coupling
+–9–
+Confidential manuscript submitted to JGR-Planets
+at the boundaries of the fluid core. In the absence of dissipation, all tilded variables are purely
+real. We concentrate our analysis in this work on the real part of the solutions, which corresponds
+ to the mutual alignment of these five rotation angles in the Cassini plane. As such, $\tilde{\varepsilon}_m$
+corresponds to the observed obliquity of the mantle symmetry axis. It is thus equivalent to $\varepsilon_m$,
+though we keep the tilde notation in the presentation of our results to emphasize that it represents
+the real part of the solution from our system. Furthermore, since $\tilde{m} \ll \tilde{\varepsilon}_m$, we often
+refer to $\tilde{\varepsilon}_m$ as the orientation of the spin axis of the mantle, since the Cassini state of Mercury is
+more customarily described in terms of the latter in the literature.
+The model of Mathews et al. [1991] is developed under the assumption of small angles as
+appropriate for the nutations on Earth. The details on how the equations of the model are derived
+ can be found in Mathews et al. [1991] and in Dumberry and Wieczorek [2016]. Three equations
+ describe, respectively, the time rate of change of the angular momenta of the whole of Mercury,
+ the fluid core, and the inner core in the reference frame of the rotating mantle. These three
+equations are
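+$$(\omega - e)\,\tilde{m} + (1 + \omega) \left[ \frac{\bar{A}_f}{\bar{A}} \tilde{m}_f + \frac{\bar{A}_s}{\bar{A}} \tilde{m}_s + \alpha_3 e_s \frac{\bar{A}_s}{\bar{A}} \tilde{n}_s \right] = \frac{1}{i\Omega_o^2 \bar{A}} \tilde{\Gamma}_{sun} , \tag{12a}$$
+$$\omega\,\tilde{m} + (1 + \omega + e_f)\,\tilde{m}_f - \omega \alpha_1 e_s \frac{\bar{A}_s}{\bar{A}_f} \tilde{n}_s = \frac{1}{i\Omega_o^2 \bar{A}_f} \left( -\tilde{\Gamma}_{cmb} - \tilde{\Gamma}_{icb} \right) , \tag{12b}$$
+$$(\omega - \alpha_3 e_s)\,\tilde{m} + \alpha_1 e_s \tilde{m}_f + (1 + \omega)\,\tilde{m}_s + (1 + \omega - \alpha_2)\, e_s \tilde{n}_s = \frac{1}{i\Omega_o^2 \bar{A}_s} \left( \tilde{\Gamma}^s_{sun} + \tilde{\Gamma}_{icb} \right) , \tag{12c}$$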
+and a fourth equation consists of a kinematic relation that expresses the change in the orientation
+ of the inner core figure as a result of its own rotation,
+$$\tilde{m}_s + \omega\,\tilde{n}_s = 0 . \tag{12d}$$
+In these equations, the parameters $\alpha_1$, $\alpha_2$ and $\alpha_3$ involve the density contrast at the ICB
+and are given by
+$$\alpha_1 = \frac{\rho_f}{\rho_s} , \quad \alpha_3 = 1 - \alpha_1 , \quad \alpha_2 = \alpha_1 - \alpha_3 \alpha_g , \tag{13a}$$
+where the parameter $\alpha_g$ is a measure of the ratio of the gravitational to inertial torque applied
+on the inner core,
+$$\alpha_g = \frac{8\pi G}{5\Omega_o^2} \left[ \rho_c (\epsilon_r - \epsilon_m) + \rho_m (\epsilon_m - \epsilon_f) + \rho_f \epsilon_f \right] , \tag{13b}$$
+where G is the gravitational constant.
+$\tilde{\Gamma}_{sun}$ is the amplitude of the gravitational torque by the Sun on the whole of Mercury. For
+a small mantle obliquity $\tilde{\varepsilon}_m$ and a small inner core tilt $\tilde{n}_s$, it is given by
+$$\tilde{\Gamma}_{sun} = -i\Omega_o^2 \bar{A} \left( \phi_m \tilde{\varepsilon}_m + \frac{\bar{A}_s}{\bar{A}} \alpha_3 \phi_s \tilde{n}_s \right) , \tag{14}$$
+where
+ –10–
+Confidential manuscript submitted to JGR-Planets
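+$$\phi_m = \frac{3}{2} \frac{n^2}{\Omega_o^2} \left[ G_{210}\, e + \frac{1}{2} G_{201}\, \gamma \right] , \tag{15a}$$
+$$\phi_s = \frac{3}{2} \frac{n^2}{\Omega_o^2} \left[ G_{210}\, e_s + \frac{1}{2} G_{201}\, \gamma_s \right] , \tag{15b}$$
+and where $G_{210}$ and $G_{201}$ are functions of the orbital eccentricity $e_c$,
+$$G_{210} = \frac{1}{(1 - e_c^2)^{3/2}} , \tag{16a}$$
+$$G_{201} = \frac{7}{2} e_c - \frac{123}{16} e_c^3 + \frac{489}{128} e_c^5 . \tag{16b}$$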
+The gravitational torque by the Sun acting on the inner core alone, $\tilde{\Gamma}^s_{sun}$, is
+$$\tilde{\Gamma}^s_{sun} = -i\Omega_o^2 \bar{A}_s \alpha_3 \phi_s (\tilde{\varepsilon}_m + \tilde{n}_s) . \tag{17}$$
+$\tilde{\Gamma}_{cmb}$ and $\tilde{\Gamma}_{icb}$ are the torques from tangential stresses by the fluid core on the mantle at the
+CMB and on the inner core at the ICB, respectively. These torques can be parameterized in
+terms of dimensionless complex coupling constants $K_{icb}$ and $K_{cmb}$ and the differential angular
+velocities at each boundary [e.g. Buffett, 1992; Buffett et al., 2002],
+$$\tilde{\Gamma}_{icb} = i\Omega_o^2 \bar{A}_s K_{icb} (\tilde{m}_f - \tilde{m}_s) , \tag{18a}$$
+$$\tilde{\Gamma}_{cmb} = i\Omega_o^2 \bar{A}_f K_{cmb}\, \tilde{m}_f . \tag{18b}$$
+Specific expressions for $K_{icb}$ and $K_{cmb}$ are deferred to sections 4 and 5 when we consider the
+effects of viscous and EM coupling, respectively.
+A fifth equation is required to connect this interior model to the obliquity of the mantle,
+and this is provided by Equation (7). For small angles $\theta_m$ and $\theta_p$, this gives [e.g. Mathews et al.,
+1991; Dumberry and Wieczorek, 2016; Baland et al., 2019]
+$$\tilde{m} + (1 + \omega)\,\tilde{p} = 0 . \tag{19}$$
+For Mercury, it is more convenient to connect the internal model with $\tilde{\varepsilon}_m$ instead of $\tilde{p}$. This
+is because $\theta_p \approx 8.567^\circ$ whereas $\tilde{\varepsilon}_m \approx 2$ arcmin and thus the latter obeys more strictly the
+condition of small angles assumed in our framework. Furthermore, the external torques acting
+on the whole planet (Equation 14) and inner core (Equation 17) depend linearly on $\tilde{\varepsilon}_m$. Written
+in terms of $\tilde{\varepsilon}_m$, and with the approximation of $\tilde{\varepsilon}_m \ll 1$ and $\tilde{m} \ll 1$, Equation (7) becomes
+$$\tilde{m} + (1 + \omega)\,\tilde{\varepsilon}_m = -(1 + \omega) \tan I . \tag{20}$$
+Likewise, the frequency $\omega$ from Equation (5) can be written simply in terms of $I$,
+$$\omega = -1 - \delta\omega \cos I . \tag{21}$$
+The set of four Equations (12) with the addition of Equation (20) form a linear system
+of equations for the five rotational variables $\tilde{m}$, $\tilde{m}_f$, $\tilde{m}_s$, $\tilde{n}_s$ and $\tilde{\varepsilon}_m$. It captures the response
+of Mercury, in the frequency domain, when subject to a periodic solar torque applied at frequency
+$\omega$. The system can be written in a matrix form as
+–11–
+Confidential manuscript submitted to JGR-Planets
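+$$\mathsf{M} \cdot \mathbf{x} = \mathbf{y} , \tag{22a}$$
+where the solution ($\mathbf{x}$) and forcing ($\mathbf{y}$) vectors are
+$$\mathbf{x}^T = [\tilde{m}, \tilde{m}_f, \tilde{m}_s, \tilde{n}_s, \tilde{\varepsilon}_m] , \tag{22b}$$
+$$\mathbf{y}^T = [0, 0, 0, 0, -(1 + \omega) \tan I] , \tag{22c}$$
+and the elements of matrix $\mathsf{M}$ are
+$$\mathsf{M} = \begin{bmatrix}
+\omega - e & (1 + \omega) \frac{\bar{A}_f}{\bar{A}} & (1 + \omega) \frac{\bar{A}_s}{\bar{A}} & \frac{\bar{A}_s}{\bar{A}} \alpha_3 \left[ (1 + \omega) e_s + \phi_s \right] & \phi_m \\
+\omega & 1 + \omega + e_f + K_{cmb} + \frac{\bar{A}_s}{\bar{A}_f} K_{icb} & -\frac{\bar{A}_s}{\bar{A}_f} K_{icb} & -\omega e_s \alpha_1 \frac{\bar{A}_s}{\bar{A}_f} & 0 \\
+\omega - \alpha_3 e_s & \alpha_1 e_s - K_{icb} & 1 + \omega + K_{icb} & (1 + \omega - \alpha_2) e_s + \alpha_3 \phi_s & \alpha_3 \phi_s \\
+0 & 0 & 1 & \omega & 0 \\
+1 & 0 & 0 & 0 & (1 + \omega)
+\end{bmatrix} . \tag{22d}$$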
+Solutions of the homogeneous system (i.e. y = 0) represent free modes of precession. Three
+modes have periods which, when seen in inertial space, are typically in the range of a few hundred
+ to a few thousand years. The first is the free axial precession of Mercury maintained by
+the solar torque acting on its elliptical figure [e.g. Peale , 2005]. The second is the free core nutation
+ (FCN), which is the free precession of the spin axis of the fluid core about the symmetry
+ axis of the CMB [e.g. Mathews et al., 1991]. The third is the free inner core nutation (FICN),
+a free mode of rotation similar to the FCN but associated with the inner core [e.g. Mathews et al.,
+1991].
+A few remarks on our model are important to point out before we proceed further. First,
+although we have retained the triaxial shape of Mercury in the expression of the solar torque,
+we treat its angular momentum response as if it were an axially symmetric body. This is convenient
+ as the two equatorial angular momentum equations for each region can be combined
+into a single equation. To first order, the frequency of the free precession of Mercury is not largely
+altered by triaxiality [e.g. Peale , 2005]. Baland et al. [2019] showed that the frequencies of the
+FCN and FICN for a triaxial planetary body may be slightly different than those for an axially
+ symmetric body, but not by a large factor. As the response of Mercury to the solar torque
+is largely determined by the resonant amplification due to the presence of these three modes,
+our model should capture correctly the first order Cassini state of Mercury. Considering the
+triaxial shape of Mercury may alter the numerical results, but not our general conclusions.
+Second, our modelling approach is different than in the studies of Peale et al. [2014] and
+Peale et al. [2016]. In these two studies, dynamical models of Mercury’s Cassini state are developed
+ and must then be integrated in time. The equilibrium Cassini state is the quasi-steady
+state that remains after transient effects associated with the initial conditions have decayed away.
+An advantage of these models compared to ours is that the complete triaxial dynamics of Mercury,
+ including its longitudinal librations, are retained. However, the numerical integration can
+be lengthy if dissipation is weak, which restricts the number of possible interior models of Mercury
+ that can be tested. In contrast, our model is a simple linear system in the frequency domain,
+ focused on one specific frequency: the forced precession associated with the Cassini state.
+Solutions are straightforward to obtain for a given interior model, and this allows us to cover
+a larger span of the parameter space. One drawback, however, is that our model does not capture
+ time-dependent variations at any other frequencies, including the precession of the pericenter
+ of Mercury’s orbit about the Sun.
+ –12–
+Confidential manuscript submitted to JGR-Planets
+2.3 Analytical solutions and limiting cases
+2.3.1 The Cassini state of a single-body, rigid Mercury
+For a rigid planet with no fluid and solid cores, our system of equations reduces to Equations
+ (12a) and (20),
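+$$(\omega - e)\,\tilde{m} + \phi_m \tilde{\varepsilon}_m = 0 , \tag{23a}$$
+$$\tilde{m} + (1 + \omega)\,\tilde{\varepsilon}_m = -(1 + \omega) \tan I . \tag{23b}$$
+Using Equation (21), $\delta\omega \ll 1$, and the approximation $\bar{A}(1 + e + \delta\omega \cos I) = C + \bar{A}\,\delta\omega \cos I \approx
+C$, these can be written as
+$$C\,\tilde{m} = \bar{A} \phi_m \tilde{\varepsilon}_m , \tag{24a}$$
+$$\tilde{m} = \delta\omega \left( \sin I + \cos I\, \tilde{\varepsilon}_m \right) . \tag{24b}$$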
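+Equation (24b) gives a direct relationship between $\tilde{m}$ and $\tilde{\varepsilon}_m$. For $I = 8.5330^\circ$, $\delta\omega =
+4.9327 \times 10^{-7}$ and taking $\tilde{\varepsilon}_m = 2.04$ arcmin, this gives $\tilde{m} = 2.52 \times 10^{-4}$ arcmin, much smaller
+than $\tilde{\varepsilon}_m$: the offset of the rotation axis of the mantle with respect to its symmetry axis is very
+small. Substituting Equation (24b) in Equation (24a) gives
+$$C\,\Omega_p \left( \sin I + \cos I\, \tilde{\varepsilon}_m \right) = \bar{A}\,\Omega_o \phi_m \tilde{\varepsilon}_m , \tag{25}$$
+and isolating for $\tilde{\varepsilon}_m$,
+$$\tilde{\varepsilon}_m = \frac{C\,\Omega_p \sin I}{-C\,\Omega_p \cos I + \bar{A}\,\Omega_o \phi_m} . \tag{26}$$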
+Upon using Equations (4), (15a), and $\Omega_o = \frac{3}{2} n$, we can write
+$$\tilde{\varepsilon}_m = \frac{C\,\Omega_p \sin I}{-C\,\Omega_p \cos I + n M R^2 \left( G_{210} J_2 + 2 G_{201} C_{22} \right)} . \tag{27}$$
+This is the standard prediction for the obliquity of a rigid Mercury occupying Cassini state 1
+[see for instance Equation (1) of Baland et al., 2017, where their definition of $\dot{\Omega}$ is equal to $-\Omega_p$].
+Hence, in the absence of a fluid core and inner core, our system retrieves the Cassini state of
+Mercury correctly. Equation (27) can be manipulated to solve instead for the normalized moment
+ of inertia $\hat{C}$,
+$$\hat{C} = \frac{C}{M R^2} = \frac{n}{\Omega_p} \left( \frac{G_{210} J_2 + 2 G_{201} C_{22}}{\cos I + \sin I / \tilde{\varepsilon}_m} \right) , \tag{28}$$
+which is equivalent to Equation (89) of Van Hoolst [2015]. It is based on the latter equation
+that a measurement of the obliquity gives a constraint on $\hat{C}$.
+Two free modes of precession are found by setting y = 0 in Equation (23). One mode corresponds
+ to the Eulerian wobble, or Chandler wobble, and represents the prograde precession
+of the rotation axis about the symmetry axis. The second mode is the free retrograde axial precession
+ of Mercury. As seen in the inertial frame, its frequency is given by
+–13–
+Confidential manuscript submitted to JGR-Planets
+$$\omega_{fp} = n \frac{M R^2}{C} \left( G_{210} J_2 + 2 G_{201} C_{22} \right) , \tag{29}$$
+which is equivalent to the prediction by Peale [2005] when neglecting its small elliptical component.
+Note that in Peale [2005] it was assumed that only the mantle was involved in the solid-body
+precession and hence $C$ was replaced by $C_m$. Using $C = 0.346 \cdot M R^2$ [Margot et al.,
+2012] and the numerical values for $n$, $J_2$, $C_{22}$ and $e_c$ given in Table 1, we obtain a free precession
+period of $T_{fp} = 2\pi/\omega_{fp} = 1298$ yr. If we use $C_m$ instead of $C$ in Equation (29), and take
+$C_m = 0.431 \cdot C = 0.431 \cdot 0.346 \cdot M R^2$ [Margot et al., 2012], we obtain $T_{fp} = 2\pi/\omega_{fp} = 560$ yr.
+These estimates are similar to those obtained by Peale [2005]. Because the CMB is elliptical,
+the pressure torque exerted on the fluid core by the mantle leads to an entrainment of the fluid
+core, the degree of which depends on the amplitude of the pole-to-equator CMB flattening. The
+true free precession period lies somewhere between 560 and 1298 yr. Regardless of its exact value,
+the free precession period is much shorter than the forcing period of 325 kyr. Using Equation
+(29), Equation (27) can be written as [e.g. Baland et al., 2017]
+$$\tilde{\varepsilon}_m = \frac{\Omega_p \sin I}{-\Omega_p \cos I + \omega_{fp}} . \tag{30}$$
+The obliquity of Mercury is thus determined by how the forcing frequency $\Omega_p$ compares with
+the free precession frequency $\omega_{fp}$. Because $\omega_{fp} > \Omega_p$, Mercury occupies Cassini state 1 [Peale,
+1974]. Furthermore, Equation (30) shows that a large obliquity can be generated by resonant
+amplification if $\Omega_p \approx \omega_{fp}$. Since $\omega_{fp} \gg \Omega_p$, resonant amplification is minimal and the resulting
+obliquity, $\tilde{\varepsilon}_m \approx 2$ arcmin, is much smaller than the inclination angle $I \approx 8.5^\circ$.
+2.3.2 The misalignment of the fluid and solid cores
+With $\omega = -1 - \delta\omega \cos I$ and $\delta\omega \ll 1$, Equation (12d) gives $\tilde{n}_s \approx \tilde{m}_s$; as for the mantle,
+the rotation and symmetry axes of the inner core remain closely aligned in the Cassini state.
+The relationship between $\tilde{m}$ and $\tilde{\varepsilon}_m$ of Equation (24b) is independent of the interior structure,
+so it remains unchanged when a fluid core and a solid core are present. Substituting it in Equation
+(12a), and setting $\tilde{n}_s = \tilde{m}_s$, the angular momentum equation of the whole planet becomes
+$$C\,\Omega_p \left( \sin I + \cos I\, \tilde{\varepsilon}_m \right) + \left( \bar{A}_f \cos I\, \Omega_p \right) \tilde{m}_f + \bar{A}_s \left( \cos I\, \Omega_p - \Omega_o \alpha_3 \phi_s \right) \tilde{n}_s = \bar{A}\,\Omega_o \phi_m \tilde{\varepsilon}_m . \tag{31}$$
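+This latter equation shows how the misaligned inner core and fluid core can lead to a modification
+of the mantle obliquity $\tilde{\varepsilon}_m$. Approximate analytical solutions of $\tilde{n}_s$ and $\tilde{m}_f$ are given by
+$$\tilde{n}_s \approx \frac{\Omega_p}{\kappa \lambda_s} \left[ 1 + \frac{\Omega_o (K_{icb} - \alpha_1 e_s)}{\lambda_f} \right] \left( \sin I + \cos I\, \tilde{\varepsilon}_m \right) - \frac{\Omega_o \alpha_3 \phi_s}{\kappa \lambda_s} \tilde{\varepsilon}_m , \tag{32a}$$
+$$\tilde{m}_f \approx \frac{\Omega_p}{\lambda_f} \left( \sin I + \cos I\, \tilde{\varepsilon}_m \right) + \frac{\Omega_o}{\lambda_f} \frac{\bar{A}_s}{\bar{A}_f} \left( K_{icb} - \alpha_1 e_s \right) \tilde{n}_s , \tag{32b}$$
+where
+$$\kappa = 1 - \frac{\bar{A}_s}{\bar{A}_f} \frac{\Omega_o^2 \left( K_{icb} - \alpha_1 e_s \right)^2}{\lambda_s \lambda_f} , \tag{33a}$$
+$$\lambda_f = \bar{\sigma}_f - \Omega_p \cos I , \tag{33b}$$
+$$\lambda_s = \bar{\sigma}_s - \Omega_p \cos I , \tag{33c}$$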
+Confidential manuscript submitted to JGR-Planets
+and where we have introduced the frequencies
+$$\bar{\sigma}_f = \Omega_o \left( e_f + K_{cmb} + \frac{\bar{A}_s}{\bar{A}_f} K_{icb} \right) , \tag{33d}$$
+$$\bar{\sigma}_s = \Omega_o \left( e_s \alpha_3 \alpha_g - e_s \alpha_1 + \alpha_3 \phi_s + K_{icb} \right) . \tag{33e}$$
+These solutions are good approximations for all the results that we present in section 3. For
+an observed mantle obliquity $\tilde{\varepsilon}_m$ and for a chosen set of interior model parameters, they provide
+useful predictions of $\tilde{n}_s$ and $\tilde{m}_f$.
+In the limit of a very strong coupling between the fluid core, solid core and mantle, $\bar{\sigma}_s \gg
+\Omega_p$ and $\bar{\sigma}_f \gg \Omega_p$, so that $\tilde{n}_s \to 0$, $\tilde{m}_f \to 0$ and Equation (31) reverts back to Equation (25)
+for a rigid planet. In the opposite limit of no coupling between the fluid core, solid core and
+mantle (i.e. for spherical internal boundaries, $e_f = e_s = \gamma_s = 0$ and no viscous or EM coupling,
+$K_{cmb} = K_{icb} = 0$), then
+$$\phi_s = 0 , \quad \kappa = 1 , \quad \lambda_f = \lambda_s = -\Omega_p \cos I , \quad \tilde{m}_f = \tilde{n}_s = -(\tan I + \tilde{\varepsilon}_m) . \tag{34}$$
+Inserting these in Equation (31), and with the moment of inertia of the mantle equal to $C_m =
+C - \bar{A}_f - \bar{A}_s$, we obtain
+$$C_m \Omega_p \left( \sin I + \cos I\, \tilde{\varepsilon}_m \right) = \bar{A}\,\Omega_o \phi_m \tilde{\varepsilon}_m , \tag{35}$$
+which describes, as expected, a forced precession of the mantle alone. If this was the case for
+Mercury, taking $C_m/C = 0.431$, the obliquity should be $\tilde{\varepsilon}_m \approx 0.88$ arcmin, substantially smaller
+than the observed obliquity of $\tilde{\varepsilon}_m \approx 2$ arcmin.
+If $\bar{\sigma}_f \approx \Omega_p$ (and thus $\lambda_f \to 0$) and/or $\bar{\sigma}_s \approx \Omega_p$ (and thus $\lambda_s \to 0$) resonant amplification
+leads to large amplitudes for $\tilde{m}_f$, $\tilde{n}_s$ and the mantle obliquity $\tilde{\varepsilon}_m$. The frequencies $\bar{\sigma}_f$ and
+$\bar{\sigma}_s$ are closely related to the FCN and FICN frequencies $\omega_{fcn}$ and $\omega_{ficn}$, respectively. Hence,
+just as a large mantle obliquity can result from resonant amplification when the forcing frequency
+approaches the free precession frequency, a large mantle obliquity can likewise result from resonant
+amplification when the forcing frequency approaches the FCN or FICN frequencies. These
+frequencies depend on the interior density structure and are not known. However, we will show
+that for reasonable interior models of Mercury, the FCN and FICN periods are in the range of
+a few hundred yr. This is sufficiently far from the forcing period (325 kyr) that we do not expect
+an important amplification effect. Furthermore, since $\omega_{fcn}, \omega_{ficn} \gg \Omega_p$, then $\bar{\sigma}_f \gg \Omega_p$
+and $\bar{\sigma}_s \gg \Omega_p$, and we are in the strong coupling limit. The mantle obliquity should be close
+to that expected for a rigid planet, as observations suggest. Therefore, we expect that $\tilde{m}_f$ and
+$\tilde{n}_s$ should be of the order of $\tilde{\varepsilon}_m$ or smaller. This further justifies the assumption of small angles
+ that we have adopted.
+3 Results
+3.1 Geodetic constraints and interior density structure
+All our interior models are constrained to match the mass M of Mercury and specific choices
+of $\hat{C} = C/M R^2$ and $C_m/C$. The choice of $\hat{C}$ is determined from Equation (28). For the parameters
+listed in Table 1, and an observed obliquity of $\varepsilon_m = 2.04$ arcmin [Margot et al., 2012],
+this gives $\hat{C} = C/M R^2 = 0.3455$ and all our interior models are consistent with this choice.
+Obviously, this reflects a Cassini state equilibrium in which the fluid core and inner core are
+–15–
+Confidential manuscript submitted to JGR-Planets
+perfectly aligned with the mantle, which is not strictly correct. Hence, we make an error in estimating
+$\hat{C}$ from Equation (28), or conversely in predicting $\varepsilon_m$ based on a given choice for $\hat{C}$.
+Part of the objective of our study is to estimate how large this error is. The ratio $C_m/C$ is obtained
+from the amplitude of the 88-day longitudinal mantle libration $\phi_o$, which is given by
+$$\phi_o = 6 \cdot f(e_c)\, C_{22} \frac{M R^2}{C} \frac{C}{C_m} \frac{1}{1 + \zeta} , \tag{36}$$
+where
+$$f(e_c) = 1 - 11 e_c^2 + \frac{959}{48} e_c^4 , \tag{37}$$
+and where $\zeta$ is a correction that takes into account the entrainment of the inner core in the libration
+[Van Hoolst et al., 2012; Dumberry et al., 2013; Dumberry and Rivoldini, 2015]; this correction
+is small and, to simplify, we neglect it here. Taking the observed libration amplitude
+to be 38.5 arcsec [Margot et al., 2012], $\hat{C} = C/M R^2 = 0.3455$ and $C_{22}$ and $e_c$ from Table 1,
+this corresponds to a ratio $C_m/C = 0.4269$, or equivalently $\hat{C}_m = C_m/M R^2 = 0.1475$.
+For all results presented in our study, the crustal density is set at $\rho_c = 2974$ kg m$^{-3}$ [Sori,
+2018]. Our standard choice for the crustal thickness is $h = 26$ km [Sori, 2018], although in
+section 3.2 we also present some results with other choices of h. We have considered two possible
+ prescriptions connected to the density of the inner core. First, for all the results presented
+in sections 3.2, 3.3 and 3.4, we have used a fixed inner core density of $\rho_s = 8800$ kg m$^{-3}$, approximately
+that obtained in Dumberry and Rivoldini [2015] under the assumption of a pure
+Fe composition in face-centered cubic phase. This captures an end-member scenario where the
+core composition is an Fe-S alloy; at Mercury’s core conditions, crystallization of Fe is relatively
+free of S on the Fe-rich side of the eutectic [Li et al., 2001]. If the core composition is instead
+an Fe-Si alloy, approximately equal partitioning of Si between the liquid and solid phase [e.g.
+Schaefer et al., 2017] implies a weak chemical contrast at the ICB. The density jump across the
+ICB is expected to be small, although since density increases with depth, the contrast between
+the mean densities of the fluid and solid cores is larger. It is these mean densities that enter
+our Mercury model with uniform density layers. To capture this other end-member core composition
+ scenario, in section 3.5 we present results where we instead prescribe a fixed density
+contrast between the fluid and solid core; specifically, we set the numerical value of α
+3.
+For a given choice of inner core radius r
+s, the densities of the mantle (ρ
+m) and fluid core
+(ρ
+f ) and the radius of the CMB (r
+f ) are determined such that the interior model matches M ,
+ˆ
+C = 0.3455 and ˆ
+C
+m = 0.1475. Figure 3a shows how ρ
+m, ρ
+f and r
+f vary as a function of inner
+ core radius r
+s for each of the two inner core density scenarios: a fixed ρ
+s, or a fixed α
+3. When
+the inner core is small, its presence has a limited influence on the resulting density structure,
+and we find ρ
+m = 3197 kg m−3
+, ρ
+f = 7263 kg m−3
+ and r
+f = 2000 km in each of the two
+scenarios. When ρ
+s is fixed to 8800 kg m−3
+, as the inner core reaches 1500 km in size, r
+f increases
+ to above 2100 km, ρ
+m approaches 4000 kg m−3
+ and ρ
+f is reduced to below 5000 kg m−3
+.
+Figure 3a illustrates that when adopting a fixed ρ
+s, there is a limit in the possible inner core
+size, as otherwise ρ
+m gets unreasonably large and ρ
+f gets inappropriately small (as it would
+require an excessively large concentration of light elements). When adopting instead a fixed density
+ contrast, with α
+3 = 0.1, the changes in r
+f , ρ
+m and ρ
+f with inner core radius are more modest,
+ allowing larger possible inner core sizes. Different assumptions on ρ
+c and h would alter the
+numerical values shown on Figure 3a but not their trends with r
+s.
+Figure 3b shows how the FCN and FICN periods vary with r
+s for each of the two inner
+core density scenarios and in the absence of viscous and EM coupling (i.e. K
+cmb = K
+icb =
+–16–
+Confidential manuscript submitted to JGR-Planets
+0200400600800100012001400
+period (yr)
+ 0 200 400 600 800 1000 1200 1400
+Inner core radius (km)300040005000600070008000
+density (kg/m3 )
+ 0 200 400 600 800 1000 1200 1400
+Inner core radius (km) 200020202040206020802100
+ Fluid core radius (km)fluid core density
+ CMB radius
+ FICNFCN
+int
+mantle densitya  b
+ FCN
+Figure 3. a) Fluid core density (red), mantle density (blue), fluid core radius (orange, right-hand
+side scale) and b) FICN (blue) and FCN (red) periods as a function of inner core radius. The FCN
+period when the external torque is set to zero (FCN
+int ) is shown in orange. Solid lines correspond to
+a scenario where the density of the inner core is set to 8800 kg m−3
+; thin dashed lines correspond to a
+scenario where the density contrast between the fluid and solid cores is set to α
+3 = 0.1.
+0). Both of these free modes are retrograde. The FCN period is close to 400 yr for a small inner
+ core, increasing to approximately 600 yr at the largest r
+s. The FICN period is shorter, close
+to 100 yr (160 yr) for a small inner core and decreasing to approximately 40 yr (120 yr) at the
+largest r
+s under the fixed ρ
+s (fixed α
+3) scenario. This confirms that the FCN and FICN periods
+ are both much shorter than the forcing precession period of 325 kyr and sufficiently far away
+from it that we do not expect large ˜m
+f and ˜n
+s from resonant amplification.
+The FCN and FICN periods that we have computed include the influence of the external
+ torque. As shown by Baland et al. [2019], the external torque allows solid regions to have
+a free motion in inertial space thereby affecting the free rotational modes. To a good approximation,
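+ the FCN and FICN frequencies (as seen in an inertial frame) for $K_{cmb} = K_{icb} = 0$
+are given by
+$$\omega_{fcn} \approx -\Omega_o \left( \frac{\bar{A}}{\bar{A}_m + \bar{A}_s} \right) \left( e_f + \phi_m \right) + \Omega_o \frac{e_f \phi_m}{(e_f + \phi_m)} , \tag{38a}$$
+$$\omega_{ficn} \approx \Omega_o \left( \frac{\bar{A} + \bar{A}_s}{\bar{A} - \bar{A}_s} \right) \left( e_s \alpha_1 - e_s \alpha_3 \alpha_g - \alpha_3 \phi_s \right) . \tag{38b}$$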
+The expression of the FICN frequency involves the inertial torque (term $e_s \alpha_1$) and the gravitational
+ torque from the rest of Mercury ($e_s \alpha_3 \alpha_g$) and the Sun ($\alpha_3 \phi_s$) acting on the inner core.
+For both of our inner core density scenarios (and our choices of $\rho_s = 8800$ kg m$^{-3}$ and $\alpha_3 = 0.1$),
+the internal gravitational torque dominates that from the Sun. Furthermore, $\alpha_3 \alpha_g \gg \alpha_1$;
+the gravitational torque dominates the inertial torque, in large part because of the slow rotation
+ rate of Mercury. As a result the FICN frequency is negative (i.e. the precession motion
+is retrograde). This is also the case for the Moon [e.g. Dumberry and Wieczorek , 2016; Stys and
+Dumberry , 2018], but it is different for Earth, where $\alpha_1 > \alpha_3 \alpha_g$ because of its faster rotation
+and the FICN mode is prograde [Mathews et al., 1991]. Note also that our approximate expres–17–
+
+Confidential manuscript submitted to JGR-Planets
+sion for the FICN differs by a factor $(\bar{A} + \bar{A}_s)/(\bar{A} - \bar{A}_s)$ compared to that given in Dumberry
+and Wieczorek [2016] and Stys and Dumberry [2018] for the Moon.
+The expression for FCN frequency differs from the usual expression for Earth. First, it
+involves the external torque from the Sun captured by the parameter $\phi_m$. If we set $\phi_m = 0$,
+we obtain the FCN frequency for a decoupled model in which only interior torques contribute,
+$$\omega_{fcn,int} \approx -\Omega_o \left( \frac{\bar{A}}{\bar{A}_m + \bar{A}_s} \right) e_f . \tag{38c}$$
+This frequency is slightly different from the usual expression for Earth, involving the ratio $\bar{A}/(\bar{A}_m + \bar{A}_s)$
+rather than $\bar{A}/\bar{A}_m$. This is because of the relatively thin mantle of Mercury; for the largest
+$r_s$ considered, the moment of inertia of the inner core can get close to 40% of that of the mantle
+ and is not negligible. The period of the FCN when only interior torques contribute is shown
+in Figure 3b. It is close to 1100 yr for a small inner core, increasing to approximately 1500 yr
+at the largest r
+s. Hence, the influence of the solar torque reduces the FCN period by a factor
+of approximately 3. We note that the FICN period, in contrast, is not altered substantially when
+the external torque is set to zero.
+3.2 Gravitational and inertial coupling
+Let us now investigate the obliquities of the mantle, fluid core and inner core in their equilibrium
+ Cassini state. We assume a fixed inner core density scenario in this section, with ρ
+s =
+8800 kg m−3
+. Viscous and EM coupling are set to zero in order to isolate the influence of gravitational
+ and inertial coupling. Figure 4 shows how ˜ε
+m, ˜m
+f and ˜n
+s vary as functions of inner
+core radius. We show calculations for three different choices of crustal thickness, but let us concentrate
+ first on the case for h = 26 km. For small r
+s, we retrieve an obliquity of ˜ε
+m = 2.0494
+arcmin (Figure 4a). ˜ε
+m decreases with r
+s, but not substantially; at the largest r
+s (1500 km),
+˜ε
+m = 2.0460 arcmin, a decrease of 0.0034 arcmin. The maximum difference from ˜ε
+m = 2.04
+arcmin, the obliquity that we used in setting the constraint for ˆ
+C – and hence the prediction
+we should recover for a rigid planet – is an overestimate of approximately 0.01 arcmin which
+occurs for small inner cores.
+The deviation of ˜ε
+m from that of a rigid planet is due to the misalignments of the fluid
+core ( ˜m
+f ) and solid inner core ( ˜n
+s) with respect to the mantle (Figure 4b). The misalignment
+of the fluid core spin axis from the mantle is significant: ˜m
+f is approximately 4.02 arcmin for
+a small inner core and does not vary substantially with inner core size; it drops to 3.97 arcmin
+at the largest r
+s. Recall that ˜m
+f is measured with respect to the mantle rotation axis (which
+coincides closely with the symmetry axis), so the obliquity of the spin axis of the fluid core with
+respect to the orbit normal is ˜ε
+m+ ˜m
+f ≈ 6 arcmin. The reason why the obliquity of the spin
+axis of the fluid core is larger than that of the mantle can be understood from Equation (32b),
+which shows that ˜m
+f is determined by the resonant amplification of the FCN mode at the forcing
+ frequency. When the FCN frequency is much larger than the forcing frequency, as is the
+case for Mercury, the resonant amplification is very weak but remains present and ˜m
+f is larger
+than zero.
+In contrast to ˜m
+f , the misalignment of the inner core with respect to the mantle is much
+smaller; ˜n
+s is approximately between 0.023-0.025 arcmin, a factor 80 times smaller than ˜ε
+m.
+Physically, this is because the gravitational torque acting on the inner core when it is tilted from
+the mantle is much stronger than the inertial torque acting at the ICB. As a result, the inner
+core must remain in close alignment with the mantle. Presented differently, since the FICN period
+ is more than 3000 times shorter than the forced precession period, the inner core can eas–18–
+
+Confidential manuscript submitted to JGR-Planets
+2.0382.0402.0422.0442.0462.0482.050
+Obliquity angle (arcmin)
+ 0 200 400 600 800 1000 1200 1400
+Inner core radius (km) 1.52.02.53.03.54.04.5
+Obliquity angle (arcmin)
+ 0 200 400 600 800 1000 1200 1400
+Inner core radius (km)crustal thickness
+16 km
+36 km26 km crustal thickness
+16 km
+36 km26 kmε
+m
+ε
+g
+ for a rigid planet
+ε
+m  m
+f
+n
+s (x100)a  b
+Figure 4. a) Obliquity of the mantle ( ˜ε
+m, solid lines) and of the principal moment of inertia ( ˜ε
+g ,
+dashed line) b) ˜m
+f (solid lines) and ˜n
+s (dashed lines, x100) as a function of inner core radius and for
+different choices of crustal thickness.
+ily follow the forced precession of the mantle and remains gravitationally locked to it. ˜n
+s does
+not change substantially as the inner core increases in size.
+When $K_{icb} = K_{cmb} = 0$, a good approximation of $\tilde{\varepsilon}_m$ is given by
+$$\tilde{\varepsilon}_m = \frac{C' \, \Omega_p \sin I}{-C' \, \Omega_p \cos I + \bar{A} \Omega_o \phi_m} , \tag{39}$$
+which is identical to the prediction of Equation (26) for a rigid Mercury, except $C$ is replaced
+by $C'$. The latter represents an effective moment of inertia that accounts for the coupling of
+the core to the mantle,
+$$C' = C + \bar{A}_c \chi , \tag{40}$$
+where $\bar{A}_c = \bar{A}_f + \bar{A}_s$ and
+$$\chi = \frac{\Omega_p \cos I}{\bar{A}_c} \left[ \frac{\bar{A}_f}{(\bar{\sigma}_f - \Omega_p \cos I)} + \frac{\bar{A}_s}{(\bar{\sigma}_s - \Omega_p \cos I)} \right] - \frac{\bar{A}_s}{\bar{A}_c} \frac{\Omega_o \alpha_3 \phi_s}{(\bar{\sigma}_s - \Omega_p \cos I)} . \tag{41}$$
+The frequencies $\bar{\sigma}_f$ and $\bar{\sigma}_s$ are given in Equations (33d-33e) and closely approximate the FCN
+and FICN frequencies of Equations (38c) and (38b), respectively. The factor $\chi$ captures then
+how the core is entrained to precess with the mantle, with the coupling between the two expressed
+ in terms of the resonant amplification of the FCN and FICN frequencies. In the limit
+of $\bar{\sigma}_f, \bar{\sigma}_s \to 0$, then $\chi = -1$, $C' = C_m$, the core is fully decoupled from the mantle and we
+retrieve Equation (35). If instead $\bar{\sigma}_f, \bar{\sigma}_s \to \infty$, then $\chi = 0$, $C' = C$ and we retrieve the prediction
+ for a rigid planet. When both the FCN and FICN frequencies are much larger than $\Omega_p$,
+as is the case here, resonant amplification is weak, $\chi$ is small and positive, $C' > C$ and this
+leads to a slightly larger $\tilde{\varepsilon}_m$ compared to a rigid planet. Because the inner core is gravitationally
+ locked to the mantle, deviations from a rigid planet are dominantly caused by the
+misalignment of the fluid core. In Equation (41), $\bar{\sigma}_s \gg \bar{\sigma}_f$, so to a good approximation
+–19–
+Confidential manuscript submitted to JGR-Planets
+$$\chi \approx \frac{\bar{A}_f}{\bar{A}_c} \frac{\Omega_p \cos I}{(\bar{\sigma}_f - \Omega_p \cos I)} . \tag{42}$$
+For a small inner core, $\chi \approx 7.55 \times 10^{-3}$. As the inner core grows, $\bar{A}_f$ decreases, and the combination
+ $\bar{A}_c \chi$ also decreases. This implies that the effective moment of inertia $C'$ decreases with inner core size and, consequently,
+$\tilde{\varepsilon}_m$ also decreases with inner core size, as seen in Figure 4a, though it remains larger than the
+prediction for a rigid planet.
+The specific predictions of ˜ε
+m, ˜m
+f and ˜n
+s on Figure 4 depend sensitively on the assumed
+interior density model and on the dynamical ellipticities of the inner core (e
+s) and fluid core
+(e
+f ). Hence, it depends on the choices we have made for the inner core density ρ
+s, the crustal
+density ρ
+c and its thickness h. Changing ρ
+s, ρ
+c and/or h requires a different combination of ρ
+f ,
+ρ
+m and r
+f in order to match M , ˆ
+C and ˆ
+C
+m. In turn, this leads to different ellipticities at interior
+ boundary in order to match J
+2 and C
+22, and thus different predictions for ˜ε
+m, ˜m
+f and
+˜n
+s. To illustrate this, we show on Figure 4 two additional predictions computed with crustal
+thicknesses changed to h = 16 and 36 km. The change in ˜ε
+m remains modest, ∼ 0.025%, but
+the changes in ˜m
+f and ˜n
+s are more substantial, ∼ 5% and ∼ 10%, respectively.
+We also show on Figure 4a (only for h = 26 km) the obliquity of the principal moment
+of inertia of the whole planet, which we denote by ˜ε
+g . A difference between ˜ε
+g and ˜ε
+m occurs
+if the inner core is misaligned with the mantle. As seen in the mantle frame, a tilted inner core
+(with $\tilde{n}_s$ assumed small) leads to an off-diagonal component of the moment of inertia tensor
+of $(C_s - \bar{A}_s) \alpha_3 \tilde{n}_s = \bar{A}_s e_s \alpha_3 \tilde{n}_s$. The angle by which the mantle frame must be rotated so that
+the moment of inertia of the whole planet is purely diagonal is $(\bar{A}_s e_s \alpha_3 \tilde{n}_s)/(\bar{A} e)$, and hence a
+good approximation of $\tilde{\varepsilon}_g$ is
+$$\tilde{\varepsilon}_g = \tilde{\varepsilon}_m + \frac{\bar{A}_s e_s}{\bar{A} e} \, \alpha_3 \tilde{n}_s . \tag{43}$$
+Since the inner core is gravitationally forced into a close alignment with the mantle, the difference
+ between ˜ε
+g and ˜ε
+m remains very small. For the largest inner core radius that we have
+considered, ˜ε
+g differs from ˜ε
+m only by approximately 0.001 arcmin.
+3.3 Viscous coupling
+We now investigate how viscous coupling at the CMB and ICB affects the equilibrium Cassini
+state. Peale et al. [2014] present two different parameterizations of viscous coupling based on
+the timescale of attenuation of the differential rotation between the fluid core and mantle. More
+complete analytical solutions for the flow resulting from a differentially precessing shell have
+been derived [e.g. Stewartson and Roberts , 1963; Busse , 1968; Rochester , 1976] and we exploit
+these solutions here. The parametrization of the viscous coupling constants $K_{cmb}$ and $K_{icb}$ based
+on them is given in Mathews and Guo [2005],
+$$K_{cmb} = \frac{\pi \rho_f r_f^4}{\bar{A}_f} \sqrt{\frac{\nu}{2 \Omega_o}} \left( 0.195 - 1.976 i \right) , \tag{44a}$$
+$$K_{icb} = \frac{\pi \rho_f r_s^4}{\bar{A}_s} \sqrt{\frac{\nu}{2 \Omega_o}} \left( 0.195 - 1.976 i \right) , \tag{44b}$$
+where $\nu$ is the kinematic viscosity. The appropriate numerical value for $\nu$ in planetary interiors
+ is not well known but based on theoretical and experimental studies it is expected to be
+of the order of $10^{-6}$ m$^2$ s$^{-1}$ [e.g. Gans , 1972; de Wijs et al., 1998; Alfè et al., 2000; Rutter et al.,
+2002a,b].
+ –20–
+Confidential manuscript submitted to JGR-Planets
+The above parameterizations are valid only under the assumption that the flow in the boundary
+ layer remains laminar. Whether this is reasonable can be assessed by evaluating the Reynolds
+number $Re = r_f \Delta u_f / \nu$, associated with the differential velocity $\Delta u_f = r_f \Omega_o \tilde{m}_f$ at the CMB.
+For $r_f = 2000$ km, and taking $\tilde{m}_f = 4$ arcmin $\approx 0.001$ rad from the results in the previous
+section, we get $\Delta u_f \sim 2$ mm/s and $Re \sim 6 \times 10^9$. Such a large Reynolds number indicates
+that the viscous friction between the fluid core and mantle should induce turbulent flows, as
+is the case for the Cassini state of the Moon [Yoder , 1981; Williams et al., 2001; Cébron et al.,
+2019]. For a boundary layer that involves turbulent flows, the viscous torque should be independent
+ of the fluid viscosity and proportional to the square of the differential velocity. The
+coupling constant $K_{cmb}$ should be in the form
+$$K_{cmb} = f_{cmb} \left| \tilde{m}_f \right| \left( 0.195 - 1.976 i \right) , \tag{45}$$
+where $f_{cmb}$ is a numerical factor that depends among other things on surface roughness. Incorporating
+ a viscous coupling of this form in our rotational model is more challenging not only
+because $f_{cmb}$ is not known but also because the viscous torque is no longer linear in $\tilde{m}_f$. One
+strategy is to find solutions through an iterative process. The simpler alternative strategy that
+we adopt is to use the laminar formulas of Equation (44) but with the understanding that ν
+represents an effective turbulent viscosity.
+To give an estimate of an appropriate turbulent value for ν , we turn to the Cassini state
+of the Moon. A measure of the viscous dissipation at the CMB of the Moon has been obtained
+by fitting a rotation model to the librations of the Moon observed by Lunar Laser Ranging (LLR)
+[Williams et al., 2001, 2014; Williams and Boggs , 2015]. Viscous dissipation is reported in terms
+of a coupling parameter $K$ and a recent estimate is $K/C_L = (1.41 \pm 0.34) \times 10^{-8}$ day$^{-1}$ [Williams
+and Boggs , 2015], where $C_L$ is the lunar polar moment of inertia. The connection between $K$
+and $K_{cmb}$ is
+$$\left| \mathrm{Im}[K_{cmb}] \right| = \frac{K}{C_L} \, \frac{C_L}{C_{fL}} \, \frac{1}{\Omega_L} , \tag{46}$$
+where $C_{fL}$ is the moment of inertia of the lunar core and $\Omega_L = 2.66 \times 10^{-6}$ s$^{-1}$ the lunar
+rotation rate. With $C_{fL}/C_L \sim 7 \times 10^{-4}$ [e.g. Williams et al., 2014], this gives $|\mathrm{Im}[K_{cmb}]| \sim 9 \times 10^{-5}$.
+In order to match this amplitude in Equation (44a), with lunar parameters and assuming
+ a lunar core radius of 400 km, the required turbulent viscosity is $\nu \approx 5 \times 10^{-4}$ m$^2$ s$^{-1}$,
+about 500 times larger than the laminar viscosity. Note that the differential velocity at the
+CMB of the Moon is closer to 3 cm/s [Yoder , 1981; Williams et al., 2001], more than 10 times
+larger than our estimate for Mercury above. Since the effective turbulent coupling constant $K_{cmb}$
+is proportional to the differential velocity, the effective turbulent viscosity appropriate for Mercury
+ should be smaller. Thus, $\nu \approx 5 \times 10^{-4}$ m$^2$ s$^{-1}$ gives a conservative upper bound for the
+possible effective turbulent viscosity that can be expected for Mercury.
+Figure 5 shows how ˜ε
+m, ˜m
+f and ˜n
+s vary as functions of inner core radius for different choices
+of effective viscosities. For ν = 10−5
+ m2
+ s−1
+, viscous coupling is too weak to affect ˜ε
+m and
+˜m
+f and they are essentially unchanged from the solutions shown in Figure 4. With increasing
+ν , the stronger viscous coupling between the core and the mantle reduces their differential velocity,
+ and ˜m
+f is reduced. With the reduced differential velocity at the CMB, the prediction
+of ˜ε
+m gets closer to 2.04 arcmin, the obliquity expected for a rigid planet. Although our CMB
+viscous coupling model is different than the one used by Peale et al. [2014], our results for ˜ε
+m
+and ˜m
+f are qualitatively similar: viscous coupling at the CMB acts to reduce the offset of the
+fluid spin axis from the mantle symmetry axis. Considering the upper bound in turbulent viscosity
+ that we have identified above (i.e ν ≈ 5 × 10−4
+ m2
+ s−1
+), the influence of viscous cou–21–
+
+Confidential manuscript submitted to JGR-Planets
+ε
+mε
+g
+ m
+f
+n
+s
+2.0382.0402.0422.0442.0462.0482.050
+Obliquity angle (arcmin)
+ 0 200 400 600 800 1000 1200 1400
+Inner core radius (km) 0.00.51.01.52.02.53.03.54.04.5
+Obliquity angle (arcmin)
+ 0 200 400 600 800 1000 1200 1400
+Inner core radius (km)kinematic viscosity:
+ 0.01 m2
+ s-1
+ 0.00001 m2
+ s-1
+0.0001 m2
+ s-1
+0.0005 m2
+ s-1
+0.001  m2
+ s-1
+a  b
+ for a rigid planet
+ε
+m
+Figure 5. a) Obliquity of the mantle ( ˜ε
+m, solid lines) and gravity field ( ˜ε
+g , dashed lines) b) ˜m
+f
+(solid lines) and ˜n
+s (dashed lines) as a function of inner core radius and for different choices of kinematic
+viscosity (color in legend).
+pling on ˜ε
+m remains modest, reducing its amplitude by a maximum of approximately 0.0015
+arcmin.
+The inclusion of viscous coupling at the ICB can lead to a substantial change in inner core
+tilt. A larger viscosity leads to stronger viscous coupling and to a closer alignment of the inner
+ core with the fluid core spin axis. The viscous coupling strength is inversely proportional
+to r
+s, so a larger viscosity results in a larger inner core radius at which viscous coupling is of
+a similar magnitude to gravitational coupling. Taking again an upper bound of ν = 5×10−4
+m2
+ s−1
+, Figure 5 indicates that ˜n
+s may be 1 arcmin or larger only if the inner core radius is
+smaller than approximately 100 km. For an inner core of a few hundred km in radius, gravitational
+ coupling is much larger than viscous coupling, and the inner core tilt is limited to a
+fraction of 1 arcmin.
+The larger inner core tilt observed with increasing effective viscosity results in a larger
+offset between the obliquity of the principal moment of inertia ˜ε
+g and that of the mantle ˜ε
+m,
+though it remains limited. For the upper bound of ν = 5 × 10−4
+ m2
+ s−1
+, and for r
+s = 1500
+km, the difference between ˜ε
+g and ˜ε
+m is limited to 0.0013 arcmin.
+The conclusion that emerges from Figure 5 is that the larger the inner core is, the smaller
+the misalignments of both the fluid core and inner core are with respect to the mantle. This
+implies that the larger the inner core is, the more we approach a planet precessing as a rigid
+body, although the misalignment of the spin axis of the fluid core remains important, approximately
+ 3-4 arcmin away from the mantle symmetry axis. The specific way in which ˜ε
+m, ˜m
+f
+and ˜n
+s change with inner core size would certainly be different for a turbulent model of viscous
+coupling. But the general conclusion remains that the addition of viscous coupling at the CMB
+and ICB does not significantly modify the Cassini state equilibrium angle of the mantle.
+–22–
+Confidential manuscript submitted to JGR-Planets
+3.4 Electromagnetic coupling
+Let us now turn to electromagnetic (EM) coupling. To focus on its role in the equilibrium
+Cassini state, we set the viscous coupling back to zero. Because magnetic field lines tend to remain
+ attached to electrically conducting materials, a differential tangential motion between two
+electrically conducting regions stretches existing magnetic field lines that thread their interface.
+This induces a secondary magnetic field (or equivalently, an electrical current) and an associated
+ tangential EM stress resisting the differential motion. EM coupling at the CMB and ICB
+acts then in a similar way to viscous coupling, and this ’magnetic friction’ depends on the strength
+of the radial magnetic field B
+r and the electrical conductivity σ on either side of the boundary
+ [Rochester , 1960, 1962, 1968].
+The parametrization of EM coupling in terms of the coupling constants K
+cmb and K
+icb
+has been developed in a few studies [e.g. Buffett , 1992; Buffett et al., 2002; Dumberry and Koot ,
+2012]. Assuming a dominating axial dipole field, with a radial component at the CMB given
+by $B_r = \sqrt{3} \, \langle B_r^d \rangle \cos\theta$, where $\langle B_r^d \rangle$ is the r.m.s. strength of the field, the coupling constant
+$K_{cmb}$ can be written in the form
+$$K_{cmb} = 3 (1 - i) \, F_{cmb} \, \langle B_r^d \rangle^2 , \tag{47}$$
+where
+$$F_{cmb} = \frac{1}{\Omega_o \rho_f r_f} \left( \frac{1}{\sigma_m \delta_m} + \frac{1}{\sigma_f \delta_f} \right)^{-1} , \tag{48}$$
+and where $\sigma_m$, $\delta_m = \sqrt{2/(\sigma_m \mu \Omega_o)}$ and $\sigma_f$, $\delta_f = \sqrt{2/(\sigma_f \mu \Omega_o)}$ are the electrical conductivities
+ and magnetic skin depths in the mantle and fluid core, respectively, with $\mu = 4\pi \times 10^{-7}$
+N A$^{-2}$ the magnetic permeability of free space. The r.m.s. field strength $\langle B_r^d \rangle$ is connected to
+the Gauss coefficient $g_1^0$ of the surface magnetic field by
+$$\langle B_r^d \rangle = \frac{2}{\sqrt{3}} \left( \frac{R}{r_f} \right)^3 \left| g_1^0 \right| . \tag{49}$$
+We can readily build an estimate of the amplitude of $K_{cmb}$. The electrical conductivity
+of common mantle minerals in Earth’s mantle at the pressure and temperature corresponding
+to the CMB of Mercury is in the range of $\sigma_m \sim 0.01 - 1$ S m$^{-1}$ [Constable , 2015]. In contrast,
+ the electrical conductivity of Fe in planetary cores is expected to be close to $\sigma_f \sim 10^6$ S
+m$^{-1}$ [Pozzo et al., 2012; de Koker et al., 2012]. This implies that $(\sigma_m \delta_m)^{-1} \gg (\sigma_f \delta_f)^{-1}$. Taking
+ $\sigma_m = 1$ S m$^{-1}$, $|g_1^0| = 190$ nT for Mercury’s dipole field [Anderson et al., 2012], $r_f = 2000$ km,
+$\rho_f = 7000$ kg m$^{-3}$, this gives $K_{cmb} \approx (3.1 \times 10^{-11}) \cdot (1 - i)$. To put this amplitude
+in perspective, taking a molecular viscosity of $\nu = 10^{-6}$ m$^2$ s$^{-1}$ in Equation (44a) gives a viscous
+ coupling constant of $K_{cmb} \approx (6.0 \times 10^{-7}) \cdot (0.195 - 1.976i)$. Hence, EM coupling at the
+CMB is much weaker than viscous coupling, even if we include other spherical harmonic components
+ of the radial magnetic field.
+EM coupling can be enhanced if strongly stratified pockets of core fluid are trapped by
+CMB cavities [Buffett , 2010; Glane and Buffett , 2018], in which case the effective σ
+m could be
+closer to σ
+f . Likewise, σ
+m can be increased if a more electrically conducting layer has formed
+at the bottom of Mercury’s mantle, for instance by the upward sedimentation and compaction
+of solid FeS crystals precipitating out of the fluid core [e.g. Hauck et al., 2013]. However, even
+in the extreme case of $\sigma_m = \sigma_f = 10^6$ S m$^{-1}$, $K_{cmb} \approx (1.6 \times 10^{-8}) \cdot (1 - i)$, which remains
+–23–
+Confidential manuscript submitted to JGR-Planets
+smaller by a factor ∼ 60 than the smallest possible viscous coupling constant. Viscous forces
+dominate the tangential stress on the CMB of Mercury.
+At the ICB, because we can expect the electrical conductivity in both the solid inner core
+and fluid core to be similar, and because the radial magnetic field is likely much stronger, EM
+coupling can be much larger and dominate viscous coupling. We assume that the magnetic field
+morphology at the ICB is dominantly comprised of small spatial scales for example as predicted
+by the dynamo model of Christensen [2006]. EM coupling in this case can be parametrized in
+terms of an equivalent uniform radial magnetic field $\langle B_r \rangle$ capturing its r.m.s. strength [Buffett
+ et al., 2002; Dumberry and Koot , 2012]. Assuming an electrical conductivity $\sigma$ equal in the
+fluid and solid core, the coupling constant $K_{icb}$ can be written in the form
+$$K_{icb} = \frac{5}{4} (1 - i) \, F_{icb} \, \langle B_r \rangle^2 , \tag{50}$$
+where
+$$F_{icb} = \frac{\sigma \delta}{\Omega_o \rho_s r_s} , \tag{51}$$
+and where $\delta = \sqrt{2/(\sigma \mu \Omega_o)}$ is the magnetic skin depth. As $F_{icb}$ is inversely proportional to
+$r_s$, $K_{icb}$ is inversely proportional to inner core size. Note that computing the EM coupling based
+on the r.m.s. strength $\langle B_r \rangle$ rather than a true field morphology tends to overestimate the strength
+of the coupling [Koot and Dumberry , 2013]. However, since the strength of the radial magnetic
+field at the ICB of Mercury is largely unknown, imperfections of the EM coupling model are
+absorbed in the range of possible $\langle B_r \rangle$ values.
+The parametrization of Equation (50) is only valid in a ’weak field’ regime [Buffett et al.,
+2002], when the feedback from the Lorentz force on the flow in the fluid core can be neglected.
+When $\langle B_r \rangle$ is sufficiently large, this is no longer the case. EM coupling then enters a ’strong
+field’ regime [Buffett et al., 2002; Dumberry and Koot , 2012; Koot and Dumberry , 2013] in which
+$K_{icb}$ increases linearly with $\langle B_r \rangle$ instead of quadratically. A good approximation of $K_{icb}$ calculated
+ for Earth can be extracted from Figure 6a of Dumberry and Koot [2012],
+$$K_{icb}^E = (0.175 - i \, 0.138) \, \langle B_r \rangle , \tag{52}$$
+where $\langle B_r \rangle$ is in units of Tesla. The superscript $E$ emphasizes that the numerical factors are
+appropriate for the parameter values adopted for Earth in the computation of Dumberry and
+Koot [2012]. To adapt these numerical factors to Mercury, we write,
+$$K_{icb} = (0.175 - i \, 0.138) \, \frac{F_{icb}}{F_{icb}^E} \, \langle B_r \rangle , \tag{53}$$
+where $F_{icb}^E$ is defined as in Equation (51) but using the parameters for Earth as defined in Dumberry
+ and Koot [2012]. These are $\Omega_o = 7.292 \times 10^{-5}$ s$^{-1}$, $\rho_s = 12846$ kg m$^{-3}$, $r_s = 1221.5$
+km, $\sigma = 5 \times 10^5$ S m$^{-1}$, which gives $F_{icb}^E = 90.36$ T$^{-2}$.
+To compute $F_{icb}$, we assume an electrical conductivity of $\sigma = 10^6$ S m$^{-1}$ in the core of
+Mercury [e.g. de Koker et al., 2012; Deng et al., 2013]. The transition between the weak and
+strong field regime occurs when $\langle B_r \rangle \approx 1.53$ mT for the real part of $K_{icb}$. $\langle B_r \rangle$ at the ICB
+of Mercury is unknown. The dynamo model of Christensen [2006] showed that the field geometry
+ inside the core could be dominated by small length scales, yet only the weaker lower harmonics
+ of the field would penetrate through a thermally stratified layer in the upper region of
+–24–
+Confidential manuscript submitted to JGR-Planets
+the fluid core and reach the surface. If so, the field strength inside the core can exceed the surface
+ field strength by a factor 1000. Taking a surface field strength equal to ∼ 300 nT [e.g. Anderson
+ et al., 2012], $\langle B_r \rangle$ at the ICB could be as large as 0.3 mT, corresponding to approximately
+ 10% of the field strength within Earth’s core. Given that it is perhaps unlikely that Mercury’s
+ field can be as high as that in Earth’s core, in all likelihood EM coupling at the ICB of
+Mercury remains in the weak field regime.
+Figure 6 shows how ˜ε
+m, ˜m
+f and ˜n
+s vary as functions of inner core radius for different choices
+of B
+r . The larger B
+r  is, the stronger is the EM coupling at the ICB, and the smaller is the
+differential rotation between the fluid core and inner core. The inner core and fluid core are virtually
+ locked into a common precession motion when B
+r  > 0.3 mT. Further increasing B
+r
+above 1 mT does not change the solution as EM coupling already dominates all other torques
+on the inner core. This is the case even when EM coupling transitions into the strong field regime.
+EM coupling at the CMB is included in these calculations, with σ
+m = 1 S m−1
+ and
+
+g0
+1
+
+ =
+190 nT, but remains much weaker than the inertial torque at the CMB, so for a small inner core
+we retrieved the solutions of ˜ε
+m and ˜m
+f shown in Figure 4.
+As the inner core radius is increased, both $\tilde{\varepsilon}_m$ and $\tilde{m}_f$ get smaller, as was the case with
+viscous coupling alone, although the addition of EM coupling leads to more substantial changes.
+The inner core needs to be larger than approximately 500 km for changes in the Cassini state
+equilibrium to be noticeable. It is important to point out that ˜m
+f is reduced not because of
+EM coupling at the CMB, but rather from the combination of EM coupling at the ICB, which
+pulls the fluid core towards an alignment with the inner core, and gravitational coupling on the
+inner core, which pulls the latter to align with the mantle. The larger the EM coupling is, the
+greater is the reduction in ˜ε
+m and ˜m
+f .
+When the EM coupling at the ICB is sufficiently strong that the fluid and solid cores are
+locked into a common precession motion, a good approximation of $\tilde{\varepsilon}_m$ is given by the same prediction
+ as Equations (39-40) involving the effective moment of inertia $C'$, except $\chi$ is now given
+by
+$$\chi = \frac{\bar{A}_c \Omega_p \cos I - \bar{A}_s \Omega_o \alpha_3 \phi_s}{\bar{A}_f \Omega_o (e_f + K_{cmb}) + \bar{A}_s \Omega_o e_s \alpha_3 \alpha_g - \bar{A}_c \Omega_p \cos I} . \tag{54}$$
+For a small inner core, $\bar{A}_c \Omega_p \cos I > \bar{A}_s \Omega_o \alpha_3 \phi_s$ and $\chi$ is positive. Because $\bar{A}_s \Omega_o \alpha_3 \phi_s$ increases
+with inner core size, $\chi$ gets smaller, and so do $C'$ and $\tilde{\varepsilon}_m$. The mantle obliquity drops from 2.049
+arcmin for a small inner core to 2.034 arcmin for an inner core of 1500 km, a reduction of 0.015
+arcmin. For an inner core larger than ≈ 1000 km, $\bar{A}_c \Omega_p \cos I < \bar{A}_s \Omega_o \alpha_3 \phi_s$, so $\chi$ becomes negative,
+ $C'$ becomes smaller than the moment of inertia of a rigid Mercury $C$, and $\tilde{\varepsilon}_m$ becomes
+smaller than the prediction based on a rigid planet.
+The larger the inner core is, the smaller are the misalignments of the fluid and solid cores
+with respect to the mantle. Hence, the general conclusion we reached for viscous coupling alone
+is not altered with the addition of EM coupling but further strengthened; the larger the inner
+core is, the closer we approach a planet precessing as a rigid body. This is best revealed by the
+obliquity of the gravity field ˜ε
+g which, for a large inner core, asymptotically approaches the obliquity
+ expected for a rigid planet. Note that with strong EM coupling at the ICB, the offset between
+ ˜ε
+m and ˜ε
+g can be as large as 0.008 arcmin for a large inner core.
+3.5 Fixed inner core density versus fixed ICB density contrast
+Coupling models when viscous and EM stresses are both present have been presented in
+Mathews and Guo [2005] and Deleplace and Cardin [2006]. However, in the light of our results,
+–25–
+Confidential manuscript submitted to JGR-Planets
+2.0322.0342.0362.0382.0402.0422.0442.0462.0482.050
+Obliquity angle (arcmin)
+ 0 200 400 600 800 1000 1200 1400
+Inner core radius (km) 0.00.51.01.52.02.53.03.54.04.5
+Obliquity angle (arcmin)
+ 0 200 400 600 800 1000 1200 1400
+Inner core radius (km)B
+r at ICB:
+ 1 mT 0.01 mT0.03 mT0.1 mT0.3  mT
+ε
+m
+ε
+g
+ m
+f
+n
+sa  b
+ for a rigid planet
+ε
+m
+Figure 6. a) Obliquity of the mantle ( ˜ε
+m, solid lines) and gravity field ( ˜ε
+g , dashed lines) b) ˜m
+f
+(solid lines) and ˜n
+s (dashed lines) as a function of inner core radius and for different choices of B
+r
+(colour in legend).
+for the Cassini state equilibrium of Mercury, the tangential stress at the CMB is dominated by
+viscous forces, and that at the ICB should be dominated by EM forces. To simplify, we consider
+ a model where K
+cmb is purely from viscous coupling and K
+icb purely from EM coupling.
+We choose an effective viscosity at the CMB of $\nu = 10^{-4}$ m$^2$ s$^{-1}$, which we believe to be a
+representative value given the comparison with the Moon (see section 3.3). We take a radial
+field strength at the ICB of B
+r  = 0.3 mT, approximately the field strength expected under
+the dynamo scenario of Christensen [2006]. We adopt these values as those of a ‘representative’
+ coupling model, although the uncertainty on ν and B
+r  obviously remains high.
+Figure 7 shows how ˜ε
+m, ˜m
+f and ˜n
+s vary with inner core radius for the ’representative’
+coupling model (black lines) under the fixed inner core density scenario that we have used in
+sections 3.2, 3.3 and 3.4. Figure 7 also shows how the results change when, for the same representative
+ coupling model, we adopt instead a fixed density contrast between the fluid and solid
+cores and for different choices of α
+3 (coloured lines). For a relatively high density contrast (α
+3 =
+0.2), the results are qualitatively similar to the fixed inner core density scenario. For a smaller
+α
+3, the point at which the orientation of the co-precessing fluid and inner cores begins to be
+pulled into an alignment with the mantle is pushed to a larger inner core radius. However, the
+general behaviour of ˜ε
+m, ˜m
+f and ˜n
+s as functions of inner core radius is unchanged. Hence, all
+our results in the previous three sections would be qualitatively similar under a fixed density
+contrast scenario. A smaller density contrast at the ICB only implies that a larger inner core
+is required in order to produce an equivalent change in the Cassini state equilibrium.
+4 Discussion
+The study of Peale et al. [2016] also presented predictions of the obliquities of the mantle,
+ fluid core and inner core associated with the equilibrium Cassini state of Mercury. Their
+model included the tangential viscous stress at the ICB and CMB, but not the EM stress. Their
+Table 1 gives the obliquities of the mantle, fluid core and inner core, denoted respectively as
+–26–
+Confidential manuscript submitted to JGR-Planets
+2.0322.0342.0362.0382.0402.0422.0442.0462.0482.050
+Obliquity angle (arcmin)
+ 0 200 400 600 800 1000 1200 1400
+Inner core radius (km) 0.00.51.01.52.02.53.03.54.04.5
+Obliquity angle (arcmin)
+ 0 200 400 600 800 1000 1200 1400
+Inner core radius (km) for a rigid planet
+ε
+ma  bα
+3:
+ 0.20 0.010.05 0.100.15ρ
+s = 8800 kg m-3
+ m
+f
+n
+sε
+m
+ε
+g
+Figure 7. a) Obliquity of the mantle ( ˜ε
+m, solid lines) and gravity field ( ˜ε
+g , dashed lines) b) ˜m
+f
+(solid lines) and ˜n
+s (dashed lines) as a function of inner core radius, for a fixed inner core density of
+8800 kg m−3
+ (black lines) and for different choices of α
+3 (coloured lines).
+$i_m$, $i_f$ and $i_s$; these represent the obliquities with respect to the orbital plane and are connected
+to our variables by: $i_m = \tilde{\varepsilon}_m$, $i_f = \tilde{\varepsilon}_m + \tilde{m} + \tilde{m}_f \approx \tilde{\varepsilon}_m + \tilde{m}_f$ and $i_s = \tilde{\varepsilon}_m + \tilde{n}_s$. To summarize
+their results, i
+f and i
+s vary substantially for different inner core sizes, are always of comparable
+ amplitude, and i
+s is always larger than i
+f . Furthermore, they find that as the inner core
+size is increased, the mantle obliquity i
+m gets progressively larger and is displaced further away
+from its expected orientation based on a rigid planet (see their Figure 6). The change in i
+m they
+obtain between a case with no inner core and an inner core radius equal to 0.6 times the planetary
+ radius (≈ 1463 km, close to the maximum inner core size of 1500 km we have considered),
+is approximately an increase of 5 × 10−5
+ rad = 0.17 arcmin. This also corresponds approximately
+ to the deviation of the obliquity with respect to that of a rigid planet.
+When only viscous stress is included in our model (section 3.3), our results are substantially
+ different. As illustrated in Figure 4, we find instead that the obliquity of the fluid core
+gets smaller with inner core size and that the change is very modest. In contrast with the results
+ of Peale et al. [2016], we find that the inner core obliquity is typically smaller than that
+of the fluid core, except when the inner core is very small or when the effective viscosity is unreasonably
+ large. We also find that as the inner core size is increased, the mantle obliquity gets
+smaller, opposite to the results of Peale et al. [2016], and that the changes remain small, at most
+of the order of 0.005 arcmin. A part of the difference is due to the different viscous coupling
+model that we use. But even when we adopt their model parameters and use their viscosity model,
+we were not able to reproduce their results.
+In the absence of viscous and EM coupling, the strong gravitational torque exerted on the
+inner core by the mantle should prevent any large misalignment between the two. This is captured
+ by the period of the FICN, which is of the order of 100 yr, much shorter than the forcing
+ period of 325 kyr. Viscous and/or EM coupling at the ICB can counteract the gravitational
+torque (and alter the period of the FICN), but only for a small inner core. The ratio of the viscous-EM
+ torque to the gravitational torque decreases with inner core size, so a large inner core should
+be more strongly aligned with the mantle. The more strongly the inner core and mantle are
+–27–
+Confidential manuscript submitted to JGR-Planets
+gravitationally locked together, the more they behave as a single rigid body in response to the
+external torque from the Sun. We expect then that the obliquity of the mantle should be brought
+closer to that of a rigid planet when the inner core is larger. Hence, we find puzzling the results
+ of Peale et al. [2016], which suggest the opposite.
+We showed that EM coupling is most likely larger than viscous coupling at the ICB, even
+though our knowledge of the radial magnetic field strength inside Mercury (on which EM coupling
+ depends) remains poor. If the magnetic field strength at the ICB is above 0.3 mT, EM
+coupling is sufficiently strong to bring the fluid and solid cores into a locked precession motion.
+The larger the inner core is, the more this co-precessing core is forced into an alignment with
+the mantle because of the mantle gravitational torque on the inner core. As a result, the larger
+the inner core is, the closer we approach a situation resembling a whole planet precessing as
+a rigid body. The addition of EM coupling at the ICB does not change the overall picture that
+we observe with viscous coupling alone; the mantle obliquity decreases with inner core size. The
+amplitude of the decrease can be as large as 0.015 arcmin, 3 times larger than for viscous coupling
+ alone; this remains a factor 10 smaller than the changes suggested in Peale et al. [2016],
+and again, importantly, in the reverse direction.
+Our results suggest then that the presence and size of an inner core leads to only modest
+ changes of the mantle obliquity ε
+m compared to the obliquity predicted on the basis of an
+entirely rigid planet (εr
+m). Let us denote this difference as ∆ε
+m = ε
+m−εr
+m. The largest ∆ε
+m
+occurs for a small or no inner core, and is ∆ε
+m ≈ 0.01 arcmin. This difference is decreased
+as the inner core size is increased. For a sufficiently large inner core, in the case of a strong EM
+coupling and large density contrast at the ICB, ∆ε
+m can be negative, but its absolute value
+remains smaller than 0.01 arcmin.
+To put these results in perspective, the uncertainty in the measurement of the mantle obliquity
+ reported by Margot et al. [2012] and Stark et al. [2015a] is of the order of 0.08 arcmin, much
+larger than this difference. This means that, at the current level of precision, it is not possible
+ to distinguish the position of the mantle obliquity from the obliquity of a rigid planet. This
+is consistent with the fact that the observed obliquity falls close to that expected from a rigid
+planet. But it also implies that the observed obliquity cannot be used to place constraints on
+the inner core size.
+Nevertheless, our results show that the presence of a fluid core and inner core affect the
+resulting mantle obliquity by as much as 0.01 arcmin. This is of the same order as the change
+in obliquity caused by elastic tidal deformation, which is of the order of 0.35 arcsec (≈ 0.006
+arcmin) [Baland et al., 2017]. This is also of the same order as the amplitude of the nutation
+motion about the mean equilibrium Cassini state forced by the precession of the pericenter, which
+is approximately 0.85 arcsec (≈ 0.014 arcmin) [Baland et al., 2017]. The precision on the obliquity
+ from the upcoming BepiColombo satellite mission is expected to be ≤ 0.5 arcsec (≤ 0.008
+arcmin) [Cicalò et al., 2016]. Thus, in addition to including tidal deformation and the precession
+ of the pericenter, a Cassini state model that includes a fluid and solid core will then be
+necessary in order to properly tie Mercury’s obliquity to its interior structure. In turn, this opens
+the possibility of further constraining the interior structure of Mercury on the basis of its obliquity.
+
+ Obliquity measurements based on tracking topographic features reflect the orientation of
+the spin-symmetry axis of the mantle (ε
+m). Measurements based on tracking the gravity field
+of Mercury reflect instead the orientation of the principal moment of the whole planet (ε
+g ). These
+two orientations do not coincide when an inner core is present and is misaligned from the mantle.
+ Since gravitational coupling prevents a large inner core tilt with respect to the mantle, we
+–28–
+Confidential manuscript submitted to JGR-Planets
+find that the misalignment ∆ε
+g = ε
+g − ε
+m is limited. The maximum offset that we obtain
+is approximately ∆ε
+g ≈ 0.007 arcmin. This limited magnitude of offset is important in the
+light of the recent obliquity of the gravity field estimated in Genova et al. [2019], ε
+g = 1.968±
+0.027 arcmin. This is substantially smaller than the two measurements of the obliquity of the
+spin-symmetry axis of the mantle: ε
+m = 2.04 ± 0.08 arcmin [Margot et al., 2012] and ε
+m =
+2.029±0.085 arcmin [Stark et al., 2015a], although all three measurements remain consistent
+with one another within their error estimates. In their interpretation, Genova et al. [2019] suggest
+ that the different central value of the obliquity that they obtain (smaller by ∼ 0.07 arcmin)
+ is perhaps explained by an offset ∆ε
+g due to the presence of a (possibly large) solid inner
+ core. However, this is one order of magnitude larger than the maximum magnitude of ∆ε
+g
+that we predict. Moreover, we predict that the obliquity of the gravity field should be larger
+than that of the mantle spin axis, not smaller. Hence, at the present-day level of the precision
+of the measurements, ε
+g and ε
+m should coincide, and their difference cannot be interpreted as
+reflecting the misalignment between the polar moment of inertia of the whole planet and the
+mantle spin axis.
+Lastly, we have concentrated our efforts on the mutual orientations of the different spin
+and symmetry axes in the Cassini plane. Dissipation at the CMB and ICB introduced by viscous
+ and EM coupling also lead to a displacement of these axes in the direction perpendicular
+ to the Cassini plane [e.g., Peale et al., 2014]. Indeed, the two measurements based on tracking
+ surface topographic features from Margot et al. [2012] and Stark et al. [2015a] suggest that
+the mantle spin axis lags behind the Cassini plane by approximately 2 arcsec (∼ 0.03 arcmin).
+Although this offset is smaller than the measurement errors, so that the observed obliquity is
+still consistent with no deviation away from the Cassini plane, some amount of dissipation invariably
+ takes place. These measurements give then a measure of the possible amplitude of the
+dissipation. One source of dissipation is from anelastic tidal deformation [Baland et al., 2017],
+but viscous and EM coupling at the boundaries of the fluid core is another. Hence, the out-of-plane
+ component of the observed obliquity may further help to quantify and constrain the interior
+ coupling mechanisms. This will be the subject of a future study.
+5 Conclusion
+We have investigated how the presence of a fluid core and solid inner core affects the Cassini
+state equilibrium of Mercury. Our general conclusion is that the coupling strength between Mercury’s
+ interior regions is sufficiently strong that the obliquity of the mantle spin-symmetry axis
+does not deviate from that of a rigid planet by more than 0.01 arcmin. This largest offset occurs
+ for a small or no inner core. The larger the inner core is, the more it is forced into an alignment
+ with the mantle because of the strong gravitational torque between the two, and the closer
+we approach a situation resembling a whole planet precessing as a rigid body. The misalignment
+ between the polar moment of inertia and mantle spin axis increases with inner core size,
+but is limited to approximately 0.007 arcmin. These conclusions apply irrespective of the core
+composition and thus of the partitioning of light elements into the solid core; a smaller density
+ contrast at the ICB only implies that a larger inner core is required in order to produce
+an equivalent change in the Cassini state equilibrium.
+Our results imply that the obliquities of the mantle spin axis and polar moment of inertia
+ (or, equivalently, the gravity field) should coincide at the present-day level of measurement
+errors. Moreover, neither of these can be distinguished from the obliquity predicted on the basis
+ of a rigid planet. However, the smaller measurement errors expected from the upcoming BepiColombo
+ satellite mission may permit this distinction, and thus provide further constraints on
+Mercury’s interior structure.
+ –29–
+Confidential manuscript submitted to JGR-Planets
+Acknowledgments
+Figures were created using the GMT software [Wessel et al., 2013]. The source codes, GMT
+scripts and data files to reproduce all figures are freely accessible in Dumberry [2020]. This work
+was supported by an NSERC/CRSNG Discovery Grant.
+References
+Alfè, D., G. Kresse, and M. Gillan (2000), Structure and dynamics of liquid iron under core
+conditions, Phys. Rev., B61, 132–142.
+Anderson, B. J., C. L. Johnson, H. Korth, M. E. Purucker, R. M. Winslow, J. A. Slavin,
+S. C. Solomon, R. L. McNutt, M. Raines, Jim, and T. H. Zurbuchen (2011), The global
+magnetic field of Mercury from MESSENGER orbital observations, Science, 333, 1859–
+1862.
+Anderson, B. J., C. L. Johnson, H. Korth, R. M. Winslow, J. E. Borovsky, M. E. Purucker,
+ J. A. Slavin, S. C. Solomon, M. T. Zuber, and R. L. McNutt (2012), Low-degree
+ structure in mercury’s planetary magnetic field, J. Geophys. Res., 117, E00L12,
+doi:10.1029/2012JE004159.
+Baland, R.-M., A. Yseboodt, M. Rivoldini, and T. Van Hoolst (2017), Obliquity of Mercury:
+ Influence of the precession of the pericenter and of tides, Icarus, 291, 136–159.
+Baland, R.-M., A. Coyette, and T. Van Hoolst (2019), Coupling between the spin precession
+ and polar motion of a synchronously rotating satellite: application to Titan,
+Celestial Mechanics and Dynamical Astronomy, 131 (11), 1–50.
+Buffett, B. A. (1992), Constraints on magnetic energy and mantle conductivity from the
+forced nutations of the Earth, J. Geophys. Res., 97, 19,581–19,597.
+Buffett, B. A. (2010), Chemical stratification at the top of earth’s core: Constraints from
+observations of nutations, Earth Planet. Sci. Lett., 296, 367–372.
+Buffett, B. A., P. M. Mathews, and T. A. Herring (2002), Modeling of nutation-precession:
+effects of electromagnetic coupling, J. Geophys. Res., 107, doi:10.1029/2001JB000056.
+Busse, F. H. (1968), Steady fluid flow in a precessing spheroidal shell, J. Fluid Mech., 33,
+739–751.
+Byrne, P. K., C. Klimczak, A. M. C. Şengör, S. C. Solomon, T. R. Watters, and S. A.
+Hauck (2014), Mercury’s global contraction much greater than earlier estimates, Nature
+Geosci., 7, 301–307.
+Cébron, D., R. Laguerre, J. Noir, and N. Schaeffer (2019), Precessing spherical shells:
+flows, dissipation, dynamo and the lunar core, Geophys. J. Int., 219 (Supplement
+ 1),
+S34–S57, doi:10.1093/gji/ggz037.
+Christensen, U. R. (2006), A deep dynamo generating Mercury’s magnetic field, Nature,
+444, 1056–1058.
+Cicalò, S., G. Schettino, S. Di Ruzza, E. M. Alessi, G. Tommei, and A. Milani (2016), The
+BepiColombo MORE gravimetry and rotation experiments with the ORBIT14 software,
+Month. N. Roy. Astr. Soc., 457, 1507–1521.
+Colombo, G. (1966), Cassini’s second and third laws, Astron. J., 71, 891–896.
+Constable, S. (2015), Geomagnetic induction studies, in Treatise on Geophysics, Second
+Edition, vol. 5, edited by G. Schubert and M. Kono, chap. 7, pp. 219–254, Elsevier, Oxford.
+
+de Koker, N., G. Steinle-Neumann, and V. Vlček (2012), Electrical resistivity and thermal
+conductivity of liquid Fe alloys at high P and T, and heat flux in Earth’s core, Proc.
+Nat. Acad. Sci., 109, 4070–4073.
+ –30–
+Confidential manuscript submitted to JGR-Planets
+de Wijs, G. A., G. Kresse, L. Vočadlo, D. Dobson, D. Alfè, M. J. Gillan, and G. D. Price
+(1998), The viscosity of liquid iron at the physical conditions of the Earth’s core, Nature,
+392, 805–807.
+Dehant, V., and P. Mathews (2015), Earth rotation variations, in Treatise on Geophysics,
+vol. 3, edited by G. Schubert, chap. 10, pp. 263–305, Elsevier, Oxford.
+Deleplace, B., and P. Cardin (2006), Viscomagnetic torque at the core mantle boundary,
+Geophys. J. Int., 167, 557–566.
+Deng, L., C. Seagle, Y. Fei, and A. Shahar (2013), High pressure and temperature electrical
+resistivity of iron and implications for planetary cores, Geophys. Res. Lett., 40, 33–37,
+doi:10.1029/2012GL054347.
+Dumberry, M. (2020), Replication Data for: The influence of a fluid core and a solid inner
+ core on the Cassini state of Mercury, https://doi.org/10.7939/DVN/903HUV, UAL
+Dataverse, V2.
+Dumberry, M., and L. Koot (2012), A global model of electromagnetic coupling for nutations,
+ Geophys. J. Int., 191, 530–544.
+Dumberry, M., and A. Rivoldini (2015), Mercury’s inner core size and core-crystallization
+regime, Icarus, 248, 254–268.
+Dumberry, M., and M. A. Wieczorek (2016), The forced precession of the Moon’s inner
+core, J. Geophys. Res. Planets, 121, 1264–1292.
+Dumberry, M., A. Rivoldini, T. Van Hoolst, and M. Yseboodt (2013), The role of Mercury’s
+ core density structure on its longitudinal librations, Icarus, 225, 62–74.
+Gans, R. F. (1972), Viscosity of the Earth’s core, J. Geophys. Res., 77, 360–366.
+Genova, A., S. Goossens, E. Mazarico, F. G. Lemoine, G. A. Neumann, W. Kuang,
+T. J. Sabaka, S. A. Hauck II, D. E. Smith, S. C. Solomon, and M. T. Zuber (2019),
+Geodetic evidence that Mercury has a solid inner core, Geophys. Res. Lett., 46,
+doi:10.1029/2018GL081135.
+Glane, S., and B. A. Buffett (2018), Enhanced core-mantle coupling due to stratification at
+the top of the core, Frontiers in Earth Science, 6, 171, doi:10.3389/feart.2018.00171.
+Grott, M., D. Breuer, and M. Laneuville (2011), Thermo-chemical evolution and global
+contraction of Mercury, Earth Planet. Sci. Lett., 307, 135–146.
+Hauck, S. A., J.-L. Margot, S. C. Solomon, R. J. Phillips, C. L. Johnson, F. G. Lemoine,
+E. Mazarico, T. J. McCoy, S. Padovan, S. J. Peale, M. E. Perry, D. E. Smith, and M. T.
+Zuber (2013), The curious case of Mercury’s internal structure, J. Geophys. Res., 118,
+doi:10.1002/jgre.20091.
+Johnson, C. L., M. E. Purucker, H. Korth, B. J. Anderson, R. M. Winslow, M. M. H.
+Al Asad, J. A. Slavin, I. I. Alexeev, R. J. Phillips, M. T. Zuber, and S. C. Solomon
+(2012), MESSENGER observations of mercury’s magnetic field structure, J. Geophys.
+Res., 117, E00L14, doi:10.1029/2012JE004217.
+Konopliv, A. S., R. S. Park, and A. I. Ermakov (2020), The Mercury gravity field, orientation,
+ love number, and ephemeris from the MESSENGER radiometric tracking data,
+Icarus, 335, 113,386.
+Koot, L., and M. Dumberry (2013), The role of the magnetic field morphology on the
+electromagnetic coupling for nutations, Geophys. J. Int., 195, 200–210.
+Li, J., Y. Fei, H. Mao, K. Hirose, and S. Shieh (2001), Sulfur in Earth’s inner core, Earth
+Planet. Sci. Lett., 193, 509–514.
+Margot, J. L., S. J. Peale, R. F. Jurgens, M. A. Slade, and I. V. Holin (2007), Large longitude
+ libration of Mercury reveals a molten core, Science, 316, 710–714.
+Margot, J. L., S. J. Peale, S. C. Solomon, S. A. Hauck, F. D. Ghigo, R. F. Jurgens,
+M. Yseboodt, J. D. Giorgini, S. Padovan, and D. B. Campbell (2012), Mercury’s
+–31–
+Confidential manuscript submitted to JGR-Planets
+moment of inertia from spin and gravity data, J. Geophys. Res., 117, E00L09,
+doi:10.1029/2012JE004161.
+Margot, J. L., S. A. Hauck II, E. Mazarico, S. Padovan, and S. J. Peale (2018), Mercury’s
+internal structure, in Mercury: The View after MESSENGER, edited by S. Solomon,
+L. Nittler, and B. Anderson, pp. 85–113, Cambridge University Press, Cambridge, doi:
+10.1017/9781316650684.005.
+Mathews, P. M., and J. Guo (2005), Viscoelectromagnetic coupling in precession-nutation
+theory, J. Geophys. Res., 110 (B02402), doi:10.1029/2003JB002915.
+Mathews, P. M., B. A. Buffett, T. A. Herring, and I. I. Shapiro (1991), Forced nutations of
+the Earth: Influence of inner core dynamics. 1. theory, J. Geophys. Res., 96, 8219–8242.
+Mathews, P. M., T. A. Herring, and B. A. Buffett (2002), Modeling of nutations and precession:
+ New nutation series for nonrigid Earth and insights into the Earth’s interior, J.
+Geophys. Res., 107, doi:10.1029/2004JB000390.
+Mazarico, E., A. Genova, S. Goossens, F. G. Lemoine, G. A. Neumann, M. T. Zuber,
+D. E. Smith, and S. C. Solomon (2014), The gravity field, orientation, and ephemeris of
+Mercury from MESSENGER observations after three years in orbit, J. Geophys. Res.
+Planets, 119, 2417–2436.
+Organowski, O., and M. Dumberry (2020), Viscoelastic relaxation within the Moon
+and the phase lead of its Cassini state, Journal of Geophysical Research Planets, 125,
+e2020JE006386.
+Peale, S. J. (1969), Generalized Cassini’s laws, Astron. J., 74, 483–489.
+Peale, S. J. (1974), Possible histories of the obliquity of Mercury, Astron. J., 79, 722–744.
+Peale, S. J. (1976), Does Mercury have a molten core?, Nature, 262, 765–766.
+Peale, S. J. (2005), The free precession and libration of Mercury, Icarus, 178, 4–18.
+Peale, S. J. (2006), The proximity of Mercury’s spin to Cassini state 1 from adiabatic invariance,
+ Icarus, 181, 338–347.
+Peale, S. J., J. L. Margot, S. A. Hauck II, and S. C. Solomon (2014), Effect of core-mantle
+and tidal torques on Mercury’s spin axis orientation, Icarus, 231, 206–220.
+Peale, S. J., J. L. Margot, S. A. Hauck II, and S. C. Solomon (2016), Consequences of a
+solid inner core on Mercury’s spin configuration, Icarus, 264, 443–455.
+Perry, M. E., G. A. Neumann, R. J. Phillips, et al. (2015), The low-degree shape of
+Mercury, Geophys. Res. Lett., 42, 6951–6958.
+Poincaré, H. (1910), Sur la précession des corps déformables, Bull. Astron. Ser. 1, 27,
+321–356.
+Pozzo, M., C. Davies, D. Gubbins, and D. Alfè (2012), Thermal and electrical conductivity
+of iron at Earth’s core conditions, Nature, 485, 355–358.
+Rochester, M. G. (1960), Geomagnetic westward drift and irregularities in the Earth’s
+rotation, Phil. Trans. R. Soc. Lond., A, 252, 531–555.
+Rochester, M. G. (1962), Geomagnetic core-mantle coupling, J. Geophys. Res., 67, 4833–
+4836.
+Rochester, M. G. (1968), Perturbations in the Earth’s rotation and geomagnetic core-mantle
+ coupling, J. Geomag. Geoelectr., 20, 387–402.
+Rochester, M. G. (1976), The secular decrease of obliquity due to dissipative core-mantle
+coupling, Geophys. J. R. Astron. Soc., 46, 109–126.
+Rutter, M., R. Secco, T. Uchida, H. Liu, Y. Wang, M. Rivers, and S. Sutton (2002a), Towards
+ evaluating the viscosity of the Earth’s outer core: an experimental high pressure
+study of liquid Fe-S (8.5 wt. per cent S), Geophys. Res. Lett., 29, 080,000–1.
+Rutter, M. D., R. A. Secco, H. Liu, T. Uchida, M. Rivers, S. Sutton, and Y. Wang
+(2002b), Viscosity of liquid Fe at high pressure, Phys. Rev. B, 66, 060,102,
+–32–
+Confidential manuscript submitted to JGR-Planets
+doi:10.1029/2001GL014392.
+Schaefer, L., S. B. Jacobsen, J. L. Remo, M. I. Petaev, and D. D. Sasselov (2017), Metal-silicate
+ partitioning and its role in core formation and composition on Super-Earths,
+Astrophys. J., 835, 234.
+Sori, M. M. (2018), A thin, dense crust for Mercury, Earth Planet. Sci. Lett., 489, 92–99.
+Stark, A., J. Oberst, F. Preusker, S. J. Peale, J.-L. Margot, R. J. Phillips, G. A. Neumann,
+D. E. Smith, M. T. Zuber, and S. C. Solomon (2015a), First MESSENGER orbital observations
+ of Mercury’s librations, Geophys. Res. Lett., 42, 7881–7889.
+Stark, A., J. Oberst, and H. Hussmann (2015b), Mercury’s resonant rotation from secular
+orbital elements, Celest. Mech. Dyn. Astr., 123, 263–277.
+Stewartson, K., and P. H. Roberts (1963), On the motion of a liquid in a spheroidal cavity
+of a precessing rigid body, J. Fluid Mech., 17, 1–20.
+Stys, C., and M. Dumberry (2018), The cassini state of the Moon’s inner core, J. Geophys.
+Res. Planets, 123, 1–25, doi:10.1029/2018JE005607.
+Van Hoolst, T. (2015), Rotation of the terrestrial planets, in Treatise on Geophysics,
+vol. 10, edited by G. Schubert, chap. 4, pp. 121 – 151, Elsevier, Oxford.
+Van Hoolst, T., A. Rivoldini, R.-M. Baland, and M. Yseboodt (2012), The effects of tides
+and an inner core on the forced libration of mercury, Earth Planet. Sci. Lett., 333–334,
+83–90.
+Verma, A. K., and J. L. Margot (2016), Mercury’s gravity, tides, and spin from MESSENGER
+ radio science data, J. Geophys. Res. Planets, 121, 1627–1640.
+Wessel, P., W. H. F. Smith, R. Scharroo, J. Luis, and F. Wobbe (2013), Generic Mapping
+Tools: Improved version released, EOS Trans. AGU, 94, 409–410.
+Williams, J. G., and D. H. Boggs (2015), Tides on the Moon: theory and determination of
+dissipation, J. Geophys. Res. Planets, 120 (4), 689–724, doi:10.1002/2014JE004755.
+Williams, J. G., D. H. Boggs, C. F. Yoder, J. T. Ratcliff, and J. O. Dickey (2001), Lunar
+rotational dissipation in solid body and molten core, J. Geophys. Res., 106, 27,933–
+27,968.
+Williams, J. G., A. S. Konopliv, D. H. Boggs, R. S. Park, D.-N. Yuan, F. G. Lemoine,
+S. Goossens, E. Mazarico, F. Nimmo, R. C. Weber, S. W. Asmar, H. J. Melosh, G. A.
+Neumann, R. J. Phillips, D. E. Smith, S. C. Solomon, M. M. Watkins, M. A. Wieczorek,
+J. C. Andrews-Hanna, J. W. Head, W. S. Kiefer, I. Matsuyama, P. J. McGovern, G. J.
+Taylor, and M. T. Zuber (2014), Lunar interior properties from the GRAIL mission, J.
+Geophys. Res. Planets, 119 (7), 1546–1578, doi:10.1002/2013JE004559.
+Yoder, C. F. (1981), The free librations of a dissipative Moon, Phil. Trans. R. Soc. Lond.
+A, 303, 327–338.
+Yseboodt, M., and J. L. Margot (2006), Evolution of Mercury’s obliquity, Icarus, 181,
+327–337.
+ –33–
\ No newline at end of file
diff --git a/read/results/playa/2201.00069.txt b/read/results/playa/2201.00069.txt
new file mode 100644
index 0000000..c79949c
Binary files /dev/null and b/read/results/playa/2201.00069.txt differ
diff --git a/read/results/playa/2201.00151.txt b/read/results/playa/2201.00151.txt
new file mode 100644
index 0000000..21248c9
--- /dev/null
+++ b/read/results/playa/2201.00151.txt
@@ -0,0 +1,1146 @@
+arXiv:2201.00151v1  [astro-ph.GA]  1 Jan 2022 Astronomy & Astrophysics manuscript no. Populations4  ©ESO 2022
+January 4, 2022
+Multiple stellar populations in Schwarzschild modeling
+and the application to the Fornax dwarf
+Klaudia Kowalczyk and Ewa L. Łokas
+Nicolaus Copernicus Astronomical Center, Polish Academy of Sciences, Bartycka 18, 00-716 Warsaw, Poland
+e-mail: klaudia.kowalczyk@gmail.com, lokas@camk.edu.pl
+January 4, 2022
+ ABSTRACT
+Dwarf spheroidal (dSph) galaxies are believed to be strongly dark matter dominated and thus are considered perfect objects to study
+dark matter distribution and test theories of structure formation. They possess resolved, multiple stellar populations that offer new
+possibilities for modeling. A promising tool for the dynamical modeling of these objects is the Schwarzschild orbit superposition
+method. In this work we extend our previous implementation of the scheme to include more than one population of stars and a more
+general form of the mass-to-light ratio function. We tested the improved approach on a nearly spherical, gas-free galaxy formed in
+the cosmological context from the Illustris simulation. We modeled the binned velocity moments for stars split into two populations
+by metallicity and demonstrate that in spite of larger sampling errors the increased number of constraints leads to significantly tighter
+confidence regions on the recovered density and velocity anisotropy profiles. We then applied the method to the Fornax dSph galaxy
+with stars similarly divided into two populations. In comparison with our earlier work, we find the anisotropy parameter to be slightly
+increasing, rather than decreasing, with radius and more strongly constrained. We are also able to infer anisotropy for each stellar
+population separately and find them to be significantly different.
+Key words. galaxies: kinematics and dynamics – galaxies: structure – galaxies: fundamental parameters – galaxies: dwarf – galaxies:
+star clusters: individual: Fornax
+1. Introduction
+Dwarf spheroidal (dSph) galaxies of the Local Group (Mateo
+1998; Tolstoy et al. 2009) are considered to be a perfect tool to
+test our current theories of structure formation involving dark
+matter in the context of near-field cosmology. The objects are
+believed to be strongly dark matter dominated with mass-to-light
+ratios even on the order of a few hundred solar units. Due to their
+proximity they are also the only extragalactic systems where individual
+ stars can be resolved and their velocities measured offering
+ the possibility to create interesting dynamical modeling
+techniques.
+The first estimates of dark matter content in dSph galaxies
+were based on a single measurement of the line-of-sight velocity
+dispersion of the stars and the application of the virial theorem.
+As the samples of the stars with kinematic measurements grew,
+it became possible to estimate the profile of the velocity dispersion
+ and model it using the Jeans equation (Binney & Tremaine
+2008). Since the stars in the galaxy can move on a variety
+of orbits, from circular to radial, the degeneracy between the
+anisotropy of the orbits and the mass distribution is inherent in
+this type of modeling. The reason for this lies in the fact that
+different combinations of these quantities can reproduce the velocity
+ dispersion profile equally well.
+A way to overcome this issue, at least partially, is to resort to
+higher order line-of-sight velocity moments, such as the kurtosis,
+ and use the corresponding Jeans equations. Since the kurtosis
+ is more sensitive to the velocity anisotropy than to the mass
+distribution, useful constraints can be obtained on both. Still, the
+method requires large kinematic samples to estimate the velocity moments reliably and some assumption on the functional form
+of the anisotropy (Łokas 2002; Łokas et al. 2005).
+The Schwarzschild modeling technique (Schwarzschild
+1979) offers a different approach to estimate the properties of
+dSph galaxies without prior assumptions on the type of orbits.
+It relies on building a galaxy model out of a set of best-fitting
+orbits probed in the range of energy and angular momenta. In
+this method, the anisotropy of the stellar orbits comes out as a
+result of the modeling in the same way as the density profile. Although
+ it has been originally developed for large elliptical galaxies
+ (van der Marel et al. 1998; Valluri et al. 2004; Gebhardt et al.
+2015), it has recently been adopted for use on discrete data
+characteristic of dSph galaxies and applied to a number of
+dwarfs, including Carina, Draco, Fornax, Sculptor, and Sextans
+(Jardel & Gebhardt 2008; Jardel et al. 2013; Breddels & Helmi
+2013; Breddels et al. 2013; Kowalczyk et al. 2019).
+Many dSph galaxies show signs of the presence of multiple
+stellar populations resulting from a few star formation episodes
+(Bellazzini et al. 2001; del Pino et al. 2015; Fabrizio et al. 2016;
+Pace et al. 2020). This observation offers a way to improve the
+modeling methods since, assuming dynamical equilibrium, all
+populations are supposed to be influenced by the same underlying
+ gravitational potential of the galaxy, but they have different
+ distributions so more constraints can be imposed during
+the modeling. This approach was first used by Battaglia et al.
+(2008) to model the mass distribution in the Sculptor dSph
+galaxy. A few attempts have also been made to constrain the
+inner slope of the dark matter profile in dSph galaxies using
+this technique (Walker & Peñarrubia 2011; Amorisco & Evans
+2012; Hayashi et al. 2018) in order to resolve the so-called cusp-core
+problem. It has been shown to be difficult, however, due
+Article number, page 1 of 12
+A&A proofs: manuscript no. Populations4
+Table 1. Properties of the Illustris galaxy used to create mock data.
+Property Value
+Subhalo ID 16960
+Number of stellar particles ($N_\star$) 70446
+Number of dark matter particles ($N_{\rm DM}$) 78448
+Stellar mass ($M_\star$) $5.74 \times 10^{10}\,M_\odot$
+Dark matter mass ($M_{\rm DM}$) $4.91 \times 10^{11}\,M_\odot$
+Mean mass of stellar particles 815808 $M_\odot$
+Stellar half-mass radius 9.99 kpc
+Stellar half-number radius ($r_{1/2}$) 9.6 kpc
+Axis ratio c/a within $r_{1/2}$ 0.907
+Axis ratio b/a within $r_{1/2}$ 0.949
+Triaxiality 0.56
+to the nonsphericity of the dwarfs that introduces biases in such
+measurements (Kowalczyk et al. 2013; Genina et al. 2018).
+In our recent papers (Kowalczyk et al. 2017, 2018, 2019) we
+developed the Schwarzschild technique in the form applicable to
+binned velocity moments of a single tracer and verified its ability
+ to reproduce the mass distribution and velocity anisotropy of
+simulated galaxies. We have also studied biases resulting from
+the nonsphericity of the modeled objects. Later, we applied the
+method to model the kinematics of the Fornax dSph galaxy estimating
+ its mass and anisotropy profiles with unprecedented precision.
+
+In this paper we extend our Schwarzschild modeling technique
+ to include multiple stellar populations with the aim to
+constrain the properties of dSph galaxies even more strongly.
+We test our approach on a realistic simulated galaxy formed in
+the cosmological context, originating from the Illustris project
+(Vogelsberger et al. 2014a). Although no precise analogues of
+dSph galaxies are available in this simulation because of the resolution,
+ we use a more massive galaxy but with properties otherwise
+ similar to dSphs. The reliability of the modeling does not
+depend on the particular value of the mass so we believe these
+tests to be viable. We do not attempt to constrain the inner dark
+matter density profile (which is poorly resolved anyway) but try
+to put tighter limits on the estimates of the mass and anisotropy
+profiles. Finally, we apply the improved method to the available
+kinematic data for the distinct stellar populations of the Fornax
+dSph.
+This paper is organized as follows. In Section 2 we present
+the data for the simulated galaxy as well as their splitting into
+stellar populations and mock observations along the main axes.
+Section 3 contains an overview of our modeling method, the application
+ of the method to all stars and to two populations, and
+a comparison of the results obtained with these two approaches.
+The results of the application of the method to the Fornax dSph
+galaxy are presented in Section 4. We discuss our findings and
+summarize the paper in Section 5.
+2. Mock data
+2.1. Selection of the simulated galaxy
+In order to test our modeling method on realistic simulated
+data, we decided to use a galaxy from the Illustris project
+(Vogelsberger et al. 2014a,b; Genel et al. 2014; Nelson et al.
+2015), namely the Illustris-1 cosmological simulation. This simulation
+ follows the formation and evolution of galaxies from the
+early Universe to the present by solving gravity and hydrodynamics,
+as well as modeling of star formation, galactic winds,
+SFR [$M_\odot\,{\rm yr}^{-1}$]
+ t [Gyr] 0 4 8 12 16
+  0  2  4  6  8  10  12
+Fig. 1. Star formation rate as a function of the age of the Universe in
+the simulated galaxy from the Illustris project used to create mock data.
+The black and gray vertical arrows indicate the last mergers which the
+galaxy underwent, wet and dry, respectively.
+t [Gyr]
+ Z [Z
+⊙] 0 2 4 6 8 10
+  0  1  2  3  4  5  0 2 4 6
+ N [102 ]
+Fig. 2. Number of stars as a function of their metallicity and time of
+formation (the age of the Universe) in the simulated galaxy. The vertical
+line indicates the applied split into stellar populations.
+magnetic fields, and the feedback from black holes. Although
+dwarf galaxies that are of our interest here are not resolved in the
+suite, this can be easily overcome with the appropriate choice of
+the object and the treatment of data.
+As the key properties of dSph galaxy equivalents we identified:
+ the lack of gas, the lack of a black hole, a low spin,
+the stellar mass much smaller than the dark matter mass and a
+nearly spherical shape. The last condition was adopted in an attempt
+ to avoid any strong bias introduced by the spherical modeling
+ of a nonspherical object. Moreover, we required the galaxy
+to possess a significant number of both stellar and dark matter
+particles (over $10^5$), and a well resolved center. Due to the
+large softening scale for dark matter particles in the simulation
+($\epsilon_{\rm DM} = 1.42$ kpc), we looked for an object in which even the
+more concentrated stellar population (see Section 2.2) extended
+over 43 kpc so that the region affected by the numerical artifacts
+was enclosed within 2-3 innermost data bins (we used 20 linearly
+spaced spatial bins, see Section 3.1).
+Out of 27345 galaxies listed in the catalog of stellar circularities,
+ angular momenta, and axis ratios published by the Illustris
+ team (Genel et al. 2015) containing subhalos with the stellar
+mass larger than $10^9\,M_\odot$, only a few met our restrictive require-
+Article number, page 2 of 12
+K. Kowalczyk & E. L. Łokas: Multiple stellar populations in Schwarzschild modeling
+-80-4004080  POPULATION I
+[kpc] major POPULATION I
+intermediatePOPULATION I
+ minor
+  5.3 5.9 6.5 7.1 7.7
+ log(Σ) [M ⊙/kpc2 ]
+-80-40040  POPULATION II
+[kpc]  POPULATION IIPOPULATION II
+-160-80 0 80 160
+ V [km/s]
+-80-40040
+-80 -40 0 40  POPULATION II
+[kpc]
+ [kpc] -80 -40 0 40  POPULATION II
+[kpc] -80 -40 0 40 80 POPULATION II
+[kpc]  0 30 60 90
+ σ [km/s] -80-4004080 POPULATION II
+[kpc] major POPULATION II
+intermediatePOPULATION II
+ minor
+  5.3 5.9 6.5 7.1 7.7
+ log(Σ) [M ⊙/kpc2 ]
+-80-40040 POPULATION II
+[kpc] POPULATION IIPOPULATION II
+ -160-80 0 80 160
+ V [km/s]
+-80-40040
+-80 -40 0 40 POPULATION II
+[kpc]
+ [kpc] -80 -40 0 40POPULATION II
+[kpc] -80 -40 0 40 80POPULATION II
+ [kpc]  0 30 60 90
+ σ [km/s]
+Fig. 3. Maps of the projected stellar density, mean stellar velocity, and stellar velocity dispersion (in rows) for two stellar populations: the metal-rich
+population I (left-hand side panels) and the metal-poor population II (right-hand side), and observations along the principal axes determined
+for all stars (in columns, along the major, the intermediate, and the minor axis, respectively).
+-1-0.5 0 0.5 1
+  1  10  100β(r)
+ r [kpc]
+-1-0.5 0 0.5 1
+  0  10  20  30  40  50β(r)
+ r [kpc]all stars
+pop I
+pop II  40 60 80 100 120
+  1  10  100σ r(r)
+ r [kpc]
+ 40 60 80 100 120
+  0  10  20  30  40  50σ r(r)
+ r [kpc]  40 60 80 100 120
+  1  10  100σ t(r)
+ r [kpc]
+ 40 60 80 100 120
+  0  10  20  30  40  50σ t(r)
+ r [kpc]
+Fig. 4. Profiles of the velocity anisotropy parameter, radial velocity dispersion, and tangential velocity dispersion (in consecutive columns) calculated
+ from all stars (in red), including only population I (in orange), and only population II (in blue). The upper row shows the profiles using the
+logarithmic distance scale and reaching the outskirts of the galaxy whereas the bottom row presents in the linear scale only the radial range used
+in the modeling.
+ments. We decided to use a galaxy labeled as subhalo 16960.
+All the relevant properties of the galaxy are given in Table 1,
+including numbers of particles and total masses for both components,
+ and details on the shape of the stellar component: the axis
+ratios minor to major (shortest to longest) c/a, intermediate to
+major b/a, and the triaxiality parameter $T = (a^2 - b^2)/(a^2 - c^2)$.
+We distinguish between the half-mass radius provided in the Illustris
+database and the half-number radius $r_{1/2}$, which we use for further calculations in this paper. The difference between the
+two comes from a small gradient in the stellar mass-to-light ratio
+with the distance from the galactic center. Since in our approach
+we treat stars as equal-mass particles and refer to number densities
+ (multiplied by the mean mass of a stellar particle when
+needed), the application of the half-number radius is more self-consistent.
+
+ Article number, page 3 of 12
+A&A proofs: manuscript no. Populations4
+10-310-1101103
+  10  100n ⋆(R) [kpc-2 ]
+ R [kpc]major
+  10  100
+R [kpc]intermediate
+  10  100
+R [kpc]minor
+all stars
+pop I
+pop II
+Fig. 5. Surface number density profiles of the stellar data samples for the simulated galaxy observed along different lines of sight (from the left to
+the right). Different lines show profiles for all available stars (in red), the metal-rich population I (in orange), and the metal-poor population II (in
+blue). Thin vertical lines indicate $r_0$ (see text) and the outer boundary of the spectroscopic data.
+2.2. Splitting the stars into populations
+Our chosen galaxy shows a complex formation history undergoing
+ multiple mergers which result in extended star formation
+with a few star formation bursts. The last wet merger, that is a
+merger with an object containing gas, happens at 6.9 Gyr from
+the beginning of the simulation, whereas the last dry merger (no
+gas transfer) at 12.1 Gyr, giving the galaxy enough time to regain
+dynamical equilibrium. We present the star formation rate (SFR)
+as a function of time (the age of the Universe) in Fig. 1, where
+these last mergers are indicated with black and gray vertical arrows.
+ In Fig. 2 we show the distribution of stars as a function of
+their metallicity (in solar units) and the time of formation. In order
+ to divide the stellar sample into two populations we cut it in
+half based on the metallicity index of each stellar particle. This
+split is indicated in Fig. 2 with the vertical line. With satisfying
+accuracy it separates the stars born before and after 4 Gyr since
+the start of the simulation, which corresponds to the formation
+time before and after the end of the second major star burst, as
+shown in Fig. 1. We refer to the metal-rich stars as population I
+and to the metal-poor as population II, following the commonly
+used nomenclature in astronomy.
+In Fig. 3 we present maps of the projected stellar mass density,
+ line-of-sight velocity, and line-of-sight velocity dispersion
+for both populations obtained by projecting the galaxy along its
+principal axes. The orientation was determined from the inertia
+ tensor calculated from all stars within the half-number radius
+$r_{1/2}$ and therefore is the same in both panels. The two populations
+ differ significantly in the spatial distribution and kinematics
+ with the metal-rich (considered to be younger) population I
+being more concentrated but having lower central velocity dispersion.
+ Both populations show a weak rotation signal at large
+distances from the center.
+The velocity anisotropy parameter $\beta(r) = 1 - (\sigma_\theta^2 + \sigma_\phi^2)/(2\sigma_r^2)$,
+where $\sigma_i$ are velocity dispersions in spherical coordinates
+ (Binney & Tremaine 2008), describes the orbital structure
+of galaxies. It is one of the most important dynamical properties
+of bound systems which cannot be inferred directly from observations
+ and has to be recovered by dynamical modeling. The
+profiles of the anisotropy parameter $\beta$ as well as the radial $\sigma_r$
+and tangential $\sigma_t = [(\sigma_\theta^2 + \sigma_\phi^2)/2]^{1/2}$ velocity dispersions for our
+simulated galaxy are presented in the consecutive columns of Fig. 4. Throughout the paper we use red, orange, and blue colors
+to indicate values calculated or recovered for all stars, population
+ I, and population II, respectively. The two rows of the figure
+show the behavior of the parameters at different scales. The top
+row plots the profiles with the distance from the center of the
+galaxy in the logarithmic scale and shows the drop of anisotropy
+at the outer edges of the object. The bottom row uses the linear
+distance scale and focuses on the main body of the galaxy.
+Figure 5 shows the surface number density profiles of the
+stars as measured in different directions. We can see that while
+the different subsamples have quite distinguishable profiles, the
+difference between the lines of sight is small because the galaxy
+is close to spherical.
+2.3. Observables
+We generated nine sets of mock data by observing all stars and
+each population separately along the principal axes determined
+from all stars. For the observables to be used in the modeling we
+divided the stars into 20 bins spaced linearly in distance from
+the center of the galaxy up to 50 kpc, measuring the fraction
+of the total number of stars and the 2nd, 3rd, and 4th proper
+moments of the line-of-sight velocity defined in Eq. 8 and 9
+of Kowalczyk et al. (2018). The profiles of these quantities are
+shown in consecutive rows in Fig. 6. Columns correspond to different
+ lines of sight, from the left to the right: along the major,
+intermediate, and minor axis of the galaxy. For clarity of the figure,
+ in each panel we indicate only the error bars for one of the
+data sets. However, as the number of stars in a sample remains
+roughly constant between the lines of sight, the error bars are
+very similar among the panels in a given row.
+Although in our previous studies of the reliability of
+the Schwarzschild modeling and its applications to real data
+(Kowalczyk et al. 2017, 2018, 2019) we approximated the density
+ profile of the tracer with the Sérsic formula, we found that it
+does not provide a good approximation of the data for the simulated
+ galaxy considered here. We therefore fit the projected density
+ profile with the King formula (King 1962)
+\[ I(R) = I_0 \left[ \frac{1}{\sqrt{1 + (R/R_c)^2}} - \frac{1}{\sqrt{1 + (R_t/R_c)^2}} \right]^2 , \tag{1} \]
+Article number, page 4 of 12
+K. Kowalczyk & E. L. Łokas: Multiple stellar populations in Schwarzschild modeling
+10-310-210-1100
+  0  10  20  30  40M(R)
+ R [kpc]major
+  0  10  20  30  40
+R [kpc]intermediate
+  0  10  20  30  40  50
+R [kpc]minor
+36912
+  0  10  20  30  40m 2(R)[103 (km s-1 )2 ]
+ R [kpc]  0  10  20  30  40
+R [kpc]  0  10  20  30  40  50
+R [kpc]
+-10-50510
+  0  10  20  30  40m 3(R)[104 (km s-1 )3 ]
+ R [kpc]  0  10  20  30  40
+R [kpc]  0  10  20  30  40  50
+R [kpc]
+01234
+  0  10  20  30  40m 4(R)[108 (km s-1 )4 ]
+ R [kpc]  0  10  20  30  40
+R [kpc]  0  10  20  30  40  50
+R [kpc]all stars
+pop I
+pop II
+Fig. 6. Observables used in our Schwarzschild modeling scheme of the simulated galaxy. In rows: the fraction of the total number of stars, 2nd,
+3rd, and 4th velocity moment. In columns: mock data from the simulated galaxy along the major, intermediate, and minor axis. In red we present
+the values obtained for all stars whereas in orange and blue those for populations I and II, respectively. For clarity of the figure, in each panel we
+indicate only the error bars for one of the data sets.
+where $I_0$, $R_c$, and $R_t$ are the model parameters. The profile can
+be analytically deprojected to obtain the 3D density
+\[ \rho(r) = \frac{\rho_0}{z^2} \left[ \frac{1}{z} \arccos(z) - \sqrt{1 - z^2} \right] , \tag{2} \]
+where
+\[ \rho_0 = \frac{I_0}{\pi R_c \left[ 1 + (R_t/R_c)^2 \right]^{3/2}} \tag{3} \]
+and
+\[ z = \sqrt{\frac{r^2 + R_c^2}{R_c^2 + R_t^2}} . \tag{4} \]
+3. Schwarzschild modeling
+In this section we briefly present our modeling method and its
+application to the data sets derived for all stars and the two populations
+ of the simulated galaxy separately. In both cases our
+aim was to recover the profiles of the total mass and the velocity
+anisotropy.
+3.1. Overview of the method
+We follow the approach introduced in Kowalczyk et al. (2018),
+namely we model the total mass profile with the mass-to-light
+ratio Υ varying with radius:
+\[ \log \Upsilon(r) = \begin{cases} \log(\Upsilon_0) & r \le r_0 \\ a\,(\log r - \log r_0)^c + \log(\Upsilon_0) & r > r_0 \end{cases} \tag{5} \]
+Article number, page 5 of 12
+A&A proofs: manuscript no. Populations4
+ 1  2  3
+ 0 0.5 1 1 2 3 ALL
+ Υ
+0ac
+  1  2  3
+ 0 0.5 1 1 2 3 POPULATIONS
+ Υ
+0ac
+  10 100
+ χ2
+ 1  2  3
+ 0 0.5 1 1 2 3 POP I
+ Υ
+0ac
+  1  2  3
+ 0 0.5 1 1 2 3 POP II
+ Υ
+0ac
+  10 100
+ χ2
+Fig. 7. Absolute values of $\chi^2$ obtained from the fits of three data sets: all stars (top left panel), population I (bottom left), and population II (bottom
+right) for the observations along the major axis of the simulated galaxy. The results for the modeling of two populations (top right) were obtained
+as an algebraic sum of values for populations I and II. To avoid large numbers in the figure, $\Upsilon_0$ was divided by the mean mass of a stellar particle.
+where r is the distance from the center of the galaxy, $r_0$ is a
+constant, while $\Upsilon_0$, a, and c are the parameters of a model. We
+have assumed $\log r_0 = 0.33$ which corresponds to three softening
+scales for stellar particles in the Illustris simulation.
+We probed the parameter a ∈ [0 : 1.3] with a step ∆a = 0.04
+and c ∈ [1.1 : 2.9] with a step ∆c = 0.2, imposing the requirement
+ on the total density profile to be monotonically decreasing
+with radius. For each set of parameters and for each line of sight
+we generated 1200 orbits using 100 values of energy (expressed
+with the radius of a circular orbit) spaced logarithmically and
+12 values of the relative angular momentum spaced linearly. The
+outer radius of the orbit library, that is the apocenter of the most
+extended orbit, was set to $r_{\rm out} = 165$ kpc in order to cover over
+0.999 of the total stellar mass based on the fitted King profile
+parameters.
+We fit the kinematics weighted with the fraction of mass with
+the constrained least squares algorithm where different values
+of $\Upsilon_0$ were obtained with a simple transformation of velocities
+given by Eq. 12, 13, and 15 in Kowalczyk et al. (2018). In order
+to smooth out the numerical artifacts, the three-dimensional
+$\chi^2$ spaces were then interpolated with 12th-order polynomials ($\sim a^4 c^4 \Upsilon_0^4$)
+that were further used to determine the global minimums
+(identified as the best-fitting models) and 1, 2, 3 σ confidence
+levels which for three parameters correspond to $\Delta\chi^2 = 3.53, 8.02, 14.2$ (Press et al. 1992).
+3.2. Application to mock data
+In the following we present the direct and inferred results of
+the Schwarzschild modeling of the data sets described in Section
+ 2.3.
+First, Fig. 7 shows the distribution of the absolute values of
+the $\chi^2$ as a function of three parameters of the mass-to-light ratio.
+ In order to avoid unnecessary repetitions, we include only
+the plot for the mock data obtained by observing the Illustris
+galaxy along its major axis as the others are qualitatively similar.
+The four panels refer to fits for all stars (top left), the metal-rich
+population I (bottom left), the metal-poor population II (bottom
+right), and the one named "populations" (top right) which is the
+algebraic sum of values for both populations.
+As our parametrization of the mass-to-light ratio is not intuitive
+we present its profiles explicitly in the first rows of the left-
+Article number, page 6 of 12
+K. Kowalczyk & E. L. Łokas: Multiple stellar populations in Schwarzschild modeling
+1061071081091010
+  10  100 ALL
+Υ(r) [M ⊙/L ⊙]
+ r [kpc]major
+  10  100ALL
+r [kpc]intermediate
+  10  100ALL
+ r [kpc]minor
+3σ
+2σ
+1σ
+best model
+data
+104106108
+  10  100 ALL
+ν tot(r) [M ⊙ kpc-3 ]
+ r [kpc]  10  100ALL
+r [kpc]  10  100ALL
+ r [kpc]
+101010111012
+  10  100 ALL
+M tot(r) [M ⊙]
+ r [kpc]  10  100ALL
+r [kpc]  10  100ALL
+ r [kpc]
+-2-101
+  0  10  20  30  40 ALL
+β(r)
+ r [kpc]  0  10  20  30  40ALL
+r [kpc]  0  10  20  30  40  50ALL
+ r [kpc] 1061071081091010
+  10  100 POPULATIONS
+Υ(r) [M ⊙/L ⊙]
+ r [kpc]major
+  10  100POPULATIONS
+r [kpc]intermediate
+  10  100POPULATIONS
+ r [kpc]minor
+3σ
+2σ
+1σ
+best model
+data
+104106108
+  10  100 POPULATIONS
+ν tot(r) [M ⊙ kpc-3 ]
+ r [kpc]  10  100POPULATIONS
+r [kpc]  10  100POPULATIONS
+ r [kpc]
+101010111012
+  10  100 POPULATIONS
+M tot(r) [M ⊙]
+ r [kpc]  10  100POPULATIONS
+r [kpc]  10  100POPULATIONS
+ r [kpc]
+-2-101
+  0  10  20  30  40 POPULATIONS
+β(r)
+ r [kpc]  0  10  20  30  40POPULATIONS
+r [kpc]  0  10  20  30  40  50POPULATIONS
+ r [kpc]
+Fig. 8. Left-hand side: results of Schwarzschild modeling of three mock data sets obtained by observing the simulated galaxy along the principal
+axes. In rows: derived mass-to-light ratio, total density, total mass, and anisotropy parameter. In columns: observations along the major, intermediate,
+ and minor axis, respectively. Green lines indicate values for the best-fit models whereas the colored areas of decreasing intensity show the
+1, 2, and 3 σ confidence levels. The true values are presented as black lines. Thin vertical lines mark the values of $r_0$ and the outer range of the
+data sets, from left to right. Right-hand side: same as left but for the fit of two stellar populations.
+and right-hand side panels of Fig. 8 for the results obtained for
+all stars and the populations, respectively. We further calculate
+the total density (second rows) and the total mass content (third
+rows). We include the obtained orbit anisotropy within the modeled
+ range in the bottom rows. The consecutive columns present
+the results for the observations along the major, intermediate,
+and minor axis. Green lines indicate values for the best-fit models
+ whereas the colored areas of decreasing intensity correspond
+to 1, 2, and 3 σ confidence regions obtained as extreme values allowed
+by the models with $\chi^2$ within a given region. In each panel
+the true values from the simulation are presented with black lines
+while thin vertical lines mark the values of $r_0$ and the outer range
+of the data sets beyond which the reliability of results drops significantly.
+ The true mass-to-light ratio profile was obtained by
+dividing the total mass by the fitted King profiles, therefore the
+drop at 100 kpc is the numerical artifact occurring at the very
+outskirts of the galaxy.
+Whereas in the right-hand side panels of Fig. 8 the resulting
+anisotropy is obtained from the fit of all stars and uses only the
+location of global minimum and confidence levels from two populations
+ (as in the top right panel of Fig. 7), in Fig. 9 we present
+another method of calculating the anisotropy. In the second and
+third row we show the derived profiles for population I and II
+separately and combine them as stellar mass weighted average
+in the top row. As in previous figures, three columns refer to the
+different lines of sight whereas the narrow fourth one shows the
+behavior of the true profiles outside the modeled range which, as
+we noticed in our previous studies, in a limited way influences
+the results. Such an impact is understandable since the stars at
+larger distances from the center are still included in the line-of-sight
+measurements.
+3.3. Comparison of fitting results
+The main strength of the two populations method comes from
+tracing the underlying gravitational potential at different scales.
+As can be seen in the bottom panels of Fig. 7, population I, which
+is more concentrated, is also more sensitive to $\Upsilon_0$, but gives
+weaker constraints on a or c. On the other hand, population II
+attempts to reproduce the total mass content at larger distances
+as well, therefore showing stronger coupling between the parameters.
+
+The global minimums of the $\chi^2$ distributions for both approaches,
+ that is modeling one and two populations, which we
+identify as the best-fitting models, closely coincide showing that
+there is no internal bias in the improved method. However, significant
+ differences can be observed when comparing the confidence
+ levels, mainly at 1 and 3 σ. Namely, we find that using
+two populations, the constraints we obtain on the density and
+anisotropy profile are much stronger.
+Additionally, the more accurate method allows us to study
+other effects and biases, for example the consequences of the
+nonsphericity of the modeled object. Whereas for the fit of all
+stars the true values of the density, mass, and anisotropy profiles
+are contained within 1 σ confidence regions, the results for the
+populations are more or less biased depending on the axis. They
+are well reproduced for the observation along the intermediate
+axis, for which the effects of nonsphericity seem to cancel out,
+and more biased for the remaining lines of sight. We notice a
+trend from under- to overestimation of the anisotropy when going
+ from the major to the minor axis.
+ Article number, page 7 of 12
+A&A proofs: manuscript no. Populations4
+-101
+  0  10  20  30  40POP I + POP II β(r)
+ r [kpc]major
+  0  10  20  30  40
+r [kpc]intermediate
+  0  10  20  30  40
+r [kpc]minor
+  50  60  70  80
+-101
+  0  10  20  30  40POP I β(r)
+ r [kpc]  0  10  20  30  40
+r [kpc]  0  10  20  30  40
+r [kpc]  50  60  70  80
+-101
+  0  10  20  30  40POP II β(r)
+ r [kpc]  0  10  20  30  40
+r [kpc]  0  10  20  30  40
+r [kpc]  50  60  70  80
+data
+best model
+  1σ
+2σ
+3σ
+Fig. 9. Profiles of the anisotropy parameter obtained with the Schwarzschild modeling of two stellar populations of the simulated galaxy. In rows:
+results for all stars (calculated as the superposition of two populations), population I, and population II. Colors follow the convention used in
+previous figures. In columns: observations along the major, intermediate, and minor axis. The last narrower column shows the data (black lines)
+outside the modeled radial range. Color lines indicate values for the best-fit models whereas the colored areas of decreasing intensity show the 1,
+2, and 3 σ confidence regions.
+4. Modeling Fornax dSph
+In this section we present the application of our Schwarzschild
+modeling scheme to the observational data for the Fornax dSph
+galaxy obtained by del Pino et al. (2015) and del Pino et al.
+(2017). This study is a follow-up of the work of Kowalczyk et al.
+(2019) and can be directly compared to the results presented
+there. Moreover, we refer the reader to these previous publications
+ for details on the origin of data and our procedures used
+for cleaning the spectroscopic sample.
+Similarly to the approach introduced in Section 2.2, we divided
+ all available stars into two equal-size populations based on
+their metallicity and then cross-correlated the samples with the
+data used in Kowalczyk et al. (2019). The metallicity histogram
+of the final spectroscopic sample is shown in Fig. 10. Additionally,
+ we color-coded each bin with the population it has been
+assigned to, namely orange or blue for population I or II. Interestingly,
+ the case of Fornax is similar to our simulated galaxy
+as the split at [Fe/H]= −1 also captures an important feature
+of the object’s star formation history, separating stars into subsamples
+ older and younger than 6 Gyr, as shown in Fig. 12 of
+del Pino et al. (2015) and Fig. 8 of del Pino et al. (2017). The
+numbers of stars contained in the samples of all stars, population
+ I, and population II are given in Table 2, where the indices
+"phot" and "spec" refer to the photometric and kinematic samples.
+ The sum of stars in the populations is lower than in the sample of all stars since only stars with reliable measurements
+of metallicity could be included.
+N
+ [Fe/H]pop I
+pop II
+ 0 20 40 60 80 100
+-2.5 -2 -1.5 -1 -0.5  0
+Fig. 10. Metallicity histogram of the final spectroscopic sample used in
+the modeling of two stellar populations in the Fornax dSph. Each bin is
+color-coded according to the population it has been assigned to, orange
+or blue for population I and II, respectively.
+As we have shown in our earlier work, the light profile of the
+Fornax dSph can be well reproduced with the three-parameter
+Article number, page 8 of 12
+K. Kowalczyk & E. L. Łokas: Multiple stellar populations in Schwarzschild modeling
+Table 2. Properties of the data samples for the Fornax dSph.
+Property ALL POP I POP II
+Number of stars ($N_{\rm phot}$) 65 797 14 882 49 205
+Number of stars ($N_{\rm spec}$) 3286 1136 1151
+Stars within 1.8 kpc 3268 1134 1130
+Fitted normalization ($N_0$) [$\times 10^4$] 6.95 1.81 5.45
+Sérsic radius ($R_{\rm S}$) [kpc] 0.454 0.429 0.420
+Sérsic parameter ($m$) 0.808 0.807 0.898
+102103104105
+  0.2  0.5  2 0.1   1n ⋆(R) [kpc-2 ]
+ R [kpc]all stars
+popI
+popII
+Fig. 11. Surface number density profiles of the photometric data samples
+ for the Fornax dSph: all available stars (in red), the metal-rich population
+ I (in orange), and the metal-poor population II (in blue). Thin
+vertical lines indicate $r_0$ (see text) and the outer boundary of the spectroscopic
+ data.
+Sérsic formula (Sérsic 1968). The profiles of number density for
+all stars and both populations together with the best-fitting Sérsic
+profiles are presented in Fig. 11. The colors follow the convention
+ introduced in previous sections. Thin vertical lines indicate
+the innermost data point for the light profile for all stars and
+the outer boundary of the kinematic sample. The former, set at
+log r = −0.16, is also used as the minimum of the mass-to-light
+ratio profile ($r_0$ in Eq. 5). The fitted parameters of the profiles,
+that is the normalization $N_0$, the Sérsic radius $R_{\rm S}$, and the Sérsic
+parameter m, are included in the second part of Table 2.
+Figure 12 presents the profiles of the observables used in the
+Schwarzschild modeling: the fraction of stars and the 2nd, 3rd,
+and 4th velocity moments (top to bottom) for the three data samples:
+ all stars, population I, and population II (in red, orange, and
+blue, respectively). The error bars indicate 1 σ sampling errors.
+The parameter space for Υ(r) has been probed as follows:
+a ∈ [0 : 1.85] with a step ∆a = 0.05 and c ∈ [1.2 : 6] with a
+step ∆c = 0.2. We point out that in Kowalczyk et al. (2019) the
+parameter c was fixed at c = 3 and now we fit it as a free parameter.
+ As for the mock data in Section 3.2, different values of
+$\Upsilon_0$ were obtained with the transformation of velocity moments
+within the $\chi^2$ fitting routine. The values of $\Delta\chi^2$ for all stars and
+the populations are shown in the two panels of Fig. 13 (left and
+right-hand side, respectively). Due to the dense coverage of the
+grid, we decided to include only the values within 3 σ from the
+fitted minimums (see Section 3.1).
+The profiles of the mass-to-light ratio, total density, total
+mass, and velocity anisotropy resulting from the $\chi^2$ distributions
+are presented in the consecutive rows of Fig. 14. The anisotropy
+profile for the populations is based on the fit of all stars but using  0 0.05 0.1 0.15 0.2 0.25
+  0  0.4  0.8  1.2  1.6M(R)
+ R [kpc]all stars
+pop I
+pop II
+04080120160200
+  0  0.4  0.8  1.2  1.6m 2(R)[(km s-1 )2 ]
+ R [kpc]
+-16-80816
+  0  0.4  0.8  1.2  1.6m 3(R)[102 (km s-1 )3 ]
+ R [kpc]
+0481216
+  0  0.4  0.8  1.2  1.6m 4(R)[104 (km s-1 )4 ]
+ R [kpc]
+Fig. 12. Observables of the Fornax dSph used in our Schwarzschild
+modeling scheme. In rows: the fraction of the total number of stars, the
+2nd, 3rd, and 4th velocity moment. In red we present the values obtained
+for all stars whereas in orange and blue those for populations I and II,
+respectively.
+the confidence levels on Υ from the fit of two populations. Green
+lines indicate the values for the best-fitting models whereas the
+colored areas of decreasing intensity show the 1, 2, and 3 σ confidence
+ regions. Additionally, with black dashed lines we include
+the results from Kowalczyk et al. (2019) for comparison.
+As a result of freeing the steepness of the mass-to-light
+ratio profile (parameter c) with respect to the previous study
+Article number, page 9 of 12
+A&A proofs: manuscript no. Populations4
+ 0 0.5 1 1.5
+  0
+ 0.5
+ 1
+ 1.5 2 3 4 5 6 ALL
+Υ
+0
+ ac
+  0 0.5 1 1.5
+  0
+ 0.5
+ 1
+ 1.5 2 3 4 5 6 POPULATIONS
+Υ
+0
+ ac
+  0 3 6 9 12
+ χ2 -χ2 min
+Fig. 13. Values of $\chi^2$ relative to the fitted minimum within the range of 3 σ confidence level for all stars (left panel) and for the populations (right
+panel) for the Fornax dSph.
+(Kowalczyk et al. 2019), we obtained higher estimates of the enclosed
+ total mass at larger radii. In particular, for the mass enclosed
+ within 1.8 kpc we get $M_{\rm all}(<1.8\ {\rm kpc}) = 3.87^{+1.48}_{-1.56} \times 10^{8}\ M_\odot$
+from the fit for all stars and $M_{\rm pops}(<1.8\ {\rm kpc}) = 4.71^{+0.87}_{-1.13} \times 10^{8}\ M_\odot$
+from the fit of populations, while previously we had
+$M_{\rm old}(<1.8\ {\rm kpc}) = 3.7^{+1.4}_{-1.3} \times 10^{8}\ M_\odot$.
+Interestingly, despite the significant shift of the position of
+$\chi^2_{\rm min}$ (to c = 4.2 for all stars and 3.6 for populations), the obtained
+ profile of the anisotropy parameter remains decreasing or
+flat for all stars but changes to increasing from 0 to 0.5 for the
+populations. Nevertheless, even in the latter case the previous
+result agrees with the new finding within 1 σ.
+The detailed analysis of the anisotropy is shown in Fig. 15
+where the middle and bottom panels present the profiles obtained
+ for each population separately. We notice that the profile
+for population I is decreasing or has a local minimum whereas
+for population II it is increasing (from −0.25 to 0.5 for the best-fitting
+ model). Since population I is more concentrated, the last
+bins contain very few stars, which limits their credibility. The
+top panel of Fig. 15 presents the anisotropy of all stars calculated
+ as a weighted superposition of two populations. With such an
+approach we still obtain the increasing profile (from 0 to 0.5) but
+the previous result agrees with it only within 2 σ.
+Since Fornax dSph is significantly elongated with the projected
+ ellipticity of ε = 0.30 ± 0.01 (Irwin & Hatzidimitriou
+1995), we anticipate some bias in the obtained results caused
+by the spherically symmetric modeling. Kowalczyk et al. (2018)
+studied such bias in an axisymmetric simulated object qualitatively
+ similar to Fornax and identified differences in the systematic
+ errors depending on whether the galaxy was observed along
+its major or minor axis. Assuming that Fornax is observed along
+the line of sight in between these extremes, we expect the total
+mass profile to be slightly overestimated and the anisotropy to be
+underestimated, further strengthening the likelihood of the real
+anisotropy to be radial and its profile to be growing with radius
+with respect to the results of Kowalczyk et al. (2019).
+Both constant (like for our population I) and growing (population
+ II) anisotropy profiles can arise from biased modeling of the real growing profile by observing an object along the
+minor and major axis, respectively. However, for the bias to
+occur in two populations presented here, their inner orientations
+ would need to be opposite. Since such morphological features
+ are not supported by the photometric studies of Fornax
+(del Pino et al. 2015; Wang et al. 2019) which rather find a good
+spatial alignment between the stellar populations, we conclude
+that the anisotropy profiles of the two populations modeled in
+this work are indeed significantly distinct.
+Finally, it is worth noticing that the so-called mass-follows-light
+ model, that is the one following from the assumption that
+the total density traces the stellar distribution, is no longer supported
+ by the fit of the populations. With our parametrization,
+the mass-follows-light model corresponds to a = 0 and whereas
+it is enclosed within 3 σ for the fit of all stars, as was the case
+in Kowalczyk et al. (2019), the allowed values for the improved
+method are much larger, as demonstrated by the right panel of
+Fig. 13.
+5. Summary and discussion
+Building on the previously created implementation of the
+Schwarzschild orbit superposition method focused on modeling
+dSph galaxies of the Local Group (Kowalczyk et al. 2017, 2018,
+2019), we improved our tool by introducing multiple stellar populations.
+ Such an improvement is desirable and justified since
+many of the dwarfs show signs of multiple star formation bursts
+or extended star formation episodes. As the different populations
+trace the common underlying gravitational potential, one may
+expect a significant improvement in the estimates of not only the
+total mass content but also the orbit anisotropy since this robust
+modeling technique reproduces the anisotropy as a by-product
+of the modeling rather than taking it as an assumption.
+We have tested our hypothesis by modeling mock data generated
+ from a galaxy formed in the Illustris simulation. Due to the
+limitations of the resolution, we chose a galaxy of mass a few orders
+ of magnitude larger than the estimated masses of classical
+dwarfs. Still, the galaxy possessed appropriate qualitative characteristics,
+ such as the lack of gas and an almost spherical shape,
+Article number, page 10 of 12
+K. Kowalczyk & E. L. Łokas: Multiple stellar populations in Schwarzschild modeling
+101103105
+ 0.1  1Υ(r) [M ⊙/L ⊙]
+ r [kpc]ALL
+  0.1  1
+r [kpc]POPULATIONS
+3σ
+2σ
+1σ
+best model
+K19
+104106108
+ 0.1  1ν tot(r) [M ⊙ kpc-3 ]
+ r [kpc]  0.1  1
+r [kpc]
+105107109
+ 0.1  1M tot(r) [M ⊙]
+ r [kpc]  0.1  1
+r [kpc]
+-3-2-101
+  0  0.4  0.8  1.2  1.6β(r)
+ r [kpc]  0  0.4  0.8  1.2  1.6
+r [kpc]
+Fig. 14. Results of Schwarzschild modeling of the Fornax dSph.
+In rows: derived mass-to-light ratio, total density, total mass, and
+anisotropy parameter. In columns: results for all stars and the populations,
+ respectively. Green lines indicate the values for the best-fit models
+whereas the colored areas of decreasing intensity show the 1, 2, and 3 σ
+confidence regions. The best-fitting values obtained by Kowalczyk et al.
+(2019) are shown with black dashed lines.
+that made it a good test bed for modeling techniques applicable
+ to dSph galaxies. We applied our approach to all data and
+to two stellar populations separately, comparing the accuracy of
+the obtained results. Although the addition of the second tracer
+seemingly increases the number of constraints twice, the increment
+ is somewhat compromised by the sampling errors since the
+number of stars in each sample is then reduced. Still, we found
+strong improvements in the accuracy of the method when using
+ two populations. The results of the modeling show that the
+density and velocity anisotropy profiles are more strongly constrained,
+ most importantly at the 3 σ level, that is the range of
+allowed values is much narrower.
+Similarly to the conclusions of Kowalczyk et al. (2018) who
+explored the effects of nonsphericity using large and small
+data samples, the comparison of results presented in the left- and
+ right-hand side panels of Fig. 8 suggests that the improved
+method using two stellar populations gives more precise but less
+accurate outcome. However, in both studies the apparent deterioration
+ of the reliability is a consequence of modeling of a
+nonspherical object. In both cases, a simpler approach (much
+smaller data samples or using one stellar population) resulted -2-101
+  0  0.4  0.8  1.2  1.6POP I + POP II β(r)
+ r [kpc]
+-2-101
+  0  0.4  0.8  1.2  1.6POP I β(r)
+ r [kpc]
+-2-101
+  0  0.4  0.8  1.2  1.6POP II β(r)
+ r [kpc]
+best model
+1σ
+2σ 3σ
+K19
+Fig. 15. Profiles of the anisotropy parameter obtained with the
+Schwarzschild modeling of two stellar populations for the Fornax dSph.
+In rows: results for all stars (calculated as the superposition of two populations),
+ population I, and population II. Color lines indicate values
+for the best-fit models whereas the colored areas of decreasing intensity
+show the 1, 2, and 3 σ confidence regions. The dashed black line shows
+the result from Kowalczyk et al. (2019) for comparison.
+in larger final uncertainties, usually containing the true values
+within 1 σ confidence region. On the other hand, the improved
+methods exhibit substantially reduced uncertainties, highlighting
+the underlying bias.
+Our method parametrizes the total mass content with the
+mass-to-light ratio varying with radius as a power-law in the log-log
+ scale. We made two main changes with respect to our previous
+ work: we added a third parameter c controlling the steepness
+of the mass-to-light ratio profile (previously fixed at the value of
+3) and allowed for different stellar density profiles (previously
+only Sérsic, now also King). These changes are of course coupled
+ since different density profiles require different exponents to
+reproduce the same mass profile. It is visible also in our results
+since the King profile applied in the simulated galaxy gave us
+values of c lower than 3. Nevertheless, we decided to use different
+ density profiles to make our method more general and applicable
+ to objects, such as our Illustris galaxy, for which the Sérsic
+formula does not provide a good approximation of the density
+distribution.
+Finally, we applied the improved method to the data for the
+Fornax dSph galaxy. Due to the addition of another free parameter
+ in our functional form for the mass-to-light ratio, our results
+ for modeling all stars are slightly different from the ones
+Article number, page 11 of 12
+A&A proofs: manuscript no. Populations4
+obtained in Kowalczyk et al. (2019). However, in terms of the
+total density and mass distribution the estimates obtained here
+agree very well with those earlier results in the range covered
+by the data. Therefore, the detailed comparison with other estimates
+ from the literature presented in Kowalczyk et al. (2019) is
+still valid and we do not repeat it here.
+A more significant difference with respect to these previous
+estimates is seen in the results of modeling two populations in
+Fornax. In this case we find the anisotropy to be slightly increasing
+ rather than decreasing with radius and, most importantly, the
+confidence regions for this parameter, as well as for the density,
+ are much narrower. We were thus able to obtain tighter constraints
+ on the properties of Fornax, which means that the improved
+ method is successful. For the first time, we were also able
+to deduce the velocity anisotropy profiles for each of the populations
+ separately. We found that the more concentrated, metal-rich
+population I has a decreasing anisotropy profile while the more
+extended, metal-poor population II has the anisotropy increasing
+with radius. This finding may partially explain the large spread
+of the anisotropy values obtained in the literature and summarized
+ in Table 2 and 3 of Kowalczyk et al. (2019), which were
+often based on modeling subsamples of our spectroscopic data
+set.
+ For both studied objects we split the stars into two populations
+ by dividing them in half based on their metallicity, Z (in
+solar units), for the Illustris galaxy and [Fe/H] for Fornax. Such
+a method is approximate but justified. Both galaxies have complex
+ star formation history with multiple star formation bursts, as
+demonstrated by Fig. 1 in this work and Fig. 7 in del Pino et al.
+(2013), producing multiple stellar populations which cannot be
+easily tracked as the metallicity is a good but not perfect proxy
+for the stellar age. Moreover, the metallicity histograms for both
+objects are approximately unimodal not allowing for a convenient
+ separation. More refined methods of division have been
+suggested in the literature, for example in the form of the likelihood
+ function based on the position, velocity, and metallicity index
+ (Walker & Peñarrubia 2011). However, the likelihood function
+ requires many assumptions which introduce additional uncertainties
+ into the treatment of the data. On the other hand, our
+approach ensures the maximization of each sample (and therefore
+ minimization of sampling errors) while capturing the important
+ features of the star formation history.
+Further improvements to the Schwarzschild modeling
+method are certainly possible. One way to proceed would be to
+include the modeling of the proper motions of the stars. For now,
+measurements of transverse velocities are available only for the
+brightest stars in dSph galaxies, but even small samples of this
+type could provide further constraints on the models, as demonstrated
+ by Strigari et al. (2007) and Massari et al. (2020).
+Acknowledgements. We are grateful to Andrés del Pino for providing the data for
+the Fornax dSph and to the Illustris team for making their simulations publicly
+available. Useful comments from the anonymous referee are kindly appreciated.
+This research was supported by the Polish National Science Center under grant
+2018/28/C/ST9/00529.
+References
+Amorisco, N. C., & Evans, N. W. 2012, MNRAS, 419, 184
+Battaglia, G., Helmi, A., Tolstoy, E., et al. 2008, ApJ, 681, L13
+Bellazzini, M., Ferraro, F. R., & Pancino, E. 2001, MNRAS, 327, L15
+Binney, J., & Tremaine, S. 2008, Galactic Dynamics, 2nd edn. (Princeton University
+ Press, Princeton)
+Breddels, M. A., & Helmi, A. 2013, A&A, 558, A35
+Breddels, M. A., Helmi, A., van den Bosch, R. C. E., van de Ven, G., & Battaglia,
+G. 2013, MNRAS, 433, 3173
+del Pino, A., Hidalgo, S. L., Aparicio, A., et al. 2013, MNRAS, 433, 1505
+del Pino, A., Aparicio, A., & Hidalgo, S. L. 2015, MNRAS, 454, 3996
+del Pino, A., Aparicio, A., Hidalgo, S. L., & Łokas, E. L. 2017, MNRAS, 465,
+3708
+Fabrizio, M., Bono, G., Nonino, M., et al. 2016, ApJ, 830, 126
+Gebhardt, K., Richstone, D., Tremaine, S., et al. 2003, ApJ, 583, 92
+Genel, S., Fall, S. M., Hernquist, L., et al. 2015, ApJ, 804, L40
+Genel, S., Vogelsberger, M., Springel, V., et al. 2014, MNRAS, 445, 175
+Genina, A., Benitez-Llambay, A., Frenk, C. S., et al. 2018, MNRAS, 474, 1398
+Hayashi, K., Fabrizio, M., Łokas, E. L., et al. 2018, MNRAS, 481, 250
+Irwin, M., & Hatzidimitriou, D. 1995, MNRAS, 277, 1354
+Jardel, J. R., & Gebhardt, K. 2012, ApJ, 746, 89
+Jardel, J. R., Gebhardt, K., Fabricius, M. H., Drory, N., & Williams, M. J. 2013,
+ApJ, 763, 91
+King, I. 1962, AJ, 67, 471
+Kowalczyk, K., Łokas, E. L., Kazantzidis, S., & Mayer, L. 2013, MNRAS, 431,
+2796
+Kowalczyk, K., Łokas, E. L., & Valluri, M. 2017, MNRAS, 470, 3959
+Kowalczyk, K., Łokas, E. L., & Valluri, M. 2018, MNRAS, 476, 2918
+Kowalczyk, K., del Pino, A., Łokas, E. L., & Valluri, M. 2019, MNRAS, 482,
+5241
+Łokas, E. L., 2002, MNRAS, 333, 697
+Łokas, E. L., Mamon, G. A., & Prada, F. 2005, MNRAS, 363, 918
+Massari, D., Helmi, A., Mucciarelli, A., et al. 2020, A&A, 633, A36
+Mateo, M. 1998, ARA&A, 36, 435
+Nelson, D., Pillepich, A., Genel, S., et al. 2015, Astronomy and Computing, 13,
+12
+Pace, A. B., Kaplinghat, M., Kirby, E., et al. 2020, MNRAS, 495, 3022
+Press, W. H., Teukolsky, S. A., Vetterling, W. T., & Flannery, B. P. 1992, Numerical
+ Recipes in C, 2nd edn. (Cambridge University Press, Cambridge)
+Schwarzschild, M. 1979, ApJ, 232, 236
+Sérsic, J. L. 1968, Atlas de Galaxias Australes (Observatorio Astronomico, Cordoba,
+ Argentina)
+Strigari, L. E., Bullock, J. S., & Kaplinghat, M. 2007, ApJ, 657, L1
+Tolstoy, E., Hill, V., & Tosi, M. 2009, ARA&A, 47, 371
+Valluri, M., Merritt, D., & Emsellem, E. 2004, ApJ, 602, 66
+van der Marel, R. P., Cretton, N., de Zeeuw, P. T., & Rix, H.-W. 1998, ApJ, 493,
+613
+Vogelsberger, M., Genel, S., Springel, V., et al. 2014a, Nature, 509, 177
+Vogelsberger, M., Genel, S., Springel, V., et al. 2014b, MNRAS, 444, 1518
+Walker, M. G., & Peñarrubia, J. 2011, ApJ, 742, 20
+Wang, M. Y., de Boer, T., Pieres, A., et al. 2019, ApJ, 881, 118
+Article number, page 12 of 12
\ No newline at end of file
diff --git a/read/results/playa/2201.00178.txt b/read/results/playa/2201.00178.txt
new file mode 100644
index 0000000..b3c719c
--- /dev/null
+++ b/read/results/playa/2201.00178.txt
@@ -0,0 +1,1272 @@
+Draft version January 4, 2022
+Typeset using LaTeX default style in AASTeX631
+Imaging the Sun’s near-surface flows using mode-coupling analysis
+Prasad Mani,$^{1}$ Chris S. Hanson,$^{2}$ and Shravan Hanasoge$^{1,2}$
+$^{1}$Department of Astronomy and Astrophysics, Tata Institute of Fundamental Research, Mumbai, India
+$^{2}$Center for Space Science, NYUAD Institute, New York University Abu Dhabi, Abu Dhabi, UAE
+ABSTRACT
+The technique of normal-mode coupling is a powerful tool with which to seismically image nonaxisymmetric
+ phenomena in the Sun. Here we apply mode coupling in the Cartesian approximation to
+probe steady, near-surface flows in the Sun. Using Doppler cubes obtained from the Helioseismic and
+Magnetic Imager onboard the Solar Dynamics Observatory, we perform inversions on mode-coupling
+measurements to show that the resulting divergence and radial vorticity maps at supergranular length
+scales (∼30 Mm) near the surface compare extremely well with those obtained using the Local Correlation
+ Tracking method. We find that the Pearson correlation coefficient is ≥ 0.9 for divergence flows,
+while ≥ 0.8 is obtained for the radial vorticity.
+Keywords: Helioseismology (709); Solar physics (1476); Supergranulation (1662)
+1. INTRODUCTION
+Helioseismology is the study of the Sun’s internal structure and its properties, by means of interpreting its effect
+on solar oscillations (see Christensen-Dalsgaard 2002, for a review). These are resonant normal modes of the Sun,
+behaving as standing waves in a cavity bounded by the solar surface and a depth that depends on the wavenumber
+of the oscillation. As these waves penetrate the interior, they register information of the properties and dynamics of
+the solar interior and return to the surface, where they are observed. The internal structure of the Sun can then be
+retrieved through meticulous inversions of these seismic measurements.
+Several important flow systems on the Sun have been inferred using various global and local helioseismic methods.
+Of those, the most notable global helioseismic results include inferences on the solar differential rotation, through
+global mode frequency splitting (Thompson et al. 1996; Schou et al. 1998), and the resolution of the neutrino problem
+(Bahcall & Pinsonneault 1992). Notable local helioseismic results include imaging of the meridional flow (Giles et al.
+1997; Gizon et al. 2020) through time-distance helioseismology (Duvall et al. 1993), and farside imaging of active
+regions (Braun & Lindsey 2001) and their near side emergence (Birch et al. 2016), through helioseismic holography
+(Lindsey & Braun 2000). The recent discovery of various inertial waves (Gizon et al. 2021), including the equatorial
+Rossby wave (Löptien et al. 2018), has been achieved through local helioseismic ring-diagram analysis (Hill 1988) and
+the non-helioseismic local correlation tracking (LCT, November & Simon 1988) of granulation.
+In recent years, the use of global mode-coupling helioseismology (Woodard 1989; Lavely & Ritzwoller 1992) has
+received attention, with many studies seeking to validate and demonstrate the importance of such a technique for
+investigating numerous solar phenomena. While the derivation of the mode-coupling technique is mathematically
+challenging, the data analysis is simple and utilizes all the information registered by the mode. Thus far, global
+mode-coupling has been validated through observations of the meridional flow (Vorontsov 2011; Woodard et al. 2013),
+differential rotation (Schad & Roth 2020; Kashyap et al. 2021), global-scale convection (Woodard 2014, 2016; Hanasoge
+et al. 2020; Mani & Hanasoge 2021) and Rossby modes (Hanasoge & Mandal 2019; Mandal & Hanasoge 2020; Mandal
+et al. 2021). Local mode-coupling analysis in the Cartesian approximation, formulated by Woodard (2006), was
+validated by Hanson et al. (2021) (hereafter H21) by examining the power-spectrum of supergranular waves and
+comparing with previous time-distance studies (Langfellner et al. 2018).
+prasad.subramanian@tifr.res.in
+arXiv:2201.00178v1  [astro-ph.SR]  1 Jan 2022
+  Mani et al.
+Normal-mode coupling refers to the concept of expressing solar-oscillation eigenfunctions as a linear weighted combination
+ of model-eigenfunctions (e.g., Model S Christensen-Dalsgaard 2021). The model eigenfunctions form a complete
+and orthogonal basis. By design, the model Sun is spherically symmetric, adiabatic, free from rotation, magnetism and
+flows. In this state, the oscillations are considered to be uncoupled. The weights needed to express the solar-oscillation
+eigenfunctions would then encode all the perturbations that are absent in the model. The forward problem then
+reduces to relating observed seismic measurements to the perturbations that we want to infer. The surface wavefield
+cross-correlation is the primary measurement in the mode-coupling analysis and can be directly related to the weights
+(Woodard 2016). As mode coupling is a Fourier domain technique, wavefields are cross-correlated at different spatial
+and temporal frequencies, leaving us with measurements sensitive to different quantities of interest.
+In this study, we extend the spectral analysis of H21 and develop the method to produce near-surface flow maps
+at supergranulation length scales. A part of the formalism that was used to derive the forward model in H21 is
+reworked, primarily to image steady flows. Measurements are then constructed, and inversions to infer divergence flow
+and radial vorticity are described. We also demonstrate signal associated with supergranular flow in a radial-order
+coupling ($p_2$-$p_2$), which was not shown in H21. This helps in localizing the measurement sensitivity to the surface.
+We compare our results with flows obtained using the Local Correlation Tracking method on solar granules.
+1.1. Forward problem
+In favor of algebraic brevity, we only show crucial steps here and refer the interested reader to Appendix A for a
+complete derivation of the forward problem. Working in the plane-parallel atmosphere (see also Woodard 2006), we
+denote the horizontal unit vectors $\mathbf{e}_x$ and $\mathbf{e}_y$ in our local Cartesian domain as pointing towards west and north on the
+solar surface, respectively, and $\mathbf{e}_z$ points outwards. This approximation is valid when observing patches of the surface
+that are small when compared to the solar radius. When imaging steady, near-surface flows in the neighbourhood
+of the supergranular scale (∼ 30 Mm), we expect the measured spectral cross-correlation signal to peak around the
+horizontal wavenumber $qR_\odot \approx 120$ (Rincon & Rieutord 2018), where $q = |\mathbf{q}|$ and $\mathbf{q} = (q_x, q_y)$ is the vector horizontal
+wavenumber of the flow. Accordingly, the goal is to relate measurements (linearly, to facilitate inversion) to the flow
+perturbation described in a horizontal Fourier domain. Supergranular velocities are subsonic (300-400 m/s, see Rincon
+& Rieutord 2018), permitting us to model the flow vector $\mathbf{u} = (u_x, u_y, u_z)$ in the Cartesian domain like so (Unno et al.
+1989; Woodard 2006)
+$$\mathbf{u}^{\sigma} = \nabla\times[\nabla\times(P\,\mathbf{e}_z)] + \nabla\times(T\,\mathbf{e}_z), \qquad (1)$$
+where $P = P^{\sigma}(\mathbf{x})$ and $T = T^{\sigma}(\mathbf{x})$ are poloidal and toroidal scalar functions, varying with position $\mathbf{x}$ and temporal
+frequency σ. ∇ is the 3D gradient operator. While mode-coupling can easily be extended to study time-varying
+perturbations (see Woodard 2016; Mani & Hanasoge 2020; Hanasoge et al. 2020; Mandal & Hanasoge 2020, for
+example), here we only consider the frequency bin σ = 0, denoting the temporally averaged flow over the period
+of analysis. We therefore suppress σ from all terms this point forward, remembering that temporal dynamics of
+perturbations may also be studied using the same model outlined in the following paragraphs. Simplifying eq 1 using
+vector calculus results in
+$$\mathbf{u} = -\nabla^2 P\,\mathbf{e}_z + \nabla(\partial_z P) + \nabla_h T \times \mathbf{e}_z, \qquad (2)$$
+where $\nabla_h$ refers to derivatives only in the horizontal direction. Mode-coupling helioseismology is performed in the
+Fourier domain, and since we wish to image horizontal flows on a small patch of the surface, we describe the flow as a
+function of horizontal wavenumber $\mathbf{q}$ and depth $z\,\mathbf{e}_z$. Hence the poloidal and toroidal flows are described by $P_{\mathbf{q}}(z)$ and
+$T_{\mathbf{q}}(z)$, respectively. Furthermore, we parametrize the flow along $\mathbf{e}_z$ using basis functions $f(z)$ (Chebyshev, B-spline,
+etc). This is expressed as
+$$P \equiv P_{\mathbf{q}}(z) = \sum_j f_j(z)\, P_{\mathbf{q}j}, \qquad T \equiv T_{\mathbf{q}}(z) = \sum_j f_j(z)\, T_{\mathbf{q}j}. \qquad (3)$$
+The flow coefficients $P_{\mathbf{q}j}$ and $T_{\mathbf{q}j}$, represented by the discrete indices $\mathbf{q}$ and $j$, become ideal candidates for inversions,
+where the flow for each wavenumber q can be inverted for independently; parallelization in computation can thus be
+exploited to expedite inversions. Note that $P_{\mathbf{q}j} = P^{*}_{-\mathbf{q}j}$ and $T_{\mathbf{q}j} = T^{*}_{-\mathbf{q}j}$ for the flow field to be real in the spatiotemporal
+ domain.
+To infer flows from wavefields φ scattered by a perturbation of length scale q, cross-correlate them in the manner
+Imaging near-surface flows using mode-coupling analysis 3
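+$\phi^{\omega*}_{\mathbf{k}}\,\phi^{\omega}_{\mathbf{k}+\mathbf{q}}$, where $\mathbf{k}$ is the oscillation mode wavenumber $(k_x, k_y)$ and $\omega$ is the temporal frequency. Relate $\phi^{\omega*}_{\mathbf{k}}\,\phi^{\omega}_{\mathbf{k}+\mathbf{q}}$ thus
+to the flow coefficients $P_{\mathbf{q}j}$ and $T_{\mathbf{q}j}$ (see eq A7)
+$$\phi^{\omega*}_{\mathbf{k}}\,\phi^{\omega}_{\mathbf{k}+\mathbf{q}} = H^{\omega}_{kk'nn'} \sum_j \left[ C_{\mathbf{q}j,\mathbf{k}}\, P_{\mathbf{q}j} + D_{\mathbf{q}j,\mathbf{k}}\, T_{\mathbf{q}j} \right]. \qquad (4)$$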
+The weight factor $H^{\omega}$ (see eq A8) is a function of frequency, capturing information about the extent of coupling between
+the two modes $[n, k]$ and $[n', k']$, where $n$ and $n'$ are the radial orders of the modes, and $k = |\mathbf{k}|$ and $k' = |\mathbf{k}'| = |\mathbf{k} + \mathbf{q}|$.
+The spectral profile of the mode (see eq A9) is approximated using a Lorentzian (Anderson et al. 1990). The more the
+Lorentzians of the two modes overlap, the stronger the coupling. Finally, the real terms $C_{\mathbf{q}j,\mathbf{k}}$ and $D_{\mathbf{q}j,\mathbf{k}}$ are poloidal
+and toroidal flow sensitivity kernels respectively, that allow us to relate the flows in question to the measurements
+and are derived from the solar model (see Appendix A). They possess the symmetry relation: $C_{\mathbf{q}j,\mathbf{k}} = C_{-\mathbf{q}j,-\mathbf{k}}$ and
+$D_{\mathbf{q}j,\mathbf{k}} = D_{-\mathbf{q}j,-\mathbf{k}}$ (see eq A6). The kernels, as flows, are expressed on the basis $f_j(z)$.
+1.2. Least-squares of cross-correlation
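+Even though $\phi^{\omega*}_{\mathbf{k}}\,\phi^{\omega}_{\mathbf{k}+\mathbf{q}}$ isolates the effect of flow perturbations at individual wavenumbers $\mathbf{q}$, a more compact measurement,
+ known in mode-coupling literature as 'B-coefficients', is much better designed for inversion as it reduces the
+dimension of the problem. A least-squares fit to the cross-correlation $\phi^{\omega*}_{\mathbf{k}}\,\phi^{\omega}_{\mathbf{k}+\mathbf{q}}$ (see Woodard 2006, 2014, 2016) results
+in the B-coefficients $B_{\mathbf{k},\mathbf{q}}$, according to
+$$B_{\mathbf{k},\mathbf{q}} = \frac{\sum_{\omega} H^{\omega*}_{kk'nn'}\, \phi^{\omega*}_{\mathbf{k}}\,\phi^{\omega}_{\mathbf{k}+\mathbf{q}}}{\sum_{\omega} |H^{\omega}_{kk'nn'}|^{2}}. \qquad (5)$$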
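+Multiplying eq 4 on both sides by $H^{\omega*}_{kk'nn'}$ and substituting by eq 5 on the left-hand-side results in a concisely defined
+forward problem (compare with eq 4)
+$$B_{\mathbf{k},\mathbf{q}} = \sum_j \left[ C_{\mathbf{q}j,\mathbf{k}}\, P_{\mathbf{q}j} + D_{\mathbf{q}j,\mathbf{k}}\, T_{\mathbf{q}j} \right]. \qquad (6)$$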
+In eq 5, Woodard (2007) and H21 thus far only considered positive-frequency components in the summation over ω.
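+Here, we sum over both ±ω within a few mode linewidths Γ. Denoting the resonant frequency of a mode using $\omega_{nk}$,
+$$|\omega| \in \left[ \omega_{nk} - \epsilon\,\Gamma_{nk}/2,\ \omega_{nk} + \epsilon\,\Gamma_{nk}/2 \right] \quad \text{or} \quad |\omega| \in \left[ \omega_{n'k'} - \epsilon\,\Gamma_{n'k'}/2,\ \omega_{n'k'} + \epsilon\,\Gamma_{n'k'}/2 \right]. \tag{7}$$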
+Summing over ±ω guarantees that the parity $B_{\mathbf{k},\mathbf{q}} = B^{*}_{-\mathbf{k},-\mathbf{q}}$ (see Appendix A for derivation) is obeyed, thereby
+ensuring that the flow field on the right-hand-side of eq 6 is a real physical quantity in the spatio-temporal domain.
+Taking the complex conjugate on both sides of eq 6 and considering the negative wavenumber components $-\mathbf{q}$ and
+$-\mathbf{k}$,
+$$B^{*}_{-\mathbf{k},-\mathbf{q}} = \sum_{j} \left[ C_{-\mathbf{q}j,-\mathbf{k}}\, P^{*}_{-\mathbf{q}j} + D_{-\mathbf{q}j,-\mathbf{k}}\, T^{*}_{-\mathbf{q}j} \right]. \tag{8}$$
+Substituting parity and symmetry relations for all terms in the above results in eq 6. As $B_{\mathbf{k},\mathbf{q}}$ is constructed by a
+least-squares fitting, it is noteworthy that summing over −ω will also lead to improvement in its signal-to-noise as a
+by-product.
+ 1.3. Noise model
+In addition to the sensitivity kernels, a systematic background noise model is required to infer the flows from
+the observed B -coefficients. For estimating the contribution from realization noise to the measurements, we make the
+following assumptions (Gizon & Birch 2004): that the excitation of the wavefield is modelled as a multivariate Gaussian
+random process and the wavefields are uncorrelated across wavenumber and frequency in the absence of perturbations.
+Every independent realization of a mode can be understood as the output of a damped harmonic oscillator driven by a
+random forcing function (see Duvall & Harvey 1986). Modes are thus generated with random phases and amplitudes
+and with finite lifetimes. This stochasticity leads to realization noise in repeated measurements of mode parameters
+  Mani et al.
+Figure 1. Dispersion relation for the radial orders used in this analysis; f (blue), $p_1$ (orange) and $p_2$ (green). The shaded
+regions of the same colours indicate 1-linewidth Γ about the mode frequency. The yellow shaded region indicates the range of
+$kR_\odot$ and ω/2π to which we have restricted ourselves in this analysis. Beyond $kR_\odot$ of 2000, it is seen that the theoretical fitting
+of mode frequencies starts deviating from the observed dispersion relation for the f-mode.
+such as its amplitude, frequency and linewidth, and consequently in $B_{\mathbf{k},\mathbf{q}}$ in our case. We use the same noise model
+as in H21, which was motivated by the above discussion,
+$$G_{\mathbf{k},\mathbf{q}} \equiv \left| B_{\mathbf{k},\mathbf{q}} \right|^{2}, \tag{9}$$
+where, unlike H21, we again sum over ±ω. $G_{\mathbf{k},\mathbf{q}}$ is real, with the symmetry relation $G_{\mathbf{k},\mathbf{q}} = G_{-\mathbf{k},-\mathbf{q}}$ (see Appendix A
+for explanation).
+ 2. DATA ANALYSIS
+In order to examine near surface flows we build a time-series cube of Doppler images that are obtained from the
+Helioseismic Magnetic Imager aboard the Solar Dynamics Observatory (HMI/SDO, Scherrer et al. 2012). Each image
+is Postel projected, with a spatial resolution of approximately 0.48 Mm, separated in time by 45 seconds, and is tracked
+at the Snodgrass (1984) rotation rate. Here, we select a patch that is 194.4 × 194.4 Mm$^2$ in size, tracked for 24 hours
+and crosses the disk-center in the middle of observation time on 14 November 2017 (Carrington rotation number
+2197, Carrington longitude 90°). This Dopplercube is considered as the physical wavefield φ(x, y; t). The Fourier-space
+wavefield $\phi^{\omega}_{\mathbf{k}}$ (and subsequently, the cross-correlation $\phi^{\omega*}_{\mathbf{k}}\, \phi^{\omega}_{\mathbf{k}+\mathbf{q}}$) is obtained by computing the 3D spatial and temporal
+Fourier transform of the Dopplercube.
+The duration of the observed region is long enough to provide sufficient frequency bins with which to sum over in
+Eq 6, while short enough that supergranules do not substantially evolve (lifetime is purported to be 1.6 days; Rincon
+& Rieutord 2018) over this period. Our observation region is close to the disk center to also avoid any contamination
+from center-to-limb systematics (Zhao et al. 2012; Langfellner et al. 2015).
+Maximum signal can be extracted from the weighted summation of the cross correlations (eq 5) when the spectral
+profiles of the two modes $[n, k]$ and $[n', k']$ closely align in ω space. This implies that their mode frequencies should be
+sufficiently close ($|\omega_{nk} - \omega_{n'k'}| \le \delta$, the separation parameter). Since Lorentzians decay rapidly, the summation over
+±ω is significant only over a few linewidths ($\epsilon$, the summation parameter; see eq 7). We have empirically found and
+tabulated δ in Table 1 for the radial order couplings $n$-$n'$ ∈ f-f, $p_1$-$p_1$, and $p_2$-$p_2$ (the signal strength depends only
+weakly on $\epsilon$; we set it to 3 line widths).
+Figure 1 shows that for any two adjacent ridges (adjacent $n$ and $n'$), mode frequencies $\omega_{nk}$ and $\omega_{n'k}$ become spaced
+farther apart with increasing wavenumber $kR_\odot$. It is also known that mode linewidth Γ grows with radial orders for
+a given $kR_\odot$. Moreover, holding the spatial and temporal sampling rates constant, the spatial size and duration of
+observation set the total number of modes within a range of $kR_\odot$ (and ω/2π) that can be clearly observed, thereby
+affecting the quality of the seismic measurements. Owing to these factors, to maximize signal-to-noise (by visually
+inspecting the power-spectrum), the parameters describing the extent of coupling over different ranges of $kR_\odot$ at fixed
+radial order are different. In wavenumber, we restrict our analysis to within $200 \le kR_\odot \le 2000$ and $qR_\odot \le 300$. Our
+frequency range is confined to span the range over which acoustic modes are observed (2 ≤ ω/2π ≤ 5 in mHz).
+Imaging near-surface flows using mode-coupling analysis 5
+| Coupling    | $kR_\odot$ range | # of modes | δ   |
+| :---------- | :--------------- | ---------: | --: |
+| f-f         | [400,1000]       |       5240 | 4   |
+|             | [1000,1500]      |       7784 | 1.1 |
+|             | [1500,2000]      |      10940 | 0.4 |
+| $p_1$-$p_1$ | [400,1000]       |       5240 | 4.5 |
+|             | [1000,1750]      |      12852 | 2   |
+| $p_2$-$p_2$ | [200,1000]       |       5886 | 3   |
+|             | [1000,1300]      |       4280 | 3   |
+Table 1. Total number of modes, and separation parameter δ (in number of linewidths) for different couplings, for different
+ranges of $kR_\odot$.
+ 3. INVERSION
+The final step to producing near-surface flow maps in Cartesian mode-coupling is to invert the measurements
+B
+k,q from the linear relation in eq 6. We describe inversion using regularized-least-squares (RLS) method here and
+leave Subtractive Optimally Localized Averages (SOLA, Pijpers & Thompson 1994) for Appendix B. The methods
+complement each other (see Sekii 1997), where RLS tries to minimize the misfit between data and model, whereas
+SOLA gives better localization. For total number of modes $M$, RLS scales as $M \times J$ where $J$ is the number of basis
+functions $f_j(z)$ ($J \ll M$; see eq 3 and section 3.1), whereas SOLA scales as $M^2$ (see Appendix B). For $M > 5000$,
+computation starts to quickly become expensive for SOLA.
+Mode eigenfunctions peak near the surface, with higher radial orders possessing smaller peaks in the interior. While
+f-f coupling alone has enough sensitivity to probe perturbations at supergranular scales close to surface, signal is
+present even in $p_1$-$p_1$, and $p_2$-$p_2$ (see Figure 3), and possibly other higher order self- and cross-couplings. Since we are
+interested in only surface flows, we leave higher order coupling to future work.
+It bears mentioning that the slopes of the ridges in the $kR_\odot$-ν spectrum (Figure 1) increase with radial order. This
+limits us to low-to-intermediate $kR_\odot$ (< 1000) for these higher radial orders if we are to remain under the acoustic cutoff
+ frequency of 5.3 mHz. It also becomes imperative to use a spatially larger observation patch to gain access to signals
+from low $kR_\odot$ - too large an observation region could possibly render invalid the Cartesian geometry approximation.
+Regardless, in addition to performing inversions using all the couplings stacked together, we also demonstrate inversions
+separately for the three couplings (see Table 2) in order to account for the full gamut of mode-coupling as a signal-rich
+helioseismic technique.
+ 3.1. RLS
+For given q, the forward problem may be stated as
+ KU = B, (10)
+with the aim to minimize the misfit $\sum_{\mathbf{k}} \| K U - B \|_{2}$, with $\| \cdot \|_{2}$ denoting the $L_2$ norm. Here, $K$ is the matrix formed
+by the sensitivity kernels: $\{ C_{\mathbf{q}j,\mathbf{k}}, D_{\mathbf{q}j,\mathbf{k}} \}$. $U$ is a vector composed of the flow coefficients: $\{ P_{\mathbf{q}j}, T_{\mathbf{q}j} \}$ and $B$ is a vector
+composed of computed B-coefficients: $\{ B_{\mathbf{k},\mathbf{q}} \}$. The least-squares problem is solved simultaneously for poloidal and
+toroidal flow. We use B-spline basis functions as our $f_j(z)$, comprising 11 knots spaced uniformly in acoustic radius,
+for both poloidal and toroidal coefficients. Hence, for $M$ modes (total number of $\mathbf{k}$ for a given $\mathbf{q}$ is $M$) and 11 basis
+functions for each poloidal and toroidal, the dimensions of $K$, $U$ and $B$ are thus $M \times 22$, $22 \times 1$, and $M \times 1$ respectively.
+Normalizing both sides of eq 10 by the noise covariance Λ (a diagonal matrix with the entries $G_{\mathbf{k},\mathbf{q}}$; see eq 9; dimension
+$M \times M$) and pre-multiplying by $K^{T}$,
+$$(K^{T} \Lambda^{-1} K)\, U = (K^{T} \Lambda^{-1})\, B, \tag{11}$$
+$$U = (K^{T} \Lambda^{-1} K)^{-1} K^{T} \Lambda^{-1} B. \tag{12}$$
+  Mani et al.
+Figure 2. Left : Averaging kernel for poloidal flow (see section B.2, eq B17, and left panel of Figure 8) for $\mathbf{q}R_\odot = [-112, -45]$,
+at the depth $z_o = -0.41$ Mm. Right : L-curve for the mode $\mathbf{q}R_\odot = [-112, -45]$; the knee (λ = 2.48) is marked by a blue
+diamond.
+Since the least-squares problem is typically ill-posed, we restate the minimization as $\sum_{\mathbf{k}} \| K U - B \|_{2} + \lambda \| U \|_{2}$ with
+the regularization parameter λ, which results in a trade-off between misfit reduction (first term) and solution
+norm minimization (second term). Under-regularizing can lead to a solution $U$ that is dominated by errors in the
+data and, on the other hand, over-regularizing may smooth or damp the solution more than necessary. Including this
+regularization makes the problem better conditioned, and the solution is now defined as
+$$U = (K^{T} \Lambda^{-1} K + \lambda I)^{-1} K^{T} \Lambda^{-1} B, \tag{13}$$
+where $I$ is the identity matrix for $L_2$ regularization. The knee-point of the L-curve (Hansen 1992), a curve formed
+by plotting $\| U \|_{2}$ vs $\| K U - B \|_{2}$ for different values of λ (see right panel of Figure 2), is usually chosen as the
+regularization parameter. After successfully inverting for $U$, we reconstruct the flow using eq 3. Results for poloidal
+flow $P_{\mathbf{q}}$ are shown in Figure 3.
+ 4. LCT
+To improve confidence in the imaged near-surface flows through mode-coupling, we compare them with flows obtained
+from Local Correlation Tracking method (LCT; November & Simon 1988). LCT provides surface-flow maps by
+examining the advection of convective granules (1.2 Mm, $qR_\odot \approx 3500$; Hathaway et al. 2015) by underlying larger-scale
+ flow systems. Since granules are used as tracers, which are much smaller in size than supergranules (≈ 35 Mm),
+LCT is an effective method (see Rieutord et al. 2001) to produce surface horizontal flow maps of supergranulation.
+Time series of intensity images from HMI, with the same properties of the Dopplercubes described in section 2
+(tracking rate, date, location, size and duration of observed patch, spatial and temporal sampling rate), are obtained
+ and Postel projected. The horizontal flows are deduced by tracking the proper motions of granules between
+consecutive intensity images, which we denote as $I_1$, $I_2$. The LCT method selects a patch in each of the two images
+($I_1 = I_1\, e^{-(\mathbf{x}-\mathbf{x}_{ij})^{2}/2\,\mathrm{sigma}^{2}}$, $I_2 = I_2\, e^{-(\mathbf{x}-\mathbf{x}_{ij})^{2}/2\,\mathrm{sigma}^{2}}$) that observe the same granule at the grid point $\mathbf{x}_{ij} = (x_i, y_j)$.
+A Gaussian of width sigma allows us to isolate a small region surrounding the grid point of interest, as the distances
+moved by granules are usually in the sub-pixel regime. The convention for the direction of $\mathbf{x}$ is the same as described in
+section 1.1. The two patches $I_1$, $I_2$ are then cross correlated for different values of position shifts $\Delta\mathbf{x}$,
+$$C_{ij}(\Delta x, \Delta y) = \int d\mathbf{x}\; I^{*}_{1}(-\mathbf{x})\, I_{2}(\Delta\mathbf{x} - \mathbf{x}). \tag{14}$$
+The shift $\Delta\mathbf{x} = (\Delta x, \Delta y)$ that maximizes the cross-correlation $C_{ij}$ is taken to be the proper motion of the granule.
+Provided that the time difference ∆t, here 45 seconds, between the images is less than the lifetime of granules (< 10
+min), the velocities are given by $v_x = \Delta x/\Delta t$ and $v_y = \Delta y/\Delta t$. This exercise is repeated for all grid points in the
+images $I_1$, $I_2$ and for each consecutive pair of images in the cube.
+In practice, we use the Fourier LCT algorithm (FLCT, Fisher & Welsch 2008) for computing $v_x$ and $v_y$. FLCT
+requires the input sigma, which we set to 4 pix, that captures the extent of localization desired, and depends on the
+Imaging near-surface flows using mode-coupling analysis 7
+Figure 3. Top : Inverted poloidal flow power-spectrum for the three couplings f-f, $p_1$-$p_1$, and $p_2$-$p_2$ as a function of $q_x R_\odot$ and
+$q_y R_\odot$. Bottom : Corresponding power-spectrum averaged over the azimuthal angle. Shaded region shows ±1σ error around the
+mean. Total power appears to increase through the radial orders. Power is in units of m$^2$/s$^4$.
+dominant length scale of the velocity field in the images. The Postel-projected intensity images are fed as input to the
+FLCT code. $v_x$ and $v_y$ are then computed for consecutive pairs of images and are averaged over the entire day.
+5. MAPS OF HORIZONTAL DIVERGENCE AND RADIAL VORTICITY
+For mode-coupling, horizontal divergence (hereafter div) and radial vorticity (hereafter curl) are computed by
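+substituting P and T from eq 3 into eq 2 as below
+$$\begin{aligned} \mathbf{u}(\mathbf{q}, z) &= -\nabla^{2} P\, \mathbf{e}_z + \nabla(\partial_z P) + \nabla_h T \times \mathbf{e}_z, \\ &= -(0,\, 0,\, \partial_x^{2} P + \partial_y^{2} P + \partial_z^{2} P) + (\partial_x \partial_z P,\, \partial_y \partial_z P,\, \partial_z^{2} P) + (\partial_y T,\, -\partial_x T,\, 0). \end{aligned} \tag{15}$$
+Setting $\partial_x^{2} + \partial_y^{2} = q^{2}$, div is given by,
+$$\nabla_h \cdot \mathbf{u}(\mathbf{q}, z) = q^{2}\, \partial_z P, \tag{16}$$
+and curl is given by,
+$$\left[ \nabla \times \mathbf{u}(\mathbf{q}, z) \right]_z = q^{2}\, T. \tag{17}$$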
+We follow similar steps to those taken in Langfellner et al. (2015) for comparison of flow maps with LCT. The
+essential step for comparison at different length scales is to bandpass filter the Fourier-space flow around the $qR_\odot$ of
+interest (see Figure 4), and subsequently convert it to real space.
+We seek to show comparisons (see Figures 5, 6, and 7) for $qR_\odot$ = 100, 150, 200 and 250. To sufficiently delineate
+flows at these length scales, we apply a Gaussian filter (see Figure 4) to flows obtained from eqns 16 and 17. The
+Gaussian is centered at the desired wavenumber with a half-width of 25. We then perform a 2D Fourier transform to
+obtain a real-space steady-flow map.
+  Mani et al.
+Figure 4. Left: Divergence-flow power spectrum $|\mathrm{div}|^2$, from eqn 16, obtained from inversion using all the couplings. The
+power-spectrum is then filtered with a bandpass centered around $qR_\odot = 150$ (middle panel). The resulting spectrum is shown in
+the right panel. The units of $|\mathrm{div}|^2$ are s$^{-2}$. For illustration, we show the action of the filter on the power-spectrum $|\mathrm{div}|^2$
+since it is a real quantity, but recall that it is the Fourier-space flow div (a complex quantity) on which we apply the filter.
+For LCT, we first apply a Gaussian smoothing to $v_x$ and $v_y$ to average over small-scale features; the extent of
+smoothing depends on the length scale $qR_\odot$ to be compared with mode-coupling. div and curl are then simply
+computed by
+$$\mathrm{div} = \partial_x v_x + \partial_y v_y, \tag{18}$$
+$$\mathrm{curl} = \partial_x v_y - \partial_y v_x. \tag{19}$$
+We then perform a 2D Fourier transform on eqns 18 and 19, apply the same Gaussian filters as for mode-coupling,
+and transform back to real space.
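+Condensing all of the above, the following sequences of operations are performed to compare flows at desired length scales
+for mode-coupling (M-C) and for LCT:
+$$\text{M-C}:\ \phi(x, y; t) \xrightarrow{\ \text{3D FFT}\ } \phi^{\omega}_{\mathbf{k}},\ B_{\mathbf{k},\mathbf{q}} \xrightarrow{\ \text{inversion}\ } P,\ T \xrightarrow{\ \nabla_h\cdot,\ \nabla\times\ } \text{eqns 16, 17} \xrightarrow{\ \text{Filter, 2D FFT}\ } \text{div, curl}$$
+$$\text{LCT}:\ I_1,\ I_2 \xrightarrow{\ \text{FLCT}\ } v_x,\ v_y \xrightarrow{\ \text{smooth},\ \nabla_h\cdot,\ \nabla\times\ } \text{eqns 18, 19} \xrightarrow{\ \text{2D FFT, Filter}\ } \text{Filtered, Fourier-space flows} \xrightarrow{\ \text{2D FFT}\ } \text{div, curl}$$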
+6. RESULTS
+Table 2 summarizes the results of the comparison between flows obtained from mode-coupling and LCT. Figure 5,
+where we have used all the couplings to perform inversions, shows a 97% correlation between divergence flows from
+the two methods near supergranular scale ($qR_\odot \approx 100$). Near-surface flows are imaged most faithfully when all the
+couplings are used. Since vortical flows are imaged at a region near the equator, it is possible that the source of
+vorticity is something other than Coriolis force. Nevertheless, there is also a very good agreement (87%) between
+the vortical flows as inferred from the two methods, despite being an order of magnitude weaker than the divergence
+flows (this is consistent with the results of Hathaway et al. 2015; Langfellner et al. 2015; Rincon et al. 2017). Due to
+insufficient modes for the $p_2$-$p_2$ case (see Table 1), we are unable to infer vortical flows with conviction other than near
+the supergranular scale, as can be seen from Table 2. Figure 6 also aligns with what we believe can be accomplished
+through mode-coupling helioseismology - using f-f or $p_1$-$p_1$ alone to seismically infer near-surface divergence and vortical
+flows at different scales ($qR_\odot$ = 100, 150) can yield extremely good agreement with LCT. As the length scale of the
+inferred flow moves further away from that of supergranules (Figure 7), the demand on signal-to-noise also increases.
+An adequate number of modes (and coupling strength between higher radial-orders) thus becomes a necessity to
+comment substantively on the flows at these scales.
+6.1. Amplitudes of mode-coupling flows
+Imaging near-surface flows using mode-coupling analysis 9
+(a) $qR_\odot$ = 100, f-f + $p_1$-$p_1$ + $p_2$-$p_2$
+Figure 5. Real-space divergence flows (left column, in units of $10^{-5}$ s$^{-1}$) and radial vorticity (right column, in units of $10^{-6}$ s$^{-1}$)
+for LCT (top row), and mode-coupling inversions through RLS using all the couplings (middle row), bandpass filtered around
+$qR_\odot$ = 100 (see Figure 4). Corresponding scatter plots and correlation coefficients are shown in the bottom row. We cut edges
+out from the flow maps and compare a circular region of diameter ≈175 Mm. The slopes of the best-fit line through the scatter
+plots are 0.51 for divergence and 0.01 for vorticity. The vorticity flow maps are saturated to show only 40% of the maximum
+values.
+For both LCT and mode-coupling divergence and vorticity maps, numerous factors, arising from the associated
+numerous data processing steps, can influence the final inference of flow amplitudes, making it difficult to put forward
+a precise statement on them. H21 reported a 60% greater amplitude for p
+1-p
+1 over f-f coupling (Figure 3 reflects a
+similar conclusion), another element to consider when combining different radial orders. The choice of regularization
+(see right panel of Figure 2) has the potential to affect the amplitudes of the inverted flows to some degree. Flow
+amplitudes also vary with depth, implying that different radial orders and LCT will measure different flow averages.
+This variability emerges as a natural consequence of any helioseismic inversion procedure necessitating the use of a
+radial grid along which kernels and flows tend to be described.
+Thus, the amplitudes of the mode-coupling flows (and the correlation coefficient) depend upon the following factors:
+• Coupling(s) used,
+• Regularization parameter in the inversion,
+• Smoothing applied to LCT flows (indirectly; see below paragraph),
+• The depth at which flows are inferred.
+Here, we report in Table 2 only the maximum correlation found from among the points in the radial grid close
+to the surface (within ±0.5 Mm from z=0). For a desired comparison length scale qR
+, we first fix the coupling(s)
+and the regularization parameter to be used in the inversion. We then separately compute filtered divergence and
+  Mani et al.
+(a) $qR_\odot$ = 100, f-f (b) $qR_\odot$ = 150, $p_1$-$p_1$
+Figure 6. Real-space divergence flows (left column, in units of $10^{-5}$ s$^{-1}$) and radial vorticity (right column, in units of $10^{-6}$ s$^{-1}$)
+for LCT (top row), and mode-coupling inversion through RLS using (a) f-f coupling (bottom row), bandpass filtered around
+$qR_\odot$ = 100, and using (b) $p_1$-$p_1$ coupling (bottom row), bandpass filtered around $qR_\odot$ = 150. We cut edges out from the flow
+maps and compare a circular region of diameter ≈175 Mm.
+(a) $qR_\odot$ = 200, f-f + $p_1$-$p_1$ + $p_2$-$p_2$ (b) $qR_\odot$ = 250, f-f + $p_1$-$p_1$ + $p_2$-$p_2$
+Figure 7. Real-space divergence flows (left column, in units of $10^{-5}$ s$^{-1}$) and radial vorticity (right column, in units of $10^{-6}$ s$^{-1}$)
+for LCT (top row), and mode-coupling inversion through RLS using all the couplings (bottom row), bandpass filtered around
+(a) $qR_\odot$ = 200, and (b) $qR_\odot$ = 250. We cut edges out from the flow maps and compare a circular region of diameter ≈175 Mm.
+vorticity maps for LCT for different values of smoothing. These flow maps are then compared with those obtained
+from inversions at all depths in the radial grid that are within 0.5 Mm from the surface. The highest correlation
+(corresponding to the above depths and smoothing) is noted and comparison flow maps are plotted for the desired
+qR
+.
+It has been shown (see De Rosa & Toomre 2004; Langfellner et al. 2015) that line-of-sight velocity from Dopplergrams
+and LCT agree closely in amplitudes. But, to recapitulate, a host of factors described above can skew the amplitudes
+for divergence flows owing to the multi-step process involved in obtaining them. For example, there has been a history
+(see, e.g., De Rosa et al. 2000; Sekii et al. 2007; Zhao et al. 2007; Langfellner et al. 2018; Böning et al. 2020; Korda
+& Švanda 2021) of using travel-time difference as only a proxy for horizontal divergence. However, Langfellner et al.
+Imaging near-surface flows using mode-coupling analysis 11
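+| Coupling                        | $qR_\odot$ | div  | curl |
+| :------------------------------ | ---------: | ---: | ---: |
+| f-f + $p_1$-$p_1$ + $p_2$-$p_2$ |        100 | 0.97 | 0.87 |
+|                                 |        150 | 0.95 | 0.76 |
+|                                 |        200 | 0.92 | 0.76 |
+|                                 |        250 | 0.85 | 0.65 |
+| f-f                             |        100 | 0.96 | 0.85 |
+|                                 |        150 | 0.93 | 0.76 |
+|                                 |        200 | 0.89 | 0.69 |
+|                                 |        250 | 0.77 | 0.58 |
+| $p_1$-$p_1$                     |        100 | 0.95 | 0.83 |
+|                                 |        150 | 0.95 | 0.75 |
+|                                 |        200 | 0.92 | 0.75 |
+|                                 |        250 | 0.85 | 0.61 |
+| $p_2$-$p_2$                     |        100 | 0.94 | 0.7  |
+|                                 |        150 | 0.91 | 0.39 |
+|                                 |        200 | 0.79 | 0.3  |
+|                                 |        250 | 0.55 | 0.3  |
+Table 2. Correlation between mode-coupling flow maps and LCT maps derived from HMI Dopplergrams and intensity images,
+respectively.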
+(2015), Birch et al. (2016) and Birch et al. (2019) use empirically determined conversion factors to align flow amplitudes
+from travel-time measurements with those of LCT, while acknowledging that LCT underestimates magnitudes (see
+Verma et al. 2013; Löptien et al. 2016). Even for the case of supergranulation divergence maps obtained through
+ring-diagram helioseismology, Greer et al. (2016) only report normalized amplitudes.
+In this work, we have developed inversions to show that the Cartesian approximation of mode-coupling can be used
+with great confidence to investigate flows near the surface. Careful inversions of mode-coupling measurements, built
+using a sufficiently large modeset that penetrates into the deeper layers of the convection zone, can also enable probing
+of the depth structure and time-evolution of supergranules, part of future work. With enough modes to improve
+signal-to-noise through larger observation sizes, we suggest that Cartesian mode-coupling can find local helioseismic
+applications to investigate other depth- and time-varying features such as giant cell flows (see Hathaway et al. 2013;
+Hanson et al. 2020), emerging active regions, meridional flows and Rossby waves.
+APPENDIX
+A. DERIVATION OF THE FORWARD MODEL
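+As described in section 1.1, we seek to describe the flow $\mathbf{u}$ as a function of $\mathbf{q}$ along $\mathbf{e}_z$. To that end, substituting
+eq 3 into eq 2,
+$$\mathbf{u}^{\sigma}_{\mathbf{q}}(z) = \sum_{j} \left[ \left( q^{2} f_j\, \mathbf{e}_z + i\mathbf{q}\, f'_j \right) P^{\sigma}_{j\mathbf{q}} + i\mathbf{q}\times\mathbf{e}_z\, f_j\, T^{\sigma}_{j\mathbf{q}} \right], \tag{A1}$$
+where $f'_j \equiv \partial_z f_j$.
+For flows in the anelastic limit ($u \ll$ speed of sound), we can denote the flow perturbation operator as $\delta\mathcal{L}^{\sigma} = -2i\omega\rho\, \mathbf{u}^{\sigma} \cdot \nabla$ (see Hanasoge et al. 2017). Substituting Eq. A1 into the operator, we get,
+$$\delta\mathcal{L}^{\sigma}_{\mathbf{q}} = -2i\omega\rho \left( i\, \mathbf{u}^{\sigma}_{\mathbf{q}} \cdot \mathbf{k} + \mathbf{u}^{\sigma}_{\mathbf{q}} \cdot \mathbf{e}_z\, \partial_z \right), \tag{A2}$$
+$$\phantom{\delta\mathcal{L}^{\sigma}_{\mathbf{q}}} = -2i\omega\rho \sum_{j} \left[ -\mathbf{k}\cdot\mathbf{q}\, f'_j\, P^{\sigma}_{j\mathbf{q}} - \mathbf{k}\cdot(\mathbf{q}\times\mathbf{e}_z)\, f_j\, T^{\sigma}_{j\mathbf{q}} + q^{2} f_j\, P^{\sigma}_{j\mathbf{q}}\, \partial_z \right]. \tag{A3}$$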
+  Mani et al.
+Express the mode eigenfunction describing oscillations in the Cartesian domain by (see Woodard 2006)
+ξ
+k ≡ ξ
+nk (z) = i ˆ
+kH
+nk (z)e
+z + ˆzV
+nk (z), (A4)
+where H and V are real-valued functions; n and n
+ are dropped for compactness of notation. Then the coupling of
+two modes ξ
+k and ξ
+k (k
+ = k + q), by the flow perturbation operator δLσ
+q , denoted by coupling integral Λk
+k (σ), is
+given by
+ Λk
+k (σ) ≡
+ dx (δLσ
+q ξ
+k ) · ξ∗
+k =
+ dx
+ − 2i ωρ
+j
+q2
+ f
+j P σ
+jq ( ˆ
+k · ˆ
+k
+ H
+k H ∗
+k + V
+k V ∗
+k )
+−
+k · q f
+j P σ
+jq + k · (q×e
+z ) f
+j T σ
+jq
+ ( ˆ
+k · ˆ
+k
+ H
+k H ∗
+k + V
+k V ∗
+k )
+ (A5)
+We desire to linearly relate the coupling integral in the above equation to the flows P and T , through poloidal and
+toroidal sensitivity kernels, C
+qj,k and D
+qj,k respectively. Hence, they are given by
+C
+qj,k =
+ dz ρ
+q2
+ f
+j ( ˆ
+k · ˆ
+k
+ H
+k H ∗
+k + V
+k V ∗
+k )
+−k · q f
+j ( ˆ
+k · ˆ
+k
+ H
+k H ∗
+k + V
+k V ∗
+k )
+ ,
+D
+qj,k = k · (q×e
+z )
+ dz ρ f
+j ( ˆ
+k · ˆ
+k
+ H
+k H ∗
+k + V
+k V ∗
+k ). (A6)
+Note the symmetry C
+qj,k = C
+−qj,−k and D
+qj,k = D
+−qj,−k . This coupling integral contributes to the cross-spectral
+measurement between modes k and k + q. From eq 8 of Woodard (2014), we write the first-order effect of flow on
+wavefield cross-correlation as
+ φω∗
+k φω+σ
+k+q  = H ω
+kk
+σ Λk
+k (σ), (A7)
+where the function H is given by
+ Hω
+kk
+σ = −2i ω(N
+k |Rω
+k |2
+ Rω+σ
+k + N
+k
+ |Rω+σ
+k |2
+ Rω∗
+k ). (A8)
+We absorb the factor −2i ω into the definition of H. Substitute eq A6 in right-hand-side of eq A7 to obtain eq 4.
+The mode spectral profile R is a Lorentzian, given by
+Rω
+k = 1
+ω2
+nk − ω2
+ − iωγ
+nk /2 , (A9)
+where ω
+nk is the resonant frequency of the mode, and γ
+nk is the mode linewidth. Eq A9 can be derived by introducing
+mode damping −i ωγ ρ as an operator in the differential equation that governs undamped, driven oscillations (see eq
+5 of Hanasoge et al. 2017), and then deriving the effects of first-order perturbations to the wavefield cross-correlation.
+Also, the parity Hω
+kk
+σ = H−ω∗
+kk
+−σ and Rω
+k = R−ω∗
+k are established. Mode normalization N is given by
+N
+k = 1
+Q Q
+
+k
+ω |φω
+k |2
+
+ω Rω
+k , (A10)
+where the 1
+Q Q
+
+k on the right-hand-side implies average over all [k
+x, k
+y ] (Q terms in all) such that k = |k| is constant.
+This forces N to be isotropic, i.e., to only depend on k, and not k. The sum over ω is within five linewidths of ω
+nk .
+Note that Eq. A8 through A10 are modified from H21 to ensure parity and that flow maps are real.
+The three equations A8 through A10, along with the symmetry relation for kernels, and summation over ±ω, serve
+to establish the parity Bσ
+k,q = B∗−σ
+−k,−q . This allows for obtaining P σ
+q = P ∗−σ
+−q , and subsequently, purely real flow in
+the real domain. Setting σ = 0 gives us the linear, invertible equation eq 6. Substituting eqns A8 through A10 into
+the noise model obtained in H21 and summing over ±ω establishes the symmetry Gσ
+k,q = G−σ
+−k,−q .
+Imaging near-surface flows using mode-coupling analysis 13
+B. SOLA INVERSIONS
+Subtractive Optimally Localized Averages (SOLA, Pijpers & Thompson 1994) aims to obtain a set of weight factors
+for the mode q and depth z
+o, which we will call α
+k,zo. A linear weighted sum of the measurements B
+k,q in the fashion
+
+k α
+k,zoB
+k,q allows for an average value of the flow P
+q (z) to be estimated at the depth z
+o. To obtain the coefficients
+α
+k,zo, it is assumed that a set of sensitivity kernels K
+k,q (z) for the mode q can be summed up coherently to give an
+’averaging kernel’ that is localized at the depth z
+o. Conventionally, a Gaussian centered at z
+o and a width ∆ is chosen
+which the averaging kernel should resemble after performing inversion.
+B.1. Kernels in the integral form
+Since the kernels in eq A6 are manifest as coefficients on a basis f
+j (z), we first derive kernels that can be expressed
+as a function of depth z (see Figure 8). It is convenient to derive in matrix form. Thus, with the following definitions:
+P ≡ P
+q (z), p ≡ P
+qj , F ≡ f
+j (z), B ≡ B
+k,q C ≡ C
+qj,k and K ≡ K
+k,q (z), we write (assume only poloidal flow for
+simplicity, the same derivations hold true for toroidal flow as well)
+P = F p (B11)
+The size of P is thus the same as the length of the radial grid z.
+Now, pre-multiply by F T
+ and integrate over z on both sides (drop the integral notation for compactness),
+F T
+ P = (F T
+ F )p
+p = (F T
+ F )−1
+ F T
+ P (B12)
+Now, substituting eq B12 into the forward problem eq 6,
+B = C p
+= (F T
+ F )−1
+F T
+ C P
+= K P (B13)
+where
+ K = (F T
+ F )−1
+F T
+ C,
+i.e., K
+k,q (z) =
+j,j
+ dz f
+j (z)f
+j
+ (z)
+−1
+f
+j
+ (z)C
+qj
+,k (B14)
+B.2. Obtaining the coefficients α
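+Now, demand that the averaging kernel should resemble a unimodulus target Gaussian centered at $z_o$
+$$\mathcal{T}(z, z_o) = \frac{1}{\sqrt{2\pi\Delta^{2}}} \exp\left[ -\frac{(z - z_o)^{2}}{2\Delta^{2}} \right]. \tag{B15}$$
+This can be achieved by solving the optimization problem
+$$\text{minimize}\quad X = \int dz\, \left[ \mathcal{T}(z, z_o) - \Theta_{\mathbf{q}}(z, z_o) \right]^{2}, \tag{B16}$$
+where we introduce the averaging kernel for mode $\mathbf{q}$ thus
+$$\Theta_{\mathbf{q}}(z, z_o) = \sum_{\mathbf{k}} \alpha_{\mathbf{k},z_o}\, K_{\mathbf{k},\mathbf{q}}(z). \tag{B17}$$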
+As an aside, we note that averaging kernels can similarly be constructed for RLS (see section 3.1) using eqns 13
+and B14.
+  Mani et al.
+Figure 8. Left : Kernel K
+k,q (z) (eq B14) shown vs depth z for the three radial order couplings f-f, p
+1-p
+1, and p
+2-p
+2. qR
+ =
+[−112, −45] and kR
+ = [−853, −157] is chosen for all the radial order couplings for comparison. Right : Averaging kernel
+(eq B17) using SOLA, for qR
+ = [−112, −45] at depth z
+0 = −0.48 Mm, and the corresponding target Gaussian (eq B15).
+Integral of the averaging kernel over z is 0.89.
+Setting $\partial X / \partial \alpha \to 0$ gives us the matrix problem to be solved
+$$A\{\alpha\} = v,$$
+$$\{\alpha\} = \left( A + \mu I \right)^{-1} v, \tag{B18}$$
+where the square matrix A =
+ dz K
+k,q (z)K
+k
+,q (z) and v =
+ dz K
+k,q (z)T (z, z
+o). Here, k
+ is just a dummy index for
+denoting elements in the matrix A, (k
+ = k + q). In the last line of eq B18, we introduce regularization using an Identity
+matrix I , with the regularization parameter µ - purpose being the same as that described in section 3.1. Obtaining
+α thus becomes computationally highly expensive for a very large number of modes (see section 3). Substitute α
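+obtained from eq B18 into last line of eq B13, and sum over $\mathbf{k}$ on both sides
+$$\begin{aligned} \sum_{\mathbf{k}} \alpha_{\mathbf{k},z_o}\, B^{\sigma}_{\mathbf{k},\mathbf{q}} &= \sum_{\mathbf{k}} \alpha_{\mathbf{k},z_o} \int dz\, K_{\mathbf{k},\mathbf{q}}(z)\, P^{\sigma}_{\mathbf{q}}(z), \\ &= \int dz\, \Theta_{\mathbf{q}}(z, z_o)\, P^{\sigma}_{\mathbf{q}}(z), \\ &\approx P^{\sigma}_{\mathbf{q}}(z_o). \end{aligned} \tag{B19}$$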
+Inversions can similarly be performed for multiple depths by choosing suitable widths for the target Gaussians. Divergence
+ flow can then be obtained from eq 16. Results are shown in Figures 9 and 10.
+REFERENCES
+Anderson, E. R., Duvall, Thomas L., J., & Jefferies, S. M.
+1990, ApJ, 364, 699, doi: 10.1086/169452
+Bahcall, J. N., & Pinsonneault, M. H. 1992, Reviews of
+Modern Physics, 64, 885,
+doi: 10.1103/RevModPhys.64.885
+Birch, A. C., Schunker, H., Braun, D. C., et al. 2016,
+Science Advances, 2, e1600557,
+doi: 10.1126/sciadv.1600557
+Birch, A. C., Schunker, H., Braun, D. C., & Gizon, L. 2019,
+A&A, 628, A37, doi: 10.1051/0004-6361/201935591
+Böning, V. G. A., Birch, A. C., Gizon, L., Duvall, T. L., &
+Schou, J. 2020, A&A, 635, A181,
+doi: 10.1051/0004-6361/201937331
+Braun, D. C., & Lindsey, C. 2001, ApJL, 560, L189,
+doi: 10.1086/324323
+Christensen-Dalsgaard, J. 2002, Reviews of Modern
+Physics, 74, 1073, doi: 10.1103/RevModPhys.74.1073
+—. 2021, Living Reviews in Solar Physics, 18, 2,
+doi: 10.1007/s41116-020-00028-3
+Imaging near-surface flows using mode-coupling analysis 15
+Figure 9. Left : Poloidal flow power-spectrum for f-f as a function of $q_x R_\odot$ and $q_y R_\odot$. Right : Corresponding power-spectrum
+averaged over the azimuthal angle. Shaded region shows ±1σ error around the mean. Power is in units of m$^2$/s$^4$.
+Figure 10. Real-space divergence flows (in units of $10^{-5}$ s$^{-1}$) for mode-coupling inversion through SOLA using f-f coupling,
+and LCT, bandpass filtered around $qR_\odot$ = 100. We cut edges out from the flow maps and compare a circular region of diameter
+≈175 Mm. The scatter plot shows the agreement between the maps. The slope of the best-fit line through the scatter plot is
+1.05. For demonstration, we show inversions only for poloidal flow using SOLA.
+De Rosa, M., Duvall, T. L., J., & Toomre, J. 2000, SoPh,
+192, 351, doi: 10.1023/A:1005269001739
+De Rosa, M. L., & Toomre, J. 2004, ApJ, 616, 1242,
+doi: 10.1086/424920
+Duvall, T. L., J., & Harvey, J. W. 1986, in NATO Advanced
+Study Institute (ASI) Series C, Vol. 169, Seismology of
+the Sun and the Distant Stars, ed. D. O. Gough, 105–116
+Duvall, T. L., J., Jefferies, S. M., Harvey, J. W., &
+Pomerantz, M. A. 1993, Nature, 362, 430,
+doi: 10.1038/362430a0
+Fisher, G. H., & Welsch, B. T. 2008, in Astronomical
+Society of the Pacific Conference Series, Vol. 383,
+Subsurface and Atmospheric Influences on Solar Activity,
+ed. R. Howe, R. W. Komm, K. S. Balasubramaniam, &
+G. J. D. Petrie, 373. https://arxiv.org/abs/0712.4289
+Giles, P. M., Duvall, T. L., Scherrer, P. H., & Bogart, R. S.
+1997, Nature, 390, 52, doi: 10.1038/36294
+Gizon, L., & Birch, A. C. 2004, ApJ, 614, 472,
+doi: 10.1086/423367
+Gizon, L., Cameron, R. H., Pourabdian, M., et al. 2020,
+Science, 368, 1469, doi: 10.1126/science.aaz7119
+Gizon, L., Cameron, R. H., Bekki, Y., et al. 2021, A&A,
+652, L6, doi: 10.1051/0004-6361/202141462
+Greer, B. J., Hindman, B. W., & Toomre, J. 2016, ApJ,
+824, 128, doi: 10.3847/0004- 637X/824/2/128
+Hanasoge, S., & Mandal, K. 2019, ApJL, 871, L32,
+doi: 10.3847/2041- 8213/aaff60
+Hanasoge, S. M., Hotta, H., & Sreenivasan, K. R. 2020,
+Science Advances, 6, eaba9639,
+doi: 10.1126/sciadv.aba9639
+Hanasoge, S. M., Woodard, M., Antia, H. M., Gizon, L., &
+Sreenivasan, K. R. 2017, MNRAS, 470, 1404,
+doi: 10.1093/mnras/stx1298
+Hansen, P. C. 1992, SIAM review, 34, 561
+Hanson, C. S., Duvall, T. L., Birch, A. C., Gizon, L., &
+Sreenivasan, K. R. 2020, A&A, 644, A103,
+doi: 10.1051/0004- 6361/202039108
+Hanson, C. S., Hanasoge, S., & Sreenivasan, K. R. 2021,
+ApJ, 910, 156, doi: 10.3847/1538-4357/abe770
+Hathaway, D. H., Teil, T., Norton, A. A., & Kitiashvili, I.
+2015, ApJ, 811, 105, doi: 10.1088/0004- 637X/811/2/105
+  Mani et al.
+Hathaway, D. H., Upton, L., & Colegrove, O. 2013, Science,
+342, 1217, doi: 10.1126/science.1244682
+Hill, F. 1988, ApJ, 333, 996, doi: 10.1086/166807
+Kashyap, S. G., Das, S. B., Hanasoge, S. M., Woodard,
+M. F., & Tromp, J. 2021, ApJS, 253, 47,
+doi: 10.3847/1538- 4365/abdf5e
+Korda, D., & Švanda, M. 2021, A&A, 646, A184,
+doi: 10.1051/0004-6361/202039928
+Langfellner, J., Birch, A. C., & Gizon, L. 2018, A&A, 617,
+A97, doi: 10.1051/0004-6361/201732471
+Langfellner, J., Gizon, L., & Birch, A. C. 2015, A&A, 581,
+A67, doi: 10.1051/0004-6361/201526024
+Lavely, E. M., & Ritzwoller, M. H. 1992, Philosophical
+Transactions of the Royal Society of London Series A,
+339, 431, doi: 10.1098/rsta.1992.0048
+Lindsey, C., & Braun, D. C. 2000, SoPh, 192, 261,
+doi: 10.1023/A:1005227200911
+L¨optien, B., Birch, A. C., Duvall, T. L., Gizon, L., &
+Schou, J. 2016, A&A, 587, A9,
+doi: 10.1051/0004- 6361/201526805
+L¨optien, B., Gizon, L., Birch, A. C., et al. 2018, Nature
+Astronomy, 2, 568, doi: 10.1038/s41550-018- 0460-x
+Mandal, K., & Hanasoge, S. 2020, ApJ, 891, 125,
+doi: 10.3847/1538- 4357/ab7227
+Mandal, K., Hanasoge, S. M., & Gizon, L. 2021, A&A, 652,
+A96, doi: 10.1051/0004-6361/202141044
+Mani, P., & Hanasoge, S. 2020, ApJ, 901, 139,
+doi: 10.3847/1538- 4357/abb133
+—. 2021, ApJ, 920, 36, doi: 10.3847/1538- 4357/ac1ad6
+November, L. J., & Simon, G. W. 1988, ApJ, 333, 427,
+doi: 10.1086/166758
+Pijpers, F. P., & Thompson, M. J. 1994, A&A, 281, 231
+Rieutord, M., Roudier, T., Ludwig, H. G., Nordlund, Å., &
+Stein, R. 2001, A&A, 377, L14,
+doi: 10.1051/0004-6361:20011160
+Rincon, F., & Rieutord, M. 2018, Living Reviews in Solar
+Physics, 15, 6, doi: 10.1007/s41116-018-0013-5
+Rincon, F., Roudier, T., Schekochihin, A. A., & Rieutord,
+M. 2017, A&A, 599, A69,
+doi: 10.1051/0004- 6361/201629747
+Schad, A., & Roth, M. 2020, ApJ, 890, 32,
+doi: 10.3847/1538- 4357/ab65ec
+Scherrer, P. H., Schou, J., Bush, R. I., et al. 2012, SoPh,
+275, 207, doi: 10.1007/s11207- 011-9834- 2
+Schou, J., Antia, H. M., Basu, S., et al. 1998, ApJ, 505,
+390, doi: 10.1086/306146
+Sekii, T. 1997, in Sounding Solar and Stellar Interiors, ed.
+J. Provost & F.-X. Schmider, Vol. 181, ISBN0792348389
+Sekii, T., Kosovichev, A. G., Zhao, J., et al. 2007, PASJ,
+59, S637, doi: 10.1093/pasj/59.sp3.S637
+Snodgrass, H. B. 1984, SoPh, 94, 13,
+doi: 10.1007/BF00154804
+Thompson, M. J., Toomre, J., Anderson, E. R., et al. 1996,
+Science, 272, 1300, doi: 10.1126/science.272.5266.1300
+Unno, W., Osaki, Y., Ando, H., Saio, H., & Shibahashi, H.
+1989, Nonradial oscillations of stars
+Verma, M., Steffen, M., & Denker, C. 2013, A&A, 555,
+A136, doi: 10.1051/0004-6361/201321628
+Vorontsov, S. V. 2011, MNRAS, 418, 1146,
+doi: 10.1111/j.1365- 2966.2011.19564.x
+Woodard, M. 2014, SoPh, 289, 1085,
+doi: 10.1007/s11207- 013-0386- 5
+Woodard, M., Schou, J., Birch, A. C., & Larson, T. P.
+2013, SoPh, 287, 129, doi: 10.1007/s11207- 012-0075- 9
+Woodard, M. F. 1989, ApJ, 347, 1176, doi: 10.1086/168206
+—. 2006, ApJ, 649, 1140, doi: 10.1086/506927
+—. 2007, ApJ, 668, 1189, doi: 10.1086/521391
+—. 2016, MNRAS, 460, 3292, doi: 10.1093/mnras/stw1223
+Zhao, J., Georgobiani, D., Kosovichev, A. G., et al. 2007,
+ApJ, 659, 848, doi: 10.1086/512009
+Zhao, J., Nagashima, K., Bogart, R. S., Kosovichev, A. G.,
+& Duvall, T. L., J. 2012, ApJL, 749, L5,
+doi: 10.1088/2041- 8205/749/1/L5
\ No newline at end of file
diff --git a/read/results/playa/2201.00200.txt b/read/results/playa/2201.00200.txt
new file mode 100644
index 0000000..5c78e5f
--- /dev/null
+++ b/read/results/playa/2201.00200.txt
@@ -0,0 +1,736 @@
+Astronomy & Astrophysics manuscript no. solar_model_v10_corrected  © ESO 2022
+January 4, 2022
+Local heating due to convective overshooting and the solar
+modelling problem
+I. Baraffe1,2
+, T. Constantino1
+, J. Clarke1
+, A. Le Saux1,2
+, T. Goffrey4
+, T. Guillet1
+, J. Pratt3
+, D. G. Vlaykov1
+1
+ University of Exeter, Physics and Astronomy, EX4 4QL Exeter, UK (e-mail: i.baraffe@ex.ac.uk)
+2
+ École Normale Supérieure, Lyon, CRAL (UMR CNRS 5574), Université de Lyon, France
+3
+ Department of Physics and Astronomy, Georgia State University, Atlanta GA 30303, USA
+4
+ Centre for Fusion, Space and Astrophysics, Department of Physics, University of Warwick, Coventry, CV4 7AL, UK
+ABSTRACT
+Recent hydrodynamical simulations of convection in a solar-like model suggest that penetrative convective flows at the boundary
+of the convective envelope modify the thermal background in the overshooting layer. Based on these results, we implement in onedimensional
+ stellar evolution codes a simple prescription to modify the temperature gradient below the convective boundary of a
+solar model. This simple prescription qualitatively reproduces the behaviour found in the hydrodynamical simulations, namely a
+local heating and smoothing of the temperature gradient below the convective boundary. We show that introducing local heating in
+the overshooting layer can reduce the sound-speed discrepancy usually reported between solar models and the structure of the Sun
+inferred from helioseismology. It also affects key quantities in the convective envelope, such as the density, the entropy, and the
+speed of sound. These effects could help reduce the discrepancies between solar models and observed constraints based on seismic
+inversions of the Ledoux discriminant. Since mixing due to overshooting and local heating are the result of the same convective
+penetration process, the goal of this work is to invite solar modellers to consider both processes for a more consistent approach.
+Key words. Convection – Hydrodynamics – Stars: evolution – Sun: evolution - helioseismology - interior
+1. Introduction
+Modelling the internal structure of the Sun is still a challenge.
+A recent review by Christensen-Dalsgaard (2021) describes in
+detail the long-standing efforts to improve solar models. The solar
+ modelling problem refers to the discrepancy between helioseismology
+ and solar interior models that adopt low metallicities
+ predicted by the three-dimensional (3D) atmosphere models
+of, for example, Asplund et al. (2009) and Caffau et al. (2011),
+in contrast to the high metallicities based on previous literature
+ compilations by, for example, Anders & Grevesse (1989)
+and Grevesse & Noels (1993). Asplund et al. (2021) have recently
+ confirmed with state-of-the-art 3D simulations the relatively
+ low metal abundances for the Sun. Asplund et al. (2021)
+consider that their study yields the most reliable solar abundances
+ available today, suggesting that the solar modelling problem
+ is no longer a problem of abundances but rather a problem
+of stellar physics. The treatment of mixing below the convective
+zone is one of the key processes that could improve solar models.
+ Several studies indeed reveal that the process of convective
+penetration, also called overshooting, at the bottom of the convective
+ envelope could play an important role in improving the
+agreement between solar models and helioseismic constraints
+(see for example Christensen-Dalsgaard et al. 2011; Zhang et al.
+2012; Buldgen et al. 2019b). Overshooting in solar models has
+most often been treated using diffusive or instantaneous chemical
+ mixing. A temperature gradient that sharply transitions from
+a nearly adiabatic form to a radiative form is usually assumed,
+as suggested by the theoretical work of Zahn (1991). Models
+with a smoother transition have also been investigated. Based
+on the analysis of models with different stratifications near the
+Send offprint requests to: I. Baraffe
+base of the convective zone, Christensen-Dalsgaard et al. (2011)
+found that models that better fit the helioseismic data have a
+weakly sub-adiabatic temperature gradient in the lower part of
+the convective zone and a smooth transition to the radiative gradient
+ in the overshooting layer. But Christensen-Dalsgaard et al.
+(2011) noted that the required temperature stratification is difficult
+ to reconcile with existing overshooting models and numerical
+ simulations. They concluded that only non-local turbulent
+convection models could produce the desired degree of smoothness
+ in the transition (see for example Zhang & Li 2012; Zhang
+et al. 2012). But these non-local models remain uncertain, and
+their description of overshooting under the conditions found at
+the base of the solar convective zone is yet to be validated.
+Zhang et al. (2019) explored the impact of overshooting by
+introducing a parametrised turbulent kinetic energy flux based
+on a model with parameters that are adjusted to improve the
+helioseismic properties. They suggest that amelioration can be
+obtained specifically below the convective envelope. However,
+Zhang et al. (2019) find that this model cannot solve the whole
+solar problem because such a flux worsens the sound-speed profile
+ in the deep radiative interior of their solar model. Given the
+uncertainties regarding the temperature stratification of the overshooting
+ region, solar modellers have considered these effects as
+secondary and have focused their efforts on exploring the impact
+of solar abundances, microphysics (opacities, equations of state,
+nuclear reaction rates), and chemical mixing and diffusion (see
+details and references in the review of Buldgen et al. 2019a).
+Additional, more exotic effects such as early disk accretion or
+solar-wind mass loss (Zhang et al. 2019; Kunitomo & Guillot
+2021) are also attracting increasing attention.
+To reinvigorate the debate, Buldgen et al. (2019b) recently
+highlighted once again how the transition of the temperature gra-
+1
+arXiv:2201.00200v1  [astro-ph.SR]  1 Jan 2022
+Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem
+dient just below the convective envelope can significantly impact
+the disagreement between solar models and helioseismic constraints.
+ Their results, based on a method that combines multiple
+ structural inversions, suggest that the transition in temperature
+ gradient is improperly reproduced by adopting either an
+adiabatic or a radiative temperature gradient in the overshooting
+ layer. The solution should be somewhere in between these
+two extremes. Christensen-Dalsgaard et al. (2018) also note that
+an increase in the temperature at the transition would remove
+a remaining small sharp dip in the speed of sound immediately
+beneath the convective zone of the model. A major difficulty is
+to disentangle the effects of overshoot from the effects of opacities,
+ which can also alter the temperature gradient in these layers.
+Given the large number of parameters to deal with in order to improve
+ solar models and the current lack of strong arguments in
+favour of modifying the thermal stratification in the overshooting
+ layer, there has been no real motivation to deviate from the
+traditional picture of a sharp transition as formalised by Zahn
+(1991).
+The present work is motivated by arguments inspired by hydrodynamical
+ simulations of convection and convective penetration
+ in solar-like models. Recent hydrodynamical simulations by
+Baraffe et al. (2021, hereafter B21) highlight the process of local
+heating in the overshooting region due to penetrating convective
+motions across the convective boundary. In the following, we
+analyse the potential impact of this feature on one-dimensional
+(1D) stellar evolution structures in the context of solar models.
+The hydrodynamical results of B21 are briefly summarised in
+Sect. 2, and their impact on 1D models are analysed in Sect. 3
+and discussed in Sect. 4.
+2. Modification of the thermal background in the
+overshooting layer: Results from
+two-dimensional hydrodynamical simulations
+B21 performed two-dimensional (2D) fully compressible timeimplicit
+ simulations of convection and convective penetration in
+a solar-like model with the MUlti-dimensional Stellar Implicit
+Code MUSIC (Viallet et al. 2011, 2016; Goffrey et al. 2017).
+The main motivation was to explore the impact of an artificial
+increase in the stellar luminosity on the properties of convection
+and convective penetration. This procedure is a common tactic
+adopted in hydrodynamical simulations of convection (Rogers
+et al. 2006; Meakin & Arnett 2007; Brun et al. 2011; Hotta 2017;
+Edelmann et al. 2019). The experiments of B21 highlight the impact
+ of penetrative downflows on the local thermal background
+in the overshooting layer. They illustrate how convective downflows,
+ when penetrating the region below the convective boundary
+ of the envelope, can induce a local heating and a modification
+of the temperature gradient as a result of compression and shear
+in the overshooting layer. This modification of the local background
+ is connected to a local increase in the radiative flux to
+counterbalance the negative enthalpy flux (or heat flux) produced
+by penetrating flows. The negative peak of the enthalpy flux
+and the positive bump of the radiative flux below the convective
+boundary are well-known features described in many numerical
+ works (Hurlburt et al. 1986; Muthsam et al. 1995; Brummell
+et al. 2002; Brun et al. 2011; Hotta 2017; Käpylä 2019; Cai
+2020). A few works (Rogers et al. 2006; Viallet et al. 2013; Korre
+et al. 2019; Higl et al. 2021) have also reported a modification
+of the local thermal background in the overshooting region, but
+without providing a detailed description. The simulations of B21
+provide a physical explanation that links the convective penetration process to the local heating and to the radiative bump in the
+overshooting layer. The solar-like star simulated in B21 is based
+on a model that is not thermally relaxed. It is reasonable to assume
+ that the local heating seen in B21 is present in stars because
+the negative heat flux in the overshooting layer and the bump in
+the radiative flux that compensates for this feature are persistent.
+These two features are also commonly observed in other hydrodynamical
+ simulations, as mentioned above. An exploration of
+the impact of this heating on stellar evolution models may reveal
+that heating is a necessary aspect of models for overshooting.
+Fig. 1. Radial profile of the temperature departure $\Delta T/T_0$ from
+the initial profile $T_0$ and of the sub-adiabaticity $(\nabla - \nabla_{\rm ad})$ close to
+the convective boundary predicted by 2D hydrodynamical simulations
+ (B21) of solar-like models. The lower panel corresponds
+to the model with a realistic stellar luminosity and the upper
+panel to a model with luminosity enhanced by a factor of ten.
+The dash-dotted red lines show $\Delta T/T_0$ (in %), the relative difference
+ between the time and space averages of the temperature,
+$T$, and the initial temperature, $T_0$. The solid blue lines show the
+time and space averages of the sub-adiabaticity $(\nabla - \nabla_{\rm ad})$. The
+dashed black lines show the initial profile of the sub-adiabaticity,
+$(\nabla - \nabla_{\rm ad})_{\rm init}$. The convective boundary is indicated by the vertical
+solid line (see details in B21)
+The behaviour of the thermal profile below the convective
+boundary found in the simulations of B21 is illustrated in Fig.
+1. It is displayed for the model with a realistic stellar luminosity
+(lower panel). We also show the results for a model with an artificial
+ enhancement in the luminosity by a factor of ten because the
+features are intensified in these ‘boosted’ models (upper panel).
+The figure shows the local heating in the overshooting layer and
+its impact on the sub-adiabaticity $(\nabla - \nabla_{\rm ad})$, with $\nabla = \frac{d \log T}{d \log P}$ the
+
+Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem
+temperature gradient and $\nabla_{\rm ad} = \left.\frac{d \log T}{d \log P}\right|_{S}$ the adiabatic gradient.
+The initial stratification below the convective boundary (located
+at r = 0.6734 × R
+star for this specific stellar model) is set by
+the stable radiative gradient, ∇
+rad (see the dashed black line below
+ the convective boundary in Fig. 1). B21 show that, as a result
+ of the local heating below the convective boundary characterised
+ by the bump in temperature difference ∆T /T
+0 displayed
+in Fig. 1, the temperature gradient becomes less sub-adiabatic
+immediately below the convective boundary1
+. The net result is
+a smoother transition just below the convective boundary with
+a temperature gradient that has an intermediate value between
+the radiative temperature gradient and the adiabatic one. In the
+next section we analyse the impact of this local heating on 1D
+solar structures by adopting a simple prescription that mimics
+the behaviour of the temperature gradient suggested by hydrodynamical
+ simulations.
+3. Impact on one-dimensional solar structure
+models
+3.1. Helioseismic constraints
+Our primary goal in this short paper is to illustrate the potential,
+qualitative impact of the local heating produced by overshooting.
+ We adopted a strategy inspired by the analysis of Buldgen
+et al. (2020), who constructed a static structure of the Sun in
+agreement with seismic inversions of the Ledoux discriminant
+defined by
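+$A = \frac{1}{\Gamma_1}\frac{d \ln P}{d \ln r} - \frac{d \ln \rho}{d \ln r}$,  (1)
+with $\Gamma_1 = (\partial \ln P/\partial \ln \rho)_{\rm ad}$. Starting from a reference evolutionary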
+ad. Starting from a reference evolutionary
+ model, Buldgen et al. (2020) used an inversion procedure
+ to iteratively reconstruct a solar model. Successive inversions
+ of the Ledoux discriminant allowed them to obtain a
+model-independent profile for this quantity. Their reconstruction
+method also gives solar structures that are in excellent agreement
+ with other structural inversions, namely the entropy, S , the
+square of the speed of sound, c2
+s , and the density, ρ. To illustrate
+the convergence of their reconstruction procedure, they show
+(right panels of their Figs. 3-6) the successive iterations that converge
+ to an excellent level of agreement for the four structural
+inversions (A, S , c2
+s , ρ) starting from the initial reference model
+adopted in their work. The differences found between the reconstructed
+ model and the reference model are useful as they indicate
+ the modifications of the reference model that are required to
+converge towards a solar model in agreement with helioseismic
+data. We recall here the major trends found by Buldgen et al.
+(2020) for the four structural quantities, which are used for our
+analysis in Sect. 3.2.
+The first concerns the Ledoux discriminant. The major discrepancy
+ between the Sun and the reference model occurs just
+below the convective boundary, with a large positive bump for
+the quantity $(A_{\rm Sun} - A_{\rm ref})$.
+The second concerns the speed of sound. The same positive
+bump at the same location as for the Ledoux discriminant, A, is
+observed for the quantity $(c^2_{s,{\rm Sun}} - c^2_{s,{\rm ref}})/c^2_{s,{\rm ref}}$. The corrections
+applied to A during the reconstruction procedure also reduce the
+discrepancy in the speed of sound in the radiative region.
+The third concerns the entropy. Large discrepancies are observed
+ in both the radiative region and the convective zone. The
+1
+ Less sub-adiabatic means that $|\nabla - \nabla_{\rm ad}|$ decreases compared to the
+initial profile.
+entropy discrepancy $(S_{\rm Sun} - S_{\rm ref})/S_{\rm ref}$ has two positive peaks in
+the radiative zone, one just below the overshooting region and a
+larger peak deeper at ∼ 40% of the stellar radius. This discrepancy
+ is negative in the convective zone. The corrections applied
+to A help reduce these entropy discrepancies in both regions.
+The fourth concerns the density. The quantity $(\rho_{\rm Sun} - \rho_{\rm ref})/\rho_{\rm ref}$ has a negative peak in the radiative region, at ∼ 35%
+of the stellar radius, and is positive in the convective zone.
+Importantly, Buldgen et al. (2020) mention that their reconstruction
+ procedure gives similar Ledoux discriminant profiles
+for a wide range of initial reference models. We used these results
+ to gauge whether the modifications of the thermal profile
+predicted by B21 can help in qualitatively improving all the
+structural quantities used by Buldgen et al. (2020).
+3.2. Testing one-dimensional solar models
+Our main motivation is to show the potential impact of the local
+heating described in Sect. 2 on stellar models. We are not aiming
+ in this short work at constructing the best solar model to fit
+helioseismic constraints. Using stellar evolution codes, we have
+adopted two different methods that can be found in the literature
+ to construct solar models (e.g. Zhang et al. 2012; Vinyoles
+et al. 2017). Our first method relies on the thermal relaxation
+of a reference model with solar radius and luminosity that is
+modified to reproduce the temperature gradient in the overshooting
+ layer suggested by hydrodynamical simulations. In this case,
+the chemical abundances are not modified by nuclear reactions,
+mixing, or microscopic diffusion during the relaxation process.
+For these tests, we used the 1D Lyon stellar evolution code
+(Baraffe et al. 1998). We repeated this experiment based on thermal
+ relaxation with the stellar evolution code MONSTAR (e.g.
+Constantino et al. 2014) and obtained the same qualitative results.
+
+The second method considers models that account for the
+modification of the temperature gradient in the overshooting
+layer from the zero age main sequence (ZAMS). The models
+are then evolved until they reach the solar radius and luminosity.
+With this approach, changes in the chemical abundances from
+nuclear reactions, microscopic diffusion, and overshooting mixing
+ are also consistent with any modification of the structure
+induced by the forced local heating in the overshooting layer.
+These tests were performed with MONSTAR as it includes the
+treatment of microscopic diffusion.
+The first method allows the impact of local heating in
+the overshooting layer after thermal relaxation to be isolated.
+The second method provides evolutionary models that are selfconsistent
+ since the effect of the modification of the temperature
+gradient is accounted for during their evolution on the main sequence.
+
+In the following, we adopt a modification of the local temperature
+ gradient in the overshooting layer that qualitatively reproduces
+ the behaviour displayed in Fig. 1. We define an overshooting
+ length $d_{\rm ov} = \alpha_{\rm ov} H_{P,{\rm CB}}$, with $H_{P,{\rm CB}}$ the pressure scale height
+at the convective boundary and $\alpha_{\rm ov}$ a free parameter. We also define
+ two radial locations, $r_{\rm ov} = r_{\rm CB} - d_{\rm ov}$ and $r_{\rm mid} = r_{\rm CB} - d_{\rm ov}/2$,
+with $r_{\rm CB}$ the radial location of the convective boundary. The temperature
+ gradient is modified as follows. For $r_{\rm mid} \le r < r_{\rm CB}$, we
+use
+$\nabla = g(r)\nabla_{\rm ad} + (1 - g(r))\nabla_{\rm rad}$, (2)
+with
+$g(r) = \sin\{[(r - r_{\rm mid})/(r_{\rm CB} - r_{\rm mid})]^{a} \times \pi/2\}$. (3)
+
+Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem
+For $r_{\rm ov} \le r < r_{\rm mid}$, we use
+$\nabla = \nabla_{\rm rad} - h(r)\nabla_{\rm ad}$,  (4)
+with
+$h(r) = b \times \sin\{[(r_{\rm mid} - r)/(r_{\rm mid} - r_{\rm ov})] \times \pi\}$. (5)
+Sine functions are used in Eqs. (3) and (5) to reproduce the
+smooth variations in the temperature gradient below the convective
+ boundary produced by the hydrodynamical simulations. We
+have verified that the results are insensitive to the smoothness of
+these variations and to the exact shape of the temperature gradient
+ radial profile. We adopted a=0.3 in Eq. (3) as it provides a
+behaviour for the temperature gradient very close to the one displayed
+ in Fig. 1. Results are rather insensitive to variations in the
+values of a between 0.2 and 0.4. We adopted b=0.03 in Eq. (5),
+which also provides a close visual match to the hydrodynamical
+results, but we note that the results are insensitive to the value of
+b.
+3.2.1. Thermal equilibrium models
+The details of the procedure for the first method are the following.
+ We calculate the evolution of a $1\,M_\odot$ model with an initial
+helium mass fraction of 0.28, metallicity Z = 0.02, and a mixing
+ length $l_{\rm mix} = 1.9 H_P$. We use a reference model that is in
+thermal equilibrium2
+ and has the luminosity and radius of the
+current Sun. Starting from this reference model, the temperature
+ gradient is modified over a prescribed depth to mimic the
+impact of overshooting according to the hydrodynamical simulations
+ described in Sect. 2. We adopt the prescription given
+by Eqs. (2)-(5) over a distance d
+ov below the convective boundary.
+ We show the results in Fig. 2 for α
+ov = 0.15 and α
+ov= 0.20.
+These overshooting widths are in good agreement with the maximal
+ depth reached by downflows below the convective boundary
+predicted by the hydrodynamical simulations for the solar-like
+model investigated in B21. We note that the stellar model used
+in B21 is slightly under-luminous compared to the Sun (see B21
+for details). B21 also mention that one should be cautious when
+directly applying the overshooting depths predicted by their simulations
+ to real stars since the final relaxed state for these simulations
+ may have different properties from non-thermally relaxed
+states. We varied α
+ov between 0.15 and 0.35 and find that the
+results do not change qualitatively. However, the amplitude of
+the variations in the model properties depends on d
+ov (see below).
+ As shown below, this simple prescription implemented in
+a stellar evolution code yields a local increase in the temperature
+ below the convective boundary, similar to that observed in
+the hydrodynamical simulations. We stress that Eqs. (2)-(5) have
+been chosen for simplicity. They are only a rough approximation
+that can mimic the thermal profile behaviour suggested in the 2D
+simulations.
+The model with a modified temperature gradient is then thermally
+ relaxed, that is to say, it is evolved over many thermal
+timescales without any modification of the abundances from nuclear
+ reactions until thermal equilibrium is reached. The temperature
+ gradient is modified in the overshooting layer during the
+whole relaxation process, and this is referred to as a ‘forced local
+heating’. This procedure ensures that the model with a modified
+temperature gradient can be consistently compared to the reference
+ model. As shown in Fig. 2, the simple prescription given
+2
+ Thermal equilibrium means that the total nuclear energy produced
+in the central regions balances the radiative losses at the surface, i.e. the
+total nuclear luminosity, $L_{\rm nuc}$, equals the total stellar luminosity, $L$.
+by Eqs. (2)-(5) yields similar qualitative changes in the temperature
+ and the sub-adiabaticity close to the convective boundary
+that was found in the hydrodynamical simulations of B21.
+Fig. 2. Radial profile of the temperature difference and of the
+sub-adiabaticity of a 1D solar-like structure with a modified temperature
+ gradient in the overshooting layer according to Eqs.
+(2)-(5). The temperature gradient is modified over a distance
+d
+ov = α
+ov H
+P,CB, with α
+ov=0.15 in the lower panel and α
+ov=0.20
+in the upper panel. The dash-dotted red lines show the percentage
+ relative temperature difference, ∆T /T
+ref , with ∆T = T − T
+ref .
+The solid blue lines correspond to the sub-adiabaticity (∇ − ∇
+ad).
+The dashed black lines show the sub-adiabaticity of the reference
+ model. The convective boundary is indicated by the vertical
+solid line. The vertical dashed line in each panel is located at a
+distance d
+ov below the convective boundary.
+The impact on the whole stellar structure was quantified by
+comparing the four structural quantities (A, S , c2
+s , ρ) between the
+modified and the reference model. The results are displayed in
+Fig. 3, with ∆X defined as (X − X
+ref ) for any structural quantity X.
+The forced local heating in the overshooting layer produces similar
+ positive peaks for ∆A, ∆S , and ∆c2
+s , as found for the temperature.
+ The modification thus provides the correction required to
+improve the discrepancy for the Ledoux discriminant described
+in the first of the trends outlined in Sect. 3.1. Unsurprisingly,
+such a modification of the temperature gradient is expected to
+improve the agreement with helioseismic constraints and help
+
+Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem
+remove the sound speed anomaly below the convective boundary
+ (second trend in Sect. 3.1), as suggested by the results of
+Christensen-Dalsgaard et al. (2011). But it is also interesting to
+note that such a modification yields a slight cooling of the convective
+ zone (see Fig. 2) and thus a negative difference for the
+entropy (see Fig. 3). A negative difference in the convective envelope
+ is in agreement with the correction required for the reference
+ model of Buldgen et al. (2020) to better match the Sun
+(see third trend in Sect. 3.1). Regarding the density, the modification
+ of the temperature gradient has an interesting impact in
+the radiative zone, with a large decrease in the density compared
+to the reference model over a broad region below the convective
+boundary. The impact on the density in the convective region for
+this specific model is partly in agreement with the correction required
+ for this quantity in the Buldgen et al. (2020) study, with a
+positive difference found only in the upper part of the convective
+envelope (see the fourth trend in Sect. 3.1).
+These trends are insensitive to the depth over which the temperature
+ gradient is modified. Increasing the depth increases the
+magnitude of the differences but has no impact on their sign. We
+find that the maximum variation in the model properties, such as
+the speed of sound, $\Delta c_s^2/c^2_{s,{\rm ref}}$, roughly scales with $d_{\rm ov}^2$. This scaling
+ is linked to the integrated area between the modified temperature
+ gradient curve and the one for the reference (non-modified)
+temperature gradient, which roughly decreases linearly with r.
+This area is proportional to the square of the overshooting depth,
+and consequently, the maximum variation in the model properties
+ is also proportional to $d_{\mathrm{ov}}^2$.
+The qualitative trends also remain
+the same whether overshooting mixing in the reference model
+is ignored or included using a step function (with instantaneous
+mixing) or an exponential decay for the diffusion coefficient (e.g.
+Freytag et al. 1996).
+3.2.2. Self-consistent evolutionary models
+For the tests based on the second method, we ran different sets
+of models with different combinations of assumptions, including
+or not microscopic diffusion and with or without overshooting
+mixing. When overshooting mixing was included in the overshooting
+ layer, it was based either on a step function or on an
+exponential decay for the diffusion coefficient. Microscopic diffusion
+ for H and He was implemented according to Thoul et al.
+(1994). For these tests, the temperature gradient was modified
+according to Eqs. (2)-(5). All models start from the ZAMS and
+are evolved until they reach the solar radius and luminosity at the
+same age. This was achieved by making small adjustments to the
+mixing length, $l_{\mathrm{mix}}$.
+The models with temperature gradient modifications
+ were compared to the relevant reference model, which
+has no modification of the temperature gradient but everything
+else is the same (i.e. the same treatment of microscopic diffusion
+ and of overshooting mixing). The evolutionary models with
+temperature gradient modifications are thus self-consistent. The
+main difference between this approach and the one in the previous
+ section is that these models accumulate small differences in,
+for example, central H abundance when compared to their reference
+ model. These tests produce the same trends in the overshooting
+ layer as found for the tests based on the first method
+(Sect. 3.2.1), independently of the treatment of overshooting
+mixing and whether microscopic diffusion is included or not.
+In the convective zone, all models give a positive difference for
+the density between the model with a modified temperature gradient
+ and the relevant reference model. For the other quantities
+($S$, $c_s^2$), the differences in the convective zone are very sensitive
+Fig. 3. Difference of various structural quantities between a
+model with a modified temperature gradient in the overshooting
+ layer and a reference model calculated with the Lyon stellar
+evolution code. The temperature gradient in the modified model
+is changed over a distance
+$d_{\mathrm{ov}} = \alpha_{\mathrm{ov}} H_{P,\mathrm{CB}}$
+below the convective
+boundary (indicated by the vertical solid line).
+The lower
+panel shows the results for
+$\alpha_{\mathrm{ov}} = 0.15$
+and the upper panel for
+$\alpha_{\mathrm{ov}} = 0.20$.
+to the assumptions regarding whether overshooting mixing is included
+ or not. But at least we find solutions that are compatible
+with the four trends found by Buldgen et al. (2020) for the four
+structural quantities. This is illustrated in Fig. 4 with a model
+that accounts for step function overshooting mixing over a distance
+$d_{\mathrm{ov}} = 0.15\,H_{P,\mathrm{CB}}$
+(lower panel)
+and
+$d_{\mathrm{ov}} = 0.20\,H_{P,\mathrm{CB}}$
+(upper
+panel).
+4. Conclusion
+The tests performed in Sect. 3 are based on different methods
+(relaxed models versus consistent evolution) that can be used to
+construct solar models. Independently of the method used, the
+tests show that a local increase in the temperature in the overshooting
+ region due to convective penetration provides the qualitative
+ effects required to improve the speed of sound discrepancy
+below the convective boundary. This discrepancy is persistent in
+
+Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem
+Fig. 4. Difference of various structural quantities between a
+modified model and a reference model calculated with the
+MONSTAR stellar evolution code. The reference model is
+evolved from the ZAMS with microscopic diffusion and step
+function overshooting mixing over a distance
+$d_{\mathrm{ov}} = \alpha_{\mathrm{ov}} H_{P,\mathrm{CB}}$
+below
+the convective boundary. The lower panel shows the results
+for $\alpha_{\mathrm{ov}} = 0.15$
+and the upper panel for
+$\alpha_{\mathrm{ov}} = 0.20$.
+The models
+with a modified temperature gradient in the overshooting layer
+(same microscopic diffusion and overshooting mixing treatment
+as the reference model) are evolved similarly from the ZAMS.
+The convective boundary is indicated by the vertical solid line.
+solar models that use low solar metal abundances. This is not
+surprising because an increase in the temperature in this specific
+ region has previously been invoked in the literature to solve
+this problem, as mentioned in Sect. 1. However, the details of
+the physical process responsible for this local heating have been
+lacking, whereas we can now suggest an explanation based on
+the B21 results. The trends that we find for the four structural
+quantities ($A$, $S$, $c_s^2$, $\rho$) are robust
+below the convective boundary
+ and in a large fraction of the radiative core, independently of
+the treatment of mixing and diffusion and of the method for constructing
+ the models in Sects. 3.2.1 and 3.2.2. Our experiments
+additionally show that such a local change in the temperature,
+despite being made over a very limited region below the convective
+ boundary, can also affect the density, the entropy, and the speed of sound in the convective envelope after thermal relaxation
+ or evolution on the main sequence. How these quantities
+are affected in the convective envelope compared to a reference
+model with no local heating depends on the strategy for building
+solar models and on the treatment of overshooting mixing. This
+mixing is obviously linked to the local heating given that both
+result from the same dynamical process. A combined testing of
+both effects in stellar models could provide more constraints on
+the general process of overshooting.
+Increasingly, efforts are now devoted to characterising the
+process of convective boundary mixing in stellar models based
+on multi-dimensional hydrodynamical simulations. More work
+is required to obtain reliable determinations of an overshooting
+depth and to describe quantitatively the mixing and impact on
+the temperature gradient. Understanding the effects of rotation
+and magnetic fields on overshooting is a significantly more difficult
+ theoretical and numerical problem to address; however,
+efforts to study these combined non-linear effects are ongoing
+(Hotta 2017; Korre et al. 2021). Despite the limitations of existing
+ hydrodynamical simulations, they are already providing
+constraints on physical processes usually treated with several
+free parameters in 1D stellar evolution models. They can thus
+limit the degrees of freedom in a problem as complex as solar
+ modelling. Our primary goal in this work is to highlight the
+potential impact of convective penetration on the thermal background
+ in the overshooting region. The processes studied in B21
+that produce a local change in the temperature gradient are also
+responsible for the mixing in this region. Because much observational
+ evidence points towards the need for extra mixing at convective
+ boundaries, for example lithium depletion in solar-like
+stars (Baraffe et al. 2017), the size of convective cores (Claret
+& Torres 2016), and colour-magnitude diagrams (Castro et al.
+2014), solar modellers often include this extra mixing in their
+models. But a consistent approach should also require accounting
+ for a local change in the temperature gradient. The impact of
+this local heating goes in the right direction to improve not only
+the discrepancies of solar models below the convective boundary,
+ but also in the convective envelope. This effect offers an interesting
+ step forward for solving the solar modelling problem.
+In this exploratory work, we adopt a simple prescription for the
+local heating in the overshooting layer since the main goal is
+to highlight its qualitative impact on stellar models. However,
+this effect should not be considered as another free parameter in
+the solar modelling problem. Future multi-dimensional hydrodynamical
+ simulations will enable this process, and its treatment
+in 1D stellar evolution codes, to be better constrained.
+5. Acknowledgements
+We thank our anonymous referee for valuable comments which
+helped improve the manuscript. This work is supported by the
+ERC grant No. 787361-COBOM and the consolidated STFC
+grant ST/R000395/1. IB thanks the
+Max Planck Institut für
+Astrophysics (Garching) for warm hospitality during completion
+of part of this work. The authors would like to acknowledge the
+use of the University of Exeter High-Performance Computing
+(HPC) facility ISCA and of the DiRAC Data Intensive service
+at Leicester, operated by the University of Leicester IT Services,
+which forms part of the STFC DiRAC HPC Facility. The equipment
+ was funded by BEIS capital funding via STFC capital
+grants ST/K000373/1 and ST/R002363/1 and STFC DiRAC
+Operations grant ST/R001014/1. DiRAC is part of the National
+e-Infrastructure.
+
+Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem
+References
+Anders, E. & Grevesse, N. 1989, Geochim. Cosmochim. Acta, 53, 197
+Asplund, M., Amarsi, A. M., & Grevesse, N. 2021, A&A, 653, A141
+Asplund, M., Grevesse, N., Sauval, A. J., & Scott, P. 2009, ARA&A, 47, 481
+Baraffe, I., Chabrier, G., Allard, F., & Hauschildt, P. H. 1998, A&A, 337, 403
+Baraffe, I., Pratt, J., Goffrey, T., et al. 2017, ApJ, 845, L6
+Baraffe, I., Pratt, J., Vlaykov, D. G., et al. 2021, A&A, 654, A126
+Brummell, N. H., Clune, T. L., & Toomre, J. 2002, ApJ, 570, 825
+Brun, A. S., Miesch, M. S., & Toomre, J. 2011, ApJ, 742, 79
+Buldgen, G., Eggenberger, P., Baturin, V. A., et al. 2020, A&A, 642, A36
+Buldgen, G., Salmon, S., & Noels, A. 2019a, Frontiers in Astronomy and Space
+Sciences, 6, 42
+Buldgen, G., Salmon, S. J. A. J., Noels, A., et al. 2019b, A&A, 621, A33
+Caffau, E., Ludwig, H. G., Steffen, M., Freytag, B., & Bonifacio, P. 2011,
+Sol. Phys., 268, 255
+Cai, T. 2020, ApJ, 888, 46
+Castro, N., Fossati, L., Langer, N., et al. 2014, A&A, 570, L13
+Christensen-Dalsgaard, J. 2021, Living Reviews in Solar Physics, 18, 2
+Christensen-Dalsgaard, J., Gough, D. O., & Knudstrup, E. 2018, MNRAS, 477,
+3845
+Christensen-Dalsgaard, J., Monteiro, M. J. P. F. G., Rempel, M., & Thompson,
+M. J. 2011, MNRAS, 414, 1158
+Claret, A. & Torres, G. 2016, A&A, 592, A15
+Constantino, T., Campbell, S., Gil-Pons, P., & Lattanzio, J. 2014, ApJ, 784, 56
+Edelmann, P. V. F., Ratnasingam, R. P., Pedersen, M. G., et al. 2019, ApJ, 876, 4
+Freytag, B., Ludwig, H. G., & Steffen, M. 1996, A&A, 313, 497
+Goffrey, T., Pratt, J., Viallet, M., et al. 2017, A&A, 600, A7
+Grevesse, N. & Noels, A. 1993, in Origin and Evolution of the Elements, ed.
+N. Prantzos, E. Vangioni-Flam, & M. Casse, 15–25
+Higl, J., Müller, E.,
+& Weiss, A. 2021, A&A, 646, A133
+Hotta, H. 2017, ApJ, 843, 52
+Hurlburt, N. E., Toomre, J., & Massaguer, J. M. 1986, ApJ, 311, 563
+Käpylä,
+P. J.
+2019, A&A, 631, A122
+Korre, L., Brummell, N., Garaud, P., & Guervilly, C. 2021, MNRAS, 503, 362
+Korre, L., Garaud, P., & Brummell, N. H. 2019, MNRAS, 484, 1220
+Kunitomo, M. & Guillot, T. 2021, arXiv e-prints, arXiv:2109.06492
+Meakin, C. A. & Arnett, D. 2007, ApJ, 667, 448
+Muthsam, H. J., Goeb, W., Kupka, F., Liebich, W., & Zoechling, J. 1995, A&A,
+293, 127
+Rogers, T. M., Glatzmaier, G. A., & Jones, C. A. 2006, ApJ, 653, 765
+Thoul, A. A., Bahcall, J. N., & Loeb, A. 1994, ApJ, 421, 828
+Viallet, M., Baraffe, I., & Walder, R. 2011, A&A, 531, A86
+Viallet, M., Goffrey, T., Baraffe, I., et al. 2016, A&A, 586, A153
+Viallet, M., Meakin, C., Arnett, D., & Mocák,
+M. 2013, ApJ, 769, 1
+Vinyoles, N., Serenelli, A. M., Villante, F. L., et al. 2017, ApJ, 835, 202
+Zahn, J. P. 1991, A&A, 252, 179
+Zhang, C., Deng, L., Xiong, D., & Christensen-Dalsgaard, J. 2012, ApJ, 759,
+L14
+Zhang, Q. S. & Li, Y. 2012, ApJ, 746, 50
+Zhang, Q.-S., Li, Y., & Christensen-Dalsgaard, J. 2019, ApJ, 881, 103
+ 
\ No newline at end of file
diff --git a/read/results/playa/2201.00201.txt b/read/results/playa/2201.00201.txt
new file mode 100644
index 0000000..a3b0dc4
--- /dev/null
+++ b/read/results/playa/2201.00201.txt
@@ -0,0 +1,932 @@
+Astronomy & Astrophysics manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs ©ESO 2022
+January 19, 2022
+ Letter to the Editor
+The period-age relation of long-period variables
+M. Trabucchi$^{1,\star}$,
+N. Mowlavi$^{1}$
+$^{1}$ Department of Astronomy, University of Geneva, Ch. Pegasi 51, 1290 Versoix, Switzerland
+December 2021
+ ABSTRACT
+Context. Pieces of empirical evidence suggest the existence of a period-age relation for long-period variables (LPVs). Yet, this
+property has hardly been studied on theoretical grounds thus far.
+Aims. We aim to examine the period-age relation using the results from recent nonlinear pulsation calculations.
+Methods. We combined isochrone models with theoretical periods to simulate the distribution of fundamental mode LPV pulsators,
+which include Miras, in the period-age plane, and we compared it with observations of LPVs in Galactic and Magellanic Clouds’
+clusters.
+Results. In agreement with observations, models predict that the fundamental mode period decreases with increasing age because of
+the dominant role of mass in shaping stellar structure and evolution. At a given age, the period distribution shows a non-negligible
+width and is skewed toward short periods, except for young C-rich stars. As a result, the period-age relations of O-rich and C-rich
+ models are predicted to have different slopes. We derived best-fit relations describing age and initial mass as a function of the
+fundamental mode period for both O- and C-rich models.
+Conclusions. The study confirms the power of the period-age relations to study populations of LPVs of specific types, either O-rich
+or C-rich, on statistical grounds. In doing so, it is recommended not to limit a study to Miras, which would make it prone to selection
+biases, but rather to include semi-regular variables that pulsate predominantly in the fundamental mode. The use of the relations to
+study individual LPVs, on the other hand, requires more care given the scatter in the period distribution predicted at any given age.
+Key words. stars: AGB and post-AGB – stars: evolution – stars: variables: general – Galaxy: stellar content – Galaxy: globular
+clusters: general – Magellanic Clouds
+1. Introduction
+Low- to intermediate-mass stars approach the end of their lives
+through the asymptotic giant branch (AGB) evolutionary phase,
+during which they exhibit pulsations with timescales up to several
+ hundreds of days, and they are hence known as long-period
+variables (LPVs). If their V -band amplitude exceeds 2.5 mag,
+they are classified as Miras, which have a rather regular periodicity
+ and they are believed to pulsate only in the radial fundamental
+ mode (FM). If their photometric amplitude is smaller, they
+are known as semi-regular variables (SRVs), which are thought
+to be the progenitors of Miras. The name stems from the lesser
+degree of regularity of their light curves, likely due to the fact
+that they can pulsate in multiple modes simultaneously.
+The notion that younger LPVs tend to display longer periods
+compared to older ones, often referred to as the period-age (PA)
+relation, is rooted in the empirical evidence from stellar kinematics
+ in the solar neighborhood. The first such piece of evidence
+is probably due to Merrill (1923), who pointed out that M-type
+LPVs increasingly lag behind the local standard of rest (i.e., possess
+ a higher asymmetric drift) as their period decreases. Later
+studies (as summarized by Wyatt & Cahn 1983) confirmed this
+behavior (also using proper motion data, e.g., Wilson & Merrill
+ 1942), and showed that the shorter periods are also accompanied
+ by a higher velocity dispersion. Furthermore, groups of
+LPVs with relatively short periods are characterized by a greater
+scale height above the Galactic plane. This was shown, using for
+Corresponding author: M. Trabucchi
+(michele.trabucchi@unige.ch)
+the first time the radial velocity of LPVs in the southern hemisphere,
+ by Feast (1963). In this seminal paper, Feast realized
+that LPVs with shorter periods must be members of older stellar
+populations and emphasized their highly promising applications
+for both Galactic and extra-galactic studies over a wide range
+of stellar ages. It should be noted that the PA relation is connected
+ with the existence of a period-metallicity relation (Lloyd
+Evans & Menzies 1973; Lloyd Evans 1983b; Feast 1981; Feast
+& Whitelock 2000a, and references therein).
+A number of subsequent works have corroborated the PA
+relation on empirical grounds, or have exploited it to interpret
+observational results. Relevant examples are studies of LPVs in
+globular clusters (e.g., Feast 1966; Lloyd Evans 1983b; Whitelock
+ 1986), toward the galactic center and bulge (Lloyd Evans
+1976; Feast et al. 1980; Whitelock et al. 1991) or at high galactic
+latitude (Jura & Kleinmann 1992; Whitelock et al. 1994). Of particular
+ interest is the recent effort to extend the analysis of LPVs
+to dwarf galaxies in the Local Group (Menzies et al. 2002, 2008;
+Whitelock et al. 2009; Menzies et al. 2010, 2011; Sakamoto et al.
+2012; Battinelli & Demers 2012, 2013; Whitelock et al. 2013;
+Menzies et al. 2015).
+The Hipparcos mission provided the means to refine the results
+ on the period-kinematics connection. This was done by
+Feast & Whitelock (2000b), who found evidence supporting the
+existence of a bar-like structure in the Bulge from the orbits of
+local LPVs. A similar study dedicated to C-rich LPVs was performed
+ by Feast et al. (2006), who provided quantitative age
+estimates for these stars. A summary of the main results and
+prospects emerging from these Hipparcos-era studies is given by
+Article number, page 1 of 9 arXiv:2201.00201v2 [astro-ph.SR] 17 Jan 2022
+A&A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs
+Feast (2007). More recently, the study of the Galaxy with LPVs
+has been stimulated by the wealth of data acquired by large-scale
+surveys (e.g., Catchpole et al. 2016; Urago et al. 2020), especially
+ the Gaia mission (Grady et al. 2019, 2020).
+It seems relevant that just a few years after the study of Feast
+(1963), Kippenhahn & Smith (1969) predicted the PA relation
+of classical Cepheids from stellar evolution and pulsation models.
+ The theoretical modeling of Cepheids and of their period-luminosity
+ (PL) and PA relations is now an active field of research
+ (e.g., Bono et al. 2005; Anderson et al. 2016; De Somma
+et al. 2020). In contrast, when it comes to theoretical assessments
+of the LPV PA relation, the literature is surprisingly scarce (especially
+ in comparison with the significant effort put into empirical
+studies). In fact, we were able to identify only two relevant studies
+ addressing this subject (Wyatt & Cahn 1983; Eggen 1998).
+The discrepancy in period predictions between linear and nonlinear
+ pulsation models (e.g., Ya’Ari & Tuchman 1996; Lebzelter
+& Wood 2005; Trabucchi et al. 2021b), and more generally the
+difficulty in modeling the structure of evolved red giants, likely
+played a role in hampering the theoretical investigation of the PA
+relation of LPVs.
+Motivated by the release of updated AGB evolutionary models
+ (Pastorelli et al. 2019, 2020) and the availability of new, accurate
+ model predictions for the FM period of AGB stars (Trabucchi
+ et al. 2019, 2021b), we decided to investigate the nature
+of the PA relation of LPVs on theoretical grounds. The adopted
+models and observed data are described in Sect. 2, while in
+Sect. 3 we present the results, which are discussed in Sect. 4.
+We summarize our conclusions in Sect. 5.
+2. Methods
+2.1. Models
+We employed PARSEC-COLIBRI isochrones (Marigo et al.
+2017) with stellar evolutionary models from Pastorelli et al.
+(2019, 2020) for the thermally pulsing asymptotic giant branch
+(TP-AGB) phase, and from PARSEC (Bressan et al. 2012, version
+ 1.2S) for the preceding evolution. The adopted set of
+isochrones covers the range 0.001 to 0.016 in initial metallicity
+ ($Z_{\mathrm{i}}$),
+with a 0.001 step, while it spans the age interval
+$8.00 \le \log(\tau/\mathrm{yr}) \le 10.45$ with a step of 0.05. Since the AGB
+phase is short-lived, it only spans a small range of initial masses
+for each given isochrone, of order of
+$10^{-2}\,M_\odot$
+at most.
+The adopted isochrones include linear pulsation periods from
+Trabucchi et al. (2019) for overtone modes and nonlinear periods
+computed with the period-mass-radius relation from Trabucchi
+et al. (2021b) for the FM$^{1}$.
+Pulsation properties were computed
+along both the early-AGB and the TP-AGB. We did not extend
+our analysis to red supergiant stars as the pulsation prescriptions
+we employed are strictly valid only below
+$7\,M_\odot$.
+We recall that, with the adopted nonlinear relation, the period
+increases with radius (R) as a broken power law, whose exponent
+decreases as soon as the “bending radius”
+$R_{\mathrm{b}}$ is exceeded, and it
+becomes zero when the “saturation radius”
+$R_{\mathrm{s}} > R_{\mathrm{b}}$
+is reached
+(i.e., the period becomes independent of radius). The exact values
+ of $R_{\mathrm{b}}$
+and $R_{\mathrm{s}}$,
+as well as of the exponents, depend on the
+current mass (M). We assume that the FM is dominant if the
+stellar radius is larger than the critical value
+$R_{\mathrm{dom},0}$, which we
+computed from the current stellar mass using Eq. 4 of Trabucchi
+et al. (2021b).
+$^{1}$ Hereinafter, whenever we discuss periods, it should be understood
+that we refer to FM periods, on which this work is focused.
+2.2. Data
+As a first set of data, we considered the cluster-LPV pairs used
+by Grady et al. (2019, see their tables 1 and 2). These consist of
+19 clusters in the Large Magellanic Cloud, hosting a total of 20
+potential LPV members, and eight Galactic clusters each hosting
+a potential LPV member.
+We expanded this list with data for LPVs in a few populous
+clusters, namely the Galactic clusters NGC 362, NGC 2808, 47
+Tuc (NGC 104), and ω Cen (NGC 5139); the LMC clusters NGC
+1978 and NGC 1846; and the cluster NGC 419 in the Small Magellanic
+ Cloud (SMC). The source lists were taken from Lebzelter
+ & Wood (2005, 2007, 2011, 2016) and Kamath et al. (2010),
+whose notation for the sources names is adopted here. After excluding
+ the star LW3 in NGC 1846 and the star V129 in ω Cen,
+which are unlikely cluster members (cf. Lebzelter & Wood 2007,
+2016), we reached a total of 203 sources.
+The aforementioned studies also provide a lot of information,
+ possibly including J H K photometry, one or more periods,
+and a spectral type. In order to expand on the available data,
+we crossmatched the selected sample with the Two Micron All-Sky
+ Survey (2MASS, Skrutskie et al. 2006), the all-sky data
+release of the Wide-field Infrared Survey Explorer (AllWISE,
+Cutri et al. 2013), the catalog of variable stars from the All-Sky
+ Automated Survey for SuperNovae (ASAS-SN, Jayasinghe
+et al. 2020), the catalogs of LPVs in the Magellanic Clouds from
+the third phase of the Optical Gravitational Lensing Experiment
+(OGLE-III, Soszyński et al. 2009, 2011),
+the early third data release
+ from the Gaia mission (Gaia EDR3, Gaia Collaboration
+et al. 2021), and the catalog of LPV candidates from Gaia DR2
+(Mowlavi et al. 2018).
+Following Grady et al. (2019), we took ages from
+Kharchenko et al. (2016) and Baumgardt et al. (2013) for clusters
+in the Galaxy and LMC, respectively, thereby ensuring that ages
+would be homogeneously derived for clusters in both galaxies.
+Age uncertainties from Baumgardt et al. (2013), provided for
+each cluster, are generally around
+$\sigma_{\log(\tau)} \simeq 0.05$. Kharchenko
+et al. (2016) do not provide age uncertainties, but a reasonable
+upper limit for their method should be
+$\sigma_{\log(\tau)} = 0.2$ based on
+the analysis of Kharchenko et al. (2005) (the same value was
+adopted by Grady et al. 2019, in their Fig. 7).
+As discussed by Kamath et al. (2010), the age of the SMC
+cluster NGC 419 is believed to be around 1.4-1.6 Gyr. This is
+consistent with the value τ = 1.45 ± 0.05 Gyr from Goudfrooij
+et al. (2014), while it is as young as $\tau \simeq 0.89 \pm 0.015$ Gyr according
+ to Perren et al. (2017). Since an accurate estimate is not
+necessary for our exploratory analysis, we took a rough average
+and assumed log(τ/yr) = 9.1 ± 0.1. NGC 419 and NGC 1846
+likely exhibit TP-AGB boosting (Girardi et al. 2013). We note
+that some clusters show multiple stellar populations, whose age
+spread has been estimated in some cases (e.g., Mackey & Broby
+Nielsen 2007; Joo & Lee 2013; Villanova et al. 2014) and is consistent
+ with the age uncertainties we adopted.
+Distances of Galactic clusters were also taken from
+Kharchenko et al. (2016), while for the Magellanic Clouds and
+their clusters we adopted the distance moduli
+$\mu_{\mathrm{LMC}} = 18.49 \pm 0.09$ mag and
+$\mu_{\mathrm{SMC}} = 18.96 \pm 0.02$ mag
+from de Grijs et al.
+(2017). We searched for data on interstellar extinction from several
+ literature works (e.g., Nayak et al. 2016; Kharchenko et al.
+2016; Perren et al. 2017), all of which suggest that extinction
+in the $K_{\mathrm{s}}$ filter
+is smaller than ∼ 0.1 mag for most of the clusters
+ we considered, and at most as large as ∼ 0.3 mag, which is
+negligible for our purposes.
+Article number, page 2 of 9
+Trabucchi et al.: The period-age relation of LPVs
+A detailed membership verification is beyond the scope of
+this work, and we relied on the checks performed by authors
+whose source lists we adopted. It should be kept in mind that
+some sources may not be real cluster members.
+For sources without a spectral type, we used the Gaia-2MASS
+ diagram (Lebzelter et al. 2018, 2019) to determine
+whether they are O- or C-rich. We used the near-infrared period-luminosity
+ diagram to identify the most likely pulsation mode
+associated with each period of each observed source. We selected
+ only FM periods and rejected long secondary periods and
+periods attributed to overtone mode pulsation. The details of
+these classification steps are provided in Appendix A. Out of
+203 sources from the initial list, we identified 95 LPVs pulsating
+ in the FM, consisting of 40 C-rich and 55 O-rich sources.
+They consist of 29 Miras, 33 semi-regular variables, and 33 other
+sources (most likely LPVs) whose variability type has not been
+determined. We note that, with the exception of Gaia DR2, the
+sources of variability data considered here do not report the uncertainty
+ associated with observed periods. However, since periods
+ were derived in most cases from well-sampled, high-quality
+variability observations, relative period uncertainties are most
+likely negligible compared with those associated with age.
+3. Results
+Panel (a) of Fig. 1 shows a comparison between model predictions
+ and observations in the
+$P_{\mathrm{FM}}$–$\log(\tau/\mathrm{yr})$ plane. The former
+are displayed by a density map showing the expected number
+$N_{\mathrm{FM}}$ of LPVs pulsating in the FM
+in each period-age bin, normalized
+ to maximum. Model predictions are in good agreement
+with data derived from observations (i.e., individual LPVs in
+clusters, represented by symbols), and they show that the period
+ of LPVs pulsating in the FM decreases with increasing age.
+Crosses mark the average properties of the three groups of C-rich
+ LPVs from Feast et al. (2006, their table 4), which fit the
+general pattern with the exception of their group 3, estimated to
+be older than what our models predict at $P \simeq 650$.
+We also show a linear best-fit to the models distribution
+(weighted by $N_{\mathrm{FM}}$),
+which shows a fairly good agreement with
+the best-fit to observations by Grady et al. (2019, also shown).
+However, the best-fit line does not fully capture the properties
+of the predictions, nor of the observed trend. Indeed, models are
+indicative of a substantial dispersion around the relation. For instance,
+ at 1 Gyr, the FM period ranges from ∼ 200 days to ∼ 550
+days. Conversely, LPVs pulsating in the FM with a period of 350
+days are predicted to be at least ∼200 Myr old, but they can be as
+old as ∼3 Gyr. Observed data are consistent with the predicted
+spread, although the agreement cannot be considered conclusive as the observed
+ sample adopted is not complete.
+Nonetheless, it is relevant that some clusters host multiple
+LPVs, which are thus almost coeval, and they do span a wide
+period range. Some of these clusters host multiple stellar populations
+ that are believed to have formed over a time comparable
+with the age uncertainties we adopted. This means that longer-period
+ (more massive) LPVs in these clusters probably lean toward
+ the lower age limit assumed for their host cluster, and the
+opposite is true at shorter periods. This tends to strengthen the
+agreement between models and observations.
+Our data set samples the intermediate-age range (NGC 419
+and NGC 1846) relatively well, as well as old ages (ω Cen, 47
+Tuc, NGC 362, and NGC 2808). This provides us with the opportunity
+ to study the period distribution at these ages, and for
+a more detailed comparison between models and observations. On the basis of the average age of these two groups of clusters
+ and the associated uncertainty, and taking the discrete age
+sampling of the isochrones into account, we considered the age
+ranges log(τ/yr) = 9.15 ± 0.10 and log(τ/yr) = 10.10 ± 0.20. Period
+ distributions at those ages are displayed in panels (b) and (c)
+of Fig. 1, respectively, showing good agreement between model
+predictions and observations. We note that in both cases, the distribution
+ is skewed toward short periods, which seems to be true
+at all ages for O-rich stars. This can be seen in panel (a) of Fig. 2,
+which is a version of the PA plane limited to an O-rich
+composition$^{2}$.
+Indeed, although at τ  5 Gyr the observed sample is
+very scarce, it appears to be consistent with models predicting a
+more densely populated region in the shorter-period half of the
+PA distribution.
+The case of C-stars, shown in panel (b) of Fig. 2, is different.
+ They only form over a restricted range of initial masses
+and ages, so their occurrence in a given stellar population is an
+age indicator on its own. Toward the low-mass (old age) side
+of the C-star regime, the behavior is similar to the O-rich case
+with a concentration around relatively short periods. C-rich models
+ tend to have a lower surface temperature and larger radii,
+at a given mass, compared to O-rich models, and thus they attain
+ longer periods more easily. This occurs in particular toward
+higher masses, so that younger C-rich models are more concentrated
+ at longer periods, leading to a steeper PA relation compared
+ with the O-rich case. These predictions agree with observations
+ on the old side of the period distribution, while the
+scarcity of C stars at $\tau \lesssim 0.6$ Gyr prevents us from performing a
+comparison at younger ages.
+In appendix B, we provide analytic PA relations by fitting the
+high-density parts of the O- and C-rich models’ distribution. We
+emphasize that, because of the large scatter of the relation, ages
+estimated in this way for individual LPVs are bound to be highly
+uncertain. As a way to assess the error in age determination, we
+also provide analytic best-fit relations to the boundaries of the
+PA distribution of the models in the appendix. These relations
+are displayed in Fig. 2.
+4. Discussion
+In general agreement with observations, models confirm that
+LPVs pulsating predominantly in the FM follow a PA relation,
+which exhibits a non-negligible dispersion. Thanks to the newly
+available nonlinear period predictions, we were able to better examine
+ the nature of this relation and the origin of its scatter.
+The PA relation is intimately connected with the PL relation,
+both patterns emerging because of the prominent role of mass in
+shaping stellar structure and evolution. Indeed, stellar mass determines
+ the lifetimes of the main evolutionary stages, and thus
+the age of stars in the AGB phase. Pulsation models (Trabucchi
+et al. 2021b) show that the radius $R_{\mathrm{dom},0}$ (and corresponding
+luminosity) at the onset of dominant FM pulsation (DFMP)
+increases with mass, so that the most massive FM-dominated
+LPVs are brighter. They also have longer periods, as this increases
+ with radius. In other words, the period, luminosity, and
+age near the tip of the AGB are all functions of initial stellar
+mass (at least to a good approximation).
+We note that this would not be the case if the FM were dominant
+ along the entire AGB, as the large change in radius during
+this phase would result in a wide range of periods at a given age.
+It is the very fact that DFMP occurs only during the final portion
+$^2$ A further version of the PA plane highlighting both chemical types
+can be found in Fig. A.2 of appendix A.1.
+ Article number, page 3 of 9
+A&A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs
+Fig. 1. Period-age diagram. Panel (a) shows the predicted period-age distribution (darker tones indicate a higher expected number of LPVs on
+a linear scale, normalized to maximum). Symbols represent observed LPVs (green: SRVs; purple: Miras; white: unclassified) with the shape
+indicating their host cluster or literature source as indicated in the legend. The age uncertainties are marked by the error bars. The groups of
+galactic C-stars of Feast et al. (2006) are marked by crosses annotated with the group number. The solid and dotted line represent a linear best-fit
+to models and the best-fit by Grady et al. (2019), respectively. Period distributions at selected ages are compared in panels (b) and (c) and marked
+in panel (a) by the blue and red shaded areas (at log(τ/yr) ∼ 9.15 and ∼ 10.10, respectively). For clarity, the effect of the TP-AGB boosting is
+suppressed in panel (a).
+Fig. 2. Similar to Fig. 1, but limited to O-rich (left panel) and C-rich (right panel) LPVs. The solid line marks the best fit to the models, while
+dashed lines are best fits to the edges of the model distribution (see the text for more details).
+of the AGB that limits the range of periods a FM-pulsating LPV
+can have at a given age. Yet, the DFMP part of the AGB is long
+enough for significant variations in radius to occur, which result
+in the dispersion of the PA relation seen in Fig. 1.
+At a given initial metallicity $Z_i$, the shape of the period distribution
+primarily results from the fact that, throughout the TP-AGB
+(the stage during which the FM is normally excited), the
+envelope expansion accelerates, while the period becomes progressively
+ less sensitive to changes in radius (see Appendix C).
+In particular, the slope of the period-radius relation decreases
+sharply at $P_b = P(R_b)$. The FM period distribution is roughly
+symmetric around that value, but at its short-period side, the FM
+is not dominant. Therefore, when only FM-dominated LPVs are
+considered, as is done here, the observed period distribution appears
+ skewed toward short periods.  This feature is strengthened when a set of isochrones is considered
+ which spans a range of initial metallicities because the
+adopted criterion for the onset of DFMP does not depend on
+metallicity, but the FM period does as metal-poor LPVs are
+warmer and have smaller radii compared with metal-rich ones.
+As a consequence, the bulk of the period distribution of metal-poor
+LPVs is at periods shorter than $P_b$, so they only contribute
+to the global distribution (i.e., at all $Z_i$ at a given age) over a
+small period range at $P \simeq P_b$. In contrast, metal-rich LPVs have
+periods well beyond $P_b$, so they contribute both at that value and
+at longer periods. The result is an excess of FM-dominated LPVs
+near $P_b$, that is to say on the short side of the overall period distribution.
+
+We note that, in contrast with the prescription we adopted,
+the onset of DFMP in reality is probably sensitive to metallicity.
+Article number, page 4 of 9
+Trabucchi et al.: The period-age relation of LPVs
+While the good degree of agreement with observations suggests
+ that the dependence is weak at most, it is possible for
+any discrepancy to be smeared out by the fact that our set of
+isochrones implicitly assumes a flat star-formation rate with no
+age-metallicity relation, so it is not an accurate representation of
+any realistic stellar environment. In this sense, the PA relation is
+environment-dependent, and it is not necessarily universal.
+A further point of uncertainty stems from the fact that the
+prescription we adopted assumes that the FM period only depends
+ upon the mass and radius, and that it is affected by a
+change in composition only through the effect that such a variation
+ has on the radius. While this is true to a good approximation,
+linear models show a small dependence of periods on metallicity
+ at a fixed mass and radius, but the quantitative impact in the
+nonlinear case is unknown. We can only estimate, based on the
+results of Trabucchi et al. (2019), an uncertainty of ±10% at most
+with respect to the prescriptions adopted here.
+Qualitatively, a realistic age-metallicity relation and the
+metallicity dependence of the period and of the onset of DFMP
+are all expected to result in a steeper PA relation than the one
+we predict, but it is difficult to assess the relative importance of
+these effects. In this sense, the composition probably affects the
+shape of the PA relation more than its dispersion. The latter is
+likely affected by the composition indirectly through mass loss,
+the analysis of which is beyond the scope of this study. However,
+ we point out that mass loss represents a source of scatter in
+combination with the occurrence of thermal pulses, because it reduces
+ the minimum radius for the onset of DFMP. Thus, during
+the luminosity dips associated with thermal pulses, a LPV can
+have a period shorter than the one it had when it first entered the
+DFMP regime (see Appendix C). An additional source of uncertainty,
+ which we disregarded, is rotation (or other processes that
+induce extra mixing in the core) which causes a spread in ages
+at a given initial mass (cf. Anderson et al. 2016, for the case of
+classical Cepheids).
+The fairly good agreement between models and observations
+encourages the use of LPVs as age indicators, but the scatter of
+the PA relation hampers this application. We attempted to reduce
+the scatter through corrections involving photometric properties,
+as is customarily done for classical Cepheids with a color term
+(e.g., Bono et al. 2005), but with unsatisfactory results. A correction
+ dependent on the photometric amplitude of variability represents
+ a promising alternative, but it cannot be pursued at the
+moment. Indeed, for computational efficiency, current pulsation
+models include only a crude treatment of the atmospheric layers
+as they do not affect pulsation periods. On the other hand, the
+atmosphere is crucial in determining the spectral energy distribution
+ and its variation throughout the pulsation cycle, and hence
+the amplitude of variability. At the same time, the observational
+sample adopted here is too heterogeneous for a self-consistent
+investigation of amplitude, but this kind of study could be made
+possible by the upcoming data release 3 of the Gaia mission
+(Gaia Collaboration et al. 2021) and the future Legacy Survey
+of Space and Time (LSST, Ivezić et al. 2019) of the Vera Rubin
+Observatory.
+It is worth noting that our analysis applies to Miras as well
+as SRVs, provided that they predominantly pulsate in the FM.
+The limitation of PA relation studies to Miras, as has mainly
+been done in literature so far, undoubtedly has some advantages:
+ to begin with, the fact that Miras are typically easier to
+detect than SRVs, and their light curves are easier to process
+as they tend to be more regular. Moreover, Miras represent the
+end-point of AGB evolution, so in principle they correspond to a
+smaller range of stellar parameters compared to the full extent of the DFMP regime, and they display a smaller range of periods
+at a given age (cf. Feast & Whitelock 2000b). In other words,
+they should exhibit a relatively narrow PA relation (even though,
+based on the observational data set we adopted, there is no conclusive
+ evidence that considering only Miras reduces the scatter
+of the PA relation).
+Nonetheless, we caution against this approach as it is prone
+to introducing uncontrolled biases, as the traditional distinction
+between SRVs and Miras is arbitrary (see Trabucchi et al. 2021a,
+and references therein). As such, it disregards the physical processes
+ at the origin of the range of amplitudes characterizing
+LPVs. In particular, photometric amplitudes are largely determined
+ by the formation and dissociation of molecules in the stellar
+ atmosphere, and they are likely to be metallicity-dependent.
+It is therefore reasonable to assume that metal-poor (old) Mira
+analogs might be classified as SRVs, thereby undermining the
+potential application of the PA relation if restricted to Miras.
+This seems to be supported by the fact that the bulk of old LPVs
+in our sample are classified as SRVs. Therefore, studies involving
+ PA relations of LPVs would advantageously include both
+Miras and FM-pulsating SRVs.
+The challenge associated with SRVs stems from the fact that
+they are often multiperiodic (even when predominantly pulsating
+ in the FM), a property that complicates the light curve analysis
+ and period extraction. At the same time, this feature could
+potentially improve age determinations as overtone modes are
+expected to display a PA relation as well.
+5. Conclusions
+We used the results from recent nonlinear pulsation calculations
+and combined them with state-of-the-art isochrone models to investigate
+ the PA relation of FM-dominated LPVs, finding good
+agreement with the distribution of observed LPVs in star clusters.
+ The theoretical PA relation displays a non-negligible scatter,
+ whose origin we identified due to the fact that, despite being
+very brief, the portion of AGB evolution during which the FM
+becomes dominant shows a relatively large range in mass and
+radius at a given age.
+The theoretical distribution of FM periods is roughly symmetric,
+ but the FM is not dominant at the shortest periods. As a
+result, models predict that the distribution of dominant FM periods
+ at a given age is skewed toward short periods, in agreement
+with observations. Depending on stellar populations, metallicity
+may enhance this feature as metal-poor LPVs, which tend to be
+warmer and more compact, only contribute near short periods.
+We provide the best-fit PA relation separately for O-rich and
+C-rich FM-pulsating LPVs. The latter LPVs show a steeper PA
+relation because of their lower surface temperatures, which allow
+them to reach longer periods more easily.
+Our analysis concerns all LPVs predominantly pulsating in
+the FM, regardless of whether they are classified as Miras or
+SRVs. We discourage such a distinction in that it is arbitrary and
+prone to selection biases that risk compromising the use of LPVs
+as age indicators.
+The main limitation in the use of the PA relation for age determinations
+ of individual LPVs stems from its relatively large
+scatter. We suggest that corrective terms, involving the amplitude
+ of variability, might help to reduce this scatter and anticipate
+ that upcoming data from ongoing and future surveys dedicated
+ to time-domain astronomy will be highly valuable to probe
+this possibility. A study of the impact of metallicity on nonlinear
+pulsation is highly desirable to pursue this line of investigation,
+Article number, page 5 of 9
+A&A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs
+as would be a theoretical investigation of the dependence of photometric
+ amplitudes upon global stellar parameters.
+Acknowledgements. M.T. and N.M. acknowledge the support provided by the
+Swiss National Science Foundation through grant Nr. 188697. We are grateful
+to the anonymous referee for the constructive comments that helped improving
+this paper, and to Léo Girardi for helping with the computation and interpretation
+ of isochrones. This research has made use of: data from the OGLE-III
+Catalog of Variable Stars; data products from the Two Micron All Sky Survey,
+ which is a joint project of the University of Massachusetts and the Infrared
+ Processing and Analysis Center/California Institute of Technology, funded
+by the National Aeronautics and Space Administration and the National Science
+ Foundation; data from the European Space Agency (ESA) mission Gaia
+(https://www.cosmos.esa.int/gaia), processed by the Gaia Data Processing
+ and Analysis Consortium (DPAC, https://www.cosmos.esa.int/web/
+gaia/dpac/consortium). Funding for the DPAC has been provided by national
+ institutions, in particular the institutions participating in the Gaia Multilateral
+ Agreement. This research has made use of the following free/open source
+software and/or libraries: the Starlink Tables Infrastructure Library (STILTS and
+Topcat, Taylor 2006); IPython (Pérez & Granger 2007) and Jupyter (Kluyver
+et al. 2016) notebooks; the Python libraries NumPy (Harris et al. 2020), SciPy
+(Virtanen et al. 2020), matplotlib (a Python library for publication quality graphics,
+ Hunter 2007), and Astropy (a community-developed core Python package
+for Astronomy, Astropy Collaboration et al. 2018). This research has made use of
+NASA’s Astrophysics Data System Bibliographic Services, and of the following
+services provided by CDS, Strasbourg: the SIMBAD data base, VizieR catalogue
+access tool (DOI: 10.26093/cds/vizier, Ochsenbein et al. 2000), the “Aladin sky
+atlas” (Bonnarel et al. 2000), and the cross-match service (Boch et al. 2012;
+Pineau et al. 2020).
+References
+Anderson, R. I., Saio, H., Ekström, S., Georgy, C., & Meynet, G. 2016, A&A,
+591, A8
+Astropy Collaboration, Price-Whelan, A. M., Sipőcz, B. M., et al. 2018, AJ, 156,
+123
+Battinelli, P. & Demers, S. 2012, A&A, 544, A10
+Battinelli, P. & Demers, S. 2013, A&A, 553, A93
+Baumgardt, H., Parmentier, G., Anders, P., & Grebel, E. K. 2013, MNRAS, 430,
+676
+Boch, T., Pineau, F., & Derriere, S. 2012, in Astronomical Society of the Pacific
+ Conference Series, Vol. 461, Astronomical Data Analysis Software and
+Systems XXI, ed. P. Ballester, D. Egret, & N. P. F. Lorente, 291
+Bonnarel, F., Fernique, P., Bienaymé, O., et al. 2000, A&AS, 143, 33
+Bono, G., Marconi, M., Cassisi, S., et al. 2005, ApJ, 621, 966
+Bressan, A., Marigo, P., Girardi, L., et al. 2012, MNRAS, 427, 127
+Catchpole, R. M., Whitelock, P. A., Feast, M. W., et al. 2016, MNRAS, 455,
+2216
+Cutri, R. M., Wright, E. L., Conrow, T., et al. 2013, Explanatory Supplement
+to the AllWISE Data Release Products, Explanatory Supplement to the AllWISE
+ Data Release Products
+de Grijs, R., Courbin, F., Martínez-Vázquez, C. E., et al. 2017, Space Sci. Rev.,
+212, 1743
+De Somma, G., Marconi, M., Cassisi, S., et al. 2020, MNRAS, 496, 5039
+Eggen, O. J. 1998, AJ, 115, 2435
+Feast, M. 2007, in Astronomical Society of the Pacific Conference Series, Vol.
+378, Why Galaxies Care About AGB Stars: Their Importance as Actors and
+Probes, ed. F. Kerschbaum, C. Charbonnel, & R. F. Wing, 479
+Feast, M. & Whitelock, P. 2000a, in Astrophysics and Space Science Library,
+Vol. 255, Astrophysics and Space Science Library, ed. F. Matteucci & F. Giovannelli,
+ 229
+Feast, M. W. 1963, MNRAS, 125, 367
+Feast, M. W. 1966, The Observatory, 86, 120
+Feast, M. W. 1981, in Astrophysics and Space Science Library, Vol. 88, Physical
+Processes in Red Giants, ed. J. Iben, I. & A. Renzini, 193–204
+Feast, M. W., Robertson, B. S. C., & Black, C. 1980, MNRAS, 190, 227
+Feast, M. W. & Whitelock, P. A. 2000b, MNRAS, 317, 460
+Feast, M. W., Whitelock, P. A., & Menzies, J. W. 2006, MNRAS, 369, 791
+Gaia Collaboration, Brown, A. G. A., Vallenari, A., et al. 2021, A&A, 649, A1
+Girardi, L., Marigo, P., Bressan, A., & Rosenfield, P. 2013, ApJ, 777, 142
+Goudfrooij, P., Girardi, L., Kozhurina-Platais, V., et al. 2014, ApJ, 797, 35
+Grady, J., Belokurov, V., & Evans, N. W. 2019, MNRAS, 483, 3022
+Grady, J., Belokurov, V., & Evans, N. W. 2020, MNRAS, 492, 3128
+Harris, C. R., Millman, K. J., van der Walt, S. J., et al. 2020, Nature, 585, 357
+Hunter, J. D. 2007, Computing in Science & Engineering, 9, 90
+Ivezić, Ž., Kahn, S. M., Tyson, J. A., et al. 2019, ApJ, 873, 111
+Jayasinghe, T., Stanek, K. Z., Kochanek, C. S., et al. 2020, MNRAS, 491, 13
+Joo, S.-J. & Lee, Y.-W. 2013, ApJ, 762, 36
+Jura, M. & Kleinmann, S. G. 1992, ApJS, 79, 105
+Kamath, D., Wood, P. R., Soszyński, I., & Lebzelter, T. 2010, MNRAS, 408, 522
+Kharchenko, N. V., Piskunov, A. E., Röser, S., Schilbach, E., & Scholz, R. D.
+2005, A&A, 438, 1163
+Kharchenko, N. V., Piskunov, A. E., Schilbach, E., Röser, S., & Scholz, R. D.
+2016, A&A, 585, A101
+Kippenhahn, R. & Smith, L. 1969, A&A, 1, 142
+Kluyver, T., Ragan-Kelley, B., Pérez, F., et al. 2016, in Positioning and Power
+in Academic Publishing: Players, Agents and Agendas, ed. F. Loizides &
+B. Schmidt (Netherlands: IOS Press), 87–90
+Lebzelter, T., Mowlavi, N., Marigo, P., et al. 2018, A&A, 616, L13
+Lebzelter, T., Trabucchi, M., Mowlavi, N., et al. 2019, A&A, 631, A24
+Lebzelter, T. & Wood, P. R. 2005, A&A, 441, 1117
+Lebzelter, T. & Wood, P. R. 2007, A&A, 475, 643
+Lebzelter, T. & Wood, P. R. 2011, A&A, 529, A137
+Lebzelter, T. & Wood, P. R. 2016, A&A, 585, A111
+Lloyd Evans, T. 1976, MNRAS, 174, 169
+Lloyd Evans, T. 1983a, MNRAS, 204, 985
+Lloyd Evans, T. 1983b, MNRAS, 204, 961
+Lloyd Evans, T. & Menzies, J. W. 1973, in Astrophysics and Space Science Library,
+ Vol. 36, IAU Colloq. 21: Variable Stars in Globular Clusters and in
+Related Systems, ed. J. D. Fernie, 151
+Mackey, A. D. & Broby Nielsen, P. 2007, MNRAS, 379, 151
+Marigo, P., Girardi, L., Bressan, A., et al. 2017, ApJ, 835, 77
+Menzies, J., Feast, M., Tanabé, T., Whitelock, P., & Nakada, Y. 2002, MNRAS,
+335, 923
+Menzies, J., Feast, M., Whitelock, P., et al. 2008, MNRAS, 385, 1045
+Menzies, J. W., Feast, M. W., Whitelock, P. A., & Matsunaga, N. 2011, MNRAS,
+414, 3492
+Menzies, J. W., Whitelock, P. A., & Feast, M. W. 2015, MNRAS, 452, 910
+Menzies, J. W., Whitelock, P. A., Feast, M. W., & Matsunaga, N. 2010, MNRAS,
+406, 86
+Merrill, P. W. 1923, ApJ, 58, 215
+Mowlavi, N., Lecoeur-Taïbi, I., Lebzelter, T., et al. 2018, A&A, 618, A58
+Nayak, P. K., Subramaniam, A., Choudhury, S., Indu, G., & Sagar, R. 2016,
+MNRAS, 463, 1446
+Ochsenbein, F., Bauer, P., & Marcout, J. 2000, A&AS, 143, 23
+Pastorelli, G., Marigo, P., Girardi, L., et al. 2020, MNRAS, 498, 3283
+Pastorelli, G., Marigo, P., Girardi, L., et al. 2019, MNRAS, 485, 5666
+Pérez, F. & Granger, B. E. 2007, Computing in Science and Engineering, 9, 21
+Perren, G. I., Piatti, A. E., & Vázquez, R. A. 2017, A&A, 602, A89
+Pineau, F.-X., Boch, T., Derrière, S., & Schaaff, A. 2020, in Astronomical Society
+ of the Pacific Conference Series, Vol. 522, Astronomical Data Analysis
+Software and Systems XXVII, ed. P. Ballester, J. Ibsen, M. Solar, & K. Shortridge,
+ 125
+Sakamoto, T., Matsunaga, N., Hasegawa, T., & Nakada, Y. 2012, ApJ, 761, L10
+Skrutskie, M. F., Cutri, R. M., Stiening, R., et al. 2006, AJ, 131, 1163
+Soszyński, I., Olechowska, A., Ratajczak, M., et al. 2021, ApJ, 911, L22
+Soszyński, I., Udalski, A., Szymański, M. K., et al. 2009, Acta Astron., 59, 239
+Soszyński, I., Udalski, A., Szymański, M. K., et al. 2011, Acta Astron., 61, 217
+Taylor, M. B. 2006, in Astronomical Society of the Pacific Conference Series,
+ Vol. 351, Astronomical Data Analysis Software and Systems XV, ed.
+C. Gabriel, C. Arviset, D. Ponz, & S. Enrique, 666
+Trabucchi, M., Mowlavi, N., & Lebzelter, T. 2021a, A&A, 656, A66
+Trabucchi, M., Wood, P. R., Montalbán, J., et al. 2017, ApJ, 847, 139
+Trabucchi, M., Wood, P. R., Montalbán, J., et al. 2019, MNRAS, 482, 929
+Trabucchi, M., Wood, P. R., Mowlavi, N., et al. 2021b, MNRAS, 500, 1575
+Urago, R., Omodaka, T., Nagayama, T., et al. 2020, ApJ, 891, 50
+Villanova, S., Geisler, D., Gratton, R. G., & Cassisi, S. 2014, ApJ, 791, 107
+Virtanen, P., Gommers, R., Oliphant, T. E., et al. 2020, Nature Methods, 17, 261
+Wenger, M., Ochsenbein, F., Egret, D., et al. 2000, A&AS, 143, 9
+Whitelock, P., Feast, M., & Catchpole, R. 1991, MNRAS, 248, 276
+Whitelock, P., Menzies, J., Feast, M., et al. 1994, MNRAS, 267, 711
+Whitelock, P. A. 1986, MNRAS, 219, 525
+Whitelock, P. A., Menzies, J. W., Feast, M. W., et al. 2009, MNRAS, 394, 795
+Whitelock, P. A., Menzies, J. W., Feast, M. W., Nsengiyumva, F., & Matsunaga,
+N. 2013, MNRAS, 428, 2216
+Wilson, R. E. & Merrill, P. W. 1942, ApJ, 95, 248
+Wyatt, S. P. & Cahn, J. H. 1983, ApJ, 275, 225
+Ya’Ari, A. & Tuchman, Y. 1996, ApJ, 456, 350
+Article number, page 6 of 9
+Trabucchi et al.: The period-age relation of LPVs
+Fig. A.1. Absolute-$K_s$ Gaia-2MASS diagram for the stars with or without
+a spectral type (left and right panels, respectively) in the selected
+sample. Symbol colors and shapes indicate the spectral type and host
+cluster described in the legend, respectively, which also reports the number
+of sources displayed (i.e., having both optical and NIR photometry).
+The dashed line marks the separation between O- and C-rich sources
+according to Lebzelter et al. (2018). An arrow marks the source MSX
+LMC 124 in NGC 1830 that, having $W_{\mathrm{BP,RP}} - W_{J,K_s} = 9.73$ mag, lies outside
+the plot area. Background dots are LPVs in the LMC from OGLE-III
+(light gray) and Mowlavi et al. (2018) (darker gray).
+Appendix A: Classification of observed LPVs
+Appendix A.1: Spectral type
+We adopted the spectral types provided by Lebzelter & Wood
+(2007) and Kamath et al. (2010) for 52 of the LPVs they studied
+in NGC 1846, NGC 1978, and NGC 419. The only exception
+is the star 5-3 in NGC 419, for which we adopted the S-type as
+reported by Lloyd Evans (1983a).
+We also searched the SIMBAD astronomical database
+(Wenger et al. 2000) for spectral type information, which we
+found for 26 more stars. We used the Gaia-2MASS diagram of
+Lebzelter et al. (2018) to confirm the chemical type classification
+taken from literature and to characterize the surface chemistry of
+sources of an unknown spectral type (see Fig. A.1). Among the
+latter, we identified 13 C-rich stars and 106 O-rich sources.
+Three of the sources without a spectral type lack Gaia photometry,
+ so they cannot be classified with the Gaia-2MASS. Two
+of them (LW5 and LW22 in 47 Tuc) have no match in Gaia
+EDR3, but they have NIR data and are probably O-rich based on
+their position in the $J - K_s$ versus $K_s$ color-magnitude diagram.
+The third source is one of the two stars in NGC 1903 from the
+list of Grady et al. (2019), which we identified with the 2MASS
+source J05171633-6920298. It is likely C-rich according to the
+NIR color-magnitude diagram.
+Finally, the sources V138 in ω Cen, LW15 in NGC 2808,
+and LW4 in NGC 362 lack NIR data. They cannot be placed in
+the NIR PL diagram, upon which we relied to assign pulsation
+modes to periods, so we excluded them from the sample. The distribution of O- and C-rich sources in the period-age diagram
+is shown in Fig. A.2.
+Appendix A.2: Variability
+For variability information, we complemented the data from
+Lebzelter & Wood and Kamath et al. (2010) with the catalogs
+from OGLE-III, ASAS-SN, and Gaia DR2. Combining these
+data sets, we found at least one period for each of the 176 sources
+in our sample.
+In order to identify the pulsation mode most likely responsible
+ for periods in a given source, we assumed that the second
+overtone mode is associated with sequence A, the first overtone
+mode with sequences B and C′, and the fundamental mode with
+sequence C (e.g., Trabucchi et al. 2017). We excluded long secondary
+periods on sequence D as they are not due to stellar pulsation
+(Soszyński et al. 2021, and references therein), and we
+used the pattern of PL sequences in the LMC as a reference to
+guide the mode identification (cf. Trabucchi et al. 2021a).
+We performed this classification separately for periods coming
+ from each distinct data set. If two or more periods from different
+ data sets were assigned to the same pulsation mode, we
+retained only one of those periods, with priority to the values
+from Lebzelter & Wood and Kamath et al. (2010). If the latter
+authors do not provide this information, we adopted the period
+from OGLE-III if available, and otherwise from ASAS-SN or
+from Gaia DR2.
+For some sources, the periods reported in different catalogs
+were assigned to the same mode through this procedure. In most
+cases, these periods are reasonably similar to each other. Only
+in a few cases were they significantly different, but this did not
+alter our conclusions.
+When available, the variability type was taken from OGLE-III
+or ASAS-SN. We note that we are only interested in whether
+a star is classified as a Mira or semi-regular variable. In many
+cases, this type is not given or the star is simply considered, for
+instance, as an LPV or AGB in SIMBAD, in which case we considered
+ the variability type as undetermined.
+Appendix B: Fitting relations
+We obtained analytic expressions for the PA relations separately
+for O- and C-rich stars, proceeding as follows. For each bin of
+log(τ/yr), we modeled the period distribution with a Gaussian
+kernel density estimator (KDE) and identified the peak of the
+distribution. To describe the boundaries of the PA relation, we
+adopted, at each age, the values of the period at which the distribution
+ equals 25% of its maximum. We selected this arbitrary
+value upon visual inspection of the PA plane. We modeled the
+central trend of the PA relation, as well as its short- and long-period
+edges, with linear or quadratic functions in the form
+$$\log(\tau/\mathrm{yr}) = a_0 + a_1\,(P/\tilde{P}) + a_2\,(P/\tilde{P})^2 , \tag{B.1}$$
+(where $\tilde{P} = 350$ days) and employed a Levenberg-Marquardt
+nonlinear regression algorithm$^3$ to derive the best-fit coefficients,
+which are listed in Table B.1. We remark that these best-fit expressions
+ are only valid in the intervals 8.0 ≤ log(τ/yr) ≤ 10.3
+and 20 < P/days < 700 for O-rich composition, and within
+$^3$ We made use of the Python library SciPy to perform Gaussian KDE
+modeling and best-fit, respectively, by means of the gaussian_kde
+tool from the stats module and the curve_fit function from the
+optimize module.
+ Article number, page 7 of 9
+A&A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs
+Fig. A.2. Similar to Fig. 1, except each source is color-coded according to whether it has been classified as O-rich (blue) or C-rich (red).
+Table B.1. Best-fit coefficients for the PA relation and its boundaries in
+the form given in Eq. B.1.
+Sp. type relation $a_0$ $a_1$ $a_2$
+O-rich center 10.78 -2.660 0.5953
+lower edge 10.46 -2.818 0.6578
+upper edge 10.54 -0.8187 -0.2335
+C-rich center 9.755 -0.7532
+lower edge 9.982 -1.698
+upper edge 8.498 -1.827 -0.9959
+8.6 ≤ log(τ/yr) ≤ 9.3 and 140 < P/days < 620 in the C-rich
+case.
+Because of the connection between age and initial mass, the
+PA relation can be translated into a period-initial mass relation,
+which we derived using the same approach described above, and
+assuming the form
+$$\log(M_i/M_\odot) = b_0 + b_1\,(P/\tilde{P}) + b_2\,(P/\tilde{P})^2 . \tag{B.2}$$
+The resulting best-fit lines are displayed in Fig. B.1, and the coefficients
+ are given in Table B.2.
+We remark that both the PA and the period-initial mass relations
+ depend on model assumptions, in particular mass loss and
+mixing, as well as on the properties of the population of LPVs,
+namely the star-formation history and age-metallicity relation.
+Appendix C: The shape of the period distribution
+As an example case, we consider an isochrone of age log(τ/yr) =
+8.3 and initial metallicity $Z_i = 0.006$. Stars on the TP-AGB have
+initial masses $M_i \simeq 3.85\,M_\odot$ over a small range of $\sim 10^{-3}\,M_\odot$.
+The relation between period and initial mass is displayed in
+panel (a) of Fig. C.1, where isochrone portions undergoing
+Table B.2. Best-fit coefficients for the period-initial mass relation and
+its boundaries in the form given in Eq. B.2.
+Sp. type relation $b_0$ $b_1$ $b_2$
+O-rich center -0.2790 0.8958 -0.1828
+lower edge -0.1772 0.9975 -0.2203
+upper edge -0.1740 0.2783 0.8247
+C-rich center -0.0304 0.2885
+lower edge -0.0131 0.5752
+upper edge -0.2245 -0.2720 0.2343
+DFMP are indicated by solid lines. Panel (b) shows the period
+distributions for a few different cases.
+It is instructive, to begin with, to ignore the effect of thermal
+pulses and consider only the quiescent evolution (green lines in
+Fig. C.1). The smallest initial mass corresponds to a star that just
+entered the TP-AGB, when the FM has a period of ∼ 240 days
+but is not dominant. It only becomes dominant above a threshold
+radius $R_{\mathrm{dom},0}$, that is for periods longer than a (mass-dependent)
+critical period $P_{\mathrm{dom},0}$ (the solid gray line in Fig. C.1). The least
+evolved (quiescent) model with dominant FM has $P_{\mathrm{FM}} \simeq 360$
+days (green circle and horizontal line), corresponding to a sharp
+cut in the period distribution shown in panel (b) of Fig. C.1.
+As a star evolves along the AGB it expands, and its period becomes
+ longer in response to the increase in radius. Models with
+a higher initial mass are more evolved, hence they have a larger
+radius and a longer period. The rate at which a period increases
+with radius is not fixed, but rather decreases with evolution. According
+ to the prescription of Trabucchi et al. (2021b), a period
+grows with radius as a broken power-law with exponent $\alpha \simeq 1.8$
+if $R < R_b$, and with $\alpha \simeq 1.25$ at larger radii.
+This is equivalent to saying that the period grows more
+slowly after it exceeds a critical value $P_b = P(R_b)$, marked by
+the gray dotted line in Fig. C.1. The isochrone reaches it at
+Article number, page 8 of 9
+Trabucchi et al.: The period-age relation of LPVs
+Fig. B.1. Similar to Fig. 2, but showing initial mass $M_i$ in place of age. The best-fit lines to the most populated band and edges of the theoretical
+$P_{\mathrm{FM}}$ – $M_i$ relation are shown.
+Fig. C.1. Period distribution at fixed age and metallicity. Panel (a) shows
+period as a function of initial mass (current mass on the top axis) on the
+TP-AGB for a ∼ 200 Myr old isochrone with $Z_i = 0.006$. Red lines
+show full thermal pulses, while blue lines ignore luminosity spikes and
+green lines show only the quiescent evolution. The same color code
+is used for the period distributions (normalized to their maximum) on
+panel (b). Solid lines indicate that the FM is dominant. Circles indicate
+ the earliest onset of DFMP accounting for (red) or ignoring (green)
+luminosity spikes, and the shortest period of the dominant FM (blue).
+Gray lines mark the critical values of periods at which the FM becomes
+dominant (solid line), less sensitive to radius (dotted line, which occurs
+at the vertical line for this specific isochrone), and independent of radius
+(dashed line).
+$M_i \simeq 3.8524\,M_\odot$ (vertical gray line), when $P_{\mathrm{FM}} \simeq 420$ days. In
+models with a smaller initial mass, the period is still increasing
+at a relatively large rate as the envelope expands, while in more
+massive models the period has already become less sensitive to
+changes in radius. This is reflected by a slight inflection of the
+green curve, which corresponds to the maximum in the period
+distribution shown in panel (b) of Fig. C.1. The period distribution
+ of the full TP-AGB range is roughly symmetric around
+this maximum, while limiting the selection to DFMP, produces
+a distribution skewed toward short periods, as found in Sect. 3.
+If the luminosity dips following thermal pulses are taken
+into account (blue lines), the corresponding envelope contraction
+causes the period to decrease, and the cut at ∼ 360 days
+becomes less sharp. Because of mass loss, the threshold period
+$P_{\mathrm{dom},0}$ is lowered, so that the shortest period associated with
+DFMP does not correspond to the least evolved model (green
+circle), but rather to the luminosity dip of a thermal pulse (blue
+circle).
+To be precise, the earliest occurrence of DFMP is on the leftmost
+ luminosity spike (red circle), whose duration is so short that
+it is unlikely to be observed. Indeed, the inclusion of luminosity
+spikes alters the period distribution at long periods very little.
+Luminosity spikes are relevant only for relatively massive and
+young TP-AGB stars, and they give rise to the poorly populated
+portion of the PA relation at the longest periods, as seen in panel
+(a) of Fig. 2.
+ Article number, page 9 of 9
\ No newline at end of file
diff --git a/read/results/playa/2201.00214.txt b/read/results/playa/2201.00214.txt
new file mode 100644
index 0000000..06db44a
--- /dev/null
+++ b/read/results/playa/2201.00214.txt
@@ -0,0 +1,765 @@
+arXiv:2201.00214v1  [astro-ph.SR]  1 Jan 2022  Temperature Analysis of Flaring
+(AR11283) and non-Flaring (AR12194)
+Coronal Loops
+N. Fathalian1
+ , S. S. Hosseini Rad2
+, N. Alipour2
+, H. Safari2
+1
+Department of Physics, Payame Noor University (PNU), 19395-3697, Tehran, Iran.
+2
+Department of Physics, Faculty of Science, University of Zanjan, 45195-313, Zanjan, Iran.
+e-mail: narges_fathalian@alum.sharif.edu
+January 4, 2022
+Abstract
+Here, we study the temperature structure of flaring and non-flaring coronal loops, using extracted
+loops from images taken in six extreme ultraviolet (EUV) channels recorded by Atmospheric Imaging
+Assembly (AIA)/ Solar Dynamic Observatory (SDO). We use data for loops of X2.1-class-flaring active
+region (AR11283) during 22:10UT till 23:00UT, on 2011, September 6; and non-flaring active region
+(AR12194) during 08:00:00UT till 09:00:00UT on 2014, October 26. By using spatially-synthesized
+Gaussian DEM forward-fitting method, we calculate the peak temperatures for each strip of the loops.
+We apply the Lomb-Scargle method to compute the oscillations periods for the temperature series of each
+strip. The periods of the temperature oscillations for the flaring loops are ranged from 7 min to 28.4
+min. These temperature oscillations show very close behavior to the slow-mode oscillation. We observe
+that the temperature oscillations in the flaring loops are started at least around 10 minutes before the
+transverse oscillations and continue for a long time duration even after the transverse oscillations are
+ended. The temperature amplitudes are increased at the flaring time (during 20 min) in the flaring loops.
+The periods of the temperatures obtained for the non-flaring loops are ranged from 8.5 min to 30 min, but
+their significances are less (below 0.5) in comparison with the flaring ones (near to one). Hence the
+detected temperature periods for the non-flaring loops’ strips are less probable in comparison with the
+flaring ones, and maybe they are just fluctuations. Based on our confined observations, it seems that the
+flaring loops’ periods show more diversity and their temperatures have wider ranges of variation than the
+non-flaring ones. More accurate commentary in this respect requires more extensive statistical research
+and broader observations.
+Coronal Loops, Temperature Analysis, Temperature Oscillations, Flaring and non-Flaring Active Regions
+I. Introduction
+Analyzing the thermal structure of coronal loops is of considerable interest, especially as these
+magnetic loops have an essential role in heating the solar chromosphere and corona. Such analysis
+ can help to describe how the process of solar flaring is correlated with the loop’s thermal
+structure.
+Detections of coronal waves have a historical preview and have been reported for several times
+(e.g.,
+ Aschwanden et al. (1999); Nakariakov et al. (1999);Wang et al. (2003); Wang & Solanki (2004);
+Berghmans & Clette (1999); De Moortel et al. (2000), Verwichte et al. (2004), De Moortel & Brady
+(2007), Ballai et al. (2011)). Coronal seismology and MHD waves have been reviewed widely by
+
+De Moortel (2005), Nakariakov & Verwichte (2005), Aschwanden (2006), Banerjee et al. (2007) and
+De Moortel & Nakariakov (2012). Along with the development of the observations, transverse
+and longitudinal oscillations have also been studied theoretically (e.g., Gruszecki et al. (2006),
+Pascoe et al. (2007), Fathalian et al. (2010); Luna et al. (2010); Fathalian & Safari (2010)). Coronal
+seismology techniques help to elicit the information from observations of oscillatory phenomena
+and the results to be interpreted by using theoretical models (see for e.g.,
+ Roberts et al. (1984);
+Goossens et al. (1992)). Oscillatory patterns and processes which happen during solar flares, were
+interesting and subject of investigations from different approaches (e.g., Nakariakov et al. (2010),
+Nisticò et al. (2013), Anfinogentov et al. (2013), Hindman & Jain (2014), Russell et al. (2015)). As
+we know the transverse loops oscillations usually occur in response to a close filament or flare
+(Wills-Davey & Thompson (1999)).
+Rapidly decaying long-period oscillations are mostly interpreted as global (or fundamental
+ mode) standing slow magnetoacoustic waves (reviewed by Liu & Ofman (2014), and Wang
+(2011), also see Ofman & Wang (2002), and for slow-mode observed in fan-loops see Pant et al.
+(2017)). They often occur in hot coronal loops of active regions, associated with tiny (or micro-)
+flares. Increasing evidence has suggested that the harmonic type of decaying pulsations detected
+in intensity plots of solar and stellar flares are possibly caused by standing slow-mode waves (see
+reviews by Van Doorsselaere et al. (2016), and McLaughlin et al. (2018)). Excitation, propagation,
+and damping mechanisms of slow-mode waves have been studied theoretically (e.g., Wang et al.
+(2007); Wang et al. (2015); Jess et al. (2016); Nakariakov et al. (2017); Nisticò et al. (2017); Kolotkov
+et al. (2019); Krishna Prasad et al. (2019); Reale et al. (2019); Wang & Ofman (2019)). To have
+a complete overview of slow-mode magnetoacoustic waves in coronal loops see the review by
+Wang et al. (2021).
+Investigating and comparing the thermal structures and oscillations of coronal loops in loops
+of flaring and non-flaring active regions could help us in better understanding the loops’ material
+oscillations and the flare impact on them. Several different methods have been developed to investigate
+ the thermal structure of the coronal loops and loop strands. The thermal stability of the
+coronal loops was the subject of research, done by Habbal & Rosner (1979) (and references cited
+therein). McClymont & Craig (1985) stated that a pressure fluctuation must assist asymmetric
+coronal temperature perturbation. They concluded that coronal loops are impartially stable in
+the case of uniform heating.
+ Van Doorsselaere et al. (2011) used spectroscopic line ratios to obtain
+the required temperature (via CHIANTI code) and estimated the adiabatic index of the corona.
+The dependence of coronal loop temperature on loop length and magnetic field strength is also
+a favorite topic. For instance, Dahlburg et al. (2018) probed the temperature properties of solar
+coronal loops over a wide range of lengths and magnetic field strengths via numerical simulations
+ and observed a very high correlation between magnetic field strength and a maximum of
+the temperature. The effect of temperature inhomogeneity on the periods and the damping times
+of the standing slow-modes in stratified solar coronal loops was also studied (e.g., Abedini et al.
+(2012)). Fathalian (2019) estimated the loop temperature using the intensity ratios and the AIA response
+ functions in different wavelengths. Differential emission measure (DEM) computations and
+methods have been developed to estimate the temperature in the corona, which led to various
+discussions.
+ Schmelz et al. (2010) analyzed a coronal loop, which was observed on 2010 August
+3, by AIA. They took some differential emission measure (DEM) curves, claiming a multithermal
+rather than an isothermal DEM distribution (for the cross-sectional temperature of the loop). After
+ that, Aschwanden & Boerner (2011) criticized the method of background subtraction which
+Schmelz et al. had applied. They claimed that the background subtraction method caused their
+inferred result of a multithermal loop. Aschwanden & Boerner (2011) analyzed a set of hundred
+loops and understood that 66% of the loops could be fitted with a narrowband single-Gaussian
+DEM model. In this regard, some attention was paid to the instrumental limitations and ability
+ of AIA and Guennou et al. (2012a,b) discussed on the accuracy of the differential emission
+measure diagnostics of solar plasmas in respect of the AIA instrument of SDO. The abovementioned
+ controversy of whether the cross-field temperatures of coronal loops are multithermal or
+isothermal, continued by
+ Schmelz et al. (2013) (similar to Schmelz et al. (2011)). They analyzed
+twelve loops to understand the cross-field temperature distributions of them and reveal the loops’
+substructure. Based on their achievements, the warmer loops entail broader DEMs. Thereafter,
+Schmelz et al. (2014) found indications of a relationship between the DEM weighted-temperature
+and the cross-field DEM width for coronal loops. They argued that cooler loops tend to have
+narrower DEM widths. This could imply that fewer strands are seen emitting in the later cooling
+ phase, which they claim could potentially resolve the abovementioned controversy. In this
+subject, Aschwanden et al. (2015) (as well as 2013 (Aschwanden, 2013)) developed a method to
+extract the loop temperature which is based on Gaussian fit for Differential Emission Measure,
+named spatially-synthesized Gaussian DEM forward-fitting method (DEM hereafter).
+This paper aims to analyze and compare thermal oscillations of coronal loops in flaring and
+non-flaring active regions, 11283 and 12194, respectively. The contents of this paper are as follows:
+In section
+ II, data, we introduce the considered flaring and non-flaring active regions and describe
+the data employed and the time and properties of the flare, occurred in the active region. In
+section III, we explain the method we use to analyze the time-series of temperatures in different
+strips of the loops. Section IV is devoted to our results for the flaring and non-flaring
+regions. In section V we briefly state a summary of this work.
+II. Data
+We investigate the thermal structure and treatment of loops in a flaring region to see if it follows
+the transverse oscillations of the loops, and we examine the thermal fluctuations at the flare time.
+For this purpose, we select a high-energy X2.1 flare, for which the transverse oscillations of two loops
+have been analyzed by Jain et al. (2015). They analyzed intensity variations in the wavelength
+171 in two coronal loops of this region and detected obvious transverse oscillation with periods
+of roughly 2 minutes and decay times of 5 minutes for these loops at the flare time. To see
+the specific thermal properties of the flaring loops, as a blind test, we select a non-flaring active
+region, extract its loops and analyze their thermal treatment. Then we compare the temperature
+treatment of the loops at the flaring region with the loops of the non-flaring region to see the
+differences.
+The temperature analysis done here uses EUV images from the AIA onboard the SDO. AIA
+has ten different wavelength channels, three in white light and UV, and the other seven in EUV
+channels. Among these seven, the 304 Å filter, which is mostly sensitive to chromospheric
+temperatures (of order $T = 10^{4.7}$ K), not the corona,
+is not taken into account (Aschwanden et al. 2015).
+Therefore, we consider the images of the events in the six wavelengths (94, 131, 171, 193, 211,
+and 335 Å). These cover the coronal temperature range from T ≈ 0.6 to T ≥ 16 MK.
+The two below data sets are finally selected to study thermal variations and coronal loops
+oscillations in flaring or non-flaring active regions. A few distinct loops are visible in the regions.
+Finally, these loops are chosen:
+– Three loops of the x-flaring active region 11283: Observationally, the X-class flares are rarely
+happening around the loops with the specification we are looking for. So this selected LOS
+X-flare, which occurs near the loops is of rare cases. We consider EUV images of NOAA
+AR 11283, in the time period of 22:10UT till 23:00UT of 2011 September 6 with the cadence
+of 12 sec. This period of time is selected since no other flare is happening during it. A
+few distinct loops are visible and follow-able here during this period. Loop shapes in our
+active region change permanently; therefore, it is difficult or impossible to follow a loop
+over a very long time. Hence, it is not useful to extend the time interval of this region
+to the time before the flare. The transverse oscillations of two loops in this region were
+analyzed before by
+ Jain et al. (2015). We mark these loops by A and B in Figure 1 b. They
+detected fundamental mode oscillation with periods of roughly 2 minutes and decay time
+of 5 minutes for these loops. We are curious to see the loops’ thermal oscillations (if any)
+or thermal fluctuations in this condition. Figure 1a (left) displays AR 11283 and the area,
+indicated by the white box is featured in a zoom-in view in Figure 1.b (right) and the five
+selected parts of the center of the three chosen loops are shown by red lines (the movie of
+the region is available in this link). As it is clear in the movie, these three loops oscillate
+together and their oscillations decay simultaneously. The center of figure 1.a is coordinated
+at (230, 165) arcsec and its width and height are
+450′′ × 456′′ / 750 × 775 pixels.
+The flare
+occurring in this active region is an X2.1 class flare located close to the disk center at latitude
+14° north and longitude 18° west
+(269.9 arcsec, 129.9 arcsec).
+This flare initiates at 22:12UT,
+ends about 22:24UT with the peak at 22:20UT, and associates with a coronal mass ejection
+(CME) which occurs from 2011 September 6, 21:36:05T to 2011 September 7, 02:24:05T, with
+the radial velocity of 469 km/s, angular width of 252 deg, and position angle of 275 deg (for
+more details look at LASCO CME catalogue.) 1
+– Three loops of non-flaring active region 12194: As a blind test, we select three loops of the
+non-flaring (nonf hereafter) active region 12194 in the smooth time period of 08:00:00UT till
+09:00:00UT of 2014 October 26. The center of figure 2.a is coordinated at (0, -264) arcsec
+and its width and height are
+615′′ × 615′′ / 1025 × 1025 pixels.
+We consider the images of
+the selected area with the cadence of 12 sec in the same six wavelengths mentioned above.
+These loops are relatively motionless and do not show any transversal oscillation (see the
+region’s movie in the link). We select the loops in such a way that they do not have any
+crossing over the neighbor loops (in our perspective) during this time. In figure
+ 2 the
+selected loops are distinguished in red in the mentioned active region. The size of the final
+cut of non-flaring region (represented in the right) is 351 × 401 pixels.
+The data set are primarily downloaded at level 1 with a pixel resolution of 0.6 arcsec. We use
+the standard aia_prep.pro subroutine available in SDO package SolarSoftWare library to adjust
+the screen scale between the four arms of the AIA. This pre-processing step increases the data
+level from 1 to 1.5, so that finally no jump or sudden movement is observed in the image series.
+We also used drot_map.pro subroutine to correct the differential rotation effect. According to the
+movie made by pre-processed images, the most obvious loops (marked in the abovementioned
+figures) are selected in each region (with obvious transversal oscillations in the case of the flaring
+active region).
+ III. Temperature Analysis Method
+We extract the selected loop segment pixels, for each loop, and calculate the normal vectors
+to each point of the loop’s direction. Then by using these data, we straighten each loop in a
+considered box with the thickness of 15 to 40 pixels (macro-pixels, depending on the available
+empty area around each loop and the distance to the neighbor loop). The area around the
+loop is needed for calculations of background subtraction. The selected loop segment is cut in
+1
+Based on data on these WebSites: https://solarflare.njit.edu/webapp.html, and https://www.swpc.noaa.gov/
+all wavelengths and at the same considered box from the images set. These loop images are
+necessary entrances for our thermal analysis process. Then the loop is divided into different
+strips and its best division in terms of pixel intervals is considered. To do thermal analysis, we
+use the spatially-synthesized Gaussian DEM forward-fitting method founded by Aschwanden
+et al.
+ (2015).
+The images in the above six wavelength filters are considered to calculate the temperature in
+each strip of the loop. The DEM function is considered a single-Gaussian function relative to the
+temperature determined by the forward fitting method. To obtain the temperature for each loop,
+we divided the loop into narrow strips, and then the intensity flux was averaged over each strip.
+The number of each strip is displayed with the index i. One of the usual methods to subtract
+the background from observed data is fitting a single-Gaussian cospatial function with a linear
+function on the flux profile. The DEM for each strip is considered to be single-Gaussian DEM
+in terms of the logarithm of the temperature, which has three free parameters (Aschwanden &
+Boerner, 2011):
+$$
+\mathrm{DEM}_i
+= \frac{d\,\mathrm{EM}_i}{dT}
+= \mathrm{EM}_{p,i}\,
+\exp\left(
+-\frac{[\log(T) - \log(T_{p,i})]^2}{2\sigma_{T,i}^2}
+\right). \tag{1}
+$$
+In which, $T_{p,i}$ is the DEM peak temperature,
+$\mathrm{EM}_{p,i}$ is the peak EM function,
+and $\sigma_{T,i}$
+is the
+logarithmic width of the temperature for that strip. To calculate the background-subtracted fluxes
+(for each strip) we use Eq.6 of Aschwanden & Boerner (2011) (in below):
+$$
+F_{0\lambda}
+= \int
+\frac{d\,\mathrm{EM}(T)}{dT}\,
+R_\lambda(T)\, dT
+= \sum_k
+\mathrm{EM}(T_k)\,
+R_\lambda(T_k).
+\tag{2}
+$$
+Here, $R_\lambda(T)$ is the instrumental temperature response function of each
+wavelength filter $\lambda$, which
+is obtained by the code aia_get_response.pro in the SSW package. As time has passed, the AIA
+response functions calibration has partly changed. Here, we use the updated calibration of the
+temperature response functions, for each of the AIA temperature filters, according to the CHIANTI
+ Version 2019 code available in the Solar SoftWare (SSW). After forward-fitting the Gaussian
+DEM to the background-subtracted observed fluxes in multiple wavelengths,
+the three fitting parameters,
+temperature width ($\sigma_{T,i}$),
+peak temperature ($T_{p,i}$),
+and peak emission measure ($\mathrm{EM}_{p,i}$),
+are found by minimizing
+$\chi^2_i$.
+Our data sample is uneven because of omitting some damaged images in between. Therefore
+ to analyze the temperature oscillations, we use the Lomb-Scargle method. This method is
+developed to apply the periodogram technique in the case where the observation times are
+unevenly spaced
+(Scargle, 1982). The Lomb-Scargle periodogram method is useful in cases where
+the periodicity of data treatment is not immediately apparent. This method allows efficient computation
+ of a Fourier-like power spectrum estimator from unevenly-sampled data, resulting in
+an intuitive means of determining the period of oscillation (VanderPlas, 2018). Therefore we use
+Lomb-Scargle Periodogram to evaluate and estimate the efficient periods of temperature oscillations
+ in our loops. We select the first period related to the highest power frequency, which is
+obtained by this method. We considered the achieved periods with the highest significances and
+amplitudes. The most significant (highest) periods observed in temperature (minute) for flaring
+and non-flaring loops are listed in Tables 1 and 2, respectively. To estimate the significance of
+the periods, we computed the probability values (p-values). In the Lomb-Scargle method, the
+significance returned here is the false alarm probability of the null hypothesis, i.e., as the data
+is composed of independent Gaussian random variables. Accordingly, low probability values
+(p-value less than 0.05) indicate a high degree of significance in the associated periodic signal.
+IV. Results
+i. Temperature Analysis of Flaring Active Region Loops
+Thenceforth the temperature time-series of different strips of the selected loops are calculated
+using the method described in section 3. In the following figures, the vertical axis shows the
+logarithm of the temperature and the horizontal axis shows the time duration. To be comparable
+by eye, all the forthcoming figures (which show the loops’ temperature oscillations) have been
+co-scaled in the (log) temperature range of 5.7 to 6.9. The color maps are shown for each temperature
+map. Loops A, B1, B2, C1, and C2 are subdivided into 25, 11, 8, 12, and 6 strips, respectively. Each
+strip’s length is equal to 4 pixels (macro-pixel), for all loops in this paper. For brevity, a few strips’
+temperature oscillations are presented here. Figure
+ 3 displays the time-series of temperature
+oscillations for the first 3 strips of Loop A, and first 2 strips of loops B1. We calculated the
+errors for each point (temperature) but removed in the presentation to avoid overcrowding of the
+figures. As we observe in Figures 3 and 4, the temperature oscillations are started and increase
+around 22:12 before the flare peak time (22:20) and are mostly continuing after the flare ended
+(22:24). These temperature oscillations follow the transverse loop oscillations observed by Jain
+et al. (2015). As Jain et al. reported, Loops A and B have a transverse oscillation with periods
+of roughly 2 minutes and decay times of 5 minutes, starting at 22:18 around the flare peak time
+(22:20) and decaying after the flare ended (22:24). So as we observe, the temperature oscillations in
+these flaring loops happen before the start of their transverse oscillations and are continuing even
+in the time interval after the transverse oscillations decay. Although the temperature oscillations
+do not decay as rapid as the transverse oscillations do, and conversely, the loop temperature
+increases at the end of the oscillating mode (see Fig. 4, the temperature map of the loop A, for
+instance).
+We calculate the temperature oscillations periods, using Lomb-Scargle method. We consider
+the thermal oscillations periods with the highest significances. As this method shows, the most
+powerful periods in the range of data time-series (listed in Table 1)
+are from 7 to 28.4 minutes
+observed in the strips of the marked loops of this flaring region. These loops of flaring region
+also show some short periods in temperature oscillations, some of which are less than 10 minutes
+(listed in Table 1). These short periods are more frequently observed in the loops of the flaring
+active region. Such short periods are very scarce for the loops of the non-flaring active region
+(compare Tables 1 and 2).
+The first column in Table 1 is the number of every strip along the loop. The second column is
+the period of the most powerful frequency observed for the loop strips, calculated by the
+Lomb-Scargle method. The third column shows the maximum of log(T) minus its minimum in each
+strip. The columns of Table 2
+are exactly the same as Table 1; the only difference is that Table 2 is
+for the non-flaring loops.
+The loop A, has the length of 42.3 (Mm) which is the length of the selected part of the loop
+marked in Figure 1.b. The mean of the parameter (Max(log T)-Min(log T)) for the strips of loop A
+is 1.21. Mean of the temperature (log) of this loop over time is 6.15 ± 0.25. The loop B1, divided
+into 11 strips, has the length of 20.24 (Mm). The mean of (Max(log T)-Min(log T)) and the mean
+of the temperature for this loop are, 1.10, and 6.28 ± 0.22 respectively. The loop B2, which has 8
+strips, with the length of 15.61 (Mm), has the mean temperature (log) of 6.21 ± 0.21. The mean
+of (Max(log T)-Min(log T)) is 0.81 through this loop segment. The loops C1 and C2, divided into
+12, and 6 strips, have the lengths of 22.08 and 11.06 (Mm), the mean temperatures of 6.25 ± 0.22,
+and 6.14 ± 0.25 (log), and the mean (Max(log T)-Min(log T)) of 1.48, 0.88, respectively.
+We observe that despite the temperature oscillations, the flaring loops show a temperature
+rise at the end of the considered time interval (figure3). As their temperature maps also show,
+the oscillations follow with a relatively sensible rise in the final temperature of the loop segments
+(Figures 4). Although in the case of the transverse oscillations, the loops oscillate as the flare
+occurs and then the oscillations decay and stop, in the case of temperature oscillations, the temperatures
+ of the various strips of the loops oscillate and at the end of the flare occurrence, they
+get to a relatively higher value of temperature in average.
+Figure
+ 4 shows the temperature maps of the flaring loops A, B1, B2, C1, and C2, respectively
+as a time series. In each plot, the vertical axis is the distance along the loop segment in Mm, and
+the horizontal axis shows time. The color bar (in the left) shows the temperature range. Each
+separated grid part on the map is standing for one strip. Figure 4 shows that the temperature
+for most of the strips increased, bypassing a few oscillations. Before the end of the time duration,
+some strips become hotter (yellow ones) and some cooler (blue ones). The loop B1 is colder at
+the early times of the duration and becomes hotter at the middle and end times with a swing
+to lower temperatures again (see Fig. 4). There are some temperature fluctuations at the middle
+times (the red and green stripes) while at the end the strips temperatures are smoother with less
+fluctuations. The temperature map of the loop segment B2 (Fig.
+4) shows that at the beginning of
+the time duration, the first strips of the loop are hotter, and the last ones are colder, but at the end
+times this pattern is reversed in this loop segment. In loop segment C1 (Fig.4), the temperature
+fluctuations are mainly observed to start after the end of the flare (22:24), and at the end time
+(23:00) the temperature is much higher than the beginning. The temperature is increasing after
+the flare time (22:24) for the loop C2 either (see Fig.4). This happens with some oscillations in
+the strips’ temperatures. So as figure 4 shows, the temperature increases with some fluctuation
+in most of the flaring loops’ strips after the flare time. According to these temperature maps,
+the temperature fluctuations in the flaring loops are increasing at the flaring time and around 20
+minutes after that.
+We expect the flaring loops to cool down as a result of heat conduction and radiative cooling.
+Hence this relative temperature increase should be scrutinized. As we probed, this temperature
+rise is also followed in intensity time-series. As the intensity time-series show, the related intensity
+in the Loop A of the flaring AR increases at the end of the time duration. To be assured, the
+authors also checked the wavelength of Fe XVIII, which has a peak formation temperature of
+$7 \times 10^{6}$ K
+(Ugarte-Urra & Warren (2014)). By using the method developed by Warren et al. (2012)
+the contribution of the Fe XVIII emission line can be isolated from the AIA 94 Å channel, to analyze the
+evolution of hot plasma in the loops. We do it to omit the contamination from the cooler plasma
+(mostly around 1 MK) which also contributes to this AIA channel (Boerner et al. (2012)). This is
+done by subtracting the contaminating warm (i.e., around 1 MK) contribution to the bandpass.
+This warm contribution is calculated from a weighted combination of the emission from the AIA
+171 and 193 channels dominated by Fe X and Fe XII emission, respectively. This intensity
+analysis is done directly and it has not gone through any other process like the thermal analysis.
+For this purpose, we applied the formulation (1) used by Li et al. (2015). Plots in Figure 5 show
+the intensity map, and the mean intensity variation of the wavelength Fe XVIII, for Loop A of
+the flaring region, respectively. As these plots show, this intensity is also higher at the end of
+the time duration in respect of the flare time. It seems to us that the expected cooling has not
+occurred in these flaring loops yet, even after the flare occurrence in the probed duration due to
+some plausible reasons. We consider that the mentioned simultaneous CME
+(see section II), with which
+this flare is associated, could cause this increase in temperature. We can be sure that the
+source of this CME is AR 11283 (Romano et al. (2015)). This CME is in our flare region, hence
+the loops receive energy even after the flare occurrence and it is probably the reason why the
+expected cooling does not occur.
+The thermal oscillations periods obtained by the Lomb-Scargle method do not have the same
+significance in all strips of the loops, but for most strips of the flaring loops, the significances are
+very near to one. To be assured about these oscillations, we probed the intensity time-series for
+each strip of the loops and we observed that the loops’ intensities show oscillations
+too (i.e., alongside the loop). The most probable dominant periods observed in intensity, for the
+wavelength of 171 Å, are 18.22 and 16.7 min for strips of F-Loop A, 16.7 and 18.22 min for strips of
+F-Loop B1, 16.70, and 12.52 for F-Loop B2, and 16.7 for F-Loop C1 and F-Loop C2. These periods
+are in the same order of the observed thermal oscillation periods. The intensity in this time series
+has not passed any thermal process but still shows oscillation periods close to thermal ones. So
+we think these results confirm the observation of thermal oscillations.
+ii. Temperature Analysis of non-Flaring Active Region Loops
+The temperature time-series for different strips of the selected loops of the non-flaring active
+region 12194 are calculated using the Lomb-Scargle method. In the following figures (Fig.
+ 6),
+the vertical axis shows the logarithm of the temperature and the horizontal axis shows the time
+duration. Figure 6 displays the time-series of temperature variations for the first two strips of
+the non-flaring Loops A, and B. These figures are all co-scaled in the range of 5.7 to 6.9 for the
+logarithm of temperature (like the flaring loops range). The most powerful periods, observed in
+most of these non-flaring loops’ strips (listed in Table2) are from 8.5 min. to 30 min. Comparing
+the periods of the loops in the flaring region (Table1) with the non-flaring one (Table2), we see
+that the temperature periods of the flaring loops have lower values on average and have more
+diversity than the non-flaring ones.
+As Tables 1 and 2 show, the mean temperatures of nonf-loops
+are lower in comparison with the f-loops, a fact we also expected from common sense.
+The parameter (Max(log T)-Min(log T)) in nonf-loops’ strips is less than that for the flaring loops’
+strips.
+Nonf-loop A, divided into 11 strips, has the length of 19.91 (Mm) which is the length of the
+selected part of the loop marked in Figure 2b. The mean of (Max(log T)-Min(log T)) for the strips
+of nonf-loop A is 0.81. Mean of the temperature (log) of this loop segment over time is 5.93 ± 0.10.
+Nonf-Loop B, divided into 6 strips, has the length of 11.11 (Mm), and the mean temperature (log),
+and the mean of (Max(log T)-Min(log T)) for this loop are 5.99 ± 0.13 and 0.62, respectively.
+Nonf-loop C, which has 5 strips, with the length of 10.13 (Mm), has the mean temperature (log) of
+5.82 ± 0.12, and the mean (Max(log T)-Min(log T)) of 0.56.
+The first highest period observed for the temperature oscillations of these non-flaring loops’
+strips is reported in Table 2. As we observe, the temperature periods in these non-flaring loops
+are mostly longer than those of the flaring loops
+(compare the values listed in Table 1 and Table 2).
+Therefore the temperature oscillations of these loops are a little slower than the flaring ones.
+Figure 7 shows the temperature maps of the non-flaring loops A, B, and C, respectively as a
+time series. In each plot, the vertical axis is the distance along the loop in Mm, and the horizontal
+axis is the time. The color bar in the left shows the colors considered for the temperature range.
+Each separated colored part in the map is one strip. These color maps are plotted totally at the
+same color range of the loops of the flaring region either.
+As figure 7 shows, the strips’ temperature of these non-flaring loops have fewer temperature
+fluctuations and are smoother in comparison with the flaring ones (Fig. 4). Furthermore, that
+much increase in the temperatures of the strips, which was obvious in the loops of the flaring
+region toward the end times, is not observed here. The temperatures are also totally lower in the
+nonf-loops in comparison with the flaring loops. Conversely, it seems that different strips of the
+non-flaring loops have relatively more similar temperature fluctuations.
+As figure 8 shows, the peaks of the observed temperature periods for the loops’ strips of the
+flaring active region (blue ones), and non-flaring active region (red ones), are around 18 minutes,
+and 30 minutes, respectively. The temperature periods’ diversity is higher in the loops’ strips of
+the flaring active region, and shorter temperature periods (less than 10 minutes, nearer to the
+transverse oscillations periods) are observed in the case of the flaring loops’ strips in comparison
+with the non-flaring ones. And figure
+ 9 shows that the increasing and decreasing of temperature
+range, or the difference between maximum and minimum of the temperature value
+(max(log T) − min(log T)), is much higher on average for the loops’ strips of the flaring AR in comparison with
+the loops’ strips of the non-flaring one.
+ V. Summary
+We reported the temperature oscillations of coronal loops of a flaring active region. We selected
+the flaring active region 11283 to investigate the thermal structure and treatment of its loops. This
+region includes a high energy flare x2.1 and the transverse oscillations of two loops of it have been
+analyzed before by Jain et al. (2015). They analyzed intensity variations in the wavelength 171
+in two coronal loops of this region and detected obvious transverse oscillation with periods of
+roughly 2 minutes and decay times of 5 minutes for these loops (loops A and B in Figure.1b)
+at the flare time. We were curious to know if the temperature variations follow the transverse
+oscillations of the loops, or there is any relation or correlation between them. We also wanted to
+investigate the thermal fluctuations at the flare time. As a blind test to see the specific thermal
+properties of the flaring loops, we selected a LOS non-flaring active region (12194), extracted three
+segments of its loops and analyzed their thermal treatment. Then we compared the temperature
+treatment of the loops at the flaring region with the loops of the non-flaring region to see the
+differences. We were eager to observe the probable discrepancies between flaring and non-flaring
+loops in this respect.
+Here we used data of three loops of the flaring active region (AR11283) around the time of the
+Flare X2.1, from 22:10UT till 23:00UT on 2011 September 6, plus three loops of the non-flaring
+active region (AR12194), from 08:00:00UT till 09:00:00UT of 2014 October 26 (marked in figures
+1 and 2). To calculate the time series of the loop temperature values, we first extracted the loop
+pixels in each image and then displayed the loop straightly for all the images in the time series
+of different wavelengths. To do thermal analysis, we used the spatially-synthesized Gaussian
+DEM forward-fitting method founded by Aschwanden et al. (2015). We calculated the peak
+temperatures for each strip of the loops. Then we applied the Lomb-Scargle method to analyze
+temperature oscillations of the time-series for each strip of the loops.
+We observed temperature oscillations which are following the transverse loop oscillations
+observed by Jain et al. (2015) for the flaring loops. Furthermore, the temperature oscillations in
+these flaring loops happen before the transverse oscillations start and continue even in the time
+duration after the transverse oscillations decay. As observed, the temperature oscillations do not
+decay as rapidly as the transverse oscillations do. Conversely, the strips’ temperatures increase
+at the end of the oscillating mode and a rather sensible rise is observed in the final temperatures
+of the f-loops’ segments. The ranges of the obtained periods are from 7 min. to 28.4 min. for the
+flaring loops, and from 8.5 min. to 30 min. for the non-flaring loops. With the onset of X-flare in
+the F-loopA, which has a distinct transverse oscillation in the flaring time with period of roughly
+2 minutes and decay time of 5 minutes, a temperature oscillation is observed with periods of
+roughly 10 to 28.5 minutes in different segments of this loop. And as the transverse oscillation
+decays in this interval, no special definite decay is observed in its temperature oscillations.
+The temperature periods of the flaring loops are rather shorter than the temperature periods
+of the non-flaring loops. The loops of the flaring region show some short temperature oscillations
+periods in which some are less than 10 minutes (Table 1). These kinds of short periods are more
+frequently observed for the loops of the flaring active region and in the case of the non-flaring
+ones, are very scarce. We observed that the periods of the flaring loops have more diversity
+than those of the non-flaring ones. Based on our confined observations, the non-flaring loops’
+periods are longer and their temperatures’ values are totally lower. So our research showed that
+thermal structures of the flaring loops differ from the non-flaring ones in the ways described
+above. As temperature maps show, the temperature fluctuations are increasing at the flaring time
+and around 20 min. after, in the flaring loops. This happens with some oscillations in strips’
+temperature. Conversely, it seems that different strips of the non-flaring loops have relatively
+more similar temperature fluctuations. The temperatures are also higher on average in the flaring
+ loops’ segments as expected. The significances of the periods, obtained by the Lomb-Scargle
+method, are calculated for each strip of each loop and the results show that these significances
+for the loops’ strips of the flaring region are high and close to one, while for the loops’ strips of
+the non-flaring region are less than 0.5. Hence the detected periods in the flaring loops’ strips
+have high significances (near to one) and are oscillations. Whereas the detected periods in the
+non-flaring loops’ strips have less significances in comparison with the flaring ones, and maybe
+they are just fluctuations.
+Using this method for the coronal loops showed that the oscillation modes obtained for the
+temperatures of the flaring loops are very close to those of the spatial slow-mode oscillations of
+the coronal loops. So the origin of temperature oscillation is probably slow-mode waves. These
+kind of oscillations often occur in hot coronal loops (log(T ) > 6) of active regions especially the
+ones associated with small (or micro-) flares (Wang et al. (2021)). The loops of our flaring active
+region are also hot loops with the mean temperature above this range. They also show intensity
+oscillations. Hence we think the above evidence confirms the slow-mode oscillations for flaring
+loops. The temperature of the non-flaring loops are lower (log(T ) < 6) and as discussed above,
+we believe that the observed oscillation-like periods in non-flaring loops should be more probably
+related to the high amplitude fluctuations.
+Comparing the loops of the flaring and non-flaring regions, we observed that the amplitudes
+of the fluctuations show a discrepancy. Mean of the parameter (Max(log T)-Min(log T)) in the
+FloopA, FloopB1, FloopB2, FloopC1, and FloopC2, are 1.21, 1.10, 0.81, 1.48, and 0.88, respectively.
+ And for non-flaring region, mean of (Max(log T)-Min(log T)), are 0.81, 0.62, and 0.56, for
+nonfloopA, B, and C respectively. Therefore the values of the quantity mean of
+(Max(log T)-Min(log T)) for these non-flaring loops show a difference from the flaring ones and are lower.
+Loops of the non-flaring active region 12194 have a relatively uniform temperature at the
+beginning of the time interval, which rises slightly at its end. As the Solar Monitor reports in the
+neighborhood of this region, the flaring active region 12192 exists of which between its multiple
+flares, there is a c4.6 class flare occurring at 9:44UT. Therefore, it could be a possible suggestion
+that the abovementioned slight temperature rise in the loops of AR 12194 (in the time interval
+8:00 to 9:00) originated from the influence of an increase in the energy at the pre-flare conditions
+exist in the AR 12192.
+Hence as our study shows, the temperature of coronal loops of flaring AR changes in an
+oscillatory manner. Compared with these non-flaring loops, the flaring loops show higher temperatures
+ on average and higher oscillation periods with higher peaks and deeper valleys. More
+accurate commentary in this respect requires more extensive statistical research and broader observations.
+
+arcsecarcsec
+ 79 154 229 304 379 454−6825118211304397
+ a
+ arcsecarcsec
+
+114.6  171.2 227.8 284.4 341171.4206.3241.2276.1311
+ Loop B1
+ Loop ALoop C2
+Loop C1b
+ Loop B2
+Figure 1: (a) AIA image of the AR 11283 on 2011 September 6, 22:10 UT as seen in the 171 filter. (b) Zoom-in view
+of the area marked by a box in the left. The selected loops are distinguished in red. The loops A and B are
+the same loops studied by
+ Jain et al. (2015) (see Fig.3a in Jain et al. (2015)).
+arcsecarcsec
+ −154 0 154 308−572−418−264−11044
+ a
+ arcsecarcsec
+ −202 −134 −66 2 70−396−338−280−221−162
+ nonf−LoopAnonf−LoopB
+ nonf−LoopCb
+Figure 2: (a) The NOAA AR12194 on 2014 October 26, at 08:00:00UT in 171 recorded by AIA/SDO. (b) Zoom-in
+view of the area, marked by a box in the left, the loops are distinguished in red.
+5.866.26.46.66.8
+LogT  F−LoopA
+5.866.26.46.66.8
+LogT
+ 22:10 22:20 22:30 22:40 22:50 23:005.866.26.46.66.8
+ timeLogT
+            5.866.26.46.66.8
+LogT F−LoopB1
+22:10 22:20 22:30 22:40 22:50 23:005.866.26.46.66.8
+ timeLogT
+Figure 3: From up to down: The time-series of the temperature oscillations for the first 3 strips of Loop A (strip 1 to
+3 from top to down), and the first 2 strips of LoopB1. Horizontal axis is the time and the vertical axis is the
+logarithm of the temperature. The red lines mark the initial and final time of the flare x2.1.
+22:10 22:20 22:30 22:40 22:50 23:000 11213242   F−loopA
+Time Loop Length(Mm)
+ 5.866.26.46.66.8
+22:10 22:20 22:30 22:40 22:50 23:000 5 101520   F−loopB1
+Time Loop Length(Mm)
+ 66.056.16.156.26.256.36.356.46.456.5
+22:10 22:20 22:30 22:40 22:50 23:000 4 8 1216   F−loopB2
+Time Loop Length(Mm)
+ 5.866.26.46.66.8
+22:10 22:20 22:30 22:40 22:50 23:000 6 111722   F−loopC1
+Time Loop Length(Mm)
+ 5.65.866.26.46.66.8
+22:10 22:20 22:30 22:40 22:50 23:000 3 6 8 11   F−loopC2
+Time Loop Length(Mm)
+ 5.866.26.46.66.8
+Figure 4: Temperature map of the flaring loops A, B1, B2, C1, and C2 (from top to down) as a time series. The vertical
+axis is the distance along the loop in Mm, and the horizontal axis is the time. The colorbar in the left shows
+the colors considered for the temperature range.
+Table 1: The properties observed for the loop segments of the flaring AR.
+FLoopA
+(Strip Number) The highest
+Temp.’s period
+observed Max(log(T))Min(log(T))
+ FLoopB2
+(Strip Number) The highest
+Temp.’s period
+observed Max(log(T))Min(log(T))
+
+1 9.94 1.09 1 18.07 0.68
+2 16.57 0.79 2 24.85 0.83
+3 8.46 0.65 3 24.85 0.85
+4 28.4 1.11 4 7.36 0.84
+5 28.4 0.75 5 8.64 0.85
+6 24.85 0.76 6 8.28 0.93
+7 22.09 0.58 7 18.07 0.84
+8 18.07 1.55 8 28.4 0.73
+9 18.07 1.6 FLoopC1 -
+10 12.42 1.57 1 28.4 1.46
+11 12.42 1.42 2 22.09 1.34
+12 24.85 1.56 3 16.57 1.36
+13 19.88 1.6 4 28.04 1.49
+14 19.88 1.24 5 24.85 1.6
+15 18.07 1.58 6 24.85 1.42
+16 19.88 1.45 7 15.29 1.6
+17 16.57 0.7 8 13.25 1.56
+18 7.36 1.6 9 13.25 1.6
+19 8.64 0.95 10 16.57 1.6
+20 16.57 1.54 11 16.57 1.6
+21 7.36 1.18 12 9.46 1.13
+22 7.36 1.51 FLoopC2 -
+23 18.07 1.58 1 18.07 0.88
+24 22.09 1.33 2 28.4 0.8
+25 24.85 0.72 3 15.29 0.87
+FLoopB1 - - 4 16.57 0.93
+1 18.07 1.43 5 18.07 1.22
+2 15.29 0.76 6 28.4 0.58
+3 18.07 0.76
+4 18.07 0.75
+5 18.07 0.59
+6 19.88 0.8
+7 19.88 0.91
+8 19.88 1.36
+9 11.04 1.6
+10 18.07 1.6
+11 18.07 1.6
+Table 2: The properties observed for the loop segments of the non flaring AR.
+Nonf-LoopA
+(Strip Number) The highest
+Temp.’s period
+observed Max(log(T))Min(log(T))
+
+1 24 0.61
+2 30 0.95
+3 30 0.81
+4 20 1.51
+5 20 0.77
+6 20 0.81
+7 11.42 0.71
+8 12 0.73
+9 30 0.72
+10 30 0.77
+11 30 0.61
+Nonf-LoopB
+(Strip Number) The highest
+Temp.’s period
+observed Max(log(T))Min(log(T))
+
+1 26.66 0.36
+2 26.66 0.64
+3 10.43 0.45
+4 12 0.62
+5 30 0.98
+6 8.57 0.67
+Nonf-LoopC
+(Strip Number) The highest
+Temp.’s period
+observed Max(log(T))Min(log(T))
+
+1 26.66 0.76
+2 26.66 0.75
+3 26.66 0.26
+4 30 0.27
+5 30 0.8
+22:10 22:20 22:30 22:40 22:50 23:000 11223243   Int−Fe−LoopA
+Time Loop Length(Mm)
+ 00.020.040.060.080.10.120.140.160.180.2
+22:10 22:20 22:30 22:40 22:50 23:0000.10.20.30.40.50.60.70.80.91 Int−Fe−LoopA
+TimeNormalized Intensity Fe XVIII
+Figure 5: Normalized intensity map of the flaring loop A for the wavelength Fe XV I I I, and mean intensity of Fe
+XV I I I (from top to down). The vertical axis is the distance along the loop in Mm for the first plot, and
+normalized intensity for the second. The horizontal axis is the time. The colorbar in the left shows the colors
+considered for the Intensity range.
+VI. acknowledgements
+The author Narges Fathalian wishes to also express her thanks for the technical support and
+comments which she has received from Dr. Farhad Daii and Dr. Mohsen Javaherian regarding this
+work.
+             5.866.26.46.66.8
+LogT NonF−LoopA
+8:00 8:10 8:20 8:30 8:40 8:50 9:005.866.26.46.66.8
+ timeLogT
+              5.866.26.46.66.8
+LogT NonF−LoopB
+8:00 8:10 8:20 8:30 8:40 8:50 9:005.866.26.46.66.8
+ timeLogT
+Figure 6: from top to down: The time-series of the temperature for the first 2 strips (from top to down) of the nonflaring
+ Loops A and B. Horizontal axis is the time and the vertical axis is the logarithm of the temperature.
+8:10 8:20 8:30 8:40 8:50 9:000 5 101520   NonF−loopA
+Time Loop Length(Mm)
+ 5.866.26.46.66.8
+8:10 8:20 8:30 8:40 8:50 9:000 5 9 1418   NonF−loopB
+Time Loop Length(Mm)
+ 5.866.26.46.66.8
+8:10 8:20 8:30 8:40 8:50 9:000 3 5 8 10   NonF−loopC
+Time Loop Length(Mm)
+ 5.866.26.46.66.8
+Figure 7: from top to down: Temperature map of the non-flaring loops A, B and C as a time-series. The vertical axis
+is the distance along the loop in Mm, and the horizontal axis is the time. The color-bar in the left shows the
+colors considered for the temperature range.
+6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 3000.050.10.150.20.250.30.350.4
+ Temp. Period (min)Percentage of Temp. Periods
+Figure 8: Histogram of the temperature periods percentages for the loops’ strips of the flaring (blue bars) and nonflaring
+ (red bars) ARs. The horizontal axis shows the temperature periods in minute.
+0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1 1.1 1.2 1.3 1.4 1.5 1.6 1.7024681012
+ max(log(T))−min(log(T))Number
+Figure 9: Histogram of the parameter of (max(log(T))-min(log(T))) for each strip of the loops of the flaring (blue bars)
+and non-flaring (red bars) ARs.
+References
+Abedini, A., Safari, H., & Nasiri, S. 2012, Solar Physics, 280
+Anfinogentov, S., Nakariakov, V. M., Mathioudakis, M., Van Doorsselaere, T., & Kowalski, A. F.
+2013, ApJ, 773, 156
+Aschwanden, M., B. P. S. C. M. A. 2013, Solar Physics, 283, 5
+Aschwanden, M. J. 2006, Philosophical Transactions of the Royal Society of London Series A, 364,
+417
+Aschwanden, M. J., & Boerner, P. 2011, The Astrophysical Journal, 732, 81
+Aschwanden, M. J., Boerner, P., Ryan, D., et al. 2015, The Astrophysical Journal, 802, 53
+Aschwanden, M. J., Fletcher, L., Schrijver, C. J., & Alexander, D. 1999, ApJ, 520, 880
+Ballai, I., Jess, D. B., & Douglas, M. 2011, A&A, 534, A13
+Banerjee, D., Erdélyi, R., Oliver, R., & O’Shea, E. 2007, Solar Physics, 246, 3
+Berghmans, D., & Clette, F. 1999, Solar Physics, 186, 207
+Boerner, P., Edwards, C., Lemen, J., et al. 2012, Solar Physics, 275, 41
+Dahlburg, R. B., Einaudi, G., Ugarte-Urra, I., Rappazzo, A. F., & Velli, M. 2018, ApJ, 868, 116
+De Moortel, I. 2005, Philosophical Transactions of the Royal Society of London Series A, 363, 2743
+De Moortel, I., & Brady, C. S. 2007, ApJ, 664, 1210
+De Moortel, I., Ireland, J., & Walsh, R. W. 2000, A&A, 355, L23
+De Moortel, I., & Nakariakov, V. M. 2012, Philosophical Transactions of the Royal Society of
+London Series A, 370, 3193
+Fathalian, N. 2019, arXiv e-prints, arXiv:1908.11369
+Fathalian, N., & Safari, H. 2010, ApJ, 724, 411
+Fathalian, N., Safari, H., & Nasiri, S. 2010, New Astronomy, 15, 403
+Goossens, M., Hollweg, J. V., & Sakurai, T. 1992, Solar Physics, 138, 233
+Gruszecki, M., Murawski, K., Selwa, M., & Ofman, L. 2006, A&A, 460, 887
+Guennou, C., Auchère, F., Soubrié, E., et al. 2012a, ApJ, 203, 25
+Guennou, C., Auchère, F., Soubrié, E., et al. 2012b, ApJ, 203, 26
+Habbal, S. R., & Rosner, R. 1979, ApJ, 234, 1113
+Hindman, B. W., & Jain, R. 2014, ApJ, 784, 103
+Jain, R., Maurya, R. A., & Hindman, B. W. 2015, ApJ, 804, L19
+Jess, D. B., Reznikova, V. E., Ryans, R. S. I., et al. 2016, Nature Physics, 12, 179
+Kolotkov, D. Y., Nakariakov, V. M., & Zavershinskii, D. I. 2019, A&A, 628, A133
+Krishna Prasad, S., Jess, D. B., & Van Doorsselaere, T. 2019, Frontiers in Astronomy and Space
+Sciences, 6, 57
+Li, L. P., Peter, H., Chen, F., & Zhang, J. 2015, A&A, 583, A109
+Liu, W., & Ofman, L. 2014, Solar Physics, 289, 3233–3277
+Luna, M., Terradas, J., Oliver, R., & Ballester, J. L. 2010, ApJ, 716, 1371
+McClymont, A. N., & Craig, I. J. D. 1985, ApJ, 289, 834
+McLaughlin, J. A., Nakariakov, V. M., Dominique, M., Jelínek, P., & Takasao, S. 2018, Space
+Science Reviews volume, 214, 45
+Nakariakov, V. M., Afanasyev, A. N., Kumar, S., & Moon, Y. J. 2017, ApJ, 849, 62
+Nakariakov, V. M., Inglis, A. R., Zimovets, I. V., et al. 2010, Plasma Physics and Controlled Fusion,
+52, 124009
+Nakariakov, V. M., Ofman, L., Deluca, E. E., Roberts, B., & Davila, J. M. 1999, Science, 285, 862
+Nakariakov, V. M., & Verwichte, E. 2005, Living Reviews in Solar Physics, 2, 3
+Nisticò, G., Nakariakov, V. M., & Verwichte, E. 2013, A&A, 552, A57
+Nisticò, G., Polito, V., Nakariakov, V. M., & Del Zanna, G. 2017, A&A, 600, A37
+Ofman, L., & Wang, T. 2002, ApJ, 580, L85
+Pant, V., Tiwari, A., Yuan, D., & Banerjee, D. 2017, ApJ, 847, L5
+Pascoe, D. J., Nakariakov, V. M., & Arber, T. D. 2007, Solar Physics, 246, 165
+Reale, F., Testa, P., Petralia, A., & Kolotkov, D. Y. 2019, ApJ, 884, 131
+Roberts, B., Edwin, P. M., & Benz, A. O. 1984, ApJ, 279, 857
+Romano, P., Zuccarello, F., Guglielmino, S. L., et al. 2015, A&A, 582, A55
+Russell, A. J. B., Simões, P. J. A., & Fletcher, L. 2015, A&A, 581, A8
+Scargle, J. D. 1982, ApJ, 263, 835
+Schmelz, J. T., Jenkins, B. S., Worley, B. T., et al. 2011, ApJ, 731, 49
+Schmelz, J. T., Kimble, J. A., Jenkins, B. S., et al. 2010, ApJ, 725, L34
+Schmelz, J. T., Pathak, S., Brooks, D. H., Christian, G. M., & Dhaliwal, R. S. 2014, ApJ, 795, 171
+Schmelz, J. T., Pathak, S., Jenkins, B. S., & Worley, B. T. 2013, ApJ, 764, 53
+Ugarte-Urra, I., & Warren, H. P. 2014, ApJ, 783, 12
+Van Doorsselaere, T., Kupriyanova, E. G., & Yuan, D. 2016, Solar Physics, 291, 3143
+Van Doorsselaere, T., Wardle, N., Del Zanna, G., et al. 2011, ApJ, 727, L32
+VanderPlas, J. T. 2018, ApJ, 236, 16
+Verwichte, E., Nakariakov, V. M., Ofman, L., & Deluca, E. E. 2004, Solar Physics, 223, 77
+Wang, T. 2011, Space Science Reviews, 158, 397–419
+Wang, T., Innes, D. E., & Qiu, J. 2007, ApJ, 656, 598
+Wang, T. J., & Solanki, S. K. 2004, A&A, 421, L33
+Wang, T. J., Solanki, S. K., Innes, D. E., Curdt, W., & Marsch, E. 2003, A&A, 402, L17
+Wang, T., & Ofman, L. 2019, ApJ, 886, 2
+Wang, T., Ofman, L., Sun, X., Provornikova, E., & Davila, J. M. 2015, ApJ, 811, L13
+Wang, T., Ofman, L., Yuan, D., et al. 2021, Space Science Reviews, 217
+Warren, H. P., Winebarger, A. R., & Brooks, D. H. 2012, ApJ, 759, 141
+Wills-Davey, M. J., & Thompson, B. J. 1999, Solar Physics, 190, 467
\ No newline at end of file
diff --git a/read/results/playa/GeoTopo-book.txt b/read/results/playa/GeoTopo-book.txt
new file mode 100644
index 0000000..8a01b45
--- /dev/null
+++ b/read/results/playa/GeoTopo-book.txt
@@ -0,0 +1,8025 @@
+Einführung in die
+Geometrie und Topologie
+0. Auflage, 31. Dezember 2016 Martin Thoma
+Vorwort
+Dieses Skript wurde im Wintersemester 2013/2014 von Martin Thoma geschrieben. Es beinhaltet
+die Mitschriften aus der Vorlesung von Prof. Dr. Herrlich sowie die Mitschriften einiger Übungen
+und Tutorien.
+Das Skript ist kostenlos über martin-thoma.com/geotopo verfügbar. Wer es gerne in A5 (SchwarzWeiß,
+ Ringbindung) für 10 Euro hätte, kann mir eine E-Mail schicken (info@martin-thoma.de).
+Danksagungen
+An dieser Stelle möchte ich Herrn Prof. Dr. Herrlich für einige Korrekturvorschläge und einen
+gut strukturierten Tafelanschrieb danken, der als Vorlage für dieses Skript diente. Tatsächlich
+basiert die Struktur dieses Skripts auf der Vorlesung von Herrn Prof. Dr. Herrlich und ganze
+Abschnitte konnten direkt mit LaTeX umgesetzt werden. Vielen Dank für die Erlaubnis, Ihre
+Inhalte in diesem Skript einbauen zu dürfen!
+Vielen Dank auch an Frau Lenz und Frau Randecker, die es mir erlaubt haben, ihre Übungsaufgaben
+ und Lösungen zu benutzen.
+Jérôme Urhausen hat durch viele Verbesserungsvorschläge und Beweise zu einer erheblichen
+Qualitätssteigerung am Skript beigetragen und meine Tutorin Sarah hat mir viele Fragen per
+E-Mail und nach dem Tutorium beantwortet. Danke!
+Was ist Topologie?
+Die Kugeloberfläche $S^2$ lässt sich durch strecken, stauchen und umformen zur Würfeloberfläche
+oder der Oberfläche einer Pyramide verformen, aber nicht zum $\mathbb{R}^2$ oder zu einem Torus $T^2$. Für
+den $\mathbb{R}^2$ müsste man die Oberfläche unendlich ausdehnen und für einen Torus müsste man ein
+Loch machen.
+Erforderliche Vorkenntnisse
+Es wird ein sicherer Umgang mit den Quantoren (∀, ∃), Mengenschreibweisen (∪, ∩, \, ∅, R, P (M ))
+und ganz allgemein formaler Schreibweise vorausgesetzt. Auch die Beweisführung mittels Widerspruchsbeweisen
+ sollte bekannt sein und der Umgang mit komplexen Zahlen C, deren Betrag,
+Folgen und Häufungspunkten nicht weiter schwer fallen. Diese Vorkenntnisse werden vor allem
+in „Analysis I“ vermittelt.
+Außerdem wird vorausgesetzt, dass (affine) Vektorräume, Faktorräume, lineare Unabhängigkeit,
+der Spektralsatz und der projektive Raum P(R) aus „Lineare Algebra I“ bekannt sind. In „Lineare
+Algebra II“ wird der Begriff der Orthonormalbasis eingeführt.
+
+ (a) S 2
+ (b) Würfel (c) Pyramide
+y
+ x
+(d) R2
+ (e) T 2
+Abbildung 0.1: Beispiele für verschiedene Formen
+Obwohl es nicht vorausgesetzt wird, könnte es von Vorteil sein „Einführung in die Algebra und
+Zahlentheorie“ gehört zu haben.
+Inhaltsverzeichnis
+1 Topologische Grundbegriffe 2
+1.1 Topologische Räume . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 2
+1.2 Metrische Räume . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 6
+1.3 Stetigkeit . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 9
+1.4 Zusammenhang . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11
+1.5 Kompaktheit . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 14
+1.6 Wege und Knoten . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 17
+Übungsaufgaben . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 22
+2 Mannigfaltigkeiten und Simplizialkomplexe 24
+2.1 Topologische Mannigfaltigkeiten . . . . . . . . . . . . . . . . . . . . . . . . . . . . 24
+2.2 Differenzierbare Mannigfaltigkeiten . . . . . . . . . . . . . . . . . . . . . . . . . . 29
+2.3 Simplizialkomplex . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 34
+Übungsaufgaben . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 43
+3 Fundamentalgruppe und Überlagerungen 44
+3.1 Homotopie von Wegen . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 44
+3.2 Fundamentalgruppe . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 47
+3.3 Überlagerungen . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 51
+3.4 Gruppenoperationen . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 61
+4 Euklidische und nichteuklidische Geometrie 64
+4.1 Axiome für die euklidische Ebene . . . . . . . . . . . . . . . . . . . . . . . . . . . 64
+4.2 Weitere Eigenschaften einer euklidischen Ebene . . . . . . . . . . . . . . . . . . . 74
+4.2.1 Flächeninhalt . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 74
+4.3 Hyperbolische Geometrie . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 77
+Übungsaufgaben . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 86
+5 Krümmung  87
+5.1 Krümmung von Kurven . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 87
+5.2 Tangentialebene . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 89
+5.3 Gauß-Krümmung . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 91
+5.4 Erste und zweite Fundamentalform . . . . . . . . . . . . . . . . . . . . . . . . . . 94
+Lösungen der Übungsaufgaben 99
+Bildquellen  105
+Abkürzungsverzeichnis 106
+Ergänzende Definitionen und Sätze 107
+Symbolverzeichnis 108
+  Inhaltsverzeichnis
+Stichwortverzeichnis 111
+1 Topologische Grundbegriffe
+1.1 Topologische Räume
+Definition 1
+Ein topologischer Raum ist ein Paar (X, T) bestehend aus einer Menge X und T ⊆ P (X )
+mit folgenden Eigenschaften
+(i) ∅, X ∈ T
+(ii) Sind $U_1, U_2 \in T$, so ist $U_1 \cap U_2 \in T$
+(iii) Ist $I$ eine Menge und $U_i \in T$ für jedes $i \in I$, so ist $\bigcup_{i \in I} U_i \in T$
+Die Elemente von T heißen offene Teilmengen von X .
+A ⊆ X heißt abgeschlossen, wenn X \ A offen ist.
+Es gibt auch Mengen, die weder abgeschlossen, noch offen sind wie z. B. [0, 1). Auch gibt es
+Mengen, die sowohl abgeschlossen als auch offen sind.
+Bemerkung 1 (Mengen, die offen & abgeschlossen sind, ex.)
+Betrachte ∅ und X mit der trivialen Topologie T
+triv = { ∅, X }.
+Es gilt: X ∈ T und ∅ ∈ T, d. h. X und ∅ sind offen. Außerdem X C
+ = X \ X = ∅ ∈ T und
+X \ ∅ = X ∈ T, d. h. X und ∅ sind als Komplement offener Mengen abgeschlossen.
+Beispiel 1 (Topologien)
+1) X = Rn
+ mit der von der euklidischen Metrik erzeugten Topologie T
+Euklid:
+U ⊆ Rn
+ offen ⇔ für jedes x ∈ U gibt es r > 0,
+sodass B
+r (x) = { y ∈ Rn
+ | d(x, y) < r } ⊆ U
+Diese Topologie wird auch „Standardtopologie des Rn
+“ genannt. Sie beinhaltet unter
+anderem alle offenen Kugeln, aber z. B. auch Schnitte zweier Kugeln mit unterschiedlichem
+ Mittelpunkt (vgl. Definition 1.ii).
+2) Jeder metrische Raum (X, d) ist auch ein topologischer Raum.
+3) Für eine Menge X heißt T
+Diskret = P (X ) diskrete Topologie.
+4) X := R, T
+Z := { U ⊆ R | R \ U endlich } ∪ { ∅ } heißt Zariski-Topologie
+Beobachtungen:
+• U ∈ T
+Z ⇔ ∃f ∈ R[X ], sodass R \ U = V (f ) = { x ∈ R | f (x) = 0 }
+• Es gibt keine disjunkten offenen Mengen in T
+Z .
+  1.1. TOPOLOGISCHE RÄUME
+5) X := Rn
+, T
+Z = {U ⊆ Rn
+|Es gibt Polynome f
+1, . . . , f
+r ∈ R[X
+1, . . . , X
+n] sodass
+Rn
+ \ U = V (f
+1, . . . , f
+r )}
+6) X := { 0, 1 } , T = { ∅, { 0, 1 } , { 0 } } heißt Sierpińskiraum.
+∅, { 0, 1 } , { 1 } sind dort alle abgeschlossenen Mengen.
+Definition 2
+Sei (X, T) ein topologischer Raum und x ∈ X .
+Eine Teilmenge U ⊆ X heißt Umgebung von x, wenn es ein U
+0 ∈ T gibt mit x ∈ U
+0 und
+U
+0 ⊆ U .
+Gilt eine Eigenschaft in einer Umgebung, so sagt man, dass die Eigenschaft lokal gilt.
+Definition 3
+Sei (X, T) ein topologischer Raum und M ⊆ X eine Teilmenge.
+a) $M^\circ := \{ x \in M \mid M \text{ ist Umgebung von } x \} = \bigcup_{U \subseteq M,\ U \in T} U$ heißt Inneres oder offener
+Kern von $M$.
+b) $\overline{M} := \bigcap_{M \subseteq A,\ A \text{ abgeschlossen}} A$ heißt abgeschlossene Hülle oder Abschluss von $M$.
+c) $\partial M := \overline{M} \setminus M^\circ$ heißt Rand von $M$.
+d) $M$ heißt dicht in $X$, wenn $\overline{M} = X$ ist.
+Beispiel 2
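+1) Sei $X = \mathbb{R}$ mit euklidischer Topologie und $M = \mathbb{Q}$. Dann gilt: $\overline{M} = \mathbb{R}$ und $M^\circ = \emptyset$
+2) Sei $X = \mathbb{R}$ und $M = (a, b)$. Dann gilt: $\overline{M} = [a, b]$
+3) Sei $X = \mathbb{R}$, $T = T_Z$ und $M = (a, b)$. Dann gilt: $\overline{M} = \mathbb{R}$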
+Definition 4
+Sei (X, T) ein topologischer Raum.
+a) B ⊆ T heißt Basis der Topologie T, wenn jedes U ∈ T Vereinigung von Elementen
+aus B ist.
+b) S ⊆ T heißt Subbasis der Topologie T, wenn jedes U ∈ T Vereinigung von endlichen
+Durchschnitten von Elementen aus S ist.
+Beispiel 3 (Basis und Subbasis)
+1) Jede Basis ist auch eine Subbasis, z.B.
+S = { (a, b) | a, b ∈ R, a < b } ist für R mit der Standardtopologie sowohl Basis als
+auch Subbasis.
+2) Gegeben sei $X = \mathbb{R}^n$ mit euklidischer Topologie $T$. Dann ist
+$B = \{ B_r(x) \mid r \in \mathbb{Q}_{>0}, x \in \mathbb{Q}^n \}$
+eine abzählbare Basis von $T$.
+3) Sei (X, T) ein topologischer Raum mit X = { 0, 1, 2 } und T = { ∅, { 0 } , { 0, 1 } , { 0, 2 } , X }.
+Dann ist S = { ∅, { 0, 1 } , { 0, 2 } } eine Subbasis von T, da gilt:
+  1.1. TOPOLOGISCHE RÄUME
+• S ⊆ T
+• ∅, { 0, 1 } und { 0, 2 } ∈ S
+• { 0 } = { 0, 1 } ∩ { 0, 2 }
+• X = { 0, 1 } ∪ { 0, 2 }
+Allerdings ist S keine Basis von (X, T), da { 0 } nicht als Vereinigung von Elementen
+aus S erzeugt werden kann.
+Bemerkung 2
+Sei X eine Menge und S ⊆ P (X ). Dann gibt es genau eine Topologie T auf X , für die S
+Subbasis ist.
+Definition 5
+Sei (X, T) ein topologischer Raum und Y ⊆ X .
+T
+Y := { U ∩ Y | U ∈ T } ist eine Topologie auf Y .
+T
+Y heißt Teilraumtopologie und (Y , T
+Y ) heißt ein Teilraum von (X, T).
+Die Teilraumtopologie wird auch Spurtopologie oder Unterraumtopologie genannt.
+Definition 6
+Seien $X_1, X_2$ topologische Räume.
+$U \subseteq X_1 \times X_2$ sei offen, wenn es zu jedem $x = (x_1, x_2) \in U$ Umgebungen $U_i$ um $x_i$ mit
+$i = 1, 2$ gibt, sodass $U_1 \times U_2 \subseteq U$ gilt.
+$T = \{ U \subseteq X_1 \times X_2 \mid U \text{ offen} \}$ ist eine Topologie auf $X_1 \times X_2$. Sie heißt Produkttopologie.
+$B = \{ U_1 \times U_2 \mid U_i \text{ offen in } X_i,\ i = 1, 2 \}$ ist eine Basis von $T$.
+U
+x
+x
+2
+ x
+1U
+2
+ U
+1 X
+1X
+2
+Abbildung 1.1: Zu x = (x
+1, x
+2) gibt es Umgebungen U
+1, U
+2 mit U
+1 × U
+2 ⊆ U
+Beispiel 4 (Produkttopologien)
+1) X
+1 = X
+2 = R mit euklidischer Topologie.
+⇒ Die Produkttopologie auf R × R = R2
+ stimmt mit der euklidischen Topologie auf
+R2
+ überein.
+2) X
+1 = X
+2 = R mit Zariski-Topologie. T Produkttopologie auf R2
+: U
+1 × U
+2
+(Siehe Abbildung 1.2)
+  1.1. TOPOLOGISCHE RÄUME
+U
+1 = R \ NU
+2
+ =
+R
+\
+N
+Abbildung 1.2: Zariski-Topologie auf R2
+Definition 7
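+Sei $X$ ein topologischer Raum, $\sim$ eine Äquivalenzrelation auf $X$, $\overline{X} = X/{\sim}$ sei die Menge
+der Äquivalenzklassen, $\pi : X \to \overline{X}$, $x \mapsto [x]_\sim$.
+$T_{\overline{X}} := \{ U \subseteq \overline{X} \mid \pi^{-1}(U) \in T_X \}$
+$(\overline{X}, T_{\overline{X}})$ heißt Quotiententopologie.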
+Beispiel 5
+X = R, a ∼ b :⇔ a − b ∈ Z
+ R
+-1 0 1 2 3 4 5
+0a
+ U aπ−1
+(u)
+0 ∼ 1, d. h. [0] = [1]
+Beispiel 6
+Sei X = R2
+ und (x
+1, y
+1) ∼ (x
+2, y
+2) ⇔ x
+1 − x
+2 ∈ Z und y
+1 − y
+2 ∈ Z. Dann ist X/
+∼ ein Torus.
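+Beispiel 7 (Projektiver Raum)
+$X = \mathbb{R}^{n+1} \setminus \{ 0 \}$, $x \sim y \Leftrightarrow \exists \lambda \in \mathbb{R}^\times$ mit $y = \lambda x$
+$\Leftrightarrow$ $x$ und $y$ liegen auf der gleichen Ursprungsgerade
+$\overline{X} = P^n(\mathbb{R})$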
+  1.2. METRISCHE RÄUME
+Also für n = 1:
+−4 −2 2 4 6 8
+−4−224
+1.2 Metrische Räume
+Definition 8
+Sei $X$ eine Menge. Eine Abbildung $d : X \times X \to \mathbb{R}_0^+$ heißt Metrik, wenn gilt:
+(i) Definitheit: d(x, y) = 0 ⇔ x = y ∀x, y ∈ X
+(ii) Symmetrie: d(x, y) = d(y, x) ∀x, y ∈ X
+(iii) Dreiecksungleichung: d(x, z) ≤ d(x, y) + d(y, z) ∀x, y, z ∈ X
+Das Paar (X, d) heißt ein metrischer Raum.
+Bemerkung 3
+Sei (X, d) ein metrischer Raum und
+B
+r (x) := { y ∈ X | d(x, y) < r } für x ∈ X, r ∈ R+
+B = { B
+r (x) ⊆ P (X ) | x ∈ X, r ∈ R+
+ } ist Basis einer Topologie auf X .
+Definition 9
+Seien (X, d
+X ) und (Y , d
+Y ) metrische Räume und ϕ : X → Y eine Abbildung mit
+∀x
+1, x
+2 ∈ X : d
+X (x
+1, x
+2) = d
+Y (ϕ(x
+1), ϕ(x
+2))
+Dann heißt ϕ eine Isometrie von X nach Y .
+Beispiel 8 (Skalarprodukt erzeugt Metrik)
+Sei $V$ ein euklidischer oder hermitescher Vektorraum mit Skalarprodukt $\langle \cdot, \cdot \rangle$. Dann wird $V$
+durch $d(x, y) := \sqrt{\langle x - y, x - y \rangle}$ zum metrischen Raum.
+Beispiel 9 (diskrete Metrik)
+Sei $X$ eine Menge. Dann heißt
+$d(x, y) = \begin{cases} 0 & \text{falls } x = y \\ 1 & \text{falls } x \neq y \end{cases}$
+die diskrete Metrik. Die Metrik $d$ induziert die diskrete Topologie.
+  1.2. METRISCHE RÄUME
+Beispiel 10
+$X = \mathbb{R}^2$ und $d((x_1, y_1), (x_2, y_2)) := \max(|x_1 - x_2|, |y_1 - y_2|)$ ist Metrik.
+Beobachtung: d erzeugt die euklidische Topologie.
+B
+r (0) = r r
+ r
+r
+(a) B
+r (0) (b) Euklidische Topologie
+Abbildung 1.3: Veranschaulichungen zur Metrik d aus Beispiel 10
+  1.2. METRISCHE RÄUME
+Beispiel 11 (SNCF-Metrik1
+)
+X = R2
+−4 −2 2 4 6 8
+−4−224
+Definition 10
+Ein topologischer Raum $X$ heißt hausdorffsch, wenn es für je zwei Punkte $x \neq y$ in $X$
+Umgebungen $U_x$ um $x$ und $U_y$ um $y$ gibt, sodass $U_x \cap U_y = \emptyset$.
+Bemerkung 4 (Trennungseigenschaft)
+Metrische Räume sind hausdorffsch, wegen
+d(x, y) > 0 ⇒ ∃ε > 0 : B
+ε(x) ∩ B
+ε(y) = ∅
+Beispiel 12 (Topologische Räume und Hausdorff-Räume)
+1) (R, T
+Z ) ist ein topologischer Raum, der nicht hausdorffsch ist.
+2) (R, T
+Euklid) ist ein topologischer Hausdorff-Raum.
+Bemerkung 5 (Eigenschaften von Hausdorff-Räumen)
+Seien X, X
+1, X
+2 Hausdorff-Räume.
+a) Jeder Teilraum von X ist hausdorffsch.
+b) X
+1 × X
+2 ist hausdorffsch (vgl. Abbildung 1.4).
+Definition 11
+Sei $X$ ein topologischer Raum und $(x_n)_{n \in \mathbb{N}}$ eine Folge in $X$. $x \in X$ heißt Grenzwert oder
+Limes von $(x_n)$, wenn es für jede Umgebung $U$ von $x$ ein $n_0$ gibt, sodass $x_n \in U$ für alle
+$n \geq n_0$.
+Bemerkung 6
+Ist X hausdorffsch, so hat jede Folge in X höchstens einen Grenzwert.
+Beweis: Sei $(x_n)$ eine konvergierende Folge und $x$ und $y$ Grenzwerte der Folge.
+Da $X$ hausdorffsch ist, gibt es Umgebungen $U_x$ von $x$ und $U_y$ von $y$ mit $U_x \cap U_y = \emptyset$, falls
+$x \neq y$. Da $(x_n)$ gegen $x$ und $y$ konvergiert, existiert ein $n_0$ mit $x_n \in U_x \cap U_y$ für alle $n \geq n_0$
+$\Rightarrow x = y$
+1
+Diese Metrik wird auch „ französische Eisenbahnmetrik“ genannt.
+  1.3. STETIGKEIT
+(x
+1, y
+1) (x
+2, y
+2)
+x
+1 x
+2
+U
+1 × X
+2 U
+2 × X
+2 X
+1X
+2
+Abbildung 1.4: Wenn X
+1, X
+2 hausdorffsch sind, dann auch X
+1 × X
+2
+1.3 Stetigkeit
+Definition 12
+Seien (X, T
+X ), (Y , T
+Y ) topologische Räume und f : X → Y eine Abbildung.
+a) f heißt stetig :⇔ ∀U ∈ T
+Y : f −1
+(U ) ∈ T
+X .
+b) f
+ heißt Homöomorphismus, wenn f stetig ist und es eine stetige Abbildung g :
+Y → X gibt, sodass g ◦ f = id
+X und f ◦ g = id
+Y .
+Bemerkung 72
+Seien X, Y metrische Räume und f : X → Y eine Abbildung.
+Dann gilt: f ist stetig ⇔ zu jedem x ∈ X und jedem ε > 0 gibt es δ(x, ε) > 0, sodass für
+alle y ∈ X mit d(x, y) < δ gilt d
+Y (f (x), f (y)) < ε.
+Beweis: „ ⇒“: Sei x ∈ X, ε > 0 gegeben und U := B
+ε(f (x)).
+Dann ist U offen in Y .
+Def. 12.a
+=====⇒ f −1
+(U ) ist offen in X . Dann ist x ∈ f −1
+(U ).
+⇒ ∃δ > 0, sodass B
+δ (x) ⊆ f −1
+(U )
+⇒ f (B
+δ (x)) ⊆ U
+⇒ { y ∈ X | d
+X (x, y) < δ } ⇒ Beh.
+„⇐“: Sei $U \subseteq Y$ offen, $x \in f^{-1}(U)$.
+Dann gibt es ε > 0, sodass B
+ε(f (x)) ⊆ U
+Vor.
+==⇒ Es gibt δ > 0, sodass f (B
+δ (x)) ⊆ B
+ε(f (x)))
+⇒ B
+δ (x) ⊆ f −1
+(B
+ε(f (x))) ⊆ f −1
+(U )
+Bemerkung 8
+Seien X, Y topologische Räume und f : X → Y eine Abbildung. Dann gilt:
+f ist stetig
+⇔ für jede abgeschlossene Teilmenge A ⊆ Y gilt : f −1
+(A) ⊆ X ist abgeschlossen.
+Beispiel 13 (Stetige Abbildungen und Homöomorphismen)
+1) Für jeden topologischen Raum X gilt: id
+X : X → X ist Homöomorphismus.
+2
+Es wird die Äquivalenz von Stetigkeit im Sinne der Analysis und Topologie auf metrischen Räumen gezeigt.
+  1.3. STETIGKEIT
+2) Ist (Y , T
+Y ) trivialer topologischer Raum, d. h. T
+Y = T
+triv, so ist jede Abbildung
+f : X → Y stetig.
+3) Ist X diskreter topologischer Raum, so ist f : X → Y stetig für jeden topologischen
+Raum Y und jede Abbildung f .
+4) Sei $X = [0, 1)$, $Y = S^1 = \{ z \in \mathbb{C} \mid |z| = 1 \}$ und $f(t) = e^{2\pi i t}$.
+R
+0 1 0f
+g
+Abbildung 1.5: Beispiel einer stetigen Funktion f , deren Umkehrabbildung g nicht stetig ist.
+Die Umkehrabbildung g ist nicht stetig, da g−1
+(U ) nicht offen ist (vgl. Abbildung 1.5).
+Bemerkung 9 (Verkettungen stetiger Abbildungen sind stetig)
+Seien X, Y , Z topologische Räume, f : X → Y und g : Y → Z stetige Abbildungen.
+Dann ist g ◦ f : X → Z stetig.
+ X f
+
+g◦f
+  Y
+g
+
+Z
+Beweis: Sei U ⊆ Z offen ⇒ (g ◦ f )−1
+(U ) = f −1
+(g−1
+(U )). g−1
+(U ) ist offen in Y weil g stetig
+ist, f −1
+(g−1
+(U )) ist offen in X , weil f stetig ist.
+Bemerkung 10
+a) Für jeden topologischen Raum X ist
+Homöo(X ) := { f : X → X | f ist Homöomorphismus }
+eine Gruppe.
+b) Jede Isometrie f : X → Y zwischen metrischen Räumen ist ein Homöomorphismus.
+c) Iso(X ) := { f : X → X | f ist Isometrie } ist eine Untergruppe von Homöo(X ) für
+jeden metrischen Raum X .
+Bemerkung 11 (Pro jektionen sind stetig)
+Seien X, Y topologische Räume. π
+X : X × Y → X und π
+Y : X × Y → Y die Pro jektionen
+π
+X : (x, y) → x und π
+Y : (x, y) → y
+Wird X × Y mit der Produkttopologie versehen, so sind π
+X und π
+Y stetig.
+Beweis: Sei U ⊆ X offen
+⇒ π−1
+X (U ) = U × Y ist offen in X × Y .
+Bemerkung 12
+Sei X ein topologischer Raum, ∼ eine Äquivalenzrelation auf X , X = X/
+∼ der Bahnenraum
+versehen mit der Quotiententopologie, π : X → X , x → [x]
+∼.
+Dann ist π stetig.
+  1.4. ZUSAMMENHANG
+Beweis: Nach Definition ist U ⊆ X offen ⇔ π−1
+(U ) ⊆ X offen.
+Beobachtung: Die Quotiententopologie ist die feinste Topologie, sodass π stetig wird.
+Beispiel 14 (Stereographische Pro jektion)
+$\mathbb{R}^n$ und $S^n \setminus \{N\}$ sind homöomorph für beliebiges $N \in S^n$. Es gilt:
+$S^n = \{ x \in \mathbb{R}^{n+1} \mid \|x\| = 1 \} = \left\{ x \in \mathbb{R}^{n+1} \;\middle|\; \sum_{i=1}^{n+1} x_i^2 = 1 \right\}$
+O. B. d. A. sei $N = \begin{pmatrix} 0 \\ \vdots \\ 0 \\ 1 \end{pmatrix}$. Die Gerade durch $N$ und $P$ schneidet die Ebene $H$ in genau
+einem Punkt $\hat{P}$. $P$ wird auf $\hat{P}$ abgebildet.
+$f : S^n \setminus \{N\} \to \mathbb{R}^n$, $P \mapsto \overbrace{L_P \cap H}^{\text{genau ein Punkt}}$
+wobei $\mathbb{R}^n = H = \left\{ \begin{pmatrix} x_1 \\ \vdots \\ x_{n+1} \end{pmatrix} \in \mathbb{R}^{n+1} \;\middle|\; x_{n+1} = 0 \right\}$ und $L_P$ die Gerade in $\mathbb{R}^{n+1}$ durch $N$
+und $P$ ist.
+Sei $P = \begin{pmatrix} x_1 \\ \vdots \\ x_{n+1} \end{pmatrix}$, so ist $x_{n+1} < 1$, also ist $L_P$ nicht parallel zu $H$. Also schneiden sich $L_P$
+und $H$ in genau einem Punkt $\hat{P}$.
+Es gilt: f ist bijektiv und die Umkehrabbildung ist ebenfalls stetig.
+1.4 Zusammenhang
+Definition 13
+a) Ein Raum X heißt zusammenhängend, wenn es keine offenen, nichtleeren Teilmengen
+U
+1, U
+2 von X gibt mit U
+1 ∩ U
+2 = ∅ und U
+1 ∪ U
+2 = X .
+b) Eine Teilmenge Y ⊆ X heißt zusammenhängend, wenn Y als topologischer Raum mit
+der Teilraumtopologie zusammenhängend ist.
+  1.4. ZUSAMMENHANG
+x yz
+N
+ ˆ
+P0 P
+Abbildung 1.6: Visualisierung der stereographischen Pro jektion
+Bemerkung 13
+X
+ ist zusammenhängend ⇔ Es gibt keine abgeschlossenen, nichtleeren Teilmengen A
+1, A
+2
+mit A
+1 ∩ A
+2 = ∅ und A
+1 ∪ A
+2 = X .
+Beispiel 15 (Zusammenhang von Räumen)
+1) (Rn
+, T
+Euklid) ist zusammenhängend, denn:
+Annahme: $\mathbb{R}^n = U_1 \mathbin{\dot\cup} U_2$ mit $\emptyset \neq U_1, U_2 \in \mathfrak{T}_{\text{Euklid}}$ existieren.
+Sei x ∈ U
+1, y ∈ U
+2 und [x, y] die Strecke zwischen x und y. Sei V = [x, y]. Nun
+betrachten wir V  Rn
+ als (metrischen) Teilraum mit der Teilraumtopologie T
+V .
+Somit gilt U
+1 ∩ [x, y] ∈ T
+V wegen der Definition der Teilraumtopologie.
+Dann gibt es z ∈ [x, y] mit z ∈ ∂ (U
+1 ∩ [x, y]), aber z /∈ U
+1 ⇒ z ∈ U
+2. In jeder
+Umgebung von z liegt ein Punkt von U
+1 ⇒ Widerspruch zu U
+2 offen.
+2) R \ { 0 } ist nicht zusammenhängend, denn R \ { 0 } = R
+<0 ∪ R
+>0
+3) R2
+ \ { 0 } ist zusammenhängend.
+4) $\mathbb{Q} \subsetneq \mathbb{R}$ ist nicht zusammenhängend, da $(\mathbb{Q} \cap \mathbb{R}_{<\sqrt{2}}) \cup (\mathbb{Q} \cap \mathbb{R}_{>\sqrt{2}}) = \mathbb{Q}$
+5) { x } ist zusammenhängend für jedes x ∈ X , wobei X ein topologischer Raum ist.
+6) R mit Zariski-Topologie ist zusammenhängend.
+Bemerkung 14
+Sei $X$ ein topologischer Raum und $A \subseteq X$ zusammenhängend. Dann ist auch der Abschluss $\overline{A}$ zusammenhängend.
+
+  1.4. ZUSAMMENHANG
+Beweis: durch Widerspruch
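+Annahme: $\overline{A} = A_1 \cup A_2$, $A_i$ abgeschlossen, $A_i \neq \emptyset$, $A_1 \cap A_2 = \emptyset$
+$\Rightarrow A = \underbrace{\underbrace{(A \cap A_1)}_{\text{abgeschlossen}} \mathbin{\dot\cup} \underbrace{(A \cap A_2)}_{\text{abgeschlossen}}}_{\text{disjunkt}}$
+Wäre $A \cap A_1 = \emptyset$
+$\Rightarrow A \subseteq \overline{A} = A_1 \mathbin{\dot\cup} A_2$
+$\Rightarrow A \subseteq A_2 \Rightarrow \overline{A} \subseteq A_2$
+$\Rightarrow A_1 = \emptyset$
+$\Rightarrow$ Widerspruch zu $A_1 \neq \emptyset$
+$\Rightarrow A \cap A_1 \neq \emptyset$ und analog $A \cap A_2 \neq \emptyset$
+$\Rightarrow$ Widerspruch zu $A$ ist zusammenhängend.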
+Bemerkung 15
+Sei X ein topologischer Raum und A, B ⊆ X zusammenhängend.
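+Ist $A \cap B \neq \emptyset$, dann ist $A \cup B$ zusammenhängend.
+Beweis: Sei $A \cup B = U_1 \mathbin{\dot\cup} U_2$, $U_i \neq \emptyset$ offen
+$\xRightarrow{\text{o. B. d. A.}} A = (A \cap U_1) \mathbin{\dot\cup} (A \cap U_2)$ offen
+$\xRightarrow{A \text{ zhgd.}} A \cap U_1 = \emptyset$
+$\xRightarrow{A \cap B \neq \emptyset} U_1 \subseteq B$
+$B = \underbrace{(B \cap U_1)}_{= U_1} \cup \underbrace{(B \cap U_2)}_{\neq \emptyset}$ ist unerlaubte Zerlegung.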
+
+Definition 14
+Sei X ein topologischer Raum.
+Für $x \in X$ sei $Z(x) \subseteq X$ definiert durch
+$Z(x) := \bigcup_{\substack{A \subseteq X \text{ zhgd.} \\ x \in A}} A$
+$Z(x)$ heißt Zusammenhangskomponente.
+Bemerkung 16 (Eigenschaften von Zusammenhangskomponenten)
+Sei X ein topologischer Raum. Dann gilt:
+a) Z (x) ist die größte zusammenhängende Teilmenge von X , die x enthält.
+b) Z (x) ist abgeschlossen.
+c) X ist disjunkte Vereinigung von Zusammenhangskomponenten.
+Beweis:
+  1.5. KOMPAKTHEIT
+a) Sei $Z(x) = A_1 \mathbin{\dot\cup} A_2$ mit $A_i \neq \emptyset$ abgeschlossen.
+O. B. d. A. sei $x \in A_1$ und $y \in A_2$. $y$ liegt in einer zusammenhängenden Teilmenge $A$,
+die auch $x$ enthält. $\Rightarrow A = \underbrace{(A \cap A_1)}_{\ni x} \cup \underbrace{(A \cap A_2)}_{\ni y}$ ist unerlaubte Zerlegung.
+b) Nach Bemerkung 14 ist $\overline{Z(x)}$ zusammenhängend $\Rightarrow \overline{Z(x)} \subseteq Z(x) \Rightarrow Z(x) = \overline{Z(x)}$
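+c) Ist $Z(y) \cap Z(x) \neq \emptyset \xRightarrow{\text{Bem. 15}} Z(y) \cup Z(x)$ ist zusammenhängend.
+$\Rightarrow Z(x) \cup Z(y) \subseteq Z(x) \Rightarrow Z(y) \subseteq Z(x)$
+$\Rightarrow Z(x) \cup Z(y) \subseteq Z(y) \Rightarrow Z(x) \subseteq Z(y)$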
+
+Bemerkung 17
+Sei f : X → Y stetig. Ist A ⊆ X zusammenhängend, so ist f (A) ⊆ Y zusammenhängend.
+Beweis: Sei f (A) = U
+1 ∪ U
+2, U
+i = ∅, offen, disjunkt.
+⇒ f −1
+(f (A)) = f −1
+(U
+1) ∪ f −1
+(U
+2)
+⇒ A = (A ∩ f −1
+(U
+1))
+
+=∅ ∪ (A ∩ f −1
+(U
+2))
+
+=∅
+1.5 Kompaktheit
+Definition 15
+Sei X eine Menge und U ⊆ P (X ).
+U heißt eine Überdeckung von X , wenn gilt:
+∀x ∈ X : ∃M ∈ U : x ∈ M
+Definition 16
+Ein topologischer Raum X heißt kompakt, wenn jede offene Überdeckung von X
+U = { U
+i }
+i∈I mit U
+i offen in X
+eine endliche Teilüberdeckung
+
+i∈J ⊆IU
+i = X mit |J | ∈ N
+besitzt.
+Bemerkung 18
+Das Einheitsintervall I := [0, 1] ist kompakt bezüglich der euklidischen Topologie.
+Beweis: Sei (U
+i)
+i∈J eine offene Überdeckung von I .
+Es genügt zu zeigen, dass es ein δ > 0 gibt, sodass jedes Teilintervall der Länge δ von I in
+einem der U
+i enthalten ist. Wenn es ein solches δ gibt, kann man I in endlich viele Intervalle
+  1.5. KOMPAKTHEIT
+der Länge δ unterteilen und alle U
+i in die endliche Überdeckung aufnehmen, die Teilintervalle
+enthalten.
+Angenommen, es gibt kein solches $\delta$. Dann gibt es für jedes $n \in \mathbb{N}$ ein Intervall $I_n \subseteq [0, 1]$
+der Länge $1/n$, sodass $I_n \not\subseteq U_i$ für alle $i \in J$.
+Sei x
+n der Mittelpunkt von I
+n. Die Folge (x
+n) hat einen Häufungspunkt x ∈ [0, 1]. Dann
+gibt es i ∈ J mit x ∈ U
+i. Da U
+i offen ist, gibt es ein ε > 0, sodass (x − ε, x + ε) ⊆ U
+i.
+Dann gibt es n
+0, sodass gilt: 1
+/n
+0 < ε
+/2 und für unendlich viele3
+ n ≥ n
+0 : |x − x
+n| < ε
+/2, also
+I
+n ⊆ (x − ε, x + ε) ⊆ U
+i für mindestens ein n ∈ N.4
+⇒ Widerspruch
+Dann überdecke [0, 1] mit endlich vielen Intervallen I
+1, . . . , I
+d der Länge δ. Jedes I
+j ist in
+U
+ij enthalten.
+⇒ U
+j
+1 , . . . , U
+j
+d ist endliche Teilüberdeckung von U .
+Beispiel 16 (Kompakte Räume)
+1) R ist nicht kompakt.
+2) (0, 1) ist nicht kompakt.
+$U_n = (1/n, 1 - 1/n) \Rightarrow \bigcup_{n \in \mathbb{N}} U_n = (0, 1)$
+3) R mit der Zariski-Topologie ist kompakt und jede Teilmenge von R ist es auch.
+Bemerkung 19
+Sei X kompakter Raum, A ⊆ X abgeschlossen. Dann ist A kompakt.
+Beweis: Sei (V
+i)
+i∈I offene Überdeckung von A.
+Dann gibt es für jedes i ∈ I eine offene Teilmenge U
+i ⊆ X mit V
+i = U
+i ∩ A.
+⇒ A ⊆
+i∈I U
+i
+⇒ U = { U
+i | i ∈ I } ∪ { X \ A } ist offene Überdeckung von X
+X kompakt
+=======⇒ es gibt i
+1, . . . , i
+n ∈ I , sodass n
+
+j=1 U
+i
+j ∪ (X \ A) = X
+⇒ 
+ n
+
+j=1 U
+i
+j ∪ (X \ A)
+
+ ∩ A = A
+⇒ n
+
+j=1 (U
+i
+j ∩ A)
+
+
+=V
+i
+j ∪ ((X \ A) ∩ A)
+
+=∅ = A
+⇒ V
+i
+1 , . . . , V
+i
+n überdecken A.
+
+Bemerkung 20
+Seien X, Y kompakte topologische Räume. Dann ist X × Y mit der Produkttopologie
+kompakt.
+Beweis: Sei (W
+i)
+i∈I eine offene Überdeckung von X × Y . Für jedes (x, y) ∈ X × Y gibt es
+offene Teilmengen U
+x,y von X und V
+x,y von Y sowie ein i ∈ I , sodass U
+x,y × V
+x,y ⊆ W
+i.
+3
+Dies gilt nicht für alle n ≥ n
+0, da ein Häufungspunkt nur eine konvergente Teilfolge impliziert.
+4
+Sogar für unendlich viele.
+  1.5. KOMPAKTHEIT
+W
+i
+x
+y
+ xV
+x,y
+ U
+x,y YX
+Abbildung 1.7: Die blaue Umgebung ist Schnitt vieler Umgebungen
+Die offenen Mengen U
+x
+0,y × V
+x
+0,y für festes x
+0 und alle y ∈ Y überdecken { x
+0 } × y. Da Y
+kompakt ist, ist auch { x
+0 } × Y kompakt. Also gibt es y
+1, . . . , y
+m(x
+0) mit
+m(x
+0)
+i=1 U
+x
+0,y
+i ×
+V
+x
+0,y
+i ⊇ { x
+0 } × Y .
+Sei U
+x
+0 :=
+m(x)
+i=1 U
+x
+0,y
+i . Da X kompakt ist, gibt es x
+1, . . . , x
+n ∈ X mit
+n
+j=1 U
+x
+j = X
+⇒
+k
+j=1
+m(x
+j )
+i=1
+U
+x
+j ,y
+i × V
+x
+j ,y
+i
+
+
+Ein grün-oranges Kästchen⊇ X × Y
+⇒
+j
+i W
+i(x
+j , y
+i) = X × Y
+Bemerkung 21
+Sei X ein Hausdorffraum und K ⊆ X kompakt. Dann ist K abgeschlossen.
+Beweis: z. Z.: Komplement ist offen
+Ist X = K , so ist K abgeschlossen in X . Andernfalls sei y ∈ X \ K . Für jedes x ∈ K seien
+U
+x bzw. V
+y Umgebungen von x bzw. von y, sodass U
+x ∩ V
+y = ∅.
+X
+i
+Kx
+y
+Da $K$ kompakt ist, gibt es endlich viele $x_1, \dots, x_n \in K$, sodass $\bigcup_{i=1}^{n} U_{x_i} \supseteq K$.
+Sei $V := \bigcap_{i=1}^{n} V_{x_i}$
+  1.6. WEGE UND KNOTEN
+$\Rightarrow V \cap \left( \bigcup_{i=1}^{n} U_{x_i} \right) = \emptyset$
+$\Rightarrow V \cap K = \emptyset$
+$\Rightarrow V$ ist Umgebung von $y$, die ganz in $X \setminus K$ enthalten ist.
+⇒ X \ K ist offen
+Damit ist K abgeschlossen.
+Bemerkung 22
+Seien X, Y topologische Räume, f : X → Y stetig.
+Ist K ⊆ X kompakt, so ist f (K ) ⊆ Y kompakt.
+Beweis: Sei (V
+i)
+i∈I offene Überdeckung von f (K )
+f stetig
+====⇒ (f −1
+(V
+i))
+i∈I ist offene Überdeckung von K
+Kompakt
+=====⇒ es gibt i
+1, . . . , i
+n, sodass f −1
+(V
+i
+1 ), . . . , f −1
+(V
+i
+n ) Überdeckung von K ist.
+⇒ f (f −1
+(V
+i
+1 )), . . . , f (f −1
+(V
+i
+n )) überdecken f (K ).
+Es gilt: f (f −1
+(V )) = V ∩ f (X )
+Satz 1.1 (Heine-Borel)
+Eine Teilmenge von Rn
+ oder Cn
+ ist genau dann kompakt, wenn sie beschränkt und
+abgeschlossen ist.
+Beweis: „ ⇒“: Sei K ⊆ Rn
+ (oder Cn
+) kompakt.
+Da Rn
+ und Cn
+ hausdorffsch sind, ist K nach Bemerkung 21 abgeschlossen. Nach Voraussetzung
+ kann K mit endlich vielen offenen Kugeln von Radien 1 überdeckt werden ⇒ K ist
+beschränkt.
+„ ⇐“ Sei A ⊆ Rn
+ (oder Cn
+) beschränkt und abgeschlossen.
+Dann gibt es einen Würfel W = [−N , N ] × · · · × [−N , N ]
+
+n mal mit A ⊆ W bzw. „Polyzylinder“
+$Z = \{ (z_1, \dots, z_n) \in \mathbb{C}^n \mid |z_i| \leq N \text{ für } i = 1, \dots, n \}$
+Nach Bemerkung 20 und Bemerkung 18 ist W kompakt, also ist A nach Bemerkung 19 auch
+kompakt. Genauso ist Z kompakt, weil
+$\{ z \in \mathbb{C} \mid |z| \leq 1 \}$
+homöomorph zu
+$\{ (x, y) \in \mathbb{R}^2 \mid \|(x, y)\| \leq 1 \}$
+ist.
+1.6 Wege und Knoten
+Definition 17
+Sei X ein topologischer Raum.
+  1.6. WEGE UND KNOTEN
+a) Ein Weg in X ist eine stetige Abbildung γ : [0, 1] → X .
+b) γ heißt geschlossen, wenn γ (1) = γ (0) gilt.
+c) γ heißt einfach, wenn γ |
+[0,1) injektiv ist.
+Beispiel 17
+Ist X diskret, so ist jeder Weg konstant, d. h. von der Form
+∀x ∈ [0, 1] : γ (x) = c, c ∈ X
+Denn γ ([0, 1]) ist zusammenhängend für jeden Weg γ .
+Definition 18
+Ein topologischer Raum X heißt wegzusammenhängend, wenn es zu je zwei Punkten
+x, y ∈ X einen Weg γ : [0, 1] → X gibt mit γ (0) = x und γ (1) = y.
+Bemerkung 23
+Sei X ein topologischer Raum.
+a) X ist wegzusammenhängend ⇒ X ist zusammenhängend
+b) X ist wegzusammenhängend ⇍ X ist zusammenhängend
+Beweis:
+a)
+ Sei X ein wegzusammenhängender topologischer Raum, A
+1, A
+2 nichtleere, disjunkte,
+abgeschlossene Teilmengen von X mit A
+1 ∪ A
+2 = X . Sei x ∈ A
+1, y ∈ A
+2, γ : [0, 1] → X
+ein Weg von x nach y.
+Dann ist C := γ ([0, 1]) ⊆ X zusammenhängend, weil γ stetig ist.
+C = (C ∩ A
+1)
+
+x ∪ (C ∩ A
+2)
+
+y
+ist Zerlegung in nichtleere, disjunkte, abgeschlossene Teilmengen ⇒ Widerspruch
+b) Sei $X = \left\{ (x, y) \in \mathbb{R}^2 \;\middle|\; x^2 + y^2 = 1 \lor y = 1 + 2 \cdot e^{-\frac{1}{10} x} \right\}$.
+Abbildung 1.8a veranschaulicht diesen Raum.
+Sei U
+1 ∪ U
+2 = X, U
+1 = U
+2 = ∅, U
+i offen. X = C ∪ S . Dann ist C ⊆ U
+1 oder C ⊆ U
+2,
+weil C und S zusammenhängend sind.
+Also ist C = U
+1 und S = U
+2 (oder umgekehrt).
+Sei y ∈ C = U
+1, ε > 0 und B
+ε(y) ⊆ U
+1 eine Umgebung von y, die in U
+1 enthalten ist.
+Aber: $B_\varepsilon(y) \cap S \neq \emptyset \Rightarrow$ Widerspruch $\Rightarrow X = C \cup S$ ist zusammenhängend, aber nicht
+wegzusammenhängend.
+Beispiel 18 (Hilbert-Kurve)
+Es gibt stetige, surjektive Abbildungen [0, 1] → [0, 1] × [0, 1]. Ein Beispiel ist die in Abbildung
+ 1.9 dargestellte Hilbert-Kurve.
+Definition 19
+Sei X ein topologischer Raum. Eine Jordankurve in X ist ein Homöomorphismus γ :
+[0, 1] → C ⊆ X bzw. γ : S 1
+ → C ⊆ X , wobei C := Bild γ .
+  1.6. WEGE UND KNOTEN
+(a) Spirale S mit Kreis C 0.1 1
+−101
+ X
+Y {(x, sin( 1
+x )) ∈ X × Y }
+(−1, 1) ⊆ Y
+(b) Sinus
+Abbildung 1.8: Beispiele für Räume, die zusammenhängend, aber nicht wegzusammenhängend
+sind.
+(a) n = 1 (b) n = 2 (c) n = 3 (d) n = 4 (e) n = 5
+Abbildung 1.9: Hilbert-Kurve
+Jede Jordankurve ist also ein einfacher Weg.
+Satz 1.2 (Jordanscher Kurvensatz)
+Ist C = γ ([0, 1]) eine geschlossene Jordankurve in R2
+, so hat R2
+ \ C genau zwei
+Zusammenhangskomponenten, von denen eine beschränkt ist und eine unbeschränkt.
+außen
+innen
+Jordankurve
+Abbildung 1.10: Die beschränkte Zusammenhangskomponente wird häufig Inneres, die unbeschränkte
+ Äußeres genannt.
+Beweis: ist technisch mühsam und wird hier nicht geführt. Er kann in „Algebraische Topologie:
+Eine Einführung“ von R. Stöcker und H. Zieschang auf S. 301f (ISBN 978-3519122265)
+nachgelesen werden.
+Idee: Ersetze Weg C durch Polygonzug.
+  1.6. WEGE UND KNOTEN
+Definition 20
+Eine geschlossene Jordankurve in R3
+ heißt Knoten.
+Beispiel 19 (Knoten)
+(a) Trivialer Knoten (b) Kleeblattknoten (c) Achterknoten (d) 6
+2-Knoten
+Abbildung 1.11: Beispiele für verschiedene Knoten
+Definition 21
+Zwei Knoten γ
+1, γ
+2 : S 1
+ → R3
+ heißen äquivalent, wenn es eine stetige Abbildung
+H : S 1
+ × [0, 1] → R3
+gibt mit
+ H (z, 0) = γ
+1(z) ∀z ∈ S 1
+H (z, 1) = γ
+2(z) ∀z ∈ S 1
+und für jedes feste t ∈ [0, 1] ist
+ H
+z : S 1
+ → R3
+, z → H (z, t)
+ein Knoten. Die Abbildung H heißt Isotopie zwischen γ
+1 und γ
+2.
+Definition 22
+Sei γ : [0, 1] → R3
+ ein Knoten, E eine Ebene und π : R3
+ → E eine Pro jektion auf E .
+π heißt Knotendiagramm von γ , wenn gilt:
+
+
+π−1
+(x)
+
+ ≤ 2 ∀x ∈ π(γ )
+Ist (π|
+γ([0,1]))−1
+(x) = { y
+1, y
+2 }, so liegt y
+1 über y
+2, wenn gilt:
+∃λ > 1 : (y
+1 − x) = λ(y
+2 − x)
+Satz 1.3 (Satz von Reidemeister)
+Zwei endliche Knotendiagramme gehören genau dann zu äquivalenten Knoten, wenn sie
+durch endlich viele „Reidemeister-Züge“ ineinander überführt werden können.
+  1.6. WEGE UND KNOTEN
+(a) Ω
+1 (b) Ω
+2
+(c) Ω
+3
+Abbildung 1.12: Reidemeister-Züge
+Beweis: Durch sorgfältige Fallunterscheidung.5
+Definition 23
+Ein Knotendiagramm D heißt 3-färbbar, wenn jeder Bogen von D so mit einer Farbe gefärbt
+werden kann, dass an jeder Kreuzung eine oder 3 Farben auftreten und alle 3 Farben
+auftreten.
+ Abbildung 1.13: Ein 3-gefärbter Kleeblattknoten
+5
+Siehe „Knot Theory and Its Applications“ von Kunio Murasugi. ISBN 978-0817638177.
+  1.6. WEGE UND KNOTEN
+Übungsaufgaben
+Aufgabe 1 (Sierpińskiraum)
+Es sei X := { 0, 1 } und T
+X := { ∅, { 0 } , X }. Dies ist der sogenannte Sierpińskiraum.
+(a) Beweisen Sie, dass (X, T
+X ) ein topologischer Raum ist.
+(b) Ist (X, T
+X ) hausdorffsch?
+(c) Ist T
+X von einer Metrik erzeugt?
+Aufgabe 2
+Es sei Z mit der von den Mengen U
+a,b := a + bZ(a ∈ Z, b ∈ Z \ { 0 }) erzeugten Topologie
+versehen.
+Zeigen Sie:
+(a) Jedes U
+a,b und jede einelementige Teilmenge von Z ist abgeschlossen.
+(b) { −1, 1 } ist nicht offen.
+(c) Es gibt unendlich viele Primzahlen.
+Aufgabe 3 (Cantorsches Diskontinuum)
+Für jedes $i \in \mathbb{N}$ sei $P_i := \{0, 1\}$ mit der diskreten Topologie. Weiter sei $P := \prod_{i \in \mathbb{N}} P_i$.
+(a) Wie sehen die offenen Mengen von P aus?
+(b) Was können Sie über den Zusammenhang von P sagen?
+Aufgabe 4 (Kompaktheit)
+(a) Ist $\mathrm{GL}_n(\mathbb{R}) = \{ A \in \mathbb{R}^{n \times n} \mid \det(A) \neq 0 \}$ kompakt?
+(b) Ist $\mathrm{SL}_n(\mathbb{R}) = \{ A \in \mathbb{R}^{n \times n} \mid \det(A) = 1 \}$ kompakt?
+(c) Ist P (R) kompakt?
+Aufgabe 5 (Begriffe)
+Definieren Sie die Begriffe „Homomorphismus“ und „Homöomorphismus“.
+Geben Sie, falls möglich, ein Beispiel für folgende Fälle an. Falls es nicht möglich ist,
+begründen Sie warum.
+1) Ein Homomorphismus, der zugleich ein Homöomorphismus ist,
+2) ein Homomorphismus, der kein Homöomorphismus ist,
+  1.6. WEGE UND KNOTEN
+3) ein Homöomorphismus, der kein Homomorphismus ist
+Aufgabe 6 (Begriffe)
+Definieren Sie die Begriffe „Isomorphismus“, „Isotopie“ und „Isometrie“.
+2 Mannigfaltigkeiten und
+Simplizialkomplexe
+2.1 Topologische Mannigfaltigkeiten
+Definition 24
+Sei (X, T) ein topologischer Raum und n ∈ N.
+a) Eine n-dimensionale Karte auf X ist ein Paar (U, ϕ), wobei U ∈ T und ϕ : U → V
+Homöomorphismus von U auf eine offene Teilmenge V ⊆ Rn
+.
+b) Ein n-dimensionaler Atlas A auf X ist eine Familie (U
+i, ϕ
+i)
+i∈I von Karten auf X ,
+sodass
+i∈I U
+i = X .
+c) X
+ heißt (topologische) n-dimensionale Mannigfaltigkeit, wenn X hausdorffsch ist,
+eine abzählbare Basis der Topologie hat und einen n-dimensionalen Atlas besitzt.
+Anschaulich ist also ein n-dimensionale Mannigfaltigkeit lokal dem Rn
+ ähnlich.
+Bemerkung 24 (Mächtigkeit von Mannigfaltigkeiten)
+Jede n-dimensionale Mannigfaltigkeit mit n ≥ 1 ist mindestens so mächtig wie R.
+Beweis: Sei (X, T) ein topologischer Raum und (U, ϕ) mit U ∈ T und ϕ : U → V ⊆ Rn
+, wobei
+V offen und ϕ ein Homöomorphismus ist, eine Karte auf X .
+Da jede offene Teilmenge des Rn
+ genauso mächtig ist wie der Rn
+, ϕ als Homöomorphismus
+insbesondere bijektiv ist und Mengen, zwischen denen eine Bijektion existiert, gleich mächtig
+sind, ist U genauso mächtig wie der Rn
+. Da jede Mannigfaltigkeit mindestens eine Karte
+hat, muss jede Mannigfaltigkeit X mindestens so mächtig sein wie der Rn
+.
+Hinweis: Es gibt auch noch 0-dimensionale Mannigfaltigkeiten. Diese Mannigfaltigkeiten können
+beliebig viele Elemente haben.
+Bemerkung 25
+a) Es gibt surjektive, stetige Abbildungen [0, 1] → [0, 1] × [0, 1]
+b) Für $n \neq m$ sind $\mathbb{R}^n$ und $\mathbb{R}^m$ nicht homöomorph. Zum Beweis benutzt man den „Satz
+von der Gebietstreue“ (Brouwer):
+Ist U ⊆ Rn
+ offen und f : U → Rn
+ stetig und injektiv, so ist f (U ) offen.
+Ist n < m und Rm
+ homöomorph zu Rn
+, so wäre
+f : Rn
+ → Rm
+ → Rn
+, (x
+1, . . . , x
+n) → (x
+1, x
+2, . . . , x
+n, 0, . . . , 0)
+eine stetige injektive Abbildung. Also müsste f (Rn
+) offen sein ⇒ Widerspruch
+ 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN
+Beispiel 20 (Mannigfaltigkeiten)
+1) Jede offene Teilmenge U ⊆ Rn
+ ist eine n-dimensionale Mannigfaltigkeit mit einem
+Atlas aus einer Karte.
+2) Cn
+ ist eine 2n-dimensionale Mannigfaltigkeit mit einem Atlas aus einer Karte:
+$(z_1, \dots, z_n) \mapsto (\Re(z_1), \Im(z_1), \dots, \Re(z_n), \Im(z_n))$
+3) P n
+(R) = (Rn+1
+ \ { 0 })/
+∼ = S n
+/
+∼ und P n
+(C) sind Mannigfaltigkeiten der Dimension
+n bzw. 2n, da gilt:
+Sei $U_i := \{ (x_0 : \dots : x_n) \in \mathcal{P}^n(\mathbb{R}) \mid x_i \neq 0 \}$ $\forall i \in 0, \dots, n$. Dann ist $\mathcal{P}^n(\mathbb{R}) = \bigcup_{i=0}^{n} U_i$
+und die Abbildung
+ U
+i → Rn
+(x
+0 : · · · : x
+n) →
+ x
+0
+x
+i , . . . ,
+ x
+i
+x
+i , . . . , x
+n
+x
+i
+(y
+1 : · · · : y
+i−1 : 1 : y
+i : · · · : y
+n) → (y
+1, . . . , y
+n)
+ist bijektiv.
+Die U
+i mit i = 0, . . . , n bilden einen n-dimensionalen Atlas:
+x = (1 : 0 : 0) ∈ U
+0 → R2
+ x → (0, 0)
+y = (0 : 1 : 1) ∈ U
+2 → R2
+ y → (0, 1)
+Umgebung: B
+1(0, 1) → { (1 : u : v) | (u, v) < 1 } = V
+1
+Umgebung: B
+1(0, 1) →
+ (w : z : 1)
+
+ w2
+ + z2
+ < 1
+ = V
+2
+V
+1 ∩ V
+2 = ∅?
+(a : b : c) ∈ V
+1 ∩ V
+2
+⇒ a = 0 und ( b
+a )2
+ + ( c
+a )2
+ < 1 ⇒ c
+a < 1
+⇒ c = 0 und ( a
+c )2
+ + ( b
+c )2
+ < 1 ⇒ a
+c < 1
+⇒ Widerspruch
+4) S n
+ =
+ x ∈ Rn+1
+
+ x = 1
+ ist n-dimensionale Mannigfaltigkeit.
+Karten:
+D
+i := {(x
+1, . . . , x
+n+1) ∈ S n
+|x
+i > 0} → B
+1(0, . . . , 0
+
+∈Rn )
+C
+i := {(x
+1, . . . , x
+n+1) ∈ S n
+|x
+i < 0} → B
+1(0, . . . , 0)
+(x
+1, . . . , x
+n+1) → (x
+1, . . . ,
+ x
+i, . . . , x
+n+1)1
+(x
+1, . . . , x
+n) → (x
+1, . . . , x
+i−1,
+1 −
+n
+k=1 x2
+k , x
+i, . . . , x
+n), oder −
+1 −
+n
+k=1 x2
+k für C
+i
+S n
+ =
+n+1
+i=1 (C
+i ∪ D
+i)
+Als kompakte Mannigfaltigkeit wird S n
+ auch „ geschlossene Mannigfaltigkeit“ genannt.
+5) [0, 1] ist keine Mannigfaltigkeit, denn:
+Es gibt keine Umgebung von 0 in [0, 1], die homöomorph zu einem offenem Intervall
+ist.
+1
+x
+i wird rausgenommen
+ 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN
+6) V
+1 =
+ (x, y) ∈ R2
+
+ x · y = 0
+ ist keine Mannigfaltigkeit.
+Das Problem ist (0, 0). Wenn man diesen Punkt entfernt, zerfällt der Raum in 4
+Zusammenhangskomponenten. Jeder Rn
+ zerfällt jedoch in höchstens zwei Zusammenhangskomponenten,
+ wenn man einen Punkt entfernt.
+7) V
+2 =
+ (x, y) ∈ R2
+
+ x3
+ = y2
+ ist eine Mannigfaltigkeit.
+8) X = (R \ { 0 }) ∪ (0
+1, 0
+2)
+U ⊆ X offen ⇔
+U offen in R \ { 0 } , falls 0
+1 /∈ U, 0
+2 ∈ U
+∃ε > 0 : (−ε, ε) ⊆ U falls 0
+1 ∈ U, 0
+2 ∈ U
+Insbesondere sind (R \ { 0 }) ∪ { 0
+1 } und (R \ { 0 }) ∪ { 0
+2 } offen und homöomorph
+zu R.
+Aber: X ist nicht hausdorffsch! Denn es gibt keine disjunkten Umgebungen von 0
+1
+und 0
+2.
+9) GL
+n(R) ist eine Mannigfaltigkeit der Dimension n2
+, weil offene Teilmengen von Rn2
+eine Mannigfaltigkeit bilden.
+Definition 25
+Seien X, Y n-dimensionale Mannigfaltigkeiten, U ⊆ X und V ⊆ Y offen, Φ : U → V ein Homöomorphismus
+ Z = (X ˙
+∪ Y )/
+∼ mit der von u ∼ Φ(u) ∀u ∈ U erzeugten Äquivalenzrelation
+und der von ∼ induzierten Quotiententopologie.
+Z heißt Verklebung von X und Y längs U und V . Z besitzt einen Atlas aus n-dimensionalen
+Karten. Falls Z hausdorffsch ist, ist Z eine n-dimensionale Mannigfaltigkeit.
+Bemerkung 26
+Sind X, Y Mannigfaltigkeiten der Dimension n bzw. m, so ist X × Y eine Mannigfaltigkeit
+der Dimension n + m.
+Beweis: Produkte von Karten sind Karten.
+Beispiel 21
+Mannigfaltigkeiten mit Dimension 1:
+1) Offene Intervalle, R, (0, 1) sind alle homöomorph
+2) S 1
+Mannigfaltigkeiten mit Dimension 2:
+1) R2
+2) S 2
+ (0 Henkel)
+3) T 2
+ (1 Henkel)
+4) oder mehr Henkel, wie z.B. der Zweifachtorus in Abbildung 2.1
+Bemerkung 27
+Sei n ∈ N, F : Rn
+ → R stetig differenzierbar und X = V (F ) := { x ∈ Rn
+ | F (x) = 0 } das
+„vanishing set“ .
+Dann gilt:
+ 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN
+Abbildung 2.1: Durch Verklebung zweier Tori entsteht ein Zweifachtorus.
+a) X ist abgeschlossen in Rn
+b) Ist $\operatorname{grad}(F)(x) \neq 0 \;\forall x \in X$, so ist $X$ eine Mannigfaltigkeit der Dimension $n - 1$.
+Beweis:
+a)
+ Sei y ∈ Rn
+ \ V (F ). Weil F stetig ist, gibt es δ > 0, sodass F (B
+δ (y)) ⊆ B
+ε(F (y)) mit
+ε = 1
+2 F (y). Folgt B
+δ (y) ∩ V (F ) = ∅ ⇒ Rn
+ \ V (F ) ist offen.
+b) Sei $x \in X$ mit $\operatorname{grad}(F)(x) \neq 0$, also o. B. d. A. $\frac{\partial F}{\partial x_1}(x) \neq 0$, $x = (x_1, \dots, x_n)$,
+$x' := (x_2, \dots, x_n) \in \mathbb{R}^{n-1}$. Der Satz von der impliziten Funktion liefert nun: Es
+gibt Umgebungen $U$ von $x'$ und differenzierbare Funktionen $g : U \to \mathbb{R}$, sodass
+$G : U \to \mathbb{R}^n$, $u \mapsto (g(u), u)$ eine stetige Abbildung auf eine offene Umgebung $V$ von $x$
+in $X$ ist.
+
+Beispiel 22
+1) F
+ : R3
+ → R, (x, y, z) → x2
+ + y2
+ + z2
+ − 1, V (F ) = S 2
+, grad(F ) = (2x, 2y, 2z) Bem. 27.b
+======⇒
+S n
+ ist n-dimensionale Mannigfaltigkeit in Rn+1
+2) F : R2
+ → R, (x, y) → y2
+ − x3
+ Es gilt: grad(F ) = (−3x2
+, 2y). Also: grad(0, 0) = (0, 0).
+−5
+−4
+−3
+−2
+−1
+0
+1
+2
+3
+4
+5−4
+ −2
+ 0
+ 2
+ 4−1000100
+ xyz
+ −1000100f (x, y)
+ (a) F (x, y) = y2
+ − x3 2 4 6 8 10 12
+−10−5510
+ xy
+ a = 1
+3
+a = 1
+a = 2
+(b) y2
+ − ax3
+ = 0
+Abbildung 2.2: Rechts ist die Neilsche Parabel für verschiedene Parameter a.
+Daher ist Bemerkung 27.b nicht anwendbar, aber V (F ) ist trotzdem eine 1-dimensionale
+topologische Mannigfaltigkeit.
+ 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN
+Definition 26
+Sei X ein Hausdorffraum mit abzählbarer Basis der Topologie. X heißt n-dimensionale
+Mannigfaltigkeit mit Rand, wenn es einen Atlas (U
+i, ϕ
+i) gibt, wobei U
+i ⊆ X
+i offen und
+ϕ
+i ein Homöomorphismus auf eine offene Teilmenge von
+Rn
++,0 := { (x
+1, . . . , x
+n) ∈ Rn
+ | x
+n ≥ 0 }
+ist.
+Rn
++,0 ist ein „Halbraum“ .
+Hinweis: Mannigfaltigkeiten mit Rand sind keine Mannigfaltigkeiten.
+∼
+=
+(a) Halbraum
+∼
+=
+(b) Pair of pants  ∼
+=
+(c) Sphäre mit einem Loch
+Abbildung 2.3: Beispiele für Mannigfaltigkeiten mit Rand
+Definition 27
+Sei X eine n-dimensionale Mannigfaltigkeit mit Rand und Atlas A. Dann heißt
+∂ X :=
+(U,ϕ)∈A { x ∈ U | ϕ(x) = 0 }
+Rand von X .
+∂ X ist eine Mannigfaltigkeit der Dimension n − 1.
+Definition 28
+Sei X eine n-dimensionale Mannigfaltigkeit mit Atlas (U
+i, ϕ
+i)
+i∈I
+Für $i, j \in I$ mit $U_i \cap U_j \neq \emptyset$ heißt
+$\varphi_{ij} := \varphi_j \circ \varphi_i^{-1} : \varphi_i(U_i \cap U_j) \to \varphi_j(U_i \cap U_j)$
+Kartenwechsel oder Übergangsfunktion.
+ 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN
+Rn
+ RnU
+i U
+j
+V
+i V
+jX
+ϕ
+i ϕ
+j
+Abbildung 2.4: Kartenwechsel
+2.2 Differenzierbare Mannigfaltigkeiten
+Definition 29
+Sei X eine n-dimensionale Mannigfaltigkeit mit Atlas (U
+i, ϕ
+i)
+i∈I .
+a) X heißt differenzierbare Mannigfaltigkeit der Klasse C k
+ , wenn jede Kartenwechselabbildung
+ ϕ
+ij , i, j ∈ I k-mal stetig differenzierbar ist.
+b) X heißt differenzierbare Mannigfaltigkeit, wenn X eine differenzierbare Mannigfaltigkeit
+ der Klasse C ∞
+ ist.
+Differenzierbare Mannigfaltigkeiten der Klasse C ∞
+ werden auch glatt genannt.
+Definition 30
+Sei X eine differenzierbare Mannigfaltigkeit der Klasse C k
+ (k ∈ N ∪ { ∞ }) mit Atlas
+A = (U
+i, ϕ
+i)
+i∈I .
+a) Eine Karte $(U, \varphi)$ auf $X$ heißt verträglich mit $\mathcal{A}$, wenn alle Kartenwechsel $\varphi \circ \varphi_i^{-1}$
+und $\varphi_i \circ \varphi^{-1}$ ($i \in I$ mit $U_i \cap U \neq \emptyset$) differenzierbar von Klasse $C^k$ sind.
+b) Die Menge aller mit A verträglichen Karten auf X bildet einen maximalen Atlas der
+Klasse C k
+ . Er heißt C k
+ -Struktur auf X .
+Eine C ∞
+-Struktur heißt auch differenzierbare Struktur auf X .
+Bemerkung 28
+Für n ≥ 4 gibt es auf S n
+ mehrere verschiedene differenzierbare Strukturen, die sogenannten
+„exotische Sphären“ .
+Definition 31
+Seien X, Y differenzierbare Mannigfaltigkeiten der Dimension n bzw. m, x ∈ X .
+a) Eine stetige Abbildung f : X → Y heißt differenzierbar in x (von Klasse C k
+ ), wenn
+es Karten (U, ϕ) von X mit x ∈ U und (V , ψ) von Y mit f (U ) ⊆ V gibt, sodass
+ψ ◦ f ◦ ϕ−1
+ stetig differenzierbar von Klasse C k
+ in ϕ(x) ist.
+b) f heißt differenzierbar (von Klasse C k
+ ), wenn f in jedem x ∈ X differenzierbar ist.
+c) f heißt Diffeomorphismus, wenn f differenzierbar von Klasse C ∞
+ ist und es eine
+differenzierbare Abbildung g : Y → X von Klasse C ∞
+ gibt mit g ◦ f = id
+X und
+f ◦ g = id
+Y .
+ 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN
+Bemerkung 29
+Die Bedingung in Definition 31.a hängt nicht von den gewählten Karten ab.
+Beweis: Seien (U
+, ϕ
+) und (V
+, ψ
+) Karten von X bzw. Y um x bzw. f (x) mit f (U
+) ⊆ V
+.
+⇒ ψ
+ ◦ f ◦ (ϕ
+)−1
+= ψ
+ ◦ (ψ−1
+ ◦ ψ) ◦ f ◦ (ϕ−1
+ ◦ ϕ) ◦ (ϕ
+)−1
+ist genau dann differenzierbar, wenn ψ ◦ f ◦ ϕ−1
+ differenzierbar ist.
+Beispiel 23
+$f : \mathbb{R} \to \mathbb{R}$, $x \mapsto x^3$ ist kein Diffeomorphismus, aber Homöomorphismus, da mit $g(x) := \sqrt[3]{x}$
+gilt: $f \circ g = \mathrm{id}_{\mathbb{R}}$, $g \circ f = \mathrm{id}_{\mathbb{R}}$
+Bemerkung 30
+Sei X eine glatte Mannigfaltigkeit. Dann ist
+Diffeo(X ) := { f : X → X | f ist Diffeomorphismus }
+eine Untergruppe von Homöo(X ).
+Definition 32
+S ⊆ R3
+ heißt reguläre Fläche :⇔ ∀s ∈ S ∃ Umgebung V (s) ⊆ R3
+ ∃U ⊆ R2
+ offen:
+∃ differenzierbare Abbildung F : U → V ∩ S : Rg(J
+F (u)) = 2 ∀u ∈ U .
+F heißt (lokale) reguläre Parametrisierung von S .
+F (u, v) = (x(u, v), y(u, v), z(u, v))
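+$J_F(u, v) = \begin{pmatrix} \frac{\partial x}{\partial u}(p) & \frac{\partial x}{\partial v}(p) \\ \frac{\partial y}{\partial u}(p) & \frac{\partial y}{\partial v}(p) \\ \frac{\partial z}{\partial u}(p) & \frac{\partial z}{\partial v}(p) \end{pmatrix}$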
+Beispiel 24
+1) Rotationsflächen: Sei r : R → R
+>0 eine differenzierbare Funktion.
+$F : \mathbb{R}^2 \to \mathbb{R}^3$, $(u, v) \mapsto (r(v) \cos(u), r(v) \sin(u), v)$
+$J_F(u, v) = \begin{pmatrix} -r(v) \sin u & r'(v) \cos u \\ r(v) \cos u & r'(v) \sin u \\ 0 & 1 \end{pmatrix}$
+hat Rang 2 für alle $(u, v) \in \mathbb{R}^2$.
+2) Kugelkoordinaten: $F: \mathbb{R}^2 \to \mathbb{R}^3$,
+$(u, v) \mapsto (R \cos v \cos u, R \cos v \sin u, R \sin v)$
+Es gilt: $F(u, v) \in S^2_R$, denn
+$R^2 \cos^2(v) \cos^2(u) + R^2 \cos^2(v) \sin^2(u) + R^2 \sin^2(v)$
+$= R^2 (\cos^2(v) \cos^2(u) + \cos^2(v) \sin^2(u) + \sin^2(v))$
+$= R^2 (\cos^2(v) (\cos^2(u) + \sin^2(u)) + \sin^2(v))$
+$= R^2 (\cos^2(v) + \sin^2(v))$
+$= R^2$
+ 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN
+N
+S v
+u
+(a) Kugelkoordinaten −1
+ 0
+ 1
+ 2
+ −2 −1 0 1 20.60.81
+ (b) Rotationskörper
+π
+2 π
+ 3π
+2 2π
+−1−0.50.51
+ xy
+ sin x
+cos x
+(c) Sinus und Kosinus haben keine gemeinsame Nullstelle
+ 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN
+Die Jacobi-Matrix
+$J_F(u, v) = \begin{pmatrix} -R \cos v \sin u & -R \sin v \cos u \\ R \cos v \cos u & -R \sin v \sin u \\ 0 & R \cos v \end{pmatrix}$
+hat Rang 2 für $\cos v \neq 0$. In $N$ und $S$ ist $\cos v = 0$.
+Bemerkung 31
+Jede reguläre Fläche S ⊆ R3
+ ist eine 2-dimensionale, differenzierbare Mannigfaltigkeit.
+Beweis:
+S ⊆ R3
+ ist als reguläre Fläche eine 2-dimensionale Mannigfaltigkeit. Aus der Definition von
+regulären Flächen folgt direkt, dass Karten $(U_i, F_i)$ und $(U_j \subseteq \mathbb{R}^2, F_j: \mathbb{R}^2 \to \mathbb{R}^3)$ von $S$ mit
+$U_i \cap U_j \neq \emptyset$ existieren, wobei $F_i$ und $F_j$ nach Definition differenzierbare Abbildungen sind.
+z.Z.: $F_j^{-1} \circ F_i$ ist ein Diffeomorphismus.
+U
+i U
+jS
+s
+F
+i F
+j
+F −1
+j ◦F
+i
+Abbildung 2.5: Reguläre Fläche S zum Beweis von Bemerkung 31
+Idee: Finde differenzierbare Funktion $\widetilde{F_j^{-1}}$ in Umgebung $W$ von $s$, sodass
+$\widetilde{F_j^{-1}}|_{S \cap W} = F_j^{-1}$.
+Ausführung: Sei u
+0 ∈ U
+i, v
+0 ∈ U
+j mit F
+i(u
+0) = s = F
+j (v
+0).
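+Da $\mathrm{Rg}(J_{F_j}(v_0)) = 2$ ist, ist o. B. d. A.
+$\det \begin{pmatrix} \frac{\partial x}{\partial u} & \frac{\partial x}{\partial v} \\ \frac{\partial y}{\partial u} & \frac{\partial y}{\partial v} \end{pmatrix}(v_0) \neq 0$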
+und F
+j (u, v) = (x(u, v), y(u, v), z(u, v)).
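+Definiere $\tilde{F}_j: U_j \times \mathbb{R} \to \mathbb{R}^3$ durch
+$\tilde{F}_j(u, v, t) := (x(u, v), y(u, v), z(u, v) + t)$
+Offensichtlich: $\tilde{F}_j|_{U_j \times \{ 0 \}} = F_j$
+$J_{\tilde{F}_j} = \begin{pmatrix} \frac{\partial x}{\partial u} & \frac{\partial x}{\partial v} & 0 \\ \frac{\partial y}{\partial u} & \frac{\partial y}{\partial v} & 0 \\ \frac{\partial z}{\partial u} & \frac{\partial z}{\partial v} & 1 \end{pmatrix} \Rightarrow \det J_{\tilde{F}_j}(v_0, 0) \neq 0$
+$\overset{\text{Analysis II}}{\Longrightarrow}$ Es gibt eine Umgebung $W$ von $\tilde{F}_j(v_0, 0) = F_j(v_0) = s$, sodass $\tilde{F}_j$ auf $W$ eine
+differenzierbare Inverse $\tilde{F}_j^{-1}$ hat.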
+ 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN
+Weiter gilt:
+$\tilde{F}_j^{-1}|_{W \cap S} = F_j^{-1}|_{W \cap S}$
+$\Rightarrow F_j^{-1} \circ F_i|_{F_i^{-1}(W \cap S)} = \tilde{F}_j^{-1} \circ F_i|_{F_i^{-1}(W \cap S)}$
+ist differenzierbar.
+Definition 33
+Sei G eine Mannigfaltigkeit und (G, ◦) eine Gruppe.
+a) G heißt topologische Gruppe, wenn die Abbildungen ◦ : G × G → G und ι : G → G
+definiert durch
+ g ◦ h := g · h und ι(g) := g−1
+stetig sind.
+b) Ist G eine differenzierbare Mannigfaltigkeit, so heißt G Lie-Gruppe, wenn (G, ◦) und
+(G, ι) differenzierbar sind.
+Beispiel 25 (Lie-Gruppen)
+1) Alle endlichen Gruppen sind 0-dimensionale Lie-Gruppen.
+2) GL
+n(R)
+3) (R×
+, ·)
+4) (R
+>0, ·)
+5) $(\mathbb{R}^n, +)$, denn $(A \cdot B)(i, j) = \sum_{k=1}^{n} a_{ik} b_{kj}$ ist nach allen Variablen differenzierbar
+$(A^{-1})(i, j) = \frac{\det(A_{ij})}{\det A}$
+ A
+ij = 
+
+ a
+i1 . . . a
+in
+.
+.
+. .
+ .
+ . .
+.
+.
+a
+n1 . . . a
+nn
+
+ ∈ R(n−1)×(n−1)
+ist differenzierbar.
+det A
+ij kann 0 werden, da:
+
+ 1 1
+−1 0
+6) SL
+n(R) = { A ∈ GL
+n(R) | det(A) = 1 }
+Bemerkung 32
+Ist G eine Lie-Gruppe und g ∈ G, so ist die Abbildung
+l
+g : G → G
+h → g · h
+ein Diffeomorphismus.
+  2.3. SIMPLIZIALKOMPLEX
+2.3 Simplizialkomplex
+Definition 34
+Seien $v_0, \dots, v_k \in \mathbb{R}^n$ Punkte.
+a) $v_0, \dots, v_k$ sind in allgemeiner Lage
+$\Leftrightarrow$ es gibt keinen $(k - 1)$-dimensionalen affinen Untervektorraum, der $v_0, \dots, v_k$ enthält
+$\Leftrightarrow v_1 - v_0, \dots, v_k - v_0$ sind linear unabhängig.
+b) $\mathrm{conv}(v_0, \dots, v_k) := \left\{ \sum_{i=0}^{k} \lambda_i v_i \,\middle|\, \lambda_i \geq 0, \sum_{i=0}^{k} \lambda_i = 1 \right\}$ heißt die konvexe Hülle von
+$v_0, \dots, v_k$.
+Definition 35
+a) Sei $\Delta^n = \mathrm{conv}(e_0, \dots, e_n) \subseteq \mathbb{R}^{n+1}$ die konvexe Hülle der Standard-Basisvektoren
+$e_0, \dots, e_n$.
+Dann heißt $\Delta^n$ Standard-Simplex und $n$ die Dimension des Simplex.
+b) Für Punkte $v_0, \dots, v_k$ im $\mathbb{R}^n$ in allgemeiner Lage heißt $\Delta(v_0, \dots, v_k) = \mathrm{conv}(v_0, \dots, v_k)$
+ein $k$-Simplex in $\mathbb{R}^n$.
+c) Ist $\Delta(v_0, \dots, v_k)$ ein $k$-Simplex und $I = \{ i_0, \dots, i_r \} \subseteq \{ 0, \dots, k \}$, so ist
+$s_{i_0, \dots, i_r} := \mathrm{conv}(v_{i_0}, \dots, v_{i_r})$ ein $r$-Simplex und heißt Teilsimplex oder Seite von $\Delta$.
+(a) 0-Simplex ∆0
+1 2 3123
+ e
+0e
+1
+(b) 1-Simplex ∆1 1 2 3123
+ e
+0e
+1
+ e
+2
+(c) 2-Simplex ∆2 e
+0 e
+1e
+2
+e
+3
+(d) 3-Simplex ∆3
+Abbildung 2.6: Beispiele für k-Simplexe
+Definition 36
+a) Eine endliche Menge $K$ von Simplizes im $\mathbb{R}^n$ heißt (endlicher) Simplizialkomplex,
+wenn gilt:
+(i) Für $\Delta \in K$ und $S \subseteq \Delta$ Teilsimplex ist $S \in K$.
+(ii) Für $\Delta_1, \Delta_2 \in K$ ist $\Delta_1 \cap \Delta_2$ leer oder ein Teilsimplex von $\Delta_1$ und von $\Delta_2$.
+b) $|K| := \bigcup_{\Delta \in K} \Delta$ (mit Teilraumtopologie) heißt geometrische Realisierung von $K$.
+c) Ist $d = \max \{ k \in \mathbb{N}_0 \mid K \text{ enthält } k\text{-Simplex} \}$, so heißt $d$ die Dimension von $K$.
+  2.3. SIMPLIZIALKOMPLEX
+(a) 1D Simplizialkomplex (b) 2D Simplizialkomplex
+(ohne untere Fläche!) (c) 2D Simplizialkomplex
+(d) 1D Simplizialkomplex (e) 2D Simplizialkomplex
+P
+(f ) P ist kein Teilsimplex, da Eigenschaft
+ Punkt b.ii verletzt ist P
+(g) Simplizialkomplex
+Abbildung 2.7: Beispiele für Simplizialkomplexe
+Definition 37
+Seien K, L Simplizialkomplexe. Eine stetige Abbildung
+f : |K | → |L|
+heißt simplizial, wenn für jedes ∆ ∈ K gilt:
+a) f (∆) ∈ L
+b) f |
+∆ : ∆ → f (∆) ist eine affine Abbildung.
+Beispiel 26 (Simpliziale Abbildungen)
+1) ϕ(e
+1) := b
+1, ϕ(e
+2) := b
+2
+ϕ ist eine eindeutig bestimmte lineare Abbildung
+  2.3. SIMPLIZIALKOMPLEX
+0 e
+2e
+1
+ 0 b
+1b
+2
+ϕ
+2) Folgende Abbildung ϕ : ∆n
+ → ∆n−1
+ ist simplizial:
+ϕ
+3) Tori können simplizial auf Sphären abgebildet werden (vgl. Abbildung 2.8)
+M
+ Ma
+a  ab
+ b  bc
+c  c
+dd
+ dM a
+b c
+d
+
+
+
+
+
+
+
+
+
+Abbildung 2.8: Abbildung eines Torus auf eine Sphäre
+Definition 38
+Sei $K$ ein endlicher Simplizialkomplex. Für $n \geq 0$ sei $a_n(K)$ die Anzahl der $n$-Simplizes in
+$K$.
+Dann heißt
+$\chi(K) := \sum_{n=0}^{\dim K} (-1)^n a_n(K)$
+Eulerzahl (oder Euler-Charakteristik) von $K$.
+Beispiel 27
+1) χ(∆1
+) = 2 − 1 = 1
+χ(∆2
+) = 3 − 3 + 1 = 1
+χ(∆3
+) = 4 − 6 + 4 − 1 = 1
+2) χ(Oktaeder-Oberfläche) = 6 − 12 + 8 = 2
+χ(Rand des Tetraeders) = 2
+χ(Ikosaeder) = 12 − 30 + 20 = 2
+3) χ(Würfel) = 8 − 12 + 6 = 2
+χ(Würfel, unterteilt in Dreiecksflächen) = 8 − (12 + 6) + (6 · 2) = 2
+Bemerkung 33
+χ(∆n
+) = 1 für jedes n ∈ N
+0
+  2.3. SIMPLIZIALKOMPLEX
+Beweis: $\Delta^n$ ist die konvexe Hülle von $(e_0, \dots, e_n)$ in $\mathbb{R}^{n+1}$. Jede $(k + 1)$-elementige Teilmenge
+von $\{ e_0, \dots, e_n \}$ definiert ein $k$-Simplex.
+$\Rightarrow a_k(\Delta^n) = \binom{n+1}{k+1}$, $k = 0, \dots, n$
+$\Rightarrow \chi(\Delta^n) = \sum_{k=0}^{n} (-1)^k \binom{n+1}{k+1}$
+$f(x) = (x + 1)^{n+1} \overset{\text{Binomischer Lehrsatz}}{=} \sum_{k=0}^{n+1} \binom{n+1}{k} x^k$
+$\Rightarrow 0 = \sum_{k=0}^{n+1} \binom{n+1}{k} (-1)^k = \chi(\Delta^n) - 1$
+$\Rightarrow \chi(\Delta^n) = 1$
+Definition 39
+a) Ein 1D-Simplizialkomplex heißt Graph.
+b) Ein Graph, der homöomorph zu S 1
+ ist, heißt Kreis.
+c) Ein zusammenhängender Graph heißt Baum, wenn er keinen Kreis enthält.
+(a) Dies wird häufig auch als
+Multigraph bezeichnet. (b) Planare Einbettung des Tetraeders
+
+(c) K
+5 (d) K
+3,3
+Abbildung 2.9: Beispiele für Graphen
+Bemerkung 34
+Für jeden Baum T gilt χ(T ) = 1.
+Beweis: Induktion über die Anzahl der Ecken.
+Bemerkung 35
+a) Jeder zusammenhängende Graph Γ enthält einen Teilbaum T , der alle Ecken von Γ
+enthält.2
+b) Ist n = a
+1(Γ) − a
+1(T ), so ist χ(Γ) = 1 − n.
+Beweis:
+a) Siehe „Algorithmus von Kruskal“.
+2
+T wird „Spannbaum“ genannt.
+  2.3. SIMPLIZIALKOMPLEX
+b) $\chi(\Gamma) = a_0(\Gamma) - a_1(\Gamma)$
+$= a_0(\Gamma) - (n + a_1(T))$
+$= a_0(T) - a_1(T) - n$
+$= \chi(T) - n$
+$= 1 - n$
+Bemerkung 36
+Sei ∆ ein n-Simplex und x ∈ ∆◦
+ ⊆ Rn
+. Sei K der Simplizialkomplex, der aus ∆ durch
+„Unterteilung“ in x entsteht. Dann ist χ(K ) = χ(∆) = 1.
+(a) K (b) ∆, das aus K durch Unterteilung
+ entsteht
+Abbildung 2.10: Beispiel für Bemerkung 36.
+Beweis: $\chi(K) = \chi(\Delta) - \underbrace{(-1)^n}_{n\text{-Simplex}} + \underbrace{\sum_{k=0}^{n} (-1)^k \binom{n+1}{k}}_{(1 + (-1))^{n+1}} = \chi(\Delta)$
+Definition 40
+Sei X ein topologischer Raum, K ein Simplizialkomplex und
+h : |K | → X
+ein Homöomorphismus von der geometrischen Realisierung |K | auf X . Dann heißt h eine
+Triangulierung von X .
+Beispiel 28 (Triangulierung des Torus)
+Für eine Triangulierung des Torus werden mindestens 14 Dreiecke benötigt. Beispiele für
+fehlerhafte „Triangulierungen“ sind in Abbildung 2.11 zu sehen. Korrekte Triangulierungen sind
+in Abbildung 2.12 zu sehen.
+Satz 2.1 (Eulersche Polyederformel)
+Sei P ein konvexes Polyeder in R3
+, d. h. ∂ P ist ein 2-dimensionaler Simplizialkomplex,
+sodass gilt:
+ ∀x, y ∈ ∂ P : [x, y] ⊆ P
+Dann ist χ(∂ P ) = 2.
+Beweis:
+1) Die Aussage ist richtig für den Tetraeder.
+2) O. B. d. A. sei $0 \in P$ und $P \subseteq B_1(0)$. Projiziere $\partial P$ von $0$ aus auf $\partial B_1(0) = S^2$.
+Erhalte Triangulierung von $S^2$.
+  2.3. SIMPLIZIALKOMPLEX
+(a) Die beiden markierten Dreiecke schneiden sich im
+Mittelpunkt und in einer Seite. (b) Die beiden markierten Dreiecke schneiden sich im
+Mittelpunkt und außen.
+Abbildung 2.11: Fehlerhafte Triangulierungen
+(a) Einfache Triangulierung (b) Minimale Triangulierung
+Abbildung 2.12: Triangulierungen des Torus
+  2.3. SIMPLIZIALKOMPLEX
+3) Sind $P_1$ und $P_2$ konvexe Polyeder und $T_1, T_2$ die zugehörigen Triangulierungen von
+$S^2$, so gibt es eine Triangulierung $T$, die sowohl von $T_1$ als auch von $T_2$ eine Verfeinerung
+ist (vgl. Abbildung 2.13).
+ T
+1
+T
+2
+T
+Abbildung 2.13: T ist eine Triangulierung, die für T
+1 und T
+2 eine Verfeinerung ist.
+Nach Bemerkung 36 ist $\chi(\partial P_1) = \chi(T_1) = \chi(T) = \chi(T_2) = \chi(\partial P_2) = 2$, weil o. B. d. A.
+$P_2$ ein Tetraeder ist.
+Bemerkung 37 (Der Rand vom Rand ist 0)
+Sei K ein endlicher Simplizialkomplex mit Knotenmenge V und < eine Totalordnung auf V .
+Sei $A_n$ die Menge der $n$-Simplizes in $K$, d. h.
+$A_n(K) := \{ \sigma \in K \mid \dim(\sigma) = n \}$ für $n = 0, \dots, d = \dim(K)$
+und $C_n(K)$ der $\mathbb{R}$-Vektorraum mit Basis $A_n(K)$, d. h.
+$C_n(K) = \left\{ \sum_{\sigma \in A_n(K)} c_\sigma \cdot \sigma \,\middle|\, c_\sigma \in \mathbb{R} \right\}$
+Sei $\sigma = \Delta(x_0, \dots, x_n) \in A_n(K)$, sodass $x_0 < x_1 < \dots < x_n$.
+Für $i = 0, \dots, n$ sei $\partial_i \sigma := \Delta(x_0, \dots, \hat{x}_i, \dots, x_n)$ die $i$-te Seite von $\sigma$ und
+$d_\sigma = d_n \sigma := \sum_{i=0}^{n} (-1)^i \partial_i \sigma \in C_{n-1}(K)$ und $d_n: C_n(K) \to C_{n-1}(K)$ die dadurch definierte lineare
+Abbildung.
+Dann gilt: $d_{n-1} \circ d_n = 0$
+ a
+ bc
+σ
+e
+3 e
+1e
+2
+Abbildung 2.14: Simplizialkomplex mit Totalordnung
+Beispiel 29
+Sei a < b < c. Dann gilt:
+$d_2 \sigma = e_1 - e_2 + e_3$
+$d_1(e_1 - e_2 + e_3) = (c - b) - (c - a) + (b - a)$
+  2.3. SIMPLIZIALKOMPLEX
+= 0
+Sei a < b < c < d. Dann gilt für Tetraeder:
+$d_3(\Delta(a, b, c, d)) = \Delta(b, c, d) - \Delta(a, c, d) + \Delta(a, b, d) - \Delta(a, b, c)$, wobei:
+$d_2(\Delta(b, c, d)) = \Delta(c, d) - \Delta(b, d) + \Delta(b, c)$
+$d_2(-\Delta(a, c, d)) = -\Delta(c, d) + \Delta(a, d) - \Delta(a, c)$
+$d_2(\Delta(a, b, d)) = \Delta(b, d) - \Delta(a, d) + \Delta(a, b)$
+$d_2(-\Delta(a, b, c)) = -\Delta(b, c) + \Delta(a, c) - \Delta(a, b)$
+$\Rightarrow d_2(d_3(\Delta(a, b, c, d))) = 0$
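+Beweis: Sei $\sigma \in A_n$. Dann gilt:
+$d_{n-1}(d_n \sigma) = d_{n-1}\left( \sum_{i=0}^{n} (-1)^i \partial_i \sigma \right)$
+$= \sum_{i=0}^{n} (-1)^i d_{n-1}(\partial_i \sigma)$
+$= \sum_{i=0}^{n} (-1)^i \sum_{j=0}^{n-1} \partial_i(\partial_j \sigma) (-1)^j$
+$= \sum_{0 \leq i \leq j \leq n-1} (-1)^{i+j} \partial_j(\partial_i(\sigma)) + \sum_{0 \leq j < i \leq n} (-1)^{i+j} \partial_{i-1}(\partial_j \sigma)$
+$= 0$
+weil jeder Summand aus der ersten Summe auch in der zweiten Summe vorkommt, aber mit
+umgekehrtem Vorzeichen.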
+Definition 41
+Sei $K$ ein Simplizialkomplex, $Z_n := \mathrm{Kern}(d_n) \subseteq C_n$ und $B_n := \mathrm{Bild}(d_{n+1}) \subseteq C_n$.
+a) $H_n = H_n(K, \mathbb{R}) := Z_n / B_n$ heißt $n$-te Homologiegruppe von $K$.
+b) $b_n(K) := \dim_{\mathbb{R}} H_n$ heißt $n$-te Betti-Zahl von $K$.
+Bemerkung 38
+Nach Bemerkung 37 ist $B_n \subseteq Z_n$, denn $d_{n+1}(C) \in \mathrm{Kern}(d_n)$ für $C \in C_{n+1}$.
+Satz 2.2
+Für jeden endlichen Simplizialkomplex $K$ der Dimension $d$ gilt:
+$\sum_{k=0}^{d} (-1)^k b_k(K) = \sum_{k=0}^{d} (-1)^k a_k(K) = \chi(K)$
+Bemerkung 39
+Es gilt nicht a
+k = b
+k ∀k ∈ N
+0.
+  2.3. SIMPLIZIALKOMPLEX
+Beweis:
+• Dimensionsformel für $d_n$: $a_n = \dim Z_n + \dim B_{n-1}$ für $n \geq 1$
+• Dimensionsformel für $Z_n \to H_n = Z_n / B_n$: $\dim Z_n = b_n + \dim B_n$
+• $\dim Z_d = b_d$, da $\dim Z_d = b_d + \dim B_d$, wobei $\dim B_d = 0$, da $a_{d+1} = 0$
+• $a_0 - \dim B_0 = b_0$, da $a_0 - \dim B_0 = a_0 - \dim Z_0 + b_0$ und $a_0 = \dim Z_0$, weil $a_{-1} = 0$
+$\Rightarrow \sum_{k=0}^{d} (-1)^k a_k = a_0 + \sum_{k=1}^{d} (-1)^k (\dim Z_k + \dim B_{k-1})$
+$= a_0 + \sum_{k=1}^{d} (-1)^k \dim Z_k + \sum_{k=0}^{d-1} (-1)^{k+1} \dim B_k$
+$= a_0 + \sum_{k=1}^{d} (-1)^k \dim Z_k - \sum_{k=0}^{d-1} (-1)^k \dim B_k$
+$= a_0 + \sum_{k=1}^{d-1} (-1)^k b_k + (-1)^d \underbrace{\dim Z_d}_{= b_d} - \dim B_0$
+$= b_0 + \sum_{k=1}^{d-1} (-1)^k b_k + (-1)^d b_d$
+$= \sum_{k=0}^{d} (-1)^k b_k$
+  2.3. SIMPLIZIALKOMPLEX
+Übungsaufgaben
+Aufgabe 7 (Zusammenhang)
+(a) Beweisen Sie, dass eine topologische Mannigfaltigkeit genau dann wegzusammenhängend
+ ist, wenn sie zusammenhängend ist
+(b) Betrachten Sie nun wie in Beispiel 20.8 den Raum X := (R \ { 0 }) ∪ { 0
+1, 0
+2 } versehen
+mit der dort definierten Topologie. Ist X wegzusammenhängend?
+3 Fundamentalgruppe und Überlagerungen
+3.1 Homotopie von Wegen
+a
+ bγ
+1
+γ
+2
+(a) γ
+1 und γ
+2 sind homotop,
+da man sie „zueinander verschieben“
+ kann. a
+ bγ
+1
+γ
+2
+(b) γ
+1 und γ
+2 sind wegen dem
+Hindernis nicht homotop.
+Abbildung 3.1: Beispiele für Wege γ
+1 und γ
+2
+Definition 42
+Sei $X$ ein topologischer Raum, $a, b \in X$, $\gamma_1, \gamma_2: I \to X$ Wege von $a$ nach $b$, d. h.
+$\gamma_1(0) = \gamma_2(0) = a$, $\gamma_1(1) = \gamma_2(1) = b$.
+$\gamma_1$ und $\gamma_2$ heißen homotop, wenn es eine stetige Abbildung $H: I \times I \to X$ mit
+$H(t, 0) = \gamma_1(t)\ \forall t \in I$
+$H(t, 1) = \gamma_2(t)\ \forall t \in I$
+und $H(0, s) = a$ und $H(1, s) = b$ für alle $s \in I$ gibt. Dann schreibt man: $\gamma_1 \sim \gamma_2$
+$H$ heißt Homotopie zwischen $\gamma_1$ und $\gamma_2$.
+Bemerkung 40
+Sei X ein topologischer Raum, a, b ∈ X , γ
+1, γ
+2 : I → X Wege von a nach b und H eine
+Homotopie zwischen γ
+1 und γ
+2.
+Dann gilt: Der Weg
+ γ
+s : I → X, γ
+s(t) = H (t, s)
+ist Weg in X von a nach b für jedes s ∈ I .
+Beweis: $H$ ist stetig, also ist $H(t, s)$ insbesondere für jedes feste $s$ stetig. Da $H(0, s) = a$ und
+$H(1, s) = b$ für alle $s \in I$ und $\gamma_s$ eine Abbildung von $I$ nach $X$ ist, ist $\gamma_s$ ein Weg in $X$ von $a$
+nach $b$ für jedes $s \in I$.
+Bemerkung 41
+Durch Homotopie wird eine Äquivalenzrelation auf der Menge aller Wege in X von a nach b
+definiert.
+Beweis:
+  3.1. HOMOTOPIE VON WEGEN
+• reflexiv: $H(t, s) = \gamma(t)$ für alle $(t, s) \in I \times I$
+• symmetrisch: $H'(t, s) = H(t, 1 - s)$ für alle $(t, s) \in I \times I$
+• transitiv: Seien $H'$ bzw. $H''$ Homotopien von $\gamma_1$ nach $\gamma_2$ bzw. von $\gamma_2$ nach $\gamma_3$.
+Dann sei $H(t, s) := \begin{cases} H'(t, 2s) & \text{falls } 0 \leq s \leq \frac{1}{2} \\ H''(t, 2s - 1) & \text{falls } \frac{1}{2} \leq s \leq 1 \end{cases}$
+$\Rightarrow$ $H$ ist stetig und Homotopie von $\gamma_1$ nach $\gamma_3$.
+
+Beispiel 30
+1) Sei $X = S^1$. $\gamma_1$ und $\gamma_2$ aus Abbildung 3.3a sind nicht homotop.
+2) Sei $X = T^2$. $\gamma_1, \gamma_2$ und $\gamma_3$ aus Abbildung 3.3b sind paarweise nicht homotop.
+3) Sei X = R2
+ und a = b = (0, 0).
+Je zwei Wege im R2
+ mit Anfangs- und Endpunkt (0, 0) sind homotop.
+Abbildung 3.2: Zwei Wege im R2
+ mit Anfangs- und Endpunkt (0, 0)
+Sei γ
+0 : I → R2
+ der konstante Weg γ
+0(t) = (0, 0) ∀t ∈ I . Sei γ (0) = γ (1) = (0, 0).
+H (t, s) := (1 − s)γ (t) ist stetig, H (t, 0) = γ (t) ∀t ∈ I und H (t, 1) = (0, 0) ∀t ∈ I .
+Bemerkung 42
+Sei X ein topologischer Raum, γ : I → X ein Weg und ϕ : I → I stetig mit ϕ(0) = 0,
+ϕ(1) = 1. Dann sind γ und γ ◦ ϕ homotop.
+Beweis: Sei H (t, s) = γ ((1 − s)t + s · ϕ(t)).
+Dann ist H stetig, H (t, 0) = γ (t), H (t, 1) = γ (ϕ(t)), H (0, s) = γ (0) und H (1, s) =
+γ (1 − s + s) = γ (1)
+⇒ H ist Homotopie.
+  3.1. HOMOTOPIE VON WEGEN
+ab
+ γ
+1γ
+2
+(a) Kreis mit zwei Wegen a b
+(b) Torus mit drei Wegen
+Abbildung 3.3: Beispiele für (nicht)-Homotopie von Wegen
+Definition 43
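+Seien $\gamma_1, \gamma_2$ Wege in $X$ mit $\gamma_1(1) = \gamma_2(0)$. Dann ist
+$\gamma(t) = \begin{cases} \gamma_1(2t) & \text{falls } 0 \leq t < \frac{1}{2} \\ \gamma_2(2t - 1) & \text{falls } \frac{1}{2} \leq t \leq 1 \end{cases}$
+ein Weg in $X$. Er heißt zusammengesetzter Weg und man schreibt $\gamma = \gamma_1 * \gamma_2$.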
+Bemerkung 43
+Das Zusammensetzen von Wegen ist nur bis auf Homotopie assoziativ, d. h.:
+$\gamma_1 * (\gamma_2 * \gamma_3) \neq (\gamma_1 * \gamma_2) * \gamma_3$
+$\gamma_1 * (\gamma_2 * \gamma_3) \sim (\gamma_1 * \gamma_2) * \gamma_3$
+mit $\gamma_1(1) = \gamma_2(0)$ und $\gamma_2(1) = \gamma_3(0)$.
+γ
+1 γ
+2 γ
+3
+0 1
+/2 3
+/4 1
+(a) γ
+1 ∗ (γ
+2 ∗ γ
+3)
+γ
+1 γ
+2 γ
+3
+0 1
+/4 1
+/2 1
+(b) (γ
+1 ∗ γ
+2) ∗ γ
+3
+Abbildung 3.4: Das Zusammensetzen von Wegen ist nicht assoziativ
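+Beweis: Das Zusammensetzen von Wegen ist wegen Bemerkung 42 bis auf Homotopie assoziativ.
+Verwende dazu
+$\varphi(t) = \begin{cases} \frac{1}{2} t & \text{falls } 0 \leq t < \frac{1}{2} \\ t - \frac{1}{4} & \text{falls } \frac{1}{2} \leq t < \frac{3}{4} \\ 2t - 1 & \text{falls } \frac{3}{4} \leq t \leq 1 \end{cases}$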
+Bemerkung 44
+Sei $X$ ein topologischer Raum, $a, b, c \in X$, $\gamma_1, \gamma_1'$ Wege von $a$ nach $b$ und $\gamma_2, \gamma_2'$ Wege von $b$
+nach $c$.
+Sind $\gamma_1 \sim \gamma_1'$ und $\gamma_2 \sim \gamma_2'$, so ist $\gamma_1 * \gamma_2 \sim \gamma_1' * \gamma_2'$.
+  3.2. FUNDAMENTALGRUPPE
+γ
+1γ
+1
+a
+ b c
+γ
+2γ
+2
+Abbildung 3.5: Situation aus Bemerkung 44
+.
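+Beweis: Sei $H_i$ eine Homotopie zwischen $\gamma_i$ und $\gamma_i'$, $i = 1, 2$.
+Dann ist
+$H(t, s) := \begin{cases} H_1(2t, s) & \text{falls } 0 \leq t \leq \frac{1}{2} \\ H_2(2t - 1, s) & \text{falls } \frac{1}{2} \leq t \leq 1 \end{cases} \quad \forall s \in I$
+eine Homotopie zwischen $\gamma_1 * \gamma_2$ und $\gamma_1' * \gamma_2'$.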
+Eine spezielle Homotopieäquivalenz sind sog. Deformationsretraktionen:
+Definition 44
+Sei $X$ ein topologischer Raum, $A \subseteq X$, $r: X \to A$ eine stetige Abbildung und $\iota = (\mathrm{id}_X)|_A$.
+a) $\iota: A \to X$ mit $\iota(x) = x$ heißt die Inklusionsabbildung und man schreibt: $\iota: A \hookrightarrow X$.
+b) $r$ heißt Retraktion, wenn $r|_A = \mathrm{id}_A$ ist.
+c) $A$ heißt Deformationsretrakt, wenn es eine Retraktion $r$ auf $A$ mit $\iota \circ r \sim \mathrm{id}_X$ gibt.
+Beispiel 31 (Zylinder auf Kreis)
+Sei $X = S^1 \times \mathbb{R}$ ein topologischer Raum und
+$r: S^1 \times \mathbb{R} \to S^1 \times \{ 0 \} \cong S^1$
+mit
+$r(x, y) := (x, 0)$
+eine Abbildung. $r$ ist eine Retraktion, da $r|_{S^1} \cong \mathrm{id}_{S^1}$.
+$\iota \circ r: S^1 \times \mathbb{R} \to S^1 \times \mathbb{R}$
+$(x, y) \mapsto (x, 0)$
+$H: (S^1 \times \mathbb{R}) \times I \to S^1 \times \mathbb{R}$
+$(x, y, t) \mapsto (x, ty)$
+3.2 Fundamentalgruppe
+Für einen Weg γ sei [γ ] seine Homotopieklasse.
+Definition 45
+Sei X ein topologischer Raum und x ∈ X . Sei außerdem
+π
+1(X, x) := { [γ ] | γ ist Weg in X mit γ (0) = γ (1) = x }
+  3.2. FUNDAMENTALGRUPPE
+Durch [γ
+1] ∗
+G [γ
+2] := [γ
+1 ∗ γ
+2] wird π
+1(X, x) zu einer Gruppe. Diese Gruppe heißt Fundamentalgruppe
+ von X im Basispunkt x.
+Bemerkung 45
+Im R2
+ gibt es nur eine Homotopieklasse.
+Beweis: (Fundamentalgruppe ist eine Gruppe)
+a) Abgeschlossenheit folgt direkt aus der Definition von $*_G$
+b) Assoziativität folgt aus Bemerkung 43
+c) Neutrales Element $e = [\gamma_0]$, $\gamma_0(t) = x\ \forall t \in I$. $e * [\gamma] = [\gamma] = [\gamma] * e$, da $\gamma_0 * \gamma \sim \gamma$
+d) Inverses Element $[\gamma]^{-1} = [\overline{\gamma}] = [\gamma(1 - t)]$, denn $\overline{\gamma} * \gamma \sim \gamma_0 \sim \gamma * \overline{\gamma}$
+Beispiel 32
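+1) $S^1 = \{ z \in \mathbb{C} \mid |z| = 1 \} = \{ (\cos \varphi, \sin \varphi) \in \mathbb{R}^2 \mid 0 \leq \varphi \leq 2\pi \}$
+$\pi_1(S^1, 1) = \{ [\gamma^k] \mid k \in \mathbb{Z} \} \cong \mathbb{Z}$. Dabei ist $\gamma(t) = e^{2\pi i t} = \cos(2\pi t) + i \sin(2\pi t)$ und
+$\gamma^k := \underbrace{\gamma * \dots * \gamma}_{k \text{ mal}}$
+$[\gamma^k] \mapsto k$ ist ein Isomorphismus.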
+2) π
+1(R2
+, 0) = π
+1(R2
+, x) = { e } für jedes x ∈ R2
+3) π
+1(Rn
+, x) = { e } für jedes x ∈ Rn
+4) G ⊆ Rn
+ heißt sternförmig bzgl. x ∈ G, wenn für jedes y ∈ G auch die Strecke
+[x, y] ⊆ G ist.
+Für jedes sternförmige G ⊆ Rn
+ ist π
+1(G, x) = { e }
+x
+Abbildung 3.6: Sternförmiges Gebiet
+.
+5) $\pi_1(S^2, x_0) = \{ e \}$, da im $\mathbb{R}^2$ alle Wege homotop zu $\{ e \}$ sind. Mithilfe der stereographischen
+Projektion kann von $S^2$ auf den $\mathbb{R}^2$ abgebildet werden.
+Dieses Argument funktioniert nicht mehr bei flächenfüllenden Wegen, d. h. wenn
+$\gamma: I \to S^2$ surjektiv ist.
+Bemerkung 46
+Sei $X$ ein topologischer Raum, $a, b \in X$, $\delta: I \to X$ ein Weg von $a$ nach $b$.
+Dann ist die Abbildung
+$\alpha: \pi_1(X, a) \to \pi_1(X, b)$, $[\gamma] \mapsto [\overline{\delta} * \gamma * \delta]$
+ein Gruppenisomorphismus.
+  3.2. FUNDAMENTALGRUPPE
+a
+ bγ
+ δ
+Abbildung 3.7: Situation aus Bemerkung 46
+.
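+Beweis:
+$\alpha([\gamma_1] * [\gamma_2]) = [\overline{\delta} * (\gamma_1 * \gamma_2) * \delta]$
+$= [\overline{\delta} * \gamma_1 * \delta * \overline{\delta} * \gamma_2 * \delta]$
+$= [\overline{\delta} * \gamma_1 * \delta] * [\overline{\delta} * \gamma_2 * \delta]$
+$= \alpha([\gamma_1]) * \alpha([\gamma_2])$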
+Definition 46
+Ein wegzusammenhängender topologischer Raum X heißt einfach zusammenhängend,
+wenn π
+1(X, x) = { e } für ein x ∈ X .
+Wenn π
+1(X, x) = { e } für ein x ∈ X gilt, dann wegen Bemerkung 46 sogar für alle x ∈ X .
+Bemerkung 47
+Es seien X, Y topologische Räume, f : X → Y eine stetige Abbildung, x ∈ X, y := f (x) ∈ Y .
+a) Dann ist die Abbildung f
+∗ : π
+1(X, x) → π
+1(Y , y), [γ ] → [f ◦ γ ] ein Gruppenhomomorphismus.
+
+b) Ist Z ein weiterer topologischer Raum und g : Y → Z eine stetige Abbildung z := g(y).
+Dann ist (g ◦ f )
+∗ = g
+∗ ◦ f
+∗ : π
+1(X, x) → π
+1(Z, z)
+Beweis:
+a) f
+∗ ist wohldefiniert: Seien γ
+1, γ
+2 homotope Wege von x. z.Z.: f ◦ γ
+1 ∼ f ◦ γ
+2: Nach
+Voraussetzung gibt es stetige Abbildungen H : I × I → X mit
+H (t, 0) = γ
+1(t),
+H (t, 1) = γ
+2(t),
+H (0, s) = H (1, s) = x.
+Dann ist f ◦ H : I × I → Y stetig mit (f ◦ H )(t, 0) = f (H (t, 0)) = f (γ
+1(t)) = (f ◦ γ
+1)(t)
+etc. ⇒ f ◦ γ
+1 ∼ f ◦ γ
+2.
+f
+∗([γ
+1] ∗ [γ
+2]) = [f ◦ (γ
+1 ∗ γ
+2)] = [(f ◦ γ
+1)] ∗ [(f ◦ γ
+2)] = f
+∗([γ
+1]) ∗ f
+∗([γ
+2])
+b) (g ◦ f )
+∗([γ ]) = [(g ◦ f ) ◦ γ ] = [g ◦ (f ◦ γ )] = g
+∗([f ◦ γ ]) = g
+∗(f
+∗([γ ])) = (g
+∗ ◦ f
+∗)([γ ])
+Beispiel 33
+1) $f: S^1 \to \mathbb{R}^2$ ist injektiv, aber $f_*: \pi_1(S^1, 1) \cong \mathbb{Z} \to \pi_1(\mathbb{R}^2, 1) = \{ e \}$ ist nicht injektiv.
+2) $f: \mathbb{R} \to S^1$, $t \mapsto (\cos 2\pi t, \sin 2\pi t)$ ist surjektiv, aber $f_*: \pi_1(\mathbb{R}, 0) = \{ e \} \to \pi_1(S^1, 1) \cong \mathbb{Z}$
+ist nicht surjektiv.
+  3.2. FUNDAMENTALGRUPPE
+Bemerkung 48
+Sei f : X → Y ein Homöomorphismus zwischen topologischen Räumen X, Y . Dann gilt:
+f
+∗ : π
+1(X, x) → π
+1(Y , f (x))
+ist ein Isomorphismus für jedes x ∈ X .
+Beweis: Sei $g: Y \to X$ die Umkehrabbildung, d. h. $g$ ist stetig und $f \circ g = \mathrm{id}_Y$, $g \circ f = \mathrm{id}_X$
+$\Rightarrow f_* \circ g_* = (f \circ g)_* = (\mathrm{id}_Y)_* = \mathrm{id}_{\pi_1(Y, f(x))}$ und $g_* \circ f_* = \mathrm{id}_{\pi_1(X, x)}$.
+Definition 47
+Seien X, Y topologische Räume, x
+0 ∈ X, y
+0 ∈ Y , f , g : X → Y stetig mit f (x
+0) = y
+0 = g(x
+0).
+f und g heißen homotop (f ∼ g), wenn es eine stetige Abbildung H : X × I → Y mit
+H (x, 0) = f (x) ∀x ∈ X
+H (x, 1) = g(x) ∀x ∈ X
+H (x
+0, s) = y
+0 ∀s ∈ I
+gibt.
+Bemerkung 49
+Sind f und g homotop, so ist f
+∗ = g
+∗ : π
+1(X, x
+0) → π
+1(Y , y
+0).
+Beweis: Sei $\gamma$ ein geschlossener Weg in $X$ um $x_0$, d. h. $[\gamma] \in \pi_1(X, x_0)$.
+Z. z.: $f \circ \gamma \sim g \circ \gamma$
+Sei dazu $H_\gamma: I \times I \to Y$, $(t, s) \mapsto H(\gamma(t), s)$. Dann gilt:
+$H_\gamma(t, 0) = H(\gamma(t), 0) = (f \circ \gamma)(t)\ \forall t \in I$
+$H_\gamma(1, s) = H(\gamma(1), s) = H(x_0, s) = y_0\ \forall s \in I$
+$H_\gamma(t, 1) = H(\gamma(t), 1) = g(\gamma(t))\ \forall t \in I$
+Beispiel 34
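+$f: X \to Y$, $g: Y \to X$ mit $g \circ f \sim \mathrm{id}_X$, $f \circ g \sim \mathrm{id}_Y$
+$\Rightarrow f_*$ ist Isomorphismus. Konkret: $f: \mathbb{R}^2 \to \{ 0 \}$, $g: \{ 0 \} \to \mathbb{R}^2$
+$\Rightarrow f \circ g = \mathrm{id}_{\{ 0 \}}$, $g \circ f: \mathbb{R}^2 \to \mathbb{R}^2$, $x \mapsto 0$ für alle $x$.
+$g \circ f \sim \mathrm{id}_{\mathbb{R}^2}$ mit Homotopie: $H: \mathbb{R}^2 \times I \to \mathbb{R}^2$, $H(x, s) = (1 - s)x$ (stetig!)
+$\Rightarrow H(x, 0) = x = \mathrm{id}_{\mathbb{R}^2}(x)$, $H(x, 1) = 0$, $H(0, s) = 0\ \forall s \in I$.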
+Satz 3.1 (Satz von Seifert und van Kampen „light“)
+Sei X ein topologischer Raum, U, V ⊆ X offen mit U ∪ V = X und U ∩ V wegzusammenhängend.
+
+Dann wird π
+1(X, x) für x ∈ U ∩ V erzeugt von geschlossenen Wegen um x, die ganz in
+U oder ganz in V verlaufen.
+  3.3. ÜBERLAGERUNGEN
+Beweis: Sei $\gamma: I \to X$ ein geschlossener Weg um $x$. Überdecke $I$ mit endlich vielen offenen
+Intervallen $I_1, I_2, \dots, I_n$, die ganz in $\gamma^{-1}(U)$ oder ganz in $\gamma^{-1}(V)$ liegen.
+O. B. d. A. sei $\gamma(I_1) \subseteq U$, $\gamma(I_2) \subseteq V$, etc.
+Wähle $t_i \in I_i \cap I_{i+1}$, also $\gamma(t_i) \in U \cap V$. Sei $\sigma_i$ Weg in $U \cap V$ von $x$ nach $\gamma(t_i)$ $\Rightarrow$ $\gamma$ ist
+homotop zu
+$\underbrace{\gamma_1 * \overline{\sigma_1}}_{\text{in } U} * \underbrace{\sigma_1 * \gamma_2 * \overline{\sigma_2}}_{\text{in } V} * \dots * \sigma_{n-1} * \gamma_n$ mit $\gamma_i := \gamma|_{I_i}$
+a
+ b
+x
+Abbildung 3.8: Topologischer Raum X
+Beispiel 35 (Satz von Seifert und van Kampen)
+1) Sei $X$ wie in Abbildung 3.8. $\pi_1(X, x)$ wird „frei“ erzeugt von $a$ und $b$, weil $\pi_1(U, x) = \langle a \rangle \cong \mathbb{Z}$,
+$\pi_1(V, x) = \langle b \rangle \cong \mathbb{Z}$, insbesondere ist $a * b$ nicht homotop zu $b * a$.
+2) Torus: $\pi_1(T^2, x)$ wird erzeugt von $a$ und $b$.
+VUa
+b
+ Va
+ b
+Abbildung 3.9: $a * b = b * a \Leftrightarrow a * b * \overline{a} * \overline{b} \sim e$
+3.3 Überlagerungen
+Definition 48
+Es seien $X, Y$ zusammenhängende topologische Räume und $p: Y \to X$ eine stetige Abbildung.
+$p$ heißt Überlagerung, wenn jedes $x \in X$ eine offene Umgebung $U = U(x) \subseteq X$ besitzt,
+sodass $p^{-1}(U)$ disjunkte Vereinigung von offenen Teilmengen $V_j \subseteq Y$ ist ($j \in I$) und
+$p|_{V_j}: V_j \to U$ ein Homöomorphismus ist.
+$|I|$ heißt Grad der Überlagerung $p$ und man schreibt:
+$\deg p := |I|$
+  3.3. ÜBERLAGERUNGEN
+Abbildung 3.10: R → S 1
+,
+t → (cos 2πt, sin 2πt)
+Beispiel 36
+1) siehe Abbildung 3.10
+2) siehe Abbildung 3.11
+3) Rn
+ → T n
+ = Rn
+/Zn
+4) S n
+ → P n
+(R)
+5) S 1
+ → S 1
+, z → z2
+, siehe Abbildung 3.12
+0 1 2 3 4 5 60123456
+ *
+******
+ ******
+ ******
+ ******
+ ******
+ ******
+ −−−→
+Abbildung 3.11: R2
+ → T 2
+ = R2
+/Z2
+Bemerkung 50
+Überlagerungen sind surjektiv.
+Beweis: Sei $p: Y \to X$ eine Überlagerung und $x \in X$ beliebig. Dann existiert eine offene
+Umgebung $U(x) \subseteq X$ und offene Teilmengen $V_j \subseteq Y$ mit $p^{-1}(U) = \dot{\bigcup} V_j$ und $p|_{V_j}: V_j \to U$
+ist Homöomorphismus.
+D. h. es existiert ein $y \in V_j$, so dass $p|_{V_j}(y) = x$. Da $x \in X$ beliebig war und ein $y \in Y$
+existiert, mit $p(y) = x$, ist $p$ surjektiv.
+  3.3. ÜBERLAGERUNGEN
+1i
+ zz2
+ϕ
+ϕ
+ z2
+Abbildung 3.12: t → (cos 4πt, sin 4πt)
+Definition 49
+Seien (X, T
+X ), (Y , T
+Y ) topologische Räume und f : X → Y eine Abbildung.
+f heißt offen :⇔ ∀U ∈ T
+X : f (U ) ∈ T
+Y .
+Beispiel 37 (Offene und stetige Abbildungen)
+Sei $X$ ein topologischer Raum und seien $f_i: \mathbb{R} \to \mathbb{R}$ mit $i \in \{ 1, 2, 3 \}$ und
+$g: \mathbb{R} \to S^1 = \{ z \in \mathbb{C} \mid |z| = 1 \}$ Abbildungen.
+1) f
+1 := id
+R ist eine offene und stetige Abbildung.
+2) g(x) := e2πix
+ ist eine offene, aber keine stetige Abbildung (vgl. Abbildung 1.5).
+3) f
+2(x) := 42 ist eine stetige, aber keine offene Abbildung.
+4) $f_3(x) := \begin{cases} 0 & \text{falls } x \in \mathbb{Q} \\ 42 & \text{falls } x \in \mathbb{R} \setminus \mathbb{Q} \end{cases}$
+ist weder stetig noch offen.
+Bemerkung 51
+Überlagerungen sind offene Abbildungen.
+Beweis:
+Sei $V \subseteq Y$ offen, $y \in V$ und $x \in p(V)$, sodass $x = p(y)$ gilt. Sei weiter $U = U_x$ eine offene Umgebung
+von $x$ wie in Definition 48 und $V_j$ die Komponente von $p^{-1}(U)$, die $y$ enthält.
+Dann ist $V \cap V_j$ offene Umgebung von $y$.
+$\Rightarrow p(V \cap V_j)$ ist offen in $p(V_j)$, also auch offen in $X$. Außerdem ist $p(y) = x \in p(V \cap V_j)$ und
+$p(V \cap V_j) \subseteq p(V)$.
+$\Rightarrow p(V)$ ist offen.
+Definition 50
+Sei X ein topologischer Raum und M ⊆ X .
+M heißt diskret in X , wenn M in X keinen Häufungspunkt hat.
+Bemerkung 52
+Sei p : Y → X Überlagerung, x ∈ X .
+a) X hausdorffsch ⇒ Y hausdorffsch
+b) p−1
+(x) ist diskret in Y für jedes x ∈ X .
+Beweis:
+a) Seien $y_1, y_2 \in Y$ mit $y_1 \neq y_2$.
+1. Fall: $p(y_1) = p(y_2) = x$.
+  3.3. ÜBERLAGERUNGEN
+Sei $U$ Umgebung von $x$ wie in Definition 48, $V_{j_1}$ bzw. $V_{j_2}$ die Komponente von $p^{-1}(U)$,
+die $y_1$ bzw. $y_2$ enthält.
+Dann ist $V_{j_1} \neq V_{j_2}$, weil beide ein Element aus $p^{-1}(x)$ enthalten.
+$\Rightarrow V_{j_1} \cap V_{j_2} = \emptyset$ nach Voraussetzung.
+2. Fall: $p(y_1) \neq p(y_2)$.
+Dann seien $U_1$ und $U_2$ disjunkte Umgebungen von $p(y_1)$ und $p(y_2)$.
+$\Rightarrow p^{-1}(U_1)$ und $p^{-1}(U_2)$ sind disjunkte Umgebungen von $y_1$ und $y_2$.
+b) Sei $x \in X$ beliebig, aber fest.
+Zu zeigen: $\forall y_i \in p^{-1}(x): \exists V_i \in T_Y$ mit $y_i \in V_i$, sodass gilt: $i \neq j \Rightarrow V_i \cap V_j = \emptyset$.
+Die $V_i$ existieren wegen der Definition einer Überlagerung: $p$ heißt Überlagerung
+$:\Leftrightarrow \forall x \in X\ \exists U = U(x) \in T_X: p^{-1}(U) = \dot{\bigcup}_{V_i \in T_Y} V_i$ und $p|_{V_i}$ ist Homöomorphismus.
+$\Rightarrow (p|_{V_i})^{-1}(x) = \{ y_i \}$
+$\Rightarrow$ Alle $y_i$ liegen diskret in $Y$, da Häufungspunkte unendlich viele Elemente in jeder
+Umgebung benötigen.
+Bemerkung 53 (Eindeutigkeit des Überlagerungsgrades)
+Sei $p: Y \to X$ Überlagerung. Dann gilt:
+$\forall x_1, x_2 \in X: |p^{-1}(x_1)| = |p^{-1}(x_2)|$
+Hinweis: $|p^{-1}(x_1)| = \infty$ ist erlaubt!
+Beweis: Sei $U$ Umgebung von $x_1$ wie in Definition 48, $x \in U$. Dann enthält jedes $V_j$ mit $j \in I$
+genau ein Element von $p^{-1}(x)$.
+$\Rightarrow |p^{-1}(x)|$ ist konstant für $x \in U$
+$\overset{X \text{ zhgd.}}{\Longrightarrow} |p^{-1}(x)|$ ist konstant für $x \in X$.
+Definition 51
+Es seien $X, Y, Z$ topologische Räume, $p: Y \to X$ eine Überlagerung und $f: Z \to X$ stetig.
+Eine stetige Abbildung $\tilde{f}: Z \to Y$ heißt Liftung von $f$, wenn $p \circ \tilde{f} = f$ ist.
+Y
+X Z
+p ˜
+f
+ f
+Bemerkung 54 (Eindeutigkeit der Liftung)
+Sei $Z$ zusammenhängend und $f_0, f_1: Z \to Y$ Liftungen von $f$.
+$\exists z_0 \in Z: f_0(z_0) = f_1(z_0) \Rightarrow f_0 = f_1$
+Beweis: Sei $T = \{ z \in Z \mid f_0(z) = f_1(z) \}$.
+Z. z.: $T$ ist offen und $Z \setminus T$ ist auch offen.
+  3.3. ÜBERLAGERUNGEN
+0 1 2 3 4 5 60123456
+T Liften
+−−−→ R2
+ /Z2
+Abbildung 3.13: Beim Liften eines Weges bleiben geschlossene Wege im allgemeinen nicht geschlossen
+
+Sei $z \in T$, $x = f(z)$, $U$ Umgebung von $x$ wie in Definition 48, $V$ die Komponente von $p^{-1}(U)$,
+die $y := f_0(z) = f_1(z)$ enthält.
+Sei $q: U \to V$ die Umkehrabbildung zu $p|_V$.
+Sei $W := f^{-1}(U) \cap f_0^{-1}(V) \cap f_1^{-1}(V)$. $W$ ist offene Umgebung in $Z$ von $z$.
+Behauptung: $W \subseteq T$
+Denn für $w \in W$ ist $q(f(w)) = q((p \circ f_0)(w)) = ((q \circ p) \circ f_0)(w) = f_0(w) = q(f(w)) = f_1(w)$
+$\Rightarrow T$ ist offen.
+Analog: $Z \setminus T$ ist offen.
+Satz 3.2
+Sei $p: Y \to X$ Überlagerung, $\gamma: I \to X$ ein Weg, $y \in Y$ mit $p(y) = \gamma(0) =: x$.
+Dann gibt es genau einen Weg $\tilde{\gamma}: I \to Y$ mit $\tilde{\gamma}(0) = y$ und $p \circ \tilde{\gamma} = \gamma$.
+$p: Y \to X$ Überlagerung, $X, Y$ wegzusammenhängend. $p$ stetig und surjektiv, zu $x \in X$ $\exists$
+Umgebung $U$, so dass $p^{-1}(U) = \dot{\bigcup} V_j$
+$p|_{V_j}: V_j \to U$ Homöomorphismus.
+Bemerkung 55
+Wege in X lassen sich zu Wegen in Y liften.
+Zu jedem y ∈ p−1
+(γ (0)) gibt es genau einen Lift von γ .
+  3.3. ÜBERLAGERUNGEN
+Proposition 3.3
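+Seien $p: Y \to X$ eine Überlagerung, $a, b \in X$, $\gamma_0, \gamma_1: I \to X$ homotope Wege von $a$
+nach $b$, $\tilde{a} \in p^{-1}(a)$, $\tilde{\gamma}_0, \tilde{\gamma}_1$ Liftungen von $\gamma_0$ bzw. $\gamma_1$ mit $\tilde{\gamma}_i(0) = \tilde{a}$.
+Dann ist $\tilde{\gamma}_0(1) = \tilde{\gamma}_1(1)$ und $\tilde{\gamma}_0 \sim \tilde{\gamma}_1$.
+Beweis: Sei $H: I \times I \to X$ Homotopie zwischen $\gamma_0$ und $\gamma_1$.
+Für $s \in I$ sei $\gamma_s: I \to X$, $t \mapsto H(t, s)$.
+Sei $\tilde{\gamma}_s$ Lift von $\gamma_s$ mit $\tilde{\gamma}_s(0) = \tilde{a}$
+Sei $\tilde{H}: I \times I \to Y$, $\tilde{H}(t, s) := \tilde{\gamma}_s(t)$
+Dann gilt:
+(i) $\tilde{H}$ ist stetig (Beweis wie für Bemerkung 54)
+(ii) $\tilde{H}(t, 0) = \tilde{\gamma}_0(t)$, $\tilde{H}(t, 1) = \tilde{\gamma}_1(t)$
+(iii) $\tilde{H}(0, s) = \tilde{\gamma}_s(0) = \tilde{a}$
+(iv) $\tilde{H}(1, s) \in p^{-1}(b)$
+Da $p^{-1}(b)$ diskrete Teilmenge von $Y$ ist
+$\Rightarrow \tilde{b}_s = \tilde{H}(1, s) = \tilde{H}(1, 0)\ \forall s \in I$
+$\Rightarrow \tilde{b}_0 = \tilde{b}_1$ und $\tilde{H}$ ist Homotopie zwischen $\tilde{\gamma}_0$ und $\tilde{\gamma}_1$.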
+Folgerung 3.4
+Sei $p: Y \to X$ eine Überlagerung, $x_0 \in X$, $y_0 \in p^{-1}(x_0)$
+a) $p_*: \pi_1(Y, y_0) \to \pi_1(X, x_0)$ ist injektiv
+b) $[\pi_1(X, x_0) : p_*(\pi_1(Y, y_0))] = \deg(p)$
+Beweis:
+a) Sei $\tilde{\gamma}$ ein Weg in $Y$ um $y_0$ und $p_*([\tilde{\gamma}]) = e$, also $p \circ \tilde{\gamma} \sim \gamma_{x_0}$
+Nach Proposition 3.3 ist dann $\tilde{\gamma}$ homotop zum Lift des konstanten Wegs $\gamma_{x_0}$ mit
+Anfangspunkt $y_0$, also zu $\gamma_{y_0}$ $\Rightarrow [\tilde{\gamma}] = e$
+b) Sei $d = \deg p$ und $p^{-1}(x_0) = \{ y_0, y_1, \dots, y_{d-1} \}$. Für einen geschlossenen Weg $\gamma$ in $X$
+um $x_0$ sei $\tilde{\gamma}$ die Liftung mit $\tilde{\gamma}(0) = y_0$.
+$\tilde{\gamma}(1) \in \{ y_0, \dots, y_{d-1} \}$ hängt nur von $[\gamma] \in \pi_1(X, x_0)$ ab.
+Für geschlossene Wege $\gamma_0, \gamma_1$ um $x_0$ gilt:
+$\tilde{\gamma}_0(1) = \tilde{\gamma}_1(1)$
+$\Leftrightarrow [\tilde{\gamma}_0 * \tilde{\gamma}_1^{-1}] \in \pi_1(Y, y_0)$
+$\Leftrightarrow [\gamma_0 * \gamma_1^{-1}] \in p_*(\pi_1(Y, y_0))$
+$\Leftrightarrow [\gamma_0]$ und $[\gamma_1]$ liegen in derselben Nebenklasse bzgl. $p_*(\pi_1(Y, y_0))$
+  3.3. ÜBERLAGERUNGEN
+Zu $i \in \{ 0, \dots, d - 1 \}$ gibt es Weg $\delta_i$ in $Y$ mit $\delta_i(0) = y_0$ und $\delta_i(1) = y_i$
+$\Rightarrow p \circ \delta_i$ ist geschlossener Weg in $X$ um $x_0$.
+$\Rightarrow$ Jedes $y_i$ mit $i = 0, \dots, d - 1$ ist $\tilde{\gamma}(1)$ für ein $[\gamma] \in \pi_1(X, x_0)$.
+Bemerkung 56
+Sei $p: Y \to X$ Überlagerung und $X$ einfach zusammenhängend.
+Dann ist $p$ ein Homöomorphismus.
+Beweis: Wegen Folgerung 3.4.a ist auch $Y$ einfach zusammenhängend und wegen Folgerung 3.4.b
+ist $\deg(p) = 1$, $p$ ist also bijektiv.
+Nach Bemerkung 51 ist $p$ offen $\Rightarrow p^{-1}$ ist stetig. $\Rightarrow p$ ist Homöomorphismus.
+Definition 52
+Eine Überlagerung $p: \tilde{X} \to X$ heißt universell, wenn $\tilde{X}$ einfach zusammenhängend ist.
+Beispiel 38 (Universelle Überlagerungen)
+$\mathbb{R} \to S^1$, $t \mapsto (\cos 2\pi t, \sin 2\pi t)$
+$\mathbb{R}^2 \to T^2 = \mathbb{R}^2 / \mathbb{Z}^2$
+$S^n \to P^n(\mathbb{R})$ für $n \geq 2$
+Satz 3.5
+Sei $p: \tilde{X} \to X$ eine universelle Überlagerung, $q: Y \to X$ weitere Überlagerung.
+Sei $x_0 \in X$, $\tilde{x}_0 \in \tilde{X}$, $y_0 \in Y$ mit $q(y_0) = x_0 = p(\tilde{x}_0)$.
+Dann gibt es genau eine Überlagerung $\tilde{p}: \tilde{X} \to Y$ mit $\tilde{p}(\tilde{x}_0) = y_0$.
+Beweis: Sei $z \in \tilde{X}$, $\gamma_z: I \to \tilde{X}$ ein Weg von $\tilde{x}_0$ nach $z$.
+Sei $\delta_z$ die eindeutige Liftung von $p \circ \gamma_z$ nach $Y$ mit $\delta_z(0) = y_0$.
+Setze $\tilde{p}(z) = \delta_z(1)$.
+Da $\tilde{X}$ einfach zusammenhängend ist, hängt $\tilde{p}(z)$ nicht vom gewählten Weg $\gamma_z$ ab.
+Offensichtlich ist $q(\tilde{p}(z)) = p(z)$.
+Zu zeigen: $\tilde{p}$ ist stetig in $z \in \tilde{X}$:
+Sei $W \subseteq Y$ offene Umgebung von $\tilde{p}(z)$.
+$\overset{q \text{ offen}}{\Longrightarrow}$ $q(W)$ ist offene Umgebung von $p(z) = q(\tilde{p}(z))$.
+Sei $U \subseteq q(W)$ offen wie in Definition 48 und $V \subseteq q^{-1}(U)$ die Komponente, die $\tilde{p}(z)$ enthält.
+O. B. d. A. sei $V \subseteq W$.
+Sei $Z := p^{-1}(U)$. Für $u \in Z$ sei $\delta$ ein Weg in $Z$ von $z$ nach $u$.
+$\Rightarrow \gamma_z * \delta$ ist Weg von $\tilde{x}_0$ nach $u$
+$\Rightarrow \tilde{p}(u) \in V$
+$\Rightarrow Z \subseteq \tilde{p}^{-1}(W)$
+$\Rightarrow \tilde{p}$ ist stetig
+  3.3. ÜBERLAGERUNGEN
+Folgerung 3.6
+Sind $p: \tilde{X} \to X$ und $q: \tilde{Y} \to X$ universelle Überlagerungen, so sind $\tilde{X}$ und $\tilde{Y}$ homöomorph.
+Beweis: Seien $x_0 \in X$, $\tilde{x}_0 \in \tilde{X}$ mit $p(\tilde{x}_0) = x_0$ und $\tilde{y}_0 \in q^{-1}(x_0) \subseteq \tilde{Y}$.
+Nach Satz 3.5 gibt es genau eine Überlagerung
+$f: \tilde{X} \to \tilde{Y}$ mit $f(\tilde{x}_0) = \tilde{y}_0$ und $q \circ f = p$
+und genau eine Überlagerung
+$g: \tilde{Y} \to \tilde{X}$ mit $g(\tilde{y}_0) = \tilde{x}_0$ und $p \circ g = q$
+Damit gilt: $p \circ g \circ f = q \circ f = p$, $q \circ f \circ g = p \circ g = q$. Also ist $g \circ f: \tilde{X} \to \tilde{X}$ Lift von
+$p: \tilde{X} \to X$ mit $(g \circ f)(\tilde{x}_0) = \tilde{x}_0$.
+Da auch $\mathrm{id}_{\tilde{X}}$ diese Eigenschaft hat, folgt mit Bemerkung 53: $g \circ f = \mathrm{id}_{\tilde{X}}$.
+Analog gilt $f \circ g = \mathrm{id}_{\tilde{Y}}$.
+Die Frage, wann es eine universelle Überlagerung gibt, beantwortet der folgende Satz:
+Definition 53
+Sei (X, T) ein topologischer Raum und x ∈ X .
+U ⊆ T
+ heißt eine Umgebungsbasis von x, wenn jede offene Umgebung von x eine Teilmenge
+von U enthält.
+Satz 3.7
+Es sei X ein wegzusammenhängender topologischer Raum in dem jeder Punkt eine
+Umgebungsbasis aus einfach zusammenhängenden Mengen hat.
+Dann gibt es eine universelle Überlagerung.
+Beweis: Sei $x_0 \in X$ und $\tilde{X} := \{ (x, [\gamma]) \mid x \in X, \gamma \text{ Weg von } x_0 \text{ nach } x \}$ und $p: \tilde{X} \to X, (x, [\gamma]) \mapsto x$.
+Die Topologie auf $\tilde{X}$ ist folgende: Definiere eine Umgebungsbasis von $(x, [\gamma])$ wie folgt: Es
+sei $U$ eine einfach zusammenhängende Umgebung von $x$ und
+$\tilde{U} = \tilde{U}(x, [\gamma]) := \{ (y, [\gamma * \alpha]) \mid y \in U, \alpha \text{ Weg in } U \text{ von } x \text{ nach } y \}$
+$p$ ist Überlagerung: $p|_{\tilde{U}}: \tilde{U} \to U$ ist bijektiv. $p$ ist stetig und damit $p|_{\tilde{U}}$ ein Homöomorphismus.
+Sind $\gamma_1, \gamma_2$ Wege von $x_0$ nach $x$ und $\gamma_1 \not\sim \gamma_2$, so ist $\tilde{U}(x, [\gamma_1]) \cap \tilde{U}(x, [\gamma_2]) = \emptyset$, denn: Ist
+$\gamma_1 * \alpha \sim \gamma_2 * \alpha$, so ist auch $\gamma_1 \sim \gamma_2$. Also ist $p$ eine Überlagerung.
+$\tilde{X}$ ist einfach zusammenhängend: Es sei $\tilde{x}_0 := (x_0, e)$ und $\tilde{\gamma}: I \to \tilde{X}$ ein geschlossener Weg
+um $\tilde{x}_0$.
+Sei $\gamma := p(\tilde{\gamma})$.
+Annahme: $[\tilde{\gamma}] \neq e$
+Mit Bemerkung 55.a folgt dann: $[\gamma] \neq e$.
+Dann ist der Lift von $\gamma$ nach $\tilde{X}$ mit Anfangspunkt $\tilde{x}_0$ ein Weg von $\tilde{x}_0$ nach $(x_0, [\gamma])$. Widerspruch.
+
+  3.3. ÜBERLAGERUNGEN
+Definition 54
+Es sei p : Y → X eine Überlagerung und f : Y → Y ein Homöomorphismus.
+a) f heißt Decktransformation von p :⇔ p ◦ f = p.
+b)
+ Die Decktransformationen von p : Y → X bilden mit der Verkettung eine Gruppe,
+die sog. Decktransformationsgruppe. Man schreibt: Deck(p), Deck(Y /X ) oder
+Deck(Y → X ).
+c) p heißt regulär, wenn | Deck(Y /X )| = deg p gilt.
+Bemerkung 57 (Eigenschaften der Decktransformation)
+a) (Deck Y /X , ◦) ist eine Gruppe
+b) Ist $f \in \mathrm{Deck}(Y/X)$ und $f \neq \mathrm{id}$, dann hat $f$ keinen Fixpunkt.
+c) | Deck(Y /X )| ≤ deg p
+d) Ist $p$ eine reguläre Überlagerung, dann gilt: $\forall x \in X$: $\mathrm{Deck}(Y/X)$ operiert transitiv
+auf der Menge der Urbilder $p^{-1}(x)$.
+Beweis:
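+a) Es gilt:
+• $\mathrm{id}_Y \in \mathrm{Deck}(Y/X)$,
+• $f, g \in \mathrm{Deck}(Y/X) \Rightarrow p \circ (f \circ g) = (p \circ f) \circ g = p \circ g = p \Rightarrow f \circ g \in \mathrm{Deck}(Y/X)$
+• $f \in \mathrm{Deck}(Y/X) \Rightarrow p \circ f = p \Rightarrow p \circ f^{-1} = (p \circ f) \circ f^{-1} = p \circ (f \circ f^{-1}) = p \Rightarrow f^{-1} \in \mathrm{Deck}(Y/X)$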
+b) Die Menge
+$\mathrm{Fix}(f) = \{ y \in Y \mid f(y) = y \}$
+ist abgeschlossen als Urbild der Diagonale $\Delta \subseteq Y \times Y$ unter der stetigen Abbildung
+$y \mapsto (f(y), y)$. Außerdem ist $\mathrm{Fix}(f)$ offen, denn ist $y \in \mathrm{Fix}(f)$, so sei $U$ eine Umgebung
+von $p(y) \in X$ wie in Definition 48 und $V \subseteq p^{-1}(U)$ die Komponente, die $y$ enthält;
+also $p: V \to U$ ein Homöomorphismus. Dann ist $W := f^{-1}(V) \cap V$ offene Umgebung
+von $y$.
+Für $z \in W$ ist $f(z) \in V$ und $p(f(z)) = p(z)$. Da $p$ injektiv auf $V$ ist, folgt $f(z) = z$,
+d. h. $W \subseteq \mathrm{Fix}(f)$.
+Da $Y$ zusammenhängend ist, folgt aus $\mathrm{Fix}(f) \neq \emptyset$ schon $\mathrm{Fix}(f) = Y$, also $f = \mathrm{id}_Y$.
+c) Es sei $x_0 \in X$, $\deg(p) = d$ und $p^{-1}(x_0) = \{ y_0, \dots, y_{d-1} \}$. Für $f \in \mathrm{Deck}(Y/X)$ ist
+$f(y_0) \in \{ y_0, \dots, y_{d-1} \}$.
+Zu $i \in \{ 0, \dots, d-1 \}$ gibt es höchstens ein $f \in \mathrm{Deck}(Y/X)$ mit $f(y_0) = y_i$, denn ist
+$f(y_0) = g(y_0)$, so ist $(g^{-1} \circ f)(y_0) = y_0$, also nach Bemerkung 57.b $g^{-1} \circ f = \mathrm{id}_Y$.
+d) Wenn jemand den Beweis macht, bitte an info@martin-thoma.de schicken.
+Beispiel 39 (Decktransformationen)
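+1) $p: \mathbb{R} \to S^1$: $\mathrm{Deck}(\mathbb{R}/S^1) = \{ t \mapsto t + n \mid n \in \mathbb{Z} \} \cong \mathbb{Z}$
+2) $p: \mathbb{R}^2 \to T^2$: $\mathrm{Deck}(\mathbb{R}^2/T^2) \cong \mathbb{Z} \times \mathbb{Z} = \mathbb{Z}^2$
+3) $p: S^n \to \mathcal{P}^n(\mathbb{R})$: $\mathrm{Deck}(S^n/\mathcal{P}^n(\mathbb{R})) = \{ x \mapsto \pm x \} \cong \mathbb{Z}/2\mathbb{Z}$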
+  3.3. ÜBERLAGERUNGEN
+Nun werden wir eine Verbindung zwischen der Decktransformationsgruppe und der Fundamentalgruppe
+ herstellen:
+Satz 3.8
+Ist $p: \tilde{X} \to X$ eine universelle Überlagerung, so gilt:
+$\mathrm{Deck}(\tilde{X}/X) \cong \pi_1(X, x_0) \quad \forall x_0 \in X$
+Beweis: Wähle $\tilde{x}_0 \in p^{-1}(x_0)$. Es sei $\rho: \mathrm{Deck}(\tilde{X}/X) \to \pi_1(X, x_0)$ die Abbildung, die $f$ auf $[p(\gamma_f)]$
+abbildet, wobei $\gamma_f$ ein Weg von $\tilde{x}_0$ nach $f(\tilde{x}_0)$ sei. Da $\tilde{X}$ einfach zusammenhängend ist, ist
+$\gamma_f$ bis auf Homotopie eindeutig bestimmt und damit auch $\rho$ wohldefiniert.
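+• $\rho$ ist Gruppenhomomorphismus: Seien $f, g \in \mathrm{Deck}(\tilde{X}/X)$ $\Rightarrow \gamma_{g \circ f} = \gamma_g * g(\gamma_f)$ $\Rightarrow$
+$p(\gamma_{g \circ f}) = p(\gamma_g) * \underbrace{(p \circ g)}_{= p}(\gamma_f) = p(\gamma_g) * p(\gamma_f)$, also $\rho(g \circ f) = \rho(g) \cdot \rho(f)$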
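+• $\rho$ ist injektiv: $\rho(f) = e \Rightarrow p(\gamma_f) \sim \gamma_{x_0} \overset{\text{Satz 3.2}}{\Longrightarrow} \gamma_f \sim \gamma_{\tilde{x}_0} \Rightarrow f(\tilde{x}_0) = \tilde{x}_0 \overset{\text{Bem. 57.c}}{\Longrightarrow} f = \mathrm{id}_{\tilde{X}}$.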
+• $\rho$ ist surjektiv: Sei $[\gamma] \in \pi_1(X, x_0)$, $\tilde{\gamma}$ Lift von $\gamma$ nach $\tilde{X}$ mit Anfangspunkt $\tilde{x}_0$. Der
+Endpunkt von $\tilde{\gamma}$ sei $\tilde{x}_1$.
+$p$ ist reguläre Überlagerung: Seien $\tilde{x}_0, \tilde{x}_1 \in \tilde{X}$ mit $p(\tilde{x}_0) = p(\tilde{x}_1)$. Nach Satz 3.5 gibt
+es genau eine Überlagerung $\tilde{p}: \tilde{X} \to \tilde{X}$ mit $p = p \circ \tilde{p}$ und $\tilde{p}(\tilde{x}_0) = \tilde{x}_1$. Somit ist $\tilde{p}$ eine
+Decktransformation und damit $p$ eine reguläre Überlagerung.
+Da $p$ reguläre Überlagerung ist, gibt es ein $f \in \mathrm{Deck}(\tilde{X}/X)$ mit $f(\tilde{x}_0) = \tilde{x}_1$.
+Aus der Definition von $\rho$ folgt: $\rho(f) = [p(\gamma_f)] = [\gamma]$
+
+Beispiel 40 (Bestimmung von π
+1(S 1
+))
+$p: \mathbb{R} \to S^1, t \mapsto (\cos 2\pi t, \sin 2\pi t)$ ist universelle Überlagerung, da $\mathbb{R}$ einfach zusammenhängend ist.
+Für n ∈ Z sei f
+n : R → R, t → t + n die Translation um n.
+Es gilt: (p ◦ f
+n)(t) = p(f
+n(t)) = p(t) ∀t ∈ R, d. h. f
+n ist Decktransformation.
+Ist umgekehrt g irgendeine Decktransformation, so gilt insbesondere für t = 0:
+(cos(2πg(0)), sin(2πg(0))) = (p ◦ g)(0) = p(0) = (1, 0)
+Es existiert n ∈ Z mit g(0) = n. Da auch f
+n(0) = 0 + n = n gilt, folgt mit Bemerkung 57.c
+g = f
+n. Damit folgt:
+$\mathrm{Deck}(\mathbb{R}/S^1) = \{ f_n \mid n \in \mathbb{Z} \} \cong \mathbb{Z}$
+Nach Satz 3.8 also $\pi_1(S^1) \cong \mathrm{Deck}(\mathbb{R}/S^1) \cong \mathbb{Z}$
+  3.4. GRUPPENOPERATIONEN
+3.4 Gruppenoperationen
+Definition 55
+Sei (G, ·) eine Gruppe und X eine Menge.
+Eine Gruppenoperation von G auf X ist eine Abbildung ◦ : G × X → X für die gilt:
+a) 1
+G ◦ x = x ∀x ∈ X
+b) (g · h) ◦ x = g ◦ (h ◦ x) ∀g, h ∈ G∀x ∈ X
+Beispiel 41
+1) G = (Z, +), X = R, n ◦ x = x + n
+2) G operiert auf X = G durch g ◦ h := g · h
+3) $G$ operiert auf $X = G$ durch $g \circ h := g \cdot h \cdot g^{-1}$, denn
+i) $1_G \circ h = 1_G \cdot h \cdot 1_G^{-1} = h$
+ii) $(g_1 \cdot g_2) \circ h = (g_1 \cdot g_2) \cdot h \cdot (g_1 \cdot g_2)^{-1}$
+$= g_1 \cdot (g_2 \cdot h \cdot g_2^{-1}) \cdot g_1^{-1}$
+$= g_1 \circ (g_2 \circ h)$
+Definition 56
+Sei G eine Gruppe, X ein topologischer Raum und ◦ : G × X → X eine Gruppenoperation.
+a) G operiert durch Homöomorphismen, wenn für jedes g ∈ G die Abbildung
+m
+g : X → X, x → g ◦ x
+ein Homöomorphismus ist.
+b) Ist G eine topologische Gruppe, so heißt die Gruppenoperation ◦ stetig, wenn
+∀g ∈ G : m
+g ist stetig
+gilt.
+Bemerkung 58
+Jede stetige Gruppenoperation ist eine Gruppenoperation durch Homöomorphismen.
+Beweis: Nach Voraussetzung ist $m_g := \circ|_{\{ g \} \times X}: X \to X, x \mapsto g \circ x$ stetig.
+Die Umkehrabbildung zu $m_g$ ist $m_{g^{-1}}$:
+$(m_{g^{-1}} \circ m_g)(x) = m_{g^{-1}}(m_g(x))$
+$= m_{g^{-1}}(g \circ x)$
+$= g^{-1} \circ (g \circ x)$
+$\overset{\text{Def. 55.b}}{=} (g^{-1} \cdot g) \circ x$
+$= 1_G \circ x$
+$\overset{\text{Def. 55.a}}{=} x$
+Beispiel 42
+In Beispiel 41.1 operiert Z durch Homöomorphismen.
+  3.4. GRUPPENOPERATIONEN
+Bemerkung 59
+Sei G eine Gruppe und X eine Menge.
+a) Die Gruppenoperationen von $G$ auf $X$ entsprechen bijektiv den Gruppenhomomorphismen
+$\varrho: G \to \mathrm{Perm}(X) = \mathrm{Sym}(X) = \{ f: X \to X \mid f \text{ ist bijektiv} \}$
+b) Ist $X$ ein topologischer Raum, so entsprechen dabei die Gruppenoperationen durch
+Homöomorphismen den Gruppenhomomorphismen $G \to \text{Homöo}(X)$
+Beweis:
+Sei $\circ: G \times X \to X$ eine Gruppenoperation von $G$ auf $X$. Dann sei $\varrho: G \to \mathrm{Perm}(X)$
+definiert durch $\varrho(g)(x) = g \circ x \;\forall g \in G, x \in X$, also $\varrho(g) = m_g$.
+$\varrho$ ist Homomorphismus: $\varrho(g_1 \cdot g_2) = m_{g_1 \cdot g_2} = m_{g_1} \circ m_{g_2} = \varrho(g_1) \circ \varrho(g_2)$, denn für $x \in X$:
+$\varrho(g_1 \cdot g_2)(x) = (g_1 \cdot g_2) \circ x = g_1 \circ (g_2 \circ x) = \varrho(g_1)(\varrho(g_2)(x)) = (\varrho(g_1) \circ \varrho(g_2))(x)$
+Umgekehrt: Sei $\varrho: G \to \mathrm{Perm}(X)$ Gruppenhomomorphismus. Definiere $\circ: G \times X \to X$
+durch $g \circ x = \varrho(g)(x)$.
+z. z. Definition 55.b:
+$g_1 \circ (g_2 \circ x) = \varrho(g_1)(g_2 \circ x)$
+$= \varrho(g_1)(\varrho(g_2)(x))$
+$= (\varrho(g_1) \circ \varrho(g_2))(x)$
+$\overset{\varrho \text{ ist Hom.}}{=} \varrho(g_1 \cdot g_2)(x)$
+$= (g_1 \cdot g_2) \circ x$
+z. z. Definition 55.a: $1_G \circ x = \varrho(1_G)(x) = \mathrm{id}_X(x) = x$, weil $\varrho$ ein Homomorphismus ist.
+Beispiel 43
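+Sei $X$ ein wegzusammenhängender topologischer Raum, $p: \tilde{X} \to X$ eine universelle Überlagerung,
+$x_0 \in X$, $\tilde{x}_0 \in \tilde{X}$ mit $p(\tilde{x}_0) = x_0$.
+Dann operiert $\pi_1(X, x_0)$ auf $\tilde{X}$ durch Homöomorphismen wie folgt:
+Für $[\gamma] \in \pi_1(X, x_0)$ und $\tilde{x} \in \tilde{X}$ sei $[\gamma] \circ \tilde{x} = \widetilde{\gamma * \delta}(1)$, wobei $\tilde{\delta}$ ein Weg von $\tilde{x}_0$ nach $\tilde{x}$ in $\tilde{X}$
+sei, $\delta := p(\tilde{\delta}) = p \circ \tilde{\delta}$.
+Also: $\delta$ ist ein Weg in $X$ von $x_0$ nach $x = p(\tilde{x})$ und $\widetilde{\gamma * \delta}$ die Liftung von $\gamma * \delta$ mit
+Anfangspunkt $\tilde{x}_0$.
+$[\gamma] \circ \tilde{x}$ hängt nicht von der Wahl von $\tilde{\delta}$ ab; ist $\tilde{\delta}'$ ein anderer Weg von $\tilde{x}_0$ nach $\tilde{x}$, so sind $\tilde{\delta}$
+und $\tilde{\delta}'$ homotop, also auch $\widetilde{\gamma * \delta}$ und $\widetilde{\gamma * \delta'}$ homotop.
+Gruppenoperation, denn:
+i) $[e] \circ \tilde{x} = \widetilde{e * \delta}(1) = \tilde{x}$
+ii) $\widetilde{\gamma_1 * \gamma_2 * \delta}(1) = [\gamma_1 * \gamma_2] \circ \tilde{x} = ([\gamma_1] * [\gamma_2]) \circ \tilde{x}$
+$\widetilde{\gamma_1 * \gamma_2 * \delta}(1) = [\gamma_1] \circ (\widetilde{\gamma_2 * \delta})(1) = [\gamma_1] \circ ([\gamma_2] \circ \tilde{x})$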
+Erinnerung: Die Konstruktion aus Bemerkung 59 induziert zu der Gruppenoperation von $\pi_1(X, x_0)$
+aus Beispiel 43 einen Gruppenhomomorphismus $\varrho: \pi_1(X, x_0) \to \text{Homöo}(\tilde{X})$. Nach Satz 3.8 ist
+$\varrho(\pi_1(X, x_0)) = \mathrm{Deck}(\tilde{X}/X) = \{ f: \tilde{X} \to \tilde{X} \text{ Homöomorphismus} \mid p \circ f = p \}$
+  3.4. GRUPPENOPERATIONEN
+Beispiel 44
+Sei $X := S^2 \subseteq \mathbb{R}^3$ und $\tau$ die Drehung um die $z$-Achse um $180^\circ$.
+$G = \langle \tau \rangle = \{ \mathrm{id}, \tau \}$ operiert auf $S^2$ durch Homöomorphismen.
+Frage: Was ist $S^2/G$? Ist $S^2/G$ eine Mannigfaltigkeit?
+4 Euklidische und nichteuklidische
+Geometrie
+Definition 57
+Das Tripel $(X, d, G)$ heißt genau dann eine Geometrie, wenn $(X, d)$ ein metrischer Raum
+und $\emptyset \neq G \subseteq \mathcal{P}(X)$ gilt. Dann heißt $G$ die Menge aller Geraden.
+4.1 Axiome für die euklidische Ebene
+Axiome bilden die Grundbausteine jeder mathematischen Theorie. Eine Sammlung aus Axiomen
+nennt man Axiomensystem. Da der Begriff des Axiomensystems so grundlegend ist, hat man
+auch ein paar sehr grundlegende Forderungen an ihn: Axiomensysteme sollen widerspruchsfrei
+sein, die Axiome sollen möglichst unabhängig sein und Vollständigkeit wäre auch toll. Mit
+Unabhängigkeit ist gemeint, dass kein Axiom sich aus einem anderen herleiten lässt. Dies scheint
+auf den ersten Blick eine einfache Eigenschaft zu sein. Auf den zweiten Blick muss man jedoch
+einsehen, dass das Parallelenproblem, also die Frage, ob das Parallelenaxiom unabhängig von
+den restlichen Axiomen ist, über 2000 Jahre nicht gelöst wurde. Ein ganz anderes Kaliber ist
+die Frage nach der Vollständigkeit. Ein Axiomensystem gilt als vollständig, wenn jede Aussage
+innerhalb des Systems verifizierbar oder falsifizierbar ist. Interessant ist hierbei der Gödelsche
+Unvollständigkeitssatz, der z. B. für die Arithmetik beweist, dass nicht alle Aussagen formal
+bewiesen oder widerlegt werden können.
+Kehren wir nun jedoch zurück zur Geometrie. Euklid hat in seiner Abhandlung „Die Elemente“
+ein Axiomensystem für die Geometrie aufgestellt.
+Euklids Axiome
+• Strecke zwischen je zwei Punkten
+• Jede Strecke bestimmt genau eine Gerade
+• Kreis (um jeden Punkt mit jedem Radius)
+• Je zwei rechte Winkel sind gleich (Isometrie, Bewegung)
+• Parallelenaxiom von Euklid:
+Wird eine Gerade so von zwei Geraden geschnitten, dass die Summe der Innenwinkel
+kleiner als zwei Rechte ist, dann schneiden sich diese Geraden auf der Seite dieser Winkel.
+Man mache sich klar, dass das nur dann nicht der Fall ist, wenn beide Geraden parallel
+ sind und senkrecht auf die erste stehen.
+Definition 58
+Eine euklidische Ebene ist eine Geometrie (X, d, G), die Axiome §1 - §5 erfüllt:
+§1) Inzidenzaxiome:
+ 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
+(i) Zu $P \neq Q \in X$ gibt es genau ein $g \in G$ mit $\{ P, Q \} \subseteq g$.
+(ii) |g| ≥ 2 ∀g ∈ G
+(iii) X /∈ G
+§2) Abstandsaxiom: Zu P, Q, R ∈ X gibt es genau dann ein g ∈ G mit { P, Q, R } ⊆ g,
+wenn gilt:
+• d(P, R) = d(P, Q) + d(Q, R) oder
+• d(P, Q) = d(P, R) + d(R, Q) oder
+• d(Q, R) = d(Q, P ) + d(P, R)
+Definition 59
+Sei (X, d, G) eine Geometrie und seien P, Q, R ∈ X .
+a) P, Q, R liegen kollinear, wenn es g ∈ G gibt mit { P, Q, R } ⊆ g.
+b) Q liegt zwischen P und R, wenn d(P, R) = d(P, Q) + d(Q, R)
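+c) Strecke $\overline{PR} := \{ Q \in X \mid Q \text{ liegt zwischen } P \text{ und } R \}$
+d) Halbgeraden:
+$PR^+ := \{ Q \in X \mid Q \text{ liegt zwischen } P \text{ und } R \text{ oder } R \text{ liegt zwischen } P \text{ und } Q \}$
+$PR^- := \{ Q \in X \mid P \text{ liegt zwischen } Q \text{ und } R \}$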
+P R
+P R−
+ P R
+ P R+
+Abbildung 4.1: Halbgeraden
+Bemerkung 60
+a) $PR^+ \cup PR^- = PR$
+b) $PR^+ \cap PR^- = \{ P \}$
+Beweis:
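+a) „$\subseteq$“ folgt direkt aus der Definition von $PR^+$ und $PR^-$
+„$\supseteq$“: Sei $Q \in PR \Rightarrow P, Q, R$ sind kollinear.
+$\overset{\S 2}{\Rightarrow} \begin{cases} Q \text{ liegt zwischen } P \text{ und } R \Rightarrow Q \in PR^+ \\ R \text{ liegt zwischen } P \text{ und } Q \Rightarrow Q \in PR^+ \\ P \text{ liegt zwischen } Q \text{ und } R \Rightarrow Q \in PR^- \end{cases}$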
+b) „$\supseteq$“ ist offensichtlich
+„$\subseteq$“: Sei $Q \in PR^+ \cap PR^-$. Dann ist $d(Q, R) = d(P, Q) + d(P, R)$ weil $Q \in PR^-$ und
+$\begin{cases} d(P, R) = d(P, Q) + d(Q, R) \text{ oder} \\ d(P, Q) = d(P, R) + d(R, Q) \end{cases}$
+ 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
+⇒ d(Q, R) = 2d(P, Q) + d(Q, R)
+⇒ d(P, Q) = 0
+⇒ P = Q
+d(P, Q) = 2d(P, R) + d(P, Q)
+⇒ P = R
+⇒ Widerspruch
+Definition 60
+§3) Anordnungsaxiome
+(i) Zu jeder Halbgerade $H$ mit Anfangspunkt $P \in X$ und jedem $r \in \mathbb{R}_{\geq 0}$ gibt es
+genau ein $Q \in H$ mit $d(P, Q) = r$.
+(ii) Jede Gerade $g$ zerlegt $X \setminus g = H_1 \dot{\cup} H_2$ in zwei nichtleere Teilmengen $H_1, H_2$, sodass
+für alle $A \in H_i$, $B \in H_j$ mit $i, j \in \{ 1, 2 \}$ gilt: $\overline{AB} \cap g \neq \emptyset \Leftrightarrow i \neq j$.
+Diese Teilmengen $H_i$ heißen Halbebenen bzgl. $g$.
+§4) Bewegungsaxiom: Zu $P, Q, P', Q' \in X$ mit $d(P, Q) = d(P', Q')$ gibt es mindestens
+2 Isometrien $\varphi_1, \varphi_2$ mit $\varphi_i(P) = P'$ und $\varphi_i(Q) = Q'$ mit $i = 1, 2$.$^1$
+§5) Parallelenaxiom: Zu jeder Geraden g ∈ G und jedem Punkt P ∈ X \ g gibt es
+höchstens ein h ∈ G mit P ∈ h und h ∩ g = ∅. h heißt Parallele zu g durch P .
+Satz 4.1 (Satz von Pasch)
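+Seien $P$, $Q$, $R$ nicht kollinear, $g \in G$ mit $g \cap \{ P, Q, R \} = \emptyset$ und $g \cap \overline{PQ} \neq \emptyset$.
+Dann ist entweder $g \cap \overline{PR} \neq \emptyset$ oder $g \cap \overline{QR} \neq \emptyset$.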
+Dieser Satz besagt, dass Geraden, die eine Seite eines Dreiecks (also nicht nur eine Ecke)
+schneiden, auch eine weitere Seite schneiden.
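+Beweis: $g \cap \overline{PQ} \neq \emptyset$
+$\overset{\S 3 (ii)}{\Rightarrow}$ $P$ und $Q$ liegen in verschiedenen Halbebenen bzgl. $g$
+$\Rightarrow$ o. B. d. A. $R$ und $P$ liegen in verschiedenen Halbebenen bzgl. $g$
+$\Rightarrow g \cap \overline{RP} \neq \emptyset$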
+Bemerkung 61
+Seien $P, Q \in X$ mit $P \neq Q$ sowie $A, B \in X \setminus PQ$ mit $A \neq B$. Außerdem seien $A$ und $B$ in der
+selben Halbebene bzgl. $PQ$ sowie $Q$ und $B$ in der selben Halbebene bzgl. $PA$.
+Dann gilt: $PB^+ \cap \overline{AQ} \neq \emptyset$
+Auch Bemerkung 61 lässt sich umgangssprachlich sehr viel einfacher ausdrücken: Die Diagonalen
+eines konvexen Vierecks schneiden sich.
+Beweis: Sei $P' \in PQ^-$, $P' \neq P$ $\overset{\text{Satz 4.1}}{\Longrightarrow}$ $PB$ schneidet $\overline{AP'} \cup \overline{AQ}$
+Sei C der Schnittpunkt. Dann gilt:
+$^1$ Die „Verschiebung“ von $P'Q'$ nach $PQ$ und die Isometrie, die zusätzlich an der Gerade durch $P$ und $Q$ spiegelt.
+ 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
+PP  QA B
+C
+Abbildung 4.2: Situation aus Bemerkung 61
+(i) $C \in PB^+$, denn $A$ und $B$ liegen in derselben Halbebene bzgl. $PQ = P'Q$, also auch
+$\overline{AP'}$ und $\overline{AQ}$.
+(ii) $C$ liegt in derselben Halbebene bzgl. $PA$ wie $B$, weil das für $Q$ gilt.
+$\overline{AP'}$ liegt in der anderen Halbebene bzgl. $PA$ $\Rightarrow C \notin \overline{P'A} \Rightarrow C \in \overline{AQ}$
+Da $C \in PB^+$ und $C \in \overline{AQ}$ folgt nun direkt: $\emptyset \neq \{ C \} \subseteq PB^+ \cap \overline{AQ}$
+Bemerkung 62
+Seien $P, Q \in X$ mit $P \neq Q$ und $A, B \in X \setminus PQ$ in der selben Halbebene bzgl. $PQ$. Außerdem
+sei $d(A, P) = d(B, P)$ und $d(A, Q) = d(B, Q)$.
+Dann ist $A = B$.
+ P QAB
+Abbildung 4.3: Bemerkung 62: Die beiden roten und die beiden blauen Linien sind gleich lang.
+Intuitiv weiß man, dass daraus folgt, dass A = B gilt.
+Beweis: durch Widerspruch
+Annahme: $A \neq B$
+Dann ist $B \notin (PA \cup QA)$ wegen §2.
+1. Fall: $Q$ und $B$ liegen in derselben Halbebene bzgl. $PA$
+$\overset{\text{Bem. 61}}{\Longrightarrow} PB^+ \cap \overline{AQ} \neq \emptyset$.
+Sei $C$ der Schnittpunkt von $PB$ und $AQ$.
+Dann gilt:
+(i) $d(A, C) + d(C, Q) = d(A, Q) \overset{\text{Vor.}}{=} d(B, Q) < d(B, C) + d(C, Q) \Rightarrow d(A, C) < d(B, C)$
+ 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
+P QB CA
+ (a) 1. Fall P QAB
+ (b) 2. Fall
+Abbildung 4.4: Fallunterscheidung aus Bemerkung 62
+(ii) a) B liegt zwischen P und C .
+d(P, A) + d(A, C ) > d(P, C ) = d(P, B ) + d(B , C ) = d(P, A) + d(B , C ) ⇒
+d(A, C ) > d(B , C ) ⇒ Widerspruch zu Punkt (i)
+b) C liegt zwischen P und B
+d(P, C ) + d(C, A) > d(P, A) = d(P, B ) = d(P, C ) + d(C, B )
+⇒ d(C, A) > d(C, B )
+⇒ Widerspruch zu Punkt (i)
+2. Fall: $Q$ und $B$ liegen in verschiedenen Halbebenen bzgl. $PA$.
+Dann liegen A und Q in derselben Halbebene bzgl. P B .
+Tausche A und B ⇒ Fall 1
+Bemerkung 63
+Sei $(X, d, G)$ eine Geometrie, die §1 - §3 erfüllt, $P, Q \in X$ mit $P \neq Q$ und $\varphi$ eine Isometrie
+mit $\varphi(P) = P$ und $\varphi(Q) = Q$.
+Dann gilt $\varphi(S) = S \;\forall S \in PQ$.
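+Beweis:
+O. B. d. A. sei $S \in \overline{PQ}$ $\overset{\S 2}{\Leftrightarrow} d(P, Q) = d(P, S) + d(S, Q)$
+$\overset{\varphi \in \mathrm{Iso}(X)}{\Rightarrow} d(\varphi(P), \varphi(Q)) = d(\varphi(P), \varphi(S)) + d(\varphi(S), \varphi(Q))$
+$\overset{P, Q \in \mathrm{Fix}(\varphi)}{\Rightarrow} d(P, Q) = d(P, \varphi(S)) + d(\varphi(S), Q)$
+$\Rightarrow \varphi(S)$ liegt zwischen $P$ und $Q$
+$\Rightarrow d(P, S) = d(\varphi(P), \varphi(S)) = d(P, \varphi(S))$
+$\overset{\S 3 (i)}{\Rightarrow} \varphi(S) = S$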
+
+Proposition 4.2
+In einer Geometrie, die §1 - §3 erfüllt, gibt es zu $P, P', Q, Q'$ mit $d(P, Q) = d(P', Q')$
+höchstens zwei Isometrien mit $\varphi(P) = P'$ und $\varphi(Q) = Q'$
+ 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
+Aus den Axiomen folgt, dass es in der Situation von §4 höchstens zwei Isometrien mit
+$\varphi_i(P) = P'$ und $\varphi_i(Q) = Q'$ gibt.
+Beweis: Seien $\varphi_1, \varphi_2, \varphi_3$ Isometrien mit $\varphi_i(P) = P'$, $\varphi_i(Q) = Q'$ mit $i = 1, 2, 3$.
+Der Beweis von Proposition 4.2 erfolgt über zwei Teilaussagen:
+(Teil i) ∃R ∈ X \ P Q mit ϕ
+1(R) = ϕ
+2(R).
+(Teil ii) Hat ϕ 3 Fixpunkte, die nicht kollinear sind, so ist ϕ = id
+X .
+Aus (Teil i) und (Teil ii) folgt, dass $\varphi_2^{-1} \circ \varphi_1 = \mathrm{id}_X$, also $\varphi_2 = \varphi_1$, da $P$, $Q$ und $R$ in diesem
+Fall Fixpunkte sind.
+Nun zu den Beweisen der Teilaussagen:
+(Teil i)
+Sei $R \in X \setminus PQ$. Von den drei Punkten $\varphi_1(R), \varphi_2(R), \varphi_3(R)$ liegen zwei in der selben
+Halbebene bzgl. $P'Q' = \varphi_i(PQ)$.
+O. B. d. A. seien $\varphi_1(R)$ und $\varphi_2(R)$ in der selben Halbebene.
+Es gilt: $d(P', \varphi_1(R)) = d(\varphi_1(P), \varphi_1(R))$
+$= d(P, R)$
+$= d(\varphi_2(P), \varphi_2(R))$
+$= d(P', \varphi_2(R))$
+und analog $d(Q', \varphi_1(R)) = d(Q', \varphi_2(R))$
+Mit Bemerkung 62 folgt $\varphi_1(R) = \varphi_2(R)$.
+(Teil ii)
+Seien $P$, $Q$ und $R$ Fixpunkte von $\varphi$, $R \notin PQ$ und $A \notin PQ \cup PR \cup QR$. Sei $B \in
+\overline{PQ} \setminus \{ P, Q \}$. Dann ist $\varphi(B) = B$ wegen Bemerkung 63.
+Ist $R \in AB$, so enthält $AB$ 2 Fixpunkte von $\varphi$ $\overset{\text{Bem. 63}}{\Longrightarrow} \varphi(A) = A$.
+P
+ B QC RA
+Abbildung 4.5: P, Q, R sind Fixpunkte, B ∈ P Q \ { P, Q }, A /∈ P Q ∪ P R ∪ QR
+Ist $R \notin AB$, so ist $AB \cap \overline{PR} \neq \emptyset$ oder $AB \cap \overline{RQ} \neq \emptyset$ nach Satz 4.1. Der Schnittpunkt
+$C$ ist dann Fixpunkt von $\varphi$ nach Bemerkung 63 $\Rightarrow \varphi(A) = A$.
+Bemerkung 64 (SWS-Kongruenzsatz)
+Sei $(X, d, G)$ eine Geometrie, die §1 - §4 erfüllt. Seien außerdem $\triangle ABC$ und $\triangle A'B'C'$
+Dreiecke, für die gilt:
+(i) $d(A, B) = d(A', B')$
+(ii) $\angle CAB \cong \angle C'A'B'$
+ 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
+(iii) $d(A, C) = d(A', C')$
+Dann ist $\triangle ABC$ kongruent zu $\triangle A'B'C'$.
+Beweis: Sei $\varphi$ die Isometrie mit $\varphi(A') = A$, $\varphi(A'C'^+) = AC^+$ und $\varphi(A'B'^+) = AB^+$. Diese
+Isometrie existiert wegen §4.
+$\Rightarrow C \in \varphi(A'C'^+)$ und $B \in \varphi(A'B'^+)$.
+$d(A, C) = d(A', C') = d(\varphi(A'), \varphi(C')) = d(A, \varphi(C')) \overset{\S 3 (i)}{\Longrightarrow} \varphi(C') = C$
+$d(A, B) = d(A', B') = d(\varphi(A'), \varphi(B')) = d(A, \varphi(B')) \overset{\S 3 (i)}{\Longrightarrow} \varphi(B') = B$
+Also gilt insbesondere $\varphi(\triangle A'B'C') = \triangle ABC$.
+Bemerkung 65 (WSW-Kongruenzsatz)
+Sei $(X, d, G)$ eine Geometrie, die §1 - §4 erfüllt. Seien außerdem $\triangle ABC$ und $\triangle A'B'C'$
+Dreiecke, für die gilt:
+(i) $d(A, B) = d(A', B')$
+(ii) $\angle CAB \cong \angle C'A'B'$
+(iii) $\angle ABC \cong \angle A'B'C'$
+Dann ist $\triangle ABC$ kongruent zu $\triangle A'B'C'$.
+Beweis: Sei $\varphi$ die Isometrie mit $\varphi(A') = A$, $\varphi(B') = B$ und $\varphi(C')$ liegt in der selben Halbebene
+bzgl. $AB$ wie $C$. Diese Isometrie existiert wegen §4.
+Aus $\angle CAB = \angle C'A'B' = \angle \varphi(C')\varphi(A')\varphi(B') = \angle \varphi(C')AB$ folgt, dass $\varphi(C') \in AC^+$.
+Analog folgt aus $\angle ABC = \angle A'B'C' = \angle \varphi(A')\varphi(B')\varphi(C') = \angle AB\varphi(C')$, dass $\varphi(C') \in BC^+$.
+Dann gilt $\varphi(C') \in AC \cap BC = \{ C \} \Rightarrow \varphi(C') = C$.
+Es gilt also $\varphi(\triangle A'B'C') = \triangle ABC$.
+Definition 61
+a) Ein Winkel ist ein Punkt $P \in X$ zusammen mit 2 Halbgeraden mit Anfangspunkt $P$.
+Man schreibt: $\angle R_1 P R_2$ bzw. $\angle R_2 P R_1$ $^2$
+b) Zwei Winkel sind gleich, wenn es eine Isometrie gibt, die den einen Winkel auf den
+anderen abbildet.
+c) $\angle R_1' P' R_2'$ heißt kleiner als $\angle R_1 P R_2$, wenn es eine Isometrie $\varphi$ gibt, mit $\varphi(P') = P$,
+$\varphi(P'R_1'^+) = PR_1^+$ und $\varphi(R_2')$ liegt in der gleichen Halbebene bzgl. $PR_1$ wie $R_2$ und in
+der gleichen Halbebene bzgl. $PR_2$ wie $R_1$
+d) Im Dreieck P QR gibt es Innenwinkel und Außenwinkel.
+Bemerkung 66
+In einem Dreieck ist jeder Innenwinkel kleiner als jeder nicht anliegende Außenwinkel.
+Beweis: Zeige $\angle PRQ < \angle RQP'$.
+Sei $M$ der Mittelpunkt der Strecke $\overline{QR}$ und $P' \in PQ^+ \setminus \overline{PQ}$. Sei $A \in MP^-$ mit $d(P, M) =
+d(M, A)$.
+$^2$ Für dieses Skript gilt: $\angle R_1 P R_2 = \angle R_2 P R_1$. Also sind insbesondere alle Winkel $\leq 180^\circ$.
+ 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
+P
+ R
+1 R
+1R
+2R
+2
+(a) ∠R
+1P
+R
+2 ist kleiner als ∠R
+1P R
+2,
+vgl. Definition 61.c P
+Q R
+(b) Innenwinkel und Außenwinkel
+ in P QR, vgl. Definition
+ 61.d
+Abbildung 4.6: Situation aus Definition 61
+Q M
+A P
+ R
+(a) Parallelogramm AQPR α βR
+Q P
+(b) Innen- und Außenwinkel
+ von P QR
+Abbildung 4.7: Situation aus Bemerkung 66
+Es gilt: $d(Q, M) = d(M, R)$ und $d(P, M) = d(M, A)$ sowie $\angle PMR = \angle AMQ$ $\Rightarrow \triangle PMR$
+ist kongruent zu $\triangle AMQ$, denn eine der beiden Isometrien, die $\angle PMR$ auf $\angle AMQ$ abbildet,
+bildet $R$ auf $Q$ und $P$ auf $A$ ab.
+$\Rightarrow \angle MQA = \angle MRP = \angle QRP = \angle PRQ$.
+Noch zu zeigen: $\angle MQA < \angle RQP'$, denn $A$ liegt in der selben Halbebene bzgl. $PQ$ wie $M$.
+Proposition 4.3 (Existenz der Parallelen)
+Sei (X, d, G) eine Geometrie mit den Axiomen §1 - §4.
+Dann gibt es zu jeder Geraden g ∈ G und jedem Punkt P ∈ X \ g mindestens eine
+Parallele h ∈ G mit P ∈ h und g ∩ h = ∅.
+Beweis: Seien $P, Q \in f \in G$ und $\varphi$ die Isometrie, die $Q$ auf $P$ und $P$ auf $P' \in f$ mit
+$d(P, P') = d(P, Q)$ abbildet und die Halbebenen bzgl. $f$ erhält.
+ 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
+Q hf
+ gP
+Abbildung 4.8: Situation aus Proposition 4.3
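+Annahme: $\varphi(g) \cap g \neq \emptyset$
+$\Rightarrow$ Es gibt einen Schnittpunkt $\{ R \} = \varphi(g) \cap g$.
+Dann ist $\angle RQP = \angle RQP' < \angle RPP'$ nach Bemerkung 66 und $\angle RQP = \angle RPP'$, weil
+$\varphi(\angle RQP) = \angle RPP'$.
+$\Rightarrow$ Widerspruch
+$\Rightarrow \varphi(g) \cap g = \emptyset$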
+Folgerung 4.4
+Die Summe zweier Innenwinkel in einem Dreieck ist kleiner als π.
+D. h. es gibt eine Isometrie ϕ mit ϕ(Q) = P und ϕ(QP +
+) = P R+
+, sodass ϕ(R) in der gleichen
+Halbebene bzgl. P Q liegt wie R.
+Beweis: Die Summe eines Innenwinkels mit den anliegenden Außenwinkeln ist π, d. h. die
+beiden Halbgeraden bilden eine Gerade.
+Abbildung 4.9: In der sphärischen Geometrie gibt es, im Gegensatz zur euklidischen Geometrie,
+Dreiecke mit drei 90◦
+-Winkeln.
+Proposition 4.5
+In einer Geometrie mit den Axiomen §1 - §4 ist in jedem Dreieck die Summe der
+Innenwinkel ≤ π.
+ 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
+Sei im Folgenden „ IWS“ die „Innenwinkelsumme“.
+Beweis: Sei $\triangle$ ein Dreieck mit $\mathrm{IWS}(\triangle) = \pi + \varepsilon$
+αβ
+γ
+ P
+(a) Summe der Winkel α, β und γ α
+1
+α
+2 βγ
+ M
+A BC A
+α
+(b) Situation aus Proposition 4.5
+Abbildung 4.10: Situation aus Proposition 4.5
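+Sei $\alpha$ ein Innenwinkel von $\triangle$.
+Beh.: Es gibt ein Dreieck $\triangle'$ mit $\mathrm{IWS}(\triangle') = \mathrm{IWS}(\triangle)$ und einem Innenwinkel $\alpha' \leq \frac{\alpha}{2}$.
+Dann gibt es für jedes $n$ ein $\triangle_n$ mit $\mathrm{IWS}(\triangle_n) = \mathrm{IWS}(\triangle)$ und Innenwinkel $\alpha' \leq \frac{\alpha}{2^n}$. Für
+$\frac{\alpha}{2^n} < \varepsilon$ ist dann die Summe der beiden anderen Innenwinkel von $\triangle_n$ größer als $\pi$ $\Rightarrow$ Widerspruch
+zu Folgerung 4.4.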
+Beweis: Es seien $A, B, C \in X$ und $\triangle$ das Dreieck mit den Eckpunkten $A, B, C$ und $\alpha$ sei
+der Innenwinkel bei $A$, $\beta$ der Innenwinkel bei $B$ und $\gamma$ der Innenwinkel bei $C$.
+Sei $M$ der Mittelpunkt der Strecke $\overline{BC}$. Sei außerdem $\alpha_1 = \angle CAM$ und $\alpha_2 = \angle BAM$.
+Sei weiter $A' \in MA^-$ mit $d(A', M) = d(A, M)$.
+Die Situation ist in Abbildung 4.10b skizziert.
+$\Rightarrow \triangle(MA'C)$ und $\triangle(MAB)$ sind kongruent. $\Rightarrow \angle ABM = \angle A'CM$ und $\angle MA'C =
+\angle MAB$. $\Rightarrow \alpha + \beta + \gamma = \mathrm{IWS}(\triangle ABC) = \mathrm{IWS}(\triangle AA'C)$ und $\alpha_1 + \alpha_2 = \alpha$, also o. B. d. A.
+$\alpha_1 \leq \frac{\alpha}{2}$
+Bemerkung 67
+In einer euklidischen Ebene ist in jedem Dreieck die Innenwinkelsumme gleich π.
+α
+α
+α ββ
+γ
+A BC
+ g
+Abbildung 4.11: Situation aus Bemerkung 67
+Beweis: Sei g eine Parallele von AB durch C .
+• Es gilt $\alpha' = \alpha$ wegen Proposition 4.3.
+• Es gilt $\beta' = \beta$ wegen Proposition 4.3.
+• Es gilt $\alpha'' = \alpha'$ wegen Aufgabe 8.
+ 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE
+$\Rightarrow \mathrm{IWS}(\triangle ABC) = \gamma + \alpha'' + \beta' = \pi$
+Aus der Eigenschaft, dass die Innenwinkelsumme von Dreiecken in der euklidischen Ebene gleich
+π ist, folgen direkt die Kongruenzsätze SWW und WWS über den Kongruenzsatz WSW.
+4.2 Weitere Eigenschaften einer euklidischen Ebene
+Satz 4.6 (Strahlensatz)
+In ähnlichen Dreiecken sind Verhältnisse entsprechender Seiten gleich.
+xy
+−1 0 1 2 3 40123
+ z
+ x λ2
+z
+λ2
+x
+Abbildung 4.12: Strahlensatz
+Der Beweis wird hier nicht geführt. Für Beweisvorschläge wäre ich dankbar.
+A
+ B  C
+B C
+cb a
+ c b
+ a
+Abbildung 4.13: Die Dreiecke AB C und AB
+C
+ sind ähnlich.
+4.2.1 Flächeninhalt
+Definition 62
+„Simplizialkomplexe“ in euklidischer Ebene (X, d) heißen flächengleich, wenn sie sich in
+kongruente Dreiecke zerlegen lassen.
+ 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE
+(a) Zwei kongruente Dreiecke (b) Zwei weitere kongruente Dreiecke
+
+Abbildung 4.14: Flächengleichheit
+Der Flächeninhalt eines Dreiecks ist 1
+/2 · Grundseite · Höhe.
+A BC
+L
+C h
+c
+ c
+(a) 1
+/2 · |AB| · |h
+c| ·
+A BC
+ L
+A
+h
+ac
+(b) 1
+/2 · |BC | · |h
+a|
+Abbildung 4.15: Flächenberechnung im Dreieck
+Zu zeigen: Unabhängigkeit von der gewählten Grundseite.
+α α
+γ γ
+A BC
+ L
+A
+L
+C
+Abbildung 4.16: $\triangle ABL_A$ und $\triangle CL_CB$ sind ähnlich, weil $\mathrm{IWS} = \pi$
+$\overset{\text{Strahlensatz}}{\Longrightarrow} \frac{a}{h_c} = \frac{c}{h_a} \rightarrow a \cdot h_a = c \cdot h_c$
+Satz 4.7 (Satz des Pythagoras)
+Im rechtwinkligen Dreieck gilt $a^2 + b^2 = c^2$, wobei $c$ die Hypotenuse und $a, b$ die beiden
+Katheten sind.
+Beweis: $(a + b) \cdot (a + b) = a^2 + 2ab + b^2 = c^2 + 4 \cdot (\frac{1}{2} \cdot a \cdot b)$
+ 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE
+cb a
+A BC
+·
+(a) a, b sind Katheten und c ist die Hypotenuse
+ b a baba
+b
+a ·
+···
+γ
+ (b) Beweisskizze
+Abbildung 4.17: Satz des Pythagoras
+Satz 4.8
+Bis auf Isometrie gibt es genau eine euklidische Ebene (X, d, G), nämlich X = R2
+,
+d = euklidischer Abstand, G = Menge der üblichen Geraden.
+Beweis:
+(i) (R2
+, d
+Euklid) ist offensichtlich eine euklidische Ebene.
+(ii) Sei $(X, d)$ eine euklidische Ebene und $g_1, g_2$ Geraden in $X$, die sich in einem Punkt $0$
+im rechten Winkel schneiden.
+Sei $P \in X \setminus (g_1 \cup g_2)$ ein Punkt und $P_X$ der Fußpunkt des Lots von $P$ auf $g_1$ (vgl.
+Aufgabe 9 (c)) und $P_Y$ der Fußpunkt des Lots von $P$ auf $g_2$.
+Sei $x_P := d(P_X, 0)$ und $y_P := d(P_Y, 0)$.
+In Abbildung 4.19 wurde die Situation skizziert.
+Sei $h: X \to \mathbb{R}^2$ eine Abbildung mit $h(P) := (x_P, y_P)$. Dadurch wird $h$ auf dem
+Quadranten definiert, in dem $P$ liegt, d. h.
+$\forall Q \in X$ mit $\overline{PQ} \cap g_1 = \emptyset = \overline{PQ} \cap g_2$
+Fortsetzung auf ganz X durch konsistente Vorzeichenwahl.
+Im Folgenden werden zwei Aussagen gezeigt:
+(i) h ist surjektiv
+(ii) h ist eine Isometrie
+Da jede Isometrie injektiv ist, folgt aus (i) und (ii), dass h bijektiv ist.
+Nun zu den Beweisen der Teilaussagen:
+ 4.3. HYPERBOLISCHE GEOMETRIE
+·
+ g
+1g
+2
+ PX
+ (a) Schritt 1 ·
+ g
+1g
+2
+ x
+Py
+P P
+0
+ P
+XP
+YX
+ (b) Schritt 2
+Abbildung 4.18: Beweis zu Satz 4.8
+(i) Sei $(x, y) \in \mathbb{R}^2$, z. B. $x \geq 0, y \geq 0$. Sei $P' \in g_1$ mit $d(0, P') = x$ und $P'$ auf der
+gleichen Seite von $g_2$ wie $P$.
+ g
+1g
+2
+ x
+Py
+P P Q
+0 R
+X
+Abbildung 4.19: Beweis zu Satz 4.8
+(ii) Zu zeigen: $d(P, Q) = d(h(P), h(Q))$
+$d(P, Q)^2 \overset{\text{Pythagoras}}{=} d(P, R)^2 + d(R, Q)^2 = (y_Q - y_P)^2 + (x_Q - x_P)^2$.
+$h(Q) = (x_Q, y_Q)$
+4.3 Hyperbolische Geometrie
+Definition 63
+Sei
+$\mathbb{H} := \{ z \in \mathbb{C} \mid \Im(z) > 0 \} = \{ (x, y) \in \mathbb{R}^2 \mid y > 0 \}$
+ 4.3. HYPERBOLISCHE GEOMETRIE
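+die obere Halbebene bzw. Poincaré-Halbebene und $G = G_1 \cup G_2$ mit
+$G_1 = \{ g_1 \subseteq \mathbb{H} \mid \exists m \in \mathbb{R}, r \in \mathbb{R}_{>0}: g_1 = \{ z \in \mathbb{H} : |z - m| = r \} \}$
+$G_2 = \{ g_2 \subseteq \mathbb{H} \mid \exists x \in \mathbb{R}: g_2 = \{ z \in \mathbb{H} : \Re(z) = x \} \}$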
+Die Elemente aus G heißen hyperbolische Geraden.
+Bemerkung 68 (Eigenschaften der hyperbolischen Geraden)
+Die hyperbolischen Geraden erfüllen. . .
+a) . . . die Inzidenzaxiome §1
+b) . . . das Anordnungsaxiom §3 (ii)
+c) . . . nicht das Parallelenaxiom §5
+Beweis:
+a) Offensichtlich sind §1 (iii) und §1 (ii) erfüllt. Für §1 (i) gilt:
+Gegeben $z_1, z_2 \in \mathbb{H}$
+Existenz:
+Fall 1 $\Re(z_1) = \Re(z_2)$
+$\Rightarrow z_1$ und $z_2$ liegen auf
+$g = \{ z \in \mathbb{H} \mid \Re(z) = \Re(z_1) \}$
+Siehe Abbildung 4.20a.
+Fall 2 $\Re(z_1) \neq \Re(z_2)$
+Betrachte nun z
+1 und z
+2 als Punkte in der euklidischen Ebene. Die Mittelsenkrechte
+ zu diesen Punkten schneidet die x-Achse. Alle Punkte auf der Mittelsenkrechten
+zu z
+1 und z
+2 sind gleich weit von z
+1 und z
+2 entfernt. Daher ist der Schnittpunkt mit
+der x-Achse der Mittelpunkt eines Kreises durch z
+1 und z
+2 (vgl. Abbildung 4.20b)
+xy
+−1 0 1 2 3 4 501234
+ Z
+1Z
+2
+(Z
+1 )
+(a) Fall 1  xy
+−1 0 1
+ 2 3 4 501234
+ Z
+1 Z
+2
+(b) Fall 2
+Abbildung 4.20: Zwei Punkte liegen in der hyperbolischen Geometrie immer auf genau einer
+Geraden
+b) Sei g ∈ G
+1 ˙
+∪ G
+2 eine hyperbolische Gerade.
+ 4.3. HYPERBOLISCHE GEOMETRIE
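+Es existieren disjunkte Zerlegungen von $\mathbb{H} \setminus g$:
+Fall 1: $g = \{ z \in \mathbb{H} \mid |z - m| = r \} \in G_1$
+Dann gilt:
+$\mathbb{H} \setminus g = \underbrace{\{ z \in \mathbb{H} \mid |z - m| < r \}}_{=: H_1 \text{ (Kreisinneres)}} \dot{\cup} \underbrace{\{ z \in \mathbb{H} \mid |z - m| > r \}}_{=: H_2 \text{ (Kreisäußeres)}}$
+Da $r > 0$ ist $H_1$ nicht leer, da $r \in \mathbb{R}$ ist $H_2$ nicht leer.
+Fall 2: $g = \{ z \in \mathbb{H} \mid \Re(z) = x \} \in G_2$
+Die disjunkte Zerlegung ist:
+$\mathbb{H} \setminus g = \underbrace{\{ z \in \mathbb{H} \mid \Re(z) < x \}}_{=: H_1 \text{ (Links)}} \dot{\cup} \underbrace{\{ z \in \mathbb{H} \mid \Re(z) > x \}}_{=: H_2 \text{ (Rechts)}}$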
+Zu zeigen: $\forall A \in H_i, B \in H_j$ mit $i, j \in \{ 1, 2 \}$ gilt: $\overline{AB} \cap g \neq \emptyset \Leftrightarrow i \neq j$
+„$\Leftarrow$“: $A \in H_1, B \in H_2 : \overline{AB} \cap g \neq \emptyset$
+Da $d_{\mathbb{H}}$ stetig ist, folgt diese Richtung direkt. Alle Punkte in $H_1$ haben einen Abstand
+von $m$ der kleiner ist als $r$ und alle Punkte in $H_2$ haben einen Abstand von $m$ der
+größer ist als $r$. Da man jede Strecke von $A$ nach $B$ insbesondere auch als stetige
+Abbildung $f: \mathbb{R} \to \mathbb{R}_{>0}$ auffassen kann, greift der Zwischenwertsatz $\Rightarrow \overline{AB} \cap g \neq \emptyset$
+„$\Rightarrow$“: $A \in H_i, B \in H_j$ mit $i, j \in \{ 1, 2 \} : \overline{AB} \cap g \neq \emptyset \Rightarrow i \neq j$
+Sei $h$ die Gerade, die durch $A$ und $B$ geht.
+Da $A, B \notin g$, aber $A, B \in h$ gilt, haben $g$ und $h$ insbesondere mindestens einen
+unterschiedlichen Punkt. Aus §1 (i) folgt, dass sich $g$ und $h$ in höchstens einem Punkt
+schneiden. Sei $C$ dieser Punkt.
+Aus $A, B \notin g$ folgt: $C \neq A$ und $C \neq B$. Also liegt $C$ zwischen $A$ und $B$. Daraus folgt,
+dass $A$ und $B$ bzgl. $g$ in verschiedenen Halbebenen liegen.
+c) Siehe Abbildung 4.21.
+ xy
+−5 −4 −3 −2 −1 0 1 2 3 4 5 6012345
+Abbildung 4.21: Hyperbolische Geraden erfüllen §5 nicht.
+ 4.3. HYPERBOLISCHE GEOMETRIE
+Definition 64
+Es seien $a, b, c, d \in \mathbb{R}$ mit $ad - bc \neq 0$ und $\sigma: \mathbb{C} \to \mathbb{C}$ eine Abbildung definiert durch
+$\sigma(z) := \frac{az + b}{cz + d}$
+$\sigma$ heißt Möbiustransformation.
+Proposition 4.9
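+a) Die Gruppe $SL_2(\mathbb{R})$ operiert auf $\mathbb{H}$ durch die Möbiustransformation
+$\sigma(z) := \begin{pmatrix} a & b \\ c & d \end{pmatrix} \circ z := \frac{az + b}{cz + d}$
+b) Die Gruppe $PSL_2(\mathbb{R}) = SL_2(\mathbb{R}) / (\pm I)$ operiert durch $\sigma$ auf $\mathbb{H}$.
+c) $PSL_2(\mathbb{R})$ operiert auf $\mathbb{R} \cup \{ \infty \}$. Diese Gruppenoperation ist 3-fach transitiv, d. h.
+zu $x_0 < x_1 < x_\infty \in \mathbb{R}$ gibt es genau ein $\sigma \in PSL_2(\mathbb{R})$ mit $\sigma(x_0) = 0$, $\sigma(x_1) = 1$,
+$\sigma(x_\infty) = \infty$.
+d) $SL_2(\mathbb{R})$ wird von den Matrizen
+$\underbrace{\begin{pmatrix} \lambda & 0 \\ 0 & \lambda^{-1} \end{pmatrix}}_{=: A_\lambda}$, $\underbrace{\begin{pmatrix} 1 & t \\ 0 & 1 \end{pmatrix}}_{=: B_t}$ und $\underbrace{\begin{pmatrix} 0 & 1 \\ -1 & 0 \end{pmatrix}}_{=: C}$ mit $t, \lambda \in \mathbb{R}^\times$
+erzeugt.
+e) $PSL_2(\mathbb{R})$ operiert auf $G$.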
+Beweis:
+a) Sei $z = x + iy \in \mathbb{H}$, d. h. $y > 0$ und $\sigma = \begin{pmatrix} a & b \\ c & d \end{pmatrix} \in SL_2(\mathbb{R})$
+$\Rightarrow \sigma(z) = \frac{a(x + iy) + b}{c(x + iy) + d}$
+$= \frac{(ax + b) + i\,ay}{(cx + d) + i\,cy} \cdot \frac{(cx + d) - i\,cy}{(cx + d) - i\,cy}$
+$= \frac{(ax + b)(cx + d) + aycy}{(cx + d)^2 + (cy)^2} + i\,\frac{ay(cx + d) - (ax + b)cy}{(cx + d)^2 + (cy)^2}$
+$= \frac{axcx + axd + bcx + bd + aycy}{(cx + d)^2 + (cy)^2} + i\,\frac{(ad - bc)y}{(cx + d)^2 + (cy)^2}$
+$\overset{SL_2(\mathbb{R})}{=} \frac{ac(x^2 + y^2) + adx + bcx + bd}{(cx + d)^2 + (cy)^2} + i\,\frac{y}{(cx + d)^2 + (cy)^2}$
+$\Rightarrow \Im(\sigma(z)) = \frac{y}{(cx + d)^2 + (cy)^2} > 0$
+Die Abbildung bildet also nach $\mathbb{H}$ ab. Außerdem gilt:
+$\begin{pmatrix} 1 & 0 \\ 0 & 1 \end{pmatrix} \circ z = \frac{x + iy}{1} = x + iy = z$
+ 4.3. HYPERBOLISCHE GEOMETRIE
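+und
+$\begin{pmatrix} a & b \\ c & d \end{pmatrix} \circ \left( \begin{pmatrix} a' & b' \\ c' & d' \end{pmatrix} \circ z \right) = \begin{pmatrix} a & b \\ c & d \end{pmatrix} \circ \frac{a' z + b'}{c' z + d'}$
+$= \frac{a \frac{a' z + b'}{c' z + d'} + b}{c \frac{a' z + b'}{c' z + d'} + d}$
+$= \frac{\;\frac{a(a' z + b') + b(c' z + d')}{c' z + d'}\;}{\frac{c(a' z + b') + d(c' z + d')}{c' z + d'}}$
+$= \frac{a(a' z + b') + b(c' z + d')}{c(a' z + b') + d(c' z + d')}$
+$= \frac{(aa' + bc') z + ab' + bd'}{(ca' + dc') z + cb' + dd'}$
+$= \begin{pmatrix} aa' + bc' & ab' + bd' \\ ca' + dc' & cb' + dd' \end{pmatrix} \circ z$
+$= \left( \begin{pmatrix} a & b \\ c & d \end{pmatrix} \cdot \begin{pmatrix} a' & b' \\ c' & d' \end{pmatrix} \right) \circ z$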
+b) Es gilt σ(z) = (−σ)(z) für alle σ ∈ SL
+2(R) und z ∈ H.
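+c) Ansatz: $\sigma = \begin{pmatrix} a & b \\ c & d \end{pmatrix}$
+$\sigma(x_0) = \frac{a x_0 + b}{c x_0 + d} \overset{!}{=} 0 \Rightarrow a x_0 + b = 0 \Rightarrow b = -a x_0$
+$\sigma(x_\infty) = \infty \Rightarrow c x_\infty + d = 0 \Rightarrow d = -c x_\infty$
+$\sigma(x_1) = 1 \Rightarrow a x_1 + b = c x_1 + d$
+$a(x_1 - x_0) = c(x_1 - x_\infty) \Rightarrow c = a \frac{x_1 - x_0}{x_1 - x_\infty}$
+Aus $ad - bc = 1$ folgt:
+$\Rightarrow -a^2 \cdot x_\infty \frac{x_1 - x_0}{x_1 - x_\infty} + a^2 x_0 \frac{x_1 - x_0}{x_1 - x_\infty} = 1$
+$\Rightarrow a^2 \frac{x_1 - x_0}{x_1 - x_\infty} (x_0 - x_\infty) = 1 \Rightarrow a^2 = \frac{x_1 - x_\infty}{(x_0 - x_\infty)(x_1 - x_0)}$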
+d) Es gilt:
+$A_\lambda^{-1} = A_{\frac{1}{\lambda}}$
+$B_t^{-1} = B_{-t}$
+$C^{-1} = C^3$
+Um zu zeigen, dass man mit $A_\lambda$, $B_t$ und $C$ alle Matrizen aus $\mathrm{SL}_2(\mathbb{R})$
+erzeugen kann, genügt es also, von einer beliebigen Matrix durch Multiplikation mit
+Matrizen der Form $A_\lambda$, $B_t$ und $C$ die Einheitsmatrix zu generieren.
+Sei also
+$M = \begin{pmatrix} a & b \\ c & d \end{pmatrix} \in \mathrm{SL}_2(\mathbb{R})$
+beliebig.
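+Fall 1: $a = 0$
+Da $M \in \mathrm{SL}_2(\mathbb{R})$ ist, gilt $\det M = 1 = ad - bc = -bc$. Daher ist insbesondere $c \neq 0$. Es
+folgt:
+$\begin{pmatrix} 0 & 1 \\ -1 & 0 \end{pmatrix} \cdot \begin{pmatrix} a & b \\ c & d \end{pmatrix} = \begin{pmatrix} c & d \\ -a & -b \end{pmatrix}$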
+ 4.3. HYPERBOLISCHE GEOMETRIE
+Gehe zu Fall 2.
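+Fall 2: $a \neq 0$
+Nun wird in $M$ durch $M \cdot A_{\frac{1}{a}}$ an der Stelle von $a$ eine 1 erzeugt:
+$\begin{pmatrix} a & b \\ c & d \end{pmatrix} \cdot \begin{pmatrix} \frac{1}{a} & 0 \\ 0 & a \end{pmatrix} = \begin{pmatrix} 1 & ab \\ \frac{c}{a} & ad \end{pmatrix}$
+Gehe zu Fall 3.
+Fall 3: $a = 1$
+$\begin{pmatrix} 1 & b \\ c & d \end{pmatrix} \cdot \begin{pmatrix} 1 & -b \\ 0 & 1 \end{pmatrix} = \begin{pmatrix} 1 & 0 \\ c & d - bc \end{pmatrix}$
+Da wir $\det M = 1 = ad - bc = d - bc$ wissen, gilt sogar $M_{2,2} = 1$.
+Gehe zu Fall 4.
+Fall 4: $a = 1, b = 0, d = 1$
+$A_{-1} C B_c C \begin{pmatrix} 1 & 0 \\ c & 1 \end{pmatrix} = \begin{pmatrix} 1 & 0 \\ 0 & 1 \end{pmatrix}$
+Daher erzeugen Matrizen der Form $A_\lambda$, $B_t$ und $C$ die Gruppe $\mathrm{SL}_2(\mathbb{R})$.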
+e) Es genügt die Aussage für Matrizen aus Proposition 4.9 (d) zu zeigen.
+• σ =
+λ 0
+0 λ−1
+, also σ(z) = λ2
+z. Daraus ergeben sich die Situationen, die in
+Abbildung 4.22a und Abbildung 4.22b dargestellt sind.
+xy
+−1 0 1 2 3 4 5 6 70123
+ m λ2
+mm + irλ2
+m + iλ2
+r
+m + 1
+(a) Fall 1  xy
+−1 0 1 2 3 40123
+ z
+ x λ2
+ z
+λ2
+ x
+(b) Fall 2 (Strahlensatz)
+Abbildung 4.22: Beweis von Proposition 4.9 (e) für eine Diagonalmatrix
+• Offensichtlich gilt die Aussage für σ =
+1 a
+0 1
+• Sei nun σ =
+ 0 1
+−1 0
+, also σ(z) = − 1
+z
+Bemerkung 69
+Zu hyperbolischen Geraden g
+1, g
+2 gibt es σ ∈ PSL
+2(R) mit σ(g
+1) = g
+2.
+ 4.3. HYPERBOLISCHE GEOMETRIE
+·
+ xy
+−1 0 101 z = r · eiϕ
+1
+z = 1
+r · eiϕ
+Abbildung 4.23: Inversion am Kreis
+Beweis: Nach Proposition 4.9 (c) gibt es σ mit σ(a
+1) = b
+1 und σ(a
+2) = b
+2. Dann existiert
+σ(g
+1) := g
+2 wegen dem Inzidenzaxiom §1 und ist eindeutig bestimmt.
+Definition 65
+Seien $z_1, z_2, z_3, z_4 \in \mathbb{C}$ paarweise verschieden.
+Dann heißt
+$\mathrm{DV}(z_1, z_2, z_3, z_4) := \frac{\;\frac{z_1 - z_4}{z_1 - z_2}\;}{\frac{z_3 - z_4}{z_3 - z_2}} = \frac{(z_1 - z_4) \cdot (z_3 - z_2)}{(z_1 - z_2) \cdot (z_3 - z_4)}$
+Doppelverhältnis von $z_1, \dots, z_4$.
+Bemerkung 70 (Eigenschaften des Doppelverhältnisses)
+a) $\mathrm{DV}(z_1, \dots, z_4) \in \mathbb{C} \setminus \{ 0, 1 \}$
+b) $\mathrm{DV}(z_1, z_4, z_3, z_2) = \frac{1}{\mathrm{DV}(z_1, z_2, z_3, z_4)}$
+c) $\mathrm{DV}(z_3, z_2, z_1, z_4) = \frac{1}{\mathrm{DV}(z_1, z_2, z_3, z_4)}$
+d) DV ist auch wohldefiniert, wenn eines der $z_i = \infty$ oder wenn zwei der $z_i$ gleich sind.
+e) $\mathrm{DV}(0, 1, \infty, z_4) = z_4$ (Der Fall $z_4 \in \{ 0, 1, \infty \}$ ist zugelassen).
+f) Für $\sigma \in \mathrm{PSL}_2(\mathbb{C})$ und $z_1, \dots, z_4 \in \mathbb{C} \cup \{ \infty \}$ ist
+$\mathrm{DV}(\sigma(z_1), \sigma(z_2), \sigma(z_3), \sigma(z_4)) = \mathrm{DV}(z_1, z_2, z_3, z_4)$
+und für $\sigma(z) = \frac{1}{\overline{z}}$ gilt
+$\mathrm{DV}(\sigma(z_1), \sigma(z_2), \sigma(z_3), \sigma(z_4)) = \overline{\mathrm{DV}(z_1, z_2, z_3, z_4)}$
+g) $\mathrm{DV}(z_1, z_2, z_3, z_4) \in \mathbb{R} \cup \{ \infty \} \Leftrightarrow z_1, \dots, z_4$ liegen auf einer hyperbolischen Geraden.
+Beweis:
+a) $\mathrm{DV}(z_1, \dots, z_4) \neq 0$, da $z_i$ paarweise verschieden
+$\mathrm{DV}(z_1, \dots, z_4) \neq 1$, da:
+Annahme: $\mathrm{DV}(z_1, \dots, z_4) = 1$
+$\Leftrightarrow (z_1 - z_2)(z_3 - z_4) = (z_1 - z_4)(z_3 - z_2)$
+ 4.3. HYPERBOLISCHE GEOMETRIE
+⇔ z
+1z
+3 − z
+2z
+3 − z
+1z
+4 + z
+2z
+4 = z
+1z
+3 − z
+3z
+4 − z
+1z
+2 + z
+2z
+4
+⇔ z
+2z
+3 + z
+1z
+4 = z
+3z
+4 + z
+1z
+2
+⇔ z
+2z
+3 − z
+3z
+4 = z
+1z
+2 − z
+1z
+4
+⇔ z
+3(z
+2 − z
+4) = z
+1(z
+2 − z
+4)
+⇔ z
+3 = z
+1 oder z
+2 = z
+4
+Alle z
+i sind paarweise verschieden ⇒ Widerspruch
+b) DV(z
+1, z
+4, z
+3, z
+2) = (z
+1−z
+2)·(z
+3−z
+4)
+(z
+1−z
+4)·(z
+3−z
+2) = 1
+DV(z
+1,z
+2,z
+3,z
+4)
+c) DV(z
+3, z
+2, z
+1, z
+4) = (z
+3−z
+4)·(z
+1−z
+2)
+(z
+3−z
+2)·(z
+1−z
+4) = 1
+DV(z
+1,z
+2,z
+3,z
+4)
+d) Zwei der $z_i$ dürfen gleich sein, da:
+Fall 1 $z_1 = z_4$ oder $z_3 = z_2$
+In diesem Fall ist $\mathrm{DV}(z_1, \dots, z_4) = 0$
+Fall 2 $z_1 = z_2$ oder $z_3 = z_4$
+Mit der Regel von L’Hospital folgt, dass in diesem Fall $\mathrm{DV}(z_1, \dots, z_4) = \infty$ gilt.
+Fall 3 $z_1 = z_3$ oder $z_2 = z_4$
+Durch Einsetzen ergibt sich $\mathrm{DV}(z_1, \dots, z_4) = 1$.
+Im Fall, dass ein $z_i = \infty$ ist, ist entweder $\mathrm{DV}(0, 1, \infty, z_4) = 0$ oder $\mathrm{DV}(0, 1, \infty, z_4) = \pm\infty$
+e) $\mathrm{DV}(0, 1, \infty, z_4) = \frac{(0 - z_4) \cdot (\infty - 1)}{(0 - 1) \cdot (\infty - z_4)} = \frac{z_4 \cdot (\infty - 1)}{\infty - z_4} = z_4$
+f ) Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken.
+g) Sei σ ∈ PSL
+2(C) mit σ(z
+1) = 0, σ(z
+2) = 1, σ(z
+3) = ∞. Ein solches σ existiert, da man
+drei Parameter von σ wählen darf.
+Bem. 70.f
+⇒ DV(z
+1, . . . , z
+4) = DV(0, 1, ∞, σ(z
+4))
+⇒ DV(z
+1, . . . , z
+4) ∈ R ∪ { ∞ }
+⇔ σ(z
+4) ∈ R ∪ { ∞ }
+Behauptung folgt, weil σ−1
+(R ∪ ∞) ein Kreis oder eine Gerade in C ist.
+Definition 66
+Für $z_1, z_2 \in \mathbb{H}$ sei $g_{z_1, z_2}$ die eindeutige hyperbolische Gerade durch $z_1$ und $z_2$ und $a_1, a_2$ die
+„Schnittpunkte“ von $g_{z_1, z_2}$ mit $\mathbb{R} \cup \{ \infty \}$.
+Dann sei $d_{\mathbb{H}}(z_1, z_2) := \frac{1}{2} \, | \ln \mathrm{DV}(a_1, z_1, a_2, z_2) |$ und heiße hyperbolische Metrik.
+Beh.: Für z
+1, z
+2 ∈ H sei g
+z
+1,z
+2 die eindeutige hyperbolische Gerade durch z
+1 und z
+2 und a
+1, a
+2
+die „Schnittpunkte“ von g
+z
+1,z
+2 mit R ∪ { ∞ }.
+Dann gilt:
+ 1
+2 | ln DV(a
+1, z
+1, a
+2, z
+2)| = 1
+2 | ln DV(a
+2, z
+1, a
+1, z
+2)|
+Beweis: Wegen Bemerkung 70.c gilt:
+DV(a
+1, z
+1, a
+2, z
+2) = 1
+DV(a
+2, z
+1, a
+1, z
+2)
+Außerdem gilt:
+ ln 1
+x = ln x−1
+ = (−1) · ln x = − ln x
+ 4.3. HYPERBOLISCHE GEOMETRIE
+Da der ln im Betrag steht, folgt direkt:
+1
+2 | ln DV(a
+1, z
+1, a
+2, z
+2)| = 1
+2 | ln DV(a
+2, z
+1, a
+1, z
+2)|
+Es ist also egal in welcher Reihenfolge die „Schnittpunkte“ mit der x-Achse im Doppelverhältnis
+ genutzt werden.
+Beh.: Die hyperbolische Metrik ist eine Metrik auf H.
+Beweis: Wegen Bemerkung 70.f ist
+d(z
+1, z
+2) := d(σ(z
+1), σ(z
+2)) mit σ(a
+1) = 0, σ(a
+2) = ∞
+d. h. σ(g
+z
+1,z
+2 ) = i R (imaginäre Achse).
+also gilt o. B. d. A. z
+1 = i a und z
+2 = i b mit a, b ∈ R und a < b.
+2d(i a, i b) =| ln DV(0, i a, ∞, i b) |
+=| ln (0 − i b)(∞ − i a)
+(0 − i a)(∞ − i b) |
+=| ln b
+a |
+= ln b − ln a
+Also: d(z
+1, z
+2) ≥ 0, d(z
+1, z
+2) = 0 ⇔ z
+1 = z
+2
+2d(z
+2, z
+1) =| ln DV(a
+2, z
+2, a
+1, z
+1) |
+=| ln DV(∞, i b, 0, i a) |
+Bem. 70.b
+= | ln DV(0, i b, ∞, i a) |
+= 2d(z
+1, z
+2)
+Liegen drei Punkte z
+1, z
+2, z
+3 ∈ C auf einer hyperbolischen Geraden, so gilt d(z
+1, z
+3) =
+d(z
+1, z
+2) + d(z
+2, z
+3) (wenn z
+2 zwischen z
+1 und z
+3 liegt).
+Dreiecksungleichung: Beweis ist umständlich und wird hier nicht geführt. Es sei auf die
+Vorlesung „Hyperbolische Geometrie“ verwiesen.
+Satz 4.10
+Die hyperbolische Ebene H mit der hyperbolischen Metrik d und den hyperbolischen
+Geraden bildet eine „nichteuklidische Geometrie“, d. h. die Axiome §1 - §4 sind erfüllt,
+aber Axiom §5 ist verletzt.
+ 4.3. HYPERBOLISCHE GEOMETRIE
+Übungsaufgaben
+Aufgabe 8
+Seien (X, d) eine absolute Ebene und P, Q, R ∈ X Punkte. Der Scheitelwinkel des Winkels
+∠PQR ist der Winkel, der aus den Halbgeraden $QP^-$ und $QR^-$ gebildet wird. Die
+Nebenwinkel von ∠PQR sind die von $QP^+$ und $QR^-$ bzw. $QP^-$ und $QR^+$ gebildeten
+Winkel.
+Zeigen Sie:
+(a) Die beiden Nebenwinkel von ∠P QR sind gleich.
+(b) Der Winkel ∠P QR ist gleich seinem Scheitelwinkel.
+Aufgabe 9
+Sei (X, d) eine absolute Ebene. Der Abstand eines Punktes P zu einer Menge Y ⊆ X von
+Punkten ist definiert durch $d(P, Y) := \inf \{ d(P, y) \mid y \in Y \}$.
+Zeigen Sie:
+(a) Ist AB C ein Dreieck, in dem die Seiten AB und AC kongruent sind, so sind die
+Winkel ∠AB C und ∠B C A gleich.
+(b) Ist AB C ein beliebiges Dreieck, so liegt der längeren Seite der größere Winkel
+gegenüber und umgekehrt.
+(c) Sind g eine Gerade und P ∉ g ein Punkt, so gibt es eine eindeutige Gerade h mit
+P ∈ h, die g im rechten Winkel schneidet. Diese Gerade heißt Lot von P auf g
+und der Schnittpunkt des Lots mit g heißt Lotfußpunkt.
+Aufgabe 10
+Seien f, g, h ∈ G und paarweise verschieden.
+Zeigen Sie: f ∥ g ∧ g ∥ h ⇒ f ∥ h
+Aufgabe 11
+Beweise den Kongruenzsatz S S S .
+5 Krümmung
+Definition 67
+Sei $f : [a, b] \to \mathbb{R}^n$ eine Funktion aus $C^\infty$. Dann heißt $f$ Kurve.
+5.1 Krümmung von Kurven
+Definition 68
+Sei $\gamma : I = [a, b] \to \mathbb{R}^n$ eine Kurve.
+a) Die Kurve $\gamma$ heißt durch Bogenlänge parametrisiert, wenn gilt:
+$\| \gamma'(t) \|_2 = 1 \quad \forall t \in I$
+Dabei ist $\gamma'(t) = (\gamma_1'(t), \gamma_2'(t), \dots, \gamma_n'(t))$.
+b) $l(\gamma) = \int_a^b \| \gamma'(t) \| \, \mathrm{d}t$ heißt Länge von $\gamma$.
+Bemerkung 71 (Eigenschaften von Kurven I)
+Sei γ : I = [a, b] → Rn
+ eine C ∞
+-Funktion.
+a) Ist γ durch Bogenlänge parametrisiert, so ist l(γ ) = b − a.
+b) Ist $\gamma$ durch Bogenlänge parametrisiert, so ist $\gamma'(t)$ orthogonal zu $\gamma''(t)$ für alle $t \in I$.
+Beweis:
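+a) $l(\gamma) = \int_a^b \| \gamma'(t) \| \, \mathrm{d}t = \int_a^b 1 \, \mathrm{d}t = b - a$.
+b) Im Folgenden wird die Aussage nur für $\gamma : [a, b] \to \mathbb{R}^2$ bewiesen. Allerdings funktioniert
+der Beweis im $\mathbb{R}^n$ analog. Es muss nur die Ableitung angepasst werden.
+$1 = \| \gamma'(t) \| = \| \gamma'(t) \|^2 = \langle \gamma'(t), \gamma'(t) \rangle$
+$\Rightarrow 0 = \frac{\mathrm{d}}{\mathrm{d}t} \langle \gamma'(t), \gamma'(t) \rangle$
+$= \frac{\mathrm{d}}{\mathrm{d}t} \left( \gamma_1'(t) \gamma_1'(t) + \gamma_2'(t) \gamma_2'(t) \right)$
+$= 2 \cdot \left( \gamma_1''(t) \cdot \gamma_1'(t) + \gamma_2''(t) \cdot \gamma_2'(t) \right)$
+$= 2 \cdot \langle \gamma''(t), \gamma'(t) \rangle$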
+Definition 69
+Sei γ : I → R2
+ eine durch Bogenlänge parametrisierte Kurve.
+a) Für $t \in I$ sei $n(t)$ Normalenvektor an $\gamma$ in $t$ wenn gilt:
+$\langle n(t), \gamma'(t) \rangle = 0$, $\| n(t) \| = 1$ und $\det((\gamma'(t), n(t))) = +1$
+  5.1. KRÜMMUNG VON KURVEN
+b) Sei $\kappa : I \to \mathbb{R}$ so, dass gilt:
+$\gamma''(t) = \kappa(t) \cdot n(t)$
+Dann heißt $\kappa(t)$ Krümmung von $\gamma$ in $t$.
+Da $n(t)$ und $\gamma''(t)$ nach Bemerkung 71.b linear abhängig sind, existiert $\kappa(t)$.
+Beispiel 45
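+Gegeben sei ein Kreis mit Radius $r$, d. h. mit Umfang $2 \pi r$. Es gilt:
+$\gamma(t) = \left( r \cdot \cos \frac{t}{r}, \; r \cdot \sin \frac{t}{r} \right)$ für $t \in [0, 2 \pi r]$
+ist parametrisiert durch Bogenlänge, da gilt:
+$\gamma'(t) = \left( (r \cdot \frac{1}{r}) (- \sin \frac{t}{r}), \; r \frac{1}{r} \cos \frac{t}{r} \right) = \left( - \sin \frac{t}{r}, \; \cos \frac{t}{r} \right)$
+Der Normalenvektor von $\gamma$ in $t$ ist
+$n(t) = \left( - \cos \frac{t}{r}, \; - \sin \frac{t}{r} \right)$
+da gilt:
+$\langle n(t), \gamma'(t) \rangle = \left\langle \begin{pmatrix} - \cos \frac{t}{r} \\ - \sin \frac{t}{r} \end{pmatrix}, \begin{pmatrix} - \sin \frac{t}{r} \\ \cos \frac{t}{r} \end{pmatrix} \right\rangle$
+$= (- \cos \frac{t}{r}) \cdot (- \sin \frac{t}{r}) + (- \sin \frac{t}{r}) \cdot (\cos \frac{t}{r})$
+$= 0$
+$\| n(t) \| = \left\| (- \cos \frac{t}{r}, \; - \sin \frac{t}{r}) \right\|$
+$= (- \cos \frac{t}{r})^2 + (- \sin \frac{t}{r})^2$
+$= 1$
+$\det(\gamma'(t), n(t)) = \begin{vmatrix} - \sin \frac{t}{r} & - \cos \frac{t}{r} \\ \cos \frac{t}{r} & - \sin \frac{t}{r} \end{vmatrix}$
+$= (- \sin \frac{t}{r})^2 - (- \cos \frac{t}{r}) \cdot \cos \frac{t}{r}$
+$= 1$
+Die Krümmung ist für jedes $t$ konstant $\frac{1}{r}$, da gilt:
+$\gamma''(t) = \left( - \frac{1}{r} \cos \frac{t}{r}, \; - \frac{1}{r} \sin \frac{t}{r} \right)$
+$= \frac{1}{r} \cdot \left( - \cos \frac{t}{r}, \; - \sin \frac{t}{r} \right)$
+$\Rightarrow \kappa(t) = \frac{1}{r}$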
+  5.2. TANGENTIALEBENE
+Definition 70
+Sei $\gamma : I \to \mathbb{R}^3$ eine durch Bogenlänge parametrisierte Kurve.
+a) Für $t \in I$ heißt $\kappa(t) := \| \gamma''(t) \|$ die Krümmung von $\gamma$ in $t$.
+b) Ist für $t \in I$ die Ableitung $\gamma''(t) \neq 0$, so heißt $n(t) := \frac{\gamma''(t)}{\| \gamma''(t) \|}$ Normalenvektor an $\gamma$ in $t$.
+c) $b(t)$ sei ein Vektor, der $\gamma'(t), n(t)$ zu einer orientierten Orthonormalbasis von $\mathbb{R}^3$ ergänzt.
+Also gilt:
+$\det(\gamma'(t), n(t), b(t)) = 1$
+$b(t)$ heißt Binormalenvektor, die Orthonormalbasis
+$\{ \gamma'(t), n(t), b(t) \}$
+heißt begleitendes Dreibein.
+Bemerkung 72 (Eigenschaften von Kurven II)
+Sei γ : I → R3
+ durch Bogenlänge parametrisierte Kurve.
+a) n(t) ist orthogonal zu γ
+(t).
+b) b(t) aus Definition 70.c ist eindeutig.
+5.2 Tangentialebene
+Erinnern Sie sich an Definition 32 „reguläre Fläche“.
+Äquivalent dazu ist: S ist lokal von der Form
+$V(f) = \{ x \in \mathbb{R}^3 \mid f(x) = 0 \}$
+für eine $C^\infty$-Funktion $f : \mathbb{R}^3 \to \mathbb{R}$.
+Definition 71
+Sei S ⊆ R3
+ eine reguläre Fläche, s ∈ S , F : U → V ∩ S eine lokale Parametrisierung um
+s ∈ V :
+ (u, v) → (x(u, v), y(u, v), z(u, v))
+Für p = F −1
+(s) ∈ U sei
+ J
+F (p) = 
+ ∂x
+∂u (p) ∂x
+∂v (p)
+∂y
+∂u (p) ∂y
+∂v (p)
+∂z
+∂u (p) ∂z
+∂v (p)
+
+und D
+pF : R2
+ → R3
+ die durch J
+F (p) definierte lineare Abbildung.
+Dann heißt T
+sS := Bild(D
+pF ) die Tangentialebene an s ∈ S .
+Bemerkung 73 (Eigenschaften der Tangentialebene)
+a) T
+sS ist 2-dimensionaler Untervektorraum von R3
+.
+b) T
+sS =  ˜u, ˜v, wobei ˜u, ˜v die Spaltenvektoren der Jacobi-Matrix J
+F (p) sind.
+c) T
+sS hängt nicht von der gewählten Parametrisierung ab.
+  5.2. TANGENTIALEBENE
+d) Sei $S = V(f)$ eine reguläre Fläche in $\mathbb{R}^3$, also $f : V \to \mathbb{R}$ eine $C^\infty$-Funktion, $V \subseteq \mathbb{R}^3$
+offen, $\mathrm{grad}(f)(x) \neq 0$ für alle $x \in S$.
+Dann ist $T_s S = (\mathrm{grad}(f)(s))^\perp$ für jedes $s \in S$.
+Beweis:
+a) $J_F$ ist eine 3 × 2-Matrix, die mit einem 2 × 1-Vektor multipliziert wird. Das ist
+eine lineare Abbildung und aus der linearen Algebra ist bekannt, dass das Bild ein
+Vektorraum ist. Da $\mathrm{Rg}(J_F) = 2$, ist auch $\dim(T_s S) = 2$.
+b) Hier kann man wie in Punkt a) argumentieren
+c) T
+sS
+ = {x ∈ R3
+|∃parametrisierte Kurve γ : [−ε, +ε] → S für ein ε > 0 mit γ (0) =
+s und γ
+(0) = x}
+Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken.
+d)
+ Sei x ∈ T
+sS, γ : [−ε, +ε] → S eine parametrisierte Kurve mit ε > 0 und γ
+(0) = s,
+sodass γ
+(0) = x gilt. Da γ (t) ∈ S für alle t ∈ [−ε, ε], ist f ◦ γ = 0
+⇒ 0 = (f ◦ γ )
+(0) = grad(f )(γ (0)), γ
+(0)
+⇒ T
+sS ⊆ grad(f )(s)⊥
+dim=2
+====⇒ T
+sS = (grad(f )(s))⊥
+Definition 72
+a) Ein Normalenfeld auf der regulären Fläche S ⊆ R3
+ ist eine Abbildung n : S → S 2
+ ⊆
+R3
+ mit n(s) ∈ T
+sS ⊥
+ für jedes s ∈ S .
+b) S heißt orientierbar, wenn es ein stetiges Normalenfeld auf S gibt.
+Manchmal wird zwischen einem Normalenfeld und einem Einheitsnormalenfeld unterschieden.
+Im Folgenden werden diese Begriffe jedoch synonym benutzt.
+Bemerkung 74 (Eigenschaften von Normalenfeldern)
+a) Ein Normalenfeld auf S ist genau dann stetig, wenn es glatt ist (also C ∞
+).
+b)
+ Zu jedem s ∈ S gibt es eine Umgebung V ⊆ R3
+ von s und eine lokale Parametrisierung
+F : U → V von S um s, sodass auf F (U ) = V ∩ S ein stetiges Normalenfeld existiert.
+c) S ist genau dann orientierbar, wenn es einen differenzierbaren Atlas von S aus lokalen
+Parametrisierungen $F_i : U_i \to V_i$, $i \in I$ gibt, sodass für alle $i, j \in I$ und alle
+$s \in V_i \cap V_j \cap S$ gilt:
+$\det( \underbrace{D_s \underbrace{(F_j \circ F_i^{-1})}_{V_i \to V_j}}_{\in \mathbb{R}^{3 \times 3}} ) > 0$
+Beweis: Wird hier nicht geführt.
+Beispiel 46 (Normalenfelder)
+1) S = S 2
+, n
+1 = id
+S2 ist ein stetiges Normalenfeld.
+Auch n
+2 = −id
+S2 ist ein stetiges Normalenfeld.
+2) S = Möbiusband (vgl. Abbildung 5.1) ist nicht orientierbar. Es existiert ein Normalenfeld,
+ aber kein stetiges Normalenfeld.
+  5.3. GAUSS-KRÜMMUNG
+Abbildung 5.1: Möbiusband
+5.3 Gauß-Krümmung
+Bemerkung 75
+Sei S eine reguläre Fläche, $s \in S$, $n(s)$ ist ein Normalenvektor in $s$, $x \in T_s S$, $\| x \| = 1$.
+Sei E der von $x$ und $n(s)$ aufgespannte 2-dimensionale Untervektorraum von $\mathbb{R}^3$.
+Dann gibt es eine Umgebung $V \subseteq \mathbb{R}^3$ von $s$, sodass
+$C := (s + E) \cap S \cap V$
+das Bild einer durch Bogenlänge parametrisierten Kurve $\gamma : [-\varepsilon, \varepsilon] \to S$ enthält mit $\gamma(0) = s$
+und $\gamma'(0) = x$.
+Beweis: „Satz über implizite Funktionen“ 1
+Definition 73
+In der Situation aus Bemerkung 75 heißt die Krümmung κ
+γ (0) der Kurve γ in der Ebene
+(s + E ) im Punkt s die Normalkrümmung von S in s in Richtung x = γ
+(0).
+Man schreibt: κ
+Nor(s, x) := κ
+γ (0)
+Hinweis: Die Krümmung ist nur bis auf das Vorzeichen bestimmt.
+Beispiel 47 (Gauß-Krümmung)
+1) S = S 2
+ = V (X 2
+ + Y 2
+ + Z 2
+ − 1) ist die Kugel um den Ursprung mit Radius 1, n = id,
+s = (0, 0, 1), x = (1, 0, 0)
+⇒ E = R · x + R · n(s) (x, z-Ebene)
+C = E ∩ S ist Kreislinie
+κ
+Nor(s, x) = 1
+r = 1
+2) S = V (X 2
+ + Z 2
+ − 1) ⊆ R3
+ ist ein Zylinder (siehe Abbildung 5.2a). s = (1, 0, 0)
+x
+1 = (0, 1, 0) ⇒ E
+1 = R · e
+1 + R · e
+2 (x, y-Ebene)
+S ∩ E
+1 = V (X 2
+ + Y 2
+ − 1) ∩ E , Kreislinie in E
+⇒ κ
+Nor(s, x
+1) = ±1
+x
+2 = (0, 0, 1), E
+2 = R · e
+1 + R · e
+3 (x, z-Ebene)
+1
+Siehe z. B. https://github.com/MartinThoma/LaTeX- examples/tree/master/documents/Analysis%20II
+  5.3. GAUSS-KRÜMMUNG
+V ∩ E
+2 ∩ S =
+ (1, 0, z) ∈ R3
+
+ z ∈ R
+ ist eine Gerade
+⇒ κ
+Nor(s, x
+2) = 0
+3) S = V (X 2
+ − Y 2
+ − Z ), s = (0, 0, 0) (Hyperbolisches Paraboloid, siehe Abbildung 5.2b)
+x
+1 = (1, 0, 0), n(s) = (0, 0, 1)
+x
+2 = (0, 1, 0)
+κ
+Nor(s, x
+1) = 2
+κ
+Nor(s, x
+2) = −2
+−1.5 −1 −0.5 0 0.5 1 1.5
+−101012345
+ xyz
+ (a) S = V (X 2
+ + Z 2
+ − 1) −2 −1.5 −1 −0.5 0 0.5 1 1.5 2
+−2−1012−202
+ xyz
+ −4−2024f(x, y)
+ (b) S = V (X 2
+ − Y 2
+ − Z )
+Abbildung 5.2: Beispiele für reguläre Flächen
+Definition 74
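+Sei $S \subseteq \mathbb{R}^3$ eine reguläre Fläche, $s \in S$ und $n$ ein stetiges Normalenfeld auf $S$.
+$\gamma : [-\varepsilon, \varepsilon] \to S$ eine nach Bogenlänge parametrisierte Kurve ($\varepsilon > 0$) mit $\gamma(0) = s$ und
+$\gamma''(0) \neq 0$.
+Sei $n(0) := \frac{\gamma''(0)}{\| \gamma''(0) \|}$. Zerlege
+$n(0) = n(0)^t + n(0)^\perp$ mit $n(0)^t \in T_s S$ und $n(0)^\perp \in (T_s S)^\perp$
+Dann ist $n(0)^\perp = \langle n(0), n(s) \rangle \cdot n(s)$
+$\kappa_{\mathrm{Nor}}(s, \gamma) := \langle \gamma''(0), n(s) \rangle$ die Normalkrümmung.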
+Bemerkung 76
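+Sei $\tilde{\gamma}(t) = \gamma(-t)$, $t \in [-\varepsilon, \varepsilon]$. Dann ist $\kappa_{\mathrm{Nor}}(s, \tilde{\gamma}) = \kappa_{\mathrm{Nor}}(s, \gamma)$.
+Beweis: $\tilde{\gamma}''(0) = \gamma''(0)$, da $\tilde{\gamma}'(0) = -\gamma'(0)$.
+Es gilt: $\kappa_{\mathrm{Nor}}(s, \gamma)$ hängt nur von $|\gamma'(0)|$ ab und ist gleich $\kappa_{\mathrm{Nor}}(s, \gamma'(0))$.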
+Bemerkung 77
+Sei S eine reguläre Fläche und n = n(s) ein Normalenvektor an S in s.
+Sei T 1
+s S = { x ∈ T
+sS | x = 1 } ∼
+= S 1
+. Dann ist
+κn
+Nor(s) : T 1
+s S → R, x → κ
+Nor(s, x)
+eine glatte Funktion und Bild κn
+Nor(s) ist ein abgeschlossenes Intervall.
+Definition 75
+Sei S eine reguläre Fläche und n = n(s) ein Normalenvektor an S in s.
+  5.3. GAUSS-KRÜMMUNG
+a) $\kappa_1^n(s) := \min \{ \kappa_{\mathrm{Nor}}^n(s, x) \mid x \in T_s^1 S \}$ und
+$\kappa_2^n(s) := \max \{ \kappa_{\mathrm{Nor}}^n(s, x) \mid x \in T_s^1 S \}$ heißen Hauptkrümmungen von S in s.
+b) $K(s) := \kappa_1^n(s) \cdot \kappa_2^n(s)$ heißt Gauß-Krümmung von S in s.
+Bemerkung 78
+Ersetzt man $n$ durch $-n$, so gilt:
+$\kappa_{\mathrm{Nor}}^{-n}(s, x) = -\kappa_{\mathrm{Nor}}^{n}(s, x) \quad \forall x \in T_s^1 S$
+$\Rightarrow \kappa_1^{-n}(s) = -\kappa_2^{n}(s)$
+$\kappa_2^{-n}(s) = -\kappa_1^{n}(s)$
+und $K^{-n}(s) = K^{n}(s) =: K(s)$
+Beispiel 48
+1) S = S 2
+. Dann ist κ
+1(s) = κ
+2(s) = ±1 ∀s ∈ S 2
+⇒ K (s) = 1
+2) Zylinder:
+κ
+1(s) = 0, κ
+2(s) = 1 ⇒ K (s) = 0
+3) Sattelpunkt auf hyperbolischem Paraboloid:
+$\kappa_1(s) < 0$, $\kappa_2(s) > 0 \Rightarrow K(s) < 0$
+4) S = Torus. Siehe Abbildung 5.3
+ s
+1s
+2
+s
+3
+Abbildung 5.3: K (s
+1) > 0, K (s
+2) = 0, K (s
+3) < 0
+Bemerkung 79
+Sei S eine reguläre Fläche, s ∈ S ein Punkt.
+ 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM
+a) Ist K (s) > 0, so liegt S in einer Umgebung von s ganz auf einer Seite von T
+sS + s.
+b) Ist K (s) < 0, so schneidet jede Umgebung von s in S beide Seiten von T
+sS + s.
+5.4 Erste und zweite Fundamentalform
+Sei S ⊆ R3
+ eine reguläre Fläche, s ∈ S , T
+sS die Tangentialebene an S in s und F : U → V eine
+lokale Parametrisierung von S um s. Weiter sei p := F −1
+(s).
+Definition 76
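+Sei $I_S \in \mathbb{R}^{2 \times 2}$ definiert als
+$I_S := \begin{pmatrix} g_{1,1}(s) & g_{1,2}(s) \\ g_{1,2}(s) & g_{2,2}(s) \end{pmatrix} = \begin{pmatrix} E(s) & F(s) \\ F(s) & G(s) \end{pmatrix}$
+mit $g_{i,j} = g_s(D_p F(e_i), D_p F(e_j)) = \left\langle \frac{\partial F}{\partial u_i}(p), \frac{\partial F}{\partial u_j}(p) \right\rangle$, $i, j \in \{ 1, 2 \}$
+Die Matrix $I_S$ heißt erste Fundamentalform von S bzgl. der Parametrisierung F.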
+Bemerkung 80
+a)
+ Die Einschränkung des Standardskalarproduktes des R3
+ auf T
+sS macht T
+sS zu einem
+euklidischen Vektorraum.
+b) { D
+pF (e
+1), D
+pF (e
+2) } ist eine Basis von T
+sS .
+c) Bzgl. der Basis { D
+pF (e
+1), D
+pF (e
+2) } hat das Standardskalarprodukt aus Bemerkung
+ 80.a die Darstellungsmatrix I
+S .
+d) g
+i,j (s) ist eine differenzierbare Funktion von s.
+Bemerkung 81
+ det(I
+S ) =
+
+
+ ∂ F
+∂ u
+1 (p) × ∂ F
+∂ u
+2 (p)
+
+
+2
+Beweis: Sei ∂F
+∂u
+1 (p) = 
+x
+1
+x
+2
+x
+3
+
+ , ∂F
+∂u
+2 (p) = 
+ y
+1
+y
+2
+y3
+
+Dann ist ∂F
+∂u
+1 (p) × ∂F
+∂u
+2 (p) = 
+z
+1
+z
+2
+z
+3
+
+ mit
+ z
+1 = x
+2y
+3 − x
+3y
+2
+z
+2 = x
+3y
+1 − x
+1y
+3
+z
+3 = x
+1y
+2 − x
+2y
+1
+⇒  ∂ F
+∂ u
+1 (p) × ∂ F
+∂ u
+2 (p) = z2
+1 + z2
+2 + z2
+3
+ 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM
+det(I
+S ) = g
+1,1g
+2,2 − g2
+1,2
+= 
+x
+1
+x
+2
+x
+3
+
+ , 
+x
+1
+x
+2
+x
+3
+ 
+y
+1
+y
+2
+y
+3
+
+ , 
+y
+1
+y
+2
+y
+3
+
+ − 
+x
+1
+x
+2
+x
+3
+
+ , 
+y
+1
+y
+2
+y
+3
+2
+= (x2
+1 + x2
+2 + x2
+3)(y2
+1 + y2
+2 + y2
+3 ) − (x
+1y
+1 + x
+2y
+2 + x
+3y
+3)2
+Definition 77
+a) Das Differential $\mathrm{d}A = \sqrt{\det(I)} \, \mathrm{d}u_1 \mathrm{d}u_2$ heißt Flächenelement von S bzgl. der Parametrisierung F.
+b) Für eine Funktion $f : V \to \mathbb{R}$ heißt
+$\int_V f \, \mathrm{d}A := \int_U f(\underbrace{F(u_1, u_2)}_{=: s}) \sqrt{\det I(s)} \, \mathrm{d}u_1 \mathrm{d}u_2$
+der Wert des Integrals von f über V, falls das Integral rechts existiert.
+Bemerkung 82
+a)
+V f dA ist unabhängig von der gewählten Parametrisierung.
+b) Sei f : S → R eine Funktion, die im Sinne von Definition 77.b lokal integrierbar ist.
+Dann ist
+S f dA wohldefiniert, falls (z. B.) S kompakt ist.
+Etwa:
+
+S f dA = n
+
+i=1
+V
+if dA
+−
+i=j
+V
+i∩V
+jf dA
++
+i,j,k
+V
+i∩V
+j ∩V
+kf dA
+− . . .
+Beweis:
+a) Mit Transformationsformel.
+b) Ist dem Leser überlassen.
+Proposition 5.1
+Sei $S \subseteq \mathbb{R}^3$ eine reguläre, orientierbare Fläche mit glattem Normalenfeld $n : S \to S^2$.
+Dann gilt:
+a) $n$ induziert für jedes $s \in S$ eine lineare Abbildung $d_s n : T_s S \to T_{n(s)} S^2$ durch
+$d_s n(x) = \frac{\mathrm{d}}{\mathrm{d}t} \, n(\underbrace{s \text{ „+“ } tx}_{\text{Soll auf Fläche } S \text{ bleiben}}) \Big|_{t=0}$
+Die Abbildung $d_s n$ heißt Weingarten-Abbildung
+ 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM
+b) T
+n(s)S 2
+ = T
+sS .
+c) d
+sn ist ein Endomorphismus von T
+sS .
+d) d
+sn ist selbstadjungiert bzgl. des Skalarproduktes I
+S .
+Hinweis: Die Weingarten-Abbildung wird auch Formoperator genannt.
+ 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM
+Beweis:
+a) Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken.
+b) $T_{n(s)} S^2 = n(s)^\perp = T_s S$
+c) Wegen Proposition 5.1 (a) ist $d_s n$ ein Homomorphismus.
+d) Zu zeigen: $\forall x, y \in T_s S : \langle x, d_s n(y) \rangle = \langle d_s n(x), y \rangle$
+Aufgrund der Bilinearität des Skalarproduktes genügt es diese Eigenschaft für die
+Basisvektoren zu zeigen.
+Sei x
+i = D
+pF (e
+i) = ∂F
+∂u
+i (p) i = 1, 2
+Beh.: x
+i, d
+sn(x
+j ) =  ∂2
+F
+∂u
+i∂u
+j (p), d
+sn(x
+i)
+⇒  ∂2
+F
+∂u
+i∂u
+j (p), d
+sn(x
+i) = x
+j , d
+sn(x
+i)
+Bew.: 0 =  ∂ F
+∂ u (p + te
+j ), n(p + te
+j )
+⇒ 0 = d
+dt
+ ∂ F
+∂ u (p + te
+j ), n(p + te
+j )
+
+
+
+t=0
+=  d
+dt ∂ F
+∂ u
+i (p + te
+j )
+
+∂2
+F
+∂u
+j ∂u
+i (p)
+
+
+t=0, n(s) + x
+i, d
+sn D
+pF (e
+j )
+
+x
+j
+Definition 78
+Die durch −d
+sn definierte symmetrische Bilinearform auf T
+sS heißt zweite Fundamentalform
+ von S in s bzgl. F .
+Man schreibt: I I
+s(x, y) = −d
+sn(x), y = I
+s(−d
+sn(x), y)
+Bemerkung 83
+Bezüglich der Basis { x
+1, x
+2 } von T
+sS hat I I
+s die Darstellungsmatrix
+(h(s)
+i,j )
+i,j=1,2 mit h
+i,j (s) =  ∂ 2
+F
+∂ u
+i∂ u
+j (p), n(s)
+Proposition 5.2
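+Sei $\gamma : [-\varepsilon, \varepsilon] \to S$ eine nach Bogenlänge parametrisierte Kurve mit $\gamma(0) = s$. Dann gilt:
+$\kappa_{\mathrm{Nor}}(s, \gamma) = II_s(\gamma'(0), \gamma'(0))$
+Beweis: Nach Definition 74 ist $\kappa_{\mathrm{Nor}}(s, \gamma) = \langle \gamma''(0), n(s) \rangle$. Nach Voraussetzung gilt
+$n(\gamma(t)) \perp \gamma'(t) \Leftrightarrow \langle \gamma'(t), n(\gamma(t)) \rangle = 0$
+Die Ableitung nach $t$ ergibt
+$0 = \frac{\mathrm{d}}{\mathrm{d}t} \langle n(\gamma(t)), \gamma'(t) \rangle \Big|_{t=0}$
+$= \left\langle \frac{\mathrm{d}}{\mathrm{d}t} n(\gamma(t)) \Big|_{t=0}, \gamma'(0) \right\rangle + \langle n(s), \gamma''(0) \rangle$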
+ 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM
+$= \langle d_s n(\gamma'(0)), \gamma'(0) \rangle + \kappa_{\mathrm{Nor}}(s, \gamma)$
+$= -II_s(\gamma'(0), \gamma'(0)) + \kappa_{\mathrm{Nor}}(s, \gamma)$
+Folgerung 5.3
+Die beiden Definitionen von Normalkrümmung in Abschnitt 5.1 stimmen überein:
+κ
+Nor(s, γ ) = κ
+Nor(s, γ
+(0))
+Satz 5.4
+Sei S ⊆ R3
+ eine reguläre, orientierbare Fläche und s ∈ S .
+a) Die Hauptkrümmungen κ
+1(s), κ
+2(s) sind die Eigenwerte von I I
+s.
+b) Für die Gauß-Krümmung gilt: K (s) = det(I I
+s)
+Beweis:
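+a) $II_s$ ist symmetrisch, $T_s S$ hat also eine Orthonormalbasis aus Eigenvektoren $y_1, y_2$ von
+$II_s$. Ist $x \in T_s S$, $\| x \| = 1$, so gibt es $\varphi \in [0, 2\pi)$ mit $x = \cos \varphi \cdot y_1 + \sin \varphi \cdot y_2$.
+Seien $\lambda_1, \lambda_2$ die Eigenwerte von $II_s$, also $II_s(y_i, y_i) = \lambda_i$, und o. B. d. A. $\lambda_1 \leq \lambda_2$. Dann gilt:
+$II_s(x, x) = \cos^2 \varphi \, \lambda_1 + \sin^2 \varphi \, \lambda_2$
+$= (1 - \sin^2 \varphi) \lambda_1 + \sin^2 \varphi \, \lambda_2$
+$= \lambda_1 + \sin^2 \varphi \, (\lambda_2 - \lambda_1) \geq \lambda_1$
+$= \cos^2 \varphi \, \lambda_1 + (1 - \cos^2 \varphi) \lambda_2$
+$= \lambda_2 - \cos^2 \varphi \, (\lambda_2 - \lambda_1) \leq \lambda_2$
+$\overset{\text{Prop. 5.2}}{\Longrightarrow} \lambda_1 = \min \{ \kappa_{\mathrm{Nor}}(s, x) \mid x \in T_s^1 S \}$
+$\lambda_2 = \max \{ \kappa_{\mathrm{Nor}}(s, x) \mid x \in T_s^1 S \}$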
+Satz 5.5 (Satz von Gauß-Bonnet)
+Sei $S \subseteq \mathbb{R}^3$ eine kompakte orientierbare reguläre Fläche. Dann gilt:
+$\int_S K(s) \, \mathrm{d}A = 2 \pi \chi(S)$
+Dabei ist $\chi(S)$ die Euler-Charakteristik von S.
+Beweis: Der Beweis wird hier nicht geführt. Er kann in „Elementare Differentialgeometrie“ von
+Christian Bär (2. Auflage), ISBN 978-3-11-022458-0, ab Seite 281 nachgelesen werden.
+Lösungen der Übungsaufgaben
+Lösung zu Aufgabe 1
+Teilaufgabe a) Es gilt:
+(i) ∅, X ∈ T
+X .
+(ii) T
+X ist offensichtlich unter Durchschnitten abgeschlossen, d. h. es gilt für alle U
+1, U
+2 ∈
+T
+X : U
+1 ∩ U
+2 ∈ T
+X .
+(iii)
+ Auch unter beliebigen Vereinigungen ist T
+X abgeschlossen, d. h. es gilt für eine
+beliebige Indexmenge I und alle U
+i ∈ T
+X für alle i ∈ I :
+i∈I U
+i ∈ T
+X
+Also ist (X, T
+X ) ein topologischer Raum.
+Teilaufgabe b) Wähle x = 1, y = 0. Dann gilt x ≠ y und die einzige Umgebung von x
+ist X. Da y = 0 ∈ X können also x und y nicht durch offene Mengen getrennt werden.
+$(X, T_X)$ ist also nicht hausdorffsch.
+Teilaufgabe c) Nach Bemerkung 4 sind metrische Räume hausdorffsch. Da (X, T
+X ) nach
+(b) nicht hausdorffsch ist, liefert die Kontraposition der Trennungseigenschaft, dass (X, T
+X )
+kein metrischer Raum sein kann.
+Lösung zu Aufgabe 2
+Teilaufgabe a)
+Beh.: ∀a ∈ Z : { a } ist abgeschlossen.
+Sei a ∈ Z beliebig. Dann gilt:
+Wenn jemand diese Aufgabe gemacht hat, bitte die Lösung an info@martin-thoma.de
+schicken.
+Teilaufgabe b)
+Beh.: { −1, 1 } ist nicht offen
+Bew.: durch Widerspruch
+Annahme: { −1, 1 } ist offen.
+Dann gibt es T ⊆ B, sodass
+M ∈T M = { −1, 1 }. Aber alle U ∈ B haben unendlich viele
+Elemente. Auch endlich viele Schnitte von Elementen in B haben unendlich viele Elemente
+⇒ keine endliche nicht-leere Menge kann in dieser Topologie offen sein ⇒ { −1, 1 } ist
+nicht offen.
+Teilaufgabe c)
+Beh.: Es gibt unendlich viele Primzahlen.
+  Lösungen der Übungsaufgaben
+Bew.: durch Widerspruch
+Annahme: Es gibt nur endlich viele Primzahlen p ∈ P
+Dann ist
+ Z \ { −1, +1 } FS d. Arithmetik
+=
+p∈P U
+0,p
+endlich. Das ist ein Widerspruch zu |Z| ist unendlich und | { −1, 1 } | ist endlich.
+Lösung zu Aufgabe 3
+(a) Beh.: Die offenen Mengen von P sind Vereinigungen von Mengen der Form
+
+j∈J U
+j ×
+i∈N,i=j P
+i
+wobei J ⊆ N endlich und U
+j ⊆ P
+j offen ist.
+Beweis: Nach Definition der Produkttopologie bilden Mengen der Form
+
+i∈J U
+j ×
+i∈N\J P
+i
+wobei J ⊆ N endlich und U
+j ⊆ P
+j offen ∀j ∈ J eine Basis der Topologie.
+Damit sind die offenen Mengen von P Vereinigungen von Mengen der obigen
+Form.
+(b) Beh.: Die Zusammenhangskomponenten von P sind alle einpunktig.
+Beweis: Es seien $x, y \in P$ und $x$ sowie $y$ liegen in der gleichen Zusammenhangskomponente
+$Z \subseteq P$. Da $Z$ zusammenhängend ist und $\forall i \in \mathbb{N} : p_i : P \to P_i$
+stetig ist, ist $p_i(Z) \subseteq P_i$ zusammenhängend für alle $i \in \mathbb{N}$. Die zusammenhängenden
+Mengen von $P_i$ sind genau $\{ 0 \}$ und $\{ 1 \}$, d. h. für alle $i \in \mathbb{N}$ gilt entweder
+$p_i(Z) \subseteq \{ 0 \}$ oder $p_i(Z) \subseteq \{ 1 \}$. Es sei $z_i \in \{ 0, 1 \}$ so, dass $p_i(Z) \subseteq \{ z_i \}$ für
+alle $i \in \mathbb{N}$. Dann gilt also:
+$\underbrace{p_i(x)}_{= x_i} = z_i = \underbrace{p_i(y)}_{= y_i} \quad \forall i \in \mathbb{N}$
+Somit folgt: $x = y$
+Lösung zu Aufgabe 4
+(a) Beh.: GL
+n(R) ist nicht kompakt.
+Bew.: det : GL
+n(R) → R \ { 0 } ist stetig. Außerdem ist det(GL
+n(R)) = R \ { 0 }
+nicht kompakt. 22
+⇒ GL
+n(R) ist nicht kompakt.
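+(b) Beh.: $\mathrm{SL}_1(\mathbb{R})$ ist kompakt, für $n > 1$ ist $\mathrm{SL}_n(\mathbb{R})$ nicht kompakt.
+Bew.: Für $\mathrm{SL}_1(\mathbb{R})$ gilt: $\mathrm{SL}_1(\mathbb{R}) = \{ A \in \mathbb{R}^{1 \times 1} \mid \det A = 1 \} = \{ (1) \} \cong \{ 1 \}$. $\overset{22}{\Rightarrow}$ $\mathrm{SL}_1(\mathbb{R})$
+ist kompakt.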
+  Lösungen der Übungsaufgaben
+$\mathrm{SL}_n(\mathbb{R}) \subseteq \mathrm{GL}_n(\mathbb{R})$ lässt sich mit einer Teilmenge des $\mathbb{R}^{n^2}$ identifizieren. Nach Satz 1.1
+sind diese genau dann kompakt, wenn sie beschränkt und abgeschlossen sind. Definiere
+nun für $n \in \mathbb{N}_{\geq 2}$, $m \in \mathbb{N}$:
+$A_m = \mathrm{diag}_n(m, \frac{1}{m}, \dots, 1)$
+Dann gilt: $\det A_m = 1$, d. h. $A_m \in \mathrm{SL}_n(\mathbb{R})$, und $A_m$ ist unbeschränkt, da $\| A_m \|_\infty = m \xrightarrow[m \to \infty]{} \infty$.
+(c) Beh.: P (R) ist kompakt.
+Bew.: P (R) ∼
+= S n
+/
+x∼−x. Per Definition der Quotiententopologie ist die Klassenabbildung
+ stetig. Da S n
+ als abgeschlossene und beschränkte Teilmenge des Rn+1
+ kompakt
+ist 22
+⇒ P (R) ist kompakt.
+Lösung zu Aufgabe 5
+Die Definition von Homöomorphismus kann auf Seite 9 nachgelesen werden.
+Definition 79
+Seien (G, ∗) und (H, ◦) Gruppen und ϕ : G → H eine Abbildung.
+ϕ heißt Homomorphismus, wenn
+∀g
+1, g
+2 ∈ G : ϕ(g
+1 ∗ g
+2) = ϕ(g
+1) ◦ ϕ(g
+2)
+gilt.
+Es folgt direkt:
+1) Sei $X = \mathbb{R}$ mit der Standardtopologie und $\varphi_1 := \mathrm{id}_{\mathbb{R}}$ und $R = (\mathbb{R}, +)$. Dann ist $\varphi_1$ ein
+Gruppenhomomorphismus und ein Homöomorphismus.
+2) Sei G = (Z, +) und H = (Z/3Z, +). Dann ist ϕ
+2 : G → H, x → x mod 3 ein
+Gruppenhomomorphismus. Jedoch ist ϕ
+2 nicht injektiv, also sicher kein Homöomorphismus.
+
+3) Sei X ein topologischer Raum. Dann ist id
+X ein Homöomorphismus. Da keine
+Verknüpfung auf X definiert wurde, ist X keine Gruppe und daher auch kein Gruppenhomomorphismus.
+
+Also: Obwohl die Begriffe ähnlich klingen, werden sie in ganz unterschiedlichen Kontexten
+verwendet.
+Lösung zu Aufgabe 6
+Die Definition einer Isotopie kann auf Seite 20 nachgelesen werden, die einer Isometrie auf
+Seite 6.
+Definition 80
+Seien (G, ∗) und (H, ◦) Gruppen und ϕ : G → H eine Abbildung.
+ϕ heißt Isomorphismus, wenn ϕ ein bijektiver Homomorphismus ist.
+Eine Isotopie ist also für Knoten definiert, Isometrien machen nur in metrischen Räumen
+Sinn und ein Isomorphismus benötigt eine Gruppenstruktur.
+  Lösungen der Übungsaufgaben
+Lösung zu Aufgabe 7
+(a) Vor.: Sei M eine topologische Mannigfaltigkeit.
+Beh.: M ist wegzusammenhängend ⇔ M ist zusammenhängend
+Beweis: „ ⇒“: Da M insbesondere ein topologischer Raum ist folgt diese Richtung
+direkt aus Bemerkung 23.
+„ ⇐“: Seien x, y ∈ M und
+$Z := \{ z \in M \mid \exists \text{ Weg von } x \text{ nach } z \}$
+Es gilt:
+(i) $Z \neq \emptyset$, da M lokal wegzusammenhängend ist
+(ii) $Z$ ist offen, da M lokal wegzusammenhängend ist
+(iii) $Z^C := \{ \tilde{z} \in M \mid \nexists \text{ Weg von } x \text{ nach } \tilde{z} \}$ ist offen
+Da M eine Mannigfaltigkeit ist, existiert zu jedem $\tilde{z} \in Z^C$ eine offene und
+wegzusammenhängende Umgebung $U_{\tilde{z}} \subseteq M$.
+Es gilt sogar $U_{\tilde{z}} \subseteq Z^C$, denn gäbe es ein $U_{\tilde{z}} \ni z \in Z$, so gäbe es Wege $\gamma_2 : [0, 1] \to M$, $\gamma_2(0) = z$, $\gamma_2(1) = x$ und $\gamma_1 : [0, 1] \to M$, $\gamma_1(0) = \tilde{z}$, $\gamma_1(1) = z$.
+Dann wäre aber
+$\gamma : [0, 1] \to M$,
+$\gamma(x) = \begin{cases} \gamma_1(2x) & \text{falls } 0 \leq x \leq \frac{1}{2} \\ \gamma_2(2x - 1) & \text{falls } \frac{1}{2} < x \leq 1 \end{cases}$
+ein stetiger Weg von $\tilde{z}$ nach $x$ ⇒ Widerspruch.
+Da M zusammenhängend ist und $M = \underbrace{Z}_{\text{offen}} \cup \underbrace{Z^C}_{\text{offen}}$, sowie $Z \neq \emptyset$ folgt $Z^C = \emptyset$.
+Also ist M = Z wegzusammenhängend.
+(b) Beh.: X ist wegzusammenhängend.
+Beweis: X := (R \ { 0 }) ∪ { 0
+1, 0
+2 } und (R \ { 0 }) ∪ { 0
+2 } sind homöomorph zu R.
+Also sind die einzigen kritischen Punkte, die man nicht verbinden können könnte
+0
+1 und 0
+2.
+Da (R \ { 0 }) ∪ { 0
+1 } homöomorph zu R ist, exisitert ein Weg γ
+1 von 0
+1 zu einem
+beliebigen Punkt a ∈ R \ { 0 }.
+Da (R \ { 0 }) ∪ { 0
+2 } ebenfalls homöomorph zu R ist, existiert außerdem ein
+Weg γ
+2 von a nach 0
+2. Damit existiert ein (nicht einfacher) Weg γ von 0
+1 nach
+0
+2.
+Lösung zu Aufgabe 9
+Vor.: Sei (X, d) eine absolute Ebene, A, B , C ∈ X und AB C ein Dreieck.
+  Lösungen der Übungsaufgaben
+(a) Beh.: AB ∼
+= AC ⇒ ∠AB C ∼
+= ∠AC B
+Bew.: Sei AB ∼
+= AC .
+⇒ ∃ Isometrie ϕ mit ϕ(B ) = C und ϕ(C ) = B und ϕ(A) = A.
+⇒ ϕ(∠AB C ) = ∠AC B
+⇒ ∠AB C ∼
+= ∠AC B
+(b) Beh.:
+ Der längeren Seite von AB C liegt der größere Winkel gegenüber und umgekehrt.
+
+Bew.: Sei $d(A, C) > d(A, B)$. Nach §3 (i) gibt es $C' \in AC^+$ mit $d(A, C') = d(A, B)$
+⇒ $C'$ liegt zwischen A und C.
+Es gilt $\angle ABC' < \angle ABC$ und aus Aufgabe 9 (a) folgt: $\angle ABC' = \angle AC'B$.
+$\angle BC'A$ ist ein nicht anliegender Außenwinkel zu $\angle BCA$ $\overset{\text{Bem. 66}}{\Longrightarrow}$ $\angle BC'A > \angle BCA$
+⇒ $\angle BCA < \angle BC'A = \angle ABC' < \angle ABC$
+Sei umgekehrt $\angle ABC > \angle BCA$, kann
+wegen 1. Teil von Aufgabe 9 (b) nicht $d(A, B) > d(A, C)$ gelten.
+Wegen Aufgabe 9 (a) kann nicht d(A, B ) = d(A, C ) gelten.
+⇒ d(A, B ) < d(A, C )
+(c) Vor.: Sei g eine Gerade, P ∈ X und P ∉ g
+Beh.: ∃! Lot
+Bew.:
+ ÜB10 A4(a): Es gibt Geradenspiegelung ϕ an g. ϕ vertauscht die beiden
+Halbebenen bzgl. g.
+⇒ ϕ(P )P schneidet g in F .
+Es gibt eine Geradenspiegelung ϕ an g. ϕ vertauscht die beiden Halbebenen bzgl. g
+⇒ ϕ(P )P schneidet g in F .
+Sei A ∈ g \ { F }. Dann gilt ϕ(∠AF P ) = ∠AF ϕ(P ) = π ⇒ ∠AF P ist rechter Winkel.
+Gäbe es nun G ∈ g \ { F }, so dass P G weiteres Lot von P auf g ist, wäre P F G
+ein Dreieck mit zwei rechten Innenwinkeln (vgl. Abbildung 5.4).
+·
+ ·A
+ GP
+F
+ g
+Abbildung 5.4: Zwei Lote zu einer Geraden g durch einen Punkt P
+Nach Folgerung 4.4 ist die Summe von zwei Innenwinkeln immer < π
+⇒ G gibt es nicht.
+Lösung zu Aufgabe 10
+Sei $f \nparallel h$ und o. B. d. A. $f \parallel g$.
+$f \nparallel h \Rightarrow f \cap h \neq \emptyset$, sei also $x \in f \cap h$. Mit Axiom §5 folgt: Es gibt höchstens eine Parallele
+zu g durch x, da $x \notin g$. Diese ist f, da $x \in f$ und $f \parallel g$. Da aber $x \in h$, kann h nicht
+  Lösungen der Übungsaufgaben
+parallel zu g sein, denn ansonsten gäbe es zwei Parallelen zu g durch x ($f \neq h$). ⇒ $g \nparallel h$
+Lösung zu Aufgabe 11
+Sei (X, d, G) eine Geometrie, die §1-§4 erfüllt. Seien außerdem $\triangle ABC$ und $\triangle A'B'C'$
+Dreiecke, für die gilt:
+\begin{align*}
+d(A, B) &= d(A', B') \\
+d(A, C) &= d(A', C') \\
+d(B, C) &= d(B', C')
+\end{align*}
+Sei $\varphi$ die Isometrie mit $\varphi(A') = A$, $\varphi(B') = B$ und $\varphi(C')$ liegt in der selben Halbebene
+bzgl. AB wie C. Diese Isometrie existiert wegen §4.
+Es gilt $d(A, C) = d(A', C') = d(\varphi(A'), \varphi(C')) = d(A, \varphi(C'))$ und
+$d(B, C) = d(B', C') = d(\varphi(B'), \varphi(C')) = d(B, \varphi(C'))$.
+$\overset{\text{Bem. 62}}{\Longrightarrow} C = \varphi(C')$.
+Es gilt also $\varphi(\triangle A'B'C') = \triangle ABC$.
+Bildquellen
+Alle Bilder, die hier nicht aufgeführt sind, wurden von Martin Thoma erstellt.
+Teilweise wurden die im folgenden aufgelisteten Bilder noch leicht modifiziert.
+Abb. 0.1a $S^2$: Tom Bombadil, tex.stackexchange.com/a/42865
+Abb. 0.1b Würfel: Jan Hlavacek, tex.stackexchange.com/a/12069
+Abb. 0.1e $T^2$: Jake, tex.stackexchange.com/a/70979/5645
+Abb. 1.6 Stereographische Projektion: texample.net/tikz/examples/map-projections
+Abb. 1.11 Knoten von Jim.belk aus der „ Blue knots“-Serie:
+– Trivialer Knoten: commons.wikimedia.org/wiki/File:Blue_Unknot.png
+– Kleeblattknoten: commons.wikimedia.org/wiki/File:Blue_Trefoil_Knot.png
+– Achterknoten: commons.wikimedia.org/wiki/File:Blue_Figure-Eight_Knot.png
+– $6_2$-Knoten: commons.wikimedia.org/wiki/File:Blue_6_2_Knot.png
+Abb. 1.12 Reidemeister-Züge: YAMASHITA Makoto (1, 2, 3)
+Abb. 1.13 Kleeblattknoten, 3-Färbung: Jim.belk, commons.wikimedia.org/wiki/File:Tricoloring.png
+Abb. 2.1 Doppeltorus: Oleg Alexandrov, commons.wikimedia.org/wiki/File:Double_torus_illustration.png
+Abb. 2.8 Faltungsdiagramm: Jérôme Urhausen, Email vom 11.02.2014.
+Abb. 3.3b 3 Pfade auf Torus: Charles Staats, tex.stackexchange.com/a/149991/5645
+Abb. 3.10 Überlagerung von $S^1$ mit $\mathbb{R}$: Alex, tex.stackexchange.com/a/149706/5645
+Abb. 4.7a Sphärisches Dreieck: Dominique Toussaint,
+commons.wikimedia.org/wiki/File:Spherical_triangle_3d_opti.png
+Abb. 5.1 Möbiusband: Jake, tex.stackexchange.com/a/118573/5645
+Abb. 5.3 Krümmung des Torus: Charles Staats, tex.stackexchange.com/a/149991/5645
+Abkürzungsverzeichnis
+Beh. Behauptung
+Bew. Beweis
+bzgl. bezüglich
+bzw. beziehungsweise
+ca. circa
+d. h. das heißt
+Def. Definition
+etc. et cetera
+ex. existieren
+Hom. Homomorphismus
+o. B. d. A. ohne Beschränkung der Allgemeinheit
+Prop. Proposition
+sog. sogenannte
+Vor. Voraussetzung
+vgl. vergleiche
+z. B. zum Beispiel
+zhgd. zusammenhängend
+z. z. zu zeigen
+Ergänzende Definitionen und Sätze
+Da dieses Skript in die Geometrie und Topologie einführen soll, sollten soweit wie möglich alle
+benötigten Begriffe definiert und erklärt werden. Die folgenden Begriffe wurden zwar verwendet,
+aber nicht erklärt, da sie Bestandteil der Vorlesungen „Analysis I und II“ sowie „Lineare Algebra
+und analytische Geometrie I und II“ sind. Jedoch will ich zumindest die Definitionen bereitstellen.
+Definition 81
+Sei $D \subseteq \mathbb{R}$ und $x_0 \in \mathbb{R}$. $x_0$ heißt ein Häufungspunkt von $D$ :⇔ ∃ Folge $x_n$ in $D \setminus \{x_0\}$
+mit $x_n \to x_0$.
+Folgende Definition wurde dem Skript von Herrn Prof. Dr. Leuzinger für Lineare Algebra
+entnommen:
+Definition 82
+Es seien V und W K-Vektorräume und A(V ) und A(W ) die zugehörigen affinen Räume.
+Eine Abbildung f : V → W heißt affin, falls für alle a, b ∈ V und alle λ, µ ∈ K mit λ + µ = 1
+gilt:
+ f (λa + µb) = λf (a) + µf (b)
+Definition 83
+Sei V ein Vektorraum und S ⊆ V eine Teilmenge.
+S heißt eine Orthonormalbasis von V , wenn gilt:
+(i) S ist eine Basis von V
+(ii) $\forall v \in S : \|v\| = 1$
+(iii) $\forall v_1, v_2 \in S : v_1 \neq v_2 \Rightarrow \langle v_1, v_2 \rangle = 0$
+Satz (Zwischenwertsatz)
+Sei $a < b$ und $f \in C[a, b] := C([a, b])$, weiter sei $y_0 \in \mathbb{R}$ und $f(a) < y_0 < f(b)$ oder
+$f(b) < y_0 < f(a)$. Dann existiert ein $x_0 \in [a, b]$ mit $f(x_0) = y_0$.
+Definition 84
+Sei V ein Vektorraum über einem Körper K und f : V → V eine lineare Abbildung.
+v ∈ V \ { 0 } heißt Eigenvektor :⇔ ∃λ ∈ K : f (v) = λv.
+Wenn ein solches λ ∈ K existiert, heißt es Eigenwert von f .
+Satz (Binomischer Lehrsatz)
+Sei x, y ∈ R. Dann gilt:
+\[ (x + y)^n = \sum_{k=0}^{n} \binom{n}{k} x^{n-k} y^k \quad \forall n \in \mathbb{N}_0 \]
+Definition 85
+Seien $a, b \in \mathbb{R}^3$ Vektoren.
+\[ a \times b := \begin{pmatrix} a_1 \\ a_2 \\ a_3 \end{pmatrix} \times \begin{pmatrix} b_1 \\ b_2 \\ b_3 \end{pmatrix} = \begin{pmatrix} a_2 b_3 - a_3 b_2 \\ a_3 b_1 - a_1 b_3 \\ a_1 b_2 - a_2 b_1 \end{pmatrix} \]
+Symbolverzeichnis
+Mengenoperationen
+Seien A, B und M Mengen.
+$A^C$ Komplement von A
+$\mathcal{P}(M)$ Potenzmenge von M
+$\overline{M}$ Abschluss von M
+$\partial M$ Rand der Menge M
+$M^\circ$ Inneres der Menge M
+$A \times B$ Kreuzprodukt
+$A \subseteq B$ Teilmengenbeziehung
+$A \subsetneq B$ echte Teilmengenbeziehung
+$A \setminus B$ Differenzmenge
+$A \cup B$ Vereinigung
+$A \mathbin{\dot\cup} B$ Disjunkte Vereinigung
+$A \cap B$ Schnitt
+Geometrie
+$AB$ Gerade durch die Punkte A und B
+$\overline{AB}$ Strecke mit Endpunkten A und B
+$\triangle ABC$ Dreieck mit Eckpunkten A, B, C
+$\overline{AB} \cong \overline{CD}$ Die Strecken $\overline{AB}$ und $\overline{CD}$ sind isometrisch
+$|K|$ Geometrische Realisierung des Simplizialkomplexes K
+Gruppen
+Sei X ein topologischer Raum und K ein Körper.
+
+Homöo(X ) Homöomorphismengruppe
+Iso(X ) Isometriengruppe
+$\mathrm{GL}_n(K)$ Allgemeine lineare Gruppe (von General Linear Group)
+$\mathrm{SL}_n(K)$ Spezielle lineare Gruppe
+$\mathrm{PSL}_n(K)$ Projektive lineare Gruppe
+Perm(X) Permutationsgruppe
+Sym(X) Symmetrische Gruppe
+Wege
+Sei $\gamma : I \to X$ ein Weg.
+$[\gamma]$ Homotopieklasse von $\gamma$
+$\gamma_1 * \gamma_2$ Zusammenhängen von Wegen
+$\gamma_1 \sim \gamma_2$ Homotopie von Wegen
+$\overline{\gamma}(x)$ Inverser Weg, also $\overline{\gamma}(x) := \gamma(1 - x)$
+$C$ Bild eines Weges $\gamma$, also $C := \gamma([0, 1])$
+Weiteres
+B Basis einer Topologie
+$B_\delta(x)$ δ-Kugel um x
+S Subbasis einer Topologie
+T Topologie
+A Atlas
+P Projektiver Raum
+$\langle \cdot, \cdot \rangle$ Skalarprodukt
+$X/_\sim$ X modulo ∼
+$[x]_\sim$ Äquivalenzklassen von x bzgl. ∼
+$\|x\|$ Norm von x
+$|x|$ Betrag von x
+$\langle a \rangle$ Erzeugnis von a
+$S^n$ Sphäre
+$T^n$ Torus
+$f \circ g$ Verkettung von f und g
+$\pi_X$ Projektion auf X
+$f|_U$ f eingeschränkt auf U
+$f^{-1}(M)$ Urbild von M
+Rg(M) Rang von M
+χ(K) Euler-Charakteristik von K
+  Symbolverzeichnis
+$\Delta^k$ Standard-Simplex
+$X \# Y$ Verklebung von X und Y
+$d_n$ Lineare Abbildung aus Bemerkung 37
+$A \cong B$ A ist isometrisch zu B
+$f_*$ Abbildung zwischen Fundamentalgruppen (vgl. Seite 49)
+  Symbolverzeichnis
+Zahlenmengen
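+$\mathbb{N} = \{1, 2, 3, \dots\}$ Natürliche Zahlen
+$\mathbb{Z} = \mathbb{N} \cup \{0, -1, -2, \dots\}$ Ganze Zahlen
+$\mathbb{Q} = \mathbb{Z} \cup \{\tfrac{1}{2}, \tfrac{1}{3}, \tfrac{2}{3}\} = \{\tfrac{z}{n} \text{ mit } z \in \mathbb{Z} \text{ und } n \in \mathbb{Z} \setminus \{0\}\}$ Rationale Zahlen
+$\mathbb{R} = \mathbb{Q} \cup \{\sqrt{2}, -\sqrt[3]{3}, \dots\}$ Reelle Zahlen
+$\mathbb{R}_+$ Echt positive reelle Zahlen
+$\mathbb{R}^n_{+,0} := \{(x_1, \dots, x_n) \in \mathbb{R}^n \mid x_n \geq 0\}$ Halbraum
+$\mathbb{R}^\times = \mathbb{R} \setminus \{0\}$ Einheitengruppe von $\mathbb{R}$
+$\mathbb{C} = \{a + ib \mid a, b \in \mathbb{R}\}$ Komplexe Zahlen
+$\mathbb{P} = \{2, 3, 5, 7, \dots\}$ Primzahlen
+$\mathbb{H} = \{z \in \mathbb{C} \mid \Im z > 0\}$ obere Halbebene
+$I = [0, 1] \subsetneq \mathbb{R}$ Einheitsintervall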
+$f : S^1 \to \mathbb{R}^2$ Einbettung der Kreislinie in die Ebene
+$\pi_1(X, x)$ Fundamentalgruppe im topologischen Raum X um x ∈ X
+Fix(f) Menge der Fixpunkte der Abbildung f
+$\|\cdot\|_2$ 2-Norm; Euklidische Norm
+κ Krümmung
+$\kappa_{\mathrm{Nor}}$ Normalenkrümmung
+$V(f)$ Nullstellenmenge von f ²
+Krümmung
+$D_pF : \mathbb{R}^2 \to \mathbb{R}^3$ Lineare Abbildung mit Jacobi-Matrix in p (siehe Seite 89)
+$T_sS$ Tangentialebene an $S \subseteq \mathbb{R}^3$ durch s ∈ S
+$d_sn(x)$ Weingarten-Abbildung
+² von Vanishing Set
+Stichwortverzeichnis
+Abbildung
+affine, 107
+differenzierbare, 29
+homotope, 50
+offene, 53
+simpliziale, 35
+stetige, 9
+Abschluss, 3
+Abstand, 86
+Abstandsaxiom, 65
+Achterknoten, 20
+Aktion, siehe Gruppenoperation
+Anordnungsaxiome, 66
+Atlas, 24
+Außenwinkel, 70
+Axiom, 64
+Axiomensystem, 64
+Basis, 3
+Baum, 37
+Betti-Zahl, 41
+Bewegungsaxiom, 66
+Binormalenvektor, 89
+Cantorsches Diskontinuum, 22
+$C^k$-Struktur, 29
+Decktransformation, 59
+Decktransformationsgruppe, 59
+Deformationsretrakt, 47
+dicht, 3
+Diffeomorphismus, 29
+Dimension, 34
+diskret, 53
+Doppelverhältnis, 83
+Dreibein
+begleitendes, 89
+Ebene
+euklidische, 64
+Eigenvektor, 107
+Eigenwert, 107
+einfach zusammenhängend, 49
+Einheitsnormalenfeld, 90
+Euler-Charakteristik, siehe Eulerzahl
+Eulersche Polyederformel, 38
+Eulerzahl, 36
+Färbbarkeit, 21
+Faser, siehe Urbild
+Fläche
+orientierbare, 90
+reguläre, 30
+Flächenelement, 95
+Formoperator, siehe Weingarten-Abbildung
+Fundamentalform
+erste, 94
+zweite, 97
+Fundamentalgruppe, 47
+Gauß-Krümmung, 92, 91–94
+Geometrie, 64
+Gerade, 64
+hyperbolische, 77
+Graph, 37
+Grenzwert, 8
+Gruppe
+allgemeine lineare, 22, 26
+spezielle lineare, 22
+topologische, 33
+Gruppe operiert durch Homöomorphismen,
+61
+Gruppenaktion, siehe Gruppenoperation
+Gruppenoperation, 60, 60–63
+stetige, 61
+Häufungspunkt, 107
+Hülle
+konvexe, 34
+Halbebene, 66
+Halbgerade, 65
+Halbraum, 28
+Hauptkrümmung, 92
+Hilbert-Kurve, 19, 19
+  Stichwortverzeichnis
+Homöomorphismengruppe, 10
+Homöomorphismus, 9
+Homologiegruppe, 41
+Homomorphismus, 101
+Homotopie, 44
+Homotopieklasse, 47
+Inklusionsabbildung, 47
+Innenwinkel, 70
+Inneres, 3
+Inzidenzaxiome, 64
+Isometrie, 6, 10
+Isometriegruppe, 10
+Isomorphismus, 101
+Isotopie, 20
+Jordankurve, 19
+geschlossene, 19
+Karte, 24
+Kartenwechsel, 28
+Kern
+offener, 3
+Kleeblattknoten, 20
+Klumpentopologie, siehe triviale Topologie
+Knoten, 20, 17–21
+äquivalente, 20
+trivialer, 20
+Knotendiagramm, 20
+kollinear, 65
+kongruent, siehe isometrisch
+Kongruenz, siehe Isometrie
+Kongruenzsatz
+SSS, 104
+SWS, 69
+SWW, 74
+WSW, 70
+Krümmung, 88, 89
+Kreis, 37
+Kreuzprodukt, 107
+Kurve, 87
+Länge einer, 87
+Lage
+allgemeine, 34
+Lehrsatz
+Binomischer, 107
+Lie-Gruppe, 33
+liegt zwischen, 65
+Liftung, 54
+Limes, 8
+lokal, 3
+Lot, 86
+Lotfußpunkt, 86
+Möbiusband, 91
+Möbiustransformation, 80
+Mannigfaltigkeit, 24
+differenzierbare, 29
+geschlossene, 25
+glatte, 29
+mit Rand, 28
+Menge
+abgeschlossene, 2
+offene, 2
+zusammenhängende, 11
+Metrik, 6
+diskrete, 6
+hyperbolische, 84
+SNCF, 8
+Nebenwinkel, 86
+Neilsche Parabel, 27
+Normalenfeld, 90
+Normalenvektor, 87, 89
+Normalkrümmung, 91, 92, 98
+Oktaeder, 34
+Orthonormalbasis, 107
+Paraboloid
+hyperbolisches, 92
+Parallele, 66
+Parallelenaxiom, 64
+parametrisiert
+durch Bogenlänge, 87
+Parametrisierung
+reguläre, 30
+Polyzylinder, 17
+Produkttopologie, 4
+Projektion
+stereographische, 11
+Punkt, 34
+Quotiententopologie, 5, 10, 11
+Rand, 3, 28
+Raum
+hausdorffscher, 8
+kompakter, 14
+metrischer, 6
+projektiver, 5, 22, 25, 52
+  Stichwortverzeichnis
+topologischer, 2
+zusammenhängender, 11
+Realisierung
+geometrische, 34
+Retraktion, 47
+Satz von
+Gauß-Bonnet, 98
+Scheitelwinkel, 86
+Seite, 34
+Sierpińskiraum, 3, 22
+Simplex, 34
+Simplizialkomplex, 34
+Simplizialkomplexe
+flächengleiche, 74
+Sphäre
+exotische, 29
+Standard-Simplex, 34
+Standardtopologie, 2
+sternförmig, 48
+Stetigkeit, 9–11
+Strecke, 65
+Struktur
+differenzierbare, 29
+Subbasis, 3
+Tangentialebene, 89, 89–90
+Teilraum, 4
+Teilraumtopologie, 4
+Teilsimplex, 34
+Topologie
+diskrete, 2, 6
+euklidische, 2
+feinste, 11
+triviale, 2
+Zariski, 2, 12, 15
+Torus, iii, 5, 38, 51, 93
+Total Unzusammenhängend, 100
+Triangulierung, 38
+Überdeckung, 14
+Übergangsfunktion, siehe Kartenwechsel
+Überlagerung, 51, 51–60
+reguläre, 59
+universelle, 57
+Umgebung, 3
+Umgebungsbasis, 58
+vanishing set, 26
+Vektorprodukt, siehe Kreuzprodukt
+Verklebung, 26
+verträglich, 29
+Würfel, 34
+Weg, 17
+einfacher, 17
+geschlossener, 17
+homotope, 44
+inverser, 48
+zusammengesetzter, 46
+Wegzusammenhang, 18
+Weingarten-Abbildung, 95
+Winkel, 70
+Zusammenhang, 11–14
+Zusammenhangskomponente, 13
+Zwischenwertsatz, 107
\ No newline at end of file
diff --git a/read/results/pymupdf/1601.03642.txt b/read/results/pymupdf/1601.03642.txt
index ab22ff4..6aeb67d 100644
--- a/read/results/pymupdf/1601.03642.txt
+++ b/read/results/pymupdf/1601.03642.txt
@@ -100,9 +100,9 @@ basic building blocks is a time-intensive and difﬁcult task.
 An important group of machine learning algorithms was
 inspired by biological neurons and are thus called artiﬁcial
 neural networks. Those networks are based on mathematical
-functions called artiﬁcial neurons which take n ∈ N num-
-bers x1, . . . , xn ∈ R as input, multiply them with weights
-w1, . . . , wn ∈ R, add them and apply a so called activation
+functions called artiﬁcial neurons which take n ∈N num-
+bers x1, . . . , xn ∈R as input, multiply them with weights
+w1, . . . , wn ∈R, add them and apply a so called activation
 function ϕ as visualized in Figure 1(a). One example of such
 an activation function is the sigmoid function ϕ(x) =
 1
diff --git a/read/results/pymupdf/1602.06541.txt b/read/results/pymupdf/1602.06541.txt
index f95bfd4..6d69bee 100644
--- a/read/results/pymupdf/1602.06541.txt
+++ b/read/results/pymupdf/1602.06541.txt
@@ -175,10 +175,10 @@ However, this can only support the explanation of
 particular problems or showcase special situation. For
 meaningful information about the overall accuracy, there
 are a couple of metrics how accuracy can be deﬁned.
-For this section, let k ∈ N be the number of classes,
-nij ∈ N0 with i, j ∈ 1, . . . , k be the number of pixels
+For this section, let k ∈N be the number of classes,
+nij ∈N0 with i, j ∈1, . . . , k be the number of pixels
 which belong to class i and were labeled as class j.
-(nij) is called a confusion matrix. Let ti = �k
+(nij) is called a confusion matrix. Let ti = Pk
 j=1 nij
 be the total number of pixels of class i.
 One way to compare segmentation algorithms is by
@@ -186,9 +186,9 @@ the pixel-wise accuracy of the predicted segmentation
 as done in many publications [SWRC06], [CP08],
 [LSD14]. This is also called per-pixel rate and de-
 ﬁned as
-�k
+Pk
 i=1 nii
-�k
+Pk
 i=1 ti . Taking the pixel-wise classiﬁcation
 accuracy has two major drawbacks:
 P1 Tasks like segmenting images for autonomous cars
@@ -209,29 +209,29 @@ car”
 Three accuracy metrics which do not suffer from
 problem P1 are used in [LSD14]:
 • mean accuracy: 1
-k · �k
+k · Pk
 i=1
 nii
-ti ∈ [0, 1]
+ti ∈[0, 1]
 • mean intersection over union:
 1
-k · �k
+k · Pk
 i=1
 nii
-ti−nii+�k
-j=1 nji ∈ [0, 1]
+ti−nii+Pk
+j=1 nji ∈[0, 1]
 • frequency
 weighted
 intersection
 over
 union:
-(�k
+(Pk
 i=1 ti)
-−1 �k
+−1 Pk
 i=1 ti ·
 nii
-ti−nii+�k
-j=1 nji ∈ [0, 1]
+ti−nii+Pk
+j=1 nji ∈[0, 1]
 Another problem might be pixels which cannot be
 assigned to one of the known classes. For this reason,
 [SWRC06] makes use of a void class. This class gets
@@ -473,7 +473,7 @@ an image is histogram equalization, which can be
 applied to improve contrast [PAA+87], [RM07].
 2) Histogram of oriented Gradients: Histogram of
 oriented gradients (HOG) features interpret the image
-as a discrete function I : N2 → { 0, . . . , 255 } which
+as a discrete function I : N2 →{ 0, . . . , 255 } which
 maps the position (x, y) to a color. For each pixel, there
 are two gradients: The partial derivative of x and y.
 Now the original image is transformed to two feature
@@ -687,8 +687,8 @@ D. SVMs
 SVMs are well-studied binary classiﬁers which can
 be described by ﬁve central ideas. For those ideas, the
 training data is represented as (xi, yi) where xi is the
-feature vector and yi ∈ { −1, 1 } the binary label for
-training example i ∈ { 1, . . . , m }.
+feature vector and yi ∈{ −1, 1 } the binary label for
+training example i ∈{ 1, . . . , m }.
 1) If data is linearly separable, it can be separated
 by a hyperplane. There is one hyperplane which
 maximizes the distance to the next datapoints
@@ -698,12 +698,12 @@ w,b
 1
 2∥w∥2
 s.t. ∀m
-i=1yi · (⟨w, xi⟩ + b)
-�
-��
-�
+i=1yi · (⟨w, xi⟩+ b)
+|
+{z
+}
 sgn applied to this gives the classiﬁcation
-≥ 1
+≥1
 2) Even if the underlying process which generates the
 features for the two classes is linearly separable,
 noise can make the data not separable. The intro-
@@ -711,7 +711,7 @@ duction of slack variables to relax the requirement
 of linear separability solves this problem. The
 trade-off between accepting some errors and a
 more complex model is weighted by a parameter
-C ∈ R+
+C ∈R+
 0 . The bigger C, the more errors are
 accepted. The new optimization problem is:
 minimize
@@ -719,13 +719,13 @@ w
 1
 2∥w∥2 + C ·
 m
-�
+X
 i=1
 ξi
 s.t. ∀m
-i=1yi · (⟨w, xi⟩ + b) ≥ 1 − ξi
-Note that 0 ≤ ξi ≤ 1 means that the data point
-is within the margin, whereas ξi ≥ 1 means it is
+i=1yi · (⟨w, xi⟩+ b) ≥1 −ξi
+Note that 0 ≤ξi ≤1 means that the data point
+is within the margin, whereas ξi ≥1 means it is
 misclassiﬁed. An SVM with C > 0 is also called
 a soft-margin SVM.
 3) The primal problem is to ﬁnd the normal vector
@@ -733,10 +733,10 @@ w and the bias b. The dual problem is to express
 w as a linear combination of the training data xi:
 w =
 m
-�
+X
 i=1
 αiyixi
-where yi ∈ { −1, 1 } represents the class of the
+where yi ∈{ −1, 1 } represents the class of the
 training example and αi are Lagrange multipliers.
 The usage of Lagrange multipliers is explained
 with some examples in [Smi04]. The usage of the
@@ -750,22 +750,22 @@ to [Bur98]:
 maximize
 αi
 m
-�
+X
 i=1
-αi − 1
+αi −1
 2
 m
-�
+X
 i=1
 m
-�
+X
 j=1
 αiαjyiyj⟨xi, xj⟩
 s.t. ∀m
-i=10 ≤ αi ≤ C
+i=10 ≤αi ≤C
 s.t.
 m
-�
+X
 i=1
 αiyi = 0
 
@@ -783,14 +783,14 @@ This function K is called a kernel. The idea of
 never explicitly transforming the vectors xi to the
 higher dimensional space is called the kernel trick.
 Common kernels include the polynomial kernel
-KP (xi, xj) = (⟨xi, xj⟩ + r)p
+KP (xi, xj) = (⟨xi, xj⟩+ r)p
 of degree p and coefﬁcient r, the Gaussian radial
 basis function (RBF) kernel
 KGauss(xi, xj) = e
 −γ∥xi−xj ∥2
 2σ2
 and the sigmoid kernel
-Ktanh(xi, xj) = tanh(γ⟨xi, xj⟩ − r)
+Ktanh(xi, xj) = tanh(γ⟨xi, xj⟩−r)
 where the parameter γ determines how much
 inﬂuence single training examples have.
 5) The described SVMs can only distinguish between
@@ -862,14 +862,14 @@ gets labeled as shown in Figure 3. For example, a MRF
 which is trained on images of the size 224 px×224 pixel
 and gets the raw RGB values as features has
 224 · 224 · 3
-�
-��
-�
+|
+{z
+}
 input
 + 224 · 224
-�
-��
-�
+|
+{z
+}
 output
 = 200 704
 random variables. Those random variables are condi-
@@ -891,18 +891,18 @@ typically live on 0, . . . , 255 or [0, 1].
 The probability of x, y can be expressed as
 P(x, y) = 1
 Z e−E(x,y)
-where Z = �
+where Z = P
 x,y e−E(x,y) is a normalization term
 called the partition function and E is called the energy
 function. A common choice for the energy function is
 E(x, y) =
-�
+X
 c∈C
 ψc(x, y)
 where ψ is called a clique potential. One choice for
 cliques of size two x, y = (x1, x2) is [KP06]
 ψc(x1, x2) = wδ(x1, x2) =
-�
+(
 +w
 if x1 ̸= x2
 −w
@@ -927,14 +927,14 @@ compared to MRFs is that no distribution assumption
 about x has to be made.
 A CRF has the partition function Z:
 Z(x) =
-�
+X
 y
 P(x, y)
 and joint probability distribution
 P(y|x) =
 1
 Z(x)
-�
+Y
 c∈C
 ψc(yc|x)
 The simplest way to deﬁne the clique potentials ψ is
@@ -981,7 +981,7 @@ function to the weighted sum and gives an output. Those
 neurons can take either a feature vector as input or the
 output of other neurons. In this way, they build up
 feature hierarchies.
-The parameters they learn are the weights w ∈ R.
+The parameters they learn are the weights w ∈R.
 They are learned by gradient descent. To do so, an error
 function — usually cross-entropy or mean squared error
 — is necessary. For the gradient descent algorithm, one
@@ -2274,7 +2274,7 @@ Classes
 Channels
 Data source
 Colon Crypt DB
-(302 px − 1116 px) × (349 px − 875 px)
+(302 px −1116 px) × (349 px −875 px)
 389
 2
 3
@@ -2286,19 +2286,19 @@ DIARETDB1
 3
 [KKV+14]
 KITTI Road
-(1226 px − 1242 px) × (370 px − 376 px)
+(1226 px −1242 px) × (370 px −376 px)
 289
 2
 3
 [FKG13]
 MSRCv1
-(213 px − 320 px) × (213 px − 320 px)
+(213 px −320 px) × (213 px −320 px)
 240
 9
 3
 [MSR]
 MSRCv2
-(213 px − 320 px) × (162 px − 320 px)
+(213 px −320 px) × (162 px −320 px)
 591
 23
 3
@@ -2310,13 +2310,13 @@ Open-CAS Endoscopic Datasets
 3
 [MHMK+14]
 PASCAL VOC 2012
-(142 px − 500 px) × ( 71 px − 500 px)
+(142 px −500 px) × ( 71 px −500 px)
 2913
 20
 3
 [EVGW+12]
 Warwick-QU
-(567 px − 775 px) × (430 px − 522 px)
+(567 px −775 px) × (430 px −522 px)
 165
 5
 3
diff --git a/read/results/pymupdf/1707.09725.txt b/read/results/pymupdf/1707.09725.txt
index f8bdbce..61fb43c 100644
--- a/read/results/pymupdf/1707.09725.txt
+++ b/read/results/pymupdf/1707.09725.txt
@@ -429,34 +429,34 @@ This chapter introduces linear image ﬁlters in Section 2.1, then standard laye
 CNNs are explained in Section 2.2. The layer block pattern is described in Section 2.3,
 transition layers in Section 2.4 and nine ways to analyze CNNs are described in Section 2.5.
 2.1. Linear Image Filters
-A linear image ﬁlter (also called a ﬁlter bank or a kernel) is an element F ∈ Rkw×kh×d,
+A linear image ﬁlter (also called a ﬁlter bank or a kernel) is an element F ∈Rkw×kh×d,
 where kw represents the ﬁlter’s width, kh the ﬁlter’s height and d the number of input
-channels. The ﬁlter F is convolved with the image I ∈ Rw×h×d to produce a new image I′.
+channels. The ﬁlter F is convolved with the image I ∈Rw×h×d to produce a new image I′.
 The output image I′ has only one channel. Each pixel I′(x, y) of the output image gets
 calculated by point-wise multiplication of one ﬁlter element with one element of the original
 image I:
 I′(x, y) =
-⌊ kw
+⌊kw
 2 ⌋
-�
-ix=1−⌈ kw
+X
+ix=1−⌈kw
 2 ⌉
-⌊ kh
+⌊kh
 2 ⌋
-�
-iy=1−⌈ kh
+X
+iy=1−⌈kh
 2 ⌉
 d
-�
+X
 ic=1
 I(x + ix, y + iy, ic) · F(ix, iy, ic)
 This procedure is explained by Figure 2.1. It is essentially a discrete convolution.
-I ∈ R7×7
+I ∈R7×7
 Filter kernel
-F ∈ R3×3
+F ∈R3×3
 Result of point-wise
 multiplication
-I′ ∈ R7×7
+I′ ∈R7×7
 104
 116
 116
@@ -591,7 +591,7 @@ high-level features which are combinations of the low-level features.
 Also, models should utilize the fact that the pixels of images are ordered. One way to use
 this is by learning image ﬁlters in so called convolutional layers.
 While MLPs vectorize the input, the input of a layer in a CNN are feature maps. A feature
-map is a matrix m ∈ Rw×h, but typically the width equals the height (w = h). For an RGB
+map is a matrix m ∈Rw×h, but typically the width equals the height (w = h). For an RGB
 4
 
 2.2. CNN Layer Types
@@ -609,16 +609,16 @@ the linear convolutions are the parameters which are adapted to the training dat
 number n of ﬁlters as well as the ﬁlter’s size kw × kh are hyperparameters of convolutional
 layers. Sometimes, it is denoted as n@kw × kh. Although the ﬁlter depth is usually omitted
 in the notation, the ﬁlters are of dimension kw × kh × d(i−1), where d(i−1) is the number of
-feature maps of the input layer (i − 1).
-Another hyperparameter of convolution layers is the stride s ∈ N≥1 and the padding.
+feature maps of the input layer (i −1).
+Another hyperparameter of convolution layers is the stride s ∈N≥1 and the padding.
 Padding (usually zero-padding [SCL12, SEZ+13, HZRS15a]) is used to make sure that the
 size of the feature maps doesn’t change.
 The hyperparameters of convolutional layers are
-• the number of ﬁlters n ∈ N≥1,
-• kw, kh ∈ N≥1 of the ﬁlter size kw × kh × d(i−1),
+• the number of ﬁlters n ∈N≥1,
+• kw, kh ∈N≥1 of the ﬁlter size kw × kh × d(i−1),
 • the activation function of the layer (see Table B.3) and
-• the stride s ∈ N≥1
-Typical choices are n ∈ { 32, 64, 128 }, kw = kh = k ∈ { 1, 3, 5, 11 } such as in [KSH12,
+• the stride s ∈N≥1
+Typical choices are n ∈{ 32, 64, 128 }, kw = kh = k ∈{ 1, 3, 5, 11 } such as in [KSH12,
 SZ14, SLJ+15], rectiﬁed linear unit (ReLU) activation and s = 1.
 The concept of weight sharing is crucial for CNNs. This concept was introduced in [WHH+89].
 With weight sharing, the ﬁlters can be learned with stochastic gradient descent (SGD) just
@@ -631,28 +631,28 @@ if only the ﬂattened output is compared.
 This is easier to see when the ﬁltering operation is denoted formally:
 o(i)(x) = b +
 k
-�
+X
 j=1
 wij · xj
-with i ∈ { 1, . . . , w } × { 1, . . . , h } × { 1, . . . , d }
+with i ∈{ 1, . . . , w } × { 1, . . . , h } × { 1, . . . , d }
 [2.1]
 o(x,y,z)(I) = b +
-⌊ kw
+⌊kw
 2 ⌋
-�
-ix=1−⌈ kw
+X
+ix=1−⌈kw
 2 ⌉
-⌊ kh
+⌊kh
 2 ⌋
-�
-iy=1−⌈ kh
+X
+iy=1−⌈kh
 2 ⌉
 d
-�
+X
 ic=1
 Fz(ix, iy, ic) · I(x + ix, y + iy, ic)
 [2.2]
-with a bias b ∈ R, x ∈ { 1, . . . , w } , y ∈ { 1, . . . , h } and z ∈ { 1, . . . , d }
+with a bias b ∈R, x ∈{ 1, . . . , w } , y ∈{ 1, . . . , h } and z ∈{ 1, . . . , d }
 One can see that most weights of the equivalent MLP are zero and many weights are
 equivalent. Hence the advantage of CNNs compared to MLPs is the reduction of parameters.
 The eﬀect of fewer parameters is that less training data is necessary to get suitable
@@ -698,7 +698,7 @@ Another insight recently got important: Every fully connected layer has an equiv
 convolutional layer which has the same weights.2 This way, one can use the complete
 classiﬁcation network as a very complex non-linear image ﬁlter which can be used for
 semantic segmentation.
-A fully connected layer with d ∈ N≥1 inputs and n ∈ N≥1 nodes can be interpreted as a
+A fully connected layer with d ∈N≥1 inputs and n ∈N≥1 nodes can be interpreted as a
 convolutional layer with an input of shape 1 × 1 × d and n ﬁlters of size 1 × 1. This will
 produce an output shape 1 × 1 × n. Every single output is connected to all of the inputs.
 When a convolutional layer is followed by a fully connected layer, it is necessary to vectorize
@@ -708,8 +708,8 @@ omitted if a convolution layer without padding and a ﬁlter size equal to the f
 size is applied. This was used by [LSD15].
 2.2.2. Pooling Layers
 Pooling summarizes a p × p area of the input feature map. Just like convolutional layers,
-pooling can be used with a stride of s ∈ N>1. As s ≥ 2 is the usual choice, pooling layers
-are sometimes also called subsampling layers. Typically, p ∈ { 2, 3, 4, 5 } and s = 2 such as
+pooling can be used with a stride of s ∈N>1. As s ≥2 is the usual choice, pooling layers
+are sometimes also called subsampling layers. Typically, p ∈{ 2, 3, 4, 5 } and s = 2 such as
 for AlexNet [KSH12] and VGG-16 [SZ14].
 The type of summary for the set of activations A varies between the functions listed
 in Table 2.1, spatial pyramid pooling as introduced in [HZRS14] and generalizing pooling
@@ -722,26 +722,26 @@ Name
 Deﬁnition
 Used by
 Max pooling
-max { a ∈ A }
+max { a ∈A }
 [BPL10, KSH12]
 Average / mean pooling
 1
 |A|
-�
+P
 a∈A a
 LeNet-5 [LBBH98] and [KSlB+10]
 ℓ2 pooling
-��
+pP
 a∈A a2
 [Le13]
 Stochastic pooling
 *
 [ZF13]
-Table 2.1.: Pooling types for a set A of activations a ∈ R.
+Table 2.1.: Pooling types for a set A of activations a ∈R.
 (*) For stochastic pooling, each of the p×p activation values ai in the pooling region gets
 picked with probability pi =
 ai
-�
+P
 aj ∈A aj . This assumes the activations ai are non-negative.
 Pooling is applied for three reasons: To get local translational invariance, to get invariance
 against minor local changes and, most important, for data reduction to
@@ -834,13 +834,13 @@ Dropout is a technique used to prevent overﬁtting and co-adaptations of neuron
 the output of any neuron to zero with probability p. It was introduced in [HSK+12] and is
 well-described in [SHK+14].
 A Dropout layer can be implemented as follows: For an input in of any shape s, a tensor of
-the same shape D ∈ { 0, 1 }s is sampled, where each element di is sampled independently
+the same shape D ∈{ 0, 1 }s is sampled, where each element di is sampled independently
 from a Bernoulli distribution. The results are element-wise multiplied to calculate the
 output out of the Dropout layer:
-out = D ⊙ in
-with di ∼ B(1, p)
-where ⊙ is the Hadamard product
-(A ⊙ B)i,j := (A)i,j(B)i,j
+out = D ⊙in
+with di ∼B(1, p)
+where ⊙is the Hadamard product
+(A ⊙B)i,j := (A)i,j(B)i,j
 Hence every value of the input gets set to zero with a dropout probability of p. Typically,
 Dropout is used with p = 0.5. Layers closer to the input usually have a lower dropout prob-
 ability than later layers. In order to keep the expected output at the same value, the
@@ -871,23 +871,23 @@ One way to approach this problem is by normalizing mini-batches as described in
 Batch Normalization layer with d-dimensional input x = (x(1), . . . , x(d)) is ﬁrst normalized
 point-wise to
 ˆx(k) =
-x(k) − ¯x(k)
-�
+x(k) −¯x(k)
+p
 s′[x(k)]2 + ε
 with ¯x(k) =
 1
 m
-�m
+Pm
 i=1 x(k)
 i
 being the sample mean and s′[x(k)]2 =
 1
 m
-�m
+Pm
 i=1(x(k)
 i
-− ¯x(k)) the
-sample variance where m ∈ N≥1 is the number of training samples per mini-batch, ε > 0
+−¯x(k)) the
+sample variance where m ∈N≥1 is the number of training samples per mini-batch, ε > 0
 being a small constant to prevent division by zero and x(k)
 i
 is the activation of neuron k for
@@ -899,7 +899,7 @@ In the case of fully connected layers, this is applied to the activation, before
 is applied. If it is applied after the activation, it harms the training in early stages. For
 convolution, only one γ and one β is learned per feature map.
 One important special case is γ(k) =
-�
+p
 s′[x(k)]2 + ε and β(k) = ¯x(k), which would make the
 Batch Normalization layer an identity layer.
 During evaluation time,3 the expected value and the variance are calculated once for the
@@ -907,10 +907,10 @@ complete dataset. An unbiased estimate of the empirical variance is used.
 The question where Batch Normalization layers (BN) should be applied and for which
 reasons is still open. For Dropout, it doesn’t matter if it is applied before or after the
 activation function. Considering this, the possible options for the order are:
-1. CONV / FC → BN → activation function → Dropout → . . .
-2. CONV / FC → activation function → BN → Dropout → . . .
-3. CONV / FC → activation function → Dropout → BN → . . .
-4. CONV / FC → Dropout → BN → activation function → . . .
+1. CONV / FC →BN →activation function →Dropout →. . .
+2. CONV / FC →activation function →BN →Dropout →. . .
+3. CONV / FC →activation function →Dropout →BN →. . .
+4. CONV / FC →Dropout →BN →activation function →. . .
 The authors of [IS15] suggest to use Batch Normalization before the activation function
 as in Items 1 and 4. Batch Normalization after the activation lead to better results in
 https://github.com/ducha-aiki/caffenet-benchmark/blob/master/batchnorm.md
@@ -953,7 +953,7 @@ Image source: [HZRS15a]
 Two common ways to add more parameters to neural networks are increasing their depth
 by adding more layers or increasing their width by adding more neurons / ﬁlters. Inception
 blocks [AM15] implicitly started a new idea which was explicitly described in [XGD+16] as
-“ResNeXt block”: Increasing the cardinality C ∈ N≥1. By cardinality, the authors describe
+“ResNeXt block”: Increasing the cardinality C ∈N≥1. By cardinality, the authors describe
 the concept of having C small convolutional networks with the same topology but diﬀerent
 weights. This concept is visualized in Figure 2.5. Please note that Figure 2.5 does not
 combine aggregation blocks with residual blocks as the authors did.
@@ -976,7 +976,7 @@ the same topology, the learned weights are diﬀerent. The outputs of the groups
 concatenated.
 The hyperparameters of an aggregation block are:
 • The topology of the group members.
-• The cardinality C ∈ N≥1. Note that a cardinality of C = 1 is equivalent in every
+• The cardinality C ∈N≥1. Note that a cardinality of C = 1 is equivalent in every
 aspect to using the group network without an aggregation block.
 12
 
@@ -1063,31 +1063,31 @@ are not covered by the training set and thus indicate the need to collect more d
 
 2. Convolutional Neural Networks
 2.5.2. Confusion Matrices
-A confusion matrix is a matrix (c)ij ∈ NK×K
+A confusion matrix is a matrix (c)ij ∈NK×K
 ≥0
-, where K ∈ N≥2 is the number of classes,
+, where K ∈N≥2 is the number of classes,
 which contains all correct and wrong classiﬁcations. The item cij is the number of times
 items of class i were classiﬁed as class j. This means the correct classiﬁcation is on the
-diagonal cii and all wrong classiﬁcations are of the diagonal. The sum �K
+diagonal cii and all wrong classiﬁcations are of the diagonal. The sum PK
 i=1
-�K
+PK
 j=1 cij is the
 total number of samples which were evaluated and
-�
+P
 i=1 cii
-�K
+PK
 i=1
-�K
+PK
 j=1 cij is the accuracy.
-The sums r(i) = �K
+The sums r(i) = PK
 j=1 cij of each class i are worth being investigated as they show if the
 classes are skewed. If the number of samples of one class dominates the data set, then the
 classiﬁer can get a high accuracy by simply always prediction the most common class. If
 the accuracy of the classiﬁer is close to the a priory probability of the most common class,
 techniques to deal with skewed classes might help.
 An automatic criterion to check for this problem is
-accuracy ≤ max({ r(i) | i = 1, . . . , k })
-�k
+accuracy ≤max({ r(i) | i = 1, . . . , k })
+Pk
 i=1 r(i)
 + ε
 where ε is a small value to compensate the fact that some examples might be correct just
@@ -1096,20 +1096,20 @@ Other values which should be checked are the class-wise sensitivities:
 s(k) = # correctly identiﬁed instances of class k
 # instances of class k
 = ckk
-r(k) ∈ [0, 1]
+r(k) ∈[0, 1]
 If s(i) is much lower than s(j), it is an indicator that more or cleaner training data is
 necessary for s(i).
 The class-wise confusion
 fconfusability(k1, k2) =
 ck1k2
-�K
+PK
 j=1 ck1j
 indicates if class k1 gets often classiﬁed as class k2. The highest values here can indicate
 if two classes should be merged or a specialized model for separating those classes could
 improve the overall system.
 2.5.3. Validation Curves: Accuracy, loss and other metrics
 Validation curves display a hyperparameter (e.g., the training epoch) on the horizontal
-axis and a quality metric on the vertical axis. Accuracy, error = (1 − accuracy) or loss are
+axis and a quality metric on the vertical axis. Accuracy, error = (1 −accuracy) or loss are
 typical quality metrics. Other quality metrics can be found in [OHIL16].
 In case that the number of training epochs are used as the examined hyperparameter,
 validation curves give an indicator if training longer improves the model’s performance. By
@@ -1138,7 +1138,7 @@ Error
 Training set
 Validation set
 Figure 2.7.: A typical validation curve: In this case, the hyperparameter is the number of epochs
-and the quality metric is the error (1 − accuracy). The longer the network is trained,
+and the quality metric is the error (1 −accuracy). The longer the network is trained,
 the better it gets on the training set. At some point the network is ﬁt too well to the
 training data and loses its capability to generalize. At this point the quality curve of
 the training set and the validation set diverge. While the classiﬁer is still improving on
@@ -1153,40 +1153,40 @@ real value to a complex event like the predicted class of a feature vector. It i
 the objective function. For classiﬁcation problems the loss function is typically cross-entropy
 with ℓ1 or ℓ2 regularization, as it was described in [NH92]:
 ECE(W) = −
-�
+X
 x∈X
 K
-�
+X
 k=1
 [tx
 k log(ox
-k) + (1 − tx
-k) log(1 − ox
+k) + (1 −tx
+k) log(1 −ox
 k)]
-�
-��
-�
+|
+{z
+}
 cross-entropy data loss
 + λ1 ·
 ℓ1
-� �� �
-�
+z }| {
+X
 w∈W
 |w| +λ2 ·
 ℓ2
-� �� �
-�
+z }| {
+X
 w∈W
 w2
-�
-��
-�
+|
+{z
+}
 model complexity loss
-where W are the weights, X is the training data set, K ∈ N≥0 is the number of classes and
+where W are the weights, X is the training data set, K ∈N≥0 is the number of classes and
 tx
 k indicates if the training example x is of class k. ox
 k is the output of the classiﬁcation
-algorithm which depends on the weights. λ1, λ2 ∈ [0, ∞) weights the regularization and is
+algorithm which depends on the weights. λ1, λ2 ∈[0, ∞) weights the regularization and is
 typically smaller than 0.1.
 17
 
@@ -1218,15 +1218,15 @@ Quality criteria
 There are several quality criteria for classiﬁcation models. Most quality criteria are based
 the confusion matrix c which denotes at cij the number of times the real class was i and j
 was predicted. This means the diagonal contains the number of correct predictions. For
-the following, let ti = �k
+the following, let ti = Pk
 j=1 cij be the number of training samples for class i. The most
 common quality criterion is accuracy:
 accuracy(c) =
-�k
+Pk
 i=1 cii
-�k
+Pk
 i=1 ti
-∈ [0, 1]
+∈[0, 1]
 One problem of accuracy as a quality criterion are skewed classes. If one class is by far
 more common than all other classes, then the simplest way to achieve a high score is to
 always classify everything as the most common class.
@@ -1234,11 +1234,11 @@ In order to ﬁx this problem, one can use the mean accuracy:
 mean-accuracy(c) = 1
 k ·
 k
-�
+X
 i=1
 cii
 ti
-∈ [0, 1]
+∈[0, 1]
 For two-class problems there are many other metrics like precision, recall and Fβ-score.
 Quality criteria for semantic segmentation are explained in [Tho16].
 Besides the quality of the classiﬁcation result, several other quality criteria are important
@@ -1335,7 +1335,7 @@ Gradient-based approaches
 In [SVZ13], a gradient-based approach was used to generate image-speciﬁc class saliency
 maps. The authors describe the problem as a ranking problem, where each pixel of the
 image I0 is assigned a score Sc(I0) for a class c of interest. CNNs are non-linear functions,
-but they can be approximated by the ﬁrst order Taylor expansion Sc(I) ≈ wT I + b where
+but they can be approximated by the ﬁrst order Taylor expansion Sc(I) ≈wT I + b where
 w is the derivative of Sc at I0.
 21
 
@@ -1385,7 +1385,7 @@ max
 (x,y)∈{−k,...,k}2\(0,0)
 ⟨Wi, T(Wj, x, y)⟩f
 ∥Wi∥2 ∥Wj∥2
-∈ [−1, 1],
+∈[−1, 1],
 where T(·, x, y) denotes the translation of the ﬁrst operand by (x, y), with zero padding at
 the borders to keep the shape. ⟨·, ·⟩f denotes the ﬂattened inner product, where the two
 operands are ﬂattened into column vectors before applying the standard inner product. The
@@ -1398,7 +1398,7 @@ is deﬁned as
 ¯ρk(W) = 1
 N
 N
-�
+X
 i=1
 N
 max
@@ -1481,17 +1481,16 @@ connected to the output nodes.
 4. Correlation Maximization: Train the weights of the candidates by maximizing S,
 the correlation between candidates output value V with the networks residual error:
 S =
-�
+X
 o∈O
-������
-�
+
+X
 p∈T
-�
-Vp − ¯V
-�
-(Ep,o − ¯
+ Vp −¯V
+
+(Ep,o −¯
 Eo)
-������
+
 where O is the set of output nodes, T is the training set, Vp is the candidate neurons
 activation for a training pattern p. Ep,o is the residual output error at node o for
 pattern p. ¯V and ¯
@@ -1510,7 +1509,7 @@ maximization whereas the white squares are trainable weights.
 Meiosis Networks are introduced in [Han89]. In contrast to most MLPs and CNNs, where
 weights are deterministic and ﬁxed at prediction time, each weight wij in Meiosis networks
 follows a normal distribution:
-wij ∼ N(µij, σ2
+wij ∼N(µij, σ2
 ij)
 28
 
@@ -1520,12 +1519,14 @@ ij.
 The key idea of Meiosis networks is to allow neurons to perform Meiosis, which is cell
 division. A node j is splitted, when the random part dominates the value of the sampled
 weights:
-�
-�i σij
+P
+i σij
+P
 i µij
 > 1 and
-�
-�k σjk
+P
+k σjk
+P
 k µjk
 > 1
 The mean of the new nodes is sampled around the old mean, half the variance is assigned
@@ -1534,7 +1535,7 @@ Hence Meiosis networks only change the number of neurons per layer. They do not
 layers or add skip connections.
 3.1.3. Automatic Structure Optimization
 Automatic Structure Optimization (ASO) was introduced in [BM93] for the task of on-
-line handwriting recognition. It makes use of the confusion matrix C = (cij) ∈ Nk×k
+line handwriting recognition. It makes use of the confusion matrix C = (cij) ∈Nk×k
 ≥0
 (see Section 2.5.2) to guide the topology learning. They deﬁne a confusion-symmetry matrix
 S with sij = sji = cij · cji. The maximum of S deﬁnes where the ASO algorithm adds
@@ -1560,7 +1561,7 @@ of parameter k the saliency sk. The parameters with the lowest saliency are dele
 means they are set to 0 and are not updated anymore.
 A follow-up method called Optimal Brain Surgeon [HSW93] claims to choose the weights
 in a much better way. This requires, however, to calculate the inverse Hessian matrix
-H−1 ∈ Rn×n where n ∈ N is typically n > 106.
+H−1 ∈Rn×n where n ∈N is typically n > 106.
 A much simpler and computationally cheaper pruning criterion is the weight magnitude.
 [HPTD15] prunes all weights w which are below a threshold θ:
 w ←
@@ -1568,7 +1569,7 @@ w ←
 
 
 w
-if w ≥ θ
+if w ≥θ
 0
 otherwise
 3.3. Genetic approaches
@@ -1678,7 +1679,7 @@ can be directly with standard clustering algorithms such as k-means, DBSCAN [EKS
 OPTICS [ABKS99], CLARANS [NH02], DIANA [KR09], AHC (see [HPK11]) or spectral
 clustering as in [XZY+14]. Those clusterings, however, are hard to interpret and most of
 them do not allow a human to improve the found clustering manually.
-The confusion matrix (c)ij ∈ Nk×k states how often class i was present and class j was
+The confusion matrix (c)ij ∈Nk×k states how often class i was present and class j was
 34
 
 4.2. Clustering classes
@@ -1698,12 +1699,12 @@ Hence the order of the classes is permutated in such a way that the highest erro
 to the diagonal. One possible objective function to be minimized is
 f(C) =
 n
-�
+X
 i=1
 n
-�
+X
 j=1
-Cij · |i − j|
+Cij · |i −j|
 [4.1]
 which punishes errors linearly with the distance to the diagonal. This method is called CMO
 in the following.
@@ -1736,7 +1737,7 @@ confusions are not made and thus many elements of the confusion matrix are close
 Those will be moved to the corners of the confusion matrix by optimizing Equation (4.1).
 Once a permutation of the classes is found which has a low score Equation (4.1), the clusters
 can either be made by hand by deciding why classes should not be in one clusters. With
-such a permutation, only n − 1 binary decisions have to be made and hence only the list of
+such a permutation, only n −1 binary decisions have to be made and hence only the list of
 classes has to be read. Alternatively, one can calculate the confusions C′
 i,i+1 + C′
 i+1,i for
@@ -1772,7 +1773,7 @@ are grouped to 20 superclasses. It includes animals, people, plants, outdoor sce
 and other items. CIFAR-100 is not a superset of CIFAR-10, as CIFAR-100 does not contain
 the class airplane. The state of the art achieves an accuracy of 82.82 % [HLW16].
 GTSRB (German Traﬃc Sign Recognition Benchmark) is a 43-class dataset of traﬃc signs.
-The 51 839 images are in color and of a minimum size of 25 px× 25 px up to 266 px× 232 px.
+The 51 839 images are in color and of a minimum size of 25 px×25 px up to 266 px×232 px.
 The state of the art achieves 99.46 % accuracy with an ensemble of 25 CNNs [SL11].
 According to [SSSI], human performance is at 98.84 %.
 HASYv2 (Handwritten Symbols version 2) is a 369 class dataset of black-and-white images
@@ -1813,7 +1814,7 @@ Early stopping [Pre98] with the validation accuracy as a stopping criterion and
 patience of 10 epochs. Kernel weights are initialized according to the uniform initialization
 scheme of He [HZRS15b] (see Appendix B.3).
 The architecture of the baseline model uses a pattern of
-Conv-Block(n) = (Convolution − Batch Normalization − Activation)n − Pooling
+Conv-Block(n) = (Convolution −Batch Normalization −Activation)n −Pooling
 The activation function is the Exponential Linear Unit (ELU) (see Table B.3), except for
 the last layer where softmax is used. Before the last two convolutional layer, a dropout
 layer with dropout probability 0.5 is applied. The architecture is given in detail in Table 5.1.
@@ -1969,7 +1970,7 @@ BN + Softmax
 k
 @ 1 ×
 1
-�
+P
 515k
 +892 512
 1032k
@@ -2008,7 +2009,7 @@ Dropout, p = 0.5
 C k@1 × 1/1
 Global AVG pooling
 BN + Softmax
-Figure 5.1.: Architecture of the baseline model. C 32@3× 3/1 is a convolutional layer with 32 ﬁlters
+Figure 5.1.: Architecture of the baseline model. C 32@3×3/1 is a convolutional layer with 32 ﬁlters
 of kernel size 3 × 3 with stride 1.
 39
 
@@ -2164,7 +2165,7 @@ The distribution of ﬁlter weights by layer is visualized in Figure 5.2 and the
 of bias weights by layer is shown in Figure 5.3. Although both ﬁgures only show the
 distribution for one speciﬁc model trained on CIFAR-100, the following observed patterns
 are consistent for 70 models (7 datasets and 10 models per dataset):
-• The empiric [0.5 − percentile, 99.5 − percentile] interval which contains 99 % of the
+• The empiric [0.5 −percentile, 99.5 −percentile] interval which contains 99 % of the
 ﬁlter weights is almost symmetric around zero. The same is true for the bias weights.
 • The farther a layer is from the input away, the smaller the 99-percentile interval is,
 except for the last layer (see Table A.1).
@@ -2215,7 +2216,7 @@ trained on CIFAR-100.
 5. Experimental Evaluation
 Figure 5.6.: The distribution of the range of values (max - min) of ﬁlters by channel and layer. For
 each ﬁlter, the range of values is recorded by channel. The smaller this range is, the
-less information is lost if a n × n ﬁlter is replaced by a (n − 1) × (n − 1) ﬁlter.
+less information is lost if a n × n ﬁlter is replaced by a (n −1) × (n −1) ﬁlter.
 44
 
 5.1. Baseline Model and Training setup
@@ -2354,7 +2355,7 @@ viewed in electronic form.
 CIFAR-100 has pre-deﬁned coarse classes. Those are used as a ground truth for the clusters
 which are to be found. The number of errors is determined by (i) Join all n clusters which
 contain the classes of the coarse class C to a set M. The error is n. (ii) Within M, ﬁnd the
-set of classes M− which do not belong to C. (iii) The ﬁnal error is n + |M−|. As can be
+set of classes M−which do not belong to C. (iii) The ﬁnal error is n + |M−|. As can be
 seen in Table 5.4, both clustering methods ﬁnd reasonable clusters. CMO, however, has
 only half the error of spectral clustering.
 The results for the HASYv2 dataset are qualitatively similar (see Table 5.5). It should be
@@ -2424,7 +2425,7 @@ B, B
 B, B
 0
 C
-C, c, ⊂ and C , ξ, E and C
+C, c, ⊂and C , ξ, E and C
 4
 C, c, ⊂, C and C
 1
@@ -2454,9 +2455,9 @@ K, κ
 K, κ
 0
 L
-L, ⌊ and L, L
+L, ⌊and L, L
 1
-L, ⌊ and L, L
+L, ⌊and L, L
 1
 M
 M and M and M
@@ -2469,14 +2470,14 @@ N and N, N and N
 N and N, N and N, ℵ
 3
 O
-O, O, 0, ◦, °, � and o
+O, O, 0, ◦, °, # and o
 1
-O, O, 0, ◦, ° and � and o
+O, O, 0, ◦, ° and # and o
 2
 P
 P, P and p, ρ and P and ℘
 3
-P and P, P, ℘ and p, ρ
+P and P, P, ℘and p, ρ
 2
 Q
 Q, Q, Q, ι, ⊔, ≳, ℓ, ℑ, Æ, 1
@@ -2494,12 +2495,12 @@ S, s, S
 S, s, S
 0
 T
-T, ⊤ and T , τ
+T, ⊤and T , τ
 1
-T, ⊤ and T , τ
+T, ⊤and T , τ
 1
 U
-U, ∪ and u, U, A
+U, ∪and u, U, A
 1
 U, u, U, A and ∪
 2
@@ -2783,11 +2784,10 @@ feature maps of layer i where i = 0 is the input layer and all ﬁlters are 3 ×
 a bias, then the number of parameters is
 Parameters =
 k
-�
+X
 i=1
-�
-(ni−1 · 32 + 1) · ni
-�
+ (ni−1 · 32 + 1) · ni
+
 Hence the width of one layer does not only inﬂuence the parameters in this layer, but also
 in the next layer.
 The number of possible subsequent layers of one feature map size is enormous, even if
@@ -2890,7 +2890,7 @@ Batch Normalization.
 
 5.9. Batch size
 5.9. Batch size
-The mini-batch size m ∈ N≥1 inﬂuences
+The mini-batch size m ∈N≥1 inﬂuences
 • Epochs until convergence: The smaller m, the more often the model is updated
 in one epoch. Those updates, however, are based on fewer samples of the dataset.
 Hence the gradients of diﬀerent mini-batches can noticeably diﬀer. In the literature,
@@ -3033,28 +3033,28 @@ As expected, PReLU and ELU performed best. Unexpected was that the logistic func
 tanh and softplus performed worse than the identity and it is unclear why the pure-softmax
 network performed so much better than the logistic function. One hypothesis why the
 logistic function performs so bad is that it cannot produce negative outputs. Hence the
-logistic− function was developed:
+logistic−function was developed:
 logistic−(x) =
 1
-1 + e−x − 0.5
-The logistic− function has the same derivative as the logistic function and hence still suﬀers
-from the vanishing gradient problem. The network with the logistic− function achieves an
+1 + e−x −0.5
+The logistic−function has the same derivative as the logistic function and hence still suﬀers
+from the vanishing gradient problem. The network with the logistic−function achieves an
 accuracy which is 11.30 % better than the network with the logistic function, but is still
 5.54 % worse than the ELU.
 Similarly, ReLU was adjusted to have a negative output:
-ReLU−(x) = max(−1, x) = ReLU(x + 1) − 1
-The results of ReLU− are much worse on the training set, but perform similar on the test
+ReLU−(x) = max(−1, x) = ReLU(x + 1) −1
+The results of ReLU−are much worse on the training set, but perform similar on the test
 61
 
 5. Experimental Evaluation
 set. The result indicates that the possibility of hard zero and thus a sparse representation
 is either not important or similar important as the possibility to produce negative outputs.
 This contradicts [GBB11, SMGS14].
-A key diﬀerence between the logistic− function and ELU is that ELU does neither suﬀers
+A key diﬀerence between the logistic−function and ELU is that ELU does neither suﬀers
 from the vanishing gradient problem nor is its range of values bound. For this reason, the
 S2ReLU activation function, deﬁned as
 S2ReLU(x) = ReLU(x
-2 + 1) − ReLU(−x
+2 + 1) −ReLU(−x
 2 + 1) =
 
 
@@ -3065,11 +3065,11 @@ S2ReLU(x) = ReLU(x
 
 
 
-− x
+−x
 2 + 1
-if x ≤ −2
+if x ≤−2
 x
-if − 2 ≤ x ≤ 2
+if −2 ≤x ≤2
 x
 2 + 1
 if x > −2
@@ -3428,8 +3428,8 @@ why they improve the test accuracy is by reducing the variance.
 The idea of label smoothing is to use the ensemble prediction of the training data as labels
 for another classiﬁer. For every element x of the training set, the one-hot encoded target
 t(x) is smoothed by the ensemble prediction yE(x)
-t′(x) = α · t(x) + (1 − α)yE(x)
-where α ∈ [0, 1] is the smoothing factor.
+t′(x) = α · t(x) + (1 −α)yE(x)
+where α ∈[0, 1] is the smoothing factor.
 There are three reasons why label smoothing could be beneﬁcial:
 • Training speed: The ensemble prediction contains more information about the
 image than binary class decisions. Classiﬁers in computer vision predict how similar
@@ -3616,7 +3616,7 @@ BN + Softmax
 k
 @ 1 ×
 1
-�
+P
 514k
 +947 654
 520k
@@ -4087,35 +4087,35 @@ trained on CIFAR-100.
 Figure A.2.: The distribution of bias weights of a model without batch normalization trained on
 CIFAR-100.
 Algorithm 1 Simulated Annealing for minimizing Equation (4.1).
-Require: C ∈ Nn×n, steps ∈ N, T ∈ R+, c ∈ (0, 1)
+Require: C ∈Nn×n, steps ∈N, T ∈R+, c ∈(0, 1)
 procedure SimulatedAnnealing(C, steps, T, c)
-bestScore ← accuracy(C)
-bestC ← C
-for i = 0; i < steps; i ← i + 1 do
-p ← randomFloat(0, 1)
+bestScore ←accuracy(C)
+bestC ←C
+for i = 0; i < steps; i ←i + 1 do
+p ←randomFloat(0, 1)
 if p < 0.5 then
-▷ Swap rows
-i ← randomInteger(1, . . . , n)
-j ← randomInteger(1, . . . , n) \ { i }
-p ← randomUniform(0, 1)
-C′ ← swap(C, i, j)
-s ← accuracy(C′)
-if p < exp( s−bestScore
+▷Swap rows
+i ←randomInteger(1, . . . , n)
+j ←randomInteger(1, . . . , n) \ { i }
+p ←randomUniform(0, 1)
+C′ ←swap(C, i, j)
+s ←accuracy(C′)
+if p < exp(s−bestScore
 T
 ) then
-C ← C′
+C ←C′
 if s > bestScore then
-bestScore ← s
-bestC ← C
-T ← T · c
+bestScore ←s
+bestC ←C
+T ←T · c
 else
-▷ Move Block
-s ← randomInteger(1, . . . , n)
-▷ Block start
-e ← randomInteger(s, . . . , n)
-▷ Block end
-i ← randomInteger(1, . . . , n − (e − s))
-▷ Block insert position
+▷Move Block
+s ←randomInteger(1, . . . , n)
+▷Block start
+e ←randomInteger(s, . . . , n)
+▷Block end
+i ←randomInteger(1, . . . , n −(e −s))
+▷Block insert position
 Move Block (s, . . . , e) to position i
 return bestM
 76
@@ -4364,10 +4364,10 @@ Vertical ﬂip
 2
 [DWD15]1
 Rotation
-∼ 40 (δ = 20)
+∼40 (δ = 20)
 [DSRB14]
 Scaling
-∼ 14 (δ ∈ [0.7, 1.4])
+∼14 (δ ∈[0.7, 1.4])
 [DSRB14]
 Crops
 322 = 1024
@@ -4377,16 +4377,16 @@ Shearing
 GANs
 [BCW+17]
 Brightness
-∼ 20 (δ ∈ [0.5, 1.5])
+∼20 (δ ∈[0.5, 1.5])
 [How13]
 Hue
 51 (δ = 0.1)
 [MRM15, DSRB14]
 Saturation
-∼ 20 (δ = 0.5)
+∼20 (δ = 0.5)
 [DSRB14]
 Contrast
-∼ 20 (δ ∈ [0.5, 1.5])
+∼20 (δ ∈[0.5, 1.5])
 [How13]
 Channel shift
 [KSH12]
@@ -4402,13 +4402,13 @@ Less common, but also reasonable are:
 • Color casting (used by [WYS+15])
 • Vignetting (used by [WYS+15])
 • Lens distortion (used by [WYS+15])
-1Vertical ﬂipping combined with 180◦ rotation is equivalent to horizontal ﬂipping
+1Vertical ﬂipping combined with 180◦rotation is equivalent to horizontal ﬂipping
 80
 
 B.3. Initialization
 Weight initializations are usually chosen to be small and centered around zero. One way to
 characterize many initialization schemes is by
-w ∼ α · U[−1, 1] + β · N(0, 1) + γ with α, β, γ ≥ 0
+w ∼α · U[−1, 1] + β · N(0, 1) + γ with α, β, γ ≥0
 Table B.2 shows six commonly used weight initialization schemes. Several schemes use the
 same idea, that unit-variance is desired for each layer as the training converges faster [IS15].
 Name
@@ -4419,11 +4419,11 @@ Reference
 Constant
 α = 0
 β = 0
-γ ≥ 0
+γ ≥0
 used by [ZF14]
 Xavier/Glorot uniform
 α =
-�
+q
 6
 nin+nout
 β = 0
@@ -4432,10 +4432,10 @@ nin+nout
 Xavier/Glorot normal
 α = 0
 β =
-�
+
 2
 (nin+nout)
-�2
+2
 γ = 0
 [GB10]
 He
@@ -4455,7 +4455,7 @@ LSUV
 —
 γ = 0
 [MM15]
-Table B.2.: Weight initialization schemes of the form w ∼ α · U[−1, 1] + β · N(0, 1) + γ.
+Table B.2.: Weight initialization schemes of the form w ∼α · U[−1, 1] + β · N(0, 1) + γ.
 nin, nout are the number of units in the previous layer and the next layer. Typically,
 biases are initialized with constant 0 and weights by one of the other schemes to prevent
 unit-coadaptation. However, dropout makes it possible to use constant initialization for
@@ -4464,46 +4464,46 @@ LSUV and Orthogonal initialization cannot be described with this simple pattern.
 B.4. Objective function
 For classiﬁcation tasks, the cross-entropy
 ECE(W) = −
-�
+X
 x∈X
 K
-�
+X
 k=1
 [tx
 k log(ox
-k) + (1 − tx
-k) log(1 − ox
+k) + (1 −tx
+k) log(1 −ox
 k)]
 is by far the most commonly used objective function (e.g., used by [ZF14]). In this equation,
 X is the set of training examples, K is the number of classes, tx
-k ∈ { 0, 1 } indicates if the
+k ∈{ 0, 1 } indicates if the
 training example x is of class k, ox
 k is the output of the classiﬁer for the training example x
 and class k.
-However, regularization terms weighted with a constant λ ∈ (0, +∞) are sometimes added:
+However, regularization terms weighted with a constant λ ∈(0, +∞) are sometimes added:
 • LASSO: ℓ1 (e.g., used in [HPTD15])
 • Weight decay: ℓ2 (e.g., λ = 0.0005 as in [MSM16])
-• Orthogonality regularization (|(W T · W − I)|, see [VTKP17])
+• Orthogonality regularization (|(W T · W −I)|, see [VTKP17])
 81
 
 B.5. Optimization Techniques
 Most relevant optimization techniques for CNNs are based on SGD, which updates the
 weights according to the rule
-wji ← wji + ∆wji with ∆wji = −η ∂Ex
+wji ←wji + ∆wji with ∆wji = −η ∂Ex
 ∂wji
-where η ∈ (0, 1), typically 0.01 (e.g., [MSM16]), is called the learning rate.
+where η ∈(0, 1), typically 0.01 (e.g., [MSM16]), is called the learning rate.
 A slight variation of SGD is mini-batch gradient descent with the mini-batch B (typically
-mini-batch sizes are |B| ∈ { 32, 64, 128, 256, 512 }, e.g. [ZF14]). Larger mini-batch sizes
+mini-batch sizes are |B| ∈{ 32, 64, 128, 256, 512 }, e.g. [ZF14]). Larger mini-batch sizes
 lead to sharp minima and thus poor generalization [KMN+16]. Smaller mini-batch sizes
 lead to longer training times due to computational overhead and to more training steps due
 to gradient noise.
-wji ← wji + ∆wji with ∆wji = −η∂EB
+wji ←wji + ∆wji with ∆wji = −η∂EB
 ∂wji
 Nine variations which adjust the learning rate during training are:
 • Momentum:
 w(t+1)
 ji
-← w(t)
+←w(t)
 ji + ∆w(t+1)
 ji
 with ∆w(t+1)
@@ -4512,17 +4512,17 @@ ji
 ∂wji
 + α∆w(t)
 ji
-with α ∈ [0, 1], typically 0.9 (e.g., [ZF14, MSM16])
+with α ∈[0, 1], typically 0.9 (e.g., [ZF14, MSM16])
 • Adagrad [DHS11]
 • RProp and the mini-batch version RMSProp [TH12]
 • Adadelta [Zei12]
-• Power Scheduling [Xu11]: η(t) = η(0)(1 + a · t)−c, where t ∈ N0 is the training step,
+• Power Scheduling [Xu11]: η(t) = η(0)(1 + a · t)−c, where t ∈N0 is the training step,
 a, c are constants.
 • Performance Scheduling [SHY+13]: Measure the error on the cross validation set and
 decrease the learning rate when the algorithms improvement is below a threshold.
-• Exponential Decay Learning Rate [SHY+13]: η(t) = η(0) · 10− t
-k where t ∈ N0 is the
-training step, η(0) is the initial learning rate, k ∈ N≥1 is the number of training steps
+• Exponential Decay Learning Rate [SHY+13]: η(t) = η(0) · 10−t
+k where t ∈N0 is the
+training step, η(0) is the initial learning rate, k ∈N≥1 is the number of training steps
 until the learning rate is decreased by
 1
 10th.
@@ -4566,7 +4566,7 @@ Sign function†
 
 
 +1
-if x ≥ 0
+if x ≥0
 −1
 if x < 0
 { −1, 1 }
@@ -4632,8 +4632,8 @@ ELU
 
 x
 if x > 0
-α(ex − 1)
-if x ≤ 0
+α(ex −1)
+if x ≤0
 (−∞, +∞)
 
 
@@ -4646,13 +4646,13 @@ otherwise
 Softmax‡
 o(x)j =
 exj
-�K
+PK
 k=1 exk
 [0, 1]K
 o(x)j ·
-�K
+PK
 k=1 exk−exj
-�K
+PK
 k=1 exk
 [KSH12, Tho14a]
 Maxout‡
@@ -4668,7 +4668,7 @@ otherwise
 [GWFM+13]
 Table B.3.: Overview of activation functions. Functions marked with † are not diﬀerentiable at 0
 and functions marked with ‡ operate on all elements of a layer simultaneously. The
-hyperparameters α ∈ (0, 1) of Leaky ReLU and ELU are typically α = 0.01. Other
+hyperparameters α ∈(0, 1) of Leaky ReLU and ELU are typically α = 0.01. Other
 activation function like randomized leaky ReLUs exist [XWCL15], but are far less
 commonly used.
 Some functions are smoothed versions of others, like the logistic function for the
@@ -4700,7 +4700,7 @@ y
 ϕ2(x) = tanh(x)
 ϕ3(x) = max(0, x)
 ϕ4(x) = log(ex + 1)
-ϕ5(x) = max(x, ex − 1)
+ϕ5(x) = max(x, ex −1)
 Figure B.1.: Activation functions plotted in [−2, +2]. tanh and ELU are able to produce negative
 numbers. The image of ELU, ReLU and Softplus is not bound on the positive side,
 whereas tanh and the logistic function are always below 1.
@@ -4729,8 +4729,8 @@ has ki · ki−1(n · m + 1) parameters. The +1 is due to the bias.
 • A fully connected layer with n nodes after k feature maps of size m1 × m2 has
 n · (k · m1 · m2 + 1) parameters.
 • A dense block with a depth of L, a growth rate of n and 3 × 3 ﬁlters has L + n · 32 +
-32 · n2 �L
-i=0(L − i) = L + 9n + 9n2 L2−L
+32 · n2 PL
+i=0(L −i) = L + 9n + 9n2 L2−L
 2
 parameters.
 According to [HPTD15], AlexNet has 60 million parameters which is roughly the number
@@ -4741,13 +4741,13 @@ the following number are only giving rough estimates.
 In the following, nϕ denotes the number of FLOPs to compute the non-linearity ϕ. For
 simplicity, nϕ = 5 was chosen.
 • A fully connected layer with n nodes and k inputs has to calculate ϕ(W · x + b) with
-W ∈ Rn×k, x ∈ Rk×1, b ∈ Rn×1. It hence needs about n · (k + (k − 1) + 1) = 2nk
+W ∈Rn×k, x ∈Rk×1, b ∈Rn×1. It hence needs about n · (k + (k −1) + 1) = 2nk
 additions / multiplications before the non-linearity ϕ is calculated. The total number
 of FLOPs is 2 · n · k + n · nϕ.
 • In the following, biases are ignored. A convolutional layer with ki ﬁlters of size n × m
 being applied to ki−1 ﬁlter maps of size w × h results in ki ﬁlter maps of size w × h if
 padding is applied. For each element of each ﬁlter map, n·m·ki−1 multiplications and
-(n · m · ki−1 − 1) additions have to be made. This results in (2nmki−1 − 1) · (ki · w · h)
+(n · m · ki−1 −1) additions have to be made. This results in (2nmki−1 −1) · (ki · w · h)
 operations. The total number of FLOPs is (2·n·m·ki−1 −1)·(ki ·w ·h)+ki ·w ·h·nϕ.
 This is, of course, a naive way of calculating a convolution. There are other ways of
 calculating convolutions [LG16].
@@ -4864,7 +4864,7 @@ Fully Connected (output)
 850
 1 730
 10
-�
+P
 61 710
 15 144 446
 9118
@@ -4879,7 +4879,7 @@ The ﬁrst CNN which achieved major improvements on the ImageNet dataset was Ale
 Its architecture is shown in Figure D.2 and described in Table D.2. It has about 60·106 param-
 eters. A trained AlexNet can be downloaded at www.cs.toronto.edu/˜guerzhoy/tf_alexnet.
 Note that the uncompressed size is at least 60 965 224 ﬂoats · 32 bit
-ﬂoat ≈ 244 MB.
+ﬂoat ≈244 MB.
 Figure D.2.: Architecture of AlexNet as shown in [KSH12]: Convolutional Layers are followed
 by pooling layers multiple times. At the end, a fully connected network is applied.
 Conceptually, it is identical to the architecture of LeNet-5 (see Figure D.1).
@@ -4976,7 +4976,7 @@ FC
 4 097 000
 8 M
 1000
-�
+P
 60 965 224
 3300 M
 1 122 568
@@ -5193,7 +5193,7 @@ FC
 4 097 000
 8 M
 1000
-�
+P
 138 357 544
 31 000 M
 15 245 800
@@ -5287,7 +5287,7 @@ Dropout (p=0.8)
 Softmax
 1 537 000
 1000
-�
+P
 42 679 816
 Table D.4.: Inception-v4 network.
 95
@@ -5348,43 +5348,43 @@ STL-10
 3
 [CLN11, CLN10]
 Caltech-101
-(80 px − 3481 px)
-×(92 px − 3999 px)
+(80 px −3481 px)
+×(92 px −3999 px)
 9144
 102
 3
 [FFP03, FFFP06]
 Caltech-256
-(75 px − 7913 px)
-×(75 px − 7913 px)
+(75 px −7913 px)
+×(75 px −7913 px)
 30 607
 257
 3
 [Gri06, GG07]
 ILSVRC 20121
-(8 px − 9331 px)
-×(10 px − 6530 px)
+(8 px −9331 px)
+×(10 px −6530 px)
 1.2 · 106
 1000
 3
 [Ima12, RDS+14]
 Places3652
-(290px − 3158px)
-×(225px − 2630px)
+(290px −3158px)
+×(225px −2630px)
 1.8 · 106
 365
 3
 [Zho16, ZKL+16]
 GTSRB
-(25 px − 266 px)
-×(25 px − 232 px)
+(25 px −266 px)
+×(25 px −232 px)
 51 839
 43
 3
 [SSSI, SSSI12]
 Asirra3
-(4 px − 500 px)
-×(4 px − 500 px)
+(4 px −500 px)
+×(4 px −500 px)
 25 000
 2
 3
@@ -5475,15 +5475,15 @@ Table E.2.: An overview over state of the art results achieved in computer visio
 Algorithm 2 Create a classiﬁcation dataset from a semantic segmentation dataset
 Require: Semantic segmentation dataset (DS)
 procedure CreateDataset(Annotated dataset DS)
-DC ← List
-w ← desired image width
-h ← desired image height
+DC ←List
+w ←desired image width
+h ←desired image height
 for Image and associated label (x, y) in DS do
-i ← randint(0, L.width − w)
-j ← randint(0, L.height − h)
-cL ← crop(y, (i, j), (i + w, j + h))
+i ←randint(0, L.width −w)
+j ←randint(0, L.height −h)
+cL ←crop(y, (i, j), (i + w, j + h))
 if at least 50% of s are of one class then
-cI ← crop(x, (i, j), (i + w, j + h))
+cI ←crop(x, (i, j), (i + w, j + h))
 D.append((cI, cL))
 return (DC)
 98
@@ -5666,7 +5666,8 @@ Baseline Weight updates (maximum) . . . . . . . . . . . . . . . . . . . . . .
 55
 5.16 Optimized architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
 67
-A.1 Image Filters . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+A.1 Image Filters
+. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
 75
 A.2 Bias weight distribution without BN . . . . . . . . . . . . . . . . . . . . . .
 76
diff --git a/read/results/pymupdf/2201.00021.txt b/read/results/pymupdf/2201.00021.txt
index 3cb4a10..50e59a7 100644
--- a/read/results/pymupdf/2201.00021.txt
+++ b/read/results/pymupdf/2201.00021.txt
@@ -49,7 +49,7 @@ quent observations have led to the detection of new metastable
 ammonia masers, including 15NH3 (3,3) (Mauersberger et al.
 1986), NH3 (1,1) (Gaume et al. 1996), NH3 (2,2) (Mills et al.
 2018), NH3 (5,5) (Cesaroni et al. 1992), NH3 (6,6) (Beuther
-⋆ Member of the International Max Planck Research School (IM-
+⋆Member of the International Max Planck Research School (IM-
 PRS) for Astronomy and Astrophysics at the universities of Bonn and
 Cologne.
 et al. 2007), NH3 (7,7), NH3 (9,9), and NH3 (12,12) (Henkel
@@ -133,7 +133,7 @@ January and 2021 February, July, and August. The S14mm dou-
 ble beam secondary focus receiver was employed. The full width
 at half maximum (FWHM) beam size is 49′′ at 18.5 GHz, the
 frequency of the target line. The observations were performed in
-position switching mode, and the oﬀ position was 10′ in azimuth
+position switching mode, and the oﬀposition was 10′ in azimuth
 1 Based on observations with the 100-meter telescope of the MPIfR
 (Max-Planck-Institut für Radioastronomie) at Eﬀelsberg.
 away from the source. For observations made before 2021 Au-
@@ -151,7 +151,7 @@ a main-beam brightness temperature, TMB, scale. This ﬂux den-
 sity was calibrated assuming a TMB/S ratio of 1.95 K/Jy, derived
 from continuum cross scans of NGC 7027 (the ﬂux density was
 adopted from Ott et al. 1994). Calibration uncertainties are esti-
-mated to be ∼ 10%.
+mated to be ∼10%.
 We used the GILDAS/CLASS2 package (Pety 2005) to re-
 duce the spectral line data. A ﬁrst-order polynomial was sub-
 tracted from each spectrum for baseline removal.
@@ -306,11 +306,11 @@ sented in Fig. 2.
 Three diﬀerent locations showing NH3 (9,6) emission are
 found toward G34.26+0.15 (Fig. 4). The deconvolved NH3 (9,6)
 component sizes are (1′′.42±0′′.43)×(0′′.54±0′′.62) at P.A. = 97◦
-(M1), (0′′.42 ± 0′′.27) × (0′′.15 ± 0′′.27) at P.A. = 150◦ (M2), and
+(M1), (0′′.42 ± 0′′.27) × (0′′.15 ± 0′′.27) at P.A. = 150◦(M2), and
 Article number, page 4 of 10
 
 Y. T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions
-(1′′.17 ± 0′′.34) × (0′′.27 ± 0′′.46) at P.A. = 53◦ (M3) and are thus
+(1′′.17 ± 0′′.34) × (0′′.27 ± 0′′.46) at P.A. = 53◦(M3) and are thus
 comparable to or smaller than the beam size.
 Overall, the NH3 (9,6) line from G34.26+0.15 weakened
 during the time interval from 2020 January to 2021 August by
@@ -380,7 +380,7 @@ Finally, the non-detections of the (8,5) and (10,7) lines also
 indicate that the (9,6) line is special. This allows us to derive
 lower 3σ limits of the (9,6)/(8,5) and (9,6)/(10,7) line intensity
 ratios. The (9,6) line arises from ortho-NH3 (K = 3n), whereas
-the NH3 (8,5) and (10,7) lines are para-NH3 (K � 3n) lines.
+the NH3 (8,5) and (10,7) lines are para-NH3 (K , 3n) lines.
 The minimum ortho-to-para ratios are in the range 12–42 and 1–
 8 toward Cep A and G34.26+0.15, respectively. The statistical
 weights for the ortho states are twice as large as those for the
@@ -394,18 +394,18 @@ the case of G34.26+0.15 is likely similar.
 published (quasi-)thermal NH3 emission
 The metastable (1,1), (2,2), (3,3), and (4,4) ammonia lines
 show thermal emission toward Cep A over a velocity range of
-−13 km s−1 ≤ VLSR ≤ −4 km s−1 (Brown et al. 1981; Güsten
+−13 km s−1 ≤VLSR ≤−4 km s−1 (Brown et al. 1981; Güsten
 et al. 1984; Torrelles et al. 1985, 1986, 1993, 1999). An average
 NH3 column density of ∼5×1015 cm−2 was estimated for a region
 of 3′′ around HW2 (Torrelles et al. 1999). This high NH3 abun-
 dance could provide a suitable environment for maser species.
-Large line widths (∆V1/2 ≃7.0 km s−1) with VLSR ∼ −10 km s−1
+Large line widths (∆V1/2 ≃7.0 km s−1) with VLSR ∼−10 km s−1
 in both (1,1) and (2,2) lines were found toward HW2 (Torrelles
 et al. 1993). The velocity is similar to the cloud’s systemic lo-
 cal standard of rest (LSR) velocity of −11.2 km s−1, which
 is based on CO (Narayanan & Walker 1996) and HCO+ ob-
 servations (Gómez et al. 1999). Our (9,6) maser is redshifted
-(−0.9 km s−1 ≤ VLSR
+(−0.9 km s−1 ≤VLSR
 ≤2.9 km s−1) and shares positions with
 the outﬂowing gas seen in CO and HCO+ with similarly red-
 shifted velocities. Therefore, we argue that the (9,6) masers are
@@ -426,19 +426,19 @@ While these lines were measured with a beam size of about
 lines with the kinetic temperature reveals the size of the hot,
 ammonia-emitting core to be only ∼2.5′′. All those measured
 NH3 lines were quasi-thermal and had LSR velocities of
-∼ 58.5 km s−1, close to the systemic velocity of ∼ 58.1 km s−1
+∼58.5 km s−1, close to the systemic velocity of ∼58.1 km s−1
 obtained from C17O observations (Wyrowski et al. 2012).
 Their line widths (∆V1/2 ≥3.6 km s−1) are larger than what
-we ﬁnd (0.35 km s−1 ≤ ∆V1/2 ≤ 0.94 km s−1) for each (9,6)
+we ﬁnd (0.35 km s−1 ≤∆V1/2 ≤0.94 km s−1) for each (9,6)
 maser component (see details in Table A.3). In all, we may
 have observed four diﬀerent (9,6) velocity features. Three
 are blueshifted at VLSR
-∼ 53.8 km s−1, 55.8 km s−1, and
+∼53.8 km s−1, 55.8 km s−1, and
 56.8 km s−1, and a fourth, tentatively detected, at 62.5 km s−1.
 This tentative redshifted feature was only potentially detected
 with Eﬀelsberg in 2020 January. The velocity is similar to that
 of the JVLA measurements on the NH3 (1,1) absorption line
-against continuum source C (∼ 7′′ resolution; Keto et al. 1987)
+against continuum source C (∼7′′ resolution; Keto et al. 1987)
 Article number, page 5 of 10
 
 A&A proofs: manuscript no. mainArxiv
@@ -463,9 +463,9 @@ maser ﬂux is associated with the compact H ii region HW3d. OH
 maser features close to the H ii regions are also seen in HW2
 (e.g., Cohen & Brebner 1985; Bartkiewicz et al. 2005). These
 three kinds of masers in Cep A have a large velocity range of
-−25 km s−1 ≤ VLSR ≤ −2 km s−1 and are widespread around
+−25 km s−1 ≤VLSR ≤−2 km s−1 and are widespread around
 HW2 and HW3, while NH3 (9,6) emission is only detected at
-−0.9 km s−1 ≤ VLSR
+−0.9 km s−1 ≤VLSR
 ≤2.9 km s−1 toward a sub-arcsecond-
 sized region to the west of the peak continuum position of HW2
 (see Fig. 3). This suggests that the NH3 (9,6) maser in Cep A
@@ -475,7 +475,7 @@ In G34.26+0.15, OH (Zheng et al. 2000), H2O (Imai et al.
 2011), and CH3OH (Bartkiewicz et al. 2016) masers have been
 detected east of source C (Fig. 4), and none of them coincides
 with the head of C. The NH3 (9,6) maser M1 is also found
-slightly oﬀ the head of source C. This could suggest that M1
+slightly oﬀthe head of source C. This could suggest that M1
 is powered by continuum source C or by an outﬂow. Near com-
 ponent B, there are some OH and CH3OH masers but no H2O
 or NH3 masers. A group of H2O masers, well-known tracers
@@ -577,7 +577,7 @@ Acknowledgements. We would like to thank the anonymous referee for the use-
 ful comments that improve the manuscript. Y.T.Y. is a member of the Interna-
 tional Max Planck Research School (IMPRS) for Astronomy and Astrophysics
 at the Universities of Bonn and Cologne. Y.T.Y. would like to thank the China
-Scholarship Council (CSC) for its support. We would like to thank the staﬀ at
+Scholarship Council (CSC) for its support. We would like to thank the staﬀat
 the Eﬀelsberg for their help provided during the observations. We thank the staﬀ
 of the JVLA, especially Tony Perreault and Edward Starr, for their assistance
 with the observations and data reduction. This research has made use of the
@@ -710,7 +710,7 @@ Epoch
 Channel
 S ν
 rms
-�
+R
 S νdv
 VLSR
 ∆V1/2
diff --git a/read/results/pymupdf/2201.00022.txt b/read/results/pymupdf/2201.00022.txt
index d97554f..1a33d74 100644
--- a/read/results/pymupdf/2201.00022.txt
+++ b/read/results/pymupdf/2201.00022.txt
@@ -1,4 +1,4 @@
-Draft version January 4, 2022
+Draft version July 7, 2022
 Typeset using LATEX twocolumn style in AASTeX631
 The Formation of Intermediate Mass Black Holes in Galactic Nuclei
 Sanaea C. Rose,1, 2 Smadar Naoz,1, 2 Re’em Sari,3 and Itai Linial3
@@ -7,17 +7,23 @@ Sanaea C. Rose,1, 2 Smadar Naoz,1, 2 Re’em Sari,3 and Itai Linial3
 3Racah Institute for Physics, The Hebrew University, Jerusalem 91904, Israel
 ABSTRACT
 Most stellar evolution models predict that black holes (BHs) should not exist above approximately
-50−70 M⊙. However, recent LIGO/Virgo detections indicate the existence of BHs with masses at and
-above this threshold. We suggest that massive BHs, including intermediate mass black holes (IMBHs),
-can form in galactic nuclei through collisions between stellar-mass black holes and the surrounding
-main-sequence stars. Considering dynamical processes such as collisions, mass segregation, and relax-
-ation, we ﬁnd that this channel can be quite eﬃcient, forming IMBHs as massive as 104 M⊙. Our
-results suggest that massive black holes and IMBHs may be ubiquitous in galactic centres. This for-
-mation channel also has implications for observations. Collisions between stars and BHs can produce
-electromagnetic signatures, for example, from x-ray binaries and tidal disruption events. Additionally,
-formed through this channel, both black holes in the mass gap and IMBHs can merge with the super-
-massive black hole at the center of a galactic nucleus through gravitational waves. These gravitational
-wave events are extreme and intermediate mass ratio inspirals (EMRIs and IMRIs, respectively).
+50 −70 M⊙, the lower limit of the pair-instability mass gap. However, recent LIGO/Virgo detections
+indicate the existence of BHs with masses at and above this threshold.
+We suggest that massive
+BHs, including intermediate mass black holes (IMBHs), can form in galactic nuclei through collisions
+between stellar-mass black holes and the surrounding main-sequence stars. Considering dynamical
+processes such as collisions, mass segregation, and relaxation, we ﬁnd that this channel can be quite
+eﬃcient, forming IMBHs as massive as 104 M⊙. This upper limit assumes that (1) the BHs accrete a
+substantial fraction of the stellar mass captured during each collision and (2) that the rate at which
+new stars are introduced into the region near the SMBH is high enough to oﬀset depletion by stellar
+disruptions and star-star collisions. We discuss deviations from these key assumptions in the text. Our
+results suggest that BHs in the pair-instability mass gap and IMBHs may be ubiquitous in galactic
+centers. This formation channel has implications for observations. Collisions between stars and BHs
+can produce electromagnetic signatures, for example, from x-ray binaries and tidal disruption events.
+Additionally, formed through this channel, both black holes in the mass gap and IMBHs can merge
+with the supermassive black hole at the center of a galactic nucleus through gravitational waves.
+These gravitational wave events are extreme and intermediate mass ratio inspirals (EMRIs and IMRIs,
+respectively).
 1. INTRODUCTION
 The
 recently
@@ -28,10 +34,10 @@ source
 GW190521 (The LIGO Scientiﬁc Collaboration et al.
 2020a,b) produced an intermediate mass black hole of
 approximately 142 M⊙. This event may have also had a
-85 M⊙ progenitor, which falls within the pair-instability
+85 M⊙progenitor, which falls within the pair-instability
 mass gap that limits stellar black holes (BHs) to no
 more than ∼
-< 50 M⊙ (e.g., Heger et al. 2003; Woosley
+< 50 M⊙(e.g., Heger et al. 2003; Woosley
 2017)1. Similarly, the merger products of GW150914,
 GW170104, and GW170814 fall within the mass gap
 (e.g., Abbott et al. 2016, 2017a,b).
@@ -39,18 +45,18 @@ BH mergers that
 form second generation BHs and, in some cases, inter-
 mediate mass BHs (IMBHs), these gravitational wave
 (GW) events can occur in globular clusters, young stel-
+Corresponding author: Sanaea C. Rose
+srose@astro.ucla.edu
+1 Note that the exact lower and upper limits may be sensitive to
+metallicity of the progenitor (e.g., Woosley 2017; Spera & Mapelli
+2017a; Limongi & Chieﬃ2018a; Sakstein et al. 2020; Belczynski
+et al. 2020a; Renzo et al. 2020; Vink et al. 2021).
 lar clusters, or the ﬁeld (e.g., Rodriguez et al. 2018; Ro-
 driguez et al. 2019; Fishbach et al. 2020; Mapelli et al.
 2021b,a; Di Carlo et al. 2019, 2021; Dall’Amico et al.
 2021; Arca Sedda et al. 2021).
 However, IMBHs are
 not limited to these locations and may reside in galac-
-Corresponding author: Sanaea C. Rose
-srose@astro.ucla.edu
-1 Note that the exact lower and upper limits may be sensitive to
-metallicity of the progenitor (e.g., Woosley 2017; Spera & Mapelli
-2017a; Limongi & Chieﬃ 2018a; Sakstein et al. 2020; Belczynski
-et al. 2020a; Renzo et al. 2020; Vink et al. 2021).
 tic nuclei as well.
 Several studies propose that our
 own galactic center may host an IMBH in the inner pc
@@ -69,27 +75,28 @@ lated gas (e.g., Begelman et al. 2006; Yue et al. 2014;
 Ferrara et al. 2014; Choi et al. 2015; Shlosman et al.
 2016). These high redshift IMBHs would need to sur-
 vive galaxy evolution and mergers to present day (e.g.,
+arXiv:2201.00022v2  [astro-ph.GA]  6 Jul 2022
+
+2
+Rose et al.
 Rashkov & Madau 2014), with signiﬁcant eﬀects on their
 stellar and even dark matter surroundings (e.g., Bertone
 et al. 2009; Chen & Liu 2013; Bringmann et al. 2012; Eda
 et al. 2013; Naoz & Silk 2014; Naoz et al. 2019). Another
 popular formation channel relies on the coalescence of
-many stellar-mass black holes.
-For example, IMBHs
+many stellar-mass black holes, which may seed objects
+as massive as SMBHs (e.g., Kroupa et al. 2020). IMBHs
 may form in the centers of globular clusters, where few-
 body interactions lead to the merger of stellar-mass BHs
 (e.g., O’Leary et al. 2006; G¨urkan et al. 2006; Blecha
 et al. 2006; Freitag et al. 2006; Umbreit et al. 2012; Ro-
-arXiv:2201.00022v1  [astro-ph.GA]  31 Dec 2021
-
-2
-Rose et al.
 driguez et al. 2018; Rodriguez et al. 2019; Fragione et al.
 2020b). Other formation mechanisms invoke successive
-collisions and mergers of massive stars (e.g., Portegies
-Zwart & McMillan 2002; Portegies Zwart et al. 2004;
-Freitag et al. 2006; Kremer et al. 2020; Gonz´alez et al.
-2021; Di Carlo et al. 2021).
+collisions and mergers of massive stars (e.g., Ebisuzaki
+et al. 2001; Portegies Zwart & McMillan 2002; Portegies
+Zwart et al. 2004; Freitag et al. 2006; Sakurai et al. 2017;
+Kremer et al. 2020; Gonz´alez et al. 2021; Di Carlo et al.
+2021; Das et al. 2021a,b; Escala 2021).
 The main obstacle to sequential BH mergers in clus-
 ters is that the merger recoil velocity kick often exceeds
 the escape velocity from the cluster (e.g., Schnittman
@@ -103,19 +110,30 @@ single interactions, binary BH GW merger, and GW
 merger recoil kicks. The post-kick merger product sinks
 back towards the cluster center over a dynamical fric-
 tion timescale. Using this approach, they showed that
-103 − 104 M⊙ IMBHs can form eﬃciently over the life-
+103 −104 M⊙IMBHs can form eﬃciently over the life-
 time of a cluster.
-However, as discussed in Section 2.2, direct star-BH
+However, as discussed in Section 2.2, direct BH-star
 collisions are much more frequent than BH-BH collision
 in galactic nuclei, making the former a promising chan-
-nel for BH growth. We propose that IMBHs can form
-naturally within the central pc of a SMBH in a galactic
-center. Speciﬁcally, these IMBHs form through repeated
-collisions with main sequence stars, accreting some or
-all of the star’s mass depending on the details of the
-collision. We demonstrate that this channel can create
-IMBHs with masses as large as 104 M⊙, depending on
-the density proﬁle of the surrounding stars.
+nel for BH growth. In an N-body study of young star
+clusters, Rizzuto et al. (2022) ﬁnd that BH-star colli-
+sions are a main contributor to the formation of BHs
+in the mass gap and IMBHs. In a similar vein, Stone
+et al. (2017) demonstrate that massive BHs can form
+from repeated tidal encounters between stars and BHs.
+More generally, several studies have explored the role of
+collisions in a GN, with implications for the stellar and
+red giant populations (e.g., Dale & Davies 2006; Dale
+et al. 2009; Balberg et al. 2013; Mastrobuono-Battisti
+et al. 2021). We propose that IMBHs can form naturally
+within the central pc of a galactic center through re-
+peated collisions between BHs and main sequence stars.
+During a collision, the BH can accrete some portion of
+the star’s mass. Over many collisions, it can grow ap-
+preciably in size. We demonstrate that this channel can
+create IMBHs with masses as large as 104 M⊙, an upper
+limit that depends on the density proﬁle of the surround-
+ing stars and the eﬃciency of the accretion.
 The paper is structured as follows: we describe rele-
 vant physical processes and our approach in Section 2.
 In particular, we provide an overview of collisions in
@@ -126,15 +144,15 @@ mass growth with each collision and presents analytic
 solutions to our equations in two diﬀerent regimes, ef-
 ﬁcient collisions and ineﬃcient collisions We compare
 these solutions to our statistical results.
-Sections 2.5
-and 2.7 discuss implications for GW merger events be-
+Sections 2.6
+and 2.8 discuss implications for GW merger events be-
 tween IMBHs and the SMBH. We then incorporate re-
 laxation processes and discuss the subsequent results in
-Section 2.8. Finally, we discuss and summarize our ﬁnd-
+Section 2.9. Finally, we discuss and summarize our ﬁnd-
 ings in Section 3.
 2. METHODOLOGY
 We consider a population of stellar mass BHs embed-
-ded in a cluster of 1 M⊙ stars. When stars and BHs
+ded in a cluster of 1 M⊙stars. When stars and BHs
 collide, the BHs can accrete mass. The growth rate de-
 pends on the physical processes outlined below. We use
 a statistical approach to estimate the stellar encounters
@@ -144,7 +162,7 @@ We consider a population of BHs within the inner few
 parsecs of the SMBH in a galactic nucleus (GN). We as-
 sume that the BH mass distribution follows that of the
 stars from which they originate, a Kroupa initial mass
-function dN/dm ∝ m−2.35. While this choice represents
+function dN/dm ∝m−2.35. While this choice represents
 a gross oversimpliﬁcation, it has very little bearing on
 our ﬁnal results. Future work may address the particu-
 lars of the BH mass distribution, but we do not expect
@@ -154,7 +172,7 @@ and lower limits of the BH mass distribution are 5 and
 compass the range of upper bounds predicted by stellar
 evolution models, which vary between 40 and 125 M⊙
 depending on the metallicity (Heger et al. 2003; Woosley
-2017; Spera & Mapelli 2017b; Limongi & Chieﬃ 2018b;
+2017; Spera & Mapelli 2017b; Limongi & Chieﬃ2018b;
 Belczynski et al. 2020b; Renzo et al. 2020). We assume
 that the orbits of the BHs follow a thermal eccentricity
 distribution. We draw their semimajor axes, a•, from a
@@ -166,7 +184,20 @@ all distances from the SMBH, including within 0.01 pc.
 Otherwise, the innermost region of the GN would be
 poorly represented in our sample.
 We consider other
-observationally motivated distributions in Section 2.8,
+
+IMBH Formation in Galactic Nuclei
+3
+Figure 1. We plot the relevant timescales, including col-
+lision (green), relaxation (gold), and BH-BH GW capture
+(purple), for a single BH in the GN as a function of distance
+from the SMBH. For the collision timescale, we assume the
+BH is on a circular orbit.
+The timescales depend on the
+density, so we adopt a range of density proﬁles, bounded by
+α = 1 (dashed curve) to α = 2 (dark, solid curve). The dark
+blue line represents the time for a 105 M⊙BH to merge with
+the SMBH through GW emission.
+observationally motivated distributions in Section 2.9,
 but reserve a more detailed examination of the distribu-
 tion’s impact for future work.
 2.2. Direct Collisions
@@ -178,14 +209,14 @@ where n is the number density of objects, σ is the ve-
 locity dispersion, and A is the cross-section. We use the
 collision timescale from Rose et al. (2020):
 t−1
-coll = πn(a•)σ(a•)
+coll =πn(a•)σ(a•)
 ×
-�
+
 f1(e•)r2
 c + f2(e•)rc
 2G(mBH + m⋆)
 σ(a•)2
-�
+
 . (1)
 where G is the gravitational constant and rc is the sum
 of the radii of the interacting objects, a black hole with
@@ -195,36 +226,23 @@ the eccentricity of the BH’s orbit about the SMBH on
 the collision rate, while n and σ are simply evaluated
 at the semimajor axis of the orbit (see below).
 Note
-
-IMBH Formation in Galactic Nuclei
-3
-Figure 1. We plot the relevant timescales, including col-
-lision (green), relaxation (gold), and BH-BH GW capture
-(purple), for a single BH in the GN as a function of distance
-from the SMBH. For the collision timescale, we assume the
-BH is on a circular orbit.
-The timescales depend on the
-density, so we adopt a range of density proﬁles, bounded by
-α = 1 (dashed curve) to α = 2 (dark, solid curve). The dark
-blue line represents the time for a 105 M⊙ BH to merge with
-the SMBH through GW emission.
 that this timescale equation includes the eﬀects of grav-
 itational focusing, which enhances the cross-section of
 interaction.
 Assuming a circular orbit for simplicity, we plot the
 timescale for a BH orbiting in the GN to collide with
-a 1 M⊙ star as a function of distance from the SMBH
+a 1 M⊙star as a function of distance from the SMBH
 in Figure 1.2 As this timescale depends on the density
 of surrounding stars, we adopt a density proﬁle of the
 form:
 ρ(r•) = ρ0
-�r•
+r•
 r0
-�−α
+−α
 ,
 (2)
 where r• denotes the distance from the SMBH. We adopt
-a SMBH mass of 4 × 106 M⊙ such that our ﬁducial GN
+a SMBH mass of 4 × 106 M⊙such that our ﬁducial GN
 matches our own galactic center (e.g., Ghez et al. 2005;
 Genzel et al. 2003). In this case, the normalization in
 Eq. (2) is ρ0 = 1.35 × 106 M⊙/pc3 at r0 = 0.25 pc (Gen-
@@ -236,12 +254,10 @@ n(r•) = ρ(r•)
 1 M⊙
 .
 (3)
-2 We note that the eccentricity has a very minor eﬀect on the
-collision timescale (Rose et al. 2020).
 The collision timescale also depends on the velocity dis-
 persion, which we express as:
 σ(r•) =
-�
+s
 GM•
 r•(1 + α),
 (4)
@@ -249,10 +265,10 @@ where α is the slope of the density proﬁle and M• de-
 notes the mass of the SMBH (Alexander 1999; Alexan-
 der & Pfuhl 2014). As mentioned above, Eq. (1) depends
 on the sum of the radii of the colliding objects, rc. We
-take rc = 1 R⊙ because these interactions involve a BH
+take rc = 1 R⊙because these interactions involve a BH
 and a star, and the former has a much smaller physi-
 cal cross-section. For example, the Schwarzschild radius
-of a 10 M⊙ BH is only 30 km, or 4.31 × 10−5 R⊙. For
+of a 10 M⊙BH is only 30 km, or 4.31 × 10−5 R⊙. For
 this reason, direct collisions between compact objects
 are very rare and not included in our model.
 We note that direct collisions between BHs, via GW
@@ -268,6 +284,11 @@ vant equations, see O’Leary et al. 2009; Gond´an et al.
 2018, for example). Thus, we expect that star-BH col-
 lisions will be the main driver of IMBH growth in the
 GN.
+2 We note that the eccentricity has a very minor eﬀect on the
+collision timescale (Rose et al. 2020).
+
+4
+Rose et al.
 2.3. Statistical Approach to Collisions
 We simulate the mass growth of a population of BHs
 with initial conditions detailed in Section 2.1. Over an
@@ -285,14 +306,6 @@ expected to accrete in a single collision (see Section 2.4
 for details). We recalculate the collision timescale using
 the updated BH mass and repeat this process until the
 time elapsed equals the simulation time of 10 Gyr3.
-3 Closer to the SMBH, ∆t may exceed the collision timescale by
-a factor of a few for steep density proﬁles. We include a safe-
-guard in our code which takes the ratio tcoll/∆t and rounds it
-to the nearest integer. We take this integer to be the number of
-collisions and increase the BH mass accordingly.
-
-4
-Rose et al.
 2.4. Mass Growth
 When a BH collides with a star, it may accrete ma-
 terial and grow in mass. The details of the accretion
@@ -304,15 +317,19 @@ We begin by con-
 sidering the escape velocity from the BH at the star’s
 outermost point, its surface, which corresponds to the
 maximum impact parameter 1 R⊙. Qualitatively, one
-might expect that the BH could accrete the entire star
-(i.e., ∆m ∼ 1 M⊙) if the relative velocity is smaller than
+might expect that the BH could capture the entire star
+(i.e., ∆m ∼1 M⊙) if the relative velocity is smaller than
 the escape velocity from the BH at this point. However,
 in the vicinity of the SMBH, the dispersion velocity of
 the stars may be much larger than the escape velocity
 from the BH at the star’s surface. In this case, the BH
-accretes a “tunnel” of material through the star. This
+captures a “tunnel” of material through the star. This
 tunnel has radius equal to the Bondi radius and length
-approximately 1 R⊙.
+approximately 1 R⊙. For the purposes of this study, we
+assume that the BH accretes all of the material that
+it captures. The details of the accretion are uncertain,
+however, and it may be much less eﬃcient than our re-
+sults imply. We discuss accretion in Section 2.5.
 To estimate ∆m, we begin with the Bondi-Hoyle ac-
 cretion rate, ˙m, given by:
 ˙m = 4πG2m2
@@ -320,6 +337,24 @@ BHρstar
 (c2s + σ2)3/2
 ,
 (5)
+3 Closer to the SMBH, ∆t may exceed the collision timescale by
+a factor of a few for steep density proﬁles. We include a safe-
+guard in our code which takes the ratio tcoll/∆t and rounds it
+to the nearest integer. We take this integer to be the number of
+collisions and increase the BH mass accordingly.
+Figure 2. We consider an example that highlights the mass
+growth as a function of distance from the SMBH. Grey dots
+represent the initial masses and distances from the SMBH
+of the BHs involved in the simulation. For simplicity, we set
+the inital mass equal to 10 M⊙for all of the BHs. Assuming
+the density proﬁle of stars has α = 1, we consider two cases:
+BHs accrete all of the star’s mass during a collision (red) and
+only a portion of the star’s mass is accreted during a collision
+given by Eq. 6 (blue). The latter case results in less growth
+closer to the SMBH where the velocity dispersion becomes
+high.
+The shaded regions and dashed lines represent the
+analytical predictions detailed in Section 2.4.
 where cs is the speed of sound in the star and ρstar is its
 density (e.g., Bondi 1952; Bondi & Hoyle 1944; Shima
 et al. 1985; Edgar 2004, see latter for a review).
@@ -327,22 +362,22 @@ We
 approximate the density as 1 M⊙/(4πR3
 ⊙/3) and take
 the conservative value of cs = 500 km s−1, which is
-consistent with the sound speed inside a 1 M⊙ star
+consistent with the sound speed inside a 1 M⊙star
 (Christensen-Dalsgaard et al. 1996) and allows us to set
 a lower limit on ∆m. To ﬁnd ∆m, at each collision, we
 have:
 ∆m = min( ˙m × t⋆,cross, 1 M⊙) ,
 (6)
-where t⋆,cross ∼ R⊙/σ is the crossing time of the BH in
+where t⋆,cross ∼R⊙/σ is the crossing time of the BH in
 the star. We take the minimum between ˙m×t⋆,cross and
-1 M⊙ because the BH cannot accrete more mass than
+1 M⊙because the BH cannot accrete more mass than
 one star at each collision.
 Figure 2 juxtaposes the expected growth using Bondi-
 Hoyle-Lyttleton accretion (blue small points) with a
 much simpler model in which the BH accretes the star’s
-entire mass, 1 M⊙ (red large points).
+entire mass, 1 M⊙(red large points).
 Both examples
-start with identical populations of 10 M⊙ BHs (grey)
+start with identical populations of 10 M⊙BHs (grey)
 and simulate growth through collisions using a statisti-
 cal approach. As the BHs grow, the collision timescale,
 which depends on mBH, decreases.
@@ -352,23 +387,13 @@ sult is exponential growth (see discussion and details
 surrounding Eq. (8)). In Figure 2, however, the simula-
 tions assume α = 1 for the stellar density proﬁle, ensur-
 ing the collision timescale is long compared to the sim-
+
+IMBH Formation in Galactic Nuclei
+5
 ulation time, 10 Gyr. Therefore, the BHs grow slowly,
-Figure 2. We consider an example that highlights the mass
-growth as a function of distance from the SMBH. Grey dots
-represent the initial masses and distances from the SMBH
-of the BHs involved in the simulation. For simplicity, we set
-the inital mass equal to 10 M⊙ for all of the BHs. Assuming
-the density proﬁle of stars has α = 1, we consider two cases:
-BHs accrete all of the star’s mass during a collision (red) and
-only a portion of the star’s mass is accreted during a collision
-given by Eq. 6 (blue). The latter case results in less growth
-closer to the SMBH where the velocity dispersion becomes
-high.
-The shaded regions and dashed lines represent the
-analytical predictions detailed in Section 2.4.
 and their ﬁnal masses can be approximated using the
 following equation:
-mﬁnal(tcoll → const.) = minitial + ∆m T
+mﬁnal(tcoll →const.)=minitial + ∆m T
 tcoll
 ,
 (7)
@@ -376,7 +401,7 @@ in which T represents the simulation time and ∆m and
 tcoll remain constant, approximated as their initial val-
 ues.
 This equation is plotted in Figure 2 for both cases,
-∆m = 1 M⊙ (red) and ∆m from Bondi-Hoyle-Lyttleton
+∆m = 1 M⊙(red) and ∆m from Bondi-Hoyle-Lyttleton
 accretion (blue), and the curves coincide with the cor-
 responding simulated results. The shaded regions rep-
 resent one standard deviation from Eq. (7), calculated
@@ -396,9 +421,6 @@ star’s mass.
 Eq. 7 does not apply for other values of α. When the
 collision timescale is shorter, corresponding to a larger
 index α in the density proﬁle (see Figure 1), the growth
-
-IMBH Formation in Galactic Nuclei
-5
 is very eﬃcient and ∆m quickly approaches 1 M⊙. Con-
 sequently, while we can now assume ∆m = 1 M⊙, we
 can no longer assume the collision timescale is constant.
@@ -407,45 +429,98 @@ For
 ∆m = 1M⊙, the general solution is reached by solving
 the diﬀerential equation dm/dt = 1 M⊙/tcoll(m), which
 gives:
-mﬁnal(∆m → 1 M⊙) = −A + (minitial + A) eCT
+mﬁnal(∆m →1 M⊙)=−A + (minitial + A) eCT
 (8)
 where A = σ2Rstar/G and C = 2πGnstarRstar/σ. As an
 example, we plot this curve in purple for the α = 2 case,
 in Figure 3, which agrees with the simulated masses.
-2.5. GW Inspiral
+2.5. Uncertainties in Accretion
+We note that the ∆M calculated in this proof-of-
+concept study assumes that the BH accretes all of the
+material that it captures. Estimating the true fraction
+of the material accreted by the BH is very challeng-
+ing; this complex problem requires numerically solving
+the generalized GR ﬂuid equations with cooling, heat-
+ing, and radiative transfer, etc., and remains an active
+ﬁeld of research (e.g., Blandford & Begelman 1999; Park
+& Ostriker 2001; Narayan et al. 2003; Igumenshchev
+et al. 2003; Ohsuga et al. 2005; Yuan et al. 2012; Jiang
+et al. 2014; McKinney et al. 2014; Narayan et al. 2022).
+Heuristically, if a collision between a BH and a star re-
+sults in an accretion disk, the disk’s viscous timescale
+may be as low as days. The resultant luminosity can
+unbind most of the captured material, though details
+such as the amount accreted and peak luminosity re-
+main uncertain (e.g., Yuan et al. 2012; Jiang et al.
+2014; see also the discussion in Stone et al. 2017,
+Rizzuto et al. 2022, and Kremer et al. 2022). The
+question becomes whether or not a BH can still accu-
+mulate signiﬁcant amounts of mass over many collisions
+even if it accretes very little in a single one.
+We ex-
+plore the viability of our channel using a physically mo-
+tivated ineﬃcient accretion model. Several studies have
+invoked momentum-driven winds in BH accretion (e.g.,
+Murray et al. 2005; Ostriker et al. 2010; Brennan et al.
+2018). We thus estimate the fraction of captured mass
+accreted to be approximately vesc/(cη), where vesc is
+the escape velocity from the BH at 1 R⊙ and η is the
+accretion eﬃciency at the ISCO. We take η to be 0.1
+(e.g., Yu & Tremaine 2002).
+This expression for the
+fraction accreted is consistent with Kremer et al. (2022)
+equation 19 for s = 0.5, which is a reasonable value for
+s, a free parameter between 0.2 and 0.8.
+We discuss
+the results of the momentum-driven winds estimate in
+Section 3. We note that the accretion process may be
+more eﬃcient than this estimate implies if, for example,
+jets or other instabilities result in the beaming of radi-
+ation away from the captured material (e.g., Blandford
+& Znajek 1977; Begelman 1979; De Villiers et al. 2005;
+McKinney & Gammie 2004; McKinney 2006; Igumen-
+shchev 2008; Begelman 2012a,b; McKinney et al. 2014).
+2.6. GW Inspiral
 When a BH is close to the SMBH, GW emission can
 circularize and shrink its orbit. We implement the ef-
 fects of GW emission on the BH’s semimajor axis and
 eccentricity following Peters & Mathews (1963a). The
 characteristic timescale to merge a BH with an SMBH
 is given by:
-tGW ≈ 2.9 × 1012 yr
-�
+tGW ≈2.9 × 1012 yr
+
 M•
 106 M⊙
-�−1 � mBH
+−1  mBH
 106 M⊙
-�−1
+−1
 ×
-� M• + mBH
+ M• + mBH
 2 × 106 M⊙
-�−1 �
+−1 
 a•
-10−4 pc
-�4
-× f(e•)(1 − e2
+10−2 pc
+4
+×f(e•)(1 −e2
 •)7/2 ,
 (9)
 where f(e•) is a function of e•.
 For all values of e•,
 f(e•) is between 0.979 and 1.81 (Blaes et al. 2002). We
-plot this timescale for a 1 × 105 M⊙ BH in Figure 1 in
+plot this timescale for a 1 × 105 M⊙BH in Figure 1 in
 blue.
+
+6
+Rose et al.
+Figure 3. On the right, we plot ﬁnal masses of 500 BHs using diﬀerent values of α in the density proﬁle, shallow (α = 1) to
+cuspy (α = 2). For the latter case, the purple line shows the analytical result from Eq. 8, taking minitial to be the average mass
+of the population. Faded stars indicate BHs that merged with the SMBH through GWs. On the left, we plot the masses and
+merger times of these BHs.
 In our simulations, we assume a BH has merged with
 the SMBH when the condition tGW < telapsed is met.
 When this condition is satisﬁed, we terminate mass
 growth through collisions for that BH.4
-2.6. IMBH growth
+2.7. IMBH growth
 As detailed above, BH-stellar collisions can increase
 the BH masses as a function of time. Here, we examine
 the sensitivity of the BH growth to the density power
@@ -456,24 +531,24 @@ proﬁles, will result in more eﬃcient mass growth.
 In
 Figure 1, larger values of α lead to collision timescales
 in the GN’s inner region, inwards of 0.25 pc, that are
-4 For comparison, we also incrementally changed the semimajor
-axis and eccentricity from GW emission following the equations
-in Peters & Mathews (1963b).
-This method leads to a slight
-increase in the ﬁnal IMBH masses because it accounts for the
-collisions that take place while the orbit is gradually shrinking.
 much smaller that the 10 Gyr simulation time. Figure 3
 conﬁrms this expectation. It depicts the mass growth of
 a uniform distribution of BHs with initial conditions de-
 tailed in Section 2.1 for ﬁve α values, spanning 1 (green)
 to 2 (purple). The most massive IMBHs form inwards
 of 0.25 pc for the α = 2 case.
-2.7. Gravitational Wave Mergers and Intermediate
+2.8. Gravitational Wave Mergers and Intermediate
 and Extreme Mass Ratio Inspiral Candidates
 Towards the SMBH, eﬃcient collisions can create BHs
 massive enough to merge with the SMBH through GWs.
-Following the method detailed in Section 2.5, when a
+Following the method detailed in Section 2.6, when a
 given BH meets the criterion tGW < telapsed, we mark
+4 For comparison, we also incrementally changed the semimajor
+axis and eccentricity from GW emission following the equations
+in Peters & Mathews (1963b).
+This method leads to a slight
+increase in the ﬁnal IMBH masses because it accounts for the
+collisions that take place while the orbit is gradually shrinking.
 it as merged with the SMBH. We assume that at this
 point the dynamics of the BH will be determined by GW
 emission, shrinking and circularizing the BHs orbit un-
@@ -484,7 +559,7 @@ merger. It is interesting to note that even in the ab-
 sence of relaxation processes, which are often invoked
 to explain the formation of EMRIs, EMRIs and notably
 IMRIs can form in this region.
-2.8. Two Body Relaxation Processes
+2.9. Two Body Relaxation Processes
 A BH orbiting the SMBH experiences weak gravita-
 tional interactions with other objects in the GN. Over a
 relaxation time, these interactions alter its orbit about
@@ -492,38 +567,42 @@ the SMBH. The two-body relaxation timescale for a
 single-mass system is:
 trelax = 0.34
 σ3
-G2ρ⟨M∗⟩ ln Λrlx
+G2ρ⟨M∗⟩ln Λrlx
 ,
 (10)
-where ln Λrlx is the Coulomb logarithm and ⟨M∗⟩ is the
+where ln Λrlx is the Coulomb logarithm and ⟨M∗⟩is the
 average mass of the surrounding objects, here assumed
-to be 1 M⊙ (Spitzer 1987; Binney & Tremaine 2008,
+to be 1 M⊙(Spitzer 1987; Binney & Tremaine 2008,
 Eq. (7.106)). This equation represents the approximate
 timescale for a BH on a semi-circular orbit to change
 its orbital energy and angular momentum by order of
 themselves. The BH experiences diﬀusion in its angular
 momentum and energy as a function of time (depending
 on the eccentricity of the orbit, this process can be more
-eﬃcient Fragione & Sari 2018; Sari & Fragione 2019). In
-Figure 1, we plot the relaxation timescale in gold for a
-range of α. We note that the Bahcall & Wolf (1976) pro-
-ﬁle, α = 7/4, corresponds to zero net ﬂux and therefore
-does not preferentially migrate objects inward.
-Additionally,
-because they are more massive on
-average than the surrounding objects, BHs are ex-
-pected to segregate inwards in the GN (e.g., Shapiro
-& Marchant 1978; Cohn & Kulsrud 1978; Morris 1993;
-Miralda-Escud´e & Gould 2000; Baumgardt et al. 2004).
+eﬃcient; see Fragione & Sari 2018; Sari & Fragione 2019).
+Relaxation can cause the orbit of an object in a GN to
+reach high eccentricities. If the object is a BH, it can
+spiral into the SMBH and form an EMRI, while a star
 
-6
-Rose et al.
-Figure 3. On the right, we plot ﬁnal masses of 500 BHs using diﬀerent values of α in the density proﬁle, shallow (α = 1) to
-cuspy (α = 2). For the latter case, the purple line shows the analytical result from Eq. 8, taking minitial to be the average mass
-of the population. Faded stars indicate BHs that merged with the SMBH through GWs. On the left, we plot the masses and
-merger times of these BHs.
+IMBH Formation in Galactic Nuclei
+7
+can be tidally disrupted by the SMBH (e.g. Magorrian
+& Tremaine 1999; Wang & Merritt 2004; Hopman &
+Alexander 2005; Aharon & Perets 2016; Stone & Met-
+zger 2016; Amaro-Seoane 2018; Sari & Fragione 2019;
+Naoz et al. 2022). The relaxation process is therefore
+crucial to our study. In Figure 1, we plot the relaxation
+timescale in gold for a range of α. We note that the Bah-
+call & Wolf (1976) proﬁle, α = 7/4, corresponds to zero
+net ﬂux and therefore does not preferentially migrate
+objects inward.
+Additionally, because BHs are more massive on av-
+erage than the surrounding objects, they are expected
+to segregate inwards in the GN (e.g., Shapiro &
+Marchant 1978; Cohn & Kulsrud 1978; Morris 1993;
+Miralda-Escud´e & Gould 2000; Baumgardt et al. 2004).
 They sink toward the SMBH on the mass segregation
-timescale, tseg ≈ ⟨M∗⟩/mBH × trelax (e.g., Spitzer 1987;
+timescale, tseg ≈⟨M∗⟩/mBH × trelax (e.g., Spitzer 1987;
 Fregeau et al. 2002; Merritt 2006), which is typically an
 order of magnitude smaller than the relaxation timescale
 plotted in Figure 1.
@@ -536,12 +615,12 @@ of zero and a standard deviation of ∆vrlx/
 √
 3, where
 ∆vrlx = v•
-�
+p
 P•/trlx (see Bradnick et al. 2017, for an
 approach to changes in the angular momentum). The
 new orbital parameters can be calculated following Lu
-& Naoz (2019), and see Naoz et al. in prep for full set
-of equations.
+& Naoz (2019), and see Naoz et al. (2022) for the full
+set of equations.
 We account for the eﬀects of relaxation processes,
 including mass-segregation, using a multi-faceted ap-
 proach. We begin by migrating each BH towards the
@@ -557,7 +636,7 @@ terings with the now rarer main-sequence stars.
 The
 BHs will then settle onto a Bahcall-Wolf proﬁle, while
 the stars may follow a shallower proﬁle, with approx-
-imately n⋆ ∝ r−1.5, inwards of the transition radius
+imately n⋆∝r−1.5, inwards of the transition radius
 (Linial & Sari in prep.).
 Therefore, after the initial mass segregation, we allow
 the BHs to begin diﬀusing over a relaxation timescale,
@@ -566,7 +645,7 @@ dom process. In this random process, some of the BHs
 may migrate closer to the SMBH. We terminate mass
 growth when the BH enters the inner 200 au of the GN,
 within which the density of stars is uncertain. This cut-
-oﬀ is based on the 120 au pericenter of S0-2, the closest
+oﬀ is based on the 120 au pericenter of S0-2, the closest
 known star to the SMBH (e.g., Ghez et al. 2005).
 Another physical process that causes inward migra-
 tion is dynamical friction. A cursory derivation based
@@ -584,7 +663,7 @@ towards the SMBH, their concentration in the inner re-
 gion of the GN increases, allowing them to dominate the
 scattering. We reserve the inclusion of these interactions
 for future study.
-2.9. Eﬀect of Relaxation Processes
+2.10. Eﬀect of Relaxation Processes
 As depicted in Figure 4, two-body relaxation processes
 result in more EMRIs and IMRIs events.
 These pro-
@@ -592,17 +671,6 @@ cesses allow BHs that begin further from the SMBH
 to migrate inwards and grow more eﬃciently in mass.
 However, it also impedes the growth of BHs that are
 initially closer to the SMBH by allowing them to dif-
-
-IMBH Formation in Galactic Nuclei
-7
-Figure 4. Similar to Figure 3, we plot the initial masses versus initial distance (grey) and ﬁnal mass versus ﬁnal distance (red)
-for 500 BHs. This simulation includes relaxation processes, including mass segregation, diﬀusion, and dynamical friction. We
-assume α = 1.75 for the GN density proﬁle. Faded stars represent BHs that merged with the SMBH. As a result of inward
-migration, BHs merge more quickly with the SMBH, before they can become as massive as those in Figure 3. Additionally, more
-BHs become EMRIs and IMRIs. Additionally, in the third panel, we show a histogram of the simulated IMBH masses for two
-diﬀerent values of α, 1.5 (orange, solid), α, 1.75 (red, dashed), and 2 (purple, dash-dotted), accounting for relaxation processes.
-The dashed, faded lines represent the corresponding initial histograms. We assume α = 1.75 for the GN density proﬁle. Faded
-stars represent BHs that merged with the SMBH.
 fuse out of the inner region where collisions are eﬃcient.
 As can be seen in Figure 4, the net result is that more
 BHs grow, but the maximum mass is lower compared
@@ -615,9 +683,9 @@ In fact, using a KS test, we
 ﬁnd that we cannot reject the hypothesis that the two
 distributions were drawn from the same sample for the
 α = 1.75 and α = 2 results. Interestingly, a BH mass
-IMF with an average of 10 M⊙ leads to a ﬁnal distri-
-bution with an average of ∼ 200 M⊙ and a median of
-∼ 45 M⊙, which lies within the mass gap.
+IMF with an average of 10 M⊙ leads to a ﬁnal distri-
+bution with an average of ∼ 200 M⊙ and a median of
+∼ 45 M⊙, which lies within the mass gap.
 3. DISCUSSION AND PREDICTIONS
 We explore the feasibility of forming IMBHs in a
 GN through successive collisions between a stellar-mass
@@ -625,20 +693,31 @@ BH and main-sequence stars.
 Taking both a statisti-
 cal and analytic approach, we show that this channel
 can produce IMBHs eﬃciently with masses as high as
-103−4 M⊙ and may result in many IMBH-SMBH merg-
-ers (intermediate-mass ratio inspiral, IMRIs) and EM-
-RIs.
+103−4 M⊙and may result in many IMBH-SMBH merg-
+ers (intermediate-mass ratio inspirals, or IMRIs) and
+EMRIs.
+
+8
+Rose et al.
+Figure 4. Similar to Figure 3, we plot the initial masses versus initial distance (grey) and ﬁnal mass versus ﬁnal distance
+(red) for 500 BHs. This simulation includes relaxation processes, including mass segregation, diﬀusion, and dynamical friction.
+We assume α = 1.75 for the GN density proﬁle. Faded stars represent BHs that merge with the SMBH. As a result of inward
+migration, BHs merge more quickly with the SMBH, before they can become as massive as those in Figure 3. Additionally,
+more BHs become EMRIs and IMRIs. In the third panel, we show a histogram of the simulated IMBH masses
+for three diﬀerent values of α: 1.5 (orange, solid), 1.75 (red, dashed), and 2 (purple, dash-dotted), accounting for relaxation
+processes. We also show the results for a simulation with α = 1.75 that accounts for momentum-driven winds (black, dotted).
+Despite the substantially reduced accretion, BHs in the mass gap still form.
 As the stellar mass BH collides with a star, the BH
 will grow in mass. The increase may equal star’s en-
 tire mass if the relative velocity is smaller than the es-
 cape velocity from the BH at 1 R⊙. However, near the
 SMBH, the velocity dispersion may be larger than the
 escape velocity from the BH at the star’s radius. In this
-limit, the BH accretes a “tunnel” of material through
+limit, the BH captures a “tunnel” of material through
 the star, estimated using Bondi-Hoyle-Lyttleton accre-
 tion. In our statistical analysis, we account for Bondi-
 Hoyle-Lyttleton accretion and ﬁnd that BHs outside of
-10−2 pc from the SMBH can accrete the entire star (see
+10−2 pc from the SMBH can capture the entire star (see
 Figure 2).
 The eﬃciency of collisions, and therefore IMBH,
 EMRI, and IMRI formation as well, are sensitive to
@@ -651,9 +730,32 @@ However, the inclusion of relaxation processes in the
 simulations dampens the inﬂuence of the stellar density
 proﬁle by allowing BHs to diﬀuse into regions of more
 or less eﬃcient growth. As a result, more BHs grow in
-mass, but their maximum mass is smaller (∼ 104 M⊙).
+mass, but their maximum mass is smaller (∼104 M⊙).
 Additionally, the ﬁnal masses have no apparent depen-
 dence on distance from the SMBH (see Figure 4).
+Most simulations in our study assume that the BHs
+accrete all of the mass that they capture. The ﬁnal BH
+masses can be taken as an upper limit. We note that
+the accretion is a highly uncertain process and repre-
+sents an active ﬁeld of study (e.g., Blandford & Begel-
+man 1999; Park & Ostriker 2001; Narayan et al. 2003;
+Igumenshchev et al. 2003; Ohsuga et al. 2005; Yuan
+et al. 2012; Jiang et al. 2014; McKinney et al. 2014;
+Narayan et al. 2022). To assess the limits of our model,
+we also consider a physically motivated accretion model,
+momentum-driven winds (Section 2.5). We present the
+ﬁnal mass distribution for momentum-driven winds in
+Figure 4.
+Importantly, we ﬁnd that BHs within the
+mass gap still form naturally despite the substantially
+reduced accretion. About 5% of the BHs grow by 10
+to 100 M⊙. Furthermore, if we increase this ∆M esti-
+mate by a factor of 2 (i.e., use η = 0.05), the simula-
+tion produces a 3.5 × 103 M⊙IMBH for the same initial
+conditions. Our proof-of-concept demonstrates that col-
+lisions between BHs and stars are an important process
+that should be taken into account in dense places such
+as a GN.
 Mass growth through BH-main-sequence star colli-
 sions may act in concert with other IMBH formation
 channels, such as compact object binary mergers (e.g.,
@@ -673,18 +775,87 @@ Kozai Lidov mechanism, leaving behind a single star or
 a single compact object (e.g., Stephan et al. 2016, 2019;
 Hoang et al. 2018). Additionally, to be susceptible to
 evaporation, BH binaries must have a wider conﬁgura-
-tion. Otherwise, they will be more tightly bound that
-
-8
-Rose et al.
-the average kinetic energy of the surrounding objects,
+tion. Otherwise, they will be more tightly bound than
+the average kinetic energy of the surrounding objects
 and will only harden through weak gravitational inter-
+
+IMBH Formation in Galactic Nuclei
+9
 actions with neighboring stars (see for example Figure
 6 in Rose et al. 2020).
-Not included in this study, collisions between the BH
-and other compact objects will increase the BH growth
-rate. BH-BH mergers (e.g., O’Leary et al. 2009; Fra-
-gione et al. 2021) and even neutron star BH mergers
+We note that we assume a steady-state and treat the
+stars as a reservoir in this model. Future work will take a
+more nuanced approach to the background stars, whose
+density as a function of time can be inﬂuenced by several
+factors. Firstly, the relaxation of the stellar population
+occurs on Gyr timescales. Some studies have suggested
+that in situ star formation can occur in the Galactic
+Center as close as 0.04 pc from the SMBH (e.g., Levin
+& Beloborodov 2003; Paumard et al. 2006), and star
+formation episodes can occur as often as every ∼5 Myr
+(e.g., Lu et al. 2009). Therefore, we expect that after
+the ﬁrst Gyr, stars within ≲0.01 pc will be replenished
+at intervals consistent with the star formation episodes;
+the infalling populations of stars are separated by ∼
+5−10 Myr, which is shorter than the collision timescale.
+However, star-star collisions may complicate this pic-
+ture within ∼0.01 pc. As discussed above, regular star
+formation ensures the BHs always have a stellar popula-
+tion to interact with outside of ∼0.01 pc.5 At 0.01 pc,
+however, the kinetic energy during a collision between
+two 1 M⊙stars is larger than their binding energies.
+Collisions can therefore thin out the stellar populations
+during the time it takes them to diﬀuse to these small
+radii, ≲0.01 pc, and may reduce the BH growth in the
+innermost region. We reserve the inclusion of star-star
+collisions for future work. We also note that the disrup-
+tion of binary stars by the SMBH may help replenish
+the stellar population even as collisions work to deplete
+it (e.g., Balberg et al. 2013); when a binary is disrupted,
+one of the stars is captured on a tightly bound orbit
+about the SMBH.
+An IMBH may also aﬀect the stellar density proﬁle.
+As it spirals into the SMBH, it can perturb stellar orbits,
+and these interactions can lead to hypervelocity stars
+(e.g., Baumgardt et al. 2006a; L¨ockmann & Baumgardt
+2008).
+L¨ockmann & Baumgardt (2008) show that an
+IMBH can modify an initially steep stellar density pro-
+ﬁle to become consistent with the ﬂatter cusp observed
+in the Galactic Center. The stars may then be replen-
+ished on 100 Myr timescales (Baumgardt et al. 2006a).
+Therefore, after the formation of the ﬁrst few IMBHs,
+subsequent BH growth may occur in bursts, coinciding
+with replenishment of the stars.
+While there are many competing dynamical processes
+that shape the stellar density proﬁle, we stress that α
+5 In fact, the star-star collision timescale is greater than 10 Myr
+for the entire parameter space, save at 0.001 pc for larger values
+of α; the BH-star collision timescale plotted in Fig. 1 is the same
+order of magnitude as the star-star collision timescale.
+can simply be chosen to encapsulate all of the relevant
+physics. A value for α that is constrained by observa-
+tions must already reﬂect ongoing processes like star-
+star collisions and replenishment. Sch¨odel et al. (2018)
+ﬁnd the observed stellar mass enclosed within 0.01 pc of
+the Milky Way’s Galactic Center to be approximately
+180 M⊙. This estimate is consistent to order of magni-
+tude with our α = 1.25 case. In a simulation like those
+depicted in Figure 4, which include relaxation, α = 1.25
+leads to a maximum IMBH mass of 140 M⊙. Further-
+more, while the stellar mass within 0.01 pc may be a
+few hundred M⊙, Do et al. (2019) and GRAVITY Col-
+laboration et al. (2020) set an upper limit on the mass
+enclosed within the orbit of S0-2 to be about a few thou-
+sand M⊙, or 0.1% of the central mass. This upper limit
+can include mass that was previously in stars but is now
+in BHs. In that case, the 180 M⊙is what remains of the
+stars, while BHs and IMBHs make up the ∼1000 M⊙
+in the innermost region.
+Also not included in this study, collisions between the
+BH and other compact objects will increase the BH
+growth rate. BH-BH mergers (e.g., O’Leary et al. 2009;
+Fragione et al. 2021) and even neutron star BH mergers
 (e.g., Hoang et al. 2020) become more likely as the BHs
 increase in mass through stellar collisions. As a result,
 the BH-BH collision timescale, discussed in Section 2.2,
@@ -693,45 +864,56 @@ BHs to grow through this channel in addition to stel-
 lar collisions. Additionally, this compact object mergers
 result in GW recoil, which may have a large impact on
 the dynamics (e.g., Baibhav et al. 2020; Fragione et al.
-2021)
+2021).
 The BH’s mass growth increases GW emission, which
-dissipates energy from the orbit. Along with relaxation
-processes, GW emission causes BHs to sink towards the
-SMBH and eventually undergo a merger. As a result,
-the GN environment is conducive to the formation of
-EMRIs and IMRIs. The GW emission from EMRIs and
-IMRIs is expected to be at mHz frequencies, making
-them promising candidates for LISA to observe. While
-the exact rate calculation is beyond the scope of this
-study, the mechanism outlined here seems very promis-
-ing.
-Our results also suggest that IMBHs are likely to ex-
-ists in many galactic nuclei, as well as within our own
-galactic center.
-This implication seems to be consis-
-tent with recent observational and theoretical studies
-(e.g., Hansen & Milosavljevi´c 2003; Maillard et al. 2004;
-G¨urkan & Rasio 2005; Gualandris & Merritt 2009; Chen
-& Liu 2013; Generozov & Madigan 2020; Fragione et al.
-2020a; Zheng et al. 2020; Naoz et al. 2020; GRAVITY
-Collaboration et al. 2020).
+dissipates energy from the orbit. Along with relaxation,
+GW emission causes BHs to sink towards the SMBH
+and eventually undergo a merger. As a result, the GN
+environment is conducive to the formation of EMRIs
+and IMRIs.
+The GW emission from EMRIs and IM-
+RIs is expected to be at mHz frequencies, making them
+promising candidates for LISA to observe. While the
+exact rate calculation is beyond the scope of this study,
+the mechanism outlined here seems very promising.
+Our results also suggest that BHs within the mass gap
+as well as IMBHs likely exist in many galactic nuclei, as
+well as within our own galactic center. This implication
+seems to be consistent with recent observational and
+theoretical studies (e.g., Hansen & Milosavljevi´c 2003;
+Maillard et al. 2004; G¨urkan & Rasio 2005; Gualandris
+& Merritt 2009; Chen & Liu 2013; Generozov & Madi-
+gan 2020; Fragione et al. 2020a; Zheng et al. 2020; Naoz
+et al. 2020; GRAVITY Collaboration et al. 2020).
+
+10
+Rose et al.
 Lastly, the collisions between stellar mass BHs and
 stars may contribute to the x-ray emission from our
-galactic centre (e.g., Muno et al. 2005, 2009; Hailey et al.
-2018; Zhu et al. 2018; Cheng et al. 2018)5. These inter-
-actions, in particular grazing collisions, may also result
-in tidal disruption events (e.g., Perets et al. 2016; Sam-
-sing et al. 2019; Kremer et al. 2021). Thus, the process
-outlined here may produce electromagnetic signatures
-in addition to GW mergers.
-SR thanks the Charles E Young fellowship, the Nina
+galactic centre (e.g., Muno et al. 2005, 2009; Hailey
+et al. 2018; Zhu et al. 2018; Cheng et al. 2018, see Kre-
+mer et al. (2022) for a discussion of electromagnetic sig-
+natures from BH-star collisions)6. These interactions,
+in particular grazing collisions, may also result in tidal
+disruption events (e.g., Baumgardt et al. 2006b; Perets
+et al. 2016; Stone et al. 2017; Samsing et al. 2019; Kre-
+mer et al. 2021). Thus, the process outlined here may
+produce electromagnetic signatures in addition to GW
+mergers.
+We thank the anonymous referee for useful comments.
+We also thank Jessica Lu, Fred Rasio, Kyle Kremer,
+Ryosuke Hirai, Ilya Mandel, and Erez Michaely for use-
+ful discussion.
+SR thanks the Charles E. Young Fellowship, the Nina
 Byers Fellowship, and the Michael A. Jura Memorial
 Graduate Award for support. SR and SN acknowledge
 the partial support from NASA ATP 80NSSC20K0505.
 SN thanks Howard and Astrid Preston for their gener-
 ous support. IL thanks support from the Adams Fellow-
 ship. SN and RS thank the Bhaumik Institute visitor
-program.
+program. This work was performed in part at the As-
+pen Center for Physics, which is supported by National
+Science Foundation grant PHY-1607611.
 REFERENCES
 Abbott, B. P., Abbott, R., Abbott, T. D., et al. 2016,
 PhRvL, 116, 241102,
@@ -740,22 +922,42 @@ doi: 10.1103/PhysRevLett.116.241102
 doi: 10.1103/PhysRevLett.118.221101
 —. 2017b, PhRvL, 119, 141101,
 doi: 10.1103/PhysRevLett.119.141101
+Aharon, D., & Perets, H. B. 2016, ApJL, 830, L1,
+doi: 10.3847/2041-8205/830/1/L1
 Alexander, T. 1999, ApJ, 527, 835, doi: 10.1086/308129
 Alexander, T., & Pfuhl, O. 2014, ApJ, 780, 148,
 doi: 10.1088/0004-637X/780/2/148
+Amaro-Seoane, P. 2018, Living Reviews in Relativity, 21, 4,
+doi: 10.1007/s41114-018-0013-8
+6 The connection between the observed X-ray sources at the Galac-
+tic Center and tidal capture has been suggested by Generozov
+et al. (2018), but see Zhu et al. (2018); Stephan et al. (2019) for
+alternative channels.
 Arca Sedda, M., Mapelli, M., Benacquista, M., & Spera, M.
 2021, arXiv e-prints, arXiv:2109.12119.
 https://arxiv.org/abs/2109.12119
 Bahcall, J. N., & Wolf, R. A. 1976, ApJ, 209, 214,
 doi: 10.1086/154711
-5 The connection between the observed X-ray sources at the Galac-
-tic Center and tidal capture has been suggested by Generozov
-et al. (2018), but see Zhu et al. (2018); Stephan et al. (2019) for
-alternative channels.
 Baibhav, V., Gerosa, D., Berti, E., et al. 2020, PhRvD, 102,
 043002, doi: 10.1103/PhysRevD.102.043002
+Balberg, S., Sari, R., & Loeb, A. 2013, MNRAS, 434, L26,
+doi: 10.1093/mnrasl/slt071
+Baumgardt, H., Gualandris, A., & Portegies Zwart, S.
+2006a, MNRAS, 372, 174,
+doi: 10.1111/j.1365-2966.2006.10818.x
+Baumgardt, H., Hopman, C., Portegies Zwart, S., &
+Makino, J. 2006b, MNRAS, 372, 467,
+doi: 10.1111/j.1365-2966.2006.10885.x
 Baumgardt, H., Makino, J., & Ebisuzaki, T. 2004, ApJ,
 613, 1143, doi: 10.1086/423299
+Begelman, M. C. 1979, MNRAS, 187, 237,
+doi: 10.1093/mnras/187.2.237
+—. 2012a, ApJL, 749, L3, doi: 10.1088/2041-8205/749/1/L3
+
+IMBH Formation in Galactic Nuclei
+11
+—. 2012b, MNRAS, 420, 2912,
+doi: 10.1111/j.1365-2966.2011.20071.x
 Begelman, M. C., Volonteri, M., & Rees, M. J. 2006,
 MNRAS, 370, 289, doi: 10.1111/j.1365-2966.2006.10467.x
 Belczynski, K., Hirschi, R., Kaiser, E. A., et al. 2020a, ApJ,
@@ -768,17 +970,20 @@ Binney, J., & Tremaine, S. 1987, Galactic dynamics
 —. 2008, Galactic Dynamics: Second Edition
 Blaes, O., Lee, M. H., & Socrates, A. 2002, ApJ, 578, 775,
 doi: 10.1086/342655
+Blandford, R. D., & Begelman, M. C. 1999, MNRAS, 303,
+L1, doi: 10.1046/j.1365-8711.1999.02358.x
+Blandford, R. D., & Znajek, R. L. 1977, MNRAS, 179, 433,
+doi: 10.1093/mnras/179.3.433
 Blecha, L., Ivanova, N., Kalogera, V., et al. 2006, ApJ, 642,
 427, doi: 10.1086/500727
 Bondi, H. 1952, MNRAS, 112, 195,
 doi: 10.1093/mnras/112.2.195
-
-IMBH Formation in Galactic Nuclei
-9
 Bondi, H., & Hoyle, F. 1944, MNRAS, 104, 273,
 doi: 10.1093/mnras/104.5.273
 Bradnick, B., Mandel, I., & Levin, Y. 2017, MNRAS, 469,
 2042, doi: 10.1093/mnras/stx1007
+Brennan, R., Choi, E., Somerville, R. S., et al. 2018, ApJ,
+860, 14, doi: 10.3847/1538-4357/aac2c4
 Bringmann, T., Huang, X., Ibarra, A., Vogl, S., & Weniger,
 C. 2012, JCAP, 2012, 054,
 doi: 10.1088/1475-7516/2012/07/054
@@ -796,16 +1001,35 @@ et al. 1996, Science, 272, 1286,
 doi: 10.1126/science.272.5266.1286
 Cohn, H., & Kulsrud, R. M. 1978, ApJ, 226, 1087,
 doi: 10.1086/156685
+Dale, J. E., & Davies, M. B. 2006, MNRAS, 366, 1424,
+doi: 10.1111/j.1365-2966.2005.09937.x
+Dale, J. E., Davies, M. B., Church, R. P., & Freitag, M.
+2009, MNRAS, 393, 1016,
+doi: 10.1111/j.1365-2966.2008.14254.x
 Dall’Amico, M., Mapelli, M., Di Carlo, U. N., et al. 2021,
 MNRAS, 508, 3045, doi: 10.1093/mnras/stab2783
+Das, A., Schleicher, D. R. G., Basu, S., & Boekholt, T.
+C. N. 2021a, MNRAS, 505, 2186,
+doi: 10.1093/mnras/stab1428
+Das, A., Schleicher, D. R. G., Leigh, N. W. C., & Boekholt,
+T. C. N. 2021b, MNRAS, 503, 1051,
+doi: 10.1093/mnras/stab402
+De Villiers, J.-P., Hawley, J. F., Krolik, J. H., & Hirose, S.
+2005, ApJ, 620, 878, doi: 10.1086/427142
 Di Carlo, U. N., Giacobbo, N., Mapelli, M., et al. 2019,
 MNRAS, 487, 2947, doi: 10.1093/mnras/stz1453
 Di Carlo, U. N., Mapelli, M., Pasquato, M., et al. 2021,
 MNRAS, 507, 5132, doi: 10.1093/mnras/stab2390
+Do, T., Hees, A., Ghez, A., et al. 2019, Science, 365, 664,
+doi: 10.1126/science.aav8137
+Ebisuzaki, T., Makino, J., Tsuru, T. G., et al. 2001, ApJL,
+562, L19, doi: 10.1086/338118
 Eda, K., Itoh, Y., Kuroyanagi, S., & Silk, J. 2013, PhRvL,
 110, 221101, doi: 10.1103/PhysRevLett.110.221101
 Edgar, R. 2004, NewAR, 48, 843,
 doi: 10.1016/j.newar.2004.06.001
+Escala, A. 2021, ApJ, 908, 57,
+doi: 10.3847/1538-4357/abd93c
 Ferrara, A., Salvadori, S., Yue, B., & Schleicher, D. 2014,
 Monthly Notices of the Royal Astronomical Society, 443,
 2410, doi: 10.1093/mnras/stu1280
@@ -832,6 +1056,9 @@ doi: 10.3847/1538-4357/ab94bc
 Generozov, A., Stone, N. C., Metzger, B. D., & Ostriker,
 J. P. 2018, MNRAS, 478, 4030,
 doi: 10.1093/mnras/sty1262
+
+12
+Rose et al.
 Genzel, R., Eisenhauer, F., & Gillessen, S. 2010, Reviews of
 Modern Physics, 82, 3121,
 doi: 10.1103/RevModPhys.82.3121
@@ -863,25 +1090,41 @@ Dosopoulou, F. 2018, ApJ, 856, 140,
 doi: 10.3847/1538-4357/aaafce
 Hoang, B.-M., Naoz, S., & Kremer, K. 2020, ApJ, 903, 8,
 doi: 10.3847/1538-4357/abb66a
+Hopman, C., & Alexander, T. 2005, ApJ, 629, 362,
+doi: 10.1086/431475
+Igumenshchev, I. V. 2008, ApJ, 677, 317,
+doi: 10.1086/529025
+Igumenshchev, I. V., Narayan, R., & Abramowicz, M. A.
+2003, ApJ, 592, 1042, doi: 10.1086/375769
+Jiang, Y.-F., Stone, J. M., & Davis, S. W. 2014, ApJ, 796,
+106, doi: 10.1088/0004-637X/796/2/106
 Johnson, J. L., & Bromm, V. 2007, Monthly Notices of the
 Royal Astronomical Society, 374, 1557,
 doi: 10.1111/j.1365-2966.2006.11275.x
+Kremer, K., Lombardi, James C., J., Lu, W., Piro, A. L., &
+Rasio, F. A. 2022, arXiv e-prints, arXiv:2201.12368.
+https://arxiv.org/abs/2201.12368
 Kremer, K., Lu, W., Piro, A. L., et al. 2021, ApJ, 911, 104,
 doi: 10.3847/1538-4357/abeb14
 Kremer, K., Spera, M., Becker, D., et al. 2020, ApJ, 903,
 45, doi: 10.3847/1538-4357/abb945
+Kroupa, P., Subr, L., Jerabkova, T., & Wang, L. 2020,
+MNRAS, 498, 5652, doi: 10.1093/mnras/staa2276
+Levin, Y., & Beloborodov, A. M. 2003, ApJL, 590, L33,
+doi: 10.1086/376675
 Limongi, M., & Chieﬃ, A. 2018a, ApJS, 237, 13,
 doi: 10.3847/1538-4365/aacb24
 —. 2018b, ApJS, 237, 13, doi: 10.3847/1538-4365/aacb24
+L¨ockmann, U., & Baumgardt, H. 2008, MNRAS, 384, 323,
+doi: 10.1111/j.1365-2966.2007.12699.x
 Lu, C. X., & Naoz, S. 2019, MNRAS, 484, 1506,
 doi: 10.1093/mnras/stz036
-
-10
-Rose et al.
 Lu, J. R., Ghez, A. M., Hornstein, S. D., et al. 2009, ApJ,
 690, 1463, doi: 10.1088/0004-637X/690/2/1463
 Madau, P., & Rees, M. J. 2001, ApJL, 551, L27,
 doi: 10.1086/319848
+Magorrian, J., & Tremaine, S. 1999, MNRAS, 309, 447,
+doi: 10.1046/j.1365-8711.1999.02853.x
 Maillard, J. P., Paumard, T., Stolovy, S. R., & Rigaut, F.
 2004, A&A, 423, 155, doi: 10.1051/0004-6361:20034147
 Mapelli, M., Bouﬀanais, Y., Santoliquido, F., Arca Sedda,
@@ -889,6 +1132,15 @@ M., & Artale, M. C. 2021a, arXiv e-prints,
 arXiv:2109.06222. https://arxiv.org/abs/2109.06222
 Mapelli, M., Dall’Amico, M., Bouﬀanais, Y., et al. 2021b,
 MNRAS, 505, 339, doi: 10.1093/mnras/stab1334
+Mastrobuono-Battisti, A., Church, R. P., & Davies, M. B.
+2021, MNRAS, 505, 3314, doi: 10.1093/mnras/stab1409
+McKinney, J. C. 2006, MNRAS, 368, 1561,
+doi: 10.1111/j.1365-2966.2006.10256.x
+McKinney, J. C., & Gammie, C. F. 2004, ApJ, 611, 977,
+doi: 10.1086/422244
+McKinney, J. C., Tchekhovskoy, A., Sadowski, A., &
+Narayan, R. 2014, MNRAS, 441, 3177,
+doi: 10.1093/mnras/stu762
 Merritt, D. 2006, Reports on Progress in Physics, 69, 2513,
 doi: 10.1088/0034-4885/69/9/R01
 Miralda-Escud´e, J., & Gould, A. 2000, ApJ, 545, 847,
@@ -898,17 +1150,38 @@ Muno, M. P., Pfahl, E., Baganoﬀ, F. K., et al. 2005, ApJL,
 622, L113, doi: 10.1086/429721
 Muno, M. P., Bauer, F. E., Baganoﬀ, F. K., et al. 2009,
 ApJS, 181, 110, doi: 10.1088/0067-0049/181/1/110
+Murray, N., Quataert, E., & Thompson, T. A. 2005, ApJ,
+618, 569, doi: 10.1086/426067
+Naoz, S., Rose, S. C., Michaely, E., et al. 2022, ApJL, 927,
+L18, doi: 10.3847/2041-8213/ac574b
 Naoz, S., & Silk, J. 2014, ApJ, 795, 102,
 doi: 10.1088/0004-637X/795/2/102
 Naoz, S., Silk, J., & Schnittman, J. D. 2019, ApJL, 885,
 L35, doi: 10.3847/2041-8213/ab4fed
+
+IMBH Formation in Galactic Nuclei
+13
 Naoz, S., Will, C. M., Ramirez-Ruiz, E., et al. 2020, ApJL,
 888, L8, doi: 10.3847/2041-8213/ab5e3b
+Narayan, R., Chael, A., Chatterjee, K., Ricarte, A., &
+Curd, B. 2022, MNRAS, 511, 3795,
+doi: 10.1093/mnras/stac285
+Narayan, R., Igumenshchev, I. V., & Abramowicz, M. A.
+2003, PASJ, 55, L69, doi: 10.1093/pasj/55.6.L69
+Ohsuga, K., Mori, M., Nakamoto, T., & Mineshige, S. 2005,
+ApJ, 628, 368, doi: 10.1086/430728
 O’Leary, R. M., Kocsis, B., & Loeb, A. 2009, MNRAS, 395,
 2127, doi: 10.1111/j.1365-2966.2009.14653.x
 O’Leary, R. M., Rasio, F. A., Fregeau, J. M., Ivanova, N.,
 & O’Shaughnessy, R. 2006, ApJ, 637, 937,
 doi: 10.1086/498446
+Ostriker, J. P., Choi, E., Ciotti, L., Novak, G. S., & Proga,
+D. 2010, ApJ, 722, 642,
+doi: 10.1088/0004-637X/722/1/642
+Park, M.-G., & Ostriker, J. P. 2001, ApJ, 549, 100,
+doi: 10.1086/319042
+Paumard, T., Genzel, R., Martins, F., et al. 2006, ApJ, 643,
+1011, doi: 10.1086/503273
 Perets, H. B., Li, Z., Lombardi, James C., J., & Milcarek,
 Stephen R., J. 2016, ApJ, 823, 113,
 doi: 10.3847/0004-637X/823/2/113
@@ -926,6 +1199,8 @@ Rashkov, V., & Madau, P. 2014, ApJ, 780, 187,
 doi: 10.1088/0004-637X/780/2/187
 Renzo, M., Farmer, R., Justham, S., et al. 2020, A&A, 640,
 A56, doi: 10.1051/0004-6361/202037710
+Rizzuto, F. P., Naab, T., Spurzem, R., et al. 2022,
+MNRAS, doi: 10.1093/mnras/stac231
 Rodriguez, C. L., Amaro-Seoane, P., Chatterjee, S., &
 Rasio, F. A. 2018, PhRvL, 120, 151101,
 doi: 10.1103/PhysRevLett.120.151101
@@ -939,6 +1214,8 @@ Rose, S. C., Naoz, S., Gautam, A. K., et al. 2020, ApJ, 904,
 Sakstein, J., Croon, D., McDermott, S. D., Straight, M. C.,
 & Baxter, E. J. 2020, arXiv e-prints, arXiv:2009.01213.
 https://arxiv.org/abs/2009.01213
+Sakurai, Y., Yoshida, N., Fujii, M. S., & Hirano, S. 2017,
+MNRAS, 472, 1677, doi: 10.1093/mnras/stx2044
 Samsing, J., Venumadhav, T., Dai, L., et al. 2019, PhRvD,
 100, 043009, doi: 10.1103/PhysRevD.100.043009
 Sari, R., & Fragione, G. 2019, ApJ, 885, 24,
@@ -948,6 +1225,8 @@ Schneider, R., Ferrara, A., Natarajan, P., & Omukai, K.
 doi: 10.1086/339917
 Schnittman, J. D., & Buonanno, A. 2007, ApJL, 662, L63,
 doi: 10.1086/519309
+Sch¨odel, R., Gallego-Cano, E., Dong, H., et al. 2018, A&A,
+609, A27, doi: 10.1051/0004-6361/201730452
 Shapiro, S. L., & Marchant, A. B. 1978, ApJ, 225, 603,
 doi: 10.1086/156521
 Shima, E., Matsuda, T., Takeda, H., & Sawada, K. 1985,
@@ -963,6 +1242,10 @@ Spitzer, L. 1987, Dynamical evolution of globular clusters
 Stephan, A. P., Naoz, S., Ghez, A. M., et al. 2016, ArXiv
 e-prints. https://arxiv.org/abs/1603.02709
 —. 2019, ApJ, 878, 58, doi: 10.3847/1538-4357/ab1e4d
+Stone, N. C., K¨upper, A. H. W., & Ostriker, J. P. 2017,
+MNRAS, 467, 4180, doi: 10.1093/mnras/stx097
+Stone, N. C., & Metzger, B. D. 2016, MNRAS, 455, 859,
+doi: 10.1093/mnras/stv2281
 The LIGO Scientiﬁc Collaboration, the Virgo
 Collaboration, Abbott, R., et al. 2020a, arXiv e-prints,
 arXiv:2009.01075. https://arxiv.org/abs/2009.01075
@@ -977,13 +1260,19 @@ Vink, J. S., Higgins, E. R., Sander, A. A. C., & Sabhahit,
 G. N. 2021, MNRAS, 504, 146,
 doi: 10.1093/mnras/stab842
 
-IMBH Formation in Galactic Nuclei
-11
+14
+Rose et al.
 Wang, H., Stephan, A. P., Naoz, S., Hoang, B.-M., &
 Breivik, K. 2021, ApJ, 917, 76,
 doi: 10.3847/1538-4357/ac088d
+Wang, J., & Merritt, D. 2004, ApJ, 600, 149,
+doi: 10.1086/379767
 Woosley, S. E. 2017, ApJ, 836, 244,
 doi: 10.3847/1538-4357/836/2/244
+Yu, Q., & Tremaine, S. 2002, MNRAS, 335, 965,
+doi: 10.1046/j.1365-8711.2002.05532.x
+Yuan, F., Wu, M., & Bu, D. 2012, ApJ, 761, 129,
+doi: 10.1088/0004-637X/761/2/129
 Yue, B., Ferrara, A., Salvaterra, R., Xu, Y., & Chen, X.
 2014, Monthly Notices of the Royal Astronomical
 Society, 440, 1263, doi: 10.1093/mnras/stu351
diff --git a/read/results/pymupdf/2201.00037.txt b/read/results/pymupdf/2201.00037.txt
index 98cfad6..fba5233 100644
--- a/read/results/pymupdf/2201.00037.txt
+++ b/read/results/pymupdf/2201.00037.txt
@@ -51,7 +51,7 @@ symmetry axis are both coplanar with, and precess about, the normal to the Lapla
 its present-day orientation can be reconstructed from ephemerides data [Yseboodt and Margot,
 2006; Baland et al., 2017]. Likewise, the rate of precession is also not observed directly, but is
 reconstructed by ephemerides data. The latest estimate is a retrograde precession period of 325,513
-yr with an inclination angle of I = 8.5330◦ between the orbit and Laplace plane normals [Ba-
+yr with an inclination angle of I = 8.5330◦between the orbit and Laplace plane normals [Ba-
 land et al., 2017]. Measurements of the obliquity εm, deﬁned as the angle of misalignment be-
 tween the spin-symmetry axis and the orbit normal, have been obtained by diﬀerent techniques,
 including ground based radar observations [Margot et al., 2007, 2012], and stereo digital ter-
@@ -102,7 +102,7 @@ mal to the Laplace plane (ˆeL
 8.5330◦. The symmetry axis of the mantle ˆep
 3 is oﬀset
 from ˆeI
-3 by εm ≈ 2 arcmin. ˆeI
+3 by εm ≈2 arcmin. ˆeI
 3 and ˆep
 3 are coplanar with, and precess about, ˆeL
 3 in a retrograde direction
@@ -121,7 +121,7 @@ This indicates that it is only the mantle that librates, and that the outer part
 These evidences do not necessarily imply that the whole of Mercury’s core is ﬂuid, but only that
 its outermost part must be. A solid inner core may have nucleated at the centre although its
 size is not well constrained. Inner core growth leads to planetary contraction, and the inferred
-radial contraction of ∼ 7 km since the late heavy bombardment [Byrne et al., 2014] places an
+radial contraction of ∼7 km since the late heavy bombardment [Byrne et al., 2014] places an
 approximate limit of 800 km on the inner core radius [Grott et al., 2011]. However, the inner
 core could be larger if a signiﬁcant fraction of its growth occurred earlier in Mercury’s history.
 –3–
@@ -216,30 +216,30 @@ CrMB and surface. The ﬂattenings at all interior boundaries are speciﬁed suc
 consistent with the observed degree 2 spherical harmonic coeﬃcients of gravity J2 and C22; their
 numerical values are given in Table 1. Speciﬁcally, J2 and C22 are connected to the principal
 moments of inertia of Mercury (C > B > A) and to the polar and equatorial ﬂattenings by
-J2 = C − ¯A
+J2 = C −¯A
 MR2 = 8π
 15
 1
 MR2
-�
-(ρs − ρf)r5
-sϵs + (ρf − ρm)r5
-fϵf + (ρm − ρc)r5
+
+(ρs −ρf)r5
+sϵs + (ρf −ρm)r5
+fϵf + (ρm −ρc)r5
 mϵm + ρcR5ϵr
-�
+
 ,
 (1a)
-C22 = B − A
+C22 = B −A
 4MR2 = 8π
 15
 1
 4MR2
-�
-(ρs − ρf)r5
-sξs + (ρf − ρm)r5
-fξf + (ρm − ρc)r5
+
+(ρs −ρf)r5
+sξs + (ρf −ρm)r5
+fξf + (ρm −ρc)r5
 mξm + ρcR5ξr
-�
+
 .
 (1b)
 where ¯A is the mean equatorial moment of inertia deﬁned below. The same procedure was used
@@ -291,15 +291,15 @@ Table 1.
 Reference parameters for Mercury. The mass M is computed from GM = 22031.8636 × 109
 m3/s2 taken from Genova et al. [2019]. The mean density is calculated from 4π
 3 ¯ρR3 = M. The numerical
-values of ϵr and ξr are calculated from ϵr = (¯a − c)/R and ξr = (a − b)/R, where ¯a = 1
+values of ϵr and ξr are calculated from ϵr = (¯a −c)/R and ξr = (a −b)/R, where ¯a = 1
 2(a + b) and where
 a = 2440.53 km, b = 2439.28 km and c = 2438.26 km are the semimajor, intermediate and semiminor
 axes of the trixial ellipsoidal shape of Mercury taken from Table 2 of Perry et al. [2015]. J2 and C22 are
 computed from Equation (4) in the Supporting Information of Genova et al. [2019].
 and Wieczorek [2016] who adopted the same strategy in their interior modelling of the Moon.
 Note that we neglect the misalignment between the triaxial shape of Mercury’s surface topog-
-raphy and the axes of the principal moments of inertia, which amount to a polar oﬀset of ∼ 2◦
-and an equatorial oﬀset of ∼ 15◦ [Perry et al., 2015].
+raphy and the axes of the principal moments of inertia, which amount to a polar oﬀset of ∼2◦
+and an equatorial oﬀset of ∼15◦[Perry et al., 2015].
 Once the densities and ﬂattenings of all interior regions are known, we can specify the mo-
 ments of inertia of the ﬂuid core (Cf > Bf > Af) and solid inner core (Cs > Bs > As)
 along with the mean equatorial moments of inertia
@@ -313,17 +313,17 @@ along with the mean equatorial moments of inertia
 From these, we deﬁne the polar (e, ef, es) and equatorial (γ, γs) dynamical ellipticities of the
 whole planet (no subscript), ﬂuid core (subscript f) and solid inner core (subscript s), which
 enter our rotational model,
-e = C − ¯A
+e = C −¯A
 ¯A
-ef = Cf − ¯Af
+ef = Cf −¯Af
 ¯Af
-es = Cs − ¯As
+es = Cs −¯As
 ¯As
 ,
 (3a)
-γ = B − A
+γ = B −A
 ¯A
-γs = Bs − As
+γs = Bs −As
 ¯As
 .
 (3b)
@@ -377,7 +377,7 @@ b)
 Figure 2.
 Geometry of the Cassini state model of Mercury viewed (a) in the Cassini plane and (b)
 in a frame attached to the rotating mantle. The orbit normal (ˆeI
-3) is tilted by an angle I = 8.533◦ from
+3) is tilted by an angle I = 8.533◦from
 the Laplace normal (ˆeL
 3 ) and the symmetry axis of Mercury’s mantle (ˆep
 3) is tilted by an obliquity εm
@@ -394,7 +394,7 @@ mantle axes ˆep
 2 with respect to the Cassini plane. Viewed in the frame attached to the rotating
 mantle (b), the Cassini plane is rotating at frequency ωΩo
 =
-−Ωo − Ωp cos I in the longitudinal direc-
+−Ωo −Ωp cos I in the longitudinal direc-
 tion. The oblateness of all three regions and the amplitude of all angles are exaggerated for purpose of
 illustration.
 –7–
@@ -415,7 +415,7 @@ plane varies on long timescales, but it can be taken as invariable in inertial s
 purpose. The Cassini state of Mercury is illustrated in Figure 1. The angle between ˆeL
 3 and ˆeI
 3
-is the orbital inclination I = 8.5330◦ [Baland et al., 2017], the angle between ˆeI
+is the orbital inclination I = 8.5330◦[Baland et al., 2017], the angle between ˆeI
 3 and ˆep
 3 is the
 obliquity εm and the angle between ˆeL
@@ -429,9 +429,9 @@ fer to as the ‘mantle’ in the context of our rotational model. The rotation
 of the mantle are expected to remain in close alignment, but they do not coincide exactly. We
 deﬁne the rotation rate vector of the mantle by Ω, and its misalignment from ˆep
 3 by an angle
-θm. Note that θm ≪ εm and it is often the spin axis of Mercury which is used to deﬁne the
+θm. Note that θm ≪εm and it is often the spin axis of Mercury which is used to deﬁne the
 obliquity εm [e.g. Baland et al., 2017]. If Mercury were an entirely rigid planet, ˆep
-3 and Ω would
+3 and Ωwould
 characterize the symmetry and rotation axes of the whole of Mercury, not just its mantle, and
 the angles I, εm and θm would completely describe the Cassini state. The presence of a ﬂuid
 outer core and solid inner core require three additional orientation vectors and angles. The sym-
@@ -439,9 +439,9 @@ metry axis of the inner core is deﬁned by unit vector ˆes
 3 and its misalignment from ˆep
 3 by an
 angle θn. The rotation vectors of the ﬂuid core and inner core are deﬁned as Ωf and Ωs, re-
-spectively, and their misalignment from the rotation vector of the mantle Ω are deﬁned by an-
+spectively, and their misalignment from the rotation vector of the mantle Ωare deﬁned by an-
 gles θf and θs (see Figure 2a). The rotation and symmetry axes of the inner core remain in close
-alignment, so θn ≈ θs. To be formal in our deﬁnition of the diﬀerent angles of misalignment,
+alignment, so θn ≈θs. To be formal in our deﬁnition of the diﬀerent angles of misalignment,
 for I deﬁned positive as depicted on Figure 2a, all angles are deﬁned positive in the clockwise
 direction.
 At equilibrium in the Cassini state, the three orientation vectors (ˆeI
@@ -455,14 +455,14 @@ in inertial space, the Cassini plane is rotating in a retrograde direction at fr
 in the frame attached to the mantle rotating at sidereal frequency Ωo, the Cassini plane is ro-
 tating in a retrograde direction at frequency ωΩo (see Figure 2b), where ω, expressed in cycles
 per Mercury day, is equal to
-ω = −1 − δω cos(θp) .
+ω = −1 −δω cos(θp) .
 (5)
 The factor δω = Ωp/Ωo = 4.933 × 10−7 is the Poincar´e number, expressing the ratio of the
 forced precession to sidereal rotation frequencies. The invariance of the Laplace plane normal
 as seen in the mantle frame is expressed as
 d
 dt ˆeL
-3 + Ω × ˆeL
+3 + Ω× ˆeL
 3 = 0 ,
 (6)
 or equivalently, by Equation (19e) of Stys and Dumberry [2018],
@@ -472,7 +472,7 @@ or equivalently, by Equation (19e) of Stys and Dumberry [2018],
 
 Conﬁdential manuscript submitted to JGR-Planets
 This expresses a formal connection between θp and θm which is independent of the interior struc-
-ture of Mercury. Using Equation (5) and cos(θm) → 1, this connection can be rewritten as
+ture of Mercury. Using Equation (5) and cos(θm) →1, this connection can be rewritten as
 sin(θm) = δω sin(θp) .
 (8)
 and thus the relative amplitudes of θm and θp depend of the Poincar´e number δω.
@@ -508,16 +508,16 @@ angle θm, as seen in the mantle frame, can be written as
 θm1(t) + iθm2(t) = ˜m exp[iωΩot] ,
 (10a)
 where
-˜m ≡ ˜m(ω) = Re[ ˜m] + iIm[ ˜m] ,
+˜m ≡˜m(ω) = Re[ ˜m] + iIm[ ˜m] ,
 (10b)
 is the amplitude at frequency ωΩo. Equivalent deﬁnitions apply for all other angles, with the
 connection as follows:
-θm ⇔ ˜m ,
-θf ⇔ ˜mf ,
-θs ⇔ ˜ms ,
-θn ⇔ ˜ns ,
-θp ⇔ ˜p ,
-εm ⇔ ˜εm .
+θm ⇔˜m ,
+θf ⇔˜mf ,
+θs ⇔˜ms ,
+θn ⇔˜ns ,
+θp ⇔˜p ,
+εm ⇔˜εm .
 (11)
 The notation ˜m, ˜mf, ˜ms, ˜ns follows that introduced in the original model of Mathews et al. [1991].
 Note that all tilded amplitudes are complex: their imaginary part reﬂects the out-of-phase re-
@@ -530,7 +530,7 @@ real. We concentrate our analysis in this work on the real part of the solutions
 sponds to the mutual alignment of these ﬁve rotation angles in the Cassini plane. As such, ˜εm
 corresponds to the observed obliquity of the mantle symmetry axis. It is thus equivalent to εm,
 though we keep the tilde notation in the presentation of our results to emphasize that it rep-
-resents the real part of the solution from our system. Furthermore, since ˜m ≪ ˜εm, we often
+resents the real part of the solution from our system. Furthermore, since ˜m ≪˜εm, we often
 refer to ˜εm as the orientation of spin axis of the mantle, since the Cassini state of Mercury is
 more customarily described in terms of the latter in the literature.
 The model of Mathews et al. [1991] is developed under the assumption of small angles as
@@ -539,40 +539,40 @@ rived can found in Mathews et al. [1991] and in Dumberry and Wieczorek [2016]. T
 tions describe, respectively, the time rate of change of the angular momenta of the whole of Mer-
 cury, the ﬂuid core, and the inner core in the reference frame of the rotating mantle. These three
 equations are
-(ω − e) ˜m + (1 + ω)
-� ¯Af
+(ω −e) ˜m + (1 + ω)
+" ¯Af
 ¯A ˜mf +
 ¯As
 ¯A ˜ms + α3es
 ¯As
 ¯A ˜ns
-�
+#
 =
 1
 iΩ2o ¯A
-�
+
 ˜Γsun
-�
+
 ,
 (12a)
-ω ˜m + (1 + ω + ef) ˜mf − ωα1es
+ω ˜m + (1 + ω + ef) ˜mf −ωα1es
 ¯As
 ¯Af
 ˜ns =
 1
 iΩ2o ¯Af
-�
-− ˜Γcmb − ˜Γicb
-�
+
+−˜Γcmb −˜Γicb
+
 ,
 (12b)
-(ω − α3es) ˜m + α1es ˜mf + (1 + ω) ˜ms + (1 + ω − α2) es˜ns =
+(ω −α3es) ˜m + α1es ˜mf + (1 + ω) ˜ms + (1 + ω −α2) es˜ns =
 1
 iΩ2o ¯As
-�
+
 ˜Γs
 sun + ˜Γicb
-�
+
 ,
 (12c)
 and a fourth equation consists of a kinematic relation that expresses the change in the orien-
@@ -584,25 +584,25 @@ and are given by
 α1 = ρf
 ρs
 ,
-α3 = 1 − α1 ,
-α2 = α1 − α3αg ,
+α3 = 1 −α1 ,
+α2 = α1 −α3αg ,
 (13a)
 where the parameter αg is a measure of the ratio of the gravitational to inertial torque applied
 on the inner core,
 αg = 8πG
 5Ω2o
-[ρc(ϵr − ϵm) + ρm(ϵm − ϵf) + ρfϵf] ,
+[ρc(ϵr −ϵm) + ρm(ϵm −ϵf) + ρfϵf] ,
 (13b)
 where G is the gravitational constant.
 ˜Γsun is the amplitude of the gravitational torque by the Sun on the whole of Mercury. For
 a small mantle obliquity ˜εm and a small inner core tilt ˜ns, it is given by
 ˜Γsun = −iΩ2
 o ¯A
-�
+
 φm˜εm +
 ¯As
 ¯A α3φs˜ns
-�
+
 ,
 (14)
 where
@@ -613,29 +613,29 @@ Conﬁdential manuscript submitted to JGR-Planets
 2
 n2
 Ω2o
-�
+
 G210 e + 1
 2G201 γ
-�
+
 ,
 (15a)
 φs = 3
 2
 n2
 Ω2o
-�
+
 G210 es + 1
 2G201 γs
-�
+
 ,
 (15b)
 and where G210 and G201 are functions of the orbital eccentricity ec,
 G210 =
 1
-(1 − e2c)3/2 ,
+(1 −e2c)3/2 ,
 (16a)
 G201 = 7
-2ec − 123
+2ec −123
 16 e3
 c + 489
 128e5
@@ -652,7 +652,7 @@ CMB and on the inner core at the ICB, respectively. These torques can be paramet
 terms of dimensionless complex coupling constants Kicb and Kcmb and the diﬀerential angu-
 lar velocities at each boundary [e.g Buﬀett, 1992; Buﬀett et al., 2002],
 ˜Γicb = iΩ2
-o ¯AsKicb( ˜mf − ˜ms) ,
+o ¯AsKicb( ˜mf −˜ms) ,
 (18a)
 ˜Γcmb = iΩ2
 o ¯AfKcmb ˜mf .
@@ -665,14 +665,14 @@ and this is provided by Equation (7). For small angles θm and θp, this gives [
 ˜m + (1 + ω)˜p = 0 .
 (19)
 For Mercury, it is more convenient to connect the internal model with ˜εm instead of ˜p. This
-is because θp ≈ 8.567◦ whereas ˜εm ≈ 2 arcmin and thus the latter obeys more strictly the
+is because θp ≈8.567◦whereas ˜εm ≈2 arcmin and thus the latter obeys more strictly the
 condition of small angles assumed in our framework. Furthermore, the external torques act-
 ing on the whole planet (Equation 14) and inner core (Equation 17) depend linearly on ˜εm. Writ-
-ten in terms of ˜εm, and with the approximation of ˜εm ≪ 1 and ˜m ≪ 1, Equation (7) becomes
+ten in terms of ˜εm, and with the approximation of ˜εm ≪1 and ˜m ≪1, Equation (7) becomes
 ˜m + (1 + ω)˜εm = −(1 + ω) tan I .
 (20)
 Likewise, the frequency ω from Equation (5) can be written simply in terms of I,
-ω = −1 − δω cos I .
+ω = −1 −δω cos I .
 (21)
 The set of four Equations (12) with the addition of Equation (20) form a linear system
 of equations for the ﬁve rotational variables ˜m, ˜mf, ˜ms, ˜ns and ˜εm. It captures the response
@@ -692,7 +692,7 @@ and the elements of matrix M are
 M =
 
 
-ω − e
+ω −e
 (1 + ω)
 ¯
 Af
@@ -705,9 +705,8 @@ A
 ¯
 As¯
 A α3
-�
-(1 + ω)es + φs
-�
+ (1 + ω)es + φs
+
 φm
 ω
 1 + ω + ef + Kcmb +
@@ -726,10 +725,10 @@ As
 ¯
 Af
 0
-ω − α3es
-α1es − Kicb
+ω −α3es
+α1es −Kicb
 1 + ω + Kicb
-(1 + ω − α2)es + α3φs
+(1 + ω −α2)es + α3φs
 α3φs
 0
 0
@@ -785,18 +784,17 @@ Conﬁdential manuscript submitted to JGR-Planets
 The Cassini state of a single-body, rigid Mercury
 For a rigid planet with no ﬂuid and solid cores, our system of equations reduces to Equa-
 tions (12a) and (20),
-(ω − e) ˜m + φm ˜εm = 0 ,
+(ω −e) ˜m + φm ˜εm = 0 ,
 (23a)
 ˜m + (1 + ω)˜εm = −(1 + ω) tan I .
 (23b)
-Using Equation (21), δω ≪ 1, and the approximation ¯A(1+e+δω cos I) = C + ¯Aδω cos I ≈
+Using Equation (21), δω ≪1, and the approximation ¯A(1+e+δω cos I) = C + ¯Aδω cos I ≈
 C, these can be written as
 C ˜m = ¯Aφm ˜εm ,
 (24a)
 ˜m = δω
-�
-sin I + cos I ˜εm
-�
+ sin I + cos I ˜εm
+
 .
 (24b)
 Equation (24b) gives a direct relationship between ˜m and ˜εm. For I = 8.5330◦, δω =
@@ -804,9 +802,8 @@ Equation (24b) gives a direct relationship between ˜m and ˜εm. For I = 8.5330
 than ˜εm: the oﬀset of the rotation axis of the mantle with respect to its symmetry axis is very
 small. Substituting Equation (24b) in Equation (24a) gives
 CΩp
-�
-sin I + cos I ˜εm
-�
+ sin I + cos I ˜εm
+
 = ¯AΩoφm˜εm ,
 (25)
 and isolating for ˜εm,
@@ -822,7 +819,7 @@ CΩp sin I
 −CΩp cos I + nMR2 (G210J2 + 2G201C22) .
 (27)
 This is the standard prediction for the obliquity of a rigid Mercury occupying Cassini state 1
-[see for instance Equation (1) of Baland et al., 2017, where their deﬁnition of ˙Ω is equal to −Ωp].
+[see for instance Equation (1) of Baland et al., 2017, where their deﬁnition of ˙Ωis equal to −Ωp].
 Hence, in the absence of a ﬂuid core and inner core, our system retrieves the Cassini state of
 Mercury correctly. Equation (27) can be manipulated to solve instead for the normalized mo-
 ment of inertia ˆC,
@@ -845,9 +842,9 @@ cession of Mercury. As seen in the inertial frame, its frequency is given by
 Conﬁdential manuscript submitted to JGR-Planets
 ωfp = nMR2
 C
-�
+
 G210J2 + 2G201C22
-�
+
 ,
 (29)
 which is equivalent to the prediction by Peale [2005] when neglecting its small elliptical com-
@@ -870,47 +867,43 @@ the free precession period is much shorter than the forcing period of 325 kyr. U
 The obliquity of Mercury is thus determined by how the forcing frequency Ωp compares with
 the free precession frequency ωfp. Because ωfp > Ωp, Mercury occupies Cassini state 1 [Peale,
 1974]. Furthermore, Equation (30) shows that a large obliquity can be generated by resonant
-ampliﬁcation if Ωp ≈ ωfp. Since ωfp ≫ Ωp, resonant ampliﬁcation is minimal and the re-
-sulting obliquity, ˜εm ≈ 2 arcmin, is much smaller than the inclination angle I ≈ 8.5◦.
+ampliﬁcation if Ωp ≈ωfp. Since ωfp ≫Ωp, resonant ampliﬁcation is minimal and the re-
+sulting obliquity, ˜εm ≈2 arcmin, is much smaller than the inclination angle I ≈8.5◦.
 2.3.2
 The misalignment of the ﬂuid and solid cores
-With ω = −1 − δω cos I and δω ≪ 1, Equation (12d) gives ˜ns ≈ ˜ms; as for the mantle,
+With ω = −1 −δω cos I and δω ≪1, Equation (12d) gives ˜ns ≈˜ms; as for the mantle,
 the rotation and symmetry axes of the inner core remain closely aligned in the Cassini state.
 The relationship between ˜m and ˜εm of Equation (24b) is independent of the interior structure,
 so it remains unchanged when a ﬂuid and a solid cores are present. Substituting it in Equa-
 tion (12a), and setting ˜ns = ˜ms, the angular momentum equation of the whole planet becomes
 CΩp
-�
-sin I + cos I ˜εm
-�
-+ ( ¯Af cos I Ωp) ˜mf + ¯As(cos I Ωp − Ωoα3φs)˜ns = ¯AΩoφm˜εm .
+ sin I + cos I ˜εm
+
++ ( ¯Af cos I Ωp) ˜mf + ¯As(cos I Ωp −Ωoα3φs)˜ns = ¯AΩoφm˜εm .
 (31)
 This latter equation shows how the misaligned inner core and ﬂuid core can lead to a modiﬁ-
 cation of the mantle obliquity ˜εm. Approximate analytical solutions of ˜ns and ˜mf are given by
-˜ns ≈ Ωp
+˜ns ≈Ωp
 κλs
-�
-1 + Ωo(Kicb − α1es)
+
+1 + Ωo(Kicb −α1es)
 λf
-� �
-sin I + cos I ˜εm
-�
-− Ωoα3φs
+  sin I + cos I ˜εm
+
+−Ωoα3φs
 κλs
 ˜εm ,
 (32a)
-˜mf ≈ Ωp
+˜mf ≈Ωp
 λf
-�
-sin I + cos I ˜εm
-�
+ sin I + cos I ˜εm
+
 + Ωo
 λf
 ¯As
 ¯Af
-�
-Kicb − α1es
-�
+ Kicb −α1es
+
 ˜ns ,
 (32b)
 where
@@ -919,40 +912,39 @@ where
 ¯Af
 Ω2
 o
-�
-Kicb − α1es
-�2
+ Kicb −α1es
+2
 λs λf
 ,
 (33a)
-λf = ¯σf − Ωp cos I ,
+λf = ¯σf −Ωp cos I ,
 (33b)
-λs = ¯σs − Ωp cos I ,
+λs = ¯σs −Ωp cos I ,
 (33c)
 –14–
 
 Conﬁdential manuscript submitted to JGR-Planets
 and where we have introduced the frequencies
 ¯σf = Ωo
-�
+
 ef + Kcmb +
 ¯As
 ¯Af
 Kicb
-�
+
 ,
 (33d)
 ¯σs = Ωo
-�
-esα3αg − esα1 + α3φs + Kicb
-�
+
+esα3αg −esα1 + α3φs + Kicb
+
 .
 (33e)
 These solutions are good approximations for all the results that we present in section 3. For
 an observed mantle obliquity ˜εm and for a chosen set of interior model parameters, they pro-
 vide useful predictions of ˜ns and ˜mf.
 In the limit of a very strong coupling between the ﬂuid core, solid core and mantle, ¯σs ≫
-Ωp and ¯σf ≫ Ωp, so that ˜ns → 0, ˜mf → 0 and Equation (31) reverts back to Equation (25)
+Ωp and ¯σf ≫Ωp, so that ˜ns →0, ˜mf →0 and Equation (31) reverts back to Equation (25)
 for a rigid planet. In the opposite limit of no coupling between the ﬂuid core, solid core and
 mantle (i.e. for spherical internal boundaries, ef = es = γs = 0 and no viscous or EM cou-
 pling, Kcmb = Kicb = 0), then
@@ -962,17 +954,16 @@ pling, Kcmb = Kicb = 0), then
 ˜mf = ˜ns = −(tan I + ˜εm) .
 (34)
 Inserting these in Equation (31), and with the moment of inertia of the mantle equal to Cm =
-C − ¯Af − ¯As, we obtain
+C −¯Af −¯As, we obtain
 Cm Ωp
-�
-sin I + cos I ˜εm
-�
+ sin I + cos I ˜εm
+
 = ¯AΩoφm˜εm .
 (35)
 which describes, as expected, a forced precession of the mantle alone. If this was the case for
-Mercury, taking Cm/C = 0.431, the obliquity should be ˜εm ≈ 0.88 arcmin, substantially smaller
-than the observed obliquity of ˜εm ≈ 2 arcmin.
-If ¯σf ≈ Ωp (and thus λf → 0) and/or ¯σs ≈ Ωp (and thus λs → 0) resonant ampliﬁca-
+Mercury, taking Cm/C = 0.431, the obliquity should be ˜εm ≈0.88 arcmin, substantially smaller
+than the observed obliquity of ˜εm ≈2 arcmin.
+If ¯σf ≈Ωp (and thus λf →0) and/or ¯σs ≈Ωp (and thus λs →0) resonant ampliﬁca-
 tion leads to large amplitudes for ˜mf, ˜ns and the mantle obliquity ˜εm. The frequencies ¯σf and
 ¯σs are closely related to the FCN and FICN frequencies ωfcn and ωficn, respectively. Hence,
 just as a large mantle obliquity can result from resonant ampliﬁcation when the forcing frequency
@@ -981,8 +972,8 @@ onant ampliﬁcation when the forcing frequency approaches the FCN or FICN frequ
 frequencies depend on the interior density structure and are not known. However, we will show
 that for reasonable interior models of Mercury, the FCN and FICN periods are in the range of
 a few hundred yr. This is suﬃciently far from the forcing period (325 kyr) that we do not ex-
-pect an important ampliﬁcation eﬀect. Furthermore, since ωfcn, ωficn ≫ Ωp, then ¯σf ≫ Ωp
-and ¯σs ≫ Ωp, and we are in the strong coupling limit. The mantle obliquity should be close
+pect an important ampliﬁcation eﬀect. Furthermore, since ωfcn, ωficn ≫Ωp, then ¯σf ≫Ωp
+and ¯σs ≫Ωp, and we are in the strong coupling limit. The mantle obliquity should be close
 to that expected for a rigid planet, as observations suggest. Therefore, we expect that ˜mf and
 ˜ns should be of the order of ˜εm or smaller. This further justiﬁes the assumption of small an-
 gles that we have adopted.
@@ -1009,7 +1000,7 @@ Cm
 1 + ζ ,
 (36)
 where
-f(ec) = 1 − 11e2
+f(ec) = 1 −11e2
 c + 959
 48 e4
 c ,
@@ -1120,29 +1111,29 @@ nal torque. As shown by Baland et al. [2019], the external torque allow solid re
 a free motion in inertial space thereby aﬀecting the free rotational modes. To a good approx-
 imation, the FCN and FICN frequencies (as seen in an inertial frame) for Kcmb = Kicb = 0
 are given by
-ωfcn ≈ −Ωo
-�
+ωfcn ≈−Ωo
+
 ¯A
 ¯Am + ¯As
-� �
+ 
 ef + φm
-�
+
 + Ωo
 efφm
 (ef + φm) ,
 (38a)
-ωficn ≈ Ωo
-� ¯A + ¯As
-¯A − ¯As
-� �
-esα1 − esα3αg − α3φs
-�
+ωficn ≈Ωo
+ ¯A + ¯As
+¯A −¯As
+ 
+esα1 −esα3αg −α3φs
+
 .
 (38b)
 The expression of the FICN frequency involves the inertial torque (term esα1) and the grav-
 itational torque from the rest of Mercury (esα3αg) and the Sun (α3φs) acting on the inner core.
 For both of our inner core density scenarios (and our choices of ρs = 8800 kg m−3 and α3 =
-0.1), the internal gravitational torque dominates that from the Sun. Furthermore, α3αg ≫ α1;
+0.1), the internal gravitational torque dominates that from the Sun. Furthermore, α3αg ≫α1;
 the gravitational torque dominates the inertial torque, in large part because of the slow rota-
 tion rate of Mercury. As a result the FICN frequency is negative (i.e. the precession motion
 is retrograde). This is also the case for the Moon [e.g. Dumberry and Wieczorek, 2016; Stys and
@@ -1151,16 +1142,16 @@ and the FICN mode is prograde [Mathews et al., 1991]. Note also that our approxi
 –17–
 
 Conﬁdential manuscript submitted to JGR-Planets
-sion for the FICN diﬀers by a factor ( ¯A+ ¯As)/( ¯A− ¯As) compared to that given in Dumberry
+sion for the FICN diﬀers by a factor ( ¯A+ ¯As)/( ¯A−¯As) compared to that given in Dumberry
 and Wieczorek [2016] and Stys and Dumberry [2018] for the Moon.
 The expression for FCN frequency diﬀers from the usual expression for Earth. First, it
 involves the external torque from the Sun captured by the parameter φm. If we set φm = 0,
 we obtain the FCN frequency for a decoupled model in which only interior torques contribute,
-ωfcn,int ≈ −Ωo
-�
+ωfcn,int ≈−Ωo
+
 ¯A
 ¯Am + ¯As
-�
+
 ef .
 (38c)
 This frequency is slightly diﬀerent from the usual expression for Earth, involving the ratio ¯A/( ¯Am+
@@ -1189,7 +1180,7 @@ of the ﬂuid core spin axis from the mantle is signiﬁcant: ˜mf is approximat
 a small inner core and does not vary substantially with inner core size; it drops to 3.97 arcmin
 at the largest rs. Recall that ˜mf is measured with respect to the mantle rotation axis (which
 coincides closely with the symmetry axis), so the obliquity of the spin axis of the ﬂuid core with
-respect to the orbit normal is ˜εm+ ˜mf ≈ 6 arcmin. The reason why the obliquity of the spin
+respect to the orbit normal is ˜εm+ ˜mf ≈6 arcmin. The reason why the obliquity of the spin
 axis of the ﬂuid core is larger than that of the mantle can be understood from Equation (32b),
 which shows that ˜mf is determined by the resonant ampliﬁcation of the FCN mode at the forc-
 ing frequency. When the FCN frequency is much larger than the forcing frequency, as is the
@@ -1274,29 +1265,29 @@ C′ = C + ¯Acχ ,
 where ¯Ac = ¯Af + ¯As and
 χ = Ωp cos I
 ¯Ac
-�
+
 ¯Af
-(¯σf − Ωp cos I) +
+(¯σf −Ωp cos I) +
 ¯As
-(¯σs − Ωp cos I)
-�
+(¯σs −Ωp cos I)
+
 −
 ¯As
 ¯Ac
 Ωoα3φs
-(¯σs − Ωp cos I) .
+(¯σs −Ωp cos I) .
 (41)
 The frequencies ¯σf and ¯σs are given in Equations (33d-33e) and closely approximate the FCN
 and FICN frequencies of Equations (38c) and (38b), respectively. The factor χ captures then
 how the core is entrained to precess with the mantle, with the coupling between the two ex-
 pressed in terms of the resonant ampliﬁcation of the FCN and FICN frequencies. In the limit
-of ¯σf, ¯σs → 0, then χ = −1, C′ = Cm, the core is fully decoupled from the mantle and we
-retrieve Equation (35). If instead ¯σf, ¯σs → ∞, then χ = 0, C′ = C and we retrieve the pre-
+of ¯σf, ¯σs →0, then χ = −1, C′ = Cm, the core is fully decoupled from the mantle and we
+retrieve Equation (35). If instead ¯σf, ¯σs →∞, then χ = 0, C′ = C and we retrieve the pre-
 diction for a rigid planet. When both the FCN and FICN frequencies are much larger than Ωp,
 as is the case here, resonant ampliﬁcation is weak, χ is small and positive, C′ > C and this
 leads to a slightly larger ˜εm compared to a rigid planet. Because the inner core core is grav-
 itationally locked to the mantle, deviations from a rigid planet are dominantly caused by the
-misalignment of the ﬂuid core. In Equation (41), ¯σs ≫ ¯σf, so to a good approximation
+misalignment of the ﬂuid core. In Equation (41), ¯σs ≫¯σf, so to a good approximation
 –19–
 
 Conﬁdential manuscript submitted to JGR-Planets
@@ -1304,9 +1295,9 @@ Conﬁdential manuscript submitted to JGR-Planets
 ¯Af
 ¯Ac
 Ωo cos I
-(¯σf − Ωp cos I) .
+(¯σf −Ωp cos I) .
 (42)
-For a small inner core, χ ≈ 7.55×10−3. As the inner core grows, ¯Af decreases, and the com-
+For a small inner core, χ ≈7.55×10−3. As the inner core grows, ¯Af decreases, and the com-
 bination ¯Acχ also decreases. This implies that C′ decreases with inner core size and, consequently,
 ˜εm also decreases with inner core size, as seen in Figure 4a, though it remains larger than the
 prediction for a rigid planet.
@@ -1317,13 +1308,13 @@ density ρc and its thickness h. Changing ρs, ρc and/or h requires a diﬀeren
 ρm and rf in order to match M, ˆC and ˆCm. In turn, this leads to diﬀerent ellipticities at in-
 terior boundary in order to match J2 and C22, and thus diﬀerent predictions for ˜εm, ˜mf and
 ˜ns. To illustrate this, we show on Figure 4 two additional predictions computed with crustal
-thicknesses changed to h = 16 and 36 km. The change in ˜εm remains modest, ∼ 0.025%, but
-the changes in ˜mf and ˜ns are more substantial, ∼ 5% and ∼ 10%, respectively.
+thicknesses changed to h = 16 and 36 km. The change in ˜εm remains modest, ∼0.025%, but
+the changes in ˜mf and ˜ns are more substantial, ∼5% and ∼10%, respectively.
 We also show on Figure 4a (only for h = 26 km) the obliquity of the principal moment
 of inertia of the whole planet, which we denote by ˜εg. A diﬀerence between ˜εg and ˜εm occurs
 if the inner core is misaligned with the mantle. As seen in the mantle frame, a tilted inner core
 (with ˜ns assumed small) leads to an oﬀ-diagonal component of the moment of inertia tensor
-of (Cs− ¯As)α3˜ns = ¯Asesα3˜ns. The angle by which the mantle frame must be rotated so that
+of (Cs−¯As)α3˜ns = ¯Asesα3˜ns. The angle by which the mantle frame must be rotated so that
 the moment of inertia of the whole planet is purely diagonal is ( ¯Asesα3˜ns)/( ¯Ae), and hence a
 good approximation of ˜εg is
 ˜εg = ˜εm +
@@ -1345,21 +1336,21 @@ Kcmb =
 πρfr4
 f
 ¯Af
-� ν
+r ν
 2Ωo
-�
-0.195 − 1.976i
-�
+
+0.195 −1.976i
+
 ,
 (44a)
 Kicb = πρfr4
 s
 ¯As
-� ν
+r ν
 2Ωo
-�
-0.195 − 1.976i
-�
+
+0.195 −1.976i
+
 ,
 (44b)
 where ν is the kinematic viscosity. The appropriate numerical value for ν in planetary inte-
@@ -1372,19 +1363,19 @@ Conﬁdential manuscript submitted to JGR-Planets
 The above parameterizations are valid only under the assumption that the ﬂow in the bound-
 ary layer remains laminar. Whether this is reasonable can be assessed by evaluating the Reynolds
 number Re = rf∆uf/ν, associated with the diﬀerential velocity ∆uf = rfΩo ˜mf at the CMB.
-For rf = 2000 km, and taking ˜mf = 4 arcmin ≈ 0.001 rad from the results in the previous
-section, we get ∆uf ∼ 2 mm/s and Re ∼ 6 × 109. Such a large Reynolds number indicates
+For rf = 2000 km, and taking ˜mf = 4 arcmin ≈0.001 rad from the results in the previous
+section, we get ∆uf ∼2 mm/s and Re ∼6 × 109. Such a large Reynolds number indicates
 that the viscous friction between the ﬂuid core and mantle should induce turbulent ﬂows, as
 is the case for the Cassini state of the Moon [Yoder, 1981; Williams et al., 2001; C´ebron et al.,
 2019]. For a boundary layer that involves turbulent ﬂows, the viscous torque should be inde-
 pendent of the ﬂuid viscosity and proportional to the square of the diﬀerential velocity. The
 coupling constant Kcmb should be in the form
 Kcmb = fcmb
-�� ˜mf
-��
-�
-0.195 − 1.976i
-�
+ ˜mf
+
+
+0.195 −1.976i
+
 ,
 (45)
 where fcmb is a numerical factor that depends among other things on surface roughness. In-
@@ -1400,8 +1391,8 @@ by ﬁtting a rotation model to the librations of the Moon observed by Lunar Las
 of a coupling parameter K and a recent estimate is K/CL = (1.41±0.34)×10−8 day−1 [Williams
 and Boggs, 2015], where CL is the lunar polar moment of inertia. The connection between K
 and Kcmb is
-���Im[Kcmb]
-��� = K
+Im[Kcmb]
+ = K
 CL
 CL
 CfL
@@ -1410,14 +1401,14 @@ CfL
 ,
 (46)
 where CfL is the moment of inertia of the lunar core and ΩL = 2.66 × 10−6 s−1 the lunar
-rotation rate. With CfL/CL ∼ 7 × 10−4 [e.g. Williams et al., 2014], this gives |Im[Kcmb]| ∼
+rotation rate. With CfL/CL ∼7 × 10−4 [e.g. Williams et al., 2014], this gives |Im[Kcmb]| ∼
 9×10−5. In order to match this amplitude in Equation (44a), with lunar parameters and as-
-suming a lunar core radius of 400 km, the required turbulent viscosity is ν ≈ 5 × 10−4 m2
+suming a lunar core radius of 400 km, the required turbulent viscosity is ν ≈5 × 10−4 m2
 s−1, about 500 times larger than the laminar viscosity. Note that the diﬀerential velocity at the
 CMB of the Moon is closer to 3 cm/s [Yoder, 1981; Williams et al., 2001], more than 10 times
 larger than our estimate for Mercury above. Since the eﬀective turbulent coupling constant Kcmb
 is proportional to the diﬀerential velocity, the eﬀective turbulent viscosity appropriate for Mer-
-cury should be smaller. Thus, ν ≈ 5×10−4 m2 s−1 gives a conservative upper bound for the
+cury should be smaller. Thus, ν ≈5×10−4 m2 s−1 gives a conservative upper bound for the
 possible eﬀective turbulent viscosity that can be expected for Mercury.
 Figure 5 shows how ˜εm, ˜mf and ˜ns vary as functions of inner core radius for diﬀerent choices
 of eﬀective viscosities. For ν = 10−5 m2 s−1, viscous coupling is too weak to aﬀect ˜εm and
@@ -1428,7 +1419,7 @@ of ˜εm gets closer to 2.04 arcmin, the obliquity expected for a rigid planet.
 viscous coupling model is diﬀerent than the one used by Peale et al. [2014], our results for ˜εm
 and ˜mf are qualitatively similar: viscous coupling at the CMB acts to reduce the oﬀset of the
 ﬂuid spin axis from the mantle symmetry axis. Considering the upper bound in turbulent vis-
-cosity that we have identiﬁed above (i.e ν ≈ 5 × 10−4 m2 s−1), the inﬂuence of viscous cou-
+cosity that we have identiﬁed above (i.e ν ≈5 × 10−4 m2 s−1), the inﬂuence of viscous cou-
 –21–
 
 Conﬁdential manuscript submitted to JGR-Planets
@@ -1529,76 +1520,76 @@ has been developed in a few studies [e.g. Buﬀett, 1992; Buﬀett et al., 2002;
 by Br =
 √
 3
-�
+
 Bd
 r
-�
+
 cos θ, where
-�
+
 Bd
 r
-�
+
 is the r.m.s. strength of the ﬁeld, the coupling constant
 Kcmb can be written is the form
-Kcmb = 3(1 − i)Fcmb
-�
+Kcmb = 3(1 −i)Fcmb
+
 Bd
 r
-�2 ,
+2 ,
 (47)
 where
 Fcmb =
 1
 Ωoρfrf
-�
+
 1
 σmδm
 +
 1
 σfδf
-�−1
+−1
 ,
 (48)
 and where σm, δm =
-�
+p
 2/(σmµΩo) and σf, δf =
-�
+p
 2/(σfµΩo) are the electrical conductivi-
 ties and magnetic skin depths in the mantle and ﬂuid core, respectively, with µ = 4π ×10−7
 N A−2 the magnetic permeability of free space. The r.m.s. ﬁeld strength
-�
+
 Bd
 r
-�
+
 is connected to
 the Gauss coeﬃcient g0
 1 of the surface magnetic ﬁeld by
-�
+
 Bd
 r
-�
+
 =
 2
 √
 3
-� R
+ R
 rf
-�3 ��g0
+3 g0
 1
-�� .
+ .
 (49)
 We can readily build an estimate of the amplitude of Kcmb. The electrical conductivity
 of common mantle minerals in Earth’s mantle at the pressure and temperature corresponding
-to the CMB of Mercury is in the range of σm ∼ 0.01 − 1 S m−1 [Constable, 2015]. In con-
-trast, the electrical conductivity of Fe in planetary cores is expected to be close σf ∼ 106 S
-m−1 [Pozzo et al., 2012; de Koker et al., 2012]. This implies that (σmδm)−1 ≫ (σfδf)−1. Tak-
+to the CMB of Mercury is in the range of σm ∼0.01 −1 S m−1 [Constable, 2015]. In con-
+trast, the electrical conductivity of Fe in planetary cores is expected to be close σf ∼106 S
+m−1 [Pozzo et al., 2012; de Koker et al., 2012]. This implies that (σmδm)−1 ≫(σfδf)−1. Tak-
 ing σm = 1 S m−1,
-��g0
+g0
 1
-�� = 190 nT for Mercury’s dipole ﬁeld [Anderson et al., 2012], rf =
-2000 km, ρf = 7000 kg m−3, this gives Kcmb ≈ (3.1 × 10−11) · (1 − i). To put this amplitude
+ = 190 nT for Mercury’s dipole ﬁeld [Anderson et al., 2012], rf =
+2000 km, ρf = 7000 kg m−3, this gives Kcmb ≈(3.1 × 10−11) · (1 −i). To put this amplitude
 in perspective, taking a molecular viscosity of ν = 10−6 m2 s−1 in Equation (44a) gives a vis-
-cous coupling constant of Kcmb ≈ (6.0 × 10−7) · (0.195 − 1.976i). Hence, EM coupling at the
+cous coupling constant of Kcmb ≈(6.0 × 10−7) · (0.195 −1.976i). Hence, EM coupling at the
 CMB is much weaker than viscous coupling, even if we include other spherical harmonic com-
 ponents of the radial magnetic ﬁeld.
 EM coupling can be enhanced if strongly stratiﬁed pockets of core ﬂuid are trapped by
@@ -1606,22 +1597,22 @@ CMB cavities [Buﬀett, 2010; Glane and Buﬀett, 2018], in which case the eﬀe
 closer to σf. Likewise, σm can be increased if a more electrically conducting layer has formed
 at the bottom of Mercury’s mantle, for instance by the upward sedimentation and compaction
 of solid FeS crystals precipitating out of the ﬂuid core [e.g. Hauck et al., 2013]. However, even
-in the extreme case of σm = σf = 106 S m−1, Kcmb ≈ (1.6 × 10−8) · (1 − i), which remains
+in the extreme case of σm = σf = 106 S m−1, Kcmb ≈(1.6 × 10−8) · (1 −i), which remains
 –23–
 
 Conﬁdential manuscript submitted to JGR-Planets
-smaller by a factor ∼ 60 than the smallest possible viscous coupling constant. Viscous forces
+smaller by a factor ∼60 than the smallest possible viscous coupling constant. Viscous forces
 dominate the tangential stress on the CMB of Mercury.
 At the ICB, because we can expect the electrical conductivity in both the solid inner core
 and ﬂuid core to be similar, and because the radial magnetic ﬁeld is likely much stronger, EM
 coupling can be much larger and dominate viscous coupling. We assume that the magnetic ﬁeld
 morphology at the ICB is dominantly comprised of small spatial scales for example as predicted
 by the dynamo model of Christensen [2006]. EM coupling in this case can be parametrized in
-terms of an equivalent uniform radial magnetic ﬁeld ⟨Br⟩ capturing its r.m.s. strength [Buf-
+terms of an equivalent uniform radial magnetic ﬁeld ⟨Br⟩capturing its r.m.s. strength [Buf-
 fett et al., 2002; Dumberry and Koot, 2012]. Assuming an electrical conductivity σ equal in the
 ﬂuid and solid core, the coupling constant Kicb can be written in the form
 Kicb = 5
-4(1 − i)Ficb ⟨Br⟩2 ,
+4(1 −i)Ficb ⟨Br⟩2 ,
 (50)
 where
 Ficb =
@@ -1630,29 +1621,29 @@ Ficb =
 ,
 (51)
 and where δ =
-�
+p
 2/(σµΩo) is the magnetic skin depth. As Ficb is inversely proportional to
 rs, Kicb is inversely proportional to inner core size. Note that computing the EM coupling based
-on the r.m.s. strength ⟨Br⟩ rather than a true ﬁeld morphology tends to overestimate the strength
+on the r.m.s. strength ⟨Br⟩rather than a true ﬁeld morphology tends to overestimate the strength
 of the coupling [Koot and Dumberry, 2013]. However, since the strength of the radial magnetic
 ﬁeld at the ICB of Mercury is largely unknown, imperfections of the EM coupling model are
-absorbed in the range of possible ⟨Br⟩ values.
+absorbed in the range of possible ⟨Br⟩values.
 The parametrization of Equation (50) is only valid in a ’weak ﬁeld’ regime [Buﬀett et al.,
 2002], when the feedback from the Lorentz force on the ﬂow in the ﬂuid core can be neglected.
-When ⟨Br⟩ is suﬃciently large, this is no longer the case. EM coupling then enters a ’strong
+When ⟨Br⟩is suﬃciently large, this is no longer the case. EM coupling then enters a ’strong
 ﬁeld’ regime [Buﬀett et al., 2002; Dumberry and Koot, 2012; Koot and Dumberry, 2013] in which
-Kicb increases linearly with ⟨Br⟩ instead of quadratically. A good approximation of Kicb cal-
+Kicb increases linearly with ⟨Br⟩instead of quadratically. A good approximation of Kicb cal-
 culated for Earth can be extracted from Figure 6a of Dumberry and Koot [2012],
 KE
-icb = (0.175 − i0.138) ⟨Br⟩ ,
+icb = (0.175 −i0.138) ⟨Br⟩,
 (52)
-where ⟨Br⟩ is in units of Tesla. The superscript E emphasizes that the numerical factors are
+where ⟨Br⟩is in units of Tesla. The superscript E emphasizes that the numerical factors are
 appropriate for the parameter values adopted for Earth in the computation of Dumberry and
 Koot [2012]. To adapt these numerical factors to Mercury, we write,
-Kicb = (0.175 − i0.138)Ficb
+Kicb = (0.175 −i0.138)Ficb
 FE
 icb
-⟨Br⟩ ,
+⟨Br⟩,
 (53)
 where FE
 icb is deﬁned as in Equation (51) but using the parameters for Earth as deﬁned in Dumb-
@@ -1661,7 +1652,7 @@ km, σ = 5 × 105 S m−1, which gives FE
 icb = 90.36 T−2.
 To compute Ficb, we assume an electrical conductivity of σ = 106 S m−1 in the core of
 Mercury [e.g. de Koker et al., 2012; Deng et al., 2013]. The transition between the weak and
-strong ﬁeld regime occurs when ⟨Br⟩ ≈ 1.53 mT for the real part of Kicb. ⟨Br⟩ at the ICB
+strong ﬁeld regime occurs when ⟨Br⟩≈1.53 mT for the real part of Kicb. ⟨Br⟩at the ICB
 of Mercury is unknown. The dynamo model of Christensen [2006] showed that the ﬁeld geom-
 etry inside the core could be dominated by small length scales, yet only the weaker lower har-
 monics of the ﬁeld would penetrate through a thermally stratiﬁed layer in the upper region of
@@ -1669,21 +1660,21 @@ monics of the ﬁeld would penetrate through a thermally stratiﬁed layer in th
 
 Conﬁdential manuscript submitted to JGR-Planets
 the ﬂuid core and reach the surface. If so, the ﬁeld strength inside the core can exceed the sur-
-face ﬁeld strength by a factor 1000. Taking a surface ﬁeld strength equal to ∼ 300 nT [e.g An-
-derson et al., 2012], ⟨Br⟩ at the ICB could be as large as 0.3 mT, corresponding to approxi-
+face ﬁeld strength by a factor 1000. Taking a surface ﬁeld strength equal to ∼300 nT [e.g An-
+derson et al., 2012], ⟨Br⟩at the ICB could be as large as 0.3 mT, corresponding to approxi-
 mately 10% of the ﬁeld strength within Earth’s core. Given that it is perhaps unlikely that Mer-
 cury’s ﬁeld can be as high as that in Earth’s core, in all likelihood EM coupling at the ICB of
 Mercury remains in the weak ﬁeld regime.
 Figure 6 shows how ˜εm, ˜mf and ˜ns vary as functions of inner core radius for diﬀerent choices
-of ⟨Br⟩. The larger ⟨Br⟩ is, the stronger is the EM coupling at the ICB, and the smaller is the
+of ⟨Br⟩. The larger ⟨Br⟩is, the stronger is the EM coupling at the ICB, and the smaller is the
 diﬀerential rotation between the ﬂuid core and inner core. The inner core and ﬂuid core are vir-
-tually locked into a common precession motion when ⟨Br⟩ > 0.3 mT. Further increasing ⟨Br⟩
+tually locked into a common precession motion when ⟨Br⟩> 0.3 mT. Further increasing ⟨Br⟩
 above 1 mT does not change the solution as EM coupling already dominates all other torques
 on the inner core. This is the case even when EM coupling transitions into the strong ﬁeld regime.
 EM coupling at the CMB is included in these calculations, with σm = 1 S m−1 and
-��g0
+g0
 1
-�� =
+ =
 190 nT, but remains much weaker than the inertial torque at the CMB, so for a small inner core
 we retrieved the solutions of ˜εm and ˜mf shown in Figure 4.
 As the inner core radius is increased, both ˜εm and ˜mf get smaller, as it was the case with
@@ -1699,13 +1690,13 @@ locked into a common precession motion, a good approximation of ˜εm is given b
 diction as Equations (39-40) involving the eﬀective moment of inertia C′, except χ is now given
 by
 χ =
-¯AcΩp cos I − ¯AsΩoα3φs
-¯AfΩo(ef + Kcmb) + ¯AsΩoesα3αg − ¯AcΩp cos I .
+¯AcΩp cos I −¯AsΩoα3φs
+¯AfΩo(ef + Kcmb) + ¯AsΩoesα3αg −¯AcΩp cos I .
 (54)
 For a small inner core, ¯AcΩp cos I > ¯AsΩoα3φs and χ is positive. Because ¯AsΩoα3φs increases
 with inner core size, χ gets smaller, and so do C′ and ˜εm. The mantle obliquity drops from 2.049
 arcmin for a small inner core to 2.034 arcmin for an inner core of 1500 km, a reduction of 0.015
-arcmin. For an inner core larger than ≈ 1000 km, ¯AcΩp cos I < ¯AsΩoα3φs, so χ becomes neg-
+arcmin. For an inner core larger than ≈1000 km, ¯AcΩp cos I < ¯AsΩoα3φs, so χ becomes neg-
 ative, C′ becomes smaller than the moment of inertia of a rigid Mercury C, and ˜εm becomes
 smaller than the prediction based on a rigid planet.
 The larger the inner core is, the smaller are the misalignments of the ﬂuid and solid cores
@@ -1784,9 +1775,9 @@ viscous forces, and that at the ICB should be dominated by EM forces. To simplif
 sider a model where Kcmb is purely from viscous coupling and Kicb purely from EM coupling.
 We choose an eﬀective viscosity at the CMB of ν = 10−4 m2 s−1, which we believe to be a
 representative value given the comparison with the Moon (see section 3.3). We take a radial
-ﬁeld strength at the ICB of ⟨Br⟩ = 0.3 mT, approximately the ﬁeld strength expected under
+ﬁeld strength at the ICB of ⟨Br⟩= 0.3 mT, approximately the ﬁeld strength expected under
 the dynamo scenario of Christensen [2006]. We adopt these values as those of a ‘representa-
-tive’ coupling model, although the uncertainty on ν and ⟨Br⟩ obviously remains high.
+tive’ coupling model, although the uncertainty on ν and ⟨Br⟩obviously remains high.
 Figure 7 shows how ˜εm, ˜mf and ˜ns vary with inner core radius for the ’representative’
 coupling model (black lines) under the ﬁxed inner core density scenario that we have used in
 sections 3.2, 3.3 and 3.4. Figure 7 also shows how the results change when, for the same rep-
@@ -1872,7 +1863,7 @@ f and i′
 s; these represent the obliquities with respect to the orbital plane and are connected
 to our variables by: i′
 m = ˜εm, i′
-f = ˜εm + ˜m+ ˜mf ≈ ˜εm + ˜mf and i′
+f = ˜εm + ˜m+ ˜mf ≈˜εm + ˜mf and i′
 s = ˜εm + ˜ns. To summarize
 their results, i′
 f and i′
@@ -1885,7 +1876,7 @@ m gets progressively larger and is displaced further away
 from its expected orientation based of a rigid planet (see their Figure 6). The change in i′
 m they
 obtain between a case with no inner core and an inner core radius equal to 0.6 times the plan-
-etary radius (≈ 1463 km, close to the maximum inner core size of 1500 km we have considered),
+etary radius (≈1463 km, close to the maximum inner core size of 1500 km we have considered),
 is approximately an increase of 5 × 10−5 rad = 0.17 arcmin. This also corresponds approxi-
 mately to the deviation of the obliquity with respect to that of a rigid planet.
 When only viscous stress is included in our model (section 3.3), our results are substan-
@@ -1929,7 +1920,7 @@ est changes of the mantle obliquity εm compared to the obliquity predicted on t
 entirely rigid planet (εr
 m). Let us denote this diﬀerence as ∆εm = εm−εr
 m. The largest ∆εm
-occurs for a small or no inner core, and is ∆εm ≈ 0.01 arcmin. This diﬀerence is decreased
+occurs for a small or no inner core, and is ∆εm ≈0.01 arcmin. This diﬀerence is decreased
 as the inner core size is increased. For a suﬃciently large inner core, in the case of a strong EM
 coupling and large density contrast at the ICB, ∆εm can be negative, but its absolute value
 remains smaller than 0.01 arcmin.
@@ -1942,11 +1933,11 @@ planet. But it also implies that the observed obliquity cannot be used to place
 the inner core size.
 Nevertheless, our results show that the presence of a ﬂuid core and inner core aﬀect the
 resulting mantle obliquity by as much as 0.01 arcmin. This is of the same order as the change
-in obliquity caused by elastic tidal deformation, which is of the order of 0.35 arcsec (≈ 0.006
+in obliquity caused by elastic tidal deformation, which is of the order of 0.35 arcsec (≈0.006
 arcmin) [Baland et al., 2017]. This is also of the same order as the amplitude of the nutation
 motion about the mean equilibrium Cassini state forced by the precession of the pericenter, which
-is approximately 0.85 arcsec (≈ 0.014 arcmin) [Baland et al., 2017]. The precision on the obliq-
-uity from the upcoming BepiColombo satellite mission is expected to be ≤ 0.5 arcsec (≤ 0.008
+is approximately 0.85 arcsec (≈0.014 arcmin) [Baland et al., 2017]. The precision on the obliq-
+uity from the upcoming BepiColombo satellite mission is expected to be ≤0.5 arcsec (≤0.008
 arcmin) [Cical`o et al., 2016]. Thus, in addition to including tidal deformation and the preces-
 sion of the pericenter, a Cassini state model that includes a ﬂuid and solid core will then be
 necessary in order to properly tie Mercury’s obliquity to its interior structure. In turn, this opens
@@ -1960,14 +1951,14 @@ tle. Since gravitational coupling prevents a large inner core tilt with respect
 –28–
 
 Conﬁdential manuscript submitted to JGR-Planets
-ﬁnd that the misalignment ∆εg = εg − εm is limited. The maximum oﬀset that we obtain
-is approximately ∆εg ≈ 0.007 arcmin. This limited magnitude of oﬀset is important in the
+ﬁnd that the misalignment ∆εg = εg −εm is limited. The maximum oﬀset that we obtain
+is approximately ∆εg ≈0.007 arcmin. This limited magnitude of oﬀset is important in the
 light of the recent obliquity of the gravity ﬁeld estimated in Genova et al. [2019], εg = 1.968±
 0.027 arcmin. This is substantially smaller than the two mesurements of the obliquity of the
 spin-symmetry axis of the mantle: εm = 2.04 ± 0.08 arcmin [Margot et al., 2012] and εm =
 2.029±0.085 arcmin [Stark et al., 2015a], although all three measurements remain consistent
 with one another within their error estimates. In their interpretation, Genova et al. [2019] sug-
-gest that the diﬀerent central value of the obliquity that they obtain (smaller by ∼ 0.07 ar-
+gest that the diﬀerent central value of the obliquity that they obtain (smaller by ∼0.07 ar-
 cmin) is perhaps explained by an oﬀset ∆εg due to the presence of a (possibly large) solid in-
 ner core. However, this is one order of magnitude larger than the maximum magnitude of ∆εg
 that we predict. Moreover, we predict that the obliquity of the gravity ﬁeld should be larger
@@ -1980,7 +1971,7 @@ and symmetry axes in the Cassini plane. Dissipation at the CMB and ICB introduce
 cous and EM coupling also lead to a displacement of these axes in the direction perpendicu-
 lar to the Cassini plane [e.g Peale et al., 2014]. Indeed, the two measurements based on track-
 ing surface topographic features from Margot et al. [2012] and Stark et al. [2015a] suggest that
-the mantle spin axis lags behind the Cassini plane by approximately 2 arcsec (∼ 0.03 arcmin).
+the mantle spin axis lags behind the Cassini plane by approximately 2 arcsec (∼0.03 arcmin).
 Although this oﬀset is smaller than the measurement errors, so that the observed obliquity is
 still consistent with no deviation away from the Cassini plane, some amount of dissipation in-
 variably takes place. These measurements give then a measure of the possible amplitude of the
diff --git a/read/results/pymupdf/2201.00069.txt b/read/results/pymupdf/2201.00069.txt
index f535534..54e4e3e 100644
--- a/read/results/pymupdf/2201.00069.txt
+++ b/read/results/pymupdf/2201.00069.txt
@@ -2,7 +2,7 @@ MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs
 1
 A MeerKAT, e-MERLIN, H.E.S.S. and Swift search for persistent
 and transient emission associated with three localised FRBs
-J. O. Chibueze,1,2★ M. Caleb,3,4† L. Spitler,5 H. Ashkar,6,17 F. Schüssler,6 B. W. Stappers,4
+J. O. Chibueze,1,2★M. Caleb,3,4† L. Spitler,5 H. Ashkar,6,17 F. Schüssler,6 B. W. Stappers,4
 C. Venter,1 I. Heywood,7,8,9 A. M. S. Richards,3 D. R. A. Williams,3 M. Kramer,3,5
 R. Beswick,3 M. C. Bezuidenhout,3 R. P. Breton,3 L. N. Driessen,3 F. Jankowski,3
 E. F. Keane,10 M. Malenta,3 M. Mickaliger,3 V. Morello3, H. Qiu,11 K. Rajwade,3
@@ -42,16 +42,16 @@ MNRAS 000, 1–15 (2021)
 Preprint 4 January 2022
 Compiled using MNRAS LATEX style ﬁle v3.0
 ABSTRACT
-We report on a search for persistent radio emission from the one-oﬀ Fast Radio Burst (FRB)
+We report on a search for persistent radio emission from the one-oﬀFast Radio Burst (FRB)
 20190714A, as well as from two repeating FRBs, 20190711A and 20171019A, using the
 MeerKAT radio telescope. For FRB 20171019A we also conducted simultaneous observations
 with the High Energy Stereoscopic System (H.E.S.S.) in very high energy gamma rays and
 searched for signals in the ultraviolet, optical, and X-ray bands. For this FRB, we obtain a UV
-ﬂux upper limit of 1.39×10−16 erg cm−2 s−1Å−1, X-ray limit of ∼ 6.6×10−14 erg cm−2 s−1 and
-a limit on the very-high-energy gamma-ray ﬂux Φ(𝐴��� > 120 GeV) < 1.7 × 10−12 erg cm−2 s−1.
-We obtain a radio upper limit of ∼15𝛹���Jy beam−1 for persistent emission at the locations of both
+ﬂux upper limit of 1.39×10−16 erg cm−2 s−1Å−1, X-ray limit of ∼6.6×10−14 erg cm−2 s−1 and
+a limit on the very-high-energy gamma-ray ﬂux Φ(𝐸> 120 GeV) < 1.7 × 10−12 erg cm−2 s−1.
+We obtain a radio upper limit of ∼15𝜇Jy beam−1 for persistent emission at the locations of both
 FRBs 20190711A and 20171019A, but detect diﬀuse radio emission with a peak brightness
-of ∼53𝛹���Jy beam−1 associated with FRB 20190714A at 𝐴��� = 0.2365. This represents the ﬁrst
+of ∼53𝜇Jy beam−1 associated with FRB 20190714A at 𝑧= 0.2365. This represents the ﬁrst
 detection of the radio continuum emission potentially associated with the host (galaxy) of FRB
 20190714A, and is only the third known FRB to have such an association. Given the possible
 association of a faint persistent source, FRB 20190714A may potentially be a repeating FRB
@@ -67,11 +67,11 @@ logical distances (e.g. Lorimer et al. 2007; Thornton et al. 2013;
 Macquart et al. 2020). The estimated high radio luminosities and
 associated brightness temperatures required to produce these short-
 timescale energetic events at such distances are what makes them
-intriguing (Petroﬀ et al. 2021; Caleb & Keane 2021). They have been
-observed to emit from ∼ 110 MHz − 8 GHz, though not yet across
+intriguing (Petroﬀet al. 2021; Caleb & Keane 2021). They have been
+observed to emit from ∼110 MHz −8 GHz, though not yet across
 a wide and continuous frequency band due to the variable band-
 limited spectra of the single pulses. Over 600 FRBs have been dis-
-covered1 of which ∼ 20 have been seen to repeat, and it is presently
+covered1 of which ∼20 have been seen to repeat, and it is presently
 uncertain whether they all do (Caleb et al. 2019; James et al. 2020).
 The extraordinary observed characteristics of the repeating and non-
 repeating FRBs have led to various progenitor models with the bulk
@@ -94,7 +94,7 @@ of several potential mechanisms. In the magnetar model by Margalit
 et al. (2019), FRBs produced by binary neutron star mergers and
 accretion induced collapse are expected to be accompanied by per-
 sistent radio continuum emission on timescales of months to years.
-★ james.chibueze@nwu.ac.za
+★james.chibueze@nwu.ac.za
 † manisha.caleb@manchester.ac.uk
 1 https://www.wis-tns.org/
 The persistent emission is powered by the nebula of relativistic elec-
@@ -111,29 +111,29 @@ time by several instruments (Tavani et al. 2021; Ridnaia et al. 2021;
 Mereghetti et al. 2020; Insight-HXMT 2020).
 Of the 19 FRBs that have been associated with host galax-
 ies2, only the sub-arcsecond localisation of the repeating FRB
-20121102A to a host galaxy at a redshift of 𝐴��� = 0.19273 ± 0.0008
+20121102A to a host galaxy at a redshift of 𝑧= 0.19273 ± 0.0008
 (Tendulkar et al. 2017; Bassa et al. 2017) showed that it is physi-
-cally associated with a compact (≤ 0.7 pc), persistent radio source
-of luminosity 𝛹���𝐴���𝛹��� ∼ 1039 erg s−1 at a few GHz (Marcote et al.
+cally associated with a compact (≤0.7 pc), persistent radio source
+of luminosity 𝜈𝐿𝜈∼1039 erg s−1 at a few GHz (Marcote et al.
 2017). This source is detectable from 300 MHz – 26 GHz (Resmi
-et al. 2020; Chatterjee et al. 2017) and is seen to exhibit ∼ 10% vari-
+et al. 2020; Chatterjee et al. 2017) and is seen to exhibit ∼10% vari-
 ability on day timescales. In contrast, a similar sub-milliarcsecond
 localisation of another repeating FRB 20180916B to a nearby mas-
-sive spiral galaxy at 𝐴��� = 0.0337 ± 0.0002 (Marcote et al. 2020)
+sive spiral galaxy at 𝑧= 0.0337 ± 0.0002 (Marcote et al. 2020)
 showed no associated persistent radio emission. This places a strong
-upper limit on the persistent source luminosity of 𝛹���𝐴���𝛹��� ≲ 7.6×1035
+upper limit on the persistent source luminosity of 𝜈𝐿𝜈≲7.6×1035
 erg s−1 at 1.6 GHz, which is three orders of magnitude lower than
 that of FRB 20121102A. Recently, the CHIME/FRB collaboration
 announced heightened activity in the repeating FRB 20201124A
 (Chime/FRB Collaboration 2021), which was localised to a host
-galaxy at a redshift of 𝐴��� = 0.0979 ± 0.0001 (Fong et al. 2021).
+galaxy at a redshift of 𝑧= 0.0979 ± 0.0001 (Fong et al. 2021).
 Persistent radio emission was detected by the upgraded Giant Me-
 trewave Radio Telescope (uGMRT) (Wharton et al. 2021) and the
 Karl G. Jansky Very Large Array (JVLA) (Ricci et al. 2021) on
 angular scales of a few arcseconds, but resolved out to scales of
-∼ 0.1 arcseconds with the European VLBI Network (Marcote et al.
+∼0.1 arcseconds with the European VLBI Network (Marcote et al.
 2021).
-Localisations of four one-oﬀ FRBs through imaging of
+Localisations of four one-oﬀFRBs through imaging of
 2 https://frbhosts.org/
 © 2021 The Authors
 
@@ -149,7 +149,7 @@ ducted at a centre frequency of 6.5 GHz. No persistent emission as
 luminous as the one associated with FRB 20121102A was detected
 for the ASKAP FRBs (Bhandari et al. 2020). While the true age of
 FRB 121102A is unknown, models based on polarization studies
-predict the age to be ∼ 6 − 17 years (Hilmarsson et al. 2021). It is
+predict the age to be ∼6 −17 years (Hilmarsson et al. 2021). It is
 possible that younger, more active FRBs like FRB 20121102A are
 associated with persistent radio emission while the emission might
 have faded over time for the older ones. The possibility of repeating
@@ -158,7 +158,7 @@ increasing arcsecond localisations suggests that we are entering an
 era where we can begin to look for evidence of multiple classes by
 studying FRB host galaxies and multi-wavelength counterparts.
 In this paper, we report on the search for persistent radio emis-
-sion in the host galaxies of one apparent one-oﬀ source (FRB
+sion in the host galaxies of one apparent one-oﬀsource (FRB
 20190714A) and two repeating sources (FRBs 20171019A and
 20190711A) (Kumar et al. 2019, 2021) using MeerKAT. In case
 of the latter, we also conducted simultaneous observations with the
@@ -190,17 +190,17 @@ servations are considered in this paper. The data correlation was
 done with the SKARAB correlator (Hickish et al. 2016) in 4k mode
 which gives 4096 channels across the 856 MHz bandwidth resulting
 in a frequency resolution of ∼209 kHz. The data were reduced us-
-ing the semi-automated MeerKAT data analysis pipelines - 𝐴���𝐴���𝐴���𝐴���𝐴���3
+ing the semi-automated MeerKAT data analysis pipelines - 𝑜𝑥𝑘𝑎𝑡3
 (Heywood 2020).
 2.1.1
 Imaging analysis
-The 𝐴���𝐴���𝐴���𝐴���𝐴��� pipeline employs a collection of publicly available ra-
+The 𝑜𝑥𝑘𝑎𝑡pipeline employs a collection of publicly available ra-
 dio interferometry data reduction software. The ﬁnal data prod-
 3 https://ascl.net/code/v/2627
 ucts, including reduced and calibrated visibility data (including
 self-calibration), continuum (including sub-band) images as well
 as diagnostic plots, are provided by the pipeline. The customary
-conﬁguration of the 𝐴���𝐴���𝐴���𝐴���𝐴��� pipeline incorporates ﬂagging, cross-
+conﬁguration of the 𝑜𝑥𝑘𝑎𝑡pipeline incorporates ﬂagging, cross-
 calibration and self-calibration processes. In the ﬂagging process,
 the low-gain bandpass edges (856 MHz to 880 MHz and 1658 MHz
 to 1800 MHz) are ﬂagged on all baselines, along with the location of
@@ -210,9 +210,9 @@ the spectrum are then ﬂagged on baselines shorter than 600 m.
 Then, other possible RFI aﬀected data are ﬂagged out using the
 CASA routines rﬂag and tfcrop for the calibrators, and using the
 tricolour package for the target ﬁelds.
-The cross-calibration steps using 𝐴���𝐴���𝐴���𝐴���𝐴��� were standard, includ-
+The cross-calibration steps using 𝑜𝑥𝑘𝑎𝑡were standard, includ-
 ing setting the ﬂux scale and deriving corrections for residual delay
-calibration, bandpass and time-varying gain. The 𝐴���𝐴���𝐴���𝐴���𝐴��� pipeline
+calibration, bandpass and time-varying gain. The 𝑜𝑥𝑘𝑎𝑡pipeline
 uses the customary tasks from the CASA (McMullin et al. 2007)
 suite for cross-calibration. After applying all the corrections to the
 target ﬁeld, we channel-averaged the dataset by a factor of ﬁve chan-
@@ -228,11 +228,11 @@ WSClean generates the multi-frequency synthesis (MFS) map, in
 joined-channel deconvolution mode, with a central frequency of
 1283 MHz. In other words, the MFS map is a full bandwidth map.
 In WSClean, each of the sub-bands is deconvolved separately with
-an initially high mask of 20𝜋���rms (using the auto masking function
+an initially high mask of 20𝜎rms (using the auto masking function
 provided by WSClean), to generate an artefact-free model of the
 target ﬁeld for the self-calibration process. This masking threshold
-was iteratively reduced to a value of 3𝜋���rms in the ﬁnal iteration
-of imaging. The 𝐴���𝐴���𝐴���𝐴���𝐴��� pipeline uses the customary tasks from the
+was iteratively reduced to a value of 3𝜎rms in the ﬁnal iteration
+of imaging. The 𝑜𝑥𝑘𝑎𝑡pipeline uses the customary tasks from the
 Cubical software (Kenyon et al. 2018) for self-calibration.
 2.1.2
 Single pulse searches
@@ -243,7 +243,7 @@ forming User Supplied Equipment (FBFUSE) that was designed and
 developed at the Max Planck Institute for Radio Astronomy in Bonn.
 For this project, FBFUSE combined the data into 764 total-intensity
 tied-array beams which were used to populate the primary beam of
-∼ 1 deg2 of the array. The data are then captured at 306.24 μs time
+∼1 deg2 of the array. The data are then captured at 306.24 μs time
 resolution by the Transient User Supplied Equipment (TUSE), a
 real-time transient detection backend instrument developed by the
 MeerTRAP4 team at the University of Manchester. More details on
@@ -269,29 +269,29 @@ hanced Multi-Element Remote-Linked Interferometer Network, e-
 MERLIN array in the United Kingdom (project code: CY10003)
 on 13 January, 2021 (see Section 3.1.2). Six antennas were used
 including the 75-m Lovell telescope and the target pointing cen-
-tre was R.A. = 12ℎ15𝐴���55𝐴���.12, Dec. = −13◦01′15.′′7. 1407+2827
+tre was R.A. = 12ℎ15𝑚55𝑠.12, Dec. = −13◦01′15.′′7. 1407+2827
 was used as the bandpass calibrator, 1331+3030 as the ﬂux cal-
 ibrator and 1216−1033 as the phase calibrator. The angular sep-
 aration between the target and the phase calibrator is 2.47◦. The
 data reduction was done following standard e-MERLIN calibra-
 tion procedures6 with additional ﬂagging of bad visibilities fol-
 lowed by imaging. We found two confusing sources in the ﬁeld,
-at R.A. = 12ℎ15𝐴���44𝐴���.669, Dec. = −12◦57′59.′′56 and R.A. =
-12ℎ15𝐴���37𝐴���.216, Dec. = −13◦09′33.′′44 at 4.1′ and 9.4′ from the
+at R.A. = 12ℎ15𝑚44𝑠.669, Dec. = −12◦57′59.′′56 and R.A. =
+12ℎ15𝑚37𝑠.216, Dec. = −13◦09′33.′′44 at 4.1′ and 9.4′ from the
 pointing centre, respectively. They had apparent ﬂux densities of 4
 and 1.3 mJy without primary beam correction. We used these for
 self-calibration of the ﬁeld and then subtracted them before ﬁnal
 imaging. The ﬁnal image synthesized beam is 0.′′65 × 0.′′15, posi-
-tion angle 15◦ elongated in the Declination direction due to the low
+tion angle 15◦elongated in the Declination direction due to the low
 target elevation from the UK.
 2.3
 The Swift satellite: UVOT and XRT observations
 Neil Gehrels Swift Observatory (Swift) is a multi-wavelength NASA
 space mission operating in soft-X-rays and optical/UV. Here we
 use data from the X-ray Telescope (XRT) (Burrows et al. 2005)
-which operates in the soft X-ray domain of 0.3 − 10 keV as well as
+which operates in the soft X-ray domain of 0.3 −10 keV as well as
 data taken by the UV/Optical Telescope (UVOT) (Roming et al.
-2005) operating in the UV to optical domain (170 − 600 nm).
+2005) operating in the UV to optical domain (170 −600 nm).
 During the FRB 20171019A multi-wavelength (MWL) observing
 campaign, two 2 ks target-of-opportunity (ToO) observations were
 performed with Swift from 2019-09-28 18:37:02 to 2019-09-28
@@ -338,19 +338,19 @@ RESULTS
 3.1
 MeerKAT
 The theoretical thermal noise of the MeerKAT can be calculated as
-𝐴���rms = 1
-𝛹���𝐴���
+𝑆rms = 1
+𝜂𝑐
 SEFD
 √︃
-𝐴���pol × 𝐴���(𝐴��� − 1) × Δ𝛹��� × 𝐴���int
+𝑛pol × 𝑁(𝑁−1) × Δ𝜈× 𝑡int
 .
 (1)
 The system equivalent ﬂux density (SEFD) of MeerKAT at the
-1.28 GHz is 443 Jy and 𝛹���𝐴��� is the correlator eﬃciency. We used 𝐴���pol
-= 2 polarisation products (XX and YY), N = 64 telescopes, Δ𝛹��� =
-856 MHz bandwidth and 𝐴���int = 21600 sec observing time for one
-epoch. This gives the theoretical rms of ∼ 2 𝛹���Jy beam−1. The typical
-image rms obtained from our residual images is ∼ 5 𝛹���Jy beam−1,
+1.28 GHz is 443 Jy and 𝜂𝑐is the correlator eﬃciency. We used 𝑛pol
+= 2 polarisation products (XX and YY), N = 64 telescopes, Δ𝜈=
+856 MHz bandwidth and 𝑡int = 21600 sec observing time for one
+epoch. This gives the theoretical rms of ∼2 𝜇Jy beam−1. The typical
+image rms obtained from our residual images is ∼5 𝜇Jy beam−1,
 which is 2.5 times the expected theoretical rms. The wideband MFS
 image does not allow primary beam correction procedure as this can
 only be done on the sub-band images with limited rms for detection
@@ -362,7 +362,7 @@ their NRAO (National Radio Astronomy Observatory) VLA (Very
 Large Array) Sky Survey (NVSS) counterparts. However, Chibueze
 et al. (2021, submitted) conﬁrmed that the overall ﬂux densities
 obtained with MeerKAT and NVSS are in good agreement with
-each other within errors of ∼ 5%. We compared the astrometry of
+each other within errors of ∼5%. We compared the astrometry of
 the discrete radio sources obtained with MeerKAT and NVSS in
 Figure 1. The position uncertainty of the MeerKAT ranges from
 0.′′2 (close to the centre of the primary beam) to a few arcseconds
@@ -387,7 +387,7 @@ tion to sources within 5′′. Using this spatial coincidence criterion,
 we identiﬁed a persistent 1283 MHz continuum source near FRB
 20190714A, detected in both the 14 September 2019 and the 28
 September 2019 epoch. The peak of the MeerKAT radio emission
-is oﬀset by ∼ 2′′.1 from the peak of the 𝐴���-band magnitude of the op-
+is oﬀset by ∼2′′.1 from the peak of the 𝑖-band magnitude of the op-
 tical galaxy identiﬁed in the Panoramic Survey Telescope and Rapid
 Response System (PanSTARRS, located at Haleakala Observatory)
 image (shown as contours in Figures 2 and 3). The MeerKAT ra-
@@ -397,22 +397,22 @@ dio source is oﬀset by 1.′′68 from the localisation region of FRB
 e-MERLIN detection of compact emission towards
 FRB 20190714
 Compact persistent emission was detected in the 1.51 GHz e-
-MERLIN image at R.A. = 12ℎ15𝐴���55𝐴���.116, Dec. = −13◦01′14.′′48
-at 86 𝛹���Jy beam−1 by e-MERLIN. The stochastic position uncer-
+MERLIN image at R.A. = 12ℎ15𝑚55𝑠.116, Dec. = −13◦01′14.′′48
+at 86 𝜇Jy beam−1 by e-MERLIN. The stochastic position uncer-
 tainty is (0.04, 0.15) arcsec and the uncertainty (due to the sepa-
 ration between phase-calibrator and target, and antenna position
 uncertainty) is (0.013, 0.056) arcsec, giving a total astrometric
 uncertainty of (0.04, 0.16) arcsec in R.A. and Dec., respectively.
 The oﬀset from the FRB position is negligible in R.A. and 1.2
 arcsec in Dec. The rms in this region (of full primary beam sen-
-sitivity) is 20 𝛹���Jy beam−1, making this a 4.3𝜋���rms detection. It is
-∼1.5𝜋���rms higher than that of the MeerKAT detection. Although the
+sitivity) is 20 𝜇Jy beam−1, making this a 4.3𝜎rms detection. It is
+∼1.5𝜎rms higher than that of the MeerKAT detection. Although the
 e-MERLIN ﬂux scale nominal uncertainty is ∼5%, in these data it
 is possibly higher due to the low declination of the phase-reference
 source and to the strong RFI which were removed from the data
 but may have aﬀected the linearity of the receiver response. The
-peak of the e-MERLIN radio emission is oﬀset by ∼ 1.′′4 from the
-peak of the PanSTARRS 𝐴���-band emission in Figures 2 and 3. The
+peak of the e-MERLIN radio emission is oﬀset by ∼1.′′4 from the
+peak of the PanSTARRS 𝑖-band emission in Figures 2 and 3. The
 e-MERLIN radio source (shown by the cyan cross in Figures 2 and
 3) is oﬀset by 0.′′53 from the localised position of FRB 20190714.
 We estimate the probability of a chance alignment of a back-
@@ -420,21 +420,21 @@ ground persistent radio source and the host galaxy, following the
 procedure of Eftekhari et al. (2018). Instead of using the FRB lo-
 calisation region, we use the area of the galaxy, which is taken as
 2′′ × 2′′, twice the half light radius from Heintz et al. (2020). Given
-the source has a ﬂux density of ∼ 90𝛹���Jy we estimate the chance
-alignment probability of 0.0008, which corresponds to 3.4𝜋���. The
-ﬂux density threshold, assuming 3𝜋���, for an unresolved radio source
-is ∼ 15 𝛹���Jy. If instead we consider the probability of detecting any
-radio source above our ﬂux density threshold of 15𝛹���Jy, the probabil-
+the source has a ﬂux density of ∼90𝜇Jy we estimate the chance
+alignment probability of 0.0008, which corresponds to 3.4𝜎. The
+ﬂux density threshold, assuming 3𝜎, for an unresolved radio source
+is ∼15 𝜇Jy. If instead we consider the probability of detecting any
+radio source above our ﬂux density threshold of 15𝜇Jy, the probabil-
 ity of a chance alignment is, therefore, approximately 0.8%, making
-the statistical signiﬁcance of our detection 2.6𝜋���. This represents the
+the statistical signiﬁcance of our detection 2.6𝜎. This represents the
 ﬁrst detection of radio continuum emission associated with the host
 (galaxy) of FRB 20190714A (see Figure 2 and 3).
 3.1.3
 MeerKAT non-detections
 No continuum emission was detected near FRBs 20171019A and
 20190711A. As each of the images of these sources has an rms
-of ∼ 5 𝛹���Jy beam−1, the 3𝜋��� intensity upper limit of any emission
-associated with FRBs 20171019A and 20190711A will be ∼ 15 𝛹���Jy
+of ∼5 𝜇Jy beam−1, the 3𝜎intensity upper limit of any emission
+associated with FRBs 20171019A and 20190711A will be ∼15 𝜇Jy
 beam−1 (see Table 1).
 Candidate pulses above a signal-to-noise (S/N) of 10 from the
 single pulse search with MeerTRAP were visually inspected oﬄine.
@@ -446,14 +446,14 @@ Swift
 The UVOT summed image is presented in Figure 4. The UVOT
 ﬁeld of view corresponds roughly to the uncertainty7 of the locali-
 sation region of FRB 20171019A (RA = 7.5′and DEC = 7′). Using
-uvotdetect, we ﬁnd 30 sources above the 5𝜋��� level and within the
+uvotdetect, we ﬁnd 30 sources above the 5𝜎level and within the
 FRB 20171019A uncertainty region. Using a 3 arcsec maximum
 separation, which is slightly larger than the UVOT PSF (Breeveld
 et al. 2010), these sources are cross-matched with known catalogue
 sources. We ﬁnd that out of the 30 sources detected by UVOT, 28
 are spatially coincident with stars catalogued in the SDSS catalogue
 (DR12; Alam et al. 2015), and one source is coincident with a galaxy
-(AGN broadline SDSS ID: 1237652599570890948 at 𝐴��� ∼ 0.156).
+(AGN broadline SDSS ID: 1237652599570890948 at 𝑧∼0.156).
 This galaxy is also detected by the MeerKAT radio observations. We
 use the NASA/IPAC Extragalactic Database (NED)8 to search for
 known galaxies in the FRB 20171019A uncertainty regions. We ﬁnd
@@ -461,7 +461,7 @@ multiple galaxies with unknown redshifts, therefore we cannot draw
 conclusions on the host galaxy from our observations. Using a 50′′
 circular ON region centred on the position of FRB 20171019A and
 a 50′′ OFF region that does not contain any of the detected sources,
-we run the uvotsource tool with a 5𝜋��� background threshold and
+we run the uvotsource tool with a 5𝜎background threshold and
 obtain a ﬂux upper limit of 1.4 × 10−16 erg cm−2 s−1Å−1 without
 applying a Calactic extinction correction.
 The XRT summed image is shown in Figure 5. At the edge
@@ -470,7 +470,7 @@ the Wolf 1561 star. As we consider this source unrelated to the
 FRB, we use the online Swift-XRT data products generator (Evans
 et al. 2007) (Evans et al. 2009) to derive upper limits in the 0.3-
 10 keV range on the count rate of 0.001885 counts.s−1. Using
-WebPIMMS9 (v4.11a) and assuming a weighted average 𝐴���H = 5.12×
+WebPIMMS9 (v4.11a) and assuming a weighted average 𝑁H = 5.12×
 1020 cm−2 from the direction of the source estimated from the
 NASA’s HEASARC 10 online tools (HI4PI Collaboration et al.
 2016) and a power law model with a photon index = 2, this upper
@@ -485,13 +485,13 @@ A second analysis using an independent event calibration and recon-
 struction (Parsons & Hinton 2014) conﬁrms this result. A search for
 variable emission on timescales ranging from milliseconds to sev-
 eral minutes with tools provided in (Brun et al. 2020) does not reveal
-any variability above 2.2 𝜋���. For the total data set of 1.8 h, 95% conﬁ-
+any variability above 2.2 𝜎. For the total data set of 1.8 h, 95% conﬁ-
 dence level (C. L.) upper limits on the photon ﬂux are derived using
 the method described by Rolke et al. (2005). The energy threshold
 of the data is highly dependent on the zenith angle of the observa-
 tions. For these observations, the zenith angles range from 15 to 25
 deg, which leads to an energy threshold for the stacked data set of
-𝐴���th = 120 GeV. The upper limit on the Very High Energy (VHE)
+𝐸th = 120 GeV. The upper limit on the Very High Energy (VHE)
 7 https://www.wis-tns.org/object/20171019a
 8 https://ned.ipac.caltech.edu; NED is funded by the National
 Aeronautics and Space Administration and operated by the California Insti-
@@ -507,8 +507,8 @@ Chibueze et al.
 Figure 1. Astrometric comparison between MeerKAT and NVSS discrete compact sources.The open circles represent the diﬀerence in position between the
 MeerKAT and NVSS sources.
 gamma-ray ﬂux above that threshold and assuming an energy depen-
-dence following 𝐴���−2 is Φ(𝐴��� > 120 GeV) < 2.10 × 10−12 cm−2 s−1
-or Φ(𝐴��� > 120 GeV) < 1.7 × 10−12 erg cm−2 s−1. A variation of
+dence following 𝐸−2 is Φ(𝐸> 120 GeV) < 2.10 × 10−12 cm−2 s−1
+or Φ(𝐸> 120 GeV) < 1.7 × 10−12 erg cm−2 s−1. A variation of
 ± 0.5 of the assumed spectral index leads to a variation in the upper
 limit of less than ± 19%. A map of energy ﬂux upper limits covering
 the full region accessible within the H.E.S.S. ﬁeld of view above
@@ -517,20 +517,20 @@ the full region accessible within the H.E.S.S. ﬁeld of view above
 DISCUSSION
 Of the targeted FRB ﬁelds reported here, only FRB 20190714A
 is observed to be spatially coincident with a persistent radio con-
-tinuum source. We obtain an upper limit of ∼ 15 𝛹���Jy beam−1 for
+tinuum source. We obtain an upper limit of ∼15 𝜇Jy beam−1 for
 FRBs 20190711A and 20171019A, respectively, and a peak inten-
-sity of ∼ 53 𝛹���Jy beam−1 for the emission coincident with FRB
+sity of ∼53 𝜇Jy beam−1 for the emission coincident with FRB
 20190714A. This source is detected at both epochs with similar
 intensities within the measured rms of the images (see Tables 1 and
 2 for details). The values in the Table 2 are derived by carrying
 out 2D Gaussian ﬁt using similar ellipses enclosing the detected
-persistent emission. The average ﬂux density is ∼ 3 times less than
+persistent emission. The average ﬂux density is ∼3 times less than
 that of the persistent source associated with FRBs 20121102A, one
-of the most proliﬁc repeaters, located at 𝐴��� = 0.19273(8). Persistent
+of the most proliﬁc repeaters, located at 𝑧= 0.19273(8). Persistent
 radio emission from FRB 20201124A was detected by the uGMRT
 (Wharton et al. 2021) and the JVLA (Ricci et al. 2021) on angular
 scales of a few arcseconds. However, it is resolved out at scales of
-∼ 0.1 arcseconds with the European VLBI Network (Marcote et al.
+∼0.1 arcseconds with the European VLBI Network (Marcote et al.
 2021) suggesting that it is not a compact source directly associated
 with the FRB. In contrast, the other localised, proliﬁc repeating
 FRB 20180916A has no persistent radio counterpart.
@@ -538,8 +538,8 @@ In the image in Figure 3 one can see that the persistent radio
 source lies at the edge of the optical extent of the host galaxy
 as seen in PanSTARRS observations (Heintz et al. 2020). Our
 derived 1283 MHz peak position with MeerKAT places it just
-1.′′68 away from the position of FRB 20190714A (𝛹���𝐴���2000, 𝛹���𝐴���2000
-= 12ℎ15𝐴���55𝐴���.12, -13◦01′15.′′70; Heintz et al. 2020). The posi-
+1.′′68 away from the position of FRB 20190714A (𝛼𝐽2000, 𝛿𝐽2000
+= 12ℎ15𝑚55𝑠.12, -13◦01′15.′′70; Heintz et al. 2020). The posi-
 tional uncertainty on the FRB position is 0.′′283. Similarly, the peak
 1.51 GHz e-MERLIN position of the persistent radio source is sepa-
 rated from the position of FRB 20190714A by 0.′′53. The persistent
@@ -550,7 +550,7 @@ MNRAS 000, 1–15 (2021)
 MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs
 7
 Figure 2. FRB 20190714A MeerKAT epoch I image (top) and a zoom-in (bottom) around the position of the FRB indicated by the cyan circle. White contours
-(levels: 300, 500, 900, 1200, 1600 counts) represent the PanSTARRS 𝐴���-band optical counterpart coincident in position with the persistent radio emission. The
+(levels: 300, 500, 900, 1200, 1600 counts) represent the PanSTARRS 𝑖-band optical counterpart coincident in position with the persistent radio emission. The
 white ellipse in the bottom left corner represents the beam size of MeerKAT. The cyan cross indicates the position of the detected compact emission in our
 e-MERLIN observations.
 MNRAS 000, 1–15 (2021)
@@ -558,7 +558,7 @@ MNRAS 000, 1–15 (2021)
 8
 Chibueze et al.
 Figure 3. FRB 20190714A MeerKAT epoch II image (top) and a zoom-in (bottom) around the position of the FRB indicated by the cyan circle. White contours
-(levels: 300, 500, 900, 1200, 1600 counts) represent the PanSTARRS 𝐴���-band optical counterpart coincident in position with the persistent radio emission. The
+(levels: 300, 500, 900, 1200, 1600 counts) represent the PanSTARRS 𝑖-band optical counterpart coincident in position with the persistent radio emission. The
 white ellipse in the bottom left corner represents the beam size of MeerKAT. The cyan cross indicates the position of the detected compact emission in our
 e-MERLIN observations.
 MNRAS 000, 1–15 (2021)
@@ -566,14 +566,14 @@ MNRAS 000, 1–15 (2021)
 MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs
 9
 Figure 4. UVOT summed image of FRB 20171019A region taken during the MWL observation campaign in September-October 2019. The white circles
-indicate sources detected above 5𝜋���. The cyan dot denotes the location of FRB 20171019A, the circle around it indicates the region used to derive the upper
+indicate sources detected above 5𝜎. The cyan dot denotes the location of FRB 20171019A, the circle around it indicates the region used to derive the upper
 limits while the magenta region indicates the background region used. The green box indicates FRB 20171019A 90% localisation region as reported in Kumar
 et al. (2019).
 Table 1. Details of the FRB ﬁelds observed with MeerKAT.
 Field name
 Observation date
 Synthesized beam
-rms (𝛹���Jy beam−1)
+rms (𝜇Jy beam−1)
 Detected?
 FRB 20171019A
 28 September 2019
@@ -583,34 +583,34 @@ FRB 20171019A
 18 October 2019
 6.′′8 × 5.′′0
 5.2
-< 15𝛹���Jy beam−1
+< 15𝜇Jy beam−1
 FRB 20190711A
 23 August 2019
 11.′′7 × 4.′′9
 4.9
-< 15𝛹���Jy beam−1
+< 15𝜇Jy beam−1
 FRB 20190711A
 09 September 2019
 12.′′5 × 4.′′9
 4.6
-< 15𝛹���Jy beam−1
+< 15𝜇Jy beam−1
 FRB 20190714A
 14 September 2019
 7.′′1 × 6.′′2
 4.2
-54.4 𝛹���Jy beam−1
+54.4 𝜇Jy beam−1
 FRB 20190714A
 28 September 2019
 6.′′5 × 5.′′1
 5.8
-52.0 𝛹���Jy beam−1
+52.0 𝜇Jy beam−1
 Table 2. Details of the radio continuum source associated with FRB 20190714A.
 Field name
 Observation date
 Telescope
-𝛹���centre (GHz)
-𝛹���J2000
-𝛹���J2000
+𝜈centre (GHz)
+𝛼J2000
+𝛿J2000
 Maj. × min. axis
 Pos. angle
 Int. ﬂux density
@@ -618,29 +618,29 @@ FRB 20190714A
 28 September 2019
 MeerKAT
 1.283
-12ℎ15𝐴���55𝐴���.154
+12ℎ15𝑚55𝑠.154
 -13◦01′17.′′30
 9.′′6 × 7.′′4
 88.7◦
-87.4 𝛹���Jy
+87.4 𝜇Jy
 FRB 20190714A
 18 October 2019
 MeerKAT
 1.283
-12ℎ15𝐴���55𝐴���.193
+12ℎ15𝑚55𝑠.193
 −13◦01′17.′′18
 8.′′2 × 6.′′4
 12.2◦
-80.7 𝛹���Jy
+80.7 𝜇Jy
 FRB 20190714A
 13 January 2021
 e-MERLIN
 1.510
-12ℎ15𝐴���55𝐴���.116
+12ℎ15𝑚55𝑠.116
 −13◦01′14.′′51
 0.′′15 × 0.′′65
 17.6◦
-107.5 𝛹���Jy
+107.5 𝜇Jy
 large oﬀset from the centre of the galaxy makes the persistent source
 unlikely to be an AGN. So far this FRB has not been seen to repeat.
 Higher resolution imaging will be required to be certain of a direct
@@ -659,12 +659,12 @@ Wolf 1561 star is shown in cyan and is labelled. The green box indicates FRB 201
 FRB 20190714A (780 Mpc), an unresolved source with an an-
 gular size of 0.′′6 corresponds to a physical extent of ≲2.3 kpc. The
 uGMRT reported the detection of an unresolved radio emission at
-650 MHz with a ﬂux density of 700±100 𝛹���Jy (Wharton et al. 2021),
+650 MHz with a ﬂux density of 700±100 𝜇Jy (Wharton et al. 2021),
 while the JVLA detected persistent emission with a ﬂux density of
-340 ± 30 𝛹���Jy at 3 GHz (Ricci et al. 2021). Assuming the estimated
-spectral index between these frequencies (∼ −0.5, Ricci et al. 2021),
-the 1.3 GHz ﬂux density would be ∼ 500 𝛹���Jy (similar to the 3-𝜋���
-upper limit on observations from 1 − 2 GHz; Law et al. 2021). The
+340 ± 30 𝜇Jy at 3 GHz (Ricci et al. 2021). Assuming the estimated
+spectral index between these frequencies (∼−0.5, Ricci et al. 2021),
+the 1.3 GHz ﬂux density would be ∼500 𝜇Jy (similar to the 3-𝜎
+upper limit on observations from 1 −2 GHz; Law et al. 2021). The
 ﬂux density we measured for FRB 20190714A is a factor of ∼10
 lower than FRB20201124A, but FRB 20190714A is also a factor
 2.6 more distant. Therefore, the ﬂux densities would be comparable
@@ -679,7 +679,7 @@ to FRB 20121102A, is a young nebula powered ﬂaring magnetar
 embedded in a 20–50 year-old supernova remnant (Beloborodov
 2017; Metzger et al. 2019). The lack of a bright persistent radio
 source associated with the repeater FRB 20180916A suggests that
-it is comparatively older at ≳ 200 − 500 years and the persistent
+it is comparatively older at ≳200 −500 years and the persistent
 radio source may have faded. In the model by Metzger et al. (2019),
 the nebula is suggested to contribute signiﬁcantly to the rotation
 measure and dispersion measure (DM), as well as to the persis-
@@ -715,7 +715,7 @@ MNRAS 000, 1–15 (2021)
 MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs
 11
 Figure 6. Map of upper limits on the VHE gamma-ray energy ﬂux derived from the H.E.S.S. observations. The limits are valid above 120 GeV and assume
-a photon ﬂux distribution following an 𝐴���−2 dependence. The green box indicates the FRB 20171019A 90% localisation region as reported in Kumar et al.
+a photon ﬂux distribution following an 𝐸−2 dependence. The green box indicates the FRB 20171019A 90% localisation region as reported in Kumar et al.
 (2019).The oversampling radius is 0.1◦.
 emissions. In the case of existence of X-ray non-thermal outbursts,
 the lack of VHE detection could indicate that inverse Compton is
@@ -743,11 +743,11 @@ of three FRBs (FRB 20190714A, 20190711A and 20171019A),
 and also a multi-wavelength campaign on one of these (FRB
 20171019A).
 We detected persistent compact radio emission associated with
-FRB 20190714A (at 𝐴��� = 0.2365) using the MeerKAT and e-
+FRB 20190714A (at 𝑧= 0.2365) using the MeerKAT and e-
 MERLIN radio telescope. This represents the ﬁrst detection of the
 radio continuum emission associated with the host (galaxy) of FRB
 20190714A and is only the third known FRB to have such an as-
-sociation. We furthermore obtained a radio upper limit of∼ 15𝛹���Jy
+sociation. We furthermore obtained a radio upper limit of∼15𝜇Jy
 beam−1 for the repeating FRBs 20190711A and 20171019A.
 We also performed UV, X-ray and VHE observations with the
 Swift and H.E.S.S. instruments and obtained upper limits in the three
@@ -798,7 +798,7 @@ the Austrian Federal Ministry of Education, Science and Research
 and the Austrian Science Fund (FWF), the Australian Research
 Council (ARC), the Japan Society for the Promotion of Science
 and by the University of Amsterdam. We appreciate the excellent
-work of the technical support staﬀ in Berlin, Zeuthen, Heidelberg,
+work of the technical support staﬀin Berlin, Zeuthen, Heidelberg,
 Palaiseau, Paris, Saclay, Tübingen and in Namibia in the construc-
 tion and operation of the equipment. This work beneﬁted from
 services provided by the H.E.S.S. Virtual Organisation, supported
@@ -889,7 +889,7 @@ Mereghetti S., et al., 2020, ApJ, 898, L29
 Metzger B. D., Margalit B., Sironi L., 2019, MNRAS, 485, 4091
 Oﬀringa A. R., et al., 2014, MNRAS, 444, 606
 Parsons R. D., Hinton J. A., 2014, Astroparticle Physics, 56, 26
-Petroﬀ E., Hessels J. W. T., Lorimer D. R., 2021, arXiv e-prints, p.
+PetroﬀE., Hessels J. W. T., Lorimer D. R., 2021, arXiv e-prints, p.
 arXiv:2107.10113
 Platts E., Weltman A., Walters A., Tendulkar S. P., Gordin J. E. B., Kandhai
 S., 2019, Phys. Rep., 821, 1
diff --git a/read/results/pymupdf/2201.00151.txt b/read/results/pymupdf/2201.00151.txt
index 86ff84e..42faaeb 100644
--- a/read/results/pymupdf/2201.00151.txt
+++ b/read/results/pymupdf/2201.00151.txt
@@ -426,7 +426,7 @@ All the relevant properties of the galaxy are given in Table 1,
 including numbers of particles and total masses for both compo-
 nents, and details on the shape of the stellar component: the axis
 ratios minor to major (shortest to longest) c/a, intermediate to
-major b/a, and the triaxiality parameter T = (a2 − b2)/(a2 − c2).
+major b/a, and the triaxiality parameter T = (a2 −b2)/(a2 −c2).
 We distinguish between the half-mass radius provided in the Il-
 lustris database and the half-number radius r1/2, which we use
 for further calculations in this paper. The diﬀerence between the
@@ -496,7 +496,7 @@ persion. Both populations show a weak rotation signal at large
 distances from the center.
 The velocity anisotropy parameter β(r)
 =
-1 − (σ2
+1 −(σ2
 θ +
 σ2
 φ)/(2σ2
@@ -549,10 +549,10 @@ sity proﬁle with the King formula (King 1962)
 I(R) = I0
 
 1
-�
+p
 1 + (R/Rc)2 −
 1
-�
+p
 1 + (Rt/Rc)2
 
 2
@@ -673,11 +673,11 @@ where I0, Rc, and Rt are the model parameters. The proﬁle can
 be analytically deprojected to obtain the 3D density
 ρ(r) = ρ0
 z2
-�1
+"1
 z arccos(z) −
-�
-1 − z2
-�
+p
+1 −z2
+#
 ,
 (2)
 where
@@ -687,7 +687,7 @@ I0
 (3)
 and
 z =
-�
+s
 r2 + R2c
 R2c + R2
 t
@@ -704,10 +704,10 @@ We follow the approach introduced in Kowalczyk et al. (2018),
 namely we model the total mass proﬁle with the mass-to-light
 ratio Υ varying with radius:
 log Υ(r) =
-�
+(
 log(Υ0)
-r ≤ r0
-a(log r − log r0)c + log(Υ0)
+r ≤r0
+a(log r −log r0)c + log(Υ0)
 r > r0
 (5)
 Article number, page 5 of 12
@@ -778,8 +778,8 @@ where r is the distance from the center of the galaxy, r0 is a
 constant, while Υ0, a, and c are the parameters of a model. We
 have assumed log r0 = 0.33 which corresponds to three softening
 scales for stellar particles in the Illustris simulation.
-We probed the parameter a ∈ [0 : 1.3] with a step ∆a = 0.04
-and c ∈ [1.1 : 2.9] with a step ∆c = 0.2, imposing the require-
+We probed the parameter a ∈[0 : 1.3] with a step ∆a = 0.04
+and c ∈[1.1 : 2.9] with a step ∆c = 0.2, imposing the require-
 ment on the total density proﬁle to be monotonically decreasing
 with radius. For each set of parameters and for each line of sight
 we generated 1200 orbits using 100 values of energy (expressed
@@ -795,7 +795,7 @@ of Υ0 were obtained with a simple transformation of velocities
 given by Eq. 12, 13, and 15 in Kowalczyk et al. (2018). In or-
 der to smooth out the numerical artifacts, the three-dimensional
 χ2 spaces were then interpolated with 12-order polynomials
-(∼ a4c4Υ4
+(∼a4c4Υ4
 0) that were further used to determine the global min-
 imums (identiﬁed as the best-ﬁtting models) and 1, 2, 3 σ con-
 ﬁdence levels which for three parameters correspond to ∆χ2 =
@@ -1266,7 +1266,7 @@ and 4th velocity moments (top to bottom) for the three data sam-
 ples: all stars, population I, and population II (in red, orange, and
 blue, respectively). The error bars indicate 1 σ sampling errors.
 The parameter space for Υ(r) has been probed as follows:
-a ∈ [0 : 1.85] with a step ∆a = 0.05 and c ∈ [1.2 : 6] with a
+a ∈[0 : 1.85] with a step ∆a = 0.05 and c ∈[1.2 : 6] with a
 step ∆c = 0.2. We point out that in Kowalczyk et al. (2019) the
 parameter c was ﬁxed at c = 3 and now we ﬁt it as a free pa-
 rameter. As for the mock data in Section 3.2, diﬀerent values of
@@ -1395,9 +1395,9 @@ panel) for the Fornax dSph.
 closed total mass at larger radii. In particular, for the mass en-
 closed within 1.8 kpc we get Mall(< 1.8 kpc) = 3.87+1.48
 −1.56 × 108
-M⊙ from the ﬁt for all stars and Mpops(< 1.8 kpc) = 4.71+0.87
+M⊙from the ﬁt for all stars and Mpops(< 1.8 kpc) = 4.71+0.87
 −1.13 ×
-108 M⊙ from the ﬁt of populations, while previously we had
+108 M⊙from the ﬁt of populations, while previously we had
 Mold(< 1.8 kpc) = 3.7+1.4
 −1.3 × 108 M⊙.
 Interestingly, despite the signiﬁcant shift of the position of
diff --git a/read/results/pymupdf/2201.00178.txt b/read/results/pymupdf/2201.00178.txt
index 7b745f8..90b7e18 100644
--- a/read/results/pymupdf/2201.00178.txt
+++ b/read/results/pymupdf/2201.00178.txt
@@ -14,8 +14,8 @@ probe steady, near-surface ﬂows in the Sun. Using Doppler cubes obtained from
 Magnetic Imager onboard the Solar Dynamics Observatory, we perform inversions on mode-coupling
 measurements to show that the resulting divergence and radial vorticity maps at supergranular length
 scales (∼30 Mm) near the surface compare extremely well with those obtained using the Local Corre-
-lation Tracking method. We ﬁnd that the Pearson correlation coeﬃcient is ≥ 0.9 for divergence ﬂows,
-while ≥ 0.8 is obtained for the radial vorticity.
+lation Tracking method. We ﬁnd that the Pearson correlation coeﬃcient is ≥0.9 for divergence ﬂows,
+while ≥0.8 is obtained for the radial vorticity.
 Keywords: Helioseismology (709); Solar physics (1476); Supergranulation (1662)
 1. INTRODUCTION
 Helioseismology is the study of the Sun’s internal structure and its properties, by means of interpreting its eﬀect
@@ -73,8 +73,8 @@ complete derivation of the forward problem. Working in the plane-parallel atmosp
 denote the horizontal unit vectors ex and ey in our local Cartesian domain as pointing towards west and north on the
 solar surface, respectively, and ez points outwards. This approximation is valid when observing patches of the surface
 that are small when compared to the solar radius. When imaging steady, near-surface ﬂows in the neighbourhood
-of the supergranular scale (∼ 30 Mm), we expect the measured spectral cross-correlation signal to peak around the
-horizontal wavenumber qR⊙ ≈ 120 (Rincon & Rieutord 2018), where q = |q| = |(qx, qy)| is the vector horizontal
+of the supergranular scale (∼30 Mm), we expect the measured spectral cross-correlation signal to peak around the
+horizontal wavenumber qR⊙≈120 (Rincon & Rieutord 2018), where q = |q| = |(qx, qy)| is the vector horizontal
 wavenumber of the ﬂow. Accordingly, the goal is to relate measurements (linearly, to facilitate inversion) to the ﬂow
 perturbation described in a horizontal Fourier domain. Supergranular velocities are subsonic (300-400 m/s, see Rincon
 & Rieutord 2018), permitting us to model the ﬂow vector uuu = (ux, uy, uz) in the Cartesian domain like so (Unno et al.
@@ -82,7 +82,7 @@ perturbation described in a horizontal Fourier domain. Supergranular velocities
 uσ = ∇×[∇×(P ez)] + ∇×(T ez),
 (1)
 where P = P σ(x) and T = T σ(x) are poloidal and toroidal scalar functions, varying with position x and temporal
-frequency σ. ∇ is the 3D gradient operator. While mode-coupling can easily be extended to study time-varying
+frequency σ. ∇is the 3D gradient operator. While mode-coupling can easily be extended to study time-varying
 perturbations (see Woodard 2016; Mani & Hanasoge 2020; Hanasoge et al. 2020; Mandal & Hanasoge 2020, for
 example), here we only consider the frequency bin σ = 0, denoting the temporally averaged ﬂow over the period
 of analysis.
@@ -96,12 +96,12 @@ Fourier domain, and since we wish to image horizontal ﬂows on a small patch of
 function of horizontal wavenumber q and depth zez. Hence the poloidal and toroidal ﬂows are described by Pq(z) and
 Tq(z), respectively. Furthermore, we parametrize the ﬂow along ez using basis functions f(z) (Chebyshev, B-spline,
 etc). This is expressed as
-P ≡ Pq(z) =
-�
+P ≡Pq(z) =
+X
 j
 fj(z) Pqj,
-T ≡ Tq(z) =
-�
+T ≡Tq(z) =
+X
 j
 fj(z) Tqj.
 (3)
@@ -123,9 +123,9 @@ k+q thus
 to the ﬂow coeﬃcients Pqj and Tqj (see eq A7)
 ⟨φω∗
 k φω
-k+q⟩ = Hω
+k+q⟩= Hω
 kk′nn′
-�
+X
 j
 Cqj,kPqj + Dqj,kTqj.
 (4)
@@ -146,13 +146,13 @@ k φω
 k+q (see Woodard 2006, 2014, 2016) results
 in the B-coeﬃcients Bk,q, according to
 Bk,q =
-�
+P
 ω
 Hω∗
 kk′nn′φω∗
 k φω
 k+q
-�
+P
 ω
 |Hω
 kk′nn′|2
@@ -162,21 +162,21 @@ Multiplying eq 4 on both sides by Hω∗
 kk′nn′ and substituting by eq 5 on the left-hand-side results in a concisely deﬁned
 forward problem (compare with eq 4)
 Bk,q =
-�
+X
 j
 Cqj,kPqj + Dqj,kTqj.
 (6)
 In eq 5, Woodard (2007) and H21 thus far only considered positive-frequency components in the summation over ω.
 Here, we sum over both ±ω within a few mode linewidths Γ. Denoting the resonant frequency of a mode using ωnk,
 |ω| ∈
-�
-ωnk − ϵΓnk/2, ωnk + ϵΓnk/2
-�
+
+ωnk −ϵΓnk/2, ωnk + ϵΓnk/2
+
 or
 |ω| ∈
-�
-ωn′k′ − ϵΓn′k′/2, ωn′k′ + ϵΓn′k′/2
-�
+
+ωn′k′ −ϵΓn′k′/2, ωn′k′ + ϵΓn′k′/2
+
 .
 (7)
 Summing over ±ω guarantees that the parity Bk,q = B∗
@@ -186,7 +186,7 @@ Taking the complex conjugate on both sides of eq 6 and considering the negative
 −k,
 B∗
 −k,−q =
-�
+X
 j
 C−qj,−kP ∗
 −qj + D−qj,−kT ∗
@@ -208,11 +208,11 @@ and with ﬁnite lifetimes. This stochasticity leads to realization noise in rep
 Mani et al.
 Figure 1. Dispersion relation for the radial orders used in this analysis; f (blue), p1 (orange) and p2 (green). The shaded
 regions of the same colours indicate 1-linewidth Γ about the mode frequency. The yellow shaded region indicates the range of
-kR⊙ and ω/2π to which we have restricted ourselves in this analysis. Beyond kR⊙ of 2000, it is seen that the theoretical ﬁtting
+kR⊙and ω/2π to which we have restricted ourselves in this analysis. Beyond kR⊙of 2000, it is seen that the theoretical ﬁtting
 of mode frequencies start deviating from the observed dispersion relation for the f-mode.
 such as its amplitude, frequency and linewidth, and consequently in Bk,q in our case. We use the same noise model
 as in H21, which was motivated by the above discussion,
-Gk,q ≡ ⟨|Bk,q|2⟩,
+Gk,q ≡⟨|Bk,q|2⟩,
 (9)
 where, unlike H21, we again sum over ±ω. Gk,q is real, with the symmetry relation Gk,q = G−k,−q (see Appendix A
 for explanation).
@@ -234,23 +234,23 @@ Eq 6, while short enough that supergranules do not substantially evolve (lifetim
 from center-to-limb systematics (Zhao et al. 2012; Langfellner et al. 2015).
 Maximum signal can be extracted from the weighted summation of the cross correlations (eq 5) when the spectral
 proﬁles of the two modes [n, k] and [n′, k′] closely align in ω space. This implies that their mode frequencies should be
-suﬃciently close (|ωnk − ωn′k′| ≤ δ, the separation parameter). Since Lorentzians decay rapidly, the summation over
+suﬃciently close (|ωnk −ωn′k′| ≤δ, the separation parameter). Since Lorentzians decay rapidly, the summation over
 ±ω is signiﬁcant only over a few linewidths (ϵ, the summation parameter; see eq 7). We have empirically found and
-tabulated δ in Table 1 for the radial order couplings n-n′ ∈ f-f, p1-p1, and p2-p2 (the signal strength depends only
+tabulated δ in Table 1 for the radial order couplings n-n′ ∈f-f, p1-p1, and p2-p2 (the signal strength depends only
 weakly on ϵ; we set it to 3 line widths).
 Figure 1 shows that for any two adjacent ridges (adjacent n and n′), mode frequencies ωnk and ωn′k become spaced
 farther apart with increasing wavenumber kR⊙. It is also known that mode linewidth Γ grows with radial orders for
 a given kR⊙. Moreover, holding the spatial and temporal sampling rates constant, the spatial size and duration of
-observation set the total number of modes within a range of kR⊙ (and ω/2π) that can be clearly observed, thereby
+observation set the total number of modes within a range of kR⊙(and ω/2π) that can be clearly observed, thereby
 aﬀecting the quality of the seismic measurements. Owing to these factors, to maximize signal-to-noise (by visually
-inspecting the power-spectrum), the parameters describing the extent of coupling over diﬀerent ranges of kR⊙ at ﬁxed
-radial order are diﬀerent. In wavenumber, we restrict our analysis to within 200 ≤ kR⊙ ≤ 2000 and qR⊙ ≤ 300. Our
-frequency range is conﬁned to span the range over which acoustic modes are observed (2 ≤ ω/2π ≤ 5 in mHz).
+inspecting the power-spectrum), the parameters describing the extent of coupling over diﬀerent ranges of kR⊙at ﬁxed
+radial order are diﬀerent. In wavenumber, we restrict our analysis to within 200 ≤kR⊙≤2000 and qR⊙≤300. Our
+frequency range is conﬁned to span the range over which acoustic modes are observed (2 ≤ω/2π ≤5 in mHz).
 
 Imaging near-surface flows using mode-coupling analysis
 5
 Coupling
-kR⊙ range
+kR⊙range
 # of
 δ
 modes
@@ -286,16 +286,16 @@ Bk,q from the linear relation in eq 6. We describe inversion using regularized-l
 leave Subtractive Optimally Localized Averages (SOLA, Pijpers & Thompson 1994) for Appendix B. The methods
 complement each other (see Sekii 1997), where RLS tries to minimize the misﬁt between data and model, whereas
 SOLA gives better localization. For total number of modes M, RLS scales as MxJ where J is the number of basis
-functions fj(z) (J ≪ M; see eq 3 and section 3.1), whereas SOLA scales as M 2 (see Appendix B). For M > 5000,
+functions fj(z) (J ≪M; see eq 3 and section 3.1), whereas SOLA scales as M 2 (see Appendix B). For M > 5000,
 computation starts to quickly become expensive for SOLA.
 Mode eigenfunctions peak near the surface, with higher radial orders possessing smaller peaks in the interior. While
 f-f coupling alone has enough sensitivity to probe perturbations at supergranular scales close to surface, signal is
 present even in p1-p1, and p2-p2 (see Figure 3), and possibly other higher order self- and cross-couplings. Since we are
 interested in only surface ﬂows, we leave higher order coupling to future work.
 It bears mentioning that the slopes of the ridges in the kR⊙-ν spectrum (Figure 1) increase with radial order. This
-limits us to low-to-intermediate kR⊙ (< 1000) for these higher radial orders if we are to remain under the acoustic cut-
-oﬀ frequency of 5.3mHz. It also becomes imperative to use a spatially larger observation patch to gain access to signals
-from low kR⊙ - too large an observation region could possibly render invalid the Cartesian geometry approximation.
+limits us to low-to-intermediate kR⊙(< 1000) for these higher radial orders if we are to remain under the acoustic cut-
+oﬀfrequency of 5.3mHz. It also becomes imperative to use a spatially larger observation patch to gain access to signals
+from low kR⊙- too large an observation region could possibly render invalid the Cartesian geometry approximation.
 Regardless, in addition to performing inversions using all the couplings stacked together, we also demonstrate inversions
 separately for the three couplings (see Table 2) in order to account for the full gamut of mode-coupling as a signal-rich
 helioseismic technique.
@@ -303,9 +303,9 @@ helioseismic technique.
 For given q, the forward problem may be stated as
 KU = B,
 (10)
-with the aim to minimize the misﬁt �
+with the aim to minimize the misﬁt P
 k
-||KU − B||2, with || ||2 denoting the L2 norm. Here, K is the matrix formed
+||KU −B||2, with || ||2 denoting the L2 norm. Here, K is the matrix formed
 by the sensitivity kernels: {Cqj,k, Dqj,k}. U is a vector composed of the ﬂow coeﬃcients: {Pqj, Tqj} and B is a vector
 composed of computed B-coeﬃcients: {Bk,q}. The least-squares problem is solved simultaneously for poloidal and
 toroidal ﬂow. We use B-spline basis functions as our fj(z), comprising 11 knots spaced uniformly in acoustic radius,
@@ -320,28 +320,28 @@ U =(K⊺Λ−1K)−1K⊺Λ−1B.
 
 6
 Mani et al.
-Figure 2. Left: Averaging kernel for poloidal ﬂow (see section B.2, eq B17, and left panel of Figure 8) for qR⊙ = [−112, −45],
-at the depth zo = −0.41 Mm. Right: L-curve for the mode qR⊙ = [−112, −45]; the knee (λ = 2.48) is marked by a blue
+Figure 2. Left: Averaging kernel for poloidal ﬂow (see section B.2, eq B17, and left panel of Figure 8) for qR⊙= [−112, −45],
+at the depth zo = −0.41 Mm. Right: L-curve for the mode qR⊙= [−112, −45]; the knee (λ = 2.48) is marked by a blue
 diamond.
-Since the least-squares problem is typically ill-posed, we restate the minimization as �
+Since the least-squares problem is typically ill-posed, we restate the minimization as P
 k
-||KU − B||2 + λ||U||2 with
-the regularization parameter λ which this results in a trade-oﬀ between misﬁt reduction (ﬁrst term) and solution
+||KU −B||2 + λ||U||2 with
+the regularization parameter λ which this results in a trade-oﬀbetween misﬁt reduction (ﬁrst term) and solution
 norm minimization (second term). Under-regularizing can lead to a solution U that is dominated by errors in the
 data and on the other hand, over-regularizing may smooth or damp the solution more than necessary. Including this
 regularization makes the problem better conditioned and is now deﬁned as
 U = (K⊺Λ−1K + λI)−1K⊺Λ−1B,
 (13)
 where I is the identity matrix for L1 regularization. The knee-point of the L-curve (Hansen 1992), a curve formed
-by plotting ||U||2 vs ||KU − B||2 for diﬀerent values of λ (see right panel of Figure 2), is usually chosen as the
+by plotting ||U||2 vs ||KU −B||2 for diﬀerent values of λ (see right panel of Figure 2), is usually chosen as the
 regularization parameter. After successfully inverting for U, we reconstruct the ﬂow using eq 3. Results for poloidal
 ﬂow Pq are shown in Figure 3.
 4. LCT
 To improve conﬁdence in the imaged near-surface ﬂows through mode-coupling, we compare them with ﬂows obtained
 from Local Correlation Tracking method (LCT; November & Simon 1988).
 LCT provides surface-ﬂow maps by
-examining the advection of convective granules (1.2 Mm, qR⊙ ≈ 3500; Hathaway et al. 2015) by underlying larger-
-scale ﬂow systems. Since granules are used as tracers, which are much smaller in size than supergranules (≈ 35 Mm),
+examining the advection of convective granules (1.2 Mm, qR⊙≈3500; Hathaway et al. 2015) by underlying larger-
+scale ﬂow systems. Since granules are used as tracers, which are much smaller in size than supergranules (≈35 Mm),
 LCT is an eﬀective method (see Rieutord et al. 2001) to produce surface horizontal ﬂow maps of supergranulation.
 Time series of intensity images from HMI, with the same properties of the Dopplercubes described in section 2
 (tracking rate, date, location, size and duration of observed patch, spatial and temporal sampling rate), are ob-
@@ -353,9 +353,9 @@ A Gaussian of width sigma allows to isolate a small region surrounding the grid
 moved by granules are usually in sub-pixel regime. The convention for the direction of x is the same as described in
 section 1.1. The two patches I1, I2 are then cross correlated for diﬀerent values of position shifts ∆x,
 Cij(∆x, ∆y) =
-�
+Z
 dx I∗
-1(−x)I2(∆x − x).
+1(−x)I2(∆x −x).
 (14)
 The shift ∆x = (∆x, ∆y) that maximizes the cross-correlation Cij is taken to be the proper motion of the granule.
 Provided that the time diﬀerence ∆t, here 45 seconds, between the images is less than the lifetime of granules (< 10
@@ -366,7 +366,7 @@ requires the input sigma, which we set to 4 pix, that captures the extent of loc
 
 Imaging near-surface flows using mode-coupling analysis
 7
-Figure 3. Top: Inverted poloidal ﬂow power-spectrum for the three couplings f-f, p1-p1, and p2-p2 as a function of qxR⊙ and
+Figure 3. Top: Inverted poloidal ﬂow power-spectrum for the three couplings f-f, p1-p1, and p2-p2 as a function of qxR⊙and
 qyR⊙. Bottom: Corresponding power-spectrum averaged over the azimuthal angle. Shaded region shows ±1σ error around the
 mean. Total power appears to increase through the radial orders. Power is in units of m2/s4.
 dominant length scale of the velocity ﬁeld in the images. The Postel-projected intensity images are fed as input to the
@@ -387,15 +387,15 @@ y = q2, div is given by,
 ∇h · uuu(q, z) = q2∂zP,
 (16)
 and curl is given by,
-�
-∇ × uuu(q, z)
-�
+h
+∇× uuu(q, z)
+i
 z = q2T.
 (17)
 We follow similar steps to those taken in Langfellner et al. (2015) for comparison of ﬂow maps with LCT. The
-essential step for comparison at diﬀerent length scales is to bandpass ﬁlter the Fourier-space ﬂow around the qR⊙ of
+essential step for comparison at diﬀerent length scales is to bandpass ﬁlter the Fourier-space ﬂow around the qR⊙of
 interest (see Figure 4), and subsequently convert it to real space.
-We seek to show comparisons (see Figures 5, 6, and 7) for qR⊙ = 100, 150, 200 and 250. To suﬃciently delineate
+We seek to show comparisons (see Figures 5, 6, and 7) for qR⊙= 100, 150, 200 and 250. To suﬃciently delineate
 ﬂows at these length scales, we apply a Gaussian ﬁlter (see Figure 4) to ﬂows obtained from eqns 16 and 17. The
 Gaussian is centered at the desired wavenumber with a half-width of 25. We then perform a 2D Fourier transform to
 obtain a real-space steady-ﬂow map.
@@ -403,16 +403,16 @@ obtain a real-space steady-ﬂow map.
 8
 Mani et al.
 Figure 4. Left: Divergence-ﬂow power spectrum |div|2, from eqn 16, obtained from inversion using all the couplings. The
-power-spectrum is then ﬁltered with a bandpass centered around qR⊙ = 150 (middle panel). The resulting spectra is shown in
+power-spectrum is then ﬁltered with a bandpass centered around qR⊙= 150 (middle panel). The resulting spectra is shown in
 the right panel. The units of |div|2 are in s−2. For illustration, we show the action of the ﬁlter on the power-spectrum |div|2
 since it is a real quantity, but recall that it is the Fourier-space ﬂow div (a complex quantity) on which we apply the ﬁlter.
 For LCT, we ﬁrst apply a Gaussian smoothing to vx and vy to average over small-scale features; the extent of
-smoothing depends on the length scale qR⊙ to be compared with mode-coupling.
+smoothing depends on the length scale qR⊙to be compared with mode-coupling.
 div and curl are then simply
 computed by
 div = ∂xvx + ∂yvy,
 (18)
-curl = ∂xvy − ∂yvx.
+curl = ∂xvy −∂yvx.
 (19)
 We then perform a 2D Fourier transform on eqns 18 and 19, apply the same Gaussian ﬁlters as for mode-coupling,
 and transform back to real space.
@@ -421,10 +421,10 @@ performed for mode-coupling (M-C) and for LCT -
 M-C :
 φ(x, y; t)
 3D FFT
-=====⇒ φω
+=====⇒φω
 k, Bk,q
 inversion
-======⇒ P, T
+======⇒P, T
 ∇h·
 ===⇒
 ∇×
@@ -436,7 +436,7 @@ div, curl
 LCT :
 I1, I2
 FLCT
-====⇒ vx, vy
+====⇒vx, vy
 smooth,
 ======⇒
 ∇h· ∇×
@@ -448,11 +448,11 @@ Filtered,
 Fourier-space
 ﬂows
 2D FFT
-=====⇒ div, curl
+=====⇒div, curl
 6. RESULTS
 Table 2 summarizes the results of the comparison between ﬂows obtained from mode-coupling and LCT. Figure 5,
 where we have used all the couplings to perform inversions, shows a 97% correlation between divergence ﬂows from
-the two methods near supergranular scale (qR⊙ ≈ 100). Near-surface ﬂows are imaged most faithfully when all the
+the two methods near supergranular scale (qR⊙≈100). Near-surface ﬂows are imaged most faithfully when all the
 couplings are used. Since vortical ﬂows are imaged at a region near the equator, it is possible that the source of
 vorticity is something other than Coriolis force. Nevertheless, there is also a very good agreement (87%) between
 the vortical ﬂows as inferred from the two methods, despite being an order of magnitude weaker than the divergence
@@ -460,7 +460,7 @@ the vortical ﬂows as inferred from the two methods, despite being an order of
 insuﬃcient modes for the p2-p2 case (see Table 1), we are unable to infer vortical ﬂows with conviction other than near
 the supergranular scale, as can be seen from Table 2. Figure 6 also aligns with what we believe can be accomplished
 through mode-coupling helioseismology - using f-f or p1-p1 alone to seismically infer near-surface divergence and vortical
-ﬂows at diﬀerent scales (qR⊙ = 100, 150) can yield extremely good agreement with LCT. As the length scale of the
+ﬂows at diﬀerent scales (qR⊙= 100, 150) can yield extremely good agreement with LCT. As the length scale of the
 inferred ﬂow moves further away from that of supergranules (Figure 7), the demand on signal-to-noise also increases.
 An adequate number of modes (and coupling strength between higher radial-orders) thus becomes a necessity to
 comment substantively on the ﬂows at these scales.
@@ -468,10 +468,10 @@ comment substantively on the ﬂows at these scales.
 
 Imaging near-surface flows using mode-coupling analysis
 9
-(a) qR⊙ = 100, f-f + p1-p1 + p2-p2
+(a) qR⊙= 100, f-f + p1-p1 + p2-p2
 Figure 5. Real-space divergence ﬂows (left column, in units of 10−5s−1) and radial vorticity (right column, in units of 10−6s−1)
 for LCT (top row), and mode-coupling inversions through RLS using all the couplings (middle row), bandpass ﬁltered around
-qR⊙ = 100 (see Figure 4). Corresponding scatter plots and correlation coeﬃcients are shown in the bottom row. We cut edges
+qR⊙= 100 (see Figure 4). Corresponding scatter plots and correlation coeﬃcients are shown in the bottom row. We cut edges
 out from the ﬂow maps and compare a circular region of diameter ≈175 Mm. The slopes of the best-ﬁt line through the scatter
 plots are 0.51 for divergence and 0.01 for vorticity. The vorticity ﬂow maps are saturated to show only 40% of the maximum
 values.
@@ -494,17 +494,17 @@ and the regularization parameter to be used in the inversion. We then separately
 
 10
 Mani et al.
-(a) qR⊙ = 100, f-f
-(b) qR⊙ = 150, p1-p1
+(a) qR⊙= 100, f-f
+(b) qR⊙= 150, p1-p1
 Figure 6. Real-space divergence ﬂows (left column, in units of 10−5s−1) and radial vorticity (right column, in units of 10−6s−1)
 for LCT (top row), and mode-coupling inversion through RLS using (a) f-f coupling (bottom row), bandpass ﬁltered around
-qR⊙ = 100, and using (b) p1-p1 coupling (bottom row), bandpass ﬁltered around qR⊙ = 150. We cut edges out from the ﬂow
+qR⊙= 100, and using (b) p1-p1 coupling (bottom row), bandpass ﬁltered around qR⊙= 150. We cut edges out from the ﬂow
 maps and compare a circular region of diameter ≈175 Mm.
-(a) qR⊙ = 200, f-f + p1-p1 + p2-p2
-(b) qR⊙ = 250, f-f + p1-p1 + p2-p2
+(a) qR⊙= 200, f-f + p1-p1 + p2-p2
+(b) qR⊙= 250, f-f + p1-p1 + p2-p2
 Figure 7. Real-space divergence ﬂows (left column, in units of 10−5s−1) and radial vorticity (right column, in units of 10−6s−1)
 for LCT (top row), and mode-coupling inversion through RLS using all the couplings (bottom row), bandpass ﬁltered around
-(a) qR⊙ = 200, and (b) qR⊙ = 250. We cut edges out from the ﬂow maps and compare a circular region of diameter ≈175 Mm.
+(a) qR⊙= 200, and (b) qR⊙= 250. We cut edges out from the ﬂow maps and compare a circular region of diameter ≈175 Mm.
 vorticity maps for LCT for diﬀerent values of smoothing. These ﬂow maps are then compared with those obtained
 from inversions at all depths in the radial grid that are within 0.5 Mm from the surface. The highest correlation
 (corresponding to the above depths and smoothing) is noted and comparison ﬂow maps are plotted for the desired
@@ -594,40 +594,40 @@ As described in section 1.1, we seek to describe the ﬂow u as a function of q
 eq 3 into eq 2,
 uσ
 q(z) =
-�
+X
 j
-�
+
 q2 fjez + iq f ′
 j
-�
+	
 P σ
 jq + iq×ez fjT σ
 jq.
 (A1)
-For ﬂows in the anelastic limit (u ≪ speed of sound), we can denote the ﬂow perturbation operator as δLσ =
-−2iωρuσ · ∇ (see Hanasoge et al. 2017). Substituting Eq. A1 into the operator, we get,
+For ﬂows in the anelastic limit (u ≪speed of sound), we can denote the ﬂow perturbation operator as δLσ =
+−2iωρuσ · ∇(see Hanasoge et al. 2017). Substituting Eq. A1 into the operator, we get,
 δLσ
 q =
 −2iω ρ (iuσ
 q · k + uσ
 q · ez∂z),
 (A2)
-= −2iωρ �
+=−2iωρ P
 j
-�
+
 −k · q f ′
 jP σ
-jq − k · (q×ez) fjT σ
+jq −k · (q×ez) fjT σ
 jq + q2 fjP σ
 jq ∂z
-�
+	
 .
 (A3)
 
 12
 Mani et al.
 Express the mode eigenfunction describing oscillations in the Cartesian domain by (see Woodard 2006)
-ξk ≡ ξnk(z) = iˆkHnk(z)ez + ˆzVnk(z),
+ξk ≡ξnk(z) = iˆkHnk(z)ez + ˆzVnk(z),
 (A4)
 where H and V are real-valued functions; n and n′ are dropped for compactness of notation. Then the coupling of
 two modes ξk and ξk′ (k′ = k + q), by the ﬂow perturbation operator δLσ
@@ -636,17 +636,17 @@ k′(σ), is
 given by
 Λk
 k′(σ) ≡
-�
+Z
 dx (δLσ
 qξk) · ξ∗
 k′ =
-�
+Z
 dx
-�
-− 2iωρ
-�
+"
+−2iωρ
+X
 j
-�
+n
 q2 fjP σ
 jq (ˆk · ˆk
 ′ H′
@@ -655,24 +655,24 @@ k′ + V ′
 kV ∗
 k′)
 −
-�
+
 k · q f ′
 jP σ
 jq + k · (q×ez) fjT σ
 jq
-�
+
 (ˆk · ˆk
 ′ HkH∗
 k′ + VkV ∗
 k′)
-� �
+o #
 (A5)
 We desire to linearly relate the coupling integral in the above equation to the ﬂows P and T, through poloidal and
 toroidal sensitivity kernels, Cqj,k and Dqj,k respectively. Hence, they are given by
 Cqj,k =
-�
+Z
 dz ρ
-�
+h
 q2 fj (ˆk · ˆk
 ′ H′
 kH∗
@@ -684,10 +684,10 @@ j (ˆk · ˆk
 ′ HkH∗
 k′ + VkV ∗
 k′)
-�
+i
 ,
 Dqj,k = k · (q×ez)
-�
+Z
 dz ρ fj (ˆk · ˆk
 ′ HkH∗
 k′ + VkV ∗
@@ -698,7 +698,7 @@ measurement between modes k and k + q From eq 8 of Woodard (2014), we write the
 waveﬁeld cross-correlation as
 ⟨φω∗
 k φω+σ
-k+q ⟩ = Hω
+k+q ⟩= Hω
 kk′σΛk
 k′(σ),
 (A7)
@@ -718,7 +718,7 @@ Rω
 k =
 1
 ω2
-nk − ω2 − iωγnk/2,
+nk −ω2 −iωγnk/2,
 (A9)
 where ωnk is the resonant frequency of the mode, and γnk is the mode linewidth. Eq A9 can be derived by introducing
 mode damping −iωγρ as an operator in the diﬀerential equation that governs undamped, driven oscillations (see eq
@@ -732,13 +732,13 @@ are established. Mode normalization N is given by
 Nk = 1
 Q
 Q
-�
+X
 k
-�
+P
 ω
 |φω
 k|2
-�
+P
 ω
 Rω
 k
@@ -748,7 +748,7 @@ where the
 1
 Q
 Q
-�
+P
 k
 on the right-hand-side implies average over all [kx, ky] (Q terms in all) such that k = |k| is constant.
 This forces N to be isotropic, i.e., to only depend on k, and not k. The sum over ω is within ﬁve linewidths of ωnk.
@@ -769,16 +769,16 @@ Imaging near-surface flows using mode-coupling analysis
 B. SOLA INVERSIONS
 Subtractive Optimally Localized Averages (SOLA, Pijpers & Thompson 1994) aims to obtain a set of weight factors
 for the mode q and depth zo, which we will call αk,zo. A linear weighted sum of the measurements Bk,q in the fashion
-�
+P
 k
 αk,zoBk,q allows for an average value of the ﬂow Pq(z) to be estimated at the depth zo. To obtain the coeﬃcients
 αk,zo, it is assumed that a set of sensitivity kernels Kk,q(z) for the mode q can be summed up coherently to give an
-’averaging kernel’ that is localized at the depth zo. Conventionally, a Gaussian centered at zo and a width ∆ is chosen
+’averaging kernel’ that is localized at the depth zo. Conventionally, a Gaussian centered at zo and a width ∆is chosen
 which the averaging kernel should resemble after performing inversion.
 B.1. Kernels in the integral form
 Since the kernels in eq A6 are manifest as coeﬃcients on a basis fj(z), we ﬁrst derive kernels that can be expressed
 as a function of depth z (see Figure 8). It is convenient to derive in matrix form. Thus, with the following deﬁnitions:
-P ≡ Pq(z), p ≡ Pqj, F ≡ fj(z), B ≡ Bk,q C ≡ Cqj,k and K ≡ Kk,q(z), we write (assume only poloidal ﬂow for
+P ≡Pq(z), p ≡Pqj, F ≡fj(z), B ≡Bk,q C ≡Cqj,k and K ≡Kk,q(z), we write (assume only poloidal ﬂow for
 simplicity, the same derivations hold true for toroidal ﬂow as well)
 P = Fp
 (B11)
@@ -796,11 +796,11 @@ where
 K = (F T F)−1F T C,
 i.e.,
 Kk,q(z) =
-�
+X
 j,j′
-� �
+h Z
 dz fj(z)fj′(z)
-�−1
+i−1
 fj′(z)Cqj′,k
 (B14)
 B.2. Obtaining the coeﬃcients α
@@ -809,23 +809,23 @@ T (z, zo) =
 1
 √
 2π∆2 exp
-�z − zo
+z −zo
 2∆2
-�
+
 .
 (B15)
 This can be achieved by solving the optimization problem
 minimize X =
-�
+Z
 dz
-�
-T (z, zo) − Θq(z, zo)
-�2
+h
+T (z, zo) −Θq(z, zo)
+i2
 ,
 (B16)
 where we introduce the averaging kernel for mode q thus
 Θq(z, zo) =
-�
+X
 k
 αk,zoKk,q(z).
 (B17)
@@ -834,46 +834,46 @@ and B14.
 
 14
 Mani et al.
-Figure 8. Left: Kernel Kk,q(z) (eq B14) shown vs depth z for the three radial order couplings f-f, p1-p1, and p2-p2. qR⊙ =
-[−112, −45] and kR⊙ = [−853, −157] is chosen for all the radial order couplings for comparison.
+Figure 8. Left: Kernel Kk,q(z) (eq B14) shown vs depth z for the three radial order couplings f-f, p1-p1, and p2-p2. qR⊙=
+[−112, −45] and kR⊙= [−853, −157] is chosen for all the radial order couplings for comparison.
 Right: Averaging kernel
-(eq B17) using SOLA, for qR⊙ = [−112, −45] at depth z0 = −0.48 Mm, and the corresponding target Gaussian (eq B15).
+(eq B17) using SOLA, for qR⊙= [−112, −45] at depth z0 = −0.48 Mm, and the corresponding target Gaussian (eq B15).
 Integral of the averaging kernel over z is 0.89.
 Setting ∂X
-∂α → 0 gives us the matrix problem to be solved
+∂α →0 gives us the matrix problem to be solved
 A{α} = v,
 {α} =
-�
+h
 A + µI
-�−1
+i−1
 v,
 (B18)
 where the square matrix A =
-�
+R
 dz Kk,q(z)Kk′,q(z) and v =
-�
+R
 dz Kk,q(z)T (z, zo). Here, k′ is just a dummy index for
 denoting elements in the matrix A, (k′ ̸= k+q). In the last line of eq B18, we introduce regularization using an Identity
 matrix I, with the regularization parameter µ - purpose being the same as that described in section 3.1. Obtaining
 α thus becomes a highly expensive computationally for very large number of modes (see section 3). Substitute α
-obtained from eq B18 into last line of eq B13, and �
+obtained from eq B18 into last line of eq B13, and P
 k
 on both sides
-�
+X
 k
 αk,zoBσ
 k,q =
-�
+X
 k
 αk,zo
-�
+Z
 dz Kk,q(z)P σ
 q (z),
 =
-�
+Z
 dz Θq(z, zo)P σ
 q (z),
-≈ ⟨P σ
+≈⟨P σ
 q (zo)⟩
 (B19)
 Inversions can similarly be performed for multiple depths by choosing suitable widths for the target Gaussians. Di-
@@ -901,10 +901,10 @@ doi: 10.1007/s41116-020-00028-3
 
 Imaging near-surface flows using mode-coupling analysis
 15
-Figure 9. Left: Poloidal ﬂow power-spectrum for f-f as a function of qxR⊙ and qyR⊙. Right: Corresponding power-spectrum
-averaged over the azimuthal angle. Shaded region shows ±1 − σ error around the mean. Power is in units of m2/s4.
+Figure 9. Left: Poloidal ﬂow power-spectrum for f-f as a function of qxR⊙and qyR⊙. Right: Corresponding power-spectrum
+averaged over the azimuthal angle. Shaded region shows ±1 −σ error around the mean. Power is in units of m2/s4.
 Figure 10. Real-space divergence ﬂows (in units of 10−5s−1) for mode-coupling inversion through SOLA using f-f coupling,
-and LCT, bandpass ﬁltered around qR⊙ = 100. We cut edges out from the ﬂow maps and compare a circular region of diameter
+and LCT, bandpass ﬁltered around qR⊙= 100. We cut edges out from the ﬂow maps and compare a circular region of diameter
 ≈175 Mm. The scatter plot shows the agreement between the maps. The slopes of the best-ﬁt line through the scatter plot is
 1.05. For demonstration, we show inversions only for poloidal ﬂow using SOLA.
 De Rosa, M., Duvall, T. L., J., & Toomre, J. 2000, SoPh,
diff --git a/read/results/pymupdf/2201.00200.txt b/read/results/pymupdf/2201.00200.txt
index 34504e3..768dc6c 100644
--- a/read/results/pymupdf/2201.00200.txt
+++ b/read/results/pymupdf/2201.00200.txt
@@ -167,7 +167,7 @@ panel to a model with luminosity enhanced by a factor of ten.
 The dash-dotted red lines show ∆T/T0 (in %), the relative dif-
 ference between the time and space averages of the temperature,
 T, and the initial temperature, T0. The solid blue lines show the
-time and space averages of the sub-adiabaticity (∇ − ∇ad). The
+time and space averages of the sub-adiabaticity (∇−∇ad). The
 dashed black lines show the initial proﬁle of the sub-adiabaticity,
 (∇−∇ad)init. The convective boundary is indicated by the vertical
 solid line (see details in B21)
@@ -178,7 +178,7 @@ boundary found in the simulations of B21 is illustrated in Fig.
 cial enhancement in the luminosity by a factor of ten because the
 features are intensiﬁed in these ‘boosted’ models (upper panel).
 The ﬁgure shows the local heating in the overshooting layer and
-its impact on the sub-adiabaticity (∇ − ∇ad), with ∇ = d log T
+its impact on the sub-adiabaticity (∇−∇ad), with ∇= d log T
 d log P the
 2
 
@@ -212,10 +212,10 @@ deﬁned by
 A = 1
 Γ1
 d ln P
-d ln r − d ln ρ
+d ln r −d ln ρ
 d ln r ,
 (1)
-with Γ1 = (∂ ln P/∂ ln ρ)ad. Starting from a reference evolu-
+with Γ1 = (∂ln P/∂ln ρ)ad. Starting from a reference evolu-
 tionary model, Buldgen et al. (2020) used an inversion pro-
 cedure to iteratively reconstruct a solar model. Successive in-
 versions of the Ledoux discriminant allowed them to obtain a
@@ -243,22 +243,22 @@ the quantity (ASun - Aref).
 The second concerns the speed of sound. The same positive
 bump at the same location as for the Ledoux discriminant, A, is
 observed for the quantity (c2
-s,Sun − c2
+s,Sun −c2
 s,ref)/c2
 s,ref. The corrections
 applied to A during the reconstruction procedure also reduce the
 discrepancy in the speed of sound in the radiative region.
 The third concerns the entropy. Large discrepancies are ob-
 served in both the radiative region and the convective zone. The
-1 Less sub-adiabatic means that |∇ − ∇ad| decreases compared to the
+1 Less sub-adiabatic means that |∇−∇ad| decreases compared to the
 initial proﬁle.
-entropy discrepancy (S Sun − S ref)/S ref has two positive peaks in
+entropy discrepancy (S Sun −S ref)/S ref has two positive peaks in
 the radiative zone, one just below the overshooting region and a
-larger peak deeper at ∼ 40% of the stellar radius. This discrep-
+larger peak deeper at ∼40% of the stellar radius. This discrep-
 ancy is negative in the convective zone. The corrections applied
 to A help reduce these entropy discrepancies in both regions.
 The fourth concerns the density. The quantity (ρSun −
-ρref)/ρref has a negative peak in the radiative region, at ∼ 35%
+ρref)/ρref has a negative peak in the radiative region, at ∼35%
 of the stellar radius, and is positive in the convective zone.
 Importantly, Buldgen et al. (2020) mention that their recon-
 struction procedure gives similar Ledoux discriminant proﬁles
@@ -305,23 +305,23 @@ ature gradient in the overshooting layer that qualitatively repro-
 duces the behaviour displayed in Fig. 1. We deﬁne an overshoot-
 ing length dov = αovHP,CB, with HP,CB the pressure scale height
 at the convective boundary and αov a free parameter. We also de-
-ﬁne two radial locations, rov = rCB − dov and rmid = rCB − dov/2,
+ﬁne two radial locations, rov = rCB −dov and rmid = rCB −dov/2,
 with rCB the radial location of the convective boundary. The tem-
-perature gradient is modiﬁed as follows. For rmid ≤ r < rCB, we
+perature gradient is modiﬁed as follows. For rmid ≤r < rCB, we
 use
-∇ = g(r)∇ad + (1 − g(r))∇rad,
+∇= g(r)∇ad + (1 −g(r))∇rad,
 (2)
 with
-g(r) = sin{[(r − rmid)/(rCB − rmid)]a × π/2}.
+g(r) = sin{[(r −rmid)/(rCB −rmid)]a × π/2}.
 (3)
 3
 
 Baraﬀe et al.: Local heating due to convective overshooting and the solar modelling problem
-For rov ≤ r < rmid, we use
-∇ = ∇rad − h(r)∇ad,
+For rov ≤r < rmid, we use
+∇= ∇rad −h(r)∇ad,
 (4)
 with
-h(r) = b × sin{[(rmid − r)/(rmid − rov)] × π}.
+h(r) = b × sin{[(rmid −r)/(rmid −rov)] × π}.
 (5)
 Sine functions are used in Eqs. (3) and (5) to reproduce the
 smooth variations in the temperature gradient below the convec-
@@ -337,7 +337,7 @@ results, but we note that the results are insensitive to the value of
 b.
 3.2.1. Thermal equilibrium models
 The details of the procedure for the ﬁrst method are the follow-
-ing. We calculate the evolution of a 1 M⊙ model with an initial
+ing. We calculate the evolution of a 1 M⊙model with an initial
 helium mass fraction of 0.28, metallicity Z = 0.02, and a mix-
 ing length lmix = 1.9HP. We use a reference model that is in
 thermal equilibrium2 and has the luminosity and radius of the
diff --git a/read/results/pymupdf/2201.00201.txt b/read/results/pymupdf/2201.00201.txt
index 31d8868..88a2f93 100644
--- a/read/results/pymupdf/2201.00201.txt
+++ b/read/results/pymupdf/2201.00201.txt
@@ -50,7 +50,7 @@ rill 1942), and showed that the shorter periods are also accom-
 panied by a higher velocity dispersion. Furthermore, groups of
 LPVs with relatively short periods are characterized by a greater
 scale height above the Galactic plane. This was shown, using for
-⋆ Corresponding
+⋆Corresponding
 author:
 M.
 Trabucchi
@@ -126,9 +126,9 @@ We employed PARSEC-COLIBRI isochrones (Marigo et al.
 sion 1.2S) for the preceding evolution. The adopted set of
 isochrones covers the range 0.001 to 0.016 in initial metal-
 licity (Zi), with a 0.001 step, while it spans the age interval
-8.00 ≤ log(τ/yr) ≤ 10.45 with a step of 0.05. Since the AGB
+8.00 ≤log(τ/yr) ≤10.45 with a step of 0.05. Since the AGB
 phase is short-lived, it only spans a small range of initial masses
-for each given isochrone, of order of 10−2 M⊙ at most.
+for each given isochrone, of order of 10−2 M⊙at most.
 The adopted isochrones include linear pulsation periods from
 Trabucchi et al. (2019) for overtone modes and nonlinear periods
 computed with the period-mass-radius relation from Trabucchi
@@ -183,7 +183,7 @@ Kharchenko et al. (2016) and Baumgardt et al. (2013) for clusters
 in the Galaxy and LMC, respectively, thereby ensuring that ages
 would be homogeneously derived for clusters in both galaxies.
 Age uncertainties from Baumgardt et al. (2013), provided for
-each cluster, are generally around σlog(τ) ≃ 0.05. Kharchenko
+each cluster, are generally around σlog(τ) ≃0.05. Kharchenko
 et al. (2016) do not provide age uncertainties, but a reasonable
 upper limit for their method should be σlog(τ) = 0.2 based on
 the analysis of Kharchenko et al. (2005) (the same value was
@@ -191,7 +191,7 @@ adopted by Grady et al. 2019, in their Fig. 7).
 As discussed by Kamath et al. (2010), the age of the SMC
 cluster NGC 419 is believed to be around 1.4-1.6 Gyr. This is
 consistent with the value τ = 1.45 ± 0.05 Gyr from Goudfrooij
-et al. (2014), while it is as young as τ ≃ 0.89 ± 0.015 Gyr ac-
+et al. (2014), while it is as young as τ ≃0.89 ± 0.015 Gyr ac-
 cording to Perren et al. (2017). Since an accurate estimate is not
 necessary for our exploratory analysis, we took a rough average
 and assumed log(τ/yr) = 9.1 ± 0.1. NGC 419 and NGC 1846
@@ -207,8 +207,8 @@ their clusters we adopted the distance moduli µLMC = 18.49 ±
 (2017). We searched for data on interstellar extinction from sev-
 eral literature works (e.g., Nayak et al. 2016; Kharchenko et al.
 2016; Perren et al. 2017), all of which suggest that extinction
-in the Ks ﬁlter is smaller than ∼ 0.1 mag for most of the clus-
-ters we considered, and at most as large as ∼ 0.3 mag, which is
+in the Ks ﬁlter is smaller than ∼0.1 mag for most of the clus-
+ters we considered, and at most as large as ∼0.3 mag, which is
 negligible for our purposes.
 Article number, page 2 of 9
 
@@ -247,14 +247,14 @@ riod of LPVs pulsating in the FM decreases with increasing age.
 Crosses mark the average properties of the three groups of C-
 rich LPVs from Feast et al. (2006, their table 4), which ﬁt the
 general pattern with the exception of their group 3, estimated to
-be older than what our models predict at P ≃ 650.
+be older than what our models predict at P ≃650.
 We also show a linear best-ﬁt to the models distribution
 (weighted by NFM), which shows a fairly good agreement with
 the best-ﬁt to observations by Grady et al. (2019, also shown).
 However, the best-ﬁt line does not fully capture the properties
 of the predictions, nor of the observed trend. Indeed, models are
 indicative of a substantial dispersion around the relation. For in-
-stance, at 1 Gyr, the FM period ranges from ∼ 200 days to ∼ 550
+stance, at 1 Gyr, the FM period ranges from ∼200 days to ∼550
 days. Conversely, LPVs pulsating in the FM with a period of 350
 days are predicted to be at least ∼200 Myr old, but they can be as
 old as ∼3 Gyr. Observed data are consistent with the predicted
@@ -284,7 +284,7 @@ predictions and observations. We note that in both cases, the dis-
 tribution is skewed toward short periods, which seems to be true
 at all ages for O-rich stars. This can be seen in panel (a) of Fig. 2,
 which is a version of the PA plane limited to an O-rich compo-
-sition2. Indeed, although at τ ≲ 5 Gyr the observed sample is
+sition2. Indeed, although at τ ≲5 Gyr the observed sample is
 very scarce, it appears to be consistent with models predicting a
 more densely populated region in the shorter-period half of the
 PA distribution.
@@ -301,7 +301,7 @@ higher masses, so that younger C-rich models are more concen-
 trated at longer periods, leading to a steeper PA relation com-
 pared with the O-rich case. These predictions agree with ob-
 servations on the old side of the period distribution, while the
-scarcity of C stars at τ ≃ 0.6 Gyr prevents us from performing a
+scarcity of C stars at τ ≃0.6 Gyr prevents us from performing a
 comparison at younger ages.
 In appendix B, we provide analytic PA relations by ﬁtting the
 high-density parts of the O- and C-rich models’ distribution. We
@@ -343,7 +343,7 @@ a linear scale, normalized to maximum). Symbols represent observed LPVs (green:
 indicating their host cluster or literature source as indicated in the legend. The age uncertainties are marked by the error bars. The groups of
 galactic C-stars of Feast et al. (2006) are marked by crosses annotated with the group number. The solid and dotted line represent a linear best-ﬁt
 to models and the best-ﬁt by Grady et al. (2019), respectively. Period distributions at selected ages are compared in panels (b) and (c) and marked
-in panel (a) by the blue and red shaded areas (at log(τ/yr) ∼ 9.15 and ∼ 10.10, respectively). For clarity, the eﬀect of the TP-AGB boosting is
+in panel (a) by the blue and red shaded areas (at log(τ/yr) ∼9.15 and ∼10.10, respectively). For clarity, the eﬀect of the TP-AGB boosting is
 suppressed in panel (a).
 Fig. 2. Similar to Fig. 1, but limited to O-rich (left panel) and C-rich (right panel) LPVs. The solid line marks the best ﬁt to the models, while
 dashed lines are best ﬁts to the edges of the model distribution (see the text for more details).
@@ -370,7 +370,7 @@ warmer and have smaller radii compared with metal-rich ones.
 As a consequence, the bulk of the period distribution of metal-
 poor LPVs is at periods shorter than Pb, so they only contribute
 to the global distribution (i.e., at all Zi at a given age) over a
-small period range at P ≳ Pb. In contrast, metal-rich LPVs have
+small period range at P ≳Pb. In contrast, metal-rich LPVs have
 periods well beyond Pb, so they contribute both at that value and
 at longer periods. The result is an excess of FM-dominated LPVs
 near Pb, that is to say on the short side of the overall period dis-
@@ -684,7 +684,7 @@ Three of the sources without a spectral type lack Gaia pho-
 tometry, so they cannot be classiﬁed with the Gaia-2MASS. Two
 of them (LW5 and LW22 in 47 Tuc) have no match in Gaia
 EDR3, but they have NIR data and are probably O-rich based on
-their position in the J − Ks versus Ks color-magnitude diagram.
+their position in the J −Ks versus Ks color-magnitude diagram.
 The third source is one of the two stars in NGC 1903 from the
 list of Grady et al. (2019), which we identiﬁed with the 2MASS
 source J05171633-6920298. It is likely C-rich according to the
@@ -745,7 +745,7 @@ log(τ/yr) = a0 + a1 (P/ ˜P) + a2 (P/ ˜P)2 ,
 (where ˜P = 350 days) and employed a Lenvenberg-Marquardt
 nonlinear regression algorithm3 to derive the best-ﬁt coeﬃcients,
 which are listed in Table B.1. We remark that these best-ﬁt ex-
-pressions are only valid in the intervals 8.0 ≤ log(τ/yr) ≤ 10.3
+pressions are only valid in the intervals 8.0 ≤log(τ/yr) ≤10.3
 and 20 < P/days < 700 for O-rich composition, and within
 3 We made use of the Python library SciPy to perform Gaussian KDE
 modeling and best-ﬁt, respectively, by means of the gaussian_kde
@@ -786,7 +786,7 @@ upper edge
 8.498
 -1.827
 -0.9959
-8.6 ≤ log(τ/yr) ≤ 9.3 and 140 < P/days < 620 in the C-rich
+8.6 ≤log(τ/yr) ≤9.3 and 140 < P/days < 620 in the C-rich
 case.
 Because of the connection between age and initial mass, the
 PA relation can be translated into a period-initial mass relation,
@@ -803,7 +803,7 @@ namely the star-formation history and age-metallicity relation.
 Appendix C: The shape of the period distribution
 As an example case, we consider an isochrone of age log(τ/yr) =
 8.3 and initial metallicity Zi = 0.006. Stars on the TP-AGB have
-initial masses Mi ≃ 3.85 M⊙ over a small range of ∼ 10−3 M⊙.
+initial masses Mi ≃3.85 M⊙over a small range of ∼10−3 M⊙.
 The relation between period and initial mass is displayed in
 panel (a) of Fig. C.1, where isochrone portions undergoing
 Table B.2. Best-ﬁt coeﬃcients for the period-initial mass relation and
@@ -842,11 +842,11 @@ distributions for a few diﬀerent cases.
 It is instructive, to begin with, to ignore the eﬀect of thermal
 pulses and consider only the quiescent evolution (green lines in
 Fig. C.1). The smallest initial mass corresponds to a star that just
-entered the TP-AGB, when the FM has a period of ∼ 240 days
+entered the TP-AGB, when the FM has a period of ∼240 days
 but is not dominant. It only becomes dominant above a threshold
 radius Rdom,0, that is for periods longer than a (mass-dependent)
 critical period Pdom,0 (the solid gray line in Fig. C.1). The least
-evolved (quiescent) model with dominant FM has PFM ≃ 360
+evolved (quiescent) model with dominant FM has PFM ≃360
 days (green circle and horizontal line), corresponding to a sharp
 cut in the period distribution shown in panel (b) of Fig. C.1.
 As a star evolves along the AGB it expands, and its period be-
@@ -855,8 +855,8 @@ a higher initial mass are more evolved, hence they have a larger
 radius and a longer period. The rate at which a period increases
 with radius is not ﬁxed, but rather decreases with evolution. Ac-
 cording to the prescription of Trabucchi et al. (2021b), a period
-grows with radius as a broken power-law with exponent α ≃ 1.8
-if R < Rb, and with α ≃ 1.25 at larger radii.
+grows with radius as a broken power-law with exponent α ≃1.8
+if R < Rb, and with α ≃1.25 at larger radii.
 This is equivalent to saying that the period grows more
 slowly after it exceeds a critical value Pb = P(Rb), marked by
 the gray dotted line in Fig. C.1. The isochrone reaches it at
@@ -867,7 +867,7 @@ Fig. B.1. Similar to Fig. 2, but showing initial mass Mi in place of age. The be
 PFM – Mi relation are shown.
 Fig. C.1. Period distribution at ﬁxed age and metallicity. Panel (a) shows
 period as a function of initial mass (current mass on the top axis) on the
-TP-AGB for a ∼ 200 Myr old isochrone with Zi = 0.006. Red lines
+TP-AGB for a ∼200 Myr old isochrone with Zi = 0.006. Red lines
 show full thermal pulses, while blue lines ignore luminosity spikes and
 green lines show only the quiescent evolution. The same color code
 is used for the period distributions (normalized to their maximum) on
@@ -878,7 +878,7 @@ Gray lines mark the critical values of periods at which the FM becomes
 dominant (solid line), less sensitive to radius (dotted line, which occurs
 at the vertical line for this speciﬁc isochrone), and independent of radius
 (dashed line).
-Mi ≃ 3.8524 M⊙ (vertical gray line), when PFM ≃ 420 days. In
+Mi ≃3.8524 M⊙(vertical gray line), when PFM ≃420 days. In
 models with a smaller initial mass, the period is still increasing
 at a relatively large rate as the envelope expands, while in more
 massive models the period has already become less sensitive to
@@ -890,7 +890,7 @@ this maximum, while limiting the selection to DFMP, produces
 a distribution skewed toward short periods, as found in Sect. 3.
 If the luminosity dips following thermal pulses are taken
 into account (blue lines), the corresponding envelope contrac-
-tion causes the period to decrease, and the cut at ∼ 360 days
+tion causes the period to decrease, and the cut at ∼360 days
 becomes less sharp. Because of mass loss, the threshold period
 Pdom,0 is lowered, so that the shortest period associated with
 DFMP does not correspond to the least evolved model (green
diff --git a/read/results/pymupdf/2201.00214.txt b/read/results/pymupdf/2201.00214.txt
index 24bbe18..956a7d5 100644
--- a/read/results/pymupdf/2201.00214.txt
+++ b/read/results/pymupdf/2201.00214.txt
@@ -129,7 +129,7 @@ has ten different wavelength channels, three in white light and UV, and the othe
 channels. Between these seven, the 304 ﬁlter, which is mostly sensitive to chromospheric temper-
 atures (in order of T = 104.7K), not the corona, is not taken into account (Aschwanden et al. 2015).
 Therefore, we consider the images of the events in the six wavelengths (94, 131, 171, 193, 211, 335
-). These are covering the coronal temperature range from T ≈ 0.6 to T ≥ 16MK.
+). These are covering the coronal temperature range from T ≈0.6 to T ≥16MK.
 The two below data sets are ﬁnally selected to study thermal variations and coronal loops
 oscillations in ﬂaring or non-ﬂaring active regions. A few distinct loops are visible in the regions.
 Finally, these loops are chosen:
@@ -155,7 +155,7 @@ at (230, 165) arcsec and its width and height are 450
 ′′ × 456
 ′′ /750 × 775 pixels. The ﬂare
 occurring in this active region is an X2.1 class ﬂare located close to the disk center at latitude
-14◦ north and longitude 18◦ west (269.9 arcsec, 129.9 arcsec). This ﬂare initiates at 22:12UT,
+14◦north and longitude 18◦west (269.9 arcsec, 129.9 arcsec). This ﬂare initiates at 22:12UT,
 ends about 22:24UT with the peak at 22:20UT, and associates with a coronal mass ejection
 (CME) which occurs from 2011 September 6, 21:36:05T to 2011 September 7, 02:24:05T, with
 the radial velocity of 469 km/s,angular width of 252 deg, and position angle of 275 deg (for
@@ -207,7 +207,7 @@ in terms of the logarithm of the temperature, which has three free parameters (A
 Boerner, 2011):
 DEMi = dEMi
 dT
-= EMp,i exp (−[log (T) − log (Tp,i)
+= EMp,i exp (−[log (T) −log (Tp,i)
 2σ2
 T,i
 ).
@@ -216,7 +216,7 @@ In which, Tp,i is the DEM peak temperature, EMp,i is the peak EM function, and 
 logarithmic width of the temperature for that strip. To calculate the background-subtracted ﬂuxes
 (for each strip) we use Eq.6 of Aschwanden & Boerner (2011) (in below):
 F0λ =
-� dEM(T)
+Z dEM(T)
 dT
 Rλ(T)dT = ∑
 k
diff --git a/read/results/pymupdf/GeoTopo-book.txt b/read/results/pymupdf/GeoTopo-book.txt
index 5e9a446..ca23581 100644
--- a/read/results/pymupdf/GeoTopo-book.txt
+++ b/read/results/pymupdf/GeoTopo-book.txt
@@ -33,7 +33,7 @@ Folgen und Häufungspunkten nicht weiter schwer fallen. Diese Vorkenntnisse werd
 in „Analysis I“ vermittelt.
 Außerdem wird vorausgesetzt, dass (aﬃne) Vektorräume, Faktorräume, lineare Unabhängigkeit,
 der Spektralsatz und der projektive Raum P(R) aus „Lineare Algebra I“ bekannt sind. In „Lineare
-Algebra II“ wird der Begriﬀ der Orthonormalbasis eingeführt.
+Algebra II“ wird der Begriﬀder Orthonormalbasis eingeführt.
 
 iii
 (a) S2
@@ -159,105 +159,105 @@ Stichwortverzeichnis
 1 Topologische Grundbegriﬀe
 1.1 Topologische Räume
 Deﬁnition 1
-Ein topologischer Raum ist ein Paar (X, T) bestehend aus einer Menge X und T ⊆ P(X)
+Ein topologischer Raum ist ein Paar (X, T) bestehend aus einer Menge X und T ⊆P(X)
 mit folgenden Eigenschaften
-(i) ∅, X ∈ T
-(ii) Sind U1, U2 ∈ T, so ist U1 ∩ U2 ∈ T
-(iii) Ist I eine Menge und Ui ∈ T für jedes i ∈ I, so ist
-�
+(i) ∅, X ∈T
+(ii) Sind U1, U2 ∈T, so ist U1 ∩U2 ∈T
+(iii) Ist I eine Menge und Ui ∈T für jedes i ∈I, so ist
+[
 i∈I
-Ui ∈ T
+Ui ∈T
 Die Elemente von T heißen oﬀene Teilmengen von X.
-A ⊆ X heißt abgeschlossen, wenn X \ A oﬀen ist.
+A ⊆X heißt abgeschlossen, wenn X \ A oﬀen ist.
 Es gibt auch Mengen, die weder abgeschlossen, noch oﬀen sind wie z. B. [0, 1). Auch gibt es
 Mengen, die sowohl abgeschlossen als auch oﬀen sind.
 Bemerkung 1 (Mengen, die oﬀen & abgeschlossen sind, ex.)
-Betrachte ∅ und X mit der trivialen Topologie Ttriv = { ∅, X }.
-Es gilt: X ∈ T und ∅ ∈ T, d. h. X und ∅ sind oﬀen. Außerdem XC = X \ X = ∅ ∈ T und
-X \ ∅ = X ∈ T, d. h. X und ∅ sind als Komplement oﬀener Mengen abgeschlossen.
+Betrachte ∅und X mit der trivialen Topologie Ttriv = { ∅, X }.
+Es gilt: X ∈T und ∅∈T, d. h. X und ∅sind oﬀen. Außerdem XC = X \ X = ∅∈T und
+X \ ∅= X ∈T, d. h. X und ∅sind als Komplement oﬀener Mengen abgeschlossen.
 ■
 Beispiel 1 (Topologien)
 1) X = Rn mit der von der euklidischen Metrik erzeugten Topologie TEuklid:
-U ⊆ Rn oﬀen ⇔ für jedes x ∈ U gibt es r > 0,
-sodass Br(x) = { y ∈ Rn | d(x, y) < r } ⊆ U
+U ⊆Rn oﬀen ⇔für jedes x ∈U gibt es r > 0,
+sodass Br(x) = { y ∈Rn | d(x, y) < r } ⊆U
 Diese Topologie wird auch „Standardtopologie des Rn“ genannt. Sie beinhaltet unter
 anderem alle oﬀenen Kugeln, aber z. B. auch Schnitte zweier Kugeln mit unterschiedli-
 chem Mittelpunkt (vgl. Deﬁnition 1.ii).
 2) Jeder metrische Raum (X, d) ist auch ein topologischer Raum.
 3) Für eine Menge X heißt TDiskret = P(X) diskrete Topologie.
-4) X := R, TZ := { U ⊆ R | R \ U endlich } ∪ { ∅ } heißt Zariski-Topologie
+4) X := R, TZ := { U ⊆R | R \ U endlich } ∪{ ∅} heißt Zariski-Topologie
 Beobachtungen:
-• U ∈ TZ ⇔ ∃f ∈ R[X], sodass R \ U = V (f) = { x ∈ R | f(x) = 0 }
+• U ∈TZ ⇔∃f ∈R[X], sodass R \ U = V (f) = { x ∈R | f(x) = 0 }
 • Es gibt keine disjunkten oﬀenen Mengen in TZ.
 
 4
 1.1. TOPOLOGISCHE RÄUME
-5) X := Rn, TZ = {U ⊆ Rn|Es gibt Polynome f1, . . . , fr ∈ R[X1, . . . , Xn] sodass
+5) X := Rn, TZ = {U ⊆Rn|Es gibt Polynome f1, . . . , fr ∈R[X1, . . . , Xn] sodass
 Rn \ U = V (f1, . . . , fr)}
 6) X := { 0, 1 } , T = { ∅, { 0, 1 } , { 0 } } heißt Sierpińskiraum.
 ∅, { 0, 1 } , { 1 } sind dort alle abgeschlossenen Mengen.
 Deﬁnition 2
-Sei (X, T) ein topologischer Raum und x ∈ X.
-Eine Teilmenge U ⊆ X heißt Umgebung von x, wenn es ein U0 ∈ T gibt mit x ∈ U0 und
-U0 ⊆ U.
+Sei (X, T) ein topologischer Raum und x ∈X.
+Eine Teilmenge U ⊆X heißt Umgebung von x, wenn es ein U0 ∈T gibt mit x ∈U0 und
+U0 ⊆U.
 Gilt eine Eigenschaft in einer Umgebung, so sagt man, dass die Eigenschaft lokal gilt.
 Deﬁnition 3
-Sei (X, T) ein topologischer Raum und M ⊆ X eine Teilmenge.
-a) M◦ := { x ∈ M | M ist Umgebung von x } =
-�
+Sei (X, T) ein topologischer Raum und M ⊆X eine Teilmenge.
+a) M◦:= { x ∈M | M ist Umgebung von x } =
+[
 U⊆M
 U∈T
 U heißt Inneres oder
 oﬀener
 Kern von M.
 b) M :=
-�
+\
 M⊆A
 A abgeschlossen
 A heißt abgeschlossene Hülle oder Abschluss von M.
-c) ∂M := M \ M◦ heißt Rand von M.
+c) ∂M := M \ M◦heißt Rand von M.
 d) M heißt dicht in X, wenn M = X ist.
 Beispiel 2
-1) Sei X = R mit euklidischer Topologie und M = Q. Dann gilt: M = R und M◦ = ∅
+1) Sei X = R mit euklidischer Topologie und M = Q. Dann gilt: M = R und M◦= ∅
 2) Sei X = R und M = (a, b). Dann gilt: M = [a, b]
 3) Sei X = R, T = TZ und M = (a, b). Dann gilt: M = R
 Deﬁnition 4
 Sei (X, T) ein topologischer Raum.
-a) B ⊆ T heißt Basis der Topologie T, wenn jedes U ∈ T Vereinigung von Elementen
+a) B ⊆T heißt Basis der Topologie T, wenn jedes U ∈T Vereinigung von Elementen
 aus B ist.
-b) S ⊆ T heißt Subbasis der Topologie T, wenn jedes U ∈ T Vereinigung von endlichen
+b) S ⊆T heißt Subbasis der Topologie T, wenn jedes U ∈T Vereinigung von endlichen
 Durchschnitten von Elementen aus S ist.
 Beispiel 3 (Basis und Subbasis)
 1) Jede Basis ist auch eine Subbasis, z.B.
-S = { (a, b) | a, b ∈ R, a < b } ist für R mit der Standardtopologie sowohl Basis als
+S = { (a, b) | a, b ∈R, a < b } ist für R mit der Standardtopologie sowohl Basis als
 auch Subbasis.
 2) Gegeben sei X = Rn mit euklidischer Topologie T. Dann ist
-B = { Br(x) | r ∈ Q>0, x ∈ Qn }
+B = { Br(x) | r ∈Q>0, x ∈Qn }
 ist eine abzählbare Basis von T.
 3) Sei (X, T) ein topologischer Raum mit X = { 0, 1, 2 } und T = { ∅, { 0 } , { 0, 1 } , { 0, 2 } , X }.
 Dann ist S = { ∅, { 0, 1 } , { 0, 2 } } eine Subbasis von T, da gilt:
 
 5
 1.1. TOPOLOGISCHE RÄUME
-• S ⊆ T
-• ∅, { 0, 1 } und { 0, 2 } ∈ S
-• { 0 } = { 0, 1 } ∩ { 0, 2 }
-• X = { 0, 1 } ∪ { 0, 2 }
+• S ⊆T
+• ∅, { 0, 1 } und { 0, 2 } ∈S
+• { 0 } = { 0, 1 } ∩{ 0, 2 }
+• X = { 0, 1 } ∪{ 0, 2 }
 Allerings ist S keine Basis von (X, T), da { 0 } nicht als Vereinigung von Elementen
 aus S erzeugt werden kann.
 Bemerkung 2
-Sei X eine Menge und S ⊆ P(X). Dann gibt es genau eine Topologie T auf X, für die S
+Sei X eine Menge und S ⊆P(X). Dann gibt es genau eine Topologie T auf X, für die S
 Subbasis ist.
 Deﬁnition 5
-Sei (X, T) ein topologischer Raum und Y ⊆ X.
-TY := { U ∩ Y | U ∈ T } ist eine Topologie auf Y .
+Sei (X, T) ein topologischer Raum und Y ⊆X.
+TY := { U ∩Y | U ∈T } ist eine Topologie auf Y .
 TY heißt Teilraumtopologie und (Y, TY ) heißt ein Teilraum von (X, T).
 Die Teilraumtopologie wird auch Spurtopologie oder Unterraumtopologie genannt.
 Deﬁnition 6
 Seien X1, X2 topologische Räume.
-U ⊆ X1 × X2 sei oﬀen, wenn es zu jedem x = (x1, x2) ∈ U Umgebungen Ui um xi mit
-i = 1, 2 gibt, sodass U1 × U2 ⊆ U gilt.
-T = { U ⊆ X1 × X2 | U oﬀen } ist eine Topologie auf X1×X2. Sie heißt Produkttopologie.
+U ⊆X1 × X2 sei oﬀen, wenn es zu jedem x = (x1, x2) ∈U Umgebungen Ui um xi mit
+i = 1, 2 gibt, sodass U1 × U2 ⊆U gilt.
+T = { U ⊆X1 × X2 | U oﬀen } ist eine Topologie auf X1×X2. Sie heißt Produkttopologie.
 B = { U1 × U2 | Ui oﬀen in Xi, i = 1, 2 } ist eine Basis von T.
 U
 x
@@ -267,10 +267,10 @@ U2
 U1
 X1
 X2
-Abbildung 1.1: Zu x = (x1, x2) gibt es Umgebungen U1, U2 mit U1 × U2 ⊆ U
+Abbildung 1.1: Zu x = (x1, x2) gibt es Umgebungen U1, U2 mit U1 × U2 ⊆U
 Beispiel 4 (Produkttopologien)
 1) X1 = X2 = R mit euklidischer Topologie.
-⇒ Die Produkttopologie auf R × R = R2 stimmt mit der euklidischen Topologie auf
+⇒Die Produkttopologie auf R × R = R2 stimmt mit der euklidischen Topologie auf
 R2 überein.
 2) X1 = X2 = R mit Zariski-Topologie. T Produkttopologie auf R2: U1 × U2
 (Siehe Abbildung 1.2)
@@ -281,17 +281,17 @@ U1 = R \ N
 U2 = R \ N
 Abbildung 1.2: Zariski-Topologie auf R2
 Deﬁnition 7
-Sei X ein topologischer Raum, ∼ eine Äquivalenzrelation auf X, X = X/∼ sei die Menge
-der Äquivalenzklassen, π : X → X,
-x �→ [x]∼.
+Sei X ein topologischer Raum, ∼eine Äquivalenzrelation auf X, X = X/∼sei die Menge
+der Äquivalenzklassen, π : X →X,
+x 7→[x]∼.
 TX :=
-�
-U ⊆ X
-�� π−1(U) ∈ TX
-�
+
+U ⊆X
+ π−1(U) ∈TX
+	
 (X, TX) heißt Quotiententopologie.
 Beispiel 5
-X = R, a ∼ b :⇔ a − b ∈ Z
+X = R, a ∼b :⇔a −b ∈Z
 R
 -1
 0
@@ -305,13 +305,13 @@ a
 U
 a
 π−1(u)
-0 ∼ 1, d. h. [0] = [1]
+0 ∼1, d. h. [0] = [1]
 Beispiel 6
-Sei X = R2 und (x1, y1) ∼ (x2, y2) ⇔ x1 − x2 ∈ Z und y1 − y2 ∈ Z. Dann ist X/∼ ein Torus.
+Sei X = R2 und (x1, y1) ∼(x2, y2) ⇔x1 −x2 ∈Z und y1 −y2 ∈Z. Dann ist X/∼ein Torus.
 Beispiel 7 (Projektiver Raum)
 X = Rn+1 \ { 0 } ,
-x ∼ y ⇔ ∃λ ∈ R× mit y = λx
-⇔ x und y liegen auf der gleichen
+x ∼y ⇔∃λ ∈R× mit y = λx
+⇔x und y liegen auf der gleichen
 Ursprungsgerade
 X = Pn(R)
 
@@ -330,35 +330,35 @@ Also für n = 1:
 4
 1.2 Metrische Räume
 Deﬁnition 8
-Sei X eine Menge. Eine Abbildung d : X × X → R+
+Sei X eine Menge. Eine Abbildung d : X × X →R+
 0 heißt Metrik, wenn gilt:
 (i) Deﬁnitheit:
-d(x, y) = 0 ⇔ x = y
-∀x, y ∈ X
+d(x, y) = 0 ⇔x = y
+∀x, y ∈X
 (ii) Symmetrie:
 d(x, y) = d(y, x)
-∀x, y ∈ X
+∀x, y ∈X
 (iii) Dreiecksungleichung:
-d(x, z) ≤ d(x, y) + d(y, z)
-∀x, y, z ∈ X
+d(x, z) ≤d(x, y) + d(y, z)
+∀x, y, z ∈X
 Das Paar (X, d) heißt ein metrischer Raum.
 Bemerkung 3
 Sei (X, d) ein metrischer Raum und
-Br(x) := { y ∈ X | d(x, y) < r } für x ∈ X, r ∈ R+
-B = { Br(x) ⊆ P(X) | x ∈ X, r ∈ R+ } ist Basis einer Topologie auf X.
+Br(x) := { y ∈X | d(x, y) < r } für x ∈X, r ∈R+
+B = { Br(x) ⊆P(X) | x ∈X, r ∈R+ } ist Basis einer Topologie auf X.
 Deﬁnition 9
-Seien (X, dX) und (Y, dY ) metrische Räume und ϕ : X → Y eine Abbildung mit
-∀x1, x2 ∈ X : dX(x1, x2) = dY (ϕ(x1), ϕ(x2))
+Seien (X, dX) und (Y, dY ) metrische Räume und ϕ : X →Y eine Abbildung mit
+∀x1, x2 ∈X : dX(x1, x2) = dY (ϕ(x1), ϕ(x2))
 Dann heißt ϕ eine Isometrie von X nach Y .
 Beispiel 8 (Skalarprodukt erzeugt Metrik)
 Sei V ein euklidischer oder hermitescher Vektorraum mit Skalarprodukt ⟨·, ·⟩. Dann wird V
 durch d(x, y) :=
-�
-⟨x − y, x − y⟩ zum metrischen Raum.
+p
+⟨x −y, x −y⟩zum metrischen Raum.
 Beispiel 9 (diskrete Metrik)
 Sei X eine Menge. Dann heißt
 d(x, y) =
-�
+(
 0
 falls x = y
 1
@@ -368,7 +368,7 @@ die diskrete Metrik. Die Metrik d induziert die diskrete Topologie.
 8
 1.2. METRISCHE RÄUME
 Beispiel 10
-X = R2 und d ((x1, y1), (x2, y2)) := max(∥x1 − x2∥, ∥y1 − y2∥) ist Metrik.
+X = R2 und d ((x1, y1), (x2, y2)) := max(∥x1 −x2∥, ∥y1 −y2∥) ist Metrik.
 Beobachtung: d erzeugt die euklidische Topologie.
 Br(0) =
 r
@@ -395,10 +395,10 @@ X = R2
 4
 Deﬁnition 10
 Ein topologischer Raum X heißt hausdorﬀsch, wenn es für je zwei Punkte x ̸= y in X
-Umgebungen Ux um x und Uy um y gibt, sodass Ux ∩ Uy = ∅.
+Umgebungen Ux um x und Uy um y gibt, sodass Ux ∩Uy = ∅.
 Bemerkung 4 (Trennungseigenschaft)
 Metrische Räume sind hausdorﬀsch, wegen
-d(x, y) > 0 ⇒ ∃ε > 0 : Bε(x) ∩ Bε(y) = ∅
+d(x, y) > 0 ⇒∃ε > 0 : Bε(x) ∩Bε(y) = ∅
 Beispiel 12 (Topologische Räume und Hausdorﬀ-Räume)
 1) (R, TZ) ist ein topologischer Raum, der nicht hausdorﬀsch ist.
 2) (R, TEuklid) ist ein topologischer Hausdorﬀ-Raum.
@@ -407,15 +407,15 @@ Seien X, X1, X2 Hausdorﬀ-Räume.
 a) Jeder Teilraum von X ist hausdorﬀsch.
 b) X1 × X2 ist hausdorﬀsch (vgl. Abbildung 1.4).
 Deﬁnition 11
-Sei X ein topologischer Raum und (x)n∈N eine Folge in X. x ∈ X heißt Grenzwert oder
-Limes von (xn), wenn es für jede Umgebung U von x ein n0 gibt, sodass xn ∈ U für alle
-n ≥ n0.
+Sei X ein topologischer Raum und (x)n∈N eine Folge in X. x ∈X heißt Grenzwert oder
+Limes von (xn), wenn es für jede Umgebung U von x ein n0 gibt, sodass xn ∈U für alle
+n ≥n0.
 Bemerkung 6
 Ist X hausdorﬀsch, so hat jede Folge in X höchstens einen Grenzwert.
 Beweis: Sei (xn) eine konvergierende Folge und x und y Grenzwerte der Folge.
-Da X hausdorﬀsch ist, gibt es Umgebungen Ux von x und Uy von y mit Ux ∩ Uy = ∅ falls
-x ̸= y. Da (xn) gegen x und y konvergiert, existiert ein n0 mit xn ∈ Ux ∩ Uy für alle n ≥ n0
-⇒ x = y
+Da X hausdorﬀsch ist, gibt es Umgebungen Ux von x und Uy von y mit Ux ∩Uy = ∅falls
+x ̸= y. Da (xn) gegen x und y konvergiert, existiert ein n0 mit xn ∈Ux ∩Uy für alle n ≥n0
+⇒x = y
 ■
 1Diese Metrik wird auch „französische Eisenbahnmetrik“ genannt.
 
@@ -432,42 +432,42 @@ X2
 Abbildung 1.4: Wenn X1, X2 hausdorﬀsch sind, dann auch X1 × X2
 1.3 Stetigkeit
 Deﬁnition 12
-Seien (X, TX), (Y, TY ) topologische Räume und f : X → Y eine Abbildung.
-a) f heißt stetig :⇔ ∀U ∈ TY : f−1(U) ∈ TX.
+Seien (X, TX), (Y, TY ) topologische Räume und f : X →Y eine Abbildung.
+a) f heißt stetig :⇔∀U ∈TY : f−1(U) ∈TX.
 b) f heißt Homöomorphismus, wenn f stetig ist und es eine stetige Abbildung g :
-Y → X gibt, sodass g ◦ f = idX und f ◦ g = idY .
+Y →X gibt, sodass g ◦f = idX und f ◦g = idY .
 Bemerkung 72
-Seien X, Y metrische Räume und f : X → Y eine Abbildung.
-Dann gilt: f ist stetig ⇔ zu jedem x ∈ X und jedem ε > 0 gibt es δ(x, ε) > 0, sodass für
-alle y ∈ X mit d(x, y) < δ gilt dY (f(x), f(y)) < ε.
-Beweis: „⇒“: Sei x ∈ X, ε > 0 gegeben und U := Bε(f(x)).
+Seien X, Y metrische Räume und f : X →Y eine Abbildung.
+Dann gilt: f ist stetig ⇔zu jedem x ∈X und jedem ε > 0 gibt es δ(x, ε) > 0, sodass für
+alle y ∈X mit d(x, y) < δ gilt dY (f(x), f(y)) < ε.
+Beweis: „⇒“: Sei x ∈X, ε > 0 gegeben und U := Bε(f(x)).
 Dann ist U oﬀen in Y .
 Def. 12.a
-=====⇒ f−1(U) ist oﬀen in X. Dann ist x ∈ f−1(U).
-⇒ ∃δ > 0, sodass Bδ(x) ⊆ f−1(U)
-⇒ f(Bδ(x)) ⊆ U
-⇒ { y ∈ X | dX(x, y) < δ } ⇒ Beh.
-„⇐“: Sei U ⊆ Y oﬀen, X ∈ f−1(U).
-Dann gibt es ε > 0, sodass Bε(f(x)) ⊆ U
+=====⇒f−1(U) ist oﬀen in X. Dann ist x ∈f−1(U).
+⇒∃δ > 0, sodass Bδ(x) ⊆f−1(U)
+⇒f(Bδ(x)) ⊆U
+⇒{ y ∈X | dX(x, y) < δ } ⇒Beh.
+„⇐“: Sei U ⊆Y oﬀen, X ∈f−1(U).
+Dann gibt es ε > 0, sodass Bε(f(x)) ⊆U
 Vor.
-==⇒ Es gibt δ > 0, sodass f(Bδ(x)) ⊆ Bε(f(x)))
-⇒ Bδ(x) ⊆ f−1(Bε(f(x))) ⊆ f−1(U)
+==⇒Es gibt δ > 0, sodass f(Bδ(x)) ⊆Bε(f(x)))
+⇒Bδ(x) ⊆f−1(Bε(f(x))) ⊆f−1(U)
 ■
 Bemerkung 8
-Seien X, Y topologische Räume und f : X → Y eine Abbildung. Dann gilt:
+Seien X, Y topologische Räume und f : X →Y eine Abbildung. Dann gilt:
 f ist stetig
-⇔ für jede abgeschlossene Teilmenge A ⊆ Y gilt : f−1(A) ⊆ X ist abgeschlossen.
+⇔für jede abgeschlossene Teilmenge A ⊆Y gilt : f−1(A) ⊆X ist abgeschlossen.
 Beispiel 13 (Stetige Abbildungen und Homöomorphismen)
-1) Für jeden topologischen Raum X gilt: idX : X → X ist Homöomorphismus.
+1) Für jeden topologischen Raum X gilt: idX : X →X ist Homöomorphismus.
 2Es wird die Äquivalenz von Stetigkeit im Sinne der Analysis und Topologie auf metrischen Räumen gezeigt.
 
 11
 1.3. STETIGKEIT
 2) Ist (Y, TY ) trivialer topologischer Raum, d. h. TY = Ttriv, so ist jede Abbildung
-f : X → Y stetig.
-3) Ist X diskreter topologischer Raum, so ist f : X → Y stetig für jeden topologischen
+f : X →Y stetig.
+3) Ist X diskreter topologischer Raum, so ist f : X →Y stetig für jeden topologischen
 Raum Y und jede Abbildung f.
-4) Sei X = [0, 1), Y = S1 = { z ∈ C | ∥z∥ = 1 } und f(t) = e2πit.
+4) Sei X = [0, 1), Y = S1 = { z ∈C | ∥z∥= 1 } und f(t) = e2πit.
 R
 0
 1
@@ -477,61 +477,61 @@ g
 Abbildung 1.5: Beispiel einer stetigen Funktion f, deren Umkehrabbildung g nicht stetig ist.
 Die Umkehrabbildung g ist nicht stetig, da g−1(U) nicht oﬀen ist (vgl. Abbildung 1.5).
 Bemerkung 9 (Verkettungen stetiger Abbildungen sind stetig)
-Seien X, Y, Z topologische Räume, f : X → Y und g : Y → Z stetige Abbildungen.
-Dann ist g ◦ f : X → Z stetig.
+Seien X, Y, Z topologische Räume, f : X →Y und g : Y →Z stetige Abbildungen.
+Dann ist g ◦f : X →Z stetig.
 X
 f
-�
+/
 g◦f
-�
+ 
 Y
 g
-�
+
 Z
-Beweis: Sei U ⊆ Z oﬀen ⇒ (g ◦ f)−1(U) = f−1(g−1(U)). g−1(U) ist oﬀen in Y weil g stetig
+Beweis: Sei U ⊆Z oﬀen ⇒(g ◦f)−1(U) = f−1(g−1(U)). g−1(U) ist oﬀen in Y weil g stetig
 ist, f−1(g−1(U)) ist oﬀen in X, weil f stetig ist.
 ■
 Bemerkung 10
 a) Für jeden topologischen Raum X ist
-Homöo(X) := { f : X → X | f ist Homöomorphismus }
+Homöo(X) := { f : X →X | f ist Homöomorphismus }
 eine Gruppe.
-b) Jede Isometrie f : X → Y zwischen metrischen Räumen ist ein Homöomorphismus.
-c) Iso(X) := { f : X → X | f ist Isometrie } ist eine Untergruppe von Homöo(X) für
+b) Jede Isometrie f : X →Y zwischen metrischen Räumen ist ein Homöomorphismus.
+c) Iso(X) := { f : X →X | f ist Isometrie } ist eine Untergruppe von Homöo(X) für
 jeden metrischen Raum X.
 Bemerkung 11 (Projektionen sind stetig)
-Seien X, Y topologische Räume. πX : X × Y → X und πY : X × Y → Y die Projektionen
-πX : (x, y) �→ x und πY : (x, y) �→ y
+Seien X, Y topologische Räume. πX : X × Y →X und πY : X × Y →Y die Projektionen
+πX : (x, y) 7→x und πY : (x, y) 7→y
 Wird X × Y mit der Produkttopologie versehen, so sind πX und πY stetig.
-Beweis: Sei U ⊆ X oﬀen
-⇒ π−1
+Beweis: Sei U ⊆X oﬀen
+⇒π−1
 X (U) = U × Y ist oﬀen in X × Y .
 ■
 Bemerkung 12
-Sei X ein topologischer Raum, ∼ eine Äquivalenzrelation auf X, X = X/∼ der Bahnenraum
-versehen mit der Quotiententopologie, π : X → X, x �→ [x]∼.
+Sei X ein topologischer Raum, ∼eine Äquivalenzrelation auf X, X = X/∼der Bahnenraum
+versehen mit der Quotiententopologie, π : X →X, x 7→[x]∼.
 Dann ist π stetig.
 
 12
 1.4. ZUSAMMENHANG
-Beweis: Nach Deﬁnition ist U ⊆ X oﬀen ⇔ π−1(U) ⊆ X oﬀen.
+Beweis: Nach Deﬁnition ist U ⊆X oﬀen ⇔π−1(U) ⊆X oﬀen.
 ■
 Beobachtung: Die Quotiententopologie ist die feinste Topologie, sodass π stetig wird.
 Beispiel 14 (Stereographische Projektion)
-Rn und Sn \ { N } sind homöomorph für beliebiges N ∈ Sn. Es gilt:
+Rn und Sn \ { N } sind homöomorph für beliebiges N ∈Sn. Es gilt:
 Sn =
-�
-x ∈ Rn+1 �� ∥x∥ = 1
-�
+
+x ∈Rn+1  ∥x∥= 1
+	
 =
-�
-x ∈ Rn+1
-�����
+(
+x ∈Rn+1
+
 n+1
-�
+X
 i=1
 x2
 i = 1
-�
+)
 O. B. d. A. sei N =
 
 
@@ -548,11 +548,11 @@ O. B. d. A. sei N =
 
 . Die Gerade durch N und P schneidet die Ebene H in genau
 einem Punkt ˆP. P wird auf ˆP abgebildet.
-f :Sn \ { N } → Rn
-P �→
+f :Sn \ { N } →Rn
+P 7→
 genau ein Punkt
-� �� �
-LP ∩ H
+z }| {
+LP ∩H
 wobei Rn = H =
 
 
@@ -567,8 +567,8 @@ x1
 xn+1
 
 
- ∈ Rn+1
-�������
+∈Rn+1
+
 xn+1 = 0
 
 
@@ -592,8 +592,8 @@ Es gilt: f ist bijektiv und die Umkehrabbildung ist ebenfalls stetig.
 1.4 Zusammenhang
 Deﬁnition 13
 a) Ein Raum X heißt zusammenhängend, wenn es keine oﬀenen, nichtleeren Teilmengen
-U1, U2 von X gibt mit U1 ∩ U2 = ∅ und U1 ∪ U2 = X.
-b) Eine Teilmenge Y ⊆ X heißt zusammenhängend, wenn Y als topologischer Raum mit
+U1, U2 von X gibt mit U1 ∩U2 = ∅und U1 ∪U2 = X.
+b) Eine Teilmenge Y ⊆X heißt zusammenhängend, wenn Y als topologischer Raum mit
 der Teilraumtopologie zusammenhängend ist.
 
 13
@@ -607,82 +607,82 @@ N
 P
 Abbildung 1.6: Visualisierung der stereographischen Projektion
 Bemerkung 13
-X ist zusammenhängend ⇔ Es gibt keine abgeschlossenen, nichtleeren Teilmengen A1, A2
-mit A1 ∩ A2 = ∅ und A1 ∪ A2 = X.
+X ist zusammenhängend ⇔Es gibt keine abgeschlossenen, nichtleeren Teilmengen A1, A2
+mit A1 ∩A2 = ∅und A1 ∪A2 = X.
 Beispiel 15 (Zusammenhang von Räumen)
 1) (Rn, TEuklid) ist zusammenhängend, denn:
-Annahme: Rn = U1 ˙∪ U2 mit ∅ ̸= U1, U2 ∈ TEuklid existieren.
-Sei x ∈ U1, y ∈ U2 und [x, y] die Strecke zwischen x und y. Sei V = [x, y]. Nun
-betrachten wir V ⊊ Rn als (metrischen) Teilraum mit der Teilraumtopologie TV .
-Somit gilt U1 ∩ [x, y] ∈ TV wegen der Deﬁnition der Teilraumtopologie.
-Dann gibt es z ∈ [x, y] mit z ∈ ∂(U1 ∩ [x, y]), aber z /∈ U1 ⇒ z ∈ U2. In jeder
-Umgebung von z liegt ein Punkt von U1 ⇒ Widerspruch zu U2 oﬀen.
-2) R \ { 0 } ist nicht zusammenhängend, denn R \ { 0 } = R<0 ∪ R>0
+Annahme: Rn = U1 ˙∪U2 mit ∅̸= U1, U2 ∈TEuklid existieren.
+Sei x ∈U1, y ∈U2 und [x, y] die Strecke zwischen x und y. Sei V = [x, y]. Nun
+betrachten wir V ⊊Rn als (metrischen) Teilraum mit der Teilraumtopologie TV .
+Somit gilt U1 ∩[x, y] ∈TV wegen der Deﬁnition der Teilraumtopologie.
+Dann gibt es z ∈[x, y] mit z ∈∂(U1 ∩[x, y]), aber z /∈U1 ⇒z ∈U2. In jeder
+Umgebung von z liegt ein Punkt von U1 ⇒Widerspruch zu U2 oﬀen.
+2) R \ { 0 } ist nicht zusammenhängend, denn R \ { 0 } = R<0 ∪R>0
 3) R2 \ { 0 } ist zusammenhängend.
-4) Q ⊊ R ist nicht zusammenhängend, da (Q ∩ R<
+4) Q ⊊R ist nicht zusammenhängend, da (Q ∩R<
 √
-2) ∪ (Q ∩ R>
+2) ∪(Q ∩R>
 √
 2) = Q
-5) { x } ist zusammenhängend für jedes x ∈ X, wobei X ein topologischer Raum ist.
+5) { x } ist zusammenhängend für jedes x ∈X, wobei X ein topologischer Raum ist.
 6) R mit Zariski-Topologie ist zusammenhängend.
 Bemerkung 14
-Sei X ein topologischer Raum und A ⊆ X zusammenhängend. Dann ist auch A zusammen-
+Sei X ein topologischer Raum und A ⊆ X zusammenhängend. Dann ist auch $\overline{A}$ zusammen-
 hängend.
 
 14
 1.4. ZUSAMMENHANG
 Beweis: durch Widerspruch
-Annahme: A = A1 ∪ A2, Ai abgeschlossen, Ai ̸= ∅, A1 ∩ A2 = ∅
-⇒ A = (A ∩ A1)
-�
-��
-�
+Annahme: A = A1 ∪A2, Ai abgeschlossen, Ai ̸= ∅, A1 ∩A2 = ∅
+⇒A = (A ∩A1)
+|
+{z
+}
 abgeschlossen
-˙∪ (A ∩ A2)
-�
-��
-�
+˙∪(A ∩A2)
+|
+{z
+}
 abgeschlossen
-�
-��
-�
+|
+{z
+}
 disjunkt
-Wäre A ∩ A1 = ∅
-⇒ A ⊆ A = A1 ˙∪ A2
-⇒ A ⊆ A2 ⇒ A ⊆ A2
-⇒ A1 = ∅
-⇒ Widerspruch zu A1 ̸= ∅
-⇒ A ∩ A1 ̸= ∅ und analog A ∩ A2 ̸= ∅
-⇒ Widerspruch zu A ist zusammenhängend.
+Wäre A ∩ A1 = ∅
+⇒ $A \subseteq \overline{A} = A_1 \mathbin{\dot\cup} A_2$
+⇒ $A \subseteq A_2$ ⇒ $\overline{A} \subseteq A_2$
+⇒ A1 = ∅
+⇒ Widerspruch zu A1 ≠ ∅
+⇒ A ∩ A1 ≠ ∅ und analog A ∩ A2 ≠ ∅
+⇒ Widerspruch zu A ist zusammenhängend.
 ■
 Bemerkung 15
-Sei X ein topologischer Raum und A, B ⊆ X zusammenhängend.
-Ist A ∩ B ̸= ∅, dann ist A ∪ B zusammenhängend.
-Beweis: Sei A ∪ B = U1 ˙∪ U2, Ui ̸= ∅ oﬀen
+Sei X ein topologischer Raum und A, B ⊆X zusammenhängend.
+Ist A ∩B ̸= ∅, dann ist A ∪B zusammenhängend.
+Beweis: Sei A ∪B = U1 ˙∪U2, Ui ̸= ∅oﬀen
 o. B. d. A.
-======⇒ A = (A ∩ U1) ˙∪ (A ∩ U2) oﬀen
+======⇒A = (A ∩U1) ˙∪(A ∩U2) oﬀen
 A zhgd.
-====⇒ A ∩ U1 = ∅
+====⇒A ∩U1 = ∅
 A∩B̸=∅
-====⇒ U1 ⊆ B
-B = (B ∩ U1)
-�
-��
-�
+====⇒U1 ⊆B
+B = (B ∩U1)
+|
+{z
+}
 =U1
-∪ (B ∩ U2)
-�
-��
-�
+∪(B ∩U2)
+|
+{z
+}
 =∅
 ist unerlaubte Zerlegung.
 ■
 Deﬁnition 14
 Sei X ein topologischer Raum.
-Für x ∈ X sei Z(x) ⊆ X deﬁniert durch
+Für x ∈X sei Z(x) ⊆X deﬁniert durch
 Z(x) :=
-�
+[
 A⊆Xzhgd.
 x∈A
 A
@@ -696,52 +696,52 @@ Beweis:
 
 15
 1.5. KOMPAKTHEIT
-a) Sei Z(x) = A1 ˙∪ A2 mit Ai ̸= ∅ abgeschlossen.
-O. B. d. A. sei x ∈ A1 und y ∈ A2. y liegt in einer zusammehängenden Teilmenge A,
-die auch x enthält. ⇒ A = (A ∩ A1)
-�
-��
-�
+a) Sei Z(x) = A1 ˙∪A2 mit Ai ̸= ∅abgeschlossen.
+O. B. d. A. sei x ∈ A1 und y ∈ A2. y liegt in einer zusammenhängenden Teilmenge A,
+die auch x enthält. ⇒A = (A ∩A1)
+|
+{z
+}
 ∋x
-∪ (A ∩ A2)
-�
-��
-�
+∪(A ∩A2)
+|
+{z
+}
 ∋y
 ist unerlaubte Zerlegung.
-b) Nach Bemerkung 14 ist Z(x) zusammenhängend ⇒ Z(x) ⊆ Z(x) ⇒ Z(x) = Z(x)
-c) Ist Z(y) ∩ Z(x) ̸= ∅ Bem. 15
-=====⇒ Z(y) ∪ Z(x) ist zusammenhängend.
-⇒ Z(x) ∪ Z(y) ⊆ Z(x) ⇒ Z(y) ⊆ Z(x)
-⊆ Z(y) ⇒ Z(x) ⊆ Z(y)
+b) Nach Bemerkung 14 ist $\overline{Z(x)}$ zusammenhängend ⇒ $\overline{Z(x)} \subseteq Z(x)$ ⇒ $Z(x) = \overline{Z(x)}$
+c) Ist Z(y) ∩Z(x) ̸= ∅Bem. 15
+=====⇒Z(y) ∪Z(x) ist zusammenhängend.
+⇒Z(x) ∪Z(y) ⊆Z(x) ⇒Z(y) ⊆Z(x)
+⊆Z(y) ⇒Z(x) ⊆Z(y)
 ■
 Bemerkung 17
-Sei f : X → Y stetig. Ist A ⊆ X zusammenhängend, so ist f(A) ⊆ Y zusammenhängend.
-Beweis: Sei f(A) = U1 ∪ U2, Ui ̸= ∅, oﬀen, disjunkt.
-⇒ f−1(f(A)) = f−1(U1) ∪ f−1(U2)
-⇒ A = (A ∩ f−1(U1))
-�
-��
-�
+Sei f : X →Y stetig. Ist A ⊆X zusammenhängend, so ist f(A) ⊆Y zusammenhängend.
+Beweis: Sei f(A) = U1 ∪U2, Ui ̸= ∅, oﬀen, disjunkt.
+⇒f−1(f(A)) = f−1(U1) ∪f−1(U2)
+⇒A = (A ∩f−1(U1))
+|
+{z
+}
 ̸=∅
-∪ (A ∩ f−1(U2))
-�
-��
-�
+∪(A ∩f−1(U2))
+|
+{z
+}
 ̸=∅
 ■
 1.5 Kompaktheit
 Deﬁnition 15
-Sei X eine Menge und U ⊆ P(X).
+Sei X eine Menge und U ⊆P(X).
 U heißt eine Überdeckung von X, wenn gilt:
-∀x ∈ X : ∃M ∈ U : x ∈ M
+∀x ∈X : ∃M ∈U : x ∈M
 Deﬁnition 16
 Ein topologischer Raum X heißt kompakt, wenn jede oﬀene Überdeckung von X
 U = { Ui }i∈I mit Ui oﬀen in X
 eine endliche Teilüberdeckung
-�
+[
 i∈J⊆I
-Ui = X mit |J| ∈ N
+Ui = X mit |J| ∈N
 besitzt.
 Bemerkung 18
 Das Einheitsintervall I := [0, 1] ist kompakt bezüglich der euklidischen Topologie.
@@ -753,67 +753,67 @@ einem der Ui enthalten ist. Wenn es ein solches δ gibt, kann man I in endlich v
 1.5. KOMPAKTHEIT
 der Länge δ unterteilen und alle Ui in die endliche Überdeckung aufnehmen, die Teilintervalle
 enthalten.
-Angenommen, es gibt kein solches δ. Dann gibt es für jedes n ∈ N ein Intervall In ⊆ [0, 1]
-der Länge 1/n sodass In ⊊ Ui für alle i ∈ J.
-Sei xn der Mittelpunkt von In. Die Folge (xn) hat einen Häufungspunkt x ∈ [0, 1]. Dann
-gibt es i ∈ J mit x ∈ Ui. Da Ui oﬀen ist, gibt es ein ε > 0, sodass (x − ε, x + ε) ⊆ Ui.
-Dann gibt es n0, sodass gilt: 1/n0 < ε/2 und für unendlich viele3 n ≥ n0 : |x − xn| < ε/2, also
-In ⊆ (x − ε, x + ε) ⊆ Ui für mindestens ein n ∈ N.4
-⇒ Widerspruch
+Angenommen, es gibt kein solches δ. Dann gibt es für jedes n ∈N ein Intervall In ⊆[0, 1]
+der Länge 1/n sodass In ⊈ Ui für alle i ∈ J.
+Sei xn der Mittelpunkt von In. Die Folge (xn) hat einen Häufungspunkt x ∈[0, 1]. Dann
+gibt es i ∈J mit x ∈Ui. Da Ui oﬀen ist, gibt es ein ε > 0, sodass (x −ε, x + ε) ⊆Ui.
+Dann gibt es n0, sodass gilt: 1/n0 < ε/2 und für unendlich viele3 n ≥n0 : |x −xn| < ε/2, also
+In ⊆(x −ε, x + ε) ⊆Ui für mindestens ein n ∈N.4
+⇒Widerspruch
 Dann überdecke [0, 1] mit endlich vielen Intervallen I1, . . . , Id der Länge δ. Jedes Ij ist in
 Uij enthalten.
-⇒ Uj1, . . . , Ujd ist endliche Teilüberdeckung von U.
+⇒Uj1, . . . , Ujd ist endliche Teilüberdeckung von U.
 ■
 Beispiel 16 (Kompakte Räume)
 1) R ist nicht kompakt.
 2) (0, 1) ist nicht kompakt.
-Un = (1/n, 1 − 1/n) ⇒ �
+Un = (1/n, 1 −1/n) ⇒S
 n∈N Un = (0, 1)
 3) R mit der Zariski-Topologie ist kompakt und jede Teilmenge von R ist es auch.
 Bemerkung 19
-Sei X kompakter Raum, A ⊆ X abgeschlossen. Dann ist A kompakt.
+Sei X kompakter Raum, A ⊆X abgeschlossen. Dann ist A kompakt.
 Beweis: Sei (Vi)i∈I oﬀene Überdeckung von A.
-Dann gibt es für jedes i ∈ I eine oﬀene Teilmenge Ui ⊆ X mit Vi = Ui ∩ A.
-⇒ A ⊆
-�
+Dann gibt es für jedes i ∈I eine oﬀene Teilmenge Ui ⊆X mit Vi = Ui ∩A.
+⇒A ⊆
+[
 i∈I
 Ui
-⇒ U = { Ui | i ∈ I } ∪ { X \ A } ist oﬀene Überdeckung von X
+⇒U = { Ui | i ∈I } ∪{ X \ A } ist oﬀene Überdeckung von X
 X kompakt
-=======⇒ es gibt i1, . . . , in ∈ I, sodass
-n�
+=======⇒es gibt i1, . . . , in ∈I, sodass
+n[
 j=1
-Uij ∪ (X \ A) = X
+Uij ∪(X \ A) = X
 ⇒
 
 
-n�
+n[
 j=1
-Uij ∪ (X \ A)
+Uij ∪(X \ A)
 
- ∩ A = A
+∩A = A
 ⇒
-n�
+n[
 j=1
-(Uij ∩ A)
-�
-��
-�
+(Uij ∩A)
+|
+{z
+}
 =Vij
-∪ ((X \ A) ∩ A)
-�
-��
-�
+∪((X \ A) ∩A)
+|
+{z
+}
 =∅
 = A
-⇒ Vi1, . . . , Vin überdecken A.
+⇒Vi1, . . . , Vin überdecken A.
 ■
 Bemerkung 20
 Seien X, Y kompakte topologische Räume. Dann ist X × Y mit der Produkttopologie
 kompakt.
-Beweis: Sei (Wi)i∈I eine oﬀene Überdeckung von X × Y . Für jedes (x, y) ∈ X × Y gibt es
-oﬀene Teilmengen Ux,y von X und Vx,y von Y sowie ein i ∈ I, sodass Ux,y × Vx,y ⊆ Wi.
-3Dies gilt nicht für alle n ≥ n0, da ein Häufungspunkt nur eine konvergente Teilfolge impliziert.
+Beweis: Sei (Wi)i∈I eine oﬀene Überdeckung von X × Y . Für jedes (x, y) ∈X × Y gibt es
+oﬀene Teilmengen Ux,y von X und Vx,y von Y sowie ein i ∈I, sodass Ux,y × Vx,y ⊆Wi.
+3Dies gilt nicht für alle n ≥n0, da ein Häufungspunkt nur eine konvergente Teilfolge impliziert.
 4Sogar für unendlich viele.
 
 17
@@ -827,93 +827,92 @@ Ux,y
 Y
 X
 Abbildung 1.7: Die blaue Umgebung ist Schnitt vieler Umgebungen
-Die oﬀenen Mengen Ux0,y × Vx0,y für festes x0 und alle y ∈ Y überdecken { x0 } × y. Da Y
-kompakt ist, ist auch { x0 } × Y kompakt. Also gibt es y1, . . . , ym(x0) mit �m(x0)
+Die oﬀenen Mengen Ux0,y × Vx0,y für festes x0 und alle y ∈ Y überdecken { x0 } × Y. Da Y
+kompakt ist, ist auch { x0 } × Y kompakt. Also gibt es y1, . . . , ym(x0) mit Sm(x0)
 i=1
 Ux0,yi ×
-Vx0,yi ⊇ { x0 } × Y .
-Sei Ux0 := �m(x)
-i=1 Ux0,yi. Da X kompakt ist, gibt es x1, . . . , xn ∈ X mit �n
+Vx0,yi ⊇{ x0 } × Y .
+Sei Ux0 := Tm(x)
+i=1 Ux0,yi. Da X kompakt ist, gibt es x1, . . . , xn ∈X mit Sn
 j=1 Uxj = X
-⇒ �k
+⇒Sk
 j=1
-�m(xj)
+Sm(xj)
 i=1
-�
-Uxj,yi × Vxj,yi
-�
-�
-��
-�
+ Uxj,yi × Vxj,yi
+
+|
+{z
+}
 Ein grün-oranges Kästchen
-⊇ X × Y
-⇒ �
+⊇X × Y
+⇒S
 j
-�
+S
 i Wi(xj, yi) = X × Y
 ■
 Bemerkung 21
-Sei X ein Hausdorﬀraum und K ⊆ X kompakt. Dann ist K abgeschlossen.
+Sei X ein Hausdorﬀraum und K ⊆X kompakt. Dann ist K abgeschlossen.
 Beweis: z. Z.: Komplement ist oﬀen
-Ist X = K, so ist K abgeschlossen in X. Andernfalls sei y ∈ X \ K. Für jedes x ∈ K seien
-Ux bzw. Vy Umgebungen von x bzw. von y, sodass Ux ∩ Vy = ∅.
+Ist X = K, so ist K abgeschlossen in X. Andernfalls sei y ∈X \ K. Für jedes x ∈K seien
+Ux bzw. Vy Umgebungen von x bzw. von y, sodass Ux ∩Vy = ∅.
 Xi
 K
 x
 y
-Da K kompakt ist, gibt es endlich viele x1, . . . , xn ∈ K, sodass �m
-i=1 Uxi ⊇ K.
+Da K kompakt ist, gibt es endlich viele x1, . . . , xn ∈ K, sodass
+$\bigcup_{i=1}^{n} U_{x_i} \supseteq K$.
 Sei V :=
-n�
+n\
 i=1
 Vxi
 
 18
 1.6. WEGE UND KNOTEN
-⇒ V ∩
-� n�
+⇒V ∩
+ n[
 i=1
 Uxi
-�
+!
 = ∅
-⇒ V ∩ K = ∅
-⇒ V ist Überdeckung von y, die ganz in X \ K enthalten ist.
-⇒ X \ K ist oﬀen
+⇒V ∩K = ∅
+⇒ V ist Umgebung von y, die ganz in X \ K enthalten ist.
+⇒X \ K ist oﬀen
 Damit ist K abgeschlossen.
 ■
 Bemerkung 22
-Seien X, Y topologische Räume, f : X → Y stetig.
-Ist K ⊆ X kompakt, so ist f(K) ⊆ Y kompakt.
+Seien X, Y topologische Räume, f : X →Y stetig.
+Ist K ⊆X kompakt, so ist f(K) ⊆Y kompakt.
 Beweis: Sei (Vi)i∈I oﬀene Überdeckung von f(K)
 f stetig
-====⇒ (f−1(Vi))i∈I ist oﬀene Überdeckung von K
+====⇒(f−1(Vi))i∈I ist oﬀene Überdeckung von K
 Kompakt
-=====⇒ es gibt i1, . . . , in, sodass f−1(Vi1), . . . , f−1(Vin) Überdeckung von K ist.
-⇒ f(f−1(Vi1)), . . . , f(f−1(Vin)) überdecken f(K).
-Es gilt: f(f−1(V )) = V ∩ f(X)
+=====⇒es gibt i1, . . . , in, sodass f−1(Vi1), . . . , f−1(Vin) Überdeckung von K ist.
+⇒f(f−1(Vi1)), . . . , f(f−1(Vin)) überdecken f(K).
+Es gilt: f(f−1(V )) = V ∩f(X)
 ■
 Satz 1.1 (Heine-Borel)
 Eine Teilmenge von Rn oder Cn ist genau dann kompakt, wenn sie beschränkt und
 abgeschlossen ist.
-Beweis: „⇒“: Sei K ⊆ Rn (oder Cn) kompakt.
+Beweis: „⇒“: Sei K ⊆Rn (oder Cn) kompakt.
 Da Rn und Cn hausdorﬀsch sind, ist K nach Bemerkung 21 abgeschlossen. Nach Vorausset-
-zung kann K mit endlich vielen oﬀenen Kugeln von Radien 1 überdeckt werden ⇒ K ist
+zung kann K mit endlich vielen oﬀenen Kugeln von Radien 1 überdeckt werden ⇒K ist
 beschränkt.
-„⇐“ Sei A ⊆ Rn (oder Cn) beschränkt und abgeschlossen.
+„⇐“ Sei A ⊆Rn (oder Cn) beschränkt und abgeschlossen.
 Dann gibt es einen Würfel W = [−N, N] × · · · × [−N, N]
-�
-��
-�
+|
+{z
+}
 n mal
-mit A ⊆ W bzw. „Polyzylinder“
-Z = { (z1, . . . , zn) ∈ Cn | zi ≤ N für i = 1, . . . , n }
+mit A ⊆W bzw. „Polyzylinder“
+Z = { (z1, . . . , zn) ∈ Cn | |zi| ≤ N für i = 1, . . . , n }
 Nach Bemerkung 20 und Bemerkung 18 ist W kompakt, also ist A nach Bemerkung 19 auch
 kompakt. Genauso ist Z kompakt, weil
-{ z ∈ C ∥ z| ≤ 1 }
+{ z ∈ C | |z| ≤ 1 }
 homöomorph zu
-�
-(x, y) ∈ R2 �� ∥(x, y)∥ ≤ 1
-�
+
+(x, y) ∈R2  ∥(x, y)∥≤1
+	
 ist.
 ■
 1.6 Wege und Knoten
@@ -922,56 +921,56 @@ Sei X ein topologischer Raum.
 
 19
 1.6. WEGE UND KNOTEN
-a) Ein Weg in X ist eine stetige Abbildung γ : [0, 1] → X.
+a) Ein Weg in X ist eine stetige Abbildung γ : [0, 1] →X.
 b) γ heißt geschlossen, wenn γ(1) = γ(0) gilt.
 c) γ heißt einfach, wenn γ|[0,1) injektiv ist.
 Beispiel 17
 Ist X diskret, so ist jeder Weg konstant, d. h. von der Form
-∀x ∈ [0, 1] : γ(x) = c,
-c ∈ X
+∀x ∈[0, 1] : γ(x) = c,
+c ∈X
 Denn γ([0, 1]) ist zusammenhängend für jeden Weg γ.
 Deﬁnition 18
 Ein topologischer Raum X heißt wegzusammenhängend, wenn es zu je zwei Punkten
-x, y ∈ X einen Weg γ : [0, 1] → X gibt mit γ(0) = x und γ(1) = y.
+x, y ∈X einen Weg γ : [0, 1] →X gibt mit γ(0) = x und γ(1) = y.
 Bemerkung 23
 Sei X ein topologischer Raum.
-a) X ist wegzusammenhängend ⇒ X ist zusammenhängend
-b) X ist wegzusammenhängend ̸⇐ X ist zusammenhängend
+a) X ist wegzusammenhängend ⇒X ist zusammenhängend
+b) X ist wegzusammenhängend ̸⇐X ist zusammenhängend
 Beweis:
 a) Sei X ein wegzusammenhängender topologischer Raum, A1, A2 nichtleere, disjunkte,
-abgeschlossene Teilmengen von X mit A1 ∪ A2 = X. Sei x ∈ A1, y ∈ A2, γ : [0, 1] → X
+abgeschlossene Teilmengen von X mit A1 ∪A2 = X. Sei x ∈A1, y ∈A2, γ : [0, 1] →X
 ein Weg von x nach y.
-Dann ist C := γ([0, 1]) ⊆ X zusammenhängend, weil γ stetig ist.
-C = (C ∩ A1)
-�
-��
-�
+Dann ist C := γ([0, 1]) ⊆X zusammenhängend, weil γ stetig ist.
+C = (C ∩A1)
+|
+{z
+}
 ∋x
-∪ (C ∩ A2)
-�
-��
-�
+∪(C ∩A2)
+|
+{z
+}
 ∋y
-ist Zerlegung in nichtleere, disjunkte, abgeschlossene Teilmengen ⇒ Widerspruch
+ist Zerlegung in nichtleere, disjunkte, abgeschlossene Teilmengen ⇒Widerspruch
 b) Sei X =
-�
-(x, y) ∈ R2 ��� x2 + y2 = 1 ∨ y = 1 + 2 · e− 1
-10 x �
+n
+(x, y) ∈R2  x2 + y2 = 1 ∨y = 1 + 2 · e−1
+10 x o
 .
 Abbildung 1.8a veranschaulicht diesen Raum.
-Sei U1 ∪ U2 = X, U1 ̸= U2 = ∅, Ui oﬀen. X = C ∪ S. Dann ist C ⊆ U1 oder C ⊆ U2,
+Sei U1 ∪U2 = X, U1 ̸= U2 = ∅, Ui oﬀen. X = C ∪S. Dann ist C ⊆U1 oder C ⊆U2,
 weil C und S zusammenhängend sind.
 Also ist C = U1 und S = U2 (oder umgekehrt).
-Sei y ∈ C = U1, ε > 0 und Bε(y) ⊆ U1 eine Umgebung von y, die in U1 enthalten ist.
-Aber: Bε(y) ∩ S ̸= ∅ ⇒ Widerspruch ⇒ X ∪ S ist zusammenhängend, aber nicht
+Sei y ∈C = U1, ε > 0 und Bε(y) ⊆U1 eine Umgebung von y, die in U1 enthalten ist.
+Aber: Bε(y) ∩ S ≠ ∅ ⇒ Widerspruch ⇒ X = C ∪ S ist zusammenhängend, aber nicht
 wegzusammenhängend.
 ■
 Beispiel 18 (Hilbert-Kurve)
-Es gibt stetige, surjektive Abbildungen [0, 1] → [0, 1] × [0, 1]. Ein Beispiel ist die in Abbil-
+Es gibt stetige, surjektive Abbildungen [0, 1] →[0, 1] × [0, 1]. Ein Beispiel ist die in Abbil-
 dung 1.9 dargestellte Hilbert-Kurve.
 Deﬁnition 19
 Sei X ein topologischer Raum. Eine Jordankurve in X ist ein Homöomorphismus γ :
-[0, 1] → C ⊆ X bzw. γ : S1 → C ⊆ X, wobei C := Bild γ.
+[0, 1] →C ⊆X bzw. γ : S1 →C ⊆X, wobei C := Bild γ.
 
 20
 1.6. WEGE UND KNOTEN
@@ -984,8 +983,8 @@ Sei X ein topologischer Raum. Eine Jordankurve in X ist ein Homöomorphismus γ
 X
 Y
 {(x, sin( 1
-x)) ∈ X × Y }
-(−1, 1) ⊆ Y
+x)) ∈X × Y }
+(−1, 1) ⊆Y
 (b) Sinus
 Abbildung 1.8: Beispiele für Räume, die zusammenhängend, aber nicht wegzusammenhängend
 sind.
@@ -1020,24 +1019,24 @@ Beispiel 19 (Knoten)
 (d) 62-Knoten
 Abbildung 1.11: Beispiele für verschiedene Knoten
 Deﬁnition 21
-Zwei Knoten γ1, γ2 : S1 → R3 heißen äquivalent, wenn es eine stetige Abbildung
-H : S1 × [0, 1] → R3
+Zwei Knoten γ1, γ2 : S1 →R3 heißen äquivalent, wenn es eine stetige Abbildung
+H : S1 × [0, 1] →R3
 gibt mit
 H(z, 0) = γ1(z)
-∀z ∈ S1
+∀z ∈S1
 H(z, 1) = γ2(z)
-∀z ∈ S1
-und für jedes feste t ∈ [0, 1] ist
-Hz : S1 → R3, z �→ H(z, t)
+∀z ∈S1
+und für jedes feste t ∈[0, 1] ist
+Ht : S1 → R3, z ↦ H(z, t)
 ein Knoten. Die Abbildung H heißt Isotopie zwischen γ1 und γ2.
 Deﬁnition 22
-Sei γ : [0, 1] → R3 ein Knoten, E eine Ebene und π : R3 → E eine Projektion auf E.
+Sei γ : [0, 1] →R3 ein Knoten, E eine Ebene und π : R3 →E eine Projektion auf E.
 π heißt Knotendiagramm von γ, wenn gilt:
-��π−1(x)
-�� ≤ 2
-∀x ∈ π(γ)
+π−1(x)
+ ≤2
+∀x ∈π(γ)
 Ist (π|γ([0,1]))−1(x) = { y1, y2 }, so liegt y1 über y2, wenn gilt:
-∃λ > 1 : (y1 − x) = λ(y2 − x)
+∃λ > 1 : (y1 −x) = λ(y2 −x)
 Satz 1.3 (Satz von Reidemeister)
 Zwei endliche Knotendiagramme gehören genau dann zu äquivalenten Knoten, wenn sie
 durch endlich viele „Reidemeister-Züge“ ineinander überführt werden können.
@@ -1065,20 +1064,20 @@ Es sei X := { 0, 1 } und TX := { ∅, { 0 } , X }. Dies ist der sogenannte Sierp
 (b) Ist (X, TX) hausdorﬀsch?
 (c) Ist TX von einer Metrik erzeugt?
 Aufgabe 2
-Es sei Z mit der von den Mengen Ua,b := a + bZ(a ∈ Z, b ∈ Z \ { 0 }) erzeugten Topologie
+Es sei Z mit der von den Mengen Ua,b := a + bZ(a ∈Z, b ∈Z \ { 0 }) erzeugten Topologie
 versehen.
 Zeigen Sie:
 (a) Jedes Ua,b und jede einelementige Teilmenge von Z ist abgeschlossen.
 (b) { −1, 1 } ist nicht oﬀen.
 (c) Es gibt unendlich viele Primzahlen.
 Aufgabe 3 (Cantorsches Diskontinuum)
-Für jedes i ∈ N sei Pi := { 0, 1 } mit der diskreten Topologie. Weiter Sei P := �
+Für jedes i ∈N sei Pi := { 0, 1 } mit der diskreten Topologie. Weiter Sei P := Q
 i∈N Pi.
 (a) Wie sehen die oﬀenen Mengen von P aus?
 (b) Was können Sie über den Zusammenhang von P sagen?
 Aufgabe 4 (Kompaktheit)
-(a) Ist GLn(R) = { A ∈ Rn×n | det(A) ̸= 0 } kompakt?
-(b) Ist SLn(R) = { A ∈ Rn×n | det(A) = 1 } kompakt?
+(a) Ist GLn(R) = { A ∈Rn×n | det(A) ̸= 0 } kompakt?
+(b) Ist SLn(R) = { A ∈Rn×n | det(A) = 1 } kompakt?
 (c) Ist P(R) kompakt?
 Aufgabe 5 (Begriﬀe)
 Deﬁnieren Sie die Begriﬀe „Homomorphismus“ und „Homöomorphismus“.
@@ -1097,18 +1096,18 @@ Deﬁnieren Sie die Begriﬀe „Isomorphismus“, „Isotopie“ und „Isometr
 Simplizialkomplexe
 2.1 Topologische Mannigfaltigkeiten
 Deﬁnition 24
-Sei (X, T) ein topologischer Raum und n ∈ N.
-a) Eine n-dimensionale Karte auf X ist ein Paar (U, ϕ), wobei U ∈ T und ϕ : U → V
-Homöomorphismus von U auf eine oﬀene Teilmenge V ⊆ Rn.
+Sei (X, T) ein topologischer Raum und n ∈N.
+a) Eine n-dimensionale Karte auf X ist ein Paar (U, ϕ), wobei U ∈T und ϕ : U →V
+Homöomorphismus von U auf eine oﬀene Teilmenge V ⊆Rn.
 b) Ein n-dimensionaler Atlas A auf X ist eine Familie (Ui, ϕi)i∈I von Karten auf X,
-sodass �
+sodass S
 i∈I Ui = X.
 c) X heißt (topologische) n-dimensionale Mannigfaltigkeit, wenn X hausdorﬀsch ist,
 eine abzählbare Basis der Topologie hat und einen n-dimensionalen Atlas besitzt.
 Anschaulich ist also ein n-dimensionale Mannigfaltigkeit lokal dem Rn ähnlich.
 Bemerkung 24 (Mächtigkeit von Mannigfaltigkeiten)
-Jede n-dimensionale Mannigfaltigkeit mit n ≥ 1 ist mindestens so mächtig wie R.
-Beweis: Sei (X, T) ein topologischer Raum und (U, ϕ) mit U ∈ T und ϕ : U → V ⊆ Rn, wobei
+Jede n-dimensionale Mannigfaltigkeit mit n ≥1 ist mindestens so mächtig wie R.
+Beweis: Sei (X, T) ein topologischer Raum und (U, ϕ) mit U ∈T und ϕ : U →V ⊆Rn, wobei
 V oﬀen und ϕ ein Homöomorphismus ist, eine Karte auf X.
 Da jede oﬀene Teilmenge des Rn genauso mächtig ist wie der Rn, ϕ als Homöomorphismus
 insbesondere bijektiv ist und Mengen, zwischen denen eine Bijektion existiert, gleich mächtig
@@ -1118,91 +1117,91 @@ hat, muss jede Mannigfaltigkeit X mindestens so mächtig sein wie der Rn.
 Hinweis: Es gibt auch noch 0-dimensionale Mannigfaltigkeiten. Diese Mannigfaltigkeiten können
 beliebig viele Elemente haben.
 Bemerkung 25
-a) Es gibt surjektive, stetige Abbildungen [0, 1] → [0, 1] × [0, 1]
+a) Es gibt surjektive, stetige Abbildungen [0, 1] →[0, 1] × [0, 1]
 b) Für n ̸= m sind Rn und Rm nicht homöomorph. Zum Beweis benutzt man den „Satz
 von der Gebietstreue“ (Brouwer):
-Ist U ⊆ Rn oﬀen und f : U → Rn stetig und injektiv, so ist f(U) oﬀen.
+Ist U ⊆Rn oﬀen und f : U →Rn stetig und injektiv, so ist f(U) oﬀen.
 Ist n < m und Rm homöomorph zu Rn, so wäre
-f : Rn → Rm → Rn,
-(x1, . . . , xn) �→ (x1, x2, . . . , xn, 0, . . . , 0)
-eine stetige injektive Abbildung. Also müsste f(Rn) oﬀen sein ⇒ Widerspruch
+f : Rn →Rm →Rn,
+(x1, . . . , xn) 7→(x1, x2, . . . , xn, 0, . . . , 0)
+eine stetige injektive Abbildung. Also müsste f(Rn) oﬀen sein ⇒Widerspruch
 
 26
 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN
 Beispiel 20 (Mannigfaltigkeiten)
-1) Jede oﬀene Teilmenge U ⊆ Rn ist eine n-dimensionale Mannigfaltigkeit mit einem
+1) Jede oﬀene Teilmenge U ⊆Rn ist eine n-dimensionale Mannigfaltigkeit mit einem
 Atlas aus einer Karte.
 2) Cn ist eine 2n-dimensionale Mannigfaltigkeit mit einem Atlas aus einer Karte:
-(z1, . . . , zn) �→ (ℜ(z1), ℑ(z1), . . . , ℜ(zn), ℑ(zn))
-3) Pn(R) = (Rn+1 \ { 0 })/∼ = Sn/∼ und Pn(C) sind Mannigfaltigkeiten der Dimension
+(z1, . . . , zn) 7→(ℜ(z1), ℑ(z1), . . . , ℜ(zn), ℑ(zn))
+3) Pn(R) = (Rn+1 \ { 0 })/∼= Sn/∼und Pn(C) sind Mannigfaltigkeiten der Dimension
 n bzw. 2n, da gilt:
-Sei Ui := { (x0 : · · · : xn) ∈ Pn(R) | xi ̸= 0 } ∀i ∈ 0, . . . , n. Dann ist Pn(R) = �n
+Sei Ui := { (x0 : · · · : xn) ∈Pn(R) | xi ̸= 0 } ∀i ∈0, . . . , n. Dann ist Pn(R) = Sn
 i=0 Ui
 und die Abbildung
-Ui → Rn
-(x0 : · · · : xn) �→
-�x0
+Ui →Rn
+(x0 : · · · : xn) 7→
+x0
 xi
 , . . . ,
-�
-��
+
+
 xi
 xi
 , . . . , xn
 xi
-�
+
 (y1 : · · · : yi−1 : 1 : yi : · · · : yn)
-�→
+7→
 (y1, . . . , yn)
 ist bijektiv.
 Die Ui mit i = 0, . . . , n bilden einen n-dimensionalen Atlas:
-x = (1 : 0 : 0) ∈ U0 → R2
-x �→ (0, 0)
-y = (0 : 1 : 1) ∈ U2 → R2
-y �→ (0, 1)
-Umgebung: B1(0, 1) → { (1 : u : v) | ∥(u, v)∥ < 1 } = V1
+x = (1 : 0 : 0) ∈U0 →R2
+x 7→(0, 0)
+y = (0 : 1 : 1) ∈U2 →R2
+y 7→(0, 1)
+Umgebung: B1(0, 1) →{ (1 : u : v) | ∥(u, v)∥< 1 } = V1
 Umgebung: B1(0, 1) →
-�
+
 (w : z : 1)
-�� w2 + z2 < 1
-�
+ w2 + z2 < 1
+	
 = V2
-V1 ∩ V2 = ∅?
-(a : b : c) ∈ V1 ∩ V2
-⇒ a ̸= 0 und ( b
+V1 ∩V2 = ∅?
+(a : b : c) ∈V1 ∩V2
+⇒a ̸= 0 und ( b
 a)2 + ( c
-a)2 < 1 ⇒ c
+a)2 < 1 ⇒c
 a < 1
-⇒ c ̸= 0 und ( a
+⇒c ̸= 0 und (a
 c)2 + ( b
-c)2 < 1 ⇒ a
+c)2 < 1 ⇒a
 c < 1
-⇒ Widerspruch
+⇒Widerspruch
 4) Sn =
-�
-x ∈ Rn+1 �� ∥x∥ = 1
-�
+
+x ∈Rn+1  ∥x∥= 1
+	
 ist n-dimensionale Mannigfaltigkeit.
 Karten:
-Di := {(x1, . . . , xn+1) ∈ Sn|xi > 0} → B1(0, . . . , 0
-� �� �
+Di := {(x1, . . . , xn+1) ∈Sn|xi > 0} →B1(0, . . . , 0
+| {z }
 ∈Rn
 )
-Ci := {(x1, . . . , xn+1) ∈ Sn|xi < 0} → B1(0, . . . , 0)
-(x1, . . . , xn+1) �→ (x1, . . . ,
+Ci := {(x1, . . . , xn+1) ∈Sn|xi < 0} →B1(0, . . . , 0)
+(x1, . . . , xn+1) 7→(x1, . . . ,
 
 xi, . . . , xn+1)1
-(x1, . . . , xn) �→ (x1, . . . , xi−1,
-�
-1 − �n
+(x1, . . . , xn) 7→(x1, . . . , xi−1,
+q
+1 −Pn
 k=1 x2
 k, xi, . . . , xn), oder −
-�
-1 − �n
+q
+1 −Pn
 k=1 x2
 k für Ci
-Sn = �n+1
-i=1 (Ci ∪ Di)
+Sn = Sn+1
+i=1 (Ci ∪Di)
 Als kompakte Mannigfaltigkeit wird Sn auch „geschlossene Mannigfaltigkeit“ genannt.
 5) [0, 1] ist keine Mannigfaltigkeit, denn:
 Es gibt keine Umgebung von 0 in [0, 1], die homöomorph zu einem oﬀenem Intervall
@@ -1212,34 +1211,34 @@ ist.
 27
 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN
 6) V1 =
-�
-(x, y) ∈ R2 �� x · y = 0
-�
+
+(x, y) ∈R2  x · y = 0
+	
 ist keine Mannigfaltigkeit.
 Das Problem ist (0, 0). Wenn man diesen Punkt entfernt, zerfällt der Raum in 4
 Zusammenhangskomponenten. Jeder Rn zerfällt jedoch in höchstens zwei Zusammen-
 hangskomponenten, wenn man einen Punkt entfernt.
 7) V2 =
-�
-(x, y) ∈ R2 �� x3 = y2 �
+
+(x, y) ∈R2  x3 = y2 	
 ist eine Mannigfaltigkeit.
-8) X = (R \ { 0 }) ∪ (01, 02)
-U ⊆ X oﬀen ⇔
-�
+8) X = (R \ { 0 }) ∪(01, 02)
+U ⊆X oﬀen ⇔
+(
 U oﬀen in R \ { 0 } ,
-falls 01 /∈ U, 02 ∈ U
-∃ε > 0 : (−ε, ε) ⊆ U
-falls 01 ∈ U, 02 ∈ U
-Insbesondere sind (R \ { 0 }) ∪ { 01 } und (R \ { 0 }) ∪ { 02 } oﬀen und homöomorph
+falls 01 /∈U, 02 ∈U
+∃ε > 0 : (−ε, ε) ⊆U
+falls 01 ∈U, 02 ∈U
+Insbesondere sind (R \ { 0 }) ∪{ 01 } und (R \ { 0 }) ∪{ 02 } oﬀen und homöomorph
 zu R.
 Aber: X ist nicht hausdorﬀsch! Denn es gibt keine disjunkten Umgebungen von 01
 und 02.
 9) GLn(R) ist eine Mannigfaltigkeit der Dimension n2, weil oﬀene Teilmengen von Rn2
 eine Mannigfaltigkeit bilden.
 Deﬁnition 25
-Seien X, Y n-dimensionale Mannigfaltigkeiten, U ⊆ X und V ⊆ Y oﬀen, Φ : U → V ein Ho-
-möomorphismus Z = (X ˙∪ Y )/∼ mit der von u ∼ Φ(u) ∀u ∈ U erzeugten Äquivalenzrelation
-und der von ∼ induzierten Quotiententopologie.
+Seien X, Y n-dimensionale Mannigfaltigkeiten, U ⊆X und V ⊆Y oﬀen, Φ : U →V ein Ho-
+möomorphismus Z = (X ˙∪Y )/∼mit der von u ∼Φ(u) ∀u ∈U erzeugten Äquivalenzrelation
+und der von ∼induzierten Quotiententopologie.
 Z heißt Verklebung von X und Y längs U und V . Z besitzt einen Atlas aus n-dimensionalen
 Karten. Falls Z hausdorﬀsch ist, ist Z eine n-dimensionale Mannigfaltigkeit.
 Bemerkung 26
@@ -1257,7 +1256,7 @@ Mannigfaltigkeiten mit Dimension 2:
 3) T 2 (1 Henkel)
 4) oder mehr Henkel, wie z.B. der Zweifachtorus in Abbildung 2.1
 Bemerkung 27
-Sei n ∈ N, F : Rn → R stetig diﬀerenzierbar und X = V (F) := { x ∈ Rn | F(x) = 0 } das
+Sei n ∈N, F : Rn →R stetig diﬀerenzierbar und X = V (F) := { x ∈Rn | F(x) = 0 } das
 „vanishing set“.
 Dann gilt:
 
@@ -1266,26 +1265,26 @@ Dann gilt:
 Abbildung 2.1: Durch Verklebung zweier Tori entsteht ein Zweifachtorus.
 a) X ist abgeschlossen in Rn
 b) Ist grad(F)(X) ̸= 0
-∀x ∈ X, so ist X eine Mannigfaltigkeit der Dimension n − 1.
+∀x ∈X, so ist X eine Mannigfaltigkeit der Dimension n −1.
 Beweis:
-a) Sei y ∈ Rn \ V (F). Weil F stetig ist, gibt es δ > 0, sodass F(Bδ(y)) ⊆ Bε(F(y)) mit
+a) Sei y ∈Rn \ V (F). Weil F stetig ist, gibt es δ > 0, sodass F(Bδ(y)) ⊆Bε(F(y)) mit
 ε = 1
-2∥F(y)∥. Folgt Bδ(y) ∩ V (F) = ∅ ⇒ Rn \ V (F) ist oﬀen.
-b) Sei x ∈ X mit grad(F)(x) ̸= 0, also o. B. d. A.
+2∥F(y)∥. Folgt Bδ(y) ∩V (F) = ∅⇒Rn \ V (F) ist oﬀen.
+b) Sei x ∈X mit grad(F)(x) ̸= 0, also o. B. d. A.
 ∂F
 ∂X1 (x) ̸= 0, x = (x1, . . . , xn),
-x′ := (x2, . . . , xn) ∈ Rn−1. Der Satz von der impliziten Funktion liefert nun: Es
-gibt Umgebungen U von x′ und diﬀerenzierbare Funktionen g : U → R, sodass
-G : U → Rn, u �→ (g(u), u) eine stetige Abbildung auf eine oﬀene Umgebung V von x
+x′ := (x2, . . . , xn) ∈Rn−1. Der Satz von der impliziten Funktion liefert nun: Es
+gibt Umgebungen U von x′ und diﬀerenzierbare Funktionen g : U →R, sodass
+G : U →Rn, u 7→(g(u), u) eine stetige Abbildung auf eine oﬀene Umgebung V von x
 in X ist.
 ■
 Beispiel 22
-1) F : R3 → R,
-(x, y, z) �→ x2+y2+z2−1, V (F) = S2, grad(F) = (2x, 2y, 2z) Bem. 27.b
+1) F : R3 →R,
+(x, y, z) 7→x2+y2+z2−1, V (F) = S2, grad(F) = (2x, 2y, 2z) Bem. 27.b
 ======⇒
 Sn ist n-dimensionale Mannigfaltigkeit in Rn+1
-2) F : R2 → R,
-(x, y) �→ y2 −x3 Es gilt: grad(F) = (−3x2, 2y). Also: grad(0, 0) = (0, 0).
+2) F : R2 →R,
+(x, y) 7→y2 −x3 Es gilt: grad(F) = (−3x2, 2y). Also: grad(0, 0) = (0, 0).
 −5
 −4
 −3
@@ -1312,7 +1311,7 @@ z
 0
 100
 f(x, y)
-(a) F(x, y) = y2 − x3
+(a) F(x, y) = y2 −x3
 2
 4
 6
@@ -1329,7 +1328,7 @@ a = 1
 3
 a = 1
 a = 2
-(b) y2 − ax3 = 0
+(b) y2 −ax3 = 0
 Abbildung 2.2: Rechts ist die Neilsche Parabel für verschiedene Parameter a.
 Daher ist Bemerkung 27.b nicht anwendbar, aber V (F) ist trotzdem eine 1-dimensionale
 topologische Mannigfaltigkeit.
@@ -1338,10 +1337,10 @@ topologische Mannigfaltigkeit.
 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN
 Deﬁnition 26
 Sei X ein Hausdorﬀraum mit abzählbarer Basis der Topologie. X heißt n-dimensionale
-Mannigfaltigkeit mit Rand, wenn es einen Atlas (Ui, ϕi) gibt, wobei Ui ⊆ Xi oﬀen und
+Mannigfaltigkeit mit Rand, wenn es einen Atlas (Ui, ϕi) gibt, wobei Ui ⊆ X oﬀen und
 ϕi ein Homöomorphismus auf eine oﬀene Teilmenge von
 Rn
-+,0 := { (x1, . . . , xn) ∈ Rn | xn ≥ 0 }
++,0 := { (x1, . . . , xn) ∈Rn | xn ≥0 }
 ist.
 Rn
 +,0 ist ein „Halbraum“.
@@ -1356,17 +1355,17 @@ Abbildung 2.3: Beispiele für Mannigfaltigkeiten mit Rand
 Deﬁnition 27
 Sei X eine n-dimensionale Mannigfaltigkeit mit Rand und Atlas A. Dann heißt
 ∂X :=
-�
+[
 (U,ϕ)∈A
-{ x ∈ U | ϕ(x) = 0 }
+{ x ∈U | ϕ(x) = 0 }
 Rand von X.
-∂X ist eine Mannigfaltigkeit der Dimension n − 1.
+∂X ist eine Mannigfaltigkeit der Dimension n −1.
 Deﬁnition 28
 Sei X eine n-dimensionale Mannigfaltigkeit mit Atlas (Ui, ϕi)i∈I
-Für i, j ∈ I mit Ui ∩ Uj ̸= ∅ heißt
-ϕij := ϕj ◦ ϕ−1
+Für i, j ∈I mit Ui ∩Uj ̸= ∅heißt
+ϕij := ϕj ◦ϕ−1
 i
-ϕi(Ui ∩ Uj) → ϕj(Ui ∩ Uj)
+ϕi(Ui ∩Uj) →ϕj(Ui ∩Uj)
 Kartenwechsel oder Übergangsfunktion.
 
 30
@@ -1385,54 +1384,54 @@ Abbildung 2.4: Kartenwechsel
 Deﬁnition 29
 Sei X eine n-dimensionale Mannigfaltigkeit mit Atlas (Ui, ϕi)i∈I.
 a) X heißt diﬀerenzierbare Mannigfaltigkeit der Klasse Ck, wenn jede Karten-
-wechselabbildung ϕij, i, j ∈ I k-mal stetig diﬀerenzierbar ist.
+wechselabbildung ϕij, i, j ∈I k-mal stetig diﬀerenzierbar ist.
 b) X heißt diﬀerenzierbare Mannigfaltigkeit, wenn X eine diﬀerenzierbare Mannig-
-faltigkeit der Klasse C∞ ist.
-Diﬀerenzierbare Mannigfaltigkeiten der Klasse C∞ werden auch glatt genannt.
+faltigkeit der Klasse C∞ist.
+Diﬀerenzierbare Mannigfaltigkeiten der Klasse C∞werden auch glatt genannt.
 Deﬁnition 30
-Sei X eine diﬀerenzierbare Mannigfaltigkeit der Klasse Ck (k ∈ N ∪ { ∞ }) mit Atlas
+Sei X eine diﬀerenzierbare Mannigfaltigkeit der Klasse Ck (k ∈N ∪{ ∞}) mit Atlas
 A = (Ui, ϕi)i∈I.
-a) Eine Karte (U, ϕ) auf X heißt verträglich mit A, wenn alle Kartenwechsel ϕ ◦ ϕ−1
+a) Eine Karte (U, ϕ) auf X heißt verträglich mit A, wenn alle Kartenwechsel ϕ ◦ϕ−1
 i
-und ϕi ◦ ϕ−1 (i ∈ I mit Ui ∩ U ̸= ∅) diﬀerenzierbar von Klasse Ck sind.
+und ϕi ◦ϕ−1 (i ∈I mit Ui ∩U ̸= ∅) diﬀerenzierbar von Klasse Ck sind.
 b) Die Menge aller mit A verträglichen Karten auf X bildet einen maximalen Atlas der
 Klasse Ck. Er heißt Ck-Struktur auf X.
 Eine C∞-Struktur heißt auch diﬀerenzierbare Struktur auf X.
 Bemerkung 28
-Für n ≥ 4 gibt es auf Sn mehrere verschiedene diﬀerenzierbare Strukturen, die sogenannten
+Für n ≥4 gibt es auf Sn mehrere verschiedene diﬀerenzierbare Strukturen, die sogenannten
 „exotische Sphären“.
 Deﬁnition 31
-Seien X, Y diﬀerenzierbare Mannigfaltigkeiten der Dimension n bzw. m, x ∈ X.
-a) Eine stetige Abbildung f : X → Y heißt diﬀerenzierbar in x (von Klasse Ck), wenn
-es Karten (U, ϕ) von X mit x ∈ U und (V, ψ) von Y mit f(U) ⊆ V gibt, sodass
-ψ ◦ f ◦ ϕ−1 stetig diﬀerenzierbar von Klasse Ck in ϕ(x) ist.
-b) f heißt diﬀerenzierbar (von Klasse Ck), wenn f in jedem x ∈ X diﬀerenzierbar ist.
-c) f heißt Diﬀeomorphismus, wenn f diﬀerenzierbar von Klasse C∞ ist und es eine
-diﬀerenzierbare Abbildung g : Y → X von Klasse C∞ gibt mit g ◦ f = idX und
-f ◦ g = idY .
+Seien X, Y diﬀerenzierbare Mannigfaltigkeiten der Dimension n bzw. m, x ∈X.
+a) Eine stetige Abbildung f : X →Y heißt diﬀerenzierbar in x (von Klasse Ck), wenn
+es Karten (U, ϕ) von X mit x ∈U und (V, ψ) von Y mit f(U) ⊆V gibt, sodass
+ψ ◦f ◦ϕ−1 stetig diﬀerenzierbar von Klasse Ck in ϕ(x) ist.
+b) f heißt diﬀerenzierbar (von Klasse Ck), wenn f in jedem x ∈X diﬀerenzierbar ist.
+c) f heißt Diﬀeomorphismus, wenn f diﬀerenzierbar von Klasse C∞ist und es eine
+diﬀerenzierbare Abbildung g : Y →X von Klasse C∞gibt mit g ◦f = idX und
+f ◦g = idY .
 
 31
 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN
 Bemerkung 29
 Die Bedingung in Deﬁnition 31.a hängt nicht von den gewählten Karten ab.
-Beweis: Seien (U ′, ϕ′) und (V ′, ψ′) Karten von X bzw. Y um x bzw. f(x) mit f(U ′) ⊆ V ′.
-⇒ ψ′ ◦ f ◦ (ϕ′)−1
-= ψ′ ◦ (ψ−1 ◦ ψ) ◦ f ◦ (ϕ−1 ◦ ϕ) ◦ (ϕ′)−1
-ist genau dann diﬀerenzierbar, wenn ψ ◦ f ◦ ϕ−1 diﬀerenzierbar ist.
+Beweis: Seien (U′, ϕ′) und (V ′, ψ′) Karten von X bzw. Y um x bzw. f(x) mit f(U′) ⊆V ′.
+⇒ψ′ ◦f ◦(ϕ′)−1
+= ψ′ ◦(ψ−1 ◦ψ) ◦f ◦(ϕ−1 ◦ϕ) ◦(ϕ′)−1
+ist genau dann diﬀerenzierbar, wenn ψ ◦f ◦ϕ−1 diﬀerenzierbar ist.
 Beispiel 23
-f : R → R,
-x �→ x3 ist kein Diﬀeomorphismus, aber Homöomorphismus, da mit g(x) :=
+f : R →R,
+x 7→x3 ist kein Diﬀeomorphismus, aber Homöomorphismus, da mit g(x) :=
 3√x
-gilt: f ◦ g = idR,
-g ◦ f = idR
+gilt: f ◦g = idR,
+g ◦f = idR
 Bemerkung 30
 Sei X eine glatte Mannigfaltigkeit. Dann ist
-Diﬀeo(X) := { f : X → X | f ist Diﬀeomorphismus }
+Diﬀeo(X) := { f : X →X | f ist Diﬀeomorphismus }
 eine Untergruppe von Homöo(X).
 Deﬁnition 32
-S ⊆ R3 heißt reguläre Fläche :⇔ ∀s ∈ S ∃ Umgebung V (s) ⊆ R3 ∃U ⊆ R2 oﬀen:
-∃ diﬀerenzierbare Abbildung F : U → V ∩ S: Rg(JF (u)) = 2
-∀u ∈ U.
+S ⊆R3 heißt reguläre Fläche :⇔∀s ∈S ∃Umgebung V (s) ⊆R3 ∃U ⊆R2 oﬀen:
+∃diﬀerenzierbare Abbildung F : U →V ∩S: Rg(JF (u)) = 2
+∀u ∈U.
 F heißt (lokale) reguläre Parametrisierung von S.
 F(u, v) = (x(u, v), y(u, v), z(u, v))
 JF (u, v) =
@@ -1453,9 +1452,9 @@ JF (u, v) =
 
 
 Beispiel 24
-1) Rotationsﬂächen: Sei r : R → R>0 eine diﬀerenzierbare Funktion.
-F : R2 → R3
-(u, v) �→ (r(u) cos(u), r(v) sin(u), v)
+1) Rotationsﬂächen: Sei r : R →R>0 eine diﬀerenzierbare Funktion.
+F : R2 →R3
+(u, v) ↦ (r(v) cos(u), r(v) sin(u), v)
 JF (u, v) =
 
 
@@ -1467,19 +1466,17 @@ r′(v) sin u
 1
 
 
-hat Rang 2 für alle (u, v) ∈ R2.
-2) Kugelkoordinaten: F : R2 → R3,
-(u, v) �→ (R cos v cos u, R cos v sin u, R sin v)
-Es gilt: F(u, v) ∈ S2
+hat Rang 2 für alle (u, v) ∈R2.
+2) Kugelkoordinaten: F : R2 →R3,
+(u, v) 7→(R cos v cos u, R cos v sin u, R sin v)
+Es gilt: F(u, v) ∈S2
 R, denn
 R2 cos2(v) cos2(u) + R2 cos2(v) sin2(u) + R2 sin2(v)
 =R2(cos2(v) cos2(u) + cos2(v) sin2(u) + sin2(v))
-=R2 �
-cos2(v)(cos2(u) + sin2(u)) + sin2(v)
-�
-=R2 �
-cos2(v) + sin2(v)
-�
+=R2  cos2(v)(cos2(u) + sin2(u)) + sin2(v)
+
+=R2  cos2(v) + sin2(v)
+
 =R2
 
 32
@@ -1533,14 +1530,14 @@ R cos v
 
 hat Rang 2 für cos v ̸= 0. In N und S ist cos v = 0.
 Bemerkung 31
-Jede reguläre Fläche S ⊆ R3 ist eine 2-dimensionale, diﬀerenzierbare Mannigfaltigkeit.
+Jede reguläre Fläche S ⊆R3 ist eine 2-dimensionale, diﬀerenzierbare Mannigfaltigkeit.
 Beweis:
-S ⊆ R3 ist als reguläre Fläche eine 2-dimensionale Mannigfaltigkeit. Aus der Deﬁnition von
-regulären Flächen folgt direkt, dass Karten (Ui, Fi) und (Uj ⊆ R2, Fj : R2 → R3) von S mit
-Ui ∩ Uj ̸= ∅ existieren, wobei Fi und Fj nach Deﬁnition diﬀerenzierbare Abbildungen sind.
+S ⊆R3 ist als reguläre Fläche eine 2-dimensionale Mannigfaltigkeit. Aus der Deﬁnition von
+regulären Flächen folgt direkt, dass Karten (Ui, Fi) und (Uj ⊆R2, Fj : R2 →R3) von S mit
+Ui ∩Uj ̸= ∅existieren, wobei Fi und Fj nach Deﬁnition diﬀerenzierbare Abbildungen sind.
 z.Z.: F −1
 j
-◦ Fi ist ein Diﬀeomorphismus.
+◦Fi ist ein Diﬀeomorphismus.
 Ui
 Uj
 S
@@ -1551,19 +1548,19 @@ F −1
 j
 ◦Fi
 Abbildung 2.5: Reguläre Fläche S zum Beweis von Bemerkung 31
-Idee: Finde diﬀerenzierbare Funktion �
+Idee: Finde diﬀerenzierbare Funktion g
 F −1
 j
-in Umgebung W von s, sodass �
+in Umgebung W von s, sodass g
 F −1
 j
 |S∩W = F −1
 j
 .
-Ausführung: Sei u0 ∈ Ui, v0 ∈ Uj mit Fi(u0) = s = Fj(v0).
+Ausführung: Sei u0 ∈Ui, v0 ∈Uj mit Fi(u0) = s = Fj(v0).
 Da Rg(JFj(v0)) = 2 ist, ist o. B. d. A.
 det
-� ∂x
+ ∂x
 ∂u
 ∂x
 ∂v
@@ -1571,16 +1568,16 @@ det
 ∂u
 ∂y
 ∂v
-�
+
 (v0) ̸= 0
 und Fj(u, v) = (x(u, v), y(u, v), z(u, v)).
-Deﬁniere �
-Fj : Uj × R → R3 durch
-�
+Deﬁniere f
+Fj : Uj × R →R3 durch
+f
 Fj(u, v, t) := (x(u, v), y(u, v), z(u, v) + t)
-Oﬀensichtlich: �
+Oﬀensichtlich: f
 Fj|Uj×{ 0 } = Fj
-J�
+Jf
 Fj =
 
 
@@ -1600,11 +1597,11 @@ Fj =
 ∂v
 1
 
- ⇒ det J�
+⇒det Jf
 Fj(v0, 0) ̸= 0
 Analysis II
-======⇒ Es gibt Umgebungen W von Fj von �
-Fj(v0, 0) = Fj(v0) = s, sodass �
+======⇒Es gibt Umgebungen W von Fj von f
+Fj(v0, 0) = Fj(v0) = s, sodass f
 Fj auf W eine
 diﬀerenzierbar Inverse F −1
 j
@@ -1613,26 +1610,26 @@ hat.
 34
 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN
 Weiter gilt:
-�
+f
 Fj
 −1|W∩S = F −1
 j
 |W∩S
-⇒ F −1
+⇒F −1
 j
-◦ Fi|F −1
+◦Fi|F −1
 i
 (W∩S) = F −1
 j
-◦ Fi|F −1
+◦Fi|F −1
 i
 (W∩S)
 ist diﬀerenzierbar.
 Deﬁnition 33
 Sei G eine Mannigfaltigkeit und (G, ◦) eine Gruppe.
-a) G heißt topologische Gruppe, wenn die Abbildungen ◦ : G×G → G und ι : G → G
+a) G heißt topologische Gruppe, wenn die Abbildungen ◦: G×G →G und ι : G →G
 deﬁniert durch
-g ◦ h := g · h und ι(g) := g−1
+g ◦h := g · h und ι(g) := g−1
 stetig sind.
 b) Ist G eine diﬀerenzierbare Mannigfaltigkeit, so heißt G Lie-Gruppe, wenn (G, ◦) und
 (G, ι) diﬀerenzierbar sind.
@@ -1641,7 +1638,7 @@ Beispiel 25 (Lie-Gruppen)
 2) GLn(R)
 3) (R×, ·)
 4) (R>0, ·)
-5) (Rn, +), denn A · B(i, j) = �n
+5) (Rn, +), denn A · B(i, j) = Pn
 k=1 aikbkj ist nach allen Variablen diﬀerenzierbar
 (A−1)(i, j) = det(Aij)
 det A
@@ -1660,44 +1657,44 @@ an1
 ann
 
 
- ∈ R(n−1)×(n−1)
+∈R(n−1)×(n−1)
 ist diﬀerenzierbar.
 det Aij kann 0 werden, da:
-� 1
+ 1
 1
 −1
 0
-�
-6) SLn(R) = { A ∈ GLn(R) | det(A) = 1 }
+
+6) SLn(R) = { A ∈GLn(R) | det(A) = 1 }
 Bemerkung 32
-Ist G eine Lie-Gruppe und g ∈ G, so ist die Abbildung
-lg : G → G
-h �→ g · h
+Ist G eine Lie-Gruppe und g ∈G, so ist die Abbildung
+lg : G →G
+h 7→g · h
 ein Diﬀeomorphismus.
 
 35
 2.3. SIMPLIZIALKOMPLEX
 2.3 Simplizialkomplex
 Deﬁnition 34
-Seien v0, . . . , vk ∈ Rn Punkte.
+Seien v0, . . . , vk ∈Rn Punkte.
 a) v0, . . . , vk sind in allgemeiner Lage
-⇔ es gibt keinen (k−1)-dimensionalen aﬃnen Untervektorraum, der v0, . . . , vk enthält
-⇔ v1 − v0, . . . , vk − v0 sind linear unabhängig.
+⇔es gibt keinen (k−1)-dimensionalen aﬃnen Untervektorraum, der v0, . . . , vk enthält
+⇔v1 −v0, . . . , vk −v0 sind linear unabhängig.
 b) conv(v0, . . . , vk) :=
-� �k
+n Pk
 i=0 λivi
-��� λi ≥ 0, �k
+ λi ≥0, Pk
 i=0 λi = 1
-�
+o
 heißt die konvexe Hülle von
 v0, . . . , vk.
 Deﬁnition 35
-a) Sei ∆n = conv(e0, . . . , en) ⊆ Rn+1 die konvexe Hülle der Standard-Basisvektoren
+a) Sei ∆n = conv(e0, . . . , en) ⊆Rn+1 die konvexe Hülle der Standard-Basisvektoren
 e0, . . . , en.
 Dann heißt ∆n Standard-Simplex und n die Dimension des Simplex.
 b) Für Punkte v0, . . . , vk im Rn in allgemeiner Lage heißt ∆(v0, . . . , vk) = conv(v0, . . . , vk)
 ein k-Simplex in Rn.
-c) Ist ∆(v0, . . . , vk) ein k-Simplex und I = { i0, . . . , ir } ⊆ { 0, . . . , k }, so ist si0,...,ir :=
+c) Ist ∆(v0, . . . , vk) ein k-Simplex und I = { i0, . . . , ir } ⊆{ 0, . . . , k }, so ist si0,...,ir :=
 conv(vi0, . . . , vir) ein r-Simplex und heißt Teilsimplex oder Seite von ∆.
 (a) 0-Simplex ∆0
 1
@@ -1728,11 +1725,11 @@ Abbildung 2.6: Beispiele für k-Simplexe
 Deﬁnition 36
 a) Eine endliche Menge K von Simplizes im Rn heißt (endlicher) Simplizialkomplex,
 wenn gilt:
-(i) Für ∆ ∈ K und S ⊆ ∆ Teilsimplex ist S ∈ K.
-(ii) Für ∆1, ∆2 ∈ K ist ∆1 ∩ ∆2 leer oder ein Teilsimplex von ∆1 und von ∆2.
-b) |K| := �
-∆∈K ∆ (mit Teilraumtopologie) heißt geometrische Realisierung von K.
-c) Ist d = max { k ∈ N0 | K enthält k-Simplex }, so heißt d die Dimension von K.
+(i) Für ∆∈K und S ⊆∆Teilsimplex ist S ∈K.
+(ii) Für ∆1, ∆2 ∈K ist ∆1 ∩∆2 leer oder ein Teilsimplex von ∆1 und von ∆2.
+b) |K| := S
+∆∈K ∆(mit Teilraumtopologie) heißt geometrische Realisierung von K.
+c) Ist d = max { k ∈N0 | K enthält k-Simplex }, so heißt d die Dimension von K.
 
 36
 2.3. SIMPLIZIALKOMPLEX
@@ -1749,10 +1746,10 @@ P
 Abbildung 2.7: Beispiele für Simplizialkomplexe
 Deﬁnition 37
 Seien K, L Simplizialkomplexe. Eine stetige Abbildung
-f : |K| → |L|
-heißt simplizial, wenn für jedes ∆ ∈ K gilt:
-a) f(∆) ∈ L
-b) f|∆ : ∆ → f(∆) ist eine aﬃne Abbildung.
+f : |K| →|L|
+heißt simplizial, wenn für jedes ∆∈K gilt:
+a) f(∆) ∈L
+b) f|∆: ∆→f(∆) ist eine aﬃne Abbildung.
 Beispiel 26 (Simpliziale Abbildungen)
 1) ϕ(e1) := b1, ϕ(e2) := b2
 ϕ ist eine eindeutig bestimmte lineare Abbildung
@@ -1766,7 +1763,7 @@ e1
 b1
 b2
 ϕ
-2) Folgende Abbildung ϕ : ∆n → ∆n−1 ist simplizial:
+2) Folgende Abbildung ϕ : ∆n →∆n−1 ist simplizial:
 ϕ
 3) Tori können simplizial auf Sphären abgebildet werden (vgl. Abbildung 2.8)
 M
@@ -1818,58 +1815,58 @@ b
 b
 Abbildung 2.8: Abbildung eines Torus auf eine Sphäre
 Deﬁnition 38
-Sei K ein endlicher Simplizialkomplex. Für n ≥ 0 sei an(K) die Anzahl der n-Simplizes in
+Sei K ein endlicher Simplizialkomplex. Für n ≥0 sei an(K) die Anzahl der n-Simplizes in
 K.
 Dann heißt
 χ(K) :=
 dim K
-�
+X
 n=0
 (−1)nan(K)
 Eulerzahl (oder Euler-Charakteristik) von K.
 Beispiel 27
-1) χ(∆1) = 2 − 1 = 1
-χ(∆2) = 3 − 3 + 1 = 1
-χ(∆3) = 4 − 6 + 4 − 1 = 1
-2) χ(Oktaeder-Oberﬂäche) = 6 − 12 + 8 = 2
+1) χ(∆1) = 2 −1 = 1
+χ(∆2) = 3 −3 + 1 = 1
+χ(∆3) = 4 −6 + 4 −1 = 1
+2) χ(Oktaeder-Oberﬂäche) = 6 −12 + 8 = 2
 χ(Rand des Tetraeders) = 2
-χ(Ikosaeder) = 12 − 30 + 20 = 2
-3) χ(Würfel) = 8 − 12 + 6 = 2
-χ(Würfel, unterteilt in Dreiecksﬂächen) = 8 − (12 + 6) + (6 · 2) = 2
+χ(Ikosaeder) = 12 −30 + 20 = 2
+3) χ(Würfel) = 8 −12 + 6 = 2
+χ(Würfel, unterteilt in Dreiecksﬂächen) = 8 −(12 + 6) + (6 · 2) = 2
 Bemerkung 33
-χ(∆n) = 1 für jedes n ∈ N0
+χ(∆n) = 1 für jedes n ∈N0
 
 38
 2.3. SIMPLIZIALKOMPLEX
 Beweis: ∆n ist die konvexe Hülle von (e0, . . . , en) in Rn+1. Jede (k + 1)-elementige Teilmenge
 von { e0, . . . , en } deﬁniert ein k-Simplex.
-⇒ ak(∆n) =
-�n+1
+⇒ak(∆n) =
+ n+1
 k+1
-�
+
 ,
 k = 0, . . . , n
-⇒ χ(∆n) = �n
-k=0(−1)k�n+1
+⇒χ(∆n) = Pn
+k=0(−1)k n+1
 k+1
-�
+
 f(x) = (x + 1)n+1
 Binomischer
 Lehrsatz
 =
-�n+1
+Pn+1
 k=0
-�n+1
+ n+1
 k
-�
+
 xk
-⇒ 0 = �n+1
+⇒0 = Pn+1
 k=0
-�n+1
+ n+1
 k
-�
-(−1)k = χ(∆n) − 1
-⇒ χ(∆n) = 1
+
+(−1)k = χ(∆n) −1
+⇒χ(∆n) = 1
 ■
 Deﬁnition 39
 a) Ein 1D-Simplizialkomplex heißt Graph.
@@ -1888,45 +1885,45 @@ Beweis: Induktion über die Anzahl der Ecken.
 Bemerkung 35
 a) Jeder zusammenhängende Graph Γ enthält einen Teilbaum T, der alle Ecken von Γ
 enthält.2
-b) Ist n = a1(Γ) − a1(T), so ist χ(Γ) = 1 − n.
+b) Ist n = a1(Γ) −a1(T), so ist χ(Γ) = 1 −n.
 Beweis:
 a) Siehe „Algorithmus von Kruskal“.
 2T wird „Spannbaum“ genannt.
 
 39
 2.3. SIMPLIZIALKOMPLEX
-b) χ(Γ) = a0(Γ) − a1(Γ)
-= a0(Γ) − (n + a1(T))
-= a0(T) − a1(T) − n
-= χ(T) − n
-= 1 − n
+b) χ(Γ) = a0(Γ) −a1(Γ)
+= a0(Γ) −(n + a1(T))
+= a0(T) −a1(T) −n
+= χ(T) −n
+= 1 −n
 Bemerkung 36
-Sei ∆ ein n-Simplex und x ∈ ∆◦ ⊆ Rn. Sei K der Simplizialkomplex, der aus ∆ durch
+Sei ∆ein n-Simplex und x ∈∆◦⊆Rn. Sei K der Simplizialkomplex, der aus ∆durch
 „Unterteilung“ in x entsteht. Dann ist χ(K) = χ(∆) = 1.
 (a) K
 (b) ∆, das aus K durch Unter-
 teilung entsteht
 Abbildung 2.10: Beispiel für Bemerkung 36.
-Beweis: χ(K) = χ(∆) − (−1)n
-� �� �
+Beweis: χ(K) = χ(∆) −(−1)n
+| {z }
 n-Simplex
 +
 n
-�
+X
 k=0
 (−1)k
-�n + 1
+n + 1
 k
-�
-�
-��
-�
+
+|
+{z
+}
 (1+(−1))n+1
 = χ(∆)
 ■
 Deﬁnition 40
 Sei X ein topologischer Raum, K ein Simplizialkomplex und
-h : |K| → X
+h : |K| →X
 ein Homöomorphismus von der geometrischen Realisierung |K| auf X. Dann heißt h eine
 Triangulierung von X.
 Beispiel 28 (Triangulierung des Torus)
@@ -1936,11 +1933,11 @@ in Beispiel 28.
 Satz 2.1 (Eulersche Polyederformel)
 Sei P ein konvexes Polyeder in R3, d. h. ∂P ist ein 2-dimensionaler Simplizialkomplex,
 sodass gilt:
-∀x, y ∈ ∂P : [x, y] ⊆ P
+∀x, y ∈∂P : [x, y] ⊆P
 Dann ist χ(∂P) = 2.
 Beweis:
 1) Die Aussage ist richtig für den Tetraeder.
-2) O. B. d. A. sei 0 ∈ P und P ⊆ B1(0). Projeziere ∂P von 0 aus auf ∂B1(0) = S2.
+2) O. B. d. A. sei 0 ∈ P und P ⊆ B1(0). Projiziere ∂P von 0 aus auf ∂B1(0) = S2.
 Erhalte Triangulierung von S2.
 
 40
@@ -1968,27 +1965,27 @@ P2 ein Tetraeder ist.
 Bemerkung 37 (Der Rand vom Rand ist 0)
 Sei K ein endlicher Simplizialkomplex mit Knotenmenge V und < eine Totalordnung auf V .
 Sei An die Menge der n-Simplizes in K, d. h.
-An(K) := { σ ∈ K | dim(σ) = n }
+An(K) := { σ ∈K | dim(σ) = n }
 für n = 0, . . . , d = dim(K)
 und Cn(K) der R-Vektorraum mit Basis An(K), d. h.
 Cn(K) =
 
 
 
-�
+X
 σ∈An(K)
 cσ · σ
-������
-cσ ∈ R
+
+cσ ∈R
 
 
 
-Sei σ = ∆(x0, . . . , xn) ∈ An(K), sodass x0 < x1 < · · · < xn.
+Sei σ = ∆(x0, . . . , xn) ∈An(K), sodass x0 < x1 < · · · < xn.
 Für i = 0, . . . , n sei ∂iσ := ∆(x0, . . . , ˆxi, . . . , xn) die i-te Seite von σ und dσ = dnσ :=
-�
-i=0(−1)i∂iσ ∈ Cn−1(K) und dn : Cn(K) → Cn−1(K) die dadurch deﬁnierte lineare
+P
+i=0(−1)i∂iσ ∈Cn−1(K) und dn : Cn(K) →Cn−1(K) die dadurch deﬁnierte lineare
 Abbildung.
-Dann gilt: dn−1 ◦ dn = 0
+Dann gilt: dn−1 ◦dn = 0
 a
 b
 c
@@ -1999,46 +1996,46 @@ e2
 Abbildung 2.14: Simplizialkomplex mit Totalordnung
 Beispiel 29
 Sei a < b < c. Dann gilt:
-d2σ = e1 − e2 + e3
-d1(e1 − e2 + e3) = (c − b) − (c − a) + (b − a)
+d2σ = e1 −e2 + e3
+d1(e1 −e2 + e3) = (c −b) −(c −a) + (b −a)
 
 42
 2.3. SIMPLIZIALKOMPLEX
 = 0
 Sei a < b < c < d. Dann gilt für Tetraeder:
-d3(∆(a, b, c, d)) = ∆(b, c, d) − ∆(a, c, d) + ∆(a, b, d) − ∆(a, b, c), wobei:
+d3(∆(a, b, c, d)) = ∆(b, c, d) −∆(a, c, d) + ∆(a, b, d) −∆(a, b, c), wobei:
 d2( ∆(b, c, d)) =
 ∆(c, d)−∆(b, d) + ∆(b, c)
 d2(−∆(a, c, d)) = −∆(c, d) + ∆(a, d)−∆(a, c)
 d2( ∆(a, b, d)) =
 ∆(b, d)−∆(a, d) + ∆(a, b)
 d2(−∆(a, b, c)) = −∆(b, c) + ∆(a, c)−∆(a, b)
-⇒ d2(d3(∆(a, b, c, d))) = 0
-Beweis: Sei σ ∈ An. Dann gilt:
+⇒d2(d3(∆(a, b, c, d))) = 0
+Beweis: Sei σ ∈An. Dann gilt:
 dn−1(dnσ) = dn−1(
 n
-�
+X
 i=0
 (−1)i∂iσ)
 =
 n
-�
+X
 i=0
 (−1)idn−1(∂iσ)
 =
 n
-�
+X
 i=0
 (−1)i
 n−1
-�
+X
 j=0
 ∂i(∂jσ)(−1)j
 =
-�
+X
 0≤i≤j≤n−1
 (−1)i+j∂j(∂i(σ)) +
-�
+X
 0≤j<i≤n
 (−1)i+j∂i−1(∂jσ)
 = 0
@@ -2046,74 +2043,74 @@ weil jeder Summand aus der ersten Summe auch in der zweiten Summe vorkommt, aber
 umgekehrten Vorzeichen.
 ■
 Deﬁnition 41
-Sei K ein Simplizialkomplex, Zn := Kern(dn) ⊆ Cn und Bn := Bild(dn+1) ⊆ Cn.
+Sei K ein Simplizialkomplex, Zn := Kern(dn) ⊆Cn und Bn := Bild(dn+1) ⊆Cn.
 a) Hn = Hn(K, R) := Zn/Bn heißt n-te Homologiegruppe von K.
 b) bn(K) := dimR Hn heißt n-te Betti-Zahl von K.
 Bemerkung 38
-Nach Bemerkung 37 ist Bn ⊆ Zn, denn dn+1(C) ∈ Kern(dn) für C ∈ Cn+1.
+Nach Bemerkung 37 ist Bn ⊆Zn, denn dn+1(C) ∈Kern(dn) für C ∈Cn+1.
 Satz 2.2
 Für jeden endlichen Simplizialkomplex K der Dimension d gilt:
 d
-�
+X
 k=0
 (−1)kbk(K) =
 d
-�
+X
 k=0
 (−1)kak(K) = χ(K)
 Bemerkung 39
-Es gilt nicht ak = bk ∀k ∈ N0.
+Es gilt nicht ak = bk ∀k ∈N0.
 
 43
 2.3. SIMPLIZIALKOMPLEX
 Beweis:
-• Dimensionsformel für dn: an = dim Zn + dim Bn−1 für n ≥ 1
-• Dimensionsformel für Zn → Hn = Zn/Bn : dim Zn = bn + dim Bn
+• Dimensionsformel für dn: an = dim Zn + dim Bn−1 für n ≥1
+• Dimensionsformel für Zn →Hn = Zn/Bn : dim Zn = bn + dim Bn
 • dim Zd = bd, da dim Zd = bd + dim Bd, wobei dim Bd = 0, da ad+1 = 0
-• a0 − dim B0 = b0, da a0 − dim B0 = a0 − dim Z0 + b0 und a0 = dim Z0, weil a−1 = 0
+• a0 −dim B0 = b0, da a0 −dim B0 = a0 −dim Z0 + b0 und a0 = dim Z0, weil a−1 = 0
 ⇒
 d
-�
+X
 k=0
 (−1)kak = a0 +
 d
-�
+X
 k=1
 (−1)k(dim Zk + dim Bk−1)
 = a0 +
 d
-�
+X
 k=1
 (−1)k dim Zk +
 d−1
-�
+X
 k=0
 (−1)k+1 dim Bk
 = a0 +
 d
-�
+X
 k=1
 (−1)k dim Zk −
 d−1
-�
+X
 k=0
 (−1)k dim Bk
 = a0 +
 d−1
-�
+X
 k=1
 (−1)kbk + (−1)d dim Zd
-� �� �
+| {z }
 =bd
-− dim B0
+−dim B0
 = b0 +
 d−1
-�
+X
 k=1
 (−1)kbk + (−1)dbd
 =
 d
-�
+X
 k=0
 (−1)kbk
 
@@ -2143,23 +2140,23 @@ b
 Hindernis nicht homotop.
 Abbildung 3.1: Beispiele für Wege γ1 und γ2
 Deﬁnition 42
-Sei X ein topologischer Raum, a, b ∈ X, γ1, γ2 : I → X Wege von a nach b, d. h. γ1(0) =
+Sei X ein topologischer Raum, a, b ∈X, γ1, γ2 : I →X Wege von a nach b, d. h. γ1(0) =
 γ2(0) = a, γ1(1) = γ2(1) = b
-γ1 und γ2 heißen homotop, wenn es eine stetige Abbildung H : I × I → X mit
-H(t, 0) = γ1(t) ∀t ∈ I
-H(t, 1) = γ2(t) ∀t ∈ I
-und H(0, s) = a und H(1, s) = b für alle s ∈ I gibt. Dann schreibt man: γ1 ∼ γ2
+γ1 und γ2 heißen homotop, wenn es eine stetige Abbildung H : I × I →X mit
+H(t, 0) = γ1(t) ∀t ∈I
+H(t, 1) = γ2(t) ∀t ∈I
+und H(0, s) = a und H(1, s) = b für alle s ∈I gibt. Dann schreibt man: γ1 ∼γ2
 H heißt Homotopie zwischen γ1 und γ2.
 Bemerkung 40
-Sei X ein topologischer Raum, a, b ∈ X, γ1, γ2 : I → X Wege von a nach b und H eine
+Sei X ein topologischer Raum, a, b ∈X, γ1, γ2 : I →X Wege von a nach b und H eine
 Homotopie zwischen γ1 und γ2.
 Dann gilt: Der Weg
-γs : I → X,
+γs : I →X,
 γs(t) = H(t, s)
-ist Weg in X von a nach b für jedes s ∈ I.
+ist Weg in X von a nach b für jedes s ∈I.
 Beweis: H ist stetig, also ist H(t, s) insbesondere für jedes feste s stetig. Da H(0, s) = a und
+H(1, s) = b für alle s ∈ I und γs eine Abbildung von I nach X ist, ist γs ein Weg in X von a
-nach b für jedes s ∈ I.
+H(1, s) = b für alle s ∈I und γs eine Abbildung von I auf X ist, ist γs ein Weg in X von a
+nach b für jedes s ∈I.
 ■
 Bemerkung 41
 Durch Homotopie wird eine Äquivalenzrelation auf der Menge aller Wege in X von a nach b
@@ -2168,18 +2165,18 @@ Beweis:
 
 46
 3.1. HOMOTOPIE VON WEGEN
-• reﬂexiv: H(t, s) = γ(t) für alle (t, s) ∈ I × I
-• symmetrisch: H′(t, s) = H(t, 1 − s) für alle (t, s) ∈ I × I
+• reﬂexiv: H(t, s) = γ(t) für alle (t, s) ∈I × I
+• symmetrisch: H′(t, s) = H(t, 1 −s) für alle (t, s) ∈I × I
 • transitiv: Seien H′ bzw. H′′ Homotopien von γ1 nach γ2 bzw. von γ2 nach γ3.
 Dann sei H(t, s) :=
-�
+(
 H′(t, 2s)
-falls 0 ≤ s ≤ 1
+falls 0 ≤s ≤1
 2
-H′′(t, 2s − 1)
+H′′(t, 2s −1)
 falls 1
-2 ≤ s ≤ 1
-⇒ H ist stetig und Homotopie von γ1 nach γ3.
+2 ≤s ≤1
+⇒H ist stetig und Homotopie von γ1 nach γ3.
 ■
 Beispiel 30
 1) Sei X = S1. γ1 und γ2 aus Abbildung 3.3a nicht homotop.
@@ -2187,17 +2184,17 @@ Beispiel 30
 3) Sei X = R2 und a = b = (0, 0).
 Je zwei Wege im R2 mit Anfangs- und Endpunkt (0, 0) sind homotop.
 Abbildung 3.2: Zwei Wege im R2 mit Anfangs- und Endpunkt (0, 0)
-Sei γ0 : I → R2 der konstante Weg γ0(t) = (0, 0) ∀t ∈ I. Sei γ(0) = γ(1) = (0, 0).
-H(t, s) := (1 − s)γ(t) ist stetig, H(t, 0) = γ(t) ∀t ∈ I und H(t, 1) = (0, 0) ∀t ∈ I.
+Sei γ0 : I →R2 der konstante Weg γ0(t) = (0, 0) ∀t ∈I. Sei γ(0) = γ(1) = (0, 0).
+H(t, s) := (1 −s)γ(t) ist stetig, H(t, 0) = γ(t) ∀t ∈I und H(t, 1) = (0, 0) ∀t ∈I.
 Bemerkung 42
-Sei X ein topologischer Raum, γ : I → X ein Weg und ϕ : I → I stetig mit ϕ(0) = 0,
-ϕ(1) = 1. Dann sind γ und γ ◦ ϕ homotop.
-Beweis: Sei H(t, s) = γ((1 − s)t + s · ϕ(t)).
+Sei X ein topologischer Raum, γ : I →X ein Weg und ϕ : I →I stetig mit ϕ(0) = 0,
+ϕ(1) = 1. Dann sind γ und γ ◦ϕ homotop.
+Beweis: Sei H(t, s) = γ((1 −s)t + s · ϕ(t)).
 Dann ist H stetig, H(t, 0) = γ(t),
 H(t, 1) = γ(ϕ(t)),
 H(0, s) = γ(0) und H(1, s) =
-γ(1 − s + s) = γ(1)
-⇒ H ist Homotopie.
+γ(1 −s + s) = γ(1)
+⇒H ist Homotopie.
 ■
 
 47
@@ -2214,18 +2211,18 @@ Abbildung 3.3: Beispiele für (nicht)-Homotopie von Wegen
 Deﬁnition 43
 Seien γ1, γ2 Wege in X mit γ1(1) = γ2(0). Dann ist
 γ(t) =
-�
+(
 γ1(2t)
-falls 0 ≤ t < 1
+falls 0 ≤t < 1
 2
-γ2(2t − 1)
+γ2(2t −1)
 falls 1
-2 ≤ t ≤ 1
-ein Weg in X. Er heißt zusammengesetzter Weg und man schreibt γ = γ1 ∗ γ2.
+2 ≤t ≤1
+ein Weg in X. Er heißt zusammengesetzter Weg und man schreibt γ = γ1 ∗γ2.
 Bemerkung 43
 Das Zusammensetzen von Wegen ist nur bis auf Homotopie assoziativ, d. h.:
-γ1 ∗ (γ2 ∗ γ3) ̸= (γ1 ∗ γ2) ∗ γ3
-γ1 ∗ (γ2 ∗ γ3) ∼ (γ1 ∗ γ2) ∗ γ3
+γ1 ∗(γ2 ∗γ3) ̸= (γ1 ∗γ2) ∗γ3
+γ1 ∗(γ2 ∗γ3) ∼(γ1 ∗γ2) ∗γ3
 mit γ1(1) = γ2(0) und γ2(1) = γ3(0).
 γ1
 γ2
@@ -2234,7 +2231,7 @@ mit γ1(1) = γ2(0) und γ2(1) = γ3(0).
 1/2
 3/4
 1
-(a) γ1 ∗ (γ2 ∗ γ3)
+(a) γ1 ∗(γ2 ∗γ3)
 γ1
 γ2
 γ3
@@ -2242,7 +2239,7 @@ mit γ1(1) = γ2(0) und γ2(1) = γ3(0).
 1/4
 1/2
 1
-(b) (γ1 ∗ γ2) ∗ γ3
+(b) (γ1 ∗γ2) ∗γ3
 Abbildung 3.4: Das Zusammensetzen von Wegen ist nicht assoziativ
 Beweis: Das Zusammensetzen von Wegen ist wegen Bemerkung 42 bis auf Homotopie assoziativ.
 Verwende dazu
@@ -2254,25 +2251,25 @@ Verwende dazu
 
 1
 2t
-falls 0 ≤ t < 1
+falls 0 ≤t < 1
 2
-t − 1
+t −1
 4
 falls 1
-2 ≤ t < 3
+2 ≤t < 3
 4
-2t − 1
+2t −1
 falls 3
-4 ≤ t ≤ 1
+4 ≤t ≤1
 Bemerkung 44
-Sei X ein topologischer Raum, a, b, c ∈ X, γ1, γ′
+Sei X ein topologischer Raum, a, b, c ∈X, γ1, γ′
 1 Wege von a nach b und γ2, γ′
 2 Wege von b
 nach c.
-Sind γ1 ∼ γ′
-1 und γ2 ∼ γ′
-2, so ist γ1 ∗ γ2 ∼ γ′
-1 ∗ γ′
+Sind γ1 ∼γ′
+1 und γ2 ∼γ′
+2, so ist γ1 ∗γ2 ∼γ′
+1 ∗γ′
 2.
 
 48
@@ -2292,42 +2289,42 @@ Beweis: Sei Hi eine Homotopie zwischen γi und γ′
 i, i = 1, 2.
 Dann ist
 H(t, s) :=
-�
+(
 H1(2t, s)
-falls 0 ≤ t ≤ 1
+falls 0 ≤t ≤1
 2
-∀s ∈ I
-H2(2t − 1, s)
+∀s ∈I
+H2(2t −1, s)
 falls 1
-2 ≤ t ≤ 1
-eine Homotopie zwischen γ1 ∗ γ2 und γ′
-1 ∗ γ′
+2 ≤t ≤1
+eine Homotopie zwischen γ1 ∗γ2 und γ′
+1 ∗γ′
 2.
 Eine spezielle Homotopieäquivalenz sind sog. Deformationsretraktionen:
 Deﬁnition 44
-Sei X ein topologischer Raum, A ⊆ X, r : X → A eine stetige Abbildung und ι = (idX)|A.
-a) ι : A → X mit ι(x) = x heißt die Inklusionsabbildung und man schreibt: ι : A �→ X.
+Sei X ein topologischer Raum, A ⊆X, r : X →A eine stetige Abbildung und ι = (idX)|A.
+a) ι : A →X mit ι(x) = x heißt die Inklusionsabbildung und man schreibt: ι : A ,→X.
 b) r heißt Retraktion, wenn r|A = idA ist.
-c) A heißt Deformationsretrakt, wenn es eine Retraktion r auf A mit ι ◦ r ∼ idX gibt.
+c) A heißt Deformationsretrakt, wenn es eine Retraktion r auf A mit ι ◦r ∼idX gibt.
 Beispiel 31 (Zylinder auf Kreis)
 Sei X = S1 × R ein topologischer Raum und
-r : S1 × R → S1 × { 0 } ∼= S1
+r : S1 × R →S1 × { 0 } ∼= S1
 mit
 r(x, y) := (x, 0)
 eine Abbildung. r ist eine Retraktion, da r|S1 ∼= idS1.
-ι ◦ r : S1 × R → S1 × R
-(x, y) �→ (x, 0)
-H : (S1 × R) × I → S1 × R
-(x, y, t) �→ (x, ty)
+ι ◦r : S1 × R →S1 × R
+(x, y) 7→(x, 0)
+H : (S1 × R) × I →S1 × R
+(x, y, t) 7→(x, ty)
 3.2 Fundamentalgruppe
 Für einen Weg γ sei [γ] seine Homotopieklasse.
 Deﬁnition 45
-Sei X ein topologischer Raum und x ∈ X. Sei außerdem
+Sei X ein topologischer Raum und x ∈X. Sei außerdem
 π1(X, x) := { [γ] | γ ist Weg in X mit γ(0) = γ(1) = x }
 
 49
 3.2. FUNDAMENTALGRUPPE
-Durch [γ1] ∗G [γ2] := [γ1 ∗ γ2] wird π1(X, x) zu einer Gruppe. Diese Gruppe heißt Funda-
+Durch [γ1] ∗G [γ2] := [γ1 ∗γ2] wird π1(X, x) zu einer Gruppe. Diese Gruppe heißt Funda-
 mentalgruppe von X im Basispunkt x.
 Bemerkung 45
 Im R2 gibt es nur eine Homotopieklasse.
@@ -2335,42 +2332,42 @@ Beweis: (Fundamentalgruppe ist eine Gruppe)
 a) Abgeschlossenheit folgt direkt aus der Deﬁnition von ∗G
 b) Assoziativität folgt aus Bemerkung 43
 c) Neutrales Element e = [γ0], γ0(t) = x
-∀t ∈ I. e ∗ [γ] = [γ] = [γ] ∗ e, da γ0 ∗ γ ∼ γ
+∀t ∈I. e ∗[γ] = [γ] = [γ] ∗e, da γ0 ∗γ ∼γ
 d)
-Inverses Element [γ]−1 = [γ] = [γ(1 − t)], denn γ ∗ γ ∼ γ0 ∼ γ ∗ γ
+Inverses Element [γ]^{-1} = [\overline{γ}] = [γ(1 − t)], denn \overline{γ} ∗ γ ∼ γ0 ∼ γ ∗ \overline{γ}
 Beispiel 32
-1) S1 = { z ∈ C | |z| = 1 } =
-�
-(cos ϕ, sin ϕ) ∈ R2 �� 0 ≤ ϕ ≤ 2π
-�
+1) S1 = { z ∈C | |z| = 1 } =
+
+(cos ϕ, sin ϕ) ∈R2  0 ≤ϕ ≤2π
+	
 π1(S1, 1) =
-�
+
 [γk]
-�� k ∈ Z
-� ∼= Z. Dabei ist γ(t) = e2πit = cos(2πt) + i sin(2πt) und
-γk := γ ∗ · · · ∗ γ
-�
-��
-�
+ k ∈Z
+	 ∼= Z. Dabei ist γ(t) = e2πit = cos(2πt) + i sin(2πt) und
+γk := γ ∗· · · ∗γ
+|
+{z
+}
 k mal
-[γk] �→ k ist ein Isomorphismus.
-2) π1(R2, 0) = π1(R2, x) = { e } für jedes x ∈ R2
-3) π1(Rn, x) = { e } für jedes x ∈ Rn
-4) G ⊆ Rn heißt sternförmig bzgl. x ∈ G, wenn für jedes y ∈ G auch die Strecke
-[x, y] ⊆ G ist.
-Für jedes sternförmige G ⊆ Rn ist π1(G, x) = { e }
+[γk] 7→k ist ein Isomorphismus.
+2) π1(R2, 0) = π1(R2, x) = { e } für jedes x ∈R2
+3) π1(Rn, x) = { e } für jedes x ∈Rn
+4) G ⊆Rn heißt sternförmig bzgl. x ∈G, wenn für jedes y ∈G auch die Strecke
+[x, y] ⊆G ist.
+Für jedes sternförmige G ⊆Rn ist π1(G, x) = { e }
 x
 Abbildung 3.6: Sternförmiges Gebiet
 .
 5) π1(S2, x0) = { e }, da im R2 alle Wege homotop zu { e } sind. Mithilfe der stereogra-
 phischen Projektion kann von S2 auf den R2 abgebildet werden.
 Dieses Argument funktioniert nicht mehr bei ﬂächenfüllenden Wegen, d. h. wenn
-γ : I → S2 surjektiv ist.
+γ : I →S2 surjektiv ist.
 Bemerkung 46
-Sei X ein topologischer Raum, a, b ∈ X, δ : I → X ein Weg von a nach b.
+Sei X ein topologischer Raum, a, b ∈X, δ : I →X ein Weg von a nach b.
 Dann ist die Abbildung
-α : π1(X, a) → π1(X, b)
-[γ] �→ [δ ∗ γ ∗ δ]
+α : π1(X, a) →π1(X, b)
+[γ] ↦ [\overline{δ} ∗ γ ∗ δ]
 ein Gruppenisomorphismus.
 
 50
@@ -2382,93 +2379,93 @@ b
 Abbildung 3.7: Situation aus Bemerkung 46
 .
 Beweis:
-α([γ1] ∗ [γ2]) = [δ ∗ (γ1 ∗ γ2) ∗ δ]
-= [δ ∗ γ1 ∗ δ ∗ δ ∗ γ2 ∗ δ]
-= [δ ∗ γ1 ∗ δ] ∗ [δ ∗ γ2 ∗ δ]
-= α([γ1]) ∗ α([γ2])
+α([γ1] ∗ [γ2]) = [\overline{δ} ∗ (γ1 ∗ γ2) ∗ δ]
+= [\overline{δ} ∗ γ1 ∗ δ ∗ \overline{δ} ∗ γ2 ∗ δ]
+= [\overline{δ} ∗ γ1 ∗ δ] ∗ [\overline{δ} ∗ γ2 ∗ δ]
+= α([γ1]) ∗ α([γ2])
 Deﬁnition 46
 Ein wegzusammenhängender topologischer Raum X heißt einfach zusammenhängend,
-wenn π1(X, x) = { e } für ein x ∈ X.
-Wenn π1(X, x) = { e } für ein x ∈ X gilt, dann wegen Bemerkung 46 sogar für alle x ∈ X.
+wenn π1(X, x) = { e } für ein x ∈X.
+Wenn π1(X, x) = { e } für ein x ∈X gilt, dann wegen Bemerkung 46 sogar für alle x ∈X.
 Bemerkung 47
-Es seien X, Y topologische Räume, f : X → Y eine stetige Abbildung, x ∈ X, y := f(x) ∈ Y .
-a) Dann ist die Abbildung f∗ : π1(X, x) → π1(Y, y), [γ] → [f ◦ γ] ein Gruppenhomomor-
+Es seien X, Y topologische Räume, f : X →Y eine stetige Abbildung, x ∈X, y := f(x) ∈Y .
+a) Dann ist die Abbildung f∗: π1(X, x) →π1(Y, y), [γ] →[f ◦γ] ein Gruppenhomomor-
 phismus.
-b) Ist Z ein weiterer topologischer Raum und g : Y → Z eine stetige Abbildung z := g(y).
-Dann ist (g ◦ f)∗ = g∗ ◦ f∗ : π1(X, x) → π1(Z, z)
+b) Ist Z ein weiterer topologischer Raum und g : Y →Z eine stetige Abbildung z := g(y).
+Dann ist (g ◦f)∗= g∗◦f∗: π1(X, x) →π1(Z, z)
 Beweis:
-a) f∗ ist wohldeﬁniert: Seien γ1, γ2 homotope Wege von x. z.Z.: f ◦ γ1 ∼ f ◦ γ2: Nach
-Voraussetzung gibt es stetige Abbildungen H : I × I → X mit
+a) f∗ist wohldeﬁniert: Seien γ1, γ2 homotope Wege von x. z.Z.: f ◦γ1 ∼f ◦γ2: Nach
+Voraussetzung gibt es stetige Abbildungen H : I × I →X mit
 H(t, 0) = γ1(t),
 H(t, 1) = γ2(t),
 H(0, s) = H(1, s) = x.
-Dann ist f ◦H : I ×I → Y stetig mit (f ◦H)(t, 0) = f(H(t, 0)) = f(γ1(t)) = (f ◦γ1)(t)
-etc. ⇒ f ◦ γ1 ∼ f ◦ γ2.
-f∗([γ1] ∗ [γ2]) = [f ◦ (γ1 ∗ γ2)] = [(f ◦ γ1)] ∗ [(f ◦ γ2)] = f∗([γ1]) ∗ f∗([γ2])
-b) (g ◦ f)∗([γ]) = [(g ◦ f) ◦ γ] = [g ◦ (f ◦ γ)] = g∗([f ◦ γ]) = g∗(f∗([γ])) = (g∗ ◦ f∗)([γ])
+Dann ist f ◦H : I ×I →Y stetig mit (f ◦H)(t, 0) = f(H(t, 0)) = f(γ1(t)) = (f ◦γ1)(t)
+etc. ⇒f ◦γ1 ∼f ◦γ2.
+f∗([γ1] ∗[γ2]) = [f ◦(γ1 ∗γ2)] = [(f ◦γ1)] ∗[(f ◦γ2)] = f∗([γ1]) ∗f∗([γ2])
+b) (g ◦f)∗([γ]) = [(g ◦f) ◦γ] = [g ◦(f ◦γ)] = g∗([f ◦γ]) = g∗(f∗([γ])) = (g∗◦f∗)([γ])
 Beispiel 33
-1) f : S1 �→ R2 ist injektiv, aber f∗ : π1(S1, 1) ∼= Z → π1(R2, 1) = { e } ist nicht injektiv.
-2) f : R → S1, t �→ (cos 2πt, sin 2πt) ist surjektiv, aber f∗ : π1(R, 0) = { e } → π1(S1, 1) ∼=
+1) f : S1 ,→R2 ist injektiv, aber f∗: π1(S1, 1) ∼= Z →π1(R2, 1) = { e } ist nicht injektiv.
+2) f : R →S1, t 7→(cos 2πt, sin 2πt) ist surjektiv, aber f∗: π1(R, 0) = { e } →π1(S1, 1) ∼=
 Z ist nicht surjektiv.
 
 51
 3.2. FUNDAMENTALGRUPPE
 Bemerkung 48
-Sei f : X → Y ein Homöomorphismus zwischen topologischen Räumen X, Y . Dann gilt:
-f∗ : π1(X, x) → π1(Y, f(x))
-ist ein Isomorphismus für jedes x ∈ X.
-Beweis: Sei g : Y → X die Umkehrabbildung, d. h. g ist stetig und f ◦ g = idY , g ◦ f = idX
-⇒ f∗ ◦ g∗ = (f ◦ g)∗ = (idY )∗ = idπ1(Y,f(X) und g∗ ◦ f∗ = idπ1(X,x).
+Sei f : X →Y ein Homöomorphismus zwischen topologischen Räumen X, Y . Dann gilt:
+f∗: π1(X, x) →π1(Y, f(x))
+ist ein Isomorphismus für jedes x ∈X.
+Beweis: Sei g : Y →X die Umkehrabbildung, d. h. g ist stetig und f ◦g = idY , g ◦f = idX
+⇒ f∗ ◦ g∗ = (f ◦ g)∗ = (idY)∗ = id_{π1(Y, f(x))} und g∗ ◦ f∗ = id_{π1(X, x)}.
 Deﬁnition 47
-Seien X, Y topologische Räume, x0 ∈ X, y0 ∈ Y, f, g : X → Y stetig mit f(x0) = y0 = g(x0).
-f und g heißen homotop (f ∼ g), wenn es eine stetige Abbildung H : X × I → Y mit
-H(x, 0) = f(x) ∀x ∈ X
-H(x, 1) = g(x) ∀x ∈ X
-H(x0, s) = y0 ∀s ∈ I
+Seien X, Y topologische Räume, x0 ∈X, y0 ∈Y, f, g : X →Y stetig mit f(x0) = y0 = g(x0).
+f und g heißen homotop (f ∼g), wenn es eine stetige Abbildung H : X × I →Y mit
+H(x, 0) = f(x) ∀x ∈X
+H(x, 1) = g(x) ∀x ∈X
+H(x0, s) = y0 ∀s ∈I
 gibt.
 Bemerkung 49
-Sind f und g homotop, so ist f∗ = g∗ : π1(X, x0) → π1(Y, y0).
-Beweis: Sei γ ein geschlossener Weg in X um x0, d. h. [γ] ∈ π1(X, x0).
-Z. z.: f ◦ γ ∼ g ◦ γ
-Sei dazu Hγ : I × I → Y, (t, s) �→ H(γ(t), s). Dann gilt:
-Hγ(t, 0) = H(γ(t), 0) = (f ◦ γ)(t) ∀t ∈ I
-Hγ(1, s) = H(γ(1), s) = H(x0, s) = y0 ∀s ∈ I
-Hγ(t, 1) = H(γ(t), 1) = g(γ(t)) ∀t ∈ I
+Sind f und g homotop, so ist f∗= g∗: π1(X, x0) →π1(Y, y0).
+Beweis: Sei γ ein geschlossener Weg in X um x0, d. h. [γ] ∈π1(X, x0).
+Z. z.: f ◦γ ∼g ◦γ
+Sei dazu Hγ : I × I →Y, (t, s) 7→H(γ(t), s). Dann gilt:
+Hγ(t, 0) = H(γ(t), 0) = (f ◦γ)(t) ∀t ∈I
+Hγ(1, s) = H(γ(1), s) = H(x0, s) = y0 ∀s ∈I
+Hγ(t, 1) = H(γ(t), 1) = g(γ(t)) ∀t ∈I
 Beispiel 34
-f : X → Y, g : Y → X mit g ◦ f ∼ idX, f ◦ g ∼ idY
-⇒ f∗ ist Isomorphismus. Konkret: f : R2 → { 0 } , g : { 0 } → R2
-⇒ f ◦ g = id{ 0 }, g ◦ f : R2 → R2, x �→ 0 für alle x.
-g ◦ f ∼ idR2 mit Homotopie: H : R2 × I → R2, H(x, s) = (1 − s)x (stetig!)
-⇒ H(x, 0) = x = idR2(x), H(x, 1) = 0, H(0, s) = 0 ∀s ∈ I.
+f : X →Y, g : Y →X mit g ◦f ∼idX, f ◦g ∼idY
+⇒f∗ist Isomorphismus. Konkret: f : R2 →{ 0 } , g : { 0 } →R2
+⇒f ◦g = id{ 0 }, g ◦f : R2 →R2, x 7→0 für alle x.
+g ◦f ∼idR2 mit Homotopie: H : R2 × I →R2, H(x, s) = (1 −s)x (stetig!)
+⇒H(x, 0) = x = idR2(x), H(x, 1) = 0, H(0, s) = 0 ∀s ∈I.
 Satz 3.1 (Satz von Seifert und van Kampen „light“)
-Sei X ein topologischer Raum, U, V ⊆ X oﬀen mit U ∪ V = X und U ∩ V wegzusam-
+Sei X ein topologischer Raum, U, V ⊆X oﬀen mit U ∪V = X und U ∩V wegzusam-
 menhängend.
-Dann wird π1(X, x) für x ∈ U ∩ V erzeugt von geschlossenen Wegen um x, die ganz in
+Dann wird π1(X, x) für x ∈U ∩V erzeugt von geschlossenen Wegen um x, die ganz in
 U oder ganz in V verlaufen.
 
 52
 3.3. ÜBERLAGERUNGEN
-Beweis: Sei γ : I → X ein geschlossener Weg um x. Überdecke I mit endlich vielen oﬀenen
+Beweis: Sei γ : I →X ein geschlossener Weg um x. Überdecke I mit endlich vielen oﬀenen
 Intervallen I1, I2, . . . , In, die ganz in γ−1(U) oder ganz in γ−1(V ) liegen.
-O. B. d. A. sei γ(I1) ⊆ U, γ(I2) ⊆ V , etc.
-Wähle ti ∈ Ii ∩ Ii+1, also γ(ti) ∈ U ∩ V . Sei σi Weg in U ∩ V von x0 nach γ(ti) ⇒ γ ist
+O. B. d. A. sei γ(I1) ⊆U, γ(I2) ⊆V , etc.
+Wähle ti ∈Ii ∩Ii+1, also γ(ti) ∈U ∩V . Sei σi Weg in U ∩V von x0 nach γ(ti) ⇒γ ist
 homotop zu
-γ1 ∗ σ1
-� �� �
+γ1 ∗σ1
+| {z }
 in U
-∗ σ1 ∗ γ2 ∗ σ2
-�
-��
-�
+∗σ1 ∗γ2 ∗σ2
+|
+{z
+}
 in V
-∗ · · · ∗ σn−1 ∗ γ2 mit γi := γ|Ii
+∗ · · · ∗ σn−1 ∗ γn mit γi := γ|Ii
 a
 b
 x
 Abbildung 3.8: Topologischer Raum X
 Beispiel 35 (Satz von Seifert und van Kampen)
 1) Sei X wie in Abbildung 3.8. π1(X, x) wird „frei“ erzeugt von a und b, weil π1(U, x) =
-⟨a⟩ ∼= Z, π1(V, x) = ⟨b⟩ ∼= Z, insbesondere ist a ∗ b nicht homotop zu b ∗ a.
+⟨a⟩∼= Z, π1(V, x) = ⟨b⟩∼= Z, insbesondere ist a ∗b nicht homotop zu b ∗a.
 2) Torus: π1(T 2, X) wird erzeugt von a und b.
 V
 U
@@ -2477,27 +2474,27 @@ b
 V
 a
 b
-Abbildung 3.9: a ∗ b = b ∗ a ⇔ a ∗ b ∗ a ∗ b ∼ e
+Abbildung 3.9: a ∗b = b ∗a ⇔a ∗b ∗a ∗b ∼e
 3.3 Überlagerungen
 Deﬁnition 48
-Es seien X, Y zusammenhängende topologische Räume und p : Y → X eine stetige Abbil-
+Es seien X, Y zusammenhängende topologische Räume und p : Y →X eine stetige Abbil-
 dung.
-p heißt Überlagerung, wenn jedes x ∈ X eine oﬀene Umgebung U = U(x) ⊆ X besitzt,
-sodass p−1(U) disjunkte Vereinigung von oﬀenen Teilmengen Vj ⊆ Y ist (j ∈ I) und
-p|Vj : Vj → U ein Homöomorphismus ist.
+p heißt Überlagerung, wenn jedes x ∈X eine oﬀene Umgebung U = U(x) ⊆X besitzt,
+sodass p−1(U) disjunkte Vereinigung von oﬀenen Teilmengen Vj ⊆Y ist (j ∈I) und
+p|Vj : Vj →U ein Homöomorphismus ist.
 |I| heißt Grad der Überlagerung p und man schreibt:
 deg p := |I|
 
 53
 3.3. ÜBERLAGERUNGEN
-Abbildung 3.10: R → S1,
-t �→ (cos 2πt, sin 2πt)
+Abbildung 3.10: R →S1,
+t 7→(cos 2πt, sin 2πt)
 Beispiel 36
 1) siehe Abbildung 3.10
 2) siehe Abbildung 3.11
-3) Rn → T n = Rn/Zn
-4) Sn → Pn(R)
-5) S1 → S1, z �→ z2, siehe Abbildung 3.12
+3) Rn →T n = Rn/Zn
+4) Sn →Pn(R)
+5) S1 →S1, z 7→z2, siehe Abbildung 3.12
 0
 1
 2
@@ -2550,13 +2547,13 @@ Beispiel 36
 *
 *
 −−−→
-Abbildung 3.11: R2 → T 2 = R2/Z2
+Abbildung 3.11: R2 →T 2 = R2/Z2
 Bemerkung 50
 Überlagerungen sind surjektiv.
-Beweis: Sei p : Y → X eine Überlagerung und x ∈ X beliebig. Dann existiert eine oﬀene
-Umgebung U(x) ⊆ X und oﬀene Teilmengen Vj ⊆ X mit p−1(U) = ˙� Vj und p|Vj : Vj → U
+Beweis: Sei p : Y →X eine Überlagerung und x ∈X beliebig. Dann existiert eine oﬀene
+Umgebung U(x) ⊆ X und oﬀene Teilmengen Vj ⊆ Y mit p−1(U) = \dot{\bigcup}_j Vj und p|Vj : Vj → U
 ist Homöomorphismus.
-D. h. es existiert ein y ∈ Vj, so dass p|Vj(y) = x. Da x ∈ X beliebig war und ein y ∈ Y
+D. h. es existiert ein y ∈Vj, so dass p|Vj(y) = x. Da x ∈X beliebig war und ein y ∈Y
 existiert, mit p(y) = x, ist p surjektiv.
 ■
 
@@ -2568,40 +2565,40 @@ z
 z2
 ϕϕ
 z2
-Abbildung 3.12: t �→ (cos 4πt, sin 4πt)
+Abbildung 3.12: t 7→(cos 4πt, sin 4πt)
 Deﬁnition 49
-Seien (X, TX), (Y, TY ) topologische Räume und f : X → Y eine Abbildung.
-f heißt oﬀen :⇔ ∀U ∈ TX : f(U) ∈ TY .
+Seien (X, TX), (Y, TY ) topologische Räume und f : X →Y eine Abbildung.
+f heißt oﬀen :⇔∀U ∈TX : f(U) ∈TY .
 Beispiel 37 (Oﬀene und stetige Abbildungen)
-Sei X ein topologischer Raum und seien fi : R → R mit i ∈ { 1, 2, 3 } und g : R → S1 =
-{ z ∈ C | ∥z∥ = 1 } Abbildungen.
+Sei X ein topologischer Raum und seien fi : R →R mit i ∈{ 1, 2, 3 } und g : R →S1 =
+{ z ∈C | ∥z∥= 1 } Abbildungen.
 1) f1 := idR ist eine oﬀene und stetige Abbildung.
 2) g(x) := e2πix ist eine oﬀene, aber keine stetige Abbildung (vgl. Abbildung 1.5).
 3) f2(x) := 42 ist eine stetige, aber keine oﬀene Abbildung.
 4) f3(x) :=
-�
+(
 0
-falls x ∈ Q
+falls x ∈Q
 42
-falls x ∈ R \ Q
+falls x ∈R \ Q
 ist weder stetig noch oﬀen.
 Bemerkung 51
 Überlagerungen sind oﬀene Abbildungen.
-Beweis: Sei y ∈ V und x ∈ p(V ), sodass x = p(y) gilt. Sei weiter U = Ux eine oﬀene Umgebung
+Beweis: Sei y ∈V und x ∈p(V ), sodass x = p(y) gilt. Sei weiter U = Ux eine oﬀene Umgebung
 von x wie in Deﬁnition 48 und Vj die Komponente von p−1(U), die y enthält.
-Dann ist V ∩ Vj oﬀene Umgebung von y.
-⇒ p(V ∩ Vj) ist oﬀen in p(Vj), also auch oﬀen in X. Außerdem ist p(y) = x ∈ p(V ∩ Vj) und
-p(V ∩ Vj) ⊆ p(V ).
-⇒ p(V ) ist oﬀen.
+Dann ist V ∩Vj oﬀene Umgebung von y.
+⇒p(V ∩Vj) ist oﬀen in p(Vj), also auch oﬀen in X. Außerdem ist p(y) = x ∈p(V ∩Vj) und
+p(V ∩Vj) ⊆p(V ).
+⇒p(V ) ist oﬀen.
 Deﬁnition 50
-Sei X ein topologischer Raum und M ⊆ X.
+Sei X ein topologischer Raum und M ⊆X.
 M heißt diskret in X, wenn M in X keinen Häufungspunkt hat.
 Bemerkung 52
-Sei p : Y → X Überlagerung, x ∈ X.
-a) X hausdorﬀsch ⇒ Y hausdorﬀsch
-b) p−1(x) ist diskret in Y für jedes x ∈ X.
+Sei p : Y →X Überlagerung, x ∈X.
+a) X hausdorﬀsch ⇒Y hausdorﬀsch
+b) p−1(x) ist diskret in Y für jedes x ∈X.
 Beweis:
-a) Seien y1, y2 ∈ Y .
+a) Seien y1, y2 ∈Y .
 1. Fall: p(y1) = p(y2) = x.
 
 55
@@ -2609,31 +2606,31 @@ a) Seien y1, y2 ∈ Y .
 Sei U Umgebung von x wie in Deﬁnition 48, Vj1 bzw. Vj2 die Komponente von p−1(U),
 die y1 bzw. y2 enthält.
 Dann ist Vj1 ̸= Vj2, weil beide ein Element aus p−1(x) enthalten.
-⇒ Vj1 ∩ Vj2 = ∅ nach Voraussetzung.
+⇒Vj1 ∩Vj2 = ∅nach Voraussetzung.
 2. Fall: p(y1) ̸= p(y2).
 Dann seien U1 und U2 disjunkte Umgebungen von p(y1) und p(y2).
-⇒ p−1(U1) und p−1(U2) sind disjunkte Umgebungen von y1 und y2.
-b) Sei x ∈ X beliebig, aber fest.
-Zu zeigen: ∀yi ∈ p−1(x) : ∃Vi ∈ TY mit yi ∈ Vi, sodass gilt:i ̸= j ⇒ Vi ∩ Vj = ∅.
+⇒p−1(U1) und p−1(U2) sind disjunkte Umgebungen von y1 und y2.
+b) Sei x ∈X beliebig, aber fest.
+Zu zeigen: ∀yi ∈p−1(x) : ∃Vi ∈TY mit yi ∈Vi, sodass gilt:i ̸= j ⇒Vi ∩Vj = ∅.
 Die Vi existieren wegen der Deﬁnition einer Überlagerung: p heißt Überlagerung
-:⇔ ∀x ∈ X∃U = U(x) ∈ TX : p−1(U) = ˙�
+:⇔∀x ∈X∃U = U(x) ∈TX : p−1(U) = ˙S
 Vi∈TY Vi und p|Vi ist Homöomorphismus.
-⇒ (p|Vi)−1(x) = { yi }
-⇒ Alle yi liegen diskret in Y , da Häufungspunkte unendlich viele Elemente in jeder
+⇒(p|Vi)−1(x) = { yi }
+⇒Alle yi liegen diskret in Y , da Häufungspunkte unendlich viele Elemente in jeder
 Umgebung benötigen.
 ■
 Bemerkung 53 (Eindeutigkeit des Überlagerungsgrades)
-Sei p : Y → X Überlagerung. Dann gilt:
-∀x1, x2 ∈ X : |p−1(x1)| = |p−1(x2)|
-Hinweis: |p−1(x1)| = ∞ ist erlaubt!
-Beweis: Sei U Umgebung von x1 wie in Deﬁnition 48, x ∈ U. Dann enthält jedes Vj mit j ∈ I
+Sei p : Y →X Überlagerung. Dann gilt:
+∀x1, x2 ∈X : |p−1(x1)| = |p−1(x2)|
+Hinweis: |p−1(x1)| = ∞ist erlaubt!
+Beweis: Sei U Umgebung von x1 wie in Deﬁnition 48, x ∈U. Dann enthält jedes Vj mit j ∈I
 genau ein Element von p−1(x).
-⇒ |p−1(x)| ist konstant für x ∈ U
+⇒|p−1(x)| ist konstant für x ∈U
 X zhgd.
-====⇒ |p−1(x)| ist konstant für x ∈ X.
+====⇒|p−1(x)| ist konstant für x ∈X.
 Deﬁnition 51
-Es seien X, Y, Z topologische Räume, p : Y → X eine Überlagerung und f : Z → X stetig.
-Eine stetige Abbildung ˜f : Z → Y heißt Liftung von f, wenn p ◦ ˜f = f ist.
+Es seien X, Y, Z topologische Räume, p : Y →X eine Überlagerung und f : Z →X stetig.
+Eine stetige Abbildung ˜f : Z →Y heißt Liftung von f, wenn p ◦˜f = f ist.
 Y
 X
 Z
@@ -2641,9 +2638,9 @@ p
 ˜f
 f
 Bemerkung 54 (Eindeutigkeit der Liftung)
-Sei Z zusammenhängend und f0, f1 : Z → Y Liftungen von f.
-∃z0 ∈ Z : f0(z0) = f1(z0) ⇒ f0 = f1
-Beweis: Sei T = { z ∈ Z | f0(z) = f1(z) }.
+Sei Z zusammenhängend und f0, f1 : Z →Y Liftungen von f.
+∃z0 ∈Z : f0(z0) = f1(z0) ⇒f0 = f1
+Beweis: Sei T = { z ∈Z | f0(z) = f1(z) }.
 Z. z.: T ist oﬀen und Z \ T ist auch oﬀen.
 
 56
@@ -2664,142 +2661,142 @@ Z. z.: T ist oﬀen und Z \ T ist auch oﬀen.
 6
 T
 Liften
-−−−→ R2/Z2
+−−−→R2/Z2
 Abbildung 3.13: Beim Liften eines Weges bleiben geschlossene Wege im allgemeinen nicht ge-
 schlossen
-Sei z ∈ T, x = f(z), U Umgebung von x wie in Deﬁnition 48, V die Komponente von p−1(U),
+Sei z ∈T, x = f(z), U Umgebung von x wie in Deﬁnition 48, V die Komponente von p−1(U),
 die y := f0(z) = f1(z) enthält.
-Sei q : U → V die Umkehrabbildung zu p|V .
-Sei W := f−1(U) ∩ f−1
-0 (V ) ∩ f−1
+Sei q : U →V die Umkehrabbildung zu p|V .
+Sei W := f−1(U) ∩f−1
+0 (V ) ∩f−1
 1 (V ). W ist oﬀene Umgebung in Z von z.
-Behauptung: W ⊆ T
-Denn für w ∈ W ist q(f(w)) = q((p ◦ f0))(w) = ((q ◦ p) ◦ f0)(w) = f0(w) = q(f(w)) = f1(w)
-⇒ T ist oﬀen.
+Behauptung: W ⊆T
+Denn für w ∈W ist q(f(w)) = q((p ◦f0))(w) = ((q ◦p) ◦f0)(w) = f0(w) = q(f(w)) = f1(w)
+⇒T ist oﬀen.
 Analog: Z \ T ist oﬀen.
 Satz 3.2
-Sei p : Y → X Überlagerung, γ : I → X ein Weg, y ∈ Y mit p(y) = γ(0) =: x.
-Dann gibt es genau einen Weg ˜γ : I → Y mit ˜γ(0) = y und p ◦ ˜γ = γ.
-p : Y → X Überlagerung, X, Y wegzusammenhängend. p stetig und surjektiv, zu x ∈ X∃
-Umgebung U, so dass p−1(U) = � Vj
-p|Vj : Vj → U Homöomorphismus.
+Sei p : Y →X Überlagerung, γ : I →X ein Weg, y ∈Y mit p(y) = γ(0) =: x.
+Dann gibt es genau einen Weg ˜γ : I →Y mit ˜γ(0) = y und p ◦˜γ = γ.
+p : Y →X Überlagerung, X, Y wegzusammenhängend. p stetig und surjektiv, zu x ∈X∃
+Umgebung U, so dass p−1(U) = S Vj
+p|Vj : Vj →U Homöomorphismus.
 Bemerkung 55
 Wege in X lassen sich zu Wegen in Y liften.
-Zu jedem y ∈ p−1(γ(0)) gibt es genau einen Lift von γ.
+Zu jedem y ∈p−1(γ(0)) gibt es genau einen Lift von γ.
 
 57
 3.3. ÜBERLAGERUNGEN
 Proposition 3.3
-Seien p : Y → X eine Überlagerung, a, b ∈ X, γ0, γ1 : I → X homotope Wege von a
-nach b, ˜a ∈ p−1(a), ˜γ0, ˜γ1 Liftungen von γ0 bzw. γ1 mit ˜γi(0) = ˜a.
-Dann ist ˜γ0(1) = ˜γ1(1) und ˜γ0 ∼ ˜γ1.
-Beweis: Sei H : I × I → X Homotopie zwischen γ1 und γ2.
-Für s ∈ I sei γs : I → X, t �→ H(t, s).
+Seien p : Y →X eine Überlagerung, a, b ∈X, γ0, γ1 : I →X homotope Wege von a
+nach b, ˜a ∈p−1(a), ˜γ0, ˜γ1 Liftungen von γ0 bzw. γ1 mit ˜γi(0) = ˜a.
+Dann ist ˜γ0(1) = ˜γ1(1) und ˜γ0 ∼˜γ1.
+Beweis: Sei H : I × I →X Homotopie zwischen γ0 und γ1.
+Für s ∈I sei γs : I →X, t 7→H(t, s).
 Sei ˜γs Lift von γs mit ˜γs(0) = ˜a
-Sei ˜H : I × I → Y,
+Sei ˜H : I × I →Y,
 ˜H(t, s) := ˜γs(t)
 Dann gilt:
 (i) ˜H ist stetig (Beweis wie für Bemerkung 54)
 (ii) ˜H(t, 0) = ˜γ0(t),
 ˜H(t, 1) = ˜γ1(t)
 (iii) ˜H(0, s) = ˜γs(0) = ˜a
-(iv) ˜H(1, s) ∈ p−1(b)
+(iv) ˜H(1, s) ∈p−1(b)
 Da p−1(b) diskrete Teilmenge von Y ist
-⇒ ˜bs = ˜H(1, s) = ˜H(1, 0) ∀s ∈ I
-⇒ ˜b0 = ˜b1 und ˜H ist Homotopie zwischen ˜γ0 und ˜γ1.
+⇒˜bs = ˜H(1, s) = ˜H(1, 0) ∀s ∈I
+⇒˜b0 = ˜b1 und ˜H ist Homotopie zwischen ˜γ0 und ˜γ1.
 ■
 Folgerung 3.4
-Sei p : Y → X eine Überlagerung, x0 ∈ X, y0 ∈ p−1(x0)
-a) p∗ : π1(Y, y0) → π1(X, x0) ist injektiv
+Sei p : Y →X eine Überlagerung, x0 ∈X, y0 ∈p−1(x0)
+a) p∗: π1(Y, y0) →π1(X, x0) ist injektiv
 b) [π1(X, x0) : p∗(π1(Y, y0))] = deg(p)
 Beweis:
-a) Sei ˜γ ein Weg in Y um y0 und p∗([˜γ]) = e, also p ◦ ˜γ ∼ γx0
+a) Sei ˜γ ein Weg in Y um y0 und p∗([˜γ]) = e, also p ◦˜γ ∼γx0
 Nach Proposition 3.3 ist dann ˜γ homotop zum Lift des konstanten Wegs γx0 mit
-Anfangspunkt y0, also zu γy0 ⇒ [˜γ] = e
+Anfangspunkt y0, also zu γy0 ⇒[˜γ] = e
 b) Sei d = deg p und p−1(x0) = { y0, y1, . . . , yd−1 }. Für einen geschlossenen Weg γ in X
 um x0 sei ˜γ die Liftung mit ˜γ(0) = y0.
-˜γ(1) ∈ { y0, . . . , yd−1 } hängt nur von [γ] ∈ π1(X, x0) ab.
+˜γ(1) ∈{ y0, . . . , yd−1 } hängt nur von [γ] ∈π1(X, x0) ab.
 Für geschlossene Wege γ0, γ1 um x gilt:
 ˜γ0(1) = ˜γ1(1)
-⇔[ ˜γ0 ∗ ˜γ1−1] ∈ π1(Y, y0)
-⇔[γ0 ∗ γ−1
-1 ] ∈ p∗(π1(Y, y0))
+⇔[ ˜γ0 ∗˜γ1−1] ∈π1(Y, y0)
+⇔[γ0 ∗γ−1
+1 ] ∈p∗(π1(Y, y0))
 ⇔[γ0] und [γ1]liegen in der selben Nebenklasse bzgl. p∗(π1(Y, y0))
 
 58
 3.3. ÜBERLAGERUNGEN
-Zu i ∈ { 0, . . . , d − 1 } gibt es Weg δi in Y mit δi(0) = y0 und δi(1) = yi
-⇒ p ∪ δi ist geschlossener Weg in X um x0.
-⇒ Jedes yi mit i = 0, . . . , d − 1 ist ˜γ(1) für ein [γ] ∈ π1(X, x0).
+Zu i ∈{ 0, . . . , d −1 } gibt es Weg δi in Y mit δi(0) = y0 und δi(1) = yi
+⇒p ◦δi ist geschlossener Weg in X um x0.
+⇒Jedes yi mit i = 0, . . . , d −1 ist ˜γ(1) für ein [γ] ∈π1(X, x0).
 Bemerkung 56
-Sei p : Y → X Überlagerung und X einfach zusammenhängend.
+Sei p : Y →X Überlagerung und X einfach zusammenhängend.
 Dann ist p ein Homöomorphismus.
 Beweis: Wegen Folgerung 3.4.a ist auch Y einfach zusammenhängend und wegen Folge-
 rung 3.4.b ist deg(p) = 1, p ist also bijektiv.
-Nach Bemerkung 51 ist p oﬀen ⇒ p−1 ist stetig. ⇒ p ist Homöomorphismus.
+Nach Bemerkung 51 ist p oﬀen ⇒p−1 ist stetig. ⇒p ist Homöomorphismus.
 ■
 Deﬁnition 52
-Eine Überlagerung p : ˜X → X heißt universell, wenn ˜X einfach zusammenhängend ist.
+Eine Überlagerung p : ˜X →X heißt universell, wenn ˜X einfach zusammenhängend ist.
 Beispiel 38 (Universelle Überlagerungen)
-R → S1,
-t �→ (cos 2πt, sin 2πt)
-R2 → T 2 = R2/Z2
-Sn → Pn(R) für n ≥ 2
+R →S1,
+t 7→(cos 2πt, sin 2πt)
+R2 →T 2 = R2/Z2
+Sn →Pn(R) für n ≥2
 Satz 3.5
-Sei p : ˜X → X eine universelle Überlagerung, q : Y → X weitere Überlagerung.
-Sei x0 ∈ X, ˜x0 ∈ ˜X, y0 ∈ Y mit q(y0) = x0 = p( ˜x0).
-Dann gibt es genau eine Überlagerung ˜p : ˜X → Y mit ˜p( ˜x0) = y0.
-Beweis: Sei z ∈ ˜X, γz : I → ˜X ein Weg von ˜x0 nach z.
-Sei δz die eindeutige Liftung von p ◦ γz nach Y mit δz(0) = y0.
+Sei p : ˜X →X eine universelle Überlagerung, q : Y →X weitere Überlagerung.
+Sei x0 ∈X, ˜x0 ∈˜X, y0 ∈Y mit q(y0) = x0 = p( ˜x0).
+Dann gibt es genau eine Überlagerung ˜p : ˜X →Y mit ˜p( ˜x0) = y0.
+Beweis: Sei z ∈˜X, γz : I →˜X ein Weg von ˜x0 nach z.
+Sei δz die eindeutige Liftung von p ◦γz nach Y mit δz(0) = y0.
 Setze ˜p(z) = δz(1).
 Da ˜X einfach zusammenhängend ist, hängt ˜p(z) nicht vom gewählten Weg γz ab.
 Oﬀensichtlich ist q(˜p(z)) = p(z).
-Zu zeigen: ˜p ist stetig in z ∈ ˜X:
-Sei W ⊆ Y oﬀene Umgebung von ˜p(z).
+Zu zeigen: ˜p ist stetig in z ∈˜X:
+Sei W ⊆Y oﬀene Umgebung von ˜p(z).
 q oﬀen
-====⇒ q(W) ist oﬀene Umgebung von p(z) · d(˜p(z)).
-Sei U ⊆ q(W) oﬀen wie in Deﬁnition 48 und V ⊆ q−1(U) die Komponente, die ˜p(z) enthält.
-O. B. d. A. sei V ⊆ W.
-Sei Z := p−1(U). Für u ∈ Z sei δ ein Weg in Z von z nach u.
-⇒ γz ∗ δ ist Weg von x0 nach u
-⇒ ˜p(u) ∈ V
-⇒ Z ⊆ ˜
+====⇒q(W) ist oﬀene Umgebung von p(z) = q(˜p(z)).
+Sei U ⊆q(W) oﬀen wie in Deﬁnition 48 und V ⊆q−1(U) die Komponente, die ˜p(z) enthält.
+O. B. d. A. sei V ⊆W.
+Sei Z := p−1(U). Für u ∈Z sei δ ein Weg in Z von z nach u.
+⇒γz ∗δ ist Weg von ˜x0 nach u
+⇒˜p(u) ∈V
+⇒Z ⊆˜
 p−1(W)
-⇒ ˜p ist stetig
+⇒˜p ist stetig
 
 59
 3.3. ÜBERLAGERUNGEN
 Folgerung 3.6
-Sind p : ˜X → X und q : ˜Y → X universelle Überlagerungen, so sind ˜X und ˜Y homöomorph.
-Beweis: Seien x0 ∈ X, ˜x0 ∈ ˜X mit p( ˜x0) = x0 und ˜y0 ∈ q−1(x0) ⊆ ˜Y .
+Sind p : ˜X →X und q : ˜Y →X universelle Überlagerungen, so sind ˜X und ˜Y homöomorph.
+Beweis: Seien x0 ∈X, ˜x0 ∈˜X mit p( ˜x0) = x0 und ˜y0 ∈q−1(x0) ⊆˜Y .
 Nach Satz 3.5 gibt es genau eine Überlagerung
-f : ˜X → ˜Y mit f(x0) = ˜y0 und q ◦ f = p
+f : ˜X →˜Y mit f( ˜x0) = ˜y0 und q ◦f = p
 und genau eine Überlagerung
-g : ˜Y → ˜X mit g( ˜y0) = ˜x0 und p ◦ g = q
-Damit gilt: p ◦ q ◦ f = q ◦ f = p, q ◦ f ◦ g = p ◦ g = q. Also ist g ◦ f : ˜X → ˜X Lift von
-p : ˜X → X mit (g ◦ f)( ˜x0) = ˜x0.
-Da auch id˜x diese Eigenschaft hat, folgt mit Bemerkung 53: g ◦ f = id ˜
+g : ˜Y →˜X mit g( ˜y0) = ˜x0 und p ◦g = q
+Damit gilt: p ◦g ◦f = q ◦f = p, q ◦f ◦g = p ◦g = q. Also ist g ◦f : ˜X →˜X Lift von
+p : ˜X →X mit (g ◦f)( ˜x0) = ˜x0.
+Da auch id ˜X diese Eigenschaft hat, folgt mit Bemerkung 54: g ◦f = id ˜
 X.
-Analog gilt f ◦ g = id ˜Y .
+Analog gilt f ◦g = id ˜Y .
 ■
 Die Frage, wann es eine universelle Überlagerung gibt, beantwortet der folgende Satz:
 Deﬁnition 53
-Sei (X, T) ein topologischer Raum und x ∈ X.
-U ⊆ T heißt eine Umgebungsbasis von x, wenn jede oﬀene Umgebung von x eine Teilmenge
+Sei (X, T) ein topologischer Raum und x ∈X.
+U ⊆T heißt eine Umgebungsbasis von x, wenn jede oﬀene Umgebung von x eine Teilmenge
 von U enthält.
 Satz 3.7
 Es sei X ein wegzusammenhängender topologischer Raum in dem jeder Punkt eine
 Umgebungsbasis aus einfach zusammenhängenden Mengen hat.
 Dann gibt es eine universelle Überlagerung.
-Beweis: Sei x0 ∈ X und ˜X := { (x, [γ]) | x ∈ X, γ Weg von xo nach x } und p : ˜X → X, (x, [γ]) �→
+Beweis: Sei x0 ∈X und ˜X := { (x, [γ]) | x ∈X, γ Weg von x0 nach x } und p : ˜X →X, (x, [γ]) 7→
 x.
 Die Topologie auf ˜X ist folgende: Deﬁniere eine Umgebungsbasis von (x, [γ]) wie folgt: Es
 sei U eine einfach zusammenhängende Umgebung von x und
-˜U = ˜U(x, [γ]) := { (y, [γ ∗ α]) | y ∈ U, α Weg in U von x nach y }
-p ist Überlagerung: p| ˜U : ˜U → U bijektiv. p ist stetig und damit p| ˜U ein Homöomorphismus.
-Sind γ1, γ2 Wege von x0 nach x und γ1 ∼ γ2, so ist ˜U(x, [γ1]) ∩ ˜U(x, [γ2]) = ∅, denn: Ist
-γ1 ∗ α ∼ γ2 ∗ α, so ist auch γ1 ∼ γ2. Also ist p eine Überlagerung.
-˜X ist einfach zusammenhängend: Es sei ˜x0 := (x0, e) und ˜γ : I → ˜X ein geschlossener Weg
+˜U = ˜U(x, [γ]) := { (y, [γ ∗α]) | y ∈U, α Weg in U von x nach y }
+p ist Überlagerung: p| ˜U : ˜U →U bijektiv. p ist stetig und damit p| ˜U ein Homöomorphismus.
+Sind γ1, γ2 Wege von x0 nach x und γ1 ̸∼γ2, so ist ˜U(x, [γ1]) ∩˜U(x, [γ2]) = ∅, denn: Ist
+γ1 ∗α ∼γ2 ∗α, so ist auch γ1 ∼γ2. Also ist p eine Überlagerung.
+˜X ist einfach zusammenhängend: Es sei ˜x0 := (x0, e) und ˜γ : I →˜X ein geschlossener Weg
 um ˜x0.
 Sei γ := p(˜γ).
 Annahme: [˜γ] ̸= e
@@ -2810,85 +2807,85 @@ spruch.
 60
 3.3. ÜBERLAGERUNGEN
 Deﬁnition 54
-Es sei p : Y → X eine Überlagerung und f : Y → Y ein Homöomorphismus.
-a) f heißt Decktransformation von p :⇔ p ◦ f = p.
-b) Die Decktransformationen von p : Y → X bilden mit der Verkettung eine Gruppe,
+Es sei p : Y →X eine Überlagerung und f : Y →Y ein Homöomorphismus.
+a) f heißt Decktransformation von p :⇔p ◦f = p.
+b) Die Decktransformationen von p : Y →X bilden mit der Verkettung eine Gruppe,
 die sog. Decktransformationsgruppe. Man schreibt: Deck(p), Deck(Y/X) oder
-Deck(Y → X).
+Deck(Y →X).
 c) p heißt regulär, wenn | Deck(Y/X)| = deg p gilt.
 Bemerkung 57 (Eigenschaften der Decktransformation)
 a) (Deck Y/X, ◦) ist eine Gruppe
-b) Ist f ∈ Deck(Y/X) und f ̸= id, dann hat f keinen Fixpunkt.
-c) | Deck(Y/X)| ≤ deg p
-d) Ist f eine reguläre Überlagerung, dann gilt: ∀x ∈ X : Deck(Y/X) operiert transitiv
+b) Ist f ∈Deck(Y/X) und f ̸= id, dann hat f keinen Fixpunkt.
+c) | Deck(Y/X)| ≤deg p
+d) Ist p eine reguläre Überlagerung, dann gilt: ∀x ∈X : Deck(Y/X) operiert transitiv
 auf der Menge der Urbilder p−1(x).
 Beweis:
 a) Es gilt:
-• idY ∈ Deck Y/X,
-• f, g ∈ Deck Y/X ⇒ p ◦ (f ◦ g) = (p ◦ f) ◦ g = p ◦ g ⇒ f ◦ g ∈ Deck Y/X
-• f ∈ Deck Y/X ⇒ p ◦ f = p ⇒ p ◦ f−1 = (p ◦ f) ◦ f−1 = p ◦ (f ◦ f−1) = p ⇒
-f−1 ∈ Deck Y/X
+• idY ∈Deck Y/X,
+• f, g ∈Deck Y/X ⇒p ◦(f ◦g) = (p ◦f) ◦g = p ◦g ⇒f ◦g ∈Deck Y/X
+• f ∈Deck Y/X ⇒p ◦f = p ⇒p ◦f−1 = (p ◦f) ◦f−1 = p ◦(f ◦f−1) = p ⇒
+f−1 ∈Deck Y/X
 b) Die Menge
-Fix(f) = { y ∈ Y | f(y) = y }
-ist abgeschlossen als Urbild der Diagonale ∆ ⊆ Y × Y unter der stetigen Abbildung
-y �→ (f(y), y). Außerdem ist Fix(f) oﬀen, denn ist y ∈ Fix(f), so sei U eine Umgebung
-von p(y) ∈ X wie in Deﬁnition 48 und U ⊆ p−1(U) die Komponente, die y enthält;
-also p : V → U ein Homöomorphismus. Dann ist W := f−1(V ) ∩ V oﬀene Umgebung
+Fix(f) = { y ∈Y | f(y) = y }
+ist abgeschlossen als Urbild der Diagonale ∆⊆Y × Y unter der stetigen Abbildung
+y 7→(f(y), y). Außerdem ist Fix(f) oﬀen, denn ist y ∈Fix(f), so sei U eine Umgebung
+von p(y) ∈X wie in Deﬁnition 48 und V ⊆p−1(U) die Komponente, die y enthält;
+also p : V →U ein Homöomorphismus. Dann ist W := f−1(V ) ∩V oﬀene Umgebung
 von y.
-Für z ∈ W ist f(z) ∈ V und p(f(z)) = p(z). Da p injektiv auf V ist, folgt f(z) = z,
+Für z ∈W ist f(z) ∈V und p(f(z)) = p(z). Da p injektiv auf V ist, folgt f(z) = z,
 d. h. W ⊆ Fix(f).
-Da Y zusammenhängend ist, folgt aus Fix( ˜f) ̸= ∅ schon Fix(f) = Y , also f = idY .
-c) Es sei x0 ∈ X, deg(p) = d und p−1(x0) = { y0, . . . , yd−1 }. Für f ∈ Deck(Y/X) ist
+Da Y zusammenhängend ist, folgt aus Fix(f) ̸= ∅schon Fix(f) = Y , also f = idY .
+c) Es sei x0 ∈X, deg(p) = d und p−1(x0) = { y0, . . . , yd−1 }. Für f ∈Deck(Y/X) ist
 f(y0) ∈ { y0, . . . , yd−1 }.
-Zu i ∈ { 0, . . . , d − 1 } gibt es höchstens ein f ∈ Deck(Y/X) mit f(y0) = y1, denn ist
-f(y0) = g(y0), so ist (g−1 ◦ f)(y0) = y0, also nach Bemerkung 57.c g−1 ◦ f = idY .
+Zu i ∈{ 0, . . . , d −1 } gibt es höchstens ein f ∈Deck(Y/X) mit f(y0) = yi, denn ist
+f(y0) = g(y0), so ist (g−1 ◦f)(y0) = y0, also nach Bemerkung 57.b g−1 ◦f = idY .
 d) Wenn jemand den Beweis macht, bitte an info@martin-thoma.de schicken.
 Beispiel 39 (Decktransformationen)
-1) p : R → S1 : Deck(R/S1) = { t �→ t + n | n ∈ Z } ∼= Z
-2) p : R2 → T 2 : Deck(R2/T 2) ∼= Z × Z = Z2
-3) p : Sn → Pn(R) : Deck(Sn/Pn(R)) = { x �→ ±x } ∼= Z/2Z
+1) p : R →S1 : Deck(R/S1) = { t 7→t + n | n ∈Z } ∼= Z
+2) p : R2 →T 2 : Deck(R2/T 2) ∼= Z × Z = Z2
+3) p : Sn →Pn(R) : Deck(Sn/Pn(R)) = { x 7→±x } ∼= Z/2Z
 
 61
 3.3. ÜBERLAGERUNGEN
 Nun werden wir eine Verbindung zwischen der Decktransformationsgruppe und der Fundamen-
 talgruppe herstellen:
 Satz 3.8
-Ist p : ˜X → X eine universelle Überlagerung, so gilt:
+Ist p : ˜X →X eine universelle Überlagerung, so gilt:
 Deck( ˜X/X) ∼= π1(X, x0)
-∀x0 ∈ X
-Beweis: Wähle ˜x0 ∈ p−1(x0). Es sei ρ : Deck(˜x/x) → π1(X, x0) die Abbildung, die f auf [p(γf)]
+∀x0 ∈X
+Beweis: Wähle ˜x0 ∈p−1(x0). Es sei ρ : Deck( ˜X/X) →π1(X, x0) die Abbildung, die f auf [p(γf)]
 abbildet, wobei γf ein Weg von ˜x0 nach f( ˜x0) sei. Da ˜X einfach zusammenhängend ist, ist
 γf bis auf Homotopie eindeutig bestimmt und damit auch ρ wohldeﬁniert.
-• ρ ist Gruppenhomomorphismus: Seien f, g ∈ Deck( ˜X/X) ⇒ γg◦f = γg ∗ g(γf) ⇒
-p(γg◦f) = p(γg) ∗ (p ◦ g)
-� �� �
+• ρ ist Gruppenhomomorphismus: Seien f, g ∈Deck( ˜X/X) ⇒γg◦f = γg ∗g(γf) ⇒
+p(γg◦f) = p(γg) ∗(p ◦g)
+| {z }
 =p
 (γf) = ρ(g) ∗ ρ(f)
-• ρ ist injektiv: ρ(f) = e ⇒ p(γf) ∼ γx0
+• ρ ist injektiv: ρ(f) = e ⇒p(γf) ∼γx0
 Satz 3.2
-====⇒ γf ∼ γ ˜
-x0 ⇒ f(x0) = ˜x0
+====⇒γf ∼γ ˜
+x0 ⇒f( ˜x0) = ˜x0
 Bem. 57.c
-======⇒ f =
+======⇒f =
 id˜x.
-• ρ ist surjektiv: Sei [γ] ∈ π1(X, x0), ˜γ Lift von γ nach ˜x mit Anfangspunkt ˜x0. Der
+• ρ ist surjektiv: Sei [γ] ∈π1(X, x0), ˜γ Lift von γ nach ˜x mit Anfangspunkt ˜x0. Der
 Endpunkt von ˜γ sei ˜x1.
-p ist reguläre Überlagerung: Seien ˜x0, ˜x1 ∈ ˜X mit p( ˜x0) = p( ˜x1). Nach Satz 3.5 gibt
+es genau eine Überlagerung ˜p : ˜X →˜X mit p = p ◦˜p und ˜p( ˜x0) = ˜x1. Somit ist ˜p eine
+p ist reguläre Überlagerung: Seien ˜x0, ˜x1 ∈˜X mit p( ˜x0) = p( ˜x1). Nach Satz 3.5 gibt
+es genau eine Überlagerung ˜p : ˜X →X mit p = p ◦˜p und ˜p( ˜x0) = ˜x1. Somit ist ˜p eine
 Decktransformation und damit p eine reguläre Überlagerung.
-Da p reguläre Überlagerung ist, gibt es ein f ∈ Deck( ˜X/X) mit f( ˜x0) = ˜x1.
+Da p reguläre Überlagerung ist, gibt es ein f ∈Deck( ˜X/X) mit f( ˜x0) = ˜x1.
 Aus der Deﬁnition von ρ folgt: ρ(f) = p(γf) = γ
 ■
 Beispiel 40 (Bestimmung von π1(S1))
-p : R → S1, t �→ (cos 2πt, sin 2πt) ist universelle Überlagerung, da R zusammenhängend ist.
-Für n ∈ Z sei fn : R → R, t �→ t + n die Translation um n.
-Es gilt: (p ◦ fn)(t) = p(fn(t)) = p(t)
-∀t ∈ R, d. h. fn ist Decktransformation.
+p : R →S1, t 7→(cos 2πt, sin 2πt) ist universelle Überlagerung, da R einfach zusammenhängend ist.
+Für n ∈Z sei fn : R →R, t 7→t + n die Translation um n.
+Es gilt: (p ◦fn)(t) = p(fn(t)) = p(t)
+∀t ∈R, d. h. fn ist Decktransformation.
 Ist umgekehrt g irgendeine Decktransformation, so gilt insbesondere für t = 0:
-(cos(2πg(0)), sin(2πg(0))) = (p ◦ g)(0) = p(0) = (1, 0)
-Es existiert n ∈ Z mit g(0) = n. Da auch fn(0) = 0 + n = n gilt, folgt mit Bemerkung 57.c
+(cos(2πg(0)), sin(2πg(0))) = (p ◦g)(0) = p(0) = (1, 0)
+Es existiert n ∈Z mit g(0) = n. Da auch fn(0) = 0 + n = n gilt, folgt mit Bemerkung 57.c
 g = fn. Damit folgt:
-Deck(R/S1) = { fn | n ∈ Z } ∼= Z
+Deck(R/S1) = { fn | n ∈Z } ∼= Z
 Nach Satz 3.8 also π1(S1) ∼= Deck(R/S1) ∼= Z
 
 62
@@ -2896,40 +2893,40 @@ Nach Satz 3.8 also π1(S1) ∼= Deck(R/S1) ∼= Z
 3.4 Gruppenoperationen
 Deﬁnition 55
 Sei (G, ·) eine Gruppe und X eine Menge.
-Eine Gruppenoperation von G auf X ist eine Abbildung ◦ : G × X → X für die gilt:
-a) 1G ◦ x = x
-∀x ∈ X
-b) (g · h) ◦ x = g ◦ (h ◦ x)
-∀g, h ∈ G∀x ∈ X
+Eine Gruppenoperation von G auf X ist eine Abbildung ◦: G × X →X für die gilt:
+a) 1G ◦x = x
+∀x ∈X
+b) (g · h) ◦x = g ◦(h ◦x)
+∀g, h ∈G∀x ∈X
 Beispiel 41
-1) G = (Z, +), X = R, n ◦ x = x + n
-2) G operiert auf X = G durch g ◦ h := g · h
-3) G operiert auf X = G durch g ◦ h := g · h · g−1, denn
-i) 1G ◦ h = 1G · h · 1−1
+1) G = (Z, +), X = R, n ◦x = x + n
+2) G operiert auf X = G durch g ◦h := g · h
+3) G operiert auf X = G durch g ◦h := g · h · g−1, denn
+i) 1G ◦h = 1G · h · 1−1
 G = h
-ii) (g1 · g2) ◦ h = (g1 · g2) · h · (g · g2)−1
+ii) (g1 · g2) ◦h = (g1 · g2) · h · (g1 · g2)−1
 = g1 · (g2 · h · g−1
 2 ) · g−1
 1
-= g1 ◦ (g2 ◦ h)
+= g1 ◦(g2 ◦h)
 Deﬁnition 56
-Sei G eine Gruppe, X ein topologischer Raum und ◦ : G × X → X eine Gruppenoperation.
-a) G operiert durch Homöomorphismen, wenn für jedes g ∈ G die Abbildung
-mg : X → X, x �→ g ◦ x
+Sei G eine Gruppe, X ein topologischer Raum und ◦: G × X →X eine Gruppenoperation.
+a) G operiert durch Homöomorphismen, wenn für jedes g ∈G die Abbildung
+mg : X →X, x 7→g ◦x
 ein Homöomorphismus ist.
-b) Ist G eine topologische Gruppe, so heißt die Gruppenoperation ◦ stetig, wenn
-∀g ∈ G : mg ist stetig
+b) Ist G eine topologische Gruppe, so heißt die Gruppenoperation ◦stetig, wenn
+∀g ∈G : mg ist stetig
 gilt.
 Bemerkung 58
 Jede stetige Gruppenoperation ist eine Gruppenoperation durch Homöomorphismen.
-Beweis: Nach Voraussetzung ist mg := ◦|{ g }×X : X → X, x �→ g ◦ x stetig.
+Beweis: Nach Voraussetzung ist mg := ◦|{ g }×X : X →X, x 7→g ◦x stetig.
 Die Umkehrabbildung zu mg ist mg−1:
-(mg−1 ◦ mg)(x) = mg−1(mg(x))
-= mg−1(g ◦ x)
-= g−1 ◦ (g ◦ x)
+(mg−1 ◦mg)(x) = mg−1(mg(x))
+= mg−1(g ◦x)
+= g−1 ◦(g ◦x)
 Def. 55.b
-= (g−1 · g) ◦ x
-= 1G ◦ x
+= (g−1 · g) ◦x
+= 1G ◦x
 Def. 55.a
 = x
 Beispiel 42
@@ -2940,74 +2937,74 @@ In Beispiel 41.1 operiert Z durch Homöomorphismen.
 Bemerkung 59
 Sei G eine Gruppe und X eine Menge.
 a) Die Gruppenoperation von G auf X entsprechen bijektiv den Gruppenhomomorphismen
-ϱ : G → Perm(X) = Sym(X) = { f : X → X | f ist bijektiv }
+ϱ : G →Perm(X) = Sym(X) = { f : X →X | f ist bijektiv }
 b) Ist X ein topologischer Raum, so entsprechen dabei die Gruppenoperationen durch
-Homöomorphismus den Gruppenhomomorphismen G → Homöo(X)
+Homöomorphismus den Gruppenhomomorphismen G →Homöo(X)
 Beweis:
-Sei ◦ : G × X → X eine Gruppenoperation von G auf X. Dann sei ϱ : G → Perm(X)
+Sei ◦: G × X →X eine Gruppenoperation von G auf X. Dann sei ϱ : G →Perm(X)
 deﬁniert durch ϱ(g)(x) = g ◦ x
-∀g ∈ G, x ∈ X, also ϱ(g) = mg.
-ϱ ist Homomorphismus: ϱ(g1 · g2) = mg1·g2 = mg1 ◦ mg2 = ϱ(g1) ◦ ϱ(g2), denn für x ∈ X :
-ϱ(g1 · g2)(x) = (g1 · g2) ◦ x = g1 ◦ (g2 ◦ x) = ϱ(g1)(ϱ(g2)(x)) = (ϱ(g1) ◦ ϱ(g2))(x)
-Umgekehrt: Sei ϱ : G → Perm(X) Gruppenhomomorphismus. Deﬁniere ◦ : G × X → X
-durch g ◦ x = ϱ(g)(x).
+∀g ∈G, x ∈X, also ϱ(g) = mg.
+ϱ ist Homomorphismus: ϱ(g1 · g2) = mg1·g2 = mg1 ◦mg2 = ϱ(g1) ◦ϱ(g2), denn für x ∈X :
+ϱ(g1 · g2)(x) = (g1 · g2) ◦x = g1 ◦(g2 ◦x) = ϱ(g1)(ϱ(g2)(x)) = (ϱ(g1) ◦ϱ(g2))(x)
+Umgekehrt: Sei ϱ : G →Perm(X) Gruppenhomomorphismus. Deﬁniere ◦: G × X →X
+durch g ◦x = ϱ(g)(x).
 z. z. Deﬁnition 55.b:
-g1 ◦ (g2 ◦ x) = ϱ(g1)(g2 ◦ x)
+g1 ◦(g2 ◦x) = ϱ(g1)(g2 ◦x)
 = ϱ(g1)(ϱ(g2)(x))
-= (ϱ(g1) ◦ ϱ(g2))(x)
+= (ϱ(g1) ◦ϱ(g2))(x)
 ϱ ist Hom.
 =
 ϱ(g1 · g2)(x)
-= (g1 · g2) ◦ x
+= (g1 · g2) ◦x
 z. z. Deﬁnition 55.a: 1G · x = ϱ(1G)(x) = idX(x) = x, weil ϱ ein Homomorphismus ist.
 Beispiel 43
-Sei X ein wegzusammenhängender topologischer Raum, p : ˜X → X eine universelle Überla-
-gerung, x0 ∈ X, ˜x0 ∈ ˜X mit p( ˜x0) = x0.
+Sei X ein wegzusammenhängender topologischer Raum, p : ˜X →X eine universelle Überla-
+gerung, x0 ∈X, ˜x0 ∈˜X mit p( ˜x0) = x0.
 Dann operiert π1(X, x0) auf ˜X durch Homöomorphismen wie folgt:
-Für [γ] ∈ π1(X, x0) und ˜x ∈ ˜X sei [γ] ◦ ˜x =
+Für [γ] ∈π1(X, x0) und ˜x ∈˜X sei [γ] ◦˜x =
 ˜
-γ ∗ ϱ(1) wobei ˜γ ein Weg von ˜x0 nach ˜x in ˜X
-sei, ϱ := p(˜δ) = p ◦ δ.
-Also: δ ist ein Weg in X von x0 nach x = p(˜x) und �
-γ ∗ δ die Liftung von γ ∗ δ mit
+γ ∗δ(1) wobei ˜δ ein Weg von ˜x0 nach ˜x in ˜X
+sei, δ := p(˜δ) = p ◦˜δ.
+Also: δ ist ein Weg in X von x0 nach x = p(˜x) und ]
+γ ∗δ die Liftung von γ ∗δ mit
 Anfangspunkt ˜x0.
 [γ] ◦ ˜x hängt nicht von der Wahl von ˜δ ab; ist ˜δ′ ein anderer Weg von ˜x0 nach ˜x, so sind ˜δ
-und ˜δ′ homotop, also auch �
-γ ∗ δ und �
-γ ∗ δ′ homotop.
+und ˜δ′ homotop, also auch ]
+γ ∗δ und ]
+γ ∗δ′ homotop.
 Gruppenoperation, denn:
-i) [e] ◦ ˜x = �
-e ∗ δ = ˜x
+i) [e] ◦˜x = g
+e ∗δ = ˜x
 ii)
-�
-γ1 ∗ γ2 ∗ δ(1) = [γ1 ∗ γ2] ◦ ˜x = ([γ1] ∗ [γ2]) ◦ ˜x
-γ1 ∗ γ2 ∗ δ(1) = [γ1] ◦ (
+^
+γ1 ∗γ2 ∗δ(1) = [γ1 ∗γ2] ◦˜x = ([γ1] ∗[γ2]) ◦˜x
+γ1 ∗γ2 ∗δ(1) = [γ1] ◦(
 ˜
-γ2 ∗ δ)(1) = [γ1] ◦ ([γ2] ◦ ˜x)
+γ2 ∗δ)(1) = [γ1] ◦([γ2] ◦˜x)
 Erinnerung:Die Konstruktion aus Bemerkung 59 induziert zu der Gruppenoperation π1(X, x0)
-aus Beispiel 43 einen Gruppenhomomorphismus ϱ : π1(X, x0) → Homöo(X). Nach Satz 3.8 ist
+aus Beispiel 43 einen Gruppenhomomorphismus ϱ : π1(X, x0) →Homöo( ˜X). Nach Satz 3.8 ist
 ϱ(π1(X, x0)) = Deck( ˜X/X)
 =
-�
-f : ˜X → ˜X Homöomorphismus
-��� p ◦ f = p
-�
+n
+f : ˜X →˜X Homöomorphismus
+ p ◦f = p
+o
 
 64
 3.4. GRUPPENOPERATIONEN
 Beispiel 44
-Sei X := S2 ⊆ R3 und τ die Drehung um die z-Achse um 180◦.
-g = ⟨τ⟩ = { id, τ } operiert auf S2 durch Homöomorphismen.
+Sei X := S2 ⊆R3 und τ die Drehung um die z-Achse um 180◦.
+G = ⟨τ⟩= { id, τ } operiert auf S2 durch Homöomorphismen.
 Frage: Was ist S2/G? Ist S2/G eine Mannigfaltigkeit?
 
 4 Euklidische und nichteuklidische
 Geometrie
 Deﬁnition 57
 Das Tripel (X, d, G) heißt genau dann eine Geometrie, wenn (X, d) ein metrischer Raum
-und ∅ ̸= G ⊆ P(X) gilt. Dann heißt G die Menge aller Geraden.
+und ∅̸= G ⊆P(X) gilt. Dann heißt G die Menge aller Geraden.
 4.1 Axiome für die euklidische Ebene
 Axiome bilden die Grundbausteine jeder mathematischen Theorie. Eine Sammlung aus Axiomen
-nennt man Axiomensystem. Da der Begriﬀ des Axiomensystems so grundlegend ist, hat man
+nennt man Axiomensystem. Da der Begriﬀdes Axiomensystems so grundlegend ist, hat man
 auch ein paar sehr grundlegende Forderungen an ihn: Axiomensysteme sollen widerspruchsfrei
 sein, die Axiome sollen möglichst unabhängig sein und Vollständigkeit wäre auch toll. Mit
 Unabhängigkeit ist gemeint, dass kein Axiom sich aus einem anderem herleiten lässt. Dies scheint
@@ -3036,24 +3033,24 @@ Eine euklidische Ebene ist eine Geometrie (X, d, G), die Axiome §1 - §5 erfül
 
 66
 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
-(i) Zu P ̸= Q ∈ X gibt es genau ein g ∈ G mit { P, Q } ⊆ g.
-(ii) |g| ≥ 2
-∀g ∈ G
-(iii) X /∈ G
-§2) Abstandsaxiom: Zu P, Q, R ∈ X gibt es genau dann ein g ∈ G mit { P, Q, R } ⊆ g,
+(i) Zu P ̸= Q ∈X gibt es genau ein g ∈G mit { P, Q } ⊆g.
+(ii) |g| ≥2
+∀g ∈G
+(iii) X /∈G
+§2) Abstandsaxiom: Zu P, Q, R ∈X gibt es genau dann ein g ∈G mit { P, Q, R } ⊆g,
 wenn gilt:
 • d(P, R) = d(P, Q) + d(Q, R) oder
 • d(P, Q) = d(P, R) + d(R, Q) oder
 • d(Q, R) = d(Q, P) + d(P, R)
 Deﬁnition 59
-Sei (X, d, G) eine Geometrie und seien P, Q, R ∈ X.
-a) P, Q, R liegen kollinear, wenn es g ∈ G gibt mit { P, Q, R } ⊆ g.
+Sei (X, d, G) eine Geometrie und seien P, Q, R ∈X.
+a) P, Q, R liegen kollinear, wenn es g ∈G gibt mit { P, Q, R } ⊆g.
 b) Q liegt zwischen P und R, wenn d(P, R) = d(P, Q) + d(Q, R)
-c) Strecke PR := { Q ∈ X | Q liegt zwischen P und R }
+c) Strecke PR := { Q ∈X | Q liegt zwischen P und R }
 d) Halbgeraden:
-PR+ := {Q ∈ X|Q liegt zwischen P und R oder
+PR+ := {Q ∈X|Q liegt zwischen P und R oder
 R liegt zwischen P und Q}
-PR− := { Q ∈ X | P liegt zwischen Q und R }
+PR−:= { Q ∈X | P liegt zwischen Q und R }
 P
 R
 PR−
@@ -3061,64 +3058,64 @@ PR
 PR+
 Abbildung 4.1: Halbgeraden
 Bemerkung 60
-a) PR+ ∪ PR− = PR
-b) PR+ ∩ PR− = { P }
+a) PR+ ∪PR−= PR
+b) PR+ ∩PR−= { P }
 Beweis:
 a) „⊆“ folgt direkt aus der Deﬁnition von PR+ und PR−
-„⊇“: Sei Q ∈ PR ⇒ P, Q, R sind kollinear.
+„⊇“: Sei Q ∈PR ⇒P, Q, R sind kollinear.
 2⇒
 
 
 
 
 
-Q liegt zwischen P und R ⇒ Q ∈ PR
-R liegt zwischen P und Q ⇒ Q ∈ PR
-P liegt zwischen Q und R ⇒ Q ∈ PR
+Q liegt zwischen P und R ⇒Q ∈PR+
+R liegt zwischen P und Q ⇒Q ∈PR+
+P liegt zwischen Q und R ⇒Q ∈PR−
 b) „⊇“ ist oﬀensichtlich
+„⊆“: Sei Q ∈PR+ ∩PR−. Dann ist d(Q, R) = d(P, Q) + d(P, R) weil Q ∈PR−und
-� d(P, R) = d(P, Q) + d(Q, R) oder
+„⊆“: Sei PR+ ∩PR−. Dann ist d(Q, R) = d(P, Q) + d(P, R) weil Q ∈PR−und
+ d(P, R) = d(P, Q) + d(Q, R) oder
 d(P, Q) = d(P, R) + d(R, Q)
-�
+
 
 67
 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
-⇒ d(Q, R) = 2d(P, Q) + d(Q, R)
-⇒ d(P, Q) = 0
-⇒ P = Q
+⇒d(Q, R) = 2d(P, Q) + d(Q, R)
+⇒d(P, Q) = 0
+⇒P = Q
 d(P, Q) = 2d(P, R) + d(P, Q)
-⇒ P = R
-⇒ Widerspruch
+⇒P = R
+⇒Widerspruch
 Deﬁnition 60
 §3) Anordnungsaxiome
-(i) Zu jeder Halbgerade H mit Anfangspunkt P ∈ X und jedem r ∈ R≥0 gibt es
-genau ein Q ∈ H mit d(P, Q) = r.
+(i) Zu jeder Halbgerade H mit Anfangspunkt P ∈X und jedem r ∈R≥0 gibt es
+genau ein Q ∈H mit d(P, Q) = r.
 (ii) Jede Gerade zerlegt X \g = H1 ˙∪H2 in zwei nichtleere Teilmengen H1, H2, sodass
-für alle A ∈ Hi, B ∈ Hj mit i, j ∈ { 1, 2 } gilt: AB ∩ g ̸= ∅ ⇔ i ̸= j.
+für alle A ∈Hi, B ∈Hj mit i, j ∈{ 1, 2 } gilt: AB ∩g ̸= ∅⇔i ̸= j.
 Diese Teilmengen Hi heißen Halbebenen bzgl. g.
-§4) Bewegungsaxiom: Zu P, Q, P ′, Q′ ∈ X mit d(P, Q) = d(P ′, Q′) gibt es mindestens
+§4) Bewegungsaxiom: Zu P, Q, P ′, Q′ ∈X mit d(P, Q) = d(P ′, Q′) gibt es mindestens
 2 Isometrien ϕ1, ϕ2 mit ϕi(P) = P ′ und ϕi(Q) = Q′ mit i = 1, 2.1
-§5) Parallelenaxiom: Zu jeder Geraden g ∈ G und jedem Punkt P ∈ X \ g gibt es
-höchstens ein h ∈ G mit P ∈ h und h ∩ g = ∅. h heißt Parallele zu g durch P.
+§5) Parallelenaxiom: Zu jeder Geraden g ∈G und jedem Punkt P ∈X \ g gibt es
+höchstens ein h ∈G mit P ∈h und h ∩g = ∅. h heißt Parallele zu g durch P.
 Satz 4.1 (Satz von Pasch)
-Seien P, Q, R nicht kollinear, g ∈ G mit g ∩ { P, Q, R } = ∅ und g ∩ PQ ̸= ∅.
-Dann ist entweder g ∩ PR ̸= ∅ oder g ∩ QR ̸= ∅.
+Seien P, Q, R nicht kollinear, g ∈G mit g ∩{ P, Q, R } = ∅und g ∩PQ ̸= ∅.
+Dann ist entweder g ∩PR ̸= ∅oder g ∩QR ̸= ∅.
 Dieser Satz besagt, dass Geraden, die eine Seite eines Dreiecks (also nicht nur eine Ecke)
 schneiden, auch eine weitere Seite schneiden.
-Beweis: g ∩ PQ ̸= ∅
+Beweis: g ∩PQ ̸= ∅
 3(ii)
-⇒ P und Q liegen in verschiedenen Halbebenen bzgl. g
+⇒o. B. d. A. R und P liegen in verschiedenen Halbebenen bzgl. g
-⇒ g ∩ RP ̸= ∅
+⇒P und Q liegen in verschiedenen Halbebenen bzgl. g
+⇒o. B. d. A. R und P liegen in verschieden Halbebenen bzgl. g
+⇒g ∩RP ̸= ∅
 Bemerkung 61
-Sei P, Q ∈ X mit P ̸= Q sowie A, B ∈ X \ PQ mit A ̸= B. Außerdem seien A und B in der
+Sei P, Q ∈X mit P ̸= Q sowie A, B ∈X \ PQ mit A ̸= B. Außerdem seien A und B in der
 selben Halbebene bzgl. PQ sowie Q und B in der selben Halbebene bzgl. PA.
-Dann gilt: PB+ ∩ AQ ̸= ∅
+Dann gilt: PB+ ∩AQ ̸= ∅
 Auch Bemerkung 61 lässt sich umgangssprachlich sehr viel einfacher ausdrücken: Die Diagonalen
 eines konvexen Vierecks schneiden sich.
-Beweis: Sei P ′ ∈ PQ−, P ′ ̸= P
+Beweis: Sei P ′ ∈PQ−, P ′ ̸= P
 Satz 4.1
-====⇒ PB schneidet AP ′ ∪ AQ
+====⇒PB schneidet AP ′ ∪AQ
 Sei C der Schnittpunkt. Dann gilt:
 1Die „Verschiebung“ von P ′Q′ nach PQ und die Isometrie, die zusätzlich an der Gerade durch P und Q spiegelt.
 
@@ -3131,14 +3128,14 @@ A
 B
 C
 Abbildung 4.2: Situation aus Bemerkung 61
-(i) C ∈ PB+, denn A und B liegen in derselben Halbebene bzgl. PQ = P ′Q, also auch
+(i) C ∈PB+, denn A und B liegen in derselben Halbebene bzgl. PQ = P ′Q, also auch
 AP ′ und AQ.
 (ii) C liegt in derselben Halbebene bzgl. PA wie B, weil das für Q gilt.
-AP ′ liegt in der anderen Halbebene bzgl. PA ⇒ C /∈ P ′A ⇒ C ∈ AQ
-Da C ∈ PB+ und C ∈ AQ folgt nun direkt: ∅ ̸= { C } ⊆ PB+ ∩ AQ
+AP ′ liegt in der anderen Halbebene bzgl. PA ⇒C /∈P ′A ⇒C ∈AQ
+Da C ∈PB+ und C ∈AQ folgt nun direkt: ∅̸= { C } ⊆PB+ ∩AQ
 ■
 Bemerkung 62
-Seien P, Q ∈ X mit P ̸= Q und A, B ∈ X \PQ in der selben Halbebene bzgl. PQ. Außerdem
+Seien P, Q ∈X mit P ̸= Q und A, B ∈X \PQ in der selben Halbebene bzgl. PQ. Außerdem
 sei d(A, P) = d(B, P) und d(A, Q) = d(B, Q).
 Dann ist A = B.
 P
@@ -3149,14 +3146,14 @@ Abbildung 4.3: Bemerkung 62: Die beiden roten und die beiden blauen Linien sind
 Intuitiv weiß man, dass daraus folgt, dass A = B gilt.
 Beweis: durch Widerspruch
 Annahme: A ̸= B
-Dann ist B /∈ (PA ∪ QA) wegen §2.
+Dann ist B /∈(PA ∪QA) wegen §2.
 1. Fall: Q und B liegen in derselben Halbebene bzgl. PA
 Bem. 61
-=====⇒ PB+ ∩ AQ ̸= ∅.
+=====⇒PB+ ∩AQ ̸= ∅.
 Sei C der Schnittpunkt vom PB und AQ.
 Dann gilt:
 (i) d(A, C) + d(C, Q) = d(A, Q) Vor.
-= d(B, Q) < d(B, C) + d(C, Q) ⇒ d(A, C) < d(B, C)
+= d(B, Q) < d(B, C) + d(C, Q) ⇒d(A, C) < d(B, C)
 
 69
 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
@@ -3175,32 +3172,32 @@ Abbildung 4.4: Fallunterscheidung aus Bemerkung 62
 (ii)
 a) B liegt zwischen P und C.
 d(P, A) + d(A, C) > d(P, C) = d(P, B) + d(B, C) = d(P, A) + d(B, C) ⇒
-d(A, C) > d(B, C) ⇒ Widerspruch zu Punkt (i)
+d(A, C) > d(B, C) ⇒Widerspruch zu Punkt (i)
 b) C liegt zwischen P und B
 d(P, C) + d(C, A) > d(P, A) = d(P, B) = d(P, C) + d(C, B)
-⇒ d(C, A) > d(C, B)
-⇒ Widerspruch zu Punkt (i)
+⇒d(C, A) > d(C, B)
+⇒Widerspruch zu Punkt (i)
 2. Fall: Q und B liegen auf verschieden Halbebenen bzgl. PA.
 Dann liegen A und Q in derselben Halbebene bzgl. PB.
-Tausche A und B ⇒ Fall 1
+Tausche A und B ⇒Fall 1
 ■
 Bemerkung 63
-Sei (X, d, G) eine Geometrie, die §1 - §3 erfüllt, P, Q ∈ X mit P ̸= Q und ϕ eine Isometrie
+Sei (X, d, G) eine Geometrie, die §1 - §3 erfüllt, P, Q ∈X mit P ̸= Q und ϕ eine Isometrie
 mit ϕ(P) = P und ϕ(Q) = Q.
 Dann gilt ϕ(S) = S
-∀S ∈ PQ.
+∀S ∈PQ.
 Beweis:
-O. B. d. A. sei S ∈ PQ 2⇔ d(P, Q) = d(P, S) + d(S, Q)
+O. B. d. A. sei S ∈PQ 2⇔d(P, Q) = d(P, S) + d(S, Q)
 ϕ∈Iso(X)
 ⇒
 d(ϕ(P), ϕ(Q)) = d(ϕ(P), ϕ(S)) + d(ϕ(S), ϕ(Q))
 P,Q∈Fix(ϕ)
 ⇒
 d(P, Q) = d(P, ϕ(S)) + d(ϕ(S), Q)
-⇒ ϕ(S) liegt zwischen P und Q
-⇒ d(P, S) = d(ϕ(P), ϕ(S)) = d(P, ϕ(S))
+⇒ϕ(S) liegt zwischen P und Q
+⇒d(P, S) = d(ϕ(P), ϕ(S)) = d(P, ϕ(S))
 3(i)
-⇒ ϕ(S) = S
+⇒ϕ(S) = S
 ■
 Proposition 4.2
 In einer Geometrie, die §1 - §3 erfüllt, gibt es zu P, P ′, Q, Q′ mit d(P, Q) = d(P ′, Q′)
@@ -3212,14 +3209,14 @@ Aus den Axiomen folgt, dass es in der Situation von §4 höchstens zwei Isometri
 ϕi(P) = P ′ und ϕi(Q) = Q′ gibt.
 Beweis: Seien ϕ1, ϕ2, ϕ3 Isometrien mit ϕi(P) = P ′, ϕi(Q) = Q′ mit i = 1, 2, 3.
 Der Beweis von Proposition 4.2 erfolgt über zwei Teilaussagen:
-(Teil i) ∃R ∈ X \ PQ mit ϕ1(R) = ϕ2(R).
+(Teil i) ∃R ∈X \ PQ mit ϕ1(R) = ϕ2(R).
 (Teil ii) Hat ϕ 3 Fixpunkte, die nicht kollinear sind, so ist ϕ = idX.
 Aus (Teil i) und (Teil ii) folgt, dass ϕ−1
 2
-◦ ϕ1 = idX, also ϕ2 = ϕ1, da P, Q und R in diesem
+◦ϕ1 = idX, also ϕ2 = ϕ1, da P, Q und R in diesem
 Fall Fixpunkte sind.
 Nun zu den Beweisen der Teilaussagen:
-(Teil i) Sei R ∈ X \ PQ. Von den drei Punkten ϕ1(R), ϕ2(R), ϕ3(R) liegen zwei in der selben
+(Teil i) Sei R ∈X \ PQ. Von den drei Punkten ϕ1(R), ϕ2(R), ϕ3(R) liegen zwei in der selben
 Halbebene bzgl. P ′Q′ = ϕi(PQ).
 O. B. d. A. seien ϕ1(R) und ϕ2(R) in der selben Halbebene.
 Es gilt: d(P ′, ϕ1(R)) = d(ϕ1(P), ϕ1(R))
@@ -3227,19 +3224,19 @@ Es gilt: d(P ′, ϕ1(R)) = d(ϕ1(P), ϕ1(R))
 = d(ϕ2(P), ϕ2(R))
 = d(P ′, ϕ2(R))
 und analog d(Q′, ϕ1(R)) = d(Q′, ϕ2(R))
-(Teil ii) Seien P, Q und R Fixpunkte von ϕ, R /∈ PQ und A /∈ PQ ∪ PR ∪ QR. Sei B ∈
+(Teil ii) Seien P, Q und R Fixpunkte von ϕ, R /∈PQ und A /∈PQ ∪PR ∪QR. Sei B ∈
 PQ \ { P, Q }. Dann ist ϕ(B) = B wegen Bemerkung 63.
-Ist R ∈ AB, so enthält AB 2 Fixpunkte von ϕ Bem. 63
-=====⇒ ϕ(A) = A.
+Ist R ∈AB, so enthält AB 2 Fixpunkte von ϕ Bem. 63
+=====⇒ϕ(A) = A.
 P
 B
 Q
 C
 R
 A
-Abbildung 4.5: P, Q, R sind Fixpunkte, B ∈ PQ \ { P, Q }, A /∈ PQ ∪ PR ∪ QR
-Ist R /∈ AB, so ist AB ∩ PR ̸= ∅ oder AB ∈ RQ ̸= ∅ nach Satz 4.1. Der Schnittpunkt
-C ist dann Fixpunkt von ϕ′ nach Bemerkung 63 ⇒ ϕ(A) = A.
+Abbildung 4.5: P, Q, R sind Fixpunkte, B ∈PQ \ { P, Q }, A /∈PQ ∪PR ∪QR
+Ist R /∈AB, so ist AB ∩PR ̸= ∅oder AB ∈RQ ̸= ∅nach Satz 4.1. Der Schnittpunkt
+C ist dann Fixpunkt von ϕ′ nach Bemerkung 63 ⇒ϕ(A) = A.
 Bemerkung 64 (SWS-Kongruenzsatz)
 Sei (X, d, G) eine Geometrie, die §1 - §4 erfüllt. Seien außerdem △ABC und △A′B′C′
 Dreiecke, für die gilt:
@@ -3252,13 +3249,13 @@ Dreiecke, für die gilt:
 Dann ist △ABC kongruent zu △A′B′C′ .
 Beweis: Sei ϕ die Isometrie mit ϕ(A′) = A, ϕ(A′C′+) = AC+ und ϕ(A′B′+) = AB+. Diese
 Isometrie existiert wegen Punkt §4.
-⇒ C ∈ ϕ(A′C′+) und B ∈ ϕ(A′B′+).
+⇒C ∈ϕ(A′C′+) und B ∈ϕ(A′B′+).
 d(A′, C′) = d(ϕ(A′), ϕ(C′)) = d(A, ϕ(C′))
 3(i)
-==⇒ ϕ(C′) = C
+==⇒ϕ(C′) = C
 d(A′, B′) = d(ϕ(A′), ϕ(B′)) = d(A, ϕ(B′))
 3(i)
-==⇒ ϕ(B′) = B
+==⇒ϕ(B′) = B
 Also gilt insbesondere ϕ(△A′B′C′) = △ABC.
 ■
 Bemerkung 65 (WSW-Kongruenzsatz)
@@ -3270,20 +3267,21 @@ Dreiecke, für die gilt:
 Dann ist △ABC kongruent zu △A′B′C′ .
 Beweis: Sei ϕ die Isometrie mit ϕ(A′) = A, ϕ(B′) = B und ϕ(C′) liegt in der selben Halbebene
 bzgl. AB wie C. Diese Isometrie existiert wegen §4.
-Aus ∠CAB = ∠C′A′B′ = ∠ϕ(C′)ϕ(A′)ϕ(B′) = ∠ϕ(C′)AB folgt, dass ϕ(C′) ∈ AC+.
+Aus ∠CAB = ∠C′A′B′ = ∠ϕ(C′)ϕ(A′)ϕ(B′) = ∠ϕ(C′)AB folgt, dass ϕ(C′) ∈AC+.
 Analog folgt aus ∠ABC = ∠A′B′C′ = ∠ϕ(A′)ϕ(B′)ϕ(C′) = ∠ABϕ(C′), dass ϕ(C′) ∈
 BC+.
-Dann gilt ϕ(C′) ∈ AC ∩ BC = { C } ⇒ ϕ(C′) = C.
+Dann gilt ϕ(C′) ∈AC ∩BC = { C } ⇒ϕ(C′) = C.
 Es gilt also ϕ(△A′B′C′) = △ABC.
 ■
 Deﬁnition 61
-a) Ein Winkel ist ein Punkt P ∈ X zusammen mit 2 Halbgeraden mit Anfangspunkt P.
+a) Ein Winkel ist ein Punkt P ∈X zusammen mit 2 Halbgeraden mit Anfangspunkt P.
 Man schreibt: ∠R1PR2 bzw. ∠R2PR12
 b) Zwei Winkel sind gleich, wenn es eine Isometrie gibt, die den einen Winkel auf den
 anderen abbildet.
 c) ∠R′
 1P ′R′
-ϕ(P ′R′+2 heißt kleiner als ∠R1PR2, wenn es eine Isometrie ϕ gibt, mit ϕ(P ′) = P,
+2 heißt kleiner als ∠R1PR2, wenn es eine Isometrie ϕ gibt, mit ϕ(P ′) = P,
+ϕ(P ′R′+
 1 ) = PR+
 1 und ϕ(R′
 2) liegt in der gleichen Halbebene bzgl. PR1 wie R2 und in
@@ -3292,9 +3290,9 @@ d) Im Dreieck △PQR gibt es Innenwinkel und Außenwinkel.
 Bemerkung 66
 In einem Dreieck ist jeder Innenwinkel kleiner als jeder nicht anliegende Außenwinkel.
 Beweis: Zeige ∠PRQ < ∠RQP ′.
-Sei M der Mittelpunkt der Strecke QR und P ′ ∈ PQ+ \ PQ. Sei A ∈ MP − mit d(P, M) =
+Sei M der Mittelpunkt der Strecke QR und P ′ ∈PQ+ \ PQ. Sei A ∈MP −mit d(P, M) =
 d(M, A).
-2Für dieses Skript gilt: ∠R1PR2 = ∠R2PR1. Also sind insbesondere alle Winkel ≤ 180◦.
+2Für dieses Skript gilt: ∠R1PR2 = ∠R2PR1. Also sind insbesondere alle Winkel ≤180◦.
 
 72
 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
@@ -3330,16 +3328,16 @@ P
 (b) Innen- und Außenwin-
 kel von △PQR
 Abbildung 4.7: Situation aus Bemerkung 66
-Es gilt: d(Q, M) = d(M, R) und d(P, M) = d(M, A) sowie ∠PMR = ∠AMQ ⇒ △MRQ
+Es gilt: d(Q, M) = d(M, R) und d(P, M) = d(M, A) sowie ∠PMR = ∠AMQ ⇒△MRQ
 ist kongruent zu △AMQ, denn eine der beiden Isometrien, die ∠PMR auf ∠AMQ abbildet,
 bildet R auf Q und P auf A ab.
-⇒ ∠MQA = ∠MRP = ∠QRP = ∠PRQ.
+⇒∠MQA = ∠MRP = ∠QRP = ∠PRQ.
 Noch zu zeigen: ∠MQA < ∠RQP ′, denn A liegt in der selben Halbebene bzgl. PQ wie M.
 Proposition 4.3 (Existenz der Parallelen)
 Sei (X, d, G) eine Geometrie mit den Axiomen §1 - §4.
-Dann gibt es zu jeder Geraden g ∈ G und jedem Punkt P ∈ X \ g mindestens eine
-Parallele h ∈ G mit P ∈ h und g ∩ h = ∅.
-Beweis: Seien P, Q ∈ f ∈ G und ϕ die Isometrie, die Q auf P und P auf P ′ ∈ f mit
+Dann gibt es zu jeder Geraden g ∈G und jedem Punkt P ∈X \ g mindestens eine
+Parallele h ∈G mit P ∈h und g ∩h = ∅.
+Beweis: Seien P, Q ∈f ∈G und ϕ die Isometrie, die Q auf P und P auf P ′ ∈f mit
 d(P, P ′) = d(P, Q) abbildet und die Halbebenen bzgl. f erhält.
 
 73
@@ -3350,12 +3348,12 @@ f
 g
 P
 Abbildung 4.8: Situation aus Proposition 4.3
-Annahme: ϕ(g) ∩ g ̸= ∅
-⇒ Es gibt einen Schnittpunkt { R } = ϕ(g) ∩ g.
+Annahme: ϕ(g) ∩g ̸= ∅
+⇒Es gibt einen Schnittpunkt { R } = ϕ(g) ∩g.
 Dann ist ∠RQP = ∠RQP ′ < ∠RPP ′ nach Bemerkung 66 und ∠RQP = ∠RPP ′, weil
 ϕ(∠RQP) = ∠RPP ′.
-⇒ Widerspruch
-⇒ ϕ(g) ∩ g = ∅
+⇒Widerspruch
+⇒ϕ(g) ∩g = ∅
 ■
 Folgerung 4.4
 Die Summe zweier Innenwinkel in einem Dreieck ist kleiner als π.
@@ -3367,13 +3365,14 @@ Abbildung 4.9: In der sphärischen Geometrie gibt es, im Gegensatz zur euklidisc
 Dreiecke mit drei 90◦-Winkeln.
 Proposition 4.5
 In einer Geometrie mit den Axiomen §1 - §4 ist in jedem Dreieck die Summe der
-Innenwinkel ≤ π.
+Innenwinkel ≤π.
 
 74
 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
 Sei im Folgenden „IWS“ die „Innenwinkelsumme“.
-Beweis: Sei △ ein Dreieck mit IWS(△) = π + ε
-β α
+Beweis: Sei △ein Dreieck mit IWS(△) = π + ε
+α
+β
 γ
 P
 (a) Summe der Winkel α, β und γ
@@ -3389,21 +3388,21 @@ A′
 (b) Situation aus Proposition 4.5
 Abbildung 4.10: Situation aus Proposition 4.5
 Sei α ein Innenwinkel von △.
-Beh.: Es gibt ein Dreieck △′ mit IWS(△′) = IWS(△) und einem Innenwinkel α′ ≤ α
+Beh.: Es gibt ein Dreieck △′ mit IWS(△′) = IWS(△) und einem Innenwinkel α′ ≤α
 2 .
-Dann gibt es für jedes n ein △n mit IWS(△n) = IWS(△) und Innenwinkel α′ ≤ α
+Dann gibt es für jedes n ein △n mit IWS(△n) = IWS(△) und Innenwinkel α′ ≤α
 2n . Für
 α
-2n < ε ist dann die Summe der beiden Innenwinkel um △n größer als π ⇒ Widerspruch
+2n < ε ist dann die Summe der beiden Innenwinkel um △n größer als π ⇒Widerspruch
 zu Folgerung 4.4.
-Beweis: Es seien A, B, C ∈ X und △ das Dreieck mit den Eckpunkten A, B, C und α sei
+Beweis: Es seien A, B, C ∈X und △das Dreieck mit den Eckpunkten A, B, C und α sei
 der Innenwinkel bei A, β der Innenwinkel bei B und γ der Innenwinkel bei C.
 Sei M der Mittelpunkt der Strecke BC. Sei außerdem α1 = ∠CAM und α2 = ∠BAM.
-Sei weiter A′ ∈ MA− mit d(A′, M) = d(A, M).
+Sei weiter A′ ∈MA−mit d(A′, M) = d(A, M).
 Die Situation ist in Abbildung 4.10b skizziert.
-⇒ △(MA′C) und △(MAB) sind kongruent. ⇒ ∠ABM = ∠A′CM und ∠MA′C =
-∠MAB. ⇒ α+β +γ = IWS(△ABC) = IWS(△AA′C) und α1 +α2 = α, also o. B. d. A.
-α1 ≤ α
+⇒△(MA′C) und △(MAB) sind kongruent. ⇒∠ABM = ∠A′CM und ∠MA′C =
+∠MAB. ⇒α+β +γ = IWS(△ABC) = IWS(△AA′C) und α1 +α2 = α, also o. B. d. A.
+α1 ≤α
 2
 Bemerkung 67
 In einer euklidischen Ebene ist in jedem Dreieck die Innenwinkelsumme gleich π.
@@ -3425,7 +3424,7 @@ Beweis: Sei g eine Parallele von AB durch C.
 
 75
 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE
-⇒ IWS(△ABC) = γ + α′′ + β′ = π
+⇒IWS(△ABC) = γ + α′′ + β′ = π
 Aus der Eigenschaft, dass die Innenwinkelsumme von Dreiecken in der euklidischen Ebene gleich
 π ist, folgen direkt die Kongruenzsätze SWW und WWS über den Kongruenzsatz WSW.
 4.2 Weitere Eigenschaften einer euklidischen Ebene
@@ -3500,14 +3499,14 @@ LA
 LC
 Abbildung 4.16: △ABLa und △CLCB sind ähnlich, weil IWS = π
 Strahlensatz
-=======⇒ a
+=======⇒a
 hc =
 c
-ha → a · ha = c · hc
+ha →a · ha = c · hc
 Satz 4.7 (Satz des Pythagoras)
 Im rechtwinkligen Dreieck gilt a2 + b2 = c2, wobei c die Hypotenuse und a, b die beiden
 Katheten sind.
-Beweis: (a + b) · (a + b) = a2 + 2ab + b2 = c2 + 4 · ( 1
+Beweis: (a + b) · (a + b) = a2 + 2ab + b2 = c2 + 4 · (1
 2 · a · b)
 
 77
@@ -3543,13 +3542,13 @@ Beweis:
 (i) (R2, dEuklid) ist oﬀensichtlich eine euklidische Ebene.
 (ii) Sei (X, d) eine euklidische Ebene und g1, g2 Geraden in X, die sich in einem Punkt 0
 im rechten Winkel schneiden.
-Sei P ∈ X \ (g1 ∪ g2) ein Punkt und PX der Fußpunkt des Lots von P auf g1 (vgl.
+Sei P ∈X \ (g1 ∪g2) ein Punkt und PX der Fußpunkt des Lots von P auf g1 (vgl.
 Aufgabe 9 (c)) und PY der Fußpunkt des Lots von P auf g2.
 Sei xP := d(PX, 0) und yP := d(PY , 0).
 In Abbildung 4.19 wurde die Situation skizziert.
-Sei h : X → R2 eine Abbildung mit h(P) := (xP , yP ) Dadurch wird h auf dem
+Sei h : X →R2 eine Abbildung mit h(P) := (xP , yP ) Dadurch wird h auf dem
 Quadranten deﬁniert, in dem P liegt, d. h.
-∀Q ∈ X mit PQ ∩ g1 = ∅ = PQ ∩ g2
+∀Q ∈X mit PQ ∩g1 = ∅= PQ ∩g2
 Fortsetzung auf ganz X durch konsistente Vorzeichenwahl.
 Im Folgenden werden zwei Aussagen gezeigt:
 (i) h ist surjektiv
@@ -3577,7 +3576,7 @@ PY
 X
 (b) Schritt 2
 Abbildung 4.18: Beweis zu Satz 4.8
-(i) Sei (x, y) ∈ R2, z. B. x ≥ 0, y ≥ 0. Sei P ′ ∈ g1 mit d(0, P ′) = x und P ′ auf der
+(i) Sei (x, y) ∈R2, z. B. x ≥0, y ≥0. Sei P ′ ∈g1 mit d(0, P ′) = x und P ′ auf der
 gleichen Seite von g2 wie P.
 g1
 g2
@@ -3592,21 +3591,21 @@ Abbildung 4.19: Beweis zu Satz 4.8
 (ii) Zu Zeigen: d(P, Q) = d(h(P), h(Q))
 d(P, Q)2 Pythagoras
 =
-d(P, R)2 + d(R, Q)2 = (yQ − yP )2 + (xQ − xP )2.
+d(P, R)2 + d(R, Q)2 = (yQ −yP )2 + (xQ −xP )2.
 h(Q) = (xQ, yQ)
 4.3 Hyperbolische Geometrie
 Deﬁnition 63
 Sei
-H := { z ∈ C | ℑ(z) > 0 } =
-�
-(x, y) ∈ R2 �� y > 0
-�
+H := { z ∈C | ℑ(z) > 0 } =
+
+(x, y) ∈R2  y > 0
+	
 
 79
 4.3. HYPERBOLISCHE GEOMETRIE
-die obere Halbebene bzw. Poincaré-Halbebene und G = G1 ∪ G2 mit
-G1 = { g1 ⊆ H | ∃m ∈ R, r ∈ R>0 : g1 = { z ∈ H : | z − m| = r } }
-G2 = { g2 ⊆ H | ∃x ∈ R : g2 = { z ∈ H : ℜ(z) = x } }
+die obere Halbebene bzw. Poincaré-Halbebene und G = G1 ∪G2 mit
+G1 = { g1 ⊆H | ∃m ∈R, r ∈R>0 : g1 = { z ∈H : | z −m| = r } }
+G2 = { g2 ⊆H | ∃x ∈R : g2 = { z ∈H : ℜ(z) = x } }
 Die Elemente aus G heißen hyperbolische Geraden.
 Bemerkung 68 (Eigenschaften der hyperbolischen Geraden)
 Die hyperbolischen Geraden erfüllen. . .
@@ -3615,11 +3614,11 @@ b) . . . das Anordnungsaxiom §3 (ii)
 c) . . . nicht das Parallelenaxiom §5
 Beweis:
 a) Oﬀensichtlich sind §1 (iii) und §1 (ii) erfüllt. Für §1 (i) gilt:
-Gegeben z1, z2 ∈ H
+Gegeben z1, z2 ∈H
 Existenz:
 Fall 1 ℜ(z1) = ℜ(z2)
-⇒ z1 und z2 liegen auf
-g = { z ∈ C | ℜ(z) = ℜ(z1) ∧ H }
+⇒z1 und z2 liegen auf
+g = { z ∈C | ℜ(z) = ℜ(z1) ∧H }
 Siehe Abbildung 4.20a.
 Fall 2 ℜ(z1) ̸= ℜ(z2)
 Betrachte nun z1 und z2 als Punkte in der euklidischen Ebene. Die Mittelsenkrech-
@@ -3663,48 +3662,48 @@ Z2
 (b) Fall 2
 Abbildung 4.20: Zwei Punkte liegen in der hyperbolischen Geometrie immer auf genau einer
 Geraden
-b) Sei g ∈ G1 ˙∪ G2 eine hyperbolische Gerade.
+b) Sei g ∈G1 ˙∪G2 eine hyperbolische Gerade.
 
 80
 4.3. HYPERBOLISCHE GEOMETRIE
 Es existieren disjunkte Zerlegungen von H \ g:
-Fall 1: g = { z ∈ H ∥ z − m| = r } ∈ G1
+Fall 1: g = { z ∈H ∥z −m| = r } ∈G1
 Dann gilt:
-H = { z ∈ H ∥ z − m| < r }
-�
-��
-�
+H = { z ∈H ∥z −m| < r }
+|
+{z
+}
 =:H1 (Kreisinneres)
-˙∪ { z ∈ H ∥ z − m| > r }
-�
-��
-�
+˙∪{ z ∈H ∥z −m| > r }
+|
+{z
+}
 =:H2 (Kreisäußeres)
-Da r > 0 ist H1 nicht leer, da r ∈ R ist H2 nicht leer.
-Fall 2: g = { z ∈ H | ℜz = x } ∈ G2
+Da r > 0 ist H1 nicht leer, da r ∈R ist H2 nicht leer.
+Fall 2: g = { z ∈H | ℜz = x } ∈G2
 Die disjunkte Zerlegung ist:
-H = { z ∈ H | ℜ(z) < x }
-�
-��
-�
+H = { z ∈H | ℜ(z) < x }
+|
+{z
+}
 =:H1 (Links)
-˙∪ { z ∈ H | ℜ(z) > x }
-�
-��
-�
+˙∪{ z ∈H | ℜ(z) > x }
+|
+{z
+}
 =:H2 (Rechts)
-Zu zeigen: ∀A ∈ Hi, B ∈ Hj mit i, j ∈ { 1, 2 } gilt: AB ∩ g ̸= ∅ ⇔ i ̸= j
-„⇐“: A ∈ H1, B ∈ H2 : AB ∩ g ̸= ∅
+Zu zeigen: ∀A ∈Hi, B ∈Hj mit i, j ∈{ 1, 2 } gilt: AB ∩g ̸= ∅⇔i ̸= j
+„⇐“: A ∈H1, B ∈H2 : AB ∩g ̸= ∅
 Da dH stetig ist, folgt diese Richtung direkt. Alle Punkte in H1 haben einen Abstand
 von m der kleiner ist als r und alle Punkte in H2 haben einen Abstand von m der
 größer ist als r. Da man jede Strecke von A nach B insbesondere auch als stetige
-Abbildung f : R → R>0 auﬀassen kann, greift der Zwischenwertsatz ⇒ AB ∩ g ̸= ∅
-„⇒“: A ∈ Hi, B ∈ Hj mit i, j ∈ { 1, 2 } : AB ∩ g ̸= ∅ ⇒ i ̸= j
+Abbildung f : R →R>0 auﬀassen kann, greift der Zwischenwertsatz ⇒AB ∩g ̸= ∅
+„⇒“: A ∈Hi, B ∈Hj mit i, j ∈{ 1, 2 } : AB ∩g ̸= ∅⇒i ̸= j
 Sei h die Gerade, die durch A und B geht.
-Da A, B /∈ g, aber A, B ∈ h gilt, haben g und h insbesondere mindestens einen
+Da A, B /∈g, aber A, B ∈h gilt, haben g und h insbesondere mindestens einen
 unterschiedlichen Punkt. Aus §1 (i) folgt, dass sich g und h in höchstens einen Punkt
 schneiden. Sei C dieser Punkt.
-Aus A, B /∈ g folgt: C ̸= A und C ̸= B. Also liegt C zwischen A und B. Daraus folgt,
+Aus A, B /∈g folgt: C ̸= A und C ̸= B. Also liegt C zwischen A und B. Daraus folgt,
 dass A und B bzgl. g in verschiedenen Halbebenen liegen.
 c) Siehe Abbildung 4.21.
 x
@@ -3732,76 +3731,76 @@ Abbildung 4.21: Hyperbolische Geraden erfüllen §5 nicht.
 81
 4.3. HYPERBOLISCHE GEOMETRIE
 Deﬁnition 64
-Es seien a, b, c, d ∈ R mit ad − bc ̸= 0 und σ : C → C eine Abbildung deﬁniert durch
+Es seien a, b, c, d ∈R mit ad −bc ̸= 0 und σ : C →C eine Abbildung deﬁniert durch
 σ(z) := az + b
 cz + d
 σ heißt Möbiustransformation.
 Proposition 4.9
 a) Die Gruppe SL2(R) operiert auf H durch die Möbiustransformation
 σ(z) :=
-�a
+a
 b
 c
 d
-�
-◦ z := az + b
+
+◦z := az + b
 cz + d
 b) Die Gruppe PSL2(R) = SL2(R)/(±I) operiert durch σ auf H.
-c) PSL2(R) operiert auf R ∪ { ∞ }. Diese Gruppenoperation ist 3-fach transitiv, d. h.
-zu x0 < x1 < x∞ ∈ R gibt es genau ein σ ∈ PSL2(R) mit σ(x0) = 0, σ(x1) = 1,
+c) PSL2(R) operiert auf R ∪{ ∞}. Diese Gruppenoperation ist 3-fach transitiv, d. h.
+zu x0 < x1 < x∞∈R gibt es genau ein σ ∈PSL2(R) mit σ(x0) = 0, σ(x1) = 1,
 σ(x∞) = ∞.
 d) SL2(R) wird von den Matrizen
-�λ
+λ
 0
 0
 λ−1
-�
-�
-��
-�
+
+|
+{z
+}
 =:Aλ
 ,
-�1
+1
 t
 0
 1
-�
-� �� �
+
+| {z }
 =:Bt
 und
-� 0
+ 0
 1
 −1
 0
-�
-�
-��
-�
+
+|
+{z
+}
 =:C
-mit t, λ ∈ R×
+mit t, λ ∈R×
 erzeugt.
 e) PSL2(R) operiert auf G.
 Beweis:
-a) Sei z = x + iy ∈ H, d. h. y > 0 und σ =
-�a
+a) Sei z = x + iy ∈H, d. h. y > 0 und σ =
+a
 b
 c
 d
-�
-∈ SL2(R)
-⇒ σ(z) = a(x + iy) + b
+
+∈SL2(R)
+⇒σ(z) = a(x + iy) + b
 c(x + iy) + d
 = (ax + b) + iay
-(cx + d) + icy · (cx + d) − icy
-(cx + d) − icy
+(cx + d) + icy · (cx + d) −icy
+(cx + d) −icy
 = (ax + b)(cx + d) + aycy
 (cx + d)2 + (cy)2
-+ i ay(cx + d) − (ax + b)cy
++ i ay(cx + d) −(ax + b)cy
 (cx + d)2 + (cy)2
 = axcx + axd + bcx + bd + aycy
 (cx + d)2 + (cy)2
 + i
-(ad − bc)y
+(ad −bc)y
 (cx + d)2 + (cy)2
 SL2(R)
 =
@@ -3810,47 +3809,47 @@ ac(x2 + y2) + adx + bcx + bd
 + i
 y
 (cx + d)2 + (cy)2
-⇒ ℑ(σ(z)) =
+⇒ℑ(σ(z)) =
 y
 (cx+d)2+(cy)2 > 0
 Die Abbildung bildet also nach H ab. Außerdem gilt:
-�1
+1
 0
 0
 1
-�
-◦ z = x + iy
+
+◦z = x + iy
 1
 = x + iy = z
 
 82
 4.3. HYPERBOLISCHE GEOMETRIE
 und
-�a
+a
 b
 c
 d
-�
+
 ◦
-��a′
+a′
 b′
 c′
 d′
-�
-◦ z
-�
+
+◦z
+
 =
-�a
+a
 b
 c
 d
-�
-◦ a′z + b′
+
+◦a′z + b′
 c′z + d′
 =
-a a′z+b′
+aa′z+b′
 c′z+d′ + b
-c a′z+b′
+ca′z+b′
 c′z+d′ + d
 =
 a(a′z+b′)+b(c′z+d′)
@@ -3862,44 +3861,44 @@ c(a′z + b′) + d(c′z + d′)
 = (aa′ + bc′)z + ab′ + bd′
 (ca′ + db′)z + cb′ + dd′
 =
-�aa′ + bc′
+aa′ + bc′
 ab′ + bd′
 ca′ + db′
 cb′ + dd′
-�
-◦ z
+
+◦z
 =
-��a
+a
 b
 c
 d
-�
+
 ·
-�a′
+a′
 b′
 c′
 d′
-��
-◦ z
-b) Es gilt σ(z) = (−σ)(z) für alle σ ∈ SL2(R) und z ∈ H.
+
+◦z
+b) Es gilt σ(z) = (−σ)(z) für alle σ ∈SL2(R) und z ∈H.
 c) Ansatz: σ =
-�a
+a
 b
 c
 d
-�
+
 σ(x0) = ax0+b
 cx0+d
-!= 0 ⇒ ax0 + b = 0 ⇒ b = −ax0
-σ(x∞) = ∞ ⇒ cx∞ + d = 0 ⇒ d = −cx∞
-σ(x1) = 1 ⇒ ax1 + b = cx1 + d
-a(x1 − x0) = c(x1 − x∞) ⇒ c = a x1−x0
+!= 0 ⇒ax0 + b = 0 ⇒b = −ax0
+σ(x∞) = ∞⇒cx∞+ d = 0 ⇒d = −cx∞
+σ(x1) = 1 ⇒ax1 + b = cx1 + d
+a(x1 −x0) = c(x1 −x∞) ⇒c = a x1−x0
 x1−x∞
-⇒ −a2 · x∞ x1−x0
-x1−x∞ + a2x0 x1−x0
-x1−x∞ = 1
-⇒ a2 x1−x0
-x0−x∞ (x0 − x∞) = 1 ⇒ a2 =
+⇒−a2 · x∞x1−x0
+x1−x∞+ a2x0 x1−x0
+x1−x∞= 1
+⇒a2 x1−x0
+x0−x∞(x0 −x∞) = 1 ⇒a2 =
 x1−x∞
 (x1−x∞)(x1−x0)
 d) Es gilt:
@@ -3916,33 +3915,33 @@ erzeugen kann, genügt es also von einer beliebigen Matrix durch Multiplikation
 Matrizen der Form Aλ, Bt und C die Einheitsmatrix zu generieren.
 Sei also
 M =
-�a
+a
 b
 c
 d
-�
-∈ SL2(R)
+
+∈SL2(R)
 beliebig.
 Fall 1: a = 0
-Da M ∈ SL2(R) ist, gilt det M = 1 = ad − bc = −bc. Daher ist insbesondere c ̸= 0. Es
+Da M ∈SL2(R) ist, gilt det M = 1 = ad −bc = −bc. Daher ist insbesondere c ̸= 0. Es
 folgt:
-� 0
+ 0
 1
 −1
 0
-�
+
 ·
-�a
+a
 b
 c
 d
-�
+
 =
-� c
+ c
 d
 −a
 −b
-�
+
 
 83
 4.3. HYPERBOLISCHE GEOMETRIE
@@ -3950,68 +3949,68 @@ Gehe zu Fall 2.
 Fall 2: a ̸= 0
 Nun wird in M durch M · A 1
 a an der Stelle von a eine 1 erzeugt:
-�a
+a
 b
 c
 d
-�
+
 ·
-� 1
+ 1
 a
 0
 0
 a
-�
+
 =
-�1
+1
 ab
 c
 a
 ad
-�
+
 Gehe zu Fall 3.
 Fall 3: a = 1
-�1
+1
 b
 c
 d
-�
+
 ·
-�1
+1
 −b
 0
 1
-�
+
 =
-�1
+1
 0
 c
-d − bc
-�
-Da wir det M = 1 = ad − bc = d − bc wissen, gilt sogar M2,2 = 1.
+d −bc
+
+Da wir det M = 1 = ad −bc = d −bc wissen, gilt sogar M2,2 = 1.
 Gehe zu Fall 4.
 Fall 4: a = 1, b = 0, d = 1
 A−1CBcC
-�1
+1
 0
 c
 1
-�
+
 =
-�1
+1
 0
 0
 1
-�
+
 Daher erzeugen Matrizen der Form Aλ, Bt und C die Gruppe SL2R.
 ■
 e) Es genügt die Aussage für Matrizen aus Proposition 4.9 (d) zu zeigen.
 • σ =
-�λ
+λ
 0
 0
 λ−1
-�
+
 , also σ(z) = λ2z. Daraus ergeben sich die Situationen, die in
 Abbildung 4.22a und Abbildung 4.22b dargestellt sind.
 x
@@ -4054,21 +4053,21 @@ x
 (b) Fall 2 (Strahlensatz)
 Abbildung 4.22: Beweis von Proposition 4.9 (e) für eine Diagonalmatrix
 • Oﬀensichtlich gilt die Aussage für σ =
-�1
+1
 a
 0
 1
-�
+
 • Sei nun σ =
-� 0
+ 0
 1
 −1
 0
-�
-, also σ(z) = − 1
+
+, also σ(z) = −1
 z
 Bemerkung 69
-Zu hyperbolischen Geraden g1, g2 gibt es σ ∈ PSL2(R) mit σ(g1) = g2.
+Zu hyperbolischen Geraden g1, g2 gibt es σ ∈PSL2(R) mit σ(g1) = g2.
 
 84
 4.3. HYPERBOLISCHE GEOMETRIE
@@ -4088,46 +4087,46 @@ Abbildung 4.23: Inversion am Kreis
 Beweis: Nach Proposition 4.9 (c) gibt es σ mit σ(a1) = b1 und σ(a2) = b2. Dann existiert
 σ(g1) := g2 wegen dem Inzidenzaxiom §1 und ist eindeutig bestimmt.
 Deﬁnition 65
-Seien z1, z2, z3, z4 ∈ C paarweise verschieden.
+Seien z1, z2, z3, z4 ∈C paarweise verschieden.
 Dann heißt
 DV(z1, z2, z3, z4) :=
 z1−z4
 z1−z2
 z3−z4
 z3−z2
-= (z1 − z4) · (z3 − z2)
-(z1 − z2) · (z3 − z4)
+= (z1 −z4) · (z3 −z2)
+(z1 −z2) · (z3 −z4)
 Doppelverhältnis von z1, . . . , z4.
 Bemerkung 70 (Eigenschaften des Doppelverhältnisses)
-a) DV(z1, . . . , z4) ∈ C \ { 0, 1 }
+a) DV(z1, . . . , z4) ∈C \ { 0, 1 }
 b) DV(z1, z4, z3, z2) =
 1
 DV(z1,z2,z3,z4)
 c) DV(z3, z2, z1, z4) =
 1
 DV(z1,z2,z3,z4)
-d) DV ist auch wohldeﬁniert, wenn eines der zi = ∞ oder wenn zwei der zi gleich sind.
-e) DV(0, 1, ∞, z4) = z4 (Der Fall z4 ∈ { 0, 1, ∞ } ist zugelassen).
-f) Für σ ∈ PSL2(C) und z1, . . . , z4 ∈ C ∪ { ∞ } ist
+d) DV ist auch wohldeﬁniert, wenn eines der zi = ∞oder wenn zwei der zi gleich sind.
+e) DV(0, 1, ∞, z4) = z4 (Der Fall z4 ∈{ 0, 1, ∞} ist zugelassen).
+f) Für σ ∈PSL2(C) und z1, . . . , z4 ∈C ∪{ ∞} ist
 DV(σ(z1), σ(z2), σ(z3), σ(z4)) = DV(z1, z2, z3, z4)
 und für σ(z) = 1
 z gilt
 DV(σ(z1), σ(z2), σ(z3), σ(z4)) = DV(z1, z2, z3, z4)
-g) DV(z1, z2, z3, z4) ∈ R ∪ { ∞ } ⇔ z1, . . . , z4 liegen auf einer hyperbolischen Geraden.
+g) DV(z1, z2, z3, z4) ∈R ∪{ ∞} ⇔z1, . . . , z4 liegen auf einer hyperbolischen Geraden.
 Beweis:
 a) DV(z1, . . . , z4) ̸= 0, da zi paarweise verschieden
 DV(z1, . . . , z4) ̸= 1, da:
 Annahme: DV(z1, . . . , z4) = 1
-⇔ (z1 − z2)(z3 − z4) = (z1 − z4)(z3 − z2)
+⇔(z1 −z2)(z3 −z4) = (z1 −z4)(z3 −z2)
 
 85
 4.3. HYPERBOLISCHE GEOMETRIE
-⇔ z1z3 − z2z3 − z1z4 + z2z4 = z1z3 − z3z4 − z1z2 + z2z4
-⇔ z2z3 + z1z4 = z3z4 + z1z2
-⇔ z2z3 − z3z4 = z1z2 − z1z4
-⇔ z3(z2 − z4) = z1(z2 − z4)
-⇔ z3 = z1 oder z2 = z4
-Alle zi sind paarweise verschieden ⇒ Widerspruch
+⇔z1z3 −z2z3 −z1z4 + z2z4 = z1z3 −z3z4 −z1z2 + z2z4
+⇔z2z3 + z1z4 = z3z4 + z1z2
+⇔z2z3 −z3z4 = z1z2 −z1z4
+⇔z3(z2 −z4) = z1(z2 −z4)
+⇔z3 = z1 oder z2 = z4
+Alle zi sind paarweise verschieden ⇒Widerspruch
 ■
 b) DV(z1, z4, z3, z2) = (z1−z2)·(z3−z4)
 (z1−z4)·(z3−z2) =
@@ -4141,31 +4140,31 @@ d) Zwei der zi dürfen gleich sein, da:
 Fall 1 z1 = z4 oder z3 = z2
 In diesem Fall ist DV(z1, . . . , z4) = 0
 Fall 2 z1 = z2 oder z3 = z4
-Mit der Regel von L’Hospital folgt, dass in diesem Fall DV(z1, . . . , z4) = ∞ gilt.
+Mit der Regel von L’Hospital folgt, dass in diesem Fall DV(z1, . . . , z4) = ∞gilt.
 Fall 3 z1 = z3 oder z2 = z4
 Durch Einsetzen ergibt sich DV(z1, . . . , z4) = 1.
-Im Fall, dass ein zi = ∞ ist, ist entweder DV(0, 1, ∞, z4) = 0 oder DV(0, 1, ∞, z4)±∞
+Im Fall, dass ein zi = ∞ist, ist entweder DV(0, 1, ∞, z4) = 0 oder DV(0, 1, ∞, z4)±∞
 e) DV(0, 1, ∞, z4) = (0−z4)·(∞−1)
 (0−1)·(∞−z4) = z4·(∞−1)
 ∞−z4
 = z4
 f) Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken.
-g) Sei σ ∈ PSL2(C) mit σ(z1) = 0, σ(z2) = 1, σ(z3) = ∞. Ein solches σ existiert, da man
+g) Sei σ ∈PSL2(C) mit σ(z1) = 0, σ(z2) = 1, σ(z3) = ∞. Ein solches σ existiert, da man
 drei Parameter von σ wählen darf.
 Bem. 70.f
 ⇒
 DV(z1, . . . , z4) = DV(0, 1, ∞, σ(z4))
 ⇒
-DV(z1, . . . , z4) ∈ R ∪ { ∞ }
-⇔ σ(z4) ∈ R ∪ { ∞ }
-Behauptung folgt, weil σ−1(R ∪ ∞) ein Kreis oder eine Gerade in C ist.
+DV(z1, . . . , z4) ∈R ∪{ ∞}
+⇔σ(z4) ∈R ∪{ ∞}
+Behauptung folgt, weil σ−1(R ∪∞) ein Kreis oder eine Gerade in C ist.
 Deﬁnition 66
-Für z1, z2 ∈ H sei gz1,z2 die eindeutige hyperbolische Gerade durch z1 und z2 und a1, a2 die
-„Schnittpunkte“ von gz1,z2 mit R ∪ { ∞ }.
+Für z1, z2 ∈H sei gz1,z2 die eindeutige hyperbolische Gerade durch z1 und z2 und a1, a2 die
+„Schnittpunkte“ von gz1,z2 mit R ∪{ ∞}.
 Dann sei dH(z1, z2) := 1
 2| ln DV(a1, z1, a2, z2)| und heiße hyperbolische Metrik.
-Beh.: Für z1, z2 ∈ H sei gz1,z2 die eindeutige hyperbolische Gerade durch z1 und z2 und a1, a2
-die „Schnittpunkte“ von gz1,z2 mit R ∪ { ∞ }.
+Beh.: Für z1, z2 ∈H sei gz1,z2 die eindeutige hyperbolische Gerade durch z1 und z2 und a1, a2
+die „Schnittpunkte“ von gz1,z2 mit R ∪{ ∞}.
 Dann gilt:
 1
 2| ln DV(a1, z1, a2, z2)| = 1
@@ -4176,7 +4175,7 @@ DV(a1, z1, a2, z2) =
 DV(a2, z1, a1, z2)
 Außerdem gilt:
 ln 1
-x = ln x−1 = (−1) · ln x = − ln x
+x = ln x−1 = (−1) · ln x = −ln x
 
 86
 4.3. HYPERBOLISCHE GEOMETRIE
@@ -4191,21 +4190,21 @@ Beh.: Die hyperbolische Metrik ist eine Metrik auf H.
 Beweis: Wegen Bemerkung 70.f ist
 d(z1, z2) := d(σ(z1), σ(z2)) mit σ(a1) = 0, σ(a2) = ∞
 d. h. σ(gz1,z2) = iR (imaginäre Achse).
-also gilt o. B. d. A. z1 = ia und z2 = ib mit a, b ∈ R und a < b.
+also gilt o. B. d. A. z1 = ia und z2 = ib mit a, b ∈R und a < b.
 2d(ia, ib) =| ln DV(0, ia, ∞, ib) |
-=| ln (0 − ib)(∞ − ia)
-(0 − ia)(∞ − ib) |
+=| ln (0 −ib)(∞−ia)
+(0 −ia)(∞−ib) |
 =| ln b
 a |
-= ln b − ln a
-Also: d(z1, z2) ≥ 0, d(z1, z2) = 0 ⇔ z1 = z2
+= ln b −ln a
+Also: d(z1, z2) ≥0, d(z1, z2) = 0 ⇔z1 = z2
 2d(z2, z1) =| ln DV(a2, z2, a1, z1) |
 =| ln DV(∞, ib, 0, ia) |
 Bem. 70.b
 =
 | ln DV(0, ib, ∞, ia) |
 = 2d(z1, z2)
-Liegen drei Punkte z1, z2, z3 ∈ C auf einer hyperbolischen Geraden, so gilt d(z1, z3) =
+Liegen drei Punkte z1, z2, z3 ∈C auf einer hyperbolischen Geraden, so gilt d(z1, z3) =
 d(z1, z2) + d(z2, z3) (wenn z2 zwischen z1 und z3 liegt).
 Dreiecksungleichung: Beweis ist umständlich und wird hier nicht geführt. Es sei auf die
 Vorlesung „Hyperbolische Geometrie“ verwiesen.
@@ -4218,60 +4217,60 @@ aber Axiom §5 ist verletzt.
 4.3. HYPERBOLISCHE GEOMETRIE
 Übungsaufgaben
 Aufgabe 8
-Seien (X, d) eine absolute Ebene und P, Q, R ∈ X Punkte. Der Scheitelwinkel des Winkels
-∠PQR ist der Winkel, der aus den Halbgeraden QP − und QR− gebildet wird. Die
-Nebenwinkel von ∠PQR sind die von QP + und QR− bzw. QP − und QR+ gebildeten
+Seien (X, d) eine absolute Ebene und P, Q, R ∈X Punkte. Der Scheitelwinkel des Winkels
+∠PQR ist der Winkel, der aus den Halbgeraden QP −und QR−gebildet wird. Die
+Nebenwinkel von ∠PQR sind die von QP + und QR−bzw. QP −und QR+ gebildeten
 Winkel.
 Zeigen Sie:
 (a) Die beiden Nebenwinkel von ∠PQR sind gleich.
 (b) Der Winkel ∠PQR ist gleich seinem Scheitelwinkel.
 Aufgabe 9
-Sei (X, d) eine absolute Ebene. Der Abstand eines Punktes P zu einer Menge Y ⊆ X von
-Punkten ist deﬁniert durch d(P, Y ) := inf d(P, y)|y ∈ Y .
+Sei (X, d) eine absolute Ebene. Der Abstand eines Punktes P zu einer Menge Y ⊆X von
+Punkten ist deﬁniert durch d(P, Y ) := inf d(P, y)|y ∈Y .
 Zeigen Sie:
 (a) Ist △ABC ein Dreieck, in dem die Seiten AB und AC kongruent sind, so sind die
 Winkel ∠ABC und ∠BCA gleich.
 (b) Ist △ABC ein beliebiges Dreieck, so liegt der längeren Seite der größere Winkel
 gegenüber und umgekehrt.
-(c) Sind g eine Gerade und P /∈ g ein Punkt, so gibt es eine eindeutige Gerade h mit
-P ∈ h und die g im rechten Winkel schneidet. Diese Grade heißt Lot von P auf g
+(c) Sind g eine Gerade und P /∈g ein Punkt, so gibt es eine eindeutige Gerade h mit
+P ∈h und die g im rechten Winkel schneidet. Diese Grade heißt Lot von P auf g
 und der Schnittpunkt des Lots mit g heißt Lotfußpunkt.
 Aufgabe 10
-Seien f, g, h ∈ G und paarweise verschieden.
-Zeigen Sie: f ∥ g ∧ g ∥ h ⇒ f ∥ h
+Seien f, g, h ∈G und paarweise verschieden.
+Zeigen Sie: f ∥g ∧g ∥h ⇒f ∥h
 Aufgabe 11
 Beweise den Kongruenzsatz SSS.
 
 5 Krümmung
 Deﬁnition 67
-Sei f : [a, b] → Rn eine eine Funktion aus C∞. Dann heißt f Kurve.
+Sei f : [a, b] →Rn eine eine Funktion aus C∞. Dann heißt f Kurve.
 5.1 Krümmung von Kurven
 Deﬁnition 68
-Sei γ : I = [a, b] → Rn eine Kurve.
+Sei γ : I = [a, b] →Rn eine Kurve.
 a) Die Kurve γ heißt durch Bogenlänge parametrisiert, wenn gilt:
 ∥γ′(t)∥2 = 1
-∀t ∈ I
+∀t ∈I
 Dabei ist γ′(t) = (γ′
 1(t), γ′
 2(t), . . . , γ′
 n(t)).
 b) l(γ) =
-� b
+R b
 a ∥γ′(t)∥dt heißt Länge von γ.
 Bemerkung 71 (Eigenschaften von Kurven I)
-Sei γ : I = [a, b] → Rn eine C∞-Funktion.
-a) Ist γ durch Bogenlänge parametrisiert, so ist l(γ) = b − a.
-b) Ist γ durch Bogenlänge parametrisiert, so ist γ′(t) orthogonal zu γ′′(t) für alle t ∈ I.
+Sei γ : I = [a, b] →Rn eine C∞-Funktion.
+a) Ist γ durch Bogenlänge parametrisiert, so ist l(γ) = b −a.
+b) Ist γ durch Bogenlänge parametrisiert, so ist γ′(t) orthogonal zu γ′′(t) für alle t ∈I.
 Beweis:
 a) l(γ) =
-� b
+R b
 a ∥γ′(t)∥dt =
-� b
-a 1dt = b − a.
-b) Im Folgenden wird die Aussage nur für γ : [a, b] → R2 bewiesen. Allerdings funktioniert
+R b
+a 1dt = b −a.
+b) Im Folgenden wird die Aussage nur für γ : [a, b] →R2 bewiesen. Allerdings funktioniert
 der Beweis im Rn analog. Es muss nur die Ableitung angepasst werden.
-1 = ∥γ′(t)∥ = ∥γ′(t)∥2 = ⟨γ′(t), γ′(t)⟩
-⇒ 0 = d
+1 = ∥γ′(t)∥= ∥γ′(t)∥2 = ⟨γ′(t), γ′(t)⟩
+⇒0 = d
 dt⟨γ′(t), γ′(t)⟩
 = d
 dt(γ′
@@ -4286,145 +4285,145 @@ dt(γ′
 2(t))
 = 2 · ⟨γ′′(t), γ′(t)⟩
 Deﬁnition 69
-Sei γ : I → R2 eine durch Bogenlänge parametrisierte Kurve.
-a) Für t ∈ I sei n(t) Normalenvektor an γ in t wenn gilt:
-⟨n(t), γ′(t)⟩ = 0, ∥n(t)∥ = 1 und det((γ′(t), n(t))) = +1
+Sei γ : I →R2 eine durch Bogenlänge parametrisierte Kurve.
+a) Für t ∈I sei n(t) Normalenvektor an γ in t wenn gilt:
+⟨n(t), γ′(t)⟩= 0, ∥n(t)∥= 1 und det((γ′(t), n(t))) = +1
 
 89
 5.1. KRÜMMUNG VON KURVEN
-b) Seit κ : I → R so, dass gilt:
+b) Seit κ : I →R so, dass gilt:
 γ′′(t) = κ(t) · n(t)
 Dann heißt κ(t) Krümmung von γ in t.
 Da n(t) und γ′′(t) nach Bemerkung 71.b linear abhängig sind, existiert κ(t).
 Beispiel 45
 Gegeben sei ein Kreis mit Radius r, d. h. mit Umfang 2πr. Es gilt:
 γ(t) =
-�
+
 r · cos t
 r, r · sin t
 r
-�
-für t ∈ [0, 2πr]
+
+für t ∈[0, 2πr]
 ist parametrisiert durch Bogenlänge, da gilt:
 γ′(t) =
-�
+
 (r · 1
-r)(− sin t
+r)(−sin t
 r), r1
 r cos t
 r
-�
+
 =
-�
-− sin t
+
+−sin t
 r, cos t
 r
-�
+
 Der Normalenvektor von γ in t ist
 n(t) =
-�
-− cos t
-r, − sin t
+
+−cos t
+r, −sin t
 r
-�
+
 da gilt:
-⟨n(t), γ′(t)⟩ =
-��− cos t
+⟨n(t), γ′(t)⟩=
+−cos t
 r
-− sin t
+−sin t
 r
-�
+
 ,
-�− sin t
+−sin t
 r
 cos t
 r
-��
-= (− cos t
-r) · (− sin t
-r) + (− sin t
+
+= (−cos t
+r) · (−sin t
+r) + (−sin t
 r) · (cos t
 r)
 = 0
-∥n(t)∥ =
-����(− cos t
-r, − sin t
+∥n(t)∥=
+(−cos t
+r, −sin t
 r)
-����
-= (− cos t
-r)2 + (− sin t
+
+= (−cos t
+r)2 + (−sin t
 r)2
 = 1
 det(γ′
 1(t), n(t)) =
-����
-�− sin t
+
+−sin t
 r
-− cos t
+−cos t
 r
 cos t
 r
-− sin t
+−sin t
 r
-�����
-= (− sin t
-r)2 − (− cos t
+
+= (−sin t
+r)2 −(−cos t
 r) · cos t
 r
 = 1
 Die Krümmung ist für jedes t konstant 1
 r, da gilt:
 γ′′(t) =
-�
+
 −1
 r cos t
 r, −1
 r sin t
 r
-�
+
 = 1
 r ·
-�
-− cos t
-r, − sin t
+
+−cos t
+r, −sin t
 r
-�
-⇒ κ(t) = 1
+
+⇒κ(t) = 1
 r
 
 90
 5.2. TANGENTIALEBENE
 Deﬁnition 70
-Sei γ : I → R3 eine durch Bogenlänge parametrisierte Kurve.
-a) Für t ∈ I heißt κ(t) := ∥γ′′(t)∥ die Krümmung von γ in t.
-b) Ist für t ∈ I die Ableitung γ′′(t) ̸= 0, so heißt
+Sei γ : I →R3 eine durch Bogenlänge parametrisierte Kurve.
+a) Für t ∈I heißt κ(t) := ∥γ′′(t)∥die Krümmung von γ in t.
+b) Ist für t ∈I die Ableitung γ′′(t) ̸= 0, so heißt
 γ′′(t)
-∥γ′′(t)∥ Normalenvektor an γ in t.
+∥γ′′(t)∥Normalenvektor an γ in t.
 c) b(t) sei ein Vektor, der γ′(t), n(t) zu einer orientierten Orthonormalbasis von R3 ergänzt.
 Also gilt:
 det(γ′(t), n(t), b(t)) = 1
 b(t) heißt Binormalenvektor, die Orthonormalbasis
-�
+
 γ′(t), n(t), b(t)
-�
+	
 heißt begleitendes Dreibein.
 Bemerkung 72 (Eigenschaften von Kurven II)
-Sei γ : I → R3 durch Bogenlänge parametrisierte Kurve.
+Sei γ : I →R3 durch Bogenlänge parametrisierte Kurve.
 a) n(t) ist orthogonal zu γ′(t).
 b) b(t) aus Deﬁnition 70.c ist eindeutig.
 5.2 Tangentialebene
 Erinnerung Sie sich an Deﬁnition 32 „reguläre Fläche“.
 Äquivalent dazu ist: S ist lokal von der Form
 V (f) =
-�
-x ∈ R3 �� f(x) = 0
-�
-für eine C∞-Funktion f : R3 → R.
+
+x ∈R3  f(x) = 0
+	
+für eine C∞-Funktion f : R3 →R.
 Deﬁnition 71
-Sei S ⊆ R3 eine reguläre Fläche, s ∈ S, F : U → V ∩ S eine lokale Parametrisierung um
-s ∈ V :
-(u, v) �→ (x(u, v), y(u, v), z(u, v))
-Für p = F −1(s) ∈ U sei
+Sei S ⊆R3 eine reguläre Fläche, s ∈S, F : U →V ∩S eine lokale Parametrisierung um
+s ∈V :
+(u, v) 7→(x(u, v), y(u, v), z(u, v))
+Für p = F −1(s) ∈U sei
 JF (p) =
 
 
@@ -4442,8 +4441,8 @@ JF (p) =
 ∂v(p)
 
 
-und DpF : R2 → R3 die durch JF (p) deﬁnierte lineare Abbildung.
-Dann heißt TsS := Bild(DpF) die Tangentialebene an s ∈ S.
+und DpF : R2 →R3 die durch JF (p) deﬁnierte lineare Abbildung.
+Dann heißt TsS := Bild(DpF) die Tangentialebene an s ∈S.
 Bemerkung 73 (Eigenschaften der Tangentialebene)
 a) TsS ist 2-dimensionaler Untervektorraum von R3.
 b) TsS = ⟨˜u, ˜v⟩, wobei ˜u, ˜v die Spaltenvektoren der Jacobi-Matrix JF (p) sind.
@@ -4451,45 +4450,46 @@ c) TsS hängt nicht von der gewählten Parametrisierung ab.
 
 91
 5.2. TANGENTIALEBENE
-d) Sei S = V (f) eine reguläre Fläche in R3, also f : V → R eine C∞-Funktion, V ⊆ R3
-oﬀen, grad(f)(x) ̸= 0 für alle x ∈ S.
-Dann ist TsS = (grad(f)(s))⊥ für jedes s ∈ S.
+d) Sei S = V (f) eine reguläre Fläche in R3, also f : V →R eine C∞-Funktion, V ⊆R3
+oﬀen, grad(f)(x) ̸= 0 für alle x ∈S.
+Dann ist TsS = (grad(f)(s))⊥für jedes s ∈S.
 Beweis:
 a) JF ist eine 3 × 2-Matrix, die mit einem 2 × 1-Vektor multipliziert wird. Das ist
 eine lineare Abbildung und aus der linearen Algebra ist bekannt, das das Bild ein
 Vektorraum ist. Da Rg(JF ) = 2, ist auch dim(TsS) = 2.
 b) Hier kann man wie in Punkt a) argumentieren
-c) TsS = {x ∈ R3|∃parametrisierte Kurve γ : [−ε, +ε] → S für ein ε > 0 mit γ(0) =
+c) TsS = {x ∈R3|∃parametrisierte Kurve γ : [−ε, +ε] →S für ein ε > 0 mit γ(0) =
 s und γ′(0) = x}
 Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken.
-d) Sei x ∈ TsS, γ : [−ε, +ε] → S eine parametrisierte Kurve mit ε > 0 und γ′(0) = s,
-sodass γ′(0) = x gilt. Da γ(t) ∈ S für alle t ∈ [−ε, ε], ist f ◦ γ = 0
-⇒ 0 = (f ◦ γ)′(0) = ⟨grad(f)(γ(0)), γ′(0)⟩
-⇒ TsS ⊆ grad(f)(s)⊥
+d) Sei x ∈TsS, γ : [−ε, +ε] →S eine parametrisierte Kurve mit ε > 0 und γ′(0) = s,
+sodass γ′(0) = x gilt. Da γ(t) ∈S für alle t ∈[−ε, ε], ist f ◦γ = 0
+⇒0 = (f ◦γ)′(0) = ⟨grad(f)(γ(0)), γ′(0)⟩
+⇒TsS ⊆grad(f)(s)⊥
 dim=2
-====⇒ TsS = (grad(f)(s))⊥
+====⇒TsS = (grad(f)(s))⊥
 Deﬁnition 72
-a) Ein Normalenfeld auf der regulären Fläche S ⊆ R3 ist eine Abbildung n : S → S2 ⊆
-R3 mit n(s) ∈ TsS⊥ für jedes s ∈ S.
+a) Ein Normalenfeld auf der regulären Fläche S ⊆R3 ist eine Abbildung n : S →S2 ⊆
+R3 mit n(s) ∈TsS⊥für jedes s ∈S.
 b) S heißt orientierbar, wenn es ein stetiges Normalenfeld auf S gibt.
 Manchmal wird zwischen einem Normalenfeld und einem Einheitsnormalenfeld unterschieden.
 Im Folgenden werden diese Begriﬀe jedoch synonym benutzt.
 Bemerkung 74 (Eigenschaften von Normalenfeldern)
 a) Ein Normalenfeld auf S ist genau dann stetig, wenn es glatt ist (also C∞).
-b) Zu jedem s ∈ S gibt es eine Umgebung V ⊆ R3 von s und eine lokale Parametrisierung
-F : U → V von S um s, sodass auf F(U) = V ∩ S ein stetiges Normalenfeld existiert.
+b) Zu jedem s ∈S gibt es eine Umgebung V ⊆R3 von s und eine lokale Parametrisierung
+F : U →V von S um s, sodass auf F(U) = V ∩S ein stetiges Normalenfeld existiert.
 c) S ist genau dann orientierbar, wenn es einen diﬀerenzierbaren Atlas von S aus lokalen
-Parametrisierungen Fi : Ui → Vi, i ∈ I gibt, sodass für alle i, j ∈ F und alle
-s ∈ Vi ∩ Vj ∩ S gilt:
+Parametrisierungen Fi : Ui →Vi, i ∈I gibt, sodass für alle i, j ∈F und alle
+s ∈Vi ∩Vj ∩S gilt:
 det(Ds
-� Vi→Vj
-��
-�
-Fj ◦ F −1
+Vi→Vj
+z
+}|
+{
+Fj ◦F −1
 i
-�
-��
-�
+|
+{z
+}
 ∈R3×3
 ) > 0
 Beweis: Wird hier nicht geführt.
@@ -4504,11 +4504,11 @@ lenfeld, aber kein stetiges Normalenfeld.
 Abbildung 5.1: Möbiusband
 5.3 Gauß-Krümmung
 Bemerkung 75
-Sei S eine reguläre Fläche, s ∈ S, n(s) ist ein Normalenvektor in s, x ∈ TsS, ∥x∥ = 1.
+Sei S eine reguläre Fläche, s ∈S, n(s) ist ein Normalenvektor in s, x ∈TsS, ∥x∥= 1.
 Sei E der von x und n(s) aufgespannte 2-dimensionale Untervektorraum von R3.
-Dann gibt es eine Umgebung V ⊆ R3 von s, sodass
-C := (s + E) ∩ S ∩ V
-das Bild einer durch Bogenlänge parametrisierten Kurve γ : [−ε, ε] → S enthält mit γ(0) = s
+Dann gibt es eine Umgebung V ⊆R3 von s, sodass
+C := (s + E) ∩S ∩V
+das Bild einer durch Bogenlänge parametrisierten Kurve γ : [−ε, ε] →S enthält mit γ(0) = s
 und γ′(0) = x.
 Beweis: „Satz über implizite Funktionen“1
 Deﬁnition 73
@@ -4517,28 +4517,28 @@ In der Situation aus Bemerkung 75 heißt die Krümmung κγ(0) der Kurve γ in d
 Man schreibt: κNor(s, x) := κγ(0)
 Hinweis: Die Krümmung ist nur bis auf das Vorzeichen bestimmt.
 Beispiel 47 (Gauß-Krümmung)
-1) S = S2 = V (X2 + Y 2 + Z2 − 1) ist die Kugel um den Ursprung mit Radius 1, n = id,
+1) S = S2 = V (X2 + Y 2 + Z2 −1) ist die Kugel um den Ursprung mit Radius 1, n = id,
 s = (0, 0, 1), x = (1, 0, 0)
-⇒ E = R · x + R · n(s) (x, z-Ebene)
-C = E ∩ S ist Kreislinie
+⇒E = R · x + R · n(s) (x, z-Ebene)
+C = E ∩S ist Kreislinie
 κNor(s, x) = 1
 r = 1
-2) S = V (X2 + Z2 − 1) ⊆ R3 ist ein Zylinder (siehe Abbildung 5.2a). s = (1, 0, 0)
-x1 = (0, 1, 0) ⇒ E1 = R · e1 + R · e2 (x, y-Ebene)
-S ∩ E1 = V (X2 + Y 2 − 1) ∩ E, Kreislinie in E
-⇒ κNor(s, x1) = ±1
+2) S = V (X2 + Z2 −1) ⊆R3 ist ein Zylinder (siehe Abbildung 5.2a). s = (1, 0, 0)
+x1 = (0, 1, 0) ⇒E1 = R · e1 + R · e2 (x, y-Ebene)
+S ∩E1 = V (X2 + Y 2 −1) ∩E, Kreislinie in E
+⇒κNor(s, x1) = ±1
 x2 = (0, 0, 1), E2 = R · e1 + R · e3 (x, z-Ebene)
 1Siehe z. B. https://github.com/MartinThoma/LaTeX-examples/tree/master/documents/Analysis%20II
 
 93
 5.3. GAUSS-KRÜMMUNG
-V ∩ E2 ∩ S =
-�
-(1, 0, z) ∈ R3 �� z ∈ R
-�
+V ∩E2 ∩S =
+
+(1, 0, z) ∈R3  z ∈R
+	
 ist eine Gerade
-⇒ κNor(s, x2) = 0
-3) S = V (X2 − Y 2 − Z), s = (0, 0, 0) (Hyperbolisches Paraboloid, siehe Abbildung 5.2b)
+⇒κNor(s, x2) = 0
+3) S = V (X2 −Y 2 −Z), s = (0, 0, 0) (Hyperbolisches Paraboloid, siehe Abbildung 5.2b)
 x1 = (1, 0, 0), n(s) = (0, 0, 1)
 x2 = (0, 1, 0)
 κNor(s, x1) =
@@ -4563,7 +4563,7 @@ x2 = (0, 1, 0)
 x
 y
 z
-(a) S = V (X2 + Z2 − 1)
+(a) S = V (X2 + Z2 −1)
 −2
 −1.5
 −1
@@ -4590,30 +4590,30 @@ z
 2
 4
 f(x, y)
-(b) S = V (X2 − Y 2 − Z)
+(b) S = V (X2 −Y 2 −Z)
 Abbildung 5.2: Beispiele für reguläre Flächen
 Deﬁnition 74
-Sei S ⊆ R3 eine reguläre Fläche, s ∈ S und n ein stetiges Normalenfeld auf S.
-γ : [−ε, ε] → S eine nach Bogenlänge parametrisierte Kurve (ε > 0) mit γ(0) = s und
+Sei S ⊆R3 eine reguläre Fläche, s ∈S und n ein stetiges Normalenfeld auf S.
+γ : [−ε, ε] →S eine nach Bogenlänge parametrisierte Kurve (ε > 0) mit γ(0) = s und
 γ′′(0) ̸= 0.
 Sei n(0) :=
 γ′′(0)
 ∥γ′′(0)∥. Zerlege
-n(0) = n(0)t + n(0)⊥ mit n(0)t ∈ TsS und n(0)⊥ ∈ (TsS)⊥
-Dann ist n(0)⊥ = ⟨n(0), n(s)⟩ · n(s)
-κNor(s, γ) := ⟨γ′′(0), n(s)⟩ die Normalkrümmung.
+n(0) = n(0)t + n(0)⊥mit n(0)t ∈TsS und n(0)⊥∈(TsS)⊥
+Dann ist n(0)⊥= ⟨n(0), n(s)⟩· n(s)
+κNor(s, γ) := ⟨γ′′(0), n(s)⟩die Normalkrümmung.
 Bemerkung 76
-Sei γ(t) = γ(−t), t ∈ [−ε, ε]. Dann ist κNor(s, γ) = κNor(s, γ).
+Sei γ(t) = γ(−t), t ∈[−ε, ε]. Dann ist κNor(s, γ) = κNor(s, γ).
 Beweis: γ′′(0) = γ′′(0), da γ′(0) = −γ′(0).
 Es gilt: κNor(s, γ) hängt nur von |γ′(0)| ab und ist gleich κNor(s, γ′(0)).
 Bemerkung 77
 Sei S eine reguläre Fläche und n = n(s) ein Normalenvektor an S in s.
 Sei T 1
-s S = { x ∈ TsS | ∥x∥ = 1 } ∼= S1. Dann ist
+s S = { x ∈TsS | ∥x∥= 1 } ∼= S1. Dann ist
 κn
 Nor(s) : T 1
-s S → R,
-x �→ κNor(s, x)
+s S →R,
+x 7→κNor(s, x)
 eine glatte Funktion und Bild κn
 Nor(s) ist ein abgeschlossenes Intervall.
 Deﬁnition 75
@@ -4623,21 +4623,21 @@ Sei S eine reguläre Fläche und n = n(s) ein Normalenvektor an S in s.
 5.3. GAUSS-KRÜMMUNG
 a) κn
 1(s) : = min
-�
+
 κn
 Nor(s, x)
-�� x ∈ T 1
+ x ∈T 1
 s S
-�
+	
 und
 κn
 2(s) : = max
-�
+
 κn
 Nor(s, x)
-�� x ∈ T 1
+ x ∈T 1
 s S
-�
+	
 heißen Hauptkrümmungen von S in s.
 b) K(s) := κn
 1(s) · κn
@@ -4646,9 +4646,9 @@ Bemerkung 78
 Ersetzt man n durch −n, so gilt:
 κ−n
 Nor(s, x) = −κn
-Nor(x) ∀x ∈ T 1
+Nor(x) ∀x ∈T 1
 s S
-⇒ κ−n
+⇒κ−n
 1 (s) = −κn
 2(s)
 κ−n
@@ -4656,48 +4656,48 @@ s S
 1(s)
 und K−n(s) = Kn(s) =: K(s)
 Beispiel 48
-1) S = S2. Dann ist κ1(s) = κ2(s) = ±1 ∀s ∈ S2
-⇒ K(s) = 1
+1) S = S2. Dann ist κ1(s) = κ2(s) = ±1 ∀s ∈S2
+⇒K(s) = 1
 2) Zylinder:
-κ1(s) = 0, κ2(s) = 1 ⇒ K(s) = 0
+κ1(s) = 0, κ2(s) = 1 ⇒K(s) = 0
 3) Sattelpunkt auf hyperbolischem Paraboloid:
-κ1(s) < 0, κ2(s) = 0 → K(s) < 0
+κ1(s) < 0, κ2(s) = 0 →K(s) < 0
 4) S = Torus. Siehe Abbildung 5.3
 s1
 s2
 s3
 Abbildung 5.3: K(s1) > 0, K(s2) = 0, K(s3) < 0
 Bemerkung 79
-Sei S eine reguläre Fläche, s ∈ S ein Punkt.
+Sei S eine reguläre Fläche, s ∈S ein Punkt.
 
 95
 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM
 a) Ist K(s) > 0, so liegt S in einer Umgebung von s ganz auf einer Seite von TsS + s.
 b) Ist K(s) < 0, so schneidet jede Umgebung von s in S beide Seiten von TsS + s.
 5.4 Erste und zweite Fundamentalform
-Sei S ⊆ R3 eine reguläre Fläche, s ∈ S, TsS die Tangentialebene an S in s und F : U → V eine
+Sei S ⊆R3 eine reguläre Fläche, s ∈S, TsS die Tangentialebene an S in s und F : U →V eine
 lokale Parametrisierung von S um s. Weiter sei p := F −1(s).
 Deﬁnition 76
-Sei IS ∈ R2×2 deﬁniert als
+Sei IS ∈R2×2 deﬁniert als
 IS : =
-�g1,1(s)
+g1,1(s)
 g1,2(s)
 g1,2(s)
 g2,2(s)
-�
+
 =
-�E(s)
+E(s)
 F(s)
 F(s)
 G(s)
-�
+
 mit gi,j = gs(DpF(ei), DpF(ej))
-= ⟨ ∂F
+= ⟨∂F
 ∂ui
 (p), ∂F
 ∂uj
 (p)⟩
-i, j ∈ { 1, 2 }
+i, j ∈{ 1, 2 }
 Die Matrix IS heißt erste Fundamentalform von S bzgl. der Parametrisierung F.
 Bemerkung 80
 a) Die Einschränkung des Standardskalarproduktes des R3 auf TsS macht TsS zu einem
@@ -4708,13 +4708,13 @@ kung 80.a die Darstellungsmatrix IS.
 d) gi,j(s) ist eine diﬀerenzierbare Funktion von s.
 Bemerkung 81
 det(IS) =
-����
+
 ∂F
 ∂u1
 (p) × ∂F
 ∂u2
 (p)
-����
+
 2
 Beweis: Sei ∂F
 ∂u1 (p) =
@@ -4724,7 +4724,7 @@ x1
 x2
 x3
 
- ,
+,
 ∂F
 ∂u2 (p) =
 
@@ -4743,31 +4743,31 @@ z1
 z2
 z3
 
- mit
-z1 = x2y3 − x3y2
-z2 = x3y1 − x1y3
-z3 = x1y2 − x2y1
-⇒ ∥ ∂F
+mit
+z1 = x2y3 −x3y2
+z2 = x3y1 −x1y3
+z3 = x1y2 −x2y1
+⇒∥∂F
 ∂u1
 (p) × ∂F
 ∂u2
-(p)∥ = z2
+(p)∥= z2
 1 + z2
 2 + z2
 3
 
 96
 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM
-det(IS) = g1,1g2,2 − g2
+det(IS) = g1,1g2,2 −g2
 1,2
 =
-�
+*
 
 x1
 x2
 x3
 
- ,
+,
 
 
 x1
@@ -4775,13 +4775,13 @@ x2
 x3
 
 
-� �
++ *
 
 y1
 y2
 y3
 
- ,
+,
 
 
 y1
@@ -4789,15 +4789,15 @@ y2
 y3
 
 
-�
++
 −
-�
+*
 
 x1
 x2
 x3
 
- ,
+,
 
 
 y1
@@ -4805,78 +4805,78 @@ y2
 y3
 
 
-�2
++2
 = (x2
 1 + x2
 2 + x2
 3)(y2
 1 + y2
 2 + y2
-3) − (x1y1 + x2y2 + x3y3)2
+3) −(x1y1 + x2y2 + x3y3)2
 Deﬁnition 77
 a) Das Diﬀerential dA =
-�
+p
 det(I)du1du2 heißt Flächenelement von S bzgl. der Para-
 metrisierung F.
-b) Für eine Funktion f : V → R heißt
-�
+b) Für eine Funktion f : V →R heißt
+Z
 V
 fdA :=
-�
+Z
 U
 f(F(u1, u2)
-�
-��
-�
+|
+{z
+}
 =:s
 )
-�
+p
 det I(s)du1du2
 der Wert des Integrals von f über V , falls das Integral rechts existiert.
 Bemerkung 82
 a)
-�
+R
 V fdA ist unabhängig von der gewählten Parametrisierung.
-b) Sei f : S → R eine Funktion, die im Sinne von Deﬁnition 77.b lokal integrierbar ist.
+b) Sei f : S →R eine Funktion, die im Sinne von Deﬁnition 77.b lokal integrierbar ist.
 Dann ist
-�
+R
 S fdA wohldeﬁniert, falls (z. B.) S kompakt ist.
 Etwa:
-�
+Z
 S
 fdA =
 n
-�
+X
 i=1
-�
+Z
 Vi
 fdA
 −
-�
+X
 i̸=j
-�
+Z
 Vi∩Vj
 fdA
 +
-�
+X
 i,j,k
-�
+Z
 Vi∩Vj∩Vk
 fdA
-− . . .
+−. . .
 Beweis:
 a) Mit Transformationsformel.
 b) Ist dem Leser überlassen.
 Proposition 5.1
-Sei S ⊆ R3 eine reguläre, orientierbare Fläche mit glatten Normalenfeld n : S → S2.
+Sei S ⊆R3 eine reguläre, orientierbare Fläche mit glatten Normalenfeld n : S →S2.
 Dann gilt:
-a) n induziert für jedes s ∈ S eine lineare Abbildung dsn : TsS → Tn(s)S2 durch
+a) n induziert für jedes s ∈S eine lineare Abbildung dsn : TsS →Tn(s)S2 durch
 dsn(x) = d
 dtn(s„+“tx
-� �� �
+| {z }
 Soll auf Fläche S bleiben
 )
-���
+
 t=0
 Die Abbildung dsn heißt Weingarten-Abbildung
 
@@ -4891,110 +4891,110 @@ Hinweis: Die Weingarten-Abbildung wird auch Formoperator genannt.
 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM
 Beweis:
 a) Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken.
-b) Tn(S)S2 = ⟨n(s)⟩⊥ = TsS
+b) Tn(S)S2 = ⟨n(s)⟩⊥= TsS
 c) Wegen Proposition 5.1 (a) ist dsn ein Homomorphismus.
-d) Zu zeigen: ∀x, y ∈ IsS : ⟨x, dsn(y)⟩ = ⟨dsn(x), y⟩
+d) Zu zeigen: ∀x, y ∈IsS : ⟨x, dsn(y)⟩= ⟨dsn(x), y⟩
 Aufgrund der Bilinearität des Skalarproduktes genügt es diese Eigenschaft für die
 Basisvektoren zu zeigen.
 Sei xi = DpF(ei) = ∂F
 ∂ui (p)
 i = 1, 2
-Beh.: ⟨xi, dsn(xj)⟩ = ⟨
+Beh.: ⟨xi, dsn(xj)⟩= ⟨
 ∂2F
 ∂ui∂uj (p), dsn(xi)⟩
-⇒ ⟨
+⇒⟨
 ∂2F
-∂ui∂uj (p), dsn(xi)⟩ = ⟨xj, dsn(xi)⟩
+∂ui∂uj (p), dsn(xi)⟩= ⟨xj, dsn(xi)⟩
 Bew.:
 0 =
 ⟨∂F
 ∂u (p + tej), n(p + tej)⟩
-⇒ 0 = d
+⇒0 = d
 dt
-�
+
 ⟨∂F
 ∂u (p + tej), n(p + tej)⟩
-����
+
 t=0
-= ⟨ d
+= ⟨d
 dt
 ∂F
 ∂ui
 (p + tej)
-�
-��
-�
+|
+{z
+}
 ∂2F
 ∂uj∂ui (p)
-���
-t=0, n(s)⟩ + ⟨xi, dsn DpF(ej)
-�
-��
-�
+
+t=0, n(s)⟩+ ⟨xi, dsn DpF(ej)
+|
+{z
+}
 xj
 ⟩
 Deﬁnition 78
 Die durch −dsn deﬁnierte symmetrische Bilinearform auf TsS heißt zweite Fundamental-
 form von S in s bzgl. F.
-Man schreibt: IIs(x, y) = ⟨−dsn(x), y⟩ = Is(−dsn(x), y)
+Man schreibt: IIs(x, y) = ⟨−dsn(x), y⟩= Is(−dsn(x), y)
 Bemerkung 83
 Bezüglich der Basis { x1, x2 } von TsS hat IIs die Darstellungsmatrix
 (h(s)
-i,j )i,j=1,2 mit hi,j(s) = ⟨ ∂2F
+i,j )i,j=1,2 mit hi,j(s) = ⟨∂2F
 ∂ui∂uj
 (p), n(s)⟩
 Proposition 5.2
-Sei γ : [−ε, ε] → S eine nach Bogenlänge parametrisierte Kurve mit γ(0) = s. Dann gilt:
+Sei γ : [−ε, ε] →S eine nach Bogenlänge parametrisierte Kurve mit γ(0) = s. Dann gilt:
 κNor(s, γ) = IIs(γ′(0), γ′(0))
 Beweis: Nach Deﬁnition 74 ist κNor(s, γ) = ⟨γ′′(0), n(s)⟩. Nach Voraussetzung gilt
-n(γ(t)) ⊥ γ′(t) ⇔ ⟨γ′′(0), n(s)⟩ = 0
+n(γ(t)) ⊥γ′(t) ⇔⟨γ′′(0), n(s)⟩= 0
 Die Ableitung nach t ergibt
 0 = d
 dt(⟨n(γ(t)), γ′(t))
 =
-� d
+ d
 dtn(γ(t))
-���
+
 t=0, γ′(0)
-�
+
 + ⟨n(s), γ′′(0)⟩
 
 99
 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM
-= ⟨dsn(γ′(0)), γ′(0)⟩ + κNor(s, γ)
+= ⟨dsn(γ′(0)), γ′(0)⟩+ κNor(s, γ)
 = −IIs(γ′(0), γ′(0)) + κNor(s, γ)
 Folgerung 5.3
 Die beiden Deﬁnitionen von Normalkrümmung in Abschnitt 5.1 stimmen überein:
 κNor(s, γ) = κNor(s, γ′(0))
 Satz 5.4
-Sei S ⊆ R3 eine reguläre, orientierbare Fläche und s ∈ S.
+Sei S ⊆R3 eine reguläre, orientierbare Fläche und s ∈S.
 a) Die Hauptkrümmungen κ1(s), κ2(s) sind die Eigenwerte von IIs.
 b) Für die Gauß-Krümmung gilt: K(s) = det(IIs)
 Beweis:
 a) IIs ist symmetrisch, IsS hat also eine Orthonormalbasis aus Eigenvektoren y1, y2 von
-IIs. Ist x ∈ TsS, ∥x∥ = 1, so gibt es ϕ ∈ [0, 2π) mit x = cos ϕ · y1 + sin ϕ · y2.
+IIs. Ist x ∈TsS, ∥x∥= 1, so gibt es ϕ ∈[0, 2π) mit x = cos ϕ · y1 + sin ϕ · y2.
 Seien λ1, λ2 die Eigenwerte von IIs, also IIs(yi, yi) = λi. Dann gilt:
 IIs(x, x) = cos2 ϕλ1 + sin2 ϕλ2
-= (1 − sin2 ϕ)λ1 + sin2 ϕλ2
-= λ1 + sin2 ϕ(λ2 − λ1) ≥ λ1
-= cos2 ϕ + (1 − cos2 ϕ)λ2
-= λ2 − cos2 ϕ(λ2 − λ1) ≤ λ2
+= (1 −sin2 ϕ)λ1 + sin2 ϕλ2
+= λ1 + sin2 ϕ(λ2 −λ1) ≥λ1
+= cos2 ϕ + (1 −cos2 ϕ)λ2
+= λ2 −cos2 ϕ(λ2 −λ1) ≤λ2
 Prop. 5.2
-=====⇒ λ1 = min
-�
+=====⇒λ1 = min
+
 κNor(s, x)
-�� x ∈ T 1
+ x ∈T 1
 s S
-�
+	
 λ2 = max
-�
+
 κNor(s, x)
-�� x ∈ T 1
+ x ∈T 1
 s S
-�
+	
 Satz 5.5 (Satz von Gauß-Bonnet)
-Sei S ⊆ R3 eine kompakte orientierbare reguläre Fläche. Dann gilt:
-�
+Sei S ⊆R3 eine kompakte orientierbare reguläre Fläche. Dann gilt:
+Z
 S
 K(s)dA = 2πχ(S)
 Dabei ist χ(S) die Euler-Charakteristik von S.
@@ -5004,33 +5004,33 @@ Christian Bär (2. Auﬂage), ISBN 978-3-11-022458-0, ab Seite 281 nachgelesen w
 Lösungen der Übungsaufgaben
 Lösung zu Aufgabe 1
 Teilaufgabe a) Es gilt:
-(i) ∅, X ∈ TX.
+(i) ∅, X ∈TX.
 (ii) TX ist oﬀensichtlich unter Durchschnitten abgeschlossen, d. h. es gilt für alle U1, U2 ∈
-TX : U1 ∩ U2 ∈ TX.
+TX : U1 ∩U2 ∈TX.
 (iii) Auch unter beliebigen Vereinigungen ist TX abgeschlossen, d. h. es gilt für eine
-beliebige Indexmenge I und alle Ui ∈ TX für alle i ∈ I : �
-i∈I Ui ∈ TX
+beliebige Indexmenge I und alle Ui ∈TX für alle i ∈I : S
+i∈I Ui ∈TX
 Also ist (X, TX) ein topologischer Raum.
 Teilaufgabe b) Wähle x = 1, y = 0. Dann gilt x ̸= y und die einzige Umgebung von x
-ist X. Da y = 0 ∈ X können also x und y nicht durch oﬀene Mengen getrennt werden.
+ist X. Da y = 0 ∈X können also x und y nicht durch oﬀene Mengen getrennt werden.
 (X, TX) ist also nicht hausdorﬀsch.
 Teilaufgabe c) Nach Bemerkung 4 sind metrische Räume hausdorﬀsch. Da (X, TX) nach
 (b) nicht hausdorﬀsch ist, liefert die Kontraposition der Trennungseigenschaft, dass (X, TX)
 kein metrischer Raum sein kann.
 Lösung zu Aufgabe 2
 Teilaufgabe a)
-Beh.: ∀a ∈ Z : { a } ist abgeschlossen.
-Sei a ∈ Z beliebig. Dann gilt:
+Beh.: ∀a ∈Z : { a } ist abgeschlossen.
+Sei a ∈Z beliebig. Dann gilt:
 Wenn jemand diese Aufgabe gemacht hat, bitte die Lösung an info@martin-thoma.de
 schicken.
 Teilaufgabe b)
 Beh.: { −1, 1 } ist nicht oﬀen
 Bew.: durch Widerspruch
 Annahme: { −1, 1 } ist oﬀen.
-Dann gibt es T ⊆ B, sodass �
-M∈T M = { −1, 1 }. Aber alle U ∈ B haben unendlich viele
+Dann gibt es T ⊆B, sodass S
+M∈T M = { −1, 1 }. Aber alle U ∈B haben unendlich viele
 Elemente. Auch endlich viele Schnitte von Elementen in B haben unendlich viele Elemente
-⇒ keine endliche nicht-leere Menge kann in dieser Topologie oﬀen sein ⇒ { −1, 1 } ist
+⇒keine endliche nicht-leere Menge kann in dieser Topologie oﬀen sein ⇒{ −1, 1 } ist
 nicht oﬀen.
 ■
 Teilaufgabe c)
@@ -5039,97 +5039,96 @@ Beh.: Es gibt unendlich viele Primzahlen.
 101
 Lösungen der Übungsaufgaben
 Bew.: durch Widerspruch
-Annahme: Es gibt nur endlich viele Primzahlen p ∈ P
+Annahme: Es gibt nur endlich viele Primzahlen p ∈P
 Dann ist
 Z \ { −1, +1 } FS d. Arithmetik
 =
-�
+[
 p∈P
 U0,p
 endlich. Das ist ein Widerspruch zu |Z| ist unendlich und | { −1, 1 } | ist endlich.
 ■
 Lösung zu Aufgabe 3
 (a) Beh.: Die oﬀenen Mengen von P sind Vereinigungen von Mengen der Form
-�
+Y
 j∈J
 Uj ×
-�
+Y
 i∈N,i̸=j
 Pi
-wobei J ⊆ N endlich und Uj ⊆ Pj oﬀen ist.
+wobei J ⊆N endlich und Uj ⊆Pj oﬀen ist.
 Beweis: Nach Deﬁnition der Produkttopologie bilden Mengen der Form
-�
+Y
 i∈J
 Uj ×
-�
+Y
 i∈N\J
 Pi
-wobei J ⊆ N endlich und Uj ⊆ Pj oﬀen ∀j ∈ J eine Basis der Topologie.
+wobei J ⊆N endlich und Uj ⊆Pj oﬀen ∀j ∈J eine Basis der Topologie.
 Damit sind die oﬀenen Mengen von P Vereinigungen von Mengen der obigen
 Form.
 ■
 (b) Beh.: Die Zusammenhangskomponenten von P sind alle einpunktig.
-Beweis: Es seinen x, y ∈ P und x sowie y liegen in der gleichen Zusammenhangs-
-komponente Z ⊆ P. Da Z zusammenhängend ist und ∀i ∈ I : pi : P → Pi ist
-stetig, ist pi(Z) ⊆ Pi zusammenhängend für alle i ∈ N. Die zusammenhängenden
-Mengen von Pi sind genau { 0 } und { 1 }, d. h. für alle i ∈ N gilt entweder
-pi(Z) ⊆ { 0 } oder pi(Z) ⊆ { 1 }. Es sei zi ∈ { 0, 1 } so, dass pi(Z) ⊆ { zi } für
-alle i ∈ N. Dann gilt also:
+Beweis: Es seinen x, y ∈P und x sowie y liegen in der gleichen Zusammenhangs-
+komponente Z ⊆P. Da Z zusammenhängend ist und ∀i ∈I : pi : P →Pi ist
+stetig, ist pi(Z) ⊆Pi zusammenhängend für alle i ∈N. Die zusammenhängenden
+Mengen von Pi sind genau { 0 } und { 1 }, d. h. für alle i ∈N gilt entweder
+pi(Z) ⊆{ 0 } oder pi(Z) ⊆{ 1 }. Es sei zi ∈{ 0, 1 } so, dass pi(Z) ⊆{ zi } für
+alle i ∈N. Dann gilt also:
 pi(x)
-� �� �
+| {z }
 =xi
 = zi = pi(y)
-� �� �
+| {z }
 =yi
-∀i ∈ N
+∀i ∈N
 Somit folgt: x = y
 ■
 Lösung zu Aufgabe 4
 (a) Beh.: GLn(R) ist nicht kompakt.
-Bew.: det : GLn(R) → R \ { 0 } ist stetig. Außerdem ist det(GLn(R)) = R \ { 0 }
+Bew.: det : GLn(R) →R \ { 0 } ist stetig. Außerdem ist det(GLn(R)) = R \ { 0 }
 nicht kompakt. 22
-⇒ GLn(R) ist nicht kompakt.
+⇒GLn(R) ist nicht kompakt.
 ■
 (b) Beh.: SL1(R) ist nicht kompakt, für n > 1 ist SLn(R) kompakt.
 Bew.: Für SL1(R) gilt: SL1(R) =
-�
-A ∈ R1×1 �� det A = 1
-�
+
+A ∈R1×1  det A = 1
+	
 =
-�
-1
-� ∼= { 1 }. 22
-⇒ SL1(R)
+ 1
+ ∼= { 1 }. 22
+⇒SL1(R)
 ist kompakt.
 
 102
 Lösungen der Übungsaufgaben
-SLn(R) ⊆ GLn(R) lässt sich mit einer Teilmenge des Rn2 identiﬁzieren. Nach Satz 1.1
+SLn(R) ⊆GLn(R) lässt sich mit einer Teilmenge des Rn2 identiﬁzieren. Nach Satz 1.1
 sind diese genau dann kompakt, wenn sie beschränkt und abgeschlossen sind. Deﬁniere
-nun für für n ∈ N≥2, m ∈ N:
+nun für für n ∈N≥2, m ∈N:
 Am = diagn(m, 1
 m, . . . , 1)
-Dann gilt: det Am = 1, d. h. Am ∈ SLn(R), und Am ist unbeschränkt, da ∥Am∥∞ =
+Dann gilt: det Am = 1, d. h. Am ∈SLn(R), und Am ist unbeschränkt, da ∥Am∥∞=
 m −−−−→
-m→∞ ∞.
+m→∞∞.
 ■
 (c) Beh.: P(R) ist kompakt.
 Bew.: P(R) ∼= Sn/x∼−x. Per Deﬁnition der Quotiententopologie ist die Klassenabbil-
 dung stetig. Da Sn als abgeschlossene und beschränkte Teilmenge des Rn+1 kompakt
 ist 22
-⇒ P(R) ist kompakt.
+⇒P(R) ist kompakt.
 ■
 Lösung zu Aufgabe 5
 Die Deﬁnition von Homöomorphismus kann auf Seite 9 nachgelesen werden.
 Deﬁnition 79
-Seien (G, ∗) und (H, ◦) Gruppen und ϕ : G → H eine Abbildung.
+Seien (G, ∗) und (H, ◦) Gruppen und ϕ : G →H eine Abbildung.
 ϕ heißt Homomorphismus, wenn
-∀g1, g2 ∈ G : ϕ(g1 ∗ g2) = ϕ(g1) ◦ ϕ(g2)
+∀g1, g2 ∈G : ϕ(g1 ∗g2) = ϕ(g1) ◦ϕ(g2)
 gilt.
 Es folgt direkt:
 1) Sei X = R mit der Standarttopologie und ϕ1 : idR und R = (R, +). Dann ist ϕ1 ein
 Gruppenhomomorphismus und ein Homöomorphismus.
-2) Sei G = (Z, +) und H = (Z/3Z, +). Dann ist ϕ2 : G → H, x �→ x mod 3 ein
+2) Sei G = (Z, +) und H = (Z/3Z, +). Dann ist ϕ2 : G →H, x 7→x mod 3 ein
 Gruppenhomomorphismus. Jedoch ist ϕ2 nicht injektiv, also sicher kein Homöomor-
 phismus.
 3) Sei X ein topologischer Raum. Dann ist idX ein Homöomorphismus. Da keine
@@ -5141,7 +5140,7 @@ Lösung zu Aufgabe 6
 Die Deﬁnition einer Isotopie kann auf Seite 20 nachgelesen werden, die einer Isometrie auf
 Seite 6.
 Deﬁnition 80
-Seien (G, ∗) und (H, ◦) Gruppen und ϕ : G → H eine Abbildung.
+Seien (G, ∗) und (H, ◦) Gruppen und ϕ : G →H eine Abbildung.
 ϕ heißt Isomorphismus, wenn ϕ ein bijektiver Homomorphismus ist.
 Eine Isotopie ist also für Knoten deﬁniert, Isometrien machen nur in metrischen Räumen
 Sinn und ein Isomorphismus benötigt eine Gruppenstruktur.
@@ -5150,82 +5149,82 @@ Sinn und ein Isomorphismus benötigt eine Gruppenstruktur.
 Lösungen der Übungsaufgaben
 Lösung zu Aufgabe 7
 (a) Vor.: Sei M eine topologische Mannigfaltigkeit.
-Beh.: M ist wegzusammehängend ⇔ M ist zusammenhängend
+Beh.: M ist wegzusammehängend ⇔M ist zusammenhängend
 Beweis: „⇒“: Da M insbesondere ein topologischer Raum ist folgt diese Richtung
 direkt aus Bemerkung 23.
-„⇐“: Seien x, y ∈ M und
-Z := { z ∈ M | ∃Weg von x nach z }
+„⇐“: Seien x, y ∈M und
+Z := { z ∈M | ∃Weg von x nach z }
 Es gilt:
 (i) Z ̸= ∅, da M lokal wegzusammenhängend ist
 (ii) Z ist oﬀen, da M lokal wegzusammenhängend ist
-(iii) ZC := { ˜z ∈ M | ∄Weg von x nach ˜z } ist oﬀen
-Da M eine Mannigfaltigkeit ist, existiert zu jedem ˜z ∈ ZC eine oﬀene und
-wegzusammenhängende Umgebung U˜z ⊆ M.
-Es gilt sogar U˜z ⊆ ZC, denn gäbe es ein U˜z ∋ z ∈ Z, so gäbe es Wege γ2 :
-[0, 1] → M, γ2(0) = z, γ2(1) = x und γ1 : [0, 1] → M, γ1(0) = ˜z, γ1(1) = z.
+(iii) ZC := { ˜z ∈M | ∄Weg von x nach ˜z } ist oﬀen
+Da M eine Mannigfaltigkeit ist, existiert zu jedem ˜z ∈ZC eine oﬀene und
+wegzusammenhängende Umgebung U˜z ⊆M.
+Es gilt sogar U˜z ⊆ZC, denn gäbe es ein U˜z ∋z ∈Z, so gäbe es Wege γ2 :
+[0, 1] →M, γ2(0) = z, γ2(1) = x und γ1 : [0, 1] →M, γ1(0) = ˜z, γ1(1) = z.
 Dann wäre aber
-γ : [0, 1] → M,
+γ : [0, 1] →M,
 γ(x) =
-�
+(
 γ1(2x)
-falls 0 ≤ x ≤ 1
+falls 0 ≤x ≤1
 2
-γ2(2x − 1)
+γ2(2x −1)
 falls 1
-2 < x ≤ 1
-ein stetiger Weg von ˜z nach x ⇒ Widerspruch.
+2 < x ≤1
+ein stetiger Weg von ˜z nach x ⇒Widerspruch.
 Da M zusammenhängend ist und M =
 Z
-����
+|{z}
 oﬀen
-∪ ZC
-����
+∪ZC
+|{z}
 oﬀen
-, sowie Z ̸= ∅ folgt ZC = ∅.
+, sowie Z ̸= ∅folgt ZC = ∅.
 Also ist M = Z wegzusammenhängend.
 ■
 (b) Beh.: X ist wegzusammenhängend.
-Beweis: X := (R \ { 0 }) ∪ { 01, 02 } und (R \ { 0 }) ∪ { 02 } sind homöomorph zu R.
+Beweis: X := (R \ { 0 }) ∪{ 01, 02 } und (R \ { 0 }) ∪{ 02 } sind homöomorph zu R.
 Also sind die einzigen kritischen Punkte, die man nicht verbinden können könnte
 01 und 02.
 Da (R\{ 0 })∪{ 01 } homöomorph zu R ist, exisitert ein Weg γ1 von 01 zu einem
-beliebigen Punkt a ∈ R \ { 0 }.
-Da (R \ { 0 }) ∪ { 02 } ebenfalls homöomorph zu R ist, existiert außerdem ein
+beliebigen Punkt a ∈R \ { 0 }.
+Da (R \ { 0 }) ∪{ 02 } ebenfalls homöomorph zu R ist, existiert außerdem ein
 Weg γ2 von a nach 02. Damit existiert ein (nicht einfacher) Weg γ von 01 nach
 02.
 ■
 Lösung zu Aufgabe 9
-Vor.: Sei (X, d) eine absolute Ebene, A, B, C ∈ X und △ABC ein Dreieck.
+Vor.: Sei (X, d) eine absolute Ebene, A, B, C ∈X und △ABC ein Dreieck.
 
 104
 Lösungen der Übungsaufgaben
-(a) Beh.: AB ∼= AC ⇒ ∠ABC ∼= ∠ACB
+(a) Beh.: AB ∼= AC ⇒∠ABC ∼= ∠ACB
 Bew.: Sei AB ∼= AC.
-⇒ ∃ Isometrie ϕ mit ϕ(B) = C und ϕ(C) = B und ϕ(A) = A.
-⇒ ϕ(∠ABC) = ∠ACB
-⇒ ∠ABC ∼= ∠ACB
+⇒∃Isometrie ϕ mit ϕ(B) = C und ϕ(C) = B und ϕ(A) = A.
+⇒ϕ(∠ABC) = ∠ACB
+⇒∠ABC ∼= ∠ACB
 ■
 (b) Beh.: Der längeren Seite von △ABC liegt der größere Winkel gegenüber und umge-
 kehrt.
-Bew.: Sei d(A, C) > d(A, B). Nach §3 (i) gibt es C′ ∈ AC+ mit d(A, C′) = d(A, B)
-⇒ C′ liegt zwischen A und C.
+Bew.: Sei d(A, C) > d(A, B). Nach §3 (i) gibt es C′ ∈AC+ mit d(A, C′) = d(A, B)
+⇒C′ liegt zwischen A und C.
 Es gilt ∡ABC′ < ∡ABC und aus Aufgabe 9 (a) folgt: ∡ABC′ = ∡AC′B.
 ∠BC′A ist ein nicht anliegender Außenwinkel zu ∠BCA Bem. 66
-=====⇒ ∡BC′A > ∡BCA
-⇒ ∡BCA < ∡BC′A = ∡ABC′ < ∡ABC Sei umgekehrt ∡ABC > ∡BCA, kann
+=====⇒∡BC′A > ∡BCA
+⇒∡BCA < ∡BC′A = ∡ABC′ < ∡ABC Sei umgekehrt ∡ABC > ∡BCA, kann
 wegen 1. Teil von Aufgabe 9 (b) nicht d(A, B) > d(A, C) gelten.
 Wegen Aufgabe 9 (a) kann nicht d(A, B) = d(A, C) gelten.
-⇒ d(A, B) < d(A, C)
+⇒d(A, B) < d(A, C)
 ■
-(c) Vor.: Sei g eine Gerade, P ∈ X und P /∈ g
+(c) Vor.: Sei g eine Gerade, P ∈X und P /∈g
 Beh.: ∃! Lot
 Bew.: ÜB10 A4(a): Es gibt Geradenspiegelung ϕ an g. ϕ vertauscht die beiden
 Halbebenen bzgl. g.
-⇒ ϕ(P)P schneidet g in F.
+⇒ϕ(P)P schneidet g in F.
 Es gibt eine Geradenspiegelung ϕ an g. ϕ vertauscht die beiden Halbebenen bzgl. g
-⇒ ϕ(P)P schneidet g in F.
-Sei A ∈ g\{ F }. Dann gilt ϕ(∠AFP) = ∠AFϕ(P) = π ⇒ ∠AFP ist rechter Winkel.
-Gäbe es nun G ∈ g \ { F }, so dass PG weiteres Lot von P auf g ist, wäre △PFG
+⇒ϕ(P)P schneidet g in F.
+Sei A ∈g\{ F }. Dann gilt ϕ(∠AFP) = ∠AFϕ(P) = π ⇒∠AFP ist rechter Winkel.
+Gäbe es nun G ∈g \ { F }, so dass PG weiteres Lot von P auf g ist, wäre △PFG
 ein Dreieck mit zwei rechten Innenwinkeln (vgl. Abbildung 5.4).
 ·
 ·
@@ -5236,16 +5235,16 @@ F
 g
 Abbildung 5.4: Zwei Lote zu einer Geraden g durch einen Punkt P
 Nach Folgerung 4.4 ist die Summe von zwei Innenwinkeln immer < π
-⇒ G gibt es nicht.
+⇒G gibt es nicht.
 ■
 Lösung zu Aufgabe 10
-Sei f ∥ h und o. B. d. A. f ∥ g.
-f ∦ h ⇒ f ∩ h ̸= ∅, sei also x ∈ f ∩ h. Mit Axiom §5 folgt: Es gibt höchstens eine Parallele
-zu g durch x, da x /∈ g. Diese ist f, da x ∈ f und f ∥ g. Da aber x ∈ h, kann h nicht
+Sei f ∥h und o. B. d. A. f ∥g.
+f ∦h ⇒f ∩h ̸= ∅, sei also x ∈f ∩h. Mit Axiom §5 folgt: Es gibt höchstens eine Parallele
+zu g durch x, da x /∈g. Diese ist f, da x ∈f und f ∥g. Da aber x ∈h, kann h nicht
 
 105
 Lösungen der Übungsaufgaben
-parallel zu g sein, denn ansonsten gäbe es zwei Parallelen zu g durch x (f ̸= h). ⇒ g ∦ h ■
+parallel zu g sein, denn ansonsten gäbe es zwei Parallelen zu g durch x (f ̸= h). ⇒g ∦h ■
 Lösung zu Aufgabe 11
 Sei (X, d, G) eine Geometrie, die §1-§4 erfüllt. Seien außerdem △ABC und △A′B′C′
 Dreiecke, für die gilt:
@@ -5257,7 +5256,7 @@ bzgl. AB wie C. Diese Isometrie existiert wegen §4.
 Es gilt d(A, C) = d(A′, C′) = d(ϕ(A′), ϕ(C′)) = d(A, ϕ(C′)) und d(B, C) = d(B′, C′) =
 d(ϕ(B′), ϕ(C′)) = d(B, ϕ(C′)).
 Bem. 62
-=====⇒ C = ϕ(C).
+=====⇒C = ϕ(C).
 Es gilt also ϕ(△A′B′C′) = △ABC.
 ■
 
@@ -5312,42 +5311,42 @@ benötigten Begriﬀe deﬁniert und erklärt werden. Die folgenden Begriﬀe wu
 aber nicht erklärt, da sie Bestandteil der Vorlesungen „Analysis I und II“ sowie „Lineare Algebra
 und analytische Geometrie I und II“ sind. Jedoch will ich zumindest die Deﬁnitionen bereitstellen.
 Deﬁnition 81
-Sei D ⊆ R und x0 ∈ R. x0 heißt ein Häufungspunkt von D :⇔ ∃ Folge xn in D \ { x0 }
-mit xn → x0.
+Sei D ⊆R und x0 ∈R. x0 heißt ein Häufungspunkt von D :⇔∃Folge xn in D \ { x0 }
+mit xn →x0.
 Folgende Deﬁnition wurde dem Skript von Herrn Prof. Dr. Leuzinger für Lineare Algebra
 entnommen:
 Deﬁnition 82
 Es seien V und W K-Vektorräume und A(V ) und A(W) die zugehörigen aﬃnen Räume.
-Eine Abbildung f : V → W heißt aﬃn, falls für alle a, b ∈ V und alle λ, µ ∈ K mit λ+µ = 1
+Eine Abbildung f : V →W heißt aﬃn, falls für alle a, b ∈V und alle λ, µ ∈K mit λ+µ = 1
 gilt:
 f(λa + µb) = λf(a) + µf(b)
 Deﬁnition 83
-Sei V ein Vektorraum und S ⊆ V eine Teilmenge.
+Sei V ein Vektorraum und S ⊆V eine Teilmenge.
 S heißt eine Orthonormalbasis von V , wenn gilt:
 (i) S ist eine Basis von V
-(ii) ∀v ∈ S : ∥v∥ = 1
-(iii) ∀v1, v2 ∈ S : v1 ̸= v2 ⇒ ⟨v1, v2⟩ = 0
+(ii) ∀v ∈S : ∥v∥= 1
+(iii) ∀v1, v2 ∈S : v1 ̸= v2 ⇒⟨v1, v2⟩= 0
 Satz (Zwischenwertsatz)
 Sei a < b und f ∈
-C[a, b] := C([a, b]), weiter sei y0 ∈ R und f(a) < y0 < f(b) oder
-f(b) < y0 < f(a). Dann existiert ein x0 ∈ [a, b] mit f(x0) = y0.
+C[a, b] := C([a, b]), weiter sei y0 ∈R und f(a) < y0 < f(b) oder
+f(b) < y0 < f(a). Dann existiert ein x0 ∈[a, b] mit f(x0) = y0.
 Deﬁnition 84
-Sei V ein Vektorraum über einem Körper K und f : V → V eine lineare Abbildung.
-v ∈ V \ { 0 } heißt Eigenvektor :⇔ ∃λ ∈ K : f(v) = λv.
-Wenn ein solches λ ∈ K existiert, heißt es Eigenwert von f.
+Sei V ein Vektorraum über einem Körper K und f : V →V eine lineare Abbildung.
+v ∈V \ { 0 } heißt Eigenvektor :⇔∃λ ∈K : f(v) = λv.
+Wenn ein solches λ ∈K existiert, heißt es Eigenwert von f.
 Satz (Binomischer Lehrsatz)
-Sei x, y ∈ R. Dann gilt:
+Sei x, y ∈R. Dann gilt:
 (x + y)n =
 n
-�
+X
 k=0
-�n
+n
 k
-�
+
 xn−kyk
-∀n ∈ N0
+∀n ∈N0
 Deﬁnition 85
-Seien a, b ∈ R3 Vektoren.
+Seien a, b ∈R3 Vektoren.
 a × b :=
 
 
@@ -5355,19 +5354,19 @@ a1
 b3
 a3
 
- ×
+×
 
 
 a1
 b3
 a3
 
- =
+=
 
 
-a2b3 − a3b2
-a3b1 − a1b3
-a1b2 − a2b1
+a2b3 −a3b2
+a3b1 −a1b3
+a1b2 −a2b1
 
 
 
@@ -5385,14 +5384,14 @@ Rand der Menge M
 M◦
 Inneres der Menge M
 A × B Kreuzprodukt
-A ⊆ B Teilmengenbeziehung
-A ⊊ B echte Teilmengenbeziehung
+A ⊆B Teilmengenbeziehung
+A ⊊B echte Teilmengenbeziehung
 A \ B
 Diﬀerenzmenge
-A ∪ B Vereinigung
-A ˙∪ B
+A ∪B Vereinigung
+A ˙∪B
 Disjunkte Vereinigung
-A ∩ B
+A ∩B
 Schnitt
 Geometrie
 AB
@@ -5425,14 +5424,14 @@ Permutationsgruppe
 Sym(X)
 Symmetrische Gruppe
 Wege
-Sei γ : I → X ein Weg.
+Sei γ : I →X ein Weg.
 [γ]
 Homotopieklasse von γ
-γ1 ∗ γ2
+γ1 ∗γ2
 Zusammenhängen von Wegen
-γ1 ∼ γ2 Homotopie von Wegen
+γ1 ∼γ2 Homotopie von Wegen
 γ(x)
-Inverser Weg, also γ(x) := γ(1 − x)
+Inverser Weg, also γ(x) := γ(1 −x)
 C
 Bild eines Weges γ, also C :=
 γ([0, 1])
@@ -5448,9 +5447,9 @@ A
 Atlas
 P
 Projektiver Raum
-⟨·, ·⟩ Skalarprodukt
-X/∼ X modulo ∼
-[x]∼ Äquivalenzklassen von x bzgl. ∼
+⟨·, ·⟩Skalarprodukt
+X/∼X modulo ∼
+[x]∼Äquivalenzklassen von x bzgl. ∼
 ∥x∥
 Norm von x
 |x|
@@ -5461,7 +5460,7 @@ Sn
 Sphäre
 T n
 Torus
-f ◦ g
+f ◦g
 Verkettung von f und g
 πX
 Projektion auf X
@@ -5493,45 +5492,45 @@ Symbolverzeichnis
 Zahlenmengen
 N = { 1, 2, 3, . . . }
 Natürliche Zahlen
-Z = N ∪ { 0, −1, −2, . . . }
+Z = N ∪{ 0, −1, −2, . . . }
 Ganze Zahlen
 Q = Z ∪
-� 1
+ 1
 2, 1
 3, 2
 3
-�
+	
 =
-� z
-n mit z ∈ Z und n ∈ Z \ { 0 }
-�
+ z
+n mit z ∈Z und n ∈Z \ { 0 }
+	
 Rationale Zahlen
 R = Q ∪
-� √
+ √
 2, −
 3√
 3, . . .
-�
+	
 Reele Zahlen
 R+ Echt positive reele Zahlen
 Rn
-+,0 := { (x1, . . . , xn) ∈ Rn | xn ≥ 0 }
++,0 := { (x1, . . . , xn) ∈Rn | xn ≥0 }
 Halbraum
 R× = R \ { 0 } Einheitengruppe von R
-C = { a + ib | a, b ∈ R }
+C = { a + ib | a, b ∈R }
 Komplexe Zahlen
 P = { 2, 3, 5, 7, . . . }
 Primzahlen
-H = { z ∈ C | ℑz > 0 }
+H = { z ∈C | ℑz > 0 }
 obere Halbebene
-I = [0, 1] ⊊ R
+I = [0, 1] ⊊R
 Einheitsintervall
-f : S1 �→ R2 Einbettung der Kreislinie in die Ebene
+f : S1 ,→R2 Einbettung der Kreislinie in die Ebene
 π1(X, x)
-Fundamentalgruppe im topologischen Raum X um x ∈ X
+Fundamentalgruppe im topologischen Raum X um x ∈X
 Fix(f)
 Menge der Fixpunkte der Abbildung f
-∥ · ∥2
+∥· ∥2
 2-Norm; Euklidische Norm
 κ
 Krümmung
@@ -5540,9 +5539,9 @@ Normalenkrümmung
 V (f)
 Nullstellenmenge von f2
 Krümmung
-DpF : R2 → R3 Lineare Abbildung mit Jacobi-Matrix in p (siehe Seite 89)
+DpF : R2 →R3 Lineare Abbildung mit Jacobi-Matrix in p (siehe Seite 89)
 TsS
-Tangentialebene an S ⊆ R3 durch s ∈ S
+Tangentialebene an S ⊆R3 durch s ∈S
 dsn(x)
 Weingarten-Abbildung
 2von Vanishing Set
diff --git a/read/results/pypdf/1601.03642.txt b/read/results/pypdf/1601.03642.txt
index 16681de..6659a55 100644
--- a/read/results/pypdf/1601.03642.txt
+++ b/read/results/pypdf/1601.03642.txt
@@ -2,7 +2,7 @@
 Creativity in Machine Learning
 Martin Thoma
 E-Mail: info@martin-thoma.de
-Abstract —Recent machine learning techniques can be modified
+Abstract—Recent machine learning techniques can be modified
 to produce creative results. Those results did not exist before; it
 is not a trivial combination of the data which was fed into the
 machine learning system. The obtained results come in multiple
@@ -42,25 +42,30 @@ a lot of data has become available. The idea of machine learning
 is to make use of this data.
 A formal definition of the field of Machine Learning is given
 by Tom Mitchel [Mit97]:
-A computer program is said to learn from experienceEwith
- respect to some class of tasks Tand
+A computer program is said to learn from experience
+ E with respect to some class of tasks T and
 performance measure P, if its performance at tasks
-inT, as measured by P, improves with experience E.Σϕx0
+in T, as measured by P, improves with experience E.
+Σ ϕ
+x0
 x1
 x2
 x3
-xnw0
+xn
+w0
 w1
 w2
 w3
-wn...
-(a)Example of an artificial neuron unit.
-xiare the input signals and wiare
+wn
+...
+(a) Example of an artificial neuron unit.
+xi are the input signals and wi are
 weights which have to get learned.
 Each input signal gets multiplied
 with its weight, everything gets
-summed up and the activation functionϕis
- applied.(b)A visualization of a simple feedforward
+summed up and the activation function
+ ϕ is applied.
+(b) A visualization of a simple feedforward
  neural network. The 5 input
  nodes are red, the 2 bias nodes
 are gray, the 3 hidden units are
@@ -94,20 +99,21 @@ of time required to understand such a complex system from
 basic building blocks is a time-intensive and difficult task.
 An important group of machine learning algorithms was
 inspired by biological neurons and are thus called artificial
-neural networks . Those networks are based on mathematical
-functions called artificial neurons which take n∈Nnumbersx1,
- . . . , x n∈Ras input, multiply them with weights
-w1, . . . , w n∈R, add them and apply a so called activation
-function ϕas visualized in Figure 1(a). One example of such
-an activation function is the sigmoid function ϕ(x) =1
-1+e−x.
+neural networks. Those networks are based on mathematical
+functions called artificial neurons which take n ∈N numbers
+ x1, . . . , xn ∈R as input, multiply them with weights
+w1, . . . , wn ∈R, add them and apply a so called activation
+function ϕ as visualized in Figure 1(a). One example of such
+an activation function is the sigmoid function ϕ(x) = 1
+1+e−x .
 Those functions act as building blocks for more complex
 systems as they can be chained and grouped in layers as
 visualized in Figure 1(b). The interesting question is how
-the parameters wiare learned. This is usually done by an
-optimization technique called gradient descent . The gradient
+the parameters wi are learned. This is usually done by an
+optimization technique called gradient descent. The gradient
 descent algorithm takes a function which has to be derivable,
-starts at any point of the surface of this error function andarXiv:1601.03642v1  [cs.CV]  12 Jan 2016
+starts at any point of the surface of this error function and
+arXiv:1601.03642v1  [cs.CV]  12 Jan 2016
 
 makes a step in the direction which goes downwards. Hence
 it tries to find a minimum of this high-dimensional function.
@@ -118,7 +124,7 @@ III. I MAGE DATA
 Applying a simple neural network on image data directly can
 work, but the number of parameters gets extraordinary large.
 One would take one neuron per pixel and channel. This means
-for500px×500pxRGB images one would get 750,000input
+for 500 px ×500 px RGB images one would get 750,000 input
 signals. To approach this problem, so called Convolutional
 Neural Networks (CNNs) were introduced. Instead of learning
 the full connection between the input layer and the first
@@ -158,7 +164,7 @@ might be chosen is because neural networks are structured
 in layers. Recent publications tend to have more and more
 layers [HZRS15]. The used jargon is to say they get “deeper”.
 As this technique as published by Google engineers, the
-technique is called Google DeepDream .
+technique is called Google DeepDream.
 Fig. 2: Aurelia aurita
 Fig. 3: DeepDream impression of Aurelia aurita
 It has become famous in the internet [Red]. Usually, the images
@@ -182,14 +188,14 @@ different artists to an arbitrary image of their choice.
 
 (a) Original Image
  (b) Style image
-(c)The artistic style of Van Gogh’s “Starry Night” applied to the photograph
+(c) The artistic style of Van Gogh’s “Starry Night” applied to the photograph
 of a Scottish Highland Cattle.
 Fig. 4: The algorithm takes both, the original image and the
 style image to produce the result.
 This artistic style imitation can be seen itself as creative work.
 An example is given by Figure 4. The code which created this
 example is available under [Joh16].
-Something similar was done by [SPB+14], where the style of
+Something similar was done by [SPB +14], where the style of
 a portrait photograph was transferred to another photograph.
 A demo can be seen on [Shi14].
 C. Drawing Robots
@@ -198,7 +204,7 @@ AIKON (Automatic IKONic drawing) which can automatically
 generated sketches for portraits [TL05]. AIKON takes a digital
 photograph, detects faces on them and sketches them with a
 pen-plotter.
-Tresset and Leymaire use k-means clustering [KMN+02] to
+Tresset and Leymaire use k-means clustering [KMN +02] to
 segment regions of the photograph with similar color which,
 in turn, will get a similar shading.
 Such a drawing robot could apply machine learning techniques
@@ -207,7 +213,8 @@ could apply self-learning techniques to draw results most
 similar to the artists impression of the image. However, the
 system described in [TL05] seems not to be a machine
 learning computer program according to the definition by Tom
-Mitchell [Mit97].IV. T EXT DATA
+Mitchell [Mit97].
+IV. T EXT DATA
 Digital text is the first form of natural communication which
 involved computers. It is used in the form of chats, websites,
 on collaborative projects like Wikipedia, in scientific literature.
@@ -218,7 +225,7 @@ This list could be continued and most of these kinds of texts
 are now available in digital form. This digital form can be
 used to teach machines to generate similar texts.
 The most simple language model which is of use is an n-gram
-model. This model makes use of sequences of the length nto
+model. This model makes use of sequences of the length n to
 model language. It can be used to get the probability of a third
 word, given the previous two words. This way, a complete text
 can be generated word by word. Refinements and extensions
@@ -251,7 +258,7 @@ A. Similar Texts Generation
 Karpathy trained multiple character RNNs on different datasets
 and gave an excellent introduction [Kar15b]. He trained it on
 Paul Graham’s essays, all the works of Shakespeare, the Hutter
-Prize [hut] 100MB dataset of raw Wikipedia articles, the raw
+Prize [hut] 100 MB dataset of raw Wikipedia articles, the raw
 LATEX source file of a book about algebraic stacks and geometry
 and Linux C code.
 With that training data, the models can generate similar texts.
@@ -299,16 +306,17 @@ subtitles of movies as well as the astonishing increase in
 computing power to train RNNs and language models similar
 to the ones described before.
 Interesting results like the following were obtained by [VL15]:
-Human : what is the purpose of life ?
-Machine : to serve the greater good .
-Human : what is the purpose of living ?
-Machine : to live forever .
+Human: what is the purpose of life ?
+Machine: to serve the greater good .
+Human: what is the purpose of living ?
+Machine: to live forever .
 V. A UDIO DATA
 Common machine learning tasks which involve audio data
 are speech recognition, speaker identification, identification of
 songs. This leads to some less-common, but interesting topics:
 The composition of music, the synthesizing of audio as art.
-While the composition might be considered in Section IV,we will now investigate the work which was done in audio
+While the composition might be considered in Section IV,
+we will now investigate the work which was done in audio
 synthesization.
 A. Emily Howell
 David Cope created a project called “Experiments in Musical
@@ -344,12 +352,12 @@ Recurrent neural networks — LSTM networks, to be exact
 (GRU) to build a network which can be trained to generate
 music. Instead of taking notes directly or MIDI files, Nayebi
 and Vitelli took raw audio waveforms as input. Those audio
-waveforms are feature vectors given for time steps 0,1, . . . , t−
-1, t. The network is given those feature vectors X1, . . . , X t
+waveforms are feature vectors given for time steps 0, 1, . . . , t−
+1, t. The network is given those feature vectors X1, . . . , Xt
 and has to predict the following feature vector Xt+1. This
 means it continues the music. As the input is continuous, the
 problem was modeled as a regression task. Discrete Fourier
-Transformation (DFT) was used on chunks of length Nof the
+Transformation (DFT) was used on chunks of length N of the
 music to obtain features in the frequency domain.
 An implementation can be found at [VN15] and a demonstration
 can be found at [Vit15].
@@ -387,10 +395,10 @@ REFERENCES
 [Cop05] ——, Computer models of musical creativity . MIT Press
 Cambridge, 2005.
 [Cop12] ——, “Emily howell fugue,” YouTube, Oct. 2012. [Online].
-Available: https://www.youtube.com/watch?v=jLR- cuCwI
+Available: https://www.youtube.com/watch?v=jLR- c uCwI
 [Cop13] ——, “The well-programmed clavier: Style in computer music
 composition,” XRDS: Crossroads, The ACM Magazine for
-Students , vol. 19, no. 4, pp. 16–20, 2013. [Online]. Available:
+Students, vol. 19, no. 4, pp. 16–20, 2013. [Online]. Available:
 http://dl.acm.org/citation.cfm?id=2460444
 [Cur14] A. Curtis, “Now then,” BBC, Jul. 2014. [Online].
  Available: http://www.bbc.co.uk/blogs/adamcurtis/entries/
@@ -410,7 +418,8 @@ arnumber=6795963
 Available: http://prize.hutter1.net/
 [HZRS15] K. He, X. Zhang, S. Ren, and J. Sun, “Deep residual learning
 for image recognition,” arXiv preprint arXiv:1512.03385 , 2015.
-[Online]. Available: http://arxiv.org/abs/1512.03385[Joh15a] D. Johnson, “Biaxial recurrent neural network for music
+[Online]. Available: http://arxiv.org/abs/1512.03385
+[Joh15a] D. Johnson, “Biaxial recurrent neural network for music
 composition,” GitHub, Aug. 2015. [Online]. Available: https:
 //github.com/hexahedria/biaxial-rnn-music-composition
 [Joh15b] ——, “Composing music with recurrent neural
@@ -428,7 +437,7 @@ http://karpathy.github.io/2015/05/21/rnn-effectiveness/
 and A. Wu, “An efficient k-means clustering algorithm: analysis
 and implementation,” Pattern Analysis and Machine Intelligence,
 IEEE Transactions on , vol. 24, no. 7, pp. 881–892, Jul 2002.
-[Mit97] T. M. Mitchell, Machine learning , ser. McGraw Hill series in
+[Mit97] T. M. Mitchell, Machine learning, ser. McGraw Hill series in
 computer science. McGraw-Hill, 1997.
 [MOT15] A. Mordvintsev, C. Olah, and M. Tyka, “Inceptionism: Going
 deeper into neural networks,” googleresearch.blogspot.co.uk,
@@ -448,7 +457,7 @@ com/r/deepdream/
 Hj5lGFzlubU
 [SPB+14] Y . Shih, S. Paris, C. Barnes, W. T. Freeman, and F. Durand,
 “Style transfer for headshot portraits,” ACM Transactions on
-Graphics (TOG) , vol. 33, no. 4, p. 148, 2014. [Online]. Available:
+Graphics (TOG), vol. 33, no. 4, p. 148, 2014. [Online]. Available:
 http://dl.acm.org/citation.cfm?id=2601137
 [TL05] P. Tresset and F. F. Leymarie, “Generative portrait sketching,” in
 Proceedings of VSMM , 2005, pp. 739–748.
@@ -468,7 +477,7 @@ Available: https://github.com/MattVitelli/GRUV
 [Wei76] J. Weizenbaum, Computer Power and Human Reason: From
 Judgement to Calculation . W.H.Freeman & Co Ltd, 1976.
 [ZF14] M. D. Zeiler and R. Fergus, “Visualizing and understanding convolutional
- networks,” in Computer Vision–ECCV 2014 . Springer,
+ networks,” in Computer Vision–ECCV 2014. Springer,
 2014, pp. 818–833.
 
 APPENDIX A
@@ -516,15 +525,15 @@ was starting to signing a major tripad of aid exile.]]
 C. Linux Code, 1
 /*
 
-*Increment the size file of the new incorrect UI_FILTER group information
-*of the size generatively.
+* Increment the size file of the new incorrect UI_FILTER group information
+* of the size generatively.
 */
 static int indicate_policy(void)
 {
 int error;
 if (fd == MARN_EPT) {
 /*
-*The kernel blank will coeld it to userspace.
+* The kernel blank will coeld it to userspace.
 */
 if (ss->segment < mem_total)
 unblock_graph_and_set_blocked();
@@ -537,7 +546,7 @@ selector = seg / 16;
 setup_works = true;
 for (i = 0; i < blocks; i++) {
 seq = buf[i++];
-bpf = bd->bd.next + i *search;
+bpf = bd->bd.next + i * search;
 if (fd) {
 current = blocked;
 }
@@ -549,21 +558,21 @@ return segtable;
 }
 D. Linux Code, 2
 /*
-*Copyright (c) 2006-2010, Intel Mobile Communications. All rights reserved.
+* Copyright (c) 2006-2010, Intel Mobile Communications. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
-*under the terms of the GNU General Public License version 2 as published by
-*the Free Software Foundation.
+* under the terms of the GNU General Public License version 2 as published by
+* the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
-*but WITHOUT ANY WARRANTY; without even the implied warranty of
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *
-*GNU General Public License for more details.
+* GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
-*Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+* Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
 #include <linux/kexec.h>
 #include <linux/errno.h>
diff --git a/read/results/pypdf/1602.06541.txt b/read/results/pypdf/1602.06541.txt
index c5c1984..46adcb7 100644
--- a/read/results/pypdf/1602.06541.txt
+++ b/read/results/pypdf/1602.06541.txt
@@ -2,7 +2,7 @@
 A Survey of Semantic Segmentation
 Martin Thoma
 info@martin-thoma.de
-Abstract —This survey gives an overview over different
+Abstract—This survey gives an overview over different
 techniques used for pixel-level semantic segmentation.
 Metrics and datasets for the evaluation of segmentation
  algorithms and traditional approaches for segmentation
@@ -18,10 +18,10 @@ Semantic segmentation is the task of clustering
 parts of images together which belong to the same
 object class. This type of algorithm has several usecases
  such as detecting road signs [ MBLAGJ+07],
-detecting tumors [ MBVLG02 ], detecting medical instruments
- in operations [ WAH97 ], colon crypts segmentation
- [ CRSS14 ], land use and land cover classification
- [ HDT02 ]. In contrast, non-semantic segmentation
+detecting tumors [ MBVLG02], detecting medical instruments
+ in operations [W AH97], colon crypts segmentation
+ [ CRSS14], land use and land cover classification
+ [HDT02]. In contrast, non-semantic segmentation
 only clusters pixels together based on general characteristics
  of single objects. Hence the task of non-semantic
 segmentation is not well-defined, as many different
@@ -48,7 +48,8 @@ brief, non-exhaustive summary of recently published
 semantic segmentation algorithms which are based on
 neural networks in Section VI. Finally, Section VII
 informs the reader about typical problematic cases for
-segmentation algorithms.II. T AXONOMY OF SEGMENTATION ALGORITHMS
+segmentation algorithms.
+II. T AXONOMY OF SEGMENTATION ALGORITHMS
 The computer vision community has published a
 wide range of segmentation algorithms so far. Those
 algorithms can be grouped by the kind of data they
@@ -68,16 +69,16 @@ such, the classes on which the algorithm is trained is a
 central design decision.
 Most algorithms work with a fixed set of classes;
 some even only work on binary classes like foreground
- vs background [RM07 ], [CS10 ] or street vs
+ vs background [RM07], [ CS10] or street vs
 no street [BKTT15].
 However, there are also unsupervised segmentation
 algorithms which do not distinguish classes at all (see
 Section V-B) as well as segmentation algorithms which
 are able to recognize when they don’t know a class.
-For example, in [ GRC+08] avoid class was added
+For example, in [ GRC+08] a void class was added
 for classes which were not in the training set. Such
 a void class was also used in the MSRCv2 dataset
-(see Section III-B 2) to make it possible to make more
+(see Section III-B2) to make it possible to make more
 coarse segmentations and thus having to spend less
 time annotating the image.
 B. Class affiliation of pixels
@@ -90,31 +91,32 @@ we simultaneously two labels to the coordinates of the
 glass: Glass and table. Although there is much more
 work being done on single class affiliation segmentation
  algorithms, there is a publication about multiple
-class affiliation segmentation [ LRAL08 ]. Similarly,
+class affiliation segmentation [ LRAL08]. Similarly,
 recent publications in pixel-level object segmentation
-used layered models [YHRF12].arXiv:1602.06541v2  [cs.CV]  11 May 2016
+used layered models [YHRF12].
+arXiv:1602.06541v2  [cs.CV]  11 May 2016
 
 C. Input Data
 The available data which can be used for the
 inference of a segmentation varies by application.
-•Grayscale vs colored : Grayscale images are
+• Grayscale vs colored : Grayscale images are
 commonly used in medical imaging such as
 magnetic resonance (MR) imaging or ultrasonography
  whereas colored photographs are obviously
 widespread.
-•Excluding or including depth data : RGB-D,
+• Excluding or including depth data : RGB-D,
 sometimes also called range [ HJBJ+96] is available
  in robotics, autonomous cars and recently
 also in consumer electronics such as Microsoft
 Kinect [Zha12].
-•Single image vs stereo images vs cosegmentation
- : Single image segmentation is the
+• Single image vs stereo images vs cosegmentation:
+ Single image segmentation is the
 most wide-spread kind of segmentation, but using
-stereo images was already tried in [ BVZ01 ]. It can
+stereo images was already tried in [ BVZ01]. It can
 be seen as a more natural way of segmentation as
 most mammals have two eyes. It can also be seen
 as being related to having depth data.
-Co-segmentation as in [ RMBK06 ], [CXGS12 ] is
+Co-segmentation as in [ RMBK06], [CXGS12] is
 the problem of finding a consistent segmentation
 for multiple images. This problem can be seen
 in two ways: One the one hand, it can be seen
@@ -124,25 +126,25 @@ after the first can be used as an additional source
 of information to find a meaningful segmentation.
 This idea can be extended to time series such as
 videos.
-•2D vs 3D : Segmenting images is a 2D segmentation
- task where the smallest unit is called a pixel .
+• 2D vs 3D : Segmenting images is a 2D segmentation
+ task where the smallest unit is called a pixel.
 In 3D data, such as volumetric X-ray CT images
-as they were used in [ HHR01 ], the smallest unit
+as they were used in [ HHR01], the smallest unit
 is called a voxel.
 D. Operation state
 The operation state of the classifying machine can
-either be active as in [ SUM+11], [SSA12 ] where robots
-can move objects to find a segmentation or passive ,
+either be active as in [SUM+11], [SSA12] where robots
+can move objects to find a segmentation or passive,
 where the received image cannot be influenced. Among
 the passive algorithms, some segment in a completely
 automatic fashion, others work in an interactive mode.
 One example would be a system where the user clicks
 on the background or marks a coarse segmentation and
-the algorithm finds a fine-grained segmentation. [ BJ00 ],
-[RKB04 ], [PS07 ] describe systems which work in an
+the algorithm finds a fine-grained segmentation. [ BJ00],
+[RKB04], [PS07] describe systems which work in an
 interactive mode.
 (a) Example Scene
- (b)Visualization of a found segmentation
+ (b) Visualization of a found segmentation
 
 Figure 1: An example of a scene and a possible visualization
  of a found segmentation.
@@ -166,29 +168,31 @@ However, this can only support the explanation of
 particular problems or showcase special situation. For
 meaningful information about the overall accuracy, there
 are a couple of metrics how accuracy can be defined.
-For this section, let k∈Nbe the number of classes,
-nij∈N0withi,j∈1,...,k be the number of pixels
-which belong to class iand were labeled as class j.
-(nij)is called a confusion matrix . Letti=∑k
-j=1nij
+For this section, let k∈N be the number of classes,
+nij ∈N0 with i,j ∈1,...,k be the number of pixels
+which belong to class i and were labeled as class j.
+(nij) is called a confusion matrix. Let ti = ∑k
+j=1 nij
 be the total number of pixels of class i.
 One way to compare segmentation algorithms is by
 the pixel-wise accuracy of the predicted segmentation
-as done in many publications [ SWRC06 ], [CP08 ],
-[LSD14 ]. This is also called per-pixel rate and defined
- as∑k
-i=1nii∑k
-i=1ti. Taking the pixel-wise classification
+as done in many publications [ SWRC06], [ CP08],
+[LSD14]. This is also called per-pixel rate and defined
+ as
+∑k
+i=1 nii∑k
+i=1 ti
+. Taking the pixel-wise classification
 accuracy has two major drawbacks:
-P1Tasks like segmenting images for autonomous cars
+P1 Tasks like segmenting images for autonomous cars
 have large regions which have one class. This
 makes achieving classification accuracies of more
-than30%with a priori knowledge only possible.
+than 30 % with a priori knowledge only possible.
 For example, a system might learn that a certain
 position of the image is most of the time “sky”
 while another position is most of the time “road”.
 
-P2The manually labeled images could have a more
+P2 The manually labeled images could have a more
 coarse labeling. For example, a human classifier
 could have labeled a region as “car” and the
 algorithm could have split that region into the
@@ -196,50 +200,56 @@ general “car” and the more specific “wheel of a
 car”
 Three accuracy metrics which do not suffer from
 problem P1 are used in [LSD14]:
-•mean accuracy :1
-k·∑k
-i=1nii
-ti∈[0,1]
-•mean intersection over union :
+• mean accuracy: 1
+k ·∑k
+i=1
+nii
+ti
+∈[0,1]
+• mean intersection over union :
 1
-k·∑k
-i=1nii
+k ·∑k
+i=1
+nii
 ti−nii+∑k
-j=1nji∈[0,1]
-•frequency weighted intersection over union :
+j=1 nji
+∈[0,1]
+• frequency weighted intersection over union :
 (∑k
-i=1ti)−1∑k
-i=1ti·nii
+i=1 ti)
+−1 ∑k
+i=1 ti · nii
 ti−nii+∑k
-j=1nji∈[0,1]
+j=1 nji
+∈[0,1]
 Another problem might be pixels which cannot be
 assigned to one of the known classes. For this reason,
-[SWRC06 ] makes use of a void class. This class gets
+[SWRC06] makes use of a void class. This class gets
 completely ignored for all quality measures. Hence the
 total number of pixels is assumed to be width·height−
 number of void pixels.
 One way to deal with problem P1 and problem P2
-is giving the confusion matrix as done in [ SWRC06 ].
+is giving the confusion matrix as done in [ SWRC06].
 However, this approach is not feasible if many classes
 are given.
-TheF-measure is useful for binary classification
+The F-measure is useful for binary classification
  task such as the KITTI road segmentation
-benchmark [ FKG13 ] or crypt segmentation as done
-by [CRSS14 ]. It is calculated as “the harmonic mean
+benchmark [ FKG13] or crypt segmentation as done
+by [CRSS14]. It is calculated as “the harmonic mean
 of the precision and recall” [PH05]:
-Fβ= (1 +β)2 tp
-(1 +β2)·tp+β2·fn+fp
-whereβ= 1 is chosen in most cases and tpmeans
-true positive ,fnmeans false negative andfpmeans
-false positive .
+Fβ = (1 + β)2 tp
+(1 + β2) ·tp + β2 ·fn + fp
+where β = 1 is chosen in most cases and tp means
+true positive, fn means false negative and fp means
+false positive.
 Finally, it should be noted that a lot of other measures
 for the accuracy of segmentations were proposed for
 non-semantic segmentation. One of those accuracy
 measures is Normalized Probabilistic Rand (NPR)
-index which was introduced in [ UPH05 ] and evaluated
+index which was introduced in [ UPH05] and evaluated
  in [ CSI+09] on dermoscopy images. Other
 non-semantic segmentation measures were introduced
-in [MFTM01 ], but the reason for creating them seems to
+in [MFTM01], but the reason for creating them seems to
 be to deal with the under-defined task description of nonsemantic
  segmentation. These accuracy measures try to
 deal with different levels of coarsity of the segmentation.
@@ -249,16 +259,17 @@ and thus those measures are not explained here.
 time for the inference on a single image is a hard
 requirement for some applications. For example, in the
 case of autonomous cars an algorithm which classifies
-pixel as street or no-street and thus makes a semanticsegmentation, every image needs to be processed within
-20ms [BKTT15]. This time is called latency .
+pixel as street or no-street and thus makes a semantic
+segmentation, every image needs to be processed within
+20 ms [BKTT15]. This time is called latency.
 Most papers do not give exact values for the time
 their application needs. One reason might be that this is
 very hardware, implementation and in some cases even
 data specific. For example, [ HJBJ+96] notes that their
-algorithm needs 10son a Sun SparcStation 20. The
-fastest CPU ever produced for this system had 200MHz .
+algorithm needs 10 s on a Sun SparcStation 20. The
+fastest CPU ever produced for this system had 200 MHz.
 Comparing this directly with results which were obtained
- using an Intel i7-4820K with 3.9GHz would not
+ using an Intel i7-4820K with 3.9 GHz would not
 be meaningful.
 However, it does still make sense to mention the
 execution time as well as the hardware in individual
@@ -294,10 +305,10 @@ the following, only the most widely used ones as well
 as three medical databases are described. An overview
 over the quantity and the kind of data is given by
 Table I.
-1) PASCAL VOC: The PASCAL1VOC2challenge
+1) PASCAL VOC: The PASCAL1 VOC2 challenge
 was organized eight times with different datasets:
 Once every year from 2005 to 2012 [ EVGW+b].
-1pattern analysis, statistical modelling and comput ational learning,
+1pattern analysis, statistical modelling and computational learning,
 an EU network of excellence
 2Visual Object Classes
 
@@ -322,7 +333,7 @@ database of 591 photographs with pixel-level annotation
 of 21 classes: aeroplane, bike, bird, boat, body, book,
 building, car, cat, chair, cow, dog, face, flower, grass,
 road, sheep, sign, sky, tree, water. Additionally, there
-is avoid label for pixels which do not belong to
+is a void label for pixels which do not belong to
 any of the 21 classes or which are close to the
 segmentation boundary. This allows a “rough and quick
 hand-segmentation which does not align exactly with
@@ -331,7 +342,7 @@ the object boundaries” [SWRC06].
 consists of 165 images with pixel-level annotation of
 5 classes: “healthy, adenomatous, moderately differentiated,
  moderately-to-poorly differentiated, and poorly
-differentiated” [ CSM09 ]. This dataset is part of the
+differentiated” [CSM09]. This dataset is part of the
 Gland Segmentation (GlaS) challenge.
 The DIARETDB1 [ KKV+14] is a dataset of 89 images
  fundus images. Those images show the interior
@@ -356,11 +367,16 @@ a majority vote on a pixel basis of 10 segmentations
 given by 10 different KWs.
 Training
 Prediction
-PostprocessingWindow-wise
+Postprocessing
 
-ClassificationWindow
-extractionData
-augmentationFeature extractionPreprocessingFigure 2: A typical segmentation pipeline gets raw
+Window-wise
+Classification
+Window
+extraction
+Data
+augmentationFeature extraction
+Preprocessing
+Figure 2: A typical segmentation pipeline gets raw
 pixel data, applies preprocessing techniques
 like scaling and feature extraction like HOG
 features. For training, data augmentation
@@ -375,18 +391,18 @@ Fields (MRFs).
 IV. S EGMENTATION PIPELINE
 Typically, semantic segmentation is done with a
 classifier which operates on fixed-size feature inputs
-and a sliding-window approach [ DT05 ], [YBCK10 ],
+and a sliding-window approach [ DT05], [ YBCK10],
 [SCZ08]. This means a classifier is trained on images
 of a fixed size. The trained classifier is then fed with
-rectangular regions of the image which are called windows
- . Although the classifier gets an image patch of e.g.
-51px×51pxof the environment, it might only classify
+rectangular regions of the image which are called windows.
+ Although the classifier gets an image patch of e.g.
+51 px ×51 px of the environment, it might only classify
 the center pixel or a subset of the complete window.
 This segmentation pipeline is visualized in Figure 2.
-This approach was taken by [ BKTT15 ] and a majority
+This approach was taken by [ BKTT15] and a majority
  of the VOC2007 participants [ EVGW+a]. As this
-approach has to apply the patch classifier 512·512 =
-262 144 times for images of size 512px×512px, there
+approach has to apply the patch classifier 512 ·512 =
+262 144 times for images of size 512 px×512 px, there
 are techniques for speeding it up such as applying a
 stride and interpolating the results.
 Neural networks are able to apply the sliding window
@@ -411,7 +427,7 @@ Fields in Section V-E and Support Vector Machines
 (SVMs) in Section V-D. Postprocessing is covered in
 Section V-G.
 It should be noted that algorithms can use combination
- of methods. For example, [ TNL14 ] makes use of a
+ of methods. For example, [ TNL14] makes use of a
 combination of a SVM and a MRF. Also, auto-encoders
 can be used to learn features which in turn can be used
 by any classifier.
@@ -426,49 +442,50 @@ for the gray-value) are the most widely used features. A
 typical image is in the RGB color space, but depending
 on the classifier and the problem another color space
 might result in better segmentations. RGB, YcBcr, HSL,
-Lab and YIQ are some examples used by [ CRSS14 ].
+Lab and YIQ are some examples used by [ CRSS14].
 No single color space has been proven to be superior
-to all others in all contexts [ CJSW01 ]. However, the
+to all others in all contexts [ CJSW01]. However, the
 most common choices seem to be RGB and HSI.
 Reasons for choosing RGB is simplicity and the support
 by programming languages, whereas the choice of
 the HSI color space might make it simpler for the
 classifier to become invariant to illumination. One
 reason for choosing CIE-L*a*b* color space is that it
-approximates human perception of brightness [ KP92 ].
+approximates human perception of brightness [ KP92].
 It follows that choosing the L*a*b color space helps
 algorithms to detect structures which are seen by
 humans. Another way of improving the structure within
 an image is histogram equalization, which can be
-applied to improve contrast [PAA+87], [RM07].
+applied to improve contrast [PAA +87], [RM07].
 2) Histogram of oriented Gradients: Histogram of
 oriented gradients (HOG) features interpret the image
-as a discrete function I:N2→{0,..., 255}which
-maps the position (x,y)to a color. For each pixel, there
-are two gradients: The partial derivative of xandy.
+as a discrete function I : N2 →{ 0,..., 255 }which
+maps the position (x,y) to a color. For each pixel, there
+are two gradients: The partial derivative of x and y.
 Now the original image is transformed to two feature
 maps of equal size which represents the gradient. These
-feature maps are splitted into patches and a histogram ofthe directions is calculated for each patch. HOG features
-were proposed in [ DT05 ] and are used in [ BMBM10 ],
+feature maps are splitted into patches and a histogram of
+the directions is calculated for each patch. HOG features
+were proposed in [ DT05] and are used in [ BMBM10],
 [FGMR10] for segmentation tasks.
 3) SIFT: Scale-invariant feature transform (SIFT)
 feature descriptors describe keypoints in an image. The
-image patch of the size 16×16around the keypoint
-is taken. This patch is divided in 16distinct parts of
-the size 4×4. For each of those parts a histogram of
+image patch of the size 16 ×16 around the keypoint
+is taken. This patch is divided in 16 distinct parts of
+the size 4 ×4. For each of those parts a histogram of
 8 orientations is calculated similar as for HOG features.
 This results in a 128-dimensional feature vector for
 each keypoint.
 It should be emphasized that SIFT is a global feature
 for a complete image.
-SIFT is described in detail in [ Low04 ] and are used
+SIFT is described in detail in [ Low04] and are used
 in [PTN09].
 4) BOV: Bag-of-visual-words (BOV), also called
 bag of keypoints , is based on vector quantization.
 Similar to HOG features, BOV features are histograms
 which count the number of occurrences of certain
 patterns within a patch of the image. BOV are described
-in [CDF+04] and used in combination with SIFT
+in [ CDF+04] and used in combination with SIFT
 feature descriptors in [CP08].
 5) Poselets: Poselets rely on manually added extra
 keypoints such as “right shoulder”, “left shoulder”,
@@ -480,10 +497,10 @@ like airplanes, ships, organs or cells where the human
 annotators do not know the keypoints. Additionally, the
 keypoints have to be chosen for every single class. There
 are strategies to deal with those problems like viewpointdependent
- keypoints. Poselets were used in [ BMBM10 ]
-to detect people and in [ BBMM11 ] for general object
+ keypoints. Poselets were used in [ BMBM10]
+to detect people and in [ BBMM11] for general object
 detection of the PASCAL VOC dataset.
-6) Textons: Atexton is the minimal building block
+6) Textons: A texton is the minimal building block
 of vision. The computer vision literature does not give a
 strict definition for textons, but edge detectors could be
 one example. One might argue that deep learning techniques
@@ -499,7 +516,7 @@ contain much more information. A simple approach
 to deal with this is downsampling the high-resolution
 image to a low-resolution variant. Another way of
 doing dimensionality reduction is principal component
-analysis (PCA), which is applied by [ COWR11 ]. The
+analysis (PCA), which is applied by [ COWR11]. The
 idea behind PCA is to find a hyperplane on which all
 
 feature vectors can be projected with a minimal loss
@@ -526,18 +543,18 @@ consistent regions or region boundaries.
 directly be applied on the pixels, when one gives a
 feature vector per pixel. Two clustering algorithms are
 k-means and the mean-shift algorithm.
-Thek-means algorithm is a general-purpose clustering
+The k-means algorithm is a general-purpose clustering
  algorithm which requires the number of clusters to
-be given beforehand. Initially, it places the kcentroids
+be given beforehand. Initially, it places the k centroids
 randomly in the feature space. Then it assigns each
 data point to the nearest centroid, moves the centroid
 to the center of the cluster and continues the process
 until a stopping criterion is reached. A faster variant is
 described in [Har75].
-k-means was applied by [ CLP98 ] for medical image
+k-means was applied by [ CLP98] for medical image
 segmentation.
 Another clustering algorithm is the mean-shift algorithm
- which was introduced by [ CM02 ] for segmentation
+ which was introduced by [ CM02] for segmentation
  tasks. The algorithm finds the cluster centers
 by initializing centroids at random seed points and
 iteratively shifting them to the mean coordinate within
@@ -550,8 +567,9 @@ points.
 2) Graph Based Image Segmentation: Graph-based
 image segmentation algorithms typically interpret pixels
 as vertices and an edge weight is a measure of
-dissimilarity such as the difference in color [ FH04 ],
-[Fel]. There are several different candidates for edges.The 4-neighborhood (north, east, south west) or an 8neighborhood
+dissimilarity such as the difference in color [ FH04],
+[Fel]. There are several different candidates for edges.
+The 4-neighborhood (north, east, south west) or an 8neighborhood
  (north, north-east, east, south-east, south,
 south-west, west, north-west) are plausible choices.
 One way to cut the edges is by building a minimum
@@ -559,11 +577,11 @@ spanning tree and removing edges above a threshold.
 This threshold can either be constant, adapted to the
 graph or adjusted by the user. After the edge-cutting
 step, the connected components are the segments.
-A graph-based method which ranked 2ndin the
+A graph-based method which ranked 2 nd in the
 Pascal VOC 2010 challenge [ EVGW+10] is described
-in [CS10 ]. The system makes heavy use of the multicue
- contour detector globalPb [ MAFM08 ] and needs
-about 10GB of main memory [CS11].
+in [CS10]. The system makes heavy use of the multicue
+ contour detector globalPb [ MAFM08] and needs
+about 10 GB of main memory [CS11].
 3) Random Walks: Random walks belong to the
 graph-based image segmentation algorithms. Random
 walk image segmentation usually works as follows:
@@ -582,7 +600,7 @@ segmentation methods output as seed points.
 along edges, but also try to find a border which is
 smooth. This is done by defining a so called energy
 function which will be minimized. They were initially
-described in [ KWT88 ]. ACMs can be used to segment
+described in [ KWT88]. ACMs can be used to segment
 an image or to refine segmentation as it was done
 in [AM98] for brain MR images.
 5) Watershed Segmentation: The watershed algorithm
@@ -598,7 +616,7 @@ watershed is found. The algorithm stops when the
 highest point is reached.
 A detailed description of the watershed segmentation
 algorithm is given in [RM00].
-The watershed segmentation was used in [ JLD03 ] to
+The watershed segmentation was used in [ JLD03] to
 segment white blood cells. As the authors describe,
 the segmentation by watershed transform has two
 flaws: Over-segmentation due to local minima and thick
@@ -606,13 +624,13 @@ watersheds due to plateaus.
 
 C. Random Decision Forests
 Random Decision Forests were first proposed
-in [Ho95 ]. This type of classifier applies techniques
+in [ Ho95]. This type of classifier applies techniques
 called ensemble learning , where multiple classifiers
 are trained and a combination of their hypotheses is
 used. One ensemble learning technique is the random
 subspaces method where each classifier is trained
 on a random subspace of the feature space. Another
-ensemble learning technique is bagging , which is
+ensemble learning technique is bagging, which is
 training the trees on random subsets of the training set.
 In the case of Random Decision Forests, the classifiers
 are decision trees. A decision tree is a tree where each
@@ -626,233 +644,268 @@ ordinal, interval, ratio) can be arbitrary. Another advantage
 for example, is the speed of training and classification.
 Decision trees were extensively studied in the past
 20 years and a multitude of training algorithms have
-been proposed (e.g. ID3 in [ Qui86 ], C4.5 in [ Qui93 ]).
+been proposed (e.g. ID3 in [ Qui86], C4.5 in [ Qui93]).
 Possible training hyperparameters are the measure to
-evaluate the “goodness of split” [ Min89 ], the number of
+evaluate the “goodness of split” [Min89], the number of
 decision trees being used, and if the depth of the trees
 is restricted. Typically in the context of classification,
 decision trees are trained by adding new nodes until
 each leaf contains only nodes of a single class or until it
 is not possible to split further. This is called a stopping
-criterion .
+criterion.
 There are two typical training modes: Central axis
 projection and perceptron training . In training, for
 each node a hyperplane is searched which is optimal
 according to an error function.
 Random Decision Forests with texton features (see
-Section V-A6) are applied in [ SJC08 ] for segmentation.
-In the [ MSC ] dataset, they report a per-pixel accuracy
-rate of 66.9%for their best system. This system
-requires 415msfor the segmentation of 320px×213px
-images on a single 2.7GHz core. On the Pascal
+Section V-A6) are applied in [ SJC08] for segmentation.
+In the [ MSC] dataset, they report a per-pixel accuracy
+rate of 66.9 % for their best system. This system
+requires 415 ms for the segmentation of 320 px×213 px
+images on a single 2.7 GHz core. On the Pascal
 VOC 2007 dataset, they report an average per-pixel
-accuracy for their best segmentation system of 42%.
+accuracy for their best segmentation system of 42 %.
 An excellent introduction to Random Decision
-Forests for semantic segmentation is given by [ SCZ08 ].
+Forests for semantic segmentation is given by [SCZ08].
 D. SVMs
 SVMs are well-studied binary classifiers which can
 be described by five central ideas. For those ideas, the
-training data is represented as (xi,yi)where xiis the
-feature vector and yi∈{− 1,1}the binary label for
-training example i∈{1,...,m}.1)If data is linearly separable, it can be separated
+training data is represented as (xi,yi) where xi is the
+feature vector and yi ∈{− 1,1 }the binary label for
+training example i∈{1,...,m }.
+1) If data is linearly separable, it can be separated
 by a hyperplane. There is one hyperplane which
 maximizes the distance to the next datapoints
-(support vectors ). This hyperplane should be taken:
+(support vectors). This hyperplane should be taken:
 minimize
-w,b1
+w,b
+1
 2∥w∥2
-s.t.∀m
-i=1yi·(⟨w,xi⟩+b)
-sgn applied to this gives the classification≥1
-2)Even if the underlying process which generates the
+s.t. ∀m
+i=1yi ·(⟨w,xi⟩+ b)  
+sgn applied to this gives the classification
+≥1
+2) Even if the underlying process which generates the
 features for the two classes is linearly separable,
 noise can make the data not separable. The introduction
  of slack variables to relax the requirement
 of linear separability solves this problem. The
 trade-off between accepting some errors and a
 more complex model is weighted by a parameter
-C∈R+
-0. The bigger C, the more errors are
+C ∈ R+
+0 . The bigger C, the more errors are
 accepted. The new optimization problem is:
 minimize
-w1
-2∥w∥2+C·m∑
-i=1ξi
-s.t.∀m
-i=1yi·(⟨w,xi⟩+b)≥1−ξi
-Note that 0≤ξi≤1means that the data point
-is within the margin, whereas ξi≥1means it is
-misclassified. An SVM with C > 0is also called
-asoft-margin SVM .
-3)The primal problem is to find the normal vector
-wand the bias b. The dual problem is to express
-was a linear combination of the training data xi:
-w=m∑
-i=1αiyixi
-whereyi∈{− 1,1}represents the class of the
-training example and αiare Lagrange multipliers.
+w
+1
+2∥w∥2 + C·
+m∑
+i=1
+ξi
+s.t. ∀m
+i=1yi ·(⟨w,xi⟩+ b) ≥1 −ξi
+Note that 0 ≤ξi ≤1 means that the data point
+is within the margin, whereas ξi ≥1 means it is
+misclassified. An SVM with C >0 is also called
+a soft-margin SVM.
+3) The primal problem is to find the normal vector
+w and the bias b. The dual problem is to express
+w as a linear combination of the training data xi:
+w =
+m∑
+i=1
+αiyixi
+where yi ∈{− 1,1 }represents the class of the
+training example and αi are Lagrange multipliers.
 The usage of Lagrange multipliers is explained
-with some examples in [ Smi04 ]. The usage of the
-Lagrange multipliers αichanges the optimization
-problem depend on the αiwhich are weights for
-the feature vectors. It turns out that most αiwill
+with some examples in [ Smi04]. The usage of the
+Lagrange multipliers αi changes the optimization
+problem depend on the αi which are weights for
+the feature vectors. It turns out that most αi will
 be zero. The non-zero weighted vectors are called
-support vectors .
+support vectors.
 The optimization problem is now, according
 to [Bur98]:
 maximize
-αim∑
-i=1αi−1
-2m∑
-i=1m∑
-j=1αiαjyiyj⟨xi,xj⟩
-s.t.∀m
-i=10≤αi≤C
-s.t.m∑
-i=1αiyi= 0
+αi
+m∑
+i=1
+αi −1
+2
+m∑
+i=1
+m∑
+j=1
+αiαjyiyj⟨xi,xj⟩
+s.t. ∀m
+i=10 ≤αi ≤C
+s.t.
+m∑
+i=1
+αiyi = 0
 
-4)Not every dataset is linearly separable. This problem
+4) Not every dataset is linearly separable. This problem
  is approached by transforming the feature
-vectors xwith a non-linear mapping Φinto
+vectors x with a non-linear mapping Φ into
 a higher dimensional (probably ∞-dimensional)
-space. As the feature vectors xare only used
+space. As the feature vectors x are only used
 within scalar product ⟨xi,xj⟩, it is not necessary
 to do the transformation. It is enough to do the
 calculation
-K(xi,xj) =⟨xi,xj⟩
-This function Kis called a kernel . The idea of
-never explicitly transforming the vectors xito the
-higher dimensional space is called the kernel trick .
+K(xi,xj) = ⟨xi,xj⟩
+This function K is called a kernel. The idea of
+never explicitly transforming the vectors xi to the
+higher dimensional space is called the kernel trick.
 Common kernels include the polynomial kernel
-KP(xi,xj) = (⟨xi,xj⟩+r)p
-of degreepand coefficient r, the Gaussian radial
+KP(xi,xj) = (⟨xi,xj⟩+ r)p
+of degree p and coefficient r, the Gaussian radial
 basis function (RBF) kernel
-KGauss(xi,xj) =e−γ∥xi−xj∥2
+KGauss(xi,xj) = e
+−γ∥xi−xj∥2
 2σ2
 and the sigmoid kernel
 Ktanh(xi,xj) = tanh(γ⟨xi,xj⟩−r)
-where the parameter γdetermines how much
+where the parameter γ determines how much
 influence single training examples have.
-5)The described SVMs can only distinguish between
+5) The described SVMs can only distinguish between
 two classes. Common strategies to expand those
 binary classifiers to multi-class classification is
-theone-vs-all and the one-vs-one strategy. In the
-one-vs-all strategy nclassifiers have to be trained
-which can distinguish one of the nclasses against
-all other classes. In the one-vs-one strategyn2−n
+the one-vs-all and the one-vs-one strategy. In the
+one-vs-all strategy n classifiers have to be trained
+which can distinguish one of the n classes against
+all other classes. In the one-vs-one strategy n2−n
 2
 classifiers are trained; one classifier for each pair
 of classes.
 A detailed description of SVMs can be found
 in [Bur98].
-SVMs are used by [ YHRF12 ] on the 2009 and 2010
+SVMs are used by [ YHRF12] on the 2009 and 2010
 PASCAL segmentation challenge [ EVGW+10]. They
 did not hand their classifier in to the challenge itself,
 but calculated an average rank of 7 among the different
 categories.
-[FGMR10 ] also used an SVM based method with
-HOG features and achieved the 7thrank in the 2010
+[FGMR10] also used an SVM based method with
+HOG features and achieved the 7 th rank in the 2010
 PASCAL segmentation challenge by mean accuracy. It
-needs about 2s on a 2.8GHz 8-core Intel processor.
+needs about 2 s on a 2.8 GHz 8-core Intel processor.
 E. Markov Random Fields
 MRFs are undirected probabilistic graphical models
 which are wide-spread model in computer vision. The
 overall idea of MRFs is to assign a random variable for
-each feature and a random variable for each pixel whichx1x2x3x4x5x6x7x8x9
-y1y2y3y4y5y6y7y8y9
-x1x2x3x4x5x6x7x8x9
-y1y2y3y4y5y6y7y8y9
+each feature and a random variable for each pixel which
+x1 x2 x3
+x4 x5 x6
+x7 x8 x9
+y1 y2 y3
+y4 y5 y6
+y7 y8 y9
+x1 x2 x3
+x4 x5 x6
+x7 x8 x9
+y1 y2 y3
+y4 y5 y6
+y7 y8 y9
 Figure 3: CRF with 4-neighborhood. Each node xi
-represents a pixel and each node yirepresents
+represents a pixel and each node yi represents
 a label.
 gets labeled as shown in Figure 3. For example, a MRF
-which is trained on images of the size 224px×224pixel
+which is trained on images of the size 224 px×224 pixel
 and gets the raw RGB values as features has
-224·224·3
-input+ 224·224
-output= 200 704
+224 ·224 ·3  
+input
++ 224·224  
+output
+= 200 704
 random variables. Those random variables are conditionally
  independent, given their local neighborhood.
 These (in)dependencies can be expressed with a graph.
-LetG= (V,E)be the associated undirected graph
-of an MRF andCbe the set of all maximal cliques in
-that graph. Nodes represent random variables x,yand
+Let G= (V,E) be the associated undirected graph
+of an MRF and Cbe the set of all maximal cliques in
+that graph. Nodes represent random variables x,y and
 edges represent conditional dependencies. Just like in
-he 4-neighborhood [ SWRC06 ] and the 8-neighborhood
+he 4-neighborhood [ SWRC06] and the 8-neighborhood
 are reasonable choices for constructing the graph.
-Typically, random variables yrepresent the class of a
-single pixel, random variables xrepresent a pixel values
+Typically, random variables y represent the class of a
+single pixel, random variables x represent a pixel values
 and edges represent pixel neighborhood in computer
 vision problems segmentation problems where MRFs
-are used. Accordingly, the random variables ylive
-on1,..., nr of classes and the random variables x
-typically live on 0,..., 255or[0,1].
-The probability of x,ycan be expressed as
-P(x,y) =1
+are used. Accordingly, the random variables y live
+on 1,..., nr of classes and the random variables x
+typically live on 0,..., 255 or [0,1].
+The probability of x,y can be expressed as
+P(x,y) = 1
 Ze−E(x,y)
-whereZ=∑
-x,ye−E(x,y)is a normalization term
-called the partition function andEis called the energy
-function . A common choice for the energy function is
-E(x,y) =∑
-c∈Cψc(x,y)
-whereψis called a clique potential . One choice for
-cliques of size two x,y= (x1,x2)is [KP06]
-ψc(x1,x2) =wδ(x1,x2) ={
-+wifx1̸=x2
-−wifx1=x2
-According to [ Mur12 ], the most common way of
+where Z = ∑
+x,y e−E(x,y) is a normalization term
+called the partition function and E is called the energy
+function. A common choice for the energy function is
+E(x,y) =
+∑
+c∈C
+ψc(x,y)
+where ψ is called a clique potential . One choice for
+cliques of size two x,y = (x1,x2) is [KP06]
+ψc(x1,x2) = wδ(x1,x2) =
+{
++w if x1 ̸= x2
+−w if x1 = x2
+According to [ Mur12], the most common way of
 inference over the posterior MRF in computer vision
 problems is Maximum A Posteriori (MAP) estimation.
 
 Detailed introductions to MRFs are given by
-[BKR11 ], [Mur12 ]. MRFs are used by [ ZBS01 ] and
+[BKR11], [ Mur12]. MRFs are used by [ ZBS01] and
 [MSB12] for image segmentation.
 F . Conditional Random Fields
 CRFs are MRFs where all clique potentials are
-conditioned on input features [ Mur12 ]. This means,
+conditioned on input features [ Mur12]. This means,
 instead of learning the distribution P(y,x), the task
 is reformulated to learn the distribution P(y|x). One
 consequence of this reformulation is that CRFs need
-much less parameters as the distribution of xdoes
+much less parameters as the distribution of x does
 not have to be estimated. Another advantage of CRFs
 compared to MRFs is that no distribution assumption
-about xhas to be made.
+about x has to be made.
 A CRF has the partition function Z:
-Z(x) =∑
-yP(x,y)
+Z(x) =
+∑
+y
+P(x,y)
 and joint probability distribution
-P(y|x) =1
-Z(x)∏
-c∈Cψc(yc|x)
-The simplest way to define the clique potentials ψis
-the count of the class ycgivenxadded with a positive
+P(y|x) = 1
+Z(x)
+∏
+c∈C
+ψc(yc|x)
+The simplest way to define the clique potentials ψ is
+the count of the class yc given x added with a positive
 smoothing constant to prevent the complete term from
 getting zero.
-CRFs as described in [ LRKT09 ] have reached top
-performance in PASCAL VOC 2010 [ VOC10 ] and
-are also used in [ HZCP04 ], [SWRC06 ] for semantic
+CRFs as described in [ LRKT09] have reached top
+performance in PASCAL VOC 2010 [ VOC10] and
+are also used in [ HZCP04], [ SWRC06] for semantic
 segmentation.
 A method similar to CRFs was proposed
 in [ GBVdW+10]. The system of Gonfaus et.al.
-ranked 1stby mean accuracy in the segmentation task
-of the PASCAL VOC 2010 challenge [EVGW+10].
+ranked 1st by mean accuracy in the segmentation task
+of the PASCAL VOC 2010 challenge [EVGW +10].
 An introduction to CRFs is given by [SM11].
 G. Post-processing methods
 Post-processing refine a found segmentation and
 remove obvious errors. For example, the morphological
-operations opening andclosing can remove noise. The
+operations opening and closing can remove noise. The
 opening operation is a dilation followed by a erosion.
 This removes tiny segments. The closing operation is a
 erosion followed by a dilation. This removes tiny gaps
-in otherwise filled regions. They were used in [ CLP98 ]
+in otherwise filled regions. They were used in [ CLP98]
 for biomedical image segmentation.
 Another way of refinement of the found segmentation
 is by adjusting the segmentation to match close edges.
-This was used in [ BBMM11 ] with an ultra-metric
+This was used in [ BBMM11] with an ultra-metric
 contour map [AMFM09].
 Active contour models are another example of a
-post-processing method [KWT88].VI. N EURAL NETWORKS FOR SEMANTIC
+post-processing method [KWT88].
+VI. N EURAL NETWORKS FOR SEMANTIC
 SEGMENTATION
 Artificial neural networks are classifiers which are
 inspired by biologic neurons. Every single artificial
@@ -862,7 +915,7 @@ function to the weighted sum and gives an output. Those
 neurons can take either a feature vector as input or the
 output of other neurons. In this way, they build up
 feature hierarchies.
-The parameters they learn are the weightsw∈R.
+The parameters they learn are the weights w ∈R.
 They are learned by gradient descent. To do so, an error
 function — usually cross-entropy or mean squared error
 — is necessary. For the gradient descent algorithm, one
@@ -879,39 +932,39 @@ CNNs are neural networks which learn image filters.
 They drastically reduce the number of parameters which
 have to be learned while being still general enough for
 the problem domain of images. This was shown by Alex
-Krizhevsky et al. in [ KSH12 ]. One major idea was a
-clever regularization called dropout training , which set
+Krizhevsky et al. in [ KSH12]. One major idea was a
+clever regularization called dropout training, which set
 the output of neurons while training randomly to zero.
 Another contribution was the usage of an activation
 function called rectified linear unit :
 ϕReLU(x) = max(0,x)
 Those are much faster to train than the commonly used
 sigmoid activation functions
-ϕSigmoid (x) =1
-e−x+ 1
+ϕSigmoid(x) = 1
+e−x + 1
 Krizhevsky et al. implemented those ideas and participated
  in the ImageNet Large-Scale Visual Recognition
 Challenge (ILSVRC). The best other system, which
 used SIFT features and Fisher Vectors, had a performance
- of about 25.7%while the network by Alex
-Krizhevsky et al. got 17.0%error rate on the ILSVRC2010
+ of about 25.7 % while the network by Alex
+Krizhevsky et al. got 17.0 % error rate on the ILSVRC2010
  dataset. As a preprocessing step, they downsampled
- all images to a fixed size of 256px×256pxbefore
+ all images to a fixed size of 256 px×256 px before
 they fed the features into their network. This network
-is commonly known as AlexNet .
+is commonly known as AlexNet.
 Since AlexNet was developed, a lot of different
 neural networks have been proposed. One interesting
-example is [ PC13 ], where a recurrent CNN for semantic
+example is [PC13], where a recurrent CNN for semantic
 segmentation is presented.
 
-Another notable paper is [ LSD14 ]. The algorithm
+Another notable paper is [ LSD14]. The algorithm
 presented there makes use of a classifying network such
 as AlexNet, but applies the complete network as an
 image filter. This way, each pixel gets a probability
 distribution for each of the trained classes. By taking
 the most likely class, a semantic segmentation can be
 done with arbitrary image sizes.
-A very recent publication by Dai et al. [ DHS15 ]
+A very recent publication by Dai et al. [ DHS15]
 showed that segmentation with much deeper networks
 is possible and achieves better results.
 More detailed explanations to neural networks for
@@ -928,7 +981,7 @@ I am not aware of any systematic work which examined
 A. Lens Flare
 Lens flare is the effect of light getting scattered in
 the lens system of the camera. The testing data set of
-the KITTI road evaluation benchmark [ FKG13 ] has a
+the KITTI road evaluation benchmark [ FKG13] has a
 couple of photos with this problem. Figure 4(a) shows
 an extreme example of lens flare.
 B. Vignetting
@@ -984,7 +1037,7 @@ not have photos from the point of view of a child. This
 is visualized in Figure 4(f).
 
 VIII. D ISCUSSION
-Ohta et al. wrote [ OKS78 ] 38 years ago. It is one
+Ohta et al. wrote [ OKS78] 38 years ago. It is one
 of the first papers mentioning semantic segmentation.
 In this time, a lot of work was done and many
 different directions have been explored. Different kinds
@@ -1005,61 +1058,62 @@ A combination of different classifiers in an ensemble
 would be an interesting option to explore in order to
 improve accuracy. Another direction which is currently
 studied is combining classifiers such as neural networks
-with CRFs [ZJRP+15].REFERENCES
+with CRFs [ZJRP +15].
+REFERENCES
 [AM98] M. S. Atkins and B. T. Mackiewich, “Fully
 automatic segmentation of the brain in
 mri,” Medical Imaging, IEEE Transactions
 on, vol. 17, no. 1, pp. 98–107, Feb. 1998.
-[Online]. Available: http://ieeexplore .ieee .org/xpls/
-abs_all .jsp?arnumber=668699
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=668699
 [AMFM09] P. Arbelaez, M. Maire, C. Fowlkes, and
 J. Malik, “From contours to regions: An
 empirical evaluation,” in Computer Vision and
 Pattern Recognition, 2009. CVPR 2009. IEEE
-Conference on . IEEE, Jun. 2009, pp. 2294–2301.
-[Online]. Available: http://ieeexplore .ieee .org/xpls/
-abs_all .jsp?arnumber=5206707
+Conference on. IEEE, Jun. 2009, pp. 2294–2301.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=5206707
 [AP11] G. Azzopardi and N. Petkov, “Detection of
 retinal vascular bifurcations by trainable v4-like
 filters,” in Computer Analysis of Images and
-Patterns . Springer, 2011, pp. 451–459. [Online].
-Available: http://www .cs.rug .nl/~imaging/databases/
-retina_database/retinalfeatures_database .html
+Patterns. Springer, 2011, pp. 451–459. [Online].
+Available: http://www.cs.rug.nl/~imaging/databases/
+retina_database/retinalfeatures_database.html
 [BBMM11] T. Brox, L. Bourdev, S. Maji, and J. Malik,
 “Object segmentation by alignment of poselet
 activations to image contours,” in Computer Vision
 and Pattern Recognition (CVPR), 2011 IEEE
-Conference on . IEEE, Jun. 2011, pp. 2225–2232.
-[Online]. Available: http://ieeexplore .ieee .org/xpls/
-abs_all .jsp?arnumber=5995659
+Conference on. IEEE, Jun. 2011, pp. 2225–2232.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=5995659
 [BJ00] Y . Boykov and M.-P. Jolly, “Interactive organ
 segmentation using graph cuts,” in Medical Image
 Computing and Computer-Assisted Intervention–
 MICCAI 2000 . Springer, 2000, pp. 276–
-286. [Online]. Available: http://link .springer .com/
-chapter/10 .1007/978-3-540-40899-4_28
+286. [Online]. Available: http://link .springer.com/
+chapter/10.1007/978-3-540-40899-4_28
 [BKR11] A. Blake, P. Kohli, and C. Rother, Markov random
 fields for vision and image processing . Mit Press,
 2011.
 [BKTT15] S. Bittel, V . Kaiser, M. Teichmann, and M. Thoma,
 “Pixel-wise segmentation of street with neural
 networks,” arXiv preprint arXiv:1511.00513 , 2015.
-[Online]. Available: http://arxiv .org/abs/1511 .00513
+[Online]. Available: http://arxiv.org/abs/1511.00513
 [BMBM10] L. Bourdev, S. Maji, T. Brox, and J. Malik,
 “Detecting people using mutually consistent
 poselet activations,” in Computer Vision–ECCV
-2010 . Springer, 2010, pp. 168–181. [Online].
-Available: http://link .springer .com/chapter/10 .1007/
+2010. Springer, 2010, pp. 168–181. [Online].
+Available: http://link.springer.com/chapter/10.1007/
 978-3-642-15567-3_13#page-1
 [Bur98] C. J. Burges, “A tutorial on support vector machines
-for pattern recognition,” Data mining and knowledge
-discovery , vol. 2, no. 2, pp. 121–167, 1998.
+for pattern recognition,”Data mining and knowledge
+discovery, vol. 2, no. 2, pp. 121–167, 1998.
 [BVZ01] Y . Boykov, O. Veksler, and R. Zabih, “Fast
 approximate energy minimization via graph cuts,”
 Pattern Analysis and Machine Intelligence, IEEE
-Transactions on , vol. 23, no. 11, pp. 1222–1239,
-2001. [Online]. Available: http://ieeexplore .ieee .org/
-xpls/abs_all .jsp?arnumber=969114
+Transactions on, vol. 23, no. 11, pp. 1222–1239,
+2001. [Online]. Available: http://ieeexplore.ieee.org/
+xpls/abs_all.jsp?arnumber=969114
 [CDF+04] G. Csurka, C. Dance, L. Fan, J. Willamowski,
 and C. Bray, “Visual categorization with bags of
 keypoints,” in Workshop on statistical learning in
@@ -1067,47 +1121,47 @@ computer vision, ECCV , vol. 1, no. 1-22. Prague,
 2004, pp. 1–2.
 [CJSW01] H.-D. Cheng, X. Jiang, Y . Sun, and J. Wang,
 “Color image segmentation: advances and prospects,”
-Pattern recognition , vol. 34, no. 12, pp. 2259–2281,
+Pattern recognition, vol. 34, no. 12, pp. 2259–2281,
 2001.
 [CLP98] C. W. Chen, J. Luo, and K. J. Parker, “Image
 segmentation via adaptive k-mean clustering and
 knowledge-based morphological operations with
 biomedical applications,” Image Processing, IEEE
-Transactions on , vol. 7, no. 12, pp. 1673–1683, Dec.
+Transactions on, vol. 7, no. 12, pp. 1673–1683, Dec.
 
-1998. [Online]. Available: http://ieeexplore .ieee .org/
-xpls/abs_all .jsp?arnumber=730379
+1998. [Online]. Available: http://ieeexplore.ieee.org/
+xpls/abs_all.jsp?arnumber=730379
 [CM02] D. Comaniciu and P. Meer, “Mean shift: A
 robust approach toward feature space analysis,”
 Pattern Analysis and Machine Intelligence, IEEE
-Transactions on , vol. 24, no. 5, pp. 603–619, 2002.
-[Online]. Available: http://ieeexplore .ieee .org/xpl/
-login .jsp?tp=&arnumber=1000236
+Transactions on, vol. 24, no. 5, pp. 603–619, 2002.
+[Online]. Available: http://ieeexplore .ieee.org/xpl/
+login.jsp?tp=&arnumber=1000236
 [COWR11] C. Chen, J. Ozolek, W. Wang, and G. K. Rohde,
 “A pixel classification system for segmenting
 biomedical images using intensity neighborhoods
 and dimension reduction,” in Biomedical Imaging:
 From Nano to Macro, 2011 IEEE International
 Symposium on . IEEE, 2011, pp. 1649–1652.
-[Online]. Available: https://www .andrew .cmu .edu/
-user/gustavor/chen_isbi_11 .pdf
+[Online]. Available: https://www.andrew.cmu.edu/
+user/gustavor/chen_isbi_11.pdf
 [CP08] G. Csurka and F. Perronnin, “A simple high
 performance approach to semantic segmentation.”
-inBMVC , 2008, pp. 1–10. [Online]. Available:
- http://www .xrce .xerox .com/layout/set/print/
-content/download/16654/118653/file/2008-023 .pdf
+in BMVC, 2008, pp. 1–10. [Online]. Available:
+ http://www .xrce.xerox.com/layout/set/print/
+content/download/16654/118653/file/2008-023.pdf
 [CRSS] A. Cohen, E. Rivlin, I. Shimshoni, and
 E. Sabo, “Colon crypt segmentation website.” [Online].
- Available: http://mis .haifa .ac.il/~ishimshoni/
-SegmentCrypt/Download .htm
+ Available: http://mis.haifa.ac.il/~ishimshoni/
+SegmentCrypt/Download.htm
 [CRSS14] ——, “Memory based active contour algorithm
 using pixel-level classified images for colon crypt
 segmentation,” Computerized Medical Imaging
 and Graphics , Nov. 2014. [Online]. Available:
-http://mis .haifa .ac.il/~ishimshoni/SegmentCrypt/
+http://mis.haifa.ac.il/~ishimshoni/SegmentCrypt/
 Active%20contour%20based%20on%20pixellevel%20classified%20image%20for%20colon%
 
-20crypts%20segmentation .pdf
+20crypts%20segmentation.pdf
 [CS10] J. Carreira and C. Sminchisescu, “Constrained
 parametric min-cuts for automatic object segmentation,”
  in Computer Vision and Pattern Recognition
@@ -1115,61 +1169,62 @@ parametric min-cuts for automatic object segmentation,”
 pp. 3241–3248.
 [CS11] ——, “Cpmc: Constrained parametric min-cuts for
 automatic object segmentation,” Feb. 2011. [Online].
-Available: http://www .maths .lth.se/matematiklth/
+Available: http://www .maths.lth.se/matematiklth/
 personal/sminchis/code/cpmc/
 [CSI+09] M. E. Celebi, G. Schaefer, H. Iyatomi, W. V .
 Stoecker, J. M. Malters, and J. M. Grichnik, “An
 improved objective evaluation measure for border
 detection in dermoscopy images,” Skin Research
-and Technology , vol. 15, no. 4, pp. 444–450, 2009.
-[Online]. Available: http://arxiv .org/abs/1009 .1020
+and Technology, vol. 15, no. 4, pp. 444–450, 2009.
+[Online]. Available: http://arxiv.org/abs/1009.1020
 [CSM09] L. P. Coelho, A. Shariff, and R. F. Murphy, “Nuclear
 segmentation in microscope cell images: a handsegmented
  dataset and comparison of algorithms,”
-inBiomedical Imaging: From Nano to Macro,
+in Biomedical Imaging: From Nano to Macro,
 2009. ISBI’09. IEEE International Symposium on .
 IEEE, 2009, pp. 518–521. [Online]. Available:
-http://murphylab .web .cmu .edu/data
+http://murphylab.web.cmu.edu/data
 [CXGS12] M. D. Collins, J. Xu, L. Grady, and V . Singh,
 “Random walks based multi-image segmentation:
 Quasiconvexity results and gpu-based solutions,”
-inComputer Vision and Pattern Recognition
+in Computer Vision and Pattern Recognition
 (CVPR), 2012 IEEE Conference on . IEEE,
 2012, pp. 1656–1663. [Online]. Available: http:
-//pages .cs.wisc .edu/~jiaxu/pub/rwcoseg .pdf
+//pages.cs.wisc.edu/~jiaxu/pub/rwcoseg.pdf
 [DHS15] J. Dai, K. He, and J. Sun, “Instance-aware semantic
  segmentation via multi-task network cascades,”
 arXiv preprint arXiv:1512.04412 , 2015.
 [DT05] N. Dalal and B. Triggs, “Histograms of oriented
 gradients for human detection,” in Computer
 Vision and Pattern Recognition, 2005. CVPR
-2005. IEEE Computer Society Conference on ,vol. 1, June 2005, pp. 886–893 vol. 1.
-[Online]. Available: http://ieeexplore .ieee .org/xpls/
-abs_all .jsp?arnumber=1467360
+2005. IEEE Computer Society Conference on ,
+vol. 1, June 2005, pp. 886–893 vol. 1.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=1467360
 [EVGW+a] M. Everingham, L. Van Gool, C. K. I.
 Williams, J. Winn, and A. Zisserman, “The
 PASCAL Visual Object Classes Challenge
 2007 (VOC2007) Results,” http://www.pascalnetwork.org/challenges/VOC/voc2007/workshop/index.html.
 
-[Online]. Available: http://host .robots .ox.ac.uk:
-8080/pascal/VOC/voc2007/index .html
+[Online]. Available: http://host .robots.ox.ac.uk:
+8080/pascal/VOC/voc2007/index.html
 [EVGW+b] ——, “The PASCAL Visual Object Classes Challenge
  2012 (VOC2012) Results,” http://www.pascalnetwork.org/challenges/VOC/voc2012/workshop/index.html.
 
-[Online]. Available: http://host .robots .ox.ac.uk:
-8080/pascal/VOC/voc2012/index .html
+[Online]. Available: http://host .robots.ox.ac.uk:
+8080/pascal/VOC/voc2012/index.html
 [EVGW+10] M. Everingham, L. Van Gool, C. K. Williams,
 J. Winn, and A. Zisserman, “The pascal visual object
 classes (voc) challenge,” International journal of
-computer vision , vol. 88, no. 2, pp. 303–338, 2010.
+computer vision, vol. 88, no. 2, pp. 303–338, 2010.
 [EVGW+12] M. Everingham, L. Van Gool, C. K. I. Williams,
 J. Winn, and A. Zisserman, “Visual object
 classes challenge 2012 (voc2012),” 2012. [Online].
-Available: http://host .robots .ox.ac.uk:8080/pascal/
-VOC/voc2012/index .html
+Available: http://host.robots.ox.ac.uk:8080/pascal/
+VOC/voc2012/index.html
 [Fel] P. F. Felzenszwalb, “Graph based image
  segmentation.” [Online]. Available: http:
-//cs .brown .edu/~pff/segment/
+//cs.brown.edu/~pff/segment/
 [FGMR10] P. F. Felzenszwalb, R. B. Girshick, D. McAllester,
 and D. Ramanan, “Object detection with discriminatively
  trained part-based models,” Pattern Analysis
@@ -1179,14 +1234,14 @@ vol. 32, no. 9, pp. 1627–1645, 2010.
 “Efficient graph-based image segmentation,”
 International Journal of Computer Vision ,
 vol. 59, no. 2, pp. 167–181, 2004. [Online].
-Available: http://link .springer .com/article/10 .1023/
-B:VISI .0000022288 .19776 .77
+Available: http://link.springer.com/article/10.1023/
+B:VISI.0000022288.19776.77
 [FKG13] J. Fritsch, T. Kuehnl, and A. Geiger, “A
 new performance measure and evaluation
 benchmark for road detection algorithms,” in
 International Conference on Intelligent Transportation
  Systems (ITSC) , 2013. [Online]. Available:
-http://www .cvlibs .net/datasets/kitti/eval_road .php
+http://www.cvlibs.net/datasets/kitti/eval_road.php
 [GBVdW+10] J. M. Gonfaus, X. Boix, J. Van de Weijer, A. D.
 Bagdanov, J. Serrat, and J. Gonzalez, “Harmony potentials
  for joint classification and segmentation,” in
@@ -1196,23 +1251,23 @@ Computer Vision and Pattern Recognition (CVPR),
 [GRC+08] S. Gould, J. Rodgers, D. Cohen, G. Elidan, and
 D. Koller, “Multi-class segmentation with relative
 location prior,” International Journal of Computer
-Vision , vol. 80, no. 3, pp. 300–316, Apr. 2008.
+Vision, vol. 80, no. 3, pp. 300–316, Apr. 2008.
 [GVSY13] S. Giannarou, M. Visentini-Scarzanella, and G.Z.
  Yang, “Probabilistic tracking of affine-invariant
 anisotropic regions,” Pattern Analysis and Machine
 Intelligence, IEEE Transactions on , vol. 35, no. 1,
 pp. 130–143, 2013.
-[Har75] J. A. Hartigan, Clustering algorithms . John Wiley
+[Har75] J. A. Hartigan, Clustering algorithms. John Wiley
 & Sons, Inc., 1975.
 [HDT02] C. Huang, L. Davis, and J. Townshend, “An
 assessment of support vector machines for land
-cover classification,” International Journal of remote
-sensing , vol. 23, no. 4, pp. 725–749, 2002.
+cover classification,”International Journal of remote
+sensing, vol. 23, no. 4, pp. 725–749, 2002.
 [HHR01] S. Hu, E. Hoffman, and J. Reinhardt, “Automatic
 lung segmentation for accurate quantitation of
-volumetric x-ray ct images,” Medical Imaging, IEEE
+volumetric x-ray ct images,”Medical Imaging, IEEE
 
-Transactions on , vol. 20, no. 6, pp. 490–498, Jun.
+Transactions on, vol. 20, no. 6, pp. 490–498, Jun.
 2001.
 [HJBJ+96] A. Hoover, G. Jean-Baptiste, X. Jiang, P. J.
 Flynn, H. Bunke, D. B. Goldgof, K. Bowyer,
@@ -1221,105 +1276,106 @@ Fisher, “An experimental comparison of range
 image segmentation algorithms,” Pattern Analysis
 and Machine Intelligence, IEEE Transactions
 on, vol. 18, no. 7, pp. 673–689, Jul. 1996.
-[Online]. Available: http://ieeexplore .ieee .org/xpls/
-abs_all .jsp?arnumber=506791
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=506791
 [Ho95] T. K. Ho, “Random decision forests,” in
 Document Analysis and Recognition, 1995.,
 Proceedings of the Third International Conference
 on, vol. 1. IEEE, 1995, pp. 278–282.
-[Online]. Available: http://ect .bell-labs .com/who/
-tkh/publications/papers/odt .pdf
+[Online]. Available: http://ect .bell-labs.com/who/
+tkh/publications/papers/odt.pdf
 [Hus07] Hustvedt, “File:cctv lens flare.jpg,” Wikipedia
 Commons, Nov. 2007. [Online]. Available:
- https://commons .wikimedia .org/wiki/File:
-CCTV_Lens_flare .jpg
+ https://commons .wikimedia.org/wiki/File:
+CCTV_Lens_flare.jpg
 [HZCP04] X. He, R. Zemel, and M. Carreira-Perpindn,
 “Multiscale conditional random fields for image
 labeling,” in Computer Vision and Pattern
 Recognition, 2004. CVPR 2004. Proceedings
 of the 2004 IEEE Computer Society Conference
 on, vol. 2, Jun. 2004, pp. II–695–II–702 V ol.2.
-[Online]. Available: http://ieeexplore .ieee .org/xpl/
-login .jsp?tp=&arnumber=1315232
+[Online]. Available: http://ieeexplore .ieee.org/xpl/
+login.jsp?tp=&arnumber=1315232
 [JLD03] K. Jiang, Q.-M. Liao, and S.-Y . Dai, “A novel white
 blood cell segmentation scheme using scale-space
 filtering and watershed clustering,” in Machine
 Learning and Cybernetics, 2003 International
-Conference on , vol. 5, Nov 2003, pp. 2820–2825
-V ol.5. [Online]. Available: http://ieeexplore .ieee .org/
-xpl/login .jsp?tp=&arnumber=1260033
+Conference on, vol. 5, Nov 2003, pp. 2820–2825
+V ol.5. [Online]. Available: http://ieeexplore.ieee.org/
+xpl/login.jsp?tp=&arnumber=1260033
 [Kaf07] L. Kaffer, “File:great male leopard in south afrikajd.jpg,”
  Wikipedia Commons, Jul. 2007. [Online].
-Available: https://commons .wikimedia .org/wiki/File:
-Great_male_Leopard_in_South_Afrika-JD .JPG
+Available: https://commons.wikimedia.org/wiki/File:
+Great_male_Leopard_in_South_Afrika-JD.JPG
 [KKV+14] V . Kalesnykiene, J.-k. Kamarainen, R. V outilainen,
 J. Pietilä, H. Kälviäinen, and H. Uusitalo,
 “Diaretdb1 diabetic retinopathy database and
 evaluation protocol,” 2014. [Online]. Available:
-http://www2 .it.lut.fi/project/imageret/diaretdb1/
+http://www2.it.lut.fi/project/imageret/diaretdb1/
 [KP92] J. M. Kasson and W. Plouffe, “An analysis of
 selected computer interchange color spaces,” ACM
-Transactions on Graphics (TOG) , vol. 11, no. 4, pp.
+Transactions on Graphics (TOG), vol. 11, no. 4, pp.
 373–405, 1992.
 [KP06] Z. Kato and T.-C. Pong, “A markov random
 field image segmentation model for color
 textured images,” Image and Vision Computing ,
 vol. 24, no. 10, pp. 1103–1114, 2006. [Online].
-Available: http://www .sciencedirect .com/science/
+Available: http://www .sciencedirect.com/science/
 article/pii/S0262885606001223
 [KSH12] A. Krizhevsky, I. Sutskever, and G. E. Hinton,
 “Imagenet classification with deep convolutional
 neural networks,” in Advances in neural information
-processing systems , 2012, pp. 1097–1105.
+processing systems, 2012, pp. 1097–1105.
 [KWT88] M. Kass, A. Witkin, and D. Terzopoulos,
 “Snakes: Active contour models,” International
 journal of computer vision , vol. 1, no. 4, pp.
 321–331, Jan. 1988. [Online]. Available: http:
-//link .springer .com/article/10 .1007/BF00133570
+//link.springer.com/article/10.1007/BF00133570
 [LKJ15] F.-F. Li, A. Karpathy, and J. Johnson,
 “CS231n: Convolutional neural networks for
 visual recognition,” 2015. [Online]. Available:
-http://cs231n .stanford .edu/
-[Low04] D. Lowe, “Distinctive image features from scale-invariant keypoints,” International Journal of
-Computer Vision , vol. 60, no. 2, pp. 91–110, 2004.
-[Online]. Available: http://dx .doi .org/10 .1023/B%
-3A VISI .0000029664 .99615 .94
+http://cs231n.stanford.edu/
+[Low04] D. Lowe, “Distinctive image features from scaleinvariant
+ keypoints,” International Journal of
+Computer Vision, vol. 60, no. 2, pp. 91–110, 2004.
+[Online]. Available: http://dx .doi.org/10.1023/B%
+3A VISI.0000029664.99615.94
 [LRAL08] A. Levin, A. Rav-Acha, and D. Lischinski,
 “Spectral matting,” Pattern Analysis and
 Machine Intelligence, IEEE Transactions on ,
 vol. 30, no. 10, pp. 1699–1712, 2008.
-[Online]. Available: http://ieeexplore .ieee .org/xpls/
-abs_all .jsp?arnumber=4547428
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=4547428
 [LRKT09] L. Ladický, C. Russell, P. Kohli, and P. Torr,
 “Associative hierarchical crfs for object class image
 segmentation,” in Computer Vision, 2009 IEEE 12th
 International Conference on , 2009, pp. 739–746.
-[Online]. Available: http://ieeexplore .ieee .org/xpls/
-abs_all .jsp?arnumber=5459248
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=5459248
 [LSD14] J. Long, E. Shelhamer, and T. Darrell, “Fully
 convolutional networks for semantic segmentation,”
 arXiv preprint arXiv:1411.4038 , 2014. [Online].
-Available: http://arxiv .org/abs/1411 .4038
+Available: http://arxiv.org/abs/1411.4038
 [MAFM08] M. Maire, P. Arbelaez, C. Fowlkes, and
 J. Malik, “Using contours to detect and localize
 junctions in natural images,” in Computer Vision
 and Pattern Recognition, 2008. CVPR 2008.
 IEEE Conference on , June 2008, pp. 1–8.
-[Online]. Available: http://ieeexplore .ieee .org/xpls/
-abs_all .jsp?arnumber=4587420
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=4587420
 [Man12] M. Manske, “File:randabschattung mikroskop
 kamera 6.jpg,” Wikipedia Commons,
  Dec. 2012. [Online]. Available:
- https://commons .wikimedia .org/wiki/File:
-Randabschattung_Mikroskop_Kamera_6 .JPG
+ https://commons .wikimedia.org/wiki/File:
+Randabschattung_Mikroskop_Kamera_6.JPG
 [MBLAGJ+07] S. Maldonado-Bascon, S. Lafuente-Arroyo, P. GilJimenez,
  H. Gomez-Moreno, and F. LopezFerreras,
  “Road-sign detection and recognition
 based on support vector machines,” Intelligent
 Transportation Systems, IEEE Transactions on ,
 vol. 8, no. 2, pp. 264–278, Jun. 2007.
-[Online]. Available: http://ieeexplore .ieee .org/xpls/
-abs_all .jsp?arnumber=4220659
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=4220659
 [MBVLG02] N. Moon, E. Bullitt, K. Van Leemput, and G. Gerig,
 “Automatic brain and tumor segmentation,” in Medical
  Image Computing and Computer-Assisted Intervention—MICCAI
@@ -1331,9 +1387,9 @@ images and its application to evaluating
 segmentation algorithms and measuring ecological
 statistics,” in Computer Vision, 2001. ICCV
 2001. Proceedings. Eighth IEEE International
-Conference on , vol. 2. IEEE, 2001, pp. 416–423.
-[Online]. Available: http://ieeexplore .ieee .org/xpls/
-abs_all .jsp?arnumber=937655
+Conference on, vol. 2. IEEE, 2001, pp. 416–423.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=937655
 [MHMK+14] L. Maier-Hein, S. Mersmann, D. Kondermann,
 S. Bodenstedt, A. Sanchez, C. Stock, H. G.
 Kenngott, M. Eisenmann, and S. Speidel, “Can
@@ -1341,11 +1397,11 @@ masses of non-experts train highly accurate
 image classifiers?” in Medical Image Computing
 and Computer-Assisted Intervention–MICCAI 2014 .
 Springer, 2014, pp. 438–445. [Online]. Available:
-http://opencas .webarchiv .kit.edu/?q=node/26
+http://opencas.webarchiv.kit.edu/?q=node/26
 [Min89] J. Mingers, “An empirical comparison of selection
 measures for decision-tree induction,” Machine
-Learning , vol. 3, no. 4, pp. 319–342, 1989.
-[Online]. Available: http://dx .doi .org/10 .1023/A%
+Learning, vol. 3, no. 4, pp. 319–342, 1989.
+[Online]. Available: http://dx.doi.org/10.1023/A%
 3A1022645801436
 [MSB12] G. Moser, S. B. Serpico, and J. A. Benediktsson,
 “Markov random field models for supervised land
@@ -1353,22 +1409,22 @@ Learning , vol. 3, no. 4, pp. 319–342, 1989.
 cover classification from very high resolution
 multispectral remote sensing images,” in Advances
 in Radar and Remote Sensing (TyWRRS), 2012
-Tyrrhenian Workshop on . IEEE, 2012, pp. 235–
-242. [Online]. Available: http://ieeexplore .ieee .org/
-xpl/login .jsp?tp=&arnumber=6381135
+Tyrrhenian Workshop on. IEEE, 2012, pp. 235–
+242. [Online]. Available: http://ieeexplore .ieee.org/
+xpl/login.jsp?tp=&arnumber=6381135
 [MSC] “Object class recognition image database.”
-[Online]. Available: http://research .microsoft .com/
+[Online]. Available: http://research.microsoft.com/
 vision/cambridge/recognition/
 [MSR] “Image understanding - research data,”
 Microsoft Research. [Online]. Available:
- http://research .microsoft .com/en-us/projects/
+ http://research.microsoft.com/en-us/projects/
 objectclassrecognition/
 [Mur12] K. P. Murphy, Machine learning: a probabilistic
-perspective . MIT press, 2012.
+perspective. MIT press, 2012.
 [OKS78] Y .-i. Ohta, T. Kanade, and T. Sakai, “An analysis
 system for scenes containing objects with substructures,”
  in Proceedings of the Fourth International
-Joint Conference on Pattern Recognitions , 1978, pp.
+Joint Conference on Pattern Recognitions, 1978, pp.
 752–754.
 [PAA+87] S. M. Pizer, E. P. Amburn, J. D. Austin,
 R. Cromartie, A. Geselowitz, T. Greer, B. ter
@@ -1376,168 +1432,169 @@ Haar Romeny, J. B. Zimmerman, and K. Zuiderveld,
 “Adaptive histogram equalization and its variations,”
 Computer vision, graphics, and image processing ,
 vol. 39, no. 3, pp. 355–368, 1987. [Online].
-Available: http://www .sciencedirect .com/science/
+Available: http://www .sciencedirect.com/science/
 article/pii/S0734189X8780186X
 [PC13] P. H. Pinheiro and R. Collobert, “Recurrent
 convolutional neural networks for scene parsing,”
 arXiv preprint arXiv:1306.2795 , 2013. [Online].
-Available: http://arxiv .org/abs/1306 .2795v1
+Available: http://arxiv.org/abs/1306.2795v1
 [PH05] C. Pantofaru and M. Hebert, “A
 comparison of image segmentation algorithms,”
 Robotics Institute , p. 336, 2005. [Online].
-Available: http://riweb-backend .ri.cmu .edu/
-pub_files/pub4/pantofaru _caroline _2005 _1/
-pantofaru_caroline_2005_1 .pdf
+Available: http://riweb-backend .ri.cmu.edu/
+pub_files/pub4/pantofaru_caroline_2005_1/
+pantofaru_caroline_2005_1.pdf
 [PS07] A. Protiere and G. Sapiro, “Interactive
 image segmentation via adaptive weighted
 distances,” Image Processing, IEEE Transactions
 on, vol. 16, no. 4, pp. 1046–1057, 2007.
-[Online]. Available: http://ieeexplore .ieee .org/xpls/
-abs_all .jsp?arnumber=4130436
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=4130436
 [PTN09] N. Plath, M. Toussaint, and S. Nakajima, “Multiclass
  image segmentation using conditional random
 fields and global classification,” in Proceedings
 of the 26th Annual International Conference on
-Machine Learning . ACM, 2009, pp. 817–824.
+Machine Learning. ACM, 2009, pp. 817–824.
 [PXP00] D. L. Pham, C. Xu, and J. L. Prince, “A
 survey of current methods in medical image
 segmentation,” Annual Review of Biomedical
-Engineering , vol. 2, no. 1, pp. 315–337, 2000,
+Engineering, vol. 2, no. 1, pp. 315–337, 2000,
 pMID: 11701515. [Online]. Available: http://
-dx.doi .org/10 .1146/annurev .bioeng .2.1.315
+dx.doi.org/10.1146/annurev.bioeng.2.1.315
 [Qui86] J. R. Quinlan, “Induction of decision trees,”
 Machine learning , vol. 1, no. 1, pp. 81–106,
-Aug. 1986. [Online]. Available: http://dx .doi .org/
+Aug. 1986. [Online]. Available: http://dx .doi.org/
 10.1023/A%3A1022643204877
-[Qui93] ——, C4.5: Programs for Machine Learning , P. Langley,
+[Qui93] ——, C4.5: Programs for Machine Learning, P. Langley,
  Ed. Morgan Kaufmann Publishers, Inc., 1993.
 [RKB04] C. Rother, V . Kolmogorov, and A. Blake, “Grabcut:
 Interactive foreground extraction using iterated
 graph cuts,” ACM Transactions on Graphics
-(TOG) , vol. 23, no. 3, pp. 309–314, 2004. [Online].
-Available: http://delivery .acm .org/10 .1145/1020000/
-1015720/p309-rother .pdf
+(TOG), vol. 23, no. 3, pp. 309–314, 2004. [Online].
+Available: http://delivery.acm.org/10.1145/1020000/
+1015720/p309-rother.pdf
 [RM00] J. B. Roerdink and A. Meijster, “The watershed
-transform: Definitions, algorithms and paralleliza-tion strategies,” Fundam. Inform. , vol. 41, no. 1-2,
+transform: Definitions, algorithms and parallelization
+ strategies,” Fundam. Inform., vol. 41, no. 1-2,
 pp. 187–228, 2000.
 [RM07] J. Reynolds and K. Murphy, “Figure-ground
 segmentation using a hierarchical conditional
 random field,” in Computer and Robot
 Vision, 2007. CRV ’07. Fourth Canadian
 Conference on , May 2007, pp. 175–182.
-[Online]. Available: http://ieeexplore .ieee .org/xpls/
-abs_all .jsp?arnumber=4228537
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=4228537
 [RMBK06] C. Rother, T. Minka, A. Blake, and V . Kolmogorov,
 “Cosegmentation of image pairs by histogram
 matching - incorporating a global constraint
 into mrfs,” in Computer Vision and Pattern
 Recognition, 2006 IEEE Computer Society
 Conference on , vol. 1, June 2006, pp. 993–
-1000. [Online]. Available: http://ieeexplore .ieee .org/
-xpls/abs_all .jsp?arnumber=1640859
+1000. [Online]. Available: http://ieeexplore.ieee.org/
+xpls/abs_all.jsp?arnumber=1640859
 [SAN+04] J. Staal, M. D. Abràmoff, M. Niemeijer,
-M. Viergever, B. Van Ginneken et al. , “Ridge-based
+M. Viergever, B. Van Ginneken et al., “Ridge-based
 vessel segmentation in color images of the retina,”
 Medical Imaging, IEEE Transactions on , vol. 23,
 no. 4, pp. 501–509, 2004. [Online]. Available:
-http://www .isi.uu.nl/Research/Databases/DRIVE/
+http://www.isi.uu.nl/Research/Databases/DRIVE/
 [SCZ08] F. Schroff, A. Criminisi, and A. Zisserman,
 “Object class segmentation using random
-forests.” in BMVC , 2008, pp. 1–10. [Online].
- Available: http://research .microsoft .com/pubs/
-72423/Criminisi_bmvc2008 .pdf
+forests.” in BMVC, 2008, pp. 1–10. [Online].
+ Available: http://research.microsoft.com/pubs/
+72423/Criminisi_bmvc2008.pdf
 [SJC08] J. Shotton, M. Johnson, and R. Cipolla,
 “Semantic texton forests for image categorization
 and segmentation,” in Computer vision and
 pattern recognition, 2008. CVPR 2008. IEEE
 Conference on . IEEE, Jun. 2008, pp. 1–8.
-[Online]. Available: http://ieeexplore .ieee .org/xpls/
-abs_all .jsp?arnumber=4587503
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=4587503
 [SM11] C. Sutton and A. McCallum, “An introduction
-to conditional random fields,” Machine Learning ,
+to conditional random fields,” Machine Learning,
 vol. 4, no. 4, pp. 267–373, 2011. [Online].
-Available: http://homepages .inf .ed.ac.uk/csutton/
-publications/crftutv2 .pdf
+Available: http://homepages .inf.ed.ac.uk/csutton/
+publications/crftutv2.pdf
 [Smi02] L. I. Smith, “A tutorial on principal components
-analysis,” Cornell University, USA , vol. 51, p. 52,
+analysis,” Cornell University, USA, vol. 51, p. 52,
 2002.
 [Smi04] B. T. Smith, “Lagrange multipliers tutorial in the
 context of support vector machines,” Memorial University
  of Newfoundland St. John’s, Newfoundland,
-Canada , Jun. 2004.
+Canada, Jun. 2004.
 [SSA12] D. Schiebener, J. Schill, and T. Asfour, “Discovery,
 segmentation and reactive grasping of unknown
-objects.” in Humanoids , 2012, pp. 71–77. [Online].
- Available: http://h2t .anthropomatik .kit.edu/
-pdf/Schiebener2012 .pdf
+objects.” in Humanoids, 2012, pp. 71–77. [Online].
+ Available: http://h2t .anthropomatik.kit.edu/
+pdf/Schiebener2012.pdf
 [SUM+11] D. Schiebener, A. Ude, J. Morimotot,
 T. Asfour, and R. Dillmann, “Segmentation
 and learning of unknown objects through physical
 interaction,” in Humanoid Robots (Humanoids),
 2011 11th IEEE-RAS International Conference
 on. IEEE, 2011, pp. 500–506. [Online].
-Available: http://ieeexplore .ieee .org/ielx5/6086637/
-6100798/06100843 .pdf
+Available: http://ieeexplore.ieee.org/ielx5/6086637/
+6100798/06100843.pdf
 [SWRC06] J. Shotton, J. Winn, C. Rother, and A. Criminisi,
 “Textonboost: Joint appearance, shape and context
 modeling for multi-class object recognition and
 segmentation,” in Computer Vision–ECCV 2006 .
 Springer, 2006, pp. 1–15. [Online]. Available: http:
-//link .springer .com/chapter/10 .1007/11744023_1
+//link.springer.com/chapter/10.1007/11744023_1
 [TNL14] J. Tighe, M. Niethammer, and S. Lazebnik,
 “Scene parsing with object instances and
 occlusion ordering,” in Computer Vision and
 
 Pattern Recognition (CVPR), 2014 IEEE
 Conference on . IEEE, 2014, pp. 3748–3755.
-[Online]. Available: http://ieeexplore .ieee .org/xpls/
-abs_all .jsp?arnumber=6909874
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=6909874
 [UPH05] R. Unnikrishnan, C. Pantofaru, and M. Hebert,
 “A measure for objective evaluation of
 image segmentation algorithms,” in Computer
 Vision and Pattern Recognition-Workshops, 2005.
 CVPR Workshops. IEEE Computer Society
 Conference on . IEEE, 2005, pp. 34–34.
-[Online]. Available: http://repository .cmu .edu/cgi/
-viewcontent .cgi?article=1365&context=robotics
+[Online]. Available: http://repository.cmu.edu/cgi/
+viewcontent.cgi?article=1365&context=robotics
 [vdMPvdH09] L. J. van der Maaten, E. O. Postma, and H. J.
 van den Herik, “Dimensionality reduction: A comparative
  review,” Journal of Machine Learning
-Research , vol. 10, no. 1-41, pp. 66–71, 2009.
+Research, vol. 10, no. 1-41, pp. 66–71, 2009.
 [VOC10] “V oc2010 preliminary results,” 2010. [Online].
-Available: http://host .robots .ox.ac.uk/pascal/VOC/
-voc2010/results/index .html
-[WAH97] G.-Q. Wei, K. Arbter, and G. Hirzinger, “Automatic
+Available: http://host.robots.ox.ac.uk/pascal/VOC/
+voc2010/results/index.html
+[W AH97] G.-Q. Wei, K. Arbter, and G. Hirzinger, “Automatic
 tracking of laparoscopic instruments by color
-coding,” in CVRMed-MRCAS’97 , ser. Lecture
+coding,” in CVRMed-MRCAS’97, ser. Lecture
 Notes in Computer Science, J. Troccaz, E. Grimson,
 and R. Mösges, Eds. Springer Berlin Heidelberg,
 1997, vol. 1205, pp. 357–366. [Online]. Available:
-http://dx .doi .org/10 .1007/BFb0029257
+http://dx.doi.org/10.1007/BFb0029257
 [YBCK10] Z. Yin, R. Bise, M. Chen, and T. Kanade, “Cell
 segmentation in microscopy imagery using a
 bag of local bayesian classifiers,” in Biomedical
 Imaging: From Nano to Macro, 2010 IEEE
 International Symposium on , Apr. 2010, pp. 125–
-128. [Online]. Available: http://ieeexplore .ieee .org/
-xpls/abs_all .jsp?arnumber=5490399
+128. [Online]. Available: http://ieeexplore .ieee.org/
+xpls/abs_all.jsp?arnumber=5490399
 [YHRF12] Y . Yang, S. Hallman, D. Ramanan, and
 C. C. Fowlkes, “Layered object models for
 image segmentation,” Pattern Analysis and
 Machine Intelligence, IEEE Transactions on ,
 vol. 34, no. 9, pp. 1731–1743, Sep. 2012.
-[Online]. Available: http://ieeexplore .ieee .org/xpls/
-abs_all .jsp?arnumber=6042883
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=6042883
 [ZBS01] Y . Zhang, M. Brady, and S. Smith, “Segmentation
 of brain MR images through a hidden Markov
 random field model and the expectationmaximization
  algorithm,” Medical Imaging, IEEE
-Transactions on , vol. 20, no. 1, pp. 45–57, 2001.
-[Online]. Available: http://ieeexplore .ieee .org/xpls/
-abs_all .jsp?arnumber=906424
+Transactions on, vol. 20, no. 1, pp. 45–57, 2001.
+[Online]. Available: http://ieeexplore.ieee.org/xpls/
+abs_all.jsp?arnumber=906424
 [ZGWX05] S.-C. Zhu, C.-E. Guo, Y . Wang, and Z. Xu, “What
 are textons?” International Journal of Computer
-Vision , vol. 62, no. 1-2, pp. 121–143, 2005.
+Vision, vol. 62, no. 1-2, pp. 121–143, 2005.
 [Zha12] Z. Zhang, “Microsoft kinect sensor and its effect,”
 MultiMedia, IEEE , vol. 19, no. 2, pp. 4–10, Feb.
 2012.
@@ -1546,9 +1603,10 @@ V . Vineet, Z. Su, D. Du, C. Huang, and
 P. H. Torr, “Conditional random fields as
 recurrent neural networks,” in Proceedings
 of the IEEE International Conference on
-Computer Vision , 2015, pp. 1529–1537. [Online].
-Available: http://www .robots .ox.ac.uk/~szheng/
-papers/CRFasRNN .pdfGLOSSARY
+Computer Vision, 2015, pp. 1529–1537. [Online].
+Available: http://www .robots.ox.ac.uk/~szheng/
+papers/CRFasRNN.pdf
+GLOSSARY
 ACM active contour model. 6
 BOV bag-of-visual-words. 5
 CNN Convolution Neuronal Network. 5, 9
@@ -1567,17 +1625,20 @@ SVM Support Vector Machine. 4, 6–8
 
 APPENDIX A
 TABLES
-Database Image Resolution (width ×height)Number
+Database Image Resolution (width × height)
+Number
 of
-ImagesNumber
+Images
+Number
 of
-ClassesChannels Data source
-Colon Crypt DB (302 px−1116 px)×(349 px−875px) 389 2 3 [CRSS]
-DIARETDB1 1500 px×1500 px 89 4 3 [KKV+14]
-KITTI Road (1226 px−1242 px)×(370 px−376px) 289 2 3 [FKG13]
-MSRCv1 (213 px−320px)×(213 px−320px) 240 9 3 [MSR]
-MSRCv2 (213 px−320px)×(162 px−320px) 591 23 3 [MSR]
-Open-CAS Endoscopic Datasets 640px×480px 120 2 3 [MHMK+14]
-PASCAL VOC 2012 (142 px−500px)×( 71 px−500px) 2913 20 3 [EVGW+12]
-Warwick-QU (567 px−775px)×(430 px−522px) 165 5 3 [CSM09]
+Classes
+Channels Data source
+Colon Crypt DB (302 px − 1116 px) × (349 px − 875 px) 389 2 3 [CRSS]
+DIARETDB1 1500 px × 1500 px 89 4 3 [KKV +14]
+KITTI Road (1226 px − 1242 px) × (370 px − 376 px) 289 2 3 [FKG13]
+MSRCv1 (213 px − 320 px) × (213 px − 320 px) 240 9 3 [MSR]
+MSRCv2 (213 px − 320 px) × (162 px − 320 px) 591 23 3 [MSR]
+Open-CAS Endoscopic Datasets 640 px × 480 px 120 2 3 [MHMK +14]
+PASCAL VOC 2012 (142 px − 500 px) × ( 71 px − 500 px) 2913 20 3 [EVGW +12]
+Warwick-QU (567 px − 775 px) × (430 px − 522 px) 165 5 3 [CSM09]
 Table I: An overview over publicly available image databases with a semantic segmentation ground trouth.
\ No newline at end of file
diff --git a/read/results/pypdf/1707.09725.txt b/read/results/pypdf/1707.09725.txt
index cef8e8c..edaac58 100644
--- a/read/results/pypdf/1707.09725.txt
+++ b/read/results/pypdf/1707.09725.txt
@@ -11,7 +11,8 @@ Reviewer: Prof. Dr.–Ing. R. Dillmann
 Second reviewer: Prof. Dr.–Ing. J. M. Zöllner
 Advisor: Dipl.–Inform. Michael Weber
 Research Period: 03. May 2017 – 03. August 2017
-KIT – University of the State of Baden-Wuerttemberg and National Research Center of the Helmholtz Association www.kit.eduarXiv:1707.09725v1  [cs.CV]  31 Jul 2017
+KIT – University of the State of Baden-Wuerttemberg and National Research Center of the Helmholtz Association www.kit.edu
+arXiv:1707.09725v1  [cs.CV]  31 Jul 2017
 
 Analysis and Optimization of Convolutional Neural
 Network Architectures
@@ -36,7 +37,7 @@ August 2017
 Abstract
 Convolutional Neural Networks (CNNs) dominate various computer vision tasks since
 Alex Krizhevsky showed that they can be trained effectively and reduced the top-5 error
-from 26.2 %to15.3 %on the ImageNet large scale visual recognition challenge. Many
+from 26.2 % to 15.3 % on the ImageNet large scale visual recognition challenge. Many
 aspects of CNNs are examined in various publications, but literature about the analysis
 and construction of neural network architectures is rare. This work is one step to close this
 gap. A comprehensive overview over existing techniques for CNN analysis and topology
@@ -46,7 +47,7 @@ evaluated. Additionally, some results are confirmed and quantified for CIFAR-100
 example, the positive impact of smaller batch sizes, averaging ensembles, data augmentation
 and test-time transformations on the accuracy. Other results, such as the positive impact of
 learned color transformation on the test accuracy could not be confirmed. A model which
-has only one million learned parameters for an input size of 32×32×3and 100 classes and
+has only one million learned parameters for an input size of32 ×32 ×3 and 100 classes and
 which beats the state of the art on the benchmark dataset Asirra, GTSRB, HASYv2 and
 STL-10 was developed.
 
@@ -54,7 +55,7 @@ Zusammenfassung
 Modelle welche auf Convolutional Neural Networks (CNNs) basieren sind in verschiedenen
 Aufgaben der Computer Vision dominant seit Alex Krizhevsky gezeigt hat dass diese
 effektiv trainiert werden können und er den Top-5 Fehler in dem ImageNet large scale visual
-recognition challenge Benchmark von 26.2 %auf15.3 %drücken konnte. Viele Aspekte
+recognition challenge Benchmark von26.2 % auf 15.3 % drücken konnte. Viele Aspekte
 von CNNs wurden in verschiedenen Publikationen untersucht, aber es wurden vergleichsweise
  wenige Arbeiten über die Analyse und die Konstruktion von Neuronalen Netzen
 geschrieben. Diese Masterarbeit stellt einen Schritt dar um diese Lücke zu schließen. Eine
@@ -184,20 +185,20 @@ low-level information given by raw pixels from digital images.
 Robots, search engines, self-driving cars, surveillance agencies and many others have
 applications which include one of the following six problems in computer vision as subproblems:
 
-•Classification :1The algorithm is given an image and kpossible classes. The task is
-to decide which of the kclasses the image belongs to. For example, an image from
-a self-driving cars on-board camera contains either paved road ,unpaved road or
+• Classification:1 The algorithm is given an image andk possible classes. The task is
+to decide which of thek classes the image belongs to. For example, an image from
+a self-driving cars on-board camera contains eitherpaved road, unpaved road or
 no road: Which of those given three classes is in the image?
-•Localization : The algorithm is given an image and one class k. The task is to find
-bounding boxes for all instances of k.
-•Detection : Given an image and kclasses, find bounding boxes for all instances of
+• Localization: The algorithm is given an image and one classk. The task is to find
+bounding boxes for all instances ofk.
+• Detection: Given an image andk classes, find bounding boxes for all instances of
 those classes.
-•Semantic Segmentation : Given an image and kclasses, classify each pixel.
-•Instance segmentation : Given an image and kclasses, classify each pixel as one of
-thekclasses, but distinguish different instances of the classes.
-•Content-based Image Retrieval : Given an image xandnimages in a database,
-find the top uimages which are most similar to x.
-There are many techniques to approach those problems, but since AlexNet [ KSH12] was
+• Semantic Segmentation: Given an image andk classes, classify each pixel.
+• Instance segmentation: Given an image andk classes, classify each pixel as one of
+the k classes, but distinguish different instances of the classes.
+• Content-based Image Retrieval: Given an imagex and n images in a database,
+find the topu images which are most similar tox.
+There are many techniques to approach those problems, but since AlexNet [KSH12] was
 published, all of those problems have high-quality solutions which make use of Convolutional
 Neural Networks (CNNs) [HZRS15a, LAE+16, RFB15, DHS16, SKP15].
 Today, most neural networks are constructed by rules of thumb and gut feeling. The
@@ -205,7 +206,7 @@ architectures evolved and got deeper, more hyperparameters were added. Although
 are methods for analyzing CNNs, those methods are not enough to determine all steps in
 the development of network architectures without gut feeling. A detailed introduction to
 CNNs as well as nine methods for analysis of CNNs is given in Chapter 2.
-1Classification is also called identification if the classes are humans. Another name is object recognition ,
+1 Classification is also called identification if the classes are humans. Another name is object recognition,
 although the classes can be humans and animals as well.
 
 1. Introduction
@@ -232,125 +233,139 @@ This chapter introduces linear image filters in Section 2.1, then standard layer
 CNNs are explained in Section 2.2. The layer block pattern is described in Section 2.3,
 transition layers in Section 2.4 and nine ways to analyze CNNs are described in Section 2.5.
 2.1. Linear Image Filters
-Alinear image filter (also called a filter bank or akernel) is an element F∈Rkw×kh×d,
-wherekwrepresents the filter’s width, khthe filter’s height and dthe number of input
-channels. The filter Fis convolved with the image I∈Rw×h×dto produce a new image I′.
-The output image I′has only one channel. Each pixel I′(x,y)of the output image gets
+A linear image filter(also called afilter bank or akernel) is an elementF ∈Rkw×kh×d,
+where kw represents the filter’s width,kh the filter’s height andd the number of input
+channels. The filterF is convolved with the imageI ∈Rw×h×d to produce a new imageI′.
+The output imageI′ has only one channel. Each pixelI′(x,y) of the output image gets
 calculated by point-wise multiplication of one filter element with one element of the original
-imageI:
-I′(x,y) =⌊kw
-2⌋∑
+image I:
+I′(x,y) =
+⌊kw
+2 ⌋∑
 ix=1−⌈kw
-2⌉⌊kh
-2⌋∑
+2 ⌉
+⌊kh
+2 ⌋∑
 iy=1−⌈kh
-2⌉d∑
-ic=1I(x+ix,y+iy,ic)·F(ix,iy,ic)
-This procedure is explained by Figure 2.1. It is essentially a discrete convolution.I∈R7×7
-Filter kernel
-F∈R3×3Result of point-wise
-multiplicationI′∈R7×7
-104116116112584747
+2 ⌉
+d∑
+ic=1
+I(x+ ix,y + iy,ic) ·F(ix,iy,ic)
+This procedure is explained by Figure 2.1. It is essentially a discrete convolution.
+I∈R7×7
+Filter kernelF∈R3×3
+Result of point-wisemultiplication
+I′∈R7×7
+10411611611258 47 47
 1099711411610511045
-1161041111099746100
+11610411110997 46100
 1014710997115116101
-1144799971169997
-116999711646112104
-11263118614946489-3-1
--653
-2-80936-333-109
+11447 99 9711699 97
+11699 9711646112104
+1126311861 49 46 48
+9 -3 -1
+-6 5 3
+2 -8 0
+936-333-109
 -282545291
-94-7920-4-254-498-662-849-642187
+94-7920
+-4-254-498-662-849-642187
 -52045240211388215-861
 -340559-105185-138-180503
 -718429350173251268-655
 -567-53-7580571-12824
 -408596-55036826976156
 30264787922381154660
-Figure 2.1.: Visualization of the application of a linear k×k×1image filter. For each pixel of the
-output image, k2multiplications and k2additions of the products have to be calculated.
+Figure 2.1.:Visualization of the application of a lineark×k×1 image filter. For each pixel of the
+output image,k2 multiplications andk2 additions of the products have to be calculated.
 
 2. Convolutional Neural Networks
 One important detail is how boundaries are treated. There are four common ways of
 boundary treatment:
-•don’t compute : The image I′will be smaller than the original image. I′∈
-R(w−kw+1)×(h−kh+1)×d3, to be exact.
-•zero padding : The image Iis padded by zeros where the filter would access elements
+• don’t compute: The image I′ will be smaller than the original image. I′ ∈
+R(w−kw+1)×(h−kh+1)×d3 , to be exact.
+• zero padding: The imageI is padded by zeros where the filter would access elements
 which do not exist. This will result in edges being detected at the border if the border
 pixels are not black, but doesn’t need any computation.
-•nearest: Repeat the pixel which is closest to the boundary.
-•reflect: Reflect the image at the boundaries.
+• nearest: Repeat the pixel which is closest to the boundary.
+• reflect: Reflect the image at the boundaries.
 Common tasks that can be done with linear filters include edge detection, corner detection,
 smoothing, sharpening, median filtering, box filtering. See Figure A.1 for five examples.
 Please note that the result of a filtering operation is again an image. This means filters
-can be applied successively. While each pixel after one filtering operation with a 3×3
-filter got influenced by 3·3 = 9pixels of the original image, two successively applied 3×3
+can be applied successively. While each pixel after one filtering operation with a3 ×3
+filter got influenced by3 ·3 = 9 pixels of the original image, two successively applied3 ×3
 filters increase the area of the original image which influenced the output. The output is
-then influenced by 25 pixel. This is called the receptive field . The kind of pattern which is
-detected by a filter is called a feature. The bigger the receptive field is, the more complex
+then influenced by 25 pixels. This is called the receptive field. The kind of pattern which is
+detected by a filter is called a feature. The bigger the receptive field is, the more complex
 features can get, as they are able to consider more of the original image. Instead of taking
-one5×5filter with 25 parameters, one might consider to take two successive 3×3filters
-with 2·(3·3) = 18parameters. The 5×5filter is a strict superset of possible filtering
-operations compared to the two 3×3filters, but the relevance of this technique will become
+one 5 ×5 filter with 25 parameters, one might consider to take two successive3 ×3 filters
+with 2 ·(3 ·3) = 18 parameters. The 5 ×5 filter is a strict superset of possible filtering
+operations compared to the two3 ×3 filters, but the relevance of this technique will become
 clear in Section 2.2.
 2.2. CNN Layer Types
-While the idea behind deep MLPs is that feature hierarchies capture the important parts
-of the input more easily, CNNs are inspired by the idea of translational invariance : Many
+While the idea behind deep MLPs is thatfeature hierarchiescapture the important parts
+of the input more easily, CNNs are inspired by the idea oftranslational invariance: Many
 features in an image are translationally invariant. For example, if a car is developed, one
-could try to detect it by its parts [ FGMR10 ]. But then there are many positions at which
+could try to detect it by its parts [FGMR10]. But then there are many positions at which
 the wheels could be. Combining those, it is desirable to capture low-level, translationally
 invariant features at lower layers of an artificial neural network (ANN) and in higher layers
 high-level features which are combinations of the low-level features.
 Also, models should utilize the fact that the pixels of images are ordered. One way to use
-this is by learning image filters in so called convolutional layers .
-While MLPs vectorize the input, the input of a layer in a CNN are feature maps . A feature
-map is a matrix m∈Rw×h, but typically the width equals the height ( w=h). For an RGB
+this is by learning image filters in so calledconvolutional layers.
+While MLPs vectorize the input, the input of a layer in a CNN arefeature maps. A feature
+map is a matrixm∈Rw×h, but typically the width equals the height (w= h). For an RGB
 
 2.2. CNN Layer Types
-input image, the number of feature maps is d= 3. Each color channel is a feature map.
-Since AlexNet [ KSH12] almost halved the error in the ImageNet challenge, CNNs are
+input image, the number of feature maps isd= 3. Each color channel is a feature map.
+Since AlexNet [KSH12] almost halved the error in the ImageNet challenge, CNNs are
 state-of-the-art in various computer vision tasks.
 Traditional CNNs have three important building tools:
-•Convolutional layers with a non-linear activation function as described in Section 2.2.1,
-•pooling layers as described in Section 2.2.2 and
-•normalization layers as described in Section 2.2.4.
+• Convolutional layers with a non-linear activation function as described in Section 2.2.1,
+• pooling layers as described in Section 2.2.2 and
+• normalization layers as described in Section 2.2.4.
 2.2.1. Convolutional Layers
-Convolutional layers take several feature maps as input and produce nfeature maps1as
-output, where nis the number of filters in the convolution layer. The filter weights of
+Convolutional layers take several feature maps as input and producen feature maps1 as
+output, where n is the number of filters in the convolution layer. The filter weights of
 the linear convolutions are the parameters which are adapted to the training data. The
-numbernof filters as well as the filter’s size kw×khare hyperparameters of convolutional
-layers. Sometimes, it is denoted as n@kw×kh. Although the filter depth is usually omitted
-in the notation, the filters are of dimension kw×kh×d(i−1), whered(i−1)is the number of
-feature maps of the input layer (i−1).
-Another hyperparameter of convolution layers is the stride s∈N≥1and the padding.
-Padding (usually zero-padding [ SCL12,SEZ+13,HZRS15a ]) is used to make sure that the
+number n of filters as well as the filter’s sizekw ×kh are hyperparameters of convolutional
+layers. Sometimes, it is denoted asn@kw×kh. Although the filter depth is usually omitted
+in the notation, the filters are of dimensionkw ×kh ×d(i−1), whered(i−1) is the number of
+feature maps of the input layer(i−1).
+Another hyperparameter of convolution layers is the strides ∈N≥1 and the padding.
+Padding (usually zero-padding [SCL12, SEZ+13, HZRS15a]) is used to make sure that the
 size of the feature maps doesn’t change.
 The hyperparameters of convolutional layers are
-•the number of filters n∈N≥1,
-•kw,kh∈N≥1of the filter size kw×kh×d(i−1),
-•the activation function of the layer (see Table B.3) and
-•the strides∈N≥1
-Typical choices are n∈{32,64,128},kw=kh=k∈{1,3,5,11}such as in [ KSH12,
-SZ14, SLJ+15], rectified linear unit (ReLU) activation and s= 1.
+• the number of filters $n \in \mathbb{N}_{\geq 1}$,
+• $k_w, k_h \in \mathbb{N}_{\geq 1}$ of the filter size $k_w \times k_h \times d^{(i-1)}$,
+• the activation function of the layer (see Table B.3) and
+• the stride $s \in \mathbb{N}_{\geq 1}$
+Typical choices are $n \in \{32, 64, 128\}$, $k_w = k_h = k \in \{1, 3, 5, 11\}$ such as in [KSH12,
+SZ14, SLJ+15], rectified linear unit (ReLU) activation and $s = 1$.
 The concept of weight sharing is crucial for CNNs. This concept was introduced in [WHH+89].
 With weight sharing, the filters can be learned with stochastic gradient descent (SGD) just
 like MLPs. In fact, every CNN has an equivalent MLP which computes the same function
 if only the flattened output is compared.
-1also called activation maps orchannels
+1also calledactivation maps or channels
 
 2. Convolutional Neural Networks
 This is easier to see when the filtering operation is denoted formally:
-o(i)(x) =b+k∑
-j=1wij·xjwithi∈{1,...,w}×{ 1,...,h}×{ 1,...,d}[2.1]
-o(x,y,z )(I) =b+⌊kw
-2⌋∑
+o(i)(x) = b+
+k∑
+j=1
+wij ·xj with i∈{1,...,w }×{ 1,...,h }×{ 1,...,d } [2.1]
+o(x,y,z)(I) = b+
+⌊kw
+2 ⌋∑
 ix=1−⌈kw
-2⌉⌊kh
-2⌋∑
+2 ⌉
+⌊kh
+2 ⌋∑
 iy=1−⌈kh
-2⌉d∑
-ic=1Fz(ix,iy,ic)·I(x+ix,y+iy,ic) [2.2]
-with a bias b∈R,x∈{1,...,w},y∈{1,...,h}andz∈{1,...,d}
+2 ⌉
+d∑
+ic=1
+Fz(ix,iy,ic) ·I(x+ ix,y + iy,ic) [2.2]
+with a biasb∈R, x∈{1,...,w }, y∈{1,...,h }and z∈{1,...,d }
 One can see that most weights of the equivalent MLP are zero and many weights are
 equivalent. Hence the advantage of CNNs compared to MLPs is the reduction of parameters.
 The effect of fewer parameters is that less training data is necessary to get suitable
@@ -358,112 +373,134 @@ estimations for those. This means a MLP which is able to compute the same functi
 CNN will likely have worse results on the same dataset, if a CNN architecture is suitable
 for the dataset.
 See Figure 2.2 for a visualization of the application of a convolutional layer.
-3feature maps
-(e.g. RGB)nfeature mapsnfilters of
-sizek×k×3
-widthwwidthw heighth heighthneural
+3 feature maps
+(e.g. RGB) n feature maps
+n filters of
+size k× k× 3
+width
+w
+width
+w
+height h
+height h
+neural
 network
-dataapply
+data
+apply
 ...
 ...
-......
 ...
 ...
-Figure 2.2.: Application of a single convolutional layer with nfilters of size k×k×3with stride
-s= 1to input data of size width ×height with three channels.
+...
+...
+Figure 2.2.:Application of a single convolutional layer withn filters of sizek×k×3 with stride
+s= 1 to input data of size width×height with three channels.
 
 2.2. CNN Layer Types
-A convolutional layer with nfilters of size kw×khand SAMEpadding after d(i−1)feature
-maps of size sx×syhasn·d(i−1)·(kw·kh)parameters if no bias is used. In contrast, a fully
+A convolutional layer withn filters of sizekw ×kh and SAME padding afterd(i−1) feature
+maps of sizesx×sy has n·d(i−1) ·(kw·kh) parameters if no bias is used. In contrast, a fully
 connected layer which produces the same output size and does not use a bias would have
-n·d(i−1)·(sx×sy)2parameters. This means a convolutional layer has drastically fewer
+n·d(i−1) ·(sx ×sy)2 parameters. This means a convolutional layer has drastically fewer
 parameters. On the one hand, this means it can learn less complex decision boundaries. On
 the other hand, it means fewer parameters have to be learned and hence the optimization
 procedure needs fewer examples and the optimization objective is simpler.
-It is particularly interesting to notice that even a convolutional layer of 1×1filters does
-learn a linear combination of the dinput feature maps. This can be used for dimensionality
-reduction, if there are fewer 1×1filters in a convolutional layer than input feature maps.
+It is particularly interesting to notice that even a convolutional layer of1 ×1 filters does
+learn a linear combination of thedinput feature maps. This can be used for dimensionality
+reduction, if there are fewer1 ×1 filters in a convolutional layer than input feature maps.
 Another insight recently got important: Every fully connected layer has an equivalent
-convolutional layer which has the same weights.2This way, one can use the complete
+convolutional layer which has the same weights.2 This way, one can use the complete
 classification network as a very complex non-linear image filter which can be used for
 semantic segmentation.
-A fully connected layer with d∈N≥1inputs and n∈N≥1nodes can be interpreted as a
-convolutional layer with an input of shape 1×1×dandnfilters of size 1×1. This will
-produce an output shape 1×1×n. Every single output is connected to all of the inputs.
+A fully connected layer withd∈N≥1 inputs andn∈N≥1 nodes can be interpreted as a
+convolutional layer with an input of shape1 ×1 ×d and n filters of size1 ×1. This will
+produce an output shape1 ×1 ×n. Every single output is connected to all of the inputs.
 When a convolutional layer is followed by a fully connected layer, it is necessary to vectorize
-to feature maps. If the 1×1convolutional filter layer is applied to the vectorized output,
+to feature maps. If the1 ×1 convolutional filter layer is applied to the vectorized output,
 it is completely equivalent to a fully connected layer. However, the vectorization can be
 omitted if a convolution layer without padding and a filter size equal to the feature maps
 size is applied. This was used by [LSD15].
 2.2.2. Pooling Layers
-Pooling summarizes a p×parea of the input feature map. Just like convolutional layers,
-pooling can be used with a stride of s∈N>1. Ass≥2is the usual choice, pooling layers
-are sometimes also called subsampling layers . Typically, p∈{2,3,4,5}ands= 2such as
+Pooling summarizes ap×p area of the input feature map. Just like convolutional layers,
+pooling can be used with a stride ofs∈N>1. As s≥2 is the usual choice, pooling layers
+are sometimes also calledsubsampling layers. Typically,p∈{ 2,3,4,5 }and s= 2 such as
 for AlexNet [KSH12] and VGG-16 [SZ14].
-The type of summary for the set of activations Avaries between the functions listed
-in Table 2.1, spatial pyramid pooling as introduced in [ HZRS14] and generalizing pooling
+The type of summary for the set of activationsA varies between the functions listed
+in Table 2.1, spatial pyramid pooling as introduced in [HZRS14] and generalizing pooling
 functions as introduced in [LGT16].
-2But convolutional layers only have equivalent fully connected layers if the output feature map is 1×1
+2But convolutional layers only have equivalent fully connected layers if the output feature map is1 × 1
 
 2. Convolutional Neural Networks
 Name Definition Used by
-Max pooling max{a∈A}[BPL10, KSH12]
-Average / mean pooling1
-|A|∑
-a∈AaLeNet-5 [LBBH98] and [KSlB+10]
-ℓ2pooling√∑
-a∈Aa2[Le13]
+Max pooling $\max\{a \in A\}$ [BPL10, KSH12]
+Average / mean pooling $\frac{1}{|A|} \sum_{a \in A} a$ LeNet-5 [LBBH98] and [KSlB+10]
+$\ell_2$ pooling $\sqrt{\sum_{a \in A} a^2}$ [Le13]
 Stochastic pooling * [ZF13]
-Table 2.1.: Pooling types for a set Aof activations a∈R.
-(*) For stochastic pooling, each of the p×pactivation values aiin the pooling region gets
-picked with probability pi=ai∑
-aj∈Aaj. This assumes the activations aiare non-negative.
+Table 2.1.: Pooling types for a set $A$ of activations $a \in \mathbb{R}$.
+(*) For stochastic pooling, each of the $p \times p$ activation values $a_i$ in the pooling region gets
+picked with probability $p_i = \frac{a_i}{\sum_{a_j \in A} a_j}$. This assumes the activations $a_i$ are non-negative.
 Pooling is applied for three reasons: To get local translational invariance, to get invariance
 against minor local changes and, most important, for data reduction to1
-s2th of the data by
-using strides of s>1.
+s2 th of the data by
+using strides ofs> 1.
 See Figure 2.3 for a visualization of max pooling.
-793594070090509375929643
-2×2max pooling
-95999722
-Figure 2.3.: 2×2max pooling applied to a feature map of size 6×4with stride s= 2and padding.
-Average pooling of p×pareas with stride scan be replaced by a convolutional layer. If
-the input of the pooling layer are d(i−1)feature maps, the convolutional layer has to have
-d(i−1)filters of size p×pand strides. Theith filter has the values
+7 9 3 5 9 4
+0 7 0 0 9 0
+5 0 9 3 7 5
+9 2 9 6 4 3
+2 × 2 max pooling
+9 5 9
+9 9 7
+2
+2
+Figure 2.3.: 2×2 max pooling applied to a feature map of size 6×4 with stride s = 2 and padding.
+Average pooling of p×p areas with stride s can be replaced by a convolutional layer. If
+the input of the pooling layer are $d^{(i-1)}$ feature maps, the convolutional layer has to have
+$d^{(i-1)}$ filters of size p×p and stride s. The ith filter has the values
 
-1
-p2...1
+
+1
+p2 ... 1
 p2
-.........
+... ... ...
 1
-p2...1
-p2
+p2 ... 1
+p2
+
 
-for the dimension iand the zero matrix
+for the dimensioni and the zero matrix
 
-0...0
-.........
-0...0
+
+0 ... 0
+... ... ...
+0 ... 0
+
 
-for all other dimensions i= 1,...,d(i−1).
+for all other dimensionsi= 1,...,d (i−1).
 
 2.2. CNN Layer Types
 2.2.3. Dropout
 Dropout is a technique used to prevent overfitting and co-adaptations of neurons by setting
-the output of any neuron to zero with probability p. It was introduced in [ HSK+12] and is
+the output of any neuron to zero with probabilityp. It was introduced in [HSK+12] and is
 well-described in [SHK+14].
-A Dropout layer can be implemented as follows: For an input inof any shape s, a tensor of
-the same shape D∈{0,1}sis sampled, where each element diis sampled independently
+A Dropout layer can be implemented as follows: For an inputin of any shapes, a tensor of
+the same shapeD∈{ 0,1 }s is sampled, where each elementdi is sampled independently
 from a Bernoulli distribution. The results are element-wise multiplied to calculate the
-output outof the Dropout layer:
-out=D⊙in with di∼B(1,p)
-where⊙is the Hadamard product
-(A⊙B)i,j:= (A)i,j(B)i,j
-Hence every value of the input gets set to zero with a dropout probability of p. Typically,
-Dropout is used with p= 0.5. Layers closer to the input usually have a lower dropout probability
+output out of the Dropout layer:
+$\text{out} = D \odot \text{in}$ with $d_i \sim B(1, p)$
+where $\odot$ is the Hadamard product
+$(A \odot B)_{i,j} := (A)_{i,j} (B)_{i,j}$
+Hence every value of the input gets set to zero with a dropout probability of p. Typically,
+Dropout is used with p = 0.5. Layers closer to the input usually have a lower dropout probability
  than later layers. In order to keep the expected output at the same value, the
 output of a dropout layer is multiplied with1
-1−pwhen dropout is enabled [ Las17,tf-16b].
+1−p when dropout is enabled [Las17, tf-16b].
 At inference time, dropout is disabled.
 Dropout is usually only applied after fully connected layers, but not after convolutional
 layers as it usually increases the test error as pointed out in [GG16].
@@ -476,78 +513,81 @@ in [HSL+16] dropout only complete layers. This can be done by having Residual ne
 which have one identity connection and one residual feature connection. Hence the residual
 features can be dropped out and the identity connection remains.
 2.2.4. Normalization Layers
-One problem when training deep neural networks is internal covariate shift : While the
+One problem when training deep neural networks isinternal covariate shift: While the
 parameters of layers close to the output are adapted to some input produced by lower layers,
 those lower layers' parameters are also adapted. This leads to the parameters in the upper
 layers being worse. A very low learning rate has to be chosen to adjust for the fact that the
 input features might drastically change over time.
 
 2. Convolutional Neural Networks
-One way to approach this problem is by normalizing mini-batches as described in [ IS15]. A
-Batch Normalization layer with d-dimensional input x= (x(1),...,x(d))is first normalized
+One way to approach this problem is by normalizing mini-batches as described in [IS15]. A
+Batch Normalization layer withd-dimensional inputx= (x(1),...,x (d)) is first normalized
 point-wise to
-ˆx(k)=x(k)−¯x(k)
+ˆx(k) = x(k) −¯x(k)
 √
-s′[x(k)]2+ε
-with ¯x(k)=1
-m∑m
-i=1x(k)
-ibeing the sample mean and s′[x(k)]2=1
-m∑m
+s′[x(k)]2 + ε
+with ¯x(k) = 1
+m
+∑m
+i=1 x(k)
+i being the sample mean ands′[x(k)]2 = 1
+m
+∑m
 i=1(x(k)
-i−¯x(k))the
-sample variance where m∈N≥1is the number of training samples per mini-batch, ε>0
-being a small constant to prevent division by zero and x(k)
-iis the activation of neuron kfor
-training sample i.
-Additionally, for each activation x(k)two parameters γ(k),β(k)are introduced which scale
+i −¯x(k)) the
+sample variance wherem∈N≥1 is the number of training samples per mini-batch,ε> 0
+being a small constant to prevent division by zero andx(k)
+i is the activation of neuronk for
+training samplei.
+Additionally, for each activationx(k) two parametersγ(k),β(k) are introduced which scale
 and shift the feature:
-y(k)=γ(k)·ˆx(k)+β(k)
+y(k) = γ(k) ·ˆx(k) + β(k)
 In the case of fully connected layers, this is applied to the activation, before the non-linearity
 is applied. If it is applied after the activation, it harms the training in early stages. For
-convolution, only one γand oneβis learned per feature map.
-One important special case is γ(k)=√
-s′[x(k)]2+εandβ(k)=¯x(k), which would make the
+convolution, only oneγ and oneβ is learned per feature map.
+One important special case isγ(k) =
+√
+s′[x(k)]2 + ε and β(k) = ¯x(k), which would make the
 Batch Normalization layer an identity layer.
-During evaluation time,3the expected value and the variance are calculated once for the
+During evaluation time,3 the expected value and the variance are calculated once for the
 complete dataset. An unbiased estimate of the empirical variance is used.
 The question where Batch Normalization layers (BN) should be applied and for which
 reasons is still open. For Dropout, it doesn’t matter if it is applied before or after the
 activation function. Considering this, the possible options for the order are:
-1. CONV / FC→BN→activation function →Dropout→...
-2. CONV / FC→activation function →BN→Dropout→...
-3. CONV / FC→activation function →Dropout→BN→...
-4. CONV / FC→Dropout→BN→activation function →...
-The authors of [ IS15] suggest to use Batch Normalization before the activation function
+1. CONV / FC→BN →activation function→Dropout →...
+2. CONV / FC→activation function→BN →Dropout →...
+3. CONV / FC→activation function→Dropout →BN →...
+4. CONV / FC→Dropout →BN →activation function→...
+The authors of [IS15] suggest to use Batch Normalization before the activation function
 as in Items 1 and 4. Batch Normalization after the activation led to better results in
-https://github .com/ducha-aiki/caffenet-benchmark/blob/master/batchnorm .md
-Another normalization layer is Local Response Normalization as described in [ KSH12],
-which includes ℓ2normalization as described in [ WWQ13 ]. Those two normalization layers,
+https://github.com/ducha-aiki/caffenet-benchmark/blob/master/batchnorm.md
+Another normalization layer is Local Response Normalization as described in [KSH12],
+which includesℓ2 normalization as described in [WWQ13]. Those two normalization layers,
 however, are superseded by Batch Normalization.
-3also called inference time
+3also calledinference time
 
 2.3. CNN Blocks
 2.3. CNN Blocks
 This section describes more complex building blocks than simple layers. CNN blocks act
 similar to a layer, but they are themselves composed of layers.
 2.3.1. Residual Blocks
-Residual blocks as introduced in [ HZRS15a ] are a milestone in computer vision. They
+Residual blocks as introduced in [HZRS15a] are a milestone in computer vision. They
 enabled the computer vision community to go from about 16 layers as in VGG 16-D (see
 Appendix D.3) to several hundred layers. The key idea of deep residual networks (ResNets)
-as introduced in [ HZRS15a ] is to add an identity connection which skips two layers. This
+as introduced in [HZRS15a] is to add an identity connection which skips two layers. This
 identity connection adds the feature maps onto the other feature maps and thus requires
 the output of the input layer of the residual block to be of the same dimension as the last layer
 of the residual block.
-Formally, it can be described as follows. If xiare the feature maps after layer iandx0is
-the input image, His a non-linear transformation of feature maps, then
-y=H(x)
+Formally, it can be described as follows. Ifxi are the feature maps after layeri and x0 is
+the input image,H is a non-linear transformation of feature maps, then
+y= H(x)
 describes a traditional CNN. Note that this could be multiple layers. A residual block as
 visualized in Figure 2.4 is described by
-y=H(x) +x
-In [HZRS15a ], they only used residual skip connections to skip two layers. Hence, if
-convi(xi)describes the application of the convolutional layer ito the input xiwithout the
+y= H(x) + x
+In [HZRS15a], they only used residual skip connections to skip two layers. Hence, if
+convi(xi) describes the application of the convolutional layeri to the inputxi without the
 nonlinearity, then such a residual block is
-xi+2= convi+1(ReLU(conv i(xi))) +xi
+$x_{i+2} = \text{conv}_{i+1}(\text{ReLU}(\text{conv}_i(x_i))) + x_i$
 Figure 2.4.: ResNet module
 Image source: [HZRS15a]
 [HM16] provides some insights why deep residual networks are successful.
@@ -556,77 +596,82 @@ Image source: [HZRS15a]
 2.3.2. Aggregation Blocks
 Two common ways to add more parameters to neural networks are increasing their depth
 by adding more layers or increasing their width by adding more neurons / filters. Inception
-blocks [AM15] implicitly started a new idea which was explicitly described in [ XGD+16] as
-“ResNeXt block”: Increasing the cardinality C∈N≥1. By cardinality, the authors describe
-the concept of having Csmall convolutional networks with the same topology but different
+blocks [AM15] implicitly started a new idea which was explicitly described in [XGD+16] as
+“ResNeXt block”: Increasing the cardinalityC ∈N≥1. By cardinality, the authors describe
+the concept of havingC small convolutional networks with the same topology but different
 weights. This concept is visualized in Figure 2.5. Please note that Figure 2.5 does not
 combine aggregation blocks with residual blocks as the authors did.
 256-d in
-concatenatetotal 32
+concatenate
+total 32
 groups
 ...
-128-d out4 @1×1×256
-4 @3×3×44 @1×1×256
-4 @3×3×44 @1×1×256
-4 @3×3×4
-Figure 2.5.: Aggregation block with a cardinality of C= 32. Each of the 32 groups is a 2-layer
-convolutional network. The first layer receives 256 feature maps and applies four 1×1
-filters to it. The second layer applies four 3×3filters. Although every group has
+128-d out
+4 @1 ×1 ×256
+4 @3 ×3 ×4
+4 @1 ×1 ×256
+4 @3 ×3 ×4
+4 @1 ×1 ×256
+4 @3 ×3 ×4
+Figure 2.5.:Aggregation block with a cardinality ofC = 32. Each of the 32 groups is a 2-layer
+convolutional network. The first layer receives 256 feature maps and applies four1 ×1
+filters to it. The second layer applies four3 ×3 filters. Although every group has
 the same topology, the learned weights are different. The outputs of the groups are
 concatenated.
 The hyperparameters of an aggregation block are:
-•The topology of the group members.
-•The cardinality C∈N≥1. Note that a cardinality of C= 1is equivalent in every
+• The topology of the group members.
+• The cardinalityC ∈N≥1. Note that a cardinality ofC = 1 is equivalent in every
 aspect to using the group network without an aggregation block.
 
 2.3. CNN Blocks
 2.3.3. Dense Blocks
-Dense blocks are collections of convolutional layers which are introduced in [ HLW16]. The
+Dense blocks are collections of convolutional layers which are introduced in [HLW16]. The
 idea is to connect each convolutional layer directly to subsequent convolutional layers.
-Traditional CNNs with Llayers and one input layer have Lconnections between layers,
+Traditional CNNs withL layers and one input layer haveL connections between layers,
 but dense blocks haveL(L+1)
-2connections between layers. The input feature maps are
+2 connections between layers. The input feature maps are
 concatenated in depth. According to the authors, this prevents features from being relearned
  and allows much fewer filters per convolutional layer. Where AlexNet and VGG-16
 have several hundred filters per convolutional layer (see Tables D.2 and D.3), the authors
 used only on the order of 12 feature maps per layer.
 A dense block is visualized in Figure 2.6.
 256-d in
-k@3×3
+k @ 3 ×3
+concatenate
+k @ 3 ×3
 concatenate
-k@3×3
-concatenate256-d
+256-d
 k-d
-(256 +k)-d
+(256 + k)-d
 k-d
-(256 +L·k)-d out
-Figure 2.6.: Dense block with L= 2layers and a growth factor of k.
+(256 + L·k)-d out
+Figure 2.6.: Dense block withL= 2 layers and a growth factor ofk.
 Dense blocks have five hyperparameters:
-•The activation function being used. The authors use ReLU.
-•The sizekw×khof filters. The authors use kw=kh= 3.
-•The number of layers L, whereL= 2is a simple convolutional layer.
-•The number kof filters added per layer (called growth rate in the paper)
-It might be necessary use 1×1convolutions to reduce the number of L·kfeature maps.
+• The activation function being used. The authors use ReLU.
+• The sizekw ×kh of filters. The authors usekw = kh = 3.
+• The number of layersL, whereL= 2 is a simple convolutional layer.
+• The numberk of filters added per layer (calledgrowth ratein the paper)
+It might be necessary to use 1×1 convolutions to reduce the number of L·k feature maps.
 
 2. Convolutional Neural Networks
 2.4. Transition Layers
 Transition layers are used to overcome constraints imposed by resource limitations or
 architectural design choices. One constraint is the number of feature maps (see Appendix C.3
 for details). In order to reduce the number of feature maps while still keeping as much
-relevant information as possible in the network, a convolutional layer iwithkifilters of
-the shape 1×1×ki−1is added. The number of filters kidirectly controls the number of
+relevant information as possible in the network, a convolutional layeri with ki filters of
+the shape1 ×1 ×ki−1 is added. The number of filterski directly controls the number of
 generated feature maps.
 In order to reduce the dimensionality (width and height) of the feature maps, one typically
 applies pooling.
 Global pooling is another type of transition layer. It applies pooling over the complete
-feature map size to shrink the input to a constant 1×1feature map and hence allows one
+feature map size to shrink the input to a constant1 ×1 feature map and hence allows one
 network to have different input sizes.
 
 2.5. Analysis Techniques
 2.5. Analysis Techniques
 CNNs have dozens of hyperparameters and ways to tune them. Although there are
-automatic methods like random search [ BB12], grid search [ LBOM98 ], gradient-based
-hyperparameter optimization [ MDA15] and Hyperband [ LJD+16] some actions need a
+automatic methods like random search [BB12], grid search [LBOM98], gradient-based
+hyperparameter optimization [MDA15] and Hyperband [LJD+16] some actions need a
 manual investigation to improve the model’s quality. For this reason, analysis techniques
 which guide developers and researchers to the important hyperparameters are necessary. In
 the following, nine diagnostic techniques are explained.
@@ -656,43 +701,49 @@ are not covered by the training set and thus indicate the need to collect more d
 
 2. Convolutional Neural Networks
 2.5.2. Confusion Matrices
-Aconfusion matrix is a matrix (c)ij∈NK×K
-≥0, whereK∈N≥2is the number of classes,
-which contains all correct and wrong classifications. The item cijis the number of times
-items of class iwere classified as class j. This means the correct classification is on the
-diagonalciiand all wrong classifications are of the diagonal. The sum∑K
-i=1∑K
-j=1cijis the
-total number of samples which were evaluated and∑
-i=1cii∑K
-i=1∑K
-j=1cijis the accuracy.
-The sumsr(i) =∑K
-j=1cijof each class iare worth being investigated as they show if the
+A confusion matrix is a matrix $(c)_{ij} \in \mathbb{N}_{\geq 0}^{K \times K}$,
+where $K \in \mathbb{N}_{\geq 2}$ is the number of classes,
+which contains all correct and wrong classifications. The item $c_{ij}$ is the number of times
+items of class $i$ were classified as class $j$. This means the correct classification is on the
+diagonal $c_{ii}$ and all wrong classifications are off the diagonal. The sum
+$$
+\sum_{i=1}^{K} \sum_{j=1}^{K} c_{ij}
+$$
+is the total number of samples which were evaluated and
+$$
+\frac{\sum_{i=1}^{K} c_{ii}}
+     {\sum_{i=1}^{K}
+      \sum_{j=1}^{K} c_{ij}}
+$$
+is the accuracy.
+The sums $r(i) = \sum_{j=1}^{K} c_{ij}$
+of each class $i$ are worth being investigated as they show if the
 classes are skewed. If the number of samples of one class dominates the data set, then the
 classifier can get a high accuracy by simply always prediction the most common class. If
 the accuracy of the classifier is close to the a priory probability of the most common class,
 techniques to deal with skewed classes might help.
 An automatic criterion to check for this problem is
-accuracy≤max({r(i)|i= 1,...,k})∑k
-i=1r(i)+ε
-whereεis a small value to compensate the fact that some examples might be correct just
+$$\text{accuracy} \leq
+\frac{\max(\{r(i) \mid i = 1, \dots, k\})}{\sum_{i=1}^{k} r(i)}
++ \varepsilon$$
+where $\varepsilon$ is a small value to compensate the fact that some examples might be correct just
 by chance.
 Other values which should be checked are the class-wise sensitivities:
-s(k) =# correctly identified instances of class k
-# instances of class k=ckk
-r(k)∈[0,1]
-Ifs(i)is much lower than s(j), it is an indicator that more or cleaner training data is
-necessary for s(i).
+$$s(k) = \frac{\text{\# correctly identified instances of class } k}
+{\text{\# instances of class } k} = \frac{c_{kk}}
+{r(k)} \in [0, 1]$$
+If $s(i)$ is much lower than $s(j)$, it is an indicator that more or cleaner training data is
+necessary for $s(i)$.
 The class-wise confusion
-fconfusability (k1,k2) =ck1k2∑K
-j=1ck1j
-indicates if class k1gets often classified as class k2. The highest values here can indicate
+$$f_{\text{confusability}}(k_1, k_2) =
+\frac{c_{k_1 k_2}}
+{\sum_{j=1}^{K} c_{k_1 j}}$$
+indicates if class $k_1$ gets often classified as class $k_2$. The highest values here can indicate
 if two classes should be merged or a specialized model for separating those classes could
 improve the overall system.
 2.5.3. Validation Curves: Accuracy, loss and other metrics
 Validation curves display a hyperparameter (e.g., the training epoch) on the horizontal
-axis and a quality metric on the vertical axis. Accuracy, error = (1−accuracy )or loss are
+axis and a quality metric on the vertical axis. Accuracy,error = (1 −accuracy) or loss are
 typical quality metrics. Other quality metrics can be found in [OHIL16].
 In case that the number of training epochs are used as the examined hyperparameter,
 validation curves give an indicator if training longer improves the model’s performance. By
@@ -700,12 +751,17 @@ validation curves give an indicator if training longer improves the model’s pe
 2.5. Analysis Techniques
 plotting the error on the training set as well as the error on a validation set, one can also
 estimate if overfitting might become a problem. See Figure 2.7 for an example.
-10 20 30 40 50 60 70 80 90 1000.20.40.60.8
+10 20 30 40 50 60 70 80 90 100
+0.2
+0.4
+0.6
+0.8
 overfitting
-EpochsErrorTraining set
+Epochs
+Error Training set
 Validation set
-Figure 2.7.: A typical validation curve: In this case, the hyperparameter is the number of epochs
-and the quality metric is the error (1−accuracy ). The longer the network is trained,
+Figure 2.7.:A typical validation curve: In this case, the hyperparameter is the number of epochs
+and the quality metric is the error(1 −accuracy). The longer the network is trained,
 the better it gets on the training set. At some point the network is fit too well to the
 training data and loses its capability to generalize. At this point the quality curve of
 the training set and the validation set diverge. While the classifier is still improving on
@@ -715,36 +771,46 @@ When the epoch-loss validation curve has plateaus as in Figure 2.8, this means t
 problem of plateaus are (i) to change weight initialization if the plateau was at the beginning,
 (ii) regularizing the model or (iii) changing the optimization algorithm.
 Loss functions
-The loss function (also called error function orcost function ) is a function which assigns a
+The loss function (also callederror functionor cost function) is a function which assigns a
 real value to a complex event like the predicted class of a feature vector. It is used to define
-theobjective function . For classification problems the loss function is typically cross-entropy
-withℓ1orℓ2regularization, as it was described in [NH92]:
-ECE(W) =−∑
-x∈XK∑
-k=1[tx
+the objective function. For classification problems the loss function is typically cross-entropy
+with ℓ1 or ℓ2 regularization, as it was described in [NH92]:
+ECE(W) = −
+∑
+x∈X
+K∑
+k=1
+[tx
 klog(ox
 k) + (1−tx
 k) log(1−ox
 k)]
   
-cross-entropy data loss+λ1·ℓ1∑
-w∈W|w|+λ2·ℓ2∑
-w∈Ww2
-
+cross-entropy data loss
++ λ1 ·
+ℓ1
+  ∑
+w∈W
+|w|+λ2 ·
+ℓ2
+  ∑
+w∈W
+w2
+  
 model complexity loss
-whereWare the weights, Xis the training data set, K∈N≥0is the number of classes and
+where W are the weights,X is the training data set,K ∈N≥0 is the number of classes and
 tx
-kindicates if the training example xis of classk.ox
-kis the output of the classification
-algorithm which depends on the weights. λ1,λ2∈[0,∞)weights the regularization and is
-typically smaller than 0.1.
+k indicates if the training examplex is of classk. ox
+k is the output of the classification
+algorithm which depends on the weights.λ1,λ2 ∈[0,∞) weights the regularization and is
+typically smaller than0.1.
 
 2. Convolutional Neural Networks
-Figure 2.8.: Example for a validation curve (plotted loss function) with plateaus. The dark orange
+Figure 2.8.:Example for a validation curve (plotted loss function) with plateaus. The dark orange
 curve is smoothed, but the non-smoothed curve is also plotted in light orange.
 The data loss is positive whenever the classification is not correct, whereas the model
 complexity loss is higher for more complex models. The model complexity loss exists due
-to the intuition of Occam’s razor : If two models explain the same data with an accuracy of
+to the intuition ofOccam’s razor: If two models explain the same data with an accuracy of
 100 %, the simpler model is to be preferred.
 A reason to show the loss for the validation curve technique instead of other quality metrics
 is that it contains more information about the quality of the model. A reason against the
@@ -753,46 +819,52 @@ loss only shows relative learning progress whereas the accuracy shows absolute p
 human readers.
 There are three observations in the loss validation curve which can help to improve the
 network:
-•If the loss does not decrease for several epochs, the learning rate might be too low.
+• If the loss does not decrease for several epochs, the learning rate might be too low.
 The optimization process might also be stuck in a local minimum.
-•Loss being NAN might be due to too high learning rates. Another reason is division
+• Loss being NAN might be due to too high learning rates. Another reason is division
 by zero or taking the logarithm of zero. In both cases, adding a small constant like
-10−7fixes the problem.
-•If the loss-epoch validation curve has a plateau at the beginning, the weight initialization
+$10^{-7}$ fixes the problem.
+• If the loss-epoch validation curve has a plateau at the beginning, the weight initialization
  might be bad.
 
 2.5. Analysis Techniques
 Quality criteria
 There are several quality criteria for classification models. Most quality criteria are based
-the confusion matrix cwhich denotes at cijthe number of times the real class was iandj
+on the confusion matrix $c$ which denotes at $c_{ij}$ the number of times the real class was $i$ and $j$
 was predicted. This means the diagonal contains the number of correct predictions. For
-the following, let ti=∑k
-j=1cijbe the number of training samples for class i. The most
+the following, let $t_i = \sum_{j=1}^{k} c_{ij}$
+be the number of training samples for class $i$. The most
 common quality criterion is accuracy:
-accuracy (c) =∑k
-i=1cii∑k
-i=1ti∈[0,1]
+$$
+\text{accuracy}(c) =
+\frac{\sum_{i=1}^{k} c_{ii}}
+{\sum_{i=1}^{k} t_i}
+\in [0, 1]
+$$
 One problem of accuracy as a quality criterion are skewed classes. If one class is by far
 more common than all other classes, then the simplest way to achieve a high score is to
 always classify everything as the most common class.
 In order to fix this problem, one can use the mean accuracy:
-mean-accuracy (c) =1
-k·k∑
-i=1cii
-ti∈[0,1]
-For two-class problems there are many other metrics like precision, recall and Fβ-score.
+$$
+\text{mean-accuracy}(c) =
+\frac{1}{k} \cdot
+\sum_{i=1}^{k}
+\frac{c_{ii}}{t_i}
+\in [0, 1]
+$$
+For two-class problems there are many other metrics like precision, recall and $F_\beta$-score.
 Quality criteria for semantic segmentation are explained in [Tho16].
 Besides the quality of the classification result, several other quality criteria are important
 in practice:
-•Speed of evaluation for new images,
-•latency,
-•power consumption,
-•robustness against (non)random perturbations in the training data (see [ SZS+13,
+• Speed of evaluation for new images,
+• latency,
+• power consumption,
+• robustness against (non)random perturbations in the training data (see [SZS+13,
 PMW+15]),
-•robustness against (non)random perturbations in the training labels (see [ NDRT13 ,
+• robustness against (non)random perturbations in the training labels (see [NDRT13,
 XXE12]),
-•model size
-As reducing the floating point accuracy allows to process more data on a given device [ Har15],
+• model size
+As reducing the floating point accuracy allows to process more data on a given device [Har15],
 analysis under this aspect is also highly relevant in some scenarios.
 However, the following focuses on the quality of the classification result.
 
@@ -807,16 +879,21 @@ the networks performance. Having the training set’s learning curve, it is poss
 if the capacity of the model to fit the data is high enough for the desired classification error.
 The error on the validation set should never be expected to be significantly lower than the
 error on the training set. If the error on the training set is too high, then more data will
-nothelp. Instead, the model or the training algorithm need to be adjusted.
+not help. Instead, the model or the training algorithm need to be adjusted.
 If the training set’s learning curve is significantly higher than the validation set’s learning
 curve, then removing features (e.g., by decreasing the images resolution), more training
 samples or more regularization will help.
-10 20 30 40 50 60 70 80 90 1000.20.40.6
-avoidable biasvariance
-human-levelerror
-Training samplesErrorValidation set
+10 20 30 40 50 60 70 80 90 100
+0.2
+0.4
+0.6
+avoidable bias
+variance
+human-level error
+Training samples
+Error Validation set
 Training set
-Figure 2.9.: A typical learning curve: The more data is used for training, the more errors a given
+Figure 2.9.:A typical learning curve: The more data is used for training, the more errors a given
 architecture will make to fit the given training data. At the same time, it is expected
 that the training data gets more similar to the true distribution of the data which
 should be captured by the test data. At some point, the error on the training and
@@ -838,59 +915,59 @@ well. However, it is not the desired solution.
 For classification problems in computer vision, there are two types of visualizations which
 help to diagnose such problems. Both color superpixels of the original image to convey
 information how the model used those superpixels:
-•Correct class heatmap : The probability of the correct class is encoded to give a
+• Correct class heatmap: The probability of the correct class is encoded to give a
 heat map which superpixels are important for the correct class. This can also be done
 by setting the opacity accordingly.
-•Most-likely class image : Each of the most likely classes for all superpixels is
+• Most-likely class image: Each of the most likely classes for all superpixels is
 represented by a color. The colored image thus gives clues why different predictions
 were assigned a high probability.
 Two methods to generate such images are explained in the following.
 Occlusion Sensitivity Analysis
-Occlusion sensitivity analysis is described in [ ZF14]. The idea is to occlude a part of the
-image by something. This could be a gray square as in [ ZF14] or a black superpixel as
+Occlusion sensitivity analysis is described in [ZF14]. The idea is to occlude a part of the
+image by something. This could be a gray square as in [ZF14] or a black superpixel as
 in [RSG16]. Then the classifier is run on the image again. This is done for each region (e.g.,
 superpixel or position of the square) and the regions are then colored to generate either a
 correct class heatmap of the most-likely class image. It is important to note that the color
-at regionridenotes the result if riis occluded.
+at regionri denotes the result ifri is occluded.
 Both visualizations are shown in Figure 2.10. One can see that the network makes sensible
 predictions for this image of the class “Pomeranian”. However, the image of the class “Afghan
 Hound” gets confused with “Ice lolly”, which is a sign that this needs further investigation.
 Gradient-based approaches
-In [SVZ13], a gradient-based approach was used to generate image-specific class saliency
+In [SVZ13], a gradient-based approach was used to generateimage-specific class saliency
 maps. The authors describe the problem as a ranking problem, where each pixel of the
-imageI0is assigned a score Sc(I0)for a classcof interest. CNNs are non-linear functions,
-but they can be approximated by the first order Taylor expansion Sc(I)≈wTI+bwhere
-wis the derivative of ScatI0.
+image $I_0$ is assigned a score $S_c(I_0)$ for a class $c$ of interest. CNNs are non-linear functions,
+but they can be approximated by the first order Taylor expansion $S_c(I) \approx w^T I + b$ where
+$w$ is the derivative of $S_c$ at $I_0$.
 
 2. Convolutional Neural Networks
 2.5.6. Argmax Method
-Theargmax method has two variants:
-•Fixed class argmax : Propagate all elements of a given class through the network
+The argmax methodhas two variants:
+• Fixed class argmax: Propagate all elements of a given class through the network
 and analyze which neurons are activated most often / have the highest activation.
-•Fixed neuron argmax : Propagate the data through the network and find the n
+• Fixed neuron argmax: Propagate the data through the network and find then
 data elements which cause the highest activation for a given neuron.
-Note that a “neuron” is a filter in a CNN. The amount of activation of a filter Fby an
-imageIis calculated by applying FtoIand calculating the element-wise sum of the result.
-Fixed-neuron argmax was applied in [ ZF14]. However, they did not stop with that. Besides
-showingthe9imageswhichcausedthehighestactivation, theyalsotrainedadeconvolutional
+Note that a “neuron” is a filter in a CNN. The amount of activation of a filterF by an
+image I is calculated by applyingF to I and calculating the element-wise sum of the result.
+Fixed-neuron argmax was applied in [ZF14]. However, they did not stop with that. Besides
+showing the 9 images which caused the highest activation, they also trained a deconvolutional
 neural network to project the activation of the filter back into pixel space.
 The fixed neuron argmax can be used qualitatively to get an impression of the kind of
-features which are learned. This is useful to diagnose problems, for example in [ AM15] it is
+features which are learned. This is useful to diagnose problems, for example in [AM15] it is
 described that the network recognized the class “dumbbell” only if a hand was present, too.
 Fixed neuron argmax can also be used quantitatively to estimate the amount of parameters
 being shared between classes or how many parameters are mainly assigned to which classes.
 Going one step further from the fixed neuron argmax method is using an optimization
 algorithm to change an initial image minimally in such a way that any desired class gets
-predicted. This is called caricaturization in [MV16].
+predicted. This is calledcaricaturization in [MV16].
 2.5.7. Feature Map Reconstructions
-Feature map visualizations such as the ones made in [ ZF14] (see Figure 2.11) give insights
+Feature map visualizations such as the ones made in [ZF14] (see Figure 2.11) give insights
 into the learned features. This shows what the network emphasizes. However, it is not
 necessarily the case that the feature maps allow direct and easy conclusions about the
-learned features. This technique is called inversion in [MV16].
+learned features. This technique is calledinversion in [MV16].
 A key idea of feature map visualizations is to reconstruct a layers input, given its activation.
 This makes it possible find which inputs would cause neurons to activate with extremely
 high or low values.
-More recent work like [ NYC16] tries to make the reconstructions appearance look more
+More recent work like [NYC16] tries to make the reconstructions appearance look more
 natural.
 
 2.5. Analysis Techniques
@@ -901,28 +978,34 @@ initializations, the learned weights should still be comparable.
 If the set of learned filters changes with initialization, this might be an indicator for too
 little capacity of that layer. Hence adding more filters to that layer could improve the
 performance.
-Filters can be compared with the k-translation correlation as introduced in [ZCZL16]:
+Filters can be compared with thek-translation correlation as introduced in [ZCZL16]:
 ρk(Wi,Wj) = max
-(x,y)∈{−k,...,k}2\(0,0)⟨Wi,T(Wj,x,y)⟩f
-∥Wi∥2∥Wj∥2∈[−1,1],
-whereT(·,x,y)denotes the translation of the first operand by (x,y), with zero padding at
-the borders to keep the shape. ⟨·,·⟩fdenotes the flattened inner product, where the two
+(x,y)∈{−k,...,k}2\(0,0)
+⟨Wi,T(Wj,x,y )⟩f
+∥Wi∥2 ∥Wj∥2
+∈[−1,1],
+where $T(\cdot, x, y)$ denotes the translation of the first operand by $(x, y)$, with zero padding at
+the borders to keep the shape. $\langle \cdot, \cdot \rangle_f$ denotes the flattened inner product, where the two
 operands are flattened into column vectors before applying the standard inner product. The
-closer the absolute value of the k-translation correlation to one, the more similar two filters
-Wi,Wjare. According to [ ZCZL16], standard CNNs like AlexNet (see Appendix D.2) and
+closer the absolute value of thek-translation correlation to one, the more similar two filters
+Wi,Wj are. According to [ZCZL16], standard CNNs like AlexNet (see Appendix D.2) and
 VGG-16 (see Appendix D.3) have many filters which are highly correlated. They found
-this by comparing the averaged maximum k-translational correlation of the networks with
-Gaussian-distributed initialized filters. The averaged maximum k-translational correlation
+this by comparing theaveraged maximumk-translational correlationof the networks with
+Gaussian-distributed initialized filters. The averaged maximumk-translational correlation
 is defined as
-¯ρk(W) =1
-NN∑
-i=1Nmax
-j=1,j̸=iρk(Wi,Wj)
-whereNis the number of filters in the layer WandWidenotes the ith filter.
+$$\bar{\rho}_k(W) =
+\frac{1}{N}
+\sum_{i=1}^{N}
+\max_{
+j = 1, j \neq i
+}^{N}
+\rho_k(W_i, W_j)
+$$
+where $N$ is the number of filters in the layer $W$ and $W_i$ denotes the $i$th filter.
 2.5.9. Weight update tracking
 Andrej Karpathy proposed in the 5th lecture of CS231n to track weight updates to check if
 the learning rate is well-chosen. He suggests that the weight update should be in the order
-of10−3. If the weight update is too high, then the learning rate has to be decreased. If the
+of $10^{-3}$. If the weight update is too high, then the learning rate has to be decreased. If the
 weight update is too low, then the learning rate has to be increased.
 The order of the weight updates as well as possible implications highly depend on the model
 and the training algorithm. See Appendix B.5 for a short overview of training algorithms
@@ -932,24 +1015,24 @@ for neural networks.
 2.6. Accuracy boosting techniques
 There are techniques which can almost always be applied to improve accuracy of CNN
 classifiers:
-•Ensembles [CMS12]
-•Training-time augmentation (see Appendix B.2)
-•Test-time transformations [DDFK16, How13, HZRS15b]
-•Pre-training and fine-tuning [ZDGD14, GDDM14]
-One of the most simple ensemble techniques which was introduced in [ CMS12] is averaging
-the prediction of nclassifiers. This improves the accuracy even if the classifiers use exactly
+• Ensembles [CMS12]
+• Training-time augmentation (see Appendix B.2)
+• Test-time transformations [DDFK16, How13, HZRS15b]
+• Pre-training and fine-tuning [ZDGD14, GDDM14]
+One of the most simple ensemble techniques which was introduced in [CMS12] is averaging
+the prediction ofn classifiers. This improves the accuracy even if the classifiers use exactly
 the same training setup by reducing variance.
 Data augmentation techniques give the optimizer the possibility to take invariances like
 rotation into account by generating artificial training samples from real training samples.
 Data augmentation hence reduces bias and variance with no cost at inference time.
 Data augmentation at inference time reduces the variance of the classifier. Similar to using
 an ensemble, it increases the computational cost of inference.
-Pretrainingtheclassifieronanotherdatasettoobtainstartfromagoodpositionorfinetuning
+Pretraining the classifier on another dataset to start from a good position or finetuning
 a model which was originally created for another task is also a common technique.
 
 2.6. Accuracy boosting techniques
-Figure 2.10.: Occlusion sensitivity analysis by [ ZF14]: The left column shows three example images,
-where a gray square occluded a part of the image. This gray squares center (x,y)was
+Figure 2.10.:Occlusion sensitivity analysis by [ZF14]: The left column shows three example images,
+where a gray square occluded a part of the image. This gray squares center(x,y) was
 moved over the complete image and the classifier was run on each of the occluded
 images. The probability of the correct class, depending on the gray squares position,
 is showed in the middle column. One can see that the predicted probability of the
@@ -959,7 +1042,7 @@ it always predicts the correct class if the head is visible. However, if the hea
 dog is occluded, it predicts other classes.
 
 2. Convolutional Neural Networks
-Figure 2.11.: Filter visualization from [ ZF14]: The filters themselves as well as the input feature
+Figure 2.11.:Filter visualization from [ZF14]: The filters themselves as well as the input feature
 maps which caused the highest activation are displayed.
 
 3. Topology Learning
@@ -977,64 +1060,73 @@ layers / neurons into the network.
 In the following, Cascade-Correlation, Meiosis Networks and Automatic Structure Optimization
  are introduced.
 3.1.1. Cascade-Correlation
-Cascade-Correlation was introduced in [ FL89]. It generates a cascading architecture which
+Cascade-Correlation was introduced in [FL89]. It generates a cascading architecture which
 is similar to dense block described in Section 2.3.3.
 Cascade-Correlation works as follows:
-1.Initialization : The number of input nodes and the number of output nodes are
+1. Initialization: The number of input nodes and the number of output nodes are
 defined by the problem. Create a minimal, fully connected network for those.
-2.Training : Train the network until the error no longer decreases.
-3.Candidate Generation : Generate candidate nodes. Each candidate node is connected
+2. Training: Train the network until the error no longer decreases.
+3. Candidate Generation: Generate candidate nodes. Each candidate node is connected
  to all inputs. They are not connected to other candidate nodes and not
 connected to the output nodes.
 
 3. Topology Learning
-4.Correlation Maximization : Train the weights of the candidates by maximizing S,
-the correlation between candidates output value Vwith the networks residual error:
-S=∑
-o∈O⏐⏐⏐⏐⏐⏐∑
-p∈T(
-Vp−¯V)
-(Ep,o−¯Eo)⏐⏐⏐⏐⏐⏐
-whereOis the set of output nodes, Tis the training set, Vpis the candidate neurons
-activation for a training pattern p.Ep,ois the residual output error at node ofor
-patternp.¯Vand ¯Eoare averaged values over all elements of T. This step is finished
+4. Correlation Maximization: Train the weights of the candidates by maximizing $S$,
+the correlation between the candidate's output value $V$ and the network's residual error:
+$$
+S =
+\sum_{o \in O}
+\left|
+\sum_{p \in T}
+\left(
+V_p - \bar{V}
+\right)
+\left(E_{p,o} - \bar{E}_o\right)
+\right|
+$$
+where $O$ is the set of output nodes, $T$ is the training set, $V_p$ is the candidate neuron's
+activation for a training pattern $p$. $E_{p,o}$ is the residual output error at node $o$ for
+pattern $p$. $\bar{V}$ and $\bar{E}_o$ are averaged values over all elements of $T$. This step is finished
 when the correlation no longer increases.
-5.Candidate selection : Keep the candidate node with the highest correlation, freeze
+5. Candidate selection: Keep the candidate node with the highest correlation, freeze
 its incoming weights and add connections to the output nodes.
-6.Continue : If the error is higher than desired, continue with step 2.
+6. Continue: If the error is higher than desired, continue with step 2.
 One network with three hidden nodes trained by Cascade-Correlation is shown in Figure 3.1.
 1
-Figure 3.1.: A Cascade-Correlation network with three input nodes (red) and one bias node (gray)
+Figure 3.1.:A Cascade-Correlation network with three input nodes (red) and one bias node (gray)
 to the left, three hidden nodes (green) in the middle and two output nodes in the upper
 right corner. The black squares represent frozen weights which are found by correlation
 maximization whereas the white squares are trainable weights.
 3.1.2. Meiosis Networks
-Meiosis Networks are introduced in [ Han89]. In contrast to most MLPs and CNNs, where
-weights are deterministic and fixed at prediction time, each weight wijin Meiosis networks
+Meiosis Networks are introduced in [Han89]. In contrast to most MLPs and CNNs, where
+weights are deterministic and fixed at prediction time, each weightwij in Meiosis networks
 follows a normal distribution:
-wij∼N(µij,σ2
+wij ∼N(µij,σ2
 ij)
 
 3.2. Pruning approaches
-Hence every connection has two learned parameters: µijandσ2
+Hence every connection has two learned parameters:µij and σ2
 ij.
 The key idea of Meiosis networks is to allow neurons to perform Meiosis, which is cell
-division. A node jis splitted, when the random part dominates the value of the sampled
+division. A node $j$ is split when the random part dominates the value of the sampled
 weights: ∑
 iσij∑
-iµij>1and∑
+iµij
+>1 and
+∑
 kσjk∑
-kµjk>1
+kµjk
+>1
 The mean of the new nodes is sampled around the old mean, half the variance is assigned
 to the new connections.
 Hence Meiosis networks only change the number of neurons per layer. They do not add
 layers or add skip connections.
 3.1.3. Automatic Structure Optimization
-Automatic Structure Optimization (ASO) was introduced in [ BM93] for the task of online
- handwriting recognition. It makes use of the confusion matrix C= (cij)∈Nk×k
+Automatic Structure Optimization (ASO) was introduced in [BM93] for the task of online
+ handwriting recognition. It makes use of the confusion matrixC = ( cij) ∈Nk×k
 ≥0
 (see Section 2.5.2) to guide the topology learning. They define a confusion-symmetry matrix
-Swithsij=sji=cij·cji. The maximum of Sdefines where the ASO algorithm adds
+$S$ with $s_{ij} = s_{ji} = c_{ij} \cdot c_{ji}$. The maximum of $S$ defines where the ASO algorithm adds
 more parameters. The details how the resources are added are not transferable to CNNs.
 3.2. Pruning approaches
 Pruning approaches start with a network which is bigger than necessary and prune it. The
@@ -1047,41 +1139,43 @@ Pruning generally works as follows:
 2. prune weights according to a pruning criterion and
 3. retrain the pruned network.
 This procedure can be repeated.
-One family of pruning criterions uses the Hessian matrix . For example, Optimal Brain
-Damage (OBD) as introduced in [ LDS+89]. For every single parameter k, OBD calculates
-the effect on the objective function of deleting k. The authors call the effect of the deletion
+One family of pruning criteria uses the Hessian matrix. For example, Optimal Brain
+Damage (OBD) as introduced in [LDS+89]. For every single parameter $k$, OBD calculates
+the effect on the objective function of deleting $k$. The authors call the effect of the deletion
 
 3. Topology Learning
-of parameter kthe saliency sk. The parameters with the lowest saliency are deleted, which
+of parameter $k$ the saliency $s_k$. The parameters with the lowest saliency are deleted, which
 means they are set to 0 and are not updated anymore.
-A follow-up method called Optimal Brain Surgeon [HSW93] claims to choose the weights
+A follow-up method calledOptimal Brain Surgeon[HSW93] claims to choose the weights
 in a much better way. This requires, however, to calculate the inverse Hessian matrix
-H−1∈Rn×nwheren∈Nis typically n>106.
-A much simpler and computationally cheaper pruning criterion is the weight magnitude .
-[HPTD15] prunes all weights wwhich are below a threshold θ:
-w←
+H−1 ∈Rn×n where n∈N is typicallyn> 106.
+A much simpler and computationally cheaper pruning criterion is theweight magnitude.
+[HPTD15] prunes all weightsw which are below a thresholdθ:
+w←
+
 
-wifw≥θ
-0otherwise
+
+w if w≥θ
+0 otherwise
 3.3. Genetic approaches
 The general idea of genetic algorithms (GAs) is to encode the solution space as genes, which
 can recombine themselves via crossover and inversion. An introduction to such algorithms
 is given in [ES03].
-Commonly used techniques to generate neural networks by GAs are NEAT [ SM02] and its
+Commonly used techniques to generate neural networks by GAs are NEAT [SM02] and its
 successors HyperNEAT [SDG09] and ES-HyperNEAT [RLS10].
 The results, however, are of unacceptable quality: On MNIST (see Appendix E), where
-random chance gives 10 %accuracy, even simple topologies trained with SGD achieve
-about 92 %accuracy [ TF-16a] and state of the art is 99.79 %[WZZ+13], the HyperNEAT
-algorithm achieves only 23.9 %accuracy [VH13].
-Kocmánek shows in [ Koc15] that HyperNEAT approaches can achieve 96.47 %accuracy
+random chance gives10 % accuracy, even simple topologies trained with SGD achieve
+about 92 % accuracy [TF-16a] and state of the art is99.79 % [WZZ+13], the HyperNEAT
+algorithm achieves only23.9 % accuracy [VH13].
+Kocmánek shows in [Koc15] that HyperNEAT approaches can achieve96.47 % accuracy
 on MNIST. Kocmánek mentions that HyperNEAT becomes slower with each hidden layer
 so that not more than three hidden layers could be trained. At the same time, VGG19
  [SZ14] already has 19 hidden layers and ResNets are successfully trained with 1202 layers
 in [HZRS15a].
 [LX17] shows that Genetic algorithms can achieve competitive results on MNIST and
-SVHN, but the best results on CIFAR-10 were 7.10 %error whereas the state of the art is
-at3.74 %[HLW16]. Similarly, the Genetic algorithm achieves 29.03 %error on CIFAR-100,
-but the state of the art is 17.18 %[HLW16].
+SVHN, but the best results on CIFAR-10 were7.10 % error whereas the state of the art is
+at 3.74 % [HLW16]. Similarly, the Genetic algorithm achieves29.03 % error on CIFAR-100,
+but the state of the art is17.18 % [HLW16].
 3.4. Reinforcement Learning
 Reinforcement learning is a sub-field of machine learning, which focuses on the question
 how to choose actions that lead to high rewards.
@@ -1091,22 +1185,22 @@ One can think of the search for good neural network topologies as a reinforcemen
 problem. The agent is a recurrent neural network which can generate bitstrings. Those
 variable-length bitstrings encode neural network topologies.
 In 2016, this approach was applied to construct neural networks for computer vision.
-In [BGNR16], Q-learning with an ε-greedy exploration was applied.
-In [ZL16], the REINFORCE algorithm from [ Wil92] was used to train state of the art models
+In [BGNR16], Q-learning with anε-greedy exploration was applied.
+In [ZL16], theREINFORCE algorithm from [Wil92] was used to train state of the art models
 for CIFAR-10 and the Penn Treebank dataset. A drawback of this method is that enormous
 amounts of computational resources were used to obtain those results.
 3.5. Convolutional Neural Fabrics
-Convolutional Neural Fabrics are introduced in [ SV16]. They side-step hard decisions
+Convolutional Neural Fabrics are introduced in [SV16]. They side-step hard decisions
 about topologies by learning an ensemble of different CNN architectures. The idea is to
 define a single architecture as a trellis through a 3D grid of nodes. Each node represents a
 convolutional layer. One dimension is the index of the layer, the other two dimensions are
 the amount of filters and the feature size. Each node is connected to nine other nodes and
 thus represents nine possible choices of convolutional layers:
-•Resolution : (i) convolution with stride=1 or (ii) convolution with stride=2 or
+• Resolution: (i) convolution with stride=1 or (ii) convolution withstride=2 or
 (iii) deconvolution (doubling the resolution)
-•Channels : (i) half the number of filters than the layer before (ii) the same number
+• Channels: (i) half the number of filters than the layer before (ii) the same number
 of filters as the layer before (iii) double the number of filters than the layer before
-They always use ReLU as an activation function and they always use filters of size 3×3.
+They always use ReLU as an activation function and they always use filters of size3 ×3.
 They don’t use pooling at all.
 
 3. Topology Learning
@@ -1114,19 +1208,19 @@ They don’t use pooling at all.
 4. Hierarchical Classification
 Designing a classifier for a new dataset is hard for two main reasons: Many design choices are
 not clearly superior to others and evaluating one design choice takes much time. Especially
-CNNs are known to take several days [ KSH12,SLJ+15] or even weeks [ SZ14] to train.
+CNNs are known to take several days [KSH12, SLJ+15] or even weeks [SZ14] to train.
 Additionally, some methods for analyzing a dataset become harder to use with more classes
 and more training samples. Examples are t-SNE, the manual inspection of errors and
 confusion matrices, and the argmax method.
 One idea to approach this problem is by building a hierarchy of classifiers. The root
 classifier distinguishes clusters of classes, whereas the leaf classifiers distinguish single
 classes. Figure 4.1 gives an example of a hierarchy of classifiers.
-Figure 4.1.: Example for a hierarchy of classifiers. Each classifier is visualized by a rounded rectangle.
-The root classifier C0has to distinguish six coarse classes (pedestrian, four+-wheelers,
-traffic signs, two-wheelers, street, other) or 17 fine-grained classes. If C0predicts a
-pedestrian , another classifier has to predict if it is an adult or a child. Similar, if C0
-predicts traffic sign , then another classifier has to predict if it is a speed limit, a
-sign indicating danger or something else. If C0, however, predicts road, then no other
+Figure 4.1.:Example for a hierarchy of classifiers. Each classifier is visualized by a rounded rectangle.
+The root classifierC0 has to distinguish six coarse classes (pedestrian, four+-wheelers,
+traffic signs, two-wheelers, street, other) or 17 fine-grained classes. IfC0 predicts a
+pedestrian, another classifier has to predict if it is an adult or a child. Similar, ifC0
+predicts traffic sign, then another classifier has to predict if it is a speed limit, a
+sign indicating danger or something else. IfC0, however, predictsroad, then no other
 classifier will become active.
 In this example, the problem has 17 classes. The hierarchical approach introduces
 7 clusters of classes and thus uses 8 classifiers.
@@ -1135,37 +1229,37 @@ Such a hierarchy of classifiers needs clusters of classes.
 4. Hierarchical Classification
 4.1. Advantages of classifier hierarchies
 Having a classifier hierarchy has five advantages:
-•Division of labor : Different teams can work together. Instead of having a monolithic
+• Division of labor: Different teams can work together. Instead of having a monolithic
 task, the solutions can be combined.
-•Guarantees : Changing a classifier will only change the prediction of itself and its
+• Guarantees: Changing a classifier will only change the prediction of itself and its
 children. Siblings are not affected. In the example from Figure 4.1, the classifier
-which distinguishes traffic signs can be changed while the classification as pedestrian ,
-four+-wheelers ,traffic sign ,street,otherwill not be affected. Also, the
+which distinguishes traffic signs can be changed while the classification aspedestrian,
+four+-wheelers, traffic sign, street, other will not be affected. Also, the
 classification between speed limits, danger signs and other signs will not change.
-•Faster training : Except for the root classifier C0, each other classifier will have
+• Faster training: Except for the root classifierC0, each other classifier will have
 less than the total amount of training data. Depending on the combined classes, the
 models could also be simpler. Hence the training time is reduced.
-•Weighting of errors : In practice, some errors are more severe than others. For
-example, it could be acceptable if the two-wheelers classifier has an error rate of
-40 %. But it is not acceptable if the speed limit classifier has such a high error rate.
-•Post-hoc explanations : The simpler a model is, the easier it is to explain why a
+• Weighting of errors: In practice, some errors are more severe than others. For
+example, it could be acceptable if thetwo-wheelers classifier has an error rate of
+40 %. But it is not acceptable if thespeed limit classifier has such a high error rate.
+• Post-hoc explanations: The simpler a model is, the easier it is to explain why a
 classification is made the way it is made.
 4.2. Clustering classes
 There are two ways to cluster classes: By similarity or by semantics. While semantic
 clustering needs either additional information or manual work, the similarity can be
-automatically inferred from the data. As pointed out in [ XZY+14], semantically similar
+automatically inferred from the data. As pointed out in [XZY+14], semantically similar
 classes are often also visually similar. For example, in the ImageNet dataset most dogs
 are semantically and visually more similar to each other than to non-dogs. An example
-where this is obviously not the case are symbols: The summation symbol \sumis identical
-in appearance to the Greek letter \Sigma, but semantically much closer to the addition
+where this is obviously not the case are symbols: The summation symbol\sum is identical
+in appearance to the Greek letter\Sigma, but semantically much closer to the addition
 operator +.
 One approach to cluster classes by similarity is to train a classifier and examine its
 predictions. Each class is represented in the confusion matrix by one row. Those rows
-can be directly with standard clustering algorithms such as k-means, DBSCAN [ EKS+96],
-OPTICS [ ABKS99 ], CLARANS [ NH02], DIANA [ KR09], AHC (see [ HPK11]) or spectral
-clustering as in [ XZY+14]. Those clusterings, however, are hard to interpret and most of
+can be directly clustered with standard clustering algorithms such as k-means, DBSCAN [EKS+96],
+OPTICS [ABKS99], CLARANS [NH02], DIANA [KR09], AHC (see [HPK11]) or spectral
+clustering as in [XZY+14]. Those clusterings, however, are hard to interpret and most of
 them do not allow a human to improve the found clustering manually.
-The confusion matrix (c)ij∈Nk×kstates how often class iwas present and class jwas
+The confusion matrix $(c)_{ij} \in \mathbb{N}^{k \times k}$ states how often class $i$ was present and class $j$ was
 
 4.2. Clustering classes
 predicted. The more often this confusion happens, the more similar those two classes are to
@@ -1176,34 +1270,37 @@ diminish after a critical point of classes is reached. Hence a binary tree might
 good choice. As an alternative, an approach which allows building arbitrary many clusters,
 is proposed.
 The proposed algorithm has two main ideas:
-•The order of columns and rows in the confusion matrix is arbitrary. This means one
-can swap rows and columns. If row iandjare swapped, then the columns iandj
+• The order of columns and rows in the confusion matrix is arbitrary. This means one
+can swap rows and columns. If rows $i$ and $j$ are swapped, then the columns $i$ and $j$
 have to be swapped too in order to keep the same confusion matrix.
-•If two classes are confused often, then they are similar to the classifier.
+• If two classes are confused often, then they are similar to the classifier.
 Hence the order of the classes is permutated in such a way that the highest errors are close
 to the diagonal. One possible objective function to be minimized is
-f(C) =n∑
-i=1n∑
-j=1Cij·|i−j| [4.1]
+$$
+f(C) =
+\sum_{i=1}^{n}
+\sum_{j=1}^{n}
+C_{ij} \cdot |i - j| \tag{4.1}
+$$
 which punishes errors linearly with the distance to the diagonal. This method is called CMO
 in the following.
 As pointed out by Tobias Ribizel (personal communication), this optimization problem
-is a weighted version of Optimal Linear Arrangement problem . That problem is NPcomplete
- [ GJ02,GJS76]. Simulated Annealing as described in Algorithm 1, however,
+is a weighted version of the Optimal Linear Arrangement problem. That problem is NP-complete
+ [GJ02, GJS76]. Simulated Annealing as described in Algorithm 1, however,
 produces reasonable clusterings as well as visually appealing confusion matrices. The
-algorithm works as follows: First, decide with probability 0.5if only two random rows are
+algorithm works as follows: First, decide with probability0.5 if only two random rows are
 swapped or a block is swapped. If two rows are swapped, choose both of them randomly.
 If a block is swapped, then choose the start randomly and the end of the block randomly
 after the start. The insert position has to be a valid position considering the block length,
 but besides that it is also chosen uniformly random.
 Simple row-swapping can exploit local improvements. For example, in the context of
-ImageNet, it can swap the dog-class Silky Terrier to the dog-class Yorkshire terrier
-and both dog classes Dalmatian andGreyhound next to each other. Both the two clusters
-of dog breeds could be separated by carandbusdue to random chance. Moving any single
+ImageNet, it can swap the dog-classSilky Terrier to the dog-classYorkshire terrier
+and both dog classesDalmatian and Greyhound next to each other. Both the two clusters
+of dog breeds could be separated bycar and bus due to random chance. Moving any single
 class increases the score, but moving either one of the dog breed clusters or the vehicle
 cluster decreases the score. Hence it is beneficial to implement block moving.
 One advantage of permutating the classes in order to minimize Equation (4.1) in comparison
-to spectral clustering as used in [ XZY+14] is that the adjusted confusion matrix can be
+to spectral clustering as used in [XZY+14] is that the adjusted confusion matrix can be
 
 4. Hierarchical Classification
 split into many much smaller matrices along the diagonal. In the case of many classes (e.g.,
@@ -1213,15 +1310,15 @@ confusions are not made and thus many elements of the confusion matrix are close
 Those will be moved to the corners of the confusion matrix by optimizing Equation (4.1).
 Once a permutation of the classes is found which has a low score according to Equation (4.1), the clusters
 can either be made by hand by deciding why classes should not be in one cluster. With
-such a permutation, only n−1binary decisions have to be made and hence only the list of
-classes has to be read. Alternatively, one can calculate the confusions C′
-i,i+1+C′
-i+1,ifor
+such a permutation, only $n-1$ binary decisions have to be made and hence only the list of
+classes has to be read. Alternatively, one can calculate the confusions
+$C'_{i,i+1} + C'_{i+1,i}$
+for
 each pair of classes which are neighbors in the confusion matrix. The higher this value, the
-more similar are the classes according to the classifier. Hence a threshold θcan be applied.
-θcan either be set automatically (e.g., such that 10 %of all pairs are above the threshold)
+more similar are the classes according to the classifier. Hence a threshold $\theta$ can be applied.
+$\theta$ can either be set automatically (e.g., such that 10 % of all pairs are above the threshold)
 or semi-automatically by asking the user for information if two classes belong to the same
-cluster. Such an approach only needs log(n)binary decisions from the user where nis the
+cluster. Such an approach only needs $\log(n)$ binary decisions from the user where $n$ is the
 number of classes.
 Please note that CMO only works if the classifier is neither too bad nor too good. A classifier
 which does not solve the task at all might just give almost uniform predictions whereas the
@@ -1231,147 +1328,159 @@ the prediction of the class in contrast to using only the argmax in order to fin
 permutation.
 
 5. Experimental Evaluation
-All experiments are implemented using Keras 2.0 [ Cho15] with Tensorflow 1.0 [ AAB+16]
-and cuDNN 5.1 [ CWV+14] as the backend. The experiments were run on different machines
+All experiments are implemented using Keras 2.0 [Cho15] with Tensorflow 1.0 [AAB+16]
+and cuDNN 5.1 [CWV+14] as the backend. The experiments were run on different machines
 with different Nvidia graphics processing units (GPUs), including the Titan Black, GeForce
 GTX 970 and GeForce 940MX.
-The GTSRB [ SSSI12], SVHN [ NWC+11b], CIFAR-10 and CIFAR-100 [ Kri], MNIST [ YL98],
-HASYv2 [ Tho17a], STL-10 [ CLN10] dataset are used for the evaluation. Those datasets are
+The GTSRB [SSSI12], SVHN [NWC+11b], CIFAR-10 and CIFAR-100 [Kri], MNIST [YL98],
+HASYv2 [Tho17a], STL-10 [CLN10] dataset are used for the evaluation. Those datasets are
 used as their size is small enough to be trained within a day. Other classification datasets
 which were considered are listed in Appendix E.
-CIFAR-10 (Canadian Institute for Advanced Research 10) is a 10-class dataset of color
-images of the size 32 px×32 px. Its ten classes are airplane, automobile, bird, cat, deer,
-dog, frog, horse, ship, truck. The state of the art achieves an accuracy of 96.54 %[HLW16].
-According to [Kar11], human accuracy is at about 94 %.
-CIFAR-100 is a 100-class dataset of color images of the size 32 px×32 px. Its 100 classes
+CIFAR-10(Canadian Institute for Advanced Research 10) is a 10-class dataset of color
+images of the size32 px×32 px. Its ten classes are airplane, automobile, bird, cat, deer,
+dog, frog, horse, ship, truck. The state of the art achieves an accuracy of96.54 % [HLW16].
+According to [Kar11], human accuracy is at about94 %.
+CIFAR-100is a 100-class dataset of color images of the size32 px×32 px. Its 100 classes
 are grouped to 20 superclasses. It includes animals, people, plants, outdoor scenes, vehicles
 and other items. CIFAR-100 is not a superset of CIFAR-10, as CIFAR-100 does not contain
-the class airplane . The state of the art achieves an accuracy of 82.82 %[HLW16].
+the classairplane. The state of the art achieves an accuracy of82.82 % [HLW16].
 GTSRB (German Traffic Sign Recognition Benchmark) is a 43-class dataset of traffic signs.
-The51 839images are in color and of a minimum size of 25 px×25 pxup to 266 px×232 px.
-The state of the art achieves 99.46 %accuracy with an ensemble of 25 CNNs [ SL11].
-According to [SSSI], human performance is at 98.84 %.
-HASYv2 (Handwritten Symbols version 2) is a 369 class dataset of black-and-white images
-of the size 32 px×32 px. The 369 classes contain the Latin and Greek letters, arrows,
-mathematical symbols. The state of the art achieves an accuracy of 82.00 %[Tho17a].
-STL-10 (self-taught learning 10) is a 10-class dataset of color images of the size 96 px×96 px.
+The 51 839 images are in color and of a minimum size of25 px×25 px up to266 px×232 px.
+The state of the art achieves99.46 % accuracy with an ensemble of 25 CNNs [SL11].
+According to [SSSI], human performance is at98.84 %.
+HASYv2(Handwritten Symbols version 2) is a 369 class dataset of black-and-white images
+of the size32 px×32 px. The 369 classes contain the Latin and Greek letters, arrows,
+mathematical symbols. The state of the art achieves an accuracy of82.00 % [Tho17a].
+STL-10(self-taught learning 10) is a 10-class dataset of color images of the size96 px×96 px.
 Its ten classes are airplane, bird, car, cat, deer, dog, horse, monkey, ship, truck. The state
-of the art achieves an accuracy of 74.80 %[ZMGL15 ]. It contains 100 000unlabeled images
-for unsupervised training and 500images per class for supervised training.
-SVHN(Street View House Numbers) exists in two formats. For the following experiments,
+of the art achieves an accuracy of74.80 % [ZMGL15]. It contains100 000 unlabeled images
+for unsupervised training and500 images per class for supervised training.
+SVHN (Street View House Numbers) exists in two formats. For the following experiments,
 the cropped digit format was used. It contains the 10 digits cropped from photos of Google
-Street View. The images are in color and of size 32 px×32 px. The state of the art
+Street View. The images are in color and of size32 px ×32 px. The state of the art
 
 5. Experimental Evaluation
-achieves an accuracy of 98.41 %[HLW16]. According to [ NWC+11a], human performance
-is at 98.0 %.
-As a preprocessing step, the pixel-features were divided by 255 to obtain values in [0,1].
-For GTSRB, the training and test data was scaled to 32 px×32 px.
+achieves an accuracy of98.41 % [HLW16]. According to [NWC+11a], human performance
+is at98.0 %.
+As a preprocessing step, the pixel-features were divided by 255 to obtain values in[0,1].
+For GTSRB, the training and test data was scaled to32 px×32 px.
 5.1. Baseline Model and Training setup
-The baseline model is trained with Adam [ KB14], an initial learning rate of 10−4, a batch
+The baseline model is trained with Adam [KB14], an initial learning rate of $10^{-4}$, a batch
 size of 64 for at most 1000 epochs with data augmentation. The kind of data augmentation
 depends on the dataset:
-•CIFAR-10 ,CIFAR-100 and STL-10: Random width and height shift by at most
-±3pixels in either direction; Random horizontal flip.
-•GTSRB ,MNIST : Random width and height shift by at most ±5pixels in either
-direction; random rotation by at most ±15degrees; random channel shift; random
-zoom in [0.5,1.5]; random shear by at most 6 degrees.
-•HASYv2 : Random width and height shift by at most ±5pixels in either direction;
-random rotation by at most ±5degree.
-•SVHN: No data augmentation.
-If the dataset does not define a training/test set, a stratified 67 %/33 %split is applied. If
+• CIFAR-10, CIFAR-100and STL-10: Random width and height shift by at most
+±3 pixels in either direction; Random horizontal flip.
+• GTSRB, MNIST: Random width and height shift by at most±5 pixels in either
+direction; random rotation by at most±15 degrees; random channel shift; random
+zoom in[0.5,1.5]; random shear by at most 6 degrees.
+• HASYv2: Random width and height shift by at most±5 pixels in either direction;
+random rotation by at most ±5 degrees.
+• SVHN: No data augmentation.
+If the dataset does not define a training/test set, a stratified67 % / 33 % split is applied. If
 the dataset does not define a validation set, the training set is split in a stratified manner
-into90 %training set / 10 %test set.
-Early stopping [ Pre98] with the validation accuracy as a stopping criterion and a patience of
+into 90 % training set /10 % test set.
+Early stopping [Pre98] with the validation accuracy as a stopping criterion and a patience of
 10 epochs is applied. After this, the model is trained without data augmentation for at most
 1000 epochs with early stopping and the validation accuracy as a stopping criterion and a
 patience of 10 epochs. Kernel weights are initialized according to the uniform initialization
 scheme of He [HZRS15b] (see Appendix B.3).
 The architecture of the baseline model uses a pattern of
-Conv-Block (n) = (Convolution−Batch Normalization −Activation )n−Pooling
+$\text{Conv-Block}(n) = (\text{Convolution} - \text{Batch Normalization} - \text{Activation})^{n} - \text{Pooling}$
 The activation function is the Exponential Linear Unit (ELU) (see Table B.3), except for
 the last layer where softmax is used. Before the last two convolutional layers, a dropout
-layer with dropout probability 0.5is applied. The architecture is given in detail in Table 5.1.
+layer with dropout probability0.5 is applied. The architecture is given in detail in Table 5.1.
 Please note that the number of input- and output channels of the network depends on
-the dataset. If the input image is larger than 32 px×32 px, for each power of two a
-Conv-Block (2)is added at the input. For MNIST, the images are bilinearly upsampled to
+the dataset. If the input image is larger than 32 px ×32 px, for each power of two a
+Conv-Block(2) is added at the input. For MNIST, the images are bilinearly upsampled to
 32 px×32 px.
 
 5.1. Baseline Model and Training setup
 # Type Filters @
-Patch size / strideParameters FLOPs Output size
-Input 0 0 3@32 ×32
-1 Convolution 32@ 3×3×3/1 896 1736704 32@32×32
-2 BN + ELU 64 163904 32@32×32
-3 Convolution 32@ 3×3×32/1 9248 18841600 32@32×32
-4 BN + ELU 64 163904 32@32×32
-Max pooling 2×2/2 0 40960 32@16 ×16
-5 Convolution 64@ 3×3×32/1 18496 9420800 64@16 ×16
-6 BN + ELU 128 82048 64@16 ×16
-7 Convolution 64@ 3×3×64/1 36928 18 857 984 64@16×16
-8 BN + ELU 128 82048 64@16 ×16
-Max pooling 2×2/2 20480 64@ 8 ×8
-9 Convolution 64@ 3×3×64/1 36928 4714496 64@ 8 ×8
-10 BN + ELU 128 20608 64@ 8 ×8
-Max pooling 2×2/2 5120 64@ 4 ×4
-11 Convolution (v) 512@ 4×4×64/1 524 800 1048064 512@ 1 ×1
-12 BN + ELU 1024 3584 512@ 1 ×1
-Dropout 0.5 0 0 512@ 1 ×1
-13 Convolution 512@ 1×1×512/1 262656 523776 512@ 1 ×1
-14 BN + ELU 1024 3584 512@ 1 ×1
-Dropout 0.5 0 0 512@ 1 ×1
-15 Convolution k @ 1×1×512/1k·(512 + 1) 1024 ·kk @ 1×1
-Global avg Pooling 1×1 0 k k @ 1×1
-16 BN + Softmax 2k 7k k @ 1×1
+Patch size / stride
+Parameters FLOPs Output size
+Input 0 0 3@32 × 32
+1 Convolution 32@ 3 ×3 ×3 /1 896 1736704 32 @32 × 32
+2 BN + ELU 64 163904 32 @32 × 32
+3 Convolution 32@ 3 ×3 ×32 /1 9248 18841600 32 @32 × 32
+4 BN + ELU 64 163904 32 @32 × 32
+Max pooling 2 ×2 /2 0 40960 32@16 × 16
+5 Convolution 64@ 3 ×3 ×32 /1 18496 9420800 64@16 × 16
+6 BN + ELU 128 82048 64@16 × 16
+7 Convolution 64@ 3 ×3 ×64 /1 36928 18 857 984 64@16 × 16
+8 BN + ELU 128 82048 64@16 × 16
+Max pooling 2 ×2 /2 20480 64@ 8 × 8
+9 Convolution 64@ 3 ×3 ×64 /1 36928 4714496 64@ 8 × 8
+10 BN + ELU 128 20608 64@ 8 × 8
+Max pooling 2 ×2 /2 5120 64@ 4 × 4
+11 Convolution (v) 512@ 4 ×4 ×64 /1 524 800 1048064 512@ 1 × 1
+12 BN + ELU 1024 3584 512@ 1 × 1
+Dropout 0.5 0 0 512@ 1 × 1
+13 Convolution 512@ 1 ×1 ×512 /1 262656 523776 512@ 1 × 1
+14 BN + ELU 1024 3584 512@ 1 × 1
+Dropout 0.5 0 0 512@ 1 × 1
+15 Convolution k @ 1 ×1 ×512 /1 k·(512 + 1) 1024 ·k k @ 1 × 1
+Global avg Pooling 1 ×1 0 k k @ 1 × 1
+16 BN + Softmax 2k 7k k @ 1 × 1
 ∑ 515k
-+8925121032k
-+55729664103424+ 2k
-Table 5.1.: Baseline architecture with 3 input channels of size 32×32. All convolutional layers
-useSAMEpadding, except for layer 11 which used VALIDpadding in order to decrease
-the feature map size to 1×1. If the input feature map is bigger than 32×32, for
-each power of two there are two Convolution + BN + ELU blocks and one Max pooling
-block added. This is the framed part in the table.32×32Input
++892512
+1032k
++55729664 103424+2k
+Table 5.1.:Baseline architecture with 3 input channels of size32 ×32. All convolutional layers
+use SAME padding, except for layer 11 which usedVALID padding in order to decrease
+the feature map size to1 ×1. If the input feature map is bigger than32 ×32, for
+each power of two there are twoConvolution + BN + ELU blocks and oneMax pooling
+block added. This is the framed part in the table.
+32×32
+Input
 C32@3×3/1
 BN + ELU
 C32@3×3/1
-BN + ELU16×16max pooling 2×2/2
+BN + ELU
+16×16
+max pooling2×2/2
 C64@3×3/1
 BN + ELU
 C64@3×3/1
-BN + ELU8×8max pooling 2×2/2
+BN + ELU
+8×8
+max pooling2×2/2
 C64@3×3/1
-BN + ELU4×4max pooling 2×2/2
+BN + ELU
+4×4
+max pooling2×2/2
 C512@4×4/1(V)
 BN + ELU
-Dropout,p= 0.51×1C512@1×1/1
+Dropout,p= 0.5
+1×1
+C512@1×1/1
 BN + ELU
 Dropout,p= 0.5
 Ck@1×1/1
 Global AVG pooling
 BN + Softmax
-Figure 5.1.: Architecture of the baseline model. C32@3×3/1is a convolutional layer with 32 filters
-of kernel size 3×3with stride 1.
+Figure 5.1.:Architecture of the baseline model.C 32@3 ×3/1 is a convolutional layer with 32 filters
+of kernel size3 ×3 with stride 1.
 
 5. Experimental Evaluation
 5.1.1. Baseline Evaluation
 The results for the baseline model evaluated on eight datasets are given in Table 5.2. The
 speed for inference for different GPUs is given in Table 5.3.
-DatasetSingle Model Accuracy Ensemble of 10
+Dataset Single Model Accuracy Ensemble of 10
 Training Set Test Set Training Set Test Set
-Asirra 94.22 %σ= 3.49 94.37 %σ= 3.47 97 .07 % 97.37 %
-CIFAR-10 91.23 %σ= 1.10 85.84 %σ= 0.87 92 .36 % 86.75 %
-CIFAR-100 76.64 %σ= 1.48 63.38 %σ= 0.55 78 .30 % 64.70 %
-GTSRB 100.00 %σ= 0.00 99.18 %σ= 0.11 100 .00 % 99.46 %
-HASYv2 89.49 %σ= 0.42 85.35 %σ= 0.10 89 .94 % 86.03 %
-MNIST 99.93 %σ= 0.07 99.53 %σ= 0.06 99 .99 % 99.58 %
-STL-10 94.12 %σ= 0.87 75.67 %σ= 0.34 96 .35 % 77.62 %
-SVHN 99.02 %σ= 0.07 96.28 %σ= 0.10 99 .42 % 97.20 %
-Table 5.2.: Baseline model accuracy on eight datasets. The single model actuary is the 10 models
-used in the ensemble. The empirical standard deviation σof the accuracy is also given.
+Asirra 94.22 % σ= 3.49 94 .37 % σ= 3.47 97 .07 % 97 .37 %
+CIFAR-10 91.23 % σ= 1.10 85 .84 % σ= 0.87 92 .36 % 86 .75 %
+CIFAR-100 76.64 % σ= 1.48 63 .38 % σ= 0.55 78 .30 % 64 .70 %
+GTSRB 100.00 % σ= 0.00 99 .18 % σ= 0.11 100 .00 % 99 .46 %
+HASYv2 89.49 % σ= 0.42 85 .35 % σ= 0.10 89 .94 % 86 .03 %
+MNIST 99.93 % σ= 0.07 99 .53 % σ= 0.06 99 .99 % 99 .58 %
+STL-10 94.12 % σ= 0.87 75 .67 % σ= 0.34 96 .35 % 77 .62 %
+SVHN 99.02 % σ= 0.07 96 .28 % σ= 0.10 99 .42 % 97 .20 %
+Table 5.2.: Baseline model accuracy on eight datasets. The single model accuracy is that of the 10 models
+used in the ensemble. The empirical standard deviation $\sigma$ of the accuracy is also given.
 CIFAR-10, CIFAR-100 and STL-10 models use test-time transformations. None of the
 models uses unlabeled data or data from other datasets. For HASYv2 no test time
 transformations are used.
-Network GPU TensorflowInference per Training
+Network GPU Tensorflow Inference per Training
 1 Image 128 images time / epoch
 Baseline Default Intel i7-4930K 3 ms 244 ms 231 .0 s
 Baseline Optimized Intel i7-4930K 2 ms 143 ms 149 .0 s
@@ -1383,9 +1492,9 @@ Baseline Default GTX 1070 2 ms 15 ms 14 .4 s-14.5 s
 Baseline Default Titan Black 4 ms 25 ms 28 .1 s-28.1 s
 Baseline Optimized Titan Black 3 ms 22 ms 24 .4 s-24.4 s
 DenseNet-40-12 Default GeForce 940MX 27 ms 2403 ms —
-Table 5.3.: SpeedcomparisonofthebaselinemodelonCIFAR-10. Thebaselinemodelisevaluatedon
-six Nvidia GPUs and one CPU. The weights for DenseNet-40-12 are taken from [ Maj17].
-Weights the baseline model can be found at [ Tho17b]. The optimized Tensorflow build
+Table 5.3.:Speed comparison of the baseline model on CIFAR-10. The baseline model is evaluated on
+six Nvidia GPUs and one CPU. The weights for DenseNet-40-12 are taken from [Maj17].
+Weights of the baseline model can be found at [Tho17b]. The optimized Tensorflow build
 makes use of SSE4.X, AVX, AVX2 and FMA instructions.
 
 5.1. Baseline Model and Training setup
@@ -1394,55 +1503,55 @@ The distribution of filter weights by layer is visualized in Figure 5.2 and the
 of bias weights by layer is shown in Figure 5.3. Although both figures only show the
 distribution for one specific model trained on CIFAR-100, the following observed patterns
 are consistent for 70 models (7 datasets and 10 models per dataset):
-•The empiric [0.5−percentile,99.5−percentile ]interval which contains 99 %of the
+• The empiric[0.5 −percentile,99.5 −percentile] interval which contains99 % of the
 filter weights is almost symmetric around zero. The same is true for the bias weights.
-•The farther a layer is from the input away, the smaller the 99-percentile interval is,
+• The farther a layer is from the input away, the smaller the 99-percentile interval is,
 except for the last layer (see Table A.1).
-•The 99-percentile interval of the first layers filter weights is about [−0.5,+0.5], except
-for MNIST and HASYv2 where it is in [−0.8,0.8].
-•The 99-percentile interval of the first layers bias weights is always in [−0.2,0.2].
-•The distribution of filter weights of the last convolutional layer is not symmetric. In
+• The 99-percentile interval of the first layers filter weights is about[−0.5,+0.5], except
+for MNIST and HASYv2 where it is in[−0.8,0.8].
+• The 99-percentile interval of the first layers bias weights is always in[−0.2,0.2].
+• The distribution of filter weights of the last convolutional layer is not symmetric. In
 some cases the distribution is also not unimodal.
-•The bias weights of the last three layers are very close to zero. The absolute value of
-most of them is smaller than 10−2.
-Similarly, Figure 5.4 and Figure 5.5 show the distribution of the γand theβparameter of
-Batch Normalization. It is expected that γis close to 1 and βis close to 0. In those cases,
+• The bias weights of the last three layers are very close to zero. The absolute value of
+most of them is smaller than $10^{-2}$.
+Similarly, Figure 5.4 and Figure 5.5 show the distribution of theγ and theβ parameter of
+Batch Normalization. It is expected thatγ is close to 1 andβ is close to 0. In those cases,
 the Batch Normalization layer equals the identity and thus is only relevant for the training.
-Whileγandβdo not show as clear patterns as the filter and bias weights of convolutional
+While γ and β do not show as clear patterns as the filter and bias weights of convolutional
 layers, some observations are also consistent through all models even for different datasets:
-•γof the last layer (layer 16) is bigger than 1.3.
-•The 99-percentile interval for βof the last layer is longer than the other 99-percentile
+• γ of the last layer (layer 16) is bigger than 1.3.
+• The 99-percentile interval forβ of the last layer is longer than the other 99-percentile
 intervals.
-•The 99-percentile interval for βof the fourth-last (layer 14 for STL-10, layer 10 for
+• The 99-percentile interval for $\beta$ of the fourth-last layer (layer 14 for STL-10, layer 10 for
 all other models) is more negative than in all other layers.
 Finally, the distribution of filter weight ranges is plotted in Figure 5.6 for each convolutional
 layer. The ranges are calculated for each channel and filter separately. The smaller the
 values are, the less information is lost if the filters are replaced by smaller filters.
 
 5. Experimental Evaluation
-Figure 5.2.: Violin plots of the distribution of filter weights of a baseline model trained on CIFAR100.
- The weights of the first layer are relatively evenly spread in the interval [−0.4,+0.4].
-With every layer the interval which contains 95 %of the weights and is centered around
+Figure 5.2.: Violin plots of the distribution of filter weights of a baseline model trained on CIFAR-100.
+ The weights of the first layer are relatively evenly spread in the interval[−0.4,+0.4].
+With every layer the interval which contains95 % of the weights and is centered around
 the mean becomes smaller, especially with layer 11 where the feature maps are of
-size1×1. In contrast to the other layers, the last convolutional layer has a bimodal
+size 1 ×1. In contrast to the other layers, the last convolutional layer has a bimodal
 distribution.
 This plot indicates that the network might benefit from bigger filters in the first layer,
 whereas the filters in layers 7 – 11 could potentially be smaller.
-Figure 5.3.: Violin plots of the distribution of bias weights of a baseline model trained on CIFAR-100.
-While the first layers biases are in [−0.1,+0.1], after each max-pooling layer the interval
-which contains 95 %of the weights and is centered around the mean becomes smaller.
-In the last three convolutional layer, most bias weights are in [−0.005,+0.005].
+Figure 5.3.:Violin plots of the distribution of bias weights of a baseline model trained on CIFAR-100.
+While the first layers biases are in[−0.1,+0.1], after each max-pooling layer the interval
+which contains95 % of the weights and is centered around the mean becomes smaller.
+In the last three convolutional layers, most bias weights are in $[-0.005, +0.005]$.
 
 5.1. Baseline Model and Training setup
-Figure 5.4.: Violin plots of the distribution of the γparameter of Batch Normalization layers of a
+Figure 5.4.:Violin plots of the distribution of theγ parameter of Batch Normalization layers of a
 baseline model trained on CIFAR-100.
-Figure 5.5.: The distribution of the βparameter of Batch Normalization layers of a baseline model
+Figure 5.5.:The distribution of theβ parameter of Batch Normalization layers of a baseline model
 trained on CIFAR-100.
 
 5. Experimental Evaluation
-Figure 5.6.: The distribution of the range of values (max - min) of filters by channel and layer. For
+Figure 5.6.:The distribution of the range of values (max - min) of filters by channel and layer. For
 each filter, the range of values is recorded by channel. The smaller this range is, the
-less information is lost if a n×nfilter is replaced by a (n−1)×(n−1)filter.
+less information is lost if an $n \times n$ filter is replaced by an $(n-1) \times (n-1)$ filter.
 
 5.1. Baseline Model and Training setup
 5.1.3. Training behavior
@@ -1456,27 +1565,36 @@ the start are also better at the end. In order to check this hypothesis, the rel
 validation accuracies for the 10 CIFAR-100 models was examined. If the relative ordering
 stays approximately the same, then it can be considered to run the first few epochs many
 times and only train the best models to the end. For 10 models, there can be (10^2 − 10) /
-2= 45
+2 = 45
 pair-wise changes in the ordering at maximum if the relative order of validation accuracies
 is reversed. For the baseline model, 21.8 changes in the relative order of accuracies occurred
-in average for each pair of epochs (i,i+ 1). This means if one knows only the relative order
-of the validation accuracy of two models mandm′in epochi, it is doubtful if one can
-make any statement about the ordering of mandm′in epochi+ 1.
-01020304050607080901001101201301400.20.30.40.50.60.7
-epochvalidation accuracy
+on average for each pair of epochs (i, i + 1). This means if one knows only the relative order
+of the validation accuracy of two models m and m′ in epoch i, it is doubtful if one can
+make any statement about the ordering of m and m′ in epoch i + 1.
+0 10 20 30 40 50 60 70 80 90 100 110 120 130 140
+0.2
+0.3
+0.4
+0.5
+0.6
+0.7
+epoch
+validation accuracy
 maximum validation accuracy
-minimum validation accuracy1.5
+minimum validation accuracy
+1.5
 2
 2.5
 3
 3.5
 4
-4.5loss
+4.5
+loss
 maximum validation accuracy
 minimum validation accuracy
 mean loss
-Figure 5.7.: Minimum and maximum validation accuracy of the 10 trained models by epoch. The
-differences do not exceed 1 %and does not increase by training epoch. Four models
+Figure 5.7.: Minimum and maximum validation accuracy of the 10 trained models by epoch. The
+differences do not exceed 1 % and do not increase by training epoch. Four models
 stopped the first training stage at epoch 133 which causes the shift in the loss and the
 maximum validation accuracy.
 Figures 5.8 to 5.10 show how the weights changed while training on CIFAR-100. It was
@@ -1506,50 +1624,50 @@ of the task is hard. For more than about 10 classes, however, it becomes hard to
 and read.
 For CIFAR-10, the proposed method groups the four object classes and the six animal
 classes together (see Figure 5.11a).
-(a)CIFAR-10 Test set
- (b)Random
-Figure 5.11.: Figure 5.11a shows an ordered confusion matrix of the CIFAR-10 dataset. The diagonal
+(a) CIFAR-10 Test set
+ (b) Random
+Figure 5.11.: Figure 5.11a shows an ordered confusion matrix of the CIFAR-10 dataset. The diagonal
 elements are set to 0 in order to make other elements easier to see.
 Figure 5.11b shows a confusion matrix with random mistakes.
-The first image of Figure 5.12 shows one example of a classifier with only 97.13 %test
+The first image of Figure 5.12 shows one example of a classifier with only 97.13 % test
 accuracy where a good permutation was found. Please note that this is not the best classifier.
-The confusion matrix which resulted from a baseline classifier with 99.32 %test accuracy is
+The confusion matrix which resulted from a baseline classifier with 99.32 % test accuracy is
 displayed as the second image.
 Those results suggest that the ordering of classes is a valuable tool to make patterns easier
 to see. Humans, however, are good at finding patterns even if they come from random noise.
-Hence, for comparison, a confusion matrix of a classifier with 30 classes, 60 %accuracy
-and40 %uniformly random errors of a balanced dataset is created, optimized according to
+Hence, for comparison, a confusion matrix of a classifier with 30 classes, 60 % accuracy
+and 40 % uniformly random errors of a balanced dataset is created, optimized according to
 Equation (4.1) and shown in Figure 5.11b. It clearly looks different than Figure 5.11a.
 On the HASYv2 dataset the class-ordering is necessary to see anything as most possible
 confusions do not happen. See Figure 5.13 for comparison of the first 50 classes of the
 unsorted confusion matrix and the sorted confusion matrix. If confusion matrices of a
-maximum size of 50×50are displayed, the ordered method can show only 8 matrices
+maximum size of 50×50 are displayed, the ordered method can show only 8 matrices
 because the off-diagonal matrices are almost 0. Without sorting, 64 matrices have to be
 displayed.
 
 5.2. Confusion Matrix Ordering
-Figure 5.12.: ThefirstimageshowstheconfusionmatrixforthetestofGTSRBsetafteroptimization
+Figure 5.12.: The first image shows the confusion matrix for the test set of GTSRB after optimization
 to Equation (4.1). The diagonal elements are set to 0 in order to make other elements
 easier to see. The symbols next to the label on the vertical axis indicate the shape
 and the color of the signs.
 The second image shows the same, but with the baseline model.
-Best viewed in electronic form.
-Figure 5.13.: The first 50 entries of the confusion matrix of the HASYv2 dataset. The diagonal
+Best viewed in electronic form. 
+Figure 5.13.: The first 50 entries of the confusion matrix of the HASYv2 dataset. The diagonal
 elements are set to 0 in order to make other elements easier to see. The top image
 shows arbitrary class ordering, the bottom image shows the optimized ordering.
 5.3. Spectral Clustering vs CMO
 5.3. Spectral Clustering vs CMO
 This section evaluates the clustering quality of CMO in comparison to the clustering quality
 of spectral clustering.
-The evaluated model achieves 70.50 %training accuracy and 53.16 %test accuracy on
+The evaluated model achieves 70.50 % training accuracy and 53.16 % test accuracy on
 CIFAR-100. Figure 5.14 shows the sorted confusion matrix.
-Figure 5.14.: The first 50 entries of the ordered confusion matrix of the CIFAR-100 dataset. The
+Figure 5.14.: The first 50 entries of the ordered confusion matrix of the CIFAR-100 dataset. The
 diagonal elements are set to 0 in order to make other elements easier to see. Best
 viewed in electronic form.
 CIFAR-100 has pre-defined coarse classes. Those are used as a ground truth for the clusters
-which are to be found. The number of errors is determined by (i) Join all nclusters which
-contain the classes of the coarse class Cto a setM. The error is n. (ii) Within M, find the
-set of classes M−which do not belong to C. (iii) The final error is n+|M−|. As can be
+which are to be found. The number of errors is determined by (i) Join all n clusters which
+contain the classes of the coarse class C to a set M. The error is n. (ii) Within M, find the
+set of classes M− which do not belong to C. (iii) The final error is n + |M−|. As can be
 seen in Table 5.4, both clustering methods find reasonable clusters. CMO, however, has
 only half the error of spectral clustering.
 The results for the HASYv2 dataset are qualitatively similar (see Table 5.5). It should be
@@ -1559,101 +1677,109 @@ based on CMO as described in Section 4.2.
 5. Experimental Evaluation
 Cluster Spectral clustering Errors CMO Errors
 fish aquarium fish, orchid + flatfish
-+ ray, shark + trout, lion5 aquarium fish, orchid + flatfish
-+ ray + shark, trout4
++ ray, shark + trout, lion
+5 aquarium fish, orchid + flatfish
++ ray + shark, trout
+4
 flowers orchid, aquarium fish + sunflower
  + poppy, tulip + rose,
-train5orchid, aquarium fish + sunflower,
- poppy, tulip, rose2
+train
+5 orchid, aquarium fish + sunflower,
+ poppy, tulip, rose
+2
 people baby, boy, man + girl + woman 2 baby, boy, girl, woman, man 0
 reptiles crocodile, plain, road, table,
 wardrobe + dinosaur + lizard
-+ snake, worm + turtle9crocodile, lizard, lobster, caterpillar
++ snake, worm + turtle
+9 crocodile, lizard, lobster, caterpillar
  + dinosaur + snake + turtle,
- crab6
+ crab
+6
 trees maple, oak, pine+willow, forest
-+ palm3 palm, willow, pine, maple, oak 0
++ palm
+3 palm, willow, pine, maple, oak 0
 Total 24 12
-Table 5.4.: Differences in spectral clustering and CMO. Classes in a cluster are separated by ,
-whereas clusters are separated by +.
+Table 5.4.: Differences in spectral clustering and CMO. Classes in a cluster are separated by ,
+whereas clusters are separated by +.
 Cluster Spectral clustering Errors CMO Errors
-AA,A,A 0A,A,A, Å 1
-BB,B 0B,B 0
-CC,c,⊂andC,ξ,EandC 4C,c,⊂,CandC 1
-DD,D,D,⊿ 1D,D,D 0
-EEandE,ε 2EandE,ε,ϵ,∈ 4
-FFandF,F 1FandF,F 1
-HHandH,κandH 3HandH,H 1
-KK,κ 0K,κ 0
-LL,⌊andL,L 1L,⌊andL,L 1
-MMandMandM 2Mandµ,MandM 3
-NNandN,NandN 2NandN,NandN,ℵ 3
-OO,O,0,◦,°,#ando 1O,O,0,◦,°and#ando 2
-PP,Pandp,ρandPand℘ 3PandP,P,℘andp,ρ 2
-QQ,Q,Q,ι,⊔,≳,ℓ,ℑ, Æ, 1 7QandQ,Q 1
-RR,RandR,R,kandℜ 3Randℜ,R,R,R 1
-SS,s,S 0S,s,S 0
-TT,⊤andT,τ 1T,⊤andT,τ 1
-UU,∪andu,U,A 1U,u,U,Aand∪ 2
-VV,v,∨ 0V,v,∨ 0
-WW,w,ω 0W,wandω 1
-XX,x,X,χ,× 0X,x,X,χ,× 0
-YYandy 1Y,y 0
-ZZ,z,ZandZ,Z 1Z,z,Z,Z,Z 0
+A A, A, A 0 A, A, A , Å 1
+B B, B 0 B, B 0
+C C, c, ⊂and C, ξ, E and C 4 C, c, ⊂, Cand C 1
+D D, D, D, ⊿ 1 D, D, D 0
+E E and E, ε 2 E and E, ε, ϵ, ∈ 4
+F F and F, F 1 F and F, F 1
+H H and H , κ and H 3 H and H, H 1
+K K, κ 0 K, κ 0
+L L, ⌊and L, L 1 L, ⌊and L, L 1
+M M and Mand M 2 M and µ, Mand M 3
+N N and N, N and N 2 N and N, N and N, ℵ 3
+O O, O, 0, ◦, °, # and o 1 O, O, 0, ◦, ° and # and o 2
+P P, Pand p, ρ and P and ℘ 3 P and P, P, ℘ and p, ρ 2
+Q Q, Q, Q, ι, ⊔, ≳, ℓ, ℑ, Æ,1 7 Q and Q, Q 1
+R R, Rand R, R, k and ℜ 3 R and ℜ, R, R, R 1
+S S, s, S 0 S, s, S 0
+T T, ⊤and T, τ 1 T, ⊤and T, τ 1
+U U, ∪and u, U, A 1 U, u, U, A and ∪ 2
+V V, v, ∨ 0 V, v, ∨ 0
+W W, w, ω 0 W, w and ω 1
+X X, x, X, χ, × 0 X, x, X, χ, × 0
+Y Y and y 1 Y, y 0
+Z Z, z, Zand Z, Z 1 Z, z, Z, Z, Z 0
 Total 34 25
 Table 5.5.: Differences in spectral clustering and CMO.
 
 5.4. Hierarchy of Classifiers
 5.4. Hierarchy of Classifiers
 In a first step, a classifier is trained on the 100 classes of CIFAR-100. The fine-grained root
-classifier achieves an accuracy of 65.29 %with test-time transformations. The accuracy on
+classifier achieves an accuracy of 65.29 % with test-time transformations. The accuracy on
 the found sub-classes are listed in Table 5.6. The fact that the root classifier achieves better
 results within a cluster than the specialized leaf classifiers in 13 of 14 cases could either
-be due to limited training data, overfitting or the small size of 32 px×32 pxof the data.
+be due to limited training data, overfitting or the small size of 32 px × 32 px of the data.
 The experiment also shows that most of the errors are due to not identifying the correct
 cluster. Hence, in this case, more work in improving the root classifier is necessary rather
 than improving the discrimination of classes within a cluster.
 Although the classes within a cluster capture most of the classifications, many misclassifications
  happen outside of the clusters. For example, in cluster 3, a perfect leaf classifier would
-push the accuracy in the fullcolumn only to 63.50 %due to errors of the root classifier
+push the accuracy in the full column only to 63.50 % due to errors of the root classifier
 where the root classifier does not predict the correct cluster.
 The leaf classifiers use the same topology as the root classifier. By initializing them with
-the root classifiers weights their performance can be pushed at about the inneraccuracy.
-They are, however, only useful if their accuracy is well above the inneraccuracy of the root
+the root classifier's weights, their performance can be pushed to about the inner accuracy.
+They are, however, only useful if their accuracy is well above the inner accuracy of the root
 classifier. Hence, for CIFAR-100, building hierarchies of classifiers is not useful.
-Cluster Classesaccuracy
+Cluster Classes
+accuracy
 root classifier leaf classifier
 cluster identified class identified | cluster class identified | cluster
-1 3 69.67 % 84 .27 % 72.98 %
-2 5 46.60 % 58 .54 % 43.47 %
-3 2 58.50 % 92 .13 % 83.46 %
-4 2 50.50 % 87 .83 % 81.74 %
-5 3 44.67 % 79 .29 % 71.01 %
-6 2 29.50 % 78 .67 % 72.00 %
-7 2 52.50 % 92 .11 % 87.72 %
-8 2 59.50 % 86 .23 % 81.88 %
-9 2 59.00 % 90 .08 % 87.79 %
-10 2 62.00 % 85 .52 % 73.10 %
-11 2 67.00 % 87 .01 % 75.32 %
-12 2 72.50 % 94 .77 % 76.77 %
-13 2 64.00 % 82 .58 % 86.27 %
-14 2 79.67 % 89 .85 % 89.10 %
-Table 5.6.: Accuracies of the root classifier trained on the full set of 100 classes evaluated on
-14 clusters of classes. Each class has 100 elements to test. The column cluster identified
+1 3 69.67 % 84.27 % 72.98 %
+2 5 46.60 % 58.54 % 43.47 %
+3 2 58.50 % 92.13 % 83.46 %
+4 2 50.50 % 87.83 % 81.74 %
+5 3 44.67 % 79.29 % 71.01 %
+6 2 29.50 % 78.67 % 72.00 %
+7 2 52.50 % 92.11 % 87.72 %
+8 2 59.50 % 86.23 % 81.88 %
+9 2 59.00 % 90.08 % 87.79 %
+10 2 62.00 % 85.52 % 73.10 %
+11 2 67.00 % 87.01 % 75.32 %
+12 2 72.50 % 94.77 % 76.77 %
+13 2 64.00 % 82.58 % 86.27 %
+14 2 79.67 % 89.85 % 89.10 %
+Table 5.6.: Accuracies of the root classifier trained on the full set of 100 classes evaluated on
+14 clusters of classes. Each class has 100 elements to test. The column cluster identified
 gives the percentage that the root classifier's argmax prediction is within the correct
-cluster, but not necessarily the correct class. The columns class identified | cluster only
+cluster, but not necessarily the correct class. The columns class identified | cluster only
 consider data points where the root classifier correctly identified the cluster.
 
 5. Experimental Evaluation
 5.5. Increased width for faster learning
 More filters in one layer could simplify the optimization problem as each filter needs smaller
-updates. Hence a CNN Nwithnifilters in layer iis expected to take more epochs than a
-CNNN′with 2·nifilters in layer ito achieve the same validation accuracy.
-This hypothesis can be falsified by training a CNN Nand a CNN N′and comparing the
+updates. Hence a CNN N with n_i filters in layer i is expected to take more epochs than a
+CNN N′ with 2·n_i filters in layer i to achieve the same validation accuracy.
+This hypothesis can be falsified by training a CNN N and a CNN N′ and comparing the
 trained number of epochs. As more filters can lead to different results depending on the
 layer where they are added, five models are trained. The details about those models are
 given in Table 5.7.
-Name LayerFilter count Total
+Name Layer Filter count Total
 Baseline New parameters
 m9 9 64 638 5 978 566
 m′
@@ -1662,29 +1788,30 @@ m11 11 512 3786 5 982 698
 m′
 11 11 512 1024 1 731 980
 m13 13 512 8704 5 982 092
-Table 5.7.: Models which are identical to the baseline, except that the number of filters of one layer
+Table 5.7.: Models which are identical to the baseline, except that the number of filters of one layer
 was increased.
 The detailed results are given in Table 5.8. As expected, the number of training epochs of
 the models with increased numbers of parameters is lower. The wall-clock time, however, is
 higher due to the increase in computation per forward- and backward-pass.
-Form9,m11andm13, the filter weight range of the layer with increased capacity decreases
+For m9, m11 and m13, the filter weight range of the layer with increased capacity decreases
 compared to Figure 5.6, the filter weights of the layer with increased capacity are more
-concentrated around zero compared to Figure 5.2. For model m13, the distribution of
+concentrated around zero compared to Figure 5.2. For model m13, the distribution of
 weight of the output layer changed to a more bell-shaped distribution. Except for this, the
 distribution of filter weights in other layers did not change for all three models compared to
 the baseline.
-Model ParametersAccuracy Training
+Model Parameters
+Accuracy Training
 Single Model Ensemble Mean Epochs Mean Time
 Mean std
-baseline 944 012 63 .38 %0.55 64.70 % 154.7 3856 s
-m9 5 978 566 65 .53 %0.37 66.72 % 105.7 4472 s
+baseline 944 012 63.38 % 0.55 64.70 % 154.7 3856 s
+m9 5 978 566 65.53 % 0.37 66.72 % 105.7 4472 s
 m′
-9 8 925 622 65 .10 %1.09 66.54 % 95.6 5261 s
-m11 5 982 698 65.73 %0.77 67.38 % 149.2 5450 s
+9 8 925 622 65.10 % 1.09 66.54 % 95.6 5261 s
+m11 5 982 698 65.73 % 0.77 67.38 % 149.2 5450 s
 m′
-11 1 731 980 62 .12 %0.48 62.89 % 143.6 3665 s
-m13 5 982 092 62 .39 %0.66 63.77 % 147.8 4485 s
-Table 5.8.: Training time in epochs and wall-clock time for the baseline and models m9,m11,m13
+11 1 731 980 62.12 % 0.48 62.89 % 143.6 3665 s
+m13 5 982 092 62.39 % 0.66 63.77 % 147.8 4485 s
+Table 5.8.: Training time in epochs and wall-clock time for the baseline and models m9, m11, m13
 as well as their accuracies.
 
 5.6. Weight updates
@@ -1705,28 +1832,31 @@ the mean weight updates of layers 1 and 3 are higher, the range of the mean weig
 from epoch 50 is higher for layer 5 and the range of mean updates of layer 7 is higher.
 For the maximum and the sum, no similar pattern could be observed (see Figures A.3
 and A.4).
-Figure 5.15.: Mean weight updates between epochs by layer. The model is the baseline model, but
+Figure 5.15.: Mean weight updates between epochs by layer. The model is the baseline model, but
 with layer 5 reduced to 3 filters.
 
 5. Experimental Evaluation
 5.7. Multiple narrow layers vs One wide layer
 On a given feature map size one can have an arbitrary number of convolutional layers with
-SAMEpadding and each layer can have an arbitrary number of filters. A convolutional layer
-with more filters is called wider[ZK16], a convolutional layer with fewer filters is thus called
-narrower and the number of filters in a convolutional layer is the layers width.
+SAME padding and each layer can have an arbitrary number of filters. A convolutional layer
+with more filters is called wider [ZK16], a convolutional layer with fewer filters is thus called
+narrower and the number of filters in a convolutional layer is the layer's width.
 If the number of parameters which may be used for the feature map scale is fixed and high
-enough, there are still many combinations. If niwithi= 0,...,kis the number of output
-feature maps of layer iwherei= 0is the input layer and all filters are 3×3filters without
+enough, there are still many combinations. If n_i with i = 0, ..., k is the number of output
+feature maps of layer i where i = 0 is the input layer and all filters are 3×3 filters without
 a bias, then the number of parameters is
-Parameters =k∑
-i=1(
-(ni−1·32+ 1)·ni)
+\[
+\text{Parameters} = \sum_{i=1}^{k}
+\left(
+(n_{i-1} \cdot 3^2 + 1) \cdot n_i
+\right)
+\]
 Hence the width of one layer does not only influence the parameters in this layer, but also
 in the next layer.
 The number of possible subsequent layers of one feature map size is enormous, even if
 constraints are placed on the number of parameters. For example, the first convolutional
 layer of the baseline model has 896 parameters. If one assumes that less than 3 filters per
-layer are not desirable, one keeps all layers having a bias and all layers only use 3×3filters,
+layer are not desirable, one keeps all layers having a bias and all layers only use 3×3 filters,
 then the maximum depth is 10. If one furthermore assumes that at least 800 parameters
 should be used, there are still 120 possible layer combinations. As experimentally evaluating
 one layer combination takes about 10 hours on a GTX 970 for CIFAR-100 it is not possible
@@ -1734,52 +1864,52 @@ to evaluate all layer combinations. In the following, a couple of changes to the
 width / depth will be evaluated.
 Each layer expands the receptive field. Hence deeper layers can use more of the input for
 every single output value. But deeper networks need more time for inference as the output
-of layerihas to be computed before the output of i+ 1can be computed. Hence there is
+of layer i has to be computed before the output of layer i + 1 can be computed. Hence there is
 less potential to parallelize computations. Each filter can be seen as a concept which can
 be learned. The deeper the filter is in the network, the higher is the abstraction level of the
 concept. In most cases, both are necessary: Many different concepts (width) and high-level
 concepts (depth).
 Reducing the two first convolutional layers of the baseline model (see Page 39) to one
-convolutional layer of 48 filters ( 944 396parameters in total, whereas the baseline model
-has944 012parameters) resulted in a mean accuracy of 61.64 %(-1.74 %) and a standard
-deviation of σ= 1.12(+0.57). The ensemble achieved 63.18 %(-1.52 %). As expected,
-the training time per epoch was reduced. For the GTX 980, it was reduced from 22.0 sof
-the baseline model to 15 sof the model with one less convolutional layer, one less Batch
-Normalization and one less activation layer. The inference time was also reduced from 6 ms
+convolutional layer of 48 filters (944 396 parameters in total, whereas the baseline model
+has 944 012 parameters) resulted in a mean accuracy of 61.64 % (-1.74 %) and a standard
+deviation of σ = 1.12 (+0.57). The ensemble achieved 63.18 % (-1.52 %). As expected,
+the training time per epoch was reduced. For the GTX 980, it was reduced from 22.0 s of
+the baseline model to 15 s of the model with one less convolutional layer, one less Batch
+Normalization and one less activation layer. The inference time was also reduced from 6 ms
 
 5.8. Batch Normalization
-to4 msfor 1 image and from 32 msto23 msfor 128 images. Due to the loss in accuracy of
+to 4 ms for 1 image and from 32 ms to 23 ms for 128 images. Due to the loss in accuracy of
 more than one percentage point of the mean model and the increased standard deviation of
-the models performance, at least two convolutional layers are on the 32 px×32 pxfeature
+the models' performance, at least two convolutional layers on the 32 px × 32 px feature
 map scale are recommendable for CIFAR-100.
 Changing the baseline to have fewer filters but more layers is another option. This was tried
-for the first block at the 32 px×32 pxfeature map scale. The two convolutional layers
+for the first block at the 32 px × 32 px feature map scale. The two convolutional layers
 (layers 1 – 4 in Page 39) were replaced by two convolutional layers with 27 filters and one
-convolutional layer with 26 filters in the convolution - BN - ELU pattern. The model
-has944 132parameters. Compared to the baseline model, the time for inference was the
+convolutional layer with 26 filters in the convolution - BN - ELU pattern. The model
+has 944 132 parameters. Compared to the baseline model, the time for inference was the
 same. This is unexpected, because the inference time changed when a layer was removed at
-this scale. The mean test accuracy was 63.66 %(+0.28) and the standard deviation was
-σ= 1.03(+0.48). The ensemble achieved 64.91 %test accuracy (+0.21).
+this scale. The mean test accuracy was 63.66 % (+0.28) and the standard deviation was
+σ = 1.03 (+0.48). The ensemble achieved 64.91 % test accuracy (+0.21).
 Having two nonlinearities at each feature map scale could be important to learn nonlinear
 transformations at that scale. As the baseline model does only have one nonlinearity at the
-8×8feature maps scale, another convolutional layer with 64 filters, Batch Normalization
+8×8 feature maps scale, another convolutional layer with 64 filters, Batch Normalization
 and ELU was added. To keep the number of parameters constant, layer 11 of the baseline
 model was reduced from 512 filters to 488 filters. The new model achieves a mean accuracy
-of63.09 %(-0.29) with a standard deviation of σ= 0.70(+0.15). The ensemble achieves
-an accuracy of 64.39 %(+0.31). This could indicate that having two convolutional layers
+of 63.09 % (-0.29) with a standard deviation of σ = 0.70 (+0.15). The ensemble achieves
+an accuracy of 64.39 % (+0.31). This could indicate that having two convolutional layers
 is more important for layers close to the input than for intermediate layers. Alternatively, the
 parameters could be more important in layer 11 than having a new convolutional layer after
 layer 9.
 In order to control the hypothesis that having two convolutional layers is less important in
-the middle of a network, the second convolutional layer at the 16×16feature map scale is
+the middle of a network, the second convolutional layer at the 16×16 feature map scale is
 removed. The first convolutional layer was increased from 32 filters to 59 filters, the second
 convolutional layer was increased from 32 filters to 58 filters in order to keep the amount of
-parameters of the model constant. The adjusted model achieved 62.72 %(-0.66) mean test
-accuracy with a standard deviation of σ= 0.84(+0.29). The ensemble achieved 63.88 %
+parameters of the model constant. The adjusted model achieved 62.72 % (-0.66) mean test
+accuracy with a standard deviation of σ = 0.84 (+0.29). The ensemble achieved 63.88 %
 test accuracy (-0.66).
-Even more extreme, if both convolutional layers are removed from the 16×16feature map
-scale, the mean test accuracy drops to 61.21 %(-2.17) with a standard deviation of σ= 0.51
-(-0.04). The ensemble achieves a test accuracy of 63.07 %(-1.63). Thus it is very important
+Even more extreme, if both convolutional layers are removed from the 16×16 feature map
+scale, the mean test accuracy drops to 61.21 % (-2.17) with a standard deviation of σ = 0.51
+(-0.04). The ensemble achieves a test accuracy of 63.07 % (-1.63). Thus it is very important
 to have at least one convolutional layer at this feature map scale.
 5.8. Batch Normalization
 In [CUH15], the authors write that Batch Normalization does not improve ELU networks.
@@ -1787,31 +1917,31 @@ Hence the effect of removing Batch Normalization from the baseline is investigat
 
 5. Experimental Evaluation
 experiment.
-As before, 10 models are trained on CIFAR-100. The training setup and the model mno-bn
-are identical to the baseline model m, except that in mno-bnthe Batch Normalization layers
+As before, 10 models are trained on CIFAR-100. The training setup and the model m_no-bn
+are identical to the baseline model m, except that in m_no-bn the Batch Normalization layers
 are removed.
-One notable difference is the training time: While mneeds 21 msper epoch in average on
-a GTX 980, mno-bnonly needs 21 msper epoch. The number of epochs used for training,
+One notable difference is the training time: While m needs 21 ms per epoch on average on
+a GTX 980, m_no-bn only needs 21 ms per epoch. The number of epochs used for training,
 however, also increased noticeably from 149 epochs to 178 epochs on average. The standard
-deviation of trained epochs is 17.3 epochs for the baseline model and 23.4 epochs for mno-bn.
-The mean accuracy of mno-bnis62.86 %and hence 0.52 percentage points worse. The
+deviation of trained epochs is 17.3 epochs for the baseline model and 23.4 epochs for m_no-bn.
+The mean accuracy of m_no-bn is 62.86 % and hence 0.52 percentage points worse. The
 standard deviation between models increased from 0.55 to 0.61. This is likely a result of the
 early stopping policy and the differences in training epochs. This can potentially be fixed
 by retraining the models which stopped earlier than the model which was trained for the
-biggest amount of epochs. The ensemble test accuracy is 63.88 %and hence 0.82 percentage
+biggest amount of epochs. The ensemble test accuracy is 63.88 % and hence 0.82 percentage
 points worse than the baseline.
 The filter weight range and distribution is approximately the same as Figure 5.6 and
 Figure 5.2, but the distribution of bias weights changed noticeably: While the bias weights of
 the baseline are spread out in the first layer and much more concentrated in subsequent layers
 (see Figure 5.3), the model without Batch Normalization has rather concentrated weights
 in the first layers and only the bias weights of the last layer are spread out (see Figure A.2).
-Another model m′
-no-bnwhich has one more filter in the convolutional layer 1, 3, 5, and 7 to
+Another model m′_no-bn
+has one more filter in the convolutional layers 1, 3, 5, and 7 to
 compensate for the loss of parameters in Batch Normalization. The mean test accuracy of
-10 such models is 62.87 %which is 0.51 percentage points worse than the baseline. The
-ensemble of m′
-no-bnachieves 64.33 %which is 0.37 percentage points worse than the baseline.
-The mean training time was 14 sper epoch and 157.4 epochs with a standard deviation of
+10 such models is 62.87 % which is 0.51 percentage points worse than the baseline. The
+ensemble of m′_no-bn
+achieves 64.33 % which is 0.37 percentage points worse than the baseline.
+The mean training time was 14 s per epoch and 157.4 epochs with a standard deviation of
 20.7 epochs.
 Hence it is not advisable to remove Batch Normalization for the final model. It could,
 however, be possible to remove Batch Normalization for the experiments to iterate quicker
@@ -1820,75 +1950,75 @@ Batch Normalization.
 
 5.9. Batch size
 5.9. Batch size
-The mini-batch size m∈N≥1influences
-•Epochs until convergence : The smaller m, the more often the model is updated
+The mini-batch size m ∈ N≥1 influences
+• Epochs until convergence: The smaller m, the more often the model is updated
 in one epoch. Those updates, however, are based on fewer samples of the dataset.
 Hence the gradients of different mini-batches can noticeably differ. In the literature,
 this is referred to as gradient noise [KMN+16].
-•Training time per epoch : The smaller the batch size, the higher the training time
+• Training time per epoch: The smaller the batch size, the higher the training time
 per epoch as the hardware is not optimally utilized.
-•Resulting model quality : The choice of the hyperparameter minfluences the
-accuracy of the classifier when training is finished. [ KMN+16] supports the view that
-smallermresult in less sharp minima. Hence smaller mlead to better generalization.
+• Resulting model quality: The choice of the hyperparameter m influences the
+accuracy of the classifier when training is finished. [KMN+16] supports the view that
+smaller m result in less sharp minima. Hence smaller m lead to better generalization.
 Empiric evaluation results can be found in Table 5.9. Those results confirm the claim
 of [KMN+16] that lower batch sizes generalize better.
-mTrainingEpochsMean total Single model Ensemble
+m Training Epochs Mean total Single model Ensemble
 time training time Accuracy std Accuracy
-8118s
-epoch81–153 14 131 s 61 .93 %σ= 1.03 65.68 %
-16 62s
-epoch103 – 173 8349 s 64.16 %σ= 0.81 66.98 %
-32 35s
-epoch119 – 179 5171 s 64 .11 %σ= 0.75 65.89 %
-64 25s
-epoch133 – 195 2892 s 63.38 %σ= 0.55 64.70 %
-128 18s
-epoch145 – 239 3126 s 62 .23 %σ= 0.73 63.55 %
-Table 5.9.: Trainingtimeperepochandsinglemodeltestsetaccuracy(meanandstandarddeviation)
-of baseline models trained with different mini-batch sizes mon GTX 970 GPUs on
+8 118 s
+epoch 81 – 153 14 131 s 61.93 % σ = 1.03 65.68 %
+16 62 s
+epoch 103 – 173 8349 s 64.16 % σ = 0.81 66.98 %
+32 35 s
+epoch 119 – 179 5171 s 64.11 % σ = 0.75 65.89 %
+64 25 s
+epoch 133 – 195 2892 s 63.38 % σ = 0.55 64.70 %
+128 18 s
+epoch 145 – 239 3126 s 62.23 % σ = 0.73 63.55 %
+Table 5.9.: Training time per epoch and single model test set accuracy (mean and standard deviation)
+of baseline models trained with different mini-batch sizes m on GTX 970 GPUs on
 CIFAR-100.
 5.10. Bias
 Figure 5.3 suggests that the bias is not important for the layers 11, 13 and 15. Hence a
-modelmno-biasis created which is identical to the baseline model m, except that the bias of
+model m_no-bias is created which is identical to the baseline model m, except that the bias of
 layers 11, 13 and 15 is removed.
-The mean test accuracy of 10 trained mno-biasis63.74 %which is an improvement of
-0.36 percentage points over the baseline. The ensemble achieves a test accuracy of 65.13 %
+The mean test accuracy of 10 trained m_no-bias is 63.74 % which is an improvement of
+0.36 percentage points over the baseline. The ensemble achieves a test accuracy of 65.13 %
 which is 0.43 percentage points better than the baseline. Hence the bias can safely be
 removed.
 Removing the biases did not have a noticeable effect on the filter weight range, the filter
-weight distribution or the distribution of the remaining biases. Also, the γandβparameters
+weight distribution or the distribution of the remaining biases. Also, the γ and β parameters
 of the Batch Normalization layers did not noticeably change.
 
 5. Experimental Evaluation
 5.11. Learned Color Space Transformation
-In [MSM16] it is described that placing one convolutional layer with 10 filters of size 1×1
-directly after the input and then another convolutional layer with 3 filters of size 1×1acts
+In [MSM16] it is described that placing one convolutional layer with 10 filters of size 1×1
+directly after the input and then another convolutional layer with 3 filters of size 1×1 acts
 as a learned transformation in another color space and boosts the accuracy.
 This approach was evaluated on CIFAR-100 by adding a convolutional layer with ELU activation
  and 10 filters followed by another convolutional layer with ELU activation and
-3 filters. The mean accuracy of 10 models was 63.31 %with a standard deviation of 1.37.
+3 filters. The mean accuracy of 10 models was 63.31 % with a standard deviation of 1.37.
 The standard deviation is noticeably higher than the standard deviation of the baseline
 model (0.55) and the accuracy also decreased by 0.07 percentage points. The accuracy of
-the ensemble is at 64.77 %and hence 0.07 percentage points higher than the accuracy of
+the ensemble is at 64.77 % and hence 0.07 percentage points higher than the accuracy of
 the baseline models.
 The inference time for 1 image and for 128 images did not change compared to the baseline.
-The training time per epoch increased from 26 sto30 son the GTX 970.
+The training time per epoch increased from 26 s to 30 s on the GTX 970.
 Hence it is not advisable to use the learned color space transformation.
 5.12. Pooling
-An alternative to max pooling with stride 2 with a 2×2kernel is using a 3×3kernel with
+An alternative to max pooling with stride 2 with a 2×2 kernel is using a 3×3 kernel with
 stride 2.
 This approach was evaluated on CIFAR-100 by replacing all max pooling layers with the
-3×3kernel max pooling (and SAMEpadding). The mean accuracy of 10 models was 63.32 %
-(−0.06) and the standard deviation was 0.57 ( +0.02). The ensemble achieved 65.15 %test
-accuracy ( +0.45).
-The training time per epoch decreased from 20.5 s-21.1 sto18.6 s(mean of 10 training runs)
-on the Nvidia GTX 970. The time for inference increased from 25 msto26 msfor a batch
+3×3 kernel max pooling (and SAME padding). The mean accuracy of 10 models was 63.32 %
+(−0.06) and the standard deviation was 0.57 (+0.02). The ensemble achieved 65.15 % test
+accuracy (+0.45).
+The training time per epoch decreased from 20.5 s-21.1 s to 18.6 s (mean of 10 training runs)
+on the Nvidia GTX 970. The time for inference increased from 25 ms to 26 ms for a batch
 of 128 images.
 5.13. Activation Functions
-Nonlinear, differentiableactivationfunctionsareimportantforneuralnetworkstoallowthem
+Nonlinear, differentiable activation functions are important for neural networks to allow them
 to learn nonlinear decision boundaries. One of the simplest and most widely used activation
-functions for CNNs is ReLU [ KSH12], but others such as ELU [ CUH15], parametrized
-rectified linear unit (PReLU) [ HZRS15b ], softplus [ ZYL+15] and softsign [ BDLB09 ] have
+functions for CNNs is ReLU [KSH12], but others such as ELU [CUH15], parametrized
+rectified linear unit (PReLU) [HZRS15b], softplus [ZYL+15] and softsign [BDLB09] have
 been proposed. The baseline uses ELU.
 
 5.13. Activation Functions
@@ -1896,16 +2026,16 @@ Activation functions differ in the range of values and the derivative. The defin
 other comparisons of eleven activation functions are given in Table B.3.
 Theoretical explanations why one activation function is preferable to another in some
 scenarios are the following:
-•Vanishing Gradient : Activation functions like tanh and the logistic function saturate
- outside of the interval [−5,5]. This means weight updates are very small for
+• Vanishing Gradient: Activation functions like tanh and the logistic function saturate
+ outside of the interval[−5,5]. This means weight updates are very small for
 preceding neurons, which is especially a problem for very deep or recurrent networks as
-described in [ BSF94]. Even if the neurons learn eventually, learning is slower [ KSH12].
-•Dying ReLU : The dying ReLU problem is similar to the vanishing gradient problem.
+described in [BSF94]. Even if the neurons learn eventually, learning is slower [KSH12].
+• Dying ReLU: The dying ReLU problem is similar to the vanishing gradient problem.
 The gradient of the ReLU function is 0 for all non-positive values. This means if all
 elements of the training set lead to a negative input for one neuron at any point in the
 training process, this neuron does not get any update and hence does not participate
 in the training process. This problem is addressed in [MHN13].
-•Mean unit activation : Some publications like [ CUH15,IS15] claim that mean
+• Mean unit activation: Some publications like [CUH15, IS15] claim that mean
 unit activations close to 0 are desirable. They claim that this speeds up learning
 by reducing the bias shift effect. The speedup of learning is supported by many
 experiments. Hence the possibility of negative activations is desirable.
@@ -1918,14 +2048,14 @@ tanh and softplus performed worse than the identity and it is unclear why the pu
 network performed so much better than the logistic function. One hypothesis why the
 logistic function performs so bad is that it cannot produce negative outputs. Hence the
 logistic−function was developed:
-logistic−(x) =1
-1 +e−x−0.5
+logistic−(x) = 1
+1 + e−x −0.5
 The logistic−function has the same derivative as the logistic function and hence still suffers
 from the vanishing gradient problem. The network with the logistic−function achieves an
-accuracy which is 11.30 %better than the network with the logistic function, but is still
-5.54 %worse than the ELU.
+accuracy which is11.30 % better than the network with the logistic function, but is still
+5.54 % worse than the ELU.
 Similarly, ReLU was adjusted to have a negative output:
-ReLU−(x) = max(−1,x) =ReLU (x+ 1)−1
+ReLU−(x) = max(−1,x) = ReLU(x+ 1) −1
 The results of ReLU−are much worse on the training set, but perform similar on the test
 
 5. Experimental Evaluation
@@ -1935,16 +2065,18 @@ This contradicts [GBB11, SMGS14].
 A key difference between the logistic−function and ELU is that ELU does neither suffers
 from the vanishing gradient problem nor is its range of values bound. For this reason, the
 S2ReLU activation function, defined as
-S2ReLU (x) =ReLU (x
-2+ 1)−ReLU (−x
-2+ 1) =
+S2ReLU(x) = ReLU(x
+2 + 1) −ReLU(−x
+2 + 1) =
+
 
-−x
-2+ 1ifx≤−2
-x if−2≤x≤2
+
+−x
+2 + 1 if x≤−2
+x if −2 ≤x≤2
 x
-2+ 1ifx>−2
-This function is similar to SReLUs as introduced in [ JXF+16]. The difference is that S2ReLU
+2 + 1 if x> −2
+This function is similar to SReLUs as introduced in [JXF+16]. The difference is that S2ReLU
 does not introduce learnable parameters. The S2ReLU was designed to be symmetric, be
 the identity close to zero and have a smaller absolute value than the identity farther away.
 It is easy to compute and easy to implement.
@@ -1956,11 +2088,11 @@ test accuracy.
 Function Vanishing Gradient Negative Activation possible Bound activation
 Identity No Yes No
 Logistic Yes No Yes
-Logistic−Yes Yes Yes
+Logistic− Yes Yes Yes
 Softmax Yes Yes Yes
 tanh Yes Yes Yes
 Softsign Yes Yes Yes
-ReLU Yes1No Half-sided
+ReLU Yes1 No Half-sided
 Softplus No No Half-sided
 S2ReLU No Yes No
 LReLU/PReLU No Yes No
@@ -1969,189 +2101,201 @@ Table 5.10.: Properties of activation functions.
 1The dying ReLU problem is similar to the vanishing gradient problem.
 
 5.13. Activation Functions
-FunctionSingle model Ensemble of 10
+Function Single model Ensemble of 10
 Training set Test set Training set Test set
-Identity 66.25 %σ= 0.77 56.74 %σ= 0.51 68.77 % 58 .78 %
-Logistic 51.87 %σ= 3.64 46.54 %σ= 3.22 61.19 % 54 .58 %
-Logistic−66.49 %σ= 1.99 57.84 %σ= 1.15 69.04 % 60 .10 %
-Softmax 75.22 %σ= 2.41 59.49 %σ= 1.25 78.87 % 63 .06 %
-Tanh 67.27 %σ= 2.38 55.70 %σ= 1.44 70.21 % 58 .10 %
-Softsign 66.43 %σ= 1.74 55.75 %σ= 0.93 69.78 % 58 .40 %
-ReLU 78.62 %σ= 2.15 62.18 %σ= 0.99 81.81 % 64 .57 %
-ReLU−76.01 %σ= 2.31 62.87 %σ= 1.08 78.18 % 64 .81 %
-Softplus 66.75 %σ= 2.45 56.68 %σ= 1.32 71.27 % 60 .26 %
-S2ReLU 63.32 %σ= 1.69 56.99 %σ= 1.14 65.80 % 59 .20 %
-LReLU 74.92 %σ= 2.49 61.86 %σ= 1.23 77.67 % 64 .01 %
-PReLU 80.01 %σ= 2.03 62.16 %σ= 0.73 83.50 % 64.79 %
-ELU 76.64 %σ= 1.48 63.38 %σ= 0.55 78.30 % 64 .70 %
-Table 5.11.: Training and test accuracy of adjusted baseline models trained with different activation
-functions on CIFAR-100. For LReLU, α= 0.3was chosen.
-FunctionInference per TrainingEpochsMean total
+Identity 66.25 % σ= 0.77 56.74 % σ= 0.51 68 .77 % 58 .78 %
+Logistic 51.87 % σ= 3.64 46 .54 % σ= 3.22 61 .19 % 54 .58 %
+Logistic− 66.49 % σ= 1.99 57 .84 % σ= 1.15 69 .04 % 60 .10 %
+Softmax 75.22 % σ= 2.41 59 .49 % σ= 1.25 78 .87 % 63 .06 %
+Tanh 67.27 % σ= 2.38 55 .70 % σ= 1.44 70 .21 % 58 .10 %
+Softsign 66.43 % σ= 1.74 55 .75 % σ= 0.93 69 .78 % 58 .40 %
+ReLU 78.62 % σ= 2.15 62 .18 % σ= 0.99 81 .81 % 64 .57 %
+ReLU− 76.01 % σ= 2.31 62 .87 % σ= 1.08 78 .18 % 64 .81 %
+Softplus 66.75 % σ= 2.45 56 .68 % σ= 1.32 71 .27 % 60 .26 %
+S2ReLU 63.32 % σ= 1.69 56 .99 % σ= 1.14 65 .80 % 59 .20 %
+LReLU 74.92 % σ= 2.49 61 .86 % σ= 1.23 77 .67 % 64 .01 %
+PReLU 80.01 % σ= 2.03 62 .16 % σ= 0.73 83.50 % 64 .79 %
+ELU 76.64 % σ= 1.48 63.38 % σ= 0.55 78 .30 % 64 .70 %
+Table 5.11.:Training and test accuracy of adjusted baseline models trained with different activation
+functions on CIFAR-100. For LReLU,α= 0.3 was chosen.
+Function Inference per Training Epochs Mean total
 1 Image 128 time training time
-Identity 8 ms 42 ms 31s
-epoch108 –148 3629 s
-Logistic 6 ms 31 ms 24s
-epoch101– 167 2234 s
-Logistic−6 ms 31 ms 22s
-epoch133 – 255 3421 s
-Softmax 7 ms 37 ms 33s
-epoch127 – 248 5250 s
-Tanh 6 ms 31 ms 23s
-epoch125 – 211 3141 s
-Softsign 6 ms 31 ms 23s
-epoch122 – 205 3505 s
-ReLU 6 ms 31 ms 23s
-epoch118 – 192 3449 s
-Softplus 6 ms 31 ms 24s
-epoch101– 165 2718 s
-S2ReLU 5 ms 32 ms 26s
-epoch108 – 209 3231 s
-LReLU 7 ms 34 ms 25s
-epoch109 – 198 3388 s
-PReLU 7 ms 34 ms 28s
-epoch131 – 215 3970 s
-ELU 6 ms 31 ms 23s
-epoch146 – 232 3692 s
-Table 5.12.: Training time and inference time of adjusted baseline models trained with different
+Identity 8 ms 42 ms 31 s
+epoch 108 –148 3629 s
+Logistic 6 ms 31 ms 24 s
+epoch 101 – 167 2234 s
+Logistic− 6 ms 31 ms 22 s
+epoch 133 – 255 3421 s
+Softmax 7 ms 37 ms 33 s
+epoch 127 – 248 5250 s
+Tanh 6 ms 31 ms 23 s
+epoch 125 – 211 3141 s
+Softsign 6 ms 31 ms 23 s
+epoch 122 – 205 3505 s
+ReLU 6 ms 31 ms 23 s
+epoch 118 – 192 3449 s
+Softplus 6 ms 31 ms 24 s
+epoch 101 – 165 2718 s
+S2ReLU 5 ms 32 ms 26 s
+epoch 108 – 209 3231 s
+LReLU 7 ms 34 ms 25 s
+epoch 109 – 198 3388 s
+PReLU 7 ms 34 ms 28 s
+epoch 131 – 215 3970 s
+ELU 6 ms 31 ms 23 s
+epoch 146 – 232 3692 s
+Table 5.12.:Training time and inference time of adjusted baseline models trained with different
 activation functions on GTX 970 GPUs on CIFAR-100. It was expected that the
 identity is the fastest function. This result is likely an implementation specific problem
 of Keras 2.0.4 or Tensorflow 1.1.0.
 
 5. Experimental Evaluation
-FunctionSingle model Ensemble Epochs
+Function Single model Ensemble Epochs
 Accuracy std Accuracy Range Mean
-Identity 99.45 %σ= 0.09 99.63 %55 – 77 62.2
-Logistic 97.27 %σ= 2.10 99.48 %37– 7654.5
-Softmax 99.60 %σ= 0.03 99.63 %44 – 73 55.6
-Tanh 99.40 %σ= 0.09 99.57 %56 – 80 67.6
-Softsign 99.40 %σ= 0.08 99.57 %72 – 101 84.0
-ReLU 99.62 %σ= 0.04 99.73 %51 – 94 71.7
-Softplus 99.52 %σ= 0.05 99.62 %62 –7068.9
-PReLU 99.57 %σ= 0.07 99.73 %44 – 89 71.2
-ELU 99.53 %σ= 0.06 99.58 %45 – 111 72.5
-Table 5.13.: Test accuracy of adjusted baseline models trained with different activation functions
+Identity 99.45 % σ= 0.09 99 .63 % 55 – 77 62.2
+Logistic 97.27 % σ= 2.10 99 .48 % 37 – 76 54.5
+Softmax 99.60 % σ= 0.03 99.63 % 44 – 73 55.6
+Tanh 99.40 % σ= 0.09 99 .57 % 56 – 80 67.6
+Softsign 99.40 % σ= 0.08 99 .57 % 72 – 101 84.0
+ReLU 99.62 % σ= 0.04 99.73 % 51 – 94 71.7
+Softplus 99.52 % σ= 0.05 99 .62 % 62 – 70 68.9
+PReLU 99.57 % σ= 0.07 99.73 % 44 – 89 71.2
+ELU 99.53 % σ= 0.06 99 .58 % 45 – 111 72.5
+Table 5.13.:Test accuracy of adjusted baseline models trained with different activation functions
 on MNIST.
 5.14. Label smoothing
-Ensembles consisting of nmodels trained by the same procedure on the same data but
+Ensembles consisting ofn models trained by the same procedure on the same data but
 initialized with different weights and trained with a different order of the training data
 perform consistently better than single models. One drawback of ensembles in applications
-such as self-driving cars is that they increase the computation by a factor of n. One idea
+such as self-driving cars is that they increase the computation by a factor ofn. One idea
 why they improve the test accuracy is by reducing the variance.
 The idea of label smoothing is to use the ensemble prediction of the training data as labels
-for another classifier. For every element xof the training set, the one-hot encoded target
-t(x)is smoothed by the ensemble prediction yE(x)
-t′(x) =α·t(x) + (1−α)yE(x)
-whereα∈[0,1]is the smoothing factor.
+for another classifier. For every elementx of the training set, the one-hot encoded target
+t(x) is smoothed by the ensemble predictionyE(x)
+t′(x) = α·t(x) + (1−α)yE(x)
+where α∈[0,1] is the smoothing factor.
 There are three reasons why label smoothing could be beneficial:
-•Training speed : The ensemble prediction contains more information about the
+• Training speed: The ensemble prediction contains more information about the
 image than binary class decisions. Classifiers in computer vision predict how similar
 the input looks to other input of the classes they are trained on. By smoothing the
 labels, the information that one image could also belong to another class is passed to
 the optimizer. In early stages of the optimization this could lead to a lower loss on
 the non-smoothed validation set.
-•Higher accuracy : Using smoothed labels for the optimization could lead to a higher
+• Higher accuracy: Using smoothed labels for the optimization could lead to a higher
 accuracy of the base-classifier due to a smoothed error surface. It might be less likely
 
 5.14. Label smoothing
 that the classifier gets into bad local minima.
-•Label noise : Depending on the way how the labels are obtained, it might not always
+• Label noise: Depending on the way how the labels are obtained, it might not always
 be clear which label is the correct one. Also, labeling errors can be present in training
 datasets. Those errors severely harm the training. By smoothing the labels errors
 could be relaxed.
-10 models msmoothare trained with the α= 0.5smoothed labels from the prediction
+10 models msmooth are trained with the α = 0 .5 smoothed labels from the prediction
 of an ensemble of 10 baseline models. The mean accuracy of the models trained on the
-smoothed training set labels was 63.61 %(+0.23 %) and the standard deviation was σ= 0.72
-(+0.17 %). The ensemble of 10 msmoothmodels achieved 64.79 %accuracy ( +0.09 %). Hence
+smoothed training set labels was63.61 % (+0.23 %) and the standard deviation wasσ= 0.72
+(+0.17 %). The ensemble of 10msmooth models achieved64.79 % accuracy (+0.09 %). Hence
 the effect of this kind of label smoothing on the final accuracy is questionable.
 The training speed didn’t noticeably change either: The number of trained epochs ranged
 from 144 to 205, the mean number of epochs was 177. The baseline training ranged from
 146 to 232 epochs with a mean of 174 epochs. After 10, 30 and 80 epochs both training
 methods accuracy differed by less than one percentage point. Hence it is unlikely that label
 smoothing has a positive effect on the training speed.
-Hinton et al. called this method distillation in [HVD15]. Hinton et al. used smooth and
+Hinton et al. called this methoddistillation in [HVD15]. Hinton et al. used smooth and
 hard labels for training, this work only used smoothed labels.
 
 5. Experimental Evaluation
 5.15. Optimized Classifier
 In comparison to the baseline classifier, the following changes are applied to the optimized
 classifier:
-•Remove the bias for the last layers : For all layers which output a 1×1feature
+• Remove the bias for the last layers: For all layers which output a1 ×1 feature
 map, the bias is removed
-•Increase the max pooling kernel to 3×3
-•More filters in the first layers
+• Increase the max pooling kernel to3 ×3
+• More filters in the first layers
 The detailed architecture is given in Table 5.14 and visualized in Figure 5.16. The evaluation
 is given in Table 5.15 and the timing comparison is given in Table 5.16.
 # Type Filters @
-Patch size / strideParameters FLOPs Output size
-Input 0 0 3@32 ×32
-1 Convolution 69@ 3×3×3/1 1932 3744768 69@32×32
-2 BN + ELU 138 353418 69@32×32
-3 Convolution 69@ 3×3×32/1 42918 37684096 69@32×32
-4 BN + ELU 138 353418 69@32×32
-Max pooling 2×2/2 0 40960 32@16 ×16
-5 Convolution 64@ 3×3×32/1 39808 20332544 64@16 ×16
-6 BN + ELU 128 82048 64@16 ×16
-7 Convolution 64@ 3×3×64/1 36928 18 857 984 64@16×16
-8 BN + ELU 128 82048 64@16 ×16
-Max pooling 2×2/2 20480 64@ 8 ×8
-9 Convolution 64@ 3×3×64/1 36928 4714496 64@ 8 ×8
-10 BN + ELU 128 20608 64@ 8 ×8
-Max pooling 2×2/2 5120 64@ 4 ×4
-11 Convolution (v) 512@ 4×4×64/1 524 288 1048064 512@ 1 ×1
-12 BN + ELU 1024 3584 512@ 1 ×1
-Dropout 0.5 0 0 512@ 1 ×1
-13 Convolution 512@ 1×1×512/1 262144 523776 512@ 1 ×1
-14 BN + ELU 1024 3584 512@ 1 ×1
-Dropout 0.5 0 0 512@ 1 ×1
-15 Convolution k @ 1×1×512/1 512·k 512·kk @ 1×1
-Global avg Pooling 1×1 0k k @ 1×1
-16 BN + Softmax 2k 7k k @ 1×1
+Patch size / stride
+Parameters FLOPs Output size
+Input 0 0 3@32 × 32
+1 Convolution 69@ 3 ×3 ×3 /1 1932 3744768 69 @32 × 32
+2 BN + ELU 138 353418 69 @32 × 32
+3 Convolution 69@ 3 ×3 ×32 /1 42918 37684096 69 @32 × 32
+4 BN + ELU 138 353418 69 @32 × 32
+Max pooling 2 ×2 /2 0 40960 32@16 × 16
+5 Convolution 64@ 3 ×3 ×32 /1 39808 20332544 64@16 × 16
+6 BN + ELU 128 82048 64@16 × 16
+7 Convolution 64@ 3 ×3 ×64 /1 36928 18 857 984 64@16 × 16
+8 BN + ELU 128 82048 64@16 × 16
+Max pooling 2 ×2 /2 20480 64@ 8 × 8
+9 Convolution 64@ 3 ×3 ×64 /1 36928 4714496 64@ 8 × 8
+10 BN + ELU 128 20608 64@ 8 × 8
+Max pooling 2 ×2 /2 5120 64@ 4 × 4
+11 Convolution (v) 512@ 4 ×4 ×64 /1 524 288 1048064 512@ 1 × 1
+12 BN + ELU 1024 3584 512@ 1 × 1
+Dropout 0.5 0 0 512@ 1 × 1
+13 Convolution 512@ 1 ×1 ×512 /1 262144 523776 512@ 1 × 1
+14 BN + ELU 1024 3584 512@ 1 × 1
+Dropout 0.5 0 0 512@ 1 × 1
+15 Convolution k @ 1 ×1 ×512 /1 512 ·k 512 ·k k @ 1 × 1
+Global avg Pooling 1 ×1 0 k k @ 1 × 1
+16 BN + Softmax 2k 7k k @ 1 × 1
 ∑ 514k
-+947654520k
-+87870996179200+ 2k
-Table 5.14.: Optimized architecture with 3 input channels of size 32×32. All convolutional layers
-useSAMEpadding, except for layer 11 which used VALIDpadding in order to decrease
-the feature map size to 1×1. If the input feature map is bigger than 32×32, for each
-power of two there are two Convolution + BN + ELU blocks and one Max pooling
++947654
+520k
++87870996 179200+2k
+Table 5.14.:Optimized architecture with 3 input channels of size32 ×32. All convolutional layers
+use SAME padding, except for layer 11 which usedVALID padding in order to decrease
+the feature map size to1 ×1. If the input feature map is bigger than32 ×32, for each
+power of two there are twoConvolution + BN + ELU blocks and oneMax pooling
 block added. This is the framed part in the table.
 
-5.15. Optimized Classifier32×32Input
+5.15. Optimized Classifier
+32×32
+Input
 C69@3×3/1
 BN + ELU
 C69@3×3/1
-BN + ELU16×16max pooling 3×3/2
+BN + ELU
+16×16
+max pooling3×3/2
 C64@3×3/1
 BN + ELU
 C64@3×3/1
-BN + ELU8×8max pooling 3×3/2
+BN + ELU
+8×8
+max pooling3×3/2
 C64@3×3/1
-BN + ELU4×4max pooling 3×3/2
+BN + ELU
+4×4
+max pooling3×3/2
 C*512@4×4/1(V)
 BN + ELU
-Dropout,p= 0.51×1C*512@1×1/1
+Dropout,p= 0.5
+1×1
+C*512@1×1/1
 BN + ELU
 Dropout,p= 0.5
 C*k@1×1/1
 Global AVG pooling
 BN + Softmax
-Figure 5.16.: Architecture of the optimized model. C32@3×3/1is a convolutional layer with
-32 filters of kernel size 3×3with stride 1. The * indicates that no bias is used.
-DatasetSingle Model Accuracy Ensemble of 10
+Figure 5.16.:Architecture of the optimized model. C 32@3 ×3/1 is a convolutional layer with
+32 filters of kernel size3 ×3 with stride 1. The * indicates that no bias is used.
+Dataset Single Model Accuracy Ensemble of 10
 Training Set Test Set Training Set Test Set
-Asirra 95.83 %σ= 4.70 90.75 %σ= 4.73 98 .78 % 93.09 %
-CIFAR-10 94.58 %σ= 0.70 87.92 %σ= 0.46 96 .47 % 89.86 %
-CIFAR-100 77.96 %σ= 2.18 64.42 %σ= 0.73 81 .44 % 67.03 %
-GTSRB 100.00 %σ= 0.00 99.28 %σ= 0.10 100 .00 % 99.51 %
-HASYv2 88.79 %σ= 0.45 85.36 %σ= 0.15 89 .36 % 85.92 %
-MNIST 99.88 %σ= 0.10 99.48 %σ= 0.13 99 .99 % 99.67 %
-STL-10 95.43 %σ= 3.57 75.09 %σ= 2.39 98 .54 % 78.66 %
-SVHN 99.08 %σ= 0.07 96.37 %σ= 0.12 99 .50 % 97.47 %
-Table 5.15.: Optimized model accuracy on eight datasets. The single model actuary is the 10 models
-used in the ensemble. The empirical standard deviation σof the accuracy is also given.
+Asirra 95.83 % σ= 4.70 90 .75 % σ= 4.73 98 .78 % 93 .09 %
+CIFAR-10 94.58 % σ= 0.70 87 .92 % σ= 0.46 96 .47 % 89 .86 %
+CIFAR-100 77.96 % σ= 2.18 64 .42 % σ= 0.73 81 .44 % 67 .03 %
+GTSRB 100.00 % σ= 0.00 99 .28 % σ= 0.10 100 .00 % 99 .51 %
+HASYv2 88.79 % σ= 0.45 85 .36 % σ= 0.15 89 .36 % 85 .92 %
+MNIST 99.88 % σ= 0.10 99 .48 % σ= 0.13 99 .99 % 99 .67 %
+STL-10 95.43 % σ= 3.57 75 .09 % σ= 2.39 98 .54 % 78 .66 %
+SVHN 99.08 % σ= 0.07 96 .37 % σ= 0.12 99 .50 % 97 .47 %
+Table 5.15.:Optimized model accuracy on eight datasets. The single model actuary is the 10 models
+used in the ensemble. The empirical standard deviationσ of the accuracy is also given.
 CIFAR-10, CIFAR-100 and STL-10 models use test-time transformations. None of the
 models uses unlabeled data or data from other datasets. For MNIST, GTSRB, SVHN
 and HASY, no test time transformations are used.
-Network GPU TensorflowInference per Training
+Network GPU Tensorflow Inference per Training
 1 Image 128 images time / epoch
 Optimized Default Intel i7-4930K 5 ms 432 ms 386 s
 Optimized Optimized Intel i7-4930K 4 ms 307 ms 315 s
@@ -2161,9 +2305,9 @@ Optimized Default GTX 980 3 ms 35 ms 27 s
 Optimized Default GTX 980 Ti 6 ms 36 ms 26 s
 Optimized Default GTX 1070 2 ms 24 ms 21 s
 Optimized Default Titan Black 4 ms 46 ms 43 s
-Table 5.16.: Speed comparison of the optimized model on CIFAR-10. The baseline model is
+Table 5.16.:Speed comparison of the optimized model on CIFAR-10. The baseline model is
 evaluated on six Nvidia GPUs and one CPU. The weights for DenseNet-40-12 are taken
-from [Maj17]. Weights the baseline model can be found at [ Tho17b]. The optimized
+from [Maj17]. Weights the baseline model can be found at [Tho17b]. The optimized
 Tensorflow build makes use of SSE4.X, AVX, AVX2 and FMA instructions.
 
 5. Experimental Evaluation
@@ -2172,7 +2316,7 @@ A separate validation set is necessary for two reasons: (1) Early stopping and (
 overfitting due to many experiments. To prevent overfitting, a different dataset can be used.
 For example, all decisions about hyperparameters in this thesis are based on CIFAR-100,
 but the network is finally trained and evaluated with the same hyperparameters on all
-datasets.2The validation set can hence be removed if early stopping is removed. Instead,
+datasets.2 The validation set can hence be removed if early stopping is removed. Instead,
 the validation data is used in a first run to determine the number of epochs necessary for
 training. In a second training run the validation data is added to the training set. The
 number of used epochs for the second run is given in Table 5.17.
@@ -2185,7 +2329,7 @@ HASYv2 92 136 116 369 369
 GTSRB 97 35 288 43 821
 STL-10 116 4500 10 450
 CIFAR-100 155 45 000 100 450
-Table 5.17.: Mean number of training epochs for the optimized model. For comparison, the total
+Table 5.17.:Mean number of training epochs for the optimized model. For comparison, the total
 amount of used training data, the number of classes of the dataset and the average
 amount of data per class is given.
 Alternatively, the model can be trained with early stopping (ES) purely on the training
@@ -2197,7 +2341,7 @@ improve the results when the number of epochs is fixed, but notably improved the
 when the training loss was used as the early stopping criterion.
 5.17. Regularization
 Stronger regularization might even improve the results when using the training loss as an
-early stopping criterion. ℓ2regularization with a weighting factor of λ= 0.0001is used in
+early stopping criterion.ℓ2 regularization with a weighting factor ofλ= 0.0001 is used in
 all other experiments. While the accuracy as shown in Table 5.19 does not show a clear
 pattern, the number of epochs increases with lower model regularization (see Table 5.20).
 2Except data augmentation and test time transformations.
@@ -2205,31 +2349,31 @@ pattern, the number of epochs increases with lower model regularization (see Tab
 4Only 3 models are in this ensemble due to the long training time of more than 8 hours per model.
 
 5.17. Regularization
-DatasetEarly Stopping Fixed epochs
+Dataset Early Stopping Fixed epochs
 val. acc train loss
-Asirra 93.09 % 96.01 %396.01 %
-CIFAR-10 89.86 % 91.75 % 88 .88 %
-CIFAR-100 67.03 % 71.01 % 69 .08 %
-HASYv2 85.92 % 82.89 %485.05 %
-MNIST 99.67 % 99.64 % 99 .57 %
-STL-10 78.66 % 83.25 % 78 .64 %
-Table 5.18.: Comparisons of trained optimized models with early stopping on the validation accuracy
+Asirra 93.09 % 96 .01 %3 96.01 %
+CIFAR-10 89.86 % 91 .75 % 88 .88 %
+CIFAR-100 67.03 % 71 .01 % 69 .08 %
+HASYv2 85.92 % 82 .89 %4 85.05 %
+MNIST 99.67 % 99 .64 % 99 .57 %
+STL-10 78.66 % 83 .25 % 78 .64 %
+Table 5.18.:Comparisons of trained optimized models with early stopping on the validation accuracy
 compared training setups without a validation set and thus more training data. The
 second column uses the training loss as a stopping criterion, the third column uses a
 fixed number of epochs which is equal to the mean number of training epochs of the
 models with early stopping on the validation set accuracy.
-λSingle Model Accuracy Ensemble of 10
+λ Single Model Accuracy Ensemble of 10
 Training Set Test Set Training Set Test Set
-λ= 0.01 73.83 %σ= 1.78 58.94 %σ= 1.33 87 .78 % 69.98 %
-λ= 0.001 82.86 %σ= 0.89 63.03 %σ= 0.67 91 .86 % 71.02 %
-λ= 0.0001 77.96 %σ= 2.18 64.42 %σ= 0.73 81 .44 % 67.03 %
-Table 5.19.: Different choices of ℓ2model regularization applied to the optimized model.
+λ= 0.01 73 .83 % σ= 1.78 58 .94 % σ= 1.33 87 .78 % 69 .98 %
+λ= 0.001 82 .86 % σ= 0.89 63 .03 % σ= 0.67 91 .86 % 71 .02 %
+λ= 0.0001 77 .96 % σ= 2.18 64 .42 % σ= 0.73 81 .44 % 67 .03 %
+Table 5.19.: Different choices ofℓ2 model regularization applied to the optimized model.
 λ min max mean std
-λ= 0.01457 503 404.6 37.2
-λ= 0.001516 649 588.4 41.6
-λ= 0.0001579 833 696.1 79.1
-Table 5.20.: Training time in epochs of models with early stopping on training loss by different
-choices ofℓ2model regularization applied to the optimized model.
+λ= 0.01 457 503 404.6 37.2
+λ= 0.001 516 649 588.4 41.6
+λ= 0.0001 579 833 696.1 79.1
+Table 5.20.:Training time in epochs of models with early stopping on training loss by different
+choices ofℓ2 model regularization applied to the optimized model.
 
 5. Experimental Evaluation
 
@@ -2240,16 +2384,16 @@ algorithms in Chapter 3.
 Confusion Matrix Ordering (CMO), originally developed as a method to make visualizations
 of confusion matrices easier to read (see Figure 5.13), was introduced as a class clustering
 algorithm in Chapter 4 and evaluated in Sections 4.2 and 5.4. The important insights are:
-•Ordering the classes in the confusion matrix allows to display the relevant parts even
+• Ordering the classes in the confusion matrix allows to display the relevant parts even
 for several hundred classes.
-•A hierarchy of classifiers based on the classes does not improve the results on CIFAR100.
+• A hierarchy of classifiers based on the classes does not improve the results on CIFAR100.
  There are three possible reasons for this:
-–32 px×32 pxis too low dimensional
-–100 classes are not enough for this approach
-–More classes are always easier to distinguish if each new class comes with more
+– 32 px×32 px is too low dimensional
+– 100 classes are not enough for this approach
+– More classes are always easier to distinguish if each new class comes with more
 data. One reason why this might be the case is that distinguishing the object
 from background has similar properties even for different classes.
-•Label smoothing had only a minor effect on the accuracy and no effect on the training
+• Label smoothing had only a minor effect on the accuracy and no effect on the training
 time when a single base classifier was used to train with the smoothed labels by an
 ensemble of base classifiers.
 A baseline model was defined and evaluated on eight publicly available datasets. The
@@ -2257,57 +2401,57 @@ baselines topology and training setup are described in detail as well as its beh
 training and properties of the weights of the trained model.
 The influence of various hyperparameters is examined in Sections 5.5 to 5.12 for CIFAR-100.
 The insights of those experiments are:
-•Averaging ensembles of 10 base classifiers of the same architecture and trained with the
+• Averaging ensembles of 10 base classifiers of the same architecture and trained with the
 same setup consistently improve the accuracy. The amount of improvement depends
 on the base classifiers, but the ensemble tends to improve the test accuracy by about
 one percentage point.
-•Wider networks learn in fewer epochs. This, however, does not mean that the
+• Wider networks learn in fewer epochs. This, however, does not mean that the
 
 6. Conclusion and Outlook
 wall-clock time is lower due to increased computation in forward- and backward
 passes.
-•Batch Normalization increases the training time noticeably. For the described ELU
+• Batch Normalization increases the training time noticeably. For the described ELU
 baseline model it also increases accuracy, which contradicts [CUH15].
-•The lower the batch size, the longer the time for each epoch of training and the less
+• The lower the batch size, the longer the time for each epoch of training and the less
 epochs need to be trained. Higher accuracy by lower batch sizes was empirically
 confirmed. The batch size, however, can also be too low.
-•An analysis of the weights of the baseline indicated that the bias of layers close to
+• An analysis of the weights of the baseline indicated that the bias of layers close to
 the output layer can be removed. This was experimentally confirmed.
-•It could not be confirmed that learned color space transformation, as described
+• It could not be confirmed that learned color space transformation, as described
 in [MSM16], improves the network. Neither with ELU nor with leaky rectified linear
-unit (LReLU) and α= 0.3.
-•It could be confirmed that ELU networks gives better results than any other activation
+unit (LReLU) andα= 0.3.
+• It could be confirmed that ELU networks gives better results than any other activation
 function on CIFAR-100. For the character datasets MNIST and HASYv2, however,
 ReLU, LReLU, PReLU, Softplus and ELU all performed similar.
-•Changing the activation functions to the identity had very little impact on the HASYv2
+• Changing the activation functions to the identity had very little impact on the HASYv2
 and MNIST classifiers. Note that those networks are still able to learn nonlinear
-decision boundaries due to max-pooling and SAMEpadding. For CIFAR-100, however,
-the accuracy drops by 6.64 %when ELU is replaced by the identity.
+decision boundaries due to max-pooling andSAME padding. For CIFAR-100, however,
+the accuracy drops by6.64 % when ELU is replaced by the identity.
 Based on the results of those experiments, an optimized classifier was developed and
 evaluated on all eight datasets.
-The state of the art of STL-10 was improved from 74.80 %[ZMGL15 ] to78.66 %without
+The state of the art of STL-10 was improved from74.80 % [ZMGL15] to78.66 % without
 using the unlabeled part of the dataset. The state of the art of HASYv2 was improved
-from 81.00 %[Tho17a] to85.92 %, for GTSRB the state of the art was improved from
-99.46 %[SL11] to99.51 %, for Asirra it was improved from 82.7 %[Gol08] to93.09 %.1
+from 81.00 % [Tho17a] to 85.92 %, for GTSRB the state of the art was improved from
+99.46 % [SL11] to 99.51 %, for Asirra it was improved from82.7 % [Gol08] to 93.09 %.1
 This was mainly achieved by the combination of ELU, Dropout, ensembles, training data
 augmentation and test-time transformations. The removal of the bias of layers close to the
-output and re-usage of those parameters in layers close to the input as well as using 3×3
-pooling instead of 2×2pooling improved the baseline.
+output and re-usage of those parameters in layers close to the input as well as using3 ×3
+pooling instead of2 ×2 pooling improved the baseline.
 While writing this masters thesis, several related questions could not be answered:
-•Deeper CNNs have generally higher accuracy, if trained long enough and if overfitting
+• Deeper CNNs have generally higher accuracy, if trained long enough and if overfitting
 is not a problem. But at which subsampling-level does having more layers have the
 biggest effect? Can this question be answered before a deeper network is trained?
-•Is label smoothing helpful for noisy labels?
+• Is label smoothing helpful for noisy labels?
 1The baseline is better than the optimized model on Asirra and on HASYv2.
 
-•How does the choice of activation functions influence residual architectures? Could the
+• How does the choice of activation functions influence residual architectures? Could the
 results be the same for different activation functions in architectures with hundreds
 of layers?
-•The results for the pooling kernel were inconclusive. Larger pooling kernels might be
+• The results for the pooling kernel were inconclusive. Larger pooling kernels might be
 advantageous as well as fractional max pooling [Gra15].
-•Why is the mean weight update (see Figure 5.8) not decreasing? Is this an effect that
+• Why is the mean weight update (see Figure 5.8) not decreasing? Is this an effect that
 can and should be fixed?
-•Why is softmax so much better than the logistic function? Can the reason be used to
+• Why is softmax so much better than the logistic function? Can the reason be used to
 further improve ELU?
 Besides those questions, the influence of optimizers on time per epoch, epochs until
 convergence, total training time, memory consumption, accuracy of the models and standard
@@ -2316,14 +2460,14 @@ might be crucial for the models quality.
 
 
 A. Figures, Tables and Algorithms
-(a)Original image
- (b)Smoothing filter
- (c)Laplace edge detection filter
-(d)Sobel edge detection filter
- (e)Prewitt edge detection filter
- (f)Canny filter
+(a) Original image
+ (b) Smoothing filter
+ (c) Laplace edge detection filter
+(d) Sobel edge detection filter
+ (e) Prewitt edge detection filter
+ (f) Canny filter
 Figure A.1.: Examples of image filters. Best viewed in electronic form.
-Layer99-percentile interval
+Layer 99-percentile interval
 filter bias
 1 [-0.50, 0.48] [-0.06, 0.07]
 3 [-0.21, 0.19] [-0.07, 0.07]
@@ -2333,75 +2477,75 @@ filter bias
 11 [-0.08, 0.08] [-0.00, 0.00]
 13 [-0.08, 0.08] [-0.00, 0.00]
 15 [-0.10, 0.11] [-0.01, 0.01]
-Table A.1.: 99-percentile intervals for filter weights and bias weights by layer of a baseline model
+Table A.1.:99-percentile intervals for filter weights and bias weights by layer of a baseline model
 trained on CIFAR-100.
 
-Figure A.2.: The distribution of bias weights of a model without batch normalization trained on
+Figure A.2.:The distribution of bias weights of a model without batch normalization trained on
 CIFAR-100.
-Algorithm 1 Simulated Annealing for minimizing Equation (4.1).
-Require:C∈Nn×n, steps∈N,T∈R+,c∈(0,1)
-procedure SimulatedAnnealing (C, steps,T,c)
-bestScore←accuracy (C)
-bestC←C
-fori= 0;i<steps;i←i+ 1do
-p←randomFloat (0,1)
-ifp<0.5then ⊿Swap rows
-i←randomInteger (1,...,n )
-j←randomInteger (1,...,n )\{i}
-p←randomUniform (0,1)
-C′←swap (C,i,j )
-s←accuracy (C′)
-ifp<exp(s−bestScore
-T)then
-C←C′
-ifs>bestScore then
-bestScore←s
-bestC←C
-T←T·c
-else ⊿Move Block
-s←randomInteger (1,...,n ) ⊿Block start
-e←randomInteger (s,...,n ) ⊿Block end
-i←randomInteger (1,...,n−(e−s)) ⊿Block insert position
-Move Block (s, ..., e) to position i
-returnbestM
-
-Figure A.3.: Maximum weight updates between epochs by layer. The model is the baseline model,
+Algorithm 1Simulated Annealing for minimizing Equation (4.1).
+Require: C ∈Nn×n, steps∈N, T ∈R+, c∈(0,1)
+procedure SimulatedAnnealing(C, steps,T, c)
+bestScore ←accuracy(C)
+bestC ←C
+for i= 0; i< steps; i←i+ 1 do
+p←randomFloat(0,1)
+if p< 0.5 then ⊿ Swap rows
+i←randomInteger(1,...,n )
+j ←randomInteger(1,...,n ) \{i}
+p←randomUniform(0,1)
+C′←swap(C,i,j )
+s←accuracy(C′)
+if p< exp(s−bestScore
+T ) then
+C ←C′
+if s> bestScore then
+bestScore ←s
+bestC ←C
+T ←T ·c
+else ⊿ Move Block
+s←randomInteger(1,...,n ) ⊿ Block start
+e←randomInteger(s,...,n ) ⊿ Block end
+i←randomInteger(1,...,n −(e−s)) ⊿ Block insert position
+Move Block (s, ..., e) to positioni
+return bestC
+
+Figure A.3.:Maximum weight updates between epochs by layer. The model is the baseline model,
 but with layer 5 reduced to 3 filters.
-FunctionSingle model Ensemble of 10 Epochs
+Function Single model Ensemble of 10 Epochs
 Training set Test set Train Test Range Mean
-Identity 87.92 %σ= 0.40 84.69 %σ= 0.08 88.59 % 85.43 %92 – 140 114.5
-Logistic 81.46 %σ= 5.08 79.67 %σ= 4.85 86.38 % 84.60 %58–91 77.3
-Softmax 88.19 %σ= 0.31 84.70 %σ= 0.15 88.69 % 85.43 %124 – 171 145.8
-Tanh 88.41 %σ= 0.36 84.46 %σ= 0.27 89.24 % 85.45 %89 – 123 108.7
-Softsign 88.00 %σ= 0.47 84.46 %σ= 0.23 88.77 % 85.33 %77 – 119 104.1
-ReLU 88.93 %σ= 0.4685.35 %σ= 0.21 89.35 % 85.95 %96 – 132 102.8
-Softplus 88.42 %σ= 0.2985.16 %σ= 0.15 88.90 % 85.73 %108 – 143 121.0
-LReLU 88.61 %σ= 0.41 85.21 %σ= 0.0589.07 % 85.83 %87 – 117 104.5
-PReLU 89.62 %σ= 0.4185.35 %σ= 0.1790.10 % 86.01 %85 – 111 100.5
-ELU 89.49 %σ= 0.4285.35 %σ= 0.10 89.94 % 86.03 %73 – 113 92.4
-Table A.2.: Test accuracy of adjusted baseline models trained with different activation functions on
-HASYv2. For LReLU, α= 0.3was chosen.
-
-Figure A.4.: Sum of weight updates between epochs by layer. The model is the baseline model, but
+Identity 87.92 % σ= 0.40 84 .69 % σ= 0.08 88 .59 % 85 .43 % 92 – 140 114.5
+Logistic 81.46 % σ= 5.08 79 .67 % σ= 4.85 86 .38 % 84 .60 % 58 – 91 77.3
+Softmax 88.19 % σ= 0.31 84 .70 % σ= 0.15 88 .69 % 85 .43 % 124 – 171 145.8
+Tanh 88.41 % σ= 0.36 84 .46 % σ= 0.27 89 .24 % 85 .45 % 89 – 123 108.7
+Softsign 88.00 % σ= 0.47 84 .46 % σ= 0.23 88 .77 % 85 .33 % 77 – 119 104.1
+ReLU 88.93 % σ= 0.46 85.35 % σ= 0.21 89 .35 % 85 .95 % 96 – 132 102.8
+Softplus 88.42 % σ= 0.29 85.16 % σ= 0.15 88 .90 % 85 .73 % 108 – 143 121.0
+LReLU 88.61 % σ= 0.41 85 .21 % σ= 0.05 89.07 % 85 .83 % 87 – 117 104.5
+PReLU 89.62 % σ= 0.41 85.35 % σ= 0.17 90.10 % 86.01 % 85 – 111 100.5
+ELU 89.49 % σ= 0.42 85.35 % σ= 0.10 89 .94 % 86.03 % 73 – 113 92.4
+Table A.2.:Test accuracy of adjusted baseline models trained with different activation functions on
+HASYv2. For LReLU,α= 0.3 was chosen.
+
+Figure A.4.:Sum of weight updates between epochs by layer. The model is the baseline model, but
 with layer 5 reduced to 3 filters.
-FunctionSingle model Ensemble of 10 Epochs
+Function Single model Ensemble of 10 Epochs
 Training set Test set Train Test Range Mean
-Identity 87.49 %σ= 2.50 69.86 %σ= 1.41 89.78 % 71.90 %51 – 65 53.4
-Logistic 45.32 %σ= 14.88 40.85 %σ= 12.56 51.06 % 45.49 %38 – 93 74.6
-Softmax 87.90 %σ= 3.58 67.91 %σ= 2.32 91.51 % 70.96 %108 – 150 127.5
-Tanh 85.38 %σ= 4.04 67.65 %σ= 2.01 90.47 % 71.29 %48 – 92 65.2
-Softsign 88.57 %σ= 4.00 69.32 %σ= 1.68 93.04 % 72.40 %55 – 117 83.2
-ReLU 94.35 %σ= 3.38 71.01 %σ= 1.63 98.20 % 74.85 %52 – 98 75.5
-Softplus 83.03 %σ= 2.07 68.28 %σ= 1.74 93.04 % 75.99 %56 – 89 68.9
-LReLU 93.83 %σ= 3.89 74.66 %σ= 2.11 97.56 % 78.08 %52 – 120 80.1
-PReLU 95.53 %σ= 1.92 71.69 %σ= 1.37 98.17 % 74.69 %59 – 101 78.8
-ELU 95.42 %σ= 3.57 75.09 %σ= 2.39 98.54 % 78.66 %66 – 72 67.2
-Table A.3.: Test accuracy of adjusted baseline models trained with different activation functions on
-STL-10. For LReLU, α= 0.3was chosen.
+Identity 87.49 % σ= 2.50 69 .86 % σ= 1.41 89 .78 % 71 .90 % 51 – 65 53.4
+Logistic 45.32 % σ= 14.88 40 .85 % σ= 12.56 51 .06 % 45 .49 % 38 – 93 74.6
+Softmax 87.90 % σ= 3.58 67 .91 % σ= 2.32 91 .51 % 70 .96 % 108 – 150 127.5
+Tanh 85.38 % σ= 4.04 67 .65 % σ= 2.01 90 .47 % 71 .29 % 48 – 92 65.2
+Softsign 88.57 % σ= 4.00 69 .32 % σ= 1.68 93 .04 % 72 .40 % 55 – 117 83.2
+ReLU 94.35 % σ= 3.38 71 .01 % σ= 1.63 98 .20 % 74 .85 % 52 – 98 75.5
+Softplus 83.03 % σ= 2.07 68 .28 % σ= 1.74 93 .04 % 75 .99 % 56 – 89 68.9
+LReLU 93.83 % σ= 3.89 74 .66 % σ= 2.11 97 .56 % 78 .08 % 52 – 120 80.1
+PReLU 95.53 % σ= 1.92 71 .69 % σ= 1.37 98 .17 % 74 .69 % 59 – 101 78.8
+ELU 95.42 % σ= 3.57 75 .09 % σ= 2.39 98 .54 % 78 .66 % 66 – 72 67.2
+Table A.3.:Test accuracy of adjusted baseline models trained with different activation functions on
+STL-10. For LReLU,α= 0.3 was chosen.
 
 B. Hyperparameters
 Hyperparameters are parameters of models which are not optimized automatically (e.g., by
-gradient descent), but by methods like random search [ BB12], grid search [ LBOM98 ] or
+gradient descent), but by methods like random search [BB12], grid search [LBOM98] or
 manual search.
 B.1. Preprocessing
 Preprocessing used to be of major importance in machine learning. However, with the
@@ -2409,21 +2553,21 @@ availability of data sets with hundreds of examples per class and the possibilit
 learn features themselves, most models today rely on raw pixel values. The only common
 preprocessing is size normalization. In order to get a fixed input-size for a CNN, the
 following procedure can be used:
-•Take one or multiple crops of the image which have the desired aspect ratio.
-•Scale the crop(s) to the desired size.
-•In training, all crops can be used independently. In testing, all crops can be passed
+• Take one or multiple crops of the image which have the desired aspect ratio.
+• Scale the crop(s) to the desired size.
+• In training, all crops can be used independently. In testing, all crops can be passed
 through the network and the output probability distributions can be fused, for
 example by averaging.
 Other preprocessing methods are:
-•Color space transformations (RGB, HSV, etc.)
-•Mean subtraction
-•Standardization of pixel-values to [0,1]by dividing through 255(used by [HLW16])
-•Dimensionality reduction
-–Principal component analysis (PCA): An unsupervised linear transformation
+• Color space transformations (RGB, HSV, etc.)
+• Mean subtraction
+• Standardization of pixel-values to[0,1] by dividing through255 (used by [HLW16])
+• Dimensionality reduction
+– Principal component analysis (PCA): An unsupervised linear transformation
 which can be learned in the first hidden layer. It is hence doubtful if PCA
 improves the network.
-–Linear discriminant analysis (LDA)
-•Zero Components Analysis (ZCA) whitening (used by [KH09])
+– Linear discriminant analysis (LDA)
+• Zero Components Analysis (ZCA) whitening (used by [KH09])
 
 B.2. Data augmentation
 Data augmentation techniques aim at making artificially more data from real data items by
@@ -2431,278 +2575,316 @@ applying invariances. For computer vision, they include:
 Name Augmentation Factor Used by
 Horizontal flip 2 [KSH12, WYS+15]
 Vertical flip 2 [DWD15]1
-Rotation∼40(δ= 20) [DSRB14]
-Scaling∼14(δ∈[0.7,1.4]) [DSRB14]
-Crops 322= 1024 [KSH12, WYS+15]
+Rotation ∼40 (δ= 20) [DSRB14]
+Scaling ∼14 (δ∈[0.7,1.4]) [DSRB14]
+Crops 322 = 1024 [KSH12, WYS+15]
 Shearing [Gra15]
-GANs [BCW+17]
-Brightness∼20(δ∈[0.5,1.5]) [How13]
-Hue 51(δ= 0.1) [MRM15, DSRB14]
-Saturation∼20(δ= 0.5) [DSRB14]
-Contrast∼20(δ∈[0.5,1.5]) [How13]
+GANs [BCW +17]
+Brightness ∼20 (δ∈[0.5,1.5]) [How13]
+Hue 51 (δ= 0.1) [MRM15, DSRB14]
+Saturation ∼20 (δ= 0.5) [DSRB14]
+Contrast ∼20 (δ∈[0.5,1.5]) [How13]
 Channel shift [KSH12]
-Table B.1.: Overview of data augmentation techniques. The augmentation factor is calculated for
+Table B.1.:Overview of data augmentation techniques. The augmentation factor is calculated for
 typical situations. For example, the augmentation factor for random crops is calculated
-for256 px×256 pximages which are cropped to 224 px×224 px.
+for 256 px×256 px images which are cropped to224 px×224 px.
 Taking several scales if the original is of higher resolution than desired is another technique.
 Combinations of the techniques above can also be applied. Please note that the order of
 operations does matter in many cases and hence the order is another augmentation factor.
 Less common, but also reasonable are:
-•Adding noise
-•Elastic deformations
-•Color casting (used by [WYS+15])
-•Vignetting (used by [WYS+15])
-•Lens distortion (used by [WYS+15])
-1Vertical flipping combined with 180◦rotation is equivalent to horizontal flipping
+• Adding noise
+• Elastic deformations
+• Color casting (used by [WYS+15])
+• Vignetting (used by [WYS+15])
+• Lens distortion (used by [WYS+15])
+1Vertical flipping combined with180◦ rotation is equivalent to horizontal flipping
 
 B.3. Initialization
 Weight initializations are usually chosen to be small and centered around zero. One way to
 characterize many initialization schemes is by
-w∼α·U[−1,1] +β·N(0,1) +γwithα,β,γ≥0
+w∼α·U[−1,1] + β·N(0,1) + γ with α,β,γ ≥0
 Table B.2 shows six commonly used weight initialization schemes. Several schemes use the
-same idea, that unit-variance is desired for each layer as the training converges faster [ IS15].
+same idea, that unit-variance is desired for each layer as the training converges faster [IS15].
 Name α β γ Reference
-Constant α= 0 β= 0 γ≥0used by [ZF14]
-Xavier/Glorot uniform α=√
+Constant α= 0 β = 0 γ ≥0 used by [ZF14]
+Xavier/Glorot uniform α=
+√
 6
-nin+noutβ= 0 γ= 0[GB10]
-Xavier/Glorot normal α= 0 β=(
+nin+nout β = 0 γ = 0 [GB10]
+Xavier/Glorot normal α= 0 β =
+(
 2
-(nin+nout))2
-γ= 0[GB10]
-He α= 0 β=2
-ninγ= 0[HZRS15b]
-Orthogonal — — γ= 0[SMG13]
-LSUV — — γ= 0[MM15]
-Table B.2.: Weight initialization schemes of the form w∼α·U[−1,1] +β·N(0,1) +γ.
-nin,noutare the number of units in the previous layer and the next layer. Typically,
+(nin+nout)
+)2
+γ = 0 [GB10]
+He α= 0 β = 2
+nin
+γ = 0 [HZRS15b]
+Orthogonal — — γ = 0 [SMG13]
+LSUV — — γ = 0 [MM15]
+Table B.2.: Weight initialization schemes of the formw∼α·U[−1,1] + β·N(0,1) + γ.
+nin,nout are the number of units in the previous layer and the next layer. Typically,
 biases are initialized with constant 0 and weights by one of the other schemes to prevent
 unit-coadaptation. However, dropout makes it possible to use constant initialization for
 all parameters.
 LSUV and Orthogonal initialization cannot be described with this simple pattern.
 B.4. Objective function
 For classification tasks, the cross-entropy
-ECE(W) =−∑
-x∈XK∑
-k=1[tx
+ECE(W) = −
+∑
+x∈X
+K∑
+k=1
+[tx
 klog(ox
 k) + (1−tx
 k) log(1−ox
 k)]
-is by far the most commonly used objective function (e.g., used by [ ZF14]). In this equation,
-Xis the set of training examples, Kis the number of classes, tx
-k∈{0,1}indicates if the
-training example xis of classk,ox
-kis the output of the classifier for the training example x
+is by far the most commonly used objective function (e.g., used by [ZF14]). In this equation,
+X is the set of training examples,K is the number of classes,tx
+k ∈{ 0,1 }indicates if the
+training examplex is of classk, ox
+k is the output of the classifier for the training examplex
 and classk.
-However, regularization terms weighted with a constant λ∈(0,+∞)are sometimes added:
-•LASSO:ℓ1(e.g., used in [HPTD15])
-•Weight decay: ℓ2(e.g.,λ= 0.0005as in [MSM16])
-•Orthogonality regularization ( |(WT·W−I)|, see [VTKP17])
+However, regularization terms weighted with a constantλ∈(0,+∞) are sometimes added:
+• LASSO: ℓ1 (e.g., used in [HPTD15])
+• Weight decay:ℓ2 (e.g., λ= 0.0005 as in [MSM16])
+• Orthogonality regularization (|(WT ·W −I)|, see [VTKP17])
 
 B.5. Optimization Techniques
 Most relevant optimization techniques for CNNs are based on SGD, which updates the
 weights according to the rule
-wji←wji+ ∆wjiwith ∆wji=−η∂Ex
+wji ←wji + ∆wji with ∆wji = −η∂Ex
 ∂wji
-whereη∈(0,1), typically 0.01(e.g., [MSM16]), is called the learning rate .
-A slight variation of SGD is mini-batch gradient descent with the mini-batch B(typically
-mini-batch sizes are |B|∈{ 32,64,128,256,512}, e.g. [ZF14]). Larger mini-batch sizes
-lead to sharp minima and thus poor generalization [ KMN+16]. Smaller mini-batch sizes
+where η∈(0,1), typically0.01 (e.g., [MSM16]), is called thelearning rate.
+A slight variation of SGD is mini-batch gradient descent with the mini-batchB (typically
+mini-batch sizes are|B|∈{ 32,64,128,256,512 }, e.g. [ZF14]). Larger mini-batch sizes
+lead to sharp minima and thus poor generalization [KMN+16]. Smaller mini-batch sizes
 lead to longer training times due to computational overhead and to more training steps due
 to gradient noise.
-wji←wji+ ∆wjiwith ∆wji=−η∂EB
+wji ←wji + ∆wji with ∆wji = −η∂EB
 ∂wji
 Nine variations which adjust the learning rate during training are:
-•Momentum:
+• Momentum:
 w(t+1)
-ji←w(t)
-ji+ ∆w(t+1)
-jiwith ∆w(t+1)
-ji =−η∂EB
-∂wji+α∆w(t)
+ji ←w(t)
+ji + ∆w(t+1)
+ji with ∆w(t+1)
+ji = −η∂EB
+∂wji
++ α∆w(t)
 ji
-withα∈[0,1], typically 0.9(e.g., [ZF14, MSM16])
-•Adagrad [DHS11]
-•RProp and the mini-batch version RMSProp [TH12]
-•Adadelta [Zei12]
-•Power Scheduling [ Xu11]:η(t) =η(0)(1 +a·t)−c, wheret∈N0is the training step,
-a,care constants.
-•Performance Scheduling [ SHY+13]: Measure the error on the cross validation set and
+with α∈[0,1], typically0.9 (e.g., [ZF14, MSM16])
+• Adagrad [DHS11]
+• RProp and the mini-batch version RMSProp [TH12]
+• Adadelta [Zei12]
+• Power Scheduling [Xu11]: $\eta(t) = \eta(0) \cdot (1 + a \cdot t)^{-c}$, where $t \in \mathbb{N}_0$ is the training step
+and $a, c$ are constants.
+• Performance Scheduling [SHY+13]: Measure the error on the cross validation set and
 decrease the learning rate when the algorithm's improvement is below a threshold.
-•Exponential Decay Learning Rate [ SHY+13]:η(t) =η(0)·10−t
-kwheret∈N0is the
-training step, η(0)is the initial learning rate, k∈N≥1is the number of training steps
+• Exponential Decay Learning Rate [SHY+13]: $\eta(t) = \eta(0) \cdot 10^{-t/k}$,
+where $t \in \mathbb{N}_0$ is the
+training step, $\eta(0)$ is the initial learning rate, $k \in \mathbb{N}_{\geq 1}$ is the number of training steps
 until the learning rate is decreased by1
-10th.
-•Newbob Scheduling [ new00]: Start with Performance Scheduling, then use Exponential
+10 th.
+• Newbob Scheduling [new00]: Start with Performance Scheduling, then use Exponential
 Decay Scheduling.
-•Adam and AdaMax [KB14]
+• Adam and AdaMax [KB14]
 
-•Nadam [Doz15]
+• Nadam [Doz15]
 Some of those are explained in [Rud16].
 Other first-order gradient optimization methods are:
-•Quickprop [Fah88]
-•Nesterov Accellerated Momentum (NAG) [Nes83]
-•Conjugate Gradient method [ Cha92]: Combines a line search for the step size with
+• Quickprop [Fah88]
+• Nesterov Accelerated Momentum (NAG) [Nes83]
+• Conjugate Gradient method [Cha92]: Combines a line search for the step size with
 the gradient's direction.
 Higher-order gradient methods like Newton's method or quasi-Newton methods like BFGS
 and L-BFGS need the inverse of the Hessian matrix which is intractable for today’s CNNs.
 However, there are alternatives which do not use gradient information:
-•Genetic algorithms such as NeuroEvolution of Augmenting Topologies (NEAT) [ SM02]
-•Simulated Annealing [vLA87]
-•Twiddle: A local hill-climbing algorithm explained by Sebastian Thrun and described
+• Genetic algorithms such as NeuroEvolution of Augmenting Topologies (NEAT) [SM02]
+• Simulated Annealing [vLA87]
+• Twiddle: A local hill-climbing algorithm explained by Sebastian Thrun and described
 on [Tho14b]
 There are also approaches which learn the optimization algorithm [ADG+16, LM16].
 
 B.6. Network Design
 CNNs have the following hyperparameters:
-•Depth: The number of layers
-•Width: The number of filters per layer
-•Layer and block connectivity graph
-•Layer and block hyperparameters :
-–Activation Functions as shown in Table B.3
-–For more, see Sections 2.2 and 2.3.
+• Depth: The number of layers
+• Width: The number of filters per layer
+• Layer and block connectivity graph
+• Layer and block hyperparameters:
+– Activation Functions as shown in Table B.3
+– For more, see Sections 2.2 and 2.3.
 Name Function ϕ(x) Range of Values ϕ′(x) Used by
-Sign function†
+Sign function†
+
 
-+1ifx≥0
-−1ifx<0{−1,1} 0 [KS02]
+
++1 if x≥0
+−1 if x< 0
+{−1,1 } 0 [KS02]
 Heaviside
-step function†
+step function†
+
 
-+1ifx>0
-0ifx<0{0,1} 0 [MP43]
-Logistic function1
-1+e−x [0,1]ex
+
++1 if x> 0
+0 if x< 0
+{0,1 } 0 [MP43]
+Logistic function 1
+1+e−x [0,1] ex
 (ex+1)2 [DJ99]
-Tanhex−e−x
-ex+e−x= tanh(x) [−1,1] sech2(x) [LBBH98, Tho14a]
-ReLU†max(0,x) [0 ,+∞)
+Tanh ex−e−x
+ex+e−x = tanh(x) [ −1,1] sech 2(x) [LBBH98, Tho14a]
+ReLU† max(0,x) [0 ,+∞)
+
 
-1ifx>0
-0ifx<0[KSH12]
+
+1 if x> 0
+0 if x< 0
+[KSH12]
 LReLU†2
-(PReLU)ϕ(x) = max(αx,x ) (−∞,+∞)
+(PReLU)
+ϕ(x) = max(αx,x) ( −∞,+∞)
+
 
-1ifx>0
-αifx<0[MHN13, HZRS15b]
-Softplus log(ex+ 1) (0 ,+∞)ex
-ex+1[DBB+01, GBB11]
-ELU
+
+1 if x> 0
+α if x< 0
+[MHN13, HZRS15b]
+Softplus log(ex + 1) (0 ,+∞) ex
+ex+1 [DBB+01, GBB11]
+ELU
+
 
-x ifx>0
-α(ex−1)ifx≤0(−∞,+∞)
+
+x if x> 0
+α(ex −1) if x≤0
+(−∞,+∞)
+
 
-1ifx>0
-αexotherwise[CUH15]
-Softmax‡o(x)j=exj∑K
-k=1exk[0,1]Ko(x)j·∑K
-k=1exk−exj
+
+1 if x> 0
+αex otherwise
+[CUH15]
+Softmax‡ o(x)j = exj
 ∑K
-k=1exk[KSH12, Tho14a]
-Maxout‡o(x) = maxx∈xx (−∞,+∞)
+k=1 exk [0,1]K o(x)j ·
+∑K
+k=1 exk −exj
+∑K
+k=1 exk [KSH12, Tho14a]
+Maxout‡ o(x) = maxx∈x x (−∞,+∞)
+
 
-1ifxi= max x
-0otherwise[GWFM+13]
-Table B.3.: Overview of activation functions. Functions marked with †are not differentiable at 0
-and functions marked with ‡operate on all elements of a layer simultaneously. The
-hyperparameters α∈(0,1)of Leaky ReLU and ELU are typically α= 0.01. Other
-activation function like randomized leaky ReLUs exist [ XWCL15 ], but are far less
+
+1 if xi = max x
+0 otherwise
+[GWFM+13]
+Table B.3.:Overview of activation functions. Functions marked with†are not differentiable at 0
+and functions marked with‡operate on all elements of a layer simultaneously. The
+hyperparameters α ∈(0,1) of Leaky ReLU and ELU are typicallyα = 0.01. Other
+activation functions like randomized leaky ReLUs exist [XWCL15], but are far less
 commonly used.
 Some functions are smoothed versions of others, like the logistic function for the
 Heaviside step function, tanh for the sign function, softplus for ReLU.
 Softmax is the standard activation function for the last layer of a classification network
 as it produces a probability distribution. See Figure B.1 for a plot of some of them.
-2αis a hyperparameter in leaky ReLU, but a learnable parameter in the parametric ReLU function.
-
-−2.0−1.5−1.0−0.5 0.5 1.0 1.5 2.0
-−1.0−0.50.51.01.52.0
-xy
-ϕ1(x) =1
+2α is a hyperparameter in leaky ReLU, but a learnable parameter in the parametric ReLU function.
+
+−2.0 −1.5 −1.0 −0.5 0.5 1.0 1.5 2.0
+−1.0
+−0.5
+0.5
+1.0
+1.5
+2.0
+x
+y
+ϕ1(x) = 1
 1+e−x
 ϕ2(x) = tanh(x)
 ϕ3(x) = max(0,x)
-ϕ4(x) = log(ex+ 1)
-ϕ5(x) = max(x,ex−1)
-Figure B.1.: Activation functions plotted in [−2,+2].tanhand ELU are able to produce negative
+ϕ4(x) = log(ex + 1)
+ϕ5(x) = max(x,ex −1)
+Figure B.1.:Activation functions plotted in[−2,+2]. tanh and ELU are able to produce negative
 numbers. The image of ELU, ReLU and Softplus is not bound on the positive side,
-whereas tanhand the logistic function are always below 1.
+whereas tanh and the logistic function are always below 1.
 B.7. Regularization
 Regularization techniques aim to make the fitted function smoother and reduce overfitting.
 Regularization techniques are:
-•ℓ1,ℓ2, and Orthogonality regularization: See Appendix B.4
-•Max-norm regularization (e.g. used ins [SHK+14])
-•Dropout (introduced in [ SHK+14]), DropConnect (see [ WZZ+13]), Stochastic Depth
+• ℓ1, ℓ2, and Orthogonality regularization: See Appendix B.4
+• Max-norm regularization (e.g. used in [SHK+14])
+• Dropout (introduced in [SHK+14]), DropConnect (see [WZZ+13]), Stochastic Depth
 (see [HSL+16])
-•Feature scale clipping (see [ZF14])
-•Data augmentation (according to [ZBH+16])
-•Global average pooling (according to [ZKL+15])
-•Dense-Sparse-Dense training (see [HPN+16])
-•Soft targets (see [HVD15])
+• Feature scale clipping (see [ZF14])
+• Data augmentation (according to [ZBH+16])
+• Global average pooling (according to [ZKL+15])
+• Dense-Sparse-Dense training (see [HPN+16])
+• Soft targets (see [HVD15])
 
 
 C. Calculating Network Characteristics
 C.1. Parameter Numbers
-•A fully connected layer with nnodes,kinputs hasn·(k+ 1)parameters. The +1is
+• A fully connected layer withn nodes, k inputs hasn·(k+ 1) parameters. The +1 is
 due to the bias.
-•A convolutional layer iwithkifilters of size n×mbeing applied to ki−1feature maps
-haski·ki−1(n·m+ 1)parameters. The +1is due to the bias.
-•A fully connected layer with nnodes after kfeature maps of size m1×m2has
-n·(k·m1·m2+ 1)parameters.
-•A dense block with a depth of L, a growth rate of nand3×3filters hasL+n·32+
-32·n2∑L
-i=0(L−i) =L+ 9n+ 9n2L2−L
-2parameters.
-According to [ HPTD15 ], AlexNet has 60 million parameters which is roughly the number
+• A convolutional layeriwith ki filters of sizen×mbeing applied toki−1 feature maps
+has ki ·ki−1(n·m+ 1) parameters. The +1 is due to the bias.
+• A fully connected layer withn nodes after k feature maps of size m1 ×m2 has
+n·(k·m1 ·m2 + 1) parameters.
+• A dense block with a depth of $L$, a growth rate of $n$ and $3 \times 3$ filters has
+$L + n \cdot 3^2 + 3^2 \cdot n^2 \sum_{i=0}^{L} (L - i)$
+$= L + 9n + 9n^2 \frac{L^2 - L}{2}$
+parameters.
+According to [HPTD15], AlexNet has 60 million parameters which is roughly the number
 calculated in Table D.2.
 C.2. FLOPs
 The FLOPs of a layer depend on the implementation, the compiler and the hardware. Hence
 the following numbers are only rough estimates.
-In the following, nϕdenotes the number of FLOPs to compute the non-linearity ϕ. For
-simplicity,nϕ= 5was chosen.
-•A fully connected layer with nnodes andkinputs has to calculate ϕ(W·x+b)with
-W∈Rn×k,x∈Rk×1,b∈Rn×1. It hence needs about n·(k+ (k−1) + 1) = 2 nk
-additions / multiplications before the non-linearity ϕis calculated. The total number
-of FLOPs is 2·n·k+n·nϕ.
-•In the following, biases are ignored. A convolutional layer with kifilters of size n×m
-being applied to ki−1filter maps of size w×hresults inkifilter maps of size w×hif
-padding is applied. For each element of each filter map, n·m·ki−1multiplications and
-(n·m·ki−1−1)additions have to be made. This results in (2nmki−1−1)·(ki·w·h)
-operations. The total number of FLOPs is (2·n·m·ki−1−1)·(ki·w·h)+ki·w·h·nϕ.
+In the following,nϕ denotes the number of FLOPs to compute the non-linearityϕ. For
+simplicity,nϕ = 5 was chosen.
+• A fully connected layer withn nodes andk inputs has to calculateϕ(W ·x+ b) with
+W ∈Rn×k, x ∈Rk×1, b ∈Rn×1. It hence needs aboutn·(k+ (k−1) + 1) = 2nk
+additions / multiplications before the non-linearityϕ is calculated. The total number
+of FLOPs is2 ·n·k+ n·nϕ.
+• In the following, biases are ignored. A convolutional layer withki filters of sizen×m
+being applied toki−1 filter maps of sizew×hresults inki filter maps of sizew×hif
+padding is applied. For each element of each filter map,n·m·ki−1 multiplications and
+(n·m·ki−1 −1) additions have to be made. This results in(2nmki−1 −1) ·(ki·w·h)
+operations. The total number of FLOPs is(2·n·m·ki−1 −1)·(ki·w·h)+ ki·w·h·nϕ.
 This is, of course, a naive way of calculating a convolution. There are other ways of
 calculating convolutions [LG16].
 
-•A fully connected layer with nnodes after kfeature maps of size w×hneeds 2n(k·w·h)
-FLOPs. The total number of FLOPs is 2n·(k·w·h) +n·nϕ.
-•As Dropout is only calculated during training, the number of FLOPs was set to 0.
-•The number of FLOPs for max pooling is dominated by the number of positions to
-which the pooling kernel is applied. For a feature map of size w×ha max pooling
-filter with stride sgets appliedw·h
-s2. The number of FLOPs per application depends
-on the kernel size. A 2×2kernel is assumed to need 5 FLOPs.
-•The number of FLOPs for Batch Normalization is the same as the number of its
+• A fully connected layer withnnodes afterkfeature maps of sizew×hneeds 2n(k·w·h)
+FLOPs. The total number of FLOPs is2n·(k·w·h) + n·nϕ.
+• As Dropout is only calculated during training, the number of FLOPs was set to 0.
+• The number of FLOPs for max pooling is dominated by the number of positions to
+which the pooling kernel is applied. For a feature map of size $w \times h$, a max pooling
+filter with stride $s$ gets applied $\frac{w \cdot h}{s^2}$ times.
+The number of FLOPs per application depends
+on the kernel size. A $2 \times 2$ kernel is assumed to need 5 FLOPs.
+• The number of FLOPs for Batch Normalization is the same as the number of its
 parameters.
 Here are some references which give information for the FLOPs:
-•AlexNet
-–1.5B in total [HPTD15].
-–725M in total [KPY+15].
-–3300M in total in Table D.2
-•VGG-16:
-–15484M in total [HPTD15].
-–31000M in total in Table D.3.
-•GoogleNet: 1566M in total [HPTD15].
+• AlexNet
+– 1.5B in total [HPTD15].
+– 725M in total [KPY+15].
+– 3300M in total in Table D.2
+• VGG-16:
+– 15484M in total [HPTD15].
+– 31000M in total in Table D.3.
+• GoogleNet: 1566M in total [HPTD15].
 One can see that the numbers differ by a factor of 2 up to a factor of 4 for the same
 network.
 C.3. Memory Footprint
 The memory footprint of CNNs determines when networks can be used at all and if they
 can be trained efficiently. In order to be able to train CNNs efficiently, one weight update
 step has to fit in the memory of the GPU. This includes the following:
-•Activations : All activations of one mini-batch in order to calculate the gradients
+• Activations: All activations of one mini-batch in order to calculate the gradients
 in the backward pass. This is the number of floats in the feature maps of all weight
 layers combined.
-•Weights
-•Optimization algorithm : The optimization algorithm introduces some overhead.
+• Weights
+• Optimization algorithm: The optimization algorithm introduces some overhead.
 For example, Adam stores two parameters per weight.
 At inference time, every two consecutive layers have to fit into memory. When the forward
 pass of layer A to layer B is calculated, the memory can be freed if no skip connections are
@@ -2719,168 +2901,183 @@ The summation row gives the sum of all floats for the output size column. This a
 conclusions about the maximum mini-batch size which can be in memory for training.
 
 D.1. LeNet-5
-One of the first CNNs used was LeNet-5 [ LBBH98 ]. LeNet-5 uses two times the common
-pattern of a single convolutional layer with tanhas a non-linear activation function followed
+One of the first CNNs used was LeNet-5 [LBBH98]. LeNet-5 uses two times the common
+pattern of a single convolutional layer with tanh as a non-linear activation function followed
 by a pooling layer and three fully connected layers. One fully connected layer is used to
 get the right output dimension, another one is necessary to allow the network to learn a
 non-linear combination of the features of the feature maps.
 Its exact architecture is shown in Figure D.1 and described in Table D.1. It reaches a test
-error rate of 0.8 %on MNIST.
+error rate of 0.8 % on MNIST.
 Figure D.1.: Architecture of LeNet-5 as shown in [LBBH98].
 # Type Filters @
-Patch size / strideParameters FLOPs Output size
+Patch size / stride
+Parameters FLOPs Output size
 Input 0 0 1@32 ×32
-1 Convolution 6@ 5×5×1/1 156 307800 6@28×28
-2 Scaled average pooling 2×2/2 2 336 6@14 ×14
-3 Convolution 16@ 5×5×6/1 2416 942 400 16@10×10
-4 Scaled average pooling 2×2/2 2 1600 16@ 5 ×5
+1 Convolution 6@ 5 ×5 ×1 /1 156 307800 6 @28 ×28
+2 Scaled average pooling 2 ×2 /2 2 336 6@14 ×14
+3 Convolution 16@ 5 ×5 ×6 /1 2416 942 400 16@10 ×10
+4 Scaled average pooling 2 ×2 /2 2 1600 16@ 5 × 5
 5 Fully Connected 120 neurons 48 120 240000 120
 6 Fully Connected 84 neurons 10164 20580 84
 7 Fully Connected (output) 10 neurons 850 1730 10
-∑61710 15144446 9118
-Table D.1.: LeNet-5 architecture: After layers 1, 3, 5 and 6 the tanhactivation function is applied.
+∑ 61710 15144446 9118
+Table D.1.: LeNet-5 architecture: After layers 1, 3, 5 and 6 the tanh activation function is applied.
 After layer 7, the softmax function is applied. One can see that convolutional layer
 need much fewer parameters, but an order of magnitude more FLOPs per parameter
 than fully connected layers.
 
 D.2. AlexNet
 ThefirstCNNwhichachievedmajorimprovementsontheImageNetdatasetwasAlexNet[ KSH12].
-ItsarchitectureisshowninFigureD.2anddescribedinTableD.2. Ithasabout 60·106parameters.
+Its architecture is shown in Figure D.2 and described in Table D.2. It has about $60 \cdot 10^6$ parameters.
  A trained AlexNet can be downloaded at www.cs.toronto.edu/˜ guerzhoy/tf_alexnet.
-Note that the uncompressed size is at least 60 965 224 floats·32bit
-float≈244 MB.
-Figure D.2.: Architecture of AlexNet as shown in [ KSH12]: Convolutional Layers are followed
+Note that the uncompressed size is at least
+$60\,965\,224\ \text{floats} \cdot 32\ \frac{\text{bit}}{\text{float}} \approx 244\ \text{MB}$.
+Figure D.2.: Architecture of AlexNet as shown in [KSH12]: Convolutional Layers are followed
 by pooling layers multiple times. At the end, a fully connected network is applied.
 Conceptually, it is identical to the architecture of LeNet-5 (see Figure D.1).
 # Type Filters @
-Patch size / strideParameters FLOPs Output size
-Input 3 @ 224×224
-1 Convolution 96 @ 11×11×3/ 4 34 944 211 M 96@ 55×55
-LCN 12 M96@ 55×55
-2 Max pooling 3×3/ 2 0 301 k 96 @ 27×27
-3 Convolution 256 @ 5×5×48/ 1 307 456 448 M 256 @ 13×13
-LCN 3 M256 @ 13×13
-4 Max pooling 3×3/ 2 0 50 k 256 @ 13×13
-5 Convolution 384 @ 3×3×256/ 1 885 120 299 M 384 @ 13×13
-7 Convolution 384 @ 3×3×192/ 1 663 936 224 M 384 @ 13×13
-9 Convolution 256 @ 3×3×192/ 1 442 624 150 M 256 @ 13×13
-10 Max pooling 3×3/ 2 0 50 k 256 @ 6×6
-11 FC 4096 neurons 37 752 832 75 M4096
+Patch size / stride
+Parameters FLOPs Output size
+Input 3 @ 224 ×224
+1 Convolution 96 @ 11 ×11 ×3 / 4 34 944 211 M 96@ 55 × 55
+LCN 12 M 96@ 55 × 55
+2 Max pooling 3 ×3 / 2 0 301 k 96 @ 27 × 27
+3 Convolution 256 @ 5 ×5 × 48 / 1 307 456 448 M 256 @ 13 × 13
+LCN 3 M 256 @ 13 × 13
+4 Max pooling 3 ×3 / 2 0 50 k 256 @ 13 × 13
+5 Convolution 384 @ 3 ×3 ×256 / 1 885 120 299 M 384 @ 13 × 13
+7 Convolution 384 @ 3 ×3 ×192 / 1 663 936 224 M 384 @ 13 × 13
+9 Convolution 256 @ 3 ×3 ×192 / 1 442 624 150 M 256 @ 13 × 13
+10 Max pooling 3 ×3 / 2 0 50 k 256 @ 6 × 6
+11 FC 4096 neurons 37 752 832 75 M 4096
 12 FC 4096 neurons 16 781 312 34 M 4096
 13 FC 1000 neurons 4 097 000 8 M 1000
-∑60 965 224 3300 M 1 122 568
-Table D.2.: AlexNet architecture: One special case of AlexNet is grouping of convolutions due to
+∑ 60 965 224 3300 M 1 122 568
+Table D.2.:AlexNet architecture: One special case of AlexNet is grouping of convolutions due to
 computational restrictions at the time of its development. This also reduces the number
 of parameters and allows parallel computation on separate GPUs. However, to make
 the architecture easier to compare, this grouping was ignored for the parameter count.
-The FLOPs are taken from [ HPTD15 ] and combined with rough estimates for Local
+The FLOPs are taken from [HPTD15] and combined with rough estimates for Local
 Contrast Normalization and max pooling.
 The calculated number of parameters was checked against the downloaded version. It
-also has 60 965 224 parameters.
+also has 60 965 224 parameters.
 
 D.3. VGG-16 D
-Another widespread architecture is the VGG-16 (D) [ SZ14]. VGG comes from the Visual
-GeometryGroup in Oxford which developed this architecture. It has 16layers which can
-learn parameters. A major difference compared to AlexNet is that VGG-16 uses only 3×3
+Another widespread architecture is the VGG-16 (D) [SZ14]. VGG comes from the Visual
+Geometry Group in Oxford which developed this architecture. It has 16 layers which can
+learn parameters. A major difference compared to AlexNet is that VGG-16 uses only $3 \times 3$
 filters and is much deeper. A visualization of the architecture is shown in Figure D.3 and a
 detailed textual description is given in Table D.3.
-AtrainedVGG-16DforTensorflowcanbedownloadedat https://github .com/machrisaa/
-tensorflow-vgg . Note that the uncompressed size is at least 138 357 544 floats ·32bit
-float≈
-520 MB. The downloaded Numpy binary file npzneeds 553 MBwithout compression and
-514 MBwith compression.224×224Input
+A trained VGG-16 D for Tensorflow can be downloaded at https://github.com/machrisaa/
+tensorflow-vgg. Note that the uncompressed size is at least
+$138\,357\,544\ \text{floats} \cdot 32\ \frac{\text{bit}}{\text{float}} \approx 520\ \text{MB}$.
+The downloaded Numpy binary file npz needs 553 MB without compression and
+514 MB with compression.
+224×224
+Input
+C64@3×3/1
 C64@3×3/1
-C64@3×3/1112×112max pooling 2×2/1
+112×112
+max pooling2×2/1
+C128@3×3/1
 C128@3×3/1
-C128@3×3/156×56max pooling 2×2/1
+56×56
+max pooling2×2/1
 C256@3×3/1
 C256@3×3/1
-C256@3×3/128×28max pooling 2×2/1
+C256@3×3/1
+28×28
+max pooling2×2/1
+C512@3×3/1
+C512@3×3/1
 C512@3×3/1
+14×14
+max pooling2×2/1
 C512@3×3/1
-C512@3×3/114×14max pooling 2×2/1
 C512@3×3/1
 C512@3×3/1
-C512@3×3/17×7max pooling 2×2/1
+7×7
+max pooling2×2/1
 Fully Connected 4096
 Dropout,p= 0.5
 Fully Connected 4096
 Dropout,p= 0.5
 Fully Connected 1000
-Figure D.3.: Architecture of VGG-16 D. C512@3×3/1is a convolutional layer with 512 filters of
-kernel size 3×3with stride 1. All convolutional layers use SAMEpadding.
+Figure D.3.: Architecture of VGG-16 D. C512@3×3/1 is a convolutional layer with 512 filters of
+kernel size 3×3 with stride 1. All convolutional layers use SAME padding.
 
 # Type Filters @
-Patch size / strideParameters FLOPs Output size
-Input 3 @ 224×224
-1 Convolution 64 @ 3×3×3/ 1 1 792 186 M 64@ 224×224
-2 Convolution 64 @ 3×3×64/ 1 36 928 3712 M 64@ 224×224
-Max pooling 2×2/ 2 0 2 M 64 @ 112×112
-3 Convolution 128 @ 3×3×64/ 1 73 856 1856 M 128 @ 112×112
-4 Convolution 128 @ 3×3×128/ 1 147 584 3705 M 128 @ 112×112
-Max pooling 2×2/ 2 0 1 M 128 @ 56×56
-5 Convolution 256 @ 3×3×128/ 1 295 168 1853 M 256 @ 56×56
-6 Convolution 256 @ 3×3×256/ 1 590 080 3703 M 256 @ 56×56
-7 Convolution 256 @ 3×3×256/ 1 590 080 3703 M 256 @ 56×56
-Max pooling 2×2/ 2 0<1 M256 @ 28×28
-8 Convolution 512 @ 3×3×256/ 1 1 180 160 1851 M 512 @ 28×28
-9 Convolution 512 @ 3×3×512/ 1 2 359 808 3701 M 512 @ 28×28
-10 Convolution 512 @ 3×3×512/ 1 2 359 808 3701 M 512 @ 28×28
-Max pooling 2×2/ 2 0<1 M512 @ 14×14
-11 Convolution 512 @ 3×3×512/ 1 2 359 808 925 M 512 @ 14×14
-12 Convolution 512 @ 3×3×512/ 1 2 359 808 925 M 512 @ 14×14
-13 Convolution 512 @ 3×3×512/ 1 2 359 808 925 M 512 @ 14×14
-Max pooling 2×2/ 2 0<1 M512 @ 7×7
-14 FC 4096 neurons 102 764 544 206 M4096
+Patch size / stride
+Parameters FLOPs Output size
+Input 3 @ 224 ×224
+1 Convolution 64 @ 3 ×3 × 3 / 1 1 792 186 M 64@ 224 ×224
+2 Convolution 64 @ 3 ×3 × 64 / 1 36 928 3712 M 64@ 224 ×224
+Max pooling 2 ×2 / 2 0 2 M 64 @112 ×112
+3 Convolution 128 @ 3 ×3 × 64 / 1 73 856 1856 M 128 @112 ×112
+4 Convolution 128 @ 3 ×3 ×128 / 1 147 584 3705 M 128 @112 ×112
+Max pooling 2 ×2 / 2 0 1 M 128 @ 56 × 56
+5 Convolution 256 @ 3 ×3 ×128 / 1 295 168 1853 M 256 @ 56 × 56
+6 Convolution 256 @ 3 ×3 ×256 / 1 590 080 3703 M 256 @ 56 × 56
+7 Convolution 256 @ 3 ×3 ×256 / 1 590 080 3703 M 256 @ 56 × 56
+Max pooling 2 ×2 / 2 0 <1 M 256 @ 28 × 28
+8 Convolution 512 @ 3 ×3 ×256 / 1 1 180 160 1851 M 512 @ 28 × 28
+9 Convolution 512 @ 3 ×3 ×512 / 1 2 359 808 3701 M 512 @ 28 × 28
+10 Convolution 512 @ 3 ×3 ×512 / 1 2 359 808 3701 M 512 @ 28 × 28
+Max pooling 2 ×2 / 2 0 <1 M 512 @ 14 × 14
+11 Convolution 512 @ 3 ×3 ×512 / 1 2 359 808 925 M 512 @ 14 × 14
+12 Convolution 512 @ 3 ×3 ×512 / 1 2 359 808 925 M 512 @ 14 × 14
+13 Convolution 512 @ 3 ×3 ×512 / 1 2 359 808 925 M 512 @ 14 × 14
+Max pooling 2 ×2 / 2 0 <1 M 512 @ 7 × 7
+14 FC 4096 neurons 102 764 544 206 M 4096
 Dropout 0 0 4096
 15 FC 4096 neurons 16 781 312 34 M 4096
 Dropout 0 0 4096
 16 FC 1000 neurons 4 097 000 8 M 1000
-∑138 357 544 31 000 M 15 245 800
-Table D.3.: VGG-16 D architecture: The authors chose to give only layers a number which have
+∑ 138 357 544 31 000 M 15 245 800
+Table D.3.:VGG-16 D architecture: The authors chose to give only layers a number which have
 learnable parameters. All convolutions are zero padded to prevent size changes and
 use ReLU activation functions. The channels mean is subtracted from each pixel as
-a preprocessing step ( −103.939,−116.779,−123.68). As Dropout is only calculated
-during training time, the number of FLOPs is 0. The dropout probability is 0.5.
+a preprocessing step (−103.939,−116.779,−123.68). As Dropout is only calculated
+during training time, the number of FLOPs is 0. The dropout probability is 0.5.
 The calculated number of parameters was checked against the downloaded version. It
-also has 138 357 544 parameters.
+also has 138 357 544 parameters.
 
 D.4. GoogleNet, Inception v2 and v3
 The large number of parameters and operations is a problem when such models should get
 applied in practice to thousands of images. In order to reduce the computational cost while
-maintaining the classification quality, GoogleNet [ SLJ+15] and the Inception module were
-developed. The Inception module essentially only computes 1×1filters, 3×3filters and
-5×5filters in parallel, but applied bottleneck 1×1filters before to reduce the number of
+maintaining the classification quality, GoogleNet [SLJ+15] and the Inception module were
+developed. The Inception module essentially only computes 1×1 filters, 3×3 filters and
+5×5 filters in parallel, but applied bottleneck 1×1 filters before to reduce the number of
 parameters. It is shown in Figure D.4.
 Figure D.4.: Inception module
 Image source: [SLJ+15]
-Compared to GoogleNet, Inception v2 [ SVI+15] removed the 5×5filters and replaced
-them by two successive layers of 3×3filters. A visualization of an Inception v2 module
+Compared to GoogleNet, Inception v2 [SVI+15] removed the 5×5 filters and replaced
+them by two successive layers of 3×3 filters. A visualization of an Inception v2 module
 is given in Figure D.5. Additionally, Inception v2 applies successive asymmetric filters to
-approximate symmetric filters with fewer parameters. The authors call this approach filter
-factorization .
+approximate symmetric filters with fewer parameters. The authors call this approach filter
+factorization.
 Inception v3 introduced Batch Normalization to the network [SVI+15].
 Figure D.5.: Inception v2 module
 Image source: [SVI+15]
 
 D.5. Inception-v4
-Inception-v4 as described in [ SIV16] consists of four main building blocks: The stem,
+Inception-v4 as described in [SIV16] consists of four main building blocks: The stem,
 Inception A, Inception B and Inception C. To quote the authors: Inception-v4 is a deeper,
 wider and more uniform simplified architecture than Inception-v3. The stem, Reduction A
 and Reduction B use max-pooling, whereas Inception A, Inception B and Inception C use
 average pooling. The stem, module B and module C use separable convolutions.
-#×Type Parameters Output size
-Input 3 @ 299×299
-1 Stem 605 728 384 @ 35×35
-24×Inception A 317 632 384 @ 35×35
-3 Reduction A 2 306 112 1024 @ 17×17
-47×Inception B 2 936 256 1024 @ 17×17
-5 Reduction B 2 747 392 1536 @ 8×8
-63×Inception C 4 553 088 1536 @ 8×8
-Global Average Pooling 0 1536 @ 1×1
-Dropout (p=0.8) 0 1536 @ 1×1
+# × Type Parameters Output size
+Input 3 @ 299 ×299
+1 Stem 605 728 384 @ 35 × 35
+2 4× Inception A 317 632 384 @ 35 × 35
+3 Reduction A 2 306 112 1024 @ 17 × 17
+4 7× Inception B 2 936 256 1024 @ 17 × 17
+5 Reduction B 2 747 392 1536 @ 8 × 8
+6 3× Inception C 4 553 088 1536 @ 8 × 8
+Global Average Pooling 0 1536 @ 1 × 1
+Dropout (p=0.8) 0 1536 @ 1 × 1
 7 Softmax 1 537 000 1000
-∑42 679 816
+∑ 42 679 816
 Table D.4.: Inception-v4 network.
 
 
@@ -2889,69 +3086,72 @@ Well-known benchmark datasets for classification problems in computer vision are
 in Table E.1. The best results known to me are given in Table E.2. However, every semantic
 segmentation dataset (e.g., PASCAL VOC) can also be used to benchmark image classifiers
 using Algorithm 2.
-DatabaseImage Resolution
-(width×height)Number
+Database Image Resolution
+(width ×height)
+Number
 of
-ImagesNumber
+Images
+Number
 of
-ClassesChannels Data source
+Classes
+Channels Data source
 MNIST 28 px×28 px 70 000 10 1 [YL98, LBBH98]
 HASYv2 32 px×32 px 168 233 369 1 [Tho17a]
-SVHN 32 px×32 px 630 420 10 3[NWC+11b],
+SVHN 32 px×32 px 630 420 10 3 [NWC+11b],
 [NWC+11a]
 CIFAR-10 32 px×32 px 60 000 10 3 [Kri, KH09]
 CIFAR-100 32 px×32 px 60 000 100 3 [Kri, KH09]
 STL-10 96 px×96 px 13 000 10 3 [CLN11, CLN10]
-Caltech-101(80 px−3481 px)
-×(92 px−3999 px)9144 102 3 [FFP03, FFFP06]
-Caltech-256(75 px−7913 px)
-×(75 px−7913 px)30 607 257 3 [Gri06, GG07]
+Caltech-101 (80 px−3481 px)
+×(92 px−3999 px) 9144 102 3 [FFP03, FFFP06]
+Caltech-256 (75 px−7913 px)
+×(75 px−7913 px) 30 607 257 3 [Gri06, GG07]
 ILSVRC 20121 (8 px−9331 px)
-×(10 px−6530 px)1.2·1061000 3 [Ima12, RDS+14]
+×(10 px−6530 px) $1.2 \cdot 10^6$ 1000 3 [Ima12, RDS+14]
 Places3652 (290px−3158px)
-×(225px−2630px)1.8·106365 3 [Zho16, ZKL+16]
-GTSRB(25 px−266 px)
-×(25 px−232 px)51 839 43 3 [SSSI, SSSI12]
+×(225px−2630px) 1.8 ·106 365 3 [Zho16, ZKL +16]
+GTSRB (25 px−266 px)
+×(25 px−232 px) 51 839 43 3 [SSSI, SSSI12]
 Asirra3 (4 px−500 px)
-×(4 px−500 px)25 000 2 3 [Asi17, EDHS07]
-Graz-02480 px×640 px
-and640 px×480 px1096 3 3 [Mar08, MS07]
-Table E.1.: An overview over publicly available image databases for classification. The number
+×(4 px−500 px) 25 000 2 3 [Asi17, EDHS07]
+Graz-02 480 px×640 px
+and 640 px×480 px 1096 3 3 [Mar08, MS07]
+Table E.1.:An overview over publicly available image databases for classification. The number
 of images row gives the sum of the training and the test images. Some datasets, like
 SVHN, have additional unlabeled data which is not given in this table.
 1ImageNet Large Scale Visual Recognition Competition
 2The dimensions are only calculated for the validation set.
 3Asirra is a CAPTCHA created by Microsoft and was used in the “Cats vs Dogs” competition on Kaggle
 
-Dataset Model type / name Result ScoreAchieved /
+Dataset Model type / name Result Score Achieved /
 Claimed by
-MNIST — 0.21 % error [WZZ+13]
+MNIST — 0.21 % error [WZZ +13]
 HASYv2 TF-CNN 81.00 % accuracy [Tho17a]
 SVHN DenseNet ( k= 24) 1.59 % error [HLW16]
-CIFAR-10 DenseNet-BC ( k= 40)3.46 % error [HLW16]
+CIFAR-10 DenseNet-BC ( k= 40) 3.46 % error [HLW16]
 CIFAR-100 WRN-28-10 16.21 % error [LH16]
 STL-10 SWWAE-4layer 74.80 % accuracy [ZMGL15]
-Caltech-101 SPP-net (pretrained) 93.42 %±0.5 %accuracy [HZRS14]
-Caltech-256 ZF-Net (pretrained) 74.2 %±0.3 %accuracy [ZF14]
+Caltech-101 SPP-net (pretrained) 93.42 %±0.5 % accuracy [HZRS14]
+Caltech-256 ZF-Net (pretrained) 74.2 %±0.3 % accuracy [ZF14]
 ImageNet 2012 ResNet ensemble 3.57 % Top-5 error [HZRS15a]
 GTSRB MCDNN 99.46 % accuracy [SL11]
 Asirra SVM 82.7 % accuracy [Gol08]
 Graz-02 Optimal NBNN 78.98 % accuracy [BMDP10]
 Table E.2.: An overview over state of the art results achieved in computer vision datasets.
-Algorithm 2 Create a classification dataset from a semantic segmentation dataset
-Require: Semantic segmentation dataset ( DS)
-procedure CreateDataset (Annotated dataset DS)
-DC←List
+Algorithm 2 Create a classification dataset from a semantic segmentation dataset
+Require: Semantic segmentation dataset (DS)
+procedure CreateDataset(Annotated dataset DS)
+DC ← List
 w←desired image width
 h←desired image height
-forImage and associated label (x,y)inDSdo
-i←randint (0,L.width−w)
-j←randint (0,L.height−h)
-cL←crop (y,(i,j),(i+w,j+h))
-ifat least 50% of sare of one class then
-cI←crop (x,(i,j),(i+w,j+h))
-D.append ((cI,cL))
-return(DC)
+for Image and associated label (x, y) in DS do
+i ← randint(0, L.width − w)
+j ← randint(0, L.height − h)
+cL ← crop(y, (i, j), (i + w, j + h))
+if at least 50% of s are of one class then
+cI ← crop(x, (i, j), (i + w, j + h))
+D.append((cI, cL))
+return (DC)
 
 F. List of Tables
 2.1 Pooling types . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 8
@@ -3006,8 +3206,8 @@ G. List of Figures
 5.1 Baseline architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 39
 5.2 Baseline model filter weight distribution . . . . . . . . . . . . . . . . . . . . 42
 5.3 Baseline model bias weight distribution . . . . . . . . . . . . . . . . . . . . . 42
-5.4 Baseline model γdistribution . . . . . . . . . . . . . . . . . . . . . . . . . . 43
-5.5 Baseline model βdistribution . . . . . . . . . . . . . . . . . . . . . . . . . . 43
+5.4 Baseline model γ distribution . . . . . . . . . . . . . . . . . . . . . . . . . . 43
+5.5 Baseline model β distribution . . . . . . . . . . . . . . . . . . . . . . . . . . 43
 5.6 Baseline model filter weight range distribution . . . . . . . . . . . . . . . . . 44
 5.7 Baseline model CIFAR-100 validation accuracy . . . . . . . . . . . . . . . . 45
 5.8 Baseline Weight updates (mean) . . . . . . . . . . . . . . . . . . . . . . . . 46
@@ -3032,596 +3232,596 @@ D.4 Inception module . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
 D.5 Inception v2 module . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 94
 
 H. Bibliography
-[AAB+16]M. Abadi, A. Agarwal et al., “Tensorflow: Large-scale machine learning on
-heterogeneous distributed systems,” arXiv preprint arXiv:1603.04467 , Mar.
-2016. [Online]. Available: https://arxiv .org/abs/1603 .04467
-[ABKS99] M. Ankerst, M. M. Breunig et al., “OPTICS: Ordering points to identify the
-clustering structure,” in ACM Sigmod record , vol. 28, no. 2. ACM, 1999, pp.
+[AAB+16] M. Abadi, A. Agarwal et al., “Tensorflow: Large-scale machine learning on
+heterogeneous distributed systems,” arXiv preprint arXiv:1603.04467, Mar.
+2016. [Online]. Available: https://arxiv.org/abs/1603.04467
+[ABKS99] M. Ankerst, M. M. Breunig et al., “OPTICS: Ordering points to identify the
+clustering structure,” in ACM Sigmod record, vol. 28, no. 2. ACM, 1999, pp.
 49–60.
-[ADG+16]M. Andrychowicz, M. Denil et al., “Learning to learn by gradient descent by
-gradient descent,” in Advances in Neural Information Processing Systems 29
-(NIPS), D. D. Lee, M. Sugiyama et al., Eds. Curran Associates, Inc., Mar.
-2016, pp. 3981–3989. [Online]. Available: http://papers .nips.cc/paper/6461learning-to-learn-by-gradient-descent-by-gradient-descent
- .pdf
+[ADG+16] M. Andrychowicz, M. Denil et al., “Learning to learn by gradient descent by
+gradient descent,” in Advances in Neural Information Processing Systems 29
+(NIPS), D. D. Lee, M. Sugiyama et al., Eds. Curran Associates, Inc., Mar.
+2016, pp. 3981–3989. [Online]. Available: http://papers.nips.cc/paper/6461-learning-to-learn-by-gradient-descent-by-gradient-descent.pdf
+
 [AM15] M. T. Alexander Mordvintsev, Christopher Olah, “Inceptionism:
 Going deeper into neural networks,” Jun. 2015. [Online]. Available:
- https://research .googleblog.com/2015/06/inceptionism-going-deeperinto-neural.html
+ https://research.googleblog.com/2015/06/inceptionism-going-deeper-into-neural.html
 
 [Asi17] “Kaggle cats and dogs dataset,” Oct. 2017. [Online]. Available: https:
-//www.microsoft.com/en-us/download/details .aspx?id=54765
+//www.microsoft.com/en-us/download/details.aspx?id=54765
 [BB12] J. Bergstra and Y. Bengio, “Random search for hyper-parameter optimization,”
-Journal of Machine Learning Research , vol. 13, no. Feb, pp. 281–305,
-Feb. 2012. [Online]. Available: http://jmlr .csail.mit.edu/papers/volume13/
-bergstra12a/bergstra12a .pdf
-[BCW+17]J. Bao, D. Chen et al., “CVAE-GAN: Fine-grained image generation through
-asymmetric training,” arXiv preprint arXiv:1703.10155 , Mar. 2017. [Online].
-Available: https://arxiv .org/abs/1703 .10155
-[BDLB09] J. Bergstra, G. Desjardins et al., “Quadratic polynomials learn better image
+Journal of Machine Learning Research, vol. 13, no. Feb, pp. 281–305,
+Feb. 2012. [Online]. Available: http://jmlr.csail.mit.edu/papers/volume13/
+bergstra12a/bergstra12a.pdf
+[BCW+17] J. Bao, D. Chen et al., “CVAE-GAN: Fine-grained image generation through
+asymmetric training,” arXiv preprint arXiv:1703.10155, Mar. 2017. [Online].
+Available: https://arxiv.org/abs/1703.10155
+[BDLB09] J. Bergstra, G. Desjardinset al., “Quadratic polynomials learn better image
  features,” Département d’Informatique et de Recherche Opérationnelle,
 Université de Montréal, Tech. Rep. 1337, 2009.
-[BGNR16] B. Baker, O. Gupta et al., “Designing neural network architectures using
-reinforcement learning,” arXiv preprint arXiv:1611.02167 , Nov. 2016. [Online].
-Available: https://arxiv .org/abs/1611 .02167
+[BGNR16] B. Baker, O. Gupta et al., “Designing neural network architectures using
+reinforcement learning,” arXiv preprint arXiv:1611.02167, Nov. 2016. [Online].
+Available: https://arxiv.org/abs/1611.02167
 
 [BM93] U. Bodenhausen and S. Manke, Automatically Structured Neural
-Networks For Handwritten Character And Word Recognition . London:
+Networks For Handwritten Character And Word Recognition. London:
 Springer London, Sep. 1993, pp. 956–961. [Online]. Available: http:
 //dx.doi.org/10.1007/978-1-4471-2063-6_283
 [BMDP10] R. Behmo, P. Marcombes et al., “Towards optimal naive Bayes nearest
-neighbor,” in European Conference on Computer Vision (ECCV) . Springer,
+neighbor,” inEuropean Conference on Computer Vision (ECCV). Springer,
 2010, pp. 171–184.
 [BPL10] Y.-L. Boureau, J. Ponce, and Y. LeCun, “A theoretical analysis of
 feature pooling in visual recognition,” in International Conference on
-Machine Learning (ICML) , no. 27, 2010, pp. 111–118. [Online]. Available:
-http://yann .lecun.com/exdb/publis/pdf/boureau-icml-10 .pdf
+Machine Learning (ICML), no. 27, 2010, pp. 111–118. [Online]. Available:
+http://yann.lecun.com/exdb/publis/pdf/boureau-icml-10.pdf
 [BSF94] Y. Bengio, P. Simard, and P. Frasconi, “Learning long-term dependencies
-with gradient descent is difficult,” IEEE transactions on neural networks ,
+with gradient descent is difficult,”IEEE transactions on neural networks,
 vol. 5, no. 2, pp. 157–166, 1994.
 [Cha92] C. Charalambous, “Conjugate gradient algorithm for efficient training
 of artificial neural networks,” IEEE Proceedings G-Circuits, Devices
-and Systems , vol. 139, no. 3, pp. 301–310, 1992. [Online]. Available:
-http://ieeexplore .ieee.org/document/143326/
+and Systems, vol. 139, no. 3, pp. 301–310, 1992. [Online]. Available:
+http://ieeexplore.ieee.org/document/143326/
 [Cho15] F. Chollet, “Keras,” https://github .com/fchollet/keras, 2015.
 [CLN10] A. Coates, H. Lee, and A. Y. Ng, “An analysis of single-layer networks
-in unsupervised feature learning,” Ann Arbor , vol. 1001, no. 48109,
+in unsupervised feature learning,” Ann Arbor, vol. 1001, no. 48109,
 p. 2, 2010. [Online]. Available: http://cs .stanford.edu/~acoates/papers/
-coatesleeng_aistats_2011 .pdf
+coatesleeng_aistats_2011.pdf
 [CLN11] A. Coates, H. Lee, and A. Y. Ng, “STL-10 dataset,” 2011. [Online]. Available:
 http://cs.stanford.edu/~acoates/stl10
 [CMS12] D. Ciregan, U. Meier, and J. Schmidhuber, “Multi-column deep neural
-networks for image classification,” in Conference on Computer Vision and
-Pattern Recognition (CVPR) . IEEE, Feb. 2012, pp. 3642–3649. [Online].
-Available: https://arxiv .org/abs/1202 .2745v1
+networks for image classification,” inConference on Computer Vision and
+Pattern Recognition (CVPR). IEEE, Feb. 2012, pp. 3642–3649. [Online].
+Available: https://arxiv.org/abs/1202.2745v1
 [CUH15] D.-A. Clevert, T. Unterthiner, and S. Hochreiter, “Fast and accurate
 deep network learning by exponential linear units (ELUs),” arXiv
 preprint arXiv:1511.07289 , Nov. 2015. [Online]. Available: https:
-//arxiv.org/abs/1511 .07289
-[CWV+14]S. Chetlur, C. Woolley et al., “cuDNN: Efficient primitives for deep
-learning,” arXiv preprint arXiv:1410.0759 , Oct. 2014. [Online]. Available:
-https://arxiv .org/abs/1410 .0759
+//arxiv.org/abs/1511.07289
+[CWV+14] S. Chetlur, C. Woolley et al., “cuDNN: Efficient primitives for deep
+learning,” arXiv preprint arXiv:1410.0759, Oct. 2014. [Online]. Available:
+https://arxiv.org/abs/1410.0759
 
-[DBB+01]C. Dugas, Y. Bengio et al., “Incorporating second-order functional
+[DBB+01] C. Dugas, Y. Bengio et al., “Incorporating second-order functional
 knowledge for better option pricing,” in Advances in Neural Information
- Processing Systems 13 (NIPS) , T. K. Leen, T. G. Dietterich,
+ Processing Systems 13 (NIPS), T. K. Leen, T. G. Dietterich,
 and V. Tresp, Eds. MIT Press, 2001, pp. 472–478. [Online].
-Available: http://papers .nips.cc/paper/1920-incorporating-second-orderfunctional-knowledge-for-better-option-pricing
- .pdf
+Available: http://papers.nips.cc/paper/1920-incorporating-second-order-functional-knowledge-for-better-option-pricing.pdf
+
 [DDFK16] S. Dieleman, J. De Fauw, and K. Kavukcuoglu, “Exploiting cyclic symmetry
-in convolutional neural networks,” arXiv preprint arXiv:1602.02660 , Feb.
-2016. [Online]. Available: https://arxiv .org/abs/1602 .02660
+in convolutional neural networks,”arXiv preprint arXiv:1602.02660, Feb.
+2016. [Online]. Available: https://arxiv.org/abs/1602.02660
 [DHS11] J. Duchi, E. Hazan, and Y. Singer, “Adaptive subgradient methods for
-online learning and stochastic optimization,” Journal of Machine Learning
-Research , vol. 12, no. Jul, pp. 2121–2159, 2011. [Online]. Available:
-http://www .jmlr.org/papers/volume12/duchi11a/duchi11a .pdf
+online learning and stochastic optimization,”Journal of Machine Learning
+Research, vol. 12, no. Jul, pp. 2121–2159, 2011. [Online]. Available:
+http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf
 [DHS16] J. Dai, K. He, and J. Sun, “Instance-aware semantic segmentation via
-multi-task network cascades,” in Conference on Computer Vision and Pattern
-Recognition (CVPR) . IEEE, 2016, pp. 3150–3158. [Online]. Available:
-https://arxiv .org/abs/1512 .04412
-[DJ99] W. Duch and N. Jankowski, “Survey of neural transfer functions,” Neural
-Computing Surveys , vol. 2, no. 1, pp. 163–212, 1999. [Online]. Available:
-ftp://ftp.icsi.berkeley.edu/pub/ai/jagota/vol2_6 .pdf
+multi-task network cascades,” inConference on Computer Vision and Pattern
+Recognition (CVPR). IEEE, 2016, pp. 3150–3158. [Online]. Available:
+https://arxiv.org/abs/1512.04412
+[DJ99] W. Duch and N. Jankowski, “Survey of neural transfer functions,”Neural
+Computing Surveys, vol. 2, no. 1, pp. 163–212, 1999. [Online]. Available:
+ftp://ftp.icsi.berkeley.edu/pub/ai/jagota/vol2_6.pdf
 [Doz15] T. Dozat, “Incorporating Nesterov momentum into Adam,” Stanford
-University, Tech. Rep., 2015. [Online]. Available: http://cs229 .stanford.edu/
-proj2015/054_report .pdf
+University, Tech. Rep., 2015. [Online]. Available: http://cs229.stanford.edu/
+proj2015/054_report.pdf
 [DSRB14] A. Dosovitskiy, J. T. Springenberg et al., “Discriminative unsupervised
-feature learning with convolutional neural networks,” in Advances in Neural
-Information Processing Systems 27 (NIPS) , Z. Ghahramani, M. Welling
+feature learning with convolutional neural networks,” inAdvances in Neural
+Information Processing Systems 27 (NIPS), Z. Ghahramani, M. Welling
 et al., Eds. Curran Associates, Inc., 2014, pp. 766–774. [Online].
-Available: http://papers .nips.cc/paper/5548-discriminative-unsupervisedfeature-learning-with-convolutional-neural-networks
- .pdf
+Available: http://papers.nips.cc/paper/5548-discriminative-unsupervised-feature-learning-with-convolutional-neural-networks.pdf
+
 [DWD15] S. Dieleman, K. W. Willett, and J. Dambre, “Rotation-invariant convolutional
-neural networks for galaxy morphology prediction,” Monthly notices of the
-royal astronomical society , vol. 450, no. 2, pp. 1441–1459, 2015.
-[EDHS07] J. Elson, J. J. Douceur et al., “Asirra: A CAPTCHA that
+neural networks for galaxy morphology prediction,”Monthly notices of the
+royal astronomical society, vol. 450, no. 2, pp. 1441–1459, 2015.
+[EDHS07] J. Elson, J. J. Douceur et al., “Asirra: A CAPTCHA that
 exploits interest-aligned manual image categorization,” in ACM Conference
  on Computer and Communications Security (CCS) , no. 14.
 Association for Computing Machinery, Inc., Oct. 2007. [Online].
 
-Available: https://www .microsoft.com/en-us/research/publication/asirra-acaptcha-that-exploits-interest-aligned-manual-image-categorization/
+Available: https://www.microsoft.com/en-us/research/publication/asirra-a-captcha-that-exploits-interest-aligned-manual-image-categorization/
 
-[EKS+96]M. Ester, H.-P. Kriegel et al., “A density-based algorithm for discovering
-clusters in large spatial databases with noise.” in Kdd, vol. 96, no. 34, 1996,
+[EKS+96] M. Ester, H.-P. Kriegelet al., “A density-based algorithm for discovering
+clusters in large spatial databases with noise.” inKdd, vol. 96, no. 34, 1996,
 pp. 226–231.
-[ES03] A. E. Eiben and J. E. Smith, Introduction to evolutionary computing .
-Springer, 2003, vol. 53. [Online]. Available: https://dx .doi.org/10.1007/978-3662-44874-8
+[ES03] A. E. Eiben and J. E. Smith, Introduction to evolutionary computing.
+Springer, 2003, vol. 53. [Online]. Available: https://dx.doi.org/10.1007/978-3-662-44874-8
 
 [Fah88] S. E. Fahlman, “An empirical study of learning speed in back-propagation
 networks,” 1988. [Online]. Available: http://repository .cmu.edu/cgi/
-viewcontent .cgi?article=2799&context=compsci
+viewcontent.cgi?article=2799&context=compsci
 [FFFP06] L. Fei-Fei, R. Fergus, and P. Perona, “One-shot learning of object
-categories,” IEEE transactions on pattern analysis and machine intelligence ,
+categories,”IEEE transactions on pattern analysis and machine intelligence,
 vol. 28, no. 4, pp. 594–611, Apr. 2006. [Online]. Available: http:
-//vision.stanford.edu/documents/Fei-FeiFergusPerona2006 .pdf
+//vision.stanford.edu/documents/Fei-FeiFergusPerona2006.pdf
 [FFP03] R. F. Fei-Fei and P. Perona, “Caltech 101,” 2003. [Online]. Available: http:
-//www.vision.caltech.edu/Image_Datasets/Caltech101/Caltech101 .html
-[FGMR10] P. F. Felzenszwalb, R. B. Girshick et al., “Object detection with discriminatively
- trained part-based models,” IEEE transactions on pattern analysis and
-machine intelligence , vol. 32, no. 9, pp. 1627–1645, 2010.
+//www.vision.caltech.edu/Image_Datasets/Caltech101/Caltech101.html
+[FGMR10] P. F. Felzenszwalb, R. B. Girshicket al., “Object detection with discriminatively
+ trained part-based models,”IEEE transactions on pattern analysis and
+machine intelligence, vol. 32, no. 9, pp. 1627–1645, 2010.
 [FL89] S. E. Fahlman and C. Lebiere, “The cascade-correlation learning architecture,”
-1989. [Online]. Available: http://repository .cmu.edu/compsci/1938/
+1989. [Online]. Available: http://repository.cmu.edu/compsci/1938/
 [GB10] X. Glorot and Y. Bengio, “Understanding the difficulty of training deep
-feedforward neural networks.” in Aistats, vol. 9, 2010, pp. 249–256. [Online].
-Available: http://jmlr .org/proceedings/papers/v9/glorot10a/glorot10a .pdf
+feedforward neural networks.” inAistats, vol. 9, 2010, pp. 249–256. [Online].
+Available: http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf
 [GBB11] X. Glorot, A. Bordes, and Y. Bengio, “Deep sparse rectifier neural
-networks.” in Aistats, vol. 15, no. 106, 2011, p. 275. [Online]. Available:
-http://www .jmlr.org/proceedings/papers/v15/glorot11a/glorot11a .pdf
-[GDDM14] R. Girshick, J. Donahue et al., “Rich feature hierarchies for accurate object
-detection and semantic segmentation,” in Conference on Computer Vision
-and Pattern Recognition (CVPR) . IEEE, 2014, pp. 580–587. [Online].
-Available: https://arxiv .org/abs/1311 .2524
+networks.” inAistats, vol. 15, no. 106, 2011, p. 275. [Online]. Available:
+http://www.jmlr.org/proceedings/papers/v15/glorot11a/glorot11a.pdf
+[GDDM14] R. Girshick, J. Donahueet al., “Rich feature hierarchies for accurate object
+detection and semantic segmentation,” inConference on Computer Vision
+and Pattern Recognition (CVPR). IEEE, 2014, pp. 580–587. [Online].
+Available: https://arxiv.org/abs/1311.2524
 [GG07] P. P. Greg Griffin, Alex Holub, “Caltech-256 object category dataset,” Apr.
-2007. [Online]. Available: http://authors .library.caltech.edu/7694/
+2007. [Online]. Available: http://authors.library.caltech.edu/7694/
 
 [GG16] Y. Gal and Z. Ghahramani, “Bayesian convolutional neural networks with
-Bernoulli approximate variational inference,” arXivpreprintarXiv:1506.02158 ,
-Jan. 2016. [Online]. Available: https://arxiv .org/abs/1506 .02158v6
-[GJ02] M. R. Garey and D. S. Johnson, Computers and intractability . wh freeman
+Bernoulli approximate variational inference,”arXiv preprint arXiv:1506.02158,
+Jan. 2016. [Online]. Available: https://arxiv.org/abs/1506.02158v6
+[GJ02] M. R. Garey and D. S. Johnson,Computers and intractability. wh freeman
 New York, 2002, vol. 29.
-[GJS76] M.R.Garey, D.S.Johnson, andL.Stockmeyer, “SomesimplifiedNP-complete
-graph problems,” Theoretical computer science , vol. 1, no. 3, pp. 237–267,
+[GJS76] M. R. Garey, D. S. Johnson, and L. Stockmeyer, “Some simplified NP-complete
+graph problems,”Theoretical computer science, vol. 1, no. 3, pp. 237–267,
 1976.
-[Gol08] P. Golle, “Machine learning attacks against the Asirra CAPTCHA,” in ACM
-conference on Computer and communications security (CCS) , no. 15. ACM,
+[Gol08] P. Golle, “Machine learning attacks against the Asirra CAPTCHA,” inACM
+conference on Computer and communications security (CCS), no. 15. ACM,
 2008, pp. 535–542.
-[Gra15] B. Graham, “Fractional max-pooling,” arXiv preprint arXiv:1412.6071 , May
-2015. [Online]. Available: https://arxiv .org/abs/1412 .6071
+[Gra15] B. Graham, “Fractional max-pooling,”arXiv preprint arXiv:1412.6071, May
+2015. [Online]. Available: https://arxiv.org/abs/1412.6071
 [Gri06] A. P. Griffin, G. Holub, “Caltech 256,” 2006. [Online]. Available:
-http://www .vision.caltech.edu/Image_Datasets/Caltech256/
-[GWFM+13]I. J. Goodfellow, D. Warde-Farley et al., “Maxout networks.” ICML,
+http://www.vision.caltech.edu/Image_Datasets/Caltech256/
+[GWFM+13] I. J. Goodfellow, D. Warde-Farley et al., “Maxout networks.” ICML,
 vol. 28, no. 3, pp. 1319–1327, 2013. [Online]. Available: http:
-//www.jmlr.org/proceedings/papers/v28/goodfellow13 .pdf
+//www.jmlr.org/proceedings/papers/v28/goodfellow13.pdf
 [HAE16] M. Huh, P. Agrawal, and A. A. Efros, “What makes ImageNet good for
-transfer learning?” arXiv preprint arXiv:1608.08614 , Aug. 2016. [Online].
-Available: https://arxiv .org/abs/1608 .08614
-[Han89] S. J. Hanson, “Meiosis networks.” in NIPS, 1989, pp. 533–541. [Online].
-Available: http://papers .nips.cc/paper/227-meiosis-networks .pdf
+transfer learning?” arXiv preprint arXiv:1608.08614, Aug. 2016. [Online].
+Available: https://arxiv.org/abs/1608.08614
+[Han89] S. J. Hanson, “Meiosis networks.” inNIPS, 1989, pp. 533–541. [Online].
+Available: http://papers.nips.cc/paper/227-meiosis-networks.pdf
 [Har15] M. Harris, “New features in CUDA 7.5,” Jul. 2015. [Online]. Available:
-https://devblogs .nvidia.com/parallelforall/new-features-cuda-7-5/
+https://devblogs.nvidia.com/parallelforall/new-features-cuda-7-5/
 [HLW16] G. Huang, Z. Liu, and K. Q. Weinberger, “Densely connected convolutional
-networks,” arXiv preprint arXiv:1608.06993 , Aug. 2016. [Online]. Available:
-https://arxiv .org/abs/1608 .06993v1
+networks,”arXiv preprint arXiv:1608.06993, Aug. 2016. [Online]. Available:
+https://arxiv.org/abs/1608.06993v1
 [HM16] M. Hardt and T. Ma, “Identity matters in deep learning,” arXiv
 preprint arXiv:1611.04231 , Nov. 2016. [Online]. Available: https:
-//arxiv.org/abs/1611 .04231
+//arxiv.org/abs/1611.04231
 [How13] A. G. Howard, “Some improvements on deep convolutional neural network
-based image classification,” arXiv preprint arXiv:1312.5402 , Dec. 2013.
-[Online]. Available: https://arxiv .org/abs/1312 .5402
+based image classification,” arXiv preprint arXiv:1312.5402, Dec. 2013.
+[Online]. Available: https://arxiv.org/abs/1312.5402
 
-[HPK11] J. Han, J. Pei, and M. Kamber, Data mining: concepts and techniques .
+[HPK11] J. Han, J. Pei, and M. Kamber, Data mining: concepts and techniques.
 Elsevier, 2011.
-[HPN+16]S. Han, J. Pool et al., “DSD: Regularizing deep neural networks with
-dense-sparse-dense training flow,” arXiv preprint arXiv:1607.04381 , Jul. 2016.
-[Online]. Available: https://arxiv .org/abs/1607 .04381
-[HPTD15] S. Han, J. Pool et al., “Learning both weights and connections for efficient
-neural network,” in Advances in Neural Information Processing Systems 28
-(NIPS), C. Cortes, N. D. Lawrence et al., Eds. Curran Associates, Inc., Jun.
-2015, pp. 1135–1143. [Online]. Available: http://papers .nips.cc/paper/5784learning-both-weights-and-connections-for-efficient-neural-network
+[HPN+16] S. Han, J. Pool et al., “DSD: Regularizing deep neural networks with
+dense-sparse-dense training flow,”arXiv preprint arXiv:1607.04381, Jul. 2016.
+[Online]. Available: https://arxiv.org/abs/1607.04381
+[HPTD15] S. Han, J. Poolet al., “Learning both weights and connections for efficient
+neural network,” inAdvances in Neural Information Processing Systems 28
+(NIPS), C. Cortes, N. D. Lawrenceet al., Eds. Curran Associates, Inc., Jun.
+2015, pp. 1135–1143. [Online]. Available: http://papers.nips.cc/paper/5784learning-both-weights-and-connections-for-efficient-neural-network
  .pdf
-[HSK+12]G. E. Hinton, N. Srivastava et al., “Improving neural networks by preventing
-co-adaptation of feature detectors,” arXiv preprint arXiv:1207.0580 , Jul.
-2012. [Online]. Available: https://arxiv .org/abs/1207 .0580
-[HSL+16]G. Huang, Y. Sun et al., “Deep networks with stochastic depth,”
-arXiv preprint arXiv:1603.09382 , Mar. 2016. [Online]. Available: https:
-//arxiv.org/abs/1603 .09382
+[HSK+12] G. E. Hinton, N. Srivastavaet al., “Improving neural networks by preventing
+co-adaptation of feature detectors,”arXiv preprint arXiv:1207.0580, Jul.
+2012. [Online]. Available: https://arxiv.org/abs/1207.0580
+[HSL+16] G. Huang, Y. Sun et al., “Deep networks with stochastic depth,”
+arXiv preprint arXiv:1603.09382, Mar. 2016. [Online]. Available: https:
+//arxiv.org/abs/1603.09382
 [HSW93] B. Hassibi, D. G. Stork, and G. J. Wolff, “Optimal brain surgeon
 and general network pruning,” in International Conference on Neural
-Networks . IEEE, 1993, pp. 293–299. [Online]. Available: http:
-//ee.caltech.edu/Babak/pubs/conferences/00298572 .pdf
+Networks. IEEE, 1993, pp. 293–299. [Online]. Available: http:
+//ee.caltech.edu/Babak/pubs/conferences/00298572.pdf
 [HVD15] G. Hinton, O. Vinyals, and J. Dean, “Distilling the knowledge in a neural
-network,” arXiv preprint arXiv:1503.02531 , Mar. 2015. [Online]. Available:
-https://arxiv .org/abs/1503 .02531
+network,”arXiv preprint arXiv:1503.02531, Mar. 2015. [Online]. Available:
+https://arxiv.org/abs/1503.02531
 [HZRS14] K. He, X. Zhang et al., “Spatial pyramid pooling in deep convolutional
 networks for visual recognition,” in European Conference on Computer
 Vision (ECCV) . Springer, 2014, pp. 346–361. [Online]. Available:
-https://arxiv .org/abs/1406 .4729
+https://arxiv.org/abs/1406.4729
 [HZRS15a] K. He, X. Zhang et al., “Deep residual learning for image recognition,”
-arXiv preprint arXiv:1512.03385 , Dec. 2015. [Online]. Available: https:
-//arxiv.org/abs/1512 .03385v1
-[HZRS15b] K. He, X. Zhang et al., “Delving deep into rectifiers: Surpassing human-level
+arXiv preprint arXiv:1512.03385, Dec. 2015. [Online]. Available: https:
+//arxiv.org/abs/1512.03385v1
+[HZRS15b] K. He, X. Zhanget al., “Delving deep into rectifiers: Surpassing human-level
 performance on imagenet classification,” in International Conference on
-Computer Vision (ICCV) , Feb. 2015, pp. 1026–1034. [Online]. Available:
-https://arxiv .org/abs/1502 .01852
+Computer Vision (ICCV), Feb. 2015, pp. 1026–1034. [Online]. Available:
+https://arxiv.org/abs/1502.01852
 [Ima12] “Imagenet large scale visual recognition challenge 2012 (ILSVRC2012),”
 
-2012. [Online]. Available: http://www .image-net.org/challenges/LSVRC/
+2012. [Online]. Available: http://www.image-net.org/challenges/LSVRC/
 2012/nonpub-downloads
 [IS15] S. Ioffe and C. Szegedy, “Batch normalization: Accelerating deep network
-training by reducing internal covariate shift,” arXiv preprint arXiv:1502.03167 ,
-Feb. 2015. [Online]. Available: https://arxiv .org/abs/1502 .03167
-[JXF+16]X. Jin, C. Xu et al., “Deep learning with s-shaped rectified linear activation
-units,” in Thirtieth AAAI Conference on Artificial Intelligence , Dec. 2016.
-[Online]. Available: https://arxiv .org/abs/1512 .07030
+training by reducing internal covariate shift,”arXiv preprint arXiv:1502.03167,
+Feb. 2015. [Online]. Available: https://arxiv.org/abs/1502.03167
+[JXF+16] X. Jin, C. Xuet al., “Deep learning with s-shaped rectified linear activation
+units,” inThirtieth AAAI Conference on Artificial Intelligence, Dec. 2016.
+[Online]. Available: https://arxiv.org/abs/1512.07030
 [Kar11] A. Karpathy, “Lessons learned from manually classifying CIFAR-10,” Apr.
-2011. [Online]. Available: http://karpathy .github.io/2011/04/27/manuallyclassifying-cifar10/
+2011. [Online]. Available: http://karpathy.github.io/2011/04/27/manuallyclassifying-cifar10/
 
 [KB14] D. Kingma and J. Ba, “Adam: A method for stochastic optimization,”
-arXiv preprint arXiv:1412.6980 , Dec. 2014. [Online]. Available: https:
-//arxiv.org/abs/1412 .6980
+arXiv preprint arXiv:1412.6980, Dec. 2014. [Online]. Available: https:
+//arxiv.org/abs/1412.6980
 [KH09] A. Krizhevsky and G. Hinton, “Learning multiple layers of features from tiny
-images,” Apr. 2009. [Online]. Available: https://www .cs.toronto.edu/~kriz/
-learning-features-2009-TR .pdf
-[KMN+16]N. S. Keskar, D. Mudigere et al., “On large-batch training for deep learning:
-Generalization gap and sharp minima,” arXiv preprint arXiv:1609.04836 ,
-Sep. 2016. [Online]. Available: https://arxiv .org/abs/1609 .04836
+images,” Apr. 2009. [Online]. Available: https://www.cs.toronto.edu/~kriz/
+learning-features-2009-TR.pdf
+[KMN+16] N. S. Keskar, D. Mudigereet al., “On large-batch training for deep learning:
+Generalization gap and sharp minima,”arXiv preprint arXiv:1609.04836,
+Sep. 2016. [Online]. Available: https://arxiv.org/abs/1609.04836
 [Koc15] T. Kocmánek, “HyperNEAT and novelty search for image recognition,” Ph.D.
 dissertation, Master’s thesis, Czech Technical University in Prague, 2015.
-[Online]. Available: http://kocmi .tk/photos/DiplomaThesis .pdf
-[KPY+15]Y.-D. Kim, E. Park et al., “Compression of deep convolutional neural networks
-for fast and low power mobile applications,” arXiv preprint arXiv:1511.06530 ,
-Nov. 2015. [Online]. Available: https://arxiv .org/abs/1511 .06530
-[KR09] L. Kaufman and P. J. Rousseeuw, Finding groups in data: an introduction to
-cluster analysis . John Wiley & Sons, 2009, vol. 344.
+[Online]. Available: http://kocmi.tk/photos/DiplomaThesis.pdf
+[KPY+15] Y.-D. Kim, E. Parket al., “Compression of deep convolutional neural networks
+for fast and low power mobile applications,”arXiv preprint arXiv:1511.06530,
+Nov. 2015. [Online]. Available: https://arxiv.org/abs/1511.06530
+[KR09] L. Kaufman and P. J. Rousseeuw,Finding groups in data: an introduction to
+cluster analysis. John Wiley & Sons, 2009, vol. 344.
 [Kri] A. Krizhevsky, “The CIFAR-10 dataset.” [Online]. Available: https:
-//www.cs.toronto.edu/~kriz/cifar .html
+//www.cs.toronto.edu/~kriz/cifar.html
 [KS02] V. Kurkova and M. Sanguineti, “Comparison of worst case errors in linear
-and neural network approximation,” IEEE Transactions on Information
+and neural network approximation,”IEEE Transactions on Information
 Theory, vol. 48, no. 1, pp. 264–275, Jan. 2002. [Online]. Available:
-http://ieeexplore .ieee.org/abstract/document/971754/
+http://ieeexplore.ieee.org/abstract/document/971754/
 
 [KSH12] A. Krizhevsky, I. Sutskever, and G. E. Hinton, “Imagenet classification
 with deep convolutional neural networks,” in Advances in Neural
-Information Processing Systems 25 (NIPS) , F. Pereira, C. J. C. Burges
+Information Processing Systems 25 (NIPS), F. Pereira, C. J. C. Burges
 et al., Eds. Curran Associates, Inc., 2012, pp. 1097–1105. [Online].
-Available: http://papers .nips.cc/paper/4824-imagenet-classification-withdeep-convolutional-neural-networks
- .pdf
-[KSlB+10]K. Kavukcuoglu, P. Sermanet et al., “Learning convolutional feature
+Available: http://papers.nips.cc/paper/4824-imagenet-classification-withdeep-convolutional-neural-networks.pdf
+
+[KSlB+10] K. Kavukcuoglu, P. Sermanet et al., “Learning convolutional feature
 hierarchies for visual recognition,” in Advances in Neural Information
 Processing Systems 23 (NIPS) , J. D. Lafferty, C. K. I. Williams
 et al., Eds. Curran Associates, Inc., 2010, pp. 1090–1098. [Online].
-Available: http://papers .nips.cc/paper/4133-learning-convolutional-featurehierarchies-for-visual-recognition
- .pdf
-[LAE+16]W. Liu, D. Anguelov et al., “SSD: Single shot multibox detector,” in
-European Conference on Computer Vision (ECCV) . Springer, 2016, pp.
-21–37. [Online]. Available: https://arxiv .org/abs/1512 .02325
-[Las17] “Noise layers,” Jan. 2017. [Online]. Available: http://lasagne .readthedocs .io/
-en/latest/modules/layers/noise .html#lasagne .layers.DropoutLayer
-[LBBH98] Y. LeCun, L. Bottou et al., “Gradient-based learning applied to document
-recognition,” Proceedings of the IEEE , vol. 86, no. 11, pp. 2278–2324, Nov.
-1998. [Online]. Available: http://yann .lecun.com/exdb/publis/pdf/lecun01a.pdf
+Available: http://papers.nips.cc/paper/4133-learning-convolutional-featurehierarchies-for-visual-recognition.pdf
+
+[LAE+16] W. Liu, D. Anguelov et al., “SSD: Single shot multibox detector,” in
+European Conference on Computer Vision (ECCV). Springer, 2016, pp.
+21–37. [Online]. Available: https://arxiv.org/abs/1512.02325
+[Las17] “Noise layers,” Jan. 2017. [Online]. Available: http://lasagne.readthedocs.io/
+en/latest/modules/layers/noise.html#lasagne.layers.DropoutLayer
+[LBBH98] Y. LeCun, L. Bottouet al., “Gradient-based learning applied to document
+recognition,”Proceedings of the IEEE, vol. 86, no. 11, pp. 2278–2324, Nov.
+1998. [Online]. Available: http://yann.lecun.com/exdb/publis/pdf/lecun01a.pdf
 
 [LBH15] Y. LeCun, Y. Bengio, and G. Hinton, “Deep learning,” Nature,
 vol. 521, no. 7553, pp. 436–444, May 2015. [Online]. Available:
-http://www .nature.com/nature/journal/v521/n7553/abs/nature14539 .html
-[LBOM98] Y. A. LeCun, L. Bottou et al.,Efficient BackProp , ser. Lecture Notes in
+http://www.nature.com/nature/journal/v521/n7553/abs/nature14539.html
+[LBOM98] Y. A. LeCun, L. Bottouet al., Efficient BackProp, ser. Lecture Notes in
 Computer Science. Berlin, Heidelberg: Springer Berlin Heidelberg, 1998, vol.
-1524, pp. 9–50. [Online]. Available: http://dx .doi.org/10.1007/3-540-49430-8
-[LDS+89]Y. LeCun, J. S. Denker et al., “Optimal brain damage.” in NIPs, vol. 2, 1989,
-pp. 598–605. [Online]. Available: http://yann .lecun.com/exdb/publis/pdf/
+1524, pp. 9–50. [Online]. Available: http://dx.doi.org/10.1007/3-540-49430-8
+[LDS+89] Y. LeCun, J. S. Denkeret al., “Optimal brain damage.” inNIPs, vol. 2, 1989,
+pp. 598–605. [Online]. Available: http://yann.lecun.com/exdb/publis/pdf/
 lecun-90b.pdf
 [Le13] Q. V. Le, “Building high-level features using large scale unsupervised
 learning,” in International conference on acoustics, speech and signal
-processing . IEEE, 2013, pp. 8595–8598. [Online]. Available: http:
-//ieeexplore .ieee.org/stamp/stamp .jsp?arnumber=6639343
+processing. IEEE, 2013, pp. 8595–8598. [Online]. Available: http:
+//ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=6639343
 [LG16] A. Lavin and S. Gray, “Fast algorithms for convolutional neural networks,” in
 
-Conference on Computer Vision and Pattern Recognition (CVPR) . IEEE, Sep.
-2016, pp. 4013–4021. [Online]. Available: https://arxiv .org/abs/1509 .09308
+Conference on Computer Vision and Pattern Recognition (CVPR). IEEE, Sep.
+2016, pp. 4013–4021. [Online]. Available: https://arxiv.org/abs/1509.09308
 [LGT16] C.-Y. Lee, P. W. Gallagher, and Z. Tu, “Generalizing pooling functions in
-convolutional neural networks: Mixed, gated, and tree,” in International
-Conference on Artificial Intelligence and Statistics , 2016. [Online]. Available:
-https://arxiv .org/abs/1509 .08985v2
+convolutional neural networks: Mixed, gated, and tree,” inInternational
+Conference on Artificial Intelligence and Statistics, 2016. [Online]. Available:
+https://arxiv.org/abs/1509.08985v2
 [LH16] I. Loshchilov and F. Hutter, “SGDR: stochastic gradient descent
-with warm restarts,” Learning , Aug. 2016. [Online]. Available: https:
-//arxiv.org/abs/1608 .03983
-[LJD+16]L. Li, K. Jamieson et al., “Hyperband: A novel bandit-based approach to
-hyperparameter optimization,” arXiv preprint arXiv:1603.06560 , Mar. 2016.
-[Online]. Available: https://arxiv .org/abs/1603 .06560
-[LM16] K. Li and J. Malik, “Learning to optimize,” arXiv preprint arXiv:1606.01885 ,
-Jun. 2016. [Online]. Available: https://arxiv .org/abs/1606 .01885
+with warm restarts,” Learning, Aug. 2016. [Online]. Available: https:
+//arxiv.org/abs/1608.03983
+[LJD+16] L. Li, K. Jamiesonet al., “Hyperband: A novel bandit-based approach to
+hyperparameter optimization,”arXiv preprint arXiv:1603.06560, Mar. 2016.
+[Online]. Available: https://arxiv.org/abs/1603.06560
+[LM16] K. Li and J. Malik, “Learning to optimize,”arXiv preprint arXiv:1606.01885,
+Jun. 2016. [Online]. Available: https://arxiv.org/abs/1606.01885
 [LSD15] J. Long, E. Shelhamer, and T. Darrell, “Fully convolutional networks for
-semantic segmentation,” in Conference on Computer Vision and Pattern
-Recognition (CVPR) . IEEE, Mar. 2015, pp. 3431–3440. [Online]. Available:
-https://arxiv .org/abs/1411 .4038v2
-[LX17] A. Y. Lingxi Xie, “Genetic CNN,” arXiv preprint arXiv:1703.01513 , Mar.
-2017. [Online]. Available: https://arxiv .org/abs/1703 .01513
+semantic segmentation,” inConference on Computer Vision and Pattern
+Recognition (CVPR). IEEE, Mar. 2015, pp. 3431–3440. [Online]. Available:
+https://arxiv.org/abs/1411.4038v2
+[LX17] A. Y. Lingxi Xie, “Genetic CNN,”arXiv preprint arXiv:1703.01513, Mar.
+2017. [Online]. Available: https://arxiv.org/abs/1703.01513
 [Maj17] S. Majumdar, “Densenet,” GitHub, Feb. 2017. [Online]. Available:
-https://github .com/titu1994/DenseNet
+https://github.com/titu1994/DenseNet
 [Mar08] M. Marszałek, “INRIA annotations for Graz-02 (IG02),” Oct. 2008. [Online].
-Available: http://lear .inrialpes.fr/people/marszalek/data/ig02/
+Available: http://lear.inrialpes.fr/people/marszalek/data/ig02/
 [MDA15] D. Maclaurin, D. Duvenaud, and R. Adams, “Gradient-based hyperparameter
-optimization through reversible learning,” in International Conference on
-Machine Learning (ICML) , 2015, pp. 2113–2122.
-[MH08] L. v. d. Maaten and G. Hinton, “Visualizing data using t-SNE,” Journal of
-Machine Learning Research , vol. 9, no. Nov, pp. 2579–2605, 2008.
+optimization through reversible learning,” inInternational Conference on
+Machine Learning (ICML), 2015, pp. 2113–2122.
+[MH08] L. v. d. Maaten and G. Hinton, “Visualizing data using t-SNE,”Journal of
+Machine Learning Research, vol. 9, no. Nov, pp. 2579–2605, 2008.
 [MHN13] A. L. Maas, A. Y. Hannun, and A. Y. Ng, “Rectifier nonlinearities
-improve neural network acoustic models,” in Proc. ICML , vol. 30,
-no. 1, 2013. [Online]. Available: https://web .stanford.edu/~awni/papers/
-relu_hybrid_icml2013_final .pdf
+improve neural network acoustic models,” in Proc. ICML, vol. 30,
+no. 1, 2013. [Online]. Available: https://web.stanford.edu/~awni/papers/
+relu_hybrid_icml2013_final.pdf
 [MM15] D. Mishkin and J. Matas, “All you need is a good init,” arXiv
 
 preprint arXiv:1511.06422 , Nov. 2015. [Online]. Available: https:
-//arxiv.org/abs/1511 .06422
+//arxiv.org/abs/1511.06422
 [MP43] W. S. McCulloch and W. Pitts, “A logical calculus of the ideas immanent in
-nervous activity,” The bulletin of mathematical biophysics , vol. 5, no. 4, pp.
+nervous activity,”The bulletin of mathematical biophysics, vol. 5, no. 4, pp.
 115–133, 1943.
 [MRM15] N. McLaughlin, J. M. D. Rincon, and P. Miller, “Data-augmentation for
-reducing dataset bias in person re-identification,” in International Conference
-on Advanced Video and Signal Based Surveillance (AVSS) , no. 12, Aug. 2015,
-pp. 1–6. [Online]. Available: http://ieeexplore .ieee.org/abstract/document/
+reducing dataset bias in person re-identification,” inInternational Conference
+on Advanced Video and Signal Based Surveillance (AVSS), no. 12, Aug. 2015,
+pp. 1–6. [Online]. Available: http://ieeexplore.ieee.org/abstract/document/
 7301739/
 [MS07] M. Marszalek and C. Schmid, “Accurate object localization with
 shape masks,” in Conference on Computer Vision and Pattern
-Recognition (CVPR) . IEEE, 2007, pp. 1–8. [Online]. Available: http:
-//ieeexplore .ieee.org/document/4270110/
+Recognition (CVPR). IEEE, 2007, pp. 1–8. [Online]. Available: http:
+//ieeexplore.ieee.org/document/4270110/
 [MSM16] D. Mishkin, N. Sergievskiy, and J. Matas, “Systematic evaluation of CNN
-advances on the ImageNet,” arXiv preprint arXiv:1606.02228 , Jun. 2016.
-[Online]. Available: https://arxiv .org/abs/1606 .02228
+advances on the ImageNet,”arXiv preprint arXiv:1606.02228, Jun. 2016.
+[Online]. Available: https://arxiv.org/abs/1606.02228
 [MV16] A. Mahendran and A. Vedaldi, “Visualizing deep convolutional neural
-networks using natural pre-images,” InternationalJournal of Computer Vision ,
-pp. 1–23, Apr. 2016. [Online]. Available: https://arxiv .org/abs/1512 .02017
-[NDRT13] N. Natarajan, I. S. Dhillon et al., “Learning with noisy labels,” in Advances
-in Neural Information Processing Systems 26 (NIPS) , C. J. C. Burges,
-L. Bottou et al., Eds. Curran Associates, Inc., 2013, pp. 1196–1204. [Online].
-Available: http://papers .nips.cc/paper/5073-learning-with-noisy-labels .pdf
+networks using natural pre-images,”International Journal of Computer Vision,
+pp. 1–23, Apr. 2016. [Online]. Available: https://arxiv.org/abs/1512.02017
+[NDRT13] N. Natarajan, I. S. Dhillonet al., “Learning with noisy labels,” inAdvances
+in Neural Information Processing Systems 26 (NIPS), C. J. C. Burges,
+L. Bottouet al., Eds. Curran Associates, Inc., 2013, pp. 1196–1204. [Online].
+Available: http://papers.nips.cc/paper/5073-learning-with-noisy-labels.pdf
 [Nes83] Y. Nesterov, “A method of solving a convex programming problem with
-convergence rate o (1/k2),” in Soviet Mathematics Doklady , vol. 27, no. 2,
+convergence rate o (1/k2),” inSoviet Mathematics Doklady, vol. 27, no. 2,
 1983, pp. 372–376.
 [new00] “The training performed by qnstrn,” Aug. 2000. [Online]. Available:
-http://www1 .icsi.berkeley.edu/Speech/faq/nn-train .html
+http://www1.icsi.berkeley.edu/Speech/faq/nn-train.html
 [Ng16] A. Ng, “Nuts and bolts of building ai applications using deep learning,” NIPS
 Talk, Dec. 2016.
 [NH92] S. J. Nowlan and G. E. Hinton, “Simplifying neural networks by soft
-weight-sharing,” Neural computation , vol. 4, no. 4, pp. 473–493, 1992.
-[Online]. Available: https://www .cs.toronto.edu/~hinton/absps/sunspots .pdf
+weight-sharing,” Neural computation, vol. 4, no. 4, pp. 473–493, 1992.
+[Online]. Available: https://www.cs.toronto.edu/~hinton/absps/sunspots.pdf
 [NH02] R. T. Ng and J. Han, “CLARANS: A method for clustering objects for spatial
 
-data mining,” IEEE transactions on knowledge and data engineering , vol. 14,
+data mining,”IEEE transactions on knowledge and data engineering, vol. 14,
 no. 5, pp. 1003–1016, 2002.
-[NWC+11a]Y. Netzer, T. Wang et al., “Reading digits in natural images with
+[NWC+11a] Y. Netzer, T. Wang et al., “Reading digits in natural images with
 unsupervised feature learning,” in NIPS workshop on deep learning and
-unsupervised feature learning , vol. 2011, no. 2, 2011, p. 5. [Online]. Available:
-http://ufldl .stanford.edu/housenumbers/nips2011_housenumbers .pdf
-[NWC+11b]Y. Netzer, T. Wang et al., “The street view house numbers (SVHN) dataset,”
-2011. [Online]. Available: http://ufldl .stanford.edu/housenumbers/
+unsupervised feature learning, vol. 2011, no. 2, 2011, p. 5. [Online]. Available:
+http://ufldl.stanford.edu/housenumbers/nips2011_housenumbers.pdf
+[NWC+11b] Y. Netzer, T. Wanget al., “The street view house numbers (SVHN) dataset,”
+2011. [Online]. Available: http://ufldl.stanford.edu/housenumbers/
 [NYC16] A. Nguyen, J. Yosinski, and J. Clune, “Multifaceted feature visualization:
 Uncovering the different types of features learned by each neuron in deep
-neural networks,” arXiv preprint arXiv:1602.03616 , May 2016. [Online].
-Available: https://arxiv .org/abs/1602 .03616
+neural networks,” arXiv preprint arXiv:1602.03616, May 2016. [Online].
+Available: https://arxiv.org/abs/1602.03616
 [OHIL16] J. Ortigosa-Hernández, I. Inza, and J. A. Lozano, “Towards competitive
 classifiers for unbalanced classification problems: A study on the performance
-scores,”arXiv preprint arXiv:1608.08984 , Aug. 2016. [Online]. Available:
-https://arxiv .org/abs/1608 .08984
-[PMW+15]N. Papernot, P. McDaniel et al., “Distillation as a defense to adversarial
-perturbations against deep neural networks,” arXiv preprint arXiv:1511.04508 ,
-Nov. 2015. [Online]. Available: https://arxiv .org/abs/1511 .04508
+scores,” arXiv preprint arXiv:1608.08984, Aug. 2016. [Online]. Available:
+https://arxiv.org/abs/1608.08984
+[PMW+15] N. Papernot, P. McDanielet al., “Distillation as a defense to adversarial
+perturbations against deep neural networks,”arXiv preprint arXiv:1511.04508,
+Nov. 2015. [Online]. Available: https://arxiv.org/abs/1511.04508
 [Pre98] L. Prechelt, Early Stopping - But When? Berlin, Heidelberg: Springer
-Berlin Heidelberg, 1998, pp. 55–69. [Online]. Available: http://dx .doi.org/
+Berlin Heidelberg, 1998, pp. 55–69. [Online]. Available: http://dx.doi.org/
 10.1007/3-540-49430-8_3
-[RDS+14]O. Russakovsky, J. Deng et al., “Imagenet large scale visual recognition
-challenge,” arXiv preprint arXiv:1409.0575 , vol. 115, no. 3, pp. 211–252, Sep.
-2014. [Online]. Available: https://arxiv .org/abs/1409 .0575
+[RDS+14] O. Russakovsky, J. Denget al., “Imagenet large scale visual recognition
+challenge,”arXiv preprint arXiv:1409.0575, vol. 115, no. 3, pp. 211–252, Sep.
+2014. [Online]. Available: https://arxiv.org/abs/1409.0575
 [RFB15] O. Ronneberger, P. Fischer, and T. Brox, “U-net: Convolutional networks
-for biomedical image segmentation,” in International Conference on Medical
-Image Computing and Computer-Assisted Intervention . Springer, 2015, pp.
-234–241. [Online]. Available: https://arxiv .org/abs/1505 .04597
+for biomedical image segmentation,” inInternational Conference on Medical
+Image Computing and Computer-Assisted Intervention. Springer, 2015, pp.
+234–241. [Online]. Available: https://arxiv.org/abs/1505.04597
 [RLS10] S. Risi, J. Lehman, and K. O. Stanley, “Evolving the placement and density
- of neurons in the hyperneat substrate,” in Conference on Genetic and
-evolutionary computation , no. 12. ACM, 2010, pp. 563–570.
+ of neurons in the hyperneat substrate,” inConference on Genetic and
+evolutionary computation, no. 12. ACM, 2010, pp. 563–570.
 [RSG16] M. T. Ribeiro, S. Singh, and C. Guestrin, “"why should i trust you?":
-Explaining the predictions of any classifier,” arXiv preprint arXiv:1602.04938 ,
-Feb. 2016. [Online]. Available: https://arxiv .org/abs/1602 .04938
+Explaining the predictions of any classifier,”arXiv preprint arXiv:1602.04938,
+Feb. 2016. [Online]. Available: https://arxiv.org/abs/1602.04938
 
 [Rud16] S. Ruder, “An overview of gradient descent optimization algorithms,”
-arXiv preprint arXiv:1609.04747 , Sep. 2016. [Online]. Available: https:
-//arxiv.org/abs/1609 .04747
+arXiv preprint arXiv:1609.04747, Sep. 2016. [Online]. Available: https:
+//arxiv.org/abs/1609.04747
 [SCL12] P. Sermanet, S. Chintala, and Y. LeCun, “Convolutional neural networks
-applied to house numbers digit classification,” in International Conference
-on Pattern Recognition (ICPR) , no. 21. IEEE, Apr. 2012, pp. 3288–3291.
-[Online]. Available: https://arxiv .org/abs/1204 .3968
+applied to house numbers digit classification,” inInternational Conference
+on Pattern Recognition (ICPR), no. 21. IEEE, Apr. 2012, pp. 3288–3291.
+[Online]. Available: https://arxiv.org/abs/1204.3968
 [SDG09] K. O. Stanley, D. B. D’Ambrosio, and J. Gauci, “A hypercube-based encoding
-for evolving large-scale neural networks,” Artificial life , vol. 15, no. 2, pp. 185–
-212, 2009. [Online]. Available: http://ieeexplore .ieee.org/document/6792316/
-[SEZ+13]P. Sermanet, D. Eigen et al., “Overfeat: Integrated recognition, localization
-and detection using convolutional networks,” arXiv preprint arXiv:1312.6229 ,
-Feb. 2013. [Online]. Available: https://arxiv .org/abs/1312 .6229v4
-[SHK+14]N. Srivastava, G. E. Hinton et al., “Dropout: a simple way to
-prevent neural networks from overfitting.” Journal of Machine Learning
-Research , vol. 15, no. 1, pp. 1929–1958, 2014. [Online]. Available:
-https://www .cs.toronto.edu/~hinton/absps/JMLRdropout .pdf
-[SHY+13]A. Senior, G. Heigold et al., “An empirical study of learning rates in deep
+for evolving large-scale neural networks,”Artificial life, vol. 15, no. 2, pp. 185–
+212, 2009. [Online]. Available: http://ieeexplore.ieee.org/document/6792316/
+[SEZ+13] P. Sermanet, D. Eigenet al., “Overfeat: Integrated recognition, localization
+and detection using convolutional networks,”arXiv preprint arXiv:1312.6229,
+Feb. 2013. [Online]. Available: https://arxiv.org/abs/1312.6229v4
+[SHK+14] N. Srivastava, G. E. Hinton et al. , “Dropout: a simple way to
+prevent neural networks from overfitting.”Journal of Machine Learning
+Research, vol. 15, no. 1, pp. 1929–1958, 2014. [Online]. Available:
+https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf
+[SHY+13] A. Senior, G. Heigoldet al., “An empirical study of learning rates in deep
 neural networks for speech recognition,” in International Conference on
-Acoustics, Speech and Signal Processing . IEEE, 2013, pp. 6724–6728. [Online].
-Available: http://ieeexplore .ieee.org/document/6638963/?arnumber=6638963
-[SIV16] C.Szegedy, S.Ioffe, andV.Vanhoucke, “Inception-v4, inception-resnetandthe
-impact of residual connections on learning,” arXiv preprint arXiv:1602.07261 ,
-Feb. 2016. [Online]. Available: https://arxiv .org/abs/1602 .07261
+Acoustics, Speech and Signal Processing. IEEE, 2013, pp. 6724–6728. [Online].
+Available: http://ieeexplore.ieee.org/document/6638963/?arnumber=6638963
+[SIV16] C. Szegedy, S. Ioffe, and V. Vanhoucke, “Inception-v4, inception-resnet and the
+impact of residual connections on learning,”arXiv preprint arXiv:1602.07261,
+Feb. 2016. [Online]. Available: https://arxiv.org/abs/1602.07261
 [SKP15] F. Schroff, D. Kalenichenko, and J. Philbin, “Facenet: A unified embedding
 for face recognition and clustering,” in Conference on Computer Vision
-and Pattern Recognition (CVPR) . IEEE, Mar. 2015, pp. 815–823. [Online].
-Available: https://arxiv .org/abs/1503 .03832
+and Pattern Recognition (CVPR). IEEE, Mar. 2015, pp. 815–823. [Online].
+Available: https://arxiv.org/abs/1503.03832
 [SL11] P. Sermanet and Y. LeCun, “Traffic sign recognition with multi-scale
 convolutional networks,” in International Joint Conference on Neural
 Networks (IJCNN) , Jul. 2011, pp. 2809–2813. [Online]. Available:
-http://ieeexplore .ieee.org/document/6033589/
-[SLJ+15]C. Szegedy, W. Liu et al., “Going deeper with convolutions,” in Conference
-on Computer Vision and Pattern Recognition (CVPR) . IEEE, Sep. 2015, pp.
-1–9. [Online]. Available: https://arxiv .org/abs/1409 .4842
+http://ieeexplore.ieee.org/document/6033589/
+[SLJ+15] C. Szegedy, W. Liuet al., “Going deeper with convolutions,” inConference
+on Computer Vision and Pattern Recognition (CVPR). IEEE, Sep. 2015, pp.
+1–9. [Online]. Available: https://arxiv.org/abs/1409.4842
 [SM02] K. O. Stanley and R. Miikkulainen, “Evolving neural networks through
 
-augmenting topologies,” Evolutionary computation , vol. 10, no. 2, pp. 99–127,
-2002. [Online]. Available: http://www .mitpressjournals .org/doi/abs/10 .1162/
+augmenting topologies,”Evolutionary computation, vol. 10, no. 2, pp. 99–127,
+2002. [Online]. Available: http://www.mitpressjournals.org/doi/abs/10.1162/
 106365602320169811
 [SMG13] A. M. Saxe, J. L. McClelland, and S. Ganguli, “Exact solutions to
 the nonlinear dynamics of learning in deep linear neural networks,”
-arXiv preprint arXiv:1312.6120 , Dec. 2013. [Online]. Available: https:
-//arxiv.org/abs/1312 .6120
+arXiv preprint arXiv:1312.6120, Dec. 2013. [Online]. Available: https:
+//arxiv.org/abs/1312.6120
 [SMGS14] R. K. Srivastava, J. Masci et al., “Understanding locally competitive
-networks,” arXiv preprint arXiv:1410.1165 , Oct. 2014. [Online]. Available:
-https://arxiv .org/abs/1410 .1165
+networks,”arXiv preprint arXiv:1410.1165, Oct. 2014. [Online]. Available:
+https://arxiv.org/abs/1410.1165
 [SSSI] J. Stallkamp, M. Schlipsing et al., “The german traffic sign recognition
-benchmark.” [Online]. Available: http://benchmark .ini.rub.de/?section=
+benchmark.” [Online]. Available: http://benchmark.ini.rub.de/?section=
 gtsrb&subsection=news
 [SSSI12] J. Stallkamp, M. Schlipsing et al., “Man vs. computer: Benchmarking
-machine learning algorithms for traffic sign recognition,” Neural Networks ,
-no. 0, pp. –, 2012. [Online]. Available: http://www .sciencedirect .com/science/
+machine learning algorithms for traffic sign recognition,”Neural Networks,
+no. 0, pp. –, 2012. [Online]. Available: http://www.sciencedirect.com/science/
 article/pii/S0893608012000457
-[SV16] S. Saxena and J. Verbeek, “Convolutional neural fabrics,” arXiv preprint
-arXiv:1606.02492 , 2016.[Online].Available: https://arxiv .org/abs/1606 .02492
-[SVI+15]C. Szegedy, V. Vanhoucke et al., “Rethinking the inception architecture
-for computer vision,” arXiv preprint arXiv:1512.00567 , Dec. 2015. [Online].
-Available: https://arxiv .org/abs/1512 .00567v3
+[SV16] S. Saxena and J. Verbeek, “Convolutional neural fabrics,”arXiv preprint
+arXiv:1606.02492, 2016.[Online].Available: https://arxiv.org/abs/1606.02492
+[SVI+15] C. Szegedy, V. Vanhouckeet al., “Rethinking the inception architecture
+for computer vision,”arXiv preprint arXiv:1512.00567, Dec. 2015. [Online].
+Available: https://arxiv.org/abs/1512.00567v3
 [SVZ13] K. Simonyan, A. Vedaldi, and A. Zisserman, “Deep inside convolutional
 networks: Visualising image classification models and saliency maps,”
-arXiv preprint arXiv:1312.6034 , Dec. 2013. [Online]. Available: https:
-//arxiv.org/abs/1312 .6034
+arXiv preprint arXiv:1312.6034, Dec. 2013. [Online]. Available: https:
+//arxiv.org/abs/1312.6034
 [SZ14] K. Simonyan and A. Zisserman, “Very deep convolutional networks for
-large-scale image recognition,” arXiv preprint arXiv:1409.1556 , Sep. 2014.
-[Online]. Available: https://arxiv .org/abs/1409 .1556
-[SZS+13]C. Szegedy, W. Zaremba et al., “Intriguing properties of neural
-networks,” arXiv preprint arXiv:1312.6199 , Dec. 2013. [Online]. Available:
-https://arxiv .org/abs/1312 .6199v4
+large-scale image recognition,”arXiv preprint arXiv:1409.1556, Sep. 2014.
+[Online]. Available: https://arxiv.org/abs/1409.1556
+[SZS+13] C. Szegedy, W. Zaremba et al. , “Intriguing properties of neural
+networks,”arXiv preprint arXiv:1312.6199, Dec. 2013. [Online]. Available:
+https://arxiv.org/abs/1312.6199v4
 [TF-16a] “MNIST for ML beginners,” Dec. 2016. [Online]. Available: https:
 //www.tensorflow.org/tutorials/mnist/beginners/
 
-[tf-16b] “tf.nn.dropout,” Dec. 2016. [Online]. Available: https://www .tensorflow.org/
+[tf-16b] “tf.nn.dropout,” Dec. 2016. [Online]. Available: https://www.tensorflow.org/
 api_docs/python/nn/activation_functions_#dropout
 [TH12] T. Tieleman and G. Hinton, “Lecture 6.5-rmsprop: Divide the gradient
 by a running average of its recent magnitude,” COURSERA: Neural
-Networks for Machine Learning , vol. 4, no. 2, 2012. [Online]. Available:
-http://www .cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6 .pdf
+Networks for Machine Learning, vol. 4, no. 2, 2012. [Online]. Available:
+http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf
 [Tho14a] M. Thoma, “On-line recognition of handwritten mathematical symbols,”
 Karlsruhe, Germany, Nov. 2014. [Online]. Available: http://martinthoma.com/write-math
 
 [Tho14b] M. Thoma, “The Twiddle algorithm,” Sep. 2014. [Online]. Available:
-https://martin-thoma .com/twiddle/
+https://martin-thoma.com/twiddle/
 [Tho16] M. Thoma, “A survey of semantic segmentation,” arXiv preprint
-arXiv:1602.06541 , Feb. 2016. [Online]. Available: https://arxiv .org/abs/
+arXiv:1602.06541, Feb. 2016. [Online]. Available: https://arxiv.org/abs/
 1602.06541
-[Tho17a] M. Thoma, “The HASYv2 dataset,” arXiv preprint arXiv:1701.08380 , Jan.
-2017. [Online]. Available: https://arxiv .org/abs/1701 .08380
+[Tho17a] M. Thoma, “The HASYv2 dataset,”arXiv preprint arXiv:1701.08380, Jan.
+2017. [Online]. Available: https://arxiv.org/abs/1701.08380
 [Tho17b] M. Thoma, “Master thesis (blog post),” Apr. 2017. [Online]. Available:
-https://martin-thoma .com/msthesis
+https://martin-thoma.com/msthesis
 [VH13] P. Verbancsics and J. Harguess, “Generative neuroevolution for deep
-learning,” arXiv preprint arXiv:1312.5355 , Dec. 2013. [Online]. Available:
-https://arxiv .org/abs/1312 .5355
-[vLA87] P. J. M. van Laarhoven and E. H. L. Aarts, Simulated annealing .
+learning,” arXiv preprint arXiv:1312.5355, Dec. 2013. [Online]. Available:
+https://arxiv.org/abs/1312.5355
+[vLA87] P. J. M. van Laarhoven and E. H. L. Aarts, Simulated annealing.
 Dordrecht: Springer Netherlands, 1987, pp. 7–15. [Online]. Available:
 http://dx.doi.org/10.1007/978-94-015-7744-1_2
-[VTKP17] E. Vorontsov, C. Trabelsi et al., “On orthogonality and learning recurrent
-networks with long term dependencies,” arXiv preprint arXiv:1702.00071 ,
-Jan. 2017. [Online]. Available: https://arxiv .org/abs/1702 .00071
-[WHH+89]A. Waibel, T. Hanazawa et al., “Phoneme recognition using time-delay
+[VTKP17] E. Vorontsov, C. Trabelsiet al., “On orthogonality and learning recurrent
+networks with long term dependencies,”arXiv preprint arXiv:1702.00071,
+Jan. 2017. [Online]. Available: https://arxiv.org/abs/1702.00071
+[WHH+89] A. Waibel, T. Hanazawa et al., “Phoneme recognition using time-delay
 neural networks,” IEEE transactions on acoustics, speech, and signal
-processing , vol. 37, no. 3, pp. 328–339, Aug. 1989. [Online]. Available:
-http://ieeexplore .ieee.org/document/21701/
+processing, vol. 37, no. 3, pp. 328–339, Aug. 1989. [Online]. Available:
+http://ieeexplore.ieee.org/document/21701/
 [Wil92] R. J. Williams, “Simple statistical gradient-following algorithms for connectionist
- reinforcement learning,” Machine learning , vol. 8, no. 3-4, pp. 229–256,
+ reinforcement learning,”Machine learning, vol. 8, no. 3-4, pp. 229–256,
 1992.
 
-[WWQ13] X. Wang, L. Wang, and Y. Qiao, A Comparative Study of Encoding, Pooling
-and Normalization Methods for Action Recognition . Berlin, Heidelberg:
+[WWQ13] X. Wang, L. Wang, and Y. Qiao,A Comparative Study of Encoding, Pooling
+and Normalization Methods for Action Recognition. Berlin, Heidelberg:
 Springer Berlin Heidelberg, Nov. 2013, no. 11, pp. 572–585. [Online].
-Available: http://dx .doi.org/10.1007/978-3-642-37431-9_44
-[WYS+15]R. Wu, S. Yan et al., “Deep image: Scaling up image recognition,” arXiv
-preprint arXiv:1501.02876 , vol. 7, no. 8, Jul. 2015. [Online]. Available:
-https://arxiv .org/abs/1501 .02876v4
-[WZZ+13]L.Wan, M.Zeiler etal., “Regularizationofneuralnetworksusingdropconnect,”
-inInternational Conference on Machine Learning (ICML) , no. 30, 2013,
-pp. 1058–1066. [Online]. Available: http://www .matthewzeiler .com/pubs/
-icml2013/icml2013 .pdf
-[XGD+16]S. Xie, R. Girshick et al., “Aggregated residual transformations for deep
-neural networks,” arXiv preprint arXiv:1611.05431 , Nov. 2016. [Online].
-Available: https://arxiv .org/abs/1611 .05431v1
+Available: http://dx.doi.org/10.1007/978-3-642-37431-9_44
+[WYS+15] R. Wu, S. Yanet al., “Deep image: Scaling up image recognition,”arXiv
+preprint arXiv:1501.02876, vol. 7, no. 8, Jul. 2015. [Online]. Available:
+https://arxiv.org/abs/1501.02876v4
+[WZZ+13] L. Wan, M. Zeileretal., “Regularization of neural networks using dropconnect,”
+in International Conference on Machine Learning (ICML), no. 30, 2013,
+pp. 1058–1066. [Online]. Available: http://www.matthewzeiler.com/pubs/
+icml2013/icml2013.pdf
+[XGD+16] S. Xie, R. Girshicket al., “Aggregated residual transformations for deep
+neural networks,” arXiv preprint arXiv:1611.05431, Nov. 2016. [Online].
+Available: https://arxiv.org/abs/1611.05431v1
 [Xu11] W. Xu, “Towards optimal one pass large scale learning with averaged
-stochastic gradient descent,” arXiv preprint arXiv:1107.2490 , Jul. 2011.
-[Online]. Available: https://arxiv .org/abs/1107 .2490
+stochastic gradient descent,” arXiv preprint arXiv:1107.2490, Jul. 2011.
+[Online]. Available: https://arxiv.org/abs/1107.2490
 [XWCL15] B. Xu, N. Wang et al., “Empirical evaluation of rectified activations in
-convolutional network,” arXiv preprint arXiv:1505.00853 , May 2015. [Online].
-Available: https://arxiv .org/abs/1505 .00853
+convolutional network,”arXiv preprint arXiv:1505.00853, May 2015. [Online].
+Available: https://arxiv.org/abs/1505.00853
 [XXE12] H. Xiao, H. Xiao, and C. Eckert, “Adversarial label flips attack on
-support vector machines.” in ECAI, 2012, pp. 870–875. [Online]. Available:
-https://www .sec.in.tum.de/assets/Uploads/ecai2 .pdf
-[XZY+14]T. Xiao, J. Zhang et al., “Error-driven incremental learning in deep convolutional
- neural network for large-scale image classification,” in International
-Conference on Multimedia , no. 22. ACM, 2014, pp. 177–186.
+support vector machines.” inECAI, 2012, pp. 870–875. [Online]. Available:
+https://www.sec.in.tum.de/assets/Uploads/ecai2.pdf
+[XZY+14] T. Xiao, J. Zhanget al., “Error-driven incremental learning in deep convolutional
+ neural network for large-scale image classification,” inInternational
+Conference on Multimedia, no. 22. ACM, 2014, pp. 177–186.
 [YL98] C. J. B. Yann LeCun, Corinna Cortes, “The MNIST database of handwritten
-digits,” 1998. [Online]. Available: http://yann .lecun.com/exdb/mnist/
-[ZBH+16]C. Zhang, S. Bengio et al., “Understanding deep learning requires rethinking
-generalization,” arXiv preprint arXiv:1611.03530 , Nov. 2016. [Online].
-Available: https://arxiv .org/abs/1611 .03530
+digits,” 1998. [Online]. Available: http://yann.lecun.com/exdb/mnist/
+[ZBH+16] C. Zhang, S. Bengioet al., “Understanding deep learning requires rethinking
+generalization,” arXiv preprint arXiv:1611.03530, Nov. 2016. [Online].
+Available: https://arxiv.org/abs/1611.03530
 [ZCZL16] S. Zhai, Y. Cheng et al., “Doubly convolutional neural networks,” in
-Advances in Neural Information Processing Systems 29 (NIPS) , D. D. Lee,
-M. Sugiyama et al., Eds. Curran Associates, Inc., Oct. 2016, pp. 1082–1090.
-[Online]. Available: http://papers .nips.cc/paper/6340-doubly-convolutionalneural-networks
- .pdf
+Advances in Neural Information Processing Systems 29 (NIPS), D. D. Lee,
+M. Sugiyamaet al., Eds. Curran Associates, Inc., Oct. 2016, pp. 1082–1090.
+[Online]. Available: http://papers.nips.cc/paper/6340-doubly-convolutionalneural-networks.pdf
+
 
-[ZDGD14] N. Zhang, J. Donahue et al., “Part-based R-CNNs for fine-grained category
-detection,” in European Conference on Computer Vision (ECCV) . Springer,
-Jul. 2014, pp. 834–849. [Online]. Available: https://arxiv .org/abs/1407 .3867
-[Zei12] M. D. Zeiler, “Adadelta: an adaptive learning rate method,” arXiv preprint
-arXiv:1212.5701 , Dec. 2012. [Online]. Available: https://arxiv .org/abs/
+[ZDGD14] N. Zhang, J. Donahueet al., “Part-based R-CNNs for fine-grained category
+detection,” inEuropean Conference on Computer Vision (ECCV). Springer,
+Jul. 2014, pp. 834–849. [Online]. Available: https://arxiv.org/abs/1407.3867
+[Zei12] M. D. Zeiler, “Adadelta: an adaptive learning rate method,”arXiv preprint
+arXiv:1212.5701, Dec. 2012. [Online]. Available: https://arxiv .org/abs/
 1212.5701v1
 [ZF13] M. D. Zeiler and R. Fergus, “Stochastic pooling for regularization of deep
-convolutional neural networks,” arXiv preprint arXiv:1301.3557 , Jan. 2013.
-[Online]. Available: https://arxiv .org/abs/1301 .3557v1
+convolutional neural networks,”arXiv preprint arXiv:1301.3557, Jan. 2013.
+[Online]. Available: https://arxiv.org/abs/1301.3557v1
 [ZF14] M. D. Zeiler and R. Fergus, “Visualizing and understanding convolutional
-networks,” in European Conference on Computer Vision (ECCV) . Springer,
-Nov. 2014, pp. 818–833. [Online]. Available: https://arxiv .org/abs/1311 .2901
+networks,” inEuropean Conference on Computer Vision (ECCV). Springer,
+Nov. 2014, pp. 818–833. [Online]. Available: https://arxiv.org/abs/1311.2901
 [Zho16] B. Zhou, “Places2 download,” 2016. [Online]. Available: http://
-places2.csail.mit.edu/download .html
+places2.csail.mit.edu/download.html
 [ZK16] S. Zagoruyko and N. Komodakis, “Wide residual networks,” arXiv
 preprint arXiv:1605.07146 , May 2016. [Online]. Available: https:
-//arxiv.org/abs/1605 .07146
-[ZKL+15]B. Zhou, A. Khosla et al., “Learning deep features for discriminative
-localization,” arXiv preprint arXiv:1512.04150 , Dec. 2015. [Online]. Available:
-https://arxiv .org/abs/1512 .04150
-[ZKL+16]B. Zhou, A. Khosla et al., “Places: An image database for deep scene
-understanding,” arXiv preprint arXiv:1610.02055 , Oct. 2016. [Online].
-Available: https://arxiv .org/abs/1610 .02055
+//arxiv.org/abs/1605.07146
+[ZKL+15] B. Zhou, A. Khosla et al., “Learning deep features for discriminative
+localization,”arXiv preprint arXiv:1512.04150, Dec. 2015. [Online]. Available:
+https://arxiv.org/abs/1512.04150
+[ZKL+16] B. Zhou, A. Khosla et al., “Places: An image database for deep scene
+understanding,” arXiv preprint arXiv:1610.02055, Oct. 2016. [Online].
+Available: https://arxiv.org/abs/1610.02055
 [ZL16] B. Zoph and Q. V. Le, “Neural architecture search with reinforcement
-learning,” arXiv preprint arXiv:1611.01578 , Nov. 2016. [Online]. Available:
-https://arxiv .org/abs/1611 .01578
-[ZMGL15] J. Zhao, M. Mathieu et al., “Stacked what-where auto-encoders,”
-arXiv preprint arXiv:1506.02351 , Jun. 2015. [Online]. Available: https:
-//arxiv.org/abs/1506 .02351v1
-[ZYL+15]H. Zheng, Z. Yang et al., “Improving deep neural networks using softplus
-units,” in International Joint Conference on Neural Networks (IJCNN) , Jul.
+learning,”arXiv preprint arXiv:1611.01578, Nov. 2016. [Online]. Available:
+https://arxiv.org/abs/1611.01578
+[ZMGL15] J. Zhao, M. Mathieu et al. , “Stacked what-where auto-encoders,”
+arXiv preprint arXiv:1506.02351, Jun. 2015. [Online]. Available: https:
+//arxiv.org/abs/1506.02351v1
+[ZYL+15] H. Zheng, Z. Yanget al., “Improving deep neural networks using softplus
+units,” inInternational Joint Conference on Neural Networks (IJCNN), Jul.
 2015, pp. 1–4.
 
 I. Glossary
-ANNartificial neural network. 4
-ASOAutomatic Structure Optimization. 29
-CMOConfusion Matrix Ordering. 2, 35, 36, 51, 52, 71
-CNNConvolutional Neural Network. 1, 3–6, 11, 13, 15, 21–23, 28, 29, 31, 33, 37, 54, 60,
+ANN artificial neural network. 4
+ASO Automatic Structure Optimization. 29
+CMO Confusion Matrix Ordering. 2, 35, 36, 51, 52, 71
+CNN Convolutional Neural Network. 1, 3–6, 11, 13, 15, 21–23, 28, 29, 31, 33, 37, 54, 60,
 71, 72, 79, 82–84, 88–91
-ELUExponential Linear Unit. 38, 57, 60–64, 72, 73, 77, 78, 84
-ESearly stopping. 68
-FCFully Connected. 91, 93
-FLOPfloating point operation. 27, 29, 87, 88, 90, 91, 93
-GAgenetic algorithm. 30
-GANGenerative Adverserial Network. 80
-GPUgraphics processing unit. 37, 40, 59, 63, 67, 88, 91
-HSVhue, saturation, value. 79
-LCNLocal Contrast Normalization. 91
-LDAlinear discriminant analysis. 79
-LReLUleaky rectified linear unit. 63, 72, 77, 78, 84
-MLPmultilayer perceptron. 3–6, 28
-NAGNesterov Accellerated Momentum. 83
-NEATNeuroEvolution of Augmenting Topologies. 83
-OBDOptimal Brain Damage. 29
-
-PCAprincipal component analysis. 79
+ELU Exponential Linear Unit. 38, 57, 60–64, 72, 73, 77, 78, 84
+ES early stopping. 68
+FC Fully Connected. 91, 93
+FLOP floating point operation. 27, 29, 87, 88, 90, 91, 93
+GA genetic algorithm. 30
+GAN Generative Adverserial Network. 80
+GPU graphics processing unit. 37, 40, 59, 63, 67, 88, 91
+HSV hue, saturation, value. 79
+LCN Local Contrast Normalization. 91
+LDA linear discriminant analysis. 79
+LReLU leaky rectified linear unit. 63, 72, 77, 78, 84
+MLP multilayer perceptron. 3–6, 28
+NAG Nesterov Accellerated Momentum. 83
+NEAT NeuroEvolution of Augmenting Topologies. 83
+OBD Optimal Brain Damage. 29
+
+PCA principal component analysis. 79
 PReLU parametrized rectified linear unit. 60, 61, 63, 64, 72, 77, 78, 84
-ReLUrectified linear unit. 5, 13, 60, 61, 63, 64, 72, 77, 78, 84
-SGDstochastic gradient descent. 5, 30, 45, 46, 82
-ZCAZero Components Analysis. 79
+ReLU rectified linear unit. 5, 13, 60, 61, 63, 64, 72, 77, 78, 84
+SGD stochastic gradient descent. 5, 30, 45, 46, 82
+ZCA Zero Components Analysis. 79
diff --git a/read/results/pypdf/2201.00021.txt b/read/results/pypdf/2201.00021.txt
index e75180c..79fc9c1 100644
--- a/read/results/pypdf/2201.00021.txt
+++ b/read/results/pypdf/2201.00021.txt
@@ -1,38 +1,38 @@
-Astronomy &Astrophysics manuscript no. mainArxiv ©ESO 2022
+Astronomy & Astrophysics manuscript no. mainArxiv ©ESO 2022
 April 12, 2022
 Discovery of ammonia (9,6) masers in two high-mass star-forming
 regions
-Y . T. Yan ( 闫耀庭)1,⋆, C. Henkel1,2,3, K. M. Menten1, Y . Gong ( 龚龑)1, J. Ott4, T. L. Wilson1, A. Wootten4, A.
-Brunthaler1, J. S. Zhang (张江水 )5, J. L. Chen ( 陈家梁)5, and K. Yang ( 杨楷)6,7
-1Max-Planck-Institut für Radioastronomie, Auf dem Hügel 69, 53121 Bonn, Germany
+Y . T. Yan (闫耀庭)1, ⋆, C. Henkel1,2,3, K. M. Menten1, Y . Gong (龚龑)1, J. Ott4, T. L. Wilson1, A. Wootten4, A.
+Brunthaler1, J. S. Zhang (张江水)5, J. L. Chen (陈家梁)5, and K. Yang (杨楷)6,7
+1 Max-Planck-Institut für Radioastronomie, Auf dem Hügel 69, 53121 Bonn, Germany
 e-mail: yyan@mpifr-bonn.mpg.de
-2Astronomy Department, Faculty of Science, King Abdulaziz University, P. O. Box 80203, Jeddah 21589, Saudi Arabia
-3Xinjiang Astronomical Observatory, Chinese Academy of Sciences, 830011 Urumqi, PR China
-4National Radio Astronomy Observatory, 520 Edgemont Road, Charlottesville, V A 22903-2475, USA
-5Center for Astrophysics, Guangzhou University, 510006 Guangzhou, People’s Republic of China
-6School of Astronomy and Space Science, Nanjing University, 163 Xianlin Avenue, Nanjing 210023, People’s Republic of China
-7Key Laboratory of Modern Astronomy and Astrophysics (Nanjing University), Ministry of Education, Nanjing 210023, People’s
+2 Astronomy Department, Faculty of Science, King Abdulaziz University, P. O. Box 80203, Jeddah 21589, Saudi Arabia
+3 Xinjiang Astronomical Observatory, Chinese Academy of Sciences, 830011 Urumqi, PR China
+4 National Radio Astronomy Observatory, 520 Edgemont Road, Charlottesville, V A 22903-2475, USA
+5 Center for Astrophysics, Guangzhou University, 510006 Guangzhou, People’s Republic of China
+6 School of Astronomy and Space Science, Nanjing University, 163 Xianlin Avenue, Nanjing 210023, People’s Republic of China
+7 Key Laboratory of Modern Astronomy and Astrophysics (Nanjing University), Ministry of Education, Nanjing 210023, People’s
 Republic of China
-Received 13 December 2021 /Accepted 30 December 2021
+Received 13 December 2021 / Accepted 30 December 2021
 ABSTRACT
 Context. Molecular maser lines are signposts of high-mass star formation, probing the excitation and kinematics of very compact
 regions in the close environment of young stellar objects and providing useful targets for trigonometric parallax measurements.
-Aims. Only a few NH 3(9,6) masers are known so far, and their origin is still poorly understood. Here we aim to find new NH 3(9,6)
+Aims. Only a few NH3 (9,6) masers are known so far, and their origin is still poorly understood. Here we aim to find new NH 3 (9,6)
 masers to provide a better observational basis for studying their role in high-mass star-forming regions.
-Methods. We carried out NH 3(9,6) observations toward Cepheus A and G34.26 +0.15 with the E ffelsberg 100-meter telescope (beam
+Methods. We carried out NH3 (9,6) observations toward Cepheus A and G34.26+0.15 with the Effelsberg 100-meter telescope (beam
 size 49′′) and the Karl G. Jansky Very Large Array (JVLA; beam size about 1′′.2).
-Results. We discovered new NH 3(9,6) masers in Cep A and G34.26 +0.15, which increases the number of known high-mass starforming
- regions hosting NH 3(9,6) masers from five to seven. Long-term monitoring (20 months) at E ffelsberg shows that the intensity
-of the (9,6) maser in G34.26 +0.15 is decreasing, while the Cep A maser remains stable. Compared to the E ffelsberg data and assuming
-linear variations between the epochs of observation, the JVLA data indicate no missing flux. This suggests that the NH 3(9,6) emission
+Results. We discovered new NH3 (9,6) masers in Cep A and G34.26 +0.15, which increases the number of known high-mass starforming
+ regions hosting NH3 (9,6) masers from five to seven. Long-term monitoring (20 months) at Effelsberg shows that the intensity
+of the (9,6) maser in G34.26+0.15 is decreasing, while the Cep A maser remains stable. Compared to the Effelsberg data and assuming
+linear variations between the epochs of observation, the JVLA data indicate no missing flux. This suggests that the NH3 (9,6) emission
 arises from single compact emission regions that are not resolved by the interferometric measurements. As JVLA imaging shows, the
-NH 3(9,6) emission in Cep A originates from a sub-arcsecond-sized region, slightly to the west (0′′.28±0′′.10) of the peak position
-of the 1.36 cm continuum object, HW2. In G34.26 +0.15, three NH 3(9,6) maser spots are observed: one is close to the head of the
-cometary ultracompact H iiregion C, and the other two are emitted from a compact region to the west of the hypercompact H iiregion
+NH3 (9,6) emission in Cep A originates from a sub-arcsecond-sized region, slightly to the west (0 ′′.28 ±0′′.10) of the peak position
+of the 1.36 cm continuum object, HW2. In G34.26 +0.15, three NH3 (9,6) maser spots are observed: one is close to the head of the
+cometary ultracompact H ii region C, and the other two are emitted from a compact region to the west of the hypercompact Hii region
 A.
 Conclusions. The newly found (9,6) masers appear to be related to outflows. The higher angular resolution of JVLA and very long
 baseline interferometry observations are needed to provide more accurate positions and constraints for pumping scenarios.
-Key words. Masers – ISM: clouds – ISM: individual objects: Cep A, G34.26 +0.15 – ISM: H iiregions – Radio lines: ISM
+Key words. Masers – ISM: clouds – ISM: individual objects: Cep A, G34.26+0.15 – ISM: H ii regions – Radio lines: ISM
 1. Introduction
 Since its discovery more than five decades ago (Cheung et al.
 1968), ammonia (NH 3) has been a most valuable molecule for
@@ -42,15 +42,16 @@ the centimeter-wavelength inversion transitions of ammonia are
 regarded as a reliable thermometer of molecular clouds (e.g.,
 Walmsley & Ungerechts 1983; Danby et al. 1988), ammonia
 masers have attracted attention since the first detection of maser
-action in the ( J,K)=(3,3) metastable ( J=K) line toward the
+action in the ( J,K) = (3,3) metastable ( J = K) line toward the
 massive star-forming region W33 (Wilson et al. 1982). Subsequent
  observations have led to the detection of new metastable
-ammonia masers, including15NH 3(3,3) (Mauersberger et al.
-1986), NH 3(1,1) (Gaume et al. 1996), NH 3(2,2) (Mills et al.
-2018), NH 3(5,5) (Cesaroni et al. 1992), NH 3(6,6) (Beuther
-⋆Member of the International Max Planck Research School (IMPRS)
+ammonia masers, including 15NH3 (3,3) (Mauersberger et al.
+1986), NH3 (1,1) (Gaume et al. 1996), NH 3 (2,2) (Mills et al.
+2018), NH 3 (5,5) (Cesaroni et al. 1992), NH 3 (6,6) (Beuther
+⋆ Member of the International Max Planck Research School (IMPRS)
  for Astronomy and Astrophysics at the universities of Bonn and
-Cologne.et al. 2007), NH 3(7,7), NH 3(9,9), and NH 3(12,12) (Henkel
+Cologne.
+et al. 2007), NH 3 (7,7), NH 3 (9,9), and NH 3 (12,12) (Henkel
 et al. 2013). These have led to the discovery of metastable maser
 lines in 22 di fferent regions (Mauersberger et al. 1986, 1987;
 Wilson & Henkel 1988; Wilson et al. 1990; Pratap et al. 1991;
@@ -61,18 +62,19 @@ et al. 2009; Brogan et al. 2011; Urquhart et al. 2011; Walsh
 et al. 2011; Wang et al. 2012; Henkel et al. 2013; Ho ffman &
 Joyce 2014; McEwen et al. 2016; Mills et al. 2018; Hogge et al.
 2019; Mei et al. 2020; Towner et al. 2021). Compared with the
-metastable ammonia masers, detected non-metastable ( J>K)
+metastable ammonia masers, detected non-metastable ( J > K)
 ammonia maser transitions are more numerous. The first highly
 excited non-metastable ammonia maser was detected by Madden
- et al. (1986) in the ( J,K)=(9,6) and (6,3) lines. Thereafter,
-many other NH 3non-metastable inversion transition lines have
+ et al. (1986) in the (J,K) = (9,6) and (6,3) lines. Thereafter,
+many other NH3 non-metastable inversion transition lines have
 been identified as masers, including the (5,3), (5,4), (6,1), (6,2),
 (6,4), (6,5), (7,3), (7,4), (7,5) (7,6), (8,3), (8,4), (8,5), (8,6), (9,3),
 (9,4), (9,5), (9,7), (9,8), (10,7), (10,8), (10,9), and (11,9) transiArticle
- number, page 1 of 10arXiv:2201.00021v3  [astro-ph.GA]  9 Apr 2022
-A&A proofs: manuscript no. mainArxiv
+ number, page 1 of 10
+arXiv:2201.00021v3  [astro-ph.GA]  9 Apr 2022
+A&A proofs:manuscript no. mainArxiv
 tions (e.g., Mauersberger et al. 1987, 1988; Walsh et al. 2007;
-Henkel et al. 2013; Mei et al. 2020). Except for the NH 3(3,3)
+Henkel et al. 2013; Mei et al. 2020). Except for the NH 3 (3,3)
 masers proposed to be associated with four supernova remnants
 (McEwen et al. 2016), almost all the other ammonia masers are
 detected in high-mass star-forming regions (HMSFRs). However,
@@ -84,94 +86,95 @@ high-mass star plays in their excitation remains unclear. Therefore,
 indispensable in regard to their overall incidence and association
  with di fferent environments, which can provide additional
 constraints on the pumping mechanism of ammonia masers.
-So far, a total of 32 NH 3inversion transitions ( ∆K=0
-and∆J=0) have been identified as masers. Among these, and
+So far, a total of 32 NH 3 inversion transitions ( ∆K = 0
+and ∆J = 0) have been identified as masers. Among these, and
 despite arising from energy levels as high as 1090 K above
-the ground state, the NH 3(9,6) maser stands out as being the
+the ground state, the NH 3 (9,6) maser stands out as being the
 strongest and most variable one in W51-IRS2 (e.g., Henkel et al.
 2013). Maser emission in this line has only been detected in five
 HMSFRs, W51, NGC7538, W49, DR21 (OH) (Madden et al.
-1986), and Sgr B2(N) (Mei et al. 2020). The NH 3(3,3) masers
+1986), and Sgr B2(N) (Mei et al. 2020). The NH 3 (3,3) masers
 are thought to be collisionally excited (e.g., Flower et al. 1990;
 Mangum & Wootten 1994); in contrast, the pumping mechanism
- of NH 3(9,6) masers is less well constrained (Madden et al.
+ of NH3 (9,6) masers is less well constrained (Madden et al.
 1986). Brown & Cragg (1991) have studied ortho-ammonia and
 found that it could possibly pump the (6,3) inversion line, but
 they did not extend their model to the (9,6) transition due to the
 fact that collision rates are only known for inversion levels up to
-J=6 (e.g., Danby et al. 1988).
-NH 3(9,6) masers are found to be strongly variable, similar to
+J = 6 (e.g., Danby et al. 1988).
+NH3 (9,6) masers are found to be strongly variable, similar to
 H2O masers (Madden et al. 1986; Pratap et al. 1991; Henkel et al.
 2013). In W51-IRS2, Henkel et al. (2013) found that the (9,6)
 line showed significant variation in line shape within a time interval
  of only two days. Mapping of the (9,6) maser toward W51
 with very long baseline interferometry (VLBI) suggests that the
 masers are closer to the H 2O masers than to the OH masers or
-to ultracompact (UC) H iiregions (Pratap et al. 1991). While
+to ultracompact (UC) H ii regions (Pratap et al. 1991). While
 Henkel et al. (2013) and Goddi et al. (2015) showed that the SiO
-and NH 3masers in W51-IRS2 are very close to each other, their
-positions, di ffering by 0′′.065 (∼0.015 pc), do not fully coincide.
-In this paper we report the discovery of NH 3(9,6) masers
+and NH3 masers in W51-IRS2 are very close to each other, their
+positions, differing by 0′′.065 (∼0.015 pc), do not fully coincide.
+In this paper we report the discovery of NH 3 (9,6) masers
 in two HMSFRs, Cepheus A and G34.26 +0.15. This increases
 the number of (9,6) maser detections in our Galaxy from five
 to seven. In Sect. 2 observations with the E ffelsberg 100-meter
 telescope and the Karl G. Jansky Very Large Array (JVLA) are
 described. Results are presented in Sect. 3. The morphology of
-Cep A and G34.26 +0.15 as well as a comparison of the emission
-distributions of di fferent tracers with the NH 3(9,6) masers are
+Cep A and G34.26+0.15 as well as a comparison of the emission
+distributions of di fferent tracers with the NH 3 (9,6) masers are
 presented in Sect. 4. Our main results are summarized in Sect. 5.
 2. Observations and data reduction
 2.1. Effelsberg observations and data reduction
-The NH 3(9,6) line was observed toward Cep A and
-G34.26 +0.15 with the 100-meter E ffelsberg telescope1in 2020
+The NH 3 (9,6) line was observed toward Cep A and
+G34.26+0.15 with the 100-meter E ffelsberg telescope1 in 2020
 January and 2021 February, July, and August. The S14mm double
  beam secondary focus receiver was employed. The full width
-at half maximum (FWHM) beam size is 49′′at 18.5 GHz, the
+at half maximum (FWHM) beam size is 49 ′′ at 18.5 GHz, the
 frequency of the target line. The observations were performed in
-position switching mode, and the o ffposition was 10′in azimuth
-1Based on observations with the 100-meter telescope of the MPIfR
-(Max-Planck-Institut für Radioastronomie) at E ffelsberg.away from the source. For observations made before 2021 August,
+position switching mode, and the off position was 10′in azimuth
+1 Based on observations with the 100-meter telescope of the MPIfR
+(Max-Planck-Institut für Radioastronomie) at Effelsberg.
+away from the source. For observations made before 2021 August,
  we used a spectrometer that covered 2 GHz wide backends
-with a channel width of 38.1 kHz, corresponding to ∼0.62 km s−1
+with a channel width of 38.1 kHz, corresponding to∼0.62 km s−1
 at the line’s rest frequency, 18.49939 GHz (Poynter & Kakar
 1975). A high spectral resolution backend with 65536 channels
 and a bandwidth of 300 MHz was employed in 2021 August,
-providing a channel width of 0.07 km s−1at 18.5 GHz. Pointing
+providing a channel width of 0.07 km s −1 at 18.5 GHz. Pointing
  was checked every 2 hours using 3C 286 or NGC 7027.
 Focus calibrations were done at the beginning of the observations
  and during sunset and sunrise toward the abovementioned
 pointing sources. The system temperatures were 100–130 K on
 a main-beam brightness temperature, TMB, scale. This flux density
- was calibrated assuming a TMB/Sratio of 1.95 K /Jy, derived
+ was calibrated assuming aTMB/S ratio of 1.95 K/Jy, derived
 from continuum cross scans of NGC 7027 (the flux density was
 adopted from Ott et al. 1994). Calibration uncertainties are estimated
- to be∼10%.
-We used the GILDAS /CLASS2package (Pety 2005) to reduce
+ to be ∼10%.
+We used the GILDAS /CLASS2 package (Pety 2005) to reduce
  the spectral line data. A first-order polynomial was subtracted
  from each spectrum for baseline removal.
 2.2. JVLA observations and data reduction
-Observations of the NH 3(9,6) line toward Cep A and
-G34.26 +0.15 were obtained on 2021 July 13 with the JVLA
-of the National Radio Astronomy Observatory3(NRAO) in the
+Observations of the NH 3 (9,6) line toward Cep A and
+G34.26+0.15 were obtained on 2021 July 13 with the JVLA
+of the National Radio Astronomy Observatory 3 (NRAO) in the
 C configuration (project ID: 21A-157, PI: Yaoting Yan). We
 employed 27 antennas for the observations. The primary beam
-of the JVLA antennas is 150′′(FWHM) at 18.5 GHz. A mixture
+of the JVLA antennas is 150 ′′ (FWHM) at 18.5 GHz. A mixture
  of mixed three-bit and eight-bit samplers were used to perform
- the observations. For the NH 3(9,6) line observations, we
+ the observations. For the NH 3 (9,6) line observations, we
 used one subband with the eight-bit sampler covering a bandwidth
  of 16 MHz with full polarization, eight recirculations, and
 four baseline board pairs (BIBPs) to provide a velocity range
-of 260 km s−1with a channel spacing of 0.13 km s−1. Two
+of 260 km s −1 with a channel spacing of 0.13 km s −1. Two
 additional subbands of bandwidth 16 MHz were used to cover
-the NH 3(8,5) and (10,7) lines. The three-bit sampler with 32
+the NH3 (8,5) and (10,7) lines. The three-bit sampler with 32
 subbands, each with a bandwidth of 128 MHz to cover a total
  range of 4 GHz between 20–24 GHz, was used to measure
  the continuum emission. 3C 286 with a flux density of
 2.89 Jy at 18.5 GHz (Perley & Butler 2013) was used as a
 calibrator for pointing, flux density, bandpass, and polarization.
-J2230 +6946 and J1851 +0035 served as gain calibrators for Cep
+J2230+6946 and J1851+0035 served as gain calibrators for Cep
 A and G34.26 +0.15, respectively. The on-source times were
-4m30sand 4m50stoward Cep A and G34.26 +0.15, respectively.
+4m30s and 4m50s toward Cep A and G34.26+0.15, respectively.
 Data from two antennas were lost due to technical issues.
  The data from the remaining 25 antennas were reduced
 through the Common Astronomy Software Applications package
@@ -183,56 +186,56 @@ the calibrated visibility data to search for additional artifacts before
  imaging. Then, the uvcontsub task in CASA was used to
 separate the calibrated visibilities into two parts, one with lineonly
  data and the other with the continuum data. The tclean task
-with a cell size of 0′′.2 and Briggs weighting with robust =0 was
+with a cell size of 0′′.2 and Briggs weighting with robust=0 was
 used to produce the images of spectral line and continuum emission.
- The synthesized beams for NH 3(9,6) are 1′′.47×0′′.99 at
-2https: //www.iram.fr /IRAMFR /GILDAS /
-3The National Radio Astronomy Observatory is a facility of the National
+ The synthesized beams for NH 3 (9,6) are 1 ′′.47 ×0′′.99 at
+2 https://www.iram.fr/IRAMFR/GILDAS/
+3 The National Radio Astronomy Observatory is a facility of the National
  Science Foundation operated under cooperative agreement by Associated
  Universities, Inc.
-4https: //casa.nrao.edu /
+4 https://casa.nrao.edu/
 Article number, page 2 of 10
-Y . T. Yan ( 闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions
-P.A.=58◦.79 and 1′′.33×1′′.06 at P.A. =5◦.36 toward Cep A
+Y . T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions
+P.A. = 58◦.79 and 1 ′′.33 ×1′′.06 at P.A. = 5◦.36 toward Cep A
 and G34.26 +0.15, respectively. For the 1.36 cm (20–24 GHz)
-continuum emission, the synthesized beams are 1′′.08×0′′.67 at
-P.A.=60◦.64 and 0′′.95×0′′.71 at P.A. =5◦.91 toward Cep A and
-G34.26 +0.15. The typical absolute astrometric accuracy of the
-JVLA is∼10% of the synthesized beam5. The flux density scale
+continuum emission, the synthesized beams are 1 ′′.08 ×0′′.67 at
+P.A. = 60◦.64 and 0′′.95 ×0′′.71 at P.A. = 5◦.91 toward Cep A and
+G34.26+0.15. The typical absolute astrometric accuracy of the
+JVLA is ∼10% of the synthesized beam5. The flux density scale
 calibration accuracy is estimated to be within 15%.
-Fig. 1. Spectra from NH 3(9,6) transition lines. Left:Top to bottom:
-Time sequence of NH 3(9,6) profiles observed toward Cep A with the
+Fig. 1. Spectra from NH 3 (9,6) transition lines. Left: Top to bottom:
+Time sequence of NH 3 (9,6) profiles observed toward Cep A with the
 Effelsberg 100-meter telescope (after subtracting a first-order polynomial
  baseline). A JVLA spectrum is interspersed. The systemic velocity
- from CO and HCO+lines is indicated by a dashed blue line. The
-two dashed red lines at LSR velocities, VLSR, of−0.90 km s−1and
-−0.28 km s−1indicate the central velocities of the two major components.
- Right : NH 3(9,6) spectra from G34.26 +0.15. The systemic velocity
- from C17O is indicated by a dashed blue line. The three dashed
-red lines at VLSR=54.1 km s−1, 55.8 km s−1, and 62.5 km s−1show the
+ from CO and HCO + lines is indicated by a dashed blue line. The
+two dashed red lines at LSR velocities, VLSR, of −0.90 km s −1 and
+−0.28 km s −1 indicate the central velocities of the two major components.
+ Right: NH3 (9,6) spectra from G34.26 +0.15. The systemic velocity
+ from C 17O is indicated by a dashed blue line. The three dashed
+red lines at VLSR = 54.1 km s−1, 55.8 km s−1, and 62.5 km s−1 show the
 central velocities of the main ammonia emission components.
 3. Results
 The spectra from di fferent epochs are shown in Figs. 1 and 2.
-Toward Cep A, the NH 3(9,6) line profile from the JVLA is extracted
- from an E ffelsberg-beam-sized region (FWHM, 49′′). In
-the case of G34.26 +0.15, the NH 3spectrum is below the noise
+Toward Cep A, the NH3 (9,6) line profile from the JVLA is extracted
+ from an Effelsberg-beam-sized region (FWHM, 49′′). In
+the case of G34.26 +0.15, the NH3 spectrum is below the noise
 level if a similarly large beam size is used. Therefore, we derived
- the JVLA NH 3(9,6) spectrum from a smaller region, with
-radius 3′′.5, that contains all the detected NH 3(9,6) emission. In
-Table A.1, the observed NH 3(9,6) line parameters obtained by
-Gaussian fits are listed. NH 3(8,5) and (10,7) emission is not detected
- by our JVLA observations. The 3 σupper limits for the
-NH 3(8,5) and (10,7) lines toward Cep A are 23.2 mJy beam−1
-5https: //science.nrao.edu /facilities /vla/docs/manuals /oss/performance/positional-accuracy
+ the JVLA NH3 (9,6) spectrum from a smaller region, with
+radius 3′′.5, that contains all the detected NH3 (9,6) emission. In
+Table A.1, the observed NH 3 (9,6) line parameters obtained by
+Gaussian fits are listed. NH3 (8,5) and (10,7) emission is not detected
+ by our JVLA observations. The 3 σ upper limits for the
+NH3 (8,5) and (10,7) lines toward Cep A are 23.2 mJy beam −1
+5 https://science.nrao.edu/facilities/vla/docs/manuals/oss/performance/positional-accuracy
 
-Fig. 2. NH 3(9,6) line profiles emphasizing, in contrast to the spectra
+Fig. 2.NH3 (9,6) line profiles emphasizing, in contrast to the spectra
 in Fig. 1, weaker features. Cep A spectra are presented on the left,
-G34.26 +0.15 spectra on the right. The two dashed red lines in the left
-panels indicate VLSR=1.48 km s−1and 2.89 km s−1. In the right panels,
-the two dashed red lines refer to 54.1 km s−1and 55.8 km s−1.
-and 27.2 mJy beam−1, respectively. In G34.26 +0.15, the corresponding
- 3σupper limits for the NH 3(8,5) and (10,7) lines are
-22.1 mJy beam−1and 30.4 mJy beam−1. For both sources, sensitivity
+G34.26+0.15 spectra on the right. The two dashed red lines in the left
+panels indicate VLSR = 1.48 km s−1 and 2.89 km s−1. In the right panels,
+the two dashed red lines refer to 54.1 km s−1 and 55.8 km s−1.
+and 27.2 mJy beam −1, respectively. In G34.26+0.15, the corresponding
+ 3σupper limits for the NH3 (8,5) and (10,7) lines are
+22.1 mJy beam−1 and 30.4 mJy beam −1. For both sources, sensitivity
  levels refer to emission from a single channel of width
 0.13 km s−1. Taking the larger measured line widths of the (9,6)
 maser features (see Table A.1), these limits could be further lowered
@@ -242,66 +245,67 @@ The 1.36 cm continuum, derived from our JVLA observations,
 toward Cep A is presented in Fig. 3. Six published compact
 sources, HW2, HW3a, HW3b, HW3c, HW3d, and HW9, are detected
  in our observations. Figure 4 shows the 1.36 cm continuum
- in G34.26 +0.15. Three main continuum objects, A, B, and
+ in G34.26+0.15. Three main continuum objects, A, B, and
 C, are detected. By using the imfit task in CASA, we measured
 the continuum flux at 1.36 cm toward individual compact source
-components in Cep A and G34.26 +0.15. Details are given in Table
+components in Cep A and G34.26+0.15. Details are given in Table
  A.2.
-3.2. NH 3(9,6) emission in Cep A
-In 2020 January, NH 3(9,6) emission with a peak flux density of
-0.67±0.07 Jy was first detected with the E ffelsberg 100-meter
+3.2. NH3 (9,6) emission in Cep A
+In 2020 January, NH3 (9,6) emission with a peak flux density of
+0.67 ±0.07 Jy was first detected with the E ffelsberg 100-meter
 telescope in Cep A. Emission with similar strength was also detected
  in 2021 February and August with the same telescope.
 Higher velocity resolution data, which were obtained in 2021
 August, again with the E ffelsberg 100-meter telescope, show
 that the (9,6) emission contains two main velocity components.
-Overall, the flux densities of the NH 3(9,6) emission line measured
- with the E ffelsberg 100-meter telescope are, within the calibration
+Overall, the flux densities of the NH 3 (9,6) emission line measured
+ with the Effelsberg 100-meter telescope are, within the calibration
  uncertainties, unchanged. This is valid for the time interval
  between 2020 January and August 2021, when we smoothed
 the obtained spectra to the same velocity resolution. We also
 see another two weaker components. Figure 2 emphasizes these
 weak components with an expanded flux density scale.
 Higher angular resolution data from the JVLA pinpoint the
-position of the NH 3(9,6) emission with an o ffset of (−0′′.28,
+position of the NH 3 (9,6) emission with an o ffset of ( −0′′.28,
 0′′.02) relative to the 1.36 cm continuum peak of Cep A HW2
-(Fig. 3). The deconvolved NH 3(9,6) component size is (0′′.29±
-0′′.15)×(0′′.19±0′′.14) at P.A. =174◦, derived with the imfit task
+(Fig. 3). The deconvolved NH3 (9,6) component size is (0′′.29 ±
+0′′.15) ×(0′′.19 ±0′′.14) at P.A.= 174◦, derived with the imfit task
 in CASA, and can thus be considered, accounting for the uncertainties,
  as unresolved.
 Article number, page 3 of 10
-A&A proofs: manuscript no. mainArxiv
+A&A proofs:manuscript no. mainArxiv
 Fig. 3. Cepheus A. White contours mark the 1.36 cm JVLA continuum map of Cep A; levels are −5, 5, 10, 20, 30, 40, 50, 70, 90,
-and 110×0.125 mJy beam−1. The background image is the Spitzer 4.5µm emission, taken from the Galactic Legacy Infrared Mid-Plane
-Survey Extraordinaire (GLIMPSE; Benjamin et al. 2003; Churchwell et al. 2009). The reference position is αJ2000 =22h56m17s.972, and
-δJ2000=62◦01′49′′.587, the peak position of the continuum map, is marked with a black cross. Slightly to the west of the cross is the black
-ellipse denoting the position of the NH 3(9,6) emission with a purple star at its center. OH (Bartkiewicz et al. 2005), H 2O (Sobolev et al. 2018),
-and CH 3OH (Sanna et al. 2017) masers are presented as diamonds, circles, and squares, respectively. The color bar on the right-hand side indicates
+and 110 ×0.125 mJy beam −1. The background image is the Spitzer 4.5 µm emission, taken from the Galactic Legacy Infrared Mid-Plane
+Survey Extraordinaire (GLIMPSE; Benjamin et al. 2003; Churchwell et al. 2009). The reference position is αJ2000 = 22h56m17s.972, and
+δJ2000 = 62◦01′49′′.587, the peak position of the continuum map, is marked with a black cross. Slightly to the west of the cross is the black
+ellipse denoting the position of the NH 3 (9,6) emission with a purple star at its center. OH (Bartkiewicz et al. 2005), H 2O (Sobolev et al. 2018),
+and CH3OH (Sanna et al. 2017) masers are presented as diamonds, circles, and squares, respectively. The color bar on the right-hand side indicates
 the LSR velocity range of the maser spots.
-Fig. 4. 1.36 cm JVLA continuum map of G34.26 +0.15 presented as white contours with levels of −5, 5, 10, 20, 30, 40, 50, 70, 90, 110, 130,
-150, 180, and 200 ×5.0 mJy beam−1. The background image is the Spitzer 4.5µm emission, taken from GLIMPSE. The reference position is
-αJ2000=18h53m18s.560, andδJ2000=01◦14′58′′.201, the peak position, is marked by a black cross. The black ellipses show the positions of NH 3
-(9,6) emissions with stars at their center (i.e., M1, M2, and M3). OH (Zheng et al. 2000), H 2O (Imai et al. 2011), and CH 3OH (Bartkiewicz et al.
-2016) masers are presented as diamonds, circles, and squares, respectively. The color bar indicates the velocity range ( VLSR) of maser spots.
+Fig. 4.1.36 cm JVLA continuum map of G34.26 +0.15 presented as white contours with levels of −5, 5, 10, 20, 30, 40, 50, 70, 90, 110, 130,
+150, 180, and 200 ×5.0 mJy beam −1. The background image is the Spitzer 4.5 µm emission, taken from GLIMPSE. The reference position is
+αJ2000 = 18h53m18s.560, and δJ2000 = 01◦14′58′′.201, the peak position, is marked by a black cross. The black ellipses show the positions of NH 3
+(9,6) emissions with stars at their center (i.e., M1, M2, and M3). OH (Zheng et al. 2000), H 2O (Imai et al. 2011), and CH3OH (Bartkiewicz et al.
+2016) masers are presented as diamonds, circles, and squares, respectively. The color bar indicates the velocity range (VLSR) of maser spots.
 In view of the constancy of the flux densities obtained at Effelsberg
  and the similar JVLA flux density, measured in 2021
 July, there is no missing interferometric flux density in the JVLA
 data.
-3.3. NH 3(9,6) emission in G34.26 +0.15
-The NH 3(9,6) emission was first detected toward G34.26 +0.15
-in 2020 January with the E ffelsberg 100-meter telescope. Highervelocity resolution data from 2021 August show the NH 3(9,6)
-emission to be composed of two di fferent components. The spectra
+3.3. NH3 (9,6) emission in G34.26+0.15
+The NH3 (9,6) emission was first detected toward G34.26 +0.15
+in 2020 January with the Effelsberg 100-meter telescope. Higher
+velocity resolution data from 2021 August show the NH 3 (9,6)
+emission to be composed of two different components. The spectra
  of weak components on a smaller flux density scale are presented
  in Fig. 2.
-Three di fferent locations showing NH 3(9,6) emission are
-found toward G34.26 +0.15 (Fig. 4). The deconvolved NH 3(9,6)
-component sizes are (1′′.42±0′′.43)×(0′′.54±0′′.62) at P.A. =97◦
-(M1), (0′′.42±0′′.27)×(0′′.15±0′′.27) at P.A. =150◦(M2), and
+Three di fferent locations showing NH 3 (9,6) emission are
+found toward G34.26+0.15 (Fig. 4). The deconvolved NH3 (9,6)
+component sizes are (1′′.42 ±0′′.43) ×(0′′.54 ±0′′.62) at P.A.= 97◦
+(M1), (0′′.42 ±0′′.27) ×(0′′.15 ±0′′.27) at P.A. = 150◦(M2), and
 Article number, page 4 of 10
-Y . T. Yan ( 闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions
-(1′′.17±0′′.34)×(0′′.27±0′′.46) at P.A. =53◦(M3) and are thus
+Y . T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions
+(1′′.17 ±0′′.34) ×(0′′.27 ±0′′.46) at P.A. = 53◦(M3) and are thus
 comparable to or smaller than the beam size.
-Overall, the NH 3(9,6) line from G34.26 +0.15 weakened
+Overall, the NH 3 (9,6) line from G34.26 +0.15 weakened
 during the time interval from 2020 January to 2021 August by
 about 70%. A comparison between the JVLA spectrum and the
 Effelsberg data, assuming a linear decrease in the integrated intensity
@@ -309,66 +313,67 @@ Effelsberg data, assuming a linear decrease in the integrated intensity
 100-meter observations, suggests there is no missing flux in the
 JVLA data. This is similar to the situation in Cep A.
 4. Discussion
-4.1. Morphology of Cep A and G34.26 +0.15
+4.1. Morphology of Cep A and G34.26+0.15
 Cep A, at a trigonometric parallax distance of 0.70 ±0.04 kpc
 (Moscadelli et al. 2009; Dzib et al. 2011), is the second closest
-HMSFR (after Orion) and by far the closest NH 3(9,6) maser
-known. About 16 compact ( ∼1′′) radio sources (e.g., Hughes &
+HMSFR (after Orion) and by far the closest NH 3 (9,6) maser
+known. About 16 compact (∼1′′) radio sources (e.g., Hughes &
 Wouterloot 1984; Hughes 1991; Garay et al. 1996) have been
 identified in Cep A. Hughes & Wouterloot (1984) discovered
 these targets at radio wavelengths, which are UC and hypercompact
- (HC) H iiregions and /or stellar wind sources, subsequently
+ (HC) H ii regions and/or stellar wind sources, subsequently
 named as HW sources. The HW2 object is one of the best known
 examples of a protostellar jet or disk system driving a powerful
 outflow (e.g., Rodriguez et al. 1980; Güsten et al. 1984; Torrelles
 et al. 1986; Curiel et al. 2006; Carrasco-González et al. 2021).
-The observed NH 3(9,6) emission is slightly o ffset (−0′′.28, 0′′.02)
+The observed NH3 (9,6) emission is slightly offset (−0′′.28, 0′′.02)
 from the center of HW2 (see Fig. 3).
-G34.26 +0.15 is an HMSFR located at a distance of 3.3 kpc
+G34.26+0.15 is an HMSFR located at a distance of 3.3 kpc
 (Kuchar & Bania 1994). It hosts four radio continuum components
  named A, B, C, and D. Component C is a prototypical
-cometary UC H iiregion containing a compact head and a di ffuse
+cometary UC Hii region containing a compact head and a diffuse
 tail that extends from east to west (e.g., Reid & Ho 1985; Garay
 et al. 1986; Sewilo et al. 2004; Sewiło et al. 2011). Components
-A and B are HC H iiregions, located to the east of component
-C. An extended ring-like H iiregion, called component D, is located
+A and B are HC H ii regions, located to the east of component
+C. An extended ring-like H ii region, called component D, is located
  southeast of components A-C. One of the three observed
-NH 3(9,6) emission line sources, M1, is close to the head of component
+NH3 (9,6) emission line sources, M1, is close to the head of component
  C, whereas M2 and M3 originate from another compact
-region in the west of the HC H iicomponent A (see Fig. 4).
-4.2. NH 3(9,6) emission possibly caused by maser action
-As shown in Fig. 1, the NH 3(9,6) profiles in Cep A and
-G34.26 +0.15 are narrow ( ∆V1/2≤2.0 km s−1), much narrower
-than the expected line widths ( ≳4 km s−1) of thermal lines observed
+region in the west of the HC H ii component A (see Fig. 4).
+4.2. NH3 (9,6) emission possibly caused by maser action
+As shown in Fig. 1, the NH 3 (9,6) profiles in Cep A and
+G34.26+0.15 are narrow ( ∆V1/2 ≤2.0 km s −1), much narrower
+than the expected line widths ( ≳4 km s −1) of thermal lines observed
  at a similar angular resolution (e.g., Torrelles et al. 1985,
 1986, 1993, 1999; Henkel et al. 1987; Comito et al. 2007; Mookerjea
  et al. 2007; Wyrowski et al. 2012; Beuther et al. 2018). Velocity
  shifts with respect to the systemic velocities of the two
-sources are both observed, that is, V∼10 km s−1in Cep A and
-V∼4 km s−1in G34.26 +0.15 (see details in Sect. 4.3). Furthermore,
+sources are both observed, that is, V ∼10 km s−1 in Cep A and
+V ∼4 km s−1 in G34.26+0.15 (see details in Sect. 4.3). Furthermore,
  time variability is observed in the case of G34.26 +0.15,
 which is also a characteristic feature of maser emission.
 Additional evidence of their maser nature is the high brightness
  temperatures of the (9,6) emission spots toward Cep A and
-G34.26 +0.15. The spectral parameters are listed in Table A.3.
-Because at least a significant part of the NH 3(9,6) emission
+G34.26+0.15. The spectral parameters are listed in Table A.3.
+Because at least a significant part of the NH 3 (9,6) emission
 is not resolved by our JVLA observations, the derived brightness
  temperatures are only lower limits. Nevertheless, the lower
 limits on the brightness temperature are >800 K in Cep A (see
 Table A.3), which is much higher than the expected thermal
 gas temperature of ∼250 K (e.g., Patel et al. 2005; Comito
-et al. 2007; Beuther et al. 2018). This strongly suggests thatthe NH 3(9,6) emission in Cep A is due to maser action. Because
- G34.26 +0.15 is located at about five times the distance to
-Cep A, beam dilution e ffects reduce the lower main beam brightness
+et al. 2007; Beuther et al. 2018). This strongly suggests that
+the NH 3 (9,6) emission in Cep A is due to maser action. Because
+ G34.26+0.15 is located at about five times the distance to
+Cep A, beam dilution effects reduce the lower main beam brightness
  temperature limit to 400 K in G34.26 +0.15 (M2) (see Table
- A.3). We also note that the luminosity of the NH 3(9,6) emission
- in G34.26 +0.15 is higher than or comparable to that in Cep
+ A.3). We also note that the luminosity of the NH3 (9,6) emission
+ in G34.26+0.15 is higher than or comparable to that in Cep
 A, depending on the epoch of our observations.
 Finally, the non-detections of the (8,5) and (10,7) lines also
 indicate that the (9,6) line is special. This allows us to derive
-lower 3σlimits of the (9,6) /(8,5) and (9,6) /(10,7) line intensity
-ratios. The (9,6) line arises from ortho-NH 3(K=3n), whereas
-the NH 3(8,5) and (10,7) lines are para-NH 3(K,3n) lines.
+lower 3σlimits of the (9,6) /(8,5) and (9,6)/(10,7) line intensity
+ratios. The (9,6) line arises from ortho-NH 3 (K = 3n), whereas
+the NH 3 (8,5) and (10,7) lines are para-NH 3 (K , 3n) lines.
 The minimum ortho-to-para ratios are in the range 12–42 and 1–
 8 toward Cep A and G34.26 +0.15, respectively. The statistical
 weights for the ortho states are twice as large as those for the
@@ -377,112 +382,113 @@ et al. 2013). In Cep A, the line intensity ratios are far higher than
 this factor of two. Thus, at least in Cep A the higher main beam
 brightness peak temperature of the (9,6) emission is caused by
 maser action, perhaps involving exponential amplification, and
-the case of G34.26 +0.15 is likely similar.
-4.3. Comparison of NH 3(9,6) masers with previously
-published (quasi-)thermal NH 3emission
+the case of G34.26+0.15 is likely similar.
+4.3. Comparison of NH3 (9,6) masers with previously
+published (quasi-)thermal NH3 emission
 The metastable (1,1), (2,2), (3,3), and (4,4) ammonia lines
 show thermal emission toward Cep A over a velocity range of
-−13 km s−1≤VLSR≤−4 km s−1(Brown et al. 1981; Güsten
+−13 km s −1 ≤VLSR ≤−4 km s −1 (Brown et al. 1981; Güsten
 et al. 1984; Torrelles et al. 1985, 1986, 1993, 1999). An average
-NH 3column density of ∼5×1015cm−2was estimated for a region
-of 3′′around HW2 (Torrelles et al. 1999). This high NH 3abundance
+NH3 column density of∼5×1015 cm−2 was estimated for a region
+of 3′′around HW2 (Torrelles et al. 1999). This high NH 3 abundance
  could provide a suitable environment for maser species.
-Large line widths ( ∆V1/2≃7.0 km s−1) with VLSR∼ −10 km s−1
+Large line widths (∆V1/2 ≃7.0 km s−1) with VLSR ∼ −10 km s−1
 in both (1,1) and (2,2) lines were found toward HW2 (Torrelles
 et al. 1993). The velocity is similar to the cloud’s systemic local
- standard of rest (LSR) velocity of −11.2 km s−1, which
-is based on CO (Narayanan & Walker 1996) and HCO+observations
+ standard of rest (LSR) velocity of −11.2 km s −1, which
+is based on CO (Narayanan & Walker 1996) and HCO + observations
  (Gómez et al. 1999). Our (9,6) maser is redshifted
-(−0.9 km s−1≤VLSR≤2.9 km s−1) and shares positions with
-the outflowing gas seen in CO and HCO+with similarly redshifted
+(−0.9 km s −1 ≤VLSR ≤2.9 km s −1) and shares positions with
+the outflowing gas seen in CO and HCO + with similarly redshifted
  velocities. Therefore, we argue that the (9,6) masers are
 related to outflowing gas.
-In G34.26 +0.15, a large NH 3column density,
-1018.5±0.2cm−2, and a kinetic temperature of 225 ±75 K
+In G34.26 +0.15, a large NH 3 column density,
+1018.5±0.2 cm−2, and a kinetic temperature of 225 ±75 K
 were derived by Henkel et al. (1987) based on measurements
-of 15 NH 3inversion transitions in the frequency range of
+of 15 NH 3 inversion transitions in the frequency range of
 22.0–26.0 GHz. These did not include the (9,6) transition.
 While these lines were measured with a beam size of about
 40′′, a comparison of the peak intensities of the optically thick
 lines with the kinetic temperature reveals the size of the hot,
 ammonia-emitting core to be only ∼2.5′′. All those measured
-NH 3lines were quasi-thermal and had LSR velocities of
+NH3 lines were quasi-thermal and had LSR velocities of
 ∼58.5 km s−1, close to the systemic velocity of ∼58.1 km s−1
-obtained from C17O observations (Wyrowski et al. 2012).
-Their line widths ( ∆V1/2≥3.6 km s−1) are larger than what
-we find (0.35 km s−1≤∆V1/2≤0.94 km s−1) for each (9,6)
+obtained from C 17O observations (Wyrowski et al. 2012).
+Their line widths ( ∆V1/2 ≥3.6 km s −1) are larger than what
+we find (0.35 km s −1 ≤ ∆V1/2 ≤0.94 km s −1) for each (9,6)
 maser component (see details in Table A.3). In all, we may
 have observed four di fferent (9,6) velocity features. Three
-are blueshifted at VLSR∼53.8 km s−1, 55.8 km s−1, and
-56.8 km s−1, and a fourth, tentatively detected, at 62.5 km s−1.
+are blueshifted at VLSR ∼ 53.8 km s −1, 55.8 km s −1, and
+56.8 km s−1, and a fourth, tentatively detected, at 62.5 km s −1.
 This tentative redshifted feature was only potentially detected
-with E ffelsberg in 2020 January. The velocity is similar to that
-of the JVLA measurements on the NH 3(1,1) absorption line
-against continuum source C ( ∼7′′resolution; Keto et al. 1987)
+with Effelsberg in 2020 January. The velocity is similar to that
+of the JVLA measurements on the NH 3 (1,1) absorption line
+against continuum source C ( ∼7′′ resolution; Keto et al. 1987)
 Article number, page 5 of 10
-A&A proofs: manuscript no. mainArxiv
-and the NH 3(3,3) emission surrounding continuum source B as
-well as the head of C (1′′.4×1′′.2 resolution; Heaton et al. 1989).
+A&A proofs:manuscript no. mainArxiv
+and the NH3 (3,3) emission surrounding continuum source B as
+well as the head of C (1 ′′.4×1′′.2 resolution; Heaton et al. 1989).
 However, we did not find this redshifted component in our
-JVLA observations. Therefore, its position within G34.26 +0.15
+JVLA observations. Therefore, its position within G34.26+0.15
 cannot be determined. The blueshifted (9,6) masers with a
-velocity range of 53.8–56.8 km s−1(M1, M2, and M3) show
-velocities compatible with those of the NH 3(3,3) emission at
+velocity range of 53.8–56.8 km s −1 (M1, M2, and M3) show
+velocities compatible with those of the NH 3 (3,3) emission at
 the proper positions (Heaton et al. 1989), which might be a
 suitable environment for maser species.
-4.4. Comparison of NH 3(9,6) masers with other maser lines
-To characterize the environment of NH 3(9,6) masers, we can
+4.4. Comparison of NH3 (9,6) masers with other maser lines
+To characterize the environment of NH 3 (9,6) masers, we can
 compare their positions with respect to those of other maser
 species (i.e., OH, H 2O, and CH 3OH). Toward Cep A HW2,
-many CH 3OH (e.g., Menten 1991; Sugiyama et al. 2008; Sanna
+many CH3OH (e.g., Menten 1991; Sugiyama et al. 2008; Sanna
 et al. 2017) and H 2O maser spots (e.g., Torrelles et al. 1998,
 2011; Sobolev et al. 2018) are detected and are associated with
 its disk. Sobolev et al. (2018) also found that most of the H 2O
-maser flux is associated with the compact H iiregion HW3d. OH
-maser features close to the H iiregions are also seen in HW2
+maser flux is associated with the compact Hii region HW3d. OH
+maser features close to the H ii regions are also seen in HW2
 (e.g., Cohen & Brebner 1985; Bartkiewicz et al. 2005). These
 three kinds of masers in Cep A have a large velocity range of
-−25 km s−1≤VLSR≤−2 km s−1and are widespread around
-HW2 and HW3, while NH 3(9,6) emission is only detected at
-−0.9 km s−1≤VLSR≤2.9 km s−1toward a sub-arcsecondsized
+−25 km s −1 ≤VLSR ≤−2 km s −1 and are widespread around
+HW2 and HW3, while NH 3 (9,6) emission is only detected at
+−0.9 km s −1 ≤ VLSR ≤2.9 km s −1 toward a sub-arcsecondsized
  region to the west of the peak continuum position of HW2
-(see Fig. 3). This suggests that the NH 3(9,6) maser in Cep A
+(see Fig. 3). This suggests that the NH 3 (9,6) maser in Cep A
 is unique and not related to maser spots seen in other molecular
 species.
-In G34.26 +0.15, OH (Zheng et al. 2000), H 2O (Imai et al.
-2011), and CH 3OH (Bartkiewicz et al. 2016) masers have been
+In G34.26+0.15, OH (Zheng et al. 2000), H 2O (Imai et al.
+2011), and CH3OH (Bartkiewicz et al. 2016) masers have been
 detected east of source C (Fig. 4), and none of them coincides
-with the head of C. The NH 3(9,6) maser M1 is also found
-slightly o ffthe head of source C. This could suggest that M1
+with the head of C. The NH 3 (9,6) maser M1 is also found
+slightly o ff the head of source C. This could suggest that M1
 is powered by continuum source C or by an outflow. Near component
  B, there are some OH and CH 3OH masers but no H 2O
-or NH 3masers. A group of H 2O masers, well-known tracers
-of outflows, with a large velocity distribution of 43 km s−1≤
-VLSR≤54 km s−1, was found to the west of the centimetercontinuum
+or NH 3 masers. A group of H 2O masers, well-known tracers
+of outflows, with a large velocity distribution of 43 km s −1 ≤
+VLSR ≤54 km s −1, was found to the west of the centimetercontinuum
  source A and close to the peak of the millimetercontinuum
  emission (see details in our Fig. A.2 and also in Fig. 5
-of Imai et al. 2011). The closeness of NH 3(9,6) maser spots M2
+of Imai et al. 2011). The closeness of NH3 (9,6) maser spots M2
 and M3 to this group of water masers and their similar velocities
-again suggest an association of NH 3(9,6) masers with outflow
+again suggest an association of NH 3 (9,6) masers with outflow
 activity.
 4.5. Constraints on pumping scenarios
-Our observations have resulted in the detection of NH 3(9,6)
+Our observations have resulted in the detection of NH 3 (9,6)
 masers in Cep A and G34.26 +0.15. The new detections could
 provide additional constraints on the maser line’s pumping
 mechanism. As mentioned in Sect. 1, the pumping mechanism
 of the (9,6) maser is unclear (Madden et al. 1986; Brown &
 Cragg 1991). Previous studies have suggested that there are three
-main pumping scenarios to explain the observed NH 3maser
+main pumping scenarios to explain the observed NH 3 maser
 lines (Madden et al. 1986; Henkel et al. 2013): (1) infrared radiation
  from the dust continuum emission, (2) line overlap, and
 (3) collisional pumping.
 For the first mechanism, infrared photons near 10 µm are
 needed for vibrational excitation. The high dust temperature
 (∼300 K) of W51-IRS2 can provide substantial infrared photons
- near 10 µm, which is used for radiative pumping (Henkelet al. 2013). Both Cep A and G34.26 +0.15 have similar kinetic
+ near 10 µm, which is used for radiative pumping (Henkel
+et al. 2013). Both Cep A and G34.26 +0.15 have similar kinetic
 temperatures of ≳200 K (Henkel et al. 1987; Patel et al. 2005;
 Comito et al. 2007; Beuther et al. 2018). This suggests that
-high kinetic temperatures are needed to excite NH 3(9,6) masers.
+high kinetic temperatures are needed to excite NH3 (9,6) masers.
 However, it should be noted that the silicate dust absorption feature
  might dominate at 10 µm (see the spectral energy distribution
  of Cep A in De Buizer et al. 2017). Additionally, there is
@@ -490,17 +496,17 @@ no bright infrared emission around the two (9,6) masers, M2 and
 M3, in G34.26 +0.15 (see Fig. 4; see also Fig. 11 in De Buizer
 et al. 2003 for a 10.5 µm map). This indicates that the pumping
 mechanism via infrared photons near 10 µm may not be viable
-to explain the (9,6) masers in Cep A and G34.26 +0.15. Furthermore,
+to explain the (9,6) masers in Cep A and G34.26+0.15. Furthermore,
  Wilson & Schilke (1993) argued that radiative pumping by
 dust emission tends to excite multiple adjacent ammonia maser
 transitions, which appears to contradict our failure to detect the
 adjacent (8,5) and (10,7) lines (with respect to quantum numbers
 and frequency) and to only measure the (9,6) transitions in Cep
-A and G34.26 +0.15. Therefore, we suggest that infrared radiation
+A and G34.26+0.15. Therefore, we suggest that infrared radiation
  from dust is not the main pumping source.
 Madden et al. (1986) suggested that there might be some
-line overlaps between the rotational NH 3transitions in the farinfrared
- band. However, this would be unlikely to a ffect only the
+line overlaps between the rotational NH 3 transitions in the farinfrared
+ band. However, this would be unlikely to affect only the
 (9,6) line. Nevertheless, far-infrared spectral observations will
 be needed to clarify this scenario.
 Based on our observations, the (9,6) maser spots are close
@@ -510,39 +516,39 @@ masers show velocity o ffsets with respect to their systemic velocities.
  This indicates that the (9,6) masers are located at the
 base of outflows, similar to the H 2O masers. This is supported
 by VLBI observations that show that (9,6) masers tend to be
-closely associated with H 2O masers (Pratap et al. 1991). The observed
- time variability in G34.26 +0.15 and W51-IRS2 can also
+closely associated with H2O masers (Pratap et al. 1991). The observed
+ time variability in G34.26+0.15 and W51-IRS2 can also
 be attributed to episodic molecular outflows. This indicates that
 collisional pumping could be the driver of the (9,6) maser. On
 the other hand, collisional pumping has been successfully used
-to explain the NH 3(3,3) maser (Walmsley & Ungerechts 1983;
+to explain the NH3 (3,3) maser (Walmsley & Ungerechts 1983;
 Flower et al. 1990; Mangum & Wootten 1994). Collisions tend to
 pump from the K=0 level to the K=3 level with parity changes,
 that is, the upper level of the (3,3) metastable transition will be
-overpopulated. NH 3(9,6) arises from the ortho species, so a similar
+overpopulated. NH3 (9,6) arises from the ortho species, so a similar
  mechanism might also occur in the case of the (9,6) transition.
  Further measurements of collisional rates of ammonia will
 allow us to test this scenario.
 5. Summary
-We report the discovery of NH 3(9,6) masers in two HMSFRs,
+We report the discovery of NH 3 (9,6) masers in two HMSFRs,
 Cep A and G34.26 +0.15. The narrow line width of the emission
- features ( ∆V1/2≤2.0 km s−1) and their high brightness temperatures
- (>400 K) indicate the maser nature of the lines.
+ features (∆V1/2 ≤2.0 km s−1) and their high brightness temperatures
+ ( > 400 K) indicate the maser nature of the lines.
 The intensity of the (9,6) maser in G34.26 +0.15 is decreasing
 with time, while toward Cep A the maser is stable based on 20
 months of monitoring at E ffelsberg. Linearly interpolating the
-integrated intensities obtained at E ffelsberg as a function of time,
+integrated intensities obtained at Effelsberg as a function of time,
 the JVLA measurements show that there is no missing flux density
- on scales on the order of 1.2 arcsec (4 ×10−3and 2×10−2pc)
+ on scales on the order of 1.2 arcsec (4×10−3 and 2 ×10−2 pc)
 to the total single-dish flux. The JVLA-detected emission indicates
- that the NH 3(9,6) maser in Cep A originates from a
-sub-arcsecond-sized region slightly (0′′.28±0′′.10) to the west
+ that the NH 3 (9,6) maser in Cep A originates from a
+sub-arcsecond-sized region slightly (0 ′′.28 ±0′′.10) to the west
 of the peak position of the 1.36 cm continuum object, HW2. In
-G34.26 +0.15, three NH 3(9,6) maser spots are observed: one is
-close to the head of the cometary UC H iiregion C, and the other
-two are emitted from a compact region to the west of the HC H ii
+G34.26+0.15, three NH3 (9,6) maser spots are observed: one is
+close to the head of the cometary UC Hii region C, and the other
+two are emitted from a compact region to the west of the HC Hii
 Article number, page 6 of 10
-Y . T. Yan ( 闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions
+Y . T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions
 region A. We suggest that the (9,6) masers may be connected to
 outflowing gas. Higher angular resolution JVLA and VLBI observations
  are planned to provide more accurate positions and
@@ -551,11 +557,11 @@ Acknowledgements. We would like to thank the anonymous referee for the useful
  comments that improve the manuscript. Y .T.Y . is a member of the International
  Max Planck Research School (IMPRS) for Astronomy and Astrophysics
 at the Universities of Bonn and Cologne. Y .T.Y . would like to thank the China
-Scholarship Council (CSC) for its support. We would like to thank the sta ffat
-the Effelsberg for their help provided during the observations. We thank the sta ff
+Scholarship Council (CSC) for its support. We would like to thank the sta ff at
+the Effelsberg for their help provided during the observations. We thank the staff
 of the JVLA, especially Tony Perreault and Edward Starr, for their assistance
 with the observations and data reduction. This research has made use of the
-NASA /IPAC Infrared Science Archive, which is funded by the National Aeronautics
+NASA/IPAC Infrared Science Archive, which is funded by the National Aeronautics
  and Space Administration and operated by the California Institute of
 Technology.
 References
@@ -586,7 +592,7 @@ De Buizer, J. M., Radomski, J. T., Telesco, C. M., & Piña, R. K. 2003, ApJ, 598
 1127
 Dzib, S., Loinard, L., Rodríguez, L. F., Mioduszewski, A. J., & Torres, R. M.
 2011, ApJ, 733, 71
-Flower, D. R., O ffer, A., & Schilke, P. 1990, MNRAS, 244, 4P
+Flower, D. R., Offer, A., & Schilke, P. 1990, MNRAS, 244, 4P
 Galván-Madrid, R., Keto, E., Zhang, Q., et al. 2009, ApJ, 706, 1036
 Garay, G., Ramirez, S., Rodriguez, L. F., Curiel, S., & Torrelles, J. M. 1996, ApJ,
 459, 193
@@ -616,7 +622,8 @@ Madden, S. C., Irvine, W. M., Matthews, H. E., Brown, R. D., & Godfrey, P. D.
 1986, ApJ, 300, L79
 Mangum, J. G. & Wootten, A. 1994, ApJ, 428, L33
 Mauersberger, R., Henkel, C., & Wilson, T. L. 1987, A&A, 173, 352
-Mauersberger, R., Wilson, T. L., & Henkel, C. 1986, A&A, 160, L13Mauersberger, R., Wilson, T. L., & Henkel, C. 1988, A&A, 201, 123
+Mauersberger, R., Wilson, T. L., & Henkel, C. 1986, A&A, 160, L13
+Mauersberger, R., Wilson, T. L., & Henkel, C. 1988, A&A, 201, 123
 McEwen, B. C., Pihlström, Y . M., & Sjouwerman, L. O. 2016, ApJ, 826, 189
 McMullin, J. P., Waters, B., Schiebel, D., Young, W., & Golap, K. 2007, in Astronomical
  Society of the Pacific Conference Series, V ol. 376, Astronomical
@@ -672,74 +679,75 @@ Zhang, Q. & Ho, P. T. P. 1995, ApJ, 450, L63
 Zhang, Q., Hunter, T. R., Sridharan, T. K., & Cesaroni, R. 1999, ApJ, 527, L117
 Zheng, X. W., Moran, J. M., & Reid, M. J. 2000, MNRAS, 317, 192
 Article number, page 7 of 10
-A&A proofs: manuscript no. mainArxiv
+A&A proofs:manuscript no. mainArxiv
 Appendix A:
-Table A.1. Summary of NH 3(9, 6) maser observations.
-Source Telescope Beam Epoch Channel Sν rms∫
-Sνdv V LSR ∆V1/2
+Table A.1.Summary of NH3 (9, 6) maser observations.
+Source Telescope Beam Epoch Channel S ν rms
+∫
+S νdv V LSR ∆V1/2
 size spacing
-(km s−1) (Jy) (mJy) (Jy km s−1) (km s−1)
-Cep A E ffelsberg 49′′2020, Jan. 04 0.62 0.67 3.41 1.19 ±0.02 -1.11±0.02 1.67±0.04
-Effelsberg 49′′2021, Feb. 11 0.62 0.59 5.97 1.08 ±0.02 -0.74±0.02 1.70±0.04
-Effelsberg 49′′2021, Feb. 15 0.62 0.65 10.98 1.11 ±0.03 -0.75±0.02 1.60±0.05
-JVLAa1′′.47×0′′.99 2021, Jul. 13 0.13 1.13 144 0.89 ±0.09 -0.86±0.03 0.74±0.12
-Effelsberg 49′′2021, Aug. 11 0.07 0.98 13.36 0.49 ±0.02 -0.90±0.01 0.47±0.01
-0.35 0.26 ±0.02 -0.28±0.02 0.69±0.05
-Effelsberg 49′′2021, Aug. 12 0.07 0.98 13.35 0.50 ±0.01 -0.89±0.07 0.48±0.07
-0.35 0.20 ±0.01 -0.29±0.07 0.54±0.07
-0.06 0.07 ±0.01 0.51±0.07 1.09±0.07
-0.02 0.02 ±0.01 2.15±0.07 0.80±0.07
-0.07 0.06 ±0.01 2.89±0.07 0.92±0.07
-G34.26 +0.15 E ffelsberg 49′′2020, Jan. 03 0.62 0.30 1.26 0.65 ±0.03 62.50±0.05 2.05±0.13
-Effelsberg 49′′2021, Feb. 11 0.62 0.24 2.42 0.40 ±0.02 55.76±0.04 1.60±0.12
-Effelsberg 49′′2021, Feb. 15 0.62 0.20 4.86 0.38 ±0.02 55.71±0.05 1.80±0.14
-JVLAb1′′.33×1′′.06 2021, Jul. 13 0.13 0.23 37.1 0.09 ±0.02 54.41±0.03 0.38±0.09
-0.22 0.22 ±0.02 55.82±0.05 0.95±0.12
-0.15 0.06 ±0.01 57.21±0.04 0.35±0.08
-Effelsberg 49′′2021, Aug. 11 0.07 0.08 13.92 0.06 ±0.007 54.10±0.05 0.68±0.12
-0.07 0.02 ±0.006 54.82±0.03 0.31±0.09
-0.12 0.10 ±0.006 55.85±0.02 0.75±0.06
-Effelsberg 49′′2021, Aug. 12 0.07 0.16 27.40 0.09 ±0.008 55.83±0.02 0.56±0.05
-Notes. The spectral parameters are obtained from Gaussian fitting.(a)The JVLA spectrum toward Cep A is extracted from the E ffelsberg-beamsized
- region (FWHM 49′′).(b)For G34.26 +0.15, the JVLA beam samples the NH 3(9,6) spectrum over a region of radius 3′′.5, which contains all
-detected NH 3(9,6) emissions.
-Table A.2. 1.36 cm JVLA flux densities of individual continuum sources.
-Source R.A. Dec. Size P.A. Sν
-(h m s ) (◦ ′ ′′) (arcsec) (deg) (mJy)
-Cep A HW2 22 56 17.972 ±0.003 +62 01 49.587±0.015 (0.45±0.19)×(0.22±0.10) 50.0 20.2 ±1.4
-HW3a 22 56 17.420 ±0.022 +62 01 44.576±0.076 (2.35±0.45)×(0.55±0.14) 66.6 4.75 ±0.74
-HW3b 22 56 17.578 ±0.009 +62 01 45.041±0.043 (1.43±0.24)×(0.45±0.10) 59.9 3.19 ±0.36
-HW3c 22 56 17.956 ±0.016 +62 01 46.224±0.038 (1.44±0.37)×(0.36±0.19) 86.0 9.90 ±1.7
-HW3d 22 56 18.195 ±0.005 +62 01 46.325±0.014 (1.26±0.12)×(0.30±0.19) 102.5 13.75 ±0.92
-HW9 22 56 18.626 ±0.014 +62 01 47.851±0.137 (1.53±0.51)×(0.29±0.30) 28.0 3.26 ±0.78
-G34.26 +0.15 A 18 53 18.774 ±0.005 +01 14 56.208±0.125 (0.66±0.49)×(0.50±0.33) 10.0 94 ±33
-B 18 53 18.649 ±0.005 +01 15 00.071±0.180 (2.31±0.49)×(0.85±0.21) 17.4 597 ±110
-C 18 53 18.560 ±0.004 +01 14 58.201±0.112 (2.03±0.30)×(1.34±0.20) 178.0 5070 ±660
+(km s−1) (Jy) (mJy) (Jy km s −1) (km s −1)
+Cep A E ffelsberg 49 ′′ 2020, Jan. 04 0.62 0.67 3.41 1.19 ±0.02 -1.11 ±0.02 1.67 ±0.04
+Effelsberg 49 ′′ 2021, Feb. 11 0.62 0.59 5.97 1.08 ±0.02 -0.74 ±0.02 1.70 ±0.04
+Effelsberg 49 ′′ 2021, Feb. 15 0.62 0.65 10.98 1.11 ±0.03 -0.75 ±0.02 1.60 ±0.05
+JVLAa 1′′.47 ×0′′.99 2021, Jul. 13 0.13 1.13 144 0.89 ±0.09 -0.86 ±0.03 0.74 ±0.12
+Effelsberg 49 ′′ 2021, Aug. 11 0.07 0.98 13.36 0.49 ±0.02 -0.90 ±0.01 0.47 ±0.01
+0.35 0.26 ±0.02 -0.28 ±0.02 0.69 ±0.05
+Effelsberg 49 ′′ 2021, Aug. 12 0.07 0.98 13.35 0.50 ±0.01 -0.89 ±0.07 0.48 ±0.07
+0.35 0.20 ±0.01 -0.29 ±0.07 0.54 ±0.07
+0.06 0.07 ±0.01 0.51 ±0.07 1.09 ±0.07
+0.02 0.02 ±0.01 2.15 ±0.07 0.80 ±0.07
+0.07 0.06 ±0.01 2.89 ±0.07 0.92 ±0.07
+G34.26+0.15 E ffelsberg 49 ′′ 2020, Jan. 03 0.62 0.30 1.26 0.65 ±0.03 62.50 ±0.05 2.05 ±0.13
+Effelsberg 49 ′′ 2021, Feb. 11 0.62 0.24 2.42 0.40 ±0.02 55.76 ±0.04 1.60 ±0.12
+Effelsberg 49 ′′ 2021, Feb. 15 0.62 0.20 4.86 0.38 ±0.02 55.71 ±0.05 1.80 ±0.14
+JVLAb 1′′.33 ×1′′.06 2021, Jul. 13 0.13 0.23 37.1 0.09 ±0.02 54.41 ±0.03 0.38 ±0.09
+0.22 0.22 ±0.02 55.82 ±0.05 0.95 ±0.12
+0.15 0.06 ±0.01 57.21 ±0.04 0.35 ±0.08
+Effelsberg 49 ′′ 2021, Aug. 11 0.07 0.08 13.92 0.06 ±0.007 54.10 ±0.05 0.68 ±0.12
+0.07 0.02 ±0.006 54.82 ±0.03 0.31 ±0.09
+0.12 0.10 ±0.006 55.85 ±0.02 0.75 ±0.06
+Effelsberg 49 ′′ 2021, Aug. 12 0.07 0.16 27.40 0.09 ±0.008 55.83 ±0.02 0.56 ±0.05
+Notes. The spectral parameters are obtained from Gaussian fitting. (a) The JVLA spectrum toward Cep A is extracted from the E ffelsberg-beamsized
+ region (FWHM 49′′). (b) For G34.26+0.15, the JVLA beam samples the NH3 (9,6) spectrum over a region of radius 3′′.5, which contains all
+detected NH3 (9,6) emissions.
+Table A.2.1.36 cm JVLA flux densities of individual continuum sources.
+Source R.A. Dec. Size P.A. S ν
+(h m s ) ( ◦ ′ ′′ ) (arcsec) (deg) (mJy)
+Cep A HW2 22 56 17.972 ±0.003 +62 01 49.587 ±0.015 (0.45 ±0.19) ×(0.22 ±0.10) 50.0 20.2 ±1.4
+HW3a 22 56 17.420 ±0.022 +62 01 44.576 ±0.076 (2.35 ±0.45) ×(0.55 ±0.14) 66.6 4.75 ±0.74
+HW3b 22 56 17.578 ±0.009 +62 01 45.041 ±0.043 (1.43 ±0.24) ×(0.45 ±0.10) 59.9 3.19 ±0.36
+HW3c 22 56 17.956 ±0.016 +62 01 46.224 ±0.038 (1.44 ±0.37) ×(0.36 ±0.19) 86.0 9.90 ±1.7
+HW3d 22 56 18.195 ±0.005 +62 01 46.325 ±0.014 (1.26 ±0.12) ×(0.30 ±0.19) 102.5 13.75 ±0.92
+HW9 22 56 18.626 ±0.014 +62 01 47.851 ±0.137 (1.53 ±0.51) ×(0.29 ±0.30) 28.0 3.26 ±0.78
+G34.26+0.15 A 18 53 18.774 ±0.005 +01 14 56.208 ±0.125 (0.66 ±0.49) ×(0.50 ±0.33) 10.0 94 ±33
+B 18 53 18.649 ±0.005 +01 15 00.071 ±0.180 (2.31 ±0.49) ×(0.85 ±0.21) 17.4 597 ±110
+C 18 53 18.560 ±0.004 +01 14 58.201 ±0.112 (2.03 ±0.30) ×(1.34 ±0.20) 178.0 5070 ±660
 Article number, page 8 of 10
-Y . T. Yan ( 闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions
-Table A.3. NH 3(9,6) maser positions derived from the JVLA observations.
-Source R.A. Dec. Sν TMB VLSR ∆V1/2
-(h m s ) (◦ ′ ′′) (mJy beam−1) (K) (km s−1)
-Cep A M 22 56 17.933 ±0.002 +62 01 49.608±0.011 985.2 2464.8 -0.88 ±0.01 0.51±0.02
-343.2 829.5 -0.24 ±0.03 0.63±0.05
-G34.26 +0.15 M1 18 53 18.569 ±0.007 +01 14 57.997±0.056 37.1 94.5 56.82 ±0.06 0.68±0.14
-M2 18 53 18.696 ±0.002 +01 14 55.807±0.034 48.4 122.4 53.77 ±0.05 0.35±0.08
-57.8 146.2 54.35 ±0.07 0.83±0.14
-180.8 457.6 55.83 ±0.01 0.59±0.03
-M3 18 53 18.667 ±0.005 +01 14 55.348±0.066 78.1 197.2 54.22 ±0.04 0.94±0.08
-73.7 186.3 55.78 ±0.04 0.79±0.08
-Fig. A.1. Cepheus A. The grey shaded areas mark the 1.36 cm JVLA continuum map of Cep A. The reference position is αJ2000=22h56m17s.972,
-andδJ2000=62◦01′49′′.587, the peak position of the continuum map, is marked by a red cross. Slightly to the west of the cross is the white ellipse
-denoting the position of the NH 3(9,6) emission with a purple star at its center. The red contours show the NOrthern Extended Millimeter Array
+Y . T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions
+Table A.3.NH3 (9,6) maser positions derived from the JVLA observations.
+Source R.A. Dec. S ν TMB VLSR ∆V1/2
+(h m s ) ( ◦ ′ ′′ ) (mJy beam −1) (K) (km s −1)
+Cep A M 22 56 17.933 ±0.002 +62 01 49.608 ±0.011 985.2 2464.8 -0.88 ±0.01 0.51 ±0.02
+343.2 829.5 -0.24 ±0.03 0.63 ±0.05
+G34.26+0.15 M1 18 53 18.569 ±0.007 +01 14 57.997 ±0.056 37.1 94.5 56.82 ±0.06 0.68 ±0.14
+M2 18 53 18.696 ±0.002 +01 14 55.807 ±0.034 48.4 122.4 53.77 ±0.05 0.35 ±0.08
+57.8 146.2 54.35 ±0.07 0.83 ±0.14
+180.8 457.6 55.83 ±0.01 0.59 ±0.03
+M3 18 53 18.667 ±0.005 +01 14 55.348 ±0.066 78.1 197.2 54.22 ±0.04 0.94 ±0.08
+73.7 186.3 55.78 ±0.04 0.79 ±0.08
+Fig. A.1.Cepheus A. The grey shaded areas mark the 1.36 cm JVLA continuum map of Cep A. The reference position is αJ2000 = 22h56m17s.972,
+and δJ2000 = 62◦01′49′′.587, the peak position of the continuum map, is marked by a red cross. Slightly to the west of the cross is the white ellipse
+denoting the position of the NH 3 (9,6) emission with a purple star at its center. The red contours show the NOrthern Extended Millimeter Array
 (NOEMA) 1.37 mm continuum, taken from Beuther et al. (2018). Contour levels are -5, 5, 10, 20, 40, 80, 100, 150, and 200 ×2.43 mJy beam−1.
-OH (Bartkiewicz et al. 2005), H 2O (Sobolev et al. 2018), and CH 3OH (Sanna et al. 2017) masers are presented as diamonds, circles, and squares,
-respectively. The color bar on the right-hand side indicates the velocity range ( VLSR) of maser spots.
+OH (Bartkiewicz et al. 2005), H2O (Sobolev et al. 2018), and CH3OH (Sanna et al. 2017) masers are presented as diamonds, circles, and squares,
+respectively. The color bar on the right-hand side indicates the velocity range (VLSR) of maser spots.
 Article number, page 9 of 10
-A&A proofs: manuscript no. mainArxiv
-Fig. A.2. 1.36 cm JVLA continuum map of G34.26 +0.15 presented as gray shaded areas. The reference position is αJ2000=18h53m18s.560, and
-δJ2000=01◦14′58′′.201, the peak position, is marked by a red cross. The red ellipses show the positions of NH 3(9,6) emission with stars at their
+A&A proofs:manuscript no. mainArxiv
+Fig. A.2.1.36 cm JVLA continuum map of G34.26 +0.15 presented as gray shaded areas. The reference position is αJ2000 = 18h53m18s.560, and
+δJ2000 = 01◦14′58′′.201, the peak position, is marked by a red cross. The red ellipses show the positions of NH 3 (9,6) emission with stars at their
 center (i.e., M1, M2, and M3). The blue contours show the Berkeley-Illinois-Maryland Association (BIMA) array 2.8 mm continuum, taken from
-Mookerjea et al. (2007). Contour levels are -3, 3, 10, 20, 30, 40, 50, 70, 90, 100, 120, and 140 ×20 mJy beam−1. OH (Zheng et al. 2000), H 2O (Imai
+Mookerjea et al. (2007). Contour levels are -3, 3, 10, 20, 30, 40, 50, 70, 90, 100, 120, and 140×20 mJy beam−1. OH (Zheng et al. 2000), H2O (Imai
 et al. 2011), and CH 3OH (Bartkiewicz et al. 2016) masers are presented as diamonds, circles, and squares, respectively. The color bar indicates
-the velocity range ( VLSR) of maser spots.
+the velocity range (VLSR) of maser spots.
 Article number, page 10 of 
\ No newline at end of file
diff --git a/read/results/pypdf/2201.00022.txt b/read/results/pypdf/2201.00022.txt
index 08cbfa5..b90ac7c 100644
--- a/read/results/pypdf/2201.00022.txt
+++ b/read/results/pypdf/2201.00022.txt
@@ -1,48 +1,54 @@
-Draft version January 4, 2022
-Typeset using L ATEXtwocolumn style in AASTeX631
+Draft version July 7, 2022
+Typeset using LATEX twocolumn style in AASTeX631
 The Formation of Intermediate Mass Black Holes in Galactic Nuclei
-Sanaea C. Rose,1, 2Smadar Naoz,1, 2Re’em Sari,3and Itai Linial3
+Sanaea C. Rose,1, 2 Smadar Naoz,1, 2 Re’em Sari,3 and Itai Linial3
 1Department of Physics and Astronomy, University of California, Los Angeles, CA 90095, USA
 2Mani L. Bhaumik Institute for Theoretical Physics, University of California, Los Angeles, CA 90095, USA
 3Racah Institute for Physics, The Hebrew University, Jerusalem 91904, Israel
 ABSTRACT
 Most stellar evolution models predict that black holes (BHs) should not exist above approximately
-50−70 M ⊙. However, recent LIGO/Virgo detections indicate the existence of BHs with masses at and
-above this threshold. We suggest that massive BHs, including intermediate mass black holes (IMBHs),
-can form in galactic nuclei through collisions between stellar-mass black holes and the surrounding
-main-sequence stars. Considering dynamical processes such as collisions, mass segregation, and relaxation,
- we find that this channel can be quite efficient, forming IMBHs as massive as 104M⊙. Our
-results suggest that massive black holes and IMBHs may be ubiquitous in galactic centres. This formation
- channel also has implications for observations. Collisions between stars and BHs can produce
-electromagnetic signatures, for example, from x-ray binaries and tidal disruption events. Additionally,
-formed through this channel, both black holes in the mass gap and IMBHs can merge with the supermassive
- black hole at the center of a galactic nucleus through gravitational waves. These gravitational
-wave events are extreme and intermediate mass ratio inspirals (EMRIs and IMRIs, respectively).
-1.INTRODUCTION
+50 −70 M⊙, the lower limit of the pair-instability mass gap. However, recent LIGO/Virgo detections
+indicate the existence of BHs with masses at and above this threshold. We suggest that massive
+BHs, including intermediate mass black holes (IMBHs), can form in galactic nuclei through collisions
+between stellar-mass black holes and the surrounding main-sequence stars. Considering dynamical
+processes such as collisions, mass segregation, and relaxation, we find that this channel can be quite
+efficient, forming IMBHs as massive as 10 4 M⊙. This upper limit assumes that (1) the BHs accrete a
+substantial fraction of the stellar mass captured during each collision and (2) that the rate at which
+new stars are introduced into the region near the SMBH is high enough to offset depletion by stellar
+disruptions and star-star collisions. We discuss deviations from these key assumptions in the text. Our
+results suggest that BHs in the pair-instability mass gap and IMBHs may be ubiquitous in galactic
+centers. This formation channel has implications for observations. Collisions between stars and BHs
+can produce electromagnetic signatures, for example, from x-ray binaries and tidal disruption events.
+Additionally, formed through this channel, both black holes in the mass gap and IMBHs can merge
+with the supermassive black hole at the center of a galactic nucleus through gravitational waves.
+These gravitational wave events are extreme and intermediate mass ratio inspirals (EMRIs and IMRIs,
+respectively).
+1. INTRODUCTION
 The recently detected gravitational wave source
 GW190521 (The LIGO Scientific Collaboration et al.
 2020a,b) produced an intermediate mass black hole of
-approximately 142 M ⊙. This event may have also had a
-85 M ⊙progenitor, which falls within the pair-instability
+approximately 142 M⊙. This event may have also had a
+85 M⊙ progenitor, which falls within the pair-instability
 mass gap that limits stellar black holes (BHs) to no
-more than∼<50 M ⊙(e.g., Heger et al. 2003; Woosley
+more than ∼< 50 M ⊙ (e.g., Heger et al. 2003; Woosley
 2017)1. Similarly, the merger products of GW150914,
 GW170104, and GW170814 fall within the mass gap
 (e.g., Abbott et al. 2016, 2017a,b). BH mergers that
 form second generation BHs and, in some cases, intermediate
  mass BHs (IMBHs), these gravitational wave
-(GW) events can occur in globular clusters, young stellar
- clusters, or the field (e.g., Rodriguez et al. 2018; Rodriguez
- et al. 2019; Fishbach et al. 2020; Mapelli et al.
-2021b,a; Di Carlo et al. 2019, 2021; Dall’Amico et al.
-2021; Arca Sedda et al. 2021). However, IMBHs are
-not limited to these locations and may reside in galacCorresponding
+(GW) events can occur in globular clusters, young stelCorresponding
  author: Sanaea C. Rose
 srose@astro.ucla.edu
-1Note that the exact lower and upper limits may be sensitive to
+1 Note that the exact lower and upper limits may be sensitive to
 metallicity of the progenitor (e.g., Woosley 2017; Spera & Mapelli
 2017a; Limongi & Chieffi 2018a; Sakstein et al. 2020; Belczynski
-et al. 2020a; Renzo et al. 2020; Vink et al. 2021).tic nuclei as well. Several studies propose that our
+et al. 2020a; Renzo et al. 2020; Vink et al. 2021).
+lar clusters, or the field (e.g., Rodriguez et al. 2018; Rodriguez
+ et al. 2019; Fishbach et al. 2020; Mapelli et al.
+2021b,a; Di Carlo et al. 2019, 2021; Dall’Amico et al.
+2021; Arca Sedda et al. 2021). However, IMBHs are
+not limited to these locations and may reside in galactic
+ nuclei as well. Several studies propose that our
 own galactic center may host an IMBH in the inner pc
 (e.g., Hansen & Milosavljevi´ c 2003; Maillard et al. 2004;
 G¨ urkan & Rasio 2005; Gualandris & Merritt 2009; Chen
@@ -59,23 +65,26 @@ Valiante et al. 2016) or from direct collapse of accumulated
 Ferrara et al. 2014; Choi et al. 2015; Shlosman et al.
 2016). These high redshift IMBHs would need to survive
  galaxy evolution and mergers to present day (e.g.,
+arXiv:2201.00022v2  [astro-ph.GA]  6 Jul 2022
+ Rose et al.
 Rashkov & Madau 2014), with significant effects on their
 stellar and even dark matter surroundings (e.g., Bertone
 et al. 2009; Chen & Liu 2013; Bringmann et al. 2012; Eda
 et al. 2013; Naoz & Silk 2014; Naoz et al. 2019). Another
 popular formation channel relies on the coalescence of
-many stellar-mass black holes. For example, IMBHs
+many stellar-mass black holes, which may seed objects
+as massive as SMBHs (e.g., Kroupa et al. 2020). IMBHs
 may form in the centers of globular clusters, where fewbody
  interactions lead to the merger of stellar-mass BHs
 (e.g., O’Leary et al. 2006; G¨ urkan et al. 2006; Blecha
-et al. 2006; Freitag et al. 2006; Umbreit et al. 2012; Ro-arXiv:2201.00022v1  [astro-ph.GA]  31 Dec 202
- Rose et al.
-driguez et al. 2018; Rodriguez et al. 2019; Fragione et al.
+et al. 2006; Freitag et al. 2006; Umbreit et al. 2012; Rodriguez
+ et al. 2018; Rodriguez et al. 2019; Fragione et al.
 2020b). Other formation mechanisms invoke successive
-collisions and mergers of massive stars (e.g., Portegies
-Zwart & McMillan 2002; Portegies Zwart et al. 2004;
-Freitag et al. 2006; Kremer et al. 2020; Gonz´ alez et al.
-2021; Di Carlo et al. 2021).
+collisions and mergers of massive stars (e.g., Ebisuzaki
+et al. 2001; Portegies Zwart & McMillan 2002; Portegies
+Zwart et al. 2004; Freitag et al. 2006; Sakurai et al. 2017;
+Kremer et al. 2020; Gonz´ alez et al. 2021; Di Carlo et al.
+2021; Das et al. 2021a,b; Escala 2021).
 The main obstacle to sequential BH mergers in clusters
  is that the merger recoil velocity kick often exceeds
 the escape velocity from the cluster (e.g., Schnittman
@@ -89,19 +98,30 @@ clusters without a SMBH. They considered BH binarysingle
 merger recoil kicks. The post-kick merger product sinks
 back towards the cluster center over a dynamical friction
  timescale. Using this approach, they showed that
-103−104M⊙IMBHs can form efficiently over the lifetime
+103 −104 M⊙ IMBHs can form efficiently over the lifetime
  of a cluster.
-However, as discussed in Section 2.2, direct star-BH
+However, as discussed in Section 2.2, direct BH-star
 collisions are much more frequent than BH-BH collision
 in galactic nuclei, making the former a promising channel
- for BH growth. We propose that IMBHs can form
-naturally within the central pc of a SMBH in a galactic
-center. Specifically, these IMBHs form through repeated
-collisions with main sequence stars , accreting some or
-all of the star’s mass depending on the details of the
-collision. We demonstrate that this channel can create
-IMBHs with masses as large as 104M⊙, depending on
-the density profile of the surrounding stars.
+ for BH growth. In an N-body study of young star
+clusters, Rizzuto et al. (2022) find that BH-star collisions
+ are a main contributor to the formation of BHs
+in the mass gap and IMBHs. In a similar vein, Stone
+et al. (2017) demonstrate that massive BHs can form
+from repeated tidal encounters between stars and BHs.
+More generally, several studies have explored the role of
+collisions in a GN, with implications for the stellar and
+red giant populations (e.g., Dale & Davies 2006; Dale
+et al. 2009; Balberg et al. 2013; Mastrobuono-Battisti
+et al. 2021). We propose that IMBHs can form naturally
+within the central pc of a galactic center through repeated
+ collisions between BHs and main sequence stars.
+During a collision, the BH can accrete some portion of
+the star’s mass. Over many collisions, it can grow appreciably
+ in size. We demonstrate that this channel can
+create IMBHs with masses as large as 104 M⊙, an upper
+limit that depends on the density profile of the surrounding
+ stars and the efficiency of the accretion.
 The paper is structured as follows: we describe relevant
  physical processes and our approach in Section 2.
 In particular, we provide an overview of collisions in
@@ -110,30 +130,31 @@ Section 2.2 and present our statistical approach in Section
 mass growth with each collision and presents analytic
 solutions to our equations in two different regimes, efficient
  collisions and inefficient collisions We compare
-these solutions to our statistical results. Sections 2.5
-and 2.7 discuss implications for GW merger events between
+these solutions to our statistical results. Sections 2.6
+and 2.8 discuss implications for GW merger events between
  IMBHs and the SMBH. We then incorporate relaxation
  processes and discuss the subsequent results in
-Section 2.8. Finally, we discuss and summarize our findings
+Section 2.9. Finally, we discuss and summarize our findings
  in Section 3.
-2.METHODOLOGY
+2. METHODOLOGY
 We consider a population of stellar mass BHs embedded
- in a cluster of 1 M ⊙stars. When stars and BHs
+ in a cluster of 1 M ⊙ stars. When stars and BHs
 collide, the BHs can accrete mass. The growth rate depends
- on the physical processes outlined below. We usea statistical approach to estimate the stellar encounters
+ on the physical processes outlined below. We use
+a statistical approach to estimate the stellar encounters
 and final IMBH masses.
 2.1. Physical Picture
 We consider a population of BHs within the inner few
 parsecs of the SMBH in a galactic nucleus (GN). We assume
  that the BH mass distribution follows that of the
 stars from which they originate, a Kroupa initial mass
-functiondN/dm∝m−2.35. While this choice represents
+function dN/dm∝m−2.35. While this choice represents
 a gross oversimplification, it has very little bearing on
 our final results. Future work may address the particulars
  of the BH mass distribution, but we do not expect
 that it will significantly alter the outcome. The upper
 and lower limits of the BH mass distribution are 5 and
-50M⊙, respectively. We select the upper limit to encompass
+50 M⊙, respectively. We select the upper limit to encompass
  the range of upper bounds predicted by stellar
 evolution models, which vary between 40 and 125 M⊙
 depending on the metallicity (Heger et al. 2003; Woosley
@@ -141,85 +162,91 @@ depending on the metallicity (Heger et al. 2003; Woosley
 Belczynski et al. 2020b; Renzo et al. 2020). We assume
 that the orbits of the BHs follow a thermal eccentricity
 distribution. We draw their semimajor axes, a•, from a
-uniform distribution in log distance, dN/d (logr) being
+uniform distribution in log distance, dN/d(log r) being
 constant. While this distribution is not necessarily representative
  of actual conditions in the GN, we use it to
 build a comprehensive physical picture of BH growth at
 all distances from the SMBH, including within 0 .01 pc.
 Otherwise, the innermost region of the GN would be
 poorly represented in our sample. We consider other
-observationally motivated distributions in Section 2.8,
+IMBH Formation in Galactic Nuclei 3
+Figure 1. We plot the relevant timescales, including collision
+ (green), relaxation (gold), and BH-BH GW capture
+(purple), for a single BH in the GN as a function of distance
+from the SMBH. For the collision timescale, we assume the
+BH is on a circular orbit. The timescales depend on the
+density, so we adopt a range of density profiles, bounded by
+α= 1 (dashed curve) to α= 2 (dark, solid curve). The dark
+blue line represents the time for a 105 M⊙ BH to merge with
+the SMBH through GW emission.
+observationally motivated distributions in Section 2.9,
 but reserve a more detailed examination of the distribution’s
  impact for future work.
 2.2. Direct Collisions
 BHs in the GN can undergo direct collisions with other
 objects. The timescale for this process, tcoll, can be estimated
  using a simple rate calculation: t−1
-coll=nσA,
-wherenis the number density of objects, σis the velocity
+coll = nσA,
+where n is the number density of objects, σ is the velocity
  dispersion, and Ais the cross-section. We use the
 collision timescale from Rose et al. (2020):
 t−1
-coll=πn(a•)σ(a•)
-×(
+coll = πn(a•)σ(a•)
+×
+(
 f1(e•)r2
-c+f2(e•)rc2G(mBH+m⋆)
-σ(a•)2)
-.(1)
-whereGis the gravitational constant and rcis the sum
+c + f2(e•)rc
+2G(mBH + m⋆)
+σ(a•)2
+)
+. (1)
+where Gis the gravitational constant and rc is the sum
 of the radii of the interacting objects, a black hole with
-massmBHand a star with mass m⋆. Detailed in Rose
-et al. (2020), f1(e•) andf2(e•) account for the effect of
+mass mBH and a star with mass m⋆. Detailed in Rose
+et al. (2020), f1(e•) and f2(e•) account for the effect of
 the eccentricity of the BH’s orbit about the SMBH on
-the collision rate, while nandσare simply evaluated
+the collision rate, while n and σ are simply evaluated
 at the semimajor axis of the orbit (see below). Note
-IMBH Formation in Galactic Nuclei 3
-Figure 1. We plot the relevant timescales, including collision
- (green), relaxation (gold), and BH-BH GW capture
-(purple), for a single BH in the GN as a function of distance
-from the SMBH. For the collision timescale, we assume the
-BH is on a circular orbit. The timescales depend on the
-density, so we adopt a range of density profiles, bounded by
-α= 1 (dashed curve) to α= 2 (dark, solid curve). The dark
-blue line represents the time for a 105M⊙BH to merge with
-the SMBH through GW emission.
 that this timescale equation includes the effects of gravitational
  focusing, which enhances the cross-section of
 interaction.
 Assuming a circular orbit for simplicity, we plot the
 timescale for a BH orbiting in the GN to collide with
-a 1M⊙star as a function of distance from the SMBH
-in Figure 1.2As this timescale depends on the density
+a 1 M⊙ star as a function of distance from the SMBH
+in Figure 1. 2 As this timescale depends on the density
 of surrounding stars, we adopt a density profile of the
 form:
-ρ(r•) =ρ0(r•
-r0)−α
+ρ(r•) = ρ0
+(r•
+r0
+)−α
 , (2)
-wherer•denotes the distance from the SMBH. We adopt
-a SMBH mass of 4 ×106M⊙such that our fiducial GN
+where r• denotes the distance from the SMBH. We adopt
+a SMBH mass of 4 ×106 M⊙ such that our fiducial GN
 matches our own galactic center (e.g., Ghez et al. 2005;
 Genzel et al. 2003). In this case, the normalization in
-Eq. (2) isρ0= 1.35×106M⊙/pc3atr0= 0.25 pc (Genzel
- et al. 2010). Additionally, in Eq. (2), αgives the
+Eq. (2) is ρ0 = 1.35 ×106 M⊙/pc3 at r0 = 0.25 pc (Genzel
+ et al. 2010). Additionally, in Eq. (2), α gives the
 slope of the power law. We assume that a uniform population
  of solar mass stars account for most of the mass
 in the GN, making the stellar number density:
-n(r•) =ρ(r•)
-1M⊙. (3)
-2We note that the eccentricity has a very minor effect on the
-collision timescale (Rose et al. 2020).The collision timescale also depends on the velocity dispersion,
+n(r•) = ρ(r•)
+1 M⊙
+. (3)
+The collision timescale also depends on the velocity dispersion,
  which we express as:
-σ(r•) =√
+σ(r•) =
+√
 GM•
-r•(1 +α), (4)
-whereαis the slope of the density profile and M•denotes
+r•(1 + α), (4)
+where α is the slope of the density profile and M• denotes
  the mass of the SMBH (Alexander 1999; Alexander
  & Pfuhl 2014). As mentioned above, Eq. (1) depends
 on the sum of the radii of the colliding objects, rc. We
-takerc= 1R⊙because these interactions involve a BH
+take rc = 1 R⊙ because these interactions involve a BH
 and a star, and the former has a much smaller physical
  cross-section. For example, the Schwarzschild radius
-of a 10M⊙BH is only 30 km, or 4 .31×10−5R⊙. For
+of a 10 M⊙ BH is only 30 km, or 4 .31 ×10−5 R⊙. For
 this reason, direct collisions between compact objects
 are very rare and not included in our model.
 We note that direct collisions between BHs, via GW
@@ -235,29 +262,26 @@ than the BH-BH GW collision timescale (for the relevant
 2018, for example). Thus, we expect that star-BH collisions
  will be the main driver of IMBH growth in the
 GN.
+2 We note that the eccentricity has a very minor effect on the
+collision timescale (Rose et al. 2020).
+ Rose et al.
 2.3. Statistical Approach to Collisions
 We simulate the mass growth of a population of BHs
 with initial conditions detailed in Section 2.1. Over an
-increment ∆ tof 106yr, we calculate the probability of
+increment $\Delta t$ of $10^{6}$ yr, we calculate the probability of
 a collision occurring, given by ∆ t/tcoll. This choice of
-∆tis motivated by our galactic center’s star formation
+∆t is motivated by our galactic center’s star formation
 timescale (e.g., Lu et al. 2009), allowing for regular replenishment
  of the stellar population in the GN. We have
 checked that the results are not sensitive to this choice
 of ∆t, omitted here to avoid clutter. We draw a number
 between 0 and 1 using a random number generator. If
 that number is less than or equal to the probability, we
-increase the BH’s mass by ∆ m, the mass that the BH is
+increase the BH’s mass by ∆m, the mass that the BH is
 expected to accrete in a single collision (see Section 2.4
 for details). We recalculate the collision timescale using
 the updated BH mass and repeat this process until the
-time elapsed equals the simulation time of 10 Gyr3.
-3Closer to the SMBH, ∆ tmay exceed the collision timescale by
-a factor of a few for steep density profiles. We include a safeguard
- in our code which takes the ratio tcoll/∆tand rounds it
-to the nearest integer. We take this integer to be the number of
-collisions and increase the BH mass accordingly.
- Rose et al.
+time elapsed equals the simulation time of 10 Gyr.$^{3}$
 2.4. Mass Growth
 When a BH collides with a star, it may accrete material
  and grow in mass. The details of the accretion
@@ -268,40 +292,61 @@ passing through the star’s center. We begin by considering
  the escape velocity from the BH at the star’s
 outermost point, its surface, which corresponds to the
 maximum impact parameter 1 R ⊙. Qualitatively, one
-might expect that the BH could accrete the entire star
+might expect that the BH could capture the entire star
 (i.e., ∆m∼1 M⊙) if the relative velocity is smaller than
 the escape velocity from the BH at this point. However,
 in the vicinity of the SMBH, the dispersion velocity of
 the stars may be much larger than the escape velocity
 from the BH at the star’s surface. In this case, the BH
-accretes a “tunnel” of material through the star. This
+captures a “tunnel” of material through the star. This
 tunnel has radius equal to the Bondi radius and length
-approximately 1 R⊙.
-To estimate ∆ m, we begin with the Bondi-Hoyle accretion
- rate, ˙ m, given by:
-˙m=4πG2m2
+approximately 1 R⊙. For the purposes of this study, we
+assume that the BH accretes all of the material that
+it captures. The details of the accretion are uncertain,
+however, and it may be much less efficient than our results
+ imply. We discuss accretion in Section 2.5.
+To estimate ∆m, we begin with the Bondi-Hoyle accretion
+ rate, ˙m, given by:
+˙m= 4πG2m2
 BHρstar
-(c2s+σ2)3/2, (5)
-wherecsis the speed of sound in the star and ρstaris its
+(c2s + σ2)3/2 , (5)
+3 Closer to the SMBH, ∆ t may exceed the collision timescale by
+a factor of a few for steep density profiles. We include a safeguard
+ in our code which takes the ratio tcoll/∆t and rounds it
+to the nearest integer. We take this integer to be the number of
+collisions and increase the BH mass accordingly.
+Figure 2. We consider an example that highlights the mass
+growth as a function of distance from the SMBH. Grey dots
+represent the initial masses and distances from the SMBH
+of the BHs involved in the simulation. For simplicity, we set
+the initial mass equal to 10 M⊙ for all of the BHs. Assuming
+the density profile of stars has α = 1, we consider two cases:
+BHs accrete all of the star’s mass during a collision (red) and
+only a portion of the star’s mass is accreted during a collision
+given by Eq. 6 (blue). The latter case results in less growth
+closer to the SMBH where the velocity dispersion becomes
+high. The shaded regions and dashed lines represent the
+analytical predictions detailed in Section 2.4.
+where cs is the speed of sound in the star and ρstar is its
 density (e.g., Bondi 1952; Bondi & Hoyle 1944; Shima
 et al. 1985; Edgar 2004, see latter for a review). We
 approximate the density as 1 M⊙/(4πR3
 ⊙/3) and take
-the conservative value of cs= 500km s−1, which is
-consistent with the sound speed inside a 1 M⊙star
+the conservative value of cs = 500 km s−1, which is
+consistent with the sound speed inside a 1 M⊙ star
 (Christensen-Dalsgaard et al. 1996) and allows us to set
 a lower limit on ∆ m. To find ∆ m, at each collision, we
 have:
-∆m= min( ˙m×t⋆,cross,1 M⊙), (6)
-wheret⋆,cross∼R⊙/σis the crossing time of the BH in
-the star. We take the minimum between ˙ m×t⋆,crossand
-1 M⊙because the BH cannot accrete more mass than
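+$$\Delta m = \min\left(\dot{m} \times t_{\star,{\rm cross}},\ 1\,M_\odot\right) , \qquad (6)$$
+where $t_{\star,{\rm cross}} \sim R_\odot/\sigma$ is the crossing time of the BH in
+the star. We take the minimum between $\dot{m} \times t_{\star,{\rm cross}}$ and
+1 $M_\odot$ because the BH cannot accrete more mass than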
 one star at each collision.
 Figure 2 juxtaposes the expected growth using BondiHoyle-Lyttleton
  accretion (blue small points) with a
 much simpler model in which the BH accretes the star’s
-entire mass, 1 M⊙(red large points). Both examples
-start with identical populations of 10 M⊙BHs (grey)
+entire mass, 1 M⊙ (red large points). Both examples
+start with identical populations of 10 M⊙ BHs (grey)
 and simulate growth through collisions using a statistical
  approach. As the BHs grow, the collision timescale,
 which depends on mBH, decreases. Simultaneously,
@@ -309,33 +354,23 @@ which depends on mBH, decreases. Simultaneously,
  is exponential growth (see discussion and details
 surrounding Eq. (8)). In Figure 2, however, the simulations
  assume α= 1 for the stellar density profile, ensuring
- the collision timescale is long compared to the simulation
- time, 10 Gyr. Therefore, the BHs grow slowly,
-Figure 2. We consider an example that highlights the mass
-growth as a function of distance from the SMBH. Grey dots
-represent the initial masses and distances from the SMBH
-of the BHs involved in the simulation. For simplicity, we set
-the inital mass equal to 10 M⊙for all of the BHs. Assuming
-the density profile of stars has α= 1, we consider two cases:
-BHs accrete all of the star’s mass during a collision (red) and
-only a portion of the star’s mass is accreted during a collision
-given by Eq. 6 (blue). The latter case results in less growth
-closer to the SMBH where the velocity dispersion becomes
-high. The shaded regions and dashed lines represent the
-analytical predictions detailed in Section 2.4.
+ the collision timescale is long compared to the simulation
+IMBH Formation in Galactic Nuclei 5
+ time, 10 Gyr. Therefore, the BHs grow slowly,
 and their final masses can be approximated using the
 following equation:
-mfinal(tcoll→const.) =minitial + ∆mT
-tcoll,(7)
-in whichTrepresents the simulation time and ∆ mand
-tcollremain constant, approximated as their initial values.
+$$m_{\rm final}(t_{\rm coll} \to {\rm const.}) = m_{\rm initial} + \Delta m
+\frac{T}{t_{\rm coll}}
+, \qquad (7)$$
+in which $T$ represents the simulation time and $\Delta m$ and
+$t_{\rm coll}$ remain constant, approximated as their initial values.
 
 This equation is plotted in Figure 2 for both cases,
-∆m= 1M⊙(red) and ∆ mfrom Bondi-Hoyle-Lyttleton
+$\Delta m = 1\,M_\odot$ (red) and $\Delta m$ from Bondi-Hoyle-Lyttleton
 accretion (blue), and the curves coincide with the corresponding
  simulated results. The shaded regions represent
  one standard deviation from Eq. (7), calculated
-using the square root of the number of collisions, T/tcoll.
+using the square root of the number of collisions, $T/t_{\rm coll}$.
 As indicated by the results in red, in the absence of
 Bondi-Hoyle-Lyttleton accretion, the BHs closest to the
 SMBH experience the most growth because they have
@@ -343,74 +378,128 @@ shorter collision timescales. However, Bondi-HoyleLyttleton
  accretion becomes important closer to the
 SMBH, where the velocity dispersion is large compared
 with the stars’ escape velocity, and curtails the mass
-growth for BHs in this region. Outside of 10−2pc, a BH
+growth for BHs in this region. Outside of $10^{-2}$ pc, a BH
 consumes the star’s entire mass: the accretion-limited
-∆mgoverned by Eq. (7) is greater than or equal to the
+∆m governed by Eq. (7) is greater than or equal to the
 star’s mass.
 Eq. 7 does not apply for other values of α. When the
 collision timescale is shorter, corresponding to a larger
-indexαin the density profile (see Figure 1), the growth
-IMBH Formation in Galactic Nuclei 5
-is very efficient and ∆ mquickly approaches 1 M ⊙. Consequently,
- while we can now assume ∆ m= 1 M ⊙, we
+index $\alpha$ in the density profile (see Figure 1), the growth
+is very efficient and $\Delta m$ quickly approaches 1 $M_\odot$. Consequently,
+ while we can now assume $\Delta m = 1\,M_\odot$, we
 can no longer assume the collision timescale is constant.
 The final mass grows exponentially as a result. For
-∆m= 1M ⊙, the general solution is reached by solving
-the differential equation dm/dt = 1M⊙/tcoll(m), which
+$\Delta m = 1\,M_\odot$, the general solution is reached by solving
+the differential equation $dm/dt = 1\,M_\odot/t_{\rm coll}(m)$, which
 gives:
-mfinal(∆m→1 M⊙) =−A+ (minitial +A)eCT(8)
-whereA=σ2Rstar/GandC= 2πGn starRstar/σ. As an
+$$m_{\rm final}(\Delta m \to 1\,M_\odot) = -A + (m_{\rm initial} + A)\,e^{CT} \qquad (8)$$
+where $A = \sigma^{2} R_{\rm star}/G$ and $C = 2\pi G\, n_{\rm star} R_{\rm star}/\sigma$. As an
 example, we plot this curve in purple for the α= 2 case,
 in Figure 3, which agrees with the simulated masses.
-2.5. GW Inspiral
+2.5. Uncertainties in Accretion
+We note that the $\Delta M$ calculated in this proof-of-concept
+ study assumes that the BH accretes all of the
+material that it captures. Estimating the true fraction
+of the material accreted by the BH is very challenging;
+ this complex problem requires numerically solving
+the generalized GR fluid equations with cooling, heating,
+ and radiative transfer, etc. and remains an active
+field of research (e.g., Blandford & Begelman 1999; Park
+& Ostriker 2001; Narayan et al. 2003; Igumenshchev
+et al. 2003; Ohsuga et al. 2005; Yuan et al. 2012; Jiang
+et al. 2014; McKinney et al. 2014; Narayan et al. 2022).
+Heuristically, if a collision between a BH and a star results
+ in an accretion disk, the disk’s viscous timescale
+may be as low as days. The resultant luminosity can
+unbind most of the captured material, though details
+such as the amount accreted and peak luminosity remain
+ uncertain (e.g., Yuan et al. (2012); Jiang et al.
+(2014), see also the discussion in Stone et al. (2017),
+Rizzuto et al. (2022), and Kremer et al. (2022)). The
+question becomes whether or not a BH can still accumulate
+ significant amounts of mass over many collisions
+even if it accretes very little in a single one. We explore
+ the viability of our channel using a physically motivated
+ inefficient accretion model. Several studies have
+invoked momentum-driven winds in BH accretion (e.g.,
+Murray et al. 2005; Ostriker et al. 2010; Brennan et al.
+2018). We thus estimate the fraction of captured mass
+accreted to be approximately $v_{\rm esc}/(c\,\eta)$, where $v_{\rm esc}$ is
+the escape velocity from the BH at 1 $R_\odot$ and $\eta$ is the
+accretion efficiency at the ISCO. We take $\eta$ to be 0.1
+(e.g., Yu & Tremaine 2002). This expression for the
+fraction accreted is consistent with Kremer et al. (2022)
+equation 19 for $s = 0.5$, which is a reasonable value for
+$s$, a free parameter between 0.2 and 0.8. We discuss
+the results of the momentum-driven winds estimate in
+Section 3. We note that the accretion process may be
+more efficient than this estimate implies if, for example,
+jets or other instabilities result in the beaming of radiation
+ away from the captured material (e.g., Blandford
+& Znajek 1977; Begelman 1979; De Villiers et al. 2005;
+McKinney & Gammie 2004; McKinney 2006; Igumenshchev
+ 2008; Begelman 2012a,b; McKinney et al. 2014).
+2.6. GW Inspiral
 When a BH is close to the SMBH, GW emission can
 circularize and shrink its orbit. We implement the effects
  of GW emission on the BH’s semimajor axis and
 eccentricity following Peters & Mathews (1963a). The
 characteristic timescale to merge a BH with an SMBH
 is given by:
-tGW≈2.9×1012yr(M•
-106M⊙)−1(mBH
-106M⊙)−1
-×(M•+mBH
-2×106M⊙)−1(a•
-10−4pc)4
-×f(e•)(1−e2
-•)7/2, (9)
-wheref(e•) is a function of e•. For all values of e•,
-f(e•) is between 0 .979 and 1.81 (Blaes et al. 2002). We
-plot this timescale for a 1 ×105M⊙BH in Figure 1 in
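+$$t_{\rm GW} \approx 2.9 \times 10^{12}\,{\rm yr}
+\left(
+\frac{M_\bullet}{10^{6}\,M_\odot}
+\right)^{-1}
+\left(
+\frac{m_{\rm BH}}{10^{6}\,M_\odot}
+\right)^{-1}
+\times
+\left(\frac{M_\bullet + m_{\rm BH}}{2 \times 10^{6}\,M_\odot}\right)^{-1}
+\left(
+\frac{a_\bullet}{10^{-2}\,{\rm pc}}
+\right)^{4}
+\times f(e_\bullet)\,(1 - e_\bullet^{2})^{7/2} ,
+\qquad (9)$$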
+where $f(e_\bullet)$ is a function of $e_\bullet$. For all values of $e_\bullet$,
+$f(e_\bullet)$ is between 0.979 and 1.81 (Blaes et al. 2002). We
+plot this timescale for a $1 \times 10^{5}\,M_\odot$ BH in Figure 1 in
 blue.
+ Rose et al.
+Figure 3. On the right, we plot final masses of 500 BHs using different values of α in the density profile, shallow ( α = 1) to
+cuspy (α= 2). For the latter case, the purple line shows the analytical result from Eq. 8, taking minitial to be the average mass
+of the population. Faded stars indicate BHs that merged with the SMBH through GWs. On the left, we plot the masses and
+merger times of these BHs.
 In our simulations, we assume a BH has merged with
-the SMBH when the condition tGW< telapsed is met.
+the SMBH when the condition $t_{\rm GW} < t_{\rm elapsed}$ is met.
 When this condition is satisfied, we terminate mass
-growth through collisions for that BH.4
-2.6. IMBH growth
+growth through collisions for that BH.$^{4}$
+2.7. IMBH growth
 As detailed above, BH-stellar collisions can increase
 the BH masses as a function of time. Here, we examine
 the sensitivity of the BH growth to the density power
 law. From Eq. (1), it is clear that the growth rate depends
- on the stellar density profile, governed by the indexα.
- We expect that higher values of α, or steeper
+ on the stellar density profile, governed by the index
+ α. We expect that higher values of α, or steeper
 profiles, will result in more efficient mass growth. In
-Figure 1, larger values of αlead to collision timescales
+Figure 1, larger values of α lead to collision timescales
 in the GN’s inner region, inwards of 0 .25 pc, that are
-4For comparison, we also incrementally changed the semimajor
-axis and eccentricity from GW emission following the equations
-in Peters & Mathews (1963b). This method leads to a slight
-increase in the final IMBH masses because it accounts for the
-collisions that take place while the orbit is gradually shrinking.much smaller that the 10 Gyr simulation time. Figure 3
+much smaller than the 10 Gyr simulation time. Figure 3
 confirms this expectation. It depicts the mass growth of
 a uniform distribution of BHs with initial conditions detailed
  in Section 2.1 for five αvalues, spanning 1 (green)
 to 2 (purple). The most massive IMBHs form inwards
 of 0.25 pc for the α= 2 case.
-2.7. Gravitational Wave Mergers and Intermediate
+2.8. Gravitational Wave Mergers and Intermediate
 and Extreme Mass Ratio Inspiral Candidates
 Towards the SMBH, efficient collisions can create BHs
 massive enough to merge with the SMBH through GWs.
-Following the method detailed in Section 2.5, when a
-given BH meets the criterion tGW< telapsed , we mark
+Following the method detailed in Section 2.6, when a
+given BH meets the criterion tGW < telapsed, we mark
+4 For comparison, we also incrementally changed the semimajor
+axis and eccentricity from GW emission following the equations
+in Peters & Mathews (1963b). This method leads to a slight
+increase in the final IMBH masses because it accounts for the
+collisions that take place while the orbit is gradually shrinking.
 it as merged with the SMBH. We assume that at this
 point the dynamics of the BH will be determined by GW
 emission, shrinking and circularizing the BHs orbit until
@@ -421,40 +510,46 @@ merger. It is interesting to note that even in the absence
  of relaxation processes, which are often invoked
 to explain the formation of EMRIs, EMRIs and notably
 IMRIs can form in this region.
-2.8. Two Body Relaxation Processes
+2.9. Two Body Relaxation Processes
 A BH orbiting the SMBH experiences weak gravitational
  interactions with other objects in the GN. Over a
 relaxation time, these interactions alter its orbit about
 the SMBH. The two-body relaxation timescale for a
 single-mass system is:
-trelax= 0.34σ3
-G2ρ⟨M∗⟩ln Λrlx, (10)
-where ln Λ rlxis the Coulomb logarithm and ⟨M∗⟩is the
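+$$t_{\rm relax} = 0.34\,
+\frac{\sigma^{3}}{G^{2}\,\rho\,\langle M_* \rangle \ln \Lambda_{\rm rlx}}
+, \qquad (10)$$
+where $\ln \Lambda_{\rm rlx}$ is the Coulomb logarithm and $\langle M_* \rangle$ is the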
 average mass of the surrounding objects, here assumed
-to be 1M⊙(Spitzer 1987; Binney & Tremaine 2008,
+to be 1 M⊙ (Spitzer 1987; Binney & Tremaine 2008,
 Eq. (7.106)). This equation represents the approximate
 timescale for a BH on a semi-circular orbit to change
 its orbital energy and angular momentum by order of
 themselves. The BH experiences diffusion in its angular
 momentum and energy as a function of time (depending
 on the eccentricity of the orbit, this process can be more
-efficient Fragione & Sari 2018; Sari & Fragione 2019). In
-Figure 1, we plot the relaxation timescale in gold for a
-range ofα. We note that the Bahcall & Wolf (1976) profile,α=
- 7/4, corresponds to zero net flux and therefore
-does not preferentially migrate objects inward.
-Additionally, because they are more massive on
-average than the surrounding objects, BHs are expected
- to segregate inwards in the GN (e.g., Shapiro
-& Marchant 1978; Cohn & Kulsrud 1978; Morris 1993;
+efficient Fragione & Sari 2018; Sari & Fragione 2019).
+Relaxation can cause the orbit of an object in a GN to
+reach high eccentricities. If the object is a BH, it can
+spiral into the SMBH and form an EMRI, while a star
+IMBH Formation in Galactic Nuclei 7
+can be tidally disrupted by the SMBH (e.g. Magorrian
+& Tremaine 1999; Wang & Merritt 2004; Hopman &
+Alexander 2005; Aharon & Perets 2016; Stone & Metzger
+ 2016; Amaro-Seoane 2018; Sari & Fragione 2019;
+Naoz et al. 2022). The relaxation process is therefore
+crucial to our study. In Figure 1, we plot the relaxation
+timescale in gold for a range of $\alpha$. We note that the Bahcall
+ & Wolf (1976) profile, $\alpha = 7/4$, corresponds to zero
+net flux and therefore does not preferentially migrate
+objects inward.
+Additionally, because BHs are more massive on average
+ than the surrounding objects, they are expected
+to segregate inwards in the GN (e.g., Shapiro &
+Marchant 1978; Cohn & Kulsrud 1978; Morris 1993;
 Miralda-Escud´ e & Gould 2000; Baumgardt et al. 2004).
- Rose et al.
-Figure 3. On the right, we plot final masses of 500 BHs using different values of αin the density profile, shallow ( α= 1) to
-cuspy (α= 2). For the latter case, the purple line shows the analytical result from Eq. 8, taking minitial to be the average mass
-of the population. Faded stars indicate BHs that merged with the SMBH through GWs. On the left, we plot the masses and
-merger times of these BHs.
 They sink toward the SMBH on the mass segregation
-timescale,tseg≈⟨M∗⟩/mBH×trelax (e.g., Spitzer 1987;
+timescale, $t_{\rm seg} \approx \langle M_* \rangle / m_{\rm BH} \times t_{\rm relax}$ (e.g., Spitzer 1987;
 Fregeau et al. 2002; Merritt 2006), which is typically an
 order of magnitude smaller than the relaxation timescale
 plotted in Figure 1.
@@ -462,15 +557,17 @@ We incorporate relaxation processes by introducing a
 small change in the BH’s energy and angular momentum
  each time it orbits the SMBH. We apply a small
 instantaneous velocity kick to the BH, denoted as ∆ v.
-We draw ∆ vfrom a Guassian distribution with average
-of zero and a standard deviation of ∆ vrlx/√
+We draw $\Delta v$ from a Gaussian distribution with average
+of zero and a standard deviation of ∆ vrlx/
+√
 3, where
-∆vrlx=v•√
-P•/trlx(see Bradnick et al. 2017, for an
+$\Delta v_{\rm rlx} =
+v_\bullet \sqrt{P_\bullet / t_{\rm rlx}}$
+(see Bradnick et al. 2017, for an
 approach to changes in the angular momentum). The
 new orbital parameters can be calculated following Lu
-& Naoz (2019), and see Naoz et al. in prep for full set
-of equations.
+& Naoz (2019), and see Naoz et al. (2022) for the full
+set of equations.
 We account for the effects of relaxation processes,
 including mass-segregation, using a multi-faceted approach.
  We begin by migrating each BH towards the
@@ -484,9 +581,10 @@ scattering for both black holes and stars. Within this radius,
  BH self-interaction dominates over two-body scatterings
  with the now rarer main-sequence stars. The
 BHs will then settle onto a Bahcall-Wolf profile, while
-the stars may follow a shallower profile, with approximatelyn⋆∝r−1.5,
- inwards of the transition radius
-(Linial & Sari in prep.).Therefore, after the initial mass segregation, we allow
+the stars may follow a shallower profile, with approximately
+ n⋆ ∝ r−1.5, inwards of the transition radius
+(Linial & Sari in prep.).
+Therefore, after the initial mass segregation, we allow
 the BHs to begin diffusing over a relaxation timescale,
 their orbital parameters changing slowly through a random
  process. In this random process, some of the BHs
@@ -511,55 +609,56 @@ towards the SMBH, their concentration in the inner region
  of the GN increases, allowing them to dominate the
 scattering. We reserve the inclusion of these interactions
 for future study.
-2.9. Effect of Relaxation Processes
+2.10. Effect of Relaxation Processes
 As depicted in Figure 4, two-body relaxation processes
 result in more EMRIs and IMRIs events. These processes
  allow BHs that begin further from the SMBH
 to migrate inwards and grow more efficiently in mass.
 However, it also impedes the growth of BHs that are
-initially closer to the SMBH by allowing them to dif-
-IMBH Formation in Galactic Nuclei 7
-Figure 4. Similar to Figure 3, we plot the initial masses versus initial distance (grey) and final mass versus final distance (red)
-for 500 BHs. This simulation includes relaxation processes, including mass segregation, diffusion, and dynamical friction. We
-assumeα= 1.75 for the GN density profile. Faded stars represent BHs that merged with the SMBH. As a result of inward
-migration, BHs merge more quickly with the SMBH, before they can become as massive as those in Figure 3. Additionally, more
-BHs become EMRIs and IMRIs. Additionally, in the third panel, we show a histogram of the simulated IMBH masses for two
-different values of α, 1.5 (orange, solid), α, 1.75 (red, dashed), and 2 (purple, dash-dotted), accounting for relaxation processes.
-The dashed, faded lines represent the corresponding initial histograms. We assume α= 1.75 for the GN density profile. Faded
-stars represent BHs that merged with the SMBH.
-fuse out of the inner region where collisions are efficient.
+initially closer to the SMBH by allowing them to diffuse
+ out of the inner region where collisions are efficient.
 As can be seen in Figure 4, the net result is that more
 BHs grow, but the maximum mass is lower compared
 to the scenario that ignores two-body relaxation. The
 histogram in Figure 4 presents the final BH mass distributions
  for different power law indices α. As expected,
-the two-body relaxation suppresses the αdependence
+the two-body relaxation suppresses the α dependence
 highlighted in Figure 3. In fact, using a KS test, we
 find that we cannot reject the hypothesis that the two
 distributions were drawn from the same sample for the
-α= 1.75 andα= 2 results. Interestingly, a BH mass
-IMF with an average of 10 M ⊙leads to a final distribution
- with an average of ∼200 M ⊙and a median of
-∼45 M ⊙, which lies within the mass gap.
-3.DISCUSSION AND PREDICTIONS
+α = 1.75 and α = 2 results. Interestingly, a BH mass
+IMF with an average of 10 $M_\odot$ leads to a final distribution
+ with an average of $\sim 200\,M_\odot$ and a median of
+$\sim 45\,M_\odot$, which lies within the mass gap.
+3. DISCUSSION AND PREDICTIONS
 We explore the feasibility of forming IMBHs in a
 GN through successive collisions between a stellar-mass
 BH and main-sequence stars. Taking both a statistical
  and analytic approach, we show that this channel
 can produce IMBHs efficiently with masses as high as
-103−4M⊙and may result in many IMBH-SMBH mergers
- (intermediate-mass ratio inspiral, IMRIs) and EMRIs.
-
+$10^{3\text{--}4}\,M_\odot$ and may result in many IMBH-SMBH mergers
+ (intermediate-mass ratio inspirals, or IMRIs) and
+EMRIs.
+ Rose et al.
+Figure 4. Similar to Figure 3, we plot the initial masses versus initial distance (grey) and final mass versus final distance
+(red) for 500 BHs. This simulation includes relaxation processes, including mass segregation, diffusion, and dynamical friction.
+We assume α= 1.75 for the GN density profile. Faded stars represent BHs that merge with the SMBH. As a result of inward
+migration, BHs merge more quickly with the SMBH, before they can become as massive as those in Figure 3. Additionally,
+more BHs become EMRIs and IMRIs. In the third panel, we show a histogram of the simulated IMBH masses
+for three different values of α, 1.5 (orange, solid), 1.75 (red, dashed), and 2 (purple, dash-dotted), accounting for relaxation
+processes. We also show the results for a simulation with α= 1.75 that accounts for momentum-driven winds (black, dotted).
+Despite the substantially reduced accretion, BHs in the mass gap still form.
 As the stellar mass BH collides with a star, the BH
 will grow in mass. The increase may equal star’s entire
  mass if the relative velocity is smaller than the escape
  velocity from the BH at 1 R ⊙. However, near the
 SMBH, the velocity dispersion may be larger than the
 escape velocity from the BH at the star’s radius. In this
-limit, the BH accretes a “tunnel” of material through
+limit, the BH captures a “tunnel” of material through
 the star, estimated using Bondi-Hoyle-Lyttleton accretion.
  In our statistical analysis, we account for BondiHoyle-Lyttleton
- accretion and find that BHs outside of10−2pc from the SMBH can accrete the entire star (see
+ accretion and find that BHs outside of
+$10^{-2}$ pc from the SMBH can capture the entire star (see
 Figure 2).
 The efficiency of collisions, and therefore IMBH,
 EMRI, and IMRI formation as well, are sensitive to
@@ -572,9 +671,31 @@ However, the inclusion of relaxation processes in the
 simulations dampens the influence of the stellar density
 profile by allowing BHs to diffuse into regions of more
 or less efficient growth. As a result, more BHs grow in
-mass, but their maximum mass is smaller ( ∼104M⊙).
+mass, but their maximum mass is smaller ($\sim 10^{4}\,M_\odot$).
 Additionally, the final masses have no apparent dependence
  on distance from the SMBH (see Figure 4).
+Most simulations in our study assume that the BHs
+accrete all of the mass that they capture. The final BH
+masses can be taken as an upper limit. We note that
+the accretion is a highly uncertain process and represents
+ an active field of study (e.g., Blandford & Begelman
+ 1999; Park & Ostriker 2001; Narayan et al. 2003;
+Igumenshchev et al. 2003; Ohsuga et al. 2005; Yuan
+et al. 2012; Jiang et al. 2014; McKinney et al. 2014;
+Narayan et al. 2022). To assess the limits of our model,
+we also consider a physically motivated accretion model,
+momentum-driven winds (Section 2.5). We present the
+final mass distribution for momentum-driven winds in
+Figure 4. Importantly, we find that BHs within the
+mass gap still form naturally despite the substantially
+reduced accretion. About 5% of the BHs grow by 10
+to 100 $M_\odot$. Furthermore, if we increase this $\Delta M$ estimate
+ by a factor of 2 (i.e., use $\eta = 0.05$), the simulation
+ produces a $3.5 \times 10^{3}\,M_\odot$ IMBH for the same initial
+conditions. Our proof-of-concept demonstrates that collisions
+ between BH and stars are an important process
+that should be taken into account in dense places such
+as a GN.
 Mass growth through BH-main-sequence star collisions
  may act in concert with other IMBH formation
 channels, such as compact object binary mergers (e.g.,
@@ -593,16 +714,84 @@ Kozai Lidov mechanism, leaving behind a single star or
 a single compact object (e.g., Stephan et al. 2016, 2019;
 Hoang et al. 2018). Additionally, to be susceptible to
 evaporation, BH binaries must have a wider configuration.
- Otherwise, they will be more tightly bound that
- Rose et al.
-the average kinetic energy of the surrounding objects,
-and will only harden through weak gravitational interactions
- with neighboring stars (see for example Figure
+ Otherwise, they will be more tightly bound than
+the average kinetic energy of the surrounding objects
+and will only harden through weak gravitational interactions
+IMBH Formation in Galactic Nuclei 9
+ with neighboring stars (see for example Figure
 6 in Rose et al. 2020).
-Not included in this study, collisions between the BH
-and other compact objects will increase the BH growth
-rate. BH-BH mergers (e.g., O’Leary et al. 2009; Fragione
- et al. 2021) and even neutron star BH mergers
+We note that we assume a steady-state and treat the
+stars as a reservoir in this model. Future work will take a
+more nuanced approach to the background stars, whose
+density as a function of time can be influenced by several
+factors. Firstly, the relaxation of the stellar population
+occurs on Gyr timescales. Some studies have suggested
+that in situ star formation can occur in the Galactic
+Center as close as 0.04 pc from the SMBH (e.g., Levin
+& Beloborodov 2003; Paumard et al. 2006), and star
+formation episodes can occur as often as every ∼5 Myr
+(e.g. Lu et al. 2009). Therefore, we expect that after
+the first Gyr, stars within ≲ 0.01 pc will be replenished
+at intervals consistent with the star formation episodes;
+the infalling populations of stars are separated by ∼
+5−10 Myr, which is shorter than the collision timescale.
+However, star-star collisions may complicate this picture
+ within ∼0.01 pc. As discussed above, regular star
+formation ensures the BHs always have a stellar population
+ to interact with outside of $\sim 0.01$ pc.$^{5}$ At 0.01 pc,
+however, the kinetic energy during a collision between
+two 1 M ⊙ stars is larger than their binding energies.
+Collisions can therefore thin out the stellar populations
+during the time it takes them to diffuse to these small
+radii, ≲ 0.01 pc, and may reduce the BH growth in the
+innermost region. We reserve the inclusion of star-star
+collisions for future work. We also note that the disruption
+ of binary stars by the SMBH may help replenish
+the stellar population even as collisions work to deplete
+it (e.g., Balberg et al. 2013); when a binary is disrupted,
+one of the stars is captured on a tightly bound orbit
+about the SMBH.
+An IMBH may also affect the stellar density profile.
+As it spirals into the SMBH, it can perturb stellar orbits,
+and these interactions can lead to hypervelocity stars
+(e.g., Baumgardt et al. 2006a; Löckmann & Baumgardt
+2008). Löckmann & Baumgardt (2008) show that an
+IMBH can modify an initially steep stellar density profile
+ to become consistent with the flatter cusp observed
+in the Galactic Center. The stars may then be replenished
+ on 100 Myr timescales (Baumgardt et al. 2006a).
+Therefore, after the formation of the first few IMBHs,
+subsequent BH growth may occur in bursts, coinciding
+with replenishment of the stars.
+While there are many competing dynamical processes
+that shape the stellar density profile, we stress that α
+5 In fact, the star-star collision timescale is greater than 10 Myr
+for the entire parameter space, save at 0.001 pc for larger values
+of α; the BH-star collision timescale plotted in Fig. 1 is the same
+order of magnitude as the star-star collision timescale.
+can simply be chosen to encapsulate all of the relevant
+physics. A value for α that is constrained by observations
+ must already reflect ongoing processes like star-star
+ collisions and replenishment. Schödel et al. (2018)
+find the observed stellar mass enclosed within 0.01 pc of
+the Milky Way’s Galactic Center to be approximately
+180 M⊙. This estimate is consistent to order of magnitude
+ with our α= 1.25 case. In a simulation like those
+depicted in Figure 4, which include relaxation, α= 1.25
+leads to a maximum IMBH mass of 140 M ⊙. Furthermore,
+ while the stellar mass within 0.01 pc may be a
+few hundred M⊙, Do et al. (2019) and GRAVITY Collaboration
+ et al. (2020) set an upper limit on the mass
+enclosed within the orbit of S0-2 to be about a few thousand
+ M⊙, or 0.1% of the central mass. This upper limit
+can include mass that was previously in stars but is now
+in BHs. In that case, the 180 M ⊙ is what remains of the
+stars, while BHs and IMBHs make up the ∼1000 M⊙
+in the innermost region.
+Also not included in this study, collisions between the
+BH and other compact objects will increase the BH
+growth rate. BH-BH mergers (e.g., O’Leary et al. 2009;
+Fragione et al. 2021) and even neutron star BH mergers
 (e.g., Hoang et al. 2020) become more likely as the BHs
 increase in mass through stellar collisions. As a result,
 the BH-BH collision timescale, discussed in Section 2.2,
@@ -611,43 +800,53 @@ BHs to grow through this channel in addition to stellar
  collisions. Additionally, this compact object mergers
 result in GW recoil, which may have a large impact on
 the dynamics (e.g., Baibhav et al. 2020; Fragione et al.
-2021)
+2021).
 The BH’s mass growth increases GW emission, which
-dissipates energy from the orbit. Along with relaxation
-processes, GW emission causes BHs to sink towards the
-SMBH and eventually undergo a merger. As a result,
-the GN environment is conducive to the formation of
-EMRIs and IMRIs. The GW emission from EMRIs and
-IMRIs is expected to be at mHz frequencies, making
-them promising candidates for LISA to observe. While
-the exact rate calculation is beyond the scope of this
-study, the mechanism outlined here seems very promising.Our
- results also suggest that IMBHs are likely to exists
- in many galactic nuclei, as well as within our own
-galactic center. This implication seems to be consistent
- with recent observational and theoretical studies
-(e.g., Hansen & Milosavljevi´ c 2003; Maillard et al. 2004;
-G¨ urkan & Rasio 2005; Gualandris & Merritt 2009; Chen
-& Liu 2013; Generozov & Madigan 2020; Fragione et al.
-2020a; Zheng et al. 2020; Naoz et al. 2020; GRAVITY
-Collaboration et al. 2020).
+dissipates energy from the orbit. Along with relaxation,
+GW emission causes BHs to sink towards the SMBH
+and eventually undergo a merger. As a result, the GN
+environment is conducive to the formation of EMRIs
+and IMRIs. The GW emission from EMRIs and IMRIs
+ is expected to be at mHz frequencies, making them
+promising candidates for LISA to observe. While the
+exact rate calculation is beyond the scope of this study,
+the mechanism outlined here seems very promising.
+Our results also suggest that BHs within the mass gap
+as well as IMBHs likely exist in many galactic nuclei, as
+well as within our own galactic center. This implication
+seems to be consistent with recent observational and
+theoretical studies (e.g., Hansen & Milosavljevi´ c 2003;
+Maillard et al. 2004; G¨ urkan & Rasio 2005; Gualandris
+& Merritt 2009; Chen & Liu 2013; Generozov & Madigan
+ 2020; Fragione et al. 2020a; Zheng et al. 2020; Naoz
+et al. 2020; GRAVITY Collaboration et al. 2020).
+ Rose et al.
 Lastly, the collisions between stellar mass BHs and
 stars may contribute to the x-ray emission from our
-galactic centre (e.g., Muno et al. 2005, 2009; Hailey et al.
-2018; Zhu et al. 2018; Cheng et al. 2018)5. These interactions,
- in particular grazing collisions, may also result
-in tidal disruption events (e.g., Perets et al. 2016; Samsing
- et al. 2019; Kremer et al. 2021). Thus, the process
-outlined here may produce electromagnetic signatures
-in addition to GW mergers.
-SR thanks the Charles E Young fellowship, the Nina
+galactic centre (e.g., Muno et al. 2005, 2009; Hailey
+et al. 2018; Zhu et al. 2018; Cheng et al. 2018, see Kremer
+ et al. (2022) for a discussion of electromagnetic signatures
+ from BH-star collisions) 6. These interactions,
+in particular grazing collisions, may also result in tidal
+disruption events (e.g., Baumgardt et al. 2006b; Perets
+et al. 2016; Stone et al. 2017; Samsing et al. 2019; Kremer
+ et al. 2021). Thus, the process outlined here may
+produce electromagnetic signatures in addition to GW
+mergers.
+We thank the anonymous referee for useful comments.
+We also thank Jessica Lu, Fred Rasio, Kyle Kremer,
+Ryosuke Hirai, Ilya Mandel, and Erez Michaely for useful
+ discussion.
+SR thanks the Charles E. Young Fellowship, the Nina
 Byers Fellowship, and the Michael A. Jura Memorial
 Graduate Award for support. SR and SN acknowledge
 the partial support from NASA ATP 80NSSC20K0505.
 SN thanks Howard and Astrid Preston for their generous
  support. IL thanks support from the Adams Fellowship.
  SN and RS thank the Bhaumik Institute visitor
-program.
+program. This work was performed in part at the Aspen
+ Center for Physics, which is supported by National
+Science Foundation grant PHY-1607611.
 REFERENCES
 Abbott, B. P., Abbott, R., Abbott, T. D., et al. 2016,
 PhRvL, 116, 241102,
@@ -656,21 +855,40 @@ doi: 10.1103/PhysRevLett.116.241102
 doi: 10.1103/PhysRevLett.118.221101
 —. 2017b, PhRvL, 119, 141101,
 doi: 10.1103/PhysRevLett.119.141101
+Aharon, D., & Perets, H. B. 2016, ApJL, 830, L1,
+doi: 10.3847/2041-8205/830/1/L1
 Alexander, T. 1999, ApJ, 527, 835, doi: 10.1086/308129
 Alexander, T., & Pfuhl, O. 2014, ApJ, 780, 148,
 doi: 10.1088/0004-637X/780/2/148
+Amaro-Seoane, P. 2018, Living Reviews in Relativity, 21, 4,
+doi: 10.1007/s41114-018-0013-8
+6 The connection between the observed X-ray sources at the Galactic
+ Center and tidal capture has been suggested by Generozov
+et al. (2018), but see Zhu et al. (2018); Stephan et al. (2019) for
+alternative channels.
 Arca Sedda, M., Mapelli, M., Benacquista, M., & Spera, M.
 2021, arXiv e-prints, arXiv:2109.12119.
 https://arxiv.org/abs/2109.12119
 Bahcall, J. N., & Wolf, R. A. 1976, ApJ, 209, 214,
 doi: 10.1086/154711
-5The connection between the observed X-ray sources at the Galactic
- Center and tidal capture has been suggested by Generozov
-et al. (2018), but see Zhu et al. (2018); Stephan et al. (2019) for
-alternative channels.Baibhav, V., Gerosa, D., Berti, E., et al. 2020, PhRvD, 102,
+Baibhav, V., Gerosa, D., Berti, E., et al. 2020, PhRvD, 102,
 043002, doi: 10.1103/PhysRevD.102.043002
+Balberg, S., Sari, R., & Loeb, A. 2013, MNRAS, 434, L26,
+doi: 10.1093/mnrasl/slt071
+Baumgardt, H., Gualandris, A., & Portegies Zwart, S.
+2006a, MNRAS, 372, 174,
+doi: 10.1111/j.1365-2966.2006.10818.x
+Baumgardt, H., Hopman, C., Portegies Zwart, S., &
+Makino, J. 2006b, MNRAS, 372, 467,
+doi: 10.1111/j.1365-2966.2006.10885.x
 Baumgardt, H., Makino, J., & Ebisuzaki, T. 2004, ApJ,
 613, 1143, doi: 10.1086/423299
+Begelman, M. C. 1979, MNRAS, 187, 237,
+doi: 10.1093/mnras/187.2.237
+—. 2012a, ApJL, 749, L3, doi: 10.1088/2041-8205/749/1/L3
+IMBH Formation in Galactic Nuclei 11
+—. 2012b, MNRAS, 420, 2912,
+doi: 10.1111/j.1365-2966.2011.20071.x
 Begelman, M. C., Volonteri, M., & Rees, M. J. 2006,
 MNRAS, 370, 289, doi: 10.1111/j.1365-2966.2006.10467.x
 Belczynski, K., Hirschi, R., Kaiser, E. A., et al. 2020a, ApJ,
@@ -683,15 +901,20 @@ Binney, J., & Tremaine, S. 1987, Galactic dynamics
 —. 2008, Galactic Dynamics: Second Edition
 Blaes, O., Lee, M. H., & Socrates, A. 2002, ApJ, 578, 775,
 doi: 10.1086/342655
+Blandford, R. D., & Begelman, M. C. 1999, MNRAS, 303,
+L1, doi: 10.1046/j.1365-8711.1999.02358.x
+Blandford, R. D., & Znajek, R. L. 1977, MNRAS, 179, 433,
+doi: 10.1093/mnras/179.3.433
 Blecha, L., Ivanova, N., Kalogera, V., et al. 2006, ApJ, 642,
 427, doi: 10.1086/500727
 Bondi, H. 1952, MNRAS, 112, 195,
 doi: 10.1093/mnras/112.2.195
-IMBH Formation in Galactic Nuclei 9
 Bondi, H., & Hoyle, F. 1944, MNRAS, 104, 273,
 doi: 10.1093/mnras/104.5.273
 Bradnick, B., Mandel, I., & Levin, Y. 2017, MNRAS, 469,
 2042, doi: 10.1093/mnras/stx1007
+Brennan, R., Choi, E., Somerville, R. S., et al. 2018, ApJ,
+860, 14, doi: 10.3847/1538-4357/aac2c4
 Bringmann, T., Huang, X., Ibarra, A., Vogl, S., & Weniger,
 C. 2012, JCAP, 2012, 054,
 doi: 10.1088/1475-7516/2012/07/054
@@ -709,16 +932,35 @@ et al. 1996, Science, 272, 1286,
 doi: 10.1126/science.272.5266.1286
 Cohn, H., & Kulsrud, R. M. 1978, ApJ, 226, 1087,
 doi: 10.1086/156685
+Dale, J. E., & Davies, M. B. 2006, MNRAS, 366, 1424,
+doi: 10.1111/j.1365-2966.2005.09937.x
+Dale, J. E., Davies, M. B., Church, R. P., & Freitag, M.
+2009, MNRAS, 393, 1016,
+doi: 10.1111/j.1365-2966.2008.14254.x
 Dall’Amico, M., Mapelli, M., Di Carlo, U. N., et al. 2021,
 MNRAS, 508, 3045, doi: 10.1093/mnras/stab2783
+Das, A., Schleicher, D. R. G., Basu, S., & Boekholt, T.
+C. N. 2021a, MNRAS, 505, 2186,
+doi: 10.1093/mnras/stab1428
+Das, A., Schleicher, D. R. G., Leigh, N. W. C., & Boekholt,
+T. C. N. 2021b, MNRAS, 503, 1051,
+doi: 10.1093/mnras/stab402
+De Villiers, J.-P., Hawley, J. F., Krolik, J. H., & Hirose, S.
+2005, ApJ, 620, 878, doi: 10.1086/427142
 Di Carlo, U. N., Giacobbo, N., Mapelli, M., et al. 2019,
 MNRAS, 487, 2947, doi: 10.1093/mnras/stz1453
 Di Carlo, U. N., Mapelli, M., Pasquato, M., et al. 2021,
 MNRAS, 507, 5132, doi: 10.1093/mnras/stab2390
+Do, T., Hees, A., Ghez, A., et al. 2019, Science, 365, 664,
+doi: 10.1126/science.aav8137
+Ebisuzaki, T., Makino, J., Tsuru, T. G., et al. 2001, ApJL,
+562, L19, doi: 10.1086/338118
 Eda, K., Itoh, Y., Kuroyanagi, S., & Silk, J. 2013, PhRvL,
 110, 221101, doi: 10.1103/PhysRevLett.110.221101
 Edgar, R. 2004, NewAR, 48, 843,
 doi: 10.1016/j.newar.2004.06.001
+Escala, A. 2021, ApJ, 908, 57,
+doi: 10.3847/1538-4357/abd93c
 Ferrara, A., Salvadori, S., Yue, B., & Schleicher, D. 2014,
 Monthly Notices of the Royal Astronomical Society, 443,
 2410, doi: 10.1093/mnras/stu1280
@@ -737,13 +979,15 @@ Fregeau, J. M., Cheung, P., Portegies Zwart, S. F., &
 Rasio, F. A. 2004, MNRAS, 352, 1,
 doi: 10.1111/j.1365-2966.2004.07914.x
 Fregeau, J. M., Joshi, K. J., Portegies Zwart, S. F., &
-Rasio, F. A. 2002, ApJ, 570, 171, doi: 10.1086/339576Freitag, M., Amaro-Seoane, P., & Kalogera, V. 2006, ApJ,
+Rasio, F. A. 2002, ApJ, 570, 171, doi: 10.1086/339576
+Freitag, M., Amaro-Seoane, P., & Kalogera, V. 2006, ApJ,
 649, 91, doi: 10.1086/506193
 Generozov, A., & Madigan, A.-M. 2020, ApJ, 896, 137,
 doi: 10.3847/1538-4357/ab94bc
 Generozov, A., Stone, N. C., Metzger, B. D., & Ostriker,
 J. P. 2018, MNRAS, 478, 4030,
 doi: 10.1093/mnras/sty1262
+ Rose et al.
 Genzel, R., Eisenhauer, F., & Gillessen, S. 2010, Reviews of
 Modern Physics, 82, 3121,
 doi: 10.1103/RevModPhys.82.3121
@@ -775,23 +1019,41 @@ Dosopoulou, F. 2018, ApJ, 856, 140,
 doi: 10.3847/1538-4357/aaafce
 Hoang, B.-M., Naoz, S., & Kremer, K. 2020, ApJ, 903, 8,
 doi: 10.3847/1538-4357/abb66a
+Hopman, C., & Alexander, T. 2005, ApJ, 629, 362,
+doi: 10.1086/431475
+Igumenshchev, I. V. 2008, ApJ, 677, 317,
+doi: 10.1086/529025
+Igumenshchev, I. V., Narayan, R., & Abramowicz, M. A.
+2003, ApJ, 592, 1042, doi: 10.1086/375769
+Jiang, Y.-F., Stone, J. M., & Davis, S. W. 2014, ApJ, 796,
+106, doi: 10.1088/0004-637X/796/2/106
 Johnson, J. L., & Bromm, V. 2007, Monthly Notices of the
 Royal Astronomical Society, 374, 1557,
 doi: 10.1111/j.1365-2966.2006.11275.x
+Kremer, K., Lombardi, James C., J., Lu, W., Piro, A. L., &
+Rasio, F. A. 2022, arXiv e-prints, arXiv:2201.12368.
+https://arxiv.org/abs/2201.12368
 Kremer, K., Lu, W., Piro, A. L., et al. 2021, ApJ, 911, 104,
 doi: 10.3847/1538-4357/abeb14
 Kremer, K., Spera, M., Becker, D., et al. 2020, ApJ, 903,
 45, doi: 10.3847/1538-4357/abb945
+Kroupa, P., Subr, L., Jerabkova, T., & Wang, L. 2020,
+MNRAS, 498, 5652, doi: 10.1093/mnras/staa2276
+Levin, Y., & Beloborodov, A. M. 2003, ApJL, 590, L33,
+doi: 10.1086/376675
 Limongi, M., & Chieffi, A. 2018a, ApJS, 237, 13,
 doi: 10.3847/1538-4365/aacb24
 —. 2018b, ApJS, 237, 13, doi: 10.3847/1538-4365/aacb24
+L¨ ockmann, U., & Baumgardt, H. 2008, MNRAS, 384, 323,
+doi: 10.1111/j.1365-2966.2007.12699.x
 Lu, C. X., & Naoz, S. 2019, MNRAS, 484, 1506,
 doi: 10.1093/mnras/stz036
- Rose et al.
 Lu, J. R., Ghez, A. M., Hornstein, S. D., et al. 2009, ApJ,
 690, 1463, doi: 10.1088/0004-637X/690/2/1463
 Madau, P., & Rees, M. J. 2001, ApJL, 551, L27,
 doi: 10.1086/319848
+Magorrian, J., & Tremaine, S. 1999, MNRAS, 309, 447,
+doi: 10.1046/j.1365-8711.1999.02853.x
 Maillard, J. P., Paumard, T., Stolovy, S. R., & Rigaut, F.
 2004, A&A, 423, 155, doi: 10.1051/0004-6361:20034147
 Mapelli, M., Bouffanais, Y., Santoliquido, F., Arca Sedda,
@@ -799,6 +1061,15 @@ M., & Artale, M. C. 2021a, arXiv e-prints,
 arXiv:2109.06222. https://arxiv.org/abs/2109.06222
 Mapelli, M., Dall’Amico, M., Bouffanais, Y., et al. 2021b,
 MNRAS, 505, 339, doi: 10.1093/mnras/stab1334
+Mastrobuono-Battisti, A., Church, R. P., & Davies, M. B.
+2021, MNRAS, 505, 3314, doi: 10.1093/mnras/stab1409
+McKinney, J. C. 2006, MNRAS, 368, 1561,
+doi: 10.1111/j.1365-2966.2006.10256.x
+McKinney, J. C., & Gammie, C. F. 2004, ApJ, 611, 977,
+doi: 10.1086/422244
+McKinney, J. C., Tchekhovskoy, A., Sadowski, A., &
+Narayan, R. 2014, MNRAS, 441, 3177,
+doi: 10.1093/mnras/stu762
 Merritt, D. 2006, Reports on Progress in Physics, 69, 2513,
 doi: 10.1088/0034-4885/69/9/R01
 Miralda-Escud´ e, J., & Gould, A. 2000, ApJ, 545, 847,
@@ -808,17 +1079,36 @@ Muno, M. P., Pfahl, E., Baganoff, F. K., et al. 2005, ApJL,
 622, L113, doi: 10.1086/429721
 Muno, M. P., Bauer, F. E., Baganoff, F. K., et al. 2009,
 ApJS, 181, 110, doi: 10.1088/0067-0049/181/1/110
+Murray, N., Quataert, E., & Thompson, T. A. 2005, ApJ,
+618, 569, doi: 10.1086/426067
+Naoz, S., Rose, S. C., Michaely, E., et al. 2022, ApJL, 927,
+L18, doi: 10.3847/2041-8213/ac574b
 Naoz, S., & Silk, J. 2014, ApJ, 795, 102,
 doi: 10.1088/0004-637X/795/2/102
 Naoz, S., Silk, J., & Schnittman, J. D. 2019, ApJL, 885,
 L35, doi: 10.3847/2041-8213/ab4fed
+IMBH Formation in Galactic Nuclei 13
 Naoz, S., Will, C. M., Ramirez-Ruiz, E., et al. 2020, ApJL,
 888, L8, doi: 10.3847/2041-8213/ab5e3b
+Narayan, R., Chael, A., Chatterjee, K., Ricarte, A., &
+Curd, B. 2022, MNRAS, 511, 3795,
+doi: 10.1093/mnras/stac285
+Narayan, R., Igumenshchev, I. V., & Abramowicz, M. A.
+2003, PASJ, 55, L69, doi: 10.1093/pasj/55.6.L69
+Ohsuga, K., Mori, M., Nakamoto, T., & Mineshige, S. 2005,
+ApJ, 628, 368, doi: 10.1086/430728
 O’Leary, R. M., Kocsis, B., & Loeb, A. 2009, MNRAS, 395,
 2127, doi: 10.1111/j.1365-2966.2009.14653.x
 O’Leary, R. M., Rasio, F. A., Fregeau, J. M., Ivanova, N.,
 & O’Shaughnessy, R. 2006, ApJ, 637, 937,
 doi: 10.1086/498446
+Ostriker, J. P., Choi, E., Ciotti, L., Novak, G. S., & Proga,
+D. 2010, ApJ, 722, 642,
+doi: 10.1088/0004-637X/722/1/642
+Park, M.-G., & Ostriker, J. P. 2001, ApJ, 549, 100,
+doi: 10.1086/319042
+Paumard, T., Genzel, R., Martins, F., et al. 2006, ApJ, 643,
+1011, doi: 10.1086/503273
 Perets, H. B., Li, Z., Lombardi, James C., J., & Milcarek,
 Stephen R., J. 2016, ApJ, 823, 113,
 doi: 10.3847/0004-637X/823/2/113
@@ -835,7 +1125,10 @@ Portegies Zwart, S. F., & McMillan, S. L. W. 2000, ApJL,
 Rashkov, V., & Madau, P. 2014, ApJ, 780, 187,
 doi: 10.1088/0004-637X/780/2/187
 Renzo, M., Farmer, R., Justham, S., et al. 2020, A&A, 640,
-A56, doi: 10.1051/0004-6361/202037710Rodriguez, C. L., Amaro-Seoane, P., Chatterjee, S., &
+A56, doi: 10.1051/0004-6361/202037710
+Rizzuto, F. P., Naab, T., Spurzem, R., et al. 2022,
+MNRAS, doi: 10.1093/mnras/stac231
+Rodriguez, C. L., Amaro-Seoane, P., Chatterjee, S., &
 Rasio, F. A. 2018, PhRvL, 120, 151101,
 doi: 10.1103/PhysRevLett.120.151101
 Rodriguez, C. L., Chatterjee, S., & Rasio, F. A. 2016,
@@ -848,6 +1141,8 @@ Rose, S. C., Naoz, S., Gautam, A. K., et al. 2020, ApJ, 904,
 Sakstein, J., Croon, D., McDermott, S. D., Straight, M. C.,
 & Baxter, E. J. 2020, arXiv e-prints, arXiv:2009.01213.
 https://arxiv.org/abs/2009.01213
+Sakurai, Y., Yoshida, N., Fujii, M. S., & Hirano, S. 2017,
+MNRAS, 472, 1677, doi: 10.1093/mnras/stx2044
 Samsing, J., Venumadhav, T., Dai, L., et al. 2019, PhRvD,
 100, 043009, doi: 10.1103/PhysRevD.100.043009
 Sari, R., & Fragione, G. 2019, ApJ, 885, 24,
@@ -857,6 +1152,8 @@ Schneider, R., Ferrara, A., Natarajan, P., & Omukai, K.
 doi: 10.1086/339917
 Schnittman, J. D., & Buonanno, A. 2007, ApJL, 662, L63,
 doi: 10.1086/519309
+Sch¨ odel, R., Gallego-Cano, E., Dong, H., et al. 2018, A&A,
+609, A27, doi: 10.1051/0004-6361/201730452
 Shapiro, S. L., & Marchant, A. B. 1978, ApJ, 225, 603,
 doi: 10.1086/156521
 Shima, E., Matsuda, T., Takeda, H., & Sawada, K. 1985,
@@ -872,6 +1169,10 @@ Spitzer, L. 1987, Dynamical evolution of globular clusters
 Stephan, A. P., Naoz, S., Ghez, A. M., et al. 2016, ArXiv
 e-prints. https://arxiv.org/abs/1603.02709
 —. 2019, ApJ, 878, 58, doi: 10.3847/1538-4357/ab1e4d
+Stone, N. C., K¨ upper, A. H. W., & Ostriker, J. P. 2017,
+MNRAS, 467, 4180, doi: 10.1093/mnras/stx097
+Stone, N. C., & Metzger, B. D. 2016, MNRAS, 455, 859,
+doi: 10.1093/mnras/stv2281
 The LIGO Scientific Collaboration, the Virgo
 Collaboration, Abbott, R., et al. 2020a, arXiv e-prints,
 arXiv:2009.01075. https://arxiv.org/abs/2009.01075
@@ -885,12 +1186,19 @@ Society, 457, 3356, doi: 10.1093/mnras/stw225
 Vink, J. S., Higgins, E. R., Sander, A. A. C., & Sabhahit,
 G. N. 2021, MNRAS, 504, 146,
 doi: 10.1093/mnras/stab842
-IMBH Formation in Galactic Nuclei 11
+ Rose et al.
 Wang, H., Stephan, A. P., Naoz, S., Hoang, B.-M., &
 Breivik, K. 2021, ApJ, 917, 76,
 doi: 10.3847/1538-4357/ac088d
+Wang, J., & Merritt, D. 2004, ApJ, 600, 149,
+doi: 10.1086/379767
 Woosley, S. E. 2017, ApJ, 836, 244,
-doi: 10.3847/1538-4357/836/2/244Yue, B., Ferrara, A., Salvaterra, R., Xu, Y., & Chen, X.
+doi: 10.3847/1538-4357/836/2/244
+Yu, Q., & Tremaine, S. 2002, MNRAS, 335, 965,
+doi: 10.1046/j.1365-8711.2002.05532.x
+Yuan, F., Wu, M., & Bu, D. 2012, ApJ, 761, 129,
+doi: 10.1088/0004-637X/761/2/129
+Yue, B., Ferrara, A., Salvaterra, R., Xu, Y., & Chen, X.
 2014, Monthly Notices of the Royal Astronomical
 Society, 440, 1263, doi: 10.1093/mnras/stu351
 Zheng, X., Lin, D. N. C., & Mao, S. 2020, arXiv e-prints,
diff --git a/read/results/pypdf/2201.00029.txt b/read/results/pypdf/2201.00029.txt
index 78813f7..7f07ac2 100644
--- a/read/results/pypdf/2201.00029.txt
+++ b/read/results/pypdf/2201.00029.txt
@@ -1,21 +1,369 @@
-       Exploring new techniques for analyzing variability in white dwarf KIC 8626021 Thomas Huckans, Peter Stine Department of Physics and Engineering, Bloomsburg University of Pennsylvania, 400 E 2nd St., Bloomsburg, PA 17815
- Abstract  As is common with the collection of astronomical data, signals are frequently dominated by noise. However, when performing FTs of light curves, re-binning data can improve the signal-to-noise ratio (SNR) at lower frequencies. Using data collected from the Kepler space telescope, we sequentially re-binned data three times to investigate the SNR improvement of lower frequency (< 17 µHz) variability in white dwarf KIC 8626021. We found that the SNR at approximately 5.8 µHz greatly improved through this process, and we postulate that this frequency is linked to the rotation of KIC 8626021.   Introduction  First detected in 1862, white dwarfs long posed a mystery for early observers. When the companion to Sirius was detected, apparent contradictions concerning the mass, luminosities, and densities baffled astronomers. Lacking full understanding of atomic structures and the energy states of electrons, these early researchers believed white dwarfs too dense to exist. However, new discoveries at the turn of the 20th century explained the existence of these stars, and between the world wars white dwarfs were increasingly studied and modeled (Holberg, 2009). As stars age, those that lack the mass to become neutron stars and black holes become white dwarf stars, representing 98% of the stars in our galaxy (Winget & Kepler, 2008).  They are composed of a core of carbon and oxygen ions that slowly cools over billions of years, and the light emanating from these stars is a result of thermal energy. White dwarf stars are no longer supported against the force of gravity by fusion, so the stars collapse into an electron-degenerate state where the electrons in the carbon and oxygen atoms occupy the lowest energy levels. As two electrons cannot occupy the same quantum state, Pauli repulsion keeps white dwarfs from collapsing entirely. For many years, accurate detection of light variability in white dwarfs was difficult due to a lack of adequate instruments. 
However, the launch of the Kepler space telescope in 2009 made capturing the light of distant stars much more efficient and effective (Basri et al., 2010). Kepler was initially developed with the intention of surveying our region of the Milky Way galaxy in order to find potentially habitable planets. The purpose of the mission was to identify key traits for such planets by determining the number of planets in habitable zones, the sizes and shapes of orbits, and the characteristics of the stars being orbited. Over the lifespan of its first mission, Kepler observed approximately 1.5 x 105 stars (Johnson, 2018), affording scientists excellent opportunities to research stellar variability. Due to the loss of a second reaction wheel in 2013, NASA developed the K2 mission, a way to prolong Kepler’s assistance to astronomy and astrophysics. Utilizing Kepler’s ability to maintain three-dimensional control, NASA proceeded to use the telescope to collect photometry data of certain sections of our galaxy, although the number of targets was significantly reduced. In addition, the K2 mission was designed to be community-oriented, with the scientific community having an influence on the fields observed and serving as the analysts of the vast amounts of data being received (Howell et al., 2014). Although Kepler was deactivated in 2018, the data used in this paper came from observations during 2010 and 2012 of white dwarf KIC 8626021 and was obtained from the Kepler Asteroseismic Science Operations Center (KASOC).
- The DBV white dwarf KIC 8626021 has an atmosphere rich in helium. Building upon previous studies, this research investigated novel techniques of analyzing variability in white dwarfs. The dwarf KIC 8626021 was chosen due to the large amount of preexisting research on the star, allowing for the validation of results using our methods. KIC 8626021 has an effective temperature of 29,700 K, log g = 7.890, and mass of 0.56 M☉ (Córsico, 2020). Other research has found that this white dwarf is the DBV with the highest known temperature, and its helium layer is the thinnest (Bischoff-Kim et al., 2015). Despite the long-cadence light curve being too noisy to draw many conclusions, other FTs of short-cadence data have been performed to find variability in the dwarf. Analyses at high frequencies of KIC 8626021 yielded pulsations with frequencies of 4309.89 µHz, 5073.26 µHz, 3681.87 µHz, 3294.22 µHz and 2658.85 µHz (Østensen et al., 2011). These findings confirm the classification of the white dwarf as a V777 Herculis, although our research focuses on low frequencies using long-cadence data.    Methods  All data were downloaded from the KASOC database, and the long-cadence (data sampled approximately every thirty minutes) measurements of Corrected Flux (ppm) were analyzed. All computations were made in Wolfram Mathematica and Microsoft Excel, and FTs were performed in Mathematica. The re-binning process consisted of summing adjacent light curve data points in each quarter, therefore doubling the sampling interval from 0.5 hour to one hour, and then repeating this process on the data sample for a total of three times. In addition, a significant detection was defined as being 3𝝈 above the mean of the relative flux, and 0 on the graphs below represents this 3𝝈 cutoff. (Koch, D. G., 2010), (Wolfram Research, Inc., 2021). To find the SNR, we converted to decibels. Using these SNRs, we were able to easily identify improvement in signal strength.   
Results   Figure 1 presents the lightcurves constructed for quarters seven (Q7) and thirteen (Q13), with corrected flux magnitude (ppm) plotted versus time (Julian days). Figure 2 presents the FTs of the first iteration and three successive re-bins for Q7, while Figure 3 presents the FTs of the same for Q13.  Tables 1 and 2 both show the hypothesized frequency corresponding to the rotation of KIC 8626021 that is found in the FTs of the first iteration and subsequent re-bins for Q7 and Q13. Tables 3 and 4 show all data values < 17 µHz found in the first iterations and re-bins of Q7 and Q13.
 
 
- FIG. 1: Pictured top is the light curve constructed for Q7, below is the light curve for Q13. Q7 lasted from September 24 – December 13, 2010, and Q13 was from March 29 – June 23, 2012. Both graphs were constructed by plotting corrected flux magnitude (flux corrected for instrumental artifacts) versus time in Excel, and gaps in the data were filled in by interpolating between points. Q7 had forty-three interpolated points, and Q13 had sixty-six.
 
 
 
- FIG. 2: The graphs show the initial FTs of Q7, and then the FTs of the three successive re-bins of the light curve data. The significant frequencies of 5.886 µHz and 5.889 µHz are circled. The disappearance of the frequency in the last FT is most likely a byproduct of the method, and the spurious frequency of 5.464 µHz in the last FT most probably represents an artifact of the re-binning process.
 
 
+Exploring new techniques for analyzing variability in white dwarf KIC 8626021
+Thomas Huckans, Peter Stine
+Department of Physics and Engineering, Bloomsburg University of Pennsylvania, 400 E 2nd St.,
+Bloomsburg, PA 17815
 
- FIG. 3: The graphs show the initial FT of Q13, and then the FTs of the three successive re-bins of the light curve data. The significant frequencies of 5.784 µHz and 5.787 µHz are circled. In addition, in the third re-bin, the frequencies 11.641 µHz and 16.823 µHz rise above 3𝝈 and are nearly perfect integer multiples of 5.787 µHz. These harmonics are potentially indications of a starspot (Santos et al., 2017).
 
- Q7 Significant Data Points Light Variability Frequency (µHz) Corrected Flux Magnitude (ppm) Period (days) Signal-to-Noise (dB) Q7 First Iteration 5.886 -1.198 1.966  9.9 Q7 Re-bin 1 5.886  -1.477 1.966  12.8 Q7 Re-bin 2 5.889 0.597 1.965  19.2 TABLE I: The table displays the various frequencies collected from Q7 and the information found through calculations to find period and SNR. The frequency of 5.464 µHz is not included, and therefore was not used in any calculations determining the average period of rotation. The values under corrected flux magnitude are relative to our significant frequency cutoff of 3𝝈, thus negative numbers are under the cutoff.    Q13 Significant Data Points Light Variability Frequency (µHz) Corrected Flux Magnitude (ppm) Period (days) Signal-to-Noise (dB) Q13 First Iteration 5.784 1.555 2.001 15.6 Q13 Re-bin 1 5.784 2.873 2.001 17.7  Q13 Re-bin 2 5.787 4.938 2.000 22.6  Q13 Re-bin 3 5.787 6.909 2.000 26.3  Q13 Re-bin 3 11.641 7.073 0.994 26.4 Q13 Re-bin 3 16.823 2.299 0.688 24.1 TABLE II: The table displays the various frequencies collected from Q13 and the information found through calculations to find period and SNR. The last two significant frequencies (11.641 µHz and 16.823 µHz) for Q13 Re-bin 3 represent potential harmonics, which are discussed in further detail in the Conclusions section of this paper. The values under corrected flux magnitude are relative to our significant frequency cutoff of 3𝝈, thus negative numbers are under the cutoff.
- First Iteration (µHz) First Re-bin (µHz) Second Re-bin (µHz) Third Re-bin (µHz) 0.933 0.933 0.215 0.216 1.148 1.148 0.575 0.575 1.364 1.364 0.934 0.935 1.507 1.507 1.005 1.006 12.561 12.561 1.149 1.150 16.581 16.581 1.221 1.222    1.364 1.366   1.508 1.509   1.580 1.582   1.724 1.725   1.795 1.797   5.889 2.085   6.822 5.392   9.192 5.464   9.479 7.476   11.203 9.489   12.568 11.215   14.291 12.581   16.230 13.084   16.589 13.443    13.659    14.018    14.809    15.097    16.031    16.463    16.894 TABLE III: The table displays all frequencies of Q7 that had a corrected flux magnitude (ppm) above the cutoff of 3𝝈. The minor shifting of significant frequencies between re-bins is a by-product of the method, and we calculated for such errors when finding our average.
- First Iteration (µHz) First Re-bin (µHz) Second Re-bin (µHz) Third Re-bin (µHz) 3.094 2.018 2.019 1.951 5.784 3.094 3.095 2.019 9.080 5.784 5.787 2.442 13.519 7.667 7.671 2.759 15.671 9.080 9.084 3.095 16.209 11.165 11.641 3.634 16.411 13.519 13.526 4.374  15.469 15.477 4.778  15.671 15.679 4.912  16.209 15.881 5.047  16.411 16.419 5.787    8.479    9.084    10.565    11.641    13.526    15.544    15.881    16.823 TABLE IV: The table displays all frequencies of Q13 that had a corrected flux magnitude (ppm) above the cutoff of 3𝝈. The minor shifting of significant frequencies between re-bins is a by-product of the method, and we calculated for such errors when finding our average.   Conclusions  As our research used the long-cadence data from Kepler, much of the high-frequency variability due to gravitational wave pulsations is lost. However, this presents an opportunity to verify our results with the work of research groups that analyzed short-cadence data.With the data analyzed, the lower frequencies between 5-6 µHz emerged. After finding the average of the periods and accounting for a 1𝝈 margin of error, our research hypothesizes that the rotation period of KIC 8626021 is 1.99 ± 0.02 days. Other short-cadence research has found the rotation period to be 1.8 ± 0.4 days, by analyzing the structures of independent modes (Bischoff-Kim et al., 2015). Other calculated periods of rotation have been ≈ 1.7 days (Østensen et al., 2011), and these periods indicate that the more precise significant period identified through our re-binning relates to the rotation of the white dwarf.  Through the re-binning process, the SNR clearly improves for both quarters, and for Q7 it improves by approximately 1.3 dB, except for the last data re-bin. In the last re-bin, the previous
- significant frequency disappears, which becomes increasingly likely after successive re-binning processes. The frequency 5.464 µHz rises as another significant frequency; however, we believe that this new frequency is simply an artifact of the re-binning process. In Q13, we saw SNR improvement ranging from 1.1 dB to 1.3 dB.  Through the re-binning process, more lines, or significant frequencies, appeared above the 3𝝈 cutoff, particularly at lower frequencies. These findings suggest that as an alternative to short-cadence analysis, the re-binning process of long-cadence data can be used to identify significant lower frequencies in white dwarfs. The methods we used are also simple and replicable, which allows even those with less experience to quickly analyze the large amounts of data being collected by orbiting telescopes, such as the currently active TESS (Transiting Exoplanet Survey Satellite) telescope. The presence of possible harmonics in the third re-bin of Q13 also indicates the possible presence of a previously unseen starspot in KIC 8626021 caused by magnetic activity. These spots are darker, cooler, and modulate stellar light curves, and with confirmation of its existence, the harmonic frequencies can be used to calculate the spot’s rotation rate, size, latitude, and contrast (Santos et al., 2017). Using the process of re-binning, a starspot signal, previously dominated by noise, may have been discovered.
- Acknowledgments  We wish to thank Bloomsburg University of Pennsylvania for its continued support of our research. This paper includes data collected by the Kepler mission and obtained from the MAST data archive at the Space Telescope Science Institute (STScI). Funding for the Kepler mission is provided by the NASA Science Mission Directorate. STScI is operated by the Association of Universities for Research in Astronomy, Inc., under NASA contract NAS 5–26555.   References   Basri, G., Walkowicz, L. M., Batalha, N., Gilliland, R. L., Jenkins, J., Borucki, W. J., Koch, D., Caldwell, D., Dupree, A. K., Latham, D. W., Meibom, S., Howell, S., & Brown, T. (2010). PHOTOMETRIC V ARIABILITY IN KEPLER TARGET stars: THE SUN AMONG stars—a FIRST LOOK. The Astrophysical Journal, 713(2), L155-L159. https://doi.org/10.1088/2041-8205/713/2/L155 Bischoff-Kim, A., Østensen, R. H., Hermes, J.j., & Provencal, J. L. (2015). Seven-Period asteroseismic fit of KIC 8626021. EPJ Web of Conferences, 101, 06009. https://doi.org/10.1051/epjconf/201510106009 Córsico, A. H. (2020). White-Dwarf asteroseismology with the kepler space telescope. Frontiers in Astronomy and Space Sciences, 7. https://doi.org/10.3389/fspas.2020.00047 Holberg, J. B. (2009). The discovery of the existence of white dwarf stars: 1862 to 1930. Journal for the History of Astronomy, 40(2), 137-154. https://doi.org/10.1177%2F002182860904000201 Howell, S. B., Sobeck, C., Haas, M., Still, M., Barclay, T., Mullally, F., Troeltzsch, J., Aigrain, S., Bryson, S. T., Caldwell, D., Chaplin, W. J., Cochran, W. D., Huber, D., Marcy, G. W., Miglio, A., Najita, J. R., Smith, M., Twicken, J. D., & Fortney, J. J. (2014). The k2 mission: Characterization and early results. Publications of the Astronomical Society of the Pacific, 126(938), 398-408. https://doi.org/10.1086/676406 Johnson, M. (Ed.). (2018, October 30). Mission overview. National Aeronautics and Space Administration. 
Retrieved September 2, 2021, from https://www.nasa.gov/mission_pages/kepler/overview/index.html Koch, D. G., Borucki, W. J., Basri, G., Batalha, N. M., Brown, T. M., Caldwell, D., Christensen-dalsgaard, J., Cochran, W. D., Devore, E., Dunham, E. W., Gautier, T. N., Geary, J. C., Gilliland, R. L., Gould, A., Jenkins, J., Kondo, Y., Latham, D. W., Lissauer, J. J., Marcy, G., . . . Morrison, D. (2010). KEPLER MISSION design, REALIZED PHOTOMETRIC performance, AND EARLY SCIENCE. The Astrophysical Journal, 713(2), L79-L86. https://dx.doi.org/10.1088/2041-8205/713/2/L79 Østensen, R. H., Bloemen, S., Vučković, M., Aerts, C., Oreiro, R., Kinemuchi, K., Still, M., &  Koester, D. (2011). AT last—a v777 HER PULSATOR IN THE KEPLER FIELD. The Astrophysical Journal, 736(2), L39. https://doi.org/10.1088/2041-8205/736/2/L39  Santos, A. R. G., Cunha, M. S., Avelino, P. P., García, R. A., & Mathur, S. (2017). Starspot signature on the light curve. Astronomy & Astrophysics, 599, A1. https://doi.org/10.1051/0004-6361/201629923
- Winget, D.e., & Kepler, S.o. (2008). Pulsating white dwarf stars and precision asteroseismology. Annual Review of Astronomy and Astrophyics, 46(1), 157-199. https://doi.org/10.1146/annurev.astro.46.060407.145250 Wolfram Research, Inc., Mathematica, Version 12.3.1, Champaign, IL (2021).
\ No newline at end of file
+Abstract
+
+As is common with the collection of astronomical data, signals are frequently dominated
+by noise. However, when performing FTs of light curves, re-binning data can improve the signal-to-noise
+ratio (SNR) at lower frequencies. Using data collected from the Kepler space telescope,
+we sequentially re-binned data three times to investigate the SNR improvement of lower frequency
+(< 17 µHz) variability in white dwarf KIC 8626021. We found that the SNR at approximately 5.8
+µHz greatly improved through this process, and we postulate that this frequency is linked to the
+rotation of KIC 8626021.
+
+
+Introduction
+
+First detected in 1862, white dwarfs long posed a mystery for early observers. When the
+companion to Sirius was detected, apparent contradictions concerning the mass, luminosities, and
+densities baffled astronomers. Lacking full understanding of atomic structures and the energy
+states of electrons, these early researchers believed white dwarfs too dense to exist. However, new
+discoveries at the turn of the 20th century explained the existence of these stars, and between the
+world wars white dwarfs were increasingly studied and modeled (Holberg, 2009).
+As stars age, those that lack the mass to become neutron stars and black holes become
+white dwarf stars, representing 98% of the stars in our galaxy (Winget & Kepler, 2008).  They are
+composed of a core of carbon and oxygen ions that slowly cools over billions of years, and the
+light emanating from these stars is a result of thermal energy. White dwarf stars are no longer
+supported against the force of gravity by fusion, so the stars collapse into an electron-degenerate
+state where the electrons in the carbon and oxygen atoms occupy the lowest energy levels. As two
+electrons cannot occupy the same quantum state, Pauli repulsion keeps white dwarfs from
+collapsing entirely.
+For many years, accurate detection of light variability in white dwarfs was difficult due to
+a lack of adequate instruments. However, the launch of the Kepler space telescope in 2009 made
+capturing the light of distant stars much more efficient and effective (Basri et al., 2010). Kepler
+was initially developed with the intention of surveying our region of the Milky Way galaxy in
+order to find potentially habitable planets. The purpose of the mission was to identify key traits for
+such planets by determining the number of planets in habitable zones, the sizes and shapes of orbits,
+and the characteristics of the stars being orbited. Over the lifespan of its first mission, Kepler
+observed approximately $1.5 \times 10^{5}$ stars (Johnson, 2018), affording scientists excellent
+opportunities to research stellar variability. Due to the loss of a second reaction wheel in 2013,
+NASA developed the K2 mission, a way to prolong Kepler’s assistance to astronomy and
+astrophysics.
+Utilizing Kepler’s ability to maintain three-dimensional control, NASA proceeded to use
+the telescope to collect photometry data of certain sections of our galaxy, although the number of
+targets was significantly reduced. In addition, the K2 mission was designed to be community-oriented,
+with the scientific community having an influence on the fields observed and serving as
+the analysts of the vast amounts of data being received (Howell et al., 2014). Although Kepler was
+deactivated in 2018, the data used in this paper came from observations during 2010 and 2012 of
+white dwarf KIC 8626021 and was obtained from the Kepler Asteroseismic Science Operations
+Center (KASOC).
+
+The DBV white dwarf KIC 8626021 has an atmosphere rich in helium. Building upon
+previous studies, this research investigated novel techniques of analyzing variability in white
+dwarfs. The dwarf KIC 8626021 was chosen due to the large amount of preexisting research on
+the star, allowing for the validation of results using our methods. KIC 8626021 has an effective
+temperature of 29,700 K, log g = 7.890, and mass of 0.56 M☉ (Córsico, 2020). Other research
+has found that this white dwarf is the DBV with the highest known temperature, and its helium
+layer is the thinnest (Bischoff-Kim et al., 2015). Despite the long-cadence light curve being too
+noisy to draw many conclusions, other FTs of short-cadence data have been performed to find
+variability in the dwarf. Analyses at high frequencies of KIC 8626021 yielded pulsations with
+frequencies of 4309.89 µHz, 5073.26 µHz, 3681.87 µHz, 3294.22 µHz and 2658.85 µHz
+(Østensen et al., 2011). These findings confirm the classification of the white dwarf as a V777
+Herculis, although our research focuses on low frequencies using long-cadence data.
+
+
+
+Methods
+
+All data were downloaded from the KASOC database, and the long-cadence (data
+sampled approximately every thirty minutes) measurements of Corrected Flux (ppm) were
+analyzed. All computations were made in Wolfram Mathematica and Microsoft Excel, and FTs
+were performed in Mathematica. The re-binning process consisted of summing adjacent light
+curve data points in each quarter, therefore doubling the sampling interval from 0.5 hour to one
+hour, and then repeating this process on the data sample for a total of three times. In addition, a
+significant detection was defined as being 3𝝈 above the mean of the relative flux, and 0 on the
+graphs below represents this 3𝝈 cutoff. (Koch, D. G., 2010), (Wolfram Research, Inc., 2021). To
+find the SNR, we converted to decibels. Using these SNRs, we were able to easily identify
+improvement in signal strength.
+
+
+Results
+
+ Figure 1 presents the lightcurves constructed for quarters seven (Q7) and thirteen (Q13),
+with corrected flux magnitude (ppm) plotted versus time (Julian days). Figure 2 presents the FTs
+of the first iteration and three successive re-bins for Q7, while Figure 3 presents the FTs of the
+same for Q13.
+ Tables 1 and 2 both show the hypothesized frequency corresponding to the rotation of
+KIC 8626021 that is found in the FTs of the first iteration and subsequent re-bins for Q7 and
+Q13. Tables 3 and 4 show all data values < 17 µHz found in the first iterations and re-bins of Q7
+and Q13.
+
+
+
+
+
+
+
+
+
+
+FIG. 1: Pictured top is the light curve constructed for Q7, below is the light curve for Q13. Q7
+lasted from September 24 – December 13, 2010, and Q13 was from March 29 – June 23, 2012.
+Both graphs were constructed by plotting corrected flux magnitude (flux corrected for
+instrumental artifacts) versus time in Excel, and gaps in the data were filled in by interpolating
+between points. Q7 had forty-three interpolated points, and Q13 had sixty-six.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+FIG. 2: The graphs show the initial FTs of Q7, and then the FTs of the three successive re-bins of
+the light curve data. The significant frequencies of 5.886 µHz and 5.889 µHz are circled. The
+disappearance of the frequency in the last FT is most likely a byproduct of the method, and the
+spurious frequency of 5.464 µHz in the last FT most probably represents an artifact of the re-binning
+process.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+FIG. 3: The graphs show the initial FT of Q13, and then the FTs of the three successive re-bins
+of the light curve data. The significant frequencies of 5.784 µHz and 5.787 µHz are circled. In
+addition, in the third re-bin, the frequencies 11.641 µHz and 16.823 µHz rise above 3𝝈 and are
+nearly perfect integer multiples of 5.787 µHz. These harmonics are potentially indications of a
+starspot (Santos et al., 2017).
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Q7 Significant
+Data Points
+Light
+Variability
+Frequency
+(µHz)
+Corrected Flux
+Magnitude
+(ppm)
+Period (days) Signal-to-Noise
+(dB)
+Q7 First
+Iteration
+5.886 -1.198 1.966  9.9
+Q7 Re-bin 1 5.886  -1.477 1.966  12.8
+Q7 Re-bin 2 5.889 0.597 1.965  19.2
+TABLE I: The table displays the various frequencies collected from Q7 and the information
+found through calculations to find period and SNR. The frequency of 5.464 µHz is not included,
+and therefore was not used in any calculations determining the average period of rotation. The
+values under corrected flux magnitude are relative to our significant frequency cutoff of 3𝝈, thus
+negative numbers are under the cutoff.
+
+
+
+Q13 Significant
+Data Points
+Light
+Variability
+Frequency
+(µHz)
+Corrected Flux
+Magnitude
+(ppm)
+Period (days) Signal-to-Noise
+(dB)
+Q13 First
+Iteration
+5.784 1.555 2.001 15.6
+Q13 Re-bin 1 5.784 2.873 2.001 17.7
+Q13 Re-bin 2 5.787 4.938 2.000 22.6
+Q13 Re-bin 3 5.787 6.909 2.000 26.3
+Q13 Re-bin 3 11.641 7.073 0.994 26.4
+Q13 Re-bin 3 16.823 2.299 0.688 24.1
+TABLE II: The table displays the various frequencies collected from Q13 and the information
+found through calculations to find period and SNR. The last two significant frequencies (11.641
+µHz and 16.823 µHz) for Q13 Re-bin 3 represent potential harmonics, which are discussed in
+further detail in the Conclusions section of this paper. The values under corrected flux magnitude
+are relative to our significant frequency cutoff of 3𝝈, thus negative numbers are under the cutoff.
+
+
+
+
+
+
+
+
+
+
+First Iteration (µHz) First Re-bin (µHz) Second Re-bin (µHz) Third Re-bin (µHz)
+0.933 0.933 0.215 0.216
+1.148 1.148 0.575 0.575
+1.364 1.364 0.934 0.935
+1.507 1.507 1.005 1.006
+12.561 12.561 1.149 1.150
+16.581 16.581 1.221 1.222
+  1.364 1.366
+  1.508 1.509
+  1.580 1.582
+  1.724 1.725
+  1.795 1.797
+  5.889 2.085
+  6.822 5.392
+  9.192 5.464
+  9.479 7.476
+  11.203 9.489
+  12.568 11.215
+  14.291 12.581
+  16.230 13.084
+  16.589 13.443
+   13.659
+   14.018
+   14.809
+   15.097
+   16.031
+   16.463
+   16.894
+TABLE III: The table displays all frequencies of Q7 that had a corrected flux magnitude (ppm)
+above the cutoff of 3𝝈. The minor shifting of significant frequencies between re-bins is a by-product
+of the method, and we calculated for such errors when finding our average.
+
+
+
+
+
+First Iteration (µHz) First Re-bin (µHz) Second Re-bin (µHz) Third Re-bin (µHz)
+3.094 2.018 2.019 1.951
+5.784 3.094 3.095 2.019
+9.080 5.784 5.787 2.442
+13.519 7.667 7.671 2.759
+15.671 9.080 9.084 3.095
+16.209 11.165 11.641 3.634
+16.411 13.519 13.526 4.374
+ 15.469 15.477 4.778
+ 15.671 15.679 4.912
+ 16.209 15.881 5.047
+ 16.411 16.419 5.787
+   8.479
+   9.084
+   10.565
+   11.641
+   13.526
+   15.544
+   15.881
+   16.823
+TABLE IV: The table displays all frequencies of Q13 that had a corrected flux magnitude (ppm)
+above the cutoff of 3𝝈. The minor shifting of significant frequencies between re-bins is a by-product
+of the method, and we calculated for such errors when finding our average.
+
+
+Conclusions
+
+As our research used the long-cadence data from Kepler, much of the high-frequency
+variability due to gravitational wave pulsations is lost. However, this presents an opportunity to
+verify our results with the work of research groups that analyzed short-cadence data. With the
+data analyzed, the lower frequencies between 5-6 µHz emerged. After finding the average of the
+periods and accounting for a 1𝝈 margin of error, our research hypothesizes that the rotation
+period of KIC 8626021 is 1.99 ± 0.02 days. Other short-cadence research has found the rotation
+period to be 1.8 ± 0.4 days, by analyzing the structures of independent modes (Bischoff-Kim et
+al., 2015). Other calculated periods of rotation have been ≈ 1.7 days (Østensen et al., 2011), and
+these periods indicate that the more precise significant period identified through our re-binning
+relates to the rotation of the white dwarf.
+Through the re-binning process, the SNR clearly improves for both quarters, and for Q7 it
+improves by approximately 1.3 dB, except for the last data re-bin. In the last re-bin, the previous
+
+significant frequency disappears, which becomes increasingly likely after successive re-binning
+processes. The frequency 5.464 µHz rises as another significant frequency; however, we believe
+that this new frequency is simply an artifact of the re-binning process. In Q13, we saw SNR
+improvement ranging from 1.1 dB to 1.3 dB.
+Through the re-binning process, more lines, or significant frequencies, appeared above
+the 3𝝈 cutoff, particularly at lower frequencies. These findings suggest that as an alternative to
+short-cadence analysis, the re-binning process of long-cadence data can be used to identify
+significant lower frequencies in white dwarfs. The methods we used are also simple and
+replicable, which allows even those with less experience to quickly analyze the large amounts of
+data being collected by orbiting telescopes, such as the currently active TESS (Transiting
+Exoplanet Survey Satellite) telescope.
+The presence of possible harmonics in the third re-bin of Q13 also indicates the possible
+presence of a previously unseen starspot in KIC 8626021 caused by magnetic activity. These
+spots are darker, cooler, and modulate stellar light curves, and with confirmation of its existence,
+the harmonic frequencies can be used to calculate the spot’s rotation rate, size, latitude, and
+contrast (Santos et al., 2017). Using the process of re-binning, a starspot signal, previously
+dominated by noise, may have been discovered.
+
+Acknowledgments
+
+We wish to thank Bloomsburg University of Pennsylvania for its continued support of our
+research.
+This paper includes data collected by the Kepler mission and obtained from the MAST
+data archive at the Space Telescope Science Institute (STScI). Funding for the Kepler mission is
+provided by the NASA Science Mission Directorate. STScI is operated by the Association of
+Universities for Research in Astronomy, Inc., under NASA contract NAS 5–26555.
+
+
+References
+
+Basri, G., Walkowicz, L. M., Batalha, N., Gilliland, R. L., Jenkins, J., Borucki, W. J., Koch, D.,
+Caldwell, D., Dupree, A. K., Latham, D. W., Meibom, S., Howell, S., & Brown, T. (2010).
+PHOTOMETRIC VARIABILITY IN KEPLER TARGET stars: THE SUN AMONG
+stars—a FIRST LOOK. The Astrophysical Journal, 713(2), L155-L159.
+https://doi.org/10.1088/2041-8205/713/2/L155
+Bischoff-Kim, A., Østensen, R. H., Hermes, J.j., & Provencal, J. L. (2015). Seven-Period
+asteroseismic fit of KIC 8626021. EPJ Web of Conferences, 101, 06009.
+https://doi.org/10.1051/epjconf/201510106009
+Córsico, A. H. (2020). White-Dwarf asteroseismology with the kepler space telescope. Frontiers
+in Astronomy and Space Sciences, 7. https://doi.org/10.3389/fspas.2020.00047
+Holberg, J. B. (2009). The discovery of the existence of white dwarf stars: 1862 to 1930. Journal
+for the History of Astronomy, 40(2), 137-154.
+https://doi.org/10.1177%2F002182860904000201
+Howell, S. B., Sobeck, C., Haas, M., Still, M., Barclay, T., Mullally, F., Troeltzsch, J., Aigrain, S.,
+Bryson, S. T., Caldwell, D., Chaplin, W. J., Cochran, W. D., Huber, D., Marcy, G. W.,
+Miglio, A., Najita, J. R., Smith, M., Twicken, J. D., & Fortney, J. J. (2014). The k2 mission:
+Characterization and early results. Publications of the Astronomical Society of the Pacific,
+126(938), 398-408. https://doi.org/10.1086/676406
+Johnson, M. (Ed.). (2018, October 30). Mission overview. National Aeronautics and Space
+Administration. Retrieved September 2, 2021, from
+https://www.nasa.gov/mission_pages/kepler/overview/index.html
+Koch, D. G., Borucki, W. J., Basri, G., Batalha, N. M., Brown, T. M., Caldwell, D., Christensen-Dalsgaard,
+J., Cochran, W. D., Devore, E., Dunham, E. W., Gautier, T. N., Geary, J. C.,
+Gilliland, R. L., Gould, A., Jenkins, J., Kondo, Y., Latham, D. W., Lissauer, J. J., Marcy,
+G., . . . Morrison, D. (2010). KEPLER MISSION design, REALIZED PHOTOMETRIC
+performance, AND EARLY SCIENCE. The Astrophysical Journal, 713(2), L79-L86.
+https://dx.doi.org/10.1088/2041-8205/713/2/L79
+Østensen, R. H., Bloemen, S., Vučković, M., Aerts, C., Oreiro, R., Kinemuchi, K., Still, M., &
+Koester, D. (2011). AT last—a v777 HER PULSATOR IN THE KEPLER FIELD. The
+Astrophysical Journal, 736(2), L39. https://doi.org/10.1088/2041-8205/736/2/L39
+Santos, A. R. G., Cunha, M. S., Avelino, P. P., García, R. A., & Mathur, S. (2017). Starspot
+signature on the light curve. Astronomy & Astrophysics, 599, A1.
+https://doi.org/10.1051/0004-6361/201629923
+
+Winget, D.e., & Kepler, S.o. (2008). Pulsating white dwarf stars and precision asteroseismology.
+Annual Review of Astronomy and Astrophysics, 46(1), 157-199.
+https://doi.org/10.1146/annurev.astro.46.060407.145250
+Wolfram Research, Inc., Mathematica, Version 12.3.1, Champaign, IL (2021).
\ No newline at end of file
diff --git a/read/results/pypdf/2201.00037.txt b/read/results/pypdf/2201.00037.txt
index ecb4993..96fe280 100644
--- a/read/results/pypdf/2201.00037.txt
+++ b/read/results/pypdf/2201.00037.txt
@@ -1,17 +1,18 @@
 Confidential manuscript submitted to JGR-Planets
 The influence of a fluid core and a solid inner core on the
 Cassini sate of Mercury
-Mathieu Dumberry1
+Mathieu Dumberry 1
 1Department of Physics, University of Alberta, Edmonton, Alberta, Canada.
 Key Points:
-•The Cassini state obliquity of Mercury’s mantle spin axis deviates from that of a rigid
+• The Cassini state obliquity of Mercury’s mantle spin axis deviates from that of a rigid
 planet by no more than 0.01 arcmin.
-•For a core magnetic field above 0.3 mT, electromagnetic coupling locks the fluid and solid
+• For a core magnetic field above 0.3 mT, electromagnetic coupling locks the fluid and solid
 cores into a common precession motion.
-•The larger the inner core is, the more the obliquity of the polar moment of inertia approaches
+• The larger the inner core is, the more the obliquity of the polar moment of inertia approaches
  that expected for a rigid planet.
 Corresponding author: Mathieu Dumberry, dumberry@ualberta.ca
-–1–arXiv:2201.00037v1  [astro-ph.EP]  31 Dec 202
+–1–
+arXiv:2201.00037v1  [astro-ph.EP]  31 Dec 202
 Confidential manuscript submitted to JGR-Planets
 Abstract
 We present a model of the Cassini state of Mercury that comprises an inner core, a fluid core
@@ -44,51 +45,66 @@ offset smaller than the present-day error in measurements. We also show that the
 solid inner core is, the more the planet behaves as if it were precessing as an entirely rigid body.
 1 Introduction
 Mercury is expected to be in a Cassini state (Figure 1) whereby its orbit normal and spinsymmetry
- axis are both coplanar with, and precess about, the normal to the Laplace plane [ Colombo ,
-1966; Peale , 1969, 2006]. The orientation of the Laplace plane varies on long timescales, but
-its present-day orientation can be reconstructed from ephemerides data [ Yseboodt and Margot ,
+ axis are both coplanar with, and precess about, the normal to the Laplace plane [ Colombo,
+1966; Peale, 1969, 2006]. The orientation of the Laplace plane varies on long timescales, but
+its present-day orientation can be reconstructed from ephemerides data [ Yseboodt and Margot,
 2006; Baland et al. , 2017]. Likewise, the rate of precession is also not observed directly, but is
 reconstructed by ephemerides data. The latest estimate is a retrograde precession period of 325,513
-yr with an inclination angle of I= 8.5330◦between the orbit and Laplace plane normals [ Baland
+yr with an inclination angle of I = 8.5330◦between the orbit and Laplace plane normals [ Baland
  et al. , 2017]. Measurements of the obliquity εm, defined as the angle of misalignment between
  the spin-symmetry axis and the orbit normal, have been obtained by different techniques,
 including ground based radar observations [ Margot et al. , 2007, 2012], and stereo digital terrain
- images [ Stark et al. , 2015a] and radio tracking data [ Mazarico et al. , 2014; Verma and Margot,
+ images [Stark et al. , 2015a] and radio tracking data [ Mazarico et al. , 2014; Verma and Margot,
  2016; Genova et al. , 2019; Konopliv et al. , 2020] from the MErcury Surface Space ENvironment
  GEochemistry and Ranging (MESSENGER) spacecraft. Within measurement errors,
 all techniques yield an obliquity which is coplanar with the orbit and Laplace plane normals
-and consistent with a Cassini state. Furthermore, the observed obliquity angle (2 .042±0.08
+and consistent with a Cassini state. Furthermore, the observed obliquity angle (2 .042 ±0.08
 –2–
 Confidential manuscript submitted to JGR-Planets
 I
 descending
-node of orbitΩpê3I
-Iê3L εmI ê3p
-ascendingnode of orbit
-descendingnode of equatorequatorial plane
-orbitaldirection
-Sê3Iê3L
+node of orbit
+Ωp
+ê3
+I
+I
+ê3
+LεmI ê3
+p
+ascending
+node of orbit
+descending
+node of equator
+equatorial
+plane
+orbital
+direction
+S
+ê3
+Iê3
+L
 M
-εmorbital plane
+εm
+orbital plane
 Figure 1. The orbit of Mercury (M) around Sun (S) with respect to the Laplace plane (grey shaded
 rectangle) and the Cassini state of Mercury. The normal to the orbital plane ( ˆeI
 3) is offset from the normal
  to the Laplace plane ( ˆeL
-3) by an angle I= 8.5330◦. The symmetry axis of the mantle ˆep
-3is offset
+3 ) by an angle I = 8 .5330◦. The symmetry axis of the mantle ˆep
+3 is offset
 from ˆeI
-3byεm≈2 arcmin. ˆeI
-3andˆep
-3are coplanar with, and precess about, ˆeL
-3in a retrograde direction
-at frequency Ω p= 2π/325,513 yr−1. The blue (orange) shaded region indicates the portion of the orbit
+3 by εm ≈ 2 arcmin. ˆeI
+3 and ˆep
+3 are coplanar with, and precess about, ˆeL
+3 in a retrograde direction
+at frequency Ωp = 2 π/325,513 yr−1. The blue (orange) shaded region indicates the portion of the orbit
 when Mercury is above (below) the Laplace plane. Angles are not drawn to scale.
-arcmin [ Margot et al. , 2012], 2.029±0.085 arcmin [ Stark et al. , 2015a] and 1 .968±0.027 [ Genova
+arcmin [Margot et al. , 2012], 2.029±0.085 arcmin [Stark et al. , 2015a] and 1 .968±0.027 [Genova
  et al. , 2019] to list a few) matches that expected if Mercury occupies Cassini state 1.
 The prediction of Mercury’s obliquity is based on the assumption that the whole planet
 precesses as a single body. However, we know that Mercury has a fluid core from two main lines
 of evidence. First, Mercury’s large scale magnetic field is intrinsic, and must be maintained by
-dynamo action [ Anderson et al. , 2011, 2012; Johnson et al. , 2012]. This requires fluid motion
+dynamo action [Anderson et al. , 2011, 2012; Johnson et al. , 2012]. This requires fluid motion
 in its metallic core, and hence that Mercury’s core is at least partially liquid. Second, the observed
  amplitude of the 88-day longitudinal libration is approximately twice as large as that
 expected if Mercury were librating as a rigid body [ Margot et al. , 2007, 2012; Stark et al. , 2015a].
@@ -101,7 +117,7 @@ approximate limit of 800 km on the inner core radius [ Grott et al. , 2011]. How
 core could be larger if a significant fraction of its growth occurred earlier in Mercury’s history.
 –3–
 Confidential manuscript submitted to JGR-Planets
-With a fluid core, and possibly a solid inner core, the observed obliquity εmreflects the
+With a fluid core, and possibly a solid inner core, the observed obliquity εm reflects the
 orientation of the spin-symmetry axis of the precessing mantle and crust alone. Neglecting dissipation,
  and at equilibrium in the Cassini state, the spin axis of the fluid core and the spinsymmetry
  axis of the inner core should both also precess about the normal to the Laplace plane
@@ -110,7 +126,7 @@ plane that defines the equilibrium Cassini state [e.g. Dumberry and Wieczorek ,
 their obliquity angles may be different than εm. Whether the spin axis of the fluid core is brought
 into an alignment with the mantle obliquity depends primarily on the pressure torque (also referred
  to as the inertial torque) exerted by the centrifugal force of the rotating fluid core on the
-misaligned elliptical shape of the core-mantle boundary (CMB) [ Poincar´ e , 1910]. The more flattened
+misaligned elliptical shape of the core-mantle boundary (CMB) [ Poincar´ e, 1910]. The more flattened
  the CMB is, the stronger the pressure torque is, and the more the fluid core is entrained
 into a co-precession at a similar obliquity to that of the mantle. The flattening of Mercury’s
 CMB is not known. But if one assumes that the topography of the CMB coincides with an equipotential
@@ -132,7 +148,7 @@ mantle. Conversely, if the pressure torque at the ICB is the largest, the inner
  at the ICB should also enforce a closer alignment between the rotation vectors of the inner
  core and fluid core.
 It is on the basis of the observed mantle obliquity that the polar moment of inertia of Mercury
- is inferred [e.g. Peale , 1976; Margot et al. , 2018]. Inherent in this calculation is the builtin
+ is inferred [e.g. Peale, 1976; Margot et al. , 2018]. Inherent in this calculation is the builtin
  assumption that the mantle obliquity does not deviate from that of a rigid planet by a substantial
  amount. However, the recent study by Peale et al. [2016] suggests that the inner core
 can be misaligned from the mantle by a few arcmin and that a large inner core can perturb the
@@ -143,7 +159,7 @@ does not coincide with the orientation of the polar moment of inertia of the who
 can introduce a systematic offset between different types of obliquity measurements. Those based
 on tracking topographic features [ Margot et al. , 2007, 2012; Stark et al. , 2015a] capture the obliquity
  of the mantle spin axis. While those based on the orientation of the gravity field [ Mazarico
-et al. , 2014; Verma and Margot , 2016; Genova et al. , 2019; Konopliv et al. , 2020] are instead
+et al., 2014; Verma and Margot , 2016; Genova et al. , 2019; Konopliv et al. , 2020] are instead
 tied to the orientation of the principal moment of inertia of the whole planet. An offset of the
 obliquity of the mantle spin axis with respect to the gravity field could be used to constrain the
 size of the inner core, even though this is difficult to do at present because the different estimates
@@ -164,134 +180,163 @@ differ from that of an entirely rigid Mercury, and third, by how much the obliqu
 2.1 The interior structure of Mercury
 Our model of Mercury consists of four layers of uniform density: a solid inner core, a fluid
 outer core, a solid mantle, and a thin crust. The outer radii of each of these layers, are denoted
-byrs,rf,rm, andR, and their densities by ρs,ρf,ρm, andρc, respectively. The inner core radiusrscorresponds
- to the ICB radius, the fluid core radius rfto the CMB radius, and R=
+by rs, rf , rm, and R, and their densities by ρs, ρf , ρm, and ρc, respectively. The inner core radius
+ rs corresponds to the ICB radius, the fluid core radius rf to the CMB radius, and R=
 2439.36 km to the planetary radius of Mercury. Compressibility effects from increasing pressure
  with depth are not negligible in the core of Mercury. However adopting uniform densities
 simplifies the analytical expressions of the model while still capturing the first order rotational
 dynamics. Uniform densities were also adopted by Peale et al. [2016] and following the same
 strategy facilitates comparisons between our results.
-We build our interior model as detailed in Peale et al. [2016]. We first specify rs,ρs(or
-a density contrast at the ICB), the crustal density ρcand crustal thickness h=R−rm. The
-three unknowns rf,ρfandρmare then solved such that the interior model is consistent with
-the known mass Mand chosen values of the moments of inertia of the whole planet Cand that
+We build our interior model as detailed in Peale et al. [2016]. We first specify rs, ρs (or
+a density contrast at the ICB), the crustal density ρc and crustal thickness h= R−rm. The
+three unknowns rf , ρf and ρm are then solved such that the interior model is consistent with
+the known mass M and chosen values of the moments of inertia of the whole planet C and that
 of the mantle and crust Cm.
 Each layer is triaxial in shape. We denote the polar flattening (or geometrical ellipticity)
-byϵi, defined as the difference between the mean equatorial and polar radii, divided by the mean
+by ϵi, defined as the difference between the mean equatorial and polar radii, divided by the mean
 spherical radius. Likewise, we denote the equatorial flattening by the variable ξi, defined as the
 difference between the maximum and minimum equatorial radii, divided by the mean spherical
- radius. As above, we use the subscript i=s,f,mandr, to denote the polar or equatorial
+ radius. As above, we use the subscript i = s, f, m and r, to denote the polar or equatorial
  flattenings at the ICB, CMB, crust-mantle boundary (CrMB), and surface.
 The measured polar and equatorial flattenings are taken from Perry et al. [2015] and their
 numerical values are given in Table 1. We then assume that the ICB and CMB are both at hydrostatic
  equilibrium with the imposed gravitational potential induced by the flattenings at the
 CrMB and surface. The flattenings at all interior boundaries are specified such that they are
-consistent with the observed degree 2 spherical harmonic coefficients of gravity J2andC22; their
-numerical values are given in Table 1. Specifically, J2andC22are connected to the principal
-moments of inertia of Mercury ( C >B >A ) and to the polar and equatorial flattenings by
-J2=C−¯A
-MR2=8π
-151
-MR2[
-(ρs−ρf)r5
-sϵs+ (ρf−ρm)r5
-fϵf+ (ρm−ρc)r5
-mϵm+ρcR5ϵr]
+consistent with the observed degree 2 spherical harmonic coefficients of gravity J2 and C22; their
+numerical values are given in Table 1. Specifically, J2 and C22 are connected to the principal
+moments of inertia of Mercury ( C >B >A) and to the polar and equatorial flattenings by
+J2 = C−¯A
+MR2 = 8π
+15
+1
+MR2
+[
+(ρs −ρf )r5
+sϵs + (ρf −ρm)r5
+f ϵf + (ρm −ρc)r5
+mϵm + ρcR5ϵr
+]
 , (1a)
-C22=B−A
-4MR2=8π
-151
-4MR2[
-(ρs−ρf)r5
-sξs+ (ρf−ρm)r5
-fξf+ (ρm−ρc)r5
-mξm+ρcR5ξr]
-.(1b)
-where ¯Ais the mean equatorial moment of inertia defined below. The same procedure was used
-inPeale et al. [2016] and the mathematical details are given in Equations (18-20) of Dumberry
+C22 = B−A
+4MR2 = 8π
+15
+1
+4MR2
+[
+(ρs −ρf )r5
+sξs + (ρf −ρm)r5
+f ξf + (ρm −ρc)r5
+mξm + ρcR5ξr
+]
+. (1b)
+where ¯A is the mean equatorial moment of inertia defined below. The same procedure was used
+in Peale et al. [2016] and the mathematical details are given in Equations (18-20) of Dumberry
 –5–
 Confidential manuscript submitted to JGR-Planets
 Mercury Parameter Numerical value Reference
-mean motion, n 2π/87.96935 day−1Stark et al. [2015b]
-rotation rate, Ω o= 1.5n 2π/58.64623 day−1Stark et al. [2015b]
-orbit precession rate, Ω p 2π/325,513 yr−1Baland et al. [2017]
-Poincar´ e number, δω= Ωp/Ωo4.9327×10−7
+mean motion, n 2π/87.96935 day−1 Stark et al. [2015b]
+rotation rate, Ωo = 1.5n 2π/58.64623 day−1 Stark et al. [2015b]
+orbit precession rate, Ω p 2π/325,513 yr−1 Baland et al. [2017]
+Poincar´ e number,δω = Ωp/Ωo 4.9327 ×10−7
 orbital eccentricity, ec 0.20563 Baland et al. [2017]
-orbital inclination, I 8.5330◦Baland et al. [2017]
+orbital inclination, I 8.5330◦ Baland et al. [2017]
 mean planetary radius, R 2439.360 km Perry et al. [2015]
-mass,M 3.3012×1023kg Genova et al. [2019]
-mean density, ¯ ρ 5429.5 kg m−3
-J2 5.0291×10−5Genova et al. [2019]
-C22 8.0415×10−6Genova et al. [2019]
-polar surface flattening, ϵr 6.7436×10−4Perry et al. [2015]
-equatorial surface flattening, ξr5.1243×10−4Perry et al. [2015]
-Table 1. Reference parameters for Mercury. The mass Mis computed from GM = 22031.8636×109
-m3/s2taken from Genova et al. [2019]. The mean density is calculated from4π
-3¯ρR3=M. The numerical
-values ofϵrandξrare calculated from ϵr= (¯a−c)/Randξr= (a−b)/R, where ¯a=1
-2(a+b) and where
-a= 2440.53 km,b= 2439.28 km and c= 2438.26 km are the semimajor, intermediate and semiminor
-axes of the trixial ellipsoidal shape of Mercury taken from Table 2 of Perry et al. [2015].J2andC22are
-computed from Equation (4) in the Supporting Information of Genova et al. [2019].
+mass, M 3.3012 ×1023 kg Genova et al. [2019]
+mean density, ¯ρ 5429.5 kg m−3
+J2 5.0291 ×10−5 Genova et al. [2019]
+C22 8.0415 ×10−6 Genova et al. [2019]
+polar surface flattening, ϵr 6.7436 ×10−4 Perry et al. [2015]
+equatorial surface flattening, ξr 5.1243 ×10−4 Perry et al. [2015]
+Table 1. Reference parameters for Mercury. The mass M is computed from GM = 22031 .8636 × 109
+m3/s2 taken from Genova et al.[2019]. The mean density is calculated from 4π
+3 ¯ρR3 = M. The numerical
+values of ϵr and ξr are calculated from ϵr = (¯a− c)/R and ξr = (a− b)/R, where ¯a= 1
+2 (a+ b) and where
+a = 2440 .53 km, b = 2439 .28 km and c = 2438 .26 km are the semimajor, intermediate and semiminor
+axes of the trixial ellipsoidal shape of Mercury taken from Table 2 of Perry et al.[2015]. J2 and C22 are
+computed from Equation (4) in the Supporting Information of Genova et al.[2019].
 and Wieczorek [2016] who adopted the same strategy in their interior modelling of the Moon.
 Note that we neglect the misalignment between the triaxial shape of Mercury’s surface topography
  and the axes of the principal moments of inertia, which amount to a polar offset of ∼2◦
 and an equatorial offset of ∼15◦[Perry et al. , 2015].
 Once the densities and flattenings of all interior regions are known, we can specify the moments
- of inertia of the fluid core ( Cf> B f> A f) and solid inner core ( Cs> B s> A s)
+ of inertia of the fluid core ( Cf > Bf > Af ) and solid inner core ( Cs > Bs > As)
 along with the mean equatorial moments of inertia
-¯A=1
-2(A+B), ¯Af=1
-2(Af+Bf), ¯As=1
-2(As+Bs). (2)
-From these, we define the polar ( e,ef,es) and equatorial ( γ,γs) dynamical ellipticities of the
+¯A= 1
+2(A+ B) , ¯Af = 1
+2(Af + Bf ) , ¯As = 1
+2(As + Bs) . (2)
+From these, we define the polar ( e, ef , es) and equatorial ( γ, γs) dynamical ellipticities of the
 whole planet (no subscript), fluid core (subscript f) and solid inner core (subscript s), which
 enter our rotational model,
-e=C−¯A
-¯Aef=Cf−¯Af
-¯Afes=Cs−¯As
-¯As, (3a)
-γ=B−A
-¯Aγs=Bs−As
-¯As. (3b)
-We further note that eandγare connected to J2andC22by
-e=MR2
-¯AJ2, γ =4MR2
-¯AC22. (4)
+e= C−¯A
+¯A ef = Cf −¯Af
+¯Af
+es = Cs −¯As
+¯As
+, (3a)
+γ = B−A
+¯A γs = Bs −As
+¯As
+. (3b)
+We further note that e and γ are connected to J2 and C22 by
+e= MR2
+¯A J2 , γ = 4MR2
+¯A C22 . (4)
 –6–
 Confidential manuscript submitted to JGR-Planets
 θm
 θn
 θs
-θfΩ
+θf
+Ω
 Ωs
-Ωfê3p
-ê3s ê3I
-Iεm
-θpê3L
-ê1pê2p
+Ωf
+ê3
+p
+ê3
+sê3
+I
+I
+εm
+θp
+ê3
+L
+ê1
+p
+ê2
+p
 Cassini plane
-ωΩotê3I
-Iεmê3p
-ê1ê2pê3La) b)
+ωΩot
+ê3
+I
+I εm
+ê3
+p
+ê1
+ê2
+p
+ê3
+L
+a) b)
 Figure 2. Geometry of the Cassini state model of Mercury viewed (a) in the Cassini plane and (b)
 in a frame attached to the rotating mantle. The orbit normal ( ˆeI
-3) is tilted by an angle I= 8.533◦from
+3) is tilted by an angle I = 8 .533◦ from
 the Laplace normal ( ˆeL
-3) and the symmetry axis of Mercury’s mantle ( ˆep
+3 ) and the symmetry axis of Mercury’s mantle ( ˆep
 3) is tilted by an obliquity εm
 with respect to ˆeI
 3. Shown in (a) are the orientations of the symmetry axis of the inner core ( ˆes
 3), the
-rotation rate vectors of the mantle ( Ω), fluid core ( Ωf) and inner core ( Ωf) and angles θp,θn,θm,θf
-andθsin their Cassini state equilibrium. All vectors and angles are in a common plane which we refer
+rotation rate vectors of the mantle ( Ω), fluid core ( Ωf) and inner core ( Ωf) and angles θp, θn, θm, θf
+and θs in their Cassini state equilibrium. All vectors and angles are in a common plane which we refer
 to as the Cassini plane. The light grey, white, and dark grey ellipsoid represent a polar cross-section of
 the mantle, fluid core and inner core, respectively; blue shaded parts show an equatorial cross section.
 The black curved arrow in the equatorial plane in (a) indicates the direction of rotation of the equatorial
 mantle axes ˆep
-1andˆep
-2with respect to the Cassini plane. Viewed in the frame attached to the rotating
-mantle (b), the Cassini plane is rotating at frequency ωΩo=−Ωo−ΩpcosIin the longitudinal direction.
+1 and ˆep
+2 with respect to the Cassini plane. Viewed in the frame attached to the rotating
+mantle (b), the Cassini plane is rotating at frequency ωΩo = −Ωo − Ωpcos I in the longitudinal direction.
  The oblateness of all three regions and the amplitude of all angles are exaggerated for purpose of
 illustration.
 –7–
@@ -300,73 +345,73 @@ Confidential manuscript submitted to JGR-Planets
 Mercury’s rotation is characterized by a 3:2 spin-orbit resonance in which it completes
 3 rotations around itself for every 2 orbital revolutions around the Sun. The orbital period is
 87.96935 day and the sidereal rotation period is 58.64623 day [ Stark et al. , 2015b]. These define
- the mean motion n= 2π/87.96935 day−1and the sidereal frequency Ω o= 2π/58.64623
-day−1, with Ω o= 1.5n. Mercury’s rotational state is also characterized by a Cassini state whereby
+ the mean motion n = 2π/87.96935 day−1 and the sidereal frequency Ω o = 2π/58.64623
+day−1, with Ωo = 1.5 n. Mercury’s rotational state is also characterized by a Cassini state whereby
 the orientations of the orbit normal ( ˆeI
 3) and of the mantle symmetry axis ( ˆep
 3) are both coplanar
  with, and precess about, the normal to the Laplace plane ( ˆeL
-3). The orientation of the Laplace
+3 ). The orientation of the Laplace
 plane varies on long timescales, but it can be taken as invariable in inertial space for our present
 purpose. The Cassini state of Mercury is illustrated in Figure 1. The angle between ˆeL
-3andˆeI
+3 and ˆeI
 3
-is the orbital inclination I= 8.5330◦[Baland et al. , 2017], the angle between ˆeI
-3andˆep
-3is the
-obliquityεmand the angle between ˆeL
-3andˆep
-3isθp=I+εm. The precession of ˆeI
-3andˆep
+is the orbital inclination I = 8.5330◦[Baland et al. , 2017], the angle between ˆeI
+3 and ˆep
+3 is the
+obliquity εm and the angle between ˆeL
+3 and ˆep
+3 is θp = I + εm. The precession of ˆeI
+3 and ˆep
 3
-about the Laplace pole is retrograde with frequency Ω p= 2π/325,513 yr−1[Baland et al. , 2017].
+about the Laplace pole is retrograde with frequency Ω p = 2π/325,513 yr−1 [Baland et al. , 2017].
 The mantle and crust are welded together and form a single rotating region which we refer
  to as the ‘mantle’ in the context of our rotational model. The rotation and symmetry axes
 of the mantle are expected to remain in close alignment, but they do not coincide exactly. We
 define the rotation rate vector of the mantle by Ω, and its misalignment from ˆep
-3by an angle
-θm. Note that θm≪εmand it is often the spin axis of Mercury which is used to define the
-obliquityεm[e.g. Baland et al. , 2017]. If Mercury were an entirely rigid planet, ˆep
-3andΩwould
+3 by an angle
+θm. Note that θm ≪εm and it is often the spin axis of Mercury which is used to define the
+obliquity εm [e.g. Baland et al. , 2017]. If Mercury were an entirely rigid planet, ˆep
+3 and Ω would
 characterize the symmetry and rotation axes of the whole of Mercury, not just its mantle, and
-the anglesI,εmandθmwould completely describe the Cassini state. The presence of a fluid
+the angles I, εm and θm would completely describe the Cassini state. The presence of a fluid
 outer core and solid inner core require three additional orientation vectors and angles. The symmetry
  axis of the inner core is defined by unit vector ˆes
-3and its misalignment from ˆep
-3by an
-angleθn. The rotation vectors of the fluid core and inner core are defined as ΩfandΩs, respectively,
- and their misalignment from the rotation vector of the mantle Ωare defined by anglesθfandθs(see
- Figure 2a). The rotation and symmetry axes of the inner core remain in close
-alignment, so θn≈θs. To be formal in our definition of the different angles of misalignment,
-forIdefined positive as depicted on Figure 2a, all angles are defined positive in the clockwise
+3 and its misalignment from ˆep
+3 by an
+angle θn. The rotation vectors of the fluid core and inner core are defined as Ωf and Ωs, respectively,
+ and their misalignment from the rotation vector of the mantle Ω are defined by angles
+ θf and θs (see Figure 2a). The rotation and symmetry axes of the inner core remain in close
+alignment, so θn ≈θs. To be formal in our definition of the different angles of misalignment,
+for I defined positive as depicted on Figure 2a, all angles are defined positive in the clockwise
 direction.
 At equilibrium in the Cassini state, the three orientation vectors ( ˆeI
-3,ˆep
-3,ˆes
+3, ˆep
+3, ˆes
 3) and three
-rotation vectors ( Ω,Ωf,Ωs) are forced to precess about ˆeL
-3at the same frequency. If we neglect
+rotation vectors (Ω, Ωf, Ωs) are forced to precess about ˆeL
+3 at the same frequency. If we neglect
  dissipation, all vectors lie on the same plane, which we refer to as the Cassini plane. Viewed
 in inertial space, the Cassini plane is rotating in a retrograde direction at frequency Ω p. Viewed
 in the frame attached to the mantle rotating at sidereal frequency Ω o, the Cassini plane is rotating
- in a retrograde direction at frequency ωΩo(see Figure 2b), where ω, expressed in cycles
+ in a retrograde direction at frequency ωΩo (see Figure 2b), where ω, expressed in cycles
 per Mercury day, is equal to
-ω=−1−δωcos(θp). (5)
-The factor δω= Ω p/Ωo= 4.933×10−7is the Poincar´ e number, expressing the ratio of the
+ω= −1 −δωcos(θp) . (5)
+The factor δω = Ωp/Ωo = 4.933 ×10−7 is the Poincar´ e number, expressing the ratio of the
 forced precession to sidereal rotation frequencies. The invariance of the Laplace plane normal
 as seen in the mantle frame is expressed as
 d
 dtˆeL
-3+Ω×ˆeL
-3=0, (6)
+3 + Ω ×ˆeL
+3 = 0 , (6)
 or equivalently, by Equation (19e) of Stys and Dumberry [2018],
-ωsin(θp) + sin(θm+θp) = 0. (7)
+ωsin(θp) + sin(θm + θp) = 0 . (7)
 –8–
 Confidential manuscript submitted to JGR-Planets
-This expresses a formal connection between θpandθmwhich is independent of the interior structure
- of Mercury. Using Equation (5) and cos( θm)→1, this connection can be rewritten as
-sin(θm) =δωsin(θp). (8)
-and thus the relative amplitudes of θmandθpdepend of the Poincar´ e number δω.
+This expresses a formal connection between θp and θm which is independent of the interior structure
+ of Mercury. Using Equation (5) and cos( θm) →1, this connection can be rewritten as
+sin(θm) = δω sin(θp) . (8)
+and thus the relative amplitudes of θm and θp depend of the Poincar´ e numberδω.
 To investigate Mercury’s response to the gravitational torque from the Sun, we take advantage
  of the framework developed in Mathews et al. [1991] to model the forced nutations of
 Earth [see also Mathews et al. , 2002; Dehant and Mathews , 2015]. This model takes into account
@@ -374,9 +419,9 @@ Earth [see also Mathews et al. , 2002; Dehant and Mathews , 2015]. This model ta
 of the fluid core is misaligned from the symmetry axes of the elliptical surfaces of the CMB and
 ICB. It also includes the gravitational torque exerted on the inner core when it is misaligned
 with the mantle. Electromagnetic and viscous torques at both the CMB and ICB have been
-incorporated into the framework [e.g Buffett , 1992; Buffett et al. , 2002; Mathews and Guo , 2005;
+incorporated into the framework [e.g Buffett, 1992; Buffett et al. , 2002; Mathews and Guo , 2005;
 Deleplace and Cardin , 2006]. The framework was adapted to model the Cassini state of the Moon
-inDumberry and Wieczorek [2016] and further developed in Stys and Dumberry [2018] and Organowski
+in Dumberry and Wieczorek [2016] and further developed in Stys and Dumberry [2018] and Organowski
 and Dumberry [2020]. We adapt it here to capture the Cassini state of Mercury.
 Because the forced precession period is much longer than the rotation and orbital periods
  of Mercury, the gravitational solar torque that is relevant to the Cassini state is the mean
@@ -384,24 +429,24 @@ torque averaged over one orbit. This mean torque is perpendicular to the Cassini
  in the same direction as the vector connecting the Sun to the descending node of Mercury’s
 orbit in Figure 1. Hence, viewed from the mantle frame, the orientation of this mean torque
 is periodic, rotating at frequency ωΩo. Setting the equatorial directions ˆep
-1andˆep
-2to correspond
+1 and ˆep
+2 to correspond
 to the real and imaginary axes of the complex plane, respectively, we can write the equatorial
 components of this periodic applied torque in a compact form as
-Γ1(t) +iΓ2(t) =−i˜Γ(ω) exp[iωΩot], (9)
+Γ1(t) + iΓ2(t) = −i˜Γ(ω) exp[iωΩot] , (9)
 where ˜Γ(ω) represents the amplitude of the torque at frequency ωΩo. In response to this torque,
-the axes defining all angles ( θp,εm,θm,θf,θs,θn) as viewed in the mantle frame are also rotating
- at frequency ωΩo(see Figure 2). The longitudinal direction of each of these angles at
-a specific time tcan then also be written in the equatorial complex plane and is proportional
-to exp[iωΩot]. For instance, the two equatorial time-dependent components θm1andθm2of the
-angleθm, as seen in the mantle frame, can be written as
-θm1(t) +iθm2(t) = ˜mexp[iωΩot], (10a)
+the axes defining all angles ( θp, εm, θm, θf , θs, θn) as viewed in the mantle frame are also rotating
+ at frequency ωΩo (see Figure 2). The longitudinal direction of each of these angles at
+a specific time t can then also be written in the equatorial complex plane and is proportional
+to exp[iωΩot]. For instance, the two equatorial time-dependent components θm1 and θm2 of the
+angle θm, as seen in the mantle frame, can be written as
+θm1(t) + iθm2(t) = ˜m exp[iωΩot] , (10a)
 where
-˜m≡˜m(ω) =Re[ ˜m] +iIm[ ˜m], (10b)
+˜m≡˜m(ω) = Re[ ˜m] + iIm[ ˜m] , (10b)
 is the amplitude at frequency ωΩo. Equivalent definitions apply for all other angles, with the
 connection as follows:
-θm⇔˜m, θ f⇔˜mf, θ s⇔˜ms, θ n⇔˜ns, θ p⇔˜p, ε m⇔˜εm. (11)
-The notation ˜ m, ˜mf, ˜ms, ˜nsfollows that introduced in the original model of Mathews et al. [1991].
+θm ⇔˜m, θ f ⇔˜mf , θ s ⇔˜ms , θ n ⇔˜ns , θ p ⇔˜p, ε m ⇔˜εm . (11)
+The notation ˜m, ˜mf , ˜ms, ˜ns follows that introduced in the original model of Mathews et al. [1991].
 Note that all tilded amplitudes are complex: their imaginary part reflects the out-of-phase response
  to the applied torque as a result of dissipation, for instance from viscous or EM coupling
 –9–
@@ -411,8 +456,8 @@ real. We concentrate our analysis in this work on the real part of the solutions
  to the mutual alignment of these five rotation angles in the Cassini plane. As such, ˜ εm
 corresponds to the observed obliquity of the mantle symmetry axis. It is thus equivalent to εm,
 though we keep the tilde notation in the presentation of our results to emphasize that it represents
- the real part of the solution from our system. Furthermore, since ˜ m≪˜εm, we often
-refer to ˜εmas the orientation of spin axis of the mantle, since the Cassini state of Mercury is
+ the real part of the solution from our system. Furthermore, since ˜ m ≪˜εm, we often
+refer to ˜εm as the orientation of spin axis of the mantle, since the Cassini state of Mercury is
 more customarily described in terms of the latter in the literature.
 The model of Mathews et al. [1991] is developed under the assumption of small angles as
 appropriate for the nutations on Earth. The details on how the equations of the model are derived
@@ -420,120 +465,163 @@ appropriate for the nutations on Earth. The details on how the equations of the
  describe, respectively, the time rate of change of the angular momenta of the whole of Mercury,
  the fluid core, and the inner core in the reference frame of the rotating mantle. These three
 equations are
-(ω−e) ˜m+ (1 +ω)[¯Af
-¯A˜mf+¯As
-¯A˜ms+α3es¯As
-¯A˜ns]
-=1
-iΩ2o¯A(
-˜Γsun)
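+$(\omega - e)\,\tilde{m} + (1+\omega)\left[\frac{\bar{A}_f}{\bar{A}}\,\tilde{m}_f + \frac{\bar{A}_s}{\bar{A}}\,\tilde{m}_s + \alpha_3 e_s\frac{\bar{A}_s}{\bar{A}}\,\tilde{n}_s\right] = \frac{1}{i\Omega_o^2\bar{A}}\left(\tilde{\Gamma}_{sun}\right)$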
 , (12a)
-ω˜m+ (1 +ω+ef) ˜mf−ωα1es¯As
-¯Af˜ns=1
-iΩ2o¯Af(
-−˜Γcmb−˜Γicb)
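+$\omega\,\tilde{m} + (1+\omega+e_f)\,\tilde{m}_f - \omega\alpha_1 e_s\frac{\bar{A}_s}{\bar{A}_f}\,\tilde{n}_s = \frac{1}{i\Omega_o^2\bar{A}_f}\left(-\tilde{\Gamma}_{cmb} - \tilde{\Gamma}_{icb}\right)$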
 , (12b)
-(ω−α3es) ˜m+α1es˜mf+ (1 +ω) ˜ms+ (1 +ω−α2)es˜ns=1
-iΩ2o¯As(
+(ω−α3es) ˜m+ α1es ˜mf + (1 +ω) ˜ms + (1 +ω−α2) es˜ns = 1
+iΩ2o ¯As
+(
 ˜Γs
-sun+˜Γicb)
+sun + ˜Γicb
+)
 , (12c)
 and a fourth equation consists of a kinematic relation that expresses the change in the orientation
  of the inner core figure as a result of its own rotation,
-˜ms+ω˜ns= 0. (12d)
-In these equations, the parameters α1,α2andα3involve the density contrast at the ICB
+˜ms + ω˜ns = 0 . (12d)
+In these equations, the parameters α1, α2 and α3 involve the density contrast at the ICB
 and are given by
-α1=ρf
-ρs, α 3= 1−α1, α 2=α1−α3αg, (13a)
-where the parameter αgis a measure of the ratio of the gravitational to inertial torque applied
+$\alpha_1 = \frac{\rho_f}{\rho_s}, \quad \alpha_3 = 1 - \alpha_1, \quad \alpha_2 = \alpha_1 - \alpha_3\alpha_g$, (13a)
+where the parameter αg is a measure of the ratio of the gravitational to inertial torque applied
 on the inner core,
-αg=8πG
-5Ω2o[ρc(ϵr−ϵm) +ρm(ϵm−ϵf) +ρfϵf], (13b)
-whereGis the gravitational constant.
-˜Γsunis the amplitude of the gravitational torque by the Sun on the whole of Mercury. For
-a small mantle obliquity ˜ εmand a small inner core tilt ˜ ns, it is given by
-˜Γsun=−iΩ2
-o¯A(
-φm˜εm+¯As
-¯Aα3φs˜ns)
+$\alpha_g = \frac{8\pi G}{5\Omega_o^2}\left[\rho_c(\epsilon_r - \epsilon_m) + \rho_m(\epsilon_m - \epsilon_f) + \rho_f\epsilon_f\right]$, (13b)
+where G is the gravitational constant.
+˜Γsun is the amplitude of the gravitational torque by the Sun on the whole of Mercury. For
+a small mantle obliquity ˜εm and a small inner core tilt ˜ns, it is given by
+$\tilde{\Gamma}_{sun} = -i\Omega_o^2\bar{A}\left(\phi_m\tilde{\varepsilon}_m + \frac{\bar{A}_s}{\bar{A}}\alpha_3\phi_s\tilde{n}_s\right)$
 , (14)
 where
 –10–
 Confidential manuscript submitted to JGR-Planets
-φm=3
-2n2
-Ω2o[
-G210e+1
-2G201γ]
+$\phi_m = \frac{3}{2}\frac{n^2}{\Omega_o^2}\left[G_{210}\,e + \frac{1}{2}G_{201}\,\gamma\right]$
 , (15a)
-φs=3
-2n2
-Ω2o[
-G210es+1
-2G201γs]
+$\phi_s = \frac{3}{2}\frac{n^2}{\Omega_o^2}\left[G_{210}\,e_s + \frac{1}{2}G_{201}\,\gamma_s\right]$
 , (15b)
-and whereG210andG201are functions of the orbital eccentricity ec,
-G210=1
-(1−e2c)3/2, (16a)
-G201=7
-2ec−123
-16e3
-c+489
+and where G210 and G201 are functions of the orbital eccentricity ec,
+$G_{210} = \frac{1}{(1 - e_c^2)^{3/2}}$, (16a)
+G201 = 7
+2ec −123
+16 e3
+c + 489
 128e5
-c. (16b)
+c . (16b)
 The gravitational torque by the Sun acting on the inner core alone, ˜Γs
 sun, is
 ˜Γs
-sun=−iΩ2
-o¯Asα3φs(˜εm+ ˜ns). (17)
-˜Γcmband˜Γicbare the torques from tangential stresses by the fluid core on the mantle at the
+sun = −iΩ2
+o ¯Asα3φs(˜εm + ˜ns) . (17)
+˜Γcmb and ˜Γicb are the torques from tangential stresses by the fluid core on the mantle at the
 CMB and on the inner core at the ICB, respectively. These torques can be parameterized in
-terms of dimensionless complex coupling constants KicbandKcmband the differential angular
- velocities at each boundary [e.g Buffett , 1992; Buffett et al. , 2002],
-˜Γicb=iΩ2
-o¯AsKicb( ˜mf−˜ms), (18a)
-˜Γcmb=iΩ2
-o¯AfKcmb˜mf. (18b)
-Specific expressions for KicbandKcmbare delayed to sections 4 and 5 when we consider the
+terms of dimensionless complex coupling constants Kicb and Kcmb and the differential angular
+ velocities at each boundary [e.g. Buffett, 1992; Buffett et al., 2002],
+$\tilde{\Gamma}_{icb} = i\Omega_o^2\bar{A}_s K_{icb}\left(\tilde{m}_f - \tilde{m}_s\right)$, (18a)
+$\tilde{\Gamma}_{cmb} = i\Omega_o^2\bar{A}_f K_{cmb}\,\tilde{m}_f$. (18b)
+Specific expressions for Kicb and Kcmb are delayed to sections 4 and 5 when we consider the
 effects of viscous and EM coupling, respectively.
 A fifth equation is required to connect this interior model to the obliquity of the mantle,
-and this is provided by Equation (7). For small angles θmandθp, this gives [e.g. Mathews et al. ,
+and this is provided by Equation (7). For small angles θm and θp, this gives [e.g. Mathews et al. ,
 1991; Dumberry and Wieczorek , 2016; Baland et al. , 2019]
-˜m+ (1 +ω)˜p= 0. (19)
-For Mercury, it is more convenient to connect the internal model with ˜ εminstead of ˜p. This
-is becauseθp≈8.567◦whereas ˜εm≈2 arcmin and thus the latter obeys more strictly the
+˜m+ (1 +ω)˜p= 0 . (19)
+For Mercury, it is more convenient to connect the internal model with ˜εm instead of ˜p. This
+is because θp ≈ 8.567◦whereas ˜εm ≈ 2 arcmin and thus the latter obeys more strictly the
 condition of small angles assumed in our framework. Furthermore, the external torques acting
  on the whole planet (Equation 14) and inner core (Equation 17) depend linearly on ˜ εm. Written
- in terms of ˜ εm, and with the approximation of ˜ εm≪1 and ˜m≪1, Equation (7) becomes
-˜m+ (1 +ω)˜εm=−(1 +ω) tanI. (20)
-Likewise, the frequency ωfrom Equation (5) can be written simply in terms of I,
-ω=−1−δωcosI. (21)
+ in terms of ˜εm, and with the approximation of ˜εm ≪1 and ˜m≪1, Equation (7) becomes
+˜m+ (1 +ω)˜εm = −(1 + ω) tanI. (20)
+Likewise, the frequency ω from Equation (5) can be written simply in terms of I,
+ω= −1 −δωcos I. (21)
 The set of four Equations (12) with the addition of Equation (20) form a linear system
-of equations for the five rotational variables ˜ m, ˜mf, ˜ms, ˜nsand ˜εm. It captures the response
-of Mercury, in the frequency domain, when subject to a periodic solar torque applied at frequencyω.
- The system can be written in a matrix form as
+of equations for the five rotational variables ˜m, ˜mf , ˜ms, ˜ns and ˜εm. It captures the response
+of Mercury, in the frequency domain, when subject to a periodic solar torque applied at frequency
+ ω. The system can be written in a matrix form as
 –11–
 Confidential manuscript submitted to JGR-Planets
-M·x=y, (22a)
+M·x = y , (22a)
 where the solution ( x) and forcing ( y) vectors are
-xT= [ ˜m,˜mf,˜ms,˜ns,˜εm], (22b)
-yT= [0,0,0,0,−(1 +ω) tanI], (22c)
-and the elements of matrix Mare
-M=
-ω−e (1 +ω)¯Af
-¯A(1 +ω)¯As¯A¯As¯Aα3(
-(1 +ω)es+φs)
+xT = [ ˜m, ˜mf , ˜ms,˜ns,˜εm] , (22b)
+yT = [0,0,0,0,−(1 + ω) tanI] , (22c)
+and the elements of matrix M are
+M =
+
+
+ω−e (1 + ω)
+¯Af
+¯A (1 + ω)
+¯As
+¯A
+¯As
+¯A α3
+(
+(1 + ω)es + φs
+)
 φm
-ω 1 +ω+ef+Kcmb+¯As¯AfKicb−¯As¯AfKicb−ωesα1¯As¯Af0
-ω−α3es α1es−Kicb 1 +ω+Kicb(1 +ω−α2)es+α3φsα3φs
+ω 1 + ω+ ef + Kcmb +
+¯As
+¯Af
+Kicb −
+¯As
+¯Af
+Kicb −ωesα1
+¯As
+¯Af
+0
+ω−α3es α1es −Kicb 1 + ω+ Kicb (1 + ω−α2)es + α3φs α3φs
 0 0 1 ω 0
-1 0 0 0 (1 + ω)
-.
+1 0 0 0 (1 + ω)
+
+
+.
 (22d)
-Solutions of the homogeneous system (i.e. y=0) represent free modes of precession. Three
+Solutions of the homogeneous system (i.e. y = 0) represent free modes of precession. Three
 modes have periods which, when seen in inertial space, are typically in the range of a few hundred
  to a few thousand years. The first is the free axial precession of Mercury maintained by
-the solar torque acting on its elliptical figure [e.g. Peale , 2005]. The second is the free core nutation
+the solar torque acting on its elliptical figure [e.g. Peale, 2005]. The second is the free core nutation
  (FCN), which is the free precession of the spin axis of the fluid core about the symmetry
  axis of the CMB [e.g. Mathews et al. , 1991]. The third is the free inner core nutation (FICN),
 a free mode of rotation similar to the FCN but associated with the inner core [e.g. Mathews et al. ,
@@ -543,7 +631,7 @@ although we have retained the triaxial shape of Mercury in the expression of the
 we treat its angular momentum response as if it were an axially symmetric body. This is convenient
  as the two equatorial angular momentum equations for each region can be combined
 into a single equation. To first order, the frequency of the free precession of Mercury is not largely
-altered by triaxiality [e.g. Peale , 2005]. Baland et al. [2019] showed that the frequencies of the
+altered by triaxiality [e.g. Peale, 2005]. Baland et al. [2019] showed that the frequencies of the
 FCN and FICN for a triaxial planetary body may be slightly different than those for an axially
  symmetric body, but not by a large factor. As the response of Mercury to the solar torque
 is largely determined by the resonant amplification due to the presence of these three modes,
@@ -568,175 +656,212 @@ Confidential manuscript submitted to JGR-Planets
 2.3.1 The Cassini state of a single-body, rigid Mercury
 For a rigid planet with no fluid and solid cores, our system of equations reduces to Equations
  (12a) and (20),
-(ω−e) ˜m+φm˜εm= 0, (23a)
-˜m+ (1 +ω)˜εm=−(1 +ω) tanI. (23b)
-Using Equation (21), δω≪1, and the approximation ¯A(1 +e+δωcosI) =C+¯AδωcosI≈
+(ω−e) ˜m+ φm ˜εm = 0 , (23a)
+˜m+ (1 +ω)˜εm = −(1 + ω) tanI. (23b)
+Using Equation (21), δω ≪1, and the approximation ¯A(1 +e+ δωcos I) = C+ ¯Aδωcos I ≈
 C, these can be written as
-C˜m=¯Aφm˜εm, (24a)
-˜m=δω(
-sinI+ cosI˜εm)
+C˜m= ¯Aφm ˜εm , (24a)
+$\tilde{m} = \delta\omega\left(\sin I + \cos I\,\tilde{\varepsilon}_m\right)$
 . (24b)
-Equation (24b) gives a direct relationship between ˜ mand ˜εm. ForI= 8.5330◦,δω=
-4.9327×10−7and taking ˜ εm= 2.04 arcmin, this gives ˜ m= 2.52×10−4arcmin, much smaller
+Equation (24b) gives a direct relationship between $\tilde{m}$ and $\tilde{\varepsilon}_m$. For $I = 8.5330^\circ$, $\delta\omega = 4.9327\times10^{-7}$
+and taking $\tilde{\varepsilon}_m = 2.04$ arcmin, this gives $\tilde{m} = 2.52\times10^{-4}$ arcmin, much smaller
 than ˜εm: the offset of the rotation axis of the mantle with respect to its symmetry axis is very
 small. Substituting Equation (24b) in Equation (24a) gives
-CΩp(
-sinI+ cosI˜εm)
-=¯AΩoφm˜εm, (25)
-and isolating for ˜ εm,
-˜εm=CΩpsinI
-−CΩpcosI+¯AΩoφm. (26)
-Upon using Equations (4), (15a), and Ω o=3
-2n, we can write
-˜εm=CΩpsinI
-−CΩpcosI+nMR2(G210J2+ 2G201C22). (27)
+$C\Omega_p\left(\sin I + \cos I\,\tilde{\varepsilon}_m\right) = \bar{A}\Omega_o\phi_m\tilde{\varepsilon}_m$, (25)
+and isolating for ˜εm,
+$\tilde{\varepsilon}_m = \frac{C\Omega_p\sin I}{-C\Omega_p\cos I + \bar{A}\Omega_o\phi_m}$. (26)
+Upon using Equations (4), (15a), and $\Omega_o = \frac{3}{2}n$, we can write
+$\tilde{\varepsilon}_m = \frac{C\Omega_p\sin I}{-C\Omega_p\cos I + nMR^2\left(G_{210}J_2 + 2G_{201}C_{22}\right)}$. (27)
 This is the standard prediction for the obliquity of a rigid Mercury occupying Cassini state 1
-[see for instance Equation (1) of Baland et al. , 2017, where their definition of ˙Ω is equal to−Ωp].
+[see for instance Equation (1) of Baland et al. , 2017, where their definition of ˙Ω is equal to −Ωp].
 Hence, in the absence of a fluid core and inner core, our system retrieves the Cassini state of
 Mercury correctly. Equation (27) can be manipulated to solve instead for the normalized moment
  of inertia ˆC,
-ˆC=C
-MR2=n
-ΩpG210J2+ 2G201C22
-cosI+ sinI/˜εm. (28)
+$\hat{C} = \frac{C}{MR^2} = \frac{n}{\Omega_p}\,\frac{G_{210}J_2 + 2G_{201}C_{22}}{\cos I + \sin I/\tilde{\varepsilon}_m}$. (28)
 which is equivalent to Equation (89) of Van Hoolst [2015]. It is based on the latter equation
 that a measurement of the obliquity gives a constraint on ˆC.
-Two free modes of precession are found by setting y=0in Equation (23). One mode corresponds
+Two free modes of precession are found by setting y = 0 in Equation (23). One mode corresponds
  to the Eulerian wobble, or Chandler wobble, and represents the prograde precession
 of the rotation axis about the symmetry axis. The second mode is the free retrograde axial precession
  of Mercury. As seen in the inertial frame, its frequency is given by
 –13–
 Confidential manuscript submitted to JGR-Planets
-ωfp=nMR2
-C(
-G210J2+ 2G201C22)
+$\omega_{fp} = \frac{nMR^2}{C}\left(G_{210}J_2 + 2G_{201}C_{22}\right)$
 , (29)
 which is equivalent to the prediction by Peale [2005] when neglecting its small elliptical component.
  Note that in Peale [2005] it was assumed that only the mantle was involved in the solid-body
- precession and hence Cwas replaced by Cm. UsingC= 0.346·MR2[Margot et al. ,
-2012] and the numerical values for n,J2,C22andecgiven in Table 1, we obtain a free precession
- period of Tfp= 2π/ωfp= 1298 yr. If we use Cminstead ofCin Equation (29), and take
-Cm= 0.431·C= 0.431·0.346·MR2[Margot et al. , 2012], we obtain Tfp= 2π/ωfp= 560 yr.
+ precession and hence C was replaced by Cm. Using C = 0 .346 ·MR2 [Margot et al. ,
+2012] and the numerical values for n, J2, C22 and ec given in Table 1, we obtain a free precession
+ period of Tfp = 2π/ωfp = 1298 yr. If we use Cm instead of C in Equation (29), and take
+Cm = 0.431·C = 0.431·0.346·MR2 [Margot et al. , 2012], we obtain Tfp = 2π/ωfp = 560 yr.
 These estimates are similar to those obtained by Peale [2005]. Because the CMB is elliptical,
 the pressure torque exerted on the fluid core by the mantle leads to an entrainment of the fluid
 core, the degree of which depends on the amplitude of the pole-to-equator CMB flattening. The
 true free precession period lies somewhere between 560 and 1298 yr. Regardless of its exact value,
 the free precession period is much shorter than the forcing period of 325 kyr. Using Equation
 (29), Equation (27) can be written as [e.g. Baland et al. , 2017]
-˜εm=ΩpsinI
-−ΩpcosI+ωfp. (30)
-The obliquity of Mercury is thus determined by how the forcing frequency Ω pcompares with
-the free precession frequency ωfp. Becauseωfp>Ωp, Mercury occupies Cassini state 1 [ Peale ,
+$\tilde{\varepsilon}_m = \frac{\Omega_p\sin I}{-\Omega_p\cos I + \omega_{fp}}$. (30)
+The obliquity of Mercury is thus determined by how the forcing frequency Ω p compares with
+the free precession frequency ωfp . Because ωfp >Ωp, Mercury occupies Cassini state 1 [ Peale,
 1974]. Furthermore, Equation (30) shows that a large obliquity can be generated by resonant
-amplification if Ω p≈ωfp. Sinceωfp≫Ωp, resonant amplification is minimal and the resulting
- obliquity, ˜ εm≈2 arcmin, is much smaller than the inclination angle I≈8.5◦.
+amplification if Ωp ≈ ωfp . Since ωfp ≫ Ωp, resonant amplification is minimal and the resulting
+ obliquity, ˜εm ≈2 arcmin, is much smaller than the inclination angle I ≈8.5◦.
 2.3.2 The misalignment of the fluid and solid cores
-Withω=−1−δωcosIandδω≪1, Equation (12d) gives ˜ ns≈˜ms; as for the mantle,
+With ω= −1 −δωcos I and δω ≪1, Equation (12d) gives ˜ns ≈˜ms; as for the mantle,
 the rotation and symmetry axes of the inner core remain closely aligned in the Cassini state.
-The relationship between ˜ mand ˜εmof Equation (24b) is independent of the interior structure,
+The relationship between ˜m and ˜εm of Equation (24b) is independent of the interior structure,
 so it remains unchanged when a fluid and a solid cores are present. Substituting it in Equation
- (12a), and setting ˜ ns= ˜ms, the angular momentum equation of the whole planet becomes
-CΩp(
-sinI+ cosI˜εm)
-+ (¯AfcosIΩp) ˜mf+¯As(cosIΩp−Ωoα3φs)˜ns=¯AΩoφm˜εm. (31)
+ (12a), and setting ˜ns = ˜ms, the angular momentum equation of the whole planet becomes
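+$C\Omega_p\left(\sin I + \cos I\,\tilde{\varepsilon}_m\right) + \left(\bar{A}_f\cos I\,\Omega_p\right)\tilde{m}_f + \bar{A}_s\left(\cos I\,\Omega_p - \Omega_o\alpha_3\phi_s\right)\tilde{n}_s = \bar{A}\Omega_o\phi_m\tilde{\varepsilon}_m$. (31)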
 This latter equation shows how the misaligned inner core and fluid core can lead to a modification
- of the mantle obliquity ˜ εm. Approximate analytical solutions of ˜ nsand ˜mfare given by
-˜ns≈Ωp
-κλs(
-1 +Ωo(Kicb−α1es)
-λf)(
-sinI+ cosI˜εm)
+ of the mantle obliquity ˜εm. Approximate analytical solutions of ˜ns and ˜mf are given by
+˜ns ≈ Ωp
+κλs
+(
+1 + Ωo(Kicb −α1es)
+λf
+)(
+sin I+ cosI˜εm
+)
 −Ωoα3φs
-κλs˜εm, (32a)
-˜mf≈Ωp
-λf(
-sinI+ cosI˜εm)
-+Ωo
-λf¯As
-¯Af(
-Kicb−α1es)
-˜ns, (32b)
+κλs
+˜εm , (32a)
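+$\tilde{m}_f \approx \frac{\Omega_p}{\lambda_f}\left(\sin I + \cos I\,\tilde{\varepsilon}_m\right) + \frac{\Omega_o}{\lambda_f}\frac{\bar{A}_s}{\bar{A}_f}\left(K_{icb} - \alpha_1 e_s\right)\tilde{n}_s$, (32b)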
 where
-κ= 1−¯As
-¯AfΩ2
-o(
-Kicb−α1es)2
-λsλf, (33a)
-λf= ¯σf−ΩpcosI, (33b)
-λs= ¯σs−ΩpcosI, (33c)
+$\kappa = 1 - \frac{\bar{A}_s}{\bar{A}_f}\,\frac{\Omega_o^2\left(K_{icb} - \alpha_1 e_s\right)^2}{\lambda_s\lambda_f}$, (33a)
+λf = ¯σf −Ωp cos I, (33b)
+λs = ¯σs −Ωp cos I, (33c)
 –14–
 Confidential manuscript submitted to JGR-Planets
 and where we have introduced the frequencies
-¯σf= Ωo(
-ef+Kcmb+¯As
-¯AfKicb)
+$\bar{\sigma}_f = \Omega_o\left(e_f + K_{cmb} + \frac{\bar{A}_s}{\bar{A}_f}K_{icb}\right)$
 , (33d)
-¯σs= Ωo(
-esα3αg−esα1+α3φs+Kicb)
+$\bar{\sigma}_s = \Omega_o\left(e_s\alpha_3\alpha_g - e_s\alpha_1 + \alpha_3\phi_s + K_{icb}\right)$
 . (33e)
 These solutions are good approximations for all the results that we present in section 3. For
-an observed mantle obliquity ˜ εmand for a chosen set of interior model parameters, they provide
- useful predictions of ˜ nsand ˜mf.
-In the limit of a very strong coupling between the fluid core, solid core and mantle, ¯ σs≫
-Ωpand ¯σf≫Ωp, so that ˜ns→0, ˜mf→0 and Equation (31) reverts back to Equation (25)
+an observed mantle obliquity ˜εm and for a chosen set of interior model parameters, they provide
+ useful predictions of ˜ns and ˜mf .
+In the limit of a very strong coupling between the fluid core, solid core and mantle, ¯ σs ≫
+Ωp and ¯σf ≫Ωp, so that ˜ns →0, ˜mf →0 and Equation (31) reverts back to Equation (25)
 for a rigid planet. In the opposite limit of no coupling between the fluid core, solid core and
-mantle (i.e. for spherical internal boundaries, ef=es=γs= 0 and no viscous or EM coupling,Kcmb=Kicb=
- 0), then
-φs= 0, κ = 1, λ f=λs=−ΩpcosI, ˜mf= ˜ns=−(tanI+ ˜εm). (34)
-Inserting these in Equation (31), and with the moment of inertia of the mantle equal to Cm=
-C−¯Af−¯As, we obtain
-CmΩp(
-sinI+ cosI˜εm)
-=¯AΩoφm˜εm. (35)
+mantle (i.e. for spherical internal boundaries, ef = es = γs = 0 and no viscous or EM coupling,
+ Kcmb = Kicb = 0), then
+φs = 0 , κ = 1 , λ f = λs = −Ωp cos I, ˜mf = ˜ns = −(tan I+ ˜εm) . (34)
+Inserting these in Equation (31), and with the moment of inertia of the mantle equal to Cm =
+C−¯Af −¯As, we obtain
+$C_m\Omega_p\left(\sin I + \cos I\,\tilde{\varepsilon}_m\right) = \bar{A}\Omega_o\phi_m\tilde{\varepsilon}_m$. (35)
 which describes, as expected, a forced precession of the mantle alone. If this was the case for
-Mercury, taking Cm/C= 0.431, the obliquity should be ˜ εm≈0.88 arcmin, substantially smaller
-than the observed obliquity of ˜ εm≈2 arcmin.
-If ¯σf≈Ωp(and thusλf→0) and/or ¯σs≈Ωp(and thusλs→0) resonant amplification
- leads to large amplitudes for ˜ mf, ˜nsand the mantle obliquity ˜ εm. The frequencies ¯ σfand
-¯σsare closely related to the FCN and FICN frequencies ωfcnandωficn, respectively. Hence,
+Mercury, taking Cm/C = 0.431, the obliquity should be ˜εm ≈0.88 arcmin, substantially smaller
+than the observed obliquity of ˜εm ≈2 arcmin.
+If ¯σf ≈Ωp (and thus λf →0) and/or ¯σs ≈Ωp (and thus λs →0) resonant amplification
+ leads to large amplitudes for ˜mf , ˜ns and the mantle obliquity ˜εm. The frequencies ¯σf and
+¯σs are closely related to the FCN and FICN frequencies ωfcn and ωficn , respectively. Hence,
 just as a large mantle obliquity can result from resonant amplification when the forcing frequency
 approaches the free precession frequency, a large mantle obliquity can likewise result from resonant
  amplification when the forcing frequency approaches the FCN or FICN frequencies. These
 frequencies depend on the interior density structure and are not known. However, we will show
 that for reasonable interior models of Mercury, the FCN and FICN periods are in the range of
 a few hundred yr. This is sufficiently far from the forcing period (325 kyr) that we do not expect
- an important amplification effect. Furthermore, since ωfcn,ωficn≫Ωp, then ¯σf≫Ωp
-and ¯σs≫Ωp, and we are in the strong coupling limit. The mantle obliquity should be close
-to that expected for a rigid planet, as observations suggest. Therefore, we expect that ˜ mfand
-˜nsshould be of the order of ˜ εmor smaller. This further justifies the assumption of small angles
+ an important amplification effect. Furthermore, since ωfcn ,ωficn ≫Ωp, then ¯σf ≫Ωp
+and ¯σs ≫Ωp, and we are in the strong coupling limit. The mantle obliquity should be close
+to that expected for a rigid planet, as observations suggest. Therefore, we expect that ˜ mf and
+˜ns should be of the order of ˜εm or smaller. This further justifies the assumption of small angles
  that we have adopted.
 3 Results
 3.1 Geodetic constraints and interior density structure
-All our interior models are constrained to match the mass Mof Mercury and specific choices
-ofˆC=C/MR2andCm/C. The choice of ˆCis determined from Equation (28). For the parameters
- listed in Table 1, and an observed obliquity of εm= 2.04 arcmin [ Margot et al. , 2012],
-this gives ˆC=C/MR2= 0.3455 and all our interior models are consistent with this choice.
+All our interior models are constrained to match the mass M of Mercury and specific choices
+of ˆC = C/MR2 and Cm/C. The choice of ˆC is determined from Equation (28). For the parameters
+ listed in Table 1, and an observed obliquity of εm = 2.04 arcmin [Margot et al. , 2012],
+this gives ˆC = C/MR2 = 0.3455 and all our interior models are consistent with this choice.
 Obviously, this reflects a Cassini state equilibrium in which the fluid core and inner core are
 –15–
 Confidential manuscript submitted to JGR-Planets
 perfectly aligned with the mantle, which is not strictly correct. Hence, we make an error in estimating
- ˆCfrom Equation (28), or conversely in predicting εmbased on a given choice for ˆC.
-Part of the objective of our study is to estimate how large this error is. The ratio Cm/Cis obtained
+ ˆC from Equation (28), or conversely in predicting εm based on a given choice for ˆC.
+Part of the objective of our study is to estimate how large this error is. The ratio Cm/C is obtained
  from the amplitude of the 88-day longitudinal mantle libration φo, which is given by
-φo= 6·f(ec)C22MR2
-CC
-Cm1
-1 +ζ, (36)
+$\phi_o = 6\,f(e_c)\,C_{22}\,\frac{MR^2}{C}\,\frac{C}{C_m}\,\frac{1}{1+\zeta}$, (36)
 where
-f(ec) = 1−11e2
-c+959
-48e4
-c, (37)
-and whereζis a correction that takes into account the entrainment of the inner core in the libration
- [ Van Hoolst et al. , 2012; Dumberry et al. , 2013; Dumberry and Rivoldini , 2015]; this correction
+$f(e_c) = 1 - 11e_c^2 + \frac{959}{48}e_c^4$, (37)
+and where ζ is a correction that takes into account the entrainment of the inner core in the libration
+ [Van Hoolst et al. , 2012; Dumberry et al. , 2013; Dumberry and Rivoldini , 2015]; this correction
  is small and, to simplify, we neglect it here. Taking the observed libration amplitude
-to be 38.5 arcsec [ Margot et al. , 2012], ˆC=C/MR2= 0.3455 andC22andecfrom Table 1,
-this corresponds to a ratio Cm/C= 0.4269, or equivalently ˆCm=Cm/MR2= 0.1475.
-For all results presented in our study, the crustal density is set at ρc= 2974 kg m−3[Sori,
-2018]. Our standard choice for the crustal thickness is h= 26 km [ Sori, 2018], although in
+to be 38.5 arcsec [ Margot et al. , 2012], ˆC = C/MR2 = 0.3455 and C22 and ec from Table 1,
+this corresponds to a ratio Cm/C = 0.4269, or equivalently ˆCm = Cm/MR2 = 0.1475.
+For all results presented in our study, the crustal density is set at ρc = 2974 kg m−3 [Sori,
+2018]. Our standard choice for the crustal thickness is h = 26 km [ Sori, 2018], although in
 section 3.2 we also present some results with other choices of h. We have considered two possible
  prescriptions connected to the density of the inner core. First, for all the results presented
-in sections 3.2, 3.3 and 3.4, we have used a fixed inner core density of ρs= 8800 kg m−3approximately
+in sections 3.2, 3.3 and 3.4, we have used a fixed inner core density of ρs = 8800 kg m−3 approximately
  that obtained in Dumberry and Rivoldini [2015] under the assumption of a pure
 Fe composition in face-centered cubic phase. This captures an end-member scenario where the
 core composition is an Fe-S alloy; at Mercury’s core conditions, crystallization of Fe is relatively
@@ -749,80 +874,111 @@ our Mercury model with uniform density layers. To capture this other end-member
  scenario, in section 3.5 we present results where we instead prescribe a fixed density
 contrast between the fluid and solid core; specifically, we set the numerical value of α3.
 For a given choice of inner core radius rs, the densities of the mantle ( ρm) and fluid core
-(ρf) and the radius of the CMB ( rf) are determined such that the interior model matches M,
-ˆC= 0.3455 and ˆCm= 0.1475. Figure 3a shows how ρm,ρfandrfvary as a function of inner
- core radius rsfor each of the two inner core density scenarios: a fixed ρs, or a fixed α3. When
+(ρf ) and the radius of the CMB ( rf ) are determined such that the interior model matches M,
+ˆC = 0.3455 and ˆCm = 0.1475. Figure 3a shows how ρm, ρf and rf vary as a function of inner
+ core radius rs for each of the two inner core density scenarios: a fixed ρs, or a fixed α3. When
 the inner core is small, its presence has a limited influence on the resulting density structure,
-and we find ρm= 3197 kg m−3,ρf= 7263 kg m−3andrf= 2000 km in each of the two
-scenarios. When ρsis fixed to 8800 kg m−3, as the inner core reaches 1500 km in size, rfincreases
- to above 2100 km, ρmapproaches 4000 kg m−3andρfis reduced to below 5000 kg m−3.
+and we find ρm = 3197 kg m −3, ρf = 7263 kg m −3 and rf = 2000 km in each of the two
+scenarios. When ρs is fixed to 8800 kg m −3, as the inner core reaches 1500 km in size, rf increases
+ to above 2100 km, ρm approaches 4000 kg m−3 and ρf is reduced to below 5000 kg m −3.
 Figure 3a illustrates that when adopting a fixed ρs, there is a limit in the possible inner core
-size, as otherwise ρmgets unreasonably large and ρfgets inappropriately small (as it would
+size, as otherwise ρm gets unreasonably large and ρf gets inappropriately small (as it would
 require an excessively large concentration of light elements). When adopting instead a fixed density
- contrast, with α3= 0.1, the changes in rf,ρmandρfwith inner core radius are more modest,
- allowing larger possible inner core sizes. Different assumptions on ρcandhwould alter the
+ contrast, with α3 = 0.1, the changes in rf , ρm and ρf with inner core radius are more modest,
+ allowing larger possible inner core sizes. Different assumptions on ρc and h would alter the
 numerical values shown on Figure 3a but not their trends with rs.
-Figure 3b shows how the FCN and FICN periods vary with rsfor each of the two inner
-core density scenarios and in the absence of viscous and EM coupling (i.e. Kcmb=Kicb=
+Figure 3b shows how the FCN and FICN periods vary with rs for each of the two inner
+core density scenarios and in the absence of viscous and EM coupling (i.e. Kcmb = Kicb =
 –16–
 Confidential manuscript submitted to JGR-Planets
-0200400600800100012001400period (yr)
+0
+200
+400
+600
+800
+1000
+1200
+1400period (yr)
 0 200 400 600 800 1000 1200 1400
-Inner core radius (km)300040005000600070008000density (kg/m3)
+Inner core radius (km)
+3000
+4000
+5000
+6000
+7000
+8000density (kg/m3)
 0 200 400 600 800 1000 1200 1400
-Inner core radius (km)200020202040206020802100
-Fluid core radius (km)fluid core density
+Inner core radius (km)
+2000
+2020
+2040
+2060
+2080
+2100
+Fluid core radius (km)
+fluid core density
 CMB radius
-FICNFCN int
-mantle densitya b
+FICN
+FCNint
+mantle density
+a b
 FCN
 Figure 3. a) Fluid core density (red), mantle density (blue), fluid core radius (orange, right-hand
 side scale) and b) FICN (blue) and FCN (red) periods as a function of inner core radius. The FCN
-period when the external torque is set to zero (FCNint) is shown in orange. Solid lines correspond to
-a scenario where the density of the inner core is set to 8800 kg m−3; thin dashed lines correspond to a
-scenario where the density contrast between the fluid and solid cores is set to α3= 0.1.
+period when the external torque is set to zero (FCN int) is shown in orange. Solid lines correspond to
+a scenario where the density of the inner core is set to 8800 kg m −3; thin dashed lines correspond to a
+scenario where the density contrast between the fluid and solid cores is set to α3 = 0.1.
 0). Both of these free modes are retrograde. The FCN period is close to 400 yr for a small inner
  core, increasing to approximately 600 yr at the largest rs. The FICN period is shorter, close
 to 100 yr (160 yr) for a small inner core and decreasing to approximately 40 yr (120 yr) at the
-largestrsunder the fixed ρs(fixedα3) scenario. This confirms that the FCN and FICN periods
+largest rs under the fixed ρs (fixed α3) scenario. This confirms that the FCN and FICN periods
  are both much shorter than the forcing precession period of 325 kyr and sufficiently far away
-from it that we do not expect large ˜ mfand ˜nsfrom resonant amplification.
+from it that we do not expect large ˜mf and ˜ns from resonant amplification.
 The FCN and FICN periods that we have computed include the influence of the external
  torque. As shown by Baland et al. [2019], the external torque allows solid regions to have
 a free motion in inertial space thereby affecting the free rotational modes. To a good approximation,
- the FCN and FICN frequencies (as seen in an inertial frame) for Kcmb=Kicb= 0
+ the FCN and FICN frequencies (as seen in an inertial frame) for Kcmb = Kicb = 0
 are given by
-ωfcn≈−Ωo(¯A
-¯Am+¯As)(
-ef+φm)
-+ Ωoefφm
-(ef+φm), (38a)
-ωficn≈Ωo(¯A+¯As
-¯A−¯As)(
-esα1−esα3αg−α3φs)
+$\omega_{fcn} \approx -\Omega_o\left(\frac{\bar{A}}{\bar{A}_m + \bar{A}_s}\right)\left(e_f + \phi_m\right) + \Omega_o\frac{e_f\phi_m}{(e_f + \phi_m)}$, (38a)
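+$\omega_{ficn} \approx \Omega_o\left(\frac{\bar{A} + \bar{A}_s}{\bar{A} - \bar{A}_s}\right)\left(e_s\alpha_1 - e_s\alpha_3\alpha_g - \alpha_3\phi_s\right)$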
 . (38b)
 The expression of the FICN frequency involves the inertial torque (term esα1) and the gravitational
  torque from the rest of Mercury ( esα3αg) and the Sun ( α3φs) acting on the inner core.
-For both of our inner core density scenarios (and our choices of ρs= 8800 kg m−3andα3=
-0.1), the internal gravitational torque dominates that from the Sun. Furthermore, α3αg≫α1;
+For both of our inner core density scenarios (and our choices of ρs = 8800 kg m−3 and α3 =
+0.1), the internal gravitational torque dominates that from the Sun. Furthermore, α3αg ≫α1;
 the gravitational torque dominates the inertial torque, in large part because of the slow rotation
  rate of Mercury. As a result the FICN frequency is negative (i.e. the precession motion
 is retrograde). This is also the case for the Moon [e.g. Dumberry and Wieczorek , 2016; Stys and
-Dumberry , 2018], but it is different for Earth, where α1>α3αgbecause of its faster rotation
+Dumberry, 2018], but it is different for Earth, where α1 >α3αg because of its faster rotation
 and the FICN mode is prograde [ Mathews et al. , 1991]. Note also that our approximate expres–17–
 
 Confidential manuscript submitted to JGR-Planets
-sion for the FICN differs by a factor ( ¯A+¯As)/(¯A−¯As) compared to that given in Dumberry
+sion for the FICN differs by a factor ( ¯A+ ¯As)/( ¯A−¯As) compared to that given in Dumberry
 and Wieczorek [2016] and Stys and Dumberry [2018] for the Moon.
 The expression for FCN frequency differs from the usual expression for Earth. First, it
-involves the external torque from the Sun captured by the parameter φm. If we setφm= 0,
+involves the external torque from the Sun captured by the parameter φm. If we set φm = 0,
 we obtain the FCN frequency for a decoupled model in which only interior torques contribute,
-ωfcn,int≈−Ωo(¯A
-¯Am+¯As)
-ef. (38c)
-This frequency is slightly different from the usual expression for Earth, involving the ratio ¯A/(¯Am+
+$\omega_{fcn,int} \approx -\Omega_o\left(\frac{\bar{A}}{\bar{A}_m + \bar{A}_s}\right)e_f$. (38c)
+This frequency is slightly different from the usual expression for Earth, involving the ratio ¯A/( ¯Am+
 ¯As) rather than ¯A/¯Am. This is because of the relatively thin mantle of Mercury; for the largest
-rsconsidered, the moment of inertia of the inner core can get close to 40% of that of the mantle
+rs considered, the moment of inertia of the inner core can get close to 40% of that of the mantle
  and is not negligible. The period of the FCN when only interior torques contribute is shown
 in Figure 3b. It is close to 1100 yr for a small inner core, increasing to approximately 1500 yr
 at the largest rs. Hence, the influence of the solar torque reduces the FCN period by a factor
@@ -830,151 +986,191 @@ of approximately 3. We note that the FICN period, in contrast, is not altered su
 the external torque is set to zero.
 3.2 Gravitational and inertial coupling
 Let us now investigate the obliquities of the mantle, fluid core and inner core in their equilibrium
- Cassini state. We assume a fixed inner core density scenario in this section, with ρs=
+ Cassini state. We assume a fixed inner core density scenario in this section, with ρs =
 8800 kg m−3. Viscous and EM coupling are set to zero in order to isolate the influence of gravitational
- and inertial coupling. Figure 4 shows how ˜ εm, ˜mfand ˜nsvary as functions of inner
+ and inertial coupling. Figure 4 shows how ˜εm, ˜mf and ˜ns vary as functions of inner
 core radius. We show calculations for three different choices of crustal thickness, but let us concentrate
- first on the case for h= 26 km. For small rs, we retrieve an obliquity of ˜ εm= 2.0494
-arcmin (Figure 4a). ˜ εmdecreases with rs, but not substantially; at the largest rs(1500 km),
-˜εm= 2.0460 arcmin, a decrease of 0.0034 arcmin. The maximum difference from ˜ εm= 2.04
-arcmin, the obliquity that we used in setting the constraint for ˆC– and hence the prediction
+ first on the case for h= 26 km. For small rs, we retrieve an obliquity of ˜εm = 2.0494
+arcmin (Figure 4a). ˜εm decreases with rs, but not substantially; at the largest rs (1500 km),
+˜εm = 2.0460 arcmin, a decrease of 0.0034 arcmin. The maximum difference from ˜ εm = 2.04
+arcmin, the obliquity that we used in setting the constraint for ˆC – and hence the prediction
 we should recover for a rigid planet – is an overestimate of approximately 0 .01 arcmin which
 occurs for small inner cores.
-The deviation of ˜ εmfrom that of a rigid planet is due to the misalignments of the fluid
-core ( ˜mf) and solid inner core (˜ ns) with respect to the mantle (Figure 4b). The misalignment
-of the fluid core spin axis from the mantle is significant: ˜ mfis approximately 4.02 arcmin for
+The deviation of ˜εm from that of a rigid planet is due to the misalignments of the fluid
+core ( ˜mf ) and solid inner core (˜ns) with respect to the mantle (Figure 4b). The misalignment
+of the fluid core spin axis from the mantle is significant: ˜ mf is approximately 4.02 arcmin for
 a small inner core and does not vary substantially with inner core size; it drops to 3.97 arcmin
-at the largest rs. Recall that ˜ mfis measured with respect to the mantle rotation axis (which
+at the largest rs. Recall that ˜mf is measured with respect to the mantle rotation axis (which
 coincides closely with the symmetry axis), so the obliquity of the spin axis of the fluid core with
-respect to the orbit normal is ˜ εm+ ˜mf≈6 arcmin. The reason why the obliquity of the spin
+respect to the orbit normal is ˜εm+ ˜mf ≈6 arcmin. The reason why the obliquity of the spin
 axis of the fluid core is larger than that of the mantle can be understood from Equation (32b),
-which shows that ˜ mfis determined by the resonant amplification of the FCN mode at the forcing
+which shows that ˜mf is determined by the resonant amplification of the FCN mode at the forcing
  frequency. When the FCN frequency is much larger than the forcing frequency, as is the
-case for Mercury, the resonant amplification is very weak but remains present and ˜ mfis larger
+case for Mercury, the resonant amplification is very weak but remains present and ˜mf is larger
 than zero.
-In contrast to ˜ mf, the misalignment of the inner core with respect to the mantle is much
-smaller; ˜nsis approximately between 0.023-0.025 arcmin, a factor 80 times smaller than ˜ εm.
+In contrast to ˜mf , the misalignment of the inner core with respect to the mantle is much
+smaller; ˜ns is approximately between 0.023-0.025 arcmin, a factor 80 times smaller than ˜εm.
 Physically, this is because the gravitational torque acting on the inner core when it is tilted from
 the mantle is much stronger than the inertial torque acting at the ICB. As a result, the inner
 core must remain in close alignment with the mantle. Presented differently, since the FICN period
  is more than 3000 times shorter than the forced precession period, the inner core can eas–18–
 
 Confidential manuscript submitted to JGR-Planets
-2.0382.0402.0422.0442.0462.0482.050Obliquity angle (arcmin)
+2.038
+2.040
+2.042
+2.044
+2.046
+2.048
+2.050Obliquity angle (arcmin)
 0 200 400 600 800 1000 1200 1400
-Inner core radius (km)1.52.02.53.03.54.04.5Obliquity angle (arcmin)
+Inner core radius (km)
+1.5
+2.0
+2.5
+3.0
+3.5
+4.0
+4.5Obliquity angle (arcmin)
 0 200 400 600 800 1000 1200 1400
-Inner core radius (km)crustal thickness
+Inner core radius (km)
+crustal thickness
 16 km
-36 km26 kmcrustal thickness
+36 km
+26 km
+crustal thickness
 16 km
-36 km26 kmεm
+36 km
+26 km
+εm
 εg
- for a rigid planetεmmf
-ns(x100)a b
-Figure 4. a) Obliquity of the mantle (˜ εm, solid lines) and of the principal moment of inertia (˜ εg,
-dashed line) b) ˜ mf(solid lines) and ˜ ns(dashed lines, x100) as a function of inner core radius and for
+ for a rigid planetεm
+mf
+ns (x100)
+a b
+Figure 4. a) Obliquity of the mantle (˜εm, solid lines) and of the principal moment of inertia (˜εg,
+dashed line) b) ˜mf (solid lines) and ˜ns (dashed lines, x100) as a function of inner core radius and for
 different choices of crustal thickness.
-ily follow the forced precession of the mantle and remains gravitationally locked to it. ˜ nsdoes
+ily follow the forced precession of the mantle and remains gravitationally locked to it. ˜ ns does
 not change substantially as the inner core increases in size.
-WhenKicb=Kcmb= 0, a good approximation of ˜ εmis given by
-˜εm=C′ΩpsinI
-−C′ΩpcosI+¯AΩoφm, (39)
-which is identical to the prediction of Equation (26) for a rigid Mercury, except Cis replaced
-byC′. The latter represents an effective moment of inertia that accounts for the coupling of
+When Kicb = Kcmb = 0, a good approximation of ˜εm is given by
+$$ \tilde{\varepsilon}_m = \frac{C' \Omega_p \sin I}
+{-C' \Omega_p \cos I + \bar{A} \Omega_o \phi_m}
+, \qquad (39) $$
+which is identical to the prediction of Equation (26) for a rigid Mercury, except C is replaced
+by C′. The latter represents an effective moment of inertia that accounts for the coupling of
 the core to the mantle,
-C′=C+¯Acχ, (40)
-where ¯Ac=¯Af+¯Asand
-χ=ΩpcosI
-¯Ac(¯Af
-(¯σf−ΩpcosI)+¯As
-(¯σs−ΩpcosI))
-−¯As
-¯AcΩoα3φs
-(¯σs−ΩpcosI). (41)
-The frequencies ¯ σfand ¯σsare given in Equations (33d-33e) and closely approximate the FCN
-and FICN frequencies of Equations (38c) and (38b), respectively. The factor χcaptures then
+C′= C+ ¯Acχ, (40)
+where ¯Ac = ¯Af + ¯As and
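+$$ \chi = \frac{\Omega_p \cos I}
+{\bar{A}_c}
+\left( \frac{\bar{A}_f}
+{\bar{\sigma}_f - \Omega_p \cos I} +
+\frac{\bar{A}_s}
+{\bar{\sigma}_s - \Omega_p \cos I}
+\right)
+-
+\frac{\bar{A}_s}
+{\bar{A}_c}
+\frac{\Omega_o \alpha_3 \phi_s}
+{\bar{\sigma}_s - \Omega_p \cos I} . \qquad (41) $$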
+The frequencies ¯σf and ¯σs are given in Equations (33d-33e) and closely approximate the FCN
+and FICN frequencies of Equations (38c) and (38b), respectively. The factor χ captures then
 how the core is entrained to precess with the mantle, with the coupling between the two expressed
  in terms of the resonant amplification of the FCN and FICN frequencies. In the limit
-of ¯σf,¯σs→0, thenχ=−1,C′=Cm, the core is fully decoupled from the mantle and we
-retrieve Equation (35). If instead ¯ σf,¯σs→∞ , thenχ= 0,C′=Cand we retrieve the prediction
+of ¯σf ,¯σs →0, then χ = −1, C′ = Cm, the core is fully decoupled from the mantle and we
+retrieve Equation (35). If instead ¯σf ,¯σs →∞, then χ = 0, C′= C and we retrieve the prediction
  for a rigid planet. When both the FCN and FICN frequencies are much larger than Ω p,
-as is the case here, resonant amplification is weak, χis small and positive, C′> C and this
-leads to a slightly larger ˜ εmcompared to a rigid planet. Because the inner core core is gravitationally
+as is the case here, resonant amplification is weak, χ is small and positive, C′ > C and this
+leads to a slightly larger ˜εm compared to a rigid planet. Because the inner core is gravitationally
  locked to the mantle, deviations from a rigid planet are dominantly caused by the
-misalignment of the fluid core. In Equation (41), ¯ σs≫¯σf, so to a good approximation
+misalignment of the fluid core. In Equation (41), ¯σs ≫¯σf , so to a good approximation
 –19–
 Confidential manuscript submitted to JGR-Planets
-χ≈¯Af
-¯AcΩocosI
-(¯σf−ΩpcosI). (42)
-For a small inner core, χ≈7.55×10−3. As the inner core grows, ¯Afdecreases, and the combination
- ¯Acχalso decreases. This implies that C′decreases with inner core size and, consequently,
-˜εmalso decreases with inner core size, as seen in Figure 4a, though it remains larger than the
+χ≈
+¯Af
+¯Ac
+Ωo cos I
+(¯σf −Ωp cos I) . (42)
+For a small inner core, χ≈7.55×10−3. As the inner core grows, ¯Af decreases, and the combination
+ ¯Acχ also decreases. This implies that C′decreases with inner core size and, consequently,
+˜εm also decreases with inner core size, as seen in Figure 4a, though it remains larger than the
 prediction for a rigid planet.
-The specific predictions of ˜ εm, ˜mfand ˜nson Figure 4 depend sensitively on the assumed
+The specific predictions of ˜εm, ˜mf and ˜ns on Figure 4 depend sensitively on the assumed
 interior density model and on the dynamical ellipticities of the inner core ( es) and fluid core
-(ef). Hence, it depends on the choices we have made for the inner core density ρs, the crustal
-densityρcand its thickness h. Changing ρs,ρcand/orhrequires a different combination of ρf,
-ρmandrfin order to match M,ˆCand ˆCm. In turn, this leads to different ellipticities at interior
- boundary in order to match J2andC22, and thus different predictions for ˜ εm, ˜mfand
+(ef ). Hence, it depends on the choices we have made for the inner core density ρs, the crustal
+density ρc and its thickness h. Changing ρs, ρc and/or h requires a different combination of ρf ,
+ρm and rf in order to match M, ˆC and ˆCm. In turn, this leads to different ellipticities at interior
+ boundary in order to match J2 and C22, and thus different predictions for ˜εm, ˜mf and
 ˜ns. To illustrate this, we show on Figure 4 two additional predictions computed with crustal
-thicknesses changed to h= 16 and 36 km. The change in ˜ εmremains modest,∼0.025%, but
-the changes in ˜ mfand ˜nsare more substantial, ∼5% and∼10%, respectively.
+thicknesses changed to h= 16 and 36 km. The change in ˜εm remains modest, ∼0.025%, but
+the changes in ˜mf and ˜ns are more substantial, ∼5% and ∼10%, respectively.
 We also show on Figure 4a (only for h= 26 km) the obliquity of the principal moment
-of inertia of the whole planet, which we denote by ˜ εg. A difference between ˜ εgand ˜εmoccurs
+of inertia of the whole planet, which we denote by ˜εg. A difference between ˜εg and ˜εm occurs
 if the inner core is misaligned with the mantle. As seen in the mantle frame, a tilted inner core
-(with ˜nsassumed small) leads to an off-diagonal component of the moment of inertia tensor
-of (Cs−¯As)α3˜ns=¯Asesα3˜ns. The angle by which the mantle frame must be rotated so that
-the moment of inertia of the whole planet is purely diagonal is ( ¯Asesα3˜ns)/(¯Ae), and hence a
-good approximation of ˜ εgis
-˜εg= ˜εm+¯Ases
-¯Aeα3˜ns. (43)
+(with ˜ns assumed small) leads to an off-diagonal component of the moment of inertia tensor
+of (Cs−¯As)α3˜ns = ¯Asesα3˜ns. The angle by which the mantle frame must be rotated so that
+the moment of inertia of the whole planet is purely diagonal is ( ¯Asesα3˜ns)/( ¯Ae), and hence a
+good approximation of ˜εg is
+$$ \tilde{\varepsilon}_g = \tilde{\varepsilon}_m +
+\frac{\bar{A}_s e_s}
+{\bar{A} e} \, \alpha_3 \tilde{n}_s . \qquad (43) $$
 Since the inner core is gravitationally forced into a close alignment with the mantle, the difference
- between ˜ εgand ˜εmremains very small. For the largest inner core radius that we have
-considered, ˜ εgdiffers from ˜ εmonly by approximately 0.001 arcmin.
+ between ˜εg and ˜εm remains very small. For the largest inner core radius that we have
+considered, ˜εg differs from ˜εm only by approximately 0.001 arcmin.
 3.3 Viscous coupling
 We now investigate how viscous coupling at the CMB and ICB affects the equilibrium Cassini
 state. Peale et al. [2014] present two different parameterizations of viscous coupling based on
 the timescale of attenuation of the differential rotation between the fluid core and mantle. More
 complete analytical solutions for the flow resulting from a differentially precessing shell have
-been derived [e.g. Stewartson and Roberts , 1963; Busse , 1968; Rochester , 1976] and we exploit
-these solutions here. The parametrization of the viscous coupling constants KcmbandKicbbased
+been derived [e.g. Stewartson and Roberts , 1963; Busse, 1968; Rochester, 1976] and we exploit
+these solutions here. The parametrization of the viscous coupling constants Kcmb and Kicb based
 on them are given in Mathews and Guo [2005],
-Kcmb=πρfr4
+Kcmb =
+πρf r4
 f
-¯Af√ν
-2Ωo(
-0.195−1.976i)
+¯Af
+√ ν
+2Ωo
+(
+0.195 −1.976i
+)
 , (44a)
-Kicb=πρfr4
+Kicb = πρf r4
 s
-¯As√ν
-2Ωo(
-0.195−1.976i)
+¯As
+√ ν
+2Ωo
+(
+0.195 −1.976i
+)
 , (44b)
-whereνis the kinematic viscosity. The appropriate numerical value for νin planetary interior
+where ν is the kinematic viscosity. The appropriate numerical value for ν in planetary interior
  is not well known but based on theoretical and experimental studies it is expected to be
-of the order of 10−6m2s−1[e.g. Gans , 1972; de Wijs et al. , 1998; Alf` e et al. , 2000; Rutter et al. ,
+of the order of 10^{-6} m^2 s^{-1} [e.g. Gans, 1972; de Wijs et al., 1998; Alfè et al., 2000; Rutter et al.,
 2002a,b].
 –20–
 Confidential manuscript submitted to JGR-Planets
 The above parameterizations are valid only under the assumption that the flow in the boundary
  layer remains laminar. Whether this is reasonable can be assessed by evaluating the Reynolds
-numberRe=rf∆uf/ν, associated with the differential velocity ∆ uf=rfΩo˜mfat the CMB.
-Forrf= 2000 km, and taking ˜ mf= 4 arcmin≈0.001 rad from the results in the previous
-section, we get ∆ uf∼2 mm/s and Re∼6×109. Such a large Reynolds number indicates
+number Re= rf ∆uf /ν, associated with the differential velocity ∆ uf = rf Ωo ˜mf at the CMB.
+For rf = 2000 km, and taking ˜mf = 4 arcmin ≈0.001 rad from the results in the previous
+section, we get ∆ uf ∼2 mm/s and Re ∼6 ×109. Such a large Reynolds number indicates
 that the viscous friction between the fluid core and mantle should induce turbulent flows, as
-is the case for the Cassini state of the Moon [ Yoder , 1981; Williams et al. , 2001; C´ ebron et al. ,
+is the case for the Cassini state of the Moon [Yoder, 1981; Williams et al., 2001; Cébron et al.,
 2019]. For a boundary layer that involves turbulent flows, the viscous torque should be independent
  of the fluid viscosity and proportional to the square of the differential velocity. The
-coupling constant Kcmbshould be in the form
-Kcmb=fcmb⏐⏐˜mf⏐⏐(
-0.195−1.976i)
+coupling constant Kcmb should be in the form
+$$ K_{cmb} = f_{cmb}
+\left|
+\tilde{m}_f \right|
+\left(
+0.195 - 1.976\,i
+\right) $$
 , (45)
-wherefcmbis a numerical factor that depends among other things on surface roughness. Incorporating
+where fcmb is a numerical factor that depends among other things on surface roughness. Incorporating
  a viscous coupling of this form in our rotational model is more challenging not only
-becausefcmbis not known but also because the viscous torque is no longer linear in ˜ mf. One
+because fcmb is not known but also because the viscous torque is no longer linear in ˜ mf . One
 strategy is to find solutions through an iterative process. The simpler alternative strategy that
 we adopt is to use the laminar formulas of Equation (44) but with the understanding that ν
 represents an effective turbulent viscosity.
@@ -982,69 +1178,91 @@ To give an estimate of an appropriate turbulent value for ν, we turn to the Cas
 of the Moon. A measure of the viscous dissipation at the CMB of the Moon has been obtained
 by fitting a rotation model to the librations of the Moon observed by Lunar Laser Ranging (LLR)
 [Williams et al. , 2001, 2014; Williams and Boggs , 2015]. Viscous dissipation is reported in terms
-of a coupling parameter Kand a recent estimate is K/CL= (1.41±0.34)×10−8day−1[Williams
-and Boggs , 2015], where CLis the lunar polar moment of inertia. The connection between K
-andKcmbis
-⏐⏐⏐Im[Kcmb]⏐⏐⏐=K
-CLCL
-CfL1
-ΩL, (46)
-whereCfLis the moment of inertia of the lunar core and Ω L= 2.66×10−6s−1the lunar
-rotation rate. With CfL/CL∼7×10−4[e.g. Williams et al. , 2014], this gives |Im[Kcmb]|∼
+of a coupling parameter K and a recent estimate is K/CL = (1.41±0.34)×10−8 day−1 [Williams
+and Boggs , 2015], where CL is the lunar polar moment of inertia. The connection between K
+and Kcmb is
+$$ \left| \mathrm{Im}[K_{cmb}]
+\right| = \frac{K}
+{C_L}
+\frac{C_L}
+{C_{fL}}
+\frac{1}
+{\Omega_L}
+, \qquad (46) $$
+where CfL is the moment of inertia of the lunar core and ΩL = 2.66 ×10−6 s−1 the lunar
+rotation rate. With CfL /CL ∼7 ×10−4 [e.g. Williams et al. , 2014], this gives |Im[Kcmb]|∼
 9×10−5. In order to match this amplitude in Equation (44a), with lunar parameters and assuming
- a lunar core radius of 400 km, the required turbulent viscosity is ν≈5×10−4m2
+ a lunar core radius of 400 km, the required turbulent viscosity is ν ≈ 5 ×10−4 m2
 s−1, about 500 times larger than the laminar viscosity. Note that the differential velocity at the
-CMB of the Moon is closer to 3 cm/s [ Yoder , 1981; Williams et al. , 2001], more than 10 times
+CMB of the Moon is closer to 3 cm/s [ Yoder, 1981; Williams et al. , 2001], more than 10 times
 larger than our estimate for Mercury above. Since the effective turbulent coupling constant Kcmb
 is proportional to the differential velocity, the effective turbulent viscosity appropriate for Mercury
- should be smaller. Thus, ν≈5×10−4m2s−1gives a conservative upper bound for the
+ should be smaller. Thus, ν ≈5×10−4 m2 s−1 gives a conservative upper bound for the
 possible effective turbulent viscosity that can be expected for Mercury.
-Figure 5 shows how ˜ εm, ˜mfand ˜nsvary as functions of inner core radius for different choices
-of effective viscosities. For ν= 10−5m2s−1, viscous coupling is too weak to affect ˜ εmand
-˜mfand they are essentially unchanged from the solutions shown in Figure 4. With increasing
+Figure 5 shows how ˜εm, ˜mf and ˜ns vary as functions of inner core radius for different choices
+of effective viscosities. For ν = 10 −5 m2 s−1, viscous coupling is too weak to affect ˜εm and
+˜mf and they are essentially unchanged from the solutions shown in Figure 4. With increasing
 ν, the stronger viscous coupling between the core and the mantle reduces their differential velocity,
- and ˜ mfis reduced. With the reduced differential velocity at the CMB, the prediction
-of ˜εmgets closer to 2.04 arcmin, the obliquity expected for a rigid planet. Although our CMB
-viscous coupling model is different than the one used by Peale et al. [2014], our results for ˜ εm
-and ˜mfare qualitatively similar: viscous coupling at the CMB acts to reduce the offset of the
+ and ˜mf is reduced. With the reduced differential velocity at the CMB, the prediction
+of ˜εm gets closer to 2.04 arcmin, the obliquity expected for a rigid planet. Although our CMB
+viscous coupling model is different than the one used by Peale et al. [2014], our results for ˜εm
+and ˜mf are qualitatively similar: viscous coupling at the CMB acts to reduce the offset of the
 fluid spin axis from the mantle symmetry axis. Considering the upper bound in turbulent viscosity
- that we have identified above (i.e ν≈5×10−4m2s−1), the influence of viscous cou–21–
+ that we have identified above (i.e ν ≈5 ×10−4 m2 s−1), the influence of viscous cou–21–
 
 Confidential manuscript submitted to JGR-Planets
-εmεg
+εm
+εg
 mf
 ns
-2.0382.0402.0422.0442.0462.0482.050Obliquity angle (arcmin)
+2.038
+2.040
+2.042
+2.044
+2.046
+2.048
+2.050Obliquity angle (arcmin)
 0 200 400 600 800 1000 1200 1400
-Inner core radius (km)0.00.51.01.52.02.53.03.54.04.5Obliquity angle (arcmin)
+Inner core radius (km)
+0.0
+0.5
+1.0
+1.5
+2.0
+2.5
+3.0
+3.5
+4.0
+4.5Obliquity angle (arcmin)
 0 200 400 600 800 1000 1200 1400
-Inner core radius (km)kinematic viscosity: 0.01 m2 s-1 0.00001 m2 s-1 0.0001 m2 s-1 0.0005 m2 s-1 0.001  m2 s-1
+Inner core radius (km)
+kinematic viscosity: 0.01 m2 s-1 0.00001 m2 s-1 0.0001 m2 s-1 0.0005 m2 s-1 0.001 m2 s-1
 a b
  for a rigid planetεm
-Figure 5. a) Obliquity of the mantle (˜ εm, solid lines) and gravity field (˜ εg, dashed lines) b) ˜ mf
-(solid lines) and ˜ ns(dashed lines) as a function of inner core radius and for different choices of kinematic
+Figure 5. a) Obliquity of the mantle (˜εm, solid lines) and gravity field (˜εg, dashed lines) b) ˜mf
+(solid lines) and ˜ns (dashed lines) as a function of inner core radius and for different choices of kinematic
 viscosity (color in legend).
-pling on ˜εmremains modest, reducing its amplitude by a maximum of approximately 0.0015
+pling on ˜εm remains modest, reducing its amplitude by a maximum of approximately 0.0015
 arcmin.
 The inclusion of viscous coupling at the ICB can lead to a substantial change in inner core
 tilt. A larger viscosity leads to stronger viscous coupling and to a closer alignment of the inner
  core with the fluid core spin axis. The viscous coupling strength is inversely proportional
-tors, so a larger viscosity results in a larger inner core radius at which viscous coupling is of
-a similar magnitude to gravitational coupling. Taking again an upper bound of ν= 5×10−4
-m2s−1, Figure 5 indicates that ˜ nsmay be 1 arcmin or larger only if the inner core radius is
+to rs, so a larger viscosity results in a larger inner core radius at which viscous coupling is of
+a similar magnitude to gravitational coupling. Taking again an upper bound of ν = 5×10−4
+m2 s−1, Figure 5 indicates that ˜ns may be 1 arcmin or larger only if the inner core radius is
 smaller than approximately 100 km. For an inner core of a few hundred km in radius, gravitational
  coupling is much larger than viscous coupling, and the inner core tilt is limited to a
 fraction of 1 arcmin.
 The larger inner core tilt observed with increasing effective viscosity results in a larger
-offset between the obliquity of the principal moment of inertia ˜ εgand that of the mantle ˜ εm,
-though it remains limited. For the upper bound of ν= 5×10−4m2s−1, and forrs= 1500
-km, the difference between ˜ εgand ˜εmis limited to 0.0013 arcmin.
+offset between the obliquity of the principal moment of inertia ˜εg and that of the mantle ˜εm,
+though it remains limited. For the upper bound of ν = 5 ×10−4 m2 s−1, and for rs = 1500
+km, the difference between ˜εg and ˜εm is limited to 0.0013 arcmin.
 The conclusion that emerges from Figure 5 is that the larger the inner core is, the smaller
 the misalignments of both the fluid core and inner core are with respect to the mantle. This
 implies that the larger the inner core is, the more we approach a planet precessing as a rigid
 body, although the misalignment of the spin axis of the fluid core remains important, approximately
  3-4 arcmin away from the mantle symmetry axis. The specific way in which ˜ εm, ˜mf
-and ˜nschange with inner core size would certainly be different for a turbulent model of viscous
+and ˜ns change with inner core size would certainly be different for a turbulent model of viscous
 coupling. But the general conclusion remains that the addition of viscous coupling at the CMB
 and ICB does not significantly modify the Cassini state equilibrium angle of the mantle.
 –22–
@@ -1057,64 +1275,85 @@ electrically conducting regions stretches existing magnetic field lines that thr
 This induces a secondary magnetic field (or equivalently, an electrical current) and an associated
  tangential EM stress resisting the differential motion. EM coupling at the CMB and ICB
 acts then in a similar way to viscous coupling, and this ’magnetic friction’ depends on the strength
-of the radial magnetic field Brand the electrical conductivity σon either side of the boundary
- [ Rochester , 1960, 1962, 1968].
-The parametrization of EM coupling in terms of the coupling constants KcmbandKicb
-has been developed in a few studies [e.g. Buffett , 1992; Buffett et al. , 2002; Dumberry and Koot ,
+of the radial magnetic field Br and the electrical conductivity σ on either side of the boundary
+ [Rochester, 1960, 1962, 1968].
+The parametrization of EM coupling in terms of the coupling constants Kcmb and Kicb
+has been developed in a few studies [e.g. Buffett, 1992; Buffett et al. , 2002; Dumberry and Koot ,
 2012]. Assuming a dominating axial dipole field, with a radial component at the CMB given
-byBr=√
-3⟨
+by Br =
+√
+3
+⟨
 Bd
-r⟩
-cosθ, where⟨
+r
+⟩
+cos θ, where
+⟨
 Bd
-r⟩
+r
+⟩
 is the r.m.s. strength of the field, the coupling constant
-Kcmbcan be written is the form
-Kcmb= 3(1−i)Fcmb⟨
+Kcmb can be written in the form
+Kcmb = 3(1 −i)Fcmb
+⟨
 Bd
-r⟩2, (47)
+r
+⟩2
+, (47)
 where
-Fcmb=1
-Ωoρfrf(1
-σmδm+1
-σfδf)−1
+$$ F_{cmb} = \frac{1}
+{\Omega_o \rho_f r_f}
+\left( \frac{1}
+{\sigma_m \delta_m}
++ \frac{1}
+{\sigma_f \delta_f}
+\right)^{-1} $$
 , (48)
-and whereσm,δm=√
-2/(σmµΩo) andσf,δf=√
-2/(σfµΩo) are the electrical conductivities
+and where σm, δm =
+√
+2/(σmµΩo) and σf , δf =
+√
+2/(σf µΩo) are the electrical conductivities
  and magnetic skin depths in the mantle and fluid core, respectively, with µ= 4π×10−7
-N A−2the magnetic permeability of free space. The r.m.s. field strength⟨
+N A−2 the magnetic permeability of free space. The r.m.s. field strength
+⟨
 Bd
-r⟩
+r
+⟩
 is connected to
 the Gauss coefficient g0
-1of the surface magnetic field by
+1 of the surface magnetic field by
 ⟨
 Bd
-r⟩
-=2√
-3(R
-rf)3⏐⏐g0
-1⏐⏐. (49)
+r
+⟩
+= 2√
+3
+(R
+rf
+)3 ⏐⏐g0
+1
+⏐⏐. (49)
 We can readily build an estimate of the amplitude of Kcmb. The electrical conductivity
 of common mantle minerals in Earth’s mantle at the pressure and temperature corresponding
-to the CMB of Mercury is in the range of σm∼0.01−1 S m−1[Constable , 2015]. In contrast,
- the electrical conductivity of Fe in planetary cores is expected to be close σf∼106S
-m−1[Pozzo et al. , 2012; de Koker et al. , 2012]. This implies that ( σmδm)−1≫(σfδf)−1. Takingσm=
- 1 S m−1,⏐⏐g0
-1⏐⏐= 190 nT for Mercury’s dipole field [ Anderson et al. , 2012],rf=
-2000 km,ρf= 7000 kg m−3, this gives Kcmb≈(3.1×10−11)·(1−i). To put this amplitude
-in perspective, taking a molecular viscosity of ν= 10−6m2s−1in Equation (44a) gives a viscous
- coupling constant of Kcmb≈(6.0×10−7)·(0.195−1.976i). Hence, EM coupling at the
+to the CMB of Mercury is in the range of σm ∼ 0.01 −1 S m−1 [Constable, 2015]. In contrast,
+ the electrical conductivity of Fe in planetary cores is expected to be close to σf ∼ 10^6 S
+m−1 [Pozzo et al. , 2012; de Koker et al. , 2012]. This implies that ( σmδm)−1 ≫(σf δf )−1. Taking
+ σm = 1 S m −1,
+⏐⏐g0
+1
+⏐⏐ = 190 nT for Mercury’s dipole field [ Anderson et al. , 2012], rf =
+2000 km, ρf = 7000 kg m−3, this gives Kcmb ≈(3.1 ×10−11) ·(1 −i). To put this amplitude
+in perspective, taking a molecular viscosity of ν = 10−6 m2 s−1 in Equation (44a) gives a viscous
+ coupling constant of Kcmb ≈(6.0 ×10−7) ·(0.195 −1.976i). Hence, EM coupling at the
 CMB is much weaker than viscous coupling, even if we include other spherical harmonic components
  of the radial magnetic field.
 EM coupling can be enhanced if strongly stratified pockets of core fluid are trapped by
-CMB cavities [ Buffett , 2010; Glane and Buffett , 2018], in which case the effective σmcould be
-closer toσf. Likewise, σmcan be increased if a more electrically conducting layer has formed
+CMB cavities [Buffett, 2010; Glane and Buffett , 2018], in which case the effective σm could be
+closer to σf . Likewise, σm can be increased if a more electrically conducting layer has formed
 at the bottom of Mercury’s mantle, for instance by the upward sedimentation and compaction
 of solid FeS crystals precipitating out of the fluid core [e.g. Hauck et al. , 2013]. However, even
-in the extreme case of σm=σf= 106S m−1,Kcmb≈(1.6×10−8)·(1−i), which remains
+in the extreme case of σm = σf = 106 S m−1, Kcmb ≈(1.6 ×10−8) ·(1 −i), which remains
 –23–
 Confidential manuscript submitted to JGR-Planets
 smaller by a factor ∼60 than the smallest possible viscous coupling constant. Viscous forces
@@ -1125,42 +1364,45 @@ coupling can be much larger and dominate viscous coupling. We assume that the ma
 morphology at the ICB is dominantly comprised of small spatial scales for example as predicted
 by the dynamo model of Christensen [2006]. EM coupling in this case can be parametrized in
 terms of an equivalent uniform radial magnetic field ⟨Br⟩capturing its r.m.s. strength [ Buffett
- et al. , 2002; Dumberry and Koot , 2012]. Assuming an electrical conductivity σequal in the
-fluid and solid core, the coupling constant Kicbcan be written in the form
-Kicb=5
-4(1−i)Ficb⟨Br⟩2, (50)
+ et al. , 2002; Dumberry and Koot , 2012]. Assuming an electrical conductivity σ equal in the
+fluid and solid core, the coupling constant Kicb can be written in the form
+$$ K_{icb} = \frac{5}{4}
+(1 - i) \, F_{icb} \langle B_r \rangle^2 , \qquad (50) $$
 where
-Ficb=σδ
-Ωoρsrs, (51)
-and whereδ=√
-2/(σµΩo) is the magnetic skin depth. As Ficbis inversely proportional to
-rs,Kicbis inversely proportional to inner core size. Note that computing the EM coupling based
+$$ F_{icb} = \frac{\sigma \delta}
+{\Omega_o \rho_s r_s}
+, \qquad (51) $$
+and where δ =
+√
+2/(σµΩo) is the magnetic skin depth. As Ficb is inversely proportional to
+rs, Kicb is inversely proportional to inner core size. Note that computing the EM coupling based
 on the r.m.s. strength ⟨Br⟩rather than a true field morphology tends to overestimate the strength
 of the coupling [ Koot and Dumberry , 2013]. However, since the strength of the radial magnetic
 field at the ICB of Mercury is largely unknown, imperfections of the EM coupling model are
 absorbed in the range of possible ⟨Br⟩values.
 The parametrization of Equation (50) is only valid in a ’weak field’ regime [ Buffett et al. ,
 2002], when the feedback from the Lorentz force on the flow in the fluid core can be neglected.
-When⟨Br⟩is sufficiently large, this is no longer the case. EM coupling then enters a ’strong
-field’ regime [ Buffett et al. , 2002; Dumberry and Koot , 2012; Koot and Dumberry , 2013] in which
-Kicbincreases linearly with ⟨Br⟩instead of quadratically. A good approximation of Kicbcalculated
+When ⟨Br⟩is sufficiently large, this is no longer the case. EM coupling then enters a ’strong
+field’ regime [Buffett et al. , 2002; Dumberry and Koot , 2012; Koot and Dumberry , 2013] in which
+Kicb increases linearly with ⟨Br⟩instead of quadratically. A good approximation of Kicb calculated
  for Earth can be extracted from Figure 6a of Dumberry and Koot [2012],
 KE
-icb= (0.175−i0.138)⟨Br⟩, (52)
-where⟨Br⟩is in units of Tesla. The superscript Eemphasizes that the numerical factors are
+icb = (0.175 −i0.138) ⟨Br⟩, (52)
+where ⟨Br⟩is in units of Tesla. The superscript E emphasizes that the numerical factors are
 appropriate for the parameter values adopted for Earth in the computation of Dumberry and
 Koot [2012]. To adapt these numerical factors to Mercury, we write,
-Kicb= (0.175−i0.138)Ficb
+Kicb = (0.175 −i0.138)Ficb
 FE
-icb⟨Br⟩, (53)
-whereFE
-icbis defined as in Equation (51) but using the parameters for Earth as defined in Dumberry
- and Koot [2012]. These are Ω o= 7.292×10−5s−1,ρs= 12846 kg m−3,rs= 1221.5
-km,σ= 5×105S m−1, which givesFE
-icb= 90.36 T−2.
-To computeFicb, we assume an electrical conductivity of σ= 106S m−1in the core of
+icb
+⟨Br⟩, (53)
+where FE
+icb is defined as in Equation (51) but using the parameters for Earth as defined in Dumberry
+ and Koot [2012]. These are Ωo = 7.292 ×10−5 s−1, ρs = 12846 kg m−3, rs = 1221.5
+km, σ= 5 ×105 S m−1, which gives FE
+icb = 90.36 T−2.
+To compute Ficb, we assume an electrical conductivity of σ= 106 S m−1 in the core of
 Mercury [e.g. de Koker et al. , 2012; Deng et al. , 2013]. The transition between the weak and
-strong field regime occurs when ⟨Br⟩ ≈ 1.53 mT for the real part of Kicb.⟨Br⟩at the ICB
+strong field regime occurs when ⟨Br⟩ ≈1.53 mT for the real part of Kicb. ⟨Br⟩at the ICB
 of Mercury is unknown. The dynamo model of Christensen [2006] showed that the field geometry
  inside the core could be dominated by small length scales, yet only the weaker lower harmonics
  of the field would penetrate through a thermally stratified layer in the upper region of
@@ -1168,82 +1410,106 @@ of Mercury is unknown. The dynamo model of Christensen [2006] showed that the fi
 Confidential manuscript submitted to JGR-Planets
 the fluid core and reach the surface. If so, the field strength inside the core can exceed the surface
  field strength by a factor 1000. Taking a surface field strength equal to ∼300 nT [e.g Anderson
- et al. , 2012],⟨Br⟩at the ICB could be as large as 0.3 mT, corresponding to approximately
+ et al. , 2012], ⟨Br⟩at the ICB could be as large as 0.3 mT, corresponding to approximately
  10% of the field strength within Earth’s core. Given that it is perhaps unlikely that Mercury’s
  field can be as high as that in Earth’s core, in all likelihood EM coupling at the ICB of
 Mercury remains in the weak field regime.
-Figure 6 shows how ˜ εm, ˜mfand ˜nsvary as functions of inner core radius for different choices
-of⟨Br⟩. The larger⟨Br⟩is, the stronger is the EM coupling at the ICB, and the smaller is the
+Figure 6 shows how ˜εm, ˜mf and ˜ns vary as functions of inner core radius for different choices
+of ⟨Br⟩. The larger ⟨Br⟩is, the stronger is the EM coupling at the ICB, and the smaller is the
 differential rotation between the fluid core and inner core. The inner core and fluid core are virtually
  locked into a common precession motion when ⟨Br⟩>0.3 mT. Further increasing ⟨Br⟩
 above 1 mT does not change the solution as EM coupling already dominates all other torques
 on the inner core. This is the case even when EM coupling transitions into the strong field regime.
-EM coupling at the CMB is included in these calculations, with σm= 1 S m−1and⏐⏐g0
-1⏐⏐=
+EM coupling at the CMB is included in these calculations, with σm = 1 S m −1 and
+⏐⏐g0
+1
+⏐⏐ =
 190 nT, but remains much weaker than the inertial torque at the CMB, so for a small inner core
-we retrieved the solutions of ˜ εmand ˜mfshown in Figure 4.
-As the inner core radius is increased, both ˜ εmand ˜mfget smaller, as it was the case with
+we retrieved the solutions of ˜εm and ˜mf shown in Figure 4.
+As the inner core radius is increased, both ˜εm and ˜mf get smaller, as it was the case with
 viscous coupling alone, although the addition of EM coupling lead to more substantial changes.
 The inner core needs to be larger than approximately 500 km for changes in the Cassini state
-equilibrium to be noticeable. It is important to point out that ˜ mfis reduced not because of
+equilibrium to be noticeable. It is important to point out that ˜ mf is reduced not because of
 EM coupling at the CMB, but rather from the combination of EM coupling at the ICB, which
 pulls the fluid core towards an alignment with the inner core, and gravitational coupling on the
 inner core, which pulls the latter to align with the mantle. The larger the EM coupling is, the
-greater is the reduction in ˜ εmand ˜mf.
+greater is the reduction in ˜εm and ˜mf .
 When the EM coupling at the ICB is sufficiently strong that the fluid and solid cores are
-locked into a common precession motion, a good approximation of ˜ εmis given by the same prediction
- as Equations (39-40) involving the effective moment of inertia C′, exceptχis now given
+locked into a common precession motion, a good approximation of ˜εm is given by the same prediction
+ as Equations (39-40) involving the effective moment of inertia C′, except χ is now given
 by
-χ=¯AcΩpcosI−¯AsΩoα3φs
-¯AfΩo(ef+Kcmb) +¯AsΩoesα3αg−¯AcΩpcosI. (54)
-For a small inner core, ¯AcΩpcosI > ¯AsΩoα3φsandχis positive. Because ¯AsΩoα3φsincreases
-with inner core size, χgets smaller, and so do C′and ˜εm. The mantle obliquity drops from 2.049
+χ=
+¯AcΩp cos I−¯AsΩoα3φs
+¯Af Ωo(ef + Kcmb) + ¯AsΩoesα3αg −¯AcΩp cos I . (54)
+For a small inner core, ¯AcΩp cos I >¯AsΩoα3φs and χ is positive. Because ¯AsΩoα3φs increases
+with inner core size, χ gets smaller, and so do C′and ˜εm. The mantle obliquity drops from 2.049
 arcmin for a small inner core to 2.034 arcmin for an inner core of 1500 km, a reduction of 0.015
-arcmin. For an inner core larger than ≈1000 km, ¯AcΩpcosI < ¯AsΩoα3φs, soχbecomes negative,C′becomes
- smaller than the moment of inertia of a rigid Mercury C, and ˜εmbecomes
+arcmin. For an inner core larger than ≈1000 km, ¯AcΩp cos I <¯AsΩoα3φs, so χ becomes negative,
+ C′becomes smaller than the moment of inertia of a rigid Mercury C, and ˜εm becomes
 smaller than the prediction based on a rigid planet.
 The larger the inner core is, the smaller are the misalignments of the fluid and solid cores
 with respect to the mantle. Hence, the general conclusion we reached for viscous coupling alone
 is not altered with the addition of EM coupling but further strengthened; the larger the inner
 core is, the closer we approach a planet precessing as a rigid body. This is best revealed by the
-obliquity of the gravity field ˜ εgwhich, for a large inner core, asymptotically approaches the obliquity
+obliquity of the gravity field ˜εg which, for a large inner core, asymptotically approaches the obliquity
  expected for a rigid planet. Note that with strong EM coupling at the ICB, the offset between
- ˜εmand ˜εgcan be as large as 0.008 arcmin for a large inner core.
+ ˜εm and ˜εg can be as large as 0.008 arcmin for a large inner core.
 3.5 Fixed inner core density versus fixed ICB density contrast
 Coupling models when viscous and EM stresses are both present have been presented in
 Mathews and Guo [2005] and Deleplace and Cardin [2006]. However, in the light of our results,
 –25–
 Confidential manuscript submitted to JGR-Planets
-2.0322.0342.0362.0382.0402.0422.0442.0462.0482.050Obliquity angle (arcmin)
+2.032
+2.034
+2.036
+2.038
+2.040
+2.042
+2.044
+2.046
+2.048
+2.050Obliquity angle (arcmin)
 0 200 400 600 800 1000 1200 1400
-Inner core radius (km)0.00.51.01.52.02.53.03.54.04.5Obliquity angle (arcmin)
+Inner core radius (km)
+0.0
+0.5
+1.0
+1.5
+2.0
+2.5
+3.0
+3.5
+4.0
+4.5Obliquity angle (arcmin)
 0 200 400 600 800 1000 1200 1400
-Inner core radius (km)Br at ICB: 1 mT 0.01 mT 0.03 mT 0.1 mT 0.3  mT
+Inner core radius (km)
+Br at ICB: 1 mT 0.01 mT0.03 mT0.1 mT0.3  mT
 εm
 εg
 mf
-nsa b
+ns
+a b
  for a rigid planetεm
-Figure 6. a) Obliquity of the mantle (˜ εm, solid lines) and gravity field (˜ εg, dashed lines) b) ˜ mf
-(solid lines) and ˜ ns(dashed lines) as a function of inner core radius and for different choices of Br
+Figure 6. a) Obliquity of the mantle (˜εm, solid lines) and gravity field (˜εg, dashed lines) b) ˜mf
+(solid lines) and ˜ns (dashed lines) as a function of inner core radius and for different choices of Br
 (colour in legend).
 for the Cassini state equilibrium of Mercury, the tangential stress at the CMB is dominated by
 viscous forces, and that at the ICB should be dominated by EM forces. To simplify, we consider
- a model where Kcmbis purely from viscous coupling and Kicbpurely from EM coupling.
-We choose an effective viscosity at the CMB of ν= 10−4m2s−1, which we believe to be a
+ a model where Kcmb is purely from viscous coupling and Kicb purely from EM coupling.
+We choose an effective viscosity at the CMB of ν = 10 −4 m2 s−1, which we believe to be a
 representative value given the comparison with the Moon (see section 3.3). We take a radial
 field strength at the ICB of ⟨Br⟩= 0.3 mT, approximately the field strength expected under
 the dynamo scenario of Christensen [2006]. We adopt these values as those of a ‘representative’
- coupling model, although the uncertainty on νand⟨Br⟩obviously remains high.
-Figure 7 shows how ˜ εm, ˜mfand ˜nsvary with inner core radius for the ’representative’
+ coupling model, although the uncertainty on ν and ⟨Br⟩obviously remains high.
+Figure 7 shows how ˜εm, ˜mf and ˜ns vary with inner core radius for the ’representative’
 coupling model (black lines) under the fixed inner core density scenario that we have used in
 sections 3.2, 3.3 and 3.4. Figure 7 also shows how the results change when, for the same representative
  coupling model, we adopt instead a fixed density contrast between the fluid and solid
-cores and for different choices of α3(coloured lines). For a relatively high density contrast ( α3=
+cores and for different choices of α3 (coloured lines). For a relatively high density contrast ( α3 =
 0.2), the results are qualitatively similar to the fixed inner core density scenario. For a smaller
 α3, the point at which the orientation of the co-precessing fluid and inner cores begins to be
 pulled into an alignment with the mantle is pushed to a larger inner core radius. However, the
-general behaviour of ˜ εm, ˜mfand ˜nsas functions of inner core radius is unchanged. Hence, all
+general behaviour of ˜εm, ˜mf and ˜ns as functions of inner core radius is unchanged. Hence, all
 our results in the previous three sections would be qualitatively similar under a fixed density
 contrast scenario. A smaller density contrast at the ICB only implies that a larger inner core
 is required in order to produce an equivalent change in the Cassini state equilibrium.
@@ -1254,38 +1520,61 @@ model included the tangential viscous stress at the ICB and CMB, but not the EM
 Table 1 gives the obliquities of the mantle, fluid core and inner core, denoted respectively as
 –26–
 Confidential manuscript submitted to JGR-Planets
-2.0322.0342.0362.0382.0402.0422.0442.0462.0482.050Obliquity angle (arcmin)
+2.032
+2.034
+2.036
+2.038
+2.040
+2.042
+2.044
+2.046
+2.048
+2.050Obliquity angle (arcmin)
 0 200 400 600 800 1000 1200 1400
-Inner core radius (km)0.00.51.01.52.02.53.03.54.04.5Obliquity angle (arcmin)
+Inner core radius (km)
+0.0
+0.5
+1.0
+1.5
+2.0
+2.5
+3.0
+3.5
+4.0
+4.5Obliquity angle (arcmin)
 0 200 400 600 800 1000 1200 1400
-Inner core radius (km) for a rigid planetεma bα3: 0.20 0.01 0.05 0.10 0.15 ρs = 8800 kg m -3
+Inner core radius (km)
+ for a rigid planetεm
+a b
+α3: 0.20 0.010.05 0.100.15ρs = 8800 kg m-3
 mf
-nsεm
+ns
+εm
 εg
-Figure 7. a) Obliquity of the mantle (˜ εm, solid lines) and gravity field (˜ εg, dashed lines) b) ˜ mf
-(solid lines) and ˜ ns(dashed lines) as a function of inner core radius, for a fixed inner core density of
-8800 kg m−3(black lines) and for different choices of α3(coloured lines).
+Figure 7. a) Obliquity of the mantle (˜εm, solid lines) and gravity field (˜εg, dashed lines) b) ˜mf
+(solid lines) and ˜ns (dashed lines) as a function of inner core radius, for a fixed inner core density of
+8800 kg m−3 (black lines) and for different choices of α3 (coloured lines).
 i′
-m,i′
-fandi′
+m, i′
+f and i′
 s; these represent the obliquities with respect to the orbital plane and are connected
 to our variables by: i′
-m= ˜εm,i′
-f= ˜εm+ ˜m+ ˜mf≈˜εm+ ˜mfandi′
-s= ˜εm+ ˜ns. To summarize
+m = ˜εm, i′
+f = ˜εm + ˜m+ ˜mf ≈˜εm + ˜mf and i′
+s = ˜εm + ˜ns. To summarize
 their results, i′
-fandi′
-svary substantially for different inner core sizes, are always of comparable
+f and i′
+s vary substantially for different inner core sizes, are always of comparable
  amplitude, and i′
-sis always larger than i′
-f. Furthermore, they find that as the inner core
+s is always larger than i′
+f . Furthermore, they find that as the inner core
 size is increased, the mantle obliquity i′
-mgets progressively larger and is displaced further away
+m gets progressively larger and is displaced further away
 from its expected orientation based of a rigid planet (see their Figure 6). The change in i′
-mthey
+m they
 obtain between a case with no inner core and an inner core radius equal to 0.6 times the planetary
  radius (≈1463 km, close to the maximum inner core size of 1500 km we have considered),
-is approximately an increase of 5 ×10−5rad = 0.17 arcmin. This also corresponds approximately
+is approximately an increase of 5 ×10−5 rad = 0.17 arcmin. This also corresponds approximately
  to the deviation of the obliquity with respect to that of a rigid planet.
 When only viscous stress is included in our model (section 3.3), our results are substantially
  different. As illustrated in Figure 4, we find instead that the obliquity of the fluid core
@@ -1323,13 +1612,13 @@ amplitude of the decrease can be as large as 0.015 arcmin, 3 times larger than f
  alone; this remains a factor 10 smaller than the changes suggested in Peale et al. [2016],
 and again, importantly, in the reverse direction.
 Our results suggest then that the presence and size of an inner core leads to only modest
- changes of the mantle obliquity εmcompared to the obliquity predicted on the basis of an
-entirely rigid planet ( εr
-m). Let us denote this difference as ∆ εm=εm−εr
+ changes of the mantle obliquity εm compared to the obliquity predicted on the basis of an
+entirely rigid planet (εr
+m). Let us denote this difference as ∆ εm = εm−εr
 m. The largest ∆ εm
-occurs for a small or no inner core, and is ∆ εm≈0.01 arcmin. This difference is decreased
+occurs for a small or no inner core, and is ∆ εm ≈ 0.01 arcmin. This difference is decreased
 as the inner core size is increased. For a sufficiently large inner core, in the case of a strong EM
-coupling and large density contrast at the ICB, ∆ εmcan be negative, but its absolute value
+coupling and large density contrast at the ICB, ∆ εm can be negative, but its absolute value
 remains smaller than 0.01 arcmin.
 To put these results in perspective, the uncertainty in the measurement of the mantle obliquity
  reported by Margot et al. [2012] and Stark et al. [2015a] is of the order of 0.08 arcmin, much
@@ -1341,11 +1630,11 @@ the inner core size.
 Nevertheless, our results show that the presence of a fluid core and inner core affect the
 resulting mantle obliquity by as much as 0.01 arcmin. This is of the same order as the change
 in obliquity caused by elastic tidal deformation, which is of the order of 0.35 arcsec ( ≈0.006
-arcmin) [ Baland et al. , 2017]. This is also of the same order as the amplitude of the nutation
+arcmin) [Baland et al. , 2017]. This is also of the same order as the amplitude of the nutation
 motion about the mean equilibrium Cassini state forced by the precession of the pericenter, which
-is approximately 0.85 arcsec ( ≈0.014 arcmin) [ Baland et al. , 2017]. The precision on the obliquity
+is approximately 0.85 arcsec ( ≈0.014 arcmin) [Baland et al. , 2017]. The precision on the obliquity
  from the upcoming BepiColombo satellite mission is expected to be ≤0.5 arcsec (≤0.008
-arcmin) [ Cical` o et al. , 2016]. Thus, in addition to including tidal deformation and the precession
+arcmin) [Cical` o et al., 2016]. Thus, in addition to including tidal deformation and the precession
  of the pericenter, a Cassini state model that includes a fluid and solid core will then be
 necessary in order to properly tie Mercury’s obliquity to its interior structure. In turn, this opens
 the possibility of further constraining the interior structure of Mercury on the basis of its obliquity.
@@ -1357,19 +1646,19 @@ two orientations do not coincide when an inner core is present and is misaligned
  Since gravitational coupling prevents a large inner core tilt with respect to the mantle, we
 –28–
 Confidential manuscript submitted to JGR-Planets
-find that the misalignment ∆ εg=εg−εmis limited. The maximum offset that we obtain
-is approximately ∆ εg≈0.007 arcmin. This limited magnitude of offset is important in the
-light of the recent obliquity of the gravity field estimated in Genova et al. [2019],εg= 1.968±
+find that the misalignment ∆ εg = εg −εm is limited. The maximum offset that we obtain
+is approximately ∆εg ≈ 0.007 arcmin. This limited magnitude of offset is important in the
+light of the recent obliquity of the gravity field estimated in Genova et al. [2019], εg = 1.968±
 0.027 arcmin. This is substantially smaller than the two mesurements of the obliquity of the
-spin-symmetry axis of the mantle: εm= 2.04±0.08 arcmin [ Margot et al. , 2012] and εm=
-2.029±0.085 arcmin [ Stark et al. , 2015a], although all three measurements remain consistent
+spin-symmetry axis of the mantle: εm = 2.04 ±0.08 arcmin [Margot et al. , 2012] and εm =
+2.029±0.085 arcmin [Stark et al. , 2015a], although all three measurements remain consistent
 with one another within their error estimates. In their interpretation, Genova et al. [2019] suggest
- that the different central value of the obliquity that they obtain (smaller by ∼0.07 arcmin)
- is perhaps explained by an offset ∆ εgdue to the presence of a (possibly large) solid inner
+ that the different central value of the obliquity that they obtain (smaller by ∼ 0.07 arcmin)
+ is perhaps explained by an offset ∆ εg due to the presence of a (possibly large) solid inner
  core. However, this is one order of magnitude larger than the maximum magnitude of ∆ εg
 that we predict. Moreover, we predict that the obliquity of the gravity field should be larger
 than that of the mantle spin axis, not smaller. Hence, at the present-day level of the precision
-of the measurements, εgandεmshould coincide, and their difference cannot be interpreted as
+of the measurements, εg and εm should coincide, and their difference cannot be interpreted as
 reflecting the misalignment between the polar moment of inertia of the whole planet and the
 mantle spin axis.
 Lastly, we have concentrated our efforts on the mutual orientations of the different spin
@@ -1412,186 +1701,186 @@ scripts and data files to reproduce all figures are freely accessible in Dumberr
 was supported by an NSERC/CRSNG Discovery Grant.
 References
 Alf` e, D., G. Kresse, and M. Gillan (2000), Structure and dynamics of liquid iron under core
-conditions, Phys. Rev. ,B61, 132–142.
+conditions, Phys. Rev., B61, 132–142.
 Anderson, B. J., C. L. Johnson, H. Korth, M. E. Purucker, R. M. Winslow, J. A. Slavin,
 S. C. Solomon, R. L. McNutt, M. Raines, Jim, and T. H. Zurbuchen (2011), The global
-magnetic field of Mercury from MESSENGER orbital observations, Science ,333, 1859–
+magnetic field of Mercury from MESSENGER orbital observations, Science, 333, 1859–
 1862.
 Anderson, B. J., C. L. Johnson, H. Korth, R. M. Winslow, J. E. Borovsky, M. E. Purucker,
  J. A. Slavin, S. C. Solomon, M. T. Zuber, and R. L. McNutt (2012), Lowdegree
- structure in mercury’s planetary magnetic field, J. Geophys. Res. ,117, E00L12,
+ structure in mercury’s planetary magnetic field, J. Geophys. Res., 117, E00L12,
 doi:10.1029/2012JE004159.
 Baland, R.-M., A. Yseboodt, M. Rivoldini, and T. Van Hoolst (2017), Obliquity of Mercury:
- Influence of the precession of the pericenter and of tides, Icarus ,291, 136–159.
+ Influence of the precession of the pericenter and of tides, Icarus, 291, 136–159.
 Baland, R.-M., A. Coyette, and T. Van Hoolst (2019), Coupling between the spin precession
  and polar motion of a synchronously rotating satellite: application to Titan,
-Celestial Mechanics and Dynamical Astronomy ,131(11), 1–50.
+Celestial Mechanics and Dynamical Astronomy , 131 (11), 1–50.
 Buffett, B. A. (1992), Constraints on magnetic energy and mantle conductivity from the
-forced nutations of the Earth, J. Geophys. Res. ,97, 19,581–19,597.
+forced nutations of the Earth, J. Geophys. Res., 97, 19,581–19,597.
 Buffett, B. A. (2010), Chemical stratification at the top of earth’s core: Constraints from
-observations of nutations, Earth Planet. Sci. Lett. ,296, 367–372.
+observations of nutations, Earth Planet. Sci. Lett. , 296, 367–372.
 Buffett, B. A., P. M. Mathews, and T. A. Herring (2002), Modeling of nutation-precession:
-effects of electromagnetic coupling, J. Geophys. Res. ,107, doi:10.1029/2001JB000056.
-Busse, F. H. (1968), Steady fluid flow in a precessing spheroidal shell, J. Fluid Mech. ,33,
+effects of electromagnetic coupling, J. Geophys. Res., 107, doi:10.1029/2001JB000056.
+Busse, F. H. (1968), Steady fluid flow in a precessing spheroidal shell, J. Fluid Mech. , 33,
 739–751.
 Byrne, P. K., C. Klimczak, A. M. C. Seng¨ or, S. C. Solomon, T. R. Watters, and S. A.
 Hauck (2014), Mercury’s global contraction much greater than earlier estimates, Nature
-Geosci. ,7, 301–307.
+Geosci., 7, 301–307.
 C´ ebron, D., R. Laguerre, J. Noir, and N. Schaeffer (2019), Precessing spherical shells:
-flows, dissipation, dynamo and the lunar core, Geophys. J. Int. ,219(Supplement 1),
+flows, dissipation, dynamo and the lunar core, Geophys. J. Int. , 219 (Supplement 1),
 S34–S57, doi:10.1093/gji/ggz037.
-Christensen, U. R. (2006), A deep dynamo generating Mercury’s magnetic field, Nature ,
+Christensen, U. R. (2006), A deep dynamo generating Mercury’s magnetic field, Nature,
 444, 1056–1058.
 Cical` o, S., G. Schettino, S. Di Ruzza, E. M. Alessi, G. Tommei, and A. Milani (2016), The
 BepiColombo MORE gravimetry and rotation experiments with the ORBIT14 software,
-Month. N. Roy. Astr. Soc. ,457, 1507–1521.
-Colombo, G. (1966), Cassini’s second and third laws, Astron. J. ,71, 891–896.
+Month. N. Roy. Astr. Soc. , 457, 1507–1521.
+Colombo, G. (1966), Cassini’s second and third laws, Astron. J., 71, 891–896.
 Constable, S. (2015), Geomagnetic induction studies, in Treatise on Geophysics, Second
-Edition , vol. 5, edited by G. Schubert and M. Kono, chap. 7, pp. 219–254, Elsevier, Oxford.
+Edition, vol. 5, edited by G. Schubert and M. Kono, chap. 7, pp. 219–254, Elsevier, Oxford.
 
 de Koker, N., G. Seinle-Neumann, and V. Vlˇ cek (2012), Electrical resistivity and thermal
 conductivity of liquid Fe alloys at high P and T, and heat flux in Earth’s core, Proc.
-Nat. Acad. Sci. ,109, 4070–4073.
+Nat. Acad. Sci., 109, 4070–4073.
 –30–
 Confidential manuscript submitted to JGR-Planets
 de Wijs, G. A., G. Kresse, L. Voˇ cadlo, D. Dobson, D. Alf´ e, M. J. Gillan, and G. D. Price
-(1998), The viscosity of liquid iron at the physical conditions of the Earth’s core, Nature ,
+(1998), The viscosity of liquid iron at the physical conditions of the Earth’s core, Nature,
 392, 805–807.
-Dehant, V., and P. Mathews (2015), Earth rotation variations, in Treatise on Geophysics ,
+Dehant, V., and P. Mathews (2015), Earth rotation variations, in Treatise on Geophysics,
 vol. 3, edited by G. Schubert, chap. 10, pp. 263–305, Elsevier, Oxford.
 Deleplace, B., and P. Cardin (2006), Viscomagnetic torque at the core mantle boundary,
-Geophys. J. Int. ,167, 557–566.
+Geophys. J. Int. , 167, 557–566.
 Deng, L., C. Seagle, Y. Fei, and A. Shahar (2013), High pressure and temperature electrical
-resistivity of iron and implications for planetary cores, Geophys. Res. Lett. ,40, 33–37,
+resistivity of iron and implications for planetary cores, Geophys. Res. Lett., 40, 33–37,
 doi:10.1029/2012GL054347.
 Dumberry, M. (2020), Replication Data for: The influence of a fluid core and a solid inner
  core on the Cassini sate of Mercury, https://doi.org/10.7939/DVN/903HUV, UAL
 Dataverse, V2.
 Dumberry, M., and L. Koot (2012), A global model of electromagnetic coupling for nutations,
- Geophys. J. Int. ,191, 530–544.
+ Geophys. J. Int. , 191, 530–544.
 Dumberry, M., and A. Rivoldini (2015), Mercury’s inner core size and core-crystallization
-regime, Icarus ,248, 254–268.
+regime, Icarus, 248, 254–268.
 Dumberry, M., and M. A. Wieczorek (2016), The forced precession of the Moon’s inner
-core, J. Geophys. Res. Planets ,121, 1264–1292.
+core, J. Geophys. Res. Planets , 121, 1264–1292.
 Dumberry, M., A. Rivoldini, T. Van Hoolst, and M. Yseboodt (2013), The role of Mercury’s
- core density structure on its longitudinal librations, Icarus ,225, 62–74.
-Gans, R. F. (1972), Viscosity of the Earth’s core, J. Geophys. Res. ,77, 360–366.
+ core density structure on its longitudinal librations, Icarus, 225, 62–74.
+Gans, R. F. (1972), Viscosity of the Earth’s core, J. Geophys. Res., 77, 360–366.
 Genova, A., S. Goossens, E. Mazarico, F. G. Lemoine, G. A. Neumann, W. Kuang,
 T. J. Sabaka, S. A. Hauck II, D. E. Smith, S. C. Solomon, and M. T. Zuber (2019),
-Geodetic evidence that Mercury has a solid inner core, Geophys. Res. Lett. ,46,
+Geodetic evidence that Mercury has a solid inner core, Geophys. Res. Lett., 46,
 doi:10.1029/2018GL081135.
 Glane, S., and B. A. Buffett (2018), Enhanced core-mantle coupling due to stratification at
-the top of the core, Frontiers in Earth Science ,6, 171, doi:10.3389/feart.2018.00171.
+the top of the core, Frontiers in Earth Science, 6, 171, doi:10.3389/feart.2018.00171.
 Grott, M., D. Breuer, and M. Laneuville (2011), Thermo-chemical evolution and global
-contraction of Mercury, Earth Planet. Sci. Lett. ,307, 135–146.
+contraction of Mercury, Earth Planet. Sci. Lett. , 307, 135–146.
 Hauck, S. A., J.-L. Margot, S. C. Solomon, R. J. Phillips, C. L. Johnson, F. G. Lemoine,
 E. Mazarico, T. J. McCoy, S. Padovan, S. J. Peale, M. E. Perry, D. E. Smith, and M. T.
-Zuber (2013), The curious case of Mercury’s internal structure, J. Geophys. Res. ,118,
+Zuber (2013), The curious case of Mercury’s internal structure, J. Geophys. Res., 118,
 doi:10.1002/jgre.20091.
 Johnson, C. L., M. E. Purucker, H. Korth, B. J. Anderson, R. M. Winslow, M. M. H.
 Al Asad, J. A. Slavin, I. I. Alexeev, R. J. Phillips, M. T. Zuber, and S. C. Solomon
 (2012), MESSENGER observations of mercury’s magnetic field structure, J. Geophys.
-Res.,117, E00L14, doi:10.1029/2012JE004217.
+Res., 117, E00L14, doi:10.1029/2012JE004217.
 Konopliv, A. S., R. S. Park, and A. I. Ermakov (2020), The Mercury gravity field, orientation,
  love number, and ephemeris from the MESSENGER radiometric tracking data,
-Icarus ,335, 113,386.
+Icarus, 335, 113,386.
 Koot, L., and M. Dumberry (2013), The role of the magnetic field morphology on the
-electromagnetic coupling for nutations, Geophys. J. Int. ,195, 200–210.
+electromagnetic coupling for nutations, Geophys. J. Int. , 195, 200–210.
 Li, J., Y. Fei, H. Mao, K. Hirose, and S. Shieh (2001), Sulfur in Earth’s inner core, Earth
-Planet. Sci. Lett. ,193, 509–514.
+Planet. Sci. Lett. , 193, 509–514.
 Margot, J. L., S. J. Peale, R. F. Jurgens, M. A. Slade, and I. V. Holin (2007), Large longitude
- libration of Mercury reveals a molten core, Science ,316, 710–714.
+ libration of Mercury reveals a molten core, Science, 316, 710–714.
 Margot, J. L., S. J. Peale, S. C. Solomon, S. A. Hauck, F. D. Ghigo, R. F. Jurgens,
 M. Yseboodt, J. D. Giorgini, S. Padovan, and D. B. Campbell (2012), Mercury’s
 –31–
 Confidential manuscript submitted to JGR-Planets
-moment of inertia from spin and gravity data, J. Geophys. Res. ,117, E00L09,
+moment of inertia from spin and gravity data, J. Geophys. Res., 117, E00L09,
 doi:10.1029/2012JE004161.
 Margot, J. L., S. A. Hauck II, E. Mazarico, S. Padovan, and S. J. Peale (2018), Mercury’s
 internal structure, in Mercury: The View after MESSENGER , edited by S. Solomon,
 L. Nittler, and B. Anderson, pp. 85–113, Cambridge University Press, Cambridge, doi:
 10.1017/9781316650684.005.
 Mathews, P. M., and J. Guo (2005), Viscoelectromagnetic coupling in precession-nutation
-theory, J. Geophys. Res. ,110(B02402), doi:10.1029/2003JB002915.
+theory, J. Geophys. Res., 110 (B02402), doi:10.1029/2003JB002915.
 Mathews, P. M., B. A. Buffett, T. A. Herring, and I. I. Shapiro (1991), Forced nutations of
-the Earth: Influence of inner core dynamics. 1. theory, J. Geophys. Res. ,96, 8219–8242.
+the Earth: Influence of inner core dynamics. 1. theory, J. Geophys. Res., 96, 8219–8242.
 Mathews, P. M., T. A. Herring, and B. A. Buffett (2002), Modeling of nutations and precession:
  New nutation series for nonrigid Earth and insights into the Earth’s interior, J.
-Geophys. Res. ,107, doi:10.1029/2004JB000390.
+Geophys. Res., 107, doi:10.1029/2004JB000390.
 Mazarico, E., A. Genova, S. Goossens, F. G. Lemoine, G. A. Neumann, M. T. Zuber,
 D. E. Smith, and S. C. Solomon (2014), The gravity field, orientation, and ephemeris of
 Mercury from MESSENGER observations after three years in orbit, J. Geophys. Res.
-Planets ,119, 2417–2436.
+Planets, 119, 2417–2436.
 Organowski, O., and M. Dumberry (2020), Viscoelastic relaxation within the Moon
-and the phase lead of its Cassini state, Journal of Geophysical Research Planets ,125,
+and the phase lead of its Cassini state, Journal of Geophysical Research Planets , 125,
 e2020JE006386.
-Peale, S. J. (1969), Generalized Cassini’s laws, Astron. J. ,74, 483–489.
-Peale, S. J. (1974), Possible histories of the obliquity of Mercury, Astron. J. ,79, 722–744.
-Peale, S. J. (1976), Does Mercury have a molten core?, Nature ,262, 765–766.
-Peale, S. J. (2005), The free precession and libration of Mercury, Icarus ,178, 4–18.
+Peale, S. J. (1969), Generalized Cassini’s laws, Astron. J., 74, 483–489.
+Peale, S. J. (1974), Possible histories of the obliquity of Mercury, Astron. J., 79, 722–744.
+Peale, S. J. (1976), Does Mercury have a molten core?, Nature, 262, 765–766.
+Peale, S. J. (2005), The free precession and libration of Mercury, Icarus, 178, 4–18.
 Peale, S. J. (2006), The proximity of Mercury’s spin to Cassini state 1 from adiabatic invariance,
- Icarus ,181, 338–347.
+ Icarus, 181, 338–347.
 Peale, S. J., J. L. Margot, S. A. Hauck II, and S. C. Solomon (2014), Effect of core-mantle
-and tidal torques on Mercury’s spin axis orientation, Icarus ,231, 206–220.
+and tidal torques on Mercury’s spin axis orientation, Icarus, 231, 206–220.
 Peale, S. J., J. L. Margot, S. A. Hauck II, and S. C. Solomon (2016), Consequences of a
-solid inner core on Mercury’s spin configuration, Icarus ,264, 443–455.
+solid inner core on Mercury’s spin configuration, Icarus, 264, 443–455.
 Perry, M. E., G. A. Neumann, R. J. Phillips, and et al. (2015), The low-degree shape of
-Mercury, Geophys. Res. Lett. ,42, 6951–6958.
-Poincar´ e, H. (1910), Sur la pr´ ecession des corps d´ eformables, Bull. Astron. Ser. 1 ,27,
+Mercury, Geophys. Res. Lett., 42, 6951–6958.
+Poincar´ e, H. (1910), Sur la pr´ ecession des corps d´ eformables,Bull. Astron. Ser. 1 , 27,
 321–356.
 Pozzo, M., C. Davies, D. Gubbins, and D. Alf´ e (2012), Thermal and electrical conductivity
-of iron at Earth’s core conditions, Nature ,485, 355–358.
+of iron at Earth’s core conditions, Nature, 485, 355–358.
 Rochester, M. G. (1960), Geomagnetic westward drift and irregularities in the Earth’s
-rotation, Phil. Trans. R. Soc. Lond., A ,252, 531–555.
-Rochester, M. G. (1962), Geomagnetic core-mantle coupling, J. Geophys. Res. ,67, 4833–
+rotation, Phil. Trans. R. Soc. Lond., A , 252, 531–555.
+Rochester, M. G. (1962), Geomagnetic core-mantle coupling, J. Geophys. Res., 67, 4833–
 4836.
 Rochester, M. G. (1968), Perturbations in the Earth’s rotation and geomagnetic coremantle
- coupling, J. Geomag. Geoelectr. ,20, 387–402.
+ coupling, J. Geomag. Geoelectr., 20, 387–402.
 Rochester, M. G. (1976), The secular decrease of obliquity due to dissipative core-mantle
-coupling, Geophys. J. R. Astron. Soc. ,46, 109–126.
+coupling, Geophys. J. R. Astron. Soc. , 46, 109–126.
 Rutter, M., R. Secco, T. Uchida, H. Liu, Y. Wang, M. Rivers, and S. Sutton (2002a), Towards
  evaluating the viscosity of the Earth’s outer core: an experimental high pressure
-study of liquid Fe-S (8.5 wt. per cent S), Geophys. Res. Lett. ,29, 080,000–1.
+study of liquid Fe-S (8.5 wt. per cent S), Geophys. Res. Lett., 29, 080,000–1.
 Rutter, M. D., R. A. Secco, H. Liu, T. Uchida, M. Rivers, S. Sutton, and Y. Wang
-(2002b), Viscosity of liquid Fe at high pressure, Phys. Rev. B ,66, 060,102,
+(2002b), Viscosity of liquid Fe at high pressure, Phys. Rev. B , 66, 060,102,
 –32–
 Confidential manuscript submitted to JGR-Planets
 doi:10.1029/2001GL014392.
 Schaefer, L., S. B. Jacobsen, J. L. Remo, M. I. Petaev, and D. D. Sasselov (2017), Metalsilicate
  partitioning and its role in core formation and composition on Super-Earths,
-Astrophys. J. ,835, 234.
-Sori, M. M. (2018), A thin, dense crust for Mercury, Earth Planet. Sci. Lett. ,489, 92–99.
+Astrophys. J., 835, 234.
+Sori, M. M. (2018), A thin, dense crust for Mercury, Earth Planet. Sci. Lett. , 489, 92–99.
 Stark, A., J. Oberst, F. Preusker, S. J. Peale, J.-L. Margot, R. J. Phillips, G. A. Neumann,
 S. D. E., M. T. Zuber, and S. C. Solomon (2015a), First MESSENGER orbital observations
- of Mercury’s librations, Geophys. Res. Lett. ,42, 7881–7889.
+ of Mercury’s librations, Geophys. Res. Lett., 42, 7881–7889.
 Stark, A., J. Oberst, and H. Hussmann (2015b), Mercury’s resonant rotation from secular
-orbital elements, Celest. Mech. Dyn. Astr. ,123, 263–277.
+orbital elements, Celest. Mech. Dyn. Astr. , 123, 263–277.
 Stewartson, K., and P. H. Roberts (1963), On the motion of a liquid in a spheroidal cavity
-of a precessing rigid body, J. Fluid Mech. ,17, 1–20.
+of a precessing rigid body, J. Fluid Mech. , 17, 1–20.
 Stys, C., and M. Dumberry (2018), The cassini state of the Moon’s inner core, J. Geophys.
-Res. Planets ,123, 1–25, doi:10.1029/2018JE005607.
-Van Hoolst, T. (2015), Rotation of the terrestrial planets, in Treatise on Geophysics ,
+Res. Planets, 123, 1–25, doi:10.1029/2018JE005607.
+Van Hoolst, T. (2015), Rotation of the terrestrial planets, in Treatise on Geophysics,
 vol. 10, edited by G. Schubert, chap. 4, pp. 121 – 151, Elsevier, Oxford.
 Van Hoolst, T., A. Rivoldini, R.-M. Baland, and M. Yseboodt (2012), The effects of tides
-and an inner core on the forced libration of mercury, Earth Planet. Sci. Lett. ,333–334 ,
+and an inner core on the forced libration of mercury, Earth Planet. Sci. Lett. , 333–334,
 83–90.
 Verma, A. K., and J. L. Margot (2016), Mercury’s gravity, tides, and spin from MESSENGER
- radio science data, J. Geophys. Res. Planets ,121, 1627–1640.
+ radio science data, J. Geophys. Res. Planets , 121, 1627–1640.
 Wessel, P., W. H. F. Smith, R. Scharroo, J. Luis, and F. Wobbe (2013), Generic Mapping
-Tools: Improved version released, EOS Trans. AGU ,94, 409–410.
+Tools: Improved version released, EOS Trans. AGU, 94, 409–410.
 Williams, J. G., and D. H. Boggs (2015), Tides on the Moon: theory and determination of
-dissipation, J. Geophys. Res. Planets ,120(4), 689–724, doi:10.1002/2014JE004755.
+dissipation, J. Geophys. Res. Planets , 120 (4), 689–724, doi:10.1002/2014JE004755.
 Williams, J. G., D. H. Boggs, C. F. Yoder, J. T. Ratcliff, and J. O. Dickey (2001), Lunar
-rotational dissipation in solid body and molten core, J. Geophys. Res. ,106, 27,933–
+rotational dissipation in solid body and molten core, J. Geophys. Res., 106, 27,933–
 27,968.
 Williams, J. G., A. S. Konopliv, D. H. Boggs, R. S. Park, D.-N. Yuan, F. G. Lemoine,
 S. Goossens, E. Mazarico, F. Nimmo, R. C. Weber, S. W. Asmar, H. J. Melosh, G. A.
 Neumann, R. J. Phillips, D. E. Smith, S. C. Solomon, M. M. Watkins, M. A. Wieczorek,
 J. C. Andrews-Hanna, J. W. Head, W. S. Kiefer, I. Matsuyama, P. J. McGovern, G. J.
 Taylor, and M. T. Zuber (2014), Lunar interior properties from the GRAIL mission, J.
-Geophys. Res. Planets ,119(7), 1546–1578, doi:10.1002/2013JE004559.
+Geophys. Res. Planets, 119 (7), 1546–1578, doi:10.1002/2013JE004559.
 Yoder, C. F. (1981), The free librations of a dissipative Moon, Phil. Trans. R. Soc. Lond.
-A,303, 327–338.
-Yseboodt, M., and J. L. Margot (2006), Evolution of Mercury’s obliquity, Icarus ,181,
+A, 303, 327–338.
+Yseboodt, M., and J. L. Margot (2006), Evolution of Mercury’s obliquity, Icarus, 181,
 327–337.
 –33–
\ No newline at end of file
diff --git a/read/results/pypdf/2201.00069.txt b/read/results/pypdf/2201.00069.txt
index aa3d4ce..7194575 100644
Binary files a/read/results/pypdf/2201.00069.txt and b/read/results/pypdf/2201.00069.txt differ
diff --git a/read/results/pypdf/2201.00151.txt b/read/results/pypdf/2201.00151.txt
index 9dc2432..f264cdd 100644
--- a/read/results/pypdf/2201.00151.txt
+++ b/read/results/pypdf/2201.00151.txt
@@ -1,28 +1,29 @@
-arXiv:2201.00151v1  [astro-ph.GA]  1 Jan 2022Astronomy&Astrophysics manuscript no. Populations4 ©ESO 2022
+arXiv:2201.00151v1  [astro-ph.GA]  1 Jan 2022
+Astronomy &Astrophysics manuscript no. Populations4 ©ESO 2022
 January 4, 2022
 Multiple stellar populations in Schwarzschild modeling
 and the application to the Fornax dwarf
 Klaudia Kowalczyk and Ewa L. Łokas
-Nicolaus Copernicus Astronomical Center, Polish Academy o f Sciences, Bartycka 18, 00-716 Warsaw, Poland
-e-mail:klaudia.kowalczyk@gmail.com, lokas@camk.edu.pl
+Nicolaus Copernicus Astronomical Center, Polish Academy o f Sciences, Bartycka 18, 00-716 W arsaw , Poland
+e-mail: klaudia.kowalczyk@gmail.com, lokas@camk.edu.pl
 January 4, 2022
 ABSTRACT
 Dwarf spheroidal (dSph) galaxies are believed to be strongl y dark matter dominated and thus are considered perfect obje cts to study
-dark matter distribution and test theories of structure for mation. They possess resolved, multiple stellar populatio ns that offer new
+dark matter distribution and test theories of structure for mation. They possess resolved, multiple stellar populatio ns that o ffer new
 possibilities for modeling. A promising tool for the dynami cal modeling of these objects is the Schwarzschild orbit sup erposition
 method. In this work we extend our previous implementation o f the scheme to include more than one population of stars and a more
-general form of the mass-to-light ratio function. We tested the improved approach on a nearly spherical, gas-free galax y formed in
-the cosmological context from the Illustris simulation. We modeled the binned velocity moments for stars split into two populations
+general form of the mass-to-light ratio function. W e tested the improved approach on a nearly spherical, gas-free galax y formed in
+the cosmological context from the Illustris simulation. W e modeled the binned velocity moments for stars split into two populations
 by metallicity and demonstrate that in spite of larger sampl ing errors the increased number of constraints leads to sign ificantly tighter
-confidence regions on the recovered density and velocity ani sotropy profiles. We then applied the method to the Fornax dSp h galaxy
+confidence regions on the recovered density and velocity ani sotropy profiles. W e then applied the method to the Fornax dSp h galaxy
 with stars similarly divided into two populations. In compa rison with our earlier work, we find the anisotropy parameter to be slightly
-increasing, rather than decreasing, with radius and more st rongly constrained. We are also able to infer anisotropy for each stellar
+increasing, rather than decreasing, with radius and more st rongly constrained. W e are also able to infer anisotropy for each stellar
 population separately and find them to be significantly di fferent.
 Key words. galaxies: kinematics and dynamics – galaxies: structure – g alaxies: fundamental parameters – galaxies: dwarf – galaxi es:
 star clusters: individual: Fornax
 1. Introduction
 Dwarf spheroidal (dSph) galaxies of the Local Group (Mateo
-1998; Tolstoy et al. 2009) are considered to be a perfect tool to
+1998; T olstoy et al. 2009) are considered to be a perfect tool to
 test our current theories of structure formation involving dark
 matter in the context of near-field cosmology. The objects ar e
 believed to be strongly dark matter dominated with mass-to- light
@@ -48,17 +49,18 @@ higher order line-of-sight velocity moments, such as the ku rtosis,
  and use the corresponding Jeans equations. Since the ku rtosis
  is more sensitive to the velocity anisotropy than to the m ass
 distribution, useful constraints can be obtained on both. S till, the
-method requires large kinematic samples to estimate the vel ocitymoments reliably and some assumption on the functional form
+method requires large kinematic samples to estimate the vel ocity
+moments reliably and some assumption on the functional form
 of the anisotropy (Łokas 2002; Łokas et al. 2005).
 The Schwarzschild modeling technique (Schwarzschild
-1979) offers a different approach to estimate the properties of
+1979) o ffers a di fferent approach to estimate the properties of
 dSph galaxies without prior assumptions on the type of orbit s.
 It relies on building a galaxy model out of a set of best-fittin g
 orbits probed in the range of energy and angular momenta. In
 this method, the anisotropy of the stellar orbits comes out a s a
 result of the modeling in the same way as the density profile. A lthough
  it has been originally developed for large elliptica l galaxies
- (van der Marel et al. 1998; Valluri et al. 2004; Gebhardt e t al.
+ (van der Marel et al. 1998; V alluri et al. 2004; Gebhardt e t al.
 2015), it has recently been adopted for use on discrete data
 characteristic of dSph galaxies and applied to a number of
 dwarfs, including Carina, Draco, Fornax, Sculptor, and Sex tans
@@ -76,23 +78,23 @@ the modeling. This approach was first used by Battaglia et al.
 (2008) to model the mass distribution in the Sculptor dSph
 galaxy. A few attempts have also been made to constrain the
 inner slope of the dark matter profile in dSph galaxies using
-this technique (Walker & Peñarrubia 2011; Amorisco & Evans
+this technique (W alker & Peñarrubia 2011; Amorisco & Evans
 2012; Hayashi et al. 2018) in order to resolve the so-called c uspcore
  problem. It has been shown to be di fficult, however, due
 Article number, page 1 of 12
 A&A proofs: manuscript no. Populations4
-Table 1. Properties of the Illustris galaxy used to create mock data.
-Property Value
+T able 1.Properties of the Illustris galaxy used to create mock data.
+Property V alue
 Subhalo ID 16960
 Number of stellar particles ( N⋆) 70446
-Number of dark matter particles ( NDM) 78448
-Stellar mass ( M⋆) 5 .74×1010M⊙
-Dark matter mass ( MDM) 4 .91×1011M⊙
+Number of dark matter particles ( NDM ) 78448
+Stellar mass ( M⋆) 5 .74 ×1010 M⊙
+Dark matter mass ( MDM ) 4 .91 ×1011 M⊙
 Mean mass of stellar particles 815808 M ⊙
 Stellar half-mass radius 9 .99 kpc
-Stellar half-number radius ( r1/2) 9.6 kpc
-Axis ratio c/awithin r1/2 0.907
-Axis ratio b/awithin r1/2 0.949
+Stellar half-number radius ( r1/2 ) 9.6 kpc
+Axis ratio c/a within r1/2 0.907
+Axis ratio b/a within r1/2 0.949
 Triaxiality 0.56
 to the nonsphericity of the dwarfs that introduces biases in such
 measurements (Kowalczyk et al. 2013; Genina et al. 2018).
@@ -100,7 +102,7 @@ In our recent papers (Kowalczyk et al. 2017, 2018, 2019) we
 developed the Schwarzschild technique in the form applicab le to
 binned velocity moments of a single tracer and verified its ab ility
  to reproduce the mass distribution and velocity anisotr opy of
-simulated galaxies. We have also studied biases resulting f rom
+simulated galaxies. W e have also studied biases resulting f rom
 the nonsphericity of the modeled objects. Later, we applied the
 method to model the kinematics of the Fornax dSph galaxy esti mating
  its mass and anisotropy profiles with unprecedented p recision.
@@ -108,14 +110,14 @@ method to model the kinematics of the Fornax dSph galaxy esti mating
 In this paper we extend our Schwarzschild modeling technique
  to include multiple stellar populations with the aim t o
 constrain the properties of dSph galaxies even more strongl y.
-We test our approach on a realistic simulated galaxy formed i n
+W e test our approach on a realistic simulated galaxy formed i n
 the cosmological context, originating from the Illustris p roject
 (V ogelsberger et al. 2014a). Although no precise analogues of
 dSph galaxies are available in this simulation because of th e resolution,
  we use a more massive galaxy but with properties oth erwise
  similar to dSphs. The reliability of the modeling doe s not
 depend on the particular value of the mass so we believe these
-tests to be viable. We do not attempt to constrain the inner da rk
+tests to be viable. W e do not attempt to constrain the inner da rk
 matter density profile (which is poorly resolved anyway) but try
 to put tighter limits on the estimates of the mass and anisotr opy
 profiles. Finally, we apply the improved method to the availa ble
@@ -128,7 +130,7 @@ Section 3 contains an overview of our modeling method, the ap plication
  of the method to all stars and to two populations, a nd
 a comparison of the results obtained with these two approach es.
 The results of the application of the method to the Fornax dSp h
-galaxy are presented in Section 4. We discuss our findings and
+galaxy are presented in Section 4. W e discuss our findings and
 summarize the paper in Section 5.
 2. Mock data
 2.1. Selection of the simulated galaxy
@@ -139,19 +141,34 @@ data, we decided to use a galaxy from the Illustris project
  follows the formation and evolution of galaxies fro m the
 early Universe to the present by solving gravity and hydrody namics,
  as well as modeling of star formation, galactic wind s,
-SFR [M⊙ yr-1]
-t [Gyr] 0 4 8 12 16
- 0 2 4 6 8 10 12
+SFR [M ⊙ yr-1]
+t [Gyr]
+ 0
+ 4
+ 8
+ 12
+ 16
+ 0  2  4  6  8  10  12
 Fig. 1. Star formation rate as a function of the age of the Universe in
 the simulated galaxy from the Illustris project used to crea te mock data.
 The black and gray vertical arrows indicate the last mergers which the
-galaxy underwent, wet and dry, respectively.
+galaxy underwent, wet and dry , respectively .
 t [Gyr]
-Z [Z⊙] 0 2 4 6 8 10
- 0 1 2 3 4 5 0 2 4 6
+Z [Z⊙]
+ 0
+ 2
+ 4
+ 6
+ 8
+ 10
+ 0  1  2  3  4  5
+ 0
+ 2
+ 4
+ 6
 N [102]
 Fig. 2. Number of stars as a function of their metallicity and time of
-formation (the age of the Universe) in the simulated galaxy. The vertical
+formation (the age of the Universe) in the simulated galaxy . The vertical
 line indicates the applied split into stellar populations.
 magnetic fields, and the feedback from black holes. Although
 dwarf galaxies that are of our interest here are not resolved in the
@@ -164,9 +181,9 @@ nearly spherical shape. The last condition was adopted in an attempt
  to avoid any strong bias introduced by the spherical mo deling
  of a nonspherical object. Moreover, we required the ga laxy
 to possess a significant number of both stellar and dark matter
- particles (over 105), and a well resolved center. Due to the
+ particles (over 10 5 ), and a well resolved center. Due to the
 large softening scale for dark matter particles in the simul ation
-(ǫDM=1.42 kpc), we looked for an object in which even the
+(ǫDM = 1.42 kpc), we looked for an object in which even the
 more concentrated stellar population (see Section 2.2) ext ended
 over 43 kpc so that the region a ffected by the numerical artifacts
 was enclosed within 2-3 innermost data bins (we used 20 linea rly
@@ -174,69 +191,176 @@ spaced spatial bins, see Section 3.1).
 Out of 27345 galaxies listed in the catalog of stellar circularities,
  angular momenta, and axis ratios published by the Illustris
  team (Genel et al. 2015) containing subhalos with the st ellar
-mass larger than 109M⊙, only a few met our restrictive requireArticle
+mass larger than 10 9 M⊙, only a few met our restrictive requireArticle
  number, page 2 of 1
 K. Kowalczyk & E. L. Łokas: Multiple stellar populations in S chwarzschild modeling
--80-4004080POPULATION I[kpc]majorPOPULATION I
-intermediatePOPULATION I
+-80
+-40
+0
+40
+80
+POPULATION I
+[kpc]
+major
+POPULATION I
+intermediate
+POPULATION I
 minor
- 5.3 5.9 6.5 7.1 7.7
+ 5.3
+ 5.9
+ 6.5
+ 7.1
+ 7.7
 log(Σ) [M⊙/kpc2]
--80-40040POPULATION II[kpc]POPULATION IIPOPULATION II
--160-80 0 80 160
+-80
+-40
+0
+40
+POPULATION II
+[kpc]
+POPULATION IIPOPULATION II
+-160
+-80
+ 0
+ 80
+ 160
 V [km/s]
--80-40040
--80-40040POPULATION II[kpc]
-[kpc]-80-40040POPULATION II
-[kpc]-80-4004080POPULATION II
-[kpc] 0 30 60 90
-σ [km/s]-80-4004080POPULATION II[kpc]majorPOPULATION II
-intermediatePOPULATION II
+-80
+-40
+0
+40
+-80 -40 0 40
+POPULATION II
+[kpc]
+[kpc]
+-80 -40 0 40
+POPULATION II
+[kpc]
+-80 -40 0 40 80
+POPULATION II
+[kpc]
+ 0
+ 30
+ 60
+ 90
+σ [km/s]
+-80
+-40
+0
+40
+80
+POPULATION II
+[kpc]
+major
+POPULATION II
+intermediate
+POPULATION II
 minor
- 5.3 5.9 6.5 7.1 7.7
+ 5.3
+ 5.9
+ 6.5
+ 7.1
+ 7.7
 log(Σ) [M⊙/kpc2]
--80-40040POPULATION II[kpc]POPULATION IIPOPULATION II
--160-80 0 80 160
+-80
+-40
+0
+40
+POPULATION II
+[kpc]
+POPULATION IIPOPULATION II
+-160
+-80
+ 0
+ 80
+ 160
 V [km/s]
--80-40040
--80-40040POPULATION II[kpc]
-[kpc]-80-40040POPULATION II
-[kpc]-80-4004080POPULATION II
-[kpc] 0 30 60 90
+-80
+-40
+0
+40
+-80 -40 0 40
+POPULATION II
+[kpc]
+[kpc]
+-80 -40 0 40
+POPULATION II
+[kpc]
+-80 -40 0 40 80
+POPULATION II
+[kpc]
+ 0
+ 30
+ 60
+ 90
 σ [km/s]
-Fig. 3. Maps of the projected stellar density, mean stellar velocit y, and stellar velocity dispersion (in rows) for two stellar populations: the metalrich
+Fig. 3. Maps of the projected stellar density , mean stellar velocit y , and stellar velocity dispersion (in rows) for two stellar populations: the metalrich
  population I (left-hand side panels) and the metal-poo r population II (right-hand side), and observations along t he principal axes determined
 for all stars (in columns, along the major, the intermediate , and the minor axis, respectively).
--1-0.5 0 0.5 1
- 1  10  100β(r)
+-1
+-0.5
+ 0
+ 0.5
+ 1
+ 1  10  100
+β(r)
+r [kpc]
+-1
+-0.5
+ 0
+ 0.5
+ 1
+ 0  10  20  30  40  50
+β(r)
 r [kpc]
--1-0.5 0 0.5 1
- 0 10 20 30 40 50β(r)
-r [kpc]all stars
+all stars
 pop I
-pop II 40 60 80 100 120
- 1  10  100σr(r)
+pop II
+ 40
+ 60
+ 80
+ 100
+ 120
+ 1  10  100
+σr(r)
+r [kpc]
+ 40
+ 60
+ 80
+ 100
+ 120
+ 0  10  20  30  40  50
+σr(r)
 r [kpc]
- 40 60 80 100 120
- 0 10 20 30 40 50σr(r)
-r [kpc] 40 60 80 100 120
- 1  10  100σt(r)
+ 40
+ 60
+ 80
+ 100
+ 120
+ 1  10  100
+σt(r)
 r [kpc]
- 40 60 80 100 120
- 0 10 20 30 40 50σt(r)
+ 40
+ 60
+ 80
+ 100
+ 120
+ 0  10  20  30  40  50
+σt(r)
 r [kpc]
 Fig. 4. Profiles of the velocity anisotropy parameter, radial veloc ity dispersion, and tangential velocity dispersion (in con secutive columns) calculated
  from all stars (in red), including only population I (i n orange), and only population II (in blue). The upper row sho ws the profiles using the
 logarithmic distance scale and reaching the outskirts of th e galaxy whereas the bottom row presents in the linear scale o nly the radial range used
 in the modeling.
-ments. We decided to use a galaxy labeled as subhalo 16960.
-All the relevant properties of the galaxy are given in Table 1 ,
+ments. W e decided to use a galaxy labeled as subhalo 16960.
+All the relevant properties of the galaxy are given in T able 1 ,
 including numbers of particles and total masses for both com ponents,
  and details on the shape of the stellar component: the axis
 ratios minor to major (shortest to longest) c/a, intermediate to
-major b/a, and the triaxiality parameter T=(a2−b2)/(a2−c2).
-We distinguish between the half-mass radius provided in the Illustris
- database and the half-number radius r1/2, which we usefor further calculations in this paper. The di fference between the
+major b/a, and the triaxiality parameter T = (a2 −b2 )/(a2 −c2 ).
+W e distinguish between the half-mass radius provided in the Illustris
+ database and the half-number radius r1/2 , which we use
+for further calculations in this paper. The di fference between the
 two comes from a small gradient in the stellar mass-to-light ratio
 with the distance from the galactic center. Since in our appr oach
 we treat stars as equal-mass particles and refer to number de nsities
@@ -245,19 +369,26 @@ needed), the application of the half-number radius is more s elfconsistent.
 
 Article number, page 3 of 12
 A&A proofs: manuscript no. Populations4
-10-310-1101103
- 10  100n⋆(R) [kpc-2]
-R [kpc]major
+10-3
+10-1
+101
+103
  10  100
-R [kpc]intermediate
+n⋆(R) [kpc-2]
+R [kpc]
+major
  10  100
-R [kpc]minor
+R [kpc]
+intermediate
+ 10  100
+R [kpc]
+minor
 all stars
 pop I
 pop II
 Fig. 5. Surface number density profiles of the stellar data samples f or the simulated galaxy observed along di fferent lines of sight (from the left to
-the right). Different lines show profiles for all available stars (in red), th e metal-rich population I (in orange), and the metal-poor po pulation II (in
-blue). Thin vertical lines indicate r0(see text) and the outer boundary of the spectroscopic data.
+the right). Di fferent lines show profiles for all available stars (in red), th e metal-rich population I (in orange), and the metal-poor po pulation II (in
+blue). Thin vertical lines indicate r0 (see text) and the outer boundary of the spectroscopic data.
 2.2. Splitting the stars into populations
 Our chosen galaxy shows a complex formation history undergoing
  multiple mergers which result in extended star format ion
@@ -265,7 +396,7 @@ with a few star formation bursts. The last wet merger, that is a
 merger with an object containing gas, happens at 6.9 Gyr from
 the beginning of the simulation, whereas the last dry merger (no
 gas transfer) at 12.1 Gyr, giving the galaxy enough time to re gain
-dynamical equilibrium. We present the star formation rate ( SFR)
+dynamical equilibrium. W e present the star formation rate ( SFR)
 as a function of time (the age of the Universe) in Fig. 1, where
 these last mergers are indicated with black and gray vertica l arrows.
  In Fig. 2 we show the distribution of stars as a function of
@@ -276,7 +407,7 @@ split is indicated in Fig. 2 with the vertical line. With sati sfying
 accuracy it separates the stars born before and after 4 Gyr si nce
 the start of the simulation, which corresponds to the format ion
 time before and after the end of the second major star burst, a s
-shown in Fig. 1. We refer to the metal-rich stars as populatio n I
+shown in Fig. 1. W e refer to the metal-rich stars as populatio n I
 and to the metal-poor as population II, following the common ly
 used nomenclature in astronomy.
 In Fig. 3 we present maps of the projected stellar mass density,
@@ -284,26 +415,27 @@ In Fig. 3 we present maps of the projected stellar mass density,
 for both populations obtained by projecting the galaxy alon g its
 principal axes. The orientation was determined from the ine rtia
  tensor calculated from all stars within the half-number radius
-r1/2and therefore is the same in both panels. The two populations
- differ significantly in the spatial distribution and kinematics
+r1/2 and therefore is the same in both panels. The two populations
+ di ffer significantly in the spatial distribution and kinematics
  with the metal-rich (considered to be younger) populati on I
 being more concentrated but having lower central velocity d ispersion.
  Both populations show a weak rotation signal at lar ge
 distances from the center.
-The velocity anisotropy parameter β(r)=1−(σ2
-θ+
+The velocity anisotropy parameter β(r) = 1 − (σ2
+θ +
 σ2
 φ)/(2σ2
-r), whereσiare velocity dispersions in spherical coordinates
+r ), where σi are velocity dispersions in spherical coordinates
  (Binney & Tremaine 2008), describes the orbital struc ture
 of galaxies. It is one of the most important dynamical proper ties
 of bound systems which cannot be inferred directly from observations
  and has to be recovered by dynamical modeling. Th e
-profiles of the anisotropy parameter βas well as the radial σr
-and tangentialσt=[(σ2
-θ+σ2
-φ)/2]1/2velocity dispersions for our
-simulated galaxy are presented in the consecutive columns o fFig. 4. Throughout the paper we use red, orange, and blue colo rs
+profiles of the anisotropy parameter β as well as the radial σr
+and tangential σt = [(σ2
+θ +σ2
+φ)/2]1/2 velocity dispersions for our
+simulated galaxy are presented in the consecutive columns o f
+Fig. 4. Throughout the paper we use red, orange, and blue colo rs
 to indicate values calculated or recovered for all stars, po pulation
  I, and population II, respectively. The two rows of the fi gure
 show the behavior of the parameters at di fferent scales. The top
@@ -312,12 +444,12 @@ galaxy in the logarithmic scale and shows the drop of anisotr opy
 at the outer edges of the object. The bottom row uses the linea r
 distance scale and focuses on the main body of the galaxy.
 Figure 5 shows the surface number density profiles of the
-stars as measured in di fferent directions. We can see that while
-the different subsamples have quite distinguishable profiles, the
+stars as measured in di fferent directions. W e can see that while
+the di fferent subsamples have quite distinguishable profiles, the
 difference between the lines of sight is small because the galaxy
 is close to spherical.
 2.3. Observables
-We generated nine sets of mock data by observing all stars and
+W e generated nine sets of mock data by observing all stars and
 each population separately along the principal axes determ ined
 from all stars. For the observables to be used in the modeling we
 divided the stars into 20 bins spaced linearly in distance fr om
@@ -331,127 +463,226 @@ intermediate, and minor axis of the galaxy. For clarity of th e figure,
  in each panel we indicate only the error bars for one of th e
 data sets. However, as the number of stars in a sample remains
 roughly constant between the lines of sight, the error bars a re
-very similar among the panels in a given row.
+very similar among the panels in a given row .
 Although in our previous studies of the reliability of
 the Schwarzschild modeling and its applications to real dat a
 (Kowalczyk et al. 2017, 2018, 2019) we approximated the density
  profile of the tracer with the Sérsic formula, we found th at it
 does not provide a good approximation of the data for the simu lated
- galaxy considered here. We therefore fit the projected density
+ galaxy considered here. W e therefore fit the projected density
  profile with the King formula (King 1962)
-I(R)=I01√
-1+(R/Rc)2−1√
-1+(Rt/Rc)22
+I(R) = I0
+
+
+
+
+
+
+
+1
+√
+1 +(R/Rc)2
+− 1√
+1 +(Rt /Rc)2
+
+
+
+
+
+
+
+2
 , (1)
 Article number, page 4 of 12
 K. Kowalczyk & E. L. Łokas: Multiple stellar populations in S chwarzschild modeling
-10-310-210-1100
- 0 10 20 30 40M(R)
-R [kpc]major
- 0 10 20 30 40
-R [kpc]intermediate
- 0 10 20 30 40 50
-R [kpc]minor
-36912
- 0 10 20 30 40m2(R)[103(km s-1)2]
-R [kpc] 0 10 20 30 40
-R [kpc] 0 10 20 30 40 50
+10-3
+10-2
+10-1
+100
+ 0  10  20  30  40
+M(R)
+R [kpc]
+major
+ 0  10  20  30  40
+R [kpc]
+intermediate
+ 0  10  20  30  40  50
+R [kpc]
+minor
+3
+6
+9
+12
+ 0  10  20  30  40
+m 2(R)[103(km s-1)2]
+R [kpc]
+ 0  10  20  30  40
+R [kpc]
+ 0  10  20  30  40  50
+R [kpc]
+-10
+-5
+0
+5
+10
+ 0  10  20  30  40
+m 3(R)[104(km s-1)3]
+R [kpc]
+ 0  10  20  30  40
 R [kpc]
--10-50510
- 0 10 20 30 40m3(R)[104(km s-1)3]
-R [kpc] 0 10 20 30 40
-R [kpc] 0 10 20 30 40 50
+ 0  10  20  30  40  50
 R [kpc]
-01234
- 0 10 20 30 40m4(R)[108(km s-1)4]
-R [kpc] 0 10 20 30 40
-R [kpc] 0 10 20 30 40 50
-R [kpc]all stars
+0
+1
+2
+3
+4
+ 0  10  20  30  40
+m 4(R)[108(km s-1)4]
+R [kpc]
+ 0  10  20  30  40
+R [kpc]
+ 0  10  20  30  40  50
+R [kpc]
+all stars
 pop I
 pop II
-Fig. 6. Observables used in our Schwarzschild modeling scheme of th e simulated galaxy. In rows: the fraction of the total number of stars, 2nd,
+Fig. 6. Observables used in our Schwarzschild modeling scheme of th e simulated galaxy . In rows: the fraction of the total number of stars, 2nd,
 3rd, and 4th velocity moment. In columns: mock data from the simulated galaxy along the major, interme diate, and minor axis. In red we present
-the values obtained for all stars whereas in orange and blue t hose for populations I and II, respectively. For clarity of t he figure, in each panel we
+the values obtained for all stars whereas in orange and blue t hose for populations I and II, respectively . For clarity of t he figure, in each panel we
 indicate only the error bars for one of the data sets.
-where I0,Rc, and Rtare the model parameters. The profile can
+where I0 , Rc , and Rt are the model parameters. The profile can
 be analytically deprojected to obtain the 3D density
-ρ(r)=ρ0
-z2[1
-zarccos( z)−√
-1−z2]
+ρ(r) = ρ0
+z2
+[ 1
+z arccos(z) −
+√
+1 −z2
+]
 , (2)
 where
-ρ0=I0
-πRc[1+(Rt/Rc)2]3/2(3)
+ρ0 = I0
+πRc [1 +(Rt /Rc)2 ]3/2 (3)
 and
-z=√
-r2+R2c
-R2c+R2
-t. (4)3. Schwarzschild modeling
+z =
+√
+r2 +R2
+c
+R2
+c +R2
+t
+. (4)
+3. Schwarzschild modeling
 In this section we briefly present our modeling method and its
 application to the data sets derived for all stars and the two populations
  of the simulated galaxy separately. In both cases o ur
 aim was to recover the profiles of the total mass and the veloci ty
 anisotropy.
 3.1. Overview of the method
-We follow the approach introduced in Kowalczyk et al. (2018) ,
+W e follow the approach introduced in Kowalczyk et al. (2018) ,
 namely we model the total mass profile with the mass-to-light
-ratioΥvarying with radius:
-logΥ(r)={
-log(Υ0) r≤r0
-a(logr−logr0)c+log(Υ0)r>r0(5)
+ratio Υvarying with radius:
+log Υ(r) =
+{
+log(Υ0 ) r ≤ r0
+a(log r −log r0 )c +log(Υ0) r > r0
+(5)
 Article number, page 5 of 12
 A&A proofs: manuscript no. Populations4
- 1 2 3
- 0 0.5 1 1 2 3ALL
-Υ0ac
- 1 2 3
- 0 0.5 1 1 2 3POPULATIONS
-Υ0ac
- 10 100
+ 1
+ 2
+ 3
+ 0
+ 0.5
+ 1
+ 1
+ 2
+ 3
+ALL
+Υ0
+a
+c
+ 1
+ 2
+ 3
+ 0
+ 0.5
+ 1
+ 1
+ 2
+ 3
+POPULATIONS
+Υ0
+a
+c
+ 10
+ 100
 χ2
- 1 2 3
- 0 0.5 1 1 2 3POP I
-Υ0ac
- 1 2 3
- 0 0.5 1 1 2 3POP II
-Υ0ac
- 10 100
+ 1
+ 2
+ 3
+ 0
+ 0.5
+ 1
+ 1
+ 2
+ 3
+POP I
+Υ0
+a
+c
+ 1
+ 2
+ 3
+ 0
+ 0.5
+ 1
+ 1
+ 2
+ 3
+POP II
+Υ0
+a
+c
+ 10
+ 100
 χ2
-Fig. 7. Absolute values of χ2obtained from the fits of three data sets: all stars (top left p anel), population I (bottom left), and population II (botto m
-right) for the observations along the major axis of the simul ated galaxy. The results for the modeling of two populations (top right) were obtained
-as an algebraic sum of values for populations I and II. To avoi d large numbers in the figure, Υ0was divided by the mean mass of a stellar particle.
-where ris the distance from the center of the galaxy, r0is a
-constant, whileΥ0,a, and care the parameters of a model. We
-have assumed log r0=0.33 which corresponds to three softening
+Fig. 7. Absolute values of χ2 obtained from the fits of three data sets: all stars (top left p anel), population I (bottom left), and population II (botto m
+right) for the observations along the major axis of the simul ated galaxy . The results for the modeling of two populations (top right) were obtained
+as an algebraic sum of values for populations I and II. T o avoi d large numbers in the figure, Υ0 was divided by the mean mass of a stellar particle.
+where r is the distance from the center of the galaxy, r0 is a
+constant, while Υ0 , a, and c are the parameters of a model. W e
+have assumed log r0 = 0.33 which corresponds to three softening
 scales for stellar particles in the Illustris simulation.
-We probed the parameter a∈[0 : 1.3] with a step∆a=0.04
-andc∈[1.1 : 2.9] with a step∆c=0.2, imposing the requirement
+W e probed the parameter a ∈ [0 : 1 .3] with a step ∆a = 0.04
+and c ∈ [1.1 : 2 .9] with a step ∆c = 0.2, imposing the requirement
  on the total density profile to be monotonically decreas ing
 with radius. For each set of parameters and for each line of si ght
 we generated 1200 orbits using 100 values of energy (express ed
 with the radius of a circular orbit) spaced logarithmically and
 12 values of the relative angular momentum spaced linearly. The
 outer radius of the orbit library, that is the apocenter of th e most
-extended orbit, was set to rout=165 kpc in order to cover over
+extended orbit, was set to rout = 165 kpc in order to cover over
 0.999 of the total stellar mass based on the fitted King profile
 parameters.
-We fit the kinematics weighted with the fraction of mass with
+W e fit the kinematics weighted with the fraction of mass with
 the constrained least squares algorithm where di fferent values
-ofΥ0were obtained with a simple transformation of velocities
+of Υ0 were obtained with a simple transformation of velocities
 given by Eq. 12, 13, and 15 in Kowalczyk et al. (2018). In order
  to smooth out the numerical artifacts, the three-dimens ional
-χ2spaces were then interpolated with 12-order polynomials(∼a4c4Υ4
-0) that were further used to determine the global minimums
- (identified as the best-fitting models) and 1, 2, 3 σconfidence
- levels which for three parameters correspond to ∆χ2=
-3.53,8.02,14.2 (Press et al. 1992).
+χ2 spaces were then interpolated with 12-order polynomials
+(∼ a4 c4 Υ4
+0 ) that were further used to determine the global minimums
+ (identified as the best-fitting models) and 1, 2, 3 σ confidence
+ levels which for three parameters correspond to ∆χ2 =
+3.53, 8.02, 14.2 (Press et al. 1992).
 3.2. Application to mock data
 In the following we present the direct and inferred results o f
 the Schwarzschild modeling of the data sets described in Sec tion
  2.3.
 First, Fig. 7 shows the distribution of the absolute values o f
-theχ2as a function of three parameters of the mass-to-light ratio.
+the χ2 as a function of three parameters of the mass-to-light ratio.
  In order to avoid unnecessary repetitions, we include o nly
 the plot for the mock data obtained by observing the Illustri s
 galaxy along its major axis as the others are qualitatively s imilar.
@@ -463,76 +694,149 @@ As our parametrization of the mass-to-light ratio is not int uitive
  we present its profiles explicitly in the first rows of th e leftArticle
  number, page 6 of 12
 K. Kowalczyk & E. L. Łokas: Multiple stellar populations in S chwarzschild modeling
-1061071081091010
- 10  100ALLΥ(r) [M⊙/L⊙]
-r [kpc]major
- 10  100ALL
-r [kpc]intermediate
- 10  100ALL
-r [kpc]minor
+106
+107
+108
+109
+1010
+ 10  100
+ALL
+Υ(r) [M⊙/L⊙]
+r [kpc]
+major
+ 10  100
+ALL
+r [kpc]
+intermediate
+ 10  100
+ALL
+r [kpc]
+minor
 3σ
 2σ
 1σ
 best model
 data
-104106108
- 10  100ALLνtot(r) [M⊙ kpc-3]
-r [kpc] 10  100ALL
-r [kpc] 10  100ALL
+104
+106
+108
+ 10  100
+ALL
+νtot(r) [M⊙ kpc-3]
+r [kpc]
+ 10  100
+ALL
+r [kpc]
+ 10  100
+ALL
+r [kpc]
+1010
+1011
+1012
+ 10  100
+ALL
+M tot(r) [M⊙]
+r [kpc]
+ 10  100
+ALL
+r [kpc]
+ 10  100
+ALL
+r [kpc]
+-2
+-1
+0
+1
+ 0  10  20  30  40
+ALL
+β(r)
 r [kpc]
-101010111012
- 10  100ALLMtot(r) [M⊙]
-r [kpc] 10  100ALL
-r [kpc] 10  100ALL
+ 0  10  20  30  40
+ALL
 r [kpc]
--2-101
- 0 10 20 30 40ALLβ(r)
-r [kpc] 0 10 20 30 40ALL
-r [kpc] 0 10 20 30 40 50ALL
-r [kpc]1061071081091010
- 10  100POPULATIONSΥ(r) [M⊙/L⊙]
-r [kpc]major
- 10  100POPULATIONS
-r [kpc]intermediate
- 10  100POPULATIONS
-r [kpc]minor
+ 0  10  20  30  40  50
+ALL
+r [kpc]
+106
+107
+108
+109
+1010
+ 10  100
+POPULATIONS
+Υ(r) [M⊙/L⊙]
+r [kpc]
+major
+ 10  100
+POPULATIONS
+r [kpc]
+intermediate
+ 10  100
+POPULATIONS
+r [kpc]
+minor
 3σ
 2σ
 1σ
 best model
 data
-104106108
- 10  100POPULATIONSνtot(r) [M⊙ kpc-3]
-r [kpc] 10  100POPULATIONS
-r [kpc] 10  100POPULATIONS
+104
+106
+108
+ 10  100
+POPULATIONS
+νtot(r) [M⊙ kpc-3]
+r [kpc]
+ 10  100
+POPULATIONS
+r [kpc]
+ 10  100
+POPULATIONS
+r [kpc]
+1010
+1011
+1012
+ 10  100
+POPULATIONS
+M tot(r) [M⊙]
+r [kpc]
+ 10  100
+POPULATIONS
+r [kpc]
+ 10  100
+POPULATIONS
+r [kpc]
+-2
+-1
+0
+1
+ 0  10  20  30  40
+POPULATIONS
+β(r)
 r [kpc]
-101010111012
- 10  100POPULATIONSMtot(r) [M⊙]
-r [kpc] 10  100POPULATIONS
-r [kpc] 10  100POPULATIONS
+ 0  10  20  30  40
+POPULATIONS
 r [kpc]
--2-101
- 0 10 20 30 40POPULATIONSβ(r)
-r [kpc] 0 10 20 30 40POPULATIONS
-r [kpc] 0 10 20 30 40 50POPULATIONS
+ 0  10  20  30  40  50
+POPULATIONS
 r [kpc]
 Fig. 8. Left-hand side: results of Schwarzschild modeling of three mock data sets obtained by observing the simulated galaxy al ong the principal
-axes. In rows: derived mass-to-light ratio, total density, total mass, an d anisotropy parameter. In columns: observations along the major, intermediate,
- and minor axis, respectively. Green lines indicate v alues for the best-fit models whereas the colored areas of dec reasing intensity show the
-1, 2, and 3σconfidence levels. The true values are presented as black lin es. Thin vertical lines mark the values of r0and the outer range of the
+axes. In rows: derived mass-to-light ratio, total density , total mass, an d anisotropy parameter. In columns: observations along the major, intermediate,
+ and minor axis, respectively . Green lines indicate v alues for the best-fit models whereas the colored areas of dec reasing intensity show the
+1, 2, and 3 σ confidence levels. The true values are presented as black lin es. Thin vertical lines mark the values of r0 and the outer range of the
 data sets, from left to right. Right-hand side: same as left b ut for the fit of two stellar populations.
 and right-hand side panels of Fig. 8 for the results obtained for
-all stars and the populations, respectively. We further cal culate
+all stars and the populations, respectively. W e further cal culate
 the total density (second rows) and the total mass content (t hird
-rows). We include the obtained orbit anisotropy within the m odeled
+rows). W e include the obtained orbit anisotropy within the m odeled
  range in the bottom rows. The consecutive columns prese nt
 the results for the observations along the major, intermedi ate,
 and minor axis. Green lines indicate values for the best-fit m odels
  whereas the colored areas of decreasing intensity corre spond
-to 1, 2, and 3σconfidence regions obtained as extreme values allowed
- by the models with χ2within a given region. In each panel
+to 1, 2, and 3 σconfidence regions obtained as extreme values allowed
+ by the models with χ2 within a given region. In each panel
 the true values from the simulation are presented with black lines
-while thin vertical lines mark the values of r0and the outer range
+while thin vertical lines mark the values of r0 and the outer range
 of the data sets beyond which the reliability of results drop s significantly.
  The true mass-to-light ratio profile was obtaine d by
 dividing the total mass by the fitted King profiles, therefore the
@@ -545,73 +849,93 @@ location of global minimum and confidence levels from two pop ulations
 another method of calculating the anisotropy. In the second and
 third row we show the derived profiles for population I and II
 separately and combine them as stellar mass weighted averag e
-in the top row. As in previous figures, three columns refer to t he
+in the top row . As in previous figures, three columns refer to t he
 different lines of sight whereas the narrow fourth one shows the
 behavior of the true profiles outside the modeled range which , as
 we noticed in our previous studies, in a limited way influence s
 the results. Such an impact is understandable since the star s at
 larger distances from the center are still included in the li ne-ofsight
- measurements.3.3. Comparison of fitting results
+ measurements.
+3.3. Comparison of fitting results
 The main strength of the two populations method comes from
 tracing the underlying gravitational potential at di fferent scales.
 As can be seen in the bottom panels of Fig. 7, population I, whi ch
-is more concentrated, is also more sensitive to Υ0, but gives
-weaker constraints on aorc. On the other hand, population II
+is more concentrated, is also more sensitive to Υ0 , but gives
+weaker constraints on a or c. On the other hand, population II
 attempts to reproduce the total mass content at larger dista nces
 as well, therefore showing stronger coupling between the pa rameters.
 
-The global minimums of the χ2distributions for both approaches,
+The global minimums of the χ2 distributions for both approaches,
  that is modeling one and two populations, which we
 identify as the best-fitting models, closely coincide showi ng that
 there is no internal bias in the improved method. However, si gnificant
- differences can be observed when comparing the confidence
+ di fferences can be observed when comparing the confidence
  levels, mainly at 1 and 3 σ. Namely, we find that using
 two populations, the constraints we obtain on the density an d
 anisotropy profile are much stronger.
 Additionally, the more accurate method allows us to study
-other effects and biases, for example the consequences of the
+other e ffects and biases, for example the consequences of the
 nonsphericity of the modeled object. Whereas for the fit of al l
 stars the true values of the density, mass, and anisotropy pr ofiles
-are contained within 1 σconfidence regions, the results for the
+are contained within 1 σ confidence regions, the results for the
 populations are more or less biased depending on the axis. Th ey
 are well reproduced for the observation along the intermedi ate
 axis, for which the e ffects of nonsphericity seem to cancel out,
-and more biased for the remaining lines of sight. We notice a
+and more biased for the remaining lines of sight. W e notice a
 trend from under- to overestimation of the anisotropy when g oing
  from the major to the minor axis.
 Article number, page 7 of 12
 A&A proofs: manuscript no. Populations4
--101
- 0 10 20 30 40POP I + POP II
+-1
+0
+1
+ 0  10  20  30  40
+POP I + POP II
 β(r)
-r [kpc]major
- 0 10 20 30 40
-r [kpc]intermediate
- 0 10 20 30 40
-r [kpc]minor
- 50 60 70 80
--101
- 0 10 20 30 40POP I
+r [kpc]
+major
+ 0  10  20  30  40
+r [kpc]
+intermediate
+ 0  10  20  30  40
+r [kpc]
+minor
+ 50  60  70  80
+-1
+0
+1
+ 0  10  20  30  40
+POP I
 β(r)
-r [kpc] 0 10 20 30 40
-r [kpc] 0 10 20 30 40
-r [kpc] 50 60 70 80
--101
- 0 10 20 30 40POP II
+r [kpc]
+ 0  10  20  30  40
+r [kpc]
+ 0  10  20  30  40
+r [kpc]
+ 50  60  70  80
+-1
+0
+1
+ 0  10  20  30  40
+POP II
 β(r)
-r [kpc] 0 10 20 30 40
-r [kpc] 0 10 20 30 40
-r [kpc] 50 60 70 80
+r [kpc]
+ 0  10  20  30  40
+r [kpc]
+ 0  10  20  30  40
+r [kpc]
+ 50  60  70  80
 data
 best model
- 1σ
+
+1σ
 2σ
 3σ
-Fig. 9. Profiles of the anisotropy parameter obtained with the Schwa rzschild modeling of two stellar populations of the simulat ed galaxy. In rows:
+Fig. 9. Profiles of the anisotropy parameter obtained with the Schwa rzschild modeling of two stellar populations of the simulat ed galaxy . In rows:
 results for all stars (calculated as the superposition of tw o populations), population I, and population II. Colors fol low the convention used in
 previous figures. In columns: observations along the major, intermediate, and minor axis . The last narrower column shows the data (black lines)
 outside the modeled radial range. Color lines indicate valu es for the best-fit models whereas the colored areas of decrea sing intensity show the 1,
-2, and 3σconfidence regions.
+2, and 3 σ confidence regions.
 4. Modeling Fornax dSph
 In this section we present the application of our Schwarzsch ild
 modeling scheme to the observational data for the Fornax dSp h
@@ -629,45 +953,57 @@ of the final spectroscopic sample is shown in Fig. 10. Additio nally,
  we color-coded each bin with the population it has been
 assigned to, namely orange or blue for population I or II. Int erestingly,
  the case of Fornax is similar to our simulated gala xy
-as the split at [Fe/H]=−1 also captures an important feature
+as the split at [Fe /H]= −1 also captures an important feature
 of the object’s star formation history, separating stars in to subsamples
  older and younger than 6 Gyr, as shown in Fig. 12 of
 del Pino et al. (2015) and Fig. 8 of del Pino et al. (2017). The
 numbers of stars contained in the samples of all stars, popul ation
- I, and population II are given in Table 2, where the indic es
+ I, and population II are given in T able 2, where the indic es
 "phot" and "spec" refer to the photometric and kinematic sam ples.
- The sum of stars in the populations is lower than in thesample of all stars since only stars with reliable measureme nts
+ The sum of stars in the populations is lower than in the
+sample of all stars since only stars with reliable measureme nts
 of metallicity could be included.
 N
-[Fe/H]pop I
+[Fe/H]
+pop I
 pop II
- 0 20 40 60 80 100
--2.5-2-1.5-1-0.5 0
+ 0
+ 20
+ 40
+ 60
+ 80
+ 100
+-2.5 -2 -1.5 -1 -0.5  0
 Fig. 10. Metallicity histogram of the final spectroscopic sample use d in
 the modeling of two stellar populations in the Fornax dSph. E ach bin is
 color-coded according to the population it has been assigne d to, orange
-or blue for population I and II, respectively.
+or blue for population I and II, respectively .
 As we have shown in our earlier work, the light profile of the
 Fornax dSph can be well reproduced with the three-parameter
 Article number, page 8 of 12
 K. Kowalczyk & E. L. Łokas: Multiple stellar populations in S chwarzschild modeling
-Table 2. Properties of the data samples for the Fornax dSph.
+T able 2.Properties of the data samples for the Fornax dSph.
 Property ALL POP I POP II
-Number of stars ( Nphot) 65 797 14 882 49 205
-Number of stars ( Nspec) 3286 1136 1151
+Number of stars ( Nphot ) 65 797 14 882 49 205
+Number of stars ( Nspec ) 3286 1136 1151
 Stars within 1.8 kpc 3268 1134 1130
-Fitted normalization ( N0) [×104] 6.95 1.81 5.45
-Sérsic radius ( RS) [kpc] 0.454 0.429 0.420
+Fitted normalization ( N0 ) [ ×104] 6.95 1.81 5.45
+Sérsic radius ( RS ) [kpc] 0.454 0.429 0.420
 Sérsic parameter ( m) 0.808 0.807 0.898
-102103104105
- 0.2  0.5  2  0.1  1n⋆(R) [kpc-2]
-R [kpc]all stars
+102
+103
+104
+105
+ 0.2  0.5  2 0.1  1
+n⋆(R) [kpc-2]
+R [kpc]
+all stars
 popI
 popII
 Fig. 11. Surface number density profiles of the photometric data samples
  for the Fornax dSph: all available stars (in red), the me tal-rich population
  I (in orange), and the metal-poor population II (in b lue). Thin
-vertical lines indicate r0(see text) and the outer boundary of the spectroscopic
+vertical lines indicate r0 (see text) and the outer boundary of the spectroscopic
  data.
 Sérsic formula (Sérsic 1968). The profiles of number density for
 all stars and both populations together with the best-fittin g Sérsic
@@ -675,48 +1011,72 @@ profiles are presented in Fig. 11. The colors follow the conve ntion
  introduced in previous sections. Thin vertical lines i ndicate
 the innermost data point for the light profile for all stars an d
 the outer boundary of the kinematic sample. The former, set a t
-logr=−0.16, is also used as the minimum of the mass-to-light
-ratio profile ( r0in Eq. 5). The fitted parameters of the profiles,
-that is the normalization N0, the Sérsic radius RS, and the Sérsic
-parameter m, are included in the second part of Table 2.
+log r = −0.16, is also used as the minimum of the mass-to-light
+ratio profile ( r0 in Eq. 5). The fitted parameters of the profiles,
+that is the normalization N0 , the Sérsic radius RS , and the Sérsic
+parameter m, are included in the second part of T able 2.
 Figure 12 presents the profiles of the observables used in the
 Schwarzschild modeling: the fraction of stars and the 2nd, 3 rd,
 and 4th velocity moments (top to bottom) for the three data sa mples:
  all stars, population I, and population II (in red, ora nge, and
 blue, respectively). The error bars indicate 1 σsampling errors.
 The parameter space for Υ(r) has been probed as follows:
-a∈[0 : 1.85] with a step∆a=0.05 and c∈[1.2 : 6] with a
-step∆c=0.2. We point out that in Kowalczyk et al. (2019) the
-parameter cwas fixed at c=3 and now we fit it as a free parameter.
+a ∈ [0 : 1 .85] with a step ∆a = 0.05 and c ∈ [1.2 : 6] with a
+step ∆c = 0.2. W e point out that in Kowalczyk et al. (2019) the
+parameter c was fixed at c = 3 and now we fit it as a free parameter.
  As for the mock data in Section 3.2, di fferent values of
-Υ0were obtained with the transformation of velocity moments
-within theχ2fitting routine. The values of ∆χ2for all stars and
+Υ0 were obtained with the transformation of velocity moments
+within the χ2 fitting routine. The values of ∆χ2 for all stars and
 the populations are shown in the two panels of Fig. 13 (left an d
 right-hand side, respectively). Due to the dense coverage o f the
-grid, we decided to include only the values within 3 σfrom the
+grid, we decided to include only the values within 3 σ from the
 fitted minimums (see Section 3.1).
 The profiles of the mass-to-light ratio, total density, tota l
-mass, and velocity anisotropy resulting from the χ2distributions
+mass, and velocity anisotropy resulting from the χ2 distributions
 are presented in the consecutive rows of Fig. 14. The anisotr opy
-profile for the populations is based on the fit of all stars but u sing 0 0.05 0.1 0.15 0.2 0.25
- 0 0.4  0.8  1.2  1.6M(R)
-R [kpc]all stars
+profile for the populations is based on the fit of all stars but u sing
+ 0
+ 0.05
+ 0.1
+ 0.15
+ 0.2
+ 0.25
+ 0  0.4  0.8  1.2  1.6
+M(R)
+R [kpc]
+all stars
 pop I
 pop II
-04080120160200
- 0 0.4  0.8  1.2  1.6m2(R)[(km s-1)2]
+0
+40
+80
+120
+160
+200
+ 0  0.4  0.8  1.2  1.6
+m 2(R)[(km s-1)2]
 R [kpc]
--16-80816
- 0 0.4  0.8  1.2  1.6m3(R)[102(km s-1)3]
+-16
+-8
+0
+8
+16
+ 0  0.4  0.8  1.2  1.6
+m 3(R)[102(km s-1)3]
 R [kpc]
-0481216
- 0 0.4  0.8  1.2  1.6m4(R)[104(km s-1)4]
+0
+4
+8
+12
+16
+ 0  0.4  0.8  1.2  1.6
+m 4(R)[104(km s-1)4]
 R [kpc]
 Fig. 12. Observables of the Fornax dSph used in our Schwarzschild
 modeling scheme. In rows: the fraction of the total number of stars, the
 2nd, 3rd, and 4th velocity moment. In red we present the value s obtained
 for all stars whereas in orange and blue those for population s I and II,
-respectively.
+respectively .
 the confidence levels on Υfrom the fit of two populations. Green
 lines indicate the values for the best-fitting models wherea s the
 colored areas of decreasing intensity show the 1, 2, and 3 σconfidence
@@ -726,44 +1086,68 @@ As a result of freeing the steepness of the mass-to-light
 ratio profile (parameter c) with respect to the previous study
 Article number, page 9 of 12
 A&A proofs: manuscript no. Populations4
- 0 0.5 1 1.5
  0
  0.5
  1
- 1.5 2 3 4 5 6ALL
+ 1.5
+ 0
+ 0.5
+ 1
+ 1.5
+ 2
+ 3
+ 4
+ 5
+ 6
+ALL
 Υ0
-ac
- 0 0.5 1 1.5
+a
+c
+ 0
+ 0.5
+ 1
+ 1.5
  0
  0.5
  1
- 1.5 2 3 4 5 6POPULATIONS
+ 1.5
+ 2
+ 3
+ 4
+ 5
+ 6
+POPULATIONS
 Υ0
-ac
- 0 3 6 9 12
+a
+c
+ 0
+ 3
+ 6
+ 9
+ 12
 χ2-χ2
 min
-Fig. 13. Values ofχ2relative to the fitted minimum within the range of 3 σconfidence level for all stars (left panel) and for the popula tions (right
+Fig. 13. V alues of χ2 relative to the fitted minimum within the range of 3 σ confidence level for all stars (left panel) and for the popula tions (right
 panel) for the Fornax dSph.
 (Kowalczyk et al. 2019), we obtained higher estimates of the enclosed
  total mass at larger radii. In particular, for the mas s enclosed
- within 1.8 kpc we get Mall(<1.8 kpc)=3.87+1.48
-−1.56×108
-M⊙from the fit for all stars and Mpops(<1.8 kpc)=4.71+0.87
-−1.13×
-108M⊙from the fit of populations, while previously we had
-Mold(<1.8 kpc)=3.7+1.4
-−1.3×108M⊙.
+ within 1.8 kpc we get Mall (< 1.8 kpc) = 3.87+1.48
+−1.56 × 108
+M⊙ from the fit for all stars and Mpops (< 1.8 kpc) = 4.71+0.87
+−1.13 ×
+108 M⊙ from the fit of populations, while previously we had
+Mold (< 1.8 kpc) = 3.7+1.4
+−1.3 ×108 M⊙.
 Interestingly, despite the significant shift of the positio n of
 χ2
-min(toc=4.2 for all stars and 3.6 for populations), the obtained
+min (to c = 4.2 for all stars and 3.6 for populations), the obtained
  profile of the anisotropy parameter remains decreasi ng or
 flat for all stars but changes to increasing from 0 to 0.5 for th e
 populations. Nevertheless, even in the latter case the prev ious
 result agrees with the new finding within 1 σ.
 The detailed analysis of the anisotropy is shown in Fig. 15
 where the middle and bottom panels present the profiles obtained
- for each population separately. We notice that the pr ofile
+ for each population separately. W e notice that the pr ofile
 for population I is decreasing or has a local minimum whereas
 for population II is increasing (from −0.25 to 0.5 for the bestfitting
  model). Since population I is more concentrated, the last
@@ -773,7 +1157,7 @@ top panel of Fig. 15 presents the anisotropy of all stars calc ulated
 approach we still obtain the increasing profile (from 0 to 0.5 ) but
 the previous result agrees with it only within 2 σ.
 Since Fornax dSph is significantly elongated with the projected
- ellipticity of ǫ=0.30±0.01 (Irwin & Hatzidimitriou
+ ellipticity of ǫ = 0.30 ± 0.01 (Irwin & Hatzidimitriou
 1995), we anticipate some bias in the obtained results cause d
 by the spherically symmetric modeling. Kowalczyk et al. (20 18)
 studied such bias in an axisymmetric simulated object quali tatively
@@ -786,12 +1170,13 @@ underestimated, further strengthening the likelihood of t he real
 anisotropy to be radial and its profile to be growing with radi us
 with respect to the results of Kowalczyk et al. (2019).
 Both constant (like for our population I) and growing (population
- II) anisotropy profiles can arise from biased modeli ngof the real growing profile by observing an object along the
+ II) anisotropy profiles can arise from biased modeli ng
+of the real growing profile by observing an object along the
 minor and major axis, respectively. However, for the bias to
 occur in two populations presented here, their inner orient ations
  would need to be opposite. Since such morphological fe atures
  are not supported by the photometric studies of Fornax
-(del Pino et al. 2015; Wang et al. 2019) which rather find a good
+(del Pino et al. 2015; W ang et al. 2019) which rather find a good
 spatial alignment between the stellar populations, we conc lude
 that the anisotropy profiles of the two populations modeled i n
 this work are indeed significantly distinct.
@@ -799,8 +1184,8 @@ Finally, it is worth noticing that the so-called mass-follo wslight
  model, that is the one following from the assumption th at
 the total density traces the stellar distribution, is no lon ger supported
  by the fit of the populations. With our parametrizatio n,
-the mass-follows-light model corresponds to a=0 and whereas
-it is enclosed within 3 σfor the fit of all stars, as was the case
+the mass-follows-light model corresponds to a = 0 and whereas
+it is enclosed within 3 σ for the fit of all stars, as was the case
 in Kowalczyk et al. (2019), the allowed values for the improv ed
 method are much larger, as demonstrated by the right panel of
 Fig. 13.
@@ -817,7 +1202,7 @@ expect a significant improvement in the estimates of not only the
 total mass content but also the orbit anisotropy since this r obust
 modeling technique reproduces the anisotropy as a by-produ ct
 of the modeling rather than taking it as an assumption.
-We have tested our hypothesis by modeling mock data generated
+W e have tested our hypothesis by modeling mock data generated
  from a galaxy formed in the Illustris simulation. Due to the
 limitations of the resolution, we chose a galaxy of mass a few orders
  of magnitude larger than the estimated masses of classi cal
@@ -825,36 +1210,55 @@ dwarfs. Still, the galaxy possessed appropriate qualitati ve characteristics,
  such as the lack of gas and an almost spherical s hape,
 Article number, page 10 of 12
 K. Kowalczyk & E. L. Łokas: Multiple stellar populations in S chwarzschild modeling
-101103105
- 0.1  1Υ(r) [M⊙/L⊙]
-r [kpc]ALL
- 0.1  1r [kpc]POPULATIONS
+101
+103
+105
+ 0.1  1
+Υ(r) [M⊙/L⊙]
+r [kpc]
+ALL
+ 0.1  1r [kpc]
+POPULATIONS
 3σ
 2σ
 1σ
 best model
 K19
-104106108
- 0.1  1νtot(r) [M⊙ kpc-3]
-r [kpc] 0.1  1
+104
+106
+108
+ 0.1  1
+νtot(r) [M⊙ kpc-3]
+r [kpc]
+ 0.1  1
+r [kpc]
+105
+107
+109
+ 0.1  1
+M tot(r) [M⊙]
 r [kpc]
-105107109
- 0.1  1Mtot(r) [M⊙]
-r [kpc] 0.1  1
+ 0.1  1
 r [kpc]
--3-2-101
- 0 0.4 0.8 1.2 1.6β(r)
-r [kpc] 0 0.4 0.8 1.2 1.6
+-3
+-2
+-1
+0
+1
+ 0  0.4  0.8  1.2  1.6
+β(r)
+r [kpc]
+ 0  0.4  0.8  1.2  1.6
 r [kpc]
 Fig. 14. Results of Schwarzschild modeling of the Fornax dSph.
-In rows: derived mass-to-light ratio, total density, total mass, an d
+In rows: derived mass-to-light ratio, total density , total mass, an d
 anisotropy parameter. In columns: results for all stars and the populations,
- respectively. Green lines indicate the values for th e best-fit models
-whereas the colored areas of decreasing intensity show the 1 , 2, and 3σ
+ respectively . Green lines indicate the values for th e best-fit models
+whereas the colored areas of decreasing intensity show the 1 , 2, and 3 σ
 confidence regions. The best-fitting values obtained by Kowa lczyk et al.
 (2019) are shown with black dashed lines.
 that made it a good test bed for modeling techniques applicable
- to dSph galaxies. We applied our approach to all data and
+ to dSph galaxies. W e applied our approach to all data and
 to two stellar populations separately, comparing the accur acy of
 the obtained results. Although the addition of the second tr acer
 seemingly increases the number of constraints twice, the in crement
@@ -863,31 +1267,45 @@ number of stars in each sample is then reduced. Still, we foun d
 strong improvements in the accuracy of the method when using
  two populations. The results of the modeling show that th e
 density and velocity anisotropy profiles are more strongly c onstrained,
- most importantly at the 3 σlevel, that is the range of
+ most importantly at the 3 σ level, that is the range of
 allowed values is much narrower.
 Similarly to the conclusions of Kowalczyk et al. (2018) who
-explored the effects of nonsphericity using large and small
+explored the e ffects of nonsphericity using large and small
 data samples, the comparison of results presented in the lef tand
  right-hand side panels of Fig. 8 suggests that the improv ed
 method using two stellar populations gives more precise but less
 accurate outcome. However, in both studies the apparent det erioration
  of the reliability is a consequence of modeling of a
 nonspherical object. In both cases, a simpler approach (muc h
-smaller data samples or using one stellar population) resul ted-2-101
- 0 0.4 0.8 1.2 1.6POP I + POP II
+smaller data samples or using one stellar population) resul ted
+-2
+-1
+0
+1
+ 0  0.4  0.8  1.2  1.6
+POP I + POP II
 β(r)
 r [kpc]
--2-101
- 0 0.4 0.8 1.2 1.6POP I
+-2
+-1
+0
+1
+ 0  0.4  0.8  1.2  1.6
+POP I
 β(r)
 r [kpc]
--2-101
- 0 0.4 0.8 1.2 1.6POP II
+-2
+-1
+0
+1
+ 0  0.4  0.8  1.2  1.6
+POP II
 β(r)
 r [kpc]
 best model
 1σ
-2σ3σ
+2σ
+3σ
 K19
 Fig. 15. Profiles of the anisotropy parameter obtained with the
 Schwarzschild modeling of two stellar populations for the F ornax dSph.
@@ -897,20 +1315,20 @@ for the best-fit models whereas the colored areas of decreasi ng intensity
 show the 1, 2, and 3 σconfidence regions. The dashed black line shows
 the result from Kowalczyk et al. (2019) for comparison.
 in larger final uncertainties, usually containing the true v alues
-within 1σconfidence region. On the other hand, the improved
+within 1 σ confidence region. On the other hand, the improved
 methods exhibit substantially reduced uncertainties, hig hlighting
 the underlying bias.
 Our method parametrizes the total mass content with the
 mass-to-light ratio varying with radius as a power-law in th e loglog
- scale. We made two main changes with respect to our previous
- work: we added a third parameter ccontrolling the steepness
+ scale. W e made two main changes with respect to our previous
+ work: we added a third parameter c controlling the steepness
 of the mass-to-light ratio profile (previously fixed at the va lue of
 3) and allowed for di fferent stellar density profiles (previously
 only Sérsic, now also King). These changes are of course coupled
- since different density profiles require di fferent exponents to
+ since di fferent density profiles require di fferent exponents to
 reproduce the same mass profile. It is visible also in our resu lts
 since the King profile applied in the simulated galaxy gave us
-values of clower than 3. Nevertheless, we decided to use di fferent
+values of c lower than 3. Nevertheless, we decided to use di fferent
  density profiles to make our method more general and appli cable
  to objects, such as our Illustris galaxy, for which the Sérsic
 formula does not provide a good approximation of the density
@@ -932,20 +1350,20 @@ estimates is seen in the results of modeling two populations in
 Fornax. In this case we find the anisotropy to be slightly incr easing
  rather than decreasing with radius and, most importantl y, the
 confidence regions for this parameter, as well as for the density,
- are much narrower. We were thus able to obtain tighter c onstraints
+ are much narrower. W e were thus able to obtain tighter c onstraints
  on the properties of Fornax, which means that the im proved
  method is successful. For the first time, we were also a ble
 to deduce the velocity anisotropy profiles for each of the pop ulations
- separately. We found that the more concentrated, meta l-rich
+ separately. W e found that the more concentrated, meta l-rich
 population I has a decreasing anisotropy profile while the mo re
 extended, metal-poor population II has the anisotropy incr easing
 with radius. This finding may partially explain the large spr ead
 of the anisotropy values obtained in the literature and summ arized
- in Table 2 and 3 of Kowalczyk et al. (2019), which were
+ in T able 2 and 3 of Kowalczyk et al. (2019), which were
 often based on modeling subsamples of our spectroscopic dat a
 set.
 For both studied objects we split the stars into two populations
- by dividing them in half based on their metallicity, Z(in
+ by dividing them in half based on their metallicity, Z (in
 solar units), for the Illustris galaxy and [Fe /H] for Fornax. Such
 a method is approximate but justified. Both galaxies have com plex
  star formation history with multiple star formation bu rsts, as
@@ -957,7 +1375,7 @@ objects are approximately unimodal not allowing for a conve nient
  separation. More refined methods of division have been
 suggested in the literature, for example in the form of the li kelihood
  function based on the position, velocity, and metallic ity index
- (Walker & Peñarrubia 2011). However, the likelihood fun ction
+ (W alker & Peñarrubia 2011). However, the likelihood fun ction
  requires many assumptions which introduce additional uncertainties
  into the treatment of the data. On the other hand , our
 approach ensures the maximization of each sample (and there fore
@@ -965,25 +1383,26 @@ approach ensures the maximization of each sample (and there fore
  features of the star formation history.
 Further improvements to the Schwarzschild modeling
 method are certainly possible. One way to proceed would be to
-include the modeling of the proper motions of the stars. For n ow,
+include the modeling of the proper motions of the stars. For n ow ,
 measurements of transverse velocities are available only f or the
 brightest stars in dSph galaxies, but even small samples of t his
 type could provide further constraints on the models, as dem onstrated
  by Strigari et al. (2007) and Massari et al. (2020).
-Acknowledgements. We are grateful to Andrés del Pino for providing the data for
+Acknowledgements. W e are grateful to Andrés del Pino for providing the data for
 the Fornax dSph and to the Illustris team for making their sim ulations publicly
 available. Useful comments from the anonymous referee are k indly appreciated.
 This research was supported by the Polish National Science C enter under grant
 2018/28/C/ST9/00529.
 References
-Amorisco, N. C., & Evans, N. W. 2012, MNRAS, 419, 184
-Battaglia, G., Helmi, A., Tolstoy, E., et al. 2008, ApJ, 681, L13
-Bellazzini, M., Ferraro, F. R., & Pancino, E. 2001, MNRAS, 32 7, L15
-Binney, J., & Tremaine, S. 2008, Galactic Dynamics, 2nd edn. (Princeton University
+Amorisco, N. C., & Evans, N. W . 2012, MNRAS, 419, 184
+Battaglia, G., Helmi, A., T olstoy , E., et al. 2008, ApJ, 681, L13
+Bellazzini, M., Ferraro, F . R., & Pancino, E. 2001, MNRAS, 32 7, L15
+Binney , J., & Tremaine, S. 2008, Galactic Dynamics, 2nd edn. (Princeton University
  Press, Princeton)
 Breddels, M. A., & Helmi, A. 2013, A&A, 558, A35
-Breddels, M. A., Helmi, A., van den Bosch, R. C. E., van de Ven, G., & Battaglia,
-G. 2013, MNRAS, 433, 3173del Pino, A., Hidalgo, S. L., Aparicio, A., et al. 2013, MNRAS , 433, 1505
+Breddels, M. A., Helmi, A., van den Bosch, R. C. E., van de V en, G., & Battaglia,
+G. 2013, MNRAS, 433, 3173
+del Pino, A., Hidalgo, S. L., Aparicio, A., et al. 2013, MNRAS , 433, 1505
 del Pino, A., Aparicio, A., & Hidalgo, S. L. 2015, MNRAS, 454, 3996
 del Pino, A., Aparicio, A., Hidalgo, S. L., & Łokas, E. L. 2017 , MNRAS, 465,
 3708
@@ -991,38 +1410,38 @@ Fabrizio, M., Bono, G., Nonino, M., et al. 2016, ApJ, 830, 126
 Gebhardt, K., Richstone, D., Tremaine, S., et al. 2003, ApJ, 583, 92
 Genel, S., Fall, S. M., Hernquist, L., et al. 2015, ApJ, 804, L 40
 Genel, S., V ogelsberger, M., Springel, V ., et al. 2014, MNRA S, 445, 175
-Genina, A., Benitez-Llambay, A., Frenk, C. S., et al. 2018, M NRAS, 474, 1398
+Genina, A., Benitez-Llambay , A., Frenk, C. S., et al. 2018, M NRAS, 474, 1398
 Hayashi, K., Fabrizio, M., Łokas, E. L., et al. 2018, MNRAS, 4 81, 250
 Irwin, M., & Hatzidimitriou, D. 1995, MNRAS, 277, 1354
 Jardel, J. R., & Gebhardt, K. 2012, ApJ, 746, 89
-Jardel, J. R., Gebhardt, K., Fabricius, M. H., Drory, N., & Wi lliams, M. J. 2013,
+Jardel, J. R., Gebhardt, K., Fabricius, M. H., Drory , N., & Wi lliams, M. J. 2013,
 ApJ, 763, 91
 King, I. 1962, AJ, 67, 471
 Kowalczyk, K., Łokas, E. L., Kazantzidis, S., & Mayer, L. 201 3, MNRAS, 431,
 2796
-Kowalczyk, K., Łokas, E. L., & Valluri, M. 2017, MNRAS, 470, 3 959
-Kowalczyk, K., Łokas, E. L., & Valluri, M. 2018, MNRAS, 476, 2 918
-Kowalczyk, K., del Pino, A., Łokas, E. L., & Valluri, M. 2019, MNRAS, 482,
+Kowalczyk, K., Łokas, E. L., & V alluri, M. 2017, MNRAS, 470, 3 959
+Kowalczyk, K., Łokas, E. L., & V alluri, M. 2018, MNRAS, 476, 2 918
+Kowalczyk, K., del Pino, A., Łokas, E. L., & V alluri, M. 2019, MNRAS, 482,
 5241
 Łokas, E. L., 2002, MNRAS, 333, 697
-Łokas, E. L., Mamon, G. A., & Prada, F. 2005, MNRAS, 363, 918
+Łokas, E. L., Mamon, G. A., & Prada, F . 2005, MNRAS, 363, 918
 Massari, D., Helmi, A., Mucciarelli, A. et al. 2020, A&A, 633 , A36
 Mateo, M. 1998, ARA&A, 36, 435
 Nelson, D., Pillepich, A., Genel, S., et al. 2015, Astronomy and Computing, 13,
 12
-Pace, A. B., Kaplinghat, M., Kirby, E., et al. 2020, MNRAS, 49 5, 3022
-Press, W. H., Teukolsky, S. A., Vetterling, W. T., & Flannery , B. P. 1992, Numerical
+Pace, A. B., Kaplinghat, M., Kirby , E., et al. 2020, MNRAS, 49 5, 3022
+Press, W . H., T eukolsky , S. A., V etterling, W . T ., & Flannery , B. P . 1992, Numerical
  Recipes in C, 2nd edn. (Cambridge University Press, Cam bridge)
 Schwarzschild, M. 1979, ApJ, 232, 236
 Sérsic, J. L. 1968, Atlas de Galaxias Australes (Observator io Astronomico, Cordoba,
  Argentina)
 Strigari, L. E., Bullock, J. S., & Kaplinghat, M. 2007, ApJ, 6 57, L1
-Tolstoy, E., Hill, V ., & Tosi, M. 2009, ARA&A, 47, 371
-Valluri, M., Merritt, D., & Emsellem, E. 2004, ApJ, 602, 66
-van der Marel, R. P., Cretton, N., de Zeeuw, P. T., & Rix, H.-W. 1998, ApJ, 493,
+T olstoy , E., Hill, V ., & T osi, M. 2009, ARA&A, 47, 371
+V alluri, M., Merritt, D., & Emsellem, E. 2004, ApJ, 602, 66
+van der Marel, R. P ., Cretton, N., de Zeeuw , P . T ., & Rix, H.-W . 1998, ApJ, 493,
 613
 V ogelsberger, M., Genel, S., Springel, V ., et al. 2014a, Nat ure, 509, 177
 V ogelsberger, M., Genel, S., Springel, V ., et al. 2014b, MNR AS, 444, 1518
-Walker, M. G., & Peñarrubia, J. 2011, ApJ, 742, 20
-Wang, M. Y ., de Boer, T., Pieres, A., et al. 2019, ApJ, 881, 118
+W alker, M. G., & Peñarrubia, J. 2011, ApJ, 742, 20
+W ang, M. Y ., de Boer, T ., Pieres, A., et al. 2019, ApJ, 881, 118
 Article number, page 12 of 
\ No newline at end of file
diff --git a/read/results/pypdf/2201.00178.txt b/read/results/pypdf/2201.00178.txt
index 7cf5988..6423552 100644
--- a/read/results/pypdf/2201.00178.txt
+++ b/read/results/pypdf/2201.00178.txt
@@ -1,10 +1,10 @@
 Draft version January 4, 2022
-Typeset using L ATEX default style in AASTeX631
+Typeset using LATEX default style in AASTeX631
 Imaging the Sun’s near-surface flows using mode-coupling analysis
 Prasad Mani
- ,1Chris S. Hanson
- ,2and Shravan Hanasoge
-1, 2
+ ,1 Chris S. Hanson
+ ,2 and Shravan Hanasoge
+ 1, 2
 1Department of Astronomy and Astrophysics, Tata Institute of Fundamental Research, Mumbai, India
 2Center for Space Science, NYUAD Institute, New York University Abu Dhabi, Abu Dhabi, UAE
 ABSTRACT
@@ -15,9 +15,9 @@ Magnetic Imager onboard the Solar Dynamics Observatory, we perform inversions on
 measurements to show that the resulting divergence and radial vorticity maps at supergranular length
 scales (∼30 Mm) near the surface compare extremely well with those obtained using the Local Correlation
  Tracking method. We find that the Pearson correlation coefficient is ≥0.9 for divergence flows,
-while≥0.8 is obtained for the radial vorticity.
+while ≥0.8 is obtained for the radial vorticity.
 Keywords: Helioseismology (709); Solar physics (1476); Supergranulation (1662)
-1.INTRODUCTION
+1. INTRODUCTION
 Helioseismology is the study of the Sun’s internal structure and its properties, by means of interpreting its effect
 on solar oscillations (see Christensen-Dalsgaard 2002, for a review). These are resonant normal modes of the Sun,
 behaving as standing waves in a cavity bounded by the solar surface and a depth that depends on the wavenumber
@@ -43,7 +43,8 @@ et al. 2020; Mani & Hanasoge 2021) and Rossby modes (Hanasoge & Mandal 2019; Man
 et al. 2021). Local mode-coupling analysis in the Cartesian approximation, formulated by Woodard (2006), was
 validated by Hanson et al. (2021) (hereafter H21) by examining the power-spectrum of supergranular waves and
 comparing with previous time-distance studies (Langfellner et al. 2018).
-prasad.subramanian@tifr.res.inarXiv:2201.00178v1  [astro-ph.SR]  1 Jan 2022
+prasad.subramanian@tifr.res.in
+arXiv:2201.00178v1  [astro-ph.SR]  1 Jan 2022
  Mani et al.
 Normal-mode coupling refers to the concept of expressing solar-oscillation eigenfunctions as a linear weighted combination
  of model-eigenfunctions (e.g., Model S Christensen-Dalsgaard 2021). The model eigenfunctions form a complete
@@ -58,105 +59,122 @@ In this study, we extend the spectral analysis of H21 and develop the method to
 at supergranulation length scales. A part of the formalism that was used to derive the forward model in H21 is
 reworked, primarily to image steady flows. Measurements are then constructed, and inversions to infer divergence flow
 and radial vorticity are described. We also demonstrate signal associated with supergranular flow in a radial-order
-coupling (p 2-p2), which was not shown in H21. This helps in localizing the measurement sensitivity to the surface.
+coupling (p2-p2), which was not shown in H21. This helps in localizing the measurement sensitivity to the surface.
 We compare our results with flows obtained using the Local Correlation Tracking method on solar granules.
 1.1. Forward problem
 In favor of algebraic brevity, we only show crucial steps here and refer the interested reader to Appendix A for a
 complete derivation of the forward problem. Working in the plane-parallel atmosphere (see also Woodard 2006), we
-denote the horizontal unit vectors exandeyin our local Cartesian domain as pointing towards west and north on the
-solar surface, respectively, and ezpoints outwards. This approximation is valid when observing patches of the surface
+denote the horizontal unit vectors ex and ey in our local Cartesian domain as pointing towards west and north on the
+solar surface, respectively, and ez points outwards. This approximation is valid when observing patches of the surface
 that are small when compared to the solar radius. When imaging steady, near-surface flows in the neighbourhood
 of the supergranular scale ( ∼30 Mm), we expect the measured spectral cross-correlation signal to peak around the
-horizontal wavenumber qR⊙≈120 (Rincon & Rieutord 2018), where q=|q|=|(qx,qy)|is the vector horizontal
+horizontal wavenumber qR⊙ ≈120 (Rincon & Rieutord 2018), where q = |q|= |(qx,qy)|is the vector horizontal
 wavenumber of the flow. Accordingly, the goal is to relate measurements (linearly, to facilitate inversion) to the flow
 perturbation described in a horizontal Fourier domain. Supergranular velocities are subsonic (300-400 m/s, see Rincon
 & Rieutord 2018), permitting us to model the flow vector uuu= (ux,uy,uz) in the Cartesian domain like so (Unno et al.
 1989; Woodard 2006)
-uσ=∇×[∇×(Pez)] +∇×(Tez), (1)
-whereP=Pσ(x) andT=Tσ(x) are poloidal and toroidal scalar functions, varying with position xand temporal
-frequencyσ.∇is the 3D gradient operator. While mode-coupling can easily be extended to study time-varying
+$\mathbf{u}_\sigma = \nabla \times [\nabla \times (P\,\mathbf{e}_z)] + \nabla \times (T\,\mathbf{e}_z)$, (1)
+where P = Pσ(x) and T = Tσ(x) are poloidal and toroidal scalar functions, varying with position x and temporal
+frequency σ. ∇is the 3D gradient operator. While mode-coupling can easily be extended to study time-varying
 perturbations (see Woodard 2016; Mani & Hanasoge 2020; Hanasoge et al. 2020; Mandal & Hanasoge 2020, for
-example), here we only consider the frequency bin σ= 0, denoting the temporally averaged flow over the period
-of analysis. We therefore suppress σfrom all terms this point forward, remembering that temporal dynamics of
+example), here we only consider the frequency bin σ = 0, denoting the temporally averaged flow over the period
+of analysis. We therefore suppress σ from all terms this point forward, remembering that temporal dynamics of
 perturbations may also be studied using the same model outlined in the following paragraphs. Simplifying eq 1 using
 vector calculus results in
-u=−∇2Pez+∇(∂zP) +∇hT×ez, (2)
-where∇hrefers to derivatives only in the horizontal direction. Mode-coupling helioseismology is performed in the
+$\mathbf{u} = -\nabla^2 P\,\mathbf{e}_z + \nabla(\partial_z P) + \nabla_h T \times \mathbf{e}_z$, (2)
+where ∇h refers to derivatives only in the horizontal direction. Mode-coupling helioseismology is performed in the
 Fourier domain, and since we wish to image horizontal flows on a small patch of the surface, we describe the flow as a
-function of horizontal wavenumber qand depthzez. Hence the poloidal and toroidal flows are described by Pq(z) and
-Tq(z), respectively. Furthermore, we parametrize the flow along ezusing basis functions f(z) (Chebyshev, B-spline,
+function of horizontal wavenumber qand depth zez. Hence the poloidal and toroidal flows are described by Pq(z) and
+Tq(z), respectively. Furthermore, we parametrize the flow along ez using basis functions f(z) (Chebyshev, B-spline,
 etc). This is expressed as
-P≡Pq(z) =∑
-jfj(z)Pqj, T≡Tq(z) =∑
-jfj(z)Tqj. (3)
-The flow coefficients PqjandTqj, represented by the discrete indices qandj, become ideal candidates for inversions,
-where the flow for each wavenumber qcan be inverted for independently; parallelization in computation can thus be
-exploited to expedite inversions. Note that Pqj=P∗
-−qjandTqj=T∗
-−qjfor the flow field to be real in the spatiotemporal
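+\begin{equation}
+P \equiv P_{\mathbf{q}}(z)
+  = \sum_j f_j(z)\, P_{\mathbf{q}j},
+\qquad
+T \equiv T_{\mathbf{q}}(z)
+  = \sum_j f_j(z)\, T_{\mathbf{q}j}. \tag{3}
+\end{equation}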
+The flow coefficients Pqj and Tqj, represented by the discrete indices qand j, become ideal candidates for inversions,
+where the flow for each wavenumber q can be inverted for independently; parallelization in computation can thus be
+exploited to expedite inversions. Note that Pqj = P∗
+−qj and Tqj = T∗
+−qj for the flow field to be real in the spatiotemporal
  domain.
-To infer flows from wavefields φscattered by a perturbation of length scale q, cross-correlate them in the manner
+To infer flows from wavefields φ scattered by a perturbation of length scale q, cross-correlate them in the manner
 Imaging near-surface flows using mode-coupling analysis 3
 φω∗
-kφω
-k+q, wherekis the oscillation mode wavenumber ( kx,ky) andωis the temporal frequency. Relate φω∗
-kφω
-k+qthus
-to the flow coefficients PqjandTqj(see eq A7)
+k φω
+k+q, where kis the oscillation mode wavenumber (kx,ky) and ω is the temporal frequency. Relate φω∗
+k φω
+k+q thus
+to the flow coefficients Pqj and Tqj (see eq A7)
 ⟨φω∗
-kφω
-k+q⟩=Hω
-kk′nn′∑
-jCqj,kPqj+Dqj,kTqj. (4)
-The weight factor Hω(see eq A8) is a function of frequency, capturing information about the extent of coupling between
-the two modes [ n,k] and [n′,k′], wherenandn′are the radial orders of the modes, and k=|k|andk′=|k′|=|k+q|.
+k φω
+k+q⟩= Hω
+kk′nn′
+∑
+j
+Cqj,kPqj + Dqj,kTqj. (4)
+The weight factorHω (see eq A8) is a function of frequency, capturing information about the extent of coupling between
+the two modes [n,k] and [n′,k′], where nand n′are the radial orders of the modes, and k= |k|and k′= |k′|= |k+q|.
 The spectral profile of the mode (see eq A9) is approximated using a Lorentzian (Anderson et al. 1990). The more the
-Lorentzians of the two modes overlap, the stronger the coupling. Finally, the real terms Cqj,kandDqj,kare poloidal
+Lorentzians of the two modes overlap, the stronger the coupling. Finally, the real terms Cqj,k and Dqj,k are poloidal
 and toroidal flow sensitivity kernels respectively, that allow us to relate the flows in question to the measurements
-and are derived from the solar model see Appendix A. They possess the symmetry relation: Cqj,k=C−qj,−kand
-Dqj,k=D−qj,−k(see eq A6). The kernels, as flows, are expressed on the basis fj(z).
+and are derived from the solar model (see Appendix A). They possess the symmetry relation: $C_{\mathbf{q}j,\mathbf{k}} = C_{-\mathbf{q}j,-\mathbf{k}}$ and
+$D_{\mathbf{q}j,\mathbf{k}} = D_{-\mathbf{q}j,-\mathbf{k}}$ (see eq A6). The kernels, as flows, are expressed on the basis $f_j(z)$.
 1.2. Least-squares of cross-correlation
 Even though φω∗
-kφω
-k+qisolates the effect of flow perturbations at individual wavenumbers q, a more compact measurement,
- known in mode-coupling literature as ’ B-coefficients’, is much better designed for inversion as it reduces the
+k φω
+k+q isolates the effect of flow perturbations at individual wavenumbers q, a more compact measurement,
+ known in mode-coupling literature as ’B-coefficients’, is much better designed for inversion as it reduces the
 dimension of the problem. A least-squares fit to the cross-correlation φω∗
-kφω
-k+q(see Woodard 2006, 2014, 2016) results
-in theB-coefficients Bk,q, according to
-Bk,q=∑
-ωHω∗
-kk′nn′φω∗
-kφω
+k φω
+k+q (see Woodard 2006, 2014, 2016) results
+in the B-coefficients Bk,q, according to
+Bk,q =
+∑
+ω
+Hω∗
+kk′nn′ φω∗
+k φω
 k+q
 ∑
-ω|Hω
-kk′nn′|2. (5)
+ω
+|Hω
+kk′nn′ |2 . (5)
 Multiplying eq 4 on both sides by Hω∗
-kk′nn′and substituting by eq 5 on the left-hand-side results in a concisely defined
+kk′nn′ and substituting by eq 5 on the left-hand-side results in a concisely defined
 forward problem (compare with eq 4)
-Bk,q=∑
-jCqj,kPqj+Dqj,kTqj. (6)
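+\begin{equation}
+B_{\mathbf{k},\mathbf{q}} = \sum_j C_{\mathbf{q}j,\mathbf{k}}\, P_{\mathbf{q}j} + D_{\mathbf{q}j,\mathbf{k}}\, T_{\mathbf{q}j}.
+\tag{6}
+\end{equation}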
 In eq 5, Woodard (2007) and H21 thus far only considered positive-frequency components in the summation over ω.
-Here, we sum over both ±ωwithin a few mode linewidths Γ. Denoting the resonant frequency of a mode using ωnk,
-|ω|∈(
-ωnk−ϵΓnk/2,ωnk+ϵΓnk/2)
+Here, we sum over both ±ω within a few mode linewidths Γ. Denoting the resonant frequency of a mode using ωnk,
+|ω|∈
+(
+ωnk −ϵΓnk/2,ωnk + ϵΓnk/2
+)
 or
-|ω|∈(
-ωn′k′−ϵΓn′k′/2,ωn′k′+ϵΓn′k′/2)
+|ω|∈
+(
+ωn′k′ −ϵΓn′k′ /2,ωn′k′ + ϵΓn′k′ /2
+)
 . (7)
-Summing over±ωguarantees that the parity Bk,q=B∗
-−k,−q(see Appendix A for derivation) is obeyed, thereby
+Summing over ±ω guarantees that the parity Bk,q = B∗
+−k,−q (see Appendix A for derivation) is obeyed, thereby
 ensuring that the flow field on the right-hand-side of eq 6 is a real physical quantity in the spatio-temporal domain.
-Taking the complex conjugate on both sides of eq 6 and considering the negative wavenumber components −qand
+Taking the complex conjugate on both sides of eq 6 and considering the negative wavenumber components −q and
 −k,
 B∗
-−k,−q=∑
-jC−qj,−kP∗
-−qj+D−qj,−kT∗
+−k,−q =
+∑
+j
+C−qj,−kP∗
+−qj + D−qj,−kT∗
 −qj. (8)
-Substituting parity and symmetry relations for all terms in the above results in eq 6. As Bk,qis constructed by a
-least-squares fitting, it is noteworthy that summing over −ωwill also lead to improvement in its signal-to-noise as a
+Substituting parity and symmetry relations for all terms in the above results in eq 6. As Bk,q is constructed by a
+least-squares fitting, it is noteworthy that summing over −ω will also lead to improvement in its signal-to-noise as a
 by-product.
 1.3. Noise model
 In addition to the sensitivity kernels, a systematic background noise model is required to infer the flows from
@@ -167,25 +185,25 @@ Every independent realization of a mode can be understood as the output of a dam
 random forcing function (see Duvall & Harvey 1986). Modes are thus generated with random phases and amplitudes
 and with finite lifetimes. This stochasticity leads to realization noise in repeated measurements of mode parameters
  Mani et al.
-Figure 1. Dispersion relation for the radial orders used in this analysis; f (blue), p 1(orange) and p 2(green). The shaded
+Figure 1. Dispersion relation for the radial orders used in this analysis; f (blue), p 1 (orange) and p 2 (green). The shaded
 regions of the same colours indicate 1-linewidth Γ about the mode frequency. The yellow shaded region indicates the range of
-kR⊙andω/2πto which we have restricted ourselves in this analysis. Beyond kR⊙of 2000, it is seen that the theoretical fitting
+kR⊙ and ω/2π to which we have restricted ourselves in this analysis. Beyond kR⊙ of 2000, it is seen that the theoretical fitting
 of mode frequencies start deviating from the observed dispersion relation for the f-mode.
-such as its amplitude, frequency and linewidth, and consequently in Bk,qin our case. We use the same noise model
+such as its amplitude, frequency and linewidth, and consequently in Bk,q in our case. We use the same noise model
 as in H21, which was motivated by the above discussion,
-Gk,q≡⟨|Bk,q|2⟩, (9)
-where, unlike H21, we again sum over ±ω.Gk,qis real, with the symmetry relation Gk,q=G−k,−q(see Appendix A
+$G_{\mathbf{k},\mathbf{q}} \equiv \langle |B_{\mathbf{k},\mathbf{q}}|^2 \rangle$, (9)
+where, unlike H21, we again sum over ±ω. Gk,q is real, with the symmetry relation Gk,q = G−k,−q (see Appendix A
 for explanation).
-2.DATA ANALYSIS
+2. DATA ANALYSIS
 In order to examine near surface flows we build a time-series cube of Doppler images that are obtained from the
 Helioseismic Magnetic Imager aboard the Solar Dynamics Observatory (HMI/SDO, Scherrer et al. 2012). Each image
 is Postel projected, with a spatial resolution of approximately 0.48 Mm, separated in time by 45 seconds, and is tracked
-at the (Snodgrass 1984) rotation rate. Here, we select a patch that is 194 .4×194.4 Mm2in size, tracked for 24 hours
+at the (Snodgrass 1984) rotation rate. Here, we select a patch that is 194 .4 ×194.4 Mm2 in size, tracked for 24 hours
 and crosses the disk-center in the middle of observation time on the 14 November 2017 (Carrington rotation number
-2197, Carrington longitude 90◦). This Dopplercube is considered as the physical wavefield φ(x,y;t). The Fourier-space
-wavefieldφω
-k(and subsequently, the cross-correlation φω∗
-kφω
+2197, Carrington longitude 90◦). This Dopplercube is considered as the physical wavefield φ(x,y; t). The Fourier-space
+wavefield φω
+k (and subsequently, the cross-correlation φω∗
+k φω
 k+q) is obtained by computing the 3D spatial and temporal
 Fourier transform of the Dopplercube.
 The duration of the observed region is long enough to provide sufficient frequency bins with which to sum over in
@@ -193,21 +211,21 @@ Eq 6, while short enough that supergranules do not substantially evolve (lifetim
 & Rieutord 2018) over this period. Our observation region is close to the disk center to also avoid any contamination
 from center-to-limb systematics (Zhao et al. 2012; Langfellner et al. 2015).
 Maximum signal can be extracted from the weighted summation of the cross correlations (eq 5) when the spectral
-profiles of the two modes [ n,k] and [n′,k′] closely align in ωspace. This implies that their mode frequencies should be
-sufficiently close ( |ωnk−ωn′k′|≤δ, the separation parameter). Since Lorentzians decay rapidly, the summation over
-±ωis significant only over a few linewidths ( ϵ, the summation parameter; see eq 7). We have empirically found and
-tabulatedδin Table 1 for the radial order couplings n-n′∈f-f, p 1-p1, and p 2-p2(the signal strength depends only
-weakly onϵ; we set it to 3 line widths).
-Figure 1 shows that for any two adjacent ridges (adjacent nandn′), mode frequencies ωnkandωn′kbecome spaced
+profiles of the two modes [n,k] and [n′,k′] closely align in ω space. This implies that their mode frequencies should be
+sufficiently close (|ωnk −ωn′k′ |≤ δ, the separation parameter). Since Lorentzians decay rapidly, the summation over
+±ω is significant only over a few linewidths ( ϵ, the summation parameter; see eq 7). We have empirically found and
+tabulated δ in Table 1 for the radial order couplings n-n′ ∈f-f, p1-p1, and p 2-p2 (the signal strength depends only
+weakly on ϵ; we set it to 3 line widths).
+Figure 1 shows that for any two adjacent ridges (adjacent nand n′), mode frequencies ωnk and ωn′k become spaced
 farther apart with increasing wavenumber kR⊙. It is also known that mode linewidth Γ grows with radial orders for
-a givenkR⊙. Moreover, holding the spatial and temporal sampling rates constant, the spatial size and duration of
-observation set the total number of modes within a range of kR⊙(andω/2π) that can be clearly observed, thereby
+a given kR⊙. Moreover, holding the spatial and temporal sampling rates constant, the spatial size and duration of
+observation set the total number of modes within a range of kR⊙ (and ω/2π) that can be clearly observed, thereby
 affecting the quality of the seismic measurements. Owing to these factors, to maximize signal-to-noise (by visually
 inspecting the power-spectrum), the parameters describing the extent of coupling over different ranges of kR⊙at fixed
-radial order are different. In wavenumber, we restrict our analysis to within 200 ≤kR⊙≤2000 andqR⊙≤300. Our
+radial order are different. In wavenumber, we restrict our analysis to within 200 ≤kR⊙≤2000 and qR⊙≤300. Our
 frequency range is confined to span the range over which acoustic modes are observed (2 ≤ω/2π≤5 in mHz).
 Imaging near-surface flows using mode-coupling analysis 5
-Coupling kR⊙range # of δ
+Coupling kR⊙ range # of δ
 modes
 f-f [400,1000] 5240 4
 [1000,1500] 7784 1.1
@@ -217,103 +235,110 @@ p1-p1 [400,1000] 5240 4.5
 p2-p2 [200,1000] 5886 3
 [1000,1300] 4280 3
 Table 1. Total number of modes, and separation parameter (in number of linewidths) for different couplings, for different
-ranges ofkR⊙.
-3.INVERSION
+ranges of kR⊙.
+3. INVERSION
 The final step to producing near-surface flow maps in Cartesian mode-coupling is to invert the measurements
-Bk,qfrom the linear relation in eq 6. We describe inversion using regularized-least-squares (RLS) method here and
+Bk,q from the linear relation in eq 6. We describe inversion using regularized-least-squares (RLS) method here and
 leave Subtractive Optimally Localized Averages (SOLA, Pijpers & Thompson 1994) for Appendix B. The methods
 complement each other (see Sekii 1997), where RLS tries to minimize the misfit between data and model, whereas
-SOLA gives better localization. For total number of modes M, RLS scales as MxJwhereJis the number of basis
-functionsfj(z) (J≪M; see eq 3 and section 3.1), whereas SOLA scales as M2(see Appendix B). For M > 5000,
+SOLA gives better localization. For total number of modes M, RLS scales as MxJ where J is the number of basis
+functions fj(z) (J ≪M; see eq 3 and section 3.1), whereas SOLA scales as M2 (see Appendix B). For M >5000,
 computation starts to quickly become expensive for SOLA.
 Mode eigenfunctions peak near the surface, with higher radial orders possessing smaller peaks in the interior. While
 f-f coupling alone has enough sensitivity to probe perturbations at supergranular scales close to surface, signal is
-present even in p 1-p1, and p 2-p2(see Figure 3), and possibly other higher order self- and cross-couplings. Since we are
+present even in p1-p1, and p2-p2 (see Figure 3), and possibly other higher order self- and cross-couplings. Since we are
 interested in only surface flows, we leave higher order coupling to future work.
-It bears mentioning that the slopes of the ridges in the kR⊙-νspectrum (Figure 1) increase with radial order. This
+It bears mentioning that the slopes of the ridges in the kR⊙-ν spectrum (Figure 1) increase with radial order. This
 limits us to low-to-intermediate kR⊙(<1000) for these higher radial orders if we are to remain under the acoustic cutoff
  frequency of 5.3mHz. It also becomes imperative to use a spatially larger observation patch to gain access to signals
-from lowkR⊙- too large an observation region could possibly render invalid the Cartesian geometry approximation.
+from low kR⊙- too large an observation region could possibly render invalid the Cartesian geometry approximation.
 Regardless, in addition to performing inversions using all the couplings stacked together, we also demonstrate inversions
 separately for the three couplings (see Table 2) in order to account for the full gamut of mode-coupling as a signal-rich
 helioseismic technique.
 3.1. RLS
-For givenq, the forward problem may be stated as
-KU=B, (10)
-with the aim to minimize the misfit∑
-k||KU−B||2, with||||2denoting the L2norm. Here, Kis the matrix formed
-by the sensitivity kernels: {Cqj,k,Dqj,k}.Uis a vector composed of the flow coefficients: {Pqj,Tqj}andBis a vector
-composed of computed B-coefficients:{Bk,q}. The least-squares problem is solved simultaneously for poloidal and
+For given q, the forward problem may be stated as
+KU = B, (10)
+with the aim to minimize the misfit ∑
+k
+||KU −B||2, with ||||2 denoting the L2 norm. Here, K is the matrix formed
+by the sensitivity kernels: {Cqj,k,Dqj,k}. U is a vector composed of the flow coefficients: {Pqj,Tqj}and B is a vector
+composed of computed B-coefficients: {Bk,q}. The least-squares problem is solved simultaneously for poloidal and
 toroidal flow. We use B-spline basis functions as our fj(z), comprising 11 knots spaced uniformly in acoustic radius,
-for both poloidal and toroidal coefficients. Hence, for Mmodes (total number of kfor a givenqisM) and 11 basis
-functions for each poloidal and toroidal, the dimensions of K,UandBare thusM×22, 22×1, andM×1 respectively.
-Normalizing both sides of eq 10 by the noise covariance Λ(a diagonal matrix with the entries Gk,q; see eq 9; dimension
-M×M) and pre-multiplying by K⊺,
-(K⊺Λ−1K)U=(K⊺Λ−1)B, (11)
-U=(K⊺Λ−1K)−1K⊺Λ−1B. (12)
+for both poloidal and toroidal coefficients. Hence, for M modes (total number of kfor a given q is M) and 11 basis
+functions for each poloidal and toroidal, the dimensions of K, U and B are thus M×22, 22×1, and M×1 respectively.
+Normalizing both sides of eq 10 by the noise covariance Λ (a diagonal matrix with the entries Gk,q; see eq 9; dimension
+M ×M) and pre-multiplying by K⊺,
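+$(\mathbf{K}^{\top} \Lambda^{-1} \mathbf{K})\,\mathbf{U} = (\mathbf{K}^{\top} \Lambda^{-1})\,\mathbf{B}$, (11)
+$\mathbf{U} = (\mathbf{K}^{\top} \Lambda^{-1} \mathbf{K})^{-1}\, \mathbf{K}^{\top} \Lambda^{-1} \mathbf{B}$. (12)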
  Mani et al.
-Figure 2. Left: Averaging kernel for poloidal flow (see section B.2, eq B17, and left panel of Figure 8) for qR⊙= [−112,−45],
-at the depth zo=−0.41 Mm. Right : L-curve for the mode qR⊙= [−112,−45]; the knee ( λ= 2.48) is marked by a blue
+Figure 2. Left: Averaging kernel for poloidal flow (see section B.2, eq B17, and left panel of Figure 8) for qR⊙ = [−112,−45],
+at the depth zo = −0.41 Mm. Right: L-curve for the mode qR⊙ = [ −112,−45]; the knee ( λ = 2 .48) is marked by a blue
 diamond.
-Since the least-squares problem is typically ill-posed, we restate the minimization as∑
-k||KU−B||2+λ||U||2with
-the regularization parameter λwhich this results in a trade-off between misfit reduction (first term) and solution
-norm minimization (second term). Under-regularizing can lead to a solution Uthat is dominated by errors in the
+Since the least-squares problem is typically ill-posed, we restate the minimization as ∑
+k
+||KU −B||2 + λ||U||2 with
+the regularization parameter λ, which results in a trade-off between misfit reduction (first term) and solution
+norm minimization (second term). Under-regularizing can lead to a solution U that is dominated by errors in the
 data and on the other hand, over-regularizing may smooth or damp the solution more than necessary. Including this
 regularization makes the problem better conditioned and is now defined as
-U= (K⊺Λ−1K+λI)−1K⊺Λ−1B, (13)
-where Iis the identity matrix for L1regularization. The knee-point of the L-curve (Hansen 1992), a curve formed
-by plotting||U||2vs||KU−B||2for different values of λ(see right panel of Figure 2), is usually chosen as the
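+$\mathbf{U} = (\mathbf{K}^{\top} \Lambda^{-1} \mathbf{K} + \lambda \mathbf{I})^{-1}\, \mathbf{K}^{\top} \Lambda^{-1} \mathbf{B}$, (13)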
+where I is the identity matrix for L1 regularization. The knee-point of the L-curve (Hansen 1992), a curve formed
+by plotting ||U||2 vs ||KU −B||2 for different values of λ (see right panel of Figure 2), is usually chosen as the
 regularization parameter. After successfully inverting for U, we reconstruct the flow using eq 3. Results for poloidal
-flowPqare shown in Figure 3.
-4.LCT
+flow Pq are shown in Figure 3.
+4. LCT
 To improve confidence in the imaged near-surface flows through mode-coupling, we compare them with flows obtained
 from Local Correlation Tracking method (LCT; November & Simon 1988). LCT provides surface-flow maps by
-examining the advection of convective granules (1.2 Mm, qR⊙≈3500; Hathaway et al. 2015) by underlying largerscale
+examining the advection of convective granules (1.2 Mm, qR⊙ ≈3500; Hathaway et al. 2015) by underlying larger-scale
  flow systems. Since granules are used as tracers, which are much smaller in size than supergranules ( ≈35 Mm),
 LCT is an effective method (see Rieutord et al. 2001) to produce surface horizontal flow maps of supergranulation.
 Time series of intensity images from HMI, with the same properties of the Dopplercubes described in section 2
 (tracking rate, date, location, size and duration of observed patch, spatial and temporal sampling rate), are obtained
  and Postel projected. The horizontal flows are deduced by tracking the proper motions of granules between
 consecutive intensity images, which we denote as I1,I2. The LCT method selects a patch in two images each
-(I1=I1e(x−xij)2/2sigma2,I2=I2e(x−xij)2/2sigma2) that observe the same granule at the grid point xij= (xi,yj).
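+($I_1 = I_1\, e^{(\mathbf{x}-\mathbf{x}_{ij})^2 / 2\,\mathrm{sigma}^2}$,
+$I_2 = I_2\, e^{(\mathbf{x}-\mathbf{x}_{ij})^2 / 2\,\mathrm{sigma}^2}$)
+that observe the same granule at the grid point $\mathbf{x}_{ij} = (x_i, y_j)$.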
 A Gaussian of width sigma allows to isolate a small region surrounding the grid point of interest as the distance
 moved by granules are usually in sub-pixel regime. The convention for the direction of xis the same as described in
-section 1.1. The two patches I1,I2are then cross correlated for different values of position shifts ∆ x,
-Cij(∆x,∆y) =∫
+section 1.1. The two patches I1,I2 are then cross correlated for different values of position shifts ∆ x,
+Cij(∆x,∆y) =
+∫
 dxI∗
-1(−x)I2(∆x−x). (14)
-The shift ∆x= (∆x,∆y) that maximizes the cross-correlation Cijis taken to be the proper motion of the granule.
+1 (−x)I2(∆x−x). (14)
+The shift ∆ x= (∆x,∆y) that maximizes the cross-correlation Cij is taken to be the proper motion of the granule.
 Provided that the time difference ∆ t, here 45 seconds, between the images is less than the lifetime of granules ( <10
-min), the velocities are given by vx= ∆x/∆tandvy= ∆y/∆t. This exercise is repeated for all grid points in the
-imagesI1,I2and for each consecutive pair of images in the cube.
-In practice, we use the Fourier LCT algorithm (FLCT, Fisher & Welsch 2008) for computing vxandvy. FLCT
-requires the input sigma , which we set to 4 pix, that captures the extent of localization desired, and depends on the
+min), the velocities are given by vx = ∆x/∆t and vy = ∆y/∆t. This exercise is repeated for all grid points in the
+images I1,I2 and for each consecutive pair of images in the cube.
+In practice, we use the Fourier LCT algorithm (FLCT, Fisher & Welsch 2008) for computing vx and vy. FLCT
+requires the input sigma, which we set to 4 pix, that captures the extent of localization desired, and depends on the
 Imaging near-surface flows using mode-coupling analysis 7
-Figure 3. Top: Inverted poloidal flow power-spectrum for the three couplings f-f, p 1-p1, and p 2-p2as a function of qxR⊙and
-qyR⊙.Bottom : Corresponding power-spectrum averaged over the azimuthal angle. Shaded region shows ±1σerror around the
-mean. Total power appears to increase through the radial orders. Power is in units of m2/s4.
+Figure 3. Top: Inverted poloidal flow power-spectrum for the three couplings f-f, p 1-p1, and p2-p2 as a function of qxR⊙ and
+qyR⊙. Bottom: Corresponding power-spectrum averaged over the azimuthal angle. Shaded region shows ±1σ error around the
+mean. Total power appears to increase through the radial orders. Power is in units of m 2/s4.
 dominant length scale of the velocity field in the images. The Postel-projected intensity images are fed as input to the
-FLCT code. vxandvyare then computed for consecutive pairs of images and are averaged over the entire day.
-5.MAPS OF HORIZONTAL DIVERGENCE AND RADIAL VORTICITY
+FLCT code. vx and vy are then computed for consecutive pairs of images and are averaged over the entire day.
+5. MAPS OF HORIZONTAL DIVERGENCE AND RADIAL VORTICITY
 For mode-coupling, horizontal divergence (hereafter div) and radial vorticity (hereafter curl) are computed by
-substituting PandTfrom eq 3 into eq 2 as below uuu(q,z)
- =−∇2Pez+∇(∂zP) +∇hT×ez,
-=−(0,0, ∂2
-xP+∂2
-yP+∂2
+substituting P and T from eq 3 into eq 2 as below uuu(q,z)
+ = −∇2Pez + ∇(∂zP) + ∇hT×ez,
+= −(0, 0, ∂2
+xP + ∂2
+yP + ∂2
 zP) + (∂x∂zP, ∂y∂zP, ∂2
-zP) + (∂yT,−∂xT,0). (15)
-Setting∂2
-x+∂2
-y=q2,divis given by,
-∇h·uuu(q,z) =q2∂zP, (16)
-andcurl is given by,
+zP) + (∂yT, −∂xT, 0). (15)
+Setting ∂2
+x + ∂2
+y = q2, div is given by,
+$\nabla_h \cdot \mathbf{u}(\mathbf{q},z) = q^2\, \partial_z P$, (16)
+and curl is given by,
 [
-∇×uuu(q,z)]
-z=q2T. (17)
+∇×uuu(q,z)
+]
+z
+= q2T. (17)
 We follow similar steps to those taken in Langfellner et al. (2015) for comparison of flow maps with LCT. The
-essential step for comparison at different length scales is to bandpass filter the Fourier-space flow around the qR⊙of
+essential step for comparison at different length scales is to bandpass filter the Fourier-space flow around the qR⊙ of
 interest (see Figure 4), and subsequently convert it to real space.
 We seek to show comparisons (see Figures 5, 6, and 7) for qR⊙= 100, 150, 200 and 250. To sufficiently delineate
 flows at these length scales, we apply a Gaussian filter (see Figure 4) to flows obtained from eqns 16 and 17. The
@@ -321,79 +346,99 @@ Gaussian is centered at the desired wavenumber with a half-width of 25. We then
 obtain a real-space steady-flow map.
  Mani et al.
 Figure 4. Left: Divergence-flow power spectrum |div|2, from eqn 16, obtained from inversion using all the couplings. The
-power-spectrum is then filtered with a bandpass centered around qR⊙= 150 (middle panel). The resulting spectra is shown in
-the right panel. The units of |div|2are in s−2. For illustration, we show the action of the filter on the power-spectrum |div|2
-since it is a real quantity, but recall that it is the Fourier-space flow div(a complex quantity) on which we apply the filter.
-For LCT, we first apply a Gaussian smoothing to vxandvyto average over small-scale features; the extent of
-smoothing depends on the length scale qR⊙to be compared with mode-coupling. divandcurl are then simply
+power-spectrum is then filtered with a bandpass centered around qR⊙ = 150 (middle panel). The resulting spectrum is shown in
+the right panel. The units of |div|2 are in s −2. For illustration, we show the action of the filter on the power-spectrum |div|2
+since it is a real quantity, but recall that it is the Fourier-space flow div (a complex quantity) on which we apply the filter.
+For LCT, we first apply a Gaussian smoothing to vx and vy to average over small-scale features; the extent of
+smoothing depends on the length scale qR⊙ to be compared with mode-coupling. div and curl are then simply
 computed by
-div=∂xvx+∂yvy, (18)
-curl=∂xvy−∂yvx. (19)
+$\mathrm{div} = \partial_x v_x + \partial_y v_y$, (18)
+$\mathrm{curl} = \partial_x v_y - \partial_y v_x$. (19)
 We then perform a 2D Fourier transform on eqns 18 and 19, apply the same Gaussian filters as for mode-coupling,
 and transform back to real space.
 Condensing all of the above, the following sequence of operations to compare flows at desired length scales are
 performed for mode-coupling (M-C) and for LCT M-C
- :φ(x,y;t)3D FFT= = = = =⇒φω
-k,Bk,qinversion= = = = = =⇒P,T∇h·= = =⇒
-∇×eqns 16,17Filter,= = = = =⇒
-2D FFTdiv,curl
-LCT :I1,I2FLCT= = = =⇒vx,vysmooth,= = = = = =⇒
-∇h·∇×eqns 18,192D FFT,= = = = = =⇒
-FilterFiltered,
+ : φ(x,y; t)
+3D FFT
+= = = = =⇒φω
+k,Bk,q
+inversion
+= = = = = =⇒P,T
+∇h·
+= = =⇒
+∇×
+eqns 16, 17
+Filter,
+= = = = =⇒
+2D FFT
+div,curl
+LCT : I1,I2
+FLCT
+= = = =⇒vx,vy
+smooth,
+= = = = = =⇒
+∇h· ∇×
+eqns 18, 19
+2D FFT,
+= = = = = =⇒
+Filter
+Filtered,
 Fourier-space
-flows2D FFT= = = = =⇒div,curl
-6.RESULTS
+flows
+2D FFT
+= = = = =⇒div,curl
+6. RESULTS
 Table 2 summarizes the results of the comparison between flows obtained from mode-coupling and LCT. Figure 5,
 where we have used all the couplings to perform inversions, shows a 97% correlation between divergence flows from
-the two methods near supergranular scale ( qR⊙≈100). Near-surface flows are imaged most faithfully when all the
+the two methods near supergranular scale ( qR⊙ ≈100). Near-surface flows are imaged most faithfully when all the
 couplings are used. Since vortical flows are imaged at a region near the equator, it is possible that the source of
 vorticity is something other than Coriolis force. Nevertheless, there is also a very good agreement (87%) between
 the vortical flows as inferred from the two methods, despite being an order of magnitude weaker than the divergence
 flows (this is consistent with the results of Hathaway et al. 2015; Langfellner et al. 2015; Rincon et al. 2017). Due to
-insufficient modes for the p 2-p2case (see Table 1), we are unable to infer vortical flows with conviction other than near
+insufficient modes for the p2-p2 case (see Table 1), we are unable to infer vortical flows with conviction other than near
 the supergranular scale, as can be seen from Table 2. Figure 6 also aligns with what we believe can be accomplished
-through mode-coupling helioseismology - using f-f or p 1-p1alone to seismically infer near-surface divergence and vortical
+through mode-coupling helioseismology - using f-f or p1-p1 alone to seismically infer near-surface divergence and vortical
 flows at different scales ( qR⊙= 100,150) can yield extremely good agreement with LCT. As the length scale of the
 inferred flow moves further away from that of supergranules (Figure 7), the demand on signal-to-noise also increases.
 An adequate number of modes (and coupling strength between higher radial-orders) thus becomes a necessity to
 comment substantively on the flows at these scales.
 6.1. Amplitudes of mode-coupling flows
 Imaging near-surface flows using mode-coupling analysis 9
-(a)qR⊙= 100 ,f-f + p 1-p1+ p 2-p2
-Figure 5. Real-space divergence flows (left column, in units of 10−5s−1) and radial vorticity (right column, in units of 10−6s−1)
+(a) qR⊙ = 100, f-f + p1-p1 + p2-p2
+Figure 5. Real-space divergence flows (left column, in units of 10⁻⁵ s⁻¹) and radial vorticity (right column, in units of 10⁻⁶ s⁻¹)
 for LCT (top row), and mode-coupling inversions through RLS using all the couplings (middle row), bandpass filtered around
-qR⊙= 100 (see Figure 4). Corresponding scatter plots and correlation coefficients are shown in the bottom row. We cut edges
+qR⊙ = 100 (see Figure 4). Corresponding scatter plots and correlation coefficients are shown in the bottom row. We cut edges
 out from the flow maps and compare a circular region of diameter ≈175 Mm. The slopes of the best-fit line through the scatter
 plots are 0.51 for divergence and 0.01 for vorticity. The vorticity flow maps are saturated to show only 40% of the maximum
 values.
 For both LCT and mode-coupling divergence and vorticity maps, numerous factors, arising from the associated
 numerous data processing steps, can influence the final inference of flow amplitudes, making it difficult to put forward
-a precise statement on them. H21 reported a 60% greater amplitude for p 1-p1over f-f coupling (Figure 3 reflects a
+a precise statement on them. H21 reported a 60% greater amplitude for p1-p1 over f-f coupling (Figure 3 reflects a
 similar conclusion), another element to consider when combining different radial orders. The choice of regularization
 (see right panel of Figure 2) has the potential to affect the amplitudes of the inverted flows to some degree. Flow
 amplitudes also vary with depth, implying that different radial orders and LCT will measure different flow averages.
 This variability emerges as a natural consequence of any helioseismic inversion procedure necessitating the use of a
 radial grid along which kernels and flows tend to be described.
 Thus, the amplitudes of the mode-coupling flows (and the correlation coefficient) depend upon the following factors:
-•Coupling(s) used,
-•Regularization parameter in the inversion,
-•Smoothing applied to LCT flows (indirectly; see below paragraph),
-•The depth at which flows are inferred.
+• Coupling(s) used,
+• Regularization parameter in the inversion,
+• Smoothing applied to LCT flows (indirectly; see below paragraph),
+• The depth at which flows are inferred.
 Here, we report in Table 2 only the maximum correlation found from among the points in the radial grid close
 to the surface (within ±0.5 Mm from z=0). For a desired comparison length scale qR⊙, we first fix the coupling(s)
 and the regularization parameter to be used in the inversion. We then separately compute filtered divergence and
  Mani et al.
-(a)qR⊙= 100 ,f-f
- (b)qR⊙= 150 ,p1-p1
-Figure 6. Real-space divergence flows (left column, in units of 10−5s−1) and radial vorticity (right column, in units of 10−6s−1)
+(a) qR⊙ = 100, f-f
+ (b) qR⊙ = 150, p1-p1
+Figure 6. Real-space divergence flows (left column, in units of 10⁻⁵ s⁻¹) and radial vorticity (right column, in units of 10⁻⁶ s⁻¹)
 for LCT (top row), and mode-coupling inversion through RLS using (a) f-f coupling (bottom row), bandpass filtered around
-qR⊙= 100, and using (b) p 1-p1coupling (bottom row), bandpass filtered around qR⊙= 150. We cut edges out from the flow
+qR⊙ = 100, and using (b) p1-p1 coupling (bottom row), bandpass filtered around qR⊙ = 150. We cut edges out from the flow
 maps and compare a circular region of diameter ≈175 Mm.
-(a)qR⊙= 200 ,f-f + p 1-p1+ p 2-p2
- (b)qR⊙= 250 ,f-f + p 1-p1+ p 2-p2
-Figure 7. Real-space divergence flows (left column, in units of 10−5s−1) and radial vorticity (right column, in units of 10−6s−1)
+(a) qR⊙ = 200, f-f + p1-p1 + p2-p2
+ (b) qR⊙ = 250, f-f + p1-p1 + p2-p2
+Figure 7. Real-space divergence flows (left column, in units of 10⁻⁵ s⁻¹) and radial vorticity (right column, in units of 10⁻⁶ s⁻¹)
 for LCT (top row), and mode-coupling inversion through RLS using all the couplings (bottom row), bandpass filtered around
-(a)qR⊙= 200, and (b) qR⊙= 250. We cut edges out from the flow maps and compare a circular region of diameter ≈175 Mm.
+(a) qR⊙ = 200, and (b) qR⊙ = 250. We cut edges out from the flow maps and compare a circular region of diameter ≈175 Mm.
 vorticity maps for LCT for different values of smoothing. These flow maps are then compared with those obtained
 from inversions at all depths in the radial grid that are within 0.5 Mm from the surface. The highest correlation
 (corresponding to the above depths and smoothing) is noted and comparison flow maps are plotted for the desired
@@ -402,12 +447,12 @@ It has been shown (see De Rosa & Toomre 2004; Langfellner et al. 2015) that line
 and LCT agree closely in amplitudes. But, to recapitulate, a host of factors described above can skew the amplitudes
 for divergence flows owing to the multi-step process involved in obtaining them. For example, there has been a history
 (see, e.g., De Rosa et al. 2000; Sekii et al. 2007; Zhao et al. 2007; Langfellner et al. 2018; Böning et al. 2020; Korda
-&ˇSvanda 2021) of using travel-time difference as only a proxy for horizontal divergence. However, Langfellner et al.
+& Švanda 2021) of using travel-time difference as only a proxy for horizontal divergence. However, Langfellner et al.
 Imaging near-surface flows using mode-coupling analysis 11
-Coupling qR⊙div curl
+Coupling qR⊙ div curl
 f-f 100 0.97 0.87
-+ p 1-p1 150 0.95 0.76
-+ p 2-p2 200 0.92 0.76
++ p1-p1 150 0.95 0.76
++ p2-p2 200 0.92 0.76
 250 0.85 0.65
 f-f 100 0.96 0.85
 150 0.93 0.76
@@ -421,7 +466,7 @@ p2-p2 100 0.94 0.7
 150 0.91 0.39
 200 0.79 0.3
 250 0.55 0.3
-Table 2. Correlation between mode-coupling flow maps and LCT maps derived from HMI Dopplergrams and intensity images,
+Table 2. Correlation between mode-coupling flow maps and LCT maps derived from HMI Dopplergrams and intensity images,
 respectively.
 (2015), Birch et al. (2016) and Birch et al. (2019) use empirically determined conversion factors to align flow amplitudes
 from travel-time measurements with those of LCT, while acknowledging that LCT underestimates magnitudes (see
@@ -435,206 +480,261 @@ signal-to-noise through larger observation sizes, we suggest that Cartesian mode
 applications to investigate other depth- and time-varying features such as giant cell flows (see Hathaway et al. 2013;
 Hanson et al. 2020), emerging active regions, meridional flows and Rossby waves.
 APPENDIX
-A.DERIVATION OF THE FORWARD MODEL
-As described in section 1.1, we seek to describe the flow uas a function of qalongez. To that end, substituting
+A. DERIVATION OF THE FORWARD MODEL
+As described in section 1.1, we seek to describe the flow u as a function of q along ez. To that end, substituting
 eq 3 into eq 2,
 uσ
-q(z) =∑
-j{
-q2fjez+iqf′
-j}
+q(z) =
+∑
+j
+{
+q2 fjez + iqf′
+j
+}
 Pσ
-jq+iq×ezfjTσ
+jq + iq×ezfjTσ
 jq. (A1)
-For flows in the anelastic limit ( u≪speed of sound), we can denote the flow perturbation operator as δLσ=
-−2iωρuσ·∇(see Hanasoge et al. 2017). Substituting Eq. A1 into the operator, we get,
+For flows in the anelastic limit ( u ≪speed of sound), we can denote the flow perturbation operator as δLσ =
+−2iωρuσ ·∇(see Hanasoge et al. 2017). Substituting Eq. A1 into the operator, we get,
 δLσ
-q=−2iωρ(iuσ
-q·k+uσ
-q·ez∂z), (A2)
-=−2iωρ∑
-j{
+q = −2iωρ (iuσ
+q ·k+ uσ
+q ·ez∂z), (A2)
+= −2iωρ∑
+j
+{
 −k·qf′
 jPσ
-jq−k·(q×ez)fjTσ
-jq+q2fjPσ
-jq∂z}
+jq −k·(q×ez) fjTσ
+jq + q2 fjPσ
+jq ∂z
+}
 . (A3)
  Mani et al.
 Express the mode eigenfunction describing oscillations in the Cartesian domain by (see Woodard 2006)
-ξk≡ξnk(z) =iˆkHnk(z)ez+ ˆzVnk(z), (A4)
-whereHandVare real-valued functions; nandn′are dropped for compactness of notation. Then the coupling of
-two modesξkandξk′(k′=k+q), by the flow perturbation operator δLσ
-q, denoted by coupling integral Λk
-k′(σ), is
+ξk ≡ξnk(z) = iˆkHnk(z)ez + ˆzVnk(z), (A4)
+where H and V are real-valued functions; n and n′ are dropped for compactness of notation. Then the coupling of
+two modes ξk and ξk′ (k′= k+ q), by the flow perturbation operator δLσ
+q, denoted by coupling integral Λ k
+k′ (σ), is
 given by
 Λk
-k′(σ)≡∫
+k′ (σ) ≡
+∫
 dx(δLσ
-qξk)·ξ∗
-k′=∫
-dx[
-−2iωρ∑
-j{
-q2fjPσ
-jq(ˆk·ˆk′H′
+qξk) ·ξ∗
+k′ =
+∫
+dx
+[
+−2iωρ
+∑
+j
+{
+q2 fjPσ
+jq (ˆk·ˆk
+′
+H′
 kH∗
-k′+V′
+k′ + V′
 kV∗
-k′)
-−[
+k′ )
+−
+[
 k·qf′
 jPσ
-jq+k·(q×ez)fjTσ
-jq]
-(ˆk·ˆk′HkH∗
-k′+VkV∗
-k′)}]
+jq + k·(q×ez) fjTσ
+jq
+]
+(ˆk·ˆk
+′
+HkH∗
+k′ + VkV∗
+k′ )
+}]
 (A5)
-We desire to linearly relate the coupling integral in the above equation to the flows PandT, through poloidal and
-toroidal sensitivity kernels, Cqj,kandDqj,krespectively. Hence, they are given by
-Cqj,k=∫
-dzρ[
-q2fj(ˆk·ˆk′H′
+We desire to linearly relate the coupling integral in the above equation to the flows P and T, through poloidal and
+toroidal sensitivity kernels, Cqj,k and Dqj,k respectively. Hence, they are given by
+Cqj,k =
+∫
+dzρ
+[
+q2 fj(ˆk·ˆk
+′
+H′
 kH∗
-k′+V′
+k′ + V′
 kV∗
-k′)
+k′ )
 −k·qf′
-j(ˆk·ˆk′HkH∗
-k′+VkV∗
-k′)]
+j(ˆk·ˆk
+′
+HkH∗
+k′ + VkV∗
+k′ )
+]
 ,
-Dqj,k=k·(q×ez)∫
-dzρfj(ˆk·ˆk′HkH∗
-k′+VkV∗
-k′). (A6)
-Note the symmetry Cqj,k=C−qj,−kandDqj,k=D−qj,−k. This coupling integral contributes to the cross-spectral
-measurement between modes kandk+qFrom eq 8 of Woodard (2014), we write the first-order effect of flow on
+Dqj,k = k·(q×ez)
+∫
+dzρf j(ˆk·ˆk
+′
+HkH∗
+k′ + VkV∗
+k′ ). (A6)
+Note the symmetry Cqj,k = C−qj,−k and Dqj,k = D−qj,−k. This coupling integral contributes to the cross-spectral
+measurement between modes k and k+q. From eq 8 of Woodard (2014), we write the first-order effect of flow on
 wavefield cross-correlation as
 ⟨φω∗
-kφω+σ
-k+q⟩=Hω
+k φω+σ
+k+q ⟩= Hω
 kk′σΛk
-k′(σ), (A7)
+k′ (σ), (A7)
 where the function H is given by
 Hω
-kk′σ=−2iω(Nk|Rω
-k|2Rω+σ
-k′+Nk′|Rω+σ
-k′|2Rω∗
-k). (A8)
-We absorb the factor −2iωinto the definition of H. Substitute eq A6 in right-hand-side of eq A7 to obtain eq 4.
-The mode spectral profile Ris a Lorentzian, given by
+kk′σ = −2iω(Nk|Rω
+k|2 Rω+σ
+k′ + Nk′ |Rω+σ
+k′ |2 Rω∗
+k ). (A8)
+We absorb the factor −2iω into the definition of H. Substitute eq A6 in right-hand-side of eq A7 to obtain eq 4.
+The mode spectral profile R is a Lorentzian, given by
 Rω
-k=1
+k = 1
 ω2
-nk−ω2−iωγnk/2, (A9)
-whereωnkis the resonant frequency of the mode, and γnkis the mode linewidth. Eq A9 can be derived by introducing
-mode damping−iωγρ as an operator in the differential equation that governs undamped, driven oscillations (see eq
+nk −ω2 −iωγnk/2, (A9)
+where ωnk is the resonant frequency of the mode, and γnk is the mode linewidth. Eq A9 can be derived by introducing
+mode damping −iωγρ as an operator in the differential equation that governs undamped, driven oscillations (see eq
 5 of Hanasoge et al. 2017), and then deriving the effects of first-order perturbations to the wavefield cross-correlation.
-Also, the parityHω
-kk′σ=H−ω∗
-kk′−σandRω
-k=R−ω∗
-kare established. Mode normalization Nis given by
-Nk=1
-QQ∑
-k∑
-ω|φω
+Also, the parity Hω
+kk′σ = H−ω∗
+kk′−σ and Rω
+k = R−ω∗
+k are established. Mode normalization N is given by
+Nk = 1
+Q
+Q∑
+k
+∑
+ω
+|φω
 k|2
 ∑
-ωRω
-k, (A10)
-where the1
-QQ∑
-kon the right-hand-side implies average over all [ kx,ky] (Q terms in all) such that k=|k|is constant.
-This forces Nto be isotropic, i.e., to only depend on k, and notk. The sum over ωis within five linewidths of ωnk.
+ω
+Rω
+k
+, (A10)
+where the 1
+Q
+Q∑
+k
+on the right-hand-side implies average over all [ kx,ky] (Q terms in all) such that k = |k|is constant.
+This forces N to be isotropic, i.e., to only depend on k, and not k. The sum over ω is within five linewidths of ωnk.
 Note that Eq. A8 through A10 are modified from H21 to ensure parity and that flow maps are real.
 The three equations A8 through A10, along with the symmetry relation for kernels, and summation over ±ω, serve
 to establish the parity Bσ
-k,q=B∗−σ
+k,q = B∗−σ
 −k,−q. This allows for obtaining Pσ
-q=P∗−σ
-−q, and subsequently, purely real flow in
-the real domain. Setting σ= 0 gives us the linear, invertible equation eq 6. Substituting eqns A8 through A10 into
-the noise model obtained in H21 and summing over ±ωestablishes the symmetry Gσ
-k,q=G−σ
+q = P∗−σ
+−q , and subsequently, purely real flow in
+the real domain. Setting σ = 0 gives us the linear, invertible equation eq 6. Substituting eqns A8 through A10 into
+the noise model obtained in H21 and summing over ±ω establishes the symmetry Gσ
+k,q = G−σ
 −k,−q.
 Imaging near-surface flows using mode-coupling analysis 13
-B.SOLA INVERSIONS
+B. SOLA INVERSIONS
 Subtractive Optimally Localized Averages (SOLA, Pijpers & Thompson 1994) aims to obtain a set of weight factors
-for the mode qand depthzo, which we will call αk,zo. A linear weighted sum of the measurements Bk,qin the fashion∑
-kαk,zoBk,qallows for an average value of the flow Pq(z) to be estimated at the depth zo. To obtain the coefficients
-αk,zo, it is assumed that a set of sensitivity kernels Kk,q(z) for the mode qcan be summed up coherently to give an
-’averaging kernel’ that is localized at the depth zo. Conventionally, a Gaussian centered at zoand a width ∆ is chosen
+for the mode q and depth zo, which we will call αk,zo. A linear weighted sum of the measurements Bk,q in the fashion ∑
+k
+αk,zoBk,q allows for an average value of the flow Pq(z) to be estimated at the depth zo. To obtain the coefficients
+αk,zo, it is assumed that a set of sensitivity kernels Kk,q(z) for the mode q can be summed up coherently to give an
+‘averaging kernel’ that is localized at the depth zo. Conventionally, a Gaussian centered at zo and a width ∆ is chosen
 which the averaging kernel should resemble after performing inversion.
 B.1. Kernels in the integral form
 Since the kernels in eq A6 are manifest as coefficients on a basis fj(z), we first derive kernels that can be expressed
-as a function of depth z(see Figure 8). It is convenient to derive in matrix form. Thus, with the following definitions:
-P≡Pq(z),p≡Pqj,F≡fj(z),B≡Bk,qC≡Cqj,kandK≡Kk,q(z), we write (assume only poloidal flow for
+as a function of depth z (see Figure 8). It is convenient to derive in matrix form. Thus, with the following definitions:
+P ≡ Pq(z), p ≡ Pqj, F ≡ fj(z), B ≡ Bk,q, C ≡ Cqj,k and K ≡ Kk,q(z), we write (assume only poloidal flow for
 simplicity, the same derivations hold true for toroidal flow as well)
-P=Fp (B11)
-The size of Pis thus the same as the length of the radial grid z.
-Now, pre-multiply by FTand integrate over zon both sides (drop the integral notation for compactness),
-FTP= (FTF)p
-p= (FTF)−1FTP (B12)
+P = Fp (B11)
+The size of P is thus the same as the length of the radial grid z.
+Now, pre-multiply by FT and integrate over z on both sides (drop the integral notation for compactness),
+$F^T P = (F^T F)\,p$
+$p = (F^T F)^{-1} F^T P$ (B12)
 Now, substituting eq B12 into the forward problem eq 6,
-B=Cp
+B = Cp
 $= (F^T F)^{-1} F^T C P$
-=KP (B13)
+= KP (B13)
 where
-K= (FTF)−1FTC,
-i.e.,Kk,q(z) =∑
-j,j′[∫
-dzfj(z)fj′(z)]−1
-fj′(z)Cqj′,k (B14)
+K = (FTF)−1FTC,
+i.e., Kk,q(z) =
+∑
+j,j′
+[∫
+dzfj(z)fj′ (z)
+]−1
+fj′ (z)Cqj′,k (B14)
 B.2. Obtaining the coefficients α
 Now, demand that the averaging kernel should resemble a unimodulus target Gaussian centered at zo
-T(z,zo) =1√
-2π∆2exp(z−zo
-2∆2)
+T(z,zo) = 1√
+2π∆2 exp
+(z−zo
+2∆2
+)
 . (B15)
 This can be achieved by solving the optimization problem
-minimizeX=∫
-dz[
-T(z,zo)−Θq(z,zo)]2
+minimize X=
+∫
+dz
+[
+T(z,zo) −Θq(z,zo)
+]2
 , (B16)
 where we introduce the averaging kernel for mode q thus
-Θq(z,zo) =∑
-kαk,zoKk,q(z). (B17)
+Θq(z,zo) =
+∑
+k
+αk,zoKk,q(z). (B17)
 As an aside, we note that averaging kernels can similarly be constructed for RLS (see section 3.1) using eqns 13
 and B14.
  Mani et al.
-Figure 8. Left: KernelKk,q(z) (eq B14) shown vs depth zfor the three radial order couplings f-f, p 1-p1, and p 2-p2.qR⊙=
-[−112,−45] and kR⊙= [−853,−157] is chosen for all the radial order couplings for comparison. Right : Averaging kernel
-(eq B17) using SOLA, for qR⊙= [−112,−45] at depth z0=−0.48 Mm, and the corresponding target Gaussian (eq B15).
-Integral of the averaging kernel over zis 0.89.
-Setting∂X
-∂α→0 gives us the matrix problem to be solved
-A{α}=v,
-{α}=[
-A+µI]−1
+Figure 8. Left: Kernel Kk,q(z) (eq B14) shown vs depth z for the three radial order couplings f-f, p1-p1, and p2-p2. qR⊙ =
+[−112,−45] and kR⊙ = [ −853,−157] is chosen for all the radial order couplings for comparison. Right: Averaging kernel
+(eq B17) using SOLA, for qR⊙ = [ −112,−45] at depth z0 = −0.48 Mm, and the corresponding target Gaussian (eq B15).
+Integral of the averaging kernel over z is 0.89.
+Setting ∂X
+∂α →0 gives us the matrix problem to be solved
+A{α}= v,
+{α}=
+[
+A+ µI
+]−1
 v, (B18)
-where the square matrix A=∫
-dzKk,q(z)Kk′,q(z) andv=∫
-dzKk,q(z)T(z,zo). Here,k′is just a dummy index for
-denoting elements in the matrix A, (k′̸=k+q). In the last line of eq B18, we introduce regularization using an Identity
-matrixI, with the regularization parameter µ- purpose being the same as that described in section 3.1. Obtaining
-αthus becomes a highly expensive computationally for very large number of modes (see section 3). Substitute α
-obtained from eq B18 into last line of eq B13, and∑
-kon both sides
+where the square matrix A=
+∫
+dzKk,q(z)Kk′,q(z) and v =
+∫
+dzKk,q(z)T(z,zo). Here, k′is just a dummy index for
+denoting elements in the matrix A, (k′ ≠ k+q). In the last line of eq B18, we introduce regularization using an Identity
+matrix I, with the regularization parameter µ - purpose being the same as that described in section 3.1. Obtaining
+α thus becomes highly expensive computationally for a very large number of modes (see section 3). Substitute α
+obtained from eq B18 into last line of eq B13, and ∑
+k
+on both sides
+∑
+k
+αk,zo Bσ
+k,q =
 ∑
-kαk,zoBσ
-k,q=∑
-kαk,zo∫
+k
+αk,zo
+∫
 dzKk,q(z)Pσ
-q(z),
-=∫
+q (z),
+=
+∫
 dzΘq(z,zo)Pσ
-q(z),
+q (z),
 ≈⟨Pσ
-q(zo)⟩ (B19)
+q (zo)⟩ (B19)
 Inversions can similarly be performed for multiple depths by choosing suitable widths for the target Gaussians. Divergence
  flow can then be obtained from eq 16. Results are shown in Figures 9 and 10.
 REFERENCES
@@ -647,7 +747,8 @@ Birch, A. C., Schunker, H., Braun, D. C., et al. 2016,
 Science Advances, 2, e1600557,
 doi: 10.1126/sciadv.1600557
 Birch, A. C., Schunker, H., Braun, D. C., & Gizon, L. 2019,
-A&A, 628, A37, doi: 10.1051/0004-6361/201935591B¨ oning, V. G. A., Birch, A. C., Gizon, L., Duvall, T. L., &
+A&A, 628, A37, doi: 10.1051/0004-6361/201935591
+Böning, V. G. A., Birch, A. C., Gizon, L., Duvall, T. L., &
 Schou, J. 2020, A&A, 635, A181,
 doi: 10.1051/0004-6361/201937331
 Braun, D. C., & Lindsey, C. 2001, ApJL, 560, L189,
@@ -657,10 +758,10 @@ Physics, 74, 1073, doi: 10.1103/RevModPhys.74.1073
 —. 2021, Living Reviews in Solar Physics, 18, 2,
 doi: 10.1007/s41116-020-00028-3
 Imaging near-surface flows using mode-coupling analysis 15
-Figure 9. Left: Poloidal flow power-spectrum for f-f as a function of qxR⊙andqyR⊙.Right : Corresponding power-spectrum
-averaged over the azimuthal angle. Shaded region shows ±1−σerror around the mean. Power is in units of m2/s4.
-Figure 10. Real-space divergence flows (in units of 10−5s−1) for mode-coupling inversion through SOLA using f-f coupling,
-and LCT, bandpass filtered around qR⊙= 100. We cut edges out from the flow maps and compare a circular region of diameter
+Figure 9. Left: Poloidal flow power-spectrum for f-f as a function of qxR⊙ and qyR⊙. Right: Corresponding power-spectrum
+averaged over the azimuthal angle. Shaded region shows ±1σ error around the mean. Power is in units of m²/s⁴.
+Figure 10. Real-space divergence flows (in units of 10⁻⁵ s⁻¹) for mode-coupling inversion through SOLA using f-f coupling,
+and LCT, bandpass filtered around qR⊙ = 100. We cut edges out from the flow maps and compare a circular region of diameter
 ≈175 Mm. The scatter plot shows the agreement between the maps. The slope of the best-fit line through the scatter plot is
 1.05. For demonstration, we show inversions only for poloidal flow using SOLA.
 De Rosa, M., Duvall, T. L., J., & Toomre, J. 2000, SoPh,
@@ -683,7 +784,8 @@ Giles, P. M., Duvall, T. L., Scherrer, P. H., & Bogart, R. S.
 Gizon, L., & Birch, A. C. 2004, ApJ, 614, 472,
 doi: 10.1086/423367
 Gizon, L., Cameron, R. H., Pourabdian, M., et al. 2020,
-Science, 368, 1469, doi: 10.1126/science.aaz7119Gizon, L., Cameron, R. H., Bekki, Y., et al. 2021, A&A,
+Science, 368, 1469, doi: 10.1126/science.aaz7119
+Gizon, L., Cameron, R. H., Bekki, Y., et al. 2021, A&A,
 652, L6, doi: 10.1051/0004-6361/202141462
 Greer, B. J., Hindman, B. W., & Toomre, J. 2016, ApJ,
 824, 128, doi: 10.3847/0004-637X/824/2/128
@@ -738,7 +840,8 @@ doi: 10.1086/166758
 Pijpers, F. P., & Thompson, M. J. 1994, A&A, 281, 231
 Rieutord, M., Roudier, T., Ludwig, H. G., Nordlund, Å., &
 Stein, R. 2001, A&A, 377, L14,
-doi: 10.1051/0004-6361:20011160Rincon, F., & Rieutord, M. 2018, Living Reviews in Solar
+doi: 10.1051/0004-6361:20011160
+Rincon, F., & Rieutord, M. 2018, Living Reviews in Solar
 Physics, 15, 6, doi: 10.1007/s41116-018-0013-5
 Rincon, F., Roudier, T., Schekochihin, A. A., & Rieutord,
 M. 2017, A&A, 599, A69,
diff --git a/read/results/pypdf/2201.00200.txt b/read/results/pypdf/2201.00200.txt
index 0736717..02833d2 100644
--- a/read/results/pypdf/2201.00200.txt
+++ b/read/results/pypdf/2201.00200.txt
@@ -1,12 +1,12 @@
-Astronomy & Astrophysics manuscript no. solar˙model˙v10˙corrected ©ESO 2022
+Astronomy & Astrophysics manuscript no. solar_model_v10_corrected © ESO 2022
 January 4, 2022
 Local heating due to convective overshooting and the solar
 modelling problem
-I. Bara ffe1,2, T. Constantino1, J. Clarke1, A. Le Saux1,2, T. Go ffrey4, T. Guillet1, J. Pratt3, D. G. Vlaykov1
-1University of Exeter, Physics and Astronomy, EX4 4QL Exeter, UK (e-mail: i.baraffe@ex.ac.uk )
-2´Ecole Normale Sup ´erieure, Lyon, CRAL (UMR CNRS 5574), Universit ´e de Lyon, France
-3Department of Physics and Astronomy, Georgia State University, Atlanta GA 30303, USA
-4Centre for Fusion, Space and Astrophysics, Department of Physics, University of Warwick, Coventry, CV4 7AL, UK
+I. Baraffe1,2, T. Constantino1, J. Clarke1, A. Le Saux1,2, T. Goffrey4, T. Guillet1, J. Pratt3, D. G. Vlaykov1
+1 University of Exeter, Physics and Astronomy, EX4 4QL Exeter, UK (e-mail: i.baraffe@ex.ac.uk)
+2 École Normale Supérieure, Lyon, CRAL (UMR CNRS 5574), Université de Lyon, France
+3 Department of Physics and Astronomy, Georgia State University, Atlanta GA 30303, USA
+4 Centre for Fusion, Space and Astrophysics, Department of Physics, University of Warwick, Coventry, CV4 7AL, UK
 ABSTRACT
 Recent hydrodynamical simulations of convection in a solar-like model suggest that penetrative convective flows at the boundary
 of the convective envelope modify the thermal background in the overshooting layer. Based on these results, we implement in one-dimensional
@@ -22,7 +22,7 @@ Key words. Convection – Hydrodynamics – Stars: evolution – Sun: evolution
 1. Introduction
 Modelling the internal structure of the Sun is still a challenge.
 A recent review by Christensen-Dalsgaard (2021) describes in
-detail the long-standing e fforts to improve solar models. The solar
+detail the long-standing efforts to improve solar models. The solar
  modelling problem refers to the discrepancy between helioseismology
  and solar interior models that adopt low metallicities
  predicted by the three-dimensional (3D) atmosphere models
@@ -49,7 +49,8 @@ a nearly adiabatic form to a radiative form is usually assumed,
 as suggested by the theoretical work of Zahn (1991). Models
 with a smoother transition have also been investigated. Based
 on the analysis of models with different stratifications near the
-Send o ffprint requests to : I. Bara ffebase of the convective zone, Christensen-Dalsgaard et al. (2011)
+Send offprint requests to: I. Baraffe
+base of the convective zone, Christensen-Dalsgaard et al. (2011)
 found that models that better fit the helioseismic data have a
 weakly sub-adiabatic temperature gradient in the lower part of
 the convective zone and a smooth transition to the radiative gradient
@@ -71,8 +72,8 @@ Zhang et al. (2019) find that this model cannot solve the whole
 solar problem because such a flux worsens the sound-speed profile
  in the deep radiative interior of their solar model. Given the
 uncertainties regarding the temperature stratification of the overshooting
- region, solar modellers have considered these e ffects as
-secondary and have focused their e fforts on exploring the impact
+ region, solar modellers have considered these effects as
+secondary and have focused their efforts on exploring the impact
 of solar abundances, microphysics (opacities, equations of state,
 nuclear reaction rates), and chemical mixing and diffusion (see
 details and references in the review of Buldgen et al. 2019a).
@@ -80,8 +81,9 @@ Additional, more exotic e ffects such as early disk accretion or
 solar-wind mass loss (Zhang et al. 2019; Kunitomo & Guillot
 2021) are also attracting increasing attention.
 To reinvigorate the debate, Buldgen et al. (2019b) recently
-highlighted once again how the transition of the temperature gra1arXiv:2201.00200v1
-  [astro-ph.SR]  1 Jan 2022
+highlighted once again how the transition of the temperature gra1
+
+arXiv:2201.00200v1  [astro-ph.SR]  1 Jan 2022
 Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem
 dient just below the convective envelope can significantly impact
 the disagreement between solar models and helioseismic constraints.
@@ -94,7 +96,7 @@ two extremes. Christensen-Dalsgaard et al. (2018) also note that
 an increase in the temperature at the transition would remove
 a remaining small sharp dip in the speed of sound immediately
 beneath the convective zone of the model. A major difficulty is
-to disentangle the e ffects of overshoot from the e ffects of opacities,
+to disentangle the effects of overshoot from the effects of opacities,
  which can also alter the temperature gradient in these layers.
 Given the large number of parameters to deal with in order to improve
  solar models and the current lack of strong arguments in
@@ -143,7 +145,8 @@ et al. 2002; Brun et al. 2011; Hotta 2017; K ¨apyl¨a 2019; Cai
 et al. 2019; Higl et al. 2021) have also reported a modification
 of the local thermal background in the overshooting region, but
 without providing a detailed description. The simulations of B21
-provide a physical explanation that links the convective penetra-tion process to the local heating and to the radiative bump in the
+provide a physical explanation that links the convective penetration
+ process to the local heating and to the radiative bump in the
 overshooting layer. The solar-like star simulated in B21 is based
 on a model that is not thermally relaxed. It is reasonable to assume
  that the local heating seen in B21 is present in stars because
@@ -153,18 +156,18 @@ These two features are also commonly observed in other hydrodynamical
  simulations, as mentioned above. An exploration of
 the impact of this heating on stellar evolution models may reveal
 that heating is a necessary aspect of models for overshooting.
-Fig. 1. Radial profile of the temperature departure ∆T/T0from
-the initial profile T0and of the sub-adiabaticity ( ∇−∇ ad) close to
+Fig. 1. Radial profile of the temperature departure ∆T/T0 from
+the initial profile T0 and of the sub-adiabaticity (∇−∇ad) close to
 the convective boundary predicted by 2D hydrodynamical simulations
  (B21) of solar-like models. The lower panel corresponds
 to the model with a realistic stellar luminosity and the upper
 panel to a model with luminosity enhanced by a factor of ten.
-The dash-dotted red lines show ∆T/T0(in %), the relative difference
+The dash-dotted red lines show ∆T/T0 (in %), the relative difference
  between the time and space averages of the temperature,
 T, and the initial temperature, T0. The solid blue lines show the
 time and space averages of the sub-adiabaticity (∇−∇ad). The
 dashed black lines show the initial profile of the sub-adiabaticity,
-(∇−∇ ad)init. The convective boundary is indicated by the vertical
+(∇−∇ad)init. The convective boundary is indicated by the vertical
 solid line (see details in B21)
 The behaviour of the thermal profile below the convective
 boundary found in the simulations of B21 is illustrated in Fig.
@@ -173,20 +176,20 @@ boundary found in the simulations of B21 is illustrated in Fig.
  enhancement in the luminosity by a factor of ten because the
 features are intensified in these ‘boosted’ models (upper panel).
 The figure shows the local heating in the overshooting layer and
-its impact on the sub-adiabaticity ( ∇−∇ ad), with∇=d log T
-d log Pthe
+its impact on the sub-adiabaticity (∇−∇ad), with ∇= d logT
+d logP the
 
 Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem
-temperature gradient and ∇ad=d log T
-d log P|Sthe adiabatic gradient.
+temperature gradient and ∇ad = d logT
+d logP |S the adiabatic gradient.
 The initial stratification below the convective boundary (located
-atr=0.6734×Rstarfor this specific stellar model) is set by
-the stable radiative gradient, ∇rad(see the dashed black line below
+at r = 0.6734 ×Rstar for this specific stellar model) is set by
+the stable radiative gradient, ∇rad (see the dashed black line below
  the convective boundary in Fig. 1). B21 show that, as a result
  of the local heating below the convective boundary characterised
- by the bump in temperature di fference ∆T/T0displayed
+ by the bump in temperature difference ∆T/T0 displayed
 in Fig. 1, the temperature gradient becomes less sub-adiabatic
-immediately below the convective boundary1. The net result is
+immediately below the convective boundary 1. The net result is
 a smoother transition just below the convective boundary with
 a temperature gradient that has an intermediate value between
 the radiative temperature gradient and the adiabatic one. In the
@@ -203,25 +206,26 @@ qualitative impact of the local heating produced by overshooting.
 et al. (2020), who constructed a static structure of the Sun in
 agreement with seismic inversions of the Ledoux discriminant
 defined by
-A=1
-Γ1dlnP
-dlnr−dlnρ
-dlnr, (1)
-with Γ1=(∂lnP/∂lnρ)ad. Starting from a reference evolutionary
+A = 1
+Γ1
+d ln P
+d ln r −d ln ρ
+d ln r , (1)
+with Γ1 = (∂ln P/∂ln ρ)ad. Starting from a reference evolutionary
  model, Buldgen et al. (2020) used an inversion procedure
  to iteratively reconstruct a solar model. Successive inversions
  of the Ledoux discriminant allowed them to obtain a
 model-independent profile for this quantity. Their reconstruction
 method also gives solar structures that are in excellent agreement
+ with other structural inversions, namely the entropy, S, the
+ with other structural inversions, namely the entropy,S , the
 square of the speed of sound, c2
-s, and the density, ρ. To illustrate
+s , and the density, ρ. To illustrate
 the convergence of their reconstruction procedure, they show
 (right panels of their Figs. 3-6) the successive iterations that converge
  to an excellent level of agreement for the four structural
-inversions ( A,S,c2
-s,ρ) starting from the initial reference model
-adopted in their work. The di fferences found between the reconstructed
+inversions (A, S , c2
+s , ρ) starting from the initial reference model
+adopted in their work. The differences found between the reconstructed
  model and the reference model are useful as they indicate
  the modifications of the reference model that are required to
 converge towards a solar model in agreement with helioseismic
@@ -231,25 +235,26 @@ analysis in Sect. 3.2.
 The first concerns the Ledoux discriminant. The major discrepancy
  between the Sun and the reference model occurs just
 below the convective boundary, with a large positive bump for
-the quantity ( ASun-Aref).
+the quantity (ASun - Aref).
 The second concerns the speed of sound. The same positive
 bump at the same location as for the Ledoux discriminant, A, is
 observed for the quantity ( c2
-s,Sun−c2
+s,Sun −c2
 s,ref)/c2
 s,ref. The corrections
-applied to Aduring the reconstruction procedure also reduce the
+applied to A during the reconstruction procedure also reduce the
 discrepancy in the speed of sound in the radiative region.
 The third concerns the entropy. Large discrepancies are observed
  in both the radiative region and the convective zone. The
-1Less sub-adiabatic means that |∇−∇ ad|decreases compared to the
-initial profile.entropy discrepancy ( SSun−Sref)/Srefhas two positive peaks in
+1 Less sub-adiabatic means that |∇−∇ ad|decreases compared to the
+initial profile.
+entropy discrepancy (S Sun −S ref)/S ref has two positive peaks in
 the radiative zone, one just below the overshooting region and a
 larger peak deeper at ∼40% of the stellar radius. This discrepancy
  is negative in the convective zone. The corrections applied
-toAhelp reduce these entropy discrepancies in both regions.
-The fourth concerns the density. The quantity ( ρSun−
-ρref)/ρrefhas a negative peak in the radiative region, at ∼35%
+to A help reduce these entropy discrepancies in both regions.
+The fourth concerns the density. The quantity ( ρSun −
+ρref)/ρref has a negative peak in the radiative region, at ∼35%
 of the stellar radius, and is positive in the convective zone.
 Importantly, Buldgen et al. (2020) mention that their reconstruction
  procedure gives similar Ledoux discriminant profiles
@@ -271,7 +276,7 @@ modified to reproduce the temperature gradient in the overshooting
 the chemical abundances are not modified by nuclear reactions,
 mixing, or microscopic di ffusion during the relaxation process.
 For these tests, we used the 1D Lyon stellar evolution code
-(Bara ffe et al. 1998). We repeated this experiment based on thermal
+(Baraffe et al. 1998). We repeated this experiment based on thermal
  relaxation with the stellar evolution code MONSTAR (e.g.
 Constantino et al. 2014) and obtained the same qualitative results.
 
@@ -280,35 +285,35 @@ modification of the temperature gradient in the overshooting
 layer from the zero age main sequence (ZAMS). The models
 are then evolved until they reach the solar radius and luminosity.
 With this approach, changes in the chemical abundances from
-nuclear reactions, microscopic di ffusion, and overshooting mixing
+nuclear reactions, microscopic diffusion, and overshooting mixing
  are also consistent with any modification of the structure
 induced by the forced local heating in the overshooting layer.
 These tests were performed with MONSTAR as it includes the
-treatment of microscopic di ffusion.
+treatment of microscopic diffusion.
 The first method allows the impact of local heating in
 the overshooting layer after thermal relaxation to be isolated.
 The second method provides evolutionary models that are selfconsistent
- since the e ffect of the modification of the temperature
+ since the effect of the modification of the temperature
 gradient is accounted for during their evolution on the main sequence.
 
 In the following, we adopt a modification of the local temperature
  gradient in the overshooting layer that qualitatively reproduces
  the behaviour displayed in Fig. 1. We define an overshooting
- length dov=αovHP,CB, with HP,CBthe pressure scale height
-at the convective boundary and αova free parameter. We also define
- two radial locations, rov=rCB−dovandrmid=rCB−dov/2,
-with rCBthe radial location of the convective boundary. The temperature
- gradient is modified as follows. For rmid≤r<rCB, we
+ length dov = αovHP,CB, with HP,CB the pressure scale height
+at the convective boundary andαov a free parameter. We also define
+ two radial locations, rov = rCB −dov and rmid = rCB −dov/2,
+with rCB the radial location of the convective boundary. The temperature
+ gradient is modified as follows. For rmid ≤r < rCB, we
 use
-∇=g(r)∇ad+(1−g(r))∇rad, (2)
+∇= g(r)∇ad + (1 −g(r))∇rad, (2)
 with
-g(r)=sin{[(r−rmid)/(rCB−rmid)]a×π/2}. (3)
+g(r) = sin{[(r −rmid)/(rCB −rmid)]a ×π/2}. (3)
 
 Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem
-Forrov≤r<rmid, we use
-∇=∇rad−h(r)∇ad, (4)
+For rov ≤r <rmid, we use
+∇= ∇rad −h(r)∇ad, (4)
 with
-h(r)=b×sin{[(rmid−r)/(rmid−rov)]×π}. (5)
+h(r) = b ×sin{[(rmid −r)/(rmid −rov)] ×π}. (5)
 Sine functions are used in Eqs. (3) and (5) to reproduce the
 smooth variations in the temperature gradient below the convective
  boundary produced by the hydrodynamical simulations. We
@@ -317,22 +322,22 @@ these variations and to the exact shape of the temperature gradient
  radial profile.We adopted a=0.3 in Eq. (3) as it provides a
 behaviour for the temperature gradient very close to the one displayed
  in Fig. 1. Results are rather insensitive to variations in the
-values of abetween 0.2 and 0.4. We adopted b=0.03 in Eq. (5),
+values of a between 0.2 and 0.4. We adopted b=0.03 in Eq. (5),
 which also provides a close visual match to the hydrodynamical
 results, but we note that the results are insensitive to the value of
 b.
 3.2.1. Thermal equilibrium models
 The details of the procedure for the first method are the following.
- We calculate the evolution of a 1 M⊙model with an initial
-helium mass fraction of 0.28, metallicity Z=0.02,and a mixing
- length lmix=1.9HP. We use a reference model that is in
-thermal equilibrium2and has the luminosity and radius of the
+ We calculate the evolution of a 1 M⊙ model with an initial
+helium mass fraction of 0.28, metallicity Z = 0.02, and a mixing
+ length lmix = 1.9HP. We use a reference model that is in
+thermal equilibrium2 and has the luminosity and radius of the
 current Sun. Starting from this reference model, the temperature
  gradient is modified over a prescribed depth to mimic the
 impact of overshooting according to the hydrodynamical simulations
  described in Sect. 2. We adopt the prescription given
-by Eqs. (2)-(5) over a distance dovbelow the convective boundary.
- We show the results in Fig. 2 for αov=0.15 andαov=0.20.
+by Eqs. (2)-(5) over a distance dov below the convective boundary.
+ We show the results in Fig. 2 for αov = 0.15 and αov= 0.20.
 These overshooting widths are in good agreement with the maximal
  depth reached by downflows below the convective boundary
 predicted by the hydrodynamical simulations for the solar-like
@@ -342,9 +347,9 @@ for details). B21 also mention that one should be cautious when
 directly applying the overshooting depths predicted by their simulations
  to real stars since the final relaxed state for these simulations
  may have di fferent properties from non-thermally relaxed
-states. We varied αovbetween 0.15 and 0.35 and find that the
+states. We varied αov between 0.15 and 0.35 and find that the
 results do not change qualitatively. However, the amplitude of
-the variations in the model properties depends on dov(see below).
+the variations in the model properties depends on dov (see below).
  As shown below, this simple prescription implemented in
 a stellar evolution code yields a local increase in the temperature
  below the convective boundary, similar to that observed in
@@ -361,31 +366,32 @@ whole relaxation process, and this is referred to as a ‘forced local
 heating’. This procedure ensures that the model with a modified
 temperature gradient can be consistently compared to the reference
  model. As shown in Fig. 2, the simple prescription given
-2Thermal equilibrium means that the total nuclear energy produced
-in the central regions balances the radiative losses at the surface, i.e.the
-total nuclear luminosity, Lnuc, equals the total stellar luminosity, L.by Eqs. (2)-(5) yields similar qualitative changes in the temperature
+2 Thermal equilibrium means that the total nuclear energy produced
+in the central regions balances the radiative losses at the surface,i.e. the
+total nuclear luminosity, Lnuc, equals the total stellar luminosity, L.
+by Eqs. (2)-(5) yields similar qualitative changes in the temperature
  and the sub-adiabaticity close to the convective boundary
 that was found in the hydrodynamical simulations of B21.
-Fig. 2. Radial profile of the temperature di fference and of the
+Fig. 2.Radial profile of the temperature di fference and of the
 sub-adiabaticity of a 1D solar-like structure with a modified temperature
  gradient in the overshooting layer according to Eqs.
 (2)-(5). The temperature gradient is modified over a distance
-dov=αovHP,CB, withαov=0.15 in the lower panel and αov=0.20
+dov = αovHP,CB, with αov=0.15 in the lower panel and αov=0.20
 in the upper panel. The dash-dotted red lines show the percentage
- relative temperature di fference, ∆T/Tref, with ∆T=T−Tref.
-The solid blue lines correspond to the sub-adiabaticity ( ∇−∇ ad).
+ relative temperature difference, ∆T/Tref, with ∆T = T −Tref.
+The solid blue lines correspond to the sub-adiabaticity (∇−∇ad).
 The dashed black lines show the sub-adiabaticity of the reference
  model. The convective boundary is indicated by the vertical
 solid line. The vertical dashed line in each panel is located at a
-distance dovbelow the convective boundary.
+distance dov below the convective boundary.
 The impact on the whole stellar structure was quantified by
-comparing the four structural quantities ( A,S,c2
-s,ρ) between the
+comparing the four structural quantities (A, S , c2
+s , ρ) between the
 modified and the reference model. The results are displayed in
-Fig. 3, with ∆Xdefined as ( X−Xref) for any structural quantity X.
+Fig. 3, with∆X defined as (X−Xref) for any structural quantityX.
 The forced local heating in the overshooting layer produces similar
- positive peaks for ∆A,∆S, and ∆c2
-s, as found for the temperature.
+ positive peaks for ∆A, ∆S , and ∆c2
+s , as found for the temperature.
  The modification thus provides the correction required to
 improve the discrepancy for the Ledoux discriminant described
 in the first of the trends outlined in Sect. 3.1. Unsurprisingly,
@@ -398,7 +404,7 @@ remove the sound speed anomaly below the convective boundary
 Christensen-Dalsgaard et al. (2011). But it is also interesting to
 note that such a modification yields a slight cooling of the convective
  zone (see Fig. 2) and thus a negative di fference for the
-entropy (see Fig. 3). A negative di fference in the convective envelope
+entropy (see Fig. 3). A negative difference in the convective envelope
  is in agreement with the correction required for the reference
  model of Buldgen et al. (2020) to better match the Sun
 (see third trend in Sect. 3.1). Regarding the density, the modification
@@ -408,14 +414,14 @@ to the reference model over a broad region below the convective
 boundary. The impact on the density in the convective region for
 this specific model is partly in agreement with the correction required
  for this quantity in the Buldgen et al. (2020) study, with a
-positive di fference found only in the upper part of the convective
+positive difference found only in the upper part of the convective
 envelope (see the fourth trend in Sect. 3.1).
 These trends are insensitive to the depth over which the temperature
  gradient is modified. Increasing the depth increases the
-magnitude of the di fferences but has no impact on their sign. We
+magnitude of the differences but has no impact on their sign. We
 find that the maximum variation in the model properties, such as
 the speed of sound, ∆c2
-s/c2
+s /c2
 s,ref, roughly scales with d2
 ov. This scaling
  is linked to the integrated area between the modified temperature
@@ -423,19 +429,19 @@ ov. This scaling
 temperature gradient, which roughly decreases linearly with r.
 This area is proportional to the square of the overshooting depth,
 and consequently, the maximum variation in the model properties
- is also proportional to d2
+ is also proportional tod2
 ov. The qualitative trends also remain
 the same whether overshooting mixing in the reference model
 is ignored or included using a step function (with instantaneous
-mixing) or an exponential decay for the di ffusion coe fficient (e.g.
+mixing) or an exponential decay for the diffusion coefficient (e.g.
 Freytag et al. 1996).
 3.2.2. Self-consistent evolutionary models
 For the tests based on the second method, we ran di fferent sets
-of models with di fferent combinations of assumptions, including
+of models with different combinations of assumptions, including
 or not microscopic di ffusion and with or without overshooting
 mixing. When overshooting mixing was included in the overshooting
  layer, it was based either on a step function or on an
-exponential decay for the di ffusion coe fficient. Microscopic diffusion
+exponential decay for the diffusion coefficient. Microscopic diffusion
  for H and He was implemented according to Thoul et al.
 (1994). For these tests, the temperature gradient was modified
 according to Eqs. (2)-(5). All models start from the ZAMS and
@@ -447,8 +453,8 @@ has no modification of the temperature gradient but everything
 else is the same (i.e. the same treatment of microscopic di ffusion
  and of overshooting mixing). The evolutionary models with
 temperature gradient modifications are thus self-consistent. The
-main di fference between this approach and the one in the previous
- section is that these models accumulate small di fferences in,
+main difference between this approach and the one in the previous
+ section is that these models accumulate small differences in,
 for example, central H abundance when compared to their reference
  model. These tests produce the same trends in the overshooting
  layer as found for the tests based on the first method
@@ -457,22 +463,22 @@ mixing and whether microscopic di ffusion is included or not.
 In the convective zone, all models give a positive di fference for
 the density between the model with a modified temperature gradient
  and the relevant reference model. For the other quantities
-(S,c2
-s), the di fferences in the convective zone are very sensitive
-Fig. 3. Difference of various structural quantities between a
+(S , c2
+s ), the differences in the convective zone are very sensitive
+Fig. 3.Difference of various structural quantities between a
 model with a modified temperature gradient in the overshooting
  layer and a reference model calculated with the Lyon stellar
 evolution code. The temperature gradient in the modified model
-is changed over a distance dov=αovHP,CBbelow the convective
+is changed over a distance dov = αovHP,CB below the convective
  boundary (indicated by the vertical solid line). The lower
-panel shows the results for αov=0.15 and the upper panel for
-αov=0.20.
+panel shows the results for αov = 0.15 and the upper panel for
+αov = 0.20.
 to the assumptions regarding whether overshooting mixing is included
  or not. But at least we find solutions that are compatible
 with the four trends found by Buldgen et al. (2020) for the four
 structural quantities. This is illustrated in Fig. 4 with a model
 that accounts for step function overshooting mixing over a distance
- dov=0.15HP,CB(lower panel) and dov=0.20HP,CB(upper
+ dov = 0.15HP,CB (lower panel) and dov = 0.20HP,CB (upper
 panel).
 4. Conclusion
 The tests performed in Sect. 3 are based on di fferent methods
@@ -480,19 +486,19 @@ The tests performed in Sect. 3 are based on di fferent methods
 construct solar models. Independently of the method used, the
 tests show that a local increase in the temperature in the overshooting
  region due to convective penetration provides the qualitative
- e ffects required to improve the speed of sound discrepancy
+ effects required to improve the speed of sound discrepancy
 below the convective boundary. This discrepancy is persistent in
 
 Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem
-Fig. 4. Difference of various structural quantities between a
+Fig. 4.Difference of various structural quantities between a
 modified model and a reference model calculated with the
 MONSTAR stellar evolution code. The reference model is
 evolved from the ZAMS with microscopic di ffusion and step
-function overshooting mixing over a distance dov=αovHP,CBbelow
+function overshooting mixing over a distancedov = αovHP,CB below
  the convective boundary. The lower panel shows the results
-forαov=0.15 and the upper panel for αov=0.20. The models
+for αov = 0.15 and the upper panel for αov = 0.20. The models
 with a modified temperature gradient in the overshooting layer
-(same microscopic di ffusion and overshooting mixing treatment
+(same microscopic diffusion and overshooting mixing treatment
 as the reference model) are evolved similarly from the ZAMS.
 The convective boundary is indicated by the vertical solid line.
 solar models that use low solar metal abundances. This is not
@@ -502,23 +508,24 @@ this problem, as mentioned in Sect. 1. However, the details of
 the physical process responsible for this local heating have been
 lacking, whereas we can now suggest an explanation based on
 the B21 results. The trends that we find for the four structural
-quantities ( A,S,c2
-s,ρ) are robust below the convective boundary
+quantities (A, S , c2
+s , ρ) are robust below the convective boundary
  and in a large fraction of the radiative core, independently of
-the treatment of mixing and di ffusion and of the method for constructing
+the treatment of mixing and diffusion and of the method for constructing
  the models in Sects. 3.2.1 and 3.2.2. Our experiments
 additionally show that such a local change in the temperature,
 despite being made over a very limited region below the convective
- boundary, can also a ffect the density, the entropy, and thespeed of sound in the convective envelope after thermal relaxation
+ boundary, can also a ffect the density, the entropy, and the
+speed of sound in the convective envelope after thermal relaxation
  or evolution on the main sequence. How these quantities
 are affected in the convective envelope compared to a reference
 model with no local heating depends on the strategy for building
 solar models and on the treatment of overshooting mixing. This
 mixing is obviously linked to the local heating given that both
 result from the same dynamical process. A combined testing of
-both e ffects in stellar models could provide more constraints on
+both effects in stellar models could provide more constraints on
 the general process of overshooting.
-Increasingly, e fforts are now devoted to characterising the
+Increasingly, efforts are now devoted to characterising the
 process of convective boundary mixing in stellar models based
 on multi-dimensional hydrodynamical simulations. More work
 is required to obtain reliable determinations of an overshooting
@@ -539,19 +546,19 @@ that produce a local change in the temperature gradient are also
 responsible for the mixing in this region. Because much observational
  evidence points towards the need for extra mixing at convective
  boundaries, for example lithium depletion in solar-like
-stars (Bara ffe et al. 2017), the size of convective cores (Claret
+stars (Baraffe et al. 2017), the size of convective cores (Claret
 & Torres 2016), and colour-magnitude diagrams (Castro et al.
 2014), solar modellers often include this extra mixing in their
 models. But a consistent approach should also require accounting
  for a local change in the temperature gradient. The impact of
 this local heating goes in the right direction to improve not only
 the discrepancies of solar models below the convective boundary,
- but also in the convective envelope. This e ffect offers an interesting
+ but also in the convective envelope. This effect offers an interesting
  step forward for solving the solar modelling problem.
 In this exploratory work, we adopt a simple prescription for the
 local heating in the overshooting layer since the main goal is
 to highlight its qualitative impact on stellar models. However,
-this e ffect should not be considered as another free parameter in
+this effect should not be considered as another free parameter in
 the solar modelling problem. Future multi-dimensional hydrodynamical
  simulations will enable this process, and its treatment
 in 1D stellar evolution codes, to be better constrained.
@@ -559,7 +566,7 @@ in 1D stellar evolution codes, to be better constrained.
 We thank our anonymous referee for valuable comments which
 helped improving the manuscript. This work is supported by the
 ERC grant No. 787361-COBOM and the consolidated STFC
-grant ST /R000395 /1. IB thanks the Max Planck Institut f ¨ur
+grant ST /R000395/1. IB thanks the Max Planck Institut f ¨ur
 Astrophysics (Garching) for warm hospitality during completion
 of part of this work. The authors would like to acknowledge the
 use of the University of Exeter High-Performance Computing
@@ -567,8 +574,8 @@ use of the University of Exeter High-Performance Computing
 at Leicester, operated by the University of Leicester IT Services,
 which forms part of the STFC DiRAC HPC Facility. The equipment
  was funded by BEIS capital funding via STFC capital
-grants ST /K000373 /1 and ST /R002363 /1 and STFC DiRAC
-Operations grant ST /R001014 /1. DiRAC is part of the National
+grants ST /K000373/1 and ST /R002363/1 and STFC DiRAC
+Operations grant ST/R001014/1. DiRAC is part of the National
 e-Infrastructure.
 
 Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem
@@ -577,7 +584,7 @@ Anders, E. & Grevesse, N. 1989, Geochim. Cosmochim. Acta, 53, 197
 Asplund, M., Amarsi, A. M., & Grevesse, N. 2021, A&A, 653, A141
 Asplund, M., Grevesse, N., Sauval, A. J., & Scott, P. 2009, ARA&A, 47, 481
 Baraffe, I., Chabrier, G., Allard, F., & Hauschildt, P. H. 1998, A&A, 337, 403
-Baraffe, I., Pratt, J., Go ffrey, T., et al. 2017, ApJ, 845, L6
+Baraffe, I., Pratt, J., Goffrey, T., et al. 2017, ApJ, 845, L6
 Baraffe, I., Pratt, J., Vlaykov, D. G., et al. 2021, A&A, 654, A126
 Brummell, N. H., Clune, T. L., & Toomre, J. 2002, ApJ, 570, 825
 Brun, A. S., Miesch, M. S., & Toomre, J. 2011, ApJ, 742, 79
@@ -597,11 +604,11 @@ M. J. 2011, MNRAS, 414, 1158
 Claret, A. & Torres, G. 2016, A&A, 592, A15
 Constantino, T., Campbell, S., Gil-Pons, P., & Lattanzio, J. 2014, ApJ, 784, 56
 Edelmann, P. V . F., Ratnasingam, R. P., Pedersen, M. G., et al. 2019, ApJ, 876, 4
-Freytag, B., Ludwig, H. G., & Ste ffen, M. 1996, A&A, 313, 497
+Freytag, B., Ludwig, H. G., & Steffen, M. 1996, A&A, 313, 497
 Goffrey, T., Pratt, J., Viallet, M., et al. 2017, A&A, 600, A7
 Grevesse, N. & Noels, A. 1993, in Origin and Evolution of the Elements, ed.
 N. Prantzos, E. Vangioni-Flam, & M. Casse, 15–25
-Higl, J., M ¨uller, E., & Weiss, A. 2021, A&A, 646, A133
+Higl, J., M¨uller, E., & Weiss, A. 2021, A&A, 646, A133
 Hotta, H. 2017, ApJ, 843, 52
 Hurlburt, N. E., Toomre, J., & Massaguer, J. M. 1986, ApJ, 311, 563
 K¨apyl¨a, P. J. 2019, A&A, 631, A122
@@ -613,9 +620,9 @@ Muthsam, H. J., Goeb, W., Kupka, F., Liebich, W., & Zoechling, J. 1995, A&A,
 293, 127
 Rogers, T. M., Glatzmaier, G. A., & Jones, C. A. 2006, ApJ, 653, 765
 Thoul, A. A., Bahcall, J. N., & Loeb, A. 1994, ApJ, 421, 828
-Viallet, M., Bara ffe, I., & Walder, R. 2011, A&A, 531, A86
-Viallet, M., Go ffrey, T., Bara ffe, I., et al. 2016, A&A, 586, A153
-Viallet, M., Meakin, C., Arnett, D., & Moc ´ak, M. 2013, ApJ, 769, 1
+Viallet, M., Baraffe, I., & Walder, R. 2011, A&A, 531, A86
+Viallet, M., Goffrey, T., Baraffe, I., et al. 2016, A&A, 586, A153
+Viallet, M., Meakin, C., Arnett, D., & Moc´ak, M. 2013, ApJ, 769, 1
 Vinyoles, N., Serenelli, A. M., Villante, F. L., et al. 2017, ApJ, 835, 202
 Zahn, J. P. 1991, A&A, 252, 179
 Zhang, C., Deng, L., Xiong, D., & Christensen-Dalsgaard, J. 2012, ApJ, 759,
diff --git a/read/results/pypdf/2201.00201.txt b/read/results/pypdf/2201.00201.txt
index 2a34d9c..278bc8e 100644
--- a/read/results/pypdf/2201.00201.txt
+++ b/read/results/pypdf/2201.00201.txt
@@ -1,8 +1,8 @@
-Astronomy &Astrophysics manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs ©ESO 2022
+Astronomy & Astrophysics manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs ©ESO 2022
 January 19, 2022
-Letter to the Editor
+Letter to theEditor
 The period-age relation of long-period variables
-M. Trabucchi1,⋆, N. Mowlavi1
+M. Trabucchi1, ⋆, N. Mowlavi1
 Department of Astronomy, University of Geneva, Ch. Pegasi 51, 1290 Versoix, Switzerland
 December 2021
 ABSTRACT
@@ -49,8 +49,9 @@ behavior (also using proper motion data, e.g., Wilson & Merrill
  by a higher velocity dispersion. Furthermore, groups of
 LPVs with relatively short periods are characterized by a greater
 scale height above the Galactic plane. This was shown, using for
-⋆Corresponding author: M. Trabucchi
-(michele.trabucchi@unige.ch )the first time the radial velocity of LPVs in the southern hemisphere,
+⋆ Corresponding author: M. Trabucchi
+(michele.trabucchi@unige.ch)
+the first time the radial velocity of LPVs in the southern hemisphere,
  by Feast (1963). In this seminal paper, Feast realized
 that LPVs with shorter periods must be members of older stellar
 populations and emphasized their highly promising applications
@@ -66,21 +67,22 @@ globular clusters (e.g., Feast 1966; Lloyd Evans 1983b; Whitelock
  1986), toward the galactic center and bulge (Lloyd Evans
 1976; Feast et al. 1980; Whitelock et al. 1991) or at high galactic
 latitude (Jura & Kleinmann 1992; Whitelock et al. 1994). Of particular
- interest is the recent e ffort to extend the analysis of LPVs
+ interest is the recent effort to extend the analysis of LPVs
 to dwarf galaxies in the Local Group (Menzies et al. 2002, 2008;
 Whitelock et al. 2009; Menzies et al. 2010, 2011; Sakamoto et al.
 2012; Battinelli & Demers 2012, 2013; Whitelock et al. 2013;
 Menzies et al. 2015).
-TheHipparcos mission provided the means to refine the results
+The Hipparcos mission provided the means to refine the results
  on the period-kinematics connection. This was done by
 Feast & Whitelock (2000b), who found evidence supporting the
 existence of a bar-like structure in the Bulge from the orbits of
 local LPVs. A similar study dedicated to C-rich LPVs was performed
  by Feast et al. (2006), who provided quantitative age
 estimates for these stars. A summary of the main results and
-prospects emerging from these Hipparcos -era studies is given by
-Article number, page 1 of 9arXiv:2201.00201v2  [astro-ph.SR]  17 Jan 2022
-A&A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs
+prospects emerging from theseHipparcos-era studies is given by
+Article number, page 1 of 9
+arXiv:2201.00201v2  [astro-ph.SR]  17 Jan 2022
+A&A proofs:manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs
 Feast (2007). More recently, the study of the Galaxy with LPVs
 has been stimulated by the wealth of data acquired by large-scale
 surveys (e.g., Catchpole et al. 2016; Urago et al. 2020), especially
@@ -93,7 +95,7 @@ of classical Cepheids from stellar evolution and pulsation models.
  (e.g., Bono et al. 2005; Anderson et al. 2016; De Somma
 et al. 2020). In contrast, when it comes to theoretical assessments
 of the LPV PA relation, the literature is surprisingly scarce (especially
- in comparison with the significant e ffort put into empirical
+ in comparison with the significant effort put into empirical
 studies). In fact, we were able to identify only two relevant studies
  addressing this subject (Wyatt & Cahn 1983; Eggen 1998).
 The discrepancy in period predictions between linear and nonlinear
@@ -119,28 +121,29 @@ We employed PARSEC-COLIBRI isochrones (Marigo et al.
  1.2S) for the preceding evolution. The adopted set of
 isochrones covers the range 0.001 to 0.016 in initial metallicity
  ( Zi), with a 0.001 step, while it spans the age interval
-8.00≤log(τ/yr)≤10.45 with a step of 0.05. Since the AGB
+8.00 ≤log(τ/yr) ≤10.45 with a step of 0.05. Since the AGB
 phase is short-lived, it only spans a small range of initial masses
-for each given isochrone, of order of 10−2M⊙at most.
+for each given isochrone, of order of 10−2 M⊙at most.
 The adopted isochrones include linear pulsation periods from
 Trabucchi et al. (2019) for overtone modes and nonlinear periods
 computed with the period-mass-radius relation from Trabucchi
-et al. (2021b) for the FM1. Pulsation properties were computed
+et al. (2021b) for the FM 1. Pulsation properties were computed
 along both the early-AGB and the TP-AGB. We did not extend
 our analysis to red supergiant stars as the pulsation prescription
-we employed are strictly valid only below 7 M ⊙.
+we employed are strictly valid only below 7 M⊙.
 We recall that, with the adopted nonlinear relation, the period
-increases with radius ( R) as a broken power law, whose exponent
-decreases as soon as the “bending radius” Rbis exceeded, it and
-becomes zero when the “saturation radius” Rs>Rbis reached
+increases with radius (R) as a broken power law, whose exponent
+decreases as soon as the “bending radius” Rb is exceeded, it and
+becomes zero when the “saturation radius” Rs > Rb is reached
 (i.e., the period becomes independent of radius). The exact values
- of RbandRs, as well as of the exponents, depend on the
+ of Rb and Rs, as well as of the exponents, depend on the
 current mass ( M). We assume that the FM is dominant if the
 stellar radius is larger than the critical value Rdom,0, which we
 computed from the current stellar mass using Eq. 4 of Trabucchi
 et al. (2021b).
-1Hereinafter, whenever we discuss periods, it should be understood
-that we refer to FM periods on which this work is focused.2.2. Data
+1 Hereinafter, whenever we discuss periods, it should be understood
+that we refer to FM periods on which this work is focused.
+2.2. Data
 As a first set of data, we considered the cluster-LPV pairs used
 by Grady et al. (2019, see their tables 1 and 2). These consist of
 19 clusters in the Large Magellanic Cloud, hosting a total of 20
@@ -148,7 +151,7 @@ potential LPV members, and eight Galactic clusters each hosting
 a potential LPV member.
 We expanded this list with data for LPVs in a few populous
 clusters, namely the Galactic clusters NGC 362, NGC 2808, 47
-Tuc (NGC 104), and ωCen (NGC 5139); the LMC clusters NGC
+Tuc (NGC 104), andωCen (NGC 5139); the LMC clusters NGC
 1978 and NGC 1846; and the cluster NGC 419 in the Small Magellanic
  Cloud (SMC). The source lists were taken from Lebzelter
  & Wood (2005, 2007, 2011, 2016) and Kamath et al. (2010),
@@ -166,8 +169,8 @@ Cutri et al. 2013), the catalog of variable stars from the AllSky
  Automated Survey for SuperNovae (ASAS-SN Jayasinghe
 et al. 2020), the catalogs of LPVs in the Magellanic Clouds from
 the third phase of the Optical Gravitational Lensing Experiment
-(OGLE-III, Soszy ´nski et al. 2009, 2011), the early third data release
- from the Gaia mission ( Gaia EDR3, Gaia Collaboration
+(OGLE-III, Soszy´nski et al. 2009, 2011), the early third data release
+ from the Gaia mission (Gaia EDR3, Gaia Collaboration
 et al. 2021), and the catalog of LPV candidates from Gaia DR2
 (Mowlavi et al. 2018).
 Following Grady et al. (2019), we took ages from
@@ -175,18 +178,18 @@ Kharchenko et al. (2016) and Baumgardt et al. (2013) for clusters
 in the Galaxy and LMC, respectively, thereby ensuring that ages
 would be homogeneously derived for clusters in both galaxies.
 Age uncertainties from Baumgardt et al. (2013), provided for
-each cluster, are generally around σlog(τ)≃0.05. Kharchenko
+each cluster, are generally around σlog(τ) ≃ 0.05. Kharchenko
 et al. (2016) do not provide age uncertainties, but a reasonable
-upper limit for their method should be σlog(τ)=0.2 based on
+upper limit for their method should be σlog(τ) = 0.2 based on
 the analysis of Kharchenko et al. (2005) (the same value was
 adopted by Grady et al. 2019, in their Fig. 7).
 As discussed by Kamath et al. (2010), the age of the SMC
 cluster NGC 419 is believed to be around 1.4-1.6 Gyr. This is
-consistent with the value τ=1.45±0.05 Gyr from Goudfrooij
-et al. (2014), while it is as young as τ≃0.89±0.015 Gyr according
+consistent with the value τ = 1.45 ± 0.05 Gyr from Goudfrooij
+et al. (2014), while it is as young as τ ≃ 0.89 ± 0.015 Gyr according
  to Perren et al. (2017). Since an accurate estimate is not
 necessary for our exploratory analysis, we took a rough average
-and assumed log( τ/yr)=9.1±0.1. NGC 419 and NGC 1846
+and assumed log(τ/yr) = 9.1 ± 0.1. NGC 419 and NGC 1846
 likely exhibit TP-AGB boosting (Girardi et al. 2013). We note
 that some clusters show multiple stellar populations, whose age
 spread has been estimated in some cases (e.g., Mackey & Broby
@@ -194,12 +197,12 @@ Nielsen 2007; Joo & Lee 2013; Villanova et al. 2014) and is consistent
  with the age uncertainties we adopted.
 Distances of Galactic clusters were also taken from
 Kharchenko et al. (2016), while for the Magellanic Clouds and
-their clusters we adopted the distance moduli µLMC=18.49±
-0.09 mag and µSMC=18.96±0.02 mag from de Grijs et al.
+their clusters we adopted the distance moduli µLMC = 18.49 ±
+0.09 mag and µSMC = 18.96 ± 0.02 mag from de Grijs et al.
 (2017). We searched for data on interstellar extinction from several
  literature works (e.g., Nayak et al. 2016; Kharchenko et al.
 2016; Perren et al. 2017), all of which suggest that extinction
-in the Ksfilter is smaller than ∼0.1 mag for most of the clusters
+in the Ks filter is smaller than ∼0.1 mag for most of the clusters
  we considered, and at most as large as ∼0.3 mag, which is
 negligible for our purposes.
 Article number, page 2 of 9
@@ -208,7 +211,7 @@ A detailed membership verification is beyond the scope of
 this work, and we relied on the checks performed by authors
 whose source lists we adopted. It should be kept in mind that
 some sources may not be real cluster members.
-For sources without a spectral type, we used the Gaia 2MASS
+For sources without a spectral type, we used the Gaia-2MASS
  diagram (Lebzelter et al. 2018, 2019) to determine
 whether they are O- or C-rich. We used the near-infrared period-luminosity
  diagram to identify the most likely pulsation mode
@@ -230,7 +233,7 @@ likely negligible compared with those associated with age.
 Panel (a) of Fig. 1 shows a comparison between model predictions
  and observations in the PFM–log(τ/yr) plane. The former
 are displayed by a density map showing the expected number
-NFMof LPVs pulsating in the FM in each period-age bin, normalized
+NFM of LPVs pulsating in the FM in each period-age bin, normalized
  to maximum. Model predictions are in good agreement
 with data derived from observations (i.e., individual LPVs in
 clusters, represented by symbols), and they show that the period
@@ -238,17 +241,17 @@ clusters, represented by symbols), and they show that the period
 Crosses mark the average properties of the three groups of C-rich
  LPVs from Feast et al. (2006, their table 4), which fit the
 general pattern with the exception of their group 3, estimated to
-be older than what our models predict at P≃650.
+be older than what our models predict at P ≃ 650.
 We also show a linear best-fit to the models distribution
 (weighted by NFM), which shows a fairly good agreement with
 the best-fit to observations by Grady et al. (2019, also shown).
 However, the best-fit line does not fully capture the properties
 of the predictions, nor of the observed trend. Indeed, models are
 indicative of a substantial dispersion around the relation. For instance,
- at 1 Gyr, the FM period ranges from ∼200 days to∼550
+ at 1 Gyr, the FM period ranges from ∼200 days to ∼550
 days. Conversely, LPVs pulsating in the FM with a period of 350
-days are predicted to be at least ∼200 Myr old, but they can be as
-old as∼3 Gyr. Observed data are consistent with the predicted
+days are predicted to be at least ∼200 Myr old, but they can be as
+old as ∼3 Gyr. Observed data are consistent with the predicted
 spread, although the agreement cannot be considered as the observed
  sample adopted is not complete.
 Nonetheless, it is relevant that some clusters host multiple
@@ -261,20 +264,21 @@ with the age uncertainties we adopted. This means that longerperiod
 opposite is true at shorter periods. This tends to strengthen the
 agreement between models and observations.
 Our data set samples the intermediate-age range (NGC 419
-and NGC 1846) relatively well as well as old ages ( ωCen, 47
+and NGC 1846) relatively well as well as old ages (ω Cen, 47
 Tuc, NGC 362, and NGC 2808). This provides us with the opportunity
  to study the period distribution at these ages, and for
-a more detailed comparison between models and observations.On the basis of the average age of these two groups of clusters
+a more detailed comparison between models and observations.
+On the basis of the average age of these two groups of clusters
  and the associated uncertainty, and taking the discrete age
 sampling of the isochrones into account, we considered the age
-ranges log(τ/yr)=9.15±0.10 and log(τ/yr)=10.10±0.20. Period
+ranges log(τ/yr) = 9.15 ± 0.10 and log(τ/yr) = 10.10 ± 0.20. Period
  distributions at those ages are displayed in panels (b) and (c)
 of Fig. 1, respectively, showing good agreement between model
 predictions and observations. We note that in both cases, the distribution
  is skewed toward short periods, which seems to be true
 at all ages for O-rich stars. This can be seen in panel (a) of Fig. 2,
 which is a version of the PA plane limited to an O-rich composition2.
- Indeed, although at τ≲5 Gyr the observed sample is
+ Indeed, although at τ ≲ 5 Gyr the observed sample is
 very scarce, it appears to be consistent with models predicting a
 more densely populated region in the shorter-period half of the
 PA distribution.
@@ -312,7 +316,7 @@ both patterns emerging because of the prominent role of mass in
 shaping stellar structure and evolution. Indeed, stellar mass determines
  the lifetimes of the main evolutionary stages, and thus
 the age of stars in the AGB phase. Pulsation models (Trabucchi
- et al. 2021b) show that the radius Rdom,0(and corresponding
+ et al. 2021b) show that the radius Rdom,0 (and corresponding
  luminosity) at the onset of dominant FM pulsation (DFMP)
 increases with mass, so that the most massive FM-dominated
 LPVs are brighter. They also have longer periods, as this increases
@@ -323,18 +327,18 @@ We note that this would not be the case if the FM were dominant
  along the entire AGB, as the large change in radius during
 this phase would result in a wide range of periods at a given age.
 It is the very fact that DFMP occurs only during the final portion
-2A further version of the PA plane highlighting both chemical types
+2 A further version of the PA plane highlighting both chemical types
 can be found in Fig. A.2 of appendix A.1.
 Article number, page 3 of 9
-A&A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs
-Fig. 1. Period-age diagram. Panel (a) shows the predicted period-age distribution (darker tones indicate a higher expected number of LPVs on
+A&A proofs:manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs
+Fig. 1. Period-age diagram. Panel (a) shows the predicted period-age distribution (darker tones indicate a higher expected number of LPVs on
 a linear scale, normalized to maximum). Symbols represent observed LPVs (green: SRVs; purple: Miras; white: unclassified) with the shape
 indicating their host cluster or literature source as indicated in the legend. The age uncertainties are marked by the error bars. The groups of
 galactic C-stars of Feast et al. (2006) are marked by crosses annotated with the group number. The solid and dotted line represent a linear best-fit
 to models and the best-fit by Grady et al. (2019), respectively. Period distributions at selected ages are compared in panels (b) and (c) and marked
-in panel (a) by the blue and red shaded areas (at log( τ/yr)∼9.15 and∼10.10, respectively). For clarity, the e ffect of the TP-AGB boosting is
+in panel (a) by the blue and red shaded areas (at log(τ/yr) ∼ 9.15 and ∼ 10.10, respectively). For clarity, the effect of the TP-AGB boosting is
 suppressed in panel (a).
-Fig. 2. Similar to Fig. 1, but limited to O-rich (left panel) and C-rich (right panel) LPVs. The solid line marks the best fit to the models, while
+Fig. 2. Similar to Fig. 1, but limited to O-rich (left panel) and C-rich (right panel) LPVs. The solid line marks the best fit to the models, while
 dashed lines are best fits to the edges of the model distribution (see the text for more details).
 of the AGB that limits the range of periods a FM-pulsating LPV
 can have at a given age. Yet, the DFMP part of the AGB is long
@@ -346,19 +350,20 @@ At a given initial metallicity Zi, the shape of the period distribution
 envelope expansion accelerates, while the period becomes progressively
  less sensitive to changes in radius (see Appendix C).
 In particular, the slope of the period-radius relation decreases
-sharply at Pb=P(Rb). The FM period distribution is roughly
+sharply at Pb = P(Rb). The FM period distribution is roughly
 symmetric around that value, but at its short-period side, the FM
 is not dominant. Therefore, when only FM-dominated LPVs are
 considered, as is done here, the observed period distribution appears
- skewed toward short periods.This feature is strengthened when a set of isochrones is considered
+ skewed toward short periods.
+This feature is strengthened when a set of isochrones is considered
  which spans a range of initial metallicities because the
 adopted criterion for the onset of DFMP does not depend on
 metallicity, but the FM period does as metal-poor LPVs are
 warmer and have smaller radii compared with metal-rich ones.
 As a consequence, the bulk of the period distribution of metal-poor
  LPVs is at periods shorter than Pb, so they only contribute
-to the global distribution (i.e., at all Ziat a given age) over a
-small period range at P≳Pb. In contrast, metal-rich LPVs have
+to the global distribution (i.e., at all Zi at a given age) over a
+small period range at P ≳ Pb. In contrast, metal-rich LPVs have
 periods well beyond Pb, so they contribute both at that value and
 at longer periods. The result is an excess of FM-dominated LPVs
 near Pb, that is to say on the short side of the overall period distribution.
@@ -377,20 +382,20 @@ environment-dependent, and it is not necessarily universal.
 A further point of uncertainty stems from the fact that the
 prescription we adopted assumes that the FM period only depends
  upon the mass and radius, and that it is affected by a
-change in composition only through the e ffect that such a variation
+change in composition only through the effect that such a variation
  has on the radius. While this is true to a good approximation,
 linear models show a small dependence of periods on metallicity
  at a fixed mass and radius, but the quantitative impact in the
 nonlinear case is unknown. We can only estimate, based on the
-results of Trabucchi et al. (2019), an uncertainty of ±10% at most
+results of Trabucchi et al. (2019), an uncertainty of ±10% at most
 with respect to the prescriptions adopted here.
 Qualitatively, a realistic age-metallicity relation and the
 metallicity dependence of the period and of the onset of DFMP
 are all expected to result in a steeper PA relation than the one
 we predict, but it is difficult to assess the relative importance of
-these e ffects. In this sense, the composition probably a ffects the
+these effects. In this sense, the composition probably affects the
 shape of the PA relation more than its dispersion. The latter is
-likely a ffected by the composition indirectly through mass loss,
+likely affected by the composition indirectly through mass loss,
 the analysis of which is beyond the scope of this study. However,
  we point out that mass loss represents a source of scatter in
 combination with the occurrence of thermal pulses, because it reduces
@@ -410,7 +415,7 @@ as is customarily done for classical Cepheids with a color term
 (e.g., Bono et al. 2005), but with unsatisfactory results. A correction
  dependent on the photometric amplitude of variability represents
  a promising alternative, but it cannot be pursued at the
-moment. Indeed, for computational e fficiency, current pulsation
+moment. Indeed, for computational efficiency, current pulsation
 models include only a crude treatment of the atmospheric layers
 as they do not affect pulsation periods. On the other hand, the
 atmosphere is crucial in determining the spectral energy distribution
@@ -420,7 +425,7 @@ sample adopted here is too heterogeneous for a self-consistent
 investigation of amplitude, but this kind of study could be made
 possible by the upcoming data release 3 of the Gaia mission
 (Gaia Collaboration et al. 2021) and the future Legacy Survey
-of Space and Time (LSST, Ivezi ´c et al. 2019) of the Vera Rubin
+of Space and Time (LSST, Ivezić et al. 2019) of the Vera Rubin
 Observatory.
 It is worth noting that our analysis applies to Miras as well
 as SRVs, provided that they predominantly pulsate in the FM.
@@ -430,7 +435,8 @@ been done in literature so far, undoubtedly has some advantages:
 detect than SRVs, and their light curves are easier to process
 as they tend to be more regular. Moreover, Miras represent the
 end-point of AGB evolution, so in principle they correspond to a
-smaller range of stellar parameters compared to the full extent ofthe DFMP regime, and they display a smaller range of periods
+smaller range of stellar parameters compared to the full extent of
+the DFMP regime, and they display a smaller range of periods
 at a given age (cf. Feast & Whitelock 2000b). In other words,
 they should exhibit a relatively narrow PA relation (even though,
 based on the observational data set we adopted, there is no conclusive
@@ -492,7 +498,7 @@ scatter. We suggest that corrective terms, involving the amplitude
 this possibility. A study of the impact of metallicity on nonlinear
 pulsation is highly desirable to pursue this line of investigation,
 Article number, page 5 of 9
-A&A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs
+A&A proofs:manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs
 as would be a theoretical investigation of the dependence of photometric
  amplitudes upon global stellar parameters.
 Acknowledgements. M.T. and N.M. acknowledge the support provided by the
@@ -502,29 +508,29 @@ this paper, and to Léo Girardi for helping with the computation and interpretat
  of isochrones. This research has made use of: data from the OGLE-III
 Catalog of Variable Stars; data products from the Two Micron All Sky Survey,
  which is a joint project of the University of Massachusetts and the Infrared
- Processing and Analysis Center /California Institute of Technology, funded
+ Processing and Analysis Center/California Institute of Technology, funded
 by the National Aeronautics and Space Administration and the National Science
  Foundation; data from the European Space Agency (ESA) mission Gaia
-(https://www.cosmos.esa.int/gaia ), processed by the Gaia Data Processing
+(https://www.cosmos.esa.int/gaia), processed by the Gaia Data Processing
  and Analysis Consortium (DPAC, https://www.cosmos.esa.int/web/
-gaia/dpac/consortium ). Funding for the DPAC has been provided by national
+gaia/dpac/consortium). Funding for the DPAC has been provided by national
  institutions, in particular the institutions participating in the Gaia Multilateral
- Agreement. This research has made use of the following free /open source
-software and /or libraries: the Starlink Tables Infrastructure Library (STILTS and
+ Agreement. This research has made use of the following free/open source
+software and/or libraries: the Starlink Tables Infrastructure Library (STILTS and
 Topcat, Taylor 2006); IPython (Pérez & Granger 2007) and Jupyter (Kluyver
-et al. 2016) notebooks; the P ython libraries N umPy(Harris et al. 2020), S ciPy
-(Virtanen et al. 2020), matplotlib (a Python library for publication quality graphics,
- Hunter 2007), and A stropy (a community-developed core P ython package
+et al. 2016) notebooks; the Python libraries NumPy (Harris et al. 2020), SciPy
+(Virtanen et al. 2020), matplotlib (a Python library for publication quality graphics,
+ Hunter 2007), and Astropy (a community-developed core Python package
 for Astronomy, Astropy Collaboration et al. 2018). This research has made use of
 NASA’s Astrophysics Data System Bibliographic Services, and of the following
 services provided by CDS, Strasbourg: the SIMBAD data base, VizieR catalogue
-access tool (DOI: 10.26093 /cds/vizier, Ochsenbein et al. 2000), the “Aladin sky
+access tool (DOI: 10.26093/cds/vizier, Ochsenbein et al. 2000), the “Aladin sky
 atlas” (Bonnarel et al. 2000), and the cross-match service (Boch et al. 2012;
 Pineau et al. 2020).
 References
 Anderson, R. I., Saio, H., Ekström, S., Georgy, C., & Meynet, G. 2016, A&A,
 591, A8
-Astropy Collaboration, Price-Whelan, A. M., Sip ˝ocz, B. M., et al. 2018, AJ, 156,
+Astropy Collaboration, Price-Whelan, A. M., Sipőcz, B. M., et al. 2018, AJ, 156,
 123
 Battinelli, P. & Demers, S. 2012, A&A, 544, A10
 Battinelli, P. & Demers, S. 2013, A&A, 553, A93
@@ -565,10 +571,11 @@ Grady, J., Belokurov, V ., & Evans, N. W. 2019, MNRAS, 483, 3022
 Grady, J., Belokurov, V., & Evans, N. W. 2020, MNRAS, 492, 3128
 Harris, C. R., Millman, K. J., van der Walt, S. J., et al. 2020, Nature, 585, 357
 Hunter, J. D. 2007, Computing in Science & Engineering, 9, 90
-Ivezi ´c, Ž., Kahn, S. M., Tyson, J. A., et al. 2019, ApJ, 873, 111
-Jayasinghe, T., Stanek, K. Z., Kochanek, C. S., et al. 2020, MNRAS, 491, 13Joo, S.-J. & Lee, Y .-W. 2013, ApJ, 762, 36
+Ivezić, Ž., Kahn, S. M., Tyson, J. A., et al. 2019, ApJ, 873, 111
+Jayasinghe, T., Stanek, K. Z., Kochanek, C. S., et al. 2020, MNRAS, 491, 13
+Joo, S.-J. & Lee, Y.-W. 2013, ApJ, 762, 36
 Jura, M. & Kleinmann, S. G. 1992, ApJS, 79, 105
+Kamath, D., Wood, P. R., Soszyński, I., & Lebzelter, T. 2010, MNRAS, 408, 522
+Kamath, D., Wood, P. R., Soszy´nski, I., & Lebzelter, T. 2010, MNRAS, 408, 522
 Kharchenko, N. V., Piskunov, A. E., Röser, S., Schilbach, E., & Scholz, R. D.
 2005, A&A, 438, 1163
 Kharchenko, N. V., Piskunov, A. E., Schilbach, E., Röser, S., & Scholz, R. D.
@@ -614,9 +621,9 @@ Software and Systems XXVII, ed. P. Ballester, J. Ibsen, M. Solar, & K. Shortridg
  125
 Sakamoto, T., Matsunaga, N., Hasegawa, T., & Nakada, Y. 2012, ApJ, 761, L10
 Skrutskie, M. F., Cutri, R. M., Stiening, R., et al. 2006, AJ, 131, 1163
+Soszyński, I., Olechowska, A., Ratajczak, M., et al. 2021, ApJ, 911, L22
+Soszyński, I., Udalski, A., Szymański, M. K., et al. 2009, Acta Astron., 59, 239
+Soszyński, I., Udalski, A., Szymański, M. K., et al. 2011, Acta Astron., 61, 217
+Soszy´nski, I., Olechowska, A., Ratajczak, M., et al. 2021, ApJ, 911, L22
+Soszy´nski, I., Udalski, A., Szyma´nski, M. K., et al. 2009, Acta Astron., 59, 239
+Soszy´nski, I., Udalski, A., Szyma´nski, M. K., et al. 2011, Acta Astron., 61, 217
 Taylor, M. B. 2006, in Astronomical Society of the Pacific Conference Series,
  Vol. 351, Astronomical Data Analysis Software and Systems XV, ed.
 C. Gabriel, C. Arviset, D. Ponz, & S. Enrique, 666
@@ -639,14 +646,14 @@ Wyatt, S. P. & Cahn, J. H. 1983, ApJ, 275, 225
 Ya’Ari, A. & Tuchman, Y. 1996, ApJ, 456, 350
 Article number, page 6 of 9
 Trabucchi et al.: The period-age relation of LPVs
-Fig. A.1. Absolute- KsGaia -2MASS diagram for the stars with or without
+Fig. A.1. Absolute-Ks Gaia-2MASS diagram for the stars with or without
  a spectral type (left and right panels, respectively) in the selected
 sample. Symbol colors and shapes indicate the spectral type and host
 cluster described in the legend, respectively, which also reports the number
  of sources displayed (i.e., having both optical and NIR photometry).
 The dashed line marks the separation between O- and C-rich sources
 according to Lebzelter et al. (2018). An arrow marks the source MSX
-LMC 124 in NGC 1830 that, having WBP,RP−WJ,Ks=9.73 mag, lies outside
+LMC 124 in NGC 1830 that, having $W_{\mathrm{BP,RP}} - W_{J,K_\mathrm{s}} = 9.73$ mag, lies outside
  the plot area. Background dots are LPVs in the LMC from OGLE-III
  (light gray) and Mowlavi et al. (2018) (darker gray).
 Appendix A: Classification of observed LPVs
@@ -658,24 +665,25 @@ is the star 5-3 in NGC 419, for which we adopted the S-type as
 reported by Lloyd Evans (1983a).
 We also searched the SIMBAD astronomical database
 (Wenger et al. 2000) for spectral type information, which we
-found for 26 more stars. We used the Gaia -2MASS diagram of
+found for 26 more stars. We used the Gaia-2MASS diagram of
 Lebzelter et al. (2018) to confirm the chemical type classification
 taken from literature and to characterize the surface chemistry of
 sources of an unknown spectral type (see Fig. A.1). Among the
 latter, we identified 13 C-rich stars and 106 O-rich sources.
 Three of the sources without a spectral type lack Gaia photometry,
- so they cannot be classified with the Gaia -2MASS. Two
+ so they cannot be classified with the Gaia-2MASS. Two
 of them (LW5 and LW22 in 47 Tuc) have no match in Gaia
 EDR3, but they have NIR data and are probably O-rich based on
-their position in the J−Ksversus Kscolor-magnitude diagram.
+their position in the J − Ks versus Ks color-magnitude diagram.
 The third source is one of the two stars in NGC 1903 from the
 list of Grady et al. (2019), which we identified with the 2MASS
 source J05171633-6920298. It is likely C-rich according to the
 NIR color-magnitude diagram.
-Finally, the sources V138 in ωCen, LW15 in NGC 2808,
+Finally, the sources V138 in ω Cen, LW15 in NGC 2808,
 and LW4 in NGC 362 lack NIR data. They cannot be placed in
 the NIR PL diagram, upon which we relied to assign pulsation
-modes to periods, so we excluded them from the sample. Thedistribution of O- and C-rich sources in the period-age diagram
+modes to periods, so we excluded them from the sample. The
+distribution of O- and C-rich sources in the period-age diagram
 is shown in Fig. A.2.
 Appendix A.2: Variability
 For variability information, we complemented the data from
@@ -709,12 +717,12 @@ When available, the variability type was taken from OGLEIII
  or ASAS-SN. We note that we are only interested in whether
 a star is classified as a Mira or semi-regular variable. In many
 cases, this type is not given or the star is simply considered, for
-instance, as an LPV or AGB in SIMBAD , in which case we considered
+instance, as an LPV or AGB in SIMBAD, in which case we considered
  the variability type as undetermined.
 Appendix B: Fitting relations
 We obtained analytic expressions for the PA relations separately
 for O- and C-rich stars, proceeding as follows. For each bin of
-log(τ/yr),we modeled the period distribution with a Gaussian
+log(τ/yr), we modeled the period distribution with a Gaussian
 kernel density estimator (KDE) and identified the peak of the
 distribution. To describe the boundaries of the PA relation, we
 adopted, at each age, the values of the period at which the distribution
@@ -722,35 +730,37 @@ adopted, at each age, the values of the period at which the distribution
 value upon visual inspection of the PA plane. We modeled the
 central trend of the PA relation, as well as its short- and longperiod
  edges, with linear or quadratic functions in the form
-log(τ/yr)=a0+a1(P/˜P)+a2(P/˜P)2, (B.1)
-(where ˜P=350 days) and employed a Lenvenberg-Marquardt
-nonlinear regression algorithm3to derive the best-fit coe fficients,
+$\log(\tau/\mathrm{yr}) = a_0 + a_1\,(P/\tilde{P}) + a_2\,(P/\tilde{P})^2$, (B.1)
+(where $\tilde{P} = 350$ days) and employed a Levenberg-Marquardt
+nonlinear regression algorithm3 to derive the best-fit coefficients,
 which are listed in Table B.1. We remark that these best-fit expressions
- are only valid in the intervals 8 .0≤log(τ/yr)≤10.3
-and 20<P/days<700 for O-rich composition, and within
-3We made use of the Python library SciPy to perform Gaussian KDE
+ are only valid in the intervals 8.0 ≤ log(τ/yr) ≤ 10.3
+and 20 < P/days < 700 for O-rich composition, and within
+3 We made use of the Python library SciPy to perform Gaussian KDE
 modeling and best-fit, respectively, by means of the gaussian_kde
 tool from the stats module and the curve_fit function from the
 optimize module.
 Article number, page 7 of 9
-A&A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs
-Fig. A.2. Similar to Fig. 1, except each source is color-coded according to whether it has been classified as O-rich (blue) or C-rich (red).
-Table B.1. Best-fit coe fficients for the PA relation and its boundaries in
+A&A proofs:manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs
+Fig. A.2. Similar to Fig. 1, except each source is color-coded according to whether it has been classified as O-rich (blue) or C-rich (red).
+Table B.1. Best-fit coefficients for the PA relation and its boundaries in
 the form given in Eq. B.1.
 Sp. type relation a0 a1 a2
-O-richcenter 10.78 -2.660 0.5953
+O-rich
+center 10.78 -2.660 0.5953
 lower edge 10.46 -2.818 0.6578
 upper edge 10.54 -0.8187 -0.2335
-C-richcenter 9.755 -0.7532
+C-rich
+center 9.755 -0.7532
 lower edge 9.982 -1.698
 upper edge 8.498 -1.827 -0.9959
+8.6 ≤ log(τ/yr) ≤ 9.3 and 140 < P/days < 620 in the C-rich
+8.6 ≤log(τ/yr) ≤9.3 and 140 < P/days < 620 in the C-rich
 case.
 Because of the connection between age and initial mass, the
 PA relation can be translated into a period-initial mass relation,
 which we derived using the same approach described above, and
 assuming the form
-log(Mi/M⊙)=b0+b1(P/˜P)+b2(P/˜P)2. (B.2)
+$\log(M_\mathrm{i}/M_\odot) = b_0 + b_1\,(P/\tilde{P}) + b_2\,(P/\tilde{P})^2$. (B.2)
 The resulting best-fit lines are displayed in Fig. B.1, and the coefficients
  are given in Table B.2.
 We remark that both the PA and the period-initial mass relations
@@ -758,29 +768,32 @@ We remark that both the PA and the period-initial mass relations
 mixing, as well as on the properties of the population of LPVs,
 namely the star-formation history and age-metallicity relation.
 Appendix C: The shape of the period distribution
-As an example case, we consider an isochrone of age log( τ/yr)=
-8.3 and initial metallicity Zi=0.006. Stars on the TP-AGB have
-initial masses Mi≃3.85 M⊙over a small range of ∼10−3M⊙.
+As an example case, we consider an isochrone of age log(τ/yr) =
+8.3 and initial metallicity Zi = 0.006. Stars on the TP-AGB have
+initial masses Mi ≃ 3.85 M⊙ over a small range of ∼$10^{-3}$ M⊙.
 The relation between period and initial mass is displayed in
-panel (a) of Fig. C.1, where isochrone portions undergoingTable B.2. Best-fit coe fficients for the period-initial mass relation and
+panel (a) of Fig. C.1, where isochrone portions undergoing
+Table B.2. Best-fit coefficients for the period-initial mass relation and
 its boundaries in the form given in Eq. B.2.
 Sp. type relation b0 b1 b2
-O-richcenter -0.2790 0.8958 -0.1828
+O-rich
+center -0.2790 0.8958 -0.1828
 lower edge -0.1772 0.9975 -0.2203
 upper edge -0.1740 0.2783 0.8247
-C-richcenter -0.0304 0.2885
+C-rich
+center -0.0304 0.2885
 lower edge -0.0131 0.5752
 upper edge -0.2245 -0.2720 0.2343
 DFMP are indicated by solid lines. Panel (b) shows the period
-distributions for a few di fferent cases.
-It is instructive, to begin with, to ignore the e ffect of thermal
+distributions for a few different cases.
+It is instructive, to begin with, to ignore the effect of thermal
 pulses and consider only the quiescent evolution (green lines in
 Fig. C.1). The smallest initial mass corresponds to a star that just
 entered the TP-AGB, when the FM has a period of ∼240 days
 but is not dominant. It only becomes dominant above a threshold
 radius Rdom,0, that is for periods longer than a (mass-dependent)
-critical period Pdom,0(the solid gray line in Fig. C.1). The least
-evolved (quiescent) model with dominant FM has PFM≃360
+critical period Pdom,0 (the solid gray line in Fig. C.1). The least
+evolved (quiescent) model with dominant FM has PFM ≃360
 days (green circle and horizontal line), corresponding to a sharp
 cut in the period distribution shown in panel (b) of Fig. C.1.
 As a star evolves along the AGB it expands, and its period becomes
@@ -790,17 +803,17 @@ radius and a longer period. The rate at which a period increases
 with radius is not fixed, but rather decreases with evolution. According
  to the prescription of Trabucchi et al. (2021b), a period
 grows with radius as a broken power-law with exponent α ≃ 1.8
+if R < Rb, and with α ≃ 1.25 at larger radii.
+if R <Rb, and with α≃1.25 at larger radii.
 This is equivalent to saying that the period grows more
-slowly after it exceeds a critical value Pb=P(Rb), marked by
+slowly after it exceeds a critical value Pb = P(Rb), marked by
 the gray dotted line in Fig. C.1. The isochrone reaches it at
 Article number, page 8 of 9
 Trabucchi et al.: The period-age relation of LPVs
-Fig. B.1. Similar to Fig. 2, but showing initial mass Miin place of age. The best-fit lines to the most populated band and edges of the theoretical
-PFM–Mirelation are shown.
-Fig. C.1. Period distribution at fixed age and metallicity. Panel (a) shows
+Fig. B.1. Similar to Fig. 2, but showing initial mass Mi in place of age. The best-fit lines to the most populated band and edges of the theoretical
+PFM – Mi relation are shown.
+Fig. C.1. Period distribution at fixed age and metallicity. Panel (a) shows
 period as a function of initial mass (current mass on the top axis) on the
-TP-AGB for a∼200 Myr old isochrone with Zi=0.006. Red lines
+TP-AGB for a ∼200 Myr old isochrone with Zi = 0.006. Red lines
 show full thermal pulses, while blue lines ignore luminosity spikes and
 green lines show only the quiescent evolution. The same color code
 is used for the period distributions (normalized to their maximum) on
@@ -811,7 +824,7 @@ Gray lines mark the critical values of periods at which the FM becomes
 dominant (solid line), less sensitive to radius (dotted line, which occurs
 at the vertical line for this specific isochrone), and independent of radius
 (dashed line).
-Mi≃3.8524 M⊙(vertical gray line), when PFM≃420 days. In
+Mi ≃3.8524 M⊙ (vertical gray line), when PFM ≃420 days. In
 models with a smaller initial mass, the period is still increasing
 at a relatively large rate as the envelope expands, while in more
 massive models the period has already become less sensitive to
@@ -822,9 +835,10 @@ distribution shown in panel (b) of Fig. C.1. The period distribution
 this maximum, while limiting the selection to DFMP, produces
 a distribution skewed toward short periods, as found in Sect. 3.
 If the luminosity dips following thermal pulses are taken
-into account (blue lines), the corresponding envelope contrac-tion causes the period to decrease, and the cut at ∼360 days
+into account (blue lines), the corresponding envelope contraction
+ causes the period to decrease, and the cut at ∼ 360 days
 becomes less sharp. Because of mass loss, the threshold period
-Pdom,0is lowered, so that the shortest period associated with
+Pdom,0 is lowered, so that the shortest period associated with
 DFMP does not correspond to the least evolved model (green
 circle), but rather to the luminosity dip of a thermal pulse (blue
 circle).
diff --git a/read/results/pypdf/2201.00214.txt b/read/results/pypdf/2201.00214.txt
index f048d15..9e65d22 100644
--- a/read/results/pypdf/2201.00214.txt
+++ b/read/results/pypdf/2201.00214.txt
@@ -1,8 +1,9 @@
-arXiv:2201.00214v1  [astro-ph.SR]  1 Jan 2022Temperature Analysis of Flaring
+arXiv:2201.00214v1  [astro-ph.SR]  1 Jan 2022
+T emperature Analysis of Flaring
 (AR11283) and non-Flaring (AR12194)
 Coronal Loops
 N. F athalian1, S. S. H osseini Rad2, N. A lipour2, H. S afari2
-1Department of Physics, Payame Noor University (PNU), 19395 -3697, Tehran, Iran.
+1Department of Physics, Payame Noor University (PNU), 19395 -3697, T ehran, Iran.
 2Department of Physics, Faculty of Science, University of Za njan, 45195-313, Zanjan, Iran.
 e-mail: narges_fathalian@alum.sharif.edu
 January 4, 2022
@@ -10,7 +11,7 @@ Abstract
 Here, we study the temperature structure of flaring and non-fl aring coronal loops, using extracted
 loops from images taken in six extreme ultraviolet (EUV) cha nnels recorded by Atmospheric Imaging
 Assembly (AIA)/ Solar Dynamic Observatory (SDO). We use dat a for loops of X2.1-class-flaring active
-region (AR11283) during 22:10UT till 23:00UT, on 2011, Sept ember 6; and non-flaring active region
+region (AR11283) during 22:10UT till 23:00UT , on 2011, Sept ember 6; and non-flaring active region
 (AR12194) during 08:00:00UT till 09:00:00UT on 2014, Octob er 26. By using spatially-synthesized
 Gaussian DEM forward-fitting method, we calculate the peak t emperatures for each strip of the loops.
 We apply the Lomb-Scargle method to compute the oscillation s periods for the temperature series of each
@@ -26,400 +27,411 @@ flaring ones, and maybe they are just fluctuations. Based on ou r confined obser
 flaring loops’ periods show more diversity and their tempera tures have wider ranges of variation than the
 non-flaring ones. More accurate commentary in this respect r equires more extensive statistical research
 and broader observations.
-Coronal Loops,Temperature Analysis, Temperature Oscilla tions,Flaring and non-Flaring Active Regions
+Coronal Loops,T emperature Analysis, T emperature Oscilla tions,Flaring and non-Flaring Active Regions
 I. I ntroduction
 Analyzing the thermal structure of coronal loops is of consi derable interest, especially as these
 magnetic loops have an essential role in heating the solar ch romosphere and corona. Such analysis
  can help to describe how the process of solar flaring is co rrelated with the loop’s thermal
 structure.
 Detections of coronal waves have a historical preview and ha ve been reported for several times
-(e.g., Aschwanden et al. (1999 );Nakariakov et al. (1999 );Wang et al. (2003 );Wang & Solanki (2004 );
-Berghmans & Clette (1999 );De Moortel et al. (2000 ),Verwichte et al. (2004 ),De Moortel & Brady
-(2007 ),Ballai et al. (2011 )). Coronal seismology and MHD waves have been reviewed wide ly by
+(e.g., Aschwanden et al. (1999); Nakariakov et al. (1999);W ang et al. (2003); W ang & Solanki (2004);
+Berghmans & Clette (1999); De Moortel et al. (2000), V erwichte et al. (2004), De Moortel & Brady
+(2007), Ballai et al. (2011)). Coronal seismology and MHD waves have been reviewed wide ly by
 
-De Moortel (2005 ),Nakariakov & Verwichte (2005 ),Aschwanden (2006 ),Banerjee et al. (2007 ) and
-De Moortel & Nakariakov (2012 ). Along with the development of the observations, transver se
-and longitudinal oscillations have also been studied theor etically (e.g., Gruszecki et al. (2006 ),
-Pascoe et al. (2007 ),Fathalian et al. (2010 );Luna et al. (2010 );Fathalian & Safari (2010 ). Coronal
+De Moortel (2005), Nakariakov & V erwichte (2005), Aschwanden (2006), Banerjee et al. (2007) and
+De Moortel & Nakariakov (2012). Along with the development of the observations, transver se
+and longitudinal oscillations have also been studied theor etically (e.g., Gruszecki et al. (2006),
+Pascoe et al. (2007), Fathalian et al. (2010); Luna et al. (2010); Fathalian & Safari (2010). Coronal
 seismology techniques help to elicit the information from o bservations of oscillatory phenomena
-and the results to be interpreted by using theoretical model s (see for e.g., Roberts et al. (1984 );
-Goossens et al. (1992 )). Oscillatory patterns and processes which happen during solar flares, were
-interesting and subject of investigations from different a pproaches (e.g., Nakariakov et al. (2010 ),
-Nisticò et al. (2013 ),Anfinogentov et al. (2013 ),Hindman & Jain (2014 ),Russell et al. (2015 )). As
+and the results to be interpreted by using theoretical model s (see for e.g., Roberts et al. (1984);
+Goossens et al. (1992)). Oscillatory patterns and processes which happen during solar flares, were
+interesting and subject of investigations from different a pproaches (e.g., Nakariakov et al. (2010),
+Nisticò et al. (2013), Anfinogentov et al. (2013), Hindman & Jain (2014), Russell et al. (2015)). As
 we know the transverse loops oscillations usually occur in r esponse to a close filament or flare
-(Wills-Davey & Thompson (1999 )).
+(W ills-Davey & Thompson (1999)).
 Rapidly decaying long-period oscillations are mostly inte rpreted as global (or fundamental
- mode) standing slow magnetoacoustic waves (reviewed by Liu & Ofman (2014 ), and Wang
-(2011 ), also see Ofman & Wang (2002 ), and for slow-mode observed in fan-loops see Pant et al.
-(2017 )). They often occur in hot coronal loops of active regions, a ssociated with tiny (or micro-)
+ mode) standing slow magnetoacoustic waves (reviewed by Liu & Ofman (2014), and W ang
+(2011), also see Ofman & W ang (2002), and for slow-mode observed in fan-loops see Pant et al.
+(2017)). They often occur in hot coronal loops of active regions, a ssociated with tiny (or micro-)
 flares.Increasing evidence has suggested that the harmonic type of decaying pulsations detected
 in intensity plots of solar and stellar flares are possibly ca used by standing slow-mode waves (see
-reviews by Van Doorsselaere et al. (2016 ), and McLaughlin et al. (2018 )).Excitation, propagation,
-and damping mechanisms of slow-mode waves have been studied theoretically (e.g., Wang et al.
-(2007 );Wang et al. (2015 );Jess et al. (2016 );Nakariakov et al. (2017 );Nisticò et al. (2017 );Kolotkov
-et al. (2019 );Krishna Prasad et al. (2019 );Reale et al. (2019 );Wang & Ofman (2019 )). To have
+reviews by V an Doorsselaere et al. (2016), and McLaughlin et al. (2018)).Excitation, propagation,
+and damping mechanisms of slow-mode waves have been studied theoretically (e.g., W ang et al.
+(2007); W ang et al. (2015); Jess et al. (2016); Nakariakov et al. (2017); Nisticò et al. (2017); Kolotkov
+et al. (2019); Krishna Prasad et al. (2019); Reale et al. (2019); W ang & Ofman (2019)). T o have
 a complete overview of slow-mode magnetoacoustic waves in c oronal loops see the review by
-Wang et al. (2021 ).
+W ang et al. (2021).
 Investigating and comparing the thermal structures and osc illations of coronal loops in loops
 of flaring and non-flaring active regions could help us in bett er understanding the loops’ material
 oscillations and the flare impact on them. Several different methods have been developed to investigate
  the thermal structure of the coronal loops and loo p strands. The thermal stability of the
-coronal loops was the subject of research, done by Habbal & Rosner (1979 ) (and references cited
-therein). McClymont & Craig (1985 ) stated that a pressure fluctuation must assist asymmetric
+coronal loops was the subject of research, done by Habbal & Rosner (1979) (and references cited
+therein). McClymont & Craig (1985) stated that a pressure fluctuation must assist asymmetric
 coronal temperature perturbation. They concluded that cor onal loops are impartially stable in
-the case of uniform heating. Van Doorsselaere et al. (2011 ) used spectroscopic line ratios to obtain
+the case of uniform heating. V an Doorsselaere et al. (2011) used spectroscopic line ratios to obtain
 the required temperature (via CHIANTI code) and estimated t he adiabatic index of the corona.
 The dependence of coronal loop temperature on loop length an d magnetic field strength is also
-a favorite topic. For instance, Dahlburg et al. (2018 ) probed the temperature properties of solar
+a favorite topic. For instance, Dahlburg et al. (2018) probed the temperature properties of solar
 coronal loops over a wide range of lengths and magnetic field s trengths via numerical simulations
  and observed a very high correlation between magnetic field strength and a maximum of
 the temperature. The effect of temperature inhomogeneity o n the periods and the damping times
 of the standing slow-modes in stratified solar coronal loops was studied either (e.g., Abedini et al.
-(2012 )).Fathalian (2019 ) estimated the loop temperature using the intensity ratios and the AIA response
+(2012)). Fathalian (2019) estimated the loop temperature using the intensity ratios and the AIA response
  functions in different wavelengths. Different emis sion measure (DEM) computations and
 methods have been developed to estimate the temperature in t he corona, which led to various
-discussions. Schmelz et al. (2010 ) analyzed a coronal loop, which was observed on 2010 August
+discussions. Schmelz et al. (2010) analyzed a coronal loop, which was observed on 2010 August
 3, by AIA. They took some differential emission measure (DEM ) curves, claiming a multithermal
 rather than an isothermal DEM distribution (for the cross-s ectional temperature of the loop). After
- that, Aschwanden & Boerner (2011 ) criticized the method of background subtraction which
+ that, Aschwanden & Boerner (2011) criticized the method of background subtraction which
 Schmelz et al. had applied. They claimed that the background subtraction method caused their
-inferred result of a multithermal loop. Aschwanden & Boerner (2011 ) analyzed a set of hundred
+inferred result of a multithermal loop. Aschwanden & Boerner (2011) analyzed a set of hundred
 loops and understood that 66% of the loops could be fitted with a narrowband single-Gaussian
 DEM model. In this regard, some attention was paid to the inst rumental limitations and ability
- of AIA and Guennou et al. (2012a ,b) discussed on the accuracy of the differential emission
+ of AIA and Guennou et al. (2012a,b) discussed on the accuracy of the differential emission
 measure diagnostics of solar plasmas in respect of the AIA in strument of SDO. The abovementioned
  controversy of whether the cross-field temperatures of coronal loops are multithermal or
-isothermal, continued by Schmelz et al. (2013 ) (similar to Schmelz et al. (2011 )). They analyzed
+isothermal, continued by Schmelz et al. (2013) (similar to Schmelz et al. (2011)). They analyzed
 twelve loops to understand the cross-field temperature dist ributions of them and reveal the loops’
-substructure. Based on their achievements, the warmer loop s entail broader DEMs. Thereafter,
-Schmelz et al. (2014 ) found indications of a relationship between the DEM weight ed-temperature
+substructure. Based on their achievements, the warmer loop s entail broader DEMs. Thereafter ,
+Schmelz et al. (2014) found indications of a relationship between the DEM weight ed-temperature
 and the cross-field DEM width for coronal loops. They argued t hat cooler loops tend to have
 narrower DEM widths. This could imply that fewer strands are seen emitting in the later cooling
- phase, which they claim could potentially resolve the ab ovementioned controversy. In this
-subject, Aschwanden et al. (2015 ) (as well as 2013 ( Aschwanden ,2013 )) developed a method to
+ phase, which they claim could potentially resolve the ab ovementioned controversy . In this
+subject, Aschwanden et al. (2015) (as well as 2013 ( Aschwanden, 2013)) developed a method to
 extract the loop temperature which is based on Gaussian fit fo r Differential Emission Measure,
 named spatially-synthesized Gaussian DEM forward-fitting method (DEM hereafter).
 This paper aims to analyze and compare thermal oscillations of coronal loops in flaring and
-non-flaring active regions, 11283 and 12194, respectively. The contents of this paper are as follows:
+non-flaring active regions, 11283 and 12194, respectively . The contents of this paper are as follows:
 In section II, data, we introduce the considered flaring and non-flaring ac tive regions and describe
 the data employed and the time and properties of the flare, occ urred in the active region. In
 section III, we explain the method we use to analyze the time-series of te mperatures in different
-strips of the loops. Section IVis specified to our results, obtained related to flaring and no nflaring
- regions. In section Vwe briefly state a summary of this work.
+strips of the loops. Section IV is specified to our results, obtained related to flaring and no nflaring
+ regions. In section V we briefly state a summary of this work.
 II. D ata
-We investigate the thermal structure and treatment of loops in a flaring region to see if it follows
+W e investigate the thermal structure and treatment of loops in a flaring region to see if it follows
 the transverse oscillations of the loops, and we examine the thermal fluctuations at the flare time.
 For this purpose, we select a high energy flare x2.1 which the t ransverse oscillations of two loops
-of it have been analyzed by Jain et al. (2015 ). They analyzed intensity variations in the wavelength
+of it have been analyzed by Jain et al. (2015). They analyzed intensity variations in the wavelength
 171 in two coronal loops of this region and detected obvious t ransverse oscillation with periods
-of roughly 2 minutes and decay times of 5 minutes for these loo ps at the flare time. To see
+of roughly 2 minutes and decay times of 5 minutes for these loo ps at the flare time. T o see
 the specific thermal properties of the flaring loops, as a blin d test, we select a non-flaring active
 region, extract its loops and analyze their thermal treatme nt. Then we compare the temperature
 treatment of the loops at the flaring region with the loops of t he non-flaring region to see the
 differences.
 The temperature analysis done here uses EUV images from the A IA onboard the SDO. AIA
 has ten different wavelength channels, three in white light and UV , and the other seven in EUV
-channels. Between these seven, the 304 filter, which is mostl y sensitive to chromospheric temperatures
- (in order of T=104.7K), not the corona, is not taken into account (Aschwanden et a l. 2015).
+channels. Between these seven, the 304 filter , which is mostl y sensitive to chromospheric temperatures
+ (in order of T = 104.7 K), not the corona, is not taken into account (Aschwanden et a l. 2015).
 Therefore, we consider the images of the events in the six wav elengths (94, 131, 171, 193, 211, 335
-). These are covering the coronal temperature range from T≈0.6 to T≥16MK.
+). These are covering the coronal temperature range from T ≈ 0.6 to T ≥ 16 MK .
 The two below data sets are finally selected to study thermal v ariations and coronal loops
 oscillations in flaring or non-flaring active regions. A few d istinct loops are visible in the regions.
-Finally, these loops are chosen:
-– Three loops of the x-flaring active region 11283: Observati onally, the X-class flares are rarely
-happening around the loops with the specification we are look ing for. So this selected LOS
-X-flare, which occurs near the loops is of rare cases. We consi der EUV images of NOAA
+Finally , these loops are chosen:
+– Three loops of the x-flaring active region 11283: Observati onally , the X-class flares are rarely
+happening around the loops with the specification we are look ing for . So this selected LOS
+X-flare, which occurs near the loops is of rare cases. W e consi der EUV images of NOAA
 AR 11283, in the time period of 22:10UT till 23:00UT of 2011 Se ptember 6 with the cadence
 of 12 sec. This period of time is selected since no other flare i s happening during it. A
 few distinct loops are visible and follow-able here during t his period. Loop shapes in our
 active region change permanently; therefore, it is difficul t or impossible to follow a loop
 over a very long time. Hence, it is not useful to extend the tim e interval of this region
 to the time before the flare. The transverse oscillations of t wo loops in this region were
-analyzed before by Jain et al. (2015 ). We mark these loops by A and B in Figure 1b. They
+analyzed before by Jain et al. (2015). W e mark these loops by A and B in Figure 1 b. They
 detected fundamental mode oscillation with periods of roug hly 2 minutes and decay time
-of 5 minutes for these loops. We are curious to see the loops’ t hermal oscillations (if any)
+of 5 minutes for these loops. W e are curious to see the loops’ t hermal oscillations (if any)
 or thermal fluctuations in this condition. Figure 1a (left) displays AR 11283 and the area,
-indicated by the white box is featured in a zoom-in view in Fig ure1.b (right) and the five
+indicated by the white box is featured in a zoom-in view in Fig ure 1.b (right) and the five
 selected parts of the center of the three chosen loops are sho wn by red lines (the movie of
 the region is available in this link). As it is clear in the mov ie, these three loops oscillate
-together and their oscillations decay simultaneously. The center of figure 1.a is coordinated
-at (230, 165) arcsec and its width and height are 450′′×456′′/750×775 pixels. The flare
+together and their oscillations decay simultaneously . The center of figure 1.a is coordinated
+at (230, 165) arcsec and its width and height are 450
+′′
+× 456
+′′
+/750 × 775 pixels. The flare
 occurring in this active region is an X2.1 class flare located close to the disk center at latitude
-14◦north and longitude 18◦west (269.9 arcsec, 129.9 arcsec). This flare initiates at 22 :12UT,
-ends about 22:24UT with the peak at 22:20UT, and associates w ith a coronal mass ejection
-(CME) which occurs from 2011 September 6, 21:36:05T to 2011 S eptember 7, 02:24:05T, with
+14◦ north and longitude 18 ◦ west (269.9 arcsec, 129.9 arcsec). This flare initiates at 22 :12UT ,
+ends about 22:24UT with the peak at 22:20UT , and associates w ith a coronal mass ejection
+(CME) which occurs from 2011 September 6, 21:36:05T to 2011 S eptember 7, 02:24:05T , with
 the radial velocity of 469 km/s,angular width of 252 deg, and position angle of 275 deg (for
-more details look at LASCO CME catalogue.)1
+more details look at LASCO CME catalogue.) 1
 – Three loops of non-flaring active region 12194: As a blind te st, we select three loops of the
 non-flaring (nonf hereafter) active region 12194 in the smoo th time period of 08:00:00UT till
 09:00:00UT of 2014 October 26. The center of figure 2.a is coordinated at (0, -264) arcsec
-and its width and height are 615′′×615′′/1025×1025 pixels. We consider the images of
+and its width and height are 615
+′′
+× 615
+′′
+/1025 × 1025 pixels. W e consider the images of
 the selected area with the cadence of 12 sec in the same six wav elengths mentioned above.
 These loops are relatively motionless and do not show any tra nsversal oscillation (see the
-region’s movie in the link). We select the loops in such a way t hat they do not have any
-crossing over the neighbor loops (in our perspective) durin g this time. In figure 2the
+region’s movie in the link). W e select the loops in such a way t hat they do not have any
+crossing over the neighbor loops (in our perspective) durin g this time. In figure 2 the
 selected loops are distinguished in red in the mentioned act ive region. The size of the final
-cut of non-flaring region (represented in the right) is 351 ×401 pixels.
-The data set are primarily downloaded at level 1 with a pixel r esolution of 0.6 arcsec. We use
-the standard aia_prep .prosubroutine available in SDO package SolarSoftWare library to adjust
+cut of non-flaring region (represented in the right) is 351 × 401 pixels.
+The data set are primarily downloaded at level 1 with a pixel r esolution of 0.6 arcsec. W e use
+the standard aia _ pre p . pro subroutine available in SDO package SolarSoftW are library to adjust
 the screen scale between the four arms of the AIA. This pre-pr ocessing step increases the data
 level from 1 to 1.5, so that finally no jump or sudden movement i s observed in the image series.
-We also used drot_map.prosubroutine to correct the differential rotation effect. Ac cording to the
+W e also used drot _ma p . pro subroutine to correct the differential rotation effect. Ac cording to the
 movie made by pre-processed images, the most obvious loops ( marked in the abovementioned
 figures) are selected in each region (with obvious transvers al oscillations in the case of the flaring
 active region).
 III. T emperature Analysis Method
-We extract the selected loop segment pixels, for each loop, a nd calculate the normal vectors
+W e extract the selected loop segment pixels, for each loop, a nd calculate the normal vectors
 to each point of the loop’s direction. Then by using these dat a, we straighten each loop in a
 considered box with the thickness of 15 to 40 pixels (macro-p ixels, depending on the available
 empty area around each loop and the distance to the neighbor l oop). The area around the
 loop is needed for calculations of background subtraction. The selected loop segment is cut in
-1Based on data on these WebSites: https://solarflare.njit.e du/webapp.html, and https://www.swpc.noaa.gov/
+1 Based on data on these W ebSites: https://solarflare.njit.e du/webapp.html, and https://www .swpc.noaa.gov/
 all wavelengths and at the same considered box from the image s set. These loop images are
 necessary entrances for our thermal analysis process. Then the loop is divided into different
-strips and its best division in terms of pixel intervals is co nsidered. To do thermal analysis, we
+strips and its best division in terms of pixel intervals is co nsidered. T o do thermal analysis, we
 use the spatially-synthesized Gaussian DEM forward-fittin g method founded by Aschwanden
-et al. (2015 ).
+et al. (2015).
 The images in the above six wavelength filters are considered to calculate the temperature in
 each strip of the loop. The DEM function is considered a singl e-Gaussian function relative to the
-temperature determined by the forward fitting method. To obt ain the temperature for each loop,
+temperature determined by the forward fitting method. T o obt ain the temperature for each loop,
 we divided the loop into narrow strips, and then the intensit y flux was averaged over each strip.
 The number of each strip is displayed with the index i. One of t he usual methods to subtract
 the background from observed data is fitting a single-Gaussi an cospatial function with a linear
 function on the flux profile. The DEM for each strip is consider ed to be single-Gaussian DEM
 in terms of the logarithm of the temperature, which has three free parameters ( Aschwanden &
-Boerner ,2011 ):
-DEM i=dEM i
-dT=EM p,iexp(−[log(T)−log(Tp,i)
-2σ2
-T,i). (1)
-In which, Tp,iis the DEM peak temperature, EM p,iis the peak EM function, and σT,iis the
-logarithmic width of the temperature for that strip. To calc ulate the background-subtracted fluxes
-(for each strip) we use Eq.6 of Aschwanden & Boerner (2011 ) (in below):
-F0λ=∫dEM(T)
-dTRλ(T)dT=∑
-kEM(Tk)Rλ(Tk). (2)
-Here, Rλ(T)is the instrumental temperature response function of each w avelength filter λ, which
-is obtained by the code aia_get_response .proin the SSW package. As time has passed, the AIA
+Boerner, 2011):
+DE M i = dE M i
+dT = E Mp,i exp (− [log (T) − log (Tp,i )
+2σ 2
+T,i
+). (1)
+In which, Tp,i is the DEM peak temperature, E Mp,i is the peak EM function, and σ T,i is the
+logarithmic width of the temperature for that strip. T o calc ulate the background-subtracted fluxes
+(for each strip) we use Eq.6 of Aschwanden & Boerner (2011) (in below):
+F0λ =
+∫ dE M (T)
+dT Rλ (T)dT = ∑
+k
+E M(Tk )Rλ (Tk ). (2)
+Here, Rλ (T) is the instrumental temperature response function of each w avelength filter λ , which
+is obtained by the code aia _get _res ponse . pro in the SSW package. As time has passed, the AIA
 response functions calibration has partly changed. Here, w e use the updated calibration of the
 temperature response functions, for each of the AIA tempera ture filters, according to the CHIANTI
- Version 2019 code available in the Solar SoftWare (SSW) . After forward-fitting the Gaussian
+ V ersion 2019 code available in the Solar SoftW are (SSW) . After forward-fitting the Gaussian
 DEM to the background-subtracted observed fluxes in multipl e wavelengths, the three-fitting parameters,
- temperature width ( σT,i), peak of temperature ( Tp,i), and peak emission measure ( EM p,i)
-are found by minimizing χ2
-i.
+ temperature width ( σ T,i), peak of temperature ( Tp,i), and peak emission measure ( E Mp,i )
+are found by minimizing χ 2
+i .
 Our data sample is uneven because of omitting some damaged im ages in between. Therefore
  to analyze the temperature oscillations, we use the Lom b-Scargle method. This method is
 developed to use the technique periodogram, in the case wher e the observation times are unevenly
- spaced ( Scargle ,1982 ). The Lomb-Scargle periodogram method is useful in cases wh ere
+ spaced ( Scargle, 1982). The Lomb-Scargle periodogram method is useful in cases wh ere
 the periodicity of data treatment is not immediately appare nt. This method allows efficient computation
  of a Fourier-like power spectrum estimator from un evenly-sampled data, resulting in
-an intuitive means of determining the period of oscillation (VanderPlas ,2018 ). Therefore we use
+an intuitive means of determining the period of oscillation (V anderPlas, 2018). Therefore we use
 Lomb-Scargle Periodogram to evaluate and estimate the effic ient periods of temperature oscillations
- in our loops. We select the first period related to the hi ghest power frequency, which is
-obtained by this method.We considered the achieved periods with the highest significances and
+ in our loops. W e select the first period related to the hi ghest power frequency , which is
+obtained by this method.W e considered the achieved periods with the highest significances and
 amplitudes. The most significant (highest) periods observe d in temperature (minute) for flaring
-and non-flaring loops are listed in Tables 1 and 2, respective ly. To estimate the significance of
+and non-flaring loops are listed in T ables 1 and 2, respective ly . T o estimate the significance of
 the periods, we computed the probability values (p-values) . In the Lomb-Scargle method, the
 significance returned here is the false alarm probability of the null hypothesis, i.e., as the data
-is composed of independent Gaussian random variables. Acco rdingly, low probability values
+is composed of independent Gaussian random variables. Acco rdingly , low probability values
 (p-value less than 0.05) indicate a high degree of significan ce in the associated periodic signal.
 IV . R esults
-i. Temperature Analysis of Flaring Active Region Loops
+i. T emperature Analysis of Flaring Active Region Loops
 Thenceforth the temperature time-series of different stri ps of the selected loops are calculated
 using the method described in section 3. In the following figu res, the vertical axis shows the
-logarithm of the temperature and the horizontal axis shows t he time duration. To be comparable
+logarithm of the temperature and the horizontal axis shows t he time duration. T o be comparable
 by eyes, all the forthcoming figures (which show the loops tem perature oscillations) have been coscaled
  in the (log) temperature range of 5.7 to 6.9. The color maps are shown for each temperature
-map. Loops A, B1, B2, C1, and C2 are subdivided into 25, 11, 8, 1 2, and 6 strips, respectively. Each
-strip’s length is equal to 4 pixels (macro-pixel), for all lo ops in this paper. For brevity, a few strips’
-temperature oscillations are presented here. Figure 3displays the time-series of temperature
-oscillations for the first 3 strips of Loop A, and first 2 strips of loops B1. We calculated the
+map. Loops A, B1, B2, C1, and C2 are subdivided into 25, 11, 8, 1 2, and 6 strips, respectively . Each
+strip’s length is equal to 4 pixels (macro-pixel), for all lo ops in this paper . For brevity , a few strips’
+temperature oscillations are presented here. Figure 3 displays the time-series of temperature
+oscillations for the first 3 strips of Loop A, and first 2 strips of loops B1. W e calculated the
 errors for each point (temperature) but removed in the prese ntation to avoid overcrowding of the
-figures. As we observe in Figures 3and 4), the temperature oscillations are started and increase
+figures. As we observe in Figures 3 and 4), the temperature oscillations are started and increase
 around 22:12 before the flare peak time (22:20) and are mostly continuing after the flare ended
 (22:24). These temperature oscillations follow the transv erse loop oscillations observed by Jain
-et al. (2015 ). As Jain et al. reported, LoopA and B have a transverse oscil lation with periods
+et al. (2015). As Jain et al. reported, LoopA and B have a transverse oscil lation with periods
 of roughly 2 minutes and decay times of 5 minutes, starting at 22:18 around the flare peak time
 (23:20) and decaying after the flare ended (22:24). So as we ob serve, the temperature oscillations in
 these flaring loops happen before the start of their transver se oscillations and are continuing even
-in the time interval after the transverse oscillations deca y. Although the temperature oscillations
-do not decay as rapid as the transverse oscillations do, and c onversely, the loop temperature
+in the time interval after the transverse oscillations deca y . Although the temperature oscillations
+do not decay as rapid as the transverse oscillations do, and c onversely , the loop temperature
 increases at the end of the oscillating mode (see Fig. 4, the temperature map of the loop A, for
 instance)
-We calculate the temperature oscillations periods, using L omb-Scargle method. We consider
+W e calculate the temperature oscillations periods, using L omb-Scargle method. W e consider
 the thermal oscillations periods with the highest significa nces. As this method shows, the most
 powerful period in the range of data time-series (listed in T able1) are from 7 to 28.4 minutes
 observed in the strips of the marked loops of this flaring regi on. These loops of flaring region
 also show some short periods in temperature oscillations wh ich some are less than 10 minutes
-(listed in Table 1). These short periods are more frequently observed in the lo ops of the flaring
+(listed in T able 1). These short periods are more frequently observed in the lo ops of the flaring
 active region. Such short periods are very scarce for the loo ps of the non-flaring active region
-(compare Tables 1and 2).
-The first column in Table 1is the number of every strip along the loop. The second column is
+(compare T ables 1 and 2).
+The first column in T able 1 is the number of every strip along the loop. The second column is
 the period of the most powerful frequency observed for the lo op strips, calculated by the LombScargle
- method. The third column shows the maximum of log (T)minus its minimum in each
-strip. The columns of Table 2are exactly the same as Table 1; the only difference is that Table 2is
+ method. The third column shows the maximum of log (T) minus its minimum in each
+strip. The columns of T able 2 are exactly the same as T able 1; the only difference is that T able 2 is
 for the non-flaring loops.
 The loop A, has the length of 42.3 (Mm) which is the length of th e selected part of the loop
 marked in Figure 1.b. The mean of the parameter (Max(log T)-Min(log T)) for the strips of loop A
-is 1.21. Mean of the temperature (log) of this loop over time i s 6.15±0.25. The loop B1, divided
-into 11 strips, has the length of 20.24 (Mm). The mean of (Max( logT)-Min(log T)) and the mean
-of the temperature for this loop are, 1.10, and 6.28 ±0.22 respectively. The loop B2, which has 8
-strips, with the length of 15.61 (Mm), has the mean temperatu re (log) of 6.21 ±0.21. The mean
+is 1.21. Mean of the temperature (log) of this loop over time i s 6.15 ± 0.25. The loop B1, divided
+into 11 strips, has the length of 20.24 (Mm). The mean of (Max( log T)-Min(log T)) and the mean
+of the temperature for this loop are, 1.10, and 6.28 ± 0.22 respectively . The loop B2, which has 8
+strips, with the length of 15.61 (Mm), has the mean temperatu re (log) of 6.21 ± 0.21. The mean
 of (Max(log T)-Min(log T)) is 0.81 through this loop segment. The loops C1 and C2, divi ded into
-12, and 6 strips, have the lengths of 22.08 and 11.06 (Mm), the mean temperatures of 6.25 ±0.22,
-and 6.14 ±0.25 (log), and the mean (Max(log T)-Min(log T)) of 1.48, 0.88, respectively.
-We observe that despite the temperature oscillations, the fl aring loops show a temperature
-rise at the end of the considered time interval (figure 3). As their temperature maps also show,
+12, and 6 strips, have the lengths of 22.08 and 11.06 (Mm), the mean temperatures of 6.25 ± 0.22,
+and 6.14 ± 0.25 (log), and the mean (Max(log T)-Min(log T)) of 1.48, 0.88, respectively .
+W e observe that despite the temperature oscillations, the fl aring loops show a temperature
+rise at the end of the considered time interval (figure 3). As their temperature maps also show ,
 the oscillations follow with a relatively sensible rise in t he final temperature of the loop segments
 (Figures 4). Although in the case of the transverse oscillations, the l oops oscillate as the flare
 occurs and then the oscillations decay and stop, in the case o f temperature oscillations, the temperatures
  of the various strips of the loops oscillate and at the end of the flare occurrence, they
 get to a relatively higher value of temperature in average.
-Figure 4shows the temperature maps of the flaring loops A, B1, B2, C1, a nd C2, respectively
+Figure 4 shows the temperature maps of the flaring loops A, B1, B2, C1, a nd C2, respectively
 as a time series. In each plot, the vertical axis is the distan ce along the loop segment in Mm, and
 the horizontal axis shows time. The color bar (in the left) sh ows the temperature range. Each
-separated grid part on the map is standing for one strip. Figu re4shows that the temperature
+separated grid part on the map is standing for one strip. Figu re 4 shows that the temperature
 for most of the strips increased, bypassing a few oscillatio ns. Before the end of the time duration,
 some strips become hotter (yellow ones) and some cooler (blu e ones). The loop B1 is colder at
 the early times of the duration and becomes hotter at the midd le and end times with a swing
 to lower temperatures again (see Fig. 4). There are some temperature fluctuations at the middle
 times (the red and green stripes) while at the end the strips t emperatures are smoother with less
 fluctuations. The temperature map of the loop segment B2 (Fig .4) shows that at the beginning of
-the time duration, the first strips of the loop are hotter, and the last ones are colder, but at the end
+the time duration, the first strips of the loop are hotter , and the last ones are colder , but at the end
 times this pattern is reversed in this loop segment. In loop s egment C1 (Fig. 4), the temperature
 fluctuations are mainly observed to start after the end of the flare (22:24), and at the end time
 (23:00) the temperature is much higher than the beginning. T he temperature is increasing after
 the flare time (22:24) for the loop C2 either (see Fig. 4). This happens with some oscillations in
-the strips’ temperatures. So as figure 4shows, the temperature increases with some fluctuation
+the strips’ temperatures. So as figure 4 shows, the temperature increases with some fluctuation
 in most of the flaring loops’ strips after the flare time. Accor ding to these temperature maps,
 the temperature fluctuations in the flaring loops are increas ing at the flaring time and around 20
 minutes after that.
-We expect the flaring loops to cool down as a result of heat cond uction and radiative cooling.
+W e expect the flaring loops to cool down as a result of heat cond uction and radiative cooling.
 Hence this relative temperature increase should be scrutin ized. As we probed, this temperature
-rise is also followed in intensity time-series. As the inten sity time-series show, the related intensity
-in the Loop A of the flaring AR increases at the end of the time du ration. To be assured, the
-authors also checked the wavelength of Fe XVIII which has a peak formation temperature of
-7×106◦K(Ugarte-Urra & Warren (2014 )). By using the method developed by Warren et al. (2012 )
-the contribution of the Fe XVIII emission line can be isolated from the AIA 94 , to analyze the
-evolution of hot plasma in the loops. We do it to omit the conta mination from the cooler plasma
-(mostly around 1MK) which also contributes to this AIA chann elBoerner et al. (2012 ). This is
+rise is also followed in intensity time-series. As the inten sity time-series show , the related intensity
+in the Loop A of the flaring AR increases at the end of the time du ration. T o be assured, the
+authors also checked the wavelength of Fe XV I I I which has a peak formation temperature of
+7 × 106 ◦K (Ugarte-Urra & W arren (2014)). By using the method developed by W arren et al. (2012)
+the contribution of the Fe XV I I I emission line can be isolated from the AIA 94 , to analyze the
+evolution of hot plasma in the loops. W e do it to omit the conta mination from the cooler plasma
+(mostly around 1MK) which also contributes to this AIA chann el Boerner et al. (2012). This is
 done by subtracting the contaminating warm (i.e., around 1M K) component to the bandpass.
 This warm contribution is calculated from a weighted combin ation of the emission from the AIA
-171 and 193 channels dominated by Fe Xand Fe XII emission, respectively. This intensity
+171 and 193 channels dominated by Fe X and Fe X I I emission, respectively . This intensity
 analysis is done directly and it has not gone through any othe r process like the thermal analysis.
-For this purpose, we applied the formulation (1) used by Li et al. (2015 ). Plots in Figure 5show
-the intensity map, and the mean intensity variation of the wa velength Fe XVIII , for Loop A of
-the flaring region, respectively. As these plots show, this i ntensity is also higher at the end of
+For this purpose, we applied the formulation (1) used by Li et al. (2015). Plots in Figure 5 show
+the intensity map, and the mean intensity variation of the wa velength Fe XV I I I , for Loop A of
+the flaring region, respectively . As these plots show , this i ntensity is also higher at the end of
 the time duration in respect of the flare time. It seems to us th at the expected cooling has not
 occurred in these flaring loops yet, even after the flare occur rence in the probed duration due to
-some plausible reasons. We consider that the mentioned simu ltaneous CME (see section II) which
-this flare is associated with could cause this increase in tem perature. We can be sure that the
-source of this CME is AR 11283 ( Romano et al. (2015 )). This CME is in our flare region, hence
+some plausible reasons. W e consider that the mentioned simu ltaneous CME (see section II) which
+this flare is associated with could cause this increase in tem perature. W e can be sure that the
+source of this CME is AR 11283 ( Romano et al. (2015)). This CME is in our flare region, hence
 the loops receive energy even after the flare occurrence and i t is probably the reason why the
-expected cooling does not occur.
+expected cooling does not occur .
 The thermal oscillations periods obtained the Lomb-Scargl e method, do not have the same
 significance in all strips of the loops, but for most strips of the flaring loops, the significances are
-very near to one. To be assured about these oscillations, we p robed the intensity time-series for
+very near to one. T o be assured about these oscillations, we p robed the intensity time-series for
 each strip of the loops and we observed that this loop’s inten sities shows intensity oscillations
-too (i.e., alongside the loop). The most probable dominant p eriods observed in intensity, for
+too (i.e., alongside the loop). The most probable dominant p eriods observed in intensity , for
 wavelength of 171 is 18.22, and 16.7 min for strips of F-Loop A , 16.7, and 18.22 min for strips of
 F-Loop B1, 16.70, and 12.52 for F-Loop B2, and 16.7 for F-Loop C1 and F-Loop C2. These periods
 are in the same order of the observed thermal oscillation per iods. The intensity in this time series
 has not passed any thermal process but still shows oscillati on periods close to thermal ones. So
 we think these results confirm the observation of thermal osc illations.
-ii. Temperature Analysis of non-Flaring Active Region Loop s
+ii. T emperature Analysis of non-Flaring Active Region Loop s
 The temperature time-series for different strips of the sel ected loops of the non-flaring active
 region 12194 are calculated using the Lomb-Scargle method. In the following figures (Fig. 6),
 the vertical axis shows the logarithm of the temperature and the horizontal axis shows the time
-duration. Figure 6displays the time-series of temperature variations for the first two strips of
+duration. Figure 6 displays the time-series of temperature variations for the first two strips of
 the non-flaring Loops A, and B. These figures are all co-scaled in the range of 5.7 to 6.9 for the
 logarithm of temperature (like the flaring loops range). The most powerful periods, observed in
-most of these non-flaring loops’ strips (listed in Table 2) are from 8.5 min. to 30 min. Comparing
-the periods of the loops in the flaring region (Table 1) with the non-flaring one (Table 2), we see
+most of these non-flaring loops’ strips (listed in T able 2) are from 8.5 min. to 30 min. Comparing
+the periods of the loops in the flaring region (T able 1) with the non-flaring one (T able 2), we see
 that the temperature periods of the flaring loops have lower v alues on average and have more
-diversity than the non-flaring ones. As Tables 1and 2show, the mean temperatures of nonfloops
+diversity than the non-flaring ones. As T ables 1 and 2 show , the mean temperatures of nonfloops
  are lower in comparison with the f-loops, a fact we also expected from common sense.
 The parameter (Max(log T)-Min(log T)) in nonf-loops’ strips is less than that for the flaring loop s’
 strips.
 Nonf-loop A, divided into 11 strips, has the length of 19.91 ( Mm) which is the length of the
 selected part of the loop marked in Figure 2b. The mean of (Max(log T)-Min(log T)) for the strips
-of nonf-loop A is 0.81. Mean of the temperature (log) of this l oop segment over time is 5.93 ±0.10.
+of nonf-loop A is 0.81. Mean of the temperature (log) of this l oop segment over time is 5.93 ± 0.10.
 Nonf-Loop B, divided into 6 strips, has the length of 11.11 (M m), and the mean temperature (log),
-and the mean of (Max(log T)-Min(log T)) for this loop are, 5.99 ±0.13 and 0.62 respectively. Nonfloop
+and the mean of (Max(log T)-Min(log T)) for this loop are, 5.99 ± 0.13 and 0.62 respectively . Nonfloop
  C, which has 5 strips, with the length of 10.13 (Mm), has t he mean temperature (log) of
-5.82±0.12, and the mean (Max(log T)-Min(log T)) of 0.56.
+5.82 ± 0.12, and the mean (Max(log T)-Min(log T)) of 0.56.
 The first highest period observed for the temperature oscill ations of these non-flaring loops’
-strips is reported in Table 2. As we observe the temperature periods in these non-flaring l oops
-are mostly longer than those of the flaring loops (compare the values listed in Table 1and Table 2).
+strips is reported in T able 2. As we observe the temperature periods in these non-flaring l oops
+are mostly longer than those of the flaring loops (compare the values listed in T able 1 and T able2).
 Therefore the temperature oscillations of these loops are a little slower than the flaring ones.
-Figure 7shows the temperature maps of the non-flaring loops A, B, and C , respectively as a
+Figure 7 shows the temperature maps of the non-flaring loops A, B, and C , respectively as a
 time series. In each plot, the vertical axis is the distance a long the loop in Mm, and the horizontal
 axis is the time. The color bar in the left shows the colors con sidered for the temperature range.
 Each separated colored part in the map is one strip. These col or maps are plotted totally at the
-same color range of the loops of the flaring region either.
-As figure 7shows, the strips’ temperature of these non-flaring loops ha ve fewer temperature
+same color range of the loops of the flaring region either .
+As figure 7 shows, the strips’ temperature of these non-flaring loops ha ve fewer temperature
 fluctuations and are smoother in comparison with the flaring o nes (Fig. 4). Furthermore, that
 much increase in the temperatures of the strips, which was ob vious in the loops of the flaring
 region toward the end times, is not observed here. The temper atures are also totally lower in the
 nonf-loops in comparison with the flaring loops. Conversely , it seems that different strips of the
 non-flaring loops have relatively more similar temperature fluctuations.
-As figure 8shows, the peaks of the observed temperature periods for the loops’ strips of the
+As figure 8 shows, the peaks of the observed temperature periods for the loops’ strips of the
 flaring active region (blue ones), and non-flaring active reg ion (red ones), are around 18 minutes,
-and 30 minutes, respectively. The temperature periods’ div ersity is higher in the loops’ strips of
+and 30 minutes, respectively . The temperature periods’ div ersity is higher in the loops’ strips of
 the flaring active region, and shorter temperature periods ( less than 10 minutes, nearer to the
 transverse oscillations periods) are observed in the case o f the flaring loops’ strips in comparison
-with the non-flaring ones. And figure 9shows that the increasing and decreasing of temperature
+with the non-flaring ones. And figure 9 shows that the increasing and decreasing of temperature
 range, or the difference between maximum and minimum of the t emperature value (max(log (T))min(log(T))),
  is much higher on average for the loops’ strips of the flari ng AR in comparison with
 the loops’ strips of the non-flaring one.
 V . S ummery
-We reported the temperature oscillations of coronal loops o f a flaring active region. We selected
+W e reported the temperature oscillations of coronal loops o f a flaring active region. W e selected
 the flaring active region 11283 to investigate the thermal st ructure and treatment of its loops. This
 region includes a high energy flare x2.1 and the transverse os cillations of two loops of it have been
-analyzed before by Jain et al. (2015 ). They analyzed intensity variations in the wavelength 171
+analyzed before by Jain et al. (2015). They analyzed intensity variations in the wavelength 171
 in two coronal loops of this region and detected obvious tran sverse oscillation with periods of
 roughly 2 minutes and decay times of 5 minutes for these loops (loops A and B in Figure. 1b)
-at the flare time. We were curious to know if the temperature va riations follow the transverse
-oscillations of the loops, or there is any relation or correl ation between them. We also wanted to
+at the flare time. W e were curious to know if the temperature va riations follow the transverse
+oscillations of the loops, or there is any relation or correl ation between them. W e also wanted to
 investigate the thermal fluctuations at the flare time. As a bl ind test to see the specific thermal
 properties of the flaring loops, we selected a LOS non-flaring active region (12194), extracted three
 segments of its loops and analyzed their thermal treatment. Then we compared the temperature
 treatment of the loops at the flaring region with the loops of t he non-flaring region to see the
-differences. We were eager to observe the probable discrepa ncies between flaring and non-flaring
+differences. W e were eager to observe the probable discrepa ncies between flaring and non-flaring
 loops in this respect.
 Here we used data of three loops of the flaring active region (A R11283) around the time of the
 Flare X2.1, from 22:10UT till 23:00UT on 2011 September 6, pl us three loops of the non-flaring
 active region (AR12194), from 08:00:00UT till 09:00:00UT o f 2014 October 26 (marked in figures
-1and 2). To calculate the time series of the loop temperature value s, we first extracted the loop
+1 and 2). T o calculate the time series of the loop temperature value s, we first extracted the loop
 pixels in each image and then displayed the loop straightly f or all the images in the time series
-of different wavelengths. To do thermal analysis, we used th e spatially-synthesized Gaussian
-DEM forward-fitting method founded by Aschwanden et al. (2015 ). We calculated the peak
+of different wavelengths. T o do thermal analysis, we used th e spatially-synthesized Gaussian
+DEM forward-fitting method founded by Aschwanden et al. (2015). W e calculated the peak
 temperatures for each strip of the loops. Then we applied the Lomb-Scargle method to analyze
 temperature oscillations of the time-series for each strip of the loops.
-We observed temperature oscillations which are following t he transverse loop oscillations
-observed by Jain et al. (2015 ) for the flaring loops. Furthermore, the temperature oscill ations in
+W e observed temperature oscillations which are following t he transverse loop oscillations
+observed by Jain et al. (2015) for the flaring loops. Furthermore, the temperature oscill ations in
 these flaring loops happen before the transverse oscillatio ns start and continue even in the time
-duration after the transverse oscillations decay. As obser ved, the temperature oscillations do not
-decay as rapidly as the transverse oscillations do. Convers ely, the strips’ temperatures increase
+duration after the transverse oscillations decay . As obser ved, the temperature oscillations do not
+decay as rapidly as the transverse oscillations do. Convers ely , the strips’ temperatures increase
 at the end of the oscillating mode and a rather sensible rise i s observed in the final temperatures
 of the f-loops’ segments. The ranges of the obtained periods are from 7 min. to 28.4 min. for the
-flaring loops, and from 8.5 min. to 30 min. for the non-flaring l oops. With the onset of X-flare in
+flaring loops, and from 8.5 min. to 30 min. for the non-flaring l oops. W ith the onset of X-flare in
 the F-loopA, which has a distinct transverse oscillation in the flaring time with period of roughly
 2 minutes and decay time of 5 minutes, a temperature oscillat ion is observed with periods of
 roughly 10 to 28.5 minutes in different segments of this loop . And as the transverse oscillation
 decays in this interval, no special definite decay is observe d in its temperature oscillations.
 The temperature periods of the flaring loops are rather short er than the temperature periods
 of the non-flaring loops. The loops of the flaring region show s ome short temperature oscillations
-periods in which some are less than 10 minutes (Table 1). These kind of short periods are more
+periods in which some are less than 10 minutes (T able 1). These kind of short periods are more
 frequently observed for the loops of the flaring active regio n and in the case of the non-flaring
-ones, are very scarce. We observed that the periods of the flar ing loops have more diversity
+ones, are very scarce. W e observed that the periods of the flar ing loops have more diversity
 than those of the non-flaring ones. Based on our confined obser vations, the non-flaring loops’
-periods are longer and their temperatures’ values are total ly lower. So our research showed that
+periods are longer and their temperatures’ values are total ly lower . So our research showed that
 thermal structures of the flaring loops differ from the non-fl aring ones in the ways described
-above. As temperature maps show, the temperature fluctuatio ns are increasing at the flaring time
-and around 20 min. after, in the flaring loops. This happens wi th some oscillations in strips’
-temperature. Conversely, it seems that different strips of the non-flaring loops have relatively
+above. As temperature maps show , the temperature fluctuatio ns are increasing at the flaring time
+and around 20 min. after , in the flaring loops. This happens wi th some oscillations in strips’
+temperature. Conversely , it seems that different strips of the non-flaring loops have relatively
 more similar temperature fluctuations. The temperatures ar e either higher in average in the flaring
  loops’ segments as expected. The significances of the per iods, obtained by the Lomb-Scargle
 method, are calculated for each strip of each loop and the res ults show that these significances
@@ -431,89 +443,227 @@ they are just fluctuations.
 Using this method for the coronal loops showed that the oscil lation modes obtained for the
 temperatures of the flaring loops are very close to those of th e spatial slow-mode oscillations of
 the coronal loops. So the origin of temperature oscillation is probably slow-mode waves. These
-kind of oscillations often occur in hot coronal loops (log (T)>6) of active regions especially the
-ones associated with small (or micro-) flares ( Wang et al. (2021 )). The loops of our flaring active
+kind of oscillations often occur in hot coronal loops (log (T) > 6) of active regions especially the
+ones associated with small (or micro-) flares ( W ang et al. (2021)). The loops of our flaring active
 region are also hot loops with the mean temperature above thi s range. They also show intensity
 oscillations. Hence we think the above evidence confirms the slow-mode oscillations for flaring
-loops. The temperature of the non-flaring loops are lower (lo g(T)<6) and as discussed above,
+loops. The temperature of the non-flaring loops are lower (lo g(T) < 6) and as discussed above,
 we believe that the observed oscillation-like periods in no n-flaring loops should be more probably
 related to the high amplitude fluctuations.
 Comparing the loops of the flaring and non-flaring regions, we observed that the amplitudes
-of the fluctuations show a discrepancy. Mean of the parameter (Max(log T)-Min(log T)) in the
-FloopA, , FloopB1, FLoopB2, FloopC1, and FloopC2, are 1.21, 1.10, 0.81, 1.48, and 0.88, respectively.
- And for non-flaring region, mean of (Max(log T)-Min(log T)), are 0.81, 0.62, and 0.56, for
-nonfloopA, B, and C respectively. Therefore the values of the quantity mean of (Max(log T)Min(log
- T)) for these non-flaring loops show a difference from the flari ng ones and are lower.
+of the fluctuations show a discrepancy . Mean of the parameter (Max(log T)-Min(log T)) in the
+FloopA, , FloopB1, FLoopB2, FloopC1, and FloopC2, are 1.21, 1.10, 0.81, 1.48, and 0.88, respectively
+ . And for non-flaring region, mean of (Max(log T)-Min(log T)), are 0.81, 0.62, and 0.56, for
+nonfloopA, B, and C respectively . Therefore the values of the quantity mean of (Max(log T)Min(log
+ T)) for these non-flaring loops show a difference from the flari ng ones and are lower .
 Loops of the non-flaring active region 12194 have a relativel y uniform temperature at the
 beginning of the time interval, which rises slightly at its e nd. As the Solar Monitor reports in the
 neighborhood of this region, the flaring active region 12192 exists of which between its multiple
-flares, there is a c4.6 class flare occurring at 9:44UT. Therefore, it could be a p ossible suggestion
+flares, there is a c4.6 class flare occurring at 9:44UT . Therefore, it could be a p ossible suggestion
 that the abovementioned slight temperature rise in the loop s of AR 12194 (in the time interval
 8:00 to 9:00) originated from the influence of an increase in t he energy at the pre-flare conditions
 exist in the AR 12192.
 Hence as our study shows, the temperature of coronal loops of flaring AR changes in an
-oscillatory manner. Compared with these non-flaring loops, the flaring loops show higher temperatures
+oscillatory manner . Compared with these non-flaring loops, the flaring loops show higher temperatures
  on average and higher oscillation periods with hi gher peaks and deeper valleys. More
 accurate commentary in this respect requires more extensiv e statistical research and broader observations.
 
-arcsecarcsec
-79154229304379454−6825118211304397
+arcsec
+arcsec
+79 154 229 304 379 454
+−68
+25
+118
+211
+304
+397
 a
-arcsecarcsec
+arcsec
+arcsec
 
-114.6  171.2 227.8 284.4 341171.4206.3241.2276.1311
-Loop B1 Loop ALoop C2
-Loop C1b
+
+114.6  171.2 227.8 284.4 341
+171.4
+206.3
+241.2
+276.1
+311
+Loop B1 Loop A
+Loop C2
+Loop C1
+b
 Loop B2
-Figure 1: (a) AIA image of the AR 11283 on 2011 September 6, 22:10 UT as se en in the 171 filter. (b) Zoom-in view
+Figure 1: (a) AIA image of the AR 11283 on 2011 September 6, 22:10 UT as se en in the 171 filter . (b) Zoom-in view
 of the area marked by a box in the left. The selected loops are d istinguished in red. The loops A and B are
-the same loops studied by Jain et al. (2015 ) (see Fig.3a in Jain et al. (2015 )).
-arcsecarcsec
-−154 0 154 308−572−418−264−11044
+the same loops studied by Jain et al. (2015) (see Fig.3a in Jain et al. (2015)).
+arcsec
+arcsec
+−154 0 154 308
+−572
+−418
+−264
+−110
+44
 a
-arcsecarcsec
-−202 −134 −66 2 70−396−338−280−221−162
-nonf−LoopAnonf−LoopB
-nonf−LoopCb
+arcsec
+arcsec
+−202 −134 −66 2 70
+−396
+−338
+−280
+−221
+−162
+nonf−LoopA
+nonf−LoopB
+nonf−LoopC
+b
 Figure 2: (a) The NOAA AR12194 on 2014 October 26, at 08:00:00UT in 171 r ecorded by AIA/SDO. (b) Zoom-in
 view of the area, marked by a box in the left, the loops are dist inguished in red.
-5.866.26.46.66.8LogTF−LoopA
-5.866.26.46.66.8LogT
-22:10 22:20 22:30 22:40 22:50 23:005.866.26.46.66.8
-timeLogT
-      5.866.26.46.66.8LogTF−LoopB1
-22:10 22:20 22:30 22:40 22:50 23:005.866.26.46.66.8
-timeLogT
+5.8
+6
+6.2
+6.4
+6.6
+6.8LogT
+F−LoopA
+5.8
+6
+6.2
+6.4
+6.6
+6.8LogT
+22:10 22:20 22:30 22:40 22:50 23:00
+5.8
+6
+6.2
+6.4
+6.6
+6.8
+time
+LogT
+
+5.8
+6
+6.2
+6.4
+6.6
+6.8LogT
+F−LoopB1
+22:10 22:20 22:30 22:40 22:50 23:00
+5.8
+6
+6.2
+6.4
+6.6
+6.8
+time
+LogT
 Figure 3: From up to down: The time-series of the temperature oscillat ions for the first 3 strips of Loop A (strip 1 to
 3 from top to down), and the first 2 strips of LoopB1. Horizonta l axis is the time and the vertical axis is the
 logarithm of the temperature. The red lines mark the initial and final time of the flare x2.1.
-22:10 22:20 22:30 22:40 22:50 23:000 11213242  F−loopA
-Time Loop Length(Mm)
-5.866.26.46.66.8
-22:10 22:20 22:30 22:40 22:50 23:000 5 101520  F−loopB1
-Time Loop Length(Mm)
-66.056.16.156.26.256.36.356.46.456.5
-22:10 22:20 22:30 22:40 22:50 23:000 4 8 1216  F−loopB2
-Time Loop Length(Mm)
-5.866.26.46.66.8
-22:10 22:20 22:30 22:40 22:50 23:000 6 111722  F−loopC1
-Time Loop Length(Mm)
-5.65.866.26.46.66.8
-22:10 22:20 22:30 22:40 22:50 23:000 3 6 8 11  F−loopC2
-Time Loop Length(Mm)
-5.866.26.46.66.8
-Figure 4: Temperature map of the flaring loops A, B1, B2, C1, and C2 (from top to down) as a time series. The vertical
+22:10 22:20 22:30 22:40 22:50 23:00
+0
+11
+21
+32
+42
+F−loopA
+Time
+
+Loop Length(Mm)
+5.8
+6
+6.2
+6.4
+6.6
+6.8
+22:10 22:20 22:30 22:40 22:50 23:00
+0
+5
+10
+15
+20
+F−loopB1
+Time
+
+Loop Length(Mm)
+6
+6.05
+6.1
+6.15
+6.2
+6.25
+6.3
+6.35
+6.4
+6.45
+6.5
+22:10 22:20 22:30 22:40 22:50 23:00
+0
+4
+8
+12
+16
+F−loopB2
+Time
+
+Loop Length(Mm)
+5.8
+6
+6.2
+6.4
+6.6
+6.8
+22:10 22:20 22:30 22:40 22:50 23:00
+0
+6
+11
+17
+22
+F−loopC1
+Time
+
+Loop Length(Mm)
+5.6
+5.8
+6
+6.2
+6.4
+6.6
+6.8
+22:10 22:20 22:30 22:40 22:50 23:00
+0
+3
+6
+8
+11
+F−loopC2
+Time
+
+Loop Length(Mm)
+5.8
+6
+6.2
+6.4
+6.6
+6.8
+Figure 4: T emperature map of the flaring loops A, B1, B2, C1, and C2 (from top to down) as a time series. The vertical
 axis is the distance along the loop in Mm, and the horizontal a xis is the time. The colorbar in the left shows
 the colors considered for the temperature range.
-Table 1: The properties observed for the loop segments of the flaring A R.
+T able 1:The properties observed for the loop segments of the flaring A R.
 FLoopA
-(Strip Number)The highest
-Temp.’s period
-observedMax(log(T))Min(log(T))FLoopB2
+(Strip Number)
+The highest
+T emp.’s period
+observed
+Max(log(T))Min(log(T))
 
-(Strip Number)The highest
-Temp.’s period
-observedMax(log(T))Min(log(T))
+FLoopB2
+(Strip Number)
+The highest
+T emp.’s period
+observed
+Max(log(T))Min(log(T))
 
 1 9.94 1.09 1 18.07 0.68
 2 16.57 0.79 2 24.85 0.83
@@ -552,11 +702,13 @@ FLoopB1 - - 4 16.57 0.93
 9 11.04 1.6
 10 18.07 1.6
 11 18.07 1.6
-Table 2: The properties observed for the loop segments of the non flari ng AR.
+T able 2:The properties observed for the loop segments of the non flari ng AR.
 Nonf-LoopA
-(Strip Number)The highest
-Temp.’s period
-observedMax(log(T))Min(log(T))
+(Strip Number)
+The highest
+T emp.’s period
+observed
+Max(log(T))Min(log(T))
 
 1 24 0.61
 2 30 0.95
@@ -570,9 +722,11 @@ observedMax(log(T))Min(log(T))
 10 30 0.77
 11 30 0.61
 Nonf-LoopB
-(Strip Number)The highest
-Temp.’s period
-observedMax(log(T))Min(log(T))
+(Strip Number)
+The highest
+T emp.’s period
+observed
+Max(log(T))Min(log(T))
 
 1 26.66 0.36
 2 26.66 0.64
@@ -581,126 +735,242 @@ observedMax(log(T))Min(log(T))
 5 30 0.98
 6 8.57 0.67
 Nonf-LoopC
-(Strip Number)The highest
-Temp.’s period
-observedMax(log(T))Min(log(T))
+(Strip Number)
+The highest
+T emp.’s period
+observed
+Max(log(T))Min(log(T))
 
 1 26.66 0.76
 2 26.66 0.75
 3 26.66 0.26
 4 30 0.27
 5 30 0.8
-22:10 22:20 22:30 22:40 22:50 23:000 11223243  Int−Fe−LoopA
-Time Loop Length(Mm)
-00.020.040.060.080.10.120.140.160.180.2
-22:10 22:20 22:30 22:40 22:50 23:0000.10.20.30.40.50.60.70.80.91Int−Fe−LoopA
-TimeNormalized Intensity Fe XVIII
-Figure 5: Normalized intensity map of the flaring loop A for the wavelen gth Fe XVIII, and mean intensity of Fe
-XVIII (from top to down). The vertical axis is the distance al ong the loop in Mm for the first plot, and
+22:10 22:20 22:30 22:40 22:50 23:000
+11
+22
+32
+43
+Int−Fe−LoopA
+Time
+
+Loop Length(Mm)
+0
+0.02
+0.04
+0.06
+0.08
+0.1
+0.12
+0.14
+0.16
+0.18
+0.2
+22:10 22:20 22:30 22:40 22:50 23:000
+0.1
+0.2
+0.3
+0.4
+0.5
+0.6
+0.7
+0.8
+0.9
+1
+Int−Fe−LoopA
+Time
+Normalized Intensity Fe XVIII
+Figure 5: Normalized intensity map of the flaring loop A for the wavelen gth Fe XV I I I, and mean intensity of Fe
+XV I I I (from top to down). The vertical axis is the distance al ong the loop in Mm for the first plot, and
 normalized intensity for the second. The horizontal axis is the time. The colorbar in the left shows the colors
 considered for the Intensity range.
 VI. acknowledgements
 The author Narges Fathalian wishes to also express her thank s for the technical support and
-comments which has received from Dr.Farhad Daii and Dr.Mohs en Javaherian regarding to this
+comments which has received from Dr .Farhad Daii and Dr .Mohs en Javaherian regarding to this
 work.
-       5.866.26.46.66.8LogTNonF−LoopA
-8:00 8:10 8:20 8:30 8:40 8:50 9:005.866.26.46.66.8
-timeLogT
-       5.866.26.46.66.8LogTNonF−LoopB
-8:00 8:10 8:20 8:30 8:40 8:50 9:005.866.26.46.66.8
-timeLogT
+
+5.8
+6
+6.2
+6.4
+6.6
+6.8LogT
+NonF−LoopA
+8:00 8:10 8:20 8:30 8:40 8:50 9:00
+5.8
+6
+6.2
+6.4
+6.6
+6.8
+time
+LogT
+
+5.8
+6
+6.2
+6.4
+6.6
+6.8LogT
+NonF−LoopB
+8:00 8:10 8:20 8:30 8:40 8:50 9:00
+5.8
+6
+6.2
+6.4
+6.6
+6.8
+time
+LogT
 Figure 6: from top to down: The time-series of the temperature for the fi rst 2 strips (from top to down) of the nonflaring
  Loops A and B. Horizontal axis is the time and the verti cal axis is the logarithm of the temperature.
-8:10 8:20 8:30 8:40 8:50 9:000 5 101520  NonF−loopA
-Time Loop Length(Mm)
-5.866.26.46.66.8
-8:10 8:20 8:30 8:40 8:50 9:000 5 9 1418  NonF−loopB
-Time Loop Length(Mm)
-5.866.26.46.66.8
-8:10 8:20 8:30 8:40 8:50 9:000 3 5 8 10  NonF−loopC
-Time Loop Length(Mm)
-5.866.26.46.66.8
-Figure 7: from top to down: Temperature map of the non-flaring loops A, B and C as a time-series. The vertical axis
+8:10 8:20 8:30 8:40 8:50 9:00
+0
+5
+10
+15
+20
+NonF−loopA
+Time
+
+Loop Length(Mm)
+5.8
+6
+6.2
+6.4
+6.6
+6.8
+8:10 8:20 8:30 8:40 8:50 9:00
+0
+5
+9
+14
+18
+NonF−loopB
+Time
+
+Loop Length(Mm)
+5.8
+6
+6.2
+6.4
+6.6
+6.8
+8:10 8:20 8:30 8:40 8:50 9:00
+0
+3
+5
+8
+10
+NonF−loopC
+Time
+
+Loop Length(Mm)
+5.8
+6
+6.2
+6.4
+6.6
+6.8
+Figure 7: from top to down: T emperature map of the non-flaring loops A, B and C as a time-series. The vertical axis
 is the distance along the loop in Mm, and the horizontal axis i s the time. The color-bar in the left shows the
 colors considered for the temperature range.
-678910111213141516171819202122232425262728293000.050.10.150.20.250.30.350.4
-Temp. Period (min)Percentage of Temp. Periods
+6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
+0
+0.05
+0.1
+0.15
+0.2
+0.25
+0.3
+0.35
+0.4
+Temp. Period (min)
+Percentage of Temp. Periods
 Figure 8: Hisogram of the temperature periods percentages for the loo ps’ strips of the flaring (blue bars) and nonflaring
  (red bars) ARs. The horizontal axis shows the tempera ture periods in minute.
-0.20.30.40.50.60.70.80.911.11.21.31.41.51.61.7024681012
-max(log(T))−min(log(T))Number
+0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1 1.1 1.2 1.3 1.4 1.5 1.6 1.7
+0
+2
+4
+6
+8
+10
+12
+max(log(T))−min(log(T))
+Number
 Figure 9: Hisogram of the parameter of (max(log(T))-min(log(T))) fo r each strip of the loops of the flaring (blue bars)
 and non-flaring (red bars) ARs.
 References
 Abedini, A., Safari, H., & Nasiri, S. 2012, Solar Physics, 28 0
-Anfinogentov, S., Nakariakov, V . M., Mathioudakis, M., Van D oorsselaere, T., & Kowalski, A. F.
+Anfinogentov, S., Nakariakov, V . M., Mathioudakis, M., V an D oorsselaere, T ., & Kowalski, A. F .
 2013, ApJ, 773, 156
 Aschwanden, M., B. P . S. C. M. A. 2013, Solar Physics, 283, 5
-Aschwanden, M. J. 2006, Philosophical Transactions of the R oyal Society of London Series A, 364,
+Aschwanden, M. J. 2006, Philosophical T ransactions of the R oyal Society of London Series A, 364,
 417
-Aschwanden, M. J., & Boerner, P . 2011, The Astrophysical Jou rnal, 732, 81
-Aschwanden, M. J., Boerner, P ., Ryan, D., et al. 2015, The Ast rophysical Journal, 802, 53
+Aschwanden, M. J., & Boerner , P . 2011, The Astrophysical Jou rnal, 732, 81
+Aschwanden, M. J., Boerner , P ., Ryan, D., et al. 2015, The Ast rophysical Journal, 802, 53
 Aschwanden, M. J., Fletcher, L., Schrijver, C. J., & Alexand er, D. 1999, ApJ, 520, 880
 Ballai, I., Jess, D. B., & Douglas, M. 2011, A&A, 534, A13
 Banerjee, D., Erdélyi, R., Oliver, R., & O’Shea, E. 2007, Sol ar Physics, 246, 3
-Berghmans, D., & Clette, F. 1999, Solar Physics, 186, 207
+Berghmans, D., & Clette, F . 1999, Solar Physics, 186, 207
 Boerner, P ., Edwards, C., Lemen, J., et al. 2012, Solar Physi cs, 275, 41
-Dahlburg, R. B., Einaudi, G., Ugarte-Urra, I., Rappazzo, A. F., & Velli, M. 2018, ApJ, 868, 116
-De Moortel, I. 2005, Philosophical Transactions of the Roya l Society of London Series A, 363, 2743
+Dahlburg, R. B., Einaudi, G., Ugarte-Urra, I., Rappazzo, A. F ., & V elli, M. 2018, ApJ, 868, 116
+De Moortel, I. 2005, Philosophical T ransactions of the Roya l Society of London Series A, 363, 2743
 De Moortel, I., & Brady, C. S. 2007, ApJ, 664, 1210
-De Moortel, I., Ireland, J., & Walsh, R. W. 2000, A&A, 355, L23
-De Moortel, I., & Nakariakov, V . M. 2012, Philosophical Tran sactions of the Royal Society of
+De Moortel, I., Ireland, J., & W alsh, R. W . 2000, A&A, 355, L23
+De Moortel, I., & Nakariakov, V . M. 2012, Philosophical T ran sactions of the Royal Society of
 London Series A, 370, 3193
 Fathalian, N. 2019, arXiv e-prints, arXiv:1908.11369
 Fathalian, N., & Safari, H. 2010, ApJ, 724, 411
-Fathalian, N., Safari, H., & Nasiri, S. 2010, New Astronomy, 15, 403
-Goossens, M., Hollweg, J. V ., & Sakurai, T. 1992, Solar Physi cs, 138, 233
+Fathalian, N., Safari, H., & Nasiri, S. 2010, New Astronomy , 15, 403
+Goossens, M., Hollweg, J. V ., & Sakurai, T . 1992, Solar Physi cs, 138, 233
 Gruszecki, M., Murawski, K., Selwa, M., & Ofman, L. 2006, A&A , 460, 887
-Guennou, C., Auchère, F., Soubrié, E., et al. 2012a, ApJ, 203 , 25
-Guennou, C., Auchère, F., Soubrié, E., et al. 2012b, ApJ, 203 , 26
+Guennou, C., Auchère, F ., Soubrié, E., et al. 2012a, ApJ, 203 , 25
+Guennou, C., Auchère, F ., Soubrié, E., et al. 2012b, ApJ, 203 , 26
 Habbal, S. R., & Rosner, R. 1979, ApJ, 234, 1113
-Hindman, B. W., & Jain, R. 2014, ApJ, 784, 103
-Jain, R., Maurya, R. A., & Hindman, B. W. 2015, ApJ, 804, L19
+Hindman, B. W ., & Jain, R. 2014, ApJ, 784, 103
+Jain, R., Maurya, R. A., & Hindman, B. W . 2015, ApJ, 804, L19
 Jess, D. B., Reznikova, V . E., Ryans, R. S. I., et al. 2016, Nat ure Physics, 12, 179
-Kolotkov, D. Y., Nakariakov, V . M., & Zavershinskii, D. I. 20 19, A&A, 628, A133
-Krishna Prasad, S., Jess, D. B., & Van Doorsselaere, T. 2019, Frontiers in Astronomy and Space
+Kolotkov, D. Y ., Nakariakov, V . M., & Zavershinskii, D. I. 20 19, A&A, 628, A133
+Krishna Prasad, S., Jess, D. B., & V an Doorsselaere, T . 2019, Frontiers in Astronomy and Space
 Sciences, 6, 57
-Li, L. P ., Peter, H., Chen, F., & Zhang, J. 2015, A&A, 583, A109
-Liu, W., & Ofman, L. 2014, Solar Physics, 289, 3233–3277
-Luna, M., Terradas, J., Oliver, R., & Ballester, J. L. 2010, A pJ, 716, 1371
+Li, L. P ., Peter, H., Chen, F ., & Zhang, J. 2015, A&A, 583, A109
+Liu, W ., & Ofman, L. 2014, Solar Physics, 289, 3233–3277
+Luna, M., T erradas, J., Oliver, R., & Ballester, J. L. 2010, A pJ, 716, 1371
 McClymont, A. N., & Craig, I. J. D. 1985, ApJ, 289, 834
-McLaughlin, J. A., Nakariakov, V . M., Dominique, M., Jelíne k, P ., & Takasao, S. 2018, Space
+McLaughlin, J. A., Nakariakov, V . M., Dominique, M., Jelíne k, P ., & T akasao, S. 2018, Space
 Science Reviews volume, 214, 45
-Nakariakov, V . M., Afanasyev, A. N., Kumar, S., & Moon, Y. J. 2 017, ApJ, 849, 62
+Nakariakov, V . M., Afanasyev, A. N., Kumar, S., & Moon, Y . J. 2 017, ApJ, 849, 62
 Nakariakov, V . M., Inglis, A. R., Zimovets, I. V ., et al. 2010 , Plasma Physics and Controlled Fusion,
 52, 124009
 Nakariakov, V . M., Ofman, L., Deluca, E. E., Roberts, B., & Da vila, J. M. 1999, Science, 285, 862
-Nakariakov, V . M., & Verwichte, E. 2005, Living Reviews in So lar Physics, 2, 3
-Nisticò, G., Nakariakov, V . M., & Verwichte, E. 2013, A&A, 55 2, A57
+Nakariakov, V . M., & V erwichte, E. 2005, Living Reviews in So lar Physics, 2, 3
+Nisticò, G., Nakariakov, V . M., & V erwichte, E. 2013, A&A, 55 2, A57
 Nisticò, G., Polito, V ., Nakariakov, V . M., & Del Zanna, G. 20 17, A&A, 600, A37
-Ofman, L., & Wang, T. 2002, ApJ, 580, L85
-Pant, V ., Tiwari, A., Yuan, D., & Banerjee, D. 2017, ApJ, 847, L5
-Pascoe, D. J., Nakariakov, V . M., & Arber, T. D. 2007, Solar Ph ysics, 246, 165
-Reale, F., Testa, P ., Petralia, A., & Kolotkov, D. Y. 2019, Ap J, 884, 131
+Ofman, L., & W ang, T . 2002, ApJ, 580, L85
+Pant, V ., Tiwari, A., Y uan, D., & Banerjee, D. 2017, ApJ, 847, L5
+Pascoe, D. J., Nakariakov, V . M., & Arber, T . D. 2007, Solar Ph ysics, 246, 165
+Reale, F ., T esta, P ., Petralia, A., & Kolotkov, D. Y . 2019, Ap J, 884, 131
 Roberts, B., Edwin, P . M., & Benz, A. O. 1984, ApJ, 279, 857
-Romano, P ., Zuccarello, F., Guglielmino, S. L., et al. 2015, A&A, 582, A55
+Romano, P ., Zuccarello, F ., Guglielmino, S. L., et al. 2015, A&A, 582, A55
 Russell, A. J. B., Simões, P . J. A., & Fletcher, L. 2015, A&A, 5 81, A8
 Scargle, J. D. 1982, ApJ, 263, 835
-Schmelz, J. T., Jenkins, B. S., Worley, B. T., et al. 2011, ApJ , 731, 49
-Schmelz, J. T., Kimble, J. A., Jenkins, B. S., et al. 2010, ApJ , 725, L34
-Schmelz, J. T., Pathak, S., Brooks, D. H., Christian, G. M., & Dhaliwal, R. S. 2014, ApJ, 795, 171
-Schmelz, J. T., Pathak, S., Jenkins, B. S., & Worley, B. T. 201 3, ApJ, 764, 53
-Ugarte-Urra, I., & Warren, H. P . 2014, ApJ, 783, 12
-Van Doorsselaere, T., Kupriyanova, E. G., & Yuan, D. 2016, So lar Physics, 291, 3143
-Van Doorsselaere, T., Wardle, N., Del Zanna, G., et al. 2011, ApJ, 727, L32
-VanderPlas, J. T. 2018, ApJ, 236, 16
-Verwichte, E., Nakariakov, V . M., Ofman, L., & Deluca, E. E. 2 004, Solar Physics, 223, 77
-Wang, T. 2011, Space Science Reviews, 158, 397–419
-Wang, T., Innes, D. E., & Qiu, J. 2007, ApJ, 656, 598
-Wang, T. J., & Solanki, S. K. 2004, A&A, 421, L33
-Wang, T. J., Solanki, S. K., Innes, D. E., Curdt, W., & Marsch, E. 2003, A&A, 402, L17
-Wang, T., & Ofman, L. 2019, ApJ, 886, 2
-Wang, T., Ofman, L., Sun, X., Provornikova, E., & Davila, J. M . 2015, ApJ, 811, L13
-Wang, T., Ofman, L., Yuan, D., et al. 2021, Space Science Revi ews, 217
-Warren, H. P ., Winebarger, A. R., & Brooks, D. H. 2012, ApJ, 75 9, 141
-Wills-Davey, M. J., & Thompson, B. J. 1999, Solar Physics, 19 0, 467
\ No newline at end of file
+Schmelz, J. T ., Jenkins, B. S., W orley, B. T ., et al. 2011, ApJ , 731, 49
+Schmelz, J. T ., Kimble, J. A., Jenkins, B. S., et al. 2010, ApJ , 725, L34
+Schmelz, J. T ., Pathak, S., Brooks, D. H., Christian, G. M., & Dhaliwal, R. S. 2014, ApJ, 795, 171
+Schmelz, J. T ., Pathak, S., Jenkins, B. S., & W orley, B. T . 201 3, ApJ, 764, 53
+Ugarte-Urra, I., & W arren, H. P . 2014, ApJ, 783, 12
+V an Doorsselaere, T ., Kupriyanova, E. G., & Y uan, D. 2016, So lar Physics, 291, 3143
+V an Doorsselaere, T ., W ardle, N., Del Zanna, G., et al. 2011, ApJ, 727, L32
+V anderPlas, J. T . 2018, ApJ, 236, 16
+V erwichte, E., Nakariakov, V . M., Ofman, L., & Deluca, E. E. 2 004, Solar Physics, 223, 77
+W ang, T . 2011, Space Science Reviews, 158, 397–419
+W ang, T ., Innes, D. E., & Qiu, J. 2007, ApJ, 656, 598
+W ang, T . J., & Solanki, S. K. 2004, A&A, 421, L33
+W ang, T . J., Solanki, S. K., Innes, D. E., Curdt, W ., & Marsch, E. 2003, A&A, 402, L17
+W ang, T ., & Ofman, L. 2019, ApJ, 886, 2
+W ang, T ., Ofman, L., Sun, X., Provornikova, E., & Davila, J. M . 2015, ApJ, 811, L13
+W ang, T ., Ofman, L., Y uan, D., et al. 2021, Space Science Revi ews, 217
+W arren, H. P ., W inebarger, A. R., & Brooks, D. H. 2012, ApJ, 75 9, 141
+W ills-Davey, M. J., & Thompson, B. J. 1999, Solar Physics, 19 0, 467
\ No newline at end of file
diff --git a/read/results/pypdf/GeoTopo-book.txt b/read/results/pypdf/GeoTopo-book.txt
index 62ca6ac..d04416c 100644
--- a/read/results/pypdf/GeoTopo-book.txt
+++ b/read/results/pypdf/GeoTopo-book.txt
@@ -11,7 +11,7 @@ Danksagungen
 An dieser Stelle möchte ich Herrn Prof. Dr. Herrlich für einige Korrekturvorschläge und einen
 gut strukturierten Tafelanschrieb danken, der als Vorlage für dieses Skript diente. Tatsächlich
 basiert die Struktur dieses Skripts auf der Vorlesung von Herrn Prof. Dr. Herrlich und ganze
-Abschnitte konnten direkt mit L ATEX umgesetzt werden. Vielen Dank für die Erlaubnis, Ihre
+Abschnitte konnten direkt mit LATEX umgesetzt werden. Vielen Dank für die Erlaubnis, Ihre
 Inhalte in diesem Skript einbauen zu dürfen!
 Vielen Dank auch an Frau Lenz und Frau Randecker, die es mir erlaubt haben, ihre Übungsaufgaben
  und Lösungen zu benutzen.
@@ -19,25 +19,25 @@ Jérôme Urhausen hat durch viele Verbesserungsvorschläge und Beweise zu einer
 Qualitätssteigerung am Skript beigetragen und meine Tutorin Sarah hat mir viele Fragen per
 E-Mail und nach dem Tutorium beantwortet. Danke!
 Was ist Topologie?
-Die Kugeloberfläche S2lässt sich durch strecken, stauchen und umformen zur Würfeloberfläche
-oder der Oberfläche einer Pyramide verformen, aber nicht zum R2oder zu einem Torus T2. Für
-denR2müsste man die Oberfläche unendlich ausdehnen und für einen Torus müsste man ein
+Die KugeloberflächeS2 lässt sich durch strecken, stauchen und umformen zur Würfeloberfläche
+oder der Oberfläche einer Pyramide verformen, aber nicht zumR2 oder zu einem TorusT2. Für
+den R2 müsste man die Oberfläche unendlich ausdehnen und für einen Torus müsste man ein
 Loch machen.
 Erforderliche Vorkenntnisse
-Es wird ein sicherer Umgang mit den Quantoren ( ∀,∃), Mengenschreibweisen ( ∪,∩,\,∅,R,P(M))
+Es wird ein sicherer Umgang mit den Quantoren (∀,∃), Mengenschreibweisen (∪,∩,\,∅,R,P(M))
 und ganz allgemein formaler Schreibweise vorausgesetzt. Auch die Beweisführung mittels Widerspruchsbeweisen
- sollte bekannt sein und der Umgang mit komplexen Zahlen C, deren Betrag,
+ sollte bekannt sein und der Umgang mit komplexen ZahlenC, deren Betrag,
 Folgen und Häufungspunkten nicht weiter schwer fallen. Diese Vorkenntnisse werden vor allem
 in „Analysis I“ vermittelt.
 Außerdem wird vorausgesetzt, dass (affine) Vektorräume, Faktorräume, lineare Unabhängigkeit,
-der Spektralsatz und der projektive Raum P(R)aus „Lineare Algebra I“ bekannt sind. In „Lineare
+der Spektralsatz und der projektive RaumP(R) aus „Lineare Algebra I“ bekannt sind. In „Lineare
 Algebra II“ wird der Begriff der Orthonormalbasis eingeführt.
 
-(a)S2
-(b) Würfel (c) Pyramide
+(a) S2
+ (b) Würfel (c) Pyramide
 y
 x
-(d)R2(e)T2
+(d) R2 (e) T2
 Abbildung 0.1: Beispiele für verschiedene Formen
 Obwohl es nicht vorausgesetzt wird, könnte es von Vorteil sein „Einführung in die Algebra und
 Zahlentheorie“ gehört zu haben.
@@ -81,622 +81,735 @@ Stichwortverzeichnis 111
 1 Topologische Grundbegriffe
 1.1 Topologische Räume
 Definition 1
-Eintopologischer Raum ist ein Paar (X,T)bestehend aus einer Menge XundT⊆P(X)
+Ein topologischer Raumist ein Paar(X,T) bestehend aus einer MengeX und T ⊆P(X)
 mit folgenden Eigenschaften
-(i)∅,X∈T
-(ii) SindU1,U2∈T, so istU1∩U2∈T
-(iii) IstIeine Menge und Ui∈Tfür jedesi∈I, so ist⋃
-i∈IUi∈T
-Die Elemente von Theißenoffene Teilmengen vonX.
-A⊆Xheißtabgeschlossen , wennX\Aoffen ist.
-Es gibt auch Mengen, die weder abgeschlossen, noch offen sind wie z. B. [0,1). Auch gibt es
+(i) ∅,X ∈T
+(ii) Sind U1,U2 ∈T, so istU1 ∩U2 ∈T
+(iii) Ist I eine Menge undUi ∈T für jedesi∈I, so ist
+⋃
+i∈I
+Ui ∈T
+Die Elemente vonT heißenoffene Teilmengenvon X.
+A⊆X heißtabgeschlossen, wennX\A offen ist.
+Es gibt auch Mengen, die weder abgeschlossen, noch offen sind wie z. B.[0,1). Auch gibt es
 Mengen, die sowohl abgeschlossen als auch offen sind.
 Bemerkung 1 (Mengen, die offen & abgeschlossen sind, ex.)
-Betrachte∅undXmit dertrivialen Topologie Ttriv={∅,X}.
-Es gilt:X∈Tund∅∈T, d. h.Xund∅sind offen. Außerdem XC=X\X=∅∈Tund
-X\∅=X∈T, d. h.Xund∅sind als Komplement offener Mengen abgeschlossen. ■
+Betrachte ∅und X mit dertrivialen Topologie Ttriv = {∅,X }.
+Es gilt:X ∈T und ∅∈ T, d. h.X und ∅sind offen. AußerdemXC = X\X = ∅∈ T und
+X\∅ = X ∈T, d. h.X und ∅sind als Komplement offener Mengen abgeschlossen. ■
 Beispiel 1 (Topologien)
-1)X=Rnmit der von der euklidischen Metrik erzeugten Topologie TEuklid:
-U⊆Rnoffen⇔für jedesx∈Ugibt esr>0,
-sodass Br(x) ={y∈Rn|d(x,y)<r}⊆U
-Diese Topologie wird auch „Standardtopologie des Rn“ genannt. Sie beinhaltet unter
+1) X = Rn mit der von der euklidischen Metrik erzeugten TopologieTEuklid:
+U ⊆Rn offen ⇔für jedesx∈U gibt esr> 0,
+sodass Br(x) = {y∈Rn |d(x,y) <r }⊆ U
+Diese Topologie wird auch „Standardtopologie desRn“ genannt. Sie beinhaltet unter
 anderem alle offenen Kugeln, aber z. B. auch Schnitte zweier Kugeln mit unterschiedlichem
  Mittelpunkt (vgl. Definition 1.ii).
-2) Jeder metrische Raum (X,d)ist auch ein topologischer Raum.
-3) Für eine Menge XheißtTDiskret =P(X)diskrete Topologie .
-4)X:=R,TZ:={U⊆R|R\Uendlich}∪{∅} heißtZariski-Topologie
+2) Jeder metrische Raum(X,d) ist auch ein topologischer Raum.
+3) Für eine MengeX heißtTDiskret = P(X) diskrete Topologie.
+4) X := R,TZ := {U ⊆R |R \U endlich}∪{∅} heißtZariski-Topologie
 Beobachtungen:
-•U∈TZ⇔∃f∈R[X], sodass R\U=V(f) ={x∈R|f(x) = 0}
-•Es gibt keine disjunkten offenen Mengen in TZ.
+•U ∈TZ ⇔∃f ∈R[X], sodassR \U = V(f) = {x∈R |f(x) = 0 }
+•Es gibt keine disjunkten offenen Mengen inTZ.
  1.1. TOPOLOGISCHE RÄUME
-5)X:=Rn,TZ={U⊆Rn|Es gibt Polynome f1,...,fr∈R[X1,...,Xn]sodass
-Rn\U=V(f1,...,fr)}
-6)X:={0,1},T={∅,{0,1},{0}}heißtSierpińskiraum .
-∅,{0,1},{1}sind dort alle abgeschlossenen Mengen.
+5) X := Rn,TZ = {U ⊆Rn|Es gibt Polynomef1,...,f r ∈R[X1,...,X n] sodass
+Rn \U = V(f1,...,f r)}
+6) X := {0,1 },T = {∅,{0,1 },{0 }} heißtSierpińskiraum.
+∅,{0,1 },{1 }sind dort alle abgeschlossenen Mengen.
 Definition 2
-Sei(X,T)ein topologischer Raum und x∈X.
-Eine Teilmenge U⊆XheißtUmgebung vonx, wenn es ein U0∈Tgibt mitx∈U0und
-U0⊆U.
-Gilt eine Eigenschaft in einer Umgebung, so sagt man, dass die Eigenschaft lokalgilt.
+Sei (X,T) ein topologischer Raum undx∈X.
+Eine TeilmengeU ⊆X heißtUmgebung von x, wenn es einU0 ∈T gibt mitx∈U0 und
+U0 ⊆U.
+Gilt eine Eigenschaft in einer Umgebung, so sagt man, dass die Eigenschaftlokalgilt.
 Definition 3
-Sei(X,T)ein topologischer Raum und M⊆Xeine Teilmenge.
-a)M◦:={x∈M|Mist Umgebung von x}=⋃
+Sei (X,T) ein topologischer Raum undM ⊆X eine Teilmenge.
+a) M◦ := {x∈M |M ist Umgebung vonx}=
+⋃
 U⊆M
-U∈TUheißtInneres oderoffener
-KernvonM.
-b)M:=⋂
+U∈T
+U heißtInneres oder offener
+Kern von M.
+b) M :=
+⋂
 M⊆A
-AabgeschlossenAheißtabgeschlossene Hülle oderAbschluss vonM.
-c)∂M:=M\M◦heißtRandvonM.
-d)MheißtdichtinX, wennM=Xist.
+A abgeschlossen
+A heißtabgeschlossene Hülleoder Abschluss von M.
+c) ∂M := M \M◦heißtRand von M.
+d) M heißtdichtin X, wennM = X ist.
 Beispiel 2
-1) SeiX=Rmit euklidischer Topologie und M=Q. Dann gilt: M=RundM◦=∅
-2) SeiX=RundM= (a,b). Dann gilt: M= [a,b]
-3) SeiX=R,T=TZundM= (a,b). Dann gilt: M=R
+1) Sei X = R mit euklidischer Topologie undM = Q. Dann gilt:M = R und M◦= ∅
+2) Sei X = R und M = (a,b). Dann gilt:M = [a,b]
+3) Sei X = R,T = TZ und M = (a,b). Dann gilt:M = R
 Definition 4
-Sei(X,T)ein topologischer Raum.
-a)B⊆TheißtBasisder Topologie T, wenn jedes U∈TVereinigung von Elementen
-ausBist.
-b)S⊆TheißtSubbasis der Topologie T, wenn jedes U∈TVereinigung von endlichen
-Durchschnitten von Elementen aus Sist.
+Sei (X,T) ein topologischer Raum.
+a) B ⊆T heißtBasis der TopologieT, wenn jedesU ∈T Vereinigung von Elementen
+aus B ist.
+b) S⊆ T heißtSubbasis der TopologieT, wenn jedesU ∈T Vereinigung von endlichen
+Durchschnitten von Elementen ausSist.
 Beispiel 3 (Basis und Subbasis)
 1) Jede Basis ist auch eine Subbasis, z.B.
-S={(a,b)|a,b∈R,a<b}ist für Rmit der Standardtopologie sowohl Basis als
+S = {(a,b) |a,b ∈R,a<b }ist für R mit der Standardtopologie sowohl Basis als
 auch Subbasis.
-2) Gegeben sei X=Rnmit euklidischer Topologie T. Dann ist
-B={Br(x)|r∈Q>0,x∈Qn}
-ist eine abzählbare Basis von T.
-3)Sei(X,T)eintopologischerRaummit X={0,1,2}undT={∅,{0},{0,1},{0,2},X}.
-Dann istS={∅,{0,1},{0,2}}eine Subbasis von T, da gilt:
+2) Gegeben seiX = Rn mit euklidischer TopologieT. Dann ist
+B = {Br(x) |r∈Q>0,x ∈Qn }
+ist eine abzählbare Basis vonT.
+3) Sei(X,T) eintopologischerRaummit X = {0,1,2 }undT = {∅,{0 },{0,1 },{0,2 },X }.
+Dann istS= {∅,{0,1 },{0,2 }} eine Subbasis vonT, da gilt:
  1.1. TOPOLOGISCHE RÄUME
-• S⊆ T
-• ∅,{0,1}und{0,2}∈S
-• {0}={0,1}∩{ 0,2}
-•X={0,1}∪{ 0,2}
-Allerings istSkeine Basis von (X,T), da{0}nicht als Vereinigung von Elementen
-ausSerzeugt werden kann.
+• S⊆T
+• ∅,{0,1 }und {0,2 }∈S
+• {0 }= {0,1 }∩{ 0,2 }
+•X = {0,1 }∪{ 0,2 }
+Allerings istSkeine Basis von(X,T), da{0 }nicht als Vereinigung von Elementen
+aus Serzeugt werden kann.
 Bemerkung 2
-SeiXeine Menge undS⊆P (X). Dann gibt es genau eine Topologie TaufX, für dieS
+Sei X eine Menge undS⊆P (X). Dann gibt es genau eine TopologieT auf X, für dieS
 Subbasis ist.
 Definition 5
-Sei(X,T)ein topologischer Raum und Y⊆X.
-TY:={U∩Y|U∈T}ist eine Topologie auf Y.
-TYheißtTeilraumtopologie und(Y,TY)heißt einTeilraum von(X,T).
-Die Teilraumtopologie wird auch Spurtopologie oder Unterraumtopologie genannt.
+Sei (X,T) ein topologischer Raum undY ⊆X.
+TY := {U ∩Y |U ∈T }ist eine Topologie aufY.
+TY heißtTeilraumtopologieund (Y,TY) heißt einTeilraumvon (X,T).
+Die Teilraumtopologie wird auchSpurtopologie oder Unterraumtopologie genannt.
 Definition 6
-SeienX1,X2topologische Räume.
-U⊆X1×X2sei offen, wenn es zu jedem x= (x1,x2)∈UUmgebungen Uiumximit
-i= 1,2gibt, sodass U1×U2⊆Ugilt.
-T={U⊆X1×X2|Uoffen}ist eine Topologie auf X1×X2. Sie heißt Produkttopologie .
-B={U1×U2|Uioffen inXi,i= 1,2}ist eine Basis von T.
+Seien X1,X2 topologische Räume.
+U ⊆X1 ×X2 sei offen, wenn es zu jedemx = (x1,x2) ∈U Umgebungen Ui um xi mit
+i= 1,2 gibt, sodassU1 ×U2 ⊆U gilt.
+T = {U ⊆X1 ×X2 |U offen}ist eine Topologie aufX1×X2. Sie heißtProdukttopologie.
+B = {U1 ×U2 |Ui offen inXi,i = 1,2 }ist eine Basis vonT.
 U
 xx2
-x1U2
-U1X1X2
-Abbildung 1.1: Zu x= (x1,x2)gibt es Umgebungen U1,U2mitU1×U2⊆U
+x1
+U2
+U1
+X1
+X2
+Abbildung 1.1: Zux= (x1,x2) gibt es UmgebungenU1,U2 mit U1 ×U2 ⊆U
 Beispiel 4 (Produkttopologien)
-1)X1=X2=Rmit euklidischer Topologie.
-⇒Die Produkttopologie auf R×R=R2stimmt mit der euklidischen Topologie auf
-R2überein.
-2)X1=X2=Rmit Zariski-Topologie. TProdukttopologie auf R2:U1×U2
+1) X1 = X2 = R mit euklidischer Topologie.
+⇒Die Produkttopologie aufR ×R = R2 stimmt mit der euklidischen Topologie auf
+R2 überein.
+2) X1 = X2 = R mit Zariski-Topologie.T Produkttopologie aufR2: U1 ×U2
 (Siehe Abbildung 1.2)
  1.1. TOPOLOGISCHE RÄUME
-U1=R\NU2=R\N
-Abbildung 1.2: Zariski-Topologie auf R2
+U1 = R \N
+U2=R\N
+Abbildung 1.2: Zariski-Topologie aufR2
 Definition 7
-SeiXein topologischer Raum, ∼eine Äquivalenzrelation auf X,X=X/∼sei die Menge
-der Äquivalenzklassen, π:X→X, x↦→[x]∼.
-TX:={
-U⊆X⏐⏐π−1(U)∈TX}
-(X,TX)heißtQuotiententopologie .
+Sei X ein topologischer Raum,∼eine Äquivalenzrelation aufX, X = X/∼sei die Menge
+der Äquivalenzklassen,π: X →X, x ↦→[x]∼.
+TX :=
+{
+U ⊆X
+⏐⏐π−1(U) ∈TX
+}
+(X,TX) heißtQuotiententopologie.
 Beispiel 5
-X=R,a∼b:⇔a−b∈Z
-R -1012345
-0a
-Ua π−1(u)
-0∼1, d. h. [0] = [1]
+X = R,a ∼b:⇔a−b∈Z
+R-1 0 1 2 3 4 5
+0
+a
+U
+aπ−1(u)
+0 ∼1, d. h.[0] = [1]
 Beispiel 6
-SeiX=R2und(x1,y1)∼(x2,y2)⇔x1−x2∈Zundy1−y2∈Z. Dann istX/∼ein Torus.
+Sei X = R2 und (x1,y1) ∼(x2,y2) ⇔x1 −x2 ∈Z und y1 −y2 ∈Z. Dann istX/∼ein Torus.
 Beispiel 7 (Projektiver Raum)
-X=Rn+1\{0}, x∼y⇔∃λ∈R×mity=λx
-⇔xundyliegen auf der gleichen
+X = Rn+1 \{0 }, x ∼y⇔∃λ∈R×mit y= λx
+⇔x und y liegen auf der gleichen
 Ursprungsgerade
-X=Pn(R)
+X = Pn(R)
  1.2. METRISCHE RÄUME
 Also fürn= 1:
-−4−2 2 4 6 8
-−4−224
+−4 −2 2 4 6 8
+−4
+−2
+2
+4
 1.2 Metrische Räume
 Definition 8
-SeiXeine Menge. Eine Abbildung d:X×X→R+
-0heißtMetrik, wenn gilt:
-(i) Definitheit: d(x,y) = 0⇔x=y∀x,y∈X
-(ii) Symmetrie: d(x,y) =d(y,x)∀x,y∈X
-(iii) Dreiecksungleichung: d(x,z)≤d(x,y) +d(y,z)∀x,y,z∈X
-Das Paar (X,d)heißt einmetrischer Raum .
+Sei X eine Menge. Eine Abbildungd: X×X →R+
+0 heißtMetrik, wenn gilt:
+(i) Definitheit: d(x,y) = 0 ⇔x= y ∀x,y ∈X
+(ii) Symmetrie: d(x,y) = d(y,x) ∀x,y ∈X
+(iii) Dreiecksungleichung: d(x,z) ≤d(x,y) + d(y,z) ∀x,y,z ∈X
+Das Paar(X,d) heißt einmetrischer Raum.
 Bemerkung 3
-Sei(X,d)ein metrischer Raum und
-Br(x) :={y∈X|d(x,y)<r}fürx∈X,r∈R+
-B={Br(x)⊆P(X)|x∈X,r∈R+}ist Basis einer Topologie auf X.
+Sei (X,d) ein metrischer Raum und
+Br(x) := {y∈X |d(x,y) <r }für x∈X,r ∈R+
+B = {Br(x) ⊆P(X) |x∈X,r ∈R+ }ist Basis einer Topologie aufX.
 Definition 9
-Seien (X,dX)und(Y,dY)metrische Räume und ϕ:X→Yeine Abbildung mit
-∀x1,x2∈X:dX(x1,x2) =dY(ϕ(x1),ϕ(x2))
-Dann heißt ϕeineIsometrie vonXnachY.
+Seien (X,dX) und (Y,dY) metrische Räume undϕ: X →Y eine Abbildung mit
+∀x1,x2 ∈X : dX(x1,x2) = dY(ϕ(x1),ϕ(x2))
+Dann heißtϕ eine Isometrie von X nach Y.
 Beispiel 8 (Skalarprodukt erzeugt Metrik)
-SeiVein euklidischer oder hermitescher Vektorraum mit Skalarprodukt ⟨·,·⟩. Dann wird V
-durchd(x,y) :=√
-⟨x−y,x−y⟩zum metrischen Raum.
+Sei V ein euklidischer oder hermitescher Vektorraum mit Skalarprodukt⟨·,·⟩. Dann wirdV
+durch d(x,y) :=
+√
+⟨x−y,x −y⟩zum metrischen Raum.
 Beispiel 9 (diskrete Metrik)
-SeiXeine Menge. Dann heißt
-d(x,y) ={
-0fallsx=y
-1fallsx̸=y
-diediskrete Metrik . Die Metrik dinduziert die diskrete Topologie .
+Sei X eine Menge. Dann heißt
+d(x,y) =
+{
+0 falls x= y
+1 falls x̸= y
+die diskrete Metrik. Die Metrikd induziert diediskrete Topologie.
  1.2. METRISCHE RÄUME
 Beispiel 10
-X=R2undd((x1,y1),(x2,y2)) := max(∥x1−x2∥,∥y1−y2∥)ist Metrik.
-Beobachtung: derzeugt die euklidische Topologie.
-Br(0) =r r
+X = R2 und d((x1,y1),(x2,y2)) := max(∥x1 −x2∥,∥y1 −y2∥) ist Metrik.
+Beobachtung: d erzeugt die euklidische Topologie.
+Br(0) =
+r r
 r
 r
-(a)Br(0) (b) Euklidische Topologie
-Abbildung 1.3: Veranschaulichungen zur Metrik daus Beispiel 10
+(a) Br(0) (b) Euklidische Topologie
+Abbildung 1.3: Veranschaulichungen zur Metrikd aus Beispiel 10
  1.2. METRISCHE RÄUME
 Beispiel 11 (SNCF-Metrik1)
-X=R2
-−4−2 2 4 6 8
-−4−224
+X = R2
+−4 −2 2 4 6 8
+−4
+−2
+2
+4
 Definition 10
-Ein topologischer Raum Xheißthausdorffsch , wenn es für je zwei Punkte x̸=yinX
-Umgebungen UxumxundUyumygibt, sodass Ux∩Uy=∅.
+Ein topologischer RaumX heißthausdorffsch, wenn es für je zwei Punktex ̸= y in X
+Umgebungen Ux um x und Uy um y gibt, sodassUx ∩Uy = ∅.
 Bemerkung 4 (Trennungseigenschaft)
 Metrische Räume sind hausdorffsch, wegen
-d(x,y)>0⇒∃ε>0 :Bε(x)∩Bε(y) =∅
+d(x,y) >0 ⇒∃ε> 0 : Bε(x) ∩Bε(y) = ∅
 Beispiel 12 (Topologische Räume und Hausdorff-Räume)
-1)(R,TZ)ist ein topologischer Raum, der nicht hausdorffsch ist.
-2)(R,TEuklid )ist ein topologischer Hausdorff-Raum.
+1) (R,TZ) ist ein topologischer Raum, der nicht hausdorffsch ist.
+2) (R,TEuklid) ist ein topologischer Hausdorff-Raum.
 Bemerkung 5 (Eigenschaften von Hausdorff-Räumen)
-SeienX,X 1,X2Hausdorff-Räume.
-a) Jeder Teilraum von Xist hausdorffsch.
-b)X1×X2ist hausdorffsch (vgl. Abbildung 1.4).
+Seien X,X1,X2 Hausdorff-Räume.
+a) Jeder Teilraum vonX ist hausdorffsch.
+b) X1 ×X2 ist hausdorffsch (vgl. Abbildung 1.4).
 Definition 11
-SeiXein topologischer Raum und (x)n∈Neine Folge in X.x∈XheißtGrenzwert oder
-Limesvon(xn), wenn es für jede Umgebung Uvonxeinn0gibt, sodass xn∈Ufür alle
+Sei X ein topologischer Raum und(x)n∈N eine Folge inX. x∈X heißtGrenzwert oder
+Limes von (xn), wenn es für jede UmgebungU von x ein n0 gibt, sodassxn ∈U für alle
 n≥n0.
 Bemerkung 6
-IstXhausdorffsch, so hat jede Folge in Xhöchstens einen Grenzwert.
-Beweis: Sei(xn)eine konvergierende Folge und xundyGrenzwerte der Folge.
-DaXhausdorffsch ist, gibt es Umgebungen UxvonxundUyvonymitUx∩Uy=∅falls
-x̸=y. Da (xn)gegenxundykonvergiert, existiert ein n0mitxn∈Ux∩Uyfür allen≥n0
-⇒x=y ■
+Ist X hausdorffsch, so hat jede Folge inX höchstens einen Grenzwert.
+Beweis: Sei (xn) eine konvergierende Folge undx und y Grenzwerte der Folge.
+Da X hausdorffsch ist, gibt es UmgebungenUx von x und Uy von y mit Ux ∩Uy = ∅falls
+x̸= y. Da(xn) gegen x und y konvergiert, existiert einn0 mit xn ∈Ux ∩Uy für allen≥n0
+⇒x= y ■
 1Diese Metrik wird auch „französische Eisenbahnmetrik“ genannt.
  1.3. STETIGKEIT
 (x1,y1) (x2,y2)
 x1 x2
-U1×X2 U2×X2X1X2
-Abbildung 1.4: Wenn X1,X2hausdorffsch sind, dann auch X1×X2
+U1 ×X2 U2 ×X2
+X1
+X2
+Abbildung 1.4: WennX1,X2 hausdorffsch sind, dann auchX1 ×X2
 1.3 Stetigkeit
 Definition 12
-Seien (X,TX),(Y,TY)topologische Räume und f:X→Yeine Abbildung.
-a)fheißtstetig :⇔∀U∈TY:f−1(U)∈TX.
-b)fheißtHomöomorphismus , wennfstetig ist und es eine stetige Abbildung g:
-Y→Xgibt, sodass g◦f=idXundf◦g=idY.
+Seien (X,TX),(Y,TY) topologische Räume undf : X →Y eine Abbildung.
+a) f heißtstetig :⇔∀U ∈TY : f−1(U) ∈TX.
+b) f heißtHomöomorphismus, wenn f stetig ist und es eine stetige Abbildungg :
+Y →X gibt, sodassg◦f = idX und f ◦g= idY.
 Bemerkung 72
-SeienX,Ymetrische Räume und f:X→Yeine Abbildung.
-Dann gilt:fist stetig⇔zu jedemx∈Xund jedem ε>0gibt esδ(x,ε)>0, sodass für
-alley∈Xmitd(x,y)<δgiltdY(f(x),f(y))<ε.
-Beweis: „⇒“: Seix∈X,ε> 0gegeben und U:=Bε(f(x)).
-Dann istUoffen inY.
-Def. 12.a= = = = =⇒f−1(U)ist offen in X. Dann istx∈f−1(U).
-⇒∃δ>0, sodass Bδ(x)⊆f−1(U)
-⇒f(Bδ(x))⊆U
-⇒{y∈X|dX(x,y)<δ}⇒Beh.
-„⇐“: SeiU⊆Yoffen,X∈f−1(U).
-Dann gibt es ε>0, sodass Bε(f(x))⊆U
-Vor.= =⇒Es gibtδ>0, sodassf(Bδ(x))⊆Bε(f(x)))
-⇒Bδ(x)⊆f−1(Bε(f(x)))⊆f−1(U) ■
+Seien X,Y metrische Räume undf: X →Y eine Abbildung.
+Dann gilt:f ist stetig⇔zu jedemx∈X und jedemε> 0 gibt esδ(x,ε) >0, sodass für
+alle y∈X mit d(x,y) <δ gilt dY(f(x),f(y)) <ε.
+Beweis: „⇒“: Seix∈X,ε> 0 gegeben undU := Bε(f(x)).
+Dann istU offen inY.
+Def. 12.a= = = = =⇒f−1(U) ist offen inX. Dann istx∈f−1(U).
+⇒∃δ >0, sodassBδ(x) ⊆f−1(U)
+⇒f(Bδ(x)) ⊆U
+⇒{y∈X |dX(x,y) <δ }⇒ Beh.
+„⇐“: SeiU ⊆Y offen, X ∈f−1(U).
+Dann gibt esε> 0, sodassBε(f(x)) ⊆U
+Vor.= =⇒Es gibtδ >0, sodassf(Bδ(x)) ⊆Bε(f(x)))
+⇒Bδ(x) ⊆f−1(Bε(f(x))) ⊆f−1(U) ■
 Bemerkung 8
-SeienX,Ytopologische Räume und f:X→Yeine Abbildung. Dann gilt:
-fist stetig
-⇔für jede abgeschlossene Teilmenge A⊆Ygilt:f−1(A)⊆Xist abgeschlossen.
+Seien X,Y topologische Räume undf : X →Y eine Abbildung. Dann gilt:
+f ist stetig
+⇔für jede abgeschlossene TeilmengeA⊆Y gilt : f−1(A) ⊆X ist abgeschlossen.
 Beispiel 13 (Stetige Abbildungen und Homöomorphismen)
-1) Für jeden topologischen Raum Xgilt: idX:X→Xist Homöomorphismus.
+1) Für jeden topologischen RaumX gilt: idX : X →X ist Homöomorphismus.
 2Es wird die Äquivalenz von Stetigkeit im Sinne der Analysis und Topologie auf metrischen Räumen gezeigt.
  1.3. STETIGKEIT
-2)Ist(Y,TY)trivialer topologischer Raum, d. h. TY=Ttriv, so ist jede Abbildung
-f:X→Ystetig.
-3)IstXdiskreter topologischer Raum, so ist f:X→Ystetig für jeden topologischen
-RaumYund jede Abbildung f.
-4) SeiX= [0,1),Y=S1={z∈C|∥z∥= 1}undf(t) =e2πit.
-R 010f
+2) Ist (Y,TY) trivialer topologischer Raum, d. h.TY = Ttriv, so ist jede Abbildung
+f : X →Y stetig.
+3) Ist X diskreter topologischer Raum, so istf : X →Y stetig für jeden topologischen
+Raum Y und jede Abbildungf.
+4) Sei X = [0,1),Y = S1 = {z∈C |∥z∥= 1 }und f(t) = e2πit.
+R0 1
+0
+f
 g
-Abbildung 1.5: Beispiel einer stetigen Funktion f, deren Umkehrabbildung gnicht stetig ist.
-Die Umkehrabbildung gist nicht stetig, da g−1(U)nicht offen ist (vgl. Abbildung 1.5).
+Abbildung 1.5: Beispiel einer stetigen Funktionf, deren Umkehrabbildungg nicht stetig ist.
+Die Umkehrabbildungg ist nicht stetig, dag−1(U) nicht offen ist (vgl. Abbildung 1.5).
 Bemerkung 9 (Verkettungen stetiger Abbildungen sind stetig)
-SeienX,Y,Ztopologische Räume, f:X→Yundg:Y→Zstetige Abbildungen.
-Dann istg◦f:X→Zstetig.
-Xf→→
-g◦f↘↘Y
-g↙↙
+Seien X,Y,Z topologische Räume,f : X →Y und g: Y →Z stetige Abbildungen.
+Dann istg◦f : X →Z stetig.
+X
+f → →
+g◦f ↘ ↘
+Y
+g↙ ↙
 Z
-Beweis: SeiU⊆Zoffen⇒(g◦f)−1(U) =f−1(g−1(U)).g−1(U)ist offen in Yweilgstetig
-ist,f−1(g−1(U))ist offen in X, weilfstetig ist. ■
+Beweis: Sei U ⊆Z offen ⇒(g◦f)−1(U) = f−1(g−1(U)). g−1(U) ist offen inY weil g stetig
+ist, f−1(g−1(U)) ist offen inX, weilf stetig ist. ■
 Bemerkung 10
-a) Für jeden topologischen Raum Xist
-Homöo (X) :={f:X→X|fist Homöomorphismus }
+a) Für jeden topologischen RaumX ist
+Homöo(X) := {f : X →X |f ist Homöomorphismus}
 eine Gruppe.
-b) Jede Isometrie f:X→Yzwischen metrischen Räumen ist ein Homöomorphismus.
-c)Iso(X) :={f:X→X|fist Isometrie}ist eine Untergruppe von Homöo (X)für
-jeden metrischen Raum X.
+b) Jede Isometrief : X →Y zwischen metrischen Räumen ist ein Homöomorphismus.
+c) Iso(X) := {f : X →X |f ist Isometrie}ist eine Untergruppe vonHomöo(X) für
+jeden metrischen RaumX.
 Bemerkung 11 (Projektionen sind stetig)
-SeienX,Ytopologische Räume. πX:X×Y→XundπY:X×Y→Ydie Projektionen
-πX: (x,y)↦→xundπY: (x,y)↦→y
-WirdX×Ymit der Produkttopologie versehen, so sind πXundπYstetig.
-Beweis: SeiU⊆Xoffen
+Seien X,Y topologische Räume.πX : X×Y →X und πY : X×Y →Y die Projektionen
+πX : (x,y) ↦→x und πY : (x,y) ↦→y
+Wird X×Y mit der Produkttopologie versehen, so sindπX und πY stetig.
+Beweis: Sei U ⊆X offen
 ⇒π−1
-X(U) =U×Yist offen in X×Y. ■
+X (U) = U ×Y ist offen inX×Y. ■
 Bemerkung 12
-SeiXein topologischer Raum, ∼eine Äquivalenzrelation auf X,X=X/∼der Bahnenraum
-versehen mit der Quotiententopologie, π:X→X,x↦→[x]∼.
-Dann istπstetig.
+Sei X ein topologischer Raum, ∼ eine Äquivalenzrelation auf X, X̄ = X/∼ der Bahnenraum
+versehen mit der Quotiententopologie, π: X →X̄, x ↦ [x]∼.
+Dann istπ stetig.
  1.4. ZUSAMMENHANG
-Beweis: Nach Definition ist U⊆Xoffen⇔π−1(U)⊆Xoffen. ■
-Beobachtung: Die Quotiententopologie ist die feinste Topologie, sodass πstetig wird.
+Beweis: Nach Definition ist U ⊆X/∼ offen ⇔π−1(U) ⊆X offen. ■
+Beobachtung: Die Quotiententopologie ist die feinste Topologie, sodassπ stetig wird.
 Beispiel 14 (Stereographische Projektion)
-RnundSn\{N}sind homöomorph für beliebiges N∈Sn. Es gilt:
-Sn={
-x∈Rn+1⏐⏐∥x∥= 1}
-={
-x∈Rn+1⏐⏐⏐⏐⏐n+1∑
-i=1x2
-i= 1}
-O. B. d. A. sei N=
-0
+Rn und Sn \{N }sind homöomorph für beliebigesN ∈Sn. Es gilt:
+Sn =
+{
+x∈Rn+1 ⏐⏐∥x∥= 1
+}
+=
+{
+x∈Rn+1
+⏐⏐⏐⏐⏐
+n+1∑
+i=1
+x2
+i = 1
+}
+O. B. d. A. seiN =
+
+
+0
 ...
 0
-1
-. Die Gerade durch NundPschneidet die Ebene Hin genau
-einem Punkt ˆP.Pwird auf ˆPabgebildet.
-f:Sn\{N}→Rn
-P↦→genau ein Punkt
-LP∩H
-wobeiRn=H=
+1
+
+. Die Gerade durchN und P schneidet die EbeneH in genau
+einem Punkt ˆP. P wird auf ˆP abgebildet.
+f :Sn \{N }→ Rn
+P ↦→
+genau ein Punkt
+  
+LP ∩H
+wobei Rn = H =
+
 
-
-x1
+
+
+
+x1
 ...
-xn+1
-∈Rn+1⏐⏐⏐⏐⏐⏐⏐xn+1= 0
+xn+1
+
+∈Rn+1
+⏐⏐⏐⏐⏐⏐⏐
+xn+1 = 0
+
 
-undLPdie Gerade in Rn+1durchN
-undPist.
-SeiP=
-x1
+
+und LP die Gerade inRn+1 durch N
+und P ist.
+Sei P =
+
+
+x1
 ...
-xn+1
-, so istxn+1<1, also istLPnicht parallel zu H. Also schneiden sich LP
-undHin genau einem Punkt ˆP.
-Es gilt:fist bijektiv und die Umkehrabbildung ist ebenfalls stetig.
+xn+1
+
+, so istxn+1 <1, also istLP nicht parallel zuH. Also schneiden sichLP
+und H in genau einem PunktˆP.
+Es gilt:f ist bijektiv und die Umkehrabbildung ist ebenfalls stetig.
 1.4 Zusammenhang
 Definition 13
-a)EinRaumXheißtzusammenhängend ,wenneskeineoffenen,nichtleerenTeilmengen
-U1,U2vonXgibt mitU1∩U2=∅undU1∪U2=X.
-b)Eine Teilmenge Y⊆Xheißt zusammenhängend, wenn Yals topologischer Raum mit
+a) Ein Raum X heißt zusammenhängend, wenn es keine offenen, nichtleeren Teilmengen
+U1,U2 von X gibt mitU1 ∩U2 = ∅und U1 ∪U2 = X.
+b) Eine TeilmengeY ⊆X heißt zusammenhängend, wennY als topologischer Raum mit
 der Teilraumtopologie zusammenhängend ist.
  1.4. ZUSAMMENHANG
-xyz
+x
+y
+z
 N
-ˆP0P
+ˆP
+0
+P
 Abbildung 1.6: Visualisierung der stereographischen Projektion
 Bemerkung 13
-Xist zusammenhängend ⇔Es gibt keine abgeschlossenen, nichtleeren Teilmengen A1,A2
-mitA1∩A2=∅undA1∪A2=X.
+X ist zusammenhängend⇔Es gibt keine abgeschlossenen, nichtleeren TeilmengenA1,A2
+mit A1 ∩A2 = ∅und A1 ∪A2 = X.
 Beispiel 15 (Zusammenhang von Räumen)
-1)(Rn,TEuklid )ist zusammenhängend, denn:
-Annahme :Rn=U1˙∪U2mit∅̸=U1,U2∈TEuklidexistieren.
-Seix∈U1,y∈U2und [x,y]die Strecke zwischen xundy. SeiV= [x,y]. Nun
-betrachten wir V⊊ Rnals (metrischen) Teilraum mit der Teilraumtopologie TV.
-Somit giltU1∩[x,y]∈TVwegen der Definition der Teilraumtopologie.
-Dann gibt es z∈[x,y]mitz∈∂(U1∩[x,y]), aberz /∈U1⇒z∈U2. In jeder
-Umgebung von zliegt ein Punkt von U1⇒Widerspruch zu U2offen.
-2)R\{0}ist nicht zusammenhängend, denn R\{0}=R<0∪R>0
-3)R2\{0}ist zusammenhängend.
-4)Q ⊊ Rist nicht zusammenhängend, da (Q∩R<√
-2)∪(Q∩R>√
-2) =Q
-5){x}ist zusammenhängend für jedes x∈X, wobeiXein topologischer Raum ist.
-6)Rmit Zariski-Topologie ist zusammenhängend.
+1) (Rn,TEuklid) ist zusammenhängend, denn:
+Annahme: Rn = U1 ˙∪U2 mit ∅̸= U1,U2 ∈TEuklid existieren.
+Sei x ∈U1,y ∈U2 und [x,y] die Strecke zwischenx und y. Sei V = [ x,y]. Nun
+betrachten wir V ⊊ Rn als (metrischen) Teilraum mit der TeilraumtopologieTV.
+Somit giltU1 ∩[x,y] ∈TV wegen der Definition der Teilraumtopologie.
+Dann gibt es z ∈[x,y] mit z ∈∂(U1 ∩[x,y]), aber z /∈U1 ⇒z ∈U2. In jeder
+Umgebung vonz liegt ein Punkt vonU1 ⇒Widerspruch zuU2 offen.
+2) R \{0 }ist nicht zusammenhängend, dennR \{0 }= R<0 ∪R>0
+3) R2 \{0 }ist zusammenhängend.
+4) Q ⊊ Rist nicht zusammenhängend, da(Q ∩R<
+√
+2) ∪(Q ∩R>
+√
+2) = Q
+5) {x}ist zusammenhängend für jedesx∈X, wobeiX ein topologischer Raum ist.
+6) R mit Zariski-Topologie ist zusammenhängend.
 Bemerkung 14
-SeiXein topologischer Raum und A⊆Xzusammenhängend. Dann ist auch Azusammenhängend.
+Sei X ein topologischer Raum und A⊆X zusammenhängend. Dann ist auch der Abschluss Ā zusammenhängend.
 
  1.4. ZUSAMMENHANG
 Beweis: durch Widerspruch
-Annahme :A=A1∪A2, Aiabgeschlossen, Ai̸=∅,A1∩A2=∅
-⇒A= (A∩A1)
-abgeschlossen˙∪(A∩A2)
-abgeschlossen
+Annahme: Ā = A1 ∪A2 (Ā: Abschluss von A), Ai abgeschlossen, Ai ≠ ∅, A1 ∩A2 = ∅
+⇒A= ( A∩A1)  
+abgeschlossen
+˙∪ (A∩A2)  
+abgeschlossen
+  
 disjunkt
-WäreA∩A1=∅
-⇒A⊆A=A1˙∪A2
-⇒A⊆A2⇒A⊆A2
-⇒A1=∅
-⇒Widerspruch zu A1̸=∅
-⇒A∩A1̸=∅und analog A∩A2̸=∅
-⇒Widerspruch zu Aist zusammenhängend. ■
+Wäre A∩A1 = ∅
+⇒A⊆Ā = A1 ˙∪A2 (Ā: Abschluss von A)
+⇒A⊆A2 ⇒Ā ⊆A2
+⇒A1 = ∅
+⇒Widerspruch zu A1 ≠ ∅
+⇒A∩A1 ≠ ∅ und analog A∩A2 ≠ ∅
+⇒Widerspruch zu „A ist zusammenhängend“. ■
 Bemerkung 15
-SeiXein topologischer Raum und A,B⊆Xzusammenhängend.
-IstA∩B̸=∅, dann istA∪Bzusammenhängend.
-Beweis: SeiA∪B=U1˙∪U2,Ui̸=∅offen
-o. B. d. A.= = = = = =⇒A= (A∩U1)˙∪(A∩U2)offen
-Azhgd.= = = =⇒A∩U1=∅
-A∩B̸=∅= = = =⇒U1⊆B
-B= (B∩U1)
-=U1∪(B∩U2)
-=∅ist unerlaubte Zerlegung.
+Sei X ein topologischer Raum undA,B ⊆X zusammenhängend.
+Ist A∩B ̸= ∅, dann istA∪B zusammenhängend.
+Beweis: Sei A∪B = U1 ˙∪U2,Ui ̸= ∅offen
+o. B. d. A.= = = = = =⇒A= (A∩U1) ˙∪(A∩U2) offen
+A zhgd.
+= = = =⇒A∩U1 = ∅
+A∩B̸=∅
+= = = =⇒U1 ⊆B
+B = (B∩U1)  
+=U1
+∪(B∩U2)  
+=∅
+ist unerlaubte Zerlegung.
 ■
 Definition 14
-SeiXein topologischer Raum.
-Fürx∈XseiZ(x)⊆Xdefiniert durch
-Z(x) :=⋃
+Sei X ein topologischer Raum.
+Für x∈X sei Z(x) ⊆X definiert durch
+Z(x) :=
+⋃
 A⊆Xzhgd.
-x∈AA
-Z(x)heißtZusammenhangskomponente .
+x∈A
+A
+Z(x) heißtZusammenhangskomponente.
 Bemerkung 16 (Eigenschaften von Zusammenhangskomponenten)
-SeiXein topologischer Raum. Dann gilt:
-a)Z(x)ist die größte zusammenhängende Teilmenge von X, diexenthält.
-b)Z(x)ist abgeschlossen.
-c)Xist disjunkte Vereinigung von Zusammenhangskomponenten.
+Sei X ein topologischer Raum. Dann gilt:
+a) Z(x) ist die größte zusammenhängende Teilmenge vonX, diex enthält.
+b) Z(x) ist abgeschlossen.
+c) X ist disjunkte Vereinigung von Zusammenhangskomponenten.
 Beweis:
  1.5. KOMPAKTHEIT
-a) SeiZ(x) =A1˙∪A2mitAi̸=∅abgeschlossen.
-O. B. d. A. sei x∈A1undy∈A2.yliegt in einer zusammehängenden Teilmenge A,
-die auchxenthält.⇒A= (A∩A1)
-∋x∪(A∩A2)
-∋yist unerlaubte Zerlegung.
-b) Nach Bemerkung 14 ist Z(x)zusammenhängend ⇒Z(x)⊆Z(x)⇒Z(x) =Z(x)
-c) IstZ(y)∩Z(x)̸=∅Bem. 15= = = = =⇒Z(y)∪Z(x)ist zusammenhängend.
-⇒Z(x)∪Z(y)⊆Z(x)⇒Z(y)⊆Z(x)
-⊆Z(y)⇒Z(x)⊆Z(y)
+a) Sei Z(x) = A1 ˙∪A2 mit Ai ̸= ∅abgeschlossen.
+O. B. d. A. sei x∈A1 und y∈A2. y liegt in einer zusammenhängenden Teilmenge A,
+die auchx enthält. ⇒A= (A∩A1)  
+∋x
+∪(A∩A2)  
+∋y
+ist unerlaubte Zerlegung.
+b) Nach Bemerkung 14 ist der Abschluss Z̄(x) von Z(x) zusammenhängend ⇒Z̄(x) ⊆Z(x) ⇒Z̄(x) = Z(x)
+c) Ist Z(y) ∩Z(x) ̸= ∅Bem. 15= = = = =⇒Z(y) ∪Z(x) ist zusammenhängend.
+⇒Z(x) ∪Z(y) ⊆Z(x) ⇒Z(y) ⊆Z(x)
+⊆Z(y) ⇒Z(x) ⊆Z(y)
 ■
 Bemerkung 17
-Seif:X→Ystetig. IstA⊆Xzusammenhängend, so ist f(A)⊆Yzusammenhängend.
-Beweis: Seif(A) =U1∪U2,Ui̸=∅,offen, disjunkt.
-⇒f−1(f(A)) =f−1(U1)∪f−1(U2)
-⇒A= (A∩f−1(U1))
-̸=∅∪(A∩f−1(U2))
-̸=∅■
+Sei f : X →Y stetig. IstA⊆X zusammenhängend, so istf(A) ⊆Y zusammenhängend.
+Beweis: Sei f(A) = U1 ∪U2,Ui ̸= ∅, offen, disjunkt.
+⇒f−1(f(A)) = f−1(U1) ∪f−1(U2)
+⇒A= (A∩f−1(U1))  
+̸=∅
+∪(A∩f−1(U2))  
+̸=∅
+■
 1.5 Kompaktheit
 Definition 15
-SeiXeine Menge und U⊆P(X).
-Uheißt eine Überdeckung vonX, wenn gilt:
-∀x∈X:∃M∈U:x∈M
+Sei X eine Menge undU ⊆P(X).
+U heißt eineÜberdeckung von X, wenn gilt:
+∀x∈X : ∃M ∈U : x∈M
 Definition 16
-Ein topologischer Raum Xheißtkompakt , wenn jede offene Überdeckung von X
-U={Ui}i∈ImitUioffen inX
+Ein topologischer RaumX heißtkompakt, wenn jede offene Überdeckung vonX
+U = {Ui }i∈I mit Ui offen inX
 eine endliche Teilüberdeckung
 ⋃
-i∈J⊆IUi=Xmit|J|∈N
+i∈J⊆I
+Ui = X mit |J|∈ N
 besitzt.
 Bemerkung 18
-Das Einheitsintervall I:= [0,1]ist kompakt bezüglich der euklidischen Topologie.
-Beweis: Sei(Ui)i∈Jeine offene Überdeckung von I.
-Es genügt zu zeigen, dass es ein δ>0gibt, sodass jedes Teilintervall der Länge δvonIin
-einem derUienthalten ist. Wenn es ein solches δgibt, kann man Iin endlich viele Intervalle
+Das EinheitsintervallI := [0,1] ist kompakt bezüglich der euklidischen Topologie.
+Beweis: Sei (Ui)i∈J eine offene Überdeckung vonI.
+Es genügt zu zeigen, dass es einδ >0 gibt, sodass jedes Teilintervall der Längeδ von I in
+einem derUi enthalten ist. Wenn es ein solchesδ gibt, kann manI in endlich viele Intervalle
  1.5. KOMPAKTHEIT
-der Längeδunterteilen und alle Uiin die endliche Überdeckung aufnehmen, die Teilintervalle
+der Längeδunterteilen und alleUi in die endliche Überdeckung aufnehmen, die Teilintervalle
 enthalten.
-Angenommen, es gibt kein solches δ. Dann gibt es für jedes n∈Nein Intervall In⊆[0,1]
-der Länge 1/nsodassIn⊊Uifür allei∈J.
-Seixnder Mittelpunkt von In. Die Folge (xn)hat einen Häufungspunkt x∈[0,1]. Dann
-gibt esi∈Jmitx∈Ui. DaUioffen ist, gibt es ein ε >0, sodass (x−ε,x+ε)⊆Ui.
-Dann gibt es n0, sodass gilt: 1/n0<ε/2und für unendlich viele3n≥n0:|x−xn|<ε/2, also
-In⊆(x−ε,x+ε)⊆Uifür mindestens ein n∈N.4
+Angenommen, es gibt kein solchesδ. Dann gibt es für jedesn∈N ein IntervallIn ⊆[0,1]
+der Länge1/n sodass In ⊊ Ui für allei∈J.
+Sei xn der Mittelpunkt vonIn. Die Folge(xn) hat einen Häufungspunktx∈[0,1]. Dann
+gibt es i ∈J mit x ∈Ui. Da Ui offen ist, gibt es einε >0, sodass (x−ε,x + ε) ⊆Ui.
+Dann gibt esn0, sodass gilt:1/n0 <ε/2 und für unendlich viele3 n≥n0 : |x−xn|<ε/2, also
+In ⊆(x−ε,x + ε) ⊆Ui für mindestens einn∈N.4
 ⇒Widerspruch
-Dann überdecke [0,1]mit endlich vielen Intervallen I1,...,Idder Längeδ. JedesIjist in
-Uijenthalten.
-⇒Uj1,...,Ujdist endliche Teilüberdeckung von U. ■
+Dann überdecke[0,1] mit endlich vielen IntervallenI1,...,I d der Längeδ. JedesIj ist in
+Uij enthalten.
+⇒Ui1,...,Uid ist endliche Teilüberdeckung von U. ■
 Beispiel 16 (Kompakte Räume)
-1)Rist nicht kompakt.
-2)(0,1)ist nicht kompakt.
-Un= (1/n,1−1/n)⇒⋃
-n∈NUn= (0,1)
-3)Rmit der Zariski-Topologie ist kompakt und jede Teilmenge von Rist es auch.
+1) R ist nicht kompakt.
+2) (0,1) ist nicht kompakt.
+Un = (1/n,1 −1/n) ⇒⋃
+n∈N Un = (0,1)
+3) R mit der Zariski-Topologie ist kompakt und jede Teilmenge vonR ist es auch.
 Bemerkung 19
-SeiXkompakter Raum, A⊆Xabgeschlossen. Dann ist Akompakt.
-Beweis: Sei(Vi)i∈Ioffene Überdeckung von A.
-Dann gibt es für jedes i∈Ieine offene Teilmenge Ui⊆XmitVi=Ui∩A.
-⇒A⊆⋃
-i∈IUi
-⇒U={Ui|i∈I}∪{X\A}ist offene Überdeckung von X
-Xkompakt= = = = = = =⇒es gibti1,...,in∈I, sodassn⋃
-j=1Uij∪(X\A) =X
-⇒
-n⋃
-j=1Uij∪(X\A)
-∩A=A
-⇒n⋃
-j=1(Uij∩A)
-=Vij∪((X\A)∩A)
-=∅=A
-⇒Vi1,...,Vinüberdecken A.
+Sei X kompakter Raum,A⊆X abgeschlossen. Dann istA kompakt.
+Beweis: Sei (Vi)i∈I offene Überdeckung von A.
+Dann gibt es für jedesi∈I eine offene TeilmengeUi ⊆X mit Vi = Ui ∩A.
+⇒A⊆
+⋃
+i∈I
+Ui
+⇒U = {Ui |i∈I}∪{ X\A}ist offene Überdeckung vonX
+X kompakt
+= = = = = = =⇒ es gibti1,...,i n ∈I, sodass
+n⋃
+j=1
+Uij ∪(X\A) = X
+⇒
+
+
+n⋃
+j=1
+Uij ∪(X\A)
+
+∩A= A
+⇒
+n⋃
+j=1
+(Uij ∩A)  
+=Vij
+∪((X\A) ∩A)  
+=∅
+= A
+⇒Vi1 ,...,V in überdecken A.
 ■
 Bemerkung 20
-SeienX,Ykompakte topologische Räume. Dann ist X×Ymit der Produkttopologie
+Seien X,Y kompakte topologische Räume. Dann istX ×Y mit der Produkttopologie
 kompakt.
-Beweis: Sei(Wi)i∈Ieine offene Überdeckung von X×Y. Für jedes (x,y)∈X×Ygibt es
-offene Teilmengen Ux,yvonXundVx,yvonYsowie eini∈I, sodassUx,y×Vx,y⊆Wi.
-3Dies gilt nicht für alle n≥n0, da ein Häufungspunkt nur eine konvergente Teilfolge impliziert.
+Beweis: Sei (Wi)i∈I eine offene Überdeckung vonX ×Y. Für jedes(x,y) ∈X ×Y gibt es
+offene TeilmengenUx,y von X und Vx,y von Y sowie eini∈I, sodassUx,y ×Vx,y ⊆Wi.
+3Dies gilt nicht für allen≥n0, da ein Häufungspunkt nur eine konvergente Teilfolge impliziert.
 4Sogar für unendlich viele.
  1.5. KOMPAKTHEIT
 Wi
 xy
-xVx,y
-Ux,yYX
+x
+Vx,y
+Ux,y
+Y
+X
 Abbildung 1.7: Die blaue Umgebung ist Schnitt vieler Umgebungen
-Die offenen Mengen Ux0,y×Vx0,yfür festesx0und alley∈Yüberdecken{x0}×y. DaY
-kompakt ist, ist auch {x0}×Ykompakt. Also gibt es y1,...,ym(x0)mit⋃m(x0)
-i=1Ux0,yi×
-Vx0,yi⊇{x0}×Y.
-SeiUx0:=⋂m(x)
-i=1Ux0,yi. DaXkompakt ist, gibt es x1,...,xn∈Xmit⋃n
-j=1Uxj=X
+Die offenen Mengen Ux0,y ×Vx0,y für festes x0 und alle y∈Y überdecken {x0}×Y. Da Y
+kompakt ist, ist auch{x0 }×Y kompakt. Also gibt esy1,...,y m(x0) mit ⋃m(x0)
+i=1 Ux0,yi ×
+Vx0,yi ⊇{x0 }×Y.
+Sei Ux0 := ⋂m(x)
+i=1 Ux0,yi. DaX kompakt ist, gibt esx1,...,x n ∈X mit ⋃n
+j=1 Uxj = X
 ⇒⋃k
-j=1⋃m(xj)
-i=1(
-Uxj,yi×Vxj,yi)
-
-Ein grün-oranges Kästchen⊇X×Y
+j=1
+⋃m(xj)
+i=1
+(
+Uxj,yi ×Vxj,yi
+)
+  
+Ein grün-oranges Kästchen
+⊇X×Y
 ⇒⋃
-j⋃
-iWi(xj,yi) =X×Y ■
+j
+⋃
+iWi(xj,yi) = X×Y ■
 Bemerkung 21
-SeiXein Hausdorffraum und K⊆Xkompakt. Dann ist Kabgeschlossen.
+Sei X ein Hausdorffraum undK ⊆X kompakt. Dann istK abgeschlossen.
 Beweis: z. Z.:Komplement ist offen
-IstX=K, so istKabgeschlossen in X. Andernfalls sei y∈X\K. Für jedes x∈Kseien
-Uxbzw.VyUmgebungen von xbzw. vony, sodassUx∩Vy=∅.
+Ist X = K, so istK abgeschlossen inX. Andernfalls seiy∈X\K. Für jedesx∈K seien
+Ux bzw. Vy Umgebungen vonx bzw. vony, sodassUx ∩Vy = ∅.
 Xi
-Kx
+K
+x
 y
-DaKkompakt ist, gibt es endlich viele x1,...,xn∈K, sodass⋃m
-i=1Uxi⊇K.
-SeiV:=n⋂
-i=1Vxi
+Da K kompakt ist, gibt es endlich viele x1,...,xn ∈K, sodass ⋃n
+i=1 Uxi ⊇K.
+Sei V :=
+n⋂
+i=1
+Vxi
  1.6. WEGE UND KNOTEN
-⇒V∩(n⋃
-i=1Uxi)
-=∅
-⇒V∩K=∅
-⇒Vist Überdeckung von y, die ganz in X\Kenthalten ist .
-⇒X\Kist offen
-Damit istKabgeschlossen. ■
+⇒V ∩
+(n⋃
+i=1
+Uxi
+)
+= ∅
+⇒V ∩K = ∅
+⇒V ist Umgebung von y, die ganz in X\K enthalten ist.
+⇒X\K ist offen
+Damit istK abgeschlossen. ■
 Bemerkung 22
-SeienX,Ytopologische Räume, f:X→Ystetig.
-IstK⊆Xkompakt, so ist f(K)⊆Ykompakt.
-Beweis: Sei(Vi)i∈Ioffene Überdeckung von f(K)
-fstetig= = = =⇒(f−1(Vi))i∈Iist offene Überdeckung von K
-Kompakt= = = = =⇒es gibti1,...,in, sodassf−1(Vi1),...,f−1(Vin)Überdeckung von Kist.
-⇒f(f−1(Vi1)),...,f (f−1(Vin))überdecken f(K).
-Es gilt:f(f−1(V)) =V∩f(X) ■
+Seien X,Y topologische Räume,f : X →Y stetig.
+Ist K ⊆X kompakt, so istf(K) ⊆Y kompakt.
+Beweis: Sei (Vi)i∈I offene Überdeckung vonf(K)
+f stetig
+= = = =⇒(f−1(Vi))i∈I ist offene Überdeckung vonK
+Kompakt
+= = = = =⇒es gibti1,...,i n, sodassf−1(Vi1 ),...,f −1(Vin) Überdeckung vonK ist.
+⇒f(f−1(Vi1 )),...,f (f−1(Vin)) überdecken f(K).
+Es gilt:f(f−1(V)) = V ∩f(X) ■
 Satz 1.1 (Heine-Borel)
-Eine Teilmenge von RnoderCnist genau dann kompakt, wenn sie beschränkt und
+Eine Teilmenge vonRn oder Cn ist genau dann kompakt, wenn sie beschränkt und
 abgeschlossen ist.
-Beweis: „⇒“: SeiK⊆Rn(oderCn) kompakt.
-DaRnundCnhausdorffsch sind, ist Knach Bemerkung 21 abgeschlossen. Nach Voraussetzung
- kannKmit endlich vielen offenen Kugeln von Radien 1 überdeckt werden ⇒Kist
+Beweis: „⇒“: SeiK ⊆Rn (oder Cn) kompakt.
+Da Rn und Cn hausdorffsch sind, istK nach Bemerkung 21 abgeschlossen. Nach Voraussetzung
+ kannK mit endlich vielen offenen Kugeln von Radien 1 überdeckt werden⇒K ist
 beschränkt.
-„⇐“ SeiA⊆Rn(oderCn) beschränkt und abgeschlossen.
-Dann gibt es einen Würfel W= [−N,N ]×···× [−N,N ]
-nmalmitA⊆Wbzw. „Polyzylinder“
-Z={(z1,...,zn)∈Cn|zi≤Nfüri= 1,...,n}
-Nach Bemerkung 20 und Bemerkung 18 ist Wkompakt, also ist Anach Bemerkung 19 auch
-kompakt. Genauso ist Zkompakt, weil
-{z∈C∥z|≤1}
-homöomorph zu{
-(x,y)∈R2⏐⏐∥(x,y)∥≤1}
+„⇐“ SeiA⊆Rn (oder Cn) beschränkt und abgeschlossen.
+Dann gibt es einen WürfelW = [−N,N ] ×···× [−N,N ]  
+n mal
+mit A⊆W bzw. „Polyzylinder“
+Z = {(z1,...,zn) ∈Cn | |zi| ≤N für i= 1,...,n}
+Nach Bemerkung 20 und Bemerkung 18 istW kompakt, also istAnach Bemerkung 19 auch
+kompakt. Genauso istZ kompakt, weil
+{z∈C | |z| ≤1}
+homöomorph zu {
+(x,y) ∈R2 ⏐⏐∥(x,y)∥≤ 1
+}
 ist. ■
 1.6 Wege und Knoten
 Definition 17
-SeiXein topologischer Raum.
+Sei X ein topologischer Raum.
  1.6. WEGE UND KNOTEN
-a) EinWeginXist eine stetige Abbildung γ: [0,1]→X.
-b)γheißtgeschlossen , wennγ(1) =γ(0)gilt.
-c)γheißteinfach, wennγ|[0,1)injektiv ist.
+a) Ein Wegin X ist eine stetige Abbildungγ : [0,1] →X.
+b) γ heißtgeschlossen, wennγ(1) = γ(0) gilt.
+c) γ heißteinfach, wennγ|[0,1) injektiv ist.
 Beispiel 17
-IstXdiskret, so ist jeder Weg konstant, d. h. von der Form
-∀x∈[0,1] :γ(x) =c, c∈X
-Dennγ([0,1])ist zusammenhängend für jeden Weg γ.
+Ist X diskret, so ist jeder Weg konstant, d. h. von der Form
+∀x∈[0,1] : γ(x) = c, c ∈X
+Denn γ([0,1]) ist zusammenhängend für jeden Wegγ.
 Definition 18
-Ein topologischer Raum Xheißtwegzusammenhängend , wenn es zu je zwei Punkten
-x,y∈Xeinen Wegγ: [0,1]→Xgibt mitγ(0) =xundγ(1) =y.
+Ein topologischer RaumX heißtwegzusammenhängend, wenn es zu je zwei Punkten
+x,y ∈X einen Wegγ : [0,1] →X gibt mitγ(0) = x und γ(1) = y.
 Bemerkung 23
-SeiXein topologischer Raum.
-a)Xist wegzusammenhängend ⇒Xist zusammenhängend
-b)Xist wegzusammenhängend ̸⇐Xist zusammenhängend
+Sei X ein topologischer Raum.
+a) X ist wegzusammenhängend⇒X ist zusammenhängend
+b) X ist wegzusammenhängend̸⇐X ist zusammenhängend
 Beweis:
-a)SeiXein wegzusammenhängender topologischer Raum, A1,A2nichtleere, disjunkte,
-abgeschlossene Teilmengen von XmitA1∪A2=X. Seix∈A1,y∈A2,γ: [0,1]→X
-ein Weg von xnachy.
-Dann istC:=γ([0,1])⊆Xzusammenhängend, weil γstetig ist.
-C= (C∩A1)
-∋x∪(C∩A2)
+a) Sei X ein wegzusammenhängender topologischer Raum,A1,A2 nichtleere, disjunkte,
+abgeschlossene Teilmengen vonX mit A1 ∪A2 = X. Seix∈A1,y ∈A2,γ : [0,1] →X
+ein Weg vonx nach y.
+Dann istC := γ([0,1]) ⊆X zusammenhängend, weilγ stetig ist.
+C = (C∩A1)  
+∋x
+∪(C∩A2)  
 ∋y
-ist Zerlegung in nichtleere, disjunkte, abgeschlossene Teilmengen ⇒Widerspruch
-b) SeiX={
-(x,y)∈R2⏐⏐⏐x2+y2= 1∨y= 1 + 2·e−1
-10x}
+ist Zerlegung in nichtleere, disjunkte, abgeschlossene Teilmengen⇒Widerspruch
+b) Sei X =
+{
+(x,y) ∈R2
+⏐⏐⏐x2 + y2 = 1 ∨y= 1 + 2·e−1
+10 x
+}
 .
 Abbildung 1.8a veranschaulicht diesen Raum.
-SeiU1∪U2=X,U 1̸=U2=∅,Uioffen.X=C∪S. Dann istC⊆U1oderC⊆U2,
-weilCundSzusammenhängend sind.
-Also istC=U1undS=U2(oder umgekehrt).
-Seiy∈C=U1,ε> 0undBε(y)⊆U1eine Umgebung von y, die inU1enthalten ist.
-Aber: Bε(y)∩S̸=∅⇒Widerspruch⇒X∪Sist zusammenhängend, aber nicht
+Sei U1 ∪U2 = X, U1 ∩U2 = ∅, Ui ≠ ∅ offen. X = C∪S. Dann ist C ⊆U1 oder C ⊆U2,
+weil C und S zusammenhängend sind.
+Also istC = U1 und S = U2 (oder umgekehrt).
+Sei y∈C = U1,ε> 0 und Bε(y) ⊆U1 eine Umgebung vony, die inU1 enthalten ist.
+Aber: Bε(y) ∩S ≠ ∅ ⇒ Widerspruch ⇒ X = C∪S ist zusammenhängend, aber nicht
 wegzusammenhängend. ■
 Beispiel 18 (Hilbert-Kurve)
-Es gibt stetige, surjektive Abbildungen [0,1]→[0,1]×[0,1]. Ein Beispiel ist die in Abbildung
+Es gibt stetige, surjektive Abbildungen[0,1] →[0,1] ×[0,1]. Ein Beispiel ist die in Abbildung
  1.9 dargestellte Hilbert-Kurve.
 Definition 19
-SeiXein topologischer Raum. Eine Jordankurve inXist ein Homöomorphismus γ:
-[0,1]→C⊆Xbzw.γ:S1→C⊆X, wobeiC:= Bildγ.
+Sei X ein topologischer Raum. EineJordankurve in X ist ein Homöomorphismus γ :
+[0,1] →C ⊆X bzw. γ : S1 →C ⊆X, wobeiC := Bild γ.
  1.6. WEGE UND KNOTEN
-(a) Spirale Smit KreisC0.1 1
-−101
-XY{(x,sin(1
-x))∈X×Y}
-(−1,1)⊆Y
+(a) SpiraleS mit KreisC
+0.1 1
+−1
+0
+1
+X
+Y
+{(x,sin( 1
+x)) ∈X ×Y}
+(−1,1) ⊆Y
 (b) Sinus
-Abbildung 1.8: Beispiele für Räume, die zusammenhängend, aber nicht wegzusammenhängend
+Abbildung 1.8:Beispiele für Räume, die zusammenhängend, aber nicht wegzusammenhängend
 sind.
-(a)n= 1 (b)n= 2 (c)n= 3 (d)n= 4 (e)n= 5
+(a) n= 1 (b) n= 2 (c) n= 3 (d) n= 4 (e) n= 5
 Abbildung 1.9: Hilbert-Kurve
 Jede Jordankurve ist also ein einfacher Weg.
 Satz 1.2 (Jordanscher Kurvensatz)
-IstC=γ([0,1])eine geschlossene Jordankurve in R2, so hat R2\Cgenau zwei
+Ist C = γ([0,1]) eine geschlossene Jordankurve in R2, so hat R2 \C genau zwei
 Zusammenhangskomponenten, von denen eine beschränkt ist und eine unbeschränkt.
 außen
 innen
 Jordankurve
-Abbildung 1.10: Die unbeschränkte Zusammenhangskomponente wird häufig inneres, die beschränkte
+Abbildung 1.10: Die beschränkte Zusammenhangskomponente wird häufig inneres, die unbeschränkte
  äußeres genannt.
 Beweis: ist technisch mühsam und wird hier nicht geführt. Er kann in „Algebraische Topologie:
 Eine Einführung“ von R. Stöcker und H. Zieschang auf S. 301f (ISBN 978-3519122265)
 nachgelesen werden.
-Idee: Ersetze Weg Cdurch Polygonzug.
+Idee: Ersetze WegC durch Polygonzug.
  1.6. WEGE UND KNOTEN
 Definition 20
-Eine geschlossene Jordankurve in R3heißtKnoten.
+Eine geschlossene Jordankurve inR3 heißtKnoten.
 Beispiel 19 (Knoten)
 (a) Trivialer Knoten
  (b) Kleeblattknoten
  (c) Achterknoten
- (d)62-Knoten
+ (d) 62-Knoten
 Abbildung 1.11: Beispiele für verschiedene Knoten
 Definition 21
-Zwei Knoten γ1,γ2:S1→R3heißenäquivalent , wenn es eine stetige Abbildung
-H:S1×[0,1]→R3
+Zwei Knotenγ1,γ2 : S1 →R3 heißenäquivalent, wenn es eine stetige Abbildung
+H : S1 ×[0,1] →R3
 gibt mit
-H(z,0) =γ1(z)∀z∈S1
-H(z,1) =γ2(z)∀z∈S1
-und für jedes feste t∈[0,1]ist
-Hz:S1→R3,z↦→H(z,t)
-ein Knoten. Die Abbildung HheißtIsotopie zwischenγ1undγ2.
+H(z,0) = γ1(z) ∀z∈S1
+H(z,1) = γ2(z) ∀z∈S1
+und für jedes festet∈[0,1] ist
+Ht : S1 →R3, z ↦ H(z,t)
+ein Knoten. Die AbbildungH heißtIsotopie zwischen γ1 und γ2.
 Definition 22
-Seiγ: [0,1]→R3ein Knoten, Eeine Ebene und π:R3→Eeine Projektion auf E.
-πheißtKnotendiagramm vonγ, wenn gilt:
-⏐⏐π−1(x)⏐⏐≤2∀x∈π(γ)
-Ist(π|γ([0,1]))−1(x) ={y1,y2}, soliegty1übery2, wenn gilt:
-∃λ>1 : (y1−x) =λ(y2−x)
+Sei γ : [0,1] →R3 ein Knoten,E eine Ebene undπ: R3 →E eine Projektion aufE.
+π heißtKnotendiagramm von γ, wenn gilt:
+⏐⏐π−1(x)
+⏐⏐≤2 ∀x∈π(γ)
+Ist (π|γ([0,1]))−1(x) = {y1,y2 }, soliegt y1 über y2, wenn gilt:
+∃λ> 1 : (y1 −x) = λ(y2 −x)
 Satz 1.3 (Satz von Reidemeister)
 Zwei endliche Knotendiagramme gehören genau dann zu äquivalenten Knoten, wenn sie
 durch endlich viele „Reidemeister-Züge“ ineinander überführt werden können.
  1.6. WEGE UND KNOTEN
-(a)Ω1
- (b)Ω2
-(c)Ω3
+(a) Ω1
+ (b) Ω2
+(c) Ω3
 Abbildung 1.12: Reidemeister-Züge
 Beweis: Durch sorgfältige Fallunterscheidung.5
 Definition 23
-Ein Knotendiagramm heißt 3-färbbar , wenn jeder Bogen von Dso mit einer Farbe gefärbt
+Ein Knotendiagramm D heißt 3-färbbar, wenn jeder Bogen von D so mit einer Farbe gefärbt
 werden kann, dass an jeder Kreuzung eine oder 3 Farben auftreten und alle 3 Farben
 auftreten.
 Abbildung 1.13: Ein 3-gefärber Kleeblattknoten
@@ -704,26 +817,26 @@ Abbildung 1.13: Ein 3-gefärber Kleeblattknoten
  1.6. WEGE UND KNOTEN
 Übungsaufgaben
 Aufgabe 1 (Sierpińskiraum)
-Es seiX:={0,1}undTX:={∅,{0},X}. Dies ist der sogenannte Sierpińskiraum.
-(a) Beweisen Sie, dass (X,TX)ein topologischer Raum ist.
-(b) Ist (X,TX)hausdorffsch?
-(c) Ist TXvon einer Metrik erzeugt?
+Es seiX := {0,1 }und TX := {∅,{0 },X }. Dies ist der sogenannte Sierpińskiraum.
+(a) Beweisen Sie, dass(X,TX) ein topologischer Raum ist.
+(b) Ist (X,TX) hausdorffsch?
+(c) Ist TX von einer Metrik erzeugt?
 Aufgabe 2
-Es seiZmit der von den Mengen Ua,b:=a+bZ(a∈Z,b∈Z\{0})erzeugten Topologie
+Es seiZ mit der von den MengenUa,b := a+ bZ(a∈Z,b ∈Z \{0 }) erzeugten Topologie
 versehen.
 Zeigen Sie:
-(a) JedesUa,bund jede einelementige Teilmenge von Zist abgeschlossen.
-(b){−1,1}ist nicht offen.
+(a) Jedes Ua,b und jede einelementige Teilmenge vonZ ist abgeschlossen.
+(b) {−1,1 }ist nicht offen.
 (c) Es gibt unendlich viele Primzahlen.
 Aufgabe 3 (Cantorsches Diskontinuum)
-Für jedesi∈NseiPi:={0,1}mit der diskreten Topologie. Weiter Sei P:=∏
-i∈NPi.
-(a) Wie sehen die offenen Mengen von Paus?
-(b) Was können Sie über den Zusammenhang von Psagen?
+Für jedesi∈N sei Pi := {0,1 }mit der diskreten Topologie. Weiter SeiP := ∏
+i∈N Pi.
+(a) Wie sehen die offenen Mengen vonP aus?
+(b) Was können Sie über den Zusammenhang vonP sagen?
 Aufgabe 4 (Kompaktheit)
-(a) Ist GLn(R) ={A∈Rn×n|det(A)̸= 0}kompakt?
-(b) Ist SLn(R) ={A∈Rn×n|det(A) = 1}kompakt?
-(c) IstP(R)kompakt?
+(a) Ist GLn(R) = {A∈Rn×n |det(A) ̸= 0 }kompakt?
+(b) Ist SLn(R) = {A∈Rn×n |det(A) = 1 }kompakt?
+(c) Ist P(R) kompakt?
 Aufgabe 5 (Begriffe)
 Definieren Sie die Begriffe „Homomorphismus“ und „Homöomorphismus“.
 Geben Sie, falls möglich, ein Beispiel für folgende Fälle an. Falls es nicht möglich ist,
@@ -738,1345 +851,1596 @@ Definieren Sie die Begriffe „Isomorphismus“, „Isotopie“ und „Isometrie
 Simplizialkomplexe
 2.1 Topologische Mannigfaltigkeiten
 Definition 24
-Sei(X,T)ein topologischer Raum und n∈N.
-a)Einen-dimensionale KarteaufXist ein Paar (U,ϕ), wobeiU∈Tundϕ:U→V
-Homöomorphismus von Uauf eine offene Teilmenge V⊆Rn.
-b)Einn-dimensionaler AtlasAaufXist eine Familie (Ui,ϕi)i∈Ivon Karten auf X,
-sodass⋃
-i∈IUi=X.
-c)Xheißt (topologische) n-dimensionale Mannigfaltigkeit , wennXhausdorffsch ist,
-eine abzählbare Basis der Topologie hat und einen n-dimensionalen Atlas besitzt.
-Anschaulich ist also ein n-dimensionale Mannigfaltigkeit lokal dem Rnähnlich.
+Sei (X,T) ein topologischer Raum undn∈N.
+a) Eine n-dimensionale Karte auf X ist ein Paar(U,ϕ), wobeiU ∈T und ϕ: U →V
+Homöomorphismus vonU auf eine offene TeilmengeV ⊆Rn.
+b) Ein n-dimensionaler Atlas Aauf X ist eine Familie(Ui,ϕi)i∈I von Karten aufX,
+sodass
+$\bigcup_{i \in I} U_i = X$.
+c) X heißt (topologische)n-dimensionale Mannigfaltigkeit, wennX hausdorffsch ist,
+eine abzählbare Basis der Topologie hat und einenn-dimensionalen Atlas besitzt.
+Anschaulich ist also eine $n$-dimensionale Mannigfaltigkeit lokal dem $\mathbb{R}^n$ ähnlich.
 Bemerkung 24 (Mächtigkeit von Mannigfaltigkeiten)
-Jeden-dimensionale Mannigfaltigkeit mit n≥1ist mindestens so mächtig wie R.
-Beweis: Sei(X,T)ein topologischer Raum und (U,ϕ)mitU∈Tundϕ:U→V⊆Rn, wobei
-Voffen undϕein Homöomorphismus ist, eine Karte auf X.
-Da jede offene Teilmenge des Rngenauso mächtig ist wie der Rn,ϕals Homöomorphismus
+Jede n-dimensionale Mannigfaltigkeit mitn≥1 ist mindestens so mächtig wieR.
+Beweis: Sei (X,T) ein topologischer Raum und(U,ϕ) mit U ∈T und ϕ: U →V ⊆Rn, wobei
+V offen undϕ ein Homöomorphismus ist, eine Karte aufX.
+Da jede offene Teilmenge desRn genauso mächtig ist wie derRn, ϕ als Homöomorphismus
 insbesondere bijektiv ist und Mengen, zwischen denen eine Bijektion existiert, gleich mächtig
-sind, istUgenauso mächtig wie der Rn. Da jede Mannigfaltigkeit mindestens eine Karte
-hat, muss jede Mannigfaltigkeit Xmindestens so mächtig sein wie der Rn.■
-Hinweis: Es gibt auch noch 0-dimensionale Mannigfaltigkeiten. Diese Mannigfaltigkeiten können
+sind, istU genauso mächtig wie derRn. Da jede Mannigfaltigkeit mindestens eine Karte
+hat, muss jede MannigfaltigkeitX mindestens so mächtig sein wie derRn. ■
+Hinweis: Es gibt auch noch0-dimensionale Mannigfaltigkeiten. Diese Mannigfaltigkeiten können
 beliebig viele Elemente haben.
 Bemerkung 25
-a) Es gibt surjektive, stetige Abbildungen [0,1]→[0,1]×[0,1]
-b)Fürn̸=msindRnundRmnicht homöomorph. Zum Beweis benutzt man den „Satz
+a) Es gibt surjektive, stetige Abbildungen[0,1] →[0,1] ×[0,1]
+b) Für n̸= m sind Rn und Rm nicht homöomorph. Zum Beweis benutzt man den „Satz
 von der Gebietstreue“ (Brouwer):
-IstU⊆Rnoffen undf:U→Rnstetig und injektiv, so ist f(U)offen.
-Istn<mundRmhomöomorph zu Rn, so wäre
-f:Rn→Rm→Rn,(x1,...,xn)↦→(x1,x2,...,xn,0,..., 0)
-eine stetige injektive Abbildung. Also müsste f(Rn)offen sein⇒Widerspruch
+Ist U ⊆Rn offen undf : U →Rn stetig und injektiv, so istf(U) offen.
+Ist n<m und Rm homöomorph zuRn, so wäre
+f : Rn →Rm →Rn, (x1,...,x n) ↦→(x1,x2,...,x n,0,..., 0)
+eine stetige injektive Abbildung. Also müsstef(Rn) offen sein⇒Widerspruch
  2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN
 Beispiel 20 (Mannigfaltigkeiten)
-1)Jede offene Teilmenge U⊆Rnist einen-dimensionale Mannigfaltigkeit mit einem
+1) Jede offene TeilmengeU ⊆Rn ist eine n-dimensionale Mannigfaltigkeit mit einem
 Atlas aus einer Karte.
-2)Cnist eine 2n-dimensionale Mannigfaltigkeit mit einem Atlas aus einer Karte:
-(z1,...,zn)↦→(ℜ(z1),ℑ(z1),...,ℜ(zn),ℑ(zn))
-3)Pn(R) = (Rn+1\{0})/∼=Sn/∼undPn(C)sind Mannigfaltigkeiten der Dimension
-nbzw. 2n, da gilt:
-SeiUi:={(x0:···:xn)∈Pn(R)|xi̸= 0}∀i∈0,...,n. Dann istPn(R) =⋃n
-i=0Ui
+2) Cn ist eine2n-dimensionale Mannigfaltigkeit mit einem Atlas aus einer Karte:
+(z1,...,z n) ↦→(ℜ(z1),ℑ(z1),..., ℜ(zn),ℑ(zn))
+3) Pn(R) = (Rn+1 \{0 })/∼= Sn/∼und Pn(C) sind Mannigfaltigkeiten der Dimension
+n bzw. 2n, da gilt:
+Sei $U_i := \{(x_0 : \dots : x_n) \in P^n(\mathbb{R}) \mid x_i \neq 0\}\ \forall i \in 0,\dots,n$. Dann ist
+$P^n(\mathbb{R}) = \bigcup_{i=0}^{n} U_i$
 und die Abbildung
-Ui→Rn
-(x0:···:xn)↦→(x0
-xi,...,
-xi
-xi,...,xn
-xi)
-(y1:···:yi−1: 1 :yi:···:yn)↦→(y1,...,yn)
+Ui →Rn
+(x0 : ··· : xn) ↦→
+(x0
+xi
+,...,
+
+xi
+xi
+,..., xn
+xi
+)
+(y1 : ··· : yi−1 : 1 : yi : ··· : yn) ↦→(y1,...,y n)
 ist bijektiv.
-DieUimiti= 0,...,nbilden einen n-dimensionalen Atlas:
-x= (1 : 0 : 0)∈U0→R2x↦→(0,0)
-y= (0 : 1 : 1)∈U2→R2y↦→(0,1)
-Umgebung: B1(0,1)→{(1 :u:v)|∥(u,v)∥<1}=V1
-Umgebung: B1(0,1)→{
-(w:z: 1)⏐⏐w2+z2<1}
-=V2
-V1∩V2=∅?
-(a:b:c)∈V1∩V2
-⇒a̸= 0und(b
-a)2+ (c
-a)2<1⇒c
-a<1
-⇒c̸= 0und(a
-c)2+ (b
-c)2<1⇒a
-c<1
+Die Ui mit i= 0,...,n bilden einenn-dimensionalen Atlas:
+x= (1 : 0 : 0) ∈U0 →R2 x↦→(0,0)
+y= (0 : 1 : 1) ∈U2 →R2 y↦→(0,1)
+Umgebung: B1(0,1) →{(1 : u: v) |∥(u,v)∥<1 }= V1
+Umgebung: B1(0,1) →
+{
+(w: z: 1)
+⏐⏐w2 + z2 <1
+}
+= V2
+V1 ∩V2 = ∅?
+(a: b: c) ∈V1 ∩V2
+⇒a̸= 0 und ( b
+a)2 + (c
+a)2 <1 ⇒c
+a <1
+⇒c̸= 0 und (a
+c)2 + (b
+c)2 <1 ⇒a
+c <1
 ⇒Widerspruch
-4)Sn={
-x∈Rn+1⏐⏐∥x∥= 1}
-istn-dimensionale Mannigfaltigkeit.
+4) Sn =
+{
+x∈Rn+1 ⏐⏐∥x∥= 1
+}
+ist n-dimensionale Mannigfaltigkeit.
 Karten:
-Di:={(x1,...,xn+1)∈Sn|xi>0}→B1(0,..., 0
-∈Rn)
-Ci:={(x1,...,xn+1)∈Sn|xi<0}→B1(0,..., 0)
-(x1,...,xn+1)↦→(x1,..., xi,...,xn+1)1
-(x1,...,xn)↦→(x1,...,xi−1,√
-1−∑n
-k=1x2
-k,xi,...,xn), oder−√
-1−∑n
-k=1x2
-kfürCi
-Sn=⋃n+1
-i=1(Ci∪Di)
-Als kompakte Mannigfaltigkeit wird Snauch „geschlossene Mannigfaltigkeit“ genannt.
-5)[0,1]ist keine Mannigfaltigkeit, denn:
-Es gibt keine Umgebung von 0in[0,1], die homöomorph zu einem offenem Intervall
+Di := {(x1,...,x n+1) ∈Sn|xi >0}→ B1(0,..., 0  
+∈Rn
+)
+Ci := {(x1,...,x n+1) ∈Sn|xi <0}→ B1(0,..., 0)
+(x1,...,x n+1) ↦→(x1,..., xi,...,x n+1)1
+(x1,...,x n) ↦→(x1,...,x i−1,
+√
+1 −∑n
+k=1 x2
+k,xi,...,x n), oder−
+√
+1 −∑n
+k=1 x2
+k für Ci
+$$S^n =
+\bigcup_{i=1}^{n+1} (C_i \cup D_i)$$
+Als kompakte Mannigfaltigkeit wirdSn auch „geschlossene Mannigfaltigkeit“ genannt.
+5) [0,1] ist keine Mannigfaltigkeit, denn:
+Es gibt keine Umgebung von 0 in [0,1], die homöomorph zu einem offenen Intervall
 ist.
-1xiwird rausgenommen
+1xi wird rausgenommen
  2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN
-6)V1={
-(x,y)∈R2⏐⏐x·y= 0}
+6) V1 =
+{
+(x,y) ∈R2 ⏐⏐x·y= 0
+}
 ist keine Mannigfaltigkeit.
 Das Problem ist (0,0). Wenn man diesen Punkt entfernt, zerfällt der Raum in 4
-Zusammenhangskomponenten. Jeder Rnzerfällt jedoch in höchstens zwei Zusammenhangskomponenten,
+Zusammenhangskomponenten. JederRn zerfällt jedoch in höchstens zwei Zusammenhangskomponenten,
  wenn man einen Punkt entfernt.
-7)V2={
-(x,y)∈R2⏐⏐x3=y2}
+7) V2 =
+{
+(x,y) ∈R2 ⏐⏐x3 = y2 }
 ist eine Mannigfaltigkeit.
-8)X= (R\{0})∪(01,02)
-U⊆Xoffen⇔{
-Uoffen in R\{0},falls01/∈U,02∈U
-∃ε>0 : (−ε,ε)⊆Ufalls01∈U,02∈U
-Insbesondere sind (R\{0})∪{01}und(R\{0})∪{02}offen und homöomorph
-zuR.
-Aber:Xist nicht hausdorffsch! Denn es gibt keine disjunkten Umgebungen von 01
-und02.
-9)GLn(R)ist eine Mannigfaltigkeit der Dimension n2, weil offene Teilmengen von Rn2
+8) X = (R \{0 }) ∪(01,02)
+U ⊆X offen ⇔
+{
+U offen inR \{0 }, falls 01 /∈U,02 ∈U
+∃ε> 0 : (−ε,ε) ⊆U falls 01 ∈U,02 ∈U
+Insbesondere sind(R \{0 }) ∪{01 }und (R \{0 }) ∪{02 }offen und homöomorph
+zu R.
+Aber: X ist nicht hausdorffsch! Denn es gibt keine disjunkten Umgebungen von01
+und 02.
+9) GLn(R) ist eine Mannigfaltigkeit der Dimensionn2, weil offene Teilmengen vonRn2
 eine Mannigfaltigkeit bilden.
 Definition 25
-SeienX,Y n-dimensionale Mannigfaltigkeiten, U⊆XundV⊆Yoffen, Φ :U→Vein Homöomorphismus
- Z= (X˙∪Y)/∼mit der von u∼Φ(u)∀u∈Uerzeugten Äquivalenzrelation
+Seien X,Y n-dimensionale Mannigfaltigkeiten,U ⊆X und V ⊆Y offen, Φ : U →V ein Homöomorphismus
+ Z = (X ˙∪Y)/∼mit der vonu∼Φ(u) ∀u∈U erzeugten Äquivalenzrelation
 und der von∼induzierten Quotiententopologie.
-ZheißtVerklebung vonXundYlängsUundV.ZbesitzteinenAtlasaus n-dimensionalen
-Karten. Falls Zhausdorffsch ist, ist Zeinen-dimensionale Mannigfaltigkeit.
+Z heißt Verklebung von X und Y längs U und V. Z besitzt einen Atlas aus n-dimensionalen
+Karten. FallsZ hausdorffsch ist, istZ eine n-dimensionale Mannigfaltigkeit.
 Bemerkung 26
-SindX,YMannigfaltigkeiten der Dimension nbzw.m, so istX×Yeine Mannigfaltigkeit
-der Dimension n+m.
+Sind X,Y Mannigfaltigkeiten der Dimensionn bzw. m, so istX×Y eine Mannigfaltigkeit
+der Dimensionn+ m.
 Beweis: Produkte von Karten sind Karten. ■
 Beispiel 21
 Mannigfaltigkeiten mit Dimension 1:
-1) Offene Intervalle, R,(0,1)sind alle homöomorph
-2)S1
+1) Offene Intervalle,R, (0,1) sind alle homöomorph
+2) S1
 Mannigfaltigkeiten mit Dimension 2:
-1)R2
-2)S2(0 Henkel)
-3)T2(1 Henkel)
+1) R2
+2) S2 (0 Henkel)
+3) T2 (1 Henkel)
 4) oder mehr Henkel, wie z.B. der Zweifachtorus in Abbildung 2.1
 Bemerkung 27
-Sein∈N,F:Rn→Rstetig differenzierbar und X=V(F) :={x∈Rn|F(x) = 0}das
+Sei n∈N,F : Rn →R stetig differenzierbar undX = V(F) := {x∈Rn |F(x) = 0 }das
 „vanishing set“.
 Dann gilt:
  2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN
 Abbildung 2.1: Durch Verklebung zweier Tori entsteht ein Zweifachtorus.
-a)Xist abgeschlossen in Rn
-b) Ist grad(F)(X)̸= 0∀x∈X, so istXeine Mannigfaltigkeit der Dimension n−1.
+a) X ist abgeschlossen inRn
+b) Ist $\operatorname{grad}(F)(x) \neq 0\ \forall x \in X$, so ist $X$ eine Mannigfaltigkeit der Dimension $n-1$.
 Beweis:
-a)Seiy∈Rn\V(F). WeilFstetig ist, gibt es δ>0, sodassF(Bδ(y))⊆Bε(F(y))mit
-ε=1
-2∥F(y)∥. Folgt Bδ(y)∩V(F) =∅⇒Rn\V(F)ist offen.
-b)Seix∈Xmitgrad(F)(x)̸= 0, also o. B. d. A.∂F
-∂X1(x)̸= 0,x= (x1,...,xn),
-x′:= (x2,...,xn)∈Rn−1. Der Satz von der impliziten Funktion liefert nun: Es
-gibt Umgebungen Uvonx′und differenzierbare Funktionen g:U→R, sodass
-G:U→Rn, u↦→(g(u),u)eine stetige Abbildung auf eine offene Umgebung Vvonx
-inXist.
+a) Sei y∈Rn \V(F). WeilF stetig ist, gibt esδ >0, sodassF(Bδ(y)) ⊆Bε(F(y)) mit
+ε= 1
+2 ∥F(y)∥. FolgtBδ(y) ∩V(F) = ∅⇒ Rn \V(F) ist offen.
+b) Sei x ∈ X mit grad(F)(x) ̸= 0 , also o. B. d. A. ∂F
+∂X1
+(x) ̸= 0 , x = ( x1,...,x n),
+x′ := ( x2,...,x n) ∈Rn−1. Der Satz von der impliziten Funktion liefert nun: Es
+gibt Umgebungen U von x′ und differenzierbare Funktionen g : U → R, sodass
+G: U →Rn, u↦→(g(u),u) eine stetige Abbildung auf eine offene UmgebungV von x
+in X ist.
 ■
 Beispiel 22
-1)F:R3→R,(x,y,z )↦→x2+y2+z2−1,V(F) =S2,grad(F) = (2x,2y,2z)Bem. 27.b= = = = = =⇒
-Snistn-dimensionale Mannigfaltigkeit in Rn+1
-2)F:R2→R,(x,y)↦→y2−x3Es gilt: grad(F) = (−3x2,2y). Also: grad(0,0) = (0,0).
-−5−4−3−2−1012345−4
+1) $F : \mathbb{R}^3 \to \mathbb{R},\ (x,y,z) \mapsto x^2 + y^2 + z^2 - 1$, $V(F) = S^2$, $\operatorname{grad}(F) = (2x, 2y, 2z)$
+$\xRightarrow{\text{Bem. 27.b}}$ $S^n$ ist $n$-dimensionale Mannigfaltigkeit in $\mathbb{R}^{n+1}$
+2) F : R2 →R, (x,y) ↦→y2 −x3 Es gilt:grad(F) = (−3x2,2y). Also:grad(0,0) = (0,0).
+−5−4−3−2−1012345
+−4
 −2
 0
 2
-4−1000100
-xyz
-−1000100f(x,y)
-(a)F(x,y) =y2−x32 4 6 8 10 12
-−10−5510
-xy
-a=1
+4
+−100
+0
+100
+x
+y
+z
+−100
+0
+100
+f(x,y)
+(a) F(x,y) = y2 −x3
+2 4 6 8 10 12
+−10
+−5
+5
+10
+x
+y
+a= 1
 3
 a= 1
 a= 2
-(b)y2−ax3= 0
-Abbildung 2.2: Rechts ist die Neilsche Parabel für verschiedene Parameter a.
-DaheristBemerkung 27.bnichtanwendbar,aber V(F)isttrotzdemeine1-dimensionale
+(b) y2 −ax3 = 0
+Abbildung 2.2: Rechts ist die Neilsche Parabel für verschiedene Parametera.
+Daher ist Bemerkung 27.b nicht anwendbar, aber V(F) ist trotzdem eine 1-dimensionale
 topologische Mannigfaltigkeit.
  2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN
 Definition 26
-SeiXein Hausdorffraum mit abzählbarer Basis der Topologie. Xheißtn-dimensionale
-Mannigfaltigkeit mit Rand , wenn es einen Atlas (Ui,ϕi)gibt, wobei Ui⊆Xioffen und
-ϕiein Homöomorphismus auf eine offene Teilmenge von
+Sei X ein Hausdorffraum mit abzählbarer Basis der Topologie.X heißtn-dimensionale
+Mannigfaltigkeit mit Rand, wenn es einen Atlas(Ui,ϕi) gibt, wobeiUi ⊆Xi offen und
+ϕi ein Homöomorphismus auf eine offene Teilmenge von
 Rn
-+,0:={(x1,...,xn)∈Rn|xn≥0}
++,0 := {(x1,...,x n) ∈Rn |xn ≥0 }
 ist.
 Rn
-+,0ist ein „Halbraum“.
++,0 ist ein „Halbraum“.
 Hinweis: Mannigfaltigkeiten mit Rand sind keine Mannigfaltigkeiten.
 ∼=
 (a) Halbraum
+∼
+=
+(b) Pair of pants
 ∼=
-(b) Pair of pants∼=
 (c) Sphäre mit einem Loch
 Abbildung 2.3: Beispiele für Mannigfaltigkeiten mit Rand
 Definition 27
-SeiXeinen-dimensionale Mannigfaltigkeit mit Rand und Atlas A. Dann heißt
-∂X:=⋃
-(U,ϕ)∈A{x∈U|ϕ(x) = 0}
-RandvonX.
-∂Xist eine Mannigfaltigkeit der Dimension n−1.
+Sei X eine n-dimensionale Mannigfaltigkeit mit Rand und AtlasA. Dann heißt
+$$\partial X :=
+\bigcup_{(U,\varphi) \in \mathcal{A}}
+\{ x \in U \mid
+\varphi(x) = 0 \}$$
+Rand von X.
+∂X ist eine Mannigfaltigkeit der Dimensionn−1.
 Definition 28
-SeiXeinen-dimensionale Mannigfaltigkeit mit Atlas (Ui,ϕi)i∈I
-Füri,j∈ImitUi∩Uj̸=∅heißt
-ϕij:=ϕj◦ϕ−1
+Sei X eine n-dimensionale Mannigfaltigkeit mit Atlas(Ui,ϕi)i∈I
+Für i,j ∈I mit Ui ∩Uj ̸= ∅heißt
+ϕij := ϕj ◦ϕ−1
 i
-ϕi(Ui∩Uj)→ϕj(Ui∩Uj)
-Kartenwechsel oderÜbergangsfunktion .
+ϕi(Ui ∩Uj) →ϕj(Ui ∩Uj)
+Kartenwechseloder Übergangsfunktion.
  2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN
-RnRnUiUj
-Vi VjX
+Rn Rn
+Ui Uj
+Vi Vj
+X
 ϕi ϕj
 Abbildung 2.4: Kartenwechsel
 2.2 Differenzierbare Mannigfaltigkeiten
 Definition 29
-SeiXeinen-dimensionale Mannigfaltigkeit mit Atlas (Ui,ϕi)i∈I.
-a)Xheißtdifferenzierbare Mannigfaltigkeit der Klasse Ck, wenn jede Kartenwechselabbildung
+Sei X eine n-dimensionale Mannigfaltigkeit mit Atlas(Ui,ϕi)i∈I.
+a) X heißtdifferenzierbare Mannigfaltigkeit der KlasseCk, wenn jede Kartenwechselabbildung
  ϕij, i,j∈I k-mal stetig differenzierbar ist.
-b)Xheißtdifferenzierbare Mannigfaltigkeit , wennXeine differenzierbare Mannigfaltigkeit
- der Klasse C∞ist.
-Differenzierbare Mannigfaltigkeiten der Klasse C∞werden auch glattgenannt.
+b) X heißtdifferenzierbare Mannigfaltigkeit, wennX eine differenzierbare Mannigfaltigkeit
+ der KlasseC∞ist.
+Differenzierbare Mannigfaltigkeiten der KlasseC∞werden auchglatt genannt.
 Definition 30
-SeiXeine differenzierbare Mannigfaltigkeit der Klasse Ck(k∈N∪{∞}) mit Atlas
+Sei X eine differenzierbare Mannigfaltigkeit der KlasseCk (k ∈N ∪{∞} ) mit Atlas
 A= (Ui,ϕi)i∈I.
-a)Eine Karte (U,ϕ)aufXheißtverträglich mitA, wenn alle Kartenwechsel ϕ◦ϕ−1
+a) Eine Karte(U,ϕ) auf X heißtverträglichmit A, wenn alle Kartenwechselϕ◦ϕ−1
 i
-undϕi◦ϕ−1(i∈ImitUi∩U̸=∅) differenzierbar von Klasse Cksind.
-b)Die Menge aller mit Averträglichen Karten auf Xbildet einen maximalen Atlas der
-KlasseCk. Er heißtCk-Struktur aufX.
-EineC∞-Struktur heißt auch differenzierbare Struktur aufX.
+und ϕi ◦ϕ−1 (i∈I mit Ui ∩U ̸= ∅) differenzierbar von KlasseCk sind.
+b) Die Menge aller mitAverträglichen Karten aufX bildet einen maximalen Atlas der
+Klasse Ck. Er heißtCk-Struktur auf X.
+Eine C∞-Struktur heißt auchdifferenzierbare Strukturauf X.
 Bemerkung 28
-Fürn≥4gibt es aufSnmehrere verschiedene differenzierbare Strukturen, die sogenannten
+Für n≥4 gibt es aufSn mehrere verschiedene differenzierbare Strukturen, die sogenannten
 „exotische Sphären“.
 Definition 31
-SeienX,Ydifferenzierbare Mannigfaltigkeiten der Dimension nbzw.m,x∈X.
-a)Eine stetige Abbildung f:X→Yheißtdifferenzierbar inx(von Klasse Ck), wenn
-es Karten (U,ϕ)vonXmitx∈Uund (V,ψ)vonYmitf(U)⊆Vgibt, sodass
-ψ◦f◦ϕ−1stetig differenzierbar von Klasse Ckinϕ(x)ist.
-b)fheißtdifferenzierbar (von Klasse Ck), wennfin jedemx∈Xdifferenzierbar ist.
-c)fheißtDiffeomorphismus , wennfdifferenzierbar von Klasse C∞ist und es eine
-differenzierbare Abbildung g:Y→Xvon Klasse C∞gibt mitg◦f=idXund
-f◦g=idY.
+Seien X,Y differenzierbare Mannigfaltigkeiten der Dimensionn bzw. m, x∈X.
+a) Eine stetige Abbildungf : X →Y heißtdifferenzierbar in x(von KlasseCk), wenn
+es Karten (U,ϕ) von X mit x ∈U und (V,ψ) von Y mit f(U) ⊆V gibt, sodass
+ψ◦f ◦ϕ−1 stetig differenzierbar von KlasseCk in ϕ(x) ist.
+b) f heißtdifferenzierbar (von KlasseCk), wennf in jedemx∈X differenzierbar ist.
+c) f heißtDiffeomorphismus, wennf differenzierbar von KlasseC∞ ist und es eine
+differenzierbare Abbildung g : Y →X von Klasse C∞ gibt mit g◦f = idX und
+f ◦g= idY.
  2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN
 Bemerkung 29
 Die Bedingung in Definition 31.a hängt nicht von den gewählten Karten ab.
-Beweis: Seien (U′,ϕ′)und(V′,ψ′)Karten von Xbzw.Yumxbzw.f(x)mitf(U′)⊆V′.
-⇒ψ′◦f◦(ϕ′)−1
-=ψ′◦(ψ−1◦ψ)◦f◦(ϕ−1◦ϕ)◦(ϕ′)−1
-ist genau dann differenzierbar, wenn ψ◦f◦ϕ−1differenzierbar ist.
+Beweis: Seien (U′,ϕ′) und (V′,ψ′) Karten vonX bzw. Y um x bzw. f(x) mit f(U′) ⊆V′.
+⇒ψ′◦f ◦(ϕ′)−1
+= ψ′◦(ψ−1 ◦ψ) ◦f ◦(ϕ−1 ◦ϕ) ◦(ϕ′)−1
+ist genau dann differenzierbar, wennψ◦f ◦ϕ−1 differenzierbar ist.
 Beispiel 23
-f:R→R, x↦→x3ist kein Diffeomorphismus, aber Homöomorphismus, da mit g(x) :=3√x
-gilt:f◦g=idR, g◦f=idR
+f : R →R, x ↦→x3 ist kein Diffeomorphismus, aber Homöomorphismus, da mitg(x) := 3√x
+gilt: f ◦g= idR, g ◦f = idR
 Bemerkung 30
-SeiXeine glatte Mannigfaltigkeit. Dann ist
-Diffeo(X) :={f:X→X|fist Diffeomorphismus }
-eine Untergruppe von Homöo (X).
+Sei X eine glatte Mannigfaltigkeit. Dann ist
+Diffeo(X) := {f : X →X |f ist Diffeomorphismus}
+eine Untergruppe von Homöo(X).
 Definition 32
-S⊆R3heißtreguläre Fläche :⇔∀s∈S∃Umgebung V(s)⊆R3∃U⊆R2offen:
-∃differenzierbare Abbildung F:U→V∩S: Rg(JF(u)) = 2∀u∈U.
-Fheißt (lokale) reguläre Parametrisierung vonS.
+S ⊆ R3 heißtreguläre Fläche :⇔∀s ∈ S ∃Umgebung V(s) ⊆ R3 ∃U ⊆ R2 offen:
+∃differenzierbare AbbildungF : U →V ∩S: Rg(JF(u)) = 2 ∀u∈U.
+F heißt (lokale)reguläre Parametrisierungvon S.
 F(u,v) = (x(u,v),y(u,v),z(u,v))
-JF(u,v) =
-∂x
-∂u(p)∂x
+JF(u,v) =
+
+
+∂x
+∂u(p) ∂x
 ∂v(p)
 ∂y
-∂u(p)∂y
+∂u(p) ∂y
 ∂v(p)
 ∂z
-∂u(p)∂z
-∂v(p)
+∂u(p) ∂z
+∂v(p)
+
 
 Beispiel 24
-1) Rotationsflächen: Sei r:R→R>0eine differenzierbare Funktion.
-F:R2→R3(u,v)↦→(r(u) cos(u),r(v) sin(u),v)
-JF(u,v) =
-−r(v) sinu r′(v) cosu
-r(v) cosu r′(v) sinu
-0 1
+1) Rotationsflächen: Seir: R →R>0 eine differenzierbare Funktion.
+$F : \mathbb{R}^2 \to \mathbb{R}^3,\ (u,v) \mapsto (r(v)\cos(u),\, r(v)\sin(u),\, v)$
+$$J_F(u,v) =
+\begin{pmatrix}
+-r(v) \sin u & r'(v) \cos u \\
+r(v) \cos u & r'(v) \sin u \\
+0 & 1
+\end{pmatrix}
+$$
 
-hat Rang 2 für alle (u,v)∈R2.
-2) Kugelkoordinaten: F:R2→R3,
-(u,v)↦→(Rcosvcosu,Rcosvsinu,Rsinv)
-Es gilt:F(u,v)∈S2
+hat Rang 2 für alle(u,v) ∈R2.
+2) Kugelkoordinaten: F : R2 →R3,
+(u,v) ↦→(Rcos vcos u,R cos vsin u,R sin v)
+Es gilt:F(u,v) ∈S2
 R, denn
-R2cos2(v) cos2(u) +R2cos2(v) sin2(u) +R2sin2(v)
+R2 cos2(v) cos2(u) + R2 cos2(v) sin2(u) + R2 sin2(v)
 =R2(cos2(v) cos2(u) + cos2(v) sin2(u) + sin2(v))
-=R2(
-cos2(v)(cos2(u) + sin2(u)) + sin2(v))
-=R2(
-cos2(v) + sin2(v))
+=R2 (
+cos2(v)(cos2(u) + sin2(u)) + sin2(v)
+)
+=R2 (
+cos2(v) + sin2(v)
+)
 =R2
  2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN
 N
-Svu
-(a) Kugelkoordinaten−1
-0
+S
+vu
+(a) Kugelkoordinaten
+−1 0
+1
+2−2 −1 0 1 2
+0.6
+0.8
 1
-2−2−10120.60.81
 (b) Rotationskörper
-π
-2π 3π
-22π
-−1−0.50.51
-xy
-sinx
-cosx
+π2 π 3π2 2π
+−1
+−0.5
+0.5
+1
+x
+y
+sinxcosx
 (c) Sinus und Kosinus haben keine gemeinsame Nullstelle
  2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN
 Die Jacobi-Matrix
-JF(u,v) =
-−Rcosvsinu−Rsinvcosu
-Rcosvcosu−Rsinvsinu
-0 Rcosv
+JF(u,v) =
+
+
+−Rcos vsin u −Rsin vcos u
+Rcos vcos u −Rsin vsin u
+0 Rcos v
+
 
-hat Rang 2 für cosv̸= 0. InNundSistcosv= 0.
+hat Rang 2 fürcos v̸= 0. InN und S ist cos v= 0.
 Bemerkung 31
-Jede reguläre Fläche S⊆R3ist eine 2-dimensionale, differenzierbare Mannigfaltigkeit.
+Jede reguläre FlächeS ⊆R3 ist eine 2-dimensionale, differenzierbare Mannigfaltigkeit.
 Beweis:
-S⊆R3ist als reguläre Fläche eine 2-dimensionale Mannigfaltigkeit. Aus der Definition von
-regulären Flächen folgt direkt, dass Karten (Ui,Fi)und(Uj⊆R2,Fj:R2→R3)vonSmit
-Ui∩Uj̸=∅existieren, wobei FiundFjnach Definition differenzierbare Abbildungen sind.
-z.Z.:F−1
-j◦Fiist ein Diffeomorphismus.
-Ui UjS
+S ⊆R3 ist als reguläre Fläche eine 2-dimensionale Mannigfaltigkeit. Aus der Definition von
+regulären Flächen folgt direkt, dass Karten(Ui,Fi) und (Uj ⊆R2,Fj : R2 →R3) von S mit
+Ui ∩Uj ̸= ∅existieren, wobeiFi und Fj nach Definition differenzierbare Abbildungen sind.
+z.Z.: F−1
+j ◦Fi ist ein Diffeomorphismus.
+Ui Uj
+S
 s
 Fi Fj
 F−1
-j◦Fi
-Abbildung 2.5: Reguläre Fläche Szum Beweis von Bemerkung 31
-Idee:Finde differenzierbare Funktion˜F−1
-jin Umgebung Wvons, sodass˜F−1
-j|S∩W=F−1
-j.
-Ausführung: Seiu0∈Ui,v0∈UjmitFi(u0) =s=Fj(v0).
-DaRg(JFj(v0)) = 2ist, ist o. B. d. A.
-det(∂x
-∂u∂x
+j ◦Fi
+Abbildung 2.5: Reguläre FlächeS zum Beweis von Bemerkung 31
+Idee: Finde differenzierbare Funktion˜F−1
+j in UmgebungW von s, sodass ˜F−1
+j |S∩W = F−1
+j .
+Ausführung: Sei u0 ∈Ui, v0 ∈Uj mit Fi(u0) = s= Fj(v0).
+Da Rg(JFj(v0)) = 2 ist, ist o. B. d. A.
+det
+(∂x
+∂u
+∂x
 ∂v∂y
-∂u∂y
-∂v)
-(v0)̸= 0
-undFj(u,v) = (x(u,v),y(u,v),z(u,v)).
-Definiere˜Fj:Uj×R→R3durch
-˜Fj(u,v,t ) := (x(u,v),y(u,v),z(u,v) +t)
-Offensichtlich: ˜Fj|Uj×{0}=Fj
-J˜Fj=
-∂x
-∂u∂x
-∂v0
+∂u
+∂y
+∂v
+)
+(v0) ̸= 0
+und Fj(u,v) = (x(u,v),y(u,v),z(u,v)).
+Definiere ˜Fj : Uj ×R →R3 durch
+˜Fj(u,v,t ) := (x(u,v),y(u,v),z(u,v) + t)
+Offensichtlich: ˜Fj|Uj×{0 }= Fj
+J˜Fj
+=
+
+
+∂x
+∂u
+∂x
+∂v 0
 ∂y
-∂u∂y
-∂v0
+∂u
+∂y
+∂v 0
+∂z
+∂u
 ∂z
-∂u∂z
-∂v1
-⇒detJ˜Fj(v0,0)̸= 0
-Analysis II= = = = = =⇒Es gibt Umgebungen WvonFjvon˜Fj(v0,0) =Fj(v0) =s, sodass˜FjaufWeine
-differenzierbar Inverse F−1
-jhat.
+∂v 1
+
+⇒det J˜Fj
+(v0,0) ̸= 0
+$\xRightarrow{\text{Analysis II}}$
+Es gibt Umgebungen $W$ von $F_j$ von $\tilde{F}_j(v_0,0) = F_j(v_0) = s$, sodass $\tilde{F}_j$ auf $W$ eine
+differenzierbare Inverse $F_j^{-1}$
+hat.
  2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN
 Weiter gilt:
-˜Fj−1|W∩S=F−1
-j|W∩S
+˜Fj
+−1
+|W∩S = F−1
+j |W∩S
 ⇒F−1
-j◦Fi|F−1
-i(W∩S)=F−1
-j◦Fi|F−1
-i(W∩S)
+j ◦Fi|F−1
+i (W∩S) = F−1
+j ◦Fi|F−1
+i (W∩S)
 ist differenzierbar.
 Definition 33
-SeiGeine Mannigfaltigkeit und (G,◦)eine Gruppe.
-a)Gheißttopologische Gruppe , wenn die Abbildungen ◦:G×G→Gundι:G→G
+Sei G eine Mannigfaltigkeit und(G,◦) eine Gruppe.
+a) Gheißttopologische Gruppe, wenn die Abbildungen◦: G×G→Gund ι: G→G
 definiert durch
-g◦h:=g·hundι(g) :=g−1
+g◦h:= g·h und ι(g) := g−1
 stetig sind.
-b)IstGeine differenzierbare Mannigfaltigkeit, so heißt GLie-Gruppe , wenn (G,◦)und
-(G,ι)differenzierbar sind.
+b) Ist Geine differenzierbare Mannigfaltigkeit, so heißtGLie-Gruppe, wenn(G,◦) und
+(G,ι) differenzierbar sind.
 Beispiel 25 (Lie-Gruppen)
 1) Alle endlichen Gruppen sind 0-dimensionale Lie-Gruppen.
-2)GLn(R)
-3)(R×,·)
-4)(R>0,·)
-5)(Rn,+), dennA·B(i,j) =∑n
-k=1aikbkjist nach allen Variablen differenzierbar
-(A−1)(i,j) =det(Aij)
-detA
-Aij=
-ai1... ain
-.........
-an1... ann
+2) GLn(R)
+3) (R×,·)
+4) (R>0,·)
+5) (Rn,+), dennA·B(i,j) = ∑n
+k=1 aikbkj ist nach allen Variablen differenzierbar
+(A−1)(i,j) = det(Aij)
+det A
+Aij =
+
+
+ai1 ... a in
+... ... ...
+an1 ... a nn
+
 ∈R(n−1)×(n−1)
 ist differenzierbar.
-detAijkann 0werden, da:(1 1
-−1 0)
-6)SLn(R) ={A∈GLn(R)|det(A) = 1}
+det Aij kann 0 werden, da: (1 1
+−1 0
+)
+6) SLn(R) = {A∈GLn(R) |det(A) = 1 }
 Bemerkung 32
-IstGeine Lie-Gruppe und g∈G, so ist die Abbildung
-lg:G→G
+Ist G eine Lie-Gruppe undg∈G, so ist die Abbildung
+lg : G→G
 h↦→g·h
 ein Diffeomorphismus.
  2.3. SIMPLIZIALKOMPLEX
 2.3 Simplizialkomplex
 Definition 34
-Seienv0,...,vk∈RnPunkte.
-a)v0,...,vksindin allgemeiner Lage
-⇔es gibt keinen (k−1)-dimensionalen affinen Untervektorraum, der v0,...,vkenthält
-⇔v1−v0,...,vk−v0sind linear unabhängig.
-b)conv(v0,...,vk) :={∑k
-i=0λivi⏐⏐⏐λi≥0,∑k
-i=0λi= 1}
-heißt diekonvexe Hülle von
-v0,...,vk.
+Seien v0,...,v k ∈Rn Punkte.
+a) v0,...,v k sind in allgemeiner Lage
+⇔es gibt keinen(k−1)-dimensionalen affinen Untervektorraum, derv0,...,v k enthält
+⇔v1 −v0,...,v k −v0 sind linear unabhängig.
+b) $$\operatorname{conv}(v_0,\dots,v_k) :=
+\left\{
+\sum_{i=0}^{k} \lambda_i v_i
+\;\middle|\; \lambda_i \geq 0,\ \sum_{i=0}^{k}
+\lambda_i = 1
+\right\}$$
+heißt die konvexe Hülle von
+$v_0,\dots,v_k$.
 Definition 35
-a)Sei∆n=conv(e0,...,en)⊆Rn+1die konvexe Hülle der Standard-Basisvektoren
-e0,...,en.
-Dann heißt ∆nStandard-Simplex undndie Dimension des Simplex.
-b)Für Punkte v0,...,vkimRnin allgemeiner Lage heißt ∆(v0,...,vk) =conv(v0,...,vk)
-eink-Simplex inRn.
-c)Ist∆(v0,...,vk)eink-Simplex und I={i0,...,ir}⊆{ 0,...,k}, so istsi0,...,ir:=
-conv(vi0,...,vir)einr-Simplex und heißt Teilsimplex oderSeitevon∆.
-(a) 0-Simplex ∆0
-1 2 3123
-e0e1
-(b) 1-Simplex ∆11 2 3123
-e0e1
+a) Sei ∆n = conv(e0,...,e n) ⊆Rn+1 die konvexe Hülle der Standard-Basisvektoren
+e0,...,e n.
+Dann heißt∆n Standard-Simplex und n die Dimension des Simplex.
+b) Für Punktev0,...,v k im Rn in allgemeiner Lage heißt∆(v0,...,v k) = conv(v0,...,v k)
+ein k-Simplex in Rn.
+c) Ist ∆(v0,...,v k) ein k-Simplex undI = {i0,...,i r }⊆{ 0,...,k }, so istsi0,...,ir :=
+conv(vi0 ,...,v ir) ein r-Simplex und heißtTeilsimplexoder Seite von ∆.
+(a) 0-Simplex∆0
+1 2 3
+1
+2
+3
+e0
+e1
+(b) 1-Simplex∆1
+1 2 3
+1
+2
+3
+e0
+e1
+e2
+(c) 2-Simplex∆2
+e0 e1
 e2
-(c) 2-Simplex ∆2e0 e1e2
 e3
-(d) 3-Simplex ∆3
-Abbildung 2.6: Beispiele für k-Simplexe
+(d) 3-Simplex∆3
+Abbildung 2.6: Beispiele fürk-Simplexe
 Definition 36
-a)Eine endliche Menge Kvon Simplizes im Rnheißt (endlicher) Simplizialkomplex ,
+a) Eine endliche MengeK von Simplizes imRn heißt (endlicher)Simplizialkomplex,
 wenn gilt:
-(i) Für ∆∈KundS⊆∆Teilsimplex ist S∈K.
-(ii) Für ∆1,∆2∈Kist∆1∩∆2leer oder ein Teilsimplex von ∆1und von ∆2.
-b)|K|:=⋃
-∆∈K∆(mit Teilraumtopologie) heißt geometrische Realisierung vonK.
-c) Istd= max{k∈N0|Kenthältk-Simplex}, so heißtddieDimension vonK.
+(i) Für ∆ ∈K und S ⊆∆ Teilsimplex istS ∈K.
+(ii) Für ∆1,∆2 ∈K ist ∆1 ∩∆2 leer oder ein Teilsimplex von∆1 und von∆2.
+b) |K|:= ⋃
+∆∈K ∆ (mit Teilraumtopologie) heißtgeometrische Realisierungvon K.
+c) Ist d= max {k∈N0 |K enthält k-Simplex}, so heißtd die Dimension von K.
  2.3. SIMPLIZIALKOMPLEX
-(a) 1D Simplizialkomplex (b)2D Simplizialkomplex
-(ohne untere Fläche!)(c) 2D Simplizialkomplex
+(a) 1D Simplizialkomplex(b) 2D Simplizialkomplex
+(ohne untere Fläche!)
+(c) 2D Simplizialkomplex
 (d) 1D Simplizialkomplex (e) 2D Simplizialkomplex
 P
-(f)Pist kein Teilsimplex, da Eigenschaft
- Punkt b.ii verletzt istP
+(f) P ist kein Teilsimplex, da Eigenschaft
+ Punkt b.ii verletzt ist
+P
 (g) Simplizialkomplex
 Abbildung 2.7: Beispiele für Simplizialkomplexe
 Definition 37
-SeienK,LSimplizialkomplexe. Eine stetige Abbildung
-f:|K|→|L|
-heißtsimplizial , wenn für jedes ∆∈Kgilt:
-a)f(∆)∈L
-b)f|∆: ∆→f(∆)ist eine affine Abbildung.
+Seien K,L Simplizialkomplexe. Eine stetige Abbildung
+f : |K|→| L|
+heißtsimplizial, wenn für jedes∆ ∈K gilt:
+a) f(∆) ∈L
+b) f|∆ : ∆ →f(∆) ist eine affine Abbildung.
 Beispiel 26 (Simpliziale Abbildungen)
-1)ϕ(e1) :=b1,ϕ(e2) :=b2
-ϕist eine eindeutig bestimmte lineare Abbildung
+1) ϕ(e1) := b1, ϕ(e2) := b2
+ϕ ist eine eindeutig bestimmte lineare Abbildung
  2.3. SIMPLIZIALKOMPLEX
-0 e2e1
-0 b1b2
+0 e2
+e1
+0 b1
+b2
 ϕ
-2) Folgende Abbildung ϕ: ∆n→∆n−1ist simplizial:
+2) Folgende Abbildungϕ: ∆n →∆n−1 ist simplizial:
 ϕ
 3) Tori können simplizial auf Sphären abgebildet werden (vgl. Abbildung 2.8)
-M Ma
-aab
-bbc
-cc
-dd
-dMa
-bc
+M M
+a
+a
+a
+b
+b
+b
+c
+c
+c
+d
+d
+d
+M
+a
+b
+c
 d
-/Bullet /Bullet /Bullet/Bullet /Bullet /Bullet/Bullet /Bullet /Bullet
+/Bullet /Bullet /Bullet
+/Bullet /Bullet /Bullet
+/Bullet /Bullet /Bullet
+/Bullet
+/Bullet
+/Bullet
+/Bullet
+/Bullet
+/Bullet
+/Bullet /Bullet
+/Bullet
+/Bullet /Bullet
+/Bullet /Bullet
+/Bullet /Bullet
+/Bullet
 /Bullet
-/Bullet/Bullet/Bullet
-/Bullet/Bullet
-/Bullet /Bullet/Bullet
-/Bullet /Bullet/Bullet /Bullet/Bullet /Bullet
 /Bullet
-/Bullet/Bullet
 /Bullet
 Abbildung 2.8: Abbildung eines Torus auf eine Sphäre
 Definition 38
-SeiKein endlicher Simplizialkomplex. Für n≥0seian(K)die Anzahl der n-Simplizes in
+Sei K ein endlicher Simplizialkomplex. Fürn≥0 sei an(K) die Anzahl dern-Simplizes in
 K.
 Dann heißt
-χ(K) :=dimK∑
-n=0(−1)nan(K)
-Eulerzahl (oder Euler-Charakteristik) von K.
+$$\chi(K) :=
+\sum_{n=0}^{\dim K}
+(-1)^n
+a_n(K)$$
+Eulerzahl (oder Euler-Charakteristik) von $K$.
 Beispiel 27
-1)χ(∆1) = 2−1 = 1
-χ(∆2) = 3−3 + 1 = 1
-χ(∆3) = 4−6 + 4−1 = 1
-2)χ(Oktaeder-Oberfläche ) = 6−12 + 8 = 2
-χ(Rand des Tetraeders ) = 2
-χ(Ikosaeder ) = 12−30 + 20 = 2
-3)χ(Würfel ) = 8−12 + 6 = 2
-χ(Würfel, unterteilt in Dreiecksflächen ) = 8−(12 + 6) + (6·2) = 2
+1) χ(∆1) = 2 −1 = 1
+χ(∆2) = 3 −3 + 1 = 1
+χ(∆3) = 4 −6 + 4−1 = 1
+2) χ(Oktaeder-Oberfläche) = 6 −12 + 8 = 2
+χ(Rand des Tetraeders) = 2
+χ(Ikosaeder) = 12 −30 + 20 = 2
+3) χ(Würfel) = 8 −12 + 6 = 2
+χ(Würfel, unterteilt in Dreiecksflächen) = 8 −(12 + 6) + (6·2) = 2
 Bemerkung 33
-χ(∆n) = 1für jedesn∈N0
+χ(∆n) = 1 für jedesn∈N0
  2.3. SIMPLIZIALKOMPLEX
-Beweis: ∆nist die konvexe Hülle von (e0,...,en)inRn+1. Jede (k+ 1)-elementige Teilmenge
-von{e0,...,en}definiert ein k-Simplex.
-⇒ak(∆n) =(n+1
-k+1)
+Beweis: ∆n ist die konvexe Hülle von(e0,...,e n) in Rn+1. Jede(k+ 1)-elementige Teilmenge
+von {e0,...,e n }definiert eink-Simplex.
+⇒ak(∆n) =
+(n+1
+k+1
+)
 , k = 0,...,n
-⇒χ(∆n) =∑n
+⇒χ(∆n) = ∑n
 k=0(−1)k(n+1
-k+1)
-f(x) = (x+ 1)n+1Binomischer
-Lehrsatz=∑n+1
-k=0(n+1
-k)
+k+1
+)
+f(x) = (x+ 1)n+1
+Binomischer
+Lehrsatz
+= ∑n+1
+k=0
+(n+1
+k
+)
 xk
-⇒0 =∑n+1
-k=0(n+1
-k)
-(−1)k=χ(∆n)−1
+⇒0 = ∑n+1
+k=0
+(n+1
+k
+)
+(−1)k = χ(∆n) −1
 ⇒χ(∆n) = 1 ■
 Definition 39
-a) Ein 1D-Simplizialkomplex heißt Graph.
-b) Ein Graph, der homöomorph zu S1ist, heißtKreis.
-c) Ein zusammenhängender Graph heißt Baum, wenn er keinen Kreis enthält.
-(a)Dies wird häufig auch als
-Multigraph bezeichnet.(b)Planare Einbettung des Tetraeders
+a) Ein 1D-Simplizialkomplex heißtGraph.
+b) Ein Graph, der homöomorph zuS1 ist, heißtKreis.
+c) Ein zusammenhängender Graph heißtBaum, wenn er keinen Kreis enthält.
+(a) Dies wird häufig auch als
+Multigraph bezeichnet.
+(b) Planare Einbettung des Tetraeders
 
-(c)K5 (d)K3,3
+(c) K5 (d) K3,3
 Abbildung 2.9: Beispiele für Graphen
 Bemerkung 34
-Für jeden Baum Tgiltχ(T) = 1.
+Für jeden BaumT gilt χ(T) = 1.
 Beweis: Induktion über die Anzahl der Ecken.
 Bemerkung 35
-a)Jeder zusammenhängende Graph Γenthält einen Teilbaum T, der alle Ecken von Γ
+a) Jeder zusammenhängende GraphΓ enthält einen TeilbaumT, der alle Ecken vonΓ
 enthält.2
-b) Istn=a1(Γ)−a1(T), so istχ(Γ) = 1−n.
+b) Ist n= a1(Γ) −a1(T), so istχ(Γ) = 1 −n.
 Beweis:
 a) Siehe „Algorithmus von Kruskal“.
-2Twird „Spannbaum“ genannt.
+2T wird „Spannbaum“ genannt.
  2.3. SIMPLIZIALKOMPLEX
-b)χ(Γ) =a0(Γ)−a1(Γ)
-=a0(Γ)−(n+a1(T))
-=a0(T)−a1(T)−n
-=χ(T)−n
-= 1−n
+b) χ(Γ) = a0(Γ) −a1(Γ)
+= a0(Γ) −(n+ a1(T))
+= a0(T) −a1(T) −n
+= χ(T) −n
+= 1 −n
 Bemerkung 36
-Sei∆einn-Simplex und x∈∆◦⊆Rn. SeiKder Simplizialkomplex, der aus ∆durch
-„Unterteilung“ in xentsteht. Dann ist χ(K) =χ(∆) = 1.
-(a)K (b)∆, das ausKdurch Unterteilung
+Sei ∆ ein n-Simplex und x ∈∆◦ ⊆Rn. Sei K der Simplizialkomplex, der aus∆ durch
+„Unterteilung“ inx entsteht. Dann istχ(K) = χ(∆) = 1.
+(a) K (b) ∆, das ausK durch Unterteilung
  entsteht
 Abbildung 2.10: Beispiel für Bemerkung 36.
-Beweis:χ(K) =χ(∆)−(−1)n
+Beweis: χ(K) = χ(∆) − (−1)n
 
-n-Simplex+n∑
-k=0(−1)k(n+ 1
-k)
- 
-(1+(−1))n+1=χ(∆) ■
+n-Simplex
++
+n∑
+k=0
+(−1)k
+(n+ 1
+k
+)
+  
+(1+(−1))n+1
+= χ(∆) ■
 Definition 40
-SeiXein topologischer Raum, Kein Simplizialkomplex und
-h:|K|→X
-ein Homöomorphismus von der geometrischen Realisierung |K|aufX. Dann heißt heine
-Triangulierung vonX.
+Sei X ein topologischer Raum,K ein Simplizialkomplex und
+h: |K|→ X
+ein Homöomorphismus von der geometrischen Realisierung|K|auf X. Dann heißth eine
+Triangulierungvon X.
 Beispiel 28 (Triangulierung des Torus)
 Für eine Triangulierung des Torus werden mindestens 14 Dreiecke benötigt. Beispiele für
 fehlerhafte „Triangulierungen“ sind in Beispiel 28 zu sehen. Korrekte Triangulierungen sind
 in Beispiel 28.
 Satz 2.1 (Eulersche Polyederformel)
-SeiPein konvexes Polyeder in R3, d. h.∂Pist ein 2-dimensionaler Simplizialkomplex,
+Sei P ein konvexes Polyeder inR3, d. h.∂P ist ein 2-dimensionaler Simplizialkomplex,
 sodass gilt:
-∀x,y∈∂P: [x,y]⊆P
+∀x,y ∈∂P : [x,y] ⊆P
 Dann istχ(∂P) = 2.
 Beweis:
 1) Die Aussage ist richtig für den Tetraeder.
-2)O. B. d. A. sei 0∈PundP⊆B1(0). Projeziere ∂Pvon0aus auf∂B1(0) =S2.
-Erhalte Triangulierung von S2.
+2) O. B. d. A. sei 0 ∈P und P ⊆B1(0). Projiziere ∂P von 0 aus auf ∂B1(0) = S2.
+Erhalte Triangulierung vonS2.
  2.3. SIMPLIZIALKOMPLEX
-(a)Die beiden markierten Dreiecke schneiden sich im
-Mittelpunkt und in einer Seite.(b)Die beiden markierten Dreiecke schneiden sich im
+(a) Die beiden markierten Dreiecke schneiden sich im
+Mittelpunkt und in einer Seite.
+(b) Die beiden markierten Dreiecke schneiden sich im
 Mittelpunkt und außen.
 Abbildung 2.11: Fehlerhafte Triangulierungen
 (a) Einfache Triangulierung (b) Minimale Triangulierung
 Abbildung 2.12: Triangulierungen des Torus
  2.3. SIMPLIZIALKOMPLEX
-3)SindP1undP2konvexe Polygone und T1,T2die zugehörigen Triangulierungen von
-S2, so gibt es eine Triangulierung T, die sowohl um T1als auch um T2Verfeinerung
+3) Sind P1 und P2 konvexe Polygone undT1,T2 die zugehörigen Triangulierungen von
+S2, so gibt es eine TriangulierungT, die sowohl umT1 als auch umT2 Verfeinerung
 ist (vgl. Abbildung 2.13).
 T1
 T2
 T
-Abbildung 2.13: Tist eine Triangulierung, die für T1undT2eine Verfeinerung ist.
-Nach Bemerkung 36 ist χ(∂P1) =χ(T1) =χ(T) =χ(T2) =χ(∂P2) = 2, weil o. B. d. A.
-P2ein Tetraeder ist.
+Abbildung 2.13:T ist eine Triangulierung, die fürT1 und T2 eine Verfeinerung ist.
+Nach Bemerkung 36 istχ(∂P1) = χ(T1) = χ(T) = χ(T2) = χ(∂P2) = 2, weil o. B. d. A.
+P2 ein Tetraeder ist.
 Bemerkung 37 (Der Rand vom Rand ist 0)
-SeiKein endlicher Simplizialkomplex mit Knotenmenge Vund<eine Totalordnung auf V.
-SeiAndie Menge der n-Simplizes in K, d. h.
-An(K) :={σ∈K|dim(σ) =n}fürn= 0,...,d = dim(K)
-undCn(K)derR-Vektorraum mit Basis An(K), d. h.
-Cn(K) =
+Sei K ein endlicher Simplizialkomplex mit KnotenmengeV und < eine Totalordnung aufV.
+Sei An die Menge dern-Simplizes inK, d. h.
+An(K) := {σ∈K |dim(σ) = n} für n= 0,...,d = dim(K)
+und Cn(K) der R-Vektorraum mit BasisAn(K), d. h.
+Cn(K) =
+
 
-∑
-σ∈An(K)cσ·σ⏐⏐⏐⏐⏐⏐cσ∈R
+
+∑
+σ∈An(K)
+cσ ·σ
+⏐⏐⏐⏐⏐⏐
+cσ ∈R
+
 
 
-Seiσ= ∆(x0,...,xn)∈An(K), sodassx0<x 1<···<xn.
-Füri= 0,...,nsei∂iσ:= ∆(x0,..., ˆxi,...,xn)diei-te Seite von σunddσ=dnσ:=∑
-i=0(−1)i∂iσ∈Cn−1(K)unddn:Cn(K)→Cn−1(K)die dadurch definierte lineare
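+Sei $\sigma = \Delta(x_0,\dots,x_n) \in A_n(K)$, sodass $x_0 < x_1 < \dots < x_n$.
+Für $i = 0,\dots,n$ sei $\partial_i\sigma := \Delta(x_0,\dots,\hat{x}_i,\dots,x_n)$ die $i$-te Seite von $\sigma$ und $d\sigma = d_n\sigma := \sum_{i=0}^{n} (-1)^i \partial_i\sigma \in C_{n-1}(K)$
+und $d_n : C_n(K) \to C_{n-1}(K)$ die dadurch definierte lineare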
 Abbildung.
-Dann gilt:dn−1◦dn= 0
-abc
+Dann gilt:dn−1 ◦dn = 0
+a b
+c
 σ
-e3e1 e2
+e3
+e1e2
 Abbildung 2.14: Simplizialkomplex mit Totalordnung
 Beispiel 29
-Seia<b<c . Dann gilt:
-d2σ=e1−e2+e3
-d1(e1−e2+e3) = (c−b)−(c−a) + (b−a)
+Sei a<b<c . Dann gilt:
+d2σ= e1 −e2 + e3
+d1(e1 −e2 + e3) = (c−b) −(c−a) + (b−a)
  2.3. SIMPLIZIALKOMPLEX
 = 0
-Seia<b<c<d . Dann gilt für Tetraeder:
-d3(∆(a,b,c,d )) = ∆(b,c,d )−∆(a,c,d ) + ∆(a,b,d )−∆(a,b,c ),wobei:
-d2( ∆(b,c,d )) = ∆(c,d)−∆(b,d) + ∆(b,c)
-d2(−∆(a,c,d )) =−∆(c,d) + ∆(a,d)−∆(a,c)
-d2( ∆(a,b,d )) = ∆(b,d)−∆(a,d) + ∆(a,b)
-d2(−∆(a,b,c )) =−∆(b,c) + ∆(a,c)−∆(a,b)
+Sei a<b<c<d . Dann gilt für Tetraeder:
+d3(∆(a,b,c,d )) = ∆(b,c,d ) −∆(a,c,d ) + ∆(a,b,d ) −∆(a,b,c ),wobei:
+d2( ∆( b,c,d )) = ∆( c,d)−∆(b,d) + ∆(b,c)
+d2(−∆(a,c,d )) = −∆(c,d) + ∆(a,d)−∆(a,c)
+d2( ∆( a,b,d )) = ∆( b,d)−∆(a,d) + ∆(a,b)
+d2(−∆(a,b,c )) = −∆(b,c) + ∆(a,c)−∆(a,b)
 ⇒d2(d3(∆(a,b,c,d ))) = 0
-Beweis: Seiσ∈An. Dann gilt:
-dn−1(dnσ) =dn−1(n∑
-i=0(−1)i∂iσ)
-=n∑
-i=0(−1)idn−1(∂iσ)
-=n∑
-i=0(−1)in−1∑
-j=0∂i(∂jσ)(−1)j
-=∑
-0≤i≤j≤n−1(−1)i+j∂j(∂i(σ)) +∑
-0≤j<i≤n(−1)i+j∂i−1(∂jσ)
+Beweis: Sei σ∈An. Dann gilt:
+dn−1(dnσ) = dn−1(
+n∑
+i=0
+(−1)i∂iσ)
+=
+n∑
+i=0
+(−1)idn−1(∂iσ)
+=
+n∑
+i=0
+(−1)i
+n−1∑
+j=0
+∂i(∂jσ)(−1)j
+=
+∑
+0≤i≤j≤n−1
+(−1)i+j∂j(∂i(σ)) +
+∑
+0≤j<i≤n
+(−1)i+j∂i−1(∂jσ)
 = 0
 weil jeder Summand aus der ersten Summe auch in der zweiten Summe vorkommt, aber mit
 umgekehrten Vorzeichen. ■
 Definition 41
-SeiKein Simplizialkomplex, Zn:=Kern (dn)⊆CnundBn:=Bild(dn+1)⊆Cn.
-a)Hn=Hn(K,R) :=Zn/Bnheißtn-teHomologiegruppe vonK.
-b)bn(K) := dim RHnheißtn-teBetti-Zahl vonK.
+Sei K ein Simplizialkomplex,Zn := Kern(dn) ⊆Cn und Bn := Bild(dn+1) ⊆Cn.
+a) Hn = Hn(K,R) := Zn/Bn heißtn-te Homologiegruppe von K.
+b) bn(K) := dimR Hn heißtn-te Betti-Zahl von K.
 Bemerkung 38
-Nach Bemerkung 37 ist Bn⊆Zn, denndn+1(C)∈Kern (dn)fürC∈Cn+1.
+Nach Bemerkung 37 istBn ⊆Zn, denndn+1(C) ∈Kern(dn) für C ∈Cn+1.
 Satz 2.2
-Für jeden endlichen Simplizialkomplex Kder Dimension dgilt:
+Für jeden endlichen SimplizialkomplexK der Dimensiond gilt:
 d∑
-k=0(−1)kbk(K) =d∑
-k=0(−1)kak(K) =χ(K)
+k=0
+(−1)kbk(K) =
+d∑
+k=0
+(−1)kak(K) = χ(K)
 Bemerkung 39
-Es gilt nicht ak=bk∀k∈N0.
+Es gilt nichtak = bk ∀k∈N0.
  2.3. SIMPLIZIALKOMPLEX
 Beweis:
-•Dimensionsformel für dn:an= dimZn+ dimBn−1fürn≥1
-•Dimensionsformel für Zn→Hn=Zn/Bn: dimZn=bn+ dimBn
-•dimZd=bd, dadimZd=bd+ dimBd, wobei dimBd= 0, daad+1= 0
-•a0−dimB0=b0, daa0−dimB0=a0−dimZ0+b0unda0=dimZ0, weila−1= 0
-⇒d∑
-k=0(−1)kak=a0+d∑
-k=1(−1)k(dimZk+ dimBk−1)
-=a0+d∑
-k=1(−1)kdimZk+d−1∑
-k=0(−1)k+1dimBk
-=a0+d∑
-k=1(−1)kdimZk−d−1∑
-k=0(−1)kdimBk
-=a0+d−1∑
-k=1(−1)kbk+ (−1)ddimZd
-=bd−dimB0
-=b0+d−1∑
-k=1(−1)kbk+ (−1)dbd
-=d∑
-k=0(−1)kbk
+•Dimensionsformel fürdn: an = dim Zn + dimBn−1 für n≥1
+•Dimensionsformel fürZn →Hn = Zn/Bn : dim Zn = bn + dimBn
+•dim Zd = bd, dadim Zd = bd + dimBd, wobeidim Bd = 0, daad+1 = 0
+•a0 −dim B0 = b0, daa0 −dim B0 = a0 −dim Z0 + b0 und a0 = dim Z0, weila−1 = 0
+⇒
+d∑
+k=0
+(−1)kak = a0 +
+d∑
+k=1
+(−1)k(dim Zk + dimBk−1)
+= a0 +
+d∑
+k=1
+(−1)kdim Zk +
+d−1∑
+k=0
+(−1)k+1 dim Bk
+= a0 +
+d∑
+k=1
+(−1)kdim Zk −
+d−1∑
+k=0
+(−1)kdim Bk
+= a0 +
+d−1∑
+k=1
+(−1)kbk + (−1)ddim Zd  
+=bd
+−dim B0
+= b0 +
+d−1∑
+k=1
+(−1)kbk + (−1)dbd
+=
+d∑
+k=0
+(−1)kbk
  2.3. SIMPLIZIALKOMPLEX
 Übungsaufgaben
 Aufgabe 7 (Zusammenhang)
-(a)Beweisen Sie, dass eine topologische Mannigfaltigkeit genau dann wegzusammenhängend
+(a) Beweisen Sie, dass eine topologische Mannigfaltigkeit genau dann wegzusammenhängend
  ist, wenn sie zusammenhängend ist
-(b)Betrachten Sie nun wie in Beispiel 20.8 den Raum X:= (R\{0})∪{01,02}versehen
-mit der dort definierten Topologie. Ist Xwegzusammenhängend?
+(b) Betrachten Sie nun wie in Beispiel 20.8 den Raum $X := (\mathbb{R} \setminus \{0\}) \cup \{0_1, 0_2\}$ versehen
+mit der dort definierten Topologie. Ist $X$ wegzusammenhängend?
 3 Fundamentalgruppe und Überlagerungen
 3.1 Homotopie von Wegen
-a bγ1
+a b
+γ1
 γ2
-(a)γ1undγ2sind homotop,
+(a) γ1 und γ2 sind homotop,
 da man sie „zueinander verschieben“
- kann.a bγ1
+ kann.
+a b
+γ1
 γ2
-(b)γ1undγ2sind wegen dem
+(b) γ1 und γ2 sind wegen dem
 Hindernis nicht homotop.
-Abbildung 3.1: Beispiele für Wege γ1undγ2
+Abbildung 3.1: Beispiele für Wegeγ1 und γ2
 Definition 42
-SeiXein topologischer Raum, a,b∈X,γ1,γ2:I→XWege vonanachb, d. h.γ1(0) =
-γ2(0) =a,γ1(1) =γ2(1) =b
-γ1undγ2heißenhomotop , wenn es eine stetige Abbildung H:I×I→Xmit
-H(t,0) =γ1(t)∀t∈I
-H(t,1) =γ2(t)∀t∈I
-undH(0,s) =aundH(1,s) =bfür alles∈Igibt. Dann schreibt man: γ1∼γ2
-HheißtHomotopie zwischenγ1undγ2.
+Sei X ein topologischer Raum,a,b ∈X, γ1,γ2 : I →X Wege vona nach b, d. h.γ1(0) =
+γ2(0) = a, γ1(1) = γ2(1) = b
+γ1 und γ2 heißenhomotop, wenn es eine stetige AbbildungH : I×I →X mit
+H(t,0) = γ1(t) ∀t∈I
+H(t,1) = γ2(t) ∀t∈I
+und H(0,s) = a und H(1,s) = b für alles∈I gibt. Dann schreibt man:γ1 ∼γ2
+H heißtHomotopie zwischen γ1 und γ2.
 Bemerkung 40
-SeiXein topologischer Raum, a,b∈X,γ1,γ2:I→XWege vonanachbundHeine
-Homotopie zwischen γ1undγ2.
+Sei X ein topologischer Raum,a,b ∈X, γ1,γ2 : I →X Wege vona nach b und H eine
+Homotopie zwischenγ1 und γ2.
 Dann gilt: Der Weg
-γs:I→X, γs(t) =H(t,s)
-ist Weg inXvonanachbfür jedess∈I.
-Beweis:Hist stetig, also ist H(t,s)insbesondere für jedes feste sstetig. DaH(0,s) =aund
-H(1,s) =bfür alles∈Iundγseine Abbildung von IaufXist, istγsein Weg inXvona
-nachbfür jedess∈I. ■
+γs : I →X, γ s(t) = H(t,s)
+ist Weg inX von a nach b für jedess∈I.
+Beweis: $H$ ist stetig, also ist $H(t,s)$ insbesondere für jedes feste $s$ stetig. Da $H(0,s) = a$ und
+$H(1,s) = b$ für alle $s \in I$ und $\gamma_s$ eine Abbildung von $I$ auf $X$ ist, ist $\gamma_s$ ein Weg in $X$ von $a$
+nach $b$ für jedes $s \in I$. ■
 Bemerkung 41
-Durch Homotopie wird eine Äquivalenzrelation auf der Menge aller Wege in Xvonanachb
+Durch Homotopie wird eine Äquivalenzrelation auf der Menge aller Wege inX von anach b
 definiert.
 Beweis:
  3.1. HOMOTOPIE VON WEGEN
-•reflexiv:H(t,s) =γ(t)für alle (t,s)∈I×I
-•symmetrisch: H′(t,s) =H(t,1−s)für alle (t,s)∈I×I
-•transitiv: Seien H′bzw.H′′Homotopien von γ1nachγ2bzw. vonγ2nachγ3.
-Dann seiH(t,s) :={
-H′(t,2s)falls0≤s≤1
+•reflexiv: H(t,s) = γ(t) für alle(t,s) ∈I×I
+•symmetrisch: H′(t,s) = H(t,1 −s) für alle(t,s) ∈I×I
+•transitiv: SeienH′bzw. H′′Homotopien vonγ1 nach γ2 bzw. vonγ2 nach γ3.
+Dann seiH(t,s) :=
+{
+H′(t,2s) falls 0 ≤s≤1
 2
-H′′(t,2s−1)falls1
-2≤s≤1
-⇒Hist stetig und Homotopie von γ1nachγ3.
+H′′(t,2s−1) falls 1
+2 ≤s≤1
+⇒H ist stetig und Homotopie vonγ1 nach γ3.
 ■
 Beispiel 30
-1) SeiX=S1.γ1undγ2aus Abbildung 3.3a nicht homotop.
-2) SeiX=T2.γ1,γ2undγ3aus Abbildung 3.3b sind paarweise nicht homotop.
-3) SeiX=R2unda=b= (0,0).
-Je zwei Wege im R2mit Anfangs- und Endpunkt (0,0)sind homotop.
-Abbildung 3.2: Zwei Wege im R2mit Anfangs- und Endpunkt (0,0)
-Seiγ0:I→R2der konstante Weg γ0(t) = (0,0)∀t∈I. Seiγ(0) =γ(1) = (0,0).
-H(t,s) := (1−s)γ(t)ist stetig,H(t,0) =γ(t)∀t∈IundH(t,1) = (0,0)∀t∈I.
+1) Sei X = S1. γ1 und γ2 aus Abbildung 3.3a nicht homotop.
+2) Sei X = T2. γ1,γ2 und γ3 aus Abbildung 3.3b sind paarweise nicht homotop.
+3) Sei X = R2 und a= b= (0,0).
+Je zwei Wege imR2 mit Anfangs- und Endpunkt(0,0) sind homotop.
+Abbildung 3.2: Zwei Wege imR2 mit Anfangs- und Endpunkt(0,0)
+Sei γ0 : I →R2 der konstante Wegγ0(t) = (0,0) ∀t∈I. Seiγ(0) = γ(1) = (0,0).
+H(t,s) := (1 −s)γ(t) ist stetig,H(t,0) = γ(t) ∀t∈I und H(t,1) = (0,0) ∀t∈I.
 Bemerkung 42
-SeiXein topologischer Raum, γ:I→Xein Weg und ϕ:I→Istetig mitϕ(0) = 0,
-ϕ(1) = 1. Dann sind γundγ◦ϕhomotop.
-Beweis: SeiH(t,s) =γ((1−s)t+s·ϕ(t)).
-Dann istHstetig,H(t,0) =γ(t), H (t,1) =γ(ϕ(t)), H (0,s) =γ(0)undH(1,s) =
-γ(1−s+s) =γ(1)
-⇒Hist Homotopie. ■
+Sei X ein topologischer Raum,γ : I →X ein Weg undϕ : I →I stetig mit ϕ(0) = 0 ,
+ϕ(1) = 1. Dann sindγ und γ◦ϕ homotop.
+Beweis: Sei H(t,s) = γ((1 −s)t+ s·ϕ(t)).
+Dann ist H stetig, H(t,0) = γ(t), H (t,1) = γ(ϕ(t)), H (0,s) = γ(0) und H(1,s) =
+γ(1 −s+ s) = γ(1)
+⇒H ist Homotopie. ■
  3.1. HOMOTOPIE VON WEGEN
-ab
-γ1 γ2
+a
+b
+γ1γ2
 (a) Kreis mit zwei Wegen
-ab (b) Torus mit drei Wegen
+a
+b (b) Torus mit drei Wegen
 Abbildung 3.3: Beispiele für (nicht)-Homotopie von Wegen
 Definition 43
-Seienγ1,γ2Wege inXmitγ1(1) =γ2(0). Dann ist
-γ(t) ={
-γ1(2t)falls0≤t<1
+Seien γ1,γ2 Wege inX mit γ1(1) = γ2(0). Dann ist
+γ(t) =
+{
+γ1(2t) falls 0 ≤t< 1
 2
-γ2(2t−1)falls1
-2≤t≤1
-ein Weg in X. Er heißt zusammengesetzter Weg und man schreibt γ=γ1∗γ2.
+γ2(2t−1) falls 1
+2 ≤t≤1
+ein Weg inX. Er heißtzusammengesetzter Wegund man schreibtγ = γ1 ∗γ2.
 Bemerkung 43
 Das Zusammensetzen von Wegen ist nur bis auf Homotopie assoziativ, d. h.:
-γ1∗(γ2∗γ3)̸= (γ1∗γ2)∗γ3
-γ1∗(γ2∗γ3)∼(γ1∗γ2)∗γ3
-mitγ1(1) =γ2(0)undγ2(1) =γ3(0).
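+$\gamma_1 * (\gamma_2 * \gamma_3) \neq (\gamma_1 * \gamma_2) * \gamma_3$
+$\gamma_1 * (\gamma_2 * \gamma_3) \sim (\gamma_1 * \gamma_2) * \gamma_3$
+mit $\gamma_1(1) = \gamma_2(0)$ und $\gamma_2(1) = \gamma_3(0)$.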
 γ1 γ2 γ3
 0 1/2 3/4 1
-(a)γ1∗(γ2∗γ3)
+(a) γ1 ∗(γ2 ∗γ3)
 γ1 γ2 γ3
 0 1/4 1/2 1
-(b)(γ1∗γ2)∗γ3
+(b) (γ1 ∗γ2) ∗γ3
 Abbildung 3.4: Das Zusammensetzen von Wegen ist nicht assoziativ
 Beweis: Das Zusammensetzen von Wegen ist wegen Bemerkung 42 bis auf Homotopie assoziativ.
 Verwende dazu
-ϕ(t) =
+ϕ(t) =
+
 
-1
-2tfalls0≤t<1
+
+1
+2 t falls 0 ≤t< 1
 2
 t−1
-4falls1
-2≤t<3
+4 falls 1
+2 ≤t< 3
 4
-2t−1falls3
-4≤t≤1
+2t−1 falls 3
+4 ≤t≤1
 Bemerkung 44
-SeiXein topologischer Raum, a,b,c∈X,γ1,γ′
-1Wege vonanachbundγ2,γ′
-2Wege vonb
-nachc.
-Sindγ1∼γ′
-1undγ2∼γ′
-2, so istγ1∗γ2∼γ′
-1∗γ′
+Sei X ein topologischer Raum,a,b,c ∈X, γ1,γ′
+1 Wege vona nach b und γ2,γ′
+2 Wege vonb
+nach c.
+Sind γ1 ∼γ′
+1 und γ2 ∼γ′
+2, so istγ1 ∗γ2 ∼γ′
+1 ∗γ′
 2.
  3.2. FUNDAMENTALGRUPPE
-γ1 γ′
+γ1γ′
 1
 a
-bc
+b
+c
 γ′
-2γ2
+2
+γ2
 Abbildung 3.5: Situation aus Bemerkung 44
 .
-Beweis: SeiHieine Homotopie zwischen γiundγ′
-i,i= 1,2.
+Beweis: Sei Hi eine Homotopie zwischenγi und γ′
+i, i= 1,2.
 Dann ist
-H(t,s) :={
-H1(2t,s)falls0≤t≤1
-2∀s∈I
-H2(2t−1,s)falls1
-2≤t≤1
-eine Homotopie zwischen γ1∗γ2undγ′
-1∗γ′
+H(t,s) :=
+{
+H1(2t,s) falls 0 ≤t≤1
+2 ∀s∈I
+H2(2t−1,s) falls 1
+2 ≤t≤1
+eine Homotopie zwischenγ1 ∗γ2 und γ′
+1 ∗γ′
 2.
 Eine spezielle Homotopieäquivalenz sind sog. Deformationsretraktionen:
 Definition 44
-SeiXein topologischer Raum, A⊆X,r:X→Aeine stetige Abbildung und ι= (idX)|A.
-a)ι:A→Xmitι(x) =xheißt dieInklusionsabbildung und man schreibt: ι:A↪→X.
-b)rheißtRetraktion , wennr|A=idAist.
-c)AheißtDeformationsretrakt , wenn es eine Retraktion raufAmitι◦r∼idXgibt.
+Sei $X$ ein topologischer Raum, $A \subseteq X$, $r: X \to A$ eine stetige Abbildung und $\iota = (\mathrm{id}_X)|_A$.
+a) $\iota: A \to X$ mit $\iota(x) = x$ heißt die Inklusionsabbildung und man schreibt: $\iota: A \hookrightarrow X$.
+b) $r$ heißt Retraktion, wenn $r|_A = \mathrm{id}_A$ ist.
+c) $A$ heißt Deformationsretrakt, wenn es eine Retraktion $r$ auf $A$ mit $\iota \circ r \sim \mathrm{id}_X$ gibt.
 Beispiel 31 (Zylinder auf Kreis)
-SeiX=S1×Rein topologischer Raum und
-r:S1×R→S1×{0}∼=S1
+Sei X = S1 ×R ein topologischer Raum und
+r: S1 ×R →S1 ×{0 }∼= S1
 mit
 r(x,y) := (x,0)
-eine Abbildung. rist eine Retraktion, da r|S1∼=idS1.
-ι◦r:S1×R→S1×R
-(x,y)↦→(x,0)
-H: (S1×R)×I→S1×R
-(x,y,t )↦→(x,ty)
+eine Abbildung.r ist eine Retraktion, dar|S1 ∼= idS1 .
+ι◦r: S1 ×R →S1 ×R
+(x,y) ↦→(x,0)
+H : (S1 ×R) ×I →S1 ×R
+(x,y,t ) ↦→(x,ty)
 3.2 Fundamentalgruppe
-Für einen Weg γsei[γ]seineHomotopieklasse .
+Für einen Wegγ sei [γ] seine Homotopieklasse.
 Definition 45
-SeiXein topologischer Raum und x∈X. Sei außerdem
-π1(X,x) :={[γ]|γist Weg inXmitγ(0) =γ(1) =x}
+Sei X ein topologischer Raum undx∈X. Sei außerdem
+π1(X,x) := {[γ] |γ ist Weg inX mit γ(0) = γ(1) = x}
  3.2. FUNDAMENTALGRUPPE
-Durch [γ1]∗G[γ2] := [γ1∗γ2]wirdπ1(X,x)zu einer Gruppe. Diese Gruppe heißt Fundamentalgruppe
- vonXim Basispunkt x.
+Durch [γ1] ∗G [γ2] := [γ1 ∗γ2] wird π1(X,x) zu einer Gruppe. Diese Gruppe heißtFundamentalgruppe
+ von X im Basispunktx.
 Bemerkung 45
-ImR2gibt es nur eine Homotopieklasse.
+Im R2 gibt es nur eine Homotopieklasse.
 Beweis: (Fundamentalgruppe ist eine Gruppe)
-a) Abgeschlossenheit folgt direkt aus der Definition von ∗G
+a) Abgeschlossenheit folgt direkt aus der Definition von∗G
 b) Assoziativität folgt aus Bemerkung 43
-c) Neutrales Element e= [γ0],γ0(t) =x∀t∈I.e∗[γ] = [γ] = [γ]∗e, daγ0∗γ∼γ
-d) Inverses Element [γ]−1= [γ] = [γ(1−t)], dennγ∗γ∼γ0∼γ∗γ
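+c) Neutrales Element $e = [\gamma_0]$, $\gamma_0(t) = x \ \forall t \in I$. $e * [\gamma] = [\gamma] = [\gamma] * e$, da $\gamma_0 * \gamma \sim \gamma$
+d) Inverses Element $[\gamma]^{-1} = [\overline{\gamma}] = [\gamma(1-t)]$, denn $\overline{\gamma} * \gamma \sim \gamma_0 \sim \gamma * \overline{\gamma}$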
 Beispiel 32
-1)S1={z∈C||z|= 1}={
-(cosϕ,sinϕ)∈R2⏐⏐0≤ϕ≤2π}
-π1(S1,1) ={
-[γk]⏐⏐k∈Z}∼=Z. Dabei ist γ(t) =e2πit=cos(2πt) +isin(2πt)und
-γk:=γ∗···∗γ
-kmal
-[γk]↦→kist ein Isomorphismus.
-2)π1(R2,0) =π1(R2,x) ={e}für jedesx∈R2
-3)π1(Rn,x) ={e}für jedesx∈Rn
-4)G⊆Rnheißtsternförmig bzgl.x∈G, wenn für jedes y∈Gauch die Strecke
-[x,y]⊆Gist.
-Für jedes sternförmige G⊆Rnistπ1(G,x) ={e}
+1) S1 = {z∈C ||z|= 1 }=
+{
+(cos ϕ,sin ϕ) ∈R2 ⏐⏐0 ≤ϕ≤2π
+}
+π1(S1,1) =
+{
+[γk]
+⏐⏐k∈Z
+}∼= Z. Dabei istγ(t) = e2πit = cos(2πt) + isin(2πt) und
+γk := γ∗···∗ γ  
+k mal
+[γk] ↦→k ist ein Isomorphismus.
+2) π1(R2,0) = π1(R2,x) = {e}für jedesx∈R2
+3) π1(Rn,x) = {e}für jedesx∈Rn
+4) G ⊆Rn heißtsternförmig bzgl. x ∈G, wenn für jedesy ∈G auch die Strecke
+[x,y] ⊆G ist.
+Für jedes sternförmigeG⊆Rn ist π1(G,x) = {e}
 x
 Abbildung 3.6: Sternförmiges Gebiet
 .
-5)π1(S2,x0) ={e}, da im R2alle Wege homotop zu {e}sind. Mithilfe der stereographischen
- Projektion kann von S2auf den R2abgebildet werden.
+5) π1(S2,x0) = {e}, da imR2 alle Wege homotop zu{e}sind. Mithilfe der stereographischen
+ Projektion kann vonS2 auf denR2 abgebildet werden.
 Dieses Argument funktioniert nicht mehr bei flächenfüllenden Wegen, d. h. wenn
-γ:I→S2surjektiv ist.
+γ : I →S2 surjektiv ist.
 Bemerkung 46
-SeiXein topologischer Raum, a,b∈X,δ:I→Xein Weg von anachb.
+Sei X ein topologischer Raum,a,b ∈X, δ: I →X ein Weg vona nach b.
 Dann ist die Abbildung
-α:π1(X,a)→π1(X,b) [γ]↦→[δ∗γ∗δ]
+$\alpha: \pi_1(X,a) \to \pi_1(X,b)$, $[\gamma] \mapsto [\overline{\delta} * \gamma * \delta]$
 ein Gruppenisomorphismus.
  3.2. FUNDAMENTALGRUPPE
-a bγ
+a b
+γ
 δ
 Abbildung 3.7: Situation aus Bemerkung 46
 .
 Beweis:
-α([γ1]∗[γ2]) = [δ∗(γ1∗γ2)∗δ]
-= [δ∗γ1∗δ∗δ∗γ2∗δ]
-= [δ∗γ1∗δ]∗[δ∗γ2∗δ]
-=α([γ1])∗α([γ2])
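+$\alpha([\gamma_1] * [\gamma_2]) = [\overline{\delta} * (\gamma_1 * \gamma_2) * \delta]$
+$= [\overline{\delta} * \gamma_1 * \delta * \overline{\delta} * \gamma_2 * \delta]$
+$= [\overline{\delta} * \gamma_1 * \delta] * [\overline{\delta} * \gamma_2 * \delta]$
+$= \alpha([\gamma_1]) * \alpha([\gamma_2])$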
 Definition 46
-Ein wegzusammenhängender topologischer Raum Xheißteinfach zusammenhängend ,
-wennπ1(X,x) ={e}für einx∈X.
-Wennπ1(X,x) ={e}für einx∈Xgilt, dann wegen Bemerkung 46 sogar für alle x∈X.
+Ein wegzusammenhängender topologischer RaumX heißteinfach zusammenhängend,
+wenn π1(X,x) = {e}für einx∈X.
+Wenn π1(X,x) = {e}für einx∈X gilt, dann wegen Bemerkung 46 sogar für allex∈X.
 Bemerkung 47
-Es seienX,Ytopologische Räume, f:X→Yeine stetige Abbildung, x∈X,y :=f(x)∈Y.
-a)Dann ist die Abbildung f∗:π1(X,x)→π1(Y,y),[γ]→[f◦γ]ein Gruppenhomomorphismus.
+Es seienX,Y topologische Räume,f : X →Y eine stetige Abbildung,x∈X,y := f(x) ∈Y.
+a) Dann ist die Abbildungf∗: π1(X,x) →π1(Y,y),[γ] →[f ◦γ] ein Gruppenhomomorphismus.
 
-b)IstZein weiterer topologischer Raum und g:Y→Zeine stetige Abbildung z:=g(y).
-Dann ist (g◦f)∗=g∗◦f∗:π1(X,x)→π1(Z,z)
+b) Ist Z ein weiterer topologischer Raum undg: Y →Z eine stetige Abbildungz:= g(y).
+Dann ist(g◦f)∗= g∗◦f∗: π1(X,x) →π1(Z,z)
 Beweis:
-a)f∗ist wohldefiniert: Seien γ1,γ2homotope Wege von x. z.Z.:f◦γ1∼f◦γ2: Nach
-Voraussetzung gibt es stetige Abbildungen H:I×I→Xmit
-H(t,0) =γ1(t),
-H(t,1) =γ2(t),
-H(0,s) =H(1,s) =x.
-Dann istf◦H:I×I→Ystetig mit (f◦H)(t,0) =f(H(t,0)) =f(γ1(t)) = (f◦γ1)(t)
-etc.⇒f◦γ1∼f◦γ2.
-f∗([γ1]∗[γ2]) = [f◦(γ1∗γ2)] = [(f◦γ1)]∗[(f◦γ2)] =f∗([γ1])∗f∗([γ2])
-b)(g◦f)∗([γ]) = [(g◦f)◦γ] = [g◦(f◦γ)] =g∗([f◦γ]) =g∗(f∗([γ])) = (g∗◦f∗)([γ])
+a) f∗ ist wohldefiniert: Seienγ1,γ2 homotope Wege vonx. z.Z.:f ◦γ1 ∼f ◦γ2: Nach
+Voraussetzung gibt es stetige AbbildungenH : I×I →X mit
+H(t,0) = γ1(t),
+H(t,1) = γ2(t),
+H(0,s) = H(1,s) = x.
+Dann istf◦H : I×I →Y stetig mit(f◦H)(t,0) = f(H(t,0)) = f(γ1(t)) = (f◦γ1)(t)
+etc. ⇒f ◦γ1 ∼f ◦γ2.
+f∗([γ1] ∗[γ2]) = [f ◦(γ1 ∗γ2)] = [(f ◦γ1)] ∗[(f ◦γ2)] = f∗([γ1]) ∗f∗([γ2])
+b) (g◦f)∗([γ]) = [(g◦f) ◦γ] = [g◦(f ◦γ)] = g∗([f ◦γ]) = g∗(f∗([γ])) = (g∗◦f∗)([γ])
 Beispiel 33
-1)f:S1↪→R2ist injektiv, aber f∗:π1(S1,1)∼=Z→π1(R2,1) ={e}ist nicht injektiv.
-2)f:R→S1,t↦→(cos2πt,sin2πt)istsurjektiv,aber f∗:π1(R,0) ={e}→π1(S1,1)∼=
-Zist nicht surjektiv.
+1) f : S1 ↪→R2 ist injektiv, aberf∗: π1(S1,1) ∼= Z →π1(R2,1) = {e}ist nicht injektiv.
+2) f : R →S1,t ↦→(cos 2πt,sin 2πt) ist surjektiv, aberf∗: π1(R,0) = {e}→ π1(S1,1) ∼=
+Z ist nicht surjektiv.
  3.2. FUNDAMENTALGRUPPE
 Bemerkung 48
-Seif:X→Yein Homöomorphismus zwischen topologischen Räumen X,Y. Dann gilt:
-f∗:π1(X,x)→π1(Y,f(x))
-ist ein Isomorphismus für jedes x∈X.
-Beweis: Seig:Y→Xdie Umkehrabbildung, d. h. gist stetig und f◦g=idY,g◦f=idX
-⇒f∗◦g∗= (f◦g)∗= (idY)∗=idπ1(Y,f(X)undg∗◦f∗=idπ1(X,x).
+Sei f : X →Y ein Homöomorphismus zwischen topologischen RäumenX,Y . Dann gilt:
+f∗: π1(X,x) →π1(Y,f (x))
+ist ein Isomorphismus für jedesx∈X.
+Beweis: Sei g: Y →X die Umkehrabbildung, d. h.g ist stetig undf ◦g= idY, g◦f = idX
+$\Rightarrow f_* \circ g_* = (f \circ g)_* = (\mathrm{id}_Y)_* = \mathrm{id}_{\pi_1(Y,f(x))}$ und $g_* \circ f_* = \mathrm{id}_{\pi_1(X,x)}$.
 Definition 47
-SeienX,Ytopologische Räume, x0∈X,y 0∈Y,f,g :X→Ystetig mitf(x0) =y0=g(x0).
-fundgheißenhomotop (f∼g), wenn es eine stetige Abbildung H:X×I→Ymit
-H(x,0) =f(x)∀x∈X
-H(x,1) =g(x)∀x∈X
-H(x0,s) =y0∀s∈I
+Seien X,Y topologische Räume,x0 ∈X,y0 ∈Y,f,g : X →Y stetig mitf(x0) = y0 = g(x0).
+f und g heißenhomotop (f ∼g), wenn es eine stetige AbbildungH : X×I →Y mit
+H(x,0) = f(x) ∀x∈X
+H(x,1) = g(x) ∀x∈X
+H(x0,s) = y0 ∀s∈I
 gibt.
 Bemerkung 49
-Sindfundghomotop, so ist f∗=g∗:π1(X,x 0)→π1(Y,y0).
-Beweis: Seiγein geschlossener Weg in Xumx0, d. h. [γ]∈π1(X,x 0).
-Z. z.:f◦γ∼g◦γ
-Sei dazuHγ:I×I→Y,(t,s)↦→H(γ(t),s). Dann gilt:
-Hγ(t,0) =H(γ(t),0) = (f◦γ)(t)∀t∈I
-Hγ(1,s) =H(γ(1),s) =H(x0,s) =y0∀s∈I
-Hγ(t,1) =H(γ(t),1) =g(γ(t))∀t∈I
+Sind f und g homotop, so istf∗= g∗: π1(X,x0) →π1(Y,y0).
+Beweis: Sei γ ein geschlossener Weg inX um x0, d. h.[γ] ∈π1(X,x0).
+Z. z.:f ◦γ ∼g◦γ
+Sei dazuHγ : I×I →Y,(t,s) ↦→H(γ(t),s). Dann gilt:
+Hγ(t,0) = H(γ(t),0) = (f ◦γ)(t) ∀t∈I
+Hγ(1,s) = H(γ(1),s) = H(x0,s) = y0 ∀s∈I
+Hγ(t,1) = H(γ(t),1) = g(γ(t)) ∀t∈I
 Beispiel 34
-f:X→Y,g:Y→Xmitg◦f∼idX, f◦g∼idY
-⇒f∗ist Isomorphismus. Konkret: f:R2→{0}, g:{0}→R2
-⇒f◦g=id{0},g◦f:R2→R2,x↦→0für allex.
-g◦f∼idR2mit Homotopie: H:R2×I→R2,H(x,s) = (1−s)x(stetig!)
-⇒H(x,0) =x=idR2(x),H(x,1) = 0,H(0,s) = 0∀s∈I.
+f : X →Y,g : Y →X mit g◦f ∼idX, f◦g∼idY
+⇒f∗ist Isomorphismus. Konkret:f : R2 →{0 }, g: {0 }→ R2
+⇒f ◦g= id{0 }, g◦f : R2 →R2, x↦→0 für allex.
+g◦f ∼idR2 mit Homotopie:H : R2 ×I →R2,H(x,s) = (1 −s)x (stetig!)
+⇒H(x,0) = x= idR2 (x), H(x,1) = 0, H(0,s) = 0 ∀s∈I.
 Satz 3.1 (Satz von Seifert und van Kampen „light“)
-SeiXein topologischer Raum, U,V⊆Xoffen mitU∪V=XundU∩Vwegzusammenhängend.
+Sei X ein topologischer Raum,U,V ⊆X offen mitU ∪V = X und U ∩V wegzusammenhängend.
 
-Dann wird π1(X,x)fürx∈U∩Verzeugt von geschlossenen Wegen um x, die ganz in
-Uoder ganz in Vverlaufen.
+Dann wirdπ1(X,x) für x∈U ∩V erzeugt von geschlossenen Wegen umx, die ganz in
+U oder ganz inV verlaufen.
  3.3. ÜBERLAGERUNGEN
-Beweis: Seiγ:I→Xein geschlossener Weg um x. Überdecke Imit endlich vielen offenen
-Intervallen I1,I2,...,In, die ganz in γ−1(U)oder ganz in γ−1(V)liegen.
-O. B. d. A. sei γ(I1)⊆U,γ(I2)⊆V, etc.
-Wähleti∈Ii∩Ii+1, alsoγ(ti)∈U∩V. SeiσiWeg inU∩Vvonx0nachγ(ti)⇒γist
+Beweis: Sei γ : I →X ein geschlossener Weg umx. ÜberdeckeI mit endlich vielen offenen
+IntervallenI1,I2,...,I n, die ganz inγ−1(U) oder ganz inγ−1(V) liegen.
+O. B. d. A. seiγ(I1) ⊆U,γ(I2) ⊆V, etc.
+Wähle ti ∈Ii ∩Ii+1, alsoγ(ti) ∈U ∩V. Seiσi Weg inU ∩V von x0 nach γ(ti) ⇒γ ist
 homotop zu
-γ1∗σ1
-inU∗σ1∗γ2∗σ2
-inV∗···∗σn−1∗γ2mitγi:=γ|Ii
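+\[
+\underbrace{\gamma_1 * \overline{\sigma_1}}_{\text{in } U} * \underbrace{\sigma_1 * \gamma_2 * \overline{\sigma_2}}_{\text{in } V}
+* \dots * \sigma_{n-1} * \gamma_n
+\]
+mit $\gamma_i := \gamma|_{I_i}$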
 a b
 x
-Abbildung 3.8: Topologischer Raum X
+Abbildung 3.8: Topologischer RaumX
 Beispiel 35 (Satz von Seifert und van Kampen)
-1)SeiXwie in Abbildung 3.8. π1(X,x)wird „frei“ erzeugt von aundb, weilπ1(U,x) =
-⟨a⟩∼=Z,π1(V,x) =⟨b⟩∼=Z, insbesondere ist a∗bnicht homotop zu b∗a.
-2) Torus:π1(T2,X)wird erzeugt von aundb.
-VUa
+1) Sei X wie in Abbildung 3.8.π1(X,x) wird „frei“ erzeugt vona und b, weilπ1(U,x) =
+⟨a⟩∼= Z,π1(V,x) = ⟨b⟩∼= Z, insbesondere ista∗b nicht homotop zub∗a.
+2) Torus: π1(T2,X) wird erzeugt vona und b.
+V
+U
+a
 b
-Va b
-Abbildung 3.9: a∗b=b∗a⇔a∗b∗a∗b∼e
+V
+a b
+Abbildung 3.9:a∗b= b∗a⇔a∗b∗a∗b∼e
 3.3 Überlagerungen
 Definition 48
-Es seienX,Yzusammenhängende topologische Räume und p:Y→Xeine stetige Abbildung.
+Es seienX,Y zusammenhängende topologische Räume undp: Y →X eine stetige Abbildung.
 
-pheißtÜberlagerung , wenn jedes x∈Xeine offene Umgebung U=U(x)⊆Xbesitzt,
-sodassp−1(U)disjunkte Vereinigung von offenen Teilmengen Vj⊆Yist(j∈I)und
-p|Vj:Vj→Uein Homöomorphismus ist.
-|I|heißtGrad der Überlagerung pund man schreibt:
-degp:=|I|
+p heißtÜberlagerung, wenn jedesx∈X eine offene UmgebungU = U(x) ⊆X besitzt,
+sodass p−1(U) disjunkte Vereinigung von offenen TeilmengenVj ⊆Y ist (j ∈I) und
+p|Vj : Vj →U ein Homöomorphismus ist.
+|I|heißtGrad der Überlagerungp und man schreibt:
+deg p:= |I|
  3.3. ÜBERLAGERUNGEN
-Abbildung 3.10: R→S1,
+Abbildung 3.10:R →S1,
 t↦→(cos 2πt,sin 2πt)
 Beispiel 36
 1) siehe Abbildung 3.10
 2) siehe Abbildung 3.11
-3)Rn→Tn=Rn/Zn
-4)Sn→Pn(R)
-5)S1→S1,z↦→z2, siehe Abbildung 3.12
-0 1 2 3 4 5 60123456
+3) Rn →Tn = Rn/Zn
+4) Sn →Pn(R)
+5) S1 →S1, z↦→z2, siehe Abbildung 3.12
+0 1 2 3 4 5 60
+1
+2
+3
+4
+5
+6
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
+*
 *
-******
-******
-******
-******
-******
-******
 −−−→
-Abbildung 3.11: R2→T2=R2/Z2
+Abbildung 3.11:R2 →T2 = R2/Z2
 Bemerkung 50
 Überlagerungen sind surjektiv.
-Beweis: Seip:Y→Xeine Überlagerung und x∈Xbeliebig. Dann existiert eine offene
-Umgebung U(x)⊆Xund offene Teilmengen Vj⊆Xmitp−1(U) =˙⋃Vjundp|Vj:Vj→U
+Beweis: Sei p : Y →X eine Überlagerung undx ∈X beliebig. Dann existiert eine offene
+Umgebung $U(x) \subseteq X$ und offene Teilmengen $V_j \subseteq Y$ mit $p^{-1}(U) = \dot{\bigcup} V_j$ und $p|_{V_j}: V_j \to U$
 ist Homöomorphismus.
-D. h. es existiert ein y∈Vj, so dassp|Vj(y) =x. Dax∈Xbeliebig war und ein y∈Y
-existiert, mit p(y) =x, istpsurjektiv. ■
+D. h. es existiert einy ∈Vj, so dassp|Vj(y) = x. Dax ∈X beliebig war und einy ∈Y
+existiert, mitp(y) = x, istp surjektiv. ■
  3.3. ÜBERLAGERUNGEN
-1i
-zz2
-ϕϕz2
-Abbildung 3.12: t↦→(cos 4πt,sin 4πt)
+1
+i
+z
+z2
+ϕϕ z2
+Abbildung 3.12:t↦→(cos 4πt,sin 4πt)
 Definition 49
-Seien (X,TX),(Y,TY)topologische Räume und f:X→Yeine Abbildung.
-fheißtoffen :⇔∀U∈TX:f(U)∈TY.
+Seien (X,TX),(Y,TY) topologische Räume undf : X →Y eine Abbildung.
+f heißtoffen :⇔∀U ∈TX : f(U) ∈TY.
 Beispiel 37 (Offene und stetige Abbildungen)
-SeiXein topologischer Raum und seien fi:R→Rmiti∈{1,2,3}undg:R→S1=
-{z∈C|∥z∥= 1}Abbildungen.
-1)f1:=idRist eine offene und stetige Abbildung.
-2)g(x) :=e2πixist eine offene, aber keine stetige Abbildung (vgl. Abbildung 1.5).
-3)f2(x) := 42ist eine stetige, aber keine offene Abbildung.
-4)f3(x) :={
-0fallsx∈Q
-42fallsx∈R\Q
+Sei X ein topologischer Raum und seienfi : R →R mit i∈{ 1,2,3 }und g : R →S1 =
+{z∈C |∥z∥= 1 }Abbildungen.
+1) f1 := idR ist eine offene und stetige Abbildung.
+2) g(x) := e2πix ist eine offene, aber keine stetige Abbildung (vgl. Abbildung 1.5).
+3) f2(x) := 42 ist eine stetige, aber keine offene Abbildung.
+4) f3(x) :=
+{
+0 falls x∈Q
+42 falls x∈R \Q
 ist weder stetig noch offen.
 Bemerkung 51
 Überlagerungen sind offene Abbildungen.
-Beweis: Seiy∈Vundx∈p(V), sodassx=p(y)gilt. Sei weiter U=Uxeine offene Umgebung
-vonxwie in Definition 48 und Vjdie Komponente von p−1(U), dieyenthält.
-Dann istV∩Vjoffene Umgebung von y.
-⇒p(V∩Vj)ist offen inp(Vj), also auch offen in X. Außerdem ist p(y) =x∈p(V∩Vj)und
-p(V∩Vj)⊆p(V).
-⇒p(V)ist offen.
+Beweis: Sei y∈V und x∈p(V), sodassx= p(y) gilt. Sei weiterU = Ux eine offene Umgebung
+von x wie in Definition 48 undVj die Komponente vonp−1(U), diey enthält.
+Dann istV ∩Vj offene Umgebung vony.
+⇒p(V ∩Vj) ist offen inp(Vj), also auch offen inX. Außerdem istp(y) = x∈p(V ∩Vj) und
+p(V ∩Vj) ⊆p(V).
+⇒p(V) ist offen.
 Definition 50
-SeiXein topologischer Raum und M⊆X.
-Mheißtdiskret inX, wennMinXkeinen Häufungspunkt hat.
+Sei X ein topologischer Raum undM ⊆X.
+M heißtdiskret in X, wennM in X keinen Häufungspunkt hat.
 Bemerkung 52
-Seip:Y→XÜberlagerung, x∈X.
-a)Xhausdorffsch⇒Yhausdorffsch
-b)p−1(x)ist diskret in Yfür jedesx∈X.
+Sei p: Y →X Überlagerung, x∈X.
+a) X hausdorffsch ⇒Y hausdorffsch
+b) p−1(x) ist diskret inY für jedesx∈X.
 Beweis:
-a) Seieny1,y2∈Y.
-1. Fall:p(y1) =p(y2) =x.
+a) Seien y1,y2 ∈Y.
+1. Fall: p(y1) = p(y2) = x.
  3.3. ÜBERLAGERUNGEN
-SeiUUmgebung von xwie in Definition 48, Vj1bzw.Vj2die Komponente von p−1(U),
-diey1bzw.y2enthält.
-Dann istVj1̸=Vj2, weil beide ein Element aus p−1(x)enthalten.
-⇒Vj1∩Vj2=∅nach Voraussetzung.
-2. Fall:p(y1)̸=p(y2).
-Dann seien U1undU2disjunkte Umgebungen von p(y1)undp(y2).
-⇒p−1(U1)undp−1(U2)sind disjunkte Umgebungen von y1undy2.
-b) Seix∈Xbeliebig, aber fest.
-Zu zeigen :∀yi∈p−1(x) :∃Vi∈TYmityi∈Vi, sodass gilt: i̸=j⇒Vi∩Vj=∅.
-DieViexistieren wegen der Definition einer Überlagerung: pheißt Überlagerung
-:⇔∀x∈X∃U=U(x)∈TX:p−1(U) =˙⋃
-Vi∈TYViundp|Viist Homöomorphismus.
-⇒(p|Vi)−1(x) ={yi}
-⇒Alleyiliegen diskret in Y, da Häufungspunkte unendlich viele Elemente in jeder
+Sei U Umgebung vonxwie in Definition 48,Vj1 bzw. Vj2 die Komponente vonp−1(U),
+die y1 bzw. y2 enthält.
+Dann istVj1 ̸= Vj2 , weil beide ein Element ausp−1(x) enthalten.
+⇒Vj1 ∩Vj2 = ∅nach Voraussetzung.
+2. Fall: p(y1) ̸= p(y2).
+Dann seienU1 und U2 disjunkte Umgebungen vonp(y1) und p(y2).
+⇒p−1(U1) und p−1(U2) sind disjunkte Umgebungen vony1 und y2.
+b) Sei x∈X beliebig, aber fest.
+Zu zeigen: ∀yi ∈p−1(x) : ∃Vi ∈TY mit yi ∈Vi, sodass gilt:i̸= j ⇒Vi ∩Vj = ∅.
+Die Vi existieren wegen der Definition einer Überlagerung: p heißt Überlagerung
+$:\Leftrightarrow \forall x \in X \ \exists U = U(x) \in T_X : p^{-1}(U) = \dot{\bigcup}_{V_i \in T_Y} V_i$
+und $p|_{V_i}$ ist Homöomorphismus.
+⇒(p|Vi)−1(x) = {yi }
+⇒Alle yi liegen diskret inY, da Häufungspunkte unendlich viele Elemente in jeder
 Umgebung benötigen. ■
 Bemerkung 53 (Eindeutigkeit des Überlagerungsgrades)
-Seip:Y→XÜberlagerung. Dann gilt:
-∀x1,x2∈X:|p−1(x1)|=|p−1(x2)|
-Hinweis:|p−1(x1)|=∞ist erlaubt!
-Beweis: SeiUUmgebung von x1wie in Definition 48, x∈U. Dann enthält jedes Vjmitj∈I
-genau ein Element von p−1(x).
-⇒|p−1(x)|ist konstant für x∈U
-Xzhgd.= = = =⇒|p−1(x)|ist konstant für x∈X.
+Sei p: Y →X Überlagerung. Dann gilt:
+∀x1,x2 ∈X : |p−1(x1)|= |p−1(x2)|
+Hinweis: |p−1(x1)|= ∞ist erlaubt!
+Beweis: Sei U Umgebung vonx1 wie in Definition 48,x∈U. Dann enthält jedesVj mit j ∈I
+genau ein Element vonp−1(x).
+⇒|p−1(x)|ist konstant fürx∈U
+$\overset{X \text{ zhgd.}}{\Longrightarrow}$
+$|p^{-1}(x)|$ ist konstant für $x \in X$.
 Definition 51
-Es seienX,Y,Ztopologische Räume, p:Y→Xeine Überlagerung und f:Z→Xstetig.
-Eine stetige Abbildung ˜f:Z→YheißtLiftung vonf, wennp◦˜f=fist.
+Es seienX,Y,Z topologische Räume,p: Y →X eine Überlagerung undf : Z →X stetig.
+Eine stetige Abbildung˜f : Z →Y heißtLiftung von f, wennp◦˜f = f ist.
 Y
-XZ
-p˜f
+X
+Z
+p
+˜f
 f
 Bemerkung 54 (Eindeutigkeit der Liftung)
-SeiZzusammenhängend und f0,f1:Z→YLiftungen von f.
-∃z0∈Z:f0(z0) =f1(z0)⇒f0=f1
-Beweis: SeiT={z∈Z|f0(z) =f1(z)}.
-Z. z.:Tist offen und Z\Tist auch offen.
+Sei Z zusammenhängend undf0,f1 : Z →Y Liftungen vonf.
+∃z0 ∈Z : f0(z0) = f1(z0) ⇒f0 = f1
+Beweis: Sei T = {z∈Z |f0(z) = f1(z) }.
+Z. z.: T ist offen undZ\T ist auch offen.
  3.3. ÜBERLAGERUNGEN
-0 1 2 3 4 5 60123456
-TLiften−−−→R2/Z2
-Abbildung 3.13: Beim Liften eines Weges bleiben geschlossene Wege im allgemeinen nicht geschlossen
+0 1 2 3 4 5 60
+1
+2
+3
+4
+5
+6
+T Liften−−−→R2/Z2
+Abbildung 3.13:Beim Liften eines Weges bleiben geschlossene Wege im allgemeinen nicht geschlossen
 
-Seiz∈T,x=f(z),UUmgebung von xwie in Definition 48, Vdie Komponente von p−1(U),
-diey:=f0(z) =f1(z)enthält.
-Seiq:U→Vdie Umkehrabbildung zu p|V.
-SeiW:=f−1(U)∩f−1
-0(V)∩f−1
-1(V).Wist offene Umgebung in Zvonz.
-Behauptung: W⊆T
-Denn fürw∈Wistq(f(w)) =q((p◦f0))(w) = ((q◦p)◦f0)(w) =f0(w) =q(f(w)) =f1(w)
-⇒Tist offen.
-Analog:Z\Tist offen.
+Sei z∈T,x = f(z),U Umgebung vonxwie in Definition 48,V die Komponente vonp−1(U),
+die y:= f0(z) = f1(z) enthält.
+Sei q: U →V die Umkehrabbildung zup|V.
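+Sei $W := f^{-1}(U) \cap f_0^{-1}(V) \cap f_1^{-1}(V)$.
+$W$ ist offene Umgebung
+in $Z$ von $z$.
+Behauptung: $W \subseteq T$
+Denn für $w \in W$ ist $q(f(w)) = q((p \circ f_0)(w)) = ((q \circ p) \circ f_0)(w) = f_0(w)$ und analog $q(f(w)) = f_1(w)$, also $f_0(w) = f_1(w)$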
+⇒T ist offen.
+Analog: Z\T ist offen.
 Satz 3.2
-Seip:Y→XÜberlagerung, γ:I→Xein Weg,y∈Ymitp(y) =γ(0) =:x.
-Dann gibt es genau einen Weg ˜γ:I→Ymit˜γ(0) =yundp◦˜γ=γ.
-p:Y→XÜberlagerung, X,Ywegzusammenhängend. pstetig und surjektiv, zu x∈X∃
-Umgebung U, so dassp−1(U) =⋃Vj
-p|Vj:Vj→UHomöomorphismus.
+Sei p: Y →X Überlagerung, γ : I →X ein Weg,y∈Y mit p(y) = γ(0) =: x.
+Dann gibt es genau einen Weg˜γ : I →Y mit ˜γ(0) = y und p◦˜γ = γ.
+p : Y →X Überlagerung, X,Y wegzusammenhängend. p stetig und surjektiv, zux ∈X∃
+Umgebung U, so dassp−1(U) = ⋃Vj
+p|Vj : Vj →U Homöomorphismus.
 Bemerkung 55
-Wege inXlassen sich zu Wegen in Yliften.
-Zu jedemy∈p−1(γ(0))gibt es genau einen Lift von γ.
+Wege inX lassen sich zu Wegen inY liften.
+Zu jedemy∈p−1(γ(0)) gibt es genau einen Lift vonγ.
  3.3. ÜBERLAGERUNGEN
 Proposition 3.3
-Seienp:Y→Xeine Überlagerung, a,b∈X,γ0,γ1:I→Xhomotope Wege von a
-nachb,˜a∈p−1(a),˜γ0,˜γ1Liftungen von γ0bzw.γ1mit˜γi(0) = ˜a.
-Dann ist ˜γ0(1) = ˜γ1(1)und ˜γ0∼˜γ1.
-Beweis: SeiH:I×I→XHomotopie zwischen γ1undγ2.
-Fürs∈Iseiγs:I→X,t↦→H(t,s).
-Sei˜γsLift vonγsmit˜γs(0) = ˜a
-Sei˜H:I×I→Y, ˜H(t,s) := ( ˜γs(t),s)
+Seien p : Y →X eine Überlagerung,a,b ∈X, γ0,γ1 : I →X homotope Wege vona
+nach b, ˜a∈p−1(a), ˜γ0, ˜γ1 Liftungen vonγ0 bzw. γ1 mit ˜γi(0) = ˜a.
+Dann ist ˜γ0(1) = ˜γ1(1) und ˜γ0 ∼ ˜γ1.
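+Beweis: Sei $H : I \times I \to X$ Homotopie zwischen $\gamma_0$ und $\gamma_1$.
+Für $s \in I$ sei $\gamma_s : I \to X$, $t \mapsto H(t,s)$.
+Sei $\tilde{\gamma}_s$ Lift von $\gamma_s$ mit $\tilde{\gamma}_s(0) = \tilde{a}$
+Sei $\tilde{H} : I \times I \to Y$, $\tilde{H}(t,s) := \tilde{\gamma}_s(t)$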
 Dann gilt:
-(i)˜Hist stetig (Beweis wie für Bemerkung 54)
-(ii) ˜H(t,0) = ˜γ0(t),˜H(t,1) = ˜γ1(t)
+(i) ˜H ist stetig (Beweis wie für Bemerkung 54)
+(ii) ˜H(t,0) = ˜γ0(t), ˜H(t,1) = ˜γ1(t)
 (iii) ˜H(0,s) = ˜γs(0) = ˜a
-(iv) ˜H(1,s)∈p−1(b)
-Dap−1(b)diskrete Teilmenge von Yist
-⇒˜bs=˜H(1,s) =˜H(1,0)∀s∈I
-⇒˜b0=˜b1und ˜Hist Homotopie zwischen ˜γ0und ˜γ1. ■
+(iv) ˜H(1,s) ∈p−1(b)
+Da p−1(b) diskrete Teilmenge vonY ist
+⇒˜bs = ˜H(1,s) = ˜H(1,0) ∀s∈I
+⇒˜b0 = ˜b1 und ˜H ist Homotopie zwischen˜γ0 und ˜γ1. ■
 Folgerung 3.4
-Seip:Y→Xeine Überlagerung, x0∈X,y 0∈p−1(x0)
-a)p∗:π1(Y,y0)→π1(X,x 0)ist injektiv
-b)[π1(X,x 0) :p∗(π1(Y,y0))] = deg(p)
+Sei p: Y →X eine Überlagerung,x0 ∈X,y0 ∈p−1(x0)
+a) p∗: π1(Y,y0) →π1(X,x0) ist injektiv
+b) [π1(X,x0) : p∗(π1(Y,y0))] = deg(p)
 Beweis:
-a) Sei ˜γein Weg in Yumy0undp∗([˜γ]) =e, alsop◦˜γ∼γx0
-Nach Proposition 3.3 ist dann ˜γhomotop zum Lift des konstanten Wegs γx0mit
-Anfangspunkt y0, also zuγy0⇒[˜γ] =e
-b)Seid=degpundp−1(x0) ={y0,y1,...,yd−1}. Für einen geschlossenen Weg γinX
-umx0sei˜γdie Liftung mit ˜γ(0) =y0.
-˜γ(1)∈{y0,...,yd−1}hängt nur von [γ]∈π1(X,x 0)ab.
-Für geschlossene Wege γ0,γ1umxgilt:
+a) Sei ˜γ ein Weg inY um y0 und p∗([˜γ]) = e, alsop◦˜γ ∼γx0
+Nach Proposition 3.3 ist dann˜γ homotop zum Lift des konstanten Wegsγx0 mit
+Anfangspunkt y0, also zuγy0 ⇒[˜γ] = e
+b) Sei d= deg p und p−1(x0) = {y0,y1,...,y d−1 }. Für einen geschlossenen Wegγ in X
+um x0 sei ˜γ die Liftung mit˜γ(0) = y0.
+˜γ(1) ∈{y0,...,y d−1 }hängt nur von[γ] ∈π1(X,x0) ab.
+Für geschlossene Wegeγ0,γ1 um x gilt:
 ˜γ0(1) = ˜γ1(1)
-⇔[ ˜γ0∗˜γ1−1]∈π1(Y,y0)
-⇔[γ0∗γ−1
-1]∈p∗(π1(Y,y0))
-⇔[γ0]und[γ1]liegen in der selben Nebenklasse bzgl. p∗(π1(Y,y0))
+⇔[ ˜γ0 ∗˜γ1−1] ∈π1(Y,y0)
+⇔ $[\gamma_0 * \gamma_1^{-1}] \in p_*(\pi_1(Y, y_0))$
+⇔[γ0] und [γ1]liegen in der selben Nebenklasse bzgl.p∗(π1(Y,y0))
  3.3. ÜBERLAGERUNGEN
-Zui∈{0,...,d−1}gibt es Weg δiinYmitδi(0) =y0undδi(1) =yi
-⇒p∪δiist geschlossener Weg in Xumx0.
-⇒Jedesyimiti= 0,...,d−1ist˜γ(1)für ein [γ]∈π1(X,x 0).
+Zu i∈{0,...,d −1 }gibt es Wegδi in Y mit δi(0) = y0 und δi(1) = yi
+⇒p◦δi ist geschlossener Weg in X um x0.
+⇒Jedes yi mit i= 0,...,d −1 ist ˜γ(1) für ein[γ] ∈π1(X,x0).
 Bemerkung 56
-Seip:Y→XÜberlagerung und Xeinfach zusammenhängend.
-Dann istpein Homöomorphismus.
-Beweis: Wegen Bemerkung 55.a ist auch Yeinfach zusammenhängend und wegen Bemerkung
- 55.b ist deg(p) = 1,pist also bijektiv.
-Nach Bemerkung 51 ist poffen⇒p−1ist stetig.⇒pist Homöomorphismus. ■
+Sei p: Y →X Überlagerung undX einfach zusammenhängend.
+Dann istp ein Homöomorphismus.
+Beweis: Wegen Bemerkung 55.a ist auchY einfach zusammenhängend und wegen Bemerkung
+ 55.b istdeg(p) = 1, p ist also bijektiv.
+Nach Bemerkung 51 istp offen ⇒p−1 ist stetig.⇒p ist Homöomorphismus. ■
 Definition 52
-Eine Überlagerung p:˜X→Xheißtuniversell , wenn ˜Xeinfach zusammenhängend ist.
+Eine Überlagerungp: ˜X →X heißtuniversell, wenn ˜X einfach zusammenhängend ist.
 Beispiel 38 (Universelle Überlagerungen)
-R→S1, t↦→(cos 2πt,sin 2πt)
-R2→T2=R2/Z2
-Sn→Pn(R)fürn≥2
+R →S1, t ↦→(cos 2πt,sin 2πt)
+R2 →T2 = R2/Z2
+Sn →Pn(R) für n≥2
 Satz 3.5
-Seip:˜X→Xeine universelle Überlagerung, q:Y→Xweitere Überlagerung.
-Seix0∈X,˜x0∈˜X,y 0∈Ymitq(y0) =x0=p( ˜x0).
-Dann gibt es genau eine Überlagerung ˜p:˜X→Ymit˜p( ˜x0) =y0.
-Beweis: Seiz∈˜X,γz:I→˜Xein Weg von ˜x0nachz.
-Seiδzdie eindeutige Liftung von p◦γznachYmitδz(0) =y0.
-Setze ˜p(z) =δz(1).
-Da˜Xeinfach zusammenhängend ist, hängt ˜p(z)nicht vom gewählten Weg γzab.
-Offensichtlich ist q(˜p(z)) =p(z).
-Zu zeigen: ˜pist stetig in z∈˜X:
-SeiW⊆Yoffene Umgebung von ˜p(z).
-qoffen= = = =⇒q(W)ist offene Umgebung von p(z)·d(˜p(z)).
-SeiU⊆q(W)offen wie in Definition 48 und V⊆q−1(U)die Komponente, die ˜p(z)enthält.
-O. B. d. A. sei V⊆W.
-SeiZ:=p−1(U). Füru∈Zseiδein Weg in Zvonznachu.
-⇒γz∗δist Weg von x0nachu
-⇒˜p(u)∈V
-⇒Z⊆˜p−1(W)
-⇒˜pist stetig
+Sei p: ˜X →X eine universelle Überlagerung,q: Y →X weitere Überlagerung.
+Sei x0 ∈X, ˜x0 ∈ ˜X,y0 ∈Y mit q(y0) = x0 = p( ˜x0).
+Dann gibt es genau eine Überlagerung˜p: ˜X →Y mit ˜p( ˜x0) = y0.
+Beweis: Sei z∈ ˜X,γz : I → ˜X ein Weg von˜x0 nach z.
+Sei δz die eindeutige Liftung vonp◦γz nach Y mit δz(0) = y0.
+Setze ˜p(z) = δz(1).
+Da ˜X einfach zusammenhängend ist, hängt˜p(z) nicht vom gewählten Wegγz ab.
+Offensichtlich istq(˜p(z)) = p(z).
+Zu zeigen: ˜p ist stetig inz∈ ˜X:
+Sei W ⊆Y offene Umgebung von˜p(z).
+q offen
+= = = =⇒q(W) ist offene Umgebung vonp(z) ·d(˜p(z)).
+Sei U ⊆q(W) offen wie in Definition 48 undV ⊆q−1(U) die Komponente, die˜p(z) enthält.
+O. B. d. A. seiV ⊆W.
+Sei Z := p−1(U). Füru∈Z sei δ ein Weg inZ von z nach u.
+⇒γz ∗δ ist Weg vonx0 nach u
+⇒˜p(u) ∈V
+⇒Z ⊆ ˜p−1(W)
+⇒˜p ist stetig
  3.3. ÜBERLAGERUNGEN
 Folgerung 3.6
-Sindp:˜X→Xundq:˜Y→Xuniverselle Überlagerungen, so sind ˜Xund ˜Yhomöomorph.
-Beweis: Seienx0∈X,˜x0∈˜Xmitp( ˜x0) =x0und ˜y0∈q−1(x0)⊆˜Y.
+Sind p: ˜X →X und q: ˜Y →X universelle Überlagerungen, so sind˜X und ˜Y homöomorph.
+Beweis: Seien x0 ∈X, ˜x0 ∈ ˜X mit p( ˜x0) = x0 und ˜y0 ∈q−1(x0) ⊆˜Y.
 Nach Satz 3.5 gibt es genau eine Überlagerung
-f:˜X→˜Ymitf(x0) = ˜y0undq◦f=p
+f : ˜X →˜Y mit f( ˜x0) = ˜y0 und q◦f = p
 und genau eine Überlagerung
-g:˜Y→˜Xmitg( ˜y0) = ˜x0undp◦g=q
-Damit gilt: p◦q◦f=q◦f=p,q◦f◦g=p◦g=q. Also istg◦f:˜X→˜XLift von
-p:˜X→Xmit(g◦f)( ˜x0) = ˜x0.
-Da auch id ˜xdiese Eigenschaft hat, folgt mit Bemerkung 53: g◦f=id˜X.
-Analog gilt f◦g=id˜Y. ■
+g: ˜Y → ˜X mit g( ˜y0) = ˜x0 und p◦g= q
+Damit gilt: p◦g◦f = q◦f = p, q◦f ◦g = p◦g = q. Also ist g◦f : ˜X → ˜X Lift von
+p: ˜X →X mit (g◦f)( ˜x0) = ˜x0.
+Da auch id˜x diese Eigenschaft hat, folgt mit Bemerkung 53:g◦f = id˜X.
+Analog giltf ◦g= id˜Y. ■
 Die Frage, wann es eine universelle Überlagerung gibt, beantwortet der folgende Satz:
 Definition 53
-Sei(X,T)ein topologischer Raum und x∈X.
-U⊆TheißteineUmgebungsbasis vonx,wennjedeoffeneUmgebungvon xeineTeilmenge
-vonUenthält.
+Sei (X,T) ein topologischer Raum undx∈X.
+U ⊆T heißt eineUmgebungsbasisvonx, wenn jede offene Umgebung vonxeine Teilmenge
+von U enthält.
 Satz 3.7
-Es seiXein wegzusammenhängender topologischer Raum in dem jeder Punkt eine
+Es sei X ein wegzusammenhängender topologischer Raum in dem jeder Punkt eine
 Umgebungsbasis aus einfach zusammenhängenden Mengen hat.
 Dann gibt es eine universelle Überlagerung.
-Beweis: Seix0∈Xund ˜X:={(x,[γ])|x∈X,γWeg vonxonachx}undp:˜X→X,(x,[γ])↦→
+Beweis: Sei x0 ∈X und ˜X := {(x,[γ]) |x∈X, γ Weg von x0 nach x} und p: ˜X →X,(x,[γ]) ↦→
 x.
-Die Topologie auf ˜Xist folgende: Definiere eine Umgebungsbasis von (x,[γ])wie folgt: Es
-seiUeine einfach zusammenhängende Umgebung von xund
-˜U=˜U(x,[γ]) :={(y,[γ∗α])|y∈U,αWeg inUvonxnachy}
-pist Überlagerung: p|˜U:˜U→Ubijektiv.pist stetig und damit p|˜Uein Homöomorphismus.
-Sindγ1,γ2Wege vonx0nachxundγ1∼γ2, so ist ˜U(x,[γ1])∩˜U(x,[γ2]) =∅, denn: Ist
-γ1∗α∼γ2∗α, so ist auch γ1∼γ2. Also istpeine Überlagerung.
-˜Xist einfach zusammenhängend: Es sei ˜x0:= (x0,e)und˜γ:I→˜Xein geschlossener Weg
-um˜x0.
-Seiγ:=p(˜γ).
-Annahme :[˜γ]̸=e
-Mit Bemerkung 55.a folgt dann: [γ]̸=e.
-Dann ist der Lift von γnach ˜xmit Anfangspunkt ˜x0ein Weg von ˜x0nach (x0,[γ]). Widerspruch.
+Die Topologie auf˜X ist folgende: Definiere eine Umgebungsbasis von(x,[γ]) wie folgt: Es
+sei U eine einfach zusammenhängende Umgebung vonx und
+˜U = ˜U(x,[γ]) := {(y,[γ∗α]) |y∈U,α Weg inU von x nach y}
+p ist Überlagerung:p|˜U : ˜U →U bijektiv. p ist stetig und damitp|˜U ein Homöomorphismus.
+Sind γ1,γ2 Wege von x0 nach x und γ1 ≁ γ2, so ist ˜U(x,[γ1]) ∩˜U(x,[γ2]) = ∅, denn: Ist
+γ1 ∗α∼γ2 ∗α, so ist auchγ1 ∼γ2. Also istp eine Überlagerung.
+˜X ist einfach zusammenhängend: Es sei˜x0 := (x0,e) und ˜γ : I → ˜X ein geschlossener Weg
+um ˜x0.
+Sei γ := p(˜γ).
+Annahme: [˜γ] ̸= e
+Mit Bemerkung 55.a folgt dann:[γ] ̸= e.
+Dann ist der Lift vonγ nach ˜x mit Anfangspunkt ˜x0 ein Weg von˜x0 nach (x0,[γ]). Widerspruch.
 
  3.3. ÜBERLAGERUNGEN
 Definition 54
-Es seip:Y→Xeine Überlagerung und f:Y→Yein Homöomorphismus.
-a)fheißtDecktransformation vonp:⇔p◦f=p.
-b)Die Decktransformationen von p:Y→Xbilden mit der Verkettung eine Gruppe,
-die sog.Decktransformationsgruppe . Man schreibt: Deck (p),Deck (Y/X)oder
-Deck(Y→X).
-c)pheißtregulär, wenn|Deck(Y/X )|= degpgilt.
+Es seip: Y →X eine Überlagerung undf : Y →Y ein Homöomorphismus.
+a) f heißtDecktransformation von p:⇔p◦f = p.
+b) Die Decktransformationen vonp : Y →X bilden mit der Verkettung eine Gruppe,
+die sog. Decktransformationsgruppe. Man schreibt: Deck(p), Deck(Y/X) oder
+Deck(Y →X).
+c) p heißtregulär, wenn|Deck(Y/X)|= deg p gilt.
 Bemerkung 57 (Eigenschaften der Decktransformation)
-a)(DeckY/X,◦)ist eine Gruppe
-b) Istf∈Deck(Y/X )undf̸=id, dann hat fkeinen Fixpunkt.
-c)|Deck(Y/X )|≤degp
-d)Istfeine reguläre Überlagerung, dann gilt: ∀x∈X:Deck (Y/X)operiert transitiv
-auf der Menge der Urbilder f−1(x).
+a) (DeckY/X, ◦) ist eine Gruppe
+b) Ist f ∈Deck(Y/X) und f ̸= id, dann hatf keinen Fixpunkt.
+c) |Deck(Y/X)|≤ deg p
+d) Ist f eine reguläre Überlagerung, dann gilt:∀x∈X : Deck(Y/X) operiert transitiv
+auf der Menge der Urbilderf−1(x).
 Beweis:
 a) Es gilt:
-•idY∈DeckY/X,
-•f,g∈DeckY/X⇒p◦(f◦g) = (p◦f)◦g=p◦g⇒f◦g∈DeckY/X
-•f∈DeckY/X⇒p◦f=p⇒p◦f−1= (p◦f)◦f−1=p◦(f◦f−1) =p⇒
-f−1∈DeckY/X
+•idY ∈Deck Y/X,
+•f,g ∈Deck Y/X ⇒p◦(f ◦g) = (p◦f) ◦g= p◦g⇒f ◦g∈Deck Y/X
+•f ∈Deck Y/X ⇒p◦f = p ⇒p◦f−1 = (p◦f) ◦f−1 = p◦(f ◦f−1) = p ⇒
+f−1 ∈Deck Y/X
 b) Die Menge
-Fix(f) ={y∈Y|f(y) =y}
-ist abgeschlossen als Urbild der Diagonale ∆⊆Y×Yunter der stetigen Abbildung
-y↦→(f(y),y). Außerdem ist Fix(f)offen, denn ist y∈Fix(f), so seiUeine Umgebung
-vonp(y)∈Xwie in Definition 48 und U⊆p−1(U)die Komponente, die yenthält;
-alsop:V→Uein Homöomorphismus. Dann ist W:=f−1(V)∩Voffene Umgebung
-vony.
-Fürz∈Wistf(z)∈Vundp(f(z)) =p(z). Dapinjektiv auf Vist, folgtf(z) =z,
-d. h. Fix(f)̸=∅.
-DaYzusammenhängend ist, folgt aus Fix(˜f)̸=∅schon Fix(f) =Y, alsof=idY.
-c)Es seix0∈X,deg(p) =dundp−1(x0) ={y0,...,yd−1}. Fürf∈Deck (Y/X)ist
-f(y0) ={y0,...,yd−1}.
-Zui∈{0,...,d−1}gibt es höchstens ein f∈Deck (Y/X )mitf(y0) =y1, denn ist
-f(y0) =g(y0), so ist (g−1◦f)(y0) =y0, also nach Bemerkung 57.c g−1◦f=idY.
+Fix(f) = {y∈Y |f(y) = y}
+ist abgeschlossen als Urbild der Diagonale∆ ⊆Y ×Y unter der stetigen Abbildung
+y↦→(f(y),y). Außerdem istFix(f) offen, denn isty∈Fix(f), so seiU eine Umgebung
+von p(y) ∈X wie in Definition 48 und V ⊆p−1(U) die Komponente, die y enthält;
+also p: V →U ein Homöomorphismus. Dann istW := f−1(V) ∩V offene Umgebung
+von y.
+Für z ∈W ist f(z) ∈V und p(f(z)) = p(z). Dap injektiv aufV ist, folgtf(z) = z,
+d. h. W ⊆ Fix(f).
+Da Y zusammenhängend ist, folgt aus Fix(f) ̸= ∅ schon Fix(f) = Y, also f = idY.
+c) Es sei x0 ∈X, deg(p) = d und p−1(x0) = {y0,...,y d−1 }. Für f ∈Deck(Y/X) ist
+f(y0) ∈ {y0,...,y d−1 }.
+Zu i∈{0,...,d −1 }gibt es höchstens ein f ∈Deck(Y/X) mit f(y0) = yi, denn ist
+f(y0) = g(y0), so ist (g−1 ◦f)(y0) = y0, also nach Bemerkung 57.b g−1 ◦f = idY.
 d) Wenn jemand den Beweis macht, bitte an info@martin-thoma.de schicken.
 Beispiel 39 (Decktransformationen)
-1)p:R→S1: Deck( R/S1) ={t↦→t+n|n∈Z}∼=Z
-2)p:R2→T2: Deck( R2/T2)∼=Z×Z=Z2
-3)p:Sn→Pn(R) : Deck(Sn/Pn(R)) ={x↦→±x}∼=Z/2Z
+1) p: R →S1 : Deck(R/S1) = {t↦→t+ n|n∈Z }∼= Z
+2) p: R2 →T2 : Deck(R2/T2) ∼= Z ×Z = Z2
+3) p: Sn →Pn(R) : Deck(Sn/Pn(R)) = {x↦→±x}∼= Z/2Z
  3.3. ÜBERLAGERUNGEN
 Nun werden wir eine Verbindung zwischen der Decktransformationsgruppe und der Fundamentalgruppe
  herstellen:
 Satz 3.8
-Istp:˜X→Xeine universelle Überlagerung, so gilt:
-Deck( ˜X/X )∼=π1(X,x 0)∀x0∈X
-Beweis: Wähle ˜x0∈p−1(x0). Es seiρ:Deck (˜x/x)→π1(X,x 0)die Abbildung, die fauf[p(γf)]
-abbildet, wobei γfein Weg von ˜x0nachf(˜x0)sei. Da ˜xeinfach zusammenhängend ist, ist
-γfbis auf Homotopie eindeutig bestimmt und damit auch ρwohldefiniert.
-•ρist Gruppenhomomorphismus : Seienf,g∈Deck (˜X/X )⇒γg◦f=γg∗g(γf)⇒
-p(γg◦f) =p(γg)∗(p◦g)
-=p(γf) =ρ(g)̸=ρ(f)
-•ρist injektiv :ρ(f) =e⇒p(γf)∼γx0Satz 3.2= = = =⇒γf∼γ˜x0⇒f(x0) =˜x0Bem. 57.c= = = = = =⇒f=
+Ist p: ˜X →X eine universelle Überlagerung, so gilt:
+Deck( ˜X/X) ∼= π1(X,x0) ∀x0 ∈X
+Beweis: Wähle ˜x0 ∈p−1(x0). Es seiρ: Deck(˜x/x) →π1(X,x0) die Abbildung, dief auf [p(γf)]
+abbildet, wobeiγf ein Weg von˜x0 nach f( ˜x0) sei. Da˜x einfach zusammenhängend ist, ist
+γf bis auf Homotopie eindeutig bestimmt und damit auchρ wohldefiniert.
+•ρ ist Gruppenhomomorphismus: Seien f,g ∈Deck( ˜X/X) ⇒γg◦f = γg ∗g(γf) ⇒
+$p(\gamma_{g \circ f}) = p(\gamma_g) * \underbrace{(p \circ g)}_{=p}(\gamma_f) = \rho(g) \cdot \rho(f)$
+•ρ ist injektiv: ρ(f) = e⇒p(γf) ∼γx0
+Satz 3.2= = = =⇒γf ∼γ˜x0 ⇒f(x0) = ˜x0
+Bem. 57.c= = = = = =⇒f =
 id˜x.
-•ρist surjektiv : Sei [γ]∈π1(X,x 0),˜γLift vonγnach ˜xmit Anfangspunkt ˜x0. Der
-Endpunkt von ˜γsei˜x1.
-pist reguläre Überlagerung : Seien ˜x0,˜x1∈˜Xmitp(˜x0) =p(˜x1). Nach Satz 3.5 gibt
-es genau eine Überlagerung ˜p:˜X→Xmitp=p◦˜pund˜p(˜x0) =˜x1. Somit ist ˜peine
-Decktransformation und damit peine reguläre Überlagerung.
-Dapreguläre Überlagerung ist, gibt es ein f∈Deck( ˜X/X )mitf( ˜x0) = ˜x1.
-Aus der Definition von ρfolgt:ρ(f) =p(γf) =γ
+•ρ ist surjektiv: Sei [γ] ∈π1(X,x0), ˜γ Lift vonγ nach ˜x mit Anfangspunkt ˜x0. Der
+Endpunkt von˜γ sei ˜x1.
+p ist reguläre Überlagerung: Seien ˜x0, ˜x1 ∈ ˜X mit p( ˜x0) = p( ˜x1). Nach Satz 3.5 gibt
+es genau eine Überlagerung˜p: ˜X →X mit p= p◦˜p und ˜p( ˜x0) = ˜x1. Somit ist˜p eine
+Decktransformation und damitp eine reguläre Überlagerung.
+Da p reguläre Überlagerung ist, gibt es einf ∈Deck( ˜X/X) mit f( ˜x0) = ˜x1.
+Aus der Definition vonρ folgt: ρ(f) = p(γf) = γ
 ■
-Beispiel 40 (Bestimmung von π1(S1))
-p:R→S1,t↦→(cos2πt,sin2πt)ist universelle Überlagerung, da Rzusammenhängend ist.
-Fürn∈Zseifn:R→R,t↦→t+ndie Translation um n.
-Es gilt: (p◦fn)(t) =p(fn(t)) =p(t)∀t∈R, d. h.fnist Decktransformation.
-Ist umgekehrt girgendeine Decktransformation, so gilt insbesondere für t= 0:
-(cos(2πg(0)),sin(2πg(0))) = (p◦g)(0) =p(0) = (1,0)
-Es existiert n∈Zmitg(0) =n. Da auchfn(0) = 0 +n=ngilt, folgt mit Bemerkung 57.c
-g=fn. Damit folgt:
-Deck(R/S1) ={fn|n∈Z}∼=Z
-Nach Satz 3.8 also π1(S1)∼=Deck(R/S1)∼=Z
+Beispiel 40 (Bestimmung vonπ1(S1))
+p: R →S1, t↦→(cos 2πt,sin 2πt) ist universelle Überlagerung, daR zusammenhängend ist.
+Für n∈Z sei fn : R →R,t ↦→t+ n die Translation umn.
+Es gilt:(p◦fn)(t) = p(fn(t)) = p(t) ∀t∈R, d. h.fn ist Decktransformation.
+Ist umgekehrtg irgendeine Decktransformation, so gilt insbesondere fürt= 0:
+(cos(2πg(0)),sin(2πg(0))) = (p◦g)(0) = p(0) = (1,0)
+Es existiertn∈Z mit g(0) = n. Da auchfn(0) = 0 + n= n gilt, folgt mit Bemerkung 57.c
+g= fn. Damit folgt:
+Deck(R/S1) = {fn |n∈Z }∼= Z
+Nach Satz 3.8 alsoπ1(S1) ∼= Deck(R/S1) ∼= Z
  3.4. GRUPPENOPERATIONEN
 3.4 Gruppenoperationen
 Definition 55
-Sei(G,·)eine Gruppe und Xeine Menge.
-EineGruppenoperation vonGaufXist eine Abbildung ◦:G×X→Xfür die gilt:
-a)1G◦x=x∀x∈X
-b)(g·h)◦x=g◦(h◦x)∀g,h∈G∀x∈X
+Sei (G,·) eine Gruppe undX eine Menge.
+Eine Gruppenoperation von G auf X ist eine Abbildung◦: G×X →X für die gilt:
+a) 1G ◦x= x ∀x∈X
+b) (g·h) ◦x= g◦(h◦x) ∀g,h ∈G∀x∈X
 Beispiel 41
-1)G= (Z,+),X=R,n◦x=x+n
-2)Goperiert auf X=Gdurchg◦h:=g·h
-3)Goperiert auf X=Gdurchg◦h:=g·h·g−1, denn
-i)1G◦h= 1G·h·1−1
-G=h
-ii)(g1·g2)◦h= (g1·g2)·h·(g·g2)−1
-=g1·(g2·h·g−1
-2)·g−1
+1) G= (Z,+),X = R,n ◦x= x+ n
+2) G operiert aufX = G durch g◦h:= g·h
+3) G operiert aufX = G durch g◦h:= g·h·g−1, denn
+i) $1_G \circ h = 1_G \cdot h \cdot 1_G^{-1} = h$
+ii) $(g_1 \cdot g_2) \circ h = (g_1 \cdot g_2) \cdot h \cdot (g_1 \cdot g_2)^{-1} = g_1 \cdot (g_2 \cdot h \cdot g_2^{-1}) \cdot g_1^{-1} = g_1 \circ (g_2 \circ h)$
 Definition 56
-SeiGeine Gruppe, Xein topologischer Raum und ◦:G×X→Xeine Gruppenoperation.
-a)Goperiert durch Homöomorphismen , wenn für jedes g∈Gdie Abbildung
-mg:X→X,x↦→g◦x
+Sei G eine Gruppe,X ein topologischer Raum und◦: G×X →X eine Gruppenoperation.
+a) G operiert durch Homöomorphismen, wenn für jedesg∈G die Abbildung
+mg : X →X,x ↦→g◦x
 ein Homöomorphismus ist.
-b) IstGeine topologische Gruppe, so heißt die Gruppenoperation ◦stetig, wenn
-∀g∈G:mgist stetig
+b) Ist G eine topologische Gruppe, so heißt die Gruppenoperation◦stetig, wenn
+∀g∈G: mg ist stetig
 gilt.
 Bemerkung 58
 Jede stetige Gruppenoperation ist eine Gruppenoperation durch Homöomorphismen.
-Beweis: Nach Voraussetzung ist mg:=◦|{g}×X:X→X,x↦→g◦xstetig.
-Die Umkehrabbildung zu mgistmg−1:
-(mg−1◦mg)(x) =mg−1(mg(x))
-=mg−1(g◦x)
-=g−1◦(g◦x)
-Def. 55.b= (g−1·g)◦x
-= 1G◦x
-Def. 55.a=x
+Beweis: Nach Voraussetzung istmg := ◦|{g}×X : X →X,x ↦→g◦x stetig.
+Die Umkehrabbildung zumg ist mg−1 :
+(mg−1 ◦mg)(x) = mg−1 (mg(x))
+= mg−1 (g◦x)
+= g−1 ◦(g◦x)
+Def. 55.b= (g−1 ·g) ◦x
+= 1G ◦x
+Def. 55.a= x
 Beispiel 42
-In Beispiel 41.1 operiert Zdurch Homöomorphismen.
+In Beispiel 41.1 operiertZ durch Homöomorphismen.
  3.4. GRUPPENOPERATIONEN
 Bemerkung 59
-SeiGeine Gruppe und Xeine Menge.
-a)DieGruppenoperationvon GaufXentsprechenbijektivdenGruppenhomomorphismen
-ϱ:G→Perm(X) = Sym(X) ={f:X→X|fist bijektiv}
-b)IstXein topologischer Raum, so entsprechen dabei die Gruppenoperationen durch
-Homöomorphismus den Gruppenhomomorphismen G→Homöo (X)
+Sei G eine Gruppe undX eine Menge.
+a) DieGruppenoperationvon GaufXentsprechenbijektivdenGruppenhomomorphismen
+ϱ: G→Perm(X) = Sym(X) = {f : X →X |f ist bijektiv}
+b) Ist X ein topologischer Raum, so entsprechen dabei die Gruppenoperationen durch
+Homöomorphismus den GruppenhomomorphismenG→Homöo(X)
 Beweis:
-Sei◦:G×X→Xeine Gruppenoperation von GaufX. Dann sei ϱ:G→Perm (X)
-definiert durch ϱ(g)(X) =g·x∀g∈G,x∈X, alsoϱ(g) =mg.
-ϱist Homomorphismus: ϱ(g1·g2) =mg1·g2=mg1◦mg2=ϱ(g1)◦ϱ(g2), denn fürx∈X:
-ϱ(g1·g2)(x) = (g1·g2)◦x=g1◦(g2◦x) =ϱ(g1)(ϱ(g2)(x)) = (ϱ(g1)◦ϱ(g2))(x)
-Umgekehrt: Sei ϱ:G→Perm (X)Gruppenhomomorphismus. Definiere ◦:G×X→X
-durchg◦x=ϱ(g)(x).
+Sei ◦: G×X →X eine Gruppenoperation von G auf X. Dann seiϱ : G →Perm(X)
+definiert durch ϱ(g)(x) = g◦x ∀g∈G,x ∈X, also ϱ(g) = mg.
+ϱ ist Homomorphismus:ϱ(g1 ·g2) = mg1·g2 = mg1 ◦mg2 = ϱ(g1) ◦ϱ(g2), denn fürx∈X :
+ϱ(g1 ·g2)(x) = (g1 ·g2) ◦x= g1 ◦(g2 ◦x) = ϱ(g1)(ϱ(g2)(x)) = (ϱ(g1) ◦ϱ(g2))(x)
+Umgekehrt: Seiϱ : G →Perm(X) Gruppenhomomorphismus. Definiere◦: G×X →X
+durch g◦x= ϱ(g)(x).
 z. z. Definition 55.b:
-g1◦(g2◦x) =ϱ(g1)(g2◦x)
-=ϱ(g1)(ϱ(g2)(x))
-= (ϱ(g1)◦ϱ(g2))(x)
-ϱist Hom.=ϱ(g1·g2)(x)
-= (g1·g2)◦x
-z. z. Definition 55.a: 1G·x=ϱ(1G)(x) =idX(x) =x, weilϱein Homomorphismus ist.
+g1 ◦(g2 ◦x) = ϱ(g1)(g2 ◦x)
+= ϱ(g1)(ϱ(g2)(x))
+= (ϱ(g1) ◦ϱ(g2))(x)
+ϱ ist Hom.
+= ϱ(g1 ·g2)(x)
+= (g1 ·g2) ◦x
+z. z. Definition 55.a:1G ·x= ϱ(1G)(x) = idX(x) = x, weilϱ ein Homomorphismus ist.
 Beispiel 43
-SeiXein wegzusammenhängender topologischer Raum, p:˜X→Xeine universelle Überlagerung,x0∈X,˜x0∈˜Xmitp(
- ˜x0) =x0.
-Dann operiert π1(X,x 0)auf˜Xdurch Homöomorphismen wie folgt:
-Für[γ]∈π1(X,x 0)und ˜x∈˜Xsei[γ]◦˜x=˜γ∗ϱ(1)wobei ˜γein Weg von ˜x0nach ˜xin˜X
-sei,ϱ:=p(˜δ) =p◦δ.
-Also:δist ein Weg in Xvonx0nachx=p(˜x)und˜γ∗δdie Liftung von γ∗δmit
+Sei X ein wegzusammenhängender topologischer Raum,p: ˜X →X eine universelle Überlagerung,
+ x0 ∈X, ˜x0 ∈ ˜X mit p( ˜x0) = x0.
+Dann operiertπ1(X,x0) auf ˜X durch Homöomorphismen wie folgt:
+Für [γ] ∈π1(X,x0) und ˜x∈ ˜X sei [γ] ◦˜x= ˜γ∗ϱ(1) wobei ˜γ ein Weg von˜x0 nach ˜x in ˜X
+sei, ϱ:= p(˜δ) = p◦δ.
+Also: δ ist ein Weg in X von x0 nach x = p(˜x) und ˜γ∗δ die Liftung von γ ∗δ mit
 Anfangspunkt ˜x0.
-[γ]·˜xhängt nicht von der Wahl von ˜γab; ist ˜γ′ein anderer Weg von ˜x0nach ˜x, so sind ˜δ
-und˜δ′homotop, also auch ˜γ∗δund˜γ∗δ′homotop.
+[γ] ·˜x hängt nicht von der Wahl von˜γ ab; ist˜γ′ein anderer Weg von˜x0 nach ˜x, so sind˜δ
+und ˜δ′homotop, also auch˜γ∗δ und ˜γ∗δ′homotop.
 Gruppenoperation, denn:
-i)[e]◦˜x=˜e∗δ= ˜x
-ii)˜γ1∗γ2∗δ(1) = [γ1∗γ2]◦˜x= ([γ1]∗[γ2])◦˜x
-γ1∗γ2∗δ(1) = [γ1]◦(˜γ2∗δ)(1) = [γ1]◦([γ2]◦˜x)
-Erinnerung :Die Konstruktion aus Bemerkung 59 induziert zu der Gruppenoperation π1(X,x 0)
-aus Beispiel 43 einen Gruppenhomomorphismus ϱ:π1(X,x 0)→Homöo (X). Nach Satz 3.8 ist
-ϱ(π1(X,x 0)) = Deck( ˜X/X )
-={
-f:˜X→˜XHomöomorphismus⏐⏐⏐p◦f=p}
+i) [e] ◦˜x= ˜e ∗δ= ˜x
+ii) ˜γ1 ∗γ2 ∗δ(1) = [γ1 ∗γ2] ◦˜x= ([γ1] ∗[γ2]) ◦˜x
+γ1 ∗γ2 ∗δ(1) = [γ1] ◦( ˜γ2 ∗δ)(1) = [γ1] ◦([γ2] ◦˜x)
+Erinnerung:Die Konstruktion aus Bemerkung 59 induziert zu der Gruppenoperationπ1(X,x0)
+aus Beispiel 43 einen Gruppenhomomorphismusϱ: π1(X,x0) →Homöo(X). Nach Satz 3.8 ist
+ϱ(π1(X,x0)) = Deck( ˜X/X)
+=
+{
+f : ˜X → ˜X Homöomorphismus
+⏐⏐⏐p◦f = p
+}
  3.4. GRUPPENOPERATIONEN
 Beispiel 44
-SeiX:=S2⊆R3undτdie Drehung um die z-Achse um 180◦.
-g=⟨τ⟩={id,τ}operiert auf S2durch Homöomorphismen.
-Frage: Was ist S2/G? IstS2/Geine Mannigfaltigkeit?
+Sei X := S2 ⊆R3 und τ die Drehung um diez-Achse um180◦.
+G= ⟨τ⟩= {id,τ }operiert auf S2 durch Homöomorphismen.
+Frage: Was istS2/G? IstS2/G eine Mannigfaltigkeit?
 4 Euklidische und nichteuklidische
 Geometrie
 Definition 57
-Das Tripel (X,d,G )heißt genau dann eine Geometrie , wenn (X,d)ein metrischer Raum
-und∅̸=G⊆P(X)gilt. Dann heißt Gdie Menge aller Geraden .
+Das Tripel(X,d,G ) heißt genau dann eineGeometrie, wenn(X,d) ein metrischer Raum
+und ∅̸= G⊆P(X) gilt. Dann heißtG die Menge allerGeraden.
 4.1 Axiome für die euklidische Ebene
 Axiome bilden die Grundbausteine jeder mathematischen Theorie. Eine Sammlung aus Axiomen
 nennt man Axiomensystem. Da der Begriff des Axiomensystems so grundlegend ist, hat man
-auch ein paar sehr grundlegende Forderungen an ihn: Axiomensysteme sollen widerspruchsfrei
-sein, die Axiome sollen möglichst unabhängig sein undVollständigkeit wäre auch toll. Mit
+auch ein paar sehr grundlegende Forderungen an ihn: Axiomensysteme sollenwiderspruchsfrei
+sein, die Axiome sollen möglichstunabhängig sein undVollständigkeitwäre auch toll. Mit
 Unabhängigkeit ist gemeint, dass kein Axiom sich aus einem anderem herleiten lässt. Dies scheint
 auf den ersten Blick eine einfache Eigenschaft zu sein. Auf den zweiten Blick muss man jedoch
 einsehen, dass das Parallelenproblem, also die Frage ob das Parallelenaxiom unabhängig von
@@ -2088,9 +2452,9 @@ bewiesen oder widerlegt werden können.
 Kehren wir nun jedoch zurück zur Geometrie. Euklid hat in seiner Abhandlung „Die Elemente“
 ein Axiomensystem für die Geometrie aufgestellt.
 Euklids Axiome
-•Strecke zwischen je zwei Punkten
-•Jede Strecke bestimmt genau eine Gerade
-•Kreis(um jeden Punkt mit jedem Radius)
+•Streckezwischen je zwei Punkten
+•Jede Strecke bestimmt genau eineGerade
+•Kreis (um jeden Punkt mit jedem Radius)
 •Je zwei rechte Winkel sind gleich (Isometrie, Bewegung)
 •Parallelenaxiom von Euklid:
 Wird eine Gerade so von zwei Geraden geschnitten, dass die Summe der Innenwinkel
@@ -2098,419 +2462,502 @@ kleiner als zwei Rechte ist, dann schneiden sich diese Geraden auf der Seite die
 Man mache sich klar, dass das nur dann nicht der Fall ist, wenn beide Geraden parallel
  sind und senkrecht auf die erste stehen.
 Definition 58
-Eineeuklidische Ebene ist eine Geometrie (X,d,G ), die Axiome §1 - §5 erfüllt:
-§1)Inzidenzaxiome :
+Eine euklidische Ebeneist eine Geometrie(X,d,G ), die Axiome §1 - §5 erfüllt:
+§1) Inzidenzaxiome:
  4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
-(i) ZuP̸=Q∈Xgibt es genau ein g∈Gmit{P,Q}⊆g.
-(ii)|g|≥2∀g∈G
-(iii)X /∈G
-§2)Abstandsaxiom : ZuP,Q,R∈Xgibt es genau dann ein g∈Gmit{P,Q,R}⊆g,
+(i) Zu P ̸= Q∈X gibt es genau eing∈G mit {P,Q }⊆ g.
+(ii) |g|≥ 2 ∀g∈G
+(iii) X /∈G
+§2) Abstandsaxiom: ZuP,Q,R ∈X gibt es genau dann eing∈G mit {P,Q,R }⊆ g,
 wenn gilt:
-•d(P,R) =d(P,Q) +d(Q,R)oder
-•d(P,Q) =d(P,R) +d(R,Q)oder
-•d(Q,R) =d(Q,P) +d(P,R)
+•d(P,R) = d(P,Q) + d(Q,R) oder
+•d(P,Q) = d(P,R) + d(R,Q) oder
+•d(Q,R) = d(Q,P) + d(P,R)
 Definition 59
-Sei(X,d,G )eine Geometrie und seien P,Q,R∈X.
-a)P,Q,Rliegenkollinear , wenn esg∈Ggibt mit{P,Q,R}⊆g.
-b)Qliegt zwischen PundR, wennd(P,R) =d(P,Q) +d(Q,R)
-c)StreckePR:={Q∈X|Qliegt zwischen PundR}
-d)Halbgeraden :
-PR+:={Q∈X|Qliegt zwischen PundRoder
-Rliegt zwischen PundQ}
-PR−:={Q∈X|Pliegt zwischen QundR}
+Sei (X,d,G ) eine Geometrie und seienP,Q,R ∈X.
+a) P,Q,R liegen kollinear, wenn esg∈G gibt mit{P,Q,R }⊆ g.
+b) Q liegt zwischenP und R, wennd(P,R) = d(P,Q) + d(Q,R)
+c) StreckePR := {Q∈X |Q liegt zwischenP und R}
+d) Halbgeraden:
+PR+ := {Q∈X|Q liegt zwischenP und R oder
+R liegt zwischenP und Q}
+PR−:= {Q∈X |P liegt zwischenQ und R}
 P R
-PR−PR
+PR− PR
 PR+
 Abbildung 4.1: Halbgeraden
 Bemerkung 60
-a)PR+∪PR−=PR
-b)PR+∩PR−={P}
+a) PR+ ∪PR−= PR
+b) PR+ ∩PR−= {P }
 Beweis:
-a) „⊆“ folgt direkt aus der Definition von PR+undPR−
-„⊇“: SeiQ∈PR⇒P,Q,Rsind kollinear.
-2⇒
+a) „⊆“ folgt direkt aus der Definition vonPR+ und PR−
+„⊇“: SeiQ∈PR ⇒P,Q,R sind kollinear.
+$\overset{\S 2}{\Rightarrow} \begin{cases} Q \text{ liegt zwischen } P \text{ und } R \Rightarrow Q \in PR^+ \\ R \text{ liegt zwischen } P \text{ und } Q \Rightarrow Q \in PR^+ \\ P \text{ liegt zwischen } Q \text{ und } R \Rightarrow Q \in PR^- \end{cases}$
 b) „⊇“ ist offensichtlich
-„⊆“: SeiPR+∩PR−. Dann istd(Q,R) =d(P,Q) +d(P,R)weilQ∈PR−und
-{d(P,R) =d(P,Q) +d(Q,R)oder
-d(P,Q) =d(P,R) +d(R,Q)}
+„⊆“: Sei Q ∈PR+ ∩PR−. Dann ist d(Q,R) = d(P,Q) + d(P,R), weil Q∈PR−, und
+{ d(P,R) = d(P,Q) + d(Q,R) oder
+d(P,Q) = d(P,R) + d(R,Q)
+}
  4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
-⇒d(Q,R) = 2d(P,Q) +d(Q,R)
+⇒d(Q,R) = 2d(P,Q) + d(Q,R)
 ⇒d(P,Q) = 0
-⇒P=Q
-d(P,Q) = 2d(P,R) +d(P,Q)
-⇒P=R
+⇒P = Q
+d(P,Q) = 2d(P,R) + d(P,Q)
+⇒P = R
 ⇒Widerspruch
 Definition 60
-§3)Anordnungsaxiome
-(i)Zu jeder Halbgerade Hmit Anfangspunkt P∈Xund jedem r∈R≥0gibt es
-genau einQ∈Hmitd(P,Q) =r.
-(ii)Jede Gerade zerlegt X\g=H1˙∪H2in zwei nichtleere Teilmengen H1,H2, sodass
-für alleA∈Hi,B∈Hjmiti,j∈{1,2}gilt:AB∩g̸=∅⇔i̸=j.
-Diese Teilmengen HiheißenHalbebenen bzgl.g.
-§4)Bewegungsaxiom : ZuP,Q,P′,Q′∈Xmitd(P,Q) =d(P′,Q′)gibt es mindestens
-2 Isometrien ϕ1,ϕ2mitϕi(P) =P′undϕi(Q) =Q′miti= 1,2.1
-§5)Parallelenaxiom : Zu jeder Geraden g∈Gund jedem Punkt P∈X\ggibt es
-höchstens ein h∈GmitP∈hundh∩g=∅.hheißtParallele zu gdurchP.
+§3) Anordnungsaxiome
+(i) Zu jeder HalbgeradeH mit AnfangspunktP ∈X und jedemr ∈R≥0 gibt es
+genau einQ∈H mit d(P,Q) = r.
+(ii) Jede Gerade zerlegtX\g= H1 ˙∪H2 in zwei nichtleere TeilmengenH1,H2, sodass
+für alleA∈Hi, B ∈Hj mit i,j ∈{1,2 }gilt: AB∩g̸= ∅⇔ i̸= j.
+Diese TeilmengenHi heißenHalbebenen bzgl. g.
+§4) Bewegungsaxiom: ZuP,Q,P ′,Q′∈X mit d(P,Q) = d(P′,Q′) gibt es mindestens
+2 Isometrienϕ1,ϕ2 mit ϕi(P) = P′und ϕi(Q) = Q′mit i= 1,2.1
+§5) Parallelenaxiom: Zu jeder Geradeng ∈G und jedem Punkt P ∈X \g gibt es
+höchstens einh∈G mit P ∈h und h∩g= ∅. h heißtParallele zug durch P.
 Satz 4.1 (Satz von Pasch)
-SeienP,Q,Rnicht kollinear, g∈Gmitg∩{P,Q,R}=∅undg∩PQ̸=∅.
-Dann ist entweder g∩PR̸=∅oderg∩QR̸=∅.
+Seien P, Q, R nicht kollinear,g∈G mit g∩{P,Q,R }= ∅und g∩PQ ̸= ∅.
+Dann ist entwederg∩PR ̸= ∅oder g∩QR̸= ∅.
 Dieser Satz besagt, dass Geraden, die eine Seite eines Dreiecks (also nicht nur eine Ecke)
 schneiden, auch eine weitere Seite schneiden.
-Beweis:g∩PQ̸=∅
-3(ii)⇒PundQliegen in verschiedenen Halbebenen bzgl. g
-⇒o. B. d. A. RundPliegen in verschieden Halbebenen bzgl. g
-⇒g∩RP̸=∅
+Beweis: g∩PQ ̸= ∅
+3(ii)
+⇒P und Q liegen in verschiedenen Halbebenen bzgl.g
+⇒o. B. d. A.R und P liegen in verschieden Halbebenen bzgl.g
+⇒g∩RP ̸= ∅
 Bemerkung 61
-SeiP,Q∈XmitP̸=QsowieA,B∈X\PQmitA̸=B. Außerdem seien AundBin der
-selben Halbebene bzgl. PQsowieQundBin der selben Halbebene bzgl. PA.
-Dann gilt:PB+∩AQ̸=∅
+Sei P,Q ∈X mit P ̸= Qsowie A,B ∈X\PQ mit A̸= B. Außerdem seienAund B in der
+selben Halbebene bzgl.PQ sowie Q und B in der selben Halbebene bzgl.PA.
+Dann gilt:PB+ ∩AQ̸= ∅
 Auch Bemerkung 61 lässt sich umgangssprachlich sehr viel einfacher ausdrücken: Die Diagonalen
 eines konvexen Vierecks schneiden sich.
-Beweis: SeiP′∈PQ−,P′̸=PSatz 4.1= = = =⇒PBschneidetAP′∪AQ
-SeiCder Schnittpunkt. Dann gilt:
-1Die „Verschiebung“ von P′Q′nachPQund die Isometrie, die zusätzlich an der Gerade durch PundQspiegelt.
+Beweis: Sei P′∈PQ−,P′̸= P Satz 4.1= = = =⇒PB schneidet AP′∪AQ
+Sei C der Schnittpunkt. Dann gilt:
+1Die „Verschiebung“ vonP′Q′nach PQ und die Isometrie, die zusätzlich an der Gerade durchP und Qspiegelt.
  4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
-PP′QA B
+P
+P′
+Q
+A B
 C
 Abbildung 4.2: Situation aus Bemerkung 61
-(i)C∈PB+, dennAundBliegen in derselben Halbebene bzgl. PQ=P′Q, also auch
-AP′undAQ.
-(ii)Cliegt in derselben Halbebene bzgl. PAwieB, weil das für Qgilt.
-AP′liegt in der anderen Halbebene bzgl. PA⇒C /∈P′A⇒C∈AQ
-DaC∈PB+undC∈AQfolgt nun direkt:∅̸={C}⊆PB+∩AQ ■
+(i) C ∈PB+, dennA und B liegen in derselben Halbebene bzgl.PQ = P′Q, also auch
+AP′und AQ.
+(ii) C liegt in derselben Halbebene bzgl.PA wie B, weil das fürQ gilt.
+AP′liegt in der anderen Halbebene bzgl.PA ⇒C /∈P′A⇒C ∈AQ
+Da C ∈PB+ und C ∈AQ folgt nun direkt:∅̸= {C}⊆ PB+ ∩AQ ■
 Bemerkung 62
-SeienP,Q∈XmitP̸=QundA,B∈X\PQin der selben Halbebene bzgl. PQ. Außerdem
-seid(A,P) =d(B,P)undd(A,Q) =d(B,Q).
-Dann istA=B.
-PQAB
-Abbildung 4.3: Bemerkung 62: Die beiden roten und die beiden blauen Linien sind gleich lang.
-Intuitiv weiß man, dass daraus folgt, dass A=Bgilt.
+Seien P,Q ∈X mit P ̸= Qund A,B ∈X\PQ in der selben Halbebene bzgl.PQ. Außerdem
+sei d(A,P) = d(B,P ) und d(A,Q) = d(B,Q).
+Dann istA= B.
+P
+Q
+A
+B
+Abbildung 4.3:Bemerkung 62: Die beiden roten und die beiden blauen Linien sind gleich lang.
+Intuitiv weiß man, dass daraus folgt, dassA= B gilt.
 Beweis: durch Widerspruch
-Annahme :A̸=B
-Dann istB /∈(PA∪QA)wegen §2.
-1. Fall:QundBliegen in derselben Halbebene bzgl. PA
-Bem. 61= = = = =⇒PB+∩AQ̸=∅.
-SeiCder Schnittpunkt vom PBundAQ.
+Annahme: A̸= B
+Dann istB /∈(PA ∪QA) wegen §2.
+1. Fall: Q und B liegen in derselben Halbebene bzgl.PA
+Bem. 61= = = = =⇒PB+ ∩AQ̸= ∅.
+Sei C der Schnittpunkt vomPB und AQ.
 Dann gilt:
-(i)d(A,C) +d(C,Q) =d(A,Q)Vor.=d(B,Q)<d(B,C) +d(C,Q)⇒d(A,C)<d(B,C)
+(i) d(A,C) + d(C,Q) = d(A,Q) Vor.= d(B,Q) <d(B,C) + d(C,Q) ⇒d(A,C) <d(B,C)
  4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
-P QBCA
-(a) 1. FallPQA B
+P Q
+B
+C
+A
+(a) 1. Fall
+P
+Q
+AB
 (b) 2. Fall
 Abbildung 4.4: Fallunterscheidung aus Bemerkung 62
-(ii) a)Bliegt zwischen PundC.
-d(P,A) +d(A,C)> d(P,C) =d(P,B) +d(B,C) =d(P,A) +d(B,C)⇒
-d(A,C)>d(B,C)⇒Widerspruch zu Punkt (i)
-b)Cliegt zwischen PundB
-d(P,C) +d(C,A)>d(P,A) =d(P,B) =d(P,C) +d(C,B)
-⇒d(C,A)>d(C,B)
+(ii) a) B liegt zwischenP und C.
+d(P,A) + d(A,C) > d(P,C) = d(P,B) + d(B,C) = d(P,A) + d(B,C) ⇒
+d(A,C) >d(B,C) ⇒Widerspruch zu Punkt (i)
+b) C liegt zwischenP und B
+d(P,C) + d(C,A) >d(P,A) = d(P,B) = d(P,C) + d(C,B)
+⇒d(C,A) >d(C,B)
 ⇒Widerspruch zu Punkt (i)
-2. Fall:QundBliegen auf verschieden Halbebenen bzgl. PA.
-Dann liegen AundQin derselben Halbebene bzgl. PB.
-TauscheAundB⇒Fall 1 ■
+2. Fall: Q und B liegen auf verschieden Halbebenen bzgl.PA.
+Dann liegenA und Q in derselben Halbebene bzgl.PB.
+TauscheA und B ⇒Fall 1 ■
 Bemerkung 63
-Sei(X,d,G )eine Geometrie, die §1 - §3 erfüllt, P,Q∈XmitP̸=Qundϕeine Isometrie
-mitϕ(P) =Pundϕ(Q) =Q.
-Dann giltϕ(S) =S∀S∈PQ.
+Sei (X,d,G ) eine Geometrie, die §1 - §3 erfüllt,P,Q ∈X mit P ̸= Q und ϕ eine Isometrie
+mit ϕ(P) = P und ϕ(Q) = Q.
+Dann giltϕ(S) = S ∀S ∈PQ.
 Beweis:
-O. B. d. A. sei S∈PQ2⇔d(P,Q) =d(P,S) +d(S,Q)
-ϕ∈Iso(X)⇒d(ϕ(P),ϕ(Q)) =d(ϕ(P),ϕ(S)) +d(ϕ(S),ϕ(Q))
-P,Q∈Fix(ϕ)⇒d(P,Q) =d(P,ϕ(S)) +d(ϕ(S),Q)
-⇒ϕ(S)liegt zwischen PundQ
-⇒d(P,S) =d(ϕ(P),ϕ(S)) =d(P,ϕ(S))
-3(i)⇒ϕ(S) =S
+O. B. d. A. seiS ∈PQ 2⇔d(P,Q) = d(P,S) + d(S,Q)
+ϕ∈Iso(X)
+⇒ d(ϕ(P),ϕ(Q)) = d(ϕ(P),ϕ(S)) + d(ϕ(S),ϕ(Q))
+P,Q∈Fix(ϕ)
+⇒ d(P,Q) = d(P,ϕ(S)) + d(ϕ(S),Q)
+⇒ϕ(S) liegt zwischenP und Q
+⇒d(P,S) = d(ϕ(P),ϕ(S)) = d(P,ϕ(S))
+3(i)
+⇒ϕ(S) = S
 ■
 Proposition 4.2
-In einer Geometrie, die §1 - §3 erfüllt, gibt es zu P,P′,Q,Q′mitd(P,Q) =d(P′,Q′)
-höchstens zwei Isometrien mit ϕ(P) =P′undϕ(Q) =Q′
+In einer Geometrie, die §1 - §3 erfüllt, gibt es zuP,P ′,Q,Q ′ mit d(P,Q) = d(P′,Q′)
+höchstens zwei Isometrien mitϕ(P) = P′und ϕ(Q) = Q′
  4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
 Aus den Axiomen folgt, dass es in der Situation von §4 höchstens zwei Isometrien mit
-ϕi(P) =P′undϕi(Q) =Q′gibt.
-Beweis: Seienϕ1,ϕ2,ϕ3Isometrien mit ϕi(P) =P′,ϕi(Q) =Q′miti= 1,2,3.
+ϕi(P) = P′und ϕi(Q) = Q′gibt.
+Beweis: Seien ϕ1,ϕ2,ϕ3 Isometrien mitϕi(P) = P′, ϕi(Q) = Q′mit i= 1,2,3.
 Der Beweis von Proposition 4.2 erfolgt über zwei Teilaussagen:
-(Teil i)∃R∈X\PQmitϕ1(R) =ϕ2(R).
-(Teil ii) Hat ϕ3 Fixpunkte, die nicht kollinear sind, so ist ϕ=idX.
-Aus (Teil i) und (Teil ii) folgt, dass ϕ−1
-2◦ϕ1=idX, alsoϕ2=ϕ1, daP,QundRin diesem
+(Teil i) ∃R∈X\PQ mit ϕ1(R) = ϕ2(R).
+(Teil ii) Hatϕ 3 Fixpunkte, die nicht kollinear sind, so istϕ= idX.
+Aus (Teil i) und (Teil ii) folgt, dass $\varphi_2^{-1} \circ \varphi_1 = \mathrm{id}_X$, also ϕ2 = ϕ1, da P, Q und R in diesem
 Fall Fixpunkte sind.
 Nun zu den Beweisen der Teilaussagen:
-(Teil i)SeiR∈X\PQ. Von den drei Punkten ϕ1(R),ϕ2(R),ϕ3(R)liegen zwei in der selben
-Halbebene bzgl. P′Q′=ϕi(PQ).
-O. B. d. A. seien ϕ1(R)undϕ2(R)in der selben Halbebene.
-Es gilt:d(P′,ϕ1(R)) =d(ϕ1(P),ϕ1(R))
-=d(P,R)
-=d(ϕ2(P),ϕ2(R))
-=d(P′,ϕ2(R))
-und analog d(Q′,ϕ1(R)) =d(Q′,ϕ2(R))
-(Teil ii) SeienP,QundRFixpunkte von ϕ,R /∈PQundA /∈PQ∪PR∪QR. SeiB∈
-PQ\{P,Q}. Dann istϕ(B) =Bwegen Bemerkung 63.
-IstR∈AB, so enthält AB2 Fixpunkte von ϕBem. 63= = = = =⇒ϕ(A) =A.
-P BQCRA
-Abbildung 4.5: P,Q,Rsind Fixpunkte, B∈PQ\{P,Q},A /∈PQ∪PR∪QR
-IstR /∈AB, so istAB∩PR̸=∅oderAB∈RQ̸=∅nach Satz 4.1. Der Schnittpunkt
-Cist dann Fixpunkt von ϕ′nach Bemerkung 63 ⇒ϕ(A) =A.
+(Teil i) Sei R∈X\PQ. Von den drei Punktenϕ1(R),ϕ2(R),ϕ3(R) liegen zwei in der selben
+Halbebene bzgl.P′Q′= ϕi(PQ).
+O. B. d. A. seienϕ1(R) und ϕ2(R) in der selben Halbebene.
+Es gilt: d(P′,ϕ1(R)) = d(ϕ1(P),ϕ1(R))
+= d(P,R)
+= d(ϕ2(P),ϕ2(R))
+= d(P′,ϕ2(R))
+und analogd(Q′,ϕ1(R)) = d(Q′,ϕ2(R))
+(Teil ii) Seien P, Q und R Fixpunkte von ϕ, R /∈PQ und A /∈PQ ∪PR ∪QR. Sei B ∈
+PQ \{P,Q }. Dann istϕ(B) = B wegen Bemerkung 63.
+Ist $R \in AB$, so enthält $AB$ 2 Fixpunkte von $\varphi$ $\xRightarrow{\text{Bem. 63}}$ $\varphi(A) = A$.
+P B Q
+C
+RA
+Abbildung 4.5:P,Q,R sind Fixpunkte,B ∈PQ \{P,Q }, A /∈PQ ∪PR ∪QR
+Ist $R \notin AB$, so ist $AB \cap PR \neq \emptyset$ oder $AB \cap RQ \neq \emptyset$ nach Satz 4.1. Der Schnittpunkt
+C ist dann Fixpunkt vonϕ′nach Bemerkung 63⇒ϕ(A) = A.
 Bemerkung 64 (SWS-Kongruenzsatz)
-Sei(X,d,G )eine Geometrie, die §1 - §4 erfüllt. Seien außerdem △ABCund△A′B′C′
+Sei (X,d,G ) eine Geometrie, die §1 - §4 erfüllt. Seien außerdem△ABC und △A′B′C′
 Dreiecke, für die gilt:
-(i)d(A,B) =d(A′,B′)
-(ii)∠CAB∼=∠C′A′B′
+(i) d(A,B) = d(A′,B′)
+(ii) ∠CAB ∼= ∠C′A′B′
  4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
-(iii)d(A,C) =d(A′,C′)
-Dann ist△ABCkongruent zu△A′B′C′.
-Beweis: Seiϕdie Isometrie mit ϕ(A′) =A,ϕ(A′C′+) =AC+undϕ(A′B′+) =AB+. Diese
+(iii) d(A,C) = d(A′,C′)
+Dann ist△ABC kongruent zu△A′B′C′.
+Beweis: Sei ϕ die Isometrie mitϕ(A′) = A, ϕ(A′C′+) = AC+ und ϕ(A′B′+) = AB+. Diese
 Isometrie existiert wegen Punkt §4.
-⇒C∈ϕ(A′C′+)undB∈ϕ(A′B′+).
-d(A′,C′) =d(ϕ(A′),ϕ(C′)) =d(A,ϕ(C′))3(i)= =⇒ϕ(C′) =C
-d(A′,B′) =d(ϕ(A′),ϕ(B′)) =d(A,ϕ(B′))3(i)= =⇒ϕ(B′) =B
-Also gilt insbesondere ϕ(△A′B′C′) =△ABC. ■
+⇒C ∈ϕ(A′C′+) und B ∈ϕ(A′B′+).
+$d(A',C') = d(\varphi(A'),\varphi(C')) = d(A,\varphi(C'))$
+$\xRightarrow{3(i)}$
+$\varphi(C') = C$
+$d(A',B') = d(\varphi(A'),\varphi(B')) = d(A,\varphi(B'))$
+$\xRightarrow{3(i)}$
+$\varphi(B') = B$
+Also gilt insbesondereϕ(△A′B′C′) = △ABC. ■
 Bemerkung 65 (WSW-Kongruenzsatz)
-Sei(X,d,G )eine Geometrie, die §1 - §4 erfüllt. Seien außerdem △ABCund△A′B′C′
+Sei (X,d,G ) eine Geometrie, die §1 - §4 erfüllt. Seien außerdem△ABC und △A′B′C′
 Dreiecke, für die gilt:
-(i)d(A,B) =d(A′,B′)
-(ii)∠CAB∼=∠C′A′B′
-(iii)∠ABC∼=∠A′B′C′
-Dann ist△ABCkongruent zu△A′B′C′.
-Beweis: Seiϕdie Isometrie mit ϕ(A′) =A,ϕ(B′) =Bundϕ(C′)liegt in der selben Halbebene
-bzgl.ABwieC. Diese Isometrie existiert wegen §4.
-Aus∠CAB =∠C′A′B′=∠ϕ(C′)ϕ(A′)ϕ(B′) =∠ϕ(C′)ABfolgt, dassϕ(C′)∈AC+.
-Analog folgt aus ∠ABC =∠A′B′C′=∠ϕ(A′)ϕ(B′)ϕ(C′) =∠ABϕ (C′), dassϕ(C′)∈
+(i) d(A,B) = d(A′,B′)
+(ii) ∠CAB ∼= ∠C′A′B′
+(iii) ∠ABC ∼= ∠A′B′C′
+Dann ist△ABC kongruent zu△A′B′C′.
+Beweis: Sei ϕdie Isometrie mitϕ(A′) = A, ϕ(B′) = B und ϕ(C′) liegt in der selben Halbebene
+bzgl. AB wie C. Diese Isometrie existiert wegen §4.
+Aus ∠CAB = ∠C′A′B′= ∠ϕ(C′)ϕ(A′)ϕ(B′) = ∠ϕ(C′)AB folgt, dassϕ(C′) ∈AC+.
+Analog folgt aus ∠ABC = ∠A′B′C′ = ∠ϕ(A′)ϕ(B′)ϕ(C′) = ∠ABϕ(C′), dass ϕ(C′) ∈
 BC+.
-Dann giltϕ(C′)∈AC∩BC={C}⇒ϕ(C′) =C.
-Es gilt also ϕ(△A′B′C′) =△ABC. ■
+Dann giltϕ(C′) ∈AC∩BC = {C}⇒ ϕ(C′) = C.
+Es gilt alsoϕ(△A′B′C′) = △ABC. ■
 Definition 61
-a)EinWinkel ist ein Punkt P∈Xzusammen mit 2Halbgeraden mit Anfangspunkt P.
-Man schreibt:∠R1PR2bzw.∠R2PR12
-b)Zwei Winkel sind gleich, wenn es eine Isometrie gibt, die den einen Winkel auf den
+a) Ein Winkelist ein PunktP ∈X zusammen mit2 Halbgeraden mit AnfangspunktP.
+Man schreibt:∠R1PR2 bzw. ∠R2PR12
+b) Zwei Winkel sindgleich, wenn es eine Isometrie gibt, die den einen Winkel auf den
 anderen abbildet.
-c)∠R′
+c) ∠R′
 1P′R′
-2heißtkleinerals∠R1PR2, wenn es eine Isometrie ϕgibt, mitϕ(P′) =P,
+2 heißtkleiner als ∠R1PR2, wenn es eine Isometrieϕ gibt, mitϕ(P′) = P,
 ϕ(P′R′+
-1) =PR+
-1undϕ(R′
-2)liegt in der gleichen Halbebene bzgl. PR1wieR2und in
-der gleichen Halbebene bzgl. PR2wieR1
-d) Im Dreieck△PQRgibt esInnenwinkel undAußenwinkel .
+1 ) = PR+
+1 und ϕ(R′
+2) liegt in der gleichen Halbebene bzgl.PR1 wie R2 und in
+der gleichen Halbebene bzgl.PR2 wie R1
+d) Im Dreieck△PQR gibt esInnenwinkelund Außenwinkel.
 Bemerkung 66
 In einem Dreieck ist jeder Innenwinkel kleiner als jeder nicht anliegende Außenwinkel.
-Beweis: Zeige∠PRQ<∠RQP′.
-SeiMder Mittelpunkt der Strecke QRundP′∈PQ+\PQ. SeiA∈MP−mitd(P,M ) =
-d(M,A ).
-2Für dieses Skript gilt: ∠R1PR 2=∠R2PR 1. Also sind insbesondere alle Winkel ≤180◦.
+Beweis: Zeige ∠PRQ< ∠RQP′.
+Sei M der Mittelpunkt der StreckeQR und P′∈PQ+ \PQ. SeiA∈MP−mit d(P,M) =
+d(M,A).
+2Für dieses Skript gilt:∠R1PR2 = ∠R2PR1. Also sind insbesondere alle Winkel≤180◦.
  4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
-PR′
-1R1R′
-2R2
-(a)∠R′
+P R′
+1 R1
+R′
+2
+R2
+(a) ∠R′
 1P′R′
-2ist kleiner als∠R1PR 2,
-vgl. Definition 61.cP
+2 ist kleiner als∠R1PR2,
+vgl. Definition 61.c
+P
 Q R
-(b)Innenwinkel und Außenwinkel
+(b) Innenwinkel und Außenwinkel
  in△PQR, vgl. Definition
  61.d
 Abbildung 4.6: Situation aus Definition 61
-QM
-AP
+Q M
+A
+P
+R
+(a) Parallelogramm AQPR
+α
+β
 R
-(a)Parallelogramm AQPR
-αβR
 Q P
-(b)Innen- und Außenwinkel
+(b) Innen- und Außenwinkel
  von△PQR
 Abbildung 4.7: Situation aus Bemerkung 66
-Es gilt:d(Q,M ) =d(M,R )undd(P,M ) =d(M,A )sowie∠PMR =∠AMQ⇒△MRQ
-ist kongruent zu△AMQ, denn eine der beiden Isometrien, die ∠PMRauf∠AMQabbildet,
-bildetRaufQundPaufAab.
-⇒∠MQA =∠MRP =∠QRP =∠PRQ.
-Noch zu zeigen:∠MQA<∠RQP′, dennAliegt in der selben Halbebene bzgl. PQwieM.
+Es gilt: $d(Q,M) = d(M,R)$ und $d(P,M) = d(M,A)$ sowie $\angle PMR = \angle AMQ$ $\Rightarrow$ $\triangle MRP$
+ist kongruent zu $\triangle MQA$, denn eine der beiden Isometrien, die $\angle PMR$ auf $\angle AMQ$ abbildet,
+bildet $R$ auf $Q$ und $P$ auf $A$ ab.
+⇒∠MQA = ∠MRP = ∠QRP = ∠PRQ.
+Noch zu zeigen:∠MQA< ∠RQP′, dennA liegt in der selben Halbebene bzgl.PQ wie M.
 Proposition 4.3 (Existenz der Parallelen)
-Sei(X,d,G )eine Geometrie mit den Axiomen §1 - §4.
-Dann gibt es zu jeder Geraden g∈Gund jedem Punkt P∈X\gmindestens eine
-Paralleleh∈GmitP∈hundg∩h=∅.
-Beweis: SeienP,Q∈f∈Gundϕdie Isometrie, die QaufPundPaufP′∈fmit
-d(P,P′) =d(P,Q)abbildet und die Halbebenen bzgl. ferhält.
+Sei (X,d,G ) eine Geometrie mit den Axiomen §1 - §4.
+Dann gibt es zu jeder Geradeng ∈G und jedem PunktP ∈X \g mindestens eine
+Parallele h∈G mit P ∈h und g∩h= ∅.
+Beweis: Seien P,Q ∈ f ∈ G und ϕ die Isometrie, die Q auf P und P auf P′ ∈ f mit
+d(P,P ′) = d(P,Q) abbildet und die Halbebenen bzgl.f erhält.
  4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
-Qhf
-gP
+Q
+h
+f
+g
+P
 Abbildung 4.8: Situation aus Proposition 4.3
-Annahme:ϕ(g)∩g̸=∅
-⇒Es gibt einen Schnittpunkt {R}=ϕ(g)∩g.
-Dann ist∠RQP =∠RQP′<∠RPP′nach Bemerkung 66 und ∠RQP =∠RPP′, weil
-ϕ(∠RQP ) =∠RPP′.
+Annahme: ϕ(g) ∩g̸= ∅
+⇒Es gibt einen Schnittpunkt{R}= ϕ(g) ∩g.
+Dann ist ∠RQP = ∠RQP′ < ∠RPP′ nach Bemerkung 66 und∠RQP = ∠RPP′, weil
+ϕ(∠RQP) = ∠RPP′.
 ⇒Widerspruch
-⇒ϕ(g)∩g=∅ ■
+⇒ϕ(g) ∩g= ∅ ■
 Folgerung 4.4
-Die Summe zweier Innenwinkel in einem Dreieck ist kleiner als π.
-D. h. es gibt eine Isometrie ϕmitϕ(Q) =Pundϕ(QP+) =PR+, sodassϕ(R)in der gleichen
-Halbebene bzgl. PQliegt wieR.
-Beweis: Die Summe eines Innenwinkels mit den anliegenden Außenwinkeln ist π, d. h. die
+Die Summe zweier Innenwinkel in einem Dreieck ist kleiner alsπ.
+D. h. es gibt eine Isometrieϕ mit ϕ(Q) = P und ϕ(QP+) = PR+, sodassϕ(R) in der gleichen
+Halbebene bzgl.PQ liegt wieR.
+Beweis: Die Summe eines Innenwinkels mit den anliegenden Außenwinkeln istπ, d. h. die
 beiden Halbgeraden bilden eine Gerade.
-Abbildung 4.9: In der sphärischen Geometrie gibt es, im Gegensatz zur euklidischen Geometrie,
-Dreiecke mit drei 90◦-Winkeln.
+Abbildung 4.9:In der sphärischen Geometrie gibt es, im Gegensatz zur euklidischen Geometrie,
+Dreiecke mit drei90◦-Winkeln.
 Proposition 4.5
 In einer Geometrie mit den Axiomen §1 - §4 ist in jedem Dreieck die Summe der
-Innenwinkel≤π.
+Innenwinkel ≤π.
  4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
-Sei im Folgenden „ IWS“ die „Innenwinkelsumme“.
-Beweis: Sei△ein Dreieck mit IWS(△) =π+ε
-αβγ
+Sei im Folgenden „IWS“ die „Innenwinkelsumme“.
+Beweis: Sei △ein Dreieck mitIWS(△) = π+ ε
+α
+βγ
 P
-(a) Summe der Winkel α,βundγα1α2 βγ
+(a) Summe der Winkelα, β und γ
+α1
+α2 β
+γ
 M
-A BC A′
+A B
+C A′
 α
 (b) Situation aus Proposition 4.5
 Abbildung 4.10: Situation aus Proposition 4.5
-Seiαein Innenwinkel von △.
-Beh.:Es gibt ein Dreieck △′mitIWS(△′) = IWS(△)und einem Innenwinkel α′≤α
-2.
-Dann gibt es für jedes nein△nmitIWS(△n) =IWS(△)und Innenwinkel α′≤α
+Sei α ein Innenwinkel von△.
+Beh.: Es gibt ein Dreieck△′mit IWS(△′) = IWS(△) und einem Innenwinkelα′≤α
+2 .
+Dann gibt es für jedesnein △n mit IWS(△n) = IWS(△) und Innenwinkelα′≤ α
 2n. Für
 α
-2n<εist dann die Summe der beiden Innenwinkel um △ngrößer alsπ⇒Widerspruch
+2n <ε ist dann die Summe der beiden Innenwinkel um△n größer alsπ⇒Widerspruch
 zu Folgerung 4.4.
-Beweis: Es seienA,B,C∈Xund△das Dreieck mit den Eckpunkten A,B,Cundαsei
-der Innenwinkel bei A,βder Innenwinkel bei Bundγder Innenwinkel bei C.
-SeiMder Mittelpunkt der Strecke BC. Sei außerdem α1=∠CAMundα2=∠BAM.
-Sei weiterA′∈MA−mitd(A′,M) =d(A,M ).
+Beweis: Es seienA,B,C ∈X und △das Dreieck mit den EckpunktenA,B,C und α sei
+der Innenwinkel beiA, β der Innenwinkel beiB und γ der Innenwinkel beiC.
+Sei M der Mittelpunkt der StreckeBC. Sei außerdemα1 = ∠CAM und α2 = ∠BAM.
+Sei weiterA′∈MA−mit d(A′,M) = d(A,M).
 Die Situation ist in Abbildung 4.10b skizziert.
-⇒△ (MA′C)und△(MAB )sind kongruent.⇒∠ABM =∠A′CMund∠MA′C=
-∠MAB.⇒α+β+γ=IWS(△ABC ) =IWS(△AA′C)undα1+α2=α, also o. B. d. A.
-α1≤α
+⇒△(MA′C) und △(MAB) sind kongruent.⇒∠ABM = ∠A′CM und ∠MA′C =
+∠MAB. ⇒α+β+γ = IWS(△ABC) = IWS(△AA′C) und α1 +α2 = α, also o. B. d. A.
+α1 ≤α
 2
 Bemerkung 67
-In einer euklidischen Ebene ist in jedem Dreieck die Innenwinkelsumme gleich π.
+In einer euklidischen Ebene ist in jedem Dreieck die Innenwinkelsumme gleichπ.
 α′
 α′′
-α ββ′
+α β
+β′
 γ
-A BC
+A B
+C
 g
 Abbildung 4.11: Situation aus Bemerkung 67
-Beweis: Seigeine Parallele von ABdurchC.
-•Es giltα′=αwegen Proposition 4.3.
-•Es giltβ′=βwegen Proposition 4.3.
-•Es giltα′′=α′wegen Aufgabe 8.
+Beweis: Sei g eine Parallele vonAB durch C.
+•Es giltα′= α wegen Proposition 4.3.
+•Es giltβ′= β wegen Proposition 4.3.
+•Es giltα′′= α′wegen Aufgabe 8.
  4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE
-⇒IWS(△ABC ) =γ+α′′+β′=π
+⇒IWS(△ABC) = γ+ α′′+ β′= π
 Aus der Eigenschaft, dass die Innenwinkelsumme von Dreiecken in der euklidischen Ebene gleich
-πist, folgen direkt die Kongruenzsätze SWW und WWS über den Kongruenzsatz WSW.
+π ist, folgen direkt die Kongruenzsätze SWW und WWS über den Kongruenzsatz WSW.
 4.2 Weitere Eigenschaften einer euklidischen Ebene
 Satz 4.6 (Strahlensatz)
 In ähnlichen Dreiecken sind Verhältnisse entsprechender Seiten gleich.
-xy
-−1 0 1 2 3 40123
+x
+y
+−1 0 1 2 3 4
+0
+1
+2
+3
 z
-xλ2z
+x
+λ2z
 λ2x
 Abbildung 4.12: Strahlensatz
 Der Beweis wird hier nicht geführt. Für Beweisvorschläge wäre ich dankbar.
-A B′C′
-BC
-cba
-c′b′
+A B′
+C′
+B
+C
+c
+b a
+c′
+b′
 a′
-Abbildung 4.13: Die Dreiecke △ABCund△AB′C′sind ähnlich.
+Abbildung 4.13: Die Dreiecke△ABC und △AB′C′sind ähnlich.
 4.2.1 Flächeninhalt
 Definition 62
-„Simplizialkomplexe“ in euklidischer Ebene (X,d)heißenflächengleich , wenn sie sich in
+„Simplizialkomplexe“ in euklidischer Ebene(X,d) heißenflächengleich, wenn sie sich in
 kongruente Dreiecke zerlegen lassen.
  4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE
-(a) Zwei kongruente Dreiecke (b)ZweiweiterekongruenteDreiecke
+(a) Zwei kongruente Dreiecke(b) ZweiweiterekongruenteDreiecke
 
 Abbildung 4.14: Flächengleichheit
-Der Flächeninhalt eines Dreiecks ist 1/2·Grundseite·Höhe.
-A BC
-LChc
+Der Flächeninhalt eines Dreiecks ist1/2 ·Grundseite·Höhe.
+A B
+C
+LC
+hc
 c
-(a)1/2·|AB|·|hc|·
-A BC
+(a) 1/2 ·|AB|·|hc|
+·
+A B
+C
 LA
-hac
-(b)1/2·|BC|·|ha|
+ha
+c
+(b) 1/2 ·|BC|·|ha|
 Abbildung 4.15: Flächenberechnung im Dreieck
-Zu zeigen: Unabhängigkeit von der gewählten Grundseite.
-αα
-γγ
-A BC
+Zu zeigen:Unabhängigkeit von der gewählten Grundseite.
+α
+α
+γ
+γ
+A B
+C
 LA
 LC
-Abbildung 4.16:△ABLaund△CLCBsind ähnlich, weil IWS =π
-Strahlensatz= = = = = = =⇒a
-hc=c
-ha→a·ha=c·hc
+Abbildung 4.16:△ABLa und △CLCB sind ähnlich, weilIWS = π
+$$\xRightarrow{\text{Strahlensatz}} \frac{a}
+{h_c} = \frac{c}
+{h_a} \Rightarrow a \cdot h_a = c \cdot h_c$$
 Satz 4.7 (Satz des Pythagoras)
-Im rechtwinkligen Dreieck gilt a2+b2=c2, wobeicdie Hypotenuse und a,bdie beiden
+Im rechtwinkligen Dreieck gilt $a^2 + b^2 = c^2$, wobei $c$ die Hypotenuse und $a, b$ die beiden
 Katheten sind.
-Beweis: (a+b)·(a+b) =a2+ 2ab+b2=c2+ 4·(1
-2·a·b)
+Beweis: $(a + b) \cdot (a + b) = a^2 + 2ab + b^2 = c^2 + 4 \cdot (\frac{1}
+{2} \cdot a \cdot b)$
  4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE
-cb a
-A BC
+c
+b a
+A B
+C
 ·
-(a)a,bsind Katheten und cist die Hypotenuseb
- abab a
+(a) a,b sind Katheten undc ist die Hypotenuse
+
+b a
+b
+a
+ba
 b
-a·
-· ··
+a
+·
+··
+·
 γ
 (b) Beweisskizze
 Abbildung 4.17: Satz des Pythagoras
 Satz 4.8
-Bis auf Isometrie gibt es genau eine euklidische Ebene (X,d,G ), nämlichX=R2,
-d=euklidischer Abstand, G=Menge der üblichen Geraden.
+Bis auf Isometrie gibt es genau eine euklidische Ebene(X,d,G ), nämlich X = R2,
+d= euklidischer Abstand,G= Menge der üblichen Geraden.
 Beweis:
-(i)(R2,dEuklid )ist offensichtlich eine euklidische Ebene.
-(ii)Sei(X,d)eine euklidische Ebene und g1,g2Geraden in X, die sich in einem Punkt 0
+(i) (R2,dEuklid) ist offensichtlich eine euklidische Ebene.
+(ii) Sei (X,d) eine euklidische Ebene undg1,g2 Geraden inX, die sich in einem Punkt0
 im rechten Winkel schneiden.
-SeiP∈X\(g1∪g2)ein Punkt und PXder Fußpunkt des Lots von Paufg1(vgl.
-Aufgabe 9 (c)) und PYder Fußpunkt des Lots von Paufg2.
-SeixP:=d(PX,0)undyP:=d(PY,0).
+Sei P ∈X\(g1 ∪g2) ein Punkt undPX der Fußpunkt des Lots vonP auf g1 (vgl.
+Aufgabe 9 (c)) undPY der Fußpunkt des Lots vonP auf g2.
+Sei xP := d(PX,0) und yP := d(PY,0).
 In Abbildung 4.19 wurde die Situation skizziert.
-Seih:X→R2eine Abbildung mit h(P) := (xP,yP)Dadurch wird hauf dem
-Quadranten definiert, in dem Pliegt, d. h.
-∀Q∈XmitPQ∩g1=∅=PQ∩g2
-Fortsetzung auf ganz Xdurch konsistente Vorzeichenwahl.
+Sei h : X →R2 eine Abbildung mit h(P) := ( xP,yP) Dadurch wird h auf dem
+Quadranten definiert, in demP liegt, d. h.
+∀Q∈X mit PQ ∩g1 = ∅= PQ ∩g2
+Fortsetzung auf ganzX durch konsistente Vorzeichenwahl.
 Im Folgenden werden zwei Aussagen gezeigt:
-(i)hist surjektiv
-(ii)hist eine Isometrie
-Da jede Isometrie injektiv ist, folgt aus (i) und (ii), dass hbijektiv ist.
+(i) h ist surjektiv
+(ii) h ist eine Isometrie
+Da jede Isometrie injektiv ist, folgt aus (i) und (ii), dassh bijektiv ist.
 Nun zu den Beweisen der Teilaussagen:
  4.3. HYPERBOLISCHE GEOMETRIE
 ·
-g1g2
-PX
-(a) Schritt 1·
-g1g2
-xPyPP
-0 PXPYX
+g1
+g2
+P
+X
+(a) Schritt 1
+·
+g1
+g2
+xP
+yP
+P
+0 PX
+PY
+X
 (b) Schritt 2
 Abbildung 4.18: Beweis zu Satz 4.8
-(i)Sei(x,y)∈R2, z. B.x≥0,y≥0. SeiP′∈g1mitd(0,P′) =xundP′auf der
-gleichen Seite von g2wieP.
-g1g2
-xPyPPQ
-0R
+(i) Sei (x,y) ∈R2, z. B.x≥0,y ≥0. SeiP′∈g1 mit d(0,P′) = x und P′ auf der
+gleichen Seite vong2 wie P.
+g1
+g2
+xP
+yP
+P
+Q
+0
+R
 X
 Abbildung 4.19: Beweis zu Satz 4.8
-(ii) Zu Zeigen: d(P,Q) =d(h(P),h(Q))
-d(P,Q)2Pythagoras=d(P,R)2+d(R,Q)2= (yQ−yP)2+ (xQ−xP)2.
+(ii) Zu Zeigen:d(P,Q) = d(h(P),h(Q))
+d(P,Q)2 Pythagoras
+= d(P,R)2 + d(R,Q)2 = (yQ −yP)2 + (xQ −xP)2.
 h(Q) = (xQ,yQ)
 4.3 Hyperbolische Geometrie
 Definition 63
 Sei
-H:={z∈C|ℑ(z)>0}={
-(x,y)∈R2⏐⏐y>0}
+$$\mathbb{H} := \{z \in \mathbb{C} \mid \Im(z) > 0\}
+=
+\{(x,y) \in \mathbb{R}^2 \mid y > 0\}
+$$
  4.3. HYPERBOLISCHE GEOMETRIE
-die obere Halbebene bzw. Poincaré-Halbebene und G=G1∪G2mit
-G1={g1⊆H|∃m∈R,r∈R>0:g1={z∈H:|z−m|=r}}
-G2={g2⊆H|∃x∈R:g2={z∈H:ℜ(z) =x}}
-Die Elemente aus Gheißenhyperbolische Geraden .
+die obere Halbebene bzw. Poincaré-Halbebene undG= G1 ∪G2 mit
+G1 = {g1 ⊆H |∃m∈R,r ∈R>0 : g1 = {z∈H : |z−m|= r}}
+G2 = {g2 ⊆H |∃x∈R : g2 = {z∈H : ℜ(z) = x}}
+Die Elemente ausG heißenhyperbolische Geraden.
 Bemerkung 68 (Eigenschaften der hyperbolischen Geraden)
 Die hyperbolischen Geraden erfüllen...
 a) ...die Inzidenzaxiome §1
@@ -2518,1017 +2965,1289 @@ b) ...das Anordnungsaxiom §3 (ii)
 c) ...nicht das Parallelenaxiom §5
 Beweis:
 a) Offensichtlich sind §1 (iii) und §1 (ii) erfüllt. Für §1 (i) gilt:
-Gegebenz1,z2∈H
+Gegeben z1,z2 ∈H
 Existenz:
-Fall 1ℜ(z1) =ℜ(z2)
-⇒z1undz2liegen auf
-g={z∈C|ℜ(z) =ℜ(z1)∧H}
+Fall 1 ℜ(z1) = ℜ(z2)
+⇒z1 und z2 liegen auf
+g= {z∈C |ℜ(z) = ℜ(z1) ∧H }
 Siehe Abbildung 4.20a.
-Fall 2ℜ(z1)̸=ℜ(z2)
-Betrachte nun z1undz2als Punkte in der euklidischen Ebene. Die Mittelsenkrechte
- zu diesen Punkten schneidet die x-Achse. Alle Punkte auf der Mittelsenkrechten
-zuz1undz2sindgleichweitvon z1undz2entfernt.DaheristderSchnittpunktmit
-derx-Achse der Mittelpunkt eines Kreises durch z1undz2(vgl. Abbildung 4.20b)
-xy
-−1 0 1 2 3 4 501234
-Z1Z2
+Fall 2 ℜ(z1) ̸= ℜ(z2)
+Betrachte nunz1 und z2 als Punkte in der euklidischen Ebene. Die Mittelsenkrechte
+ zu diesen Punkten schneidet diex-Achse. Alle Punkte auf der Mittelsenkrechten
+zuz1 undz2 sind gleich weit vonz1 undz2 entfernt. Daher ist der Schnittpunkt mit
+der x-Achse der Mittelpunkt eines Kreises durchz1 und z2 (vgl. Abbildung 4.20b)
+x
+y
+−1 0 1 2 3 4 5
+0
+1
+2
+3
+4
+Z1
+Z2
 ℜ(Z1)
-(a) Fall 1xy
-−1 0 1 2 3 4 501234
-Z1Z2
+(a) Fall 1
+x
+y
+−1 0 1 2 3 4 5
+0
+1
+2
+3
+4
+Z1
+Z2
 (b) Fall 2
-Abbildung 4.20: Zwei Punkte liegen in der hyperbolischen Geometrie immer auf genau einer
+Abbildung 4.20:Zwei Punkte liegen in der hyperbolischen Geometrie immer auf genau einer
 Geraden
-b) Seig∈G1˙∪G2eine hyperbolische Gerade.
+b) Sei g∈G1 ˙∪G2 eine hyperbolische Gerade.
  4.3. HYPERBOLISCHE GEOMETRIE
-Es existieren disjunkte Zerlegungen von H\g:
-Fall 1:g={z∈H∥z−m|=r}∈G1
+Es existieren disjunkte Zerlegungen vonH \g:
+Fall 1: $g = \{z \in \mathbb{H} \mid |z - m| = r\} \in G_1$
 Dann gilt:
-H={z∈H∥z−m|<r}
-=:H1(Kreisinneres)˙∪{z∈H∥z−m|>r}
-=:H2(Kreisäußeres)
-Dar>0istH1nicht leer, da r∈RistH2nicht leer.
-Fall 2:g={z∈H|ℜz=x}∈G2
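+$$\mathbb{H} \setminus g = \underbrace{\{z \in \mathbb{H} \mid |z - m| < r\}}
+_{=: H_1 \text{ (Kreisinneres)}}
+\dot\cup \underbrace{\{z \in \mathbb{H} \mid |z - m| > r\}}
+_{=: H_2 \text{ (Kreisäußeres)}}$$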
+Da r> 0 ist H1 nicht leer, dar∈R ist H2 nicht leer.
+Fall 2:g= {z∈H |ℜz= x}∈ G2
 Die disjunkte Zerlegung ist:
-H={z∈H|ℜ(z)<x}
-=:H1(Links)˙∪{z∈H|ℜ(z)>x}
-=:H2(Rechts)
-Zu zeigen:∀A∈Hi,B∈Hjmiti,j∈{1,2}gilt:AB∩g̸=∅⇔i̸=j
-„⇐“:A∈H1,B∈H2:AB∩g̸=∅
-DadHstetig ist, folgt diese Richtung direkt. Alle Punkte in H1haben einen Abstand
-vonmder kleiner ist als rund alle Punkte in H2haben einen Abstand von mder
-größer ist als r. Da man jede Strecke von AnachBinsbesondere auch als stetige
-Abbildung f:R→R>0auffassen kann, greift der Zwischenwertsatz ⇒AB∩g̸=∅
-„⇒“:A∈Hi,B∈Hjmiti,j∈{1,2}:AB∩g̸=∅⇒i̸=j
-Seihdie Gerade, die durch AundBgeht.
-DaA,B /∈g, aberA,B∈hgilt, haben gundhinsbesondere mindestens einen
-unterschiedlichen Punkt. Aus §1 (i) folgt, dass sich gundhin höchstens einen Punkt
-schneiden. Sei Cdieser Punkt.
-AusA,B /∈gfolgt:C̸=AundC̸=B. Also liegt CzwischenAundB. Daraus folgt,
-dassAundBbzgl.gin verschiedenen Halbebenen liegen.
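+$$\mathbb{H} \setminus g = \underbrace{\{z \in \mathbb{H} \mid \Re(z) < x\}}
+_{=: H_1 \text{ (Links)}}
+\dot\cup \underbrace{\{z \in \mathbb{H} \mid \Re(z) > x\}}
+_{=: H_2 \text{ (Rechts)}}$$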
+Zu zeigen:∀A∈Hi, B ∈Hj mit i,j ∈{1,2 }gilt: AB∩g̸= ∅⇔ i̸= j
+„⇐“:A∈H1,B ∈H2 : AB∩g̸= ∅
+Da dH stetig ist, folgt diese Richtung direkt. Alle Punkte inH1 haben einen Abstand
+von m der kleiner ist alsr und alle Punkte inH2 haben einen Abstand vonm der
+größer ist alsr. Da man jede Strecke vonA nach B insbesondere auch als stetige
+Abbildung f : R →R>0 auffassen kann, greift der Zwischenwertsatz⇒AB∩g̸= ∅
+„⇒“:A∈Hi,B ∈Hj mit i,j ∈{1,2 }: AB∩g̸= ∅⇒ i̸= j
+Sei h die Gerade, die durchA und B geht.
+Da A,B /∈g, aber A,B ∈h gilt, haben g und h insbesondere mindestens einen
+unterschiedlichen Punkt. Aus §1 (i) folgt, dass sich $g$ und $h$ in höchstens einem Punkt
+schneiden. SeiC dieser Punkt.
+Aus A,B /∈g folgt: C ̸= A und C ̸= B. Also liegtC zwischen A und B. Daraus folgt,
+dass A und B bzgl. g in verschiedenen Halbebenen liegen.
 c) Siehe Abbildung 4.21.
-xy
-−5−4−3−2−1 0 1 2 3 4 5 6012345
+x
+y
+−5 −4 −3 −2 −1 0 1 2 3 4 5 6
+0
+1
+2
+3
+4
+5
 Abbildung 4.21: Hyperbolische Geraden erfüllen §5 nicht.
  4.3. HYPERBOLISCHE GEOMETRIE
 Definition 64
-Es seiena,b,c,d∈Rmitad−bc̸= 0undσ:C→Ceine Abbildung definiert durch
-σ(z) :=az+b
-cz+d
-σheißtMöbiustransformation .
+Es seien $a,b,c,d \in \mathbb{R}$ mit $ad - bc \neq 0$ und $\sigma: \mathbb{C} \to \mathbb{C}$ eine Abbildung definiert durch
+$$\sigma(z) := \frac{az + b}
+{cz + d}$$
+$\sigma$ heißt Möbiustransformation.
 Proposition 4.9
-a) Die Gruppe SL2(R)operiert auf Hdurch die Möbiustransformation
-σ(z) :=(a b
-c d)
-◦z:=az+b
-cz+d
-b) Die Gruppe PSL 2(R) = SL 2(R)/(±I)operiert durch σaufH.
-c)PSL 2(R)operiert auf R∪{∞}. Diese Gruppenoperation ist 3-fach transitiv, d. h.
-zux0<x 1<x∞∈Rgibt es genau ein σ∈PSL 2(R)mitσ(x0) = 0,σ(x1) = 1,
-σ(x∞) =∞.
-d)SL2(R)wird von den Matrizen
-(λ0
-0λ−1)
-
-=:Aλ,(1t
-0 1)
-
-=:Btund(0 1
-−1 0)
-
-=:Cmitt,λ∈R×
+a) Die GruppeSL2(R) operiert aufH durch die Möbiustransformation
+σ(z) :=
+(a b
+c d
+)
+◦z:= az+ b
+cz+ d
+b) Die GruppePSL2(R) = SL2(R)/(±I) operiert durchσ auf H.
+c) PSL2(R) operiert aufR∪{∞} . Diese Gruppenoperation ist 3-fach transitiv, d. h.
+zu x0 <x1 <x∞∈R gibt es genau einσ ∈PSL2(R) mit σ(x0) = 0, σ(x1) = 1,
+σ(x∞) = ∞.
+d) SL2(R) wird von den Matrizen
+(λ 0
+0 λ−1
+)
+  
+=:Aλ
+,
+(1 t
+0 1
+)
+  
+=:Bt
+und
+(0 1
+−1 0
+)
+  
+=:C
+mit t,λ ∈R×
 erzeugt.
-e)PSL 2(R)operiert auf G.
+e) PSL2(R) operiert aufG.
 Beweis:
-a) Seiz=x+iy∈H, d. h.y>0undσ=(a b
-c d)
+a) Sei z= x+ iy ∈H, d. h.y >0 und σ=
+(a b
+c d
+)
 ∈SL2(R)
-⇒σ(z) =a(x+iy) +b
-c(x+iy) +d
-=(ax+b) +iay
-(cx+d) +icy·(cx+d)−icy
-(cx+d)−icy
-=(ax+b)(cx+d) +aycy
-(cx+d)2+ (cy)2+iay(cx+d)−(ax+b)cy
-(cx+d)2+ (cy)2
-=axcx +axd+bcx+bd+aycy
-(cx+d)2+ (cy)2+i(ad−bc)y
-(cx+d)2+ (cy)2
-SL2(R)=ac(x2+y2) +adx+bcx+bd
-(cx+d)2+ (cy)2+iy
-(cx+d)2+ (cy)2
-⇒ℑ(σ(z)) =y
-(cx+d)2+(cy)2>0
-Die Abbildung bildet also nach Hab. Außerdem gilt:
+⇒σ(z) = a(x+ iy) + b
+c(x+ iy) + d
+= (ax+ b) + iay
+(cx+ d) + icy ·(cx+ d) −icy
+(cx+ d) −icy
+= (ax+ b)(cx+ d) + aycy
+(cx+ d)2 + (cy)2 + iay(cx+ d) −(ax+ b)cy
+(cx+ d)2 + (cy)2
+= axcx+ axd+ bcx+ bd+ aycy
+(cx+ d)2 + (cy)2 + i (ad−bc)y
+(cx+ d)2 + (cy)2
+SL2(R)
+= ac(x2 + y2) + adx+ bcx+ bd
+(cx+ d)2 + (cy)2 + i y
+(cx+ d)2 + (cy)2
+⇒ℑ(σ(z)) = y
+(cx+d)2+(cy)2 >0
+Die Abbildung bildet also nachH ab. Außerdem gilt:
 (1 0
-0 1)
-◦z=x+iy
-1=x+iy=z
+0 1
+)
+◦z= x+ iy
+1 = x+ iy = z
  4.3. HYPERBOLISCHE GEOMETRIE
 und
 (a b
-c d)
-◦((a′b′
-c′d′)
-◦z)
-=(a b
-c d)
-◦a′z+b′
-c′z+d′
-=aa′z+b′
-c′z+d′+b
+c d
+)
+◦
+((a′ b′
+c′ d′
+)
+◦z
+)
+=
+(a b
+c d
+)
+◦a′z+ b′
+c′z+ d′
+=
+aa′z+b′
+c′z+d′ + b
 ca′z+b′
-c′z+d′+d
-=a(a′z+b′)+b(c′z+d′)
+c′z+d′ + d
+=
+a(a′z+b′)+b(c′z+d′)
 c′z+d′
 c(a′z+b′)+d(c′z+d′)
 c′z+d′
-=a(a′z+b′) +b(c′z+d′)
-c(a′z+b′) +d(c′z+d′)
-=(aa′+bc′)z+ab′+bd′
-(ca′+db′)z+cb′+dd′
-=(aa′+bc′ab′+bd′
-ca′+db′cb′+dd′)
+= a(a′z+ b′) + b(c′z+ d′)
+c(a′z+ b′) + d(c′z+ d′)
+= (aa′+ bc′)z+ ab′+ bd′
+(ca′+ db′)z+ cb′+ dd′
+=
+(aa′+ bc′ ab′+ bd′
+ca′+ db′ cb′+ dd′
+)
 ◦z
-=((a b
-c d)
-·(a′b′
-c′d′))
+=
+((a b
+c d
+)
+·
+(a′ b′
+c′ d′
+))
 ◦z
-b) Es giltσ(z) = (−σ)(z)für alleσ∈SL2(R)undz∈H.
-c) Ansatz: σ=(a b
-c d)
-σ(x0) =ax0+b
-cx0+d!= 0⇒ax0+b= 0⇒b=−ax0
-σ(x∞) =∞⇒cx∞+d= 0⇒d=−cx∞
-σ(x1) = 1⇒ax1+b=cx1+d
-a(x1−x0) =c(x1−x∞)⇒c=ax1−x0
+b) Es giltσ(z) = (−σ)(z) für alleσ∈SL2(R) und z∈H.
+c) Ansatz: σ=
+(a b
+c d
+)
+σ(x0) = ax0+b
+cx0+d
+!= 0 ⇒ax0 + b= 0 ⇒b= −ax0
+σ(x∞) = ∞⇒ cx∞+ d= 0 ⇒d= −cx∞
+σ(x1) = 1 ⇒ax1 + b= cx1 + d
+a(x1 −x0) = c(x1 −x∞) ⇒c= ax1−x0
 x1−x∞
-⇒−a2·x∞x1−x0
-x1−x∞+a2x0x1−x0
-x1−x∞= 1
-⇒a2x1−x0
-x0−x∞(x0−x∞) = 1⇒a2=x1−x∞
+⇒−a2 ·x∞x1−x0
+x1−x∞ + a2x0 x1−x0
+x1−x∞ = 1
+⇒a2 x1−x0
+x0−x∞(x0 −x∞) = 1 ⇒a2 = x1−x∞
 (x1−x∞)(x1−x0)
 d) Es gilt:
 A−1
-λ=A1
+λ = A1
 λ
 B−1
-t=B−t
-C−1=C3
-Daher genügt es zu zeigen, dass man mit Aλ,BtundCalle Matrizen aus SL2(R)
+t = B−t
+C−1 = C3
+Um zu zeigen, dass man mit $A_\lambda$, $B_t$ und $C$ alle Matrizen aus $\mathrm{SL}_2(\mathbb{R})$
 erzeugen kann, genügt es also von einer beliebigen Matrix durch Multiplikation mit
-Matrizen der Form Aλ,BtundCdie Einheitsmatrix zu generieren.
+Matrizen der FormAλ, Bt und C die Einheitsmatrix zu generieren.
 Sei also
-M=(a b
-c d)
+M =
+(a b
+c d
+)
 ∈SL2(R)
 beliebig.
 Fall 1:a= 0
-DaM∈SL2(R)ist, gilt detM= 1 =ad−bc=−bc. Daher ist insbesondere c̸= 0. Es
+Da M ∈SL2(R) ist, giltdet M = 1 = ad−bc= −bc. Daher ist insbesonderec̸= 0. Es
 folgt:
 (0 1
-−1 0)
-·(a b
-c d)
-=(c d
-−a−b)
+−1 0
+)
+·
+(a b
+c d
+)
+=
+( c d
+−a −b
+)
  4.3. HYPERBOLISCHE GEOMETRIE
 Gehe zu Fall 2.
 Fall 2:a̸= 0
-Nun wird in MdurchM·A1
-aan der Stelle von aeine1erzeugt:
+Nun wird inM durch M ·A1
+a
+an der Stelle vona eine 1 erzeugt:
 (a b
-c d)
-·(1
-a0
-0a)
-=(1ab
+c d
+)
+·
+(1
+a 0
+0 a
+)
+=
+(1 ab
 c
-aad)
+a ad
+)
 Gehe zu Fall 3.
 Fall 3:a= 1
-(1b
-c d)
-·(1−b
-0 1)
-=(1 0
-c d−bc)
-Da wir detM= 1 =ad−bc=d−bcwissen, gilt sogar M2,2= 1.
+(1 b
+c d
+)
+·
+(1 −b
+0 1
+)
+=
+(1 0
+c d −bc
+)
+Da wir $\det M = 1 = ad - bc = d - bc$ wissen, gilt sogar $M_{2,2} = 1$.
 Gehe zu Fall 4.
-Fall 4:a= 1,b= 0,d= 1
-A−1CBcC(1 0
-c1)
-=(1 0
-0 1)
-Daher erzeugen Matrizen der Form Aλ,BtundCdie Gruppe SL2R.■
+Fall 4:a= 1, b= 0, d= 1
+A−1CBcC
+(1 0
+c 1
+)
+=
+(1 0
+0 1
+)
+Daher erzeugen Matrizen der Form $A_\lambda$, $B_t$ und $C$ die Gruppe $\mathrm{SL}_2(\mathbb{R})$. ■
 e) Es genügt die Aussage für Matrizen aus Proposition 4.9 (d) zu zeigen.
-•σ=(λ0
-0λ−1)
-, alsoσ(z) =λ2z. Daraus ergeben sich die Situationen, die in
+•σ =
+(λ 0
+0 λ−1
+)
+, also σ(z) = λ2z. Daraus ergeben sich die Situationen, die in
 Abbildung 4.22a und Abbildung 4.22b dargestellt sind.
-xy
-−1 0 1 2 3 4 5 6 70123
-mλ2mm+irλ2m+iλ2r
+x
+y
+−1 0 1 2 3 4 5 6 70
+1
+2
+3
+m λ2m
+m+ir
+λ2m+iλ2r
 m+ 1
-(a) Fall 1xy
-−1 0 1 2 3 40123
+(a) Fall 1
+x
+y
+−1 0 1 2 3 4
+0
+1
+2
+3
 z
-xλ2z
+x
+λ2z
 λ2x
 (b) Fall 2 (Strahlensatz)
 Abbildung 4.22: Beweis von Proposition 4.9 (e) für eine Diagonalmatrix
-•Offensichtlich gilt die Aussage für σ=(1a
-0 1)
-•Sei nunσ=(0 1
-−1 0)
-, alsoσ(z) =−1
+•Offensichtlich gilt die Aussage fürσ=
+(1 a
+0 1
+)
+•Sei nunσ=
+(0 1
+−1 0
+)
+, alsoσ(z) = −1
 z
 Bemerkung 69
-Zu hyperbolischen Geraden g1,g2gibt esσ∈PSL 2(R)mitσ(g1) =g2.
+Zu hyperbolischen Geradeng1,g2 gibt esσ∈PSL2(R) mit σ(g1) = g2.
  4.3. HYPERBOLISCHE GEOMETRIE
 ·
-xy
-−1 0 101z=r·eiϕ
+x
+y
+−1 0 1
+0
+1
+z= r·eiϕ
 1
-z=1
-r·eiϕ
+z = 1
+r ·eiϕ
 Abbildung 4.23: Inversion am Kreis
-Beweis: Nach Proposition 4.9 (c) gibt es σmitσ(a1) =b1undσ(a2) =b2. Dann existiert
-σ(g1) :=g2wegen dem Inzidenzaxiom §1 und ist eindeutig bestimmt.
+Beweis: Nach Proposition 4.9 (c) gibt esσ mit σ(a1) = b1 und σ(a2) = b2. Dann existiert
+σ(g1) := g2 wegen dem Inzidenzaxiom §1 und ist eindeutig bestimmt.
 Definition 65
-Seienz1,z2,z3,z4∈Cpaarweise verschieden.
+Seien z1,z2,z3,z4 ∈C paarweise verschieden.
 Dann heißt
-DV(z1,z2,z3,z4) :=z1−z4
+DV(z1,z2,z3,z4) :=
+z1−z4
 z1−z2
 z3−z4
-z3−z2=(z1−z4)·(z3−z2)
-(z1−z2)·(z3−z4)
-Doppelverhältnis vonz1,...,z 4.
+z3−z2
+= (z1 −z4) ·(z3 −z2)
+(z1 −z2) ·(z3 −z4)
+Doppelverhältnis von z1,...,z 4.
 Bemerkung 70 (Eigenschaften des Doppelverhältnisses)
-a)DV(z1,...,z 4)∈C\{0,1}
-b)DV(z1,z4,z3,z2) =1
+a) DV(z1,...,z 4) ∈C \{0,1 }
+b) DV(z1,z4,z3,z2) = 1
 DV(z1,z2,z3,z4)
-c)DV(z3,z2,z1,z4) =1
+c) DV(z3,z2,z1,z4) = 1
 DV(z1,z2,z3,z4)
-d)DVist auch wohldefiniert, wenn eines der zi=∞oder wenn zwei der zigleich sind.
-e)DV(0,1,∞,z4) =z4(Der Fallz4∈{0,1,∞}ist zugelassen).
-f) Fürσ∈PSL 2(C)undz1,...,z 4∈C∪{∞}ist
+d) DV ist auch wohldefiniert, wenn eines derzi = ∞oder wenn zwei derzi gleich sind.
+e) DV(0,1,∞,z4) = z4 (Der Fallz4 ∈{0,1,∞} ist zugelassen).
+f) Für σ∈PSL2(C) und z1,...,z 4 ∈C ∪{∞} ist
 DV(σ(z1),σ(z2),σ(z3),σ(z4)) = DV(z1,z2,z3,z4)
-und fürσ(z) =1
-zgilt
+und fürσ(z) = 1
+z gilt
 DV(σ(z1),σ(z2),σ(z3),σ(z4)) = DV(z1,z2,z3,z4)
-g)DV(z1,z2,z3,z4)∈R∪{∞}⇔ z1,...,z 4liegen auf einer hyperbolischen Geraden.
+g) DV(z1,z2,z3,z4) ∈R ∪{∞}⇔ z1,...,z 4 liegen auf einer hyperbolischen Geraden.
 Beweis:
-a)DV(z1,...,z 4)̸= 0, dazipaarweise verschieden
-DV(z1,...,z 4)̸= 1, da:
+a) DV(z1,...,z 4) ̸= 0, dazi paarweise verschieden
+DV(z1,...,z 4) ̸= 1, da:
 Annahme: DV(z1,...,z 4) = 1
-⇔(z1−z2)(z3−z4) = (z1−z4)(z3−z2)
+⇔(z1 −z2)(z3 −z4) = (z1 −z4)(z3 −z2)
  4.3. HYPERBOLISCHE GEOMETRIE
-⇔z1z3−z2z3−z1z4+z2z4=z1z3−z3z4−z1z2+z2z4
-⇔z2z3+z1z4=z3z4+z1z2
-⇔z2z3−z3z4=z1z2−z1z4
-⇔z3(z2−z4) =z1(z2−z4)
-⇔z3=z1oderz2=z4
-Allezisind paarweise verschieden ⇒Widerspruch ■
-b)DV(z1,z4,z3,z2) =(z1−z2)·(z3−z4)
-(z1−z4)·(z3−z2)=1
+⇔z1z3 −z2z3 −z1z4 + z2z4 = z1z3 −z3z4 −z1z2 + z2z4
+⇔z2z3 + z1z4 = z3z4 + z1z2
+⇔z2z3 −z3z4 = z1z2 −z1z4
+⇔z3(z2 −z4) = z1(z2 −z4)
+⇔z3 = z1 oder z2 = z4
+Alle zi sind paarweise verschieden⇒Widerspruch ■
+b) DV(z1,z4,z3,z2) = (z1−z2)·(z3−z4)
+(z1−z4)·(z3−z2) = 1
 DV(z1,z2,z3,z4)
-c)DV(z3,z2,z1,z4) =(z3−z4)·(z1−z2)
-(z3−z2)·(z1−z4)=1
+c) DV(z3,z2,z1,z4) = (z3−z4)·(z1−z2)
+(z3−z2)·(z1−z4) = 1
 DV(z1,z2,z3,z4)
-d) Zwei der zidürfen gleich sein, da:
-Fall 1z1=z4oderz3=z2
-In diesem Fall ist DV(z1,...,z 4) = 0
-Fall 2z1=z2oderz3=z4
-Mit der Regel von L’Hospital folgt, dass in diesem Fall DV(z1,...,z 4) =∞gilt.
-Fall 3z1=z3oderz2=z4
-Durch Einsetzen ergibt sich DV(z1,...,z 4) = 1.
-Im Fall, dass ein zi=∞ist, ist entweder DV(0,1,∞,z4) = 0oder DV(0,1,∞,z4)±∞
-e)DV(0,1,∞,z4) =(0−z4)·(∞−1)
-(0−1)·(∞−z4)=z4·(∞−1)
-∞−z4=z4
+d) Zwei derzi dürfen gleich sein, da:
+Fall 1 z1 = z4 oder z3 = z2
+In diesem Fall istDV(z1,...,z 4) = 0
+Fall 2 z1 = z2 oder z3 = z4
+Mit der Regel von L’Hospital folgt, dass in diesem FallDV(z1,...,z 4) = ∞gilt.
+Fall 3 z1 = z3 oder z2 = z4
+Durch Einsetzen ergibt sichDV(z1,...,z 4) = 1.
+Im Fall, dass ein $z_i = \infty$ ist, ist entweder $\mathrm{DV}(0,1,\infty,z_4) = 0$ oder $\mathrm{DV}(0,1,\infty,z_4) = \pm\infty$
+e) $\mathrm{DV}(0,1,\infty,z_4) = \frac{(0 - z_4) \cdot (\infty - 1)}
+{(0 - 1) \cdot (\infty - z_4)} = \frac{z_4 \cdot (\infty - 1)}
+{\infty - z_4}
+= z_4$
 f) Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken.
-g)Seiσ∈PSL 2(C)mitσ(z1) = 0,σ(z2) = 1,σ(z3) =∞. Ein solches σexistiert, da man
-drei Parameter von σwählen darf.
+g) Sei σ∈PSL2(C) mit σ(z1) = 0, σ(z2) = 1, σ(z3) = ∞. Ein solchesσ existiert, da man
+drei Parameter vonσ wählen darf.
 Bem. 70.f⇒ DV(z1,...,z 4) = DV(0,1,∞,σ(z4))
-⇒ DV(z1,...,z 4)∈R∪{∞}
-⇔σ(z4)∈R∪{∞}
-Behauptung folgt, weil σ−1(R∪∞)ein Kreis oder eine Gerade in Cist.
+⇒ DV(z1,...,z 4) ∈R ∪{∞}
+⇔σ(z4) ∈R ∪{∞}
+Behauptung folgt, weilσ−1(R ∪∞) ein Kreis oder eine Gerade inC ist.
 Definition 66
-Fürz1,z2∈Hseigz1,z2die eindeutige hyperbolische Gerade durch z1undz2unda1,a2die
-„Schnittpunkte“ von gz1,z2mitR∪{∞}.
-Dann seidH(z1,z2) :=1
-2|ln DV(a1,z1,a2,z2)|und heiße hyperbolische Metrik .
-Beh.:Fürz1,z2∈Hseigz1,z2die eindeutige hyperbolische Gerade durch z1undz2unda1,a2
-die „Schnittpunkte“ von gz1,z2mitR∪{∞}.
+Für z1,z2 ∈H sei gz1,z2 die eindeutige hyperbolische Gerade durchz1 und z2 und a1,a2 die
+„Schnittpunkte“ vongz1,z2 mit R ∪{∞} .
+Dann sei $d_{\mathbb{H}}(z_1,z_2) := \frac{1}{2}
+\left|\ln \mathrm{DV}(a_1,z_1,a_2,z_2)\right|$ und heiße hyperbolische Metrik.
+Beh.: Für z1,z2 ∈H sei gz1,z2 die eindeutige hyperbolische Gerade durchz1 und z2 und a1,a2
+die „Schnittpunkte“ vongz1,z2 mit R ∪{∞} .
 Dann gilt:
 1
-2|ln DV(a1,z1,a2,z2)|=1
+2|ln DV(a1,z1,a2,z2)|= 1
 2|ln DV(a2,z1,a1,z2)|
 Beweis: Wegen Bemerkung 70.c gilt:
-DV(a1,z1,a2,z2) =1
+DV(a1,z1,a2,z2) = 1
 DV(a2,z1,a1,z2)
 Außerdem gilt:
-ln1
-x= lnx−1= (−1)·lnx=−lnx
+$$\ln \frac{1}{x} = \ln x^{-1}
+= (-1) \cdot \ln x = -\ln x$$
  4.3. HYPERBOLISCHE GEOMETRIE
-Da der lnim Betrag steht, folgt direkt:
+Da derln im Betrag steht, folgt direkt:
 1
-2|ln DV(a1,z1,a2,z2)|=1
+2|ln DV(a1,z1,a2,z2)|= 1
 2|ln DV(a2,z1,a1,z2)|
-Es ist also egal in welcher Reihenfolge die „Schnittpunkte“ mit der x-Achse im Doppelverhältnis
+Es ist also egal in welcher Reihenfolge die „Schnittpunkte“ mit derx-Achse im Doppelverhältnis
  genutzt werden. ■
-Beh.:Die hyperbolische Metrik ist eine Metrik auf H.
+Beh.: Die hyperbolische Metrik ist eine Metrik aufH.
 Beweis: Wegen Bemerkung 70.f ist
-d(z1,z2) :=d(σ(z1),σ(z2))mitσ(a1) = 0, σ(a2) =∞
-d. h.σ(gz1,z2) =iR(imaginäre Achse).
-also gilt o. B. d. A. z1=iaundz2=ibmita,b∈Runda<b.
-2d(ia,ib ) =|ln DV(0,ia,∞,ib)|
-=|ln(0−ib)(∞−ia)
-(0−ia)(∞−ib)|
-=|lnb
-a|
-= lnb−lna
-Also:d(z1,z2)≥0,d(z1,z2) = 0⇔z1=z2
-2d(z2,z1) =|ln DV(a2,z2,a1,z1)|
-=|ln DV(∞,ib,0,ia)|
-Bem. 70.b=|ln DV(0,ib,∞,ia)|
+d(z1,z2) := d(σ(z1),σ(z2)) mit σ(a1) = 0, σ(a2) = ∞
+d. h.σ(gz1,z2 ) = iR (imaginäre Achse).
+also gilt o. B. d. A.z1 = ia und z2 = ib mit a,b ∈R und a<b .
+$$2d(ia,ib) = \left|\ln \mathrm{DV}(0, ia, \infty, ib)\right|
+= \left|\ln \frac{(0 - ib)(\infty - ia)}
+{(0 - ia)(\infty - ib)}\right|
+= \left|\ln \frac{b}
+{a}\right|
+= \ln b - \ln a$$
+Also: d(z1,z2) ≥0, d(z1,z2) = 0 ⇔z1 = z2
+2d(z2,z1) =|ln DV(a2,z2,a1,z1) |
+=|ln DV(∞,ib, 0,ia) |
+Bem. 70.b= |ln DV(0,ib, ∞,ia) |
 = 2d(z1,z2)
-Liegen drei Punkte z1,z2,z3∈Cauf einer hyperbolischen Geraden, so gilt d(z1,z3) =
-d(z1,z2) +d(z2,z3)(wennz2zwischenz1undz3liegt).
+Liegen drei Punkte z1,z2,z3 ∈C auf einer hyperbolischen Geraden, so giltd(z1,z3) =
+d(z1,z2) + d(z2,z3) (wenn z2 zwischen z1 und z3 liegt).
 Dreiecksungleichung: Beweis ist umständlich und wird hier nicht geführt. Es sei auf die
 Vorlesung „Hyperbolische Geometrie“ verwiesen.
 Satz 4.10
-Die hyperbolische Ebene Hmit der hyperbolischen Metrik dund den hyperbolischen
+Die hyperbolische EbeneH mit der hyperbolischen Metrikd und den hyperbolischen
 Geraden bildet eine „nichteuklidische Geometrie“, d. h. die Axiome §1 - §4 sind erfüllt,
 aber Axiom §5 ist verletzt.
  4.3. HYPERBOLISCHE GEOMETRIE
 Übungsaufgaben
 Aufgabe 8
-Seien (X,d)eine absolute Ebene und P,Q,R∈XPunkte. Der Scheitelwinkel des Winkels
-∠PQRist der Winkel, der aus den Halbgeraden QP−undQR−gebildet wird. Die
-Nebenwinkel von∠PQRsind die von QP+undQR−bzw.QP−undQR+gebildeten
+Seien (X,d) eine absolute Ebene undP,Q,R ∈X Punkte. DerScheitelwinkel des Winkels
+∠PQR ist der Winkel, der aus den Halbgeraden QP− und QR− gebildet wird. Die
+Nebenwinkel von ∠PQR sind die vonQP+ und QR− bzw. QP− und QR+ gebildeten
 Winkel.
 Zeigen Sie:
-(a) Die beiden Nebenwinkel von ∠PQRsind gleich.
-(b) Der Winkel∠PQRist gleich seinem Scheitelwinkel.
+(a) Die beiden Nebenwinkel von∠PQR sind gleich.
+(b) Der Winkel∠PQR ist gleich seinem Scheitelwinkel.
 Aufgabe 9
-Sei(X,d)eine absolute Ebene. Der Abstand eines Punktes Pzu einer Menge Y⊆Xvon
-Punkten ist definiert durch d(P,Y) := infd(P,y)|y∈Y.
+Sei (X,d) eine absolute Ebene. DerAbstand eines PunktesP zu einer MengeY ⊆X von
+Punkten ist definiert durchd(P,Y ) := inf d(P,y)|y∈Y.
 Zeigen Sie:
-(a)Ist△ABCein Dreieck, in dem die Seiten ABundACkongruent sind, so sind die
-Winkel∠ABCund∠BCAgleich.
-(b)Ist△ABCein beliebiges Dreieck, so liegt der längeren Seite der größere Winkel
+(a) Ist △ABC ein Dreieck, in dem die SeitenAB und AC kongruent sind, so sind die
+Winkel ∠ABC und ∠BCA gleich.
+(b) Ist △ABC ein beliebiges Dreieck, so liegt der längeren Seite der größere Winkel
 gegenüber und umgekehrt.
-(c)Sindgeine Gerade und P /∈gein Punkt, so gibt es eine eindeutige Gerade hmit
-P∈hund diegim rechten Winkel schneidet. Diese Grade heißt LotvonPaufg
-und der Schnittpunkt des Lots mit gheißt Lotfußpunkt .
+(c) Sind g eine Gerade undP /∈g ein Punkt, so gibt es eine eindeutige Geradeh mit
+P ∈h und dieg im rechten Winkel schneidet. Diese Grade heißtLot von P auf g
+und der Schnittpunkt des Lots mitg heißtLotfußpunkt.
 Aufgabe 10
-Seienf,g,h∈Gund paarweise verschieden.
-Zeigen Sie: f∥g∧g∥h⇒f∥h
+Seien f,g,h ∈G und paarweise verschieden.
+Zeigen Sie:f ∥g∧g∥h⇒f ∥h
 Aufgabe 11
-Beweise den Kongruenzsatz SSS.
+Beweise den KongruenzsatzSSS.
 5 Krümmung
 Definition 67
-Seif: [a,b]→Rneine eine Funktion aus C∞. Dann heißt fKurve.
+Sei f : [a,b] →Rn eine eine Funktion ausC∞. Dann heißtf Kurve.
 5.1 Krümmung von Kurven
 Definition 68
-Seiγ:I= [a,b]→Rneine Kurve.
-a) Die Kurve γheißtdurch Bogenlänge parametrisiert , wenn gilt:
-∥γ′(t)∥2= 1∀t∈I
+Sei γ : I = [a,b] →Rn eine Kurve.
+a) Die Kurveγ heißtdurch Bogenlänge parametrisiert, wenn gilt:
+∥γ′(t)∥2 = 1 ∀t∈I
 Dabei istγ′(t) = (γ′
 1(t),γ′
-2(t),...,γ′
+2(t),...,γ ′
 n(t)).
-b)l(γ) =∫b
-a∥γ′(t)∥dtheißtLänge von γ.
+b) l(γ) =
+∫b
+a ∥γ′(t)∥dt heißtLänge vonγ.
 Bemerkung 71 (Eigenschaften von Kurven I)
-Seiγ:I= [a,b]→RneineC∞-Funktion.
-a) Istγdurch Bogenlänge parametrisiert, so ist l(γ) =b−a.
-b) Istγdurch Bogenlänge parametrisiert, so ist γ′(t)orthogonal zu γ′′(t)für allet∈I.
+Sei γ : I = [a,b] →Rn eine C∞-Funktion.
+a) Ist γ durch Bogenlänge parametrisiert, so istl(γ) = b−a.
+b) Ist γ durch Bogenlänge parametrisiert, so istγ′(t) orthogonal zuγ′′(t) für allet∈I.
 Beweis:
-a)l(γ) =∫b
-a∥γ′(t)∥dt=∫b
-a1dt=b−a.
-b)Im Folgenden wird die Aussage nur für γ: [a,b]→R2bewiesen. Allerdings funktioniert
-der Beweis im Rnanalog. Es muss nur die Ableitung angepasst werden.
-1 =∥γ′(t)∥=∥γ′(t)∥2=⟨γ′(t),γ′(t)⟩
-⇒0 =d
+a) l(γ) =
+∫b
+a ∥γ′(t)∥dt=
+∫b
+a 1dt= b−a.
+b) Im Folgenden wird die Aussage nur fürγ : [a,b] →R2 bewiesen. Allerdings funktioniert
+der Beweis imRn analog. Es muss nur die Ableitung angepasst werden.
+1 = ∥γ′(t)∥= ∥γ′(t)∥2 = ⟨γ′(t),γ′(t)⟩
+⇒0 = d
 dt⟨γ′(t),γ′(t)⟩
-=d
+= d
 dt(γ′
 1(t)γ′
-1(t) +γ′
+1(t) + γ′
 2(t)γ′
 2(t))
-= 2·(γ′′
-1(t)·γ′
-1(t) +γ′′
-2(t)·γ′
+= 2 ·(γ′′
+1 (t) ·γ′
+1(t) + γ′′
+2 (t) ·γ′
 2(t))
-= 2·⟨γ′′(t),γ′(t)⟩
+= 2 ·⟨γ′′(t),γ′(t)⟩
 Definition 69
-Seiγ:I→R2eine durch Bogenlänge parametrisierte Kurve.
-a) Fürt∈Isein(t)Normalenvektor anγintwenn gilt:
-⟨n(t),γ′(t)⟩= 0,∥n(t)∥= 1und det((γ′(t),n(t))) = +1
+Sei γ : I →R2 eine durch Bogenlänge parametrisierte Kurve.
+a) Für t∈I sei n(t) Normalenvektoran γ in t wenn gilt:
+⟨n(t),γ′(t)⟩= 0, ∥n(t)∥= 1 und det((γ′(t),n(t))) = +1
  5.1. KRÜMMUNG VON KURVEN
-b) Seitκ:I→Rso, dass gilt:
-γ′′(t) =κ(t)·n(t)
-Dann heißt κ(t)Krümmung vonγint.
-Dan(t)undγ′′(t)nach Bemerkung 71.b linear abhängig sind, existiert κ(t).
+b) Seit κ: I →R so, dass gilt:
+γ′′(t) = κ(t) ·n(t)
+Dann heißtκ(t) Krümmung von γ in t.
+Da n(t) und γ′′(t) nach Bemerkung 71.b linear abhängig sind, existiertκ(t).
 Beispiel 45
-Gegeben sei ein Kreis mit Radius r, d. h. mit Umfang 2πr. Es gilt:
-γ(t) =(
-r·cost
-r,r·sint
-r)
-fürt∈[0,2πr]
+Gegeben sei ein Kreis mit Radiusr, d. h. mit Umfang2πr. Es gilt:
+γ(t) =
+(
+r·cos t
+r,r ·sin t
+r
+)
+für t∈[0,2πr]
 ist parametrisiert durch Bogenlänge, da gilt:
-γ′(t) =(
+γ′(t) =
+(
 (r·1
-r)(−sint
+r)(−sin t
 r),r1
-rcost
-r)
-=(
-−sint
-r,cost
-r)
-Der Normalenvektor von γintist
-n(t) =(
-−cost
-r,−sint
-r)
+rcos t
+r
+)
+=
+(
+−sin t
+r,cos t
+r
+)
+Der Normalenvektor vonγ in t ist
+n(t) =
+(
+−cos t
+r,−sin t
+r
+)
 da gilt:
-⟨n(t),γ′(t)⟩=⟨(−cost
+⟨n(t),γ′(t)⟩=
+⟨(−cos t
 r
-−sint
-r)
-,(−sint
+−sin t
 r
-cost
-r)⟩
-= (−cost
-r)·(−sint
-r) + (−sint
-r)·(cost
+)
+,
+(−sin t
+r
+cos t
+r
+)⟩
+= (−cos t
+r) ·(−sin t
+r) + (−sin t
+r) ·(cos t
 r)
 = 0
-∥n(t)∥=(−cost
-r,−sint
-r)
-= (−cost
-r)2+ (−sint
+∥n(t)∥=
+(−cos t
+r,−sin t
+r)
+
+= (−cos t
+r)2 + (−sin t
 r)2
 = 1
 det(γ′
-1(t),n(t)) =(−sint
-r−cost
+1(t),n(t)) =
+
+(−sin t
+r −cos t
 r
-cost
-r−sint
-r)
-= (−sint
-r)2−(−cost
-r)·cost
+cos t
+r −sin t
+r
+)
+= (−sin t
+r)2 −(−cos t
+r) ·cos t
 r
 = 1
-Die Krümmung ist für jedes tkonstant1
+Die Krümmung ist für jedest konstant 1
 r, da gilt:
-γ′′(t) =(
+γ′′(t) =
+(
 −1
-rcost
+rcos t
 r,−1
-rsint
-r)
-=1
-r·(
-−cost
-r,−sint
-r)
-⇒κ(t) =1
+rsin t
+r
+)
+= 1
+r ·
+(
+−cos t
+r,−sin t
+r
+)
+⇒κ(t) = 1
 r
  5.2. TANGENTIALEBENE
 Definition 70
-Seiγ:I→R3eine durch Bogenlänge parametrisierte Kurve.
-a) Fürt∈Iheißtκ(t) :=∥γ′′(t)∥dieKrümmung vonγint.
-b) Ist fürt∈Idie Ableitung γ′′(t)̸= 0, so heißtγ′′(t)
-∥γ′′(t)∥Normalenvektor anγint.
-c)b(t)sei ein Vektor, der γ′(t),n(t)zu einer orientierten Orthonormalbasis von R3ergänzt.
+Sei γ : I →R3 eine durch Bogenlänge parametrisierte Kurve.
+a) Für t∈I heißtκ(t) := ∥γ′′(t)∥die Krümmung von γ in t.
+b) Ist fürt∈I die Ableitungγ′′(t) ̸= 0, so heißtγ′′(t)
+∥γ′′(t)∥ Normalenvektoran γ in t.
+c) b(t) sei ein Vektor, derγ′(t),n(t) zu einer orientierten Orthonormalbasis vonR3 ergänzt.
 Also gilt:
 det(γ′(t),n(t),b(t)) = 1
-b(t)heißtBinormalenvektor , die Orthonormalbasis
+b(t) heißtBinormalenvektor, die Orthonormalbasis
 {
-γ′(t),n(t),b(t)}
-heißtbegleitendes Dreibein .
+γ′(t),n(t),b(t)
+}
+heißtbegleitendes Dreibein.
 Bemerkung 72 (Eigenschaften von Kurven II)
-Seiγ:I→R3durch Bogenlänge parametrisierte Kurve.
-a)n(t)ist orthogonal zu γ′(t).
-b)b(t)aus Definition 70.c ist eindeutig.
+Sei γ : I →R3 durch Bogenlänge parametrisierte Kurve.
+a) n(t) ist orthogonal zuγ′(t).
+b) b(t) aus Definition 70.c ist eindeutig.
 5.2 Tangentialebene
 Erinnerung Sie sich an Definition 32 „reguläre Fläche“.
-Äquivalent dazu ist: Sist lokal von der Form
-V(f) ={
-x∈R3⏐⏐f(x) = 0}
-für eineC∞-Funktionf:R3→R.
+Äquivalent dazu ist:S ist lokal von der Form
+V(f) =
+{
+x∈R3 ⏐⏐f(x) = 0
+}
+für eineC∞-Funktionf : R3 →R.
 Definition 71
-SeiS⊆R3eine reguläre Fläche, s∈S,F:U→V∩Seine lokale Parametrisierung um
+Sei S ⊆R3 eine reguläre Fläche,s∈S, F : U →V ∩S eine lokale Parametrisierung um
 s∈V:
-(u,v)↦→(x(u,v),y(u,v),z(u,v))
-Fürp=F−1(s)∈Usei
-JF(p) =
-∂x
-∂u(p)∂x
+(u,v) ↦→(x(u,v),y(u,v),z(u,v))
+Für p= F−1(s) ∈U sei
+JF(p) =
+
+
+∂x
+∂u(p) ∂x
 ∂v(p)
 ∂y
-∂u(p)∂y
+∂u(p) ∂y
 ∂v(p)
 ∂z
-∂u(p)∂z
-∂v(p)
+∂u(p) ∂z
+∂v(p)
+
 
-undDpF:R2→R3die durchJF(p)definierte lineare Abbildung.
-Dann heißt TsS:= Bild(DpF)dieTangentialebene ans∈S.
+und DpF : R2 →R3 die durchJF(p) definierte lineare Abbildung.
+Dann heißtTsS := Bild(DpF) die Tangentialebenean s∈S.
 Bemerkung 73 (Eigenschaften der Tangentialebene)
-a)TsSist2-dimensionaler Untervektorraum von R3.
-b)TsS=⟨˜u,˜v⟩, wobei ˜u,˜vdie Spaltenvektoren der Jacobi-Matrix JF(p)sind.
-c)TsShängt nicht von der gewählten Parametrisierung ab.
+a) TsS ist 2-dimensionaler Untervektorraum vonR3.
+b) TsS = ⟨˜u,˜v⟩, wobei˜u,˜v die Spaltenvektoren der Jacobi-MatrixJF(p) sind.
+c) TsS hängt nicht von der gewählten Parametrisierung ab.
  5.2. TANGENTIALEBENE
-d)SeiS=V(f)eine reguläre Fläche in R3, alsof:V→ReineC∞-Funktion,V⊆R3
-offen, grad(f)(x)̸= 0für allex∈S.
-Dann istTsS= (grad(f)(s))⊥für jedess∈S.
+d) Sei S = V(f) eine reguläre Fläche inR3, alsof : V →R eine C∞-Funktion,V ⊆R3
+offen, grad(f)(x) ̸= 0 für allex∈S.
+Dann istTsS = (grad(f)(s))⊥für jedess∈S.
 Beweis:
-a)JFist eine 3×2-Matrix, die mit einem 2×1-Vektor multipliziert wird. Das ist
+a) JF ist eine 3 ×2-Matrix, die mit einem 2 ×1-Vektor multipliziert wird. Das ist
 eine lineare Abbildung und aus der linearen Algebra ist bekannt, das das Bild ein
-Vektorraum ist. Da Rg(JF) = 2, ist auch dim(TsS) = 2.
+Vektorraum ist. DaRg(JF) = 2, ist auchdim(TsS) = 2.
 b) Hier kann man wie in Punkt a) argumentieren
-c)TsS={x∈R3|∃parametrisierte Kurve γ: [−ε,+ε]→Sfür einε >0mitγ(0) =
-sundγ′(0) =x}
+c) TsS = {x ∈R3|∃parametrisierte Kurveγ : [−ε,+ε] →S für einε >0 mit γ(0) =
+s und γ′(0) = x}
 Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken.
-d)Seix∈TsS,γ: [−ε,+ε]→Seine parametrisierte Kurve mit ε >0undγ′(0) =s,
-sodassγ′(0) =xgilt. Daγ(t)∈Sfür allet∈[−ε,ε], istf◦γ= 0
-⇒0 = (f◦γ)′(0) =⟨grad(f)(γ(0)),γ′(0)⟩
-⇒TsS⊆grad(f)(s)⊥
-dim=2= = = =⇒TsS= (grad(f)(s))⊥
+d) Sei x ∈TsS,γ : [−ε,+ε] →S eine parametrisierte Kurve mitε >0 und γ′(0) = s,
+sodass γ′(0) = x gilt. Daγ(t) ∈S für allet∈[−ε,ε], istf ◦γ = 0
+⇒0 = (f ◦γ)′(0) = ⟨grad(f)(γ(0)),γ′(0)⟩
+⇒TsS ⊆grad(f)(s)⊥
+dim=2= = = =⇒TsS = (grad(f)(s))⊥
 Definition 72
-a)EinNormalenfeld auf der regulären Fläche S⊆R3ist eine Abbildung n:S→S2⊆
-R3mitn(s)∈TsS⊥für jedess∈S.
-b)Sheißtorientierbar , wenn es ein stetiges Normalenfeld auf Sgibt.
-Manchmal wird zwischen einem Normalenfeld und einem Einheitsnormalenfeld unterschieden.
+a) Ein Normalenfeld auf der regulären FlächeS ⊆R3 ist eine Abbildungn: S →S2 ⊆
+R3 mit n(s) ∈TsS⊥für jedess∈S.
+b) S heißtorientierbar, wenn es ein stetiges Normalenfeld aufS gibt.
+Manchmal wird zwischen einemNormalenfeld und einemEinheitsnormalenfeld unterschieden.
 Im Folgenden werden diese Begriffe jedoch synonym benutzt.
 Bemerkung 74 (Eigenschaften von Normalenfeldern)
-a) Ein Normalenfeld auf Sist genau dann stetig, wenn es glatt ist (also C∞).
-b)Zu jedems∈Sgibt es eine Umgebung V⊆R3vonsund eine lokale Parametrisierung
-F:U→VvonSums, sodass auf F(U) =V∩Sein stetiges Normalenfeld existiert.
-c)Sist genau dann orientierbar, wenn es einen differenzierbaren Atlas von Saus lokalen
-Parametrisierungen Fi:Ui→Vi, i∈Igibt, sodass für alle i,j∈Fund alle
-s∈Vi∩Vj∩Sgilt:
-det(DsVi→Vj
-Fj◦F−1
-i
-∈R3×3)>0
+a) Ein Normalenfeld aufS ist genau dann stetig, wenn es glatt ist (alsoC∞).
+b) Zu jedems∈S gibt es eine UmgebungV ⊆R3 von sund eine lokale Parametrisierung
+F : U →V von S um s, sodass aufF(U) = V ∩S ein stetiges Normalenfeld existiert.
+c) S ist genau dann orientierbar, wenn es einen differenzierbaren Atlas vonS aus lokalen
+Parametrisierungen Fi : Ui → Vi, i ∈ I gibt, sodass für alle i,j ∈ F und alle
+s∈Vi ∩Vj ∩S gilt:
+det(Ds
+Vi→Vj
+  
+Fj ◦F−1
+i  
+∈R3×3
+) >0
 Beweis: Wird hier nicht geführt.
 Beispiel 46 (Normalenfelder)
-1)S=S2,n1=idS2ist ein stetiges Normalenfeld.
-Auchn2=−idS2ist ein stetiges Normalenfeld.
-2)S=Möbiusband (vgl. Abbildung 5.1) ist nicht orientierbar. Es existiert ein Normalenfeld,
+1) S = S2, n1 = idS2 ist ein stetiges Normalenfeld.
+Auch n2 = −idS2 ist ein stetiges Normalenfeld.
+2) S = Möbiusband (vgl. Abbildung 5.1) ist nicht orientierbar. Es existiert ein Normalenfeld,
  aber kein stetiges Normalenfeld.
  5.3. GAUSS-KRÜMMUNG
 Abbildung 5.1: Möbiusband
 5.3 Gauß-Krümmung
 Bemerkung 75
-SeiSeine reguläre Fläche, s∈S,n(s)ist ein Normalenvektor in s,x∈TsS,∥x∥= 1.
-SeiEder vonxundn(s)aufgespannte 2-dimensionale Untervektorraum von R3.
-Dann gibt es eine Umgebung V⊆R3vons, sodass
-C:= (s+E)∩S∩V
-das Bild einer durch Bogenlänge parametrisierten Kurve γ: [−ε,ε]→Senthält mit γ(0) =s
-undγ′(0) =x.
+Sei S eine reguläre Fläche,s∈S, n(s) ist ein Normalenvektor ins, x∈TsS, ∥x∥= 1.
+Sei E der vonx und n(s) aufgespannte 2-dimensionale Untervektorraum vonR3.
+Dann gibt es eine UmgebungV ⊆R3 von s, sodass
+C := (s+ E) ∩S∩V
+das Bild einer durch Bogenlänge parametrisierten Kurveγ : [−ε,ε] →S enthält mitγ(0) = s
+und γ′(0) = x.
 Beweis: „Satz über implizite Funktionen“1
 Definition 73
-In der Situation aus Bemerkung 75 heißt die Krümmung κγ(0)der Kurveγin der Ebene
-(s+E)im PunktsdieNormalkrümmung vonSinsin Richtung x=γ′(0).
-Man schreibt: κNor(s,x) :=κγ(0)
+In der Situation aus Bemerkung 75 heißt die Krümmungκγ(0) der Kurveγ in der Ebene
+(s+ E) im Punkts die Normalkrümmung von S in s in Richtungx= γ′(0).
+Man schreibt:κNor(s,x) := κγ(0)
 Hinweis: Die Krümmung ist nur bis auf das Vorzeichen bestimmt.
 Beispiel 47 (Gauß-Krümmung)
-1)S=S2=V(X2+Y2+Z2−1)ist die Kugel um den Ursprung mit Radius 1, n=id,
-s= (0,0,1),x= (1,0,0)
-⇒E=R·x+R·n(s)(x,z-Ebene)
-C=E∩Sist Kreislinie
-κNor(s,x) =1
-r= 1
-2)S=V(X2+Z2−1)⊆R3ist ein Zylinder (siehe Abbildung 5.2a). s= (1,0,0)
-x1= (0,1,0)⇒E1=R·e1+R·e2(x,y-Ebene)
-S∩E1=V(X2+Y2−1)∩E, Kreislinie in E
-⇒κNor(s,x1) =±1
-x2= (0,0,1),E2=R·e1+R·e3(x,z-Ebene)
-1Siehe z. B. https://github.com/MartinThoma/LaTeX-examples/tree/master/documents/Analysis%20II
+1) S = S2 = V(X2 + Y2 + Z2 −1) ist die Kugel um den Ursprung mit Radius 1,n= id,
+s= (0,0,1), x= (1,0,0)
+⇒E = R ·x+ R ·n(s) (x,z-Ebene)
+C = E∩S ist Kreislinie
+κNor(s,x) = 1
+r = 1
+2) S = V(X2 + Z2 −1) ⊆R3 ist ein Zylinder (siehe Abbildung 5.2a).s= (1,0,0)
+x1 = (0,1,0) ⇒E1 = R ·e1 + R ·e2 (x,y-Ebene)
+S∩E1 = V(X2 + Y2 −1) ∩E, Kreislinie inE
+⇒κNor(s,x1) = ±1
+x2 = (0,0,1),E2 = R ·e1 + R ·e3 (x,z-Ebene)
+1Siehe z. B.https://github.com/MartinThoma/LaTeX-examples/tree/master/documents/Analysis%20II
  5.3. GAUSS-KRÜMMUNG
-V∩E2∩S={
-(1,0,z)∈R3⏐⏐z∈R}
+V ∩E2 ∩S =
+{
+(1,0,z) ∈R3 ⏐⏐z∈R
+}
 ist eine Gerade
 ⇒κNor(s,x2) = 0
-3)S=V(X2−Y2−Z),s= (0,0,0)(Hyperbolisches Paraboloid, siehe Abbildung 5.2b)
-x1= (1,0,0),n(s) = (0,0,1)
-x2= (0,1,0)
+3) S = V(X2 −Y2 −Z), s= (0,0,0) (Hyperbolisches Paraboloid, siehe Abbildung 5.2b)
+x1 = (1,0,0), n(s) = (0,0,1)
+x2 = (0,1,0)
 κNor(s,x1) = 2
-κNor(s,x2) =−2
-−1.5−1−0.500.511.5
-−101012345
-xyz
-(a)S=V(X2+Z2−1)−2−1.5−1−0.500.511.52
-−2−1012−202
-xyz
+κNor(s,x2) = −2
+−1.5 −1 −0.5 0 0.5 1 1.5−1
+0
+1
+0
+1
+2
+3
+4
+5
+x
+y
+z
+(a) S = V(X2 + Z2 −1)
+−2 −1.5 −1 −0.5 0 0.5 1 1.5 2
+−2
+−1
+0
+1
+2
+−2
+0
+2
+x
+y
+z
 −4−2024f(x,y)
-(b)S=V(X2−Y2−Z)
+(b) S = V(X2 −Y2 −Z)
 Abbildung 5.2: Beispiele für reguläre Flächen
 Definition 74
-SeiS⊆R3eine reguläre Fläche, s∈Sundnein stetiges Normalenfeld auf S.
-γ: [−ε,ε]→Seine nach Bogenlänge parametrisierte Kurve ( ε >0) mitγ(0) =sund
-γ′′(0)̸= 0.
-Sein(0) :=γ′′(0)
+Sei S ⊆R3 eine reguläre Fläche,s∈S und n ein stetiges Normalenfeld aufS.
+γ : [−ε,ε] →S eine nach Bogenlänge parametrisierte Kurve (ε >0) mit γ(0) = s und
+γ′′(0) ̸= 0.
+Sei n(0) := γ′′(0)
 ∥γ′′(0)∥. Zerlege
-n(0) =n(0)t+n(0)⊥mitn(0)t∈TsSundn(0)⊥∈(TsS)⊥
-Dann istn(0)⊥=⟨n(0),n(s)⟩·n(s)
-κNor(s,γ) :=⟨γ′′(0),n(s)⟩dieNormalkrümmung .
+n(0) = n(0)t + n(0)⊥mit n(0)t ∈TsS und n(0)⊥∈(TsS)⊥
+Dann istn(0)⊥= ⟨n(0),n(s)⟩·n(s)
+κNor(s,γ) := ⟨γ′′(0),n(s)⟩die Normalkrümmung.
 Bemerkung 76
-Seiγ(t) =γ(−t),t∈[−ε,ε]. Dann istκNor(s,γ) =κNor(s,γ).
-Beweis:γ′′(0) =γ′′(0), daγ′(0) =−γ′(0).
-Es gilt:κNor(s,γ)hängt nur von|γ′(0)|ab und ist gleich κNor(s,γ′(0)).
+Sei γ(t) = γ(−t), t∈[−ε,ε]. Dann istκNor(s,γ) = κNor(s,γ).
+Beweis: γ′′(0) = γ′′(0), daγ′(0) = −γ′(0).
+Es gilt:κNor(s,γ) hängt nur von|γ′(0)|ab und ist gleichκNor(s,γ′(0)).
 Bemerkung 77
-SeiSeine reguläre Fläche und n=n(s)ein Normalenvektor an Sins.
-SeiT1
-sS={x∈TsS|∥x∥= 1}∼=S1. Dann ist
+Sei S eine reguläre Fläche undn= n(s) ein Normalenvektor anS in s.
+Sei T1
+sS = {x∈TsS |∥x∥= 1 }∼= S1. Dann ist
 κn
-Nor(s) :T1
-sS→R, x↦→κNor(s,x)
-eine glatte Funktion und Bildκn
-Nor(s)ist ein abgeschlossenes Intervall.
+Nor(s) : T1
+sS →R, x ↦→κNor(s,x)
+eine glatte Funktion undBild κn
+Nor(s) ist ein abgeschlossenes Intervall.
 Definition 75
-SeiSeine reguläre Fläche und n=n(s)ein Normalenvektor an Sins.
+Sei S eine reguläre Fläche undn= n(s) ein Normalenvektor anS in s.
  5.3. GAUSS-KRÜMMUNG
-a)κn
-1(s) : = min{
+a) κn
+1 (s) : = min
+{
 κn
-Nor(s,x)⏐⏐x∈T1
-sS}
+Nor(s,x)
+⏐⏐x∈T1
+sS
+}
 und
 κn
-2(s) : = max{
+2 (s) : = max
+{
 κn
-Nor(s,x)⏐⏐x∈T1
-sS}heißenHauptkrümmungen vonSins.
-b)K(s) :=κn
-1(s)·κn
-2(s)heißtGauß-Krümmung vonSins.
+Nor(s,x)
+⏐⏐x∈T1
+sS
+}
+heißenHauptkrümmungenvon S in s.
+b) K(s) := κn
+1 (s) ·κn
+2 (s) heißtGauß-Krümmungvon S in s.
 Bemerkung 78
-Ersetzt man ndurch−n, so gilt:
+Ersetzt mann durch −n, so gilt:
 κ−n
-Nor(s,x) =−κn
-Nor(x)∀x∈T1
+Nor(s,x) = −κn
+Nor(x) ∀x∈T1
 sS
 ⇒κ−n
-1(s) =−κn
-2(s)
+1 (s) = −κn
+2 (s)
 κ−n
-2(s) =−κn
-1(s)
-undK−n(s) =Kn(s) =:K(s)
+2 (s) = −κn
+1 (s)
+und K−n(s) = Kn(s) =: K(s)
 Beispiel 48
-1)S=S2. Dann istκ1(s) =κ2(s) =±1∀s∈S2
+1) S = S2. Dann istκ1(s) = κ2(s) = ±1 ∀s∈S2
 ⇒K(s) = 1
 2) Zylinder:
-κ1(s) = 0,κ2(s) = 1⇒K(s) = 0
+κ1(s) = 0,κ2(s) = 1 ⇒K(s) = 0
 3) Sattelpunkt auf hyperbolischem Paraboloid:
-κ1(s)<0,κ2(s) = 0→K(s)<0
-4)S=Torus. Siehe Abbildung 5.3
-s1s2
+κ1(s) <0,κ2(s) = 0 →K(s) <0
+4) S = Torus. Siehe Abbildung 5.3
+s1
+s2
 s3
-Abbildung 5.3: K(s1)>0,K(s2) = 0,K(s3)<0
+Abbildung 5.3:K(s1) >0, K(s2) = 0, K(s3) <0
 Bemerkung 79
-SeiSeine reguläre Fläche, s∈Sein Punkt.
+Sei S eine reguläre Fläche,s∈S ein Punkt.
  5.4. ERSTE UND ZWEITE FUNDAMENTALFORM
-a) IstK(s)>0, so liegtSin einer Umgebung von sganz auf einer Seite von TsS+s.
-b) IstK(s)<0, so schneidet jede Umgebung von sinSbeide Seiten von TsS+s.
+a) Ist K(s) >0, so liegtS in einer Umgebung vons ganz auf einer Seite vonTsS+ s.
+b) Ist K(s) <0, so schneidet jede Umgebung vons in S beide Seiten vonTsS+ s.
 5.4 Erste und zweite Fundamentalform
-SeiS⊆R3eine reguläre Fläche, s∈S,TsSdie Tangentialebene an SinsundF:U→Veine
-lokale Parametrisierung von Sums. Weiter sei p:=F−1(s).
+Sei S ⊆R3 eine reguläre Fläche,s∈S, TsS die Tangentialebene anS in s und F : U →V eine
+lokale Parametrisierung vonS um s. Weiter seip:= F−1(s).
 Definition 76
-SeiIS∈R2×2definiert als
-IS: =(g1,1(s)g1,2(s)
-g1,2(s)g2,2(s))
-=(E(s)F(s)
-F(s)G(s))
-mitgi,j=gs(DpF(ei),DpF(ej))
-=⟨∂F
-∂ui(p),∂F
-∂uj(p)⟩i,j∈{1,2}
-Die Matrix ISheißterste Fundamentalform vonSbzgl. der Parametrisierung F.
+Sei IS ∈R2×2 definiert als
+IS : =
+(g1,1(s) g1,2(s)
+g1,2(s) g2,2(s)
+)
+=
+(E(s) F(s)
+F(s) G(s)
+)
+mit gi,j = gs(DpF(ei),DpF(ej))
+= ⟨∂F
+∂ui
+(p), ∂F
+∂uj
+(p)⟩ i,j ∈{1,2 }
+Die MatrixIS heißterste Fundamentalformvon S bzgl. der ParametrisierungF.
 Bemerkung 80
-a)Die Einschränkung des Standardskalarproduktes des R3aufTsSmachtTsSzu einem
+a) Die Einschränkung des Standardskalarproduktes desR3 auf TsS macht TsS zu einem
 euklidischen Vektorraum.
-b){DpF(e1),DpF(e2)}ist eine Basis von TsS.
-c)Bzgl. der Basis{DpF(e1),DpF(e2)}hat das Standardskalarprodukt aus Bemerkung
- 80.a die Darstellungsmatrix IS.
-d)gi,j(s)ist eine differenzierbare Funktion von s.
+b) {DpF(e1),DpF(e2) }ist eine Basis vonTsS.
+c) Bzgl. der Basis {DpF(e1),DpF(e2) }hat das Standardskalarprodukt aus Bemerkung
+ 80.a die DarstellungsmatrixIS.
+d) gi,j(s) ist eine differenzierbare Funktion vons.
 Bemerkung 81
-det(IS) =∂F
-∂u1(p)×∂F
-∂u2(p)2
-Beweis: Sei∂F
-∂u1(p) =
-x1
+det(IS) =
+
+∂F
+∂u1
+(p) ×∂F
+∂u2
+(p)
+
+2
+Beweis: Sei ∂F
+∂u1
+(p) =
+
+
+x1
 x2
-x3
-,∂F
-∂u2(p) =
-y1
+x3
+
+, ∂F
+∂u2
+(p) =
+
+
+y1
 y2
-y3
+y3
+
 
-Dann ist∂F
-∂u1(p)×∂F
-∂u2(p) =
-z1
+Dann ist ∂F
+∂u1
+(p) ×∂F
+∂u2
+(p) =
+
+
+z1
 z2
-z3
+z3
+
 mit
-z1=x2y3−x3y2
-z2=x3y1−x1y3
-z3=x1y2−x2y1
+z1 = x2y3 −x3y2
+z2 = x3y1 −x1y3
+z3 = x1y2 −x2y1
 ⇒∥∂F
-∂u1(p)×∂F
-∂u2(p)∥=z2
-1+z2
-2+z2
+∂u1
+(p) ×∂F
+∂u2
+(p)∥= z2
+1 + z2
+2 + z2
 3
  5.4. ERSTE UND ZWEITE FUNDAMENTALFORM
-det(IS) =g1,1g2,2−g2
+det(IS) = g1,1g2,2 −g2
 1,2
-=⟨
-x1
+=
+⟨
+
+x1
 x2
-x3
-,
-x1
+x3
+
+,
+
+
+x1
 x2
-x3
-⟩⟨
-y1
+x3
+
+
+⟩⟨
+
+y1
 y2
-y3
-,
-y1
+y3
+
+,
+
+
+y1
 y2
-y3
-⟩
-−⟨
-x1
+y3
+
+
+⟩
+−
+⟨
+
+x1
 x2
-x3
-,
-y1
+x3
+
+,
+
+
+y1
 y2
-y3
-⟩2
+y3
+
+
+⟩2
 = (x2
-1+x2
-2+x2
+1 + x2
+2 + x2
 3)(y2
-1+y2
-2+y2
-3)−(x1y1+x2y2+x3y3)2
+1 + y2
+2 + y2
+3) −(x1y1 + x2y2 + x3y3)2
 Definition 77
-a)Das Differential dA=√
-det(I)du1du2heißtFlächenelement vonSbzgl. der Parametrisierung
+a) Das DifferentialdA=
+√
+det(I)du1du2 heißtFlächenelementvon S bzgl. der Parametrisierung
  F.
-b) Für eine Funktion f:V→Rheißt
+b) Für eine Funktionf : V →R heißt
+∫
+V
+fdA:=
 ∫
-VfdA:=∫
-Uf(F(u1,u2)
-=:s)√
-detI(s)du1du2
-derWert des Integrals vonfüberV, falls das Integral rechts existiert.
+U
+f(F(u1,u2)  
+=:s
+)
+√
+det I(s)du1du2
+der Wert des Integralsvon f über V, falls das Integral rechts existiert.
 Bemerkung 82
-a)∫
-VfdAist unabhängig von der gewählten Parametrisierung.
-b) Seif:S→Reine Funktion, die im Sinne von Definition 77.b lokal integrierbar ist.
-Dann ist∫
-SfdAwohldefiniert, falls (z. B.) Skompakt ist.
+a)
+∫
+V fdA ist unabhängig von der gewählten Parametrisierung.
+b) Sei f : S →R eine Funktion, die im Sinne von Definition 77.b lokal integrierbar ist.
+Dann ist
+∫
+SfdA wohldefiniert, falls (z. B.)S kompakt ist.
 Etwa:
 ∫
-SfdA=n∑
-i=1∫
-VifdA
-−∑
-i̸=j∫
-Vi∩VjfdA
-+∑
-i,j,k∫
-Vi∩Vj∩VkfdA
+S
+fdA=
+n∑
+i=1
+∫
+Vi
+fdA
+−
+∑
+i̸=j
+∫
+Vi∩Vj
+fdA
++
+∑
+i,j,k
+∫
+Vi∩Vj∩Vk
+fdA
 −...
 Beweis:
 a) Mit Transformationsformel.
 b) Ist dem Leser überlassen.
 Proposition 5.1
-SeiS⊆R3eine reguläre, orientierbare Fläche mit glatten Normalenfeld n:S→S2.
+Sei S ⊆R3 eine reguläre, orientierbare Fläche mit glatten Normalenfeldn : S →S2.
 Dann gilt:
-a)ninduziert für jedes s∈Seine lineare Abbildung dsn:TsS→Tn(s)S2durch
-dsn(x) =d
-dtn(s„+“tx
-Soll auf Fläche Sbleiben)⏐⏐⏐
+a) n induziert für jedess∈S eine lineare Abbildungdsn: TsS →Tn(s)S2 durch
+dsn(x) = d
+dtn(s„+“tx  
+Soll auf FlächeS bleiben
+)
+⏐⏐⏐
 t=0
-Die Abbildung dsnheißtWeingarten-Abbildung
+Die Abbildungdsn heißtWeingarten-Abbildung
  5.4. ERSTE UND ZWEITE FUNDAMENTALFORM
-b)Tn(s)S2=TsS.
-c)dsnist ein Endomorphismus von TsS.
-d)dsnist selbstadjungiert bzgl. des Skalarproduktes IS.
-Hinweis: Die Weingarten-Abbildung wird auch Formoperator genannt.
+b) Tn(s)S2 = TsS.
+c) dsn ist ein Endomorphismus vonTsS.
+d) dsn ist selbstadjungiert bzgl. des SkalarproduktesIS.
+Hinweis: Die Weingarten-Abbildung wird auchFormoperator genannt.
  5.4. ERSTE UND ZWEITE FUNDAMENTALFORM
 Beweis:
 a) Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken.
-b)Tn(S)S2=⟨n(s)⟩⊥=TsS
-c) Wegen Proposition 5.1 (a) ist dsnein Homomorphismus.
-d) Zu zeigen:∀x,y∈IsS:⟨x,dsn(y)⟩=⟨dsn(x),y⟩
+b) Tn(S)S2 = ⟨n(s)⟩⊥= TsS
+c) Wegen Proposition 5.1 (a) istdsn ein Homomorphismus.
+d) Zu zeigen:∀x,y ∈IsS : ⟨x,dsn(y)⟩= ⟨dsn(x),y⟩
 Aufgrund der Bilinearität des Skalarproduktes genügt es diese Eigenschaft für die
 Basisvektoren zu zeigen.
-Seixi=DpF(ei) =∂F
-∂ui(p)i= 1,2
-Beh.:⟨xi,dsn(xj)⟩=⟨∂2F
-∂ui∂uj(p),dsn(xi)⟩
-⇒⟨∂2F
-∂ui∂uj(p),dsn(xi)⟩=⟨xj,dsn(xi)⟩
-Bew.: 0 =⟨∂F
-∂u(p+tej),n(p+tej)⟩
-⇒0 =d
-dt(
+Sei xi = DpF(ei) = ∂F
+∂ui
+(p) i= 1,2
+Beh.: ⟨xi,dsn(xj)⟩= ⟨ ∂2F
+∂ui∂uj
+(p),dsn(xi)⟩
+⇒⟨ ∂2F
+∂ui∂uj
+(p),dsn(xi)⟩= ⟨xj,dsn(xi)⟩
+Bew.: 0 = ⟨∂F
+∂u(p+ tej),n(p+ tej)⟩
+⇒0 = d
+dt
+(
 ⟨∂F
-∂u(p+tej),n(p+tej)⟩)⏐⏐⏐
+∂u(p+ tej),n(p+ tej)⟩
+)⏐⏐⏐
 t=0
-=⟨d
-dt∂F
-∂ui(p+tej)
-
+= ⟨d
+dt
+∂F
+∂ui
+(p+ tej)
+  
 ∂2F
-∂uj∂ui(p)⏐⏐⏐
-t=0,n(s)⟩+⟨xi,dsnDpF(ej)
-xj⟩
+∂uj∂ui
+(p)
+⏐⏐⏐
+t=0
+,n(s)⟩+ ⟨xi,dsnDpF(ej)  
+xj
+⟩
 Definition 78
-Die durch−dsndefinierte symmetrische Bilinearform auf TsSheißtzweite FundamentalformvonSinsbzgl.F.
-
-Man schreibt: IIs(x,y) =⟨−dsn(x),y⟩=Is(−dsn(x),y)
+Die durch−dsndefinierte symmetrische Bilinearform aufTsS heißtzweite Fundamentalform
+ von S in s bzgl. F.
+Man schreibt:IIs(x,y) = ⟨−dsn(x),y⟩= Is(−dsn(x),y)
 Bemerkung 83
-Bezüglich der Basis {x1,x2}vonTsShatIIsdie Darstellungsmatrix
+Bezüglich der Basis{x1,x2 }von TsS hat IIs die Darstellungsmatrix
 (h(s)
-i,j)i,j=1,2mithi,j(s) =⟨∂2F
-∂ui∂uj(p),n(s)⟩
+i,j)i,j=1,2 mit hi,j(s) = ⟨ ∂2F
+∂ui∂uj
+(p),n(s)⟩
 Proposition 5.2
-Seiγ: [−ε,ε]→Seine nach Bogenlänge parametrisierte Kurve mit γ(0) =s. Dann gilt:
-κNor(s,γ) =IIs(γ′(0),γ′(0))
-Beweis: Nach Definition 74 ist κNor(s,γ) =⟨γ′′(0),n(s)⟩. Nach Voraussetzung gilt
-n(γ(t))⊥γ′(t)⇔⟨γ′′(0),n(s)⟩= 0
-Die Ableitung nach tergibt
-0 =d
+Sei γ : [−ε,ε] →S eine nach Bogenlänge parametrisierte Kurve mitγ(0) = s. Dann gilt:
+κNor(s,γ) = IIs(γ′(0),γ′(0))
+Beweis: Nach Definition 74 istκNor(s,γ) = ⟨γ′′(0),n(s)⟩. Nach Voraussetzung gilt
+n(γ(t)) ⊥γ′(t) ⇔⟨γ′′(0),n(s)⟩= 0
+Die Ableitung nacht ergibt
+0 = d
 dt(⟨n(γ(t)),γ′(t))
-=⟨d
-dtn(γ(t))⏐⏐⏐
-t=0,γ′(0)⟩
-+⟨n(s),γ′′(0)⟩
+=
+⟨d
+dtn(γ(t))
+⏐⏐⏐
+t=0
+,γ′(0)
+⟩
++ ⟨n(s),γ′′(0)⟩
  5.4. ERSTE UND ZWEITE FUNDAMENTALFORM
-=⟨dsn(γ′(0)),γ′(0)⟩+κNor(s,γ)
-=−IIs(γ′(0),γ′(0)) +κNor(s,γ)
+= ⟨dsn(γ′(0)),γ′(0)⟩+ κNor(s,γ)
+= −IIs(γ′(0),γ′(0)) + κNor(s,γ)
 Folgerung 5.3
 Die beiden Definitionen von Normalkrümmung in Abschnitt 5.1 stimmen überein:
-κNor(s,γ) =κNor(s,γ′(0))
+κNor(s,γ) = κNor(s,γ′(0))
 Satz 5.4
-SeiS⊆R3eine reguläre, orientierbare Fläche und s∈S.
-a) Die Hauptkrümmungen κ1(s),κ2(s)sind die Eigenwerte von IIs.
-b) Für die Gauß-Krümmung gilt: K(s) = det(IIs)
+Sei S ⊆R3 eine reguläre, orientierbare Fläche unds∈S.
+a) Die Hauptkrümmungenκ1(s),κ2(s) sind die Eigenwerte vonIIs.
+b) Für die Gauß-Krümmung gilt:K(s) = det(IIs)
 Beweis:
-a)IIsist symmetrisch, IsShat also eine Orthonormalbasis aus Eigenvektoren y1,y2von
-IIs. Istx∈TsS,∥x∥= 1, so gibt es ϕ∈[0,2π)mitx= cosϕ·y1+ sinϕ·y2.
-Seienλ1,λ2die Eigenwerte von IIs, alsoIIs(yi,yi) =λi. Dann gilt:
-IIs(x,x) = cos2ϕλ1+ sin2ϕλ2
-= (1−sin2ϕ)λ1+ sin2ϕλ2
-=λ1+ sin2ϕ(λ2−λ1)≥λ1
-= cos2ϕ+ (1−cos2ϕ)λ2
-=λ2−cos2ϕ(λ2−λ1)≤λ2
-Prop. 5.2= = = = =⇒λ1= min{
-κNor(s,x)⏐⏐x∈T1
-sS}
-λ2= max{
-κNor(s,x)⏐⏐x∈T1
-sS}
+a) IIs ist symmetrisch,IsS hat also eine Orthonormalbasis aus Eigenvektoreny1,y2 von
+IIs. Istx∈TsS, ∥x∥= 1, so gibt esϕ∈[0,2π) mit x= cos ϕ·y1 + sinϕ·y2.
+Seien λ1,λ2 die Eigenwerte vonIIs, alsoIIs(yi,yi) = λi. Dann gilt:
+IIs(x,x) = cos2 ϕλ1 + sin2 ϕλ2
+= (1 −sin2 ϕ)λ1 + sin2 ϕλ2
+= λ1 + sin2 ϕ(λ2 −λ1) ≥λ1
+= cos2 ϕ+ (1 −cos2 ϕ)λ2
+= λ2 −cos2 ϕ(λ2 −λ1) ≤λ2
+Prop. 5.2
+= = = = =⇒λ1 = min
+{
+κNor(s,x)
+⏐⏐x∈T1
+sS
+}
+λ2 = max
+{
+κNor(s,x)
+⏐⏐x∈T1
+sS
+}
 Satz 5.5 (Satz von Gauß-Bonnet)
-SeiS⊆R3eine kompakte orientierbare reguläre Fläche. Dann gilt:
+Sei S ⊆R3 eine kompakte orientierbare reguläre Fläche. Dann gilt:
 ∫
-SK(s)dA= 2πχ(S)
-Dabei istχ(S)die Euler-Charakteristik von S.
+S
+K(s)dA= 2πχ(S)
+Dabei istχ(S) die Euler-Charakteristik vonS.
 Beweis: Der Beweis wird hier nicht geführt. Er kann in „Elementare Differentialgeometrie“ von
 Christian Bär (2. Auflage), ISBN 978-3-11-022458-0, ab Seite 281 nachgelesen werden.
 Lösungen der Übungsaufgaben
 Lösung zu Aufgabe 1
-Teilaufgabe a) Es gilt:
-(i)∅,X∈TX.
-(ii)TXist offensichtlich unter Durchschnitten abgeschlossen, d. h. es gilt für alle U1,U2∈
-TX:U1∩U2∈TX.
-(iii)Auch unter beliebigen Vereinigungen ist TXabgeschlossen, d. h. es gilt für eine
-beliebige Indexmenge Iund alleUi∈TXfür allei∈I:⋃
-i∈IUi∈TX
-Also ist (X,TX)ein topologischer Raum.
-Teilaufgabe b) Wählex= 1,y= 0. Dann gilt x̸=yund die einzige Umgebung von x
-istX. Day= 0∈Xkönnen also xundynicht durch offene Mengen getrennt werden.
-(X,TX)ist also nicht hausdorffsch.
-Teilaufgabe c) Nach Bemerkung 4 sind metrische Räume hausdorffsch. Da (X,TX)nach
-(b) nicht hausdorffsch ist, liefert die Kontraposition der Trennungseigenschaft, dass (X,TX)
+Teilaufgabe a)Es gilt:
+(i) ∅,X ∈TX.
+(ii) TX ist offensichtlich unter Durchschnitten abgeschlossen, d. h. es gilt für alleU1,U2 ∈
+TX : U1 ∩U2 ∈TX.
+(iii) Auch unter beliebigen Vereinigungen istTX abgeschlossen, d. h. es gilt für eine
+beliebige IndexmengeI und alleUi ∈TX für allei∈I : ⋃
+i∈I Ui ∈TX
+Also ist(X,TX) ein topologischer Raum.
+Teilaufgabe b)Wähle x= 1,y = 0. Dann giltx̸= y und die einzige Umgebung vonx
+ist X. Day = 0 ∈X können alsox und y nicht durch offene Mengen getrennt werden.
+(X,TX) ist also nicht hausdorffsch.
+Teilaufgabe c)Nach Bemerkung 4 sind metrische Räume hausdorffsch. Da(X,TX) nach
+(b) nicht hausdorffsch ist, liefert die Kontraposition der Trennungseigenschaft, dass(X,TX)
 kein metrischer Raum sein kann.
 Lösung zu Aufgabe 2
 Teilaufgabe a)
-Beh.:∀a∈Z:{a}ist abgeschlossen.
-Seia∈Zbeliebig. Dann gilt:
+Beh.: ∀a∈Z : {a}ist abgeschlossen.
+Sei a∈Z beliebig. Dann gilt:
 Wenn jemand diese Aufgabe gemacht hat, bitte die Lösung an info@martin-thoma.de
 schicken.
 Teilaufgabe b)
-Beh.:{−1,1}ist nicht offen
-Bew.:durch Widerspruch
-Annahme:{−1,1}ist offen.
-Dann gibt es T⊆B, sodass⋃
-M∈TM={−1,1}. Aber alleU∈Bhaben unendlich viele
-Elemente. Auch endlich viele Schnitte von Elementen in Bhaben unendlich viele Elemente
-⇒keine endliche nicht-leere Menge kann in dieser Topologie offen sein ⇒{− 1,1}ist
+Beh.: {−1,1 }ist nicht offen
+Bew.: durch Widerspruch
+Annahme: {−1,1 }ist offen.
+Dann gibt esT ⊆B, sodass⋃
+M∈T M = {−1,1 }. Aber alleU ∈B haben unendlich viele
+Elemente. Auch endlich viele Schnitte von Elementen inB haben unendlich viele Elemente
+⇒keine endliche nicht-leere Menge kann in dieser Topologie offen sein⇒{− 1,1 }ist
 nicht offen. ■
 Teilaufgabe c)
-Beh.:Es gibt unendlich viele Primzahlen.
+Beh.: Es gibt unendlich viele Primzahlen.
  Lösungen der Übungsaufgaben
-Bew.:durch Widerspruch
-Annahme: Es gibt nur endlich viele Primzahlen p∈P
+Bew.: durch Widerspruch
+Annahme: Es gibt nur endlich viele Primzahlenp∈P
 Dann ist
-Z\{− 1,+1}FS d. Arithmetik=⋃
-p∈PU0,p
-endlich. Das ist ein Widerspruch zu |Z|ist unendlich und |{−1,1}|ist endlich. ■
+Z \{−1,+1 }FS d. Arithmetik=
+⋃
+p∈P
+U0,p
+endlich. Das ist ein Widerspruch zu|Z|ist unendlich und|{−1,1 }|ist endlich. ■
 Lösung zu Aufgabe 3
-(a)Beh.:Die offenen Mengen von Psind Vereinigungen von Mengen der Form
+(a) Beh.: Die offenen Mengen vonP sind Vereinigungen von Mengen der Form
+∏
+j∈J
+Uj ×
 ∏
-j∈JUj×∏
-i∈N,i̸=jPi
-wobeiJ⊆Nendlich und Uj⊆Pjoffen ist.
+i∈N,i̸=j
+Pi
+wobei J ⊆N endlich undUj ⊆Pj offen ist.
 Beweis: Nach Definition der Produkttopologie bilden Mengen der Form
 ∏
-i∈JUj×∏
-i∈N\JPi
-wobeiJ⊆Nendlich und Uj⊆Pjoffen∀j∈Jeine Basis der Topologie.
-Damit sind die offenen Mengen von PVereinigungen von Mengen der obigen
+i∈J
+Uj ×
+∏
+i∈N\J
+Pi
+wobei J ⊆N endlich undUj ⊆Pj offen ∀j ∈J eine Basis der Topologie.
+Damit sind die offenen Mengen vonP Vereinigungen von Mengen der obigen
 Form. ■
-(b)Beh.:Die Zusammenhangskomponenten von Psind alle einpunktig.
-Beweis: Es seinenx,y∈Pundxsowieyliegen in der gleichen Zusammenhangskomponente
- Z⊆P. DaZzusammenhängend ist und ∀i∈I:pi:P→Piist
-stetig, istpi(Z)⊆Pizusammenhängend für alle i∈N. Die zusammenhängenden
-Mengen von Pisind genau{0}und{1}, d. h. für alle i∈Ngilt entweder
-pi(Z)⊆{0}oderpi(Z)⊆{1}. Es seizi∈{0,1}so, dasspi(Z)⊆{zi}für
-allei∈N. Dann gilt also:
+(b) Beh.: Die Zusammenhangskomponenten vonP sind alle einpunktig.
+Beweis: Es seinenx,y ∈P und x sowie y liegen in der gleichen Zusammenhangskomponente
+ Z ⊆P. DaZ zusammenhängend ist und∀i∈I : pi : P →Pi ist
+stetig, istpi(Z) ⊆Pi zusammenhängend für allei∈N. Die zusammenhängenden
+Mengen von Pi sind genau {0 }und {1 }, d. h. für allei ∈N gilt entweder
+pi(Z) ⊆{ 0 }oder pi(Z) ⊆{ 1 }. Es seizi ∈{ 0,1 }so, dasspi(Z) ⊆{ zi }für
+alle i∈N. Dann gilt also:
 pi(x)
-=xi=zi=pi(y)
-=yi∀i∈N
-Somit folgt: x=y ■
+=xi
+= zi = pi(y)
+=yi
+∀i∈N
+Somit folgt:x= y ■
 Lösung zu Aufgabe 4
-(a)Beh.: GLn(R)ist nicht kompakt.
-Bew.: det:GLn(R)→R\{0}ist stetig. Außerdem ist det(GLn(R)) =R\{0}
-nicht kompakt.22⇒GLn(R)ist nicht kompakt. ■
-(b)Beh.: SL1(R)ist nicht kompakt, für n>1istSLn(R)kompakt.
-Bew.:FürSL1(R)gilt:SL1(R) ={
-A∈R1×1⏐⏐detA= 1}
-=(
-1)∼={1}.22⇒SL1(R)
+(a) Beh.: GLn(R) ist nicht kompakt.
+Bew.: det : GLn(R) →R \{0 }ist stetig. Außerdem istdet(GLn(R)) = R \{0 }
+nicht kompakt.22⇒GLn(R) ist nicht kompakt. ■
+(b) Beh.: SL1(R) ist nicht kompakt, fürn> 1 ist SLn(R) kompakt.
+Bew.: Für SL1(R) gilt: SL1(R) =
+{
+A∈R1×1 ⏐⏐det A= 1
+}
+=
+(
+1
+)∼= {1 }. 22⇒SL1(R)
 ist kompakt.
  Lösungen der Übungsaufgaben
-SLn(R)⊆GLn(R)lässt sich mit einer Teilmenge des Rn2identifizieren. Nach Satz 1.1
+SLn(R) ⊆GLn(R) lässt sich mit einer Teilmenge desRn2
+identifizieren. Nach Satz 1.1
 sind diese genau dann kompakt, wenn sie beschränkt und abgeschlossen sind. Definiere
-nun für für n∈N≥2,m∈N:
-Am=diagn(m,1
+nun für fürn∈N≥2,m ∈N:
+Am = diagn(m, 1
 m,..., 1)
-Dann gilt: detAm= 1, d. h.Am∈SLn(R), undAmist unbeschränkt, da ∥Am∥∞=
+Dann gilt:det Am = 1, d. h.Am ∈SLn(R), undAm ist unbeschränkt, da∥Am∥∞=
 m−−−−→
-m→∞∞. ■
-(c)Beh.:P(R)ist kompakt.
-Bew.:P(R)∼=Sn/x∼−x. Per Definition der Quotiententopologie ist die Klassenabbildung
- stetig. Da Snals abgeschlossene und beschränkte Teilmenge des Rn+1kompakt
-ist22⇒P (R)ist kompakt. ■
+m→∞
+∞. ■
+(c) Beh.: P(R) ist kompakt.
+Bew.: P(R) ∼= Sn/x∼−x. Per Definition der Quotiententopologie ist die Klassenabbildung
+ stetig. DaSn als abgeschlossene und beschränkte Teilmenge desRn+1 kompakt
+ist 22⇒P(R) ist kompakt. ■
 Lösung zu Aufgabe 5
 Die Definition von Homöomorphismus kann auf Seite 9 nachgelesen werden.
 Definition 79
-Seien (G,∗)und(H,◦)Gruppen und ϕ:G→Heine Abbildung.
-ϕheißtHomomorphismus , wenn
-∀g1,g2∈G:ϕ(g1∗g2) =ϕ(g1)◦ϕ(g2)
+Seien (G,∗) und (H,◦) Gruppen undϕ: G→H eine Abbildung.
+ϕ heißtHomomorphismus, wenn
+∀g1,g2 ∈G: ϕ(g1 ∗g2) = ϕ(g1) ◦ϕ(g2)
 gilt.
 Es folgt direkt:
-1)SeiX=Rmit der Standarttopologie und ϕ1:idRundR= (R,+). Dann istϕ1ein
+1) Sei X = R mit der Standarttopologie undϕ1 : idR und R = (R,+). Dann istϕ1 ein
 Gruppenhomomorphismus und ein Homöomorphismus.
-2)SeiG= (Z,+)undH= (Z/3Z,+). Dann ist ϕ2:G→H,x↦→xmod 3ein
-Gruppenhomomorphismus. Jedoch ist ϕ2nicht injektiv, also sicher kein Homöomorphismus.
+2) Sei G = ( Z,+) und H = ( Z/3Z,+). Dann ist ϕ2 : G →H,x ↦→x mod 3 ein
+Gruppenhomomorphismus. Jedoch istϕ2 nicht injektiv, also sicher kein Homöomorphismus.
 
-3)SeiXein topologischer Raum. Dann ist idXein Homöomorphismus. Da keine
-Verknüpfung auf Xdefiniert wurde, ist Xkeine Gruppe und daher auch kein Gruppenhomomorphismus.
+3) Sei X ein topologischer Raum. Dann ist idX ein Homöomorphismus. Da keine
+Verknüpfung aufX definiert wurde, istX keine Gruppe und daher auch kein Gruppenhomomorphismus.
 
 Also: Obwohl die Begriffe ähnlich klingen, werden sie in ganz unterschiedlichen Kontexten
 verwendet.
@@ -3536,300 +4255,325 @@ Lösung zu Aufgabe 6
 Die Definition einer Isotopie kann auf Seite 20 nachgelesen werden, die einer Isometrie auf
 Seite 6.
 Definition 80
-Seien (G,∗)und(H,◦)Gruppen und ϕ:G→Heine Abbildung.
-ϕheißtIsomorphismus , wennϕein bijektiver Homomorphismus ist.
+Seien (G,∗) und (H,◦) Gruppen undϕ: G→H eine Abbildung.
+ϕ heißtIsomorphismus, wennϕ ein bijektiver Homomorphismus ist.
 Eine Isotopie ist also für Knoten definiert, Isometrien machen nur in metrischen Räumen
 Sinn und ein Isomorphismus benötigt eine Gruppenstruktur.
  Lösungen der Übungsaufgaben
 Lösung zu Aufgabe 7
-(a)Vor.:SeiMeine topologische Mannigfaltigkeit.
-Beh.:Mist wegzusammehängend ⇔Mist zusammenhängend
-Beweis: „⇒“: DaMinsbesondere ein topologischer Raum ist folgt diese Richtung
+(a) Vor.:Sei M eine topologische Mannigfaltigkeit.
+Beh.: M ist wegzusammehängend⇔M ist zusammenhängend
+Beweis: „⇒“: DaM insbesondere ein topologischer Raum ist folgt diese Richtung
 direkt aus Bemerkung 23.
-„⇐“: Seienx,y∈Mund
-Z:={z∈M|∃Weg vonxnachz}
+„⇐“: Seienx,y ∈M und
+Z := {z∈M |∃Weg vonx nach z}
 Es gilt:
-(i)Z̸=∅, daMlokal wegzusammenhängend ist
-(ii)Zist offen, da Mlokal wegzusammenhängend ist
-(iii)ZC:={˜z∈M|∄Weg vonxnach ˜z}ist offen
-DaMeine Mannigfaltigkeit ist, existiert zu jedem ˜z∈ZCeine offene und
-wegzusammenhängende Umgebung U˜z⊆M.
-Es gilt sogar U˜z⊆ZC, denn gäbe es ein U˜z∋z∈Z, so gäbe es Wege γ2:
-[0,1]→M,γ 2(0) =z,γ2(1) =xundγ1: [0,1]→M,γ 1(0) = ˜z,γ1(1) =z.
+(i) Z ̸= ∅, daM lokal wegzusammenhängend ist
+(ii) Z ist offen, daM lokal wegzusammenhängend ist
+(iii) ZC := {˜z∈M |∄Weg vonx nach ˜z}ist offen
+Da M eine Mannigfaltigkeit ist, existiert zu jedem˜z∈ZC eine offene und
+wegzusammenhängende UmgebungU˜z ⊆M.
+Es gilt sogarU˜z ⊆ZC, denn gäbe es einU˜z ∋z ∈Z, so gäbe es Wegeγ2 :
+[0,1] →M,γ2(0) = z,γ2(1) = x und γ1 : [0,1] →M,γ1(0) = ˜z,γ1(1) = z.
 Dann wäre aber
-γ: [0,1]→M,
-γ(x) ={
-γ1(2x)falls0≤x≤1
+γ : [0,1] →M,
+γ(x) =
+{
+γ1(2x) falls 0 ≤x≤1
 2
-γ2(2x−1)falls1
-2<x≤1
-ein stetiger Weg von ˜znachx⇒Widerspruch.
-DaMzusammenhängend ist und M=Z
-offen∪ZC
+γ2(2x−1) falls 1
+2 <x ≤1
+ein stetiger Weg von˜z nach x ⇒Widerspruch.
+Da M zusammenhängend ist undM = Z
+offen
+∪ZC
 
-offen, sowieZ̸=∅folgtZC=∅.
-Also istM=Zwegzusammenhängend. ■
-(b)Beh.:Xist wegzusammenhängend.
-Beweis:X:= (R\{0})∪{01,02}und(R\{0})∪{02}sind homöomorph zu R.
+offen
+, sowieZ ̸= ∅folgt ZC = ∅.
+Also istM = Z wegzusammenhängend. ■
+(b) Beh.: X ist wegzusammenhängend.
+Beweis: X := (R\{0 }) ∪{01,02 }und (R\{0 }) ∪{02 }sind homöomorph zuR.
 Also sind die einzigen kritischen Punkte, die man nicht verbinden können könnte
-01und02.
-Da(R\{0})∪{01}homöomorph zu Rist, exisitert ein Weg γ1von01zu einem
-beliebigen Punkt a∈R\{0}.
-Da(R\{0})∪{02}ebenfalls homöomorph zu Rist, existiert außerdem ein
-Wegγ2vonanach 02. Damit existiert ein (nicht einfacher) Weg γvon01nach
+01 und 02.
+Da (R\{0 }) ∪{01 }homöomorph zuR ist, exisitert ein Wegγ1 von 01 zu einem
+beliebigen Punkta∈R \{0 }.
+Da (R \{0 }) ∪{02 }ebenfalls homöomorph zuR ist, existiert außerdem ein
+Wegγ2 von a nach 02. Damit existiert ein (nicht einfacher) Wegγ von 01 nach
 02. ■
 Lösung zu Aufgabe 9
-Vor.:Sei(X,d)eine absolute Ebene, A,B,C∈Xund△ABCein Dreieck.
+Vor.:Sei (X,d) eine absolute Ebene,A,B,C ∈X und △ABC ein Dreieck.
  Lösungen der Übungsaufgaben
-(a)Beh.:AB∼=AC⇒∠ABC∼=∠ACB
-Bew.:SeiAB∼=AC.
-⇒∃Isometrieϕmitϕ(B) =Cundϕ(C) =Bundϕ(A) =A.
-⇒ϕ(∠ABC ) =∠ACB
-⇒∠ABC∼=∠ACB ■
-(b)Beh.:Der längeren Seite von △ABCliegt der größere Winkel gegenüber und umgekehrt.
+(a) Beh.: AB ∼= AC ⇒∠ABC ∼= ∠ACB
+Bew.: Sei AB ∼= AC.
+⇒∃ Isometrie ϕ mit ϕ(B) = C und ϕ(C) = B und ϕ(A) = A.
+⇒ϕ(∠ABC) = ∠ACB
+⇒∠ABC ∼= ∠ACB ■
+(b) Beh.: Der längeren Seite von△ABC liegt der größere Winkel gegenüber und umgekehrt.
 
-Bew.:Seid(A,C)>d(A,B). Nach §3 (i) gibt es C′∈AC+mitd(A,C′) =d(A,B)
-⇒C′liegt zwischen AundC.
-Es gilt∡ABC′<∡ABCund aus Aufgabe 9 (a) folgt: ∡ABC′=∡AC′B.
-∠BC′Aist ein nicht anliegender Außenwinkel zu ∠BCABem. 66= = = = =⇒∡BC′A>∡BCA
-⇒∡BCA <∡BC′A=∡ABC′<∡ABCSei umgekehrt∡ABC >∡BCA, kann
-wegen 1. Teil von Aufgabe 9 (b) nicht d(A,B)>d(A,C)gelten.
-Wegen Aufgabe 9 (a) kann nicht d(A,B) =d(A,C)gelten.
-⇒d(A,B)<d(A,C) ■
-(c)Vor.:Seigeine Gerade, P∈XundP /∈g
-Beh.:∃!Lot
-Bew.:ÜB10 A4(a): Es gibt Geradenspiegelung ϕang.ϕvertauscht die beiden
-Halbebenen bzgl. g.
-⇒ϕ(P)PschneidetginF.
-Es gibt eine Geradenspiegelung ϕang.ϕvertauscht die beiden Halbebenen bzgl. g
-⇒ϕ(P)PschneidetginF.
-SeiA∈g\{F}. Dann giltϕ(∠AFP ) =∠AFϕ (P) =π⇒∠AFPist rechter Winkel.
-Gäbe es nun G∈g\{F}, so dassPGweiteres Lot von Paufgist, wäre△PFG
+Bew.: Sei d(A,C) >d(A,B). Nach §3 (i) gibt esC′∈AC+ mit d(A,C′) = d(A,B)
+⇒C′liegt zwischenA und C.
+Es gilt∡ABC′<∡ABC und aus Aufgabe 9 (a) folgt:∡ABC′= ∡AC′B.
+∠BC′A ist ein nicht anliegender Außenwinkel zu∠BCA Bem. 66= = = = =⇒∡BC′A> ∡BCA
+⇒∡BCA <∡BC′A = ∡ABC′ < ∡ABC Sei umgekehrt∡ABC >∡BCA, kann
+wegen 1. Teil von Aufgabe 9 (b) nichtd(A,B) >d(A,C) gelten.
+Wegen Aufgabe 9 (a) kann nichtd(A,B) = d(A,C) gelten.
+⇒d(A,B) <d(A,C) ■
+(c) Vor.:Sei g eine Gerade,P ∈X und P /∈g
+Beh.: ∃! Lot
+Bew.: ÜB10 A4(a): Es gibt Geradenspiegelungϕ an g. ϕ vertauscht die beiden
+Halbebenen bzgl.g.
+⇒ϕ(P)P schneidet g in F.
+Es gibt eine Geradenspiegelungϕ an g. ϕ vertauscht die beiden Halbebenen bzgl.g
+⇒ϕ(P)P schneidet g in F.
+Sei A∈g\{F }. Dann giltϕ(∠AFP) = ∠AFϕ(P) = π⇒∠AFP ist rechter Winkel.
+Gäbe es nunG∈g\{F }, so dassPG weiteres Lot vonP auf g ist, wäre△PFG
 ein Dreieck mit zwei rechten Innenwinkeln (vgl. Abbildung 5.4).
 ·
-·A
-GP
+·
+A
+G
+P
 F
 g
-Abbildung 5.4: Zwei Lote zu einer Geraden gdurch einen Punkt P
-Nach Folgerung 4.4 ist die Summe von zwei Innenwinkeln immer <π
-⇒Ggibt es nicht. ■
+Abbildung 5.4: Zwei Lote zu einer Geradeng durch einen PunktP
+Nach Folgerung 4.4 ist die Summe von zwei Innenwinkeln immer<π
+⇒G gibt es nicht. ■
 Lösung zu Aufgabe 10
-Seif∥hund o. B. d. A. f∥g.
-f∦h⇒f∩h̸=∅, sei alsox∈f∩h. Mit Axiom §5 folgt: Es gibt höchstens eine Parallele
-zugdurchx, dax /∈g. Diese istf, dax∈fundf∥g. Da aberx∈h, kannhnicht
+Sei f ∥h und o. B. d. A.f ∥g.
+f ∦ h⇒f∩h̸= ∅, sei alsox∈f∩h. Mit Axiom §5 folgt: Es gibt höchstens eine Parallele
+zu g durch x, dax /∈g. Diese istf, dax ∈f und f ∥g. Da aberx ∈h, kannh nicht
  Lösungen der Übungsaufgaben
-parallel zugsein, denn ansonsten gäbe es zwei Parallelen zu gdurchx(f̸=h).⇒g∦h■
+parallel zug sein, denn ansonsten gäbe es zwei Parallelen zug durch x(f ̸= h). ⇒g∦ h■
 Lösung zu Aufgabe 11
-Sei(X,d,G )eine Geometrie, die §1-§4 erfüllt. Seien außerdem △ABCund△A′B′C′
+Sei (X,d,G ) eine Geometrie, die §1-§4 erfüllt. Seien außerdem△ABC und △A′B′C′
 Dreiecke, für die gilt:
-d(A,B) =d(A′,B′)
-d(A,C) =d(A′,C′)
-d(B,C) =d(B′,C′)
-Seiϕdie Isometrie mit ϕ(A) =A′,ϕ(B) =B′undϕ(C′)liegt in der selben Halbebene
-bzgl.ABwieC. Diese Isometrie existiert wegen §4.
-Es giltd(A,C) =d(A′,C′) =d(ϕ(A′),ϕ(C′)) =d(A,ϕ(C′))undd(B,C) =d(B′,C′) =
-d(ϕ(B′),ϕ(C′)) =d(B,ϕ(C′)).
-Bem. 62= = = = =⇒C=ϕ(C).
-Es gilt also ϕ(△A′B′C′) =△ABC. ■
+d(A,B) = d(A′,B′)
+d(A,C) = d(A′,C′)
+d(B,C) = d(B′,C′)
+Sei ϕ die Isometrie mitϕ(A) = A′, ϕ(B) = B′und ϕ(C′) liegt in der selben Halbebene
+bzgl. AB wie C. Diese Isometrie existiert wegen §4.
+Es giltd(A,C) = d(A′,C′) = d(ϕ(A′),ϕ(C′)) = d(A,ϕ(C′)) und d(B,C) = d(B′,C′) =
+d(ϕ(B′),ϕ(C′)) = d(B,ϕ(C′)).
+Bem. 62= = = = =⇒C = ϕ(C).
+Es gilt alsoϕ(△A′B′C′) = △ABC. ■
 Bildquellen
 Alle Bilder, die hier nicht aufgeführt sind, wurden von Martin Thoma erstellt.
 Teilweise wurden die im folgenden aufgelisteten Bilder noch leicht modifiziert.
-Abb. 0.1aS2: Tom Bombadil, tex.stackexchange.com/a/42865
+Abb. 0.1a S2: Tom Bombadil, tex.stackexchange.com/a/42865
 Abb. 0.1b Würfel: Jan Hlavacek, tex.stackexchange.com/a/12069
-Abb. 0.1eT2: Jake, tex.stackexchange.com/a/70979/5645
+Abb. 0.1e T2: Jake, tex.stackexchange.com/a/70979/5645
 Abb. 1.6 Stereographische Projektion: texample.net/tikz/examples/map-projections
 Abb. 1.11 Knoten von Jim.belk aus der „Blue knots“-Serie:
-–Trivialer Knoten: commons.wikimedia.org/wiki/File:Blue_Unknot.png
-–Kleeblattknoten: commons.wikimedia.org/wiki/File:Blue_Trefoil_Knot.png
-–Achterknoten: commons.wikimedia.org/wiki/File:Blue_Figure-Eight_Knot.png
-–62-Knoten: commons.wikimedia.org/wiki/File:Blue_6_2_Knot.png
+– Trivialer Knoten:commons.wikimedia.org/wiki/File:Blue_Unknot.png
+– Kleeblattknoten: commons.wikimedia.org/wiki/File:Blue_Trefoil_Knot.png
+– Achterknoten:commons.wikimedia.org/wiki/File:Blue_Figure-Eight_Knot.png
+– 62-Knoten: commons.wikimedia.org/wiki/File:Blue_6_2_Knot.png
 Abb. 1.12 Reidemeister-Züge: YAMASHITA Makoto (1, 2, 3)
 Abb. 1.13 Kleeblattknoten,3-Färbung:Jim.belk, commons.wikimedia.org/wiki/File:Tricoloring.
 png
 Abb. 2.1 Doppeltorus:OlegAlexandrov, commons.wikimedia.org/wiki/File:Double\_torus\_illustration.
 png
 Abb. 2.8 Faltungsdiagramm: Jérôme Urhausen, Email vom 11.02.2014.
-Abb. 3.3b 3 Pfade auf Torus: Charles Staats, tex.stackexchange.com/a/149991/5645
-Abb. 3.10 Überlagerung von S1mitR: Alex, tex.stackexchange.com/a/149706/5645
+Abb. 3.3b 3 Pfade auf Torus: Charles Staats,tex.stackexchange.com/a/149991/5645
+Abb. 3.10 Überlagerung vonS1 mit R: Alex,tex.stackexchange.com/a/149706/5645
 Abb. 4.7a Sphärisches Dreieck: Dominique Toussaint,
 commons.wikimedia.org/wiki/File:Spherical_triangle_3d_opti.png
-Abb. 5.1 Möbiusband: Jake, tex.stackexchange.com/a/118573/5645
-Abb. 5.3 Krümmung des Torus: Charles Staats, tex.stackexchange.com/a/149991/5645
+Abb. 5.1 Möbiusband: Jake,tex.stackexchange.com/a/118573/5645
+Abb. 5.3 Krümmung des Torus: Charles Staats,tex.stackexchange.com/a/149991/5645
 Abkürzungsverzeichnis
-Beh.Behauptung
-Bew.Beweis
-bzgl.bezüglich
-bzw.beziehungsweise
-ca.circa
-d. h.das heißt
-Def.Definition
-etc.et cetera
-ex.existieren
-Hom.Homomorphismus
-o. B. d. A. ohne Beschränkung der Allgemeinheit
-Prop.Proposition
-sog.sogenannte
-Vor.Voraussetzung
-vgl.vergleiche
-z. B.zum Beispiel
-zhgd.zusammenhängend
-z. z.zu zeigen
+Beh. Behauptung
+Bew. Beweis
+bzgl. bezüglich
+bzw. beziehungsweise
+ca. circa
+d. h. das heißt
+Def. Definition
+etc. et cetera
+ex. existieren
+Hom. Homomorphismus
+o. B. d. A.ohne Beschränkung der Allgemeinheit
+Prop. Proposition
+sog. sogenannte
+Vor. Voraussetzung
+vgl. vergleiche
+z. B. zum Beispiel
+zhgd. zusammenhängend
+z. z. zu zeigen
 Ergänzende Definitionen und Sätze
 Da dieses Skript in die Geometrie und Topologie einführen soll, sollten soweit wie möglich alle
 benötigten Begriffe definiert und erklärt werden. Die folgenden Begriffe wurden zwar verwendet,
 aber nicht erklärt, da sie Bestandteil der Vorlesungen „Analysis I und II“ sowie „Lineare Algebra
 und analytische Geometrie I und II“ sind. Jedoch will ich zumindest die Definitionen bereitstellen.
 Definition 81
-SeiD⊆Rundx0∈R.x0heißt einHäufungspunkt vonD:⇔∃FolgexninD\{x0}
-mitxn→x0.
+Sei D ⊆R und x0 ∈R. x0 heißt einHäufungspunkt von D :⇔∃ Folge xn in D\{x0 }
+mit xn →x0.
 Folgende Definition wurde dem Skript von Herrn Prof. Dr. Leuzinger für Lineare Algebra
 entnommen:
 Definition 82
-Es seienVundWK-Vektorräume und A(V)undA(W)die zugehörigen affinen Räume.
-Eine Abbildung f:V→Wheißtaffin, falls für alle a,b∈Vund alleλ,µ∈Kmitλ+µ= 1
+Es seienV und W K-Vektorräume undA(V) und A(W) die zugehörigen affinen Räume.
+Eine Abbildungf : V →W heißtaffin , falls für allea,b ∈V und alleλ,µ ∈K mit λ+µ= 1
 gilt:
-f(λa+µb) =λf(a) +µf(b)
+f(λa+ µb) = λf(a) + µf(b)
 Definition 83
-SeiVein Vektorraum und S⊆Veine Teilmenge.
-Sheißt eine Orthonormalbasis vonV, wenn gilt:
-(i)Sist eine Basis von V
-(ii)∀v∈S:∥v∥= 1
-(iii)∀v1,v2∈S:v1̸=v2⇒⟨v1,v2⟩= 0
+Sei V ein Vektorraum undS ⊆V eine Teilmenge.
+S heißt eineOrthonormalbasis von V, wenn gilt:
+(i) S ist eine Basis vonV
+(ii) ∀v∈S : ∥v∥= 1
+(iii) ∀v1,v2 ∈S : v1 ̸= v2 ⇒⟨v1,v2⟩= 0
 Satz (Zwischenwertsatz)
-Seia < bundf∈C[a,b] :=C([a,b]), weiter sei y0∈Rundf(a)< y 0< f(b)oder
-f(b)<y0<f(a). Dann existiert ein x0∈[a,b]mitf(x0) =y0.
+Sei a < bund f ∈ C[a,b] := C([a,b]), weiter sei y0 ∈R und f(a) < y0 < f(b) oder
+f(b) <y0 <f (a). Dann existiert einx0 ∈[a,b] mit f(x0) = y0.
 Definition 84
-SeiVein Vektorraum über einem Körper Kundf:V→Veine lineare Abbildung.
-v∈V\{0}heißtEigenvektor :⇔∃λ∈K:f(v) =λv.
-Wenn ein solches λ∈Kexistiert, heißt es Eigenwert vonf.
+Sei V ein Vektorraum über einem KörperK und f : V →V eine lineare Abbildung.
+v∈V \{0 }heißtEigenvektor:⇔∃λ∈K : f(v) = λv.
+Wenn ein solchesλ∈K existiert, heißt esEigenwertvon f.
 Satz (Binomischer Lehrsatz)
-Seix,y∈R. Dann gilt:
-(x+y)n=n∑
-k=0(n
-k)
-xn−kyk∀n∈N0
+Sei x,y ∈R. Dann gilt:
+(x+ y)n =
+n∑
+k=0
+(n
+k
+)
+xn−kyk ∀n∈N0
 Definition 85
-Seiena,b∈R3Vektoren.
-a×b:=
-a1
+Seien a,b ∈R3 Vektoren.
+a×b:=
+
+
+a1
 b3
-a3
-×
-a1
+a3
+
+×
+
+
+a1
 b3
-a3
-=
-a2b3−a3b2
-a3b1−a1b3
-a1b2−a2b1
+a3
+
+=
+
+
+a2b3 −a3b2
+a3b1 −a1b3
+a1b2 −a2b1
+
 
 Symbolverzeichnis
 Mengenoperationen
-SeienA,BundMMengen.
-ACKomplement von A
-P(M)Potenzmenge von M
-MAbschluss von M
-∂MRand der Menge M
-M◦Inneres der Menge M
-A×BKreuzprodukt
-A⊆BTeilmengenbeziehung
-A⊊Bechte Teilmengenbeziehung
-A\BDifferenzmenge
-A∪BVereinigung
-A˙∪BDisjunkte Vereinigung
-A∩BSchnitt
+Seien A,B und M Mengen.
+AC Komplement vonA
+P(M) Potenzmenge vonM
+M Abschluss vonM
+∂M Rand der MengeM
+M◦ Inneres der MengeM
+A×B Kreuzprodukt
+A⊆B Teilmengenbeziehung
+A⊊ B echte Teilmengenbeziehung
+A\B Differenzmenge
+A∪B Vereinigung
+A ˙∪B Disjunkte Vereinigung
+A∩B Schnitt
 Geometrie
-AB Gerade durch die Punkte Aund
+AB Gerade durch die PunkteA und
 B
-AB Strecke mit Endpunkten AundB
-△ABC Dreieck mit Eckpunkten A,B,C
-AB∼=CDDie Strecken ABundCDsind
+AB Strecke mit EndpunktenA und B
+△ABC Dreieck mit EckpunktenA,B,C
+AB ∼= CD Die StreckenAB und CD sind
 isometrisch
 |K| Geometrische Realisierung des
 Simplizialkomplexes K
 Gruppen
-SeiXein topologischer Raum und Kein Körper.
+Sei X ein topologischer Raum undK ein Körper.
 
-Homöo (X)Homöomorphismengruppe
-Iso(X)Isometriengruppe
-GLn(K)Allgemeine lineare Gruppe (von
-General Linear Group )
-SLn(K)Spezielle lineare Gruppe
-PSLn(K)Projektive lineare GruppePerm(X)Permutationsgruppe
-Sym(X)Symmetrische Gruppe
+Homöo(X) Homöomorphismengruppe
+Iso(X) Isometriengruppe
+GLn(K) Allgemeine lineare Gruppe (von
+General Linear Group)
+SLn(K) Spezielle lineare Gruppe
+PSLn(K) Projektive lineare Gruppe
+Perm(X) Permutationsgruppe
+Sym(X) Symmetrische Gruppe
 Wege
-Seiγ:I→Xein Weg.
-[γ]Homotopieklasse von γ
-γ1∗γ2Zusammenhängen von Wegen
-γ1∼γ2Homotopie von Wegen
-γ(x)Inverser Weg, also γ(x) :=γ(1−x)
-C Bild eines Weges γ, alsoC:=
+Sei γ : I →X ein Weg.
+[γ] Homotopieklasse vonγ
+γ1 ∗γ2 Zusammenhängen von Wegen
+γ1 ∼γ2 Homotopie von Wegen
+γ(x) Inverser Weg, alsoγ(x) := γ(1 −x)
+C Bild eines Wegesγ, alsoC :=
 γ([0,1])
 Weiteres
-BBasis einer Topologie
-Bδ(x)δ-Kugel um x
-SSubbasis einer Topologie
-TTopologie
-AAtlas
-PProjektiver Raum
-⟨·,·⟩Skalarprodukt
-X/∼Xmodulo∼
-[x]∼Äquivalenzklassen von xbzgl.∼
-∥x∥Norm vonx
-|x|Betrag von x
-⟨a⟩Erzeugnis von a
-SnSphäre
-TnTorus
-f◦gVerkettung von fundg
-πXProjektion auf X
-f|Ufeingeschränkt auf U
-f−1(M)Urbild von M
-Rg(M)Rang vonM
-χ(K)Euler-Charakteristik von K
+B Basis einer Topologie
+Bδ(x) δ-Kugel umx
+S Subbasis einer Topologie
+T Topologie
+A Atlas
+P Projektiver Raum
+⟨·,·⟩ Skalarprodukt
+X/∼ X modulo ∼
+[x]∼ Äquivalenzklassen vonx bzgl. ∼
+∥x∥ Norm vonx
+|x| Betrag vonx
+⟨a⟩ Erzeugnis vona
+Sn Sphäre
+Tn Torus
+f ◦g Verkettung vonf und g
+πX Projektion aufX
+f|U f eingeschränkt aufU
+f−1(M) Urbild vonM
+Rg(M) Rang vonM
+χ(K) Euler-Charakteristik vonK
  Symbolverzeichnis
-∆kStandard-Simplex
-X#YVerklebung von XundY
-dnLineare Abbildung aus Bemerkung
+∆k Standard-Simplex
+X#Y Verklebung vonX und Y
+dn Lineare Abbildung aus Bemerkung
  37
-A∼=B Aist isometrisch zu B
-f∗Abbildung zwischen Fundamentalgruppen
+A∼= B A ist isometrisch zuB
+f∗ Abbildung zwischen Fundamentalgruppen
  (vgl. Seite 49)
  Symbolverzeichnis
 Zahlenmengen
-N={1,2,3,...}Natürliche Zahlen
-Z=N∪{0,−1,−2,...}Ganze Zahlen
-Q=Z∪{1
-2,1
-3,2
-3}
-={z
-nmitz∈Zundn∈Z\{0}}
+N = {1,2,3,... } Natürliche Zahlen
+Z = N ∪{0,−1,−2,... } Ganze Zahlen
+Q = Z ∪
+{1
+2 ,1
+3 ,2
+3
+}
+=
+{z
+n mit z∈Z und n∈Z \{0 }
+}
 Rationale Zahlen
-R=Q∪{√
-2,−3√
-3,...}
+R = Q ∪
+{√
+2,−
+3√
+3,...
+}
 Reele Zahlen
-R+Echt positive reele Zahlen
+R+ Echt positive reele Zahlen
 Rn
-+,0:={(x1,...,xn)∈Rn|xn≥0}Halbraum
-R×=R\{0}Einheitengruppe von R
-C={a+ib|a,b∈R}Komplexe Zahlen
-P={2,3,5,7,...}Primzahlen
-H={z∈C|ℑz>0}obere Halbebene
-I= [0,1]⊊ REinheitsintervall
-f:S1↪→R2Einbettung der Kreislinie in die Ebene
-π1(X,x)Fundamentalgruppe im topologischen Raum Xumx∈X
-Fix(f)Menge der Fixpunkte der Abbildung f
-∥·∥ 2 2-Norm; Euklidische Norm
++,0 := {(x1,...,x n) ∈Rn |xn ≥0 } Halbraum
+R×= R \{0 } Einheitengruppe vonR
+C = {a+ ib|a,b ∈R } Komplexe Zahlen
+P = {2,3,5,7,... } Primzahlen
+H = {z∈C |ℑz >0 } obere Halbebene
+I = [0,1] ⊊ R Einheitsintervall
+f : S1 ↪→R2 Einbettung der Kreislinie in die Ebene
+π1(X,x) Fundamentalgruppe im topologischen RaumX um x∈X
+Fix(f) Menge der Fixpunkte der Abbildungf
+∥·∥2 2-Norm; Euklidische Norm
 κ Krümmung
 κNor Normalenkrümmung
-V(f) Nullstellenmenge von f2
+V(f) Nullstellenmenge vonf2
 Krümmung
-DpF:R2→R3Lineare Abbildung mit Jacobi-Matrix in p(siehe Seite 89)
-TsS Tangentialebene an S⊆R3durchs∈S
+DpF : R2 →R3 Lineare Abbildung mit Jacobi-Matrix inp (siehe Seite 89)
+TsS Tangentialebene anS ⊆R3 durch s∈S
 dsn(x) Weingarten-Abbildung
-2vonVanishing Set
+2von Vanishing Set
 Stichwortverzeichnis
 Abbildung
 affine, 107
@@ -3842,7 +4586,7 @@ Abschluss, 3
 Abstand, 86
 Abstandsaxiom, 65
 Achterknoten, 20
-Aktion, sieheGruppenoperation
+Aktion, siehe Gruppenoperation
 Anordnungsaxiome, 66
 Atlas, 24
 Außenwinkel, 70
@@ -3868,18 +4612,19 @@ begleitendes, 89
 Ebene
 euklidische, 64
 Eigenvektor, 107
-Eigenwert, 107einfach zusammenhängend, 49
+Eigenwert, 107
+einfach zusammenhängend, 49
 Einheitsnormalenfeld, 90
-Euler-Charakteristik, sieheEulerzahl
+Euler-Charakteristik, siehe Eulerzahl
 Eulersche Polyederformel, 38
 Eulerzahl, 36
 Färbbarkeit, 21
-Faser, sieheUrbild
+Faser, siehe Urbild
 Fläche
 orientierbare, 90
 reguläre, 30
 Flächenelement, 95
-Formoperator, sieheWeingarten-Abbildung
+Formoperator, siehe Weingarten-Abbildung
 Fundamentalform
 erste, 94
 zweite, 97
@@ -3896,7 +4641,7 @@ spezielle lineare, 22
 topologische, 33
 Gruppe operiert durch Homöomorphismen,
 61
-Gruppenaktion, sieheGruppenoperation
+Gruppenaktion, siehe Gruppenoperation
 Gruppenoperation, 60, 60–63
 stetige, 61
 Häufungspunkt, 107
@@ -3929,14 +4674,14 @@ Kartenwechsel, 28
 Kern
 offener, 3
 Kleeblattknoten, 20
-Klumpentopologie, siehetriviale Topologie
+Klumpentopologie, siehe triviale Topologie
 Knoten, 20, 17–21
 äquivalente, 20
 trivialer, 20
 Knotendiagramm, 20
 kollinear, 65
-kongruent, sieheisometrisch
-Kongruenz, sieheIsometrie
+kongruent, siehe isometrisch
+Kongruenz, siehe Isometrie
 Kongruenzsatz
 SSS, 104
 SWS, 69
@@ -3954,7 +4699,8 @@ Binomischer, 107
 Lie-Gruppe, 33
 liegt zwischen, 65
 Liftung, 54
-Limes, 8lokal, 3
+Limes, 8
+lokal, 3
 Lot, 86
 Lotfußpunkt, 86
 Möbiusband, 91
@@ -4038,15 +4784,16 @@ Torus, iii, 5, 38, 51, 93
 Total Unzusammenhängend, 100
 Triangulierung, 38
 Überdeckung, 14
-Übergangsfunktion, sieheKartenwechsel
+Übergangsfunktion, siehe Kartenwechsel
 Überlagerung, 51, 51–60
 reguläre, 59
 universelle, 57
 Umgebung, 3
 Umgebungsbasis, 58
 vanishing set, 26
-Vektorprodukt, sieheKreuzprodukt
-Verklebung, 26verträglich, 29
+Vektorprodukt, siehe Kreuzprodukt
+Verklebung, 26
+verträglich, 29
 Würfel, 34
 Weg, 17
 einfacher, 17
diff --git a/read/results/tika/1602.06541.txt b/read/results/tika/1602.06541.txt
index 5778993..387aa43 100644
--- a/read/results/tika/1602.06541.txt
+++ b/read/results/tika/1602.06541.txt
@@ -1013,6 +1013,132 @@ i=1
 
 αiyi = 0
 
+C. Random Decision Forests
+
+Random Decision Forests were first proposed
+in [Ho95]. This type of classifier applies techniques
+called ensemble learning, where multiple classifiers
+are trained and a combination of their hypotheses is
+used. One ensemble learning technique is the random
+subspaces method where each classifier is trained
+on a random subspace of the feature space. Another
+ensemble learning technique is bagging, which is
+training the trees on random subsets of the training set.
+In the case of Random Decision Forests, the classifiers
+are decision trees. A decision tree is a tree where each
+inner node uses one or more features to decide in which
+branch to descend. Each leaf is a class.
+
+One strength of Random Decision Forests compared
+to many other classifiers like[SVMs|and neural networks
+is that the scale of measure of the features (nominal,
+ordinal, interval, ratio) can be arbitrary. Another advan-
+tage of Random Decision Forests compared to
+for example, is the speed of training and classification.
+
+Decision trees were extensively studied in the past
+20 years and a multitude of training algorithms have
+been proposed (e.g. ID3 in [Qui86], C4.5 in [Qui93]).
+Possible training hyperparameters are the measure to
+evaluate the “goodness of split” [Min89], the number of
+decision trees being used, and if the depth of the trees
+is restricted. Typically in the context of classification,
+decision trees are trained by adding new nodes until
+each leaf contains only nodes of a single class or until it
+is not possible to split further. This is called a stopping
+criterion.
+
+There are two typical training modes: Central axis
+projection and perceptron training. In training, for
+each node a hyperplane is searched which is optimal
+according to an error function.
+
+Random Decision Forests with texton features (see
+are applied in [SJCO8] for segmentation.
+In the [MSC] dataset, they report a per-pixel accuracy
+rate of 66.9% for their best system. This system
+requires 415 ms for the segmentation of 320 px x 213 px
+images on a single 2.7GHz core. On the Pascal
+VOC 2007 dataset, they report an average per-pixel
+accuracy for their best segmentation system of 42 %.
+
+An excellent introduction to Random Decision
+Forests for semantic segmentation is given by [SCZ08].
+
+D. SVMs
+
+are well-studied binary classifiers which can
+be described by five central ideas. For those ideas, the
+training data is represented as (x;, y;) where x; is the
+feature vector and y; € { —1,1} the binary label for
+training example i € { 1,...,m }.
+
+1) If data is linearly separable, it can be separated
+by a hyperplane. There is one hyperplane which
+maximizes the distance to the next datapoints
+(support vectors). This hyperplane should be taken:
+
+minimize : || w ||?
+w,b 2
+s.t. Vi yi: ((w,x;) +b) > 1
+sgn applied to this gives the classification
+
+2) Even if the underlying process which generates the
+features for the two classes is linearly separable,
+noise can make the data not separable. The intro-
+duction of slack variables to relax the requirement
+of linear separability solves this problem. The
+trade-off between accepting some errors and a
+more complex model is weighted by a parameter
+C € Rg. The bigger C, the more errors are
+accepted. The new optimization problem is:
+
+1 m
+
+minimize 5llwil? +C- SO&
+i=1
+
+st. Vit iyi: ((w, xi) +6) 2 1- &
+
+Note that 0 < €; < 1 means that the data point
+is within the margin, whereas €; > 1 means it is
+misclassified. An with C > 0 is also called
+a soft-margin
+
+3) The primal problem is to find the normal vector
+w and the bias b. The dual problem is to express
+w as a linear combination of the training data x;:
+
+m
+w= y OGYi Xi
+i=1
+
+where y; € { —1,1} represents the class of the
+training example and a; are Lagrange multipliers.
+The usage of Lagrange multipliers is explained
+with some examples in [[Sm104]. The usage of the
+Lagrange multipliers a; changes the optimization
+problem depend on the a; which are weights for
+the feature vectors. It turns out that most a; will
+be zero. The non-zero weighted vectors are called
+support vectors.
+
+The optimization problem is now, according
+to [Bur98]:
+
+m mom
+
+os 1
+maximize S> ai 3 S> S> Oj YiYy (Xi, Xj)
+
+i=l i=1 j=1
+st. Vi O0< a, <C
+
+m
+S.t. So aii =0
+i=1
+
+
 
 
 8
@@ -1181,6 +1307,134 @@ According to [Mur12], the most common way of
 inference over the posterior MRF in computer vision
 problems is Maximum A Posteriori (MAP) estimation.
 
+4) Not every dataset is linearly separable. This prob-
+lem is approached by transforming the feature
+vectors x with a non-linear mapping ® into
+a higher dimensional (probably oo-dimensional)
+space. As the feature vectors x are only used
+within scalar product (x;,x;), it is not necessary
+to do the transformation. It is enough to do the
+calculation
+
+K (xi, xj) = (Xi, Xj)
+
+This function K is called a kernel. The idea of
+never explicitly transforming the vectors x; to the
+higher dimensional space is called the kernel trick.
+Common kernels include the polynomial kernel
+
+K p(x, X;) = ((xi, X;) + ry?
+
+of degree p and coefficient r, the Gaussian
+basis function (RBF) kernel
+
+2
+=i => Il
+
+Kauss (Xi; x;) =e 2Qo2
+
+and the sigmoid kernel
+Kanh(Xi, xj) = tanh(y(xi, Xj) ~~ r)
+
+where the parameter y determines how much
+influence single training examples have.
+
+5) The described can only distinguish between
+two classes. Common strategies to expand those
+binary classifiers to multi-class classification is
+the one-vs-all and the one-vs-one strategy. In the
+one-vs-all strategy n classifiers have to be trained
+which can distinguish one of the n classes against
+all other classes. In the one-vs-one strategy “.-
+classifiers are trained; one classifier for each pair
+of classes.
+
+A detailed description of can be found
+in [Bur98].
+are used by [YHRF12] on the 2009 and 2010
+PASCAL segmentation challenge [EVGW* 10]. They
+did not hand their classifier in to the challenge itself,
+but calculated an average rank of 7 among the different
+categories.
+[FGMRT0] also used an [SVM] based method with
+features and achieved the 7 rank in the 2010
+PASCAL segmentation challenge by mean accuracy. It
+needs about 2s on a 2.8 GHz 8-core Intel processor.
+
+ 
+
+E. Markov Random Fields
+
+are undirected probabilistic graphical models
+which are wide-spread model in computer vision. The
+overall idea of is to assign a random variable for
+each feature and a random variable for each pixel which
+
+ 
+
+Figure 3: with 4-neighborhood. Each node 2;
+represents a pixel and each node y; represents
+a label.
+
+gets labeled as shown in For example, a MRF
+
+which is trained on images of the size 224 px x 224 pixel
+and gets the raw RGB values as features has
+
+224 - 224-34 224 - 224 = 200 704
+—S=_—Sr Ss ~~
+
+input output
+
+random variables. Those random variables are condi-
+tionally independent, given their local neighborhood.
+These (in)dependencies can be expressed with a graph.
+
+Let G = (V,€) be the associated undirected graph
+of an and C be the set of all maximal cliques in
+that graph. Nodes represent random variables x, y and
+edges represent conditional dependencies. Just like in
+he 4-neighborhood [SWRCO06] and the 8-neighborhood
+are reasonable choices for constructing the graph.
+
+Typically, random variables y represent the class of a
+single pixel, random variables x represent a pixel values
+and edges represent pixel neighborhood in computer
+vision problems segmentation problems where
+are used. Accordingly, the random variables y live
+on 1,...,nr of classes and the random variables x
+typically live on 0,...,255 or [0, 1].
+
+The probability of x, y can be expressed as
+
+1
+Plx,y) = pe PO)
+
+where Z = Vy © UY) is a normalization term
+called the partition function and E is called the energy
+function. A common choice for the energy function is
+
+E(x,y) =o ve(xy)
+cEC
+where 7 is called a clique potential. One choice for
+
+cliques of size two x, y = (x1, 22) is [KP06]
+
++w
+
+—W
+
+if XY x LQ
+if v1, = 7%
+
+We(21, £2) = wd(ax1, 22) =
+
+According to [Mur12], the most common way of
+inference over the posterior MRF in computer vision
+
+problems is [Maximum A Posteriori (MAP) estimation.
+
+
 
 
 9
diff --git a/read/results/tika/1707.09725.txt b/read/results/tika/1707.09725.txt
index 194789f..4c9298a 100644
--- a/read/results/tika/1707.09725.txt
+++ b/read/results/tika/1707.09725.txt
@@ -101,6 +101,8 @@ l 2
 
 
 
+
+
 Analysis and Optimization of Convolutional Neural
 Network Architectures
 
@@ -136,6 +138,8 @@ v
 
 
 
+
+
 Abstract
 
 Convolutional Neural Networks (CNNs) dominate various computer vision tasks since
@@ -349,6 +353,8 @@ I Glossary 119
 
 
 
+
+
 1. Introduction
 
 Computer vision is the academic field which aims to gain a high-level understanding of the
@@ -661,6 +667,49 @@ output image, k2 multiplications and k2 additions of the products have to be cal
 
 3
 
+2. Convolutional Neural Networks
+
+ 
+
+In the following, it is assumed that the reader knows what a|multilayer perceptron (MLP)}
+
+ 
+
+is and how they are designed for classification problems, what activation functions are and
+
+how gradient descent works. In case the reader needs a refresher on any of those topics, I
+recommend chapter 4.3 and 4.4 of [Thol4a] as well as [EBH15}.
+
+This chapter introduces linear image filters in then standard layer types of
+
+are explained in The layer block pattern is described in
+transition layers in and nine ways to analyze are described in
+
+2.1. Linear Image Filters
+
+A linear image filter (also called a filter bank or a kernel) is an element F € Rew *knxd,
+where k,, represents the filter’s width, ky, the filter’s height and d the number of input
+channels. The filter F is convolved with the image J € R”*’*¢ to produce a new image I’.
+The output image I’ has only one channel. Each pixel I'(z,y) of the output image gets
+calculated by point-wise multiplication of one filter element with one element of the original
+
+image I:
+
+[Ae | [Pld
+I'(a,y) = S- (a + ix, y + ty, tc) - Flix; iy, ie)
+=1
+
+io=1—[ 99] iy=1— [4
+
+Filter kernel Result of point-wise
+Fe RS multiplication
+
+  
+
+Figure 2.1.: Visualization of the application of a linear k x k x 1 image filter. For each pixel of the
+output image, k? multiplications and k? additions of the products have to be calculated.
+
+
 
 
 2. Convolutional Neural Networks
@@ -865,6 +914,59 @@ s = 1 to input data of size width× height with three channels.
 
 6
 
+2. Convolutional Neural Networks
+
+This is easier to see when the filtering operation is denoted formally:
+
+k
+o%(x)=b+S ow x) with ie {1,....w} x {1,...,h} x {1...,d} [2]
+j=l
+
+[SJ [Bld
+ofH)(D) = b+ » » S > Filins ty, te) T(x + ix, y + ty; te) [2.2]
+ig=1—-[ M2] iy=1—p Ab] t=!
+
+with abiasbe R,xe{l,...,.w},ye{l,...,h} andze{l,...,d}
+
+One can see that most weights of the equivalent [MLP] are zero and many weights are
+equivalent. Hence the advantage of compared to is the reduction of parameters.
+The effect of fewer parameters is that less training data is necessary to get suitable
+estimations for those. This means a[MLP] which is able to compute the same functions as a
+[CNN] will likely have worse results on the same dataset, if a architecture is suitable
+for the dataset.
+
+See for a visualization of the application of a convolutional layer.
+
+n filters of
+sizekxkx3
+
+Be
+
+neural
+network
+apply
+
+re re
+
+» a)
+
+data “eb “eb
+
+oO o
+
+a GS
+
+Vo NEGA Sas
+3 feature maps
+
+(e.g. RGB)
+
+n feature maps
+
+Figure 2.2.: Application of a single convolutional layer with n filters of size k x k x 3 with stride
+5 = 1 to input data of size width x height with three channels.
+
+
 
 
 2.2. CNN Layer Types
@@ -1513,6 +1615,78 @@ typically smaller than 0.1.
 
 17
 
+2.5. Analysis Techniques
+
+ 
+
+plotting the error on the training set as well as the error on a validation set, one can also
+estimate if overfitting might become a problem. See Figure 2.7 for an example.
+
+ 
+
+ 
+
+ 
+
+ 
+
+   
+
+ 
+
+ 
+
+Error | . --- Training set
+08. . — Validation set
+0.6 +
+0.4 4
+0.2 4 ws 1 overfitting >
+
+' Epochs
+t
+
+ 
+
+10 20 30 40 50 60 70 80 90 100
+
+Figure 2.7.: A typical validation curve: In this case, the hyperparameter is the number of epochs
+and the quality metric is the error (1 — accuracy). The longer the network is trained,
+the better it gets on the training set. At some point the network is fit too well to the
+training data and loses its capability to generalize. At this point the quality curve of
+the training set and the validation set diverge. While the classifier is still improving on
+the training set, it gets worse on the validation and the test set.
+
+When the epoch-loss validation curve has plateaus as in this means the opti-
+mization process did not improve for several epochs. Three possible ways to reduce the
+problem of plateaus are (i) to change weight initialization if the plateau was at the beginning,
+
+(ii) regularizing the model or (iii) changing the optimization algorithm.
+
+Loss functions
+
+The loss function (also called error function or cost function) is a function which assigns a
+real value to a complex event like the predicted class of a feature vector. It is used to define
+the objective function. For classification problems the loss function is typically cross-entropy
+
+with ℓ1 or ℓ2 regularization, as it was described in [NH92]:
+
+
+\[
+E_{CE}(W) = \underbrace{- \sum_{x \in X} \sum_{k=1}^{K} \left[ t_k^x \log(o_k^x) + (1 - t_k^x) \log(1 - o_k^x) \right]}_{\text{cross-entropy data loss}}
+  + \underbrace{
+      \overbrace{\lambda_1 \sum_{w \in W} |w|}^{\ell_1}
+      + \overbrace{\lambda_2 \sum_{w \in W} w^2}^{\ell_2}
+    }_{\text{model complexity loss}}
+\]
+where W are the weights, X is the training data set, K ∈ ℕ_{>0} is the number of classes and
+t_k^x indicates if the training example x is of class k. o_k^x is the output of the classification
+algorithm which depends on the weights. λ1, λ2 ∈ [0, ∞) weights the regularization and is
+
+typically smaller than 0.1.
+
+17
+
+
 
 
 2. Convolutional Neural Networks
@@ -1981,6 +2155,85 @@ ij)
 
 28
 
+3. Topology Learning
+
+ 
+
+4. Correlation Maximization: Train the weights of the candidates by maximizing S,
+
+the correlation between the candidate's output value V and the network's residual error:
+
+\[
+S = \sum_{o \in O} \left| \sum_{p \in T} (V_p - \bar{V}) (E_{p,o} - \bar{E}_o) \right|
+\]
+
+where O is the set of output nodes, T is the training set, V_p is the candidate neuron's
+activation for a training pattern p. E_{p,o} is the residual output error at node o for
+pattern p. \bar{V} and \bar{E}_o are averaged values over all elements of T. This step is finished
+
+when the correlation no longer increases.
+
+5. Candidate selection: Keep the candidate node with the highest correlation, freeze
+
+its incoming weights and add connections to the output nodes.
+6. Continue: If the error is higher than desired, continue with step 2.
+
+One network with three hidden nodes trained by Cascade-Correlation is shown in Figure 3.1.
+
+O
+©
+©
+O
+©
+
+Figure 3.1.: A Cascade-Correlation network with three input nodes (red) and one bias node (gray)
+to the left, three hidden nodes (green) in the middle and two output nodes in the upper
+right corner. The black squares represent frozen weights which are found by correlation
+maximization whereas the white squares are trainable weights.
+
+ 
+
+Vv
+
+©
+
+ 
+
+Vv
+
+ 
+
+Vv
+
+Vv
+
+ 
+
+Vv
+
+ 
+
+Vv
+
+ 
+
+PH)
+PHI)
+
+»
+>
+
+3.1.2. Meiosis Networks
+
+Meiosis Networks are introduced in [Han89]. In contrast to most [MLPs] and [CNNs] where
+weights are deterministic and fixed at prediction time, each weight w_{ij} in Meiosis networks
+follows a normal distribution:
+
+w_{ij} ∼ N(μ_{ij}, σ²_{ij})
+
+28
+
+
 
 
 3.2. Pruning approaches
@@ -2944,6 +3197,139 @@ Table 5.5.: Differences in spectral clustering and CMO.
 
 52
 
+5. Experimental Evaluation
+
+ 
+
+ 
+
+ 
+
+ 
+
+ 
+
+ 
+
+ 
+
+Cluster Spectral clustering Errors |CMO Errors
+
+fish aquarium fish, orchid + flatfish 5 aquarium fish, orchid + flatfish 4
++ ray, shark + trout, lion + ray + shark, trout
+
+flowers orchid, aquarium fish + sun- 5 orchid, aquarium fish + sun- 2
+flower + poppy, tulip + rose, flower, poppy, tulip, rose
+train
+
+people baby, boy, man + girl + woman 2 baby, boy, girl, woman, man 0
+
+reptiles crocodile, plain, road, table, 9 crocodile, lizard, lobster, cater- 6
+wardrobe + dinosaur + lizard pillar + dinosaur + snake + tur-
++ snake, worm + turtle tle, crab
+
+trees maple, oak, pine + willow, forest 3 palm, willow, pine, maple, oak 0
++ palm
+
+Total 24 12
+
+ 
+
+Table 5.4.:
+
+Differences in spectral clustering and CMO. Classes in a cluster are separated by ,
+whereas clusters are separated by +.
+
+ 
+
+ 
+
+ 
+
+ 
+
+ 
+
+Cluster Spectral clustering Errors |CMO Errors
+A, A, A, A, &, A
+B,B B,B
+C,c, C and @, €, €& andC C,c, C,C and @
+D,D, 9, > D,D,G
+E and €,é€ FE and €, €, €, €
+F and ¥, F Fand ¥, F
+A and #, x and H A andH, #
+K,k K,k
+L,| andl, & L,| andl, &
+
+NK MSs GCGHnDOVOAZZOrO A eRe eva
+
+M and M and IN
+
+N and N, N and NV
+
+O, O, 0, 0, °, O and o
+P,P and p, p and Y and o
+Q, Q, Q, 4, U, 2, €, S, #, 1
+R,R and R, R, & and R
+S,s,S
+
+T, T and 7,7
+
+U, U and u, U, 2
+
+Vi,vu,V
+
+W,w,w
+
+X, x, X, xX, X
+
+Y andy
+
+Z,2,ZandZ,Z
+
+FPrRFooorRrrRFOoOWN wWworRnnroewrnrF & OO
+
+M and p, M and I
+
+N and N, N and N, 8
+O, O, 0, 0, ° and O and o
+PandP, FY, 9 and p, p
+Q and Q, Q
+Rand k, R, R, R
+S,s,S
+
+T, T and 7,7
+
+U, u, U, A and U
+Vi,vu,V
+
+W,w andw
+
+X, x, X, xX, X
+
+Yyy
+
+Z,2,Z, 2,2
+
+GO co OF CO NWMrFR OF FN NHN WWrR DWF FP FOF OD FE
+
+ 
+
+Total
+
+oo
+neg
+
+No
+oO
+
+ 
+
+52
+
+Table 5.5.: Differences in spectral clustering and CMO.
+
+
 
 
 5.4. Hierarchy of Classifiers
@@ -4051,6 +4437,9 @@ might be crucial for the models quality.
 
 74
 
+74
+
+
 
 
 A. Figures, Tables and Algorithms
@@ -4622,6 +5011,9 @@ Regularization techniques are:
 
 86
 
+86
+
+
 
 
 C. Calculating Network Characteristics
@@ -5031,6 +5423,9 @@ Table D.4.: Inception-v4 network.
 
 96
 
+96
+
+
 
 
 E. Datasets
diff --git a/read/results/tika/2201.00021.txt b/read/results/tika/2201.00021.txt
index 43b8728..d538039 100644
--- a/read/results/tika/2201.00021.txt
+++ b/read/results/tika/2201.00021.txt
@@ -300,6 +300,147 @@ sociated Universities, Inc.
 
 Article number, page 2 of 10
 
+A&A proofs: manuscript no. mainArxiv
+
+tions (e.g., Mauersberger et al. 1987, 1988; Walsh et al. 2007;
+Henkel et al. 2013; Mei et al. 2020). Except for the NH3 (3,3)
+masers proposed to be associated with four supernova remnants
+(McEwen et al. 2016), almost all the other ammonia masers are
+detected in high-mass star-forming regions (HMSFRs). How-
+ever, while many HMSFRs host water (H2O), hydroxyl (OH),
+or methanol (CH3OH) masers, ammonia masers are quite rare
+in these sources, and the role that the environment of a young
+high-mass star plays in their excitation remains unclear. There-
+fore, dedicated searches for ammonia masers in HMSFRs are
+indispensable in regard to their overall incidence and associa-
+tion with different environments, which can provide additional
+constraints on the pumping mechanism of ammonia masers.
+
+So far, a total of 32 NH3 inversion transitions (ΔK = 0
+and ΔJ = 0) have been identified as masers. Among these, and
+despite arising from energy levels as high as 1090 K above
+the ground state, the NH3 (9,6) maser stands out as being the
+strongest and most variable one in W51-IRS2 (e.g., Henkel et al.
+2013). Maser emission in this line has only been detected in five
+HMSFRs, W51, NGC7538, W49, DR21 (OH) (Madden et al.
+1986), and Sgr B2(N) (Mei et al. 2020). The NH3 (3,3) masers
+are thought to be collisionally excited (e.g., Flower et al. 1990;
+Mangum & Wootten 1994); in contrast, the pumping mecha-
+nism of NH3 (9,6) masers is less well constrained (Madden et al.
+1986). Brown & Cragg (1991) have studied ortho-ammonia and
+found that it could possibly pump the (6,3) inversion line, but
+they did not extend their model to the (9,6) transition due to the
+fact that collision rates are only known for inversion levels up to
+J = 6 (e.g., Danby et al. 1988).
+
+NH3 (9,6) masers are found to be strongly variable, similar to
+H2O masers (Madden et al. 1986; Pratap et al. 1991; Henkel et al.
+2013). In W51-IRS2, Henkel et al. (2013) found that the (9,6)
+line showed significant variation in line shape within a time in-
+terval of only two days. Mapping of the (9,6) maser toward W51
+with very long baseline interferometry (VLBI) suggests that the
+masers are closer to the H2O masers than to the OH masers or
+to ultracompact (UC) H II regions (Pratap et al. 1991). While
+Henkel et al. (2013) and Goddi et al. (2015) showed that the SiO
+and NH3 masers in W51-IRS2 are very close to each other, their
+positions, differing by 0′′.065 (∼0.015 pc), do not fully coincide.
+
+In this paper we report the discovery of NH3 (9,6) masers
+in two HMSFRs, Cepheus A and G34.26+0.15. This increases
+the number of (9,6) maser detections in our Galaxy from five
+to seven. In Sect. 2 observations with the Effelsberg 100-meter
+telescope and the Karl G. Jansky Very Large Array (JVLA) are
+described. Results are presented in Sect. 3. The morphology of
+Cep A and G34.26+0.15 as well as a comparison of the emission
+distributions of different tracers with the NH3 (9,6) masers are
+presented in Sect. 4. Our main results are summarized in Sect. 5.
+
+2. Observations and data reduction
+2.1. Effelsberg observations and data reduction
+
+The NH3 (9,6) line was observed toward Cep A and
+G34.26+0.15 with the 100-meter Effelsberg telescope¹ in 2020
+January and 2021 February, July, and August. The S14mm dou-
+ble beam secondary focus receiver was employed. The full width
+at half maximum (FWHM) beam size is 49′′ at 18.5 GHz, the
+frequency of the target line. The observations were performed in
+position switching mode, and the off position was 10′ in azimuth
+
+1 Based on observations with the 100-meter telescope of the MPIfR
+(Max-Planck-Institut für Radioastronomie) at Effelsberg.
+
+Article number, page 2 of 10
+
+away from the source. For observations made before 2021 Au-
+gust, we used a spectrometer that covered 2 GHz wide backends
+with a channel width of 38.1 kHz, corresponding to ∼0.62 km s⁻¹
+at the line’s rest frequency, 18.49939 GHz (Poynter & Kakar
+1975). A high spectral resolution backend with 65536 channels
+and a bandwidth of 300 MHz was employed in 2021 August,
+providing a channel width of 0.07 km s⁻¹ at 18.5 GHz. Point-
+ing was checked every 2 hours using 3C 286 or NGC 7027.
+Focus calibrations were done at the beginning of the observa-
+tions and during sunset and sunrise toward the abovementioned
+pointing sources. The system temperatures were 100-130 K on
+a main-beam brightness temperature, T_MB, scale. This flux den-
+sity was calibrated assuming a T_MB/S ratio of 1.95 K/Jy, derived
+from continuum cross scans of NGC 7027 (the flux density was
+adopted from Ott et al. 1994). Calibration uncertainties are esti-
+mated to be ∼10%.
+
+We used the GILDAS/CLASS² package (Pety 2005) to re-
+duce the spectral line data. A first-order polynomial was sub-
+tracted from each spectrum for baseline removal.
+
+2.2. JVLA observations and data reduction
+
+Observations of the NH3 (9,6) line toward Cep A and
+G34.26+0.15 were obtained on 2021 July 13 with the JVLA
+of the National Radio Astronomy Observatory³ (NRAO) in the
+C configuration (project ID: 21A-157, PI: Yaoting Yan). We
+employed 27 antennas for the observations. The primary beam
+of the JVLA antennas is 150′′ (FWHM) at 18.5 GHz. A mix-
+ture of mixed three-bit and eight-bit samplers were used to per-
+form the observations. For the NH3 (9,6) line observations, we
+used one subband with the eight-bit sampler covering a band-
+width of 16 MHz with full polarization, eight recirculations, and
+four baseline board pairs (BlBPs) to provide a velocity range
+of 260 km s⁻¹ with a channel spacing of 0.13 km s⁻¹. Two
+additional subbands of bandwidth 16 MHz were used to cover
+the NH3 (8,5) and (10,7) lines. The three-bit sampler with 32
+subbands, each with a bandwidth of 128 MHz to cover a to-
+tal range of 4 GHz between 20–24 GHz, was used to mea-
+sure the continuum emission. 3C 286 with a flux density of
+2.89 Jy at 18.5 GHz (Perley & Butler 2013) was used as a
+calibrator for pointing, flux density, bandpass, and polarization.
+J2230+6946 and J1851+0035 served as gain calibrators for Cep
+A and G34.26+0.15, respectively. The on-source times were
+4m30s and 4m50s toward Cep A and G34.26+0.15, respectively.
+
+Data from two antennas were lost due to technical is-
+sues. The data from the remaining 25 antennas were reduced
+through the Common Astronomy Software Applications pack-
+age (CASA⁴; McMullin et al. 2007). We calibrated the data with
+the JVLA CASA calibration pipeline using CASA 6.1.2. The
+results were obtained after flagging data that contain artifacts.
+We inspected the phase, amplitude, and bandpass variations of
+the calibrated visibility data to search for additional artifacts be-
+fore imaging. Then, the uvcontsub task in CASA was used to
+separate the calibrated visibilities into two parts, one with line-
+only data and the other with the continuum data. The tclean task
+with a cell size of 0′′.2 and Briggs weighting with robust=0 was
+used to produce the images of spectral line and continuum emis-
+sion. The synthesized beams for NH3 (9,6) are 1′′.47 × 0′′.99 at
+
+2 https://www.iram.fr/IRAMFR/GILDAS/
+
+3 The National Radio Astronomy Observatory is a facility of the Na-
+tional Science Foundation operated under cooperative agreement by As-
+sociated Universities, Inc.
+
+4 https://casa.nrao.edu/
+
+
 
 
 Y. T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions
@@ -394,6 +535,217 @@ tainties, as unresolved.
 
 Article number, page 3 of 10
 
+Y. T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions
+
+P.A. = 58°.79 and 1′′.33 × 1′′.06 at P.A. = 5°.36 toward Cep A
+and G34.26+0.15, respectively. For the 1.36cm (20-24 GHz)
+continuum emission, the synthesized beams are 1′′.08 × 0′′.67 at
+P.A. = 60°.64 and 0′′.95 × 0′′.71 at P.A. = 5°.91 toward Cep A and
+G34.26+0.15. The typical absolute astrometric accuracy of the
+JVLA is ∼10% of the synthesized beam⁵. The flux density scale
+calibration accuracy is estimated to be within 15%.
+
+ 
+
+ 
+
+   
+
+       
+    
+   
+ 
+
+ 
+
+  
+
+ 
+
+ 
+
+ 
+
+ 
+
+      
+  
+
+ 
+
+ 
+
+     
+
+  
+
+    
+
+ 
+
+ 
+
+ 
+
+   
+
+ 
+
+ 
+
+ 
+
+ 
+
+0.6 fil a 7 0.3
+F | CepA 04-Jan-2020 J
+0.4 FI 4 O.2 6
+rE | Effelsberg 4 c
+0.2 Fl 4 O.1E
+| q c
+Of 4 OE
+E J o2k
+L J oo4k
+E | of
+E 4 o2F
+~ E J O15=
+> rt 4 E
+2 FE 4 OWE
+> E 7 0.056
+o G | OF
+c E' "| FE
+o E J 026
+x E 4 016
+i E 0
+—0.9 BH 0-1
+Fi fF 0.1E
+0.5 Fi 4 0.05 F Ht)
+E | f  O; rh
+Of c) — Et ty
+7 BRE EEE 0.09 HAH EH
+t | Effelsberg 12-Aug-2021]. E | 12-Aug-2021
+0.5 FI + E Ln) dl 5
+E | {| 0 !
+Ok 1
+—10 0 10 40 50 60 70
+
+Velocity (km/s)
+
+Velocity (km/s)
+
+Fig. 1. Spectra from NH3 (9,6) transition lines. Left: Top to bottom:
+Time sequence of NH3 (9,6) profiles observed toward Cep A with the
+Effelsberg 100-meter telescope (after subtracting a first-order polyno-
+mial baseline). A JVLA spectrum is interspersed. The systemic veloc-
+ity from CO and HCO+ lines is indicated by a dashed blue line. The
+two dashed red lines at LSR velocities, V_LSR, of −0.90 km s⁻¹ and
+−0.28 km s⁻¹ indicate the central velocities of the two major compo-
+nents. Right: NH3 (9,6) spectra from G34.26+0.15. The systemic ve-
+locity from C17O is indicated by a dashed blue line. The three dashed
+red lines at V_LSR = 54.1 km s⁻¹, 55.8 km s⁻¹, and 62.5 km s⁻¹ show the
+central velocities of the main ammonia emission components.
+
+3. Results
+
+The spectra from different epochs are shown in Figs. 1 and 2.
+Toward Cep A, the NH3 (9,6) line profile from the JVLA is ex-
+tracted from an Effelsberg-beam-sized region (FWHM, 49′′). In
+the case of G34.26+0.15, the NH3 spectrum is below the noise
+level if a similarly large beam size is used. Therefore, we de-
+rived the JVLA NH3 (9,6) spectrum from a smaller region, with
+radius 3′′.5, that contains all the detected NH3 (9,6) emission. In
+Table A.1 the observed NH3 (9,6) line parameters obtained by
+Gaussian fits are listed. NH3 (8,5) and (10,7) emission is not de-
+tected by our JVLA observations. The 3σ upper limits for the
+NH3 (8,5) and (10,7) lines toward Cep A are 23.2 mJy beam⁻¹
+5 https://science.nrao.edu/facilities/vla/docs/manuals/oss/performance-
+/positional-accuracy
+
+       
+  
+
+ 
+
+ 
+
+ 
+
+ 
+
+0.04 F q (0.2 E-634.2640.15 | 13-Jul-2021 4
+0.02 F 401 BN Wid 4
+k i WL A
+Ss ° 4 0 EF ol 5
+LS Ei itt 4 O.1F ! 1
+E lEtfelsberg th—Aug—2021 4 E ertelcber ee
+S in f E
+S oR Po i
+3 O1F | EFfelsber 12-Au tin OTE ane 4
+tL F | 8 | 9 q EF Effelsberg ii 12-Aug~2021 4
+0.05 5 ii! J 0.05 i 3
+i | 4 A ei a
+0 i rl tH 0 i i
+E ll H E roid
+C —0.05 & |
+—10 0 10 45 50 55 60 65
+
+Velocity (km/s) Velocity (km/s)
+
+Fig. 2. NH3 (9,6) line profiles emphasizing, in contrast to the spectra
+in Fig. 1, weaker features. Cep A spectra are presented on the left,
+G34.26+0.15 spectra on the right. The two dashed red lines in the left
+panels indicate V_LSR = 1.48 km s⁻¹ and 2.89 km s⁻¹. In the right panels,
+the two dashed red lines refer to 54.1 km s⁻¹ and 55.8 km s⁻¹.
+
+and 27.2 mJy beam⁻¹, respectively. In G34.26+0.15, the corre-
+sponding 3σ upper limits for the NH3 (8,5) and (10,7) lines are
+22.1 mJy beam⁻¹ and 30.4 mJy beam⁻¹. For both sources, sen-
+sitivity levels refer to emission from a single channel of width
+0.13 km s⁻¹. Taking the larger measured line widths of the (9,6)
+maser features (see Table A.1), these limits could be further low-
+ered by factors of two to four.
+
+3.1. Centimeter-continuum emission
+
+The 1.36cm continuum, derived from our JVLA observations,
+toward Cep A is presented in Fig. 3. Six published compact
+sources, HW2, HW3a, HW3b, HW3c, HW3d, and HW9, are de-
+tected in our observations. Figure 4 shows the 1.36cm contin-
+uum in G34.26+0.15. Three main continuum objects, A, B, and
+C, are detected. By using the imfit task in CASA, we measured
+the continuum flux at 1.36 cm toward individual compact source
+components in Cep A and G34.26+0.15. Details are given in Ta-
+ble A.2.
+
+3.2. NH3 (9,6) emission in Cep A
+
+In 2020 January, NH3 (9,6) emission with a peak flux density of
+0.67 + 0.07 Jy was first detected with the Effelsberg 100-meter
+telescope in Cep A. Emission with similar strength was also de-
+tected in 2021 February and August with the same telescope.
+Higher velocity resolution data, which were obtained in 2021
+August, again with the Effelsberg 100-meter telescope, show
+that the (9,6) emission contains two main velocity components.
+Overall, the flux densities of the NH3 (9,6) emission line mea-
+sured with the Effelsberg 100-meter telescope are, within the cal-
+ibration uncertainties, unchanged. This is valid for the time inter-
+val between 2020 January and August 2021, when we smoothed
+the obtained spectra to the same velocity resolution. We also
+see another two weaker components. Figure 2 emphasizes these
+weak components with an expanded flux density scale.
+
+Higher angular resolution data from the JVLA pinpoint the
+position of the NH3 (9,6) emission with an offset of (−0′′.28,
+0′′.02) relative to the 1.36cm continuum peak of Cep A HW2
+(Fig. 3). The deconvolved NH3 (9,6) component size is (0′′.29 ±
+0′′.15) × (0′′.19 ± 0′′.14) at P.A. = 174°, derived with the imfit task
+in CASA, and can thus be considered, accounting for the uncer-
+tainties, as unresolved.
+
+Article number, page 3 of 10
+
+
 
 
 A&A proofs: manuscript no. mainArxiv
@@ -434,6 +786,126 @@ component sizes are (1′′.42±0′′.43)× (0′′.54±0′′.62) at P.A.
 
 Article number, page 4 of 10
 
+A&A proofs: manuscript no. mainArxiv
+
+ke NH3 (9,6) -
+
+  
+ 
+
+Velocity (km s~})
+
+DEC_offset (arcsec)
+DEC_offset (arcsec)
+
+ 
+
+ 
+
+RA _offset (arcsec)
+
+RA_offset (arcsec)
+
+Fig. 3. Cepheus A. White contours mark the 1.36cm JVLA continuum map of Cep A; levels are —5, 5, 10, 20, 30, 40, 50, 70, 90,
+and 110 x 0.125 mJy beam™!. The background image is the Spitzer 4.5m emission, taken from the Galactic Legacy Infrared Mid-Plane
+Survey Extraordinaire (GLIMPSE; [Benjamin et al. |2003} |Churchwell et al.|{2009). The reference position is @y2999 = 22"56"173972, and
+62000 = 62°01'49”587, the peak position of the continuum map, is marked with a black cross. Slightly to the west of the cross is the black
+ellipse denoting the position of the NH; (9,6) emission with a purple star at its center. OH (Bartkiewicz et al.|/2005), HO (Sobolev et al.|/2018),
+and CH;0H masers are presented as diamonds, circles, and squares, respectively. The color bar on the right-hand side indicates
+the LSR velocity range of the maser spots.
+
+1 1. . 1. 1 ’
+Ke NH: (9.6) of | om Nh 8.6)
+3 OH ia 3 OH
+H:0 -2.0 S 62
+44M ct.0H 0H
+
+ 
+
+ 
+
+7 H20
+
+bw)
+2.4 M2
+
+w
+©
+
+w
+8
+Velocity (km s~?)
+
+M3
+
+DEC_offset (arcsec)
+
+DEC_offset (arcsec)
+°
+
+3.0
+
+ 
+
+ 
+
+10
+
+4 2
+
+ 
+
+ 
+
+see
+
+ 
+
+ 
+
+ 
+
+ 
+
+2.4 2.2 2.0 18 16 14 1.2 1.0
+RA_offset (arcsec)
+
+ 
+
+RA_offset (arcsec)
+
+Fig. 4. 1.36cm JVLA continuum map of G34.26+0.15 presented as white contours with levels of —5, 5, 10, 20, 30, 40, 50, 70, 90, 110, 130,
+150, 180, and 200 x 5.0 mJy beam™!. The background image is the Spitzer 4.5 ym emission, taken from GLIMPSE. The reference position is
+@y000 = 18"53™188560, and dy2999 = 01°14’58”201, the peak position, is marked by a black cross. The black ellipses show the positions of NH;
+(9,6) emissions with stars at their center (i.e, M1, M2, and M3). OH (Zheng et al/2000), H.O (Imai et al/2011), and CH;0H
+
+[2016) masers are presented as diamonds, circles, and squares, respectively. The color bar indicates the velocity range (Vis) of maser spots.
+
+velocity resolution data from 2021 August show the NH; (9,6)
+
+In view of the constancy of the flux densities obtained at Ef-
+emission to be composed of two different components. The spec-
+
+felsberg and the similar JVLA flux density, measured in 2021
+
+July, there is no missing interferometric flux density in the JVULA
+data.
+3.3. NH; (9,6) emission in G34.26+0.15
+
+The NH; (9,6) emission was first detected toward G34.26+0.15
+in 2020 January with the Effelsberg 100-meter telescope. Higher
+
+Article number, page 4 of 10
+
+tra of weak components on a smaller flux density scale are pre-
+sented in Fig.
+
+Three different locations showing NH3 (9,6) emission are
+found toward G34.26+0.15 (Fig. A). The deconvolved NH; (9,6)
+component sizes are (1/742 + 0’'43) x (054 + 062) at PA. = 97°
+(M1), (0/742 + 0°'27) x (015 + 0’’27) at P.A. = 150° (M2), and
+
+
 
 
 Y. T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions
@@ -577,6 +1049,143 @@ against continuum source C (∼ 7′′ resolution; Keto et al. 1987)
 
 Article number, page 5 of 10
 
+Y. T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions
+
+(1′′.17 ± 0′′.34) × (0′′.27 ± 0′′.46) at P.A. = 53° (M3) and are thus
+comparable to or smaller than the beam size.
+
+Overall, the NH3 (9,6) line from G34.26+0.15 weakened
+during the time interval from 2020 January to 2021 August by
+about 70%. A comparison between the JVLA spectrum and the
+Effelsberg data, assuming a linear decrease in the integrated in-
+tensity as a function of time between different epochs of the
+100-meter observations, suggests there is no missing flux in the
+JVLA data. This is similar to the situation in Cep A.
+
+4. Discussion
+4.1. Morphology of Cep A and G34.26+0.15
+
+Cep A, at a trigonometric parallax distance of 0.70±0.04 kpc
+(Moscadelli et al. 2009; Dzib et al. 2011), is the second closest
+HMSFR (after Orion) and by far the closest NH3 (9,6) maser
+known. About 16 compact (∼1′′) radio sources (e.g., Hughes &
+Wouterloot 1984; Hughes 1991; Garay et al. 1996) have been
+identified in Cep A. Hughes & Wouterloot (1984) discovered
+these targets at radio wavelengths, which are UC and hypercom-
+pact (HC) H II regions and/or stellar wind sources, subsequently
+named as HW sources. The HW2 object is one of the best known
+examples of a protostellar jet or disk system driving a powerful
+outflow (e.g., Rodriguez et al. 1980; Güsten et al. 1984; Torrelles
+et al. 1986; Curiel et al. 2006; Carrasco-Gonzalez et al. 2021).
+The observed NH3 (9,6) emission is slightly offset (−0′′.28, 0′′.02)
+from the center of HW2 (see Fig. 3).
+
+G34.26+0.15 is an HMSER located at a distance of 3.3 kpc
+(Kuchar & Bania|1994). It hosts four radio continuum compo-
+nents named A, B, C, and D. Component C is a prototypical
+cometary UC Hr region containing a compact head and a diffuse
+tail that extends from east to west (e.g., [Reid & Ho} 1985} |Garay
+et al.|/19865 |Sewilo et al.|/2004} |Sewito et al.201 1). Components
+A and B are HC Hn regions, located to the east of component
+C. An extended ring-like Hm region, called component D, is lo-
+cated southeast of components A-C. One of the three observed
+NHz (9,6) emission line sources, M1, is close to the head of com-
+ponent C, whereas M2 and M3 originate from another compact
+region in the west of the HC H1 component A (see Fig. 4).
+
+4.2. NH; (9,6) emission possibly caused by maser action
+
+As shown in Fig. [I} the NH3 (9,6) profiles in Cep A and
+G34.26+0.15 are narrow (AVj;2 <2.0 km s7!), much narrower
+than the expected line widths (>4 km s~') of thermal lines ob-
+served at a similar angular resolution (e.g., /Torrelles et al.|/1985)
+1986, |1993)|1999; Henkel et al.|1987; Comito et al.2007; |Mook-
+erjea et al.2007; (Wyrowski et al.|/2012; |Beuther et al.2018). Ve-
+locity shifts with respect to the systemic velocities of the two
+sources are both observed, that is, V ~10 km s~! in Cep A and
+V ~4km s"! in G34.26+0.15 (see details in Sect. 4.3). Further-
+more, time variability is observed in the case of G34.26+0.15,
+which is also a characteristic feature of maser emission.
+Additional evidence of their maser nature is the high bright-
+ness temperatures of the (9,6) emission spots toward Cep A and
+G34.26+0.15. The spectral parameters are listed in Table [A.3}
+Because at least a significant part of the NH3 (9,6) emission
+is not resolved by our JVLA observations, the derived bright-
+ness temperatures are only lower limits. Nevertheless, the lower
+limits on the brightness temperature are >800 K in Cep A (see
+Table [A.3), which is much higher than the expected thermal
+gas temperature of ~250 K (e.g., [Patel et al.| (2005; (Comito
+et al. (2007; |Beuther et al. |2018). This strongly suggests that
+
+the NH3 (9,6) emission in Cep A is due to maser action. Be-
+cause G34.26+0.15 is located at about five times the distance to
+Cep A, beam dilution effects reduce the lower main beam bright-
+ness temperature limit to 400 K in G34.26+0.15 (M2) (see Ta-
+ble[A.3). We also note that the luminosity of the NH3 (9,6) emis-
+sion in G34.26+0.15 is higher than or comparable to that in Cep
+A, depending on the epoch of our observations.
+
+Finally, the non-detections of the (8,5) and (10,7) lines also
+indicate that the (9,6) line is special. This allows us to derive
+lower 3σ limits of the (9,6)/(8,5) and (9,6)/(10,7) line intensity
+ratios. The (9,6) line arises from ortho-NH3 (K = 3n), whereas
+the NH3 (8,5) and (10,7) lines are para-NH3 (K ≠ 3n) lines.
+The minimum ortho-to-para ratios are in the range 12-42 and 1-
+8 toward Cep A and G34.26+0.15, respectively. The statistical
+weights for the ortho states are twice as large as those for the
+para states (e.g., Umemoto et al. 1999; Goddi et al. 2011; Henkel
+et al. 2013). In Cep A, the line intensity ratios are far higher than
+this factor of two. Thus, at least in Cep A the higher main beam
+brightness peak temperature of the (9,6) emission is caused by
+maser action, perhaps involving exponential amplification, and
+the case of G34.26+0.15 is likely similar.
+
+4.3. Comparison of NH; (9,6) masers with previously
+published (quasi-)thermal NH; emission
+
+The metastable (1,1), (2,2), (3,3), and (4,4) ammonia lines
+show thermal emission toward Cep A over a velocity range of
+-13kms"! < Visp < -4kms7! (Brown et al.|/1981; |Giisten
+et al.|/1984} |Torrelles et al.) 1985) |1986) |1993] |1999). An average
+NH; column density of ~5x10!° cm~? was estimated for a region
+of 3” around HW? (Torrelles et al.||1999). This high NH3 abun-
+dance could provide a suitable environment for maser species.
+Large line widths (AV, /2 ~7.0 km s~') with Visp ~ —10 kms“!
+in both (1,1) and (2,2) lines were found toward HW2 (Torrelles
+et al.|1993). The velocity is similar to the cloud’s systemic lo-
+cal standard of rest (LSR) velocity of —11.2 km s7!, which
+is based on CO (Narayanan & Walker |/1996) and HCO* ob-
+servations (Gomez et al.|/1999). Our (9,6) maser is redshifted
+(-0.9 kms"! < Vise <2.9 km s7!) and shares positions with
+the outflowing gas seen in CO and HCO? with similarly red-
+shifted velocities. Therefore, we argue that the (9,6) masers are
+related to outflowing gas.
+In G34.26+0.15, a large NH; column density,
+10!85#02 cm-2, and a kinetic temperature of 225475 K
+were derived by |Henkel et al.| (1987) based on measurements
+of 15 NH3 inversion transitions in the frequency range of
+22.0-26.0 GHz. These did not include the (9,6) transition.
+While these lines were measured with a beam size of about
+40”, a comparison of the peak intensities of the optically thick
+lines with the kinetic temperature reveals the size of the hot,
+ammonia-emitting core to be only ~2.5’’. All those measured
+NH3 lines were quasi-thermal and had LSR velocities of
+~ 58.5 km s~!, close to the systemic velocity of ~ 58.1 km sg!
+obtained from C!’O observations (Wyrowski et al.) |2012).
+Their line widths (AV;;2 >3.6 km s~') are larger than what
+we find (0.35 km s7! < AV\/2 < 0.94 km s!) for each (9,6)
+maser component (see details in Table [A.3). In all, we may
+have observed four different (9,6) velocity features. Three
+are blueshifted at Visp ~ 53.8 km s7!, 55.8 km s7!, and
+56.8 km s~!, and a fourth, tentatively detected, at 62.5 km s7!.
+This tentative redshifted feature was only potentially detected
+with Effelsberg in 2020 January. The velocity is similar to that
+of the JVLA measurements on the NH; (1,1) absorption line
+against continuum source C (~ 7” resolution; |Keto et al.||1987)
+
+Article number, page 5 of 10
+
+
 
 
 A&A proofs: manuscript no. mainArxiv
@@ -947,6 +1556,75 @@ C 18 53 18.560 ± 0.004 +01 14 58.201 ± 0.112 (2.03 ± 0.30) × (1.34 ± 0.20)
 
 Article number, page 8 of 10
 
+A&A proofs: manuscript no. mainArxiv
+
+Appendix A:
+
+Table A.1. Summary of NH$_3$ (9,6) maser observations.
+
+ 
+
+ 
+
+ 
+
+Source Telescope Beam Epoch Channel S_ν rms ∫S_ν dv V_LSR ΔV_1/2
+size spacing
+(km s⁻¹) (Jy) (mJy) (Jy km s⁻¹) (km s⁻¹) (km s⁻¹)
+
+Cep A Effelsberg 49″ 2020, Jan. 04 0.62 0.67 3.41 1.19 ± 0.02 -1.11 ± 0.02 1.67 ± 0.04
+Effelsberg 49″ 2021, Feb. 11 0.62 0.59 5.97 1.08 ± 0.02 -0.74 ± 0.02 1.70 ± 0.04
+Effelsberg 49″ 2021, Feb. 15 0.62 0.65 10.98 1.11 ± 0.03 -0.75 ± 0.02 1.60 ± 0.05
+JVLA (a) 1.47″ × 0.99″ 2021, Jul. 13 0.13 1.13 144 0.89 ± 0.09 -0.86 ± 0.03 0.74 ± 0.12
+Effelsberg 49″ 2021, Aug. 11 0.07 0.98 13.36 0.49 ± 0.02 -0.90 ± 0.01 0.47 ± 0.01
+0.35 0.26 ± 0.02 -0.28 ± 0.02 0.69 ± 0.05
+Effelsberg 49″ 2021, Aug. 12 0.07 0.98 13.35 0.50 ± 0.01 -0.89 ± 0.07 0.48 ± 0.07
+0.35 0.20 ± 0.01 -0.29 ± 0.07 0.54 ± 0.07
+0.06 0.07 ± 0.01 0.51 ± 0.07 1.09 ± 0.07
+0.02 0.02 ± 0.01 2.15 ± 0.07 0.80 ± 0.07
+0.07 0.06 ± 0.01 2.89 ± 0.07 0.92 ± 0.07
+G34.26+0.15 Effelsberg 49″ 2020, Jan. 03 0.62 0.30 1.26 0.65 ± 0.03 62.50 ± 0.05 2.05 ± 0.13
+Effelsberg 49″ 2021, Feb. 11 0.62 0.24 2.42 0.40 ± 0.02 55.76 ± 0.04 1.60 ± 0.12
+Effelsberg 49″ 2021, Feb. 15 0.62 0.20 4.86 0.38 ± 0.02 55.71 ± 0.05 1.80 ± 0.14
+JVLA (b) 1.33″ × 1.06″ 2021, Jul. 13 0.13 0.23 37.1 0.09 ± 0.02 54.41 ± 0.03 0.38 ± 0.09
+0.22 0.22 ± 0.02 55.82 ± 0.05 0.95 ± 0.12
+0.15 0.06 ± 0.01 57.21 ± 0.04 0.35 ± 0.08
+Effelsberg 49″ 2021, Aug. 11 0.07 0.08 13.92 0.06 ± 0.007 54.10 ± 0.05 0.68 ± 0.12
+0.07 0.02 ± 0.006 54.82 ± 0.03 0.31 ± 0.09
+0.12 0.10 ± 0.006 55.85 ± 0.02 0.75 ± 0.06
+Effelsberg 49″ 2021, Aug. 12 0.07 0.16 27.40 0.09 ± 0.008 55.83 ± 0.02 0.56 ± 0.05
+
+ 
+
+Notes. The spectral parameters are obtained from Gaussian fitting. (a) The JVLA spectrum toward Cep A is extracted from the Effelsberg-beam-
+sized region (FWHM 49″). (b) For G34.26+0.15, the JVLA beam samples the NH$_3$ (9,6) spectrum over a region of radius 35, which contains all
+detected NH$_3$ (9,6) emissions.
+
+Table A.2. 1.36 cm JVLA flux densities of individual continuum sources.
+
+ 
+
+ 
+
+ 
+
+Source R.A. Dec. Size P.A. S_ν
+(h m s) (° ′ ″) (arcsec) (deg) (mJy)
+Cep A HW2 22 56 17.972 ± 0.003 +62 01 49.587 ± 0.015 (0.45 ± 0.19) × (0.22 ± 0.10) 50.0 20.2 ± 1.4
+HW3a 22 56 17.420 ± 0.022 +62 01 44.576 ± 0.076 (2.35 ± 0.45) × (0.55 ± 0.14) 66.6 4.75 ± 0.74
+HW3b 22 56 17.578 ± 0.009 +62 01 45.041 ± 0.043 (1.43 ± 0.24) × (0.45 ± 0.10) 59.9 3.19 ± 0.36
+HW3c 22 56 17.956 ± 0.016 +62 01 46.224 ± 0.038 (1.44 ± 0.37) × (0.36 ± 0.19) 86.0 9.90 ± 1.7
+HW3d 22 56 18.195 ± 0.005 +62 01 46.325 ± 0.014 (1.26 ± 0.12) × (0.30 ± 0.19) 102.5 13.75 ± 0.92
+HW9 22 56 18.626 ± 0.014 +62 01 47.851 ± 0.137 (1.53 ± 0.51) × (0.29 ± 0.30) 28.0 3.26 ± 0.78
+G34.26+0.15 A 18 53 18.774 ± 0.005 +01 14 56.208 ± 0.125 (0.66 ± 0.49) × (0.50 ± 0.33) 10.0 94 ± 33
+B 18 53 18.649 ± 0.005 +01 15 00.071 ± 0.180 (2.31 ± 0.49) × (0.85 ± 0.21) 17.4 597 ± 110
+C 18 53 18.560 ± 0.004 +01 14 58.201 ± 0.112 (2.03 ± 0.30) × (1.34 ± 0.20) 178.0 5070 ± 660
+
+ 
+
+Article number, page 8 of 10
+
+
 
 
 Y. T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions
diff --git a/read/results/tika/2201.00022.txt b/read/results/tika/2201.00022.txt
index ed7049d..21f3bbf 100644
--- a/read/results/tika/2201.00022.txt
+++ b/read/results/tika/2201.00022.txt
@@ -34,7 +34,7 @@
 
 
 
-Draft version January 4, 2022
+Draft version July 7, 2022
 Typeset using LATEX twocolumn style in AASTeX631
 
 The Formation of Intermediate Mass Black Holes in Galactic Nuclei
@@ -50,27 +50,37 @@ ABSTRACT
 
 Most stellar evolution models predict that black holes (BHs) should not exist above approximately
 
-50−70 M�. However, recent LIGO/Virgo detections indicate the existence of BHs with masses at and
+$50-70\,M_\odot$, the lower limit of the pair-instability mass gap. However, recent LIGO/Virgo detections
 
-above this threshold. We suggest that massive BHs, including intermediate mass black holes (IMBHs),
+indicate the existence of BHs with masses at and above this threshold. We suggest that massive
 
-can form in galactic nuclei through collisions between stellar-mass black holes and the surrounding
+BHs, including intermediate mass black holes (IMBHs), can form in galactic nuclei through collisions
 
-main-sequence stars. Considering dynamical processes such as collisions, mass segregation, and relax-
+between stellar-mass black holes and the surrounding main-sequence stars. Considering dynamical
 
-ation, we find that this channel can be quite efficient, forming IMBHs as massive as 104 M�. Our
+processes such as collisions, mass segregation, and relaxation, we find that this channel can be quite
 
-results suggest that massive black holes and IMBHs may be ubiquitous in galactic centres. This for-
+efficient, forming IMBHs as massive as $10^4\,M_\odot$. This upper limit assumes that (1) the BHs accrete a
 
-mation channel also has implications for observations. Collisions between stars and BHs can produce
+substantial fraction of the stellar mass captured during each collision and (2) that the rate at which
 
-electromagnetic signatures, for example, from x-ray binaries and tidal disruption events. Additionally,
+new stars are introduced into the region near the SMBH is high enough to offset depletion by stellar
 
-formed through this channel, both black holes in the mass gap and IMBHs can merge with the super-
+disruptions and star-star collisions. We discuss deviations from these key assumptions in the text. Our
 
-massive black hole at the center of a galactic nucleus through gravitational waves. These gravitational
+results suggest that BHs in the pair-instability mass gap and IMBHs may be ubiquitous in galactic
 
-wave events are extreme and intermediate mass ratio inspirals (EMRIs and IMRIs, respectively).
+centers. This formation channel has implications for observations. Collisions between stars and BHs
+
+can produce electromagnetic signatures, for example, from x-ray binaries and tidal disruption events.
+
+Additionally, formed through this channel, both black holes in the mass gap and IMBHs can merge
+
+with the supermassive black hole at the center of a galactic nucleus through gravitational waves.
+
+These gravitational wave events are extreme and intermediate mass ratio inspirals (EMRIs and IMRIs,
+
+respectively).
 
 1. INTRODUCTION
 
@@ -81,7 +91,6 @@ GW190521 (The LIGO Scientific Collaboration et al.
 2020a,b) produced an intermediate mass black hole of
 
 approximately 142 M�. This event may have also had a
-
 85 M� progenitor, which falls within the pair-instability
 
 mass gap that limits stellar black holes (BHs) to no
@@ -91,6 +100,7 @@ more than ∼< 50 M� (e.g., Heger et al. 2003; Woosley
 2017)1. Similarly, the merger products of GW150914,
 
 GW170104, and GW170814 fall within the mass gap
+
 (e.g., Abbott et al. 2016, 2017a,b). BH mergers that
 
 form second generation BHs and, in some cases, inter-
@@ -99,16 +109,6 @@ mediate mass BHs (IMBHs), these gravitational wave
 
 (GW) events can occur in globular clusters, young stel-
 
-lar clusters, or the field (e.g., Rodriguez et al. 2018; Ro-
-
-driguez et al. 2019; Fishbach et al. 2020; Mapelli et al.
-
-2021b,a; Di Carlo et al. 2019, 2021; Dall’Amico et al.
-
-2021; Arca Sedda et al. 2021). However, IMBHs are
-
-not limited to these locations and may reside in galac-
-
 Corresponding author: Sanaea C. Rose
 
 srose@astro.ucla.edu
@@ -118,6 +118,15 @@ metallicity of the progenitor (e.g., Woosley 2017; Spera & Mapelli
 2017a; Limongi & Chieffi 2018a; Sakstein et al. 2020; Belczynski
 et al. 2020a; Renzo et al. 2020; Vink et al. 2021).
 
+lar clusters, or the field (e.g., Rodriguez et al. 2018; Ro-
+
+driguez et al. 2019; Fishbach et al. 2020; Mapelli et al.
+
+2021b,a; Di Carlo et al. 2019, 2021; Dall’Amico et al.
+
+2021; Arca Sedda et al. 2021). However, IMBHs are
+
+not limited to these locations and may reside in galac-
 tic nuclei as well. Several studies propose that our
 
 own galactic center may host an IMBH in the inner pc
@@ -152,26 +161,6 @@ Ferrara et al. 2014; Choi et al. 2015; Shlosman et al.
 
 vive galaxy evolution and mergers to present day (e.g.,
 
-Rashkov & Madau 2014), with significant effects on their
-
-stellar and even dark matter surroundings (e.g., Bertone
-
-et al. 2009; Chen & Liu 2013; Bringmann et al. 2012; Eda
-
-et al. 2013; Naoz & Silk 2014; Naoz et al. 2019). Another
-
-popular formation channel relies on the coalescence of
-
-many stellar-mass black holes. For example, IMBHs
-
-may form in the centers of globular clusters, where few-
-
-body interactions lead to the merger of stellar-mass BHs
-
-(e.g., O’Leary et al. 2006; Gürkan et al. 2006; Blecha
-
-et al. 2006; Freitag et al. 2006; Umbreit et al. 2012; Ro-
-
 ar
 X
 
@@ -185,7 +174,7 @@ iv
 02
 
 2v
-1 
+2 
 
  [
 as
@@ -199,33 +188,55 @@ ph
 A
 ] 
 
- 3
-1 
-
-D
-ec
+ 6
+ J
 
+ul
  2
-02
 
-1
+02
+2
 
 mailto: srose@astro.ucla.edu
 
 
 2 Rose et al.
 
+Rashkov & Madau 2014), with significant effects on their
+
+stellar and even dark matter surroundings (e.g., Bertone
+
+et al. 2009; Chen & Liu 2013; Bringmann et al. 2012; Eda
+
+et al. 2013; Naoz & Silk 2014; Naoz et al. 2019). Another
+
+popular formation channel relies on the coalescence of
+
+many stellar-mass black holes, which may seed objects
+
+as massive as SMBHs (e.g., Kroupa et al. 2020). IMBHs
+
+may form in the centers of globular clusters, where few-
+
+body interactions lead to the merger of stellar-mass BHs
+
+(e.g., O’Leary et al. 2006; Gürkan et al. 2006; Blecha
+
+et al. 2006; Freitag et al. 2006; Umbreit et al. 2012; Ro-
+
 driguez et al. 2018; Rodriguez et al. 2019; Fragione et al.
 
 2020b). Other formation mechanisms invoke successive
 
-collisions and mergers of massive stars (e.g., Portegies
+collisions and mergers of massive stars (e.g., Ebisuzaki
 
-Zwart & McMillan 2002; Portegies Zwart et al. 2004;
+et al. 2001; Portegies Zwart & McMillan 2002; Portegies
 
-Freitag et al. 2006; Kremer et al. 2020; González et al.
+Zwart et al. 2004; Freitag et al. 2006; Sakurai et al. 2017;
 
-2021; Di Carlo et al. 2021).
+Kremer et al. 2020; González et al. 2021; Di Carlo et al.
+
+2021; Das et al. 2021a,b; Escala 2021).
 
 The main obstacle to sequential BH mergers in clus-
 
@@ -257,27 +268,49 @@ tion timescale. Using this approach, they showed that
 
 time of a cluster.
 
-However, as discussed in Section 2.2, direct star-BH
+However, as discussed in Section 2.2, direct BH-star
 
 collisions are much more frequent than BH-BH collision
 
 in galactic nuclei, making the former a promising chan-
 
-nel for BH growth. We propose that IMBHs can form
+nel for BH growth. In an N-body study of young star
+
+clusters, Rizzuto et al. (2022) find that BH-star colli-
+
+sions are a main contributor to the formation of BHs
+
+in the mass gap and IMBHs. In a similar vein, Stone
+
+et al. (2017) demonstrate that massive BHs can form
+
+from repeated tidal encounters between stars and BHs.
+
+More generally, several studies have explored the role of
+
+collisions in a GN, with implications for the stellar and
+
+red giant populations (e.g., Dale & Davies 2006; Dale
+
+et al. 2009; Balberg et al. 2013; Mastrobuono-Battisti
+
+et al. 2021). We propose that IMBHs can form naturally
+
+within the central pc of a galactic center through re-
 
-naturally within the central pc of a SMBH in a galactic
+peated collisions between BHs and main sequence stars.
 
-center. Specifically, these IMBHs form through repeated
+During a collision, the BH can accrete some portion of
 
-collisions with main sequence stars, accreting some or
+the star’s mass. Over many collisions, it can grow ap-
 
-all of the star’s mass depending on the details of the
+preciably in size. We demonstrate that this channel can
 
-collision. We demonstrate that this channel can create
+create IMBHs with masses as large as $10^4\,M_\odot$, an upper
 
-IMBHs with masses as large as 104 M�, depending on
+limit that depends on the density profile of the surround-
 
-the density profile of the surrounding stars.
+ing stars and the efficiency of the accretion.
 
 The paper is structured as follows: we describe rele-
 
@@ -295,15 +328,15 @@ solutions to our equations in two different regimes, ef-
 
 ficient collisions and inefficient collisions We compare
 
-these solutions to our statistical results. Sections 2.5
+these solutions to our statistical results. Sections 2.6
 
-and 2.7 discuss implications for GW merger events be-
+and 2.8 discuss implications for GW merger events be-
 
 tween IMBHs and the SMBH. We then incorporate re-
 
 laxation processes and discuss the subsequent results in
 
-Section 2.8. Finally, we discuss and summarize our find-
+Section 2.9. Finally, we discuss and summarize our find-
 
 ings in Section 3.
 
@@ -372,7 +405,21 @@ Otherwise, the innermost region of the GN would be
 
 poorly represented in our sample. We consider other
 
-observationally motivated distributions in Section 2.8,
+
+
+IMBH Formation in Galactic Nuclei 3
+
+Figure 1. We plot the relevant timescales, including col-
+lision (green), relaxation (gold), and BH-BH GW capture
+(purple), for a single BH in the GN as a function of distance
+from the SMBH. For the collision timescale, we assume the
+BH is on a circular orbit. The timescales depend on the
+density, so we adopt a range of density profiles, bounded by
+α = 1 (dashed curve) to α = 2 (dark, solid curve). The dark
+blue line represents the time for a $10^5\,M_\odot$ BH to merge with
+the SMBH through GW emission.
+
+observationally motivated distributions in Section 2.9,
 
 but reserve a more detailed examination of the distribu-
 
@@ -422,20 +469,6 @@ the collision rate, while n and σ are simply evaluated
 
 at the semimajor axis of the orbit (see below). Note
 
-
-
-IMBH Formation in Galactic Nuclei 3
-
-Figure 1. We plot the relevant timescales, including col-
-lision (green), relaxation (gold), and BH-BH GW capture
-(purple), for a single BH in the GN as a function of distance
-from the SMBH. For the collision timescale, we assume the
-BH is on a circular orbit. The timescales depend on the
-density, so we adopt a range of density profiles, bounded by
-α = 1 (dashed curve) to α = 2 (dark, solid curve). The dark
-blue line represents the time for a 105 M� BH to merge with
-the SMBH through GW emission.
-
 that this timescale equation includes the effects of grav-
 
 itational focusing, which enhances the cross-section of
@@ -488,9 +521,6 @@ n(r•) =
 1M�
 . (3)
 
-2 We note that the eccentricity has a very minor effect on the
-collision timescale (Rose et al. 2020).
-
 The collision timescale also depends on the velocity dis-
 
 persion, which we express as:
@@ -549,6 +579,144 @@ lisions will be the main driver of IMBH growth in the
 
 GN.
 
+2 We note that the eccentricity has a very minor effect on the
+collision timescale (Rose et al. 2020).
+
+IMBH ForMATION IN GALACTIC NUCLEI 3
+
+ 
+
+1015 4
+
+1013 4
+
+101! 4
+
+Timescale [yr]
+
+10? 4
+
+107 4
+
+ 
+
+ 
+
+ 
+
+ 
+
+107 10-2 10-1 10°
+Distance from SMBH [pc]
+
+Figure 1. We plot the relevant timescales, including col-
+lision (green), relaxation (gold), and BH-BH GW capture
+(purple), for a single BH in the GN as a function of distance
+from the SMBH. For the collision timescale, we assume the
+BH is on a circular orbit. The timescales depend on the
+density, so we adopt a range of density profiles, bounded by
+$\alpha = 1$ (dashed curve) to $\alpha = 2$ (dark, solid curve). The dark
+blue line represents the time for a $10^5\,M_\odot$ BH to merge with
+the SMBH through GW emission.
+
+observationally motivated distributions in Section 2.9,
+but reserve a more detailed examination of the distribu-
+tion’s impact for future work.
+
+2.2. Direct Collisions
+
+BHs in the GN can undergo direct collisions with other
+objects. The timescale for this process, $t_{\rm coll}$, can be es-
+timated using a simple rate calculation: $t_{\rm coll}^{-1} = n \sigma A$,
+where $n$ is the number density of objects, $\sigma$ is the ve-
+locity dispersion, and $A$ is the cross-section. We use the
+collision timescale from Rose et al. (2020):
+
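+\begin{equation}
+t_{\rm coll}^{-1} = \pi n(a_\bullet) \sigma(a_\bullet)
+\times \left( f_1(e_\bullet) r_c^2 + f_2(e_\bullet) r_c
+\frac{2 G (m_{\rm BH} + m_\star)}{\sigma(a_\bullet)^2} \right) ,
+\tag{1}
+\end{equation}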
+
+where $G$ is the gravitational constant and $r_c$ is the sum
+of the radii of the interacting objects, a black hole with
+mass $m_{\rm BH}$ and a star with mass $m_\star$. Detailed in Rose
+et al. (2020), $f_1(e_\bullet)$ and $f_2(e_\bullet)$ account for the effect of
+the eccentricity of the BH’s orbit about the SMBH on
+the collision rate, while $n$ and $\sigma$ are simply evaluated
+at the semimajor axis of the orbit (see below). Note
+that this timescale equation includes the effects of grav-
+itational focusing, which enhances the cross-section of
+interaction.
+
+Assuming a circular orbit for simplicity, we plot the
+timescale for a BH orbiting in the GN to collide with
+a $1\,M_\odot$ star as a function of distance from the SMBH
+
+in Figure 1.2 As this timescale depends on the density
+of surrounding stars, we adopt a density profile of the
+form:
+
+\begin{equation}
+\rho(r_\bullet) = \rho_0 \left( \frac{r_\bullet}{r_0} \right)^{-\alpha} , \tag{2}
+\end{equation}
+
+where $r_\bullet$ denotes the distance from the SMBH. We adopt
+a SMBH mass of $4 \times 10^6\,M_\odot$ such that our fiducial GN
+matches our own galactic center (e.g., Ghez et al. 2005;
+Genzel et al. 2003). In this case, the normalization in
+Eq. (2) is $\rho_0 = 1.35 \times 10^6\,M_\odot/{\rm pc}^3$ at $r_0 = 0.25$ pc (Gen-
+zel et al. 2010). Additionally, in Eq. (2), $\alpha$ gives the
+slope of the power law. We assume that a uniform pop-
+ulation of solar mass stars account for most of the mass
+in the GN, making the stellar number density:
+
+\begin{equation}
+n(r_\bullet) = \frac{\rho(r_\bullet)}{1\,M_\odot} . \tag{3}
+\end{equation}
+ 
+
+The collision timescale also depends on the velocity dis-
+persion, which we express as:
+
+\begin{equation}
+\sigma(r_\bullet) = \sqrt{\frac{G M_\bullet}{r_\bullet (1 + \alpha)}} , \tag{4}
+\end{equation}
+where $\alpha$ is the slope of the density profile and $M_\bullet$ de-
+notes the mass of the SMBH (Alexander 1999; Alexan-
+der & Pfuhl 2014). As mentioned above, Eq. (1) depends
+on the sum of the radii of the colliding objects, $r_c$. We
+take $r_c = 1\,R_\odot$ because these interactions involve a BH
+and a star, and the former has a much smaller physi-
+cal cross-section. For example, the Schwarzschild radius
+of a $10\,M_\odot$ BH is only 30 km, or $4.31 \times 10^{-5}\,R_\odot$. For
+this reason, direct collisions between compact objects
+are very rare and not included in our model.
+
+We note that direct collisions between BHs, via GW
+emission, were shown to be efficient in nuclear star clus-
+ters without SMBHs (e.g., Portegies Zwart & McMil-
+lan 2000; O’Leary et al. 2006; Rodriguez et al. 2016).
+However, in the GN, star-BH collisions are much more
+frequent than direct BH-BH collisions. As depicted in
+Figure 1, the star-BH collision timescale for a range
+of density profiles is many orders of magnitude shorter
+than the BH-BH GW collision timescale (for the rele-
+vant equations, see O’Leary et al. 2009; Gondan et al.
+2018, for example). Thus, we expect that star-BH col-
+lisions will be the main driver of IMBH growth in the
+GN.
+
+2 We note that the eccentricity has a very minor effect on the
+
+collision timescale (Rose et al. 2020).
+
+
+
+
+4 Rose et al.
+
 2.3. Statistical Approach to Collisions
 
 We simulate the mass growth of a population of BHs
@@ -583,16 +751,6 @@ the updated BH mass and repeat this process until the
 
 time elapsed equals the simulation time of 10 Gyr3.
 
-3 Closer to the SMBH, ∆t may exceed the collision timescale by
-a factor of a few for steep density profiles. We include a safe-
-guard in our code which takes the ratio tcoll/∆t and rounds it
-to the nearest integer. We take this integer to be the number of
-collisions and increase the BH mass accordingly.
-
-
-
-4 Rose et al.
-
 2.4. Mass Growth
 
 When a BH collides with a star, it may accrete ma-
@@ -613,7 +771,7 @@ outermost point, its surface, which corresponds to the
 
 maximum impact parameter 1 R�. Qualitatively, one
 
-might expect that the BH could accrete the entire star
+might expect that the BH could capture the entire star
 
 (i.e., ∆m ∼ 1 M�) if the relative velocity is smaller than
 
@@ -625,11 +783,19 @@ the stars may be much larger than the escape velocity
 
 from the BH at the star’s surface. In this case, the BH
 
-accretes a “tunnel” of material through the star. This
+captures a “tunnel” of material through the star. This
 
 tunnel has radius equal to the Bondi radius and length
 
-approximately 1R�.
+approximately $1\,R_\odot$. For the purposes of this study, we
+
+assume that the BH accretes all of the material that
+
+it captures. The details of the accretion are uncertain,
+
+however, and it may be much less efficient than our re-
+
+sults imply. We discuss accretion in Section 2.5.
 
 To estimate ∆m, we begin with the Bondi-Hoyle ac-
 
@@ -645,6 +811,25 @@ BHρstar
 
 , (5)
 
+3 Closer to the SMBH, ∆t may exceed the collision timescale by
+a factor of a few for steep density profiles. We include a safe-
+guard in our code which takes the ratio tcoll/∆t and rounds it
+to the nearest integer. We take this integer to be the number of
+collisions and increase the BH mass accordingly.
+
+Figure 2. We consider an example that highlights the mass
+growth as a function of distance from the SMBH. Grey dots
+represent the initial masses and distances from the SMBH
+of the BHs involved in the simulation. For simplicity, we set
+the initial mass equal to $10\,M_\odot$ for all of the BHs. Assuming
+the density profile of stars has α = 1, we consider two cases:
+BHs accrete all of the star’s mass during a collision (red) and
+only a portion of the star’s mass is accreted during a collision
+given by Eq. 6 (blue). The latter case results in less growth
+closer to the SMBH where the velocity dispersion becomes
+high. The shaded regions and dashed lines represent the
+analytical predictions detailed in Section 2.4.
+
 where cs is the speed of sound in the star and ρstar is its
 
 density (e.g., Bondi 1952; Bondi & Hoyle 1944; Shima
@@ -701,26 +886,158 @@ tions assume α = 1 for the stellar density profile, ensur-
 
 ing the collision timescale is long compared to the sim-
 
-ulation time, 10 Gyr. Therefore, the BHs grow slowly,
+ROSE ET AL.
 
-Figure 2. We consider an example that highlights the mass
-growth as a function of distance from the SMBH. Grey dots
-represent the initial masses and distances from the SMBH
-of the BHs involved in the simulation. For simplicity, we set
-the inital mass equal to 10M� for all of the BHs. Assuming
-the density profile of stars has α = 1, we consider two cases:
-BHs accrete all of the star’s mass during a collision (red) and
-only a portion of the star’s mass is accreted during a collision
-given by Eq. 6 (blue). The latter case results in less growth
-closer to the SMBH where the velocity dispersion becomes
-high. The shaded regions and dashed lines represent the
-analytical predictions detailed in Section 2.4.
+2.3. Statistical Approach to Collisions
 
-and their final masses can be approximated using the
+We simulate the mass growth of a population of BHs
 
-following equation:
+with initial conditions detailed in Section 2.1. Over an
 
-mfinal(tcoll → const.) =minitial + ∆m
+increment $\Delta t$ of $10^6$ yr, we calculate the probability of
+a collision occurring, given by $\Delta t / t_{\rm coll}$. This choice of
+$\Delta t$ is motivated by our galactic center’s star formation
+timescale (e.g., Lu et al. 2009), allowing for regular re-
+plenishment of the stellar population in the GN. We have
+
+checked that the results are not sensitive to this choice
+
+of $\Delta t$, omitted here to avoid clutter. We draw a number
+between 0 and 1 using a random number generator. If
+that number is less than or equal to the probability, we
+increase the BH’s mass by $\Delta m$, the mass that the BH is
+expected to accrete in a single collision (see Section 2.4
+for details). We recalculate the collision timescale using
+the updated BH mass and repeat this process until the
+time elapsed equals the simulation time of 10 Gyr.$^{3}$
+
+2.4. Mass Growth
+
+When a BH collides with a star, it may accrete ma-
+terial and grow in mass. The details of the accretion
+depend on the relative velocity between the BH and
+star. For simplicity, this calculation assumes that the
+two objects experience a head on collision, with the BH
+passing through the star’s center. We begin by con-
+sidering the escape velocity from the BH at the star’s
+outermost point, its surface, which corresponds to the
+maximum impact parameter $1\,R_\odot$. Qualitatively, one
+might expect that the BH could capture the entire star
+(i.e., $\Delta m \sim 1\,M_\odot$) if the relative velocity is smaller than
+the escape velocity from the BH at this point. However,
+in the vicinity of the SMBH, the dispersion velocity of
+the stars may be much larger than the escape velocity
+from the BH at the star’s surface. In this case, the BH
+captures a “tunnel” of material through the star. This
+tunnel has radius equal to the Bondi radius and length
+approximately $1\,R_\odot$. For the purposes of this study, we
+
+assume that the BH accretes all of the material that
+
+it captures. The details of the accretion are uncertain,
+however, and it may be much less efficient than our re-
+sults imply. We discuss accretion in Section 2.5.
+
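+To estimate $\Delta m$, we begin with the Bondi-Hoyle ac-
+cretion rate, $\dot{m}$, given by:
+
+\begin{equation}
+\dot{m} = \frac{4 \pi G^2 m_{\rm BH}^2 \rho_{\rm star}}{\left( c_s^2 + \sigma^2 \right)^{3/2}} ,
+\tag{5}
+\end{equation}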
+
+3 Closer to the SMBH, $\Delta t$ may exceed the collision timescale by
+a factor of a few for steep density profiles. We include a safe-
+guard in our code which takes the ratio $t_{\rm coll}/\Delta t$ and rounds it
+to the nearest integer. We take this integer to be the number of
+collisions and increase the BH mass accordingly.
+
+ 
+
+% Initial
+4x10) e AM=1M,
+« Bondi-Hoyle-Lyttleton
+
+ax 101 ¥
+
+"3 at .
+s
+=, Xie. ‘
+uw 2x10 ‘cs *
+m1 "A509,
+=
+10?
+
+ 
+
+ 
+
+ 
+
+ 
+
+ 
+
+103 10°? 107? 10°
+
+Distance from SMBH [pc]
+
+Figure 2. We consider an example that highlights the mass
+growth as a function of distance from the SMBH. Grey dots
+represent the initial masses and distances from the SMBH
+of the BHs involved in the simulation. For simplicity, we set
+the initial mass equal to $10\,M_\odot$ for all of the BHs. Assuming
+the density profile of stars has $\alpha = 1$, we consider two cases:
+BHs accrete all of the star’s mass during a collision (red) and
+only a portion of the star’s mass is accreted during a collision
+given by Eq. 6 (blue). The latter case results in less growth
+closer to the SMBH where the velocity dispersion becomes
+high. The shaded regions and dashed lines represent the
+analytical predictions detailed in Section 2.4.
+
+where $c_s$ is the speed of sound in the star and $\rho_{\rm star}$ is its
+density (e.g., Bondi 1952; Bondi & Hoyle 1944; Shima
+et al. 1985; Edgar 2004, see latter for a review). We
+approximate the density as $1\,M_\odot/(4 \pi R_\odot^3/3)$ and take
+the conservative value of $c_s = 500$ km s$^{-1}$, which is
+consistent with the sound speed inside a $1\,M_\odot$ star
+(Christensen-Dalsgaard et al. 1996) and allows us to set
+a lower limit on $\Delta m$. To find $\Delta m$, at each collision, we
+have:
+\begin{equation}
+\Delta m = \min\left( \dot{m} \times t_{\star,{\rm cross}},\ 1\,M_\odot \right) , \tag{6}
+\end{equation}
+where $t_{\star,{\rm cross}} \sim R_\odot/\sigma$ is the crossing time of the BH in
+the star. We take the minimum between $\dot{m}\, t_{\star,{\rm cross}}$ and
+$1\,M_\odot$ because the BH cannot accrete more mass than
+one star at each collision.
+
+Figure 2 juxtaposes the expected growth using Bondi-
+Hoyle-Lyttleton accretion (blue small points) with a
+much simpler model in which the BH accretes the star’s
+entire mass, $1\,M_\odot$ (red large points). Both examples
+start with identical populations of $10\,M_\odot$ BHs (grey)
+and simulate growth through collisions using a statisti-
+cal approach. As the BHs grow, the collision timescale,
+which depends on $m_{\rm BH}$, decreases. Simultaneously,
+$\Delta m$, which also depends on $m_{\rm BH}$, increases. The re-
+sult is exponential growth (see discussion and details
+surrounding Eq. (8)). In Figure 2, however, the simula-
+tions assume $\alpha = 1$ for the stellar density profile, ensur-
+ing the collision timescale is long compared to the sim-
+
+
+
+
+IMBH Formation in Galactic Nuclei 5
+
+ulation time, 10 Gyr. Therefore, the BHs grow slowly,
+
+and their final masses can be approximated using the
+
+following equation:
+
+mfinal(tcoll → const.) =minitial + ∆m
 T
 
 tcoll
@@ -772,10 +1089,6 @@ collision timescale is shorter, corresponding to a larger
 
 index α in the density profile (see Figure 1), the growth
 
-
-
-IMBH Formation in Galactic Nuclei 5
-
 is very efficient and ∆m quickly approaches 1 M�. Con-
 
 sequently, while we can now assume ∆m = 1 M�, we
@@ -798,7 +1111,93 @@ example, we plot this curve in purple for the α = 2 case,
 
 in Figure 3, which agrees with the simulated masses.
 
-2.5. GW Inspiral
+2.5. Uncertainties in Accretion
+
+We note that the ∆M calculated in this proof-of-
+
+concept study assumes that the BH accretes all of the
+
+material that it captures. Estimating the true fraction
+
+of the material accreted by the BH is very challeng-
+
+ing; this complex problem requires numerically solving
+
+the generalized GR fluid equations with cooling, heat-
+
+ing, and radiative transfer, etc. and remains an active
+
+field of research (e.g., Blandford & Begelman 1999; Park
+
+& Ostriker 2001; Narayan et al. 2003; Igumenshchev
+
+et al. 2003; Ohsuga et al. 2005; Yuan et al. 2012; Jiang
+
+et al. 2014; McKinney et al. 2014; Narayan et al. 2022).
+
+Heuristically, if a collision between a BH and a star re-
+
+sults in an accretion disk, the disk’s viscous timescale
+
+may be as low as days. The resultant luminosity can
+
+unbind most of the captured material, though details
+
+such as the amount accreted and peak luminosity re-
+
+main uncertain (e.g., Yuan et al. (2012); Jiang et al.
+
+(2014), see also the discussion in Stone et al. (2017),
+
+Rizzuto et al. (2022), and Kremer et al. (2022)). The
+
+question becomes whether or not a BH can still accu-
+
+mulate significant amounts of mass over many collisions
+
+even if it accretes very little in a single one. We ex-
+
+plore the viability of our channel using a physically mo-
+
+tivated inefficient accretion model. Several studies have
+
+invoked momentum-driven winds in BH accretion (e.g.,
+
+Murray et al. 2005; Ostriker et al. 2010; Brennan et al.
+
+2018). We thus estimate the fraction of captured mass
+
+accreted to be approximately vesc/(cη), where vesc is
+
+the escape velocity from the BH at $1\,R_\odot$ and η is the
+
+accretion efficiency at the ISCO. We take η to be 0.1
+
+(e.g., Yu & Tremaine 2002). This expression for the
+
+fraction accreted is consistent with Kremer et al. (2022)
+
+equation 19 for s = 0.5, which is a reasonable value for
+
+s, a free parameter between 0.2 and 0.8. We discuss
+
+the results of the momentum-driven winds estimate in
+
+Section 3. We note that the accretion process may be
+
+more efficient than this estimate implies if, for example,
+
+jets or other instabilities result in the beaming of radi-
+
+ation away from the captured material (e.g., Blandford
+
+& Znajek 1977; Begelman 1979; De Villiers et al. 2005;
+
+McKinney & Gammie 2004; McKinney 2006; Igumen-
+
+shchev 2008; Begelman 2012a,b; McKinney et al. 2014).
+
+2.6. GW Inspiral
 
 When a BH is close to the SMBH, GW emission can
 
@@ -835,7 +1234,7 @@ M• +mBH
 )−1(
 a•
 
-10−4 pc
+10−2 pc
 
 )4
 
@@ -850,6 +1249,131 @@ plot this timescale for a 1 × 105M� BH in Figure 1 in
 
 blue.
 
+IMBH Formation in Galactic Nuclei 5
+
+ulation time, 10 Gyr. Therefore, the BHs grow slowly,
+and their final masses can be approximated using the
+following equation:
+
+ 
+
+$m_{\rm final}(t_{\rm coll}, \Delta m\ \mathrm{const.}) = m_{\rm initial}
+    + \Delta m \, \frac{T}{t_{\rm coll}}$ , (7)
+in which $T$ represents the simulation time and $\Delta m$ and
+$t_{\rm coll}$ remain constant, approximated as their initial val-
+ues.
+
+This equation is plotted in Figure 2 for both cases,
+$\Delta m = 1\,M_\odot$ (red) and $\Delta m$ from Bondi-Hoyle-Lyttleton
+accretion (blue), and the curves coincide with the cor-
+responding simulated results. The shaded regions rep-
+resent one standard deviation from Eq. (7), calculated
+using the square root of the number of collisions, $T/t_{\rm coll}$.
+As indicated by the results in red, in the absence of
+Bondi-Hoyle-Lyttleton accretion, the BHs closest to the
+SMBH experience the most growth because they have
+shorter collision timescales. However, Bondi-Hoyle-
+Lyttleton accretion becomes important closer to the
+SMBH, where the velocity dispersion is large compared
+with the stars’ escape velocity, and curtails the mass
+growth for BHs in this region. Outside of $10^{-2}$ pc, a BH
+consumes the star’s entire mass: the accretion-limited
+$\Delta m$ governed by Eq. (7) is greater than or equal to the
+star’s mass.
+
+Eq. 7 does not apply for other values of $\alpha$. When the
+collision timescale is shorter, corresponding to a larger
+index $\alpha$ in the density profile (see Figure 1), the growth
+is very efficient and $\Delta m$ quickly approaches $1\,M_\odot$. Con-
+sequently, while we can now assume $\Delta m = 1\,M_\odot$, we
+can no longer assume the collision timescale is constant.
+The final mass grows exponentially as a result. For
+$\Delta m = 1\,M_\odot$, the general solution is reached by solving
+the differential equation $dm/dt = 1\,M_\odot/t_{\rm coll}(m)$, which
+gives:
+
+$m_{\rm final}(\Delta m \to 1\,M_\odot) = -A + (m_{\rm initial} + A)\, e^{CT}$ (8)
+
+where $A = \sigma^2 R_{\rm star}/G$ and $C = 2\pi G n_{\rm star} R_{\rm star}/\sigma$. As an
+example, we plot this curve in purple for the $\alpha = 2$ case,
+in Figure 3, which agrees with the simulated masses.
+
+2.5. Uncertainties in Accretion
+
+We note that the $\Delta M$ calculated in this proof-of-
+concept study assumes that the BH accretes all of the
+material that it captures. Estimating the true fraction
+of the material accreted by the BH is very challeng-
+ing; this complex problem requires numerically solving
+the generalized GR fluid equations with cooling, heat-
+ing, and radiative transfer, etc. and remains an active
+field of research (e.g., Blandford & Begelman 1999; Park
+& Ostriker 2001; Narayan et al. 2003; Igumenshchev
+
+et al. 2003; Ohsuga et al. 2005; Yuan et al. 2012; Jiang
+et al. 2014; McKinney et al. 2014; Narayan et al. 2022).
+Heuristically, if a collision between a BH and a star re-
+sults in an accretion disk, the disk’s viscous timescale
+may be as low as days. The resultant luminosity can
+unbind most of the captured material, though details
+such as the amount accreted and peak luminosity re-
+main uncertain (e.g., Yuan et al. (2012); Jiang et al.
+(2014), see also the discussion in Stone et al. (2017),
+Rizzuto et al. (2022), and Kremer et al. (2022)). The
+question becomes whether or not a BH can still accu-
+mulate significant amounts of mass over many collisions
+even if it accretes very little in a single one. We ex-
+plore the viability of our channel using a physically mo-
+tivated inefficient accretion model. Several studies have
+invoked momentum-driven winds in BH accretion (e.g.,
+Murray et al. 2005; Ostriker et al. 2010; Brennan et al.
+2018). We thus estimate the fraction of captured mass
+accreted to be approximately $v_{\rm esc}/(c\eta)$, where $v_{\rm esc}$ is
+the escape velocity from the BH at $1\,R_\odot$ and $\eta$ is the
+accretion efficiency at the ISCO. We take $\eta$ to be 0.1
+(e.g., Yu & Tremaine 2002). This expression for the
+fraction accreted is consistent with Kremer et al. (2022)
+equation 19 for s = 0.5, which is a reasonable value for
+s, a free parameter between 0.2 and 0.8. We discuss
+the results of the momentum-driven winds estimate in
+Section 3. We note that the accretion process may be
+more efficient than this estimate implies if, for example,
+jets or other instabilities result in the beaming of radi-
+ation away from the captured material (e.g., Blandford
+& Znajek 1977; Begelman 1979; De Villiers et al. 2005;
+McKinney & Gammie 2004; McKinney 2006; Igumen-
+shchev 2008; Begelman 2012a,b; McKinney et al. 2014).
+
+2.6. GW Inspiral
+
+When a BH is close to the SMBH, GW emission can
+circularize and shrink its orbit. We implement the ef-
+fects of GW emission on the BH’s semimajor axis and
+eccentricity following Peters & Mathews (1963a). The
+characteristic timescale to merge a BH with an SMBH
+is given by:
+
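+$t_{\rm GW} \approx 2.9 \times 10^{12}~{\rm yr}
+  \left(\frac{M_\bullet}{10^{6}\,M_\odot}\right)^{-1} \left(\frac{m_{\rm BH}}{10^{6}\,M_\odot}\right)^{-1}
+  \times \left(\frac{M_\bullet + m_{\rm BH}}{2 \times 10^{6}\,M_\odot}\right)^{-1}
+  \left(\frac{a_\bullet}{10^{-2}~{\rm pc}}\right)^{4}
+  \times f(e_\bullet)\,(1 - e_\bullet^{2})^{7/2}$ , (9)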
+
+where $f(e_\bullet)$ is a function of $e_\bullet$. For all values of $e_\bullet$,
+$f(e_\bullet)$ is between 0.979 and 1.81 (Blaes et al. 2002). We
+plot this timescale for a $1 \times 10^{5}\,M_\odot$ BH in Figure 1 in
+blue.
+
+
+
+
+6 Rose et al.
+
+Figure 3. On the right, we plot final masses of 500 BHs using different values of α in the density profile, shallow (α = 1) to
+cuspy (α = 2). For the latter case, the purple line shows the analytical result from Eq. 8, taking minitial to be the average mass
+of the population. Faded stars indicate BHs that merged with the SMBH through GWs. On the left, we plot the masses and
+merger times of these BHs.
+
 In our simulations, we assume a BH has merged with
 
 the SMBH when the condition tGW < telapsed is met.
@@ -858,7 +1382,7 @@ When this condition is satisfied, we terminate mass
 
 growth through collisions for that BH.4
 
-2.6. IMBH growth
+2.7. IMBH growth
 
 As detailed above, BH-stellar collisions can increase
 
@@ -878,12 +1402,6 @@ Figure 1, larger values of α lead to collision timescales
 
 in the GN’s inner region, inwards of 0.25 pc, that are
 
-4 For comparison, we also incrementally changed the semimajor
-axis and eccentricity from GW emission following the equations
-in Peters & Mathews (1963b). This method leads to a slight
-increase in the final IMBH masses because it accounts for the
-collisions that take place while the orbit is gradually shrinking.
-
 much smaller that the 10 Gyr simulation time. Figure 3
 
 confirms this expectation. It depicts the mass growth of
@@ -896,7 +1414,7 @@ to 2 (purple). The most massive IMBHs form inwards
 
 of 0.25 pc for the α = 2 case.
 
-2.7. Gravitational Wave Mergers and Intermediate
+2.8. Gravitational Wave Mergers and Intermediate
 
 and Extreme Mass Ratio Inspiral Candidates
 
@@ -904,10 +1422,16 @@ Towards the SMBH, efficient collisions can create BHs
 
 massive enough to merge with the SMBH through GWs.
 
-Following the method detailed in Section 2.5, when a
+Following the method detailed in Section 2.6, when a
 
 given BH meets the criterion tGW < telapsed, we mark
 
+4 For comparison, we also incrementally changed the semimajor
+axis and eccentricity from GW emission following the equations
+in Peters & Mathews (1963b). This method leads to a slight
+increase in the final IMBH masses because it accounts for the
+collisions that take place while the orbit is gradually shrinking.
+
 it as merged with the SMBH. We assume that at this
 
 point the dynamics of the BH will be determined by GW
@@ -928,7 +1452,7 @@ to explain the formation of EMRIs, EMRIs and notably
 
 IMRIs can form in this region.
 
-2.8. Two Body Relaxation Processes
+2.9. Two Body Relaxation Processes
 
 A BH orbiting the SMBH experiences weak gravita-
 
@@ -964,34 +1488,47 @@ momentum and energy as a function of time (depending
 
 on the eccentricity of the orbit, this process can be more
 
-efficient Fragione & Sari 2018; Sari & Fragione 2019). In
+efficient Fragione & Sari 2018; Sari & Fragione 2019).
 
-Figure 1, we plot the relaxation timescale in gold for a
+Relaxation can cause the orbit of an object in a GN to
 
-range of α. We note that the Bahcall & Wolf (1976) pro-
+reach high eccentricities. If the object is a BH, it can
 
-file, α = 7/4, corresponds to zero net flux and therefore
+spiral into the SMBH and form an EMRI, while a star
 
-does not preferentially migrate objects inward.
 
-Additionally, because they are more massive on
 
-average than the surrounding objects, BHs are ex-
+IMBH Formation in Galactic Nuclei 7
 
-pected to segregate inwards in the GN (e.g., Shapiro
+can be tidally disrupted by the SMBH (e.g. Magorrian
 
-& Marchant 1978; Cohn & Kulsrud 1978; Morris 1993;
+& Tremaine 1999; Wang & Merritt 2004; Hopman &
 
-Miralda-Escudé & Gould 2000; Baumgardt et al. 2004).
+Alexander 2005; Aharon & Perets 2016; Stone & Met-
 
+zger 2016; Amaro-Seoane 2018; Sari & Fragione 2019;
 
+Naoz et al. 2022). The relaxation process is therefore
 
-6 Rose et al.
+crucial to our study. In Figure 1, we plot the relaxation
 
-Figure 3. On the right, we plot final masses of 500 BHs using different values of α in the density profile, shallow (α = 1) to
-cuspy (α = 2). For the latter case, the purple line shows the analytical result from Eq. 8, taking minitial to be the average mass
-of the population. Faded stars indicate BHs that merged with the SMBH through GWs. On the left, we plot the masses and
-merger times of these BHs.
+timescale in gold for a range of α. We note that the Bah-
+
+call & Wolf (1976) profile, α = 7/4, corresponds to zero
+
+net flux and therefore does not preferentially migrate
+
+objects inward.
+
+Additionally, because BHs are more massive on av-
+
+erage than the surrounding objects, they are expected
+
+to segregate inwards in the GN (e.g., Shapiro &
+
+Marchant 1978; Cohn & Kulsrud 1978; Morris 1993;
+
+Miralda-Escudé & Gould 2000; Baumgardt et al. 2004).
 
 They sink toward the SMBH on the mass segregation
 
@@ -1026,11 +1563,12 @@ approach to changes in the angular momentum). The
 
 new orbital parameters can be calculated following Lu
 
-& Naoz (2019), and see Naoz et al. in prep for full set
+& Naoz (2019), and see Naoz et al. (2022) for the full
 
-of equations.
+set of equations.
 
 We account for the effects of relaxation processes,
+
 including mass-segregation, using a multi-faceted ap-
 
 proach. We begin by migrating each BH towards the
@@ -1111,7 +1649,7 @@ scattering. We reserve the inclusion of these interactions
 
 for future study.
 
-2.9. Effect of Relaxation Processes
+2.10. Effect of Relaxation Processes
 
 As depicted in Figure 4, two-body relaxation processes
 
@@ -1125,19 +1663,6 @@ However, it also impedes the growth of BHs that are
 
 initially closer to the SMBH by allowing them to dif-
 
-
-
-IMBH Formation in Galactic Nuclei 7
-
-Figure 4. Similar to Figure 3, we plot the initial masses versus initial distance (grey) and final mass versus final distance (red)
-for 500 BHs. This simulation includes relaxation processes, including mass segregation, diffusion, and dynamical friction. We
-assume α = 1.75 for the GN density profile. Faded stars represent BHs that merged with the SMBH. As a result of inward
-migration, BHs merge more quickly with the SMBH, before they can become as massive as those in Figure 3. Additionally, more
-BHs become EMRIs and IMRIs. Additionally, in the third panel, we show a histogram of the simulated IMBH masses for two
-different values of α, 1.5 (orange, solid), α, 1.75 (red, dashed), and 2 (purple, dash-dotted), accounting for relaxation processes.
-The dashed, faded lines represent the corresponding initial histograms. We assume α = 1.75 for the GN density profile. Faded
-stars represent BHs that merged with the SMBH.
-
 fuse out of the inner region where collisions are efficient.
 
 As can be seen in Figure 4, the net result is that more
@@ -1169,6 +1694,7 @@ bution with an average of ∼ 200 M� and a median of
 3. DISCUSSION AND PREDICTIONS
 
 We explore the feasibility of forming IMBHs in a
+
 GN through successive collisions between a stellar-mass
 
 BH and main-sequence stars. Taking both a statisti-
@@ -1179,9 +1705,22 @@ can produce IMBHs efficiently with masses as high as
 
 103−4 M� and may result in many IMBH-SMBH merg-
 
-ers (intermediate-mass ratio inspiral, IMRIs) and EM-
+ers (intermediate-mass ratio inspirals, or IMRIs) and
+
+EMRIs.
+
+
+
+8 Rose et al.
 
-RIs.
+Figure 4. Similar to Figure 3, we plot the initial masses versus initial distance (grey) and final mass versus final distance
+(red) for 500 BHs. This simulation includes relaxation processes, including mass segregation, diffusion, and dynamical friction.
+We assume α = 1.75 for the GN density profile. Faded stars represent BHs that merge with the SMBH. As a result of inward
+migration, BHs merge more quickly with the SMBH, before they can become as massive as those in Figure 3. Additionally,
+more BHs become EMRIs and IMRIs. Additionally, in the third panel, we show a histogram of the simulated IMBH masses
+for three different values of α: 1.5 (orange, solid), 1.75 (red, dashed), and 2 (purple, dash-dotted), accounting for relaxation
+processes. We also show the results for a simulation with α = 1.75 that accounts for momentum-driven winds (black, dotted).
+Despite the substantially reduced accretion, BHs in the mass gap still form.
 
 As the stellar mass BH collides with a star, the BH
 
@@ -1195,7 +1734,7 @@ SMBH, the velocity dispersion may be larger than the
 
 escape velocity from the BH at the star’s radius. In this
 
-limit, the BH accretes a “tunnel” of material through
+limit, the BH captures a “tunnel” of material through
 
 the star, estimated using Bondi-Hoyle-Lyttleton accre-
 
@@ -1203,7 +1742,7 @@ tion. In our statistical analysis, we account for Bondi-
 
 Hoyle-Lyttleton accretion and find that BHs outside of
 
-10−2 pc from the SMBH can accrete the entire star (see
+10−2 pc from the SMBH can capture the entire star (see
 
 Figure 2).
 
@@ -1235,6 +1774,50 @@ Additionally, the final masses have no apparent depen-
 
 dence on distance from the SMBH (see Figure 4).
 
+Most simulations in our study assume that the BHs
+
+accrete all of the mass that they capture. The final BH
+
+masses can be taken as an upper limit. We note that
+
+the accretion is a highly uncertain process and repre-
+
+sents an active field of study (e.g., Blandford & Begel-
+
+man 1999; Park & Ostriker 2001; Narayan et al. 2003;
+
+Igumenshchev et al. 2003; Ohsuga et al. 2005; Yuan
+
+et al. 2012; Jiang et al. 2014; McKinney et al. 2014;
+
+Narayan et al. 2022). To assess the limits of our model,
+
+we also consider a physically motivated accretion model,
+
+momentum-driven winds (Section 2.5). We present the
+
+final mass distribution for momentum-driven winds in
+
+Figure 4. Importantly, we find that BHs within the
+
+mass gap still form naturally despite the substantially
+
+reduced accretion. About 5% of the BHs grow by 10
+
+to 100 M⊙. Furthermore, if we increase this ∆M esti-
+
+mate by a factor of 2 (i.e., use η = 0.05), the simula-
+
+tion produces a 3.5 × 10^3 M⊙ IMBH for the same initial
+
+conditions. Our proof-of-concept demonstrates that col-
+
+lisions between BHs and stars are an important process
+
+that should be taken into account in dense places such
+
+as a GN.
+
 Mass growth through BH-main-sequence star colli-
 
 sions may act in concert with other IMBH formation
@@ -1271,37 +1854,168 @@ Hoang et al. 2018). Additionally, to be susceptible to
 
 evaporation, BH binaries must have a wider configura-
 
-tion. Otherwise, they will be more tightly bound that
+tion. Otherwise, they will be more tightly bound than
 
+the average kinetic energy of the surrounding objects
 
+and will only harden through weak gravitational inter-
 
-8 Rose et al.
 
-the average kinetic energy of the surrounding objects,
 
-and will only harden through weak gravitational inter-
+IMBH Formation in Galactic Nuclei 9
 
 actions with neighboring stars (see for example Figure
 
 6 in Rose et al. 2020).
 
-Not included in this study, collisions between the BH
+We note that we assume a steady-state and treat the
 
-and other compact objects will increase the BH growth
+stars as a reservoir in this model. Future work will take a
 
-rate. BH-BH mergers (e.g., O’Leary et al. 2009; Fra-
+more nuanced approach to the background stars, whose
 
-gione et al. 2021) and even neutron star BH mergers
+density as a function of time can be influenced by several
 
-(e.g., Hoang et al. 2020) become more likely as the BHs
+factors. Firstly, the relaxation of the stellar population
 
-increase in mass through stellar collisions. As a result,
+occurs on Gyr timescales. Some studies have suggested
 
-the BH-BH collision timescale, discussed in Section 2.2,
+that in situ star formation can occur in the Galactic
 
-will become relevant to our simulations, allowing the
+Center as close as 0.04 pc from the SMBH (e.g., Levin
 
-BHs to grow through this channel in addition to stel-
+& Beloborodov 2003; Paumard et al. 2006), and star
+
+formation episodes can occur as often as every ∼ 5 Myr
+
+(e.g. Lu et al. 2009). Therefore, we expect that after
+
+the first Gyr, stars within ≲ 0.01 pc will be replenished
+
+at intervals consistent with the star formation episodes;
+
+the infalling populations of stars are separated by ∼
+5−10 Myr, which is shorter than the collision timescale.
+
+However, star-star collisions may complicate this pic-
+
+ture within ∼ 0.01 pc. As discussed above, regular star
+
+formation ensures the BHs always have a stellar popula-
+
+tion to interact with outside of ∼ 0.01 pc.5 At 0.01 pc,
+
+however, the kinetic energy during a collision between
+
+two 1 M⊙ stars is larger than their binding energies.
+
+Collisions can therefore thin out the stellar populations
+
+during the time it takes them to diffuse to these small
+
+radii, ≲ 0.01 pc, and may reduce the BH growth in the
+
+innermost region. We reserve the inclusion of star-star
+
+collisions for future work. We also note that the disrup-
+
+tion of binary stars by the SMBH may help replenish
+
+the stellar population even as collisions work to deplete
+
+it (e.g., Balberg et al. 2013); when a binary is disrupted,
+
+one of the stars is captured on a tightly bound orbit
+
+about the SMBH.
+
+An IMBH may also affect the stellar density profile.
+
+As it spirals into the SMBH, it can perturb stellar orbits,
+
+and these interactions can lead to hypervelocity stars
+
+(e.g., Baumgardt et al. 2006a; Löckmann & Baumgardt
+
+2008). Löckmann & Baumgardt (2008) show that an
+
+IMBH can modify an initially steep stellar density pro-
+
+file to become consistent with the flatter cusp observed
+
+in the Galactic Center. The stars may then be replen-
+
+ished on 100 Myr timescales (Baumgardt et al. 2006a).
+
+Therefore, after the formation of the first few IMBHs,
+
+subsequent BH growth may occur in bursts, coinciding
+
+with replenishment of the stars.
+
+While there are many competing dynamical processes
+
+that shape the stellar density profile, we stress that α
+
+5 In fact, the star-star collision timescale is greater than 10 Myr
+for the entire parameter space, save at 0.001 pc for larger values
+of α; the BH-star collision timescale plotted in Fig. 1 is the same
+order of magnitude as the star-star collision timescale.
+
+can simply be chosen to encapsulate all of the relevant
+
+physics. A value for α that is constrained by observa-
+
+tions must already reflect ongoing processes like star-
+
+star collisions and replenishment. Schödel et al. (2018)
+
+find the observed stellar mass enclosed within 0.01 pc of
+
+the Milky Way’s Galactic Center to be approximately
+
+180 M⊙. This estimate is consistent to order of magni-
+
+tude with our α = 1.25 case. In a simulation like those
+
+depicted in Figure 4, which include relaxation, α = 1.25
+
+leads to a maximum IMBH mass of 140 M⊙. Further-
+
+more, while the stellar mass within 0.01 pc may be a
+
+few hundred M⊙, Do et al. (2019) and GRAVITY Col-
+
+laboration et al. (2020) set an upper limit on the mass
+
+enclosed within the orbit of S0-2 to be about a few thou-
+
+sand M⊙, or 0.1% of the central mass. This upper limit
+
+can include mass that was previously in stars but is now
+
+in BHs. In that case, the 180 M⊙ is what remains of the
+
+stars, while BHs and IMBHs make up the ∼ 1000 M⊙
+in the innermost region.
+
+Also not included in this study, collisions between the
+
+BH and other compact objects will increase the BH
+
+growth rate. BH-BH mergers (e.g., O’Leary et al. 2009;
+
+Fragione et al. 2021) and even neutron star BH mergers
+
+(e.g., Hoang et al. 2020) become more likely as the BHs
+
+increase in mass through stellar collisions. As a result,
+
+the BH-BH collision timescale, discussed in Section 2.2,
+
+will become relevant to our simulations, allowing the
+
+BHs to grow through this channel in addition to stel-
 
 lar collisions. Additionally, this compact object mergers
 
@@ -1309,67 +2023,82 @@ result in GW recoil, which may have a large impact on
 
 the dynamics (e.g., Baibhav et al. 2020; Fragione et al.
 
-2021)
+2021).
 
 The BH’s mass growth increases GW emission, which
 
-dissipates energy from the orbit. Along with relaxation
+dissipates energy from the orbit. Along with relaxation,
 
-processes, GW emission causes BHs to sink towards the
+GW emission causes BHs to sink towards the SMBH
 
-SMBH and eventually undergo a merger. As a result,
+and eventually undergo a merger. As a result, the GN
+environment is conducive to the formation of EMRIs
 
-the GN environment is conducive to the formation of
+and IMRIs. The GW emission from EMRIs and IM-
 
-EMRIs and IMRIs. The GW emission from EMRIs and
+RIs is expected to be at mHz frequencies, making them
 
-IMRIs is expected to be at mHz frequencies, making
+promising candidates for LISA to observe. While the
 
-them promising candidates for LISA to observe. While
+exact rate calculation is beyond the scope of this study,
 
-the exact rate calculation is beyond the scope of this
+the mechanism outlined here seems very promising.
 
-study, the mechanism outlined here seems very promis-
+Our results also suggest that BHs within the mass gap
 
-ing.
+as well as IMBHs likely exist in many galactic nuclei, as
 
-Our results also suggest that IMBHs are likely to ex-
+well as within our own galactic center. This implication
 
-ists in many galactic nuclei, as well as within our own
+seems to be consistent with recent observational and
 
-galactic center. This implication seems to be consis-
+theoretical studies (e.g., Hansen & Milosavljević 2003;
 
-tent with recent observational and theoretical studies
+Maillard et al. 2004; Gürkan & Rasio 2005; Gualandris
 
-(e.g., Hansen & Milosavljević 2003; Maillard et al. 2004;
+& Merritt 2009; Chen & Liu 2013; Generozov & Madi-
 
-Gürkan & Rasio 2005; Gualandris & Merritt 2009; Chen
+gan 2020; Fragione et al. 2020a; Zheng et al. 2020; Naoz
 
-& Liu 2013; Generozov & Madigan 2020; Fragione et al.
+et al. 2020; GRAVITY Collaboration et al. 2020).
 
-2020a; Zheng et al. 2020; Naoz et al. 2020; GRAVITY
 
-Collaboration et al. 2020).
+
+10 Rose et al.
 
 Lastly, the collisions between stellar mass BHs and
 
 stars may contribute to the x-ray emission from our
 
-galactic centre (e.g., Muno et al. 2005, 2009; Hailey et al.
+galactic centre (e.g., Muno et al. 2005, 2009; Hailey
+
+et al. 2018; Zhu et al. 2018; Cheng et al. 2018, see Kre-
+
+mer et al. (2022) for a discussion of electromagnetic sig-
+
+natures from BH-star collisions)6. These interactions,
+
+in particular grazing collisions, may also result in tidal
+
+disruption events (e.g., Baumgardt et al. 2006b; Perets
 
-2018; Zhu et al. 2018; Cheng et al. 2018)5. These inter-
+et al. 2016; Stone et al. 2017; Samsing et al. 2019; Kre-
 
-actions, in particular grazing collisions, may also result
+mer et al. 2021). Thus, the process outlined here may
 
-in tidal disruption events (e.g., Perets et al. 2016; Sam-
+produce electromagnetic signatures in addition to GW
 
-sing et al. 2019; Kremer et al. 2021). Thus, the process
+mergers.
 
-outlined here may produce electromagnetic signatures
+We thank the anonymous referee for useful comments.
 
-in addition to GW mergers.
+We also thank Jessica Lu, Fred Rasio, Kyle Kremer,
 
-SR thanks the Charles E Young fellowship, the Nina
+Ryosuke Hirai, Ilya Mandel, and Erez Michaely for use-
+
+ful discussion.
+
+SR thanks the Charles E. Young Fellowship, the Nina
 
 Byers Fellowship, and the Michael A. Jura Memorial
 
@@ -1383,7 +2112,11 @@ ous support. IL thanks support from the Adams Fellow-
 
 ship. SN and RS thank the Bhaumik Institute visitor
 
-program.
+program. This work was performed in part at the As-
+
+pen Center for Physics, which is supported by National
+
+Science Foundation grant PHY-1607611.
 
 REFERENCES
 
@@ -1401,12 +2134,25 @@ doi: 10.1103/PhysRevLett.118.221101
 
 doi: 10.1103/PhysRevLett.119.141101
 
+Aharon, D., & Perets, H. B. 2016, ApJL, 830, L1,
+
+doi: 10.3847/2041-8205/830/1/L1
+
 Alexander, T. 1999, ApJ, 527, 835, doi: 10.1086/308129
 
 Alexander, T., & Pfuhl, O. 2014, ApJ, 780, 148,
 
 doi: 10.1088/0004-637X/780/2/148
 
+Amaro-Seoane, P. 2018, Living Reviews in Relativity, 21, 4,
+
+doi: 10.1007/s41114-018-0013-8
+
+6 The connection between the observed X-ray sources at the Galac-
+tic Center and tidal capture has been suggested by Generozov
+et al. (2018), but see Zhu et al. (2018); Stephan et al. (2019) for
+alternative channels.
+
 Arca Sedda, M., Mapelli, M., Benacquista, M., & Spera, M.
 
 2021, arXiv e-prints, arXiv:2109.12119.
@@ -1417,19 +2163,60 @@ Bahcall, J. N., & Wolf, R. A. 1976, ApJ, 209, 214,
 
 doi: 10.1086/154711
 
-5 The connection between the observed X-ray sources at the Galac-
-tic Center and tidal capture has been suggested by Generozov
-et al. (2018), but see Zhu et al. (2018); Stephan et al. (2019) for
-alternative channels.
-
 Baibhav, V., Gerosa, D., Berti, E., et al. 2020, PhRvD, 102,
 
 043002, doi: 10.1103/PhysRevD.102.043002
 
+Balberg, S., Sari, R., & Loeb, A. 2013, MNRAS, 434, L26,
+
+doi: 10.1093/mnrasl/slt071
+
+Baumgardt, H., Gualandris, A., & Portegies Zwart, S.
+
+2006a, MNRAS, 372, 174,
+
+doi: 10.1111/j.1365-2966.2006.10818.x
+
+Baumgardt, H., Hopman, C., Portegies Zwart, S., &
+
+Makino, J. 2006b, MNRAS, 372, 467,
+
+doi: 10.1111/j.1365-2966.2006.10885.x
+
 Baumgardt, H., Makino, J., & Ebisuzaki, T. 2004, ApJ,
 
 613, 1143, doi: 10.1086/423299
 
+Begelman, M. C. 1979, MNRAS, 187, 237,
+
+doi: 10.1093/mnras/187.2.237
+
+—. 2012a, ApJL, 749, L3, doi: 10.1088/2041-8205/749/1/L3
+
+http://doi.org/10.1103/PhysRevLett.116.241102
+http://doi.org/10.1103/PhysRevLett.118.221101
+http://doi.org/10.1103/PhysRevLett.119.141101
+http://doi.org/10.3847/2041-8205/830/1/L1
+http://doi.org/10.1086/308129
+http://doi.org/10.1088/0004-637X/780/2/148
+http://doi.org/10.1007/s41114-018-0013-8
+https://arxiv.org/abs/2109.12119
+http://doi.org/10.1086/154711
+http://doi.org/10.1103/PhysRevD.102.043002
+http://doi.org/10.1093/mnrasl/slt071
+http://doi.org/10.1111/j.1365-2966.2006.10818.x
+http://doi.org/10.1111/j.1365-2966.2006.10885.x
+http://doi.org/10.1086/423299
+http://doi.org/10.1093/mnras/187.2.237
+http://doi.org/10.1088/2041-8205/749/1/L3
+
+
+IMBH Formation in Galactic Nuclei 11
+
+—. 2012b, MNRAS, 420, 2912,
+
+doi: 10.1111/j.1365-2966.2011.20071.x
+
 Begelman, M. C., Volonteri, M., & Rees, M. J. 2006,
 
 MNRAS, 370, 289, doi: 10.1111/j.1365-2966.2006.10467.x
@@ -1454,6 +2241,14 @@ Blaes, O., Lee, M. H., & Socrates, A. 2002, ApJ, 578, 775,
 
 doi: 10.1086/342655
 
+Blandford, R. D., & Begelman, M. C. 1999, MNRAS, 303,
+
+L1, doi: 10.1046/j.1365-8711.1999.02358.x
+
+Blandford, R. D., & Znajek, R. L. 1977, MNRAS, 179, 433,
+
+doi: 10.1093/mnras/179.3.433
+
 Blecha, L., Ivanova, N., Kalogera, V., et al. 2006, ApJ, 642,
 
 427, doi: 10.1086/500727
@@ -1462,26 +2257,6 @@ Bondi, H. 1952, MNRAS, 112, 195,
 
 doi: 10.1093/mnras/112.2.195
 
-http://doi.org/10.1103/PhysRevLett.116.241102
-http://doi.org/10.1103/PhysRevLett.118.221101
-http://doi.org/10.1103/PhysRevLett.119.141101
-http://doi.org/10.1086/308129
-http://doi.org/10.1088/0004-637X/780/2/148
-https://arxiv.org/abs/2109.12119
-http://doi.org/10.1086/154711
-http://doi.org/10.1103/PhysRevD.102.043002
-http://doi.org/10.1086/423299
-http://doi.org/10.1111/j.1365-2966.2006.10467.x
-http://doi.org/10.3847/1538-4357/ab6d77
-http://doi.org/10.3847/1538-4357/ab6d77
-http://doi.org/10.1088/1367-2630/11/10/105016
-http://doi.org/10.1086/342655
-http://doi.org/10.1086/500727
-http://doi.org/10.1093/mnras/112.2.195
-
-
-IMBH Formation in Galactic Nuclei 9
-
 Bondi, H., & Hoyle, F. 1944, MNRAS, 104, 273,
 
 doi: 10.1093/mnras/104.5.273
@@ -1490,6 +2265,10 @@ Bradnick, B., Mandel, I., & Levin, Y. 2017, MNRAS, 469,
 
 2042, doi: 10.1093/mnras/stx1007
 
+Brennan, R., Choi, E., Somerville, R. S., et al. 2018, ApJ,
+
+860, 14, doi: 10.3847/1538-4357/aac2c4
+
 Bringmann, T., Huang, X., Ibarra, A., Vogl, S., & Weniger,
 
 C. 2012, JCAP, 2012, 054,
@@ -1524,10 +2303,36 @@ Cohn, H., & Kulsrud, R. M. 1978, ApJ, 226, 1087,
 
 doi: 10.1086/156685
 
+Dale, J. E., & Davies, M. B. 2006, MNRAS, 366, 1424,
+
+doi: 10.1111/j.1365-2966.2005.09937.x
+
+Dale, J. E., Davies, M. B., Church, R. P., & Freitag, M.
+
+2009, MNRAS, 393, 1016,
+
+doi: 10.1111/j.1365-2966.2008.14254.x
+
 Dall’Amico, M., Mapelli, M., Di Carlo, U. N., et al. 2021,
 
 MNRAS, 508, 3045, doi: 10.1093/mnras/stab2783
 
+Das, A., Schleicher, D. R. G., Basu, S., & Boekholt, T.
+
+C. N. 2021a, MNRAS, 505, 2186,
+
+doi: 10.1093/mnras/stab1428
+
+Das, A., Schleicher, D. R. G., Leigh, N. W. C., & Boekholt,
+
+T. C. N. 2021b, MNRAS, 503, 1051,
+
+doi: 10.1093/mnras/stab402
+
+De Villiers, J.-P., Hawley, J. F., Krolik, J. H., & Hirose, S.
+
+2005, ApJ, 620, 878, doi: 10.1086/427142
+
 Di Carlo, U. N., Giacobbo, N., Mapelli, M., et al. 2019,
 
 MNRAS, 487, 2947, doi: 10.1093/mnras/stz1453
@@ -1536,6 +2341,14 @@ Di Carlo, U. N., Mapelli, M., Pasquato, M., et al. 2021,
 
 MNRAS, 507, 5132, doi: 10.1093/mnras/stab2390
 
+Do, T., Hees, A., Ghez, A., et al. 2019, Science, 365, 664,
+
+doi: 10.1126/science.aav8137
+
+Ebisuzaki, T., Makino, J., Tsuru, T. G., et al. 2001, ApJL,
+
+562, L19, doi: 10.1086/338118
+
 Eda, K., Itoh, Y., Kuroyanagi, S., & Silk, J. 2013, PhRvL,
 
 110, 221101, doi: 10.1103/PhysRevLett.110.221101
@@ -1544,6 +2357,10 @@ Edgar, R. 2004, NewAR, 48, 843,
 
 doi: 10.1016/j.newar.2004.06.001
 
+Escala, A. 2021, ApJ, 908, 57,
+
+doi: 10.3847/1538-4357/abd93c
+
 Ferrara, A., Salvadori, S., Yue, B., & Schleicher, D. 2014,
 
 Monthly Notices of the Royal Astronomical Society, 443,
@@ -1596,6 +2413,54 @@ J. P. 2018, MNRAS, 478, 4030,
 
 doi: 10.1093/mnras/sty1262
 
+http://doi.org/10.1111/j.1365-2966.2011.20071.x
+http://doi.org/10.1111/j.1365-2966.2006.10467.x
+http://doi.org/10.3847/1538-4357/ab6d77
+http://doi.org/10.3847/1538-4357/ab6d77
+http://doi.org/10.1088/1367-2630/11/10/105016
+http://doi.org/10.1086/342655
+http://doi.org/10.1046/j.1365-8711.1999.02358.x
+http://doi.org/10.1093/mnras/179.3.433
+http://doi.org/10.1086/500727
+http://doi.org/10.1093/mnras/112.2.195
+http://doi.org/10.1093/mnras/104.5.273
+http://doi.org/10.1093/mnras/stx1007
+http://doi.org/10.3847/1538-4357/aac2c4
+http://doi.org/10.1088/1475-7516/2012/07/054
+http://doi.org/10.1103/RevModPhys.82.3069
+http://doi.org/10.1088/0004-637X/762/2/95
+http://doi.org/10.3847/1538-4357/aaba16
+http://doi.org/10.1093/mnras/stv694
+http://doi.org/10.1126/science.272.5266.1286
+http://doi.org/10.1086/156685
+http://doi.org/10.1111/j.1365-2966.2005.09937.x
+http://doi.org/10.1111/j.1365-2966.2008.14254.x
+http://doi.org/10.1093/mnras/stab2783
+http://doi.org/10.1093/mnras/stab1428
+http://doi.org/10.1093/mnras/stab402
+http://doi.org/10.1086/427142
+http://doi.org/10.1093/mnras/stz1453
+http://doi.org/10.1093/mnras/stab2390
+http://doi.org/10.1126/science.aav8137
+http://doi.org/10.1086/338118
+http://doi.org/10.1103/PhysRevLett.110.221101
+http://doi.org/10.1016/j.newar.2004.06.001
+http://doi.org/10.3847/1538-4357/abd93c
+http://doi.org/10.1093/mnras/stu1280
+http://doi.org/10.3847/2041-8213/ab77c9
+https://arxiv.org/abs/2107.04639
+http://doi.org/10.3847/1538-4357/ab94b2
+http://doi.org/10.3847/2041-8213/abbc0a
+http://doi.org/10.3847/1538-4357/aaa0d7
+http://doi.org/10.1111/j.1365-2966.2004.07914.x
+http://doi.org/10.1086/339576
+http://doi.org/10.1086/506193
+http://doi.org/10.3847/1538-4357/ab94bc
+http://doi.org/10.1093/mnras/sty1262
+
+
+12 Rose et al.
+
 Genzel, R., Eisenhauer, F., & Gillessen, S. 2010, Reviews of
 
 Modern Physics, 82, 3121,
@@ -1658,12 +2523,34 @@ Hoang, B.-M., Naoz, S., & Kremer, K. 2020, ApJ, 903, 8,
 
 doi: 10.3847/1538-4357/abb66a
 
+Hopman, C., & Alexander, T. 2005, ApJ, 629, 362,
+
+doi: 10.1086/431475
+
+Igumenshchev, I. V. 2008, ApJ, 677, 317,
+
+doi: 10.1086/529025
+
+Igumenshchev, I. V., Narayan, R., & Abramowicz, M. A.
+
+2003, ApJ, 592, 1042, doi: 10.1086/375769
+
+Jiang, Y.-F., Stone, J. M., & Davis, S. W. 2014, ApJ, 796,
+
+106, doi: 10.1088/0004-637X/796/2/106
+
 Johnson, J. L., & Bromm, V. 2007, Monthly Notices of the
 
 Royal Astronomical Society, 374, 1557,
 
 doi: 10.1111/j.1365-2966.2006.11275.x
 
+Kremer, K., Lombardi, James C., J., Lu, W., Piro, A. L., &
+
+Rasio, F. A. 2022, arXiv e-prints, arXiv:2201.12368.
+
+https://arxiv.org/abs/2201.12368
+
 Kremer, K., Lu, W., Piro, A. L., et al. 2021, ApJ, 911, 104,
 
 doi: 10.3847/1538-4357/abeb14
@@ -1672,64 +2559,27 @@ Kremer, K., Spera, M., Becker, D., et al. 2020, ApJ, 903,
 
 45, doi: 10.3847/1538-4357/abb945
 
+Kroupa, P., Subr, L., Jerabkova, T., & Wang, L. 2020,
+
+MNRAS, 498, 5652, doi: 10.1093/mnras/staa2276
+
+Levin, Y., & Beloborodov, A. M. 2003, ApJL, 590, L33,
+
+doi: 10.1086/376675
+
 Limongi, M., & Chieffi, A. 2018a, ApJS, 237, 13,
 
 doi: 10.3847/1538-4365/aacb24
 
 —. 2018b, ApJS, 237, 13, doi: 10.3847/1538-4365/aacb24
 
-Lu, C. X., & Naoz, S. 2019, MNRAS, 484, 1506,
-
-doi: 10.1093/mnras/stz036
+Löckmann, U., & Baumgardt, H. 2008, MNRAS, 384, 323,
 
-http://doi.org/10.1093/mnras/104.5.273
-http://doi.org/10.1093/mnras/stx1007
-http://doi.org/10.1088/1475-7516/2012/07/054
-http://doi.org/10.1103/RevModPhys.82.3069
-http://doi.org/10.1088/0004-637X/762/2/95
-http://doi.org/10.3847/1538-4357/aaba16
-http://doi.org/10.1093/mnras/stv694
-http://doi.org/10.1126/science.272.5266.1286
-http://doi.org/10.1086/156685
-http://doi.org/10.1093/mnras/stab2783
-http://doi.org/10.1093/mnras/stz1453
-http://doi.org/10.1093/mnras/stab2390
-http://doi.org/10.1103/PhysRevLett.110.221101
-http://doi.org/10.1016/j.newar.2004.06.001
-http://doi.org/10.1093/mnras/stu1280
-http://doi.org/10.3847/2041-8213/ab77c9
-https://arxiv.org/abs/2107.04639
-http://doi.org/10.3847/1538-4357/ab94b2
-http://doi.org/10.3847/2041-8213/abbc0a
-http://doi.org/10.3847/1538-4357/aaa0d7
-http://doi.org/10.1111/j.1365-2966.2004.07914.x
-http://doi.org/10.1086/339576
-http://doi.org/10.1086/506193
-http://doi.org/10.3847/1538-4357/ab94bc
-http://doi.org/10.1093/mnras/sty1262
-http://doi.org/10.1103/RevModPhys.82.3121
-http://doi.org/10.1086/377127
-http://doi.org/10.1086/427175
-http://doi.org/10.3847/1538-4357/aabfee
-http://doi.org/10.3847/2041-8213/abdf5b
-http://doi.org/10.1051/0004-6361/202037813
-http://doi.org/10.1088/0004-637X/705/1/361
-http://doi.org/10.1086/503295
-http://doi.org/10.1086/430694
-http://doi.org/10.1038/nature25029
-http://doi.org/10.1086/378182
-http://doi.org/10.1086/375341
-http://doi.org/10.3847/1538-4357/aaafce
-http://doi.org/10.3847/1538-4357/abb66a
-http://doi.org/10.1111/j.1365-2966.2006.11275.x
-http://doi.org/10.3847/1538-4357/abeb14
-http://doi.org/10.3847/1538-4357/abb945
-http://doi.org/10.3847/1538-4365/aacb24
-http://doi.org/10.3847/1538-4365/aacb24
-http://doi.org/10.1093/mnras/stz036
+doi: 10.1111/j.1365-2966.2007.12699.x
 
+Lu, C. X., & Naoz, S. 2019, MNRAS, 484, 1506,
 
-10 Rose et al.
+doi: 10.1093/mnras/stz036
 
 Lu, J. R., Ghez, A. M., Hornstein, S. D., et al. 2009, ApJ,
 
@@ -1739,6 +2589,10 @@ Madau, P., & Rees, M. J. 2001, ApJL, 551, L27,
 
 doi: 10.1086/319848
 
+Magorrian, J., & Tremaine, S. 1999, MNRAS, 309, 447,
+
+doi: 10.1046/j.1365-8711.1999.02853.x
+
 Maillard, J. P., Paumard, T., Stolovy, S. R., & Rigaut, F.
 
 2004, A&A, 423, 155, doi: 10.1051/0004-6361:20034147
@@ -1753,6 +2607,24 @@ Mapelli, M., Dall’Amico, M., Bouffanais, Y., et al. 2021b,
 
 MNRAS, 505, 339, doi: 10.1093/mnras/stab1334
 
+Mastrobuono-Battisti, A., Church, R. P., & Davies, M. B.
+
+2021, MNRAS, 505, 3314, doi: 10.1093/mnras/stab1409
+
+McKinney, J. C. 2006, MNRAS, 368, 1561,
+
+doi: 10.1111/j.1365-2966.2006.10256.x
+
+McKinney, J. C., & Gammie, C. F. 2004, ApJ, 611, 977,
+
+doi: 10.1086/422244
+
+McKinney, J. C., Tchekhovskoy, A., Sadowski, A., &
+
+Narayan, R. 2014, MNRAS, 441, 3177,
+
+doi: 10.1093/mnras/stu762
+
 Merritt, D. 2006, Reports on Progress in Physics, 69, 2513,
 
 doi: 10.1088/0034-4885/69/9/R01
@@ -1771,6 +2643,14 @@ Muno, M. P., Bauer, F. E., Baganoff, F. K., et al. 2009,
 
 ApJS, 181, 110, doi: 10.1088/0067-0049/181/1/110
 
+Murray, N., Quataert, E., & Thompson, T. A. 2005, ApJ,
+
+618, 569, doi: 10.1086/426067
+
+Naoz, S., Rose, S. C., Michaely, E., et al. 2022, ApJL, 927,
+
+L18, doi: 10.3847/2041-8213/ac574b
+
 Naoz, S., & Silk, J. 2014, ApJ, 795, 102,
 
 doi: 10.1088/0004-637X/795/2/102
@@ -1779,10 +2659,75 @@ Naoz, S., Silk, J., & Schnittman, J. D. 2019, ApJL, 885,
 
 L35, doi: 10.3847/2041-8213/ab4fed
 
+http://doi.org/10.1103/RevModPhys.82.3121
+http://doi.org/10.1086/377127
+http://doi.org/10.1086/427175
+http://doi.org/10.3847/1538-4357/aabfee
+http://doi.org/10.3847/2041-8213/abdf5b
+http://doi.org/10.1051/0004-6361/202037813
+http://doi.org/10.1088/0004-637X/705/1/361
+http://doi.org/10.1086/503295
+http://doi.org/10.1086/430694
+http://doi.org/10.1038/nature25029
+http://doi.org/10.1086/378182
+http://doi.org/10.1086/375341
+http://doi.org/10.3847/1538-4357/aaafce
+http://doi.org/10.3847/1538-4357/abb66a
+http://doi.org/10.1086/431475
+http://doi.org/10.1086/529025
+http://doi.org/10.1086/375769
+http://doi.org/10.1088/0004-637X/796/2/106
+http://doi.org/10.1111/j.1365-2966.2006.11275.x
+https://arxiv.org/abs/2201.12368
+http://doi.org/10.3847/1538-4357/abeb14
+http://doi.org/10.3847/1538-4357/abb945
+http://doi.org/10.1093/mnras/staa2276
+http://doi.org/10.1086/376675
+http://doi.org/10.3847/1538-4365/aacb24
+http://doi.org/10.3847/1538-4365/aacb24
+http://doi.org/10.1111/j.1365-2966.2007.12699.x
+http://doi.org/10.1093/mnras/stz036
+http://doi.org/10.1088/0004-637X/690/2/1463
+http://doi.org/10.1086/319848
+http://doi.org/10.1046/j.1365-8711.1999.02853.x
+http://doi.org/10.1051/0004-6361:20034147
+https://arxiv.org/abs/2109.06222
+http://doi.org/10.1093/mnras/stab1334
+http://doi.org/10.1093/mnras/stab1409
+http://doi.org/10.1111/j.1365-2966.2006.10256.x
+http://doi.org/10.1086/422244
+http://doi.org/10.1093/mnras/stu762
+http://doi.org/10.1088/0034-4885/69/9/R01
+http://doi.org/10.1086/317837
+http://doi.org/10.1086/172607
+http://doi.org/10.1086/429721
+http://doi.org/10.1088/0067-0049/181/1/110
+http://doi.org/10.1086/426067
+http://doi.org/10.3847/2041-8213/ac574b
+http://doi.org/10.1088/0004-637X/795/2/102
+http://doi.org/10.3847/2041-8213/ab4fed
+
+
+IMBH Formation in Galactic Nuclei 13
+
 Naoz, S., Will, C. M., Ramirez-Ruiz, E., et al. 2020, ApJL,
 
 888, L8, doi: 10.3847/2041-8213/ab5e3b
 
+Narayan, R., Chael, A., Chatterjee, K., Ricarte, A., &
+
+Curd, B. 2022, MNRAS, 511, 3795,
+
+doi: 10.1093/mnras/stac285
+
+Narayan, R., Igumenshchev, I. V., & Abramowicz, M. A.
+
+2003, PASJ, 55, L69, doi: 10.1093/pasj/55.6.L69
+
+Ohsuga, K., Mori, M., Nakamoto, T., & Mineshige, S. 2005,
+
+ApJ, 628, 368, doi: 10.1086/430728
+
 O’Leary, R. M., Kocsis, B., & Loeb, A. 2009, MNRAS, 395,
 
 2127, doi: 10.1111/j.1365-2966.2009.14653.x
@@ -1793,6 +2738,20 @@ O’Leary, R. M., Rasio, F. A., Fregeau, J. M., Ivanova, N.,
 
 doi: 10.1086/498446
 
+Ostriker, J. P., Choi, E., Ciotti, L., Novak, G. S., & Proga,
+
+D. 2010, ApJ, 722, 642,
+
+doi: 10.1088/0004-637X/722/1/642
+
+Park, M.-G., & Ostriker, J. P. 2001, ApJ, 549, 100,
+
+doi: 10.1086/319042
+
+Paumard, T., Genzel, R., Martins, F., et al. 2006, ApJ, 643,
+
+1011, doi: 10.1086/503273
+
 Perets, H. B., Li, Z., Lombardi, James C., J., & Milcarek,
 
 Stephen R., J. 2016, ApJ, 823, 113,
@@ -1827,6 +2786,10 @@ Renzo, M., Farmer, R., Justham, S., et al. 2020, A&A, 640,
 
 A56, doi: 10.1051/0004-6361/202037710
 
+Rizzuto, F. P., Naab, T., Spurzem, R., et al. 2022,
+
+MNRAS, doi: 10.1093/mnras/stac231
+
 Rodriguez, C. L., Amaro-Seoane, P., Chatterjee, S., &
 
 Rasio, F. A. 2018, PhRvL, 120, 151101,
@@ -1853,6 +2816,10 @@ Sakstein, J., Croon, D., McDermott, S. D., Straight, M. C.,
 
 https://arxiv.org/abs/2009.01213
 
+Sakurai, Y., Yoshida, N., Fujii, M. S., & Hirano, S. 2017,
+
+MNRAS, 472, 1677, doi: 10.1093/mnras/stx2044
+
 Samsing, J., Venumadhav, T., Dai, L., et al. 2019, PhRvD,
 
 100, 043009, doi: 10.1103/PhysRevD.100.043009
@@ -1871,6 +2838,10 @@ Schnittman, J. D., & Buonanno, A. 2007, ApJL, 662, L63,
 
 doi: 10.1086/519309
 
+Schödel, R., Gallego-Cano, E., Dong, H., et al. 2018, A&A,
+
+609, A27, doi: 10.1051/0004-6361/201730452
+
 Shapiro, S. L., & Marchant, A. B. 1978, ApJ, 225, 603,
 
 doi: 10.1086/156521
@@ -1901,6 +2872,14 @@ e-prints. https://arxiv.org/abs/1603.02709
 
 —. 2019, ApJ, 878, 58, doi: 10.3847/1538-4357/ab1e4d
 
+Stone, N. C., Küpper, A. H. W., & Ostriker, J. P. 2017,
+
+MNRAS, 467, 4180, doi: 10.1093/mnras/stx097
+
+Stone, N. C., & Metzger, B. D. 2016, MNRAS, 455, 859,
+
+doi: 10.1093/mnras/stv2281
+
 The LIGO Scientific Collaboration, the Virgo
 
 Collaboration, Abbott, R., et al. 2020a, arXiv e-prints,
@@ -1927,21 +2906,15 @@ G. N. 2021, MNRAS, 504, 146,
 
 doi: 10.1093/mnras/stab842
 
-http://doi.org/10.1088/0004-637X/690/2/1463
-http://doi.org/10.1086/319848
-http://doi.org/10.1051/0004-6361:20034147
-https://arxiv.org/abs/2109.06222
-http://doi.org/10.1093/mnras/stab1334
-http://doi.org/10.1088/0034-4885/69/9/R01
-http://doi.org/10.1086/317837
-http://doi.org/10.1086/172607
-http://doi.org/10.1086/429721
-http://doi.org/10.1088/0067-0049/181/1/110
-http://doi.org/10.1088/0004-637X/795/2/102
-http://doi.org/10.3847/2041-8213/ab4fed
 http://doi.org/10.3847/2041-8213/ab5e3b
+http://doi.org/10.1093/mnras/stac285
+http://doi.org/10.1093/pasj/55.6.L69
+http://doi.org/10.1086/430728
 http://doi.org/10.1111/j.1365-2966.2009.14653.x
 http://doi.org/10.1086/498446
+http://doi.org/10.1088/0004-637X/722/1/642
+http://doi.org/10.1086/319042
+http://doi.org/10.1086/503273
 http://doi.org/10.3847/0004-637X/823/2/113
 http://doi.org/10.1103/PhysRev.131.435
 http://doi.org/10.1103/PhysRev.131.435
@@ -1950,15 +2923,18 @@ http://doi.org/10.1086/312422
 http://doi.org/10.1086/341798
 http://doi.org/10.1088/0004-637X/780/2/187
 http://doi.org/10.1051/0004-6361/202037710
+http://doi.org/10.1093/mnras/stac231
 http://doi.org/10.1103/PhysRevLett.120.151101
 http://doi.org/10.1103/PhysRevD.93.084029
 http://doi.org/10.1103/PhysRevD.100.043027
 http://doi.org/10.3847/1538-4357/abc557
 https://arxiv.org/abs/2009.01213
+http://doi.org/10.1093/mnras/stx2044
 http://doi.org/10.1103/PhysRevD.100.043009
 http://doi.org/10.3847/1538-4357/ab43df
 http://doi.org/10.1086/339917
 http://doi.org/10.1086/519309
+http://doi.org/10.1051/0004-6361/201730452
 http://doi.org/10.1086/156521
 http://doi.org/10.1093/mnras/217.2.367
 http://doi.org/10.1093/mnras/stv2700
@@ -1967,6 +2943,8 @@ http://doi.org/10.1093/mnras/stx1576
 http://doi.org/10.1093/mnras/stx1576
 https://arxiv.org/abs/1603.02709
 http://doi.org/10.3847/1538-4357/ab1e4d
+http://doi.org/10.1093/mnras/stx097
+http://doi.org/10.1093/mnras/stv2281
 https://arxiv.org/abs/2009.01075
 https://arxiv.org/abs/2009.01190
 http://doi.org/10.1088/0004-637X/750/1/31
@@ -1974,7 +2952,7 @@ http://doi.org/10.1093/mnras/stw225
 http://doi.org/10.1093/mnras/stab842
 
 
-IMBH Formation in Galactic Nuclei 11
+14 Rose et al.
 
 Wang, H., Stephan, A. P., Naoz, S., Hoang, B.-M., &
 
@@ -1982,10 +2960,22 @@ Breivik, K. 2021, ApJ, 917, 76,
 
 doi: 10.3847/1538-4357/ac088d
 
+Wang, J., & Merritt, D. 2004, ApJ, 600, 149,
+
+doi: 10.1086/379767
+
 Woosley, S. E. 2017, ApJ, 836, 244,
 
 doi: 10.3847/1538-4357/836/2/244
 
+Yu, Q., & Tremaine, S. 2002, MNRAS, 335, 965,
+
+doi: 10.1046/j.1365-8711.2002.05532.x
+
+Yuan, F., Wu, M., & Bu, D. 2012, ApJ, 761, 129,
+
+doi: 10.1088/0004-637X/761/2/129
+
 Yue, B., Ferrara, A., Salvaterra, R., Xu, Y., & Chen, X.
 
 2014, Monthly Notices of the Royal Astronomical
@@ -2001,7 +2991,10 @@ Zhu, Z., Li, Z., & Morris, M. R. 2018, ApJS, 235, 26,
 doi: 10.3847/1538-4365/aab14f
 
 http://doi.org/10.3847/1538-4357/ac088d
+http://doi.org/10.1086/379767
 http://doi.org/10.3847/1538-4357/836/2/244
+http://doi.org/10.1046/j.1365-8711.2002.05532.x
+http://doi.org/10.1088/0004-637X/761/2/129
 http://doi.org/10.1093/mnras/stu351
 https://arxiv.org/abs/2011.04653
 http://doi.org/10.3847/1538-4365/aab14f
@@ -2012,11 +3005,12 @@ http://doi.org/10.3847/1538-4365/aab14f
 	2.2 Direct Collisions
 	2.3 Statistical Approach to Collisions
 	2.4 Mass Growth
-	2.5 GW Inspiral
-	2.6 IMBH growth
-	2.7 Gravitational Wave Mergers and Intermediate and Extreme Mass Ratio Inspiral Candidates
-	2.8 Two Body Relaxation Processes
-	2.9 Effect of Relaxation Processes
+	2.5 Uncertainties in Accretion
+	2.6 GW Inspiral
+	2.7 IMBH growth
+	2.8 Gravitational Wave Mergers and Intermediate and Extreme Mass Ratio Inspiral Candidates
+	2.9 Two Body Relaxation Processes
+	2.10 Effect of Relaxation Processes
 
 	3 Discussion and Predictions
 
diff --git a/read/results/tika/2201.00037.txt b/read/results/tika/2201.00037.txt
index 6c9f216..df3a299 100644
--- a/read/results/tika/2201.00037.txt
+++ b/read/results/tika/2201.00037.txt
@@ -1077,6 +1077,67 @@ where
 
 –10–
 
+at the boundaries of the fluid core. In the absence of dissipation, all tilded variables are purely
+real. We concentrate our analysis in this work on the real part of the solutions, which corre-
+sponds to the mutual alignment of these five rotation angles in the Cassini plane. As such, $\tilde{\varepsilon}_m$
+corresponds to the observed obliquity of the mantle symmetry axis. It is thus equivalent to $\varepsilon_m$,
+though we keep the tilde notation in the presentation of our results to emphasize that it rep-
+resents the real part of the solution from our system. Furthermore, since $\tilde{m} \ll \tilde{\varepsilon}_m$, we often
+refer to $\tilde{\varepsilon}_m$ as the orientation of the spin axis of the mantle, since the Cassini state of Mercury is
+more customarily described in terms of the latter in the literature.
+
+The model of Mathews et al. [1991] is developed under the assumption of small angles as
+appropriate for the nutations on Earth. The details on how the equations of the model are de-
+rived can be found in Mathews et al. [1991] and in Dumberry and Wieczorek [2016]. Three equa-
+tions describe, respectively, the time rate of change of the angular momenta of the whole of Mer-
+cury, the fluid core, and the inner core in the reference frame of the rotating mantle. These three
+equations are
+
+ 
+
+ 
+
+ 
+
+- As. Ag. . 1 /
+(w—e)m+ (1+w) spiny + qs + A3es Ms | = i@A (Foun) , (12a)
+i+ (L+w+ep) i As . (= DPomo — Tico) (12b)
+wm Ww e Mf — WALEs =—Ns = = = —~+Lemb~— tic ’
+PPINP COLES TS GORA, pee
+1 ~ ~
+(w —ages)ia + ayestiny + (1+) tas + (L+w— a9) esits = 55 (ee + Pe) ,  (12¢)
+
+and a fourth equation consists of a kinematic relation that expresses the change in the orien-
+tation of the inner core figure as a result of its own rotation,
+
+$\tilde{m}_s + \omega \tilde{n}_s = 0$ . (12d)
+
+In these equations, the parameters $\alpha_1$, $\alpha_2$ and $\alpha_3$ involve the density contrast at the ICB
+and are given by
+
+$\alpha_1 = \frac{\rho_f}{\rho_s}$ , $\alpha_3 = 1 - \alpha_1$ , $\alpha_2 = \alpha_1 - \alpha_3 \alpha_g$ , (13a)
+
+where the parameter $\alpha_g$ is a measure of the ratio of the gravitational to inertial torque applied
+
+on the inner core,
+
+_ 81G
+52
+
+where G is the gravitational constant.
+
+Qg [Pcl€r — Em) + Pm(€m — Ef) + Pres] » (13b)
+
+$\tilde{\Gamma}_{sun}$ is the amplitude of the gravitational torque by the Sun on the whole of Mercury. For
+a small mantle obliquity $\tilde{\varepsilon}_m$ and a small inner core tilt $\tilde{n}_s$, it is given by
+
+Peun = —i02A (¢nén + Tobutis) ’ (14)
+
+where
+
+—10—
+
+
 
 
 Confidential manuscript submitted to JGR-Planets
@@ -1582,6 +1643,71 @@ Kicb − α1es
 
 –14–
 
+$$\omega_{fp} = n \frac{M R^2}{C} \left( G_{210} J_2 + 2 G_{201} C_{22} \right) , \qquad (29)$$
+
+
+
+
+ 
+
+which is equivalent to the prediction by Peale [2005] when neglecting its small elliptical com-
+
+ponent. Note that in Peale [2005] it was assumed that only the mantle was involved in the solid-
+body precession and hence $C$ was replaced by $C_m$. Using $C = 0.346 \cdot M R^2$ [Margot et al.,
+2012] and the numerical values for $n$, $J_2$, $C_{22}$ and $e_c$ given in Table 1, we obtain a free preces-
+sion period of $T_{fp} = 2\pi/\omega_{fp} = 1298$ yr. If we use $C_m$ instead of $C$ in Equation (29), and take
+$C_m = 0.431 \cdot C = 0.431 \cdot 0.346 \cdot M R^2$ [Margot et al., 2012], we obtain $T_{fp} = 2\pi/\omega_{fp} = 560$ yr.
+These estimates are similar to those obtained by Peale [2005]. Because the CMB is elliptical,
+the pressure torque exerted on the fluid core by the mantle leads to an entrainment of the fluid
+core, the degree of which depends on the amplitude of the pole-to-equator CMB flattening. The
+true free precession period lies somewhere between 560 and 1298 yr. Regardless of its exact value,
+the free precession period is much shorter than the forcing period of 325 kyr. Using Equation
+(29), Equation (27) can be written as [e.g. Baland et al., 2017]
+
+$$\tilde{\varepsilon}_m = \frac{\Omega_p \sin I}{-\Omega_p \cos I + \omega_{fp}} . \qquad (30)$$
+
+
+
+
+
+
+The obliquity of Mercury is thus determined by how the forcing frequency $\Omega_p$ compares with
+the free precession frequency $\omega_{fp}$. Because $\omega_{fp} > \Omega_p$, Mercury occupies Cassini state 1 [Peale,
+1974]. Furthermore, Equation (30) shows that a large obliquity can be generated by resonant
+amplification if $\Omega_p \approx \omega_{fp}$. Since $\omega_{fp} \gg \Omega_p$, resonant amplification is minimal and the re-
+sulting obliquity, $\tilde{\varepsilon}_m \approx 2$ arcmin, is much smaller than the inclination angle $I \approx 8.5^\circ$.
+
+2.3.2 The misalignment of the fluid and solid cores
+
+With $\omega = -1 - \delta\omega \cos I$ and $\delta\omega \ll 1$, Equation (12d) gives $\tilde{n}_s \approx \tilde{m}_s$; as for the mantle,
+the rotation and symmetry axes of the inner core remain closely aligned in the Cassini state.
+The relationship between $\tilde{m}$ and $\tilde{\varepsilon}_m$ of Equation (24b) is independent of the interior structure,
+so it remains unchanged when a fluid core and a solid core are present. Substituting it in Equa-
+tion (12a), and setting $\tilde{n}_s = \tilde{m}_s$, the angular momentum equation of the whole planet becomes
+
+CQ, (sin I + cos I En) + (Af cos IQ,) mz + As(cos I QW — Q503¢s)s = AQOmEn - (31)
+
+This latter equation shows how the misaligned inner core and fluid core can lead to a modifi-
+cation of the mantle obliquity $\tilde{\varepsilon}_m$. Approximate analytical solutions of $\tilde{n}_s$ and $\tilde{m}_f$ are given by
+
+ 
+
+ 
+
+Q Q, Kic _ Ss ‘ ~ Q, S~
+fig & = (1 + Regents) (sin + cosI Em) — me Em, (32a)
+Q Q, As .
+mp & xy ooind + cos I Em) + Np Ay (Mit — Q1€s) is , (32b)
+where
+Ag 93 (Kieb — ares)”
+Ka] — Seek OS) (33a)
+Ay de Ap
+Ap = of —Qpcosl, (33b)
+As = Gs; —Q,cosl, (33c)
+
+—|4—
+
+
 
 
 Confidential manuscript submitted to JGR-Planets
@@ -1688,6 +1814,63 @@ Obviously, this reflects a Cassini state equilibrium in which the fluid core and
 
 –15–
 
+and where we have introduced the frequencies
+
+As
+
+of= OQ («; + Kemp + Ke) ; (33d)
+Af
+
+G5 =o (casa, — €,a4 + aghs + Kies) . (33e)
+
+These solutions are good approximations for all the results that we present in section 3. For
+an observed mantle obliquity $\tilde{\varepsilon}_m$ and for a chosen set of interior model parameters, they pro-
+vide useful predictions of $\tilde{n}_s$ and $\tilde{m}_f$.
+
+In the limit of a very strong coupling between the fluid core, solid core and mantle, o, >
+Q, and of > Dp, so that rs > 0, mz > 0 and Equation (31) reverts back to Equation (25)
+for a rigid planet. In the opposite limit of no coupling between the fluid core, solid core and
+mantle (i.e. for spherical internal boundaries, e f = €s = Ys = 0 and no viscous or EM cou-
+pling, Kemp = Kich = 0), then
+
+d,=0, K=1, Ap=HAs=—Hpcosl, My =As = —(tanI+é,,). (34)
+
+Inserting these in Equation (31), and with the moment of inertia of the mantle equal to C,, =
+C — Ay — Ag, we obtain
+
+Cm Q, (sin I + cos I En) = ANodmém - (35)
+
+which describes, as expected, a forced precession of the mantle alone. If this was the case for
+Mercury, taking $C_m/C = 0.431$, the obliquity should be $\tilde{\varepsilon}_m \approx 0.88$ arcmin, substantially smaller
+than the observed obliquity of $\tilde{\varepsilon}_m \approx 2$ arcmin.
+
+If of © Q, (and thus A~ — 0) and/or a, & Q, (and thus A, — 0) resonant amplifica-
+tion leads to large amplitudes for mf, %, and the mantle obliquity €,,. The frequencies a, and
+a, are closely related to the FCN and FICN frequencies wyen and wricn, respectively. Hence,
+just as a large mantle obliquity can result from resonant amplification when the forcing frequency
+approaches the free precession frequency, a large mantle obliquity can likewise result from res-
+onant amplification when the forcing frequency approaches the FCN or FICN frequencies. These
+frequencies depend on the interior density structure and are not known. However, we will show
+that for reasonable interior models of Mercury, the FCN and FICN periods are in the range of
+a few hundred yr. This is sufficiently far from the forcing period (325 kyr) that we do not ex-
+pect an important amplification effect. Furthermore, since wWfen,Wficen > Qp, then Tf > OQp
+and @, > Qp»), and we are in the strong coupling limit. The mantle obliquity should be close
+to that expected for a rigid planet, as observations suggest. Therefore, we expect that $\tilde{m}_f$ and
+$\tilde{n}_s$ should be of the order of $\tilde{\varepsilon}_m$ or smaller. This further justifies the assumption of small an-
+gles that we have adopted.
+
+3 Results
+3.1 Geodetic constraints and interior density structure
+
+All our interior models are constrained to match the mass $M$ of Mercury and specific choices
+of $\bar{C} = C/MR^2$ and $C_m/C$. The choice of $\bar{C}$ is determined from Equation (28). For the pa-
+rameters listed in Table 1, and an observed obliquity of $\varepsilon_m = 2.04$ arcmin [Margot et al., 2012],
+this gives $\bar{C} = C/MR^2 = 0.3455$ and all our interior models are consistent with this choice.
+Obviously, this reflects a Cassini state equilibrium in which the fluid core and inner core are
+
+—15-—
+
+
 
 
 Confidential manuscript submitted to JGR-Planets
@@ -2508,6 +2691,68 @@ cosity that we have identified above (i.e ν ≈ 5 × 10−4 m2 s−1), the infl
 
 –21–
 
+The above parameterizations are valid only under the assumption that the flow in the bound-
+ary layer remains laminar. Whether this is reasonable can be assessed by evaluating the Reynolds
+number $Re = r_f \Delta u_f / \nu$, associated with the differential velocity $\Delta u_f = r_f \Omega_o \tilde{m}_f$ at the CMB.
+
+For $r_f = 2000$ km, and taking $\tilde{m}_f = 4$ arcmin $\approx 0.001$ rad from the results in the previous
+section, we get $\Delta u_f \approx 2$ mm/s and $Re \approx 6 \times 10^9$. Such a large Reynolds number indicates
+that the viscous friction between the fluid core and mantle should induce turbulent flows, as
+is the case for the Cassini state of the Moon [Yoder, 1981; Williams et al., 2001; Cébron et al.,
+2019]. For a boundary layer that involves turbulent flows, the viscous torque should be inde-
+pendent of the fluid viscosity and proportional to the square of the differential velocity. The
+coupling constant $K_{cmb}$ should be in the form
+
+$$K_{cmb} = f_{cmb} \, |\tilde{m}_f| \, (0.195 - 1.976 i) . \qquad (45)$$
+
+where $f_{cmb}$ is a numerical factor that depends among other things on surface roughness. In-
+corporating a viscous coupling of this form in our rotational model is more challenging not only
+because $f_{cmb}$ is not known but also because the viscous torque is no longer linear in $\tilde{m}_f$. One
+strategy is to find solutions through an iterative process. The simpler alternative strategy that
+we adopt is to use the laminar formulas of Equation (44) but with the understanding that $\nu$
+represents an effective turbulent viscosity.
+
+To give an estimate of an appropriate turbulent value for $\nu$, we turn to the Cassini state
+of the Moon. A measure of the viscous dissipation at the CMB of the Moon has been obtained
+by fitting a rotation model to the librations of the Moon observed by Lunar Laser Ranging (LLR)
+[Williams et al., 2001, 2014; Williams and Boggs, 2015]. Viscous dissipation is reported in terms
+of a coupling parameter $K$ and a recent estimate is $K/C_L = (1.41 \pm 0.34) \times 10^{-8}$ day$^{-1}$ [Williams
+and Boggs, 2015], where $C_L$ is the lunar polar moment of inertia. The connection between $K$
+and $K_{cmb}$ is
+
+$$\left| \mathrm{Im}[K_{cmb}] \right| = \frac{K}{C_L} \frac{C_L}{C_{fL}} \frac{1}{\Omega_L} , \qquad (46)$$
+
+
+where $C_{fL}$ is the moment of inertia of the lunar core and $\Omega_L = 2.66 \times 10^{-6}$ s$^{-1}$ the lunar
+
+rotation rate. With $C_{fL}/C_L \approx 7 \times 10^{-4}$ [e.g. Williams et al., 2014], this gives $|\mathrm{Im}[K_{cmb}]| \approx$
+$9 \times 10^{-5}$. In order to match this amplitude in Equation (44a), with lunar parameters and as-
+suming a lunar core radius of 400 km, the required turbulent viscosity is $\nu \approx 5 \times 10^{-4}$ m$^2$
+
+s$^{-1}$, about 500 times larger than the laminar viscosity. Note that the differential velocity at the
+CMB of the Moon is closer to 3 cm/s [Yoder, 1981; Williams et al., 2001], more than 10 times
+larger than our estimate for Mercury above. Since the effective turbulent coupling constant $K_{cmb}$
+is proportional to the differential velocity, the effective turbulent viscosity appropriate for Mer-
+cury should be smaller. Thus, $\nu \approx 5 \times 10^{-4}$ m$^2$ s$^{-1}$ gives a conservative upper bound for the
+possible effective turbulent viscosity that can be expected for Mercury.
+
+Figure 5 shows how $\tilde{\varepsilon}_m$, $\tilde{m}_f$ and $\tilde{n}_s$ vary as functions of inner core radius for different choices
+
+of effective viscosities. For $\nu = 10^{-6}$ m$^2$ s$^{-1}$, viscous coupling is too weak to affect $\tilde{\varepsilon}_m$ and
+
+$\tilde{m}_f$ and they are essentially unchanged from the solutions shown in Figure 4. With increasing
+$\nu$, the stronger viscous coupling between the core and the mantle reduces their differential ve-
+locity, and $\tilde{m}_f$ is reduced. With the reduced differential velocity at the CMB, the prediction
+
+of $\tilde{\varepsilon}_m$ gets closer to 2.04 arcmin, the obliquity expected for a rigid planet. Although our CMB
+viscous coupling model is different than the one used by Peale et al. [2014], our results for $\tilde{\varepsilon}_m$
+and $\tilde{m}_f$ are qualitatively similar: viscous coupling at the CMB acts to reduce the offset of the
+fluid spin axis from the mantle symmetry axis. Considering the upper bound in turbulent vis-
+cosity that we have identified above (i.e. $\nu \approx 5 \times 10^{-4}$ m$^2$ s$^{-1}$), the influence of viscous cou-
+
+—21-—
+
+
 
 
 Confidential manuscript submitted to JGR-Planets
@@ -2825,6 +3070,66 @@ in the extreme case of σm = σf = 106 S m−1, Kcmb ≈ (1.6 × 10−8) · (1 
 
 –23–
 
+3.4 Electromagnetic coupling
+
+Let us now turn to electromagnetic (EM) coupling. To focus on its role in the equilibrium
+Cassini state, we set the viscous coupling back to zero. Because magnetic field lines tend to re-
+main attached to electrically conducting materials, a differential tangential motion between two
+electrically conducting regions stretches existing magnetic field lines that thread their interface.
+This induces a secondary magnetic field (or equivalently, an electrical current) and an associ-
+ated tangential EM stress resisting the differential motion. EM coupling at the CMB and ICB
+acts then in a similar way to viscous coupling, and this ’magnetic friction’ depends on the strength
+of the radial magnetic field $B_r$ and the electrical conductivity $\sigma$ on either side of the bound-
+ary [Rochester, 1960, 1962, 1968].
+
+The parametrization of EM coupling in terms of the coupling constants $K_{cmb}$ and $K_{icb}$
+has been developed in a few studies [e.g. Buffett, 1992; Buffett et al., 2002; Dumberry and Koot,
+2012]. Assuming a dominating axial dipole field, with a radial component at the CMB given
+by $B_r = \sqrt{3} \langle B_r^d \rangle \cos\theta$, where $\langle B_r^d \rangle$ is the r.m.s. strength of the field, the coupling constant
+$K_{cmb}$ can be written in the form
+
+$$K_{cmb} = 3 (1 - i) F_{cmb} \langle B_r^d \rangle^2 , \qquad (47)$$
+
+where
+
+ 
+
+$$F_{cmb} = \frac{1}{\Omega_o \rho_f r_f} \left( \frac{1}{\sigma_m \delta_m} + \frac{1}{\sigma_f \delta_f} \right)^{-1} , \qquad (48)$$
+
+
+
+
+and where $\sigma_m$, $\delta_m = \sqrt{2/(\sigma_m \mu \Omega_o)}$ and $\sigma_f$, $\delta_f = \sqrt{2/(\sigma_f \mu \Omega_o)}$ are the electrical conductivi-
+ties and magnetic skin depths in the mantle and fluid core, respectively, with $\mu = 4\pi \times 10^{-7}$
+
+N A$^{-2}$ the magnetic permeability of free space. The r.m.s. field strength $\langle B_r^d \rangle$ is connected to
+the Gauss coefficient $g_1^0$ of the surface magnetic field by
+
+$$\langle B_r^d \rangle = \frac{2}{\sqrt{3}} \left( \frac{R}{r_f} \right)^3 \left| g_1^0 \right| . \qquad (49)$$
+
+
+We can readily build an estimate of the amplitude of $K_{cmb}$. The electrical conductivity
+of common mantle minerals in Earth’s mantle at the pressure and temperature corresponding
+to the CMB of Mercury is in the range of $\sigma_m \sim 0.01 - 1$ S m$^{-1}$ [Constable, 2015]. In con-
+trast, the electrical conductivity of Fe in planetary cores is expected to be close to $\sigma_f \sim 10^6$ S
+m$^{-1}$ [Pozzo et al., 2012; de Koker et al., 2012]. This implies that $(\sigma_m \delta_m)^{-1} \gg (\sigma_f \delta_f)^{-1}$. Tak-
+ing $\sigma_m = 1$ S m$^{-1}$, $|g_1^0| = 190$ nT for Mercury’s dipole field [Anderson et al., 2012], $r_f =$
+2000 km, $\rho_f = 7000$ kg m$^{-3}$, this gives $K_{cmb} \approx (3.1 \times 10^{-11}) \cdot (1 - i)$. To put this amplitude
+in perspective, taking a molecular viscosity of $\nu = 10^{-6}$ m$^2$ s$^{-1}$ in Equation (44a) gives a vis-
+cous coupling constant of $K_{cmb} \approx (6.0 \times 10^{-7}) \cdot (0.195 - 1.976 i)$. Hence, EM coupling at the
+CMB is much weaker than viscous coupling, even if we include other spherical harmonic com-
+ponents of the radial magnetic field.
+
+EM coupling can be enhanced if strongly stratified pockets of core fluid are trapped by
+CMB cavities [Buffett, 2010; Glane and Buffett, 2018], in which case the effective $\sigma_m$ could be
+closer to $\sigma_f$. Likewise, $\sigma_m$ can be increased if a more electrically conducting layer has formed
+at the bottom of Mercury’s mantle, for instance by the upward sedimentation and compaction
+of solid FeS crystals precipitating out of the fluid core [e.g. Hauck et al., 2013]. However, even
+in the extreme case of $\sigma_m = \sigma_f = 10^6$ S m$^{-1}$, $K_{cmb} \approx (1.6 \times 10^{-8}) \cdot (1 - i)$, which remains
+
+—23-—
+
+
 
 
 Confidential manuscript submitted to JGR-Planets
@@ -2929,6 +3234,74 @@ monics of the field would penetrate through a thermally stratified layer in the
 
 –24–
 
+smaller by a factor ~ 60 than the smallest possible viscous coupling constant. Viscous forces
+dominate the tangential stress on the CMB of Mercury.
+
+At the ICB, because we can expect the electrical conductivity in both the solid inner core
+and fluid core to be similar, and because the radial magnetic field is likely much stronger, EM
+coupling can be much larger and dominate viscous coupling. We assume that the magnetic field
+morphology at the ICB is dominantly comprised of small spatial scales for example as predicted
+by the dynamo model of Christensen [2006]. EM coupling in this case can be parametrized in
+terms of an equivalent uniform radial magnetic field $\langle B_r \rangle$ capturing its r.m.s. strength [Buf-
+fett et al., 2002; Dumberry and Koot, 2012]. Assuming an electrical conductivity $\sigma$ equal in the
+fluid and solid core, the coupling constant $K_{icb}$ can be written in the form
+
+ 
+
+$$K_{icb} = \frac{5}{4} (1 - i) F_{icb} \langle B_r \rangle^2 , \qquad (50)$$
+
+where
+
+$$F_{icb} = \frac{\sigma \delta}{\Omega_o \rho_s r_s} , \qquad (51)$$
+
+and where $\delta = \sqrt{2/(\sigma \mu \Omega_o)}$ is the magnetic skin depth. As $F_{icb}$ is inversely proportional to
+
+$r_s$, $K_{icb}$ is inversely proportional to inner core size. Note that computing the EM coupling based
+on the r.m.s. strength $\langle B_r \rangle$ rather than a true field morphology tends to overestimate the strength
+of the coupling [Koot and Dumberry, 2013]. However, since the strength of the radial magnetic
+field at the ICB of Mercury is largely unknown, imperfections of the EM coupling model are
+absorbed in the range of possible $\langle B_r \rangle$ values.
+
+The parametrization of Equation (50) is only valid in a ’weak field’ regime [Buffett et al.,
+2002], when the feedback from the Lorentz force on the flow in the fluid core can be neglected.
+When $\langle B_r \rangle$ is sufficiently large, this is no longer the case. EM coupling then enters a ’strong
+field’ regime [Buffett et al., 2002; Dumberry and Koot, 2012; Koot and Dumberry, 2013] in which
+$K_{icb}$ increases linearly with $\langle B_r \rangle$ instead of quadratically. A good approximation of $K_{icb}$ cal-
+culated for Earth can be extracted from Figure 6a of Dumberry and Koot [2012],
+
+$$K_{icb}^E = (0.175 - 0.138 i) \langle B_r \rangle , \qquad (52)$$
+
+
+
+where $\langle B_r \rangle$ is in units of Tesla. The superscript E emphasizes that the numerical factors are
+appropriate for the parameter values adopted for Earth in the computation of Dumberry and
+Koot [2012]. To adapt these numerical factors to Mercury, we write,
+
+ 
+
+
+
+$$K_{icb} = (0.175 - 0.138 i) \frac{F_{icb}}{F_{icb}^E} \langle B_r \rangle , \qquad (53)$$
+
+
+where $F_{icb}^E$ is defined as in Equation (51) but using the parameters for Earth as defined in Dumb-
+
+erry and Koot [2012]. These are $\Omega_o = 7.292 \times 10^{-5}$ s$^{-1}$, $\rho_s = 12846$ kg m$^{-3}$, $r_s = 1221.5$
+
+km, $\sigma = 5 \times 10^5$ S m$^{-1}$, which gives $F_{icb}^E = 90.36$ T$^{-2}$.
+
+
+
+To compute $F_{icb}$, we assume an electrical conductivity of $\sigma = 10^6$ S m$^{-1}$ in the core of
+Mercury [e.g. de Koker et al., 2012; Deng et al., 2013]. The transition between the weak and
+strong field regime occurs when $\langle B_r \rangle \approx 1.53$ mT for the real part of $K_{icb}$. $\langle B_r \rangle$ at the ICB
+of Mercury is unknown. The dynamo model of Christensen [2006] showed that the field geom-
+etry inside the core could be dominated by small length scales, yet only the weaker lower har-
+monics of the field would penetrate through a thermally stratified layer in the upper region of
+
+—24—
+
+
 
 
 Confidential manuscript submitted to JGR-Planets
@@ -3031,6 +3404,67 @@ Mathews and Guo [2005] and Deleplace and Cardin [2006]. However, in the light of
 
 –25–
 
+the fluid core and reach the surface. If so, the field strength inside the core can exceed the sur-
+face field strength by a factor 1000. Taking a surface field strength equal to ~ 300 nT [e.g An-
+derson et al., 2012], $\langle B_r \rangle$ at the ICB could be as large as 0.3 mT, corresponding to approxi-
+mately 10% of the field strength within Earth’s core. Given that it is perhaps unlikely that Mer-
+cury’s field can be as high as that in Earth’s core, in all likelihood EM coupling at the ICB of
+Mercury remains in the weak field regime.
+
+Figure 6 shows how ém, my and mr. vary as functions of inner core radius for different choices
+of $\langle B_r \rangle$. The larger $\langle B_r \rangle$ is, the stronger is the EM coupling at the ICB, and the smaller is the
+differential rotation between the fluid core and inner core. The inner core and fluid core are vir-
+tually locked into a common precession motion when $\langle B_r \rangle$ > 0.3 mT. Further increasing $\langle B_r \rangle$
+above 1 mT does not change the solution as EM coupling already dominates all other torques
+on the inner core. This is the case even when EM coupling transitions into the strong field regime.
+EM coupling at the CMB is included in these calculations, with ao, = 1S m7! and | 9) | =
+190 nT, but remains much weaker than the inertial torque at the CMB, so for a small inner core
+we retrieved the solutions of €,, and my shown in Figure 4.
+
+As the inner core radius is increased, both €,, and my get smaller, as it was the case with
+viscous coupling alone, although the addition of EM coupling lead to more substantial changes.
+The inner core needs to be larger than approximately 500 km for changes in the Cassini state
+equilibrium to be noticeable. It is important to point out that my is reduced not because of
+EM coupling at the CMB, but rather from the combination of EM coupling at the ICB, which
+pulls the fluid core towards an alignment with the inner core, and gravitational coupling on the
+inner core, which pulls the latter to align with the mantle. The larger the EM coupling is, the
+greater is the reduction in €,, and mf.
+
+When the EM coupling at the ICB is sufficiently strong that the fluid and solid cores are
+locked into a common precession motion, a good approximation of €,, is given by the same pre-
+
+diction as Equations (39-40) involving the effective moment of inertia C’, except y is now given
+by
+
+_ AQy cos I — A,Q.a3¢s
+ ApQoler + Kem) + AsQo€s03Aq — AcQ» cos I
+
+ 
+
+x (54)
+For a small inner core, AQ, cosI > A,Q,a3¢, and x is positive. Because A,Q,03¢, increases
+with inner core size, y gets smaller, and so do C’ and €,,. The mantle obliquity drops from 2.049
+arcmin for a small inner core to 2.034 arcmin for an inner core of 1500 km, a reduction of 0.015
+arcmin. For an inner core larger than ~ 1000 km, AQy cosI < A,Q,a3¢s5, so x becomes neg-
+ative, C’ becomes smaller than the moment of inertia of a rigid Mercury C, and é,, becomes
+smaller than the prediction based on a rigid planet.
+
+The larger the inner core is, the smaller are the misalignments of the fluid and solid cores
+with respect to the mantle. Hence, the general conclusion we reached for viscous coupling alone
+is not altered with the addition of EM coupling but further strengthened; the larger the inner
+core is, the closer we approach a planet precessing as a rigid body. This is best revealed by the
+obliquity of the gravity field $\varepsilon_g$ which, for a large inner core, asymptotically approaches the obliq-
+uity expected for a rigid planet. Note that with strong EM coupling at the ICB, the offset be-
+tween $\varepsilon_m$ and $\varepsilon_g$ can be as large as 0.008 arcmin for a large inner core.
+
+3.5 Fixed inner core density versus fixed ICB density contrast
+
+Coupling models when viscous and EM stresses are both present have been presented in
+Mathews and Guo [2005] and Deleplace and Cardin [2006]. However, in the light of our results,
+
+—25—
+
+
 
 
 Confidential manuscript submitted to JGR-Planets
@@ -3352,6 +3786,104 @@ be more strongly aligned with the mantle. The more strongly the inner core and m
 
 –27–
 
+ 
+
+— Ps=8800kgm3 G3: —— 0.20 0.15 —010 —005 —0.01
+
+164
+2.050 4] A L
+
+4.04
+
+ 
+
+ 
+
+ 
+
+    
+
+= Em 354
+& 2.046 4 __¢ L
+
+5 g 3.04
+& 2.044 4 L
+
+© J
+& 2.042 4 L 25
+
+Em for a rigid planet
+
+ 
+
+Obliquity angle (arcmin
+nm
+°
+
+Obliquity a
+ie) np
+oa oOo
+& 8
+3
+
+ 
+
+ 
+
+ 
+
+ 
+
+ 
+
+ 
+
+ 
+
+2.036 +
+2.034 4 L 0.54
+2.032 , , , t 0.0 + t 1
+0 200 400 600 800 1000 1200 1400 0) 200 400 600 800 1000 1200 1400
+Inner core radius (km) Inner core radius (km)
+
+Figure 7. a) Obliquity of the mantle (€, solid lines) and gravity field (€,, dashed lines) b) my
+(solid lines) and 7s (dashed lines) as a function of inner core radius, for a fixed inner core density of
+
+8800 kg m$^{-3}$ (black lines) and for different choices of $\alpha_3$ (coloured lines).
+
+vas ap and i; these represent the obliquities with respect to the orbital plane and are connected
+to our variables by: 7), = Em, ue =EmtM+Ms & Em +My and i, =Em+n,. To summarize
+their results, ue and i, vary substantially for different inner core sizes, are always of compara-
+ble amplitude, and 7, is always larger than i. Furthermore, they find that as the inner core
+size is increased, the mantle obliquity 7/,, gets progressively larger and is displaced further away
+from its expected orientation based of a rigid planet (see their Figure 6). The change in i/,, they
+obtain between a case with no inner core and an inner core radius equal to 0.6 times the plan-
+etary radius (≈ 1463 km, close to the maximum inner core size of 1500 km we have considered),
+is approximately an increase of $5 \times 10^{-5}$ rad = 0.17 arcmin. This also corresponds approxi-
+mately to the deviation of the obliquity with respect to that of a rigid planet.
+
+When only viscous stress is included in our model (section 3.3), our results are substan-
+tially different. As illustrated in Figure 4, we find instead that the obliquity of the fluid core
+gets smaller with inner core size and that the change is very modest. In contrast with the re-
+sults of Peale et al. [2016], we find that the inner core obliquity is typically smaller than that
+of the fluid core, except when the inner core is very small or when the effective viscosity is un-
+reasonably large. We also find that as the inner core size is increased, the mantle obliquity gets
+smaller, opposite to the results of Peale et al. [2016], and that the changes remain small, at most
+of the order of 0.005 arcmin. A part of the difference is due to the different viscous coupling
+model that we use. But even when we adopt their model parameters and use their viscosity model,
+we were not able to reproduce their results.
+
+In the absence of viscous and EM coupling, the strong gravitational torque exerted on the
+inner core by the mantle should prevent any large misalignment between the two. This is cap-
+tured by the period of the FICN, which is of the order of 100 yr, much shorter than the forc-
+ing period of 325 kyr. Viscous and/or EM coupling at the ICB can counteract the gravitational
+torque (and alter the period of the FICN), but only for a small inner core. The ratio of the viscous-
+EM torque to the gravitational torque decreases with inner core size, so a large inner core should
+be more strongly aligned with the mantle. The more strongly the inner core and mantle are
+
+—27-—
+
+
 
 
 Confidential manuscript submitted to JGR-Planets
diff --git a/read/results/tika/2201.00069.txt b/read/results/tika/2201.00069.txt
index 3e3d064..59c7d4c 100644
--- a/read/results/tika/2201.00069.txt
+++ b/read/results/tika/2201.00069.txt
@@ -127,9 +127,9 @@ with the High Energy Stereoscopic System (H.E.S.S.) in very high energy gamma ra
 searched for signals in the ultraviolet, optical, and X-ray bands. For this FRB, we obtain a UV
 flux upper limit of 1.39×10−16 erg cm−2 s−1Å−1, X-ray limit of ∼ 6.6×10−14 erg cm−2 s−1 and
 a limit on the very-high-energy gamma-ray fluxΦ(𝐸 > 120GeV) < 1.7× 10−12 erg cm−2 s−1.
-We obtain a radio upper limit of∼15`Jy beam−1 for persistent emission at the locations of both
+We obtain a radio upper limit of∼15𝜇Jy beam−1 for persistent emission at the locations of both
 FRBs 20190711A and 20171019A, but detect diffuse radio emission with a peak brightness
-of ∼53`Jy beam−1 associated with FRB 20190714A at 𝑧 = 0.2365. This represents the first
+of ∼53𝜇Jy beam−1 associated with FRB 20190714A at 𝑧 = 0.2365. This represents the first
 detection of the radio continuum emission potentially associated with the host (galaxy) of FRB
 20190714A, and is only the third known FRB to have such an association. Given the possible
 association of a faint persistent source, FRB 20190714A may potentially be a repeating FRB
@@ -196,14 +196,14 @@ ies2, only the sub-arcsecond localisation of the repeating FRB
 20121102A to a host galaxy at a redshift of 𝑧 = 0.19273 ± 0.0008
 (Tendulkar et al. 2017; Bassa et al. 2017) showed that it is physi-
 cally associated with a compact (≤ 0.7 pc), persistent radio source
-of luminosity a𝐿a ∼ 1039 erg s−1 at a few GHz (Marcote et al.
+of luminosity 𝜈𝐿𝜈 ∼ 1039 erg s−1 at a few GHz (Marcote et al.
 2017). This source is detectable from 300MHz – 26GHz (Resmi
 et al. 2020; Chatterjee et al. 2017) and is seen to exhibit ∼ 10% vari-
 ability on day timescales. In contrast, a similar sub-milliarcsecond
 localisation of another repeating FRB20180916B to a nearby mas-
 sive spiral galaxy at 𝑧 = 0.0337 ± 0.0002 (Marcote et al. 2020)
 showed no associated persistent radio emission. This places a strong
-upper limit on the persistent source luminosity of a𝐿a . 7.6×1035
+upper limit on the persistent source luminosity of 𝜈𝐿𝜈 . 7.6×1035
 erg s−1 at 1.6GHz, which is three orders of magnitude lower than
 that of FRB 20121102A. Recently, the CHIME/FRB collaboration
 announced heightened activity in the repeating FRB 20201124A
@@ -450,19 +450,19 @@ The theoretical thermal noise of the MeerKAT can be calculated as
 
 𝑆rms =
 1
-[𝑐
+𝜂𝑐
 
 SEFD√︃
-𝑛pol × 𝑁 (𝑁 − 1) × Δa × 𝑡int
+𝑛pol × 𝑁 (𝑁 − 1) × Δ𝜈 × 𝑡int
 
 . (1)
 
 The system equivalent flux density (SEFD) of MeerKAT at the
-1.28GHz is 443 Jy and [𝑐 is the correlator efficiency. We used 𝑛pol
-= 2 polarisation products (XX and YY), N = 64 telescopes, Δa =
+1.28GHz is 443 Jy and 𝜂𝑐 is the correlator efficiency. We used 𝑛pol
+= 2 polarisation products (XX and YY), N = 64 telescopes, Δ𝜈 =
 856MHz bandwidth and 𝑡int = 21600 sec observing time for one
-epoch. This gives the theoretical rms of∼ 2 `Jy beam−1. The typical
-image rms obtained from our residual images is ∼ 5 `Jy beam−1,
+epoch. This gives the theoretical rms of∼ 2 𝜇Jy beam−1. The typical
+image rms obtained from our residual images is ∼ 5 𝜇Jy beam−1,
 which is 2.5 times the expected theoretical rms. The widebandMFS
 image does not allow primary beam correction procedure as this can
 only be done on the sub-band images with limited rms for detection
@@ -497,6 +497,138 @@ uum sources in the MeerKAT observations with the FRB loca-
 MNRAS 000, 1–15 (2021)
 
 https://github.com/e-merlin/eMERLIN_CASA_pipeline
+4 Chibueze et al.
+
+2.2 e-MERLIN Observations
+
+To constrain the position of the persistent continuum emission
+associated with FRB 20190714A, we conducted L-band (centre
+frequency of 1.51 GHz) observations of the target with the en-
+hanced Multi-Element Remote-Linked Interferometer Network, e-
+MERLIN array in the United Kingdom (project code: CY 10003)
+on 13 January, 2021 (see Section 3.1.2). Six antennas were used
+including the 75-m Lovell telescope and the target pointing cen-
+tre was R.A. = 12h15m55s.12, Dec. = −13°01′15″.7. 1407+2827
+was used as the bandpass calibrator, 1331+3030 as the flux cal-
+ibrator and 1216-1033 as the phase calibrator. The angular sep-
+aration between the target and the phase calibrator is 2.47°. The
+data reduction was done following standard e-MERLIN calibra-
+tion procedures with additional flagging of bad visibilities fol-
+lowed by imaging. We found two confusing sources in the field,
+at R.A. = 12h15m44s.669, Dec. = −12°57′59″.56 and R.A. =
+12h15m37s.216, Dec. = −13°09′33″.44 at 4.1′ and 9.4′ from the
+pointing centre, respectively. They had apparent flux densities of 4
+and 1.3 mJy without primary beam correction. We used these for
+self-calibration of the field and then subtracted them before final
+imaging. The final image synthesized beam is 0″.65 × 0″.15, posi-
+tion angle 15° elongated in the Declination direction due to the low
+target elevation from the UK.
+
+2.3 The Swift satellite. UVOT and XRT observations
+
+Neil Gehrels Swift Observatory (Swift) is a multi-wavelength NASA
+space mission operating in soft-X-rays and optical/UV. Here we
+use data from the X-ray Telescope (XRT) (Burrows et al. 2005)
+which operates in the soft X-ray domain of 0.3 — 10 keV as well as
+data taken by the UV/Optical Telescope (UVOT) (Roming et al.
+2005) operating in the UV to optical domain (170 — 600 nm).
+During the FRB 20171019A multi-wavelength (MWL) observing
+campaign, two 2 ks target-of-opportunity (ToO) observations were
+performed with Swift from 2019-09-28 18:37:02 to 2019-09-28
+21:52:54 and 2019-10-18 18:03:00 to 2019-10-18 20:03:00 on the
+FRB 20171019A localisation region. Simultaneously with Swift-
+XRT, five UVOT images were taken with the UVM2 filter (central
+wavelength = 2246 Å) over the 2 epochs with a total exposure of 4 ks.
+The images are aspect-corrected and summed with the uvotimsum
+tool (HEASOFT 6.26). Observations were performed with Swift-
+XRT in the standard Photon Counting observing mode (PC). The
+XRT PC data are processed with xrtpipeline (HEASOFT 6.26).
+A summed image is extracted with xselect.
+
+2.4 Very-high energy gamma-ray observations with H.E.S.S.
+
+Observations of FRB 20171019A were also obtained in the very-
+high energy gamma-ray domain with the H.E.S.S. imaging atmo-
+spheric Cherenkov telescope array, sensitive in the range between a
+few tens of GeVs and 100 TeV. H.E.S.S. is located on the Khomas
+Highland plateau of Namibia (23°16′18″ South, 16°30′00″ East),
+at an elevation of ~1800 m above sea level. Observations took place
+contemporaneously to the first epoch of MeerKAT observations of
+FRB 20171019A described above. The data set was obtained with
+the H.E.S.S. phase II array, including the upgraded 12 m-diameter
+CT 1-4 telescopes (Ashton et al. 2020) and the large 28 m-diameter
+
+6 https://github.com/e-merlin/eMERLIN_CASA_pipeline
+
+CT5 telescope (Bolmont et al. 2014). A standard data quality selec-
+tion was applied to the data (Aharonian et al. 2006). The events have
+then been selected and their direction and energy reconstructed us-
+ing a log-likelihood minimization comparing the recorded shower
+images of all triggered telescopes (requiring at least two telescopes
+to see the same gamma-ray event) to a semi-analytical model of air
+showers (de Naurois & Rolland 2009).
+
+We define a circular region-of-interest centered on the position
+of FRB 20171019A with a radius of 0.12°, optimal for a point-like
+source of emission as expected from FRB 20171019A. The back-
+ground level in this ON region was determined using the standard
+“ring background” technique (Berge et al. 2007) based on a radially
+symmetric ring around the source position. This technique allows us
+to derive the background level from the same field of view and as-
+sures that the gamma-ray signal and background are estimated with
+the same acceptance and under the same observation conditions.
+
+3 RESULTS
+3.1 MeerKAT
+
+The theoretical thermal noise of the MeerKAT can be calculated as
+
+$$
+S_{\rm rms} = \frac{1}{\eta_c}\,\frac{\mathrm{SEFD}}{\sqrt{n_{\rm pol} \times N(N-1) \times \Delta\nu \times t_{\rm int}}} . \tag{1}
+$$
+
+
+
+The system equivalent flux density (SEFD) of MeerKAT at the
+1.28 GHz is 443 Jy and $\eta_c$ is the correlator efficiency. We used $n_{\rm pol}$
+= 2 polarisation products (XX and YY), N = 64 telescopes, $\Delta\nu$ =
+856 MHz bandwidth and $t_{\rm int}$ = 21600 sec observing time for one
+epoch. This gives the theoretical rms of ~ 2 μJy beam⁻¹. The typical
+image rms obtained from our residual images is ~ 5 μJy beam⁻¹,
+which is 2.5 times the expected theoretical rms. The wideband MFS
+image does not allow primary beam correction procedure as this can
+only be done on the sub-band images with limited rms for detection
+of the sources. However, our sources are the phase centres of our
+fields and thus unaffected by the effect of the primary beam.
+
+Due to the lack of MeerKAT primary beam correction, we
+did not compare the flux densities of the discrete sources with
+their NRAO (National Radio Astronomy Observatory) VLA (Very
+Large Array) Sky Survey (NVSS) counterparts. However, Chibueze
+et al. (2021, submitted) confirmed that the overall flux densities
+obtained with MeerKAT and NVSS are in good agreement with
+each other within errors of ~ 5%. We compared the astrometry of
+the discrete radio sources obtained with MeerKAT and NVSS in
+Figure 1. The position uncertainty of the MeerKAT ranges from
+0″.2 (close to the centre of the primary beam) to a few arcseconds
+towards the edge of the primary beam. The scatter observed in
+Figure 1 is mostly due to the probability of the centroids of emission
+in the ~45″ NVSS resolution being different from the centroids at
+MeerKAT’s resolution and partly due to higher position uncertainty
+of the fainter sources. Therefore, we conclude that our MeerKAT
+data are well calibrated and the flux density and astrometry are as
+accurate as the errors indicate.
+
+3.1.1 Looking for persistent continuum emission associated with
+the FRB fields
+
+Considering the results of the astrometric comparison with NVSS
+(see Figure 1), we considered potential associations of contin-
+uum sources in the MeerKAT observations with the FRB loca-
+
+MNRAS 000, 1-15 (2021)
+
+
 
 
 MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 5
@@ -517,14 +649,14 @@ FRB 20190714
 
 Compact persistent emission was detected in the 1.51GHz e-
 MERLIN image at R.A. = 12ℎ15𝑚55𝑠 .116, Dec. = −13◦01′14.′′48
-at 86 `Jy beam−1 by e-MERLIN. The stochastic position uncer-
+at 86 𝜇Jy beam−1 by e-MERLIN. The stochastic position uncer-
 tainty is (0.04, 0.15) arcsec and the uncertainty (due to the sepa-
 ration between phase-calibrator and target, and antenna position
 uncertainty) is (0.013, 0.056) arcsec, giving a total astrometric
 uncertainty of (0.04, 0.16) arcsec in R.A. and Dec., respectively.
 The offset from the FRB position is negligible in R.A. and 1.2
 arcsec in Dec. The rms in this region (of full primary beam sen-
-sitivity) is 20 `Jy beam−1, making this a 4.3𝜎rms detection. It is
+sitivity) is 20 𝜇Jy beam−1, making this a 4.3𝜎rms detection. It is
 ∼1.5𝜎rms higher than that of the MeerKAT detection. Although the
 e-MERLIN flux scale nominal uncertainty is ∼5%, in these data it
 is possibly higher due to the low declination of the phase-reference
@@ -540,11 +672,11 @@ ground persistent radio source and the host galaxy, following the
 procedure of Eftekhari et al. (2018). Instead of using the FRB lo-
 calisation region, we use the area of the galaxy, which is taken as
 2′′ × 2′′, twice the half light radius from Heintz et al. (2020). Given
-the source has a flux density of ∼ 90`Jy we estimate the chance
+the source has a flux density of ∼ 90𝜇Jy we estimate the chance
 alignment probability of 0.0008, which corresponds to 3.4𝜎. The
 flux density threshold, assuming 3𝜎, for an unresolved radio source
-is ∼ 15 `Jy. If instead we consider the probability of detecting any
-radio source above our flux density threshold of 15`Jy, the probabil-
+is ∼ 15 𝜇Jy. If instead we consider the probability of detecting any
+radio source above our flux density threshold of 15𝜇Jy, the probabil-
 ity of a chance alignment is, therefore, approximately 0.8%, making
 the statistical significance of our detection 2.6𝜎. This represents the
 first detection of radio continuum emission associated with the host
@@ -554,8 +686,8 @@ first detection of radio continuum emission associated with the host
 
 No continuum emission was detected near FRBs 20171019A and
 20190711A. As each of the images of these sources has an rms
-of ∼ 5 `Jy beam−1, the 3𝜎 intensity upper limit of any emission
-associated with FRBs 20171019A and 20190711A will be ∼ 15 `Jy
+of ∼ 5 𝜇Jy beam−1, the 3𝜎 intensity upper limit of any emission
+associated with FRBs 20171019A and 20190711A will be ∼ 15 𝜇Jy
 beam−1 (see Table 1).
 
 Candidate pulses above a signal-to-noise (S/N) of 10 from the
@@ -635,6 +767,140 @@ https://heasarc.gsfc.nasa.gov/cgi-bin/Tools/w3pimms/w3pimms.pl
 https://heasarc.gsfc.nasa.gov/cgi-bin/Tools/w3pimms/w3pimms.pl
 https://heasarc.gsfc.nasa.gov/cgi-bin/Tools/w3nh/w3nh.pl
 https://heasarc.gsfc.nasa.gov/cgi-bin/Tools/w3nh/w3nh.pl
+MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 5
+
+tion to sources within 5″. Using this spatial coincidence criterion,
+we identified a persistent 1283 MHz continuum source near FRB
+20190714A, detected in both the 14 September 2019 and the 28
+September 2019 epoch. The peak of the MeerKAT radio emission
+is offset by ~ 2″.1 from the peak of the i-band magnitude of the op-
+tical galaxy identified in the Panoramic Survey Telescope and Rapid
+Response System (PanSTARRS, located at Haleakala Observatory)
+image (shown as contours in Figures 2 and 3). The MeerKAT ra-
+dio source is offset by 1″.68 from the localisation region of FRB
+20190714 (cyan circle in Figures 2 and 3).
+
+3.1.2. e-MERLIN detection of compact emission towards
+FRB 20190714
+
+Compact persistent emission was detected in the 1.51 GHz e-
+MERLIN image at R.A. = 12h15m55s.116, Dec. = −13°01′14″.48
+at 86 μJy beam⁻¹ by e-MERLIN. The stochastic position uncer-
+tainty is (0.04, 0.15) arcsec and the uncertainty (due to the sepa-
+ration between phase-calibrator and target, and antenna position
+uncertainty) is (0.013, 0.056) arcsec, giving a total astrometric
+uncertainty of (0.04, 0.16) arcsec in R.A. and Dec., respectively.
+The offset from the FRB position is negligible in R.A. and 1.2
+arcsec in Dec. The rms in this region (of full primary beam sen-
+sitivity) is 20 μJy beam⁻¹, making this a 4.3$\sigma_{\rm rms}$ detection. It is
+~1.5$\sigma_{\rm rms}$ higher than that of the MeerKAT detection. Although the
+e-MERLIN flux scale nominal uncertainty is ~5%, in these data it
+is possibly higher due to the low declination of the phase-reference
+source and to the strong RFI which were removed from the data
+but may have affected the linearity of the receiver response. The
+peak of the e-MERLIN radio emission is offset by ~ 1″.4 from the
+peak of the PanSTARRS i-band emission in Figures 2 and 3. The
+e-MERLIN radio source (shown by the cyan cross in Figures 2 and
+3) is offset by 0″.53 from the localised position of FRB 20190714.
+
+We estimate the probability of a chance alignment of a back-
+ground persistent radio source and the host galaxy, following the
+procedure of Eftekhari et al. (2018). Instead of using the FRB lo-
+calisation region, we use the area of the galaxy, which is taken as
+2″ × 2″, twice the half light radius from Heintz et al. (2020). Given
+the source has a flux density of ~ 90 μJy we estimate the chance
+alignment probability of 0.0008, which corresponds to 3.4σ. The
+flux density threshold, assuming 3σ, for an unresolved radio source
+is ~ 15 μJy. If instead we consider the probability of detecting any
+radio source above our flux density threshold of 15 μJy, the probabil-
+ity of a chance alignment is, therefore, approximately 0.8%, making
+the statistical significance of our detection 2.6σ. This represents the
+first detection of radio continuum emission associated with the host
+(galaxy) of FRB 20190714A (see Figure 2 and 3).
+
+3.1.3. MeerKAT non-detections
+
+No continuum emission was detected near FRBs 20171019A and
+20190711A. As each of the images of these sources has an rms
+of ~ 5 μJy beam⁻¹, the 3σ intensity upper limit of any emission
+associated with FRBs 20171019A and 20190711A will be ~ 15 μJy
+beam⁻¹ (see Table 1).
+
+Candidate pulses above a signal-to-noise (S/N) of 10 from the
+single pulse search with MeerTRAP were visually inspected offline.
+No new FRBs or repeat bursts from the known FRBs were detected
+above a fluence threshold of 0.08 Jy ms assuming a 1 ms duration
+burst.
+
+MNRAS 000, 1-15 (2021)
+
+3.2 Swift
+
+The UVOT summed image is presented in Figure 4. The UVOT
+field of view corresponds roughly to the uncertainty⁷ of the locali-
+sation region of FRB 20171019A (RA = 7.5′ and DEC = 7′). Using
+uvotdetect, we find 30 sources above the 5σ level and within the
+FRB 20171019A uncertainty region. Using a 3 arcsec maximum
+separation, which is slightly larger than the UVOT PSF (Breeveld
+et al. 2010), these sources are cross-matched with known catalogue
+sources. We find that out of the 30 sources detected by UVOT, 28
+are spatially coincident with stars catalogued in the SDSS catalogue
+(DR12; Alam et al. 2015), and one source is coincident with a galaxy
+(AGN broadline SDSS ID: 1237652599570890948 at z ~ 0.156).
+This galaxy is also detected by the MeerKAT radio observations. We
+use the NASA/IPAC Extragalactic Database (NED)⁸ to search for
+known galaxies in the FRB 20171019A uncertainty regions. We find
+multiple galaxies with unknown redshifts, therefore we cannot draw
+conclusions on the host galaxy from our observations. Using a 50″
+circular ON region centred on the position of FRB 20171019A and
+a 50″ OFF region that does not contain any of the detected sources,
+we run the uvotsource tool with a 5σ background threshold and
+obtain a flux upper limit of 1.4 × 10⁻¹⁶ erg cm⁻² s⁻¹ Å⁻¹ without
+applying a Galactic extinction correction.
+
+The XRT summed image is shown in Figure 5. At the edge
+of the field-of-view, we detect a source spatially coincident with
+the Wolf 1561 star. As we consider this source unrelated to the
+FRB, we use the online Swift-XRT data products generator (Evans
+et al. 2007) (Evans et al. 2009) to derive upper limits in the 0.3-
+10 keV range on the count rate of 0.001885 counts.s⁻¹. Using
+WebPIMMS⁹ (v4.11a) and assuming a weighted average $N_{\rm H}$ = 5.12 ×
+10²⁰ cm⁻² from the direction of the source estimated from the
+NASA’s HEASARC¹⁰ online tools (HI4PI Collaboration et al.
+2016) and a power law model with a photon index = 2, this upper
+limit translates to an energy flux of 6.6 × 10⁻¹⁴ erg cm⁻² s⁻¹ (8.3 ×
+10⁻¹⁴ erg cm⁻² s⁻¹ unabsorbed).
+
+3.3. H.E.S.S.
+
+No significant gamma-ray excess above the expected background
+is detected from the direction of FRB 20171019A, with 52 gamma
+candidate events from the source region and 524 background event.
+A second analysis using an independent event calibration and recon-
+struction (Parsons & Hinton 2014) confirms this result. A search for
+variable emission on timescales ranging from milliseconds to sev-
+eral minutes with tools provided in (Brun et al. 2020) does not reveal
+any variability above 2.2σ. For the total data set of 1.8 h, 95% confi-
+dence level (C. L.) upper limits on the photon flux are derived using
+the method described by Rolke et al. (2005). The energy threshold
+of the data is highly dependent on the zenith angle of the observa-
+tions. For these observations, the zenith angles range from 15 to 25
+deg, which leads to an energy threshold for the stacked data set of
+$E_{\rm th}$ = 120 GeV. The upper limit on the Very High Energy (VHE)
+
+7 https://www.wis-tns.org/object/20171019a
+
+8 https://ned.ipac.caltech.edu; NED is funded by the National
+Aeronautics and Space Administration and operated by the California Insti-
+tute of Technology
+
+9 https://heasarc.gsfc.nasa.gov/cgi-bin/Tools/w3pimms/
+w3pimms.pl
+
+10 https://heasarc.gsfc.nasa.gov/cgi-bin/Tools/w3nh/w3nh.
+pl
+
+
 
 
 6 Chibueze et al.
@@ -654,9 +920,9 @@ the full region accessible within the H.E.S.S. field of view above
 
 Of the targeted FRB fields reported here, only FRB 20190714A
 is observed to be spatially coincident with a persistent radio con-
-tinuum source. We obtain an upper limit of ∼ 15 `Jy beam−1 for
+tinuum source. We obtain an upper limit of ∼ 15 𝜇Jy beam−1 for
 FRBs 20190711A and 20171019A, respectively, and a peak inten-
-sity of ∼ 53 `Jy beam−1 for the emission coincident with FRB
+sity of ∼ 53 𝜇Jy beam−1 for the emission coincident with FRB
 20190714A. This source is detected at both epochs with similar
 intensities within the measured rms of the images (see Tables 1 and
 2 for details). The values in the Table 2 are derived by carrying
@@ -720,26 +986,26 @@ et al. (2019).
 
 Table 1. Details of the FRB fields observed with MeerKAT.
 
-Field name Observation date Synthesized beam rms (`Jy beam−1) Detected?
+Field name Observation date Synthesized beam rms (𝜇Jy beam−1) Detected?
 
 FRB 20171019A 28 September 2019 – No (calibration failure)
-FRB 20171019A 18 October 2019 6.′′8 × 5.′′0 5.2 < 15`Jy beam−1
+FRB 20171019A 18 October 2019 6.′′8 × 5.′′0 5.2 < 15𝜇Jy beam−1
 
-FRB 20190711A 23 August 2019 11.′′7 × 4.′′9 4.9 < 15`Jy beam−1
+FRB 20190711A 23 August 2019 11.′′7 × 4.′′9 4.9 < 15𝜇Jy beam−1
 
-FRB 20190711A 09 September 2019 12.′′5 × 4.′′9 4.6 < 15`Jy beam−1
+FRB 20190711A 09 September 2019 12.′′5 × 4.′′9 4.6 < 15𝜇Jy beam−1
 
-FRB 20190714A 14 September 2019 7.′′1 × 6.′′2 4.2 54.4 `Jy beam−1
+FRB 20190714A 14 September 2019 7.′′1 × 6.′′2 4.2 54.4 𝜇Jy beam−1
 
-FRB 20190714A 28 September 2019 6.′′5 × 5.′′1 5.8 52.0 `Jy beam−1
+FRB 20190714A 28 September 2019 6.′′5 × 5.′′1 5.8 52.0 𝜇Jy beam−1
 
 Table 2. Details of the radio continuum source associated with FRB 20190714A.
 
-Field name Observation date Telescope acentre (GHz) 𝛼J2000 𝛿J2000 Maj. × min. axis Pos. angle Int. flux density
+Field name Observation date Telescope 𝜈centre (GHz) 𝛼J2000 𝛿J2000 Maj. × min. axis Pos. angle Int. flux density
 
-FRB 20190714A 28 September 2019 MeerKAT 1.283 12ℎ15𝑚55𝑠 .154 -13◦01′17.′′30 9.′′6 × 7.′′4 88.7◦ 87.4 `Jy
-FRB 20190714A 18 October 2019 MeerKAT 1.283 12ℎ15𝑚55𝑠 .193 −13◦01′17.′′18 8.′′2 × 6.′′4 12.2◦ 80.7 `Jy
-FRB 20190714A 13 January 2021 e-MERLIN 1.510 12ℎ15𝑚55𝑠 .116 −13◦01′14.′′51 0.′′15 × 0.′′65 17.6◦ 107.5 `Jy
+FRB 20190714A 28 September 2019 MeerKAT 1.283 12ℎ15𝑚55𝑠 .154 -13◦01′17.′′30 9.′′6 × 7.′′4 88.7◦ 87.4 𝜇Jy
+FRB 20190714A 18 October 2019 MeerKAT 1.283 12ℎ15𝑚55𝑠 .193 −13◦01′17.′′18 8.′′2 × 6.′′4 12.2◦ 80.7 𝜇Jy
+FRB 20190714A 13 January 2021 e-MERLIN 1.510 12ℎ15𝑚55𝑠 .116 −13◦01′14.′′51 0.′′15 × 0.′′65 17.6◦ 107.5 𝜇Jy
 
 large offset from the centre of the galaxymakes the persistent source
 unlikely to be an AGN. So far this FRB has not been seen to repeat.
@@ -755,6 +1021,135 @@ with FRB 20121102A. At the angular diameter distance of
 
 MNRAS 000, 1–15 (2021)
 
+MeerKAT, e-MERLIN,
+
+Swift and H.E.S.S., observations of three localised FRBs
+
+9
+
+ 
+
+a
+
+-8°30'
+
+33'}
+
+36'
+
+39'
+
+Declination (J2000)
+
+42'
+
+45'
+
+48'
+
+b
+
+ 
+ 
+
+ 
+
+ 
+
+   
+
+100
+
+10
+
+Counts
+
+ 
+
+ 
+
+po ba
+
+ 
+
+wii |
+'00.0" 21'00
+
+Right Ascension
+
+334°33'00.0" 27
+
+Qo"
+(J2000)
+
+15'00.0"
+
+Figure 4. UVOT summed image of FRB 20171019A region taken during the MWL observation campaign in September-October 2019. The white circles
+indicate sources detected above 5σ. The cyan dot denotes the location of FRB 20171019A, the circle around it indicates the region used to derive the upper
+limits while the magenta region indicates the background region used. The green box indicates FRB 20171019A 90% localisation region as reported in Kumar
+
+et al. (2019).
+
+Table 1. Details of the FRB fields observed with MeerKAT.
+
+ 
+
+Field name Observation date
+
+Synthesized beam
+
+rms (μJy beam⁻¹) Detected?
+
+ 
+
+FRB 20171019A 28 September 2019
+
+No (calibration failure)
+
+ 
+
+ 
+
+ 
+
+ 
+
+ 
+
+FRB 20171019A 18 October 2019 6″.8 × 5″.0 5.2 < 15 μJy beam⁻¹
+
+FRB 20190711A 23 August 2019 11″.7 × 4″.9 4.9 < 15 μJy beam⁻¹
+
+FRB 20190711A 09 September 2019 12″.5 × 4″.9 4.6 < 15 μJy beam⁻¹
+
+FRB 20190714A 14 September 2019 7″.1 × 6″.2 4.2 54.4 μJy beam⁻¹
+
+FRB 20190714A 28 September 2019 6″.5 × 5″.1 5.8 52.0 μJy beam⁻¹
+
+Table 2. Details of the radio continuum source associated with FRB 20190714A.
+
+Field name Observation date Telescope $\nu_{\rm centre}$ (GHz) $\alpha_{\rm J2000}$ $\delta_{\rm J2000}$ Maj. × min. axis Pos. angle Int. flux density
+FRB 20190714A 28 September 2019 MeerKAT 1.283 12h15m55s.154 −13°01′17″.30 9″.6 × 7″.4 88.7° 87.4 μJy
+FRB 20190714A 18 October 2019 MeerKAT 1.283 12h15m55s.193 −13°01′17″.18 8″.2 × 6″.4 12.2° 80.7 μJy
+FRB 20190714A 13 January 2021 e-MERLIN 1.510 12h15m55s.116 −13°01′14″.51 0″.15 × 0″.65 17.6° 107.5 μJy
+
+ 
+
+large offset from the centre of the galaxy makes the persistent source
+unlikely to be an AGN. So far this FRB has not been seen to repeat.
+Higher resolution imaging will be required to be certain of a direct
+association of the persistent source with the FRB. We did not have
+
+MNRAS 000, 1-15 (2021)
+
+sufficient sensitivity in the sub-band images, thus, we are unable to
+derive the spectral index of the emission of the host galaxy.
+
+Our e-MERLIN observations probe a different spatial
+scale than the size of the persistent radio source associated
+with FRB 20121102A. At the angular diameter distance of
+
+
 
 
 10 Chibueze et al.
@@ -765,11 +1160,11 @@ Wolf 1561 star is shown in cyan and is labelled. The green box indicates FRB2017
 FRB 20190714A (780 Mpc), an unresolved source with an an-
 gular size of 0.′′6 corresponds to a physical extent of .2.3 kpc. The
 uGMRT reported the detection of an unresolved radio emission at
-650MHzwith a flux density of 700±100 `Jy (Wharton et al. 2021),
+650MHzwith a flux density of 700±100 𝜇Jy (Wharton et al. 2021),
 while the JVLA detected persistent emission with a flux density of
-340 ± 30 `Jy at 3GHz (Ricci et al. 2021). Assuming the estimated
+340 ± 30 𝜇Jy at 3GHz (Ricci et al. 2021). Assuming the estimated
 spectral index between these frequencies (∼ −0.5, Ricci et al. 2021),
-the 1.3GHz flux density would be ∼ 500 `Jy (similar to the 3-𝜎
+the 1.3GHz flux density would be ∼ 500 𝜇Jy (similar to the 3-𝜎
 upper limit on observations from 1 − 2GHz; Law et al. 2021). The
 flux density we measured for FRB 20190714A is a factor of ∼10
 lower than FRB20201124A, but FRB 20190714A is also a factor
@@ -857,7 +1252,7 @@ FRB 20190714A (at 𝑧 = 0.2365) using the MeerKAT and e-
 MERLIN radio telescope. This represents the first detection of the
 radio continuum emission associated with the host (galaxy) of FRB
 20190714A and is only the third known FRB to have such an as-
-sociation. We furthermore obtained a radio upper limit of∼ 15`Jy
+sociation. We furthermore obtained a radio upper limit of∼ 15𝜇Jy
 beam−1 for the repeating FRBs 20190711A and 20171019A.
 
 We also performed UV, X-ray and VHE observations with the
diff --git a/read/results/tika/2201.00178.txt b/read/results/tika/2201.00178.txt
index 015befd..bb683f4 100644
--- a/read/results/tika/2201.00178.txt
+++ b/read/results/tika/2201.00178.txt
@@ -381,6 +381,74 @@ random forcing function (see Duvall & Harvey 1986). Modes are thus generated wit
 
 and with finite lifetimes. This stochasticity leads to realization noise in repeated measurements of mode parameters
 
+IMAGING NEAR-SURFACE FLOWS USING MODE-COUPLING ANALYSIS 3
+
+Pe Petq: Where k is the oscillation mode wavenumber (ka, ky) and w is the temporal frequency. Relate d%* d% +q thus
+
+to the flow coefficients Pj; and Ty; (see eq A7)
+
+(oe k-+q) = Hkkinn! S- Cain Pag + Daj,kTq;: (4)
+j
+
+The weight factor H” (see eq A8) is a function of frequency, capturing information about the extent of coupling between
+the two modes [n, k] and [n’, k’], where n and n’ are the radial orders of the modes, and k = |k| and k’ = |k’| = |k+q].
+The spectral profile of the mode (see eq AQ) is approximated using a Lorentzian (Anderson et al. 1990). The more the
+Lorentzians of the two modes overlap, the stronger the coupling. Finally, the real terms Cg;,, and Dgj,~ are poloidal
+and toroidal flow sensitivity kernels respectively, that allow us to relate the flows in question to the measurements
+and are derived from the solar model see Appendix A. They possess the symmetry relation: Cq;,~% = C—qj,-z and
+Daj,k = D—qj,-k (see eq AG). The kernels, as flows, are expressed on the basis f;(z).
+
+1.2. Least-squares of cross-correlation
+
+Even though $;* dg 44 isolates the effect of flow perturbations at individual wavenumbers qg, a more compact mea-
+surement, known in mode-coupling literature as ’B-coefficients’, is much better designed for inversion as it reduces the
+dimension of the problem. A least-squares fit to the cross-correlation ¢* of. q (see Woodard 2006, 2014, 2016) results
+in the B-coefficients By,q, according to
+
+»— Heenan! PR k-+q
+
+w
+» He krnan |? ,
+w
+
+Multiplying eq 4 on both sides by H¥¥,,,,,, and substituting by eq 5 on the left-hand-side results in a concisely defined
+forward problem (compare with eq 4)
+
+Brig = (5)
+
+Brg = S> Cqj,kPaj + Daj,eTaj- (6)
+J
+
+In eq 5, Woodard (2007) and H21 thus far only considered positive-frequency components in the summation over w.
+Here, we sum over both +w within a few mode linewidths I’. Denoting the resonant frequency of a mode using wy,
+
+\w| € (wn _ Eng /2,Wnk + Dnx/2) or
+lw] € (snr = LD yp /2, Wn Ky + Dyrnr/2), (7)
+
+Summing over tw guarantees that the parity Brg = Bq (see Appendix A for derivation) is obeyed, thereby
+ensuring that the flow field on the right-hand-side of eq 6 is a real physical quantity in the spatio-temporal domain.
+Taking the complex conjugate on both sides of eq 6 and considering the negative wavenumber components —q and
+—k,
+
+Br yg =) C-aj bP gj + Doi KT" gy: (8)
+
+j
+
+Substituting parity and symmetry relations for all terms in the above results in eq 6. As Bx gq is constructed by a
+least-squares fitting, it is noteworthy that summing over —w will also lead to improvement in its signal-to-noise as a
+by-product.
+
+1.3. Noise model
+
+In the addition to the sensitivity kernels, a systematic background noise model is required to infer the flows from
+the observed B-coefficients. For estimating the contribution from realization noise to the measurements, we make the
+following assumptions (Gizon & Birch 2004): that the excitation of the wavefield is modelled as a multivariate Gaussian
+random process and the wavefields are uncorrelated across wavenumber and frequency in the absence of perturbations.
+Every independent realization of a mode can be understood as the output of a damped harmonic oscillator driven by a
+random forcing function (see Duvall & Harvey 1986). Modes are thus generated with random phases and amplitudes
+and with finite lifetimes. This stochasticity leads to realization noise in repeated measurements of mode parameters
+
+
 
 
 4 Mani et al.
@@ -454,6 +522,79 @@ radial order are different. In wavenumber, we restrict our analysis to within 20
 
 frequency range is confined to span the range over which acoustic modes are observed (2 ≤ ω/2π ≤ 5 in mHz).
 
+4 MANI ET AL.
+
+ 
+
+ 
+
+ 
+
+ 
+
+5 4 a :
+P2y Pr ee
+44 7 a
+NM 7 a
+E 7 a
+23 A ae
+E 7 a
+Q 7
+324 a
+“
+14
+so Theoretical
+—-— Observed
+0 T T T T
+0 500 1000 1500 2000 2500
+kRo
+
+Figure 1. Dispersion relation for the radial orders used in this analysis; f (blue), p1 (orange) and pz (green). The shaded
+regions of the same colours indicate 1-linewidth [ about the mode frequency. The yellow shaded region indicates the range of
+kRo and w/27 to which we have restricted ourselves in this analysis. Beyond kRo of 2000, it is seen that the theoretical fitting
+of mode frequencies start deviating from the observed dispersion relation for the f-mode.
+
+such as its amplitude, frequency and linewidth, and consequently in Bz,q in our case. We use the same noise model
+as in H21, which was motivated by the above discussion,
+
+Gq = (|Bral”): (9)
+
+where, unlike H21, we again sum over tw. Gp,q is real, with the symmetry relation Gxq = G_kr,—q (see Appendix A
+for explanation).
+
+2. DATA ANALYSIS
+
+In order to examine near surface flows we build a time-series cube of Doppler images that are obtained from the
+Helioseismic Magnetic Imager aboard the Solar Dynamics Observatory (HMI/SDO, Scherrer et al. 2012). Each image
+is Postel projected, with a spatial resolution of approximately 0.48Mm, sperated in time by 45 seconds, and is tracked
+at the (Snodgrass 1984) rotation rate. Here, we select a patch that is 194.4 x 194.4 Mm? in size, tracked for 24 hours
+and crosses the disk-center in the middle of observation time on the 14 Novemeber 2017 (Carrington rotation number
+2197, Carrington longitude 90°). This Dopplercube is considered as the physical wavefield ¢(z, y;t). The Fourier-space
+wavefield ¢g (and subsequently, the cross-correlation ¢%* ¢¢ @ is obtained by computing the 3D spatial and temporal
+Fourier transform of the Dopplercube.
+
+The duration of the observed region is long enough to provide sufficient frequency bins with which to sum over in
+Eq 6, while short enough that supergranules do not substantially evolve (lifetime is purported to be 1.6 days; Rincon
+& Rieutord 2018) over this period. Our observation region is close to the disk center to also avoid any contamination
+from center-to-limb systematics (Zhao et al. 2012; Langfellner et al. 2015).
+
+Maximum signal can be extracted from the weighted summation of the cross correlations (eq 5) when the spectral
+profiles of the two modes [n, k] and [n’, k’] closely align in w space. This implies that their mode frequencies should be
+sufficiently close (|wnx — Wnn’| < 6, the separation parameter). Since Lorentzians decay rapidly, the summation over
++w is significant only over a few linewidths (e, the summation parameter; see eq 7). We have empirically found and
+tabulated 6 in Table 1 for the radial order couplings n-n’ € f-f, pi-pi, and p2-p2 (the signal strength depends only
+weakly on €; we set it to 3 line widths).
+
+Figure 1 shows that for any two adjacent ridges (adjacent n and n’), mode frequencies wnz, and wy, become spaced
+farther apart with increasing wavenumber /Ro. It is also known that mode linewidth [ grows with radial orders for
+a given kRo. Moreover, holding the spatial and temporal sampling rates constant, the spatial size and duration of
+observation set the total number of modes within a range of kRe (and w/27) that can be clearly observed, thereby
+affecting the quality of the seismic measurements. Owing to these factors, to maximize signal-to-noise (by visually
+inspecting the power-spectrum), the parameters describing the extent of coupling over different ranges of kRo at fixed
+radial order are different. In wavenumber, we restrict our analysis to within 200 < kKRe < 2000 and qRo < 300. Our
+frequency range is confined to span the range over which acoustic modes are observed (2 < w/2a <5 in mHz).
+
+
 
 
 Imaging near-surface flows using mode-coupling analysis 5
@@ -547,6 +688,83 @@ M ×M) and pre-multiplying by Kᵀ,
 
 U =(KᵀΛ−1K)−1KᵀΛ−1B. (12)
 
+IMAGING NEAR-SURFACE FLOWS USING MODE-COUPLING ANALYSIS 5
+
+ 
+
+ 
+
+Coupling kRo range # of 6
+
+modes
+
+f-f [400,1000] 5240 4
+
+[1000,1500] 7784 1.1
+[1500,2000] 10940 0.4
+
+ 
+
+ 
+
+ 
+
+P1-P1 (400, 1000] 5240 4.5
+[1000,1750] 12852 2
+P2-Pp2 [200,1000] 5886 3
+
+ 
+
+[1000,1300] 4280 3
+
+ 
+
+Table 1. Total number of modes, and separation parameter (in number of linewidths) for different couplings, for different
+ranges of kRo.
+
+3. INVERSION
+
+The final step to producing near-surface flow maps in Cartesian mode-coupling is to invert the measurements
+Bx,q from the linear relation in eq 6. We describe inversion using regularized-least-squares (RLS) method here and
+leave Subtractive Optimally Localized Averages (SOLA, Pijpers & Thompson 1994) for Appendix B. The methods
+complement each other (see Sckii 1997), where RLS tries to minimize the misfit between data and model, whereas
+SOLA gives better localization. For total number of modes M, RLS scales as MxJ where J is the number of basis
+functions f;(z) (J < M; see eq 3 and section 3.1), whereas SOLA scales as M? (see Appendix B). For M > 5000,
+computation starts to quickly become expensive for SOLA.
+
+Mode eigenfunctions peak near the surface, with higher radial orders possessing smaller peaks in the interior. While
+f-f coupling alone has enough sensitivity to probe perturbations at supergranular scales close to surface, signal is
+present even in pj-p1, and p9-p2 (see Figure 3), and possibly other higher order self- and cross-couplings. Since we are
+interested in only surface flows, we leave higher order coupling to future work.
+
+It bears mentioning that the slopes of the ridges in the kRo-v spectrum (Figure 1) increase with radial order. This
+limits us to low-to-intermediate kRe (< 1000) for these higher radial orders if we are to remain under the acoustic cut-
+off frequency of 5.3mHz. It also becomes imperative to use a spatially larger observation patch to gain access to signals
+from low kRe - too large an observation region could possibly render invalid the Cartesian geometry approximation.
+Regardless, in addition to performing inversions using all the couplings stacked together, we also demonstrate inversions
+separately for the three couplings (see Table 2) in order to account for the full gamut of mode-coupling as a signal-rich
+helioseismic technique.
+
+3.1. RLS
+
+For given q, the forward problem may be stated as
+KU =B, (10)
+
+with the aim to minimize the misfit }* ||KU — B||2, with |] ||) denoting the Lz norm. Here, K is the matrix formed
+k
+
+by the sensitivity kernels: {Cqj,n,Pqj,n}. U is a vector composed of the flow coefficients: {P;,Tq;} and B is a vector
+composed of computed B-coefficients: {B,,q}. The least-squares problem is solved simultaneously for poloidal and
+toroidal flow. We use B-spline basis functions as our f;(z), comprising 11 knots spaced uniformly in acoustic radius,
+for both poloidal and toroidal coefficients. Hence, for M modes (total number of k& for a given q is M) and 11 basis
+functions for each poloidal and toroidal, the dimensions of K, U and B are thus M x 22, 22x 1, and M x 1 respectively.
+Normalizing both sides of eq 10 by the noise covariance A (a diagonal matrix with the entries Gz,_; see eq 9; dimension
+M x M) and pre-multiplying by KT,
+
+(KTA~'K)U =(KTA“!)B, (11)
+U =(KTA7!K)~'KTA“!B. (12)
+
+
 
 
 6 Mani et al.
@@ -1138,6 +1356,73 @@ the real domain. Setting σ = 0 gives us the linear, invertible equation eq 6. S
 
 the noise model obtained in H21 and summing over ±ω establishes the symmetry Gσk,q = G−σ−k,−q.
 
+12 MANI ET AL.
+Express the mode eigenfunction describing oscillations in the Cartesian domain by (see Woodard 2006)
+Ex = En (2) = ikHyx(2)ez + 2Vnx(2), (A4)
+
+where H and V are real-valued functions; n and n’ are dropped for compactness of notation. Then the coupling of
+two modes €, and €,, (k’ = k + q), by the flow perturbation operator 6£%, denoted by coupling integral AR,(c), is
+given by
+
+a al
+AR (co) = [x (LG &x) «ke = [x - 2iwp >~ {@ fj Poy (kok Hy Ag + Vi Vp)
+J
+
+a al
+— [kg fP%y +h: (axez) fiT%] (bk Hy His + Veve)} (A5)
+
+ 
+
+We desire to linearly relate the coupling integral in the above equation to the flows P and T, through poloidal and
+toroidal sensitivity kernels, Cg;,~ and Dgj,~ respectively. Hence, they are given by
+
+Caik = / dzp l@ fi (kk HLS +ViVe)
+
+—k-q fi (le- ke’ Hy Hy, + VaVi)|
+
+Daj.w =k (qxez) [eon (kk Hy His + VeVi). (A6)
+Note the symmetry Cqj,4 = C_—qj,-k and Dgj,x = D—qj,-k- This coupling integral contributes to the cross-spectral
+
+measurement between modes k and k + q From eq 8 of Woodard (2014), we write the first-order effect of flow on
+wavefield cross-correlation as
+
+(O8* Opig) = AiwoAe (2), (A7)
+where the function H is given by
+fog = —2iw( NelREI? REM? + Ne |GET? RE) (A8)
+
+We absorb the factor —2iw into the definition of H. Substitute eq A6 in right-hand-side of eq A7 to obtain eq 4.
+The mode spectral profile R is a Lorentzian, given by
+
+1
+
+_ twnk /2 , (A9)
+
+a =
+k w?, — Ww
+where wpx is the resonant frequency of the mode, and 7ynzx is the mode linewidth. Eq AY can be derived by introducing
+mode damping —iwyp as an operator in the differential equation that governs undamped, driven oscillations (see eq
+5 of Hanasoge et al. 2017), and then deriving the effects of first-order perturbations to the wavefield cross-correlation.
+
+Also, the parity H%,, = Hy, and RZ = R,** are established. Mode normalization N is given by
+
+1 & & lee?
+N, = — eo A10
+k QX Re (A10)
+
+Ww
+
+Q
+where the o >> on the right-hand-side implies average over all [k,z,k,] (Q terms in all) such that k = |k| is constant.
+
+This forces N to be isotropic, i.e., to only depend on k, and not k. The sum over w is within five linewidths of wy x.
+Note that Eq. A8 through A10 are modified from H21 to ensure parity and that flow maps are real.
+
+The three equations A8 through A10, along with the symmetry relation for kernels, and summation over +w, serve
+to establish the parity Bg = Bry ¢ This allows for obtaining Py = Pry, and subsequently, purely real flow in
+the real domain. Setting o = 0 gives us the linear, invertible equation eq 6. Substituting eqns A8 through A10 into
+the noise model obtained in H21 and summing over tw establishes the symmetry GZ 4 = Gh ¢
+
+
 
 
 Imaging near-surface flows using mode-coupling analysis 13
@@ -1239,6 +1524,65 @@ As an aside, we note that averaging kernels can similarly be constructed for RLS
 
 and B14.
 
+IMAGING NEAR-SURFACE FLOWS USING MODE-COUPLING ANALYSIS 13
+
+B. SOLA INVERSIONS
+
+Subtractive Optimally Localized Averages (SOLA, Pijpers & Thompson 1994) aims to obtain a set of weight factors
+for the mode q and depth z,, which we will call az,z.. A linear weighted sum of the measurements Bx,q in the fashion
+>) ak,z0Br,q allows for an average value of the flow P,(z) to be estimated at the depth z,. To obtain the coefficients
+k
+
+Qk,zo, it is assumed that a set of sensitivity kernels Ky,,q(z) for the mode q can be summed up coherently to give an
+’averaging kernel’ that is localized at the depth z,. Conventionally, a Gaussian centered at z, and a width A is chosen
+which the averaging kernel should resemble after performing inversion.
+
+B.1. Kernels in the integral form
+
+Since the kernels in eq A6 are manifest as coefficients on a basis f;(z), we first derive kernels that can be expressed
+as a function of depth z (see Figure 8). It is convenient to derive in matrix form. Thus, with the following definitions:
+P = Py(z), p = Pay, F = f(z), B = Beg C = Cajx and K = Kx q(z), we write (assume only poloidal flow for
+simplicity, the same derivations hold true for toroidal flow as well)
+
+P=Fp (B11)
+
+The size of P is thus the same as the length of the radial grid z.
+Now, pre-multiply by F7 and integrate over z on both sides (drop the integral notation for compactness),
+
+F’P=(F'F)p
+p=(F'F)'F'P (B12)
+Now, substituting eq B12 into the forward problem eq 6,
+B=Cp
+=(F'F)'F'CP
+=KP (B13)
+where
+K =(F'F)"'F'C,
+-1
+ie, Keg(2)=>.| / dz fi(2)Fir(2)|  Sir(2)Casre (B14)
+
+i5'
+
+B.2. Obtaining the coefficients a
+
+Now, demand that the averaging kernel should resemble a unimodulus target Gaussian centered at z,
+
+ 
+
+ 
+
+1 Z— Zo
+T (2,20) = Tani exp( A? ). (B15)
+This can be achieved by solving the optimization problem
+2
+minimize ¥ = je [7 (2,20) = Oq(2,0)| ; (B16)
+where we introduce the averaging kernel for mode q thus
+Oq(z, 20) = > On,2oKR,g(2): (B17)
+k
+
+As an aside, we note that averaging kernels can similarly be constructed for RLS (see section 3.1) using eqns 13
+and B14.
+
+
 
 
 14 Mani et al.
@@ -1357,6 +1701,100 @@ http://doi.org/10.1051/0004-6361/201937331
 http://doi.org/10.1086/324323
 http://doi.org/10.1103/RevModPhys.74.1073
 http://doi.org/10.1007/s41116-020-00028-3
+14 MANI ET AL.
+
+x10-1 x10-’
+
+6.0 4 — Avg. kernel
+sees Target
+
+ 
+
+ 
+
+ 
+    
+
+qk. =[-112, -45]
+o =[-853,-157]
+
+      
+
+457 z= —-0.48 Mm
+
+—— oe
+
+ 
+ 
+
+ 
+
+ 
+
+ 
+
+ 
+
+ 
+
+ 
+
+ 
+
+yO hh 3
+Z N 3.07
+- o
+1.5 4
+0.0 fasmererrritivrstsereree?
+—4 —2 0 —5 -4 3 2 -1 0
+2, Mm z, Mm
+
+Figure 8. Left: Kernel Kx,q(z) (eq B14) shown vs depth z for the three radial order couplings ff, pi-p1, and p2-p2e. qRo =
+[—112, —45] and kRo = [-—853,—157] is chosen for all the radial order couplings for comparison. Right: Averaging kernel
+(eq B17) using SOLA, for qRo = [—112, —45] at depth zo = —0.48 Mm, and the corresponding target Gaussian (eq B15).
+Integral of the averaging kernel over z is 0.89.
+
+Setting ox — 0 gives us the matrix problem to be solved
+A{a} =v,
+-1
+{a} = |4 + y| v, (B18)
+
+where the square matrix A = [dz Keq(z) Kp g(z) and vu = [ dz Ke.q(z)T(z, 20). Here, k’ is just a dummy index for
+denoting elements in the matrix A, (k’ 4 k+q). In the last line of eq B18, we introduce regularization using an Identity
+matrix I, with the regularization parameter jz - purpose being the same as that described in section 3.1. Obtaining
+a thus becomes a highly expensive computationally for very large number of modes (see section 3). Substitute a
+
+obtained from eq B18 into last line of eq B13, and S> on both sides
+k
+
+So on,2. Bq = So oR,2. ju Kxq(2) PG (2);
+k k
+= f a2 0q(2.20)P§(2).
+~ (Pq (Zo) (B19)
+
+Inversions can similarly be performed for multiple depths by choosing suitable widths for the target Gaussians. Di-
+vergence flow can then be obtained from eq 16. Results are shown in Figures 9 and 10.
+
+REFERENCES
+Anderson, E. R., Duvall, Thomas L., J., & Jefferies, S. M. Boning, V. G. A., Birch, A. C., Gizon, L., Duvall, T. L., &
+1990, ApJ, 364, 699, doi: 10.1086/169452 Schou, J. 2020, A&A, 635, A181,
+Bahcall, J. N., & Pinsonneault, M. H. 1992, Reviews of doi: 10.1051/0004-6361/201937331
+
+Modern Physics, 64, 885,
+doi: 10.1103/RevModPhys.64.885
+
+doi: 10.1086 /324323
+Birch, A. C., Schunker, H., Braun, D. C., et al. 2016, hei . '
+Science Advances, 2, €1600557, Christensen-Dalsgaard, J. 2002, Reviews of Modern
+
+doi: 10.1126/sciadv.1600557 Physics, 74, 1073, doi: 10.1103/RevModPhys.74.1073
+
+Braun, D. C., & Lindsey, C. 2001, ApJL, 560, L189,
+
+Birch, A. C., Schunker, H., Braun, D. C., & Gizon, L. 2019, —. 2021, Living Reviews in Solar Physics, 18, 2,
+A&A, 628, A37, doi: 10.1051/0004-6361/201935591 doi: 10.1007/s41116-020-00028-3
+
+
 
 
 Imaging near-surface flows using mode-coupling analysis 15
diff --git a/read/results/tika/GeoTopo-book.txt b/read/results/tika/GeoTopo-book.txt
index ba3c3e7..f1a29e5 100644
--- a/read/results/tika/GeoTopo-book.txt
+++ b/read/results/tika/GeoTopo-book.txt
@@ -1,8339 +1 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-Geometrie und Topologie
-
-
-Einführung in die
-
-Geometrie und Topologie
-
-0. Auflage, 31. Dezember 2016 Martin Thoma
-
-
-
-Vorwort
-
-Dieses Skript wurde im Wintersemester 2013/2014 von Martin Thoma geschrieben. Es beinhaltet
-die Mitschriften aus der Vorlesung von Prof. Dr. Herrlich sowie die Mitschriften einiger Übungen
-und Tutorien.
-
-Das Skript ist kostenlos über martin-thoma.com/geotopo verfügbar. Wer es gerne in A5 (Schwarz-
-Weiß, Ringbindung) für 10 Euro hätte, kann mir eine E-Mail schicken (info@martin-thoma.de).
-
-Danksagungen
-
-An dieser Stelle möchte ich Herrn Prof. Dr. Herrlich für einige Korrekturvorschläge und einen
-gut strukturierten Tafelanschrieb danken, der als Vorlage für dieses Skript diente. Tatsächlich
-basiert die Struktur dieses Skripts auf der Vorlesung von Herrn Prof. Dr. Herrlich und ganze
-Abschnitte konnten direkt mit LATEX umgesetzt werden. Vielen Dank für die Erlaubnis, Ihre
-Inhalte in diesem Skript einbauen zu dürfen!
-
-Vielen Dank auch an Frau Lenz und Frau Randecker, die es mir erlaubt haben, ihre Übungsauf-
-gaben und Lösungen zu benutzen.
-
-Jérôme Urhausen hat durch viele Verbesserungsvorschläge und Beweise zu einer erheblichen
-Qualitätssteigerung am Skript beigetragen und meine Tutorin Sarah hat mir viele Fragen per
-E-Mail und nach dem Tutorium beantwortet. Danke!
-
-Was ist Topologie?
-
-Die Kugeloberfläche S2 lässt sich durch strecken, stauchen und umformen zur Würfeloberfläche
-oder der Oberfläche einer Pyramide verformen, aber nicht zum R2 oder zu einem Torus T 2. Für
-den R2 müsste man die Oberfläche unendlich ausdehnen und für einen Torus müsste man ein
-Loch machen.
-
-Erforderliche Vorkenntnisse
-
-Es wird ein sicherer Umgang mit den Quantoren (∀, ∃), Mengenschreibweisen (∪,∩, \, ∅,R,P(M))
-und ganz allgemein formaler Schreibweise vorausgesetzt. Auch die Beweisführung mittels Wider-
-spruchsbeweisen sollte bekannt sein und der Umgang mit komplexen Zahlen C, deren Betrag,
-Folgen und Häufungspunkten nicht weiter schwer fallen. Diese Vorkenntnisse werden vor allem
-in „Analysis I“ vermittelt.
-
-Außerdem wird vorausgesetzt, dass (affine) Vektorräume, Faktorräume, lineare Unabhängigkeit,
-der Spektralsatz und der projektive Raum P(R) aus „Lineare Algebra I“ bekannt sind. In „Lineare
-Algebra II“ wird der Begriff der Orthonormalbasis eingeführt.
-
-http://martin-thoma.com/geotopo/
-
-
-iii
-
-(a) S2 (b) Würfel (c) Pyramide
-
-y
-
-x
-
-(d) R2 (e) T 2
-
-Abbildung 0.1: Beispiele für verschiedene Formen
-
-Obwohl es nicht vorausgesetzt wird, könnte es von Vorteil sein „Einführung in die Algebra und
-Zahlentheorie“ gehört zu haben.
-
-
-
-Inhaltsverzeichnis
-
-1 Topologische Grundbegriffe 2
-1.1 Topologische Räume . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 2
-1.2 Metrische Räume . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 6
-1.3 Stetigkeit . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 9
-1.4 Zusammenhang . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11
-1.5 Kompaktheit . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 14
-1.6 Wege und Knoten . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 17
-Übungsaufgaben . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 22
-
-2 Mannigfaltigkeiten und Simplizialkomplexe 24
-2.1 Topologische Mannigfaltigkeiten . . . . . . . . . . . . . . . . . . . . . . . . . . . . 24
-2.2 Differenzierbare Mannigfaltigkeiten . . . . . . . . . . . . . . . . . . . . . . . . . . 29
-2.3 Simplizialkomplex . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 34
-Übungsaufgaben . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 43
-
-3 Fundamentalgruppe und Überlagerungen 44
-3.1 Homotopie von Wegen . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 44
-3.2 Fundamentalgruppe . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 47
-3.3 Überlagerungen . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 51
-3.4 Gruppenoperationen . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 61
-
-4 Euklidische und nichteuklidische Geometrie 64
-4.1 Axiome für die euklidische Ebene . . . . . . . . . . . . . . . . . . . . . . . . . . . 64
-4.2 Weitere Eigenschaften einer euklidischen Ebene . . . . . . . . . . . . . . . . . . . 74
-
-4.2.1 Flächeninhalt . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 74
-4.3 Hyperbolische Geometrie . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 77
-Übungsaufgaben . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 86
-
-5 Krümmung 87
-5.1 Krümmung von Kurven . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 87
-5.2 Tangentialebene . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 89
-5.3 Gauß-Krümmung . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 91
-5.4 Erste und zweite Fundamentalform . . . . . . . . . . . . . . . . . . . . . . . . . . 94
-
-Lösungen der Übungsaufgaben 99
-
-Bildquellen 105
-
-Abkürzungsverzeichnis 106
-
-Ergänzende Definitionen und Sätze 107
-
-Symbolverzeichnis 108
-
-
-
-2 Inhaltsverzeichnis
-
-Stichwortverzeichnis 111
-
-
-
-1 Topologische Grundbegriffe
-
-1.1 Topologische Räume
-
-Definition 1
-Ein topologischer Raum ist ein Paar (X,T) bestehend aus einer Menge X und T ⊆ P(X)
-mit folgenden Eigenschaften
-
-(i) ∅, X ∈ T
-
-(ii) Sind U1, U2 ∈ T, so ist U1 ∩ U2 ∈ T
-
-(iii) Ist I eine Menge und Ui ∈ T für jedes i ∈ I, so ist
-⋃
-i∈I
-
-Ui ∈ T
-
-Die Elemente von T heißen offene Teilmengen von X.
-
-A ⊆ X heißt abgeschlossen, wenn X \A offen ist.
-
-Es gibt auch Mengen, die weder abgeschlossen, noch offen sind wie z. B. [0, 1). Auch gibt es
-Mengen, die sowohl abgeschlossen als auch offen sind.
-
-Bemerkung 1 (Mengen, die offen & abgeschlossen sind, ex.)
-Betrachte ∅ und X mit der trivialen Topologie Ttriv = { ∅, X }.
-Es gilt: X ∈ T und ∅ ∈ T, d. h. X und ∅ sind offen. Außerdem XC = X \X = ∅ ∈ T und
-X \ ∅ = X ∈ T, d. h. X und ∅ sind als Komplement offener Mengen abgeschlossen. �
-
-Beispiel 1 (Topologien)
-1) X = Rn mit der von der euklidischen Metrik erzeugten Topologie TEuklid:
-
-U ⊆ Rn offen⇔ für jedes x ∈ U gibt es r > 0,
-sodass Br(x) = { y ∈ Rn | d(x, y) < r } ⊆ U
-
-Diese Topologie wird auch „Standardtopologie des Rn“ genannt. Sie beinhaltet unter
-anderem alle offenen Kugeln, aber z. B. auch Schnitte zweier Kugeln mit unterschiedli-
-chem Mittelpunkt (vgl. Definition 1.ii).
-
-2) Jeder metrische Raum (X, d) ist auch ein topologischer Raum.
-
-3) Für eine Menge X heißt TDiskret = P(X) diskrete Topologie.
-
-4) X := R,TZ := { U ⊆ R | R \ U endlich } ∪ { ∅ } heißt Zariski-Topologie
-Beobachtungen:
-
-• U ∈ TZ ⇔ ∃f ∈ R[X], sodass R \ U = V (f) = { x ∈ R | f(x) = 0 }
-• Es gibt keine disjunkten offenen Mengen in TZ .
-
-
-
-4 1.1. TOPOLOGISCHE RÄUME
-
-5) X := Rn,TZ = {U ⊆ Rn|Es gibt Polynome f1, . . . , fr ∈ R[X1, . . . , Xn] sodass
-Rn \ U = V (f1, . . . , fr)}
-
-6) X := { 0, 1 } ,T = { ∅, { 0, 1 } , { 0 } } heißt Sierpińskiraum.
-∅, { 0, 1 } , { 1 } sind dort alle abgeschlossenen Mengen.
-
-Definition 2
-Sei (X,T) ein topologischer Raum und x ∈ X.
-
-Eine Teilmenge U ⊆ X heißt Umgebung von x, wenn es ein U0 ∈ T gibt mit x ∈ U0 und
-U0 ⊆ U .
-
-Gilt eine Eigenschaft in einer Umgebung, so sagt man, dass die Eigenschaft lokal gilt.
-
-Definition 3
-Sei (X,T) ein topologischer Raum und M ⊆ X eine Teilmenge.
-
-a) M◦ := { x ∈M |M ist Umgebung von x } =
-⋃
-U⊆M
-U∈T
-
-U heißt Inneres oder offener
-
-Kern von M .
-
-b) M :=
-⋂
-M⊆A
-
-A abgeschlossen
-
-A heißt abgeschlossene Hülle oder Abschluss von M .
-
-c) ∂M := M \M◦ heißt Rand von M .
-
-d) M heißt dicht in X, wenn M = X ist.
-
-Beispiel 2
-1) Sei X = R mit euklidischer Topologie und M = Q. Dann gilt: M = R und M◦ = ∅
-
-2) Sei X = R und M = (a, b). Dann gilt: M = [a, b]
-
-3) Sei X = R,T = TZ und M = (a, b). Dann gilt: M = R
-
-Definition 4
-Sei (X,T) ein topologischer Raum.
-
-a) B ⊆ T heißt Basis der Topologie T, wenn jedes U ∈ T Vereinigung von Elementen
-aus B ist.
-
-b) S ⊆ T heißt Subbasis der Topologie T, wenn jedes U ∈ T Vereinigung von endlichen
-Durchschnitten von Elementen aus S ist.
-
-Beispiel 3 (Basis und Subbasis)
-1) Jede Basis ist auch eine Subbasis, z.B.
-
-S = { (a, b) | a, b ∈ R, a < b } ist für R mit der Standardtopologie sowohl Basis als
-auch Subbasis.
-
-2) Gegeben sei X = Rn mit euklidischer Topologie T. Dann ist
-
-B = {Br(x) | r ∈ Q>0, x ∈ Qn }
-
-ist eine abzählbare Basis von T.
-
-3) Sei (X,T) ein topologischer RaummitX = { 0, 1, 2 } und T = { ∅, { 0 } , { 0, 1 } , { 0, 2 } , X }.
-Dann ist S = { ∅, { 0, 1 } , { 0, 2 } } eine Subbasis von T, da gilt:
-
-
-
-5 1.1. TOPOLOGISCHE RÄUME
-
-• S ⊆ T
-
-• ∅, { 0, 1 } und { 0, 2 } ∈ S
-• { 0 } = { 0, 1 } ∩ { 0, 2 }
-• X = { 0, 1 } ∪ { 0, 2 }
-
-Allerings ist S keine Basis von (X,T), da { 0 } nicht als Vereinigung von Elementen
-aus S erzeugt werden kann.
-
-Bemerkung 2
-Sei X eine Menge und S ⊆ P(X). Dann gibt es genau eine Topologie T auf X, für die S
-Subbasis ist.
-
-Definition 5
-Sei (X,T) ein topologischer Raum und Y ⊆ X.
-TY := { U ∩ Y | U ∈ T } ist eine Topologie auf Y .
-
-TY heißt Teilraumtopologie und (Y,TY ) heißt ein Teilraum von (X,T).
-
-Die Teilraumtopologie wird auch Spurtopologie oder Unterraumtopologie genannt.
-
-Definition 6
-Seien X1, X2 topologische Räume.
-U ⊆ X1 × X2 sei offen, wenn es zu jedem x = (x1, x2) ∈ U Umgebungen Ui um xi mit
-i = 1, 2 gibt, sodass U1 × U2 ⊆ U gilt.
-
-T = { U ⊆ X1 ×X2 | U offen } ist eine Topologie auf X1×X2. Sie heißt Produkttopologie.
-B = { U1 × U2 | Ui offen in Xi, i = 1, 2 } ist eine Basis von T.
-
-U
-
-xx2
-
-x1
-
-U2
-
-U1
-
-X1
-
-X2
-
-Abbildung 1.1: Zu x = (x1, x2) gibt es Umgebungen U1, U2 mit U1 × U2 ⊆ U
-
-Beispiel 4 (Produkttopologien)
-1) X1 = X2 = R mit euklidischer Topologie.
-⇒ Die Produkttopologie auf R× R = R2 stimmt mit der euklidischen Topologie auf
-R2 überein.
-
-2) X1 = X2 = R mit Zariski-Topologie. T Produkttopologie auf R2: U1 × U2
-
-(Siehe Abbildung 1.2)
-
-
-
-6 1.1. TOPOLOGISCHE RÄUME
-
-U1 = R \ N
-
-U
-2
-
-=
-R
-\
-N
-
-Abbildung 1.2: Zariski-Topologie auf R2
-
-Definition 7
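-Sei X ein topologischer Raum, ∼ eine Äquivalenzrelation auf X, $\overline{X} = X/\!\sim$ sei die Menge
-der Äquivalenzklassen, $\pi : X \to \overline{X},\ x \mapsto [x]_\sim$.
-
-$T_{\overline{X}} := \{ U \subseteq \overline{X} \mid \pi^{-1}(U) \in T_X \}$
-
-$(\overline{X}, T_{\overline{X}})$ heißt Quotiententopologie.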
-
-Beispiel 5
-X = R, a ∼ b :⇔ a− b ∈ Z
-
-R-1 0 1 2 3 4 5
-
-0
-
-a
-
-U
-
-aπ−1(u)
-
-0 ∼ 1, d. h. [0] = [1]
-
-Beispiel 6
-Sei X = R2 und (x1, y1) ∼ (x2, y2)⇔ x1− x2 ∈ Z und y1− y2 ∈ Z. Dann ist X/∼ ein Torus.
-
-Beispiel 7 (Projektiver Raum)
-
-X = Rn+1 \ { 0 } , x ∼ y ⇔ ∃λ ∈ R× mit y = λx
-
-⇔ x und y liegen auf der gleichen
-Ursprungsgerade
-
-X = Pn(R)
-
-
-
-7 1.2. METRISCHE RÄUME
-
-Also für n = 1:
-
-−4 −2 2 4 6 8
-
-−4
-
-−2
-
-2
-
-4
-
-1.2 Metrische Räume
-
-Definition 8
-Sei X eine Menge. Eine Abbildung $d : X \times X \to \mathbb{R}_0^+$ heißt Metrik, wenn gilt:
-
-(i) Definitheit: d(x, y) = 0⇔ x = y ∀x, y ∈ X
-(ii) Symmetrie: d(x, y) = d(y, x) ∀x, y ∈ X
-(iii) Dreiecksungleichung: d(x, z) ≤ d(x, y) + d(y, z) ∀x, y, z ∈ X
-Das Paar (X, d) heißt ein metrischer Raum.
-
-Bemerkung 3
-Sei (X, d) ein metrischer Raum und
-
-Br(x) := { y ∈ X | d(x, y) < r } für x ∈ X, r ∈ R+
-
-B = {Br(x) ⊆ P(X) | x ∈ X, r ∈ R+ } ist Basis einer Topologie auf X.
-
-Definition 9
-Seien (X, dX) und (Y, dY ) metrische Räume und ϕ : X → Y eine Abbildung mit
-
-∀x1, x2 ∈ X : dX(x1, x2) = dY (ϕ(x1), ϕ(x2))
-
-Dann heißt ϕ eine Isometrie von X nach Y .
-
-Beispiel 8 (Skalarprodukt erzeugt Metrik)
-Sei V ein euklidischer oder hermitescher Vektorraum mit Skalarprodukt 〈·, ·〉. Dann wird V
-durch $d(x, y) := \sqrt{\langle x - y, x - y \rangle}$ zum metrischen Raum.
-
-Beispiel 9 (diskrete Metrik)
-Sei X eine Menge. Dann heißt
-
-$d(x, y) = \begin{cases} 0 & \text{falls } x = y \\ 1 & \text{falls } x \neq y \end{cases}$
-
-die diskrete Metrik. Die Metrik d induziert die diskrete Topologie.
-
-
-
-8 1.2. METRISCHE RÄUME
-
-Beispiel 10
-X = R2 und d ((x1, y1), (x2, y2)) := max(‖x1 − x2‖, ‖y1 − y2‖) ist Metrik.
-
-Beobachtung: d erzeugt die euklidische Topologie.
-
-Br(0) =
-
-r r
-
-r
-
-r
-
-(a) Br(0) (b) Euklidische Topologie
-
-Abbildung 1.3: Veranschaulichungen zur Metrik d aus Beispiel 10
-
-
-
-9 1.2. METRISCHE RÄUME
-
-Beispiel 11 (SNCF-Metrik¹)
-X = R2
-
-−4 −2 2 4 6 8
-
-−4
-
-−2
-
-2
-
-4
-
-Definition 10
-Ein topologischer Raum X heißt hausdorffsch, wenn es für je zwei Punkte x ≠ y in X
-Umgebungen Ux um x und Uy um y gibt, sodass Ux ∩ Uy = ∅.
-
-Bemerkung 4 (Trennungseigenschaft)
-Metrische Räume sind hausdorffsch, wegen
-
-d(x, y) > 0⇒ ∃ε > 0 : Bε(x) ∩Bε(y) = ∅
-
-Beispiel 12 (Topologische Räume und Hausdorff-Räume)
-1) (R,TZ) ist ein topologischer Raum, der nicht hausdorffsch ist.
-
-2) (R,TEuklid) ist ein topologischer Hausdorff-Raum.
-
-Bemerkung 5 (Eigenschaften von Hausdorff-Räumen)
-Seien X,X1, X2 Hausdorff-Räume.
-
-a) Jeder Teilraum von X ist hausdorffsch.
-
-b) X1 ×X2 ist hausdorffsch (vgl. Abbildung 1.4).
-
-Definition 11
-Sei X ein topologischer Raum und $(x_n)_{n \in \mathbb{N}}$ eine Folge in X. x ∈ X heißt Grenzwert oder
-Limes von (xn), wenn es für jede Umgebung U von x ein n0 gibt, sodass xn ∈ U für alle
-n ≥ n0.
-
-Bemerkung 6
-Ist X hausdorffsch, so hat jede Folge in X höchstens einen Grenzwert.
-
-Beweis: Sei (xn) eine konvergierende Folge und x und y Grenzwerte der Folge.
-
-Da X hausdorffsch ist, gibt es Umgebungen Ux von x und Uy von y mit Ux ∩ Uy = ∅ falls
-x ≠ y. Da (xn) gegen x und y konvergiert, existiert ein n0 mit xn ∈ Ux ∩ Uy für alle n ≥ n0
-
-⇒ x = y ∎
-
-¹ Diese Metrik wird auch „französische Eisenbahnmetrik“ genannt.
-
-https://de.wikipedia.org/wiki/Franz%C3%B6sische_Eisenbahnmetrik
-
-
-10 1.3. STETIGKEIT
-
-(x1, y1) (x2, y2)
-
-x1 x2
-U1 ×X2 U2 ×X2
-
-X1
-
-X2
-
-Abbildung 1.4: Wenn X1, X2 hausdorffsch sind, dann auch X1 ×X2
-
-1.3 Stetigkeit
-
-Definition 12
-Seien (X,TX), (Y,TY ) topologische Räume und f : X → Y eine Abbildung.
-
-a) f heißt stetig :⇔ ∀U ∈ TY : f−1(U) ∈ TX .
-
-b) f heißt Homöomorphismus, wenn f stetig ist und es eine stetige Abbildung g :
-Y → X gibt, sodass g ◦ f = idX und f ◦ g = idY .
-
-Bemerkung 7²
-
-Seien X,Y metrische Räume und f : X → Y eine Abbildung.
-
-Dann gilt: f ist stetig ⇔ zu jedem x ∈ X und jedem ε > 0 gibt es δ(x, ε) > 0, sodass für
-alle y ∈ X mit d(x, y) < δ gilt dY (f(x), f(y)) < ε.
-
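-Beweis: „⇒“: Sei x ∈ X, ε > 0 gegeben und $U := B_\varepsilon(f(x))$.
-Dann ist U offen in Y .
-$\xRightarrow{\text{Def. 12.a}}$ $f^{-1}(U)$ ist offen in X. Dann ist $x \in f^{-1}(U)$.
-⇒ ∃δ > 0, sodass $B_\delta(x) \subseteq f^{-1}(U)$
-⇒ $f(B_\delta(x)) \subseteq U$
-⇒ für alle $y \in \{ y \in X \mid d_X(x, y) < \delta \}$ gilt $d_Y(f(x), f(y)) < \varepsilon$ ⇒ Beh.
-
-„⇐“: Sei U ⊆ Y offen, $x \in f^{-1}(U)$.
-Dann gibt es ε > 0, sodass $B_\varepsilon(f(x)) \subseteq U$
-$\xRightarrow{\text{Vor.}}$ Es gibt δ > 0, sodass $f(B_\delta(x)) \subseteq B_\varepsilon(f(x))$
-⇒ $B_\delta(x) \subseteq f^{-1}(B_\varepsilon(f(x))) \subseteq f^{-1}(U)$ ∎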
-
-Bemerkung 8
-Seien X,Y topologische Räume und f : X → Y eine Abbildung. Dann gilt:
-
-f ist stetig
-⇔ für jede abgeschlossene Teilmenge A ⊆ Y gilt : f−1(A) ⊆ X ist abgeschlossen.
-
-Beispiel 13 (Stetige Abbildungen und Homöomorphismen)
-1) Für jeden topologischen Raum X gilt: idX : X → X ist Homöomorphismus.
-
-² Es wird die Äquivalenz von Stetigkeit im Sinne der Analysis und Topologie auf metrischen Räumen gezeigt.
-
-
-
-11 1.3. STETIGKEIT
-
-2) Ist (Y,TY ) trivialer topologischer Raum, d. h. TY = Ttriv, so ist jede Abbildung
-f : X → Y stetig.
-
-3) Ist X diskreter topologischer Raum, so ist f : X → Y stetig für jeden topologischen
-Raum Y und jede Abbildung f .
-
-4) Sei X = [0, 1), Y = S1 = { z ∈ C | ‖z‖ = 1 } und f(t) = e2πit.
-
-R0 1
-0
-
-f
-
-g
-
-Abbildung 1.5: Beispiel einer stetigen Funktion f , deren Umkehrabbildung g nicht stetig ist.
-
-Die Umkehrabbildung g ist nicht stetig, da g−1(U) nicht offen ist (vgl. Abbildung 1.5).
-
-Bemerkung 9 (Verkettungen stetiger Abbildungen sind stetig)
-Seien X,Y, Z topologische Räume, f : X → Y und g : Y → Z stetige Abbildungen.
-
-Dann ist g ◦ f : X → Z stetig.
-
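-$$\begin{array}{ccc} X & \xrightarrow{\;f\;} & Y \\ & {\scriptstyle g \circ f}\searrow & \downarrow {\scriptstyle g} \\ & & Z \end{array}$$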
-
-Beweis: Sei U ⊆ Z offen ⇒ (g ◦ f)−1(U) = f−1(g−1(U)). g−1(U) ist offen in Y weil g stetig
-ist, f−1(g−1(U)) ist offen in X, weil f stetig ist. �
-
-Bemerkung 10
-a) Für jeden topologischen Raum X ist
-
-Homöo(X) := { f : X → X | f ist Homöomorphismus }
-
-eine Gruppe.
-
-b) Jede Isometrie f : X → Y zwischen metrischen Räumen ist ein Homöomorphismus.
-
-c) Iso(X) := { f : X → X | f ist Isometrie } ist eine Untergruppe von Homöo(X) für
-jeden metrischen Raum X.
-
-Bemerkung 11 (Projektionen sind stetig)
-Seien X,Y topologische Räume. πX : X × Y → X und πY : X × Y → Y die Projektionen
-
-πX : (x, y) 7→ x und πY : (x, y) 7→ y
-
-Wird X × Y mit der Produkttopologie versehen, so sind πX und πY stetig.
-
-Beweis: Sei U ⊆ X offen
-⇒ $\pi_X^{-1}(U) = U \times Y$ ist offen in X × Y . ∎
-
-Bemerkung 12
-Sei X ein topologischer Raum, ∼ eine Äquivalenzrelation auf X, $\overline{X} = X/\!\sim$ der Bahnenraum
-versehen mit der Quotiententopologie, $\pi : X \to \overline{X},\ x \mapsto [x]_\sim$.
-
-Dann ist π stetig.
-
-
-
-12 1.4. ZUSAMMENHANG
-
-Beweis: Nach Definition ist $U \subseteq \overline{X}$ offen ⇔ $\pi^{-1}(U) \subseteq X$ offen. ∎
-
-Beobachtung: Die Quotiententopologie ist die feinste Topologie, sodass π stetig wird.
-
-Beispiel 14 (Stereographische Projektion)
-$\mathbb{R}^n$ und $S^n \setminus \{ N \}$ sind homöomorph für beliebiges $N \in S^n$. Es gilt:
-
-$S^n = \{ x \in \mathbb{R}^{n+1} \mid \|x\| = 1 \} = \left\{ x \in \mathbb{R}^{n+1} \,\middle|\, \sum_{i=1}^{n+1} x_i^2 = 1 \right\}$
-
-O. B. d. A. sei $N = (0, \dots, 0, 1)^T$. Die Gerade durch N und P schneidet die Ebene H in genau
-einem Punkt $\hat{P}$. P wird auf $\hat{P}$ abgebildet.
-
-$f : S^n \setminus \{ N \} \to \mathbb{R}^n, \quad P \mapsto \overbrace{L_P \cap H}^{\text{genau ein Punkt}}$
-
-wobei $\mathbb{R}^n = H = \left\{ (x_1, \dots, x_{n+1})^T \in \mathbb{R}^{n+1} \,\middle|\, x_{n+1} = 0 \right\}$ und $L_P$ die Gerade in $\mathbb{R}^{n+1}$ durch N
-und P ist.
-
-Sei $P = (x_1, \dots, x_{n+1})^T$, so ist $x_{n+1} < 1$, also ist $L_P$ nicht parallel zu H. Also schneiden sich $L_P$
-und H in genau einem Punkt $\hat{P}$.
-
-Es gilt: f ist bijektiv und die Umkehrabbildung ist ebenfalls stetig.
-
-1.4 Zusammenhang
-
-Definition 13
-
-a) Ein Raum X heißt zusammenhängend, wenn es keine offenen, nichtleeren Teilmengen
-U1, U2 von X gibt mit U1 ∩ U2 = ∅ und U1 ∪ U2 = X.
-
-b) Eine Teilmenge Y ⊆ X heißt zusammenhängend, wenn Y als topologischer Raum mit
-der Teilraumtopologie zusammenhängend ist.
-
-
-
-13 1.4. ZUSAMMENHANG
-
-x
-
-y
-
-z
-
-N
-
-P̂
-
-0
-
-P
-
-Abbildung 1.6: Visualisierung der stereographischen Projektion
-
-Bemerkung 13
-X ist zusammenhängend ⇔ Es gibt keine abgeschlossenen, nichtleeren Teilmengen A1, A2
-
-mit A1 ∩A2 = ∅ und A1 ∪A2 = X.
-
-Beispiel 15 (Zusammenhang von Räumen)
-1) (Rn,TEuklid) ist zusammenhängend, denn:
-
-Annahme: $\mathbb{R}^n = U_1 \,\dot\cup\, U_2$ mit $\emptyset \neq U_1, U_2 \in T_{\text{Euklid}}$ existieren.
-
-Sei x ∈ U1, y ∈ U2 und [x, y] die Strecke zwischen x und y. Sei V = [x, y]. Nun
-betrachten wir $V \subsetneq \mathbb{R}^n$ als (metrischen) Teilraum mit der Teilraumtopologie TV .
-Somit gilt U1 ∩ [x, y] ∈ TV wegen der Definition der Teilraumtopologie.
-
-Dann gibt es z ∈ [x, y] mit z ∈ ∂(U1 ∩ [x, y]), aber z /∈ U1 ⇒ z ∈ U2. In jeder
-Umgebung von z liegt ein Punkt von U1 ⇒ Widerspruch zu U2 offen.
-
-2) R \ { 0 } ist nicht zusammenhängend, denn R \ { 0 } = R<0 ∪ R>0
-
-3) R2 \ { 0 } ist zusammenhängend.
-
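-4) $\mathbb{Q} \subsetneq \mathbb{R}$ ist nicht zusammenhängend, da $(\mathbb{Q} \cap \mathbb{R}_{<\sqrt{2}}) \cup (\mathbb{Q} \cap \mathbb{R}_{>\sqrt{2}}) = \mathbb{Q}$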
-
-5) { x } ist zusammenhängend für jedes x ∈ X, wobei X ein topologischer Raum ist.
-
-6) R mit Zariski-Topologie ist zusammenhängend.
-
-Bemerkung 14
-Sei X ein topologischer Raum und A ⊆ X zusammenhängend. Dann ist auch der Abschluss $\overline{A}$
-zusammenhängend.
-
-
-
-14 1.4. ZUSAMMENHANG
-
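-Beweis: durch Widerspruch
-Annahme: $\overline{A} = A_1 \cup A_2$, $A_i$ abgeschlossen, $A_i \neq \emptyset$, $A_1 \cap A_2 = \emptyset$
-
-⇒ $A = \underbrace{\underbrace{(A \cap A_1)}_{\text{abgeschlossen}} \,\dot\cup\, \underbrace{(A \cap A_2)}_{\text{abgeschlossen}}}_{\text{disjunkt}}$
-
-Wäre $A \cap A_1 = \emptyset$
-⇒ $A \subseteq \overline{A} = A_1 \,\dot\cup\, A_2$
-⇒ $A \subseteq A_2$ ⇒ $\overline{A} \subseteq A_2$
-⇒ $A_1 = \emptyset$
-⇒ Widerspruch zu $A_1 \neq \emptyset$
-⇒ $A \cap A_1 \neq \emptyset$ und analog $A \cap A_2 \neq \emptyset$
-⇒ Widerspruch zu A ist zusammenhängend. ∎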
-
-Bemerkung 15
-Sei X ein topologischer Raum und A,B ⊆ X zusammenhängend.
-
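-Ist $A \cap B \neq \emptyset$, dann ist $A \cup B$ zusammenhängend.
-
-Beweis: Sei $A \cup B = U_1 \,\dot\cup\, U_2$, $U_i \neq \emptyset$ offen
-$\xRightarrow{\text{o. B. d. A.}}$ $A = (A \cap U_1) \,\dot\cup\, (A \cap U_2)$ offen
-$\xRightarrow{A \text{ zhgd.}}$ $A \cap U_1 = \emptyset$
-$\xRightarrow{A \cap B \neq \emptyset}$ $U_1 \subseteq B$
-$B = \underbrace{(B \cap U_1)}_{= U_1} \cup \underbrace{(B \cap U_2)}_{= \emptyset}$ ist unerlaubte Zerlegung.
-
-∎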
-
-Definition 14
-Sei X ein topologischer Raum.
-
-Für x ∈ X sei Z(x) ⊆ X definiert durch
-
-$Z(x) := \bigcup_{\substack{A \subseteq X \text{ zhgd.} \\ x \in A}} A$
-
-Z(x) heißt Zusammenhangskomponente.
-
-Bemerkung 16 (Eigenschaften von Zusammenhangskomponenten)
-Sei X ein topologischer Raum. Dann gilt:
-
-a) Z(x) ist die größte zusammenhängende Teilmenge von X, die x enthält.
-
-b) Z(x) ist abgeschlossen.
-
-c) X ist disjunkte Vereinigung von Zusammenhangskomponenten.
-
-Beweis:
-
-
-
-15 1.5. KOMPAKTHEIT
-
-a) Sei $Z(x) = A_1 \,\dot\cup\, A_2$ mit $A_i \neq \emptyset$ abgeschlossen.
-O. B. d. A. sei $x \in A_1$ und $y \in A_2$. y liegt in einer zusammenhängenden Teilmenge A,
-die auch x enthält. ⇒ $A = \underbrace{(A \cap A_1)}_{\ni x} \cup \underbrace{(A \cap A_2)}_{\ni y}$
-ist unerlaubte Zerlegung.
-
-b) Nach Bemerkung 14 ist $\overline{Z(x)}$ zusammenhängend ⇒ $\overline{Z(x)} \subseteq Z(x)$ ⇒ $Z(x) = \overline{Z(x)}$
-
-c) Ist $Z(y) \cap Z(x) \neq \emptyset$ $\xRightarrow{\text{Bem. 15}}$ $Z(y) \cup Z(x)$ ist zusammenhängend.
-
-⇒ $Z(x) \cup Z(y) \subseteq Z(x)$ ⇒ $Z(y) \subseteq Z(x)$
-und $Z(x) \cup Z(y) \subseteq Z(y)$ ⇒ $Z(x) \subseteq Z(y)$
-
-∎
-
-Bemerkung 17
-Sei f : X → Y stetig. Ist A ⊆ X zusammenhängend, so ist f(A) ⊆ Y zusammenhängend.
-
-Beweis: Sei $f(A) = U_1 \cup U_2$, $U_i \neq \emptyset$, offen, disjunkt.
-⇒ $f^{-1}(f(A)) = f^{-1}(U_1) \cup f^{-1}(U_2)$
-
-⇒ $A = \underbrace{(A \cap f^{-1}(U_1))}_{\neq \emptyset} \cup \underbrace{(A \cap f^{-1}(U_2))}_{\neq \emptyset}$
-
-∎
-
-1.5 Kompaktheit
-
-Definition 15
-Sei X eine Menge und U ⊆ P(X).
-
-U heißt eine Überdeckung von X, wenn gilt:
-
-∀x ∈ X : ∃M ∈ U : x ∈M
-Definition 16
-
-Ein topologischer Raum X heißt kompakt, wenn jede offene Überdeckung von X
-
-$U = \{ U_i \}_{i \in I}$ mit $U_i$ offen in X
-
-eine endliche Teilüberdeckung $\bigcup_{i \in J \subseteq I} U_i = X$ mit $|J| \in \mathbb{N}$
-
-besitzt.
-
-Bemerkung 18
-Das Einheitsintervall I := [0, 1] ist kompakt bezüglich der euklidischen Topologie.
-
-Beweis: Sei (Ui)i∈J eine offene Überdeckung von I.
-
-Es genügt zu zeigen, dass es ein δ > 0 gibt, sodass jedes Teilintervall der Länge δ von I in
-einem der Ui enthalten ist. Wenn es ein solches δ gibt, kann man I in endlich viele Intervalle
-
-
-
-16 1.5. KOMPAKTHEIT
-
-der Länge δ unterteilen und alle Ui in die endliche Überdeckung aufnehmen, die Teilintervalle
-enthalten.
-
-Angenommen, es gibt kein solches δ. Dann gibt es für jedes n ∈ N ein Intervall In ⊆ [0, 1]
-der Länge 1/n, sodass $I_n \nsubseteq U_i$ für alle i ∈ J .
-Sei xn der Mittelpunkt von In. Die Folge (xn) hat einen Häufungspunkt x ∈ [0, 1]. Dann
-gibt es i ∈ J mit x ∈ Ui. Da Ui offen ist, gibt es ein ε > 0, sodass (x − ε, x + ε) ⊆ Ui.
-Dann gibt es n0, sodass gilt: 1/n0 < ε/2 und für unendlich viele³ n ≥ n0 : |x − xn| < ε/2, also
-In ⊆ (x − ε, x + ε) ⊆ Ui für mindestens ein n ∈ N.⁴
-
-⇒ Widerspruch
-
-Dann überdecke [0, 1] mit endlich vielen Intervallen $I_1, \dots, I_d$ der Länge δ. Jedes $I_j$ ist in
-einem $U_{i_j}$ enthalten.
-
-⇒ $U_{i_1}, \dots, U_{i_d}$ ist endliche Teilüberdeckung von U . ∎
-
-Beispiel 16 (Kompakte Räume)
-1) R ist nicht kompakt.
-
-2) (0, 1) ist nicht kompakt.
-$U_n = (1/n, 1 - 1/n) \Rightarrow \bigcup_{n \in \mathbb{N}} U_n = (0, 1)$
-
-3) R mit der Zariski-Topologie ist kompakt und jede Teilmenge von R ist es auch.
-
-Bemerkung 19
-Sei X kompakter Raum, A ⊆ X abgeschlossen. Dann ist A kompakt.
-
-Beweis: Sei (Vi)i∈I offene Überdeckung von A.
-Dann gibt es für jedes i ∈ I eine offene Teilmenge Ui ⊆ X mit Vi = Ui ∩A.
-
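-⇒ $A \subseteq \bigcup_{i \in I} U_i$
-
-⇒ $U = \{ U_i \mid i \in I \} \cup \{ X \setminus A \}$ ist offene Überdeckung von X
-
-$\xRightarrow{X \text{ kompakt}}$ es gibt $i_1, \dots, i_n \in I$, sodass $\bigcup_{j=1}^{n} U_{i_j} \cup (X \setminus A) = X$
-
-⇒ $\left( \bigcup_{j=1}^{n} U_{i_j} \cup (X \setminus A) \right) \cap A = A$
-
-⇒ $\bigcup_{j=1}^{n} \underbrace{(U_{i_j} \cap A)}_{= V_{i_j}} \cup \underbrace{((X \setminus A) \cap A)}_{= \emptyset} = A$
-
-⇒ $V_{i_1}, \dots, V_{i_n}$ überdecken A.
-
-∎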
-
-Bemerkung 20
-Seien X,Y kompakte topologische Räume. Dann ist X × Y mit der Produkttopologie
-kompakt.
-
-Beweis: Sei (Wi)i∈I eine offene Überdeckung von X × Y . Für jedes (x, y) ∈ X × Y gibt es
-offene Teilmengen Ux,y von X und Vx,y von Y sowie ein i ∈ I, sodass Ux,y × Vx,y ⊆Wi.
-
-³ Dies gilt nicht für alle n ≥ n0, da ein Häufungspunkt nur eine konvergente Teilfolge impliziert.
-⁴ Sogar für unendlich viele.
-
-
-
-17 1.5. KOMPAKTHEIT
-
-Wi
-
-xy
-
-x
-
-Vx,y
-
-Ux,y
-
-Y
-
-X
-
-Abbildung 1.7: Die blaue Umgebung ist Schnitt vieler Umgebungen
-
-Die offenen Mengen $U_{x_0,y} \times V_{x_0,y}$ für festes $x_0$ und alle $y \in Y$ überdecken $\{ x_0 \} \times Y$. Da Y
-kompakt ist, ist auch $\{ x_0 \} \times Y$ kompakt. Also gibt es $y_1, \dots, y_{m(x_0)}$ mit
-$\bigcup_{i=1}^{m(x_0)} U_{x_0,y_i} \times V_{x_0,y_i} \supseteq \{ x_0 \} \times Y$.
-
-Sei $U_{x_0} := \bigcap_{i=1}^{m(x_0)} U_{x_0,y_i}$. Da X kompakt ist, gibt es $x_1, \dots, x_n \in X$ mit
-$\bigcup_{j=1}^{n} U_{x_j} = X$
-
-⇒ $\bigcup_{j=1}^{n} \bigcup_{i=1}^{m(x_j)} \underbrace{\left( U_{x_j,y_i} \times V_{x_j,y_i} \right)}_{\text{Ein grün-oranges Kästchen}} \supseteq X \times Y$
-
-⇒ $\bigcup_{j} \bigcup_{i} W_i(x_j, y_i) = X \times Y$ ∎
-
-Bemerkung 21
-Sei X ein Hausdorffraum und K ⊆ X kompakt. Dann ist K abgeschlossen.
-
-Beweis: z. Z.: Komplement ist offen
-
-Ist X = K, so ist K abgeschlossen in X. Andernfalls sei y ∈ X \K. Für jedes x ∈ K seien
-Ux bzw. Vy Umgebungen von x bzw. von y, sodass Ux ∩ Vy = ∅.
-
-Xi
-
-K
-
-x
-
-y
-
-Da K kompakt ist, gibt es endlich viele $x_1, \dots, x_n \in K$, sodass $\bigcup_{i=1}^{n} U_{x_i} \supseteq K$.
-
-Sei $V := \bigcap_{i=1}^{n} V_{x_i}$
-
-
-
-18 1.6. WEGE UND KNOTEN
-
-⇒ $V \cap \left( \bigcup_{i=1}^{n} U_{x_i} \right) = \emptyset$
-
-⇒ V ∩ K = ∅
-⇒ V ist Umgebung von y, die ganz in X \ K enthalten ist.
-⇒ X \ K ist offen
-
-Damit ist K abgeschlossen. ∎
-
-Bemerkung 22
-Seien X,Y topologische Räume, f : X → Y stetig.
-Ist K ⊆ X kompakt, so ist f(K) ⊆ Y kompakt.
-
-Beweis: Sei $(V_i)_{i \in I}$ offene Überdeckung von f(K)
-$\xRightarrow{f \text{ stetig}}$ $(f^{-1}(V_i))_{i \in I}$ ist offene Überdeckung von K
-$\xRightarrow{\text{Kompakt}}$ es gibt $i_1, \dots, i_n$, sodass $f^{-1}(V_{i_1}), \dots, f^{-1}(V_{i_n})$ Überdeckung von K ist.
-⇒ $f(f^{-1}(V_{i_1})), \dots, f(f^{-1}(V_{i_n}))$ überdecken f(K).
-
-Es gilt: $f(f^{-1}(V)) = V \cap f(X)$ ∎
-
-Satz 1.1 (Heine-Borel)
-Eine Teilmenge von Rn oder Cn ist genau dann kompakt, wenn sie beschränkt und
-abgeschlossen ist.
-
-Beweis: „⇒“: Sei K ⊆ Rn (oder Cn) kompakt.
-
-Da Rn und Cn hausdorffsch sind, ist K nach Bemerkung 21 abgeschlossen. Nach Vorausset-
-zung kann K mit endlich vielen offenen Kugeln von Radien 1 überdeckt werden ⇒ K ist
-beschränkt.
-
-„⇐“ Sei A ⊆ Rn (oder Cn) beschränkt und abgeschlossen.
-
-Dann gibt es einen Würfel $W = \underbrace{[-N, N] \times \dots \times [-N, N]}_{n \text{ mal}}$ mit $A \subseteq W$ bzw. „Polyzylinder“
-
-$Z = \{ (z_1, \dots, z_n) \in \mathbb{C}^n \mid |z_i| \leq N \text{ für } i = 1, \dots, n \}$
-
-Nach Bemerkung 20 und Bemerkung 18 ist W kompakt, also ist A nach Bemerkung 19 auch
-kompakt. Genauso ist Z kompakt, weil
-
-$\{ z \in \mathbb{C} \mid |z| \leq 1 \}$
-
-homöomorph zu $\{ (x, y) \in \mathbb{R}^2 \mid \|(x, y)\| \leq 1 \}$
-
-ist. ∎
-
-1.6 Wege und Knoten
-
-Definition 17
-Sei X ein topologischer Raum.
-
-
-
-19 1.6. WEGE UND KNOTEN
-
-a) Ein Weg in X ist eine stetige Abbildung γ : [0, 1]→ X.
-
-b) γ heißt geschlossen, wenn γ(1) = γ(0) gilt.
-
-c) γ heißt einfach, wenn γ|[0,1) injektiv ist.
-
-Beispiel 17
-Ist X diskret, so ist jeder Weg konstant, d. h. von der Form
-
-∀x ∈ [0, 1] : γ(x) = c, c ∈ X
-
-Denn γ([0, 1]) ist zusammenhängend für jeden Weg γ.
-
-Definition 18
-Ein topologischer Raum X heißt wegzusammenhängend, wenn es zu je zwei Punkten
-x, y ∈ X einen Weg γ : [0, 1]→ X gibt mit γ(0) = x und γ(1) = y.
-
-Bemerkung 23
-Sei X ein topologischer Raum.
-
-a) X ist wegzusammenhängend ⇒ X ist zusammenhängend
-
-b) X ist wegzusammenhängend ⇍ X ist zusammenhängend
-
-Beweis:
-
-a) Sei X ein wegzusammenhängender topologischer Raum, A1, A2 nichtleere, disjunkte,
-abgeschlossene Teilmengen von X mit A1 ∪A2 = X. Sei x ∈ A1, y ∈ A2, γ : [0, 1]→ X
-ein Weg von x nach y.
-
-Dann ist C := γ([0, 1]) ⊆ X zusammenhängend, weil γ stetig ist.
-
-$C = \underbrace{(C \cap A_1)}_{\ni x} \cup \underbrace{(C \cap A_2)}_{\ni y}$
-
-ist Zerlegung in nichtleere, disjunkte, abgeschlossene Teilmengen ⇒ Widerspruch
-
-b) Sei $X = \left\{ (x, y) \in \mathbb{R}^2 \,\middle|\, x^2 + y^2 = 1 \lor y = 1 + 2 \cdot e^{-\frac{1}{10} x} \right\}$.
-
-Abbildung 1.8a veranschaulicht diesen Raum.
-
-Sei $U_1 \cup U_2 = X$, $U_1 \cap U_2 = \emptyset$, $U_i \neq \emptyset$ offen. X = C ∪ S. Dann ist C ⊆ U1 oder C ⊆ U2,
-weil C und S zusammenhängend sind.
-
-Also ist C = U1 und S = U2 (oder umgekehrt).
-
-Sei y ∈ C = U1, ε > 0 und Bε(y) ⊆ U1 eine Umgebung von y, die in U1 enthalten ist.
-
-Aber: $B_\varepsilon(y) \cap S \neq \emptyset$ ⇒ Widerspruch ⇒ X = C ∪ S ist zusammenhängend, aber nicht
-wegzusammenhängend. ∎
-
-Beispiel 18 (Hilbert-Kurve)
-Es gibt stetige, surjektive Abbildungen [0, 1]→ [0, 1]× [0, 1]. Ein Beispiel ist die in Abbil-
-dung 1.9 dargestellte Hilbert-Kurve.
-
-Definition 19
-Sei X ein topologischer Raum. Eine Jordankurve in X ist ein Homöomorphismus γ :
-[0, 1]→ C ⊆ X bzw. γ : S1 → C ⊆ X, wobei C := Bild γ.
-
-
-
-20 1.6. WEGE UND KNOTEN
-
-(a) Spirale S mit Kreis C
-
-0.1 1
-
-−1
-
-0
-
-1
-
-X
-
-Y
-
-{(x, sin( 1
-x)) ∈ X × Y }
-
-(−1, 1) ⊆ Y
-
-(b) Sinus
-
-Abbildung 1.8: Beispiele für Räume, die zusammenhängend, aber nicht wegzusammenhängend
-sind.
-
-(a) n = 1 (b) n = 2 (c) n = 3 (d) n = 4 (e) n = 5
-
-Abbildung 1.9: Hilbert-Kurve
-
-Jede Jordankurve ist also ein einfacher Weg.
-
-Satz 1.2 (Jordanscher Kurvensatz)
-Ist C = γ([0, 1]) eine geschlossene Jordankurve in R2, so hat R2 \ C genau zwei
-Zusammenhangskomponenten, von denen eine beschränkt ist und eine unbeschränkt.
-
-außen
-innen
-
-Jordankurve
-
-Abbildung 1.10: Die beschränkte Zusammenhangskomponente wird häufig Inneres, die
-unbeschränkte Äußeres genannt.
-
-Beweis: ist technisch mühsam und wird hier nicht geführt. Er kann in „Algebraische Topologie:
-Eine Einführung“ von R. Stöcker und H. Zieschang auf S. 301f (ISBN 978-3519122265)
-nachgelesen werden.
-
-Idee: Ersetze Weg C durch Polygonzug.
-
-
-
-21 1.6. WEGE UND KNOTEN
-
-Definition 20
-Eine geschlossene Jordankurve in R3 heißt Knoten.
-
-Beispiel 19 (Knoten)
-
-(a) Trivialer Knoten (b) Kleeblattknoten (c) Achterknoten (d) 62-Knoten
-
-Abbildung 1.11: Beispiele für verschiedene Knoten
-
-Definition 21
-Zwei Knoten γ1, γ2 : S1 → R3 heißen äquivalent, wenn es eine stetige Abbildung
-
-H : S1 × [0, 1]→ R3
-
-gibt mit
-
-H(z, 0) = γ1(z) ∀z ∈ S1
-
-H(z, 1) = γ2(z) ∀z ∈ S1
-
-und für jedes feste t ∈ [0, 1] ist
-
-$H_t : S^1 \to \mathbb{R}^3,\ z \mapsto H(z, t)$
-
-ein Knoten. Die Abbildung H heißt Isotopie zwischen γ1 und γ2.
-
-Definition 22
-Sei γ : [0, 1]→ R3 ein Knoten, E eine Ebene und π : R3 → E eine Projektion auf E.
-
-π heißt Knotendiagramm von γ, wenn gilt: $\left| \pi^{-1}(x) \right| \leq 2 \quad \forall x \in \pi(\gamma)$
-
-Ist $(\pi|_{\gamma([0,1])})^{-1}(x) = \{ y_1, y_2 \}$, so liegt $y_1$ über $y_2$, wenn gilt:
-
-$\exists \lambda > 1 : (y_1 - x) = \lambda (y_2 - x)$
-
-Satz 1.3 (Satz von Reidemeister)
-Zwei endliche Knotendiagramme gehören genau dann zu äquivalenten Knoten, wenn sie
-durch endlich viele „Reidemeister-Züge“ ineinander überführt werden können.
-
-
-
-22 1.6. WEGE UND KNOTEN
-
-(a) Ω1 (b) Ω2
-
-(c) Ω3
-
-Abbildung 1.12: Reidemeister-Züge
-
-Beweis: Durch sorgfältige Fallunterscheidung.5
-
-Definition 23
-Ein Knotendiagramm D heißt 3-färbbar, wenn jeder Bogen von D so mit einer Farbe gefärbt
-werden kann, dass an jeder Kreuzung eine oder 3 Farben auftreten und alle 3 Farben
-auftreten.
-
-Abbildung 1.13: Ein 3-gefärbter Kleeblattknoten
-
-5Siehe „Knot Theory and Its Applications“ von Kunio Murasugi. ISBN 978-0817638177.
-
-
-
-23 1.6. WEGE UND KNOTEN
-
-Übungsaufgaben
-
-Aufgabe 1 (Sierpińskiraum)
-
-Es sei X := { 0, 1 } und TX := { ∅, { 0 } , X }. Dies ist der sogenannte Sierpińskiraum.
-
-(a) Beweisen Sie, dass (X,TX) ein topologischer Raum ist.
-
-(b) Ist (X,TX) hausdorffsch?
-
-(c) Ist TX von einer Metrik erzeugt?
-
-Aufgabe 2
-
-Es sei Z mit der von den Mengen Ua,b := a+ bZ(a ∈ Z, b ∈ Z \ { 0 }) erzeugten Topologie
-versehen.
-
-Zeigen Sie:
-
-(a) Jedes Ua,b und jede einelementige Teilmenge von Z ist abgeschlossen.
-
-(b) { −1, 1 } ist nicht offen.
-(c) Es gibt unendlich viele Primzahlen.
-
-Aufgabe 3 (Cantorsches Diskontinuum)
-
-Für jedes i ∈ N sei Pi := { 0, 1 } mit der diskreten Topologie. Weiter sei $P := \prod_{i \in \mathbb{N}} P_i$.
-
-(a) Wie sehen die offenen Mengen von P aus?
-
-(b) Was können Sie über den Zusammenhang von P sagen?
-
-Aufgabe 4 (Kompaktheit)
-
-(a) Ist $GL_n(\mathbb{R}) = \{ A \in \mathbb{R}^{n \times n} \mid \det(A) \neq 0 \}$ kompakt?
-
-(b) Ist SLn(R) = {A ∈ Rn×n | det(A) = 1 } kompakt?
-
-(c) Ist P(R) kompakt?
-
-Aufgabe 5 (Begriffe)
-
-Definieren Sie die Begriffe „Homomorphismus“ und „Homöomorphismus“.
-
-Geben Sie, falls möglich, ein Beispiel für folgende Fälle an. Falls es nicht möglich ist,
-begründen Sie warum.
-
-1) Ein Homomorphismus, der zugleich ein Homöomorphismus ist,
-
-2) ein Homomorphismus, der kein Homöomorphismus ist,
-
-
-
-24 1.6. WEGE UND KNOTEN
-
-3) ein Homöomorphismus, der kein Homomorphismus ist
-
-Aufgabe 6 (Begriffe)
-
-Definieren Sie die Begriffe „Isomorphismus“, „Isotopie“ und „Isometrie“.
-
-
-
-2 Mannigfaltigkeiten und
-Simplizialkomplexe
-
-2.1 Topologische Mannigfaltigkeiten
-
-Definition 24
-Sei (X,T) ein topologischer Raum und n ∈ N.
-
-a) Eine n-dimensionale Karte auf X ist ein Paar (U,ϕ), wobei U ∈ T und ϕ : U → V
-Homöomorphismus von U auf eine offene Teilmenge V ⊆ Rn.
-
-b) Ein n-dimensionaler Atlas A auf X ist eine Familie $(U_i, \varphi_i)_{i \in I}$ von Karten auf X,
-sodass $\bigcup_{i \in I} U_i = X$.
-
-c) X heißt (topologische) n-dimensionale Mannigfaltigkeit, wenn X hausdorffsch ist,
-eine abzählbare Basis der Topologie hat und einen n-dimensionalen Atlas besitzt.
-
-Anschaulich ist also eine n-dimensionale Mannigfaltigkeit lokal dem $\mathbb{R}^n$ ähnlich.
-
-Bemerkung 24 (Mächtigkeit von Mannigfaltigkeiten)
-Jede n-dimensionale Mannigfaltigkeit mit n ≥ 1 ist mindestens so mächtig wie R.
-
-Beweis: Sei (X,T) ein topologischer Raum und (U,ϕ) mit U ∈ T und ϕ : U → V ⊆ Rn, wobei
-V offen und ϕ ein Homöomorphismus ist, eine Karte auf X.
-
-Da jede offene Teilmenge des Rn genauso mächtig ist wie der Rn, ϕ als Homöomorphismus
-insbesondere bijektiv ist und Mengen, zwischen denen eine Bijektion existiert, gleich mächtig
-sind, ist U genauso mächtig wie der Rn. Da jede Mannigfaltigkeit mindestens eine Karte
-hat, muss jede Mannigfaltigkeit X mindestens so mächtig sein wie der Rn. �
-
-Hinweis: Es gibt auch noch 0-dimensionale Mannigfaltigkeiten. Diese Mannigfaltigkeiten können
-beliebig viele Elemente haben.
-
-Bemerkung 25
-a) Es gibt surjektive, stetige Abbildungen [0, 1]→ [0, 1]× [0, 1]
-
-b) Für n ≠ m sind $\mathbb{R}^n$ und $\mathbb{R}^m$ nicht homöomorph. Zum Beweis benutzt man den „Satz
-von der Gebietstreue“ (Brouwer):
-
-Ist U ⊆ Rn offen und f : U → Rn stetig und injektiv, so ist f(U) offen.
-
-Ist n < m und Rm homöomorph zu Rn, so wäre
-
-f : Rn → Rm → Rn, (x1, . . . , xn) 7→ (x1, x2, . . . , xn, 0, . . . , 0)
-
-eine stetige injektive Abbildung. Also müsste f(Rn) offen sein ⇒ Widerspruch
-
-
-
-26 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN
-
-Beispiel 20 (Mannigfaltigkeiten)
-1) Jede offene Teilmenge U ⊆ Rn ist eine n-dimensionale Mannigfaltigkeit mit einem
-
-Atlas aus einer Karte.
-
-2) Cn ist eine 2n-dimensionale Mannigfaltigkeit mit einem Atlas aus einer Karte:
-
-$(z_1, \dots, z_n) \mapsto (\Re(z_1), \Im(z_1), \dots, \Re(z_n), \Im(z_n))$
-
-3) Pn(R) = (Rn+1 \ { 0 })/∼ = Sn/∼ und Pn(C) sind Mannigfaltigkeiten der Dimension
-n bzw. 2n, da gilt:
-
-Sei $U_i := \{ (x_0 : \dots : x_n) \in P^n(\mathbb{R}) \mid x_i \neq 0 \}\ \forall i \in 0, \dots, n$. Dann ist $P^n(\mathbb{R}) = \bigcup_{i=0}^{n} U_i$
-und die Abbildung
-
-$U_i \to \mathbb{R}^n$
-$(x_0 : \dots : x_n) \mapsto \left( \frac{x_0}{x_i}, \dots, \cancel{\frac{x_i}{x_i}}, \dots, \frac{x_n}{x_i} \right)$
-$(y_1 : \dots : y_{i-1} : 1 : y_i : \dots : y_n) \mapsto (y_1, \dots, y_n)$
-
-ist bijektiv.
-
-Die Ui mit i = 0, . . . , n bilden einen n-dimensionalen Atlas:
-
-x = (1 : 0 : 0) ∈ U0 → R2 x 7→ (0, 0)
-
-y = (0 : 1 : 1) ∈ U2 → R2 y 7→ (0, 1)
-
-Umgebung: B1(0, 1)→ { (1 : u : v) | ‖(u, v)‖ < 1 } = V1
-
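-Umgebung: $B_1(0, 1) \to \{ (w : z : 1) \mid w^2 + z^2 < 1 \} = V_2$
-
-$V_1 \cap V_2 = \emptyset$?
-$(a : b : c) \in V_1 \cap V_2$
-
-⇒ $a \neq 0$ und $\left(\frac{b}{a}\right)^2 + \left(\frac{c}{a}\right)^2 < 1 \Rightarrow \frac{c}{a} < 1$
-⇒ $c \neq 0$ und $\left(\frac{a}{c}\right)^2 + \left(\frac{b}{c}\right)^2 < 1 \Rightarrow \frac{a}{c} < 1$
-⇒ Widerspruch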
-
-4) $S^n = \{ x \in \mathbb{R}^{n+1} \mid \|x\| = 1 \}$ ist n-dimensionale Mannigfaltigkeit.
-
-Karten:
-$D_i := \{ (x_1, \dots, x_{n+1}) \in S^n \mid x_i > 0 \} \to B_1(\underbrace{0, \dots, 0}_{\in \mathbb{R}^n})$
-$C_i := \{ (x_1, \dots, x_{n+1}) \in S^n \mid x_i < 0 \} \to B_1(0, \dots, 0)$
-$(x_1, \dots, x_{n+1}) \mapsto (x_1, \dots, \cancel{x_i}, \dots, x_{n+1})$¹
-$(x_1, \dots, x_n) \mapsto \left( x_1, \dots, x_{i-1}, \sqrt{1 - \sum_{k=1}^{n} x_k^2}, x_i, \dots, x_n \right)$, oder $-\sqrt{1 - \sum_{k=1}^{n} x_k^2}$ für $C_i$
-
-$S^n = \bigcup_{i=1}^{n+1} (C_i \cup D_i)$
-
-Als kompakte Mannigfaltigkeit wird Sn auch „geschlossene Mannigfaltigkeit“ genannt.
-
-5) [0, 1] ist keine Mannigfaltigkeit, denn:
-Es gibt keine Umgebung von 0 in [0, 1], die homöomorph zu einem offenen Intervall
-ist.
-
-¹ $x_i$ wird rausgenommen
-
-
-
-27 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN
-
-6) $V_1 = \{ (x, y) \in \mathbb{R}^2 \mid x \cdot y = 0 \}$ ist keine Mannigfaltigkeit.
-
-Das Problem ist (0, 0). Wenn man diesen Punkt entfernt, zerfällt der Raum in 4
-Zusammenhangskomponenten. Jeder Rn zerfällt jedoch in höchstens zwei Zusammen-
-hangskomponenten, wenn man einen Punkt entfernt.
-
-7) $V_2 = \{ (x, y) \in \mathbb{R}^2 \mid x^3 = y^2 \}$ ist eine Mannigfaltigkeit.
-
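-8) $X = (\mathbb{R} \setminus \{ 0 \}) \cup \{ 0_1, 0_2 \}$
-
-$U \subseteq X \text{ offen} \Leftrightarrow \begin{cases} U \text{ offen in } \mathbb{R} \setminus \{ 0 \}, & \text{falls } 0_1 \notin U,\ 0_2 \notin U \\ \exists \varepsilon > 0 : (-\varepsilon, \varepsilon) \subseteq U & \text{falls } 0_1 \in U,\ 0_2 \in U \end{cases}$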
-
-Insbesondere sind (R \ { 0 }) ∪ { 01 } und (R \ { 0 }) ∪ { 02 } offen und homöomorph
-zu R.
-
-Aber: X ist nicht hausdorffsch! Denn es gibt keine disjunkten Umgebungen von 01
-
-und 02.
-
-9) GLn(R) ist eine Mannigfaltigkeit der Dimension n2, weil offene Teilmengen von Rn2
-
-eine Mannigfaltigkeit bilden.
-
-Definition 25
-Seien X,Y n-dimensionale Mannigfaltigkeiten, U ⊆ X und V ⊆ Y offen, Φ : U → V ein Ho-
-möomorphismus, Z = (X ∪̇ Y )/∼ mit der von u ∼ Φ(u) ∀u ∈ U erzeugten Äquivalenzrelation
-und der von ∼ induzierten Quotiententopologie.
-
-Z heißt Verklebung von X und Y längs U und V . Z besitzt einen Atlas aus n-dimensionalen
-Karten. Falls Z hausdorffsch ist, ist Z eine n-dimensionale Mannigfaltigkeit.
-
-Bemerkung 26
-Sind X,Y Mannigfaltigkeiten der Dimension n bzw. m, so ist X × Y eine Mannigfaltigkeit
-der Dimension n+m.
-
-Beweis: Produkte von Karten sind Karten. �
-
-Beispiel 21
-Mannigfaltigkeiten mit Dimension 1:
-
-1) Offene Intervalle, R, (0, 1) sind alle homöomorph
-
-2) S1
-
-Mannigfaltigkeiten mit Dimension 2:
-
-1) R2
-
-2) S2 (0 Henkel)
-
-3) T 2 (1 Henkel)
-
-4) oder mehr Henkel, wie z.B. der Zweifachtorus in Abbildung 2.1
-
-Bemerkung 27
-Sei n ∈ N, F : Rn → R stetig differenzierbar und X = V (F ) := { x ∈ Rn | F (x) = 0 } das
-„vanishing set“.
-
-Dann gilt:
-
-
-
-28 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN
-
-Abbildung 2.1: Durch Verklebung zweier Tori entsteht ein Zweifachtorus.
-
-a) X ist abgeschlossen in Rn
-
-b) Ist $\operatorname{grad}(F)(x) \neq 0\ \forall x \in X$, so ist X eine Mannigfaltigkeit der Dimension n − 1.
-
-Beweis:
-
-a) Sei $y \in \mathbb{R}^n \setminus V(F)$. Weil F stetig ist, gibt es δ > 0, sodass $F(B_\delta(y)) \subseteq B_\varepsilon(F(y))$ mit
-$\varepsilon = \frac{1}{2} \|F(y)\|$. Folgt $B_\delta(y) \cap V(F) = \emptyset$ ⇒ $\mathbb{R}^n \setminus V(F)$ ist offen.
-
-b) Sei x ∈ X mit $\operatorname{grad}(F)(x) \neq 0$, also o. B. d. A. $\frac{\partial F}{\partial X_1}(x) \neq 0$, $x = (x_1, \dots, x_n)$,
-$x' := (x_2, \dots, x_n) \in \mathbb{R}^{n-1}$. Der Satz von der impliziten Funktion liefert nun: Es
-gibt Umgebungen U von x′ und differenzierbare Funktionen g : U → R, sodass
-$G : U \to \mathbb{R}^n,\ u \mapsto (g(u), u)$ eine stetige Abbildung auf eine offene Umgebung V von x
-in X ist.
-
-∎
-
-Beispiel 22
-
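-1) $F : \mathbb{R}^3 \to \mathbb{R},\ (x, y, z) \mapsto x^2 + y^2 + z^2 - 1$, $V(F) = S^2$, $\operatorname{grad}(F) = (2x, 2y, 2z)$
-$\xRightarrow{\text{Bem. 27.b}}$ $S^n$ ist n-dimensionale Mannigfaltigkeit in $\mathbb{R}^{n+1}$
-
-2) $F : \mathbb{R}^2 \to \mathbb{R},\ (x, y) \mapsto y^2 - x^3$. Es gilt: $\operatorname{grad}(F) = (-3x^2, 2y)$. Also: $\operatorname{grad}(0, 0) = (0, 0)$.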
-
-−5−4−3−2−10
-1
-
-2
-3
-
-4
-5
-
-−4
-
-−2
-
-0
-
-2
-
-4
-
-−100
-
-0
-
-100
-
-x
-
-y
-
-z
-
-−100
-
-0
-
-100
-
-f(x, y)
-
-(a) F (x, y) = y2 − x3
-
-2 4 6 8 10 12
-
-−10
-
-−5
-
-5
-
-10
-
-x
-
-y
-
-a = 1
-3
-
-a = 1
-a = 2
-
-(b) y2 − ax3 = 0
-
-Abbildung 2.2: Rechts ist die Neilsche Parabel für verschiedene Parameter a.
-
-Daher ist Bemerkung 27.b nicht anwendbar, aber V (F ) ist trotzdem eine 1-dimensionale
-topologische Mannigfaltigkeit.
-
-
-
-29 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN
-
-Definition 26
-Sei X ein Hausdorffraum mit abzählbarer Basis der Topologie. X heißt n-dimensionale
-Mannigfaltigkeit mit Rand, wenn es einen Atlas $(U_i, \varphi_i)$ gibt, wobei $U_i \subseteq X$ offen und
-$\varphi_i$ ein Homöomorphismus auf eine offene Teilmenge von
-
-$\mathbb{R}^n_{+,0} := \{ (x_1, \dots, x_n) \in \mathbb{R}^n \mid x_n \geq 0 \}$
-
-ist.
-
-Rn+,0 ist ein „Halbraum“.
-
-Hinweis: Mannigfaltigkeiten mit Rand sind keine Mannigfaltigkeiten.
-
-∼
-=
-
-(a) Halbraum
-
-∼
-=
-
-(b) Pair of pants
-
-∼
-=
-
-(c) Sphäre mit einem Loch
-
-Abbildung 2.3: Beispiele für Mannigfaltigkeiten mit Rand
-
-Definition 27
-Sei X eine n-dimensionale Mannigfaltigkeit mit Rand und Atlas A. Dann heißt
-
-$\partial X := \bigcup_{(U, \varphi) \in A} \{ x \in U \mid \varphi(x) = 0 \}$
-
-Rand von X.
-
-∂X ist eine Mannigfaltigkeit der Dimension n− 1.
-
-Definition 28
-Sei X eine n-dimensionale Mannigfaltigkeit mit Atlas (Ui, ϕi)i∈I
-
-Für i, j ∈ I mit $U_i \cap U_j \neq \emptyset$ heißt
-
-$\varphi_{ij} := \varphi_j \circ \varphi_i^{-1} : \varphi_i(U_i \cap U_j) \to \varphi_j(U_i \cap U_j)$
-
-Kartenwechsel oder Übergangsfunktion.
-
-
-
-30 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN
-
-Rn Rn
-
-Ui Uj
-
-Vi Vj
-
-X
-
-ϕi ϕj
-
-Abbildung 2.4: Kartenwechsel
-
-2.2 Differenzierbare Mannigfaltigkeiten
-
-Definition 29
-Sei X eine n-dimensionale Mannigfaltigkeit mit Atlas (Ui, ϕi)i∈I .
-
-a) X heißt differenzierbare Mannigfaltigkeit der Klasse Ck, wenn jede Karten-
-wechselabbildung ϕij , i, j ∈ I k-mal stetig differenzierbar ist.
-
-b) X heißt differenzierbare Mannigfaltigkeit, wenn X eine differenzierbare Mannig-
-faltigkeit der Klasse C∞ ist.
-
-Differenzierbare Mannigfaltigkeiten der Klasse C∞ werden auch glatt genannt.
-
-Definition 30
-Sei X eine differenzierbare Mannigfaltigkeit der Klasse Ck (k ∈ N ∪ {∞ }) mit Atlas
-A = (Ui, ϕi)i∈I .
-
-a) Eine Karte (U,ϕ) auf X heißt verträglich mit A, wenn alle Kartenwechsel $\varphi \circ \varphi_i^{-1}$
-und $\varphi_i \circ \varphi^{-1}$ (i ∈ I mit $U_i \cap U \neq \emptyset$) differenzierbar von Klasse $C^k$ sind.
-
-b) Die Menge aller mit A verträglichen Karten auf X bildet einen maximalen Atlas der
-
-Klasse Ck. Er heißt Ck-Struktur auf X.
-
-Eine C∞-Struktur heißt auch differenzierbare Struktur auf X.
-
-Bemerkung 28
-Für n ≥ 4 gibt es auf $S^n$ mehrere verschiedene differenzierbare Strukturen, die sogenannten
-„exotischen Sphären“.
-
-Definition 31
-Seien X,Y differenzierbare Mannigfaltigkeiten der Dimension n bzw. m, x ∈ X.
-
-a) Eine stetige Abbildung f : X → Y heißt differenzierbar in x (von Klasse Ck), wenn
-es Karten (U,ϕ) von X mit x ∈ U und (V, ψ) von Y mit f(U) ⊆ V gibt, sodass
-ψ ◦ f ◦ ϕ−1 stetig differenzierbar von Klasse Ck in ϕ(x) ist.
-
-b) f heißt differenzierbar (von Klasse Ck), wenn f in jedem x ∈ X differenzierbar ist.
-
-c) f heißt Diffeomorphismus, wenn f differenzierbar von Klasse C∞ ist und es eine
-differenzierbare Abbildung g : Y → X von Klasse C∞ gibt mit g ◦ f = idX und
-f ◦ g = idY .
-
-
-
-31 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN
-
-Bemerkung 29
-Die Bedingung in Definition 31.a hängt nicht von den gewählten Karten ab.
-
-Beweis: Seien (U ′, ϕ′) und (V ′, ψ′) Karten von X bzw. Y um x bzw. f(x) mit f(U ′) ⊆ V ′.
-⇒ ψ′ ◦ f ◦ (ϕ′)−1
-
-= ψ′ ◦ (ψ−1 ◦ ψ) ◦ f ◦ (ϕ−1 ◦ ϕ) ◦ (ϕ′)−1
-
-ist genau dann differenzierbar, wenn ψ ◦ f ◦ ϕ−1 differenzierbar ist.
-
-Beispiel 23
-$f : \mathbb{R} \to \mathbb{R}, x \mapsto x^3$ ist kein Diffeomorphismus, aber Homöomorphismus, da mit $g(x) := \sqrt[3]{x}$
-gilt: $f \circ g = \mathrm{id}_{\mathbb{R}}$, $g \circ f = \mathrm{id}_{\mathbb{R}}$
-
-Bemerkung 30
-Sei X eine glatte Mannigfaltigkeit. Dann ist
-
-Diffeo(X) := { f : X → X | f ist Diffeomorphismus }
-
-eine Untergruppe von Homöo(X).
-
-Definition 32
-S ⊆ R3 heißt reguläre Fläche :⇔ ∀s ∈ S ∃ Umgebung V (s) ⊆ R3 ∃U ⊆ R2 offen:
-∃ differenzierbare Abbildung F : U → V ∩ S: Rg(JF (u)) = 2 ∀u ∈ U .
-
-F heißt (lokale) reguläre Parametrisierung von S.
-
-F (u, v) = (x(u, v), y(u, v), z(u, v))
-
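-$J_F(u, v) = \begin{pmatrix} \frac{\partial x}{\partial u}(p) & \frac{\partial x}{\partial v}(p) \\ \frac{\partial y}{\partial u}(p) & \frac{\partial y}{\partial v}(p) \\ \frac{\partial z}{\partial u}(p) & \frac{\partial z}{\partial v}(p) \end{pmatrix}$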
-
-
-Beispiel 24
-
-1) Rotationsflächen: Sei r : R→ R>0 eine differenzierbare Funktion.
-
-$F : \mathbb{R}^2 \to \mathbb{R}^3, \quad (u, v) \mapsto (r(v) \cos(u), r(v) \sin(u), v)$
-
-$J_F(u, v) = \begin{pmatrix} -r(v) \sin u & r'(v) \cos u \\ r(v) \cos u & r'(v) \sin u \\ 0 & 1 \end{pmatrix}$
-
-hat Rang 2 für alle $(u, v) \in \mathbb{R}^2$.
-
-2) Kugelkoordinaten: $F : \mathbb{R}^2 \to \mathbb{R}^3$,
-$(u, v) \mapsto (R \cos v \cos u, R \cos v \sin u, R \sin v)$
-Es gilt: $F(u, v) \in S^2_R$, denn
-
-$R^2 \cos^2(v) \cos^2(u) + R^2 \cos^2(v) \sin^2(u) + R^2 \sin^2(v)$
-
-$= R^2 \left( \cos^2(v) \cos^2(u) + \cos^2(v) \sin^2(u) + \sin^2(v) \right)$
-
-$= R^2 \left( \cos^2(v) (\cos^2(u) + \sin^2(u)) + \sin^2(v) \right)$
-
-$= R^2 \left( \cos^2(v) + \sin^2(v) \right)$
-
-$= R^2$
-
-
-
-32 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN
-
-N
-
-S
-
-vu
-
-(a) Kugelkoordinaten
-
-−1
-0
-
-1
-2−2
-
-−1
-0
-
-1
-2
-
-0.6
-
-0.8
-
-1
-
-(b) Rotationskörper
-
-π
-2
-
-π 3π
-2
-
-2π
-
-−1
-
-−0.5
-
-0.5
-
-1
-
-x
-
-y
-
-sinx
-cosx
-
-(c) Sinus und Kosinus haben keine gemeinsame Nullstelle
-
-
-
-33 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN
-
-Die Jacobi-Matrix
-
-$J_F(u, v) = \begin{pmatrix} -R \cos v \sin u & -R \sin v \cos u \\ R \cos v \cos u & -R \sin v \sin u \\ 0 & R \cos v \end{pmatrix}$
-
-hat Rang 2 für $\cos v \neq 0$. In $N$ und $S$ ist $\cos v = 0$.
-
-Bemerkung 31
-Jede reguläre Fläche S ⊆ R3 ist eine 2-dimensionale, differenzierbare Mannigfaltigkeit.
-
-Beweis:
-
-$S \subseteq \mathbb{R}^3$ ist als reguläre Fläche eine 2-dimensionale Mannigfaltigkeit. Aus der Definition von
-regulären Flächen folgt direkt, dass Karten $(U_i, F_i)$ und $(U_j \subseteq \mathbb{R}^2, F_j : \mathbb{R}^2 \to \mathbb{R}^3)$ von $S$ mit
-$U_i \cap U_j \neq \emptyset$ existieren, wobei $F_i$ und $F_j$ nach Definition differenzierbare Abbildungen sind.
-
-z.Z.: $F_j^{-1} \circ F_i$ ist ein Diffeomorphismus.
-
-Ui Uj
-
-S
-
-s
-
-Fi Fj
-
-F−1
-j ◦Fi
-
-Abbildung 2.5: Reguläre Fläche S zum Beweis von Bemerkung 31
-
-Idee: Finde differenzierbare Funktion $\tilde{F}_j^{-1}$ in Umgebung $W$ von $s$, sodass $\tilde{F}_j^{-1}|_{S \cap W} = F_j^{-1}$.
-
-Ausführung: Sei u0 ∈ Ui, v0 ∈ Uj mit Fi(u0) = s = Fj(v0).
-
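-Da $\operatorname{Rg}(J_{F_j}(v_0)) = 2$ ist, ist o. B. d. A.
-
-$\det \begin{pmatrix} \frac{\partial x}{\partial u} & \frac{\partial x}{\partial v} \\ \frac{\partial y}{\partial u} & \frac{\partial y}{\partial v} \end{pmatrix} (v_0) \neq 0$
-
-und $F_j(u, v) = (x(u, v), y(u, v), z(u, v))$.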
-
-Definiere F̃j : Uj × R→ R3 durch
-
-F̃j(u, v, t) := (x(u, v), y(u, v), z(u, v) + t)
-
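-Offensichtlich: $\tilde{F}_j|_{U_j \times \{ 0 \}} = F_j$
-
-$J_{\tilde{F}_j} = \begin{pmatrix} \frac{\partial x}{\partial u} & \frac{\partial x}{\partial v} & 0 \\ \frac{\partial y}{\partial u} & \frac{\partial y}{\partial v} & 0 \\ \frac{\partial z}{\partial u} & \frac{\partial z}{\partial v} & 1 \end{pmatrix} \Rightarrow \det J_{\tilde{F}_j}(v_0, 0) \neq 0$
-
-$\xRightarrow{\text{Analysis II}}$ Es gibt eine Umgebung $W$ von $\tilde{F}_j(v_0, 0) = F_j(v_0) = s$, sodass $\tilde{F}_j$ auf $W$ eine
-differenzierbare Inverse $\tilde{F}_j^{-1}$ hat.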
-
-
-
-34 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN
-
-Weiter gilt:
-
-$\tilde{F}_j^{-1}|_{W \cap S} = F_j^{-1}|_{W \cap S}$
-$\Rightarrow F_j^{-1} \circ F_i|_{F_i^{-1}(W \cap S)} = \tilde{F}_j^{-1} \circ F_i|_{F_i^{-1}(W \cap S)}$
-
-ist differenzierbar.
-
-Definition 33
-Sei G eine Mannigfaltigkeit und (G, ◦) eine Gruppe.
-
-a) G heißt topologische Gruppe, wenn die Abbildungen ◦ : G×G→ G und ι : G→ G
-definiert durch
-
-g ◦ h := g · h und ι(g) := g−1
-
-stetig sind.
-
-b) Ist G eine differenzierbare Mannigfaltigkeit, so heißt G Lie-Gruppe, wenn (G, ◦) und
-(G, ι) differenzierbar sind.
-
-Beispiel 25 (Lie-Gruppen)
-1) Alle endlichen Gruppen sind 0-dimensionale Lie-Gruppen.
-
-2) GLn(R)
-
-3) (R×, ·)
-4) (R>0, ·)
-5) (Rn,+), denn A ·B(i, j) =
-
-∑n
-k=1 aikbkj ist nach allen Variablen differenzierbar
-
-(A−1)(i, j) =
-det(Aij)
-
-detA
-
-Aij =
-
-ai1 . . . ain
-...
-
-. . .
-...
-
-an1 . . . ann
-
- ∈ R(n−1)×(n−1)
-
-ist differenzierbar.
-
-detAij kann 0 werden, da: (
-1 1
-−1 0
-
-)
-6) SLn(R) = {A ∈ GLn(R) | det(A) = 1 }
-
-Bemerkung 32
-Ist G eine Lie-Gruppe und g ∈ G, so ist die Abbildung
-
-lg : G→ G
-
-h 7→ g · h
-
-ein Diffeomorphismus.
-
-
-
-35 2.3. SIMPLIZIALKOMPLEX
-
-2.3 Simplizialkomplex
-
-Definition 34
-Seien v0, . . . , vk ∈ Rn Punkte.
-
-a) v0, . . . , vk sind in allgemeiner Lage
-⇔ es gibt keinen (k−1)-dimensionalen affinen Untervektorraum, der v0, . . . , vk enthält
-⇔ v1 − v0, . . . , vk − v0 sind linear unabhängig.
-
-b) $\operatorname{conv}(v_0, \dots, v_k) := \left\{ \sum_{i=0}^{k} \lambda_i v_i \;\middle|\; \lambda_i \geq 0, \sum_{i=0}^{k} \lambda_i = 1 \right\}$ heißt die konvexe Hülle von
-$v_0, \dots, v_k$.
-
-Definition 35
-a) Sei ∆n = conv(e0, . . . , en) ⊆ Rn+1 die konvexe Hülle der Standard-Basisvektoren
-
-e0, . . . , en.
-
-Dann heißt ∆n Standard-Simplex und n die Dimension des Simplex.
-
-b) Für Punkte v0, . . . , vk im Rn in allgemeiner Lage heißt ∆(v0, . . . , vk) = conv(v0, . . . , vk)
-
-ein k-Simplex in Rn.
-
-c) Ist ∆(v0, . . . , vk) ein k-Simplex und I = { i0, . . . , ir } ⊆ { 0, . . . , k }, so ist si0,...,ir :=
-
-conv(vi0 , . . . , vir) ein r-Simplex und heißt Teilsimplex oder Seite von ∆.
-
-(a) 0-Simplex ∆0
-
-1 2 3
-
-1
-
-2
-
-3
-
-e0
-
-e1
-
-(b) 1-Simplex ∆1
-
-1 2 3
-
-1
-
-2
-
-3
-
-e0
-
-e1
-
-e2
-
-(c) 2-Simplex ∆2
-
-e0 e1
-
-e2
-
-e3
-
-(d) 3-Simplex ∆3
-
-Abbildung 2.6: Beispiele für k-Simplexe
-
-Definition 36
-a) Eine endliche Menge K von Simplizes im Rn heißt (endlicher) Simplizialkomplex,
-
-wenn gilt:
-
-(i) Für ∆ ∈ K und S ⊆ ∆ Teilsimplex ist S ∈ K.
-
-(ii) Für ∆1,∆2 ∈ K ist ∆1 ∩∆2 leer oder ein Teilsimplex von ∆1 und von ∆2.
-
-b) |K| := ⋃∆∈K ∆ (mit Teilraumtopologie) heißt geometrische Realisierung von K.
-
-c) Ist d = max { k ∈ N0 | K enthält k-Simplex }, so heißt d die Dimension von K.
-
-
-
-36 2.3. SIMPLIZIALKOMPLEX
-
-(a) 1D Simplizialkomplex (b) 2D Simplizialkomplex
-(ohne untere Fläche!)
-
-(c) 2D Simplizialkomplex
-
-(d) 1D Simplizialkomplex (e) 2D Simplizialkomplex
-
-P
-
-(f) P ist kein Teilsimplex, da Eigen-
-schaft a.(ii) aus Definition 36 verletzt ist
-
-P
-
-(g) Simplizialkomplex
-
-Abbildung 2.7: Beispiele für Simplizialkomplexe
-
-Definition 37
-Seien K,L Simplizialkomplexe. Eine stetige Abbildung
-
-f : |K| → |L|
-
-heißt simplizial, wenn für jedes ∆ ∈ K gilt:
-
-a) f(∆) ∈ L
-b) f |∆ : ∆→ f(∆) ist eine affine Abbildung.
-
-Beispiel 26 (Simpliziale Abbildungen)
-1) ϕ(e1) := b1, ϕ(e2) := b2
-
-ϕ ist eine eindeutig bestimmte lineare Abbildung
-
-
-
-37 2.3. SIMPLIZIALKOMPLEX
-
-0 e2
-
-e1
-
-0 b1
-
-b2
-
-ϕ
-
-2) Folgende Abbildung ϕ : ∆n → ∆n−1 ist simplizial:
-
-ϕ
-
-3) Tori können simplizial auf Sphären abgebildet werden (vgl. Abbildung 2.8)
-
-M M
-
-a
-
-a
-
-a
-
-b
-
-b
-
-b
-
-c
-
-c
-
-c
-
-d
-
-d
-
-d
-
-M
-
-a
-
-b
-
-c
-
-d
-b b b
-
-b b b
-
-b b b
-
-b
-
-b
-
-b
-
-b
-
-b
-
-b
-
-bb
-
-b
-
-b b
-
-b b
-
-b b
-b
-
-b
-
-b
-
-b
-
-Abbildung 2.8: Abbildung eines Torus auf eine Sphäre
-
-Definition 38
-Sei K ein endlicher Simplizialkomplex. Für n ≥ 0 sei an(K) die Anzahl der n-Simplizes in
-K.
-
-Dann heißt
-
-$\chi(K) := \sum_{n=0}^{\dim K} (-1)^n a_n(K)$
-
-Eulerzahl (oder Euler-Charakteristik) von K.
-
-Beispiel 27
-1) χ(∆1) = 2− 1 = 1
-
-χ(∆2) = 3− 3 + 1 = 1
-χ(∆3) = 4− 6 + 4− 1 = 1
-
-2) χ(Oktaeder-Oberfläche) = 6− 12 + 8 = 2
-χ(Rand des Tetraeders) = 2
-χ(Ikosaeder) = 12− 30 + 20 = 2
-
-3) χ(Würfel) = 8− 12 + 6 = 2
-χ(Würfel, unterteilt in Dreiecksflächen) = 8− (12 + 6) + (6 · 2) = 2
-
-Bemerkung 33
-χ(∆n) = 1 für jedes n ∈ N0
-
-
-
-38 2.3. SIMPLIZIALKOMPLEX
-
-Beweis: $\Delta^n$ ist die konvexe Hülle von $(e_0, \dots, e_n)$ in $\mathbb{R}^{n+1}$. Jede $(k + 1)$-elementige Teilmenge
-von $\{ e_0, \dots, e_n \}$ definiert ein $k$-Simplex.
-$\Rightarrow a_k(\Delta^n) = \binom{n+1}{k+1}, \quad k = 0, \dots, n$
-
-$\Rightarrow \chi(\Delta^n) = \sum_{k=0}^{n} (-1)^k \binom{n+1}{k+1}$
-
-$f(x) = (x + 1)^{n+1} \overset{\text{Binomischer Lehrsatz}}{=} \sum_{k=0}^{n+1} \binom{n+1}{k} x^k$
-
-$\Rightarrow 0 = f(-1) = \sum_{k=0}^{n+1} \binom{n+1}{k} (-1)^k = 1 - \chi(\Delta^n)$
-
-$\Rightarrow \chi(\Delta^n) = 1$ $\blacksquare$
-
-Definition 39
-a) Ein 1D-Simplizialkomplex heißt Graph.
-
-b) Ein Graph, der homöomorph zu S1 ist, heißt Kreis.
-
-c) Ein zusammenhängender Graph heißt Baum, wenn er keinen Kreis enthält.
-
-(a) Dies wird häufig auch als
-Multigraph bezeichnet.
-
-(b) Planare Einbettung des Te-
-traeders
-
-(c) K5 (d) K3,3
-
-Abbildung 2.9: Beispiele für Graphen
-
-Bemerkung 34
-Für jeden Baum T gilt χ(T ) = 1.
-
-Beweis: Induktion über die Anzahl der Ecken.
-
-Bemerkung 35
-a) Jeder zusammenhängende Graph Γ enthält einen Teilbaum T , der alle Ecken von Γ
-
-enthält.2
-
-b) Ist n = a1(Γ)− a1(T ), so ist χ(Γ) = 1− n.
-
-Beweis:
-
-a) Siehe „Algorithmus von Kruskal“.
-
-2T wird „Spannbaum“ genannt.
-
-
-
-39 2.3. SIMPLIZIALKOMPLEX
-
-b) χ(Γ) = a0(Γ)− a1(Γ)
-
-= a0(Γ)− (n+ a1(T ))
-
-= a0(T )− a1(T )− n
-= χ(T )− n
-= 1− n
-
-Bemerkung 36
-Sei ∆ ein n-Simplex und x ∈ ∆◦ ⊆ Rn. Sei K der Simplizialkomplex, der aus ∆ durch
-„Unterteilung“ in x entsteht. Dann ist χ(K) = χ(∆) = 1.
-
-(a) K (b) ∆, das aus K durch Unter-
-teilung entsteht
-
-Abbildung 2.10: Beispiel für Bemerkung 36.
-
-Beweis: $\chi(K) = \chi(\Delta) - \underbrace{(-1)^n}_{n\text{-Simplex}} + \underbrace{\sum_{k=0}^{n} (-1)^k \binom{n+1}{k}}_{= (1+(-1))^{n+1} - (-1)^{n+1} = (-1)^n} = \chi(\Delta)$ $\blacksquare$
-
-Definition 40
-Sei X ein topologischer Raum, K ein Simplizialkomplex und
-
-h : |K| → X
-
-ein Homöomorphismus von der geometrischen Realisierung |K| auf X. Dann heißt h eine
-Triangulierung von X.
-
-Beispiel 28 (Triangulierung des Torus)
-Für eine Triangulierung des Torus werden mindestens 14 Dreiecke benötigt. Beispiele für
-fehlerhafte „Triangulierungen“ sind in Abbildung 2.11 zu sehen. Korrekte Triangulierungen sind
-in Abbildung 2.12 zu sehen.
-
-Satz 2.1 (Eulersche Polyederformel)
-Sei P ein konvexes Polyeder in R3, d. h. ∂P ist ein 2-dimensionaler Simplizialkomplex,
-sodass gilt:
-
-∀x, y ∈ ∂P : [x, y] ⊆ P
-
-Dann ist χ(∂P ) = 2.
-
-Beweis:
-
-1) Die Aussage ist richtig für den Tetraeder.
-
-2) O. B. d. A. sei $0 \in P$ und $P \subseteq B_1(0)$. Projiziere $\partial P$ von 0 aus auf $\partial B_1(0) = S^2$.
-Erhalte Triangulierung von $S^2$.
-
-
-
-40 2.3. SIMPLIZIALKOMPLEX
-
-(a) Die beiden markierten Dreiecke schneiden sich im
-Mittelpunkt und in einer Seite.
-
-(b) Die beiden markierten Dreiecke schneiden sich im
-Mittelpunkt und außen.
-
-Abbildung 2.11: Fehlerhafte Triangulierungen
-
-(a) Einfache Triangulierung (b) Minimale Triangulierung
-
-Abbildung 2.12: Triangulierungen des Torus
-
-
-
-41 2.3. SIMPLIZIALKOMPLEX
-
-3) Sind $P_1$ und $P_2$ konvexe Polyeder und $T_1, T_2$ die zugehörigen Triangulierungen von
-$S^2$, so gibt es eine Triangulierung $T$, die sowohl von $T_1$ als auch von $T_2$ eine Verfeinerung
-ist (vgl. Abbildung 2.13).
-
-T1
-
-T2
-
-T
-
-Abbildung 2.13: T ist eine Triangulierung, die für T1 und T2 eine Verfeinerung ist.
-
-Nach Bemerkung 36 ist χ(∂P1) = χ(T1) = χ(T ) = χ(T2) = χ(∂P2) = 2, weil o. B. d. A.
-P2 ein Tetraeder ist.
-
-Bemerkung 37 (Der Rand vom Rand ist 0)
-Sei K ein endlicher Simplizialkomplex mit Knotenmenge V und < eine Totalordnung auf V .
-
-Sei An die Menge der n-Simplizes in K, d. h.
-
-An(K) := { σ ∈ K | dim(σ) = n } für n = 0, . . . , d = dim(K)
-
-und Cn(K) der R-Vektorraum mit Basis An(K), d. h.
-
-$C_n(K) = \left\{ \sum_{\sigma \in A_n(K)} c_\sigma \cdot \sigma \;\middle|\; c_\sigma \in \mathbb{R} \right\}$
-
-
-Sei σ = ∆(x0, . . . , xn) ∈ An(K), sodass x0 < x1 < · · · < xn.
-
-Für $i = 0, \dots, n$ sei $\partial_i \sigma := \Delta(x_0, \dots, \hat{x}_i, \dots, x_n)$ die $i$-te Seite von $\sigma$ und
-$d\sigma = d_n \sigma := \sum_{i=0}^{n} (-1)^i \partial_i \sigma \in C_{n-1}(K)$ und $d_n : C_n(K) \to C_{n-1}(K)$ die dadurch definierte lineare
-Abbildung.
-
-Dann gilt: dn−1 ◦ dn = 0
-
-a b
-
-c
-
-σ
-
-e3
-
-e1e2
-
-Abbildung 2.14: Simplizialkomplex mit Totalordnung
-
-Beispiel 29
-Sei a < b < c. Dann gilt:
-
-d2σ = e1 − e2 + e3
-
-d1(e1 − e2 + e3) = (c− b)− (c− a) + (b− a)
-
-
-
-42 2.3. SIMPLIZIALKOMPLEX
-
-= 0
-
-Sei a < b < c < d. Dann gilt für Tetraeder:
-
-d3(∆(a, b, c, d)) = ∆(b, c, d)−∆(a, c, d) + ∆(a, b, d)−∆(a, b, c),wobei:
-d2( ∆(b, c, d)) = ∆(c, d)−∆(b, d) + ∆(b, c)
-
-d2(−∆(a, c, d)) = −∆(c, d) + ∆(a, d)−∆(a, c)
-
-d2( ∆(a, b, d)) = ∆(b, d)−∆(a, d) + ∆(a, b)
-
-d2(−∆(a, b, c)) = −∆(b, c) + ∆(a, c)−∆(a, b)
-
-⇒ d2(d3(∆(a, b, c, d))) = 0
-
-Beweis: Sei σ ∈ An. Dann gilt:
-
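-$d_{n-1}(d_n \sigma) = d_{n-1}\left( \sum_{i=0}^{n} (-1)^i \partial_i \sigma \right)$
-
-$= \sum_{i=0}^{n} (-1)^i d_{n-1}(\partial_i \sigma)$
-
-$= \sum_{i=0}^{n} (-1)^i \sum_{j=0}^{n-1} (-1)^j \partial_j(\partial_i \sigma)$
-
-$= \sum_{0 \leq i \leq j \leq n-1} (-1)^{i+j} \partial_j(\partial_i \sigma) + \sum_{0 \leq j < i \leq n} (-1)^{i+j} \partial_{i-1}(\partial_j \sigma)$
-
-$= 0$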
-
-weil jeder Summand aus der ersten Summe auch in der zweiten Summe vorkommt, aber mit
-umgekehrten Vorzeichen. �
-
-Definition 41
-Sei K ein Simplizialkomplex, Zn := Kern(dn) ⊆ Cn und Bn := Bild(dn+1) ⊆ Cn.
-
-a) Hn = Hn(K,R) := Zn/Bn heißt n-te Homologiegruppe von K.
-
-b) bn(K) := dimRHn heißt n-te Betti-Zahl von K.
-
-Bemerkung 38
-Nach Bemerkung 37 ist Bn ⊆ Zn, denn dn+1(C) ∈ Kern(dn) für C ∈ Cn+1.
-
-Satz 2.2
-Für jeden endlichen Simplizialkomplex K der Dimension d gilt:
-
-$\sum_{k=0}^{d} (-1)^k b_k(K) = \sum_{k=0}^{d} (-1)^k a_k(K) = \chi(K)$
-
-Bemerkung 39
-Es gilt nicht ak = bk ∀k ∈ N0.
-
-
-
-43 2.3. SIMPLIZIALKOMPLEX
-
-Beweis:
-
-• Dimensionsformel für dn: an = dimZn + dimBn−1 für n ≥ 1
-
-• Dimensionsformel für Zn → Hn = Zn/Bn : dimZn = bn + dimBn
-
-• dimZd = bd, da dimZd = bd + dimBd, wobei dimBd = 0, da ad+1 = 0
-
-• a0 − dimB0 = b0, da a0 − dimB0 = a0 − dimZ0 + b0 und a0 = dimZ0, weil a−1 = 0
-
-$\Rightarrow \sum_{k=0}^{d} (-1)^k a_k = a_0 + \sum_{k=1}^{d} (-1)^k (\dim Z_k + \dim B_{k-1})$
-
-$= a_0 + \sum_{k=1}^{d} (-1)^k \dim Z_k + \sum_{k=0}^{d-1} (-1)^{k+1} \dim B_k$
-
-$= a_0 + \sum_{k=1}^{d} (-1)^k \dim Z_k - \sum_{k=0}^{d-1} (-1)^k \dim B_k$
-
-$= a_0 + \sum_{k=1}^{d-1} (-1)^k b_k + (-1)^d \underbrace{\dim Z_d}_{= b_d} - \dim B_0$
-
-$= b_0 + \sum_{k=1}^{d-1} (-1)^k b_k + (-1)^d b_d$
-
-$= \sum_{k=0}^{d} (-1)^k b_k$
-
-
-
-44 2.3. SIMPLIZIALKOMPLEX
-
-Übungsaufgaben
-
-Aufgabe 7 (Zusammenhang)
-
-(a) Beweisen Sie, dass eine topologische Mannigfaltigkeit genau dann wegzusammenhän-
-gend ist, wenn sie zusammenhängend ist
-
-(b) Betrachten Sie nun wie in Beispiel 20.8 den Raum X := (R\{ 0 })∪{ 01, 02 } versehen
-mit der dort definierten Topologie. Ist X wegzusammenhängend?
-
-
-
-3 Fundamentalgruppe und Überlagerungen
-
-3.1 Homotopie von Wegen
-
-a b
-
-γ1
-
-γ2
-
-(a) γ1 und γ2 sind homotop,
-da man sie „zueinander ver-
-schieben“ kann.
-
-a b
-
-γ1
-
-γ2
-
-(b) γ1 und γ2 sind wegen dem
-Hindernis nicht homotop.
-
-Abbildung 3.1: Beispiele für Wege γ1 und γ2
-
-Definition 42
-Sei X ein topologischer Raum, a, b ∈ X, γ1, γ2 : I → X Wege von a nach b, d. h. γ1(0) =
-γ2(0) = a, γ1(1) = γ2(1) = b
-
-γ1 und γ2 heißen homotop, wenn es eine stetige Abbildung H : I × I → X mit
-
-H(t, 0) = γ1(t) ∀t ∈ I
-H(t, 1) = γ2(t) ∀t ∈ I
-
-und H(0, s) = a und H(1, s) = b für alle s ∈ I gibt. Dann schreibt man: γ1 ∼ γ2
-
-H heißt Homotopie zwischen γ1 und γ2.
-
-Bemerkung 40
-Sei X ein topologischer Raum, a, b ∈ X, γ1, γ2 : I → X Wege von a nach b und H eine
-Homotopie zwischen γ1 und γ2.
-
-Dann gilt: Der Weg
-γs : I → X, γs(t) = H(t, s)
-
-ist Weg in X von a nach b für jedes s ∈ I.
-
-Beweis: H ist stetig, also ist H(t, s) insbesondere für jedes feste s stetig. Da H(0, s) = a und
-H(1, s) = b für alle s ∈ I und γs eine Abbildung von I auf X ist, ist γs ein Weg in X von a
-nach b für jedes s ∈ I. �
-
-Bemerkung 41
-Durch Homotopie wird eine Äquivalenzrelation auf der Menge aller Wege in X von a nach b
-definiert.
-
-Beweis:
-
-
-
-46 3.1. HOMOTOPIE VON WEGEN
-
-• reflexiv: H(t, s) = γ(t) für alle (t, s) ∈ I × I
-• symmetrisch: H ′(t, s) = H(t, 1− s) für alle (t, s) ∈ I × I
-• transitiv: Seien H ′ bzw. H ′′ Homotopien von γ1 nach γ2 bzw. von γ2 nach γ3.
-
-Dann sei $H(t, s) := \begin{cases} H'(t, 2s) & \text{falls } 0 \leq s \leq \frac{1}{2} \\ H''(t, 2s - 1) & \text{falls } \frac{1}{2} \leq s \leq 1 \end{cases}$
-
-⇒ H ist stetig und Homotopie von γ1 nach γ3.
-
-�
-
-Beispiel 30
-1) Sei $X = S^1$. $\gamma_1$ und $\gamma_2$ aus Abbildung 3.3a sind nicht homotop.
-
-2) Sei X = T 2. γ1, γ2 und γ3 aus Abbildung 3.3b sind paarweise nicht homotop.
-
-3) Sei X = R2 und a = b = (0, 0).
-
-Je zwei Wege im R2 mit Anfangs- und Endpunkt (0, 0) sind homotop.
-
-Abbildung 3.2: Zwei Wege im R2 mit Anfangs- und Endpunkt (0, 0)
-
-Sei γ0 : I → R2 der konstante Weg γ0(t) = (0, 0) ∀t ∈ I. Sei γ(0) = γ(1) = (0, 0).
-
-H(t, s) := (1− s)γ(t) ist stetig, H(t, 0) = γ(t) ∀t ∈ I und H(t, 1) = (0, 0) ∀t ∈ I.
-Bemerkung 42
-
-Sei X ein topologischer Raum, γ : I → X ein Weg und ϕ : I → I stetig mit ϕ(0) = 0,
-ϕ(1) = 1. Dann sind γ und γ ◦ ϕ homotop.
-
-Beweis: Sei H(t, s) = γ((1− s)t+ s · ϕ(t)).
-
-Dann ist H stetig, H(t, 0) = γ(t), H(t, 1) = γ(ϕ(t)), H(0, s) = γ(0) und H(1, s) =
-γ(1− s+ s) = γ(1)
-⇒ H ist Homotopie. �
-
-
-
-47 3.1. HOMOTOPIE VON WEGEN
-
-a
-
-b
-
-γ1γ2
-
-(a) Kreis mit zwei Wegen
-
-a
-
-b
-
-(b) Torus mit drei Wegen
-
-Abbildung 3.3: Beispiele für (nicht)-Homotopie von Wegen
-
-Definition 43
-Seien γ1, γ2 Wege in X mit γ1(1) = γ2(0). Dann ist
-
-$\gamma(t) = \begin{cases} \gamma_1(2t) & \text{falls } 0 \leq t < \frac{1}{2} \\ \gamma_2(2t - 1) & \text{falls } \frac{1}{2} \leq t \leq 1 \end{cases}$
-
-ein Weg in X. Er heißt zusammengesetzter Weg und man schreibt γ = γ1 ∗ γ2.
-
-Bemerkung 43
-Das Zusammensetzen von Wegen ist nur bis auf Homotopie assoziativ, d. h.:
-
-$\gamma_1 * (\gamma_2 * \gamma_3) \neq (\gamma_1 * \gamma_2) * \gamma_3$
-
-γ1 ∗ (γ2 ∗ γ3) ∼ (γ1 ∗ γ2) ∗ γ3
-
-mit γ1(1) = γ2(0) und γ2(1) = γ3(0).
-
-γ1 γ2 γ3
-
-0 1/2 3/4 1
-
-(a) γ1 ∗ (γ2 ∗ γ3)
-
-γ1 γ2 γ3
-
-0 1/4 1/2 1
-
-(b) (γ1 ∗ γ2) ∗ γ3
-
-Abbildung 3.4: Das Zusammensetzen von Wegen ist nicht assoziativ
-
-Beweis: Das Zusammensetzen von Wegen ist wegen Bemerkung 42 bis auf Homotopie assoziativ.
-Verwende dazu
-
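-$\varphi(t) = \begin{cases} \frac{1}{2} t & \text{falls } 0 \leq t < \frac{1}{2} \\ t - \frac{1}{4} & \text{falls } \frac{1}{2} \leq t < \frac{3}{4} \\ 2t - 1 & \text{falls } \frac{3}{4} \leq t \leq 1 \end{cases}$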
-
-Bemerkung 44
-Sei $X$ ein topologischer Raum, $a, b, c \in X$, $\gamma_1, \gamma_1'$ Wege von $a$ nach $b$ und $\gamma_2, \gamma_2'$ Wege von $b$
-nach $c$.
-
-Sind γ1 ∼ γ′1 und γ2 ∼ γ′2, so ist γ1 ∗ γ2 ∼ γ′1 ∗ γ′2.
-
-
-
-48 3.2. FUNDAMENTALGRUPPE
-
-γ1γ′1
-
-a
-b
-
-c
-
-γ′2
-
-γ2
-
-Abbildung 3.5: Situation aus Bemerkung 44
-.
-
-Beweis: Sei Hi eine Homotopie zwischen γi und γ′i, i = 1, 2.
-
-Dann ist
-
-$H(t, s) := \begin{cases} H_1(2t, s) & \text{falls } 0 \leq t \leq \frac{1}{2} \\ H_2(2t - 1, s) & \text{falls } \frac{1}{2} \leq t \leq 1 \end{cases} \quad \forall s \in I$
-
-eine Homotopie zwischen γ1 ∗ γ2 und γ′1 ∗ γ′2.
-
-Eine spezielle Homotopieäquivalenz sind sog. Deformationsretraktionen:
-
-Definition 44
-Sei X ein topologischer Raum, A ⊆ X, r : X → A eine stetige Abbildung und ι = (idX)|A.
-
-a) ι : A→ X mit ι(x) = x heißt die Inklusionsabbildung und man schreibt: ι : A ↪→ X.
-
-b) r heißt Retraktion, wenn r|A = idA ist.
-
-c) A heißt Deformationsretrakt, wenn es eine Retraktion r auf A mit ι ◦ r ∼ idX gibt.
-
-Beispiel 31 (Zylinder auf Kreis)
-Sei X = S1 × R ein topologischer Raum und
-
-r : S1 × R→ S1 × { 0 } ∼= S1
-
-mit
-r(x, y) := (x, 0)
-
-eine Abbildung. r ist eine Retraktion, da r|S1
-∼= idS1 .
-
-ι ◦ r : S1 × R→ S1 × R
-(x, y) 7→ (x, 0)
-
-H : (S1 × R)× I → S1 × R
-(x, y, t) 7→ (x, ty)
-
-3.2 Fundamentalgruppe
-
-Für einen Weg γ sei [γ] seine Homotopieklasse.
-
-Definition 45
-Sei X ein topologischer Raum und x ∈ X. Sei außerdem
-
-π1(X,x) := { [γ] | γ ist Weg in X mit γ(0) = γ(1) = x }
-
-
-
-49 3.2. FUNDAMENTALGRUPPE
-
-Durch [γ1] ∗G [γ2] := [γ1 ∗ γ2] wird π1(X,x) zu einer Gruppe. Diese Gruppe heißt Funda-
-mentalgruppe von X im Basispunkt x.
-
-Bemerkung 45
-Im R2 gibt es nur eine Homotopieklasse.
-
-Beweis: (Fundamentalgruppe ist eine Gruppe)
-
-a) Abgeschlossenheit folgt direkt aus der Definition von ∗G
-b) Assoziativität folgt aus Bemerkung 43
-
-c) Neutrales Element e = [γ0], γ0(t) = x ∀t ∈ I. e ∗ [γ] = [γ] = [γ] ∗ e, da γ0 ∗ γ ∼ γ
-
-d) Inverses Element $[\gamma]^{-1} = [\overline{\gamma}] = [\gamma(1 - t)]$, denn $\overline{\gamma} * \gamma \sim \gamma_0 \sim \gamma * \overline{\gamma}$
-Beispiel 32
-
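-1) $S^1 = \{ z \in \mathbb{C} \mid |z| = 1 \} = \{ (\cos \varphi, \sin \varphi) \in \mathbb{R}^2 \mid 0 \leq \varphi \leq 2\pi \}$
-
-$\pi_1(S^1, 1) = \{ [\gamma^k] \mid k \in \mathbb{Z} \} \cong \mathbb{Z}$. Dabei ist $\gamma(t) = e^{2 \pi i t} = \cos(2 \pi t) + i \sin(2 \pi t)$ und
-
-$\gamma^k := \underbrace{\gamma * \dots * \gamma}_{k \text{ mal}}$
-
-$[\gamma^k] \mapsto k$ ist ein Isomorphismus.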
-
-2) π1(R2, 0) = π1(R2, x) = { e } für jedes x ∈ R2
-
-3) π1(Rn, x) = { e } für jedes x ∈ Rn
-
-4) G ⊆ Rn heißt sternförmig bzgl. x ∈ G, wenn für jedes y ∈ G auch die Strecke
-[x, y] ⊆ G ist.
-
-Für jedes sternförmige G ⊆ Rn ist π1(G, x) = { e }
-
-x
-
-Abbildung 3.6: Sternförmiges Gebiet
-.
-
-5) π1(S2, x0) = { e }, da im R2 alle Wege homotop zu { e } sind. Mithilfe der stereogra-
-phischen Projektion kann von S2 auf den R2 abgebildet werden.
-
-Dieses Argument funktioniert nicht mehr bei flächenfüllenden Wegen, d. h. wenn
-γ : I → S2 surjektiv ist.
-
-Bemerkung 46
-Sei X ein topologischer Raum, a, b ∈ X, δ : I → X ein Weg von a nach b.
-
-Dann ist die Abbildung
-
-$\alpha : \pi_1(X, a) \to \pi_1(X, b), \quad [\gamma] \mapsto [\overline{\delta} * \gamma * \delta]$
-ein Gruppenisomorphismus.
-
-
-
-50 3.2. FUNDAMENTALGRUPPE
-
-a b
-
-γ
-
-δ
-
-Abbildung 3.7: Situation aus Bemerkung 46
-.
-
-Beweis:
-
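-$\alpha([\gamma_1] * [\gamma_2]) = [\overline{\delta} * (\gamma_1 * \gamma_2) * \delta]$
-$= [\overline{\delta} * \gamma_1 * \delta * \overline{\delta} * \gamma_2 * \delta]$
-$= [\overline{\delta} * \gamma_1 * \delta] * [\overline{\delta} * \gamma_2 * \delta]$
-$= \alpha([\gamma_1]) * \alpha([\gamma_2])$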
-
-Definition 46
-Ein wegzusammenhängender topologischer Raum X heißt einfach zusammenhängend,
-wenn π1(X,x) = { e } für ein x ∈ X.
-
-Wenn π1(X,x) = { e } für ein x ∈ X gilt, dann wegen Bemerkung 46 sogar für alle x ∈ X.
-
-Bemerkung 47
-Es seien X,Y topologische Räume, f : X → Y eine stetige Abbildung, x ∈ X, y := f(x) ∈ Y .
-
-a) Dann ist die Abbildung f∗ : π1(X,x)→ π1(Y, y), [γ]→ [f ◦ γ] ein Gruppenhomomor-
-phismus.
-
-b) Ist Z ein weiterer topologischer Raum und g : Y → Z eine stetige Abbildung z := g(y).
-Dann ist (g ◦ f)∗ = g∗ ◦ f∗ : π1(X,x)→ π1(Z, z)
-
-Beweis:
-
-a) f∗ ist wohldefiniert: Seien γ1, γ2 homotope Wege von x. z.Z.: f ◦ γ1 ∼ f ◦ γ2: Nach
-Voraussetzung gibt es stetige Abbildungen H : I × I → X mit
-
-H(t, 0) = γ1(t),
-
-H(t, 1) = γ2(t),
-
-H(0, s) = H(1, s) = x.
-
-Dann ist f ◦H : I×I → Y stetig mit (f ◦H)(t, 0) = f(H(t, 0)) = f(γ1(t)) = (f ◦γ1)(t)
-etc. ⇒ f ◦ γ1 ∼ f ◦ γ2.
-
-f∗([γ1] ∗ [γ2]) = [f ◦ (γ1 ∗ γ2)] = [(f ◦ γ1)] ∗ [(f ◦ γ2)] = f∗([γ1]) ∗ f∗([γ2])
-
-b) (g ◦ f)∗([γ]) = [(g ◦ f) ◦ γ] = [g ◦ (f ◦ γ)] = g∗([f ◦ γ]) = g∗(f∗([γ])) = (g∗ ◦ f∗)([γ])
-
-Beispiel 33
-1) f : S1 ↪→ R2 ist injektiv, aber f∗ : π1(S1, 1) ∼= Z→ π1(R2, 1) = { e } ist nicht injektiv.
-2) f : R→ S1, t 7→ (cos 2πt, sin 2πt) ist surjektiv, aber f∗ : π1(R, 0) = { e } → π1(S1, 1) ∼=
-
-Z ist nicht surjektiv.
-
-
-
-51 3.2. FUNDAMENTALGRUPPE
-
-Bemerkung 48
-Sei f : X → Y ein Homöomorphismus zwischen topologischen Räumen X,Y . Dann gilt:
-
-f∗ : π1(X,x)→ π1(Y, f(x))
-
-ist ein Isomorphismus für jedes x ∈ X.
-
-Beweis: Sei g : Y → X die Umkehrabbildung, d. h. g ist stetig und f ◦ g = idY , g ◦ f = idX
-
-$\Rightarrow f_* \circ g_* = (f \circ g)_* = (\mathrm{id}_Y)_* = \mathrm{id}_{\pi_1(Y, f(x))}$ und $g_* \circ f_* = \mathrm{id}_{\pi_1(X, x)}$.
-
-Definition 47
-Seien X,Y topologische Räume, x0 ∈ X, y0 ∈ Y, f, g : X → Y stetig mit f(x0) = y0 = g(x0).
-
-f und g heißen homotop (f ∼ g), wenn es eine stetige Abbildung H : X × I → Y mit
-
-H(x, 0) = f(x) ∀x ∈ X
-H(x, 1) = g(x) ∀x ∈ X
-H(x0, s) = y0 ∀s ∈ I
-
-gibt.
-
-Bemerkung 49
-Sind f und g homotop, so ist f∗ = g∗ : π1(X,x0)→ π1(Y, y0).
-
-Beweis: Sei γ ein geschlossener Weg in X um x0, d. h. [γ] ∈ π1(X,x0).
-
-Z. z.: f ◦ γ ∼ g ◦ γ
-Sei dazu Hγ : I × I → Y, (t, s) 7→ H(γ(t), s). Dann gilt:
-
-Hγ(t, 0) = H(γ(t), 0) = (f ◦ γ)(t) ∀t ∈ I
-Hγ(1, s) = H(γ(1), s) = H(x0, s) = y0 ∀s ∈ I
-Hγ(t, 1) = H(γ(t), 1) = g(γ(t)) ∀t ∈ I
-
-Beispiel 34
-f : X → Y, g : Y → X mit g ◦ f ∼ idX , f ◦ g ∼ idY
-
-⇒ f∗ ist Isomorphismus. Konkret: f : R2 → { 0 } , g : { 0 } → R2
-
-⇒ f ◦ g = id{ 0 }, g ◦ f : R2 → R2, x 7→ 0 für alle x.
-
-g ◦ f ∼ idR2 mit Homotopie: H : R2 × I → R2, H(x, s) = (1− s)x (stetig!)
-
-⇒ H(x, 0) = x = idR2(x), H(x, 1) = 0, H(0, s) = 0 ∀s ∈ I.
-
-Satz 3.1 (Satz von Seifert und van Kampen „light“)
-Sei X ein topologischer Raum, U, V ⊆ X offen mit U ∪ V = X und U ∩ V wegzusam-
-menhängend.
-
-Dann wird π1(X,x) für x ∈ U ∩ V erzeugt von geschlossenen Wegen um x, die ganz in
-U oder ganz in V verlaufen.
-
-
-
-52 3.3. ÜBERLAGERUNGEN
-
-Beweis: Sei γ : I → X ein geschlossener Weg um x. Überdecke I mit endlich vielen offenen
-Intervallen I1, I2, . . . , In, die ganz in γ−1(U) oder ganz in γ−1(V ) liegen.
-
-O. B. d. A. sei γ(I1) ⊆ U, γ(I2) ⊆ V , etc.
-
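-Wähle $t_i \in I_i \cap I_{i+1}$, also $\gamma(t_i) \in U \cap V$. Sei $\sigma_i$ Weg in $U \cap V$ von $x$ nach $\gamma(t_i)$ $\Rightarrow$ $\gamma$ ist
-homotop zu
-
-$\underbrace{\gamma_1 * \overline{\sigma_1}}_{\text{in } U} * \underbrace{\sigma_1 * \gamma_2 * \overline{\sigma_2}}_{\text{in } V} * \dots * \sigma_{n-1} * \gamma_n$ mit $\gamma_i := \gamma|_{I_i}$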
-
-a b
-
-x
-
-Abbildung 3.8: Topologischer Raum X
-
-Beispiel 35 (Satz von Seifert und van Kampen)
-1) Sei X wie in Abbildung 3.8. π1(X,x) wird „frei“ erzeugt von a und b, weil π1(U, x) =
-〈a〉 ∼= Z, π1(V, x) = 〈b〉 ∼= Z, insbesondere ist a ∗ b nicht homotop zu b ∗ a.
-
-2) Torus: $\pi_1(T^2, x)$ wird erzeugt von $a$ und $b$.
-
-V
-
-U
-
-a
-
-b
-
-V
-
-a b
-
-Abbildung 3.9: $a * b = b * a \Leftrightarrow a * b * \overline{a} * \overline{b} \sim e$
-
-3.3 Überlagerungen
-
-Definition 48
-Es seien X,Y zusammenhängende topologische Räume und p : Y → X eine stetige Abbil-
-dung.
-
-p heißt Überlagerung, wenn jedes x ∈ X eine offene Umgebung U = U(x) ⊆ X besitzt,
-sodass p−1(U) disjunkte Vereinigung von offenen Teilmengen Vj ⊆ Y ist (j ∈ I) und
-p|Vj : Vj → U ein Homöomorphismus ist.
-
-|I| heißt Grad der Überlagerung p und man schreibt:
-
-deg p := |I|
-
-
-
-53 3.3. ÜBERLAGERUNGEN
-
-Abbildung 3.10: R→ S1,
-t 7→ (cos 2πt, sin 2πt)
-
-Beispiel 36
-1) siehe Abbildung 3.10
-
-2) siehe Abbildung 3.11
-
-3) Rn → Tn = Rn/Zn
-
-4) Sn → Pn(R)
-
-5) S1 → S1, z 7→ z2, siehe Abbildung 3.12
-
-0 1 2 3 4 5 6
-0
-
-1
-
-2
-
-3
-
-4
-
-5
-
-6
-
-*
-
-*
-
-*
-
-*
-
-*
-
-*
-
-*
-
-*
-
-*
-
-*
-
-*
-
-*
-
-*
-
-*
-
-*
-
-*
-
-*
-
-*
-
-*
-
-*
-
-*
-
-*
-
-*
-
-*
-
-*
-
-*
-
-*
-
-*
-
-*
-
-*
-
-*
-
-*
-
-*
-
-*
-
-*
-
-*
-
-*
-
-−−−→
-
-Abbildung 3.11: R2 → T 2 = R2/Z2
-
-Bemerkung 50
-Überlagerungen sind surjektiv.
-
-Beweis: Sei $p : Y \to X$ eine Überlagerung und $x \in X$ beliebig. Dann existiert eine offene
-Umgebung $U(x) \subseteq X$ und offene Teilmengen $V_j \subseteq Y$ mit $p^{-1}(U) = \dot{\bigcup} V_j$ und $p|_{V_j} : V_j \to U$
-ist Homöomorphismus.
-
-D. h. es existiert ein $y \in V_j$, sodass $p|_{V_j}(y) = x$. Da $x \in X$ beliebig war und ein $y \in Y$
-existiert mit $p(y) = x$, ist $p$ surjektiv. $\blacksquare$
-
-
-
-54 3.3. ÜBERLAGERUNGEN
-
-1
-
-i
-z
-
-z2
-
-ϕ
-ϕ
-
-z2
-
-Abbildung 3.12: t 7→ (cos 4πt, sin 4πt)
-
-Definition 49
-Seien (X,TX), (Y,TY ) topologische Räume und f : X → Y eine Abbildung.
-
-f heißt offen :⇔ ∀U ∈ TX : f(U) ∈ TY .
-
-Beispiel 37 (Offene und stetige Abbildungen)
-Sei X ein topologischer Raum und seien fi : R → R mit i ∈ { 1, 2, 3 } und g : R → S1 =
-{ z ∈ C | ‖z‖ = 1 } Abbildungen.
-
-1) f1 := idR ist eine offene und stetige Abbildung.
-
-2) g(x) := e2πix ist eine offene, aber keine stetige Abbildung (vgl. Abbildung 1.5).
-
-3) f2(x) := 42 ist eine stetige, aber keine offene Abbildung.
-
-4) f3(x) :=
-
-{
-0 falls x ∈ Q
-42 falls x ∈ R \Q
-
-ist weder stetig noch offen.
-
-Bemerkung 51
-Überlagerungen sind offene Abbildungen.
-
-Beweis: Sei $V \subseteq Y$ offen, $y \in V$ und $x \in p(V)$, sodass $x = p(y)$ gilt. Sei weiter $U = U_x$ eine offene Umgebung
-von $x$ wie in Definition 48 und $V_j$ die Komponente von $p^{-1}(U)$, die $y$ enthält.
-
-Dann ist V ∩ Vj offene Umgebung von y.
-
-⇒ p(V ∩Vj) ist offen in p(Vj), also auch offen in X. Außerdem ist p(y) = x ∈ p(V ∩Vj) und
-p(V ∩ Vj) ⊆ p(V ).
-
-⇒ p(V ) ist offen.
-
-Definition 50
-Sei X ein topologischer Raum und M ⊆ X.
-
-M heißt diskret in X, wenn M in X keinen Häufungspunkt hat.
-
-Bemerkung 52
-Sei p : Y → X Überlagerung, x ∈ X.
-
-a) X hausdorffsch ⇒ Y hausdorffsch
-
-b) p−1(x) ist diskret in Y für jedes x ∈ X.
-
-Beweis:
-
-a) Seien y1, y2 ∈ Y .
-
-1. Fall: p(y1) = p(y2) = x.
-
-
-
-55 3.3. ÜBERLAGERUNGEN
-
-Sei $U$ Umgebung von $x$ wie in Definition 48, $V_{j_1}$ bzw. $V_{j_2}$ die Komponente von $p^{-1}(U)$,
-die $y_1$ bzw. $y_2$ enthält.
-
-Dann ist $V_{j_1} \neq V_{j_2}$, weil beide ein Element aus $p^{-1}(x)$ enthalten.
-
-$\Rightarrow V_{j_1} \cap V_{j_2} = \emptyset$ nach Voraussetzung.
-
-2. Fall: $p(y_1) \neq p(y_2)$.
-
-Dann seien U1 und U2 disjunkte Umgebungen von p(y1) und p(y2).
-
-⇒ p−1(U1) und p−1(U2) sind disjunkte Umgebungen von y1 und y2.
-
-b) Sei x ∈ X beliebig, aber fest.
-
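-Zu zeigen: $\forall y_i \in p^{-1}(x) : \exists V_i \in \mathfrak{T}_Y$ mit $y_i \in V_i$, sodass gilt: $i \neq j \Rightarrow V_i \cap V_j = \emptyset$.
-
-Die $V_i$ existieren wegen der Definition einer Überlagerung: $p$ heißt Überlagerung
-$:\Leftrightarrow \forall x \in X \; \exists U = U(x) \in \mathfrak{T}_X : p^{-1}(U) = \dot{\bigcup}_{V_i \in \mathfrak{T}_Y} V_i$ und $p|_{V_i}$ ist Homöomorphismus.
-
-$\Rightarrow (p|_{V_i})^{-1}(x) = \{ y_i \}$
-$\Rightarrow$ Alle $y_i$ liegen diskret in $Y$, da Häufungspunkte unendlich viele Elemente in jeder
-Umgebung benötigen. $\blacksquare$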
-
-Bemerkung 53 (Eindeutigkeit des Überlagerungsgrades)
-Sei p : Y → X Überlagerung. Dann gilt:
-
-∀x1, x2 ∈ X : |p−1(x1)| = |p−1(x2)|
-
-Hinweis: |p−1(x1)| =∞ ist erlaubt!
-
-Beweis: Sei U Umgebung von x1 wie in Definition 48, x ∈ U . Dann enthält jedes Vj mit j ∈ I
-genau ein Element von p−1(x).
-
-$\Rightarrow |p^{-1}(x)|$ ist konstant für $x \in U$
-$\xRightarrow{X \text{ zhgd.}} |p^{-1}(x)|$ ist konstant für $x \in X$.
-
-Definition 51
-Es seien X,Y, Z topologische Räume, p : Y → X eine Überlagerung und f : Z → X stetig.
-
-Eine stetige Abbildung f̃ : Z → Y heißt Liftung von f , wenn p ◦ f̃ = f ist.
-
-Y
-
-X
-
-Z
-
-p
-
-f̃
-
-f
-
-Bemerkung 54 (Eindeutigkeit der Liftung)
-Sei Z zusammenhängend und f0, f1 : Z → Y Liftungen von f .
-
-∃z0 ∈ Z : f0(z0) = f1(z0)⇒ f0 = f1
-
-Beweis: Sei T = { z ∈ Z | f0(z) = f1(z) }.
-Z. z.: T ist offen und Z \ T ist auch offen.
-
-
-
-56 3.3. ÜBERLAGERUNGEN
-
-0 1 2 3 4 5 6
-0
-
-1
-
-2
-
-3
-
-4
-
-5
-
-6
-
-T
-Liften−−−→ R2/Z2
-
-Abbildung 3.13: Beim Liften eines Weges bleiben geschlossene Wege im allgemeinen nicht ge-
-schlossen
-
-Sei z ∈ T, x = f(z), U Umgebung von x wie in Definition 48, V die Komponente von p−1(U),
-die y := f0(z) = f1(z) enthält.
-
-Sei q : U → V die Umkehrabbildung zu p|V .
-Sei $W := f^{-1}(U) \cap f_0^{-1}(V) \cap f_1^{-1}(V)$. $W$ ist offene Umgebung in $Z$ von $z$.
-
-Behauptung: $W \subseteq T$
-Denn für $w \in W$ ist $q(f(w)) = q((p \circ f_0)(w)) = ((q \circ p) \circ f_0)(w) = f_0(w) = q(f(w)) = f_1(w)$
-
-⇒ T ist offen.
-
-Analog: Z \ T ist offen.
-
-Satz 3.2
-Sei p : Y → X Überlagerung, γ : I → X ein Weg, y ∈ Y mit p(y) = γ(0) =: x.
-
-Dann gibt es genau einen Weg γ̃ : I → Y mit γ̃(0) = y und p ◦ γ̃ = γ.
-
-p : Y → X Überlagerung, X,Y wegzusammenhängend. p stetig und surjektiv, zu x ∈ X∃
-Umgebung U , so dass p−1(U) =
-
-⋃
-Vj
-
-p|Vj : Vj → U Homöomorphismus.
-
-Bemerkung 55
-Wege in X lassen sich zu Wegen in Y liften.
-
-Zu jedem y ∈ p−1(γ(0)) gibt es genau einen Lift von γ.
-
-
-
-57 3.3. ÜBERLAGERUNGEN
-
-Proposition 3.3
-Seien p : Y → X eine Überlagerung, a, b ∈ X, γ0, γ1 : I → X homotope Wege von a
-nach b, ã ∈ p−1(a), γ̃0, γ̃1 Liftungen von γ0 bzw. γ1 mit γ̃i(0) = ã.
-
-Dann ist γ̃0(1) = γ̃1(1) und γ̃0 ∼ γ̃1.
-
-Beweis: Sei $H : I \times I \to X$ Homotopie zwischen $\gamma_0$ und $\gamma_1$.
-
-Für s ∈ I sei γs : I → X, t 7→ H(t, s).
-
-Sei γ̃s Lift von γs mit γ̃s(0) = ã
-
-Sei $\tilde{H} : I \times I \to Y, \tilde{H}(t, s) := \tilde{\gamma}_s(t)$
-
-Dann gilt:
-
-(i) H̃ ist stetig (Beweis wie für Bemerkung 54)
-
-(ii) H̃(t, 0) = γ̃0(t), H̃(t, 1) = γ̃1(t)
-
-(iii) H̃(0, s) = γ̃s(0) = ã
-
-(iv) H̃(1, s) ∈ p−1(b)
-
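-Da $p^{-1}(b)$ diskrete Teilmenge von $Y$ ist
-$\Rightarrow \tilde{b}_s = \tilde{H}(1, s) = \tilde{H}(1, 0) \; \forall s \in I$
-$\Rightarrow \tilde{b}_0 = \tilde{b}_1$ und $\tilde{H}$ ist Homotopie zwischen $\tilde{\gamma}_0$ und $\tilde{\gamma}_1$. $\blacksquare$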
-
-Folgerung 3.4
-Sei p : Y → X eine Überlagerung, x0 ∈ X, y0 ∈ p−1(x0)
-
-a) p∗ : π1(Y, y0)→ π1(X,x0) ist injektiv
-
-b) [π1(X,x0) : p∗(π1(Y, y0))] = deg(p)
-
-Beweis:
-
-a) Sei γ̃ ein Weg in Y um y0 und p∗([γ̃]) = e, also p ◦ γ̃ ∼ γx0
-Nach Proposition 3.3 ist dann γ̃ homotop zum Lift des konstanten Wegs γx0 mit
-Anfangspunkt y0, also zu γy0 ⇒ [γ̃] = e
-
-b) Sei d = deg p und p−1(x0) = { y0, y1, . . . , yd−1 }. Für einen geschlossenen Weg γ in X
-um x0 sei γ̃ die Liftung mit γ̃(0) = y0.
-
-γ̃(1) ∈ { y0, . . . , yd−1 } hängt nur von [γ] ∈ π1(X,x0) ab.
-
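-Für geschlossene Wege $\gamma_0, \gamma_1$ um $x_0$ gilt:
-
-$\tilde{\gamma}_0(1) = \tilde{\gamma}_1(1)$
-
-$\Leftrightarrow [\tilde{\gamma}_0 * \tilde{\gamma}_1^{-1}] \in \pi_1(Y, y_0)$
-
-$\Leftrightarrow [\gamma_0 * \gamma_1^{-1}] \in p_*(\pi_1(Y, y_0))$
-
-$\Leftrightarrow [\gamma_0]$ und $[\gamma_1]$ liegen in derselben Nebenklasse bzgl. $p_*(\pi_1(Y, y_0))$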
-
-
-
-58 3.3. ÜBERLAGERUNGEN
-
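-Zu $i \in \{ 0, \dots, d - 1 \}$ gibt es Weg $\delta_i$ in $Y$ mit $\delta_i(0) = y_0$ und $\delta_i(1) = y_i$
-$\Rightarrow p \circ \delta_i$ ist geschlossener Weg in $X$ um $x_0$.
-$\Rightarrow$ Jedes $y_i$ mit $i = 0, \dots, d - 1$ ist $\tilde{\gamma}(1)$ für ein $[\gamma] \in \pi_1(X, x_0)$.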
-
-Bemerkung 56
-Sei p : Y → X Überlagerung und X einfach zusammenhängend.
-
-Dann ist p ein Homöomorphismus.
-
-Beweis: Wegen Folgerung 3.4.a ist auch $Y$ einfach zusammenhängend und wegen
-Folgerung 3.4.b ist $\deg(p) = 1$, $p$ ist also bijektiv.
-
-Nach Bemerkung 51 ist $p$ offen $\Rightarrow p^{-1}$ ist stetig. $\Rightarrow p$ ist Homöomorphismus. $\blacksquare$
-
-Definition 52
-Eine Überlagerung p : X̃ → X heißt universell, wenn X̃ einfach zusammenhängend ist.
-
-Beispiel 38 (Universelle Überlagerungen)
-R→ S1, t 7→ (cos 2πt, sin 2πt)
-
-R2 → T 2 = R2/Z2
-
-Sn → Pn(R) für n ≥ 2
-
-Satz 3.5
-Sei p : X̃ → X eine universelle Überlagerung, q : Y → X weitere Überlagerung.
-
-Sei x0 ∈ X, x̃0 ∈ X̃, y0 ∈ Y mit q(y0) = x0 = p(x̃0).
-
-Dann gibt es genau eine Überlagerung p̃ : X̃ → Y mit p̃(x̃0) = y0.
-
-Beweis: Sei z ∈ X̃, γz : I → X̃ ein Weg von x̃0 nach z.
-
-Sei δz die eindeutige Liftung von p ◦ γz nach Y mit δz(0) = y0.
-
-Setze p̃(z) = δz(1).
-
-Da X̃ einfach zusammenhängend ist, hängt p̃(z) nicht vom gewählten Weg γz ab.
-
-Offensichtlich ist q(p̃(z)) = p(z).
-
-Zu zeigen: p̃ ist stetig in z ∈ X̃:
-
-Sei W ⊆ Y offene Umgebung von p̃(z).
-$\xRightarrow{q \text{ offen}}$ q(W ) ist offene Umgebung von p(z) = q(p̃(z)).
-
-Sei U ⊆ q(W ) offen wie in Definition 48 und V ⊆ q−1(U) die Komponente, die p̃(z) enthält.
-
-O. B. d. A. sei V ⊆W .
-
-Sei Z := p−1(U). Für u ∈ Z sei δ ein Weg in Z von z nach u.
-
-⇒ γz ∗ δ ist Weg von x̃0 nach u
-⇒ p̃(u) ∈ V
-⇒ Z ⊆ p̃−1(W )
-⇒ p̃ ist stetig
-
-
-
-59 3.3. ÜBERLAGERUNGEN
-
-Folgerung 3.6
-Sind p : X̃ → X und q : Ỹ → X universelle Überlagerungen, so sind X̃ und Ỹ homöomorph.
-
-Beweis: Seien x0 ∈ X, x̃0 ∈ X̃ mit p(x̃0) = x0 und ỹ0 ∈ q−1(x0) ⊆ Ỹ .
-
-Nach Satz 3.5 gibt es genau eine Überlagerung
-
-f : X̃ → Ỹ mit f(x̃0) = ỹ0 und q ◦ f = p
-
-und genau eine Überlagerung
-
-g : Ỹ → X̃ mit g(ỹ0) = x̃0 und p ◦ g = q
-
-Damit gilt: p ◦ g ◦ f = q ◦ f = p, q ◦ f ◦ g = p ◦ g = q. Also ist g ◦ f : X̃ → X̃ Lift von
-p : X̃ → X mit (g ◦ f)(x̃0) = x̃0.
-
-Da auch idX̃ diese Eigenschaft hat, folgt mit Bemerkung 53: g ◦ f = idX̃ .
-Analog gilt f ◦ g = idỸ . �
-
-Die Frage, wann es eine universelle Überlagerung gibt, beantwortet der folgende Satz:
-Definition 53
-
-Sei (X,T) ein topologischer Raum und x ∈ X.
-
-U ⊆ T heißt eine Umgebungsbasis von x, wenn jede offene Umgebung von x ein Element
-von U als Teilmenge enthält.
-
-Satz 3.7
-Es sei X ein wegzusammenhängender topologischer Raum in dem jeder Punkt eine
-Umgebungsbasis aus einfach zusammenhängenden Mengen hat.
-
-Dann gibt es eine universelle Überlagerung.
-
-Beweis: Sei x0 ∈ X und X̃ := { (x, [γ]) | x ∈ X, γ Weg von x0 nach x } und p : X̃ → X, (x, [γ]) ↦
-x.
-
-Die Topologie auf X̃ ist folgende: Definiere eine Umgebungsbasis von (x, [γ]) wie folgt: Es
-sei U eine einfach zusammenhängende Umgebung von x und
-
-Ũ = Ũ(x, [γ]) := { (y, [γ ∗ α]) | y ∈ U,α Weg in U von x nach y }
-
-p ist Überlagerung: p|Ũ : Ũ → U bijektiv. p ist stetig und damit p|Ũ ein Homöomorphismus.
-
-Sind γ1, γ2 Wege von x0 nach x und γ1 ≁ γ2, so ist Ũ(x, [γ1]) ∩ Ũ(x, [γ2]) = ∅, denn: Ist
-γ1 ∗ α ∼ γ2 ∗ α, so ist auch γ1 ∼ γ2. Also ist p eine Überlagerung.
-
-X̃ ist einfach zusammenhängend: Es sei x̃0 := (x0, e) und γ̃ : I → X̃ ein geschlossener Weg
-um x̃0.
-
-Sei γ := p(γ̃).
-
-Annahme: [γ̃] ≠ e
-
-Mit Bemerkung 55.a folgt dann: [γ] ≠ e.
-
-Dann ist der Lift von γ nach X̃ mit Anfangspunkt x̃0 ein Weg von x̃0 nach (x0, [γ]). Wider-
-spruch.
-
-
-
-60 3.3. ÜBERLAGERUNGEN
-
-Definition 54
-Es sei p : Y → X eine Überlagerung und f : Y → Y ein Homöomorphismus.
-
-a) f heißt Decktransformation von p :⇔ p ◦ f = p.
-
-b) Die Decktransformationen von p : Y → X bilden mit der Verkettung eine Gruppe,
-
-die sog. Decktransformationsgruppe. Man schreibt: Deck(p), Deck(Y/X) oder
-Deck(Y → X).
-
-c) p heißt regulär, wenn |Deck(Y/X)| = deg p gilt.
-
-Bemerkung 57 (Eigenschaften der Decktransformation)
-a) (Deck(Y/X), ◦) ist eine Gruppe
-
-b) Ist f ∈ Deck(Y/X) und f ≠ id, dann hat f keinen Fixpunkt.
-
-c) |Deck(Y/X)| ≤ deg p
-
-d) Ist p eine reguläre Überlagerung, dann gilt: ∀x ∈ X : Deck(Y/X) operiert transitiv
-auf der Menge der Urbilder p−1(x).
-
-Beweis:
-
-a) Es gilt:
-
-• idY ∈ Deck(Y/X),
-
-• f, g ∈ Deck(Y/X) ⇒ p ◦ (f ◦ g) = (p ◦ f) ◦ g = p ◦ g = p ⇒ f ◦ g ∈ Deck(Y/X)
-
-• f ∈ Deck(Y/X) ⇒ p ◦ f = p ⇒ p ◦ f−1 = (p ◦ f) ◦ f−1 = p ◦ (f ◦ f−1) = p ⇒
-f−1 ∈ Deck(Y/X)
-
-b) Die Menge
-Fix(f) = { y ∈ Y | f(y) = y }
-
-ist abgeschlossen als Urbild der Diagonale ∆ ⊆ Y × Y unter der stetigen Abbildung
-y ↦ (f(y), y). Außerdem ist Fix(f) offen, denn ist y ∈ Fix(f), so sei U eine Umgebung
-von p(y) ∈ X wie in Definition 48 und V ⊆ p−1(U) die Komponente, die y enthält;
-also p : V → U ein Homöomorphismus. Dann ist W := f−1(V ) ∩ V offene Umgebung
-von y.
-
-Für z ∈ W ist f(z) ∈ V und p(f(z)) = p(z). Da p injektiv auf V ist, folgt f(z) = z,
-d. h. W ⊆ Fix(f).
-Da Y zusammenhängend ist, folgt aus Fix(f) ≠ ∅ schon Fix(f) = Y , also f = idY .
-
-c) Es sei x0 ∈ X, deg(p) = d und p−1(x0) = { y0, . . . , yd−1 }. Für f ∈ Deck(Y/X) ist
-f(y0) ∈ { y0, . . . , yd−1 }.
-Zu i ∈ { 0, . . . , d− 1 } gibt es höchstens ein f ∈ Deck(Y/X) mit f(y0) = yi, denn ist
-f(y0) = g(y0), so ist (g−1 ◦ f)(y0) = y0, also nach Bemerkung 57.b g−1 ◦ f = idY .
-
-d) Wenn jemand den Beweis macht, bitte an info@martin-thoma.de schicken.
-
-Beispiel 39 (Decktransformationen)
-1) p : R→ S1 : Deck(R/S1) = { t 7→ t+ n | n ∈ Z } ∼= Z
-
-2) p : R2 → T 2 : Deck(R2/T 2) ∼= Z× Z = Z2
-
-3) p : Sn → Pn(R) : Deck(Sn/Pn(R)) = { x 7→ ±x } ∼= Z/2Z
-
-
-
-61 3.3. ÜBERLAGERUNGEN
-
-Nun werden wir eine Verbindung zwischen der Decktransformationsgruppe und der Fundamen-
-talgruppe herstellen:
-
-Satz 3.8
-Ist p : X̃ → X eine universelle Überlagerung, so gilt:
-
-Deck(X̃/X) ∼= π1(X,x0) ∀x0 ∈ X
-
-Beweis: Wähle x̃0 ∈ p−1(x0). Es sei ρ : Deck(X̃/X)→ π1(X,x0) die Abbildung, die f auf [p(γf )]
-abbildet, wobei γf ein Weg von x̃0 nach f(x̃0) sei. Da X̃ einfach zusammenhängend ist, ist
-γf bis auf Homotopie eindeutig bestimmt und damit auch ρ wohldefiniert.
-
-• ρ ist Gruppenhomomorphismus: Seien f, g ∈ Deck(X̃/X) ⇒ γg◦f = γg ∗ g(γf ) ⇒
-$p(\gamma_{g \circ f}) = p(\gamma_g) * \underbrace{(p \circ g)}_{= p}(\gamma_f) = p(\gamma_g) * p(\gamma_f)$, also $\rho(g \circ f) = \rho(g) \cdot \rho(f)$
-
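-• ρ ist injektiv: $\rho(f) = e \Rightarrow p(\gamma_f) \sim \gamma_{x_0} \xRightarrow{\text{Satz 3.2}} \gamma_f \sim \gamma_{\tilde{x}_0} \Rightarrow f(\tilde{x}_0) = \tilde{x}_0 \xRightarrow{\text{Bem. 57.b}} f = \mathrm{id}_{\tilde{X}}$.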
-
-• ρ ist surjektiv: Sei [γ] ∈ π1(X,x0), γ̃ Lift von γ nach X̃ mit Anfangspunkt x̃0. Der
-Endpunkt von γ̃ sei x̃1.
-
-p ist reguläre Überlagerung: Seien x̃0, x̃1 ∈ X̃ mit p(x̃0) = p(x̃1). Nach Satz 3.5 gibt
-es genau eine Überlagerung p̃ : X̃ → X mit p = p ◦ p̃ und p̃(x̃0) = x̃1. Somit ist p̃ eine
-Decktransformation und damit p eine reguläre Überlagerung.
-
-Da p reguläre Überlagerung ist, gibt es ein f ∈ Deck(X̃/X) mit f(x̃0) = x̃1.
-
-Aus der Definition von ρ folgt: ρ(f) = [p(γf )] = [γ]
-
-�
-
-Beispiel 40 (Bestimmung von $\pi_1(S^1)$)
-
-p : R→ S1, t ↦ (cos 2πt, sin 2πt) ist universelle Überlagerung, da R einfach zusammenhängend ist.
-
-Für n ∈ Z sei fn : R→ R, t 7→ t+ n die Translation um n.
-
-Es gilt: (p ◦ fn)(t) = p(fn(t)) = p(t) ∀t ∈ R, d. h. fn ist Decktransformation.
-
-Ist umgekehrt g irgendeine Decktransformation, so gilt insbesondere für t = 0:
-
-(cos(2πg(0)), sin(2πg(0))) = (p ◦ g)(0) = p(0) = (1, 0)
-
-Es existiert n ∈ Z mit g(0) = n. Da auch fn(0) = 0 + n = n gilt, folgt mit Bemerkung 57.b
-g = fn. Damit folgt:
-
-Deck(R/S1) = { fn | n ∈ Z } ∼= Z
-
-Nach Satz 3.8 also π1(S1) ∼= Deck(R/S1) ∼= Z
-
-
-
-62 3.4. GRUPPENOPERATIONEN
-
-3.4 Gruppenoperationen
-
-Definition 55
-Sei (G, ·) eine Gruppe und X eine Menge.
-
-Eine Gruppenoperation von G auf X ist eine Abbildung ◦ : G×X → X für die gilt:
-
-a) 1G ◦ x = x ∀x ∈ X
-b) (g · h) ◦ x = g ◦ (h ◦ x) ∀g, h ∈ G∀x ∈ X
-
-Beispiel 41
-1) G = (Z,+), X = R, n ◦ x = x+ n
-
-2) G operiert auf X = G durch g ◦ h := g · h
-3) G operiert auf X = G durch g ◦ h := g · h · g−1, denn
-
-i) $1_G \circ h = 1_G \cdot h \cdot 1_G^{-1} = h$
-
-ii) $(g_1 \cdot g_2) \circ h = (g_1 \cdot g_2) \cdot h \cdot (g_1 \cdot g_2)^{-1} = g_1 \cdot (g_2 \cdot h \cdot g_2^{-1}) \cdot g_1^{-1} = g_1 \circ (g_2 \circ h)$
-
-Definition 56
-Sei G eine Gruppe, X ein topologischer Raum und ◦ : G×X → X eine Gruppenoperation.
-
-a) G operiert durch Homöomorphismen, wenn für jedes g ∈ G die Abbildung
-
-mg : X → X,x 7→ g ◦ x
-
-ein Homöomorphismus ist.
-
-b) Ist G eine topologische Gruppe, so heißt die Gruppenoperation ◦ stetig, wenn
-
-∀g ∈ G : mg ist stetig
-
-gilt.
-
-Bemerkung 58
-Jede stetige Gruppenoperation ist eine Gruppenoperation durch Homöomorphismen.
-
-Beweis: Nach Voraussetzung ist mg := ◦|{ g }×X : X → X,x 7→ g ◦ x stetig.
-
-Die Umkehrabbildung zu mg ist mg−1 :
-
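-$$\begin{aligned}
-(m_{g^{-1}} \circ m_g)(x) &= m_{g^{-1}}(m_g(x)) \\
-&= m_{g^{-1}}(g \circ x) \\
-&= g^{-1} \circ (g \circ x) \\
-&\overset{\text{Def. 55.b}}{=} (g^{-1} \cdot g) \circ x \\
-&= 1_G \circ x \\
-&\overset{\text{Def. 55.a}}{=} x
-\end{aligned}$$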
-
-Beispiel 42
-In Beispiel 41.1 operiert Z durch Homöomorphismen.
-
-
-
-63 3.4. GRUPPENOPERATIONEN
-
-Bemerkung 59
-Sei G eine Gruppe und X eine Menge.
-
-a) Die Gruppenoperationen von G auf X entsprechen bijektiv den Gruppenhomomorphismen
-ϱ : G→ Perm(X) = Sym(X) = { f : X → X | f ist bijektiv }
-
-b) Ist X ein topologischer Raum, so entsprechen dabei die Gruppenoperationen durch
-Homöomorphismen den Gruppenhomomorphismen G→ Homöo(X)
-
-Beweis:
-
-Sei ◦ : G × X → X eine Gruppenoperation von G auf X. Dann sei ϱ : G → Perm(X)
-definiert durch ϱ(g)(x) = g ◦ x ∀g ∈ G, x ∈ X, also ϱ(g) = mg.
-
-ϱ ist Homomorphismus: ϱ(g1 · g2) = mg1·g2 = mg1 ◦mg2 = ϱ(g1) ◦ ϱ(g2), denn für x ∈ X :
-ϱ(g1 · g2)(x) = (g1 · g2) ◦ x = g1 ◦ (g2 ◦ x) = ϱ(g1)(ϱ(g2)(x)) = (ϱ(g1) ◦ ϱ(g2))(x)
-
-Umgekehrt: Sei ϱ : G → Perm(X) Gruppenhomomorphismus. Definiere ◦ : G × X → X
-durch g ◦ x = ϱ(g)(x).
-
-z. z. Definition 55.b:
-
-g1 ◦ (g2 ◦ x) = ϱ(g1)(g2 ◦ x)
-
-= ϱ(g1)(ϱ(g2)(x))
-
-= (ϱ(g1) ◦ ϱ(g2))(x)
-
-= ϱ(g1 · g2)(x) (da ϱ ein Homomorphismus ist)
-
-= (g1 · g2) ◦ x
-
-z. z. Definition 55.a: 1G ◦ x = ϱ(1G)(x) = idX(x) = x, weil ϱ ein Homomorphismus ist.
-Beispiel 43
-
-Sei X ein wegzusammenhängender topologischer Raum, p : X̃ → X eine universelle Überla-
-gerung, x0 ∈ X, x̃0 ∈ X̃ mit p(x̃0) = x0.
-
-Dann operiert π1(X,x0) auf X̃ durch Homöomorphismen wie folgt:
-
-Für [γ] ∈ π1(X,x0) und x̃ ∈ X̃ sei $[\gamma] \circ \tilde{x} = \widetilde{\gamma * \delta}(1)$, wobei δ̃ ein Weg von x̃0 nach x̃ in X̃
-sei und δ := p(δ̃) = p ◦ δ̃.
-
-Also: δ ist ein Weg in X von x0 nach x = p(x̃) und $\widetilde{\gamma * \delta}$ die Liftung von γ ∗ δ mit
-Anfangspunkt x̃0.
-
-[γ] ◦ x̃ hängt nicht von der Wahl von δ̃ ab; ist δ̃′ ein anderer Weg von x̃0 nach x̃, so sind δ̃
-und δ̃′ homotop, also auch $\widetilde{\gamma * \delta}$ und $\widetilde{\gamma * \delta'}$ homotop.
-
-Gruppenoperation, denn:
-
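-i) $[e] \circ \tilde{x} = \widetilde{e * \delta}(1) = \tilde{x}$
-
-ii) $([\gamma_1] * [\gamma_2]) \circ \tilde{x} = [\gamma_1 * \gamma_2] \circ \tilde{x} = \widetilde{\gamma_1 * \gamma_2 * \delta}(1) = [\gamma_1] \circ \widetilde{\gamma_2 * \delta}(1) = [\gamma_1] \circ ([\gamma_2] \circ \tilde{x})$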
-
-Erinnerung: Die Konstruktion aus Bemerkung 59 induziert zu der Gruppenoperation von π1(X,x0)
-aus Beispiel 43 einen Gruppenhomomorphismus ϱ : π1(X,x0)→ Homöo(X̃). Nach Satz 3.8 ist
-
-$\varrho(\pi_1(X, x_0)) = \mathrm{Deck}(\tilde{X}/X) = \{\, f : \tilde{X} \to \tilde{X} \text{ Homöomorphismus} \mid p \circ f = p \,\}$
-
-
-
-64 3.4. GRUPPENOPERATIONEN
-
-Beispiel 44
-Sei X := S2 ⊆ R3 und τ die Drehung um die z-Achse um 180◦.
-
-G = 〈τ〉 = { id, τ } operiert auf S2 durch Homöomorphismen.
-
-Frage: Was ist S2/G? Ist S2/G eine Mannigfaltigkeit?
-
-
-
-4 Euklidische und nichteuklidische
-Geometrie
-
-Definition 57
-Das Tripel (X, d,G) heißt genau dann eine Geometrie, wenn (X, d) ein metrischer Raum
-
-und ∅ ≠ G ⊆ P(X) gilt. Dann heißt G die Menge aller Geraden.
-
-4.1 Axiome für die euklidische Ebene
-
-Axiome bilden die Grundbausteine jeder mathematischen Theorie. Eine Sammlung aus Axiomen
-nennt man Axiomensystem. Da der Begriff des Axiomensystems so grundlegend ist, hat man
-auch ein paar sehr grundlegende Forderungen an ihn: Axiomensysteme sollen widerspruchsfrei
-sein, die Axiome sollen möglichst unabhängig sein und Vollständigkeit wäre auch toll. Mit
-Unabhängigkeit ist gemeint, dass kein Axiom sich aus einem anderen herleiten lässt. Dies scheint
-auf den ersten Blick eine einfache Eigenschaft zu sein. Auf den zweiten Blick muss man jedoch
-einsehen, dass das Parallelenproblem, also die Frage ob das Parallelenaxiom unabhängig von
-den restlichen Axiomen ist, über 2000 Jahre nicht gelöst wurde. Ein ganz anderes Kaliber ist
-die Frage nach der Vollständigkeit. Ein Axiomensystem gilt als vollständig, wenn jede Aussage
-innerhalb des Systems verifizierbar oder falsifizierbar ist. Interessant ist hierbei der Gödelsche
-Unvollständigkeitssatz, der z. B. für die Arithmetik beweist, dass nicht alle Aussagen formal
-bewiesen oder widerlegt werden können.
-
-Kehren wir nun jedoch zurück zur Geometrie. Euklid hat in seiner Abhandlung „Die Elemente“
-ein Axiomensystem für die Geometrie aufgestellt.
-
-Euklids Axiome
-
-• Strecke zwischen je zwei Punkten
-
-• Jede Strecke bestimmt genau eine Gerade
-
-• Kreis (um jeden Punkt mit jedem Radius)
-
-• Je zwei rechte Winkel sind gleich (Isometrie, Bewegung)
-
-• Parallelenaxiom von Euklid:
-Wird eine Gerade so von zwei Geraden geschnitten, dass die Summe der Innenwinkel
-kleiner als zwei Rechte ist, dann schneiden sich diese Geraden auf der Seite dieser Winkel.
-
-Man mache sich klar, dass das nur dann nicht der Fall ist, wenn beide Geraden par-
-allel sind und senkrecht auf die erste stehen.
-
-Definition 58
-Eine euklidische Ebene ist eine Geometrie (X, d,G), die Axiome §1 - §5 erfüllt:
-
-§1) Inzidenzaxiome:
-
-
-
-66 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
-
-(i) Zu P ≠ Q ∈ X gibt es genau ein g ∈ G mit { P,Q } ⊆ g.
-(ii) |g| ≥ 2 ∀g ∈ G
-(iii) X /∈ G
-
-§2) Abstandsaxiom: Zu P,Q,R ∈ X gibt es genau dann ein g ∈ G mit { P,Q,R } ⊆ g,
-wenn gilt:
-
-• d(P,R) = d(P,Q) + d(Q,R) oder
-
-• d(P,Q) = d(P,R) + d(R,Q) oder
-
-• d(Q,R) = d(Q,P ) + d(P,R)
-
-Definition 59
-Sei (X, d,G) eine Geometrie und seien P,Q,R ∈ X.
-
-a) P,Q,R liegen kollinear, wenn es g ∈ G gibt mit { P,Q,R } ⊆ g.
-
-b) Q liegt zwischen P und R, wenn d(P,R) = d(P,Q) + d(Q,R)
-
-c) Strecke PR := {Q ∈ X | Q liegt zwischen P und R }
-
-d) Halbgeraden:
-PR+ := {Q ∈ X|Q liegt zwischen P und R oder
-
-R liegt zwischen P und Q}
-PR− := {Q ∈ X | P liegt zwischen Q und R }
-
-P R
-
-PR− PR
-
-PR+
-
-Abbildung 4.1: Halbgeraden
-
-Bemerkung 60
-a) PR+ ∪ PR− = PR
-
-b) PR+ ∩ PR− = { P }
-
-Beweis:
-
-a) „⊆“ folgt direkt aus der Definition von PR+ und PR−
-
-„⊇“: Sei Q ∈ PR⇒ P,Q,R sind kollinear.
-
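-$\overset{\S 2}{\Rightarrow} \begin{cases} Q \text{ liegt zwischen } P \text{ und } R \Rightarrow Q \in PR^{+} \\ R \text{ liegt zwischen } P \text{ und } Q \Rightarrow Q \in PR^{+} \\ P \text{ liegt zwischen } Q \text{ und } R \Rightarrow Q \in PR^{-} \end{cases}$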
-
-b) „⊇“ ist offensichtlich
-„⊆“: Sei Q ∈ PR+ ∩ PR−. Dann ist d(Q,R) = d(P,Q) + d(P,R), weil Q ∈ PR−, und
-$\begin{cases} d(P,R) = d(P,Q) + d(Q,R) \text{ oder} \\ d(P,Q) = d(P,R) + d(R,Q) \end{cases}$, weil Q ∈ PR+.
-
-
-
-67 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
-
-⇒ d(Q,R) = 2d(P,Q) + d(Q,R)
-
-⇒ d(P,Q) = 0
-
-⇒ P = Q
-
-d(P,Q) = 2d(P,R) + d(P,Q)
-
-⇒ P = R
-
-⇒Widerspruch
-
-Definition 60
-§3) Anordnungsaxiome
-
-(i) Zu jeder Halbgerade H mit Anfangspunkt P ∈ X und jedem r ∈ R≥0 gibt es
-genau ein Q ∈ H mit d(P,Q) = r.
-
-(ii) Jede Gerade g zerlegt X \ g = H1 ∪̇H2 in zwei nichtleere Teilmengen H1, H2, sodass
-für alle A ∈ Hi, B ∈ Hj mit i, j ∈ { 1, 2 } gilt: AB ∩ g ≠ ∅ ⇔ i ≠ j.
-
-Diese Teilmengen Hi heißen Halbebenen bzgl. g.
-
-§4) Bewegungsaxiom: Zu P,Q, P ′, Q′ ∈ X mit d(P,Q) = d(P ′, Q′) gibt es mindestens
-2 Isometrien ϕ1, ϕ2 mit ϕi(P ) = P ′ und ϕi(Q) = Q′ mit i = 1, 2.1
-
-§5) Parallelenaxiom: Zu jeder Geraden g ∈ G und jedem Punkt P ∈ X \ g gibt es
-höchstens ein h ∈ G mit P ∈ h und h ∩ g = ∅. h heißt Parallele zu g durch P .
-
-Satz 4.1 (Satz von Pasch)
-Seien P , Q, R nicht kollinear, g ∈ G mit g ∩ { P,Q,R } = ∅ und g ∩ PQ ≠ ∅.
-
-Dann ist entweder g ∩ PR ≠ ∅ oder g ∩QR ≠ ∅.
-
-Dieser Satz besagt, dass Geraden, die eine Seite eines Dreiecks (also nicht nur eine Ecke)
-schneiden, auch eine weitere Seite schneiden.
-
-Beweis: g ∩ PQ ≠ ∅
-$\overset{\S 3\text{(ii)}}{\Rightarrow}$ P und Q liegen in verschiedenen Halbebenen bzgl. g
-⇒ o. B. d. A. R und P liegen in verschiedenen Halbebenen bzgl. g
-⇒ g ∩RP ≠ ∅
-
-Bemerkung 61
-Sei P,Q ∈ X mit P 6= Q sowie A,B ∈ X \ PQ mit A 6= B. Außerdem seien A und B in der
-selben Halbebene bzgl. PQ sowie Q und B in der selben Halbebene bzgl. PA.
-
-Dann gilt: PB+ ∩AQ 6= ∅
-
-Auch Bemerkung 61 lässt sich umgangssprachlich sehr viel einfacher ausdrücken: Die Diagonalen
-eines konvexen Vierecks schneiden sich.
-
-Beweis: Sei P ′ ∈ PQ−, P ′ 6= P
-Satz 4.1
-====⇒ PB schneidet AP ′ ∪AQ
-
-Sei C der Schnittpunkt. Dann gilt:
-
-1Die „Verschiebung“ von P ′Q′ nach PQ und die Isometrie, die zusätzlich an der Gerade durch P und Q spiegelt.
-
-
-
-68 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
-
-P
-P ′
-
-Q
-
-A B
-
-C
-
-Abbildung 4.2: Situation aus Bemerkung 61
-
-(i) C ∈ PB+, denn A und B liegen in derselben Halbebene bzgl. PQ = P ′Q, also auch
-AP ′ und AQ.
-
-(ii) C liegt in derselben Halbebene bzgl. PA wie B, weil das für Q gilt.
-
-AP ′ liegt in der anderen Halbebene bzgl. PA⇒ C /∈ P ′A⇒ C ∈ AQ
-Da C ∈ PB+ und C ∈ AQ folgt nun direkt: ∅ 6= { C } ⊆ PB+ ∩AQ �
-
-Bemerkung 62
-Seien P,Q ∈ X mit P 6= Q und A,B ∈ X \PQ in der selben Halbebene bzgl. PQ. Außerdem
-sei d(A,P ) = d(B,P ) und d(A,Q) = d(B,Q).
-
-Dann ist A = B.
-
-P
-
-Q
-
-A
-
-B
-
-Abbildung 4.3: Bemerkung 62: Die beiden roten und die beiden blauen Linien sind gleich lang.
-Intuitiv weiß man, dass daraus folgt, dass A = B gilt.
-
-Beweis: durch Widerspruch
-Annahme: A 6= B
-
-Dann ist B /∈ (PA ∪QA) wegen §2.
-
-1. Fall: Q und B liegen in derselben Halbebene bzgl. PA
-Bem. 61
-=====⇒ PB+ ∩AQ 6= ∅.
-Sei C der Schnittpunkt vom PB und AQ.
-
-Dann gilt:
-
-(i) d(A,C) + d(C,Q) = d(A,Q)
-Vor.
-= d(B,Q) < d(B,C) + d(C,Q)⇒ d(A,C) < d(B,C)
-
-
-
-69 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
-
-P Q
-
-B
-
-C
-
-A
-
-(a) 1. Fall
-
-P
-
-Q
-
-AB
-
-(b) 2. Fall
-
-Abbildung 4.4: Fallunterscheidung aus Bemerkung 62
-
-(ii) a) B liegt zwischen P und C.
-
-d(P,A) + d(A,C) > d(P,C) = d(P,B) + d(B,C) = d(P,A) + d(B,C) ⇒
-d(A,C) > d(B,C)⇒ Widerspruch zu Punkt (i)
-
-b) C liegt zwischen P und B
-
-d(P,C) + d(C,A) > d(P,A) = d(P,B) = d(P,C) + d(C,B)
-⇒ d(C,A) > d(C,B)
-⇒ Widerspruch zu Punkt (i)
-
-2. Fall: Q und B liegen in verschiedenen Halbebenen bzgl. PA.
-
-Dann liegen A und Q in derselben Halbebene bzgl. PB.
-
-Tausche A und B ⇒ Fall 1 �
-Bemerkung 63
-
-Sei (X, d,G) eine Geometrie, die §1 - §3 erfüllt, P,Q ∈ X mit P 6= Q und ϕ eine Isometrie
-mit ϕ(P ) = P und ϕ(Q) = Q.
-
-Dann gilt ϕ(S) = S ∀S ∈ PQ.
-
-Beweis:
-
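-O. B. d. A. sei S ∈ PQ $\overset{\S 2}{\Leftrightarrow}$ d(P,Q) = d(P, S) + d(S,Q)
-
-$\overset{\varphi \in \mathrm{Iso}(X)}{\Rightarrow}$ d(ϕ(P ), ϕ(Q)) = d(ϕ(P ), ϕ(S)) + d(ϕ(S), ϕ(Q))
-
-$\overset{P, Q \in \mathrm{Fix}(\varphi)}{\Rightarrow}$ d(P,Q) = d(P,ϕ(S)) + d(ϕ(S), Q)
-
-⇒ ϕ(S) liegt zwischen P und Q
-⇒ d(P, S) = d(ϕ(P ), ϕ(S)) = d(P,ϕ(S))
-
-$\overset{\S 3\text{(i)}}{\Rightarrow}$ ϕ(S) = S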
-
-�
-
-Proposition 4.2
-In einer Geometrie, die §1 - §3 erfüllt, gibt es zu P, P ′, Q,Q′ mit d(P,Q) = d(P ′, Q′)
-höchstens zwei Isometrien mit ϕ(P ) = P ′ und ϕ(Q) = Q′
-
-
-
-70 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
-
-Aus den Axiomen folgt, dass es in der Situation von §4 höchstens zwei Isometrien mit
-ϕi(P ) = P ′ und ϕi(Q) = Q′ gibt.
-
-Beweis: Seien ϕ1, ϕ2, ϕ3 Isometrien mit ϕi(P ) = P ′, ϕi(Q) = Q′ mit i = 1, 2, 3.
-
-Der Beweis von Proposition 4.2 erfolgt über zwei Teilaussagen:
-
-(Teil i) ∃R ∈ X \ PQ mit ϕ1(R) = ϕ2(R).
-
-(Teil ii) Hat ϕ 3 Fixpunkte, die nicht kollinear sind, so ist ϕ = idX .
-
-Aus (Teil i) und (Teil ii) folgt, dass $\varphi_2^{-1} \circ \varphi_1 = \mathrm{id}_X$, also ϕ2 = ϕ1, da P , Q und R in diesem
-Fall Fixpunkte sind.
-
-Nun zu den Beweisen der Teilaussagen:
-
-(Teil i) Sei R ∈ X \ PQ. Von den drei Punkten ϕ1(R), ϕ2(R), ϕ3(R) liegen zwei in der selben
-Halbebene bzgl. P ′Q′ = ϕi(PQ).
-
-O. B. d. A. seien ϕ1(R) und ϕ2(R) in der selben Halbebene.
-
-Es gilt: d(P ′, ϕ1(R)) = d(ϕ1(P ), ϕ1(R))
-
-= d(P,R)
-
-= d(ϕ2(P ), ϕ2(R))
-
-= d(P ′, ϕ2(R))
-und analog d(Q′, ϕ1(R)) = d(Q′, ϕ2(R))
-
-(Teil ii) Seien P , Q und R Fixpunkte von ϕ, R /∈ PQ und A /∈ PQ ∪ PR ∪ QR. Sei B ∈
-PQ \ { P,Q }. Dann ist ϕ(B) = B wegen Bemerkung 63.
-
-Ist R ∈ AB, so enthält AB 2 Fixpunkte von ϕ $\xRightarrow{\text{Bem. 63}}$ ϕ(A) = A.
-
-P B Q
-
-C
-
-RA
-
-Abbildung 4.5: P,Q,R sind Fixpunkte, B ∈ PQ \ { P,Q }, A /∈ PQ ∪ PR ∪QR
-
-Ist R /∈ AB, so ist AB ∩ PR ≠ ∅ oder AB ∩ RQ ≠ ∅ nach Satz 4.1. Der Schnittpunkt
-C ist dann Fixpunkt von ϕ nach Bemerkung 63 ⇒ ϕ(A) = A.
-
-Bemerkung 64 (SWS-Kongruenzsatz)
-Sei (X, d,G) eine Geometrie, die §1 - §4 erfüllt. Seien außerdem 4ABC und 4A′B′C ′
-Dreiecke, für die gilt:
-
-(i) d(A,B) = d(A′, B′)
-
-(ii) ∠CAB ∼= ∠C ′A′B′
-
-
-
-71 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
-
-(iii) d(A,C) = d(A′, C ′)
-
-Dann ist 4ABC kongruent zu 4A′B′C ′ .
-
-Beweis: Sei ϕ die Isometrie mit ϕ(A′) = A, ϕ(A′C ′+) = AC+ und ϕ(A′B′+) = AB+. Diese
-Isometrie existiert wegen Punkt §4.
-
-⇒ C ∈ ϕ(A′C ′+) und B ∈ ϕ(A′B′+).
-
-d(A′, C ′) = d(ϕ(A′), ϕ(C ′)) = d(A,ϕ(C ′))
-3(i)
-==⇒ ϕ(C ′) = C
-
-d(A′, B′) = d(ϕ(A′), ϕ(B′)) = d(A,ϕ(B′))
-3(i)
-==⇒ ϕ(B′) = B
-
-Also gilt insbesondere ϕ(4A′B′C ′) = 4ABC. �
-
-Bemerkung 65 (WSW-Kongruenzsatz)
-Sei (X, d,G) eine Geometrie, die §1 - §4 erfüllt. Seien außerdem 4ABC und 4A′B′C ′
-Dreiecke, für die gilt:
-
-(i) d(A,B) = d(A′, B′)
-
-(ii) ∠CAB ∼= ∠C ′A′B′
-
-(iii) ∠ABC ∼= ∠A′B′C ′
-
-Dann ist 4ABC kongruent zu 4A′B′C ′ .
-
-Beweis: Sei ϕ die Isometrie mit ϕ(A′) = A, ϕ(B′) = B und ϕ(C ′) liegt in der selben Halbebene
-bzgl. AB wie C. Diese Isometrie existiert wegen §4.
-
-Aus ∠CAB = ∠C ′A′B′ = ∠ϕ(C ′)ϕ(A′)ϕ(B′) = ∠ϕ(C ′)AB folgt, dass ϕ(C ′) ∈ AC+.
-Analog folgt aus ∠ABC = ∠A′B′C ′ = ∠ϕ(A′)ϕ(B′)ϕ(C ′) = ∠ABϕ(C ′), dass ϕ(C ′) ∈
-BC+.
-
-Dann gilt ϕ(C ′) ∈ AC ∩BC = { C } ⇒ ϕ(C ′) = C.
-
-Es gilt also ϕ(4A′B′C ′) = 4ABC. �
-
-Definition 61
-a) Ein Winkel ist ein Punkt P ∈ X zusammen mit 2 Halbgeraden mit Anfangspunkt P .
-
-Man schreibt: ∠R1PR2 bzw. ∠R2PR1
-2
-
-b) Zwei Winkel sind gleich, wenn es eine Isometrie gibt, die den einen Winkel auf den
-anderen abbildet.
-
-c) ∠R′1P ′R′2 heißt kleiner als ∠R1PR2, wenn es eine Isometrie ϕ gibt, mit ϕ(P ′) = P ,
-$\varphi(P'R_1'^{+}) = PR_1^{+}$ und ϕ(R′2) liegt in der gleichen Halbebene bzgl. PR1 wie R2 und in
-der gleichen Halbebene bzgl. PR2 wie R1
-
-d) Im Dreieck 4PQR gibt es Innenwinkel und Außenwinkel.
-
-Bemerkung 66
-In einem Dreieck ist jeder Innenwinkel kleiner als jeder nicht anliegende Außenwinkel.
-
-Beweis: Zeige ∠PRQ < ∠RQP ′.
-
-Sei M der Mittelpunkt der Strecke QR und P ′ ∈ PQ+ \ PQ. Sei A ∈MP− mit d(P,M) =
-d(M,A).
-
-2Für dieses Skript gilt: ∠R1PR2 = ∠R2PR1. Also sind insbesondere alle Winkel ≤ 180◦.
-
-
-
-72 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
-
-P R′1 R1
-
-R′2
-
-R2
-
-(a) ∠R′1P ′R′2 ist kleiner als ∠R1PR2,
-vgl. Definition 61.c
-
-P
-
-Q R
-
-(b) Innenwinkel und Außenwin-
-kel in 4PQR, vgl. Definiti-
-on 61.d
-
-Abbildung 4.6: Situation aus Definition 61
-
-Q M
-
-A
-
-P
-
-R
-
-(a) Parallelogramm AQPR
-
-α
-
-β
-
-R
-
-Q P
-
-(b) Innen- und Außenwin-
-kel von 4PQR
-
-Abbildung 4.7: Situation aus Bemerkung 66
-
-Es gilt: d(Q,M) = d(M,R) und d(P,M) = d(M,A) sowie ∠PMR = ∠AMQ ⇒ △MRP
-ist kongruent zu △MQA, denn eine der beiden Isometrien, die ∠PMR auf ∠AMQ abbildet,
-bildet R auf Q und P auf A ab.
-
-⇒ ∠MQA = ∠MRP = ∠QRP = ∠PRQ.
-
-Noch zu zeigen: ∠MQA < ∠RQP ′, denn A liegt in der selben Halbebene bzgl. PQ wie M .
-
-Proposition 4.3 (Existenz der Parallelen)
-Sei (X, d,G) eine Geometrie mit den Axiomen §1 - §4.
-
-Dann gibt es zu jeder Geraden g ∈ G und jedem Punkt P ∈ X \ g mindestens eine
-Parallele h ∈ G mit P ∈ h und g ∩ h = ∅.
-
-Beweis: Seien P,Q ∈ f ∈ G und ϕ die Isometrie, die Q auf P und P auf P ′ ∈ f mit
-d(P, P ′) = d(P,Q) abbildet und die Halbebenen bzgl. f erhält.
-
-
-
-73 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
-
-Q
-
-h
-
-f
-
-g
-
-P
-
-Abbildung 4.8: Situation aus Proposition 4.3
-
-Annahme: ϕ(g) ∩ g 6= ∅
-⇒ Es gibt einen Schnittpunkt {R } = ϕ(g) ∩ g.
-Dann ist ∠RQP = ∠RQP ′ < ∠RPP ′ nach Bemerkung 66 und ∠RQP = ∠RPP ′, weil
-ϕ(∠RQP ) = ∠RPP ′.
-⇒ Widerspruch
-⇒ ϕ(g) ∩ g = ∅ �
-
-Folgerung 4.4
-Die Summe zweier Innenwinkel in einem Dreieck ist kleiner als π.
-
-D. h. es gibt eine Isometrie ϕ mit ϕ(Q) = P und ϕ(QP+) = PR+, sodass ϕ(R) in der gleichen
-Halbebene bzgl. PQ liegt wie R.
-
-Beweis: Die Summe eines Innenwinkels mit den anliegenden Außenwinkeln ist π, d. h. die
-beiden Halbgeraden bilden eine Gerade.
-
-Abbildung 4.9: In der sphärischen Geometrie gibt es, im Gegensatz zur euklidischen Geometrie,
-Dreiecke mit drei 90◦-Winkeln.
-
-Proposition 4.5
-In einer Geometrie mit den Axiomen §1 - §4 ist in jedem Dreieck die Summe der
-Innenwinkel ≤ π.
-
-
-
-74 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE
-
-Sei im Folgenden „IWS“ die „Innenwinkelsumme“.
-
-Beweis: Sei 4 ein Dreieck mit IWS(4) = π + ε
-
-α
-β
-
-γ
-
-P
-
-(a) Summe der Winkel α, β und γ
-
-α1
-α2 β
-
-γ
-M
-
-A B
-
-C A′
-
-α
-
-(b) Situation aus Proposition 4.5
-
-Abbildung 4.10: Situation aus Proposition 4.5
-
-Sei α ein Innenwinkel von 4.
-
-Beh.: Es gibt ein Dreieck △′ mit IWS(△′) = IWS(△) und einem Innenwinkel $\alpha' \le \frac{\alpha}{2}$.
-
-Dann gibt es für jedes n ein △n mit IWS(△n) = IWS(△) und Innenwinkel $\alpha' \le \frac{\alpha}{2^n}$. Für
-$\frac{\alpha}{2^n} < \varepsilon$ ist dann die Summe der beiden anderen Innenwinkel von △n größer als π ⇒ Widerspruch
-zu Folgerung 4.4.
-
-Beweis: Es seien A,B,C ∈ X und 4 das Dreieck mit den Eckpunkten A,B,C und α sei
-der Innenwinkel bei A, β der Innenwinkel bei B und γ der Innenwinkel bei C.
-
-Sei M der Mittelpunkt der Strecke BC. Sei außerdem α1 = ∠CAM und α2 = ∠BAM .
-
-Sei weiter A′ ∈MA− mit d(A′,M) = d(A,M).
-
-Die Situation ist in Abbildung 4.10b skizziert.
-
-⇒ △(MA′C) und △(MAB) sind kongruent. ⇒ ∠ABM = ∠A′CM und ∠MA′C =
-∠MAB. ⇒ α+β+γ = IWS(△ABC) = IWS(△AA′C) und α1 +α2 = α, also o. B. d. A.
-$\alpha_1 \le \frac{\alpha}{2}$
-
-Bemerkung 67
-In einer euklidischen Ebene ist in jedem Dreieck die Innenwinkelsumme gleich π.
-
-α′
-α′′
-
-α β
-
-β′
-
-γ
-
-A B
-
-C
-g
-
-Abbildung 4.11: Situation aus Bemerkung 67
-
-Beweis: Sei g eine Parallele von AB durch C.
-
-• Es gilt α′ = α wegen Proposition 4.3.
-
-• Es gilt β′ = β wegen Proposition 4.3.
-
-• Es gilt α′′ = α′ wegen Aufgabe 8.
-
-
-
-75 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE
-
-⇒ IWS(4ABC) = γ + α′′ + β′ = π
-
-Aus der Eigenschaft, dass die Innenwinkelsumme von Dreiecken in der euklidischen Ebene gleich
-π ist, folgen direkt die Kongruenzsätze SWW und WWS über den Kongruenzsatz WSW.
-
-4.2 Weitere Eigenschaften einer euklidischen Ebene
-
-Satz 4.6 (Strahlensatz)
-In ähnlichen Dreiecken sind Verhältnisse entsprechender Seiten gleich.
-
-x
-
-y
-
-−1 0 1 2 3 4
-0
-
-1
-
-2
-
-3
-
-z
-
-x
-
-λ2z
-
-λ2x
-
-Abbildung 4.12: Strahlensatz
-
-Der Beweis wird hier nicht geführt. Für Beweisvorschläge wäre ich dankbar.
-
-A B′
-
-C ′
-
-B
-
-C
-
-c
-
-b a
-
-c′
-
-b′
-
-a′
-
-Abbildung 4.13: Die Dreiecke 4ABC und 4AB′C ′ sind ähnlich.
-
-4.2.1 Flächeninhalt
-
-Definition 62
-„Simplizialkomplexe“ in euklidischer Ebene (X, d) heißen flächengleich, wenn sie sich in
-kongruente Dreiecke zerlegen lassen.
-
-
-
-76 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE
-
-(a) Zwei kongruente Dreiecke (b) Zwei weitere kongruente Drei-
-ecke
-
-Abbildung 4.14: Flächengleichheit
-
-Der Flächeninhalt eines Dreiecks ist 1/2 ·Grundseite ·Höhe.
-
-A B
-
-C
-
-LC
-
-hc
-
-c
-
-(a) 1/2 · |AB| · |hc|
-
-·
-
-A B
-
-C
-
-LA
-
-ha
-
-c
-
-(b) 1/2 · |BC| · |ha|
-
-Abbildung 4.15: Flächenberechnung im Dreieck
-
-Zu zeigen: Unabhängigkeit von der gewählten Grundseite.
-
-α
-
-α
-
-γ
-
-γ
-
-A B
-
-C
-
-LA
-
-LC
-
-Abbildung 4.16: △ABLA und △CLCB sind ähnlich, weil IWS = π
-
-$\xRightarrow{\text{Strahlensatz}} \frac{a}{h_c} = \frac{c}{h_a} \Rightarrow a \cdot h_a = c \cdot h_c$
-
-Satz 4.7 (Satz des Pythagoras)
-Im rechtwinkligen Dreieck gilt a2 + b2 = c2, wobei c die Hypotenuse und a, b die beiden
-Katheten sind.
-
-Beweis: $(a+b) \cdot (a+b) = a^2 + 2ab + b^2 = c^2 + 4 \cdot (\tfrac{1}{2} \cdot a \cdot b) \Rightarrow a^2 + b^2 = c^2$
-
-
-
-77 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE
-
-c
-
-b a
-
-A B
-
-C
-·
-
-(a) a, b sind Katheten und c ist die Hypo-
-tenuse
-
-b a
-
-b
-
-a
-
-ba
-
-b
-
-a
-
-·
-
-··
-
-·
-
-γ
-
-(b) Beweisskizze
-
-Abbildung 4.17: Satz des Pythagoras
-
-Satz 4.8
-Bis auf Isometrie gibt es genau eine euklidische Ebene (X, d,G), nämlich X = R2,
-d = euklidischer Abstand, G = Menge der üblichen Geraden.
-
-Beweis:
-
-(i) (R2, dEuklid) ist offensichtlich eine euklidische Ebene.
-
-(ii) Sei (X, d) eine euklidische Ebene und g1, g2 Geraden in X, die sich in einem Punkt 0
-im rechten Winkel schneiden.
-
-Sei P ∈ X \ (g1 ∪ g2) ein Punkt und PX der Fußpunkt des Lots von P auf g1 (vgl.
-Aufgabe 9 (c)) und PY der Fußpunkt des Lots von P auf g2.
-
-Sei xP := d(PX , 0) und yP := d(PY , 0).
-
-In Abbildung 4.19 wurde die Situation skizziert.
-
-Sei h : X → R2 eine Abbildung mit h(P ) := (xP , yP ) Dadurch wird h auf dem
-Quadranten definiert, in dem P liegt, d. h.
-
-∀Q ∈ X mit PQ ∩ g1 = ∅ = PQ ∩ g2
-
-Fortsetzung auf ganz X durch konsistente Vorzeichenwahl.
-
-Im Folgenden werden zwei Aussagen gezeigt:
-
-(i) h ist surjektiv
-
-(ii) h ist eine Isometrie
-
-Da jede Isometrie injektiv ist, folgt aus (i) und (ii), dass h bijektiv ist.
-
-Nun zu den Beweisen der Teilaussagen:
-
-
-
-78 4.3. HYPERBOLISCHE GEOMETRIE
-
-·
-g1
-
-g2
-
-P
-
-X
-
-(a) Schritt 1
-
-·
-g1
-
-g2
-
-xP
-
-yP
-
-P
-
-0 PX
-
-PY
-
-X
-
-(b) Schritt 2
-
-Abbildung 4.18: Beweis zu Satz 4.8
-
-(i) Sei (x, y) ∈ R2, z. B. x ≥ 0, y ≥ 0. Sei P ′ ∈ g1 mit d(0, P ′) = x und P ′ auf der
-gleichen Seite von g2 wie P .
-
-g1
-
-g2
-
-xP
-
-yP
-
-P
-
-Q
-
-0
-
-R
-
-X
-
-Abbildung 4.19: Beweis zu Satz 4.8
-
-(ii) Zu Zeigen: d(P,Q) = d(h(P ), h(Q))
-
-$d(P,Q)^2 \overset{\text{Pythagoras}}{=} d(P,R)^2 + d(R,Q)^2 = (y_Q - y_P)^2 + (x_Q - x_P)^2$.
-
-h(Q) = (xQ, yQ)
-
-4.3 Hyperbolische Geometrie
-
-Definition 63
-Sei
-
-H := { z ∈ C | ℑ(z) > 0 } = { (x, y) ∈ R2 | y > 0 }
-
-
-
-79 4.3. HYPERBOLISCHE GEOMETRIE
-
-die obere Halbebene bzw. Poincaré-Halbebene und G = G1 ∪G2 mit
-
-G1 = { g1 ⊆ H | ∃m ∈ R, r ∈ R>0 : g1 = { z ∈ H : |z −m| = r } }
-G2 = { g2 ⊆ H | ∃x ∈ R : g2 = { z ∈ H : ℜ(z) = x } }
-
-Die Elemente aus G heißen hyperbolische Geraden.
-
-Bemerkung 68 (Eigenschaften der hyperbolischen Geraden)
-Die hyperbolischen Geraden erfüllen. . .
-
-a) . . . die Inzidenzaxiome §1
-
-b) . . . das Anordnungsaxiom §3 (ii)
-
-c) . . . nicht das Parallelenaxiom §5
-
-Beweis:
-
-a) Offensichtlich sind §1 (iii) und §1 (ii) erfüllt. Für §1 (i) gilt:
-Gegeben z1, z2 ∈ H
-Existenz:
-
-Fall 1 ℜ(z1) = ℜ(z2)
-⇒ z1 und z2 liegen auf
-
-g = { z ∈ H | ℜ(z) = ℜ(z1) }
-
-Siehe Abbildung 4.20a.
-
-Fall 2 <(z1) 6= <(z2)
-Betrachte nun z1 und z2 als Punkte in der euklidischen Ebene. Die Mittelsenkrech-
-te zu diesen Punkten schneidet die x-Achse. Alle Punkte auf der Mittelsenkrechten
-zu z1 und z2 sind gleich weit von z1 und z2 entfernt. Daher ist der Schnittpunkt mit
-der x-Achse der Mittelpunkt eines Kreises durch z1 und z2 (vgl. Abbildung 4.20b)
-
-x
-
-y
-
-−1 0 1 2 3 4 5
-0
-
-1
-
-2
-
-3
-
-4
-
-Z1
-
-Z2
-
-<(Z1)
-
-(a) Fall 1
-
-x
-
-y
-
-−1 0 1 2 3 4 5
-0
-
-1
-
-2
-
-3
-
-4
-
-Z1
-
-Z2
-
-(b) Fall 2
-
-Abbildung 4.20: Zwei Punkte liegen in der hyperbolischen Geometrie immer auf genau einer
-Geraden
-
-b) Sei g ∈ G1 ∪̇G2 eine hyperbolische Gerade.
-
-
-
-80 4.3. HYPERBOLISCHE GEOMETRIE
-
-Es existieren disjunkte Zerlegungen von H \ g:
-Fall 1: g = { z ∈ H | |z −m| = r } ∈ G1
-
-Dann gilt:
-$\mathbb{H} \setminus g = \underbrace{\{\, z \in \mathbb{H} \mid |z - m| < r \,\}}_{=: H_1 \text{ (Kreisinneres)}} \;\dot\cup\; \underbrace{\{\, z \in \mathbb{H} \mid |z - m| > r \,\}}_{=: H_2 \text{ (Kreisäußeres)}}$
-
-Da r > 0 ist H1 nicht leer, da r ∈ R ist H2 nicht leer.
-
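-Fall 2: g = { z ∈ H | ℜ(z) = x } ∈ G2
-
-Die disjunkte Zerlegung ist:
-
-$\mathbb{H} \setminus g = \underbrace{\{\, z \in \mathbb{H} \mid \Re(z) < x \,\}}_{=: H_1 \text{ (Links)}} \;\dot\cup\; \underbrace{\{\, z \in \mathbb{H} \mid \Re(z) > x \,\}}_{=: H_2 \text{ (Rechts)}}$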
-
-Zu zeigen: ∀A ∈ Hi, B ∈ Hj mit i, j ∈ { 1, 2 } gilt: AB ∩ g ≠ ∅ ⇔ i ≠ j
-
-„⇐“: A ∈ H1, B ∈ H2 : AB ∩ g ≠ ∅
-Da dH stetig ist, folgt diese Richtung direkt. Alle Punkte in H1 haben einen Abstand
-von m der kleiner ist als r und alle Punkte in H2 haben einen Abstand von m der
-größer ist als r. Da man jede Strecke von A nach B insbesondere auch als stetige
-Abbildung f : R→ R>0 auffassen kann, greift der Zwischenwertsatz ⇒ AB ∩ g ≠ ∅
-„⇒“: A ∈ Hi, B ∈ Hj mit i, j ∈ { 1, 2 } : AB ∩ g ≠ ∅ ⇒ i ≠ j
-
-Sei h die Gerade, die durch A und B geht.
-
-Da A,B /∈ g, aber A,B ∈ h gilt, haben g und h insbesondere mindestens einen
-unterschiedlichen Punkt. Aus §1 (i) folgt, dass sich g und h in höchstens einem Punkt
-schneiden. Sei C dieser Punkt.
-
-Aus A,B /∈ g folgt: C 6= A und C 6= B. Also liegt C zwischen A und B. Daraus folgt,
-dass A und B bzgl. g in verschiedenen Halbebenen liegen.
-
-c) Siehe Abbildung 4.21.
-
-x
-
-y
-
-−5 −4 −3 −2 −1 0 1 2 3 4 5 6
-0
-
-1
-
-2
-
-3
-
-4
-
-5
-
-Abbildung 4.21: Hyperbolische Geraden erfüllen §5 nicht.
-
-
-
-81 4.3. HYPERBOLISCHE GEOMETRIE
-
-Definition 64
-Es seien a, b, c, d ∈ R mit ad− bc ≠ 0 und σ : C→ C eine Abbildung definiert durch
-
-$\sigma(z) := \frac{az + b}{cz + d}$
-
-σ heißt Möbiustransformation.
-
-Proposition 4.9
-a) Die Gruppe SL2(R) operiert auf H durch die Möbiustransformation
-
-$\sigma(z) := \begin{pmatrix} a & b \\ c & d \end{pmatrix} \circ z := \frac{az + b}{cz + d}$
-
-b) Die Gruppe PSL2(R) = SL2(R)/(±I) operiert durch σ auf H.
-
-c) PSL2(R) operiert auf R ∪ {∞ }. Diese Gruppenoperation ist 3-fach transitiv, d. h.
-zu x0 < x1 < x∞ ∈ R gibt es genau ein σ ∈ PSL2(R) mit σ(x0) = 0, σ(x1) = 1,
-σ(x∞) =∞.
-
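-d) $SL_2(\mathbb{R})$ wird von den Matrizen
-$$\underbrace{\begin{pmatrix} \lambda & 0 \\ 0 & \lambda^{-1} \end{pmatrix}}_{=: A_\lambda}, \quad \underbrace{\begin{pmatrix} 1 & t \\ 0 & 1 \end{pmatrix}}_{=: B_t} \quad \text{und} \quad \underbrace{\begin{pmatrix} 0 & 1 \\ -1 & 0 \end{pmatrix}}_{=: C} \quad \text{mit } t, \lambda \in \mathbb{R}^\times$$
-erzeugt.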
-
-e) PSL2(R) operiert auf G.
-
-Beweis:
-
-a) Sei $z = x + iy \in \mathbb{H}$, d. h. $y > 0$ und $\sigma = \begin{pmatrix} a & b \\ c & d \end{pmatrix} \in SL_2(\mathbb{R})$
-
-$$\begin{aligned}
-\Rightarrow \sigma(z) &= \frac{a(x + iy) + b}{c(x + iy) + d} \\
-&= \frac{(ax + b) + iay}{(cx + d) + icy} \cdot \frac{(cx + d) - icy}{(cx + d) - icy} \\
-&= \frac{(ax + b)(cx + d) + aycy}{(cx + d)^2 + (cy)^2} + i \, \frac{ay(cx + d) - (ax + b)cy}{(cx + d)^2 + (cy)^2} \\
-&= \frac{axcx + axd + bcx + bd + aycy}{(cx + d)^2 + (cy)^2} + i \, \frac{(ad - bc)y}{(cx + d)^2 + (cy)^2} \\
-&\overset{SL_2(\mathbb{R})}{=} \frac{ac(x^2 + y^2) + adx + bcx + bd}{(cx + d)^2 + (cy)^2} + i \, \frac{y}{(cx + d)^2 + (cy)^2}
-\end{aligned}$$
-
-$\Rightarrow \Im(\sigma(z)) = \frac{y}{(cx + d)^2 + (cy)^2} > 0$
-
-Die Abbildung bildet also nach $\mathbb{H}$ ab. Außerdem gilt:
-$$\begin{pmatrix} 1 & 0 \\ 0 & 1 \end{pmatrix} \circ z = \frac{x + iy}{1} = x + iy = z$$
-
-
-
-82 4.3. HYPERBOLISCHE GEOMETRIE
-
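-und
-$$\begin{aligned}
-\begin{pmatrix} a & b \\ c & d \end{pmatrix} \circ \left( \begin{pmatrix} a' & b' \\ c' & d' \end{pmatrix} \circ z \right)
-&= \begin{pmatrix} a & b \\ c & d \end{pmatrix} \circ \frac{a'z + b'}{c'z + d'} \\
-&= \frac{a \frac{a'z + b'}{c'z + d'} + b}{c \frac{a'z + b'}{c'z + d'} + d} \\
-&= \frac{\frac{a(a'z + b') + b(c'z + d')}{c'z + d'}}{\frac{c(a'z + b') + d(c'z + d')}{c'z + d'}} \\
-&= \frac{a(a'z + b') + b(c'z + d')}{c(a'z + b') + d(c'z + d')} \\
-&= \frac{(aa' + bc')z + ab' + bd'}{(ca' + dc')z + cb' + dd'} \\
-&= \begin{pmatrix} aa' + bc' & ab' + bd' \\ ca' + dc' & cb' + dd' \end{pmatrix} \circ z \\
-&= \left( \begin{pmatrix} a & b \\ c & d \end{pmatrix} \cdot \begin{pmatrix} a' & b' \\ c' & d' \end{pmatrix} \right) \circ z
-\end{aligned}$$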
-
-b) Es gilt σ(z) = (−σ)(z) für alle σ ∈ SL2(R) und z ∈ H.
-
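-c) Ansatz: $\sigma = \begin{pmatrix} a & b \\ c & d \end{pmatrix}$
-$\sigma(x_0) = \frac{a x_0 + b}{c x_0 + d} \overset{!}{=} 0 \Rightarrow a x_0 + b = 0 \Rightarrow b = -a x_0$
-
-$\sigma(x_\infty) = \infty \Rightarrow c x_\infty + d = 0 \Rightarrow d = -c x_\infty$
-$\sigma(x_1) = 1 \Rightarrow a x_1 + b = c x_1 + d$
-$a(x_1 - x_0) = c(x_1 - x_\infty) \Rightarrow c = a \, \frac{x_1 - x_0}{x_1 - x_\infty}$
-Einsetzen in $ad - bc = 1$:
-$\Rightarrow -a^2 \cdot x_\infty \, \frac{x_1 - x_0}{x_1 - x_\infty} + a^2 x_0 \, \frac{x_1 - x_0}{x_1 - x_\infty} = 1$
-
-$\Rightarrow a^2 \, \frac{x_1 - x_0}{x_1 - x_\infty} (x_0 - x_\infty) = 1 \Rightarrow a^2 = \frac{x_1 - x_\infty}{(x_0 - x_\infty)(x_1 - x_0)}$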
-
-d) Es gilt:
-
-$$A_\lambda^{-1} = A_{\frac{1}{\lambda}}$$
-
-$$B_t^{-1} = B_{-t}$$
-
-$$C^{-1} = C^3$$
-
-Um zu zeigen, dass man mit $A_\lambda$, $B_t$ und $C$ alle Matrizen aus $SL_2(\mathbb{R})$
-erzeugen kann, genügt es daher, aus einer beliebigen Matrix durch Multiplikation mit
-Matrizen der Form $A_\lambda$, $B_t$ und $C$ die Einheitsmatrix zu generieren.
-
-Sei also
-
-$$M = \begin{pmatrix} a & b \\ c & d \end{pmatrix} \in SL_2(\mathbb{R})$$
-
-beliebig.
-
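-Fall 1: $a = 0$
-Da $M \in SL_2(\mathbb{R})$ ist, gilt $\det M = 1 = ad - bc = -bc$. Daher ist insbesondere $c \neq 0$. Es
-folgt:
-
-$$\begin{pmatrix} 0 & 1 \\ -1 & 0 \end{pmatrix} \cdot \begin{pmatrix} a & b \\ c & d \end{pmatrix} = \begin{pmatrix} c & d \\ -a & -b \end{pmatrix}$$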
-
-
-
-83 4.3. HYPERBOLISCHE GEOMETRIE
-
-Gehe zu Fall 2.
-
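-Fall 2: $a \neq 0$
-Nun wird in M durch $M \cdot A_{\frac{1}{a}}$ an der Stelle von a eine 1 erzeugt:
-
-$$\begin{pmatrix} a & b \\ c & d \end{pmatrix} \cdot \begin{pmatrix} \frac{1}{a} & 0 \\ 0 & a \end{pmatrix} = \begin{pmatrix} 1 & ab \\ \frac{c}{a} & ad \end{pmatrix}$$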
-
-Gehe zu Fall 3.
-
-Fall 3: $a = 1$
-$$\begin{pmatrix} 1 & b \\ c & d \end{pmatrix} \cdot \begin{pmatrix} 1 & -b \\ 0 & 1 \end{pmatrix} = \begin{pmatrix} 1 & 0 \\ c & d - bc \end{pmatrix}$$
-Da wir $\det M = 1 = ad - bc = d - bc$ wissen, gilt sogar $M_{2,2} = 1$.
-
-Gehe zu Fall 4.
-
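-Fall 4: $a = 1, b = 0, d = 1$
-
-$$A_{-1} C B_c C \begin{pmatrix} 1 & 0 \\ c & 1 \end{pmatrix} = \begin{pmatrix} 1 & 0 \\ 0 & 1 \end{pmatrix}$$
-Daher erzeugen Matrizen der Form $A_\lambda$, $B_t$ und $C$ die Gruppe $SL_2(\mathbb{R})$. $\blacksquare$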
-
-e) Es genügt die Aussage für Matrizen aus Proposition 4.9 (d) zu zeigen.
-
-• $\sigma = \begin{pmatrix} \lambda & 0 \\ 0 & \lambda^{-1} \end{pmatrix}$, also $\sigma(z) = \lambda^2 z$. Daraus ergeben sich die Situationen, die in
-Abbildung 4.22a und Abbildung 4.22b dargestellt sind.
-
-x
-
-y
-
-−1 0 1 2 3 4 5 6 7
-0
-
-1
-
-2
-
-3
-
-m λ2m
-
-m+ ir
-
-λ2m+ iλ2r
-
-m+ 1
-
-(a) Fall 1
-
-x
-
-y
-
-−1 0 1 2 3 4
-0
-
-1
-
-2
-
-3
-
-z
-
-x
-
-λ2z
-
-λ2x
-
-(b) Fall 2 (Strahlensatz)
-
-Abbildung 4.22: Beweis von Proposition 4.9 (e) für eine Diagonalmatrix
-
-• Offensichtlich gilt die Aussage für $\sigma = \begin{pmatrix} 1 & a \\ 0 & 1 \end{pmatrix}$
-• Sei nun $\sigma = \begin{pmatrix} 0 & 1 \\ -1 & 0 \end{pmatrix}$, also $\sigma(z) = -\frac{1}{z}$
-
-Bemerkung 69
-Zu hyperbolischen Geraden g1, g2 gibt es σ ∈ PSL2(R) mit σ(g1) = g2.
-
-
-
-84 4.3. HYPERBOLISCHE GEOMETRIE
-
-·
-
-x
-
-y
-
-−1 0 1
-0
-
-1
-
-z = r · eiϕ
-
-1
-z = 1
-
-r · eiϕ
-
-Abbildung 4.23: Inversion am Kreis
-
-Beweis: Nach Proposition 4.9 (c) gibt es σ mit σ(a1) = b1 und σ(a2) = b2. Dann existiert
-σ(g1) := g2 wegen dem Inzidenzaxiom §1 und ist eindeutig bestimmt.
-
-Definition 65
-Seien $z_1, z_2, z_3, z_4 \in \mathbb{C}$ paarweise verschieden.
-
-Dann heißt
-
-$$DV(z_1, z_2, z_3, z_4) := \frac{\;\frac{z_1 - z_4}{z_1 - z_2}\;}{\;\frac{z_3 - z_4}{z_3 - z_2}\;} = \frac{(z_1 - z_4) \cdot (z_3 - z_2)}{(z_1 - z_2) \cdot (z_3 - z_4)}$$
-
-Doppelverhältnis von $z_1, \dots, z_4$.
-
-Bemerkung 70 (Eigenschaften des Doppelverhältnisses)
-a) $DV(z_1, \dots, z_4) \in \mathbb{C} \setminus \{ 0, 1 \}$
-b) $DV(z_1, z_4, z_3, z_2) = \frac{1}{DV(z_1, z_2, z_3, z_4)}$
-
-c) $DV(z_3, z_2, z_1, z_4) = \frac{1}{DV(z_1, z_2, z_3, z_4)}$
-
-d) DV ist auch wohldefiniert, wenn eines der zi =∞ oder wenn zwei der zi gleich sind.
-
-e) DV(0, 1,∞, z4) = z4 (Der Fall z4 ∈ { 0, 1,∞} ist zugelassen).
-f) Für σ ∈ PSL2(C) und z1, . . . , z4 ∈ C ∪ {∞ } ist
-
-DV(σ(z1), σ(z2), σ(z3), σ(z4)) = DV(z1, z2, z3, z4)
-
-und für $\sigma(z) = \frac{1}{\overline{z}}$ gilt
-
-$$DV(\sigma(z_1), \sigma(z_2), \sigma(z_3), \sigma(z_4)) = \overline{DV(z_1, z_2, z_3, z_4)}$$
-
-g) DV(z1, z2, z3, z4) ∈ R ∪ {∞ } ⇔ z1, . . . , z4 liegen auf einer hyperbolischen Geraden.
-
-Beweis:
-
-a) $DV(z_1, \dots, z_4) \neq 0$, da $z_i$ paarweise verschieden
-$DV(z_1, \dots, z_4) \neq 1$, da:
-
-Annahme: $DV(z_1, \dots, z_4) = 1$
-
-$\Leftrightarrow (z_1 - z_2)(z_3 - z_4) = (z_1 - z_4)(z_3 - z_2)$
-
-
-
-85 4.3. HYPERBOLISCHE GEOMETRIE
-
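-$\Leftrightarrow z_1 z_3 - z_2 z_3 - z_1 z_4 + z_2 z_4 = z_1 z_3 - z_3 z_4 - z_1 z_2 + z_2 z_4$
-
-$\Leftrightarrow z_2 z_3 + z_1 z_4 = z_3 z_4 + z_1 z_2$
-
-$\Leftrightarrow z_2 z_3 - z_3 z_4 = z_1 z_2 - z_1 z_4$
-
-$\Leftrightarrow z_3 (z_2 - z_4) = z_1 (z_2 - z_4)$
-
-$\Leftrightarrow z_3 = z_1$ oder $z_2 = z_4$
-
-Alle $z_i$ sind paarweise verschieden $\Rightarrow$ Widerspruch $\blacksquare$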
-
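-b) $DV(z_1, z_4, z_3, z_2) = \frac{(z_1 - z_2) \cdot (z_3 - z_4)}{(z_1 - z_4) \cdot (z_3 - z_2)} = \frac{1}{DV(z_1, z_2, z_3, z_4)}$
-
-c) $DV(z_3, z_2, z_1, z_4) = \frac{(z_3 - z_4) \cdot (z_1 - z_2)}{(z_3 - z_2) \cdot (z_1 - z_4)} = \frac{1}{DV(z_1, z_2, z_3, z_4)}$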
-
-d) Zwei der zi dürfen gleich sein, da:
-
-Fall 1 z1 = z4 oder z3 = z2
-
-In diesem Fall ist DV(z1, . . . , z4) = 0
-
-Fall 2 z1 = z2 oder z3 = z4
-
-Mit der Regel von L’Hospital folgt, dass in diesem Fall DV(z1, . . . , z4) =∞ gilt.
-
-Fall 3 z1 = z3 oder z2 = z4
-
-Durch Einsetzen ergibt sich DV(z1, . . . , z4) = 1.
-
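-Im Fall, dass ein $z_i = \infty$ ist, ist entweder $DV(0, 1, \infty, z_4) = 0$ oder $DV(0, 1, \infty, z_4) = \pm\infty$
-
-e) $DV(0, 1, \infty, z_4) = \frac{(0 - z_4) \cdot (\infty - 1)}{(0 - 1) \cdot (\infty - z_4)} = \frac{z_4 \cdot (\infty - 1)}{\infty - z_4} = z_4$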
-
-f) Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken.
-
-g) Sei σ ∈ PSL2(C) mit σ(z1) = 0, σ(z2) = 1, σ(z3) =∞. Ein solches σ existiert, da man
-drei Parameter von σ wählen darf.
-
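-$\overset{\text{Bem. 70.f}}{\Longrightarrow} DV(z_1, \dots, z_4) = DV(0, 1, \infty, \sigma(z_4))$
-$\Rightarrow DV(z_1, \dots, z_4) \in \mathbb{R} \cup \{ \infty \}$
-$\Leftrightarrow \sigma(z_4) \in \mathbb{R} \cup \{ \infty \}$
-Behauptung folgt, weil $\sigma^{-1}(\mathbb{R} \cup \{ \infty \})$ ein Kreis oder eine Gerade in $\mathbb{C}$ ist.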
-
-Definition 66
-Für $z_1, z_2 \in \mathbb{H}$ sei $g_{z_1, z_2}$ die eindeutige hyperbolische Gerade durch $z_1$ und $z_2$ und $a_1, a_2$ die
-„Schnittpunkte“ von $g_{z_1, z_2}$ mit $\mathbb{R} \cup \{ \infty \}$.
-Dann sei $d_{\mathbb{H}}(z_1, z_2) := \frac{1}{2} \left| \ln DV(a_1, z_1, a_2, z_2) \right|$ und heiße hyperbolische Metrik.
-
-Beh.: Für $z_1, z_2 \in \mathbb{H}$ sei $g_{z_1, z_2}$ die eindeutige hyperbolische Gerade durch $z_1$ und $z_2$ und $a_1, a_2$
-die „Schnittpunkte“ von $g_{z_1, z_2}$ mit $\mathbb{R} \cup \{ \infty \}$.
-Dann gilt:
-
-$$\frac{1}{2} \left| \ln DV(a_1, z_1, a_2, z_2) \right| = \frac{1}{2} \left| \ln DV(a_2, z_1, a_1, z_2) \right|$$
-
-Beweis: Wegen Bemerkung 70.c gilt:
-
-$$DV(a_1, z_1, a_2, z_2) = \frac{1}{DV(a_2, z_1, a_1, z_2)}$$
-
-Außerdem gilt:
-
-$$\ln \frac{1}{x} = \ln x^{-1} = (-1) \cdot \ln x = -\ln x$$
-
-
-
-86 4.3. HYPERBOLISCHE GEOMETRIE
-
-Da der ln im Betrag steht, folgt direkt:
-
-$$\frac{1}{2} \left| \ln DV(a_1, z_1, a_2, z_2) \right| = \frac{1}{2} \left| \ln DV(a_2, z_1, a_1, z_2) \right|$$
-
-Es ist also egal in welcher Reihenfolge die „Schnittpunkte“ mit der x-Achse im Doppelver-
-hältnis genutzt werden. �
-
-Beh.: Die hyperbolische Metrik ist eine Metrik auf H.
-
-Beweis: Wegen Bemerkung 70.f ist
-
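-$$d(z_1, z_2) := d(\sigma(z_1), \sigma(z_2)) \quad \text{mit } \sigma(a_1) = 0, \ \sigma(a_2) = \infty$$
-
-d. h. $\sigma(g_{z_1, z_2}) = i\mathbb{R}$ (imaginäre Achse).
-
-Also gilt o. B. d. A. $z_1 = ia$ und $z_2 = ib$ mit $a, b \in \mathbb{R}$ und $a < b$.
-
-$$\begin{aligned}
-2 d(ia, ib) &= \left| \ln DV(0, ia, \infty, ib) \right| \\
-&= \left| \ln \frac{(0 - ib)(\infty - ia)}{(0 - ia)(\infty - ib)} \right| \\
-&= \left| \ln \frac{b}{a} \right| \\
-&= \ln b - \ln a
-\end{aligned}$$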
-
-Also: d(z1, z2) ≥ 0, d(z1, z2) = 0⇔ z1 = z2
-
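-$$\begin{aligned}
-2 d(z_2, z_1) &= \left| \ln DV(a_2, z_2, a_1, z_1) \right| \\
-&= \left| \ln DV(\infty, ib, 0, ia) \right| \\
-&\overset{\text{Bem. 70.b}}{=} \left| \ln DV(0, ib, \infty, ia) \right| \\
-&= 2 d(z_1, z_2)
-\end{aligned}$$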
-
-Liegen drei Punkte z1, z2, z3 ∈ C auf einer hyperbolischen Geraden, so gilt d(z1, z3) =
-d(z1, z2) + d(z2, z3) (wenn z2 zwischen z1 und z3 liegt).
-
-Dreiecksungleichung: Beweis ist umständlich und wird hier nicht geführt. Es sei auf die
-Vorlesung „Hyperbolische Geometrie“ verwiesen.
-
-Satz 4.10
-Die hyperbolische Ebene H mit der hyperbolischen Metrik d und den hyperbolischen
-Geraden bildet eine „nichteuklidische Geometrie“, d. h. die Axiome §1 - §4 sind erfüllt,
-aber Axiom §5 ist verletzt.
-
-
-
-87 4.3. HYPERBOLISCHE GEOMETRIE
-
-Übungsaufgaben
-
-Aufgabe 8
-
-Seien (X, d) eine absolute Ebene und P,Q,R ∈ X Punkte. Der Scheitelwinkel des Winkels
-∠PQR ist der Winkel, der aus den Halbgeraden QP− und QR− gebildet wird. Die
-Nebenwinkel von ∠PQR sind die von QP+ und QR− bzw. QP− und QR+ gebildeten
-Winkel.
-
-Zeigen Sie:
-
-(a) Die beiden Nebenwinkel von ∠PQR sind gleich.
-
-(b) Der Winkel ∠PQR ist gleich seinem Scheitelwinkel.
-
-Aufgabe 9
-
-Sei (X, d) eine absolute Ebene. Der Abstand eines Punktes P zu einer Menge $Y \subseteq X$ von
-Punkten ist definiert durch $d(P, Y) := \inf \{ d(P, y) \mid y \in Y \}$.
-
-Zeigen Sie:
-
-(a) Ist $\triangle ABC$ ein Dreieck, in dem die Seiten AB und AC kongruent sind, so sind die
-Winkel ∠ABC und ∠BCA gleich.
-
-(b) Ist $\triangle ABC$ ein beliebiges Dreieck, so liegt der längeren Seite der größere Winkel
-gegenüber und umgekehrt.
-
-(c) Sind g eine Gerade und $P \notin g$ ein Punkt, so gibt es eine eindeutige Gerade h mit
-$P \in h$, die g im rechten Winkel schneidet. Diese Gerade heißt Lot von P auf g
-und der Schnittpunkt des Lots mit g heißt Lotfußpunkt.
-
-Aufgabe 10
-
-Seien f, g, h ∈ G und paarweise verschieden.
-
-Zeigen Sie: f ‖ g ∧ g ‖ h⇒ f ‖ h
-
-Aufgabe 11
-
-Beweise den Kongruenzsatz SSS.
-
-
-
-5 Krümmung
-
-Definition 67
-Sei $f : [a, b] \to \mathbb{R}^n$ eine Funktion aus $C^\infty$. Dann heißt f Kurve.
-
-5.1 Krümmung von Kurven
-
-Definition 68
-Sei γ : I = [a, b]→ Rn eine Kurve.
-
-a) Die Kurve γ heißt durch Bogenlänge parametrisiert, wenn gilt:
-
-‖γ′(t)‖2 = 1 ∀t ∈ I
-
-Dabei ist γ′(t) = (γ′1(t), γ′2(t), . . . , γ′n(t)).
-
-b) $l(\gamma) = \int_a^b \|\gamma'(t)\| \, dt$ heißt Länge von γ.
-
-Bemerkung 71 (Eigenschaften von Kurven I)
-Sei γ : I = [a, b]→ Rn eine C∞-Funktion.
-
-a) Ist γ durch Bogenlänge parametrisiert, so ist l(γ) = b− a.
-b) Ist γ durch Bogenlänge parametrisiert, so ist γ′(t) orthogonal zu γ′′(t) für alle t ∈ I.
-
-Beweis:
-
-a) $l(\gamma) = \int_a^b \|\gamma'(t)\| \, dt = \int_a^b 1 \, dt = b - a$.
-
-b) Im Folgenden wird die Aussage nur für γ : [a, b]→ R2 bewiesen. Allerdings funktioniert
-der Beweis im Rn analog. Es muss nur die Ableitung angepasst werden.
-
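-$$\begin{aligned}
-1 &= \|\gamma'(t)\| = \|\gamma'(t)\|^2 = \langle \gamma'(t), \gamma'(t) \rangle \\
-\Rightarrow 0 &= \frac{d}{dt} \langle \gamma'(t), \gamma'(t) \rangle \\
-&= \frac{d}{dt} \left( \gamma_1'(t) \gamma_1'(t) + \gamma_2'(t) \gamma_2'(t) \right) \\
-&= 2 \cdot \left( \gamma_1''(t) \cdot \gamma_1'(t) + \gamma_2''(t) \cdot \gamma_2'(t) \right) \\
-&= 2 \cdot \langle \gamma''(t), \gamma'(t) \rangle
-\end{aligned}$$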
-
-Definition 69
-Sei γ : I → R2 eine durch Bogenlänge parametrisierte Kurve.
-
-a) Für t ∈ I sei n(t) Normalenvektor an γ in t wenn gilt:
-
-〈n(t), γ′(t)〉 = 0, ‖n(t)‖ = 1 und det((γ′(t), n(t))) = +1
-
-
-
-89 5.1. KRÜMMUNG VON KURVEN
-
-b) Sei $\kappa : I \to \mathbb{R}$ so, dass gilt:
-$$\gamma''(t) = \kappa(t) \cdot n(t)$$
-
-Dann heißt κ(t) Krümmung von γ in t.
-
-Da n(t) und γ′′(t) nach Bemerkung 71.b linear abhängig sind, existiert κ(t).
-Beispiel 45
-
-Gegeben sei ein Kreis mit Radius r, d. h. mit Umfang 2πr. Es gilt:
-
-$$\gamma(t) = \left( r \cdot \cos \frac{t}{r}, \; r \cdot \sin \frac{t}{r} \right) \quad \text{für } t \in [0, 2\pi r]$$
-
-ist parametrisiert durch Bogenlänge, da gilt:
-
-$$\gamma'(t) = \left( \left( r \cdot \frac{1}{r} \right) \left( -\sin \frac{t}{r} \right), \; r \, \frac{1}{r} \cos \frac{t}{r} \right) = \left( -\sin \frac{t}{r}, \; \cos \frac{t}{r} \right)$$
-Der Normalenvektor von γ in t ist
-
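-$$n(t) = \left( -\cos \frac{t}{r}, \; -\sin \frac{t}{r} \right)$$
-da gilt:
-
-$$\begin{aligned}
-\langle n(t), \gamma'(t) \rangle &= \left\langle \begin{pmatrix} -\cos \frac{t}{r} \\ -\sin \frac{t}{r} \end{pmatrix}, \begin{pmatrix} -\sin \frac{t}{r} \\ \cos \frac{t}{r} \end{pmatrix} \right\rangle \\
-&= \left( -\cos \frac{t}{r} \right) \cdot \left( -\sin \frac{t}{r} \right) + \left( -\sin \frac{t}{r} \right) \cdot \left( \cos \frac{t}{r} \right) \\
-&= 0
-\end{aligned}$$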
-
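-$$\begin{aligned}
-\|n(t)\| &= \left\| \left( -\cos \frac{t}{r}, \; -\sin \frac{t}{r} \right) \right\| \\
-&= \sqrt{\left( -\cos \frac{t}{r} \right)^2 + \left( -\sin \frac{t}{r} \right)^2} \\
-&= 1
-\end{aligned}$$
-
-$$\begin{aligned}
-\det(\gamma'(t), n(t)) &= \begin{vmatrix} -\sin \frac{t}{r} & -\cos \frac{t}{r} \\ \cos \frac{t}{r} & -\sin \frac{t}{r} \end{vmatrix} \\
-&= \left( -\sin \frac{t}{r} \right)^2 - \left( -\cos \frac{t}{r} \right) \cdot \cos \frac{t}{r} \\
-&= 1
-\end{aligned}$$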
-
-Die Krümmung ist für jedes t konstant $\frac{1}{r}$, da gilt:
-
-$$\gamma''(t) = \left( -\frac{1}{r} \cos \frac{t}{r}, \; -\frac{1}{r} \sin \frac{t}{r} \right) = \frac{1}{r} \cdot \left( -\cos \frac{t}{r}, \; -\sin \frac{t}{r} \right) \Rightarrow \kappa(t) = \frac{1}{r}$$
-
-
-
-90 5.2. TANGENTIALEBENE
-
-Definition 70
-Sei γ : I → R3 eine durch Bogenlänge parametrisierte Kurve.
-
-a) Für t ∈ I heißt κ(t) := ‖γ′′(t)‖ die Krümmung von γ in t.
-
-b) Ist für $t \in I$ die Ableitung $\gamma''(t) \neq 0$, so heißt $\frac{\gamma''(t)}{\|\gamma''(t)\|}$ Normalenvektor an γ in t.
-
-c) b(t) sei ein Vektor, der $\gamma'(t), n(t)$ zu einer orientierten Orthonormalbasis von $\mathbb{R}^3$ ergänzt.
-Also gilt:
-
-$$\det(\gamma'(t), n(t), b(t)) = 1$$
-
-b(t) heißt Binormalenvektor, die Orthonormalbasis $\{ \gamma'(t), n(t), b(t) \}$ heißt begleitendes Dreibein.
-
-Bemerkung 72 (Eigenschaften von Kurven II)
-Sei γ : I → R3 durch Bogenlänge parametrisierte Kurve.
-
-a) n(t) ist orthogonal zu γ′(t).
-
-b) b(t) aus Definition 70.c ist eindeutig.
-
-5.2 Tangentialebene
-
-Erinnern Sie sich an Definition 32 „reguläre Fläche“.
-
-Äquivalent dazu ist: S ist lokal von der Form
-
-$$V(f) = \{ x \in \mathbb{R}^3 \mid f(x) = 0 \}$$
-
-für eine C∞-Funktion f : R3 → R.
-
-Definition 71
-Sei S ⊆ R3 eine reguläre Fläche, s ∈ S, F : U → V ∩ S eine lokale Parametrisierung um
-s ∈ V :
-
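-$$(u, v) \mapsto (x(u, v), y(u, v), z(u, v))$$
-
-Für $p = F^{-1}(s) \in U$ sei
-
-$$J_F(p) = \begin{pmatrix} \frac{\partial x}{\partial u}(p) & \frac{\partial x}{\partial v}(p) \\ \frac{\partial y}{\partial u}(p) & \frac{\partial y}{\partial v}(p) \\ \frac{\partial z}{\partial u}(p) & \frac{\partial z}{\partial v}(p) \end{pmatrix}$$
-und $D_pF : \mathbb{R}^2 \to \mathbb{R}^3$ die durch $J_F(p)$ definierte lineare Abbildung.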
-
-Dann heißt TsS := Bild(DpF ) die Tangentialebene an s ∈ S.
-
-Bemerkung 73 (Eigenschaften der Tangentialebene)
-a) TsS ist 2-dimensionaler Untervektorraum von R3.
-
-b) TsS = 〈ũ, ṽ〉, wobei ũ, ṽ die Spaltenvektoren der Jacobi-Matrix JF (p) sind.
-
-c) TsS hängt nicht von der gewählten Parametrisierung ab.
-
-
-
-91 5.2. TANGENTIALEBENE
-
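-d) Sei $S = V(f)$ eine reguläre Fläche in $\mathbb{R}^3$, also $f : V \to \mathbb{R}$ eine $C^\infty$-Funktion, $V \subseteq \mathbb{R}^3$
-offen, $\operatorname{grad}(f)(x) \neq 0$ für alle $x \in S$.
-Dann ist $T_sS = (\operatorname{grad}(f)(s))^\perp$ für jedes $s \in S$.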
-
-Beweis:
-
-a) $J_F$ ist eine 3 × 2-Matrix, die mit einem 2 × 1-Vektor multipliziert wird. Das ist
-eine lineare Abbildung und aus der linearen Algebra ist bekannt, dass das Bild ein
-Vektorraum ist. Da $\operatorname{Rg}(J_F) = 2$, ist auch $\dim(T_sS) = 2$.
-
-b) Hier kann man wie in Punkt a) argumentieren
-
-c) TsS = {x ∈ R3|∃parametrisierte Kurve γ : [−ε,+ε] → S für ein ε > 0 mit γ(0) =
-s und γ′(0) = x}
-Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken.
-
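-d) Sei $x \in T_sS$, $\gamma : [-\varepsilon, +\varepsilon] \to S$ eine parametrisierte Kurve mit $\varepsilon > 0$ und $\gamma(0) = s$,
-sodass $\gamma'(0) = x$ gilt. Da $\gamma(t) \in S$ für alle $t \in [-\varepsilon, \varepsilon]$, ist $f \circ \gamma = 0$
-$\Rightarrow 0 = (f \circ \gamma)'(0) = \langle \operatorname{grad}(f)(\gamma(0)), \gamma'(0) \rangle$
-$\Rightarrow T_sS \subseteq \operatorname{grad}(f)(s)^\perp$
-$\overset{\dim = 2}{\Longrightarrow} T_sS = (\operatorname{grad}(f)(s))^\perp$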
-
-Definition 72
-a) Ein Normalenfeld auf der regulären Fläche S ⊆ R3 ist eine Abbildung n : S → S2 ⊆
-
-R3 mit n(s) ∈ TsS⊥ für jedes s ∈ S.
-
-b) S heißt orientierbar, wenn es ein stetiges Normalenfeld auf S gibt.
-
-Manchmal wird zwischen einem Normalenfeld und einem Einheitsnormalenfeld unterschieden.
-Im Folgenden werden diese Begriffe jedoch synonym benutzt.
-
-Bemerkung 74 (Eigenschaften von Normalenfeldern)
-a) Ein Normalenfeld auf S ist genau dann stetig, wenn es glatt ist (also C∞).
-
-b) Zu jedem s ∈ S gibt es eine Umgebung V ⊆ R3 von s und eine lokale Parametrisierung
-F : U → V von S um s, sodass auf F (U) = V ∩ S ein stetiges Normalenfeld existiert.
-
-c) S ist genau dann orientierbar, wenn es einen differenzierbaren Atlas von S aus lokalen
-Parametrisierungen $F_i : U_i \to V_i$, $i \in I$ gibt, sodass für alle $i, j \in I$ und alle
-$s \in V_i \cap V_j \cap S$ gilt:
-
-$$\det(\underbrace{D_s \overbrace{F_j \circ F_i^{-1}}^{V_i \to V_j}}_{\in \mathbb{R}^{3 \times 3}}) > 0$$
-
-Beweis: Wird hier nicht geführt.
-
-Beispiel 46 (Normalenfelder)
-1) S = S2, n1 = idS2 ist ein stetiges Normalenfeld.
-
-Auch n2 = −idS2 ist ein stetiges Normalenfeld.
-
-2) S = Möbiusband (vgl. Abbildung 5.1) ist nicht orientierbar. Es existiert ein Norma-
-lenfeld, aber kein stetiges Normalenfeld.
-
-
-
-92 5.3. GAUSS-KRÜMMUNG
-
-Abbildung 5.1: Möbiusband
-
-5.3 Gauß-Krümmung
-
-Bemerkung 75
-Sei S eine reguläre Fläche, s ∈ S, n(s) ist ein Normalenvektor in s, x ∈ TsS, ‖x‖ = 1.
-
-Sei E der von x und n(s) aufgespannte 2-dimensionale Untervektorraum von R3.
-
-Dann gibt es eine Umgebung V ⊆ R3 von s, sodass
-
-C := (s+ E) ∩ S ∩ V
-
-das Bild einer durch Bogenlänge parametrisierten Kurve γ : [−ε, ε]→ S enthält mit γ(0) = s
-und γ′(0) = x.
-
-Beweis: „Satz über implizite Funktionen“1
-
-Definition 73
-In der Situation aus Bemerkung 75 heißt die Krümmung κγ(0) der Kurve γ in der Ebene
-(s+ E) im Punkt s die Normalkrümmung von S in s in Richtung x = γ′(0).
-
-Man schreibt: κNor(s, x) := κγ(0)
-
-Hinweis: Die Krümmung ist nur bis auf das Vorzeichen bestimmt.
-
-Beispiel 47 (Gauß-Krümmung)
-1) S = S2 = V (X2 + Y 2 + Z2 − 1) ist die Kugel um den Ursprung mit Radius 1, n = id,
-
-s = (0, 0, 1), x = (1, 0, 0)
-⇒ E = R · x+ R · n(s) (x, z-Ebene)
-
-C = E ∩ S ist Kreislinie
-$\kappa_{\text{Nor}}(s, x) = \frac{1}{r} = 1$
-
-2) S = V (X2 + Z2 − 1) ⊆ R3 ist ein Zylinder (siehe Abbildung 5.2a). s = (1, 0, 0)
-x1 = (0, 1, 0)⇒ E1 = R · e1 + R · e2 (x, y-Ebene)
-S ∩ E1 = V (X2 + Y 2 − 1) ∩ E, Kreislinie in E
-⇒ κNor(s, x1) = ±1
-x2 = (0, 0, 1), E2 = R · e1 + R · e3 (x, z-Ebene)
-
-1Siehe z. B. https://github.com/MartinThoma/LaTeX-examples/tree/master/documents/Analysis%20II
-
-https://github.com/MartinThoma/LaTeX-examples/tree/master/documents/Analysis%20II
-
-
-93 5.3. GAUSS-KRÜMMUNG
-
-$V \cap E_2 \cap S = \{ (1, 0, z) \in \mathbb{R}^3 \mid z \in \mathbb{R} \}$ ist eine Gerade
-
-$\Rightarrow \kappa_{\text{Nor}}(s, x_2) = 0$
-
-3) S = V (X2 − Y 2 − Z), s = (0, 0, 0) (Hyperbolisches Paraboloid, siehe Abbildung 5.2b)
-x1 = (1, 0, 0), n(s) = (0, 0, 1)
-x2 = (0, 1, 0)
-κNor(s, x1) = 2
-κNor(s, x2) = −2
-
-−1.5
-−1 −0.5
-
-0 0.5
-1 1.5
-
-−1
-
-0
-
-1
-
-0
-
-1
-
-2
-
-3
-
-4
-
-5
-
-x
-
-y
-
-z
-
-(a) S = V (X2 + Z2 − 1)
-
-−2 −1.5 −1 −0.5 0 0.5 1 1.5 2
-
-−2
-
-−1
-
-0
-
-1
-
-2
-
-−2
-
-0
-
-2
-
-x
-
-y
-
-z
-
-−4
-
-−2
-
-0
-
-2
-
-4
-
-f(x, y)
-
-(b) S = V (X2 − Y 2 − Z)
-
-Abbildung 5.2: Beispiele für reguläre Flächen
-
-Definition 74
-Sei S ⊆ R3 eine reguläre Fläche, s ∈ S und n ein stetiges Normalenfeld auf S.
-
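-$\gamma : [-\varepsilon, \varepsilon] \to S$ eine nach Bogenlänge parametrisierte Kurve ($\varepsilon > 0$) mit $\gamma(0) = s$ und
-$\gamma''(0) \neq 0$.
-
-Sei $n(0) := \frac{\gamma''(0)}{\|\gamma''(0)\|}$. Zerlege
-
-$$n(0) = n(0)^t + n(0)^\perp \quad \text{mit } n(0)^t \in T_sS \text{ und } n(0)^\perp \in (T_sS)^\perp$$
-
-Dann ist $n(0)^\perp = \langle n(0), n(s) \rangle \cdot n(s)$
-$\kappa_{\text{Nor}}(s, \gamma) := \langle \gamma''(0), n(s) \rangle$ die Normalkrümmung.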
-
-Bemerkung 76
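-Sei $\tilde\gamma(t) = \gamma(-t)$, $t \in [-\varepsilon, \varepsilon]$. Dann ist $\kappa_{\text{Nor}}(s, \tilde\gamma) = \kappa_{\text{Nor}}(s, \gamma)$.
-
-Beweis: $\tilde\gamma''(0) = \gamma''(0)$, da $\tilde\gamma'(0) = -\gamma'(0)$.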
-
-Es gilt: κNor(s, γ) hängt nur von |γ′(0)| ab und ist gleich κNor(s, γ′(0)).
-
-Bemerkung 77
-Sei S eine reguläre Fläche und n = n(s) ein Normalenvektor an S in s.
-
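-Sei $T_s^1 S = \{ x \in T_sS \mid \|x\| = 1 \} \cong S^1$. Dann ist
-
-$$\kappa^n_{\text{Nor}}(s) : T_s^1 S \to \mathbb{R}, \quad x \mapsto \kappa_{\text{Nor}}(s, x)$$
-
-eine glatte Funktion und $\operatorname{Bild} \kappa^n_{\text{Nor}}(s)$ ist ein abgeschlossenes Intervall.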
-
-Definition 75
-Sei S eine reguläre Fläche und n = n(s) ein Normalenvektor an S in s.
-
-
-
-94 5.3. GAUSS-KRÜMMUNG
-
-a) $\kappa_1^n(s) := \min \{ \kappa^n_{\text{Nor}}(s, x) \mid x \in T_s^1 S \}$ und
-$\kappa_2^n(s) := \max \{ \kappa^n_{\text{Nor}}(s, x) \mid x \in T_s^1 S \}$ heißen Hauptkrümmungen von S in s.
-
-b) $K(s) := \kappa_1^n(s) \cdot \kappa_2^n(s)$ heißt Gauß-Krümmung von S in s.
-
-Bemerkung 78
-Ersetzt man n durch −n, so gilt:
-
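-$$\kappa^{-n}_{\text{Nor}}(s, x) = -\kappa^{n}_{\text{Nor}}(s, x) \quad \forall x \in T_s^1 S$$
-
-$$\Rightarrow \kappa_1^{-n}(s) = -\kappa_2^{n}(s)$$
-
-$$\kappa_2^{-n}(s) = -\kappa_1^{n}(s)$$
-
-und $K^{-n}(s) = K^{n}(s) =: K(s)$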
-
-Beispiel 48
-1) S = S2. Dann ist κ1(s) = κ2(s) = ±1 ∀s ∈ S2
-
-⇒ K(s) = 1
-
-2) Zylinder:
-κ1(s) = 0, κ2(s) = 1⇒ K(s) = 0
-
-3) Sattelpunkt auf hyperbolischem Paraboloid:
-$\kappa_1(s) < 0, \ \kappa_2(s) > 0 \Rightarrow K(s) < 0$
-
-4) S = Torus. Siehe Abbildung 5.3
-
-s1
-
-s2
-
-s3
-
-Abbildung 5.3: K(s1) > 0, K(s2) = 0, K(s3) < 0
-
-Bemerkung 79
-Sei S eine reguläre Fläche, s ∈ S ein Punkt.
-
-
-
-95 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM
-
-a) Ist K(s) > 0, so liegt S in einer Umgebung von s ganz auf einer Seite von TsS + s.
-
-b) Ist K(s) < 0, so schneidet jede Umgebung von s in S beide Seiten von TsS + s.
-
-5.4 Erste und zweite Fundamentalform
-
-Sei S ⊆ R3 eine reguläre Fläche, s ∈ S, TsS die Tangentialebene an S in s und F : U → V eine
-lokale Parametrisierung von S um s. Weiter sei p := F−1(s).
-
-Definition 76
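-Sei $I_S \in \mathbb{R}^{2 \times 2}$ definiert als
-
-$$I_S := \begin{pmatrix} g_{1,1}(s) & g_{1,2}(s) \\ g_{1,2}(s) & g_{2,2}(s) \end{pmatrix} = \begin{pmatrix} E(s) & F(s) \\ F(s) & G(s) \end{pmatrix}$$
-mit
-$$g_{i,j} = g_s(D_pF(e_i), D_pF(e_j)) = \left\langle \frac{\partial F}{\partial u_i}(p), \frac{\partial F}{\partial u_j}(p) \right\rangle \quad i, j \in \{ 1, 2 \}$$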
-
-Die Matrix IS heißt erste Fundamentalform von S bzgl. der Parametrisierung F .
-
-Bemerkung 80
-a) Die Einschränkung des Standardskalarproduktes des R3 auf TsS macht TsS zu einem
-
-euklidischen Vektorraum.
-
-b) {DpF (e1), DpF (e2) } ist eine Basis von TsS.
-
-c) Bzgl. der Basis {DpF (e1), DpF (e2) } hat das Standardskalarprodukt aus Bemer-
-kung 80.a die Darstellungsmatrix IS .
-
-d) gi,j(s) ist eine differenzierbare Funktion von s.
-
-Bemerkung 81
-
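-$$\det(I_S) = \left\| \frac{\partial F}{\partial u_1}(p) \times \frac{\partial F}{\partial u_2}(p) \right\|^2$$
-
-Beweis: Sei $\frac{\partial F}{\partial u_1}(p) = \begin{pmatrix} x_1 \\ x_2 \\ x_3 \end{pmatrix}$, $\frac{\partial F}{\partial u_2}(p) = \begin{pmatrix} y_1 \\ y_2 \\ y_3 \end{pmatrix}$
-
-Dann ist $\frac{\partial F}{\partial u_1}(p) \times \frac{\partial F}{\partial u_2}(p) = \begin{pmatrix} z_1 \\ z_2 \\ z_3 \end{pmatrix}$ mit
-
-$$z_1 = x_2 y_3 - x_3 y_2$$
-
-$$z_2 = x_3 y_1 - x_1 y_3$$
-
-$$z_3 = x_1 y_2 - x_2 y_1$$
-
-$$\Rightarrow \left\| \frac{\partial F}{\partial u_1}(p) \times \frac{\partial F}{\partial u_2}(p) \right\|^2 = z_1^2 + z_2^2 + z_3^2$$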
-
-
-
-96 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM
-
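-$$\begin{aligned}
-\det(I_S) &= g_{1,1} g_{2,2} - g_{1,2}^2 \\
-&= \left\langle \begin{pmatrix} x_1 \\ x_2 \\ x_3 \end{pmatrix}, \begin{pmatrix} x_1 \\ x_2 \\ x_3 \end{pmatrix} \right\rangle \left\langle \begin{pmatrix} y_1 \\ y_2 \\ y_3 \end{pmatrix}, \begin{pmatrix} y_1 \\ y_2 \\ y_3 \end{pmatrix} \right\rangle - \left\langle \begin{pmatrix} x_1 \\ x_2 \\ x_3 \end{pmatrix}, \begin{pmatrix} y_1 \\ y_2 \\ y_3 \end{pmatrix} \right\rangle^2 \\
-&= (x_1^2 + x_2^2 + x_3^2)(y_1^2 + y_2^2 + y_3^2) - (x_1 y_1 + x_2 y_2 + x_3 y_3)^2
-\end{aligned}$$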
-
-Definition 77
-
-a) Das Differential $dA = \sqrt{\det(I)} \, du_1 \, du_2$ heißt Flächenelement von S bzgl. der Para-
-metrisierung F .
-
-b) Für eine Funktion $f : V \to \mathbb{R}$ heißt
-$$\int_V f \, dA := \int_U f(\underbrace{F(u_1, u_2)}_{=: s}) \sqrt{\det I(s)} \, du_1 \, du_2$$
-
-der Wert des Integrals von f über V , falls das Integral rechts existiert.
-
-Bemerkung 82
-a) $\int_V f \, dA$ ist unabhängig von der gewählten Parametrisierung.
-
-b) Sei $f : S \to \mathbb{R}$ eine Funktion, die im Sinne von Definition 77.b lokal integrierbar ist.
-
-Dann ist $\int_S f \, dA$ wohldefiniert, falls (z. B.) S kompakt ist.
-
-Etwa:
-$$\int_S f \, dA = \sum_{i=1}^{n} \int_{V_i} f \, dA - \sum_{i \neq j} \int_{V_i \cap V_j} f \, dA + \sum_{i,j,k} \int_{V_i \cap V_j \cap V_k} f \, dA - \dots$$
-
-Beweis:
-
-a) Mit Transformationsformel.
-
-b) Ist dem Leser überlassen.
-
-Proposition 5.1
-Sei $S \subseteq \mathbb{R}^3$ eine reguläre, orientierbare Fläche mit glattem Normalenfeld $n : S \to S^2$.
-Dann gilt:
-
-a) n induziert für jedes $s \in S$ eine lineare Abbildung $d_s n : T_sS \to T_{n(s)}S^2$ durch
-
-$$d_s n(x) = \frac{d}{dt} \, n(\underbrace{s \text{ „+“ } tx}_{\text{Soll auf Fläche } S \text{ bleiben}}) \Big|_{t=0}$$
-
-Die Abbildung dsn heißt Weingarten-Abbildung
-
-
-
-97 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM
-
-b) $T_{n(s)}S^2 = T_sS$.
-
-c) dsn ist ein Endomorphismus von TsS.
-
-d) dsn ist selbstadjungiert bzgl. des Skalarproduktes IS .
-
-Hinweis: Die Weingarten-Abbildung wird auch Formoperator genannt.
-
-
-
-98 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM
-
-Beweis:
-
-a) Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken.
-
-b) $T_{n(s)}S^2 = \langle n(s) \rangle^\perp = T_sS$
-
-c) Wegen Proposition 5.1 (a) ist $d_s n$ ein Homomorphismus.
-
-d) Zu zeigen: $\forall x, y \in T_sS : \langle x, d_s n(y) \rangle = \langle d_s n(x), y \rangle$
-Aufgrund der Bilinearität des Skalarproduktes genügt es diese Eigenschaft für die
-Basisvektoren zu zeigen.
-
-Sei $x_i = D_pF(e_i) = \frac{\partial F}{\partial u_i}(p)$, $i = 1, 2$
-
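-Beh.: $\langle x_i, d_s n(x_j) \rangle = -\left\langle \frac{\partial^2 F}{\partial u_i \partial u_j}(p), n(s) \right\rangle$
-
-Da die rechte Seite symmetrisch in i und j ist, folgt daraus
-$\Rightarrow \langle x_i, d_s n(x_j) \rangle = \langle x_j, d_s n(x_i) \rangle$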
-
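-Bew.: $0 = \left\langle \frac{\partial F}{\partial u_i}(p + t e_j), n(p + t e_j) \right\rangle$
-
-$$\begin{aligned}
-\Rightarrow 0 &= \frac{d}{dt} \left( \left\langle \frac{\partial F}{\partial u_i}(p + t e_j), n(p + t e_j) \right\rangle \right) \Big|_{t=0} \\
-&= \Big\langle \underbrace{\frac{d}{dt} \frac{\partial F}{\partial u_i}(p + t e_j) \Big|_{t=0}}_{\frac{\partial^2 F}{\partial u_j \partial u_i}(p)}, n(s) \Big\rangle + \big\langle x_i, d_s n \underbrace{D_pF(e_j)}_{x_j} \big\rangle
-\end{aligned}$$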
-
-Definition 78
-Die durch −dsn definierte symmetrische Bilinearform auf TsS heißt zweite Fundamental-
-form von S in s bzgl. F .
-
-Man schreibt: IIs(x, y) = 〈−dsn(x), y〉 = Is(−dsn(x), y)
-
-Bemerkung 83
-Bezüglich der Basis $\{ x_1, x_2 \}$ von $T_sS$ hat $II_s$ die Darstellungsmatrix
-
-$$(h^{(s)}_{i,j})_{i,j=1,2} \quad \text{mit } h_{i,j}(s) = \left\langle \frac{\partial^2 F}{\partial u_i \partial u_j}(p), n(s) \right\rangle$$
-
-Proposition 5.2
-Sei γ : [−ε, ε]→ S eine nach Bogenlänge parametrisierte Kurve mit γ(0) = s. Dann gilt:
-
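-$$\kappa_{\text{Nor}}(s, \gamma) = II_s(\gamma'(0), \gamma'(0))$$
-
-Beweis: Nach Definition 74 ist $\kappa_{\text{Nor}}(s, \gamma) = \langle \gamma''(0), n(s) \rangle$. Nach Voraussetzung gilt
-
-$$n(\gamma(t)) \perp \gamma'(t) \Leftrightarrow \langle n(\gamma(t)), \gamma'(t) \rangle = 0$$
-
-Die Ableitung nach t ergibt
-
-$$\begin{aligned}
-0 &= \frac{d}{dt} \langle n(\gamma(t)), \gamma'(t) \rangle \Big|_{t=0} \\
-&= \left\langle \frac{d}{dt} n(\gamma(t)) \Big|_{t=0}, \gamma'(0) \right\rangle + \langle n(s), \gamma''(0) \rangle
-\end{aligned}$$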
-
-
-
-99 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM
-
-$$= \langle d_s n(\gamma'(0)), \gamma'(0) \rangle + \kappa_{\text{Nor}}(s, \gamma)$$
-
-$$= -II_s(\gamma'(0), \gamma'(0)) + \kappa_{\text{Nor}}(s, \gamma)$$
-
-Folgerung 5.3
-Die beiden Definitionen von Normalkrümmung in Abschnitt 5.1 stimmen überein:
-
-$$\kappa_{\text{Nor}}(s, \gamma) = \kappa_{\text{Nor}}(s, \gamma'(0))$$
-
-Satz 5.4
-Sei S ⊆ R3 eine reguläre, orientierbare Fläche und s ∈ S.
-
-a) Die Hauptkrümmungen κ1(s), κ2(s) sind die Eigenwerte von IIs.
-
-b) Für die Gauß-Krümmung gilt: K(s) = det(IIs)
-
-Beweis:
-
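-a) $II_s$ ist symmetrisch, $T_sS$ hat also eine Orthonormalbasis aus Eigenvektoren $y_1, y_2$ von
-$II_s$. Ist $x \in T_sS$, $\|x\| = 1$, so gibt es $\varphi \in [0, 2\pi)$ mit $x = \cos\varphi \cdot y_1 + \sin\varphi \cdot y_2$.
-
-Seien $\lambda_1 \leq \lambda_2$ die Eigenwerte von $II_s$, also $II_s(y_i, y_i) = \lambda_i$. Dann gilt:
-
-$$\begin{aligned}
-II_s(x, x) &= \cos^2\varphi \, \lambda_1 + \sin^2\varphi \, \lambda_2 \\
-&= (1 - \sin^2\varphi) \lambda_1 + \sin^2\varphi \, \lambda_2 \\
-&= \lambda_1 + \sin^2\varphi \, (\lambda_2 - \lambda_1) \geq \lambda_1 \\
-&= \cos^2\varphi \, \lambda_1 + (1 - \cos^2\varphi) \lambda_2 \\
-&= \lambda_2 - \cos^2\varphi \, (\lambda_2 - \lambda_1) \leq \lambda_2
-\end{aligned}$$
-
-$\overset{\text{Prop. 5.2}}{\Longrightarrow} \lambda_1 = \min \{ \kappa_{\text{Nor}}(s, x) \mid x \in T_s^1 S \}$
-
-$\lambda_2 = \max \{ \kappa_{\text{Nor}}(s, x) \mid x \in T_s^1 S \}$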
-
-Satz 5.5 (Satz von Gauß-Bonnet)
-Sei $S \subseteq \mathbb{R}^3$ eine kompakte orientierbare reguläre Fläche. Dann gilt:
-
-$$\int_S K(s) \, dA = 2\pi \chi(S)$$
-
-Dabei ist χ(S) die Euler-Charakteristik von S.
-
-Beweis: Der Beweis wird hier nicht geführt. Er kann in „Elementare Differentialgeometrie“ von
-Christian Bär (2. Auflage), ISBN 978-3-11-022458-0, ab Seite 281 nachgelesen werden.
-
-
-
-Lösungen der Übungsaufgaben
-
-Lösung zu Aufgabe 1
-
-Teilaufgabe a) Es gilt:
-
-(i) ∅, X ∈ TX .
-
-(ii) TX ist offensichtlich unter Durchschnitten abgeschlossen, d. h. es gilt für alle U1, U2 ∈
-TX : U1 ∩ U2 ∈ TX .
-
-(iii) Auch unter beliebigen Vereinigungen ist $T_X$ abgeschlossen, d. h. es gilt für eine
-beliebige Indexmenge I und alle $U_i \in T_X$ für alle $i \in I$: $\bigcup_{i \in I} U_i \in T_X$
-
-Also ist (X,TX) ein topologischer Raum.
-
-Teilaufgabe b) Wähle x = 1, y = 0. Dann gilt x 6= y und die einzige Umgebung von x
-ist X. Da y = 0 ∈ X können also x und y nicht durch offene Mengen getrennt werden.
-(X,TX) ist also nicht hausdorffsch.
-
-Teilaufgabe c) Nach Bemerkung 4 sind metrische Räume hausdorffsch. Da (X,TX) nach
-(b) nicht hausdorffsch ist, liefert die Kontraposition der Trennungseigenschaft, dass (X,TX)
-kein metrischer Raum sein kann.
-
-Lösung zu Aufgabe 2
-
-Teilaufgabe a)
-
-Beh.: ∀a ∈ Z : { a } ist abgeschlossen.
-Sei a ∈ Z beliebig. Dann gilt:
-
-Wenn jemand diese Aufgabe gemacht hat, bitte die Lösung an info@martin-thoma.de
-schicken.
-
-Teilaufgabe b)
-
-Beh.: { −1, 1 } ist nicht offen
-Bew.: durch Widerspruch
-
-Annahme: { −1, 1 } ist offen.
-Dann gibt es $T \subseteq B$, sodass $\bigcup_{M \in T} M = \{ -1, 1 \}$. Aber alle $U \in B$ haben unendlich viele
-Elemente. Auch endlich viele Schnitte von Elementen in B haben unendlich viele Elemente
-$\Rightarrow$ keine endliche nicht-leere Menge kann in dieser Topologie offen sein $\Rightarrow \{ -1, 1 \}$ ist
-nicht offen. $\blacksquare$
-
-Teilaufgabe c)
-
-Beh.: Es gibt unendlich viele Primzahlen.
-
-
-
-101 Lösungen der Übungsaufgaben
-
-Bew.: durch Widerspruch
-
-Annahme: Es gibt nur endlich viele Primzahlen p ∈ P
-
-Dann ist
-$$\mathbb{Z} \setminus \{ -1, +1 \} \overset{\text{FS d. Arithmetik}}{=} \bigcup_{p \in \mathbb{P}} U_{0,p}$$
-endlich. Das ist ein Widerspruch zu $|\mathbb{Z}|$ ist unendlich und $|\{ -1, 1 \}|$ ist endlich. $\blacksquare$
-
-Lösung zu Aufgabe 3
-
-(a) Beh.: Die offenen Mengen von P sind Vereinigungen von Mengen der Form
-$$\prod_{j \in J} U_j \times \prod_{i \in \mathbb{N}, i \neq j} P_i$$
-wobei $J \subseteq \mathbb{N}$ endlich und $U_j \subseteq P_j$ offen ist.
-
-Beweis: Nach Definition der Produkttopologie bilden Mengen der Form
-$$\prod_{j \in J} U_j \times \prod_{i \in \mathbb{N} \setminus J} P_i$$
-wobei $J \subseteq \mathbb{N}$ endlich und $U_j \subseteq P_j$ offen $\forall j \in J$ eine Basis der Topologie.
-
-Damit sind die offenen Mengen von P Vereinigungen von Mengen der obigen
-Form. �
-
-(b) Beh.: Die Zusammenhangskomponenten von P sind alle einpunktig.
-
-Beweis: Es seien $x, y \in P$ und x sowie y liegen in der gleichen Zusammenhangs-
-komponente $Z \subseteq P$. Da Z zusammenhängend ist und $p_i : P \to P_i$ für alle $i \in \mathbb{N}$
-stetig ist, ist $p_i(Z) \subseteq P_i$ zusammenhängend für alle $i \in \mathbb{N}$. Die zusammenhängenden
-Mengen von $P_i$ sind genau $\{ 0 \}$ und $\{ 1 \}$, d. h. für alle $i \in \mathbb{N}$ gilt entweder
-$p_i(Z) \subseteq \{ 0 \}$ oder $p_i(Z) \subseteq \{ 1 \}$. Es sei $z_i \in \{ 0, 1 \}$ so, dass $p_i(Z) \subseteq \{ z_i \}$ für
-alle $i \in \mathbb{N}$. Dann gilt also:
-
-$$\underbrace{p_i(x)}_{= x_i} = z_i = \underbrace{p_i(y)}_{= y_i} \quad \forall i \in \mathbb{N}$$
-
-Somit folgt: $x = y$ $\blacksquare$
-
-Lösung zu Aufgabe 4
-
-(a) Beh.: GLn(R) ist nicht kompakt.
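-Bew.: $\det : GL_n(\mathbb{R}) \to \mathbb{R} \setminus \{ 0 \}$ ist stetig. Außerdem ist $\det(GL_n(\mathbb{R})) = \mathbb{R} \setminus \{ 0 \}$
-nicht kompakt. $\overset{22}{\Rightarrow}$ $GL_n(\mathbb{R})$ ist nicht kompakt. $\blacksquare$
-
-(b) Beh.: $SL_1(\mathbb{R})$ ist kompakt, für $n > 1$ ist $SL_n(\mathbb{R})$ nicht kompakt.
-Bew.: Für $SL_1(\mathbb{R})$ gilt: $SL_1(\mathbb{R}) = \{ A \in \mathbb{R}^{1 \times 1} \mid \det A = 1 \} = \begin{pmatrix} 1 \end{pmatrix} \cong \{ 1 \}$. $\overset{22}{\Rightarrow}$ $SL_1(\mathbb{R})$
-ist kompakt.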
-
-
-
-102 Lösungen der Übungsaufgaben
-
-$SL_n(\mathbb{R}) \subseteq GL_n(\mathbb{R})$ lässt sich mit einer Teilmenge des $\mathbb{R}^{n^2}$ identifizieren. Nach Satz 1.1
-sind diese genau dann kompakt, wenn sie beschränkt und abgeschlossen sind. Definiere
-nun für $n \in \mathbb{N}_{\geq 2}, m \in \mathbb{N}$:
-
-$$A_m = \operatorname{diag}_n(m, \tfrac{1}{m}, \dots, 1)$$
-
-Dann gilt: $\det A_m = 1$, d. h. $A_m \in SL_n(\mathbb{R})$, und $A_m$ ist unbeschränkt, da $\|A_m\|_\infty = m \xrightarrow[m \to \infty]{} \infty$. $\blacksquare$
-
-(c) Beh.: $P(\mathbb{R})$ ist kompakt.
-Bew.: $P(\mathbb{R}) \cong S^n /_{x \sim -x}$. Per Definition der Quotiententopologie ist die Klassenabbil-
-dung stetig. Da $S^n$ als abgeschlossene und beschränkte Teilmenge des $\mathbb{R}^{n+1}$ kompakt
-ist $\overset{22}{\Rightarrow}$ $P(\mathbb{R})$ ist kompakt. $\blacksquare$
-
-Lösung zu Aufgabe 5
-
-Die Definition von Homöomorphismus kann auf Seite 9 nachgelesen werden.
-Definition 79
-
-Seien (G, ∗) und (H, ◦) Gruppen und ϕ : G→ H eine Abbildung.
-
-ϕ heißt Homomorphismus, wenn
-
-∀g1, g2 ∈ G : ϕ(g1 ∗ g2) = ϕ(g1) ◦ ϕ(g2)
-
-gilt.
-
-Es folgt direkt:
-
-1) Sei $X = \mathbb{R}$ mit der Standardtopologie und $\varphi_1 := \operatorname{id}_{\mathbb{R}}$ und $R = (\mathbb{R}, +)$. Dann ist $\varphi_1$ ein
-Gruppenhomomorphismus und ein Homöomorphismus.
-
-2) Sei $G = (\mathbb{Z}, +)$ und $H = (\mathbb{Z}/3\mathbb{Z}, +)$. Dann ist $\varphi_2 : G \to H, x \mapsto x \bmod 3$ ein
-Gruppenhomomorphismus. Jedoch ist $\varphi_2$ nicht injektiv, also sicher kein Homöomor-
-phismus.
-
-3) Sei X ein topologischer Raum. Dann ist $\operatorname{id}_X$ ein Homöomorphismus. Da keine
-Verknüpfung auf X definiert wurde, ist X keine Gruppe und $\operatorname{id}_X$ daher auch kein Grup-
-penhomomorphismus.
-
-Also: Obwohl die Begriffe ähnlich klingen, werden sie in ganz unterschiedlichen Kontexten
-verwendet.
-
-Lösung zu Aufgabe 6
-
-Die Definition einer Isotopie kann auf Seite 20 nachgelesen werden, die einer Isometrie auf
-Seite 6.
-Definition 80
-
-Seien (G, ∗) und (H, ◦) Gruppen und ϕ : G→ H eine Abbildung.
-
-ϕ heißt Isomorphismus, wenn ϕ ein bijektiver Homomorphismus ist.
-
-Eine Isotopie ist also für Knoten definiert, Isometrien machen nur in metrischen Räumen
-Sinn und ein Isomorphismus benötigt eine Gruppenstruktur.
-
-
-
-103 Lösungen der Übungsaufgaben
-
-Lösung zu Aufgabe 7
-
-(a) Vor.: Sei M eine topologische Mannigfaltigkeit.
-Beh.: M ist wegzusammenhängend $\Leftrightarrow$ M ist zusammenhängend
-
-Beweis: „⇒“: Da M insbesondere ein topologischer Raum ist folgt diese Richtung
-direkt aus Bemerkung 23.
-
-„⇐“: Seien x, y ∈M und
-
-Z := { z ∈M | ∃Weg von x nach z }
-
-Es gilt:
-
-(i) $Z \neq \emptyset$, da M lokal wegzusammenhängend ist
-
-(ii) Z ist offen, da M lokal wegzusammenhängend ist
-
-(iii) $Z^C := \{ \tilde z \in M \mid \nexists \text{ Weg von } x \text{ nach } \tilde z \}$ ist offen
-Da M eine Mannigfaltigkeit ist, existiert zu jedem $\tilde z \in Z^C$ eine offene und
-wegzusammenhängende Umgebung $U_{\tilde z} \subseteq M$.
-
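-Es gilt sogar $U_{\tilde z} \subseteq Z^C$, denn gäbe es ein $U_{\tilde z} \ni z \in Z$, so gäbe es Wege $\gamma_2 :
-[0, 1] \to M, \gamma_2(0) = z, \gamma_2(1) = x$ und $\gamma_1 : [0, 1] \to M, \gamma_1(0) = \tilde z, \gamma_1(1) = z$.
-Dann wäre aber
-
-$$\gamma : [0, 1] \to M, \quad \gamma(x) = \begin{cases} \gamma_1(2x) & \text{falls } 0 \leq x \leq \frac{1}{2} \\ \gamma_2(2x - 1) & \text{falls } \frac{1}{2} < x \leq 1 \end{cases}$$
-
-ein stetiger Weg von $\tilde z$ nach x $\Rightarrow$ Widerspruch.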
-
-Da M zusammenhängend ist und $M = \underbrace{Z}_{\text{offen}} \cup \underbrace{Z^C}_{\text{offen}}$, sowie $Z \neq \emptyset$, folgt $Z^C = \emptyset$.
-
-Also ist M = Z wegzusammenhängend. �
-
-(b) Beh.: X ist wegzusammenhängend.
-
-Beweis: X := (R \ { 0 }) ∪ { 01, 02 } und (R \ { 0 }) ∪ { 02 } sind homöomorph zu R.
-Also sind die einzigen kritischen Punkte, die man nicht verbinden können könnte
-01 und 02.
-
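-Da $(\mathbb{R} \setminus \{ 0 }\) \cup \{ 0_1 \}$ homöomorph zu $\mathbb{R}$ ist, existiert ein Weg $\gamma_1$ von $0_1$ zu einem
-beliebigen Punkt $a \in \mathbb{R} \setminus \{ 0 \}$.
-Da $(\mathbb{R} \setminus \{ 0 \}) \cup \{ 0_2 \}$ ebenfalls homöomorph zu $\mathbb{R}$ ist, existiert außerdem ein
-Weg $\gamma_2$ von a nach $0_2$. Damit existiert ein (nicht einfacher) Weg γ von $0_1$ nach
-$0_2$. $\blacksquare$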
-
-Lösung zu Aufgabe 9
-
-Vor.: Sei (X, d) eine absolute Ebene, A,B,C ∈ X und 4ABC ein Dreieck.
-
-
-
-104 Lösungen der Übungsaufgaben
-
-(a) Beh.: AB ∼= AC ⇒ ∠ABC ∼= ∠ACB
-Bew.: Sei AB ∼= AC.
-⇒ ∃ Isometrie ϕ mit ϕ(B) = C und ϕ(C) = B und ϕ(A) = A.
-⇒ ϕ(∠ABC) = ∠ACB
-⇒ ∠ABC ∼= ∠ACB �
-
-(b) Beh.: Der längeren Seite von 4ABC liegt der größere Winkel gegenüber und umge-
-kehrt.
-Bew.: Sei d(A,C) > d(A,B). Nach §3 (i) gibt es C ′ ∈ AC+ mit d(A,C ′) = d(A,B)
-⇒ C ′ liegt zwischen A und C.
-Es gilt ]ABC ′ < ]ABC und aus Aufgabe 9 (a) folgt: ]ABC ′ = ]AC ′B.
-∠BC ′A ist ein nicht anliegender Außenwinkel zu ∠BCA Bem. 66
-
-=====⇒ ]BC ′A > ]BCA
-⇒ ]BCA < ]BC ′A = ]ABC ′ < ]ABC Sei umgekehrt ]ABC > ]BCA, kann
-wegen 1. Teil von Aufgabe 9 (b) nicht d(A,B) > d(A,C) gelten.
-Wegen Aufgabe 9 (a) kann nicht d(A,B) = d(A,C) gelten.
-⇒ d(A,B) < d(A,C) �
-
-(c) Vor.: Sei g eine Gerade, P ∈ X und P /∈ g
-Beh.: ∃! Lot
-Bew.: ÜB10 A4(a): Es gibt Geradenspiegelung ϕ an g. ϕ vertauscht die beiden
-Halbebenen bzgl. g.
-⇒ ϕ(P )P schneidet g in F .
-
-Es gibt eine Geradenspiegelung ϕ an g. ϕ vertauscht die beiden Halbebenen bzgl. g
-⇒ ϕ(P )P schneidet g in F .
-
-Sei A ∈ g\{ F }. Dann gilt ϕ(∠AFP ) = ∠AFϕ(P ) = π ⇒ ∠AFP ist rechter Winkel.
-
-Gäbe es nun G ∈ g \ { F }, so dass PG weiteres Lot von P auf g ist, wäre 4PFG
-ein Dreieck mit zwei rechten Innenwinkeln (vgl. Abbildung 5.4).
-
-·
-·
-
-A
-
-G
-
-P
-
-F
-
-g
-
-Abbildung 5.4: Zwei Lote zu einer Geraden g durch einen Punkt P
-
-Nach Folgerung 4.4 ist die Summe von zwei Innenwinkeln immer < π
-⇒ G gibt es nicht. �
-
-Lösung zu Aufgabe 10
-
-Sei f ‖ h und o. B. d. A. f ‖ g.
-f ∦ h⇒ f ∩ h 6= ∅, sei also x ∈ f ∩ h. Mit Axiom §5 folgt: Es gibt höchstens eine Parallele
-zu g durch x, da x /∈ g. Diese ist f , da x ∈ f und f ‖ g. Da aber x ∈ h, kann h nicht
-
-
-
-105 Lösungen der Übungsaufgaben
-
-parallel zu g sein, denn ansonsten gäbe es zwei Parallelen zu g durch x (f 6= h).⇒ g ∦ h �
-
-Lösung zu Aufgabe 11
-
-Sei (X, d,G) eine Geometrie, die §1-§4 erfüllt. Seien außerdem 4ABC und 4A′B′C ′
-Dreiecke, für die gilt:
-
-d(A,B) = d(A′, B′)
-
-d(A,C) = d(A′, C ′)
-
-d(B,C) = d(B′, C ′)
-
-Sei ϕ die Isometrie mit ϕ(A) = A′, ϕ(B) = B′ und ϕ(C ′) liegt in der selben Halbebene
-bzgl. AB wie C. Diese Isometrie existiert wegen §4.
-
-Es gilt d(A,C) = d(A′, C ′) = d(ϕ(A′), ϕ(C ′)) = d(A,ϕ(C ′)) und d(B,C) = d(B′, C ′) =
-d(ϕ(B′), ϕ(C ′)) = d(B,ϕ(C ′)).
-Bem. 62
-=====⇒ C = ϕ(C).
-
-Es gilt also ϕ(4A′B′C ′) = 4ABC. �
-
-
-
-Bildquellen
-
-Alle Bilder, die hier nicht aufgeführt sind, wurden von Martin Thoma erstellt.
-
-Teilweise wurden die im folgenden aufgelisteten Bilder noch leicht modifiziert.
-
-Abb. 0.1a S2: Tom Bombadil, tex.stackexchange.com/a/42865
-
-Abb. 0.1b Würfel: Jan Hlavacek, tex.stackexchange.com/a/12069
-
-Abb. 0.1e T 2: Jake, tex.stackexchange.com/a/70979/5645
-
-Abb. 1.6 Stereographische Projektion: texample.net/tikz/examples/map-projections
-
-Abb. 1.11 Knoten von Jim.belk aus der „Blue knots“-Serie:
-
-– Trivialer Knoten: commons.wikimedia.org/wiki/File:Blue_Unknot.png
-
-– Kleeblattknoten: commons.wikimedia.org/wiki/File:Blue_Trefoil_Knot.png
-
-– Achterknoten: commons.wikimedia.org/wiki/File:Blue_Figure-Eight_Knot.png
-
-– 62-Knoten: commons.wikimedia.org/wiki/File:Blue_6_2_Knot.png
-
-Abb. 1.12 Reidemeister-Züge: YAMASHITA Makoto (1, 2, 3)
-
-Abb. 1.13 Kleeblattknoten, 3-Färbung: Jim.belk, commons.wikimedia.org/wiki/File:Tricoloring.
-png
-
-Abb. 2.1 Doppeltorus: Oleg Alexandrov, commons.wikimedia.org/wiki/File:Double\_torus\_illustration.
-png
-
-Abb. 2.8 Faltungsdiagramm: Jérôme Urhausen, Email vom 11.02.2014.
-
-Abb. 3.3b 3 Pfade auf Torus: Charles Staats, tex.stackexchange.com/a/149991/5645
-
-Abb. 3.10 Überlagerung von S1 mit R: Alex, tex.stackexchange.com/a/149706/5645
-
-Abb. 4.7a Sphärisches Dreieck: Dominique Toussaint,
-commons.wikimedia.org/wiki/File:Spherical_triangle_3d_opti.png
-
-Abb. 5.1 Möbiusband: Jake, tex.stackexchange.com/a/118573/5645
-
-Abb. 5.3 Krümmung des Torus: Charles Staats, tex.stackexchange.com/a/149991/5645
-
-http://tex.stackexchange.com/a/42865/5645
-http://tex.stackexchange.com/a/12069/5645
-http://tex.stackexchange.com/a/70979/5645
-http://texample.net/tikz/examples/map-projections/
-https://commons.wikimedia.org/wiki/Category:Blue_knots
-https://commons.wikimedia.org/wiki/File:Blue_Unknot.png
-https://commons.wikimedia.org/wiki/File:Blue_Trefoil_Knot.png
-https://commons.wikimedia.org/wiki/File:Blue_Figure-Eight_Knot.png
-https://commons.wikimedia.org/wiki/File:Blue_6_2_Knot.png
-https://commons.wikimedia.org/wiki/File:Reidemeister_move_1.png
-https://commons.wikimedia.org/wiki/File:Reidemeister_move_1.png
-https://commons.wikimedia.org/wiki/File:Reidemeister_move_1.png
-https://commons.wikimedia.org/wiki/File:Tricoloring.png
-https://commons.wikimedia.org/wiki/File:Tricoloring.png
-https://commons.wikimedia.org/wiki/File:Double_torus_illustration.png
-https://commons.wikimedia.org/wiki/File:Double_torus_illustration.png
-http://tex.stackexchange.com/users/484/charles-staats
-http://tex.stackexchange.com/a/149991/5645
-http://tex.stackexchange.com/users/22467/alex
-http://tex.stackexchange.com/a/149706/5645
-https://commons.wikimedia.org/wiki/User:DemonDeLuxe
-https://commons.wikimedia.org/wiki/File:Spherical_triangle_3d_opti.png
-http://tex.stackexchange.com/users/2552/jake
-http://tex.stackexchange.com/a/118573/5645
-http://tex.stackexchange.com/users/484/charles-staats
-http://tex.stackexchange.com/a/149991/5645
-
-
-Abkürzungsverzeichnis
-
-Beh. Behauptung
-
-Bew. Beweis
-
-bzgl. bezüglich
-
-bzw. beziehungsweise
-
-ca. circa
-
-d. h. das heißt
-
-Def. Definition
-
-etc. et cetera
-
-ex. existieren
-
-Hom. Homomorphismus
-
-o. B. d. A. ohne Beschränkung der Allgemeinheit
-
-Prop. Proposition
-
-sog. sogenannte
-
-Vor. Voraussetzung
-
-vgl. vergleiche
-
-z. B. zum Beispiel
-
-zhgd. zusammenhängend
-
-z. z. zu zeigen
-
-
-
-Ergänzende Definitionen und Sätze
-
-Da dieses Skript in die Geometrie und Topologie einführen soll, sollten soweit wie möglich alle
-benötigten Begriffe definiert und erklärt werden. Die folgenden Begriffe wurden zwar verwendet,
-aber nicht erklärt, da sie Bestandteil der Vorlesungen „Analysis I und II“ sowie „Lineare Algebra
-und analytische Geometrie I und II“ sind. Jedoch will ich zumindest die Definitionen bereitstellen.
-Definition 81
-
-Sei D ⊆ R und x0 ∈ R. x0 heißt ein Häufungspunkt von D :⇔ ∃ Folge xn in D \ { x0 }
-mit xn → x0.
-
-Folgende Definition wurde dem Skript von Herrn Prof. Dr. Leuzinger für Lineare Algebra
-entnommen:
-Definition 82
-
-Es seien V und W K-Vektorräume und A(V ) und A(W ) die zugehörigen affinen Räume.
-Eine Abbildung f : V →W heißt affin, falls für alle a, b ∈ V und alle λ, µ ∈ K mit λ+µ = 1
-gilt:
-
-f(λa+ µb) = λf(a) + µf(b)
-
-Definition 83
-Sei V ein Vektorraum und S ⊆ V eine Teilmenge.
-
-S heißt eine Orthonormalbasis von V , wenn gilt:
-
-(i) S ist eine Basis von V
-
-(ii) ∀v ∈ S : ‖v‖ = 1
-
-(iii) ∀v1, v2 ∈ S : v1 6= v2 ⇒ 〈v1, v2〉 = 0
-
-Satz (Zwischenwertsatz)
-Sei a < b und f ∈ C[a, b] := C([a, b]), weiter sei y0 ∈ R und f(a) < y0 < f(b) oder
-f(b) < y0 < f(a). Dann existiert ein x0 ∈ [a, b] mit f(x0) = y0.
-
-Definition 84
-Sei V ein Vektorraum über einem Körper K und f : V → V eine lineare Abbildung.
-
-v ∈ V \ { 0 } heißt Eigenvektor :⇔ ∃λ ∈ K : f(v) = λv.
-
-Wenn ein solches λ ∈ K existiert, heißt es Eigenwert von f .
-Satz (Binomischer Lehrsatz)
-
-Sei x, y ∈ R. Dann gilt:
-
-(x+ y)n =
-
-n∑
-k=0
-
-(
-n
-
-k
-
-)
-xn−kyk ∀n ∈ N0
-
-Definition 85
-Seien a, b ∈ R3 Vektoren.
-
-a× b :=
-
-a1
-
-b3
-a3
-
-×
-a1
-
-b3
-a3
-
- =
-
-a2b3 − a3b2
-a3b1 − a1b3
-a1b2 − a2b1
-
-
-
-
-
-Symbolverzeichnis
-
-Mengenoperationen
-
-Seien A,B und M Mengen.
-
-AC Komplement von A
-P(M) Potenzmenge von M
-M Abschluss von M
-∂M Rand der Menge M
-M◦ Inneres der Menge M
-A×B Kreuzprodukt
-A ⊆ B Teilmengenbeziehung
-A ( B echte Teilmengenbeziehung
-A \B Differenzmenge
-A ∪B Vereinigung
-A ∪̇B Disjunkte Vereinigung
-A ∩B Schnitt
-
-Geometrie
-
-AB Gerade durch die Punkte A und
-B
-
-AB Strecke mit Endpunkten A und B
-4ABC Dreieck mit Eckpunkten A,B,C
-AB ∼= CD Die Strecken AB und CD sind
-
-isometrisch
-|K| Geometrische Realisierung des
-
-Simplizialkomplexes K
-
-Gruppen
-
-Sei X ein topologischer Raum und K ein Kör-
-per.
-
-Homöo(X) Homöomorphismengruppe
-Iso(X) Isometriengruppe
-GLn(K) Allgemeine lineare Gruppe (von
-
-General Linear Group)
-SLn(K) Spezielle lineare Gruppe
-PSLn(K) Projektive lineare Gruppe
-
-Perm(X) Permutationsgruppe
-Sym(X) Symmetrische Gruppe
-
-Wege
-
-Sei γ : I → X ein Weg.
-
-[γ] Homotopieklasse von γ
-γ1 ∗ γ2 Zusammenhängen von Wegen
-γ1 ∼ γ2 Homotopie von Wegen
-γ(x) Inverser Weg, also γ(x) := γ(1− x)
-C Bild eines Weges γ, also C :=
-
-γ([0, 1])
-
-Weiteres
-
-B Basis einer Topologie
-Bδ(x) δ-Kugel um x
-S Subbasis einer Topologie
-T Topologie
-
-A Atlas
-P Projektiver Raum
-〈·, ·〉 Skalarprodukt
-X/∼ X modulo ∼
-[x]∼ Äquivalenzklassen von x bzgl. ∼
-‖x‖ Norm von x
-|x| Betrag von x
-〈a〉 Erzeugnis von a
-
-Sn Sphäre
-Tn Torus
-
-f ◦ g Verkettung von f und g
-πX Projektion auf X
-f |U f eingeschränkt auf U
-f−1(M) Urbild von M
-Rg(M) Rang von M
-χ(K) Euler-Charakteristik von K
-
-
-
-110 Symbolverzeichnis
-
-∆k Standard-Simplex
-X#Y Verklebung von X und Y
-dn Lineare Abbildung aus Bemer-
-
-kung 37
-A ∼= B A ist isometrisch zu B
-f∗ Abbildung zwischen Fundamental-
-
-gruppen (vgl. Seite 49)
-
-
-
-111 Symbolverzeichnis
-
-Zahlenmengen
-
-N = { 1, 2, 3, . . . } Natürliche Zahlen
-Z = N ∪ { 0,−1,−2, . . . } Ganze Zahlen
-Q = Z ∪
-
-{
-1
-2 ,
-
-1
-3 ,
-
-2
-3
-
-}
-=
-{
-z
-n mit z ∈ Z und n ∈ Z \ { 0 }
-
-}
-Rationale Zahlen
-
-R = Q ∪
-{√
-
-2,− 3
-√
-
-3, . . .
-}
-
-Reele Zahlen
-R+ Echt positive reele Zahlen
-Rn+,0 := { (x1, . . . , xn) ∈ Rn | xn ≥ 0 } Halbraum
-R× = R \ { 0 } Einheitengruppe von R
-C = { a+ ib | a, b ∈ R } Komplexe Zahlen
-P = { 2, 3, 5, 7, . . . } Primzahlen
-H = { z ∈ C | =z > 0 } obere Halbebene
-I = [0, 1] ( R Einheitsintervall
-
-f : S1 ↪→ R2 Einbettung der Kreislinie in die Ebene
-π1(X,x) Fundamentalgruppe im topologischen Raum X um x ∈ X
-Fix(f) Menge der Fixpunkte der Abbildung f
-‖ · ‖2 2-Norm; Euklidische Norm
-κ Krümmung
-κNor Normalenkrümmung
-V (f) Nullstellenmenge von f2
-
-Krümmung
-
-DpF : R2 → R3 Lineare Abbildung mit Jacobi-Matrix in p (siehe Seite 89)
-TsS Tangentialebene an S ⊆ R3 durch s ∈ S
-dsn(x) Weingarten-Abbildung
-
-2von Vanishing Set
-
-
-
-Stichwortverzeichnis
-
-Abbildung
-affine, 107
-differenzierbare, 29
-homotope, 50
-offene, 53
-simpliziale, 35
-stetige, 9
-
-Abschluss, 3
-Abstand, 86
-Abstandsaxiom, 65
-Achterknoten, 20
-Aktion, siehe Gruppenoperation
-Anordnungsaxiome, 66
-Atlas, 24
-Außenwinkel, 70
-Axiom, 64
-Axiomensystem, 64
-
-Basis, 3
-Baum, 37
-Betti-Zahl, 41
-Bewegungsaxiom, 66
-Binormalenvektor, 89
-
-Cantorsches Diskontinuum, 22
-Ck-Struktur, 29
-
-Decktransformation, 59
-Decktransformationsgruppe, 59
-Deformationsretrakt, 47
-dicht, 3
-Diffeomorphismus, 29
-Dimension, 34
-diskret, 53
-Doppelverhältnis, 83
-Dreibein
-
-begleitendes, 89
-
-Ebene
-euklidische, 64
-
-Eigenvektor, 107
-Eigenwert, 107
-
-einfach zusammenhängend, 49
-Einheitsnormalenfeld, 90
-Euler-Charakteristik, siehe Eulerzahl
-Eulersche Polyederformel, 38
-Eulerzahl, 36
-
-Färbbarkeit, 21
-Faser, siehe Urbild
-Fläche
-
-orientierbare, 90
-reguläre, 30
-
-Flächenelement, 95
-Formoperator, siehe Weingarten-Abbildung
-Fundamentalform
-
-erste, 94
-zweite, 97
-
-Fundamentalgruppe, 47
-
-Gauß-Krümmung, 92, 91–94
-Geometrie, 64
-Gerade, 64
-
-hyperbolische, 77
-Graph, 37
-Grenzwert, 8
-Gruppe
-
-allgemeine lineare, 22, 26
-spezielle lineare, 22
-topologische, 33
-
-Gruppe operiert durch Homöomorphismen,
-61
-
-Gruppenaktion, siehe Gruppenoperation
-Gruppenoperation, 60, 60–63
-
-stetige, 61
-
-Häufungspunkt, 107
-Hülle
-
-konvexe, 34
-Halbebene, 66
-Halbgerade, 65
-Halbraum, 28
-Hauptkrümmung, 92
-Hilbert-Kurve, 19, 19
-
-
-
-113 Stichwortverzeichnis
-
-Homöomorphismengruppe, 10
-Homöomorphismus, 9
-Homologiegruppe, 41
-Homomorphismus, 101
-Homotopie, 44
-Homotopieklasse, 47
-
-Inklusionsabbildung, 47
-Innenwinkel, 70
-Inneres, 3
-Inzidenzaxiome, 64
-Isometrie, 6, 10
-Isometriegruppe, 10
-Isomorphismus, 101
-Isotopie, 20
-
-Jordankurve, 19
-geschlossene, 19
-
-Karte, 24
-Kartenwechsel, 28
-Kern
-
-offener, 3
-Kleeblattknoten, 20
-Klumpentopologie, siehe triviale Topologie
-Knoten, 20, 17–21
-
-äquivalente, 20
-trivialer, 20
-
-Knotendiagramm, 20
-kollinear, 65
-kongruent, siehe isometrisch
-Kongruenz, siehe Isometrie
-Kongruenzsatz
-
-SSS, 104
-SWS, 69
-SWW, 74
-WSW, 70
-
-Krümmung, 88, 89
-Kreis, 37
-Kreuzprodukt, 107
-Kurve, 87
-
-Länge einer, 87
-
-Lage
-allgemeine, 34
-
-Lehrsatz
-Binomischer, 107
-
-Lie-Gruppe, 33
-liegt zwischen, 65
-Liftung, 54
-Limes, 8
-
-lokal, 3
-Lot, 86
-Lotfußpunkt, 86
-
-Möbiusband, 91
-Möbiustransformation, 80
-Mannigfaltigkeit, 24
-
-differenzierbare, 29
-geschlossene, 25
-glatte, 29
-mit Rand, 28
-
-Menge
-abgeschlossene, 2
-offene, 2
-zusammenhängende, 11
-
-Metrik, 6
-diskrete, 6
-hyperbolische, 84
-SNCF, 8
-
-Nebenwinkel, 86
-Neilsche Parabel, 27
-Normalenfeld, 90
-Normalenvektor, 87, 89
-Normalkrümmung, 91, 92, 98
-
-Oktaeder, 34
-Orthonormalbasis, 107
-
-Paraboloid
-hyperbolisches, 92
-
-Parallele, 66
-Parallelenaxiom, 64
-parametrisiert
-
-durch Bogenlänge, 87
-Parametrisierung
-
-reguläre, 30
-Polyzylinder, 17
-Produkttopologie, 4
-Projektion
-
-stereographische, 11
-Punkt, 34
-
-Quotiententopologie, 5, 10, 11
-
-Rand, 3, 28
-Raum
-
-hausdorffscher, 8
-kompakter, 14
-metrischer, 6
-projektiver, 5, 22, 25, 52
-
-
-
-114 Stichwortverzeichnis
-
-topologischer, 2
-zusammenhängender, 11
-
-Realisierung
-geometrische, 34
-
-Retraktion, 47
-
-Satz von
-Gauß-Bonnet, 98
-
-Scheitelwinkel, 86
-Seite, 34
-Sierpińskiraum, 3, 22
-Simplex, 34
-Simplizialkomplex, 34
-Simplizialkomplexe
-
-flächengleiche, 74
-Sphäre
-
-exotische, 29
-Standard-Simplex, 34
-Standardtopologie, 2
-sternförmig, 48
-Stetigkeit, 9–11
-Strecke, 65
-Struktur
-
-differenzierbare, 29
-Subbasis, 3
-
-Tangentialebene, 89, 89–90
-Teilraum, 4
-Teilraumtopologie, 4
-Teilsimplex, 34
-Topologie
-
-diskrete, 2, 6
-euklidische, 2
-feinste, 11
-triviale, 2
-Zariski, 2, 12, 15
-
-Torus, iii, 5, 38, 51, 93
-Total Unzusammenhängend, 100
-Triangulierung, 38
-
-Überdeckung, 14
-Übergangsfunktion, siehe Kartenwechsel
-Überlagerung, 51, 51–60
-
-reguläre, 59
-universelle, 57
-
-Umgebung, 3
-Umgebungsbasis, 58
-
-vanishing set, 26
-Vektorprodukt, siehe Kreuzprodukt
-Verklebung, 26
-
-verträglich, 29
-
-Würfel, 34
-Weg, 17
-
-einfacher, 17
-geschlossener, 17
-homotope, 44
-inverser, 48
-zusammengesetzter, 46
-
-Wegzusammenhang, 18
-Weingarten-Abbildung, 95
-Winkel, 70
-
-Zusammenhang, 11–14
-Zusammenhangskomponente, 13
-Zwischenwertsatz, 107
-
-
-	1 Topologische Grundbegriffe
-	1.1 Topologische Räume
-	1.2 Metrische Räume
-	1.3 Stetigkeit
-	1.4 Zusammenhang
-	1.5 Kompaktheit
-	1.6 Wege und Knoten
-	Übungsaufgaben
-
-	2 Mannigfaltigkeiten und Simplizialkomplexe
-	2.1 Topologische Mannigfaltigkeiten
-	2.2 Differenzierbare Mannigfaltigkeiten
-	2.3 Simplizialkomplex
-	Übungsaufgaben
-
-	3 Fundamentalgruppe und Überlagerungen
-	3.1 Homotopie von Wegen
-	3.2 Fundamentalgruppe
-	3.3 Überlagerungen
-	3.4 Gruppenoperationen
-
-	4 Euklidische und nichteuklidische Geometrie
-	4.1 Axiome für die euklidische Ebene
-	4.2 Weitere Eigenschaften einer euklidischen Ebene
-	4.2.1 Flächeninhalt
-
-	4.3 Hyperbolische Geometrie
-	Übungsaufgaben
-
-	5 Krümmung
-	5.1 Krümmung von Kurven
-	5.2 Tangentialebene
-	5.3 Gauß-Krümmung
-	5.4 Erste und zweite Fundamentalform
-
-	Lösungen der Übungsaufgaben
-	Bildquellen
-	Abkürzungsverzeichnis
-	Ergänzende Definitionen und Sätze
-	Symbolverzeichnis
-	Stichwortverzeichnis
-
+[[[Tika text extraction failed!]]]
\ No newline at end of file
diff --git a/requirements/dev.txt b/requirements/dev.txt
index e14a84c..108978e 100644
--- a/requirements/dev.txt
+++ b/requirements/dev.txt
@@ -1,22 +1,24 @@
-#
-# This file is autogenerated by pip-compile with python 3.11
-# To update, run:
-#
-#    pip-compile requirements/dev.in
-#
-build==0.9.0
+# This file was autogenerated by uv via the following command:
+#    uv pip compile requirements/dev.in
+build==1.2.2.post1
     # via pip-tools
-click==8.1.3
+click==8.2.0
     # via pip-tools
-packaging==22.0
+packaging==25.0
     # via build
-pep517==0.13.0
-    # via build
-pip-tools==6.12.1
+pip==25.1.1
+    # via pip-tools
+pip-tools==7.4.1
     # via -r requirements/dev.in
-wheel==0.38.4
+pyproject-hooks==1.2.0
+    # via
+    #   build
+    #   pip-tools
+setuptools==80.6.0
+    # via pip-tools
+tomli==2.2.1
+    # via
+    #   build
+    #   pip-tools
+wheel==0.45.1
     # via pip-tools
-
-# The following packages are considered to be unsafe in a requirements file:
-# pip
-# setuptools
diff --git a/requirements/main.in b/requirements/main.in
index d277654..a378426 100644
--- a/requirements/main.in
+++ b/requirements/main.in
@@ -6,6 +6,7 @@ pypdf
 requests
 rich
 tika
+playa-pdf
 python-Levenshtein
 pdftotext
 pydantic
@@ -13,3 +14,4 @@ pymupdf
 pypdfium2
 pdfrw
 lxml
+pdfrw
diff --git a/requirements/main.txt b/requirements/main.txt
index df79ee6..0d939e2 100644
--- a/requirements/main.txt
+++ b/requirements/main.txt
@@ -1,81 +1,99 @@
-#
-# This file is autogenerated by pip-compile with python 3.11
-# To update, run:
-#
-#    pip-compile requirements/main.in
-#
-borb==2.1.7
+# This file was autogenerated by uv via the following command:
+#    uv pip compile requirements/main.in
+annotated-types==0.7.0
+    # via pydantic
+borb==2.1.25
     # via -r requirements/main.in
-certifi==2022.12.7
+certifi==2025.4.26
     # via requests
-cffi==1.15.1
+cffi==1.17.1
     # via cryptography
-charset-normalizer==2.1.1
+charset-normalizer==3.4.2
     # via
     #   pdfminer-six
     #   requests
-commonmark==0.9.1
-    # via rich
-cryptography==38.0.4
-    # via pdfminer-six
-fonttools==4.38.0
+cryptography==44.0.3
+    # via
+    #   borb
+    #   pdfminer-six
+fonttools==4.58.0
     # via borb
-idna==3.4
+idna==3.10
     # via requests
-levenshtein==0.20.9
+levenshtein==0.27.1
     # via python-levenshtein
-lxml==4.9.2
-    # via -r requirements/main.in
-numpy==1.24.1
+lxml==5.4.0
+    # via
+    #   -r requirements/main.in
+    #   borb
+markdown-it-py==3.0.0
+    # via rich
+mdurl==0.1.2
+    # via markdown-it-py
+numpy==2.2.5
     # via -r requirements/main.in
-pdfminer-six==20221105
+pdfminer-six==20250327
     # via
     #   -r requirements/main.in
     #   pdfplumber
-pdfplumber==0.7.6
+pdfplumber==0.11.6
+    # via -r requirements/main.in
+pdfrw==0.4
     # via -r requirements/main.in
-pdftotext==2.2.2
+pdftotext==3.0.0
     # via -r requirements/main.in
-pillow==9.3.0
+pillow==11.2.1
     # via
     #   borb
     #   pdfplumber
     #   qrcode
-pycparser==2.21
+playa-pdf==0.5.0
+    # via -r requirements/main.in
+pycparser==2.22
     # via cffi
-pydantic==1.10.4
+pydantic==2.11.4
     # via -r requirements/main.in
-pygments==2.14.0
+pydantic-core==2.33.2
+    # via pydantic
+pygments==2.19.1
     # via rich
-pymupdf==1.21.1
-    # via -r requirements/main.in
-pypdf==3.2.0
+pymupdf==1.25.5
     # via -r requirements/main.in
-pypdfium2==3.15.0
+pypdf==5.5.0
     # via -r requirements/main.in
-python-barcode==0.14.0
+pypdfium2==4.30.1
+    # via
+    #   -r requirements/main.in
+    #   pdfplumber
+python-barcode==0.15.1
     # via borb
-python-levenshtein==0.20.9
+python-levenshtein==0.27.1
     # via -r requirements/main.in
-qrcode[pil]==7.3.1
+qrcode==8.2
     # via borb
-rapidfuzz==2.13.7
+rapidfuzz==3.13.0
     # via levenshtein
-requests==2.28.1
+requests==2.32.3
     # via
     #   -r requirements/main.in
     #   borb
     #   tika
-rich==13.0.0
+rich==14.0.0
     # via -r requirements/main.in
-tika==1.25
+setuptools==80.6.0
+    # via
+    #   borb
+    #   tika
+tika==3.1.0
     # via -r requirements/main.in
-typing-extensions==4.4.0
+typing-extensions==4.13.2
+    # via
+    #   pydantic
+    #   pydantic-core
+    #   pypdf
+    #   rich
+    #   typing-inspection
+typing-inspection==0.4.0
     # via pydantic
-urllib3==1.26.13
+urllib3==2.4.0
     # via requests
-wand==0.6.10
-    # via pdfplumber
-
-# The following packages are considered to be unsafe in a requirements file:
-# setuptools