From 2746eaf6480f7ee393fd46ecdee6c4c202fc0356 Mon Sep 17 00:00:00 2001
From: Azax4 <ankurkejriwal4@gmail.com>
Date: Mon, 5 May 2025 18:23:40 +0100
Subject: [PATCH] Added author page for Marten During

---
 data/xml/2025.nlp4dh.xml     | 2 +-
 data/xml/W14.xml             | 2 +-
 data/yaml/name_variants.yaml | 6 ++++++
 3 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/data/xml/2025.nlp4dh.xml b/data/xml/2025.nlp4dh.xml
index d4a8f464b6..3a0cbabd64 100644
--- a/data/xml/2025.nlp4dh.xml
+++ b/data/xml/2025.nlp4dh.xml
@@ -410,7 +410,7 @@
       <title>Mining the Past: A Comparative Study of Classical and Neural Topic Models on Historical Newspaper Archives</title>
       <author><first>Keerthana</first><last>Murugaraj</last><affiliation>University of Luxemburg</affiliation></author>
       <author><first>Salima</first><last>Lamsiyah</last><affiliation>University of Luxemburg</affiliation></author>
-      <author><first>Marten</first><last>During</last><affiliation>University of Luxemburg</affiliation></author>
+      <author id="marten-during-ul"><first>Marten</first><last>During</last><affiliation>University of Luxemburg</affiliation></author>
       <author><first>Martin</first><last>Theobald</last><affiliation>University of Luxembourg</affiliation></author>
       <pages>452-463</pages>
       <abstract>Analyzing historical discourse in large-scale newspaper archives requires scalable and interpretable methods to uncover hidden themes. This study systematically evaluates topic modeling approaches for newspaper articles from 1955 to 2018, comparing probabilistic LDA, matrix factorization NMF, and neural-based models such as Top2Vec and BERTopic across various preprocessing strategies. We benchmark these methods on topic coherence, diversity, scalability, and interpretability. While LDA is commonly used in historical text analysis, our findings demonstrate that BERTopic, leveraging contextual embeddings, consistently outperforms classical models in all tested aspects, making it a more robust choice for large-scale textual corpora. Additionally, we highlight the trade-offs between preprocessing strategies and model performance, emphasizing the importance of tailored pipeline design. These insights advance the field of historical NLP, offering concrete guidance for historians and computational social scientists in selecting the most effective topic-modeling approach for analyzing digitized archives. Our code will be publicly available on GitHub.</abstract>
diff --git a/data/xml/W14.xml b/data/xml/W14.xml
index 8ee4a605ef..e35e4da0ba 100644
--- a/data/xml/W14.xml
+++ b/data/xml/W14.xml
@@ -1151,7 +1151,7 @@
       <title>Mining the Twentieth Century’s History from the Time Magazine Corpus</title>
       <author><first>Mike</first><last>Kestemont</last></author>
       <author><first>Folgert</first><last>Karsdorp</last></author>
-      <author><first>Marten</first><last>Düring</last></author>
+      <author id="marten-during"><first>Marten</first><last>Düring</last></author>
       <pages>62–70</pages>
       <url hash="3c8cbe0e">W14-0609</url>
       <doi>10.3115/v1/W14-0609</doi>
diff --git a/data/yaml/name_variants.yaml b/data/yaml/name_variants.yaml
index e2da044638..4af770ed2d 100644
--- a/data/yaml/name_variants.yaml
+++ b/data/yaml/name_variants.yaml
@@ -10724,3 +10724,9 @@
   id: hannah-cyberey
   variants:
   - {first: Hannah, last: Chen}
+- canonical: {first: Marten, last: During}
+  comment: University of Luxembourg
+  id: marten-during-ul
+- canonical: {first: Marten, last: Düring}
+  comment: May refer to several people
+  id: marten-during