Skip to content

Commit 5f37126

Browse files
authored
Add new bibliographic entries for EMNLP 2025
1 parent 0bed3cc commit 5f37126

File tree

1 file changed

+45
-0
lines changed

1 file changed

+45
-0
lines changed

_bibliography/papers.bib

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,51 @@
11
---
22
---
33
4+
@inproceedings{zhao-etal-2025-makieval,
    title = "{MAKIE}val: A Multilingual Automatic {W}i{K}idata-based Framework for Cultural Awareness Evaluation for {LLM}s",
    author = "Zhao, Raoyuan and
      Chen, Beiduo and
      Plank, Barbara and
      Hedderich, Michael A.",
    editor = "Christodoulopoulos, Christos and
      Chakraborty, Tanmoy and
      Rose, Carolyn and
      Peng, Violet",
    booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
    month = nov,
    year = "2025",
    address = "Suzhou, China",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.findings-emnlp.1256/",
    doi = "10.18653/v1/2025.findings-emnlp.1256",
    pages = "23104--23136",
    isbn = "979-8-89176-335-7",
    abstract = "Large language models (LLMs) are used globally across many languages, but their English-centric pretraining raises concerns about cross-lingual disparities for cultural awareness, often resulting in biased outputs. However, comprehensive multilingual evaluation remains challenging due to limited benchmarks and questionable translation quality. To better assess these disparities, we introduce MAKIEval, an automatic multilingual framework for evaluating cultural awareness in LLMs across languages, regions, and topics. MAKIEval evaluates open-ended text generation, capturing how models express culturally grounded knowledge in natural language. Leveraging Wikidata{'}s multilingual structure as a cross-lingual anchor, it automatically identifies cultural entities in model outputs and links them to structured knowledge, enabling scalable, language-agnostic evaluation without manual annotation or translation. We then introduce four metrics that capture complementary dimensions of cultural awareness: granularity, diversity, cultural specificity, and consensus across languages. We assess 7 LLMs developed from different parts of the world, encompassing both open-source and proprietary systems, across 13 languages, 19 countries and regions, and 6 culturally salient topics (e.g., food, clothing). Notably, we find that models tend to exhibit stronger cultural awareness in English, suggesting that English prompts more effectively activate culturally grounded knowledge. We publicly release our code and data."
}
@inproceedings{litschko-etal-2025-make,
    title = "Make Every Letter Count: Building Dialect Variation Dictionaries from Monolingual Corpora",
    author = "Litschko, Robert and
      Blaschke, Verena and
      Burkhardt, Diana and
      Plank, Barbara and
      Frassinelli, Diego",
    editor = "Christodoulopoulos, Christos and
      Chakraborty, Tanmoy and
      Rose, Carolyn and
      Peng, Violet",
    booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
    month = nov,
    year = "2025",
    address = "Suzhou, China",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.findings-emnlp.762/",
    doi = "10.18653/v1/2025.findings-emnlp.762",
    pages = "14157--14174",
    isbn = "979-8-89176-335-7",
    abstract = "Dialects exhibit a substantial degree of variation due to the lack of a standard orthography. At the same time, the ability of Large Language Models (LLMs) to process dialects remains largely understudied. To address this gap, we use Bavarian as a case study and investigate the lexical dialect understanding capability of LLMs by examining how well they recognize and translate dialectal terms across different parts-of-speech. To this end, we introduce DiaLemma, a novel annotation framework for creating dialect variation dictionaries from monolingual data only, and use it to compile a ground truth dataset consisting of 100K human-annotated German-Bavarian word pairs. We evaluate how well nine state-of-the-art LLMs can judge Bavarian terms as dialect translations, inflected variants, or unrelated forms of a given German lemma. Our results show that LLMs perform best on nouns and lexically similar word pairs, and struggle most in distinguishing between direct translations and inflected variants. Interestingly, providing additional context in the form of example usages improves the translation performance, but reduces their ability to recognize dialect variants. This study highlights the limitations of LLMs in dealing with orthographic dialect variation and emphasizes the need for future work on adapting LLMs to dialects."
}
48+
449
@inproceedings{testoni-etal-2025-racquet,
550
title = "{RA}c{QUE}t: Unveiling the Dangers of Overlooked Referential Ambiguity in Visual {LLM}s",
651
author = "Testoni, Alberto and

0 commit comments

Comments
 (0)