This is the repository for MosAIC, a multi-agent multimodal framework for culture-enriched image captioning.
% Citation entry for the paper "The Power of Many" (NAACL 2025, Volume 1: Long Papers).
@inproceedings{bai-etal-2025-power,
  author    = {Bai, Longju and
               Borah, Angana and
               Ignat, Oana and
               Mihalcea, Rada},
  title     = {The Power of Many: Multi-Agent Multimodal Models for Cultural Image Captioning},
  editor    = {Chiruzzo, Luis and
               Ritter, Alan and
               Wang, Lu},
  booktitle = {Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)},
  month     = apr,
  year      = {2025},
  address   = {Albuquerque, New Mexico},
  publisher = {Association for Computational Linguistics},
  pages     = {2970--2993},
  isbn      = {979-8-89176-189-6},
  url       = {https://aclanthology.org/2025.naacl-long.152/},
}