-
Notifications
You must be signed in to change notification settings - Fork 52
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Split another prepare file that targeted more than one HF path
Signed-off-by: dafnapension <[email protected]>
- Loading branch information
1 parent
e69439a
commit 9270239
Showing 3 changed files with 46 additions and 34 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
"""Prepare and register the lmms-eval flavoured DocVQA task card.

Running this file builds a ``TaskCard`` on top of the ``lmms-lab/DocVQA``
Hugging Face dataset, checks it with ``test_card`` and stores it in the
catalog under ``cards.doc_vqa.lmms_eval``.
"""

from unitxt.blocks import LoadHF, Set, TaskCard
from unitxt.catalog import add_to_catalog, get_from_catalog
from unitxt.image_operators import ToImage
from unitxt.splitters import RenameSplits
from unitxt.templates import MultiReferenceTemplate
from unitxt.test_utils.card import test_card

# Generic QA-with-context templates from the catalog; they are offered as
# alternatives next to the card's own default template defined below.
templates = get_from_catalog("templates.qa.with_context.all")

# Default prompt for this card; references are read from the "answers" field.
template = MultiReferenceTemplate(
    input_format="{context}\n{question}\nAnswer the question using a single word or phrase.",
    references_field="answers",
    __description__="lmms-evals default template for docvqa.",
)

card = TaskCard(
    loader=LoadHF(
        path="lmms-lab/DocVQA",
        name="DocVQA",
        data_classification_policy=["public"],
        splits=["train", "test", "validation"],
    ),
    preprocess_steps=[
        # Expose the validation split under the name "test".
        # NOTE(review): the loader above also requests a "test" split --
        # confirm which of the two is meant to survive this rename.
        RenameSplits(mapper={"validation": "test"}),
        # Pass the raw "image" field through ToImage and store it as "context".
        ToImage(field="image", to_field="context"),
        Set(fields={"context_type": "image"}),
    ],
    task="tasks.qa.with_context.abstractive[metrics=[metrics.anls]]",
    # The card-specific template comes first and is also the default.
    templates=[template, *templates.items],
    default_template=template,
    __tags__={
        "license": "apache-2.0",
        "multilinguality": "monolingual",
        "modalities": ["image", "text"],
        "size_categories": "10K<n<100K",
        "task_categories": "question-answering",
        "task_ids": "extractive-qa",
    },
    # The previous text described a different dataset (cmarkea's bilingual
    # doc-vqa) and contradicted the loader path and the "monolingual" tag.
    __description__=(
        "DocVQA is a visual question answering dataset over document images: each example pairs a document image with a natural-language question and a list of accepted reference answers. This card uses the lmms-lab/DocVQA release from the Hugging Face Hub, i.e. the formatting used by the lmms-eval evaluation suite."
    ),
)

# Validate the card before (over)writing its catalog entry.
test_card(card)
add_to_catalog(card, "cards.doc_vqa.lmms_eval", overwrite=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,7 +9,8 @@ | |
], | ||
"splits": [ | ||
"train", | ||
"test" | ||
"test", | ||
"validation" | ||
] | ||
}, | ||
"preprocess_steps": [ | ||
|