ReT-2/scripts/rag/evqa/evqa_index_and_search.sh at main · aimagelab/ReT-2 · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#!/bin/bash
# SLURM batch job that runs the EVQA retrieval pipeline of this repository.
#
# Stages executed further down in this script, in order:
#   1. src/evaluate.py --action "index"
#   2. src/evaluate.py --action "create_index"
#   3. src/evaluate.py --action "search"
#   4. src/rag/ranking_tsv2jsonl.py
#
# NOTE(review): --output, --error, --partition and --account are blank in
# this template; presumably they have to be filled in for the target cluster
# before the job is submitted -- confirm.
#SBATCH --job-name=evqa_index_and_search
#SBATCH --output=
#SBATCH --error=
# One node with a single task: the stages below use torchrun/python to start
# their own worker processes inside that task.
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
# The value of --gpus-per-node is read back below via SLURM_GPUS_PER_NODE.
#SBATCH --gpus-per-node=4
#SBATCH --mem=480G
# The value of --cpus-per-task is read back below via SLURM_CPUS_PER_TASK.
#SBATCH --cpus-per-task=16
#SBATCH --partition=
#SBATCH --account=
#SBATCH --time=15:00:00

# Abort on the first failing command so a broken stage never feeds the next.
set -e

# Batch jobs run in a non-interactive shell in which the `conda` shell
# function is usually not defined, and a bare `conda activate` then fails
# with "Your shell has not been properly configured". Load the shell hook
# first, unless the function is already available in this shell.
if ! declare -F conda >/dev/null; then
  eval "$(conda shell.bash hook)"
fi
conda activate ret2

# The relative paths used below (src/, ./rag/indices, the *.jsonl files) are
# resolved from this directory.
cd ~/ReT-2

# Environment shared by every Python stage launched from this script.
#   PYTHONPATH=.                  adds the current directory to Python's
#                                 module search path
#   TRANSFORMERS_VERBOSITY=info   presumably sets the Hugging Face
#                                 Transformers log level -- confirm
#   TOKENIZERS_PARALLELISM=false  presumably disables parallelism inside the
#                                 Hugging Face tokenizers library -- confirm
#   OMP_NUM_THREADS=1             limits OpenMP to one thread per process
export PYTHONPATH=. \
  TRANSFORMERS_VERBOSITY=info \
  TOKENIZERS_PARALLELISM=false \
  OMP_NUM_THREADS=1

# ---------------------------------------------------------------------------
# Model and index location
# ---------------------------------------------------------------------------
# The checkpoint identifier is derived from the model name below.
index_model_name='ReT2-M2KR-ColBERT-SigLIP2-ViT-L'
checkpoint_path="aimagelab/${index_model_name}"
index_dataset_name='evqa'
index_root='./rag/indices'
batch_size='256'

# ---------------------------------------------------------------------------
# Knowledge base (passed to the "index" stage)
# ---------------------------------------------------------------------------
# Passages file:
#   https://huggingface.co/datasets/aimagelab/ReT-M2KR/blob/main/jsonl/rag/kb_evqa15M.jsonl
dataset_path_index='kb_evqa15M.jsonl'
# Image directory, empty in this template. Download instructions:
#   https://huggingface.co/datasets/aimagelab/ReT-M2KR#download-images
image_root_index=''

# ---------------------------------------------------------------------------
# Queries (passed to the "search" stage)
# ---------------------------------------------------------------------------
# Query file:
#   https://huggingface.co/datasets/aimagelab/ReT-M2KR/blob/main/jsonl/rag/evqa_test.jsonl
dataset_path_query='evqa_test.jsonl'
# Image directory, empty in this template. Source of the images:
#   https://github.com/google-research/google-research/tree/master/encyclopedic_vqa#vqa-questions
image_root_query=''

# Stage 1: run src/evaluate.py with --action "index" on the knowledge-base
# passages, starting one torchrun worker per GPU allocated on the node.
#
# --cpus-per-task is repeated on srun because, starting with Slurm 22.05,
# srun no longer inherits the value requested through #SBATCH; without it the
# step may be bound to a single CPU. Falls back to Slurm's default of 1 when
# the variable is not set.
srun --cpus-per-task="${SLURM_CPUS_PER_TASK:-1}" \
  torchrun --nproc-per-node="${SLURM_GPUS_PER_NODE}" --standalone \
  src/evaluate.py \
  --action "index" \
  --index_root "${index_root}" \
  --index_model_name "${index_model_name}" \
  --index_dataset_name "${index_dataset_name}" \
  --image_root "${image_root_index}" \
  --dataset_passages_path "${dataset_path_index}" \
  --checkpoint_path "${checkpoint_path}" \
  --batch_size "${batch_size}" \
  --dataloader_num_workers 4 \
  --fp16

# Stage 2: run src/evaluate.py with --action "create_index" for the same
# index root / model / dataset triple used by the "index" stage.
# The final argument deliberately has no trailing backslash, so the command
# ends here regardless of what follows it in the file.
srun python src/evaluate.py \
  --action "create_index" \
  --index_root "${index_root}" \
  --index_model_name "${index_model_name}" \
  --index_dataset_name "${index_dataset_name}"

# Stage 3: run src/evaluate.py with --action "search" on the query set, using
# a single torchrun worker and skipping metric computation (--skip_metrics).
#
# The dataloader is given as many workers as CPUs were requested per task.
# --cpus-per-task is repeated on srun because, starting with Slurm 22.05,
# srun no longer inherits the value requested through #SBATCH; without it
# those workers may all be bound to a single CPU. Falls back to Slurm's
# default of 1 when the variable is not set.
srun --cpus-per-task="${SLURM_CPUS_PER_TASK:-1}" \
  torchrun --nproc-per-node=1 --standalone \
  src/evaluate.py \
  --action "search" \
  --index_root "${index_root}" \
  --index_model_name "${index_model_name}" \
  --index_dataset_name "${index_dataset_name}" \
  --image_root "${image_root_query}" \
  --dataset_path "${dataset_path_query}" \
  --checkpoint_path "${checkpoint_path}" \
  --batch_size "${batch_size}" \
  --dataloader_num_workers "${SLURM_CPUS_PER_TASK:-1}" \
  --fp16 \
  --skip_metrics

# Stage 4: pass the ranking.tsv found under the index directory, together
# with the knowledge-base passages file, to src/rag/ranking_tsv2jsonl.py.
# NOTE(review): ranking.tsv is presumably written by the "search" stage --
# confirm against src/evaluate.py.
ranking_tsv="${index_root}/${index_model_name}/${index_dataset_name}/ranking.tsv"
srun python src/rag/ranking_tsv2jsonl.py \
  --input_path "${ranking_tsv}" \
  --dataset_passages_path "${dataset_path_index}"