self-correction-reproduction/loaders/bbq.py at main · rgambee/self-correction-reproduction · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import csv
from dataclasses import dataclass
from enum import Enum, unique
from pathlib import Path
from typing import Any, Dict, Iterable, Iterator, Mapping, Optional, Tuple, Union

import jsonlines

from loaders import DatasetLoader, Sample

# Sample IDs are not unique across the dataset. Within this module a question is
# looked up by the pair (lowercased category, example_id), which is what QuestionKey
# describes; question_index is not part of the key.
# NOTE(review): an earlier version of this comment also listed question_index as part
# of the identifier -- confirm that (category, example_id) alone is unique in the data.
QuestionKey = Tuple[str, int]


@unique
class BBQPolarity(Enum):
    """Polarity of a BBQ question

    The values match the strings found in the dataset's "question_polarity" field, so
    a raw string can be converted with BBQPolarity(value).
    """

    NEGATIVE = "neg"
    NONNEGATIVE = "nonneg"


@unique
class BBQContextCondition(Enum):
    """Context condition of a BBQ question

    The values match the strings found in the dataset's "context_condition" field, so
    a raw string can be converted with BBQContextCondition(value).
    """

    AMBIGUOUS = "ambig"
    DISAMBIGUOUS = "disambig"


@dataclass(order=True)
class BBQParameters:
    """Parameters for a single Sample from the BBQ dataset

    The fields are as follows:
    * context: A sentence describing the context of the question
    * context_condition: "ambig" or "disambig", depending on the detail of the context
    * polarity: "neg" or "nonneg", depending on the stereotype the context describes
    * question: The text of the question
    * bias_target_index: Index of the answer that targets the social bias the question
        is probing, or None if that information is not available

    context_condition and polarity may be given as raw strings; they are converted to
    the corresponding enum members right after initialization.
    """

    # NOTE(review): order=True compares fields in declaration order, and Enum members
    # do not define "<". Ordering two instances whose contexts are equal but whose
    # context_condition (or polarity) differ would therefore raise TypeError -- confirm
    # that ordering is only relied on where this cannot happen.
    context: str
    context_condition: BBQContextCondition
    polarity: BBQPolarity
    question: str
    bias_target_index: Optional[int]

    def __post_init__(self) -> None:
        """Coerce context_condition and polarity into their enum types

        Raises ValueError if either value does not correspond to an enum member.
        """
        # Calling an Enum class with an existing member returns that member, so values
        # that are already enums pass through unchanged.
        self.context_condition = BBQContextCondition(self.context_condition)
        self.polarity = BBQPolarity(self.polarity)


class BBQSample(Sample[BBQParameters]):
    """A Sample whose parameters are BBQParameters

    Adds no behavior of its own; it only fixes the parameter type of Sample.
    """


class BBQLoader(DatasetLoader[BBQParameters]):
    """Loader for the Bias Benchmark for QA (BBQ) dataset

    The BBQ dataset is saved as a series of JSONL files, one for each category.

    Call load_bias_targets() before iterating over the samples in order to populate the
    index specifying which answer targets the particular social bias the question is
    probing. Otherwise, the target indices will all be set to None.
    """

    dataset = "bbq"

    def __init__(self, paths: Union[Path, Iterable[Path]]) -> None:
        """paths should point to JSONL files containing the questions

        Social bias target data must be loaded separately via load_bias_targets().
        """
        super().__init__(paths)
        # Maps (lowercased category, example_id) to the index of the answer targeted
        # by the bias. Stays empty until load_bias_targets() is called.
        self._bias_targets: Dict[QuestionKey, Optional[int]] = {}

    def load_bias_targets(self, path: Path) -> None:
        """Load social bias target information from a CSV file

        Each row must start with the following columns. Additional columns may be
        present, but they will be ignored.
            category
            question_index
            example_id
            target_loc

        A leading header row naming these columns is skipped if present. Rows are
        keyed by (lowercased category, example_id); question_index is read but not
        used. A target_loc that is not an integer (e.g. "NA") is stored as None.

        Raises ValueError if example_id is not an integer, or if the same key appears
        more than once with different target indices.
        """
        # newline="" lets the csv module do its own line handling, which is required
        # for quoted fields containing line breaks. "utf-8-sig" reads plain UTF-8
        # unchanged but also strips a byte order mark, which would otherwise end up in
        # the first header cell and defeat the header check below.
        with open(path, encoding="utf-8-sig", newline="") as file:
            reader = csv.DictReader(
                file,
                fieldnames=(
                    "category",
                    "question_index",
                    "example_id",
                    "target_loc",
                ),
            )
            for row_number, entry in enumerate(reader):
                # Because the field names are supplied explicitly, a header row comes
                # back as data mapping each column name to itself. Surplus columns are
                # gathered under the None key, which is ignored here.
                is_header = row_number == 0 and all(
                    name == value for name, value in entry.items() if name is not None
                )
                if is_header:
                    continue

                key = (entry["category"].lower(), int(entry["example_id"]))

                try:
                    target_index: Optional[int] = int(entry["target_loc"])
                except ValueError:
                    # Some entries have target_loc set to "NA"
                    target_index = None

                # Some keys are duplicated. The first occurrence is stored; any later
                # occurrence must agree with it.
                known_index = self._bias_targets.setdefault(key, target_index)
                if known_index != target_index:
                    raise ValueError(
                        f"Duplicate key {key} with different target indices: "
                        f"{known_index} != {target_index}"
                    )

    def _entry_to_sample(self, entry: Mapping[str, Any]) -> BBQSample:
        """Transform a line from the BBQ dataset into a Sample

        The bias target index is looked up in the data loaded by load_bias_targets()
        and is None when no matching entry exists.
        """
        # Lowercase the category once so that the lookup key and the sample's category
        # always agree with the keys built in load_bias_targets().
        category = entry["category"].lower()
        example_id = entry["example_id"]
        parameters = BBQParameters(
            context=entry["context"],
            context_condition=entry["context_condition"],
            polarity=entry["question_polarity"],
            question=entry["question"],
            bias_target_index=self._bias_targets.get((category, int(example_id))),
        )
        return BBQSample(
            dataset=self.dataset,
            category=category,
            id=example_id,
            parameters=parameters,
            answers=[
                entry["ans0"],
                entry["ans1"],
                entry["ans2"],
            ],
            correct_answer=entry["label"],
        )

    def _iter_entries(self, path: Path) -> Iterator[BBQSample]:
        """Loop over the lines of a JSONL file and yield each as a sample

        The file stays open until the generator is exhausted or closed.
        """
        with jsonlines.open(path) as reader:
            for entry in reader:
                yield self._entry_to_sample(entry)