Skip to content

Troubles running embedding metrics in Colab (BERTscore, BLEURT and Prism) #102

@asnota

Description

@asnota

Hello, I've tried to test the BERTScore, BLEURT and Prism embedding metrics from Colab, however I got a stack of errors — the errors for BLEURT and Prism come from the same Docker source:
DockerException: Error while fetching server API version: ('Connection aborted.', FileNotFoundError(2, 'No such file or directory')),

while BERTScore seems to have trouble with its dependencies:
No module named 'transformers.models.beit.configuration_beit'

Before trying the metrics, I did the heavy install, imported the library and initialized predictions and references objects:

!git clone https://github.com/GEM-benchmark/GEM-metrics
!pip install -r /content/GEM-metrics/requirements.txt -r /content/GEM-metrics/requirements-heavy.txt

import gem_metrics

list_of_predictions = ["The apple is tasty"]
list_of_references = [["The apple is tasty"]]

preds = gem_metrics.texts.Predictions(list_of_predictions)
refs = gem_metrics.texts.References(list_of_references)

result = gem_metrics.compute(preds, refs, metrics_list=['bertscore']) # same for 'bleurt' and 'prism'

The error stack for BERTScore is as follows:

[I 220919 13:17:01 __init__:170] Computing BERTScore for None.
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--distilbert-base-uncased/snapshots/043235d6088ecd3dd5fb5ca3592b6913fd516027/config.json
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "initializer_range": 0.02,
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.22.1",
  "vocab_size": 30522
}

loading file vocab.txt from cache at /root/.cache/huggingface/hub/models--distilbert-base-uncased/snapshots/043235d6088ecd3dd5fb5ca3592b6913fd516027/vocab.txt
loading file added_tokens.json from cache at None
loading file special_tokens_map.json from cache at None
loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--distilbert-base-uncased/snapshots/043235d6088ecd3dd5fb5ca3592b6913fd516027/tokenizer_config.json
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--distilbert-base-uncased/snapshots/043235d6088ecd3dd5fb5ca3592b6913fd516027/config.json
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "initializer_range": 0.02,
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.22.1",
  "vocab_size": 30522
}

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--distilbert-base-uncased/snapshots/043235d6088ecd3dd5fb5ca3592b6913fd516027/config.json
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "initializer_range": 0.02,
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.22.1",
  "vocab_size": 30522
}

---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/transformers/utils/import_utils.py in _get_module(self, module_name)

18 frames
[/usr/lib/python3.7/importlib/__init__.py](https://localhost:8080/#) in import_module(name, package)
    126             level += 1
--> 127     return _bootstrap._gcd_import(name[level:], package, level)
    128 

/usr/lib/python3.7/importlib/_bootstrap.py in _gcd_import(name, package, level)

/usr/lib/python3.7/importlib/_bootstrap.py in _find_and_load(name, import_)

/usr/lib/python3.7/importlib/_bootstrap.py in _find_and_load_unlocked(name, import_)

ModuleNotFoundError: No module named 'transformers.models.beit.configuration_beit'

The above exception was the direct cause of the following exception:

RuntimeError                              Traceback (most recent call last)
[<ipython-input-42-399945309db8>](https://localhost:8080/#) in <module>
----> 1 result = gem_metrics.compute(preds, refs, metrics_list=['bertscore']) # , 'bertscore', 'bleurt'
      2 result

[/usr/local/lib/python3.7/dist-packages/gem_metrics/__init__.py](https://localhost:8080/#) in compute(outs, refs, srcs, metrics_dict, metrics_list, cache, dataset_name)
    170             logger.info(f"Computing {metric_class.__name__} for {outs.filename}...")
    171             metric = metric_class()
--> 172             result = metric.compute_cached(cache, outs, refs)
    173             values.update(result)
    174             if cache is not None:

[/usr/local/lib/python3.7/dist-packages/gem_metrics/metric.py](https://localhost:8080/#) in compute_cached(self, cache, predictions, *args)
     80                 new_arg.assign_ids_and_unscramble(to_compute)
     81                 new_arg_list.append(new_arg)
---> 82             computed_scores = self.compute(cache, *new_arg_list)
     83         else:
     84             logger.info(

[/usr/local/lib/python3.7/dist-packages/gem_metrics/bertscore.py](https://localhost:8080/#) in compute(self, cache, predictions, references)
     31         # Use language-appropriate scorer.
     32         score = self.metric.compute(
---> 33             lang=predictions.language.alpha_2, model_type="distilbert-base-uncased"
     34         )
     35 

[/usr/local/lib/python3.7/dist-packages/datasets/metric.py](https://localhost:8080/#) in compute(self, predictions, references, **kwargs)
    436         except pa.ArrowInvalid:
    437             raise ValueError(
--> 438                 f"Predictions and/or references don't match the expected format.\n"
    439                 f"Expected format: {self.features},\n"
    440                 f"Input predictions: {predictions},\n"

[~/.cache/huggingface/modules/datasets_modules/metrics/bertscore/23c058b03785b916e9331e97245dd43a377e84fb477ebdb444aff40629e99732/bertscore.py](https://localhost:8080/#) in _compute(self, predictions, references, lang, model_type, num_layers, verbose, idf, device, batch_size, nthreads, all_layers, rescale_with_baseline, baseline_path, use_fast_tokenizer)
    174                     lang=lang,
    175                     rescale_with_baseline=rescale_with_baseline,
--> 176                     baseline_path=baseline_path,
    177                 )
    178 

[/usr/local/lib/python3.7/dist-packages/bert_score/scorer.py](https://localhost:8080/#) in __init__(self, model_type, num_layers, batch_size, nthreads, all_layers, idf, idf_sents, device, lang, rescale_with_baseline, baseline_path)
     99 
    100         self._tokenizer = get_tokenizer(self.model_type)
--> 101         self._model = get_model(self.model_type, self.num_layers, self.all_layers)
    102         self._model.to(self.device)
    103 

[/usr/local/lib/python3.7/dist-packages/bert_score/utils.py](https://localhost:8080/#) in get_model(model_type, num_layers, all_layers)
    203         model = T5EncoderModel.from_pretrained(model_type)
    204     else:
--> 205         model = AutoModel.from_pretrained(model_type)
    206     model.eval()
    207 

/usr/local/lib/python3.7/dist-packages/transformers/models/auto/auto_factory.py in from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)

/usr/local/lib/python3.7/dist-packages/transformers/models/auto/auto_factory.py in keys(self)

/usr/local/lib/python3.7/dist-packages/transformers/models/auto/auto_factory.py in <listcomp>(.0)

/usr/local/lib/python3.7/dist-packages/transformers/models/auto/auto_factory.py in _load_attr_from_module(self, model_type, attr)

/usr/local/lib/python3.7/dist-packages/transformers/models/auto/auto_factory.py in getattribute_from_module(module, attr)

/usr/local/lib/python3.7/dist-packages/transformers/utils/import_utils.py in __getattr__(self, name)

/usr/local/lib/python3.7/dist-packages/transformers/utils/import_utils.py in _get_module(self, module_name)

RuntimeError: Failed to import transformers.models.beit.configuration_beit because of the following error (look up to see its traceback):
No module named 'transformers.models.beit.configuration_beit'

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions