Skip to content

Commit a092c5d

Browse files
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent a7e8ded commit a092c5d

File tree

74 files changed

+30285
-29751
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

74 files changed

+30285
-29751
lines changed

argilla/docs/community/integrations/llamaindex_rag_github.ipynb

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -202,8 +202,7 @@
202202
" \".svg\",\n",
203203
" \".ico\",\n",
204204
" \".json\",\n",
205-
" \".ipynb\", # Erase this line if you want to include notebooks\n",
206-
"\n",
205+
" \".ipynb\", # Erase this line if you want to include notebooks\n",
207206
" ],\n",
208207
" GithubRepositoryReader.FilterType.EXCLUDE,\n",
209208
" ),\n",
@@ -231,9 +230,7 @@
231230
"outputs": [],
232231
"source": [
233232
"# LLM settings\n",
234-
"Settings.llm = OpenAI(\n",
235-
" model=\"gpt-3.5-turbo\", temperature=0.8, openai_api_key=openai_api_key\n",
236-
")\n",
233+
"Settings.llm = OpenAI(model=\"gpt-3.5-turbo\", temperature=0.8, openai_api_key=openai_api_key)\n",
237234
"\n",
238235
"# Load the data and create the index\n",
239236
"index = VectorStoreIndex.from_documents(documents)\n",

argilla/docs/tutorials/image_classification.ipynb

Lines changed: 26 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -93,13 +93,7 @@
9393
"from PIL import Image\n",
9494
"\n",
9595
"from datasets import load_dataset, Dataset, load_metric\n",
96-
"from transformers import (\n",
97-
" AutoImageProcessor,\n",
98-
" AutoModelForImageClassification,\n",
99-
" pipeline,\n",
100-
" Trainer,\n",
101-
" TrainingArguments\n",
102-
")\n",
96+
"from transformers import AutoImageProcessor, AutoModelForImageClassification, pipeline, Trainer, TrainingArguments\n",
10397
"\n",
10498
"import argilla as rg"
10599
]
@@ -182,7 +176,7 @@
182176
" title=\"What digit do you see on the image?\",\n",
183177
" labels=labels,\n",
184178
" )\n",
185-
" ]\n",
179+
" ],\n",
186180
")"
187181
]
188182
},
@@ -246,7 +240,7 @@
246240
"n_rows = 100\n",
247241
"\n",
248242
"hf_dataset = load_dataset(\"ylecun/mnist\", streaming=True)\n",
249-
"dataset_rows = [row for _,row in zip(range(n_rows), hf_dataset[\"train\"])]\n",
243+
"dataset_rows = [row for _, row in zip(range(n_rows), hf_dataset[\"train\"])]\n",
250244
"hf_dataset = Dataset.from_list(dataset_rows)\n",
251245
"\n",
252246
"hf_dataset"
@@ -525,7 +519,8 @@
525519
],
526520
"source": [
527521
"def greyscale_to_rgb(img) -> Image:\n",
528-
" return Image.merge('RGB', (img, img, img))\n",
522+
" return Image.merge(\"RGB\", (img, img, img))\n",
523+
"\n",
529524
"\n",
530525
"submitted_image_rgb = [\n",
531526
" {\n",
@@ -556,7 +551,7 @@
556551
"\n",
557552
"submitted_image_rgb_processed = [\n",
558553
" {\n",
559-
" \"pixel_values\": processor(sample[\"image\"], return_tensors='pt')[\"pixel_values\"],\n",
554+
" \"pixel_values\": processor(sample[\"image\"], return_tensors=\"pt\")[\"pixel_values\"],\n",
560555
" \"label\": sample[\"label\"],\n",
561556
" }\n",
562557
" for sample in submitted_image_rgb\n",
@@ -624,8 +619,8 @@
624619
"source": [
625620
"def collate_fn(batch):\n",
626621
" return {\n",
627-
" 'pixel_values': torch.stack([torch.tensor(x['pixel_values'][0]) for x in batch]),\n",
628-
" 'labels': torch.tensor([int(x['label']) for x in batch])\n",
622+
" \"pixel_values\": torch.stack([torch.tensor(x[\"pixel_values\"][0]) for x in batch]),\n",
623+
" \"labels\": torch.tensor([int(x[\"label\"]) for x in batch]),\n",
629624
" }"
630625
]
631626
},
@@ -643,6 +638,8 @@
643638
"outputs": [],
644639
"source": [
645640
"metric = load_metric(\"accuracy\", trust_remote_code=True)\n",
641+
"\n",
642+
"\n",
646643
"def compute_metrics(p):\n",
647644
" return metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids)"
648645
]
@@ -664,7 +661,7 @@
664661
" checkpoint,\n",
665662
" num_labels=len(labels),\n",
666663
" id2label={int(i): int(c) for i, c in enumerate(labels)},\n",
667-
" label2id={int(c): int(i) for i, c in enumerate(labels)}\n",
664+
" label2id={int(c): int(i) for i, c in enumerate(labels)},\n",
668665
")\n",
669666
"model.config"
670667
]
@@ -698,19 +695,19 @@
698695
],
699696
"source": [
700697
"training_args = TrainingArguments(\n",
701-
" output_dir=\"./image-classifier\",\n",
702-
" per_device_train_batch_size=16,\n",
703-
" eval_strategy=\"steps\",\n",
704-
" num_train_epochs=1,\n",
705-
" fp16=False, # True if you have a GPU with mixed precision support\n",
706-
" save_steps=100,\n",
707-
" eval_steps=100,\n",
708-
" logging_steps=10,\n",
709-
" learning_rate=2e-4,\n",
710-
" save_total_limit=2,\n",
711-
" remove_unused_columns=True,\n",
712-
" push_to_hub=False,\n",
713-
" load_best_model_at_end=True,\n",
698+
" output_dir=\"./image-classifier\",\n",
699+
" per_device_train_batch_size=16,\n",
700+
" eval_strategy=\"steps\",\n",
701+
" num_train_epochs=1,\n",
702+
" fp16=False, # True if you have a GPU with mixed precision support\n",
703+
" save_steps=100,\n",
704+
" eval_steps=100,\n",
705+
" logging_steps=10,\n",
706+
" learning_rate=2e-4,\n",
707+
" save_total_limit=2,\n",
708+
" remove_unused_columns=True,\n",
709+
" push_to_hub=False,\n",
710+
" load_best_model_at_end=True,\n",
714711
")\n",
715712
"\n",
716713
"trainer = Trainer(\n",
@@ -745,12 +742,14 @@
745742
"source": [
746743
"pipe = pipeline(\"image-classification\", model=model, image_processor=processor)\n",
747744
"\n",
745+
"\n",
748746
"def run_inference(batch):\n",
749747
" predictions = pipe(batch[\"image\"])\n",
750748
" batch[\"image_label\"] = [prediction[0][\"label\"] for prediction in predictions]\n",
751749
" batch[\"score\"] = [prediction[0][\"score\"] for prediction in predictions]\n",
752750
" return batch\n",
753751
"\n",
752+
"\n",
754753
"hf_dataset = hf_dataset.map(run_inference, batched=True)"
755754
]
756755
},

argilla/docs/tutorials/image_preference.ipynb

Lines changed: 19 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -191,11 +191,10 @@
191191
" metadata=[\n",
192192
" rg.FloatMetadataProperty(name=\"toxicity\", title=\"Toxicity score\"),\n",
193193
" rg.FloatMetadataProperty(name=\"identity_attack\", title=\"Identity attack score\"),\n",
194-
"\n",
195194
" ],\n",
196195
" vectors=[\n",
197196
" rg.VectorField(name=\"original_caption_vector\", dimensions=384),\n",
198-
" ]\n",
197+
" ],\n",
199198
")"
200199
]
201200
},
@@ -254,7 +253,7 @@
254253
"n_rows = 25\n",
255254
"\n",
256255
"hf_dataset = load_dataset(\"tomg-group-umd/pixelprose\", streaming=True)\n",
257-
"dataset_rows = [row for _,row in zip(range(n_rows), hf_dataset[\"train\"])]\n",
256+
"dataset_rows = [row for _, row in zip(range(n_rows), hf_dataset[\"train\"])]\n",
258257
"hf_dataset = Dataset.from_list(dataset_rows)\n",
259258
"\n",
260259
"hf_dataset"
@@ -341,8 +340,7 @@
341340
}
342341
],
343342
"source": [
344-
"hf_dataset = hf_dataset.filter(\n",
345-
" lambda x: any([x[\"url\"].endswith(extension) for extension in [\".jpg\", \".png\", \".jpeg\"]]))\n",
343+
"hf_dataset = hf_dataset.filter(lambda x: any([x[\"url\"].endswith(extension) for extension in [\".jpg\", \".png\", \".jpeg\"]]))\n",
346344
"\n",
347345
"hf_dataset"
348346
]
@@ -380,6 +378,7 @@
380378
"API_URL = \"https://api-inference.huggingface.co/models/black-forest-labs/FLUX.1-schnell\"\n",
381379
"headers = {\"Authorization\": f\"Bearer {os.getenv('HF_TOKEN')}\"}\n",
382380
"\n",
381+
"\n",
383382
"def query(payload):\n",
384383
" response = requests.post(API_URL, headers=headers, json=payload)\n",
385384
" if response.status_code == 200:\n",
@@ -391,9 +390,8 @@
391390
" image = query(payload)\n",
392391
" return image\n",
393392
"\n",
394-
"query({\n",
395-
"\t\"inputs\": \"Astronaut riding a horse\"\n",
396-
"})"
393+
"\n",
394+
"query({\"inputs\": \"Astronaut riding a horse\"})"
397395
]
398396
},
399397
{
@@ -426,9 +424,10 @@
426424
"def generate_image(row):\n",
427425
" caption = row[\"original_caption\"]\n",
428426
" row[\"image_1\"] = query({\"inputs\": caption})\n",
429-
" row[\"image_2\"] = query({\"inputs\": caption + \" \"}) # space to avoid caching and getting the same image\n",
427+
" row[\"image_2\"] = query({\"inputs\": caption + \" \"}) # space to avoid caching and getting the same image\n",
430428
" return row\n",
431-
" \n",
429+
"\n",
430+
"\n",
432431
"hf_dataset_with_images = hf_dataset.map(generate_image, batched=False)\n",
433432
"\n",
434433
"hf_dataset_with_images"
@@ -451,11 +450,13 @@
451450
"source": [
452451
"model = SentenceTransformer(\"TaylorAI/bge-micro-v2\")\n",
453452
"\n",
453+
"\n",
454454
"def encode_questions(batch):\n",
455455
" vectors_as_numpy = model.encode(batch[\"original_caption\"])\n",
456456
" batch[\"original_caption_vector\"] = [x.tolist() for x in vectors_as_numpy]\n",
457457
" return batch\n",
458458
"\n",
459+
"\n",
459460
"hf_dataset_with_images_vectors = hf_dataset_with_images.map(encode_questions, batched=True)"
460461
]
461462
},
@@ -474,11 +475,14 @@
474475
"metadata": {},
475476
"outputs": [],
476477
"source": [
477-
"dataset.records.log(records=hf_dataset_with_images_vectors, mapping={\n",
478-
" \"key\": \"id\",\n",
479-
" \"original_caption\": \"caption\",\n",
480-
" \"url\": \"image_original\",\n",
481-
"})"
478+
"dataset.records.log(\n",
479+
" records=hf_dataset_with_images_vectors,\n",
480+
" mapping={\n",
481+
" \"key\": \"id\",\n",
482+
" \"original_caption\": \"caption\",\n",
483+
" \"url\": \"image_original\",\n",
484+
" },\n",
485+
")"
482486
]
483487
},
484488
{

argilla/docs/tutorials/token_classification.ipynb

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -309,9 +309,7 @@
309309
"source": [
310310
"def predict_gliner(model, text, labels, threshold):\n",
311311
" entities = model.predict_entities(text, labels, threshold)\n",
312-
" return [\n",
313-
" {k: v for k, v in ent.items() if k not in {\"score\", \"text\"}} for ent in entities\n",
314-
" ]"
312+
" return [{k: v for k, v in ent.items() if k not in {\"score\", \"text\"}} for ent in entities]"
315313
]
316314
},
317315
{
@@ -330,9 +328,7 @@
330328
"data = dataset.records.to_list(flatten=True)\n",
331329
"updated_data = [\n",
332330
" {\n",
333-
" \"span_label\": predict_gliner(\n",
334-
" model=gliner_model, text=sample[\"text\"], labels=labels, threshold=0.70\n",
335-
" ),\n",
331+
" \"span_label\": predict_gliner(model=gliner_model, text=sample[\"text\"], labels=labels, threshold=0.70),\n",
336332
" \"id\": sample[\"id\"],\n",
337333
" }\n",
338334
" for sample in data\n",

0 commit comments

Comments
 (0)