-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathindexRecipes.mjs
More file actions
72 lines (60 loc) · 1.84 KB
/
indexRecipes.mjs
File metadata and controls
72 lines (60 loc) · 1.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import { ChromaClient } from 'chromadb'
import SentenceTransformer from './sentence-transformer.mjs'
import fs from 'fs';
import readline from 'readline';
// Name of the Chroma collection that holds the indexed recipes.
const COLLECTION_NAME = 'recipes';

// Chroma server endpoint. The commented line below is the variant to use when
// the server listens on a different port (marked for Windows setups).
//[WINDOWS] const client = new ChromaClient({path: 'http://localhost:<ANOTHER_PORT>'});
const client = new ChromaClient({path: 'http://localhost:8000'});

// Embedding function handed to the collection. The constructor argument is a
// placeholder string; its meaning is defined in ./sentence-transformer.mjs.
const embedder = new SentenceTransformer('key-not-needed');

// Prepare the collection: drop any previous copy so every run re-indexes from
// scratch. The delete is best-effort because on a first run there is nothing
// to delete yet, and that must not abort the script. A genuine connectivity
// problem will still surface on the getOrCreateCollection call below.
try {
  await client.deleteCollection({ name: COLLECTION_NAME });
} catch (err) {
  console.warn(
    `Could not delete collection "${COLLECTION_NAME}" (continuing): ${err?.message ?? err}`
  );
}

const collection = await client.getOrCreateCollection({
  name: COLLECTION_NAME,
  embeddingFunction: embedder,
});

// Index the recipe dump, 100 records per upsert request.
await addDataInBatch('./recipes/20170107-061401-recipeitems.json', 100);
/**
 * Reads a file containing one JSON-encoded recipe per line and upserts the
 * recipes into the module-level `collection`, `batchSize` records at a time.
 *
 * For every recipe the document text is `name + '\n' + ingredients`, the id is
 * `recipe._id.$oid`, and a subset of the recipe fields is stored as metadata.
 *
 * @param {string} filePath - Path of the line-delimited JSON file to read.
 * @param {number} batchSize - Number of records sent per upsert request.
 * @returns {Promise<void>} Resolves once every record has been upserted.
 * @throws {SyntaxError} If a non-blank line is not valid JSON.
 * @throws Whatever `collection.upsert` rejects with.
 */
async function addDataInBatch(filePath, batchSize) {
  const fileStream = fs.createReadStream(filePath);
  const rl = readline.createInterface({
    input: fileStream,
    // Treat every \r\n as a single line break.
    crlfDelay: Infinity,
  });

  let ids = [];
  let metadatas = [];
  let documents = [];
  let index = 0;

  // Sends the pending batch, if any, and resets the buffers. Skipping empty
  // batches matters when the record count is an exact multiple of batchSize
  // (or the file is empty): there is then nothing left over to send.
  const flush = async () => {
    if (ids.length === 0) {
      return;
    }
    await collection.upsert({ ids, metadatas, documents });
    ids = [];
    metadatas = [];
    documents = [];
  };

  try {
    for await (const line of rl) {
      // Tolerate blank lines (e.g. a doubled newline) instead of failing in
      // JSON.parse.
      if (line.trim() === '') {
        continue;
      }

      const recipe = JSON.parse(line);
      // Replaces literal backslash-n character pairs (not real newline
      // characters) with spaces.
      const ingredients = recipe.ingredients.replace(/\\n/g, ' ');

      ids.push(recipe._id.$oid);
      documents.push(recipe.name + '\n' + ingredients);
      metadatas.push({
        name: recipe.name,
        ingredients,
        url: recipe.url,
        image: recipe.image,
        cookTime: recipe.cookTime,
        recipeYield: recipe.recipeYield,
        source: recipe.source,
      });

      index++;
      if (index % batchSize === 0) {
        await flush();
        console.log(`Added ${index} documents`);
      }
    }

    // Add the remaining documents
    await flush();
    console.log(`Added ${index} documents`);
  } finally {
    // Release the file handle even when parsing or upserting fails midway.
    rl.close();
    fileStream.destroy();
  }
}