|
18 | 18 | }, |
19 | 19 | { |
20 | 20 | "cell_type": "code", |
21 | | - "execution_count": 1, |
| 21 | + "execution_count": 47, |
22 | 22 | "metadata": { |
23 | 23 | "id": "t1O3Je_ENtpf" |
24 | 24 | }, |
25 | 25 | "outputs": [], |
26 | 26 | "source": [ |
27 | 27 | "from pdf2image import convert_from_path\n", |
| 28 | + "from pathlib import Path\n", |
28 | 29 | "import google.generativeai as genai\n", |
29 | 30 | "import json\n", |
30 | 31 | "import os" |
|
41 | 42 | }, |
42 | 43 | { |
43 | 44 | "cell_type": "code", |
44 | | - "execution_count": 2, |
| 45 | + "execution_count": 48, |
45 | 46 | "metadata": { |
46 | 47 | "id": "rHXNl6qnN3qO" |
47 | 48 | }, |
48 | 49 | "outputs": [], |
49 | 50 | "source": [ |
50 | 51 | "# Used to securely store your API key\n", |
51 | | - "GOOGLE_API_KEY = \"AIzaSyDxxX0kIspXTg34tXfQfoTO0istS9RbrQg\" # replace 'your-api-key-here' with your actual API key\n", |
| 52 | + "GOOGLE_API_KEY = \"your-api-key-here\" # replace 'your-api-key-here' with your actual API key\n", |
52 | 53 | "\n", |
53 | 54 | "genai.configure(api_key=GOOGLE_API_KEY)" |
54 | 55 | ] |
|
64 | 65 | }, |
65 | 66 | { |
66 | 67 | "cell_type": "code", |
67 | | - "execution_count": 3, |
| 68 | + "execution_count": 49, |
68 | 69 | "metadata": { |
69 | 70 | "colab": { |
70 | 71 | "base_uri": "https://localhost:8080/", |
|
102 | 103 | }, |
103 | 104 | { |
104 | 105 | "cell_type": "code", |
105 | | - "execution_count": 4, |
| 106 | + "execution_count": 50, |
106 | 107 | "metadata": { |
107 | 108 | "id": "sErUXoOIOMKs" |
108 | 109 | }, |
|
139 | 140 | }, |
140 | 141 | { |
141 | 142 | "cell_type": "code", |
142 | | - "execution_count": 5, |
| 143 | + "execution_count": 51, |
143 | 144 | "metadata": { |
144 | 145 | "id": "kDhL4GY1OuW_" |
145 | 146 | }, |
|
161 | 162 | }, |
162 | 163 | { |
163 | 164 | "cell_type": "code", |
164 | | - "execution_count": 6, |
| 165 | + "execution_count": 52, |
165 | 166 | "metadata": { |
166 | 167 | "id": "Eh_FdZ_nO2Xx" |
167 | 168 | }, |
168 | 169 | "outputs": [], |
169 | 170 | "source": [ |
170 | | - "from pathlib import Path\n", |
171 | | - "\n", |
172 | 171 | "def image_format(image_path):\n", |
173 | 172 | " img = Path(image_path)\n", |
174 | 173 | "\n", |
|
195 | 194 | }, |
196 | 195 | { |
197 | 196 | "cell_type": "code", |
198 | | - "execution_count": 7, |
| 197 | + "execution_count": 53, |
199 | 198 | "metadata": { |
200 | 199 | "id": "gkDwnC9NQKSd" |
201 | 200 | }, |
|
218 | 217 | }, |
219 | 218 | { |
220 | 219 | "cell_type": "code", |
221 | | - "execution_count": 8, |
| 220 | + "execution_count": 54, |
222 | 221 | "metadata": {}, |
223 | 222 | "outputs": [], |
224 | 223 | "source": [ |
225 | | - "pdf_path = \"pdfs/boundaried/sku_list_2.pdf\" ## replace PDF to parse\n", |
| 224 | + "pdf_path = \"pdfs/boundaried/sku_list_2.pdf\" # replace PDF to parse\n", |
226 | 225 | "pdf_name = os.path.splitext(os.path.basename(pdf_path))[0]\n", |
227 | | - "os.makedirs(pdf_name, exist_ok=True)" |
| 226 | + "export_path = os.path.join(\"exported-jsons\", pdf_name)\n", |
| 227 | + "os.makedirs(export_path, exist_ok=True)" |
228 | 228 | ] |
229 | 229 | }, |
230 | 230 | { |
|
236 | 236 | }, |
237 | 237 | { |
238 | 238 | "cell_type": "code", |
239 | | - "execution_count": 9, |
| 239 | + "execution_count": 55, |
240 | 240 | "metadata": {}, |
241 | 241 | "outputs": [], |
242 | 242 | "source": [ |
243 | | - "images = convert_from_path(pdf_path, first_page=2, last_page=10) ## set the exact pages to parse" |
| 243 | + "images = convert_from_path(pdf_path, first_page=2, last_page=3) ## set the exact pages to parse" |
244 | 244 | ] |
245 | 245 | }, |
246 | 246 | { |
|
252 | 252 | }, |
253 | 253 | { |
254 | 254 | "cell_type": "code", |
255 | | - "execution_count": 10, |
| 255 | + "execution_count": 56, |
256 | 256 | "metadata": {}, |
257 | 257 | "outputs": [], |
258 | 258 | "source": [ |
|
274 | 274 | }, |
275 | 275 | { |
276 | 276 | "cell_type": "code", |
277 | | - "execution_count": 11, |
| 277 | + "execution_count": 57, |
278 | 278 | "metadata": {}, |
279 | 279 | "outputs": [], |
280 | 280 | "source": [ |
281 | | - "all_json_outputs = [] \n", |
| 281 | + "images_dir = Path(\"exported-jsons\") / pdf_name / \"images\"\n", |
| 282 | + "images_dir.mkdir(parents=True, exist_ok=True) # This creates the directory if it doesn't exist\n", |
282 | 283 | "\n", |
283 | | - "for i, image in enumerate(images):\n", |
284 | | - " image_path = os.path.join(pdf_name, f\"output_image_{i}.png\")\n", |
285 | | - " image.save(image_path, \"PNG\")\n", |
| 284 | + "all_json_outputs = []\n", |
286 | 285 | "\n", |
287 | | - " response_text = gemini_output(image_path, system_prompt, user_prompt)\n", |
| 286 | + "for i, image in enumerate(images):\n", |
| 287 | + " image_path = images_dir / f\"output_image_{i}.png\"\n", |
| 288 | + " image.save(image_path.as_posix(), \"PNG\") # Save the image to the specified path\n", |
| 289 | + " response_text = gemini_output(image_path.as_posix(), system_prompt, user_prompt)\n", |
288 | 290 | "\n", |
289 | 291 | " try:\n", |
290 | 292 | " json_output = json.loads(response_text)\n", |
|
303 | 305 | }, |
304 | 306 | { |
305 | 307 | "cell_type": "code", |
306 | | - "execution_count": 12, |
| 308 | + "execution_count": 58, |
307 | 309 | "metadata": {}, |
308 | 310 | "outputs": [ |
309 | 311 | { |
310 | 312 | "name": "stdout", |
311 | 313 | "output_type": "stream", |
312 | 314 | "text": [ |
313 | | - "Final JSON saved to: sku_list_2/sku_list_2.json.\n" |
| 315 | + "Final JSON saved to: exported-jsons/sku_list_2.json.\n" |
314 | 316 | ] |
315 | 317 | } |
316 | 318 | ], |
317 | 319 | "source": [ |
318 | | - "final_json_path = os.path.join(pdf_name, f\"{pdf_name}.json\")\n", |
319 | | - "with open(final_json_path, 'w') as f:\n", |
| 320 | + "export_dir = Path(\"exported-jsons\")\n", |
| 321 | + "export_json_path = export_dir / f\"{pdf_name}.json\"\n", |
| 322 | + "with open(export_json_path, 'w') as f:\n", |
320 | 323 | " json.dump(all_json_outputs, f)\n", |
321 | 324 | "\n", |
322 | | - "print(f\"Final JSON saved to: {final_json_path}.\")" |
| 325 | + "print(f\"Final JSON saved to: {export_json_path}.\")" |
323 | 326 | ] |
324 | 327 | } |
325 | 328 | ], |
|
0 commit comments