72
72
- name : Install all doc and test dependencies
73
73
run : |
74
74
make install-ci
75
- make install-all-ingest
76
75
make check-licenses
77
76
78
77
lint :
@@ -273,37 +272,6 @@ jobs:
273
272
python-version : ${{ matrix.python-version }}
274
273
check-only : ' true'
275
274
276
- test_ingest_unit :
277
- strategy :
278
- matrix :
279
- python-version : [ "3.9","3.10" ]
280
- runs-on : ubuntu-latest
281
- needs : [ setup_ingest, lint ]
282
- steps :
283
- # actions/checkout MUST come before auth
284
- - uses : ' actions/checkout@v4'
285
- - name : Set up Python ${{ matrix.python-version }}
286
- uses : actions/setup-python@v5
287
- with :
288
- python-version : ${{ matrix.python-version }}
289
- - name : Get full Python version
290
- id : full-python-version
291
- run : echo version=$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))") >> $GITHUB_OUTPUT
292
- - name : Setup virtual environment
293
- uses : ./.github/actions/base-ingest-cache
294
- with :
295
- python-version : ${{ matrix.python-version }}
296
- - name : Test Ingest (unit)
297
- env :
298
- NLTK_DATA : ${{ github.workspace }}/nltk_data
299
- PYTHON : python${{ matrix.python-version }}
300
- run : |
301
- source .venv/bin/activate
302
- make install-ci
303
- make install-all-ingest
304
- PYTHONPATH=. ${PYTHON} -m pytest test_unstructured_ingest/unit
305
-
306
-
307
275
test_ingest_src :
308
276
strategy :
309
277
matrix :
@@ -378,8 +346,6 @@ jobs:
378
346
PYTHON : python${{ matrix.python-version }}
379
347
run : |
380
348
source .venv/bin/activate
381
- make install-ci
382
- make install-all-ingest
383
349
sudo apt-get update
384
350
sudo apt-get install -y libmagic-dev poppler-utils libreoffice
385
351
make install-pandoc
@@ -392,103 +358,6 @@ jobs:
392
358
./test_unstructured_ingest/test-ingest-src.sh
393
359
394
360
395
- test_ingest_dest :
396
- environment : ci
397
- strategy :
398
- matrix :
399
- python-version : ["3.9","3.10"]
400
- runs-on : ubuntu-latest-m
401
- needs : [setup_ingest, lint]
402
- steps :
403
- # actions/checkout MUST come before auth
404
- - uses : ' actions/checkout@v4'
405
- - name : Set up Python ${{ matrix.python-version }}
406
- uses : actions/setup-python@v5
407
- with :
408
- python-version : ${{ matrix.python-version }}
409
- - name : Get full Python version
410
- id : full-python-version
411
- run : echo version=$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))") >> $GITHUB_OUTPUT
412
- - name : Setup virtual environment
413
- uses : ./.github/actions/base-ingest-cache
414
- with :
415
- python-version : ${{ matrix.python-version }}
416
- - name : Setup docker-compose
417
- uses : KengoTODA/actions-setup-docker-compose@v1
418
- with :
419
- version : ' 2.22.0'
420
- - name : Test (end-to-end)
421
- env :
422
- AWS_SECRET_ACCESS_KEY : ${{ secrets.AWS_SECRET_ACCESS_KEY }}
423
- AWS_ACCESS_KEY_ID : ${{ secrets.AWS_ACCESS_KEY_ID }}
424
- S3_INGEST_TEST_ACCESS_KEY : ${{ secrets.S3_INGEST_TEST_ACCESS_KEY }}
425
- S3_INGEST_TEST_SECRET_KEY : ${{ secrets.S3_INGEST_TEST_SECRET_KEY }}
426
- AZURE_SEARCH_ENDPOINT : ${{ secrets.AZURE_SEARCH_ENDPOINT }}
427
- AZURE_SEARCH_API_KEY : ${{ secrets.AZURE_SEARCH_API_KEY }}
428
- BOX_APP_CONFIG : ${{ secrets.BOX_APP_CONFIG }}
429
- DROPBOX_APP_KEY : ${{ secrets.DROPBOX_APP_KEY }}
430
- DROPBOX_APP_SECRET : ${{ secrets.DROPBOX_APP_SECRET }}
431
- DROPBOX_REFRESH_TOKEN : ${{ secrets.DROPBOX_REFRESH_TOKEN }}
432
- GCP_INGEST_SERVICE_KEY : ${{ secrets.GCP_INGEST_SERVICE_KEY }}
433
- OPENAI_API_KEY : ${{ secrets.OPENAI_API_KEY }}
434
- MONGODB_URI : ${{ secrets.MONGODB_URI }}
435
- MONGODB_DATABASE_NAME : ${{ secrets.MONGODB_DATABASE_NAME }}
436
- AZURE_DEST_CONNECTION_STR : ${{ secrets.AZURE_DEST_CONNECTION_STR }}
437
- PINECONE_API_KEY : ${{secrets.PINECONE_API_KEY}}
438
- VECTARA_OAUTH_CLIENT_ID : ${{secrets.VECTARA_OAUTH_CLIENT_ID}}
439
- VECTARA_OAUTH_SECRET : ${{secrets.VECTARA_OAUTH_SECRET}}
440
- VECTARA_CUSTOMER_ID : ${{secrets.VECTARA_CUSTOMER_ID}}
441
- ASTRA_DB_APPLICATION_TOKEN : ${{secrets.ASTRA_DB_TOKEN}}
442
- ASTRA_DB_API_ENDPOINT : ${{secrets.ASTRA_DB_ENDPOINT}}
443
- CLARIFAI_API_KEY : ${{secrets.CLARIFAI_API_KEY}}
444
- DATABRICKS_HOST : ${{secrets.DATABRICKS_HOST}}
445
- DATABRICKS_USERNAME : ${{secrets.DATABRICKS_USERNAME}}
446
- DATABRICKS_PASSWORD : ${{secrets.DATABRICKS_PASSWORD}}
447
- DATABRICKS_CATALOG : ${{secrets.DATABRICKS_CATALOG}}
448
- OCR_AGENT : " unstructured.partition.utils.ocr_models.tesseract_ocr.OCRAgentTesseract"
449
- CI : " true"
450
- NLTK_DATA : ${{ github.workspace }}/nltk_data
451
- PYTHON : python${{ matrix.python-version }}
452
- run : |
453
- source .venv/bin/activate
454
- make install-ci
455
- make install-all-ingest
456
- sudo apt-get update
457
- sudo apt-get install -y libmagic-dev poppler-utils libreoffice
458
- make install-pandoc
459
- sudo add-apt-repository -y ppa:alex-p/tesseract-ocr5
460
- sudo apt-get update
461
- sudo apt-get install -y tesseract-ocr
462
- sudo apt-get install -y tesseract-ocr-kor
463
- sudo apt-get install diffstat
464
- tesseract --version
465
- ./test_unstructured_ingest/test-ingest-dest.sh
466
-
467
- test_ingest_help :
468
- environment : ci
469
- strategy :
470
- matrix :
471
- python-version : ["3.9","3.10","3.11", "3.12"]
472
- runs-on : ubuntu-latest
473
- needs : [setup_ingest, lint]
474
- steps :
475
- - uses : ' actions/checkout@v4'
476
- - name : Set up Python ${{ matrix.python-version }}
477
- uses : actions/setup-python@v5
478
- with :
479
- python-version : ${{ matrix.python-version }}
480
- - name : Setup virtual environment
481
- uses : ./.github/actions/base-ingest-cache
482
- with :
483
- python-version : ${{ matrix.python-version }}
484
- - name : Validate --help
485
- run : |
486
- source .venv/bin/activate
487
- make install-ci
488
- make install-all-ingest
489
- ./test_unstructured_ingest/test-help.sh
490
-
491
-
492
361
test_unstructured_api_unit :
493
362
strategy :
494
363
matrix :
0 commit comments