diff --git a/plugins/pdf_reader/.CHECKSUM b/plugins/pdf_reader/.CHECKSUM index e76d185bb4..b5f096868e 100644 --- a/plugins/pdf_reader/.CHECKSUM +++ b/plugins/pdf_reader/.CHECKSUM @@ -1,15 +1,15 @@ { - "spec": "6707025dd71169a10c0706a231ec7776", - "manifest": "f5c84efadfe43ca2b0dadc3b529f45f4", - "setup": "fee7b2e1a9cb71901995eb67a195d7eb", + "spec": "4a73afeeb3f51a38671eb7c25941b8c9", + "manifest": "db2aed4fc1a423a31f969d0ca0158e76", + "setup": "9e2ee9d110e8ed975296579b81f5d548", "schemas": [ { "identifier": "extract_text/schema.py", - "hash": "35e1f75bab710442b0852a659f2a3a55" + "hash": "e9e14996e00954b0d1cbddaf98d9684f" }, { "identifier": "connection/schema.py", - "hash": "bd524b567f9638ba1c6f7e0c9e45ff2e" + "hash": "2a983a9b7aa5dd290ed28e0eb7e0c9c6" } ] } \ No newline at end of file diff --git a/plugins/pdf_reader/Dockerfile b/plugins/pdf_reader/Dockerfile index 0f9933e613..d0c34e5c13 100755 --- a/plugins/pdf_reader/Dockerfile +++ b/plugins/pdf_reader/Dockerfile @@ -1,20 +1,34 @@ -FROM --platform=linux/amd64 rapid7/insightconnect-python-3-plugin:6.1.4 +FROM --platform=linux/amd64 rapid7/insightconnect-python-3-plugin:6.4.1 AS builder + +WORKDIR /python/src + +ADD ./plugin.spec.yaml /plugin.spec.yaml +ADD ./requirements.txt /python/src/requirements.txt +ADD . /python/src + + + +RUN pip install . +RUN pip uninstall -y setuptools + +FROM --platform=linux/amd64 rapid7/insightconnect-python-3-plugin:6.4.1 LABEL organization=rapid7 LABEL sdk=python WORKDIR /python/src -ADD ./plugin.spec.yaml /plugin.spec.yaml -ADD ./requirements.txt /python/src/requirements.txt +COPY --from=builder /python/src /python/src +COPY --from=builder /plugin.spec.yaml /plugin.spec.yaml + RUN if [ -f requirements.txt ]; then pip install -r requirements.txt; fi -ADD . /python/src +ENV PYTHONPATH="/python/src:${PYTHONPATH}" -RUN python setup.py build && python setup.py install +RUN rm -rf /root/.cache; # User to run plugin code. The two supported users are: root, nobody USER nobody -ENTRYPOINT ["/usr/local/bin/komand_pdf_reader"] +ENTRYPOINT ["python", "/python/src/bin/komand_pdf_reader"] diff --git a/plugins/pdf_reader/bin/komand_pdf_reader b/plugins/pdf_reader/bin/komand_pdf_reader index 3c36db6f5d..4e1031d1ea 100755 --- a/plugins/pdf_reader/bin/komand_pdf_reader +++ b/plugins/pdf_reader/bin/komand_pdf_reader @@ -6,7 +6,7 @@ from sys import argv Name = "PDF Reader" Vendor = "rapid7" -Version = "1.0.4" +Version = "1.0.5" Description = "PDF Reader is a plugin for extracting text from a PDF file. This plugin utilizes a Python package called [pdfplumber](https://pypi.org/project/pdfplumber/)" diff --git a/plugins/pdf_reader/help.md b/plugins/pdf_reader/help.md index c927c0ae40..8ed8eec216 100644 --- a/plugins/pdf_reader/help.md +++ b/plugins/pdf_reader/help.md @@ -1,8 +1,6 @@ # Description -PDF Reader is a plugin for extracting text from a PDF file. - -This plugin utilizes a Python package called [pdfplumber](https://pypi.org/project/pdfplumber/) +PDF Reader is a plugin for extracting text from a PDF file. This plugin utilizes a Python package called [pdfplumber](https://pypi.org/project/pdfplumber/) # Key Features @@ -75,6 +73,7 @@ Example output: # Version History +* 1.0.5 - Updated dependencies | Updated SDK to the latest version (6.4.1) * 1.0.4 - Bumping requirements.txt | SDK Bump * 1.0.3 - Updated version of Pillow used. Updated to use latest SDK and refreshed with latest tooling. * 1.0.2 - Fix extracting text from PDF in Extract Text action | Update to use the `insightconnect-python-3-38-plugin:4` Docker image | Code refactor | Add input example in plugin spec and help.md diff --git a/plugins/pdf_reader/komand_pdf_reader/actions/__init__.py b/plugins/pdf_reader/komand_pdf_reader/actions/__init__.py index df697cb57c..cdb95b5ec2 100755 --- a/plugins/pdf_reader/komand_pdf_reader/actions/__init__.py +++ b/plugins/pdf_reader/komand_pdf_reader/actions/__init__.py @@ -1,4 +1,3 @@ # GENERATED BY INSIGHT-PLUGIN - DO NOT EDIT from .extract_text.action import ExtractText - diff --git a/plugins/pdf_reader/komand_pdf_reader/actions/extract_text/action.py b/plugins/pdf_reader/komand_pdf_reader/actions/extract_text/action.py index 253d0535ef..c99b72c8c2 100755 --- a/plugins/pdf_reader/komand_pdf_reader/actions/extract_text/action.py +++ b/plugins/pdf_reader/komand_pdf_reader/actions/extract_text/action.py @@ -4,7 +4,7 @@ # Custom imports below from insightconnect_plugin_runtime.exceptions import PluginException import pdfplumber -from pdfminer.pdfparser import PDFSyntaxError +from pdfplumber.utils.exceptions import PdfminerException import base64 import io @@ -29,7 +29,7 @@ def run(self, params={}): pdf_text += page[1].extract_text().replace("\n", " ") finally: pdf_file.close() - except PDFSyntaxError: + except PdfminerException: raise PluginException( cause="The provided content is not in PDF file format.", assistance="Please check that the input is correct and try again.", diff --git a/plugins/pdf_reader/komand_pdf_reader/actions/extract_text/schema.py b/plugins/pdf_reader/komand_pdf_reader/actions/extract_text/schema.py index cb9e46f90a..9b22c04c8c 100755 --- a/plugins/pdf_reader/komand_pdf_reader/actions/extract_text/schema.py +++ b/plugins/pdf_reader/komand_pdf_reader/actions/extract_text/schema.py @@ -16,7 +16,8 @@ class Output: class ExtractTextInput(insightconnect_plugin_runtime.Input): - schema = json.loads(r""" + schema = json.loads( + r""" { "type": "object", "title": "Variables", @@ -35,14 +36,16 @@ class ExtractTextInput(insightconnect_plugin_runtime.Input): ], "definitions": {} } - """) + """ + ) def __init__(self): super(self.__class__, self).__init__(self.schema) class ExtractTextOutput(insightconnect_plugin_runtime.Output): - schema = json.loads(r""" + schema = json.loads( + r""" { "type": "object", "title": "Variables", @@ -56,7 +59,8 @@ class ExtractTextOutput(insightconnect_plugin_runtime.Output): }, "definitions": {} } - """) + """ + ) def __init__(self): super(self.__class__, self).__init__(self.schema) diff --git a/plugins/pdf_reader/komand_pdf_reader/connection/schema.py b/plugins/pdf_reader/komand_pdf_reader/connection/schema.py index 10cc2e684f..f66ba8be9a 100755 --- a/plugins/pdf_reader/komand_pdf_reader/connection/schema.py +++ b/plugins/pdf_reader/komand_pdf_reader/connection/schema.py @@ -8,9 +8,11 @@ class Input: class ConnectionSchema(insightconnect_plugin_runtime.Input): - schema = json.loads(r""" + schema = json.loads( + r""" {} - """) + """ + ) def __init__(self): super(self.__class__, self).__init__(self.schema) diff --git a/plugins/pdf_reader/komand_pdf_reader/tasks/__init__.py b/plugins/pdf_reader/komand_pdf_reader/tasks/__init__.py index 7020c9a4ad..797e426edf 100644 --- a/plugins/pdf_reader/komand_pdf_reader/tasks/__init__.py +++ b/plugins/pdf_reader/komand_pdf_reader/tasks/__init__.py @@ -1,2 +1 @@ # GENERATED BY INSIGHT-PLUGIN - DO NOT EDIT - diff --git a/plugins/pdf_reader/komand_pdf_reader/triggers/__init__.py b/plugins/pdf_reader/komand_pdf_reader/triggers/__init__.py index 7020c9a4ad..797e426edf 100755 --- a/plugins/pdf_reader/komand_pdf_reader/triggers/__init__.py +++ b/plugins/pdf_reader/komand_pdf_reader/triggers/__init__.py @@ -1,2 +1 @@ # GENERATED BY INSIGHT-PLUGIN - DO NOT EDIT - diff --git a/plugins/pdf_reader/plugin.spec.yaml b/plugins/pdf_reader/plugin.spec.yaml index 936110e31e..e116e60459 100644 --- a/plugins/pdf_reader/plugin.spec.yaml +++ b/plugins/pdf_reader/plugin.spec.yaml @@ -4,7 +4,7 @@ products: [insightconnect] name: pdf_reader title: PDF Reader description: PDF Reader is a plugin for extracting text from a PDF file. This plugin utilizes a Python package called [pdfplumber](https://pypi.org/project/pdfplumber/) -version: 1.0.4 +version: 1.0.5 connection_version: 1 vendor: rapid7 supported_versions: ["Not applicable"] @@ -23,9 +23,10 @@ hub_tags: features: [] sdk: type: full - version: 6.1.4 + version: 6.4.1 user: nobody version_history: + - "1.0.5 - Updated dependencies | Updated SDK to the latest version (6.4.1)" - "1.0.4 - Bumping requirements.txt | SDK Bump" - "1.0.3 - Updated version of Pillow used. Updated to use latest SDK and refreshed with latest tooling." - "1.0.2 - Fix extracting text from PDF in Extract Text action | Update to use the `insightconnect-python-3-38-plugin:4` Docker image | Code refactor | Add input example in plugin spec and help.md" diff --git a/plugins/pdf_reader/requirements.txt b/plugins/pdf_reader/requirements.txt index 7334bba127..4a825c9e80 100755 --- a/plugins/pdf_reader/requirements.txt +++ b/plugins/pdf_reader/requirements.txt @@ -1,6 +1,5 @@ # List third-party dependencies here, separated by newlines. # All dependencies must be version-pinned, eg. requests==1.2.0 # See: https://pip.pypa.io/en/stable/user_guide/#requirements-files -pdfplumber==0.5.28 +pdfplumber==0.11.8 parameterized==0.8.1 -Pillow==10.3.0 diff --git a/plugins/pdf_reader/setup.py b/plugins/pdf_reader/setup.py index a598e2a961..afd2859303 100755 --- a/plugins/pdf_reader/setup.py +++ b/plugins/pdf_reader/setup.py @@ -2,13 +2,14 @@ from setuptools import setup, find_packages -setup(name="pdf_reader-rapid7-plugin", - version="1.0.4", - description="PDF Reader is a plugin for extracting text from a PDF file. This plugin utilizes a Python package called [pdfplumber](https://pypi.org/project/pdfplumber/)", - author="rapid7", - author_email="", - url="", - packages=find_packages(), - install_requires=['insightconnect-plugin-runtime'], # Add third-party dependencies to requirements.txt, not here! - scripts=['bin/komand_pdf_reader'] - ) +setup( + name="pdf_reader-rapid7-plugin", + version="1.0.5", + description="PDF Reader is a plugin for extracting text from a PDF file. This plugin utilizes a Python package called [pdfplumber](https://pypi.org/project/pdfplumber/)", + author="rapid7", + author_email="", + url="", + packages=find_packages(), + install_requires=["insightconnect-plugin-runtime"], # Add third-party dependencies to requirements.txt, not here! + entry_points={"console_scripts": ["komand_pdf_reader = bin.komand_pdf_reader:main"]}, +) diff --git a/plugins/pdf_reader/unit_test/test_extract_text.py b/plugins/pdf_reader/unit_test/test_extract_text.py index 39177fba44..8edbe739ea 100644 --- a/plugins/pdf_reader/unit_test/test_extract_text.py +++ b/plugins/pdf_reader/unit_test/test_extract_text.py @@ -1,8 +1,14 @@ +import os +import sys from unittest import TestCase + +from insightconnect_plugin_runtime.exceptions import PluginException +from parameterized import parameterized + +sys.path.append(os.path.abspath("../")) + from komand_pdf_reader.actions.extract_text import ExtractText from komand_pdf_reader.actions.extract_text.schema import Input, Output -from parameterized import parameterized -from insightconnect_plugin_runtime.exceptions import PluginException class TestExtractText(TestCase):