Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions plugins/pdf_reader/.CHECKSUM
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
{
"spec": "6707025dd71169a10c0706a231ec7776",
"manifest": "f5c84efadfe43ca2b0dadc3b529f45f4",
"setup": "fee7b2e1a9cb71901995eb67a195d7eb",
"spec": "fef1e12e781a55fd2ad56ea71e0e4fc9",
"manifest": "db2aed4fc1a423a31f969d0ca0158e76",
"setup": "9e2ee9d110e8ed975296579b81f5d548",
"schemas": [
{
"identifier": "extract_text/schema.py",
"hash": "35e1f75bab710442b0852a659f2a3a55"
"hash": "e9e14996e00954b0d1cbddaf98d9684f"
},
{
"identifier": "connection/schema.py",
"hash": "bd524b567f9638ba1c6f7e0c9e45ff2e"
"hash": "2a983a9b7aa5dd290ed28e0eb7e0c9c6"
}
]
}
26 changes: 20 additions & 6 deletions plugins/pdf_reader/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,20 +1,34 @@
FROM --platform=linux/amd64 rapid7/insightconnect-python-3-plugin:6.1.4
FROM --platform=linux/amd64 rapid7/insightconnect-python-3-plugin:6.3.10 AS builder

WORKDIR /python/src

ADD ./plugin.spec.yaml /plugin.spec.yaml
ADD ./requirements.txt /python/src/requirements.txt
ADD . /python/src



RUN pip install .
RUN pip uninstall -y setuptools

FROM --platform=linux/amd64 rapid7/insightconnect-python-3-plugin:6.3.10

LABEL organization=rapid7
LABEL sdk=python

WORKDIR /python/src

ADD ./plugin.spec.yaml /plugin.spec.yaml
ADD ./requirements.txt /python/src/requirements.txt
COPY --from=builder /python/src /python/src
COPY --from=builder /plugin.spec.yaml /plugin.spec.yaml


RUN if [ -f requirements.txt ]; then pip install -r requirements.txt; fi

ADD . /python/src
ENV PYTHONPATH="/python/src:${PYTHONPATH}"

RUN python setup.py build && python setup.py install
RUN rm -rf /root/.cache;

# User to run plugin code. The two supported users are: root, nobody
USER nobody

ENTRYPOINT ["/usr/local/bin/komand_pdf_reader"]
ENTRYPOINT ["python", "/python/src/bin/komand_pdf_reader"]
2 changes: 1 addition & 1 deletion plugins/pdf_reader/bin/komand_pdf_reader
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ from sys import argv

Name = "PDF Reader"
Vendor = "rapid7"
Version = "1.0.4"
Version = "1.0.5"
Description = "PDF Reader is a plugin for extracting text from a PDF file. This plugin utilizes a Python package called [pdfplumber](https://pypi.org/project/pdfplumber/)"


Expand Down
5 changes: 2 additions & 3 deletions plugins/pdf_reader/help.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
# Description

PDF Reader is a plugin for extracting text from a PDF file.

This plugin utilizes a Python package called [pdfplumber](https://pypi.org/project/pdfplumber/)
PDF Reader is a plugin for extracting text from a PDF file. This plugin utilizes a Python package called [pdfplumber](https://pypi.org/project/pdfplumber/)

# Key Features

Expand Down Expand Up @@ -75,6 +73,7 @@ Example output:

# Version History

* 1.0.5 - Addressed snyk vulnerability | Updated SDK to the latest version (6.3.10)
* 1.0.4 - Bumping requirements.txt | SDK Bump
* 1.0.3 - Updated version of Pillow used. Updated to use latest SDK and refreshed with latest tooling.
* 1.0.2 - Fix extracting text from PDF in Extract Text action | Update to use the `insightconnect-python-3-38-plugin:4` Docker image | Code refactor | Add input example in plugin spec and help.md
Expand Down
1 change: 0 additions & 1 deletion plugins/pdf_reader/komand_pdf_reader/actions/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# GENERATED BY INSIGHT-PLUGIN - DO NOT EDIT

from .extract_text.action import ExtractText

Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# Custom imports below
from insightconnect_plugin_runtime.exceptions import PluginException
import pdfplumber
from pdfminer.pdfparser import PDFSyntaxError
from pdfplumber.utils.exceptions import PdfminerException
import base64
import io

Expand All @@ -29,7 +29,7 @@ def run(self, params={}):
pdf_text += page[1].extract_text().replace("\n", " ")
finally:
pdf_file.close()
except PDFSyntaxError:
except PdfminerException:
raise PluginException(
cause="The provided content is not in PDF file format.",
assistance="Please check that the input is correct and try again.",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ class Output:


class ExtractTextInput(insightconnect_plugin_runtime.Input):
schema = json.loads(r"""
schema = json.loads(
r"""
{
"type": "object",
"title": "Variables",
Expand All @@ -35,14 +36,16 @@ class ExtractTextInput(insightconnect_plugin_runtime.Input):
],
"definitions": {}
}
""")
"""
)

def __init__(self):
super(self.__class__, self).__init__(self.schema)


class ExtractTextOutput(insightconnect_plugin_runtime.Output):
schema = json.loads(r"""
schema = json.loads(
r"""
{
"type": "object",
"title": "Variables",
Expand All @@ -56,7 +59,8 @@ class ExtractTextOutput(insightconnect_plugin_runtime.Output):
},
"definitions": {}
}
""")
"""
)

def __init__(self):
super(self.__class__, self).__init__(self.schema)
6 changes: 4 additions & 2 deletions plugins/pdf_reader/komand_pdf_reader/connection/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@ class Input:


class ConnectionSchema(insightconnect_plugin_runtime.Input):
schema = json.loads(r"""
schema = json.loads(
r"""
{}
""")
"""
)

def __init__(self):
super(self.__class__, self).__init__(self.schema)
1 change: 0 additions & 1 deletion plugins/pdf_reader/komand_pdf_reader/tasks/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
# GENERATED BY INSIGHT-PLUGIN - DO NOT EDIT

Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
# GENERATED BY INSIGHT-PLUGIN - DO NOT EDIT

5 changes: 3 additions & 2 deletions plugins/pdf_reader/plugin.spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ products: [insightconnect]
name: pdf_reader
title: PDF Reader
description: PDF Reader is a plugin for extracting text from a PDF file. This plugin utilizes a Python package called [pdfplumber](https://pypi.org/project/pdfplumber/)
version: 1.0.4
version: 1.0.5
connection_version: 1
vendor: rapid7
supported_versions: ["Not applicable"]
Expand All @@ -23,9 +23,10 @@ hub_tags:
features: []
sdk:
type: full
version: 6.1.4
version: 6.3.10
user: nobody
version_history:
- "1.0.5 - Addressed snyk vulnerability | Updated SDK to the latest version (6.3.10)"
- "1.0.4 - Bumping requirements.txt | SDK Bump"
- "1.0.3 - Updated version of Pillow used. Updated to use latest SDK and refreshed with latest tooling."
- "1.0.2 - Fix extracting text from PDF in Extract Text action | Update to use the `insightconnect-python-3-38-plugin:4` Docker image | Code refactor | Add input example in plugin spec and help.md"
Expand Down
3 changes: 1 addition & 2 deletions plugins/pdf_reader/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# List third-party dependencies here, separated by newlines.
# All dependencies must be version-pinned, e.g. requests==1.2.0
# See: https://pip.pypa.io/en/stable/user_guide/#requirements-files
pdfplumber==0.5.28
pdfplumber==0.11.8
parameterized==0.8.1
Pillow==10.3.0
21 changes: 11 additions & 10 deletions plugins/pdf_reader/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@
from setuptools import setup, find_packages


setup(name="pdf_reader-rapid7-plugin",
version="1.0.4",
description="PDF Reader is a plugin for extracting text from a PDF file. This plugin utilizes a Python package called [pdfplumber](https://pypi.org/project/pdfplumber/)",
author="rapid7",
author_email="",
url="",
packages=find_packages(),
install_requires=['insightconnect-plugin-runtime'], # Add third-party dependencies to requirements.txt, not here!
scripts=['bin/komand_pdf_reader']
)
setup(
name="pdf_reader-rapid7-plugin",
version="1.0.5",
description="PDF Reader is a plugin for extracting text from a PDF file. This plugin utilizes a Python package called [pdfplumber](https://pypi.org/project/pdfplumber/)",
author="rapid7",
author_email="",
url="",
packages=find_packages(),
install_requires=["insightconnect-plugin-runtime"], # Add third-party dependencies to requirements.txt, not here!
entry_points={"console_scripts": ["komand_pdf_reader = bin.komand_pdf_reader:main"]},
)
10 changes: 8 additions & 2 deletions plugins/pdf_reader/unit_test/test_extract_text.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
import os
import sys
from unittest import TestCase

from insightconnect_plugin_runtime.exceptions import PluginException
from parameterized import parameterized

sys.path.append(os.path.abspath("../"))

from komand_pdf_reader.actions.extract_text import ExtractText
from komand_pdf_reader.actions.extract_text.schema import Input, Output
from parameterized import parameterized
from insightconnect_plugin_runtime.exceptions import PluginException


class TestExtractText(TestCase):
Expand Down
Loading