diff --git a/plugins/extractit/.CHECKSUM b/plugins/extractit/.CHECKSUM index b835e145bf..87b82e0b27 100644 --- a/plugins/extractit/.CHECKSUM +++ b/plugins/extractit/.CHECKSUM @@ -1,7 +1,7 @@ { - "spec": "f1e1c6648fc10484bf56af68de3b2c63", - "manifest": "ce3ad35db69a1fc0170b525e22cfeba4", - "setup": "4a5e6a50304d0c8d92173d265f94b312", + "spec": "55beea4ab115661cac1457e024389337", + "manifest": "77ddc2f595cea4cc092cb7ff19f85906", + "setup": "e715193c6840f534e1cedb8332e99da0", "schemas": [ { "identifier": "cve_extractor/schema.py", diff --git a/plugins/extractit/Dockerfile b/plugins/extractit/Dockerfile index 6b5bc2ba5e..9f120a10d2 100755 --- a/plugins/extractit/Dockerfile +++ b/plugins/extractit/Dockerfile @@ -1,4 +1,4 @@ -FROM --platform=linux/amd64 rapid7/insightconnect-python-3-slim-plugin:6.3.8 AS builder +FROM --platform=linux/amd64 rapid7/insightconnect-python-3-slim-plugin:6.4.1 AS builder WORKDIR /python/src @@ -11,7 +11,7 @@ ADD . /python/src RUN pip install . RUN pip uninstall -y setuptools -FROM --platform=linux/amd64 rapid7/insightconnect-python-3-slim-plugin:6.3.8 +FROM --platform=linux/amd64 rapid7/insightconnect-python-3-slim-plugin:6.4.1 LABEL organization=rapid7 LABEL sdk=python diff --git a/plugins/extractit/bin/icon_extractit b/plugins/extractit/bin/icon_extractit index 5fd9f4a7d8..8c0c9b2627 100755 --- a/plugins/extractit/bin/icon_extractit +++ b/plugins/extractit/bin/icon_extractit @@ -6,7 +6,7 @@ from sys import argv Name = "ExtractIt" Vendor = "rapid7" -Version = "3.0.13" +Version = "3.0.14" Description = "The ExtractIt plugin is a collection of data extraction actions. This plugin allows users to extract various pieces of information from blocks of text. The pieces of information this plugin can extract include IPs, URLs, file paths, dates, domains, hashes, MAC addresses, and email addresses" diff --git a/plugins/extractit/help.md b/plugins/extractit/help.md index c7313ae676..5a5b1bc95f 100644 --- a/plugins/extractit/help.md +++ b/plugins/extractit/help.md @@ -673,6 +673,7 @@ Example output: # Version History +* 3.0.14 - Updated dependency | Updated SDK to the latest version (6.4.1) * 3.0.13 - Improved range of possible 'filepaths' | Resolved issue where filenames were being populated under 'domain' | Updated SDK to latest version (6.3.8) * 3.0.12 - Updated SDK to the latest version (6.3.3) * 3.0.11 - Updated tldextract to v5.1.3 diff --git a/plugins/extractit/icon_extractit/util/extractor.py b/plugins/extractit/icon_extractit/util/extractor.py index 8bc6566811..24c6dce775 100644 --- a/plugins/extractit/icon_extractit/util/extractor.py +++ b/plugins/extractit/icon_extractit/util/extractor.py @@ -5,7 +5,7 @@ import zipfile from datetime import datetime from difflib import get_close_matches -from typing import Any, Dict, List, Union +from typing import Any, List, Union import openpyxl import pdfplumber @@ -15,7 +15,7 @@ from insightconnect_plugin_runtime.exceptions import PluginException from odf.opendocument import load from openpyxl.workbook.workbook import Worksheet -from pdfminer.pdfparser import PDFSyntaxError +from pdfplumber.utils.exceptions import PdfminerException from pdfplumber.page import Page from publicsuffix2 import PublicSuffixList @@ -199,7 +199,7 @@ def extract_content_from_file(provided_file: bytes, provided_regex: str = "") -> page_content = page_content.replace(word, word.replace("\n", "")) pdf_content += page_content return pdf_content - except PDFSyntaxError: + except PdfminerException: raise PluginException( cause="The type of the provided file is not supported.", assistance="Supported file types are text/binary, such as: PDF, DOCX, PPTX, XLSX, ODT, ODP, ODF, TXT, ZIP", diff --git a/plugins/extractit/icon_extractit/util/util.py b/plugins/extractit/icon_extractit/util/util.py index fc8b03cbeb..a69cf86326 100644 --- a/plugins/extractit/icon_extractit/util/util.py +++ b/plugins/extractit/icon_extractit/util/util.py @@ -32,22 +32,22 @@ class Regex: "mm\\dd\\yy": r"\b([0][1-9]|[1][0-2])\\([1|2][0-9]|[3][0|1]|[0]?[1-9])\\([[0-9]{2})", "mm.dd.yy": r"\b([0][1-9]|[1][0-2])\.([1|2][0-9]|[3][0|1]|[0]?[1-9])\.([0-9]{2})", "mm-dd-yy": r"\b([0][1-9]|[1][0-2])-([1|2][0-9]|[3][0|1]|[0]?[1-9])-([0-9]{2})", - "dd/mmm/yyyy": r"\b((?i)[0]?[1-9]|[1|2][0-9]|[3][0|1])\/(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\/(\d{4})", - "dd\\mmm\\yyyy": r"\b((?i)[0]?[1-9]|[1|2][0-9]|[3][0|1])\\(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\\(\d{4})", - "dd.mmm.yyyy": r"\b((?i)[0]?[1-9]|[1|2][0-9]|[3][0|1])\.(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\.(\d{4})", - "dd-mmm-yyyy": r"\b((?i)[0]?[1-9]|[1|2][0-9]|[3][0|1])-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-(\d{4})", - "dd/mmm/yy": r"\b((?i)[0]?[1-9]|[1|2][0-9]|[3][0|1])\/(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\/([0-9]{2})", - "dd\\mmm\\yy": r"\b((?i)[0]?[1-9]|[1|2][0-9]|[3][0|1])\\(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\\([0-9]{2})", - "dd.mmm.yy": r"\b((?i)[0]?[1-9]|[1|2][0-9]|[3][0|1])\.(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\.([0-9]{2})", - "dd-mmm-yy": r"\b((?i)[0]?[1-9]|[1|2][0-9]|[3][0|1])-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-([0-9]{2})", + "dd/mmm/yyyy": r"(?i)\b([0]?[1-9]|[1|2][0-9]|[3][0|1])\/(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\/(\d{4})", + "dd\\mmm\\yyyy": r"(?i)\b([0]?[1-9]|[1|2][0-9]|[3][0|1])\\(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\\(\d{4})", + "dd.mmm.yyyy": r"(?i)\b([0]?[1-9]|[1|2][0-9]|[3][0|1])\.(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\.(\d{4})", + "dd-mmm-yyyy": r"(?i)\b([0]?[1-9]|[1|2][0-9]|[3][0|1])-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-(\d{4})", + "dd/mmm/yy": r"(?i)\b([0]?[1-9]|[1|2][0-9]|[3][0|1])\/(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\/([0-9]{2})", + "dd\\mmm\\yy": r"(?i)\b([0]?[1-9]|[1|2][0-9]|[3][0|1])\\(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\\([0-9]{2})", + "dd.mmm.yy": r"(?i)\b([0]?[1-9]|[1|2][0-9]|[3][0|1])\.(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\.([0-9]{2})", + "dd-mmm-yy": r"(?i)\b([0]?[1-9]|[1|2][0-9]|[3][0|1])-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-([0-9]{2})", "yyyy/mm/dd": r"\b([0-9]{4})\/([0]?[1-9]|[1][0-2])\/([1|2][0-9]|[3][0|1]|[0]?[1-9])", "yyyy\\mm\\dd": r"\b([0-9]{4})\\([0]?[1-9]|[1][0-2])\\([1|2][0-9]|[3][0|1]|[0]?[1-9])", "yyyy.mm.dd": r"\b([0-9]{4})\.([0]?[1-9]|[1][0-2])\.([1|2][0-9]|[3][0|1]|[0]?[1-9])", "yyyy-mm-dd": r"\b([0-9]{4})-([0]?[1-9]|[1][0-2])-([1|2][0-9]|[3][0|1]|[0]?[1-9])", - "yyyy.mmm.dd": r"\b((?i)[0-9]{4}).(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\.([1|2][0-9]|[3][0|1]|[0]?[1-9])", - "yyyy-mmm-dd": r"\b((?i)[0-9]{4})-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-([1|2][0-9]|[3][0|1]|[0]?[1-9])", - "yyyy/mmm/dd": r"\b((?i)[0-9]{4})\/(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\/([1|2][0-9]|[3][0|1]|[0]?[1-9])", - "yyyy\\mmm\\dd": r"\b((?i)[0-9]{4})\\(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\\([1|2][0-9]|[3][0|1]|[0]?[1-9])", + "yyyy.mmm.dd": r"(?i)\b([0-9]{4}).(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\.([1|2][0-9]|[3][0|1]|[0]?[1-9])", + "yyyy-mmm-dd": r"(?i)\b([0-9]{4})-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-([1|2][0-9]|[3][0|1]|[0]?[1-9])", + "yyyy/mmm/dd": r"(?i)\b([0-9]{4})\/(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\/([1|2][0-9]|[3][0|1]|[0]?[1-9])", + "yyyy\\mmm\\dd": r"(?i)\b([0-9]{4})\\(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\\([1|2][0-9]|[3][0|1]|[0]?[1-9])", "yy.mm.dd": r"\b([0-9]{2})\.([0]?[1-9]|[1][0-2])\.([1|2][0-9]|[3][0|1]|[0]?[1-9])", "yy-mm-dd": r"\b([0-9]{2})-([0]?[1-9]|[1][0-2])-([1|2][0-9]|[3][0|1]|[0]?[1-9])", "yy/mm/dd": r"\b([0-9]{2})\/([0]?[1-9]|[1][0-2])\/([1|2][0-9]|[3][0|1]|[0]?[1-9])", diff --git a/plugins/extractit/plugin.spec.yaml b/plugins/extractit/plugin.spec.yaml index ce40f12945..5a6da52111 100644 --- a/plugins/extractit/plugin.spec.yaml +++ b/plugins/extractit/plugin.spec.yaml @@ -7,7 +7,7 @@ description: The ExtractIt plugin is a collection of data extraction actions. Th plugin allows users to extract various pieces of information from blocks of text. The pieces of information this plugin can extract include IPs, URLs, file paths, dates, domains, hashes, MAC addresses, and email addresses -version: 3.0.13 +version: 3.0.14 connection_version: 3 vendor: rapid7 support: rapid7 @@ -28,7 +28,7 @@ hub_tags: features: [] sdk: type: slim - version: 6.3.8 + version: 6.4.1 user: nobody key_features: - Extract dates from a string or file for use in subsequent workflow actions @@ -46,6 +46,7 @@ key_features: - Extract CVEs from a string or file for use in subsequent workflow actions - Extract all indicators from a string or file for use in subsequent workflow actions version_history: +- 3.0.14 - Updated dependency | Updated SDK to the latest version (6.4.1) - 3.0.13 - Improved range of possible 'filepaths' | Resolved issue where filenames were being populated under 'domain' | Updated SDK to latest version (6.3.8) - 3.0.12 - Updated SDK to the latest version (6.3.3) - 3.0.11 - Updated tldextract to v5.1.3 diff --git a/plugins/extractit/requirements.txt b/plugins/extractit/requirements.txt index 7551453ce6..a8cec8316f 100755 --- a/plugins/extractit/requirements.txt +++ b/plugins/extractit/requirements.txt @@ -2,9 +2,9 @@ # All dependencies must be version-pinned, eg. requests==1.2.0 # See: https://pip.pypa.io/en/stable/user_guide/#requirements-files tldextract==5.1.3 -regex==2023.8.8 +regex==2025.11.3 validators==0.34.0 -pdfplumber==0.11.4 +pdfplumber==0.11.8 openpyxl==3.1.5 parameterized==0.8.1 publicsuffix2==2.20191221 diff --git a/plugins/extractit/setup.py b/plugins/extractit/setup.py index 165cf01d56..3039b8481a 100755 --- a/plugins/extractit/setup.py +++ b/plugins/extractit/setup.py @@ -4,7 +4,7 @@ setup( name="extractit-rapid7-plugin", - version="3.0.13", + version="3.0.14", description="The ExtractIt plugin is a collection of data extraction actions. This plugin allows users to extract various pieces of information from blocks of text. The pieces of information this plugin can extract include IPs, URLs, file paths, dates, domains, hashes, MAC addresses, and email addresses", author="rapid7", author_email="",