Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions plugins/extractit/.CHECKSUM
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"spec": "f1e1c6648fc10484bf56af68de3b2c63",
"manifest": "ce3ad35db69a1fc0170b525e22cfeba4",
"setup": "4a5e6a50304d0c8d92173d265f94b312",
"spec": "55beea4ab115661cac1457e024389337",
"manifest": "77ddc2f595cea4cc092cb7ff19f85906",
"setup": "e715193c6840f534e1cedb8332e99da0",
"schemas": [
{
"identifier": "cve_extractor/schema.py",
Expand Down
4 changes: 2 additions & 2 deletions plugins/extractit/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM --platform=linux/amd64 rapid7/insightconnect-python-3-slim-plugin:6.3.8 AS builder
FROM --platform=linux/amd64 rapid7/insightconnect-python-3-slim-plugin:6.4.1 AS builder

WORKDIR /python/src

Expand All @@ -11,7 +11,7 @@ ADD . /python/src
RUN pip install .
RUN pip uninstall -y setuptools

FROM --platform=linux/amd64 rapid7/insightconnect-python-3-slim-plugin:6.3.8
FROM --platform=linux/amd64 rapid7/insightconnect-python-3-slim-plugin:6.4.1

LABEL organization=rapid7
LABEL sdk=python
Expand Down
2 changes: 1 addition & 1 deletion plugins/extractit/bin/icon_extractit
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ from sys import argv

Name = "ExtractIt"
Vendor = "rapid7"
Version = "3.0.13"
Version = "3.0.14"
Description = "The ExtractIt plugin is a collection of data extraction actions. This plugin allows users to extract various pieces of information from blocks of text. The pieces of information this plugin can extract include IPs, URLs, file paths, dates, domains, hashes, MAC addresses, and email addresses"


Expand Down
1 change: 1 addition & 0 deletions plugins/extractit/help.md
Original file line number Diff line number Diff line change
Expand Up @@ -673,6 +673,7 @@ Example output:

# Version History

* 3.0.14 - Updated dependencies (regex to 2025.11.3, pdfplumber to 0.11.8) | Updated SDK to the latest version (6.4.1)
* 3.0.13 - Improved range of possible 'filepaths' | Resolved issue where filenames were being populated under 'domain' | Updated SDK to the latest version (6.3.8)
* 3.0.12 - Updated SDK to the latest version (6.3.3)
* 3.0.11 - Updated tldextract to v5.1.3
Expand Down
6 changes: 3 additions & 3 deletions plugins/extractit/icon_extractit/util/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import zipfile
from datetime import datetime
from difflib import get_close_matches
from typing import Any, Dict, List, Union
from typing import Any, List, Union

import openpyxl
import pdfplumber
Expand All @@ -15,7 +15,7 @@
from insightconnect_plugin_runtime.exceptions import PluginException
from odf.opendocument import load
from openpyxl.workbook.workbook import Worksheet
from pdfminer.pdfparser import PDFSyntaxError
from pdfplumber.utils.exceptions import PdfminerException
from pdfplumber.page import Page
from publicsuffix2 import PublicSuffixList

Expand Down Expand Up @@ -199,7 +199,7 @@ def extract_content_from_file(provided_file: bytes, provided_regex: str = "") ->
page_content = page_content.replace(word, word.replace("\n", ""))
pdf_content += page_content
return pdf_content
except PDFSyntaxError:
except PdfminerException:
raise PluginException(
cause="The type of the provided file is not supported.",
assistance="Supported file types are text/binary, such as: PDF, DOCX, PPTX, XLSX, ODT, ODP, ODF, TXT, ZIP",
Expand Down
24 changes: 12 additions & 12 deletions plugins/extractit/icon_extractit/util/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,22 +32,22 @@ class Regex:
"mm\\dd\\yy": r"\b([0][1-9]|[1][0-2])\\([1|2][0-9]|[3][0|1]|[0]?[1-9])\\([[0-9]{2})",
"mm.dd.yy": r"\b([0][1-9]|[1][0-2])\.([1|2][0-9]|[3][0|1]|[0]?[1-9])\.([0-9]{2})",
"mm-dd-yy": r"\b([0][1-9]|[1][0-2])-([1|2][0-9]|[3][0|1]|[0]?[1-9])-([0-9]{2})",
"dd/mmm/yyyy": r"\b((?i)[0]?[1-9]|[1|2][0-9]|[3][0|1])\/(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\/(\d{4})",
"dd\\mmm\\yyyy": r"\b((?i)[0]?[1-9]|[1|2][0-9]|[3][0|1])\\(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\\(\d{4})",
"dd.mmm.yyyy": r"\b((?i)[0]?[1-9]|[1|2][0-9]|[3][0|1])\.(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\.(\d{4})",
"dd-mmm-yyyy": r"\b((?i)[0]?[1-9]|[1|2][0-9]|[3][0|1])-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-(\d{4})",
"dd/mmm/yy": r"\b((?i)[0]?[1-9]|[1|2][0-9]|[3][0|1])\/(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\/([0-9]{2})",
"dd\\mmm\\yy": r"\b((?i)[0]?[1-9]|[1|2][0-9]|[3][0|1])\\(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\\([0-9]{2})",
"dd.mmm.yy": r"\b((?i)[0]?[1-9]|[1|2][0-9]|[3][0|1])\.(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\.([0-9]{2})",
"dd-mmm-yy": r"\b((?i)[0]?[1-9]|[1|2][0-9]|[3][0|1])-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-([0-9]{2})",
"dd/mmm/yyyy": r"(?i)\b([0]?[1-9]|[1|2][0-9]|[3][0|1])\/(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\/(\d{4})",
"dd\\mmm\\yyyy": r"(?i)\b([0]?[1-9]|[1|2][0-9]|[3][0|1])\\(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\\(\d{4})",
"dd.mmm.yyyy": r"(?i)\b([0]?[1-9]|[1|2][0-9]|[3][0|1])\.(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\.(\d{4})",
"dd-mmm-yyyy": r"(?i)\b([0]?[1-9]|[1|2][0-9]|[3][0|1])-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-(\d{4})",
"dd/mmm/yy": r"(?i)\b([0]?[1-9]|[1|2][0-9]|[3][0|1])\/(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\/([0-9]{2})",
"dd\\mmm\\yy": r"(?i)\b([0]?[1-9]|[1|2][0-9]|[3][0|1])\\(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\\([0-9]{2})",
"dd.mmm.yy": r"(?i)\b([0]?[1-9]|[1|2][0-9]|[3][0|1])\.(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\.([0-9]{2})",
"dd-mmm-yy": r"(?i)\b([0]?[1-9]|[1|2][0-9]|[3][0|1])-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-([0-9]{2})",
"yyyy/mm/dd": r"\b([0-9]{4})\/([0]?[1-9]|[1][0-2])\/([1|2][0-9]|[3][0|1]|[0]?[1-9])",
"yyyy\\mm\\dd": r"\b([0-9]{4})\\([0]?[1-9]|[1][0-2])\\([1|2][0-9]|[3][0|1]|[0]?[1-9])",
"yyyy.mm.dd": r"\b([0-9]{4})\.([0]?[1-9]|[1][0-2])\.([1|2][0-9]|[3][0|1]|[0]?[1-9])",
"yyyy-mm-dd": r"\b([0-9]{4})-([0]?[1-9]|[1][0-2])-([1|2][0-9]|[3][0|1]|[0]?[1-9])",
"yyyy.mmm.dd": r"\b((?i)[0-9]{4}).(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\.([1|2][0-9]|[3][0|1]|[0]?[1-9])",
"yyyy-mmm-dd": r"\b((?i)[0-9]{4})-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-([1|2][0-9]|[3][0|1]|[0]?[1-9])",
"yyyy/mmm/dd": r"\b((?i)[0-9]{4})\/(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\/([1|2][0-9]|[3][0|1]|[0]?[1-9])",
"yyyy\\mmm\\dd": r"\b((?i)[0-9]{4})\\(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\\([1|2][0-9]|[3][0|1]|[0]?[1-9])",
"yyyy.mmm.dd": r"(?i)\b([0-9]{4}).(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\.([1|2][0-9]|[3][0|1]|[0]?[1-9])",
"yyyy-mmm-dd": r"(?i)\b([0-9]{4})-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-([1|2][0-9]|[3][0|1]|[0]?[1-9])",
"yyyy/mmm/dd": r"(?i)\b([0-9]{4})\/(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\/([1|2][0-9]|[3][0|1]|[0]?[1-9])",
"yyyy\\mmm\\dd": r"(?i)\b([0-9]{4})\\(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\\([1|2][0-9]|[3][0|1]|[0]?[1-9])",
"yy.mm.dd": r"\b([0-9]{2})\.([0]?[1-9]|[1][0-2])\.([1|2][0-9]|[3][0|1]|[0]?[1-9])",
"yy-mm-dd": r"\b([0-9]{2})-([0]?[1-9]|[1][0-2])-([1|2][0-9]|[3][0|1]|[0]?[1-9])",
"yy/mm/dd": r"\b([0-9]{2})\/([0]?[1-9]|[1][0-2])\/([1|2][0-9]|[3][0|1]|[0]?[1-9])",
Expand Down
5 changes: 3 additions & 2 deletions plugins/extractit/plugin.spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ description: The ExtractIt plugin is a collection of data extraction actions. Th
plugin allows users to extract various pieces of information from blocks of text.
The pieces of information this plugin can extract include IPs, URLs, file paths,
dates, domains, hashes, MAC addresses, and email addresses
version: 3.0.13
version: 3.0.14
connection_version: 3
vendor: rapid7
support: rapid7
Expand All @@ -28,7 +28,7 @@ hub_tags:
features: []
sdk:
type: slim
version: 6.3.8
version: 6.4.1
user: nobody
key_features:
- Extract dates from a string or file for use in subsequent workflow actions
Expand All @@ -46,6 +46,7 @@ key_features:
- Extract CVEs from a string or file for use in subsequent workflow actions
- Extract all indicators from a string or file for use in subsequent workflow actions
version_history:
- 3.0.14 - Updated dependencies (regex to 2025.11.3, pdfplumber to 0.11.8) | Updated SDK to the latest version (6.4.1)
- 3.0.13 - Improved range of possible 'filepaths' | Resolved issue where filenames were being populated under 'domain' | Updated SDK to the latest version (6.3.8)
- 3.0.12 - Updated SDK to the latest version (6.3.3)
- 3.0.11 - Updated tldextract to v5.1.3
Expand Down
4 changes: 2 additions & 2 deletions plugins/extractit/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
# All dependencies must be version-pinned, eg. requests==1.2.0
# See: https://pip.pypa.io/en/stable/user_guide/#requirements-files
tldextract==5.1.3
regex==2023.8.8
regex==2025.11.3
validators==0.34.0
pdfplumber==0.11.4
pdfplumber==0.11.8
openpyxl==3.1.5
parameterized==0.8.1
publicsuffix2==2.20191221
Expand Down
2 changes: 1 addition & 1 deletion plugins/extractit/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

setup(
name="extractit-rapid7-plugin",
version="3.0.13",
version="3.0.14",
description="The ExtractIt plugin is a collection of data extraction actions. This plugin allows users to extract various pieces of information from blocks of text. The pieces of information this plugin can extract include IPs, URLs, file paths, dates, domains, hashes, MAC addresses, and email addresses",
author="rapid7",
author_email="",
Expand Down