Skip to content

Commit f02c48e

Browse files
ExtractIt - Addressed snyk vulnerability (#3654)
* ExtractIt - Addressed Snyk vulnerability
* Updated version history
* Bumped the SDK
* Bumped regex version
1 parent a7a5dfb commit f02c48e

File tree

9 files changed

+28
-26
lines changed

9 files changed

+28
-26
lines changed

plugins/extractit/.CHECKSUM

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
2-
"spec": "f1e1c6648fc10484bf56af68de3b2c63",
3-
"manifest": "ce3ad35db69a1fc0170b525e22cfeba4",
4-
"setup": "4a5e6a50304d0c8d92173d265f94b312",
2+
"spec": "55beea4ab115661cac1457e024389337",
3+
"manifest": "77ddc2f595cea4cc092cb7ff19f85906",
4+
"setup": "e715193c6840f534e1cedb8332e99da0",
55
"schemas": [
66
{
77
"identifier": "cve_extractor/schema.py",

plugins/extractit/Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM --platform=linux/amd64 rapid7/insightconnect-python-3-slim-plugin:6.3.8 AS builder
1+
FROM --platform=linux/amd64 rapid7/insightconnect-python-3-slim-plugin:6.4.1 AS builder
22

33
WORKDIR /python/src
44

@@ -11,7 +11,7 @@ ADD . /python/src
1111
RUN pip install .
1212
RUN pip uninstall -y setuptools
1313

14-
FROM --platform=linux/amd64 rapid7/insightconnect-python-3-slim-plugin:6.3.8
14+
FROM --platform=linux/amd64 rapid7/insightconnect-python-3-slim-plugin:6.4.1
1515

1616
LABEL organization=rapid7
1717
LABEL sdk=python

plugins/extractit/bin/icon_extractit

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ from sys import argv
66

77
Name = "ExtractIt"
88
Vendor = "rapid7"
9-
Version = "3.0.13"
9+
Version = "3.0.14"
1010
Description = "The ExtractIt plugin is a collection of data extraction actions. This plugin allows users to extract various pieces of information from blocks of text. The pieces of information this plugin can extract include IPs, URLs, file paths, dates, domains, hashes, MAC addresses, and email addresses"
1111

1212

plugins/extractit/help.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -673,6 +673,7 @@ Example output:
673673

674674
# Version History
675675

676+
* 3.0.14 - Updated dependency | Updated SDK to the latest version (6.4.1)
676677
* 3.0.13 - Improved range of possible 'filepaths' | Resolved issue where filenames were being populated under 'domain' | Updated SDK to latest version (6.3.8)
677678
* 3.0.12 - Updated SDK to the latest version (6.3.3)
678679
* 3.0.11 - Updated tldextract to v5.1.3

plugins/extractit/icon_extractit/util/extractor.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import zipfile
66
from datetime import datetime
77
from difflib import get_close_matches
8-
from typing import Any, Dict, List, Union
8+
from typing import Any, List, Union
99

1010
import openpyxl
1111
import pdfplumber
@@ -15,7 +15,7 @@
1515
from insightconnect_plugin_runtime.exceptions import PluginException
1616
from odf.opendocument import load
1717
from openpyxl.workbook.workbook import Worksheet
18-
from pdfminer.pdfparser import PDFSyntaxError
18+
from pdfplumber.utils.exceptions import PdfminerException
1919
from pdfplumber.page import Page
2020
from publicsuffix2 import PublicSuffixList
2121

@@ -199,7 +199,7 @@ def extract_content_from_file(provided_file: bytes, provided_regex: str = "") ->
199199
page_content = page_content.replace(word, word.replace("\n", ""))
200200
pdf_content += page_content
201201
return pdf_content
202-
except PDFSyntaxError:
202+
except PdfminerException:
203203
raise PluginException(
204204
cause="The type of the provided file is not supported.",
205205
assistance="Supported file types are text/binary, such as: PDF, DOCX, PPTX, XLSX, ODT, ODP, ODF, TXT, ZIP",

plugins/extractit/icon_extractit/util/util.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -32,22 +32,22 @@ class Regex:
3232
"mm\\dd\\yy": r"\b([0][1-9]|[1][0-2])\\([1|2][0-9]|[3][0|1]|[0]?[1-9])\\([[0-9]{2})",
3333
"mm.dd.yy": r"\b([0][1-9]|[1][0-2])\.([1|2][0-9]|[3][0|1]|[0]?[1-9])\.([0-9]{2})",
3434
"mm-dd-yy": r"\b([0][1-9]|[1][0-2])-([1|2][0-9]|[3][0|1]|[0]?[1-9])-([0-9]{2})",
35-
"dd/mmm/yyyy": r"\b((?i)[0]?[1-9]|[1|2][0-9]|[3][0|1])\/(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\/(\d{4})",
36-
"dd\\mmm\\yyyy": r"\b((?i)[0]?[1-9]|[1|2][0-9]|[3][0|1])\\(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\\(\d{4})",
37-
"dd.mmm.yyyy": r"\b((?i)[0]?[1-9]|[1|2][0-9]|[3][0|1])\.(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\.(\d{4})",
38-
"dd-mmm-yyyy": r"\b((?i)[0]?[1-9]|[1|2][0-9]|[3][0|1])-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-(\d{4})",
39-
"dd/mmm/yy": r"\b((?i)[0]?[1-9]|[1|2][0-9]|[3][0|1])\/(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\/([0-9]{2})",
40-
"dd\\mmm\\yy": r"\b((?i)[0]?[1-9]|[1|2][0-9]|[3][0|1])\\(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\\([0-9]{2})",
41-
"dd.mmm.yy": r"\b((?i)[0]?[1-9]|[1|2][0-9]|[3][0|1])\.(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\.([0-9]{2})",
42-
"dd-mmm-yy": r"\b((?i)[0]?[1-9]|[1|2][0-9]|[3][0|1])-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-([0-9]{2})",
35+
"dd/mmm/yyyy": r"(?i)\b([0]?[1-9]|[1|2][0-9]|[3][0|1])\/(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\/(\d{4})",
36+
"dd\\mmm\\yyyy": r"(?i)\b([0]?[1-9]|[1|2][0-9]|[3][0|1])\\(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\\(\d{4})",
37+
"dd.mmm.yyyy": r"(?i)\b([0]?[1-9]|[1|2][0-9]|[3][0|1])\.(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\.(\d{4})",
38+
"dd-mmm-yyyy": r"(?i)\b([0]?[1-9]|[1|2][0-9]|[3][0|1])-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-(\d{4})",
39+
"dd/mmm/yy": r"(?i)\b([0]?[1-9]|[1|2][0-9]|[3][0|1])\/(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\/([0-9]{2})",
40+
"dd\\mmm\\yy": r"(?i)\b([0]?[1-9]|[1|2][0-9]|[3][0|1])\\(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\\([0-9]{2})",
41+
"dd.mmm.yy": r"(?i)\b([0]?[1-9]|[1|2][0-9]|[3][0|1])\.(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\.([0-9]{2})",
42+
"dd-mmm-yy": r"(?i)\b([0]?[1-9]|[1|2][0-9]|[3][0|1])-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-([0-9]{2})",
4343
"yyyy/mm/dd": r"\b([0-9]{4})\/([0]?[1-9]|[1][0-2])\/([1|2][0-9]|[3][0|1]|[0]?[1-9])",
4444
"yyyy\\mm\\dd": r"\b([0-9]{4})\\([0]?[1-9]|[1][0-2])\\([1|2][0-9]|[3][0|1]|[0]?[1-9])",
4545
"yyyy.mm.dd": r"\b([0-9]{4})\.([0]?[1-9]|[1][0-2])\.([1|2][0-9]|[3][0|1]|[0]?[1-9])",
4646
"yyyy-mm-dd": r"\b([0-9]{4})-([0]?[1-9]|[1][0-2])-([1|2][0-9]|[3][0|1]|[0]?[1-9])",
47-
"yyyy.mmm.dd": r"\b((?i)[0-9]{4}).(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\.([1|2][0-9]|[3][0|1]|[0]?[1-9])",
48-
"yyyy-mmm-dd": r"\b((?i)[0-9]{4})-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-([1|2][0-9]|[3][0|1]|[0]?[1-9])",
49-
"yyyy/mmm/dd": r"\b((?i)[0-9]{4})\/(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\/([1|2][0-9]|[3][0|1]|[0]?[1-9])",
50-
"yyyy\\mmm\\dd": r"\b((?i)[0-9]{4})\\(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\\([1|2][0-9]|[3][0|1]|[0]?[1-9])",
47+
"yyyy.mmm.dd": r"(?i)\b([0-9]{4}).(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\.([1|2][0-9]|[3][0|1]|[0]?[1-9])",
48+
"yyyy-mmm-dd": r"(?i)\b([0-9]{4})-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-([1|2][0-9]|[3][0|1]|[0]?[1-9])",
49+
"yyyy/mmm/dd": r"(?i)\b([0-9]{4})\/(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\/([1|2][0-9]|[3][0|1]|[0]?[1-9])",
50+
"yyyy\\mmm\\dd": r"(?i)\b([0-9]{4})\\(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\\([1|2][0-9]|[3][0|1]|[0]?[1-9])",
5151
"yy.mm.dd": r"\b([0-9]{2})\.([0]?[1-9]|[1][0-2])\.([1|2][0-9]|[3][0|1]|[0]?[1-9])",
5252
"yy-mm-dd": r"\b([0-9]{2})-([0]?[1-9]|[1][0-2])-([1|2][0-9]|[3][0|1]|[0]?[1-9])",
5353
"yy/mm/dd": r"\b([0-9]{2})\/([0]?[1-9]|[1][0-2])\/([1|2][0-9]|[3][0|1]|[0]?[1-9])",

plugins/extractit/plugin.spec.yaml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ description: The ExtractIt plugin is a collection of data extraction actions. Th
77
plugin allows users to extract various pieces of information from blocks of text.
88
The pieces of information this plugin can extract include IPs, URLs, file paths,
99
dates, domains, hashes, MAC addresses, and email addresses
10-
version: 3.0.13
10+
version: 3.0.14
1111
connection_version: 3
1212
vendor: rapid7
1313
support: rapid7
@@ -28,7 +28,7 @@ hub_tags:
2828
features: []
2929
sdk:
3030
type: slim
31-
version: 6.3.8
31+
version: 6.4.1
3232
user: nobody
3333
key_features:
3434
- Extract dates from a string or file for use in subsequent workflow actions
@@ -46,6 +46,7 @@ key_features:
4646
- Extract CVEs from a string or file for use in subsequent workflow actions
4747
- Extract all indicators from a string or file for use in subsequent workflow actions
4848
version_history:
49+
- 3.0.14 - Updated dependency | Updated SDK to the latest version (6.4.1)
4950
- 3.0.13 - Improved range of possible 'filepaths' | Resolved issue where filenames were being populated under 'domain' | Updated SDK to latest version (6.3.8)
5051
- 3.0.12 - Updated SDK to the latest version (6.3.3)
5152
- 3.0.11 - Updated tldextract to v5.1.3

plugins/extractit/requirements.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22
# All dependencies must be version-pinned, eg. requests==1.2.0
33
# See: https://pip.pypa.io/en/stable/user_guide/#requirements-files
44
tldextract==5.1.3
5-
regex==2023.8.8
5+
regex==2025.11.3
66
validators==0.34.0
7-
pdfplumber==0.11.4
7+
pdfplumber==0.11.8
88
openpyxl==3.1.5
99
parameterized==0.8.1
1010
publicsuffix2==2.20191221

plugins/extractit/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
setup(
66
name="extractit-rapid7-plugin",
7-
version="3.0.13",
7+
version="3.0.14",
88
description="The ExtractIt plugin is a collection of data extraction actions. This plugin allows users to extract various pieces of information from blocks of text. The pieces of information this plugin can extract include IPs, URLs, file paths, dates, domains, hashes, MAC addresses, and email addresses",
99
author="rapid7",
1010
author_email="",

0 commit comments

Comments
 (0)