Skip to content
Draft

Todo #831

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -306,13 +306,13 @@ jobs:
- name: Compare results
run: |
exit_code=0
LOW_DELTA=10
THRESHOLD=250
LOW_DELTA=100
THRESHOLD=2500

# RELEASE
if [ ${RELEASE_TIME} -le ${HEAD_TIME} ]; then
d=$(( 1000 * ( ${HEAD_TIME} - ${RELEASE_TIME} ) / ${RELEASE_TIME} ))
echo "RELEASE_TIME (sec) = ${RELEASE_TIME}, current (sec) = ${HEAD_TIME}. Diff (% * 10): ${d}"
d=$(( 10000 * ( ${HEAD_TIME} - ${RELEASE_TIME} ) / ${RELEASE_TIME} ))
python -c "print(f'RELEASE_TIME = ${RELEASE_TIME} sec, HEAD_TIME = ${HEAD_TIME} sec, diff: {$d/100:.2f} %')"
if [ $LOW_DELTA -ge ${d} ]; then
echo "Almost the same."
elif [ $THRESHOLD -lt ${d} ]; then
Expand All @@ -322,8 +322,8 @@ jobs:
echo "Slowdown."
fi
else
d=$(( 1000 * ( ${RELEASE_TIME} - ${HEAD_TIME} ) / ${RELEASE_TIME} ))
echo "RELEASE_TIME (sec) = ${RELEASE_TIME}, current (sec) = ${HEAD_TIME}. Diff (% * 10): ${d}"
d=$(( 10000 * ( ${RELEASE_TIME} - ${HEAD_TIME} ) / ${RELEASE_TIME} ))
python -c "print(f'RELEASE_TIME = ${RELEASE_TIME} sec, HEAD_TIME = ${HEAD_TIME} sec, diff: {$d/100:.2f} %')"
if [ $LOW_DELTA -ge ${d} ]; then
echo "Almost the same."
elif [ $THRESHOLD -lt ${d} ]; then
Expand All @@ -335,8 +335,8 @@ jobs:

# BASE
if [ ${BASE_TIME} -le ${HEAD_TIME} ]; then
d=$(( 1000 * ( ${HEAD_TIME} - ${BASE_TIME} ) / ${BASE_TIME} ))
echo "BASE_TIME (sec) = ${BASE_TIME}, current (sec) = ${HEAD_TIME}. Diff (% * 10): ${d}"
d=$(( 10000 * ( ${HEAD_TIME} - ${BASE_TIME} ) / ${BASE_TIME} ))
python -c "print(f'BASE_TIME = ${BASE_TIME} sec, HEAD_TIME = ${HEAD_TIME} sec, diff: {$d/100:.2f} %')"
if [ $LOW_DELTA -ge ${d} ]; then
echo "Almost the same."
elif [ $THRESHOLD -lt ${d} ]; then
Expand All @@ -346,8 +346,8 @@ jobs:
echo "Slowdown."
fi
else
d=$(( 1000 * ( ${BASE_TIME} - ${HEAD_TIME} ) / ${BASE_TIME} ))
echo "BASE_TIME (sec) = ${BASE_TIME}, current (sec) = ${HEAD_TIME}. Diff (% * 10): ${d}"
d=$(( 10000 * ( ${BASE_TIME} - ${HEAD_TIME} ) / ${BASE_TIME} ))
python -c "print(f'BASE_TIME = ${BASE_TIME} sec, HEAD_TIME = ${HEAD_TIME} sec, diff: {$d/100:.2f} %')"
if [ $LOW_DELTA -ge ${d} ]; then
echo "Almost the same."
elif [ $THRESHOLD -lt ${d} ]; then
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ jobs:
run: |
banner="$(python -m credsweeper --banner | grep CredSweeper | head -1)"
echo "banner = '${banner}'"
if [ "CredSweeper 1.15.2 crc32:cba20e43" != "${banner}" ]; then
if [ "CredSweeper 1.15.3 crc32:a2d3d87b" != "${banner}" ]; then
echo "Update the check for '${banner}'"
exit 1
fi
Expand Down
14 changes: 12 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,12 @@ jobs:
- name: Install application
run: |
python -m pip install .
python -m pip freeze # dbg

- name: Inspect environment and requirements
run: |
python --version
python -m pip --version
python -m pip freeze

- name: Remove sources dir to check installation for Linux & MAC
if: runner.os != 'Windows'
Expand Down Expand Up @@ -124,7 +129,12 @@ jobs:
run: |
python -m pip install --upgrade pip
python -m pip install --requirement requirements.txt
python -m pip freeze # dbg

- name: Inspect environment and requirements
run: |
python --version
python -m pip --version
python -m pip freeze

- name: UnitTest with pytest and coverage
run: |
Expand Down
41 changes: 1 addition & 40 deletions .mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -8,51 +8,12 @@ check_untyped_defs = True

# Per-module options:

[mypy-git.*]
ignore_missing_imports = True

[mypy-lxml.*]
ignore_missing_imports = True

[mypy-numpy.*]
ignore_missing_imports = True

[mypy-scipy.*]
ignore_missing_imports = True

[mypy-google_auth_oauthlib.*]
ignore_missing_imports = True

[mypy-oauthlib.*]
ignore_missing_imports = True

[mypy-pandas.*]
ignore_missing_imports = True

[mypy-sklearn.*]
[mypy-base62.*]
ignore_missing_imports = True

[mypy-onnxruntime.*]
ignore_missing_imports = True

[mypy-regex.*]
ignore_missing_imports = True

[mypy-whatthepatch.*]
ignore_missing_imports = True

[mypy-setuptools.*]
ignore_missing_imports = True

[mypy-password_strength.*]
ignore_missing_imports = True

[mypy-docx.*]
ignore_missing_imports = True

[mypy-base62.*]
ignore_missing_imports = True

[mypy-rpmfile.*]
ignore_missing_imports = True

Expand Down
2 changes: 1 addition & 1 deletion .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ max-statements=105
extension-pkg-allow-list=lxml

[MESSAGES CONTROL]
disable=C0103,C0114,C0413,R0401,R1705,R1714,R1731,R1730,W0718
disable=C0103,C0114,C0413,R0401,R1714,R1730,R1731,W0718

[DESIGN]
min-public-methods=0
Expand Down
2 changes: 1 addition & 1 deletion credsweeper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@
"__version__"
]

__version__ = "1.15.2"
__version__ = "1.15.3"
3 changes: 1 addition & 2 deletions credsweeper/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,7 @@ def __init__(self,
def _get_config_path(config_path: Optional[str]) -> Path:
if config_path:
return Path(config_path)
else:
return APP_PATH / "secret" / "config.json"
return APP_PATH / "secret" / "config.json"

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

Expand Down
32 changes: 18 additions & 14 deletions credsweeper/common/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,17 @@ class Severity(Enum):
INFO = "info"

def __lt__(self, other) -> bool:
if self == Severity.INFO:
return other is not Severity.INFO
elif self == Severity.LOW:
return other in [Severity.MEDIUM, Severity.HIGH, Severity.CRITICAL]
elif self == Severity.MEDIUM:
return other in [Severity.HIGH, Severity.CRITICAL]
elif self == Severity.HIGH:
return other is Severity.CRITICAL
return False
match self:
case Severity.INFO:
return other is not Severity.INFO
case Severity.LOW:
return other in [Severity.MEDIUM, Severity.HIGH, Severity.CRITICAL]
case Severity.MEDIUM:
return other in [Severity.HIGH, Severity.CRITICAL]
case Severity.HIGH:
return other is Severity.CRITICAL
case _:
return False

@staticmethod
def get(severity: Union[str, "Severity"]) -> Optional["Severity"]:
Expand All @@ -42,11 +44,13 @@ class Confidence(Enum):
WEAK = "weak"

def __lt__(self, other) -> bool:
if self == Confidence.WEAK:
return other is not Confidence.WEAK
elif self == Confidence.MODERATE:
return other is Confidence.STRONG
return False
match self:
case Confidence.WEAK:
return other is not Confidence.WEAK
case Confidence.MODERATE:
return other is Confidence.STRONG
case _:
return False

@staticmethod
def get(confidence: Union[str, "Confidence"]) -> Optional["Confidence"]:
Expand Down
3 changes: 1 addition & 2 deletions credsweeper/credentials/candidate.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,7 @@ def _encode(value: Any) -> Any:
"""
if isinstance(value, str):
return py_encode_basestring_ascii(value)
else:
return value
return value

def to_str(self, subtext: bool = False, hashed: bool = False) -> str:
"""Represent candidate with subtext or|and hashed values"""
Expand Down
7 changes: 4 additions & 3 deletions credsweeper/deep_scanner/docx_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,13 @@ def _iter_block_items(block):
yield from DocxScanner._iter_block_items(cell)
yield from block.paragraphs
return
elif isinstance(block, Document):
parent_elm = block.element.body
elif isinstance(block, Section):
if isinstance(block, Section):
yield from DocxScanner._iter_block_items(block.header)
yield from DocxScanner._iter_block_items(block.footer)
return

if isinstance(block, Document):
parent_elm = block.element.body
elif isinstance(block, _Cell):
parent_elm = block._tc # pylint: disable=W0212
else:
Expand Down
4 changes: 2 additions & 2 deletions credsweeper/deep_scanner/zip_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ def match(data: Union[bytes, bytearray]) -> bool:
if 0x03 == data[2] and 0x04 == data[3]:
# normal PK
return True
elif 0x05 == data[2] and 0x06 == data[3]:
if 0x05 == data[2] and 0x06 == data[3]:
# empty archive - no sense to scan in other scanners, so let it be a zip
return True
elif 0x07 == data[2] and 0x08 == data[3]:
if 0x07 == data[2] and 0x08 == data[3]:
# spanned archive - NOT SUPPORTED
return False
return False
Expand Down
5 changes: 2 additions & 3 deletions credsweeper/file_handler/analysis_target.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,8 @@ def line(self) -> str:
if self.__line is None:
# normal target
return self.__lines[self.__line_pos]
else:
# chunked target
return self.__line
# chunked target
return self.__line

@cached_property
def line_len(self) -> int:
Expand Down
25 changes: 13 additions & 12 deletions credsweeper/file_handler/data_content_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,7 @@ def represent_as_xml(self) -> Optional[bool]:
self.lines, self.line_numbers = Util.get_xml_from_lines(xml_text)
logger.debug("CONVERTED from xml")
return bool(self.lines and self.line_numbers)
else:
logger.debug("Weak data to parse as XML")
logger.debug("Weak data to parse as XML")
except Exception as exc:
logger.debug("Cannot parse as XML:%s %s", exc, self.data)
return None
Expand All @@ -173,16 +172,18 @@ def _check_multiline_cell(self, cell: Tag) -> Optional[Tuple[int, str]]:
if stripped_line := line.strip():
line_numbers.append(cell.sourceline + offset)
stripped_lines.append(stripped_line)
if 0 == len(stripped_lines):
return None
elif 1 == len(stripped_lines):
return line_numbers[0], stripped_lines[0]
else:
# the cell will be analysed as multiline text
self.line_numbers.extend(line_numbers)
self.lines.extend(stripped_lines)
self.__html_lines_size += sum(len(x) for x in stripped_lines)
return None

match len(stripped_lines):
case 0:
return None
case 1:
return line_numbers[0], stripped_lines[0]
case _:
# the cell will be analysed as multiline text
self.line_numbers.extend(line_numbers)
self.lines.extend(stripped_lines)
self.__html_lines_size += sum(len(x) for x in stripped_lines)
return None

@staticmethod
def simple_html_representation(html: BeautifulSoup) -> Tuple[List[int], List[str], int]:
Expand Down
16 changes: 8 additions & 8 deletions credsweeper/file_handler/diff_content_provider.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
from dataclasses import dataclass
from functools import cached_property
from typing import List, Tuple, Generator, TypedDict, Optional, Union, Any, Dict
from typing import List, Tuple, Generator, TypedDict, Optional, Union, Any, Dict, cast

import whatthepatch

Expand Down Expand Up @@ -119,26 +119,26 @@ def patch2files_diff(raw_patch: List[str], change_type: DiffRowType) -> Dict[str
if not raw_patch:
return {}

added_files, deleted_files = {}, {}
added_files: Dict[str, List[DiffDict]] = {}
deleted_files: Dict[str, List[DiffDict]] = {}
try:
for patch in whatthepatch.parse_patch(raw_patch):
if patch.changes is None:
logger.warning("Patch '%s' cannot be scanned", str(patch.header))
continue
changes = []
changes: List[DiffDict] = []
for change in patch.changes:
change_dict = change._asdict()
change_dict = cast(DiffDict, change._asdict())
changes.append(change_dict)

added_files[patch.header.new_path] = changes
deleted_files[patch.header.old_path] = changes
if change_type == DiffRowType.ADDED:
return added_files
elif change_type == DiffRowType.DELETED:
if change_type == DiffRowType.DELETED:
return deleted_files
else:
logger.error("Change type should be one of: '%s', '%s'; but received %s", DiffRowType.ADDED,
DiffRowType.DELETED, change_type)
logger.error("Change type should be one of: '%s', '%s'; but received %s", DiffRowType.ADDED,
DiffRowType.DELETED, change_type)
except Exception as exc:
logger.error(exc)
return {}
Expand Down
2 changes: 1 addition & 1 deletion credsweeper/file_handler/file_path_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def check_file_size(config: Config, reference: Union[str, Path, io.BytesIO, Tupl
if MIN_DATA_LEN > file_size:
logger.debug("Size (%s) of the file '%s' is too small", file_size, path)
return True
elif isinstance(config.size_limit, int) and config.size_limit < file_size:
if isinstance(config.size_limit, int) and config.size_limit < file_size:
logger.warning("Size (%s) of the file '%s' is over limit (%s)", file_size, path, config.size_limit)
return True

Expand Down
1 change: 1 addition & 0 deletions credsweeper/filters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from credsweeper.filters.value_not_part_encoded_check import ValueNotPartEncodedCheck
from credsweeper.filters.value_number_check import ValueNumberCheck
from credsweeper.filters.value_pattern_check import ValuePatternCheck
from credsweeper.filters.value_sealed_secret_check import ValueSealedSecretCheck
from credsweeper.filters.value_search_check import ValueSearchCheck
from credsweeper.filters.value_similarity_check import ValueSimilarityCheck
from credsweeper.filters.value_split_keyword_check import ValueSplitKeywordCheck
Expand Down
4 changes: 2 additions & 2 deletions credsweeper/filters/group/general_keyword.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from credsweeper.common.constants import GroupType
from credsweeper.config.config import Config
from credsweeper.filters import ValueDictionaryKeywordCheck
from credsweeper.filters import ValueDictionaryKeywordCheck, ValueSealedSecretCheck
from credsweeper.filters.group.group import Group


Expand All @@ -9,4 +9,4 @@ class GeneralKeyword(Group):

def __init__(self, config: Config) -> None:
super().__init__(config, GroupType.KEYWORD)
self.filters.extend([ValueDictionaryKeywordCheck()])
self.filters.extend([ValueDictionaryKeywordCheck(), ValueSealedSecretCheck()])
3 changes: 2 additions & 1 deletion credsweeper/filters/group/password_keyword.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from credsweeper.common.constants import GroupType
from credsweeper.config.config import Config
from credsweeper.filters import ValueLengthCheck, LineGitBinaryCheck
from credsweeper.filters import ValueLengthCheck, LineGitBinaryCheck, ValueSealedSecretCheck
from credsweeper.filters import ValueSplitKeywordCheck
from credsweeper.filters.group.group import Group
from credsweeper.filters.line_uue_part_check import LineUUEPartCheck
Expand All @@ -14,6 +14,7 @@ def __init__(self, config: Config) -> None:
self.filters.extend([
ValueLengthCheck(max_len=config.max_password_value_length),
ValueSplitKeywordCheck(),
ValueSealedSecretCheck(),
LineGitBinaryCheck(),
LineUUEPartCheck()
])
Loading
Loading