Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 5211c5d

Browse files
authored Feb 19, 2025
Merge pull request #210 from bellingcat/logger_fix
Fix issue #200 + Refactor _LAZY_LOADED_MODULES
2 parents 6cdefaa + 0450757 commit 5211c5d

File tree

15 files changed

+232
-187
lines changed

15 files changed

+232
-187
lines changed
 

‎docs/scripts/scripts.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# iterate through all the modules in auto_archiver.modules and turn the __manifest__.py file into a markdown table
22
from pathlib import Path
3-
from auto_archiver.core.module import available_modules
3+
from auto_archiver.core.module import ModuleFactory
44
from auto_archiver.core.base_module import BaseModule
55
from ruamel.yaml import YAML
66
import io
@@ -41,7 +41,7 @@ def generate_module_docs():
4141
configs_cheatsheet = "\n## Configuration Options\n"
4242
configs_cheatsheet += header_row
4343

44-
for module in sorted(available_modules(with_manifest=True), key=lambda x: (x.requires_setup, x.name)):
44+
for module in sorted(ModuleFactory().available_modules(), key=lambda x: (x.requires_setup, x.name)):
4545
# generate the markdown file from the __manifest__.py file.
4646

4747
manifest = module.manifest

‎pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
44

55
[project]
66
name = "auto-archiver"
7-
version = "0.13.3"
7+
version = "0.13.4"
88
description = "Automatically archive links to videos, images, and social media content from Google Sheets (and more)."
99

1010
requires-python = ">=3.10,<3.13"

‎src/auto_archiver/core/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"""
44
from .metadata import Metadata
55
from .media import Media
6-
from .module import BaseModule
6+
from .base_module import BaseModule
77

88
# cannot import ArchivingOrchestrator/Config to avoid circular dep
99
# from .orchestrator import ArchivingOrchestrator

‎src/auto_archiver/core/base_module.py

+12-24
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,18 @@
11

2-
from urllib.parse import urlparse
3-
from typing import Mapping, Any
2+
from __future__ import annotations
3+
4+
from typing import Mapping, Any, Type, TYPE_CHECKING
45
from abc import ABC
56
from copy import deepcopy, copy
67
from tempfile import TemporaryDirectory
78
from auto_archiver.utils import url as UrlUtil
9+
from auto_archiver.core.consts import MODULE_TYPES as CONF_MODULE_TYPES
810

911
from loguru import logger
1012

13+
if TYPE_CHECKING:
14+
from .module import ModuleFactory
15+
1116
class BaseModule(ABC):
1217

1318
"""
@@ -17,41 +22,24 @@ class BaseModule(ABC):
1722
however modules can have a .setup() method to run any setup code
1823
(e.g. logging in to a site, spinning up a browser etc.)
1924
20-
See BaseModule.MODULE_TYPES for the types of modules you can create, noting that
25+
See consts.MODULE_TYPES for the types of modules you can create, noting that
2126
a subclass can be of multiple types. For example, a module that extracts data from
2227
a website and stores it in a database would be both an 'extractor' and a 'database' module.
2328
2429
Each module is a python package, and should have a __manifest__.py file in the
2530
same directory as the module file. The __manifest__.py specifies the module information
26-
like name, author, version, dependencies etc. See BaseModule._DEFAULT_MANIFEST for the
31+
like name, author, version, dependencies etc. See DEFAULT_MANIFEST for the
2732
default manifest structure.
2833
2934
"""
3035

31-
MODULE_TYPES = [
32-
'feeder',
33-
'extractor',
34-
'enricher',
35-
'database',
36-
'storage',
37-
'formatter'
38-
]
39-
40-
_DEFAULT_MANIFEST = {
41-
'name': '', # the display name of the module
42-
'author': 'Bellingcat', # creator of the module, leave this as Bellingcat or set your own name!
43-
'type': [], # the type of the module, can be one or more of BaseModule.MODULE_TYPES
44-
'requires_setup': True, # whether or not this module requires additional setup such as setting API Keys or installing additional softare
45-
'description': '', # a description of the module
46-
'dependencies': {}, # external dependencies, e.g. python packages or binaries, in dictionary format
47-
'entry_point': '', # the entry point for the module, in the format 'module_name::ClassName'. This can be left blank to use the default entry point of module_name::ModuleName
48-
'version': '1.0', # the version of the module
49-
'configs': {} # any configuration options this module has, these will be exposed to the user in the config file or via the command line
50-
}
36+
MODULE_TYPES = CONF_MODULE_TYPES
5137

38+
# NOTE: these are declared as class variables, but they are overridden by the instance variables in the __init__ method
5239
config: Mapping[str, Any]
5340
authentication: Mapping[str, Mapping[str, str]]
5441
name: str
42+
module_factory: ModuleFactory
5543

5644
# this is set by the orchestrator prior to archiving
5745
tmp_dir: TemporaryDirectory = None

‎src/auto_archiver/core/config.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from loguru import logger
1212

1313
from copy import deepcopy
14-
from .module import BaseModule
14+
from auto_archiver.core.consts import MODULE_TYPES
1515

1616
from typing import Any, List, Type, Tuple
1717

@@ -21,7 +21,7 @@
2121
# Auto Archiver Configuration
2222
# Steps are the modules that will be run in the order they are defined
2323
24-
steps:""" + "".join([f"\n {module}s: []" for module in BaseModule.MODULE_TYPES]) + \
24+
steps:""" + "".join([f"\n {module}s: []" for module in MODULE_TYPES]) + \
2525
"""
2626
2727
# Global configuration

‎src/auto_archiver/core/consts.py

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
2+
MODULE_TYPES = [
3+
'feeder',
4+
'extractor',
5+
'enricher',
6+
'database',
7+
'storage',
8+
'formatter'
9+
]
10+
11+
MANIFEST_FILE = "__manifest__.py"
12+
13+
DEFAULT_MANIFEST = {
14+
'name': '', # the display name of the module
15+
'author': 'Bellingcat', # creator of the module, leave this as Bellingcat or set your own name!
16+
'type': [], # the type of the module, can be one or more of MODULE_TYPES
17+
'requires_setup': True, # whether or not this module requires additional setup such as setting API Keys or installing additional software
18+
'description': '', # a description of the module
19+
'dependencies': {}, # external dependencies, e.g. python packages or binaries, in dictionary format
20+
'entry_point': '', # the entry point for the module, in the format 'module_name::ClassName'. This can be left blank to use the default entry point of module_name::ModuleName
21+
'version': '1.0', # the version of the module
22+
'configs': {} # any configuration options this module has, these will be exposed to the user in the config file or via the command line
23+
}
There was a problem loading the remainder of the diff.

0 commit comments

Comments
 (0)
Please sign in to comment.