diff --git a/.flake8 b/.flake8
new file mode 100644
index 0000000..4b6a115
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,8 @@
+[flake8]
+ignore = E203, W503, F541
+max-line-length = 120
+max-doc-length = 120
+max-complexity = 10
+exclude = .venv,cookiecutter,.git,.local,.idea,.mypy_cache,.pytest_cache
+per-file-ignores =
+    __init__.py:F401
diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml
new file mode 100644
index 0000000..8b71f65
--- /dev/null
+++ b/.github/workflows/pull_request.yml
@@ -0,0 +1,21 @@
+name: Test
+
+on:
+  pull_request
+
+jobs:
+  pull_request:
+    name: Test
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+      - name: Setup Python Version
+        uses: actions/setup-python@v4
+        with:
+          python-version: 3.9
+          cache: 'pip' # caching pip dependencies
+      - name: Install Python dependencies
+        run: pip install -r requirements.dev.txt
+      - name: Run pre-commit
+        uses: pre-commit/action@v3.0.0
diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml
index b1dd87d..61f39ae 100644
--- a/.github/workflows/push.yml
+++ b/.github/workflows/push.yml
@@ -15,7 +15,7 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v3
       - name: Configure AWS Credentials
-        uses: aws-actions/configure-aws-credentials@v1
+        uses: aws-actions/configure-aws-credentials@v2
         with:
           aws-access-key-id: ${{ secrets.GDBP_AWS_ACCESS_KEY_ID }}
           aws-secret-access-key: ${{ secrets.GDBP_AWS_SECRET_ACCESS_KEY }}
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..a122e22
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,32 @@
+repos:
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: v4.4.0
+  hooks:
+    - id: check-yaml
+    - id: end-of-file-fixer
+    - id: trailing-whitespace
+    - id: check-ast
+    - id: requirements-txt-fixer
+    - id: check-docstring-first
+
+- repo: https://github.com/psf/black
+  rev: 23.7.0
+  hooks:
+    - id: black
+
+- repo: https://github.com/PyCQA/flake8
+  rev: 6.0.0
+  hooks:
+    - id: flake8
+
+- repo: https://github.com/PyCQA/isort
+  rev: 5.12.0
+  hooks:
+    - id: isort
+      args: ["--profile", "black"]
+
+- repo: https://github.com/PyCQA/bandit
+  rev: 1.7.5
+  hooks:
+    - id: bandit
+      entry: bandit --quiet -r -x tests/ src/*.py
diff --git a/SECURITY.md b/SECURITY.md
index f13c6b8..f1f794b 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -2,4 +2,4 @@
 
 ## Reporting a Vulnerability
 
-See https://hackerone.com/brave for details.
\ No newline at end of file
+See https://hackerone.com/brave for details.
diff --git a/config.py b/config.py
index 6e8ec15..060454e 100644
--- a/config.py
+++ b/config.py
@@ -1,44 +1,55 @@
 import os
 
 # Disable uploads to S3. Useful when running locally or in CI.
-NO_UPLOAD = os.getenv('NO_UPLOAD', None)
-NO_DOWNLOAD = os.getenv('NO_DOWNLOAD', None)
+NO_UPLOAD = os.getenv("NO_UPLOAD", None)
+NO_DOWNLOAD = os.getenv("NO_DOWNLOAD", None)
 
-PCDN_URL_BASE = os.getenv('PCDN_URL_BASE', 'https://pcdn.brave.software')
-PUB_S3_BUCKET = os.getenv('PUB_S3_BUCKET', 'brave-today-cdn-development')
+PCDN_URL_BASE = os.getenv("PCDN_URL_BASE", "https://pcdn.brave.software")
+PUB_S3_BUCKET = os.getenv("PUB_S3_BUCKET", "brave-today-cdn-development")
 
 # Canonical ID of the public S3 bucket
-BRAVE_TODAY_CANONICAL_ID = os.getenv('BRAVE_TODAY_CANONICAL_ID', None)
-BRAVE_TODAY_CLOUDFRONT_CANONICAL_ID = os.getenv('BRAVE_TODAY_CLOUDFRONT_CANONICAL_ID', None)
-
-LANG_REGION_MODEL_MAP = os.getenv('LANG_REGION_MODEL_MAP', [
-    ('en_US', "sentence-transformers/all-MiniLM-L6-v2"),
-    ('en_CA', "sentence-transformers/all-MiniLM-L6-v2"),
-    ('en_GB', "sentence-transformers/all-MiniLM-L6-v2"),
-    ('es_ES', "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"),
-    ('es_MX', "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"),
-    ('pt_BR', "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"),
-    ('ja_JP', "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"),
-    ('de_DE', "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"),
-    ('fr_FR', "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"),
-    ('en_AU', "sentence-transformers/all-MiniLM-L6-v2"),
-    ('en_IN', "sentence-transformers/all-MiniLM-L6-v2"),
-
-])
-
-SOURCES_JSON_FILE = os.getenv('SOURCES_JSON_FILE', 'sources.{LANG_REGION}')
-FEED_JSON_FILE = os.getenv('FEED_JSON_FILE', 'feed.{LANG_REGION}')
-
-OUTPUT_DIR = os.getenv('OUTPUT_DIR', 'output')
-
-ARTICLE_HISTORY_FILE = os.getenv('ARTICLE_HISTORY_FILE', "articles_history.{LANG_REGION}.csv")
+BRAVE_TODAY_CANONICAL_ID = os.getenv("BRAVE_TODAY_CANONICAL_ID", None)
+BRAVE_TODAY_CLOUDFRONT_CANONICAL_ID = os.getenv(
+    "BRAVE_TODAY_CLOUDFRONT_CANONICAL_ID", None
+)
+
+LANG_REGION_MODEL_MAP = os.getenv(
+    "LANG_REGION_MODEL_MAP",
+    [
+        ("en_US", "sentence-transformers/all-MiniLM-L6-v2"),
+        ("en_CA", "sentence-transformers/all-MiniLM-L6-v2"),
+        ("en_GB", "sentence-transformers/all-MiniLM-L6-v2"),
+        ("es_ES", "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"),
+        ("es_MX", "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"),
+        ("pt_BR", "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"),
+        ("ja_JP", "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"),
+        ("de_DE", "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"),
+        ("fr_FR", "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"),
+        ("en_AU", "sentence-transformers/all-MiniLM-L6-v2"),
+        ("en_IN", "sentence-transformers/all-MiniLM-L6-v2"),
+    ],
+)
+
+SOURCES_JSON_FILE = os.getenv("SOURCES_JSON_FILE", "sources.{LANG_REGION}")
+FEED_JSON_FILE = os.getenv("FEED_JSON_FILE", "feed.{LANG_REGION}")
+
+OUTPUT_DIR = os.getenv("OUTPUT_DIR", "output")
+
+ARTICLE_HISTORY_FILE = os.getenv(
+    "ARTICLE_HISTORY_FILE", "articles_history.{LANG_REGION}.csv"
+)
 
 # Don't compute the embedding for a source that has less than 30 collected articles
-MINIMUM_ARTICLE_HISTORY_SIZE = os.getenv('MINIMUM_ARTICLE_HISTORY_SIZE', 30)
-SIMILARITY_CUTOFF_RATIO = os.getenv('SIMILARITY_CUTOFF_RATIO', 0.9)
-SOURCE_SIMILARITY_T10 = os.getenv('SOURCE_SIMILARITY_T10', "source_similarity_t10.{LANG_REGION}")
-SOURCE_SIMILARITY_T10_HR = os.getenv('SOURCE_SIMILARITY_T10_HR', "source_similarity_t10_hr.{LANG_REGION}")
-
-SOURCE_EMBEDDINGS = os.getenv('SOURCE_EMBEDDINGS',
-                              "SOURCE_EMBEDDINGS.{LANG_REGION}")
-
-if SENTRY_URL := os.getenv('SENTRY_URL'):
+MINIMUM_ARTICLE_HISTORY_SIZE = os.getenv("MINIMUM_ARTICLE_HISTORY_SIZE", 30)
+SIMILARITY_CUTOFF_RATIO = os.getenv("SIMILARITY_CUTOFF_RATIO", 0.9)
+SOURCE_SIMILARITY_T10 = os.getenv(
+    "SOURCE_SIMILARITY_T10", "source_similarity_t10.{LANG_REGION}"
+)
+SOURCE_SIMILARITY_T10_HR = os.getenv(
+    "SOURCE_SIMILARITY_T10_HR", "source_similarity_t10_hr.{LANG_REGION}"
+)
+
+SOURCE_EMBEDDINGS = os.getenv("SOURCE_EMBEDDINGS", "SOURCE_EMBEDDINGS.{LANG_REGION}")
+
+if SENTRY_URL := os.getenv("SENTRY_URL"):
     import sentry_sdk
+
     sentry_sdk.init(dsn=SENTRY_URL, traces_sample_rate=0)
diff --git a/embeddings.py b/embeddings.py
index bf62d94..b80c546 100644
--- a/embeddings.py
+++ b/embeddings.py
@@ -9,16 +9,16 @@
 
 logger = get_logger()
 
 
-def compute_source_similarity(source_1, source_2, function='cosine'):
-    if function == 'dot':
+def compute_source_similarity(source_1, source_2, function="cosine"):
+    if function == "dot":
         return util.dot_score(source_1, np.transpose(source_2))
-    elif function == 'cosine':
+    elif function == "cosine":
         return util.pytorch_cos_sim(source_1, source_2)[0][0]
 
 
 def get_source_representation_from_titles(titles, model):
     if len(titles) < config.MINIMUM_ARTICLE_HISTORY_SIZE:
-        return np.zeros((1, EMBEDDING_DIMENSIONALITY))
+        return np.zeros((1, EMBEDDING_DIMENSIONALITY))
 
     return model.encode(titles).mean(axis=0)
@@ -27,5 +27,6 @@ def compute_source_representation_from_articles(articles_df, publisher_id, model
     publisher_bucket_df = articles_df[articles_df.publisher_id == publisher_id]
 
     titles = [
-        title for title in publisher_bucket_df.title.to_numpy() if title is not None]
+        title for title in publisher_bucket_df.title.to_numpy() if title is not None
+    ]
     return get_source_representation_from_titles(titles, model)
diff --git a/renovate.json b/renovate.json
index 39a2b6e..8e32181 100644
--- a/renovate.json
+++ b/renovate.json
@@ -2,5 +2,23 @@
   "$schema": "https://docs.renovatebot.com/renovate-schema.json",
   "extends": [
     "config:base"
+  ],
+  "schedule": [
+    "every 7 days"
+  ],
+  "baseBranches": [
+    "master"
+  ],
+  "pre-commit": {
+    "enabled": true
+  },
+  "pip_requirements": {
+    "fileMatch": ["requirements.*"]
+  },
+  "packageRules": [
+    {
+      "packagePatterns": ["^regex$"],
+      "enabled": false
+    }
   ]
 }
diff --git a/requirements.dev.txt b/requirements.dev.txt
new file mode 100644
index 0000000..0839e1f
--- /dev/null
+++ b/requirements.dev.txt
@@ -0,0 +1,10 @@
+-r requirements.txt
+bandit==1.7.5
+black==23.7.0
+bpython==0.24
+flake8==6.1.0
+isort==5.12.0
+pip-check-reqs==2.4.4
+pre-commit==3.3.3
+pylint==2.17.5
+pytest==7.4.0
diff --git a/requirements.txt b/requirements.txt
index 4269242..9a0fcdc 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,5 @@
+boto3==1.26.14
+botocore==1.29.14
 feedparser==6.0.10
 numpy==1.23.5
 pandas==1.5.1
@@ -5,10 +7,8 @@ requests==2.31.0
 scipy==1.10.0
 sentence-transformers==2.2.2
 sentry-sdk==1.29.2
-tqdm==4.66.1
-boto3==1.26.14
-botocore==1.29.14
 structlog==23.1.0
 torch==2.0.1
 torchvision==0.15.2
+tqdm==4.66.1
 transformers==4.31.0
diff --git a/source-feed-accumulator.py b/source-feed-accumulator.py
index d0b15ed..721ffd4 100644
--- a/source-feed-accumulator.py
+++ b/source-feed-accumulator.py
@@ -12,34 +12,52 @@
 
 
 def sanitize_articles_history(lang_region):
-    articles_history_df = pd.read_csv(config.OUTPUT_DIR + config.ARTICLE_HISTORY_FILE.format(LANG_REGION=lang_region))
+    articles_history_df = pd.read_csv(
+        config.OUTPUT_DIR + config.ARTICLE_HISTORY_FILE.format(LANG_REGION=lang_region)
+    )
     articles_history_df = articles_history_df.drop_duplicates().dropna()
-    articles_history_df.to_csv(config.OUTPUT_DIR + config.ARTICLE_HISTORY_FILE.format(LANG_REGION=lang_region), index=False)
+    articles_history_df.to_csv(
+        config.OUTPUT_DIR + config.ARTICLE_HISTORY_FILE.format(LANG_REGION=lang_region),
+        index=False,
+    )
 
 
 def accumulate_articles(articles, lang_region):
     for i, article in tqdm(enumerate(articles)):
-        title = article['title'].replace('\r', '').replace('\n', '').replace('"', '')
-        description = article['description'].replace('\r', '').replace('\n', '').replace('"', '')
-        publish_time = article['publish_time']
-        publisher_id = article['publisher_id']
-
-        with open(config.OUTPUT_DIR + config.ARTICLE_HISTORY_FILE.format(LANG_REGION=lang_region), "a") as f:
-            f.write('"' + '","'.join([title, description, publish_time, publisher_id]) + '"\n')
+        title = article["title"].replace("\r", "").replace("\n", "").replace('"', "")
+        description = (
+            article["description"].replace("\r", "").replace("\n", "").replace('"', "")
+        )
+        publish_time = article["publish_time"]
+        publisher_id = article["publisher_id"]
+
+        with open(
+            config.OUTPUT_DIR
+            + config.ARTICLE_HISTORY_FILE.format(LANG_REGION=lang_region),
+            "a",
+        ) as f:
+            f.write(
+                '"'
+                + '","'.join([title, description, publish_time, publisher_id])
+                + '"\n'
+            )
 
 
 for lang_region, model in config.LANG_REGION_MODEL_MAP:
     logger.info(f"Starting feeds accumulator for {lang_region}")
 
-    feed_file = f'{config.FEED_JSON_FILE.format(LANG_REGION=lang_region)}.json'
+    feed_file = f"{config.FEED_JSON_FILE.format(LANG_REGION=lang_region)}.json"
 
     pathlib.Path(config.OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
 
     if not config.NO_DOWNLOAD:
         download_file(feed_file, config.PUB_S3_BUCKET, f"brave-today/{feed_file}")
-        download_file(config.OUTPUT_DIR + config.ARTICLE_HISTORY_FILE.format(LANG_REGION=lang_region),
-                      config.PUB_S3_BUCKET,
-                      f"source-suggestions/{config.ARTICLE_HISTORY_FILE.format(LANG_REGION=lang_region)}")
+        download_file(
+            config.OUTPUT_DIR
+            + config.ARTICLE_HISTORY_FILE.format(LANG_REGION=lang_region),
+            config.PUB_S3_BUCKET,
+            f"source-suggestions/{config.ARTICLE_HISTORY_FILE.format(LANG_REGION=lang_region)}",
+        )
 
     with open(feed_file) as feeds:
         feeds_data = json.loads(feeds.read())
@@ -51,8 +69,11 @@ def accumulate_articles(articles, lang_region):
     sanitize_articles_history(lang_region)
 
     if not config.NO_UPLOAD:
-        upload_file(config.OUTPUT_DIR + config.ARTICLE_HISTORY_FILE.format(LANG_REGION=lang_region),
-                    config.PUB_S3_BUCKET,
-                    f"source-suggestions/{config.ARTICLE_HISTORY_FILE.format(LANG_REGION=lang_region)}")
+        upload_file(
+            config.OUTPUT_DIR
+            + config.ARTICLE_HISTORY_FILE.format(LANG_REGION=lang_region),
+            config.PUB_S3_BUCKET,
+            f"source-suggestions/{config.ARTICLE_HISTORY_FILE.format(LANG_REGION=lang_region)}",
+        )
 
     logger.info("Finished sanitizing articles_history.")
diff --git a/source-similarity-matrix.py b/source-similarity-matrix.py
index b239163..2019840 100644
--- a/source-similarity-matrix.py
+++ b/source-similarity-matrix.py
@@ -9,28 +9,39 @@
 from tqdm import tqdm
 
 import config
-from embeddings import (EMBEDDING_DIMENSIONALITY,
-                        compute_source_representation_from_articles,
-                        compute_source_similarity)
-from utils import (clean_source_similarity_file, download_file,
-                   get_source_id_for_title, upload_file)
+from embeddings import (
+    EMBEDDING_DIMENSIONALITY,
+    compute_source_representation_from_articles,
+    compute_source_similarity,
+)
+from utils import (
+    clean_source_similarity_file,
+    download_file,
+    get_source_id_for_title,
+    upload_file,
+)
 
 logger = get_logger()
 
 # Compute similarity matrix for all existing LANG_REGION pairs
-for lang_region, model_name in config.LANG_REGION_MODEL_MAP:
+for lang_region, model_name in config.LANG_REGION_MODEL_MAP:  # noqa: C901
     logger.info(
-        f"Started computing similarity matrix for {lang_region} using {model_name}")
+        f"Started computing similarity matrix for {lang_region} using {model_name}"
+    )
 
     pathlib.Path(config.OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
 
     if not config.NO_DOWNLOAD:
-        download_file(config.OUTPUT_DIR + "/" + config.ARTICLE_HISTORY_FILE.format(LANG_REGION=lang_region),
-                      config.PUB_S3_BUCKET,
-                      f"source-suggestions/{config.ARTICLE_HISTORY_FILE.format(LANG_REGION=lang_region)}")
+        download_file(
+            config.OUTPUT_DIR
+            + "/"
+            + config.ARTICLE_HISTORY_FILE.format(LANG_REGION=lang_region),
+            config.PUB_S3_BUCKET,
+            f"source-suggestions/{config.ARTICLE_HISTORY_FILE.format(LANG_REGION=lang_region)}",
+        )
 
-    sources_file = f'{config.SOURCES_JSON_FILE.format(LANG_REGION=lang_region)}.json'
+    sources_file = f"{config.SOURCES_JSON_FILE.format(LANG_REGION=lang_region)}.json"
 
     if not config.NO_DOWNLOAD:
         download_file(sources_file, config.PUB_S3_BUCKET, sources_file)
@@ -41,9 +52,13 @@
     sources_df = pd.json_normalize(sources_data)
     sources_df["source_representation"] = np.nan
 
-    articles_df = pd.read_csv(config.OUTPUT_DIR + '/' + config.ARTICLE_HISTORY_FILE.format(LANG_REGION=lang_region),
-                              header=None)
-    articles_df.columns = ['title', 'description', 'timestamp', 'publisher_id']
+    articles_df = pd.read_csv(
+        config.OUTPUT_DIR
+        + "/"
+        + config.ARTICLE_HISTORY_FILE.format(LANG_REGION=lang_region),
+        header=None,
+    )
+    articles_df.columns = ["title", "description", "timestamp", "publisher_id"]
 
     logger.info("Loading Embedding Model...")
     model = SentenceTransformer(model_name)
@@ -58,17 +73,23 @@
     reprs = np.zeros((publisher_ids.size, EMBEDDING_DIMENSIONALITY))
     for i, publisher_id in tqdm(enumerate(publisher_ids)):
         reprs[i, :] = compute_source_representation_from_articles(
-            articles_df, publisher_id, model)
+            articles_df, publisher_id, model
+        )
         if not reprs[i, :].any():
             logger.warning(
-                f"Source {sources_df[sources_df.publisher_id == publisher_id].publisher_name.item()} has no articles. Skipping...")
+                f"Source {sources_df[sources_df.publisher_id == publisher_id].publisher_name.item()} "
+                f"has no articles. Skipping..."
+            )
 
     logger.info(f"Computing sources representations for {lang_region}")
-    sources_representation = pd.DataFrame({'publisher_id': publisher_ids})
+    sources_representation = pd.DataFrame({"publisher_id": publisher_ids})
     sources_representation = pd.concat(
-        [sources_representation, pd.DataFrame(reprs)], axis=1)
+        [sources_representation, pd.DataFrame(reprs)], axis=1
+    )
     sources_representation.to_csv(
-        f'output/{config.SOURCE_EMBEDDINGS.format(LANG_REGION=lang_region)}.csv', header=None)
+        f"output/{config.SOURCE_EMBEDDINGS.format(LANG_REGION=lang_region)}.csv",
+        header=None,
+    )
     logger.info("Finished building source embeddings.")
 
     # For each source pair, compute pair similarity
@@ -105,35 +126,58 @@
         if sources_ranking:
             top_similarity_score = sources_ranking[0][1]
             similarity_cutoff = config.SIMILARITY_CUTOFF_RATIO * top_similarity_score
-            top10_dictionary[source_id] = [{'source': get_source_id_for_title(source[0], sources_df), 'score': source[1]}
-                                           for
-                                           source in sources_ranking[:10] if source[1] > similarity_cutoff]
-            top10_dictionary_human_readable[feed] = [{'source': source[0], 'score': source[1]} for source in
-                                                     sources_ranking[:10] if source[1] > similarity_cutoff]
+            top10_dictionary[source_id] = [
+                {
+                    "source": get_source_id_for_title(source[0], sources_df),
+                    "score": source[1],
+                }
+                for source in sources_ranking[:10]
+                if source[1] > similarity_cutoff
+            ]
+            top10_dictionary_human_readable[feed] = [
+                {"source": source[0], "score": source[1]}
+                for source in sources_ranking[:10]
+                if source[1] > similarity_cutoff
+            ]
 
     logger.info("Removing un-matched sources")
-    top10_dictionary = clean_source_similarity_file(
-        sources_data, top10_dictionary)
+    top10_dictionary = clean_source_similarity_file(sources_data, top10_dictionary)
 
     logger.info("Outputting sources similarities files")
-    with open(f'output/{config.SOURCE_SIMILARITY_T10.format(LANG_REGION=lang_region)}.json', 'w') as f:
+    with open(
+        f"output/{config.SOURCE_SIMILARITY_T10.format(LANG_REGION=lang_region)}.json",
+        "w",
+    ) as f:
         json.dump(top10_dictionary, f)
-    with open(f'output/{config.SOURCE_SIMILARITY_T10_HR.format(LANG_REGION=lang_region)}.json', 'w') as f:
+    with open(
+        f"output/{config.SOURCE_SIMILARITY_T10_HR.format(LANG_REGION=lang_region)}.json",
+        "w",
+    ) as f:
         json.dump(top10_dictionary_human_readable, f)
 
     logger.info("Script has finished running.")
 
     if not config.NO_UPLOAD:
-        upload_file(config.OUTPUT_DIR + "/" + f'/{config.SOURCE_SIMILARITY_T10.format(LANG_REGION=lang_region)}.json',
-                    config.PUB_S3_BUCKET,
-                    f"source-suggestions/{config.SOURCE_SIMILARITY_T10.format(LANG_REGION=lang_region)}.json")
+        upload_file(
+            config.OUTPUT_DIR
+            + "/"
+            + f"/{config.SOURCE_SIMILARITY_T10.format(LANG_REGION=lang_region)}.json",
+            config.PUB_S3_BUCKET,
+            f"source-suggestions/{config.SOURCE_SIMILARITY_T10.format(LANG_REGION=lang_region)}.json",
+        )
         upload_file(
-            config.OUTPUT_DIR + "/" +
-            f'/{config.SOURCE_SIMILARITY_T10_HR.format(LANG_REGION=lang_region)}.json',
+            config.OUTPUT_DIR
+            + "/"
+            + f"/{config.SOURCE_SIMILARITY_T10_HR.format(LANG_REGION=lang_region)}.json",
             config.PUB_S3_BUCKET,
-            f"source-suggestions/{config.SOURCE_SIMILARITY_T10_HR.format(LANG_REGION=lang_region)}.json")
+            f"source-suggestions/{config.SOURCE_SIMILARITY_T10_HR.format(LANG_REGION=lang_region)}.json",
+        )
 
-        upload_file(config.OUTPUT_DIR + "/" + f'/{config.SOURCE_EMBEDDINGS.format(LANG_REGION=lang_region)}.csv',
-                    config.PUB_S3_BUCKET,
-                    f"source-suggestions/{config.SOURCE_EMBEDDINGS.format(LANG_REGION=lang_region)}.csv")
+        upload_file(
+            config.OUTPUT_DIR
+            + "/"
+            + f"/{config.SOURCE_EMBEDDINGS.format(LANG_REGION=lang_region)}.csv",
+            config.PUB_S3_BUCKET,
+            f"source-suggestions/{config.SOURCE_EMBEDDINGS.format(LANG_REGION=lang_region)}.csv",
+        )
diff --git a/utils.py b/utils.py
index 06c98f9..51940f6 100644
--- a/utils.py
+++ b/utils.py
@@ -2,13 +2,12 @@
 import mimetypes
 
 import boto3
-import numpy as np
 from botocore.exceptions import ClientError
 
 import config
 
 boto_session = boto3.Session()
-s3_client = boto_session.client('s3')
+s3_client = boto_session.client("s3")
 
 
 class InvalidS3Bucket(Exception):
@@ -19,12 +18,17 @@ def upload_file(file_name, bucket, object_name=None):
     if object_name is None:
         object_name = file_name
     try:
-        content_type = mimetypes.guess_type(file_name)[0] or 'binary/octet-stream'
-        s3_client.upload_file(file_name, bucket, object_name, ExtraArgs={
-            'GrantRead': f'id={config.BRAVE_TODAY_CLOUDFRONT_CANONICAL_ID}',
-            'GrantFullControl': f'id={config.BRAVE_TODAY_CANONICAL_ID}',
-            'ContentType': content_type
-        })
+        content_type = mimetypes.guess_type(file_name)[0] or "binary/octet-stream"
+        s3_client.upload_file(
+            file_name,
+            bucket,
+            object_name,
+            ExtraArgs={
+                "GrantRead": f"id={config.BRAVE_TODAY_CLOUDFRONT_CANONICAL_ID}",
+                "GrantFullControl": f"id={config.BRAVE_TODAY_CANONICAL_ID}",
+                "ContentType": content_type,
+            },
+        )
     except ClientError as e:
         logging.error(e)