diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5677e0a..119b328 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.12.1 + rev: v0.12.5 hooks: - id: ruff-check args: [ --fix ] @@ -14,6 +14,7 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v5.0.0 hooks: + - id: end-of-file-fixer - id: trailing-whitespace - repo: https://github.com/sphinx-contrib/sphinx-lint rev: v1.0.0 diff --git a/docs/conf.py b/docs/conf.py index ba511bb..957107d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -46,7 +46,7 @@ # General information about the project. project = "itemloaders" -copyright = "Zyte Group Ltd" +project_copyright = "Zyte Group Ltd" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the diff --git a/pyproject.toml b/pyproject.toml index 6e10f83..05da683 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -100,14 +100,21 @@ disable = [ "too-many-positional-arguments", "too-many-public-methods", "unused-argument", + "use-implicit-booleaness-not-comparison", ] [tool.ruff.lint] extend-select = [ + # flake8-builtins + "A", + # flake8-async + "ASYNC", # flake8-bugbear "B", # flake8-comprehensions "C4", + # flake8-commas + "COM", # pydocstyle "D", # flake8-future-annotations @@ -130,6 +137,8 @@ extend-select = [ "PIE", # pylint "PL", + # flake8-pytest-style + "PT", # flake8-use-pathlib "PTH", # flake8-pyi @@ -160,6 +169,8 @@ extend-select = [ "YTT", ] ignore = [ + # Trailing comma missing + "COM812", # Missing docstring in public module "D100", # Missing docstring in public class @@ -218,5 +229,8 @@ ignore = [ "S101", ] +[tool.ruff.lint.isort] +split-on-trailing-comma = false + [tool.ruff.lint.pydocstyle] convention = "pep257" diff --git a/tests/test_base_loader.py b/tests/test_base_loader.py index 5342d0c..a12e827 100644 --- a/tests/test_base_loader.py +++ b/tests/test_base_loader.py @@ -1,6 +1,7 @@ -import unittest from functools import partial +import pytest + from itemloaders import ItemLoader from itemloaders.processors import Compose, Identity, Join, MapCompose, TakeFirst @@ -20,7 +21,7 @@ def processor_with_args(value, other=None, loader_context=None): return value -class BasicItemLoaderTest(unittest.TestCase): +class TestItemLoaderBasic: def test_load_item_using_default_loader(self): i = {"summary": "lala"} il = ItemLoader(item=i) @@ -61,7 +62,7 @@ class MyLoader(ItemLoader): assert il.load_item() == {"name": "", "price": 0.0} il.replace_value("sku", [valid_fragment], re=sku_re) - self.assertEqual(il.load_item()["sku"], "1234") + assert il.load_item()["sku"] == "1234" def test_self_referencing_loader(self): class MyLoader(ItemLoader): @@ -112,44 +113,41 @@ def test_add_none(self): def test_replace_value(self): il = CustomItemLoader() il.replace_value("name", "marta") - self.assertEqual(il.get_collected_values("name"), ["Marta"]) - self.assertEqual(il.get_output_value("name"), ["Marta"]) + assert il.get_collected_values("name") == ["Marta"] + assert il.get_output_value("name") == ["Marta"] il.replace_value("name", "pepe") - self.assertEqual(il.get_collected_values("name"), ["Pepe"]) - self.assertEqual(il.get_output_value("name"), ["Pepe"]) + assert il.get_collected_values("name") == ["Pepe"] + assert il.get_output_value("name") == ["Pepe"] il.replace_value(None, "Jim", lambda x: {"name": x}) - self.assertEqual(il.get_collected_values("name"), ["Jim"]) + assert il.get_collected_values("name") == ["Jim"] def test_replace_value_none(self): il = CustomItemLoader() il.replace_value("name", None) - self.assertEqual(il.get_collected_values("name"), []) + assert il.get_collected_values("name") == [] il.replace_value("name", "marta") - self.assertEqual(il.get_collected_values("name"), ["Marta"]) + assert il.get_collected_values("name") == ["Marta"] il.replace_value( "name", None ) # when replacing with `None` nothing should happen - self.assertEqual(il.get_collected_values("name"), ["Marta"]) + assert il.get_collected_values("name") == ["Marta"] def test_get_value(self): il = ItemLoader() - self.assertEqual("FOO", il.get_value(["foo", "bar"], TakeFirst(), str.upper)) - self.assertEqual( - ["foo", "bar"], il.get_value(["name:foo", "name:bar"], re="name:(.*)$") - ) - self.assertEqual( - "foo", il.get_value(["name:foo", "name:bar"], TakeFirst(), re="name:(.*)$") - ) - self.assertEqual( - None, il.get_value(["foo", "bar"], TakeFirst(), re="name:(.*)$") + assert il.get_value(["foo", "bar"], TakeFirst(), str.upper) == "FOO" + assert il.get_value(["name:foo", "name:bar"], re="name:(.*)$") == ["foo", "bar"] + assert ( + il.get_value(["name:foo", "name:bar"], TakeFirst(), re="name:(.*)$") + == "foo" ) - self.assertEqual(None, il.get_value(None, TakeFirst())) + assert None is il.get_value(["foo", "bar"], TakeFirst(), re="name:(.*)$") + assert None is il.get_value(None, TakeFirst()) il.add_value("name", ["name:foo", "name:bar"], TakeFirst(), re="name:(.*)$") - self.assertEqual(["foo"], il.get_collected_values("name")) + assert il.get_collected_values("name") == ["foo"] il.replace_value("name", "name:bar", re="name:(.*)$") - self.assertEqual(["bar"], il.get_collected_values("name")) + assert il.get_collected_values("name") == ["bar"] def test_iter_on_input_processor_input(self): class NameFirstItemLoader(ItemLoader): @@ -157,32 +155,34 @@ class NameFirstItemLoader(ItemLoader): il = NameFirstItemLoader() il.add_value("name", "marta") - self.assertEqual(il.get_collected_values("name"), ["marta"]) + assert il.get_collected_values("name") == ["marta"] il = NameFirstItemLoader() il.add_value("name", ["marta", "jose"]) - self.assertEqual(il.get_collected_values("name"), ["marta"]) + assert il.get_collected_values("name") == ["marta"] il = NameFirstItemLoader() il.replace_value("name", "marta") - self.assertEqual(il.get_collected_values("name"), ["marta"]) + assert il.get_collected_values("name") == ["marta"] il = NameFirstItemLoader() il.replace_value("name", ["marta", "jose"]) - self.assertEqual(il.get_collected_values("name"), ["marta"]) + assert il.get_collected_values("name") == ["marta"] il = NameFirstItemLoader() il.add_value("name", "marta") il.add_value("name", ["jose", "pedro"]) - self.assertEqual(il.get_collected_values("name"), ["marta", "jose"]) + assert il.get_collected_values("name") == ["marta", "jose"] def test_map_compose_filter(self): def filter_world(x): return None if x == "world" else x proc = MapCompose(filter_world, str.upper) - self.assertEqual( - proc(["hello", "world", "this", "is", "scrapy"]), - ["HELLO", "THIS", "IS", "SCRAPY"], - ) + assert proc(["hello", "world", "this", "is", "scrapy"]) == [ + "HELLO", + "THIS", + "IS", + "SCRAPY", + ] def test_map_compose_filter_multil(self): class CustomItemLoader(ItemLoader): @@ -190,14 +190,14 @@ class CustomItemLoader(ItemLoader): il = CustomItemLoader() il.add_value("name", "marta") - self.assertEqual(il.get_output_value("name"), ["Mart"]) + assert il.get_output_value("name") == ["Mart"] item = il.load_item() - self.assertEqual(item["name"], ["Mart"]) + assert item["name"] == ["Mart"] def test_default_input_processor(self): il = DefaultedItemLoader() il.add_value("name", "marta") - self.assertEqual(il.get_output_value("name"), ["mart"]) + assert il.get_output_value("name") == ["mart"] def test_inherited_default_input_processor(self): class InheritDefaultedItemLoader(DefaultedItemLoader): @@ -205,7 +205,7 @@ class InheritDefaultedItemLoader(DefaultedItemLoader): il = InheritDefaultedItemLoader() il.add_value("name", "marta") - self.assertEqual(il.get_output_value("name"), ["mart"]) + assert il.get_output_value("name") == ["mart"] def test_input_processor_inheritance(self): class ChildItemLoader(CustomItemLoader): @@ -213,9 +213,9 @@ class ChildItemLoader(CustomItemLoader): il = ChildItemLoader() il.add_value("url", "HTTP://scrapy.ORG") - self.assertEqual(il.get_output_value("url"), ["http://scrapy.org"]) + assert il.get_output_value("url") == ["http://scrapy.org"] il.add_value("name", "marta") - self.assertEqual(il.get_output_value("name"), ["Marta"]) + assert il.get_output_value("name") == ["Marta"] class ChildChildItemLoader(ChildItemLoader): url_in = MapCompose(lambda v: v.upper()) @@ -223,9 +223,9 @@ class ChildChildItemLoader(ChildItemLoader): il = ChildChildItemLoader() il.add_value("url", "http://scrapy.org") - self.assertEqual(il.get_output_value("url"), ["HTTP://SCRAPY.ORG"]) + assert il.get_output_value("url") == ["HTTP://SCRAPY.ORG"] il.add_value("name", "marta") - self.assertEqual(il.get_output_value("name"), ["Marta"]) + assert il.get_output_value("name") == ["Marta"] def test_empty_map_compose(self): class IdentityDefaultedItemLoader(DefaultedItemLoader): @@ -233,7 +233,7 @@ class IdentityDefaultedItemLoader(DefaultedItemLoader): il = IdentityDefaultedItemLoader() il.add_value("name", "marta") - self.assertEqual(il.get_output_value("name"), ["marta"]) + assert il.get_output_value("name") == ["marta"] def test_identity_input_processor(self): class IdentityDefaultedItemLoader(DefaultedItemLoader): @@ -241,7 +241,7 @@ class IdentityDefaultedItemLoader(DefaultedItemLoader): il = IdentityDefaultedItemLoader() il.add_value("name", "marta") - self.assertEqual(il.get_output_value("name"), ["marta"]) + assert il.get_output_value("name") == ["marta"] def test_extend_custom_input_processors(self): class ChildItemLoader(CustomItemLoader): @@ -249,7 +249,7 @@ class ChildItemLoader(CustomItemLoader): il = ChildItemLoader() il.add_value("name", "marta") - self.assertEqual(il.get_output_value("name"), ["mARTA"]) + assert il.get_output_value("name") == ["mARTA"] def test_extend_default_input_processors(self): class ChildDefaultedItemLoader(DefaultedItemLoader): @@ -259,19 +259,19 @@ class ChildDefaultedItemLoader(DefaultedItemLoader): il = ChildDefaultedItemLoader() il.add_value("name", "marta") - self.assertEqual(il.get_output_value("name"), ["MART"]) + assert il.get_output_value("name") == ["MART"] def test_output_processor_using_function(self): il = CustomItemLoader() il.add_value("name", ["mar", "ta"]) - self.assertEqual(il.get_output_value("name"), ["Mar", "Ta"]) + assert il.get_output_value("name") == ["Mar", "Ta"] class TakeFirstItemLoader(CustomItemLoader): name_out = " ".join il = TakeFirstItemLoader() il.add_value("name", ["mar", "ta"]) - self.assertEqual(il.get_output_value("name"), "Mar Ta") + assert il.get_output_value("name") == "Mar Ta" def test_output_processor_error(self): class CustomItemLoader(ItemLoader): @@ -300,33 +300,33 @@ class CustomItemLoader(ItemLoader): def test_output_processor_using_classes(self): il = CustomItemLoader() il.add_value("name", ["mar", "ta"]) - self.assertEqual(il.get_output_value("name"), ["Mar", "Ta"]) + assert il.get_output_value("name") == ["Mar", "Ta"] class TakeFirstItemLoader1(CustomItemLoader): name_out = Join() il = TakeFirstItemLoader1() il.add_value("name", ["mar", "ta"]) - self.assertEqual(il.get_output_value("name"), "Mar Ta") + assert il.get_output_value("name") == "Mar Ta" class TakeFirstItemLoader2(CustomItemLoader): name_out = Join("
") il = TakeFirstItemLoader2() il.add_value("name", ["mar", "ta"]) - self.assertEqual(il.get_output_value("name"), "Mar
Ta") + assert il.get_output_value("name") == "Mar
Ta" def test_default_output_processor(self): il = CustomItemLoader() il.add_value("name", ["mar", "ta"]) - self.assertEqual(il.get_output_value("name"), ["Mar", "Ta"]) + assert il.get_output_value("name") == ["Mar", "Ta"] class LalaItemLoader(CustomItemLoader): default_output_processor = Identity() il = LalaItemLoader() il.add_value("name", ["mar", "ta"]) - self.assertEqual(il.get_output_value("name"), ["Mar", "Ta"]) + assert il.get_output_value("name") == ["Mar", "Ta"] def test_loader_context_on_declaration(self): class ChildItemLoader(CustomItemLoader): @@ -334,9 +334,9 @@ class ChildItemLoader(CustomItemLoader): il = ChildItemLoader() il.add_value("url", "text") - self.assertEqual(il.get_output_value("url"), ["val"]) + assert il.get_output_value("url") == ["val"] il.replace_value("url", "text2") - self.assertEqual(il.get_output_value("url"), ["val"]) + assert il.get_output_value("url") == ["val"] def test_loader_context_on_instantiation(self): class ChildItemLoader(CustomItemLoader): @@ -344,9 +344,9 @@ class ChildItemLoader(CustomItemLoader): il = ChildItemLoader(key="val") il.add_value("url", "text") - self.assertEqual(il.get_output_value("url"), ["val"]) + assert il.get_output_value("url") == ["val"] il.replace_value("url", "text2") - self.assertEqual(il.get_output_value("url"), ["val"]) + assert il.get_output_value("url") == ["val"] def test_loader_context_on_assign(self): class ChildItemLoader(CustomItemLoader): @@ -355,9 +355,9 @@ class ChildItemLoader(CustomItemLoader): il = ChildItemLoader() il.context["key"] = "val" il.add_value("url", "text") - self.assertEqual(il.get_output_value("url"), ["val"]) + assert il.get_output_value("url") == ["val"] il.replace_value("url", "text2") - self.assertEqual(il.get_output_value("url"), ["val"]) + assert il.get_output_value("url") == ["val"] def test_item_passed_to_input_processor_functions(self): def processor(value, loader_context): @@ -369,9 +369,9 @@ class ChildItemLoader(CustomItemLoader): it = {"name": "marta"} il = ChildItemLoader(item=it) il.add_value("url", "text") - self.assertEqual(il.get_output_value("url"), ["marta"]) + assert il.get_output_value("url") == ["marta"] il.replace_value("url", "text2") - self.assertEqual(il.get_output_value("url"), ["marta"]) + assert il.get_output_value("url") == ["marta"] # def test_add_value_on_unknown_field(self): # il = CustomItemLoader() @@ -383,9 +383,9 @@ class CustomItemLoader(ItemLoader): il = CustomItemLoader() il.add_value("name", ["marta", "other"]) - self.assertEqual(il.get_output_value("name"), "Mart") + assert il.get_output_value("name") == "Mart" item = il.load_item() - self.assertEqual(item["name"], "Mart") + assert item["name"] == "Mart" def test_partial_processor(self): def join(values, sep=None, loader_context=None, ignored=None): @@ -405,16 +405,22 @@ class CustomItemLoader(ItemLoader): il.add_value("url", ["rabbit", "hole"]) il.add_value("summary", ["rabbit", "hole"]) item = il.load_item() - self.assertEqual(item["name"], "rabbit+hole") - self.assertEqual(item["url"], "rabbit.hole") - self.assertEqual(item["summary"], "rabbithole") + assert item["name"] == "rabbit+hole" + assert item["url"] == "rabbit.hole" + assert item["summary"] == "rabbithole" def test_error_input_processor(self): class CustomItemLoader(ItemLoader): name_in = MapCompose(float) il = CustomItemLoader() - self.assertRaises(ValueError, il.add_value, "name", ["marta", "other"]) + with pytest.raises( + ValueError, + match="Error with input processor MapCompose: .* " + "error='ValueError: Error in MapCompose .* " + "error='ValueError: could not convert", + ): + il.add_value("name", ["marta", "other"]) def test_error_output_processor(self): class CustomItemLoader(ItemLoader): @@ -422,20 +428,29 @@ class CustomItemLoader(ItemLoader): il = CustomItemLoader() il.add_value("name", "marta") - with self.assertRaises(ValueError): + with pytest.raises( + ValueError, + match="Error with output processor: .* " + "error='ValueError: Error in Compose .* " + "error='ValueError: could not convert", + ): il.load_item() def test_error_processor_as_argument(self): il = CustomItemLoader() - self.assertRaises( - ValueError, il.add_value, "name", ["marta", "other"], Compose(float) - ) + with pytest.raises( + ValueError, + match=r"Error with processor Compose .* " + r"error='ValueError: Error in Compose .* " + r"error='TypeError: float\(\) argument", + ): + il.add_value("name", ["marta", "other"], Compose(float)) def test_get_unset_value(self): loader = ItemLoader() - self.assertEqual(loader.load_item(), {}) - self.assertEqual(loader.get_output_value("foo"), []) - self.assertEqual(loader.load_item(), {}) + assert loader.load_item() == {} + assert loader.get_output_value("foo") == [] + assert loader.load_item() == {} class BaseNoInputReprocessingLoader(ItemLoader): @@ -447,7 +462,7 @@ class NoInputReprocessingDictLoader(BaseNoInputReprocessingLoader): default_item_class = dict -class NoInputReprocessingFromDictTest(unittest.TestCase): +class TestNoInputReprocessingFromDict: """ Loaders initialized from loaded items must not reprocess fields (dict instances) """ @@ -455,33 +470,33 @@ class NoInputReprocessingFromDictTest(unittest.TestCase): def test_avoid_reprocessing_with_initial_values_single(self): il = NoInputReprocessingDictLoader(item={"title": "foo"}) il_loaded = il.load_item() - self.assertEqual(il_loaded, {"title": "foo"}) - self.assertEqual( - NoInputReprocessingDictLoader(item=il_loaded).load_item(), {"title": "foo"} - ) + assert il_loaded == {"title": "foo"} + assert NoInputReprocessingDictLoader(item=il_loaded).load_item() == { + "title": "foo" + } def test_avoid_reprocessing_with_initial_values_list(self): il = NoInputReprocessingDictLoader(item={"title": ["foo", "bar"]}) il_loaded = il.load_item() - self.assertEqual(il_loaded, {"title": "foo"}) - self.assertEqual( - NoInputReprocessingDictLoader(item=il_loaded).load_item(), {"title": "foo"} - ) + assert il_loaded == {"title": "foo"} + assert NoInputReprocessingDictLoader(item=il_loaded).load_item() == { + "title": "foo" + } def test_avoid_reprocessing_without_initial_values_single(self): il = NoInputReprocessingDictLoader() il.add_value("title", "foo") il_loaded = il.load_item() - self.assertEqual(il_loaded, {"title": "FOO"}) - self.assertEqual( - NoInputReprocessingDictLoader(item=il_loaded).load_item(), {"title": "FOO"} - ) + assert il_loaded == {"title": "FOO"} + assert NoInputReprocessingDictLoader(item=il_loaded).load_item() == { + "title": "FOO" + } def test_avoid_reprocessing_without_initial_values_list(self): il = NoInputReprocessingDictLoader() il.add_value("title", ["foo", "bar"]) il_loaded = il.load_item() - self.assertEqual(il_loaded, {"title": "FOO"}) - self.assertEqual( - NoInputReprocessingDictLoader(item=il_loaded).load_item(), {"title": "FOO"} - ) + assert il_loaded == {"title": "FOO"} + assert NoInputReprocessingDictLoader(item=il_loaded).load_item() == { + "title": "FOO" + } diff --git a/tests/test_loader_initialization.py b/tests/test_loader_initialization.py index b2ba331..b609904 100644 --- a/tests/test_loader_initialization.py +++ b/tests/test_loader_initialization.py @@ -1,106 +1,101 @@ from __future__ import annotations -import unittest -from typing import Any, Protocol +from abc import ABC, abstractmethod +from typing import Any from itemloaders import ItemLoader -class InitializationTestProtocol(Protocol): - item_class: Any +class TestInitializationBase(ABC): + @property + @abstractmethod + def item_class(self) -> type[Any]: + raise NotImplementedError - def assertEqual(self, first: Any, second: Any, msg: Any = ...) -> None: ... - - def assertIsInstance(self, obj: object, cls: type, msg: Any = None) -> None: ... - - -class InitializationTestMixin: - item_class: Any = None - - def test_keep_single_value(self: InitializationTestProtocol) -> None: + def test_keep_single_value(self) -> None: """Loaded item should contain values from the initial item""" input_item = self.item_class(name="foo") il = ItemLoader(item=input_item) loaded_item = il.load_item() - self.assertIsInstance(loaded_item, self.item_class) - self.assertEqual(dict(loaded_item), {"name": ["foo"]}) + assert isinstance(loaded_item, self.item_class) + assert dict(loaded_item) == {"name": ["foo"]} - def test_keep_list(self: InitializationTestProtocol) -> None: + def test_keep_list(self) -> None: """Loaded item should contain values from the initial item""" input_item = self.item_class(name=["foo", "bar"]) il = ItemLoader(item=input_item) loaded_item = il.load_item() - self.assertIsInstance(loaded_item, self.item_class) - self.assertEqual(dict(loaded_item), {"name": ["foo", "bar"]}) + assert isinstance(loaded_item, self.item_class) + assert dict(loaded_item) == {"name": ["foo", "bar"]} def test_add_value_singlevalue_singlevalue( - self: InitializationTestProtocol, + self, ) -> None: """Values added after initialization should be appended""" input_item = self.item_class(name="foo") il = ItemLoader(item=input_item) il.add_value("name", "bar") loaded_item = il.load_item() - self.assertIsInstance(loaded_item, self.item_class) - self.assertEqual(dict(loaded_item), {"name": ["foo", "bar"]}) + assert isinstance(loaded_item, self.item_class) + assert dict(loaded_item) == {"name": ["foo", "bar"]} - def test_add_value_singlevalue_list(self: InitializationTestProtocol) -> None: + def test_add_value_singlevalue_list(self) -> None: """Values added after initialization should be appended""" input_item = self.item_class(name="foo") il = ItemLoader(item=input_item) il.add_value("name", ["item", "loader"]) loaded_item = il.load_item() - self.assertIsInstance(loaded_item, self.item_class) - self.assertEqual(dict(loaded_item), {"name": ["foo", "item", "loader"]}) + assert isinstance(loaded_item, self.item_class) + assert dict(loaded_item) == {"name": ["foo", "item", "loader"]} - def test_add_value_list_singlevalue(self: InitializationTestProtocol) -> None: + def test_add_value_list_singlevalue(self) -> None: """Values added after initialization should be appended""" input_item = self.item_class(name=["foo", "bar"]) il = ItemLoader(item=input_item) il.add_value("name", "qwerty") loaded_item = il.load_item() - self.assertIsInstance(loaded_item, self.item_class) - self.assertEqual(dict(loaded_item), {"name": ["foo", "bar", "qwerty"]}) + assert isinstance(loaded_item, self.item_class) + assert dict(loaded_item) == {"name": ["foo", "bar", "qwerty"]} - def test_add_value_list_list(self: InitializationTestProtocol) -> None: + def test_add_value_list_list(self) -> None: """Values added after initialization should be appended""" input_item = self.item_class(name=["foo", "bar"]) il = ItemLoader(item=input_item) il.add_value("name", ["item", "loader"]) loaded_item = il.load_item() - self.assertIsInstance(loaded_item, self.item_class) - self.assertEqual(dict(loaded_item), {"name": ["foo", "bar", "item", "loader"]}) + assert isinstance(loaded_item, self.item_class) + assert dict(loaded_item) == {"name": ["foo", "bar", "item", "loader"]} - def test_get_output_value_singlevalue(self: InitializationTestProtocol) -> None: + def test_get_output_value_singlevalue(self) -> None: """Getting output value must not remove value from item""" input_item = self.item_class(name="foo") il = ItemLoader(item=input_item) - self.assertEqual(il.get_output_value("name"), ["foo"]) + assert il.get_output_value("name") == ["foo"] loaded_item = il.load_item() - self.assertIsInstance(loaded_item, self.item_class) - self.assertEqual(loaded_item, {"name": ["foo"]}) + assert isinstance(loaded_item, self.item_class) + assert loaded_item == {"name": ["foo"]} - def test_get_output_value_list(self: InitializationTestProtocol) -> None: + def test_get_output_value_list(self) -> None: """Getting output value must not remove value from item""" input_item = self.item_class(name=["foo", "bar"]) il = ItemLoader(item=input_item) - self.assertEqual(il.get_output_value("name"), ["foo", "bar"]) + assert il.get_output_value("name") == ["foo", "bar"] loaded_item = il.load_item() - self.assertIsInstance(loaded_item, self.item_class) - self.assertEqual(loaded_item, {"name": ["foo", "bar"]}) + assert isinstance(loaded_item, self.item_class) + assert loaded_item == {"name": ["foo", "bar"]} - def test_values_single(self: InitializationTestProtocol) -> None: + def test_values_single(self) -> None: """Values from initial item must be added to loader._values""" input_item = self.item_class(name="foo") il = ItemLoader(item=input_item) - self.assertEqual(il._values.get("name"), ["foo"]) + assert il._values.get("name") == ["foo"] - def test_values_list(self: InitializationTestProtocol) -> None: + def test_values_list(self) -> None: """Values from initial item must be added to loader._values""" input_item = self.item_class(name=["foo", "bar"]) il = ItemLoader(item=input_item) - self.assertEqual(il._values.get("name"), ["foo", "bar"]) + assert il._values.get("name") == ["foo", "bar"] -class InitializationFromDictTest(InitializationTestMixin, unittest.TestCase): +class InitializationFromDictTest(TestInitializationBase): item_class = dict diff --git a/tests/test_nested_items.py b/tests/test_nested_items.py index f780ec2..010cbad 100644 --- a/tests/test_nested_items.py +++ b/tests/test_nested_items.py @@ -1,53 +1,55 @@ from __future__ import annotations -import unittest from typing import Any +import pytest + from itemloaders import ItemLoader -class NestedItemTest(unittest.TestCase): - """Test that adding items as values works as expected.""" +def _test_item(item: Any) -> None: + il = ItemLoader() + il.add_value("item_list", item) + assert il.load_item() == {"item_list": [item]} + + +def test_attrs(): + try: + import attr # noqa: PLC0415 + except ImportError: + pytest.skip("Cannot import attr") + + @attr.s + class TestItem: + foo = attr.ib() - def _test_item(self, item: Any) -> None: - il = ItemLoader() - il.add_value("item_list", item) - self.assertEqual(il.load_item(), {"item_list": [item]}) + _test_item(TestItem(foo="bar")) - def test_attrs(self): - try: - import attr # noqa: PLC0415 - except ImportError: - self.skipTest("Cannot import attr") - @attr.s - class TestItem: - foo = attr.ib() +def test_dataclass(): + try: + from dataclasses import dataclass # noqa: PLC0415 + except ImportError: + pytest.skip("Cannot import dataclasses.dataclass") - self._test_item(TestItem(foo="bar")) + @dataclass + class TestItem: + foo: str - def test_dataclass(self): - try: - from dataclasses import dataclass # noqa: PLC0415 - except ImportError: - self.skipTest("Cannot import dataclasses.dataclass") + _test_item(TestItem(foo="bar")) - @dataclass - class TestItem: - foo: str - self._test_item(TestItem(foo="bar")) +def test_dict(): + _test_item({"foo": "bar"}) - def test_dict(self): - self._test_item({"foo": "bar"}) - def test_scrapy_item(self): - try: - from scrapy import Field, Item # noqa: PLC0415 - except ImportError: - self.skipTest("Cannot import Field or Item from scrapy") +def test_scrapy_item(): + try: + from scrapy import Field, Item # noqa: PLC0415 + except ImportError: + pytest.skip("Cannot import Field or Item from scrapy") - class TestItem(Item): - foo = Field() + class TestItem(Item): + foo = Field() - self._test_item(TestItem(foo="bar")) + _test_item(TestItem(foo="bar")) diff --git a/tests/test_nested_loader.py b/tests/test_nested_loader.py index 19e4bd3..407d9fe 100644 --- a/tests/test_nested_loader.py +++ b/tests/test_nested_loader.py @@ -1,11 +1,9 @@ -import unittest - from parsel import Selector from itemloaders import ItemLoader -class SubselectorLoaderTest(unittest.TestCase): +class TestSubselectorLoader: selector = Selector( text=""" @@ -31,18 +29,14 @@ def test_nested_xpath(self): assert nl.selector nl.add_value("name_value", nl.selector.xpath('div[@id = "id"]/text()').getall()) - self.assertEqual(loader.get_output_value("name"), ["marta"]) - self.assertEqual( - loader.get_output_value("name_div"), ['
marta
'] - ) - self.assertEqual(loader.get_output_value("name_value"), ["marta"]) + assert loader.get_output_value("name") == ["marta"] + assert loader.get_output_value("name_div") == ['
marta
'] + assert loader.get_output_value("name_value") == ["marta"] - self.assertEqual(loader.get_output_value("name"), nl.get_output_value("name")) - self.assertEqual( - loader.get_output_value("name_div"), nl.get_output_value("name_div") - ) - self.assertEqual( - loader.get_output_value("name_value"), nl.get_output_value("name_value") + assert loader.get_output_value("name") == nl.get_output_value("name") + assert loader.get_output_value("name_div") == nl.get_output_value("name_div") + assert loader.get_output_value("name_value") == nl.get_output_value( + "name_value" ) def test_nested_css(self): @@ -53,18 +47,14 @@ def test_nested_css(self): assert nl.selector nl.add_value("name_value", nl.selector.xpath('div[@id = "id"]/text()').getall()) - self.assertEqual(loader.get_output_value("name"), ["marta"]) - self.assertEqual( - loader.get_output_value("name_div"), ['
marta
'] - ) - self.assertEqual(loader.get_output_value("name_value"), ["marta"]) + assert loader.get_output_value("name") == ["marta"] + assert loader.get_output_value("name_div") == ['
marta
'] + assert loader.get_output_value("name_value") == ["marta"] - self.assertEqual(loader.get_output_value("name"), nl.get_output_value("name")) - self.assertEqual( - loader.get_output_value("name_div"), nl.get_output_value("name_div") - ) - self.assertEqual( - loader.get_output_value("name_value"), nl.get_output_value("name_value") + assert loader.get_output_value("name") == nl.get_output_value("name") + assert loader.get_output_value("name_div") == nl.get_output_value("name_div") + assert loader.get_output_value("name_value") == nl.get_output_value( + "name_value" ) def test_nested_replace(self): @@ -73,11 +63,11 @@ def test_nested_replace(self): nl2 = nl1.nested_xpath("a") loader.add_xpath("url", "//footer/a/@href") - self.assertEqual(loader.get_output_value("url"), ["http://www.scrapy.org"]) + assert loader.get_output_value("url") == ["http://www.scrapy.org"] nl1.replace_xpath("url", "img/@src") - self.assertEqual(loader.get_output_value("url"), ["/images/logo.png"]) + assert loader.get_output_value("url") == ["/images/logo.png"] nl2.replace_xpath("url", "@href") - self.assertEqual(loader.get_output_value("url"), ["http://www.scrapy.org"]) + assert loader.get_output_value("url") == ["http://www.scrapy.org"] def test_nested_ordering(self): loader = ItemLoader(selector=self.selector) @@ -89,15 +79,12 @@ def test_nested_ordering(self): nl2.add_xpath("url", "text()") loader.add_xpath("url", "//footer/a/@href") - self.assertEqual( - loader.get_output_value("url"), - [ - "/images/logo.png", - "http://www.scrapy.org", - "homepage", - "http://www.scrapy.org", - ], - ) + assert loader.get_output_value("url") == [ + "/images/logo.png", + "http://www.scrapy.org", + "homepage", + "http://www.scrapy.org", + ] def test_nested_load_item(self): loader = ItemLoader(selector=self.selector) @@ -114,9 +101,9 @@ def test_nested_load_item(self): assert item is nl1.item assert item is nl2.item - self.assertEqual(item["name"], ["marta"]) - self.assertEqual(item["url"], ["http://www.scrapy.org"]) - self.assertEqual(item["image"], ["/images/logo.png"]) + assert item["name"] == ["marta"] + assert item["url"] == ["http://www.scrapy.org"] + assert item["image"] == ["/images/logo.png"] def test_nested_empty_selector(self): loader = ItemLoader(selector=self.selector) diff --git a/tests/test_output_processor.py b/tests/test_output_processor.py index e319112..db2da6f 100644 --- a/tests/test_output_processor.py +++ b/tests/test_output_processor.py @@ -1,13 +1,12 @@ from __future__ import annotations -import unittest from typing import Any from itemloaders import ItemLoader from itemloaders.processors import Compose, Identity, TakeFirst -class TestOutputProcessorDict(unittest.TestCase): +class TestOutputProcessorDict: def test_output_processor(self): class TempDict(dict[str, Any]): def __init__(self, *args, **kwargs): @@ -21,11 +20,11 @@ class TempLoader(ItemLoader): loader = TempLoader() item = loader.load_item() - self.assertIsInstance(item, TempDict) - self.assertEqual(dict(item), {"temp": 0.3}) + assert isinstance(item, TempDict) + assert dict(item) == {"temp": 0.3} -class TestOutputProcessorItem(unittest.TestCase): +class TestOutputProcessorItem: def test_output_processor(self): class TempLoader(ItemLoader): default_input_processor = Identity() @@ -35,5 +34,5 @@ class TempLoader(ItemLoader): item.setdefault("temp", 0.3) loader = TempLoader(item=item) item = loader.load_item() - self.assertIsInstance(item, dict) - self.assertEqual(dict(item), {"temp": 0.3}) + assert isinstance(item, dict) + assert dict(item) == {"temp": 0.3} diff --git a/tests/test_processors.py b/tests/test_processors.py index 55a0c9e..367c005 100644 --- a/tests/test_processors.py +++ b/tests/test_processors.py @@ -1,49 +1,69 @@ -import unittest +import pytest from itemloaders.processors import Compose, Identity, Join, MapCompose, TakeFirst -class ProcessorsTest(unittest.TestCase): - def test_take_first(self): - proc = TakeFirst() - self.assertEqual(proc([None, "", "hello", "world"]), "hello") - self.assertEqual(proc([None, "", 0, "hello", "world"]), 0) - - def test_identity(self): - proc = Identity() - self.assertEqual( - proc([None, "", "hello", "world"]), [None, "", "hello", "world"] - ) - - def test_join(self): - proc = Join() - self.assertRaises(TypeError, proc, [None, "", "hello", "world"]) - self.assertEqual(proc(["", "hello", "world"]), " hello world") - self.assertEqual(proc(["hello", "world"]), "hello world") - self.assertIsInstance(proc(["hello", "world"]), str) - - def test_compose(self): - proc = Compose(lambda v: v[0], str.upper) - self.assertEqual(proc(["hello", "world"]), "HELLO") - proc = Compose(str.upper) - self.assertEqual(proc(None), None) - proc = Compose(str.upper, stop_on_none=False) - self.assertRaises(ValueError, proc, None) - proc = Compose(str.upper, lambda x: x + 1) - self.assertRaises(ValueError, proc, "hello") - - def test_mapcompose(self): - def filter_world(x): - return None if x == "world" else x - - proc = MapCompose(filter_world, str.upper) - self.assertEqual( - proc(["hello", "world", "this", "is", "scrapy"]), - ["HELLO", "THIS", "IS", "SCRAPY"], - ) - proc = MapCompose(filter_world, str.upper) - self.assertEqual(proc(None), []) - proc = MapCompose(filter_world, str.upper) - self.assertRaises(ValueError, proc, [1]) - proc = MapCompose(filter_world, lambda x: x + 1) - self.assertRaises(ValueError, proc, "hello") +def test_take_first(): + proc = TakeFirst() + assert proc([None, "", "hello", "world"]) == "hello" + assert proc([None, "", 0, "hello", "world"]) == 0 + + +def test_identity(): + proc = Identity() + assert proc([None, "", "hello", "world"]) == [None, "", "hello", "world"] + + +def test_join(): + proc = Join() + with pytest.raises(TypeError): + proc([None, "", "hello", "world"]) + assert proc(["", "hello", "world"]) == " hello world" + assert proc(["hello", "world"]) == "hello world" + assert isinstance(proc(["hello", "world"]), str) + + +def test_compose(): + proc = Compose(lambda v: v[0], str.upper) + assert proc(["hello", "world"]) == "HELLO" + proc = Compose(str.upper) + assert proc(None) is None + proc = Compose(str.upper, stop_on_none=False) + with pytest.raises( + ValueError, + match="Error in Compose with .* error='TypeError: (descriptor 'upper'|'str' object expected)", + ): + proc(None) + proc = Compose(str.upper, lambda x: x + 1) + with pytest.raises( + ValueError, + match="Error in Compose with .* error='TypeError: (can only|unsupported operand)", + ): + proc("hello") + + +def test_mapcompose(): + def filter_world(x): + return None if x == "world" else x + + proc = MapCompose(filter_world, str.upper) + assert proc(["hello", "world", "this", "is", "scrapy"]) == [ + "HELLO", + "THIS", + "IS", + "SCRAPY", + ] + proc = MapCompose(filter_world, str.upper) + assert proc(None) == [] + proc = MapCompose(filter_world, str.upper) + with pytest.raises( + ValueError, + match="Error in MapCompose with .* error='TypeError: (descriptor 'upper'|'str' object expected)", + ): + proc([1]) + proc = MapCompose(filter_world, lambda x: x + 1) + with pytest.raises( + ValueError, + match="Error in MapCompose with .* error='TypeError: (can only|unsupported operand)", + ): + proc("hello") diff --git a/tests/test_select_jmes.py b/tests/test_select_jmes.py index d9e5f3b..4d127de 100644 --- a/tests/test_select_jmes.py +++ b/tests/test_select_jmes.py @@ -1,26 +1,21 @@ -import unittest - from itemloaders.processors import SelectJmes +test_list_equals = { + "simple": ("foo.bar", {"foo": {"bar": "baz"}}, "baz"), + "invalid": ("foo.bar.baz", {"foo": {"bar": "baz"}}, None), + "top_level": ("foo", {"foo": {"bar": "baz"}}, {"bar": "baz"}), + "double_vs_single_quote_string": ("foo.bar", {"foo": {"bar": "baz"}}, "baz"), + "dict": ( + "foo.bar[*].name", + {"foo": {"bar": [{"name": "one"}, {"name": "two"}]}}, + ["one", "two"], + ), + "list": ("[1]", [1, 2], 2), +} -class SelectJmesTestCase(unittest.TestCase): - test_list_equals = { - "simple": ("foo.bar", {"foo": {"bar": "baz"}}, "baz"), - "invalid": ("foo.bar.baz", {"foo": {"bar": "baz"}}, None), - "top_level": ("foo", {"foo": {"bar": "baz"}}, {"bar": "baz"}), - "double_vs_single_quote_string": ("foo.bar", {"foo": {"bar": "baz"}}, "baz"), - "dict": ( - "foo.bar[*].name", - {"foo": {"bar": [{"name": "one"}, {"name": "two"}]}}, - ["one", "two"], - ), - "list": ("[1]", [1, 2], 2), - } - def test_output(self): - for key, value in self.test_list_equals.items(): - expr, test_list, expected = value - test = SelectJmes(expr)(test_list) - self.assertEqual( - test, expected, msg=f"test {key!r} got {test} expected {expected}" - ) +def test_output(): + for key, value in test_list_equals.items(): + expr, test_list, expected = value + test = SelectJmes(expr)(test_list) + assert test == expected, f"test {key!r} got {test} expected {expected}" diff --git a/tests/test_selector_loader.py b/tests/test_selector_loader.py index d52e2e3..6c0009c 100644 --- a/tests/test_selector_loader.py +++ b/tests/test_selector_loader.py @@ -1,7 +1,7 @@ import re -import unittest from unittest.mock import MagicMock +import pytest from parsel import Selector from itemloaders import ItemLoader @@ -12,7 +12,7 @@ class CustomItemLoader(ItemLoader): name_in = MapCompose(lambda v: v.title()) -class SelectortemLoaderTest(unittest.TestCase): +class TestSelectorItemLoader: selector = Selector( text=""" @@ -42,237 +42,240 @@ class SelectortemLoaderTest(unittest.TestCase): def test_init_method(self): loader = CustomItemLoader() - self.assertEqual(loader.selector, None) + assert loader.selector is None def test_init_method_errors(self): loader = CustomItemLoader() - self.assertRaises(RuntimeError, loader.add_xpath, "url", "//a/@href") - self.assertRaises(RuntimeError, loader.replace_xpath, "url", "//a/@href") - self.assertRaises(RuntimeError, loader.get_xpath, "//a/@href") - self.assertRaises(RuntimeError, loader.add_css, "name", "#name::text") - self.assertRaises(RuntimeError, loader.replace_css, "name", "#name::text") - self.assertRaises(RuntimeError, loader.get_css, "#name::text") + with pytest.raises(RuntimeError): + loader.add_xpath("url", "//a/@href") + with pytest.raises(RuntimeError): + loader.replace_xpath("url", "//a/@href") + with pytest.raises(RuntimeError): + loader.get_xpath("//a/@href") + with pytest.raises(RuntimeError): + loader.add_css("name", "#name::text") + with pytest.raises(RuntimeError): + loader.replace_css("name", "#name::text") + with pytest.raises(RuntimeError): + loader.get_css("#name::text") def test_init_method_with_selector(self): loader = CustomItemLoader(selector=self.selector) - self.assertTrue(loader.selector) + assert loader.selector loader.add_xpath("name", "//div/text()") - self.assertEqual(loader.get_output_value("name"), ["Marta"]) + assert loader.get_output_value("name") == ["Marta"] def test_init_method_with_selector_css(self): loader = CustomItemLoader(selector=self.selector) - self.assertTrue(loader.selector) + assert loader.selector loader.add_css("name", "div::text") - self.assertEqual(loader.get_output_value("name"), ["Marta"]) + assert loader.get_output_value("name") == ["Marta"] loader.add_css("url", "a::attr(href)") - self.assertEqual(loader.get_output_value("url"), ["http://www.scrapy.org"]) + assert loader.get_output_value("url") == ["http://www.scrapy.org"] # combining/accumulating CSS selectors and XPath expressions loader.add_xpath("name", "//div/text()") - self.assertEqual(loader.get_output_value("name"), ["Marta", "Marta"]) + assert loader.get_output_value("name") == ["Marta", "Marta"] loader.add_xpath("url", "//img/@src") - self.assertEqual( - loader.get_output_value("url"), - ["http://www.scrapy.org", "/images/logo.png"], - ) + assert loader.get_output_value("url") == [ + "http://www.scrapy.org", + "/images/logo.png", + ] def test_add_xpath_re(self): loader = CustomItemLoader(selector=self.selector) loader.add_xpath("name", "//div/text()", re="ma") - self.assertEqual(loader.get_output_value("name"), ["Ma"]) + assert loader.get_output_value("name") == ["Ma"] loader = CustomItemLoader(selector=self.selector) loader.add_xpath("name", "//div/text()", re=re.compile("ma")) - self.assertEqual(loader.get_output_value("name"), ["Ma"]) + assert loader.get_output_value("name") == ["Ma"] def test_add_xpath_variables(self): loader = CustomItemLoader(selector=self.selector) loader.add_xpath("name", "id($id)/text()", id="id") - self.assertEqual(loader.get_output_value("name"), ["Marta"]) + assert loader.get_output_value("name") == ["Marta"] loader = CustomItemLoader(selector=self.selector) loader.add_xpath("name", "id($id)/text()", id="id2") - self.assertEqual(loader.get_output_value("name"), []) + assert loader.get_output_value("name") == [] def test_replace_xpath(self): loader = CustomItemLoader(selector=self.selector) - self.assertTrue(loader.selector) + assert loader.selector loader.add_xpath("name", "//div/text()") - self.assertEqual(loader.get_output_value("name"), ["Marta"]) + assert loader.get_output_value("name") == ["Marta"] loader.replace_xpath("name", "//p/text()") - self.assertEqual(loader.get_output_value("name"), ["Paragraph"]) + assert loader.get_output_value("name") == ["Paragraph"] loader.replace_xpath("name", ["//p/text()", "//div/text()"]) - self.assertEqual(loader.get_output_value("name"), ["Paragraph", "Marta"]) + assert loader.get_output_value("name") == ["Paragraph", "Marta"] def test_get_xpath(self): loader = CustomItemLoader(selector=self.selector) - self.assertEqual(loader.get_xpath("//p/text()"), ["paragraph"]) - self.assertEqual(loader.get_xpath("//p/text()", TakeFirst()), "paragraph") - self.assertEqual(loader.get_xpath("//p/text()", TakeFirst(), re="pa"), "pa") + assert loader.get_xpath("//p/text()") == ["paragraph"] + assert loader.get_xpath("//p/text()", TakeFirst()) == "paragraph" + assert loader.get_xpath("//p/text()", TakeFirst(), re="pa") == "pa" - self.assertEqual( - loader.get_xpath(["//p/text()", "//div/text()"]), ["paragraph", "marta"] - ) + assert loader.get_xpath(["//p/text()", "//div/text()"]) == [ + "paragraph", + "marta", + ] def test_replace_xpath_multi_fields(self): loader = CustomItemLoader(selector=self.selector) loader.add_xpath(None, "//div/text()", TakeFirst(), lambda x: {"name": x}) - self.assertEqual(loader.get_output_value("name"), ["Marta"]) + assert loader.get_output_value("name") == ["Marta"] loader.replace_xpath(None, "//p/text()", TakeFirst(), lambda x: {"name": x}) - self.assertEqual(loader.get_output_value("name"), ["Paragraph"]) + assert loader.get_output_value("name") == ["Paragraph"] def test_replace_xpath_re(self): loader = CustomItemLoader(selector=self.selector) - self.assertTrue(loader.selector) + assert loader.selector loader.add_xpath("name", "//div/text()") - self.assertEqual(loader.get_output_value("name"), ["Marta"]) + assert loader.get_output_value("name") == ["Marta"] loader.replace_xpath("name", "//div/text()", re="ma") - self.assertEqual(loader.get_output_value("name"), ["Ma"]) + assert loader.get_output_value("name") == ["Ma"] def test_add_css_re(self): loader = CustomItemLoader(selector=self.selector) loader.add_css("name", "div::text", re="ma") - self.assertEqual(loader.get_output_value("name"), ["Ma"]) + assert loader.get_output_value("name") == ["Ma"] loader.add_css("url", "a::attr(href)", re="http://(.+)") - self.assertEqual(loader.get_output_value("url"), ["www.scrapy.org"]) + assert loader.get_output_value("url") == ["www.scrapy.org"] loader = CustomItemLoader(selector=self.selector) loader.add_css("name", "div::text", re=re.compile("ma")) - self.assertEqual(loader.get_output_value("name"), ["Ma"]) + assert loader.get_output_value("name") == ["Ma"] loader.add_css("url", "a::attr(href)", re=re.compile("http://(.+)")) - self.assertEqual(loader.get_output_value("url"), ["www.scrapy.org"]) + assert loader.get_output_value("url") == ["www.scrapy.org"] def test_replace_css(self): loader = CustomItemLoader(selector=self.selector) - self.assertTrue(loader.selector) + assert loader.selector loader.add_css("name", "div::text") - self.assertEqual(loader.get_output_value("name"), ["Marta"]) + assert loader.get_output_value("name") == ["Marta"] loader.replace_css("name", "p::text") - self.assertEqual(loader.get_output_value("name"), ["Paragraph"]) + assert loader.get_output_value("name") == ["Paragraph"] loader.replace_css("name", ["p::text", "div::text"]) - self.assertEqual(loader.get_output_value("name"), ["Paragraph", "Marta"]) + assert loader.get_output_value("name") == ["Paragraph", "Marta"] loader.add_css("url", "a::attr(href)", re="http://(.+)") - self.assertEqual(loader.get_output_value("url"), ["www.scrapy.org"]) + assert loader.get_output_value("url") == ["www.scrapy.org"] loader.replace_css("url", "img::attr(src)") - self.assertEqual(loader.get_output_value("url"), ["/images/logo.png"]) + assert loader.get_output_value("url") == ["/images/logo.png"] def test_get_css(self): loader = CustomItemLoader(selector=self.selector) - self.assertEqual(loader.get_css("p::text"), ["paragraph"]) - self.assertEqual(loader.get_css("p::text", TakeFirst()), "paragraph") - self.assertEqual(loader.get_css("p::text", TakeFirst(), re="pa"), "pa") + assert loader.get_css("p::text") == ["paragraph"] + assert loader.get_css("p::text", TakeFirst()) == "paragraph" + assert loader.get_css("p::text", TakeFirst(), re="pa") == "pa" - self.assertEqual( - loader.get_css(["p::text", "div::text"]), ["paragraph", "marta"] - ) - self.assertEqual( - loader.get_css(["a::attr(href)", "img::attr(src)"]), - ["http://www.scrapy.org", "/images/logo.png"], - ) + assert loader.get_css(["p::text", "div::text"]) == ["paragraph", "marta"] + assert loader.get_css(["a::attr(href)", "img::attr(src)"]) == [ + "http://www.scrapy.org", + "/images/logo.png", + ] def test_replace_css_multi_fields(self): loader = CustomItemLoader(selector=self.selector) loader.add_css(None, "div::text", TakeFirst(), lambda x: {"name": x}) - self.assertEqual(loader.get_output_value("name"), ["Marta"]) + assert loader.get_output_value("name") == ["Marta"] loader.replace_css(None, "p::text", TakeFirst(), lambda x: {"name": x}) - self.assertEqual(loader.get_output_value("name"), ["Paragraph"]) + assert loader.get_output_value("name") == ["Paragraph"] loader.add_css(None, "a::attr(href)", TakeFirst(), lambda x: {"url": x}) - self.assertEqual(loader.get_output_value("url"), ["http://www.scrapy.org"]) + assert loader.get_output_value("url") == ["http://www.scrapy.org"] loader.replace_css(None, "img::attr(src)", TakeFirst(), lambda x: {"url": x}) - self.assertEqual(loader.get_output_value("url"), ["/images/logo.png"]) + assert loader.get_output_value("url") == ["/images/logo.png"] def test_replace_css_re(self): loader = CustomItemLoader(selector=self.selector) - self.assertTrue(loader.selector) + assert loader.selector loader.add_css("url", "a::attr(href)") - self.assertEqual(loader.get_output_value("url"), ["http://www.scrapy.org"]) + assert loader.get_output_value("url") == ["http://www.scrapy.org"] loader.replace_css("url", "a::attr(href)", re=r"http://www\.(.+)") - self.assertEqual(loader.get_output_value("url"), ["scrapy.org"]) + assert loader.get_output_value("url") == ["scrapy.org"] def test_jmes_not_installed(self): selector = MagicMock(spec=Selector) del selector.jmespath loader = CustomItemLoader(selector=selector) - with self.assertRaises(AttributeError) as err: + with pytest.raises(AttributeError) as err: loader.add_jmes("name", "name", re="ma") - self.assertEqual( - str(err.exception), "Please install parsel >= 1.8.1 to get jmespath support" + assert ( + str(err.value) == "Please install parsel >= 1.8.1 to get jmespath support" ) def test_add_jmes_re(self): loader = CustomItemLoader(selector=self.jmes_selector) loader.add_jmes("name", "name", re="ma") - self.assertEqual(loader.get_output_value("name"), ["Ma"]) + assert loader.get_output_value("name") == ["Ma"] loader.add_jmes("url", "website.url", re="http://(.+)") - self.assertEqual(loader.get_output_value("url"), ["www.scrapy.org"]) + assert loader.get_output_value("url") == ["www.scrapy.org"] loader = CustomItemLoader(selector=self.jmes_selector) loader.add_jmes("name", "name", re=re.compile("ma")) - self.assertEqual(loader.get_output_value("name"), ["Ma"]) + assert loader.get_output_value("name") == ["Ma"] loader.add_jmes("url", "website.url", re=re.compile("http://(.+)")) - self.assertEqual(loader.get_output_value("url"), ["www.scrapy.org"]) + assert loader.get_output_value("url") == ["www.scrapy.org"] def test_get_jmes(self): loader = CustomItemLoader(selector=self.jmes_selector) - self.assertEqual(loader.get_jmes("description"), ["paragraph"]) - self.assertEqual(loader.get_jmes("description", TakeFirst()), "paragraph") - self.assertEqual(loader.get_jmes("description", TakeFirst(), re="pa"), "pa") + assert loader.get_jmes("description") == ["paragraph"] + assert loader.get_jmes("description", TakeFirst()) == "paragraph" + assert loader.get_jmes("description", TakeFirst(), re="pa") == "pa" - self.assertEqual( - loader.get_jmes(["description", "name"]), ["paragraph", "marta"] - ) - self.assertEqual( - loader.get_jmes(["website.url", "logo"]), - ["http://www.scrapy.org", "/images/logo.png"], - ) + assert loader.get_jmes(["description", "name"]) == ["paragraph", "marta"] + assert loader.get_jmes(["website.url", "logo"]) == [ + "http://www.scrapy.org", + "/images/logo.png", + ] def test_replace_jmes(self): loader = CustomItemLoader(selector=self.jmes_selector) - self.assertTrue(loader.selector) + assert loader.selector loader.add_jmes("name", "name") - self.assertEqual(loader.get_output_value("name"), ["Marta"]) + assert loader.get_output_value("name") == ["Marta"] loader.replace_jmes("name", "description") - self.assertEqual(loader.get_output_value("name"), ["Paragraph"]) + assert loader.get_output_value("name") == ["Paragraph"] loader.replace_jmes("name", ["description", "name"]) - self.assertEqual(loader.get_output_value("name"), ["Paragraph", "Marta"]) + assert loader.get_output_value("name") == ["Paragraph", "Marta"] loader.add_jmes("url", "website.url", re="http://(.+)") - self.assertEqual(loader.get_output_value("url"), ["www.scrapy.org"]) + assert loader.get_output_value("url") == ["www.scrapy.org"] loader.replace_jmes("url", "logo") - self.assertEqual(loader.get_output_value("url"), ["/images/logo.png"]) + assert loader.get_output_value("url") == ["/images/logo.png"] def test_replace_jmes_multi_fields(self): loader = CustomItemLoader(selector=self.jmes_selector) loader.add_jmes(None, "name", TakeFirst(), lambda x: {"name": x}) - self.assertEqual(loader.get_output_value("name"), ["Marta"]) + assert loader.get_output_value("name") == ["Marta"] loader.replace_jmes(None, "description", TakeFirst(), lambda x: {"name": x}) - self.assertEqual(loader.get_output_value("name"), ["Paragraph"]) + assert loader.get_output_value("name") == ["Paragraph"] loader.add_jmes(None, "website.url", TakeFirst(), lambda x: {"url": x}) - self.assertEqual(loader.get_output_value("url"), ["http://www.scrapy.org"]) + assert loader.get_output_value("url") == ["http://www.scrapy.org"] loader.replace_jmes(None, "logo", TakeFirst(), lambda x: {"url": x}) - self.assertEqual(loader.get_output_value("url"), ["/images/logo.png"]) + assert loader.get_output_value("url") == ["/images/logo.png"] def test_replace_jmes_re(self): loader = CustomItemLoader(selector=self.jmes_selector) - self.assertTrue(loader.selector) + assert loader.selector loader.add_jmes("url", "website.url") - self.assertEqual(loader.get_output_value("url"), ["http://www.scrapy.org"]) + assert loader.get_output_value("url") == ["http://www.scrapy.org"] loader.replace_jmes("url", "website.url", re=r"http://www\.(.+)") - self.assertEqual(loader.get_output_value("url"), ["scrapy.org"]) + assert loader.get_output_value("url") == ["scrapy.org"] def test_fluent_interface(self): loader = ItemLoader(selector=self.selector) @@ -285,7 +288,8 @@ def test_fluent_interface(self): .replace_value("url", "http://foo") .load_item() ) - self.assertEqual( - item, - {"name": ["marta"], "description": ["paragraph"], "url": ["http://foo"]}, - ) + assert item == { + "name": ["marta"], + "description": ["paragraph"], + "url": ["http://foo"], + } diff --git a/tests/test_utils_misc.py b/tests/test_utils_misc.py index 6c9e9b6..1812aa5 100644 --- a/tests/test_utils_misc.py +++ b/tests/test_utils_misc.py @@ -1,23 +1,16 @@ -import unittest - from itemloaders.utils import arg_to_iter -class UtilsMiscTestCase(unittest.TestCase): - def test_arg_to_iter(self): - assert hasattr(arg_to_iter(None), "__iter__") - assert hasattr(arg_to_iter(100), "__iter__") - assert hasattr(arg_to_iter("lala"), "__iter__") - assert hasattr(arg_to_iter([1, 2, 3]), "__iter__") - assert hasattr(arg_to_iter(letter for letter in "abcd"), "__iter__") - - self.assertEqual(list(arg_to_iter(None)), []) - self.assertEqual(list(arg_to_iter("lala")), ["lala"]) - self.assertEqual(list(arg_to_iter(100)), [100]) - self.assertEqual(list(arg_to_iter(letter for letter in "abc")), ["a", "b", "c"]) - self.assertEqual(list(arg_to_iter([1, 2, 3])), [1, 2, 3]) - self.assertEqual(list(arg_to_iter({"a": 1})), [{"a": 1}]) - +def test_arg_to_iter(): + assert hasattr(arg_to_iter(None), "__iter__") + assert hasattr(arg_to_iter(100), "__iter__") + assert hasattr(arg_to_iter("lala"), "__iter__") + assert hasattr(arg_to_iter([1, 2, 3]), "__iter__") + assert hasattr(arg_to_iter(letter for letter in "abcd"), "__iter__") -if __name__ == "__main__": - unittest.main() + assert list(arg_to_iter(None)) == [] + assert list(arg_to_iter("lala")) == ["lala"] + assert list(arg_to_iter(100)) == [100] + assert list(arg_to_iter(letter for letter in "abc")) == ["a", "b", "c"] + assert list(arg_to_iter([1, 2, 3])) == [1, 2, 3] + assert list(arg_to_iter({"a": 1})) == [{"a": 1}] diff --git a/tests/test_utils_python.py b/tests/test_utils_python.py index 00a3d4f..ebb3212 100644 --- a/tests/test_utils_python.py +++ b/tests/test_utils_python.py @@ -4,66 +4,58 @@ import operator import platform import sys -import unittest from typing import Any from itemloaders.utils import get_func_args -class UtilsPythonTestCase(unittest.TestCase): - def test_get_func_args(self): - def f1(a, b, c): - pass - - def f2(a, b=None, c=None): - pass - - def f3(a, b=None, *, c=None): - pass +def test_get_func_args(): + def f1(a, b, c): + pass - class A: - def __init__(self, a: Any, b: Any, c: Any): - pass + def f2(a, b=None, c=None): + pass - def method(self, a, b, c): - pass + def f3(a, b=None, *, c=None): + pass - class Callable: - def __call__(self, a, b, c): - pass - - a = A(1, 2, 3) - cal = Callable() - partial_f1 = functools.partial(f1, None) - partial_f2 = functools.partial(f1, b=None) - partial_f3 = functools.partial(partial_f2, None) - - self.assertEqual(get_func_args(f1), ["a", "b", "c"]) - self.assertEqual(get_func_args(f2), ["a", "b", "c"]) - self.assertEqual(get_func_args(f3), ["a", "b", "c"]) - self.assertEqual(get_func_args(A), ["a", "b", "c"]) - self.assertEqual(get_func_args(a.method), ["a", "b", "c"]) - self.assertEqual(get_func_args(partial_f1), ["b", "c"]) - self.assertEqual(get_func_args(partial_f2), ["a", "c"]) - self.assertEqual(get_func_args(partial_f3), ["c"]) - self.assertEqual(get_func_args(cal), ["a", "b", "c"]) - self.assertEqual(get_func_args(object), []) - self.assertEqual(get_func_args(str.split, stripself=True), ["sep", "maxsplit"]) - self.assertEqual(get_func_args(" ".join, stripself=True), ["iterable"]) + class A: + def __init__(self, a: Any, b: Any, c: Any): + pass - if sys.version_info >= (3, 13) or platform.python_implementation() == "PyPy": - # the correct and correctly extracted signature - self.assertEqual( - get_func_args(operator.itemgetter(2), stripself=True), ["obj"] - ) - elif platform.python_implementation() == "CPython": - # ["args", "kwargs"] is a correct result for the pre-3.13 incorrect function signature - # [] is an incorrect result on even older CPython (https://github.com/python/cpython/issues/86951) - self.assertIn( - get_func_args(operator.itemgetter(2), stripself=True), - [[], ["args", "kwargs"]], - ) + def method(self, a, b, c): + pass + class Callable: + def __call__(self, a, b, c): + pass -if __name__ == "__main__": - unittest.main() + a = A(1, 2, 3) + cal = Callable() + partial_f1 = functools.partial(f1, None) + partial_f2 = functools.partial(f1, b=None) + partial_f3 = functools.partial(partial_f2, None) + + assert get_func_args(f1) == ["a", "b", "c"] + assert get_func_args(f2) == ["a", "b", "c"] + assert get_func_args(f3) == ["a", "b", "c"] + assert get_func_args(A) == ["a", "b", "c"] + assert get_func_args(a.method) == ["a", "b", "c"] + assert get_func_args(partial_f1) == ["b", "c"] + assert get_func_args(partial_f2) == ["a", "c"] + assert get_func_args(partial_f3) == ["c"] + assert get_func_args(cal) == ["a", "b", "c"] + assert get_func_args(object) == [] + assert get_func_args(str.split, stripself=True) == ["sep", "maxsplit"] + assert get_func_args(" ".join, stripself=True) == ["iterable"] + + if sys.version_info >= (3, 13) or platform.python_implementation() == "PyPy": + # the correct and correctly extracted signature + assert get_func_args(operator.itemgetter(2), stripself=True) == ["obj"] + elif platform.python_implementation() == "CPython": + # ["args", "kwargs"] is a correct result for the pre-3.13 incorrect function signature + # [] is an incorrect result on even older CPython (https://github.com/python/cpython/issues/86951) + assert get_func_args(operator.itemgetter(2), stripself=True) in [ + [], + ["args", "kwargs"], + ] diff --git a/tox.ini b/tox.ini index 7a00623..4651505 100644 --- a/tox.ini +++ b/tox.ini @@ -49,12 +49,13 @@ commands = [testenv:typing] basepython = python3 deps = - mypy==1.16.1 + mypy==1.17.0 attrs>=18.2.0 - Scrapy==2.13.2 - types-jmespath==1.0.2.20250529 + pytest==8.4.1 + Scrapy==2.13.3 + types-jmespath==1.0.2.20250711 commands = - mypy --strict --implicit-reexport {posargs:itemloaders tests} + mypy --strict {posargs:itemloaders tests} [testenv:pre-commit] basepython = python3