-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from nodestream-proj/improvements
Improving code quality and adding tests
- Loading branch information
Showing
11 changed files
with
473 additions
and
267 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
{ | ||
"python.testing.pytestArgs": [ | ||
"." | ||
], | ||
"python.testing.unittestEnabled": false, | ||
"python.testing.pytestEnabled": true | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
from unittest.mock import Mock | ||
|
||
from nodestream.model import DesiredIngestion, Node | ||
|
||
from nodestream_plugin_semantic.model import Content, hash | ||
|
||
|
||
def test_content_from_text():
    """``Content.from_text`` keeps the text, ids it with ``hash``, and has no parent."""
    raw_text = "test content"
    built = Content.from_text(raw_text)
    assert built.content == raw_text
    assert built.id == hash(raw_text)
    assert built.parent is None
|
||
|
||
def test_content_add_metadata():
    """A single ``add_metadata`` call is reflected in ``Content.metadata``."""
    subject = Content.from_text("test content")
    subject.add_metadata("key", "value")
    expected_metadata = {"key": "value"}
    assert subject.metadata == expected_metadata
|
||
|
||
def test_content_split_on_delimiter():
    """Splitting on a newline yields one child per line, each parented to the source."""
    source = Content.from_text("line1\nline2\nline3")
    pieces = list(source.split_on_delimiter("\n"))
    assert len(pieces) == 3
    # Check the pieces in order against the three lines of the source text.
    for position, expected_text in enumerate(("line1", "line2", "line3")):
        assert pieces[position].content == expected_text
    assert all(piece.parent == source for piece in pieces)
|
||
|
||
def test_content_assign_embedding():
    """``assign_embedding`` stores the given vector on ``Content.embedding``."""
    vector = [0.1, 0.2, 0.3]
    subject = Content.from_text("test content")
    subject.assign_embedding(vector)
    assert subject.embedding == vector
|
||
|
||
def test_content_apply_to_node():
    """Check that ``apply_to_node`` writes the type, id key and content to a node.

    The node is a ``Mock`` specced on ``Node``, so the key and property writes
    are verified through the ``set_property`` calls the mock records.
    """
    content = Content.from_text("test content")
    node = Mock(spec=Node)
    content.apply_to_node("test_type", node)
    # Assert (rather than assign) the type: an assignment here would succeed
    # no matter what ``apply_to_node`` did and would verify nothing.
    assert node.type == "test_type"
    node.key_values.set_property.assert_called_with("id", content.id)
    node.properties.set_property.assert_any_call("content", content.content)
|
||
|
||
def test_content_make_ingestible():
    """Check the ingestion produced for a child content that has a parent.

    Verifies the source node (type, id key, content property) and the single
    relationship that points at the parent node (relationship type, direction,
    and the parent's type, id key and content property).
    """
    parent_content = Content.from_text("parent content")
    child_content = Content.from_text("child content", parent=parent_content)
    ingest = child_content.make_ingestible("test_type", "test_relationship")

    assert isinstance(ingest, DesiredIngestion)
    assert ingest.source.type == "test_type"
    # These comparisons must be asserted; as bare expressions their result is
    # discarded and the test cannot fail on them.
    assert ingest.source.key_values == {"id": child_content.id}
    # NOTE(review): only the "content" entry is checked because the node may
    # carry additional default properties -- confirm before tightening this
    # to whole-mapping equality.
    assert ingest.source.properties["content"] == child_content.content

    assert len(ingest.relationships) == 1
    relationship = ingest.relationships[0]
    assert relationship.relationship.type == "test_relationship"
    assert relationship.outbound is False
    assert relationship.to_node.type == "test_type"
    assert relationship.to_node.key_values == {"id": parent_content.id}
    assert relationship.to_node.properties["content"] == parent_content.content
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
from pathlib import Path | ||
from unittest.mock import AsyncMock, MagicMock, patch | ||
|
||
import pytest | ||
from nodestream.model import DesiredIngestion | ||
|
||
from nodestream_plugin_semantic.model import Content | ||
from nodestream_plugin_semantic.pipeline import ( | ||
ChunkContent, | ||
ContentInterpreter, | ||
ConvertToContent, | ||
DocumentExtractor, | ||
EmbedContent, | ||
) | ||
|
||
|
||
@pytest.mark.asyncio
async def test_chunk_content():
    """``ChunkContent.transform_record`` yields every chunk the chunker returns."""
    splitter = MagicMock()
    splitter.chunk.return_value = [
        Content(id="1", content="chunk1"),
        Content(id="2", content="chunk2"),
    ]
    step = ChunkContent(splitter)
    source = Content(id="0", content="original content")

    # Drain the async generator into a list so the results can be indexed.
    emitted = []
    async for piece in step.transform_record(source):
        emitted.append(piece)

    assert len(emitted) == 2
    assert emitted[0].content == "chunk1"
    assert emitted[1].content == "chunk2"
|
||
|
||
@pytest.mark.asyncio
async def test_embed_content():
    """``EmbedContent`` attaches the embedder's result and leaves the text untouched."""
    fake_embedder = AsyncMock()
    fake_embedder.embed.return_value = "embedded content"
    step = EmbedContent(fake_embedder)
    source = Content(id="0", content="original content")

    embedded = await step.transform_record(source)

    assert embedded.content == "original content"
    assert embedded.embedding == "embedded content"
|
||
|
||
def test_document_extractor():
    """``from_file_data`` collects the globbed paths and ``read`` uses the content type."""
    candidate_paths = [Path("file1.txt"), Path("file2.txt")]
    fake_type = MagicMock()
    fake_type.is_supported.return_value = True
    fake_type.read.return_value = "file content"

    # Build both patchers up front; they only take effect inside the ``with``.
    glob_patch = patch(
        "nodestream_plugin_semantic.pipeline.glob",
        return_value=["file1.txt", "file2.txt"],
    )
    type_patch = patch(
        "nodestream_plugin_semantic.pipeline.ContentType.by_name",
        return_value=fake_type,
    )
    with glob_patch, type_patch:
        extractor = DocumentExtractor.from_file_data(globs=["*.txt"])
        assert len(extractor.paths) == 2
        assert extractor.read(candidate_paths[0]) == "file content"
|
||
|
||
@pytest.mark.asyncio
async def test_document_extractor_extract_records():
    """``extract_records`` yields one record per globbed file, holding the text read."""
    fake_type = MagicMock()
    fake_type.is_supported.return_value = True
    fake_type.read.return_value = "file content"

    # Build both patchers up front; they only take effect inside the ``with``.
    glob_patch = patch(
        "nodestream_plugin_semantic.pipeline.glob", return_value=["file1.txt"]
    )
    type_patch = patch(
        "nodestream_plugin_semantic.pipeline.ContentType.by_name",
        return_value=fake_type,
    )
    with glob_patch, type_patch:
        extractor = DocumentExtractor.from_file_data(globs=["*.txt"])
        collected = []
        async for item in extractor.extract_records():
            collected.append(item)
        assert len(collected) == 1
        assert collected[0].content == "file content"
|
||
|
||
@pytest.mark.asyncio
async def test_convert_to_content():
    """``ConvertToContent`` maps a dict's ``id`` and ``content`` onto the result."""
    raw_record = {"id": "1", "content": "some content"}
    step = ConvertToContent()
    converted = await step.transform_record(raw_record)
    assert converted.id == "1"
    assert converted.content == "some content"
|
||
|
||
@pytest.mark.asyncio
async def test_content_interpreter():
    """``ContentInterpreter.transform_record`` returns a ``DesiredIngestion``."""
    source = Content(id="1", content="some content")
    interpreter = ContentInterpreter()
    outcome = await interpreter.transform_record(source)
    assert isinstance(outcome, DesiredIngestion)
|
||
|
||
def test_content_interpreter_expand_schema():
    """``expand_schema`` calls the coordinator's node, relationship and connect hooks."""
    interpreter = ContentInterpreter()
    schema_coordinator = MagicMock()
    interpreter.expand_schema(schema_coordinator)
    # Each hook must have been invoked at least once; arguments are not checked.
    for hook in (
        schema_coordinator.on_node_schema,
        schema_coordinator.on_relationship_schema,
        schema_coordinator.connect,
    ):
        hook.assert_called()