Skip to content

Commit 56274b0

Browse files
committed
full
1 parent 3d06587 commit 56274b0

File tree

9 files changed: 84 additions and 94 deletions

financial_data_load/full_data_load.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@
2727
- Azure AI Foundry deployed (run: azd up && uv run python setup_env.py)
2828
- Azure CLI logged in (az login --use-device-code)
2929
- Neo4j connection configured in .env
30-
- PDFs in DATA_DIR (defaults to ~/projects/workshops/workshop-financial-data)
30+
- PDFs in financial-data/form10k-sample (relative to this script)
3131
"""
3232

3333
import asyncio
@@ -58,8 +58,8 @@
5858
logging.basicConfig(level=logging.INFO)
5959
logger = logging.getLogger(__name__)
6060

61-
# Data directory - adjust this path as needed
62-
DATA_DIR = Path.home() / "projects" / "workshops" / "workshop-financial-data"
61+
# Data directory - relative to this script
62+
DATA_DIR = Path(__file__).parent / "financial-data"
6363
PDF_DIR = DATA_DIR / "form10k-sample"
6464
COMPANY_CSV = DATA_DIR / "Company_Filings.csv"
6565
ASSET_MANAGER_CSV = DATA_DIR / "Asset_Manager_Holdings.csv"

financial_data_load/src/01_01_data_loading.py

Lines changed: 3 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -7,9 +7,7 @@
77
Run with: uv run python solutions/01_01_data_loading.py
88
"""
99

10-
from neo4j import GraphDatabase
11-
12-
from config import Neo4jConfig
10+
from config import get_neo4j_driver
1311

1412
# Sample text representing SEC 10-K filing content
1513
SAMPLE_TEXT = """
@@ -120,13 +118,7 @@ def show_graph_structure(driver) -> None:
120118

121119
def main():
122120
"""Run data loading demo."""
123-
config = Neo4jConfig()
124-
driver = GraphDatabase.driver(
125-
config.uri,
126-
auth=(config.username, config.password)
127-
)
128-
129-
try:
121+
with get_neo4j_driver() as driver:
130122
driver.verify_connectivity()
131123
print("Connected to Neo4j successfully!")
132124

@@ -153,9 +145,7 @@ def main():
153145
# Show structure
154146
show_graph_structure(driver)
155147

156-
finally:
157-
driver.close()
158-
print("\nConnection closed.")
148+
print("\nConnection closed.")
159149

160150

161151
if __name__ == "__main__":

financial_data_load/src/01_02_embeddings.py

Lines changed: 3 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -9,13 +9,12 @@
99

1010
import asyncio
1111

12-
from neo4j import GraphDatabase
1312
from neo4j_graphrag.indexes import create_vector_index
1413
from neo4j_graphrag.experimental.components.text_splitters.fixed_size_splitter import (
1514
FixedSizeSplitter,
1615
)
1716

18-
from config import Neo4jConfig, get_embedder
17+
from config import get_neo4j_driver, get_embedder
1918

2019
# Sample text representing SEC 10-K filing content
2120
SAMPLE_TEXT = """
@@ -159,13 +158,7 @@ def demo_search(driver, embedder) -> None:
159158

160159
async def main():
161160
"""Run embeddings demo."""
162-
config = Neo4jConfig()
163-
driver = GraphDatabase.driver(
164-
config.uri,
165-
auth=(config.username, config.password)
166-
)
167-
168-
try:
161+
with get_neo4j_driver() as driver:
169162
driver.verify_connectivity()
170163
print("Connected to Neo4j successfully!")
171164

@@ -199,9 +192,7 @@ async def main():
199192
print("\n=== Vector Search Demo ===")
200193
demo_search(driver, embedder)
201194

202-
finally:
203-
driver.close()
204-
print("\n\nConnection closed.")
195+
print("\n\nConnection closed.")
205196

206197

207198
if __name__ == "__main__":

financial_data_load/src/01_03_entity_extraction.py

Lines changed: 3 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -9,10 +9,9 @@
99

1010
import asyncio
1111

12-
from neo4j import GraphDatabase
1312
from neo4j_graphrag.experimental.pipeline.kg_builder import SimpleKGPipeline
1413

15-
from config import Neo4jConfig, get_llm, get_embedder
14+
from config import get_neo4j_driver, get_llm, get_embedder
1615

1716
# Sample text representing SEC 10-K filing content
1817
SAMPLE_TEXT = """
@@ -164,13 +163,7 @@ def find_chunks_for_entity(driver, entity_name: str) -> None:
164163

165164
async def main():
166165
"""Run entity extraction demo."""
167-
config = Neo4jConfig()
168-
driver = GraphDatabase.driver(
169-
config.uri,
170-
auth=(config.username, config.password)
171-
)
172-
173-
try:
166+
with get_neo4j_driver() as driver:
174167
driver.verify_connectivity()
175168
print("Connected to Neo4j successfully!")
176169

@@ -217,9 +210,7 @@ async def main():
217210
find_chunks_for_entity(driver, "iPhone")
218211
find_chunks_for_entity(driver, "Apple")
219212

220-
finally:
221-
driver.close()
222-
print("\n\nConnection closed.")
213+
print("\n\nConnection closed.")
223214

224215

225216
if __name__ == "__main__":

financial_data_load/src/01_04_full_dataset_queries.py

Lines changed: 12 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -5,12 +5,11 @@
55
containing SEC 10-K filings from multiple companies with extracted entities and relationships.
66
77
Prerequisites:
8-
- Full dataset must be loaded (use restore_neo4j.py script)
8+
- Full dataset must be loaded (use full_data_load.py script)
99
- Neo4j connection configured in config.py
1010
"""
1111

12-
from neo4j import GraphDatabase
13-
from config import Neo4jConfig, get_embedder
12+
from config import get_neo4j_driver, get_embedder
1413

1514

1615
INDEX_NAME = "chunkEmbeddings"
@@ -137,19 +136,14 @@ def show_company_products(driver, company_name: str):
137136
def main():
138137
# Setup connection
139138
print("Connecting to Neo4j...")
140-
neo4j_config = Neo4jConfig()
141-
driver = GraphDatabase.driver(
142-
neo4j_config.uri,
143-
auth=(neo4j_config.username, neo4j_config.password)
144-
)
145-
driver.verify_connectivity()
146-
print("Connected to Neo4j successfully!\n")
147-
148-
# Initialize embedder
149-
embedder = get_embedder()
150-
print(f"Embedder initialized: {embedder.model}\n")
151-
152-
try:
139+
with get_neo4j_driver() as driver:
140+
driver.verify_connectivity()
141+
print("Connected to Neo4j successfully!\n")
142+
143+
# Initialize embedder
144+
embedder = get_embedder()
145+
print(f"Embedder initialized: {embedder.model}\n")
146+
153147
# Show graph summary
154148
print("\n" + "="*60)
155149
print("GRAPH SUMMARY")
@@ -203,11 +197,8 @@ def main():
203197
print()
204198
show_company_products(driver, "Microsoft")
205199

206-
finally:
207-
# Cleanup
208-
driver.close()
209-
print("\n" + "="*60)
210-
print("Connection closed.")
200+
print("\n" + "="*60)
201+
print("Connection closed.")
211202

212203

213204
if __name__ == "__main__":

financial_data_load/src/05_01_fulltext_search.py

Lines changed: 3 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010

1111
from neo4j import GraphDatabase
1212

13-
from config import Neo4jConfig
13+
from config import get_neo4j_driver
1414

1515

1616
def basic_search(driver: GraphDatabase.driver, term: str) -> None:
@@ -146,13 +146,7 @@ def hybrid_search(driver: GraphDatabase.driver, keyword: str) -> None:
146146

147147
def main() -> None:
148148
"""Run all fulltext search examples."""
149-
config = Neo4jConfig()
150-
driver = GraphDatabase.driver(
151-
config.uri,
152-
auth=(config.username, config.password),
153-
)
154-
155-
try:
149+
with get_neo4j_driver() as driver:
156150
driver.verify_connectivity()
157151
print("Connected to Neo4j")
158152

@@ -163,7 +157,7 @@ def main() -> None:
163157
)
164158
if not result.single():
165159
print("\nError: Fulltext index 'search_entities' not found.")
166-
print("Run: uv run python scripts/restore_neo4j.py --full-text")
160+
print("Run: uv run python full_data_load.py")
167161
return
168162

169163
# Run examples
@@ -174,9 +168,6 @@ def main() -> None:
174168
search_with_graph_traversal(driver, "Nvidia")
175169
hybrid_search(driver, "Amazon")
176170

177-
finally:
178-
driver.close()
179-
180171

181172
if __name__ == "__main__":
182173
main()

financial_data_load/src/05_02_hybrid_search.py

Lines changed: 5 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -26,11 +26,10 @@
2626
"""
2727

2828
import neo4j
29-
from neo4j import GraphDatabase
3029
from neo4j_graphrag.retrievers import HybridRetriever, HybridCypherRetriever
3130
from neo4j_graphrag.types import RetrieverResultItem
3231

33-
from config import Neo4jConfig, get_embedder
32+
from config import get_neo4j_driver, get_embedder
3433

3534
# Index names
3635
VECTOR_INDEX = "chunkEmbeddings"
@@ -209,15 +208,9 @@ def search_method_comparison(retriever: HybridRetriever, query: str) -> None:
209208

210209
def main() -> None:
211210
"""Run all hybrid search examples."""
212-
config = Neo4jConfig()
213-
driver = GraphDatabase.driver(
214-
config.uri,
215-
auth=(config.username, config.password),
216-
)
217-
218-
try:
211+
with get_neo4j_driver() as driver:
219212
driver.verify_connectivity()
220-
print(f"Connected to Neo4j: {config.uri}")
213+
print("Connected to Neo4j")
221214

222215
# Initialize embedder
223216
embedder = get_embedder()
@@ -232,7 +225,7 @@ def main() -> None:
232225
)
233226
if not result.single():
234227
print(f"\nError: Fulltext index '{FULLTEXT_INDEX}' not found.")
235-
print("Run: uv run python scripts/restore_neo4j.py --full-text")
228+
print("Run: uv run python full_data_load.py")
236229
return
237230

238231
# Create HybridRetriever
@@ -263,9 +256,7 @@ def main() -> None:
263256
graph_enhanced_search(hybrid_cypher_retriever, "artificial intelligence")
264257
search_method_comparison(hybrid_retriever, "Microsoft cloud computing strategy")
265258

266-
finally:
267-
driver.close()
268-
print("\nConnection closed")
259+
print("\nConnection closed")
269260

270261

271262
if __name__ == "__main__":

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,8 @@ dependencies = [
1111
"python-dotenv",
1212
"pydantic-settings>=2.0.0",
1313
"neo4j>=5.0.0",
14-
"neo4j-graphrag>=1.10.0",
14+
# Using local fork for testing - replace with "neo4j-graphrag>=1.10.0" for production
15+
"neo4j-graphrag @ file:///Users/ryanknight/projects/neo4j-graphrag-python",
1516
"httpx>=0.27.0",
1617
"openai>=1.0.0",
1718
"ipykernel>=6.0.0",

uv.lock

Lines changed: 50 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)