Skip to content
This repository was archived by the owner on Jul 15, 2025. It is now read-only.

Commit e1b801e

Browse files
committed
First attempt at a Node trait with Domain node using Claude Code
1 parent f5dd299 commit e1b801e

File tree

2 files changed

+203
-10
lines changed

2 files changed

+203
-10
lines changed

pixlie_ai/src/engine/node.rs

Lines changed: 126 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,28 @@ use crate::entity::web::link::Link;
1818
use crate::entity::web::web_metadata::WebMetadata;
1919
use crate::entity::web::web_page::WebPage;
2020
use crate::error::PiResult;
21-
use crate::{ExternalData, FetchError, FetchResponse};
21+
use crate::{ExternalData, FetchError, FetchRequest, FetchResponse};
2222
use chrono::{DateTime, Utc};
2323
use serde::{Deserialize, Serialize};
24+
use std::any::Any;
2425
use std::cmp::Ordering;
2526
use std::sync::Arc;
2627
use strum::{Display, EnumString};
2728
use ts_rs::TS;
2829
use utoipa::ToSchema;
2930

31+
/// Common interface for typed nodes handled by the engine.
/// Implementors must be `Send + Sync`.
pub trait Node: Send + Sync {
32+
/// Runs this node's processing step for the node identified by `node_id`,
/// using the supplied engine handle. Failures are reported through `PiResult`.
fn process(&self, node_id: NodeId, engine: Arc<&Engine>) -> PiResult<()>;
33+
/// Exposes the node as `&dyn Any`.
/// NOTE(review): presumably intended for downcasting to the concrete type;
/// no downcast is visible in this diff — confirm against callers.
fn as_any(&self) -> &dyn Any;
34+
/// Returns the labels that describe this node.
fn get_labels(&self) -> Vec<NodeLabel>;
35+
}
36+
37+
/// Extension of `Node` for nodes that produce fetch requests and react to
/// the resulting response or error.
pub trait Fetchable: Node {
38+
/// Builds the fetch requests for the node identified by `node_id`.
fn make_fetch_requests(&self, node_id: NodeId, engine: Arc<&Engine>) -> PiResult<Vec<FetchRequest>>;
39+
/// Handles a `FetchResponse` received for the node identified by `node_id`.
fn handle_fetch_response(&self, node_id: NodeId, engine: Arc<&Engine>, response: FetchResponse) -> PiResult<()>;
40+
/// Handles a `FetchError` reported for the node identified by `node_id`.
fn handle_fetch_error(&self, node_id: NodeId, engine: Arc<&Engine>, error: FetchError) -> PiResult<()>;
41+
}
42+
3043
#[derive(Clone, Display, Deserialize, Serialize)]
3144
pub enum Payload {
3245
Link(Link),
@@ -112,6 +125,118 @@ pub struct NodeItem {
112125
pub written_at: DateTime<Utc>,
113126
}
114127

128+
/// A node of concrete type `T` bundled with its id, flags and a timestamp.
pub struct TypedNodeItem<T: Node> {
129+
/// Identifier handed to the wrapped node's trait methods.
pub id: NodeId,
130+
/// The wrapped node value.
pub node: T,
131+
/// Flags for this item; `TypedNodeItem::new` starts with `NodeFlags::empty()`.
pub flags: NodeFlags,
132+
/// Timestamp for this item; `TypedNodeItem::new` sets it to `Utc::now()`.
pub written_at: DateTime<Utc>,
133+
}
134+
135+
// Convenience methods that forward to the wrapped node, supplying `self.id`
// as the `node_id` argument so callers do not pass it separately.
impl<T: Node> TypedNodeItem<T> {
136+
/// Wraps `node` under `id`, with empty flags and `written_at` set to the
/// current UTC time.
pub fn new(id: NodeId, node: T) -> Self {
137+
Self {
138+
id,
139+
node,
140+
flags: NodeFlags::empty(),
141+
written_at: Utc::now(),
142+
}
143+
}
144+
145+
/// Forwards to `Node::process` on the wrapped node with this item's id.
pub fn process(&self, engine: Arc<&Engine>) -> PiResult<()> {
146+
self.node.process(self.id, engine)
147+
}
148+
149+
/// Forwards to `Fetchable::make_fetch_requests` with this item's id.
/// Only available when `T` implements `Fetchable`.
pub fn make_fetch_requests(&self, engine: Arc<&Engine>) -> PiResult<Vec<FetchRequest>>
150+
where
151+
T: Fetchable
152+
{
153+
self.node.make_fetch_requests(self.id, engine)
154+
}
155+
156+
/// Forwards `response` to `Fetchable::handle_fetch_response` with this item's id.
/// Only available when `T` implements `Fetchable`.
pub fn handle_fetch_response(&self, engine: Arc<&Engine>, response: FetchResponse) -> PiResult<()>
157+
where
158+
T: Fetchable
159+
{
160+
self.node.handle_fetch_response(self.id, engine, response)
161+
}
162+
163+
/// Forwards `error` to `Fetchable::handle_fetch_error` with this item's id.
/// Only available when `T` implements `Fetchable`.
pub fn handle_fetch_error(&self, engine: Arc<&Engine>, error: FetchError) -> PiResult<()>
164+
where
165+
T: Fetchable
166+
{
167+
self.node.handle_fetch_error(self.id, engine, error)
168+
}
169+
170+
/// Returns the labels reported by the wrapped node.
pub fn get_labels(&self) -> Vec<NodeLabel> {
171+
self.node.get_labels()
172+
}
173+
174+
/// Borrows the wrapped node.
pub fn get_node(&self) -> &T {
175+
&self.node
176+
}
177+
178+
/// Mutably borrows the wrapped node.
pub fn get_node_mut(&mut self) -> &mut T {
179+
&mut self.node
180+
}
181+
}
182+
183+
/// Closed set of typed node items. Only the `Domain` variant is active;
/// the `Link` and `WebPage` variants below are commented-out placeholders.
pub enum AnyTypedNode {
184+
/// A typed item wrapping a `Domain` node.
Domain(TypedNodeItem<crate::entity::web::domain::Domain>),
185+
// Link(TypedNodeItem<crate::entity::web::link::Link>),
186+
// WebPage(TypedNodeItem<crate::entity::web::web_page::WebPage>),
187+
// Add more as you implement them
188+
}
189+
190+
// Each method matches on the variant and delegates to the contained
// `TypedNodeItem`. With a single active variant every match has one arm.
impl AnyTypedNode {
191+
/// Processes the contained typed item.
pub fn process(&self, engine: Arc<&Engine>) -> PiResult<()> {
192+
match self {
193+
AnyTypedNode::Domain(node) => node.process(engine),
194+
// AnyTypedNode::Link(node) => node.process(engine),
195+
// AnyTypedNode::WebPage(node) => node.process(engine),
196+
}
197+
}
198+
199+
/// Passes `response` to the contained typed item's fetch-response handler.
pub fn handle_fetch_response(&self, engine: Arc<&Engine>, response: FetchResponse) -> PiResult<()> {
200+
match self {
201+
AnyTypedNode::Domain(node) => node.handle_fetch_response(engine, response),
202+
}
203+
}
204+
205+
/// Passes `error` to the contained typed item's fetch-error handler.
pub fn handle_fetch_error(&self, engine: Arc<&Engine>, error: FetchError) -> PiResult<()> {
206+
match self {
207+
AnyTypedNode::Domain(node) => node.handle_fetch_error(engine, error),
208+
}
209+
}
210+
211+
/// Returns the labels of the contained typed item.
pub fn get_labels(&self) -> Vec<NodeLabel> {
212+
match self {
213+
AnyTypedNode::Domain(node) => node.get_labels(),
214+
}
215+
}
216+
217+
/// Returns the id stored on the contained typed item.
pub fn get_id(&self) -> NodeId {
218+
match self {
219+
AnyTypedNode::Domain(node) => node.id,
220+
}
221+
}
222+
}
223+
224+
/// Builds a typed node from a label set and payload.
///
/// Returns `Some(AnyTypedNode::Domain(..))` when `labels` contains
/// `NodeLabel::DomainName` and `payload` is `Payload::Text`; the text is
/// cloned into a new `Domain`. Every other combination returns `None`.
pub fn create_typed_node(id: NodeId, labels: &[NodeLabel], payload: &Payload) -> Option<AnyTypedNode> {
225+
use crate::entity::web::domain::Domain;
226+
227+
if labels.contains(&NodeLabel::DomainName) {
228+
// A DomainName label with a non-text payload falls through to `None`.
if let Payload::Text(domain_name) = payload {
229+
return Some(AnyTypedNode::Domain(TypedNodeItem::new(id, Domain::new(domain_name.clone()))));
230+
}
231+
}
232+
233+
// Add more entity types here as you implement them
234+
// if labels.contains(&NodeLabel::Link) { ... }
235+
// if labels.contains(&NodeLabel::WebPage) { ... }
236+
237+
None
238+
}
239+
115240
impl NodeItem {
116241
pub(super) fn process(&self, arced_engine: Arc<&Engine>) -> PiResult<()> {
117242
if self.labels.contains(&NodeLabel::DomainName) {
@@ -213,12 +338,6 @@ impl NodeItem {
213338
}
214339
}
215340

216-
// This impl is on the `-` side of the diff, i.e. it is removed by this commit.
impl NodeItem {
217-
/// Returns the payload converted to a `String` via `to_string()`.
pub fn get_label(&self) -> String {
218-
self.payload.to_string()
219-
}
220-
}
221-
222341
impl Ord for NodeItem {
223342
fn cmp(&self, other: &Self) -> Ordering {
224343
self.id.cmp(&other.id)

pixlie_ai/src/entity/web/domain.rs

Lines changed: 77 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,85 @@
1-
use crate::engine::node::{ArcedNodeItem, NodeId, NodeItem, NodeLabel, Payload};
1+
use crate::engine::node::{ArcedNodeItem, Fetchable, Node, NodeId, NodeItem, NodeLabel, Payload};
22
use crate::engine::{EdgeLabel, Engine, NodeFlags};
33
use crate::error::{PiError, PiResult};
4-
use crate::{ExternalData, FetchRequest};
4+
use crate::{ExternalData, FetchError, FetchRequest, FetchResponse};
55
use log::error;
6+
use serde::{Deserialize, Serialize};
7+
use std::any::Any;
68
use std::sync::Arc;
79

8-
/// Field-less form of `Domain`; this line is on the `-` side of the diff.
pub struct Domain;
10+
/// A domain node, carrying the domain's name.
#[derive(Clone, Debug, Serialize, Deserialize)]
11+
pub struct Domain {
12+
/// The domain name.
pub name: String,
13+
}
14+
15+
impl Node for Domain {
16+
/// Builds this domain's fetch requests and submits each one through
/// `engine.fetch`. Stops at the first error, which is returned to the caller.
fn process(&self, node_id: NodeId, engine: Arc<&Engine>) -> PiResult<()> {
17+
// Initial processing logic - could trigger fetch requests
18+
// The engine handle is cloned because `make_fetch_requests` takes it by
// value and `engine` is still needed for the loop below.
let fetch_requests = self.make_fetch_requests(node_id, engine.clone())?;
19+
for request in fetch_requests {
20+
engine.fetch(request)?;
21+
}
22+
Ok(())
23+
}
24+
25+
/// Returns `self` as `&dyn Any`.
fn as_any(&self) -> &dyn Any {
26+
self
27+
}
28+
29+
/// A domain always reports the single label `NodeLabel::DomainName`.
fn get_labels(&self) -> Vec<NodeLabel> {
30+
vec![NodeLabel::DomainName]
31+
}
32+
}
33+
34+
// Fetch handling for a domain: requests `/robots.txt` and stores the result
// (or an empty text on error) as a `RobotsTxt` node connected to the domain.
impl Fetchable for Domain {
35+
/// Returns a single request for `/robots.txt` tied to `node_id`.
/// `engine` is currently unused.
fn make_fetch_requests(&self, node_id: NodeId, engine: Arc<&Engine>) -> PiResult<Vec<FetchRequest>> {
36+
// Check if we already have robots.txt
37+
// For now, always request it
38+
Ok(vec![FetchRequest::new(node_id, "/robots.txt")])
39+
}
40+
41+
/// Stores the response contents as a `RobotsTxt` text node, connects it to
/// the domain node (`OwnerOf` / `BelongsTo`), then toggles `IS_PROCESSED`
/// on the domain node. Any engine error is returned via `?`.
fn handle_fetch_response(&self, node_id: NodeId, engine: Arc<&Engine>, response: FetchResponse) -> PiResult<()> {
42+
// Handle robots.txt response
43+
// NOTE(review): the meaning of the `true` and `None` arguments to
// `get_or_add_node` is not visible in this diff — confirm against its signature.
let content_node_id = engine
44+
.get_or_add_node(
45+
Payload::Text(response.contents.clone()),
46+
vec![NodeLabel::RobotsTxt],
47+
true,
48+
None,
49+
)?
50+
.get_node_id();
51+
engine.add_connection(
52+
(node_id, content_node_id),
53+
(EdgeLabel::OwnerOf, EdgeLabel::BelongsTo),
54+
)?;
55+
// NOTE(review): `toggle_flag` by name may flip rather than set the flag — verify.
engine.toggle_flag(&node_id, NodeFlags::IS_PROCESSED)?;
56+
Ok(())
57+
}
58+
59+
/// Same steps as `handle_fetch_response`, but stores an empty string as the
/// `RobotsTxt` text. The `error` value itself is not inspected.
fn handle_fetch_error(&self, node_id: NodeId, engine: Arc<&Engine>, error: FetchError) -> PiResult<()> {
60+
// Handle robots.txt fetch error - save empty robots.txt
61+
let content_node_id = engine
62+
.get_or_add_node(
63+
Payload::Text("".to_string()),
64+
vec![NodeLabel::RobotsTxt],
65+
true,
66+
None,
67+
)?
68+
.get_node_id();
69+
engine.add_connection(
70+
(node_id, content_node_id),
71+
(EdgeLabel::OwnerOf, EdgeLabel::BelongsTo),
72+
)?;
73+
engine.toggle_flag(&node_id, NodeFlags::IS_PROCESSED)?;
74+
Ok(())
75+
}
76+
}
77+
78+
impl Domain {
79+
/// Creates a `Domain` holding `name`.
pub fn new(name: String) -> Self {
80+
Self { name }
81+
}
82+
}
983

1084
pub enum FindDomainOf<'a> {
1185
DomainName(&'a str),

0 commit comments

Comments
 (0)