Skip to content

Commit a6f99a2

Browse files
authored
Merge pull request #829 from vsbogd/grounding-space
Grounding space implementation with a more compact representation in memory
2 parents b39cf2e + 56ec0a5 commit a6f99a2

File tree

15 files changed

+2098
-232
lines changed

15 files changed

+2098
-232
lines changed

lib/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ dyn-fmt = "0.4.0"
1616
itertools = "0.13.0"
1717
unescaper = "0.1.5"
1818
unicode_reader = "1.0.2"
19+
bimap = "0.6.3"
1920

2021
# pkg_mgmt deps
2122
xxhash-rust = {version="0.8.7", features=["xxh3"], optional=true }
@@ -24,6 +25,9 @@ serde_json = { version="1.0.116", optional=true }
2425
semver = { version="1.0", features = ["serde"], optional=true }
2526
git2 = { version="0.18.3", features=["vendored-libgit2"], optional=true }
2627

28+
[dev-dependencies]
29+
ra_ap_profile = "0.0.261"
30+
2731
[lib]
2832
name = "hyperon"
2933
path = "src/lib.rs"

lib/examples/load_space.rs

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
use std::env;
2+
use std::fs::File;
3+
use std::io::BufReader;
4+
use std::time::{SystemTime, Duration};
5+
use ra_ap_profile::memory_usage;
6+
7+
use hyperon::*;
8+
use hyperon::metta::text::*;
9+
use hyperon::space::grounding::*;
10+
11+
/// Returns the current system time as reported by `SystemTime::now()`.
/// `main` uses the returned value as the start mark which is later passed to `since`.
#[inline]
12+
fn now() -> SystemTime {
13+
SystemTime::now()
14+
}
15+
16+
/// Returns the amount of system time which has passed since `time`.
///
/// `SystemTime` is not monotonic: if the system clock is moved backwards
/// between taking `time` and calling this function, the difference cannot be
/// represented. In that case a zero `Duration` is returned instead of
/// panicking, so a clock adjustment cannot abort the measurement run.
#[inline]
fn since(time: SystemTime) -> Duration {
    time.elapsed().unwrap_or_default()
}
20+
21+
fn main() -> Result<(), String> {
22+
let args: Vec<String> = env::args().collect();
23+
println!("args passed: {:?}", args);
24+
let filename = match args.get(1) {
25+
Some(filename) => filename,
26+
None => return Err(format!("Please specify MeTTa file as a first argument")),
27+
};
28+
let open_error = |err| { format!("Cannot open file: {}, because of error: {}", filename, err) };
29+
let file = BufReader::new(File::open(filename).map_err(open_error)?);
30+
31+
let mut parser = SExprParser::new(file);
32+
let tokenizer = Tokenizer::new();
33+
let mut space = GroundingSpace::new();
34+
35+
let before = memory_usage().allocated;
36+
let start = now();
37+
loop {
38+
match parser.parse(&tokenizer)? {
39+
Some(atom) => space.add(atom),
40+
None => break,
41+
}
42+
}
43+
let duration = since(start);
44+
let after = memory_usage().allocated;
45+
println!("loading time {:?}", duration);
46+
println!("memory usage: {}", after - before);
47+
48+
let query = match args.get(2) {
49+
Some(query) => SExprParser::new(query).parse(&tokenizer)?
50+
.expect(format!("Incorrect atom: {}", query).as_str()),
51+
None => expr!("no_match"),
52+
};
53+
54+
let start = now();
55+
let result = space.query(&query);
56+
let duration = since(start);
57+
println!("{} -> {}, time {:?}", query, result, duration);
58+
59+
// FILE: gaf/edges.metta
60+
// QUERY: (go_gene_product (ontology_term GO:0002377) (protein A0A075B6H8))
61+
//use hyperon::space::grounding::index::storage::AtomStorage;
62+
//use hyperon::space::grounding::index::trie::{AllowDuplication, AtomTrie, AtomTrieNode, AtomTrieNodeContent};
63+
64+
//println!("Atom size {}", std::mem::size_of::<Atom>());
65+
//println!("AtomTrieNode size {}", std::mem::size_of::<AtomTrieNode>());
66+
//println!("AtomTrieNodeContent size {}", std::mem::size_of::<AtomTrieNodeContent<AllowDuplication>>());
67+
68+
//println!("atom storage count: {}", space.index.storage.count());
69+
//let mut storage = AtomStorage::default();
70+
//let before = memory_usage().allocated;
71+
//std::mem::swap(&mut space.index.storage, &mut storage);
72+
//drop(storage);
73+
//let after = memory_usage().allocated;
74+
//println!("atom storage mem: {}", before - after);
75+
76+
//println!("atom index node count: {:?}", space.index.trie.stats());
77+
//let mut trie = AtomTrie::default();
78+
//let before = memory_usage().allocated;
79+
//std::mem::swap(&mut space.index.trie, &mut trie);
80+
//drop(trie);
81+
//let after = memory_usage().allocated;
82+
//println!("atom index mem: {}", before - after);
83+
84+
//println!("{}", space.query(&query));
85+
86+
Ok(())
87+
}

lib/src/atom/matcher.rs

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -881,15 +881,6 @@ impl<'a> Iterator for BindingsIter<'a> {
881881
}
882882
}
883883

884-
impl<'a> IntoIterator for &'a Bindings {
885-
type Item = (&'a VariableAtom, Atom);
886-
type IntoIter = BindingsIter<'a>;
887-
888-
fn into_iter(self) -> Self::IntoIter {
889-
self.iter()
890-
}
891-
}
892-
893884

894885
/// Represents a set of [Bindings] instances resulting from an operation where multiple matches are possible.
895886
#[derive(Clone, Debug)]
@@ -979,10 +970,15 @@ impl BindingsSet {
979970
BindingsSet(smallvec::smallvec![])
980971
}
981972

982-
/// Creates a new unconstrained BindingsSet
973+
/// Creates a new BindingsSet with a single full match
983974
pub fn single() -> Self {
984975
BindingsSet(smallvec::smallvec![Bindings::new()])
985976
}
977+
978+
/// Creates a new BindingsSet with `count` full matches
979+
pub fn count(count: usize) -> Self {
980+
BindingsSet(smallvec::SmallVec::from_elem(Bindings::new(), count))
981+
}
986982

987983
/// Returns `true` if a BindingsSet contains no Bindings Objects (fully constrained)
988984
///

lib/src/atom/serial.rs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,11 @@ pub trait ConvertingSerializer<T>: Serializer + Default {
7272
/// Serialization result type
7373
pub type Result = std::result::Result<(), Error>;
7474

75+
/// Module-private marker trait for the hashers which get the `Serializer`
/// implementation written for `H: PrivHasher`; only `DefaultHasher` is marked here.
// NOTE(review): presumably the marker keeps the generic impl from covering every
// `Hasher` and from overlapping with the other `Serializer` impls — confirm.
trait PrivHasher : Hasher {}
76+
impl PrivHasher for DefaultHasher {}
77+
7578
// there are much speedier hashers, but not sure if it's worth the extra dependency given the other options
76-
impl Serializer for DefaultHasher {
79+
impl<H: PrivHasher> Serializer for H {
7780
fn serialize_bool(&mut self, v: bool) -> Result { Ok(self.write_u8(v as u8)) }
7881
fn serialize_i64(&mut self, v: i64) -> Result { Ok(self.write_i64(v)) }
7982
fn serialize_f64(&mut self, v: f64) -> Result { Ok(self.write_u64(v as u64)) }
@@ -95,3 +98,14 @@ impl Serializer for Vec<u8> {
9598
fn serialize_f64(&mut self, v: f64) -> Result { Ok(self.extend(v.to_le_bytes())) }
9699
fn serialize_str(&mut self, v: &str) -> Result { Ok(self.extend(v.bytes())) }
97100
}
101+
102+
#[derive(Default)]
103+
pub struct NullSerializer();
104+
105+
impl Serializer for NullSerializer {
106+
fn serialize_bool(&mut self, _v: bool) -> Result { Ok(()) }
107+
fn serialize_i64(&mut self, _v: i64) -> Result { Ok(()) }
108+
fn serialize_f64(&mut self, _v: f64) -> Result { Ok(()) }
109+
fn serialize_str(&mut self, _v: &str) -> Result { Ok(()) }
110+
}
111+

lib/src/common/collections.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,22 @@ impl<'a, T: 'a + Display> Display for VecDisplay<'a, T> {
297297
}
298298
}
299299

300+
/// Helper function to implement Display for all mapping like code structures.
/// Displays iterator over pairs in a format `{ <key>: <value>, ... }`.
///
/// An empty iterator is displayed as `{ }`. Writing stops at the first
/// formatter error: the error is returned immediately and the remaining pairs
/// are not pulled from the iterator.
pub fn write_mapping<A, B, I>(f: &mut std::fmt::Formatter, it: I) -> std::fmt::Result
where
    A: Display,
    B: Display,
    I: Iterator<Item=(A, B)>
{
    write!(f, "{{")?;
    for (index, (key, value)) in it.enumerate() {
        // Every pair except the first one is separated from the previous by a comma.
        let comma = if index == 0 { "" } else { "," };
        write!(f, "{} {}: {}", comma, key, value)?;
    }
    write!(f, " }}")
}
315+
300316

301317
#[cfg(test)]
302318
mod test {

0 commit comments

Comments
 (0)