diff --git a/bridge_adapters/src/lisp_adapters/collections.rs b/bridge_adapters/src/lisp_adapters/collections.rs index 08aa455ff8..395634105d 100644 --- a/bridge_adapters/src/lisp_adapters/collections.rs +++ b/bridge_adapters/src/lisp_adapters/collections.rs @@ -1,10 +1,10 @@ use crate::lisp_adapters::{SlFromRef, SlFromRefMut}; use bridge_types::ErrorStrings; use compile_state::state::SloshVm; +use slvm::vm_hashmap::VMHashMap; use slvm::{VMError, VMResult, Value, ValueType}; -use std::collections::HashMap; -impl<'a> SlFromRef<'a, Value> for &'a HashMap { +impl<'a> SlFromRef<'a, Value> for &'a VMHashMap { fn sl_from_ref(value: Value, vm: &'a SloshVm) -> VMResult { match value { Value::Map(h) => Ok(vm.get_map(h)), @@ -18,7 +18,7 @@ impl<'a> SlFromRef<'a, Value> for &'a HashMap { } } -impl<'a> SlFromRefMut<'a, Value> for &'a mut HashMap { +impl<'a> SlFromRefMut<'a, Value> for &'a mut VMHashMap { fn sl_from_ref_mut(value: Value, vm: &'a mut SloshVm) -> VMResult { match value { Value::Map(h) => Ok(vm.get_map_mut(h)?), diff --git a/bridge_types/src/lib.rs b/bridge_types/src/lib.rs index b30b91abca..721e28cdb5 100644 --- a/bridge_types/src/lib.rs +++ b/bridge_types/src/lib.rs @@ -1,5 +1,4 @@ use std::borrow::Cow; -use std::collections::HashMap; use std::fmt::Display; /// Marker traits @@ -13,14 +12,6 @@ impl BridgedType for Option where T: BridgedType {} /// A [`Result`] value that contains a [`BridgedType`] can be represented as a rust value. impl BridgedType for Result where T: BridgedType {} -/// A [`HashMap`] that contains a [`BridgedType`] can be represented as a rust value. -impl BridgedType for HashMap -where - T: BridgedType, - U: BridgedType, -{ -} - /// A [`Vec`] that contains a [`BridgedType`] can be represented as a rust value. impl BridgedType for Vec where T: BridgedType {} diff --git a/builtins/src/collections.rs b/builtins/src/collections.rs index cfc1cfb0e9..ac1052b863 100644 --- a/builtins/src/collections.rs +++ b/builtins/src/collections.rs @@ -1,8 +1,8 @@ use crate::SloshVm; use bridge_adapters::add_builtin; use bridge_macros::sl_sh_fn; +use slvm::vm_hashmap::{VMHashMap, ValHash}; use slvm::{VMError, VMResult, Value, ValueType}; -use std::collections::HashMap; pub fn vec_slice(vm: &mut SloshVm, registers: &[Value]) -> VMResult { let (vector, start, end) = match registers.len() { @@ -72,39 +72,26 @@ pub fn vec_to_list(vm: &mut SloshVm, registers: &[Value]) -> VMResult { } } -/// Usage: (hash-remove! hashmap key) -/// -/// Remove a key from a hashmap. This is a destructive form! -/// -/// Section: hashmap -/// -/// Example: -/// (def tst-hash {:key1 "val one" 'key2 "val two" "key3" "val three" \S "val S"}) -/// (test::assert-equal 4 (len (hash-keys tst-hash))) -/// (test::assert-equal "val one" tst-hash.:key1) -/// (test::assert-equal "val two" (get tst-hash 'key2)) -/// (test::assert-equal "val three" (get tst-hash "key3")) -/// (test::assert-equal "val S" (get tst-hash \S)) -/// (hash-remove! tst-hash 'key2) -/// (test::assert-equal 3 (len (hash-keys tst-hash))) -/// (test::assert-equal "val one" tst-hash.:key1) -/// (test::assert-equal "val three" (get tst-hash "key3")) -/// (test::assert-equal "val S" (get tst-hash \S)) -/// (hash-remove! tst-hash :key1) -/// (test::assert-equal 2 (len (hash-keys tst-hash))) -/// (test::assert-equal "val three" (get tst-hash "key3")) -/// (test::assert-equal "val S" (get tst-hash \S)) -/// (hash-remove! tst-hash "key3") -/// (test::assert-equal 1 (len (hash-keys tst-hash))) -/// (test::assert-equal "val S" (get tst-hash \S)) -/// (hash-remove! tst-hash \S) -/// (test::assert-equal 0 (len (hash-keys tst-hash))) -#[sl_sh_fn(fn_name = "hash-remove!")] -pub fn hash_remove(map: &mut HashMap, key: Value) -> VMResult { - if let Some(old) = map.remove(&key) { - Ok(old) +// This has to be a low level not macro implementation because passing in a &mut VMHashMap means +// we can not use our vm (it has a mutable borrow already out) so we have to play some ordering games +// for the borrow checker and need the raw registers... +// Note, this should probably become a bytecode at some point anyway (?). +pub fn hash_remove(vm: &mut SloshVm, registers: &[Value]) -> VMResult { + let mut args = registers.iter(); + if let (Some(Value::Map(map_handle)), Some(key), None) = (args.next(), args.next(), args.next()) + { + let id = ValHash::from_value(vm, *key); + let map = vm.get_map_mut(*map_handle)?; + if let Some(old) = map.remove_id(id) { + Ok(old) + } else { + Ok(Value::Nil) + } } else { - Ok(Value::Nil) + Err(VMError::new( + "hashmap", + "Invalid args, requires hashmap and key to remove.", + )) } } @@ -128,9 +115,9 @@ pub fn hash_remove(map: &mut HashMap, key: Value) -> VMResult, key: Value) -> VMResult { - if map.contains_key(&key) { +#[sl_sh_fn(fn_name = "hash-haskey?", takes_env = true)] +pub fn hash_hashkey(environment: &mut SloshVm, map: &VMHashMap, key: Value) -> VMResult { + if map.contains_key(environment, key) { Ok(Value::True) } else { Ok(Value::False) @@ -232,10 +219,10 @@ pub fn to_list(environment: &mut SloshVm, src: Value) -> VMResult { /// (test::assert-true (in? (hash-keys tst-hash) "key3") " Test key3") /// (test::assert-false (in? (hash-keys tst-hash) :key4)) #[sl_sh_fn(fn_name = "hash-keys")] -pub fn hash_keys(map: &HashMap) -> VMResult> { +pub fn hash_keys(map: &VMHashMap) -> VMResult> { let mut keys = Vec::with_capacity(map.len()); for key in map.keys() { - keys.push(*key); + keys.push(key); } Ok(keys) } @@ -288,10 +275,43 @@ pub fn setup_collection_builtins(env: &mut SloshVm) { intern_occurs(env); intern_reverse(env); intern_hash_keys(env); - intern_hash_remove(env); intern_is_in(env); intern_to_vec(env); intern_to_list(env); + + add_builtin( + env, + "hash-remove!", + hash_remove, + r#"Usage: (hash-remove! hashmap key) + +Remove a key from a hashmap. This is a destructive form! + +Section: hashmap + +Example: +(def tst-hash {:key1 "val one" 'key2 "val two" "key3" "val three" \S "val S"}) +(test::assert-equal 4 (len (hash-keys tst-hash))) +(test::assert-equal "val one" tst-hash.:key1) +(test::assert-equal "val two" (get tst-hash 'key2)) +(test::assert-equal "val three" (get tst-hash "key3")) +(test::assert-equal "val S" (get tst-hash \S)) +(hash-remove! tst-hash 'key2) +(test::assert-equal 3 (len (hash-keys tst-hash))) +(test::assert-equal "val one" tst-hash.:key1) +(test::assert-equal "val three" (get tst-hash "key3")) +(test::assert-equal "val S" (get tst-hash \S)) +(hash-remove! tst-hash :key1) +(test::assert-equal 2 (len (hash-keys tst-hash))) +(test::assert-equal "val three" (get tst-hash "key3")) +(test::assert-equal "val S" (get tst-hash \S)) +(hash-remove! tst-hash "key3") +(test::assert-equal 1 (len (hash-keys tst-hash))) +(test::assert-equal "val S" (get tst-hash \S)) +(hash-remove! tst-hash \S) +(test::assert-equal 0 (len (hash-keys tst-hash))) + "#, + ); add_builtin( env, "flatten", diff --git a/builtins/src/fs_meta.rs b/builtins/src/fs_meta.rs index b28596041b..2c19f25592 100644 --- a/builtins/src/fs_meta.rs +++ b/builtins/src/fs_meta.rs @@ -4,7 +4,6 @@ use compile_state::state::SloshVm; use shell::builtins::expand_tilde; use sl_compiler::load_eval::apply_callable; use slvm::{from_i56, VMError, VMResult, Value}; -use std::collections::HashMap; use std::path::{Path, PathBuf}; use std::{env, fs, io}; @@ -12,6 +11,7 @@ use glob::glob; use bridge_adapters::add_builtin; use same_file; +use slvm::vm_hashmap::VMHashMap; use std::fs::{File, Metadata}; use std::time::SystemTime; use walkdir::{DirEntry, WalkDir}; @@ -354,19 +354,18 @@ fn is_same_file(path_0: &str, path_1: &str) -> VMResult { /// (defn create-in (in-dir num-files visited) /// (dotimes-i i num-files /// (let (tmp-file (get-temp-file in-dir)) -/// (set! visited.~tmp-file nil)))) +/// (set! visited.~tmp-file #f)))) /// /// (defn create-dir (tmp-dir visited) /// (let (new-tmp (get-temp tmp-dir)) /// (set! visited.~new-tmp #f) /// new-tmp)) /// -/// #| XXXX Fix hashmaps so strings and string consts hash the same then restore this testing... /// (with-temp (fn (root-tmp-dir) /// (let (tmp-file-count 5 -/// visited {}) -/// (def cnt 0) -/// (set! visited.~root-tmp-dir nil) +/// visited {} +/// cnt 0) +/// (set! visited.~root-tmp-dir #f) /// (create-in root-tmp-dir tmp-file-count visited) /// (let (tmp-dir (create-dir root-tmp-dir visited) /// new-files (create-in tmp-dir tmp-file-count visited) @@ -378,47 +377,46 @@ fn is_same_file(path_0: &str, path_1: &str) -> VMResult { /// (set! visited.~x #t) /// (inc! cnt)))) /// (test::assert-equal (+ 3 (* 3 tmp-file-count)) cnt) -/// (test::assert-equal (+ 3 (* 3 tmp-file-count)) (len (hash-keys visited))) +/// (test::assert-equal (+ 3 (* 3 tmp-file-count)) (len visited)) /// (seq-for key in (hash-keys visited) (test::assert-true visited.~key)))))) /// /// (with-temp (fn (root-tmp-dir) /// (let (tmp-file-count 5 -/// visited {}) -/// (def cnt 0) -/// (set! visited.~root-tmp-dir nil) +/// visited {} +/// cnt 0) +/// (set! visited.~root-tmp-dir #f) /// (create-in root-tmp-dir tmp-file-count visited) /// (let (tmp-dir (create-dir root-tmp-dir visited) -/// new-files (create-in tmp-dir tmp-file-count visited) -/// tmp-dir (create-dir tmp-dir {}) -/// new-files (create-in tmp-dir tmp-file-count {})) +/// new-files (create-in tmp-dir tmp-file-count visited) +/// tmp-dir (create-dir tmp-dir {}) +/// new-files (do (set! visited.~tmp-dir #f)(create-in tmp-dir tmp-file-count {}))) /// (fs-crawl root-tmp-dir (fn (x) /// (let (file visited.~x) /// (test::assert-true (not file)) ;; also tests double counting /// (set! visited.~x #t) /// (inc! cnt))) 2) /// (test::assert-equal (+ 3 (* 2 tmp-file-count)) cnt) -/// (test::assert-equal (+ 3 (* 2 tmp-file-count)) (length (hash-keys visited))) -/// (seq-for key in (hash-keys visited) (test::assert-true visited.~key))))) +/// (test::assert-equal (+ 3 (* 2 tmp-file-count)) (len visited)) +/// (seq-for key in (hash-keys visited) (test::assert-true visited.~key)))))) /// /// (with-temp (fn (root-tmp-dir) /// (let (tmp-file-count 5 -/// visited {}) -/// (def cnt 0) -/// (set! visited.~root-tmp-dir nil) +/// visited {} +/// cnt 0) +/// (set! visited.~root-tmp-dir #f) /// (create-in root-tmp-dir tmp-file-count visited) /// (let (tmp-dir (create-dir root-tmp-dir {}) -/// new-files (create-in tmp-dir tmp-file-count {}) -/// tmp-dir (create-dir tmp-dir {}) -/// new-files (create-in tmp-dir tmp-file-count {})) +/// new-files (do (set! visited.~tmp-dir #f)(create-in tmp-dir tmp-file-count {})) +/// tmp-dir (create-dir tmp-dir {}) +/// new-files (create-in tmp-dir tmp-file-count {})) /// (fs-crawl root-tmp-dir (fn (x) /// (let (file visited.~x) /// (test::assert-true (not file)) ;; also tests double counting /// (set! visited.~x #t) /// (inc! cnt))) 1) /// (test::assert-equal (+ 2 tmp-file-count) cnt) -/// (test::assert-equal (+ 2 tmp-file-count) (length (hash-keys visited))) -/// (seq-for key in (hash-keys visited) (test::assert-true visited.~key))))) -/// |# +/// (test::assert-equal (+ 2 tmp-file-count) (len visited)) +/// (seq-for key in (hash-keys visited) (test::assert-true visited.~key)))))) #[sl_sh_fn(fn_name = "fs-crawl", takes_env = true)] fn fs_crawl( environment: &mut SloshVm, @@ -619,7 +617,7 @@ fn fs_meta(vm: &mut SloshVm, registers: &[Value]) -> VMResult { let name = string.pretty_value(vm); let file = File::open(name)?; let meta = file.metadata()?; - let mut map = HashMap::new(); + let mut map = VMHashMap::new(); let ftype = if meta.is_dir() { "dir" } else if meta.is_file() { @@ -634,15 +632,14 @@ fn fs_meta(vm: &mut SloshVm, registers: &[Value]) -> VMResult { } else { Value::False }; - map.insert(Value::Keyword(vm.intern_static("readonly")), ro); - map.insert( - Value::Keyword(vm.intern_static("len")), - (meta.len() as i64).into(), - ); - map.insert( - Value::Keyword(vm.intern_static("type")), - Value::Keyword(vm.intern_static(ftype)), - ); + let key = Value::Keyword(vm.intern_static("readonly")); + map.insert(vm, key, ro); + let key = Value::Keyword(vm.intern_static("len")); + let val: Value = (meta.len() as i64).into(); + map.insert(vm, key, val); + let key = Value::Keyword(vm.intern_static("type")); + let val = Value::Keyword(vm.intern_static(ftype)); + map.insert(vm, key, val); // XXX TODO- include times. Ok(vm.alloc_map(map)) } else { diff --git a/builtins/src/rand.rs b/builtins/src/rand.rs index b506d249f5..88225f2b33 100644 --- a/builtins/src/rand.rs +++ b/builtins/src/rand.rs @@ -279,9 +279,9 @@ Section: random Example: (def rand-int (random 100)) -(test::assert-true (and (> rand-int 0) (< rand-int 100))) +(test::assert-true (and (>= rand-int 0) (< rand-int 100))) (def rand-float (random 1.0)) -(test::assert-true (and (> rand-float 0) (< rand-float 1))) +(test::assert-true (and (>= rand-float 0.0) (< rand-float 1.0))) (test::assert-error-msg (random -1) :rand \"Expected positive number\") (test::assert-error-msg (random 1 2) :rand \"Expected positive number, float or int\") ", diff --git a/compiler/src/compile/destructure.rs b/compiler/src/compile/destructure.rs index 440c6f6d1f..23dae1e90a 100644 --- a/compiler/src/compile/destructure.rs +++ b/compiler/src/compile/destructure.rs @@ -1,7 +1,7 @@ use compile_state::state::CompileState; use slvm::opcodes::*; +use slvm::vm_hashmap::VMHashMap; use slvm::{Handle, Interned, VMError, VMResult, Value}; -use std::collections::HashMap; use crate::{compile, mkconst, SloshVm, SloshVmTrait}; @@ -49,10 +49,10 @@ pub fn resolve_destruct_containers(env: &mut SloshVm, arg: Value) -> Value { } Value::Symbol(i) if *i == i_hash => { let mut iter = v[s + 1..].iter(); - let mut map = HashMap::new(); + let mut map = VMHashMap::new(); while let Some(key) = iter.next() { if let Some(val) = iter.next() { - map.insert(*key, *val); + map.insert(env, *key, *val); } } let v = env.alloc_map(map); @@ -304,19 +304,19 @@ impl DestructState { let mut opt_comps = Vec::new(); let mut len = map.len(); let mut register_labels = Vec::new(); - let optionals = if let Some(opts) = map.get(&Value::Keyword(or_i)) { - let opts = resolve_destruct_containers(env, *opts); + let optionals = if let Some(opts) = map.get(env, Value::Keyword(or_i)) { + let opts = resolve_destruct_containers(env, opts); if let Value::Map(handle) = opts { env.get_map(handle).clone() } else { return Err(VMError::new_compile(":or must be followed by a map")); } } else { - HashMap::new() + VMHashMap::new() }; let start_reg = *next_reg; - for (key, val) in &map { - let key = resolve_destruct_containers(env, *key); + for (key, val) in map.iter() { + let key = resolve_destruct_containers(env, key); match &key { Value::Keyword(i) if *i == or_i => { len -= 1; @@ -324,25 +324,25 @@ impl DestructState { } Value::Symbol(i) => { register_labels.push(Register::Named(*i, *next_reg as u16)); - keys.push(*val); + keys.push(val); *next_reg += 1; } Value::Vector(h) => { register_labels.push(Register::Reserved(*next_reg as u16)); stack.push(DestructType::Vector(*h, *next_reg)); - keys.push(*val); + keys.push(val); *next_reg += 1; } Value::Map(h) => { register_labels.push(Register::Reserved(*next_reg as u16)); stack.push(DestructType::Map(*h, *next_reg)); - keys.push(*val); + keys.push(val); *next_reg += 1; } _ => return Err(VMError::new_compile("not a valid destructure")), } - if let Some(opt_val) = optionals.get(val) { - opt_comps.push((*next_reg - 1, *opt_val)); + if let Some(opt_val) = optionals.get(env, val) { + opt_comps.push((*next_reg - 1, opt_val)); } } self.destructures.push(Destructure { diff --git a/slosh_test/src/docs.rs b/slosh_test/src/docs.rs index ce43052fe1..e3d6535b82 100644 --- a/slosh_test/src/docs.rs +++ b/slosh_test/src/docs.rs @@ -7,6 +7,7 @@ use mdbook::{BookItem, MDBook}; use regex::{Regex, RegexBuilder}; use sl_compiler::load_eval::run_reader; use sl_compiler::Reader; +use slvm::vm_hashmap::VMHashMap; use slvm::VMErrorObj::Message; use slvm::{Interned, VMError, VMResult, Value}; use std::borrow::Cow; @@ -363,8 +364,8 @@ impl SloshDoc { } /// Return an empty documentation map. - fn nil_doc_map(vm: &mut SloshVm) -> HashMap { - let mut map = HashMap::with_capacity(4); + fn nil_doc_map(vm: &mut SloshVm) -> VMHashMap { + let mut map = VMHashMap::with_capacity(4); insert_nil_section(&mut map, USAGE, vm); insert_nil_section(&mut map, SECTION, vm); insert_nil_section(&mut map, DESCRIPTION, vm); @@ -426,23 +427,18 @@ impl From for VMError { type DocResult = Result; -fn insert_section( - map: &mut HashMap, - key: &'static str, - value: String, - vm: &mut SloshVm, -) { +fn insert_section(map: &mut VMHashMap, key: &'static str, value: String, vm: &mut SloshVm) { let key_const = Value::Keyword(vm.intern_static(key)); let value_text = vm.alloc_string(value); - map.insert(key_const, value_text); + map.insert(vm, key_const, value_text); } -fn insert_nil_section(map: &mut HashMap, key: &'static str, vm: &mut SloshVm) { +fn insert_nil_section(map: &mut VMHashMap, key: &'static str, vm: &mut SloshVm) { let key_const = Value::Keyword(vm.intern_static(key)); - map.insert(key_const, Value::Nil); + map.insert(vm, key_const, Value::Nil); } -impl SlFrom for HashMap { +impl SlFrom for VMHashMap { fn sl_from(value: SloshDoc, vm: &mut SloshVm) -> VMResult { let mut map = Self::with_capacity(4); match (value.doc_string.usage, value.doc_string.example) { @@ -471,7 +467,7 @@ impl SlFrom for HashMap { impl SlFrom for Value { fn sl_from(value: SloshDoc, vm: &mut SloshVm) -> VMResult { - let map = HashMap::sl_from(value, vm)?; + let map = VMHashMap::sl_from(value, vm)?; Ok(vm.alloc_map(map)) } } diff --git a/vm/src/heap.rs b/vm/src/heap.rs index de6c48117d..7f995f19a4 100644 --- a/vm/src/heap.rs +++ b/vm/src/heap.rs @@ -1,4 +1,3 @@ -use std::collections::HashMap; use std::sync::Arc; use crate::bits::FLAG_MUT; @@ -7,10 +6,12 @@ pub mod handle; pub use crate::handle::Handle; use crate::heap::io::HeapIo; use crate::heap::storage::Storage; +use crate::vm_hashmap::VMHashMap; pub mod bits; pub mod io; mod storage; +pub mod vm_hashmap; #[derive(Clone, Debug)] pub struct CallFrame { @@ -51,7 +52,7 @@ pub struct Continuation { enum Object { String(Arc), Vector(Arc>), - Map(Arc>), + Map(Arc), Bytes(Arc>), // Everything below here is always read only. @@ -241,7 +242,7 @@ impl Heap { pub fn alloc_map( &mut self, - map: HashMap, + map: VMHashMap, mutable: MutState, mark_roots: MarkFunc, ) -> Value @@ -385,7 +386,7 @@ impl Heap { } } - pub fn get_map(&self, handle: Handle) -> &HashMap { + pub fn get_map(&self, handle: Handle) -> &VMHashMap { if let Some(Object::Map(map)) = self.objects.get(handle.idx()) { map } else { @@ -393,7 +394,7 @@ impl Heap { } } - pub fn get_map_mut(&mut self, handle: Handle) -> VMResult<&mut HashMap> { + pub fn get_map_mut(&mut self, handle: Handle) -> VMResult<&mut VMHashMap> { if !self.objects.is_mutable(handle.idx()) { return Err(VMError::new_heap("Map is not mutable!")); } @@ -572,8 +573,8 @@ impl Heap { } Object::Map(map) => { for (key, val) in map.iter() { - self.mark_trace(*key); - self.mark_trace(*val); + self.mark_trace(key); + self.mark_trace(val); } } Object::Bytes(_) => {} diff --git a/vm/src/heap/vm_hashmap.rs b/vm/src/heap/vm_hashmap.rs new file mode 100644 index 0000000000..ae4cc61bdb --- /dev/null +++ b/vm/src/heap/vm_hashmap.rs @@ -0,0 +1,281 @@ +use crate::{GVm, Value}; +use bridge_types::BridgedType; +use std::collections::hash_map::Keys; +use std::collections::HashMap; +use std::hash::{BuildHasher, Hash, Hasher}; + +/** + * Provides a wrapper to allow us to build a hashmap with Value keys that hashes String and StringConst + * to the same hash. + * Note, this is public only to allow use of insert_id and remove_id which we need to work around + * borrowing issues. +*/ +#[derive(Copy, Clone, Debug)] +pub struct ValHash { + val: Value, + hash: u64, +} + +impl ValHash { + /** Make a ValHash from a Value. */ + pub fn from_value(vm: &GVm, val: Value) -> Self { + ValHash { + val, + hash: val.get_hash(vm), + } + } +} + +impl PartialEq for ValHash { + fn eq(&self, other: &Self) -> bool { + self.hash == other.hash + } +} + +impl Eq for ValHash {} + +impl Hash for ValHash { + fn hash(&self, state: &mut H) { + state.write_u64(self.hash); + } +} + +#[derive(Copy, Clone, Debug, Default)] +struct IdHasher { + hash: u64, +} + +impl BuildHasher for IdHasher { + type Hasher = IdHasher; + + fn build_hasher(&self) -> Self::Hasher { + *self + } +} + +impl Hasher for IdHasher { + fn finish(&self) -> u64 { + self.hash + } + + fn write(&mut self, _bytes: &[u8]) { + panic!("Invalid use of IdHasher!"); + } + + fn write_u64(&mut self, val: u64) { + self.hash = val; + } +} + +/** + * Wrapper class for a HashMap. We need this because we want String and StringConst + * to hash to the same value (what a script user will expect) and that requires a VM so can not just + * implement Hash on Value (will not have access to a VM). + */ +#[derive(Clone, Debug)] +pub struct VMHashMap { + map: HashMap, +} + +impl VMHashMap { + /** Create a new empty HashMap. */ + pub fn new() -> Self { + VMHashMap { + map: HashMap::default(), + } + } + + /** Create a new empty HashMap with an initial capacity. */ + pub fn with_capacity(cap: usize) -> Self { + VMHashMap { + map: HashMap::with_capacity_and_hasher(cap, IdHasher::default()), + } + } + + /** Get the value at key, requires the current VM for hashing. */ + pub fn get(&self, vm: &GVm, key: Value) -> Option { + let id = ValHash::from_value(vm, key); + self.map.get(&id).copied() + } + + /** Insert the value at key, requires the current VM for hashing. + * Returns the old value at key if it exists (None otherwise). + */ + pub fn insert(&mut self, vm: &GVm, key: Value, val: Value) -> Option { + let id = ValHash::from_value(vm, key); + self.map.insert(id, val) + } + + /** Insert val at the key id provided. This allows calling code to pre-generate the ValHash. + * This is a borrow checker workaround. + */ + pub fn insert_id(&mut self, id: ValHash, val: Value) -> Option { + self.map.insert(id, val) + } + + /** Number of items in the HashMap. */ + pub fn len(&self) -> usize { + self.map.len() + } + + /** Is this HashMap empty? */ + pub fn is_empty(&self) -> bool { + self.map.is_empty() + } + + /** Does this HashMap contain key? */ + pub fn contains_key(&self, vm: &GVm, key: Value) -> bool { + let id = ValHash::from_value(vm, key); + self.map.contains_key(&id) + } + + /** Clear (remove all key/values) from the HashMap. */ + pub fn clear(&mut self) { + self.map.clear(); + } + + /** Remove key from the HashMap. Return the old value if it existed (None otherwise). */ + pub fn remove(&mut self, vm: &GVm, key: Value) -> Option { + let id = ValHash::from_value(vm, key); + self.map.remove(&id) + } + + /** Remove the key from HashMap (like remove) except caller pre-generates the ValHash. Used to + * work around the borrow checker. */ + pub fn remove_id(&mut self, id: ValHash) -> Option { + self.map.remove(&id) + } + + /** Returns an iterator over all the keys in the HashMap. */ + pub fn keys(&self) -> VMMapKeys { + VMMapKeys { + keys: self.map.keys(), + } + } + + /** Return an iterator over all the (key, value) pairs in the HashMap. */ + pub fn iter(&self) -> VMHashMapIter { + VMHashMapIter { + iter: self.map.iter(), + } + } +} + +/// A [`VMHashMap`] that contains a [`BridgedType`] can be represented as a rust value. +impl BridgedType for VMHashMap {} + +impl Default for VMHashMap { + fn default() -> Self { + Self::new() + } +} + +/** Iterator over the key vals in a HashMap. */ +pub struct VMHashMapIter<'a> { + iter: std::collections::hash_map::Iter<'a, ValHash, Value>, +} + +impl<'a> Iterator for VMHashMapIter<'a> { + type Item = (Value, Value); + + fn next(&mut self) -> Option { + self.iter.next().map(|(k, v)| (k.val, *v)) + } +} + +/** Iterator over the keys in a HashMap. */ +pub struct VMMapKeys<'a> { + keys: Keys<'a, ValHash, Value>, +} + +impl<'a> Iterator for VMMapKeys<'a> { + type Item = Value; + + fn next(&mut self) -> Option { + self.keys.next().map(|v| v.val) + } +} + +#[cfg(test)] +mod tests { + use crate::vm_hashmap::VMHashMap; + use crate::{Value, Vm}; + + #[test] + fn test_map_str() { + let mut vm = Vm::new(); + let mut m = VMHashMap::default(); + let cs = Value::StringConst(vm.intern("Test String")); + let ds = vm.alloc_string("Test String".to_string()); + let i: Value = 1.into(); + m.insert(&mut vm, cs, i); + assert_eq!(m.get(&vm, cs).unwrap(), i); + assert_eq!(m.get(&vm, ds).unwrap(), i); + let i: Value = 10.into(); + m.insert(&mut vm, ds, i); + assert_eq!(m.get(&vm, cs).unwrap(), i); + assert_eq!(m.get(&vm, ds).unwrap(), i); + let old = m.remove(&mut vm, cs).unwrap(); + assert_eq!(old, i); + assert!(m.get(&vm, cs).is_none()); + assert!(m.get(&vm, ds).is_none()); + } + + #[test] + fn test_map_sym_key_sanity() { + let mut vm = Vm::new(); + let mut m = VMHashMap::default(); + let sym = Value::Symbol(vm.intern("Test String")); + let key = Value::Keyword(vm.intern("Test String")); + let i: Value = 1.into(); + m.insert(&mut vm, sym, i); + assert_eq!(m.get(&vm, sym).unwrap(), i); + assert!(m.get(&vm, key).is_none()); + let i2: Value = 10.into(); + m.insert(&mut vm, key, i2); + assert_eq!(m.get(&vm, sym).unwrap(), i); + assert_eq!(m.get(&vm, key).unwrap(), i2); + let old = m.remove(&mut vm, sym).unwrap(); + assert_eq!(old, i); + assert!(m.get(&vm, sym).is_none()); + assert_eq!(m.get(&vm, key).unwrap(), i2); + } + + #[test] + fn test_map_key_iter_sanity() { + let mut vm = Vm::new(); + let mut m = VMHashMap::default(); + let key1 = Value::Keyword(vm.intern("one")); + let key2 = Value::Keyword(vm.intern("two")); + let key3 = Value::Keyword(vm.intern("three")); + assert_eq!(m.keys().count(), 0); + let i1: Value = 1.into(); + m.insert(&mut vm, key1, i1); + let i2: Value = 1.into(); + m.insert(&mut vm, key2, i2); + let i3: Value = 1.into(); + m.insert(&mut vm, key3, i3); + assert_eq!(m.keys().count(), 3); + m.remove(&mut vm, key1); + assert_eq!(m.keys().count(), 2); + } + + #[test] + fn test_map_iter_sanity() { + let mut vm = Vm::new(); + let mut m = VMHashMap::default(); + let key1 = Value::Keyword(vm.intern("one")); + let key2 = Value::Keyword(vm.intern("two")); + let key3 = Value::Keyword(vm.intern("three")); + assert_eq!(m.iter().count(), 0); + let i1: Value = 1.into(); + m.insert(&mut vm, key1, i1); + let i2: Value = 1.into(); + m.insert(&mut vm, key2, i2); + let i3: Value = 1.into(); + m.insert(&mut vm, key3, i3); + assert_eq!(m.iter().count(), 3); + m.remove(&mut vm, key1); + assert_eq!(m.iter().count(), 2); + } +} diff --git a/vm/src/value.rs b/vm/src/value.rs index ed977d3231..a42c6b50bc 100644 --- a/vm/src/value.rs +++ b/vm/src/value.rs @@ -1,4 +1,4 @@ -use crate::{float, Handle, Heap, Interned, VMError, VMResult}; +use crate::{float, FxHasher, Handle, Heap, Interned, VMError, VMResult}; use bridge_types::BridgedType; use std::collections::{BTreeSet, HashMap}; use std::fmt; @@ -552,6 +552,56 @@ impl Value { matches!(self, Value::List(_, _)) } } + + /** + * Get the hash of a value making sure that strings (whether const or allocated) hash to the same + * value. + * Note: This use the FxHasher which means it is fast and stable but is not randomized to be + * hardened against external key attacks. + */ + pub fn get_hash(&self, vm: &GVm) -> u64 { + let mut hasher = FxHasher::default(); + match self { + Value::StringConst(i) => { + let s = vm.get_interned(*i); + hasher.write_u8(0xFF); + hasher.write(s.as_bytes()); + } + Value::String(h) => { + let s = vm.get_string(*h); + hasher.write_u8(0xFF); + hasher.write(s.as_bytes()); + } + + Value::Byte(_) + | Value::Int(_) + | Value::Float(_) + | Value::CodePoint(_) + | Value::CharCluster(_, _) + | Value::CharClusterLong(_) + | Value::Symbol(_) + | Value::Keyword(_) + | Value::Builtin(_) + | Value::True + | Value::False + | Value::Nil + | Value::Undefined + | Value::Special(_) + | Value::Vector(_) + | Value::Map(_) + | Value::Bytes(_) + | Value::Pair(_) + | Value::List(_, _) + | Value::Lambda(_) + | Value::Closure(_) + | Value::Continuation(_) + | Value::CallFrame(_) + | Value::Value(_) + | Value::Error(_) + | Value::Io(_) => self.hash(&mut hasher), + } + hasher.finish() + } } #[derive(Clone, Debug)] diff --git a/vm/src/vm.rs b/vm/src/vm.rs index 2a49a225e4..1f693e1878 100644 --- a/vm/src/vm.rs +++ b/vm/src/vm.rs @@ -350,8 +350,8 @@ impl GVm { // must set val to false in two instances because // its possible a previous iteration set val to true. for (k, v) in m1.iter() { - if let Some(v2) = m2.get(k) { - if self.is_equal_pair(*v, *v2)? == Value::False { + if let Some(v2) = m2.get(self, k) { + if self.is_equal_pair(v, v2)? == Value::False { val = Value::False; break; } else { diff --git a/vm/src/vm/call_collection.rs b/vm/src/vm/call_collection.rs index d259bf19da..1affafca9b 100644 --- a/vm/src/vm/call_collection.rs +++ b/vm/src/vm/call_collection.rs @@ -10,8 +10,8 @@ impl GVm { match num_args { 1 => { let map = self.heap().get_map(handle); - let res = if let Some(val) = map.get(&self.register(first_reg as usize + 1)) { - *val + let res = if let Some(val) = map.get(self, self.register(first_reg as usize + 1)) { + val } else { Value::Nil }; @@ -19,8 +19,8 @@ impl GVm { } 2 => { let map = self.heap().get_map(handle); - let res = if let Some(val) = map.get(&self.register(first_reg as usize + 1)) { - *val + let res = if let Some(val) = map.get(self, self.register(first_reg as usize + 1)) { + val } else { self.register(first_reg as usize + 2) }; diff --git a/vm/src/vm/exec_loop.rs b/vm/src/vm/exec_loop.rs index eeaa4d5c94..f07e12fd78 100644 --- a/vm/src/vm/exec_loop.rs +++ b/vm/src/vm/exec_loop.rs @@ -1,10 +1,11 @@ use crate::opcodes::*; +use crate::vm_hashmap::{VMHashMap, ValHash}; use crate::{ from_i56, CallFrame, Chunk, Continuation, Error, GVm, VMError, VMErrorObj, VMResult, Value, STACK_CAP, }; -use std::collections::HashMap; use std::marker::PhantomData; +use std::num::TryFromIntError; use std::sync::Arc; use unicode_segmentation::UnicodeSegmentation; @@ -26,8 +27,8 @@ impl GVm { let map = self.get_map(handle); for i in 0..len { let key = self.register(dest + i); - if let Some(item) = map.get(&key) { - *self.register_mut(dest + i) = *item; + if let Some(item) = map.get(self, key) { + *self.register_mut(dest + i) = item; } else { *self.register_mut(dest + i) = Value::Undefined; } @@ -220,8 +221,8 @@ impl GVm { Value::Map(h) => { let map = self.get_map(h); let key = self.register(i as usize); - if let Some(val) = map.get(&key) { - *val + if let Some(val) = map.get(self, key) { + val } else { self.make_err("vm-missing", key) } @@ -314,9 +315,9 @@ impl GVm { } Value::Map(h) => { let key = self.register(i as usize); + let id = ValHash::from_value(self, key); let map = self.get_map_mut(h)?; - let slot = map.entry(key); - *slot.or_insert(Value::Undefined) = src; + map.insert_id(id, src); } _ => { return Err(VMError::new_vm(format!( @@ -335,6 +336,96 @@ impl GVm { matches!(lambda, Value::Builtin(_)) && self.call_frame().is_none() } + /** Implementation of the INC bytecode. */ + fn inc_val(&mut self, wide: bool) -> VMResult<()> { + let (dest, i) = decode2!(self.ip_ptr, wide); + match self.register(dest as usize) { + Value::Byte(v) => { + let i: u8 = i + .try_into() + .map_err(|e: TryFromIntError| VMError::new_vm(e.to_string()))?; + *self.register_mut(dest as usize) = Value::Byte(v + i); + Ok(()) + } + Value::Int(v) => { + let v = from_i56(&v); + *self.register_mut(dest as usize) = (v + i as i64).into(); + Ok(()) + } + // Handle closed over values... + Value::Value(h) => { + let val = self.get_value_mut(h); + match val { + Value::Byte(v) => { + let i: u8 = i + .try_into() + .map_err(|e: TryFromIntError| VMError::new_vm(e.to_string()))?; + *val = Value::Byte(*v + i); + Ok(()) + } + Value::Int(v) => { + let v = from_i56(v); + *val = (v + i as i64).into(); + Ok(()) + } + _ => Err(VMError::new_vm(format!( + "INC: Can only INC an integer type, got {:?}.", + val + ))), + } + } + _ => Err(VMError::new_vm(format!( + "INC: Can only INC an integer type, got {:?}.", + self.register(dest as usize) + ))), + } + } + + /** Implementation of the DEC bytecode. */ + fn dec_val(&mut self, wide: bool) -> VMResult<()> { + let (dest, i) = decode2!(self.ip_ptr, wide); + match self.register(dest as usize) { + Value::Byte(v) => { + let i: u8 = i + .try_into() + .map_err(|e: TryFromIntError| VMError::new_vm(e.to_string()))?; + *self.register_mut(dest as usize) = Value::Byte(v - i); + Ok(()) + } + Value::Int(v) => { + let v = from_i56(&v); + *self.register_mut(dest as usize) = (v - i as i64).into(); + Ok(()) + } + // Handle closed over values... + Value::Value(h) => { + let val = self.get_value_mut(h); + match val { + Value::Byte(v) => { + let i: u8 = i + .try_into() + .map_err(|e: TryFromIntError| VMError::new_vm(e.to_string()))?; + *val = Value::Byte(*v - i); + Ok(()) + } + Value::Int(v) => { + let v = from_i56(v); + *val = (v - i as i64).into(); + Ok(()) + } + _ => Err(VMError::new_vm(format!( + "INC: Can only INC an integer type, got {:?}.", + val + ))), + } + } + _ => Err(VMError::new_vm(format!( + "DEC: Can only DEC an integer type, got {:?}.", + self.register(dest as usize) + ))), + } + } + // Some macro expansions trips this. #[allow(clippy::redundant_closure_call)] pub(super) fn exec_loop(&mut self, chunk: Arc) -> Result<(), (VMError, Arc)> { @@ -1085,7 +1176,7 @@ impl GVm { MAPMK => { let (dest, start, end) = decode3!(self.ip_ptr, wide); let map = if end == start { - HashMap::new() + VMHashMap::new() } else if (end - start) % 2 != 0 { return Err(( VMError::new_vm( @@ -1095,9 +1186,13 @@ impl GVm { chunk.clone(), )); } else { - let mut map = HashMap::new(); + let mut map = VMHashMap::new(); for i in (start..end).step_by(2) { - map.insert(self.register(i as usize), self.register(i as usize + 1)); + map.insert( + self, + self.register(i as usize), + self.register(i as usize + 1), + ); } map }; diff --git a/vm/src/vm/storage.rs b/vm/src/vm/storage.rs index 98c639e8e9..ac6c59464f 100644 --- a/vm/src/vm/storage.rs +++ b/vm/src/vm/storage.rs @@ -1,9 +1,9 @@ use crate::heap::Error; use crate::{CallFrame, Chunk, Continuation, Handle, Heap, Interned, MutState, VMResult, Value}; -use std::collections::HashMap; use std::sync::Arc; use crate::io::HeapIo; +use crate::vm_hashmap::VMHashMap; use crate::GVm; /// Vm code to access storage, heap, stack, globals, etc. @@ -157,14 +157,14 @@ impl GVm { res } - pub fn alloc_map(&mut self, map: HashMap) -> Value { + pub fn alloc_map(&mut self, map: VMHashMap) -> Value { let mut heap = self.heap.take().expect("VM must have a Heap!"); let res = heap.alloc_map(map, MutState::Mutable, |heap| self.mark_roots(heap)); self.heap = Some(heap); res } - pub fn alloc_map_ro(&mut self, map: HashMap) -> Value { + pub fn alloc_map_ro(&mut self, map: VMHashMap) -> Value { let mut heap = self.heap.take().expect("VM must have a Heap!"); let res = heap.alloc_map(map, MutState::Immutable, |heap| self.mark_roots(heap)); self.heap = Some(heap); @@ -322,11 +322,11 @@ impl GVm { self.heap_mut().get_vector_mut(handle) } - pub fn get_map(&self, handle: Handle) -> &HashMap { + pub fn get_map(&self, handle: Handle) -> &VMHashMap { self.heap().get_map(handle) } - pub fn get_map_mut(&mut self, handle: Handle) -> VMResult<&mut HashMap> { + pub fn get_map_mut(&mut self, handle: Handle) -> VMResult<&mut VMHashMap> { self.heap_mut().get_map_mut(handle) }