diff --git a/ext/rubydex/graph.c b/ext/rubydex/graph.c index 85f328518..f79bd494d 100644 --- a/ext/rubydex/graph.c +++ b/ext/rubydex/graph.c @@ -6,6 +6,7 @@ #include "reference.h" #include "ruby/internal/globals.h" #include "rustbindings.h" +#include "index_result.h" #include "utils.h" static VALUE cGraph; @@ -27,7 +28,7 @@ static VALUE rdxr_graph_alloc(VALUE klass) { return TypedData_Wrap_Struct(klass, &graph_type, graph); } -// Graph#index_all: (Array[String] file_paths) -> nil +// Graph#index_all: (Array[String] file_paths) -> IndexResult // Raises IndexingError if anything failed during indexing static VALUE rdxr_graph_index_all(VALUE self, VALUE file_paths) { rdxi_check_array_of_strings(file_paths); @@ -39,7 +40,9 @@ static VALUE rdxr_graph_index_all(VALUE self, VALUE file_paths) { // Get the underying graph pointer and then invoke the Rust index all implementation void *graph; TypedData_Get_Struct(self, void *, &graph_type, graph); - const char *error_messages = rdx_index_all(graph, (const char **)converted_file_paths, length); + + IndexResultPointer index_result = NULL; + const char *error_messages = rdx_index_all(graph, (const char **)converted_file_paths, length, &index_result); // Free the converted file paths and allow the GC to collect them for (size_t i = 0; i < length; i++) { @@ -55,7 +58,7 @@ static VALUE rdxr_graph_index_all(VALUE self, VALUE file_paths) { rb_raise(eIndexingError, "%s", StringValueCStr(error_string)); } - return Qnil; + return TypedData_Wrap_Struct(cIndexResult, &index_result_type, index_result); } // Size function for the declarations enumerator @@ -272,12 +275,21 @@ static VALUE rdxr_graph_method_references(VALUE self) { return self; } -// Graph#resolve: () -> self +// Graph#resolve: (IndexResult?) -> self // Runs the resolver to compute declarations and ownership -static VALUE rdxr_graph_resolve(VALUE self) { +static VALUE rdxr_graph_resolve(int argc, VALUE *argv, VALUE self) { + VALUE units_obj; + rb_scan_args(argc, argv, "01", &units_obj); + void *graph; TypedData_Get_Struct(self, void *, &graph_type, graph); - rdx_graph_resolve(graph); + + IndexResultPointer index_result = NULL; + if (!NIL_P(units_obj)) { + TypedData_Get_Struct(units_obj, void *, &index_result_type, index_result); + } + + rdx_graph_resolve(graph, index_result); return self; } @@ -398,7 +410,7 @@ void rdxi_initialize_graph(VALUE mRubydex) { cGraph = rb_define_class_under(mRubydex, "Graph", rb_cObject); rb_define_alloc_func(cGraph, rdxr_graph_alloc); rb_define_method(cGraph, "index_all", rdxr_graph_index_all, 1); - rb_define_method(cGraph, "resolve", rdxr_graph_resolve, 0); + rb_define_method(cGraph, "resolve", rdxr_graph_resolve, -1); rb_define_method(cGraph, "resolve_constant", rdxr_graph_resolve_constant, 2); rb_define_method(cGraph, "declarations", rdxr_graph_declarations, 0); rb_define_method(cGraph, "documents", rdxr_graph_documents, 0); diff --git a/ext/rubydex/index_result.c b/ext/rubydex/index_result.c new file mode 100644 index 000000000..819fd89b1 --- /dev/null +++ b/ext/rubydex/index_result.c @@ -0,0 +1,33 @@ +#include "index_result.h" +#include "rustbindings.h" + +VALUE cIndexResult; + +static void index_result_free(void *ptr) { + if (ptr) { + rdx_index_result_free(ptr); + } +} + +const rb_data_type_t index_result_type = {"IndexResult", {0, index_result_free, 0}, 0, 0, RUBY_TYPED_FREE_IMMEDIATELY}; + +// IndexResult#definition_ids_length -> Integer +static VALUE rdxr_index_result_definition_ids_length(VALUE self) { + void *index_result; + TypedData_Get_Struct(self, void *, &index_result_type, index_result); + return SIZET2NUM(rdx_index_result_definition_ids_len(index_result)); +} + +// IndexResult#reference_ids_length -> Integer +static VALUE rdxr_index_result_reference_ids_length(VALUE self) { + void *index_result; + TypedData_Get_Struct(self, void *, &index_result_type, index_result); + return SIZET2NUM(rdx_index_result_reference_ids_len(index_result)); +} + +void rdxi_initialize_index_result(VALUE mRubydex) { + cIndexResult = rb_define_class_under(mRubydex, "IndexResult", rb_cObject); + rb_undef_alloc_func(cIndexResult); + rb_define_method(cIndexResult, "definition_ids_length", rdxr_index_result_definition_ids_length, 0); + rb_define_method(cIndexResult, "reference_ids_length", rdxr_index_result_reference_ids_length, 0); +} diff --git a/ext/rubydex/index_result.h b/ext/rubydex/index_result.h new file mode 100644 index 000000000..a11f69193 --- /dev/null +++ b/ext/rubydex/index_result.h @@ -0,0 +1,13 @@ +#ifndef INDEX_RESULT_H +#define INDEX_RESULT_H + +#include "ruby.h" + +typedef void *IndexResultPointer; + +extern VALUE cIndexResult; +extern const rb_data_type_t index_result_type; + +void rdxi_initialize_index_result(VALUE mRubydex); + +#endif diff --git a/ext/rubydex/rubydex.c b/ext/rubydex/rubydex.c index 8f00e2c38..948f54884 100644 --- a/ext/rubydex/rubydex.c +++ b/ext/rubydex/rubydex.c @@ -5,6 +5,7 @@ #include "graph.h" #include "location.h" #include "reference.h" +#include "index_result.h" VALUE mRubydex; @@ -12,6 +13,7 @@ void Init_rubydex(void) { rb_ext_ractor_safe(true); mRubydex = rb_define_module("Rubydex"); + rdxi_initialize_index_result(mRubydex); rdxi_initialize_graph(mRubydex); rdxi_initialize_declaration(mRubydex); rdxi_initialize_document(mRubydex); diff --git a/rakelib/index_top_gems.rake b/rakelib/index_top_gems.rake index 4a0900cc8..4a159169f 100644 --- a/rakelib/index_top_gems.rake +++ b/rakelib/index_top_gems.rake @@ -51,7 +51,12 @@ task index_top_gems: :compile_release do # Index the gem's files and yield errors back to the main Ractor graph = Rubydex::Graph.new - error = graph.index_all(Dir.glob("#{gem_dir}/**/*.rb")) + error = nil + begin + graph.index_all(Dir.glob("#{gem_dir}/**/*.rb")) + rescue Rubydex::IndexingError => e + error = e + end next unless error @mutex.synchronize { @errors << "#{gem} => #{error}" } diff --git a/rust/rubydex-sys/src/graph_api.rs b/rust/rubydex-sys/src/graph_api.rs index 2f4dae1fe..6847d681a 100644 --- a/rust/rubydex-sys/src/graph_api.rs +++ b/rust/rubydex-sys/src/graph_api.rs @@ -1,10 +1,11 @@ //! This file provides the C API for the Graph object -use crate::declaration_api::CDeclaration; -use crate::declaration_api::DeclarationsIter; +use crate::declaration_api::{CDeclaration, DeclarationsIter}; +use crate::index_result_api::IndexResultPointer; use crate::reference_api::{ReferenceKind, ReferencesIter}; use crate::{name_api, utils}; use libc::{c_char, c_void}; +use rubydex::indexing::IndexResult; use rubydex::model::encoding::Encoding; use rubydex::model::graph::Graph; use rubydex::model::ids::DeclarationId; @@ -130,6 +131,7 @@ pub unsafe extern "C" fn rdx_index_all( pointer: GraphPointer, file_paths: *const *const c_char, count: usize, + out_result: *mut IndexResultPointer, ) -> *const c_char { let file_paths: Vec = unsafe { utils::convert_double_pointer_to_vec(file_paths, count).unwrap() }; let (file_paths, errors) = listing::collect_file_paths(file_paths); @@ -145,7 +147,7 @@ pub unsafe extern "C" fn rdx_index_all( } with_mut_graph(pointer, |graph| { - let errors = indexing::index_files(graph, file_paths); + let (result, errors) = indexing::index_files(graph, file_paths); if !errors.is_empty() { let error_messages = errors @@ -157,16 +159,28 @@ pub unsafe extern "C" fn rdx_index_all( return CString::new(error_messages).unwrap().into_raw().cast_const(); } + if !out_result.is_null() { + unsafe { + *out_result = Box::into_raw(Box::new(result)) as IndexResultPointer; + } + } + ptr::null() }) } /// Runs the resolver to compute declarations, ownership and related structures #[unsafe(no_mangle)] -pub extern "C" fn rdx_graph_resolve(pointer: GraphPointer) { +pub extern "C" fn rdx_graph_resolve(pointer: GraphPointer, index_result: IndexResultPointer) { with_mut_graph(pointer, |graph| { let mut resolver = Resolver::new(graph); - resolver.resolve_all(); + + if index_result.is_null() { + resolver.resolve_all(); + } else { + let result = unsafe { &*index_result.cast::() }; + resolver.resolve(&result.definition_ids, &result.reference_ids); + } }); } diff --git a/rust/rubydex-sys/src/index_result_api.rs b/rust/rubydex-sys/src/index_result_api.rs new file mode 100644 index 000000000..ddff297cb --- /dev/null +++ b/rust/rubydex-sys/src/index_result_api.rs @@ -0,0 +1,35 @@ +use libc::c_void; +use rubydex::indexing::IndexResult; + +pub type IndexResultPointer = *mut c_void; + +#[unsafe(no_mangle)] +pub extern "C" fn rdx_index_result_free(pointer: IndexResultPointer) { + if pointer.is_null() { + return; + } + + unsafe { + let _ = Box::from_raw(pointer.cast::()); + } +} + +#[unsafe(no_mangle)] +pub extern "C" fn rdx_index_result_definition_ids_len(pointer: IndexResultPointer) -> usize { + if pointer.is_null() { + return 0; + } + + let result = unsafe { &*pointer.cast::() }; + result.definition_ids.len() +} + +#[unsafe(no_mangle)] +pub extern "C" fn rdx_index_result_reference_ids_len(pointer: IndexResultPointer) -> usize { + if pointer.is_null() { + return 0; + } + + let result = unsafe { &*pointer.cast::() }; + result.reference_ids.len() +} diff --git a/rust/rubydex-sys/src/lib.rs b/rust/rubydex-sys/src/lib.rs index 4225c20e2..5075c2f25 100644 --- a/rust/rubydex-sys/src/lib.rs +++ b/rust/rubydex-sys/src/lib.rs @@ -3,6 +3,7 @@ pub mod definition_api; pub mod diagnostic_api; pub mod document_api; pub mod graph_api; +pub mod index_result_api; pub mod location_api; pub mod name_api; pub mod reference_api; diff --git a/rust/rubydex/src/indexing.rs b/rust/rubydex/src/indexing.rs index 3f01678b3..fe088ef77 100644 --- a/rust/rubydex/src/indexing.rs +++ b/rust/rubydex/src/indexing.rs @@ -3,6 +3,8 @@ use crate::{ indexing::{local_graph::LocalGraph, ruby_indexer::RubyIndexer}, job_queue::{Job, JobQueue}, model::graph::Graph, + model::identity_maps::IdentityHashSet, + model::ids::{DefinitionId, ReferenceId}, }; use crossbeam_channel::{Sender, unbounded}; use std::{fs, path::PathBuf, sync::Arc}; @@ -11,6 +13,11 @@ use url::Url; pub mod local_graph; pub mod ruby_indexer; +pub struct IndexResult { + pub definition_ids: IdentityHashSet, + pub reference_ids: IdentityHashSet, +} + /// Job that indexes a single Ruby file pub struct IndexingRubyFileJob { path: PathBuf, @@ -69,7 +76,7 @@ impl Job for IndexingRubyFileJob { /// # Panics /// /// Will panic if the graph cannot be wrapped in an Arc> -pub fn index_files(graph: &mut Graph, paths: Vec) -> Vec { +pub fn index_files(graph: &mut Graph, paths: Vec) -> (IndexResult, Vec) { let queue = Arc::new(JobQueue::new()); let (local_graphs_tx, local_graphs_rx) = unbounded(); let (errors_tx, errors_rx) = unbounded(); @@ -87,11 +94,21 @@ pub fn index_files(graph: &mut Graph, paths: Vec) -> Vec { JobQueue::run(&queue); + let mut definition_ids: IdentityHashSet = IdentityHashSet::default(); + let mut reference_ids: IdentityHashSet = IdentityHashSet::default(); + while let Ok(local_graph) = local_graphs_rx.recv() { - graph.update(local_graph); + let (new_definition_ids, new_reference_ids) = graph.update(local_graph); + definition_ids.extend(new_definition_ids); + reference_ids.extend(new_reference_ids); } - errors_rx.iter().collect() + let result = IndexResult { + definition_ids, + reference_ids, + }; + + (result, errors_rx.iter().collect()) } #[cfg(test)] @@ -120,7 +137,7 @@ mod tests { let relative_to_pwd = &dots.join(absolute_path); let mut graph = Graph::new(); - let errors = index_files(&mut graph, vec![relative_to_pwd.clone()]); + let (_, errors) = index_files(&mut graph, vec![relative_to_pwd.clone()]); assert!(errors.is_empty()); assert_eq!(graph.documents().len(), 1); diff --git a/rust/rubydex/src/main.rs b/rust/rubydex/src/main.rs index c863391a7..4afdefbf0 100644 --- a/rust/rubydex/src/main.rs +++ b/rust/rubydex/src/main.rs @@ -67,7 +67,7 @@ fn main() { // Indexing let mut graph = Graph::new(); - let errors = time_it!(indexing, { indexing::index_files(&mut graph, file_paths) }); + let (_, errors) = time_it!(indexing, { indexing::index_files(&mut graph, file_paths) }); for error in errors { eprintln!("{error}"); diff --git a/rust/rubydex/src/model/graph.rs b/rust/rubydex/src/model/graph.rs index 116cff0ab..eb64bd1d3 100644 --- a/rust/rubydex/src/model/graph.rs +++ b/rust/rubydex/src/model/graph.rs @@ -495,10 +495,12 @@ impl Graph { } //// Handles the deletion of a document identified by `uri` - pub fn delete_uri(&mut self, uri: &str) { + pub fn delete_uri(&mut self, uri: &str) -> (IdentityHashSet, IdentityHashSet) { let uri_id = UriId::from(uri); self.remove_definitions_for_uri(uri_id); self.documents.remove(&uri_id); + + (IdentityHashSet::default(), IdentityHashSet::default()) } /// Merges everything in `other` into this Graph. This method is meant to merge all graph representations from @@ -535,13 +537,18 @@ impl Graph { /// Updates the global representation with the information contained in `other`, handling deletions, insertions and /// updates to existing entries - pub fn update(&mut self, other: LocalGraph) { + pub fn update(&mut self, other: LocalGraph) -> (IdentityHashSet, IdentityHashSet) { // For each URI that was indexed through `other`, check what was discovered and update our current global // representation let uri_id = other.uri_id(); self.remove_definitions_for_uri(uri_id); + let definition_ids: IdentityHashSet = other.definitions().keys().copied().collect(); + let reference_ids: IdentityHashSet = other.constant_references().keys().copied().collect(); + self.extend(other); + + (definition_ids, reference_ids) } // Removes all nodes and relationships associated to the given URI. This is used to clean up stale data when a diff --git a/rust/rubydex/src/resolution.rs b/rust/rubydex/src/resolution.rs index 96e85c4cb..d2c262fc4 100644 --- a/rust/rubydex/src/resolution.rs +++ b/rust/rubydex/src/resolution.rs @@ -121,7 +121,18 @@ impl<'a> Resolver<'a> { ); } - let other_ids = self.prepare_units(); + let definition_ids: IdentityHashSet = self.graph.definitions().keys().copied().collect(); + let reference_ids: IdentityHashSet = self.graph.constant_references().keys().copied().collect(); + self.resolve(&definition_ids, &reference_ids); + } + + /// Resolves only the specified definitions and references + pub fn resolve( + &mut self, + definition_ids: &IdentityHashSet, + reference_ids: &IdentityHashSet, + ) { + let other_ids = self.prepare_units(definition_ids.iter(), reference_ids.iter()); loop { // Flag to ensure the end of the resolution loop. We go through all items in the queue based on its current @@ -1336,13 +1347,19 @@ impl<'a> Resolver<'a> { parent_depth + nesting_depth + 1 } - fn prepare_units(&mut self) -> Vec { - let estimated_length = self.graph.definitions().len() / 2; - let mut definitions = Vec::with_capacity(estimated_length); - let mut others = Vec::with_capacity(estimated_length); + fn prepare_units<'b, D, R>(&mut self, definition_ids: D, reference_ids: R) -> Vec + where + D: Iterator, + R: Iterator, + { + let mut definitions = Vec::new(); + let mut others = Vec::new(); let names = self.graph.names(); - for (id, definition) in self.graph.definitions() { + for id in definition_ids { + let Some(definition) = self.graph.definitions().get(id) else { + continue; + }; let uri = self.graph.documents().get(definition.uri_id()).unwrap().uri(); match definition { @@ -1388,19 +1405,19 @@ impl<'a> Resolver<'a> { (Self::name_depth(name_a, names), uri_a, offset_a).cmp(&(Self::name_depth(name_b, names), uri_b, offset_b)) }); - let mut const_refs = self - .graph - .constant_references() - .iter() - .map(|(id, constant_ref)| { - let uri = self.graph.documents().get(&constant_ref.uri_id()).unwrap().uri(); - - ( - Unit::ConstantRef(*id), - (names.get(constant_ref.name_id()).unwrap(), uri, constant_ref.offset()), - ) - }) - .collect::>(); + let mut const_refs = Vec::new(); + + for id in reference_ids { + let Some(constant_ref) = self.graph.constant_references().get(id) else { + continue; + }; + let uri = self.graph.documents().get(&constant_ref.uri_id()).unwrap().uri(); + + const_refs.push(( + Unit::ConstantRef(*id), + (names.get(constant_ref.name_id()).unwrap(), uri, constant_ref.offset()), + )); + } // Sort constant references based on their name complexity so that simpler names are always first const_refs.sort_by(|(_, (name_a, uri_a, offset_a)), (_, (name_b, uri_b, offset_b))| { @@ -1412,7 +1429,6 @@ impl<'a> Resolver<'a> { self.unit_queue .extend(const_refs.into_iter().map(|(id, _)| id).collect::>()); - others.shrink_to_fit(); others } diff --git a/test/graph_test.rb b/test/graph_test.rb index 30b0de0ab..9b952da03 100644 --- a/test/graph_test.rb +++ b/test/graph_test.rb @@ -9,7 +9,7 @@ class GraphTest < Minitest::Test def test_indexing_empty_context with_context do |context| graph = Rubydex::Graph.new - assert_nil(graph.index_all(context.glob("**/*.rb"))) + assert_instance_of(Rubydex::IndexResult, graph.index_all(context.glob("**/*.rb"))) end end @@ -19,7 +19,7 @@ def test_indexing_context_files context.write!("bar.rb", "class Bar; end") graph = Rubydex::Graph.new - assert_nil(graph.index_all(context.glob("**/*.rb"))) + assert_instance_of(Rubydex::IndexResult, graph.index_all(context.glob("**/*.rb"))) end end