@@ -15,13 +15,15 @@ use chrono::prelude::*;
1515use oxigraph:: model:: { Dataset , Graph , NamedNode , NamedNodeRef , NamedOrBlankNodeRef , TripleRef } ;
1616use oxigraph:: store:: Store ;
1717use petgraph:: visit:: EdgeRef ;
18- use std:: io:: { BufReader , Write } ;
18+ use std:: fs:: File ;
19+ use std:: io:: { BufReader , Read , Write } ;
1920use std:: path:: Path ;
2021use std:: path:: PathBuf ;
2122
2223use crate :: io:: GraphIO ;
2324use crate :: ontology:: { GraphIdentifier , Ontology , OntologyLocation } ;
2425use anyhow:: { anyhow, Result } ;
26+ use blake3;
2527use log:: { debug, error, info, warn} ;
2628use petgraph:: graph:: { Graph as DiGraph , NodeIndex } ;
2729use std:: collections:: { HashMap , HashSet , VecDeque } ;
@@ -867,48 +869,47 @@ impl OntoEnv {
867869 }
868870 } ;
869871
870- // if the source modified is missing, then we assume it has been updated
871- let source_modified = self
872- . io
873- . source_last_modified ( ontology. id ( ) )
874- . unwrap_or ( Utc :: now ( ) ) ;
875- // if the ontology has no modified time, then we assume it has never been updated
876872 let last_updated = ontology
877873 . last_updated
878874 . unwrap_or ( Utc . timestamp_opt ( 0 , 0 ) . unwrap ( ) ) ;
879875
880- if source_modified > last_updated {
881- if let OntologyLocation :: File ( path) = location {
882- // Mtime is newer, so now check if content is different
883- let new_graph = match self . io . read_file ( path) {
884- Ok ( g ) => g ,
876+ match location {
877+ OntologyLocation :: File ( path) => {
878+ // Prefer a fast content hash comparison to avoid mtime granularity issues.
879+ let current_hash = match hash_file ( path) {
880+ Ok ( h ) => h ,
885881 Err ( e) => {
886882 warn ! (
887- "Could not read file for update check {}: {}" ,
883+ "Could not hash file for update check {}: {}" ,
888884 path. display( ) ,
889885 e
890886 ) ;
891- return true ; // If we can't read it, assume it's updated
887+ return true ; // assume updated if we cannot hash
892888 }
893889 } ;
894- let old_graph = match self . io . get_graph ( ontology. id ( ) ) {
895- Ok ( g) => g,
896- Err ( e) => {
897- warn ! (
898- "Could not get graph from store for update check {}: {}" ,
899- ontology. id( ) ,
900- e
901- ) ;
902- return true ; // If we can't get the old one, assume updated
890+
891+ if let Some ( stored_hash) = ontology. content_hash ( ) {
892+ if stored_hash == current_hash {
893+ return false ;
903894 }
904- } ;
905- return new_graph != old_graph;
895+ return true ;
896+ }
897+
898+ // Fallback to mtime when legacy records lack a stored hash.
899+ let source_modified = self
900+ . io
901+ . source_last_modified ( ontology. id ( ) )
902+ . unwrap_or ( Utc :: now ( ) ) ;
903+ source_modified > last_updated
904+ }
905+ _ => {
906+ let source_modified = self
907+ . io
908+ . source_last_modified ( ontology. id ( ) )
909+ . unwrap_or ( Utc :: now ( ) ) ;
910+ source_modified > last_updated
906911 }
907- // For non-file locations, we can't easily check content, so stick with mtime.
908- return true ;
909912 }
910-
911- false
912913 } )
913914 . map ( |( graphid, _) | graphid. clone ( ) )
914915 . collect ( )
@@ -1792,6 +1793,21 @@ impl OntoEnv {
17921793 }
17931794}
17941795
1796+ fn hash_file ( path : & Path ) -> Result < String > {
1797+ let file = File :: open ( path) ?;
1798+ let mut reader = BufReader :: new ( file) ;
1799+ let mut hasher = blake3:: Hasher :: new ( ) ;
1800+ let mut buf = [ 0u8 ; 8192 ] ;
1801+ loop {
1802+ let n = reader. read ( & mut buf) ?;
1803+ if n == 0 {
1804+ break ;
1805+ }
1806+ hasher. update ( & buf[ ..n] ) ;
1807+ }
1808+ Ok ( hasher. finalize ( ) . to_hex ( ) . to_string ( ) )
1809+ }
1810+
17951811#[ cfg( test) ]
17961812mod tests {
17971813 use super :: * ;
0 commit comments