5
5
import os
6
6
import logging
7
7
from pathlib import Path
8
+ import hashlib
8
9
9
10
from .database import SQLiteDB
10
11
from .settings import CustomFormatter
23
24
24
25
async def summarize_document (doc : Document ):
25
26
logger .info (f"Processing file { doc .metadata ['file_path' ]} " )
26
- if db .is_file_exist (doc .metadata ['file_path' ], doc .hash ):
27
+ doc_hash = get_file_hash (doc .metadata ['file_path' ])
28
+ if db .is_file_exist (doc .metadata ['file_path' ], doc_hash ):
27
29
summary = db .get_file_summary (doc .metadata ['file_path' ])
28
30
else :
29
31
summary = await model .summarize_document_api (doc .text )
30
- db .insert_file_summary (doc .metadata ['file_path' ], doc . hash , summary )
32
+ db .insert_file_summary (doc .metadata ['file_path' ], doc_hash , summary )
31
33
return {
32
34
"file_path" : doc .metadata ['file_path' ],
33
35
"summary" : summary
@@ -36,11 +38,12 @@ async def summarize_document(doc: Document):
36
38
37
39
async def summarize_image_document (doc : ImageDocument ):
38
40
logger .info (f"Processing image { doc .image_path } " )
39
- if db .is_file_exist (doc .image_path , doc .hash ):
41
+ image_hash = get_file_hash (doc .image_path )
42
+ if db .is_file_exist (doc .image_path , image_hash ):
40
43
summary = db .get_file_summary (doc .image_path )
41
44
else :
42
45
summary = await model .summarize_image_api (image_path = doc .image_path )
43
- db .insert_file_summary (doc .image_path , doc . hash , summary )
46
+ db .insert_file_summary (doc .image_path , image_hash , summary )
44
47
return {
45
48
"file_path" : doc .image_path ,
46
49
"summary" : summary
@@ -129,11 +132,19 @@ def update_file(root_path, item):
129
132
os .makedirs (dst_dir )
130
133
if os .path .isfile (src_file ):
131
134
shutil .move (src_file , dst_file )
132
- new_hash = SimpleDirectoryReader ( input_files = [ dst_file ]). load_data ()[ 0 ]. hash
135
+ new_hash = get_file_hash ( dst_file )
133
136
db .update_file (src_file , dst_file , new_hash )
134
137
135
138
136
139
async def search_files(root_path: str, recursive: bool, required_exts: list, search_query: str):
    """Search the files under ``root_path`` using their summaries.

    Summaries are first collected with ``get_dir_summaries`` and then handed,
    together with the query, to ``model.search_files_api``.

    Args:
        root_path: Directory whose files are summarized and searched.
        recursive: Forwarded to ``get_dir_summaries``.
        required_exts: Forwarded to ``get_dir_summaries``.
        search_query: Query forwarded to ``model.search_files_api``.

    Returns:
        Whatever ``model.search_files_api`` returns for the collected
        summaries and the query.
    """
    dir_summaries = await get_dir_summaries(root_path, recursive, required_exts)
    return await model.search_files_api(dir_summaries, search_query)
143
+
144
+
145
def get_file_hash(file_path, *, algorithm='sha256', chunk_size=8192):
    """Return the hexadecimal digest of a file's contents.

    The file is opened in binary mode and fed to the hash in fixed-size
    chunks, so large files are never loaded into memory in one piece.

    Args:
        file_path: Path of the file to hash (anything ``open`` accepts).
        algorithm: Name of a fixed-length ``hashlib`` algorithm. Defaults to
            ``'sha256'``; keep the default when the digest is compared with
            previously stored hashes.
        chunk_size: Number of bytes read per iteration. Must be positive.

    Returns:
        The digest as a hexadecimal string.

    Raises:
        ValueError: If ``chunk_size`` is not positive, or if ``algorithm`` is
            not supported by ``hashlib``.
        OSError: If the file cannot be opened or read.
    """
    # read(0) returns b'' immediately, which would end the loop and silently
    # yield the digest of empty input; a negative size would read the whole
    # file at once. Reject both up front.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size!r}")

    hash_func = hashlib.new(algorithm)
    with open(file_path, 'rb') as f:
        while chunk := f.read(chunk_size):
            hash_func.update(chunk)
    return hash_func.hexdigest()
0 commit comments