@@ -29,13 +29,25 @@ public sealed class FileEmbeddingConsumer :
2929 private readonly IConfiguration _configuration ;
3030 private readonly IServiceProvider _serviceProvider ;
3131
32+ private readonly string _tempFolder ;
33+ private readonly string _markdownFolder ;
34+ private readonly string _imageAnalysisFolder ;
35+ private readonly string _chunkFolder ;
36+ private readonly string _embeddingFolder ;
37+
/// <summary>
/// Creates the consumer, stores its dependencies, and derives the working
/// sub-folders (Markdown, ImageAnalysis, Chunks, Embeddings) from the
/// "Storage:TempFolderPath" configuration value.
/// </summary>
/// <param name="logger">Logger stored for use by this consumer.</param>
/// <param name="configuration">Configuration source; must provide "Storage:TempFolderPath".</param>
/// <param name="serviceProvider">Service provider stored for later use by this consumer.</param>
/// <exception cref="InvalidOperationException">
/// Thrown when "Storage:TempFolderPath" is missing from configuration.
/// </exception>
public FileEmbeddingConsumer(ILogger<FileEmbeddingConsumer> logger,
    IConfiguration configuration,
    IServiceProvider serviceProvider)
{
    _logger = logger;
    _configuration = configuration;
    _serviceProvider = serviceProvider;

    // Fail fast with a message that names the missing key. Without this guard a
    // missing setting reaches Path.Combine as null and surfaces as an
    // ArgumentNullException that does not say which setting is absent.
    _tempFolder = _configuration["Storage:TempFolderPath"]
        ?? throw new InvalidOperationException(
            "Configuration value 'Storage:TempFolderPath' is required but was not found.");

    // All working folders live directly under the configured temp folder.
    _markdownFolder = Path.Combine(_tempFolder, "Markdown");
    _imageAnalysisFolder = Path.Combine(_tempFolder, "ImageAnalysis");
    _chunkFolder = Path.Combine(_tempFolder, "Chunks");
    _embeddingFolder = Path.Combine(_tempFolder, "Embeddings");
}
4052
4153 public async Task HandleAsync ( FileCreatedEvent data , MetaData metaData , CancellationToken cancellationToken = default )
@@ -107,9 +119,9 @@ private async Task ProcessFileAsync(FileEntry fileEntry, CancellationToken cance
107119
108120 var chunks = TextChunkingService . ChunkSentences ( Encoding . UTF8 . GetString ( bytes ) ) ;
109121
110- var chunksFolder = CreateDirectoryIfNotExist ( Path . Combine ( _configuration [ "Storage:TempFolderPath" ] , "Chunks" , fileEntry . Id . ToString ( ) ) ) ;
122+ var chunksFolder = CreateDirectoryIfNotExist ( Path . Combine ( _chunkFolder , fileEntry . Id . ToString ( ) ) ) ;
111123
112- var embeddingsFolder = CreateDirectoryIfNotExist ( Path . Combine ( _configuration [ "Storage:TempFolderPath" ] , "Embeddings" , fileEntry . Id . ToString ( ) ) ) ;
124+ var embeddingsFolder = CreateDirectoryIfNotExist ( Path . Combine ( _embeddingFolder , fileEntry . Id . ToString ( ) ) ) ;
113125
114126 foreach ( var chunk in chunks )
115127 {
@@ -121,6 +133,7 @@ private async Task ProcessFileAsync(FileEntry fileEntry, CancellationToken cance
121133 var fileEntryEmbedding = new FileEntryEmbedding
122134 {
123135 ChunkName = $ "{ chunk . StartIndex } _{ chunk . EndIndex } .txt",
136+ ChunkLocation = Path . Combine ( "Chunks" , fileEntry . Id . ToString ( ) , $ "{ chunk . StartIndex } _{ chunk . EndIndex } .txt") ,
124137 FileEntryId = fileEntry . Id ,
125138 Embedding = JsonSerializer . Serialize ( embedding . EmbeddingVector ) ,
126139 TokenDetails = JsonSerializer . Serialize ( embedding . UsageDetails )
@@ -141,19 +154,17 @@ private async Task ProcessFileAsync(FileEntry fileEntry, CancellationToken cance
141154 {
142155 _logger . LogInformation ( "Converting file to markdown for FileEntry Id: {FileEntryId}" , fileEntry ? . Id ) ;
143156
144- var markdownFolder = CreateDirectoryIfNotExist ( Path . Combine ( _configuration [ "Storage:TempFolderPath" ] , "Markdown" ) ) ;
157+ var markdownFolder = CreateDirectoryIfNotExist ( _markdownFolder ) ;
145158
146- var markdownFile = Path . Combine ( markdownFolder , fileEntry . Id + " .md") ;
159+ var markdownFile = Path . Combine ( markdownFolder , $ " { fileEntry . Id } .md") ;
147160
148- if ( ! File . Exists ( markdownFile ) )
149- {
150- var bytes = await GetBytesAsync ( fileStorageManager , fileEntry , cancellationToken ) ;
151- var markdown = await markdownService . ConvertToMarkdownAsync ( bytes , fileEntry . FileName , cancellationToken ) ;
152- await File . WriteAllTextAsync ( markdownFile , markdown , cancellationToken ) ;
153- }
161+ var bytes = await GetBytesAsync ( fileStorageManager , fileEntry , cancellationToken ) ;
162+ var markdown = await markdownService . ConvertToMarkdownAsync ( bytes , fileEntry . FileName , cancellationToken ) ;
163+ await File . WriteAllTextAsync ( markdownFile , markdown , cancellationToken ) ;
154164
155165 fileEntryText = new FileEntryText
156166 {
167+ TextLocation = Path . Combine ( "Markdown" , $ "{ fileEntry . Id } .md") ,
157168 FileEntryId = fileEntry . Id ,
158169 } ;
159170
@@ -165,15 +176,13 @@ private async Task ProcessFileAsync(FileEntry fileEntry, CancellationToken cance
165176
166177 if ( ! hasFileEntryEmbeddings )
167178 {
168- var markdownFolder = CreateDirectoryIfNotExist ( Path . Combine ( _configuration [ "Storage:TempFolderPath" ] , "Markdown" ) ) ;
169-
170- var markdownFile = Path . Combine ( markdownFolder , fileEntry . Id + ".md" ) ;
179+ var markdownFile = Path . Combine ( _markdownFolder , $ "{ fileEntry . Id } .md") ;
171180
172181 var chunks = TextChunkingService . ChunkSentences ( await File . ReadAllTextAsync ( markdownFile , cancellationToken ) ) ;
173182
174- var chunksFolder = CreateDirectoryIfNotExist ( Path . Combine ( _configuration [ "Storage:TempFolderPath" ] , "Chunks" , fileEntry . Id . ToString ( ) ) ) ;
183+ var chunksFolder = CreateDirectoryIfNotExist ( Path . Combine ( _chunkFolder , fileEntry . Id . ToString ( ) ) ) ;
175184
176- var embeddingsFolder = CreateDirectoryIfNotExist ( Path . Combine ( _configuration [ "Storage:TempFolderPath" ] , "Embeddings" , fileEntry . Id . ToString ( ) ) ) ;
185+ var embeddingsFolder = CreateDirectoryIfNotExist ( Path . Combine ( _embeddingFolder , fileEntry . Id . ToString ( ) ) ) ;
177186
178187 foreach ( var chunk in chunks )
179188 {
@@ -185,6 +194,7 @@ private async Task ProcessFileAsync(FileEntry fileEntry, CancellationToken cance
185194 var fileEntryEmbedding = new FileEntryEmbedding
186195 {
187196 ChunkName = $ "{ chunk . StartIndex } _{ chunk . EndIndex } .txt",
197+ ChunkLocation = Path . Combine ( "Chunks" , fileEntry . Id . ToString ( ) , $ "{ chunk . StartIndex } _{ chunk . EndIndex } .txt") ,
188198 FileEntryId = fileEntry . Id ,
189199 Embedding = JsonSerializer . Serialize ( embedding . EmbeddingVector ) ,
190200 TokenDetails = JsonSerializer . Serialize ( embedding . UsageDetails )
@@ -201,9 +211,9 @@ private async Task ProcessFileAsync(FileEntry fileEntry, CancellationToken cance
201211 {
202212 _logger . LogInformation ( "Processing image file for FileEntry Id: {FileEntryId}" , fileEntry ? . Id ) ;
203213
204- var imageAnalysisFolder = CreateDirectoryIfNotExist ( Path . Combine ( _configuration [ "Storage:TempFolderPath" ] , "ImageAnalysis" ) ) ;
214+ var imageAnalysisFolder = CreateDirectoryIfNotExist ( _imageAnalysisFolder ) ;
205215
206- var embeddingsFolder = CreateDirectoryIfNotExist ( Path . Combine ( _configuration [ "Storage:TempFolderPath" ] , "Embeddings" , fileEntry . Id . ToString ( ) ) ) ;
216+ var embeddingsFolder = CreateDirectoryIfNotExist ( Path . Combine ( _embeddingFolder , fileEntry . Id . ToString ( ) ) ) ;
207217
208218 var imageAnalysisFile = Path . Combine ( imageAnalysisFolder , $ "{ fileEntry . Id } .json") ;
209219 var embeddingFile = Path . Combine ( embeddingsFolder , $ "{ fileEntry . Id } .json") ;
@@ -222,6 +232,7 @@ private async Task ProcessFileAsync(FileEntry fileEntry, CancellationToken cance
222232
223233 fileEntryText = new FileEntryText
224234 {
235+ TextLocation = Path . Combine ( "ImageAnalysis" , $ "{ fileEntry . Id } .json") ,
225236 FileEntryId = fileEntry . Id ,
226237 } ;
227238
@@ -240,6 +251,7 @@ private async Task ProcessFileAsync(FileEntry fileEntry, CancellationToken cance
240251 var fileEntryEmbedding = new FileEntryEmbedding
241252 {
242253 ChunkName = $ "{ fileEntry . Id } .json",
254+ ChunkLocation = Path . Combine ( "ImageAnalysis" , $ "{ fileEntry . Id } .json") ,
243255 FileEntryId = fileEntry . Id ,
244256 Embedding = JsonSerializer . Serialize ( embedding . EmbeddingVector ) ,
245257 TokenDetails = JsonSerializer . Serialize ( embedding . UsageDetails )
0 commit comments