Skip to content

Commit 498c772

Browse files
committed
(#270) Convert documents to Markdown
1 parent 488a886 commit 498c772

File tree

3 files changed

+43
-43
lines changed

3 files changed

+43
-43
lines changed

src/Monolith/ClassifiedAds.Background/MessageBusConsumers/FileEmbeddingConsumer.cs

Lines changed: 40 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
using Microsoft.Extensions.DependencyInjection;
99
using Microsoft.Extensions.Logging;
1010
using System;
11+
using System.IO;
1112
using System.Net.Http;
1213
using System.Net.Http.Headers;
1314
using System.Security.Cryptography;
@@ -40,72 +41,68 @@ public async Task HandleAsync(FileCreatedEvent data, MetaData metaData, Cancella
4041
{
4142
_logger.LogInformation("Handling FileCreatedEvent for FileEntry Id: {FileEntryId}", data?.FileEntry?.Id);
4243

43-
if (string.IsNullOrEmpty(data?.FileEntry?.FileLocation))
44-
{
45-
return;
46-
}
47-
48-
if (data.FileEntry.FileName.EndsWith(".txt") ||
49-
data.FileEntry.FileName.EndsWith(".md") ||
50-
data.FileEntry.FileName.EndsWith(".markdown"))
51-
{
52-
_logger.LogInformation("Skipping text file for FileEntry Id: {FileEntryId}", data?.FileEntry?.Id);
53-
return;
54-
}
44+
await ProcessFileAsync(data.FileEntry, cancellationToken);
45+
}
5546

56-
if (data.FileEntry.FileName.EndsWith(".pdf") ||
57-
data.FileEntry.FileName.EndsWith(".docx"))
58-
{
59-
_logger.LogInformation("Converting file to markdown for FileEntry Id: {FileEntryId}", data?.FileEntry?.Id);
47+
public async Task HandleAsync(FileUpdatedEvent data, MetaData metaData, CancellationToken cancellationToken = default)
48+
{
49+
_logger.LogInformation("Handling FileUpdatedEvent for FileEntry Id: {FileEntryId}", data?.FileEntry?.Id);
6050

61-
var markdown = await ConvertToMarkdownAsync(data.FileEntry, cancellationToken);
51+
await ProcessFileAsync(data.FileEntry, cancellationToken);
52+
}
6253

63-
return;
64-
}
54+
public Task HandleAsync(FileDeletedEvent data, MetaData metaData, CancellationToken cancellationToken = default)
55+
{
56+
_logger.LogInformation("Handling FileDeletedEvent for FileEntry Id: {FileEntryId}", data?.FileEntry?.Id);
6557

66-
return;
58+
return Task.CompletedTask;
6759
}
6860

69-
public async Task HandleAsync(FileUpdatedEvent data, MetaData metaData, CancellationToken cancellationToken = default)
61+
private async Task ProcessFileAsync(FileEntry fileEntry, CancellationToken cancellationToken)
7062
{
71-
_logger.LogInformation("Handling FileUpdatedEvent for FileEntry Id: {FileEntryId}", data?.FileEntry?.Id);
72-
73-
if (string.IsNullOrEmpty(data?.FileEntry?.FileLocation))
63+
if (string.IsNullOrEmpty(fileEntry?.FileLocation))
7464
{
7565
return;
7666
}
7767

78-
if (data.FileEntry.FileName.EndsWith(".txt") ||
79-
data.FileEntry.FileName.EndsWith(".md") ||
80-
data.FileEntry.FileName.EndsWith(".markdown"))
68+
using var scope = _serviceProvider.CreateScope();
69+
var fileStorageManager = scope.ServiceProvider.GetService<IFileStorageManager>();
70+
71+
var fileExtension = Path.GetExtension(fileEntry.FileName);
72+
73+
if (fileExtension == ".txt" ||
74+
fileExtension == ".md" ||
75+
fileExtension == ".markdown")
8176
{
82-
_logger.LogInformation("Skipping text file for FileEntry Id: {FileEntryId}", data?.FileEntry?.Id);
77+
// TODO: xxx
8378
return;
8479
}
8580

86-
if (data.FileEntry.FileName.EndsWith(".pdf") ||
87-
data.FileEntry.FileName.EndsWith(".docx"))
81+
if (fileExtension == ".pdf" ||
82+
fileExtension == ".docx")
8883
{
89-
_logger.LogInformation("Converting file to markdown for FileEntry Id: {FileEntryId}", data?.FileEntry?.Id);
84+
_logger.LogInformation("Converting file to markdown for FileEntry Id: {FileEntryId}", fileEntry?.Id);
9085

91-
var markdown = await ConvertToMarkdownAsync(data.FileEntry, cancellationToken);
86+
var markdownFolder = Path.Combine(_configuration["Storage:TempFolderPath"], "Markdown");
9287

93-
return;
94-
}
88+
if (!Directory.Exists(markdownFolder))
89+
{
90+
Directory.CreateDirectory(markdownFolder);
91+
}
9592

96-
return;
97-
}
93+
var markdownFile = Path.Combine(markdownFolder, fileEntry.Id + ".md");
9894

99-
public Task HandleAsync(FileDeletedEvent data, MetaData metaData, CancellationToken cancellationToken = default)
100-
{
101-
_logger.LogInformation("Handling FileDeletedEvent for FileEntry Id: {FileEntryId}", data?.FileEntry?.Id);
102-
return Task.CompletedTask;
95+
if (!File.Exists(markdownFile))
96+
{
97+
var markdown = await ConvertToMarkdownAsync(fileStorageManager, fileEntry, cancellationToken);
98+
await File.WriteAllTextAsync(markdownFile, markdown, cancellationToken);
99+
}
100+
}
103101
}
104102

105-
private async Task<string> ConvertToMarkdownAsync(FileEntry fileEntry, CancellationToken cancellationToken = default)
103+
private async Task<string> ConvertToMarkdownAsync(IFileStorageManager fileStorageManager, FileEntry fileEntry, CancellationToken cancellationToken = default)
106104
{
107-
// TODO: xxx
108-
var content = await _serviceProvider.CreateScope().ServiceProvider.GetRequiredService<IFileStorageManager>().ReadAsync(fileEntry, cancellationToken);
105+
var content = await fileStorageManager.ReadAsync(fileEntry, cancellationToken);
109106

110107
if (fileEntry.Encrypted)
111108
{

src/Monolith/ClassifiedAds.Background/appsettings.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
"Storage": {
3131
"Provider": "Local",
3232
"MasterEncryptionKey": "+2ZC9wrwlvPswPxCND0BjrKJ3CfOpImGtn4hloVwo2I=",
33+
"TempFolderPath": "C:\\Data\\Practical.CleanArchitecture\\Temp",
3334
"Local": {
3435
"Path": "C:\\Data\\files"
3536
},

src/Monolith/ClassifiedAds.Infrastructure/Storages/StorageOptions.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ public class StorageOptions
1010

1111
public string MasterEncryptionKey { get; set; }
1212

13+
public string TempFolderPath { get; set; }
14+
1315
public LocalOptions Local { get; set; }
1416

1517
public AzureBlobOption Azure { get; set; }

0 commit comments

Comments
 (0)