Skip to content

Commit 02c47a5

Browse files
committed
Single resource per blob
1 parent 9e1d8a6 commit 02c47a5

File tree

2 files changed

+21
-31
lines changed

2 files changed

+21
-31
lines changed

src/Microsoft.Health.Fhir.SqlServer/Features/Storage/SqlServerFhirDataStore.cs

Lines changed: 12 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -131,12 +131,12 @@ public SqlServerFhirDataStore(
131131

132132
internal static TimeSpan MergeResourcesTransactionHeartbeatPeriod => TimeSpan.FromSeconds(10);
133133

134-
private async Task DeleteBlobFromAdlsAsync(long transactionId, CancellationToken cancellationToken)
134+
private async Task DeleteBlobFromAdlsAsync(long transactionId, int blobIndex, CancellationToken cancellationToken)
135135
{
136136
var start = DateTime.UtcNow;
137137
var sw = Stopwatch.StartNew();
138138
var retries = 0;
139-
var blobName = GetBlobNameForRaw(transactionId);
139+
var blobName = GetBlobNameForRaw(transactionId, blobIndex);
140140
while (true)
141141
{
142142
try
@@ -165,32 +165,24 @@ private async Task PutRawResourcesIntoAdlsAsync(IReadOnlyList<MergeResourceWrapp
165165
{
166166
var start = DateTime.UtcNow;
167167
var sw = Stopwatch.StartNew();
168-
var eol = Encoding.UTF8.GetByteCount(Environment.NewLine);
169168
var retries = 0;
170-
var blobName = GetBlobNameForRaw(transactionId);
171169
while (true)
172170
{
173171
try
174172
{
175-
using var stream = await SqlAdlsClient.Container.GetBlockBlobClient(blobName).OpenWriteAsync(true, null, cancellationToken);
176-
using var writer = new StreamWriter(stream);
177-
var offset = 0;
178-
foreach (var resource in resources)
173+
Parallel.For(0, resources.Count, index =>
179174
{
180-
resource.FileId = transactionId;
181-
resource.OffsetInFile = offset;
182-
var line = resource.ResourceWrapper.RawResource.Data;
183-
offset += Encoding.UTF8.GetByteCount(line) + eol;
184-
await writer.WriteLineAsync(line);
185-
}
175+
resources[index].FileId = transactionId;
176+
resources[index].OffsetInFile = index;
177+
var blobClient = SqlAdlsClient.Container.GetBlobClient(GetBlobNameForRaw(transactionId, index));
178+
blobClient.Upload(BinaryData.FromString(resources[index].ResourceWrapper.RawResource.Data + Environment.NewLine), overwrite: true);
179+
});
186180

187-
#pragma warning disable CA2016
188-
await writer.FlushAsync();
189181
break;
190182
}
191183
catch (Exception e)
192184
{
193-
await StoreClient.TryLogEvent("PutRawResourcesIntoAdlsAsync", "Error", $"blob={blobName} error={e}", start, cancellationToken);
185+
await StoreClient.TryLogEvent("PutRawResourcesIntoAdlsAsync", "Error", $"transactionId={transactionId} error={e}", start, cancellationToken);
194186
if (e.ToString().Contains("ConditionNotMet", StringComparison.OrdinalIgnoreCase) && retries++ < 3)
195187
{
196188
await Task.Delay(1000, cancellationToken);
@@ -202,12 +194,12 @@ private async Task PutRawResourcesIntoAdlsAsync(IReadOnlyList<MergeResourceWrapp
202194
}
203195

204196
var mcsec = (long)Math.Round(sw.Elapsed.TotalMilliseconds * 1000, 0);
205-
await StoreClient.TryLogEvent("PutRawResourcesToAdls", "Warn", $"mcsec={mcsec} resources={resources.Count} blob={blobName}", start, cancellationToken);
197+
await StoreClient.TryLogEvent("PutRawResourcesToAdls", "Warn", $"mcsec={mcsec} resources={resources.Count} transactionId ={transactionId}", start, cancellationToken);
206198
}
207199

208-
internal static string GetBlobNameForRaw(long fileId)
200+
internal static string GetBlobNameForRaw(long fileId, int blobIndex)
209201
{
210-
return $"hash-{GetPermanentHashCode(fileId)}/transaction-{fileId}.ndjson";
202+
return $"hash-{GetPermanentHashCode(fileId)}/transaction-{fileId}-index-{blobIndex}.ndjson";
211203
}
212204

213205
private static string GetPermanentHashCode(long tr)

src/Microsoft.Health.Fhir.SqlServer/Features/Storage/SqlStoreClient.cs

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
using System.Globalization;
1111
using System.IO;
1212
using System.Linq;
13+
using System.Resources;
1314
using System.Threading;
1415
using System.Threading.Tasks;
1516
using Azure.Storage.Blobs;
@@ -119,21 +120,18 @@ private async Task<IReadOnlyList<ResourceWrapper>> ReadResourceWrappersAsync(Sql
119120
throw new InvalidOperationException("ADLS container is null.");
120121
}
121122

122-
var resourceRefsByFile = resourceRefs.GroupBy(_ => _.FileId);
123-
foreach (var file in resourceRefsByFile)
123+
Parallel.ForEach(resourceRefs, resourceRef =>
124124
{
125-
var blobName = SqlServerFhirDataStore.GetBlobNameForRaw(file.Key);
125+
var blobName = SqlServerFhirDataStore.GetBlobNameForRaw(resourceRef.FileId, resourceRef.OffsetInFile);
126126
var blobClient = SqlAdlsClient.Container.GetBlobClient(blobName);
127-
using var stream = blobClient.OpenRead();
128-
using var reader = new StreamReader(stream);
129-
foreach (var offset in file)
127+
var result = blobClient.Download();
128+
using var streamReader = new StreamReader(result.Value.Content);
129+
var rawResource = streamReader.ReadLine();
130+
lock (results)
130131
{
131-
reader.DiscardBufferedData();
132-
stream.Position = offset.OffsetInFile;
133-
var line = reader.ReadLine();
134-
results.Add((file.Key, offset.OffsetInFile), line);
132+
results.Add((resourceRef.FileId, resourceRef.OffsetInFile), rawResource);
135133
}
136-
}
134+
});
137135

138136
return results;
139137
}

0 commit comments

Comments
 (0)