Skip to content

Commit 7aefb1b

Browse files
authored
Use CrtChecksums explicitly to download S3 objects inside lambda function (#961)
* Use CrtChecksums explicitly to download S3 objects inside lambda function * Add explicit dep on System.Private.Uri and use default MaxKeys for ListObjects call * Use HttpClient to download links.json
1 parent 34cb5cd commit 7aefb1b

File tree

2 files changed

+48
-27
lines changed

2 files changed

+48
-27
lines changed

src/infra/docs-lambda-index-publisher/Program.cs

Lines changed: 46 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -11,19 +11,40 @@
1111
using Elastic.Markdown.IO.State;
1212
using Elastic.Markdown.Links.CrossLinks;
1313

14-
await LambdaBootstrapBuilder.Create(Handler)
15-
.Build()
16-
.RunAsync();
14+
const string bucketName = "elastic-docs-link-index";
1715

16+
// await LambdaBootstrapBuilder.Create(Handler)
17+
// .Build()
18+
// .RunAsync();
19+
20+
// Uncomment to test locally without uploading
21+
await CreateLinkIndex(new AmazonS3Client());
22+
23+
#pragma warning disable CS8321 // Local function is declared but never used
1824
static async Task<string> Handler(ILambdaContext context)
25+
#pragma warning restore CS8321 // Local function is declared but never used
1926
{
2027
var sw = Stopwatch.StartNew();
28+
2129
IAmazonS3 client = new AmazonS3Client();
22-
var bucketName = "elastic-docs-link-index";
30+
var linkIndex = await CreateLinkIndex(client);
31+
if (linkIndex == null)
32+
return $"Error encountered on server. getting list of objects.";
33+
34+
var json = LinkIndex.Serialize(linkIndex);
35+
36+
using var stream = new MemoryStream(Encoding.UTF8.GetBytes(json));
37+
await client.UploadObjectFromStreamAsync(bucketName, "link-index.json", stream, new Dictionary<string, object>(), CancellationToken.None);
38+
return $"Finished in {sw}";
39+
}
40+
41+
42+
static async Task<LinkIndex?> CreateLinkIndex(IAmazonS3 client)
43+
{
2344
var request = new ListObjectsV2Request
2445
{
2546
BucketName = bucketName,
26-
MaxKeys = 5
47+
MaxKeys = 1000 //default
2748
};
2849

2950
var linkIndex = new LinkIndex
@@ -32,20 +53,23 @@ static async Task<string> Handler(ILambdaContext context)
3253
};
3354
try
3455
{
56+
var httpClient = new HttpClient();
3557
ListObjectsV2Response response;
3658
do
3759
{
3860
response = await client.ListObjectsV2Async(request, CancellationToken.None);
39-
foreach (var obj in response.S3Objects)
61+
await Parallel.ForEachAsync(response.S3Objects, async (obj, ctx) =>
4062
{
4163
if (!obj.Key.StartsWith("elastic/", StringComparison.OrdinalIgnoreCase))
42-
continue;
64+
return;
4365

4466
var tokens = obj.Key.Split('/');
4567
if (tokens.Length < 3)
46-
continue;
68+
return;
4769

48-
var gitReference = await ReadLinkReferenceSha(client, obj);
70+
// TODO create a dedicated state file for git configuration
71+
// Deserializing all of the links metadata adds significant overhead
72+
var gitReference = await ReadLinkReferenceSha(httpClient, obj);
4973

5074
var repository = tokens[1];
5175
var branch = tokens[2];
@@ -67,39 +91,36 @@ static async Task<string> Handler(ILambdaContext context)
6791
{ branch, entry }
6892
});
6993
}
70-
71-
Console.WriteLine(entry);
72-
}
94+
});
7395

7496
// If the response is truncated, set the request ContinuationToken
7597
// from the NextContinuationToken property of the response.
7698
request.ContinuationToken = response.NextContinuationToken;
7799
} while (response.IsTruncated);
78100
}
79-
catch (AmazonS3Exception ex)
101+
catch
80102
{
81-
return $"Error encountered on server. Message:'{ex.Message}' getting list of objects.";
103+
return null;
82104
}
83105

84-
var json = LinkIndex.Serialize(linkIndex);
85-
86-
using var stream = new MemoryStream(Encoding.UTF8.GetBytes(json));
87-
await client.UploadObjectFromStreamAsync(bucketName, "link-index.json", stream, new Dictionary<string, object>(), CancellationToken.None);
88-
return $"Finished in {sw}";
106+
return linkIndex;
89107
}
90108

91-
static async Task<string> ReadLinkReferenceSha(IAmazonS3 client, S3Object obj)
109+
static async Task<string> ReadLinkReferenceSha(HttpClient httpClient, S3Object obj)
92110
{
93111
try
94112
{
95-
var contents = await client.GetObjectAsync(obj.Key, obj.Key, CancellationToken.None);
96-
await using var s = contents.ResponseStream;
97-
var linkReference = LinkReference.Deserialize(s);
113+
// can not use client getobject since CRT checksum validation requires native code not available in AOT.
114+
var tokens = obj.Key.Split('/');
115+
var path = Path.Join(tokens[0], tokens[1], "tree", tokens[2], string.Join("/", tokens[3..]));
116+
var url = "https://docs-v3-preview.elastic.dev/" + path;
117+
var json = await httpClient.GetStringAsync(new Uri(url));
118+
119+
var linkReference = LinkReference.Deserialize(json);
98120
return linkReference.Origin.Ref;
99121
}
100-
catch (Exception e)
122+
catch
101123
{
102-
Console.WriteLine(e);
103124
// it's important we don't fail here we need to fallback gracefully from this so we can fix the root cause
104125
// of why a repository is not reporting its git reference properly
105126
return "unknown";

src/infra/docs-lambda-index-publisher/aws-lambda-tools-defaults.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
"dotnet lambda help",
66
"All the command line options for the Lambda command can be specified in this file."
77
],
8-
"profile": "",
9-
"region": "",
8+
"profile": "default",
9+
"region": "us-west-2",
1010
"configuration": "Release",
1111
"function-runtime": "provided.al2",
1212
"function-memory-size": 512,

0 commit comments

Comments
 (0)