Skip to content

Commit 99cae01

Browse files
author
Rong Zeng
committed
[3.2] Fix the consistency issue of listFrom API
1 parent 43e7bfa commit 99cae01

File tree

3 files changed

+125
-7
lines changed

3 files changed

+125
-7
lines changed

storage-s3-dynamodb/src/main/java/io/delta/storage/BaseExternalLogStore.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,9 @@ public void write(
242242
resolvedPath.getName(),
243243
tempPath,
244244
false, // not complete
245-
null // no expireTime
245+
null, // no expireTime
246+
fs.getDefaultBlockSize(),
247+
System.currentTimeMillis()
246248
);
247249

248250
// Step 2.1: Create temp file T(N)
@@ -455,7 +457,7 @@ private void copyFile(FileSystem fs, Path src, Path dst) throws IOException {
455457
/**
456458
* Returns path stripped user info.
457459
*/
458-
private Path stripUserInfo(Path path) {
460+
protected Path stripUserInfo(Path path) {
459461
final URI uri = path.toUri();
460462

461463
try {

storage-s3-dynamodb/src/main/java/io/delta/storage/ExternalCommitEntry.java

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,17 +51,31 @@ public final class ExternalCommitEntry {
5151
*/
5252
public final Long expireTime;
5353

54+
/*
55+
* File size
56+
*/
57+
public final Long fileSize;
58+
59+
/*
60+
* File modification time, in milliseconds since the epoch
61+
*/
62+
public final Long modificationTime;
63+
5464
/**
 * Creates an immutable commit entry; every argument is stored as-is in the
 * field of the same name.
 *
 * @param tablePath        path of the table this entry belongs to
 * @param fileName         name of the commit file
 * @param tempPath         path of the temporary file backing this commit
 * @param complete         whether the commit has been marked complete
 * @param expireTime       expiration time, or null when none is set
 * @param fileSize         file size recorded for the commit, may be null
 * @param modificationTime modification time recorded for the commit, may be null
 */
public ExternalCommitEntry(
        Path tablePath,
        String fileName,
        String tempPath,
        boolean complete,
        Long expireTime,
        Long fileSize,
        Long modificationTime) {
    // Identity of the commit.
    this.tablePath = tablePath;
    this.fileName = fileName;
    this.tempPath = tempPath;

    // File metadata recorded alongside the commit.
    this.fileSize = fileSize;
    this.modificationTime = modificationTime;

    // Lifecycle state.
    this.complete = complete;
    this.expireTime = expireTime;
}
6680

6781
/**
@@ -73,7 +87,9 @@ public ExternalCommitEntry asComplete(long expirationDelaySeconds) {
7387
this.fileName,
7488
this.tempPath,
7589
true,
76-
System.currentTimeMillis() / 1000L + expirationDelaySeconds
90+
System.currentTimeMillis() / 1000L + expirationDelaySeconds,
91+
this.fileSize,
92+
this.modificationTime
7793
);
7894
}
7995

storage-s3-dynamodb/src/main/java/io/delta/storage/S3DynamoDBLogStore.java

Lines changed: 103 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
12
/*
23
* Copyright (2021) The Delta Lake Project Authors.
34
*
@@ -15,21 +16,31 @@
1516
*/
1617

1718
package io.delta.storage;
18-
19+
import io.delta.storage.internal.S3LogStoreUtil;
1920
import io.delta.storage.utils.ReflectionUtils;
2021
import org.apache.hadoop.fs.Path;
22+
import com.google.common.collect.Lists;
2123

2224
import java.io.InterruptedIOException;
2325
import java.io.UncheckedIOException;
26+
import java.net.URI;
27+
import java.util.ArrayList;
2428
import java.util.Arrays;
29+
import java.util.Collections;
30+
import java.util.Comparator;
31+
import java.util.HashMap;
32+
import java.util.Iterator;
2533
import java.util.List;
2634
import java.util.Map;
2735
import java.util.Optional;
2836
import java.util.concurrent.ConcurrentHashMap;
37+
import java.util.stream.Collectors;
38+
import java.util.stream.Stream;
2939
import java.io.IOException;
40+
import org.apache.hadoop.fs.FileStatus;
41+
import org.apache.hadoop.fs.FileSystem;
3042

3143
import org.apache.hadoop.conf.Configuration;
32-
3344
import com.amazonaws.auth.AWSCredentialsProvider;
3445
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient;
3546
import com.amazonaws.services.dynamodbv2.model.AttributeDefinition;
@@ -87,7 +98,6 @@ public class S3DynamoDBLogStore extends BaseExternalLogStore {
8798
public static final String DDB_CLIENT_CREDENTIALS_PROVIDER = "credentials.provider";
8899
public static final String DDB_CREATE_TABLE_RCU = "provisionedThroughput.rcu";
89100
public static final String DDB_CREATE_TABLE_WCU = "provisionedThroughput.wcu";
90-
91101
// WARNING: setting this value too low can cause data loss. Defaults to a duration of 1 day.
92102
public static final String TTL_SECONDS = "ddb.ttl";
93103

@@ -100,6 +110,8 @@ public class S3DynamoDBLogStore extends BaseExternalLogStore {
100110
private static final String ATTR_COMPLETE = "complete";
101111
private static final String ATTR_EXPIRE_TIME = "expireTime";
102112

113+
private static final String ATTR_FILE_SIZE = "fileSize";
114+
private static final String ATTR_MODI_TIME = "modificationTime";
103115
/**
104116
* Member fields
105117
*/
@@ -139,6 +151,84 @@ public S3DynamoDBLogStore(Configuration hadoopConf) throws IOException {
139151
tryEnsureTableExists(hadoopConf);
140152
}
141153

154+
private Iterator<FileStatus> mergeFileLists(
155+
List<FileStatus> list,
156+
List<FileStatus> listWithPrecedence) {
157+
final Map<Path, FileStatus> fileStatusMap = new HashMap<>();
158+
159+
// insert all elements from `listWithPrecedence` (highest priority)
160+
// and then insert elements from `list` if and only if that key doesn't already exist
161+
Stream.concat(listWithPrecedence.stream(), list.stream())
162+
.forEach(fs -> fileStatusMap.putIfAbsent(fs.getPath(), fs));
163+
164+
return fileStatusMap
165+
.values()
166+
.stream()
167+
.sorted(Comparator.comparing(a -> a.getPath().getName()))
168+
.iterator();
169+
}
170+
171+
/**
172+
* List files starting from `resolvedPath` (inclusive) in the same directory.
173+
*/
174+
private List<FileStatus> listFromCache(
175+
FileSystem fs,
176+
Path resolvedPath) {
177+
final Path pathKey = super.stripUserInfo(resolvedPath);
178+
final Path tablePath = getTablePath(resolvedPath);
179+
180+
final Map<String, Condition> conditions = new ConcurrentHashMap<>();
181+
conditions.put(
182+
ATTR_TABLE_PATH,
183+
new Condition()
184+
.withComparisonOperator(ComparisonOperator.EQ)
185+
.withAttributeValueList(new AttributeValue(tablePath.toString()))
186+
);
187+
conditions.put(ATTR_FILE_NAME, new Condition().withComparisonOperator(ComparisonOperator.GE)
188+
.withAttributeValueList(new AttributeValue(pathKey.getName())));
189+
190+
final List<Map<String,AttributeValue>> items = client.query(
191+
new QueryRequest(tableName)
192+
.withConsistentRead(true)
193+
.withScanIndexForward(false)
194+
.withLimit(1)
195+
.withKeyConditions(conditions)
196+
).getItems();
197+
198+
List<FileStatus> statuses = new ArrayList<FileStatus>();
199+
items.forEach(item -> {
200+
ExternalCommitEntry entry = dbResultToCommitEntry(item);
201+
if (entry.complete == true) {
202+
long fileSize = entry.fileSize != null ? entry.fileSize : 0L;
203+
long modificationTime = entry.modificationTime != null ? entry.modificationTime : System.currentTimeMillis();
204+
statuses.add(
205+
new FileStatus(
206+
entry.fileSize,
207+
false,
208+
1,
209+
fileSize,
210+
modificationTime,
211+
entry.absoluteFilePath()
212+
));
213+
}
214+
});
215+
return statuses;
216+
}
217+
218+
@Override
219+
public Iterator<FileStatus> listFrom(Path path, Configuration hadoopConf) throws IOException {
220+
final FileSystem fs = path.getFileSystem(hadoopConf);
221+
final Path resolvedPath = stripUserInfo(fs.makeQualified(path));
222+
223+
final List<FileStatus> listedFromFs = Lists.newArrayList(super.listFrom(path, hadoopConf));
224+
225+
// add this to list the completed entry from external cache
226+
// on the occasion that the filesystem could not provide strong consistency
227+
final List<FileStatus> listedFromCache = listFromCache(fs, resolvedPath);
228+
229+
return mergeFileLists(listedFromCache, listedFromFs);
230+
}
231+
142232
@Override
143233
public CloseableIterator<String> read(Path path, Configuration hadoopConf) throws IOException {
144234
// With many concurrent readers/writers, there's a chance that concurrent 'recovery'
@@ -222,11 +312,16 @@ protected Optional<ExternalCommitEntry> getLatestExternalEntry(Path tablePath) {
222312
*/
223313
private ExternalCommitEntry dbResultToCommitEntry(Map<String, AttributeValue> item) {
224314
final AttributeValue expireTimeAttr = item.get(ATTR_EXPIRE_TIME);
315+
Long fileSize = item.get(ATTR_FILE_SIZE) != null ? Long.parseLong(item.get(ATTR_FILE_SIZE).getN()) : 0;
316+
Long modiTime = item.get(ATTR_MODI_TIME) != null ? Long.parseLong(item.get(ATTR_MODI_TIME).getN()) : System.currentTimeMillis() / 1000;
317+
225318
return new ExternalCommitEntry(
226319
new Path(item.get(ATTR_TABLE_PATH).getS()),
227320
item.get(ATTR_FILE_NAME).getS(),
228321
item.get(ATTR_TEMP_PATH).getS(),
229322
item.get(ATTR_COMPLETE).getS().equals("true"),
323+
fileSize,
324+
modiTime,
230325
expireTimeAttr != null ? Long.parseLong(expireTimeAttr.getN()) : null
231326
);
232327
}
@@ -236,6 +331,11 @@ private PutItemRequest createPutItemRequest(ExternalCommitEntry entry, boolean o
236331
attributes.put(ATTR_TABLE_PATH, new AttributeValue(entry.tablePath.toString()));
237332
attributes.put(ATTR_FILE_NAME, new AttributeValue(entry.fileName));
238333
attributes.put(ATTR_TEMP_PATH, new AttributeValue(entry.tempPath));
334+
attributes.put(ATTR_FILE_SIZE, new AttributeValue().withN(
335+
String.valueOf(entry.fileSize != null ? entry.fileSize : 0L)));
336+
attributes.put(ATTR_MODI_TIME, new AttributeValue().withN(
337+
String.valueOf(entry.modificationTime != null ? entry.modificationTime : System.currentTimeMillis())
338+
));
239339
attributes.put(
240340
ATTR_COMPLETE,
241341
new AttributeValue().withS(Boolean.toString(entry.complete))

0 commit comments

Comments
 (0)