Skip to content

Commit 60bdac4

Browse files
committed
Update partition logic explanation
1 parent 6eb9b6b commit 60bdac4

File tree

1 file changed

+12
-10
lines changed

1 file changed

+12
-10
lines changed

Diff for: README.md

+12-10
Original file line number | Diff line number | Diff line change
@@ -78,18 +78,20 @@ int partitionSizeByOneTask = totalFileLength / approximateNumPartitions;
78 78
...
79 79
*/
80 80

81-
int numPartitions;
82-
if (path.toString().endsWith(".gz") || path.toString().endsWith(".bz2") || path.toString().endsWith(".lzo")) {
83-
// if the file is compressed, skip partitioning.
84-
numPartitions = 1;
81+
long numPartitions;
82+
if (task.getPartition()) {
83+
if (file.canDecompress()) {
84+
numPartitions = ((fileLength - 1) / partitionSizeByOneTask) + 1;
85+
}
86+
else if (file.getCodec() != null) { // if not null, the file is compressed.
87+
numPartitions = 1;
88+
}
89+
else {
90+
numPartitions = ((fileLength - 1) / partitionSizeByOneTask) + 1;
91+
}
85 92
}
86-
else if (!task.getPartition()) {
87-
// if no partition mode, skip partitioning.
88-
numPartitions = 1;
89-
}
90 93
else {
91-
// equalize the file size per task as much as possible.
92-
numPartitions = ((fileLength - 1) / partitionSizeByOneTask) + 1;
94+
numPartitions = 1;
93 95
}
94 96

95 97
/*

0 commit comments

Comments
 (0)