20
20
21
21
import org .apache .iceberg .expressions .Expression ;
22
22
import org .apache .iceberg .expressions .ResidualEvaluator ;
23
+ import org .apache .iceberg .relocated .com .google .common .base .MoreObjects ;
23
24
24
- class UnboundBaseFileScanTask extends BaseFileScanTask {
25
- private UnboundGenericDataFile unboundDataFile ;
26
- private UnboundGenericDeleteFile [] unboundDeleteFiles ;
25
+ public class UnboundBaseFileScanTask extends BaseFileScanTask {
26
+ private DataFile unboundDataFile ;
27
+ private DeleteFile [] unboundDeleteFiles ;
27
28
private Expression filter ;
28
29
29
- UnboundBaseFileScanTask (
30
- UnboundGenericDataFile unboundDataFile ,
31
- UnboundGenericDeleteFile [] unboundDeleteFiles ,
32
- Expression filter ) {
30
+ public UnboundBaseFileScanTask (
31
+ DataFile unboundDataFile , DeleteFile [] unboundDeleteFiles , Expression filter ) {
33
32
super (unboundDataFile , unboundDeleteFiles , null , null , ResidualEvaluator .unpartitioned (filter ));
34
33
this .unboundDataFile = unboundDataFile ;
35
34
this .unboundDeleteFiles = unboundDeleteFiles ;
@@ -46,11 +45,74 @@ public PartitionSpec spec() {
46
45
throw new UnsupportedOperationException ("spec() is not supported in UnboundBaseFileScanTask" );
47
46
}
48
47
48
// Returns a string description of this task, built with MoreObjects.toStringHelper(this).
// The fields are appended in this order: unboundDataFile, unboundDeleteFiles, filter.
// NOTE(review): unboundDeleteFiles is an array -- confirm the helper prints its elements
// rather than the array's identity hash.
+ @ Override
49
+ public String toString () {
50
+ return MoreObjects .toStringHelper (this )
51
+ .add ("unboundDataFile" , unboundDataFile )
52
+ .add ("unboundDeleteFiles" , unboundDeleteFiles )
53
+ .add ("filter" , filter )
54
+ .toString ();
55
+ }
56
+
49
57
public FileScanTask bind (PartitionSpec spec , boolean caseSensitive ) {
50
- GenericDataFile boundDataFile = unboundDataFile .bindToSpec (spec );
58
+ // TODO: before creating a new task,
59
+ // need to ensure that dataFile is refreshed with the correct partitionData using spec,
60
+ // need to ensure that deleteFiles are refreshed with spec info,
61
+ // need to ensure that the residual is refreshed with spec.
62
+
63
+ Metrics dataFileMetrics =
64
+ new Metrics (
65
+ unboundDataFile .recordCount (),
66
+ unboundDataFile .columnSizes (),
67
+ unboundDataFile .valueCounts (),
68
+ unboundDataFile .nullValueCounts (),
69
+ unboundDataFile .nanValueCounts ());
70
+ PartitionData partitionData = new PartitionData (spec .partitionType ());
71
+
72
+ GenericDataFile boundDataFile =
73
+ new GenericDataFile (
74
+ spec .specId (),
75
+ (String ) unboundDataFile .path (),
76
+ unboundDataFile .format (),
77
+ partitionData ,
78
+ unboundDataFile .fileSizeInBytes (),
79
+ dataFileMetrics ,
80
+ unboundDataFile .keyMetadata (),
81
+ unboundDataFile .splitOffsets (),
82
+ unboundDataFile .sortOrderId ());
83
+
51
84
DeleteFile [] boundDeleteFiles = new DeleteFile [unboundDeleteFiles .length ];
52
85
for (int i = 0 ; i < unboundDeleteFiles .length ; i ++) {
53
- boundDeleteFiles [i ] = unboundDeleteFiles [i ].bindToSpec (spec );
86
+ DeleteFile deleteFile = unboundDeleteFiles [i ];
87
+ Metrics deleteFileMetrics =
88
+ new Metrics (
89
+ deleteFile .recordCount (),
90
+ deleteFile .columnSizes (),
91
+ deleteFile .valueCounts (),
92
+ deleteFile .nullValueCounts (),
93
+ deleteFile .nanValueCounts ());
94
+
95
+ int [] equalityDeletes = null ;
96
+ if (deleteFile .equalityFieldIds () != null ) {
97
+ equalityDeletes =
98
+ deleteFile .equalityFieldIds ().stream ().mapToInt (Integer ::intValue ).toArray ();
99
+ }
100
+
101
+ DeleteFile genericDeleteFile =
102
+ new GenericDeleteFile (
103
+ spec .specId (),
104
+ deleteFile .content (),
105
+ (String ) deleteFile .path (),
106
+ deleteFile .format (),
107
+ partitionData ,
108
+ deleteFile .fileSizeInBytes (),
109
+ deleteFileMetrics ,
110
+ equalityDeletes ,
111
+ deleteFile .sortOrderId (),
112
+ deleteFile .splitOffsets (),
113
+ deleteFile .keyMetadata ());
114
+
115
+ boundDeleteFiles [i ] = genericDeleteFile ;
54
116
}
55
117
56
118
String schemaString = SchemaParser .toJson (spec .schema ());
0 commit comments