@@ -5,15 +5,11 @@ package io.qbeast.spark.delta
5
5
6
6
import io .qbeast .core .model ._
7
7
import io .qbeast .spark .delta .QbeastMetadataSQL ._
8
+ import io .qbeast .spark .utils .State .FLOODED
8
9
import io .qbeast .spark .utils .TagColumns
9
- import org .apache .spark .sql .Dataset
10
- import org .apache .spark .sql .SparkSession
11
10
import org .apache .spark .sql .delta .actions .AddFile
12
- import org .apache .spark .sql .functions .col
13
- import org .apache .spark .sql .functions .collect_list
14
- import org .apache .spark .sql .functions .lit
15
- import org .apache .spark .sql .functions .min
16
- import org .apache .spark .sql .functions .sum
11
+ import org .apache .spark .sql .functions .{col , collect_list , lit , min , sum }
12
+ import org .apache .spark .sql .{Dataset , SparkSession }
17
13
18
14
import scala .collection .immutable .SortedMap
19
15
@@ -24,7 +20,11 @@ import scala.collection.immutable.SortedMap
24
20
* @param announcedSet the announced set available for the revision
25
21
* @param replicatedSet the replicated set available for the revision
26
22
*/
27
- private [delta] class IndexStatusBuilder (qbeastSnapshot : DeltaQbeastSnapshot , revision : Revision )
23
+ private [delta] class IndexStatusBuilder (
24
+ qbeastSnapshot : DeltaQbeastSnapshot ,
25
+ revision : Revision ,
26
+ replicatedSet : ReplicatedSet ,
27
+ announcedSet : Set [CubeId ] = Set .empty)
28
28
extends Serializable
29
29
with StagingUtils {
30
30
@@ -37,19 +37,15 @@ private[delta] class IndexStatusBuilder(qbeastSnapshot: DeltaQbeastSnapshot, rev
37
37
qbeastSnapshot.loadRevisionBlocks(revision.revisionID)
38
38
39
39
def build (): IndexStatus = {
40
- val cubeStatuses =
40
+ val cubeStatus =
41
41
if (isStaging(revision)) stagingCubeStatuses
42
42
else buildCubesStatuses
43
43
44
- val (replicatedSet, announcedSet): (Set [CubeId ], Set [CubeId ]) =
45
- if (isStaging(revision)) (Set .empty, Set .empty)
46
- else buildReplicatedAndAnnouncedSets(cubeStatuses)
47
-
48
44
IndexStatus (
49
45
revision = revision,
50
46
replicatedSet = replicatedSet,
51
47
announcedSet = announcedSet,
52
- cubesStatuses = cubeStatuses )
48
+ cubesStatuses = cubeStatus )
53
49
}
54
50
55
51
def stagingCubeStatuses : SortedMap [CubeId , CubeStatus ] = {
@@ -64,7 +60,7 @@ private[delta] class IndexStatusBuilder(qbeastSnapshot: DeltaQbeastSnapshot, rev
64
60
revision.revisionID,
65
61
Weight .MinValue ,
66
62
maxWeight,
67
- false ,
63
+ FLOODED ,
68
64
0 ,
69
65
addFile.size,
70
66
addFile.modificationTime))
@@ -95,34 +91,13 @@ private[delta] class IndexStatusBuilder(qbeastSnapshot: DeltaQbeastSnapshot, rev
95
91
.select(
96
92
createCube(col(" cube" ), lit(ndims)).as(" cubeId" ),
97
93
col(" maxWeight" ),
98
- normalizeWeight(col(" maxWeight" ), col(" elementCount" ), lit(rev.desiredCubeSize))
99
- .as( " normalizedWeight" ),
94
+ normalizeWeight(col(" maxWeight" ), col(" elementCount" ), lit(rev.desiredCubeSize)).as(
95
+ " normalizedWeight" ),
100
96
col(" files" ))
101
97
.as[CubeStatus ]
102
98
.collect()
103
99
.foreach(row => builder += row.cubeId -> row)
104
100
builder.result()
105
101
}
106
102
107
- def buildReplicatedAndAnnouncedSets (
108
- cubeStatuses : Map [CubeId , CubeStatus ]): (Set [CubeId ], Set [CubeId ]) = {
109
- val replicatedSet = Set .newBuilder[CubeId ]
110
- val announcedSet = Set .newBuilder[CubeId ]
111
- cubeStatuses.foreach { case (id, status) =>
112
- var hasReplicated = false
113
- var hasUnreplicated = false
114
- status.files.foreach(file =>
115
- if (file.replicated) hasReplicated = true
116
- else hasUnreplicated = true )
117
- if (hasReplicated) {
118
- if (hasUnreplicated) {
119
- announcedSet += id
120
- } else {
121
- replicatedSet += id
122
- }
123
- }
124
- }
125
- (replicatedSet.result(), announcedSet.result())
126
- }
127
-
128
103
}
0 commit comments