@@ -88,8 +88,7 @@ func (h *runner) PrepareTransform(tid string, t *pipepb.PTransform, comps *pipep
88
88
}
89
89
90
90
func (h * runner ) handleFlatten (tid string , t * pipepb.PTransform , comps * pipepb.Components ) prepareResult {
91
- if ! h .config .SDKFlatten {
92
- t .EnvironmentId = "" // force the flatten to be a runner transform due to configuration.
91
+ if ! h .config .SDKFlatten && ! strings .HasPrefix (tid , "ft_" ) {
93
92
forcedRoots := []string {tid } // Have runner side transforms be roots.
94
93
95
94
// Force runner flatten consumers to be roots.
@@ -109,52 +108,48 @@ func (h *runner) handleFlatten(tid string, t *pipepb.PTransform, comps *pipepb.C
109
108
// they're written out to the runner in the same fashion.
110
109
// This may stop being necessary once Flatten Unzipping happens in the optimizer.
111
110
outPCol := comps .GetPcollections ()[outColID ]
112
- outCoderID := outPCol .CoderId
113
- outCoder := comps .GetCoders ()[outCoderID ]
114
- coderSubs := map [string ]* pipepb.Coder {}
115
111
pcollSubs := map [string ]* pipepb.PCollection {}
112
+ tSubs := map [string ]* pipepb.PTransform {}
116
113
117
- if ! strings .HasPrefix (outCoderID , "cf_" ) {
118
- // Create a new coder id for the flatten output PCollection and use
119
- // this coder id for all input PCollections
120
- outCoderID = "cf_" + outColID
121
- outCoder = proto .Clone (outCoder ).(* pipepb.Coder )
122
- coderSubs [outCoderID ] = outCoder
123
-
124
- pcollSubs [outColID ] = proto .Clone (outPCol ).(* pipepb.PCollection )
125
- pcollSubs [outColID ].CoderId = outCoderID
126
-
127
- outPCol = pcollSubs [outColID ]
128
- }
129
-
130
- for _ , p := range t .GetInputs () {
114
+ ts := proto .Clone (t ).(* pipepb.PTransform )
115
+ ts .EnvironmentId = "" // force the flatten to be a runner transform due to configuration.
116
+ for localID , p := range t .GetInputs () {
131
117
inPCol := comps .GetPcollections ()[p ]
132
118
if inPCol .CoderId != outPCol .CoderId {
133
- if strings .HasPrefix (inPCol .CoderId , "cf_" ) {
134
- // The input pcollection is the output of another flatten:
135
- // e.g. [[a, b] | Flatten], c] | Flatten
136
- // In this case, we just point the input coder id to the new flatten
137
- // output coder, so any upstream input pcollections will use the new
138
- // output coder.
139
- coderSubs [inPCol .CoderId ] = outCoder
140
- } else {
141
- // Create a substitute PCollection for this input with the flatten
142
- // output coder id
143
- pcollSubs [p ] = proto .Clone (inPCol ).(* pipepb.PCollection )
144
- pcollSubs [p ].CoderId = outPCol .CoderId
145
- }
119
+ // TODO: do the following injection conditionally.
120
+ // Now we inject an SDK-side flatten between the upstream transform and
121
+ // the flatten.
122
+ // Before: upstream -> [upstream out] -> runner flatten
123
+ // After: upstream -> [upstream out] -> SDK-side flatten -> [SDK-side flatten out] -> runner flatten
124
+ // Create a PCollection sub
125
+ fColID := "fc_" + p + "_to_" + outColID
126
+ fPCol := proto .Clone (outPCol ).(* pipepb.PCollection )
127
+ fPCol .CoderId = outPCol .CoderId // same coder as runner flatten
128
+ pcollSubs [fColID ] = fPCol
129
+
130
+ // Create a PTransform sub
131
+ ftID := "ft_" + p + "_to_" + outColID
132
+ ft := proto .Clone (t ).(* pipepb.PTransform )
133
+ ft .EnvironmentId = t .EnvironmentId // Set environment to ensure it is a SDK-side transform
134
+ ft .Inputs = map [string ]string {"0" : p }
135
+ ft .Outputs = map [string ]string {"0" : fColID }
136
+ tSubs [ftID ] = ft
137
+
138
+ // Replace the input of runner flatten with the output of SDK-side flatten
139
+ ts .Inputs [localID ] = fColID
140
+
141
+ // Force SDK-side flattens to be roots
142
+ forcedRoots = append (forcedRoots , ftID )
146
143
}
147
144
}
145
+ tSubs [tid ] = ts
148
146
149
147
// Return the new components which is the transforms consumer
150
148
return prepareResult {
151
149
// We sub this flatten with itself, to not drop it.
152
150
SubbedComps : & pipepb.Components {
153
- Transforms : map [string ]* pipepb.PTransform {
154
- tid : t ,
155
- },
151
+ Transforms : tSubs ,
156
152
Pcollections : pcollSubs ,
157
- Coders : coderSubs ,
158
153
},
159
154
RemovedLeaves : nil ,
160
155
ForcedRoots : forcedRoots ,
0 commit comments