15
15
*/
16
16
package com .google .swarm .tokenization ;
17
17
18
+ import com .google .swarm .tokenization .common .AuditInspectDataTransform ;
19
+ import com .google .swarm .tokenization .common .BQWriteTransform ;
18
20
import com .google .swarm .tokenization .common .DLPTransform ;
19
21
import com .google .swarm .tokenization .common .FileReaderTransform ;
22
+ import com .google .swarm .tokenization .common .RowToJson ;
20
23
import com .google .swarm .tokenization .common .S3ReaderOptions ;
24
+ import com .google .swarm .tokenization .common .Util ;
21
25
import org .apache .beam .sdk .Pipeline ;
22
26
import org .apache .beam .sdk .PipelineResult ;
27
+ import org .apache .beam .sdk .io .gcp .bigquery .BigQueryIO ;
28
+ import org .apache .beam .sdk .io .gcp .pubsub .PubsubIO ;
23
29
import org .apache .beam .sdk .options .PipelineOptionsFactory ;
24
30
import org .apache .beam .sdk .values .KV ;
25
31
import org .apache .beam .sdk .values .PCollection ;
26
32
import org .apache .beam .sdk .values .PCollectionTuple ;
33
+ import org .apache .beam .sdk .values .Row ;
27
34
import org .slf4j .Logger ;
28
35
import org .slf4j .LoggerFactory ;
29
36
@@ -33,7 +40,6 @@ public class DLPS3ScannerPipeline {
33
40
public static void main (String [] args ) {
34
41
S3ReaderOptions options =
35
42
PipelineOptionsFactory .fromArgs (args ).withValidation ().as (S3ReaderOptions .class );
36
- // options.setEnableStreamingEngine(true);
37
43
run (options );
38
44
}
39
45
@@ -48,23 +54,7 @@ public static PipelineResult run(S3ReaderOptions options) {
48
54
.setDelimeter (options .getDelimeter ())
49
55
.setKeyRange (options .getKeyRange ())
50
56
.build ());
51
- // .apply(
52
- // "Fixed Window",
53
- // Window.<KV<String, String>>into(FixedWindows.of(Duration.standardSeconds(10)))
54
- // .triggering(
55
- //
56
- // AfterProcessingTime.pastFirstElementInPane().plusDelayOf(Duration.ZERO))
57
- // .discardingFiredPanes()
58
- // .withAllowedLateness(Duration.ZERO));
59
57
60
- // nonInspectedContents.apply("Print", ParDo.of(new DoFn<KV<String,String>, String>(){
61
- //
62
- // @ProcessElement
63
- // public void processElement(ProcessContext c) {
64
- // c.output(c.element().getValue());
65
- // }
66
- // }));
67
- //
68
58
PCollectionTuple inspectedData =
69
59
nonInspectedContents .apply (
70
60
"DLPScanner" ,
@@ -74,37 +64,37 @@ public static PipelineResult run(S3ReaderOptions options) {
74
64
.setBatchSize (options .getBatchSize ())
75
65
.build ());
76
66
77
- // PCollection<Row> inspectedContents =
78
- // inspectedData.get(Util.inspectData).setRowSchema(Util.bqDataSchema);
79
- //
80
- // PCollection<Row> inspectedStats =
81
- // inspectedData.get(Util.auditData).setRowSchema(Util.bqAuditSchema);
82
- //
83
- // PCollection<Row> auditData =
84
- // inspectedStats
85
- // .apply("FileTrackerTransform", new AuditInspectDataTransform())
86
- // .setRowSchema(Util.bqAuditSchema);
87
- //
88
- // auditData.apply(
89
- // "WriteAuditData",
90
- // BigQueryIO.<Row>write()
91
- // .to(options.getAuditTableSpec())
92
- // .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
93
- // .useBeamSchema()
94
- // .withoutValidation()
95
- // .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND)
96
- // .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_NEVER));
97
- //
98
- // auditData
99
- // .apply("RowToJson", new RowToJson())
100
- // .apply("WriteToTopic", PubsubIO.writeStrings().to(options.getTopic()));
67
+ PCollection <Row > inspectedContents =
68
+ inspectedData .get (Util .inspectData ).setRowSchema (Util .bqDataSchema );
101
69
102
- // inspectedContents.apply(
103
- // "WriteInspectData",
104
- // BQWriteTransform.newBuilder()
105
- // .setTableSpec(options.getTableSpec())
106
- // .setMethod(options.getWriteMethod())
107
- // .build());
70
+ PCollection <Row > inspectedStats =
71
+ inspectedData .get (Util .auditData ).setRowSchema (Util .bqAuditSchema );
72
+
73
+ PCollection <Row > auditData =
74
+ inspectedStats
75
+ .apply ("FileTrackerTransform" , new AuditInspectDataTransform ())
76
+ .setRowSchema (Util .bqAuditSchema );
77
+
78
+ auditData .apply (
79
+ "WriteAuditData" ,
80
+ BigQueryIO .<Row >write ()
81
+ .to (options .getAuditTableSpec ())
82
+ .withMethod (BigQueryIO .Write .Method .STREAMING_INSERTS )
83
+ .useBeamSchema ()
84
+ .withoutValidation ()
85
+ .withWriteDisposition (BigQueryIO .Write .WriteDisposition .WRITE_APPEND )
86
+ .withCreateDisposition (BigQueryIO .Write .CreateDisposition .CREATE_NEVER ));
87
+
88
+ auditData
89
+ .apply ("RowToJson" , new RowToJson ())
90
+ .apply ("WriteToTopic" , PubsubIO .writeStrings ().to (options .getTopic ()));
91
+
92
+ inspectedContents .apply (
93
+ "WriteInspectData" ,
94
+ BQWriteTransform .newBuilder ()
95
+ .setTableSpec (options .getTableSpec ())
96
+ .setMethod (options .getWriteMethod ())
97
+ .build ());
108
98
return p .run ();
109
99
}
110
100
}
0 commit comments