38
38
GCP_PROJECT_ID : apache-beam-testing
39
39
GCP_REGION : us-central1
40
40
GCS_TEMP_LOCATION : gs://rc-validation-migration-tests/temp/
41
+ GCS_STAGING_LOCATION : gs://rc-validation-migration-tests/staging/
41
42
GCS_INPUT_PATH : gs://apache-beam-samples/shakespeare/kinglear.txt
42
43
43
44
jobs :
44
- setup :
45
+ validate-rc-package :
45
46
runs-on : self-hosted
46
47
steps :
47
48
- name : Checkout repository
@@ -52,43 +53,33 @@ jobs:
52
53
with :
53
54
go-version : default
54
55
55
- - name : Fetch Go SDK RC and Tidy Modules
56
- working-directory : ./sdks/go/examples/wordcount
56
+ - name : Setup Go Module and Fetch RC
57
+ id : setup_go
57
58
run : |
58
- go get -d github.com/apache/beam/sdks/v2@${{ github.event.inputs.rc_tag }}
59
+ TEMP_DIR="go-rc-test-${{ github.run_id }}"
60
+ mkdir $TEMP_DIR
61
+ wget -O $TEMP_DIR/wordcount.go https://raw.githubusercontent.com/apache/beam/refs/heads/master/sdks/go/examples/wordcount/wordcount.go
62
+ cd $TEMP_DIR
63
+ go mod init rc-test
64
+ go get github.com/apache/beam/sdks/v2/go/pkg/beam@${{ github.event.inputs.rc_tag }}
59
65
go mod tidy
66
+ echo "work_dir=$TEMP_DIR" >> $GITHUB_OUTPUT # Output relative path
60
67
61
-
62
- validate-go-rc-prism :
63
- needs : setup
64
- runs-on : self-hosted # Changed to self-hosted
65
- steps :
66
- - name : Checkout repository
67
- uses : actions/checkout@v4
68
-
69
- - name : Set up environment
70
- uses : ./.github/actions/setup-environment-action
71
- with :
72
- go-version : default
73
-
74
-
75
- # Assuming gcloud/gsutil is available and authenticated on the self-hosted runner
68
+ # --- Prism Steps ---
76
69
- name : Download Input File from GCS (Prism)
77
- working-directory : ./sdks/go/examples/wordcount
70
+ working-directory : ./${{ steps.setup_go.outputs.work_dir }}
78
71
run : gsutil cp ${{ env.GCS_INPUT_PATH }} ./kinglear.txt
79
72
80
73
- name : Run Go WordCount with PrismRunner
81
- working-directory : ./sdks/go/examples/wordcount
74
+ working-directory : ./${{ steps.setup_go.outputs.work_dir }}
82
75
run : |
83
76
go run wordcount.go \
84
77
--input ./kinglear.txt \
85
78
--output ./output_prism.txt \
86
- --runner=PrismRunner \
87
- --environment_type=DOCKER \
88
- --environment_config=apache/beam_go_sdk:${{ github.event.inputs.container_tag }}
79
+ --runner=PrismRunner
89
80
90
- - name : Check output file
91
- working-directory : ./sdks/go/examples/wordcount
81
+ - name : Check Prism output file
82
+ working-directory : ./${{ steps.setup_go.outputs.work_dir }}
92
83
run : |
93
84
echo "--- PrismRunner WordCount Output ---"
94
85
cat output_prism.txt* # Output might be sharded
@@ -101,22 +92,9 @@ jobs:
101
92
exit 1
102
93
fi
103
94
104
- validate-go-rc-dataflow :
105
- needs : setup
106
- runs-on : self-hosted # Changed to self-hosted
107
- steps :
108
- - name : Checkout repository
109
- uses : actions/checkout@v4
110
-
111
- - name : Set up environment
112
- uses : ./.github/actions/setup-environment-action
113
- with :
114
- go-version : default
115
-
116
-
117
- # Assuming gcloud is available and authenticated on the self-hosted runner
95
+ # --- Dataflow Steps ---
118
96
- name : Run Go WordCount with DataflowRunner
119
- working-directory : ./sdks/go/examples/wordcount
97
+ working-directory : ./${{ steps.setup_go.outputs.work_dir }}
120
98
env :
121
99
# Define output path based on constant prefix and RC tag for uniqueness
122
100
GCS_OUTPUT_PATH : ${{ env.GCS_OUTPUT_PREFIX }}/${{ github.event.inputs.rc_tag }}/dataflow/output
@@ -129,11 +107,31 @@ jobs:
129
107
--project=${{ env.GCP_PROJECT_ID }} \
130
108
--region=${{ env.GCP_REGION }} \
131
109
--temp_location=${{ env.GCS_TEMP_LOCATION }} \
110
+ --staging_location=${{ env.GCS_STAGING_LOCATION }} \
132
111
--environment_type=DOCKER \
133
112
--environment_config=apache/beam_go_sdk:${{ github.event.inputs.container_tag }}
134
113
135
- # Note: Checking Dataflow output requires gcloud storage commands and depends on job completion.
136
- # This basic workflow focuses on submission. A more robust check would poll the job status
137
- # and then verify GCS output, which is significantly more complex.
138
- - name : Log Dataflow Job Submission Info
139
- run : echo "Dataflow job submitted. Check GCP console (project ${{ env.GCP_PROJECT_ID }}) for status and output at ${{ env.GCS_OUTPUT_PREFIX }}/${{ github.event.inputs.rc_tag }}/dataflow/output"
114
+ - name : Check Dataflow Output in GCS
115
+ working-directory : ./${{ steps.setup_go.outputs.work_dir }} # Added working directory for consistency, though not strictly needed for gsutil
116
+ env :
117
+ # Re-define the output path pattern for checking
118
+ GCS_OUTPUT_PATH_PATTERN : ${{ env.GCS_OUTPUT_PREFIX }}/${{ github.event.inputs.rc_tag }}/dataflow/output*
119
+ run : |
120
+ echo "Checking for Dataflow output files in GCS at: $GCS_OUTPUT_PATH_PATTERN"
121
+ # Use gsutil stat. The -q flag suppresses errors for non-existent files,
122
+ # allowing us to check the exit code. Exit code 0 means found, 1 means not found.
123
+ if gsutil -q stat $GCS_OUTPUT_PATH_PATTERN; then
124
+ echo "Output files found in GCS."
125
+ FILE_COUNT=$(gsutil ls $GCS_OUTPUT_PATH_PATTERN | wc -l)
126
+ if [ "$FILE_COUNT" -gt 0 ]; then echo "Found $FILE_COUNT output file(s)."; else echo "Error: Output path exists but contains no files."; exit 1; fi
127
+ else
128
+ echo "Error: Output files not found in GCS at $GCS_OUTPUT_PATH_PATTERN"
129
+ exit 1
130
+ fi
131
+
132
+ - name : Cleanup Temporary Directory
133
+ if : always() # Ensure cleanup runs even if previous steps fail
134
+ working-directory : ./ # Run from the root workspace dir
135
+ run : |
136
+ echo "Cleaning up temporary directory: ${{ steps.setup_go.outputs.work_dir }}"
137
+ rm -rf ${{ steps.setup_go.outputs.work_dir }}
0 commit comments