@@ -20,6 +20,7 @@ import (
2020 "strings"
2121 "time"
2222
23+ "cloud.google.com/go/civil"
2324 gcs "cloud.google.com/go/storage"
2425 "google.golang.org/api/option"
2526
@@ -44,6 +45,7 @@ type GCSSource struct {
4445 io.Closer // Closer interface to be provided by an embedded struct.
4546 RetryBaseTime time.Duration // The base time for backoff and retry.
4647 TableBase string // TableBase is BQ table associated with this source, or "invalid".
48+ PathDate civil.Date // Date associated with YYYY/MM/DD in FilePath.
4749}
4850
4951// Retrieve next file header.
@@ -125,6 +127,11 @@ func (src *GCSSource) Detail() string {
125127 return src .FilePath
126128}
127129
130+ // Date returns src.PathDate, the civil.Date associated with the YYYY/MM/DD portion of the GCSSource archive path.
131+ func (src * GCSSource ) Date () civil.Date {
132+ return src .PathDate
133+ }
134+
128135// NextTest reads the next test object from the tar file.
129136// Skips reading contents of any file larger than maxSize, returning empty data
130137// and storage.ErrOversizeFile.
@@ -218,25 +225,30 @@ var errNoClient = errors.New("client should be non-null")
218225//
219226// dp.URI should be of form gs://bucket/filename.tar, .tgz, or .tar.gz; dp.DatePath must parse as YYYY/MM/DD.
220227// FYI Using a persistent client saves about 80 msec, and 220 allocs, totalling 70kB.
221- func NewTestSource (client * gcs.Client , uri string , label string ) (etl.TestSource , error ) {
228+ func NewTestSource (client * gcs.Client , dp etl. DataPath , label string ) (etl.TestSource , error ) {
222229 if client == nil {
223230 return nil , errNoClient
224231 }
225232 // For now only handle gcs paths.
226- if ! strings .HasPrefix (uri , "gs://" ) {
227- return nil , errors .New ("invalid file path: " + uri )
233+ if ! strings .HasPrefix (dp . URI , "gs://" ) {
234+ return nil , errors .New ("invalid file path: " + dp . URI )
228235 }
229- parts := strings .SplitN (uri , "/" , 4 )
236+ parts := strings .SplitN (dp . URI , "/" , 4 )
230237 if len (parts ) != 4 {
231- return nil , errors .New ("invalid file path: " + uri )
238+ return nil , errors .New ("invalid file path: " + dp . URI )
232239 }
233240 bucket := parts [2 ]
234241 fn := parts [3 ]
235242
243+ archiveDate , err := time .Parse ("2006/01/02" , dp .DatePath )
244+ if err != nil {
245+ return nil , fmt .Errorf ("failed to parse archive date path: %w" , err )
246+ }
247+
236248 // TODO - consider just always testing for valid gzip file.
237249 if ! (strings .HasSuffix (fn , ".tgz" ) || strings .HasSuffix (fn , ".tar" ) ||
238250 strings .HasSuffix (fn , ".tar.gz" )) {
239- return nil , errors .New ("not tar or tgz: " + uri )
251+ return nil , errors .New ("not tar or tgz: " + dp . URI )
240252 }
241253
242254 // TODO(prod) Evaluate whether this is long enough.
@@ -268,7 +280,15 @@ func NewTestSource(client *gcs.Client, uri string, label string) (etl.TestSource
268280 tarReader := tar .NewReader (rdr )
269281
270282 baseTimeout := 16 * time .Millisecond
271- return & GCSSource {uri , tarReader , closer , baseTimeout , label }, nil
283+ gcs := & GCSSource {
284+ FilePath : dp .URI ,
285+ TarReader : tarReader ,
286+ Closer : closer ,
287+ RetryBaseTime : baseTimeout ,
288+ TableBase : label ,
289+ PathDate : civil .DateOf (archiveDate ),
290+ }
291+ return gcs , nil
272292}
273293
274294// GetStorageClient provides a storage reader client.
@@ -304,7 +324,7 @@ func (sf *gcsSourceFactory) Get(ctx context.Context, dp etl.DataPath) (etl.TestS
304324 http .StatusInternalServerError , etl .ErrBadDataType )
305325 }
306326
307- tr , err := NewTestSource (sf .client , dp . URI , label )
327+ tr , err := NewTestSource (sf .client , dp , label )
308328 if err != nil {
309329 log .Printf ("Error opening gcs file: %v" , err )
310330 // TODO - anything better we could do here?
0 commit comments