Skip to content

Commit 52e50c2

Browse files
author
Doug Schmidt
authored
Merge pull request #303 from DougSchmidt-AI/feature/PF-1417-PointZillaHistoricalTimezones
PF-1417 - Added more support for NWIS text files
2 parents 0bc6d18 + 37dbab9 commit 52e50c2

File tree

7 files changed

+316
-32
lines changed

7 files changed

+316
-32
lines changed

TimeSeries/PublicApis/SdkExamples/PointZilla/Context.cs

+1
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ public class Context
9494
public Field CsvTimezoneField { get; set; }
9595
public string CsvComment { get; set; }
9696
public int CsvSkipRows { get; set; }
97+
public int CsvSkipRowsAfterHeader { get; set; }
9798
public bool CsvHasHeaderRow { get; set; }
9899
public bool CsvIgnoreInvalidRows { get; set; }
99100
public string CsvHeaderStartsWith { get; set; }

TimeSeries/PublicApis/SdkExamples/PointZilla/Field.cs

+42-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using System;
2+
using System.Text.RegularExpressions;
23

34
namespace PointZilla
45
{
@@ -17,14 +18,41 @@ public static Field Parse(string text, string fieldName)
1718
throw new Exception($"{fieldName}: {text} is an invalid index. Use a positive integer or a named column.");
1819
}
1920

21+
var match = FieldRegex.Match(text);
22+
23+
if (match.Success)
24+
{
25+
var patternText = match.Groups["pattern"].Value;
26+
var countText = match.Groups["count"].Value;
27+
28+
Regex regex;
29+
30+
try
31+
{
32+
regex = new Regex(patternText);
33+
}
34+
catch (ArgumentException exception)
35+
{
36+
throw new ExpectedException($"{fieldName}: {text} is not a valid regular expression: {exception.Message}");
37+
}
38+
39+
if (!int.TryParse(countText, out var count))
40+
count = 1;
41+
42+
return new Field(fieldName, regex, count);
43+
}
44+
2045
return new Field(fieldName, text);
2146
}
2247

2348
public int ColumnIndex { get; set; }
2449
public string ColumnName { get; }
2550
public string FieldName { get; }
51+
public int PatternCount { get; }
52+
public Regex ColumnRegex { get; }
2653

2754
public bool HasColumnName => !string.IsNullOrWhiteSpace(ColumnName);
55+
public bool HasColumnRegex => ColumnRegex != null;
2856
public bool HasColumnIndex => ColumnIndex > 0;
2957

3058
private Field(string fieldName, int columnIndex)
@@ -39,11 +67,24 @@ private Field(string fieldName, string columnName)
3967
ColumnName = columnName;
4068
}
4169

70+
/// <summary>
/// Creates a field that is located by matching a regular expression rather than by
/// a fixed column name or index.
/// </summary>
/// <param name="fieldName">Name of the field being configured.</param>
/// <param name="columnRegex">Pattern stored in <see cref="ColumnRegex"/>.</param>
/// <param name="patternCount">Count stored in <see cref="PatternCount"/>.</param>
private Field(string fieldName, Regex columnRegex, int patternCount) =>
    (FieldName, ColumnRegex, PatternCount) = (fieldName, columnRegex, patternCount);
76+
4277
public override string ToString()
4378
{
4479
return HasColumnName
4580
? $"{FieldName}:'{ColumnName}'"
46-
: $"{FieldName}:#{ColumnIndex}";
81+
: HasColumnRegex
82+
? PatternCount > 1
83+
? $"{FieldName}:/{ColumnRegex}/#{PatternCount}"
84+
: $"{FieldName}:/{ColumnRegex}/"
85+
: $"{FieldName}:#{ColumnIndex}";
4786
}
87+
88+
private static readonly Regex FieldRegex = new Regex(@"^/(?<pattern>.+)/(#(?<count>\d+))?$");
4889
}
4990
}

TimeSeries/PublicApis/SdkExamples/PointZilla/Formats.cs

+223-9
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,39 @@
11
using System;
22
using System.Collections.Generic;
3+
using System.IO;
4+
using System.Linq;
5+
using System.Net;
6+
using Microsoft.VisualBasic.FileIO;
37

48
namespace PointZilla
59
{
610
public static class Formats
711
{
812
public static string Description =>
9-
"Shortcut for known CSV formats. One of 'NG', '3X', or 'PointZilla'. [default: NG]";
13+
$"Shortcut for known CSV formats. One of {string.Join(", ", Formatters.Select(f => f.Id))}. [default: {Formatters.First().Id}]";
1014

1115
public static void SetFormat(Context context, string value)
1216
{
13-
if (!Formatters.TryGetValue(value, out var formatter))
17+
if (!FormatterLookup.TryGetValue(value, out var formatter))
1418
throw new ExpectedException($"'{value}' is an unknown CSV format.");
1519

1620
formatter(context);
1721
}
1822

19-
private static readonly Dictionary<string, Action<Context>> Formatters =
20-
new Dictionary<string, Action<Context>>(StringComparer.InvariantCultureIgnoreCase)
21-
{
22-
{ "NG", SetNgCsvFormat },
23-
{ "3X", Set3XCsvFormat },
24-
{ "PointZilla", SetPointZillaCsvFormat },
25-
};
23+
private static readonly IReadOnlyList<(string Id, Action<Context> Formatter)> Formatters = new (string Id, Action<Context> Formatter)[]
24+
{
25+
("NG", SetNgCsvFormat),
26+
("3X", Set3XCsvFormat),
27+
("PointZilla", SetPointZillaCsvFormat),
28+
("NWIS", SetNwisCsvFormat),
29+
};
30+
31+
private static readonly Dictionary<string, Action<Context>> FormatterLookup =
32+
Formatters
33+
.ToDictionary(
34+
f => f.Id,
35+
f => f.Formatter,
36+
StringComparer.InvariantCultureIgnoreCase);
2637

2738
public static void SetNgCsvFormat(Context context)
2839
{
@@ -48,6 +59,7 @@ public static void SetNgCsvFormat(Context context)
4859
// 2013-07-03T11:59:59Z,2013-07-03 23:59:59,966.15,Raw - yet to be review,200,
4960

5061
context.CsvSkipRows = 0;
62+
context.CsvSkipRowsAfterHeader = 0;
5163
context.CsvComment = "#";
5264
context.CsvDateTimeField = Field.Parse("ISO 8601 UTC", nameof(context.CsvDateTimeField));
5365
context.CsvDateTimeFormat = null;
@@ -73,6 +85,7 @@ public static void Set3XCsvFormat(Context context)
7385

7486
context.CsvComment = null;
7587
context.CsvSkipRows = 2;
88+
context.CsvSkipRowsAfterHeader = 0;
7689
context.CsvDateTimeField = Field.Parse("Date-Time", nameof(context.CsvDateTimeField));
7790
context.CsvDateTimeFormat = "MM/dd/yyyy HH:mm:ss";
7891
context.CsvDateOnlyField = null;
@@ -96,6 +109,7 @@ public static void SetPointZillaCsvFormat(Context context)
96109
// 2015-12-04T00:02:00Z, 3.525279357147, 500, ,
97110

98111
context.CsvSkipRows = 0;
112+
context.CsvSkipRowsAfterHeader = 0;
99113
context.CsvComment = "#";
100114
context.CsvDateTimeField = Field.Parse("ISO 8601 UTC", nameof(context.CsvDateTimeField));
101115
context.CsvDateTimeFormat = null;
@@ -109,5 +123,205 @@ public static void SetPointZillaCsvFormat(Context context)
109123
context.CsvIgnoreInvalidRows = true;
110124
context.CsvRealign = false;
111125
}
126+
127+
/// <summary>
/// Configures <paramref name="context"/> for the tab-delimited RDB text served by USGS NWIS
/// and registers the NWIS timezone codes as timezone aliases.
/// </summary>
/// <param name="context">The context whose CSV settings are overwritten.</param>
public static void SetNwisCsvFormat(Context context)
{
    // Sample source:
    // https://nwis.waterservices.usgs.gov/nwis/iv/?format=rdb&sites=01536000&period=P1D
    //
    // # Data provided for site 01536000
    // #            TS_ID       Parameter Description
    // #           121787       00060     Discharge, cubic feet per second
    // #           121786       00065     Gage height, feet
    // #
    // # Data-value qualification codes included in this output:
    // #     P  Provisional data subject to revision.
    // #
    // agency_cd  site_no   datetime          tz_cd  121787_00060  121787_00060_cd  121786_00065  121786_00065_cd
    // 5s         15s       20d               6s     14n           10s              14n           10s
    // USGS       01536000  2022-03-23 12:15  EDT    629           P                3.42          P
    // USGS       01536000  2022-03-23 12:30  EDT    629           P                3.42          P

    // File layout: tab separated, '#' comment lines, and a single column-format row
    // ("5s 15s 20d ...") directly below the header that must not be parsed as data.
    context.CsvDelimiter = "\t";
    context.CsvComment = "#";
    context.CsvSkipRows = 0;
    context.CsvSkipRowsAfterHeader = 1;
    context.CsvIgnoreInvalidRows = true;
    context.CsvRealign = false;

    // Timestamps: one combined local date/time column plus a separate timezone-code column.
    context.CsvDateTimeField = Field.Parse("datetime", nameof(context.CsvDateTimeField));
    context.CsvDateTimeFormat = "yyyy-MM-dd HH:mm";
    context.CsvTimezoneField = Field.Parse("tz_cd", nameof(context.CsvTimezoneField));
    context.CsvDateOnlyField = null;
    context.CsvTimeOnlyField = null;

    // Values: select the column by pattern; the default pattern matches discharge (00060).
    context.CsvValueField = Field.Parse("/_00060/", nameof(context.CsvValueField));
    context.CsvGradeField = null;
    context.CsvQualifiersField = null;

    // Timezone codes such as "EDT" are resolved through the context's alias table.
    AddNwisTimezoneAliases(context.TimezoneAliases);
}
161+
162+
/// <summary>
/// Parses the NWIS timezone definition table and adds an alias for every standard and
/// daylight-saving timezone code it contains. Each alias value is the table's UTC offset
/// prefixed with "UTC" (a leading '+' is inserted when the offset carries no sign).
/// </summary>
/// <param name="aliases">
/// Alias map updated in place; an existing entry with the same code is overwritten.
/// </param>
private static void AddNwisTimezoneAliases(Dictionary<string, string> aliases)
{
    var text = FetchNwisZoneDefinitions();

    // The definitions are a tab-delimited table: '#' comment lines, a header row naming the
    // columns, one column-format row, then one row per timezone. Daylight columns may be empty.
    //
    // tz_cd  tz_nm                            tz_ds              tz_utc_offset_tm  tz_dst_cd  tz_dst_nm                      tz_dst_utc_offset_tm
    // 5s     31s                              34s                6s                6s         31s                            6s
    // ACST   Central Australia Standard Time  Central Australia  +09:30            ACSST      Central Australia Summer Time  +10:30
    // AFT    Afghanistan Time                 Afghanistan        +04:30
    // AKST   Alaska Standard Time             Alaska             -09:00            AKDT       Alaska Daylight Time           -08:00

    using (var reader = new StringReader(text))
    using (var parser = new TextFieldParser(reader)
    {
        CommentTokens = new[] { "#" },
        TextFieldType = FieldType.Delimited,
        Delimiters = new[] { "\t" },
        TrimWhiteSpace = true,
        HasFieldsEnclosedInQuotes = true,
    })
    {
        // The row immediately after the header holds column formats ("5s 31s ..."), not data.
        var skipRowsAfterHeader = 1;
        var columns = new Dictionary<string, int>(StringComparer.InvariantCultureIgnoreCase);

        // Returns the named column of a row, or null when the column is unknown or the row is too short.
        string ParseField(string[] row, string name)
        {
            if (!columns.TryGetValue(name, out var index) || index >= row.Length)
                return null;

            return row[index];
        }

        // Returns the named offset column prefixed with "UTC"; null/empty values pass through unchanged.
        string ParseUtcOffset(string[] row, string name)
        {
            var value = ParseField(row, name);

            if (string.IsNullOrEmpty(value))
                return value;

            if (value[0] == '+' || value[0] == '-')
                return $"UTC{value}";

            // Unsigned offsets such as "00:00" get an explicit plus sign.
            return $"UTC+{value}";
        }

        while (!parser.EndOfData)
        {
            var fields = parser.ReadFields();
            if (fields == null) continue;

            if (columns.Count == 0)
            {
                // The first non-comment row is the header: remember each column's position.
                for (var i = 0; i < fields.Length; ++i)
                {
                    columns[fields[i]] = i;
                }
                continue;
            }

            if (skipRowsAfterHeader > 0)
            {
                --skipRowsAfterHeader;
                continue;
            }

            var standardCode = ParseField(fields, "tz_cd");
            var standardOffset = ParseUtcOffset(fields, "tz_utc_offset_tm");
            var daylightCode = ParseField(fields, "tz_dst_cd");
            var daylightOffset = ParseUtcOffset(fields, "tz_dst_utc_offset_tm");

            if (!string.IsNullOrEmpty(standardCode) && !string.IsNullOrEmpty(standardOffset))
                aliases[standardCode] = standardOffset;

            if (!string.IsNullOrEmpty(daylightCode) && !string.IsNullOrEmpty(daylightOffset))
                aliases[daylightCode] = daylightOffset;
        }
    }
}
249+
250+
private static string FetchNwisZoneDefinitions()
251+
{
252+
try
253+
{
254+
// Try to fetch the latest
255+
return new WebClient().DownloadString("https://help.waterdata.usgs.gov/code/tz_query?fmt=rdb");
256+
}
257+
catch (Exception)
258+
{
259+
// If we are blocked from the internet, fall back to a recent copy
260+
return @"#
261+
# National Water Information System
262+
# 2022/03/24
263+
#
264+
#
265+
# Date Retrieved: USGS Water Data for the Nation Help System
266+
#
267+
tz_cd tz_nm tz_ds tz_utc_offset_tm tz_dst_cd tz_dst_nm tz_dst_utc_offset_tm
268+
5s 31s 34s 6s 6s 31s 6s
269+
ACST Central Australia Standard Time Central Australia +09:30 ACSST Central Australia Summer Time +10:30
270+
AEST Australia Eastern Standard Time Eastern Australia +10:00 AESST Australia Eastern Summer Time +11:00
271+
AFT Afghanistan Time Afghanistan +04:30
272+
AKST Alaska Standard Time Alaska -09:00 AKDT Alaska Daylight Time -08:00
273+
AST Atlantic Standard Time (Canada) Atlantic (Canada) -04:00 ADT Atlantic Daylight Time -03:00
274+
AWST Australia Western Standard Time Western Australia +08:00 AWSST Australia Western Summer Time +09:00
275+
BT Baghdad Time Baghdad +03:00
276+
CAST Central Australia Standard Time Central Australia +09:30 CADT Central Australia Daylight Time +10:30
277+
CCT China Coastal Time China Coastal +08:00
278+
CET Central European Time Central Europe +01:00 CETDST Central European Daylight Time +02:00
279+
CST Central Standard Time Central North America -06:00 CDT Central Daylight Time -05:00
280+
DNT Dansk Normal Time Dansk +01:00
281+
DST Dansk Summer Time Dansk +01:00
282+
EAST East Australian Standard Time East Australia +10:00 EASST East Australian Summer Time +11:00
283+
EET Eastern Europe Standard Time Eastern Europe, Russia Zone 1 +02:00 EETDST Eastern Europe Daylight Time +03:00
284+
EST Eastern Standard Time Eastern North America -05:00 EDT Eastern Daylight Time -04:00
285+
FST French Summer Time French +01:00 FWT French Winter Time +02:00
286+
GMT Greenwich Mean Time Great Britain 00:00 BST British Summer Time +01:00
287+
GST Guam Standard Time Guam Standard Time, Russia Zone 9 +10:00
288+
HST Hawaii Standard Time Hawaii -10:00 HDT Hawaii Daylight Time -09:00
289+
IDLE International Date Line, East International Date Line, East +12:00
290+
IDLW International Date Line, West International Date Line, West -12:00
291+
IST Israel Standard Time Israel +02:00
292+
IT Iran Time Iran +03:30
293+
JST Japan Standard Time Japan Standard Time, Russia Zone 8 +09:00
294+
JT Java Time Java +07:30
295+
KST Korea Standard Time Korea +09:00
296+
LIGT Melbourne, Australia Melbourne +10:00
297+
MET Middle Europe Time Middle Europe +01:00 METDST Middle Europe Daylight Time +02:00
298+
MEWT Middle Europe Winter Time Middle Europe +01:00 MEST Middle Europe Summer Time +02:00
299+
MEZ Middle Europe Zone Middle Europe +01:00
300+
MST Mountain Standard Time Mountain North America -07:00 MDT Mountain Daylight Time -06:00
301+
MT Moluccas Time Moluccas +08:30
302+
NFT Newfoundland Standard Time Newfoundland -03:30 NDT Newfoundland Daylight Time -02:30
303+
NOR Norway Standard Time Norway +01:00
304+
NST Newfoundland Standard Time Newfoundland -03:30 NDT Newfoundland Daylight Time -02:30
305+
NZST New Zealand Standard Time New Zealand +12:00 NZDT New Zealand Daylight Time +13:00
306+
NZT New Zealand Time New Zealand +12:00 NZDT New Zealand Daylight Time +13:00
307+
PST Pacific Standard Time Pacific North America -08:00 PDT Pacific Daylight Time -07:00
308+
SAT South Australian Standard Time South Australia +09:30 SADT South Australian Daylight Time +10:30
309+
SET Seychelles Time Seychelles +01:00
310+
SWT Swedish Winter Time Swedish +01:00 SST Swedish Summer Time +02:00
311+
UTC Universal Coordinated Time Universal Coordinated Time 00:00
312+
WAST West Australian Standard Time West Australia +07:00 WADT West Australian Daylight Time +08:00
313+
WAT West Africa Time West Africa -01:00
314+
WET Western Europe Western Europe 00:00 WETDST Western Europe Daylight Time +01:00
315+
WST West Australian Standard Time West Australian +08:00 WDT West Australian Daylight Time +09:00
316+
ZP-11 UTC -11 hours UTC -11 hours -11:00
317+
ZP-2 UTC -2 hours Zone UTC -2 Hours -02:00
318+
ZP-3 UTC -3 hours Zone UTC -3 Hours -03:00
319+
ZP11 UTC +11 hours Zone UTC +11 Hours +11:00
320+
ZP4 UTC +4 hours Zone UTC +4 Hours +04:00
321+
ZP5 UTC +5 hours Zone UTC +5 Hours +05:00
322+
ZP6 UTC +6 hours Zone UTC +6 Hours +06:00
323+
";
324+
}
325+
}
112326
}
113327
}

TimeSeries/PublicApis/SdkExamples/PointZilla/PointReaders/CsvReader.cs

+7
Original file line numberDiff line numberDiff line change
@@ -412,6 +412,7 @@ private List<TimeSeriesPoint> LoadCsvPoints(string source, TextFieldParser parse
412412
var skipCount = Context.CsvSkipRows;
413413
var startingHeaderColumns = GetStartingHeaderColumns();
414414
var parseHeaderRow = Context.CsvHasHeaderRow;
415+
var afterHeaderSkipCount = Context.CsvSkipRowsAfterHeader;
415416

416417
while (!parser.EndOfData)
417418
{
@@ -438,6 +439,12 @@ private List<TimeSeriesPoint> LoadCsvPoints(string source, TextFieldParser parse
438439
continue;
439440
}
440441

442+
if (afterHeaderSkipCount > 0)
443+
{
444+
--afterHeaderSkipCount;
445+
continue;
446+
}
447+
441448
var point = ParsePoint(fields);
442449

443450
if (point == null)

0 commit comments

Comments
 (0)