Skip to content

Commit 550afbd

Browse files
author
Doug Schmidt
authored
Merge pull request #301 from DougSchmidt-AI/feature/PF-1408-PointZillaFetchFromURL
PF-1408 - Support fetching CSV/text from HTTP and FTP URLs
2 parents 2c25fcd + 7855c42 commit 550afbd

File tree

3 files changed

+105
-20
lines changed

3 files changed

+105
-20
lines changed

TimeSeries/PublicApis/SdkExamples/PointZilla/PointReaders/CsvReader.cs

+39-12
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
using System;
22
using System.Collections.Generic;
33
using System.Data;
4+
using System.Diagnostics;
45
using System.Globalization;
56
using System.IO;
67
using System.Linq;
8+
using System.Net;
79
using System.Reflection;
810
using Aquarius.TimeSeries.Client.ServiceModels.Acquisition;
911
using ExcelDataReader;
@@ -43,10 +45,14 @@ public CsvReader(Context context)
4345

4446
private List<TimeSeriesPoint> LoadPoints(string path)
4547
{
46-
if (!File.Exists(path))
48+
var isUri = Uri.TryCreate(path, UriKind.Absolute, out var uri);
49+
50+
if (!isUri && !File.Exists(path))
4751
throw new ExpectedException($"File '{path}' does not exist.");
4852

49-
var points = LoadExcelPoints(path) ?? LoadCsvPoints(path);
53+
var points = isUri
54+
? LoadUrlPoints(uri)
55+
: LoadExcelPoints(path) ?? LoadCsvPoints(path);
5056

5157
var anyGapPoints = points.Any(p => p.Type == PointType.Gap);
5258

@@ -217,7 +223,10 @@ private List<string> GetFields(IExcelDataReader rowReader)
217223

218224
private List<string> GetStartingHeaderColumns()
219225
{
220-
return (Context.CsvHeaderStartsWith ?? string.Empty)
226+
if (string.IsNullOrEmpty(Context.CsvHeaderStartsWith))
227+
return new List<string>();
228+
229+
return Context.CsvHeaderStartsWith
221230
.Split(',')
222231
.Select(s => s.Trim())
223232
.ToList();
@@ -354,19 +363,37 @@ private static void ParseValidExcelColumn(DataRow row, int? fieldIndex, Action<o
354363

355364
private List<TimeSeriesPoint> LoadCsvPoints(string path)
356365
{
357-
var points = new List<TimeSeriesPoint>();
366+
return LoadCsvPoints(path, new TextFieldParser(path));
367+
}
368+
369+
private List<TimeSeriesPoint> LoadUrlPoints(Uri uri)
370+
{
371+
Log.Info($"Fetching data from {uri} ...");
372+
373+
var stopwatch = Stopwatch.StartNew();
374+
375+
var text = new WebClient().DownloadString(uri);
376+
377+
Log.Info($"Fetched {text.Length.Bytes().Humanize("#.#")} in {stopwatch.Elapsed.Humanize(2)}.");
358378

379+
using (var reader = new StringReader(text))
380+
{
381+
return LoadCsvPoints(uri.ToString(), new TextFieldParser(reader));
382+
}
383+
}
384+
385+
private List<TimeSeriesPoint> LoadCsvPoints(string source, TextFieldParser parser)
386+
{
359387
var csvDelimiter = string.IsNullOrEmpty(Context.CsvDelimiter)
360388
? ","
361389
: Context.CsvDelimiter;
362390

363-
var parser = new TextFieldParser(path)
364-
{
365-
TextFieldType = FieldType.Delimited,
366-
Delimiters = new[] { csvDelimiter },
367-
TrimWhiteSpace = true,
368-
HasFieldsEnclosedInQuotes = true
369-
};
391+
parser.TextFieldType = FieldType.Delimited;
392+
parser.Delimiters = new[] { csvDelimiter };
393+
parser.TrimWhiteSpace = true;
394+
parser.HasFieldsEnclosedInQuotes = true;
395+
396+
var points = new List<TimeSeriesPoint>();
370397

371398
if (!string.IsNullOrWhiteSpace(Context.CsvComment))
372399
{
@@ -408,7 +435,7 @@ private List<TimeSeriesPoint> LoadCsvPoints(string path)
408435
{
409436
if (Context.CsvIgnoreInvalidRows) continue;
410437

411-
throw new ExpectedException($"Can't parse '{path}' ({lineNumber}): {string.Join(", ", fields)}");
438+
throw new ExpectedException($"Can't parse '{source}' ({lineNumber}): {string.Join(", ", fields)}");
412439
}
413440

414441
points.Add(point);

TimeSeries/PublicApis/SdkExamples/PointZilla/Program.cs

+13-1
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@ var usageMessage
284284
continue;
285285
}
286286

287-
if (File.Exists(arg))
287+
if (File.Exists(arg) || IsValidUrl(arg))
288288
{
289289
context.CsvFiles.Add(arg);
290290
continue;
@@ -472,6 +472,18 @@ private static void ParseManualPoints(Context context, double numericValue)
472472
context.StartTime = context.StartTime.Plus(Duration.FromTimeSpan(context.PointInterval));
473473
}
474474

475+
private static bool IsValidUrl(string text)
476+
{
477+
return Uri.TryCreate(text, UriKind.Absolute, out var uri) && SupportedUriSchemes.Contains(uri.Scheme);
478+
}
479+
480+
private static readonly HashSet<string> SupportedUriSchemes = new HashSet<string>
481+
{
482+
Uri.UriSchemeHttp,
483+
Uri.UriSchemeHttps,
484+
Uri.UriSchemeFtp,
485+
};
486+
475487
private static void ParseManualGap(Context context)
476488
{
477489
context.ManualPoints.Add(new TimeSeriesPoint{Type = PointType.Gap});

TimeSeries/PublicApis/SdkExamples/PointZilla/Readme.md

+53-7
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ Download the [latest PointZilla.exe release here](../../../../../../releases/lat
77
Points can be specified from:
88
- Command line parameters (useful for appending a single point)
99
- Signal generators: linear, saw-tooth, square-wave, or sine-wave signals. Useful for just getting *something* into a time-series
10-
- CSV files (including CSV exports from AQTS Springboard)
10+
- CSV files (including CSV exports from AQTS Springboard) from file, FTP, or HTTP sources.
1111
- Points retrieved live from other AQTS systems, including from legacy 3.X systems.
1212
- The results of a database query (via direct support for SqlServer, Postgres, and MySql. ODBC connections are supported too, but require configuration)
1313
- `CMD.EXE`, `PowerShell` or `bash`: `PointZilla` works well from within any shell.
@@ -34,9 +34,10 @@ These examples will get you through most of the heavy lifting to get some points
3434
A few interesting operations include:
3535
- [Appending a few random points](#append-something-to-a-time-series)
3636
- [Appending a single point](#append-a-single-point-to-a-time-series)
37-
- [Appending points from a CSV](#append-values-from-a-csv-file)
38-
- [Appending points from Excel](#appending-values-from-an-excel-spreadsheet)
39-
- [Appending points from a database](#appending-values-from-a-database-query)
37+
- [Appending points from a CSV](#append-points-from-a-csv-file)
38+
- [Appending points from Excel](#append-points-from-an-excel-spreadsheet)
39+
- [Appending points from an HTTP request](#append-points-from-an-http-request)
40+
- [Appending points from a database](#append-points-from-a-database-query)
4041
- [Appending points with grades or qualifiers](#appending-grades-and-qualifiers)
4142
- [Appending points with notes](#appending-points-with-notes)
4243
- [Copy points from another time-series](#copying-points-from-another-time-series)
@@ -129,7 +130,7 @@ $ ./PointZilla.exe -server=myserver Stage.Label@MyLocation 12.5 Gap 15.3
129130

130131
When reading data from a CSV file, use the case-insensitive keyword `Gap` in a timestamp or value column to represent an explicit gap.
131132

132-
## Append values from a CSV file
133+
## Append points from a CSV file
133134

134135
`PointZilla` can also read times, values, grade codes, and qualifiers from a CSV file.
135136

@@ -240,15 +241,60 @@ The following options are all equivalent ways of specifying Australian Central S
240241

241242
When the `/UtcOffset` value is explicitly set, the value will also be used when creating any time-series or locations.
242243

243-
## Appending values from an Excel spreadsheet
244+
## Append points from an Excel spreadsheet
244245

245246
All the CSV parsing options also apply to parsing Excel workbooks.
246247

247248
By default, the first sheet in the workbook will be parsed according to the CSV parsing rules.
248249

249250
You can use the `/ExcelSheetNumber=integer` or `/ExcelSheetName=name` options to parse a different sheet in the workbook.
250251

251-
## Appending values from a database query
252+
## Append points from an HTTP request
253+
254+
All the CSV parsing options also apply to text downloaded via FTP or HTTP requests.
255+
256+
This approach works when the web request returns a text stream for its response payload.
257+
258+
Here is an example HTTP request which uses the [USGS NWIS service](https://help.waterdata.usgs.gov/faq/automated-retrievals#Examples) to fetch the last 24 hours of Stage points (HG in AQTS, code 00065 in NWIS) from a location.
259+
260+
https://nwis.waterdata.usgs.gov/hi/nwis/uv/?format=rdb&site_no=16010000&period=PT1D
261+
262+
The NWIS data response includes some commented lines at the start, followed by a 2-line header row, and then the tab-delimited (not comma-delimited) data rows.
263+
264+
```
265+
# Data provided for site 16010000
266+
# TS parameter Description
267+
# 42061 00060 Discharge, cubic feet per second
268+
# 42062 00065 Gage height, feet
269+
#
270+
# Data-value qualification codes included in this output:
271+
# P Provisional data subject to revision.
272+
#
273+
agency_cd site_no datetime tz_cd 42061_00060 42061_00060_cd 42062_00065 42062_00065_cd
274+
5s 15s 20d 6s 14n 10s 14n 10s
275+
USGS 16010000 2022-03-10 00:00 HST 5.34 P 2.08 P
276+
USGS 16010000 2022-03-10 00:05 HST 5.34 P 2.08 P
277+
USGS 16010000 2022-03-10 00:10 HST 5.34 P 2.08 P
278+
USGS 16010000 2022-03-10 00:15 HST 5.34 P 2.08 P
279+
```
280+
281+
This command line will fetch the data, extract the points from the "datetime" and "42061_00060" columns, and append them to an AQTS series.
282+
283+
```sh
284+
$ ./PointZilla.exe -server=doug-vm2019 "Stage.Working@Location" "https://nwis.waterdata.usgs.gov/hi/nwis/uv/?format=rdb&site_no=16010000&period=PT1D" -CsvDelimiter=%09 -CsvComment="#" -CsvDateTimeField=datetime -CsvValueField=42061_00060 -CsvDateTimeFormat="yyyy-MM-dd HH:mm" -CsvIgnoreInvalidRows=true
285+
16:38:30.539 INFO - PointZilla v1.0.0.0
286+
16:38:30.592 INFO - Fetching data from https://nwis.waterdata.usgs.gov/hi/nwis/uv/?format=rdb&site_no=16010000&period=PT1D ...
287+
16:38:31.653 INFO - Fetched 23.5 KB in 1 second, 40 milliseconds.
288+
16:38:31.810 INFO - Loaded 461 points [2022-03-10T08:00:00Z to 2022-03-11T22:20:00Z] from 'https://nwis.waterdata.usgs.gov/hi/nwis/uv/?format=rdb&site_no=16010000&period=PT1D'.
289+
16:38:31.813 INFO - Connecting to doug-vm2019 ...
290+
16:38:31.984 INFO - Connected to doug-vm2019 (2021.4.77.0)
291+
16:38:32.627 INFO - Appending 461 points [2022-03-10T08:00:00Z to 2022-03-11T22:20:00Z] to Stage.Working@Location (ProcessorBasic) ...
292+
16:38:33.202 INFO - Appended 461 points and 0 notes (deleting 0 points and 0 notes) in 0.6 seconds.
293+
```
294+
295+
Note: Other common web formats like XML, JSON, or Parquet files are not yet supported.
296+
297+
## Append points from a database query
252298

253299
PointZilla can also execute a database query and import the results from the query as a time-series.
254300

0 commit comments

Comments
 (0)