Skip to content

Commit 513fc0b

Browse files
Merge pull request #347 from johntruckenbrodt/feature/archive.select_return-value
[Archive.select] new argument 'return_value'
2 parents 35341c2 + cd406a4 commit 513fc0b

File tree

4 files changed

+90
-11
lines changed

4 files changed

+90
-11
lines changed

environment.yml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@ dependencies:
1616
- python>=3.8
1717
- pyyaml
1818
- requests
19+
- shapely
1920
- spatialist>=0.15.2
2021
- sqlalchemy>=1.4,<2.0
2122
- sqlalchemy-utils>=0.37

pyroSAR/drivers.py

Lines changed: 60 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -2796,7 +2796,7 @@ def cleanup(self):
27962796

27972797
@staticmethod
27982798
def encode(string, encoding='utf-8'):
2799-
if not isinstance(string, str):
2799+
if not isinstance(string, str) and hasattr(string, 'encode'):
28002800
return string.encode(encoding)
28012801
else:
28022802
return string
@@ -3042,7 +3042,7 @@ def move(self, scenelist, directory, pbar=False):
30423042
log.info('The following scenes already exist at the target location:\n{}'.format('\n'.join(double)))
30433043

30443044
def select(self, vectorobject=None, mindate=None, maxdate=None, date_strict=True,
3045-
processdir=None, recursive=False, polarizations=None, **args):
3045+
processdir=None, recursive=False, polarizations=None, return_value="scene", **args):
30463046
"""
30473047
select scenes from the database
30483048
@@ -3057,7 +3057,7 @@ def select(self, vectorobject=None, mindate=None, maxdate=None, date_strict=True
30573057
date_strict: bool
30583058
treat dates as strict limits or also allow flexible limits to incorporate scenes
30593059
whose acquisition period overlaps with the defined limit?
3060-
3060+
30613061
- strict: start >= mindate & stop <= maxdate
30623062
- not strict: stop >= mindate & start <= maxdate
30633063
processdir: str or None
@@ -3067,15 +3067,55 @@ def select(self, vectorobject=None, mindate=None, maxdate=None, date_strict=True
30673067
(only if `processdir` is not None) should also the subdirectories of the `processdir` be scanned?
30683068
polarizations: list[str] or None
30693069
a list of polarization strings, e.g. ['HH', 'VV']
3070+
return_value: str or List[str]
3071+
the query return value(s). Options:
3072+
3073+
- geometry_wkb: the scene's footprint geometry formatted as WKB
3074+
- geometry_wkt: the scene's footprint geometry formatted as WKT
3075+
- mindate: the acquisition start datetime in UTC formatted as YYYYmmddTHHMMSS
3076+
- maxdate: the acquisition end datetime in UTC formatted as YYYYmmddTHHMMSS
3077+
- all further database column names (see :meth:`~Archive.get_colnames()`)
30703078
**args:
30713079
any further arguments (columns), which are registered in the database. See :meth:`~Archive.get_colnames()`
30723080
30733081
Returns
30743082
-------
3075-
list[str]
3076-
the file names pointing to the selected scenes
3077-
3083+
List[str] or List[tuple[str]]
3084+
If a single return_value is specified: list of values for that attribute
3085+
If multiple return_values are specified: list of tuples containing the requested attributes
30783086
"""
3087+
# Convert return_value to list if it's a string
3088+
if isinstance(return_value, str):
3089+
return_values = [return_value]
3090+
else:
3091+
return_values = return_value
3092+
3093+
return_values_sql = []
3094+
for val in return_values:
3095+
if val == 'mindate':
3096+
return_values_sql.append('start')
3097+
elif val == 'maxdate':
3098+
return_values_sql.append('stop')
3099+
elif val == 'geometry_wkt':
3100+
prefix = 'ST_' if self.driver == 'postgresql' else ''
3101+
return_values_sql.append(f'{prefix}AsText(geometry) as geometry_wkt')
3102+
elif val == 'geometry_wkb':
3103+
prefix = 'ST_' if self.driver == 'postgresql' else ''
3104+
return_values_sql.append(f'{prefix}AsBinary(geometry) as geometry_wkb')
3105+
else:
3106+
return_values_sql.append(val)
3107+
3108+
# Validate that all requested return values exist in the database
3109+
valid_columns = self.get_colnames()
3110+
extra = ['mindate', 'maxdate', 'geometry_wkt', 'geometry_wkb']
3111+
normal_returns = [x for x in return_values if x not in extra]
3112+
invalid_returns = [x for x in normal_returns if x not in valid_columns]
3113+
if invalid_returns:
3114+
invalid_str = ', '.join(invalid_returns)
3115+
msg = (f"The following options are not supported as "
3116+
f"return values: {invalid_str}")
3117+
raise ValueError(msg)
3118+
30793119
arg_valid = [x for x in args.keys() if x in self.get_colnames()]
30803120
arg_invalid = [x for x in args.keys() if x not in self.get_colnames()]
30813121
if len(arg_invalid) > 0:
@@ -3091,6 +3131,7 @@ def select(self, vectorobject=None, mindate=None, maxdate=None, date_strict=True
30913131
arg_format.append("""{0}='{1}'""".format(key, args[key]))
30923132
elif isinstance(args[key], (tuple, list)):
30933133
arg_format.append("""{0} IN ('{1}')""".format(key, "', '".join(map(str, args[key]))))
3134+
30943135
if mindate:
30953136
if isinstance(mindate, datetime):
30963137
mindate = mindate.strftime('%Y%m%dT%H%M%S')
@@ -3102,6 +3143,7 @@ def select(self, vectorobject=None, mindate=None, maxdate=None, date_strict=True
31023143
vals.append(mindate)
31033144
else:
31043145
log.info('WARNING: argument mindate is ignored, must be in format YYYYmmddTHHMMSS')
3146+
31053147
if maxdate:
31063148
if isinstance(maxdate, datetime):
31073149
maxdate = maxdate.strftime('%Y%m%dT%H%M%S')
@@ -3140,7 +3182,10 @@ def select(self, vectorobject=None, mindate=None, maxdate=None, date_strict=True
31403182
subquery = ' WHERE {}'.format(' AND '.join(arg_format))
31413183
else:
31423184
subquery = ''
3143-
query = '''SELECT scene, outname_base FROM data{}'''.format(subquery)
3185+
3186+
# Modify the query to select the requested return values
3187+
query = 'SELECT {}, outname_base FROM data{}'.format(', '.join(return_values_sql), subquery)
3188+
31443189
# the query gets assembled stepwise here
31453190
for val in vals:
31463191
query = query.replace('?', """'{0}'""", 1).format(val)
@@ -3151,12 +3196,18 @@ def select(self, vectorobject=None, mindate=None, maxdate=None, date_strict=True
31513196

31523197
if processdir and os.path.isdir(processdir):
31533198
scenes = [x for x in query_rs
3154-
if len(finder(processdir, [x[1]], regex=True, recursive=recursive)) == 0]
3199+
if len(finder(processdir, [x[-1]], regex=True, recursive=recursive)) == 0]
31553200
else:
31563201
scenes = query_rs
3202+
31573203
ret = []
31583204
for x in scenes:
3159-
ret.append(self.encode(x[0]))
3205+
# If only one return value was requested, append just that value
3206+
if len(return_values) == 1:
3207+
ret.append(self.encode(x[0]))
3208+
else:
3209+
# If multiple return values were requested, append a tuple of all values
3210+
ret.append(tuple(self.encode(val) for val in x[:-1])) # Exclude outname_base
31603211

31613212
return ret
31623213

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@ progressbar2
77
psycopg2
88
pyyaml
99
requests
10+
shapely
1011
spatialist>=0.15.2
1112
sqlalchemy>=1.4,<2.0
1213
sqlalchemy-utils>=0.37

tests/test_drivers.py

Lines changed: 28 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
from spatialist import Vector
99
from sqlalchemy import Table, MetaData, Column, Integer, String
1010
from geoalchemy2 import Geometry
11+
from shapely import wkt
1112

1213
metadata = MetaData()
1314

@@ -177,6 +178,20 @@ def test_archive(tmpdir, testdata):
177178
assert len(out) == 1
178179
assert isinstance(out[0], str)
179180

181+
out = db.select(vv=1, return_value=['mindate', 'geometry_wkt', 'geometry_wkb'])
182+
assert len(out) == 1
183+
assert isinstance(out[0], tuple)
184+
assert out[0][0] == '20150222T170750'
185+
geom = wkt.loads('POLYGON(('
186+
'8.505644 50.295261, 12.0268 50.688881, '
187+
'11.653832 52.183979, 8.017178 51.788181, '
188+
'8.505644 50.295261))')
189+
assert wkt.loads(out[0][1]) == geom
190+
assert out[0][2] == geom.wkb
191+
192+
with pytest.raises(ValueError):
193+
out = db.select(vv=1, return_value=['foobar'])
194+
180195
db.insert(testdata['s1_3'])
181196
db.insert(testdata['s1_4'])
182197
db.drop_element(testdata['s1_3'])
@@ -202,9 +217,11 @@ def test_archive2(tmpdir, testdata):
202217
assert not os.path.isfile(dbfile)
203218
assert Vector(shp).nfeatures == 1
204219

205-
with pytest.raises(OSError):
206-
with pyroSAR.Archive(dbfile) as db:
220+
with pyroSAR.Archive(dbfile) as db:
221+
with pytest.raises(OSError):
207222
db.import_outdated(testdata['archive_old_csv'])
223+
with pytest.raises(RuntimeError):
224+
db.import_outdated('foobar')
208225

209226
# the archive_old_bbox database contains a relative file name for the scene
210227
# so that it can be reimported into the new database. The working directory
@@ -249,6 +266,15 @@ def test_archive_postgres(tmpdir, testdata):
249266
out = db.select(vv=1, acquisition_mode=('IW', 'EW'))
250267
assert len(out) == 1
251268
assert isinstance(out[0], str)
269+
270+
out = db.select(vv=1, return_value=['scene', 'start'])
271+
assert len(out) == 1
272+
assert isinstance(out[0], tuple)
273+
assert out[0][1] == '20150222T170750'
274+
275+
with pytest.raises(ValueError):
276+
out = db.select(vv=1, return_value=['foobar'])
277+
252278
db.add_tables(mytable)
253279
assert 'mytable' in db.get_tablenames()
254280
with pytest.raises(TypeError):

0 commit comments

Comments
 (0)