11from collections import OrderedDict
22from functools import partial
3+ import warnings
4+ import pandas
5+ import re
36import numpy as np
47from .blob import unpack
58from .errors import DataJointError
6- import warnings
9+ from . settings import config
710
811
912class key :
@@ -24,6 +27,16 @@ def to_dicts(recarray):
2427 yield dict (zip (recarray .dtype .names , rec .tolist ()))
2528
2629
# Shorthand patterns for ordering by the primary key.  The direction keyword
# must be separated from KEY by whitespace, so that a term such as "KEYDESC"
# is passed through as an attribute name instead of being taken for a shorthand.
# ASC/DESC are matched in any letter case, as in SQL; KEY itself stays
# case-sensitive so that a lowercase attribute name "key" is never expanded.
_KEY_ASCENDING = re.compile(r'^\s*KEY(\s+[Aa][Ss][Cc])?\s*$')
_KEY_DESCENDING = re.compile(r'^\s*KEY\s+[Dd][Ee][Ss][Cc]\s*$')


def _flatten_attribute_list(primary_key, attr):
    """
    Expand the "KEY" shorthand in a sequence of order-by terms.

    :param primary_key: iterable of the primary key attribute names
    :param attr: iterable of order-by terms (strings); a term may be "KEY" or "KEY ASC"
        (replaced by the primary key attributes) or "KEY DESC" (replaced by the primary
        key attributes, each followed by " DESC"). Surrounding whitespace is ignored.
    :return: generator of order-by terms with every KEY shorthand expanded in place;
        all other terms are yielded unchanged
    """
    for term in attr:
        if _KEY_ASCENDING.match(term):
            yield from primary_key
        elif _KEY_DESCENDING.match(term):
            yield from (name + ' DESC' for name in primary_key)
        else:
            yield term
38+
39+
2740class Fetch :
2841 """
2942 A fetch object that handles retrieving elements from the table expression.
@@ -33,36 +46,59 @@ class Fetch:
# Constructor: store the table expression that this Fetch object reads from.
# __call__ later uses it for its primary_key, heading, cursor() and proj().
3346 def __init__ (self , expression ):
3447 self ._expression = expression
3548
36- def __call__ (self , * attrs , offset = None , limit = None , order_by = None , as_dict = False , squeeze = False ):
49+ def __call__ (self , * attrs , offset = None , limit = None , order_by = None , format = None , as_dict = False , squeeze = False ):
3750 """
3851 Fetches the expression results from the database into an np.array or list of dictionaries and unpacks blob attributes.
3952
4053 :param attrs: zero or more attributes to fetch. If not provided, the call will return
4154 all attributes of this relation. If provided, returns tuples with an entry for each attribute.
4255 :param offset: the number of tuples to skip in the returned result
4356 :param limit: the maximum number of tuples to return
44- :param order_by: the list of attributes to order the results. No ordering should be assumed if order_by=None.
57+ :param order_by: a single attribute or the list of attributes to order the results.
58+ No ordering should be assumed if order_by=None.
59+ To reverse the order, add DESC to the attribute name or names: e.g. ("age DESC", "frequency")
60+ To order by primary key, use "KEY" or "KEY DESC"
61+ :param format: Effective when as_dict=False and when attrs is empty
62+ None: default from config['fetch_format'] or 'array' if not configured
63+ "array": use a structured numpy.array
64+ "frame": output pandas.DataFrame.
4565 :param as_dict: returns a list of dictionaries instead of a record array
4666 :param squeeze: if True, remove extra dimensions from arrays
4767 :return: the contents of the relation in the form of a structured numpy.array or a dict list
4868 """
4969
50- # if 'order_by' passed in a string, make into list
51- if isinstance (order_by , str ):
52- order_by = [order_by ]
70+ if order_by is not None :
71+ # if 'order_by' passed in a string, make into list
72+ if isinstance (order_by , str ):
73+ order_by = [order_by ]
74+ # expand "KEY" or "KEY DESC"
75+ order_by = list (_flatten_attribute_list (self ._expression .primary_key , order_by ))
5376
5477 # if attrs are specified then as_dict cannot be true
5578 if attrs and as_dict :
5679 raise DataJointError ('Cannot specify attributes to return when as_dict=True. '
57- 'Use proj() to select attributes or set as_dict=False' )
80+ 'Use '
81+ 'proj() to select attributes or set as_dict=False' )
82+ # format should not be specified with attrs or as_dict=True
83+ if format is not None and (as_dict or attrs ):
84+ raise DataJointError ('Cannot specify output format when as_dict=True or '
85+ 'when attributes are selected to be fetched separately.' )
86+
87+ if format not in {None , "array" , "frame" }:
88+ raise DataJointError ('Fetch output format must be in {{"array", "frame"}} but "{}" was given' .format (format ))
89+
90+ if not (attrs or as_dict ) and format is None :
91+ format = config ['fetch_format' ] # default to array
92+ if format not in {"array" , "frame" }:
93+ raise DataJointError ('Invalid entry "{}" in datajoint.config["fetch_format"]: use "array" or "frame"' .format (format ))
5894
5995 if limit is None and offset is not None :
6096 warnings .warn ('Offset set, but no limit. Setting limit to a large number. '
6197 'Consider setting a limit explicitly.' )
6298 limit = 2 * len (self ._expression )
6399
64100 if not attrs :
65- # fetch all attributes
101+ # fetch all attributes as a numpy.record_array or pandas.DataFrame
66102 cur = self ._expression .cursor (as_dict = as_dict , limit = limit , offset = offset , order_by = order_by )
67103 heading = self ._expression .heading
68104 if as_dict :
@@ -78,6 +114,8 @@ def __call__(self, *attrs, offset=None, limit=None, order_by=None, as_dict=False
78114 ret [name ] = list (map (external_table .get , ret [name ]))
79115 elif heading [name ].is_blob :
80116 ret [name ] = list (map (partial (unpack , squeeze = squeeze ), ret [name ]))
117+ if format == "frame" :
118+ ret = pandas .DataFrame (ret ).set_index (heading .primary_key )
81119 else : # if list of attributes provided
82120 attributes = [a for a in attrs if not is_key (a )]
83121 result = self ._expression .proj (* attributes ).fetch (
0 commit comments