99
1010
1111class UnnamedDataFrame (DataFrame ):
12+ row_cls : Optional [type ]= None
13+
1214 def __init__ (self ,
1315 key_names : Optional [Union [str , list [str ]]]= None ,
1416 value_names : Optional [Union [str , list [str ]]]= None ,
@@ -19,14 +21,15 @@ def __init__(self,
1921 ** kwargs
2022 ):
2123 """ Defines a DataFrame wrapper """
22- super ().__init__ (* args , ** kwargs )
2324 to_list = lambda l : [l ] if isinstance (l , str ) else l
25+ key_names , value_names = to_list (key_names ), to_list (value_names )
26+ columns = sum ([ n if n else list () for n in (key_names , value_names ) ], list ())
27+ super ().__init__ (* args , ** kwargs , columns = columns )
2428 self .meta = SimpleNamespace ()
2529 self .meta .name = name
26- self .meta .key_names , self .meta .value_names = to_list ( key_names ), to_list ( value_names )
30+ self .meta .key_names , self .meta .value_names = key_names , value_names
2731 assert isinstance (self .key_names , list ) or not self .key_names
2832 assert isinstance (self .value_names , list ) or not self .value_names
29- columns = sum ([ n if n else list () for n in (self .key_names , self .value_names ) ], list ())
3033 for column in columns :
3134 if column not in self .columns :
3235 self [column ] = float ("nan" )
@@ -46,17 +49,6 @@ def value_names(self):
4649 def default_value (self ) -> Any :
4750 return self .meta ._default_value
4851
49- def value2row (self , value : Optional [Any ]= None , ** kwargs ) -> Series :
50- if value is None :
51- value = dict ()
52- elif isinstance (value , (dict , Series )):
53- value = { key : v for key , v in value .items () }
54- elif isinstance (value , Iterable ):
55- value = { self .value_names [index ]: v for index , v in enumerate (value ) }
56- else :
57- value = { self .value_names [0 ]: value }
58- return Series (kwargs | value )
59-
6052 def row2key (self , row : Series ) -> Any :
6153 if not self .key_names :
6254 return row
@@ -66,7 +58,7 @@ def row2key(self, row: Series) -> Any:
6658
6759 def row2value (self , row : Series ) -> Any :
6860 if not self .value_names :
69- return row
61+ return row if row_cls is None else row_cls ( row )
7062 if len (self .value_names ) == 1 :
7163 return row [self .value_names [0 ]]
7264 return tuple ( row [name ] for name in self .value_names )
@@ -75,44 +67,51 @@ def df2value(self, df: DataFrame, last_only: Optional[bool]=None) -> Any:
7567 last_only = self .meta ._last_only if last_only is None else last_only
7668 if last_only :
7769 return self .row2value (df .iloc [- 1 ])
78- return df
70+ return type ( self )( df )
7971
8072 """ The following methods are more standard """
81- def add_row (self , value : Optional [Any ]= None , ** kwargs ) -> None :
73+ def input2dict (self , * args , keys_only : bool = False , ** kwargs ) -> dict :
74+ """ args is assumed to list keys and then values,
75+ though some may be specified through kwargs """
76+ key_value_columns = self .key_names if keys_only else (self .key_names + self .value_names )
77+ assert len (args ) <= len (key_value_columns )
78+ assert all ({ key not in key_value_columns [:len (args )] for key in kwargs })
79+ assert (not keys_only ) or all ({ key in self .key_names for key in kwargs })
80+ to_value = lambda v , k : str (v ) if k in self .key_names else v
81+ kwargs = { k : to_value (v , k ) for k , v in kwargs .items () }
82+ return kwargs | { k : to_value (v , k ) for k , v in zip (key_value_columns [:len (args )], args ) }
83+
84+ def add_row (self , * args , ** kwargs ) -> None :
8285 self .index = list (range (len (self )))
83- kwargs = { k : (str (v ) if k in self .key_names else v ) for k , v in kwargs .items () }
84- self .loc [len (self )] = Series (kwargs ) if value is None else self .value2row (value , ** kwargs )
86+ self .loc [len (self )] = Series (self .input2dict (* args , ** kwargs ))
8587
8688 def get (self ,
8789 * args ,
8890 process : bool = True ,
8991 last_only : Optional [bool ]= None ,
9092 ** kwargs
9193 ) -> Union ["UnnamedDataFrame" , tuple ]:
92- assert len (args ) <= len (self .key_names )
93- assert all ({ key not in self .key_names [:len (args )] for key in kwargs })
94- kwargs = { k : str (v ) for k , v in kwargs .items () }
95- kwargs |= { key : str (value ) for key , value in zip (self .key_names [:len (args )], args ) }
94+ kwargs = self .input2dict (* args , keys_only = True , ** kwargs )
9695 df = self [reduce (lambda a , x : a & x , [ self [k ] == v for k , v in kwargs .items () ], True )]
97- key_names = [ n for n in self .key_names if n not in kwargs ]
96+ key_names = [ key_name for key_name in self .key_names if key_name not in kwargs ]
9897 if key_names or not process :
9998 return type (self )(df , key_names = key_names )
10099 return self .default_value if df .empty else self .df2value (df , last_only )
101100
102101 def __contains__ (self , * args , ** kwargs ) -> bool :
103- return not self .get (* args , ** kwargs ).empty
102+ return not self .get (* args , process = False , ** kwargs ).empty
104103
105- def set (self , value : Optional [ Any ] = None , * args , ** kwargs ) -> None :
106- assert len ( args ) <= len ( self . key_names )
107- assert all ({ key not in self . key_names [: len ( args )] for key in kwargs })
108- kwargs = { k : str ( v ) for k , v in kwargs . items () }
109- kwargs |= { k : str ( v ) for k , v in zip ( self .key_names [: len ( args )], args ) }
110- df = self .get (process = False , ** kwargs )
104+ def set (self , * args , ** kwargs ) -> None :
105+ """ args is assumed to list keys and then values,
106+ though some may be specified through kwargs """
107+ kwargs_keys_only = self . input2dict ( * args [: len ( self . key_names )], ** kwargs )
108+ kwargs = self .input2dict ( * args , ** kwargs )
109+ df = self .get (process = False , ** kwargs_keys_only )
111110 if df .empty :
112- self .add_row (value , ** kwargs )
111+ self .add_row (** kwargs )
113112 else : # Updates the last row of df
114113 name = df .iloc [- 1 ].name
115- self .loc [name ] = self . value2row ( value , ** kwargs )
114+ self .loc [name ] = Series ( kwargs )
116115
117116 def __or__ (self , other : "UnnamedDataFrame" ) -> "UnnamedDataFrame" :
118117 return type (self )(pd .concat ([self , other ]))
@@ -122,11 +121,26 @@ def load(cls, filename: str) -> "UnnamedDataFrame":
122121 try : return cls (pd .read_csv (filename , keep_default_na = False ))
123122 except pd .errors .EmptyDataError : return cls ()
124123
124+ def last_only (self ) -> "UnnamedDataFrame" :
125+ return type (self )(
126+ data = [ row for _ , row in self .iter (process = False , last_only = True ) ],
127+ key_names = self .key_names ,
128+ value_names = self .value_names ,
129+ name = self .meta .name ,
130+ default_value = self .meta ._default_value ,
131+ last_only = self .meta ._last_only ,
132+ )
133+
125134 def groupby (self , columns : Optional [list [str ]]= None , process : bool = True ) -> dict :
126135 return { key : value for key , value in self .iter (columns , process ) }
127136
128- def iter (self , columns : Optional [list [str ]]= None , process : bool = True ) -> Iterable :
137+ def iter (self ,
138+ columns : Optional [list [str ]]= None ,
139+ process : bool = True ,
140+ last_only : Optional [bool ]= None
141+ ) -> Iterable :
129142 columns = columns if columns else self .key_names
143+ last_only = self .meta ._last_only if last_only is None else last_only
130144 if columns is None :
131145 for _ , row in self .iterrows ():
132146 if process :
@@ -135,14 +149,15 @@ def iter(self, columns: Optional[list[str]]=None, process: bool=True) -> Iterabl
135149 yield row
136150 return None
137151 if not columns :
138- yield list (), self .df2value (self ) if process else self
152+ yield list (), self .df2value (self , last_only ) if process else self
139153 return None
140154 groups = DataFrame (self ).groupby (columns )
141155 kn = [ n for n in self .key_names if n not in columns ]
142156 for key in list (groups .groups .keys ()):
143157 key_tuple = key if isinstance (key , tuple ) else (key ,)
144158 df = groups .get_group (key_tuple )
145- yield key , type (self )(df , key_names = kn ) if kn or not process else self .df2value (df )
159+ v = type (self )(df , key_names = kn ) if kn or not process else self .df2value (df , last_only )
160+ yield key , v
146161
147162 def __iter__ (self , process : bool = True ) -> Iterable :
148163 return self .iter (process = process )
0 commit comments