Skip to content

Commit f88ac0a

Browse files
committed
WIP Moving Assessments and Comparisons to UnnamedDataFrame
1 parent e14bf83 commit f88ac0a

File tree

8 files changed

+86
-129
lines changed

8 files changed

+86
-129
lines changed

solidago/src/solidago/primitives/datastructure/unnamed_dataframe.py

Lines changed: 51 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99

1010

1111
class UnnamedDataFrame(DataFrame):
12+
row_cls: Optional[type]=None
13+
1214
def __init__(self,
1315
key_names: Optional[Union[str, list[str]]]=None,
1416
value_names: Optional[Union[str, list[str]]]=None,
@@ -19,14 +21,15 @@ def __init__(self,
1921
**kwargs
2022
):
2123
""" Defines a DataFrame wrapper """
22-
super().__init__(*args, **kwargs)
2324
to_list = lambda l: [l] if isinstance(l, str) else l
25+
key_names, value_names = to_list(key_names), to_list(value_names)
26+
columns = sum([ n if n else list() for n in (key_names, value_names) ], list())
27+
super().__init__(*args, **kwargs, columns=columns)
2428
self.meta = SimpleNamespace()
2529
self.meta.name = name
26-
self.meta.key_names, self.meta.value_names = to_list(key_names), to_list(value_names)
30+
self.meta.key_names, self.meta.value_names = key_names, value_names
2731
assert isinstance(self.key_names, list) or not self.key_names
2832
assert isinstance(self.value_names, list) or not self.value_names
29-
columns = sum([ n if n else list() for n in (self.key_names, self.value_names) ], list())
3033
for column in columns:
3134
if column not in self.columns:
3235
self[column] = float("nan")
@@ -46,17 +49,6 @@ def value_names(self):
4649
def default_value(self) -> Any:
4750
return self.meta._default_value
4851

49-
def value2row(self, value: Optional[Any]=None, **kwargs) -> Series:
50-
if value is None:
51-
value = dict()
52-
elif isinstance(value, (dict, Series)):
53-
value = { key: v for key, v in value.items() }
54-
elif isinstance(value, Iterable):
55-
value = { self.value_names[index]: v for index, v in enumerate(value) }
56-
else:
57-
value = { self.value_names[0]: value }
58-
return Series(kwargs | value)
59-
6052
def row2key(self, row: Series) -> Any:
6153
if not self.key_names:
6254
return row
@@ -66,7 +58,7 @@ def row2key(self, row: Series) -> Any:
6658

6759
def row2value(self, row: Series) -> Any:
    """Extract the value part of a row.

    With no value names, the whole row is returned, wrapped in
    `self.row_cls` when the class declares one. With exactly one value
    name, the bare cell is returned. With several, a tuple of cells is
    returned in `value_names` order.
    """
    if not self.value_names:
        # `row_cls` is a class attribute, so it has to be reached through
        # `self`; a bare `row_cls` is an undefined name inside a method.
        return row if self.row_cls is None else self.row_cls(row)
    if len(self.value_names) == 1:
        return row[self.value_names[0]]
    return tuple(row[name] for name in self.value_names)
@@ -75,44 +67,51 @@ def df2value(self, df: DataFrame, last_only: Optional[bool]=None) -> Any:
7567
last_only = self.meta._last_only if last_only is None else last_only
7668
if last_only:
7769
return self.row2value(df.iloc[-1])
78-
return df
70+
return type(self)(df)
7971

8072
""" The following methods are more standard """
81-
def add_row(self, value: Optional[Any]=None, **kwargs) -> None:
73+
def input2dict(self, *args, keys_only: bool=False, **kwargs) -> dict:
    """Convert positional and keyword inputs into a `{column: value}` dict.

    Positional arguments are matched, in order, to the key columns followed
    by the value columns (key columns only when `keys_only` is set). Keyword
    arguments name their column explicitly. Values of key columns are cast
    to `str`; all other values are passed through unchanged.

    Raises
    ------
    AssertionError
        If there are more positional arguments than columns, if a keyword
        repeats a column already filled positionally, or if `keys_only` is
        set and a keyword is not a key column.
    """
    # The constructor accepts None for both name lists, and `list + None`
    # raises TypeError, so both are normalised to lists first.
    key_names = list(self.key_names or [])
    value_names = list(self.value_names or [])
    key_value_columns = key_names if keys_only else key_names + value_names
    assert len(args) <= len(key_value_columns), \
        "More positional arguments than key/value columns"
    positional_columns = key_value_columns[:len(args)]
    assert all(key not in positional_columns for key in kwargs), \
        "A column was given both positionally and by keyword"
    assert (not keys_only) or all(key in key_names for key in kwargs), \
        "Only key columns may be given when keys_only is set"

    def to_value(value, column):
        # Keys are always stored and compared as strings.
        return str(value) if column in key_names else value

    named = {k: to_value(v, k) for k, v in kwargs.items()}
    return named | {k: to_value(v, k) for k, v in zip(positional_columns, args)}
83+
84+
def add_row(self, *args, **kwargs) -> None:
8285
self.index = list(range(len(self)))
83-
kwargs = { k: (str(v) if k in self.key_names else v) for k, v in kwargs.items() }
84-
self.loc[len(self)] = Series(kwargs) if value is None else self.value2row(value, **kwargs)
86+
self.loc[len(self)] = Series(self.input2dict(*args, **kwargs))
8587

8688
def get(self,
8789
*args,
8890
process: bool=True,
8991
last_only: Optional[bool]=None,
9092
**kwargs
9193
) -> Union["UnnamedDataFrame", tuple]:
92-
assert len(args) <= len(self.key_names)
93-
assert all({ key not in self.key_names[:len(args)] for key in kwargs })
94-
kwargs = { k: str(v) for k, v in kwargs.items() }
95-
kwargs |= { key: str(value) for key, value in zip(self.key_names[:len(args)], args) }
94+
kwargs = self.input2dict(*args, keys_only=True, **kwargs)
9695
df = self[reduce(lambda a, x: a & x, [ self[k] == v for k, v in kwargs.items() ], True)]
97-
key_names = [ n for n in self.key_names if n not in kwargs ]
96+
key_names = [ key_name for key_name in self.key_names if key_name not in kwargs ]
9897
if key_names or not process:
9998
return type(self)(df, key_names=key_names)
10099
return self.default_value if df.empty else self.df2value(df, last_only)
101100

102101
def __contains__(self, *args, **kwargs) -> bool:
103-
return not self.get(*args, **kwargs).empty
102+
return not self.get(*args, process=False, **kwargs).empty
104103

105-
def set(self, value: Optional[Any]=None, *args, **kwargs) -> None:
106-
assert len(args) <= len(self.key_names)
107-
assert all({ key not in self.key_names[:len(args)] for key in kwargs })
108-
kwargs = { k: str(v) for k, v in kwargs.items() }
109-
kwargs |= { k: str(v) for k, v in zip(self.key_names[:len(args)], args) }
110-
df = self.get(process=False, **kwargs)
104+
def set(self, *args, **kwargs) -> None:
    """Insert a row, or overwrite the last row that has the same keys.

    args is assumed to list keys and then values,
    though some may be specified through kwargs.
    """
    row = self.input2dict(*args, **kwargs)
    # Only key columns may drive the lookup: `get` resolves its inputs with
    # keys_only=True and rejects any value column, so values passed by
    # keyword must be filtered out before calling it.
    key_names = self.key_names or []
    keys = {k: v for k, v in row.items() if k in key_names}
    df = self.get(process=False, **keys)
    if df.empty:
        self.add_row(**row)
    else:  # Updates the last row of df
        self.loc[df.iloc[-1].name] = Series(row)
116115

117116
def __or__(self, other: "UnnamedDataFrame") -> "UnnamedDataFrame":
118117
return type(self)(pd.concat([self, other]))
@@ -122,11 +121,26 @@ def load(cls, filename: str) -> "UnnamedDataFrame":
122121
try: return cls(pd.read_csv(filename, keep_default_na=False))
123122
except pd.errors.EmptyDataError: return cls()
124123

124+
def last_only(self) -> "UnnamedDataFrame":
    """Return a new frame that keeps only the last row of each key.

    Rows sharing the same values in all key columns are collapsed to the
    last one, in their original order. When no key columns are declared,
    only the final row is kept. Names, default value and the `last_only`
    setting are carried over to the result.
    """
    # Going through `self.iter(process=False, ...)` yields whole per-group
    # frames rather than rows, so the last row is selected with pandas here.
    frame = DataFrame(self)
    if self.key_names:
        frame = frame.drop_duplicates(subset=self.key_names, keep="last")
    else:
        frame = frame.iloc[-1:]
    return type(self)(
        data=frame,
        key_names=self.key_names,
        value_names=self.value_names,
        name=self.meta.name,
        default_value=self.meta._default_value,
        last_only=self.meta._last_only,
    )
133+
125134
def groupby(self, columns: Optional[list[str]]=None, process: bool=True) -> dict:
126135
return { key: value for key, value in self.iter(columns, process) }
127136

128-
def iter(self, columns: Optional[list[str]]=None, process: bool=True) -> Iterable:
137+
def iter(self,
138+
columns: Optional[list[str]]=None,
139+
process: bool=True,
140+
last_only: Optional[bool]=None
141+
) -> Iterable:
129142
columns = columns if columns else self.key_names
143+
last_only = self.meta._last_only if last_only is None else last_only
130144
if columns is None:
131145
for _, row in self.iterrows():
132146
if process:
@@ -135,14 +149,15 @@ def iter(self, columns: Optional[list[str]]=None, process: bool=True) -> Iterabl
135149
yield row
136150
return None
137151
if not columns:
138-
yield list(), self.df2value(self) if process else self
152+
yield list(), self.df2value(self, last_only) if process else self
139153
return None
140154
groups = DataFrame(self).groupby(columns)
141155
kn = [ n for n in self.key_names if n not in columns ]
142156
for key in list(groups.groups.keys()):
143157
key_tuple = key if isinstance(key, tuple) else (key,)
144158
df = groups.get_group(key_tuple)
145-
yield key, type(self)(df, key_names=kn) if kn or not process else self.df2value(df)
159+
v = type(self)(df, key_names=kn) if kn or not process else self.df2value(df, last_only)
160+
yield key, v
146161

147162
def __iter__(self, process: bool=True) -> Iterable:
148163
return self.iter(process=process)
Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,26 @@
1-
from typing import Optional, Union
1+
from typing import Optional, Union, Any
22
from pandas import DataFrame, Series
33

4-
from solidago.primitives.datastructure import NestedDictOfRowLists
4+
from solidago.primitives.datastructure import UnnamedDataFrame
55

66

77
class Assessment(Series):
88
def __init__(self, *args, **kwargs):
99
super().__init__(*args, **kwargs)
1010

1111

12-
class Assessments(NestedDictOfRowLists):
12+
class Assessments(UnnamedDataFrame):
1313
row_cls: type=Assessment
1414

1515
def __init__(self,
16-
d: Optional[Union[NestedDictOfRowLists, dict, DataFrame]]=None,
16+
data: Optional[Any]=None,
1717
key_names=["username", "criterion", "entity_name"],
18-
save_filename="assessments.csv"
18+
name="assessments",
19+
last_only=True,
20+
**kwargs
1921
):
20-
super().__init__(d, key_names, save_filename)
22+
super().__init__(key_names, None, name, None, last_only, data, **kwargs)
2123

2224
def get_evaluators(self, entity: Union[str, "Entity"]) -> set[str]:
23-
return self[{ "entity_name": entity }].get_set("username")
25+
return set(self.get(entity_name=entity)["username"])
2426

solidago/src/solidago/state/comparisons/base.py

Lines changed: 14 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -1,106 +1,46 @@
11
import numpy as np
22

3-
from typing import Optional, Union, Mapping, Literal
3+
from typing import Optional, Union, Mapping, Literal, Any
44
from pandas import DataFrame, Series
55

6-
from solidago.primitives.datastructure import NestedDictOfRowLists
6+
from solidago.primitives.datastructure import UnnamedDataFrame
77

88

99
class Comparison(Series):
1010
def __init__(self, *args, **kwargs):
1111
super().__init__(*args, **kwargs)
1212

1313

14-
class Comparisons(NestedDictOfRowLists):
14+
class Comparisons(UnnamedDataFrame):
1515
row_cls: type=Comparison
1616

1717
def __init__(self,
18-
d: Optional[Union[NestedDictOfRowLists, dict, DataFrame]]=None,
18+
data: Optional[Any]=None,
1919
key_names=["username", "criterion", "left_name", "right_name"],
20-
save_filename="comparisons.csv"
20+
name="comparisons",
21+
last_only=True,
22+
**kwargs
2123
):
22-
super().__init__(d, key_names, save_filename)
24+
super().__init__(key_names, None, name, None, last_only, data, **kwargs)
2325

2426
def get_evaluators(self, entity: Union[str, "Entity"]) -> set[str]:
25-
evaluators = self[{ "left_name": entity }].get_set("username")
26-
return evaluators | self[{ "right_name": entity }].get_set("username")
27-
28-
def order_by_entities(self, other_keys_first: bool=False) -> "Comparisons":
29-
""" Returns an object Comparison, with the same set of comparisons,
30-
but now ordered by entities. Key names in self are replugged into the result,
31-
except for "left_name" and "right_name". Instead, an "other_name" is added
32-
to account for the other entity that the comparison is against.
33-
Moreover, we add an entry to each dict, which says whether "entity_name"
34-
was the left or the right video.
35-
36-
Returns
37-
-------
38-
ordered_comparisons: Comparisons
39-
With key_names == ["entity_name", "other_name", *] or [*, "entity_name", "other_name"]
40-
depending on parameter other_keys_first
41-
"""
42-
other_key_names = [
43-
kn for kn in self.key_names
44-
if kn not in ("entity_name", "other_name", "left_name", "right_name")
45-
]
46-
if other_keys_first:
47-
key_names = other_key_names + ["entity_name", "other_name"]
48-
else:
49-
key_names = ["entity_name", "other_name"] + other_key_names
50-
51-
if "entity_name" in self.key_names:
52-
return self.reorder_keys(key_names)
53-
assert "left_name" in self.key_names and "right_name" in self.key_names, "" \
54-
"Comparisons must have columns `left_name` and `right_name`"
55-
56-
def to_keys(non_entity_keys: list[str], entity_name: str, other_name: str) -> list[str]:
57-
if other_keys_first:
58-
return non_entity_keys + [entity_name, other_name]
59-
return [entity_name, other_name] + non_entity_keys
60-
61-
def invert(comparison):
62-
if "comparison" in comparison:
63-
comparison["comparison"] = - comparison["comparison"]
64-
return comparison
65-
66-
result = Comparisons(key_names=key_names)
67-
left_key_index = self.key_names.index("left_name")
68-
right_key_index = self.key_names.index("right_name")
69-
for keys, comparison in self:
70-
left_name, right_name = keys[left_key_index], keys[right_key_index]
71-
non_entity_keys = [
72-
key for index, key in enumerate(keys)
73-
if index not in (left_key_index, right_key_index)
74-
]
75-
new_comparison = dict(zip(self.key_names, keys)) | dict(comparison)
76-
result.add_row(
77-
to_keys(non_entity_keys, left_name, right_name),
78-
new_comparison | dict(location="left")
79-
)
80-
result.add_row(
81-
to_keys(non_entity_keys, right_name, left_name),
82-
invert(new_comparison) | dict(location="right")
83-
)
84-
return result
27+
evaluators = set(self.get(left_name=entity)["username"])
28+
return evaluators | set(self.get(right_name=entity)["username"])
8529

8630
def compared_entity_indices(self,
8731
entity_name2index: dict[str, int],
88-
last_comparison_only: bool=True,
32+
last_only: bool=True,
8933
) -> dict[str, list[int]]:
9034
key_indices = { loc: self.key_names.index(f"{loc}_name") for loc in ("left", "right") }
91-
returns = "last_row" if last_comparison_only else "rows"
9235
return {
9336
location: [
9437
entity_name2index[keys[key_indices[location]]]
95-
for keys, _ in self.iter(returns)
38+
for keys, _ in self.iter(last_only=last_only)
9639
] for location in ("left", "right")
9740
}
9841

99-
def normalized_comparisons(self, last_comparison_only: bool) -> Series:
100-
df = self.to_df(last_row_only=last_comparison_only)
101-
if df.empty:
102-
return Series()
103-
return df["comparison"] / df["comparison_max"]
42+
def normalized_comparisons(self) -> Series:
43+
return Series() if self.empty else self["comparison"] / self["comparison_max"]
10444

10545
def to_comparison_dict(self,
10646
entities: "Entities",

solidago/tests/modules/test_trust_propagation.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def test_lipschitrust_simple():
1515
["0", "2", "Personhood", 1, 0],
1616
["2", "3", "Personhood", 1, 0],
1717
["3", "4", "Personhood", 1, 0]
18-
], columns=["by", "to", "kind", "weight", "priority"])
18+
])
1919
users = LipschiTrust(pretrust_value=0.8, decay=0.8, sink_vouch=5.0, error=1e-8)(users, vouches)
2020
assert users.get("0")["trust_score"] == 0.8
2121
assert users.get("4")["trust_score"] > 0
@@ -50,7 +50,7 @@ def test_lipschitrust_ten_users():
5050
["8", "3", "Personhood", 1, 0],
5151
["9", "4", "Personhood", 1, 0],
5252
["9", "5", "Personhood", 1, 0],
53-
], columns=["by", "to", "kind", "weight", "priority"])
53+
])
5454

5555
trust_propagator = LipschiTrust(pretrust_value=0.8, decay=0.8, sink_vouch=5.0, error=1e-8)
5656
users = trust_propagator(users, vouches)

solidago/tests/modules/test_voting_right_assignment.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -178,11 +178,11 @@ def test_affine_overtrust():
178178
users = Users(dict(username=list(range(5)), trust_score=[0.5, 0.6, 0.0, 0.4, 1]))
179179
entities = Entities(list(range(6)))
180180
made_public = MadePublic()
181-
made_public["0", "0"] = True
182-
made_public["0", "3"] = True
183-
made_public["1", "5"] = True
184-
made_public["2", "1"] = True
185-
made_public["4", "3"] = True
181+
made_public.set(True, "0", "0")
182+
made_public.set(True, "0", "3")
183+
made_public.set(True, "1", "5")
184+
made_public.set(True, "2", "1")
185+
made_public.set(True, "4", "3")
186186

187187
assessments = Assessments()
188188
comparisons = Comparisons()

solidago/tests/state/test_made_public.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
def test_made_public():
66
made_public = MadePublic()
7-
made_public.set(False, "aidjango", "entity_4")
8-
made_public.set(True, "le_science4all", "entity_4")
7+
made_public.set("aidjango", "entity_4", False)
8+
made_public.set("le_science4all", "entity_4", True)
99
assert not made_public.get("aidjango", "entity_4")
1010
assert made_public.get("le_science4all", "entity_4")

solidago/tests/state/test_voting_rights.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44

55
def test_voting_rights():
66
voting_rights = VotingRights()
7-
voting_rights.set(0.5, "aidjango", "entity_4", "default")
8-
voting_rights.set(1., "le_science4all", "entity_4", "largely_recommended")
7+
voting_rights.set("aidjango", "entity_4", "default", 0.5)
8+
voting_rights.set("le_science4all", "entity_4", "largely_recommended", 1)
99
assert voting_rights.get("aidjango", "entity_4", "default") == 0.5
1010
assert voting_rights.get("le_science4all", "entity_4", "largely_recommended") == 1
1111
assert voting_rights.get("le_science4all", "entity_4", "default") == 0

solidago/tests/state/test_vouches.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,6 @@
44

55
def test_vouches():
66
vouches = Vouches()
7-
vouches.set((0.5, 0), "aidjango", "le_science4all", "Personhood")
7+
vouches.set("aidjango", "le_science4all", "Personhood", 0.5, 0)
88
assert vouches.get("aidjango", "le_science4all", "Personhood") == (0.5, 0)
99
assert vouches.get("le_science4all", "aidjango", "Personhood") == (0, - float("inf"))

0 commit comments

Comments
 (0)