Skip to content

Commit b173353

Browse files
authored
Merge pull request #67 from UDST/mnl_w_mct_df
New MergedChoiceTable feature: `from_df()` construction
2 parents 54c936d + f1ec684 commit b173353

File tree

8 files changed

+134
-47
lines changed

8 files changed

+134
-47
lines changed

.travis.yml

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,13 @@ python:
44
- "2.7"
55
- "3.5"
66
- "3.6"
7-
8-
matrix:
9-
include:
10-
- python: "3.7" # temp solution until travis supports python 3.7 more cleanly
11-
dist: xenial
12-
sudo: true
7+
- "3.7"
8+
- "3.8"
139

1410
install:
1511
- pip install .
1612
- pip install -r requirements-dev.txt
17-
- # extra tests run if urbansim is present, but it can't install with python 3.7
18-
- if [ "$TRAVIS_PYTHON_VERSION" != "3.7" ]; then pip install urbansim; fi
13+
- pip install orca urbansim # extra tests run if urbansim is present
1914
- pip list
2015
- pip show choicemodels
2116

CHANGELOG.md

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
11
# ChoiceModels change log
2-
### 0.2.2dev0 (2019-04-23)
32

4-
- adds a function `choicemodels.tools.parallel_lottery_choices()` to run iterative lottery choice batches in parallel rather than seqeuentially.
3+
### 0.2.2.dev1 (2020-04-14)
4+
5+
- adds a `MergedChoiceTable.from_df()` as an alternative constructor
6+
7+
### 0.2.2.dev0 (2019-04-23)
8+
9+
- adds a function `choicemodels.tools.parallel_lottery_choices()` to run iterative lottery choice batches in parallel rather than sequentially
510

611
### 0.2.1 (2019-01-30)
712

choicemodels/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@
33

44
from .mnl import MultinomialLogit, MultinomialLogitResults
55

6-
version = __version__ = '0.2.2dev0'
6+
version = __version__ = '0.2.2.dev1'

choicemodels/tools/mergedchoicetable.py

Lines changed: 58 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -114,12 +114,6 @@ def __init__(self, observations, alternatives, chosen_alternatives=None,
114114
raise ValueError("Cannot sample without replacement with sample_size {} "
115115
"and n_alts {}".format(sample_size, alternatives.shape[0]))
116116

117-
if (observations.index.name == None):
118-
observations.index.name = 'obs_id'
119-
120-
if (alternatives.index.name == None):
121-
alternatives.index.name = 'alt_id'
122-
123117
# TO DO - check that dfs have unique indexes
124118
# TO DO - check that chosen_alternatives correspond correctly to other dfs
125119
# TO DO - same with weights (could join onto other tables and then split off)
@@ -130,14 +124,25 @@ def __init__(self, observations, alternatives, chosen_alternatives=None,
130124
observations = observations.drop(chosen_alternatives.name, axis='columns')
131125
chosen_alternatives.name = '_' + alternatives.index.name # avoids conflicts
132126

133-
# Check for duplicate column names
134-
obs_cols = list(observations.columns) + list(observations.index.names)
135-
alt_cols = list(alternatives.columns) + list(alternatives.index.names)
136-
dupes = set(obs_cols) & set(alt_cols)
127+
# Allow missing obs and alts, to support .from_df() constructor
128+
if (observations is not None):
129+
130+
# Provide default names for the observation and alternative ids
131+
132+
if (observations.index.name == None):
133+
observations.index.name = 'obs_id'
134+
135+
if (alternatives.index.name == None):
136+
alternatives.index.name = 'alt_id'
137+
138+
# Check for duplicate column names
139+
obs_cols = list(observations.columns) + list(observations.index.names)
140+
alt_cols = list(alternatives.columns) + list(alternatives.index.names)
141+
dupes = set(obs_cols) & set(alt_cols)
137142

138-
if len(dupes) > 0:
139-
raise ValueError("Both input tables contain column {}. Please ensure "
140-
"column names are unique before merging".format(dupes))
143+
if len(dupes) > 0:
144+
raise ValueError("Both input tables contain column {}. Please ensure "
145+
"column names are unique before merging".format(dupes))
141146

142147
# Normalize weights to a pd.Series
143148
if (weights is not None) & isinstance(weights, str):
@@ -172,17 +177,48 @@ def __init__(self, observations, alternatives, chosen_alternatives=None,
172177
self.weights_2d = weights_2d
173178

174179
# Build choice table...
180+
# Allow missing obs and alts, to support .from_df() constructor
181+
if (observations is not None):
175182

176-
if (len(observations) == 0) or (len(alternatives) == 0):
177-
self._merged_table = pd.DataFrame()
183+
if (len(observations) == 0) or (len(alternatives) == 0):
184+
self._merged_table = pd.DataFrame()
178185

179-
elif (sample_size is None):
180-
self._merged_table = self._build_table_without_sampling()
186+
elif (sample_size is None):
187+
self._merged_table = self._build_table_without_sampling()
181188

182-
else:
183-
self._merged_table = self._build_table()
189+
else:
190+
self._merged_table = self._build_table()
184191

185192

193+
@classmethod
194+
def from_df(cls, df):
195+
"""
196+
Create a MergedChoiceTable instance from a pre-generated DataFrame.
197+
198+
Each chooser's rows should be contiguous. If applicable, the chosen alternative
199+
should be listed first. This ordering is used by MergedChoiceTable.to_frame(),
200+
and appears to be an undocumented requirement of the legacy MNL code.
201+
202+
Parameters
203+
----------
204+
df : pandas.DataFrame
205+
Table with a two-level MultiIndex where the first level corresponds to the
206+
index of the observations and the second to the index of the alternatives.
207+
May include a binary column named 'chosen' indicating observed choices.
208+
209+
Returns
210+
-------
211+
MergedChoiceTable
212+
213+
"""
214+
obj = cls(observations = None, alternatives = None)
215+
obj._merged_table = df
216+
217+
# TO DO: sort the dataframe so that rows are automatically in a consistent order
218+
219+
return obj
220+
221+
186222
def _merge_interaction_terms(self, df):
187223
"""
188224
Merges interaction terms (if they exist) onto the input DataFrame.
@@ -436,7 +472,7 @@ def observation_id_col(self):
436472
str
437473
438474
"""
439-
return self.observations.index.name
475+
return self._merged_table.index.names[0]
440476

441477

442478
@property
@@ -450,7 +486,7 @@ def alternative_id_col(self):
450486
str
451487
452488
"""
453-
return self.alternatives.index.name
489+
return self._merged_table.index.names[1]
454490

455491

456492
@property
@@ -464,7 +500,7 @@ def choice_col(self):
464500
str or None
465501
466502
"""
467-
if (self.chosen_alternatives is not None):
503+
if ('chosen' in self._merged_table.columns):
468504
return 'chosen'
469505

470506
else:

docs/source/index.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ ChoiceModels
88

99
ChoiceModels is a Python library for discrete choice modeling, with utilities for sampling, simulation, and other ancillary tasks. It's part of the `Urban Data Science Toolkit <https://docs.udst.org>`__ (UDST).
1010

11-
v0.2.2dev0, released April 23, 2019
11+
v0.2.2.dev1, released April 14, 2020
1212

1313

1414
Contents

requirements.txt

Lines changed: 0 additions & 7 deletions
This file was deleted.

setup.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,9 @@
44
with open('README.md', 'r') as f:
55
long_description = f.read()
66

7-
with open('requirements.txt') as f:
8-
install_requires = f.readlines()
9-
install_requires = [item.strip() for item in install_requires]
10-
117
setup(
128
name='choicemodels',
13-
version='0.2.2dev0',
9+
version='0.2.2.dev1',
1410
description='Tools for discrete choice estimation',
1511
long_description=long_description,
1612
author='UDST',
@@ -23,8 +19,18 @@
2319
'Programming Language :: Python :: 3',
2420
'Programming Language :: Python :: 3.5',
2521
'Programming Language :: Python :: 3.6',
22+
'Programming Language :: Python :: 3.7',
23+
'Programming Language :: Python :: 3.8',
2624
'License :: OSI Approved :: BSD License'
2725
],
2826
packages=['choicemodels', 'choicemodels.tools'],
29-
install_requires=install_requires
27+
install_requires=[
28+
'numpy >= 1.14',
29+
'pandas >= 0.23',
30+
'patsy >= 0.5',
31+
'pylogit >= 0.2.2',
32+
'scipy >= 1.0',
33+
'statsmodels >= 0.8, <0.11; python_version <"3.6"',
34+
'statsmodels >= 0.8; python_version >="3.6"'
35+
]
3036
)

tests/test_mct.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,3 +228,55 @@ def test_join_key_name_conflict(obs, alts):
228228
MergedChoiceTable(obs, alts, chosen_alternatives=alts.index.name)
229229

230230

231+
def test_obs_id_property(obs, alts):
232+
"""
233+
Observation id should be available for a merged table.
234+
235+
"""
236+
mct = choicemodels.tools.MergedChoiceTable(obs, alts,
237+
sample_size = 2,
238+
chosen_alternatives = 'choice')
239+
240+
assert(mct.observation_id_col == 'oid')
241+
242+
243+
def test_alt_id_property(obs, alts):
244+
"""
245+
Alternative id should be available for a merged table.
246+
247+
"""
248+
mct = choicemodels.tools.MergedChoiceTable(obs, alts,
249+
sample_size = 2,
250+
chosen_alternatives = 'choice')
251+
252+
assert(mct.alternative_id_col == 'aid')
253+
254+
255+
def test_choice_col_property(obs, alts):
256+
"""
257+
Choice column property should be present if applicable, or None.
258+
259+
"""
260+
mct = choicemodels.tools.MergedChoiceTable(obs, alts,
261+
sample_size = 2,
262+
chosen_alternatives = 'choice')
263+
assert(mct.choice_col == 'chosen')
264+
265+
mct = choicemodels.tools.MergedChoiceTable(obs, alts,
266+
sample_size = 2)
267+
assert(mct.choice_col == None)
268+
269+
270+
def test_from_df(obs, alts):
271+
"""
272+
MCT creation from a dataframe should work smoothly.
273+
274+
"""
275+
df = choicemodels.tools.MergedChoiceTable(obs, alts,
276+
sample_size = 2,
277+
chosen_alternatives = 'choice').to_frame()
278+
279+
mct = choicemodels.tools.MergedChoiceTable.from_df(df)
280+
281+
assert(df.equals(mct.to_frame()))
282+

0 commit comments

Comments
 (0)