Skip to content

Commit fed83b4

Browse files
committed
Merge branch 'develop' of github.com:microsoft/interpret into develop
2 parents 6cb82eb + 30066a1 commit fed83b4

28 files changed

+1358
-1132
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ enc_temp_folder/
1818
*.egg-info/
1919
*.ipynb_checkpoints/
2020
venv/
21+
.pytest_cache/
2122

2223
# General
2324
*.bak

R/DESCRIPTION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: interpret
22
Title: Fit Interpretable Models and Explain Blackbox Machine Learning
3-
Version: 0.1.21
4-
Date: 2019-10-07
3+
Version: 0.1.22
4+
Date: 2019-10-10
55
Description: Machine Learning package for training interpretable models and explaining blackbox systems. Historically, the most intelligible models were not very accurate, and the most accurate models were not intelligible. Microsoft Research has developed an algorithm called the Explainable Boosting Machine (EBM) which has both high accuracy and intelligibility. EBM uses machine learning techniques like bagging and boosting to breathe new life into traditional GAMs (Generalized Additive Models). This makes them as accurate as random forests and gradient boosted trees, and also enhances their intelligibility and editability. Details on the EBM algorithm can be found in the paper by Rich Caruana, Yin Lou, Johannes Gehrke, Paul Koch, Marc Sturm, and Noemie Elhadad (2015, <doi:10.1145/2783258.2788613>).
66
URL: https://github.com/microsoft/interpret
77
BugReports: https://github.com/microsoft/interpret/issues

R/src/Makevars

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
COREDIR=../../core
66

77
CXX_STD = CXX11
8-
PKG_CPPFLAGS= -I$(COREDIR) -I$(COREDIR)/inc -DEBMCORE_R
8+
PKG_CPPFLAGS= -I$(COREDIR) -I$(COREDIR)/inc -DEBMCORE_R -DEBMCORE_EXPORTS
99
PKG_CXXFLAGS=$(CXX_VISIBILITY)
1010

1111
OBJECTS = interpret_R.o $(COREDIR)/DataSetByFeature.o $(COREDIR)/DataSetByFeatureCombination.o $(COREDIR)/InteractionDetection.o $(COREDIR)/Logging.o $(COREDIR)/SamplingWithReplacement.o $(COREDIR)/Training.o

R/src/interpret_R.cpp

Lines changed: 64 additions & 67 deletions
Large diffs are not rendered by default.

azure-pipelines.yml

Lines changed: 53 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -4,28 +4,29 @@ jobs:
44
matrix:
55
Linux:
66
image.name: 'ubuntu-16.04'
7-
Windows:
8-
image.name: 'windows-2019'
97
Mac:
108
image.name: 'macOS-10.13'
9+
Windows:
10+
image.name: 'windows-2019'
1111
maxParallel: 3
1212
pool:
1313
vmImage: '$(image.name)'
1414
steps:
1515
- script: |
16+
sudo apt-get -y update
1617
sudo apt-get -y install g++-multilib
17-
displayName: 'Install prereq for linux.'
18-
condition: in(variables['image.name'], 'ubuntu-16.04')
18+
/bin/sh ./build.sh -32bit
19+
condition: startsWith(variables['image.name'], 'ubuntu')
20+
displayName: 'Building native code (Linux)'
1921
- script: |
20-
chmod +x build.sh
21-
./build.sh -32bit
22-
condition: in(variables['image.name'], 'ubuntu-16.04', 'macOS-10.13')
23-
displayName: 'Building/moving native code (linux/mac).'
22+
/bin/sh ./build.sh -32bit
23+
condition: startsWith(variables['image.name'], 'macOS')
24+
displayName: 'Building native code (Mac)'
2425
- script: |
25-
set PATH=C:\Windows\system32\;C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\MSBuild\Current\Bin\
26-
build.bat -32bit
27-
condition: in(variables['image.name'], 'windows-2019')
28-
displayName: 'Building/moving native code. (win)'
26+
set PATH=%PATH%;C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\MSBuild\Current\Bin\
27+
.\build.bat -32bit
28+
condition: startsWith(variables['image.name'], 'windows')
29+
displayName: 'Building native code (Windows)'
2930
- task: CopyFiles@2
3031
condition: succeeded()
3132
inputs:
@@ -34,6 +35,7 @@ jobs:
3435
targetFolder: '$(Build.ArtifactStagingDirectory)'
3536
displayName: 'Move binary to staging'
3637
- task: PublishBuildArtifacts@1
38+
condition: succeeded()
3739
inputs:
3840
pathtoPublish: $(Build.ArtifactStagingDirectory)
3941
artifactName: native-$(image.name)
@@ -45,10 +47,10 @@ jobs:
4547
matrix:
4648
Linux:
4749
image.name: 'ubuntu-16.04'
48-
Windows:
49-
image.name: 'windows-2019'
5050
Mac:
5151
image.name: 'macOS-10.13'
52+
Windows:
53+
image.name: 'windows-2019'
5254
maxParallel: 3
5355
pool:
5456
vmImage: '$(image.name)'
@@ -59,25 +61,28 @@ jobs:
5961
downloadType: 'specific'
6062
displayName: 'Download build artifacts'
6163
- task: CopyFiles@2
64+
condition: succeeded()
6265
inputs:
6366
sourceFolder: '$(System.ArtifactsDirectory)'
6467
contents: '**/lib_ebmcore_*'
6568
targetFolder: 'staging'
6669
flattenFolders: true
6770
displayName: 'Move binary to staging directory'
6871
- script: |
72+
sudo apt-get -y update
6973
sudo apt-get -y install g++-multilib
70-
displayName: 'Install prereq for linux.'
71-
condition: in(variables['image.name'], 'ubuntu-16.04')
74+
/bin/sh ./tests/core/test_core_api.sh -nobuildcore
75+
condition: startsWith(variables['image.name'], 'ubuntu')
76+
displayName: 'Testing native code (Linux)'
7277
- script: |
7378
/bin/sh ./tests/core/test_core_api.sh -nobuildcore
74-
condition: in(variables['image.name'], 'ubuntu-16.04', 'macOS-10.13')
75-
displayName: 'Testing native code (linux/mac).'
79+
condition: startsWith(variables['image.name'], 'macOS')
80+
displayName: 'Testing native code (Mac)'
7681
- script: |
77-
set PATH=C:\Windows\system32\;C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\MSBuild\Current\Bin\
82+
set PATH=%PATH%;C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\MSBuild\Current\Bin\
7883
.\tests\core\test_core_api.bat -nobuildcore
79-
condition: in(variables['image.name'], 'windows-2019')
80-
displayName: 'Testing native code. (win)'
84+
condition: startsWith(variables['image.name'], 'windows')
85+
displayName: 'Testing native code (Windows)'
8186
8287
- job: 'Build_JS_Inline'
8388
pool:
@@ -98,10 +103,11 @@ jobs:
98103
targetFolder: '$(Build.ArtifactStagingDirectory)'
99104
displayName: 'Move assets to staging'
100105
- task: PublishBuildArtifacts@1
106+
condition: succeeded()
101107
inputs:
102108
pathtoPublish: $(Build.ArtifactStagingDirectory)
103109
artifactName: interpret-inline-bundle
104-
displayName: 'Publish interpret inline library'
110+
displayName: 'Publish interpret-inline.js library'
105111

106112
- job: 'Build_Package'
107113
dependsOn: ['Build_Native', 'Build_JS_Inline']
@@ -114,25 +120,29 @@ jobs:
114120
downloadType: 'specific'
115121
displayName: 'Download build artifacts'
116122
- task: UsePythonVersion@0
123+
condition: succeeded()
117124
inputs:
118125
versionSpec: '3.6'
119126
architecture: 'x64'
120127
- script: python -m pip install --upgrade pip setuptools wheel
128+
condition: succeeded()
121129
displayName: 'Install tools'
122130
- task: CopyFiles@2
131+
condition: succeeded()
123132
inputs:
124133
sourceFolder: '$(System.ArtifactsDirectory)'
125-
contents: '**/lib_ebmcore_*'
134+
contents: '**/lib_ebmcore_*_x64*'
126135
targetFolder: 'python/interpret-core/interpret/lib'
127136
flattenFolders: true
128137
displayName: 'Move binary to Python layer'
129138
- task: CopyFiles@2
139+
condition: succeeded()
130140
inputs:
131141
sourceFolder: '$(System.ArtifactsDirectory)'
132142
contents: '**/interpret-inline.js'
133143
targetFolder: 'python/interpret-core/interpret/lib'
134144
flattenFolders: true
135-
displayName: 'Move Interpret inline to Python layer'
145+
displayName: 'Move interpret-inline.js to Python layer'
136146
- script: python setup.py bdist_wheel -d ../../staging
137147
condition: succeeded()
138148
workingDirectory: 'python/interpret-core'
@@ -149,6 +159,7 @@ jobs:
149159
targetFolder: '$(Build.ArtifactStagingDirectory)/wheel'
150160
displayName: 'Move wheel for Build Artifact'
151161
- task: PublishBuildArtifacts@1
162+
condition: succeeded()
152163
inputs:
153164
pathtoPublish: '$(Build.ArtifactStagingDirectory)/wheel'
154165
artifactName: 'wheel'
@@ -176,9 +187,9 @@ jobs:
176187
WindowsPython37:
177188
python.version: '3.7'
178189
image.name: 'windows-2019'
179-
# MacPython35:
180-
# python.version: '3.5'
181-
# image.name: 'macOS-10.13'
190+
MacPython35:
191+
python.version: '3.5'
192+
image.name: 'macOS-10.13'
182193
MacPython36:
183194
python.version: '3.6'
184195
image.name: 'macOS-10.13'
@@ -195,57 +206,53 @@ jobs:
195206
downloadType: 'specific'
196207
displayName: 'Download build artifacts'
197208
- task: CopyFiles@2
209+
condition: succeeded()
198210
inputs:
199211
sourceFolder: '$(System.ArtifactsDirectory)'
200212
contents: '**/lib_ebmcore_*'
201213
targetFolder: 'python/interpret-core/interpret/lib'
202214
flattenFolders: true
203215
displayName: 'Move binary to Python layer'
204216
- task: CopyFiles@2
217+
condition: succeeded()
205218
inputs:
206219
sourceFolder: '$(System.ArtifactsDirectory)'
207220
contents: '**/interpret-inline.js'
208221
targetFolder: 'python/interpret-core/interpret/lib'
209222
flattenFolders: true
210223
displayName: 'Move Interpret inline to Python layer'
211224
- task: UsePythonVersion@0
225+
condition: succeeded()
212226
inputs:
213227
versionSpec: '$(python.version)'
214228
architecture: 'x64'
215229
- script: python -m pip install --upgrade pip setuptools wheel
230+
condition: succeeded()
216231
displayName: 'Install tools'
217-
- script: |
218-
python -m pip install numpy scipy
232+
- script: python -m pip install numpy scipy
233+
condition: succeeded()
219234
workingDirectory: python/interpret-core
220235
displayName: 'Install numpy/scipy first for mis-specified pip packages.'
221236
- script: |
222237
mkdir -p ~/.matplotlib
223238
echo "backend: TkAgg" >> ~/.matplotlib/matplotlibrc
224-
condition: in(variables['image.name'], 'macOS-10.13')
239+
condition: startsWith(variables['image.name'], 'macOS')
225240
displayName: 'Matplotlib patch for mac.'
226-
- script: |
227-
python -m pip install -r dev-requirements.txt
228-
workingDirectory: python/interpret-core
229-
displayName: 'Install requirements (Windows)'
230-
condition: in(variables['image.name'], 'windows-2019')
231-
- script: |
232-
export LIBRARY_PATH=$LIBRARY_PATH:/opt/hostedtoolcache/Python/3.6.9/x64/lib
233-
export LIBRARY_PATH=$LIBRARY_PATH:/opt/hostedtoolcache/Python/3.7.4/x64/lib
234-
python -m pip install -r dev-requirements.txt
241+
- script: python -m pip install -r dev-requirements.txt
242+
condition: succeeded()
235243
workingDirectory: python/interpret-core
236-
displayName: 'Install requirements (Non-Windows)'
237-
condition: in(variables['image.name'], 'ubuntu-16.04', 'macOS-10.13')
244+
displayName: 'Install requirements (Linux/Mac/Windows)'
238245
- script: |
239246
set PATH=%PATH%;%GeckoWebDriver%
240247
python -m pytest -vv -n auto --runslow --runselenium --doctest-modules --junitxml=junit/test-results.xml --cov=com --cov-report=xml --cov-report=html
241248
workingDirectory: python/interpret-core
242-
condition: in(variables['image.name'], 'windows-2019')
249+
condition: startsWith(variables['image.name'], 'windows')
243250
displayName: 'Run pytest (Windows)'
244251
- script: |
245252
python -m pytest -vv -n auto --runslow --doctest-modules --junitxml=junit/test-results.xml --cov=com --cov-report=xml --cov-report=html
246253
workingDirectory: python/interpret-core
247-
condition: in(variables['image.name'], 'ubuntu-16.04', 'macOS-10.13')
248-
displayName: 'Run pytest (Non-Windows)'
254+
condition: or(startsWith(variables['image.name'], 'ubuntu'), startsWith(variables['image.name'], 'macOS'))
255+
displayName: 'Run pytest (Linux/Mac)'
249256
- task: PublishTestResults@2
250257
condition: succeededOrFailed()
251258
inputs:
@@ -257,13 +264,11 @@ jobs:
257264
codeCoverageTool: Cobertura
258265
summaryFileLocation: '$(System.DefaultWorkingDirectory)/**/coverage.xml'
259266
reportDirectory: '$(System.DefaultWorkingDirectory)/**/htmlcov'
260-
condition: in(variables['image.name'], 'windows-2019')
267+
condition: startsWith(variables['image.name'], 'windows')
261268
displayName: 'Publish test coverage results'
262269

263270
- job: 'Publish_Package'
264-
dependsOn:
265-
- 'Test'
266-
- 'Test_Native'
271+
dependsOn: ['Test_Native', 'Test']
267272
pool:
268273
vmImage: 'ubuntu-16.04'
269274
steps:

core/CachedThreadResources.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#ifndef CACHED_THREAD_RESOURCES_H
66
#define CACHED_THREAD_RESOURCES_H
77

8+
#include <vector>
89
#include <queue>
910
#include <stdlib.h> // malloc, realloc, free
1011
#include <stddef.h> // size_t, ptrdiff_t
@@ -19,7 +20,7 @@ class CompareTreeNodeSplittingGain final {
1920
public:
2021
// TODO : check how efficient this is. Is there a faster way to do this
2122
constexpr bool operator() (const TreeNode<bClassification> * const & lhs, const TreeNode<bClassification> * const & rhs) const {
22-
return rhs->m_UNION.afterExaminationForPossibleSplitting.splitGain < lhs->m_UNION.afterExaminationForPossibleSplitting.splitGain;
23+
return rhs->m_UNION.m_afterExaminationForPossibleSplitting.m_splitGain < lhs->m_UNION.m_afterExaminationForPossibleSplitting.m_splitGain;
2324
}
2425
};
2526

@@ -103,15 +104,15 @@ class CachedTrainingThreadResources {
103104
return m_aThreadByteBuffer1;
104105
}
105106

106-
// TODO : we can probably avoid redoing any tree growing IF realloc doesn't move the memory since all the internal pointers would still be valid in that case
107107
EBM_INLINE bool GrowThreadByteBuffer2(const size_t cByteBoundaries) {
108108
// by adding cByteBoundaries and shifting our existing size, we do 2 things:
109109
// 1) we ensure that if we have zero size, we'll get a non-zero size after the shift
110110
// 2) we'll always get back an odd number of items, which is good because we always have an odd number of TreeNodeChildren
111111
EBM_ASSERT(0 == m_cThreadByteBufferCapacity2 % cByteBoundaries);
112112
m_cThreadByteBufferCapacity2 = cByteBoundaries + (m_cThreadByteBufferCapacity2 << 1);
113113
LOG_N(TraceLevelInfo, "Growing CachedTrainingThreadResources::ThreadByteBuffer2 to %zu", m_cThreadByteBufferCapacity2);
114-
// TODO : can we use malloc here? We only need realloc if we need to keep the existing data
114+
// TODO : use malloc here. our tree objects have internal pointers, so we're going to dispose of our work anyways
115+
// There is no way to check if the array was re-allocated or not without invoking undefined behavior, so we don't get a benefit if the array can be resized with realloc
115116
void * const aNewThreadByteBuffer = realloc(m_aThreadByteBuffer2, m_cThreadByteBufferCapacity2);
116117
if(UNLIKELY(nullptr == aNewThreadByteBuffer)) {
117118
// according to the realloc spec, if realloc fails to allocate the new memory, it returns nullptr BUT the old memory is valid.

core/DataSetByFeature.h

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
// Licensed under the MIT license.
33
// Author: Paul Koch <[email protected]>
44

5-
#ifndef DATA_SET_INTERNAL_H
6-
#define DATA_SET_INTERNAL_H
5+
#ifndef DATA_SET_BY_FEATURE_H
6+
#define DATA_SET_BY_FEATURE_H
77

88
#include <stddef.h> // size_t, ptrdiff_t
99

@@ -12,7 +12,6 @@
1212
#include "Logging.h" // EBM_ASSERT & LOG
1313
#include "FeatureCore.h"
1414

15-
// TODO: rename this to DataSetByFeature
1615
class DataSetByFeature final {
1716
const FractionalDataType * const m_aResidualErrors;
1817
const StorageDataTypeCore * const * const m_aaInputData;
@@ -32,9 +31,8 @@ class DataSetByFeature final {
3231
EBM_ASSERT(nullptr != m_aResidualErrors);
3332
return m_aResidualErrors;
3433
}
35-
// TODO: we can change this to take the m_iInputData value directly, which we get from the user! (this also applies to the other dataset)
36-
// TODO: rename this to GetInputDataPointer
37-
EBM_INLINE const StorageDataTypeCore * GetDataPointer(const FeatureCore * const pFeature) const {
34+
// TODO: we can change this to take the m_iFeatureData value directly, which we get from a loop index
35+
EBM_INLINE const StorageDataTypeCore * GetInputDataPointer(const FeatureCore * const pFeature) const {
3836
EBM_ASSERT(nullptr != pFeature);
3937
EBM_ASSERT(pFeature->m_iFeatureData < m_cFeatures);
4038
EBM_ASSERT(nullptr != m_aaInputData);
@@ -48,4 +46,4 @@ class DataSetByFeature final {
4846
}
4947
};
5048

51-
#endif // DATA_SET_INTERNAL_H
49+
#endif // DATA_SET_BY_FEATURE_H

core/DataSetByFeatureCombination.cpp

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ EBM_INLINE static const StorageDataTypeCore * const * ConstructInputData(const s
183183

184184
EBM_ASSERT(nullptr != aInputDataFrom);
185185

186-
const FeatureCombinationCore::FeatureCombinationEntry * pFeatureCombinationEntry = &pFeatureCombination->m_FeatureCombinationEntry[0];
186+
const FeatureCombinationCore::FeatureCombinationEntry * pFeatureCombinationEntry = ARRAY_TO_POINTER_CONST(pFeatureCombination->m_FeatureCombinationEntry);
187187
InputDataPointerAndCountBins dimensionInfo[k_cDimensionsMax];
188188
InputDataPointerAndCountBins * pDimensionInfo = &dimensionInfo[0];
189189
EBM_ASSERT(0 < cFeatures);
@@ -263,28 +263,25 @@ EBM_INLINE static const StorageDataTypeCore * const * ConstructInputData(const s
263263
}
264264

265265
DataSetByFeatureCombination::DataSetByFeatureCombination(const bool bAllocateResidualErrors, const bool bAllocatePredictorScores, const bool bAllocateTargetData, const size_t cFeatureCombinations, const FeatureCombinationCore * const * const apFeatureCombination, const size_t cInstances, const IntegerDataType * const aInputDataFrom, const void * const aTargets, const FractionalDataType * const aPredictorScoresFrom, const size_t cVectorLength)
266-
: m_aResidualErrors(bAllocateResidualErrors ? ConstructResidualErrors(cInstances, cVectorLength) : static_cast<FractionalDataType *>(INVALID_POINTER))
267-
, m_aPredictorScores(bAllocatePredictorScores ? ConstructPredictorScores(cInstances, cVectorLength, aPredictorScoresFrom) : static_cast<FractionalDataType *>(INVALID_POINTER))
268-
, m_aTargetData(bAllocateTargetData ? ConstructTargetData(cInstances, static_cast<const IntegerDataType *>(aTargets)) : static_cast<const StorageDataTypeCore *>(INVALID_POINTER))
266+
: m_aResidualErrors(bAllocateResidualErrors ? ConstructResidualErrors(cInstances, cVectorLength) : static_cast<FractionalDataType *>(nullptr))
267+
, m_aPredictorScores(bAllocatePredictorScores ? ConstructPredictorScores(cInstances, cVectorLength, aPredictorScoresFrom) : static_cast<FractionalDataType *>(nullptr))
268+
, m_aTargetData(bAllocateTargetData ? ConstructTargetData(cInstances, static_cast<const IntegerDataType *>(aTargets)) : static_cast<const StorageDataTypeCore *>(nullptr))
269269
, m_aaInputData(0 == cFeatureCombinations ? nullptr : ConstructInputData(cFeatureCombinations, apFeatureCombination, cInstances, aInputDataFrom))
270270
, m_cInstances(cInstances)
271-
, m_cFeatureCombinations(cFeatureCombinations) {
272-
271+
, m_cFeatureCombinations(cFeatureCombinations)
272+
, m_bAllocateResidualErrors(bAllocateResidualErrors)
273+
, m_bAllocatePredictorScores(bAllocatePredictorScores)
274+
, m_bAllocateTargetData(bAllocateTargetData) {
273275
EBM_ASSERT(0 < cInstances);
274276
}
275277

276278
DataSetByFeatureCombination::~DataSetByFeatureCombination() {
277279
LOG_0(TraceLevelInfo, "Entered ~DataSetByFeatureCombination");
278280

279-
if(INVALID_POINTER != m_aResidualErrors) {
280-
free(m_aResidualErrors);
281-
}
282-
if(INVALID_POINTER != m_aPredictorScores) {
283-
free(m_aPredictorScores);
284-
}
285-
if(INVALID_POINTER != m_aTargetData) {
286-
free(const_cast<StorageDataTypeCore *>(m_aTargetData));
287-
}
281+
free(m_aResidualErrors);
282+
free(m_aPredictorScores);
283+
free(const_cast<StorageDataTypeCore *>(m_aTargetData));
284+
288285
if(nullptr != m_aaInputData) {
289286
EBM_ASSERT(0 < m_cFeatureCombinations);
290287
const StorageDataTypeCore * const * paInputData = m_aaInputData;

0 commit comments

Comments
 (0)