Skip to content
Open
Show file tree
Hide file tree
Changes from 127 commits
Commits
Show all changes
131 commits
Select commit Hold shift + click to select a range
77b4744
Create test_profile.ipynb
ClarkXu0625 Apr 28, 2025
5a5b025
Copy important result notebook
ClarkXu0625 Apr 28, 2025
23f2918
Create README.md
ClarkXu0625 Apr 28, 2025
8737180
Update test_profile.ipynb
ClarkXu0625 Apr 28, 2025
a9d5a5f
Update test_profile.ipynb
ClarkXu0625 Apr 28, 2025
e551a7c
Update constant_non0_projection_exp_4_21.ipynb
ClarkXu0625 Apr 28, 2025
9cf6e10
Update README.md
ClarkXu0625 Apr 28, 2025
e3c2481
update profile result
ClarkXu0625 Apr 28, 2025
80db642
Update README.md
ClarkXu0625 Apr 28, 2025
d3056c4
Update README.md
ClarkXu0625 Apr 28, 2025
e0d2dd2
Update README.md
ClarkXu0625 Apr 28, 2025
c4a1d60
Update README.md
ClarkXu0625 Apr 28, 2025
ef49fd8
save previous experiments
ClarkXu0625 Apr 29, 2025
fbc4a86
to profile oblique splitter
ClarkXu0625 Apr 29, 2025
f996350
check local implementation
ClarkXu0625 Apr 29, 2025
70f9a1e
Re-organize
ClarkXu0625 Apr 29, 2025
160a4e2
constant nonzeros per row
ClarkXu0625 Apr 29, 2025
1d1ed0f
save constant per row results
ClarkXu0625 Apr 29, 2025
272e3b7
Update constant_non0_projection_exp_4_21.ipynb
ClarkXu0625 Apr 29, 2025
05568df
Update create_dateset.ipynb
ClarkXu0625 Apr 29, 2025
c9c576f
Update test_profile.ipynb
ClarkXu0625 Apr 29, 2025
beb3803
Update test_profile.ipynb
ClarkXu0625 Apr 29, 2025
928295c
rename
ClarkXu0625 Apr 29, 2025
3684265
Update README.md
ClarkXu0625 Apr 29, 2025
d829d73
results with larger projection matrices
ClarkXu0625 Apr 29, 2025
e4018f6
update running time on linux
ClarkXu0625 Apr 29, 2025
4ea6000
remove repeating cells
ClarkXu0625 Apr 29, 2025
401065c
fix wrong plots
ClarkXu0625 Apr 29, 2025
d09ca58
add comparison between training time at different non-zeros in projec…
ClarkXu0625 Apr 29, 2025
8252f75
Update README.md
ClarkXu0625 Apr 29, 2025
5349f06
test profile resutls
ClarkXu0625 Apr 29, 2025
3bac585
update results
ClarkXu0625 Apr 29, 2025
f73901c
print running time
ClarkXu0625 Apr 29, 2025
169d88f
Update test_profile.ipynb
ClarkXu0625 Apr 29, 2025
d07c204
Update constant_nonzero_projection_exp.ipynb
ClarkXu0625 May 1, 2025
13b9266
Update test_profile.ipynb
ClarkXu0625 May 1, 2025
dd53597
Update .gitmodules
ClarkXu0625 May 1, 2025
962759f
Update .gitmodules
ClarkXu0625 May 1, 2025
6d46aec
modified sklearn
ClarkXu0625 May 1, 2025
35eccbf
AMD profile
ClarkXu0625 May 1, 2025
5273736
Update constant_nonzero_projection_exp.ipynb
ClarkXu0625 May 1, 2025
5c92515
update test profile
ClarkXu0625 May 1, 2025
4818956
update uprof
ClarkXu0625 May 1, 2025
ef24768
import clock
ClarkXu0625 May 1, 2025
a695b5f
print time
ClarkXu0625 May 1, 2025
cb82663
Update .gitignore
ClarkXu0625 May 1, 2025
8ae0cc2
Update .gitignore
ClarkXu0625 May 1, 2025
5652311
profiles results
ClarkXu0625 May 6, 2025
b8adc6d
Update README.md
ClarkXu0625 May 6, 2025
0d75964
change in param
ClarkXu0625 May 6, 2025
c95a380
change directory
ClarkXu0625 May 6, 2025
e292c0a
uProf results backup
ClarkXu0625 May 6, 2025
d6f9975
update plots - n——jovs versus training time
ClarkXu0625 May 6, 2025
4906e95
add comments 5/6
May 6, 2025
fd87a34
uprof update
ClarkXu0625 May 8, 2025
06c4558
put functions to utils
May 12, 2025
a21aa6d
Create shuffle_benchmark.ipynb
May 12, 2025
279d637
Update _oblique_splitter.pyx
May 12, 2025
d8decee
Update _utils.pyx
ClarkXu0625 May 12, 2025
5dd10c1
floyd result
ClarkXu0625 May 13, 2025
8c03798
change fisher yates shuffle to floyd
ClarkXu0625 May 13, 2025
eaed561
uProf results update
ClarkXu0625 May 13, 2025
6b186f8
remove print lines
ClarkXu0625 May 13, 2025
4ec190c
remove line profile prints
ClarkXu0625 May 13, 2025
19c6609
Update plot.py
ClarkXu0625 May 13, 2025
30aec1c
update results
ClarkXu0625 May 13, 2025
dde58da
update for per row
ClarkXu0625 May 13, 2025
1fb3855
update results
ClarkXu0625 May 13, 2025
911d48a
Update shuffle_benchmark.ipynb
ClarkXu0625 May 13, 2025
0e8e0d3
update results
ClarkXu0625 May 13, 2025
0559095
Delete build_tree_lineprofile.txt
ClarkXu0625 May 13, 2025
46eb0a5
Update bin.index
ClarkXu0625 May 13, 2025
dfe27e3
Update test_profile.py
ClarkXu0625 May 13, 2025
60156d2
floyd method updates
ClarkXu0625 May 13, 2025
447202f
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 13, 2025
d0a4d12
delete fisher yates from utils
ClarkXu0625 May 13, 2025
1016bed
Merge branch 'fast-oblique' of https://github.com/ClarkXu0625/treeple…
ClarkXu0625 May 13, 2025
c6f9286
trailing whitespace fix
ClarkXu0625 May 13, 2025
df549ad
add fisher yates back
May 13, 2025
29135e0
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 13, 2025
7241f0e
add sampling_method parameter
May 13, 2025
0e9a667
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 13, 2025
f51af99
Update _oblique_splitter.pyx
May 13, 2025
dcbed61
Merge branch 'fast-oblique' of https://github.com/ClarkXu0625/treeple…
May 13, 2025
52181bb
fix flag
May 13, 2025
99a4a1d
Update bin.index
ClarkXu0625 May 13, 2025
962b663
remove sampling parameters from call
May 13, 2025
6f93ed0
remove fisher
May 13, 2025
9e62461
Update session.uprof
ClarkXu0625 May 13, 2025
b78e083
Update _oblique_splitter.pyx
May 13, 2025
1125df7
Update _oblique_splitter.pyx
ClarkXu0625 May 13, 2025
5e20571
update 5 rep results
ClarkXu0625 May 14, 2025
7d098a5
Update shuffle_benchmark.ipynb
ClarkXu0625 May 14, 2025
63f2732
Update .gitignore
ClarkXu0625 May 14, 2025
510af9e
remove sampling_method parameter
ClarkXu0625 May 14, 2025
8deda9f
add comment
ClarkXu0625 May 14, 2025
a540916
Update _utils.pyx
ClarkXu0625 May 14, 2025
e9ceba3
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 14, 2025
dfcfbc9
Create shuffle_benchmark2.ipynb
ClarkXu0625 May 14, 2025
aa679dd
Merge branch 'main' into fast-oblique
ClarkXu0625 May 15, 2025
bfdea41
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 15, 2025
da4083f
remove fisher yates
May 15, 2025
b42b75e
delete repeat function
May 15, 2025
1c86a58
remove clark experiment folder
ClarkXu0625 May 15, 2025
d17be19
remove sklearn
ClarkXu0625 May 15, 2025
2e37081
update to match original version
ClarkXu0625 May 15, 2025
aad9d9c
delete printline
ClarkXu0625 May 15, 2025
bc94055
delete profiling
ClarkXu0625 May 15, 2025
4930e3c
delete comment
ClarkXu0625 May 15, 2025
daf443d
keep fisher yates for potential use
ClarkXu0625 May 15, 2025
26ccbb5
remove comments
ClarkXu0625 May 15, 2025
1705c7d
remove unused import
ClarkXu0625 May 15, 2025
d94cd3c
remove repeating functions
ClarkXu0625 May 15, 2025
4df1183
Update _utils.pyx
ClarkXu0625 May 15, 2025
a7485c8
remove import that does not match
May 20, 2025
d60d350
Update treeple/tree/_utils.pyx
ClarkXu0625 Jun 3, 2025
69b819a
Update treeple/tree/_utils.pyx
YuxinB Jun 6, 2025
a48afb7
Update _utils.pyx
YuxinB Jun 6, 2025
7e55b2c
cleaner floyd
ClarkXu0625 Jun 11, 2025
069c9bc
style fix
Jun 11, 2025
752d589
Merge pull request #360 from ClarkXu0625/fast-oblique
YuxinB Jun 14, 2025
8b80b3c
MAINT remove extra file
PSSF23 Jul 10, 2025
99a2912
MAINT specify sklearn version
PSSF23 Jul 10, 2025
a173c53
MAINT specify sklearn version
PSSF23 Jul 10, 2025
18ee025
FIX remove nightly sklearn
PSSF23 Jul 10, 2025
98a054b
Revert "FIX remove nightly sklearn"
PSSF23 Jul 10, 2025
320e047
ENH update fork
PSSF23 Jul 10, 2025
d6da555
Reapply "FIX remove nightly sklearn"
PSSF23 Jul 10, 2025
2d31d9b
FIX remove nightly
PSSF23 Jul 10, 2025
899a017
FIX correct if statement for ubunto
PSSF23 Jul 10, 2025
fac9b13
ENH update fork
PSSF23 Jul 10, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion build_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ meson-python>=0.16.0
cython>=3.0.10
ninja
numpy
scikit-learn>=1.5.0
scikit-learn~=1.6.0
click
rich-click
doit
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ requires = [
"setuptools<=65.5",
"packaging",
"Cython>=3.0.10",
"scikit-learn>=1.6.0",
"scikit-learn~=1.6.0",
"scipy>=1.5.0",
"numpy>=1.25; python_version>='3.9'"
]
Expand Down
2 changes: 1 addition & 1 deletion treeple/_lib/sklearn_fork
Submodule sklearn_fork updated 376 files
1 change: 0 additions & 1 deletion treeple/datasets/hyppo.py
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,6 @@ def make_trunk_classification(
)

y = np.concatenate((np.zeros(n_samples // 2), np.ones(n_samples // 2)))

if return_params:
return [X, y, [mu_0_vec, mu_1_vec], [cov, cov]]
return X, y
Expand Down
11 changes: 5 additions & 6 deletions treeple/tree/_oblique_splitter.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ from libcpp.vector cimport vector

from .._lib.sklearn.tree._criterion cimport Criterion
from .._lib.sklearn.tree._utils cimport rand_int, rand_uniform
from ._utils cimport fisher_yates_shuffle
from ._utils cimport floyd_sample_indices


cdef float64_t INFINITY = np.inf
Expand Down Expand Up @@ -194,9 +194,8 @@ cdef class ObliqueSplitter(BaseObliqueSplitter):

self.X = X

# create a helper array for allowing efficient Fisher-Yates
self.indices_to_sample = np.arange(self.max_features * self.n_features,
dtype=np.intp)
# output buffer that receives the n_non_zeros indices drawn by Floyd's sampling method
self.indices_to_sample = np.zeros(self.n_non_zeros, dtype=np.intp)

# XXX: Just to initialize stuff
# self.feature_weights = np.ones((self.n_features,), dtype=float32_t) / self.n_features
Expand Down Expand Up @@ -238,8 +237,8 @@ cdef class ObliqueSplitter(BaseObliqueSplitter):
cdef intp_t[::1] indices_to_sample = self.indices_to_sample
cdef intp_t grid_size = self.max_features * self.n_features

# shuffle indices over the 2D grid to sample using Fisher-Yates
fisher_yates_shuffle(indices_to_sample, grid_size, random_state)
# draw n_non_zeros random indices from the mTry x n_features set of indices
floyd_sample_indices(indices_to_sample, n_non_zeros, grid_size, random_state)

# sample 'n_non_zeros' in a mtry X n_features projection matrix
# which consists of +/- 1's chosen at a 1/2s rate
Expand Down
8 changes: 8 additions & 0 deletions treeple/tree/_utils.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,14 @@ cdef void fisher_yates_shuffle(
) noexcept nogil


# Sample without replacement using Robert Floyd's algorithm (implemented in
# _utils.pyx): stores `k` sampled integers, drawn from a domain of size `n`,
# into `out`, using `random_state` as the random state.
cdef void floyd_sample_indices(
intp_t[::1] out,
intp_t k,
intp_t n,
uint32_t* random_state
) noexcept nogil


cdef int rand_weighted_binary(
float64_t p0,
uint32_t* random_state
Expand Down
35 changes: 35 additions & 0 deletions treeple/tree/_utils.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ cimport numpy as cnp

cnp.import_array()

from libcpp.unordered_set cimport unordered_set

from .._lib.sklearn.tree._utils cimport rand_int, rand_uniform


Expand Down Expand Up @@ -41,6 +43,39 @@ cdef inline void fisher_yates_shuffle(
indices_to_sample[i], indices_to_sample[j]


cdef inline void floyd_sample_indices(
    intp_t[::1] out,
    intp_t k,
    intp_t n,
    uint32_t* random_state
) noexcept nogil:
    """
    Sample without replacement using Robert Floyd's algorithm.

    One value is produced per loop iteration and written to consecutive
    positions of ``out``, starting at ``out[0]``.

    Parameters
    ----------
    out : intp_t[::1]
        Output memoryview where the sampled integers are stored. The first
        ``k`` entries are overwritten.
    k : intp_t
        Number of samples to draw.
    n : intp_t
        Size of the domain to sample from.
    random_state : uint32_t*
        The random state, forwarded to ``rand_int`` on every draw.
    """
    cdef unordered_set[intp_t] chosen
    cdef intp_t upper, candidate
    cdef intp_t slot = 0

    for upper in range(n - k, n):
        # NOTE(review): presumably rand_int(0, upper + 1, ...) yields a value
        # in [0, upper] -- confirm against the sklearn fork's helper.
        candidate = rand_int(0, upper + 1, random_state)

        # A repeated draw is swapped for ``upper`` itself, as in the
        # textbook formulation of Floyd's algorithm.
        if chosen.find(candidate) != chosen.end():
            candidate = upper

        chosen.insert(candidate)
        out[slot] = candidate
        slot += 1


cdef inline int rand_weighted_binary(
float64_t p0,
uint32_t* random_state
Expand Down
Loading