Skip to content

Commit a37feeb

Browse files
authored
Merge pull request #283 from PolyMathOrg/add_shuffling
Add shuffling
2 parents f085adf + c4df4c1 commit a37feeb

17 files changed

+542
-404
lines changed

src/DataFrame-Tests/DataFrameAggrGroupTest.class.st

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ Class {
44
#instVars : [
55
'df'
66
],
7-
#category : #'DataFrame-Tests-Core'
7+
#category : 'DataFrame-Tests-Core'
88
}
99

1010
{ #category : #running }

src/DataFrame-Tests/DataFrameHeadTailTest.class.st

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ Class {
55
'df',
66
'series'
77
],
8-
#category : #'DataFrame-Tests-Core'
8+
#category : 'DataFrame-Tests-Core'
99
}
1010

1111
{ #category : #running }

src/DataFrame-Tests/DataFrameInternalTest.class.st

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ Class {
44
#instVars : [
55
'df'
66
],
7-
#category : #'DataFrame-Tests-Core'
7+
#category : 'DataFrame-Tests-Core'
88
}
99

1010
{ #category : #running }

src/DataFrame-Tests/DataFrameStatsTest.class.st

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ Class {
44
#instVars : [
55
'df'
66
],
7-
#category : #'DataFrame-Tests-Core'
7+
#category : 'DataFrame-Tests-Core'
88
}
99

1010
{ #category : #running }

src/DataFrame-Tests/DataFrameTest.class.st

+83
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,62 @@ Class {
77
#category : #'DataFrame-Tests-Core'
88
}
99

10+
{ #category : #private }
11+
DataFrameTest >> expectedShuffledDataFrameWithSeedOne [
12+
"Answer the data frame that testShuffledWithSeed expects when df is shuffled with seed 1. The expected row order is chosen according to the major version of the running Pharo image, for the reason explained below. In theory, shuffling with a fixed random seed should produce the same result across different versions of Pharo, provided they share the same implementation of the random number generator. The purpose of using a fixed seed is to make the random number generation deterministic: given the same seed, the sequence of random numbers generated is the same every time.
13+
However, the RNG differs between Pharo 12, which uses primitive 231, and the previous Pharo versions, which used a native implementation in privateNextSeed.
14+
The change was introduced in this commit: https://github.com/pharo-project/pharo/commit/bf22496dbd0996ee470c9f85a7cc076e01dff57f
15+
So we answer a different data frame for Pharo 12 and later than for earlier versions, because the resulting row order changes with this implementation change."
16+
17+
| expected |
18+
expected := SystemVersion current major >= 12
19+
ifTrue: [
20+
(DataFrame withRows: #(
21+
('Barcelona' 1.609 true)
22+
('London' 8.788 false)
23+
('Dubai' 2.789 true))
24+
rowNames: #( 'A' 'C' 'B'))
25+
yourself ]
26+
ifFalse: [
27+
(DataFrame withRows: #(
28+
('Dubai' 2.789 true)
29+
('London' 8.788 false)
30+
('Barcelona' 1.609 true))
31+
rowNames: #('B' 'C' 'A'))
32+
yourself ].
33+
expected columnNames: #( 'City' 'Population' 'BeenThere' ).
34+
^ expected
35+
36+
]
37+
38+
{ #category : #private }
39+
DataFrameTest >> expectedShuffledDataFrameWithSeedTwo [
40+
"Answer the data frame that testShuffledWithSeed expects when df is shuffled with seed 2. The expected row order is chosen according to the major version of the running Pharo image, for the reason explained below. In theory, shuffling with a fixed random seed should produce the same result across different versions of Pharo, provided they share the same implementation of the random number generator. The purpose of using a fixed seed is to make the random number generation deterministic: given the same seed, the sequence of random numbers generated is the same every time.
41+
However, the RNG differs between Pharo 12, which uses primitive 231, and the previous Pharo versions, which used a native implementation in privateNextSeed.
42+
The change was introduced in this commit: https://github.com/pharo-project/pharo/commit/bf22496dbd0996ee470c9f85a7cc076e01dff57f
43+
So we answer a different data frame for Pharo 12 and later than for earlier versions, because the resulting row order changes with this implementation change."
44+
45+
| expected |
46+
expected := SystemVersion current major >= 12
47+
ifTrue: [
48+
(DataFrame withRows: #(
49+
('Dubai' 2.789 true)
50+
('Barcelona' 1.609 true)
51+
('London' 8.788 false) )
52+
rowNames: #('B' 'A' 'C'))
53+
yourself ]
54+
ifFalse: [
55+
(DataFrame withRows: #(
56+
('London' 8.788 false)
57+
('Barcelona' 1.609 true)
58+
('Dubai' 2.789 true))
59+
rowNames: #('C' 'A' 'B'))
60+
yourself ].
61+
expected columnNames: #( 'City' 'Population' 'BeenThere' ).
62+
^ expected
63+
64+
]
65+
1066
{ #category : #running }
1167
DataFrameTest >> setUp [
1268

@@ -920,6 +976,19 @@ DataFrameTest >> testColumns [
920976
self assert: df columns equals: expectedCollection
921977
]
922978

979+
{ #category : #tests }
980+
DataFrameTest >> testColumnsAllBut [
981+
"Check that sending columnsAllBut: with #(BeenThere) to df answers a data frame equal to one holding only the City and Population columns, with row names A, B and C."
982+
| expectedDataFrame |
983+
expectedDataFrame := DataFrame withRows: #( #( 'Barcelona' 1.609 ) #( 'Dubai' 2.789 ) #( 'London' 8.788 ) ).
984+
expectedDataFrame rowNames: #( 'A' 'B' 'C' ).
985+
expectedDataFrame columnNames: #( 'City' 'Population').
986+
987+
self
988+
assert: (df columnsAllBut: #(BeenThere))
989+
equals: expectedDataFrame
990+
]
991+
923992
{ #category : #tests }
924993
DataFrameTest >> testColumnsAt [
925994

@@ -5164,6 +5233,20 @@ DataFrameTest >> testSelectEmptyDataFrame [
51645233
self assert: actual equals: expected
51655234
]
51665235

5236+
{ #category : #tests }
5237+
DataFrameTest >> testShuffledWithSeed [
5238+
"Check that shuffleWithSeed: answers the expected data frame for seed 1 and for seed 2. The expected frames come from expectedShuffledDataFrameWithSeedOne and expectedShuffledDataFrameWithSeedTwo, which pick the row order according to the Pharo major version."
5239+
| expected |
5240+
5241+
expected := self expectedShuffledDataFrameWithSeedOne.
5242+
self assert: (df shuffleWithSeed: 1) equals: expected.
5243+
5244+
expected := self expectedShuffledDataFrameWithSeedTwo.
5245+
self assert: (df shuffleWithSeed: 2) equals: expected.
5246+
5247+
5248+
]
5249+
51675250
{ #category : #tests }
51685251
DataFrameTest >> testSortBy [
51695252

src/DataFrame-Tests/DataPearsonCorrelationMethodTest.class.st

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Class {
22
#name : #DataPearsonCorrelationMethodTest,
33
#superclass : #TestCase,
4-
#category : #'DataFrame-Tests-Math'
4+
#category : 'DataFrame-Tests-Math'
55
}
66

77
{ #category : #tests }

src/DataFrame-Tests/DataSeriesTest.class.st

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ Class {
55
'series',
66
'keyArray'
77
],
8-
#category : #'DataFrame-Tests-Core'
8+
#category : 'DataFrame-Tests-Core'
99
}
1010

1111
{ #category : #running }

src/DataFrame/Array.extension.st

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
Extension { #name : #Array }
1+
Extension { #name : 'Array' }
22

3-
{ #category : #'*DataFrame' }
3+
{ #category : '*DataFrame' }
44
Array >> calculateDataType [
55

66
| types |
@@ -18,7 +18,7 @@ Array >> calculateDataType [
1818
^ UndefinedObject
1919
]
2020

21-
{ #category : #'*DataFrame' }
21+
{ #category : '*DataFrame' }
2222
Array >> leastCommonSuperclassOf: firstClass and: secondClass [
2323
"Determines the closest element of class hierarchy which is the common ancestor of two given classes"
2424

@@ -40,7 +40,7 @@ Array >> leastCommonSuperclassOf: firstClass and: secondClass [
4040
^ Object
4141
]
4242

43-
{ #category : #'*DataFrame' }
43+
{ #category : '*DataFrame' }
4444
Array >> sortIfPossible [
4545
"Sort if possible"
4646

src/DataFrame/Behavior.extension.st

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
Extension { #name : #Behavior }
1+
Extension { #name : 'Behavior' }
22

3-
{ #category : #'*DataFrame-Core-Base' }
3+
{ #category : '*DataFrame-Core-Base' }
44
Behavior >> inheritsFromOrEqualTo: aClass [
55
"Answer whether the argument, aClass, is equal to the receiver or belongs to its superclass chain."
66

src/DataFrame/Collection.extension.st

+7-7
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
Extension { #name : #Collection }
1+
Extension { #name : 'Collection' }
22

3-
{ #category : #'*DataFrame-Core-Base' }
3+
{ #category : '*DataFrame-Core-Base' }
44
Collection >> ** arg [
55
"Binary-operator shorthand: answer the result of sending raisedTo: to the receiver with arg."
66
^ self raisedTo: arg
77
]
88

9-
{ #category : #'*DataFrame' }
9+
{ #category : '*DataFrame' }
1010
Collection >> asDataFrame [
1111

1212
| numberOfRows numberOfColumns dataFrame |
@@ -31,26 +31,26 @@ Collection >> asDataFrame [
3131
^ dataFrame
3232
]
3333

34-
{ #category : #'*DataFrame-Core-Base' }
34+
{ #category : '*DataFrame-Core-Base' }
3535
Collection >> asDataSeries [
3636
"Answer a DataSeries built from the receiver by sending newFrom: to the DataSeries class."
3737
^ DataSeries newFrom: self
3838
]
3939

40-
{ #category : #'*DataFrame-Core-Base' }
40+
{ #category : '*DataFrame-Core-Base' }
4141
Collection >> closeTo: aCollection [
4242
"Answer true when every element of (self - aCollection) is closeTo: 0. The fold starts from true, so an empty difference answers true."
4343
^ (self - aCollection) inject: true into: [ :accum :each |
4444
accum and: (each closeTo: 0) ]
4545
]
4646

47-
{ #category : #'*DataFrame-Core-Base' }
47+
{ #category : '*DataFrame-Core-Base' }
4848
Collection >> variance [
4949
"Answer the square of the value the receiver answers to stdev."
5050
^ self stdev squared
5151
]
5252

53-
{ #category : #'*DataFrame' }
53+
{ #category : '*DataFrame' }
5454
Collection >> withSeries: aDataSeries collect: twoArgBlock [
5555
"Collect and return the result of evaluating twoArgBlock with corresponding elements from this collection and aDataSeries."
5656
| result |

src/DataFrame/DataCorrelationMethod.class.st

+6-4
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
Class {
2-
#name : #DataCorrelationMethod,
3-
#superclass : #Object,
4-
#category : #'DataFrame-Math'
2+
#name : 'DataCorrelationMethod',
3+
#superclass : 'Object',
4+
#category : 'DataFrame-Math',
5+
#package : 'DataFrame',
6+
#tag : 'Math'
57
}
68

7-
{ #category : #comparing }
9+
{ #category : 'comparing' }
810
DataCorrelationMethod class >> between: x and: y [
911
"Calculate the correlation coefficient between two data series"
1012
self subclassResponsibility

0 commit comments

Comments
 (0)