@@ -4,7 +4,7 @@ class GroupBy
4
4
5
5
attr_reader :groups
6
6
7
- # Yield over each group created by group_by. A DataFrame is yielded in
7
+ # Iterate over each group created by group_by. A DataFrame is yielded to
8
8
# the block.
9
9
def each_group &block
10
10
groups . keys . each do |k |
@@ -26,6 +26,7 @@ def initialize context, names
26
26
@groups . freeze
27
27
end
28
28
29
+ # Get a Daru::Vector of the size of each group.
29
30
def size
30
31
index =
31
32
if multi_indexed_grouping?
@@ -38,23 +39,79 @@ def size
38
39
Daru ::Vector . new ( values , index : index , name : :size )
39
40
end
40
41
42
+ # Get the first row of each group (same as head(1)).
41
43
def first
  # Shorthand for head with a quantity of 1.
  head 1
end
44
46
47
+ # Get the last row of each group (same as tail(1)).
45
48
def last
  # Shorthand for tail with a quantity of 1.
  tail 1
end
48
51
52
+ # Get the first 'n' rows of each group.
53
+ # @param quantity [Fixnum] (5) The number of rows to take from each group.
54
+ # @example Usage of head
55
+ # df = Daru::DataFrame.new({
56
+ # a: %w{foo bar foo bar foo bar foo foo},
57
+ # b: %w{one one two three two two one three},
58
+ # c: [1 ,2 ,3 ,1 ,3 ,6 ,3 ,8],
59
+ # d: [11 ,22 ,33 ,44 ,55 ,66 ,77 ,88]
60
+ # })
61
+ # df.group_by([:a, :b]).head(1)
62
+ # # =>
63
+ # # #<Daru::DataFrame:82745170 @name = d7003f75-5eb9-4967-9303-c08dd9160224 @size = 6>
64
+ # # a b c d
65
+ # # 1 bar one 2 22
66
+ # # 3 bar three 1 44
67
+ # # 5 bar two 6 66
68
+ # # 0 foo one 1 11
69
+ # # 7 foo three 8 88
70
+ # # 2 foo two 3 33
49
71
def head(quantity = 5)
  # Hand the selection off to select_groups_from, using the :first position.
  select_groups_from(:first, quantity)
end
52
74
75
+ # Get the last 'n' rows of each group.
76
+ # @param quantity [Fixnum] (5) The number of rows to take from each group.
77
+ # @example Usage of tail
78
+ # df = Daru::DataFrame.new({
79
+ # a: %w{foo bar foo bar foo bar foo foo},
80
+ # b: %w{one one two three two two one three},
81
+ # c: [1 ,2 ,3 ,1 ,3 ,6 ,3 ,8],
82
+ # d: [11 ,22 ,33 ,44 ,55 ,66 ,77 ,88]
83
+ # })
84
+ # df.group_by([:a, :b]).tail(1)
85
+ # # =>
86
+ # # #<Daru::DataFrame:82378270 @name = 0623db46-5425-41bd-a843-99baac3d1d9a @size = 6>
87
+ # # a b c d
88
+ # # 1 bar one 2 22
89
+ # # 3 bar three 1 44
90
+ # # 5 bar two 6 66
91
+ # # 6 foo one 3 77
92
+ # # 7 foo three 8 88
93
+ # # 4 foo two 3 55
53
94
def tail(quantity = 5)
  # Hand the selection off to select_groups_from, using the :last position.
  select_groups_from(:last, quantity)
end
56
97
57
98
# Calculate mean of numeric groups, excluding missing values.
99
+ # @example Usage of mean
100
+ # df = Daru::DataFrame.new({
101
+ # a: %w{foo bar foo bar foo bar foo foo},
102
+ # b: %w{one one two three two two one three},
103
+ # c: [1 ,2 ,3 ,1 ,3 ,6 ,3 ,8],
104
+ # d: [11 ,22 ,33 ,44 ,55 ,66 ,77 ,88]
+ # })
105
+ # df.group_by([:a, :b]).mean
106
+ # # =>
107
+ # # #<Daru::DataFrame:81097450 @name = 0c32983f-3e06-451f-a9c9-051cadfe7371 @size = 6>
108
+ # # c d
109
+ # # ["bar", "one"] 2 22
110
+ # # ["bar", "three"] 1 44
111
+ # # ["bar", "two"] 6 66
112
+ # # ["foo", "one"] 2.0 44.0
113
+ # # ["foo", "three"] 8 88
114
+ # # ["foo", "two"] 3.0 44.0
58
115
def mean
  # Forward to apply_method with the :numeric selector and the :mean operation.
  apply_method(:numeric, :mean)
end
@@ -69,6 +126,24 @@ def sum
69
126
apply_method :numeric , :sum
70
127
end
71
128
129
+ # Count groups, excludes missing values.
130
+ # @example Using count
131
+ # df = Daru::DataFrame.new({
132
+ # a: %w{foo bar foo bar foo bar foo foo},
133
+ # b: %w{one one two three two two one three},
134
+ # c: [1 ,2 ,3 ,1 ,3 ,6 ,3 ,8],
135
+ # d: [11 ,22 ,33 ,44 ,55 ,66 ,77 ,88]
136
+ # })
137
+ # df.group_by([:a, :b]).count
138
+ # # =>
139
+ # # #<Daru::DataFrame:76900210 @name = 7b9cf55d-17f8-48c7-b03a-2586c6e5ec5a @size = 6>
140
+ # # c d
141
+ # # ["bar", "one"] 1 1
142
+ # # ["bar", "two"] 1 1
143
+ # # ["bar", "three"] 1 1
144
+ # # ["foo", "one"] 2 2
145
+ # # ["foo", "three"] 1 1
146
+ # # ["foo", "two"] 2 2
72
147
def count
73
148
width = @non_group_vectors . size
74
149
Daru ::DataFrame . new ( [ size ] *width , order : @non_group_vectors )
@@ -91,6 +166,21 @@ def min
91
166
end
92
167
93
168
# Returns one of the selected groups as a DataFrame.
169
+ # @param group [Array] The group that is to be selected from those grouped.
170
+ #
171
+ # @example Getting a group
172
+ #
173
+ # df = Daru::DataFrame.new({
174
+ # a: %w{foo bar foo bar foo bar foo foo},
175
+ # b: %w{one one two three two two one three},
176
+ # c: [1 ,2 ,3 ,1 ,3 ,6 ,3 ,8],
177
+ # d: [11 ,22 ,33 ,44 ,55 ,66 ,77 ,88]
178
+ # })
179
+ # df.group_by([:a, :b]).get_group ['bar','two']
180
+ # # =>
181
+ # # #<Daru::DataFrame:83258980 @name = 687ee3f6-8874-4899-97fa-9b31d84fa1d5 @size = 1>
182
+ # # a b c d
183
+ # # 5 bar two 6 66
94
184
def get_group group
95
185
indexes = @groups [ group ]
96
186
elements = [ ]
0 commit comments