1
1
--[[
2
- This file is part of Lua-MapReduce
2
+ This file is part of Lua-Tuple (https://github.com/pakozm/lua-tuple)
3
+ This file is part of Lua-MapReduce (https://github.com/pakozm/lua-mapreduce)
3
4
4
5
Copyright 2014, Francisco Zamora-Martinez
5
6
17
18
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
19
]]
19
20
20
- -- The job class is used by workers to execute map/reduce job. This class allows
21
- -- to write job status and to update job statistics in MongoDB. Execution of
22
- -- user map/reduce/combiner modules is done in job class. Intermediate data is
23
- -- written here in the storage given at 'task' collection.
21
+ -- Linear implementation of immutable and interned tuples for Lua. It is linear
22
+ -- because tuples are stored into a linear table. A different approach would be
23
+ -- to store tuples into an inverted prefix tree (trie). Major difference between
24
+ -- both approaches is that linear implementation needs more memory but has
25
+ -- better indexing time, while prefix tree implementation needs less memory but
26
+ -- has worse indexing time.
24
27
25
28
-- Module table: carries the library version and name. It is the value
-- returned at the end of this file.
local tuple = {
  _VERSION = "0.1",
  _NAME = "tuple",
}
29
32
33
+ -- libraries import
34
+ local assert = assert
35
+ local getmetatable = getmetatable
36
+ local ipairs = ipairs
37
+ local pairs = pairs
38
+ local select = select
39
+ local tostring = tostring
40
+ local type = type
41
+ local bit32_band = bit32.band
42
+ local bit32_lshift = bit32.lshift
43
+ local bit32_rshift = bit32.rshift
44
+ local bit32_bxor = bit32.bxor
45
+ local math_max = math.max
46
+ local string_byte = string.byte
47
+ local string_format = string.format
48
+ local string_sub = string.sub
49
+ local table_concat = table.concat
50
+ local table_pack = table.pack
51
+
52
-- constants
local BYTE_MASK = 0x000000FF   -- selects the lowest 8 bits
local WORD_MASK = 0xFFFFFFFF   -- selects the lowest 32 bits
local MAX_NUMBER = 2^32        -- exclusive upper bound accepted by dump_number
-- threshold of (maximum key / number of entries) in a bucket above which the
-- bucket is rebuilt without nil holes
local MAX_BUCKET_HOLES_RATIO = 100
local NUM_BUCKETS = 2^20
-- metatable for tables whose values are weak references
local WEAK_MT = { __mode = "v" }

-- the list of tuples is a hash table with a maximum of NUM_BUCKETS
local list_of_tuples = {}
32
62
63
-- Converts a number into a 4-character binary string for hash computation
-- purposes. The characters are the four bytes of n, least significant byte
-- first.
--
-- n: a number lower than MAX_NUMBER (an error is raised otherwise).
-- Returns: a string of length 4.
local function dump_number(n)
  assert(n < MAX_NUMBER, "Only valid for 32 bit numbers")
  return string_format("%c%c%c%c",
                       bit32_band(n, BYTE_MASK),
                       bit32_band(bit32_rshift(n, 8), BYTE_MASK),
                       bit32_band(bit32_rshift(n, 16), BYTE_MASK),
                       bit32_band(bit32_rshift(n, 24), BYTE_MASK))
end
44
72
73
-- Computes the hash of a given tuple candidate.
--
-- t: an array-like table (or tuple) whose positions 1..#t hold numbers,
--    strings, tables (hashed recursively) or nil. Any other value type raises
--    an error.
-- Returns: a number produced by a final bit32_band with WORD_MASK.
local function compute_hash(t)
  local h = 0
  for i=1,#t do
    local v = t[i]
    local tt = type(v)
    -- dump the value if it is a number, another tuple or a nil value
    if tt == "number" then v = dump_number(v)
    elseif tt == "table" then v = dump_number(compute_hash(v))
    elseif tt == "nil" then v = "nil"
    end
    -- sanity check
    assert(type(v) == "string",
           "Needs an array with numbers, tables or strings")
    -- hash computation for every char in the string v
    for j=1,#v do
      -- string_byte(v, j) yields the byte at position j without building a
      -- one-character substring first
      h = h + string_byte(v, j)
      h = h + bit32_lshift(h, 10)
      h = bit32_bxor(h, bit32_rshift(h, 6))
      -- compute hash modulo 2^32
      h = bit32_band(h, WORD_MASK)
    end
  end
  h = h + bit32_rshift(h, 3)
  h = bit32_bxor(h, bit32_lshift(h, 11))
  h = h + bit32_lshift(h, 15)
  -- compute hash modulo 2^32
  h = bit32_band(h, WORD_MASK)
  return h
end
68
103
104
-- Metatable set on the raw data table of every tuple instance.
local tuple_instance_mt = {
  -- disallow changing the metatable (getmetatable returns false)
  __metatable = false,
  -- forbid inserting new elements
  __newindex = function(self) error("Unable to modify a tuple") end,
  -- converts it to a string like: tuple{ a, b, ... }
  __tostring = function(self)
    local result = {}
    for i=1,#self do
      local v = self[i]
      -- strings are shown quoted so they can be told apart from other values
      if type(v) == "string" then v = string_format("%q", v) end
      result[#result+1] = tostring(v)
    end
    return table_concat({"tuple{", table_concat(result, ", "), "}"}, " ")
  end,
  -- Concatenates two tuples, or a tuple with a number, string or another
  -- table, and returns the result as a tuple. Operand order is preserved:
  -- the elements of the left operand always come first.
  __concat = function(a, b)
    local aux = {}
    local operands = { a, b }
    for k=1,2 do
      local operand = operands[k]
      if type(operand) == "table" then
        -- tables contribute their elements one by one
        for i=1,#operand do aux[#aux+1] = operand[i] end
      else
        -- any other value is appended as a single element
        aux[#aux+1] = operand
      end
    end
    return tuple(aux)
  end,
}
87
133
88
- local function proxy (t )
89
- setmetatable (t , tuple_instance_mt )
90
- return setmetatable ({},{
91
- __newindex = function (self ) error (" Unable to modify a tuple" ) end ,
92
- __index = function (self ,k ) if k == " is_tuple" then return true end return t [k ] end ,
93
- __len = function (self ) return # t end ,
94
- __tostring = function (self ) return tostring (t ) end ,
134
+ -- returns a wrapper table (proxy) which shadows the data table, allowing
135
+ -- immutability in Lua; it receives the table data and the number of elements
136
+ local function proxy (tpl ,n )
137
+ setmetatable (tpl , tuple_instance_mt )
138
+ return setmetatable ({}, {
139
+ -- the proxy table has an in-mutable metatable, and stores in __metatable
140
+ -- a string identifier, the real tuple data and the number of elements
141
+ __metatable = { " is_tuple" , tpl , n },
142
+ __index = tpl ,
143
+ __newindex = function (self ) error (" Tuples are in-mutable data" ) end ,
144
+ __len = function (self ) return getmetatable (self )[3 ] end ,
145
+ __tostring = function (self ) return tostring (getmetatable (self )[2 ]) end ,
95
146
__lt = function (self ,other )
147
+ local t = getmetatable (self )[2 ]
96
148
if type (other ) ~= " table" then return false
97
149
elseif # t < # other then return true
98
150
elseif # t > # other then return false
151
+ elseif t == other then return false
99
152
else
100
153
for i = 1 ,# t do
101
154
if t [i ] > other [i ] then return false end
@@ -104,51 +157,83 @@ local function proxy(t)
104
157
end
105
158
end ,
106
159
__le = function (self ,other )
160
+ local t = getmetatable (self )[2 ]
107
161
-- equality is comparing references (tuples are in-mutable and interned)
108
162
if self == other then return true end
109
163
return self < other
110
164
end ,
111
- __pairs = function (self ) return pairs (t ) end ,
112
- __ipairs = function (self ) return ipairs (t ) end ,
113
- __concat = function (self ,other ) return t .. other end ,
165
+ __pairs = function (self ) return pairs (getmetatable (self )[2 ]) end ,
166
+ __ipairs = function (self ) return ipairs (getmetatable (self )[2 ]) end ,
167
+ __concat = function (self ,other ) return getmetatable (self )[2 ] .. other end ,
168
+ __mode = " v" ,
114
169
})
115
170
end
116
171
172
-- Builds a candidate tuple given a table, recursively converting nested tables
-- into tuples.
--
-- t: a table whose keys are positive integers; the field "n" (introduced when
--    variadic arguments are packed) is ignored. Any other key raises an error.
-- Returns: the proxy built by proxy() over the copied data, with length #t.
local function tuple_constructor(t)
  local new_tuple = {}
  for i,v in pairs(t) do
    -- ignore the field "n" introduced by variadic args
    if i ~= "n" then
      -- fractional keys are rejected as well: they would be copied but never
      -- taken into account by the hash or by the equality check
      assert(type(i) == "number" and i > 0 and i % 1 == 0,
             "Needs integer keys > 0")
      if type(v) == "table" then
        -- recursively converts tables into new tuples
        new_tuple[i] = tuple(v)
      else
        -- copies the value
        new_tuple[i] = v
      end
    end
  end
  -- returns a proxy to the new_tuple table with #t length
  return proxy(new_tuple, #t)
end
129
192
193
+ -- metatable of tuple "class" table
130
194
local tuple_mt = {
131
195
-- tuple constructor doesn't allow table loops
132
196
__call = function (self , ...)
133
- local t = { ... } if # t == 1 then t = t [1 ] end
197
+ local n = select (' #' , ... )
198
+ local t = table_pack (... ) assert (# t == n ) if # t == 1 then t = t [1 ] end
134
199
if type (t ) ~= " table" then
200
+ -- non-table elements are unpacked when only one is given
135
201
return t
136
- elseif # t == 1 then
137
- return tuple (t [1 ])
138
202
else
139
- if t .is_tuple then return t end
203
+ -- check if the given table is a tuple, if it is the case, just return it
204
+ local mt = getmetatable (t ) if mt and mt [1 ]== " is_tuple" then return t end
205
+ -- create a new tuple candidate
140
206
local new_tuple = tuple_constructor (t )
141
207
local p = compute_hash (new_tuple ) % NUM_BUCKETS
142
- local bucket = (list_of_tuples [p ] or setmetatable ({}, { __mode = " v " } ))
208
+ local bucket = (list_of_tuples [p ] or setmetatable ({}, WEAK_MT ))
143
209
list_of_tuples [p ] = bucket
144
- for i ,vi in ipairs (bucket ) do
210
+ -- Count the number of elements in the bucket and the maximum non-nil key.
211
+ -- In case the relation between this two values was greater than
212
+ -- MAX_BUCKET_HOLES_RATIO, the bucket will be rearranged to remove all nil
213
+ -- holes.
214
+ local max ,n = 0 ,0
215
+ for i ,vi in pairs (bucket ) do
145
216
local equals = true
217
+ -- check equality by comparing all the elements one-by-one
146
218
for j ,vj in ipairs (vi ) do
147
219
if vj ~= new_tuple [j ] then equals = false break end
148
220
end
221
+ -- BREAKS the execution flow in case the tuple exists in the bucket
149
222
if equals == true then return vi end
223
+ max = math_max (max ,i )
224
+ n = n + 1
225
+ end
226
+ -- rearrange the bucket when the ratio achieves the threshold
227
+ if max / n > MAX_BUCKET_HOLES_RATIO then
228
+ local new_bucket = {}
229
+ for i ,vi in pairs (bucket ) do new_bucket [# new_bucket + 1 ] = vi end
230
+ list_of_tuples [p ], bucket = new_bucket , new_bucket
231
+ max = # bucket
232
+ collectgarbage (" collect" )
150
233
end
151
- table.insert (bucket , new_tuple )
234
+ bucket [max + 1 ] = new_tuple
235
+ -- take note of the bucket into __metatable array, position 4
236
+ getmetatable (new_tuple )[4 ] = p
152
237
return new_tuple
153
238
end
154
239
end ,
@@ -166,6 +251,29 @@ tuple.utest = function()
166
251
assert (a == c )
167
252
assert (b == a [2 ])
168
253
assert (b == c [2 ])
254
+ a ,b ,c = nil ,nil ,nil
255
+ collectgarbage (" collect" )
256
+ --
257
+ local aux = {} for i = 1 ,10000 do aux [tuple (i ,i )] = i end
258
+ assert (tuple .stats () == 10000 )
259
+ collectgarbage (" collect" )
260
+ assert (tuple .stats () == 10000 )
261
+ aux = nil
262
+ collectgarbage (" collect" )
263
+ assert (tuple .stats () == 0 )
264
+ end
265
+
266
-- Returns three values describing the table of interned tuples:
--   1. the number of tuples "alive" (entries found in all the buckets),
--   2. the number of buckets present in list_of_tuples (reported as 1 when
--      there are none),
--   3. the loading factor, computed as the number of tuples divided by
--      NUM_BUCKETS.
tuple.stats = function()
  local num_buckets = 0
  local size = 0
  for _,bucket in pairs(list_of_tuples) do
    num_buckets = num_buckets + 1
    for _ in pairs(bucket) do size = size + 1 end
  end
  if num_buckets == 0 then num_buckets = 1 end
  return size, num_buckets, size/NUM_BUCKETS
end
170
278
171
279
return tuple
0 commit comments