Skip to content

Commit fb8c436

Browse files
committed
Merge pull request #16 from pakozm/devel
New version v0.3.5
2 parents cf7100c + 7b2253c commit fb8c436

File tree

2 files changed

+163
-55
lines changed

2 files changed

+163
-55
lines changed

mapreduce/init.lua

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ local tuple = require "mapreduce.tuple"
2323
local persistent_table = require "mapreduce.persistent_table"
2424

2525
local mapreduce = {
26-
_VERSION = "0.3.4",
26+
_VERSION = "0.3.5",
2727
_NAME = "mapreduce",
2828
worker = worker,
2929
server = server,

mapreduce/tuple.lua

Lines changed: 162 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
--[[
2-
This file is part of Lua-MapReduce
2+
This file is part of Lua-Tuple (https://github.com/pakozm/lua-tuple)
3+
This file is part of Lua-MapReduce (https://github.com/pakozm/lua-mapreduce)
34
45
Copyright 2014, Francisco Zamora-Martinez
56
@@ -17,85 +18,137 @@
1718
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
1819
]]
1920

20-
-- The job class is used by workers to execute map/reduce job. This class allows
21-
-- to write job status and to update job statistics in MongoDB. Execution of
22-
-- user map/reduce/combiner modules is done in job class. Intermediate data is
23-
-- written here in the storage given at 'task' collection.
21+
-- Linear implementation of immutable and interned tuples for Lua. It is linear
22+
-- because tuples are stored into a linear table. A different approach would be
23+
-- to store tuples in an inverted prefix tree (trie). The major difference between
24+
-- both approaches is that linear implementation needs more memory but has
25+
-- better indexing time, while prefix tree implementation needs less memory but
26+
-- has worse indexing time.
2427

2528
local tuple = {
2629
_VERSION = "0.1",
2730
_NAME = "tuple",
2831
}
2932

33+
-- libraries import
34+
local assert = assert
35+
local getmetatable = getmetatable
36+
local ipairs = ipairs
37+
local pairs = pairs
38+
local select = select
39+
local tostring = tostring
40+
local type = type
41+
local bit32_band = bit32.band
42+
local bit32_lshift = bit32.lshift
43+
local bit32_rshift = bit32.rshift
44+
local bit32_bxor = bit32.bxor
45+
local math_max = math.max
46+
local string_byte = string.byte
47+
local string_format = string.format
48+
local string_sub = string.sub
49+
local table_concat = table.concat
50+
local table_pack = table.pack
51+
52+
-- constants
53+
local BYTE_MASK = 0x000000FF
54+
local WORD_MASK = 0xFFFFFFFF
55+
local MAX_NUMBER = 2^32
56+
local MAX_BUCKET_HOLES_RATIO = 100
3057
local NUM_BUCKETS = 2^20
31-
local list_of_tuples = setmetatable({}, { __mode="v" })
58+
local WEAK_MT = { __mode="v" }
59+
60+
-- the list of tuples is a hash table with a maximum of NUM_BUCKETS
61+
local list_of_tuples = {}
3262

63+
-- converts a number into a binary string, for hash computation purposes
3364
local function dump_number(n)
34-
return string.format("%c%c%c%c%c%c%c%c",
35-
bit32.band(n,0xFF),
36-
bit32.band(bit32.rshift(n,8),0x00000000000000FF),
37-
bit32.band(bit32.rshift(n,16),0x00000000000000FF),
38-
bit32.band(bit32.rshift(n,24),0x00000000000000FF),
39-
bit32.band(bit32.rshift(n,32),0x00000000000000FF),
40-
bit32.band(bit32.rshift(n,40),0x00000000000000FF),
41-
bit32.band(bit32.rshift(n,48),0x00000000000000FF),
42-
bit32.band(bit32.rshift(n,56),0x00000000000000FF))
65+
assert(n < MAX_NUMBER, "Only valid for 32 bit numbers")
66+
return string_format("%c%c%c%c",
67+
bit32_band(n,BYTE_MASK),
68+
bit32_band(bit32_rshift(n,8),BYTE_MASK),
69+
bit32_band(bit32_rshift(n,16),BYTE_MASK),
70+
bit32_band(bit32_rshift(n,24),BYTE_MASK))
4371
end
4472

73+
-- computes the hash of a given tuple candidate
4574
local function compute_hash(t)
4675
local h = 0
4776
for i=1,#t do
4877
local v = t[i]
4978
local tt = type(v)
79+
-- dump the value if it is a number, another tuple or a nil value
5080
if tt == "number" then v = dump_number(v)
5181
elseif tt == "table" then v = dump_number(compute_hash(v))
82+
elseif tt == "nil" then v = "nil"
5283
end
84+
-- sanity check
5385
assert(type(v) == "string",
5486
"Needs an array with numbers, tables or strings")
87+
-- hash computation for every char in the string v
5588
for j=1,#v do
56-
h = h + string.byte(string.sub(v,j,j))
57-
h = h + bit32.lshift(h,10)
58-
h = bit32.bxor(h, bit32.rshift(h,6))
59-
h = bit32.band(h, 0x00000000FFFFFFFF)
89+
h = h + string_byte(string_sub(v,j,j))
90+
h = h + bit32_lshift(h,10)
91+
h = bit32_bxor(h, bit32_rshift(h,6))
92+
-- compute hash modulo 2^32
93+
h = bit32_band(h, WORD_MASK)
6094
end
6195
end
62-
h = h + bit32.rshift(h,3)
63-
h = bit32.bxor(h, bit32.lshift(h,11))
64-
h = h + bit32.lshift(h,15)
65-
h = bit32.band(h, 0x00000000FFFFFFFF)
96+
h = h + bit32_rshift(h,3)
97+
h = bit32_bxor(h, bit32_lshift(h,11))
98+
h = h + bit32_lshift(h,15)
99+
-- compute hash modulo 2^32
100+
h = bit32_band(h, WORD_MASK)
66101
return h
67102
end
68103

104+
-- tuple instances have this metatable
69105
local tuple_instance_mt = {
106+
-- disallow changing the metatable
107+
__metatable = false,
108+
-- prevent insertion of new elements
70109
__newindex = function(self) error("Unable to modify a tuple") end,
110+
-- convert it to a string like: tuple{ a, b, ... }
71111
__tostring = function(self)
72112
local result = {}
73-
for i=1,#self do result[#result+1] = tostring(self[i]) end
74-
return table.concat({"tuple(",table.concat(result, ", "),")"}, " ")
113+
for i=1,#self do
114+
local v = self[i]
115+
if type(v) == "string" then v = string_format("%q",v) end
116+
result[#result+1] = tostring(v)
117+
end
118+
return table_concat({"tuple{",table_concat(result, ", "),"}"}, " ")
75119
end,
76-
__concat = function(self,other)
120+
-- concatenates two tuples or a tuple with a number, string or another table
121+
__concat = function(a,b)
122+
if type(a) ~= "table" then a,b=b,a end
77123
local aux = {}
78-
for i=1,#self do aux[#aux+1] = self[i] end
79-
if type(other) == "table" then
80-
for i=1,#other do aux[#aux+1] = other[i] end
124+
for i=1,#a do aux[#aux+1] = a[i] end
125+
if type(b) == "table" then
126+
for i=1,#b do aux[#aux+1] = b[i] end
81127
else
82-
aux[#aux+1] = other
128+
aux[#aux+1] = b
83129
end
84130
return tuple(aux)
85131
end,
86132
}
87133

88-
local function proxy(t)
89-
setmetatable(t, tuple_instance_mt)
90-
return setmetatable({},{
91-
__newindex = function(self) error("Unable to modify a tuple") end,
92-
__index = function(self,k) if k == "is_tuple" then return true end return t[k] end,
93-
__len = function(self) return #t end,
94-
__tostring = function(self) return tostring(t) end,
134+
-- returns a wrapper table (proxy) which shadows the data table, allowing
135+
-- immutability in Lua; it receives the data table and the number of elements
136+
local function proxy(tpl,n)
137+
setmetatable(tpl, tuple_instance_mt)
138+
return setmetatable({}, {
139+
-- the proxy table has an immutable metatable, and stores in __metatable
140+
-- a string identifier, the real tuple data and the number of elements
141+
__metatable = { "is_tuple", tpl , n },
142+
__index = tpl,
143+
__newindex = function(self) error("Tuples are in-mutable data") end,
144+
__len = function(self) return getmetatable(self)[3] end,
145+
__tostring = function(self) return tostring(getmetatable(self)[2]) end,
95146
__lt = function(self,other)
147+
local t = getmetatable(self)[2]
96148
if type(other) ~= "table" then return false
97149
elseif #t < #other then return true
98150
elseif #t > #other then return false
151+
elseif t == other then return false
99152
else
100153
for i=1,#t do
101154
if t[i] > other[i] then return false end
@@ -104,51 +157,83 @@ local function proxy(t)
104157
end
105158
end,
106159
__le = function(self,other)
160+
local t = getmetatable(self)[2]
107161
-- equality is comparing references (tuples are immutable and interned)
108162
if self == other then return true end
109163
return self < other
110164
end,
111-
__pairs = function(self) return pairs(t) end,
112-
__ipairs = function(self) return ipairs(t) end,
113-
__concat = function(self,other) return t .. other end,
165+
__pairs = function(self) return pairs(getmetatable(self)[2]) end,
166+
__ipairs = function(self) return ipairs(getmetatable(self)[2]) end,
167+
__concat = function(self,other) return getmetatable(self)[2] .. other end,
168+
__mode = "v",
114169
})
115170
end
116171

172+
-- builds a candidate tuple given a table, recursively converting tables into new
173+
-- tuples
117174
local function tuple_constructor(t)
118-
local h = 0
119175
local new_tuple = {}
120-
for i,v in ipairs(t) do
121-
if type(v) == "table" then
122-
new_tuple[i] = tuple(v)
123-
else
124-
new_tuple[i] = v
176+
for i,v in pairs(t) do
177+
-- ignore the field "n" introduced by variadic args
178+
if i~="n" then
179+
assert(type(i) == "number" and i>0, "Needs integer keys > 0")
180+
if type(v) == "table" then
181+
-- recursively converts tables into new tuples
182+
new_tuple[i] = tuple(v)
183+
else
184+
-- copies the value
185+
new_tuple[i] = v
186+
end
125187
end
126188
end
127-
return proxy(new_tuple)
189+
-- returns a proxy to the new_tuple table with #t length
190+
return proxy(new_tuple,#t)
128191
end
129192

193+
-- metatable of tuple "class" table
130194
local tuple_mt = {
131195
-- tuple constructor doesn't allow table loops
132196
__call = function(self, ...)
133-
local t = { ... } if #t == 1 then t = t[1] end
197+
local n = select('#', ...)
198+
local t = table_pack(...) assert(#t == n) if #t == 1 then t = t[1] end
134199
if type(t) ~= "table" then
200+
-- non-table elements are unpacked when only one is given
135201
return t
136-
elseif #t == 1 then
137-
return tuple(t[1])
138202
else
139-
if t.is_tuple then return t end
203+
-- check if the given table is a tuple, if it is the case, just return it
204+
local mt = getmetatable(t) if mt and mt[1]=="is_tuple" then return t end
205+
-- create a new tuple candidate
140206
local new_tuple = tuple_constructor(t)
141207
local p = compute_hash(new_tuple) % NUM_BUCKETS
142-
local bucket = (list_of_tuples[p] or setmetatable({}, { __mode="v" }))
208+
local bucket = (list_of_tuples[p] or setmetatable({}, WEAK_MT))
143209
list_of_tuples[p] = bucket
144-
for i,vi in ipairs(bucket) do
210+
-- Count the number of elements in the bucket and the maximum non-nil key.
211+
-- If the ratio between these two values is greater than
212+
-- MAX_BUCKET_HOLES_RATIO, the bucket will be rearranged to remove all nil
213+
-- holes.
214+
local max,n = 0,0
215+
for i,vi in pairs(bucket) do
145216
local equals = true
217+
-- check equality by comparing all the elements one-by-one
146218
for j,vj in ipairs(vi) do
147219
if vj ~= new_tuple[j] then equals=false break end
148220
end
221+
-- BREAKS the execution flow in case the tuple exists in the bucket
149222
if equals == true then return vi end
223+
max = math_max(max,i)
224+
n = n+1
225+
end
226+
-- rearrange the bucket when the ratio exceeds the threshold
227+
if max/n > MAX_BUCKET_HOLES_RATIO then
228+
local new_bucket = {}
229+
for i,vi in pairs(bucket) do new_bucket[#new_bucket+1] = vi end
230+
list_of_tuples[p], bucket = new_bucket, new_bucket
231+
max = #bucket
232+
collectgarbage("collect")
150233
end
151-
table.insert(bucket, new_tuple)
234+
bucket[max+1] = new_tuple
235+
-- record the bucket index in the __metatable array, position 4
236+
getmetatable(new_tuple)[4] = p
152237
return new_tuple
153238
end
154239
end,
@@ -166,6 +251,29 @@ tuple.utest = function()
166251
assert(a == c)
167252
assert(b == a[2])
168253
assert(b == c[2])
254+
a,b,c = nil,nil,nil
255+
collectgarbage("collect")
256+
--
257+
local aux = {} for i=1,10000 do aux[tuple(i,i)] = i end
258+
assert(tuple.stats() == 10000)
259+
collectgarbage("collect")
260+
assert(tuple.stats() == 10000)
261+
aux = nil
262+
collectgarbage("collect")
263+
assert(tuple.stats() == 0)
264+
end
265+
266+
-- returns the number of tuples "alive", the number of used buckets, and the
267+
-- loading factor of the hash table
268+
tuple.stats = function()
269+
local num_buckets = 0
270+
local size = 0
271+
for k1,v1 in pairs(list_of_tuples) do
272+
num_buckets = num_buckets + 1
273+
for k2,v2 in pairs(v1) do size=size+1 end
274+
end
275+
if num_buckets == 0 then num_buckets = 1 end
276+
return size,num_buckets,size/NUM_BUCKETS
169277
end
170278

171279
return tuple

0 commit comments

Comments
 (0)