Skip to content

Commit e91e4b4

Browse files
committed
Merge branch 'devel'
2 parents 4178cf7 + 5395b6a commit e91e4b4

16 files changed

+632
-183
lines changed

README.md

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,8 @@ is stored at auxiliary mongoDB collections.
1111
This software depends on:
1212

1313
- [Lua 5.2](http://www.lua.org/)
14-
- [pakozm/luamongo](https://github.com/pakozm/luamongo/), a fork of
15-
[moai/luamongo](https://github.com/moai/luamongo) for Lua 5.2 and with minor
16-
improvements.
14+
- [luamongo](https://github.com/moai/luamongo/), mongoDB driver
15+
for Lua 5.2.
1716

1817
Installation
1918
------------
@@ -105,7 +104,8 @@ the same structure, they return a Lua table with two fields:
105104
- **init** function, which receives a table of arguments and allows you to configure
106105
your module options, in case that you need any option.
107106

108-
- **func** function, which implements the necessary Lua code.
107+
- A function which implements the necessary Lua code for the operation. The name
108+
of the function is different for each operation.
109109

110110
A map-reduce task is divided, at least, in the following modules:
111111

@@ -120,7 +120,7 @@ local init = function(arg)
120120
end
121121
return {
122122
init = init,
123-
func = function()
123+
taskfn = function()
124124
coroutine.yield(1,"mapreduce/server.lua")
125125
coroutine.yield(2,"mapreduce/worker.lua")
126126
coroutine.yield(3,"mapreduce/test.lua")
@@ -130,15 +130,17 @@ return {
130130
```
131131

132132
- **mapfn.lua** is the script where the map function is implemented. The
133-
**func** field is executed as a standard Lua function, and receives tow
134-
arguments `(key,value)` generated by one of the yields at your `taskfn`
135-
script. Map results are produced by calling the global function
133+
**mapfn** field is executed as a standard Lua function, and receives three
134+
arguments `(key,value,emit)`. The first two are generated by
135+
one of the yields at your `taskfn`
136+
script. The third argument is a function. Map results
137+
are produced by calling the function
136138
`emit(key,value)`.
137139

138140
```Lua
139141
return {
140142
init = function() end,
141-
func = function(key,value)
143+
mapfn = function(key,value,emit)
142144
for line in io.lines(value) do
143145
for w in line:gmatch("[^%s]+") do
144146
emit(w,1)
@@ -161,7 +163,7 @@ local offset_basis = 2166136261
161163
local MAX = 2^32
162164
return {
163165
init = function() end,
164-
func = function(key)
166+
partitionfn = function(key)
165167
-- compute hash
166168
local h = offset_basis
167169
for i=1,#key do
@@ -186,7 +188,7 @@ return {
186188
```Lua
187189
return {
188190
init = function() end,
189-
func = function(key,values)
191+
reducefn = function(key,values)
190192
local count=0
191193
for _,v in ipairs(values) do count = count + v end
192194
return count
@@ -204,7 +206,7 @@ return {
204206
```Lua
205207
return {
206208
init = function() end,
207-
func = function(it)
209+
finalfn = function(it)
208210
for key,value in it do
209211
print(value,key)
210212
end

examples/WordCount/finalfn.lua

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
1-
local it = 0
21
return {
32
init = function() end,
4-
func = function(pairs_iterator)
5-
it = it + 1
3+
finalfn = function(pairs_iterator)
64
for key,value in pairs_iterator do
75
print(value,key)
86
end

examples/WordCount/init.lua

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
-- string hash function: http://isthe.com/chongo/tech/comp/fnv/
2+
local NUM_REDUCERS = 10
3+
local FNV_prime = 16777619
4+
local offset_basis = 2166136261
5+
local MAX = 2^32
6+
return {
7+
-- arg is for configuration purposes; init will be executed with the init_args given
8+
-- to the server
9+
init = function(arg) end,
10+
11+
taskfn = function()
12+
coroutine.yield(1,"mapreduce/server.lua")
13+
coroutine.yield(2,"mapreduce/worker.lua")
14+
coroutine.yield(3,"mapreduce/test.lua")
15+
coroutine.yield(4,"mapreduce/utils.lua")
16+
end,
17+
18+
mapfn = function(key,value,emit)
19+
for line in io.lines(value) do
20+
for w in line:gmatch("[^%s]+") do
21+
emit(w,1)
22+
end
23+
end
24+
end,
25+
26+
partitionfn = function(key)
27+
-- compute hash
28+
local h = offset_basis
29+
for i=1,#key do
30+
h = (h * FNV_prime) % MAX
31+
h = bit32.bxor(h, key:byte(i))
32+
end
33+
return h % NUM_REDUCERS
34+
end,
35+
36+
reducefn = function(key,values)
37+
local count=0
38+
for _,v in ipairs(values) do count = count + v end
39+
return count
40+
end,
41+
42+
finalfn = function(pairs_iterator)
43+
for key,value in pairs_iterator do
44+
print(value,key)
45+
end
46+
return true -- indicates to remove mongo gridfs result files
47+
end,
48+
}

examples/WordCount/mapfn.lua

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
return {
22
init = function() end,
3-
func = function(key,value)
3+
mapfn = function(key,value,emit)
44
for line in io.lines(value) do
55
for w in line:gmatch("[^%s]+") do
66
emit(w,1)

examples/WordCount/partitionfn.lua

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ local offset_basis = 2166136261
55
local MAX = 2^32
66
return {
77
init = function() end,
8-
func = function(key)
8+
partitionfn = function(key)
99
-- compute hash
1010
local h = offset_basis
1111
for i=1,#key do

examples/WordCount/reducefn.lua

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
return {
22
init = function() end,
3-
func = function(key,values)
3+
reducefn = function(key,values)
44
local count=0
55
for _,v in ipairs(values) do count = count + v end
66
return count

examples/WordCount/taskfn.lua

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ local init = function(arg)
44
end
55
return {
66
init = init,
7-
func = function()
7+
taskfn = function()
88
coroutine.yield(1,"mapreduce/server.lua")
99
coroutine.yield(2,"mapreduce/worker.lua")
1010
coroutine.yield(3,"mapreduce/test.lua")

examples/WordCountBig/taskfn.lua

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ return {
22
-- init is for configuration purposes, it is allowed in any of the scripts
33
init = function(arg)
44
end,
5-
func = function()
5+
taskfn = function()
66
local f = io.popen("ls /home/experimentos/CORPORA/EUROPARL/en-splits/*","r")
77
local i=0
88
for filename in f:lines() do

execute_server.lua

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,13 @@
99
--
1010
-- [4] => mapfn Lua module, idem
1111
--
12-
-- [6] => partitionfn Lua module, idem
12+
-- [5] => partitionfn Lua module, idem
1313
--
14-
-- [7] => reducefn Lua module, idem
14+
-- [6] => reducefn Lua module, idem
1515
--
16-
-- [8] => finalfn Lua module, idem
16+
-- [7] => finalfn Lua module, idem
1717
--
18-
-- [9] => result_ns Lua string (OPTIONAL, by default all data will be removed)
18+
-- [8] => result_ns Lua string (OPTIONAL, by default all data will be removed)
1919
--
2020
-- IMPORTANT: the Lua modules (taskfn, mapfn, reducefn, ...) need to be in the
2121
-- LUA_PATH in all the machines where this code needs to be executed
@@ -41,12 +41,13 @@ s:configure{
4141
partitionfn = normalize(partitionfn),
4242
reducefn = normalize(reducefn),
4343
finalfn = normalize(finalfn),
44-
task_args = arg,
45-
map_args = arg,
46-
partition_args = arg,
47-
reduce_args = arg,
48-
final_args = arg,
44+
init_args = arg,
4945
result_ns = result_ns,
46+
-- storage = "gridfs[:PATH]", -- 'gridfs', 'shared', 'sshfs', with the
47+
-- optional string :PATH. If not given, PATH will be os.tmpname()
48+
-- storage = "gridfs:/tmp/wordcount",
49+
-- storage = "shared:/home/experimentos/tmp/wordcount",
50+
-- storage = "sshfs:/tmp/wordcount",
5051
}
5152
mapreduce.utils.sleep(4)
5253
s:loop()

mapreduce/cnn.lua

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,24 @@ function cnn:get_dbname()
2929
return self.dbname
3030
end
3131

32+
function cnn:insert_error(who,msg)
33+
local ns = string.format("%s.errors", self.dbname)
34+
local db = self:connect()
35+
db:insert(ns, { worker = who, msg = msg })
36+
end
37+
38+
function cnn:get_errors()
39+
local ns = string.format("%s.errors", self.dbname)
40+
local db = self:connect()
41+
return db:query(ns, {})
42+
end
43+
44+
function cnn:remove_errors(ids)
45+
local ns = string.format("%s.errors", self.dbname)
46+
local db = self:connect()
47+
db:remove(ns,{ _id = { ["$in"] = ids } })
48+
end
49+
3250
function cnn:annotate_insert(ns,tbl,callback)
3351
self.pending_inserts = self.pending_inserts or {}
3452
self.pending_callbacks = self.pending_callbacks or {}

0 commit comments

Comments
 (0)