Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 30d62cf

Browse files
committed Jun 23, 2014
Merge pull request #11 from pakozm/devel
Devel
2 parents b0dde09 + 5b0d136 commit 30d62cf

File tree

9 files changed

+24
-17
lines changed

9 files changed

+24
-17
lines changed
 

‎README.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ $ ./execute_BIG_server.sh > output
6969

7070
**Note 1:** using only one worker takes: 146 seconds
7171

72-
**Note 2:** using 30 mappers and 10 reducers (30 workers) takes: 35 seconds
72+
**Note 2:** using 30 mappers and 15 reducers (30 workers) takes: 32 seconds
7373

7474
A naive word-count version implemented with pipes and shellscripts takes:
7575

‎mapreduce/examples/WordCount/init.lua

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
-- string hash function: http://isthe.com/chongo/tech/comp/fnv/
2-
local NUM_REDUCERS = 10
2+
local NUM_REDUCERS = 15
33
local FNV_prime = 16777619
44
local offset_basis = 2166136261
55
local MAX = 2^32

‎mapreduce/examples/WordCount/partitionfn.lua

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
-- string hash function: http://isthe.com/chongo/tech/comp/fnv/
2-
local NUM_REDUCERS = 10
2+
local NUM_REDUCERS = 15
33
local FNV_prime = 16777619
44
local offset_basis = 2166136261
55
local MAX = 2^32

‎mapreduce/fs.lua

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,8 @@ local fs = {
3333
local utils = require "mapreduce.utils"
3434

3535
local make_wildcard_from_mongo_match = function(match_tbl)
36-
return match_tbl.filename["$regex"]:gsub("%.%*","*"):gsub("[$^]","")
36+
return match_tbl.filename["$regex"]:
37+
gsub("\\%.","."):gsub("%.%*","*"):gsub("[$^]","")
3738
end
3839

3940
------------------------------------------------------------------------------

‎mapreduce/init.lua

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ local utils = require "mapreduce.utils"
2222
local persistent_table = require "mapreduce.persistent_table"
2323

2424
local mapreduce = {
25-
_VERSION = "0.3.2",
25+
_VERSION = "0.3.4",
2626
_NAME = "mapreduce",
2727
worker = worker,
2828
server = server,

‎mapreduce/job.lua

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -251,7 +251,7 @@ function job_prepare_reduce(self, g, storage, path)
251251
local fs,make_builder,make_lines_iterator = fs.router(self.cnn,mappers,
252252
storage,path)
253253
local filenames = {}
254-
local match_str = string.format("^%s.*", job_file)
254+
local match_str = string.format("^%s\\..*", job_file)
255255
local list = fs:list({ filename = { ["$regex"] = match_str } })
256256
for v in list:results() do
257257
table.insert(filenames, v.filename)

‎mapreduce/server.lua

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -124,6 +124,12 @@ local get_storage_from = utils.get_storage_from
124124

125125
-- PRIVATE FUNCTIONS AND METHODS
126126

127+
local function keys_of(t)
128+
local out = {}
129+
for k,_ in pairs(t) do table.insert(out, k) end
130+
return out
131+
end
132+
127133
local function count_digits(n)
128134
-- sanity check
129135
assert(n >= 0, "Only valid for positive integers")
@@ -293,15 +299,15 @@ local function server_prepare_reduce(self)
293299
max_part_key = math.max(max_part_key, part_key)
294300
-- annotate the mapper
295301
mappers_by_part_key[part_key] = mappers_by_part_key[part_key] or {}
296-
table.insert(mappers_by_part_key[part_key], map_hostnames[mapper_key])
302+
mappers_by_part_key[part_key][ map_hostnames[mapper_key] ] = true
297303
end
298304
local part_key_digits = count_digits(max_part_key)
299305
local result_str_format = "%s.P%0" .. tostring(part_key_digits) .. "d"
300306
local count=0
301307
for part_key,_ in pairs(part_keys) do
302308
count = count + 1
303309
local value = {
304-
mappers = mappers_by_part_key[part_key],
310+
mappers = keys_of(mappers_by_part_key[part_key]),
305311
file = string.format("%s/%s.P%d", path, map_results_ns, part_key),
306312
result = string.format(result_str_format, self.result_ns, part_key),
307313
}

‎mapreduce/utils.lua

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -139,7 +139,7 @@ local function gridfs_lines_iterator(gridfs, filename)
139139
local gridfs = gridfs
140140
local gridfile = gridfile
141141
if current_chunk < num_chunks then
142-
chunk = chunk or gridfile:chunk(current_chunk)
142+
chunk = chunk or assert( gridfile:chunk(current_chunk) )
143143
if current_pos <= chunk:len() then
144144
local first_chunk = current_chunk
145145
local last_chunk = current_chunk
@@ -149,15 +149,15 @@ local function gridfs_lines_iterator(gridfs, filename)
149149
for k,v in ipairs(tbl) do tbl[k] = nil end
150150
local found_line = false
151151
repeat
152-
chunk = chunk or gridfile:chunk(current_chunk)
152+
chunk = chunk or assert( gridfile:chunk(current_chunk) )
153153
data = data or chunk:data()
154154
local chunk_len = chunk:len()
155155
local match = data:match("^([^\n]*)\n", current_pos)
156156
if match then
157157
tbl[ #tbl+1 ] = match
158158
current_pos = #match + current_pos + 1 -- +1 because of the \n
159159
abs_pos = #match + abs_pos + 1
160-
found_line = true
160+
found_line = true
161161
else -- if match ... then
162162
-- inserts the whole chunk substring, no \n match found
163163
tbl[ #tbl+1 ] = data:sub(current_pos, chunk_len)
@@ -250,8 +250,8 @@ local function merge_iterator(fs, filenames, make_lines_iterator)
250250
local data = data
251251
local take_next = take_next
252252
local queue = queue
253-
-- merge all the files until finished (empty queue)
254-
while not queue:empty() do
253+
-- merge all the files until empty queue (finished)
254+
if not queue:empty() then
255255
counter = counter + 1
256256
--
257257
local key,result = merge_min_keys()
@@ -260,7 +260,7 @@ local function merge_iterator(fs, filenames, make_lines_iterator)
260260
collectgarbage("collect")
261261
end
262262
return key,result
263-
end -- while not finished()
263+
end -- if not finished
264264
end -- return function
265265
end
266266

‎mapreduce/worker.lua

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -124,12 +124,12 @@ function worker_methods:execute()
124124
num_failed_jobs = num_failed_jobs + 1
125125
end
126126
failed_jobs[id] = true
127-
end
127+
end -- if self.current_job then
128128
self.cnn:flush_pending_inserts(0)
129129
self.cnn:insert_error(utils.get_hostname(), msg)
130-
print(string.format("Error executing a job: %s",msg))
130+
io.stderr:write(string.format("Error executing a job: %s\n",msg))
131131
utils.sleep(utils.DEFAULT_SLEEP*4)
132-
end
132+
end -- if not ok then
133133
until ok or num_failed_jobs >= utils.MAX_WORKER_RETRIES
134134
print(string.format("# Worker retries: %d",num_failed_jobs))
135135
if num_failed_jobs >= utils.MAX_WORKER_RETRIES then

0 commit comments

Comments
 (0)
Please sign in to comment.