Skip to content

Need better diagnostics/warnings/reporting when locks time out #6

@nickminutello

Description

@nickminutello

I have found that when trying to diagnose issues with lock timeouts in a multi-process/multi-host environment, it was kind of essential to know "who" took the lock that timed out (that way you can then know where to start looking as to why that lock timed out).

To do this I made the lock value slightly more sophisticated than a timeout value - I added the host/pid.

I cant do a pull-request - but I can paste the code here: (this has a configurable delay patch mixed in too)

var hostName;
try {
    var OS = require('os');
    hostName = OS.hostname();
} catch (e) {
    hostName = process.env.HOST || process.env.HOSTNAME;
}
hostName = hostName ? hostName.split('.')[0] : 'unknown-host';

function createLockValue(lockTimeoutValue) {
    var callerId = hostName + ":" + process.pid;
    return callerId + "|" + lockTimeoutValue;
}

function getTimeoutValue(lockValue) {
    if (lockValue) {
        var parts = lockValue.split("|");
        if (parts.length ===2) {
            return parseFloat(parts[1]);
        }
    }
    return 0;
}

function getCallerId(lockValue) {
    if (lockValue) {
        var parts = lockValue.split("|");
        if (parts.length ===2) {
            return parts[0];
        }
    }
    return "unknown-caller-id";
}

function acquireLock(client, lockName, timeout, retryDelay, onLockAquired) {
    function retry() {
        acquireLock(client, lockName, timeout, retryDelay, onLockAquired);
    }

    var lockTimeoutValue = (Date.now() + timeout + 1);

    client.setnx(lockName, createLockValue(lockTimeoutValue), function(err, result) {
        if(err) {
            return setTimeout(retry, retryDelay);
        }
        if(result === 0) {
            client.get(lockName, function(err, lockValue) {
                if(err) {
                    return setTimeout(retry, retryDelay);
                }

                var timeStamp = getTimeoutValue(lockValue);

                var now = Date.now();
                if(timeStamp > now) {
                    setTimeout(retry, retryDelay);
                } else {
                    lockTimeoutValue = (Date.now() + timeout + 1);
                    client.getset(lockName, createLockValue(lockTimeoutValue), function(err, result) {
                        if(err) {
                            return setTimeout(retry, retryDelay);
                        }

                        if(result === lockValue) {
                            if (lockValue) {
                                console.log("[PID %s] [%s] Warning : Lock '%s' timed out by %dms - stealing lock from %s ...", process.pid, new Date().toISOString(), lockName, (now - timeStamp), getCallerId(lockValue));
                            }
                            onLockAquired(lockTimeoutValue);
                        } else {
                            setTimeout(retry, retryDelay);
                        }
                    });
                }
            });
        } else {
            onLockAquired(lockTimeoutValue);
        }
    });
}

module.exports = function(client, retryDelay) {
    if(!(client && client.setnx)) {
        throw new Error("You must specify a client instance of http://github.com/mranney/node_redis");
    }

    retryDelay = retryDelay || 50;

    return function(lockName, timeout, taskToPerform) {
        if(!lockName) {
            throw new Error("You must specify a lock string. It is on the basis on this the lock is acquired.");
        }

        if(!taskToPerform) {
            taskToPerform = timeout;
            timeout = 5000;
        }

        lockName = "lock." + lockName;

        acquireLock(client, lockName, timeout, retryDelay, function(lockTimeoutValue) {
            var start  = Date.now();
            taskToPerform(function(done) {
                if(lockTimeoutValue > Date.now()) {
                    client.del(lockName, done);
                } else {
                    console.log("[PID %s] [%s] Warning : Lock '%s' timed out before lock could be released. Locked operation took %dms", process.pid, new Date().toISOString(),lockName, Date.now() - start);
                    done();
                }
            });
        });
    };
};

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions