Skip to content

express - middleware 源码解剖 #9

@Hazlank

Description

@Hazlank

Middleware

用过express都知道,express有中间件的概念,中间件可以:

  • Execute any code.
  • Make changes to the request and the response objects.
  • End the request-response cycle.
  • Call the next middleware function in the stack.

当有请求向服务器发起的时候,可以让中间件处理进来的请求。比如,在一开始URL进来的时候并不能很方便地拿到传进来的queryString,express自带了处理queryString的中间件,会把它解析好并放在req.query里,让用户很方便地操作queryString。或者给每个响应头添加上csrf-token等等。中间件不是必须的,我们可以选择所需要的中间件功能来提供帮助。

今天就来解读中间件的运行原理,使用中间件的代码很简单app.use(),下面是express5.x源码

/**
 * Mount one or more middleware functions (or whole sub-apps) on this
 * app's router, optionally under a path.
 *
 * If the first argument is not a function (and, when it is an array, its
 * first leaf is not a function either) it is used as the mount path;
 * otherwise the path defaults to '/'.
 *
 * @param {String|Function|Array} fn path, middleware, or array of middleware
 * @return {app} for chaining
 * @throws {TypeError} when no middleware remains after the path is removed
 * @public
 */
app.use = function use(fn) {
  var self = this;
  var hasPath = false;

  // disambiguate app.use([fn]) from app.use(path, ...)
  if (typeof fn !== 'function') {
    var head = fn;

    // dig down to the first leaf of a (possibly nested) array
    while (Array.isArray(head) && head.length !== 0) {
      head = head[0];
    }

    hasPath = typeof head !== 'function';
  }

  var path = hasPath ? fn : '/';
  var fns = flatten(slice.call(arguments, hasPath ? 1 : 0));

  if (fns.length === 0) {
    throw new TypeError('app.use() requires a middleware function')
  }

  // make sure the router exists before registering anything
  this.lazyrouter();
  var router = this._router;

  fns.forEach(function (handler) {
    // anything with both `handle` and `set` is treated as an express app
    var isExpressApp = Boolean(handler && handler.handle && handler.set);

    if (!isExpressApp) {
      router.use(path, handler);
      return;
    }

    debug('.use app under %s', path);
    handler.mountpath = path;
    handler.parent = self;

    // after the sub-app is done, put the outer app's prototypes back
    router.use(path, function mounted_app(req, res, next) {
      var outerApp = req.app;

      handler.handle(req, res, function (err) {
        setPrototypeOf(req, outerApp.request)
        setPrototypeOf(res, outerApp.response)
        next(err);
      });
    });

    // tell the sub-app it has been mounted
    handler.emit('mount', self);
  });

  return this;
};

use函数接收的第一个参数如果不是函数,就把它当作匹配的路径;它也可以接收多个函数(或函数数组)并扁平化。注意到中间的代码,fns会被循环并逐个传给router.use;如果传入的是一个express子应用(同时带有handle和set),传给router.use的第二个参数会是一个包装函数mounted_app,在里面执行fn.handle。另外能看到有一段lazyrouter()的函数被执行,他的代码具体:

/**
 * Create the app's base router on first use.
 *
 * Does nothing when `this._router` already exists. Otherwise builds a
 * Router from the app's routing settings and registers the two built-in
 * middleware (query parsing, then express init) on it.
 *
 * @private
 */
app.lazyrouter = function lazyrouter() {
  // already created: nothing to do
  if (this._router) {
    return;
  }

  var routerOptions = {};
  routerOptions.caseSensitive = this.enabled('case sensitive routing');
  routerOptions.strict = this.enabled('strict routing');

  var router = new Router(routerOptions);
  this._router = router;

  // built-in middleware always sit at the front of the stack
  router.use(query(this.get('query parser fn')));
  router.use(middleware.init(this));
};

可以看到,他会创建_router,并像前面app.use传进来的fn一样,最后调用_router.use(fn),看看router.use又是什么

/**
 * Register middleware on this router, optionally under a path.
 *
 * The first argument is treated as the path unless it is a function or
 * an array whose first leaf is a function; the path defaults to '/'.
 * Each remaining callback is wrapped in a non-terminal Layer (end: false)
 * and appended to `this.stack`.
 *
 * @param {String|Function|Array} fn path, middleware, or array of middleware
 * @return {router} for chaining
 * @throws {TypeError} when no callbacks are given or one is not a function
 * @public
 */
router.use = function use(fn) {
  var hasPath = false;

  // disambiguate router.use([fn]) from router.use(path, ...)
  if (typeof fn !== 'function') {
    var head = fn;

    while (Array.isArray(head) && head.length !== 0) {
      head = head[0];
    }

    hasPath = typeof head !== 'function';
  }

  var path = hasPath ? fn : '/';
  var callbacks = flatten(slice.call(arguments, hasPath ? 1 : 0));

  if (callbacks.length === 0) {
    throw new TypeError('Router.use() requires a middleware function')
  }

  for (var index = 0; index < callbacks.length; index++) {
    var handler = callbacks[index];

    if (typeof handler !== 'function') {
      throw new TypeError('Router.use() requires a middleware function but got a ' + gettype(handler))
    }

    debug('use %o %s', path, handler.name || '<anonymous>')

    var layerOptions = {
      sensitive: this.caseSensitive,
      strict: false,
      end: false
    };
    var layer = new Layer(path, layerOptions, handler);

    // a plain middleware layer carries no route
    layer.route = undefined;

    this.stack.push(layer);
  }

  return this;
};

可以看到router.use做的事情跟前面的app.use一样:处理path,扁平化fn。不同之处在于它最后会调用new Layer(),并把生成的layer放入router.stack。在实例化router的时候(new Router()),会初始化一个stack数组来存放Layer

到这里,先整理一下所有的结构,当用到app.use的时候会初始化一个_router在app里,这个_router有个属性叫stack,用来存放Layer。
具体代码以及生成的结构

// Minimal app with a single user middleware, used to inspect the router.
var app = express();
app.use(function middleware(req, res, next) {
  // 'route' is not treated as an error: the router's next() maps it to a
  // null layerError before continuing
  next('route');
});

// app.use() called lazyrouter(), so app._router now exists
console.log(app._router)

router

stack前两个是啥呢?记不记得在lazyrouter里有两个函数

	  // excerpt from app.lazyrouter(): the two built-in middleware it registers
	  this._router.use(query(this.get('query parser fn')));
	  this._router.use(middleware.init(this));

	  //query.js
	  module.exports = function query(options) {
	    var opts = merge({}, options)
	    var queryparse = qs.parse;
	  
	    if (typeof options === 'function') {
	      queryparse = options;
	      opts = undefined;
	    }
	  
	    if (opts !== undefined && opts.allowPrototypes === undefined) {
	      // back-compat for qs module
	      opts.allowPrototypes = true;
	    }
	  
	    return function query(req, res, next){
	      if (!req.query) {
	        var val = parseUrl(req).query;
	        req.query = queryparse(val, opts);
	      }
	  
	      next();
	    };

	  //middleware.init
	  exports.init = function(app){
	    return function expressInit(req, res, next){
	      if (app.enabled('x-powered-by')) res.setHeader('X-Powered-By', 'Express');
	      req.res = res;
	      res.req = req;
	      req.next = next;
	  
	      setPrototypeOf(req, app.request)
	      setPrototypeOf(res, app.response)
	  
	      res.locals = res.locals || Object.create(null);
	  
	      next();
	    };
	  };

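看到具体的代码能得知,他们算是官方自带的中间件了:一个能帮助解析querystring并放到req.query里;另外一个(expressInit)在启用x-powered-by时设置X-Powered-By响应头,告诉浏览器服务器的支持来自Express,同时让req和res互相引用、把它们的原型设置为app.request和app.response,并初始化res.locals。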

ok,我们还有layer没探讨是做什么的

/**
 * A single entry in a router's stack: a handler plus the compiled
 * pattern that decides whether it applies to a request path.
 *
 * Can be called with or without `new`.
 *
 * @param {String} path
 * @param {Object} [options] passed through to pathRegexp
 * @param {Function} fn handler stored as `this.handle`
 */
function Layer(path, options, fn) {
  // allow calling without `new`
  if ((this instanceof Layer) === false) {
    return new Layer(path, options, fn);
  }

  debug('new %o', path)
  var opts = options || {};
  var keys = [];

  this.handle = fn;
  this.name = fn.name || '<anonymous>';
  this.params = undefined;
  this.path = undefined;

  // pathRegexp receives the same array that is stored as this.keys
  this.keys = keys;
  this.regexp = pathRegexp(path, keys, opts);

  // fast path flags let match() skip the regexp for '*' and non-ending '/'
  var isRootPath = path === '/';
  this.regexp.fast_star = path === '*'
  this.regexp.fast_slash = isRootPath && opts.end === false
}

/**
 * Handle the error for the layer.
 *
 * @param {Error} error
 * @param {Request} req
 * @param {Response} res
 * @param {function} next
 * @api private
 */

Layer.prototype.handle_error = function handle_error(error, req, res, next) {
  var handler = this.handle;

  // only handlers declaring exactly four parameters receive errors
  var isErrorHandler = handler.length === 4;

  if (isErrorHandler) {
    try {
      handler(error, req, res, next);
    } catch (thrown) {
      // a throwing error handler forwards its own exception instead
      next(thrown);
    }

    return;
  }

  // not a standard error handler: pass the error along untouched
  return next(error);
};

/**
 * Handle the request for the layer.
 *
 * @param {Request} req
 * @param {Response} res
 * @param {function} next
 * @api private
 */

Layer.prototype.handle_request = function handle(req, res, next) {
  var handler = this.handle;

  // handlers declaring more than three parameters are error handlers;
  // skip them on the normal (non-error) path
  var isRequestHandler = handler.length <= 3;

  if (!isRequestHandler) {
    return next();
  }

  try {
    handler(req, res, next);
  } catch (thrown) {
    // a synchronous throw becomes an error passed to next
    next(thrown);
  }
};

/**
 * Check if this route matches `path`, if so
 * populate `.params`.
 *
 * @param {String} path
 * @return {Boolean}
 * @api private
 */

Layer.prototype.match = function match(path) {
  var captured;

  if (path !== null && path !== undefined) {
    var regexp = this.regexp;

    // non-ending '/' matches every path, with nothing consumed
    if (regexp.fast_slash) {
      this.params = {}
      this.path = ''
      return true
    }

    // '*' matches every path and exposes it as param '0'
    if (regexp.fast_star) {
      this.params = {'0': decode_param(path)}
      this.path = path
      return true
    }

    captured = regexp.exec(path)
  }

  // no path given, or the pattern did not match: clear any stale state
  if (!captured) {
    this.params = undefined;
    this.path = undefined;
    return false;
  }

  var params = {};
  var keys = this.keys;

  this.params = params;
  this.path = captured[0]

  // capture group n + 1 belongs to keys[n]
  for (var n = 0; n < captured.length - 1; n++) {
    var name = keys[n].name;
    var value = decode_param(captured[n + 1])

    // an undefined capture never overwrites a value already stored
    if (value === undefined && hasOwnProperty.call(params, name)) {
      continue;
    }

    params[name] = value;
  }

  return true;
};

Layer很简单,构造函数把fn赋值到this.handle,path则会被pathRegexp转成正则表达式保存在this.regexp上,所以这个path其实也支持正则的传入。Layer有三个原型方法:match用来判断当前的正则是否匹配请求路径;handle_error和handle_request则执行fn,并将req、res、next传到函数参数内。这两个的不同在于:handle_request执行fn时如果在try catch里捕获到抛出的异常,会调用next(err),之后再去执行handle_error。接下来就得找到什么时候执行handle_request函数。

在请求到来的时候会触发router的handle操作,执行下面的代码:https://github.com/expressjs/express/blob/master/lib/router/index.js#L136-L320

/**
 * Dispatch a request through this router's stack of layers.
 *
 * Walks `this.stack` one matching layer at a time. Each layer is handed
 * the inner `next` function, so the following layer only runs once the
 * current handler calls `next`. When the stack is exhausted, the router
 * is asked to exit, or no layer matches, control goes to `done`, which
 * wraps the caller-supplied `out`.
 *
 * @param {Request} req
 * @param {Response} res
 * @param {function} out called once this router is finished with the request
 * @private
 */
proto.handle = function handle(req, res, out) {
  var self = this;

  debug('dispatching %s %s', req.method, req.url);

  // idx is the position of the next layer to try; it only moves forward
  var idx = 0;
  var protohost = getProtohost(req.url) || ''
  // url prefix stripped for the current layer, re-attached on the next next()
  var removed = '';
  var slashAdded = false;
  var paramcalled = {};

  // store options for OPTIONS request
  // only used if OPTIONS request
  var options = [];

  // middleware and routes
  var stack = self.stack;

  // manage inter-router variables
  var parentParams = req.params;
  var parentUrl = req.baseUrl || '';
  // NOTE(review): restore() is defined outside this excerpt; presumably it
  // puts req.baseUrl / req.next / req.params back before calling `out`
  var done = restore(out, req, 'baseUrl', 'next', 'params');

  // setup next layer
  req.next = next;

  // for options requests, respond with a default if nothing else responds
  if (req.method === 'OPTIONS') {
    done = wrap(done, function(old, err) {
      if (err || options.length === 0) return old(err);
      sendOptionsResponse(res, options, old);
    });
  }

  // setup basic req values
  req.baseUrl = parentUrl;
  req.originalUrl = req.originalUrl || req.url;

  // start dispatching with the first layer
  next();

  /**
   * Advance to the next matching layer.
   *
   * @param {*} [err] an error to propagate; the string 'route' is treated
   *   as "no error" and the string 'router' exits this router
   */
  function next(err) {
    // 'route' is a control signal, not an error
    var layerError = err === 'route'
      ? null
      : err;

    // remove added slash
    if (slashAdded) {
      req.url = req.url.substr(1);
      slashAdded = false;
    }

    // restore altered req.url
    if (removed.length !== 0) {
      req.baseUrl = parentUrl;
      req.url = protohost + removed + req.url.substr(protohost.length);
      removed = '';
    }

    // signal to exit router
    if (layerError === 'router') {
      setImmediate(done, null)
      return
    }

    // no more matching layers
    if (idx >= stack.length) {
      setImmediate(done, layerError);
      return;
    }

    // get pathname of request
    var path = getPathname(req);

    if (path == null) {
      return done(layerError);
    }

    // find next matching layer
    var layer;
    var match;
    var route;

    // the loop ends as soon as `match` is true or the stack runs out
    while (match !== true && idx < stack.length) {
      layer = stack[idx++];
      match = matchLayer(layer, path);
      route = layer.route;

      // a non-boolean result from matchLayer is kept as the pending error
      if (typeof match !== 'boolean') {
        // hold on to layerError
        layerError = layerError || match;
      }

      if (match !== true) {
        continue;
      }

      if (!route) {
        // process non-route handlers normally
        continue;
      }

      if (layerError) {
        // routes do not match with a pending error
        match = false;
        continue;
      }

      var method = req.method;
      var has_method = route._handles_method(method);

      // build up automatic options response
      if (!has_method && method === 'OPTIONS') {
        appendMethods(options, route._options());
      }

      // don't even bother matching route
      // (HEAD is let through even when the route reports no HEAD handler)
      if (!has_method && method !== 'HEAD') {
        match = false;
        continue;
      }
    }

    // no match
    if (match !== true) {
      return done(layerError);
    }

    // store route for dispatch on change
    if (route) {
      req.route = route;
    }

    // Capture one-time layer values
    req.params = self.mergeParams
      ? mergeParams(layer.params, parentParams)
      : layer.params;
    var layerPath = layer.path;

    // this should be done for the layer
    self.process_params(layer, paramcalled, req, res, function (err) {
      if (err) {
        return next(layerError || err);
      }

      // route layers are invoked directly; only plain middleware layers
      // go through trim_prefix
      if (route) {
        return layer.handle_request(req, res, next);
      }

      trim_prefix(layer, layerError, layerPath, path);
    });
  }

  /**
   * Strip the matched mount path from req.url for a middleware layer,
   * then invoke the layer's error or request handler.
   */
  function trim_prefix(layer, layerError, layerPath, path) {
    if (layerPath.length !== 0) {
      // Validate path breaks on a path separator
      var c = path[layerPath.length]
      if (c && c !== '/' && c !== '.') return next(layerError)

      // Trim off the part of the url that matches the route
      // middleware (.use stuff) needs to have the path stripped
      debug('trim prefix (%s) from url %s', layerPath, req.url);
      removed = layerPath;
      req.url = protohost + req.url.substr(protohost.length + removed.length);

      // Ensure leading slash
      if (!protohost && req.url[0] !== '/') {
        req.url = '/' + req.url;
        slashAdded = true;
      }

      // Setup base URL (no trailing slash)
      req.baseUrl = parentUrl + (removed[removed.length - 1] === '/'
        ? removed.substring(0, removed.length - 1)
        : removed);
    }

    debug('%s %s : %s', layer.name, layerPath, req.originalUrl);

    // a pending error is routed to the layer's error path
    if (layerError) {
      layer.handle_error(layerError, req, res, next);
    } else {
      layer.handle_request(req, res, next);
    }
  }
};

取出当前的stack,也就是存放着Layer的数组,执行next()函数,在next函数里会通过while循环遍历stack,当match为true(找到了匹配的layer)或者stack已经遍历完时,循环就会停止。
首先会把Layer取出来并执行matchLayer(layer, path),matchLayer会调用Layer.match来进行判断,如果不匹配,match就不为true,并用continue跳过当前这次循环进入下一个layer。我们能看到还有一种会被跳过的情况:调用next(err)传进来一个layerError值时,凡是带route的layer(app.get / app.post等方法注册的路由)都会被设置match为false并跳过;普通中间件虽然还会被匹配到,但最后执行的是layer.handle_error,参数个数不是4的处理函数会直接调用next(error)。所以我们写中间件的时候给next传err msg,就不会运行下面的普通中间件以及app.get / app.post等方法,只有错误处理中间件会被执行。当然这个err如果你传的是'route'字符串,他不视为错误,只是跳过当前的中间件,这是官方钦定的方法。虽然会有点反直觉,如果用户没看到相关API,传了个'route'进去却没报错,可能会有点摸不着头脑。

最后就是执行相应的layer.handle_error或者layer.handle_request

express有个洋葱模型的概念,就是执行顺序,看下列的代码

// Three middleware registered in order; each logs once before handing
// control on with next() and once after next() has returned.
var app = express();
app.use(function middleware(req, res, next) {
  console.log(1)
  // runs the rest of the stack before the line below executes
  next();
  console.log(2)
});

app.use(function middleware(req, res, next) {
  console.log(3)
  next();
  console.log(4)
});

app.use(function middleware(req, res, next) {
  console.log(5)
  // last user middleware: nothing further logs, so 6 follows 5 directly
  next();
  console.log(6)
});

执行顺序为1,3,5,6,4,2
router
大致意思为执行顺序从外到内再从内到外

其实很容易理解:next()是同步调用的,每个中间件调用next()时,下一个中间件就被压进调用栈,所以执行函数入栈的时候是从上往下执行;到最后一个函数执行完、调用栈需要逐层返回的时候,就从最后一个往回执行每个next()后面的代码啦

Metadata

Metadata

Assignees

No one assigned

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions