diff --git a/keras_model.cc b/keras_model.cc
index 24d5563..4b3d5b6 100644
--- a/keras_model.cc
+++ b/keras_model.cc
@@ -7,421 +7,555 @@
 #include "keras_model.h"
 
 #include <cmath>
+#include <cstdio>
 #include <fstream>
 #include <limits>
+#include <string>
 #include <vector>
 
+namespace kerasify {
+
-bool ReadUnsignedInt(std::ifstream* file, unsigned int* i)
-{
-    KASSERT(file, "Invalid file stream");
-    KASSERT(i, "Invalid pointer");
+bool ReadUnsignedInt(std::ifstream* file, unsigned int* i) {
+  KASSERT(file, "Invalid file stream");
+  KASSERT(i, "Invalid pointer");
 
-    file->read((char *) i, sizeof(unsigned int));
-    KASSERT(file->gcount() == sizeof(unsigned int), "Expected unsigned int");
+  file->read((char*)i, sizeof(unsigned int));
+  KASSERT(file->gcount() == sizeof(unsigned int), "Expected unsigned int");
 
-    return true;
+  return true;
 }
 
-bool ReadFloat(std::ifstream* file, float* f)
-{
-    KASSERT(file, "Invalid file stream");
-    KASSERT(f, "Invalid pointer");
+bool ReadFloat(std::ifstream* file, float* f) {
+  KASSERT(file, "Invalid file stream");
+  KASSERT(f, "Invalid pointer");
 
-    file->read((char *) f, sizeof(float));
-    KASSERT(file->gcount() == sizeof(float), "Expected float");
+  file->read((char*)f, sizeof(float));
+  KASSERT(file->gcount() == sizeof(float), "Expected float");
 
-    return true;
+  return true;
 }
 
-bool ReadFloats(std::ifstream* file, float* f, size_t n)
-{
-    KASSERT(file, "Invalid file stream");
-    KASSERT(f, "Invalid pointer");
+bool ReadFloats(std::ifstream* file, float* f, size_t n) {
+  KASSERT(file, "Invalid file stream");
+  KASSERT(f, "Invalid pointer");
 
-    file->read((char *) f, sizeof(float) * n);
-    KASSERT(((unsigned int) file->gcount()) == sizeof(float) * n, "Expected floats");
+  file->read((char*)f, sizeof(float) * n);
+  KASSERT(((unsigned int)file->gcount()) == sizeof(float) * n,
+          "Expected floats");
 
-    return true;
+  return true;
 }
 
-bool KerasLayerActivation::LoadLayer(std::ifstream* file)
-{
-    KASSERT(file, "Invalid file stream");
-
-    unsigned int activation = 0;
-    KASSERT(ReadUnsignedInt(file, &activation), "Failed to read activation type");
-
-    switch (activation)
-    {
-    case kLinear:
-        activation_type_ = kLinear;
-        break;
-    case kRelu:
-        activation_type_ = kRelu;
-        break;
-    case kSoftPlus:
-        activation_type_ = kSoftPlus;
-        break;
-    default:
-        KASSERT(false, "Unsupported activation type %d", activation);
-    }
+bool ReadString(std::ifstream* file, std::string* str) {
+  KASSERT(file, "Invalid file stream");
+  KASSERT(str, "Invalid pointer");
 
-    return true;
+  unsigned int n;
+  KASSERT(ReadUnsignedInt(file, &n), "Expected string size");
+
+  std::vector<char> buffer(n);
+  file->read(buffer.data(), n);
+  KASSERT(((unsigned int)file->gcount()) == n, "Expected chars");
+
+  // Strings are null-padded to a 4-byte boundary on export; trim the padding.
+  const std::string padded(buffer.begin(), buffer.end());
+  *str = padded.substr(0, padded.find('\0'));
+
+  return true;
 }
 
-bool KerasLayerActivation::Apply(Tensor* in, Tensor* out)
-{
-    KASSERT(in, "Invalid input");
-    KASSERT(out, "Invalid output");
-
-    *out = *in;
-
-    switch (activation_type_)
-    {
-    case kLinear:
-        break;
-    case kRelu:
-        for (size_t i = 0; i < out->data_.size(); i++)
-        {
-            if(out->data_[i] < 0.0)
-            {
-                out->data_[i] = 0.0;
-            }
-        }
-        break;
-    case kSoftPlus:
-        for (size_t i = 0; i < out->data_.size(); i++)
-        {
-            out->data_[i] = std::log(1.0 + std::exp(out->data_[i]));
-        }
-        break;
-    default:
-        break;
-    }
+bool ReadStrings(std::ifstream* file, std::vector<std::string>* strs) {
+  KASSERT(file, "Invalid file stream");
+  KASSERT(strs, "Invalid pointer");
 
-    return true;
+  unsigned int n;
+  KASSERT(ReadUnsignedInt(file, &n), "Expected string list count");
+
+  strs->clear();
+  strs->resize(n);
+  for (unsigned
int i = 0; i < n; i++) { + KASSERT(ReadString(file, &((*strs)[i])), "Expected string in list"); + } + + return true; +} + +bool KerasLayerInput::LoadLayer(std::ifstream* file) { + KASSERT(file, "Invalid file stream"); + + return true; } -bool KerasLayerDense::LoadLayer(std::ifstream* file) -{ - KASSERT(file, "Invalid file stream"); +bool KerasLayerInput::Apply(const std::vector& in_list, Tensor* out) { + KASSERT(in_list.size() == 1, "Invalid input"); + KASSERT(out, "Invalid output"); - unsigned int weights_rows = 0; - KASSERT(ReadUnsignedInt(file, &weights_rows), "Expected weight rows"); - KASSERT(weights_rows > 0, "Invalid weights # rows"); + *out = *in_list[0]; - unsigned int weights_cols = 0; - KASSERT(ReadUnsignedInt(file, &weights_cols), "Expected weight cols"); - KASSERT(weights_cols > 0, "Invalid weights shape"); + return true; +} - unsigned int biases_shape = 0; - KASSERT(ReadUnsignedInt(file, &biases_shape), "Expected biases shape"); - KASSERT(biases_shape > 0, "Invalid biases shape"); +bool KerasLayerMerge::LoadLayer(std::ifstream* file) { + KASSERT(file, "Invalid file stream"); - weights_.Resize(weights_rows, weights_cols); - KASSERT(ReadFloats(file, weights_.data_.data(), weights_rows * weights_cols), "Expected weights"); + return true; +} - biases_.Resize(biases_shape); - KASSERT(ReadFloats(file, biases_.data_.data(), biases_shape), "Expected biases"); +bool KerasLayerMerge::Apply(const std::vector& in_list, Tensor* out) { + KASSERT(!in_list.empty(), "Invalid input"); + KASSERT(out, "Invalid output"); - KASSERT(activation_.LoadLayer(file), "Failed to load activation"); + Tensor tmp = *in_list[0]; + for (unsigned int i = 1; i < in_list.size(); i++) { + KASSERT(tmp.Append(*in_list[i]), "Unable to append tensor"); + } - return true; + *out = tmp; + return true; } -bool KerasLayerDense::Apply(Tensor* in, Tensor* out) -{ - KASSERT(in, "Invalid input"); - KASSERT(out, "Invalid output"); - KASSERT(in->dims_.size() <= 2, "Invalid input dimensions"); +bool KerasLayerActivation::LoadLayer(std::ifstream* file) { + KASSERT(file, "Invalid file stream"); + + unsigned int activation = 0; + KASSERT(ReadUnsignedInt(file, &activation), "Failed to read activation type"); + + switch (activation) { + case kLinear: + activation_type_ = kLinear; + break; + case kRelu: + activation_type_ = kRelu; + break; + case kSoftPlus: + activation_type_ = kSoftPlus; + break; + default: + KASSERT(false, "Unsupported activation type %d", activation); + } + + return true; +} - if (in->dims_.size() == 2) - { - KASSERT(in->dims_[1] == weights_.dims_[0], - "Dimension mismatch %d %d", in->dims_[1], weights_.dims_[0]); - } +bool KerasLayerActivation::Apply(const std::vector& in_list, + Tensor* out) { + KASSERT(in_list.size() == 1, "Invalid input"); + KASSERT(out, "Invalid output"); - Tensor tmp(weights_.dims_[1]); + *out = *in_list[0]; - for (int i = 0; i < weights_.dims_[0]; i++) - { - for (int j = 0; j < weights_.dims_[1]; j++) - { - tmp(j) += (*in)(i) * weights_(i, j); + switch (activation_type_) { + case kLinear: + break; + case kRelu: + for (size_t i = 0; i < out->data_.size(); i++) { + if (out->data_[i] < 0.0) { + out->data_[i] = 0.0; } + } + break; + case kSoftPlus: + for (size_t i = 0; i < out->data_.size(); i++) { + out->data_[i] = std::log(1.0 + std::exp(out->data_[i])); + } + break; + default: + break; + } + + return true; +} + +bool KerasLayerDense::LoadLayer(std::ifstream* file) { + KASSERT(file, "Invalid file stream"); + + unsigned int weights_rows = 0; + KASSERT(ReadUnsignedInt(file, &weights_rows), 
"Expected weight rows"); + KASSERT(weights_rows > 0, "Invalid weights # rows"); + + unsigned int weights_cols = 0; + KASSERT(ReadUnsignedInt(file, &weights_cols), "Expected weight cols"); + KASSERT(weights_cols > 0, "Invalid weights shape"); + + unsigned int biases_shape = 0; + KASSERT(ReadUnsignedInt(file, &biases_shape), "Expected biases shape"); + KASSERT(biases_shape > 0, "Invalid biases shape"); + + weights_.Resize(weights_rows, weights_cols); + KASSERT(ReadFloats(file, weights_.data_.data(), weights_rows * weights_cols), + "Expected weights"); + + biases_.Resize(biases_shape); + KASSERT(ReadFloats(file, biases_.data_.data(), biases_shape), + "Expected biases"); + + KASSERT(activation_.LoadLayer(file), "Failed to load activation"); + + return true; +} + +bool KerasLayerDense::Apply(const std::vector& in_list, Tensor* out) { + KASSERT(in_list.size() == 1, "Invalid input"); + KASSERT(out, "Invalid output"); + + Tensor* in = in_list[0]; + KASSERT(in->dims_.size() <= 2, "Invalid input dimensions"); + + if (in->dims_.size() == 2) { + KASSERT(in->dims_[1] == weights_.dims_[0], "Dimension mismatch %d %d", + in->dims_[1], weights_.dims_[0]); + } + + Tensor tmp(weights_.dims_[1]); + + for (int i = 0; i < weights_.dims_[0]; i++) { + for (int j = 0; j < weights_.dims_[1]; j++) { + tmp(j) += (*in)(i)*weights_(i, j); } - - for (int i = 0; i < biases_.dims_[0]; i++) - { - tmp(i) += biases_(i); - } + } - KASSERT(activation_.Apply(&tmp, out), "Failed to apply activation"); + for (int i = 0; i < biases_.dims_[0]; i++) { + tmp(i) += biases_(i); + } - return true; + KASSERT(activation_.Apply({&tmp}, out), "Failed to apply activation"); + + return true; } -bool KerasLayerConvolution2d::LoadLayer(std::ifstream* file) -{ - KASSERT(file, "Invalid file stream"); +bool KerasLayerConvolution2d::LoadLayer(std::ifstream* file) { + KASSERT(file, "Invalid file stream"); - unsigned int weights_i = 0; - KASSERT(ReadUnsignedInt(file, &weights_i), "Expected weights_i"); - KASSERT(weights_i > 0, "Invalid weights # i"); + unsigned int weights_i = 0; + KASSERT(ReadUnsignedInt(file, &weights_i), "Expected weights_i"); + KASSERT(weights_i > 0, "Invalid weights # i"); - unsigned int weights_j = 0; - KASSERT(ReadUnsignedInt(file, &weights_j), "Expected weights_j"); - KASSERT(weights_j > 0, "Invalid weights # j"); + unsigned int weights_j = 0; + KASSERT(ReadUnsignedInt(file, &weights_j), "Expected weights_j"); + KASSERT(weights_j > 0, "Invalid weights # j"); - unsigned int weights_k = 0; - KASSERT(ReadUnsignedInt(file, &weights_k), "Expected weights_k"); - KASSERT(weights_k > 0, "Invalid weights # k"); + unsigned int weights_k = 0; + KASSERT(ReadUnsignedInt(file, &weights_k), "Expected weights_k"); + KASSERT(weights_k > 0, "Invalid weights # k"); - unsigned int weights_l = 0; - KASSERT(ReadUnsignedInt(file, &weights_l), "Expected weights_l"); - KASSERT(weights_l > 0, "Invalid weights # l"); + unsigned int weights_l = 0; + KASSERT(ReadUnsignedInt(file, &weights_l), "Expected weights_l"); + KASSERT(weights_l > 0, "Invalid weights # l"); - unsigned int biases_shape = 0; - KASSERT(ReadUnsignedInt(file, &biases_shape), "Expected biases shape"); - KASSERT(biases_shape > 0, "Invalid biases shape"); + unsigned int biases_shape = 0; + KASSERT(ReadUnsignedInt(file, &biases_shape), "Expected biases shape"); + KASSERT(biases_shape > 0, "Invalid biases shape"); - weights_.Resize(weights_i, weights_j, weights_k, weights_l); - KASSERT(ReadFloats(file, weights_.data_.data(), - weights_i * weights_j * weights_k * weights_l), "Expected 
weights"); + weights_.Resize(weights_i, weights_j, weights_k, weights_l); + KASSERT(ReadFloats(file, weights_.data_.data(), + weights_i * weights_j * weights_k * weights_l), + "Expected weights"); - biases_.Resize(biases_shape); - KASSERT(ReadFloats(file, biases_.data_.data(), biases_shape), "Expected biases"); + biases_.Resize(biases_shape); + KASSERT(ReadFloats(file, biases_.data_.data(), biases_shape), + "Expected biases"); - KASSERT(activation_.LoadLayer(file), "Failed to load activation"); + KASSERT(activation_.LoadLayer(file), "Failed to load activation"); - return true; + return true; } -bool KerasLayerConvolution2d::Apply(Tensor* in, Tensor* out) -{ - KASSERT(in, "Invalid input"); - KASSERT(out, "Invalid output"); - - KASSERT(in->dims_[0] == weights_.dims_[1], "Input 'depth' doesn't match kernel 'depth'"); - - int st_nj = (weights_.dims_[2] - 1) / 2; - int st_pj = (weights_.dims_[2]) / 2; - int st_nk = (weights_.dims_[3] - 1) / 2; - int st_pk = (weights_.dims_[3]) / 2; - - Tensor tmp(weights_.dims_[0], - in->dims_[1] - st_nj - st_pj, - in->dims_[2] - st_nk - st_pk); - - // Iterate over each kernel. - for (int i = 0; i < weights_.dims_[0]; i++) - { - // Iterate over each 'depth'. - for (int j = 0; j < weights_.dims_[1]; j++) - { - // 2D convolution in x and y (k and l in Tensor dimensions). - for(int tj = st_nj; tj < in->dims_[1] - st_pj; tj++) - { - for(int tk = st_nk; tk < in->dims_[2] - st_pk; tk++) - { - // Iterate over kernel. - for(int k = 0; k < weights_.dims_[2]; k++) - { - for(int l = 0; l < weights_.dims_[3]; l++) - { - const float& weight = weights_(i, j, k, l); - const float& value = (*in)(j, tj - st_nj + k, tk - st_nk + l); - - tmp(i, tj - st_nj, tk - st_nk) += weight * value; - } - } - } +bool KerasLayerConvolution2d::Apply(const std::vector& in_list, + Tensor* out) { + KASSERT(in_list.size() == 1, "Invalid input"); + KASSERT(out, "Invalid output"); + + Tensor* in = in_list[0]; + KASSERT(in->dims_[0] == weights_.dims_[1], + "Input 'depth' doesn't match kernel 'depth'"); + + int st_nj = (weights_.dims_[2] - 1) / 2; + int st_pj = (weights_.dims_[2]) / 2; + int st_nk = (weights_.dims_[3] - 1) / 2; + int st_pk = (weights_.dims_[3]) / 2; + + Tensor tmp(weights_.dims_[0], in->dims_[1] - st_nj - st_pj, + in->dims_[2] - st_nk - st_pk); + + // Iterate over each kernel. + for (int i = 0; i < weights_.dims_[0]; i++) { + // Iterate over each 'depth'. + for (int j = 0; j < weights_.dims_[1]; j++) { + // 2D convolution in x and y (k and l in Tensor dimensions). + for (int tj = st_nj; tj < in->dims_[1] - st_pj; tj++) { + for (int tk = st_nk; tk < in->dims_[2] - st_pk; tk++) { + // Iterate over kernel. + for (int k = 0; k < weights_.dims_[2]; k++) { + for (int l = 0; l < weights_.dims_[3]; l++) { + const float& weight = weights_(i, j, k, l); + const float& value = (*in)(j, tj - st_nj + k, tk - st_nk + l); + + tmp(i, tj - st_nj, tk - st_nk) += weight * value; } + } } + } + } - // Apply kernel bias to all points in output. - for (int j = 0; j < tmp.dims_[1]; j++) - { - for (int k = 0; k < tmp.dims_[2]; k++) - { - tmp(i, j, k) += biases_(i); - } - } + // Apply kernel bias to all points in output. 
+ for (int j = 0; j < tmp.dims_[1]; j++) { + for (int k = 0; k < tmp.dims_[2]; k++) { + tmp(i, j, k) += biases_(i); + } } + } - KASSERT(activation_.Apply(&tmp, out), "Failed to apply activation"); + KASSERT(activation_.Apply({&tmp}, out), "Failed to apply activation"); - return true; + return true; } - -bool KerasLayerFlatten::LoadLayer(std::ifstream* file) -{ - KASSERT(file, "Invalid file stream"); - return true; +bool KerasLayerFlatten::LoadLayer(std::ifstream* file) { + KASSERT(file, "Invalid file stream"); + return true; } -bool KerasLayerFlatten::Apply(Tensor* in, Tensor* out) -{ - KASSERT(in, "Invalid input"); - KASSERT(out, "Invalid output"); +bool KerasLayerFlatten::Apply(const std::vector& in_list, + Tensor* out) { + KASSERT(in_list.size() == 1, "Invalid input"); + KASSERT(out, "Invalid output"); - *out = *in; - out->Flatten(); + *out = *in_list[0]; + out->Flatten(); - return true; + return true; } -bool KerasLayerElu::LoadLayer(std::ifstream* file) -{ - KASSERT(file, "Invalid file stream"); +bool KerasLayerElu::LoadLayer(std::ifstream* file) { + KASSERT(file, "Invalid file stream"); - KASSERT(ReadFloat(file, &alpha_), "Failed to read alpha"); + KASSERT(ReadFloat(file, &alpha_), "Failed to read alpha"); - return true; + return true; } -bool KerasLayerElu::Apply(Tensor* in, Tensor* out) -{ - KASSERT(in, "Invalid input"); - KASSERT(out, "Invalid output"); +bool KerasLayerElu::Apply(const std::vector& in_list, Tensor* out) { + KASSERT(in_list.size() == 1, "Invalid input"); + KASSERT(out, "Invalid output"); - *out = *in; + *out = *in_list[0]; - for (size_t i = 0; i < out->data_.size(); i++) - { - if(out->data_[i] < 0.0) - { - out->data_[i] = alpha_ * (exp(out->data_[i]) - 1.0); - } + for (size_t i = 0; i < out->data_.size(); i++) { + if (out->data_[i] < 0.0) { + out->data_[i] = alpha_ * (exp(out->data_[i]) - 1.0); } + } - return true; + return true; } -bool KerasLayerMaxPooling2d::LoadLayer(std::ifstream* file) -{ - KASSERT(file, "Invalid file stream"); +bool KerasLayerMaxPooling2d::LoadLayer(std::ifstream* file) { + KASSERT(file, "Invalid file stream"); - KASSERT(ReadUnsignedInt(file, &pool_size_j_), "Expected pool size j"); - KASSERT(ReadUnsignedInt(file, &pool_size_k_), "Expected pool size k"); + KASSERT(ReadUnsignedInt(file, &pool_size_j_), "Expected pool size j"); + KASSERT(ReadUnsignedInt(file, &pool_size_k_), "Expected pool size k"); - return true; + return true; } -bool KerasLayerMaxPooling2d::Apply(Tensor* in, Tensor* out) -{ - KASSERT(in, "Invalid input"); - KASSERT(out, "Invalid output"); - - KASSERT(in->dims_.size() == 3, "Input must have 3 dimensions"); - - Tensor tmp(in->dims_[0], - in->dims_[1] / pool_size_j_, - in->dims_[2] / pool_size_k_); - - for (int i = 0; i < tmp.dims_[0]; i++) - { - for (int j = 0; j < tmp.dims_[1]; j++) - { - const int tj = j * pool_size_j_; - - for (int k = 0; k < tmp.dims_[2]; k++) - { - const int tk = k * pool_size_k_; - - // Find maximum value over patch starting at tj, tk. 
-            float max_val = -std::numeric_limits<float>::infinity();
-
-            for (unsigned int pj = 0; pj < pool_size_j_; pj++)
-            {
-                for (unsigned int pk = 0; pk < pool_size_k_; pk++)
-                {
-                    const float& pool_val = (*in)(i, tj + pj, tk + pk);
-                    if (pool_val > max_val) {
-                        max_val = pool_val;
-                    }
-                }
-            }
-
-            tmp(i, j, k) = max_val;
+bool KerasLayerMaxPooling2d::Apply(const std::vector<Tensor*>& in_list,
+                                   Tensor* out) {
+  KASSERT(in_list.size() == 1, "Invalid input");
+  KASSERT(out, "Invalid output");
+
+  Tensor* in = in_list[0];
+  KASSERT(in->dims_.size() == 3, "Input must have 3 dimensions");
+
+  Tensor tmp(in->dims_[0], in->dims_[1] / pool_size_j_,
+             in->dims_[2] / pool_size_k_);
+
+  for (int i = 0; i < tmp.dims_[0]; i++) {
+    for (int j = 0; j < tmp.dims_[1]; j++) {
+      const int tj = j * pool_size_j_;
+
+      for (int k = 0; k < tmp.dims_[2]; k++) {
+        const int tk = k * pool_size_k_;
+
+        // Find maximum value over patch starting at tj, tk.
+        float max_val = -std::numeric_limits<float>::infinity();
+
+        for (unsigned int pj = 0; pj < pool_size_j_; pj++) {
+          for (unsigned int pk = 0; pk < pool_size_k_; pk++) {
+            const float& pool_val = (*in)(i, tj + pj, tk + pk);
+            if (pool_val > max_val) {
+              max_val = pool_val;
+            }
+          }
+        }
+
+        tmp(i, j, k) = max_val;
+      }
+    }
+  }
 
-    *out = tmp;
+  *out = tmp;
 
-    return true;
+  return true;
 }
 
+bool KerasModel::LoadModel(const std::string& filename) {
+  std::ifstream file(filename.c_str(), std::ios::binary);
+  KASSERT(file.is_open(), "Unable to open file %s", filename.c_str());
+
+  unsigned int num_layers = 0;
+  KASSERT(ReadUnsignedInt(&file, &num_layers), "Expected number of layers");
+
+  KASSERT(ReadStrings(&file, &input_layer_names_),
+          "Expected input layer names");
+  KASSERT(!input_layer_names_.empty(),
+          "Expected at least one input layer name.");
+  KASSERT(ReadStrings(&file, &output_layer_names_),
+          "Expected output layer names");
+  KASSERT(!output_layer_names_.empty(),
+          "Expected at least one output layer name.");
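+
+  // Serialized layout, as read below: layer count, input layer names,
+  // output layer names, then for each layer its name, its inbound layer
+  // names, a type tag, and the type-specific payload.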
&layer_type), "Expected layer type"); - - KerasLayer* layer = NULL; - - switch (layer_type) - { - case kDense: - layer = new KerasLayerDense(); - break; - case kConvolution2d: - layer = new KerasLayerConvolution2d(); - break; - case kFlatten: - layer = new KerasLayerFlatten(); - break; - case kElu: - layer = new KerasLayerElu(); - break; - case kActivation: - layer = new KerasLayerActivation(); - break; - case kMaxPooling2D: - layer = new KerasLayerMaxPooling2d(); - break; - default: - break; - } + KASSERT(layer, "Unknown layer type %d", layer_type); - KASSERT(layer, "Unknown layer type %d", layer_type); + KASSERT(layer->LoadLayer(&file), "Failed to load layer %d", i); + layers_.push_back(layer); - KASSERT(layer->LoadLayer(&file), "Failed to load layer %d", i); - layers_.push_back(layer); - } + graph_.Initialize(layers_); + } + + return true; +} +bool KerasModel::Apply(Tensor* in, Tensor* out) { + KASSERT(output_layer_names_.size() == 1, + "Only single output models supported."); + const std::string& output_layer_name = output_layer_names_[0]; + + KASSERT(input_layer_names_.size() == 1, + "Only single input models supported."); + const std::string& input_layer_name = input_layer_names_[0]; + + std::unordered_map in_map = {{input_layer_name, in}}; + std::unordered_map out_map = {{output_layer_name, out}}; + return Apply(in_map, &out_map); +} + +bool KerasGraph::KerasNode::Initialize(KerasGraph* graph) { + for (const std::string& layer_name : layer_->inbound_layer_names()) { + inbound_nodes_.push_back(graph->GetOrCreateNode(layer_name)); + } + return true; +} + +bool KerasGraph::KerasNode::Compute() { + if (result_ != nullptr) { return true; + } + + std::vector in_list; + for (KerasNode* node : inbound_nodes_) { + KASSERT(node->Compute(), "Unable to compute node"); + in_list.push_back(node->result()); + } + + result_.reset(new Tensor()); + KASSERT(layer_->Apply(in_list, result_.get()), "Failed to apply layer %s", + layer_->name().c_str()); + return true; } -bool KerasModel::Apply(Tensor* in, Tensor* out) -{ - Tensor temp_in, temp_out; +bool KerasGraph::Initialize(const std::vector& layers) { + // Build layer map. + for (KerasLayer* layer : layers) { + layer_map_[layer->name()] = layer; + } - for (unsigned int i = 0; i < layers_.size(); i++) - { - if (i == 0) - { - temp_in = *in; - } + return true; +} - KASSERT(layers_[i]->Apply(&temp_in, &temp_out), "Failed to apply layer %d", i); +KerasGraph::KerasNode* KerasGraph::GetOrCreateNode( + const std::string& layer_name) { + if (node_map_.find(layer_name) == node_map_.end()) { + KerasLayer* layer = layer_map_[layer_name]; + node_map_[layer_name] = std::unique_ptr(new KerasNode(layer)); + node_map_[layer_name]->Initialize(this); + } - temp_in = temp_out; - } + return node_map_[layer_name].get(); +} - *out = temp_out; +bool KerasGraph::Evaluate(TensorMap& in_map, TensorMap* out_map) { + // Set input on input nodes in graph. + for (auto in_map_iter : in_map) { + const std::string& layer_name = in_map_iter.first; + Tensor* in = in_map_iter.second; + + KerasNode* in_node = GetOrCreateNode(layer_name); + in_node->SetResult(*in); + } + + // Compute output nodes. + for (auto out_map_iter : *out_map) { + const std::string& layer_name = out_map_iter.first; + Tensor* out = out_map_iter.second; + KerasNode* out_node = GetOrCreateNode(layer_name); + KASSERT(out_node->Compute(), "Unable to compute node for %s", + layer_name.c_str()); + *out = *out_node->result(); + } + + // Clear computation nodes. 
+ for (const auto& node_pair : node_map_) { + KASSERT(node_pair.second->Clear(), "Unable to clear node for compute"); + } + return true; +} - return true; +bool KerasModel::Apply(TensorMap& in_map, TensorMap* out_map) { + KASSERT(!in_map.empty(), "No inputs provided"); + KASSERT(out_map, "Invalid output map"); + KASSERT(!out_map->empty(), "No outputs requested"); + + return graph_.Evaluate(in_map, out_map); } + +} // namespace kerasify diff --git a/keras_model.h b/keras_model.h index 6efc1c8..8bd3461 100644 --- a/keras_model.h +++ b/keras_model.h @@ -7,375 +7,478 @@ #ifndef KERAS_MODEL_H_ #define KERAS_MODEL_H_ -#include #include +#include +#include #include +#include #include -#define KASSERT(x, ...) \ - if (!(x)) { \ - printf("KASSERT: %s(%d): ", __FILE__, __LINE__); \ - printf(__VA_ARGS__); \ - printf("\n"); \ - return false; \ - } +#define KASSERT(x, ...) \ + if (!(x)) { \ + printf("KASSERT: %s(%d): ", __FILE__, __LINE__); \ + printf(__VA_ARGS__); \ + printf("\n"); \ + return false; \ + } -#define KASSERT_EQ(x, y, eps) \ - if (fabs(x - y) > eps) { printf("KASSERT: Expected %f, got %f\n", y, x); return false; } +#define KASSERT_EQ(x, y, eps) \ + if (fabs(x - y) > eps) { \ + printf("KASSERT: Expected %f, got %f\n", y, x); \ + return false; \ + } #ifdef DEBUG -#define KDEBUG(x, ...) \ - if (!(x)) { \ - printf("%s(%d): ", __FILE__, __LINE__); \ - printf(__VA_ARGS__); \ - printf("\n"); \ - exit(-1); \ - } +#define KDEBUG(x, ...) \ + if (!(x)) { \ + printf("%s(%d): ", __FILE__, __LINE__); \ + printf(__VA_ARGS__); \ + printf("\n"); \ + exit(-1); \ + } #else #define KDEBUG(x, ...) ; #endif -class Tensor { -public: - Tensor() {} +namespace kerasify { - Tensor(int i) - { - Resize(i); - } +class Tensor { + public: + Tensor() {} - Tensor(int i, int j) - { - Resize(i, j); - } + Tensor(const std::vector& dims, const std::vector& data) + : dims_(dims), data_(data) { + KDEBUG(!dims.empty(), "Invalid dimensions"); + } - Tensor(int i, int j, int k) - { - Resize(i, j, k); - } + Tensor(int i) { Resize(i); } - Tensor(int i, int j, int k, int l) - { - Resize(i, j, k, l); - } + Tensor(int i, int j) { Resize(i, j); } - void Resize(int i) - { - dims_ = {i}; - data_.resize(i); - } + Tensor(int i, int j, int k) { Resize(i, j, k); } - void Resize(int i, int j) - { - dims_ = {i, j}; - data_.resize(i * j); - } + Tensor(int i, int j, int k, int l) { Resize(i, j, k, l); } - void Resize(int i, int j, int k) - { - dims_ = {i, j, k}; - data_.resize(i * j * k); - } + void Resize(int i) { + dims_ = {i}; + data_.resize(i); + } - void Resize(int i, int j, int k, int l) - { - dims_ = {i, j, k, l}; - data_.resize(i * j * k * l); - } + void Resize(int i, int j) { + dims_ = {i, j}; + data_.resize(i * j); + } - inline void Flatten() - { - KDEBUG(dims_.size() > 0, "Invalid tensor"); + void Resize(int i, int j, int k) { + dims_ = {i, j, k}; + data_.resize(i * j * k); + } - int elements = dims_[0]; - for (unsigned int i = 1; i < dims_.size(); i++) - { - elements *= dims_[i]; - } - dims_ = {elements}; - } + void Resize(int i, int j, int k, int l) { + dims_ = {i, j, k, l}; + data_.resize(i * j * k * l); + } - inline float& operator()(int i) - { - KDEBUG(dims_.size() == 1, "Invalid indexing for tensor"); - KDEBUG(i < dims_[0] && i >= 0, "Invalid i: %d (max %d)", i, dims_[0]); + const std::vector& dims() const { return dims_; } - return data_[i]; + // Concatenate along first dimension. + inline bool Append(const Tensor& other) { + // Check for compatible dimensionality. 
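+    // (Append() concatenates along dims_[0]; e.g. {2, 3} + {4, 3} -> {6, 3}.
+    // Every dimension after the first must match.)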
+ if (dims_.size() != other.dims_.size()) { + return false; } - inline float& operator()(int i, int j) - { - KDEBUG(dims_.size() == 2, "Invalid indexing for tensor"); - KDEBUG(i < dims_[0] && i >= 0, "Invalid i: %d (max %d)", i, dims_[0]); - KDEBUG(j < dims_[1] && j >= 0, "Invalid j: %d (max %d)", j, dims_[1]); - - return data_[dims_[1] * i + j]; + // Skip the batch first dimension. All other dimensions should match. + for (unsigned int i = 1; i < dims_.size(); i++) { + if (dims_[i] != other.dims_[i]) { + return false; + } } - inline float& operator()(int i, int j, int k) - { - KDEBUG(dims_.size() == 3, "Invalid indexing for tensor"); - KDEBUG(i < dims_[0] && i >= 0, "Invalid i: %d (max %d)", i, dims_[0]); - KDEBUG(j < dims_[1] && j >= 0, "Invalid j: %d (max %d)", j, dims_[1]); - KDEBUG(k < dims_[2] && k >= 0, "Invalid k: %d (max %d)", k, dims_[2]); + // Concatenate. + const unsigned int initial_data_size = data_.size(); + dims_[0] += other.dims_[0]; // Update dimensions. + data_.resize(initial_data_size + other.data_.size()); - return data_[dims_[2] * (dims_[1] * i + j) + k]; + unsigned int i = initial_data_size; + for (const auto value : other.data_) { + data_[i] = value; + i++; } + return true; + } - inline float& operator()(int i, int j, int k, int l) - { - KDEBUG(dims_.size() == 4, "Invalid indexing for tensor"); - KDEBUG(i < dims_[0] && i >= 0, "Invalid i: %d (max %d)", i, dims_[0]); - KDEBUG(j < dims_[1] && j >= 0, "Invalid j: %d (max %d)", j, dims_[1]); - KDEBUG(k < dims_[2] && k >= 0, "Invalid k: %d (max %d)", k, dims_[2]); - KDEBUG(l < dims_[3] && l >= 0, "Invalid l: %d (max %d)", l, dims_[3]); + inline void Flatten() { + KDEBUG(dims_.size() > 0, "Invalid tensor"); - return data_[dims_[3] * (dims_[2] * (dims_[1] * i + j) + k) + l]; + int elements = dims_[0]; + for (unsigned int i = 1; i < dims_.size(); i++) { + elements *= dims_[i]; } - - void Print() - { - if (dims_.size() == 1) { - printf("[ "); - for (int i = 0; i < dims_[0]; i++) { - printf("%f ", (*this)(i)); - } - printf("]\n"); - } else if (dims_.size() == 2) { - printf("[\n"); - for (int i = 0; i < dims_[0]; i++) { - printf(" [ "); - for (int j = 0; j < dims_[1]; j++) { - printf("%f ", (*this)(i, j)); - } - printf("]\n"); - } - printf("]\n"); - } else if (dims_.size() == 3) { - printf("[\n"); - for (int i = 0; i < dims_[0]; i++) { - printf(" [\n"); - for (int j = 0; j < dims_[1]; j++) { - printf(" [ "); - for (int k = 0; k < dims_[2]; k++) { - printf("%f ", (*this)(i, j, k)); - } - printf(" ]\n"); - } - printf(" ]\n"); - } - printf("]\n"); - } else if (dims_.size() == 4) { - printf("[\n"); - for (int i = 0; i < dims_[0]; i++) { - printf(" [\n"); - for (int j = 0; j < dims_[1]; j++) { - printf(" [\n"); - for (int k = 0; k < dims_[2]; k++) { - printf(" ["); - for (int l = 0; l < dims_[3]; l++) { - printf("%f ", (*this)(i, j, k, l)); - } - printf("]\n"); - } - printf(" ]\n"); - } - printf(" ]\n"); + dims_ = {elements}; + } + + inline float& operator()(int i) { + KDEBUG(dims_.size() == 1, "Invalid indexing for tensor"); + KDEBUG(i < dims_[0] && i >= 0, "Invalid i: %d (max %d)", i, dims_[0]); + + return data_[i]; + } + + inline float& operator()(int i, int j) { + KDEBUG(dims_.size() == 2, "Invalid indexing for tensor"); + KDEBUG(i < dims_[0] && i >= 0, "Invalid i: %d (max %d)", i, dims_[0]); + KDEBUG(j < dims_[1] && j >= 0, "Invalid j: %d (max %d)", j, dims_[1]); + + return data_[dims_[1] * i + j]; + } + + inline float& operator()(int i, int j, int k) { + KDEBUG(dims_.size() == 3, "Invalid indexing for tensor"); + KDEBUG(i < 
dims_[0] && i >= 0, "Invalid i: %d (max %d)", i, dims_[0]);
+    KDEBUG(j < dims_[1] && j >= 0, "Invalid j: %d (max %d)", j, dims_[1]);
+    KDEBUG(k < dims_[2] && k >= 0, "Invalid k: %d (max %d)", k, dims_[2]);
+
+    return data_[dims_[2] * (dims_[1] * i + j) + k];
+  }
+
+  inline float& operator()(int i, int j, int k, int l) {
+    KDEBUG(dims_.size() == 4, "Invalid indexing for tensor");
+    KDEBUG(i < dims_[0] && i >= 0, "Invalid i: %d (max %d)", i, dims_[0]);
+    KDEBUG(j < dims_[1] && j >= 0, "Invalid j: %d (max %d)", j, dims_[1]);
+    KDEBUG(k < dims_[2] && k >= 0, "Invalid k: %d (max %d)", k, dims_[2]);
+    KDEBUG(l < dims_[3] && l >= 0, "Invalid l: %d (max %d)", l, dims_[3]);
+
+    return data_[dims_[3] * (dims_[2] * (dims_[1] * i + j) + k) + l];
+  }
+
+  void Print() {
+    if (dims_.size() == 1) {
+      printf("[ ");
+      for (int i = 0; i < dims_[0]; i++) {
+        printf("%f ", (*this)(i));
+      }
+      printf("]\n");
+    } else if (dims_.size() == 2) {
+      printf("[\n");
+      for (int i = 0; i < dims_[0]; i++) {
+        printf(" [ ");
+        for (int j = 0; j < dims_[1]; j++) {
+          printf("%f ", (*this)(i, j));
+        }
+        printf("]\n");
+      }
+      printf("]\n");
+    } else if (dims_.size() == 3) {
+      printf("[\n");
+      for (int i = 0; i < dims_[0]; i++) {
+        printf(" [\n");
+        for (int j = 0; j < dims_[1]; j++) {
+          printf("  [ ");
+          for (int k = 0; k < dims_[2]; k++) {
+            printf("%f ", (*this)(i, j, k));
+          }
+          printf(" ]\n");
+        }
+        printf(" ]\n");
+      }
+      printf("]\n");
+    } else if (dims_.size() == 4) {
+      printf("[\n");
+      for (int i = 0; i < dims_[0]; i++) {
+        printf(" [\n");
+        for (int j = 0; j < dims_[1]; j++) {
+          printf("  [\n");
+          for (int k = 0; k < dims_[2]; k++) {
+            printf("   [");
+            for (int l = 0; l < dims_[3]; l++) {
+              printf("%f ", (*this)(i, j, k, l));
+            }
+            printf("]\n");
+          }
+          printf("  ]\n");
+        }
+        printf(" ]\n");
+      }
+      printf("]\n");
+    }
+  }
+
+  void PrintShape() {
+    printf("(");
+    for (unsigned int i = 0; i < dims_.size(); i++) {
+      printf("%d ", dims_[i]);
+    }
+    printf(")\n");
+  }
+
+  std::vector<int> dims_;
+  std::vector<float> data_;
+};
+using TensorMap = std::unordered_map<std::string, Tensor*>;
+
+class KerasLayer {
+ public:
+  explicit KerasLayer(const std::string& name,
+                      const std::vector<std::string>& inbound_layer_names)
+      : name_(name), inbound_layer_names_(inbound_layer_names) {}
+
+  virtual ~KerasLayer() = default;
+  virtual bool LoadLayer(std::ifstream* file) = 0;
 
-    std::vector<int> dims_;
-    std::vector<float> data_;
+  virtual bool Apply(const std::vector<Tensor*>& in_list, Tensor* out) = 0;
+
+  const std::string& name() const { return name_; }
+
+  const std::vector<std::string>& inbound_layer_names() const {
+    return inbound_layer_names_;
+  }
+
+ protected:
+  const std::string name_;
+  const std::vector<std::string> inbound_layer_names_;
 };
+using KerasLayerMap = std::unordered_map<std::string, KerasLayer*>;
 
-class KerasLayer {
-public:
-    KerasLayer() {}
+class KerasLayerInput : public KerasLayer {
+ public:
+  explicit KerasLayerInput(const std::string& name,
+                           const std::vector<std::string>& inbound_layer_names)
+      : KerasLayer(name, inbound_layer_names) {}
+
+  virtual ~KerasLayerInput() = default;
+
+  bool LoadLayer(std::ifstream* file) override;
+
+  bool Apply(const std::vector<Tensor*>& in_list, Tensor* out) override;
+};
 
-    virtual ~KerasLayer() {}
+class KerasLayerMerge : public KerasLayer {
+ public:
+  explicit KerasLayerMerge(const std::string& name,
+                           const std::vector<std::string>& inbound_layer_names)
+      : KerasLayer(name, inbound_layer_names) {}
 
-    virtual bool LoadLayer(std::ifstream* file) = 0;
+  virtual ~KerasLayerMerge() = default;
 
-    virtual bool
Apply(Tensor* in, Tensor* out) = 0; + bool LoadLayer(std::ifstream* file) override; + + bool Apply(const std::vector& in_list, Tensor* out) override; }; class KerasLayerActivation : public KerasLayer { -public: - enum ActivationType - { - kLinear = 1, - kRelu = 2, - kSoftPlus = 3 - }; + public: + enum ActivationType { kLinear = 1, kRelu = 2, kSoftPlus = 3 }; - KerasLayerActivation() - : activation_type_(ActivationType::kLinear) - {} + KerasLayerActivation() : KerasLayerActivation("", {}) {} - virtual ~KerasLayerActivation() {} + explicit KerasLayerActivation( + const std::string& name, + const std::vector& inbound_layer_names) + : KerasLayer(name, inbound_layer_names), + activation_type_(ActivationType::kLinear) {} - virtual bool LoadLayer(std::ifstream* file); + virtual ~KerasLayerActivation() = default; - virtual bool Apply(Tensor* in, Tensor* out); + bool LoadLayer(std::ifstream* file) override; -private: + bool Apply(const std::vector& in_list, Tensor* out) override; - ActivationType activation_type_; + private: + ActivationType activation_type_; }; class KerasLayerDense : public KerasLayer { -public: - KerasLayerDense() {} - - virtual ~KerasLayerDense() {} + public: + explicit KerasLayerDense(const std::string& name, + const std::vector& inbound_layer_names) + : KerasLayer(name, inbound_layer_names) {} - virtual bool LoadLayer(std::ifstream* file); + virtual ~KerasLayerDense() = default; - virtual bool Apply(Tensor* in, Tensor* out); + bool LoadLayer(std::ifstream* file) override; -private: + bool Apply(const std::vector& in_list, Tensor* out) override; - Tensor weights_; - Tensor biases_; + private: + Tensor weights_; + Tensor biases_; - KerasLayerActivation activation_; + KerasLayerActivation activation_; }; class KerasLayerConvolution2d : public KerasLayer { -public: - KerasLayerConvolution2d() {} + public: + explicit KerasLayerConvolution2d( + const std::string& name, + const std::vector& inbound_layer_names) + : KerasLayer(name, inbound_layer_names) {} - virtual ~KerasLayerConvolution2d() {} + virtual ~KerasLayerConvolution2d() = default; - virtual bool LoadLayer(std::ifstream* file); + bool LoadLayer(std::ifstream* file) override; - virtual bool Apply(Tensor* in, Tensor* out); + bool Apply(const std::vector& in_list, Tensor* out) override; -private: + private: + Tensor weights_; + Tensor biases_; - Tensor weights_; - Tensor biases_; - - KerasLayerActivation activation_; + KerasLayerActivation activation_; }; class KerasLayerFlatten : public KerasLayer { -public: - KerasLayerFlatten() {} - - virtual ~KerasLayerFlatten() {} + public: + explicit KerasLayerFlatten( + const std::string& name, + const std::vector& inbound_layer_names) + : KerasLayer(name, inbound_layer_names) {} - virtual bool LoadLayer(std::ifstream* file); + virtual ~KerasLayerFlatten() = default; - virtual bool Apply(Tensor* in, Tensor* out); + bool LoadLayer(std::ifstream* file) override; -private: + bool Apply(const std::vector& in_list, Tensor* out) override; + private: }; class KerasLayerElu : public KerasLayer { -public: - KerasLayerElu() - : alpha_(1.0f) - {} + public: + explicit KerasLayerElu(const std::string& name, + const std::vector& inbound_layer_names) + : KerasLayer(name, inbound_layer_names), alpha_(1.0f) {} - virtual ~KerasLayerElu() {} + virtual ~KerasLayerElu() = default; - virtual bool LoadLayer(std::ifstream* file); + bool LoadLayer(std::ifstream* file) override; - virtual bool Apply(Tensor* in, Tensor* out); + bool Apply(const std::vector& in_list, Tensor* out) override; -private: - - float 
alpha_;
+ private:
+  float alpha_;
 };
 
 class KerasLayerMaxPooling2d : public KerasLayer {
-public:
-    KerasLayerMaxPooling2d()
-        : pool_size_j_(0)
-        , pool_size_k_(0)
-    {}
+ public:
+  explicit KerasLayerMaxPooling2d(
+      const std::string& name,
+      const std::vector<std::string>& inbound_layer_names)
+      : KerasLayer(name, inbound_layer_names),
+        pool_size_j_(0),
+        pool_size_k_(0) {}
+
+  virtual ~KerasLayerMaxPooling2d() = default;
+
+  bool LoadLayer(std::ifstream* file) override;
+
+  bool Apply(const std::vector<Tensor*>& in_list, Tensor* out) override;
 
-    virtual ~KerasLayerMaxPooling2d() {}
+ private:
+  unsigned int pool_size_j_;
+  unsigned int pool_size_k_;
+};
+
+// Represents the graph of layer evaluations needed to materialize one or more
+// output layers from one or more inputs to the model.
+class KerasGraph {
+ public:
+  class KerasNode {
+   public:
+    explicit KerasNode(KerasLayer* layer) : layer_(layer) {}
+
+    bool Initialize(KerasGraph* graph);
+    bool Compute();
+
+    void SetResult(const Tensor& in) {
+      result_.reset(new Tensor());
+      *result_ = in;
+    }
+
+    bool Clear() {
+      result_.reset(nullptr);
+      return true;
+    }
+
+    const std::string& name() const { return layer_->name(); }
+    Tensor* result() const { return result_.get(); }
 
-    virtual bool LoadLayer(std::ifstream* file);
+   private:
+    KerasLayer* layer_;
+    std::vector<KerasNode*> inbound_nodes_;
+    std::unique_ptr<Tensor> result_;
+  };
 
-    virtual bool Apply(Tensor* in, Tensor* out);
+  KerasGraph() = default;
 
-private:
+  bool Initialize(const std::vector<KerasLayer*>& layers);
 
-    unsigned int pool_size_j_;
-    unsigned int pool_size_k_;
+  bool Evaluate(TensorMap& in_map, TensorMap* out_map);
+
+ protected:
+  KerasGraph::KerasNode* GetOrCreateNode(const std::string& layer_name);
+
+ private:
+  KerasLayerMap layer_map_;
+  std::unordered_map<std::string, std::unique_ptr<KerasNode>> node_map_;
 };
 
 class KerasModel {
-public:
-
-    enum LayerType
-    {
-        kDense = 1,
-        kConvolution2d = 2,
-        kFlatten = 3,
-        kElu = 4,
-        kActivation = 5,
-        kMaxPooling2D = 6
-    };
-
-    KerasModel()
-    {}
-
-    ~KerasModel()
-    {
-        for (unsigned int i = 0; i < layers_.size(); i++)
-        {
-            delete layers_[i];
-        }
+ public:
+  enum LayerType {
+    kDense = 1,
+    kConvolution2d = 2,
+    kFlatten = 3,
+    kElu = 4,
+    kActivation = 5,
+    kMaxPooling2D = 6,
+    kInput = 7,
+    kMerge = 8
+  };
+
+  KerasModel() = default;
+
+  ~KerasModel() {
+    for (unsigned int i = 0; i < layers_.size(); i++) {
+      delete layers_[i];
     }
+  }
+
+  bool LoadModel(const std::string& filename);
 
-    bool LoadModel(const std::string& filename);
+  bool Apply(Tensor* in, Tensor* out);
 
-    bool Apply(Tensor* in, Tensor* out);
+  bool Apply(TensorMap& in_map, TensorMap* out_map);
 
-private:
-    std::vector<KerasLayer*> layers_;
+ private:
+  std::vector<KerasLayer*> layers_;
+  std::vector<std::string> input_layer_names_;
+  std::vector<std::string> output_layer_names_;
+
+  KerasGraph graph_;
 };
 
 class KerasTimer {
-public:
-    KerasTimer() {}
+ public:
+  KerasTimer() {}
 
-    void Start()
-    {
-        start_ = std::chrono::high_resolution_clock::now();
-    }
+  void Start() { start_ = std::chrono::high_resolution_clock::now(); }
 
-    double Stop()
-    {
-        std::chrono::time_point<std::chrono::high_resolution_clock> now =
-            std::chrono::high_resolution_clock::now();
+  double Stop() {
+    std::chrono::time_point<std::chrono::high_resolution_clock> now =
+        std::chrono::high_resolution_clock::now();
 
-        std::chrono::duration<double> diff = now - start_;
+    std::chrono::duration<double> diff = now - start_;
 
-        return diff.count();
-    }
+    return diff.count();
+  }
 
-private:
-    std::chrono::time_point<std::chrono::high_resolution_clock> start_;
+ private:
+  std::chrono::time_point<std::chrono::high_resolution_clock> start_;
 };
 
-#endif  // KERAS_MODEL_H_
+}  // namespace kerasify
+
+#endif  // KERAS_MODEL_H_
diff --git a/keras_model_test.cc b/keras_model_test.cc
index 684c809..ecc1455
100644 --- a/keras_model_test.cc +++ b/keras_model_test.cc @@ -4,165 +4,178 @@ #include #include -#include "test_dense_1x1.h" -#include "test_dense_10x1.h" -#include "test_dense_2x2.h" -#include "test_dense_10x10.h" -#include "test_dense_10x10x10.h" +namespace kerasify { +#include "test_benchmark.h" #include "test_conv_2x2.h" #include "test_conv_3x3.h" #include "test_conv_3x3x3.h" -#include "test_elu_10.h" -#include "test_relu_10.h" -#include "test_dense_relu_10.h" #include "test_conv_softplus_2x2.h" +#include "test_dense_10x1.h" +#include "test_dense_10x10.h" +#include "test_dense_10x10x10.h" +#include "test_dense_1x1.h" +#include "test_dense_2x2.h" +#include "test_dense_relu_10.h" +#include "test_elu_10.h" +#include "test_func_conv_2x2.h" +#include "test_func_conv_2x2_multi_in_out.h" +#include "test_func_dense_1x1.h" +#include "test_func_dense_1x1_multi_in.h" +#include "test_func_dense_1x1_multi_in_out.h" +#include "test_func_dense_1x1_multi_out.h" +#include "test_func_maxpool2d_3x3x3.h" +#include "test_func_merge_1x1.h" #include "test_maxpool2d_1x1.h" #include "test_maxpool2d_2x2.h" #include "test_maxpool2d_3x2x2.h" #include "test_maxpool2d_3x3x3.h" -#include "test_benchmark.h" +#include "test_relu_10.h" -bool tensor_test() -{ - { - const int i = 3; - const int j = 5; - const int k = 10; - Tensor t(i, j, k); - - float c = 1.f; - for (int ii = 0; ii < i; ii++) { - for (int jj = 0; jj < j; jj++) { - for (int kk = 0; kk < k; kk++) { - t(ii, jj, kk) = c; - c += 1.f; - } - } +bool tensor_test() { + { + const int i = 3; + const int j = 5; + const int k = 10; + Tensor t(i, j, k); + + float c = 1.f; + for (int ii = 0; ii < i; ii++) { + for (int jj = 0; jj < j; jj++) { + for (int kk = 0; kk < k; kk++) { + t(ii, jj, kk) = c; + c += 1.f; } + } + } - c = 1.f; - int cc = 0; - for (int ii = 0; ii < i; ii++) { - for (int jj = 0; jj < j; jj++) { - for (int kk = 0; kk < k; kk++) { - KASSERT_EQ(t(ii, jj, kk), c, 1e-9); - KASSERT_EQ(t.data_[cc], c, 1e-9); - c += 1.f; - cc++; - } - } + c = 1.f; + int cc = 0; + for (int ii = 0; ii < i; ii++) { + for (int jj = 0; jj < j; jj++) { + for (int kk = 0; kk < k; kk++) { + KASSERT_EQ(t(ii, jj, kk), c, 1e-9); + KASSERT_EQ(t.data_[cc], c, 1e-9); + c += 1.f; + cc++; } + } } - - { - const int i = 2; - const int j = 3; - const int k = 4; - const int l = 5; - Tensor t(i, j, k, l); - - float c = 1.f; - for (int ii = 0; ii < i; ii++) { - for (int jj = 0; jj < j; jj++) { - for (int kk = 0; kk < k; kk++) { - for (int ll = 0; ll < l; ll++) { - t(ii, jj, kk, ll) = c; - c += 1.f; - } - } - } + } + + { + const int i = 2; + const int j = 3; + const int k = 4; + const int l = 5; + Tensor t(i, j, k, l); + + float c = 1.f; + for (int ii = 0; ii < i; ii++) { + for (int jj = 0; jj < j; jj++) { + for (int kk = 0; kk < k; kk++) { + for (int ll = 0; ll < l; ll++) { + t(ii, jj, kk, ll) = c; + c += 1.f; + } } + } + } - c = 1.f; - int cc = 0; - for (int ii = 0; ii < i; ii++) { - for (int jj = 0; jj < j; jj++) { - for (int kk = 0; kk < k; kk++) { - for (int ll = 0; ll < l; ll++) { - KASSERT_EQ(t(ii, jj, kk, ll), c, 1e-9); - KASSERT_EQ(t.data_[cc], c, 1e-9); - c += 1.f; - cc++; - } - } - } + c = 1.f; + int cc = 0; + for (int ii = 0; ii < i; ii++) { + for (int jj = 0; jj < j; jj++) { + for (int kk = 0; kk < k; kk++) { + for (int ll = 0; ll < l; ll++) { + KASSERT_EQ(t(ii, jj, kk, ll), c, 1e-9); + KASSERT_EQ(t.data_[cc], c, 1e-9); + c += 1.f; + cc++; + } } + } } + } - return true; + return true; } -int main() -{ - double load_time = 0.0; - double apply_time = 0.0; +} // namespace kerasify - if 
(!tensor_test())
-        return 1;
+  if (!kerasify::tensor_test()) return 1;
 
-    if (!test_dense_1x1(&load_time, &apply_time))
-        return 1;
+  if (!kerasify::test_dense_1x1(&load_time, &apply_time)) return 1;
 
-    if (!test_dense_10x1(&load_time, &apply_time))
-        return 1;
+  if (!kerasify::test_dense_10x1(&load_time, &apply_time)) return 1;
 
-    if (!test_dense_2x2(&load_time, &apply_time))
-        return 1;
+  if (!kerasify::test_dense_2x2(&load_time, &apply_time)) return 1;
 
-    if (!test_dense_10x10(&load_time, &apply_time))
-        return 1;
+  if (!kerasify::test_dense_10x10(&load_time, &apply_time)) return 1;
 
-    if (!test_dense_10x10x10(&load_time, &apply_time))
-        return 1;
+  if (!kerasify::test_dense_10x10x10(&load_time, &apply_time)) return 1;
 
-    if (!test_conv_2x2(&load_time, &apply_time))
-        return 1;
+  if (!kerasify::test_conv_2x2(&load_time, &apply_time)) return 1;
 
-    if (!test_conv_3x3(&load_time, &apply_time))
-        return 1;
+  if (!kerasify::test_conv_3x3(&load_time, &apply_time)) return 1;
 
-    if (!test_conv_3x3x3(&load_time, &apply_time))
-        return 1;
+  if (!kerasify::test_conv_3x3x3(&load_time, &apply_time)) return 1;
 
-    if (!test_elu_10(&load_time, &apply_time))
-        return 1;
+  if (!kerasify::test_elu_10(&load_time, &apply_time)) return 1;
 
-    if (!test_relu_10(&load_time, &apply_time))
-        return 1;
+  if (!kerasify::test_relu_10(&load_time, &apply_time)) return 1;
 
-    if (!test_dense_relu_10(&load_time, &apply_time))
-        return 1;
+  if (!kerasify::test_dense_relu_10(&load_time, &apply_time)) return 1;
 
-    if (!test_conv_softplus_2x2(&load_time, &apply_time))
-        return 1;
+  if (!kerasify::test_conv_softplus_2x2(&load_time, &apply_time)) return 1;
 
-    if (!test_maxpool2d_1x1(&load_time, &apply_time))
-        return 1;
+  if (!kerasify::test_maxpool2d_1x1(&load_time, &apply_time)) return 1;
 
-    if (!test_maxpool2d_2x2(&load_time, &apply_time))
-        return 1;
+  if (!kerasify::test_maxpool2d_2x2(&load_time, &apply_time)) return 1;
 
-    if (!test_maxpool2d_3x2x2(&load_time, &apply_time))
-        return 1;
+  if (!kerasify::test_maxpool2d_3x2x2(&load_time, &apply_time)) return 1;
 
-    if (!test_maxpool2d_3x3x3(&load_time, &apply_time))
-        return 1;
+  if (!kerasify::test_maxpool2d_3x3x3(&load_time, &apply_time)) return 1;
 
-    // Run benchmark 5 times and report duration.
-    double total_load_time = 0.0;
-    double total_apply_time = 0.0;
+  if (!kerasify::test_func_dense_1x1(&load_time, &apply_time)) return 1;
 
-    for (int i = 0; i < 5; i++)
-    {
-        if (!test_benchmark(&load_time, &apply_time))
-            return 1;
+  if (!kerasify::test_func_conv_2x2(&load_time, &apply_time)) return 1;
 
-        total_load_time += load_time;
-        total_apply_time += apply_time;
-    }
+  if (!kerasify::test_func_maxpool2d_3x3x3(&load_time, &apply_time)) return 1;
+
+  if (!kerasify::test_func_merge_1x1(&load_time, &apply_time)) return 1;
 
-    printf("Benchmark network loads in %fs\n", total_load_time / 5);
-    printf("Benchmark network runs in %fs\n", total_apply_time / 5);
+  if (!kerasify::test_func_dense_1x1_multi_in(&load_time, &apply_time))
+    return 1;
 
-    return 0;
-}
\ No newline at end of file
+  if (!kerasify::test_func_dense_1x1_multi_out(&load_time, &apply_time))
+    return 1;
+
+  if (!kerasify::test_func_dense_1x1_multi_in_out(&load_time, &apply_time))
+    return 1;
+
+  if (!kerasify::test_func_conv_2x2_multi_in_out(&load_time, &apply_time))
+    return 1;
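+
+  // Usage sketch of the functional-API entry point these tests exercise
+  // (layer names here are illustrative, not taken from a generated test):
+  //   kerasify::KerasModel model;
+  //   model.LoadModel("model.bin");
+  //   kerasify::Tensor a(10), b(10), result;
+  //   kerasify::TensorMap in = {{"input_a", &a}, {"input_b", &b}};
+  //   kerasify::TensorMap out = {{"dense_out", &result}};
+  //   model.Apply(in, &out);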
+
+  // Run benchmark 5 times and report duration.
+  double total_load_time = 0.0;
+  double total_apply_time = 0.0;
+
+  for (int i = 0; i < 5; i++) {
+    if (!kerasify::test_benchmark(&load_time, &apply_time)) return 1;
+
+    total_load_time += load_time;
+    total_apply_time += apply_time;
+  }
+
+  printf("Benchmark network loads in %fs\n", total_load_time / 5);
+  printf("Benchmark network runs in %fs\n", total_apply_time / 5);
+
+  return 0;
+}
diff --git a/kerasify.py b/kerasify.py
index 30d039c..22e95fd 100644
--- a/kerasify.py
+++ b/kerasify.py
@@ -7,111 +7,174 @@
 LAYER_ELU = 4
 LAYER_ACTIVATION = 5
 LAYER_MAXPOOLING2D = 6
+LAYER_INPUT = 7
+LAYER_MERGE = 8
 
 ACTIVATION_LINEAR = 1
 ACTIVATION_RELU = 2
 ACTIVATION_SOFTPLUS = 3
 
+
 def write_floats(file, floats):
-    '''
-    Writes floats to file in 1024 chunks.. prevents memory explosion
+    """
+    Writes floats to file in 1024-element chunks, preventing memory explosion
     writing very large arrays to disk when calling struct.pack().
-    '''
-    step = 1024
-    written = 0
-
-    for i in np.arange(0, len(floats), step):
-        remaining = min(len(floats) - i, step)
-        written += remaining
-        file.write(struct.pack('=%sf' % remaining, *floats[i:i+remaining]))
-
-    assert written == len(floats)
-
-def export_model(model, filename):
-    with open(filename, 'wb') as f:
-
-        def write_activation(activation):
-            if activation == 'linear':
-                f.write(struct.pack('I', ACTIVATION_LINEAR))
-            elif activation == 'relu':
-                f.write(struct.pack('I', ACTIVATION_RELU))
-            elif activation == 'softplus':
-                f.write(struct.pack('I', ACTIVATION_SOFTPLUS))
-            else:
-                assert False, "Unsupported activation type: %s" % activation
-
-        num_layers = len(model.layers)
-        f.write(struct.pack('I', num_layers))
+    """
+    step = 1024
+    written = 0
 
-        for layer in model.layers:
-            layer_type = type(layer).__name__
+    for i in np.arange(0, len(floats), step):
+        remaining = min(len(floats) - i, step)
+        written += remaining
+        file.write(struct.pack('=%sf' % remaining, *floats[i:i + remaining]))
 
-            if layer_type == 'Dense':
-                weights = layer.get_weights()[0]
-                biases = layer.get_weights()[1]
-                activation = layer.get_config()['activation']
+    assert written == len(floats)
 
-                f.write(struct.pack('I', LAYER_DENSE))
-                f.write(struct.pack('I', weights.shape[0]))
-                f.write(struct.pack('I', weights.shape[1]))
-                f.write(struct.pack('I', biases.shape[0]))
 
-                weights = weights.flatten()
-                biases = biases.flatten()
+def write_strings(file, strings):
+    """
+    Writes a length-prefixed list of strings to file, each 4-byte aligned.
+    """
+    file.write(struct.pack('I', len(strings)))
+    for string in strings:
+        write_string(file, string)
 
-                write_floats(f, weights)
-                write_floats(f, biases)
 
-                write_activation(activation)
+def write_string(file, string):
+    """
+    Writes a single string to file, null-padded to a 4-byte boundary.
+    """
+    length = len(string)
 
-            elif layer_type == 'Convolution2D':
-                assert layer.border_mode == 'valid', "Only border_mode=valid is implemented"
+    # Round length up to the nearest multiple of 4.
+    size = (length + 3) // 4 * 4
 
-                weights = layer.get_weights()[0]
-                biases = layer.get_weights()[1]
-                activation = layer.get_config()['activation']
+    file.write(struct.pack('I', size))
+    file.write(struct.pack('%ds' % size, string))
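+# Example: write_string(f, 'dense_1') emits the length prefix 8 (7 rounded up
+# to a multiple of 4) followed by 'dense_1\x00' (null-padded to 8 bytes).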
-                # The kernel is accessed in reverse order. To simplify the C side we'll
-                # flip the weight matrix for each kernel.
-                weights = weights[:,:,::-1,::-1]
-
-                f.write(struct.pack('I', LAYER_CONVOLUTION2D))
-                f.write(struct.pack('I', weights.shape[0]))
-                f.write(struct.pack('I', weights.shape[1]))
-                f.write(struct.pack('I', weights.shape[2]))
-                f.write(struct.pack('I', weights.shape[3]))
-                f.write(struct.pack('I', biases.shape[0]))
-
-                weights = weights.flatten()
-                biases = biases.flatten()
-
-                write_floats(f, weights)
-                write_floats(f, biases)
-
-                write_activation(activation)
-
-            elif layer_type == 'Flatten':
-                f.write(struct.pack('I', LAYER_FLATTEN))
-
-            elif layer_type == 'ELU':
-                f.write(struct.pack('I', LAYER_ELU))
-                f.write(struct.pack('f', layer.alpha))
+def export_model(model, filename):
+    with open(filename, 'wb') as f:
+
+        def write_activation(activation):
+            if activation == 'linear':
+                f.write(struct.pack('I', ACTIVATION_LINEAR))
+            elif activation == 'relu':
+                f.write(struct.pack('I', ACTIVATION_RELU))
+            elif activation == 'softplus':
+                f.write(struct.pack('I', ACTIVATION_SOFTPLUS))
+            else:
+                assert False, 'Unsupported activation type: %s' % activation
+
+        # Sequential models hide the Input layer within the first layer's
+        # inbound nodes and these do not appear in the layer list.
+        layers = []
+        layer_map = {}
+        for layer in model.layers:
+            for node in layer.inbound_nodes:
+                for inbound_layer in node.inbound_layers:
+                    # TODO(hemalshah): Handle dependent layers recursively.
+                    if inbound_layer.name not in layer_map:
+                        layer_map[inbound_layer.name] = inbound_layer
+                        layers.append(inbound_layer)
+            layer_map[layer.name] = layer
+            layers.append(layer)
+
+        num_layers = len(layers)
+        f.write(struct.pack('I', num_layers))
+
+        write_strings(f, model.input_names)
+        write_strings(f, model.output_names)
+
+        for layer in layers:
+            layer_type = type(layer).__name__
+
+            name = layer.name
+            write_string(f, name)
+
+            inbound_layer_names = []
+            for node in layer.inbound_nodes:
+                for inbound_layer in node.inbound_layers:
+                    inbound_layer_names.append(inbound_layer.name)
+            write_strings(f, inbound_layer_names)
+
+            if layer_type == 'Dense':
+                weights = layer.get_weights()[0]
+                biases = layer.get_weights()[1]
+                activation = layer.get_config()['activation']
+
+                f.write(struct.pack('I', LAYER_DENSE))
+                f.write(struct.pack('I', weights.shape[0]))
+                f.write(struct.pack('I', weights.shape[1]))
+                f.write(struct.pack('I', biases.shape[0]))
+
+                weights = weights.flatten()
+                biases = biases.flatten()
+
+                write_floats(f, weights)
+                write_floats(f, biases)
+
+                write_activation(activation)
+
+            elif layer_type == 'InputLayer':
+                f.write(struct.pack('I', LAYER_INPUT))
+
+            elif layer_type == 'Merge':
+                assert layer.concat_axis == -1, ('Only concatenation along '
+                                                 'the last axis is implemented')
+                assert layer.mode == 'concat', 'Only concatenation implemented'
+                f.write(struct.pack('I', LAYER_MERGE))
+
+            elif layer_type == 'Convolution2D':
+                assert layer.border_mode == 'valid', ('Only border_mode=valid '
+                                                      'is implemented')
+
+                weights = layer.get_weights()[0]
+                biases = layer.get_weights()[1]
+                activation = layer.get_config()['activation']
+
+                # The kernel is accessed in reverse order. To simplify the C side we'll
+                # flip the weight matrix for each kernel.
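+                # (The C++ loop computes a plain sliding dot product, i.e.
+                # correlation; pre-flipping the kernel here is what makes that
+                # equal to the convolution Keras performs.)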
+ weights = weights[:, :, ::-1, ::-1] + + f.write(struct.pack('I', LAYER_CONVOLUTION2D)) + f.write(struct.pack('I', weights.shape[0])) + f.write(struct.pack('I', weights.shape[1])) + f.write(struct.pack('I', weights.shape[2])) + f.write(struct.pack('I', weights.shape[3])) + f.write(struct.pack('I', biases.shape[0])) + + weights = weights.flatten() + biases = biases.flatten() + + write_floats(f, weights) + write_floats(f, biases) + + write_activation(activation) + + elif layer_type == 'Flatten': + f.write(struct.pack('I', LAYER_FLATTEN)) + + elif layer_type == 'ELU': + f.write(struct.pack('I', LAYER_ELU)) + f.write(struct.pack('f', layer.alpha)) + + elif layer_type == 'Activation': + activation = layer.get_config()['activation'] - elif layer_type == 'Activation': - activation = layer.get_config()['activation'] + f.write(struct.pack('I', LAYER_ACTIVATION)) - f.write(struct.pack('I', LAYER_ACTIVATION)) - write_activation(activation) + write_activation(activation) - elif layer_type == 'MaxPooling2D': - assert layer.border_mode == 'valid', "Only border_mode=valid is implemented" + elif layer_type == 'MaxPooling2D': + assert layer.border_mode == 'valid', ('Only border_mode=valid is ' + 'implemented') - pool_size = layer.get_config()['pool_size'] + pool_size = layer.get_config()['pool_size'] - f.write(struct.pack('I', LAYER_MAXPOOLING2D)) - f.write(struct.pack('I', pool_size[0])) - f.write(struct.pack('I', pool_size[1])) - + f.write(struct.pack('I', LAYER_MAXPOOLING2D)) + f.write(struct.pack('I', pool_size[0])) + f.write(struct.pack('I', pool_size[1])) - else: - assert False, "Unsupported layer type: %s" % layer_type + else: + assert False, 'Unsupported layer type: %s' % layer_type diff --git a/make_tests.py b/make_tests.py index 153775e..d59292e 100644 --- a/make_tests.py +++ b/make_tests.py @@ -1,7 +1,9 @@ import numpy as np import pprint +from keras.models import Model from keras.models import Sequential +from keras.layers import merge, Input from keras.layers import Convolution2D, Dense, Flatten, Activation, MaxPooling2D from keras.layers.advanced_activations import ELU @@ -9,22 +11,7 @@ np.set_printoptions(precision=25, threshold=np.nan) -def c_array(a): - s = pprint.pformat(a.flatten()) - s = s.replace('[', '{').replace(']', '}').replace('array(', '').replace(')', '').replace(', dtype=float32', '') - - shape = '' - - if a.shape == (): - s = '{%s}' % s - shape = '(1)' - else: - shape = repr(a.shape).replace(',)', ')') - - return shape, s - - -TEST_CASE = ''' +TEST_CASE = """ bool test_%s(double* load_time, double* apply_time) { printf("TEST %s\\n"); @@ -32,11 +19,16 @@ def c_array(a): KASSERT(load_time, "Invalid double"); KASSERT(apply_time, "Invalid double"); - Tensor in%s; - in.data_ = %s; + const std::vector input_layer_names = {%s}; + const std::vector output_layer_names = {%s}; + + std::vector in_tensors = { + %s + }; - Tensor out%s; - out.data_ = %s; + std::vector expected = { + %s + }; KerasTimer load_timer; load_timer.Start(); @@ -49,81 +41,131 @@ def c_array(a): KerasTimer apply_timer; apply_timer.Start(); - Tensor predict = out; - KASSERT(model.Apply(&in, &out), "Failed to apply"); + std::vector predicted = expected; + + // Build input tensor map. + TensorMap in; + for (unsigned int i = 0; i < in_tensors.size(); i++) + { + const std::string& input_layer_name = input_layer_names[i]; + in[input_layer_name] = &(in_tensors[i]); + } + + // Build output tensor map. 
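+    // (One preallocated Tensor per requested output layer; Apply() writes
+    // each result back through this map in place.)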
+ TensorMap out; + for (unsigned int i = 0; i < predicted.size(); i++) + { + const std::string& output_layer_name = output_layer_names[i]; + out[output_layer_name] = &(predicted[i]); + } + KASSERT(model.Apply(in, &out), "Failed to apply"); *apply_time = apply_timer.Stop(); - for (int i = 0; i < out.dims_[0]; i++) + for (unsigned int i = 0; i < expected.size(); i++) { - KASSERT_EQ(out(i), predict(i), %s); + Tensor& expect = expected[i]; + Tensor& predict = predicted[i]; + for (int j = 0; j < expect.dims_[0]; j++) + { + KASSERT_EQ(expect(j), predict(j), %s); + } } return true; } -''' +""" + + +def c_array_init(a): + s = pprint.pformat(a.flatten()) + s = s.replace('[', '{').replace(']', '}').replace('array(', '').replace( + ')', '').replace(', dtype=float32', '') -def output_testcase(model, test_x, test_y, name, eps): - print "Processing %s" % name - model.compile(loss='mean_squared_error', optimizer='adamax') - model.fit(test_x, test_y, nb_epoch=1, verbose=False) - predict_y = model.predict(test_x).astype('f') - print model.summary() + shape = '' - export_model(model, 'test_%s.model' % name) + if a.shape == () or a.shape == (1,): + s = '{%s}' % s + shape = '{{1}}' + elif a.shape: + shape = repr(a.shape).replace(',)', ')') - with open('test_%s.h' % name, 'w') as f: - x_shape, x_data = c_array(test_x[0]) - y_shape, y_data = c_array(predict_y[0]) + shape = shape.replace('(', '{').replace(')', '}') + return shape, s - f.write(TEST_CASE % (name, name, x_shape, x_data, y_shape, y_data, name, eps)) +def tensor_map_init(tensor_list, join_str): + y = ['{%s, %s}' % c_array_init(tensor) for tensor in tensor_list] + y = join_str.join(y) + return y -''' Dense 1x1 ''' +def output_testcase(model, test_x_list, test_y_list, name, eps): + print 'Processing %s' % name + assert isinstance(test_x_list, list), 'test_x_list must be a list.' + assert isinstance(test_y_list, list), 'test_y_list must be a list.' 
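+
+  # Train for one epoch so the exported weights are non-trivial, run a
+  # reference prediction in Keras, then emit a C++ test header that
+  # replays the same inputs against the exported .model file.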
+
+  model.compile(loss='mean_squared_error', optimizer='adamax')
+  model.fit(test_x_list, test_y_list, nb_epoch=1, verbose=False)
+  predict_y_list = model.predict(test_x_list)
+  if not isinstance(predict_y_list, list):
+    predict_y_list = [predict_y_list]
+
+  model.summary()
+
+  export_model(model, 'test_%s.model' % name)
+
+  with open('test_%s.h' % name, 'w') as f:
+    predict_x_list = [test_x[0] for test_x in test_x_list]
+    x_map = tensor_map_init(predict_x_list, ',\n      ')
+    y_map = tensor_map_init(predict_y_list, ',\n      ')
+    input_layer_names = ', '.join(
+        ["\"%s\"" % layer_name for layer_name in model.input_names])
+    output_layer_names = ', '.join(
+        ["\"%s\"" % layer_name for layer_name in model.output_names])
+    f.write(TEST_CASE % (name, name, input_layer_names, output_layer_names,
+                         x_map, y_map, name, eps))
+
+
+""" Dense 1x1 """
 test_x = np.arange(10)
 test_y = test_x * 10 + 1
 model = Sequential()
 model.add(Dense(1, input_dim=1))
 
-output_testcase(model, test_x, test_y, 'dense_1x1', '1e-6')
-
-''' Dense 10x1 '''
+output_testcase(model, [test_x], [test_y], 'dense_1x1', '1e-6')
+""" Dense 10x1 """
 test_x = np.random.rand(10, 10).astype('f')
 test_y = np.random.rand(10).astype('f')
 model = Sequential()
 model.add(Dense(1, input_dim=10))
 
-output_testcase(model, test_x, test_y, 'dense_10x1', '1e-6')
-
-''' Dense 2x2 '''
+output_testcase(model, [test_x], [test_y], 'dense_10x1', '1e-6')
+""" Dense 2x2 """
 test_x = np.random.rand(10, 2).astype('f')
 test_y = np.random.rand(10).astype('f')
 model = Sequential()
 model.add(Dense(2, input_dim=2))
 model.add(Dense(1))
 
-output_testcase(model, test_x, test_y, 'dense_2x2', '1e-6')
-
-''' Dense 10x10 '''
+output_testcase(model, [test_x], [test_y], 'dense_2x2', '1e-6')
+""" Dense 10x10 """
 test_x = np.random.rand(10, 10).astype('f')
 test_y = np.random.rand(10).astype('f')
 model = Sequential()
 model.add(Dense(10, input_dim=10))
 model.add(Dense(1))
 
-output_testcase(model, test_x, test_y, 'dense_10x10', '1e-6')
-
-''' Dense 10x10x10 '''
+output_testcase(model, [test_x], [test_y], 'dense_10x10', '1e-6')
+""" Dense 10x10x10 """
 test_x = np.random.rand(10, 10).astype('f')
 test_y = np.random.rand(10, 10).astype('f')
 model = Sequential()
 model.add(Dense(10, input_dim=10))
 model.add(Dense(10))
 
-output_testcase(model, test_x, test_y, 'dense_10x10x10', '1e-6')
-
-''' Conv 2x2 '''
+output_testcase(model, [test_x], [test_y], 'dense_10x10x10', '1e-6')
+""" Conv 2x2 """
 test_x = np.random.rand(10, 1, 2, 2).astype('f')
 test_y = np.random.rand(10, 1).astype('f')
 model = Sequential()
@@ -131,9 +173,8 @@ def output_testcase(model, test_x, test_y, name, eps):
 model.add(Flatten())
 model.add(Dense(1))
 
-output_testcase(model, test_x, test_y, 'conv_2x2', '1e-6')
-
-''' Conv 3x3 '''
+output_testcase(model, [test_x], [test_y], 'conv_2x2', '1e-6')
+""" Conv 3x3 """
 test_x = np.random.rand(10, 1, 3, 3).astype('f')
 test_y = np.random.rand(10, 1).astype('f')
 model = Sequential()
@@ -141,9 +182,8 @@ def output_testcase(model, test_x, test_y, name, eps):
 model.add(Flatten())
 model.add(Dense(1))
 
-output_testcase(model, test_x, test_y, 'conv_3x3', '1e-6')
-
-''' Conv 3x3x3 '''
+output_testcase(model, [test_x], [test_y], 'conv_3x3', '1e-6')
+""" Conv 3x3x3 """
 test_x = np.random.rand(10, 3, 10, 10).astype('f')
 test_y = np.random.rand(10, 1).astype('f')
 model = Sequential()
@@ -151,9 +191,8 @@ def output_testcase(model, test_x, test_y, name, eps):
 model.add(Flatten())
 model.add(Dense(1))
 
-output_testcase(model, test_x, test_y, 'conv_3x3x3', '1e-6')
-
-''' Activation ELU '''
+output_testcase(model, [test_x], [test_y], 'conv_3x3x3', '1e-6')
+""" Activation ELU """
 test_x = np.random.rand(1, 10).astype('f')
 test_y = np.random.rand(1, 1).astype('f')
 model = Sequential()
@@ -161,18 +200,16 @@ def output_testcase(model, test_x, test_y, name, eps):
 model.add(ELU(alpha=0.5))
 model.add(Dense(1))
 
-output_testcase(model, test_x, test_y, 'elu_10', '1e-6')
-
-''' Activation relu '''
+output_testcase(model, [test_x], [test_y], 'elu_10', '1e-6')
+""" Activation relu """
 test_x = np.random.rand(1, 10).astype('f')
 test_y = np.random.rand(1, 10).astype('f')
 model = Sequential()
 model.add(Dense(10, input_dim=10))
 model.add(Activation('relu'))
 
-output_testcase(model, test_x, test_y, 'relu_10', '1e-6')
-
-''' Dense relu '''
+output_testcase(model, [test_x], [test_y], 'relu_10', '1e-6')
+""" Dense relu """
 test_x = np.random.rand(1, 10).astype('f')
 test_y = np.random.rand(1, 10).astype('f')
 model = Sequential()
@@ -180,9 +217,8 @@ def output_testcase(model, test_x, test_y, name, eps):
 model.add(Dense(10, input_dim=10, activation='relu'))
 model.add(Dense(10, input_dim=10, activation='relu'))
 
-output_testcase(model, test_x, test_y, 'dense_relu_10', '1e-6')
-
-''' Conv softplus '''
+output_testcase(model, [test_x], [test_y], 'dense_relu_10', '1e-6')
+""" Conv softplus """
 test_x = np.random.rand(10, 1, 2, 2).astype('f')
 test_y = np.random.rand(10, 1).astype('f')
 model = Sequential()
@@ -190,10 +226,8 @@ def output_testcase(model, test_x, test_y, name, eps):
 model.add(Flatten())
 model.add(Dense(1))
 
-output_testcase(model, test_x, test_y, 'conv_softplus_2x2', '1e-6')
-
-
-''' Maxpooling2D 1x1'''
+output_testcase(model, [test_x], [test_y], 'conv_softplus_2x2', '1e-6')
+""" Maxpooling2D 1x1 """
 test_x = np.random.rand(10, 1, 10, 10).astype('f')
 test_y = np.random.rand(10, 1).astype('f')
 model = Sequential()
@@ -201,9 +235,8 @@ def output_testcase(model, test_x, test_y, name, eps):
 model.add(Flatten())
 model.add(Dense(1))
 
-output_testcase(model, test_x, test_y, 'maxpool2d_1x1', '1e-6')
-
-''' Maxpooling2D 2x2'''
+output_testcase(model, [test_x], [test_y], 'maxpool2d_1x1', '1e-6')
+""" Maxpooling2D 2x2 """
 test_x = np.random.rand(10, 1, 10, 10).astype('f')
 test_y = np.random.rand(10, 1).astype('f')
 model = Sequential()
@@ -211,9 +244,8 @@ def output_testcase(model, test_x, test_y, name, eps):
 model.add(Flatten())
 model.add(Dense(1))
 
-output_testcase(model, test_x, test_y, 'maxpool2d_2x2', '1e-6')
-
-''' Maxpooling2D 3x2x2'''
+output_testcase(model, [test_x], [test_y], 'maxpool2d_2x2', '1e-6')
+""" Maxpooling2D 3x2x2 """
 test_x = np.random.rand(10, 3, 10, 10).astype('f')
 test_y = np.random.rand(10, 1).astype('f')
 model = Sequential()
@@ -221,9 +253,8 @@ def output_testcase(model, test_x, test_y, name, eps):
 model.add(Flatten())
 model.add(Dense(1))
 
-output_testcase(model, test_x, test_y, 'maxpool2d_3x2x2', '1e-6')
-
-''' Maxpooling2D 3x3x3'''
+output_testcase(model, [test_x], [test_y], 'maxpool2d_3x2x2', '1e-6')
+""" Maxpooling2D 3x3x3 """
 test_x = np.random.rand(10, 3, 10, 10).astype('f')
 test_y = np.random.rand(10, 1).astype('f')
 model = Sequential()
@@ -231,10 +262,8 @@ def output_testcase(model, test_x, test_y, name, eps):
 model.add(Flatten())
 model.add(Dense(1))
 
-output_testcase(model, test_x, test_y, 'maxpool2d_3x3x3', '1e-6')
-
-
-''' Benchmark '''
+output_testcase(model, [test_x], [test_y], 'maxpool2d_3x3x3', '1e-6')
+""" Benchmark """
 test_x = np.random.rand(1, 3, 128, 128).astype('f')
 test_y = np.random.rand(1, 10).astype('f')
 model = Sequential()
@@ -246,6 +275,125 @@ def output_testcase(model, test_x, test_y, name, eps):
 model.add(Dense(1000, activation='relu'))
 model.add(Dense(10))
 
-output_testcase(model, test_x, test_y, 'benchmark', '1e-3')
+output_testcase(model, [test_x], [test_y], 'benchmark', '1e-3')
+
+###
+# Functional Model Support Tests.
+###
+""" Functional Dense 1x1 """
+test_x = np.arange(10)
+test_y = test_x * 10 + 1
+input = Input(name='in1', shape=(1,))
+output = Dense(1, name='out1')(input)
+model = Model(input=input, output=output)
+
+output_testcase(model, [test_x], [test_y], 'func_dense_1x1', '1e-6')
+""" Functional Conv 2x2 """
+test_x = np.random.rand(10, 1, 2, 2).astype('f')
+test_y = np.random.rand(10, 1).astype('f')
+
+input0 = Input(name='in0', shape=(1, 2, 2))
+conv0 = Convolution2D(1, 2, 2)(input0)
+f0 = Flatten()(conv0)
+output0 = Dense(1)(f0)
+
+model = Model(input=input0, output=output0)
+
+output_testcase(model, [test_x], [test_y], 'func_conv_2x2', '1e-6')
+""" Functional Maxpooling2D 3x3x3 """
+test_x = np.random.rand(10, 3, 10, 10).astype('f')
+test_y = np.random.rand(10, 1).astype('f')
+
+input0 = Input(name='in0', shape=(3, 10, 10))
+conv0 = MaxPooling2D(pool_size=(3, 3))(input0)
+f0 = Flatten()(conv0)
+output0 = Dense(1)(f0)
+
+model = Model(input=input0, output=output0)
+
+output_testcase(model, [test_x], [test_y], 'func_maxpool2d_3x3x3', '1e-6')
+""" Functional Merge 1x1 """
+test_x = np.arange(10)
+test_y = test_x * 10 + 1
+input = Input(name='in1', shape=(1,))
+h1 = Dense(1, name='hidden1')(input)
+h2 = Dense(1, name='hidden2')(h1)
+hA = Dense(1, name='hiddenA')(input)
+m = merge([h2, hA], mode='concat', concat_axis=-1)
+output = Dense(1, name='out1')(m)
+model = Model(input=input, output=output)
+
+output_testcase(model, [test_x], [test_y], 'func_merge_1x1', '1e-6')
+""" Functional Dense 1x1 Multi In """
+test_x_list = [np.random.rand(10).astype('f'), np.random.rand(10).astype('f')]
+test_y_list = [test_x_list[0] * 10 + 1]
+
+input0 = Input(name='in0', shape=(1,))
+input1 = Input(name='in1', shape=(1,))
+h1 = Dense(1, name='hidden1')(input0)
+h2 = Dense(1, name='hidden2')(h1)
+
+hA = Dense(1, name='hiddenA')(input1)
+m = merge([h2, hA], mode='concat', concat_axis=-1)
+output0 = Dense(1, name='out0')(m)
+model = Model(input=[input0, input1], output=[output0])
+
+output_testcase(model, test_x_list, test_y_list,
+                'func_dense_1x1_multi_in', '1e-6')
+""" Functional Dense 1x1 Multi Out """
+test_x_list = [np.arange(10)]
+test_y_list = [test_x_list[0] * 10 + 1, test_x_list[0] * 5 + 2]
+
+input0 = Input(name='in0', shape=(1,))
+h1 = Dense(1, name='hidden1')(input0)
+output0 = Dense(1, name='out0')(h1)
+output1 = Dense(1, name='out1')(h1)
+model = Model(input=[input0], output=[output0, output1])
+
+output_testcase(model, test_x_list, test_y_list,
+                'func_dense_1x1_multi_out', '1e-6')
+""" Functional Dense 1x1 Multi In Out """
+test_x_list = [np.arange(10), np.arange(10)]
+test_y_list = [test_x_list[0] * 10 + 1, test_x_list[1] * 5 + 2]
+
+input0 = Input(name='in0', shape=(1,))
+input1 = Input(name='in1', shape=(1,))
+h1 = Dense(1, name='hidden1')(input0)
+h2 = Dense(1, name='hidden2')(h1)
+
+hA = Dense(1, name='hiddenA')(input1)
+m = merge([h2, hA], mode='concat', concat_axis=-1)
+output0 = Dense(1, name='out0')(m)
+output1 = Dense(1, name='out1')(m)
+model = Model(input=[input0, input1], output=[output0, output1])
+
+output_testcase(model, test_x_list, test_y_list,
+                'func_dense_1x1_multi_in_out', '1e-6')
+""" Functional Conv 2x2 Multi In Out """
+test_x_list = [
+    np.random.rand(10, 1, 2, 2).astype('f'),
+    np.random.rand(10, 1, 2, 2).astype('f')
+]
+test_y_list = [
+    np.random.rand(10, 1).astype('f'), np.random.rand(10, 1).astype('f')
+]
+
+input0 = Input(name='in0', shape=(1, 2, 2))
+conv0 = Convolution2D(1, 2, 2)(input0)
+f0 = Flatten()(conv0)
+
+input1 = Input(name='in1', shape=(1, 2, 2))
+conv1 = Convolution2D(1, 2, 2)(input1)
+f1 = Flatten()(conv1)
+
+m = merge([f0, f1], mode='concat', concat_axis=-1)
+
+output0 = Dense(1, name='out0')(m)
+output1 = Dense(1, name='out1')(m)
+
+model = Model(input=[input0, input1], output=[output0, output1])
+
+output_testcase(model, test_x_list, test_y_list,
+                'func_conv_2x2_multi_in_out', '1e-6')
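
For context, this is roughly how a generated header exercises the new multi-input API end to end. A minimal hand-written consumer sketch, not part of the patch: it assumes the brace-initialized {dims, data} Tensor form emitted by tensor_map_init above, the TensorMap/Apply API from the TEST_CASE template, a LoadModel(filename) method as in upstream kerasify, and the kerasify namespace introduced by this change.

#include "keras_model.h"

int main() {
    // Load a model exported by export_model() (file name is illustrative,
    // taken from the func_dense_1x1_multi_in test above).
    kerasify::KerasModel model;
    if (!model.LoadModel("test_func_dense_1x1_multi_in.model")) {
        return 1;
    }

    // One tensor per input layer, brace-initialized as {dims, data} just
    // like the literals tensor_map_init() generates.
    kerasify::Tensor in0{{1}, {0.5}};
    kerasify::Tensor in1{{1}, {0.25}};

    // Map keys are the Keras layer names from model.input_names.
    kerasify::TensorMap in;
    in["in0"] = &in0;
    in["in1"] = &in1;

    // Output tensors are registered the same way and filled by Apply().
    kerasify::Tensor out0{{1}, {0.0}};
    kerasify::TensorMap out;
    out["out0"] = &out0;

    if (!model.Apply(in, &out)) {
        return 1;
    }

    // out0.data_[0] now holds the prediction for the 'out0' head.
    return 0;
}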