diff --git a/shared/DSPFilters/include/DspFilters/Bessel.h b/shared/DSPFilters/include/DspFilters/Bessel.h
index 4ea3fb07..e507925c 100644
--- a/shared/DSPFilters/include/DspFilters/Bessel.h
+++ b/shared/DSPFilters/include/DspFilters/Bessel.h
@@ -116,7 +116,7 @@ class AnalogLowShelf : public LayoutBase
 
 // Factored implementations to reduce template instantiations
 
-struct LowPassBase : PoleFilterBase <AnalogLowPass>
+struct LowPassBase : AnalogPoleFilterBase <AnalogLowPass>
 {
   void setup (int order,
               double sampleRate,
@@ -124,7 +124,7 @@ struct LowPassBase : PoleFilterBase <AnalogLowPass>
               WorkspaceBase* w);
 };
 
-struct HighPassBase : PoleFilterBase <AnalogLowPass>
+struct HighPassBase : AnalogPoleFilterBase <AnalogLowPass>
 {
   void setup (int order,
               double sampleRate,
@@ -132,7 +132,7 @@ struct HighPassBase : PoleFilterBase <AnalogLowPass>
               WorkspaceBase* w);
 };
 
-struct BandPassBase : PoleFilterBase <AnalogLowPass>
+struct BandPassBase : AnalogPoleFilterBase <AnalogLowPass>
 {
   void setup (int order,
               double sampleRate,
@@ -141,7 +141,7 @@ struct BandPassBase : PoleFilterBase <AnalogLowPass>
               WorkspaceBase* w);
 };
 
-struct BandStopBase : PoleFilterBase <AnalogLowPass>
+struct BandStopBase : AnalogPoleFilterBase <AnalogLowPass>
 {
   void setup (int order,
               double sampleRate,
@@ -150,7 +150,7 @@ struct BandStopBase : PoleFilterBase <AnalogLowPass>
               WorkspaceBase* w);
 };
 
-struct LowShelfBase : PoleFilterBase <AnalogLowShelf>
+struct LowShelfBase : AnalogPoleFilterBase <AnalogLowShelf>
 {
   void setup (int order,
               double sampleRate,
diff --git a/shared/DSPFilters/include/DspFilters/Biquad.h b/shared/DSPFilters/include/DspFilters/Biquad.h
index 01cb8813..a6c2a239 100644
--- a/shared/DSPFilters/include/DspFilters/Biquad.h
+++ b/shared/DSPFilters/include/DspFilters/Biquad.h
@@ -36,6 +36,12 @@ THE SOFTWARE.
 #ifndef DSPFILTERS_BIQUAD_H
 #define DSPFILTERS_BIQUAD_H
 
+#ifdef __SSE3__
+#include <pmmintrin.h>
+#elif defined(__ARM_NEON__)
+#include <arm_neon.h>
+#endif
+
 #include "DspFilters/Common.h"
 #include "DspFilters/MathSupplement.h"
 #include "DspFilters/Types.h"
@@ -81,8 +87,10 @@ class BiquadBase
   template <class StateType, typename Sample>
   void process (int numSamples, Sample* dest, StateType& state) const
   {
-    while (--numSamples >= 0)
-      *dest++ = state.process (*dest, *this);
+    while (--numSamples >= 0) {
+      *dest = state.process (*dest, *this);
+      dest++;
+    }
   }
 
 protected:
@@ -114,12 +122,23 @@ class BiquadBase
   void applyScale (double scale);
 
 public:
-  double m_a0;
-  double m_a1;
-  double m_a2;
-  double m_b1;
-  double m_b2;
-  double m_b0;
+  #ifdef __SSE3__
+                   //   3     2     1     0
+  __m128 m_vab12;  // [m_b2  m_b1  m_a2  m_a1]
+  __m128 m_va0;    // [m_a0  m_a0  m_a0  m_a0]
+  __m128 m_vb0;    // [m_b0  m_b0  m_b0  m_b0]
+#elif defined(__ARM_NEON__)
+                        //   3     2     1     0
+  float32x4_t m_vab12;  // [m_b2  m_a2  m_b1  m_a1]
+  float32x2_t m_va0;    // [m_a0  m_a0]
+  float32x2_t m_vb0;    // [m_b0  m_b0]
+#endif
+  float m_a0;
+  float m_a1;
+  float m_a2;
+  float m_b1;
+  float m_b2;
+  float m_b0;
 };
 
 //------------------------------------------------------------------------------
@@ -169,7 +188,8 @@ class Biquad : public BiquadBase
       sectionPrev.m_b1 += db1;
       sectionPrev.m_b2 += db2;
 
-      *dest++ = state.process (*dest, sectionPrev);
+      *dest = state.process (*dest, sectionPrev);
+      dest++;
     }
   }
 
@@ -198,7 +218,8 @@ class Biquad : public BiquadBase
       zPrev.zeros.second += dz1;
       zPrev.gain += dg;
 
-      *dest++ = state.process (*dest, Biquad (zPrev));
+      *dest = state.process (*dest, Biquad (zPrev));
+      dest++;
     }
   }
 
diff --git a/shared/DSPFilters/include/DspFilters/Butterworth.h b/shared/DSPFilters/include/DspFilters/Butterworth.h
index 8ce16635..9a023353 100644
--- a/shared/DSPFilters/include/DspFilters/Butterworth.h
+++ b/shared/DSPFilters/include/DspFilters/Butterworth.h
@@ -82,21 +82,21 @@ class AnalogLowShelf : public LayoutBase
 
 // Factored implementations to reduce template instantiations
 
-struct LowPassBase : PoleFilterBase <AnalogLowPass>
+struct LowPassBase : AnalogPoleFilterBase <AnalogLowPass>
 {
   void setup (int order,
               double sampleRate,
               double cutoffFrequency);
 };
 
-struct HighPassBase : PoleFilterBase <AnalogLowPass>
+struct HighPassBase : AnalogPoleFilterBase <AnalogLowPass>
 {
   void setup (int order,
               double sampleRate,
               double cutoffFrequency);
 };
 
-struct BandPassBase : PoleFilterBase <AnalogLowPass>
+struct BandPassBase : AnalogPoleFilterBase <AnalogLowPass>
 {
   void setup (int order,
               double sampleRate,
@@ -104,7 +104,7 @@ struct BandPassBase : PoleFilterBase <AnalogLowPass>
               double widthFrequency);
 };
 
-struct BandStopBase : PoleFilterBase <AnalogLowPass>
+struct BandStopBase : AnalogPoleFilterBase <AnalogLowPass>
 {
   void setup (int order,
               double sampleRate,
@@ -112,7 +112,7 @@ struct BandStopBase : PoleFilterBase <AnalogLowPass>
               double widthFrequency);
 };
 
-struct LowShelfBase : PoleFilterBase <AnalogLowShelf>
+struct LowShelfBase : AnalogPoleFilterBase <AnalogLowShelf>
 {
   void setup (int order,
               double sampleRate,
@@ -120,7 +120,7 @@ struct LowShelfBase : PoleFilterBase <AnalogLowShelf>
               double gainDb);
 };
 
-struct HighShelfBase : PoleFilterBase <AnalogLowShelf>
+struct HighShelfBase : AnalogPoleFilterBase <AnalogLowShelf>
 {
   void setup (int order,
               double sampleRate,
@@ -128,7 +128,7 @@ struct HighShelfBase : PoleFilterBase <AnalogLowShelf>
               double gainDb);
 };
 
-struct BandShelfBase : PoleFilterBase <AnalogLowShelf>
+struct BandShelfBase : AnalogPoleFilterBase <AnalogLowShelf>
 {
   void setup (int order,
               double sampleRate,
diff --git a/shared/DSPFilters/include/DspFilters/Cascade.h b/shared/DSPFilters/include/DspFilters/Cascade.h
index 139548d8..b738cb45 100644
--- a/shared/DSPFilters/include/DspFilters/Cascade.h
+++ b/shared/DSPFilters/include/DspFilters/Cascade.h
@@ -36,6 +36,22 @@ THE SOFTWARE.
 #ifndef DSPFILTERS_CASCADE_H
 #define DSPFILTERS_CASCADE_H
 
+#ifdef __SSE3__
+#include <pmmintrin.h>
+#if (__GNUC__ == 4 && __GNUC_MINOR__ < 8)
+#ifndef __m128_buggy_gxx_up_to_4_7_type
+#define __m128_buggy_gxx_up_to_4_7_type
+union __m128_buggy_gxx_up_to_4_7 {
+    __m128 v;
+    float e[4];
+};
+#endif
+#endif
+#elif defined(__ARM_NEON__)
+#include <arm_neon.h>
+#endif
+
+#include <type_traits>
 #include "DspFilters/Common.h"
 #include "DspFilters/Biquad.h"
 #include "DspFilters/Filter.h"
@@ -53,23 +69,65 @@ namespace Dsp {
 class Cascade
 {
 public:
-  template <class StateType>
+  template <class StateType, bool Simd = StateType::HasSimd>
   class StateBase : private DenormalPrevention
   {
   public:
     template <typename Sample>
-    inline Sample process (const Sample in, const Cascade& c)
+    inline typename std::enable_if<Simd, Sample>::type
+    process (const Sample in, const Cascade& c)
     {
-      double out = in;
       StateType* state = m_stateArray;
-      Biquad const* stage = c.m_stageArray;
-      const double vsa = ac();
-      int i = c.m_numStages - 1;
-        out = (state++)->process1 (out, *stage++, vsa);
-      for (; --i >= 0;)
+      const Biquad* stage = c.m_stageArray;
+#ifdef __SSE3__
+      const __m128 vsa = _mm_set1_ps(ac());
+      __m128 out = _mm_set1_ps(in);
+      out = (state++)->process1Simd (out, *stage++, vsa);
+#elif defined(__ARM_NEON__)
+      const float32x2_t vsa = vdup_n_f32(ac());
+      float32x2_t out = vdup_n_f32(in);
+      out = (state++)->process1Simd (out, *stage++, vsa);
+#else
+      const typename StateType::FPType vsa = ac();
+      typename StateType::FPType out = in;
+      out = (state++)->process1 (out, *stage++, vsa);
+#endif
+      for (int i = 0; i < c.m_numStages - 1; i++) {
+#ifdef __SSE3__
+        out = (state++)->process1Simd (out, *stage++, _mm_setzero_ps());
+#elif defined(__ARM_NEON__)
+        out = (state++)->process1Simd (out, *stage++, vdup_n_f32(0));
+#else
         out = (state++)->process1 (out, *stage++, 0);
-      //for (int i = c.m_numStages; --i >= 0; ++state, ++stage)
-      //  out = state->process1 (out, *stage, vsa);
+#endif
+      }
+#ifdef __SSE3__
+#if (__GNUC__ == 4 && __GNUC_MINOR__ < 8)
+      __m128_buggy_gxx_up_to_4_7 out_e;
+      out_e.v = out;
+      return out_e.e[0];
+#else
+      return static_cast<Sample> (out[0]);
+#endif
+#elif defined(__ARM_NEON__)
+      return static_cast<Sample> (vget_lane_f32(out, 0));
+#else
+      return static_cast<Sample> (out);
+#endif
+    }
+
+    template <typename Sample>
+    inline typename std::enable_if<!Simd, Sample>::type
+    process (const Sample in, const Cascade& c)
+    {
+      StateType* state = m_stateArray;
+      const Biquad* stage = c.m_stageArray;
+      const typename StateType::FPType vsa = ac();
+      typename StateType::FPType out = in;
+      out = (state++)->process1 (out, *stage++, vsa);
+      for (int i = 0; i < c.m_numStages - 1; i++) {
+        out = (state++)->process1 (out, *stage++, 0);
+      }
       return static_cast<Sample> (out);
     }
 
@@ -111,6 +169,8 @@ class Cascade
   }
 
 public:
+  virtual ~Cascade() {}
+
   // Calculate filter response at the given normalized frequency.
   complex_t response (double normalizedFrequency) const;
 
@@ -120,8 +180,10 @@ class Cascade
   template <class StateType, typename Sample>
   void process (int numSamples, Sample* dest, StateType& state) const
   {
-    while (--numSamples >= 0)
-      *dest++ = state.process (*dest, *this);
+    while (--numSamples >= 0) {
+      *dest = state.process (*dest, *this);
+      dest++;
+    }
   }
 
 protected:
@@ -129,7 +191,7 @@ class Cascade
 
   void setCascadeStorage (const Storage& storage);
 
-  void applyScale (double scale);
+  virtual void applyScale (double scale);
   void setLayout (const LayoutBase& proto);
 
 private:
diff --git a/shared/DSPFilters/include/DspFilters/ChebyshevI.h b/shared/DSPFilters/include/DspFilters/ChebyshevI.h
index d97a3a19..2ca610c0 100644
--- a/shared/DSPFilters/include/DspFilters/ChebyshevI.h
+++ b/shared/DSPFilters/include/DspFilters/ChebyshevI.h
@@ -87,7 +87,7 @@ class AnalogLowShelf : public LayoutBase
 
 // Factored implementations to reduce template instantiations
 
-struct LowPassBase : PoleFilterBase <AnalogLowPass>
+struct LowPassBase : AnalogPoleFilterBase <AnalogLowPass>
 {
   void setup (int order,
               double sampleRate,
@@ -95,7 +95,7 @@ struct LowPassBase : PoleFilterBase <AnalogLowPass>
               double rippleDb);
 };
 
-struct HighPassBase : PoleFilterBase <AnalogLowPass>
+struct HighPassBase : AnalogPoleFilterBase <AnalogLowPass>
 {
   void setup (int order,
               double sampleRate,
@@ -103,7 +103,7 @@ struct HighPassBase : PoleFilterBase <AnalogLowPass>
               double rippleDb);
 };
 
-struct BandPassBase : PoleFilterBase <AnalogLowPass>
+struct BandPassBase : AnalogPoleFilterBase <AnalogLowPass>
 {
   void setup (int order,
               double sampleRate,
@@ -112,7 +112,7 @@ struct BandPassBase : PoleFilterBase <AnalogLowPass>
               double rippleDb);
 };
 
-struct BandStopBase : PoleFilterBase <AnalogLowPass>
+struct BandStopBase : AnalogPoleFilterBase <AnalogLowPass>
 {
   void setup (int order,
               double sampleRate,
@@ -121,7 +121,7 @@ struct BandStopBase : PoleFilterBase <AnalogLowPass>
               double rippleDb);
 };
 
-struct LowShelfBase : PoleFilterBase <AnalogLowShelf>
+struct LowShelfBase : AnalogPoleFilterBase <AnalogLowShelf>
 {
   void setup (int order,
               double sampleRate,
@@ -130,7 +130,7 @@ struct LowShelfBase : PoleFilterBase <AnalogLowShelf>
               double rippleDb);
 };
 
-struct HighShelfBase : PoleFilterBase <AnalogLowShelf>
+struct HighShelfBase : AnalogPoleFilterBase <AnalogLowShelf>
 {
   void setup (int order,
               double sampleRate,
@@ -139,7 +139,7 @@ struct HighShelfBase : PoleFilterBase <AnalogLowShelf>
               double rippleDb);
 };
 
-struct BandShelfBase : PoleFilterBase <AnalogLowShelf>
+struct BandShelfBase : AnalogPoleFilterBase <AnalogLowShelf>
 {
   void setup (int order,
               double sampleRate,
diff --git a/shared/DSPFilters/include/DspFilters/ChebyshevII.h b/shared/DSPFilters/include/DspFilters/ChebyshevII.h
index 9d0b4673..d0545cd0 100644
--- a/shared/DSPFilters/include/DspFilters/ChebyshevII.h
+++ b/shared/DSPFilters/include/DspFilters/ChebyshevII.h
@@ -87,7 +87,7 @@ class AnalogLowShelf : public LayoutBase
 
 // Factored implementations to reduce template instantiations
 
-struct LowPassBase : PoleFilterBase <AnalogLowPass>
+struct LowPassBase : AnalogPoleFilterBase <AnalogLowPass>
 {
   void setup (int order,
               double sampleRate,
@@ -95,7 +95,7 @@ struct LowPassBase : PoleFilterBase <AnalogLowPass>
               double stopBandDb);
 };
 
-struct HighPassBase : PoleFilterBase <AnalogLowPass>
+struct HighPassBase : AnalogPoleFilterBase <AnalogLowPass>
 {
   void setup (int order,
               double sampleRate,
@@ -103,7 +103,7 @@ struct HighPassBase : PoleFilterBase <AnalogLowPass>
               double stopBandDb);
 };
 
-struct BandPassBase : PoleFilterBase <AnalogLowPass>
+struct BandPassBase : AnalogPoleFilterBase <AnalogLowPass>
 {
   void setup (int order,
               double sampleRate,
@@ -112,7 +112,7 @@ struct BandPassBase : PoleFilterBase <AnalogLowPass>
               double stopBandDb);
 };
 
-struct BandStopBase : PoleFilterBase <AnalogLowPass>
+struct BandStopBase : AnalogPoleFilterBase <AnalogLowPass>
 {
   void setup (int order,
               double sampleRate,
@@ -121,7 +121,7 @@ struct BandStopBase : PoleFilterBase <AnalogLowPass>
               double stopBandDb);
 };
 
-struct LowShelfBase : PoleFilterBase <AnalogLowShelf>
+struct LowShelfBase : AnalogPoleFilterBase <AnalogLowShelf>
 {
   void setup (int order,
               double sampleRate,
@@ -130,7 +130,7 @@ struct LowShelfBase : PoleFilterBase <AnalogLowShelf>
               double stopBandDb);
 };
 
-struct HighShelfBase : PoleFilterBase <AnalogLowShelf>
+struct HighShelfBase : AnalogPoleFilterBase <AnalogLowShelf>
 {
   void setup (int order,
               double sampleRate,
@@ -139,7 +139,7 @@ struct HighShelfBase : PoleFilterBase <AnalogLowShelf>
               double stopBandDb);
 };
 
-struct BandShelfBase : PoleFilterBase <AnalogLowShelf>
+struct BandShelfBase : AnalogPoleFilterBase <AnalogLowShelf>
 {
   void setup (int order,
               double sampleRate,
diff --git a/shared/DSPFilters/include/DspFilters/Dsp.h b/shared/DSPFilters/include/DspFilters/Dsp.h
index bc084e48..a9ef2103 100644
--- a/shared/DSPFilters/include/DspFilters/Dsp.h
+++ b/shared/DSPFilters/include/DspFilters/Dsp.h
@@ -58,5 +58,6 @@ THE SOFTWARE.
 #include "DspFilters/Elliptic.h"
 #include "DspFilters/Legendre.h"
 #include "DspFilters/RBJ.h"
+#include "DspFilters/RASTA.h"
 
 #endif
diff --git a/shared/DSPFilters/include/DspFilters/Elliptic.h b/shared/DSPFilters/include/DspFilters/Elliptic.h
index 341f0a63..a99838d8 100644
--- a/shared/DSPFilters/include/DspFilters/Elliptic.h
+++ b/shared/DSPFilters/include/DspFilters/Elliptic.h
@@ -126,7 +126,7 @@ class AnalogLowPass : public LayoutBase
 
 // Factored implementations to reduce template instantiations
 
-struct LowPassBase : PoleFilterBase <AnalogLowPass>
+struct LowPassBase : AnalogPoleFilterBase <AnalogLowPass>
 {
   void setup (int order,
               double sampleRate,
@@ -135,7 +135,7 @@ struct LowPassBase : PoleFilterBase <AnalogLowPass>
               double rolloff);
 };
 
-struct HighPassBase : PoleFilterBase <AnalogLowPass>
+struct HighPassBase : AnalogPoleFilterBase <AnalogLowPass>
 {
   void setup (int order,
               double sampleRate,
@@ -144,7 +144,7 @@ struct HighPassBase : PoleFilterBase <AnalogLowPass>
               double rolloff);
 };
 
-struct BandPassBase : PoleFilterBase <AnalogLowPass>
+struct BandPassBase : AnalogPoleFilterBase <AnalogLowPass>
 {
   void setup (int order,
               double sampleRate,
@@ -154,7 +154,7 @@ struct BandPassBase : PoleFilterBase <AnalogLowPass>
               double rolloff);
 };
 
-struct BandStopBase : PoleFilterBase <AnalogLowPass>
+struct BandStopBase : AnalogPoleFilterBase <AnalogLowPass>
 {
   void setup (int order,
               double sampleRate,
diff --git a/shared/DSPFilters/include/DspFilters/Filter.h b/shared/DSPFilters/include/DspFilters/Filter.h
index f110db25..91770b62 100644
--- a/shared/DSPFilters/include/DspFilters/Filter.h
+++ b/shared/DSPFilters/include/DspFilters/Filter.h
@@ -193,7 +193,7 @@ class FilterDesignBase : public Filter
 
 template <class DesignClass,
           int Channels = 0,
-          class StateType = DirectFormII>
+          class StateType = DirectFormII<float>>
 class FilterDesign : public FilterDesignBase <DesignClass>
 {
 public:
@@ -239,7 +239,7 @@ class FilterDesign : public FilterDesignBase <DesignClass>
  */
 template <class FilterClass,
           int Channels = 0,
-          class StateType = DirectFormII>
+          class StateType = DirectFormII<float>>
 class SimpleFilter : public FilterClass
 {
 public:
diff --git a/shared/DSPFilters/include/DspFilters/Layout.h b/shared/DSPFilters/include/DspFilters/Layout.h
index 9d8aab3a..fc3820ec 100644
--- a/shared/DSPFilters/include/DspFilters/Layout.h
+++ b/shared/DSPFilters/include/DspFilters/Layout.h
@@ -107,8 +107,7 @@ class LayoutBase
   void add (const ComplexPair& poles, const ComplexPair& zeros)
   {
     assert (!(m_numPoles&1)); // single comes last
-    assert (poles.isMatchedPair ());
-    assert (zeros.isMatchedPair ());
+    assert (poles.isMatchedPair () || zeros.isMatchedPair ());
     m_pair[m_numPoles/2] = PoleZeroPair (poles.first, zeros.first,
                                          poles.second, zeros.second);
     m_numPoles += 2;
diff --git a/shared/DSPFilters/include/DspFilters/Legendre.h b/shared/DSPFilters/include/DspFilters/Legendre.h
index 5bb03102..a69a93ae 100644
--- a/shared/DSPFilters/include/DspFilters/Legendre.h
+++ b/shared/DSPFilters/include/DspFilters/Legendre.h
@@ -163,7 +163,7 @@ class AnalogLowPass : public LayoutBase
 
 // Factored implementations to reduce template instantiations
 
-struct LowPassBase : PoleFilterBase <AnalogLowPass>
+struct LowPassBase : AnalogPoleFilterBase <AnalogLowPass>
 {
   void setup (int order,
               double sampleRate,
@@ -171,7 +171,7 @@ struct LowPassBase : PoleFilterBase <AnalogLowPass>
               WorkspaceBase* w);
 };
 
-struct HighPassBase : PoleFilterBase <AnalogLowPass>
+struct HighPassBase : AnalogPoleFilterBase <AnalogLowPass>
 {
   void setup (int order,
               double sampleRate,
@@ -179,7 +179,7 @@ struct HighPassBase : PoleFilterBase <AnalogLowPass>
               WorkspaceBase* w);
 };
 
-struct BandPassBase : PoleFilterBase <AnalogLowPass>
+struct BandPassBase : AnalogPoleFilterBase <AnalogLowPass>
 {
   void setup (int order,
               double sampleRate,
@@ -188,7 +188,7 @@ struct BandPassBase : PoleFilterBase <AnalogLowPass>
               WorkspaceBase* w);
 };
 
-struct BandStopBase : PoleFilterBase <AnalogLowPass>
+struct BandStopBase : AnalogPoleFilterBase <AnalogLowPass>
 {
   void setup (int order,
               double sampleRate,
diff --git a/shared/DSPFilters/include/DspFilters/PoleFilter.h b/shared/DSPFilters/include/DspFilters/PoleFilter.h
index 2af80734..a64f746b 100644
--- a/shared/DSPFilters/include/DspFilters/PoleFilter.h
+++ b/shared/DSPFilters/include/DspFilters/PoleFilter.h
@@ -54,7 +54,8 @@ namespace Dsp {
 
 // Factored implementations to reduce template instantiations
 
-class PoleFilterBase2 : public Cascade
+template <class DigitalPrototype = LayoutBase>
+class DigitalPoleFilterBase : public Cascade
 {
 public:
   // This gets the poles/zeros directly from the digital
@@ -80,13 +81,13 @@ class PoleFilterBase2 : public Cascade
 #endif
 
 protected:
-  LayoutBase m_digitalProto;
+  DigitalPrototype m_digitalProto;
 };
 
 // Serves a container to hold the analog prototype
 // and the digital pole/zero layout.
 template <class AnalogPrototype>
-class PoleFilterBase : public PoleFilterBase2
+class AnalogPoleFilterBase : public DigitalPoleFilterBase<>
 {
 protected:
   void setPrototypeStorage (const LayoutBase& analogStorage,
diff --git a/shared/DSPFilters/include/DspFilters/RASTA.h b/shared/DSPFilters/include/DspFilters/RASTA.h
new file mode 100644
index 00000000..d3d04f83
--- /dev/null
+++ b/shared/DSPFilters/include/DspFilters/RASTA.h
@@ -0,0 +1,100 @@
+/*******************************************************************************
+
+"A Collection of Useful C++ Classes for Digital Signal Processing"
+ By Vinnie Falco
+
+Official project location:
+https://github.com/vinniefalco/DSPFilters
+
+See Documentation.cpp for contact information, notes, and bibliography.
+
+--------------------------------------------------------------------------------
+
+License: MIT License (http://www.opensource.org/licenses/mit-license.php)
+Copyright (c) 2009 by Vinnie Falco
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+*******************************************************************************
+
+RASTA filter has the following transfer function:
+
+               2 + z^-1 - z^-3 - 2z^-4
+H(z) = scale * -----------------------
+                     1 - a*z^-1
+
+where scale = (2^2 + 1^2 + (-1)^2 + (-2)^2)^-1 and a is around 0.94.
+
+Reference:
+http://labrosa.ee.columbia.edu/~dpwe/papers/HermM94-rasta.pdf
+http://labrosa.ee.columbia.edu/matlab/rastamat/rastafilt.m
+
+We decompose H(z) as the product of 2 biquads:
+
+               (1 - z^-1)(1 + z^-1)       1 + (1/2)z^-1 + z^-2
+H(z) = scale * -------------------- * 2 * --------------------
+                         1                     1 - a*z^-1
+
+The latter numerator has complex roots: -1/4 +- sqrt(15/16)i.
+
+******************************************************************************/
+
+#ifndef RASTA_H_
+#define RASTA_H_
+
+#include "DspFilters/Common.h"
+#include "DspFilters/Cascade.h"
+#include "DspFilters/Design.h"
+#include "DspFilters/Filter.h"
+#include "DspFilters/PoleFilter.h"
+
+namespace Dsp {
+
+namespace RASTA {
+
+class DigitalRASTA : public LayoutBase
+{
+public:
+  DigitalRASTA ();
+
+  void design (double pole);
+
+private:
+  double m_pole;
+  Layout <4> m_storage;
+};
+
+
+class Filter : public DigitalPoleFilterBase <DigitalRASTA>
+             , public CascadeStages <2>
+{
+public:
+  Filter ();
+
+  void setup (double pole);
+
+protected:
+  virtual void applyScale (double scale);
+};
+
+}
+
+}
+
+#endif /* RASTA_H_ */
diff --git a/shared/DSPFilters/include/DspFilters/SmoothedFilter.h b/shared/DSPFilters/include/DspFilters/SmoothedFilter.h
index e4927f07..ec7c173a 100644
--- a/shared/DSPFilters/include/DspFilters/SmoothedFilter.h
+++ b/shared/DSPFilters/include/DspFilters/SmoothedFilter.h
@@ -47,7 +47,7 @@ namespace Dsp {
  */
 template <class DesignClass,
           int Channels,
-          class StateType = DirectFormII>
+          class StateType = DirectFormII<float>>
 class SmoothedFilterDesign
   : public FilterDesign <DesignClass,
                        Channels,
diff --git a/shared/DSPFilters/include/DspFilters/State.h b/shared/DSPFilters/include/DspFilters/State.h
index 3bca2012..845db150 100644
--- a/shared/DSPFilters/include/DspFilters/State.h
+++ b/shared/DSPFilters/include/DspFilters/State.h
@@ -36,6 +36,21 @@ THE SOFTWARE.
 #ifndef DSPFILTERS_STATE_H
 #define DSPFILTERS_STATE_H
 
+#ifdef __SSE3__
+#include <pmmintrin.h>
+#if (__GNUC__ == 4 && __GNUC_MINOR__ < 8)
+#ifndef __m128_buggy_gxx_up_to_4_7_type
+#define __m128_buggy_gxx_up_to_4_7_type
+union __m128_buggy_gxx_up_to_4_7 {
+    __m128 v;
+    float e[4];
+};
+#endif
+#endif
+#elif defined(__ARM_NEON__)
+#include <arm_neon.h>
+#endif
+
 #include "DspFilters/Common.h"
 #include "DspFilters/Biquad.h"
 
@@ -59,9 +74,19 @@ namespace Dsp {
  *  y[n] = (b0/a0)*x[n] + (b1/a0)*x[n-1] + (b2/a0)*x[n-2]
  *                      - (a1/a0)*y[n-1] - (a2/a0)*y[n-2]  
  */
+template <class FP, bool Simd =
+#ifdef __ARM_NEON__
+    true
+#else
+    false
+#endif
+    >
 class DirectFormI
 {
 public:
+  typedef FP FPType;
+  static constexpr bool HasSimd = Simd;
+
   DirectFormI ()
   {
     reset();
@@ -69,33 +94,94 @@ class DirectFormI
 
   void reset ()
   {
+#ifdef __SSE3__
+    m_xy = _mm_setzero_ps();
+#elif defined(__ARM_NEON__)
+    m_xy = vdupq_n_f32(0);
+#endif
     m_x1 = 0;
     m_x2 = 0;
     m_y1 = 0;
     m_y2 = 0;
   }
 
-  template <typename Sample>
-  inline Sample process1 (const Sample in,
+#ifdef __SSE3__
+  inline __m128 process1Simd (const __m128 in,
+#elif defined(__ARM_NEON__)
+  inline float32x2_t process1Simd (const float32x2_t in,
+#else
+  inline FP process1Simd (const FP in,
+#endif
                           const BiquadBase& s,
-                          const double vsa) // very small amount
+#ifdef __SSE3__
+                          const __m128 vsa) // very small amount
+#elif defined(__ARM_NEON__)
+                          const float32x2_t vsa) // very small amount
+#else
+                          const FP vsa) // very small amount
+#endif
   {
-    double out = s.m_b0*in + s.m_b1*m_x1 + s.m_b2*m_x2
-                           - s.m_a1*m_y1 - s.m_a2*m_y2
-                           + vsa;
+#ifdef __SSE3__
+    const __m128 neg = _mm_set_ps(1, 1, -1, -1);
+    __m128 tmp = _mm_mul_ps(s.m_vab12, m_xy);
+    tmp = _mm_mul_ps(tmp, neg);
+    tmp = _mm_hadd_ps(tmp, tmp);
+    tmp = _mm_hadd_ps(tmp, tmp);
+    tmp = _mm_add_ps(tmp, vsa);
+    __m128 out = _mm_mul_ps(in, s.m_vb0);
+    out = _mm_add_ps(out, tmp);
+#if (__GNUC__ == 4 && __GNUC_MINOR__ < 8)
+    __m128_buggy_gxx_up_to_4_7 m_xy_e, in_e, out_e;
+    m_xy_e.v = m_xy;
+    in_e.v = in;
+    out_e.v = out;
+    m_xy = _mm_set_ps(m_xy_e.e[2], in_e.e[0], m_xy_e.e[0], out_e.e[0]);
+#else
+    m_xy = _mm_set_ps(m_xy[2], in[0], m_xy[0], out[0]);
+#endif
+    return out;
+#elif defined(__ARM_NEON__)
+    const float32x4_t neg = { 1, -1, 1, -1 };
+    float32x4_t tmp = vmulq_f32(s.m_vab12, m_xy);
+    tmp = vmulq_f32(tmp, neg);
+    float32x2_t sum = vpadd_f32(vget_low_f32(tmp), vget_high_f32(tmp));
+    sum = vpadd_f32(sum, sum);
+    float32x2_t out = vmla_f32(sum, s.m_vb0, in);
+    out = vadd_f32(out, vsa);
+    float32x2_t inout = vtrn_f32(in, out).val[0];
+    m_xy = vcombine_f32(inout, vget_low_f32(m_xy));
+    return out;
+#else
+    return in;
+#endif
+  }
+
+  inline FP process1 (const FP in,
+                      const BiquadBase& s,
+                      const FP vsa) // very small amount
+  {
+    FP out = s.m_b0*in + s.m_b1*m_x1 + s.m_b2*m_x2
+                       - s.m_a1*m_y1 - s.m_a2*m_y2
+             + vsa;
     m_x2 = m_x1;
     m_y2 = m_y1;
     m_x1 = in;
     m_y1 = out;
-
-    return static_cast<Sample> (out);
+    return out;
   }
 
 protected:
-  double m_x2; // x[n-2]
-  double m_y2; // y[n-2]
-  double m_x1; // x[n-1]
-  double m_y1; // y[n-1]
+#ifdef __SSE3__
+                //   3     2     1     0
+  __m128 m_xy;  // [m_x2  m_x1  m_y2  m_y1]
+#elif defined(__ARM_NEON__)
+                     //   3     2     1     0
+  float32x4_t m_xy;  // [m_x2  m_y2  m_x1  m_y1]
+#endif
+  FP m_x2; // x[n-2]
+  FP m_y2; // y[n-2]
+  FP m_x1; // x[n-1]
+  FP m_y1; // y[n-1]
 };
 
 //------------------------------------------------------------------------------
@@ -109,9 +195,13 @@ class DirectFormI
  *  y(n) = (b0/a0)*v[n] + (b1/a0)*v[n-1] + (b2/a0)*v[n-2]
  *
  */
+template <class FP>
 class DirectFormII
 {
 public:
+  typedef FP FPType;
+  static constexpr bool HasSimd = false;
+
   DirectFormII ()
   {
     reset ();
@@ -123,27 +213,27 @@ class DirectFormII
     m_v2 = 0;
   }
 
-  template <typename Sample>
-  Sample process1 (const Sample in,
-                   const BiquadBase& s,
-                   const double vsa)
+  FP process1 (const FP in,
+               const BiquadBase& s,
+               const FP vsa)
   {
-    double w   = in - s.m_a1*m_v1 - s.m_a2*m_v2 + vsa;
-    double out =      s.m_b0*w    + s.m_b1*m_v1 + s.m_b2*m_v2;
+    FP w   = in - s.m_a1*m_v1 - s.m_a2*m_v2 + vsa;
+    FP out =      s.m_b0*w    + s.m_b1*m_v1 + s.m_b2*m_v2;
 
     m_v2 = m_v1;
     m_v1 = w;
 
-    return static_cast<Sample> (out);
+    return out;
   }
 
 private:
-  double m_v1; // v[-1]
-  double m_v2; // v[-2]
+  FP m_v1; // v[-1]
+  FP m_v2; // v[-2]
 };
 
 //------------------------------------------------------------------------------
 
+
 /*
  * Transposed Direct Form I and II
  * by lubomir i. ivanov (neolit123 [at] gmail)
@@ -154,9 +244,13 @@ class DirectFormII
  */
 
 // I think this one is broken
+template <class FP>
 class TransposedDirectFormI
 {
 public:
+  typedef FP FPType;
+  static constexpr bool HasSimd = false;
+
   TransposedDirectFormI ()
   {
     reset ();
@@ -175,12 +269,11 @@ class TransposedDirectFormI
     m_s4_1 = 0;
   }
 
-  template <typename Sample>
-  inline Sample process1 (const Sample in,
-                          const BiquadBase& s,
-                          const double vsa)
+  inline FP process1 (const FP in,
+                      const BiquadBase& s,
+                      const FP vsa)
   {
-    double out;
+    FP out;
 
     // can be: in += m_s1_1;
     m_v = in + m_s1_1;
@@ -195,26 +288,29 @@ class TransposedDirectFormI
     m_s2_1 = m_s2;
     m_s1_1 = m_s1;
 
-    return static_cast<Sample> (out);
+    return out;
   }
 
 private:
-  double m_v;
-  double m_s1;
-  double m_s1_1;
-  double m_s2;
-  double m_s2_1;
-  double m_s3;
-  double m_s3_1;
-  double m_s4;
-  double m_s4_1;
+  FP m_v;
+  FP m_s1;
+  FP m_s1_1;
+  FP m_s2;
+  FP m_s2_1;
+  FP m_s3;
+  FP m_s3_1;
+  FP m_s4;
+  FP m_s4_1;
 };
 
 //------------------------------------------------------------------------------
-
+template <class FP>
 class TransposedDirectFormII
 {
 public:
+  typedef FP FPType;
+  static constexpr bool HasSimd = false;
+
   TransposedDirectFormII ()
   {
     reset ();
@@ -231,9 +327,9 @@ class TransposedDirectFormII
   template <typename Sample>
   inline Sample process1 (const Sample in,
                           const BiquadBase& s,
-                          const double vsa)
+                          const FP vsa)
   {
-    double out;
+    FP out;
 
     out = m_s1_1 + s.m_b0*in + vsa;
     m_s1 = m_s2_1 + s.m_b1*in - s.m_a1*out;
@@ -245,10 +341,10 @@ class TransposedDirectFormII
   }
 
 private:
-  double m_s1;
-  double m_s1_1;
-  double m_s2;
-  double m_s2_1;
+  FP m_s1;
+  FP m_s1_1;
+  FP m_s2;
+  FP m_s2_1;
 };
 
 //------------------------------------------------------------------------------
diff --git a/shared/DSPFilters/source/Biquad.cpp b/shared/DSPFilters/source/Biquad.cpp
index 4401e6fe..65f81341 100644
--- a/shared/DSPFilters/source/Biquad.cpp
+++ b/shared/DSPFilters/source/Biquad.cpp
@@ -128,6 +128,18 @@ void BiquadBase::setCoefficients (double a0, double a1, double a2,
   m_b0 = b0/a0;
   m_b1 = b1/a0;
   m_b2 = b2/a0;
+#ifdef __SSE__
+  m_va0 = _mm_set1_ps(m_a0);
+  m_vb0 = _mm_set1_ps(m_b0);
+  m_vab12 = _mm_set_ps(m_b2, m_b1, m_a2, m_a1);
+#elif defined(__ARM_NEON__)
+  m_va0 = vdup_n_f32(m_a0);
+  m_vb0 = vdup_n_f32(m_b0);
+  vsetq_lane_f32(m_a1, m_vab12, 0);
+  vsetq_lane_f32(m_b1, m_vab12, 1);
+  vsetq_lane_f32(m_a2, m_vab12, 2);
+  vsetq_lane_f32(m_b2, m_vab12, 3);
+#endif
 }
 
 void BiquadBase::setOnePole (complex_t pole, complex_t zero)
@@ -214,9 +226,7 @@ void BiquadBase::setIdentity ()
 
 void BiquadBase::applyScale (double scale)
 {
-  m_b0 *= scale;
-  m_b1 *= scale;
-  m_b2 *= scale;
+  setCoefficients(m_a0, m_a1, m_a2, m_b0 * scale, m_b1 * scale, m_b2 * scale);
 }
 
 //------------------------------------------------------------------------------
diff --git a/shared/DSPFilters/source/RASTA.cpp b/shared/DSPFilters/source/RASTA.cpp
new file mode 100644
index 00000000..d8051dc8
--- /dev/null
+++ b/shared/DSPFilters/source/RASTA.cpp
@@ -0,0 +1,77 @@
+/*******************************************************************************
+
+"A Collection of Useful C++ Classes for Digital Signal Processing"
+ By Vinnie Falco
+
+Official project location:
+https://github.com/vinniefalco/DSPFilters
+
+See Documentation.cpp for contact information, notes, and bibliography.
+
+--------------------------------------------------------------------------------
+
+License: MIT License (http://www.opensource.org/licenses/mit-license.php)
+Copyright (c) 2009 by Vinnie Falco
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+*******************************************************************************/
+
+#include "DspFilters/Common.h"
+#include "DspFilters/RASTA.h"
+
+namespace Dsp {
+
+namespace RASTA {
+
+Filter::Filter ()
+{
+  setCascadeStorage (getCascadeStorage());
+}
+
+void Filter::setup (double pole)
+{
+  m_digitalProto.design(pole);
+  Cascade::setLayout (m_digitalProto);
+}
+
+void Filter::applyScale(double)
+{
+  Cascade::applyScale(
+      2. / (2*2 + 1*1 + 1*1 + 2*2));
+}
+
+DigitalRASTA::DigitalRASTA() : m_pole(0.94)
+{
+  setStorage(m_storage);
+}
+
+void DigitalRASTA::design (double pole)
+{
+  add(ComplexPair(complex_t(0, 0), complex_t(0, 0)),
+      ComplexPair(complex_t(1, 0), complex_t(-1, 0)));
+  add(ComplexPair(complex_t(pole, 0), complex_t(0, 0)),
+      ComplexPair(complex_t(-0.25, sqrt(15./16.)),
+                  complex_t(-0.25, -sqrt(15./16.))));
+  setNormal(0, 1);
+}
+
+}
+
+}